summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Leonard Richardson <leonardr@segfault.org> 2015-06-28 08:19:53 -0400
committer: Leonard Richardson <leonardr@segfault.org> 2015-06-28 08:19:53 -0400
commit: e7eff8b01e3890f11dacf558bd9fd71c6dcbc29e (patch)
tree: 53715f1704f6cf4c6135dce8c6fd464a0e54fa22
parent: 7d21694874e51a45a3ca03db1ad7c299e36834a7 (diff)
Accept 'xml' as an unambiguous identifier for the lxml XML parser, since it's the only XML parser supported at the moment.
-rw-r--r--  bs4/__init__.py       9
-rw-r--r--  bs4/builder/_lxml.py  1
2 files changed, 8 insertions, 2 deletions
diff --git a/bs4/__init__.py b/bs4/__init__.py
index e167544..f24d69c 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -77,7 +77,7 @@ class BeautifulSoup(Tag):
ASCII_SPACES = '\x20\x0a\x09\x0c\x0d'
- NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nTo get rid of this warning, change this:\n\n BeautifulSoup([your markup])\n\nto this:\n\n BeautifulSoup([your markup], \"%(parser)s\")\n"
+ NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nTo get rid of this warning, change this:\n\n BeautifulSoup([your markup])\n\nto this:\n\n BeautifulSoup([your markup], \"%(parser)s\")\n"
def __init__(self, markup="", features=None, builder=None,
parse_only=None, from_encoding=None, exclude_encodings=None,
@@ -157,8 +157,13 @@ class BeautifulSoup(Tag):
builder = builder_class()
if not (original_features == builder.NAME or
original_features in builder.ALTERNATE_NAMES):
+ if builder.is_xml:
+ markup_type = "XML"
+ else:
+ markup_type = "HTML"
warnings.warn(self.NO_PARSER_SPECIFIED_WARNING % dict(
- parser=builder.NAME))
+ parser=builder.NAME,
+ markup_type=markup_type))
self.builder = builder
self.is_xml = builder.is_xml
diff --git a/bs4/builder/_lxml.py b/bs4/builder/_lxml.py
index 2e33386..9e8f88f 100644
--- a/bs4/builder/_lxml.py
+++ b/bs4/builder/_lxml.py
@@ -31,6 +31,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
is_xml = True
NAME = "lxml-xml"
+ ALTERNATE_NAMES = ["xml"]
# Well, it's permissive by XML parser standards.
features = [NAME, LXML, XML, FAST, PERMISSIVE]