summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- bs4/__init__.py 8
-rw-r--r-- bs4/builder/__init__.py 1
-rw-r--r-- bs4/builder/_lxml.py 5
-rw-r--r-- doc/source/index.rst 4
4 files changed, 10 insertions, 8 deletions
diff --git a/bs4/__init__.py b/bs4/__init__.py
index 34a72e4..b74acee 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -77,7 +77,7 @@ class BeautifulSoup(Tag):
ASCII_SPACES = '\x20\x0a\x09\x0c\x0d'
- NO_PARSER_SPECIFIED_WARNING = 'Parser was not explicitly specified. Using the best available parser for this system ("%s"). The same code on other systems may use a different parser and behave differently.'
+ NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nTo get rid of this warning, change this:\n\n BeautifulSoup([your markup])\n\nto this:\n\n BeautifulSoup([your markup], \"%(parser)s\")\n"
def __init__(self, markup="", features=None, builder=None,
parse_only=None, from_encoding=None, **kwargs):
@@ -155,9 +155,9 @@ class BeautifulSoup(Tag):
% ",".join(features))
builder = builder_class()
if not (original_features == builder.NAME or
- (not isinstance(builder.NAME, basestring) and
- original_features in builder.NAME)):
- warnings.warn(self.NO_PARSER_SPECIFIED_WARNING % builder.NAME)
+ original_features in builder.ALTERNATE_NAMES):
+ warnings.warn(self.NO_PARSER_SPECIFIED_WARNING % dict(
+ parser=builder.NAME))
self.builder = builder
self.is_xml = builder.is_xml
diff --git a/bs4/builder/__init__.py b/bs4/builder/__init__.py
index 0e84fae..820bc80 100644
--- a/bs4/builder/__init__.py
+++ b/bs4/builder/__init__.py
@@ -81,6 +81,7 @@ class TreeBuilder(object):
"""Turn a document into a Beautiful Soup object tree."""
NAME = "[Unknown tree builder]"
+ ALTERNATE_NAMES = []
features = []
is_xml = False
diff --git a/bs4/builder/_lxml.py b/bs4/builder/_lxml.py
index 110e9d2..978c8df 100644
--- a/bs4/builder/_lxml.py
+++ b/bs4/builder/_lxml.py
@@ -214,9 +214,10 @@ class LXMLTreeBuilderForXML(TreeBuilder):
class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
- NAME = [LXML, "lxml-html"]
+ NAME = LXML
+ ALTERNATE_NAMES = ["lxml-html"]
- features = NAME + [HTML, FAST, PERMISSIVE]
+ features = ALTERNATE_NAMES + [NAME, HTML, FAST, PERMISSIVE]
is_xml = False
def default_parser(self, encoding):
diff --git a/doc/source/index.rst b/doc/source/index.rst
index 11d9f88..0d91c1c 100644
--- a/doc/source/index.rst
+++ b/doc/source/index.rst
@@ -68,7 +68,7 @@ Running the "three sisters" document through Beautiful Soup gives us a
data structure::
from bs4 import BeautifulSoup
- soup = BeautifulSoup(html_doc)
+ soup = BeautifulSoup(html_doc, 'html.parser')
print(soup.prettify())
# <html>
@@ -270,7 +270,7 @@ This table summarizes the advantages and disadvantages of each parser library:
| lxml's HTML parser | ``BeautifulSoup(markup, "lxml")`` | * Very fast | * External C dependency |
| | | * Lenient | |
+----------------------+--------------------------------------------+--------------------------------+--------------------------+
-| lxml's XML parser | ``BeautifulSoup(markup, ["lxml", "xml"])`` | * Very fast | * External C dependency |
+| lxml's XML parser | ``BeautifulSoup(markup, "lxml-xml")`` | * Very fast | * External C dependency |
| | ``BeautifulSoup(markup, "xml")`` | * The only currently supported | |
| | | XML parser | |
+----------------------+--------------------------------------------+--------------------------------+--------------------------+