diff options
-rw-r--r-- | bs4/tests/test_soup.py | 9 | ||||
-rw-r--r-- | bs4/tests/test_tree.py | 1 | ||||
-rw-r--r-- | doc/source/index.rst | 7 |
3 files changed, 13 insertions, 4 deletions
diff --git a/bs4/tests/test_soup.py b/bs4/tests/test_soup.py
index 39e1964..94f325e 100644
--- a/bs4/tests/test_soup.py
+++ b/bs4/tests/test_soup.py
@@ -21,10 +21,10 @@ from bs4.testing import (
 import warnings
 
 try:
-    import chardet
-    CHARDET_PRESENT = True
+    from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
+    LXML_PRESENT = True
 except ImportError, e:
-    CHARDET_PRESENT = False
+    LXML_PRESENT = False
 
 
 class TestDeprecatedConstructorArguments(SoupTest):
@@ -49,6 +49,9 @@ class TestDeprecatedConstructorArguments(SoupTest):
         self.assertRaises(
             TypeError, self.soup, "<a>", no_such_argument=True)
 
+    @skipIf(
+        not LXML_PRESENT,
+        "lxml not present, not testing BeautifulStoneSoup.")
     def test_beautifulstonesoup(self):
         with warnings.catch_warnings(record=True) as w:
             soup = BeautifulStoneSoup("<markup>")
diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py
index 6cb1b7f..5acaeea 100644
--- a/bs4/tests/test_tree.py
+++ b/bs4/tests/test_tree.py
@@ -1492,7 +1492,6 @@ class TestSoupSelector(TreeTest):
         self.assertSelects('.s1 > a', ['s1a1', 's1a2'])
         self.assertSelects('.s1 > a span', ['s1a2s1'])
 
-
     def test_attribute_equals(self):
         self.assertSelectMultiple(
             ('p[class="onep"]', ['p1']),
diff --git a/doc/source/index.rst b/doc/source/index.rst
index 734851d..5b65354 100644
--- a/doc/source/index.rst
+++ b/doc/source/index.rst
@@ -2670,6 +2670,13 @@ deprecated and removed in Python 3.0. Beautiful Soup 4 uses
 ``html.parser`` by default, but you can plug in lxml or html5lib and
 use that instead. See `Installing a parser`_ for a comparison.
 
+Since ``html.parser`` is not the same parser as ``SGMLParser``, it
+will treat invalid markup differently. Usually the "difference" is
+that ``html.parser`` crashes. In that case, you'll need to install
+another parser. But sometimes ``html.parser`` just creates a different
+parse tree than ``SGMLParser`` would. If this happens, you may need to
+update your BS3 scraping code to deal with the new tree.
+
 Method names
 ^^^^^^^^^^^^
 