diff options
author | Leonard Richardson <leonard.richardson@canonical.com> | 2012-02-24 10:37:47 -0500 |
---|---|---|
committer | Leonard Richardson <leonard.richardson@canonical.com> | 2012-02-24 10:37:47 -0500 |
commit | 2966334d384946c16a104b6c9964a1999b23b838 (patch) | |
tree | 8bb022bfada0c44920f23e536686e3893ee6de79 /bs4 | |
parent | 97b54c4bdbee0f109c444b50d8102ae8d7abb7c4 (diff) |
Warn when SoupStrainer is used with the html5lib tree builder.
Diffstat (limited to 'bs4')
-rw-r--r-- | bs4/builder/_html5lib.py | 3 | ||||
-rw-r--r-- | bs4/tests/test_html5lib.py | 10 |
2 files changed, 11 insertions, 2 deletions
```diff
diff --git a/bs4/builder/_html5lib.py b/bs4/builder/_html5lib.py
index 26b1773..cf716df 100644
--- a/bs4/builder/_html5lib.py
+++ b/bs4/builder/_html5lib.py
@@ -2,6 +2,7 @@ __all__ = [
     'HTML5TreeBuilder',
     ]
 
+import warnings
 from bs4.builder import (
     PERMISSIVE,
     HTML,
@@ -30,6 +31,8 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
 
     # These methods are defined by Beautiful Soup.
     def feed(self, markup):
+        if self.soup.parse_only is not None:
+            warnings.warn("You provided a value for parse_only, but the html5lib tree builder doesn't support parse_only. The entire document will be parsed.")
         parser = html5lib.HTMLParser(tree=self.create_treebuilder)
         doc = parser.parse(markup, encoding=self.user_specified_encoding)
 
diff --git a/bs4/tests/test_html5lib.py b/bs4/tests/test_html5lib.py
index 0828cfd..f195f7d 100644
--- a/bs4/tests/test_html5lib.py
+++ b/bs4/tests/test_html5lib.py
@@ -1,5 +1,7 @@
 """Tests to ensure that the html5lib tree builder generates good trees."""
 
+import warnings
+
 try:
     from bs4.builder import HTML5TreeBuilder
     HTML5LIB_PRESENT = True
@@ -26,11 +28,15 @@ class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest):
         # The html5lib tree builder does not support SoupStrainers.
         strainer = SoupStrainer("b")
         markup = "<p>A <b>bold</b> statement.</p>"
-        soup = self.soup(markup,
-                         parse_only=strainer)
+        with warnings.catch_warnings(record=True) as w:
+            soup = self.soup(markup, parse_only=strainer)
         self.assertEqual(
             soup.decode(), self.document_for(markup))
 
+        self.assertTrue(
+            "the html5lib tree builder doesn't support parse_only" in
+            str(w[0].message))
+
     def test_correctly_nested_tables(self):
         """html5lib inserts <tbody> tags where other parsers don't."""
         markup = ('<table id="1">'
```