summary | refs | log | tree | commit | diff
path: root/bs4
diff options
context:
space:
mode:
author: Leonard Richardson <leonard.richardson@canonical.com> 2012-02-24 10:37:47 -0500
committer: Leonard Richardson <leonard.richardson@canonical.com> 2012-02-24 10:37:47 -0500
commit: 2966334d384946c16a104b6c9964a1999b23b838 (patch)
tree: 8bb022bfada0c44920f23e536686e3893ee6de79 /bs4
parent: 97b54c4bdbee0f109c444b50d8102ae8d7abb7c4 (diff)
Warn when SoupStrainer is used with the html5lib tree builder.
Diffstat (limited to 'bs4')
-rw-r--r--  bs4/builder/_html5lib.py   3
-rw-r--r--  bs4/tests/test_html5lib.py  10
2 files changed, 11 insertions, 2 deletions
diff --git a/bs4/builder/_html5lib.py b/bs4/builder/_html5lib.py
index 26b1773..cf716df 100644
--- a/bs4/builder/_html5lib.py
+++ b/bs4/builder/_html5lib.py
@@ -2,6 +2,7 @@ __all__ = [
'HTML5TreeBuilder',
]
+import warnings
from bs4.builder import (
PERMISSIVE,
HTML,
@@ -30,6 +31,8 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
# These methods are defined by Beautiful Soup.
def feed(self, markup):
+ if self.soup.parse_only is not None:
+ warnings.warn("You provided a value for parse_only, but the html5lib tree builder doesn't support parse_only. The entire document will be parsed.")
parser = html5lib.HTMLParser(tree=self.create_treebuilder)
doc = parser.parse(markup, encoding=self.user_specified_encoding)
diff --git a/bs4/tests/test_html5lib.py b/bs4/tests/test_html5lib.py
index 0828cfd..f195f7d 100644
--- a/bs4/tests/test_html5lib.py
+++ b/bs4/tests/test_html5lib.py
@@ -1,5 +1,7 @@
"""Tests to ensure that the html5lib tree builder generates good trees."""
+import warnings
+
try:
from bs4.builder import HTML5TreeBuilder
HTML5LIB_PRESENT = True
@@ -26,11 +28,15 @@ class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest):
# The html5lib tree builder does not support SoupStrainers.
strainer = SoupStrainer("b")
markup = "<p>A <b>bold</b> statement.</p>"
- soup = self.soup(markup,
- parse_only=strainer)
+ with warnings.catch_warnings(record=True) as w:
+ soup = self.soup(markup, parse_only=strainer)
self.assertEqual(
soup.decode(), self.document_for(markup))
+ self.assertTrue(
+ "the html5lib tree builder doesn't support parse_only" in
+ str(w[0].message))
+
def test_correctly_nested_tables(self):
"""html5lib inserts <tbody> tags where other parsers don't."""
markup = ('<table id="1">'