summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/beautifulsoup/tests/helpers.py 67
-rw-r--r-- src/beautifulsoup/tests/test_html5lib.py 25
-rw-r--r-- src/beautifulsoup/tests/test_lxml.py 18
-rw-r--r-- src/beautifulsoup/tests/test_soup.py 6
4 files changed, 84 insertions, 32 deletions
diff --git a/src/beautifulsoup/tests/helpers.py b/src/beautifulsoup/tests/helpers.py
index d237556..a4156cd 100644
--- a/src/beautifulsoup/tests/helpers.py
+++ b/src/beautifulsoup/tests/helpers.py
@@ -7,12 +7,18 @@ from beautifulsoup.builder.lxml_builder import LXMLTreeBuilder
class SoupTest(unittest.TestCase):
- default_builder = None
+ def setUp(self):
+ # LXMLTreeBuilder won't handle bad markup, but that's fine,
+ # since all the parsing tests take place in parser-specific
+ # test suites that override default_builder.
+ self.default_builder = LXMLTreeBuilder()
+
+ def soup(self, markup):
+ """Build a Beautiful Soup object from markup."""
+ return BeautifulSoup(markup, builder=self.default_builder)
def assertSoupEquals(self, to_parse, compare_parsed_to=None):
builder = self.default_builder
- if builder is None:
- builder = LXMLTreeBuilder()
obj = BeautifulSoup(to_parse, builder=builder)
if compare_parsed_to is None:
compare_parsed_to = to_parse
@@ -21,3 +27,58 @@ class SoupTest(unittest.TestCase):
obj.decode(),
builder.test_fragment_to_document(compare_parsed_to))
+
+
+class BuilderSmokeTest(SoupTest):
+ """A generic smoke test for tree builders.
+
+ Subclasses of this test ensure that all of Beautiful Soup's tree
+ builders generate more or less the same trees. It's okay for trees
+ to differ, especially when given invalid markup--just override the
+ appropriate test method to demonstrate how one tree builder
+ differs from others.
+ """
+
+ def test_bare_string(self):
+ # A bare string is turned into some kind of HTML document or
+ # fragment recognizable as the original string.
+ self.assertSoupEquals("A bare string")
+
+ def test_self_closing(self):
+ # HTML's self-closing tags are recognized as such.
+ self.assertSoupEquals(
+ "<p>A <meta> tag</p>", "<p>A <meta /> tag</p>")
+
+ def test_nested_inline_elements(self):
+ # Inline tags can be nested indefinitely.
+ b_tag = "<b>Inside a B tag</b>"
+ self.assertSoupEquals(b_tag)
+
+ nested_b_tag = "<p>A <i>nested <b>tag</b></i></p>"
+ self.assertSoupEquals(nested_b_tag)
+
+ double_nested_b_tag = "<p>A <a>doubly <i>nested <b>tag</b></i></a></p>"
+ self.assertSoupEquals(double_nested_b_tag)
+
+ def test_nested_block_level_elements(self):
+ soup = self.soup('<blockquote><p><b>Foo</b></p></blockquote>')
+ blockquote = soup.blockquote
+ self.assertEqual(blockquote.p.b.string, 'Foo')
+ self.assertEqual(blockquote.b.string, 'Foo')
+
+
+class BuilderInvalidMarkupSmokeTest(SoupTest):
+ """Tests of invalid markup.
+
+ These are very likely to give different results for different tree
+ builders.
+
+ It's not required that a tree builder handle invalid markup at
+ all.
+ """
+
+ def test_unclosed_block_level_elements(self):
+ # Unclosed block-level elements, and inline tags left open in them, should be closed.
+ self.assertSoupEquals(
+ '<blockquote><p><b>Foo</blockquote><p>Bar',
+ '<blockquote><p><b>Foo</b></p></blockquote><p>Bar</p>')
diff --git a/src/beautifulsoup/tests/test_html5lib.py b/src/beautifulsoup/tests/test_html5lib.py
index f92771b..f66e750 100644
--- a/src/beautifulsoup/tests/test_html5lib.py
+++ b/src/beautifulsoup/tests/test_html5lib.py
@@ -1,23 +1,24 @@
-from helpers import SoupTest
+from helpers import BuilderInvalidMarkupSmokeTest, BuilderSmokeTest
from beautifulsoup.builder.html5lib_builder import HTML5TreeBuilder
-class TestHTML5Builder(SoupTest):
+class TestHTML5Builder(BuilderSmokeTest):
+ """See `BuilderSmokeTest`."""
def setUp(self):
self.default_builder = HTML5TreeBuilder()
- def test_bare_string(self):
- self.assertSoupEquals("A bare string")
- def test_tag_nesting(self):
- b_tag = "<b>Inside a B tag</b>"
- self.assertSoupEquals(b_tag)
+class TestHTML5BuilderInvalidMarkup(BuilderInvalidMarkupSmokeTest):
+ """See `BuilderInvalidMarkupSmokeTest`."""
- nested_b_tag = "<p>A <i>nested <b>tag</b></i></p>"
- self.assertSoupEquals(nested_b_tag)
+ def setUp(self):
+ self.default_builder = HTML5TreeBuilder()
- def test_self_closing(self):
+ def test_unclosed_block_level_elements(self):
+ # The unclosed <b> tag is closed so that the block-level tag
+ # can be closed, and another <b> tag is inserted after the
+ # next block-level tag begins.
self.assertSoupEquals(
- "<p>A <meta> tag</p>", "<p>A <meta /> tag</p>")
-
+ '<blockquote><p><b>Foo</blockquote><p>Bar',
+ '<blockquote><p><b>Foo</b></p></blockquote><p><b>Bar</b></p>')
diff --git a/src/beautifulsoup/tests/test_lxml.py b/src/beautifulsoup/tests/test_lxml.py
index c0ed31e..cd22b6f 100644
--- a/src/beautifulsoup/tests/test_lxml.py
+++ b/src/beautifulsoup/tests/test_lxml.py
@@ -1,20 +1,16 @@
-from helpers import SoupTest
-from beautifulsoup.builder.lxml_builder import LXMLTreeBuilder
+"""Tests to ensure that the lxml tree builder generates good trees."""
+from helpers import BuilderInvalidMarkupSmokeTest, BuilderSmokeTest
-class TestLXMLBuilder(SoupTest):
+class TestLXMLBuilder(BuilderSmokeTest):
+ """See `BuilderSmokeTest`."""
def test_bare_string(self):
+ # lxml puts a <p> tag around the bare string.
self.assertSoupEquals(
"A bare string", "<p>A bare string</p>")
- def test_tag_nesting(self):
- b_tag = "<b>Inside a B tag</b>"
- self.assertSoupEquals(b_tag)
- nested_b_tag = "<p>A <i>nested <b>tag</b></i></p>"
- self.assertSoupEquals(nested_b_tag)
+class TestLXMLBuilderInvalidMarkup(BuilderInvalidMarkupSmokeTest):
+ """See `BuilderInvalidMarkupSmokeTest`."""
- def test_self_closing(self):
- self.assertSoupEquals(
- "<p>A <meta> tag</p>", "<p>A <meta /> tag</p>")
diff --git a/src/beautifulsoup/tests/test_soup.py b/src/beautifulsoup/tests/test_soup.py
index 571164a..c35d10b 100644
--- a/src/beautifulsoup/tests/test_soup.py
+++ b/src/beautifulsoup/tests/test_soup.py
@@ -492,12 +492,6 @@ class OperatorOverload(SoupTest):
class NestableEgg(SoupTest):
"""Here we test tag nesting. TEST THE NEST, DUDE! X-TREME!"""
- def testParaInsideBlockquote(self):
- soup = BeautifulSoup('<blockquote><p><b>Foo</blockquote><p>Bar')
- self.assertEqual(soup.blockquote.p.b.string, 'Foo')
- self.assertEqual(soup.blockquote.b.string, 'Foo')
- self.assertEqual(soup.find('p', recursive=False).string, 'Bar')
-
def testNestedTables(self):
text = """<table id="1"><tr><td>Here's another table:
<table id="2"><tr><td>Juicy text</td></tr></table></td></tr></table>"""