diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/beautifulsoup/tests/helpers.py | 67 | ||||
-rw-r--r-- | src/beautifulsoup/tests/test_html5lib.py | 25 | ||||
-rw-r--r-- | src/beautifulsoup/tests/test_lxml.py | 18 | ||||
-rw-r--r-- | src/beautifulsoup/tests/test_soup.py | 6 |
4 files changed, 84 insertions, 32 deletions
diff --git a/src/beautifulsoup/tests/helpers.py b/src/beautifulsoup/tests/helpers.py index d237556..a4156cd 100644 --- a/src/beautifulsoup/tests/helpers.py +++ b/src/beautifulsoup/tests/helpers.py @@ -7,12 +7,18 @@ from beautifulsoup.builder.lxml_builder import LXMLTreeBuilder class SoupTest(unittest.TestCase): - default_builder = None + def setUp(self): + # LXMLTreeBuilder won't handle bad markup, but that's fine, + # since all the parsing tests take place in parser-specific + # test suites that override default_builder. + self.default_builder = LXMLTreeBuilder() + + def soup(self, markup): + """Build a Beautiful Soup object from markup.""" + return BeautifulSoup(markup, builder=self.default_builder) def assertSoupEquals(self, to_parse, compare_parsed_to=None): builder = self.default_builder - if builder is None: - builder = LXMLTreeBuilder() obj = BeautifulSoup(to_parse, builder=builder) if compare_parsed_to is None: compare_parsed_to = to_parse @@ -21,3 +27,58 @@ class SoupTest(unittest.TestCase): obj.decode(), builder.test_fragment_to_document(compare_parsed_to)) + + +class BuilderSmokeTest(SoupTest): + """A generic smoke test for tree builders. + + Subclasses of this test ensure that all of Beautiful Soup's tree + builders generate more or less the same trees. It's okay for trees + to differ, especially when given invalid markup--just override the + appropriate test method to demonstrate how one tree builder + differs from others. + """ + + def test_bare_string(self): + # A bare string is turned into some kind of HTML document or + # fragment recognizable as the original string. + self.assertSoupEquals("A bare string") + + def test_self_closing(self): + # HTML's self-closing tags are recognized as such. + self.assertSoupEquals( + "<p>A <meta> tag</p>", "<p>A <meta /> tag</p>") + + def test_nested_inline_elements(self): + # Inline tags can be nested indefinitely. + b_tag = "<b>Inside a B tag</b>" + self.assertSoupEquals(b_tag) + + nested_b_tag = "<p>A <i>nested <b>tag</b></i></p>" + self.assertSoupEquals(nested_b_tag) + + double_nested_b_tag = "<p>A <a>doubly <i>nested <b>tag</b></i></a></p>" + self.assertSoupEquals(nested_b_tag) + + def test_nested_block_level_elements(self): + soup = self.soup('<blockquote><p><b>Foo</b></p></blockquote>') + blockquote = soup.blockquote + self.assertEqual(blockquote.p.b.string, 'Foo') + self.assertEqual(blockquote.b.string, 'Foo') + + +class BuilderInvalidMarkupSmokeTest(SoupTest): + """Tests of invalid markup. + + These are very likely to give different results for different tree + builders. + + It's not required that a tree builder handle invalid markup at + all. + """ + + def test_unclosed_block_level_elements(self): + # Unclosed block-level elements should be closed. + self.assertSoupEquals( + '<blockquote><p><b>Foo</blockquote><p>Bar', + '<blockquote><p><b>Foo</b></p></blockquote><p>Bar</p>') diff --git a/src/beautifulsoup/tests/test_html5lib.py b/src/beautifulsoup/tests/test_html5lib.py index f92771b..f66e750 100644 --- a/src/beautifulsoup/tests/test_html5lib.py +++ b/src/beautifulsoup/tests/test_html5lib.py @@ -1,23 +1,24 @@ -from helpers import SoupTest +from helpers import BuilderInvalidMarkupSmokeTest, BuilderSmokeTest from beautifulsoup.builder.html5lib_builder import HTML5TreeBuilder -class TestHTML5Builder(SoupTest): +class TestHTML5Builder(BuilderSmokeTest): + """See `BuilderSmokeTest`.""" def setUp(self): self.default_builder = HTML5TreeBuilder() - def test_bare_string(self): - self.assertSoupEquals("A bare string") - def test_tag_nesting(self): - b_tag = "<b>Inside a B tag</b>" - self.assertSoupEquals(b_tag) +class TestHTML5BuilderInvalidMarkup(BuilderInvalidMarkupSmokeTest): + """See `BuilderInvalidMarkupSmokeTest`.""" - nested_b_tag = "<p>A <i>nested <b>tag</b></i></p>" - self.assertSoupEquals(nested_b_tag) + def setUp(self): + self.default_builder = HTML5TreeBuilder() - def test_self_closing(self): + def test_unclosed_block_level_elements(self): + # The unclosed <b> tag is closed so that the block-level tag + # can be closed, and another <b> tag is inserted after the + # next block-level tag begins. self.assertSoupEquals( - "<p>A <meta> tag</p>", "<p>A <meta /> tag</p>") - + '<blockquote><p><b>Foo</blockquote><p>Bar', + '<blockquote><p><b>Foo</b></p></blockquote><p><b>Bar</b></p>') diff --git a/src/beautifulsoup/tests/test_lxml.py b/src/beautifulsoup/tests/test_lxml.py index c0ed31e..cd22b6f 100644 --- a/src/beautifulsoup/tests/test_lxml.py +++ b/src/beautifulsoup/tests/test_lxml.py @@ -1,20 +1,16 @@ -from helpers import SoupTest -from beautifulsoup.builder.lxml_builder import LXMLTreeBuilder +"""Tests to ensure that the lxml tree builder generates good trees.""" +from helpers import BuilderInvalidMarkupSmokeTest, BuilderSmokeTest -class TestLXMLBuilder(SoupTest): +class TestLXMLBuilder(BuilderSmokeTest): + """See `BuilderSmokeTest`.""" def test_bare_string(self): + # lxml puts a <p> tag around the bare string. self.assertSoupEquals( "A bare string", "<p>A bare string</p>") - def test_tag_nesting(self): - b_tag = "<b>Inside a B tag</b>" - self.assertSoupEquals(b_tag) - nested_b_tag = "<p>A <i>nested <b>tag</b></i></p>" - self.assertSoupEquals(nested_b_tag) +class TestLXMLBuilderInvalidMarkup(BuilderInvalidMarkupSmokeTest): + """See `BuilderInvalidMarkupSmokeTest`.""" - def test_self_closing(self): - self.assertSoupEquals( - "<p>A <meta> tag</p>", "<p>A <meta /> tag</p>") diff --git a/src/beautifulsoup/tests/test_soup.py b/src/beautifulsoup/tests/test_soup.py index 571164a..c35d10b 100644 --- a/src/beautifulsoup/tests/test_soup.py +++ b/src/beautifulsoup/tests/test_soup.py @@ -492,12 +492,6 @@ class OperatorOverload(SoupTest): class NestableEgg(SoupTest): """Here we test tag nesting. TEST THE NEST, DUDE! X-TREME!""" - def testParaInsideBlockquote(self): - soup = BeautifulSoup('<blockquote><p><b>Foo</blockquote><p>Bar') - self.assertEqual(soup.blockquote.p.b.string, 'Foo') - self.assertEqual(soup.blockquote.b.string, 'Foo') - self.assertEqual(soup.find('p', recursive=False).string, 'Bar') - def testNestedTables(self): text = """<table id="1"><tr><td>Here's another table: <table id="2"><tr><td>Juicy text</td></tr></table></td></tr></table>""" |