From c2cbc8580695d5efa7ab226ade55958632499bf1 Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Wed, 29 Dec 2010 11:31:00 -0500 Subject: Refactored common markup tests into smoke-test classes so as not to duplicate code between the different builder tests. --- src/beautifulsoup/tests/helpers.py | 67 ++++++++++++++++++++++++++++++-- src/beautifulsoup/tests/test_html5lib.py | 25 ++++++------ src/beautifulsoup/tests/test_lxml.py | 18 ++++----- src/beautifulsoup/tests/test_soup.py | 6 --- 4 files changed, 84 insertions(+), 32 deletions(-) (limited to 'src') diff --git a/src/beautifulsoup/tests/helpers.py b/src/beautifulsoup/tests/helpers.py index d237556..a4156cd 100644 --- a/src/beautifulsoup/tests/helpers.py +++ b/src/beautifulsoup/tests/helpers.py @@ -7,12 +7,18 @@ from beautifulsoup.builder.lxml_builder import LXMLTreeBuilder class SoupTest(unittest.TestCase): - default_builder = None + def setUp(self): + # LXMLTreeBuilder won't handle bad markup, but that's fine, + # since all the parsing tests take place in parser-specific + # test suites that override default_builder. + self.default_builder = LXMLTreeBuilder() + + def soup(self, markup): + """Build a Beautiful Soup object from markup.""" + return BeautifulSoup(markup, builder=self.default_builder) def assertSoupEquals(self, to_parse, compare_parsed_to=None): builder = self.default_builder - if builder is None: - builder = LXMLTreeBuilder() obj = BeautifulSoup(to_parse, builder=builder) if compare_parsed_to is None: compare_parsed_to = to_parse @@ -21,3 +27,58 @@ class SoupTest(unittest.TestCase): obj.decode(), builder.test_fragment_to_document(compare_parsed_to)) + + +class BuilderSmokeTest(SoupTest): + """A generic smoke test for tree builders. + + Subclasses of this test ensure that all of Beautiful Soup's tree + builders generate more or less the same trees. 
It's okay for trees + to differ, especially when given invalid markup--just override the + appropriate test method to demonstrate how one tree builder + differs from others. + """ + + def test_bare_string(self): + # A bare string is turned into some kind of HTML document or + # fragment recognizable as the original string. + self.assertSoupEquals("A bare string") + + def test_self_closing(self): + # HTML's self-closing tags are recognized as such. + self.assertSoupEquals( + "

A tag

", "

A tag

") + + def test_nested_inline_elements(self): + # Inline tags can be nested indefinitely. + b_tag = "Inside a B tag" + self.assertSoupEquals(b_tag) + + nested_b_tag = "

A nested tag

" + self.assertSoupEquals(nested_b_tag) + + double_nested_b_tag = "

A doubly nested tag

" + self.assertSoupEquals(double_nested_b_tag) + + def test_nested_block_level_elements(self): + soup = self.soup('

Foo

') + blockquote = soup.blockquote + self.assertEqual(blockquote.p.b.string, 'Foo') + self.assertEqual(blockquote.b.string, 'Foo') + + +class BuilderInvalidMarkupSmokeTest(SoupTest): + """Tests of invalid markup. + + These are very likely to give different results for different tree + builders. + + It's not required that a tree builder handle invalid markup at + all. + """ + + def test_unclosed_block_level_elements(self): + # Unclosed block-level elements should be closed. + self.assertSoupEquals( + '

Foo

Bar', + '

Foo

Bar

') diff --git a/src/beautifulsoup/tests/test_html5lib.py b/src/beautifulsoup/tests/test_html5lib.py index f92771b..f66e750 100644 --- a/src/beautifulsoup/tests/test_html5lib.py +++ b/src/beautifulsoup/tests/test_html5lib.py @@ -1,23 +1,24 @@ -from helpers import SoupTest +from helpers import BuilderInvalidMarkupSmokeTest, BuilderSmokeTest from beautifulsoup.builder.html5lib_builder import HTML5TreeBuilder -class TestHTML5Builder(SoupTest): +class TestHTML5Builder(BuilderSmokeTest): + """See `BuilderSmokeTest`.""" def setUp(self): self.default_builder = HTML5TreeBuilder() - def test_bare_string(self): - self.assertSoupEquals("A bare string") - def test_tag_nesting(self): - b_tag = "Inside a B tag" - self.assertSoupEquals(b_tag) +class TestHTML5BuilderInvalidMarkup(BuilderInvalidMarkupSmokeTest): + """See `BuilderInvalidMarkupSmokeTest`.""" - nested_b_tag = "

A nested tag

" - self.assertSoupEquals(nested_b_tag) + def setUp(self): + self.default_builder = HTML5TreeBuilder() - def test_self_closing(self): + def test_unclosed_block_level_elements(self): + # The unclosed tag is closed so that the block-level tag + # can be closed, and another tag is inserted after the + # next block-level tag begins. self.assertSoupEquals( - "

A tag

", "

A tag

") - + '

Foo

Bar', + '

Foo

Bar

') diff --git a/src/beautifulsoup/tests/test_lxml.py b/src/beautifulsoup/tests/test_lxml.py index c0ed31e..cd22b6f 100644 --- a/src/beautifulsoup/tests/test_lxml.py +++ b/src/beautifulsoup/tests/test_lxml.py @@ -1,20 +1,16 @@ -from helpers import SoupTest -from beautifulsoup.builder.lxml_builder import LXMLTreeBuilder +"""Tests to ensure that the lxml tree builder generates good trees.""" +from helpers import BuilderInvalidMarkupSmokeTest, BuilderSmokeTest -class TestLXMLBuilder(SoupTest): +class TestLXMLBuilder(BuilderSmokeTest): + """See `BuilderSmokeTest`.""" def test_bare_string(self): + # lxml puts a

tag around the bare string. self.assertSoupEquals( "A bare string", "

A bare string

") - def test_tag_nesting(self): - b_tag = "Inside a B tag" - self.assertSoupEquals(b_tag) - nested_b_tag = "

A nested tag

" - self.assertSoupEquals(nested_b_tag) +class TestLXMLBuilderInvalidMarkup(BuilderInvalidMarkupSmokeTest): + """See `BuilderInvalidMarkupSmokeTest`.""" - def test_self_closing(self): - self.assertSoupEquals( - "

A tag

", "

A tag

") diff --git a/src/beautifulsoup/tests/test_soup.py b/src/beautifulsoup/tests/test_soup.py index 571164a..c35d10b 100644 --- a/src/beautifulsoup/tests/test_soup.py +++ b/src/beautifulsoup/tests/test_soup.py @@ -492,12 +492,6 @@ class OperatorOverload(SoupTest): class NestableEgg(SoupTest): """Here we test tag nesting. TEST THE NEST, DUDE! X-TREME!""" - def testParaInsideBlockquote(self): - soup = BeautifulSoup('

Foo

Bar') - self.assertEqual(soup.blockquote.p.b.string, 'Foo') - self.assertEqual(soup.blockquote.b.string, 'Foo') - self.assertEqual(soup.find('p', recursive=False).string, 'Bar') - def testNestedTables(self): text = """
Here's another table:
Juicy text
""" -- cgit v1.2.3