Stop pretending that the 'generic' builder test is different from the lxml test.

author: Leonard Richardson <leonard.richardson@canonical.com> 2011-01-30 22:22:28 -0500
committer: Leonard Richardson <leonard.richardson@canonical.com> 2011-01-30 22:22:28 -0500
commit: 02b149019ac6c7b7791c63b5bbc312a6b1a0636c (patch)
tree: fd41c4fa0a2fade6b6c98aca581e5e72a32168fb
parent: f79871fb934eeb3ab220bcbf7d471dd9f6feca93 (diff)
3 files changed, 150 insertions, 154 deletions
diff --git a/beautifulsoup/testing.py b/beautifulsoup/testing.py
index 9d0fa3a..74937d9 100644
--- a/beautifulsoup/testing.py
+++ b/beautifulsoup/testing.py
@@ -32,144 +32,5 @@ class SoupTest(unittest.TestCase):
 
 
 
-class BuilderSmokeTest(SoupTest):
-    """A generic smoke test for tree builders.
-
-    Subclasses of this test ensure that all of Beautiful Soup's tree
-    builders generate more or less the same trees. It's okay for trees
-    to differ, especially when given invalid markup--just override the
-    appropriate test method to demonstrate how one tree builder
-    differs from others.
-    """
-
-    def test_bare_string(self):
-        # A bare string is turned into some kind of HTML document or
-        # fragment recognizable as the original string.
-        #
-        # In this case, lxml puts a <p> tag around the bare string.
-        self.assertSoupEquals(
-            "A bare string", "<p>A bare string</p>")
-
-    def test_mixed_case_tags(self):
-        # Mixed-case tags are folded to lowercase.
-        self.assertSoupEquals(
-            "<a><B><Cd><EFG></efg></CD></b></A>",
-            "<a><b><cd><efg></efg></cd></b></a>")
-
-    def test_self_closing(self):
-        # HTML's self-closing tags are recognized as such.
-        self.assertSoupEquals(
-            "<p>A <meta> tag</p>", "<p>A <meta /> tag</p>")
-
-        self.assertSoupEquals(
-            "<p>Foo<br/>bar</p>", "<p>Foo<br />bar</p>")
-
-    def test_comment(self):
-        # Comments are represented as Comment objects.
-        markup = "<p>foo<!--foobar-->baz</p>"
-        self.assertSoupEquals(markup)
-
-        soup = self.soup(markup)
-        comment = soup.find(text="foobar")
-        self.assertEquals(comment.__class__, Comment)
-
-    def test_nested_inline_elements(self):
-        # Inline tags can be nested indefinitely.
-        b_tag = "<b>Inside a B tag</b>"
-        self.assertSoupEquals(b_tag)
-
-        nested_b_tag = "<p>A <i>nested <b>tag</b></i></p>"
-        self.assertSoupEquals(nested_b_tag)
-
-        double_nested_b_tag = "<p>A <a>doubly <i>nested <b>tag</b></i></a></p>"
-        self.assertSoupEquals(nested_b_tag)
-
-    def test_nested_block_level_elements(self):
-        soup = self.soup('<blockquote><p><b>Foo</b></p></blockquote>')
-        blockquote = soup.blockquote
-        self.assertEqual(blockquote.p.b.string, 'Foo')
-        self.assertEqual(blockquote.b.string, 'Foo')
-
-    def test_collapsed_whitespace(self):
-        """In most tags, whitespace is collapsed."""
-        self.assertSoupEquals("<p>   </p>", "<p> </p>")
-
-    def test_preserved_whitespace_in_pre_and_textarea(self):
-        """In <pre> and <textarea> tags, whitespace is preserved."""
-        self.assertSoupEquals("<pre>   </pre>")
-        self.assertSoupEquals("<textarea> woo  </textarea>")
-
-    def test_single_quote_attribute_values_become_double_quotes(self):
-        self.assertSoupEquals("<foo attr='bar'></foo>",
-                              '<foo attr="bar"></foo>')
-
-    def test_attribute_values_with_nested_quotes_are_left_alone(self):
-        text = """<foo attr='bar "brawls" happen'>a</foo>"""
-        self.assertSoupEquals(text)
-
-    def test_attribute_values_with_double_nested_quotes_get_quoted(self):
-        text = """<foo attr='bar "brawls" happen'>a</foo>"""
-        soup = self.soup(text)
-        soup.foo['attr'] = 'Brawls happen at "Bob\'s Bar"'
-        self.assertSoupEquals(
-            soup.foo.decode(),
-            """<foo attr='Brawls happen at "Bob&squot;s Bar"'>a</foo>""")
-
-    def test_ampersand_in_attribute_value_gets_quoted(self):
-        self.assertSoupEquals('<this is="really messed up & stuff"></this>',
-                              '<this is="really messed up &amp; stuff"></this>')
-
-    def test_literal_in_textarea(self):
-        # Anything inside a <textarea> is supposed to be treated as
-        # the literal value of the field, (XXX citation needed).
-        #
-        # But, both lxml and html5lib do their best to parse the
-        # contents of a <textarea> as HTML.
-        text = '<textarea>Junk like <b> tags and <&<&amp;</textarea>'
-        soup = BeautifulSoup(text)
-        self.assertEquals(len(soup.textarea.contents), 2)
-        self.assertEquals(soup.textarea.contents[0], u"Junk like ")
-        self.assertEquals(soup.textarea.contents[1].name, 'b')
-        self.assertEquals(soup.textarea.b.string, u" tags and ")
-
-    def test_literal_in_script(self):
-        # The contents of a <script> tag are treated as a literal string,
-        # even if that string contains HTML.
-        javascript = 'if (i < 2) { alert("<b>foo</b>"); }'
-        soup = BeautifulSoup('<script>%s</script>' % javascript)
-        self.assertEquals(soup.script.string, javascript)
-
-
-class BuilderInvalidMarkupSmokeTest(SoupTest):
-    """Tests of invalid markup.
-
-    These are very likely to give different results for different tree
-    builders. It's not required that a tree builder handle invalid
-    markup at all.
-    """
-
-    def test_unclosed_block_level_elements(self):
-        # Unclosed block-level elements should be closed.
-        self.assertSoupEquals(
-            '<blockquote><p><b>Foo</blockquote><p>Bar',
-            '<blockquote><p><b>Foo</b></p></blockquote><p>Bar</p>')
-
-    def test_fake_self_closing_tag(self):
-        # If a self-closing tag presents as a normal tag, the 'open'
-        # tag is treated as an instance of the self-closing tag and
-        # the 'close' tag is ignored.
-        self.assertSoupEquals(
-            "<item><link>http://foo.com/</link></item>",
-            "<item><link />http://foo.com/</item>")
-
-    def test_boolean_attribute_with_no_value_gets_empty_value(self):
-        soup = self.soup("<table><td nowrap>foo</td></table>")
-        self.assertEquals(soup.table.td['nowrap'], '')
-
-    def test_incorrectly_nested_tables(self):
-        self.assertSoupEquals(
-            '<table><tr><table><tr id="nested">',
-            '<table><tr><table><tr id="nested"></tr></table></tr></table>')
-
 
 
diff --git a/tests/test_html5lib.py b/tests/test_html5lib.py
index 417e87b..3a4ee27 100644
--- a/tests/test_html5lib.py
+++ b/tests/test_html5lib.py
@@ -1,11 +1,10 @@
 from beautifulsoup.builder.html5lib_builder import HTML5TreeBuilder
-from beautifulsoup.testing import (
-    BuilderInvalidMarkupSmokeTest,
-    BuilderSmokeTest,
-)
+from test_lxml import (
+    TestLXMLBuilder,
+    TestLXMLBuilderInvalidMarkup,
+    )
 
-
-class TestHTML5Builder(BuilderSmokeTest):
+class TestHTML5Builder(TestLXMLBuilder):
     """See `BuilderSmokeTest`."""
 
     @property
@@ -26,7 +25,7 @@ class TestHTML5Builder(BuilderSmokeTest):
         self.assertSoupEquals("<b>   </b>")
 
 
-class TestHTML5BuilderInvalidMarkup(BuilderInvalidMarkupSmokeTest):
+class TestHTML5BuilderInvalidMarkup(TestLXMLBuilderInvalidMarkup):
     """See `BuilderInvalidMarkupSmokeTest`."""
 
     @property
diff --git a/tests/test_lxml.py b/tests/test_lxml.py
index 7fe6870..d16e8d9 100644
--- a/tests/test_lxml.py
+++ b/tests/test_lxml.py
@@ -1,13 +1,118 @@
 """Tests to ensure that the lxml tree builder generates good trees."""
 
+from beautifulsoup import BeautifulSoup
 from beautifulsoup.builder.lxml_builder import LXMLTreeBuilder
-from beautifulsoup.testing import (
-    BuilderInvalidMarkupSmokeTest,
-    BuilderSmokeTest,
-)
+from beautifulsoup.element import Comment
+from beautifulsoup.testing import SoupTest
 
-class TestLXMLBuilder(BuilderSmokeTest):
-    """See `BuilderSmokeTest`."""
+
+class TestLXMLBuilder(SoupTest):
+    """A smoke test for the LXML tree builders.
+
+    Subclass this to test some other tree builder. Subclasses of this
+    test ensure that all of Beautiful Soup's tree builders generate
+    more or less the same trees. It's okay for trees to differ,
+    especially when given invalid markup--just override the
+    appropriate test method to demonstrate how one tree builder
+    differs from the LXML builder.
+    """
+
+    def test_bare_string(self):
+        # A bare string is turned into some kind of HTML document or
+        # fragment recognizable as the original string.
+        #
+        # In this case, lxml puts a <p> tag around the bare string.
+        self.assertSoupEquals(
+            "A bare string", "<p>A bare string</p>")
+
+    def test_mixed_case_tags(self):
+        # Mixed-case tags are folded to lowercase.
+        self.assertSoupEquals(
+            "<a><B><Cd><EFG></efg></CD></b></A>",
+            "<a><b><cd><efg></efg></cd></b></a>")
+
+    def test_self_closing(self):
+        # HTML's self-closing tags are recognized as such.
+        self.assertSoupEquals(
+            "<p>A <meta> tag</p>", "<p>A <meta /> tag</p>")
+
+        self.assertSoupEquals(
+            "<p>Foo<br/>bar</p>", "<p>Foo<br />bar</p>")
+
+    def test_comment(self):
+        # Comments are represented as Comment objects.
+        markup = "<p>foo<!--foobar-->baz</p>"
+        self.assertSoupEquals(markup)
+
+        soup = self.soup(markup)
+        comment = soup.find(text="foobar")
+        self.assertEquals(comment.__class__, Comment)
+
+    def test_nested_inline_elements(self):
+        # Inline tags can be nested indefinitely.
+        b_tag = "<b>Inside a B tag</b>"
+        self.assertSoupEquals(b_tag)
+
+        nested_b_tag = "<p>A <i>nested <b>tag</b></i></p>"
+        self.assertSoupEquals(nested_b_tag)
+
+        double_nested_b_tag = "<p>A <a>doubly <i>nested <b>tag</b></i></a></p>"
+        self.assertSoupEquals(double_nested_b_tag)
+
+    def test_nested_block_level_elements(self):
+        soup = self.soup('<blockquote><p><b>Foo</b></p></blockquote>')
+        blockquote = soup.blockquote
+        self.assertEqual(blockquote.p.b.string, 'Foo')
+        self.assertEqual(blockquote.b.string, 'Foo')
+
+    def test_collapsed_whitespace(self):
+        """In most tags, whitespace is collapsed."""
+        self.assertSoupEquals("<p>   </p>", "<p> </p>")
+
+    def test_preserved_whitespace_in_pre_and_textarea(self):
+        """In <pre> and <textarea> tags, whitespace is preserved."""
+        self.assertSoupEquals("<pre>   </pre>")
+        self.assertSoupEquals("<textarea> woo  </textarea>")
+
+    def test_single_quote_attribute_values_become_double_quotes(self):
+        self.assertSoupEquals("<foo attr='bar'></foo>",
+                              '<foo attr="bar"></foo>')
+
+    def test_attribute_values_with_nested_quotes_are_left_alone(self):
+        text = """<foo attr='bar "brawls" happen'>a</foo>"""
+        self.assertSoupEquals(text)
+
+    def test_attribute_values_with_double_nested_quotes_get_quoted(self):
+        text = """<foo attr='bar "brawls" happen'>a</foo>"""
+        soup = self.soup(text)
+        soup.foo['attr'] = 'Brawls happen at "Bob\'s Bar"'
+        self.assertSoupEquals(
+            soup.foo.decode(),
+            """<foo attr='Brawls happen at "Bob&squot;s Bar"'>a</foo>""")
+
+    def test_ampersand_in_attribute_value_gets_quoted(self):
+        self.assertSoupEquals('<this is="really messed up & stuff"></this>',
+                              '<this is="really messed up &amp; stuff"></this>')
+
+    def test_literal_in_textarea(self):
+        # Anything inside a <textarea> is supposed to be treated as
+        # the literal value of the field, (XXX citation needed).
+        #
+        # But, both lxml and html5lib do their best to parse the
+        # contents of a <textarea> as HTML.
+        text = '<textarea>Junk like <b> tags and <&<&amp;</textarea>'
+        soup = BeautifulSoup(text)
+        self.assertEquals(len(soup.textarea.contents), 2)
+        self.assertEquals(soup.textarea.contents[0], u"Junk like ")
+        self.assertEquals(soup.textarea.contents[1].name, 'b')
+        self.assertEquals(soup.textarea.b.string, u" tags and ")
+
+    def test_literal_in_script(self):
+        # The contents of a <script> tag are treated as a literal string,
+        # even if that string contains HTML.
+        javascript = 'if (i < 2) { alert("<b>foo</b>"); }'
+        soup = BeautifulSoup('<script>%s</script>' % javascript)
+        self.assertEquals(soup.script.string, javascript)
 
     def test_foo(self):
         isolatin = """<html><meta http-equiv="Content-type" content="text/html; charset=ISO-Latin-1" />Sacr\xe9 bleu!</html>"""
@@ -19,6 +124,37 @@ class TestLXMLBuilder(BuilderSmokeTest):
         print soup
 
 
-class TestLXMLBuilderInvalidMarkup(BuilderInvalidMarkupSmokeTest):
-    """See `BuilderInvalidMarkupSmokeTest`."""
+class TestLXMLBuilderInvalidMarkup(SoupTest):
+    """Tests of invalid markup for the LXML tree builder.
+
+    Subclass this to test other builders.
+
+    These are very likely to give different results for different tree
+    builders. It's not required that a tree builder handle invalid
+    markup at all.
+    """
+
+    def test_unclosed_block_level_elements(self):
+        # Unclosed block-level elements should be closed.
+        self.assertSoupEquals(
+            '<blockquote><p><b>Foo</blockquote><p>Bar',
+            '<blockquote><p><b>Foo</b></p></blockquote><p>Bar</p>')
+
+    def test_fake_self_closing_tag(self):
+        # If a self-closing tag presents as a normal tag, the 'open'
+        # tag is treated as an instance of the self-closing tag and
+        # the 'close' tag is ignored.
+        self.assertSoupEquals(
+            "<item><link>http://foo.com/</link></item>",
+            "<item><link />http://foo.com/</item>")
+
+    def test_boolean_attribute_with_no_value_gets_empty_value(self):
+        soup = self.soup("<table><td nowrap>foo</td></table>")
+        self.assertEquals(soup.table.td['nowrap'], '')
+
+    def test_incorrectly_nested_tables(self):
+        self.assertSoupEquals(
+            '<table><tr><table><tr id="nested">',
+            '<table><tr><table><tr id="nested"></tr></table></tr></table>')
+
author	Leonard Richardson <leonard.richardson@canonical.com>	2011-01-30 22:22:28 -0500
committer	Leonard Richardson <leonard.richardson@canonical.com>	2011-01-30 22:22:28 -0500
commit	02b149019ac6c7b7791c63b5bbc312a6b1a0636c (patch)
tree	fd41c4fa0a2fade6b6c98aca581e5e72a32168fb
parent	f79871fb934eeb3ab220bcbf7d471dd9f6feca93 (diff)