From 02b149019ac6c7b7791c63b5bbc312a6b1a0636c Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Sun, 30 Jan 2011 22:22:28 -0500 Subject: Stop pretending that the 'generic' builder test is different from the lxml test. --- beautifulsoup/testing.py | 139 ------------------------------------------- tests/test_html5lib.py | 13 ++-- tests/test_lxml.py | 152 ++++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 150 insertions(+), 154 deletions(-) diff --git a/beautifulsoup/testing.py b/beautifulsoup/testing.py index 9d0fa3a..74937d9 100644 --- a/beautifulsoup/testing.py +++ b/beautifulsoup/testing.py @@ -32,144 +32,5 @@ class SoupTest(unittest.TestCase): -class BuilderSmokeTest(SoupTest): - """A generic smoke test for tree builders. - - Subclasses of this test ensure that all of Beautiful Soup's tree - builders generate more or less the same trees. It's okay for trees - to differ, especially when given invalid markup--just override the - appropriate test method to demonstrate how one tree builder - differs from others. - """ - - def test_bare_string(self): - # A bare string is turned into some kind of HTML document or - # fragment recognizable as the original string. - # - # In this case, lxml puts a

tag around the bare string. - self.assertSoupEquals( - "A bare string", "

A bare string

") - - def test_mixed_case_tags(self): - # Mixed-case tags are folded to lowercase. - self.assertSoupEquals( - "", - "") - - def test_self_closing(self): - # HTML's self-closing tags are recognized as such. - self.assertSoupEquals( - "

A tag

", "

A tag

") - - self.assertSoupEquals( - "

Foo
bar

", "

Foo
bar

") - - def test_comment(self): - # Comments are represented as Comment objects. - markup = "

foobaz

" - self.assertSoupEquals(markup) - - soup = self.soup(markup) - comment = soup.find(text="foobar") - self.assertEquals(comment.__class__, Comment) - - def test_nested_inline_elements(self): - # Inline tags can be nested indefinitely. - b_tag = "Inside a B tag" - self.assertSoupEquals(b_tag) - - nested_b_tag = "

A nested tag

" - self.assertSoupEquals(nested_b_tag) - - double_nested_b_tag = "

A doubly nested tag

" - self.assertSoupEquals(nested_b_tag) - - def test_nested_block_level_elements(self): - soup = self.soup('

Foo

') - blockquote = soup.blockquote - self.assertEqual(blockquote.p.b.string, 'Foo') - self.assertEqual(blockquote.b.string, 'Foo') - - def test_collapsed_whitespace(self): - """In most tags, whitespace is collapsed.""" - self.assertSoupEquals("

", "

") - - def test_preserved_whitespace_in_pre_and_textarea(self): - """In
 and ")
-
-    def test_single_quote_attribute_values_become_double_quotes(self):
-        self.assertSoupEquals("",
-                              '')
-
-    def test_attribute_values_with_nested_quotes_are_left_alone(self):
-        text = """a"""
-        self.assertSoupEquals(text)
-
-    def test_attribute_values_with_double_nested_quotes_get_quoted(self):
-        text = """a"""
-        soup = self.soup(text)
-        soup.foo['attr'] = 'Brawls happen at "Bob\'s Bar"'
-        self.assertSoupEquals(
-            soup.foo.decode(),
-            """a""")
-
-    def test_ampersand_in_attribute_value_gets_quoted(self):
-        self.assertSoupEquals('',
-                              '')
-
-    def test_literal_in_textarea(self):
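-        # Anything inside a <textarea> is supposed to be treated as
-        # the literal value of the field, but lxml does its best to
-        # parse the contents of a <textarea> as HTML.
-        text = '<textarea>Junk like <b> tags and </textarea>'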
-        soup = BeautifulSoup(text)
-        self.assertEquals(len(soup.textarea.contents), 2)
-        self.assertEquals(soup.textarea.contents[0], u"Junk like ")
-        self.assertEquals(soup.textarea.contents[1].name, 'b')
-        self.assertEquals(soup.textarea.b.string, u" tags and ")
-
-    def test_literal_in_script(self):
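-        # The contents of a <script> tag are treated as a literal string,
-        # even if that string contains HTML.
-        javascript = 'if (i < 2) { alert("<b>foo</b>"); }'
-        soup = BeautifulSoup('<script>%s</script>' % javascript)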
-        self.assertEquals(soup.script.string, javascript)
-
-
-class BuilderInvalidMarkupSmokeTest(SoupTest):
-    """Tests of invalid markup.
-
-    These are very likely to give different results for different tree
-    builders. It's not required that a tree builder handle invalid
-    markup at all.
-    """
-
-    def test_unclosed_block_level_elements(self):
-        # Unclosed block-level elements should be closed.
-        self.assertSoupEquals(
-            '

Foo

Bar', - '

Foo

Bar

') - - def test_fake_self_closing_tag(self): - # If a self-closing tag presents as a normal tag, the 'open' - # tag is treated as an instance of the self-closing tag and - # the 'close' tag is ignored. - self.assertSoupEquals( - "http://foo.com/", - "http://foo.com/") - - def test_boolean_attribute_with_no_value_gets_empty_value(self): - soup = self.soup("
foo
") - self.assertEquals(soup.table.td['nowrap'], '') - - def test_incorrectly_nested_tables(self): - self.assertSoupEquals( - '
', - '
') - diff --git a/tests/test_html5lib.py b/tests/test_html5lib.py index 417e87b..3a4ee27 100644 --- a/tests/test_html5lib.py +++ b/tests/test_html5lib.py @@ -1,11 +1,10 @@ from beautifulsoup.builder.html5lib_builder import HTML5TreeBuilder -from beautifulsoup.testing import ( - BuilderInvalidMarkupSmokeTest, - BuilderSmokeTest, -) +from test_lxml import ( + TestLXMLBuilder, + TestLXMLBuilderInvalidMarkup, + ) - -class TestHTML5Builder(BuilderSmokeTest): +class TestHTML5Builder(TestLXMLBuilder): """See `BuilderSmokeTest`.""" @property @@ -26,7 +25,7 @@ class TestHTML5Builder(BuilderSmokeTest): self.assertSoupEquals(" ") -class TestHTML5BuilderInvalidMarkup(BuilderInvalidMarkupSmokeTest): +class TestHTML5BuilderInvalidMarkup(TestLXMLBuilderInvalidMarkup): """See `BuilderInvalidMarkupSmokeTest`.""" @property diff --git a/tests/test_lxml.py b/tests/test_lxml.py index 7fe6870..d16e8d9 100644 --- a/tests/test_lxml.py +++ b/tests/test_lxml.py @@ -1,13 +1,118 @@ """Tests to ensure that the lxml tree builder generates good trees.""" +from beautifulsoup import BeautifulSoup from beautifulsoup.builder.lxml_builder import LXMLTreeBuilder -from beautifulsoup.testing import ( - BuilderInvalidMarkupSmokeTest, - BuilderSmokeTest, -) +from beautifulsoup.element import Comment +from beautifulsoup.testing import SoupTest -class TestLXMLBuilder(BuilderSmokeTest): - """See `BuilderSmokeTest`.""" + +class TestLXMLBuilder(SoupTest): + """A smoke test for the LXML tree builders. + + Subclass this to test some other tree builder. Subclasses of this + test ensure that all of Beautiful Soup's tree builders generate + more or less the same trees. It's okay for trees to differ, + especially when given invalid markup--just override the + appropriate test method to demonstrate how one tree builder + differs from the LXML builder. + """ + + def test_bare_string(self): + # A bare string is turned into some kind of HTML document or + # fragment recognizable as the original string. 
+ # + # In this case, lxml puts a

tag around the bare string. + self.assertSoupEquals( + "A bare string", "

A bare string

") + + def test_mixed_case_tags(self): + # Mixed-case tags are folded to lowercase. + self.assertSoupEquals( + "", + "") + + def test_self_closing(self): + # HTML's self-closing tags are recognized as such. + self.assertSoupEquals( + "

A tag

", "

A tag

") + + self.assertSoupEquals( + "

Foo
bar

", "

Foo
bar

") + + def test_comment(self): + # Comments are represented as Comment objects. + markup = "

foobaz

" + self.assertSoupEquals(markup) + + soup = self.soup(markup) + comment = soup.find(text="foobar") + self.assertEquals(comment.__class__, Comment) + + def test_nested_inline_elements(self): + # Inline tags can be nested indefinitely. + b_tag = "Inside a B tag" + self.assertSoupEquals(b_tag) + + nested_b_tag = "

A nested tag

" + self.assertSoupEquals(nested_b_tag) + + double_nested_b_tag = "

A doubly nested tag

" + self.assertSoupEquals(nested_b_tag) + + def test_nested_block_level_elements(self): + soup = self.soup('

Foo

') + blockquote = soup.blockquote + self.assertEqual(blockquote.p.b.string, 'Foo') + self.assertEqual(blockquote.b.string, 'Foo') + + def test_collapsed_whitespace(self): + """In most tags, whitespace is collapsed.""" + self.assertSoupEquals("

", "

") + + def test_preserved_whitespace_in_pre_and_textarea(self): + """In
 and ")
+
+    def test_single_quote_attribute_values_become_double_quotes(self):
+        self.assertSoupEquals("",
+                              '')
+
+    def test_attribute_values_with_nested_quotes_are_left_alone(self):
+        text = """a"""
+        self.assertSoupEquals(text)
+
+    def test_attribute_values_with_double_nested_quotes_get_quoted(self):
+        text = """a"""
+        soup = self.soup(text)
+        soup.foo['attr'] = 'Brawls happen at "Bob\'s Bar"'
+        self.assertSoupEquals(
+            soup.foo.decode(),
+            """a""")
+
+    def test_ampersand_in_attribute_value_gets_quoted(self):
+        self.assertSoupEquals('',
+                              '')
+
+    def test_literal_in_textarea(self):
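+        # Anything inside a <textarea> is supposed to be treated as
+        # the literal value of the field, but lxml does its best to
+        # parse the contents of a <textarea> as HTML.
+        text = '<textarea>Junk like <b> tags and </textarea>'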
+        soup = BeautifulSoup(text)
+        self.assertEquals(len(soup.textarea.contents), 2)
+        self.assertEquals(soup.textarea.contents[0], u"Junk like ")
+        self.assertEquals(soup.textarea.contents[1].name, 'b')
+        self.assertEquals(soup.textarea.b.string, u" tags and ")
+
+    def test_literal_in_script(self):
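+        # The contents of a <script> tag are treated as a literal string,
+        # even if that string contains HTML.
+        javascript = 'if (i < 2) { alert("<b>foo</b>"); }'
+        soup = BeautifulSoup('<script>%s</script>' % javascript)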
+        self.assertEquals(soup.script.string, javascript)
 
     def test_foo(self):
         isolatin = """Sacr\xe9 bleu!"""
@@ -19,6 +124,37 @@ class TestLXMLBuilder(BuilderSmokeTest):
         print soup
 
 
-class TestLXMLBuilderInvalidMarkup(BuilderInvalidMarkupSmokeTest):
-    """See `BuilderInvalidMarkupSmokeTest`."""
+class TestLXMLBuilderInvalidMarkup(SoupTest):
+    """Tests of invalid markup for the LXML tree builder.
+
+    Subclass this to test other builders.
+
+    These are very likely to give different results for different tree
+    builders. It's not required that a tree builder handle invalid
+    markup at all.
+    """
+
+    def test_unclosed_block_level_elements(self):
+        # Unclosed block-level elements should be closed.
+        self.assertSoupEquals(
+            '

Foo

Bar', + '

Foo

Bar

') + + def test_fake_self_closing_tag(self): + # If a self-closing tag presents as a normal tag, the 'open' + # tag is treated as an instance of the self-closing tag and + # the 'close' tag is ignored. + self.assertSoupEquals( + "http://foo.com/", + "http://foo.com/") + + def test_boolean_attribute_with_no_value_gets_empty_value(self): + soup = self.soup("
foo
") + self.assertEquals(soup.table.td['nowrap'], '') + + def test_incorrectly_nested_tables(self): + self.assertSoupEquals( + '
', + '
') + -- cgit v1.2.3