From d18cebdeffafb4e4c9c6239da2b3ae94ae5a42e8 Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Wed, 29 Dec 2010 11:52:00 -0500 Subject: Ported some tests from the old system to the new smoke test class. --- src/beautifulsoup/builder/__init__.py | 2 ++ src/beautifulsoup/element.py | 4 +-- src/beautifulsoup/tests/helpers.py | 50 ++++++++++++++++++++++++++++++--- src/beautifulsoup/tests/test_soup.py | 53 ----------------------------------- 4 files changed, 50 insertions(+), 59 deletions(-) (limited to 'src') diff --git a/src/beautifulsoup/builder/__init__.py b/src/beautifulsoup/builder/__init__.py index b7db8db..8294c0c 100644 --- a/src/beautifulsoup/builder/__init__.py +++ b/src/beautifulsoup/builder/__init__.py @@ -11,6 +11,8 @@ class TreeBuilder(Entities): assume_html = False smart_quotes_to = Entities.XML_ENTITIES + convert_html_entities = True + convert_xml_entities = True def __init__(self): self.soup = None diff --git a/src/beautifulsoup/element.py b/src/beautifulsoup/element.py index b5b8e84..7649b4c 100644 --- a/src/beautifulsoup/element.py +++ b/src/beautifulsoup/element.py @@ -383,10 +383,10 @@ class Tag(PageElement, Entities): entities are being converted, any unrecognized entities are escaped.""" x = match.group(1) - if builder.convertHTMLEntities and x in name2codepoint: + if builder.convert_html_entities and x in name2codepoint: return unichr(name2codepoint[x]) elif x in self.XML_ENTITIES_TO_SPECIAL_CHARS: - if builder.convertXMLEntities: + if builder.convert_xml_entities: return self.XML_ENTITIES_TO_SPECIAL_CHARS[x] else: return u'&%s;' % x diff --git a/src/beautifulsoup/tests/helpers.py b/src/beautifulsoup/tests/helpers.py index a4156cd..fbdcedb 100644 --- a/src/beautifulsoup/tests/helpers.py +++ b/src/beautifulsoup/tests/helpers.py @@ -66,15 +66,43 @@ class BuilderSmokeTest(SoupTest): self.assertEqual(blockquote.p.b.string, 'Foo') self.assertEqual(blockquote.b.string, 'Foo') + def test_collapsed_whitespace(self): + """In most tags, whitespace is collapsed.""" + self.assertSoupEquals("

", "

") + + def test_preserved_whitespace_in_pre_and_textarea(self): + """In
 and ")
+
+
+    def test_single_quote_attribute_values_become_double_quotes(self):
+        self.assertSoupEquals("",
+                              '')
+
+    def test_attribute_values_with_nested_quotes_are_left_alone(self):
+        text = """a"""
+        self.assertSoupEquals(text)
+
+    def test_attribute_values_with_double_nested_quotes_get_quoted(self):
+        text = """a"""
+        soup = self.soup(text)
+        soup.foo['attr'] = 'Brawls happen at "Bob\'s Bar"'
+        self.assertSoupEquals(
+            soup.foo.decode(),
+            """a""")
+
+    def test_ampersand_in_attribute_value_gets_quoted(self):
+        self.assertSoupEquals('',
+                              '')
+
 
 class BuilderInvalidMarkupSmokeTest(SoupTest):
     """Tests of invalid markup.
 
     These are very likely to give different results for different tree
-    builders.
-
-    It's not required that a tree builder handle invalid markup at
-    all.
+    builders. It's not required that a tree builder handle invalid
+    markup at all.
     """
 
     def test_unclosed_block_level_elements(self):
@@ -82,3 +110,17 @@ class BuilderInvalidMarkupSmokeTest(SoupTest):
         self.assertSoupEquals(
             '

Foo

Bar', '

Foo

Bar

') + + def test_fake_self_closing_tag(self): + # If a self-closing tag presents as a normal tag, the 'open' + # tag is treated as an instance of the self-closing tag and + # the 'close' tag is ignored. + self.assertSoupEquals( + "http://foo.com/", + "http://foo.com/") + + def test_boolean_attribute_with_no_value_gets_empty_value(self): + soup = self.soup("
foo
") + self.assertEquals(soup.table.td['nowrap'], '') + + diff --git a/src/beautifulsoup/tests/test_soup.py b/src/beautifulsoup/tests/test_soup.py index c35d10b..5724247 100644 --- a/src/beautifulsoup/tests/test_soup.py +++ b/src/beautifulsoup/tests/test_soup.py @@ -439,25 +439,6 @@ class TheManWithoutAttributes(SoupTest): text = "" self.assertTrue(BeautifulSoup(text).foo.has_key('attr')) -class QuoteMeOnThat(SoupTest): - "Test quoting" - def testQuotedAttributeValues(self): - self.assertSoupEquals("", - '') - - text = """a""" - soup = BeautifulSoup(text) - self.assertEquals(soup.decode(), text) - - soup.foo['attr'] = 'Brawls happen at "Bob\'s Bar"' - newText = """a""" - self.assertSoupEquals(soup.decode(), newText) - - self.assertSoupEquals('', - '') - - - class YoureSoLiteral(SoupTest): "Test literal mode." def testLiteralMode(self): @@ -536,16 +517,6 @@ class CleanupOnAisleFour(SoupTest): self.assertEqual(soup.decode(), '

test1test2

') - def testSelfClosingTagOrNot(self): - text = "http://foo.com/" - self.assertEqual(BeautifulStoneSoup(text).decode(), text) - self.assertEqual(BeautifulSoup(text).decode(), - 'http://foo.com/') - - def testBooleanAttributes(self): - text = "foo" - self.assertSoupEquals(text, text) - def testCData(self): xml = "foobar" self.assertSoupEquals(xml, xml) @@ -825,29 +796,5 @@ class EncodeRed(SoupTest): self.assertSoupEquals(utf_8, encoding='utf-8') -class Whitewash(SoupTest): - """Test whitespace preservation.""" - - def testPreservedWhitespace(self): - self.assertSoupEquals("
   
") - self.assertSoupEquals("
 woo  
") - - def testCollapsedWhitespace(self): - self.assertSoupEquals("

", "

") - - -class AlternateBuilders(SoupTest): - """Test alternate builders.""" - - def testICantBelieveItsValidHTML(self): - builder = ICantBelieveItsValidHTMLTreeBuilder() - markup = "FooBar" - - soup = BeautifulSoup(markup) - self.assertEquals(soup.decode(), "FooBar") - - soup = BeautifulSoup(markup, builder=builder) - self.assertEquals(soup.decode(), markup) - if __name__ == '__main__': unittest.main() -- cgit v1.2.3