diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/beautifulsoup/builder/__init__.py | 2 | ||||
-rw-r--r-- | src/beautifulsoup/element.py | 4 | ||||
-rw-r--r-- | src/beautifulsoup/tests/helpers.py | 50 | ||||
-rw-r--r-- | src/beautifulsoup/tests/test_soup.py | 53 |
4 files changed, 50 insertions, 59 deletions
diff --git a/src/beautifulsoup/builder/__init__.py b/src/beautifulsoup/builder/__init__.py index b7db8db..8294c0c 100644 --- a/src/beautifulsoup/builder/__init__.py +++ b/src/beautifulsoup/builder/__init__.py @@ -11,6 +11,8 @@ class TreeBuilder(Entities): assume_html = False smart_quotes_to = Entities.XML_ENTITIES + convert_html_entities = True + convert_xml_entities = True def __init__(self): self.soup = None diff --git a/src/beautifulsoup/element.py b/src/beautifulsoup/element.py index b5b8e84..7649b4c 100644 --- a/src/beautifulsoup/element.py +++ b/src/beautifulsoup/element.py @@ -383,10 +383,10 @@ class Tag(PageElement, Entities): entities are being converted, any unrecognized entities are escaped.""" x = match.group(1) - if builder.convertHTMLEntities and x in name2codepoint: + if builder.convert_html_entities and x in name2codepoint: return unichr(name2codepoint[x]) elif x in self.XML_ENTITIES_TO_SPECIAL_CHARS: - if builder.convertXMLEntities: + if builder.convert_xml_entities: return self.XML_ENTITIES_TO_SPECIAL_CHARS[x] else: return u'&%s;' % x diff --git a/src/beautifulsoup/tests/helpers.py b/src/beautifulsoup/tests/helpers.py index a4156cd..fbdcedb 100644 --- a/src/beautifulsoup/tests/helpers.py +++ b/src/beautifulsoup/tests/helpers.py @@ -66,15 +66,43 @@ class BuilderSmokeTest(SoupTest): self.assertEqual(blockquote.p.b.string, 'Foo') self.assertEqual(blockquote.b.string, 'Foo') + def test_collapsed_whitespace(self): + """In most tags, whitespace is collapsed.""" + self.assertSoupEquals("<p> </p>", "<p> </p>") + + def test_preserved_whitespace_in_pre_and_textarea(self): + """In <pre> and <textarea> tags, whitespace is preserved.""" + self.assertSoupEquals("<pre> </pre>") + self.assertSoupEquals("<textarea> woo </textarea>") + + + def test_single_quote_attribute_values_become_double_quotes(self): + self.assertSoupEquals("<foo attr='bar'></foo>", + '<foo attr="bar"></foo>') + + def test_attribute_values_with_nested_quotes_are_left_alone(self): + text = """<foo attr='bar "brawls" happen'>a</foo>""" + self.assertSoupEquals(text) + + def test_attribute_values_with_double_nested_quotes_get_quoted(self): + text = """<foo attr='bar "brawls" happen'>a</foo>""" + soup = self.soup(text) + soup.foo['attr'] = 'Brawls happen at "Bob\'s Bar"' + self.assertSoupEquals( + soup.foo.decode(), + """<foo attr='Brawls happen at "Bob&squot;s Bar"'>a</foo>""") + + def test_ampersand_in_attribute_value_gets_quoted(self): + self.assertSoupEquals('<this is="really messed up & stuff"></this>', + '<this is="really messed up & stuff"></this>') + class BuilderInvalidMarkupSmokeTest(SoupTest): """Tests of invalid markup. These are very likely to give different results for different tree - builders. - - It's not required that a tree builder handle invalid markup at - all. + builders. It's not required that a tree builder handle invalid + markup at all. """ def test_unclosed_block_level_elements(self): @@ -82,3 +110,17 @@ class BuilderInvalidMarkupSmokeTest(SoupTest): self.assertSoupEquals( '<blockquote><p><b>Foo</blockquote><p>Bar', '<blockquote><p><b>Foo</b></p></blockquote><p>Bar</p>') + + def test_fake_self_closing_tag(self): + # If a self-closing tag presents as a normal tag, the 'open' + # tag is treated as an instance of the self-closing tag and + # the 'close' tag is ignored. + self.assertSoupEquals( + "<item><link>http://foo.com/</link></item>", + "<item><link />http://foo.com/</item>") + + def test_boolean_attribute_with_no_value_gets_empty_value(self): + soup = self.soup("<table><td nowrap>foo</td></table>") + self.assertEquals(soup.table.td['nowrap'], '') + + diff --git a/src/beautifulsoup/tests/test_soup.py b/src/beautifulsoup/tests/test_soup.py index c35d10b..5724247 100644 --- a/src/beautifulsoup/tests/test_soup.py +++ b/src/beautifulsoup/tests/test_soup.py @@ -439,25 +439,6 @@ class TheManWithoutAttributes(SoupTest): text = "<foo attr='bar'>" self.assertTrue(BeautifulSoup(text).foo.has_key('attr')) -class QuoteMeOnThat(SoupTest): - "Test quoting" - def testQuotedAttributeValues(self): - self.assertSoupEquals("<foo attr='bar'></foo>", - '<foo attr="bar"></foo>') - - text = """<foo attr='bar "brawls" happen'>a</foo>""" - soup = BeautifulSoup(text) - self.assertEquals(soup.decode(), text) - - soup.foo['attr'] = 'Brawls happen at "Bob\'s Bar"' - newText = """<foo attr='Brawls happen at "Bob&squot;s Bar"'>a</foo>""" - self.assertSoupEquals(soup.decode(), newText) - - self.assertSoupEquals('<this is="really messed up & stuff">', - '<this is="really messed up & stuff"></this>') - - - class YoureSoLiteral(SoupTest): "Test literal mode." def testLiteralMode(self): @@ -536,16 +517,6 @@ class CleanupOnAisleFour(SoupTest): self.assertEqual(soup.decode(), '<p>test1<selfclosing />test2</p>') - def testSelfClosingTagOrNot(self): - text = "<item><link>http://foo.com/</link></item>" - self.assertEqual(BeautifulStoneSoup(text).decode(), text) - self.assertEqual(BeautifulSoup(text).decode(), - '<item><link />http://foo.com/</item>') - - def testBooleanAttributes(self): - text = "<td nowrap>foo</td>" - self.assertSoupEquals(text, text) - def testCData(self): xml = "<root>foo<![CDATA[foobar]]>bar</root>" self.assertSoupEquals(xml, xml) @@ -825,29 +796,5 @@ class EncodeRed(SoupTest): self.assertSoupEquals(utf_8, encoding='utf-8') -class Whitewash(SoupTest): - """Test whitespace preservation.""" - - def testPreservedWhitespace(self): - self.assertSoupEquals("<pre> </pre>") - self.assertSoupEquals("<pre> woo </pre>") - - def testCollapsedWhitespace(self): - self.assertSoupEquals("<p> </p>", "<p> </p>") - - -class AlternateBuilders(SoupTest): - """Test alternate builders.""" - - def testICantBelieveItsValidHTML(self): - builder = ICantBelieveItsValidHTMLTreeBuilder() - markup = "<b>Foo<b>Bar</b></b>" - - soup = BeautifulSoup(markup) - self.assertEquals(soup.decode(), "<b>Foo</b><b>Bar</b>") - - soup = BeautifulSoup(markup, builder=builder) - self.assertEquals(soup.decode(), markup) - if __name__ == '__main__': unittest.main() |