summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/beautifulsoup/builder/__init__.py 2
-rw-r--r-- src/beautifulsoup/element.py 4
-rw-r--r-- src/beautifulsoup/tests/helpers.py 50
-rw-r--r-- src/beautifulsoup/tests/test_soup.py 53
4 files changed, 50 insertions, 59 deletions
diff --git a/src/beautifulsoup/builder/__init__.py b/src/beautifulsoup/builder/__init__.py
index b7db8db..8294c0c 100644
--- a/src/beautifulsoup/builder/__init__.py
+++ b/src/beautifulsoup/builder/__init__.py
@@ -11,6 +11,8 @@ class TreeBuilder(Entities):
assume_html = False
smart_quotes_to = Entities.XML_ENTITIES
+ convert_html_entities = True
+ convert_xml_entities = True
def __init__(self):
self.soup = None
diff --git a/src/beautifulsoup/element.py b/src/beautifulsoup/element.py
index b5b8e84..7649b4c 100644
--- a/src/beautifulsoup/element.py
+++ b/src/beautifulsoup/element.py
@@ -383,10 +383,10 @@ class Tag(PageElement, Entities):
entities are being converted, any unrecognized entities are
escaped."""
x = match.group(1)
- if builder.convertHTMLEntities and x in name2codepoint:
+ if builder.convert_html_entities and x in name2codepoint:
return unichr(name2codepoint[x])
elif x in self.XML_ENTITIES_TO_SPECIAL_CHARS:
- if builder.convertXMLEntities:
+ if builder.convert_xml_entities:
return self.XML_ENTITIES_TO_SPECIAL_CHARS[x]
else:
return u'&%s;' % x
diff --git a/src/beautifulsoup/tests/helpers.py b/src/beautifulsoup/tests/helpers.py
index a4156cd..fbdcedb 100644
--- a/src/beautifulsoup/tests/helpers.py
+++ b/src/beautifulsoup/tests/helpers.py
@@ -66,15 +66,43 @@ class BuilderSmokeTest(SoupTest):
self.assertEqual(blockquote.p.b.string, 'Foo')
self.assertEqual(blockquote.b.string, 'Foo')
+ def test_collapsed_whitespace(self):
+ """In most tags, whitespace is collapsed."""
+ self.assertSoupEquals("<p> </p>", "<p> </p>")
+
+ def test_preserved_whitespace_in_pre_and_textarea(self):
+ """In <pre> and <textarea> tags, whitespace is preserved."""
+ self.assertSoupEquals("<pre> </pre>")
+ self.assertSoupEquals("<textarea> woo </textarea>")
+
+
+ def test_single_quote_attribute_values_become_double_quotes(self):
+ self.assertSoupEquals("<foo attr='bar'></foo>",
+ '<foo attr="bar"></foo>')
+
+ def test_attribute_values_with_nested_quotes_are_left_alone(self):
+ text = """<foo attr='bar "brawls" happen'>a</foo>"""
+ self.assertSoupEquals(text)
+
+ def test_attribute_values_with_double_nested_quotes_get_quoted(self):
+ text = """<foo attr='bar "brawls" happen'>a</foo>"""
+ soup = self.soup(text)
+ soup.foo['attr'] = 'Brawls happen at "Bob\'s Bar"'
+ self.assertSoupEquals(
+ soup.foo.decode(),
+ """<foo attr='Brawls happen at "Bob&squot;s Bar"'>a</foo>""")
+
+ def test_ampersand_in_attribute_value_gets_quoted(self):
+ self.assertSoupEquals('<this is="really messed up & stuff"></this>',
+ '<this is="really messed up &amp; stuff"></this>')
+
class BuilderInvalidMarkupSmokeTest(SoupTest):
"""Tests of invalid markup.
These are very likely to give different results for different tree
- builders.
-
- It's not required that a tree builder handle invalid markup at
- all.
+ builders. It's not required that a tree builder handle invalid
+ markup at all.
"""
def test_unclosed_block_level_elements(self):
@@ -82,3 +110,17 @@ class BuilderInvalidMarkupSmokeTest(SoupTest):
self.assertSoupEquals(
'<blockquote><p><b>Foo</blockquote><p>Bar',
'<blockquote><p><b>Foo</b></p></blockquote><p>Bar</p>')
+
+ def test_fake_self_closing_tag(self):
+ # If a self-closing tag presents as a normal tag, the 'open'
+ # tag is treated as an instance of the self-closing tag and
+ # the 'close' tag is ignored.
+ self.assertSoupEquals(
+ "<item><link>http://foo.com/</link></item>",
+ "<item><link />http://foo.com/</item>")
+
+ def test_boolean_attribute_with_no_value_gets_empty_value(self):
+ soup = self.soup("<table><td nowrap>foo</td></table>")
+ self.assertEquals(soup.table.td['nowrap'], '')
+
+
diff --git a/src/beautifulsoup/tests/test_soup.py b/src/beautifulsoup/tests/test_soup.py
index c35d10b..5724247 100644
--- a/src/beautifulsoup/tests/test_soup.py
+++ b/src/beautifulsoup/tests/test_soup.py
@@ -439,25 +439,6 @@ class TheManWithoutAttributes(SoupTest):
text = "<foo attr='bar'>"
self.assertTrue(BeautifulSoup(text).foo.has_key('attr'))
-class QuoteMeOnThat(SoupTest):
- "Test quoting"
- def testQuotedAttributeValues(self):
- self.assertSoupEquals("<foo attr='bar'></foo>",
- '<foo attr="bar"></foo>')
-
- text = """<foo attr='bar "brawls" happen'>a</foo>"""
- soup = BeautifulSoup(text)
- self.assertEquals(soup.decode(), text)
-
- soup.foo['attr'] = 'Brawls happen at "Bob\'s Bar"'
- newText = """<foo attr='Brawls happen at "Bob&squot;s Bar"'>a</foo>"""
- self.assertSoupEquals(soup.decode(), newText)
-
- self.assertSoupEquals('<this is="really messed up & stuff">',
- '<this is="really messed up &amp; stuff"></this>')
-
-
-
class YoureSoLiteral(SoupTest):
"Test literal mode."
def testLiteralMode(self):
@@ -536,16 +517,6 @@ class CleanupOnAisleFour(SoupTest):
self.assertEqual(soup.decode(),
'<p>test1<selfclosing />test2</p>')
- def testSelfClosingTagOrNot(self):
- text = "<item><link>http://foo.com/</link></item>"
- self.assertEqual(BeautifulStoneSoup(text).decode(), text)
- self.assertEqual(BeautifulSoup(text).decode(),
- '<item><link />http://foo.com/</item>')
-
- def testBooleanAttributes(self):
- text = "<td nowrap>foo</td>"
- self.assertSoupEquals(text, text)
-
def testCData(self):
xml = "<root>foo<![CDATA[foobar]]>bar</root>"
self.assertSoupEquals(xml, xml)
@@ -825,29 +796,5 @@ class EncodeRed(SoupTest):
self.assertSoupEquals(utf_8, encoding='utf-8')
-class Whitewash(SoupTest):
- """Test whitespace preservation."""
-
- def testPreservedWhitespace(self):
- self.assertSoupEquals("<pre> </pre>")
- self.assertSoupEquals("<pre> woo </pre>")
-
- def testCollapsedWhitespace(self):
- self.assertSoupEquals("<p> </p>", "<p> </p>")
-
-
-class AlternateBuilders(SoupTest):
- """Test alternate builders."""
-
- def testICantBelieveItsValidHTML(self):
- builder = ICantBelieveItsValidHTMLTreeBuilder()
- markup = "<b>Foo<b>Bar</b></b>"
-
- soup = BeautifulSoup(markup)
- self.assertEquals(soup.decode(), "<b>Foo</b><b>Bar</b>")
-
- soup = BeautifulSoup(markup, builder=builder)
- self.assertEquals(soup.decode(), markup)
-
if __name__ == '__main__':
unittest.main()