summary refs log tree commit diff
diff options
context:
space:
mode:
author Leonard Richardson <leonard.richardson@canonical.com> 2011-02-27 22:12:34 -0500
committer Leonard Richardson <leonard.richardson@canonical.com> 2011-02-27 22:12:34 -0500
commit 710ee3323074c1432ece18b1eb9a40cacc0c601d (patch)
tree 0e1aecad650cdfd499878d7399758e86705783fe
parent 561b294b2f49bd6c752ae090056e2694dae79d49 (diff)
parent 082a8c84a79fa33ea23c159495005ebe9a39cbf4 (diff)
Added a bunch of tests to verify that BS4 fixes various bugs.
-rw-r--r-- tests/test_html5lib.py 9
-rw-r--r-- tests/test_lxml.py 19
2 files changed, 28 insertions, 0 deletions
diff --git a/tests/test_html5lib.py b/tests/test_html5lib.py
index b9bdd1d..ac99832 100644
--- a/tests/test_html5lib.py
+++ b/tests/test_html5lib.py
@@ -200,6 +200,15 @@ class TestHTML5BuilderInvalidMarkup(TestLXMLBuilderInvalidMarkup):
soup = self.soup("<p>foo&#100;baz</p>")
self.assertEquals(soup.p.string, "foodbaz")
+ def test_entity_out_of_range(self):
+ # An entity that's out of range will be converted to
+ # REPLACEMENT CHARACTER.
+ soup = self.soup("<p>&#10000000000000;</p>")
+ self.assertEquals(soup.p.string, u"\N{REPLACEMENT CHARACTER}")
+
+ soup = self.soup("<p>&#x1000000000000;</p>")
+ self.assertEquals(soup.p.string, u"\N{REPLACEMENT CHARACTER}")
+
class TestHTML5LibEncodingConversion(TestLXMLBuilderEncodingConversion):
@property
diff --git a/tests/test_lxml.py b/tests/test_lxml.py
index 7e83eff..4b3df07 100644
--- a/tests/test_lxml.py
+++ b/tests/test_lxml.py
@@ -355,6 +355,12 @@ class TestLXMLBuilderInvalidMarkup(SoupTest):
'<table><tr><table><tr id="nested">',
'<table><tr><table><tr id="nested"></tr></table></tr></table>')
+ def test_paragraphs_containing_block_display_elements(self):
+ markup = self.soup("<p>this is the definition:"
+ "<dl><dt>first case</dt>")
+ # The <p> tag is closed before the <dl> tag begins.
+ self.assertEquals(markup.p.contents, ["this is the definition:"])
+
def test_empty_element_tag_with_contents(self):
self.assertSoupEquals("<br>foo</br>", "<br />foo")
@@ -400,6 +406,10 @@ class TestLXMLBuilderInvalidMarkup(SoupTest):
self.assertEquals(soup.a['bar'], '')
self.assertEquals(soup.a.string, "baz")
+ def test_unquoted_attribute_value(self):
+ soup = self.soup('<a style={height:21px;}></a>')
+ self.assertEquals(soup.a['style'], '{height:21px;}')
+
def test_attribute_value_with_embedded_brackets(self):
soup = self.soup('<a b="<a>">')
self.assertEquals(soup.a['b'], '<a>')
@@ -415,6 +425,15 @@ class TestLXMLBuilderInvalidMarkup(SoupTest):
# Also compare html5lib, which preserves the &# before the
# entity name.
+ def test_entity_out_of_range(self):
+ # An entity that's out of range will be ignored.
+ soup = self.soup("<p>&#10000000000000;</p>")
+ self.assertEquals(soup.p.string, None)
+
+ soup = self.soup("<p>&#x1000000000000;</p>")
+ self.assertEquals(soup.p.string, None)
+
+
def test_entity_was_not_finished(self):
soup = self.soup("<p>&lt;Hello&gt")
# Compare html5lib, which completes the entity.