diff options
author | Leonard Richardson <leonard.richardson@canonical.com> | 2011-02-27 22:12:34 -0500 |
---|---|---|
committer | Leonard Richardson <leonard.richardson@canonical.com> | 2011-02-27 22:12:34 -0500 |
commit | 710ee3323074c1432ece18b1eb9a40cacc0c601d (patch) | |
tree | 0e1aecad650cdfd499878d7399758e86705783fe | |
parent | 561b294b2f49bd6c752ae090056e2694dae79d49 (diff) | |
parent | 082a8c84a79fa33ea23c159495005ebe9a39cbf4 (diff) |
Added a bunch of tests to verify that BS4 fixes various bugs.
-rw-r--r-- | tests/test_html5lib.py | 9 | ||||
-rw-r--r-- | tests/test_lxml.py | 19 |
2 files changed, 28 insertions, 0 deletions
diff --git a/tests/test_html5lib.py b/tests/test_html5lib.py index b9bdd1d..ac99832 100644 --- a/tests/test_html5lib.py +++ b/tests/test_html5lib.py @@ -200,6 +200,15 @@ class TestHTML5BuilderInvalidMarkup(TestLXMLBuilderInvalidMarkup): soup = self.soup("<p>foodbaz</p>") self.assertEquals(soup.p.string, "foodbaz") + def test_entity_out_of_range(self): + # An entity that's out of range will be converted to + # REPLACEMENT CHARACTER. + soup = self.soup("<p>&#10000000000000;</p>") + self.assertEquals(soup.p.string, u"\N{REPLACEMENT CHARACTER}") + + soup = self.soup("<p>&#x1000000000000;</p>") + self.assertEquals(soup.p.string, u"\N{REPLACEMENT CHARACTER}") + class TestHTML5LibEncodingConversion(TestLXMLBuilderEncodingConversion): @property diff --git a/tests/test_lxml.py b/tests/test_lxml.py index 7e83eff..4b3df07 100644 --- a/tests/test_lxml.py +++ b/tests/test_lxml.py @@ -355,6 +355,12 @@ class TestLXMLBuilderInvalidMarkup(SoupTest): '<table><tr><table><tr id="nested">', '<table><tr><table><tr id="nested"></tr></table></tr></table>') + def test_paragraphs_containing_block_display_elements(self): + markup = self.soup("<p>this is the definition:" + "<dl><dt>first case</dt>") + # The <p> tag is closed before the <dl> tag begins. + self.assertEquals(markup.p.contents, ["this is the definition:"]) + def test_empty_element_tag_with_contents(self): self.assertSoupEquals("<br>foo</br>", "<br />foo") @@ -400,6 +406,10 @@ class TestLXMLBuilderInvalidMarkup(SoupTest): self.assertEquals(soup.a['bar'], '') self.assertEquals(soup.a.string, "baz") + def test_unquoted_attribute_value(self): + soup = self.soup('<a style={height:21px;}></a>') + self.assertEquals(soup.a['style'], '{height:21px;}') + def test_attribute_value_with_embedded_brackets(self): soup = self.soup('<a b="<a>">') self.assertEquals(soup.a['b'], '<a>') @@ -415,6 +425,15 @@ class TestLXMLBuilderInvalidMarkup(SoupTest): # Also compare html5lib, which preserves the &# before the # entity name. 
+ def test_entity_out_of_range(self): + # An entity that's out of range will be ignored. + soup = self.soup("<p>&#10000000000000;</p>") + self.assertEquals(soup.p.string, None) + + soup = self.soup("<p>&#x1000000000000;</p>") + self.assertEquals(soup.p.string, None) + + def test_entity_was_not_finished(self): soup = self.soup("<p>&lt;Hello&gt") # Compare html5lib, which completes the entity. |