diff options
Diffstat (limited to 'tests/test_html5lib.py')
-rw-r--r-- | tests/test_html5lib.py | 32 |
1 files changed, 31 insertions, 1 deletions
diff --git a/tests/test_html5lib.py b/tests/test_html5lib.py
index 3a4ee27..dada900 100644
--- a/tests/test_html5lib.py
+++ b/tests/test_html5lib.py
@@ -19,11 +19,32 @@ class TestHTML5Builder(TestLXMLBuilder):
         self.assertSoupEquals(
             "A bare string", "A bare string")
 
+    def test_correctly_nested_tables(self):
+        markup = ('<table id="1">'
+                  '<tr>'
+                  "<td>Here's another table:"
+                  '<table id="2">'
+                  '<tr><td>foo</td></tr>'
+                  '</table></td>')
+
+        self.assertSoupEquals(
+            markup,
+            '<table id="1"><tbody><tr><td>Here\'s another table:'
+            '<table id="2"><tbody><tr><td>foo</td></tr></tbody></table>'
+            '</td></tr></tbody></table>')
+
+        self.assertSoupEquals(
+            "<table><thead><tr><td>Foo</td></tr></thead>"
+            "<tbody><tr><td>Bar</td></tr></tbody>"
+            "<tfoot><tr><td>Baz</td></tr></tfoot></table>")
+
     def test_collapsed_whitespace(self):
         """Whitespace is preserved even in tags that don't require it."""
         self.assertSoupEquals("<p> </p>")
         self.assertSoupEquals("<b> </b>")
 
+    def test_cdata(self):
+        print self.soup("<div><![CDATA[foo]]></div>")
 
 class TestHTML5BuilderInvalidMarkup(TestLXMLBuilderInvalidMarkup):
     """See `BuilderInvalidMarkupSmokeTest`."""
@@ -40,12 +61,21 @@ class TestHTML5BuilderInvalidMarkup(TestLXMLBuilderInvalidMarkup):
             '<blockquote><p><b>Foo</blockquote><p>Bar',
             '<blockquote><p><b>Foo</b></p></blockquote><p><b>Bar</b></p>')
 
+    def test_table_containing_bare_markup(self):
+        # Markup should be in table cells, not directly in the table.
+        self.assertSoupEquals("<table><div>Foo</div></table>",
+                              "<div>Foo</div><table></table>")
+
     def test_incorrectly_nested_tables(self):
         self.assertSoupEquals(
             '<table><tr><table><tr id="nested">',
             ('<table><tbody><tr></tr></tbody></table>'
              '<table><tbody><tr id="nested"></tr></tbody></table>'))
 
+    def test_doctype_in_body(self):
+        markup = "<p>one<!DOCTYPE foobar>two</p>"
+        self.assertSoupEquals(markup, "<p>onetwo</p>")
+
     def test_foo(self):
         isolatin = """<html><meta http-equiv="Content-type" content="text/html; charset=ISO-Latin-1" />Sacr\xe9 bleu!</html>"""
         soup = self.soup(isolatin)
@@ -53,4 +83,4 @@ class TestHTML5BuilderInvalidMarkup(TestLXMLBuilderInvalidMarkup):
         utf8 = isolatin.replace("ISO-Latin-1".encode(), "utf-8".encode())
         utf8 = utf8.replace("\xe9", "\xc3\xa9")
 
-        print soup
+        #print soup