diff options
Diffstat (limited to 'tests')
-rw-r--r-- | tests/test_html5lib.py | 40 | ||||
-rw-r--r-- | tests/test_lxml.py | 56 |
2 files changed, 46 insertions, 50 deletions
diff --git a/tests/test_html5lib.py b/tests/test_html5lib.py index ef38f9f..5febe2d 100644 --- a/tests/test_html5lib.py +++ b/tests/test_html5lib.py @@ -19,31 +19,24 @@ class TestHTML5Builder(TestLXMLBuilder): self.assertSoupEquals( "A bare string", "A bare string") - def test_nested_tables(self): - # See TestLXMLBuilder for TABLE_MARKUP_1 and - # TABLE_MARKUP_2. They're both nested tables where the - # top-level <table> and <tr> aren't closed. In TABLE_MARKUP_1 - # the second table is within a <td> tag. In - # TABLE_MARKUP_2, the second table is floating inside a <tr> tag. - # - # html5lib adds <tbody> tags to each table. It treats - # TABLE_MARKUP_1 as a nested table, and TABLE_MARKUP_2 as two - # different tables. + def test_correctly_nested_tables(self): + markup = ('<table id="1">' + '<tr>' + "<td>Here's another table:" + '<table id="2">' + '<tr><td>foo</td></tr>' + '</table></td>') + self.assertSoupEquals( - self.TABLE_MARKUP_1, - '<table id="1"><tbody>' - "<tr><td>Here's another table:" + markup, + '<table id="1"><tbody><tr><td>Here\'s another table:' '<table id="2"><tbody><tr><td>foo</td></tr></tbody></table>' - "</td></tr></tbody></table>" - ) + '</td></tr></tbody></table>') self.assertSoupEquals( - self.TABLE_MARKUP_2, - '<table id="1"><tbody>' - "<tr><td>Here's another table:</td></tr>" - '</tbody></table>' - '<table id="2"><tbody><tr><td>foo</td></tr></tbody></table>' - ) + "<table><thead><tr><td>Foo</td></tr></thead>" + "<tbody><tr><td>Bar</td></tr></tbody>" + "<tfoot><tr><td>Baz</td></tr></tfoot></table>") def test_collapsed_whitespace(self): """Whitespace is preserved even in tags that don't require it.""" @@ -66,6 +59,11 @@ class TestHTML5BuilderInvalidMarkup(TestLXMLBuilderInvalidMarkup): '<blockquote><p><b>Foo</blockquote><p>Bar', '<blockquote><p><b>Foo</b></p></blockquote><p><b>Bar</b></p>') + def test_table_containing_bare_markup(self): + # Markup should be in table cells, not directly in the table. + self.assertSoupEquals("<table><div>Foo</div></table>", + "<div>Foo</div><table></table>") + def test_incorrectly_nested_tables(self): self.assertSoupEquals( '<table><tr><table><tr id="nested">', diff --git a/tests/test_lxml.py b/tests/test_lxml.py index 76bcd32..2af952f 100644 --- a/tests/test_lxml.py +++ b/tests/test_lxml.py @@ -76,40 +76,24 @@ class TestLXMLBuilder(SoupTest): '<tr><td>foo</td></tr>' '</table></td>') - # This is the same as TABLE_MARKUP_1, but the nested table is - # floating freely rather than being inside a <td> cell. - TABLE_MARKUP_2 = ('<table id="1">' - '<tr>' - "<td>Here's another table:</td>" - '<table id="2">' - '<tr><td>foo</td></tr>' - '</table></td>') + def test_correctly_nested_tables(self): + markup = ('<table id="1">' + '<tr>' + "<td>Here's another table:" + '<table id="2">' + '<tr><td>foo</td></tr>' + '</table></td>') - - def test_nested_tables(self): - # lxml closes the <tr> and <table> tags that weren't closed by - # TABLE_MARKUP. Unlike html5lib, it treats both bits of markup - # as nested tables. self.assertSoupEquals( - self.TABLE_MARKUP_1, - '<table id="1">' - '<tr>' - "<td>Here's another table:" - '<table id="2">' - '<tr><td>foo</td></tr>' - '</table>' + markup, + '<table id="1"><tr><td>Here\'s another table:' + '<table id="2"><tr><td>foo</td></tr></table>' '</td></tr></table>') self.assertSoupEquals( - self.TABLE_MARKUP_2, - '<table id="1">' - '<tr>' - "<td>Here's another table:</td>" - '<table id="2">' - '<tr><td>foo</td></tr>' - '</table>' - '</tr></table>') - + "<table><thead><tr><td>Foo</td></tr></thead>" + "<tbody><tr><td>Bar</td></tr></tbody>" + "<tfoot><tr><td>Baz</td></tr></tfoot></table>") def test_collapsed_whitespace(self): """In most tags, whitespace is collapsed.""" @@ -240,6 +224,20 @@ class TestLXMLBuilderInvalidMarkup(SoupTest): markup at all. """ + def test_table_containing_bare_markup(self): + # Markup should be in table cells, not directly in the table. + self.assertSoupEquals("<table><div>Foo</div></table>") + + def test_incorrectly_nested_table(self): + # The second <table> tag is floating in the <tr> tag + # rather than being inside a <td>. + bad_markup = ('<table id="1">' + '<tr>' + "<td>Here's another table:</td>" + '<table id="2">' + '<tr><td>foo</td></tr>' + '</table></td>') + def test_unclosed_block_level_elements(self): # Unclosed block-level elements should be closed. self.assertSoupEquals( |