summaryrefslogtreecommitdiff
path: root/tests/test_html5lib.py
diff options
context:
space:
mode:
authorLeonard Richardson <leonard.richardson@canonical.com>2011-02-10 12:32:19 -0500
committerLeonard Richardson <leonard.richardson@canonical.com>2011-02-10 12:32:19 -0500
commit261f981dc4e62a41bb0e85285d1f40927b34e3d3 (patch)
tree0c58d7bac3f18c43316a0f1e1651c5928e0c1f9f /tests/test_html5lib.py
parentbb9d9c5dc0af0deefc1a77542c007b7040aa55bb (diff)
Added tests illustrating the different ways lxml and html5lib handle nested tables.
Diffstat (limited to 'tests/test_html5lib.py')
-rw-r--r--tests/test_html5lib.py26
1 files changed, 26 insertions, 0 deletions
diff --git a/tests/test_html5lib.py b/tests/test_html5lib.py
index 3a4ee27..ef38f9f 100644
--- a/tests/test_html5lib.py
+++ b/tests/test_html5lib.py
@@ -19,6 +19,32 @@ class TestHTML5Builder(TestLXMLBuilder):
self.assertSoupEquals(
"A bare string", "A bare string")
+ def test_nested_tables(self):
+ # See TestLXMLBuilder for the definitions of TABLE_MARKUP_1
+ # and TABLE_MARKUP_2. Both are nested tables in which the
+ # top-level <table> and <tr> are never closed. In TABLE_MARKUP_1
+ # the second table sits inside a <td> tag; in TABLE_MARKUP_2
+ # the second table floats directly inside a <tr> tag.
+ #
+ # html5lib adds a <tbody> tag to each table. It keeps
+ # TABLE_MARKUP_1 as a nested table, but turns TABLE_MARKUP_2
+ # into two separate tables, one after the other.
+ self.assertSoupEquals(
+ self.TABLE_MARKUP_1,
+ '<table id="1"><tbody>'
+ "<tr><td>Here's another table:"
+ '<table id="2"><tbody><tr><td>foo</td></tr></tbody></table>'
+ "</td></tr></tbody></table>"
+ )
+
+ self.assertSoupEquals(
+ self.TABLE_MARKUP_2,
+ '<table id="1"><tbody>'
+ "<tr><td>Here's another table:</td></tr>"
+ '</tbody></table>'
+ '<table id="2"><tbody><tr><td>foo</td></tr></tbody></table>'
+ )
+
def test_collapsed_whitespace(self):
"""Whitespace is preserved even in tags that don't require it."""
self.assertSoupEquals("<p> </p>")