summaryrefslogtreecommitdiff
path: root/tests/test_lxml.py
diff options
context:
space:
mode:
author: Leonard Richardson <leonard.richardson@canonical.com> 2011-02-10 12:32:19 -0500
committer: Leonard Richardson <leonard.richardson@canonical.com> 2011-02-10 12:32:19 -0500
commit: 261f981dc4e62a41bb0e85285d1f40927b34e3d3 (patch)
tree: 0c58d7bac3f18c43316a0f1e1651c5928e0c1f9f /tests/test_lxml.py
parent: bb9d9c5dc0af0deefc1a77542c007b7040aa55bb (diff)
Added tests illustrating the different ways lxml and html5lib handle nested tables.
Diffstat (limited to 'tests/test_lxml.py')
-rw-r--r-- tests/test_lxml.py 46
1 files changed, 46 insertions, 0 deletions
diff --git a/tests/test_lxml.py b/tests/test_lxml.py
index 455c953..76bcd32 100644
--- a/tests/test_lxml.py
+++ b/tests/test_lxml.py
@@ -1,5 +1,7 @@
"""Tests to ensure that the lxml tree builder generates good trees."""
+import re
+
from beautifulsoup import BeautifulSoup
from beautifulsoup.builder.lxml_builder import LXMLTreeBuilder
from beautifulsoup.element import Comment
@@ -65,6 +67,50 @@ class TestLXMLBuilder(SoupTest):
self.assertEqual(blockquote.p.b.string, 'Foo')
self.assertEqual(blockquote.b.string, 'Foo')
+ # This is a <table> tag containing another <table> tag in one of its
+ # cells. The outer <tr> and <table> tags are left unclosed.
+ TABLE_MARKUP_1 = ('<table id="1">'
+ '<tr>'
+ "<td>Here's another table:"
+ '<table id="2">'
+ '<tr><td>foo</td></tr>'
+ '</table></td>')
+
+ # This is the same as TABLE_MARKUP_1, but the nested table floats freely
+ # in the <tr> instead of sitting in a <td> cell; the final </td> is stray.
+ TABLE_MARKUP_2 = ('<table id="1">'
+ '<tr>'
+ "<td>Here's another table:</td>"
+ '<table id="2">'
+ '<tr><td>foo</td></tr>'
+ '</table></td>')
+
+
+ def test_nested_tables(self):
+ # lxml closes the <tr> and <table> tags that TABLE_MARKUP_1 and
+ # TABLE_MARKUP_2 leave open. Unlike html5lib, it treats both bits
+ # of markup as nested tables.
+ self.assertSoupEquals(
+ self.TABLE_MARKUP_1,
+ '<table id="1">'
+ '<tr>'
+ "<td>Here's another table:"
+ '<table id="2">'
+ '<tr><td>foo</td></tr>'
+ '</table>'
+ '</td></tr></table>')
+
+ self.assertSoupEquals(
+ self.TABLE_MARKUP_2,
+ '<table id="1">'
+ '<tr>'
+ "<td>Here's another table:</td>"
+ '<table id="2">'
+ '<tr><td>foo</td></tr>'
+ '</table>'
+ '</tr></table>')
+
+
def test_collapsed_whitespace(self):
"""In most tags, whitespace is collapsed."""
self.assertSoupEquals("<p> </p>", "<p> </p>")