From 261f981dc4e62a41bb0e85285d1f40927b34e3d3 Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Thu, 10 Feb 2011 12:32:19 -0500 Subject: Added tests illustrating the different ways lxml and html5lib handle nested tables. --- tests/test_lxml.py | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) (limited to 'tests/test_lxml.py') diff --git a/tests/test_lxml.py b/tests/test_lxml.py index 455c953..76bcd32 100644 --- a/tests/test_lxml.py +++ b/tests/test_lxml.py @@ -1,5 +1,7 @@ """Tests to ensure that the lxml tree builder generates good trees.""" +import re + from beautifulsoup import BeautifulSoup from beautifulsoup.builder.lxml_builder import LXMLTreeBuilder from beautifulsoup.element import Comment @@ -65,6 +67,50 @@ class TestLXMLBuilder(SoupTest): self.assertEqual(blockquote.p.b.string, 'Foo') self.assertEqual(blockquote.b.string, 'Foo') + # This is a <table> tag containing another
<table> tag in one of its + # cells. + TABLE_MARKUP_1 = ('
' + '' + "') + + # This is the same as TABLE_MARKUP_1, but the nested table is + # floating freely rather than being inside a <td> ') + + + def test_nested_tables(self): + # lxml closes the <tr> and <table>
Here's another table:" + '' + '' + '
foo
cell. + TABLE_MARKUP_2 = ('' + '' + "" + '
Here's another table:
' + '' + '
foo
tags that weren't closed by + # TABLE_MARKUP. Unlike html5lib, it treats both bits of markup + # as nested tables. + self.assertSoupEquals( + self.TABLE_MARKUP_1, + '
' + '' + "
Here's another table:" + '' + '' + '
foo
' + '
') + + self.assertSoupEquals( + self.TABLE_MARKUP_2, + '' + '' + "" + '
Here's another table:
' + '' + '
foo
' + '') + + def test_collapsed_whitespace(self): """In most tags, whitespace is collapsed.""" self.assertSoupEquals("

", "

") -- cgit v1.2.3