From 261f981dc4e62a41bb0e85285d1f40927b34e3d3 Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Thu, 10 Feb 2011 12:32:19 -0500 Subject: Added tests illustrating the different ways lxml and html5lib handle nested tables. --- tests/test_html5lib.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'tests/test_html5lib.py') diff --git a/tests/test_html5lib.py b/tests/test_html5lib.py index 3a4ee27..ef38f9f 100644 --- a/tests/test_html5lib.py +++ b/tests/test_html5lib.py @@ -19,6 +19,32 @@ class TestHTML5Builder(TestLXMLBuilder): self.assertSoupEquals( "A bare string", "A bare string") + def test_nested_tables(self): + # See TestLXMLBuilder for TABLE_MARKUP_1 and + # TABLE_MARKUP_2. They're both nested tables where the + # top-level <table> and <tr> aren't closed. In TABLE_MARKUP_1 + # the second table is within a <td> tag. In + # TABLE_MARKUP_2, the second table is floating inside a <tr> tag. + # + # html5lib adds <tbody> tags to each table. It treats + # TABLE_MARKUP_1 as a nested table, and TABLE_MARKUP_2 as two + # different tables. + self.assertSoupEquals( + self.TABLE_MARKUP_1, + '
' + "
Here's another table:" + '
foo
' + "
" + ) + + self.assertSoupEquals( + self.TABLE_MARKUP_2, + '' + "" + '
Here's another table:
' + '
foo
' + ) + def test_collapsed_whitespace(self): """Whitespace is preserved even in tags that don't require it.""" self.assertSoupEquals("

") -- cgit v1.2.3 From d444427275459c6be2dc255274831bae26eb5e04 Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Thu, 10 Feb 2011 13:13:50 -0500 Subject: Added more table tests. --- tests/test_html5lib.py | 40 +++++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 21 deletions(-) (limited to 'tests/test_html5lib.py') diff --git a/tests/test_html5lib.py b/tests/test_html5lib.py index ef38f9f..5febe2d 100644 --- a/tests/test_html5lib.py +++ b/tests/test_html5lib.py @@ -19,31 +19,24 @@ class TestHTML5Builder(TestLXMLBuilder): self.assertSoupEquals( "A bare string", "A bare string") - def test_nested_tables(self): - # See TestLXMLBuilder for TABLE_MARKUP_1 and - # TABLE_MARKUP_2. They're both nested tables where the - # top-level <table> and <tr> aren't closed. In TABLE_MARKUP_1 - # the second table is within a <td> tag. In - # TABLE_MARKUP_2, the second table is floating inside a <tr> tag. - # - # html5lib adds <tbody> tags to each table. It treats - # TABLE_MARKUP_1 as a nested table, and TABLE_MARKUP_2 as two - # different tables. + def test_correctly_nested_tables(self): + markup = ('
' + '' + "') + self.assertSoupEquals( - self.TABLE_MARKUP_1, - '
Here's another table:" + '' + '' + '
foo
' - "
Here's another table:" + markup, + '
Here\'s another table:' '
foo
' - "
" - ) + '
') self.assertSoupEquals( - self.TABLE_MARKUP_2, - '' - "" - '
Here's another table:
' - '
foo
' - ) + "" + "" + "
Foo
Bar
Baz
") def test_collapsed_whitespace(self): """Whitespace is preserved even in tags that don't require it.""" @@ -66,6 +59,11 @@ class TestHTML5BuilderInvalidMarkup(TestLXMLBuilderInvalidMarkup): '

Foo

Bar', '

Foo

Bar

') + def test_table_containing_bare_markup(self): + # Markup should be in table cells, not directly in the table. + self.assertSoupEquals("
Foo
", + "
Foo
") + def test_incorrectly_nested_tables(self): self.assertSoupEquals( '
', -- cgit v1.2.3 From e1ad4220e5ca00ec0e7f77ce5087845fcb356a0e Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Thu, 10 Feb 2011 13:18:57 -0500 Subject: Commented out annoying print statement. --- tests/test_html5lib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tests/test_html5lib.py') diff --git a/tests/test_html5lib.py b/tests/test_html5lib.py index 5febe2d..3e35949 100644 --- a/tests/test_html5lib.py +++ b/tests/test_html5lib.py @@ -77,4 +77,4 @@ class TestHTML5BuilderInvalidMarkup(TestLXMLBuilderInvalidMarkup): utf8 = isolatin.replace("ISO-Latin-1".encode(), "utf-8".encode()) utf8 = utf8.replace("\xe9", "\xc3\xa9") - print soup + #print soup -- cgit v1.2.3 From d89c8878ea86a2575c87e9fad8081cfcd81e0bcd Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Thu, 10 Feb 2011 16:41:10 -0500 Subject: Added some elementary doctype handling. --- tests/test_html5lib.py | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'tests/test_html5lib.py') diff --git a/tests/test_html5lib.py b/tests/test_html5lib.py index 3e35949..dada900 100644 --- a/tests/test_html5lib.py +++ b/tests/test_html5lib.py @@ -43,6 +43,8 @@ class TestHTML5Builder(TestLXMLBuilder): self.assertSoupEquals("

") self.assertSoupEquals(" ") + def test_cdata(self): + print self.soup("
") class TestHTML5BuilderInvalidMarkup(TestLXMLBuilderInvalidMarkup): """See `BuilderInvalidMarkupSmokeTest`.""" @@ -70,6 +72,10 @@ class TestHTML5BuilderInvalidMarkup(TestLXMLBuilderInvalidMarkup): ('
' '
')) + def test_doctype_in_body(self): + markup = "

onetwo

" + self.assertSoupEquals(markup, "

onetwo

") + def test_foo(self): isolatin = """Sacr\xe9 bleu!""" soup = self.soup(isolatin) -- cgit v1.2.3