diff options
author | Leonard Richardson <leonard.richardson@canonical.com> | 2012-02-15 12:06:50 -0500 |
---|---|---|
committer | Leonard Richardson <leonard.richardson@canonical.com> | 2012-02-15 12:06:50 -0500 |
commit | 0f6d3cfbef6fc0b90f0e9fbe58408e00c2383070 (patch) | |
tree | dbf03213f941021448c1a853d8aa81617cd438d8 /bs4/tests/test_lxml.py | |
parent | 15f9dc784c352f2928c19460107095651f6fb624 (diff) | |
parent | 8cd893c5094e96c7bcdaa735356f4d803210ef34 (diff) |
Fixed up html5lib tree builder.
Diffstat (limited to 'bs4/tests/test_lxml.py')
-rw-r--r-- | bs4/tests/test_lxml.py | 29 |
1 files changed, 29 insertions, 0 deletions
```diff
diff --git a/bs4/tests/test_lxml.py b/bs4/tests/test_lxml.py
index 0adef20..359f619 100644
--- a/bs4/tests/test_lxml.py
+++ b/bs4/tests/test_lxml.py
@@ -332,6 +332,32 @@ class TestLXMLBuilderInvalidMarkup(SoupTest):
             '<tr><td>foo</td></tr>'
             '</table></td>')
 
+
+    def test_unclosed_a_tag(self):
+        # <a> tags really ought to be closed at some point.
+        #
+        # We have all the <div> tags because HTML5 says to duplicate
+        # the <a> tag rather than closing it, and that's what html5lib
+        # does.
+        markup = """<div id="1">
+ <a href="foo">
+</div>
+<div id="2">
+ <div id="3">
+ <a href="bar"></a>
+ </div>
+</div>"""
+
+        expect = """<div id="1">
+<a href="foo">
+</a></div>
+<div id="2">
+<div id="3">
+<a href="bar"></a>
+</div>
+</div>"""
+        self.assertSoupEquals(markup, expect)
+
     def test_unclosed_block_level_elements(self):
         # Unclosed block-level elements should be closed.
         self.assertSoupEquals(
@@ -355,6 +381,9 @@ class TestLXMLBuilderInvalidMarkup(SoupTest):
             '<table><tr><table><tr id="nested">',
             '<table><tr><table><tr id="nested"></tr></table></tr></table>')
 
+    def test_floating_text_in_table(self):
+        self.assertSoupEquals("<table><td></td>foo<td>bar</td></table>")
+
     def test_paragraphs_containing_block_display_elements(self):
         markup = self.soup("<p>this is the definition:"
                            "<dl><dt>first case</dt>")
```