diff options
Diffstat (limited to 'bs4/tests')
-rw-r--r-- | bs4/tests/test_html5lib.py | 26 | ||||
-rw-r--r-- | bs4/tests/test_lxml.py | 29 |
2 files changed, 55 insertions, 0 deletions
diff --git a/bs4/tests/test_html5lib.py b/bs4/tests/test_html5lib.py index 5b1d1e4..dcbd204 100644 --- a/bs4/tests/test_html5lib.py +++ b/bs4/tests/test_html5lib.py @@ -104,12 +104,38 @@ class TestHTML5BuilderInvalidMarkup(TestLXMLBuilderInvalidMarkup): self.assertSoupEquals("<table><div>Foo</div></table>", "<div>Foo</div><table></table>") + def test_unclosed_a_tag(self): + # n.b. the whitespace is important here. + markup = """<div id="1"> + <a href="foo"> +</div> +<div id="2"> + <div id="3"> + <a href="bar"></a> + </div> +</div>""" + + expect = """<div id="1"> + <a href="foo"> +</a></div><a href="foo"> +</a><div id="2"><a href="foo"> + </a><div id="3"><a href="foo"> + </a><a href="bar"></a> + </div> +</div>""" + self.assertSoupEquals(markup, expect) + def test_incorrectly_nested_tables(self): self.assertSoupEquals( '<table><tr><table><tr id="nested">', ('<table><tbody><tr></tr></tbody></table>' '<table><tbody><tr id="nested"></tr></tbody></table>')) + def test_floating_text_in_table(self): + self.assertSoupEquals( + "<table><td></td>foo<td>bar</td></table>", + "foo<table><tbody><tr><td></td><td>bar</td></tr></tbody></table>") + def test_empty_element_tag_with_contents(self): self.assertSoupEquals("<br>foo</br>", "<br />foo<br />") diff --git a/bs4/tests/test_lxml.py b/bs4/tests/test_lxml.py index 0adef20..359f619 100644 --- a/bs4/tests/test_lxml.py +++ b/bs4/tests/test_lxml.py @@ -332,6 +332,32 @@ class TestLXMLBuilderInvalidMarkup(SoupTest): '<tr><td>foo</td></tr>' '</table></td>') + + def test_unclosed_a_tag(self): + # <a> tags really ought to be closed at some point. + # + # We have all the <div> tags because HTML5 says to duplicate + # the <a> tag rather than closing it, and that's what html5lib + # does. + markup = """<div id="1"> + <a href="foo"> +</div> +<div id="2"> + <div id="3"> + <a href="bar"></a> + </div> +</div>""" + + expect = """<div id="1"> +<a href="foo"> +</a></div> +<div id="2"> +<div id="3"> +<a href="bar"></a> +</div> +</div>""" + self.assertSoupEquals(markup, expect) + def test_unclosed_block_level_elements(self): # Unclosed block-level elements should be closed. self.assertSoupEquals( @@ -355,6 +381,9 @@ class TestLXMLBuilderInvalidMarkup(SoupTest): '<table><tr><table><tr id="nested">', '<table><tr><table><tr id="nested"></tr></table></tr></table>') + def test_floating_text_in_table(self): + self.assertSoupEquals("<table><td></td>foo<td>bar</td></table>") + def test_paragraphs_containing_block_display_elements(self): markup = self.soup("<p>this is the definition:" "<dl><dt>first case</dt>") |