summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--bs4/tests/test_html5lib.py21
-rw-r--r--bs4/tests/test_lxml.py26
2 files changed, 47 insertions, 0 deletions
diff --git a/bs4/tests/test_html5lib.py b/bs4/tests/test_html5lib.py
index 5b1d1e4..d257392 100644
--- a/bs4/tests/test_html5lib.py
+++ b/bs4/tests/test_html5lib.py
@@ -104,6 +104,27 @@ class TestHTML5BuilderInvalidMarkup(TestLXMLBuilderInvalidMarkup):
self.assertSoupEquals("<table><div>Foo</div></table>",
"<div>Foo</div><table></table>")
+ def test_unclosed_a_tag(self):
+ # N.b. the whitespace is important here: the expected output re-opens <a href="foo"> around whitespace runs.
+ markup = """<div id="1">
+ <a href="foo">
+</div>
+<div id="2">
+ <div id="3">
+ <a href="bar"></a>
+ </div>
+</div>"""
+
+ expect = """<div id="1">
+ <a href="foo">
+</a></div><a href="foo">
+</a><div id="2"><a href="foo">
+ </a><div id="3"><a href="foo">
+ </a><a href="bar"></a>
+ </div>
+</div>"""
+ self.assertSoupEquals(markup, expect)
+
def test_incorrectly_nested_tables(self):
self.assertSoupEquals(
'<table><tr><table><tr id="nested">',
diff --git a/bs4/tests/test_lxml.py b/bs4/tests/test_lxml.py
index 0adef20..e2cb2d2 100644
--- a/bs4/tests/test_lxml.py
+++ b/bs4/tests/test_lxml.py
@@ -332,6 +332,32 @@ class TestLXMLBuilderInvalidMarkup(SoupTest):
'<tr><td>foo</td></tr>'
'</table></td>')
+
+ def test_unclosed_a_tag(self):
+ # <a> tags really ought to be closed at some point.
+ #
+ # The markup has several nested <div> tags because HTML5 says to
+ # duplicate an unclosed <a> tag rather than just closing it, which is
+ # what html5lib does. The expected output here closes the <a> once.
+ markup = """<div id="1">
+ <a href="foo">
+</div>
+<div id="2">
+ <div id="3">
+ <a href="bar"></a>
+ </div>
+</div>"""
+
+ expect = """<div id="1">
+<a href="foo">
+</a></div>
+<div id="2">
+<div id="3">
+<a href="bar"></a>
+</div>
+</div>"""
+ self.assertSoupEquals(markup, expect)
+
def test_unclosed_block_level_elements(self):
# Unclosed block-level elements should be closed.
self.assertSoupEquals(