summary refs log tree commit diff
path: root/bs4/tests
diff options
context:
space:
mode:
Diffstat (limited to 'bs4/tests')
-rw-r--r-- bs4/tests/test_html5lib.py 26
-rw-r--r-- bs4/tests/test_lxml.py 29
2 files changed, 55 insertions, 0 deletions
diff --git a/bs4/tests/test_html5lib.py b/bs4/tests/test_html5lib.py
index 5b1d1e4..dcbd204 100644
--- a/bs4/tests/test_html5lib.py
+++ b/bs4/tests/test_html5lib.py
@@ -104,12 +104,38 @@ class TestHTML5BuilderInvalidMarkup(TestLXMLBuilderInvalidMarkup):
self.assertSoupEquals("<table><div>Foo</div></table>",
"<div>Foo</div><table></table>")
+ def test_unclosed_a_tag(self):
+ # n.b. the whitespace is important here.
+ markup = """<div id="1">
+ <a href="foo">
+</div>
+<div id="2">
+ <div id="3">
+ <a href="bar"></a>
+ </div>
+</div>"""
+
+ expect = """<div id="1">
+ <a href="foo">
+</a></div><a href="foo">
+</a><div id="2"><a href="foo">
+ </a><div id="3"><a href="foo">
+ </a><a href="bar"></a>
+ </div>
+</div>"""
+ self.assertSoupEquals(markup, expect)
+
def test_incorrectly_nested_tables(self):
self.assertSoupEquals(
'<table><tr><table><tr id="nested">',
('<table><tbody><tr></tr></tbody></table>'
'<table><tbody><tr id="nested"></tr></tbody></table>'))
+ def test_floating_text_in_table(self):
+ self.assertSoupEquals(
+ "<table><td></td>foo<td>bar</td></table>",
+ "foo<table><tbody><tr><td></td><td>bar</td></tr></tbody></table>")
+
def test_empty_element_tag_with_contents(self):
self.assertSoupEquals("<br>foo</br>", "<br />foo<br />")
diff --git a/bs4/tests/test_lxml.py b/bs4/tests/test_lxml.py
index 0adef20..359f619 100644
--- a/bs4/tests/test_lxml.py
+++ b/bs4/tests/test_lxml.py
@@ -332,6 +332,32 @@ class TestLXMLBuilderInvalidMarkup(SoupTest):
'<tr><td>foo</td></tr>'
'</table></td>')
+
+ def test_unclosed_a_tag(self):
+ # <a> tags really ought to be closed at some point.
+ #
+ # We have all the <div> tags because HTML5 says to duplicate
+ # the <a> tag rather than closing it, and that's what html5lib
+ # does.
+ markup = """<div id="1">
+ <a href="foo">
+</div>
+<div id="2">
+ <div id="3">
+ <a href="bar"></a>
+ </div>
+</div>"""
+
+ expect = """<div id="1">
+<a href="foo">
+</a></div>
+<div id="2">
+<div id="3">
+<a href="bar"></a>
+</div>
+</div>"""
+ self.assertSoupEquals(markup, expect)
+
def test_unclosed_block_level_elements(self):
# Unclosed block-level elements should be closed.
self.assertSoupEquals(
@@ -355,6 +381,9 @@ class TestLXMLBuilderInvalidMarkup(SoupTest):
'<table><tr><table><tr id="nested">',
'<table><tr><table><tr id="nested"></tr></table></tr></table>')
+ def test_floating_text_in_table(self):
+ self.assertSoupEquals("<table><td></td>foo<td>bar</td></table>")
+
def test_paragraphs_containing_block_display_elements(self):
markup = self.soup("<p>this is the definition:"
"<dl><dt>first case</dt>")