summaryrefslogtreecommitdiff
path: root/bs4/tests/test_html5lib.py
diff options
context:
space:
mode:
authorLeonard Richardson <leonardr@segfault.org>2013-08-13 10:44:46 -0400
committerLeonard Richardson <leonardr@segfault.org>2013-08-13 10:44:46 -0400
commit064439f3b2decfb55b4e118ac4d41851d13c4c6f (patch)
treeda0e8007734a025df3e3d024a74a3bfb2f5f3e5a /bs4/tests/test_html5lib.py
parent020b385300c75e4c3ab0c45532de272b27945c5a (diff)
* Fixed yet another problem with the html5lib tree builder, caused by
html5lib's tendency to rearrange the tree during parsing. [bug=1189267]
Diffstat (limited to 'bs4/tests/test_html5lib.py')
-rw-r--r--bs4/tests/test_html5lib.py13
1 files changed, 13 insertions, 0 deletions
diff --git a/bs4/tests/test_html5lib.py b/bs4/tests/test_html5lib.py
index 2a3b41e..594c3e1 100644
--- a/bs4/tests/test_html5lib.py
+++ b/bs4/tests/test_html5lib.py
@@ -70,3 +70,16 @@ class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest):
soup = self.soup(markup)
# Verify that we can reach the <p> tag; this means the tree is connected.
self.assertEqual(b"<p>foo</p>", soup.p.encode())
+
+ def test_reparented_markup(self):
+ markup = '<p><em>foo</p>\n<p>bar<a></a></em></p>'
+ soup = self.soup(markup)
+ self.assertEqual(u"<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p></body>", soup.body.decode())
+ self.assertEqual(2, len(soup.find_all('p')))
+
+
+ def test_reparented_markup_ends_with_whitespace(self):
+ markup = '<p><em>foo</p>\n<p>bar<a></a></em></p>\n'
+ soup = self.soup(markup)
+ self.assertEqual(u"<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p>\n</body>", soup.body.decode())
+ self.assertEqual(2, len(soup.find_all('p')))