diff options
author | Leonard Richardson <leonardr@segfault.org> | 2013-08-13 10:44:46 -0400 |
---|---|---|
committer | Leonard Richardson <leonardr@segfault.org> | 2013-08-13 10:44:46 -0400 |
commit | 064439f3b2decfb55b4e118ac4d41851d13c4c6f (patch) | |
tree | da0e8007734a025df3e3d024a74a3bfb2f5f3e5a /bs4/tests/test_html5lib.py | |
parent | 020b385300c75e4c3ab0c45532de272b27945c5a (diff) |
* Fixed yet another problem with the html5lib tree builder, caused by
html5lib's tendency to rearrange the tree during
parsing. [bug=1189267]
Diffstat (limited to 'bs4/tests/test_html5lib.py')
-rw-r--r-- | bs4/tests/test_html5lib.py | 13 |
1 files changed, 13 insertions, 0 deletions
diff --git a/bs4/tests/test_html5lib.py b/bs4/tests/test_html5lib.py index 2a3b41e..594c3e1 100644 --- a/bs4/tests/test_html5lib.py +++ b/bs4/tests/test_html5lib.py @@ -70,3 +70,16 @@ class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest): soup = self.soup(markup) # Verify that we can reach the <p> tag; this means the tree is connected. self.assertEqual(b"<p>foo</p>", soup.p.encode()) + + def test_reparented_markup(self): + markup = '<p><em>foo</p>\n<p>bar<a></a></em></p>' + soup = self.soup(markup) + self.assertEqual(u"<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p></body>", soup.body.decode()) + self.assertEqual(2, len(soup.find_all('p'))) + + + def test_reparented_markup_ends_with_whitespace(self): + markup = '<p><em>foo</p>\n<p>bar<a></a></em></p>\n' + soup = self.soup(markup) + self.assertEqual(u"<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p>\n</body>", soup.body.decode()) + self.assertEqual(2, len(soup.find_all('p'))) |