diff options
Diffstat (limited to 'bs4')
-rw-r--r-- | bs4/builder/_html5lib.py | 1 | ||||
-rw-r--r-- | bs4/testing.py | 6 |
2 files changed, 7 insertions, 0 deletions
diff --git a/bs4/builder/_html5lib.py b/bs4/builder/_html5lib.py
index 23e26b6..aa91435 100644
--- a/bs4/builder/_html5lib.py
+++ b/bs4/builder/_html5lib.py
@@ -131,6 +131,7 @@ class Element(html5lib.treebuilders._base.Node):
             old_element = self.element.contents[-1]
             new_element = self.soup.new_string(old_element + node.element)
             old_element.replace_with(new_element)
+            self.soup.previous_element = new_element
         else:
             self.soup.object_was_parsed(node.element, parent=self.element)
 
diff --git a/bs4/testing.py b/bs4/testing.py
index 383f36a..d8ff6b7 100644
--- a/bs4/testing.py
+++ b/bs4/testing.py
@@ -246,6 +246,12 @@ class HTMLTreeBuilderSmokeTest(object):
         self.assertSoupEquals("�", expect)
         self.assertSoupEquals("�", expect)
 
+    def test_multipart_strings(self):
+        "Mostly to prevent a recurrence of a bug in the html5lib treebuilder."
+        soup = self.soup("<html><h2>\nfoo</h2><p></p></html>")
+        self.assertEqual("p", soup.h2.string.next_element.name)
+        self.assertEqual("p", soup.p.name)
+
     def test_basic_namespaces(self):
         """Parsers don't need to *understand* namespaces, but at the
         very least they should not choke on namespaces or lose