diff options
author | Leonard Richardson <leonardr@segfault.org> | 2013-05-20 11:23:19 -0400 |
---|---|---|
committer | Leonard Richardson <leonardr@segfault.org> | 2013-05-20 11:23:19 -0400 |
commit | 0f0ce683638ec47d87d52024a886995af114bdd7 (patch) | |
tree | 3d2a006ef35a29f62b5a87342e9bbfb7113fd540 | |
parent | a4d113a2f6648d7f97d29bbbd2634949a4050eb0 (diff) |
Fixed another bug by which the html5lib tree builder could create a
disconnected tree. [bug=1182089]
-rw-r--r-- | NEWS.txt | 3 | ||||
-rw-r--r-- | bs4/builder/_html5lib.py | 1 | ||||
-rw-r--r-- | bs4/testing.py | 6 |
3 files changed, 10 insertions, 0 deletions
@@ -3,6 +3,9 @@
 * Gave new_string() the ability to create subclasses of
   NavigableString. [bug=1181986]
 
+* Fixed another bug by which the html5lib tree builder could create a
+  disconnected tree. [bug=1182089]
+
 * Fixed test failures when lxml is not installed. [bug=1181589]
 
 * html5lib now supports Python 3. Fixed some Python 2-specific
diff --git a/bs4/builder/_html5lib.py b/bs4/builder/_html5lib.py
index 23e26b6..aa91435 100644
--- a/bs4/builder/_html5lib.py
+++ b/bs4/builder/_html5lib.py
@@ -131,6 +131,7 @@ class Element(html5lib.treebuilders._base.Node):
             old_element = self.element.contents[-1]
             new_element = self.soup.new_string(old_element + node.element)
             old_element.replace_with(new_element)
+            self.soup.previous_element = new_element
         else:
             self.soup.object_was_parsed(node.element, parent=self.element)
 
diff --git a/bs4/testing.py b/bs4/testing.py
index 383f36a..d8ff6b7 100644
--- a/bs4/testing.py
+++ b/bs4/testing.py
@@ -246,6 +246,12 @@ class HTMLTreeBuilderSmokeTest(object):
         self.assertSoupEquals("�", expect)
         self.assertSoupEquals("�", expect)
 
+    def test_multipart_strings(self):
+        "Mostly to prevent a recurrence of a bug in the html5lib treebuilder."
+        soup = self.soup("<html><h2>\nfoo</h2><p></p></html>")
+        self.assertEqual("p", soup.h2.string.next_element.name)
+        self.assertEqual("p", soup.p.name)
+
     def test_basic_namespaces(self):
         """Parsers don't need to *understand* namespaces, but at the
         very least they should not choke on namespaces or lose