summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Leonard Richardson <leonardr@segfault.org> 2013-05-20 11:23:19 -0400
committer: Leonard Richardson <leonardr@segfault.org> 2013-05-20 11:23:19 -0400
commit: 0f0ce683638ec47d87d52024a886995af114bdd7 (patch)
tree: 3d2a006ef35a29f62b5a87342e9bbfb7113fd540
parent: a4d113a2f6648d7f97d29bbbd2634949a4050eb0 (diff)
Fixed another bug by which the html5lib tree builder could create a
disconnected tree. [bug=1182089]
-rw-r--r-- NEWS.txt 3
-rw-r--r-- bs4/builder/_html5lib.py 1
-rw-r--r-- bs4/testing.py 6
3 files changed, 10 insertions, 0 deletions
diff --git a/NEWS.txt b/NEWS.txt
index 50b3cca..9b938f2 100644
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -3,6 +3,9 @@
* Gave new_string() the ability to create subclasses of
NavigableString. [bug=1181986]
+* Fixed another bug by which the html5lib tree builder could create a
+ disconnected tree. [bug=1182089]
+
* Fixed test failures when lxml is not installed. [bug=1181589]
* html5lib now supports Python 3. Fixed some Python 2-specific
diff --git a/bs4/builder/_html5lib.py b/bs4/builder/_html5lib.py
index 23e26b6..aa91435 100644
--- a/bs4/builder/_html5lib.py
+++ b/bs4/builder/_html5lib.py
@@ -131,6 +131,7 @@ class Element(html5lib.treebuilders._base.Node):
old_element = self.element.contents[-1]
new_element = self.soup.new_string(old_element + node.element)
old_element.replace_with(new_element)
+ self.soup.previous_element = new_element
else:
self.soup.object_was_parsed(node.element, parent=self.element)
diff --git a/bs4/testing.py b/bs4/testing.py
index 383f36a..d8ff6b7 100644
--- a/bs4/testing.py
+++ b/bs4/testing.py
@@ -246,6 +246,12 @@ class HTMLTreeBuilderSmokeTest(object):
self.assertSoupEquals("&#x10000000000000;", expect)
self.assertSoupEquals("&#1000000000;", expect)
+ def test_multipart_strings(self):
+ "Mostly to prevent a recurrence of a bug in the html5lib treebuilder."
+ soup = self.soup("<html><h2>\nfoo</h2><p></p></html>")
+ self.assertEqual("p", soup.h2.string.next_element.name)
+ self.assertEqual("p", soup.p.name)
+
def test_basic_namespaces(self):
"""Parsers don't need to *understand* namespaces, but at the
very least they should not choke on namespaces or lose