summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- bs4/__init__.py 17
-rw-r--r-- bs4/builder/_html5lib.py 22
-rw-r--r-- bs4/element.py 2
-rw-r--r-- bs4/testing.py 18
4 files changed, 56 insertions, 3 deletions
diff --git a/bs4/__init__.py b/bs4/__init__.py
index 9f602ae..cc4b27f 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -303,8 +303,25 @@ class BeautifulSoup(Tag):
"""Add an object to the parse tree."""
parent = parent or self.currentTag
most_recent_element = most_recent_element or self._most_recent_element
+
+ if isinstance(o, Tag):
+ next_element = o.next_element
+ next_sibling = o.next_sibling
+ previous_sibling = o.previous_sibling
+ previous_element = o.previous_element
+
o.setup(parent, most_recent_element)
+ if isinstance(o, Tag):
+ if next_element:
+ o.next_element = next_element
+ if next_sibling:
+ o.next_sibling = next_sibling
+ if previous_sibling:
+ o.previous_sibling = previous_sibling
+ if previous_element:
+ o.previous_element = previous_element
+
if most_recent_element is not None:
most_recent_element.next_element = o
self._most_recent_element = o
diff --git a/bs4/builder/_html5lib.py b/bs4/builder/_html5lib.py
index 6013575..ea8ff43 100644
--- a/bs4/builder/_html5lib.py
+++ b/bs4/builder/_html5lib.py
@@ -226,6 +226,9 @@ class Element(html5lib.treebuilders._base.Node):
def reparentChildren(self, new_parent):
"""Move all of this tag's children into another tag."""
+ #print "MOVE", self.element.contents
+ #print "FROM", self.element
+ #print "TO", new_parent.element
element = self.element
new_parent_element = new_parent.element
# Determine what this tag's next_element will be once all the children
@@ -244,17 +247,28 @@ class Element(html5lib.treebuilders._base.Node):
new_parents_last_descendant_next_element = new_parent_element.next_element
to_append = element.contents
- append_after = new_parent.element.contents
+ append_after = new_parent_element.contents
if len(to_append) > 0:
# Set the first child's previous_element and previous_sibling
# to elements within the new parent
first_child = to_append[0]
- first_child.previous_element = new_parents_last_descendant
+ if new_parents_last_descendant:
+ first_child.previous_element = new_parents_last_descendant
+ else:
+ first_child.previous_element = new_parent_element
first_child.previous_sibling = new_parents_last_child
+ if new_parents_last_descendant:
+ new_parents_last_descendant.next_element = first_child
+ else:
+ new_parent_element.next_element = first_child
+ if new_parents_last_child:
+ new_parents_last_child.next_sibling = first_child
# Fix the last child's next_element and next_sibling
last_child = to_append[-1]
last_child.next_element = new_parents_last_descendant_next_element
+ if new_parents_last_descendant_next_element:
+ new_parents_last_descendant_next_element.previous_element = last_child
last_child.next_sibling = None
for child in to_append:
@@ -265,6 +279,10 @@ class Element(html5lib.treebuilders._base.Node):
element.contents = []
element.next_element = final_next_element
+ #print "DONE WITH MOVE"
+ #print "FROM", self.element
+ #print "TO", new_parent_element
+
def cloneNode(self):
tag = self.soup.new_tag(self.element.name, self.namespace)
node = Element(tag, self.soup, self.namespace)
diff --git a/bs4/element.py b/bs4/element.py
index ff716df..f236216 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -192,7 +192,7 @@ class PageElement(object):
self.previous_element = previous_element
if previous_element is not None:
self.previous_element.next_element = self
- self.next_element = None
+ self.next_element = next_element
self.previous_sibling = None
self.next_sibling = None
if self.parent is not None and self.parent.contents:
diff --git a/bs4/testing.py b/bs4/testing.py
index a85ecd6..dfaa047 100644
--- a/bs4/testing.py
+++ b/bs4/testing.py
@@ -286,6 +286,24 @@ Hello, world!
soup = self.soup(content)
self.assertNotEqual(None, soup.html.body)
+ def test_multiple_copies_of_a_tag(self):
+ "Prevent recurrence of a bug in the html5lib treebuilder."
+ content = """<!DOCTYPE html>
+<html>
+ <body>
+ <article id="a" >
+ <div><a href="1"></div>
+ <footer>
+ <a href="2"></a>
+ </footer>
+ </article>
+ </body>
+</html>
+"""
+ soup = self.soup(content)
+ [x for x in soup.article.descendants]
+
+
def test_basic_namespaces(self):
"""Parsers don't need to *understand* namespaces, but at the
very least they should not choke on namespaces or lose