diff options
-rw-r--r-- | bs4/__init__.py | 17 | ||||
-rw-r--r-- | bs4/builder/_html5lib.py | 22 | ||||
-rw-r--r-- | bs4/element.py | 2 | ||||
-rw-r--r-- | bs4/testing.py | 18 |
4 files changed, 56 insertions, 3 deletions
diff --git a/bs4/__init__.py b/bs4/__init__.py index 9f602ae..cc4b27f 100644 --- a/bs4/__init__.py +++ b/bs4/__init__.py @@ -303,8 +303,25 @@ class BeautifulSoup(Tag): """Add an object to the parse tree.""" parent = parent or self.currentTag most_recent_element = most_recent_element or self._most_recent_element + + if isinstance(o, Tag): + next_element = o.next_element + next_sibling = o.next_sibling + previous_sibling = o.previous_sibling + previous_element = o.previous_element + o.setup(parent, most_recent_element) + if isinstance(o, Tag): + if next_element: + o.next_element = next_element + if next_sibling: + o.next_sibling = next_sibling + if previous_sibling: + o.previous_sibling = previous_sibling + if previous_element: + o.previous_element = previous_element + if most_recent_element is not None: most_recent_element.next_element = o self._most_recent_element = o diff --git a/bs4/builder/_html5lib.py b/bs4/builder/_html5lib.py index 6013575..ea8ff43 100644 --- a/bs4/builder/_html5lib.py +++ b/bs4/builder/_html5lib.py @@ -226,6 +226,9 @@ class Element(html5lib.treebuilders._base.Node): def reparentChildren(self, new_parent): """Move all of this tag's children into another tag.""" + #print "MOVE", self.element.contents + #print "FROM", self.element + #print "TO", new_parent.element element = self.element new_parent_element = new_parent.element # Determine what this tag's next_element will be once all the children @@ -244,17 +247,28 @@ class Element(html5lib.treebuilders._base.Node): new_parents_last_descendant_next_element = new_parent_element.next_element to_append = element.contents - append_after = new_parent.element.contents + append_after = new_parent_element.contents if len(to_append) > 0: # Set the first child's previous_element and previous_sibling # to elements within the new parent first_child = to_append[0] - first_child.previous_element = new_parents_last_descendant + if new_parents_last_descendant: + first_child.previous_element = new_parents_last_descendant + else: + first_child.previous_element = new_parent_element first_child.previous_sibling = new_parents_last_child + if new_parents_last_descendant: + new_parents_last_descendant.next_element = first_child + else: + new_parent_element.next_element = first_child + if new_parents_last_child: + new_parents_last_child.next_sibling = first_child # Fix the last child's next_element and next_sibling last_child = to_append[-1] last_child.next_element = new_parents_last_descendant_next_element + if new_parents_last_descendant_next_element: + new_parents_last_descendant_next_element.previous_element = last_child last_child.next_sibling = None for child in to_append: @@ -265,6 +279,10 @@ class Element(html5lib.treebuilders._base.Node): element.contents = [] element.next_element = final_next_element + #print "DONE WITH MOVE" + #print "FROM", self.element + #print "TO", new_parent_element + def cloneNode(self): tag = self.soup.new_tag(self.element.name, self.namespace) node = Element(tag, self.soup, self.namespace) diff --git a/bs4/element.py b/bs4/element.py index ff716df..f236216 100644 --- a/bs4/element.py +++ b/bs4/element.py @@ -192,7 +192,7 @@ class PageElement(object): self.previous_element = previous_element if previous_element is not None: self.previous_element.next_element = self - self.next_element = None + self.next_element = next_element self.previous_sibling = None self.next_sibling = None if self.parent is not None and self.parent.contents: diff --git a/bs4/testing.py b/bs4/testing.py index a85ecd6..dfaa047 100644 --- a/bs4/testing.py +++ b/bs4/testing.py @@ -286,6 +286,24 @@ Hello, world! soup = self.soup(content) self.assertNotEqual(None, soup.html.body) + def test_multiple_copies_of_a_tag(self): + "Prevent recurrence of a bug in the html5lib treebuilder." + content = """<!DOCTYPE html> +<html> + <body> + <article id="a" > + <div><a href="1"></div> + <footer> + <a href="2"></a> + </footer> + </article> + </body> +</html> +""" + soup = self.soup(content) + [x for x in soup.article.descendants] + + def test_basic_namespaces(self): """Parsers don't need to *understand* namespaces, but at the very least they should not choke on namespaces or lose |