diff options
Diffstat (limited to 'bs4/builder/_html5lib.py')
-rw-r--r-- | bs4/builder/_html5lib.py | 64 |
1 files changed, 56 insertions, 8 deletions
diff --git a/bs4/builder/_html5lib.py b/bs4/builder/_html5lib.py index 6ed5055..7de36ae 100644 --- a/bs4/builder/_html5lib.py +++ b/bs4/builder/_html5lib.py @@ -139,6 +139,9 @@ class Element(html5lib.treebuilders._base.Node): else: child = node.element + if not isinstance(child, basestring) and child.parent is not None: + node.element.extract() + if (string_child and self.element.contents and self.element.contents[-1].__class__ == NavigableString): # We are appending a string onto another string. @@ -147,12 +150,23 @@ class Element(html5lib.treebuilders._base.Node): old_element = self.element.contents[-1] new_element = self.soup.new_string(old_element + string_child) old_element.replace_with(new_element) - self.soup._most_recent_element = new_element + self.soup._most_recent_element = new_element else: if isinstance(node, basestring): # Create a brand new NavigableString from this string. child = self.soup.new_string(node) - self.soup.object_was_parsed(child, parent=self.element) + + # Tell Beautiful Soup to act as if it parsed this element + # immediately after the parent's last descendant. (Or + # immediately after the parent, if it has no children.) + if self.element.contents: + most_recent_element = self.element._last_descendant(False) + else: + most_recent_element = self.element + + self.soup.object_was_parsed( + child, parent=self.element, + most_recent_element=most_recent_element) def getAttributes(self): return AttrList(self.element) @@ -201,13 +215,47 @@ class Element(html5lib.treebuilders._base.Node): def removeChild(self, node): node.element.extract() - pass - def reparentChildren(self, newParent): - while self.element.contents: - child = self.element.contents[0] - child.extract() - newParent.appendChild(child) + def reparentChildren(self, new_parent): + """Move all of this tag's children into another tag.""" + element = self.element + new_parent_element = new_parent.element + # Determine what this tag's next_element will be once all the children + # are removed. + final_next_element = element.next_sibling + + new_parents_last_descendant = new_parent_element._last_descendant(False, False) + if len(new_parent_element.contents) > 0: + # The new parent already contains children. We will be + # appending this tag's children to the end. + new_parents_last_child = new_parent_element.contents[-1] + new_parents_last_descendant_next_element = new_parents_last_descendant.next_element + else: + # The new parent contains no children. + new_parents_last_child = None + new_parents_last_descendant_next_element = new_parent_element.next_element + + to_append = element.contents + append_after = new_parent.element.contents + if len(to_append) > 0: + # Set the first child's previous_element and previous_sibling + # to elements within the new parent + first_child = to_append[0] + first_child.previous_element = new_parents_last_descendant + first_child.previous_sibling = new_parents_last_child + + # Fix the last child's next_element and next_sibling + last_child = to_append[-1] + last_child.next_element = new_parents_last_descendant_next_element + last_child.next_sibling = None + + for child in to_append: + child.parent = new_parent_element + new_parent_element.contents.append(child) + + # Now that this element has no children, change its .next_element. + element.contents = [] + element.next_element = final_next_element def cloneNode(self): tag = self.soup.new_tag(self.element.name, self.namespace) |