5 files changed, 84 insertions, 13 deletions
diff --git a/NEWS.txt b/NEWS.txt
index adb957d..904dc07 100644
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -1,3 +1,9 @@
+= 4.3.1 (Unreleased) =
+
+* Fixed yet another problem with the html5lib tree builder, caused by
+  html5lib's tendency to rearrange the tree during
+  parsing. [bug=1189267]
+
 = 4.3.0 (20130812) =
 
 * Instead of converting incoming data to Unicode and feeding it to the
diff --git a/bs4/__init__.py b/bs4/__init__.py
index ace72f1..75c1aaa 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -279,6 +279,7 @@ class BeautifulSoup(Tag):
         parent = parent or self.currentTag
         most_recent_element = most_recent_element or self._most_recent_element
         o.setup(parent, most_recent_element)
+
         if most_recent_element is not None:
             most_recent_element.next_element = o
         self._most_recent_element = o
diff --git a/bs4/builder/_html5lib.py b/bs4/builder/_html5lib.py
index 6ed5055..7de36ae 100644
--- a/bs4/builder/_html5lib.py
+++ b/bs4/builder/_html5lib.py
@@ -139,6 +139,9 @@ class Element(html5lib.treebuilders._base.Node):
         else:
             child = node.element
 
+        if not isinstance(child, basestring) and child.parent is not None:
+            node.element.extract()
+
         if (string_child and self.element.contents
             and self.element.contents[-1].__class__ == NavigableString):
             # We are appending a string onto another string.
@@ -147,12 +150,23 @@ class Element(html5lib.treebuilders._base.Node):
             old_element = self.element.contents[-1]
             new_element = self.soup.new_string(old_element + string_child)
             old_element.replace_with(new_element)
-            self.soup._most_recent_element = new_element            
+            self.soup._most_recent_element = new_element
         else:
             if isinstance(node, basestring):
                 # Create a brand new NavigableString from this string.
                 child = self.soup.new_string(node)
-            self.soup.object_was_parsed(child, parent=self.element)
+
+            # Tell Beautiful Soup to act as if it parsed this element
+            # immediately after the parent's last descendant. (Or
+            # immediately after the parent, if it has no children.)
+            if self.element.contents:
+                most_recent_element = self.element._last_descendant(False)
+            else:
+                most_recent_element = self.element
+
+            self.soup.object_was_parsed(
+                child, parent=self.element,
+                most_recent_element=most_recent_element)
 
     def getAttributes(self):
         return AttrList(self.element)
@@ -201,13 +215,74 @@ class Element(html5lib.treebuilders._base.Node):
 
     def removeChild(self, node):
         node.element.extract()
-        pass
 
-    def reparentChildren(self, newParent):
-        while self.element.contents:
-            child = self.element.contents[0]
-            child.extract()
-            newParent.appendChild(child)
+    def reparentChildren(self, new_parent):
+        """Move all of this tag's children into another tag.
+
+        The children are appended after any children ``new_parent``
+        already has. Besides ``.parent`` and ``.contents``, the
+        ``.next_element``/``.previous_element`` and
+        ``.next_sibling``/``.previous_sibling`` links at both ends of
+        the moved run are rewired in both directions.
+
+        :param new_parent: The Element wrapper whose ``.element`` tag
+            receives the children.
+        """
+        element = self.element
+        new_parent_element = new_parent.element
+        # Determine what this tag's next_element will be once all the children
+        # are removed.
+        final_next_element = element.next_sibling
+
+        # This is None if the new parent has no children.
+        new_parents_last_descendant = new_parent_element._last_descendant(False, False)
+        if len(new_parent_element.contents) > 0:
+            # The new parent already contains children. We will be
+            # appending this tag's children to the end.
+            new_parents_last_child = new_parent_element.contents[-1]
+            new_parents_last_descendant_next_element = new_parents_last_descendant.next_element
+        else:
+            # The new parent contains no children.
+            new_parents_last_child = None
+            new_parents_last_descendant_next_element = new_parent_element.next_element
+
+        to_append = element.contents
+        if len(to_append) > 0:
+            # The element that will come just before the first moved
+            # child: the new parent's last descendant, or the new parent
+            # itself if it is empty.
+            if new_parents_last_descendant is not None:
+                append_after = new_parents_last_descendant
+            else:
+                append_after = new_parent_element
+
+            # Link the first child to the new parent's existing content,
+            # in both directions.
+            first_child = to_append[0]
+            first_child.previous_element = append_after
+            append_after.next_element = first_child
+            first_child.previous_sibling = new_parents_last_child
+            if new_parents_last_child is not None:
+                new_parents_last_child.next_sibling = first_child
+
+            # The very last element being moved is the last child's last
+            # descendant (or the last child itself). It becomes the new
+            # parent's last descendant, so it takes over the old one's
+            # next_element.
+            last_child = to_append[-1]
+            last_child.next_sibling = None
+            last_moved = last_child._last_descendant(False, True)
+            last_moved.next_element = new_parents_last_descendant_next_element
+            if new_parents_last_descendant_next_element is not None:
+                new_parents_last_descendant_next_element.previous_element = last_moved
+
+        for child in to_append:
+            child.parent = new_parent_element
+            new_parent_element.contents.append(child)
+
+        # Now that this element has no children, change its .next_element.
+        element.contents = []
+        element.next_element = final_next_element
 
     def cloneNode(self):
         tag = self.soup.new_tag(self.element.name, self.namespace)
diff --git a/bs4/element.py b/bs4/element.py
index e10e100..caa855e 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -255,13 +255,24 @@ class PageElement(object):
         self.previous_sibling = self.next_sibling = None
         return self
 
-    def _last_descendant(self, is_initialized=True):
-        "Finds the last element beneath this object to be parsed."
+    def _last_descendant(self, is_initialized=True, accept_self=True):
+        """Finds the last element beneath this object to be parsed.
+
+        :param is_initialized: If true and this object has a
+            ``next_sibling``, use that sibling's ``previous_element``
+            instead of walking down ``contents``.
+        :param accept_self: If false, return None when the result would
+            otherwise be this object itself.
+        :return: The last descendant, this object, or None.
+        """
         if is_initialized and self.next_sibling:
-            return self.next_sibling.previous_element
-        last_child = self
-        while isinstance(last_child, Tag) and last_child.contents:
-            last_child = last_child.contents[-1]
+            last_child = self.next_sibling.previous_element
+        else:
+            last_child = self
+            while isinstance(last_child, Tag) and last_child.contents:
+                last_child = last_child.contents[-1]
+        if not accept_self and last_child is self:
+            last_child = None
         return last_child
     # BS3: Not part of the API!
     _lastRecursiveChild = _last_descendant
diff --git a/bs4/tests/test_html5lib.py b/bs4/tests/test_html5lib.py
index 2a3b41e..594c3e1 100644
--- a/bs4/tests/test_html5lib.py
+++ b/bs4/tests/test_html5lib.py
@@ -70,3 +70,21 @@ class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest):
         soup = self.soup(markup)
         # Verify that we can reach the <p> tag; this means the tree is connected.
         self.assertEqual(b"<p>foo</p>", soup.p.encode())
+
+    def test_reparented_markup(self):
+        # The <em> is still open when the first <p> closes, so the parsed
+        # tree is expected to contain three separate <em> tags.
+        soup = self.soup('<p><em>foo</p>\n<p>bar<a></a></em></p>')
+        expected = u"<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p></body>"
+        self.assertEqual(expected, soup.body.decode())
+        # Both <p> tags must be found when searching the tree.
+        paragraphs = soup.find_all('p')
+        self.assertEqual(2, len(paragraphs))
+
+    def test_reparented_markup_ends_with_whitespace(self):
+        # Same markup, but with a trailing newline after the last </p>.
+        soup = self.soup('<p><em>foo</p>\n<p>bar<a></a></em></p>\n')
+        expected = u"<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p>\n</body>"
+        self.assertEqual(expected, soup.body.decode())
+        paragraphs = soup.find_all('p')
+        self.assertEqual(2, len(paragraphs))