summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLeonard Richardson <leonardr@segfault.org>2012-05-24 09:40:23 -0400
committerLeonard Richardson <leonardr@segfault.org>2012-05-24 09:40:23 -0400
commit6ec38efd97e9deb8fb362890f670ba9571f60e10 (patch)
treeb74bf6aa7d0115f996278e2b3cd65cd24ea7e9c0
parent34c036cde4ed75e000be2d29f542a3f9ec215dfa (diff)
Fixed some edge-case bugs having to do with inserting an element
into a tag it's already inside, and replacing one of a tag's children with another. [bug=997529]
-rw-r--r--NEWS.txt4
-rw-r--r--bs4/element.py19
-rw-r--r--bs4/tests/test_tree.py24
3 files changed, 34 insertions, 13 deletions
diff --git a/NEWS.txt b/NEWS.txt
index b9c92fe..16f4598 100644
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -13,6 +13,10 @@
adding attributes to a tag that didn't originally have
any. [bug=1002378] Thanks to Oliver Beattie for the patch.
+* Fixed some edge-case bugs having to do with inserting an element
+ into a tag it's already inside, and replacing one of a tag's
+ children with another. [bug=997529]
+
= 4.0.5 (20120427) =
* Added a new method, wrap(), which wraps an element in a tag.
diff --git a/bs4/element.py b/bs4/element.py
index 22b8304..99a3540 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -137,14 +137,6 @@ class PageElement(object):
raise ValueError("Cannot replace a Tag with its parent.")
old_parent = self.parent
my_index = self.parent.index(self)
- if (hasattr(replace_with, 'parent')
- and replace_with.parent is self.parent):
- # We're replacing this element with one of its siblings.
- if self.parent.index(replace_with) < my_index:
- # Furthermore, it comes before this element. That
- # means that when we extract it, the index of this
- # element will change.
- my_index -= 1
self.extract()
old_parent.insert(my_index, replace_with)
return self
@@ -212,11 +204,12 @@ class PageElement(object):
# We're 'inserting' an element that's already one
# of this object's children.
if new_child.parent is self:
- if self.index(new_child) > position:
- # Furthermore we're moving it further down the
- # list of this object's children. That means that
- # when we extract this element, our target index
- # will jump down one.
+ current_index = self.index(new_child)
+ if current_index < position:
+ # We're moving this element further down the list
+ # of this object's children. That means that when
+ # we extract this element, our target index will
+ # jump down one.
position -= 1
new_child.extract()
diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py
index 9a10edf..1e24c29 100644
--- a/bs4/tests/test_tree.py
+++ b/bs4/tests/test_tree.py
@@ -821,6 +821,18 @@ class TestTreeModification(SoupTest):
self.assertEqual(the.next_element, c_tag)
self.assertEqual(c_tag.previous_element, the)
+ def test_append_child_thats_already_at_the_end(self):
+ data = "<a><b></b></a>"
+ soup = self.soup(data)
+ soup.a.append(soup.b)
+ self.assertEquals(data, soup.decode())
+
+ def test_move_tag_to_beginning_of_parent(self):
+ data = "<a><b></b><c></c><d></d></a>"
+ soup = self.soup(data)
+ soup.a.insert(0, soup.d)
+ self.assertEqual("<a><d></d><b></b><c></c></a>", soup.decode())
+
def test_insert_works_on_empty_element_tag(self):
# This is a little strange, since most HTML parsers don't allow
# markup like this to come through. But in general, we don't
@@ -882,6 +894,18 @@ class TestTreeModification(SoupTest):
self.assertEqual(no.next_element, "no")
self.assertEqual(no.next_sibling, " business")
+ def test_replace_first_child(self):
+ data = "<a><b></b><c></c></a>"
+ soup = self.soup(data)
+ soup.b.replace_with(soup.c)
+ self.assertEqual("<a><c></c></a>", soup.decode())
+
+ def test_replace_last_child(self):
+ data = "<a><b></b><c></c></a>"
+ soup = self.soup(data)
+ soup.c.replace_with(soup.b)
+ self.assertEqual("<a><b></b></a>", soup.decode())
+
def test_nested_tag_replace_with(self):
soup = self.soup(
"""<a>We<b>reserve<c>the</c><d>right</d></b></a><e>to<f>refuse</f><g>service</g></e>""")