diff options
author | Leonard Richardson <leonardr@segfault.org> | 2012-05-24 09:40:23 -0400 |
---|---|---|
committer | Leonard Richardson <leonardr@segfault.org> | 2012-05-24 09:40:23 -0400 |
commit | 6ec38efd97e9deb8fb362890f670ba9571f60e10 (patch) | |
tree | b74bf6aa7d0115f996278e2b3cd65cd24ea7e9c0 | |
parent | 34c036cde4ed75e000be2d29f542a3f9ec215dfa (diff) |
Fixed some edge-case bugs having to do with inserting an element
into a tag it's already inside, and replacing one of a tag's
children with another. [bug=997529]
-rw-r--r-- | NEWS.txt | 4 | ||||
-rw-r--r-- | bs4/element.py | 19 | ||||
-rw-r--r-- | bs4/tests/test_tree.py | 24 |
3 files changed, 34 insertions, 13 deletions
@@ -13,6 +13,10 @@ adding attributes to a tag that didn't originally have any. [bug=1002378] Thanks to Oliver Beattie for the patch. +* Fixed some edge-case bugs having to do with inserting an element + into a tag it's already inside, and replacing one of a tag's + children with another. [bug=997529] + = 4.0.5 (20120427) = * Added a new method, wrap(), which wraps an element in a tag. diff --git a/bs4/element.py b/bs4/element.py index 22b8304..99a3540 100644 --- a/bs4/element.py +++ b/bs4/element.py @@ -137,14 +137,6 @@ class PageElement(object): raise ValueError("Cannot replace a Tag with its parent.") old_parent = self.parent my_index = self.parent.index(self) - if (hasattr(replace_with, 'parent') - and replace_with.parent is self.parent): - # We're replacing this element with one of its siblings. - if self.parent.index(replace_with) < my_index: - # Furthermore, it comes before this element. That - # means that when we extract it, the index of this - # element will change. - my_index -= 1 self.extract() old_parent.insert(my_index, replace_with) return self @@ -212,11 +204,12 @@ class PageElement(object): # We're 'inserting' an element that's already one # of this object's children. if new_child.parent is self: - if self.index(new_child) > position: - # Furthermore we're moving it further down the - # list of this object's children. That means that - # when we extract this element, our target index - # will jump down one. + current_index = self.index(new_child) + if current_index < position: + # We're moving this element further down the list + # of this object's children. That means that when + # we extract this element, our target index will + # jump down one. position -= 1 new_child.extract() diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py index 9a10edf..1e24c29 100644 --- a/bs4/tests/test_tree.py +++ b/bs4/tests/test_tree.py @@ -821,6 +821,18 @@ class TestTreeModification(SoupTest): self.assertEqual(the.next_element, c_tag) self.assertEqual(c_tag.previous_element, the) + def test_append_child_thats_already_at_the_end(self): + data = "<a><b></b></a>" + soup = self.soup(data) + soup.a.append(soup.b) + self.assertEquals(data, soup.decode()) + + def test_move_tag_to_beginning_of_parent(self): + data = "<a><b></b><c></c><d></d></a>" + soup = self.soup(data) + soup.a.insert(0, soup.d) + self.assertEqual("<a><d></d><b></b><c></c></a>", soup.decode()) + def test_insert_works_on_empty_element_tag(self): # This is a little strange, since most HTML parsers don't allow # markup like this to come through. But in general, we don't @@ -882,6 +894,18 @@ class TestTreeModification(SoupTest): self.assertEqual(no.next_element, "no") self.assertEqual(no.next_sibling, " business") + def test_replace_first_child(self): + data = "<a><b></b><c></c></a>" + soup = self.soup(data) + soup.b.replace_with(soup.c) + self.assertEqual("<a><c></c></a>", soup.decode()) + + def test_replace_last_child(self): + data = "<a><b></b><c></c></a>" + soup = self.soup(data) + soup.c.replace_with(soup.b) + self.assertEqual("<a><b></b></a>", soup.decode()) + def test_nested_tag_replace_with(self): soup = self.soup( """<a>We<b>reserve<c>the</c><d>right</d></b></a><e>to<f>refuse</f><g>service</g></e>""") |