summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- NEWS.txt 6
-rw-r--r-- bs4/element.py 10
-rw-r--r-- bs4/tests/test_tree.py 25
3 files changed, 35 insertions, 6 deletions
diff --git a/NEWS.txt b/NEWS.txt
index 2548e4e..9b90da2 100644
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -1,8 +1,12 @@
-= Unreleased
+= 4.4.1 (20150928) =
* Fixed the test_detect_utf8 test so that it works when chardet is
installed. [bug=1471359]
+* Fixed a bug that deranged the tree when part of it was
+ removed. Thanks to Eric Weiser for the patch and John Wiseman for a
+ test. [bug=1481520]
+
= 4.4.0 (20150703) =
Especially important changes:
diff --git a/bs4/element.py b/bs4/element.py
index 336768b..02fd1b7 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -262,19 +262,19 @@ class PageElement(object):
next_element = last_child.next_element
if (self.previous_element is not None and
- self.previous_element != next_element):
+ self.previous_element is not next_element):
self.previous_element.next_element = next_element
- if next_element is not None and next_element != self.previous_element:
+ if next_element is not None and next_element is not self.previous_element:
next_element.previous_element = self.previous_element
self.previous_element = None
last_child.next_element = None
self.parent = None
if (self.previous_sibling is not None
- and self.previous_sibling != self.next_sibling):
+ and self.previous_sibling is not self.next_sibling):
self.previous_sibling.next_sibling = self.next_sibling
if (self.next_sibling is not None
- and self.next_sibling != self.previous_sibling):
+ and self.next_sibling is not self.previous_sibling):
self.next_sibling.previous_sibling = self.previous_sibling
self.previous_sibling = self.next_sibling = None
return self
@@ -287,7 +287,7 @@ class PageElement(object):
last_child = self
while isinstance(last_child, Tag) and last_child.contents:
last_child = last_child.contents[-1]
- if not accept_self and last_child == self:
+ if not accept_self and last_child is self:
last_child = None
return last_child
# BS3: Not part of the API!
diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py
index 2371591..ab21a50 100644
--- a/bs4/tests/test_tree.py
+++ b/bs4/tests/test_tree.py
@@ -1084,6 +1084,31 @@ class TestTreeModification(SoupTest):
self.assertEqual(foo_2, soup.a.string)
self.assertEqual(bar_2, soup.b.string)
+ def test_extract_multiples_of_same_tag(self):
+ soup = self.soup("""
+<html>
+<head>
+<script>foo</script>
+</head>
+<body>
+ <script>bar</script>
+ <a></a>
+</body>
+<script>baz</script>
+</html>""")
+ [soup.script.extract() for i in soup.find_all("script")]
+ self.assertEqual("<body>\n\n<a></a>\n</body>", unicode(soup.body))
+
+
+ def test_extract_works_when_element_is_surrounded_by_identical_strings(self):
+ soup = self.soup(
+ '<html>\n'
+ '<body>hi</body>\n'
+ '</html>')
+ soup.find('body').extract()
+ self.assertEqual(None, soup.find('body'))
+
+
def test_clear(self):
"""Tag.clear()"""
soup = self.soup("<p><a>String <em>Italicized</em></a> and another</p>")