diff options
-rw-r--r-- | NEWS.txt | 6 | ||||
-rw-r--r-- | bs4/element.py | 10 | ||||
-rw-r--r-- | bs4/tests/test_tree.py | 25 |
3 files changed, 35 insertions, 6 deletions
@@ -1,8 +1,12 @@ -= Unreleased += 4.4.1 (20150928) = * Fixed the test_detect_utf8 test so that it works when chardet is installed. [bug=1471359] +* Fixed a bug that deranged the tree when part of it was + removed. Thanks to Eric Weiser for the patch and John Wiseman for a + test. [bug=1481520] + = 4.4.0 (20150703) = Especially important changes: diff --git a/bs4/element.py b/bs4/element.py index 336768b..02fd1b7 100644 --- a/bs4/element.py +++ b/bs4/element.py @@ -262,19 +262,19 @@ class PageElement(object): next_element = last_child.next_element if (self.previous_element is not None and - self.previous_element != next_element): + self.previous_element is not next_element): self.previous_element.next_element = next_element - if next_element is not None and next_element != self.previous_element: + if next_element is not None and next_element is not self.previous_element: next_element.previous_element = self.previous_element self.previous_element = None last_child.next_element = None self.parent = None if (self.previous_sibling is not None - and self.previous_sibling != self.next_sibling): + and self.previous_sibling is not self.next_sibling): self.previous_sibling.next_sibling = self.next_sibling if (self.next_sibling is not None - and self.next_sibling != self.previous_sibling): + and self.next_sibling is not self.previous_sibling): self.next_sibling.previous_sibling = self.previous_sibling self.previous_sibling = self.next_sibling = None return self @@ -287,7 +287,7 @@ class PageElement(object): last_child = self while isinstance(last_child, Tag) and last_child.contents: last_child = last_child.contents[-1] - if not accept_self and last_child == self: + if not accept_self and last_child is self: last_child = None return last_child # BS3: Not part of the API! diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py index 2371591..ab21a50 100644 --- a/bs4/tests/test_tree.py +++ b/bs4/tests/test_tree.py @@ -1084,6 +1084,31 @@ class TestTreeModification(SoupTest): self.assertEqual(foo_2, soup.a.string) self.assertEqual(bar_2, soup.b.string) + def test_extract_multiples_of_same_tag(self): + soup = self.soup(""" +<html> +<head> +<script>foo</script> +</head> +<body> + <script>bar</script> + <a></a> +</body> +<script>baz</script> +</html>""") + [soup.script.extract() for i in soup.find_all("script")] + self.assertEqual("<body>\n\n<a></a>\n</body>", unicode(soup.body)) + + + def test_extract_works_when_element_is_surrounded_by_identical_strings(self): + soup = self.soup( + '<html>\n' + '<body>hi</body>\n' + '</html>') + soup.find('body').extract() + self.assertEqual(None, soup.find('body')) + + def test_clear(self): """Tag.clear()""" soup = self.soup("<p><a>String <em>Italicized</em></a> and another</p>") |