diff options
-rw-r--r-- | NEWS.txt | 3 | ||||
-rw-r--r-- | bs4/__init__.py | 28 | ||||
-rw-r--r-- | bs4/testing.py | 17 | ||||
-rw-r--r-- | bs4/tests/test_tree.py | 6 |
4 files changed, 54 insertions, 0 deletions
@@ -29,6 +29,9 @@ displayed correctly even if the filename or URL is a Unicode string. [bug=1268888] +* Force object_was_parsed() to keep the tree intact even when an element + from later in the document is moved into place. [bug=1430633] + = 4.3.2 (20131002) = * Fixed a bug in which short Unicode input was improperly encoded to diff --git a/bs4/__init__.py b/bs4/__init__.py index a53048d..9f602ae 100644 --- a/bs4/__init__.py +++ b/bs4/__init__.py @@ -310,6 +310,34 @@ class BeautifulSoup(Tag): self._most_recent_element = o parent.contents.append(o) + if parent.next_sibling: + # This node is being inserted into an element that has + # already been parsed. Deal with any dangling references. + index = parent.contents.index(o) + if index == 0: + previous_element = parent + previous_sibling = None + else: + previous_element = previous_sibling = parent.contents[index-1] + if index == len(parent.contents)-1: + next_element = parent.next_sibling + next_sibling = None + else: + next_element = next_sibling = parent.contents[index+1] + + o.previous_element = previous_element + if previous_element: + previous_element.next_element = o + o.next_element = next_element + if next_element: + next_element.previous_element = o + o.next_sibling = next_sibling + if next_sibling: + next_sibling.previous_sibling = o + o.previous_sibling = previous_sibling + if previous_sibling: + previous_sibling.next_sibling = o + def _popToTag(self, name, nsprefix=None, inclusivePop=True): """Pops the tag stack up to and including the most recent instance of the given tag. If inclusivePop is false, pops the tag diff --git a/bs4/testing.py b/bs4/testing.py index 023a495..a85ecd6 100644 --- a/bs4/testing.py +++ b/bs4/testing.py @@ -160,6 +160,23 @@ class HTMLTreeBuilderSmokeTest(object): def test_nested_formatting_elements(self): self.assertSoupEquals("<em><em></em></em>") + def test_double_head(self): + html = '''<!DOCTYPE html> +<html> +<head> +<title>Ordinary HEAD element test</title> +</head> +<script type="text/javascript"> +alert("Help!"); +</script> +<body> +Hello, world! +</body> +</html> +''' + soup = self.soup(html) + self.assertEqual("text/javascript", soup.find('script')['type']) + def test_comment(self): # Comments are represented as Comment objects. markup = "<p>foo<!--foobar-->baz</p>" diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py index c9d1dcd..cb3897b 100644 --- a/bs4/tests/test_tree.py +++ b/bs4/tests/test_tree.py @@ -70,6 +70,12 @@ class TestFind(TreeTest): soup = self.soup(u'<h1>Räksmörgås</h1>') self.assertEqual(soup.find(text=u'Räksmörgås'), u'Räksmörgås') + def test_unicode_attribute_find(self): + soup = self.soup(u'<h1 id="Räksmörgås">here it is</h1>') + str(soup) + self.assertEqual("here it is", soup.find(id=u'Räksmörgås').text) + + def test_find_everything(self): """Test an optimization that finds all tags.""" soup = self.soup("<a>foo</a><b>bar</b>") |