summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r--  NEWS.txt                 3
-rw-r--r--  bs4/__init__.py         28
-rw-r--r--  bs4/testing.py          17
-rw-r--r--  bs4/tests/test_tree.py   6
4 files changed, 54 insertions, 0 deletions
diff --git a/NEWS.txt b/NEWS.txt
index df2abef..d228454 100644
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -29,6 +29,9 @@
displayed correctly even if the filename or URL is a Unicode
string. [bug=1268888]
+* Force object_was_parsed() to keep the tree intact even when an element
+ from later in the document is moved into place. [bug=1430633]
+
= 4.3.2 (20131002) =
* Fixed a bug in which short Unicode input was improperly encoded to
diff --git a/bs4/__init__.py b/bs4/__init__.py
index a53048d..9f602ae 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -310,6 +310,34 @@ class BeautifulSoup(Tag):
self._most_recent_element = o
parent.contents.append(o)
+ if parent.next_sibling:
+ # This node is being inserted into an element that has
+ # already been parsed. Deal with any dangling references.
+ index = parent.contents.index(o)
+ if index == 0:
+ previous_element = parent
+ previous_sibling = None
+ else:
+ previous_element = previous_sibling = parent.contents[index-1]
+ if index == len(parent.contents)-1:
+ next_element = parent.next_sibling
+ next_sibling = None
+ else:
+ next_element = next_sibling = parent.contents[index+1]
+
+ o.previous_element = previous_element
+ if previous_element:
+ previous_element.next_element = o
+ o.next_element = next_element
+ if next_element:
+ next_element.previous_element = o
+ o.next_sibling = next_sibling
+ if next_sibling:
+ next_sibling.previous_sibling = o
+ o.previous_sibling = previous_sibling
+ if previous_sibling:
+ previous_sibling.next_sibling = o
+
def _popToTag(self, name, nsprefix=None, inclusivePop=True):
"""Pops the tag stack up to and including the most recent
instance of the given tag. If inclusivePop is false, pops the tag
diff --git a/bs4/testing.py b/bs4/testing.py
index 023a495..a85ecd6 100644
--- a/bs4/testing.py
+++ b/bs4/testing.py
@@ -160,6 +160,23 @@ class HTMLTreeBuilderSmokeTest(object):
def test_nested_formatting_elements(self):
self.assertSoupEquals("<em><em></em></em>")
+ def test_double_head(self):
+ html = '''<!DOCTYPE html>
+<html>
+<head>
+<title>Ordinary HEAD element test</title>
+</head>
+<script type="text/javascript">
+alert("Help!");
+</script>
+<body>
+Hello, world!
+</body>
+</html>
+'''
+ soup = self.soup(html)
+ self.assertEqual("text/javascript", soup.find('script')['type'])
+
def test_comment(self):
# Comments are represented as Comment objects.
markup = "<p>foo<!--foobar-->baz</p>"
diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py
index c9d1dcd..cb3897b 100644
--- a/bs4/tests/test_tree.py
+++ b/bs4/tests/test_tree.py
@@ -70,6 +70,12 @@ class TestFind(TreeTest):
soup = self.soup(u'<h1>Räksmörgås</h1>')
self.assertEqual(soup.find(text=u'Räksmörgås'), u'Räksmörgås')
+ def test_unicode_attribute_find(self):
+ soup = self.soup(u'<h1 id="Räksmörgås">here it is</h1>')
+ str(soup)
+ self.assertEqual("here it is", soup.find(id=u'Räksmörgås').text)
+
+
def test_find_everything(self):
"""Test an optimization that finds all tags."""
soup = self.soup("<a>foo</a><b>bar</b>")