diff options
-rw-r--r-- | NEWS.txt | 4 | ||||
-rw-r--r-- | bs4/element.py | 5 | ||||
-rw-r--r-- | bs4/tests/test_tree.py | 19 |
3 files changed, 27 insertions, 1 deletion
@@ -15,6 +15,10 @@ Especially important changes:
   argument described in the documentation. `text` may eventually
   change its meaning, but not for a very long time. [bug=1366856]
 
+* Changed the way soup objects work under copy.copy() and
+  copy.deepcopy(). Copying a NavigableString will give you a new
+  NavigableString that is not connected to the parse tree.
+
 * Started using a standard MIT license. [bug=1294662]
 
 * Added a Chinese translation of the documentation by Delong .w.
diff --git a/bs4/element.py b/bs4/element.py
index d1b7c12..0486da2 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -689,7 +689,10 @@ class NavigableString(unicode, PageElement):
         return u
 
     def __copy__(self):
-        return self
+        """A copy of a NavigableString has the same contents and class
+        as the original, but it is not connected to the parse tree.
+        """
+        return type(self)(self)
 
     def __getnewargs__(self):
         return (unicode(self),)
diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py
index 7edf848..22d4b4f 100644
--- a/bs4/tests/test_tree.py
+++ b/bs4/tests/test_tree.py
@@ -1310,6 +1310,25 @@ class TestPersistence(SoupTest):
         loaded = pickle.loads(dumped)
         self.assertEqual(loaded.decode(), soup.decode())
 
+    def test_copy_navigablestring_is_not_attached_to_tree(self):
+        html = u"<b>Foo<a></a></b><b>Bar</b>"
+        soup = self.soup(html)
+        s1 = soup.find(string="Foo")
+        s2 = copy.copy(s1)
+        self.assertEqual(s1, s2)
+        self.assertEqual(None, s2.parent)
+        self.assertEqual(None, s2.next_element)
+        self.assertNotEqual(None, s1.next_sibling)
+        self.assertEqual(None, s2.next_sibling)
+        self.assertEqual(None, s2.previous_element)
+
+    def test_copy_navigablestring_subclass_has_same_type(self):
+        html = u"<b><!--Foo--></b>"
+        soup = self.soup(html)
+        s1 = soup.string
+        s2 = copy.copy(s1)
+        self.assertEqual(s1, s2)
+        self.assertTrue(isinstance(s2, Comment))
 
 class TestSubstitutions(SoupTest):
 