diff options
author | Leonard Richardson <leonardr@segfault.org> | 2015-06-28 14:08:48 -0400 |
---|---|---|
committer | Leonard Richardson <leonardr@segfault.org> | 2015-06-28 14:08:48 -0400 |
commit | 92ad5e0dee9503f507f6277b493dfa96010f3a44 (patch) | |
tree | 53e6615b01afa7da78a325ce4b498f3c01e6a662 | |
parent | ec60474030e5ae091dc3500312f9447dcc7d56bf (diff) |
Copying a NavigableString will give you a new NavigableString that is not connected to the parse tree.
-rw-r--r-- | NEWS.txt | 4 | ||||
-rw-r--r-- | bs4/element.py | 5 | ||||
-rw-r--r-- | bs4/tests/test_tree.py | 19 |
3 files changed, 27 insertions, 1 deletion
@@ -15,6 +15,10 @@ Especially important changes:
   argument described in the documentation. `text` may eventually
   change its meaning, but not for a very long time. [bug=1366856]
 
+* Changed the way soup objects work under copy.copy() and
+  copy.deepcopy(). Copying a NavigableString will give you a new
+  NavigableString that is not connected to the parse tree.
+
 * Started using a standard MIT license. [bug=1294662]
 
 * Added a Chinese translation of the documentation by Delong .w.
diff --git a/bs4/element.py b/bs4/element.py
index d1b7c12..0486da2 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -689,7 +689,10 @@ class NavigableString(unicode, PageElement):
         return u
 
     def __copy__(self):
-        return self
+        """A copy of a NavigableString has the same contents and class
+        as the original, but it is not connected to the parse tree.
+        """
+        return type(self)(self)
 
     def __getnewargs__(self):
         return (unicode(self),)
diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py
index 7edf848..22d4b4f 100644
--- a/bs4/tests/test_tree.py
+++ b/bs4/tests/test_tree.py
@@ -1310,6 +1310,25 @@ class TestPersistence(SoupTest):
         loaded = pickle.loads(dumped)
         self.assertEqual(loaded.decode(), soup.decode())
 
+    def test_copy_navigablestring_is_not_attached_to_tree(self):
+        html = u"<b>Foo<a></a></b><b>Bar</b>"
+        soup = self.soup(html)
+        s1 = soup.find(string="Foo")
+        s2 = copy.copy(s1)
+        self.assertEqual(s1, s2)
+        self.assertEqual(None, s2.parent)
+        self.assertEqual(None, s2.next_element)
+        self.assertNotEqual(None, s1.next_sibling)
+        self.assertEqual(None, s2.next_sibling)
+        self.assertEqual(None, s2.previous_element)
+
+    def test_copy_navigablestring_subclass_has_same_type(self):
+        html = u"<b><!--Foo--></b>"
+        soup = self.soup(html)
+        s1 = soup.string
+        s2 = copy.copy(s1)
+        self.assertEqual(s1, s2)
+        self.assertTrue(isinstance(s2, Comment))
 
 class TestSubstitutions(SoupTest):
 