diff options
field | value | date |
---|---|---|
author | Leonard Richardson <leonardr@segfault.org> | 2020-01-01 13:30:28 -0500 |
committer | Leonard Richardson <leonardr@segfault.org> | 2020-01-01 13:30:28 -0500 |
commit | 981c34917f44d96b5c7fa3314bcf39c772d12a61 (patch) | |
tree | a42db5bee6974fe281d12fb8285cec6a50966471 | |
parent | a021fc8a1aac56aa4a75c68fee5c4cb6a0e68551 (diff) | |

API CHANGE - Added PageElement.decomposed, a new property which lets you
check whether you've already called decompose() on a Tag or
NavigableString.

mode | file | lines changed |
---|---|---|
-rw-r--r-- | CHANGELOG | 6 |
-rw-r--r-- | bs4/element.py | 20 |
-rw-r--r-- | bs4/tests/test_tree.py | 17 |
-rw-r--r-- | doc/source/index.rst | 17 |

4 files changed, 53 insertions, 7 deletions

@@ -1,4 +1,8 @@ -= Unreleased += 4.9.0 (Unreleased) + +* Added PageElement.decomposed, a new property which lets you + check whether you've already called decompose() on a Tag or + NavigableString. * Fixed an unhandled exception when formatting a Tag that had been decomposed.[bug=1857767] diff --git a/bs4/element.py b/bs4/element.py index e0da4d2..11bf8c3 100644 --- a/bs4/element.py +++ b/bs4/element.py @@ -802,6 +802,14 @@ class PageElement(object): yield i i = i.parent + @property + def decomposed(self): + """Check whether a PageElement has been decomposed. + + :rtype: bool + """ + return getattr(self, '_decomposed', False) or False + # Old non-property versions of the generators, for backwards # compatibility with BS3. def nextGenerator(self): @@ -1211,15 +1219,21 @@ class Tag(PageElement): This element will be removed from the tree and wiped out; so will everything beneath it. + + The behavior of a decomposed PageElement is undefined and you + should never use one for anything, but if you need to _check_ + whether an element has been decomposed, you can use the + `decomposed` property. """ self.extract() i = self while i is not None: - next = i.next_element + n = i.next_element i.__dict__.clear() i.contents = [] - i = next - + i._decomposed = True + i = n + def clear(self, decompose=False): """Wipe out all children of this PageElement by calling extract() on them. 
diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py index 3251e0e..80aaaff 100644 --- a/bs4/tests/test_tree.py +++ b/bs4/tests/test_tree.py @@ -1275,6 +1275,23 @@ class TestTreeModification(SoupTest): a.clear(decompose=True) self.assertEqual(0, len(em.contents)) + + def test_decompose(self): + # Test PageElement.decompose() and PageElement.decomposed + soup = self.soup("<p><a>String <em>Italicized</em></a></p><p>Another para</p>") + p1, p2 = soup.find_all('p') + a = p1.a + text = p1.em.string + for i in [p1, p2, a, text]: + self.assertEqual(False, i.decomposed) + + # This sets p1 and everything beneath it to decomposed. + p1.decompose() + for i in [p1, a, text]: + self.assertEqual(True, i.decomposed) + # p2 is unaffected. + self.assertEqual(False, p2.decomposed) + def test_string_set(self): """Tag.string = 'string'""" soup = self.soup("<a></a> <b><c></c></b>") diff --git a/doc/source/index.rst b/doc/source/index.rst index baaf09c..3664612 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -97,7 +97,7 @@ data structure:: # Lacie # </a> # and - # <a class="sister" href="http://example.com/tillie" id="link2"> + # <a class="sister" href="http://example.com/tillie" id="link3"> # Tillie # </a> # ; and they lived at the bottom of a well. @@ -2067,12 +2067,23 @@ destroys it and its contents`:: markup = '<a href="http://example.com/">I linked to <i>example.com</i></a>' soup = BeautifulSoup(markup) a_tag = soup.a + i_tag = soup.i - soup.i.decompose() - + i_tag.decompose() a_tag # <a href="http://example.com/">I linked to</a> +The behavior of a decomposed ``Tag`` or ``NavigableString`` is not +defined and you should not use it for anything. If you're not sure +whether something has been decomposed, you can check its +``.decomposed`` property (new in 4.9.0):: + + i_tag.decomposed + # True + + a_tag.decomposed + # False + .. _replace_with(): |