summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Leonard Richardson <leonardr@segfault.org> 2020-01-01 13:30:28 -0500
committer: Leonard Richardson <leonardr@segfault.org> 2020-01-01 13:30:28 -0500
commit: 981c34917f44d96b5c7fa3314bcf39c772d12a61 (patch)
tree: a42db5bee6974fe281d12fb8285cec6a50966471
parent: a021fc8a1aac56aa4a75c68fee5c4cb6a0e68551 (diff)
API CHANGE - Added PageElement.decomposed, a new property which lets you
check whether you've already called decompose() on a Tag or NavigableString.
-rw-r--r-- CHANGELOG 6
-rw-r--r-- bs4/element.py 20
-rw-r--r-- bs4/tests/test_tree.py 17
-rw-r--r-- doc/source/index.rst 17
4 files changed, 53 insertions, 7 deletions
diff --git a/CHANGELOG b/CHANGELOG
index 5f32a57..dde359e 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,4 +1,8 @@
-= Unreleased
+= 4.9.0 (Unreleased)
+
+* Added PageElement.decomposed, a new property which lets you
+ check whether you've already called decompose() on a Tag or
+ NavigableString.
* Fixed an unhandled exception when formatting a Tag that had been
decomposed.[bug=1857767]
diff --git a/bs4/element.py b/bs4/element.py
index e0da4d2..11bf8c3 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -802,6 +802,14 @@ class PageElement(object):
yield i
i = i.parent
+ @property
+ def decomposed(self):
+ """Check whether a PageElement has been decomposed.
+
+ :rtype: bool
+ """
+ return getattr(self, '_decomposed', False) or False
+
# Old non-property versions of the generators, for backwards
# compatibility with BS3.
def nextGenerator(self):
@@ -1211,15 +1219,21 @@ class Tag(PageElement):
This element will be removed from the tree and wiped out; so
will everything beneath it.
+
+ The behavior of a decomposed PageElement is undefined and you
+ should never use one for anything, but if you need to _check_
+ whether an element has been decomposed, you can use the
+ `decomposed` property.
"""
self.extract()
i = self
while i is not None:
- next = i.next_element
+ n = i.next_element
i.__dict__.clear()
i.contents = []
- i = next
-
+ i._decomposed = True
+ i = n
+
def clear(self, decompose=False):
"""Wipe out all children of this PageElement by calling extract()
on them.
diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py
index 3251e0e..80aaaff 100644
--- a/bs4/tests/test_tree.py
+++ b/bs4/tests/test_tree.py
@@ -1275,6 +1275,23 @@ class TestTreeModification(SoupTest):
a.clear(decompose=True)
self.assertEqual(0, len(em.contents))
+
+ def test_decompose(self):
+ # Test PageElement.decompose() and PageElement.decomposed
+ soup = self.soup("<p><a>String <em>Italicized</em></a></p><p>Another para</p>")
+ p1, p2 = soup.find_all('p')
+ a = p1.a
+ text = p1.em.string
+ for i in [p1, p2, a, text]:
+ self.assertEqual(False, i.decomposed)
+
+ # This sets p1 and everything beneath it to decomposed.
+ p1.decompose()
+ for i in [p1, a, text]:
+ self.assertEqual(True, i.decomposed)
+ # p2 is unaffected.
+ self.assertEqual(False, p2.decomposed)
+
def test_string_set(self):
"""Tag.string = 'string'"""
soup = self.soup("<a></a> <b><c></c></b>")
diff --git a/doc/source/index.rst b/doc/source/index.rst
index baaf09c..3664612 100644
--- a/doc/source/index.rst
+++ b/doc/source/index.rst
@@ -97,7 +97,7 @@ data structure::
# Lacie
# </a>
# and
- # <a class="sister" href="http://example.com/tillie" id="link2">
+ # <a class="sister" href="http://example.com/tillie" id="link3">
# Tillie
# </a>
# ; and they lived at the bottom of a well.
@@ -2067,12 +2067,23 @@ destroys it and its contents`::
markup = '<a href="http://example.com/">I linked to <i>example.com</i></a>'
soup = BeautifulSoup(markup)
a_tag = soup.a
+ i_tag = soup.i
- soup.i.decompose()
-
+ i_tag.decompose()
a_tag
# <a href="http://example.com/">I linked to</a>
+The behavior of a decomposed ``Tag`` or ``NavigableString`` is not
+defined and you should not use it for anything. If you're not sure
+whether something has been decomposed, you can check its
+``.decomposed`` property (new in 4.9.0)::
+
+ i_tag.decomposed
+ # True
+
+ a_tag.decomposed
+ # False
+
.. _replace_with():