summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- CHANGELOG 3
-rw-r--r-- bs4/element.py 14
2 files changed, 13 insertions, 4 deletions
diff --git a/CHANGELOG b/CHANGELOG
index dde359e..3f460ae 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -7,6 +7,9 @@
* Fixed an unhandled exception when formatting a Tag that had been
decomposed.[bug=1857767]
+* Added a performance optimization to PageElement.extract(). Patch by
+ Arthur Darcet.
+
= 4.8.2 (20191224)
* Added Python docstrings to all public methods of the most commonly
diff --git a/bs4/element.py b/bs4/element.py
index 11bf8c3..059e0de 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -239,7 +239,7 @@ class PageElement(object):
raise ValueError("Cannot replace a Tag with its parent.")
old_parent = self.parent
my_index = self.parent.index(self)
- self.extract()
+ self.extract(_self_index=my_index)
old_parent.insert(my_index, replace_with)
return self
replaceWith = replace_with # BS3
@@ -255,7 +255,7 @@ class PageElement(object):
"Cannot replace an element with its contents when that"
"element is not part of a tree.")
my_index = self.parent.index(self)
- self.extract()
+ self.extract(_self_index=my_index)
for child in reversed(self.contents[:]):
my_parent.insert(my_index, child)
return self
@@ -273,13 +273,19 @@ class PageElement(object):
wrap_inside.append(me)
return wrap_inside
- def extract(self):
+ def extract(self, _self_index=None):
"""Destructively rips this element out of the tree.
+ :param _self_index: The location of this element in its parent's
+ .contents, if known. Passing this in allows for a performance
+ optimization.
+
:return: `self`, no longer part of the tree.
"""
if self.parent is not None:
- del self.parent.contents[self.parent.index(self)]
+ if _self_index is None:
+ _self_index = self.parent.index(self)
+ del self.parent.contents[_self_index]
#Find the two elements that would be next to each other if
#this element (and any children) hadn't been parsed. Connect