From d35d70fc9a05252295110b9d7cd2eac5afce4a7c Mon Sep 17 00:00:00 2001
From: Leonard Richardson
Date: Thu, 5 Mar 2020 19:08:41 -0500
Subject: Added a performance optimization to PageElement.extract(). Patch by Arthur Darcet.

---
 bs4/element.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

(limited to 'bs4/element.py')

diff --git a/bs4/element.py b/bs4/element.py
index 11bf8c3..059e0de 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -239,7 +239,7 @@ class PageElement(object):
             raise ValueError("Cannot replace a Tag with its parent.")
         old_parent = self.parent
         my_index = self.parent.index(self)
-        self.extract()
+        self.extract(_self_index=my_index)
         old_parent.insert(my_index, replace_with)
         return self
     replaceWith = replace_with # BS3
@@ -255,7 +255,7 @@ class PageElement(object):
                 "Cannot replace an element with its contents when that"
                 "element is not part of a tree.")
         my_index = self.parent.index(self)
-        self.extract()
+        self.extract(_self_index=my_index)
         for child in reversed(self.contents[:]):
             my_parent.insert(my_index, child)
         return self
@@ -273,13 +273,19 @@ class PageElement(object):
         wrap_inside.append(me)
         return wrap_inside
 
-    def extract(self):
+    def extract(self, _self_index=None):
         """Destructively rips this element out of the tree.
 
+        :param _self_index: The location of this element in its parent's
+           .contents, if known. Passing this in allows for a performance
+           optimization.
+
         :return: `self`, no longer part of the tree.
         """
         if self.parent is not None:
-            del self.parent.contents[self.parent.index(self)]
+            if _self_index is None:
+                _self_index = self.parent.index(self)
+            del self.parent.contents[_self_index]
 
         #Find the two elements that would be next to each other if
         #this element (and any children) hadn't been parsed. Connect
-- 
cgit v1.2.3