summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Leonard Richardson <leonardr@segfault.org> 2020-10-02 18:21:45 -0400
committer: Leonard Richardson <leonardr@segfault.org> 2020-10-02 18:21:45 -0400
commit: a352cbfd08b039d393a68ee8bc62d5d86cf02fbf (patch)
tree: 6c7c59612b548e274fb01e4095e729a4c4b65fa4
parent: 12e876bc47a63df663cfd039cd9e2a2ef68445c3 (diff)
Implemented a significant performance optimization to the process of
searching the parse tree. Patch by Morotti. [bug=1898212]
-rw-r--r-- CHANGELOG 5
-rw-r--r-- bs4/element.py 8
2 files changed, 13 insertions, 0 deletions
diff --git a/CHANGELOG b/CHANGELOG
index 811171d..d6b4290 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,8 @@
+= 4.9.3 (Unreleased)
+
+* Implemented a significant performance optimization to the process of
+ searching the parse tree. Patch by Morotti. [bug=1898212]
+
= 4.9.2 (20200926)
* Fixed a bug that caused too many tags to be popped from the tag
diff --git a/bs4/element.py b/bs4/element.py
index 370b153..09a81d9 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -1995,6 +1995,14 @@ class SoupStrainer(object):
if isinstance(markup_name, Tag):
markup = markup_name
markup_attrs = markup
+
+ if isinstance(self.name, basestring):
+ # Optimization for a very common case where the user is
+ # searching for a tag with one specific name, and we're
+ # looking at a tag with a different name.
+ if markup and not markup.prefix and self.name != markup.name:
+ return False
+
call_function_with_tag_data = (
isinstance(self.name, Callable)
and not isinstance(markup_name, Tag))