diff options
author | Leonard Richardson <leonardr@segfault.org> | 2023-03-23 16:59:27 -0400 |
---|---|---|
committer | Leonard Richardson <leonardr@segfault.org> | 2023-03-23 16:59:27 -0400 |
commit | a342497cb81f01384d61e467daf91540369d4fc3 (patch) | |
tree | d95f0bc768cf5a78b99b3ea4290cdd8369e1916b | |
parent | c91087b78b3584b1e696056bc2ad14e34ebd689e (diff) |
Found and removed accidental calls to find(), greatly improving performance.
-rw-r--r-- | bs4/__init__.py | 1 |
-rw-r--r-- | bs4/element.py | 20 |
2 files changed, 11 insertions, 10 deletions
diff --git a/bs4/__init__.py b/bs4/__init__.py index 01fca6d..5e1bebe 100644 --- a/bs4/__init__.py +++ b/bs4/__init__.py @@ -469,6 +469,7 @@ class BeautifulSoup(Tag): self.open_tag_counter = Counter() self.preserve_whitespace_tag_stack = [] self.string_container_stack = [] + self._most_recent_element = None self.pushTag(self) def new_tag(self, name, namespace=None, nsprefix=None, attrs={}, diff --git a/bs4/element.py b/bs4/element.py index 7bbb7fc..4f1372a 100644 --- a/bs4/element.py +++ b/bs4/element.py @@ -1658,10 +1658,10 @@ class Tag(PageElement): string_literal_mode = False for event, element in self._event_stream(iterator): - if event in (self.START_ELEMENT_EVENT, self.EMPTY_ELEMENT_EVENT): + if event in (Tag.START_ELEMENT_EVENT, Tag.EMPTY_ELEMENT_EVENT): piece = element._format_tag( eventual_encoding, formatter, opening=True) - elif event is self.END_ELEMENT_EVENT: + elif event is Tag.END_ELEMENT_EVENT: piece = element._format_tag( eventual_encoding, formatter, opening=False) if indent_level is not None: @@ -1671,7 +1671,7 @@ class Tag(PageElement): piece = element.output_ready(formatter) if isinstance(element, Tag) and not element._should_pretty_print(): - if event is self.START_ELEMENT_EVENT: + if event is Tag.START_ELEMENT_EVENT: # After processing this event we will be in string # literal mode. 
string_literal_mode = True @@ -1696,7 +1696,7 @@ class Tag(PageElement): piece, indent_level, formatter, indent_before, indent_after ) - if event == self.START_ELEMENT_EVENT: + if event == Tag.START_ELEMENT_EVENT: indent_level += 1 pieces.append(piece) return "".join(pieces) @@ -1704,7 +1704,7 @@ class Tag(PageElement): # Names for the different events yielded by _event_stream START_ELEMENT_EVENT = object() END_ELEMENT_EVENT = object() - VOID_ELEMENT_EVENT = object() + EMPTY_ELEMENT_EVENT = object() STRING_ELEMENT_EVENT = object() def _event_stream(self, iterator=None): @@ -1733,21 +1733,21 @@ class Tag(PageElement): # the stack closed before this element appeared. while tag_stack and c.parent != tag_stack[-1]: now_closed_tag = tag_stack.pop() - yield self.END_ELEMENT_EVENT, now_closed_tag + yield Tag.END_ELEMENT_EVENT, now_closed_tag if isinstance(c, Tag): if c.is_empty_element: - yield self.EMPTY_ELEMENT_EVENT, c + yield Tag.EMPTY_ELEMENT_EVENT, c else: - yield self.START_ELEMENT_EVENT, c + yield Tag.START_ELEMENT_EVENT, c tag_stack.append(c) continue else: - yield self.STRING_ELEMENT_EVENT, c + yield Tag.STRING_ELEMENT_EVENT, c while tag_stack: now_closed_tag = tag_stack.pop() - yield self.END_ELEMENT_EVENT, now_closed_tag + yield Tag.END_ELEMENT_EVENT, now_closed_tag def _indent_string(self, s, indent_level, formatter, indent_before, indent_after): |