summary | refs | log | tree | commit | diff
path: root/bs4/__init__.py
diff options
context:
space:
mode:
author: Leonard Richardson <leonardr@segfault.org> 2021-11-29 22:13:33 -0500
committer: Leonard Richardson <leonardr@segfault.org> 2021-11-29 22:13:33 -0500
commit: ad52722cc6b55ce414d395e9a0860cee57c0ab2d (patch)
tree: 8ff820b41d9ee5fb1f896629782270349cd8311b /bs4/__init__.py
parent: c005e9ba28b4eec3a5fab173b928609bc692dd51 (diff)
Do a better job of keeping track of namespaces as an XML document is
parsed, so that CSS selectors that use namespaces will do the right thing more often. [bug=1946243]
Diffstat (limited to 'bs4/__init__.py')
-rw-r--r-- bs4/__init__.py 12
1 files changed, 7 insertions, 5 deletions
diff --git a/bs4/__init__.py b/bs4/__init__.py
index 49e05e7..ddf1a86 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -309,8 +309,6 @@ class BeautifulSoup(Tag):
self._namespaces = dict()
self.parse_only = parse_only
- self.builder.initialize_soup(self)
-
if hasattr(markup, 'read'): # It's a file-type object.
markup = markup.read()
elif len(markup) <= 256 and (
@@ -362,6 +360,7 @@ class BeautifulSoup(Tag):
self.builder.prepare_markup(
markup, from_encoding, exclude_encodings=exclude_encodings)):
self.reset()
+ self.builder.initialize_soup(self)
try:
self._feed()
success = True
@@ -400,7 +399,7 @@ class BeautifulSoup(Tag):
if 'builder' in d and not self.builder.picklable:
d['builder'] = None
return d
-
+
@classmethod
def _decode_markup(cls, markup):
"""Ensure `markup` is bytes so it's safe to send into warnings.warn.
@@ -693,7 +692,7 @@ class BeautifulSoup(Tag):
return most_recently_popped
def handle_starttag(self, name, namespace, nsprefix, attrs, sourceline=None,
- sourcepos=None):
+ sourcepos=None, namespaces=None):
"""Called by the tree builder when a new tag is encountered.
:param name: Name of the tag.
@@ -703,6 +702,8 @@ class BeautifulSoup(Tag):
source document.
:param sourcepos: The character position within `sourceline` where this
tag was found.
+ :param namespaces: A dictionary of all namespace prefix mappings
+ currently in scope in the document.
If this method returns None, the tag was rejected by an active
SoupStrainer. You should proceed as if the tag had not occurred
@@ -720,7 +721,8 @@ class BeautifulSoup(Tag):
tag = self.element_classes.get(Tag, Tag)(
self, self.builder, name, namespace, nsprefix, attrs,
self.currentTag, self._most_recent_element,
- sourceline=sourceline, sourcepos=sourcepos
+ sourceline=sourceline, sourcepos=sourcepos,
+ namespaces=namespaces
)
if tag is None:
return tag