diff options
author | Leonard Richardson <leonardr@segfault.org> | 2021-11-29 22:13:33 -0500 |
---|---|---|
committer | Leonard Richardson <leonardr@segfault.org> | 2021-11-29 22:13:33 -0500 |
commit | ad52722cc6b55ce414d395e9a0860cee57c0ab2d (patch) | |
tree | 8ff820b41d9ee5fb1f896629782270349cd8311b /bs4/__init__.py | |
parent | c005e9ba28b4eec3a5fab173b928609bc692dd51 (diff) |
Do a better job of keeping track of namespaces as an XML document is
parsed, so that CSS selectors that use namespaces will do the right
thing more often. [bug=1946243]
Diffstat (limited to 'bs4/__init__.py')
-rw-r--r-- | bs4/__init__.py | 12 |
1 files changed, 7 insertions, 5 deletions
```diff
diff --git a/bs4/__init__.py b/bs4/__init__.py
index 49e05e7..ddf1a86 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -309,8 +309,6 @@ class BeautifulSoup(Tag):
         self._namespaces = dict()
         self.parse_only = parse_only

-        self.builder.initialize_soup(self)
-
         if hasattr(markup, 'read'):  # It's a file-type object.
             markup = markup.read()
         elif len(markup) <= 256 and (
@@ -362,6 +360,7 @@ class BeautifulSoup(Tag):
              self.builder.prepare_markup(
                  markup, from_encoding, exclude_encodings=exclude_encodings)):
             self.reset()
+            self.builder.initialize_soup(self)
             try:
                 self._feed()
                 success = True
@@ -400,7 +399,7 @@ class BeautifulSoup(Tag):
         if 'builder' in d and not self.builder.picklable:
             d['builder'] = None
         return d
-    
+
     @classmethod
     def _decode_markup(cls, markup):
         """Ensure `markup` is bytes so it's safe to send into warnings.warn.
@@ -693,7 +692,7 @@ class BeautifulSoup(Tag):
         return most_recently_popped

     def handle_starttag(self, name, namespace, nsprefix, attrs, sourceline=None,
-                        sourcepos=None):
+                        sourcepos=None, namespaces=None):
         """Called by the tree builder when a new tag is encountered.

         :param name: Name of the tag.
@@ -703,6 +702,8 @@ class BeautifulSoup(Tag):
             source document.
         :param sourcepos: The character position within `sourceline` where this
             tag was found.
+        :param namespaces: A dictionary of all namespace prefix mappings
+            currently in scope in the document.

         If this method returns None, the tag was rejected by an active
         SoupStrainer. You should proceed as if the tag had not occurred
@@ -720,7 +721,8 @@ class BeautifulSoup(Tag):
         tag = self.element_classes.get(Tag, Tag)(
             self, self.builder, name, namespace, nsprefix, attrs,
             self.currentTag, self._most_recent_element,
-            sourceline=sourceline, sourcepos=sourcepos
+            sourceline=sourceline, sourcepos=sourcepos,
+            namespaces=namespaces
         )
         if tag is None:
             return tag
```