diff options
-rw-r--r-- | CHANGELOG | 5 | ||||
-rw-r--r-- | bs4/builder/__init__.py | 13 | ||||
-rw-r--r-- | bs4/builder/_html5lib.py | 4 | ||||
-rw-r--r-- | bs4/builder/_lxml.py | 4 |
4 files changed, 19 insertions, 7 deletions
@@ -7,12 +7,15 @@
 * Fixed a regression such that if you set .hidden on a tag, the tag
   becomes invisible but its contents are still visible. User manipulation
   of .hidden is not a documented or supported feature, so don't do this,
-  but it's not too difficult to keep the old behavior working.
+  but it wasn't too difficult to keep the old behavior working.
 
 * Fixed a case found by Mengyuhan where html.parser giving up on
   markup would result in an AssertionError instead of a
   ParserRejectedMarkup exception.
 
+* Added the correct stacklevel to instances of the XMLParsedAsHTMLWarning.
+  [bug=2034451]
+
 * Corrected the syntax of the license definition in pyproject.toml. Patch
   by Louis Maddox. [bug=2032848]
 
diff --git a/bs4/builder/__init__.py b/bs4/builder/__init__.py
index 2e39745..ffb31fc 100644
--- a/bs4/builder/__init__.py
+++ b/bs4/builder/__init__.py
@@ -514,15 +514,19 @@ class DetectsXMLParsedAsHTML(object):
     XML_PREFIX_B = b'<?xml'
 
     @classmethod
-    def warn_if_markup_looks_like_xml(cls, markup):
+    def warn_if_markup_looks_like_xml(cls, markup, stacklevel=3):
         """Perform a check on some markup to see if it looks like XML
         that's not XHTML. If so, issue a warning.
 
         This is much less reliable than doing the check while parsing,
         but some of the tree builders can't do that.
 
+        :param stacklevel: The stacklevel of the code calling this
+        function.
+
         :return: True if the markup looks like non-XHTML XML, False
         otherwise.
+
         """
         if isinstance(markup, bytes):
             prefix = cls.XML_PREFIX_B
@@ -535,15 +539,16 @@ class DetectsXMLParsedAsHTML(object):
                 and markup.startswith(prefix)
                 and not looks_like_html.search(markup[:500])
         ):
-            cls._warn()
+            cls._warn(stacklevel=stacklevel+2)
             return True
         return False
 
     @classmethod
-    def _warn(cls):
+    def _warn(cls, stacklevel=5):
         """Issue a warning about XML being parsed as HTML."""
         warnings.warn(
-            XMLParsedAsHTMLWarning.MESSAGE, XMLParsedAsHTMLWarning
+            XMLParsedAsHTMLWarning.MESSAGE, XMLParsedAsHTMLWarning,
+            stacklevel=stacklevel
         )
 
     def _initialize_xml_detector(self):
diff --git a/bs4/builder/_html5lib.py b/bs4/builder/_html5lib.py
index dac2173..7c46a85 100644
--- a/bs4/builder/_html5lib.py
+++ b/bs4/builder/_html5lib.py
@@ -77,7 +77,9 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
 
         # html5lib only parses HTML, so if it's given XML that's worth
         # noting.
-        DetectsXMLParsedAsHTML.warn_if_markup_looks_like_xml(markup)
+        DetectsXMLParsedAsHTML.warn_if_markup_looks_like_xml(
+            markup, stacklevel=3
+        )
 
         yield (markup, None, None, False)
 
diff --git a/bs4/builder/_lxml.py b/bs4/builder/_lxml.py
index 971c81e..4f7cf74 100644
--- a/bs4/builder/_lxml.py
+++ b/bs4/builder/_lxml.py
@@ -179,7 +179,9 @@ class LXMLTreeBuilderForXML(TreeBuilder):
             self.processing_instruction_class = ProcessingInstruction
             # We're in HTML mode, so if we're given XML, that's worth
             # noting.
-            DetectsXMLParsedAsHTML.warn_if_markup_looks_like_xml(markup)
+            DetectsXMLParsedAsHTML.warn_if_markup_looks_like_xml(
+                markup, stacklevel=3
+            )
         else:
             self.processing_instruction_class = XMLProcessingInstruction
 