diff options
Diffstat (limited to 'bs4/builder')
-rw-r--r-- bs4/builder/__init__.py  | 13
-rw-r--r-- bs4/builder/_html5lib.py |  4
-rw-r--r-- bs4/builder/_lxml.py     |  4
3 files changed, 15 insertions, 6 deletions
diff --git a/bs4/builder/__init__.py b/bs4/builder/__init__.py
index 2e39745..ffb31fc 100644
--- a/bs4/builder/__init__.py
+++ b/bs4/builder/__init__.py
@@ -514,15 +514,19 @@ class DetectsXMLParsedAsHTML(object):
     XML_PREFIX_B = b'<?xml'
 
     @classmethod
-    def warn_if_markup_looks_like_xml(cls, markup):
+    def warn_if_markup_looks_like_xml(cls, markup, stacklevel=3):
         """Perform a check on some markup to see if it looks like XML
         that's not XHTML. If so, issue a warning.
 
         This is much less reliable than doing the check while parsing,
         but some of the tree builders can't do that.
 
+        :param stacklevel: The stacklevel of the code calling this
+        function.
+
         :return: True if the markup looks like non-XHTML XML, False
         otherwise.
+
         """
         if isinstance(markup, bytes):
             prefix = cls.XML_PREFIX_B
@@ -535,15 +539,16 @@ class DetectsXMLParsedAsHTML(object):
             and markup.startswith(prefix)
             and not looks_like_html.search(markup[:500])
         ):
-            cls._warn()
+            cls._warn(stacklevel=stacklevel+2)
             return True
         return False
 
     @classmethod
-    def _warn(cls):
+    def _warn(cls, stacklevel=5):
         """Issue a warning about XML being parsed as HTML."""
         warnings.warn(
-            XMLParsedAsHTMLWarning.MESSAGE, XMLParsedAsHTMLWarning
+            XMLParsedAsHTMLWarning.MESSAGE, XMLParsedAsHTMLWarning,
+            stacklevel=stacklevel
         )
 
     def _initialize_xml_detector(self):
diff --git a/bs4/builder/_html5lib.py b/bs4/builder/_html5lib.py
index dac2173..7c46a85 100644
--- a/bs4/builder/_html5lib.py
+++ b/bs4/builder/_html5lib.py
@@ -77,7 +77,9 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
 
         # html5lib only parses HTML, so if it's given XML that's worth
         # noting.
-        DetectsXMLParsedAsHTML.warn_if_markup_looks_like_xml(markup)
+        DetectsXMLParsedAsHTML.warn_if_markup_looks_like_xml(
+            markup, stacklevel=3
+        )
 
         yield (markup, None, None, False)
 
diff --git a/bs4/builder/_lxml.py b/bs4/builder/_lxml.py
index 971c81e..4f7cf74 100644
--- a/bs4/builder/_lxml.py
+++ b/bs4/builder/_lxml.py
@@ -179,7 +179,9 @@ class LXMLTreeBuilderForXML(TreeBuilder):
             self.processing_instruction_class = ProcessingInstruction
             # We're in HTML mode, so if we're given XML, that's worth
             # noting.
-            DetectsXMLParsedAsHTML.warn_if_markup_looks_like_xml(markup)
+            DetectsXMLParsedAsHTML.warn_if_markup_looks_like_xml(
+                markup, stacklevel=3
+            )
         else:
             self.processing_instruction_class = XMLProcessingInstruction
 