From c1a7aaae7140897b2e845be8c5aa077d6654ee0a Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Sun, 24 Oct 2021 21:15:31 -0400 Subject: Issue a warning when an HTML parser is used to parse a document that looks like XML but not XHTML. [bug=1939121] --- bs4/builder/_html5lib.py | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'bs4/builder/_html5lib.py') diff --git a/bs4/builder/_html5lib.py b/bs4/builder/_html5lib.py index 914b1df..58bc176 100644 --- a/bs4/builder/_html5lib.py +++ b/bs4/builder/_html5lib.py @@ -8,6 +8,7 @@ __all__ = [ import warnings import re from bs4.builder import ( + DetectsXMLParsedAsHTML, PERMISSIVE, HTML, HTML_5, @@ -70,6 +71,11 @@ class HTML5TreeBuilder(HTMLTreeBuilder): # UnicodeDammit. if exclude_encodings: warnings.warn("You provided a value for exclude_encoding, but the html5lib tree builder doesn't support exclude_encoding.") + + # html5lib only parses HTML, so if it's given XML that's worth + # noting. + DetectsXMLParsedAsHTML.warn_if_markup_looks_like_xml(markup) + yield (markup, None, None, False) # These methods are defined by Beautiful Soup. -- cgit v1.2.3