Issue a warning when an HTML parser is used to parse a document that
looks like XML but not XHTML. [bug=1939121]
author: Leonard Richardson <leonardr@segfault.org> 2021-10-24 21:15:31 -0400
committer: Leonard Richardson <leonardr@segfault.org> 2021-10-24 21:15:31 -0400
commit: c1a7aaae7140897b2e845be8c5aa077d6654ee0a (patch)
tree: df6a58adc912d111e619094d7884d034a6649249 /bs4/builder/_html5lib.py
parent: dd8aa7237b88569c99e85b300b0cf537aeaebfbd (diff)
1 files changed, 6 insertions, 0 deletions
diff --git a/bs4/builder/_html5lib.py b/bs4/builder/_html5lib.py
index 914b1df..58bc176 100644
--- a/bs4/builder/_html5lib.py
+++ b/bs4/builder/_html5lib.py
@@ -8,6 +8,7 @@ __all__ = [
 import warnings
 import re
 from bs4.builder import (
+    DetectsXMLParsedAsHTML,
     PERMISSIVE,
     HTML,
     HTML_5,
@@ -70,6 +71,11 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
         # UnicodeDammit.
         if exclude_encodings:
             warnings.warn("You provided a value for exclude_encoding, but the html5lib tree builder doesn't support exclude_encoding.")
+
+        # html5lib only parses HTML, so if it's given XML that's worth
+        # noting.
+        DetectsXMLParsedAsHTML.warn_if_markup_looks_like_xml(markup)
+
         yield (markup, None, None, False)
 
     # These methods are defined by Beautiful Soup.
author	Leonard Richardson <leonardr@segfault.org>	2021-10-24 21:15:31 -0400
committer	Leonard Richardson <leonardr@segfault.org>	2021-10-24 21:15:31 -0400
commit	c1a7aaae7140897b2e845be8c5aa077d6654ee0a (patch)
tree	df6a58adc912d111e619094d7884d034a6649249 /bs4/builder/_html5lib.py
parent	dd8aa7237b88569c99e85b300b0cf537aeaebfbd (diff)