summary | refs | log | tree | commit | diff
path: root/bs4/builder/_html5lib.py
diff options
context:
space:
mode:
author: Leonard Richardson <leonardr@segfault.org> 2021-10-24 21:15:31 -0400
committer: Leonard Richardson <leonardr@segfault.org> 2021-10-24 21:15:31 -0400
commit: c1a7aaae7140897b2e845be8c5aa077d6654ee0a (patch)
tree: df6a58adc912d111e619094d7884d034a6649249 /bs4/builder/_html5lib.py
parent: dd8aa7237b88569c99e85b300b0cf537aeaebfbd (diff)
Issue a warning when an HTML parser is used to parse a document that
looks like XML but not XHTML. [bug=1939121]
Diffstat (limited to 'bs4/builder/_html5lib.py')
-rw-r--r-- bs4/builder/_html5lib.py 6
1 files changed, 6 insertions, 0 deletions
diff --git a/bs4/builder/_html5lib.py b/bs4/builder/_html5lib.py
index 914b1df..58bc176 100644
--- a/bs4/builder/_html5lib.py
+++ b/bs4/builder/_html5lib.py
@@ -8,6 +8,7 @@ __all__ = [
import warnings
import re
from bs4.builder import (
+ DetectsXMLParsedAsHTML,
PERMISSIVE,
HTML,
HTML_5,
@@ -70,6 +71,11 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
# UnicodeDammit.
if exclude_encodings:
warnings.warn("You provided a value for exclude_encoding, but the html5lib tree builder doesn't support exclude_encoding.")
+
+ # html5lib only parses HTML, so if it's given XML that's worth
+ # noting.
+ DetectsXMLParsedAsHTML.warn_if_markup_looks_like_xml(markup)
+
yield (markup, None, None, False)
# These methods are defined by Beautiful Soup.