diff options
author | Leonard Richardson <leonardr@segfault.org> | 2021-10-24 21:15:31 -0400 |
---|---|---|
committer | Leonard Richardson <leonardr@segfault.org> | 2021-10-24 21:15:31 -0400 |
commit | c1a7aaae7140897b2e845be8c5aa077d6654ee0a (patch) | |
tree | df6a58adc912d111e619094d7884d034a6649249 /bs4/builder/_html5lib.py | |
parent | dd8aa7237b88569c99e85b300b0cf537aeaebfbd (diff) |
Issue a warning when an HTML parser is used to parse a document that
looks like XML but not XHTML. [bug=1939121]
Diffstat (limited to 'bs4/builder/_html5lib.py')
-rw-r--r-- | bs4/builder/_html5lib.py | 6 |
1 file changed, 6 insertions, 0 deletions
```diff
diff --git a/bs4/builder/_html5lib.py b/bs4/builder/_html5lib.py
index 914b1df..58bc176 100644
--- a/bs4/builder/_html5lib.py
+++ b/bs4/builder/_html5lib.py
@@ -8,6 +8,7 @@ __all__ = [
 import warnings
 import re
 from bs4.builder import (
+    DetectsXMLParsedAsHTML,
     PERMISSIVE,
     HTML,
     HTML_5,
@@ -70,6 +71,11 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
         # UnicodeDammit.
         if exclude_encodings:
             warnings.warn("You provided a value for exclude_encoding, but the html5lib tree builder doesn't support exclude_encoding.")
+
+        # html5lib only parses HTML, so if it's given XML that's worth
+        # noting.
+        DetectsXMLParsedAsHTML.warn_if_markup_looks_like_xml(markup)
+
         yield (markup, None, None, False)
 
     # These methods are defined by Beautiful Soup.
```