summary refs log tree commit diff
path: root/bs4/builder/_html5lib.py
diff options
context:
space:
mode:
author Leonard Richardson <leonardr@segfault.org> 2015-06-27 09:55:40 -0400
committer Leonard Richardson <leonardr@segfault.org> 2015-06-27 09:55:40 -0400
commit feffc5a1146e2520c90682bc2c33f5fa7d3943f0 (patch)
tree 6dce892919c201b629628647f86843382b29a60a /bs4/builder/_html5lib.py
parent d728b9cbd6cd5954acf7c9c32fe2f1878809d6e8 (diff)
Added an exclude_encodings argument to UnicodeDammit and to the
Beautiful Soup constructor, which lets you prohibit the detection of an encoding that you know is wrong. [bug=1469408]
Diffstat (limited to 'bs4/builder/_html5lib.py')
-rw-r--r-- bs4/builder/_html5lib.py 9
1 file changed, 8 insertions, 1 deletion
diff --git a/bs4/builder/_html5lib.py b/bs4/builder/_html5lib.py
index 0778dde..7788063 100644
--- a/bs4/builder/_html5lib.py
+++ b/bs4/builder/_html5lib.py
@@ -29,9 +29,16 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
features = [NAME, PERMISSIVE, HTML_5, HTML]
- def prepare_markup(self, markup, user_specified_encoding):
+ def prepare_markup(self, markup, user_specified_encoding,
+ document_declared_encoding=None, exclude_encodings=None):
# Store the user-specified encoding for use later on.
self.user_specified_encoding = user_specified_encoding
+
+ # document_declared_encoding and exclude_encodings aren't used
+ # ATM because the html5lib TreeBuilder doesn't use
+ # UnicodeDammit.
+ if exclude_encodings:
+ warnings.warn("You provided a value for exclude_encoding, but the html5lib tree builder doesn't support exclude_encoding.")
yield (markup, None, None, False)
# These methods are defined by Beautiful Soup.