diff options
author | Leonard Richardson <leonardr@segfault.org> | 2015-06-27 09:55:40 -0400 |
---|---|---|
committer | Leonard Richardson <leonardr@segfault.org> | 2015-06-27 09:55:40 -0400 |
commit | feffc5a1146e2520c90682bc2c33f5fa7d3943f0 (patch) | |
tree | 6dce892919c201b629628647f86843382b29a60a /bs4/builder/_html5lib.py | |
parent | d728b9cbd6cd5954acf7c9c32fe2f1878809d6e8 (diff) |
Added an exclude_encodings argument to UnicodeDammit and to the
Beautiful Soup constructor, which lets you prohibit the detection of
an encoding that you know is wrong. [bug=1469408]
Diffstat (limited to 'bs4/builder/_html5lib.py')
-rw-r--r-- | bs4/builder/_html5lib.py | 9 |
1 file changed, 8 insertions, 1 deletion
```diff
diff --git a/bs4/builder/_html5lib.py b/bs4/builder/_html5lib.py
index 0778dde..7788063 100644
--- a/bs4/builder/_html5lib.py
+++ b/bs4/builder/_html5lib.py
@@ -29,9 +29,16 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
 
     features = [NAME, PERMISSIVE, HTML_5, HTML]
 
-    def prepare_markup(self, markup, user_specified_encoding):
+    def prepare_markup(self, markup, user_specified_encoding,
+                       document_declared_encoding=None, exclude_encodings=None):
         # Store the user-specified encoding for use later on.
         self.user_specified_encoding = user_specified_encoding
+
+        # document_declared_encoding and exclude_encodings aren't used
+        # ATM because the html5lib TreeBuilder doesn't use
+        # UnicodeDammit.
+        if exclude_encodings:
+            warnings.warn("You provided a value for exclude_encoding, but the html5lib tree builder doesn't support exclude_encoding.")
         yield (markup, None, None, False)
 
     # These methods are defined by Beautiful Soup.
```