From feffc5a1146e2520c90682bc2c33f5fa7d3943f0 Mon Sep 17 00:00:00 2001
From: Leonard Richardson
Date: Sat, 27 Jun 2015 09:55:40 -0400
Subject: Added an exclude_encodings argument to UnicodeDammit and to the Beautiful Soup constructor, which lets you prohibit the detection of an encoding that you know is wrong. [bug=1469408]

---
 bs4/builder/_html5lib.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'bs4/builder/_html5lib.py')

diff --git a/bs4/builder/_html5lib.py b/bs4/builder/_html5lib.py
index 0778dde..7788063 100644
--- a/bs4/builder/_html5lib.py
+++ b/bs4/builder/_html5lib.py
@@ -29,9 +29,16 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
 
     features = [NAME, PERMISSIVE, HTML_5, HTML]
 
-    def prepare_markup(self, markup, user_specified_encoding):
+    def prepare_markup(self, markup, user_specified_encoding,
+                       document_declared_encoding=None, exclude_encodings=None):
         # Store the user-specified encoding for use later on.
         self.user_specified_encoding = user_specified_encoding
+
+        # document_declared_encoding and exclude_encodings aren't used
+        # ATM because the html5lib TreeBuilder doesn't use
+        # UnicodeDammit.
+        if exclude_encodings:
+            warnings.warn("You provided a value for exclude_encoding, but the html5lib tree builder doesn't support exclude_encoding.")
         yield (markup, None, None, False)
 
     # These methods are defined by Beautiful Soup.
-- 
cgit v1.2.3