From feffc5a1146e2520c90682bc2c33f5fa7d3943f0 Mon Sep 17 00:00:00 2001
From: Leonard Richardson
Date: Sat, 27 Jun 2015 09:55:40 -0400
Subject: Added an exclude_encodings argument to UnicodeDammit and to the Beautiful Soup constructor, which lets you prohibit the detection of an encoding that you know is wrong. [bug=1469408]

---
 bs4/builder/_htmlparser.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'bs4/builder/_htmlparser.py')

diff --git a/bs4/builder/_htmlparser.py b/bs4/builder/_htmlparser.py
index b2cd467..25811f1 100644
--- a/bs4/builder/_htmlparser.py
+++ b/bs4/builder/_htmlparser.py
@@ -138,7 +138,7 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
         self.parser_args = (args, kwargs)
 
     def prepare_markup(self, markup, user_specified_encoding=None,
-                       document_declared_encoding=None):
+                       document_declared_encoding=None, exclude_encodings=None):
         """
         :return: A 4-tuple (markup, original encoding, encoding
         declared within markup, whether any characters had to be
@@ -149,7 +149,8 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
             return
 
         try_encodings = [user_specified_encoding, document_declared_encoding]
-        dammit = UnicodeDammit(markup, try_encodings, is_html=True)
+        dammit = UnicodeDammit(markup, try_encodings, is_html=True,
+                               exclude_encodings=exclude_encodings)
         yield (dammit.markup, dammit.original_encoding,
                dammit.declared_html_encoding,
                dammit.contains_replacement_characters)
--
cgit v1.2.3