diff options
Diffstat (limited to 'bs4/builder/_htmlparser.py')
-rw-r--r-- | bs4/builder/_htmlparser.py | 10 |
1 files changed, 6 insertions, 4 deletions
diff --git a/bs4/builder/_htmlparser.py b/bs4/builder/_htmlparser.py
index edd0bfb..c785eed 100644
--- a/bs4/builder/_htmlparser.py
+++ b/bs4/builder/_htmlparser.py
@@ -51,16 +51,18 @@ class HTMLParserTreeBuilder(HTMLParser, HTMLTreeBuilder):
     def prepare_markup(self, markup, user_specified_encoding=None,
                        document_declared_encoding=None):
         """
-        :return: A 3-tuple (markup, original encoding, encoding
-        declared within markup).
+        :return: A 4-tuple (markup, original encoding, encoding
+        declared within markup, whether any characters had to be
+        replaced with REPLACEMENT CHARACTER).
         """
         if isinstance(markup, unicode):
-            return markup, None, None
+            return markup, None, None, False
 
         try_encodings = [user_specified_encoding, document_declared_encoding]
         dammit = UnicodeDammit(markup, try_encodings, is_html=True)
         return (dammit.markup, dammit.original_encoding,
-                dammit.declared_html_encoding)
+                dammit.declared_html_encoding,
+                dammit.contains_replacement_characters)
 
     def feed(self, markup):
         super(HTMLParserTreeBuilder, self).feed(markup)