diff options
author | Leonard Richardson <leonard.richardson@canonical.com> | 2013-06-02 22:19:37 -0400 |
---|---|---|
committer | Leonard Richardson <leonard.richardson@canonical.com> | 2013-06-02 22:19:37 -0400 |
commit | 4a9444ac0b74fbf84cf86b9fcf6055c85e65f62a (patch) | |
tree | 570cbcb2c9ab9cf458edee87490afeffd8377560 /bs4/builder/_htmlparser.py | |
parent | 11dad27424b319a2034f59f5a7f48286551102d0 (diff) | |
parent | 4f9a654766df9ddd05e3ef274b4715b42668724f (diff) |
Merged in big encoding-detection refactoring branch.
Diffstat (limited to 'bs4/builder/_htmlparser.py')
-rw-r--r-- | bs4/builder/_htmlparser.py | 9 |
1 files changed, 5 insertions, 4 deletions
diff --git a/bs4/builder/_htmlparser.py b/bs4/builder/_htmlparser.py index 65ee618..4b80f79 100644 --- a/bs4/builder/_htmlparser.py +++ b/bs4/builder/_htmlparser.py @@ -135,13 +135,14 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder): replaced with REPLACEMENT CHARACTER). """ if isinstance(markup, unicode): - return markup, None, None, False + yield (markup, None, None, False) + return try_encodings = [user_specified_encoding, document_declared_encoding] dammit = UnicodeDammit(markup, try_encodings, is_html=True) - return (dammit.markup, dammit.original_encoding, - dammit.declared_html_encoding, - dammit.contains_replacement_characters) + yield (dammit.markup, dammit.original_encoding, + dammit.declared_html_encoding, + dammit.contains_replacement_characters) def feed(self, markup): args, kwargs = self.parser_args |