summary | refs | log | tree | commit | diff
path: root/bs4/builder/_htmlparser.py
diff options
context:
space:
mode:
author: Leonard Richardson <leonard.richardson@canonical.com> 2013-06-02 22:19:37 -0400
committer: Leonard Richardson <leonard.richardson@canonical.com> 2013-06-02 22:19:37 -0400
commit: 4a9444ac0b74fbf84cf86b9fcf6055c85e65f62a (patch)
tree: 570cbcb2c9ab9cf458edee87490afeffd8377560 /bs4/builder/_htmlparser.py
parent: 11dad27424b319a2034f59f5a7f48286551102d0 (diff)
parent: 4f9a654766df9ddd05e3ef274b4715b42668724f (diff)
Merged in big encoding-detection refactoring branch.
Diffstat (limited to 'bs4/builder/_htmlparser.py')
-rw-r--r-- bs4/builder/_htmlparser.py 9
1 files changed, 5 insertions, 4 deletions
diff --git a/bs4/builder/_htmlparser.py b/bs4/builder/_htmlparser.py
index 65ee618..4b80f79 100644
--- a/bs4/builder/_htmlparser.py
+++ b/bs4/builder/_htmlparser.py
@@ -135,13 +135,14 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
replaced with REPLACEMENT CHARACTER).
"""
if isinstance(markup, unicode):
- return markup, None, None, False
+ yield (markup, None, None, False)
+ return
try_encodings = [user_specified_encoding, document_declared_encoding]
dammit = UnicodeDammit(markup, try_encodings, is_html=True)
- return (dammit.markup, dammit.original_encoding,
- dammit.declared_html_encoding,
- dammit.contains_replacement_characters)
+ yield (dammit.markup, dammit.original_encoding,
+ dammit.declared_html_encoding,
+ dammit.contains_replacement_characters)
def feed(self, markup):
args, kwargs = self.parser_args