diff options
author | Leonard Richardson <leonard.richardson@canonical.com> | 2012-02-09 09:35:46 -0500 |
---|---|---|
committer | Leonard Richardson <leonard.richardson@canonical.com> | 2012-02-09 09:35:46 -0500 |
commit | 3c9e9c3f780d59f2ef7927fe8246ab78fe4f6572 (patch) | |
tree | f2c35a22d638d7a2886139b8c8b60eca0575949d /bs4/dammit.py | |
parent | c199d176f1ebb4289428e1ba8a939b2cd1b55218 (diff) |
Improved Unicode, Dammit's behavior when you give it Unicode to begin with.
Diffstat (limited to 'bs4/dammit.py')
-rw-r--r-- | bs4/dammit.py | 6 |
1 files changed, 4 insertions, 2 deletions
diff --git a/bs4/dammit.py b/bs4/dammit.py
index 8897063..2b681e8 100644
--- a/bs4/dammit.py
+++ b/bs4/dammit.py
@@ -167,15 +167,17 @@ class UnicodeDammit:
     def __init__(self, markup, override_encodings=[],
                  smart_quotes_to=None, isHTML=False):
         self.declared_html_encoding = None
-        self.markup, document_encoding, sniffed_encoding = \
-                     self._detectEncoding(markup, isHTML)
         self.smart_quotes_to = smart_quotes_to
         self.tried_encodings = []
+
         if markup == '' or isinstance(markup, unicode):
             self.original_encoding = None
             self.unicode_markup = unicode(markup)
             return
 
+        self.markup, document_encoding, sniffed_encoding = \
+                     self._detectEncoding(markup, isHTML)
+
         u = None
         for proposed_encoding in (
             override_encodings + [document_encoding, sniffed_encoding]):