diff options
author | Leonard Richardson <leonard.richardson@canonical.com> | 2011-02-18 12:10:10 -0500 |
---|---|---|
committer | Leonard Richardson <leonard.richardson@canonical.com> | 2011-02-18 12:10:10 -0500 |
commit | 0dda99b15112df7225e647db9702fbd62dcc8ea8 (patch) | |
tree | 1127d44d52716738835c6ab2128fdb1561bc7cc2 /beautifulsoup/dammit.py | |
parent | 66cbef12d959149746b3361f227f2a0328a31469 (diff) | |
parent | 945b719a28c229178e710b749d2af4d00a81bdba (diff) |
Defer to html5lib's Unicode converter rather than using Unicode, Dammit. The lxml treebuilder still uses UD.
Diffstat (limited to 'beautifulsoup/dammit.py')
-rw-r--r-- | beautifulsoup/dammit.py | 11 |
1 file changed, 5 insertions, 6 deletions
```diff
diff --git a/beautifulsoup/dammit.py b/beautifulsoup/dammit.py
index 78bd4b2..954ca54 100644
--- a/beautifulsoup/dammit.py
+++ b/beautifulsoup/dammit.py
@@ -58,13 +58,12 @@ class UnicodeDammit:
             return

         u = None
-        for proposedEncoding in overrideEncodings:
-            u = self._convertFrom(proposedEncoding)
-            if u: break
-        if not u:
-            for proposedEncoding in (documentEncoding, sniffedEncoding):
+        for proposedEncoding in (
+            overrideEncodings + [documentEncoding, sniffedEncoding]):
+            if proposedEncoding is not None:
                 u = self._convertFrom(proposedEncoding)
-                if u: break
+                if u:
+                    break

         # If no luck and we have auto-detection library, try that:
         if not u and chardet and not isinstance(self.markup, unicode):
```