diff options
author | Leonard Richardson <leonard.richardson@canonical.com> | 2011-05-21 18:10:15 -0400 |
---|---|---|
committer | Leonard Richardson <leonard.richardson@canonical.com> | 2011-05-21 18:10:15 -0400 |
commit | adbc7ecfea5b3e3349cdb4b4eac702d1e2b42e63 (patch) | |
tree | e8a7024d0e4e4319a9f9148cbd43d986028282de /bs4/dammit.py | |
parent | b4b3a1c6f001d99275e1682aa30ea5dc21017bea (diff) | |
parent | c29185a017c93d1aa19dea4606bde2a064f23639 (diff) |
OK, figured that out.
Diffstat (limited to 'bs4/dammit.py')
-rw-r--r-- | bs4/dammit.py | 13 |
1 files changed, 6 insertions, 7 deletions
diff --git a/bs4/dammit.py b/bs4/dammit.py index f3e770e..ed5dc29 100644 --- a/bs4/dammit.py +++ b/bs4/dammit.py @@ -9,7 +9,6 @@ encoding; that's the tree builder's job. import codecs from htmlentitydefs import codepoint2name import re -import types # Autodetects character encodings. Very useful. # Download from http://chardet.feedparser.org/ @@ -37,7 +36,7 @@ class EntitySubstitution(object): lookup = {} reverse_lookup = {} characters = [] - for codepoint, name in codepoint2name.items(): + for codepoint, name in list(codepoint2name.items()): if codepoint == 34: # There's no point in turning the quotation mark into # ", unless it happens within an attribute value, which @@ -174,7 +173,7 @@ class UnicodeDammit: self.tried_encodings = [] if markup == '' or isinstance(markup, unicode): self.original_encoding = None - self.unicode = unicode(markup) + self.unicode_markup = unicode(markup) return u = None @@ -196,7 +195,7 @@ class UnicodeDammit: if u: break - self.unicode = u + self.unicode_markup = u if not u: self.original_encoding = None @@ -205,7 +204,7 @@ class UnicodeDammit: entity.""" orig = match.group(1) sub = self.MS_CHARS.get(orig) - if type(sub) == types.TupleType: + if type(sub) == tuple: if self.smart_quotes_to == 'xml': sub = '&#x'.encode() + sub[1].encode() + ';'.encode() else: @@ -234,7 +233,7 @@ class UnicodeDammit: u = self._to_unicode(markup, proposed) self.markup = u self.original_encoding = proposed - except Exception, e: + except Exception as e: # print "That didn't work!" # print e return None @@ -376,7 +375,7 @@ class UnicodeDammit: 250,251,252,253,254,255) import string c.EBCDIC_TO_ASCII_MAP = string.maketrans( - ''.join(map(chr, range(256))), ''.join(map(chr, emap))) + ''.join(map(chr, list(range(256)))), ''.join(map(chr, emap))) return s.translate(c.EBCDIC_TO_ASCII_MAP) MS_CHARS = {'\x80': ('euro', '20AC'), |