summary | refs | log | tree | commit | diff
path: root/bs4/dammit.py
diff options
context:
space:
mode:
author Leonard Richardson <leonard.richardson@canonical.com> 2011-05-21 18:10:15 -0400
committer Leonard Richardson <leonard.richardson@canonical.com> 2011-05-21 18:10:15 -0400
commit adbc7ecfea5b3e3349cdb4b4eac702d1e2b42e63 (patch)
tree e8a7024d0e4e4319a9f9148cbd43d986028282de /bs4/dammit.py
parent b4b3a1c6f001d99275e1682aa30ea5dc21017bea (diff)
parent c29185a017c93d1aa19dea4606bde2a064f23639 (diff)
OK, figured that out.
Diffstat (limited to 'bs4/dammit.py')
-rw-r--r-- bs4/dammit.py 13
1 files changed, 6 insertions, 7 deletions
diff --git a/bs4/dammit.py b/bs4/dammit.py
index f3e770e..ed5dc29 100644
--- a/bs4/dammit.py
+++ b/bs4/dammit.py
@@ -9,7 +9,6 @@ encoding; that's the tree builder's job.
import codecs
from htmlentitydefs import codepoint2name
import re
-import types
# Autodetects character encodings. Very useful.
# Download from http://chardet.feedparser.org/
@@ -37,7 +36,7 @@ class EntitySubstitution(object):
lookup = {}
reverse_lookup = {}
characters = []
- for codepoint, name in codepoint2name.items():
+ for codepoint, name in list(codepoint2name.items()):
if codepoint == 34:
# There's no point in turning the quotation mark into
# &quot;, unless it happens within an attribute value, which
@@ -174,7 +173,7 @@ class UnicodeDammit:
self.tried_encodings = []
if markup == '' or isinstance(markup, unicode):
self.original_encoding = None
- self.unicode = unicode(markup)
+ self.unicode_markup = unicode(markup)
return
u = None
@@ -196,7 +195,7 @@ class UnicodeDammit:
if u:
break
- self.unicode = u
+ self.unicode_markup = u
if not u:
self.original_encoding = None
@@ -205,7 +204,7 @@ class UnicodeDammit:
entity."""
orig = match.group(1)
sub = self.MS_CHARS.get(orig)
- if type(sub) == types.TupleType:
+ if type(sub) == tuple:
if self.smart_quotes_to == 'xml':
sub = '&#x'.encode() + sub[1].encode() + ';'.encode()
else:
@@ -234,7 +233,7 @@ class UnicodeDammit:
u = self._to_unicode(markup, proposed)
self.markup = u
self.original_encoding = proposed
- except Exception, e:
+ except Exception as e:
# print "That didn't work!"
# print e
return None
@@ -376,7 +375,7 @@ class UnicodeDammit:
250,251,252,253,254,255)
import string
c.EBCDIC_TO_ASCII_MAP = string.maketrans(
- ''.join(map(chr, range(256))), ''.join(map(chr, emap)))
+ ''.join(map(chr, list(range(256)))), ''.join(map(chr, emap)))
return s.translate(c.EBCDIC_TO_ASCII_MAP)
MS_CHARS = {'\x80': ('euro', '20AC'),