summary | refs | log | tree | commit | diff
path: root/bs4/dammit.py
diff options
context:
space:
mode:
author Thomas Kluyver <takowl@gmail.com> 2011-06-29 01:21:16 +0100
committer Thomas Kluyver <takowl@gmail.com> 2011-06-29 01:21:16 +0100
commit 7b3750d34c8e3f92a9f1b43557bb7b0986c1e0bc (patch)
tree a89e43101a6591e6bf4c649ccb8a025afd9a62aa /bs4/dammit.py
parent 828b557307123d667d073faf0cf35bc00f294dc5 (diff)
Various changes so most tests pass on Python 3.
Diffstat (limited to 'bs4/dammit.py')
-rw-r--r-- bs4/dammit.py 66
1 files changed, 33 insertions, 33 deletions
diff --git a/bs4/dammit.py b/bs4/dammit.py
index ed5dc29..8897063 100644
--- a/bs4/dammit.py
+++ b/bs4/dammit.py
@@ -224,7 +224,7 @@ class UnicodeDammit:
# that might have them.
if (self.smart_quotes_to is not None
and proposed.lower() in self.ENCODINGS_WITH_SMART_QUOTES):
- smart_quotes_re = "([\x80-\x9f])"
+ smart_quotes_re = b"([\x80-\x9f])"
smart_quotes_compiled = re.compile(smart_quotes_re)
markup = smart_quotes_compiled.sub(self._sub_ms_char, markup)
@@ -378,35 +378,35 @@ class UnicodeDammit:
''.join(map(chr, list(range(256)))), ''.join(map(chr, emap)))
return s.translate(c.EBCDIC_TO_ASCII_MAP)
- MS_CHARS = {'\x80': ('euro', '20AC'),
- '\x81': ' ',
- '\x82': ('sbquo', '201A'),
- '\x83': ('fnof', '192'),
- '\x84': ('bdquo', '201E'),
- '\x85': ('hellip', '2026'),
- '\x86': ('dagger', '2020'),
- '\x87': ('Dagger', '2021'),
- '\x88': ('circ', '2C6'),
- '\x89': ('permil', '2030'),
- '\x8A': ('Scaron', '160'),
- '\x8B': ('lsaquo', '2039'),
- '\x8C': ('OElig', '152'),
- '\x8D': '?',
- '\x8E': ('#x17D', '17D'),
- '\x8F': '?',
- '\x90': '?',
- '\x91': ('lsquo', '2018'),
- '\x92': ('rsquo', '2019'),
- '\x93': ('ldquo', '201C'),
- '\x94': ('rdquo', '201D'),
- '\x95': ('bull', '2022'),
- '\x96': ('ndash', '2013'),
- '\x97': ('mdash', '2014'),
- '\x98': ('tilde', '2DC'),
- '\x99': ('trade', '2122'),
- '\x9a': ('scaron', '161'),
- '\x9b': ('rsaquo', '203A'),
- '\x9c': ('oelig', '153'),
- '\x9d': '?',
- '\x9e': ('#x17E', '17E'),
- '\x9f': ('Yuml', ''),}
+ MS_CHARS = {b'\x80': ('euro', '20AC'),
+ b'\x81': ' ',
+ b'\x82': ('sbquo', '201A'),
+ b'\x83': ('fnof', '192'),
+ b'\x84': ('bdquo', '201E'),
+ b'\x85': ('hellip', '2026'),
+ b'\x86': ('dagger', '2020'),
+ b'\x87': ('Dagger', '2021'),
+ b'\x88': ('circ', '2C6'),
+ b'\x89': ('permil', '2030'),
+ b'\x8A': ('Scaron', '160'),
+ b'\x8B': ('lsaquo', '2039'),
+ b'\x8C': ('OElig', '152'),
+ b'\x8D': '?',
+ b'\x8E': ('#x17D', '17D'),
+ b'\x8F': '?',
+ b'\x90': '?',
+ b'\x91': ('lsquo', '2018'),
+ b'\x92': ('rsquo', '2019'),
+ b'\x93': ('ldquo', '201C'),
+ b'\x94': ('rdquo', '201D'),
+ b'\x95': ('bull', '2022'),
+ b'\x96': ('ndash', '2013'),
+ b'\x97': ('mdash', '2014'),
+ b'\x98': ('tilde', '2DC'),
+ b'\x99': ('trade', '2122'),
+ b'\x9a': ('scaron', '161'),
+ b'\x9b': ('rsaquo', '203A'),
+ b'\x9c': ('oelig', '153'),
+ b'\x9d': '?',
+ b'\x9e': ('#x17E', '17E'),
+ b'\x9f': ('Yuml', ''),}