summary | refs | log | tree | commit | diff
path: root/bs4/dammit.py
diff options
context:
space:
mode:
author Leonard Richardson <leonardr@segfault.org> 2012-05-03 10:29:55 -0400
committer Leonard Richardson <leonardr@segfault.org> 2012-05-03 10:29:55 -0400
commit 0401057f29c9c8e6ee781aa9ca6fd1a395a4b084 (patch)
tree 7efefe13bf03632255b59f6241bad4c660de3e4f /bs4/dammit.py
parent 32a764727d7ae697945e70a942eab8899bc2f30d (diff)
Fixed the handling of &quot; with the built-in parser. [bug=993871]
Diffstat (limited to 'bs4/dammit.py')
-rw-r--r-- bs4/dammit.py 14
1 files changed, 7 insertions, 7 deletions
diff --git a/bs4/dammit.py b/bs4/dammit.py
index a3301ee..66a9e9b 100644
--- a/bs4/dammit.py
+++ b/bs4/dammit.py
@@ -41,18 +41,18 @@ class EntitySubstitution(object):
def _populate_class_variables():
lookup = {}
reverse_lookup = {}
- characters = []
+ characters_for_re = []
for codepoint, name in list(codepoint2name.items()):
- if codepoint == 34:
+ character = unichr(codepoint)
+ if codepoint != 34:
# There's no point in turning the quotation mark into
# &quot;, unless it happens within an attribute value, which
# is handled elsewhere.
- continue
- character = unichr(codepoint)
- characters.append(character)
- lookup[character] = name
+ characters_for_re.append(character)
+ lookup[character] = name
+ # But we do want to turn &quot; into the quotation mark.
reverse_lookup[name] = character
- re_definition = "[%s]" % "".join(characters)
+ re_definition = "[%s]" % "".join(characters_for_re)
return lookup, reverse_lookup, re.compile(re_definition)
(CHARACTER_TO_HTML_ENTITY, HTML_ENTITY_TO_CHARACTER,
CHARACTER_TO_HTML_ENTITY_RE) = _populate_class_variables()