diff options
author | Leonard Richardson <leonardr@segfault.org> | 2012-05-03 10:29:55 -0400 |
---|---|---|
committer | Leonard Richardson <leonardr@segfault.org> | 2012-05-03 10:29:55 -0400 |
commit | 0401057f29c9c8e6ee781aa9ca6fd1a395a4b084 (patch) | |
tree | 7efefe13bf03632255b59f6241bad4c660de3e4f /bs4/dammit.py | |
parent | 32a764727d7ae697945e70a942eab8899bc2f30d (diff) |
Fixed the handling of &quot; with the built-in parser. [bug=993871]
Diffstat (limited to 'bs4/dammit.py')
-rw-r--r-- | bs4/dammit.py | 14 |
1 files changed, 7 insertions, 7 deletions
```diff
diff --git a/bs4/dammit.py b/bs4/dammit.py
index a3301ee..66a9e9b 100644
--- a/bs4/dammit.py
+++ b/bs4/dammit.py
@@ -41,18 +41,18 @@ class EntitySubstitution(object):
     def _populate_class_variables():
         lookup = {}
         reverse_lookup = {}
-        characters = []
+        characters_for_re = []
         for codepoint, name in list(codepoint2name.items()):
-            if codepoint == 34:
+            character = unichr(codepoint)
+            if codepoint != 34:
                 # There's no point in turning the quotation mark into
                 # &quot;, unless it happens within an attribute value, which
                 # is handled elsewhere.
-                continue
-            character = unichr(codepoint)
-            characters.append(character)
-            lookup[character] = name
+                characters_for_re.append(character)
+                lookup[character] = name
+            # But we do want to turn &quot; into the quotation mark.
             reverse_lookup[name] = character
-        re_definition = "[%s]" % "".join(characters)
+        re_definition = "[%s]" % "".join(characters_for_re)
         return lookup, reverse_lookup, re.compile(re_definition)
     (CHARACTER_TO_HTML_ENTITY, HTML_ENTITY_TO_CHARACTER,
      CHARACTER_TO_HTML_ENTITY_RE) = _populate_class_variables()
```