diff options
author | Leonard Richardson <leonard.richardson@canonical.com> | 2011-02-22 22:57:39 -0500 |
---|---|---|
committer | Leonard Richardson <leonard.richardson@canonical.com> | 2011-02-22 22:57:39 -0500 |
commit | 7db58f67d60bf2675613e38d2d8daeeadc5522db (patch) | |
tree | 6cacead21b43bb5de23947a414c5d949c0d6c434 /beautifulsoup/dammit.py | |
parent | f42fef27dc82ce97df0cb7b254595e6771461637 (diff) | |
parent | 6d7422139b7a60f48761f9a8ef52ed3de7393287 (diff) |
Solved the question of how to decide between &apos; (XML) and &squot; (HTML) by cutting the Gordian knot: quote the *double* quotes, which are always &quot;.
Diffstat (limited to 'beautifulsoup/dammit.py')
-rw-r--r-- | beautifulsoup/dammit.py | 48 |
1 files changed, 23 insertions, 25 deletions
diff --git a/beautifulsoup/dammit.py b/beautifulsoup/dammit.py index 67bec17..788f72d 100644 --- a/beautifulsoup/dammit.py +++ b/beautifulsoup/dammit.py @@ -35,6 +35,10 @@ class EntitySubstitution(object): lookup = {} characters = [] for codepoint, name in codepoint2name.items(): + if codepoint == 34: + # There's no point in turning the quotation mark into + # &quot;--even in attribute values we quote the + continue; character = unichr(codepoint) characters.append(character) lookup[character] = name @@ -66,8 +70,7 @@ class EntitySubstitution(object): entity = self.CHARACTER_TO_XML_ENTITY[matchobj.group(0)] return "&%s;" % entity - def substitute_xml(self, value, make_quoted_attribute=False, - destination_is_xml=False): + def substitute_xml(self, value, make_quoted_attribute=False): """Substitute XML entities for special XML characters. :param value: A string to be substituted. The less-than sign will @@ -88,33 +91,28 @@ class EntitySubstitution(object): Welcome to "my bar" -> 'Welcome to "my bar"' If the string contains both single and double quotes, the - single quotes will be escaped (see `destination_is_xml`), and - the string will be quoted using single quotes. - - Welcome to "Bob's Bar" -> 'Welcome to "Bob&squot;s bar' - OR - 'Welcome to "Bob&apos;s bar' - (depending on the value of `destination_is_xml`) - - :param destination_is_xml: If destination_is_xml is True, - then when a single quote is escaped it will become - "&apos;". But &apos; is not a valid HTML 4 entity. If - destination_is_xml is False, then single quotes will be - turned into "&squot;". - - The value of this argument is irrelevant unless - make_quoted_attribute is True. + double quotes will be escaped, and the string will be quoted + using double quotes. 
+ + Welcome to "Bob's Bar" -> "Welcome to &quot;Bob's bar&quot; """ - quote_with = '"' if make_quoted_attribute: + quote_with = '"' if '"' in value: - quote_with = "'" if "'" in value: - if destination_is_xml: - replace_with = "&apos;" - else: - replace_with = "&squot;" - value = value.replace("'", replace_with) + # The string contains both single and double + # quotes. Turn the double quotes into + # entities. We quote the double quotes rather than + # the single quotes because the entity name is + # "&quot;" whether this is HTML or XML. If we + # quoted the single quotes, we'd have to decide + # between &apos; and &squot;. + replace_with = "&quot;" + value = value.replace('"', replace_with) + else: + # There are double quotes but no single quotes. + # We can use single quotes to quote the attribute. + quote_with = "'" # Escape angle brackets, and ampersands that aren't part of # entities. |