summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Leonard Richardson <leonard.richardson@canonical.com> 2011-02-22 22:45:36 -0500
committer Leonard Richardson <leonard.richardson@canonical.com> 2011-02-22 22:45:36 -0500
commit 1c32da9882d056fee65d8c2aa44ecacfc21eb758 (patch)
tree 1a0e636e6f5e40da75a6fca2613e773578c45112
parent f42fef27dc82ce97df0cb7b254595e6771461637 (diff)
Don't turn " into &quot; except in attribute values.
-rw-r--r-- beautifulsoup/dammit.py 5
-rw-r--r-- tests/test_soup.py 4
2 files changed, 9 insertions, 0 deletions
diff --git a/beautifulsoup/dammit.py b/beautifulsoup/dammit.py
index 67bec17..c2ba1fc 100644
--- a/beautifulsoup/dammit.py
+++ b/beautifulsoup/dammit.py
@@ -35,6 +35,11 @@ class EntitySubstitution(object):
lookup = {}
characters = []
for codepoint, name in codepoint2name.items():
+ if codepoint == 34:
+ # There's no point in turning the quotation mark into
+ # &quot; except in attribute values, which are handled
+ # separately.
+ continue;
character = unichr(codepoint)
characters.append(character)
lookup[character] = name
diff --git a/tests/test_soup.py b/tests/test_soup.py
index eaedd94..5df49bc 100644
--- a/tests/test_soup.py
+++ b/tests/test_soup.py
@@ -80,6 +80,10 @@ class TestEntitySubstitution(unittest.TestCase):
self.sub.substitute_xml("&Aacute;T&T"),
"&Aacute;T&amp;T")
+ def test_quotes_not_html_substituted(self):
+ """There's no need to do this except inside attribute values."""
+ text = 'Bob\'s "bar"'
+ self.assertEquals(self.sub.substitute_html(text), text)
class TestUnicodeDammit(unittest.TestCase):
"""Standalone tests of Unicode, Dammit."""