4 files changed, 170 insertions, 16 deletions
diff --git a/NEWS.txt b/NEWS.txt
index ddceae3..2b8fac7 100644
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -17,6 +17,9 @@
 * Give a more useful error when the user tries to run the Python 2
   version of BS under Python 3.
 
+* UnicodeDammit can now convert Microsoft smart quotes to ASCII with
+  UnicodeDammit(markup, smart_quotes_to="ascii").
+
 = 4.0.3 (20120403) =
 
 * Fixed a typo that caused some versions of Python 3 to convert the
diff --git a/bs4/dammit.py b/bs4/dammit.py
index 65fd43d..824c4c0 100644
--- a/bs4/dammit.py
+++ b/bs4/dammit.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 """Beautiful Soup bonus library: Unicode, Dammit
 
 This class forces XML data into a standard format (usually to UTF-8 or
@@ -232,16 +233,19 @@ class UnicodeDammit:
 
     def _sub_ms_char(self, match):
         """Changes a MS smart quote character to an XML or HTML
-        entity."""
+        entity, or to an ASCII string if smart_quotes_to is 'ascii'."""
         orig = match.group(1)
-        sub = self.MS_CHARS.get(orig)
-        if type(sub) == tuple:
-            if self.smart_quotes_to == 'xml':
-                sub = '&#x'.encode() + sub[1].encode() + ';'.encode()
-            else:
-                sub = '&'.encode() + sub[0].encode() + ';'.encode()
+        if self.smart_quotes_to == 'ascii':
+            sub = self.MS_CHARS_TO_ASCII.get(orig).encode()
         else:
-            sub = sub.encode()
+            sub = self.MS_CHARS.get(orig)
+            if type(sub) == tuple:
+                if self.smart_quotes_to == 'xml':
+                    sub = '&#x'.encode() + sub[1].encode() + ';'.encode()
+                else:
+                    sub = '&'.encode() + sub[0].encode() + ';'.encode()
+            else:
+                sub = sub.encode()
         return sub
 
     def _convert_from(self, proposed, errors="strict"):
@@ -407,6 +411,7 @@ class UnicodeDammit:
             ''.join(map(chr, list(range(256)))), ''.join(map(chr, emap)))
         return s.translate(c.EBCDIC_TO_ASCII_MAP)
 
+    # A partial mapping of Windows-1252 bytes to HTML/XML numeric entities.
     MS_CHARS = {b'\x80': ('euro', '20AC'),
                 b'\x81': ' ',
                 b'\x82': ('sbquo', '201A'),
@@ -439,3 +444,138 @@ class UnicodeDammit:
                 b'\x9d': '?',
                 b'\x9e': ('#x17E', '17E'),
                 b'\x9f': ('Yuml', ''),}
+
+    # A parochial partial mapping of Windows-1252 bytes to ASCII text (every
+    # value must be a str; _sub_ms_char calls .encode() on it). Horrors like
+    # turning á into a live here, and so do non-horrors like turning “ into ".
+    MS_CHARS_TO_ASCII = {
+        b'\x80' : 'EUR',
+        b'\x81' : ' ',
+        b'\x82' : ',',
+        b'\x83' : 'f',
+        b'\x84' : ',,',
+        b'\x85' : '...',
+        b'\x86' : '+',
+        b'\x87' : '++',
+        b'\x88' : '^',
+        b'\x89' : '%',
+        b'\x8a' : 'S',
+        b'\x8b' : '<',
+        b'\x8c' : 'OE',
+        b'\x8d' : '?',
+        b'\x8e' : 'Z',
+        b'\x8f' : '?',
+        b'\x90' : '?',
+        b'\x91' : "'",
+        b'\x92' : "'",
+        b'\x93' : '"',
+        b'\x94' : '"',
+        b'\x95' : '*',
+        b'\x96' : '-',
+        b'\x97' : '--',
+        b'\x98' : '~',
+        b'\x99' : '(TM)',
+        b'\x9a' : 's',
+        b'\x9b' : '>',
+        b'\x9c' : 'oe',
+        b'\x9d' : '?',
+        b'\x9e' : 'z',
+        b'\x9f' : 'Y',
+        b'\xa0' : ' ',
+        b'\xa1' : '!',
+        b'\xa2' : 'c',
+        b'\xa3' : 'GBP',
+        b'\xa4' : '$', #This approximation is especially parochial--this is the
+                       #generic currency symbol.
+        b'\xa5' : 'YEN',
+        b'\xa6' : '|',
+        b'\xa7' : 'S',
+        b'\xa8' : '..',
+        b'\xa9' : '(c)',
+        b'\xaa' : '(th)',
+        b'\xab' : '<<',
+        b'\xac' : '!',
+        b'\xad' : ' ',
+        b'\xae' : '(R)',
+        b'\xaf' : '-',
+        b'\xb0' : 'o',
+        b'\xb1' : '+-',
+        b'\xb2' : '2',
+        b'\xb3' : '3',
+        b'\xb4' : "'",
+        b'\xb5' : 'u',
+        b'\xb6' : 'P',
+        b'\xb7' : '*',
+        b'\xb8' : ',',
+        b'\xb9' : '1',
+        b'\xba' : '(th)',
+        b'\xbb' : '>>',
+        b'\xbc' : '1/4',
+        b'\xbd' : '1/2',
+        b'\xbe' : '3/4',
+        b'\xbf' : '?',
+        b'\xc0' : 'A',
+        b'\xc1' : 'A',
+        b'\xc2' : 'A',
+        b'\xc3' : 'A',
+        b'\xc4' : 'A',
+        b'\xc5' : 'A',
+        b'\xc6' : 'AE',
+        b'\xc7' : 'C',
+        b'\xc8' : 'E',
+        b'\xc9' : 'E',
+        b'\xca' : 'E',
+        b'\xcb' : 'E',
+        b'\xcc' : 'I',
+        b'\xcd' : 'I',
+        b'\xce' : 'I',
+        b'\xcf' : 'I',
+        b'\xd0' : 'D',
+        b'\xd1' : 'N',
+        b'\xd2' : 'O',
+        b'\xd3' : 'O',
+        b'\xd4' : 'O',
+        b'\xd5' : 'O',
+        b'\xd6' : 'O',
+        b'\xd7' : '*',
+        b'\xd8' : 'O',
+        b'\xd9' : 'U',
+        b'\xda' : 'U',
+        b'\xdb' : 'U',
+        b'\xdc' : 'U',
+        b'\xdd' : 'Y',
+        b'\xde' : 'b',
+        b'\xdf' : 'B',
+        b'\xe0' : 'a',
+        b'\xe1' : 'a',
+        b'\xe2' : 'a',
+        b'\xe3' : 'a',
+        b'\xe4' : 'a',
+        b'\xe5' : 'a',
+        b'\xe6' : 'ae',
+        b'\xe7' : 'c',
+        b'\xe8' : 'e',
+        b'\xe9' : 'e',
+        b'\xea' : 'e',
+        b'\xeb' : 'e',
+        b'\xec' : 'i',
+        b'\xed' : 'i',
+        b'\xee' : 'i',
+        b'\xef' : 'i',
+        b'\xf0' : 'o',
+        b'\xf1' : 'n',
+        b'\xf2' : 'o',
+        b'\xf3' : 'o',
+        b'\xf4' : 'o',
+        b'\xf5' : 'o',
+        b'\xf6' : 'o',
+        b'\xf7' : '/',
+        b'\xf8' : 'o',
+        b'\xf9' : 'u',
+        b'\xfa' : 'u',
+        b'\xfb' : 'u',
+        b'\xfc' : 'u',
+        b'\xfd' : 'y',
+        b'\xfe' : 'b',
+        b'\xff' : 'y',
+        }
diff --git a/bs4/tests/test_soup.py b/bs4/tests/test_soup.py
index d826b25..ddbffd4 100644
--- a/bs4/tests/test_soup.py
+++ b/bs4/tests/test_soup.py
@@ -191,6 +191,12 @@ class TestUnicodeDammit(unittest.TestCase):
         self.assertEqual(
             dammit.unicode_markup, "<foo>&lsquo;&rsquo;&ldquo;&rdquo;</foo>")
 
+    def test_smart_quotes_to_ascii(self):
+        """Microsoft smart quotes come out as plain ASCII quotes."""
+        smart_quoted = b"<foo>\x91\x92\x93\x94</foo>"
+        converted = UnicodeDammit(smart_quoted, smart_quotes_to="ascii")
+        self.assertEqual(converted.unicode_markup, """<foo>''""</foo>""")
+
     def test_detect_utf8(self):
         utf8 = b"\xc3\xa9"
         dammit = UnicodeDammit(utf8)
diff --git a/doc/source/index.rst b/doc/source/index.rst
index d4dabb1..a7757d6 100644
--- a/doc/source/index.rst
+++ b/doc/source/index.rst
@@ -2391,21 +2391,26 @@ Unicode, Dammit has one special feature that Beautiful Soup doesn't
 use. You can use it to convert Microsoft smart quotes to HTML or XML
 entities::
 
- markup = b"<p>I just \x93love\x94 Microsoft Word</p>"
+ markup = b"<p>I just \x93love\x94 Microsoft Word\x92s smart quotes</p>"
 
  UnicodeDammit(markup, ["windows-1252"], smart_quotes_to="html").unicode_markup
- # u'<p>I just &ldquo;love&rdquo; Microsoft Word</p>'
+ # u'<p>I just &ldquo;love&rdquo; Microsoft Word&rsquo;s smart quotes</p>'
 
  UnicodeDammit(markup, ["windows-1252"], smart_quotes_to="xml").unicode_markup
- # u'<p>I just &#x201C;love&#x201D; Microsoft Word</p>'
+ # u'<p>I just &#x201C;love&#x201D; Microsoft Word&#x2019;s smart quotes</p>'
 
-You might find this feature useful, but Beautiful Soup doesn't use
-it. Beautiful Soup prefers the default behavior, which is to convert
-Microsoft smart quotes to Unicode characters along with everything
-else::
+You can also convert Microsoft smart quotes to ASCII quotes::
+
+ UnicodeDammit(markup, ["windows-1252"], smart_quotes_to="ascii").unicode_markup
+ # u'<p>I just "love" Microsoft Word\'s smart quotes</p>'
+
+Hopefully you'll find this feature useful, but Beautiful Soup doesn't
+use it. Beautiful Soup prefers the default behavior, which is to
+convert Microsoft smart quotes to Unicode characters along with
+everything else::
 
  UnicodeDammit(markup, ["windows-1252"]).unicode_markup
- # u'<p>I just \u201clove\u201d Microsoft Word</p>'
+ # u'<p>I just \u201clove\u201d Microsoft Word\u2019s smart quotes</p>'
 
 Parsing only part of a document
 ===============================