4 files changed, 45 insertions, 15 deletions
diff --git a/NEWS.txt b/NEWS.txt
index cd28490..5dbd044 100644
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -1,7 +1,14 @@
+= 4.0.0b7 () =
+
+* Issue a UnicodeWarning if characters were replaced with REPLACEMENT
+  CHARACTER during Unicode conversion.
+
 = 4.0.0b6 (20110216) =
 
-* The value of multi-valued attributes like "class" are always turned
-  into a list, even if there's only one value.
+* Multi-valued attributes like "class" always have a list of values,
+  even if there's only one value in the list.
+
+* Added a number of multi-valued attributes defined in HTML5.
 
 * Stopped generating a space before the slash that closes an
   empty-element tag. This may come back if I add a special XHTML mode
diff --git a/bs4/dammit.py b/bs4/dammit.py
index 76ac9ce..a35c213 100644
--- a/bs4/dammit.py
+++ b/bs4/dammit.py
@@ -9,6 +9,7 @@ encoding; that's the tree builder's job.
 import codecs
 from htmlentitydefs import codepoint2name
 import re
+import warnings
 
 # Autodetects character encodings. Very useful.
 # Download from http://chardet.feedparser.org/
@@ -212,6 +213,10 @@ class UnicodeDammit:
                 if proposed_encoding != "ascii":
                     u = self._convert_from(proposed_encoding, "replace")
                 if u is not None:
+                    warnings.warn(
+                        UnicodeWarning(
+                            "Some characters could not be decoded, and were "
+                            "replaced with REPLACEMENT CHARACTER."))
                     self.contains_replacement_characters = True
                     break
 
diff --git a/bs4/doc/source/index.rst b/bs4/doc/source/index.rst
index 8328ed7..200317a 100644
--- a/bs4/doc/source/index.rst
+++ b/bs4/doc/source/index.rst
@@ -303,19 +303,24 @@ done by treating the tag as a dictionary::
 Multi-valued attributes
 &&&&&&&&&&&&&&&&&&&&&&&
 
-HTML defines a few attributes that can have multiple values. The most
-common is ``class`` (a tag can have more than one CSS class), but
-there are a few others: ``rel``, ``rev``, ``archive``,
-``accept-charset``, and ``headers``. If one of these attributes has
-more than one value, Beautiful Soup will turn its values into a list::
+HTML 4 defines a few attributes that can have multiple values. HTML5
+removes a couple of them, but defines a few more. The most common
+multi-valued attribute is ``class`` (that is, a tag can have more than
+one CSS class). Others include ``rel``, ``rev``, ``accept-charset``,
+``headers``, and ``accesskey``. Beautiful Soup presents the value(s)
+of a multi-valued attribute as a list::
 
  css_soup = BeautifulSoup('<p class="body strikeout"></p>')
  css_soup.p['class']
  # ["body", "strikeout"]
 
+ css_soup = BeautifulSoup('<p class="body"></p>')
+ css_soup.p['class']
+ # ["body"]
+
 If an attribute `looks` like it has more than one value, but it's not
-one of the special attributes listed above, Beautiful Soup will leave
-the attribute alone::
+a multi-valued attribute as defined by any version of the HTML
+standard, Beautiful Soup will leave the attribute alone::
 
  id_soup = BeautifulSoup('<p id="my id"></p>')
  id_soup.p['id']
@@ -326,11 +331,19 @@ consolidated::
 
  rel_soup = BeautifulSoup('<p>Back to the <a rel="index">homepage</a></p>')
  rel_soup.a['rel']
- # 'index'
+ # ['index']
  rel_soup.a['rel'] = ['index', 'contents']
  print(rel_soup.p)
  # <p>Back to the <a rel="index contents">homepage</a></p>
 
+If you parse a document as XML, there are no multi-valued attributes::
+
+ xml_soup = BeautifulSoup('<p class="body strikeout"></p>', 'xml')
+ xml_soup.p['class']
+ # u'body strikeout'
+
+
+
 ``NavigableString``
 -------------------
 
diff --git a/bs4/tests/test_soup.py b/bs4/tests/test_soup.py
index d744694..997a01f 100644
--- a/bs4/tests/test_soup.py
+++ b/bs4/tests/test_soup.py
@@ -177,9 +177,14 @@ class TestUnicodeDammit(unittest.TestCase):
         doc = b"""\357\273\277<?xml version="1.0" encoding="UTF-8"?>
 <html><b>\330\250\330\252\330\261</b>
 <i>\310\322\321\220\312\321\355\344</i></html>"""
-        dammit = UnicodeDammit(doc)
-        self.assertEqual(True, dammit.contains_replacement_characters)
-        self.assertTrue(u"\ufffd" in dammit.unicode_markup)
+        with warnings.catch_warnings(record=True) as w:
+            dammit = UnicodeDammit(doc)
+            self.assertEqual(True, dammit.contains_replacement_characters)
+            self.assertTrue(u"\ufffd" in dammit.unicode_markup)
+
+            soup = BeautifulSoup(doc)
+            self.assertTrue(soup.contains_replacement_characters)
 
-        soup = BeautifulSoup(doc)
-        self.assertTrue(soup.contains_replacement_characters)
+            msg = w[0].message
+            self.assertTrue(isinstance(msg, UnicodeWarning))
+            self.assertTrue("Some characters could not be decoded" in str(msg))