Fixed a bug that caused Unicode data put into UnicodeDammit to
return None instead of the original data. [bug=1214983]
author: Leonard Richardson <leonardr@segfault.org> 2013-10-02 08:18:54 -0400
committer: Leonard Richardson <leonardr@segfault.org> 2013-10-02 08:18:54 -0400
commit: d69a8433c010ad5c790566bd4d4e47a1db81988c (patch)
tree: 971a5b49f3cf21f68d03b20ac5aa3ccb6c38e0f3 /bs4/dammit.py
parent: c0de2023544605dc1940e4d24d493a14b5300066 (diff)
1 files changed, 9 insertions, 6 deletions
diff --git a/bs4/dammit.py b/bs4/dammit.py
index c859066..59640b7 100644
--- a/bs4/dammit.py
+++ b/bs4/dammit.py
@@ -1,10 +1,10 @@
 # -*- coding: utf-8 -*-
 """Beautiful Soup bonus library: Unicode, Dammit
 
-This class forces XML data into a standard format (usually to UTF-8 or
-Unicode).  It is heavily based on code from Mark Pilgrim's Universal
-Feed Parser. It does not rewrite the XML or HTML to reflect a new
-encoding; that's the tree builder's job.
+This library converts a bytestream to Unicode through any means
+necessary. It is heavily based on code from Mark Pilgrim's Universal
+Feed Parser. It works best on XML and HTML, but it does not rewrite the
+XML or HTML to reflect a new encoding; that's the tree builder's job.
 """
 
 import codecs
@@ -339,12 +339,15 @@ class UnicodeDammit:
 
         self.detector = EncodingDetector(markup, override_encodings, is_html)
 
-        # Is the data in Unicode to begin with?
+        # Short-circuit if the data is in Unicode to begin with.
         if isinstance(markup, unicode) or markup == '':
             self.markup = markup
             self.unicode_markup = unicode(markup)
+            self.original_encoding = None
+            return
 
-        # As a first step, the encoding detector may strip a byte-order mark.
+        # The encoding detector may have stripped a byte-order mark.
+        # Use the stripped markup from this point on.
         self.markup = self.detector.markup
 
         u = None
author	Leonard Richardson <leonardr@segfault.org>	2013-10-02 08:18:54 -0400
committer	Leonard Richardson <leonardr@segfault.org>	2013-10-02 08:18:54 -0400
commit	d69a8433c010ad5c790566bd4d4e47a1db81988c (patch)
tree	971a5b49f3cf21f68d03b20ac5aa3ccb6c38e0f3 /bs4/dammit.py
parent	c0de2023544605dc1940e4d24d493a14b5300066 (diff)