summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Leonard Richardson <leonard.richardson@canonical.com> 2012-02-09 09:35:46 -0500
committer: Leonard Richardson <leonard.richardson@canonical.com> 2012-02-09 09:35:46 -0500
commit: 3c9e9c3f780d59f2ef7927fe8246ab78fe4f6572 (patch)
tree: f2c35a22d638d7a2886139b8c8b60eca0575949d
parent: c199d176f1ebb4289428e1ba8a939b2cd1b55218 (diff)
Improved Unicode, Dammit's behavior when you give it Unicode to begin with.
-rw-r--r-- NEWS.txt 3
-rw-r--r-- bs4/dammit.py 6
2 files changed, 7 insertions, 2 deletions
diff --git a/NEWS.txt b/NEWS.txt
index b91f384..4535f19 100644
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -20,6 +20,9 @@
* Fixed a bug that wrecked the tree if you replaced an element with an
empty string. [bug=728697]
+* Improved Unicode, Dammit's behavior when you give it Unicode to
+ begin with.
+
= 4.0.0b4 (20120208) =
* Added BeautifulSoup.new_string() to go along with BeautifulSoup.new_tag()
diff --git a/bs4/dammit.py b/bs4/dammit.py
index 8897063..2b681e8 100644
--- a/bs4/dammit.py
+++ b/bs4/dammit.py
@@ -167,15 +167,17 @@ class UnicodeDammit:
def __init__(self, markup, override_encodings=[],
smart_quotes_to=None, isHTML=False):
self.declared_html_encoding = None
- self.markup, document_encoding, sniffed_encoding = \
- self._detectEncoding(markup, isHTML)
self.smart_quotes_to = smart_quotes_to
self.tried_encodings = []
+
if markup == '' or isinstance(markup, unicode):
self.original_encoding = None
self.unicode_markup = unicode(markup)
return
+ self.markup, document_encoding, sniffed_encoding = \
+ self._detectEncoding(markup, isHTML)
+
u = None
for proposed_encoding in (
override_encodings + [document_encoding, sniffed_encoding]):