From 3c9e9c3f780d59f2ef7927fe8246ab78fe4f6572 Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Thu, 9 Feb 2012 09:35:46 -0500 Subject: Improved Unicode, Dammit's behavior when you give it Unicode to begin with. --- NEWS.txt | 3 +++ bs4/dammit.py | 6 ++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/NEWS.txt b/NEWS.txt index b91f384..4535f19 100644 --- a/NEWS.txt +++ b/NEWS.txt @@ -20,6 +20,9 @@ * Fixed a bug that wrecked the tree if you replaced an element with an empty string. [bug=728697] +* Improved Unicode, Dammit's behavior when you give it Unicode to + begin with. + = 4.0.0b4 (20120208) = * Added BeautifulSoup.new_string() to go along with BeautifulSoup.new_tag() diff --git a/bs4/dammit.py b/bs4/dammit.py index 8897063..2b681e8 100644 --- a/bs4/dammit.py +++ b/bs4/dammit.py @@ -167,15 +167,17 @@ class UnicodeDammit: def __init__(self, markup, override_encodings=[], smart_quotes_to=None, isHTML=False): self.declared_html_encoding = None - self.markup, document_encoding, sniffed_encoding = \ - self._detectEncoding(markup, isHTML) self.smart_quotes_to = smart_quotes_to self.tried_encodings = [] + if markup == '' or isinstance(markup, unicode): self.original_encoding = None self.unicode_markup = unicode(markup) return + self.markup, document_encoding, sniffed_encoding = \ + self._detectEncoding(markup, isHTML) + u = None for proposed_encoding in ( override_encodings + [document_encoding, sniffed_encoding]): -- cgit v1.2.3