diff options
-rw-r--r-- | beautifulsoup/dammit.py | 11 | ||||
-rw-r--r-- | tests/test_lxml.py | 9 |
2 files changed, 9 insertions, 11 deletions
diff --git a/beautifulsoup/dammit.py b/beautifulsoup/dammit.py index 78bd4b2..954ca54 100644 --- a/beautifulsoup/dammit.py +++ b/beautifulsoup/dammit.py @@ -58,13 +58,12 @@ class UnicodeDammit: return u = None - for proposedEncoding in overrideEncodings: - u = self._convertFrom(proposedEncoding) - if u: break - if not u: - for proposedEncoding in (documentEncoding, sniffedEncoding): + for proposedEncoding in ( + overrideEncodings + [documentEncoding, sniffedEncoding]): + if proposedEncoding is not None: u = self._convertFrom(proposedEncoding) - if u: break + if u: + break # If no luck and we have auto-detection library, try that: if not u and chardet and not isinstance(self.markup, unicode): diff --git a/tests/test_lxml.py b/tests/test_lxml.py index a1f156a..4c11b1d 100644 --- a/tests/test_lxml.py +++ b/tests/test_lxml.py @@ -383,13 +383,12 @@ class TestLXMLBuilderEncodingConversion(SoupTest): def setUp(self): super(TestLXMLBuilderEncodingConversion, self).setUp() - self.unicode_data = u"<html><head></head><body><foo>\N{LATIN SMALL LETTER E WITH ACUTE}</foo></body></html>" + self.unicode_data = u"<html><head></head><body><foo>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</foo></body></html>" self.utf8_data = self.unicode_data.encode("utf-8") - # Just so you know what it looks like. self.assertEqual( self.utf8_data, - "<html><head></head><body><foo>\xc3\xa9</foo></body></html>") + "<html><head></head><body><foo>Sacr\xc3\xa9 bleu!</foo></body></html>") def test_ascii_in_unicode_out(self): # ASCII input is converted to Unicode. The originalEncoding @@ -406,7 +405,7 @@ class TestLXMLBuilderEncodingConversion(SoupTest): # is not set. soup_from_unicode = self.soup(self.unicode_data) self.assertEquals(soup_from_unicode.decode(), self.unicode_data) - self.assertEquals(soup_from_unicode.foo.string, u'\xe9') + self.assertEquals(soup_from_unicode.foo.string, u'Sacr\xe9 bleu!') self.assertEquals(soup_from_unicode.originalEncoding, None) def test_utf8_in_unicode_out(self): @@ -414,7 +413,7 @@ class TestLXMLBuilderEncodingConversion(SoupTest): # attribute is set. soup_from_utf8 = self.soup(self.utf8_data) self.assertEquals(soup_from_utf8.decode(), self.unicode_data) - self.assertEquals(soup_from_utf8.foo.string, u'\xe9') + self.assertEquals(soup_from_utf8.foo.string, u'Sacr\xe9 bleu!') def test_utf8_out(self): # The internal data structures can be encoded as UTF-8. |