diff options
Diffstat (limited to 'bs4/tests/test_soup.py')
-rw-r--r-- | bs4/tests/test_soup.py | 19 |
1 files changed, 19 insertions, 0 deletions
diff --git a/bs4/tests/test_soup.py b/bs4/tests/test_soup.py index e2e2c30..3643aed 100644 --- a/bs4/tests/test_soup.py +++ b/bs4/tests/test_soup.py @@ -50,6 +50,11 @@ class TestConstructor(SoupTest): soup = self.soup(data) self.assertEqual(u"foo\0bar", soup.h1.string) + def test_exclude_encodings(self): + utf8_data = u"Räksmörgås".encode("utf-8") + soup = self.soup(utf8_data, exclude_encodings=["utf-8"]) + self.assertEqual("windows-1252", soup.original_encoding) + class TestWarnings(SoupTest): @@ -322,6 +327,20 @@ class TestUnicodeDammit(unittest.TestCase): dammit = UnicodeDammit(utf8_data, [bad_encoding]) self.assertEqual(dammit.original_encoding.lower(), 'utf-8') + def test_exclude_encodings(self): + # This is UTF-8. + utf8_data = u"Räksmörgås".encode("utf-8") + + # But if we exclude UTF-8 from consideration, the guess is + # Windows-1252. + dammit = UnicodeDammit(utf8_data, exclude_encodings=["utf-8"]) + self.assertEqual(dammit.original_encoding.lower(), 'windows-1252') + + # And if we exclude that, there is no valid guess at all. + dammit = UnicodeDammit( + utf8_data, exclude_encodings=["utf-8", "windows-1252"]) + self.assertEqual(dammit.original_encoding, None) + def test_encoding_detector_replaces_junk_in_encoding_name_with_replacement_character(self): detected = EncodingDetector( b'<?xml version="1.0" encoding="UTF-\xdb" ?>') |