diff options
author | Leonard Richardson <leonard.richardson@canonical.com> | 2012-02-16 13:31:20 -0500 |
---|---|---|
committer | Leonard Richardson <leonard.richardson@canonical.com> | 2012-02-16 13:31:20 -0500 |
commit | ffcebc274b84b85a0b8c93c2aca8756df4baa236 (patch) | |
tree | 29dab20e3176c47b37d8a133fd9d4fee52f75b63 /bs4/tests/test_soup.py | |
parent | 97ac0bc1947b3c5ea7d262d268f42ab629117441 (diff) |
Issue a warning if characters were replaced with REPLACEMENT CHARACTER during Unicode conversion.
Diffstat (limited to 'bs4/tests/test_soup.py')
-rw-r--r-- | bs4/tests/test_soup.py | 15 |
1 file changed, 10 insertions, 5 deletions
```diff
diff --git a/bs4/tests/test_soup.py b/bs4/tests/test_soup.py
index d744694..997a01f 100644
--- a/bs4/tests/test_soup.py
+++ b/bs4/tests/test_soup.py
@@ -177,9 +177,14 @@ class TestUnicodeDammit(unittest.TestCase):
         doc = b"""\357\273\277<?xml version="1.0" encoding="UTF-8"?>
 <html><b>\330\250\330\252\330\261</b>
 <i>\310\322\321\220\312\321\355\344</i></html>"""
-        dammit = UnicodeDammit(doc)
-        self.assertEqual(True, dammit.contains_replacement_characters)
-        self.assertTrue(u"\ufffd" in dammit.unicode_markup)
+        with warnings.catch_warnings(record=True) as w:
+            dammit = UnicodeDammit(doc)
+            self.assertEqual(True, dammit.contains_replacement_characters)
+            self.assertTrue(u"\ufffd" in dammit.unicode_markup)
+
+            soup = BeautifulSoup(doc)
+            self.assertTrue(soup.contains_replacement_characters)
 
-        soup = BeautifulSoup(doc)
-        self.assertTrue(soup.contains_replacement_characters)
+            msg = w[0].message
+            self.assertTrue(isinstance(msg, UnicodeWarning))
+            self.assertTrue("Some characters could not be decoded" in str(msg))
```