diff options
-rw-r--r-- | NEWS.txt | 6 | ||||
-rw-r--r-- | bs4/dammit.py | 7 | ||||
-rw-r--r-- | bs4/tests/test_soup.py | 18 |
3 files changed, 17 insertions, 14 deletions
@@ -1,3 +1,9 @@ += 4.1.2 (Unreleased) = + +* Use logging.warning() instead of warnings.warn() to notify the user + that characters were replaced with REPLACEMENT + CHARACTER. [bug=1013862] + = 4.1.1 (20120703) = * Fixed an html5lib tree builder crash which happened when html5lib diff --git a/bs4/dammit.py b/bs4/dammit.py index 58cad9b..67ce66c 100644 --- a/bs4/dammit.py +++ b/bs4/dammit.py @@ -10,7 +10,7 @@ encoding; that's the tree builder's job. import codecs from htmlentitydefs import codepoint2name import re -import warnings +import logging # Autodetects character encodings. Very useful. # Download from http://chardet.feedparser.org/ @@ -226,10 +226,9 @@ class UnicodeDammit: if proposed_encoding != "ascii": u = self._convert_from(proposed_encoding, "replace") if u is not None: - warnings.warn( - UnicodeWarning( + logging.warning( "Some characters could not be decoded, and were " - "replaced with REPLACEMENT CHARACTER.")) + "replaced with REPLACEMENT CHARACTER.") self.contains_replacement_characters = True break diff --git a/bs4/tests/test_soup.py b/bs4/tests/test_soup.py index 23a664e..4b5bab8 100644 --- a/bs4/tests/test_soup.py +++ b/bs4/tests/test_soup.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- """Tests of Beautiful Soup as a whole.""" +import logging import unittest from bs4 import ( BeautifulSoup, @@ -262,20 +263,17 @@ class TestUnicodeDammit(unittest.TestCase): <html><b>\330\250\330\252\330\261</b> <i>\310\322\321\220\312\321\355\344</i></html>""" chardet = bs4.dammit.chardet + logging.disable(logging.WARNING) try: bs4.dammit.chardet = None - with warnings.catch_warnings(record=True) as w: - dammit = UnicodeDammit(doc) - self.assertEqual(True, dammit.contains_replacement_characters) - self.assertTrue(u"\ufffd" in dammit.unicode_markup) + dammit = UnicodeDammit(doc) + self.assertEqual(True, dammit.contains_replacement_characters) + self.assertTrue(u"\ufffd" in dammit.unicode_markup) - soup = BeautifulSoup(doc, "html.parser") - 
self.assertTrue(soup.contains_replacement_characters) - - msg = w[0].message - self.assertTrue(isinstance(msg, UnicodeWarning)) - self.assertTrue("Some characters could not be decoded" in str(msg)) + soup = BeautifulSoup(doc, "html.parser") + self.assertTrue(soup.contains_replacement_characters) finally: + logging.disable(logging.NOTSET) bs4.dammit.chardet = chardet def test_sniffed_xml_encoding(self): |