summary | refs | log | tree | commit | diff
path: root/bs4
diff options
context:
space:
mode:
author Leonard Richardson <leonardr@segfault.org> 2012-07-03 15:10:36 -0400
committer Leonard Richardson <leonardr@segfault.org> 2012-07-03 15:10:36 -0400
commit ef51996386270f8bc3d7b4e4272d8b117b4f41af (patch)
tree 4570b2214c25707d3f090fb731d335f64e0fdbcd /bs4
parent fad4290ac93a33d7052ea1c7a73fb992b9579c3a (diff)
Use logging.warning() instead of warnings.warn() to notify the user that characters were replaced with REPLACEMENT CHARACTER. [bug=1013862]
Diffstat (limited to 'bs4')
-rw-r--r--  bs4/dammit.py  7
-rw-r--r--  bs4/tests/test_soup.py  18
2 files changed, 11 insertions, 14 deletions
diff --git a/bs4/dammit.py b/bs4/dammit.py
index 58cad9b..67ce66c 100644
--- a/bs4/dammit.py
+++ b/bs4/dammit.py
@@ -10,7 +10,7 @@ encoding; that's the tree builder's job.
import codecs
from htmlentitydefs import codepoint2name
import re
-import warnings
+import logging
# Autodetects character encodings. Very useful.
# Download from http://chardet.feedparser.org/
@@ -226,10 +226,9 @@ class UnicodeDammit:
if proposed_encoding != "ascii":
u = self._convert_from(proposed_encoding, "replace")
if u is not None:
- warnings.warn(
- UnicodeWarning(
+ logging.warning(
"Some characters could not be decoded, and were "
- "replaced with REPLACEMENT CHARACTER."))
+ "replaced with REPLACEMENT CHARACTER.")
self.contains_replacement_characters = True
break
diff --git a/bs4/tests/test_soup.py b/bs4/tests/test_soup.py
index 23a664e..4b5bab8 100644
--- a/bs4/tests/test_soup.py
+++ b/bs4/tests/test_soup.py
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
"""Tests of Beautiful Soup as a whole."""
+import logging
import unittest
from bs4 import (
BeautifulSoup,
@@ -262,20 +263,17 @@ class TestUnicodeDammit(unittest.TestCase):
<html><b>\330\250\330\252\330\261</b>
<i>\310\322\321\220\312\321\355\344</i></html>"""
chardet = bs4.dammit.chardet
+ logging.disable(logging.WARNING)
try:
bs4.dammit.chardet = None
- with warnings.catch_warnings(record=True) as w:
- dammit = UnicodeDammit(doc)
- self.assertEqual(True, dammit.contains_replacement_characters)
- self.assertTrue(u"\ufffd" in dammit.unicode_markup)
+ dammit = UnicodeDammit(doc)
+ self.assertEqual(True, dammit.contains_replacement_characters)
+ self.assertTrue(u"\ufffd" in dammit.unicode_markup)
- soup = BeautifulSoup(doc, "html.parser")
- self.assertTrue(soup.contains_replacement_characters)
-
- msg = w[0].message
- self.assertTrue(isinstance(msg, UnicodeWarning))
- self.assertTrue("Some characters could not be decoded" in str(msg))
+ soup = BeautifulSoup(doc, "html.parser")
+ self.assertTrue(soup.contains_replacement_characters)
finally:
+ logging.disable(logging.NOTSET)
bs4.dammit.chardet = chardet
def test_sniffed_xml_encoding(self):