summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- NEWS.txt 6
-rw-r--r-- bs4/dammit.py 7
-rw-r--r-- bs4/tests/test_soup.py 18
3 files changed, 17 insertions, 14 deletions
diff --git a/NEWS.txt b/NEWS.txt
index d4484b6..6b21baa 100644
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -1,3 +1,9 @@
+= 4.1.2 (Unreleased) =
+
+* Use logging.warning() instead of warnings.warn() to notify the user
+ that characters were replaced with REPLACEMENT
+ CHARACTER. [bug=1013862]
+
= 4.1.1 (20120703) =
* Fixed an html5lib tree builder crash which happened when html5lib
diff --git a/bs4/dammit.py b/bs4/dammit.py
index 58cad9b..67ce66c 100644
--- a/bs4/dammit.py
+++ b/bs4/dammit.py
@@ -10,7 +10,7 @@ encoding; that's the tree builder's job.
import codecs
from htmlentitydefs import codepoint2name
import re
-import warnings
+import logging
# Autodetects character encodings. Very useful.
# Download from http://chardet.feedparser.org/
@@ -226,10 +226,9 @@ class UnicodeDammit:
if proposed_encoding != "ascii":
u = self._convert_from(proposed_encoding, "replace")
if u is not None:
- warnings.warn(
- UnicodeWarning(
+ logging.warning(
"Some characters could not be decoded, and were "
- "replaced with REPLACEMENT CHARACTER."))
+ "replaced with REPLACEMENT CHARACTER.")
self.contains_replacement_characters = True
break
diff --git a/bs4/tests/test_soup.py b/bs4/tests/test_soup.py
index 23a664e..4b5bab8 100644
--- a/bs4/tests/test_soup.py
+++ b/bs4/tests/test_soup.py
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
"""Tests of Beautiful Soup as a whole."""
+import logging
import unittest
from bs4 import (
BeautifulSoup,
@@ -262,20 +263,17 @@ class TestUnicodeDammit(unittest.TestCase):
<html><b>\330\250\330\252\330\261</b>
<i>\310\322\321\220\312\321\355\344</i></html>"""
chardet = bs4.dammit.chardet
+ logging.disable(logging.WARNING)
try:
bs4.dammit.chardet = None
- with warnings.catch_warnings(record=True) as w:
- dammit = UnicodeDammit(doc)
- self.assertEqual(True, dammit.contains_replacement_characters)
- self.assertTrue(u"\ufffd" in dammit.unicode_markup)
+ dammit = UnicodeDammit(doc)
+ self.assertEqual(True, dammit.contains_replacement_characters)
+ self.assertTrue(u"\ufffd" in dammit.unicode_markup)
- soup = BeautifulSoup(doc, "html.parser")
- self.assertTrue(soup.contains_replacement_characters)
-
- msg = w[0].message
- self.assertTrue(isinstance(msg, UnicodeWarning))
- self.assertTrue("Some characters could not be decoded" in str(msg))
+ soup = BeautifulSoup(doc, "html.parser")
+ self.assertTrue(soup.contains_replacement_characters)
finally:
+ logging.disable(logging.NOTSET)
bs4.dammit.chardet = chardet
def test_sniffed_xml_encoding(self):