summaryrefslogtreecommitdiff
path: root/bs4/tests/test_soup.py
diff options
context:
space:
mode:
Diffstat (limited to 'bs4/tests/test_soup.py')
-rw-r--r--bs4/tests/test_soup.py8
1 files changed, 8 insertions, 0 deletions
diff --git a/bs4/tests/test_soup.py b/bs4/tests/test_soup.py
index b74a246..e2e2c30 100644
--- a/bs4/tests/test_soup.py
+++ b/bs4/tests/test_soup.py
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
"""Tests of Beautiful Soup as a whole."""
+from pdb import set_trace
import logging
import unittest
import sys
@@ -20,6 +21,7 @@ import bs4.dammit
from bs4.dammit import (
EntitySubstitution,
UnicodeDammit,
+ EncodingDetector,
)
from bs4.testing import (
SoupTest,
@@ -320,6 +322,12 @@ class TestUnicodeDammit(unittest.TestCase):
dammit = UnicodeDammit(utf8_data, [bad_encoding])
self.assertEqual(dammit.original_encoding.lower(), 'utf-8')
+ def test_encoding_detector_replaces_junk_in_encoding_name_with_replacement_character(self):
+ detected = EncodingDetector(
+ b'<?xml version="1.0" encoding="UTF-\xdb" ?>')
+ encodings = list(detected.encodings)
+ assert u'utf-\N{REPLACEMENT CHARACTER}' in encodings
+
def test_detect_html5_style_meta_tag(self):
for data in (