summaryrefslogtreecommitdiff
path: root/bs4/tests/test_soup.py
diff options
context:
space:
mode:
authorLeonard Richardson <leonardr@segfault.org>2015-06-25 07:04:00 -0400
committerLeonard Richardson <leonardr@segfault.org>2015-06-25 07:04:00 -0400
commitfee5d1d8f62254939939eb97ac56d7c4f158e6cf (patch)
treeda3fe37e5b5cf93c871ce1b2ec60cc7bfc2448a4 /bs4/tests/test_soup.py
parentbc85e533729738de1100fabe23ddc71b41933a7a (diff)
Fixed a crash in Unicode, Dammit's encoding detector when the name
of the encoding itself contained invalid bytes. [bug=1360913]
Diffstat (limited to 'bs4/tests/test_soup.py')
-rw-r--r--bs4/tests/test_soup.py8
1 files changed, 8 insertions, 0 deletions
diff --git a/bs4/tests/test_soup.py b/bs4/tests/test_soup.py
index b74a246..e2e2c30 100644
--- a/bs4/tests/test_soup.py
+++ b/bs4/tests/test_soup.py
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
"""Tests of Beautiful Soup as a whole."""
+from pdb import set_trace
import logging
import unittest
import sys
@@ -20,6 +21,7 @@ import bs4.dammit
from bs4.dammit import (
EntitySubstitution,
UnicodeDammit,
+ EncodingDetector,
)
from bs4.testing import (
SoupTest,
@@ -320,6 +322,12 @@ class TestUnicodeDammit(unittest.TestCase):
dammit = UnicodeDammit(utf8_data, [bad_encoding])
self.assertEqual(dammit.original_encoding.lower(), 'utf-8')
+ def test_encoding_detector_replaces_junk_in_encoding_name_with_replacement_character(self):
+ detected = EncodingDetector(
+ b'<?xml version="1.0" encoding="UTF-\xdb" ?>')
+ encodings = list(detected.encodings)
+ assert u'utf-\N{REPLACEMENT CHARACTER}' in encodings
+
def test_detect_html5_style_meta_tag(self):
for data in (