diff options
author | Leonard Richardson <leonardr@segfault.org> | 2015-06-25 07:04:00 -0400 |
---|---|---|
committer | Leonard Richardson <leonardr@segfault.org> | 2015-06-25 07:04:00 -0400 |
commit | fee5d1d8f62254939939eb97ac56d7c4f158e6cf (patch) | |
tree | da3fe37e5b5cf93c871ce1b2ec60cc7bfc2448a4 /bs4/tests/test_soup.py | |
parent | bc85e533729738de1100fabe23ddc71b41933a7a (diff) |
Fixed a crash in Unicode, Dammit's encoding detector when the name
of the encoding itself contained invalid bytes. [bug=1360913]
Diffstat (limited to 'bs4/tests/test_soup.py')
-rw-r--r-- | bs4/tests/test_soup.py | 8 |
1 files changed, 8 insertions, 0 deletions
diff --git a/bs4/tests/test_soup.py b/bs4/tests/test_soup.py index b74a246..e2e2c30 100644 --- a/bs4/tests/test_soup.py +++ b/bs4/tests/test_soup.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- """Tests of Beautiful Soup as a whole.""" +from pdb import set_trace import logging import unittest import sys @@ -20,6 +21,7 @@ import bs4.dammit from bs4.dammit import ( EntitySubstitution, UnicodeDammit, + EncodingDetector, ) from bs4.testing import ( SoupTest, @@ -320,6 +322,12 @@ class TestUnicodeDammit(unittest.TestCase): dammit = UnicodeDammit(utf8_data, [bad_encoding]) self.assertEqual(dammit.original_encoding.lower(), 'utf-8') + def test_encoding_detector_replaces_junk_in_encoding_name_with_replacement_character(self): + detected = EncodingDetector( + b'<?xml version="1.0" encoding="UTF-\xdb" ?>') + encodings = list(detected.encodings) + assert u'utf-\N{REPLACEMENT CHARACTER}' in encodings + def test_detect_html5_style_meta_tag(self): for data in ( |