From 5261e6c36d0f0c6cea364390dbe9f4cce66306e8 Mon Sep 17 00:00:00 2001
From: Leonard Richardson
Date: Thu, 9 Feb 2012 10:38:14 -0500
Subject: Unicode, Dammit now detects the encoding in HTML 5-style <meta> tags like <meta charset="utf-8" />. [bug=837268]

---
 bs4/tests/test_soup.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'bs4/tests/test_soup.py')

diff --git a/bs4/tests/test_soup.py b/bs4/tests/test_soup.py
index f995678..ddfc68c 100644
--- a/bs4/tests/test_soup.py
+++ b/bs4/tests/test_soup.py
@@ -151,3 +151,14 @@ class TestUnicodeDammit(unittest.TestCase):
         for bad_encoding in ['.utf8', '...', 'utF---16.!']:
             dammit = UnicodeDammit(utf8_data, [bad_encoding])
             self.assertEqual(dammit.original_encoding, 'utf-8')
+
+    def test_detect_html5_style_meta_tag(self):
+
+        for data in (
+            b'<html><meta charset="euc-jp" /></html>',
+            b"<html><meta charset='euc-jp' /></html>",
+            b"<html><meta charset=euc-jp /></html>",
+            b"<html><meta charset=euc-jp/></html>"):
+            dammit = UnicodeDammit(data, is_html=True)
+            self.assertEquals(
+                "euc-jp", dammit.original_encoding)
-- 
cgit v1.2.3