diff options
author | Leonard Richardson <leonard.richardson@canonical.com> | 2012-02-09 10:38:14 -0500 |
---|---|---|
committer | Leonard Richardson <leonard.richardson@canonical.com> | 2012-02-09 10:38:14 -0500 |
commit | 5261e6c36d0f0c6cea364390dbe9f4cce66306e8 (patch) | |
tree | 37c0265b0b1f582ac344eaca569bde7c3bee80a9 /bs4/tests/test_soup.py | |
parent | 274ceca63fe55336201cd611d897662e5d000e8f (diff) |
Unicode, Dammit now detects the encoding in HTML 5-style <meta> tags like <meta charset="utf-8" />. [bug=837268]
Diffstat (limited to 'bs4/tests/test_soup.py')
-rw-r--r-- | bs4/tests/test_soup.py | 11 |
1 file changed, 11 insertions, 0 deletions
```diff
diff --git a/bs4/tests/test_soup.py b/bs4/tests/test_soup.py
index f995678..ddfc68c 100644
--- a/bs4/tests/test_soup.py
+++ b/bs4/tests/test_soup.py
@@ -151,3 +151,14 @@ class TestUnicodeDammit(unittest.TestCase):
         for bad_encoding in ['.utf8', '...', 'utF---16.!']:
             dammit = UnicodeDammit(utf8_data, [bad_encoding])
             self.assertEqual(dammit.original_encoding, 'utf-8')
+
+    def test_detect_html5_style_meta_tag(self):
+
+        for data in (
+            b'<html><meta charset="euc-jp" /></html>',
+            b"<html><meta charset='euc-jp' /></html>",
+            b"<html><meta charset=euc-jp /></html>",
+            b"<html><meta charset=euc-jp/></html>"):
+            dammit = UnicodeDammit(data, is_html=True)
+            self.assertEquals(
+                "euc-jp", dammit.original_encoding)
```