From 2208a31babdd6ec331bde1ae82b83b35553cb0ce Mon Sep 17 00:00:00 2001
From: Leonard Richardson
Date: Fri, 18 Feb 2011 08:53:17 -0500
Subject: Ported the encoding tests, and split them up into logical chunks. The html5lib writer isn't setting up the charset substitution.

---
 tests/test_tree.py | 41 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

(limited to 'tests/test_tree.py')

diff --git a/tests/test_tree.py b/tests/test_tree.py
index e424e0b..02efead 100644
--- a/tests/test_tree.py
+++ b/tests/test_tree.py
@@ -817,6 +817,47 @@ class TestPersistence(SoupTest):
         self.assertEqual(loaded.decode(), soup.decode())
 
 
+class TestSubstitutions(SoupTest):
+
+    def test_encoding_substitution(self):
+        # Here's the <meta> tag saying that a document is
+        # encoded in Shift-JIS.
+        meta_tag = ('<meta content="text/html; charset=x-sjis" '
+                    'http-equiv="Content-type" />')
+        soup = self.soup(meta_tag)
+
+        # Parse the document, and the charset is replaced with a
+        # generic value.
+        self.assertEquals(soup.meta['content'],
+                          'text/html; charset=%SOUP-ENCODING%')
+
+        # Encode the document into some encoding, and the encoding is
+        # substituted into the meta tag.
+        utf_8 = soup.encode("utf-8")
+        self.assertTrue("charset=utf-8" in utf_8)
+
+        euc_jp = soup.encode("euc_jp")
+        self.assertTrue("charset=euc_jp" in euc_jp)
+
+        shift_jis = soup.encode("shift-jis")
+        self.assertTrue("charset=shift-jis" in shift_jis)
+
+        utf_16_u = soup.encode("utf-16").decode("utf-16")
+        self.assertTrue("charset=utf-16" in utf_16_u)
+
+    def test_encoding_substitution_doesnt_happen_if_tag_is_strained(self):
+        markup = ('<head><meta content="text/html; charset=x-sjis" '
+                  'http-equiv="Content-type" /></head><pre>foo</pre>')
+
+        # Beautiful Soup used to try to rewrite the meta tag even if the
+        # meta tag got filtered out by the strainer. This test makes
+        # sure that doesn't happen.
+        strainer = SoupStrainer('pre')
+        soup = BeautifulSoup(markup, parseOnlyThese=strainer)
+        self.assertEquals(soup.contents[0].name, 'pre')
+
+
+
 class TestEncoding(SoupTest):
     """Test the ability to encode objects into strings."""
 
-- 
cgit v1.2.3