summary | refs | log | tree | commit | diff
path: root/tests/test_tree.py
diff options
context:
space:
mode:
author: Leonard Richardson <leonard.richardson@canonical.com> 2011-02-18 08:57:46 -0500
committer: Leonard Richardson <leonard.richardson@canonical.com> 2011-02-18 08:57:46 -0500
commit: cb85520f7627a914e10e2d3ea52d7066bdf3984d (patch)
tree: c3127a3f3ecd0999a245c3b9de27dfa648cfafba /tests/test_tree.py
parent: 01ba489720b7bde42890221eb83a0b743d7aea8d (diff)
parent: fcb11108f0c7913c0ab0d2f60bbff0f96ca5c16b (diff)
Added tests for META tag rewriting and encoding smoke tests.
Diffstat (limited to 'tests/test_tree.py')
-rw-r--r-- tests/test_tree.py | 41
1 files changed, 41 insertions, 0 deletions
diff --git a/tests/test_tree.py b/tests/test_tree.py
index e424e0b..02efead 100644
--- a/tests/test_tree.py
+++ b/tests/test_tree.py
@@ -817,6 +817,47 @@ class TestPersistence(SoupTest):
self.assertEqual(loaded.decode(), soup.decode())
+class TestSubstitutions(SoupTest):
+    """Test that the charset named in a <meta> tag is rewritten on output."""
+
+    def test_encoding_substitution(self):
+        # A <meta> tag declaring that the document is encoded in Shift-JIS.
+        meta_tag = ('<meta content="text/html; charset=x-sjis" '
+                    'http-equiv="Content-type" />')
+        soup = self.soup(meta_tag)
+
+        # Parse the document, and the charset is replaced with a
+        # generic value.
+        self.assertEqual(soup.meta['content'],
+                         'text/html; charset=%SOUP-ENCODING%')
+
+        # Encode the document into some encoding, and the encoding is
+        # substituted into the meta tag.
+        utf_8 = soup.encode("utf-8")
+        self.assertTrue("charset=utf-8" in utf_8)
+
+        euc_jp = soup.encode("euc_jp")
+        self.assertTrue("charset=euc_jp" in euc_jp)
+
+        shift_jis = soup.encode("shift-jis")
+        self.assertTrue("charset=shift-jis" in shift_jis)
+
+        utf_16_u = soup.encode("utf-16").decode("utf-16")
+        self.assertTrue("charset=utf-16" in utf_16_u)
+
+    def test_encoding_substitution_doesnt_happen_if_tag_is_strained(self):
+        markup = ('<head><meta content="text/html; charset=x-sjis" '
+                  'http-equiv="Content-type" /></head><pre>foo</pre>')
+
+        # Beautiful Soup used to try to rewrite the meta tag even if the
+        # meta tag got filtered out by the strainer. This test makes
+        # sure that doesn't happen.
+        strainer = SoupStrainer('pre')
+        soup = BeautifulSoup(markup, parseOnlyThese=strainer)
+        self.assertEqual(soup.contents[0].name, 'pre')
+
+
+
class TestEncoding(SoupTest):
"""Test the ability to encode objects into strings."""