summaryrefslogtreecommitdiff
path: root/tests/test_html5lib.py
diff options
context:
space:
mode:
author Leonard Richardson <leonard.richardson@canonical.com> 2011-02-18 11:29:43 -0500
committer Leonard Richardson <leonard.richardson@canonical.com> 2011-02-18 11:29:43 -0500
commit 75c5891980c961dfe36745c1934010560666f938 (patch)
tree fa62ea107db97916fa538883ae561fba64ea13d9 /tests/test_html5lib.py
parent ddf9d04e42168fdb25b742b35efc891789a4b6c9 (diff)
Pass the user-specified encoding in to html5lib rather than dropping it on the floor.
Diffstat (limited to 'tests/test_html5lib.py')
-rw-r--r-- tests/test_html5lib.py 10
1 files changed, 9 insertions, 1 deletions
diff --git a/tests/test_html5lib.py b/tests/test_html5lib.py
index 1034720..59d84a3 100644
--- a/tests/test_html5lib.py
+++ b/tests/test_html5lib.py
@@ -146,4 +146,12 @@ class TestHTML5LibEncodingConversion(TestLXMLBuilderEncodingConversion):
def default_builder(self):
return HTML5TreeBuilder()
- pass
+ def test_real_hebrew_document(self):
+ # A real-world test to make sure we can convert ISO-8859-8 (a
+ # Hebrew encoding) to UTF-8.
+ soup = self.soup(self.HEBREW_DOCUMENT,
+ fromEncoding="iso-8859-8")
+ self.assertEquals(soup.originalEncoding, 'iso8859-8')
+ self.assertEquals(
+ soup.encode('utf-8'),
+ self.HEBREW_DOCUMENT.decode("iso-8859-8").encode("utf-8"))