diff options
Diffstat (limited to 'tests/test_html5lib.py')
-rw-r--r-- | tests/test_html5lib.py | 17 |
1 files changed, 17 insertions, 0 deletions
diff --git a/tests/test_html5lib.py b/tests/test_html5lib.py index 3efdebf..59d84a3 100644 --- a/tests/test_html5lib.py +++ b/tests/test_html5lib.py @@ -3,6 +3,7 @@ from beautifulsoup.element import Comment from test_lxml import ( TestLXMLBuilder, TestLXMLBuilderInvalidMarkup, + TestLXMLBuilderEncodingConversion, ) class TestHTML5Builder(TestLXMLBuilder): @@ -138,3 +139,19 @@ class TestHTML5BuilderInvalidMarkup(TestLXMLBuilderInvalidMarkup): utf8 = utf8.replace("\xe9", "\xc3\xa9") #print soup + + +class TestHTML5LibEncodingConversion(TestLXMLBuilderEncodingConversion): + @property + def default_builder(self): + return HTML5TreeBuilder() + + def test_real_hebrew_document(self): + # A real-world test to make sure we can convert ISO-8859-8 (a + # Hebrew encoding) to UTF-8. + soup = self.soup(self.HEBREW_DOCUMENT, + fromEncoding="iso-8859-8") + self.assertEquals(soup.originalEncoding, 'iso8859-8') + self.assertEquals( + soup.encode('utf-8'), + self.HEBREW_DOCUMENT.decode("iso-8859-8").encode("utf-8")) |