From 397483b981b7fe84d4f43dcb5c01268c3bf81602 Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Fri, 18 Feb 2011 10:30:50 -0500 Subject: Added failing encoding conversion tests for html5lib. --- tests/test_html5lib.py | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'tests/test_html5lib.py') diff --git a/tests/test_html5lib.py b/tests/test_html5lib.py index 3efdebf..1034720 100644 --- a/tests/test_html5lib.py +++ b/tests/test_html5lib.py @@ -3,6 +3,7 @@ from beautifulsoup.element import Comment from test_lxml import ( TestLXMLBuilder, TestLXMLBuilderInvalidMarkup, + TestLXMLBuilderEncodingConversion, ) class TestHTML5Builder(TestLXMLBuilder): @@ -138,3 +139,11 @@ class TestHTML5BuilderInvalidMarkup(TestLXMLBuilderInvalidMarkup): utf8 = utf8.replace("\xe9", "\xc3\xa9") #print soup + + +class TestHTML5LibEncodingConversion(TestLXMLBuilderEncodingConversion): + @property + def default_builder(self): + return HTML5TreeBuilder() + + pass -- cgit v1.2.3 From 75c5891980c961dfe36745c1934010560666f938 Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Fri, 18 Feb 2011 11:29:43 -0500 Subject: Pass the user-specified encoding in to html5lib rather than dropping it on the floor. --- tests/test_html5lib.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'tests/test_html5lib.py') diff --git a/tests/test_html5lib.py b/tests/test_html5lib.py index 1034720..59d84a3 100644 --- a/tests/test_html5lib.py +++ b/tests/test_html5lib.py @@ -146,4 +146,12 @@ class TestHTML5LibEncodingConversion(TestLXMLBuilderEncodingConversion): def default_builder(self): return HTML5TreeBuilder() - pass + def test_real_hebrew_document(self): + # A real-world test to make sure we can convert ISO-8859-8 (a + # Hebrew encoding) to UTF-8. 
+ soup = self.soup(self.HEBREW_DOCUMENT, + fromEncoding="iso-8859-8") + self.assertEquals(soup.originalEncoding, 'iso8859-8') + self.assertEquals( + soup.encode('utf-8'), + self.HEBREW_DOCUMENT.decode("iso-8859-8").encode("utf-8")) -- cgit v1.2.3