summaryrefslogtreecommitdiff
path: root/tests/test_html5lib.py
diff options
context:
space:
mode:
Diffstat (limited to 'tests/test_html5lib.py')
-rw-r--r--tests/test_html5lib.py17
1 files changed, 17 insertions, 0 deletions
diff --git a/tests/test_html5lib.py b/tests/test_html5lib.py
index 3efdebf..59d84a3 100644
--- a/tests/test_html5lib.py
+++ b/tests/test_html5lib.py
@@ -3,6 +3,7 @@ from beautifulsoup.element import Comment
from test_lxml import (
TestLXMLBuilder,
TestLXMLBuilderInvalidMarkup,
+ TestLXMLBuilderEncodingConversion,
)
class TestHTML5Builder(TestLXMLBuilder):
@@ -138,3 +139,19 @@ class TestHTML5BuilderInvalidMarkup(TestLXMLBuilderInvalidMarkup):
utf8 = utf8.replace("\xe9", "\xc3\xa9")
#print soup
+
+
+class TestHTML5LibEncodingConversion(TestLXMLBuilderEncodingConversion):
+ @property
+ def default_builder(self):
+ return HTML5TreeBuilder()
+
+ def test_real_hebrew_document(self):
+ # A real-world test to make sure we can convert ISO-8859-8 (a
+ # Hebrew encoding) to UTF-8.
+ soup = self.soup(self.HEBREW_DOCUMENT,
+ fromEncoding="iso-8859-8")
+ self.assertEquals(soup.originalEncoding, 'iso8859-8')
+ self.assertEquals(
+ soup.encode('utf-8'),
+ self.HEBREW_DOCUMENT.decode("iso-8859-8").encode("utf-8"))