From ddf9d04e42168fdb25b742b35efc891789a4b6c9 Mon Sep 17 00:00:00 2001
From: Leonard Richardson
Date: Fri, 18 Feb 2011 11:09:59 -0500
Subject: Have the html5lib builder set the sniffed encoding after parsing, rather than before as happens with lxml.

---
 tests/test_lxml.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'tests/test_lxml.py')

diff --git a/tests/test_lxml.py b/tests/test_lxml.py
index 1218763..98dd8c2 100644
--- a/tests/test_lxml.py
+++ b/tests/test_lxml.py
@@ -383,10 +383,13 @@ class TestLXMLBuilderEncodingConversion(SoupTest):
 
     def setUp(self):
         super(TestLXMLBuilderEncodingConversion, self).setUp()
-        self.unicode_data = u"\xe9"
+        self.unicode_data = u"\N{LATIN SMALL LETTER E WITH ACUTE}"
         self.utf8_data = self.unicode_data.encode("utf-8")
+
+        # Just so you know what it looks like.
         self.assertEqual(
-            self.utf8_data, "\xc3\xa9")
+            self.utf8_data,
+            "\xc3\xa9")
 
     def test_ascii_in_unicode_out(self):
         # ASCII input is converted to Unicode. The originalEncoding
-- 
cgit v1.2.3