From 749f01e2b664dcbf4f58dfbdcaa4d314f6e3b9ef Mon Sep 17 00:00:00 2001
From: Leonard Richardson <leonard.richardson@canonical.com>
Date: Thu, 10 Feb 2011 09:50:32 -0500
Subject: Added a test to verify that both lxml and html5lib convert entities
 to Unicode characters during parsing.

---
 tests/test_lxml.py | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

(limited to 'tests/test_lxml.py')
diff --git a/tests/test_lxml.py b/tests/test_lxml.py
index d16e8d9..e6e015b 100644
--- a/tests/test_lxml.py
+++ b/tests/test_lxml.py
@@ -114,14 +114,29 @@ class TestLXMLBuilder(SoupTest):
         soup = BeautifulSoup('<script>%s</script>' % javascript)
         self.assertEquals(soup.script.string, javascript)
 
+    def test_entities_converted_on_the_way_in(self):
+        # XML entities (&lt; &gt;), HTML entities (&eacute;) and numeric character
+        # references (&#32;) should all become Unicode characters during parsing.
+        text = "<p>&lt;&lt;sacr&eacute;&#32;bleu!&gt;&gt;</p>"
+        expected = u"<p><<sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></p>"
+        self.assertSoupEquals(text, expected)
+
+    # Tests below this line need work: they do not assert anything yet.
+
+    def test_entities_converted_on_the_way_out(self):
+        text = "<p>&lt;&lt;sacr&eacute;&#32;bleu!&gt;&gt;</p>"
+        expected = u"&lt;&lt;sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;".encode("utf-8")
+        soup = BeautifulSoup(text)
+        string = soup.p.string  # avoid the name `str`: it would shadow the builtin
+        #self.assertEquals(string.encode("utf-8"), expected)  # TODO: disabled; needs work
+
     def test_foo(self):
         isolatin = """<html><meta http-equiv="Content-type" content="text/html; charset=ISO-Latin-1" />Sacr\xe9 bleu!</html>"""
         soup = self.soup(isolatin)
 
         utf8 = isolatin.replace("ISO-Latin-1".encode(), "utf-8".encode())
         utf8 = utf8.replace("\xe9", "\xc3\xa9")
-
-        print soup
+        #print soup
 
 
 class TestLXMLBuilderInvalidMarkup(SoupTest):
-- 
cgit v1.2.3