summary refs log tree commit diff
path: root/tests/test_lxml.py
diff options
context:
space:
mode:
Diffstat (limited to 'tests/test_lxml.py')
-rw-r--r-- tests/test_lxml.py 19
1 files changed, 17 insertions, 2 deletions
diff --git a/tests/test_lxml.py b/tests/test_lxml.py
index d16e8d9..e6e015b 100644
--- a/tests/test_lxml.py
+++ b/tests/test_lxml.py
@@ -114,14 +114,29 @@ class TestLXMLBuilder(SoupTest):
soup = BeautifulSoup('<script>%s</script>' % javascript)
self.assertEquals(soup.script.string, javascript)
+    def test_entities_converted_on_the_way_in(self):
+        # The markup mixes XML entities (&lt;, &gt;), an HTML entity
+        # (&eacute;) and a numeric reference (&#32;); parsing is expected
+        # to replace each of them with the Unicode character it names.
+        markup = "<p>&lt;&lt;sacr&eacute;&#32;bleu!&gt;&gt;</p>"
+        converted = u"<p><<sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></p>"
+        self.assertSoupEquals(markup, converted)
+
+ # Tests below this line need work.
+
+    def test_entities_converted_on_the_way_out(self):
+        # Work in progress: the only assertion is still commented out, so
+        # at present this method just parses the markup and reads
+        # soup.p.string without checking the result.
+        text = "<p>&lt;&lt;sacr&eacute;&#32;bleu!&gt;&gt;</p>"
+        # Target output as UTF-8 bytes: angle brackets escaped as XML
+        # entities, e-acute and the space kept as literal characters.
+        expected = u"&lt;&lt;sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;".encode("utf-8")
+        soup = BeautifulSoup(text)
+        # Deliberately not called ``str``: that would shadow the builtin.
+        paragraph_text = soup.p.string
+        #self.assertEquals(paragraph_text.encode("utf-8"), expected)
+
def test_foo(self):
# Unfinished encoding test: it builds two byte-string variants of the same
# document but makes no assertions in the lines shown here.
# Document whose <meta> tag declares ISO-Latin-1 and whose body carries the
# single byte \xe9.
isolatin = """<html><meta http-equiv="Content-type" content="text/html; charset=ISO-Latin-1" />Sacr\xe9 bleu!</html>"""
soup = self.soup(isolatin)
# Same document relabelled as utf-8, with \xe9 swapped for the two-byte
# sequence \xc3\xa9 (the UTF-8 encoding of U+00E9).
# NOTE(review): utf8 is computed but never used below - presumably a
# comparison against the parsed soup is still to be written.
utf8 = isolatin.replace("ISO-Latin-1".encode(), "utf-8".encode())
utf8 = utf8.replace("\xe9", "\xc3\xa9")
-
- print soup
+ #print soup
class TestLXMLBuilderInvalidMarkup(SoupTest):