From 19f05a586c79b86be8ebe06a3728ab9a94162bee Mon Sep 17 00:00:00 2001
From: Leonard Richardson
Date: Fri, 31 May 2013 09:17:11 -0400
Subject: Create a new lxml parser object for every new parsing strategy.

---
 bs4/testing.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'bs4/testing.py')

diff --git a/bs4/testing.py b/bs4/testing.py
index d8ff6b7..c363a89 100644
--- a/bs4/testing.py
+++ b/bs4/testing.py
@@ -279,6 +279,14 @@ class HTMLTreeBuilderSmokeTest(object):
     # to detect any differences between them.
     #
 
+    def test_can_parse_unicode_document(self):
+        # A seemingly innocuous document... but it's in Unicode! And
+        # it contains characters that can't be represented in the
+        # encoding found in the declaration! The horror!
+        markup = u'Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!'
+        soup = self.soup(markup)
+        self.assertEqual(u'Sacr\xe9 bleu!', soup.body.string)
+
     def test_soupstrainer(self):
         """Parsers should be able to work with SoupStrainers."""
         strainer = SoupStrainer("b")
@@ -482,6 +490,11 @@ class XMLTreeBuilderSmokeTest(object):
         encoded = soup.encode()
         self.assertTrue(b"< < hey > >" in encoded)
 
+    def test_can_parse_unicode_document(self):
+        markup = u'Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!'
+        soup = self.soup(markup)
+        self.assertEqual(u'Sacr\xe9 bleu!', soup.root.string)
+
     def test_popping_namespaced_tag(self):
         markup = 'b2012-07-02T20:33:42Zcd'
         soup = self.soup(markup)
-- 
cgit v1.2.3