From 84d7f8dd319039d385b9afe1da751006be2c9859 Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Sun, 13 Feb 2011 10:37:24 -0500 Subject: Figured out the deal with CDATA sections in lxml and html5lib, and added comments and tests. --- tests/test_html5lib.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) (limited to 'tests/test_html5lib.py') diff --git a/tests/test_html5lib.py b/tests/test_html5lib.py index dada900..2d16bbb 100644 --- a/tests/test_html5lib.py +++ b/tests/test_html5lib.py @@ -1,4 +1,5 @@ from beautifulsoup.builder.html5lib_builder import HTML5TreeBuilder +from beautifulsoup.element import Comment from test_lxml import ( TestLXMLBuilder, TestLXMLBuilderInvalidMarkup, @@ -43,8 +44,19 @@ class TestHTML5Builder(TestLXMLBuilder): self.assertSoupEquals("

") self.assertSoupEquals(" ") - def test_cdata(self): - print self.soup("
") + def test_cdata_where_its_ok(self): + # In html5lib 0.9.0, all CDATA sections are converted into + # comments. In a later version (unreleased as of this + # writing), CDATA sections in tags like <svg> and <math> will + # be preserved. BUT, I'm not sure how Beautiful Soup needs to + # adjust to transform this preservation into the construction + # of a BS CData object. + markup = "foobar" + + # Eventually we should be able to do a find(text="foobar") and + # get a CData object. + self.assertSoupEquals(markup, "") + class TestHTML5BuilderInvalidMarkup(TestLXMLBuilderInvalidMarkup): """See `BuilderInvalidMarkupSmokeTest`.""" @@ -76,6 +88,13 @@ class TestHTML5BuilderInvalidMarkup(TestLXMLBuilderInvalidMarkup): markup = "

onetwo

" self.assertSoupEquals(markup, "

onetwo

") + def test_cdata_where_it_doesnt_belong(self): + # Random CDATA sections are converted into comments. + markup = "
" + soup = self.soup(markup) + data = soup.find(text="[CDATA[foo]]") + self.assertEquals(data.__class__, Comment) + def test_foo(self): isolatin = """Sacr\xe9 bleu!""" soup = self.soup(isolatin) -- cgit v1.2.3