diff options
author | Leonard Richardson <leonard.richardson@canonical.com> | 2011-02-13 10:37:24 -0500 |
---|---|---|
committer | Leonard Richardson <leonard.richardson@canonical.com> | 2011-02-13 10:37:24 -0500 |
commit | 84d7f8dd319039d385b9afe1da751006be2c9859 (patch) | |
tree | b265fc282c99140d1371962b2339bc32cde1beff /tests/test_html5lib.py | |
parent | d89c8878ea86a2575c87e9fad8081cfcd81e0bcd (diff) |
Figured out the deal with CDATA sections in lxml and html5lib, and added comments and tests.
Diffstat (limited to 'tests/test_html5lib.py')
-rw-r--r-- | tests/test_html5lib.py | 23 |
1 file changed, 21 insertions, 2 deletions
```diff
diff --git a/tests/test_html5lib.py b/tests/test_html5lib.py
index dada900..2d16bbb 100644
--- a/tests/test_html5lib.py
+++ b/tests/test_html5lib.py
@@ -1,4 +1,5 @@
 from beautifulsoup.builder.html5lib_builder import HTML5TreeBuilder
+from beautifulsoup.element import Comment
 from test_lxml import (
     TestLXMLBuilder,
     TestLXMLBuilderInvalidMarkup,
@@ -43,8 +44,19 @@ class TestHTML5Builder(TestLXMLBuilder):
         self.assertSoupEquals("<p> </p>")
         self.assertSoupEquals("<b> </b>")
 
-    def test_cdata(self):
-        print self.soup("<div><![CDATA[foo]]></div>")
+    def test_cdata_where_its_ok(self):
+        # In html5lib 0.9.0, all CDATA sections are converted into
+        # comments. In a later version (unreleased as of this
+        # writing), CDATA sections in tags like <svg> and <math> will
+        # be preserved. BUT, I'm not sure how Beautiful Soup needs to
+        # adjust to transform this preservation into the construction
+        # of a BS CData object.
+        markup = "<svg><![CDATA[foobar]]>"
+
+        # Eventually we should be able to do a find(text="foobar") and
+        # get a CData object.
+        self.assertSoupEquals(markup, "<svg><!--[CDATA[foobar]]--></svg>")
+
 
 class TestHTML5BuilderInvalidMarkup(TestLXMLBuilderInvalidMarkup):
     """See `BuilderInvalidMarkupSmokeTest`."""
@@ -76,6 +88,13 @@ class TestHTML5BuilderInvalidMarkup(TestLXMLBuilderInvalidMarkup):
         markup = "<p>one<!DOCTYPE foobar>two</p>"
         self.assertSoupEquals(markup, "<p>onetwo</p>")
 
+    def test_cdata_where_it_doesnt_belong(self):
+        # Random CDATA sections are converted into comments.
+        markup = "<div><![CDATA[foo]]>"
+        soup = self.soup(markup)
+        data = soup.find(text="[CDATA[foo]]")
+        self.assertEquals(data.__class__, Comment)
+
     def test_foo(self):
         isolatin = """<html><meta http-equiv="Content-type" content="text/html; charset=ISO-Latin-1" />Sacr\xe9 bleu!</html>"""
         soup = self.soup(isolatin)
```