From 84d7f8dd319039d385b9afe1da751006be2c9859 Mon Sep 17 00:00:00 2001
From: Leonard Richardson <leonard.richardson@canonical.com>
Date: Sun, 13 Feb 2011 10:37:24 -0500
Subject: Figured out the deal with CDATA sections in lxml and html5lib, and
 added comments and tests.

---
 tests/test_html5lib.py | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

(limited to 'tests/test_html5lib.py')
diff --git a/tests/test_html5lib.py b/tests/test_html5lib.py
index dada900..2d16bbb 100644
--- a/tests/test_html5lib.py
+++ b/tests/test_html5lib.py
@@ -1,4 +1,5 @@
 from beautifulsoup.builder.html5lib_builder import HTML5TreeBuilder
+from beautifulsoup.element import Comment
 from test_lxml import (
     TestLXMLBuilder,
     TestLXMLBuilderInvalidMarkup,
@@ -43,8 +44,19 @@ class TestHTML5Builder(TestLXMLBuilder):
         self.assertSoupEquals("<p>   </p>")
         self.assertSoupEquals("<b>   </b>")
 
-    def test_cdata(self):
-        print self.soup("<div><![CDATA[foo]]></div>")
+    def test_cdata_where_its_ok(self):
+        # In html5lib 0.9.0, all CDATA sections are converted into
+        # comments. In a later version (unreleased as of this
+        # writing), CDATA sections in tags like <svg> and <math> will
+        # be preserved. However, it is not yet clear how Beautiful
+        # Soup must adapt to turn that preserved CDATA into a BS
+        # CData object.
+        markup = "<svg><![CDATA[foobar]]>"
+
+        # Eventually find(text="foobar") should return a CData object;
+        # for now, expect the CDATA section to come back as a comment.
+        self.assertSoupEquals(markup, "<svg><!--[CDATA[foobar]]--></svg>")
+
 
 class TestHTML5BuilderInvalidMarkup(TestLXMLBuilderInvalidMarkup):
     """See `BuilderInvalidMarkupSmokeTest`."""
@@ -76,6 +88,13 @@ class TestHTML5BuilderInvalidMarkup(TestLXMLBuilderInvalidMarkup):
         markup = "<p>one<!DOCTYPE foobar>two</p>"
         self.assertSoupEquals(markup, "<p>onetwo</p>")
 
+    def test_cdata_where_it_doesnt_belong(self):
+        # CDATA sections in ordinary HTML tags (e.g. <div>) become Comments.
+        markup = "<div><![CDATA[foo]]>"
+        soup = self.soup(markup)
+        data = soup.find(text="[CDATA[foo]]")
+        self.assertEquals(data.__class__, Comment)
+
     def test_foo(self):
         isolatin = """<html><meta http-equiv="Content-type" content="text/html; charset=ISO-Latin-1" />Sacr\xe9 bleu!</html>"""
         soup = self.soup(isolatin)
-- 
cgit v1.2.3