From f2532b1d63bd4a4d2be6ad9a4dce5eea03f43e7a Mon Sep 17 00:00:00 2001
From: Leonard Richardson <leonard.richardson@canonical.com>
Date: Sun, 20 Feb 2011 10:39:30 -0500
Subject: Since we can't parse in CData objects ATM, added a test for CData
 objects created manually, to keep the bits from rotting.

---
 CHANGELOG          | 19 +++++++++++++++++++
 tests/test_lxml.py | 11 +++++++++++
 tests/test_tree.py | 16 +++++++++++-----
 3 files changed, 41 insertions(+), 5 deletions(-)
diff --git a/CHANGELOG b/CHANGELOG
index 96a9ed4..3fb4f36 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -74,6 +74,25 @@ Unicode character. There are no longer any smartQuotesTo or
 convert_entities arguments. (Unicode Dammit still has smart_quotes_to,
 but the default is now to turn smart quotes into Unicode.)
 
+== CDATA sections are normal text, if they're understood at all. ==
+
+Currently, both HTML parsers ignore CDATA sections in markup:
+
+ <p><![CDATA[foo]]></p> => <p></p>
+
+A future version of html5lib will turn CDATA sections into text nodes,
+but only within tags like <svg> and <math>:
+
+ <svg><![CDATA[foo]]></svg> => <svg>foo</svg>
+
+The default XML parser (which uses lxml behind the scenes) turns CDATA
+sections into ordinary text nodes:
+
+ <p><![CDATA[foo]]></p> => <p>foo</p>
+
+In theory it's possible to preserve the CDATA sections when using the
+XML parser, but I don't see how to get it to work in practice.
+
 = 3.1.0 =
 
 A hybrid version that supports 2.4 and can be automatically converted
diff --git a/tests/test_lxml.py b/tests/test_lxml.py
index c178457..88c866d 100644
--- a/tests/test_lxml.py
+++ b/tests/test_lxml.py
@@ -518,6 +518,17 @@ class TestLXMLXMLBuilder(SoupTest):
     def default_builder(self):
         return LXMLTreeBuilderForXML()
 
+    def test_cdata_becomes_text(self):
+        # LXML sends CData sections as 'data' events, so we can't
+        # create special CData objects for them. We have to use
+        # NavigableString. I would like to fix this, but it's not a
+        # very high priority.
+        markup = "<foo><![CDATA[iamcdata]]></foo>"
+        soup = self.soup(markup)
+        cdata = soup.foo.contents[0]
+        self.assertEquals(cdata.__class__.__name__, 'NavigableString')
+
+
     def test_can_handle_invalid_xml(self):
         self.assertSoupEquals("<a><b>", "<a><b /></a>")
 
diff --git a/tests/test_tree.py b/tests/test_tree.py
index 40643dc..6f00716 100644
--- a/tests/test_tree.py
+++ b/tests/test_tree.py
@@ -13,7 +13,7 @@ import copy
 import cPickle as pickle
 import re
 from beautifulsoup import BeautifulSoup
-from beautifulsoup.element import SoupStrainer, Tag
+from beautifulsoup.element import CData, SoupStrainer, Tag
 from beautifulsoup.testing import SoupTest
 
 class TreeTest(SoupTest):
@@ -883,9 +883,15 @@ class TestEncoding(SoupTest):
             soup.b.encode("utf-8"), html.encode("utf-8"))
 
 
-class TestEmptyElementTags(SoupTest):
+class TestNavigableStringSubclasses(SoupTest):
 
-    @property
-    def default_builder(self):
-        return LXMLTreeBuilderForXML()
 
+    def test_cdata(self):
+        # None of the current builders turn CDATA sections into CData
+        # objects, but you can create them manually.
+        soup = self.soup("")
+        cdata = CData("foo")
+        soup.insert(1, cdata)
+        self.assertEquals(str(soup), "<![CDATA[foo]]>")
+        self.assertEquals(soup.find(text="foo"), "foo")
+        self.assertEquals(soup.contents[0], "foo")
-- 
cgit v1.2.3