summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CHANGELOG19
-rw-r--r--tests/test_lxml.py11
-rw-r--r--tests/test_tree.py16
3 files changed, 41 insertions, 5 deletions
diff --git a/CHANGELOG b/CHANGELOG
index 96a9ed4..3fb4f36 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -74,6 +74,25 @@ Unicode character. There are no longer any smartQuotesTo or
convert_entities arguments. (Unicode Dammit still has smart_quotes_to,
but the default is now to turn smart quotes into Unicode.)
+== CDATA sections are normal text, if they're understood at all. ==
+
+Currently, both HTML parsers ignore CDATA sections in markup:
+
+ <p><![CDATA[foo]]></p> => <p></p>
+
+A future version of html5lib will turn CDATA sections into text nodes,
+but only within tags like <svg> and <math>:
+
+ <svg><![CDATA[foo]]></svg> => <svg>foo</svg>
+
+The default XML parser (which uses lxml behind the scenes) turns CDATA
+sections into ordinary text nodes:
+
+ <p><![CDATA[foo]]></p> => <p>foo</p>
+
+In theory it's possible to preserve the CDATA sections when using the
+XML parser, but I don't see how to get it to work in practice.
+
= 3.1.0 =
A hybrid version that supports 2.4 and can be automatically converted
diff --git a/tests/test_lxml.py b/tests/test_lxml.py
index c178457..88c866d 100644
--- a/tests/test_lxml.py
+++ b/tests/test_lxml.py
@@ -518,6 +518,17 @@ class TestLXMLXMLBuilder(SoupTest):
def default_builder(self):
return LXMLTreeBuilderForXML()
+ def test_cdata_becomes_text(self):
+ # LXML sends CData sections as 'data' events, so we can't
+ # create special CData objects for them. We have to use
+ # NavigableString. I would like to fix this, but it's not a
+ # very high priority.
+ markup = "<foo><![CDATA[iamcdata]]></foo>"
+ soup = self.soup(markup)
+ cdata = soup.foo.contents[0]
+ self.assertEquals(cdata.__class__.__name__, 'NavigableString')
+
+
def test_can_handle_invalid_xml(self):
self.assertSoupEquals("<a><b>", "<a><b /></a>")
diff --git a/tests/test_tree.py b/tests/test_tree.py
index 40643dc..6f00716 100644
--- a/tests/test_tree.py
+++ b/tests/test_tree.py
@@ -13,7 +13,7 @@ import copy
import cPickle as pickle
import re
from beautifulsoup import BeautifulSoup
-from beautifulsoup.element import SoupStrainer, Tag
+from beautifulsoup.element import CData, SoupStrainer, Tag
from beautifulsoup.testing import SoupTest
class TreeTest(SoupTest):
@@ -883,9 +883,15 @@ class TestEncoding(SoupTest):
soup.b.encode("utf-8"), html.encode("utf-8"))
-class TestEmptyElementTags(SoupTest):
+class TestNavigableStringSubclasses(SoupTest):
- @property
- def default_builder(self):
- return LXMLTreeBuilderForXML()
+ def test_cdata(self):
+ # None of the current builders turn CDATA sections into CData
+ # objects, but you can create them manually.
+ soup = self.soup("")
+ cdata = CData("foo")
+ soup.insert(1, cdata)
+ self.assertEquals(str(soup), "<![CDATA[foo]]>")
+ self.assertEquals(soup.find(text="foo"), "foo")
+ self.assertEquals(soup.contents[0], "foo")