From 4191d5ff45015c6fac1db0bbdd7b3fcaff234424 Mon Sep 17 00:00:00 2001
From: Leonard Richardson <leonard.richardson@canonical.com>
Date: Sun, 13 Feb 2011 18:04:03 -0500
Subject: Clarified lxml's behavior w/r/t CDATA sections.

---
 beautifulsoup/builder/lxml_builder.py | 4 ++--
 tests/test_lxml.py                    | 9 +++++----
 2 files changed, 7 insertions(+), 6 deletions(-)
diff --git a/beautifulsoup/builder/lxml_builder.py b/beautifulsoup/builder/lxml_builder.py
index 4e83bba..86ac183 100644
--- a/beautifulsoup/builder/lxml_builder.py
+++ b/beautifulsoup/builder/lxml_builder.py
@@ -6,8 +6,8 @@ class LXMLTreeBuilder(HTMLTreeBuilder):
 
     def __init__(self, parser_class=etree.HTMLParser):
         # etree.HTMLParser's constructor has an argument strip_cdata,
-        # but it does nothing. CDATA sections will become text when
-        # passed through etree.HTMLParser.
+        # but it has no effect: CDATA sections are always stripped when
+        # passed through etree.HTMLParser.
         self.parser = parser_class(target=self)
         self.soup = None
 
diff --git a/tests/test_lxml.py b/tests/test_lxml.py
index 207d141..8f36b41 100644
--- a/tests/test_lxml.py
+++ b/tests/test_lxml.py
@@ -196,16 +196,17 @@ class TestLXMLBuilder(SoupTest):
         soup = self.soup("<a>&nbsp;&nbsp;</a>")
         self.assertEquals(soup.a.string, u"\N{NO-BREAK SPACE}" * 2)
 
+    def test_cdata_where_its_ok(self):
+        # lxml strips CDATA sections, no matter where they occur.
+        markup = "<svg><![CDATA[foobar]]>"
+        self.assertSoupEquals(markup, "<svg></svg>")
+
     # Tests below this line need work.
 
     #def test_doctype(self):
     #    xml = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"><html>foo</html></p>'
     #    self.assertSoupEquals(xml)
 
-
-    #def test_cdata(self):
-    #    print self.soup("<div><![CDATA[foo]]></div>")
-
     def test_entities_converted_on_the_way_out(self):
         text = "<p>&lt;&lt;sacr&eacute;&#32;bleu!&gt;&gt;</p>"
         expected = u"&lt;&lt;sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;".encode("utf-8")
-- 
cgit v1.2.3