From 4191d5ff45015c6fac1db0bbdd7b3fcaff234424 Mon Sep 17 00:00:00 2001
From: Leonard Richardson
Date: Sun, 13 Feb 2011 18:04:03 -0500
Subject: Clarified lxml's behavior w/r/t CDATA sections.
---
beautifulsoup/builder/lxml_builder.py | 4 ++--
tests/test_lxml.py | 9 +++++----
2 files changed, 7 insertions(+), 6 deletions(-)
diff --git a/beautifulsoup/builder/lxml_builder.py b/beautifulsoup/builder/lxml_builder.py
index 4e83bba..86ac183 100644
--- a/beautifulsoup/builder/lxml_builder.py
+++ b/beautifulsoup/builder/lxml_builder.py
@@ -6,8 +6,8 @@ class LXMLTreeBuilder(HTMLTreeBuilder):
def __init__(self, parser_class=etree.HTMLParser):
# etree.HTMLParser's constructor has an argument strip_cdata,
- # but it does nothing. CDATA sections will become text when
- # passed through etree.HTMLParser.
+ # but it does nothing. CDATA sections are always stripped when
+ # passed through etree.HTMLParser.
self.parser = parser_class(target=self)
self.soup = None
diff --git a/tests/test_lxml.py b/tests/test_lxml.py
index 207d141..8f36b41 100644
--- a/tests/test_lxml.py
+++ b/tests/test_lxml.py
@@ -196,16 +196,17 @@ class TestLXMLBuilder(SoupTest):
soup = self.soup(" ")
self.assertEquals(soup.a.string, u"\N{NO-BREAK SPACE}" * 2)
+ def test_cdata_where_its_ok(self):
+ # lxml strips CDATA sections, no matter where they occur.
+ markup = "
'
# self.assertSoupEquals(xml)
-
- #def test_cdata(self):
- # print self.soup("")
-
def test_entities_converted_on_the_way_out(self):
text = "<<sacré bleu!>>
"
expected = u"<<sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>".encode("utf-8")
--
cgit v1.2.3