summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- beautifulsoup/builder/lxml_builder.py 4
-rw-r--r-- tests/test_lxml.py 9
2 files changed, 7 insertions, 6 deletions
diff --git a/beautifulsoup/builder/lxml_builder.py b/beautifulsoup/builder/lxml_builder.py
index 4e83bba..86ac183 100644
--- a/beautifulsoup/builder/lxml_builder.py
+++ b/beautifulsoup/builder/lxml_builder.py
@@ -6,8 +6,8 @@ class LXMLTreeBuilder(HTMLTreeBuilder):
def __init__(self, parser_class=etree.HTMLParser):
# etree.HTMLParser's constructor has an argument strip_cdata,
- # but it does nothing. CDATA sections will become text when
- # passed through etree.HTMLParser.
+ # but it does nothing. CDATA sections are always stripped when
+ # passed through HTMLParser.
self.parser = parser_class(target=self)
self.soup = None
diff --git a/tests/test_lxml.py b/tests/test_lxml.py
index 207d141..8f36b41 100644
--- a/tests/test_lxml.py
+++ b/tests/test_lxml.py
@@ -196,16 +196,17 @@ class TestLXMLBuilder(SoupTest):
soup = self.soup("<a>&nbsp;&nbsp;</a>")
self.assertEquals(soup.a.string, u"\N{NO-BREAK SPACE}" * 2)
+ def test_cdata_where_its_ok(self):
+ # lxml strips CDATA sections, no matter where they occur.
+ markup = "<svg><![CDATA[foobar]]>"
+ self.assertSoupEquals(markup, "<svg></svg>")
+
# Tests below this line need work.
#def test_doctype(self):
# xml = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"><html>foo</html></p>'
# self.assertSoupEquals(xml)
-
- #def test_cdata(self):
- # print self.soup("<div><![CDATA[foo]]></div>")
-
def test_entities_converted_on_the_way_out(self):
text = "<p>&lt;&lt;sacr&eacute;&#32;bleu!&gt;&gt;</p>"
expected = u"&lt;&lt;sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;".encode("utf-8")