From 84d7f8dd319039d385b9afe1da751006be2c9859 Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Sun, 13 Feb 2011 10:37:24 -0500 Subject: Figured out the deal with CDATA sections in lxml and html5lib, and added comments and tests. --- beautifulsoup/builder/lxml_builder.py | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'beautifulsoup/builder/lxml_builder.py') diff --git a/beautifulsoup/builder/lxml_builder.py b/beautifulsoup/builder/lxml_builder.py index 8336ab4..4e83bba 100644 --- a/beautifulsoup/builder/lxml_builder.py +++ b/beautifulsoup/builder/lxml_builder.py @@ -5,6 +5,9 @@ from beautifulsoup.builder import HTMLTreeBuilder class LXMLTreeBuilder(HTMLTreeBuilder): def __init__(self, parser_class=etree.HTMLParser): + # etree.HTMLParser's constructor has an argument strip_cdata, + # but it does nothing. CDATA sections will become text when + # passed through etree.HTMLParser. self.parser = parser_class(target=self) self.soup = None @@ -21,6 +24,9 @@ class LXMLTreeBuilder(HTMLTreeBuilder): def end(self, name): self.soup.handle_endtag(name) + def pi(self, target, data): + pass + def data(self, content): self.soup.handle_data(content) -- cgit v1.2.3