Figured out the deal with CDATA sections in lxml and html5lib, and added comments and tests.

author: Leonard Richardson <leonard.richardson@canonical.com> 2011-02-13 10:37:24 -0500
committer: Leonard Richardson <leonard.richardson@canonical.com> 2011-02-13 10:37:24 -0500
commit: 84d7f8dd319039d385b9afe1da751006be2c9859 (patch)
tree: b265fc282c99140d1371962b2339bc32cde1beff /beautifulsoup/builder/lxml_builder.py
parent: d89c8878ea86a2575c87e9fad8081cfcd81e0bcd (diff)
1 files changed, 6 insertions, 0 deletions
diff --git a/beautifulsoup/builder/lxml_builder.py b/beautifulsoup/builder/lxml_builder.py
index 8336ab4..4e83bba 100644
--- a/beautifulsoup/builder/lxml_builder.py
+++ b/beautifulsoup/builder/lxml_builder.py
@@ -5,6 +5,9 @@ from beautifulsoup.builder import HTMLTreeBuilder
 class LXMLTreeBuilder(HTMLTreeBuilder):
 
     def __init__(self, parser_class=etree.HTMLParser):
+        # etree.HTMLParser's constructor accepts a strip_cdata argument,
+        # but setting it has no effect: CDATA sections will become text
+        # when passed through etree.HTMLParser either way.
         self.parser = parser_class(target=self)
         self.soup = None
 
@@ -21,6 +24,9 @@ class LXMLTreeBuilder(HTMLTreeBuilder):
     def end(self, name):
         self.soup.handle_endtag(name)
 
+    def pi(self, target, data):
+        pass
+
     def data(self, content):
         self.soup.handle_data(content)
author	Leonard Richardson <leonard.richardson@canonical.com>	2011-02-13 10:37:24 -0500
committer	Leonard Richardson <leonard.richardson@canonical.com>	2011-02-13 10:37:24 -0500
commit	84d7f8dd319039d385b9afe1da751006be2c9859 (patch)
tree	b265fc282c99140d1371962b2339bc32cde1beff /beautifulsoup/builder/lxml_builder.py
parent	d89c8878ea86a2575c87e9fad8081cfcd81e0bcd (diff)