summary | refs | log | tree | commit | diff
path: root/beautifulsoup/builder/lxml_builder.py
diff options
context:
space:
mode:
author: Leonard Richardson <leonard.richardson@canonical.com> 2011-02-13 10:37:24 -0500
committer: Leonard Richardson <leonard.richardson@canonical.com> 2011-02-13 10:37:24 -0500
commit: 84d7f8dd319039d385b9afe1da751006be2c9859 (patch)
tree: b265fc282c99140d1371962b2339bc32cde1beff /beautifulsoup/builder/lxml_builder.py
parent: d89c8878ea86a2575c87e9fad8081cfcd81e0bcd (diff)
Figured out the deal with CDATA sections in lxml and html5lib, and added comments and tests.
Diffstat (limited to 'beautifulsoup/builder/lxml_builder.py')
-rw-r--r-- beautifulsoup/builder/lxml_builder.py | 6
1 files changed, 6 insertions, 0 deletions
diff --git a/beautifulsoup/builder/lxml_builder.py b/beautifulsoup/builder/lxml_builder.py
index 8336ab4..4e83bba 100644
--- a/beautifulsoup/builder/lxml_builder.py
+++ b/beautifulsoup/builder/lxml_builder.py
@@ -5,6 +5,9 @@ from beautifulsoup.builder import HTMLTreeBuilder
class LXMLTreeBuilder(HTMLTreeBuilder):
def __init__(self, parser_class=etree.HTMLParser):
+ # etree.HTMLParser's constructor has an argument strip_cdata,
+ # but it does nothing. CDATA sections will become text when
+ # passed through etree.HTMLParser.
self.parser = parser_class(target=self)
self.soup = None
@@ -21,6 +24,9 @@ class LXMLTreeBuilder(HTMLTreeBuilder):
def end(self, name):
self.soup.handle_endtag(name)
+ def pi(self, target, data):
+ pass
+
def data(self, content):
self.soup.handle_data(content)