From b3ba2b97b2d4c4e5559baadc96f1844753b38df4 Mon Sep 17 00:00:00 2001
From: Leonard Richardson
Date: Sun, 20 Feb 2011 10:04:35 -0500
Subject: Made the XML treebuilder able to handle basic invalid XML.

---
 beautifulsoup/builder/lxml_builder.py | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

(limited to 'beautifulsoup/builder/lxml_builder.py')

diff --git a/beautifulsoup/builder/lxml_builder.py b/beautifulsoup/builder/lxml_builder.py
index e431a62..9f4c0bd 100644
--- a/beautifulsoup/builder/lxml_builder.py
+++ b/beautifulsoup/builder/lxml_builder.py
@@ -11,7 +11,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
     def default_parser(self):
         # This can either return a parser object or a class, which
         # will be instantiated with default arguments.
-        return etree.XMLParser
+        return etree.XMLParser(target=self, strip_cdata=False, recover=True)

     def __init__(self, parser=None, empty_element_tags=None):
         if empty_element_tags is not None:
@@ -71,10 +71,6 @@ class LXMLTreeBuilderForXML(TreeBuilder):
         self.soup.handle_data(content)
         self.soup.endData(Comment)

-    def test_fragment_to_document(self, fragment):
-        """See `TreeBuilder`."""
-        return u'%s' % fragment
-

 class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):

@@ -82,5 +78,6 @@ class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
     def default_parser(self):
         return etree.HTMLParser

-    def end(self, name):
-        self.soup.handle_endtag(name)
+    def test_fragment_to_document(self, fragment):
+        """See `TreeBuilder`."""
+        return u'%s' % fragment
-- 
cgit v1.2.3