diff options
author | Leonard Richardson <leonard.richardson@canonical.com> | 2011-02-19 21:35:53 -0500 |
---|---|---|
committer | Leonard Richardson <leonard.richardson@canonical.com> | 2011-02-19 21:35:53 -0500 |
commit | bc44faea853cb628a280ef94c12c1b24ff45df2b (patch) | |
tree | 1c643b901203260b7b9326fc862daba899172b75 | |
parent | 9a936b48fe05666780662c76d5df3b3de7b48074 (diff) |
Made it easier to pass a custom lxml parser object into the treebuilder.
-rw-r--r-- | beautifulsoup/builder/lxml_builder.py | 22 |
1 file changed, 16 insertions, 6 deletions
```diff
diff --git a/beautifulsoup/builder/lxml_builder.py b/beautifulsoup/builder/lxml_builder.py
index afdf760..aa37f76 100644
--- a/beautifulsoup/builder/lxml_builder.py
+++ b/beautifulsoup/builder/lxml_builder.py
@@ -2,15 +2,23 @@ from lxml import etree
 from beautifulsoup.element import Comment, Doctype
 from beautifulsoup.builder import TreeBuilder, HTMLTreeBuilder
 from beautifulsoup.dammit import UnicodeDammit
+import types
 
 class LXMLTreeBuilderForXML(TreeBuilder):
     DEFAULT_PARSER_CLASS = etree.XMLParser
 
-    def __init__(self, parser_class=None):
-        # strip_cdata only has an effect on XMLParser. HTMLParser's
-        # constructor accepts strip_cdata but ignores it.
-        parser_class = parser_class or self.DEFAULT_PARSER_CLASS
-        self.parser = parser_class(target=self, strip_cdata=False)
+    @property
+    def default_parser(self):
+        return etree.XMLParser
+
+    def __init__(self, parser=None):
+        if parser is None:
+            # Use the default parser.
+            parser = self.default_parser
+        if callable(parser):
+            # Instantiate it with default arguments
+            parser = parser(target=self, strip_cdata=False)
+        self.parser = parser
         self.soup = None
 
     def prepare_markup(self, markup, user_specified_encoding=None,
@@ -64,4 +72,6 @@ class LXMLTreeBuilderForXML(TreeBuilder):
 
 
 class LXMLTreeBuilder(LXMLTreeBuilderForXML, HTMLTreeBuilder):
-    DEFAULT_PARSER_CLASS = etree.HTMLParser
+    @property
+    def default_parser(self):
+        return etree.HTMLParser
```