summary | refs | log | tree | commit | diff
path: root/beautifulsoup/builder/lxml_builder.py
diff options
context:
space:
mode:
author Leonard Richardson <leonard.richardson@canonical.com> 2011-02-19 21:35:53 -0500
committer Leonard Richardson <leonard.richardson@canonical.com> 2011-02-19 21:35:53 -0500
commit bc44faea853cb628a280ef94c12c1b24ff45df2b (patch)
tree 1c643b901203260b7b9326fc862daba899172b75 /beautifulsoup/builder/lxml_builder.py
parent 9a936b48fe05666780662c76d5df3b3de7b48074 (diff)
Made it easier to pass a custom lxml parser object into the treebuilder.
Diffstat (limited to 'beautifulsoup/builder/lxml_builder.py')
-rw-r--r-- beautifulsoup/builder/lxml_builder.py | 22
1 files changed, 16 insertions, 6 deletions
diff --git a/beautifulsoup/builder/lxml_builder.py b/beautifulsoup/builder/lxml_builder.py
index afdf760..aa37f76 100644
--- a/beautifulsoup/builder/lxml_builder.py
+++ b/beautifulsoup/builder/lxml_builder.py
@@ -2,15 +2,23 @@ from lxml import etree
from beautifulsoup.element import Comment, Doctype
from beautifulsoup.builder import TreeBuilder, HTMLTreeBuilder
from beautifulsoup.dammit import UnicodeDammit
+import types
class LXMLTreeBuilderForXML(TreeBuilder):
DEFAULT_PARSER_CLASS = etree.XMLParser
- def __init__(self, parser_class=None):
- # strip_cdata only has an effect on XMLParser. HTMLParser's
- # constructor accepts strip_cdata but ignores it.
- parser_class = parser_class or self.DEFAULT_PARSER_CLASS
- self.parser = parser_class(target=self, strip_cdata=False)
+ @property
+ def default_parser(self):
+ return etree.XMLParser
+
+ def __init__(self, parser=None):
+ if parser is None:
+ # Use the default parser.
+ parser = self.default_parser
+ if callable(parser):
+ # Instantiate it with default arguments
+ parser = parser(target=self, strip_cdata=False)
+ self.parser = parser
self.soup = None
def prepare_markup(self, markup, user_specified_encoding=None,
@@ -64,4 +72,6 @@ class LXMLTreeBuilderForXML(TreeBuilder):
class LXMLTreeBuilder(LXMLTreeBuilderForXML, HTMLTreeBuilder):
- DEFAULT_PARSER_CLASS = etree.HTMLParser
+ @property
+ def default_parser(self):
+ return etree.HTMLParser