summaryrefslogtreecommitdiff
path: root/beautifulsoup/builder/_lxml.py
diff options
context:
space:
mode:
Diffstat (limited to 'beautifulsoup/builder/_lxml.py')
-rw-r--r--beautifulsoup/builder/_lxml.py108
1 files changed, 0 insertions, 108 deletions
diff --git a/beautifulsoup/builder/_lxml.py b/beautifulsoup/builder/_lxml.py
deleted file mode 100644
index 23ac485..0000000
--- a/beautifulsoup/builder/_lxml.py
+++ /dev/null
@@ -1,108 +0,0 @@
-__all__ = [
- 'LXMLTreeBuilderForXML',
- 'LXMLTreeBuilder',
- ]
-
-from lxml import etree
-from beautifulsoup.element import Comment, Doctype
-from beautifulsoup.builder import (
- FAST,
- HTML,
- HTMLTreeBuilder,
- PERMISSIVE,
- TreeBuilder,
- XML)
-from beautifulsoup.dammit import UnicodeDammit
-import types
-
-LXML = 'lxml'
-
-class LXMLTreeBuilderForXML(TreeBuilder):
- DEFAULT_PARSER_CLASS = etree.XMLParser
-
- is_xml = True
-
- # Well, it's permissive by XML parser standards.
- features = [LXML, XML, FAST, PERMISSIVE]
-
- @property
- def default_parser(self):
- # This can either return a parser object or a class, which
- # will be instantiated with default arguments.
- return etree.XMLParser(target=self, strip_cdata=False, recover=True)
-
- def __init__(self, parser=None, empty_element_tags=None):
- if empty_element_tags is not None:
- self.empty_element_tags = set(empty_element_tags)
- if parser is None:
- # Use the default parser.
- parser = self.default_parser
- if callable(parser):
- # Instantiate the parser with default arguments
- parser = parser(target=self, strip_cdata=False)
- self.parser = parser
- self.soup = None
-
- def prepare_markup(self, markup, user_specified_encoding=None,
- document_declared_encoding=None):
- """
- :return: A 3-tuple (markup, original encoding, encoding
- declared within markup).
- """
- if isinstance(markup, unicode):
- return markup, None, None
-
- try_encodings = [user_specified_encoding, document_declared_encoding]
- dammit = UnicodeDammit(markup, try_encodings, isHTML=True)
- return (dammit.markup, dammit.original_encoding,
- dammit.declared_html_encoding)
-
- def feed(self, markup):
- self.parser.feed(markup)
- self.parser.close()
-
- def close(self):
- pass
-
- def start(self, name, attrs):
- self.soup.handle_starttag(name, attrs)
-
- def end(self, name):
- self.soup.endData()
- completed_tag = self.soup.tagStack[-1]
- self.soup.handle_endtag(name)
-
- def pi(self, target, data):
- pass
-
- def data(self, content):
- self.soup.handle_data(content)
-
- def doctype(self, name, pubid, system):
- self.soup.endData()
- doctype = Doctype.for_name_and_ids(name, pubid, system)
- self.soup.object_was_parsed(doctype)
-
- def comment(self, content):
- "Handle comments as Comment objects."
- self.soup.endData()
- self.soup.handle_data(content)
- self.soup.endData(Comment)
-
- def test_fragment_to_document(self, fragment):
- """See `TreeBuilder`."""
- return u'<?xml version="1.0" encoding="utf-8">\n%s' % fragment
-
-
-class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
-
- features = [LXML, HTML, FAST]
- is_xml = False
-
- @property
- def default_parser(self):
- return etree.HTMLParser
-
- def test_fragment_to_document(self, fragment):
- """See `TreeBuilder`."""
- return u'<html><body>%s</body></html>' % fragment