summary refs log tree commit diff
path: root/beautifulsoup/builder/lxml_builder.py
diff options
context:
space:
mode:
Diffstat (limited to 'beautifulsoup/builder/lxml_builder.py')
-rw-r--r-- beautifulsoup/builder/lxml_builder.py 15
1 files changed, 15 insertions, 0 deletions
diff --git a/beautifulsoup/builder/lxml_builder.py b/beautifulsoup/builder/lxml_builder.py
index 9ced9f0..a1f8c1e 100644
--- a/beautifulsoup/builder/lxml_builder.py
+++ b/beautifulsoup/builder/lxml_builder.py
@@ -1,6 +1,7 @@
from lxml import etree
from beautifulsoup.element import Comment, Doctype
from beautifulsoup.builder import HTMLTreeBuilder
+from beautifulsoup.dammit import UnicodeDammit
class LXMLTreeBuilder(HTMLTreeBuilder):
@@ -11,6 +12,20 @@ class LXMLTreeBuilder(HTMLTreeBuilder):
self.parser = parser_class(target=self)
self.soup = None
+ def prepare_markup(self, markup, user_specified_encoding=None,
+ document_declared_encoding=None):
+ """
+ :return: A 3-tuple (markup, original encoding, encoding
+ declared within markup).
+ """
+ if isinstance(markup, unicode):
+ return markup, None, None
+
+ try_encodings = [user_specified_encoding, document_declared_encoding]
+ dammit = UnicodeDammit(markup, try_encodings, isHTML=True)
+ return dammit.markup, dammit.originalEncoding, dammit.declaredHTMLEncoding
+
+
def feed(self, markup):
self.parser.feed(markup)
self.parser.close()