summary | refs | log | tree | commit | diff
path: root/bs4/builder/_lxml.py
diff options
context:
space:
mode:
author: Leonard Richardson <leonard.richardson@canonical.com> 2012-02-09 16:15:56 -0500
committer: Leonard Richardson <leonard.richardson@canonical.com> 2012-02-09 16:15:56 -0500
commit: 4aff2ee4d6f077e06159c92ab05c0f2ea527c6fa (patch)
tree: 40951a60046f184794a011a498187053e8ad2a92 /bs4/builder/_lxml.py
parent: caeb168dc47470607b3cd091e1d35db45c089385 (diff)
As a last-ditch attempt to turn data into Unicode, use errors=replace instead of errors=strict.
Diffstat (limited to 'bs4/builder/_lxml.py')
-rw-r--r-- bs4/builder/_lxml.py 5
1 file changed, 3 insertions, 2 deletions
diff --git a/bs4/builder/_lxml.py b/bs4/builder/_lxml.py
index 7219e49..cc3cb86 100644
--- a/bs4/builder/_lxml.py
+++ b/bs4/builder/_lxml.py
@@ -50,12 +50,13 @@ class LXMLTreeBuilderForXML(TreeBuilder):
declared within markup).
"""
if isinstance(markup, unicode):
- return markup, None, None
+ return markup, None, None, False
try_encodings = [user_specified_encoding, document_declared_encoding]
dammit = UnicodeDammit(markup, try_encodings, is_html=True)
return (dammit.markup, dammit.original_encoding,
- dammit.declared_html_encoding)
+ dammit.declared_html_encoding,
+ dammit.contains_replacement_characters)
def feed(self, markup):
self.parser.feed(markup)