diff options
| author | Leonard Richardson <leonard.richardson@canonical.com> | 2012-02-09 16:15:56 -0500 |
|---|---|---|
| committer | Leonard Richardson <leonard.richardson@canonical.com> | 2012-02-09 16:15:56 -0500 |
| commit | 4aff2ee4d6f077e06159c92ab05c0f2ea527c6fa (patch) | |
| tree | 40951a60046f184794a011a498187053e8ad2a92 /bs4/builder/_lxml.py | |
| parent | caeb168dc47470607b3cd091e1d35db45c089385 (diff) | |
As a last-ditch attempt to turn data into Unicode, use errors=replace instead of errors=strict.
Diffstat (limited to 'bs4/builder/_lxml.py')
| -rw-r--r-- | bs4/builder/_lxml.py | 5 |
1 file changed, 3 insertions, 2 deletions
```diff
diff --git a/bs4/builder/_lxml.py b/bs4/builder/_lxml.py
index 7219e49..cc3cb86 100644
--- a/bs4/builder/_lxml.py
+++ b/bs4/builder/_lxml.py
@@ -50,12 +50,13 @@ class LXMLTreeBuilderForXML(TreeBuilder):
         declared within markup).
         """
         if isinstance(markup, unicode):
-            return markup, None, None
+            return markup, None, None, False
 
         try_encodings = [user_specified_encoding, document_declared_encoding]
         dammit = UnicodeDammit(markup, try_encodings, is_html=True)
         return (dammit.markup, dammit.original_encoding,
-                dammit.declared_html_encoding)
+                dammit.declared_html_encoding,
+                dammit.contains_replacement_characters)
 
     def feed(self, markup):
         self.parser.feed(markup)
```
