summaryrefslogtreecommitdiff
path: root/bs4/builder/_lxml.py
diff options
context:
space:
mode:
authorLeonard Richardson <leonardr@segfault.org>2015-06-27 09:55:40 -0400
committerLeonard Richardson <leonardr@segfault.org>2015-06-27 09:55:40 -0400
commitfeffc5a1146e2520c90682bc2c33f5fa7d3943f0 (patch)
tree6dce892919c201b629628647f86843382b29a60a /bs4/builder/_lxml.py
parentd728b9cbd6cd5954acf7c9c32fe2f1878809d6e8 (diff)
Added an exclude_encodings argument to UnicodeDammit and to the
Beautiful Soup constructor, which lets you prohibit the detection of an encoding that you know is wrong. [bug=1469408]
Diffstat (limited to 'bs4/builder/_lxml.py')
-rw-r--r--bs4/builder/_lxml.py4
1 files changed, 3 insertions, 1 deletions
diff --git a/bs4/builder/_lxml.py b/bs4/builder/_lxml.py
index b0bc8a0..2e33386 100644
--- a/bs4/builder/_lxml.py
+++ b/bs4/builder/_lxml.py
@@ -77,6 +77,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
return (None, tag)
def prepare_markup(self, markup, user_specified_encoding=None,
+ exclude_encodings=None,
document_declared_encoding=None):
"""
:yield: A series of 4-tuples.
@@ -102,7 +103,8 @@ class LXMLTreeBuilderForXML(TreeBuilder):
# the document as each one in turn.
is_html = not self.is_xml
try_encodings = [user_specified_encoding, document_declared_encoding]
- detector = EncodingDetector(markup, try_encodings, is_html)
+ detector = EncodingDetector(
+ markup, try_encodings, is_html, exclude_encodings)
for encoding in detector.encodings:
yield (detector.markup, encoding, document_declared_encoding, False)