Split out the code that guesses at encodings from the code that tries to decode a bytestring based on those encodings. This is necessary because lxml wants to do the decoding itself.

author: Leonard Richardson <leonardr@segfault.org> 2013-05-30 11:33:00 -0400
committer: Leonard Richardson <leonardr@segfault.org> 2013-05-30 11:33:00 -0400
commit: ea23194367fb36d201cf6b8134601a73070dff63 (patch)
tree: 90a944aff6823d05bf0a6f815fa68192270fecbc /bs4/diagnose.py
parent: 9ebf90c684990306433b6c364a93b425a88ef2e7 (diff)
1 file changed, 2 insertions, 2 deletions
diff --git a/bs4/diagnose.py b/bs4/diagnose.py
index 25fda5c..f9bff28 100644
--- a/bs4/diagnose.py
+++ b/bs4/diagnose.py
@@ -61,14 +61,14 @@ def diagnose(data):
 
         print "-" * 80
 
-def lxml_trace(data, html=True):
+def lxml_trace(data, html=True, **kwargs):
     """Print out the lxml events that occur during parsing.
 
     This lets you see how lxml parses a document when no Beautiful
     Soup code is running.
     """
     from lxml import etree
-    for event, element in etree.iterparse(StringIO(data), html=html):
+    for event, element in etree.iterparse(StringIO(data), html=html, **kwargs):
         print("%s, %4s, %s" % (event, element.tag, element.text))
 
 class AnnouncingParser(HTMLParser):
author	Leonard Richardson <leonardr@segfault.org>	2013-05-30 11:33:00 -0400
committer	Leonard Richardson <leonardr@segfault.org>	2013-05-30 11:33:00 -0400
commit	ea23194367fb36d201cf6b8134601a73070dff63 (patch)
tree	90a944aff6823d05bf0a6f815fa68192270fecbc /bs4/diagnose.py
parent	9ebf90c684990306433b6c364a93b425a88ef2e7 (diff)