summary | refs | log | tree | commit | diff
path: root/bs4/diagnose.py
diff options
context:
space:
mode:
Diffstat (limited to 'bs4/diagnose.py')
-rw-r--r-- bs4/diagnose.py | 10
1 files changed, 8 insertions, 2 deletions
diff --git a/bs4/diagnose.py b/bs4/diagnose.py
index 500e92d..3bf583f 100644
--- a/bs4/diagnose.py
+++ b/bs4/diagnose.py
@@ -4,7 +4,7 @@
__license__ = "MIT"
import cProfile
-from io import StringIO
+from io import BytesIO
from html.parser import HTMLParser
import bs4
from bs4 import BeautifulSoup, __version__
@@ -103,7 +103,13 @@ def lxml_trace(data, html=True, **kwargs):
if False, lxml's XML parser will be used.
"""
from lxml import etree
- for event, element in etree.iterparse(StringIO(data), html=html, **kwargs):
+ recover = kwargs.pop('recover', True)
+ if isinstance(data, str):
+ data = data.encode("utf8")
+ reader = BytesIO(data)
+ for event, element in etree.iterparse(
+ reader, html=html, recover=recover, **kwargs
+ ):
print(("%s, %4s, %s" % (event, element.tag, element.text)))
class AnnouncingParser(HTMLParser):