summary | refs | log | tree | commit | diff
path: root/bs4/diagnose.py
diff options
context:
space:
mode:
author: Leonard Richardson <leonardr@segfault.org> 2021-10-23 09:38:55 -0400
committer: Leonard Richardson <leonardr@segfault.org> 2021-10-23 09:38:55 -0400
commit: bbf5c99a147387e6acdc5405f59c8dcbea0164c2 (patch)
tree: 43c88ca642dd261b91b07af48d3f9a6558c73c82 /bs4/diagnose.py
parent: 6c18f90838426fc121e296d85cc6f5037505c384 (diff)
Fix a Python 3-specific problem in diagnose.lxml_trace.
Diffstat (limited to 'bs4/diagnose.py')
-rw-r--r-- bs4/diagnose.py 10
1 files changed, 8 insertions, 2 deletions
diff --git a/bs4/diagnose.py b/bs4/diagnose.py
index 500e92d..3bf583f 100644
--- a/bs4/diagnose.py
+++ b/bs4/diagnose.py
@@ -4,7 +4,7 @@
__license__ = "MIT"
import cProfile
-from io import StringIO
+from io import BytesIO
from html.parser import HTMLParser
import bs4
from bs4 import BeautifulSoup, __version__
@@ -103,7 +103,13 @@ def lxml_trace(data, html=True, **kwargs):
if False, lxml's XML parser will be used.
"""
from lxml import etree
- for event, element in etree.iterparse(StringIO(data), html=html, **kwargs):
+ recover = kwargs.pop('recover', True)
+ if isinstance(data, str):
+ data = data.encode("utf8")
+ reader = BytesIO(data)
+ for event, element in etree.iterparse(
+ reader, html=html, recover=recover, **kwargs
+ ):
print(("%s, %4s, %s" % (event, element.tag, element.text)))
class AnnouncingParser(HTMLParser):