diff options
author | Leonard Richardson <leonardr@segfault.org> | 2021-10-23 09:38:55 -0400 |
---|---|---|
committer | Leonard Richardson <leonardr@segfault.org> | 2021-10-23 09:38:55 -0400 |
commit | bbf5c99a147387e6acdc5405f59c8dcbea0164c2 (patch) | |
tree | 43c88ca642dd261b91b07af48d3f9a6558c73c82 | |
parent | 6c18f90838426fc121e296d85cc6f5037505c384 (diff) |
Fix a Python 3-specific problem in diagnose.lxml_trace.
-rw-r--r-- | bs4/diagnose.py | 10 |
1 file changed, 8 insertions, 2 deletions
```diff
diff --git a/bs4/diagnose.py b/bs4/diagnose.py
index 500e92d..3bf583f 100644
--- a/bs4/diagnose.py
+++ b/bs4/diagnose.py
@@ -4,7 +4,7 @@
 __license__ = "MIT"
 
 import cProfile
-from io import StringIO
+from io import BytesIO
 from html.parser import HTMLParser
 import bs4
 from bs4 import BeautifulSoup, __version__
@@ -103,7 +103,13 @@ def lxml_trace(data, html=True, **kwargs):
     if False, lxml's XML parser will be used.
     """
     from lxml import etree
-    for event, element in etree.iterparse(StringIO(data), html=html, **kwargs):
+    recover = kwargs.pop('recover', True)
+    if isinstance(data, str):
+        data = data.encode("utf8")
+    reader = BytesIO(data)
+    for event, element in etree.iterparse(
+        reader, html=html, recover=recover, **kwargs
+    ):
         print(("%s, %4s, %s" % (event, element.tag, element.text)))
 
 class AnnouncingParser(HTMLParser):
```