From bbf5c99a147387e6acdc5405f59c8dcbea0164c2 Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Sat, 23 Oct 2021 09:38:55 -0400 Subject: Fix a Python 3-specific problem in diagnose.lxml_trace. --- bs4/diagnose.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/bs4/diagnose.py b/bs4/diagnose.py index 500e92d..3bf583f 100644 --- a/bs4/diagnose.py +++ b/bs4/diagnose.py @@ -4,7 +4,7 @@ __license__ = "MIT" import cProfile -from io import StringIO +from io import BytesIO from html.parser import HTMLParser import bs4 from bs4 import BeautifulSoup, __version__ @@ -103,7 +103,13 @@ def lxml_trace(data, html=True, **kwargs): if False, lxml's XML parser will be used. """ from lxml import etree - for event, element in etree.iterparse(StringIO(data), html=html, **kwargs): + recover = kwargs.pop('recover', True) + if isinstance(data, str): + data = data.encode("utf8") + reader = BytesIO(data) + for event, element in etree.iterparse( + reader, html=html, recover=recover, **kwargs + ): print(("%s, %4s, %s" % (event, element.tag, element.text))) class AnnouncingParser(HTMLParser): -- cgit v1.2.3