From d241e5d5a9e9c01d88b3711a82a6114f846ffdb1 Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Sat, 14 Jul 2018 15:35:24 -0400 Subject: Stopped HTMLParser from raising an exception in very rare cases of bad markup. [bug=1708831] --- bs4/builder/_htmlparser.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'bs4/builder/_htmlparser.py') diff --git a/bs4/builder/_htmlparser.py b/bs4/builder/_htmlparser.py index 67890b3..71604c5 100644 --- a/bs4/builder/_htmlparser.py +++ b/bs4/builder/_htmlparser.py @@ -64,7 +64,18 @@ class BeautifulSoupHTMLParser(HTMLParser): # order. It's a list of closing tags we've already handled and # will ignore, assuming they ever show up. self.already_closed_empty_element = [] - + + def error(self, msg): + """In Python 3, HTMLParser subclasses must implement error(), although this + requirement doesn't appear to be documented. + + In Python 2, HTMLParser implements error() as raising an exception. + + In any event, this method is called only on very strange markup and our best strategy + is to pretend it didn't happen and keep going. + """ + warnings.warn(msg) + def handle_startendtag(self, name, attrs): # This is only called when the markup looks like # <tag/>. -- cgit v1.2.3