From 418ab58f94ea4a0957bcad8e7520bb3fb62d1394 Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Sun, 4 Jun 2023 18:44:29 -0400 Subject: Fixed a case found by Mengyuhan where html.parser giving up on markup would result in an AssertionError instead of a ParserRejectedMarkup exception. --- CHANGELOG | 4 ++++ bs4/builder/_htmlparser.py | 2 +- ...rash-ffbdfa8a2b26f13537b68d3794b0478a4090ee4a.testcase | Bin 0 -> 103 bytes bs4/tests/test_fuzz.py | 1 + 4 files changed, 6 insertions(+), 1 deletion(-) create mode 100644 bs4/tests/fuzz/crash-ffbdfa8a2b26f13537b68d3794b0478a4090ee4a.testcase diff --git a/CHANGELOG b/CHANGELOG index 94f1209..644b318 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -5,6 +5,10 @@ of .hidden is not a documented or supported feature, so don't do this, but it's not too difficult to keep the old behavior working. +* Fixed a case found by Mengyuhan where html.parser giving up on + markup would result in an AssertionError instead of a + ParserRejectedMarkup exception. + = 4.12.2 (20230407) * Fixed an unhandled exception in BeautifulSoup.decode_contents diff --git a/bs4/builder/_htmlparser.py b/bs4/builder/_htmlparser.py index e065096..3cc187f 100644 --- a/bs4/builder/_htmlparser.py +++ b/bs4/builder/_htmlparser.py @@ -378,10 +378,10 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder): parser.soup = self.soup try: parser.feed(markup) + parser.close() except AssertionError as e: # html.parser raises AssertionError in rare cases to # indicate a fatal problem with the markup, especially # when there's an error in the doctype declaration. raise ParserRejectedMarkup(e) - parser.close() parser.already_closed_empty_element = [] diff --git a/bs4/tests/fuzz/crash-ffbdfa8a2b26f13537b68d3794b0478a4090ee4a.testcase b/bs4/tests/fuzz/crash-ffbdfa8a2b26f13537b68d3794b0478a4090ee4a.testcase new file mode 100644 index 0000000..8857115 Binary files /dev/null and b/bs4/tests/fuzz/crash-ffbdfa8a2b26f13537b68d3794b0478a4090ee4a.testcase differ diff --git a/bs4/tests/test_fuzz.py b/bs4/tests/test_fuzz.py index 2857830..f29802d 100644 --- a/bs4/tests/test_fuzz.py +++ b/bs4/tests/test_fuzz.py @@ -68,6 +68,7 @@ class TestFuzz(object): @pytest.mark.parametrize( "filename", [ "clusterfuzz-testcase-minimized-bs4_fuzzer-5703933063462912", + "crash-ffbdfa8a2b26f13537b68d3794b0478a4090ee4a", ] ) def test_rejected_markup(self, filename): -- cgit v1.2.3