diff options
-rw-r--r-- | CHANGELOG | 4 |
-rw-r--r-- | bs4/builder/_htmlparser.py | 2 |
-rw-r--r-- | bs4/tests/fuzz/crash-ffbdfa8a2b26f13537b68d3794b0478a4090ee4a.testcase | bin | 0 -> 103 bytes |
-rw-r--r-- | bs4/tests/test_fuzz.py | 1 |
4 files changed, 6 insertions, 1 deletion
@@ -5,6 +5,10 @@
   of .hidden is not a documented or supported feature, so don't do this,
   but it's not too difficult to keep the old behavior working.
 
+* Fixed a case found by Mengyuhan where html.parser giving up on
+  markup would result in an AssertionError instead of a
+  ParserRejectedMarkup exception.
+
 = 4.12.2 (20230407)
 
 * Fixed an unhandled exception in BeautifulSoup.decode_contents
diff --git a/bs4/builder/_htmlparser.py b/bs4/builder/_htmlparser.py
index e065096..3cc187f 100644
--- a/bs4/builder/_htmlparser.py
+++ b/bs4/builder/_htmlparser.py
@@ -378,10 +378,10 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
         parser.soup = self.soup
         try:
             parser.feed(markup)
+            parser.close()
         except AssertionError as e:
             # html.parser raises AssertionError in rare cases to
             # indicate a fatal problem with the markup, especially
             # when there's an error in the doctype declaration.
             raise ParserRejectedMarkup(e)
-        parser.close()
         parser.already_closed_empty_element = []
diff --git a/bs4/tests/fuzz/crash-ffbdfa8a2b26f13537b68d3794b0478a4090ee4a.testcase b/bs4/tests/fuzz/crash-ffbdfa8a2b26f13537b68d3794b0478a4090ee4a.testcase
Binary files differ
new file mode 100644
index 0000000..8857115
--- /dev/null
+++ b/bs4/tests/fuzz/crash-ffbdfa8a2b26f13537b68d3794b0478a4090ee4a.testcase
diff --git a/bs4/tests/test_fuzz.py b/bs4/tests/test_fuzz.py
index 2857830..f29802d 100644
--- a/bs4/tests/test_fuzz.py
+++ b/bs4/tests/test_fuzz.py
@@ -68,6 +68,7 @@ class TestFuzz(object):
     @pytest.mark.parametrize(
         "filename", [
             "clusterfuzz-testcase-minimized-bs4_fuzzer-5703933063462912",
+            "crash-ffbdfa8a2b26f13537b68d3794b0478a4090ee4a",
         ]
     )
     def test_rejected_markup(self, filename):