summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Leonard Richardson <leonardr@segfault.org> 2023-06-04 18:44:29 -0400
committer: Leonard Richardson <leonardr@segfault.org> 2023-06-04 18:44:29 -0400
commit: 418ab58f94ea4a0957bcad8e7520bb3fb62d1394 (patch)
tree: c1e330d20245e08272310a03e6a4514255fd24f5
parent: a9132eb00c8ebb66569baa9e09324a7ba69c54d1 (diff)
Fixed a case found by Mengyuhan where html.parser giving up on
markup would result in an AssertionError instead of a ParserRejectedMarkup exception.
-rw-r--r-- CHANGELOG 4
-rw-r--r-- bs4/builder/_htmlparser.py 2
-rw-r--r-- bs4/tests/fuzz/crash-ffbdfa8a2b26f13537b68d3794b0478a4090ee4a.testcase bin 0 -> 103 bytes
-rw-r--r-- bs4/tests/test_fuzz.py 1
4 files changed, 6 insertions, 1 deletions
diff --git a/CHANGELOG b/CHANGELOG
index 94f1209..644b318 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -5,6 +5,10 @@
of .hidden is not a documented or supported feature, so don't do this,
but it's not too difficult to keep the old behavior working.
+* Fixed a case found by Mengyuhan where html.parser giving up on
+ markup would result in an AssertionError instead of a
+ ParserRejectedMarkup exception.
+
= 4.12.2 (20230407)
* Fixed an unhandled exception in BeautifulSoup.decode_contents
diff --git a/bs4/builder/_htmlparser.py b/bs4/builder/_htmlparser.py
index e065096..3cc187f 100644
--- a/bs4/builder/_htmlparser.py
+++ b/bs4/builder/_htmlparser.py
@@ -378,10 +378,10 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
parser.soup = self.soup
try:
parser.feed(markup)
+ parser.close()
except AssertionError as e:
# html.parser raises AssertionError in rare cases to
# indicate a fatal problem with the markup, especially
# when there's an error in the doctype declaration.
raise ParserRejectedMarkup(e)
- parser.close()
parser.already_closed_empty_element = []
diff --git a/bs4/tests/fuzz/crash-ffbdfa8a2b26f13537b68d3794b0478a4090ee4a.testcase b/bs4/tests/fuzz/crash-ffbdfa8a2b26f13537b68d3794b0478a4090ee4a.testcase
new file mode 100644
index 0000000..8857115
--- /dev/null
+++ b/bs4/tests/fuzz/crash-ffbdfa8a2b26f13537b68d3794b0478a4090ee4a.testcase
Binary files differ
diff --git a/bs4/tests/test_fuzz.py b/bs4/tests/test_fuzz.py
index 2857830..f29802d 100644
--- a/bs4/tests/test_fuzz.py
+++ b/bs4/tests/test_fuzz.py
@@ -68,6 +68,7 @@ class TestFuzz(object):
@pytest.mark.parametrize(
"filename", [
"clusterfuzz-testcase-minimized-bs4_fuzzer-5703933063462912",
+ "crash-ffbdfa8a2b26f13537b68d3794b0478a4090ee4a",
]
)
def test_rejected_markup(self, filename):