summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-4670634698080256.testcase | 1
-rw-r--r-- bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-5000587759190016.testcase | bin 0 -> 15347 bytes
-rw-r--r-- bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-5270998950477824.testcase | bin 0 -> 12 bytes
-rw-r--r-- bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-5375146639360000.testcase | 1
-rw-r--r-- bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-5492400320282624.testcase | bin 0 -> 11502 bytes
-rw-r--r-- bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-6306874195312640.testcase | 1
-rw-r--r-- bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-6401239223762944.testcase | bin 0 -> 3536 bytes
-rw-r--r-- bs4/tests/test_fuzz.py | 97
8 files changed, 95 insertions, 5 deletions
diff --git a/bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-4670634698080256.testcase b/bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-4670634698080256.testcase
new file mode 100644
index 0000000..4828f8a
--- /dev/null
+++ b/bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-4670634698080256.testcase
@@ -0,0 +1 @@
+ <css \ No newline at end of file
diff --git a/bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-5000587759190016.testcase b/bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-5000587759190016.testcase
new file mode 100644
index 0000000..8a585ce
--- /dev/null
+++ b/bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-5000587759190016.testcase
Binary files differ
diff --git a/bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-5270998950477824.testcase b/bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-5270998950477824.testcase
new file mode 100644
index 0000000..fd41142
--- /dev/null
+++ b/bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-5270998950477824.testcase
Binary files differ
diff --git a/bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-5375146639360000.testcase b/bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-5375146639360000.testcase
new file mode 100644
index 0000000..6248b2c
--- /dev/null
+++ b/bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-5375146639360000.testcase
@@ -0,0 +1 @@
+ ><applet></applet><applet></applet><apple|><applet><applet><appl><applet><applet></applet></applet></applet></applet><applet></applet><apple>t<applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet>et><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><azplet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><appl
et><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><plet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet><applet
><applet><applet><applet><applet><applet><applet><applet><applet><applet></applet></applet></applet></applet></appt></applet></applet></applet></applet></applet></applet></applet></applet></applet></applet></applet></applet></applet></applet></applet></applet></applet></applet><<meta charset=utf-8> \ No newline at end of file
diff --git a/bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-5492400320282624.testcase b/bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-5492400320282624.testcase
new file mode 100644
index 0000000..107da53
--- /dev/null
+++ b/bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-5492400320282624.testcase
Binary files differ
diff --git a/bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-6306874195312640.testcase b/bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-6306874195312640.testcase
new file mode 100644
index 0000000..b60a250
--- /dev/null
+++ b/bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-6306874195312640.testcase
@@ -0,0 +1 @@
+- <math><select><mi><select><select>t \ No newline at end of file
diff --git a/bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-6401239223762944.testcase b/bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-6401239223762944.testcase
new file mode 100644
index 0000000..d8bbca3
--- /dev/null
+++ b/bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-6401239223762944.testcase
Binary files differ
diff --git a/bs4/tests/test_fuzz.py b/bs4/tests/test_fuzz.py
index f778539..92728c0 100644
--- a/bs4/tests/test_fuzz.py
+++ b/bs4/tests/test_fuzz.py
@@ -14,13 +14,54 @@ from bs4 import (
BeautifulSoup,
ParserRejectedMarkup,
)
+try:
+ from soupsieve.util import SelectorSyntaxError
+ import lxml
+ import html5lib
+ fully_fuzzable = True
+except ImportError:
+ fully_fuzzable = False
+
+@pytest.mark.skipif(not fully_fuzzable, reason="Prerequisites for fuzz tests are not installed.")
class TestFuzz(object):
# Test case markup files from fuzzers are given this extension so
# they can be included in builds.
TESTCASE_SUFFIX = ".testcase"
+ # Copied 20230512 from
+ # https://github.com/google/oss-fuzz/blob/4ac6a645a197a695fe76532251feb5067076b3f3/projects/bs4/bs4_fuzzer.py
+ #
+ # Copying the code lets us precisely duplicate the behavior of
+ # oss-fuzz. The downside is that this code changes over time, so
+ # multiple copies of the code must be kept around to run against
+ # older tests. I'm not sure what to do about this, but I may
+ # retire old tests after a time.
+ def fuzz_test_with_css(self, filename):
+ data = self.__markup(filename)
+ parsers = ['lxml-xml', 'html5lib', 'html.parser', 'lxml']
+ try:
+ idx = int(data[0]) % len(parsers)
+ except ValueError:
+ return
+
+ css_selector, data = data[1:10], data[10:]
+
+ try:
+ soup = BeautifulSoup(data[1:], features=parsers[idx])
+ except ParserRejectedMarkup:
+ return
+ except ValueError:
+ return
+
+ list(soup.find_all(True))
+ try:
+ soup.css.select(css_selector.decode('utf-8', 'replace'))
+ except SelectorSyntaxError:
+ return
+ soup.prettify()
+
# This class of error has been fixed by catching a less helpful
# exception from html.parser and raising ParserRejectedMarkup
# instead.
@@ -33,11 +74,14 @@ class TestFuzz(object):
markup = self.__markup(filename)
with pytest.raises(ParserRejectedMarkup):
BeautifulSoup(markup, 'html.parser')
-
+
# This class of error has to do with very deeply nested documents
# which overflow the Python call stack when the tree is converted
# to a string. This is an issue with Beautiful Soup which was fixed
# as part of [bug=1471755].
+ #
+ # These test cases are in the older format that doesn't specify
+ # which parser to use or give a CSS selector.
@pytest.mark.parametrize(
"filename", [
"clusterfuzz-testcase-minimized-bs4_fuzzer-5984173902397440",
@@ -46,18 +90,44 @@ class TestFuzz(object):
"clusterfuzz-testcase-minimized-bs4_fuzzer-6450958476902400",
]
)
- def test_deeply_nested_document(self, filename):
+ def test_deeply_nested_document_without_css(self, filename):
# Parsing the document and encoding it back to a string is
# sufficient to demonstrate that the overflow problem has
# been fixed.
markup = self.__markup(filename)
BeautifulSoup(markup, 'html.parser').encode()
+ # This class of error has to do with very deeply nested documents
+ # which overflow the Python call stack when the tree is converted
+ # to a string. This is an issue with Beautiful Soup which was fixed
+ # as part of [bug=1471755].
+ @pytest.mark.parametrize(
+ "filename", [
+ "clusterfuzz-testcase-minimized-bs4_fuzzer-5000587759190016",
+ "clusterfuzz-testcase-minimized-bs4_fuzzer-5375146639360000",
+ "clusterfuzz-testcase-minimized-bs4_fuzzer-5492400320282624",
+ ]
+ )
+ def test_deeply_nested_document(self, filename):
+ self.fuzz_test_with_css(filename)
+
+ @pytest.mark.parametrize(
+ "filename", [
+ "clusterfuzz-testcase-minimized-bs4_fuzzer-4670634698080256",
+ "clusterfuzz-testcase-minimized-bs4_fuzzer-5270998950477824",
+ ]
+ )
+ def test_soupsieve_errors(self, filename):
+ self.fuzz_test_with_css(filename)
+
# This class of error represents problems with html5lib's parser,
# not Beautiful Soup. I use
# https://github.com/html5lib/html5lib-python/issues/568 to notify
# the html5lib developers of these issues.
- @pytest.mark.skip("html5lib problems")
+ #
+ # These test cases are in the older format that doesn't specify
+ # which parser to use or give a CSS selector.
+ @pytest.mark.skip(reason="html5lib-specific problems")
@pytest.mark.parametrize(
"filename", [
# b"""ÿ<!DOCTyPEV PUBLIC'''Ð'"""
@@ -68,7 +138,7 @@ class TestFuzz(object):
# b'-<math><sElect><mi><sElect><sElect>'
"clusterfuzz-testcase-minimized-bs4_fuzzer-5843991618256896",
-
+
# b'ñ<table><svg><html>'
"clusterfuzz-testcase-minimized-bs4_fuzzer-6241471367348224",
@@ -79,10 +149,27 @@ class TestFuzz(object):
"crash-0d306a50c8ed8bcd0785b67000fcd5dea1d33f08"
]
)
- def test_html5lib_parse_errors(self, filename):
+ def test_html5lib_parse_errors_without_css(self, filename):
markup = self.__markup(filename)
print(BeautifulSoup(markup, 'html5lib').encode())
+ # This class of error represents problems with html5lib's parser,
+ # not Beautiful Soup. I use
+ # https://github.com/html5lib/html5lib-python/issues/568 to notify
+ # the html5lib developers of these issues.
+ @pytest.mark.skip(reason="html5lib-specific problems")
+ @pytest.mark.parametrize(
+ "filename", [
+ # b'- \xff\xff <math>\x10<select><mi><select><select>t'
+ "clusterfuzz-testcase-minimized-bs4_fuzzer-6306874195312640",
+
+ # b'\xb1<a>\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff'
+ "clusterfuzz-testcase-minimized-bs4_fuzzer-6401239223762944"
+ ]
+ )
+ def test_html5lib_parse_errors(self, filename):
+ self.fuzz_test_with_css(filename)
+
def __markup(self, filename):
if not filename.endswith(self.TESTCASE_SUFFIX):
filename += self.TESTCASE_SUFFIX