Added fuzz tests.

author: Leonard Richardson <leonardr@segfault.org> 2023-03-20 09:13:58 -0400
committer: Leonard Richardson <leonardr@segfault.org> 2023-03-20 09:13:58 -0400
commit: 49a5fe0ade3c99ae516a7894c45279af0644a7f3 (patch)
tree: aa6db265930fc41b2ab7fbc92889ac3bc3c4c2bf /bs4/tests/test_fuzz.py
parent: 961f700cd9e0ed562291f1f8d2e6b460cf562fae (diff)
1 file changed, 45 insertions, 25 deletions
diff --git a/bs4/tests/test_fuzz.py b/bs4/tests/test_fuzz.py
index 2c4237b..a5f2c12 100644
--- a/bs4/tests/test_fuzz.py
+++ b/bs4/tests/test_fuzz.py
@@ -1,3 +1,13 @@
+"""This file contains test cases reported by third parties using
+fuzzing tools, primarily from Google's oss-fuzz project. Some of these
+represent real problems with Beautiful Soup, but many are problems in
+libraries that Beautiful Soup depends on, and many of the test cases
+represent different ways of triggering the same problem.
+
+Grouping these test cases together makes it easy to see which test
+cases represent the same problem, and puts the test cases in close
+proximity to code that can trigger the problems.
+"""
 import os
 import pytest
 from bs4 import (
@@ -7,6 +17,41 @@ from bs4 import (
 
 class TestFuzz(object):
 
+    # This class of error has been fixed by catching a less helpful
+    # exception from html.parser and raising ParserRejectedMarkup
+    # instead.
+    @pytest.mark.parametrize(
+        "filename", [
+            "clusterfuzz-testcase-minimized-bs4_fuzzer-5703933063462912",
+        ]
+    )
+    def test_rejected_markup(self, filename):
+        markup = self.__markup(filename)
+        with pytest.raises(ParserRejectedMarkup):
+            BeautifulSoup(markup, 'html.parser')
+
+    # This class of error has to do with very deeply nested documents
+    # which overflow the Python call stack when the tree is converted
+    # to a string. This is an issue with Beautiful Soup. See
+    # [bug=1471755], for example.
+    @pytest.mark.skip("recursion limit exceeded")
+    @pytest.mark.parametrize(
+        "filename", [
+            "clusterfuzz-testcase-minimized-bs4_fuzzer-5984173902397440",
+            "clusterfuzz-testcase-minimized-bs4_fuzzer-5167584867909632",
+            "clusterfuzz-testcase-minimized-bs4_fuzzer-5984173902397440",
+            "clusterfuzz-testcase-minimized-bs4_fuzzer-6124268085182464",
+            "clusterfuzz-testcase-minimized-bs4_fuzzer-6450958476902400",
+        ]
+    )
+    def test_recursion_limit_exceeded(self, filename):
+        markup = self.__markup(filename)
+        with pytest.raises(RecursionError):
+            BeautifulSoup(markup, 'html.parser').encode()
+
+    # This class of error represents problems with html5lib's parser,
+    # not Beautiful Soup.
+    @pytest.mark.skip("html5lib problems")
     @pytest.mark.parametrize(
         "filename", [
             # b"""ÿ<!DOCTyPEV PUBLIC'''Ð'"""
@@ -31,32 +76,7 @@ class TestFuzz(object):
     def test_html5lib_parse_errors(self, filename):
         markup = self.__markup(filename)
         print(BeautifulSoup(markup, 'html5lib').encode())
-        
-    @pytest.mark.parametrize(
-        "filename", [
-            "clusterfuzz-testcase-minimized-bs4_fuzzer-5703933063462912",
-        ]
-    )
-    def test_rejected_markup(self, filename):
-        markup = self.__markup(filename)
-        with pytest.raises(ParserRejectedMarkup):
-            BeautifulSoup(markup, 'html.parser')
 
-    @pytest.mark.skip("recursion")
-    @pytest.mark.parametrize(
-        "filename", [
-            "clusterfuzz-testcase-minimized-bs4_fuzzer-5984173902397440",
-            "clusterfuzz-testcase-minimized-bs4_fuzzer-5167584867909632",
-            "clusterfuzz-testcase-minimized-bs4_fuzzer-5984173902397440",
-            "clusterfuzz-testcase-minimized-bs4_fuzzer-6124268085182464",
-            "clusterfuzz-testcase-minimized-bs4_fuzzer-6450958476902400",
-        ]
-    )
-    def test_recursion_limit_exceeded(self, filename):
-        markup = self.__markup(filename)
-        with pytest.raises(RecursionError):
-            BeautifulSoup(markup, 'html.parser').encode()
-        
     def __markup(self, filename):
         this_dir = os.path.split(__file__)[0]
         path = os.path.join(this_dir, 'fuzz', filename)
author	Leonard Richardson <leonardr@segfault.org>	2023-03-20 09:13:58 -0400
committer	Leonard Richardson <leonardr@segfault.org>	2023-03-20 09:13:58 -0400
commit	49a5fe0ade3c99ae516a7894c45279af0644a7f3 (patch)
tree	aa6db265930fc41b2ab7fbc92889ac3bc3c4c2bf /bs4/tests/test_fuzz.py
parent	961f700cd9e0ed562291f1f8d2e6b460cf562fae (diff)