When the html.parser parser decides it can't parse a document, Beautiful Soup now consistently propagates this fact by raising a ParserRejectedMarkup error. [bug=2007343]
author: Leonard Richardson <leonardr@segfault.org> 2023-02-15 20:37:18 -0500
committer: Leonard Richardson <leonardr@segfault.org> 2023-02-15 20:37:18 -0500
commit: e0bbee776ca241d908af36e4e5ce0d0b1bedceaf (patch)
tree: d05d4fb74ebfeb14d8d5fd0a98deec229c1b5789 /bs4/tests/__init__.py
parent: 8432abbfa16efe13cd0c057f91bb42f1f6cb3e36 (diff)
1 files changed, 5 insertions, 31 deletions
diff --git a/bs4/tests/__init__.py b/bs4/tests/__init__.py
index f4d62db..d8b3b9b 100644
--- a/bs4/tests/__init__.py
+++ b/bs4/tests/__init__.py
@@ -297,37 +297,11 @@ class TreeBuilderSmokeTest(object):
             markup, multi_valued_attributes=multi_valued_attributes
         )
         assert soup.a['class'] == ['a', 'b', 'c']
-        
-    def test_fuzzed_input(self):
-        # This test centralizes in one place the various fuzz tests
-        # for Beautiful Soup created by the oss-fuzz project.
-        
-        # These strings superficially resemble markup, but they
-        # generally can't be parsed into anything. The best we can
-        # hope for is that parsing these strings won't crash the
-        # parser.
-        #
-        # n.b. This markup is commented out because these fuzz tests
-        # _do_ crash the parser. However the crashes are due to bugs
-        # in html.parser, not Beautiful Soup -- otherwise I'd fix the
-        # bugs!
-        
-        bad_markup = [
-            # https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=28873
-            # https://github.com/guidovranken/python-library-fuzzers/blob/master/corp-html/519e5b4269a01185a0d5e76295251921da2f0700
-            # https://bugs.python.org/issue37747
-            #
-            #b'\n<![\xff\xfe\xfe\xcd\x00',
-
-            #https://github.com/guidovranken/python-library-fuzzers/blob/master/corp-html/de32aa55785be29bbc72a1a8e06b00611fb3d9f8
-            # https://bugs.python.org/issue34480
-            #
-            #b'<![n\x00'
-        ]
-        for markup in bad_markup:
-            with warnings.catch_warnings(record=False):
-                soup = self.soup(markup)
-        
+
+    def test_invalid_doctype(self):
+        markup = '<![if word]>content<![endif]>'
+        markup = '<!DOCTYPE html]ff>'
+        soup = self.soup(markup)
 
 class HTMLTreeBuilderSmokeTest(TreeBuilderSmokeTest):
author	Leonard Richardson <leonardr@segfault.org>	2023-02-15 20:37:18 -0500
committer	Leonard Richardson <leonardr@segfault.org>	2023-02-15 20:37:18 -0500
commit	e0bbee776ca241d908af36e4e5ce0d0b1bedceaf (patch)
tree	d05d4fb74ebfeb14d8d5fd0a98deec229c1b5789 /bs4/tests/__init__.py
parent	8432abbfa16efe13cd0c057f91bb42f1f6cb3e36 (diff)