diff options
author | Leonard Richardson <leonardr@segfault.org> | 2023-02-15 20:37:18 -0500 |
---|---|---|
committer | Leonard Richardson <leonardr@segfault.org> | 2023-02-15 20:37:18 -0500 |
commit | e0bbee776ca241d908af36e4e5ce0d0b1bedceaf (patch) | |
tree | d05d4fb74ebfeb14d8d5fd0a98deec229c1b5789 /bs4/tests/__init__.py | |
parent | 8432abbfa16efe13cd0c057f91bb42f1f6cb3e36 (diff) |
When the html.parser parser decides it can't parse a document, Beautiful
Soup now consistently propagates this fact by raising a
ParserRejectedMarkup error. [bug=2007343]
Diffstat (limited to 'bs4/tests/__init__.py')
-rw-r--r-- | bs4/tests/__init__.py | 36 |
1 file changed, 5 insertions, 31 deletions
```diff
diff --git a/bs4/tests/__init__.py b/bs4/tests/__init__.py
index f4d62db..d8b3b9b 100644
--- a/bs4/tests/__init__.py
+++ b/bs4/tests/__init__.py
@@ -297,37 +297,11 @@ class TreeBuilderSmokeTest(object):
             markup, multi_valued_attributes=multi_valued_attributes
         )
         assert soup.a['class'] == ['a', 'b', 'c']
-
-    def test_fuzzed_input(self):
-        # This test centralizes in one place the various fuzz tests
-        # for Beautiful Soup created by the oss-fuzz project.
-
-        # These strings superficially resemble markup, but they
-        # generally can't be parsed into anything. The best we can
-        # hope for is that parsing these strings won't crash the
-        # parser.
-        #
-        # n.b. This markup is commented out because these fuzz tests
-        # _do_ crash the parser. However the crashes are due to bugs
-        # in html.parser, not Beautiful Soup -- otherwise I'd fix the
-        # bugs!
-
-        bad_markup = [
-            # https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=28873
-            # https://github.com/guidovranken/python-library-fuzzers/blob/master/corp-html/519e5b4269a01185a0d5e76295251921da2f0700
-            # https://bugs.python.org/issue37747
-            #
-            #b'\n<![\xff\xfe\xfe\xcd\x00',
-
-            #https://github.com/guidovranken/python-library-fuzzers/blob/master/corp-html/de32aa55785be29bbc72a1a8e06b00611fb3d9f8
-            # https://bugs.python.org/issue34480
-            #
-            #b'<![n\x00'
-        ]
-        for markup in bad_markup:
-            with warnings.catch_warnings(record=False):
-                soup = self.soup(markup)
-
+
+    def test_invalid_doctype(self):
+        markup = '<![if word]>content<![endif]>'
+        markup = '<!DOCTYPE html]ff>'
+        soup = self.soup(markup)
 
 class HTMLTreeBuilderSmokeTest(TreeBuilderSmokeTest):
 
```