summary refs log tree commit diff
path: root/bs4/testing.py
diff options
context:
space:
mode:
author Leonard Richardson <leonardr@segfault.org> 2021-04-08 23:26:50 -0400
committer Leonard Richardson <leonardr@segfault.org> 2021-04-08 23:26:50 -0400
commit 8d73b97105bf6534057ee93af6795a2a0aceb993 (patch)
tree 4f5bbf82bdbd95692b07ef2e36ce2ae0fced9827 /bs4/testing.py
parent 34e0ce8a9dd43ada1c55b50a156fbce63b1e2ebb (diff)
Brought in fuzz tests from the oss-fuzz project into Beautiful Soup's unit test suite.
Diffstat (limited to 'bs4/testing.py')
-rw-r--r-- bs4/testing.py 42
1 files changed, 39 insertions, 3 deletions
diff --git a/bs4/testing.py b/bs4/testing.py
index a2f83a1..87cd13f 100644
--- a/bs4/testing.py
+++ b/bs4/testing.py
@@ -8,6 +8,7 @@ import pickle
import copy
import functools
import unittest
+import warnings
from unittest import TestCase
from bs4 import BeautifulSoup
from bs4.element import (
@@ -228,7 +229,42 @@ class SoupTest(unittest.TestCase):
return child
-class HTMLTreeBuilderSmokeTest(object):
+class TreeBuilderSmokeTest(object):
+ # Tests that are common to HTML and XML tree builders.
+
+ def test_fuzzed_input(self):
+ # This test centralizes in one place the various fuzz tests
+ # for Beautiful Soup created by the oss-fuzz project.
+
+ # These strings superficially resemble markup, but they
+ # generally can't be parsed into anything. The best we can
+ # hope for is that parsing these strings won't crash the
+ # parser.
+
+ # n.b. Every entry below is commented out because these fuzz
+ # tests _do_ crash the parser. However, the crashes are due to
+ # bugs in html.parser, not Beautiful Soup -- otherwise I'd fix
+ # the bugs!
+
+ bad_markup = [
+ # https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=28873
+ # https://github.com/guidovranken/python-library-fuzzers/blob/master/corp-html/519e5b4269a01185a0d5e76295251921da2f0700
+ # https://bugs.python.org/issue37747
+ #
+ #b'\n<![\xff\xfe\xfe\xcd\x00',
+
+ # https://github.com/guidovranken/python-library-fuzzers/blob/master/corp-html/de32aa55785be29bbc72a1a8e06b00611fb3d9f8
+ # https://bugs.python.org/issue34480
+ #
+ #b'<![n\x00'
+ ]
+ for markup in bad_markup:
+ with warnings.catch_warnings(record=False):
+ soup = self.soup(markup)
+ pass
+
+
+class HTMLTreeBuilderSmokeTest(TreeBuilderSmokeTest):
"""A basic test of a treebuilder's competence.
@@ -651,7 +687,7 @@ Hello, world!
markup = b'<a class="foo bar">'
soup = self.soup(markup)
self.assertEqual(['foo', 'bar'], soup.a['class'])
-
+
#
# Generally speaking, tests below this point are more tests of
# Beautiful Soup than tests of the tree builders. But parsers are
@@ -881,7 +917,7 @@ Hello, world!
self.linkage_validator(soup)
-class XMLTreeBuilderSmokeTest(object):
+class XMLTreeBuilderSmokeTest(TreeBuilderSmokeTest):
def test_pickle_and_unpickle_identity(self):
# Pickling a tree, then unpickling it, yields a tree identical