summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--bs4/testing.py42
-rw-r--r--bs4/tests/test_htmlparser.py7
-rw-r--r--doc/source/index.rst2
3 files changed, 46 insertions, 5 deletions
diff --git a/bs4/testing.py b/bs4/testing.py
index a2f83a1..87cd13f 100644
--- a/bs4/testing.py
+++ b/bs4/testing.py
@@ -8,6 +8,7 @@ import pickle
import copy
import functools
import unittest
+import warnings
from unittest import TestCase
from bs4 import BeautifulSoup
from bs4.element import (
@@ -228,7 +229,42 @@ class SoupTest(unittest.TestCase):
return child
-class HTMLTreeBuilderSmokeTest(object):
+class TreeBuilderSmokeTest(object):
+ # Tests that are common to HTML and XML tree builders.
+
+ def test_fuzzed_input(self):
+ # This test centralizes in one place the various fuzz tests
+ # for Beautiful Soup created by the oss-fuzz project.
+
+ # These strings superficially resemble markup, but they
+ # generally can't be parsed into anything. The best we can
+ # hope for is that parsing these strings won't crash the
+ # parser.
+
+ # n.b. This markup is commented out because these fuzz tests
+ # _do_ crash the parser. However the crashes are due to bugs
+ # in html.parser, not Beautiful Soup -- otherwise I'd fix the
+ # bugs!
+
+ bad_markup = [
+ # https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=28873
+ # https://github.com/guidovranken/python-library-fuzzers/blob/master/corp-html/519e5b4269a01185a0d5e76295251921da2f0700
+ # https://bugs.python.org/issue37747
+ #
+ #b'\n<![\xff\xfe\xfe\xcd\x00',
+
+ #https://github.com/guidovranken/python-library-fuzzers/blob/master/corp-html/de32aa55785be29bbc72a1a8e06b00611fb3d9f8
+ # https://bugs.python.org/issue34480
+ #
+ #b'<![n\x00'
+ ]
+ for markup in bad_markup:
+ with warnings.catch_warnings(record=False):
+ soup = self.soup(markup)
+ pass
+
+
+class HTMLTreeBuilderSmokeTest(TreeBuilderSmokeTest):
"""A basic test of a treebuilder's competence.
@@ -651,7 +687,7 @@ Hello, world!
markup = b'<a class="foo bar">'
soup = self.soup(markup)
self.assertEqual(['foo', 'bar'], soup.a['class'])
-
+
#
# Generally speaking, tests below this point are more tests of
# Beautiful Soup than tests of the tree builders. But parsers are
@@ -881,7 +917,7 @@ Hello, world!
self.linkage_validator(soup)
-class XMLTreeBuilderSmokeTest(object):
+class XMLTreeBuilderSmokeTest(TreeBuilderSmokeTest):
def test_pickle_and_unpickle_identity(self):
# Pickling a tree, then unpickling it, yields a tree identical
diff --git a/bs4/tests/test_htmlparser.py b/bs4/tests/test_htmlparser.py
index 7ee91aa..db85d2d 100644
--- a/bs4/tests/test_htmlparser.py
+++ b/bs4/tests/test_htmlparser.py
@@ -3,6 +3,7 @@ trees."""
from pdb import set_trace
import pickle
+import warnings
from bs4.testing import SoupTest, HTMLTreeBuilderSmokeTest
from bs4.builder import HTMLParserTreeBuilder
from bs4.builder._htmlparser import BeautifulSoupHTMLParser
@@ -94,4 +95,8 @@ class TestHTMLParserSubclass(SoupTest):
that doesn't cause a crash.
"""
parser = BeautifulSoupHTMLParser()
- parser.error("don't crash")
+ with warnings.catch_warnings(record=True) as warns:
+ parser.error("don't crash")
+ [warning] = warns
+ assert "don't crash" == str(warning.message)
+
diff --git a/doc/source/index.rst b/doc/source/index.rst
index 63e74e2..88b8475 100644
--- a/doc/source/index.rst
+++ b/doc/source/index.rst
@@ -439,7 +439,7 @@ keyword argument into the ``BeautifulSoup`` constructor::
no_list_soup.p['class']
# 'body strikeout'
-You can use ```get_attribute_list`` to get a value that's always a
+You can use ``get_attribute_list`` to get a value that's always a
list, whether or not it's a multi-valued attribute::
id_soup.p.get_attribute_list('id')