diff options
-rw-r--r-- | bs4/testing.py | 42 |
-rw-r--r-- | bs4/tests/test_htmlparser.py | 7 |
-rw-r--r-- | doc/source/index.rst | 2 |
3 files changed, 46 insertions, 5 deletions
diff --git a/bs4/testing.py b/bs4/testing.py index a2f83a1..87cd13f 100644 --- a/bs4/testing.py +++ b/bs4/testing.py @@ -8,6 +8,7 @@ import pickle import copy import functools import unittest +import warnings from unittest import TestCase from bs4 import BeautifulSoup from bs4.element import ( @@ -228,7 +229,42 @@ class SoupTest(unittest.TestCase): return child -class HTMLTreeBuilderSmokeTest(object): +class TreeBuilderSmokeTest(object): + # Tests that are common to HTML and XML tree builders. + + def test_fuzzed_input(self): + # This test centralizes in one place the various fuzz tests + # for Beautiful Soup created by the oss-fuzz. + + # These strings superficially resemble markup, but they + # generally can't be parsed into anything. The best we can + # hope for is that parsing these strings won't crash the + # parser. + + # n.b. This markup is commented out because these fuzz tests + # _do_ crash the parser. However the crashes are due to bugs + # in html.parser, not Beautiful Soup -- otherwise I'd fix the + # bugs! + + bad_markup = [ + # https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=28873 + # https://github.com/guidovranken/python-library-fuzzers/blob/master/corp-html/519e5b4269a01185a0d5e76295251921da2f0700 + # https://bugs.python.org/issue37747 + # + #b'\n<![\xff\xfe\xfe\xcd\x00', + + #https://github.com/guidovranken/python-library-fuzzers/blob/master/corp-html/de32aa55785be29bbc72a1a8e06b00611fb3d9f8 + # https://bugs.python.org/issue34480 + # + #b'<![n\x00' + ] + for markup in bad_markup: + with warnings.catch_warnings(record=False): + soup = self.soup(markup) + pass + + +class HTMLTreeBuilderSmokeTest(TreeBuilderSmokeTest): """A basic test of a treebuilder's competence. @@ -651,7 +687,7 @@ Hello, world! markup = b'<a class="foo bar">' soup = self.soup(markup) self.assertEqual(['foo', 'bar'], soup.a['class']) - + # # Generally speaking, tests below this point are more tests of # Beautiful Soup than tests of the tree builders. 
But parsers are @@ -881,7 +917,7 @@ Hello, world! self.linkage_validator(soup) -class XMLTreeBuilderSmokeTest(object): +class XMLTreeBuilderSmokeTest(TreeBuilderSmokeTest): def test_pickle_and_unpickle_identity(self): # Pickling a tree, then unpickling it, yields a tree identical diff --git a/bs4/tests/test_htmlparser.py b/bs4/tests/test_htmlparser.py index 7ee91aa..db85d2d 100644 --- a/bs4/tests/test_htmlparser.py +++ b/bs4/tests/test_htmlparser.py @@ -3,6 +3,7 @@ trees.""" from pdb import set_trace import pickle +import warnings from bs4.testing import SoupTest, HTMLTreeBuilderSmokeTest from bs4.builder import HTMLParserTreeBuilder from bs4.builder._htmlparser import BeautifulSoupHTMLParser @@ -94,4 +95,8 @@ class TestHTMLParserSubclass(SoupTest): that doesn't cause a crash. """ parser = BeautifulSoupHTMLParser() - parser.error("don't crash") + with warnings.catch_warnings(record=True) as warns: + parser.error("don't crash") + [warning] = warns + assert "don't crash" == str(warning.message) + diff --git a/doc/source/index.rst b/doc/source/index.rst index 63e74e2..88b8475 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -439,7 +439,7 @@ keyword argument into the ``BeautifulSoup`` constructor:: no_list_soup.p['class'] # 'body strikeout' -You can use ```get_attribute_list`` to get a value that's always a +You can use ``get_attribute_list`` to get a value that's always a list, whether or not it's a multi-valued atribute:: id_soup.p.get_attribute_list('id') |