summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Leonard Richardson <leonardr@segfault.org> 2021-10-23 15:20:01 -0400
committer Leonard Richardson <leonardr@segfault.org> 2021-10-23 15:20:01 -0400
commit 0afd48f8b3069fb3577749374e20611b231cb829 (patch)
tree 6c156d1a1be01bbe9db95e0980b0f20af77a4b11
parent bbf5c99a147387e6acdc5405f59c8dcbea0164c2 (diff)
Fixed a crash when overriding multi_valued_attributes and using the
html5lib parser. [bug=1948488]
-rw-r--r-- CHANGELOG 3
-rw-r--r-- bs4/builder/_html5lib.py 4
-rw-r--r-- bs4/tests/__init__.py 19
3 files changed, 24 insertions, 2 deletions
diff --git a/CHANGELOG b/CHANGELOG
index 5e02c4b..6e9e66d 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -11,6 +11,9 @@ Python 2 was revision 605.
to make it possible to treat ruby text specially in get_text() calls.
[bug=1941980]
+* Fixed a crash when overriding multi_valued_attributes and using the
+ html5lib parser. [bug=1948488]
+
= 4.10.0 (20210907)
* This is the first release of Beautiful Soup to only support Python
diff --git a/bs4/builder/_html5lib.py b/bs4/builder/_html5lib.py
index 69aefd7..914b1df 100644
--- a/bs4/builder/_html5lib.py
+++ b/bs4/builder/_html5lib.py
@@ -242,8 +242,8 @@ class AttrList(object):
def __setitem__(self, name, value):
# If this attribute is a multi-valued attribute for this element,
# turn its value into a list.
- list_attr = self.element.cdata_list_attributes
- if (name in list_attr['*']
+ list_attr = self.element.cdata_list_attributes or {}
+ if (name in list_attr.get('*', [])
or (self.element.name in list_attr
and name in list_attr[self.element.name])):
# A node that is being cloned may have already undergone
diff --git a/bs4/tests/__init__.py b/bs4/tests/__init__.py
index 6b70c34..6677d2e 100644
--- a/bs4/tests/__init__.py
+++ b/bs4/tests/__init__.py
@@ -252,6 +252,25 @@ class SoupTest(object):
class TreeBuilderSmokeTest(object):
# Tests that are common to HTML and XML tree builders.
+ @pytest.mark.parametrize(
+ "multi_valued_attributes",
+ [None, dict(b=['class']), {'*': ['notclass']}]
+ )
+ def test_attribute_not_multi_valued(self, multi_valued_attributes):
+ markup = '<a class="a b c">'
+ soup = self.soup(markup, multi_valued_attributes=multi_valued_attributes)
+ assert soup.a['class'] == 'a b c'
+
+ @pytest.mark.parametrize(
+ "multi_valued_attributes", [dict(a=['class']), {'*': ['class']}]
+ )
+ def test_attribute_multi_valued(self, multi_valued_attributes):
+ markup = '<a class="a b c">'
+ soup = self.soup(
+ markup, multi_valued_attributes=multi_valued_attributes
+ )
+ assert soup.a['class'] == ['a', 'b', 'c']
+
def test_fuzzed_input(self):
# This test centralizes in one place the various fuzz tests
# for Beautiful Soup created by the oss-fuzz project.