summaryrefslogtreecommitdiff
path: root/bs4/tests/test_soup.py
diff options
context:
space:
mode:
authorLeonard Richardson <leonardr@segfault.org>2019-07-07 15:59:09 -0400
committerLeonard Richardson <leonardr@segfault.org>2019-07-07 15:59:09 -0400
commit0159c0a4135f267aed0586ba9d829d0a3da25da8 (patch)
tree78f20c40c74910109651bb7fb0a1b6dc1a29a621 /bs4/tests/test_soup.py
parent0c3c1970dcb93bbe591707e43cfba9b24de45d05 (diff)
It's now possible to override a TreeBuilder's cdata_list_attributes dictionary by passing in a replacement. None will disable the feature altogether. [bug=1832978]
Diffstat (limited to 'bs4/tests/test_soup.py')
-rw-r--r--bs4/tests/test_soup.py29
1 files changed, 28 insertions, 1 deletions
diff --git a/bs4/tests/test_soup.py b/bs4/tests/test_soup.py
index 1c6b7a6..213255d 100644
--- a/bs4/tests/test_soup.py
+++ b/bs4/tests/test_soup.py
@@ -24,6 +24,7 @@ from bs4.dammit import (
EncodingDetector,
)
from bs4.testing import (
+ default_builder,
SoupTest,
skipIf,
)
@@ -89,7 +90,33 @@ class TestConstructor(SoupTest):
self.assertEqual(builder, soup.builder)
self.assertEqual(kwargs, builder.called_with)
-
+ def test_cdata_list_attributes(self):
+ # Most attribute values are represented as scalars, but the
+ # HTML standard says that some attributes, like 'class' have
+ # space-separated lists as values.
+ markup = '<a id=" an id " class=" a class "></a>'
+ soup = self.soup(markup)
+
+ # Note that the spaces are stripped for 'class' but not for 'id'.
+ a = soup.a
+ self.assertEqual(" an id ", a['id'])
+ self.assertEqual(["a", "class"], a['class'])
+
+ # TreeBuilder takes an argument called 'cdata_list_attributes' which lets
+ # you customize or disable this. As always, you can customize the TreeBuilder
+ # by passing in a keyword argument to the BeautifulSoup constructor.
+ soup = self.soup(markup, builder=default_builder, cdata_list_attributes=None)
+ self.assertEqual(" a class ", soup.a['class'])
+
+ # Here are two ways of saying that `id` is a CDATA list
+ # attribute and 'class' is not.
+ for switcheroo in ({'*': 'id'}, {'a': 'id'}):
+ soup = self.soup(markup, builder=None, cdata_list_attributes=switcheroo)
+ a = soup.a
+ self.assertEqual(["an", "id"], a['id'])
+ self.assertEqual(" a class ", a['class'])
+
+
class TestWarnings(SoupTest):
def _no_parser_specified(self, s, is_there=True):