summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CHANGELOG2
-rw-r--r--bs4/builder/__init__.py20
-rw-r--r--bs4/tests/test_soup.py10
-rw-r--r--doc/source/index.rst23
4 files changed, 43 insertions, 12 deletions
diff --git a/CHANGELOG b/CHANGELOG
index edf9648..da4aaaf 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -4,7 +4,7 @@
keyword arguments into the BeautifulSoup constructor. The main
reason to do this right now is to change how multi-valued
attributes are treated -- you can do this with the
- 'cdata_list_attributes' argument. [bug=1832978]
+ `multi_valued_attributes` argument. [bug=1832978]
= 4.7.1 (20190106)
diff --git a/bs4/builder/__init__.py b/bs4/builder/__init__.py
index 9dad920..c5e6e84 100644
--- a/bs4/builder/__init__.py
+++ b/bs4/builder/__init__.py
@@ -100,11 +100,23 @@ class TreeBuilder(object):
USE_DEFAULT = object()
- def __init__(self, cdata_list_attributes=USE_DEFAULT):
+ def __init__(self, multi_valued_attributes=USE_DEFAULT):
+ """Constructor.
+
+ :param multi_valued_attributes: If this is set to None, the
+ TreeBuilder will not turn any values for attributes like
+ 'class' into lists. Setting this to a dictionary will
+ customize this behavior; look at DEFAULT_CDATA_LIST_ATTRIBUTES
+ for an example.
+
+ Internally, these are called "CDATA list attributes", but that
+ probably doesn't make sense to an end-user, so the argument name
+ is `multi_valued_attributes`.
+ """
self.soup = None
- if cdata_list_attributes is self.USE_DEFAULT:
- cdata_list_attributes = self.DEFAULT_CDATA_LIST_ATTRIBUTES
- self.cdata_list_attributes = cdata_list_attributes
+ if multi_valued_attributes is self.USE_DEFAULT:
+ multi_valued_attributes = self.DEFAULT_CDATA_LIST_ATTRIBUTES
+ self.cdata_list_attributes = multi_valued_attributes
def initialize_soup(self, soup):
"""The BeautifulSoup object has been initialized and is now
diff --git a/bs4/tests/test_soup.py b/bs4/tests/test_soup.py
index 213255d..a2242da 100644
--- a/bs4/tests/test_soup.py
+++ b/bs4/tests/test_soup.py
@@ -102,16 +102,16 @@ class TestConstructor(SoupTest):
self.assertEqual(" an id ", a['id'])
self.assertEqual(["a", "class"], a['class'])
- # TreeBuilder takes an argument called 'cdata_list_attributes' which lets
+ # TreeBuilder takes an argument called 'multi_valued_attributes' which lets
# you customize or disable this. As always, you can customize the TreeBuilder
# by passing in a keyword argument to the BeautifulSoup constructor.
- soup = self.soup(markup, builder=default_builder, cdata_list_attributes=None)
+ soup = self.soup(markup, builder=default_builder, multi_valued_attributes=None)
self.assertEqual(" a class ", soup.a['class'])
- # Here are two ways of saying that `id` is a CDATA list
- # attribute and 'class' is not.
+ # Here are two ways of saying that `id` is a multi-valued
+ # attribute in this context, but 'class' is not.
for switcheroo in ({'*': 'id'}, {'a': 'id'}):
- soup = self.soup(markup, builder=None, cdata_list_attributes=switcheroo)
+ soup = self.soup(markup, builder=None, multi_valued_attributes=switcheroo)
a = soup.a
self.assertEqual(["an", "id"], a['id'])
self.assertEqual(" a class ", a['class'])
diff --git a/doc/source/index.rst b/doc/source/index.rst
index 61c4bb9..8376549 100644
--- a/doc/source/index.rst
+++ b/doc/source/index.rst
@@ -428,8 +428,15 @@ consolidated::
print(rel_soup.p)
# <p>Back to the <a rel="index contents">homepage</a></p>
-You can use ```get_attribute_list`` to get a value that's always a list,
-string, whether or not it's a multi-valued atribute
+ You can disable this by passing ``multi_valued_attributes=None`` as a
+keyword argument into the ``BeautifulSoup`` constructor::
+
+ no_list_soup = BeautifulSoup('<p class="body strikeout"></p>', 'html', multi_valued_attributes=None)
+ no_list_soup.p['class']
+ # u'body strikeout'
+
+You can use ``get_attribute_list`` to get a value that's always a
+list, whether or not it's a multi-valued attribute::
id_soup.p.get_attribute_list('id')
# ["my id"]
@@ -440,8 +447,20 @@ If you parse a document as XML, there are no multi-valued attributes::
xml_soup.p['class']
# u'body strikeout'
+Again, you can configure this using the ``multi_valued_attributes`` argument::
+
+ class_is_multi= { '*' : 'class'}
+ xml_soup = BeautifulSoup('<p class="body strikeout"></p>', 'xml', multi_valued_attributes=class_is_multi)
+ xml_soup.p['class']
+ # [u'body', u'strikeout']
+You probably won't need to do this, but if you do, use the defaults as
+a guide. They implement the rules described in the HTML specification::
+ from bs4.builder import builder_registry
+ builder_registry.lookup('html').DEFAULT_CDATA_LIST_ATTRIBUTES
+
+
``NavigableString``
-------------------