diff options
-rw-r--r-- | CHANGELOG | 2 | ||||
-rw-r--r-- | bs4/builder/__init__.py | 20 | ||||
-rw-r--r-- | bs4/tests/test_soup.py | 10 | ||||
-rw-r--r-- | doc/source/index.rst | 23 |
4 files changed, 43 insertions, 12 deletions
@@ -4,7 +4,7 @@ keyword arguments into the BeautifulSoup constructor. The main reason to do this right now is to change how multi-valued attributes are treated -- you can do this with the - 'cdata_list_attributes' argument. [bug=1832978] + `multi_valued_attributes` argument. [bug=1832978] = 4.7.1 (20190106) diff --git a/bs4/builder/__init__.py b/bs4/builder/__init__.py index 9dad920..c5e6e84 100644 --- a/bs4/builder/__init__.py +++ b/bs4/builder/__init__.py @@ -100,11 +100,23 @@ class TreeBuilder(object): USE_DEFAULT = object() - def __init__(self, cdata_list_attributes=USE_DEFAULT): + def __init__(self, multi_valued_attributes=USE_DEFAULT): + """Constructor. + + :param multi_valued_attributes: If this is set to None, the + TreeBuilder will not turn any values for attributes like + 'class' into lists. Setting this to a dictionary will + customize this behavior; look at DEFAULT_CDATA_LIST_ATTRIBUTES + for an example. + + Internally, these are called "CDATA list attributes", but that + probably doesn't make sense to an end-user, so the argument name + is `multi_valued_attributes`. 
+ """ self.soup = None - if cdata_list_attributes is self.USE_DEFAULT: - cdata_list_attributes = self.DEFAULT_CDATA_LIST_ATTRIBUTES - self.cdata_list_attributes = cdata_list_attributes + if multi_valued_attributes is self.USE_DEFAULT: + multi_valued_attributes = self.DEFAULT_CDATA_LIST_ATTRIBUTES + self.cdata_list_attributes = multi_valued_attributes def initialize_soup(self, soup): """The BeautifulSoup object has been initialized and is now diff --git a/bs4/tests/test_soup.py b/bs4/tests/test_soup.py index 213255d..a2242da 100644 --- a/bs4/tests/test_soup.py +++ b/bs4/tests/test_soup.py @@ -102,16 +102,16 @@ class TestConstructor(SoupTest): self.assertEqual(" an id ", a['id']) self.assertEqual(["a", "class"], a['class']) - # TreeBuilder takes an argument called 'cdata_list_attributes' which lets + # TreeBuilder takes an argument called 'multi_valued_attributes' which lets # you customize or disable this. As always, you can customize the TreeBuilder # by passing in a keyword argument to the BeautifulSoup constructor. - soup = self.soup(markup, builder=default_builder, cdata_list_attributes=None) + soup = self.soup(markup, builder=default_builder, multi_valued_attributes=None) self.assertEqual(" a class ", soup.a['class']) - # Here are two ways of saying that `id` is a CDATA list - # attribute and 'class' is not. + # Here are two ways of saying that `id` is a multi-valued + # attribute in this context, but 'class' is not. 
for switcheroo in ({'*': 'id'}, {'a': 'id'}): - soup = self.soup(markup, builder=None, cdata_list_attributes=switcheroo) + soup = self.soup(markup, builder=None, multi_valued_attributes=switcheroo) a = soup.a self.assertEqual(["an", "id"], a['id']) self.assertEqual(" a class ", a['class']) diff --git a/doc/source/index.rst b/doc/source/index.rst index 61c4bb9..8376549 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -428,8 +428,15 @@ consolidated:: print(rel_soup.p) # <p>Back to the <a rel="index contents">homepage</a></p> -You can use ```get_attribute_list`` to get a value that's always a list, -string, whether or not it's a multi-valued atribute + You can disable this by passing ``multi_valued_attributes=None`` as a +keyword argument into the ``BeautifulSoup`` constructor:: + + no_list_soup = BeautifulSoup('<p class="body strikeout"></p>', 'html', multi_valued_attributes=None) + no_list_soup.p['class'] + # u'body strikeout' + +You can use ``get_attribute_list`` to get a value that's always a +list, whether or not it's a multi-valued attribute:: id_soup.p.get_attribute_list('id') # ["my id"] @@ -440,8 +447,20 @@ If you parse a document as XML, there are no multi-valued attributes:: xml_soup.p['class'] # u'body strikeout' +Again, you can configure this using the ``multi_valued_attributes`` argument:: + + class_is_multi= { '*' : 'class'} + xml_soup = BeautifulSoup('<p class="body strikeout"></p>', 'xml', multi_valued_attributes=class_is_multi) + xml_soup.p['class'] + # [u'body', u'strikeout'] +You probably won't need to do this, but if you do, use the defaults as +a guide. They implement the rules described in the HTML specification:: + from bs4.builder import builder_registry + builder_registry.lookup('html').DEFAULT_CDATA_LIST_ATTRIBUTES + + ``NavigableString`` ------------------- |