summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--NEWS.txt8
-rw-r--r--bs4/element.py6
-rw-r--r--bs4/tests/test_tree.py21
-rw-r--r--doc/source/index.rst84
4 files changed, 74 insertions, 45 deletions
diff --git a/NEWS.txt b/NEWS.txt
index 9378f51..fc7cd68 100644
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -1,10 +1,14 @@
= 4.1.2 (Unreleased) =
+* As per PEP-8, allow searching by CSS class using the 'class_'
+ keyword argument. [bug=1037624]
+
* Use namespace prefixes for namespaced attribute names, instead of
the fully-qualified names given by the lxml parser. [bug=1037597]
-* When sniffing encodings, if the cchardet library is installed, use
- it instead of chardet. It's much faster. [bug=1020748]
+* When sniffing encodings, if the cchardet library is installed,
+ Beautiful Soup uses cchardet instead of chardet. cchardet is much
+ faster. [bug=1020748]
* Use logging.warning() instead of warnings.warn() to notify the user
that characters were replaced with REPLACEMENT
diff --git a/bs4/element.py b/bs4/element.py
index 4a4d3ed..2e3be46 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -1170,6 +1170,12 @@ class SoupStrainer(object):
kwargs['class'] = attrs
attrs = None
+ if 'class_' in kwargs:
+ # Treat class_="foo" as a search for the 'class'
+ # attribute, overriding any non-dict value for attrs.
+ kwargs['class'] = kwargs['class_']
+ del kwargs['class_']
+
if kwargs:
if attrs:
attrs = attrs.copy()
diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py
index cc573ed..9397f24 100644
--- a/bs4/tests/test_tree.py
+++ b/bs4/tests/test_tree.py
@@ -228,18 +228,24 @@ class TestFindAllByAttribute(TreeTest):
self.assertSelects(tree.find_all(attrs={'name' : 'name1'}),
["Name match."])
- # Passing class='class2' would cause a syntax error.
self.assertSelects(tree.find_all(attrs={'class' : 'class2'}),
["Class match."])
def test_find_all_by_class(self):
- # Passing in a string to 'attrs' will search the CSS class.
tree = self.soup("""
<a class="1">Class 1.</a>
<a class="2">Class 2.</a>
<b class="1">Class 1.</b>
<c class="3 4">Class 3 and 4.</c>
""")
+
+ # Passing in the class_ keyword argument will search against
+ # the 'class' attribute.
+ self.assertSelects(tree.find_all('a', class_='1'), ['Class 1.'])
+ self.assertSelects(tree.find_all('c', class_='3'), ['Class 3 and 4.'])
+ self.assertSelects(tree.find_all('c', class_='4'), ['Class 3 and 4.'])
+
+ # Passing in a string to 'attrs' will also search the CSS class.
self.assertSelects(tree.find_all('a', '1'), ['Class 1.'])
self.assertSelects(tree.find_all(attrs='1'), ['Class 1.', 'Class 1.'])
self.assertSelects(tree.find_all('c', '3'), ['Class 3 and 4.'])
@@ -248,17 +254,15 @@ class TestFindAllByAttribute(TreeTest):
def test_find_by_class_when_multiple_classes_present(self):
tree = self.soup("<gar class='foo bar'>Found it</gar>")
- attrs = { 'class' : re.compile("o") }
- f = tree.find_all("gar", attrs=attrs)
+ f = tree.find_all("gar", class_=re.compile("o"))
self.assertSelects(f, ["Found it"])
- f = tree.find_all("gar", re.compile("a"))
+ f = tree.find_all("gar", class_=re.compile("a"))
self.assertSelects(f, ["Found it"])
# Since the class is not the string "foo bar", but the two
# strings "foo" and "bar", this will not find anything.
- attrs = { 'class' : re.compile("o b") }
- f = tree.find_all("gar", attrs=attrs)
+ f = tree.find_all("gar", class_=re.compile("o b"))
self.assertSelects(f, [])
def test_find_all_with_non_dictionary_for_attrs_finds_by_class(self):
@@ -283,8 +287,9 @@ class TestFindAllByAttribute(TreeTest):
self.assertEqual([a, a2], soup.find_all("a", "foo"))
self.assertEqual([a], soup.find_all("a", "bar"))
- # If you specify the attribute as a string that contains a
+ # If you specify the class as a string that contains a
# space, only that specific value will be found.
+ self.assertEqual([a], soup.find_all("a", class_="foo bar"))
self.assertEqual([a], soup.find_all("a", "foo bar"))
self.assertEqual([], soup.find_all("a", "bar foo"))
diff --git a/doc/source/index.rst b/doc/source/index.rst
index 3c8cc76..e51ec84 100644
--- a/doc/source/index.rst
+++ b/doc/source/index.rst
@@ -20,9 +20,11 @@ The examples in this documentation should work the same way in Python
2.7 and Python 3.2.
You might be looking for the documentation for `Beautiful Soup 3
-<http://www.crummy.com/software/BeautifulSoup/bs3/documentation.html>`_. If
-you want to learn about the differences between Beautiful Soup 3 and
-Beautiful Soup 4, see `Porting code to BS4`_.
+<http://www.crummy.com/software/BeautifulSoup/bs3/documentation.html>`_.
+If so, you should know that Beautiful Soup 3 is no longer being
+developed, and that Beautiful Soup 4 is recommended for all new
+projects. If you want to learn about the differences between Beautiful
+Soup 3 and Beautiful Soup 4, see `Porting code to BS4`_.
Getting help
------------
@@ -1217,45 +1219,27 @@ keyword argument::
Searching by CSS class
^^^^^^^^^^^^^^^^^^^^^^
-Instead of using keyword arguments, you can filter tags based on their
-attributes by passing a dictionary in for ``attrs``. These two lines of
-code are equivalent::
-
- soup.find_all(href=re.compile("elsie"), id='link1')
- soup.find_all(attrs={'href' : re.compile("elsie"), 'id': 'link1'})
-
-The ``attrs`` argument would be a pretty obscure feature were it not for
-one thing: CSS. It's very useful to search for a tag that has a
-certain CSS class, but the name of the CSS attribute, "class", is also a
-Python reserved word.
-
-You can use ``attrs`` to search by CSS class::
+It's very useful to search for a tag that has a certain CSS class, but
+the name of the CSS attribute, "class", is a reserved word in
+Python. Using ``class`` as a keyword argument will give you a syntax
+error. As of Beautiful Soup 4.1.2, you can search by CSS class using
+the keyword argument ``class_``::
- soup.find_all("a", { "class" : "sister" })
+ soup.find_all("a", class_="sister")
# [<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
# <a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>,
# <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>]
-But that's a lot of code for such a common operation. Instead, you can
-pass a string `attrs` instead of a dictionary. The string will be used
-to restrict the CSS class::
+As with any keyword argument, you can pass ``class_`` a string, a regular
+expression, a function, or ``True``::
- soup.find_all("a", "sister")
- # [<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
- # <a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>,
- # <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>]
-
-You can also pass in a regular expression, a function or
-True. Anything you pass in for ``attrs`` that's not a dictionary will
-be used to search against the CSS class::
-
- soup.find_all(attrs=re.compile("itl"))
+ soup.find_all(class_=re.compile("itl"))
# [<p class="title"><b>The Dormouse's story</b></p>]
def has_six_characters(css_class):
return css_class is not None and len(css_class) == 6
- soup.find_all(attrs=has_six_characters)
+ soup.find_all(class_=has_six_characters)
# [<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
# <a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>,
# <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>]
@@ -1266,17 +1250,47 @@ matches a certain CSS class, you're matching against `any` of its CSS
classes::
css_soup = BeautifulSoup('<p class="body strikeout"></p>')
- css_soup.find_all("p", "strikeout")
+ css_soup.find_all("p", class_="strikeout")
# [<p class="body strikeout"></p>]
- css_soup.find_all("p", "body")
+ css_soup.find_all("p", class_="body")
# [<p class="body strikeout"></p>]
-Searching for the string value of the ``class`` attribute won't work::
+You can also search for the exact string value of the ``class`` attribute::
+
+ css_soup.find_all("p", class_="body strikeout")
+ # [<p class="body strikeout"></p>]
- css_soup.find_all("p", "body strikeout")
+But searching for variants of the string value won't work::
+
+ css_soup.find_all("p", class_="strikeout body")
# []
+There's a shortcut for ``class_`` present in all versions of Beautiful
+Soup. The second argument to any ``find()``-type method is called
+``attrs``, and passing in a string for ``attrs`` will search for that
+string as a CSS class::
+
+ soup.find_all("a", "sister")
+ # [<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
+ # <a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>,
+ # <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>]
+
+You can also pass in a regular expression, a function or
+True—anything except a dictionary. Whatever you pass in will be
+used to search against the CSS class, the same as if you'd passed it
+in for the ``class_`` keyword argument.
+
+By passing in a dictionary to ``attrs``, you can search many HTML
+attributes at once, not just the CSS class. These two lines of code
+are equivalent::
+
+ soup.find_all(href=re.compile("elsie"), id='link1')
+ soup.find_all(attrs={'href' : re.compile("elsie"), 'id': 'link1'})
+
+This isn't a very useful feature, since it's usually easier
+to use the keyword arguments.
+
.. _text:
The ``text`` argument