summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- NEWS.txt 4
-rw-r--r-- bs4/element.py 12
-rw-r--r-- bs4/tests/test_tree.py 8
-rw-r--r-- doc/source/index.rst 4
4 files changed, 27 insertions, 1 deletions
diff --git a/NEWS.txt b/NEWS.txt
index 1a6442d..cc3c17f 100644
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -5,6 +5,10 @@
versions. In Python 3, __str__ now returns a Unicode string instead
of a bytestring. [bug=1420131]
+* Introduced the select_one() method, which uses a CSS selector but
+ only returns the first match, instead of a list of
+ matches. [bug=1349367]
+
* Started using a standard MIT license. [bug=1294662]
* Added a Chinese translation of the documentation by Delong .w.
diff --git a/bs4/element.py b/bs4/element.py
index 454d34b..cc21c5c 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -1258,7 +1258,14 @@ class Tag(PageElement):
_selector_combinators = ['>', '+', '~']
_select_debug = False
- def select(self, selector, _candidate_generator=None):
+ def select_one(self, selector):
+ """Perform a CSS selection operation on the current element and return only the first match, or None if nothing matches."""
+ value = self.select(selector, limit=1)
+ if value:
+ return value[0]
+ return None
+
+ def select(self, selector, _candidate_generator=None, limit=None):
"""Perform a CSS selection operation on the current element."""
# Remove whitespace directly after the grouping operator ','
@@ -1433,6 +1440,7 @@ class Tag(PageElement):
else:
_use_candidate_generator = _candidate_generator
+ count = 0
for tag in current_context:
if self._select_debug:
print " Running candidate generator on %s %s" % (
@@ -1457,6 +1465,8 @@ class Tag(PageElement):
# don't include it in the context more than once.
new_context.append(candidate)
new_context_ids.add(id(candidate))
+ if limit and len(new_context) >= limit:
+ break
elif self._select_debug:
print " FAILURE %s %s" % (candidate.name, repr(candidate.attrs))
diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py
index 9e2982a..6ce2a07 100644
--- a/bs4/tests/test_tree.py
+++ b/bs4/tests/test_tree.py
@@ -1630,6 +1630,14 @@ class TestSoupSelector(TreeTest):
for div in els:
self.assertEqual(div.name, 'div')
+ el = self.soup.select_one('div')
+ self.assertEqual('main', el['id'])
+
+ def test_select_one_returns_none_if_no_match(self):
+ match = self.soup.select_one('nonexistenttag')
+ self.assertEqual(None, match)
+
+
def test_tag_in_tag_one(self):
els = self.soup.select('div div')
self.assertSelects('div div', ['inner', 'data1'])
diff --git a/doc/source/index.rst b/doc/source/index.rst
index d35481e..1b7b1e6 100644
--- a/doc/source/index.rst
+++ b/doc/source/index.rst
@@ -1735,6 +1735,10 @@ Match language codes::
# <p lang="en-us">Howdy, y'all</p>,
# <p lang="en-gb">Pip-pip, old fruit</p>]
+Find only the first tag that matches a selector::
+
+ soup.select_one(".sister")
+ # <a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>
This is all a convenience for users who know the CSS selector syntax. You
can do all this stuff with the Beautiful Soup API. And if CSS