diff options
-rw-r--r-- | NEWS.txt | 4 | ||||
-rw-r--r-- | bs4/element.py | 12 | ||||
-rw-r--r-- | bs4/tests/test_tree.py | 8 | ||||
-rw-r--r-- | doc/source/index.rst | 4 |
4 files changed, 27 insertions, 1 deletion
@@ -5,6 +5,10 @@ versions. In Python 3, __str__ now returns a Unicode string instead of a bytestring. [bug=1420131] +* Introduced the select_one() method, which uses a CSS selector but + only returns the first match, instead of a list of + matches. [bug=1349367] + * Started using a standard MIT license. [bug=1294662] * Added a Chinese translation of the documentation by Delong .w. diff --git a/bs4/element.py b/bs4/element.py index 454d34b..cc21c5c 100644 --- a/bs4/element.py +++ b/bs4/element.py @@ -1258,7 +1258,14 @@ class Tag(PageElement): _selector_combinators = ['>', '+', '~'] _select_debug = False - def select(self, selector, _candidate_generator=None): + def select_one(self, selector): + """Perform a CSS selection operation on the current element.""" + value = self.select(selector, limit=1) + if value: + return value[0] + return None + + def select(self, selector, _candidate_generator=None, limit=None): """Perform a CSS selection operation on the current element.""" # Remove whitespace directly after the grouping operator ',' @@ -1433,6 +1440,7 @@ class Tag(PageElement): else: _use_candidate_generator = _candidate_generator + count = 0 for tag in current_context: if self._select_debug: print " Running candidate generator on %s %s" % ( @@ -1457,6 +1465,8 @@ class Tag(PageElement): # don't include it in the context more than once. 
new_context.append(candidate) new_context_ids.add(id(candidate)) + if limit and len(new_context) >= limit: + break elif self._select_debug: print " FAILURE %s %s" % (candidate.name, repr(candidate.attrs)) diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py index 9e2982a..6ce2a07 100644 --- a/bs4/tests/test_tree.py +++ b/bs4/tests/test_tree.py @@ -1630,6 +1630,14 @@ class TestSoupSelector(TreeTest): for div in els: self.assertEqual(div.name, 'div') + el = self.soup.select_one('div') + self.assertEqual('main', el['id']) + + def test_select_one_returns_none_if_no_match(self): + match = self.soup.select_one('nonexistenttag') + self.assertEqual(None, match) + + def test_tag_in_tag_one(self): els = self.soup.select('div div') self.assertSelects('div div', ['inner', 'data1']) diff --git a/doc/source/index.rst b/doc/source/index.rst index d35481e..1b7b1e6 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -1735,6 +1735,10 @@ Match language codes:: # <p lang="en-us">Howdy, y'all</p>, # <p lang="en-gb">Pip-pip, old fruit</p>] +Find only the first tag that matches a selector:: + + soup.select_one(".sister") + # <a class="sister" href="http://example.com/elsie" id="link1">Elsie</a> This is all a convenience for users who know the CSS selector syntax. You can do all this stuff with the Beautiful Soup API. And if CSS |