summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLeonard Richardson <leonard.richardson@canonical.com>2012-02-15 14:07:04 -0500
committerLeonard Richardson <leonard.richardson@canonical.com>2012-02-15 14:07:04 -0500
commitbe0c08585f54ec709740ff4352006bf3e605b8f2 (patch)
tree342c8a482bef4490a8f0fbb528611888bcf76721
parent0f6d3cfbef6fc0b90f0e9fbe58408e00c2383070 (diff)
Better defined behavior when the user wants to search for a combination of text and tag-specific arguments. [bug=695312]
-rw-r--r--NEWS.txt8
-rw-r--r--bs4/doc/source/index.rst21
-rw-r--r--bs4/element.py6
-rw-r--r--bs4/tests/test_tree.py21
4 files changed, 51 insertions, 5 deletions
diff --git a/NEWS.txt b/NEWS.txt
index 98535ef..1c3e19c 100644
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -1,5 +1,13 @@
= 4.0.0b6 () =
+* Passing text along with tag-specific arguments to a find* method:
+
+ find("a", text="Click here")
+
+ will find tags that contain the given text as their
+ .string. Previously, the tag-specific arguments were ignored and
+ only strings were searched.
+
* Fixed a bug that caused the html5lib tree builder to build a
partially disconnected tree. Generally cleaned up the html5lib tree
builder.
diff --git a/bs4/doc/source/index.rst b/bs4/doc/source/index.rst
index 1ad6449..8b7f1e4 100644
--- a/bs4/doc/source/index.rst
+++ b/bs4/doc/source/index.rst
@@ -1206,6 +1206,14 @@ Here are some examples::
soup.find_all(text=is_the_only_string_within_a_tag)
# [u"The Dormouse's story", u"The Dormouse's story", u'Elsie', u'Lacie', u'Tillie', u'...']
+Although ``text`` is for finding strings, you can combine it with
+arguments that find tags: Beautiful Soup will find all tags whose
+``.string`` matches your value for ``text``. This code finds the <a>
+tags whose ``.string`` is "Elsie"::
+
+ soup.find_all("a", text="Elsie")
+ # [<a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>]
+
.. _limit:
The ``limit`` argument
@@ -2495,9 +2503,16 @@ Miscellaneous
contains a single tag B and nothing else, then A.string is the same as
B.string. (Previously, it was None.)
-`Multi-valued attributes`_ like ``class`` are parsed into lists if
-they have more than one value. This may affect the way you search by
-CSS class.
+`Multi-valued attributes`_ like ``class`` are presented as lists. This
+may affect the way you search by CSS class.
+
+If you pass one of the ``find*`` methods both :ref:`text <text>` `and`
+a tag-specific argument like :ref:`name <name>`, Beautiful Soup will
+search for tags that match your tag-specific criteria and whose
+:ref:`Tag.string <.string>` matches your value for :ref:`text
+<text>`. It will `not` find the strings themselves. Previously,
+Beautiful Soup ignored the tag-specific arguments and looked for
+strings.
The ``BeautifulSoup`` constructor no longer recognizes the
`markupMassage` argument. It's now the parser's responsibility to
diff --git a/bs4/element.py b/bs4/element.py
index 474364b..5e15252 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -974,6 +974,8 @@ class SoupStrainer(object):
found = markup
else:
found = markup_name
+ if found and self.text and self.text != found.string:
+ found = None
return found
searchTag = search_tag
@@ -991,12 +993,12 @@ class SoupStrainer(object):
# If it's a Tag, make sure its name or attributes match.
# Don't bother with Tags if we're searching for text.
elif isinstance(markup, Tag):
- if not self.text:
+ if not self.text or self.name or self.attrs:
found = self.search_tag(markup)
# If it's text, make sure the text matches.
elif isinstance(markup, NavigableString) or \
isinstance(markup, basestring):
- if self._matches(markup, self.text):
+ if not self.name and not self.attrs and self._matches(markup, self.text):
found = markup
else:
raise Exception(
diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py
index 4da6fd9..2e74c00 100644
--- a/bs4/tests/test_tree.py
+++ b/bs4/tests/test_tree.py
@@ -277,6 +277,27 @@ class TestFindAllByAttribute(TreeTest):
self.assertSelects(tree.find_all(id=re.compile("^a+$")),
["One a.", "Two as."])
+ def test_find_by_name_and_containing_string(self):
+ soup = self.soup("<b>foo</b><b>bar</b><a>foo</a>")
+ a = soup.a
+
+ self.assertEqual([a], soup.find_all("a", text="foo"))
+ self.assertEqual([], soup.find_all("a", text="bar"))
+ self.assertEqual([], soup.find_all("a", text="bar"))
+
+ def test_find_by_name_and_containing_string_when_string_is_buried(self):
+ soup = self.soup("<a>foo</a><a><b><c>foo</c></b></a>")
+ self.assertEqual(soup.find_all("a"), soup.find_all("a", text="foo"))
+
+ def test_find_by_attribute_and_containing_string(self):
+ soup = self.soup('<b id="1">foo</b><a id="2">foo</a>')
+ a = soup.a
+
+ self.assertEqual([a], soup.find_all(id=2, text="foo"))
+ self.assertEqual([], soup.find_all(id=1, text="bar"))
+
+
+
class TestIndex(TreeTest):
"""Test Tag.index"""