Better defined behavior when the user wants to search for a combination of text and tag-specific arguments. [bug=695312]

author: Leonard Richardson <leonard.richardson@canonical.com> 2012-02-15 14:07:04 -0500
committer: Leonard Richardson <leonard.richardson@canonical.com> 2012-02-15 14:07:04 -0500
commit: be0c08585f54ec709740ff4352006bf3e605b8f2 (patch)
tree: 342c8a482bef4490a8f0fbb528611888bcf76721
parent: 0f6d3cfbef6fc0b90f0e9fbe58408e00c2383070 (diff)
4 files changed, 51 insertions, 5 deletions
diff --git a/NEWS.txt b/NEWS.txt
index 98535ef..1c3e19c 100644
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -1,5 +1,13 @@
 = 4.0.0b6 () =
 
+* Passing text along with tag-specific arguments to a find* method:
+
+   find("a", text="Click here")
+
+  will find tags that contain the given text as their
+  .string. Previously, the tag-specific arguments were ignored and
+  only strings were searched.
+
 * Fixed a bug that caused the html5lib tree builder to build a
   partially disconnected tree. Generally cleaned up the html5lib tree
   builder.
diff --git a/bs4/doc/source/index.rst b/bs4/doc/source/index.rst
index 1ad6449..8b7f1e4 100644
--- a/bs4/doc/source/index.rst
+++ b/bs4/doc/source/index.rst
@@ -1206,6 +1206,14 @@ Here are some examples::
  soup.find_all(text=is_the_only_string_within_a_tag)
  # [u"The Dormouse's story", u"The Dormouse's story", u'Elsie', u'Lacie', u'Tillie', u'...']
 
+Although ``text`` is for finding strings, you can combine it with
+arguments for finding tags: Beautiful Soup will find all tags whose
+``.string`` matches your value for ``text``. This code finds the <a>
+tags whose ``.string`` is "Elsie"::
+
+ soup.find_all("a", text="Elsie")
+ # [<a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>]
+
 .. _limit:
 
 The ``limit`` argument
@@ -2495,9 +2503,16 @@ Miscellaneous
 contains a single tag B and nothing else, then A.string is the same as
 B.string. (Previously, it was None.)
 
-`Multi-valued attributes`_ like ``class`` are parsed into lists if
-they have more than one value. This may affect the way you search by
-CSS class.
+`Multi-valued attributes`_ like ``class`` are presented as lists. This
+may affect the way you search by CSS class.
+
+If you pass one of the ``find*`` methods both :ref:`text <text>` `and`
+a tag-specific argument like :ref:`name <name>`, Beautiful Soup will
+search for tags that match your tag-specific criteria and whose
+:ref:`Tag.string <.string>` matches your value for :ref:`text
+<text>`. It will `not` find the strings themselves. Previously,
+Beautiful Soup ignored the tag-specific arguments and looked for
+strings.
 
 The ``BeautifulSoup`` constructor no longer recognizes the
 `markupMassage` argument. It's now the parser's responsibility to
diff --git a/bs4/element.py b/bs4/element.py
index 474364b..5e15252 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -974,6 +974,8 @@ class SoupStrainer(object):
                     found = markup
                 else:
                     found = markup_name
+        if found and self.text and self.text != found.string:
+            found = None
         return found
     searchTag = search_tag
 
@@ -991,12 +993,12 @@ class SoupStrainer(object):
         # If it's a Tag, make sure its name or attributes match.
         # Don't bother with Tags if we're searching for text.
         elif isinstance(markup, Tag):
-            if not self.text:
+            if not self.text or self.name or self.attrs:
                 found = self.search_tag(markup)
         # If it's text, make sure the text matches.
         elif isinstance(markup, NavigableString) or \
                  isinstance(markup, basestring):
-            if self._matches(markup, self.text):
+            if not self.name and not self.attrs and self._matches(markup, self.text):
                 found = markup
         else:
             raise Exception(
diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py
index 4da6fd9..2e74c00 100644
--- a/bs4/tests/test_tree.py
+++ b/bs4/tests/test_tree.py
@@ -277,6 +277,27 @@ class TestFindAllByAttribute(TreeTest):
         self.assertSelects(tree.find_all(id=re.compile("^a+$")),
                            ["One a.", "Two as."])
 
+    def test_find_by_name_and_containing_string(self):
+        soup = self.soup("<b>foo</b><b>bar</b><a>foo</a>")
+        a = soup.a
+
+        self.assertEqual([a], soup.find_all("a", text="foo"))
+        self.assertEqual([], soup.find_all("a", text="bar"))
+        self.assertEqual([], soup.find_all("a", text="bar"))
+
+    def test_find_by_name_and_containing_string_when_string_is_buried(self):
+        soup = self.soup("<a>foo</a><a><b><c>foo</c></b></a>")
+        self.assertEqual(soup.find_all("a"), soup.find_all("a", text="foo"))
+
+    def test_find_by_attribute_and_containing_string(self):
+        soup = self.soup('<b id="1">foo</b><a id="2">foo</a>')
+        a = soup.a
+
+        self.assertEqual([a], soup.find_all(id=2, text="foo"))
+        self.assertEqual([], soup.find_all(id=1, text="bar"))
+
+
+
 
 class TestIndex(TreeTest):
     """Test Tag.index"""
author	Leonard Richardson <leonard.richardson@canonical.com>	2012-02-15 14:07:04 -0500
committer	Leonard Richardson <leonard.richardson@canonical.com>	2012-02-15 14:07:04 -0500
commit	be0c08585f54ec709740ff4352006bf3e605b8f2 (patch)
tree	342c8a482bef4490a8f0fbb528611888bcf76721
parent	0f6d3cfbef6fc0b90f0e9fbe58408e00c2383070 (diff)