From 74ca8e3f33d44475401be0bc418da83264f91207 Mon Sep 17 00:00:00 2001
From: Leonard Richardson <leonard.richardson@canonical.com>
Date: Fri, 2 Mar 2012 10:29:08 -0500
Subject: Brought the soupselect port up to date.

---
 NEWS.txt               |  6 +++++
 bs4/element.py         | 45 +++++++++++++++++++++++++++---------
 bs4/tests/test_tree.py | 62 +++++++++++++++++++++++++++++++-------------------
 doc/source/index.rst   | 36 +++++++++++++++++++++++++----
 4 files changed, 110 insertions(+), 39 deletions(-)

diff --git a/NEWS.txt b/NEWS.txt
index d9b421e..cf76b84 100644
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -1,3 +1,9 @@
+= 4.0.0b11 () =
+
+* Brought BS up to date with the latest release of soupselect, adding
+  CSS selector support for direct descendant matches and multiple CSS
+  class matches.
+
 = 4.0.0b10 (20120302) =
 
 * Added support for simple CSS selectors, taken from the soupselect project.
diff --git a/bs4/element.py b/bs4/element.py
index 2851a75..d2fa19f 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -483,7 +483,11 @@ class PageElement(object):
         """Perform a CSS selection operation on the current element."""
         tokens = selector.split()
         current_context = [self]
-        for token in tokens:
+        for index, token in enumerate(tokens):
+            if tokens[index - 1] == '>':
+                # The preceding '>' step already used this token as the child
+                # tag name. NOTE(review): at index 0 this checks tokens[-1].
+                continue
             m = self.attribselect_re.match(token)
             if m is not None:
                 # Attribute selector
@@ -493,9 +497,11 @@ class PageElement(object):
                 checker = self._attribute_checker(operator, attribute, value)
                 found = []
                 for context in current_context:
-                    found.extend([el for el in context.find_all(tag) if checker(el)])
+                    found.extend(
+                        [el for el in context.find_all(tag) if checker(el)])
                 current_context = found
                 continue
+
             if '#' in token:
                 # ID selector
                 tag, id = token.split('#', 1)
@@ -506,21 +512,25 @@ class PageElement(object):
                     return [] # No match
                 current_context = [el]
                 continue
+
             if '.' in token:
                 # Class selector
-                tag, klass = token.split('.', 1)
-                if not tag:
-                    tag = True
+                tag_name, klass = token.split('.', 1)
+                if not tag_name:
+                    tag_name = True
+                classes = set(klass.split('.'))
                 found = []
+                def classes_match(tag):
+                    if tag_name is not True and tag.name != tag_name:
+                        return False
+                    if not tag.has_attr('class'):
+                        return False
+                    return classes.issubset(tag['class'])
                 for context in current_context:
-                    found.extend(
-                        context.find_all(
-                            tag,
-                            {'class': lambda attr: attr and klass in attr.split()}
-                            )
-                        )
+                    found.extend(context.find_all(classes_match))
                 current_context = found
                 continue
+
             if token == '*':
                 # Star selector
                 found = []
@@ -528,6 +538,19 @@ class PageElement(object):
                     found.extend(context.findAll(True))
                 current_context = found
                 continue
+
+            if token == '>':
+                # Child selector
+                tag = tokens[index + 1]
+                if not tag:
+                    tag = True
+
+                found = []
+                for context in current_context:
+                    found.extend(context.find_all(tag, recursive=False))
+                current_context = found
+                continue
+
             # Here we should just have a regular tag
             if not self.tag_name_re.match(token):
                 return []
diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py
index 6d22448..e9a5763 100644
--- a/bs4/tests/test_tree.py
+++ b/bs4/tests/test_tree.py
@@ -1308,37 +1308,43 @@ class TestNavigableStringSubclasses(SoupTest):
 
 class TestSoupSelector(TreeTest):
 
-    HTML = """<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
-    "http://www.w3.org/TR/html4/strict.dtd">
+    HTML = """
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+"http://www.w3.org/TR/html4/strict.dtd">
 <html>
 <head>
-  <title>The title</title>
-  <link rel="stylesheet" href="blah.css" type="text/css" id="l1">
+<title>The title</title>
+<link rel="stylesheet" href="blah.css" type="text/css" id="l1">
 </head>
 <body>
 
 <div id="main">
-    <div id="inner">
-        <h1 id="header1">An H1</h1>
-        <p>Some text</p>
-        <p class="onep" id="p1">Some more text</p>
-        <h2 id="header2">An H2</h2>
-        <p class="class1 class2 class3" id="pmulti">Another</p>
-        <a href="http://bob.example.org/" rel="friend met" id="bob">Bob</a>
-        <h2 id="header3">Another H2</h2>
-        <a id="me" href="http://simonwillison.net/" rel="me">me</a>
-    </div>
-    <p lang="en" id="lang-en">English</p>
-    <p lang="en-gb" id="lang-en-gb">English UK</p>
-    <p lang="en-us" id="lang-en-us">English US</p>
-    <p lang="fr" id="lang-fr">French</p>
+<div id="inner">
+<h1 id="header1">An H1</h1>
+<p>Some text</p>
+<p class="onep" id="p1">Some more text</p>
+<h2 id="header2">An H2</h2>
+<p class="class1 class2 class3" id="pmulti">Another</p>
+<a href="http://bob.example.org/" rel="friend met" id="bob">Bob</a>
+<h2 id="header3">Another H2</h2>
+<a id="me" href="http://simonwillison.net/" rel="me">me</a>
+<span class="s1">
+<a href="#" id="s1a1">span1a1</a>
+<a href="#" id="s1a2">span1a2 <span id="s1a2s1">test</span></a>
+<span class="span2">
+<a href="#" id="s2a1">span2a1</a>
+</span>
+<span class="span3"></span>
+</span>
+</div>
+<p lang="en" id="lang-en">English</p>
+<p lang="en-gb" id="lang-en-gb">English UK</p>
+<p lang="en-us" id="lang-en-us">English US</p>
+<p lang="fr" id="lang-fr">French</p>
 </div>
 
 <div id="footer">
 </div>
-
-</body>
-</html>
 """
 
     def setUp(self):
@@ -1428,6 +1434,16 @@ class TestSoupSelector(TreeTest):
             '.class3', 'p.class3', 'html p.class2', 'div#inner .class2'):
             self.assertSelects(selector, ['pmulti'])
 
+    def test_multi_class_selection(self):
+        for selector in ('.class1.class3', '.class3.class2',
+                         '.class1.class2.class3'):
+            self.assertSelects(selector, ['pmulti'])
+
+    def test_child_selector(self):
+        self.assertSelects('.s1 > a', ['s1a1', 's1a2'])
+        self.assertSelects('.s1 > a span', ['s1a2s1'])
+
+
     def test_attribute_equals(self):
         self.assertSelectMultiple(
             ('p[class="onep"]', ['p1']),
@@ -1481,7 +1497,7 @@ class TestSoupSelector(TreeTest):
             ('[href$=".css"]', ['l1']),
             ('link[href$=".css"]', ['l1']),
             ('link[id$="1"]', ['l1']),
-            ('[id$="1"]', ['l1', 'p1', 'header1']),
+            ('[id$="1"]', ['l1', 'p1', 'header1', 's1a1', 's2a1', 's1a2s1']),
             ('div[id$="1"]', []),
             ('[id$="noending"]', []),
         )
@@ -1504,7 +1520,7 @@ class TestSoupSelector(TreeTest):
             ('[href*=".css"]', ['l1']),
             ('link[href*=".css"]', ['l1']),
             ('link[id*="1"]', ['l1']),
-            ('[id*="1"]', ['l1', 'p1', 'header1']),
+            ('[id*="1"]', ['l1', 'p1', 'header1', 's1a1', 's1a2', 's2a1', 's1a2s1']),
             ('div[id*="1"]', []),
             ('[id*="noending"]', []),
             # New for this test
diff --git a/doc/source/index.rst b/doc/source/index.rst
index a9d404a..37d5f07 100644
--- a/doc/source/index.rst
+++ b/doc/source/index.rst
@@ -1538,15 +1538,27 @@ You can find tags::
 
 Find tags beneath other tags::
 
- soup.select("p a")
- # [<a class="sister" href="http://example.com/elsie"
- id="link1">Elsie</a>, <a class="sister"
- href="http://example.com/lacie" id="link2">Lacie</a>, <a
- class="sister" href="http://example.com/tillie" id="link3">Tillie</a>]
+ soup.select("body a")
+ # [<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
+ #  <a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>,
+ #  <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>]
 
  soup.select("html head title")
  # [<title>The Dormouse's story</title>]
 
+Find tags *directly* beneath other tags::
+
+ soup.select("head > title")
+ # [<title>The Dormouse's story</title>]
+
+ soup.select("p > a")
+ # [<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
+ #  <a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>,
+ #  <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>]
+
+ soup.select("body > a")
+ # []
+
 Find tags by CSS class::
 
  soup.select(".sister")
@@ -1590,6 +1602,20 @@ Find tags by attribute value::
  soup.select('a[href*=".com/el"]')
  # [<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>]
 
+Match language codes::
+
+ multilingual_markup = """
+  <p lang="en">Hello</p>
+  <p lang="en-us">Howdy, y'all</p>
+  <p lang="en-gb">Pip-pip, old fruit</p>
+  <p lang="fr">Bonjour mes amis</p>
+ """
+ multilingual_soup = BeautifulSoup(multilingual_markup)
+ multilingual_soup.select('p[lang|=en]')
+ # [<p lang="en">Hello</p>,
+ #  <p lang="en-us">Howdy, y'all</p>,
+ #  <p lang="en-gb">Pip-pip, old fruit</p>]
+
 This is a convenience for users who know the CSS selector syntax. You
 can do all this stuff with the Beautiful Soup API. And if CSS
 selectors are all you need, you might as well use lxml directly,
-- 
cgit v1.2.3