1 files changed, 94 insertions, 51 deletions
diff --git a/bs4/element.py b/bs4/element.py
index 9f3adfb..982a5b0 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -567,6 +567,14 @@ class PageElement(object):
             value =" ".join(value)
         return value
 
+    def _tag_name_matches_and(self, function, tag_name):
+        """Combine `function` with a tag-name test; no-op without tag_name."""
+        if tag_name:
+            def _named_match(candidate):
+                return candidate.name == tag_name and function(candidate)
+            return _named_match
+        return function
+
     def _attribute_checker(self, operator, attribute, value=''):
         """Create a function that performs a CSS selector operation.
 
@@ -608,53 +616,54 @@ class PageElement(object):
         else:
             return lambda el: el.has_attr(attribute)
 
-    def select(self, selector, recursive=True):
+    _selectors_that_consume_an_extra_token = ['>', '+', '~']
+    # When True, select() prints a trace of its work to stdout.
+    debug = False
+
+    def select(self, selector, _candidate_generator=None):
         """Perform a CSS selection operation on the current element."""
+        debug = True
         tokens = selector.split()
         current_context = [self]
+
+        if tokens[-1] in self._selectors_that_consume_an_extra_token:
+            raise ValueError(
+                'Final selector "%s" is missing an argument.' % tokens[-1])
+        if self.debug:
+            print 'Running CSS selector "%s"' % selector
         for index, token in enumerate(tokens):
-            if tokens[index - 1] == '>':
-                # already found direct descendants in last step. skip this
-                # step.
+            if self.debug:
+                print ' Considering token "%s"' % token
+            recursive_candidate_generator = None
+            tag_name = None
+            if tokens[index-1] in self._selectors_that_consume_an_extra_token:
+                # This token was consumed by the previous selector. Skip it.
+                if self.debug:
+                    print '  Token was consumed by the previous selector.'
                 continue
-
-            # Each operation corresponds to a candidate generator (a
-            # rule for finding tags that might match) and a checker function (a
-            # rule for determining whether a tag does match.
-            production_rule = None
+            # Each operation corresponds to a checker function, a rule
+            # for determining whether a candidate matches the
+            # selector. Candidates are generated by the active
+            # iterator.
             checker = None
 
             m = self.attribselect_re.match(token)
             if m is not None:
                 # Attribute selector
                 tag_name, attribute, operator, value = m.groups()
-                if not tag_name:
-                    tag_name = True
-                production_rule = lambda tag: tag.find_all(
-                    tag_name, recursive=recursive)
                 checker = self._attribute_checker(operator, attribute, value)
 
             elif '#' in token:
                 # ID selector
                 tag_name, id = token.split('#', 1)
-                if tag_name == "":
-                    tag_name = True
-                def find_by_id(tag):
-                    found = tag.find(tag_name, id=id, recursive=recursive)
-                    if found is None:
-                        return []
-                    return [found]
-                production_rule = find_by_id
-                checker = lambda x: True
+                def id_matches(candidate):
+                    return candidate.get('id', None) == id
+                checker = id_matches
 
             elif '.' in token:
                 # Class selector
                 tag_name, klass = token.split('.', 1)
-                if tag_name == '':
-                    tag_name = True
                 classes = set(klass.split('.'))
-                production_rule = lambda tag: tag.find_all(
-                    tag_name, recursive=recursive)
                 def classes_match(candidate):
                     return classes.issubset(candidate.get('class', []))
                 checker = classes_match
@@ -678,49 +687,83 @@ class PageElement(object):
                         if pseudo_value < 1:
                             raise ValueError(
                                 'nth-of-type pseudoselector value must be at least 1.')
+                        # Python 2 closures cannot rebind an enclosing local,
+                        # so the running count lives in a one-element list.
+                        count = [0]
                         def nth_child_of_type(tag):
-                            children = tag.find_all(
-                                tag_name, limit=pseudo_value, recursive=recursive)
-                            if len(children) < pseudo_value:
-                                return []
-                            return [children[pseudo_value-1]]
-                        production_rule = nth_child_of_type
-                        checker = lambda x: True
+                            count[0] += 1
+                            return count[0] == pseudo_value
+                        checker = nth_child_of_type
                     else:
                         raise NotImplementedError(
                             'The following pseudoselectors are implemented: nth-of-type.')
 
             elif token == '*':
-                # Star selector
-                production_rule = lambda tag: tag.find_all(True, recursive=recursive)
-                checker = lambda x: True
+                # Star selector -- matches everything
+                checker = True
 
             elif token == '>':
-                # Child selector
-                # TODO If this is the last token, there's a problem.
-                next_selector = tokens[index + 1]
-                production_rule = lambda tag: tag.select(
-                    next_selector, recursive=False)
-                checker = lambda candidate: True
+                # Run the next token as a CSS selector against the
+                # direct children of each tag in the current context.
+                recursive_candidate_generator = tag.children
 
             elif self.tag_name_re.match(token):
+                # Just a tag name.
                 tag_name = token
-                production_rule = lambda tag: tag.find_all(tag_name, recursive=recursive)
-                checker = lambda candidate: True
+                checker = True
             else:
                 raise ValueError(
                     'Unsupported or invalid CSS selector: "%s"' % token)
 
-            # We now have a production rule and a checker. Find
-            # candidates by applying the production rule to every
-            # member of the current context. Check each candidate
-            # against the checker. The new context is the set of
-            # candidates that pass the checker.
+            if recursive_candidate_generator:
+                # This happens when the selector looks like  "> foo".
+                #
+                # The generator calls select() recursively on every
+                # member of the current context, passing in a different
+                # candidate generator and a different selector.
+                #
+                # In the case of "> foo", the candidate generator is
+                # one that yields a tag's direct children (">"), and
+                # the selector is "foo".
+                next_token = tokens[index+1]
+                def recursive_select(tag):
+                    return tag.select(next_token, recursive_candidate_generator)
+                _candidate_generator = recursive_select
+                checker = True
+            elif _candidate_generator is None:
+                # By default, a tag's candidates are all of its
+                # descendants. If tag_name is defined, only yield tags
+                # with that name.
+                if self.debug:
+                    if tag_name:
+                        check = "[any]"
+                    else:
+                        check = tag_name
+                    print '   Default candidate generator, tag name="%s"' % check
+                def default_candidate_generator(tag):
+                    """Yield descendant Tags, filtered by tag_name if set."""
+                    for descendant in tag.descendants:
+                        if not isinstance(descendant, Tag):
+                            continue
+                        if not tag_name or descendant.name == tag_name:
+                            yield descendant
+                _candidate_generator = default_candidate_generator
+
             new_context = []
             for tag in current_context:
-                for candidate in production_rule(tag):
-                    if checker(candidate):
+                if self.debug:
+                    print "    Running candidate generator on %s %s" % (
+                        tag.name, repr(tag.attrs))
+                for candidate in _candidate_generator(tag):
+                    if not isinstance(candidate, Tag):
+                        continue
+                    if checker is True or checker(candidate):
+                        if self.debug:
+                            print "     SUCCESS %s %s" % (candidate.name, repr(candidate.attrs))
                         new_context.append(candidate)
+                    elif self.debug:
+                        print "     FAILURE %s %s" % (candidate.name, repr(candidate.attrs))
+
             current_context = new_context
 
         return current_context