2 files changed, 205 insertions, 201 deletions
diff --git a/NEWS.txt b/NEWS.txt
index 7abc700..419843b 100644
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -1,5 +1,17 @@
 = 4.2.0 (Unreleased) =
 
+* The Tag.select() method now supports most features of
+  CSS selectors.
+
+ - Added support for the adjacent sibling combinator (+) and the
+   general sibling combinator (~). Tests by "liquider". [bug=1082144]
+
+ - Added limited support for the "nth-of-type" pseudo-class. Code
+   by Sven Slootweg. [bug=1109952]
+
+ - Refactored the code so that the combinators (+, ~, and >) can
+   select child or sibling tags by attributes other than name.
+
 * In an HTML document, the contents of a <script> or <style> tag will
   no longer undergo entity substitution by default. XML documents work
   the same way they did before. [bug=1085953]
@@ -21,27 +33,21 @@
 * Added the 'diagnose' submodule, which includes several useful
   functions for reporting problems and doing tech support.
 
-  * diagnose(data) tries the given markup on every installed parser,
+  - diagnose(data) tries the given markup on every installed parser,
     reporting exceptions and displaying successes. If a parser is not
     installed, diagnose() mentions this fact.
 
-  * lxml_trace(data, html=True) runs the given markup through lxml's
+  - lxml_trace(data, html=True) runs the given markup through lxml's
     XML parser or HTML parser, and prints out the parser events as
     they happen. This helps you quickly determine whether a given
     problem occurs in lxml code or Beautiful Soup code.
 
-  * htmlparser_trace(data) is the same thing, but for Python's
+  - htmlparser_trace(data) is the same thing, but for Python's
     built-in HTMLParser class.
 
 * The prettify() method now leaves the contents of <pre> tags
   alone. [bug=1095654]
 
-* Added support for the "nth-of-type" CSS selector. Code by Sven
-  Slootweg. [bug=1109952]
-
-* The CSS selector ">" can now find a tag by means other than the
-  tag name. Code by Sven Slootweg. [bug=1109952]
-
 * Fix a bug in the html5lib treebuilder which sometimes created
   disconnected trees. [bug=1039527]
 
diff --git a/bs4/element.py b/bs4/element.py
index 120471e..0be349d 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -616,198 +616,6 @@ class PageElement(object):
         else:
             return lambda el: el.has_attr(attribute)
 
-    _selectors_that_consume_an_extra_token = ['>', '+', '~']
-
-    _select_debug = False
-
-    def select(self, selector, _candidate_generator=None):
-        """Perform a CSS selection operation on the current element."""
-        tokens = selector.split()
-        current_context = [self]
-
-        if tokens[-1] in self._selectors_that_consume_an_extra_token:
-            raise ValueError(
-                'Final selector "%s" is missing an argument.' % tokens[-1])
-        if self._select_debug:
-            print 'Running CSS selector "%s"' % selector
-        for index, token in enumerate(tokens):
-            if self._select_debug:
-                print ' Considering token "%s"' % token
-            recursive_candidate_generator = None
-            tag_name = None
-            if tokens[index-1] in self._selectors_that_consume_an_extra_token:
-                # This token was consumed by the previous selector. Skip it.
-                if self._select_debug:
-                    print '  Token was consumed by the previous selector.'
-                continue
-            # Each operation corresponds to a checker function, a rule
-            # for determining whether a candidate matches the
-            # selector. Candidates are generated by the active
-            # iterator.
-            checker = None
-
-            m = self.attribselect_re.match(token)
-            if m is not None:
-                # Attribute selector
-                tag_name, attribute, operator, value = m.groups()
-                checker = self._attribute_checker(operator, attribute, value)
-
-            elif '#' in token:
-                # ID selector
-                tag_name, id = token.split('#', 1)
-                def id_matches(tag):
-                    return tag.get('id', None) == id
-                checker = id_matches
-
-            elif '.' in token:
-                # Class selector
-                tag_name, klass = token.split('.', 1)
-                classes = set(klass.split('.'))
-                def classes_match(candidate):
-                    return classes.issubset(candidate.get('class', []))
-                checker = classes_match
-
-            elif ':' in token:
-                # Pseudoselector
-                tag_name, pseudo = token.split(':', 1)
-                if tag_name == '':
-                    raise ValueError(
-                        "A pseudoselector must be prefixed with a tag name.")
-                pseudo_attributes = re.match('([a-zA-Z\d-]+)\(([a-zA-Z\d]+)\)', pseudo)
-                found = []
-                if pseudo_attributes is not None:
-                    pseudo_type, pseudo_value = pseudo_attributes.groups()
-                    if pseudo_type == 'nth-of-type':
-                        try:
-                            pseudo_value = int(pseudo_value)
-                        except:
-                            raise NotImplementedError(
-                                'Only numeric values are supported for the nth-of-type pseudoselector for now.')
-                        if pseudo_value < 1:
-                            raise ValueError(
-                                'nth-of-type pseudoselector value must be at least 1.')
-                        class Counter(object):
-                            def __init__(self, destination):
-                                self.count = 0
-                                self.destination = destination
-
-                            def nth_child_of_type(self, tag):
-                                self.count += 1
-                                if self.count == self.destination:
-                                    return True
-                                if self.count > self.destination:
-                                    raise StopIteration()
-                                return False
-                        checker = Counter(pseudo_value).nth_child_of_type
-                    else:
-                        raise NotImplementedError(
-                            'The following pseudoselectors are implemented: nth-of-type.')
-
-            elif token == '*':
-                # Star selector -- matches everything
-                pass
-            elif token == '>':
-                # Run the next token as a CSS selector against the
-                # direct children of each tag in the current context.
-                recursive_candidate_generator = lambda tag: tag.children
-            elif token == '~':
-                # Run the next token as a CSS selector against the
-                # siblings of each tag in the current context.
-                recursive_candidate_generator = lambda tag: tag.next_siblings
-            elif token == '+':
-                # For each tag in the current context, run the next
-                # token as a CSS selector against the tag's next
-                # sibling that's a tag.
-                def next_tag_sibling(tag):
-                    yield tag.find_next_sibling(True)
-                recursive_candidate_generator = next_tag_sibling
-
-            elif self.tag_name_re.match(token):
-                # Just a tag name.
-                tag_name = token
-            else:
-                raise ValueError(
-                    'Unsupported or invalid CSS selector: "%s"' % token)
-
-            if recursive_candidate_generator:
-                # This happens when the selector looks like  "> foo".
-                #
-                # The generator calls select() recursively on every
-                # member of the current context, passing in a different
-                # candidate generator and a different selector.
-                #
-                # In the case of "> foo", the candidate generator is
-                # one that yields a tag's direct children (">"), and
-                # the selector is "foo".
-                next_token = tokens[index+1]
-                def recursive_select(tag):
-                    if self._select_debug:
-                        print '    Calling select("%s") recursively on %s %s' % (next_token, tag.name, tag.attrs)
-                        print '-' * 40
-                    for i in tag.select(next_token, recursive_candidate_generator):
-                        if self._select_debug:
-                            print '(Recursive select picked up candidate %s %s)' % (i.name, i.attrs)
-                        yield i
-                    if self._select_debug:
-                        print '-' * 40
-                _use_candidate_generator = recursive_select
-            elif _candidate_generator is None:
-                # By default, a tag's candidates are all of its
-                # children. If tag_name is defined, only yield tags
-                # with that name.
-                if self._select_debug:
-                    if tag_name:
-                        check = "[any]"
-                    else:
-                        check = tag_name
-                    print '   Default candidate generator, tag name="%s"' % check
-                if self._select_debug:
-                    # This is redundant with later code, but it stops
-                    # a bunch of bogus tags from cluttering up the
-                    # debug log.
-                    def default_candidate_generator(tag):
-                        for child in tag.descendants:
-                            if not isinstance(child, Tag):
-                                continue
-                            if tag_name and not child.name == tag_name:
-                                continue
-                            yield child
-                    _use_candidate_generator = default_candidate_generator
-                else:
-                    _use_candidate_generator = lambda tag: tag.descendants
-            else:
-                _use_candidate_generator = _candidate_generator
-
-            new_context = []
-            for tag in current_context:
-                if self._select_debug:
-                    print "    Running candidate generator on %s %s" % (
-                        tag.name, repr(tag.attrs))
-                for candidate in _use_candidate_generator(tag):
-                    if not isinstance(candidate, Tag):
-                        continue
-                    if tag_name and candidate.name != tag_name:
-                        continue
-                    if checker is not None:
-                        try:
-                            result = checker(candidate)
-                        except StopIteration:
-                            break
-                    if checker is None or result:
-                        if self._select_debug:
-                            print "     SUCCESS %s %s" % (candidate.name, repr(candidate.attrs))
-                        new_context.append(candidate)
-                    elif self._select_debug:
-                        print "     FAILURE %s %s" % (candidate.name, repr(candidate.attrs))
-
-            current_context = new_context
-
-        if self._select_debug:
-            print "Final verdict:"
-            for i in current_context:
-                print " %s %s" % (i.name, i.attrs)
-        return current_context
-
     # Old non-property versions of the generators, for backwards
     # compatibility with BS3.
     def nextGenerator(self):
@@ -1376,6 +1184,196 @@ class Tag(PageElement):
             yield current
             current = current.next_element
 
+    _selectors_that_consume_an_extra_token = ['>', '+', '~']
+    _select_debug = False
+    def select(self, selector, _candidate_generator=None):
+        """Perform a CSS selection operation on the current element."""
+        tokens = selector.split()
+        current_context = [self]
+
+        if tokens[-1] in self._selectors_that_consume_an_extra_token:
+            raise ValueError(
+                'Final selector "%s" is missing an argument.' % tokens[-1])
+        if self._select_debug:
+            print 'Running CSS selector "%s"' % selector
+        for index, token in enumerate(tokens):
+            if self._select_debug:
+                print ' Considering token "%s"' % token
+            recursive_candidate_generator = None
+            tag_name = None
+            if tokens[index-1] in self._selectors_that_consume_an_extra_token:
+                # This token was consumed by the previous selector. Skip it.
+                if self._select_debug:
+                    print '  Token was consumed by the previous selector.'
+                continue
+            # Each operation corresponds to a checker function, a rule
+            # for determining whether a candidate matches the
+            # selector. Candidates are generated by the active
+            # iterator.
+            checker = None
+
+            m = self.attribselect_re.match(token)
+            if m is not None:
+                # Attribute selector
+                tag_name, attribute, operator, value = m.groups()
+                checker = self._attribute_checker(operator, attribute, value)
+
+            elif '#' in token:
+                # ID selector
+                tag_name, id = token.split('#', 1)
+                def id_matches(tag):
+                    return tag.get('id', None) == id
+                checker = id_matches
+
+            elif '.' in token:
+                # Class selector
+                tag_name, klass = token.split('.', 1)
+                classes = set(klass.split('.'))
+                def classes_match(candidate):
+                    return classes.issubset(candidate.get('class', []))
+                checker = classes_match
+
+            elif ':' in token:
+                # Pseudo-class
+                tag_name, pseudo = token.split(':', 1)
+                if tag_name == '':
+                    raise ValueError(
+                        "A pseudo-class must be prefixed with a tag name.")
+                pseudo_attributes = re.match('([a-zA-Z\d-]+)\(([a-zA-Z\d]+)\)', pseudo)
+                found = []
+                if pseudo_attributes is not None:
+                    pseudo_type, pseudo_value = pseudo_attributes.groups()
+                    if pseudo_type == 'nth-of-type':
+                        try:
+                            pseudo_value = int(pseudo_value)
+                        except:
+                            raise NotImplementedError(
+                                'Only numeric values are currently supported for the nth-of-type pseudo-class.')
+                        if pseudo_value < 1:
+                            raise ValueError(
+                                'nth-of-type pseudo-class value must be at least 1.')
+                        class Counter(object):
+                            def __init__(self, destination):
+                                self.count = 0
+                                self.destination = destination
+
+                            def nth_child_of_type(self, tag):
+                                self.count += 1
+                                if self.count == self.destination:
+                                    return True
+                                if self.count > self.destination:
+                                    raise StopIteration()
+                                return False
+                        checker = Counter(pseudo_value).nth_child_of_type
+                    else:
+                        raise NotImplementedError(
+                            'Only the following pseudo-classes are implemented: nth-of-type.')
+
+            elif token == '*':
+                # Star selector -- matches everything
+                pass
+            elif token == '>':
+                # Run the next token as a CSS selector against the
+                # direct children of each tag in the current context.
+                recursive_candidate_generator = lambda tag: tag.children
+            elif token == '~':
+                # Run the next token as a CSS selector against the
+                # siblings of each tag in the current context.
+                recursive_candidate_generator = lambda tag: tag.next_siblings
+            elif token == '+':
+                # For each tag in the current context, run the next
+                # token as a CSS selector against the tag's next
+                # sibling that's a tag.
+                def next_tag_sibling(tag):
+                    yield tag.find_next_sibling(True)
+                recursive_candidate_generator = next_tag_sibling
+
+            elif self.tag_name_re.match(token):
+                # Just a tag name.
+                tag_name = token
+            else:
+                raise ValueError(
+                    'Unsupported or invalid CSS selector: "%s"' % token)
+
+            if recursive_candidate_generator:
+                # This happens when the selector looks like  "> foo".
+                #
+                # The generator calls select() recursively on every
+                # member of the current context, passing in a different
+                # candidate generator and a different selector.
+                #
+                # In the case of "> foo", the candidate generator is
+                # one that yields a tag's direct children (">"), and
+                # the selector is "foo".
+                next_token = tokens[index+1]
+                def recursive_select(tag):
+                    if self._select_debug:
+                        print '    Calling select("%s") recursively on %s %s' % (next_token, tag.name, tag.attrs)
+                        print '-' * 40
+                    for i in tag.select(next_token, recursive_candidate_generator):
+                        if self._select_debug:
+                            print '(Recursive select picked up candidate %s %s)' % (i.name, i.attrs)
+                        yield i
+                    if self._select_debug:
+                        print '-' * 40
+                _use_candidate_generator = recursive_select
+            elif _candidate_generator is None:
+                # By default, a tag's candidates are all of its
+                # children. If tag_name is defined, only yield tags
+                # with that name.
+                if self._select_debug:
+                    if tag_name:
+                        check = "[any]"
+                    else:
+                        check = tag_name
+                    print '   Default candidate generator, tag name="%s"' % check
+                if self._select_debug:
+                    # This is redundant with later code, but it stops
+                    # a bunch of bogus tags from cluttering up the
+                    # debug log.
+                    def default_candidate_generator(tag):
+                        for child in tag.descendants:
+                            if not isinstance(child, Tag):
+                                continue
+                            if tag_name and not child.name == tag_name:
+                                continue
+                            yield child
+                    _use_candidate_generator = default_candidate_generator
+                else:
+                    _use_candidate_generator = lambda tag: tag.descendants
+            else:
+                _use_candidate_generator = _candidate_generator
+
+            new_context = []
+            for tag in current_context:
+                if self._select_debug:
+                    print "    Running candidate generator on %s %s" % (
+                        tag.name, repr(tag.attrs))
+                for candidate in _use_candidate_generator(tag):
+                    if not isinstance(candidate, Tag):
+                        continue
+                    if tag_name and candidate.name != tag_name:
+                        continue
+                    if checker is not None:
+                        try:
+                            result = checker(candidate)
+                        except StopIteration:
+                            break
+                    if checker is None or result:
+                        if self._select_debug:
+                            print "     SUCCESS %s %s" % (candidate.name, repr(candidate.attrs))
+                        new_context.append(candidate)
+                    elif self._select_debug:
+                        print "     FAILURE %s %s" % (candidate.name, repr(candidate.attrs))
+
+            current_context = new_context
+
+        if self._select_debug:
+            print "Final verdict:"
+            for i in current_context:
+                print " %s %s" % (i.name, i.attrs)
+        return current_context
+
     # Old names for backwards compatibility
     def childGenerator(self):
         return self.children