diff options
Diffstat (limited to 'bs4')
-rw-r--r-- | bs4/element.py | 46 | ||||
-rw-r--r-- | bs4/tests/test_tree.py | 30 |
2 files changed, 69 insertions, 7 deletions
diff --git a/bs4/element.py b/bs4/element.py index 398eb05..67f2a79 100644 --- a/bs4/element.py +++ b/bs4/element.py @@ -608,7 +608,7 @@ class PageElement(object): else: return lambda el: el.has_attr(attribute) - def select(self, selector): + def select(self, selector, recursive=True): """Perform a CSS selection operation on the current element.""" tokens = selector.split() current_context = [self] @@ -627,7 +627,9 @@ class PageElement(object): found = [] for context in current_context: found.extend( - [el for el in context.find_all(tag) if checker(el)]) + [el for el in + context.find_all(tag, recursive=recursive) + if checker(el)]) current_context = found continue @@ -656,15 +658,45 @@ class PageElement(object): return False return classes.issubset(tag['class']) for context in current_context: - found.extend(context.find_all(classes_match)) + found.extend(context.find_all(classes_match, recursive=recursive)) current_context = found continue + if ':' in token: + # Pseudoselector + tag_name, pseudo = token.split(':', 1) + if not tag_name: + raise ValueError( + "A pseudoselector must be prefixed with a tag name.") + pseudo_attributes = re.match('([a-zA-Z\d-]+)\(([a-zA-Z\d]+)\)', pseudo) + found = [] + if pseudo_attributes is not None: + pseudo_type, pseudo_value = pseudo_attributes.groups() + if pseudo_type == 'nth-of-type': + try: + pseudo_value = int(pseudo_value) + except: + raise NotImplementedError( + 'Only numeric values are supported for the nth-of-type pseudoselector for now.') + if pseudo_value < 1: + raise ValueError( + 'nth-of-type pseudoselector value must be at least 1.') + pseudo_value = pseudo_value - 1 + for context in current_context: + all_nodes = context.find_all(tag_name, recursive=recursive) + if pseudo_value < len(all_nodes): + found.extend([all_nodes[pseudo_value]]) + current_context = found + continue + else: + raise NotImplementedError( + 'Only the nth-of-type pseudoselector is supported for now.') + if token == '*': # Star selector found = [] for context in current_context: - found.extend(context.findAll(True)) + found.extend(context.find_all(True, recursive=recursive)) current_context = found continue @@ -676,16 +708,16 @@ class PageElement(object): found = [] for context in current_context: - found.extend(context.find_all(tag, recursive=False)) + found.extend(context.select(tag, recursive=False)) current_context = found continue - + # Here we should just have a regular tag if not self.tag_name_re.match(token): return [] found = [] for context in current_context: - found.extend(context.findAll(token)) + found.extend(context.find_all(token, recursive=recursive)) current_context = found return current_context diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py index 503af63..5f9e24b 100644 --- a/bs4/tests/test_tree.py +++ b/bs4/tests/test_tree.py @@ -1637,6 +1637,9 @@ class TestSoupSelector(TreeTest): def test_child_selector(self): self.assertSelects('.s1 > a', ['s1a1', 's1a2']) self.assertSelects('.s1 > a span', ['s1a2s1']) + + def test_child_selector_id(self): + self.assertSelects('.s1 > a#s1a2 span', ['s1a2s1']) def test_attribute_equals(self): self.assertSelectMultiple( @@ -1744,6 +1747,33 @@ class TestSoupSelector(TreeTest): ('p[blah]', []), ) + def test_nth_of_type(self): + # Try to select first paragraph + els = self.soup.select('div#inner p:nth-of-type(1)') + self.assertEqual(len(els), 1) + self.assertEqual(els[0].string, u'Some text') + + # Try to select third paragraph + els = self.soup.select('div#inner p:nth-of-type(3)') + self.assertEqual(len(els), 1) + self.assertEqual(els[0].string, u'Another') + + # Try to select (non-existent!) fourth paragraph + els = self.soup.select('div#inner p:nth-of-type(4)') + self.assertEqual(len(els), 0) + + # Pass in an invalid value. + self.assertRaises( + ValueError, self.soup.select, 'div p:nth-of-type(0)') + + def test_nth_of_type_direct_descendant(self): + els = self.soup.select('div#inner > p:nth-of-type(1)') + self.assertEqual(len(els), 1) + self.assertEqual(els[0].string, u'Some text') + + def test_id_child_selector_nth_of_type(self): + self.assertSelects('#inner > p:nth-of-type(2)', ['p1']) + def test_select_on_element(self): # Other tests operate on the tree; this operates on an element # within the tree. |