diff options
author | Leonard Richardson <leonardr@segfault.org> | 2017-05-06 13:23:18 -0400 |
---|---|---|
committer | Leonard Richardson <leonardr@segfault.org> | 2017-05-06 13:23:18 -0400 |
commit | 49cc750524fb436fa4880eefa6c8d0b3bbbd7175 (patch) | |
tree | 0d282e7eee39e43ed6cc7e1b5541bad4d585bed5 /bs4/element.py | |
parent | 2874552990cf9668d721808d3dbd1cd80a98c614 (diff) |
It's now possible to use a tag's namespace prefix when searching,
e.g. soup.find('namespace:tag') [bug=1655332]
Diffstat (limited to 'bs4/element.py')
-rw-r--r-- | bs4/element.py | 61 |
1 files changed, 51 insertions, 10 deletions
diff --git a/bs4/element.py b/bs4/element.py index 9284d11..115ab24 100644 --- a/bs4/element.py +++ b/bs4/element.py @@ -535,9 +535,16 @@ class PageElement(object): return ResultSet(strainer, result) elif isinstance(name, basestring): # Optimization to find all tags with a given name. + if name.count(':') == 1: + # This is a name with a prefix. + prefix, name = name.split(':', 1) + else: + prefix = None result = (element for element in generator if isinstance(element, Tag) - and element.name == name) + and element.name == name + and (prefix is None or element.prefix == prefix) + ) return ResultSet(strainer, result) results = ResultSet(strainer) while True: @@ -1698,7 +1705,7 @@ class SoupStrainer(object): "I don't know how to match against a %s" % markup.__class__) return found - def _matches(self, markup, match_against): + def _matches(self, markup, match_against, already_tried=None): # print u"Matching %s against %s" % (markup, match_against) result = False if isinstance(markup, list) or isinstance(markup, tuple): @@ -1713,7 +1720,7 @@ class SoupStrainer(object): if self._matches(' '.join(markup), match_against): return True return False - + if match_against is True: # True matches any non-None value. return markup is not None @@ -1723,6 +1730,7 @@ class SoupStrainer(object): # Custom callables take the tag as an argument, but all # other ways of matching match the tag name as a string. + original_markup = markup if isinstance(markup, Tag): markup = markup.name @@ -1733,18 +1741,51 @@ class SoupStrainer(object): # None matches None, False, an empty string, an empty list, and so on. return not match_against - if isinstance(match_against, unicode): + if (hasattr(match_against, '__iter__') + and not isinstance(match_against, basestring)): + # We're asked to match against an iterable of items. + # The markup must be match at least one item in the + # iterable. We'll try each one in turn. + # + # To avoid infinite recursion we need to keep track of + # items we've already seen. + if not already_tried: + already_tried = set() + for item in match_against: + if item.__hash__: + key = item + else: + key = id(item) + if key in already_tried: + continue + else: + already_tried.add(key) + if self._matches(original_markup, item, already_tried): + return True + else: + return False + + # Beyond this point we might need to run the test twice: once against + # the tag's name and once against its prefixed name. + match = False + + if not match and isinstance(match_against, unicode): # Exact string match - return markup == match_against + match = markup == match_against - if hasattr(match_against, 'match'): + if not match and hasattr(match_against, 'search'): # Regexp match return match_against.search(markup) - if hasattr(match_against, '__iter__'): - # The markup must be an exact match against something - # in the iterable. - return markup in match_against + if (not match + and isinstance(original_markup, Tag) + and original_markup.prefix): + # Try the whole thing again with the prefixed tag name. + return self._matches( + original_markup.prefix + ':' + original_markup.name, match_against + ) + + return match class ResultSet(list): |