summaryrefslogtreecommitdiff
path: root/bs4/element.py
diff options
context:
space:
mode:
authorLeonard Richardson <leonardr@segfault.org>2017-05-06 13:23:18 -0400
committerLeonard Richardson <leonardr@segfault.org>2017-05-06 13:23:18 -0400
commit49cc750524fb436fa4880eefa6c8d0b3bbbd7175 (patch)
tree0d282e7eee39e43ed6cc7e1b5541bad4d585bed5 /bs4/element.py
parent2874552990cf9668d721808d3dbd1cd80a98c614 (diff)
It's now possible to use a tag's namespace prefix when searching,
e.g. soup.find('namespace:tag') [bug=1655332]
Diffstat (limited to 'bs4/element.py')
-rw-r--r--bs4/element.py61
1 files changed, 51 insertions, 10 deletions
diff --git a/bs4/element.py b/bs4/element.py
index 9284d11..115ab24 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -535,9 +535,16 @@ class PageElement(object):
return ResultSet(strainer, result)
elif isinstance(name, basestring):
# Optimization to find all tags with a given name.
+ if name.count(':') == 1:
+ # This is a name with a prefix.
+ prefix, name = name.split(':', 1)
+ else:
+ prefix = None
result = (element for element in generator
if isinstance(element, Tag)
- and element.name == name)
+ and element.name == name
+ and (prefix is None or element.prefix == prefix)
+ )
return ResultSet(strainer, result)
results = ResultSet(strainer)
while True:
@@ -1698,7 +1705,7 @@ class SoupStrainer(object):
"I don't know how to match against a %s" % markup.__class__)
return found
- def _matches(self, markup, match_against):
+ def _matches(self, markup, match_against, already_tried=None):
# print u"Matching %s against %s" % (markup, match_against)
result = False
if isinstance(markup, list) or isinstance(markup, tuple):
@@ -1713,7 +1720,7 @@ class SoupStrainer(object):
if self._matches(' '.join(markup), match_against):
return True
return False
-
+
if match_against is True:
# True matches any non-None value.
return markup is not None
@@ -1723,6 +1730,7 @@ class SoupStrainer(object):
# Custom callables take the tag as an argument, but all
# other ways of matching match the tag name as a string.
+ original_markup = markup
if isinstance(markup, Tag):
markup = markup.name
@@ -1733,18 +1741,51 @@ class SoupStrainer(object):
# None matches None, False, an empty string, an empty list, and so on.
return not match_against
- if isinstance(match_against, unicode):
+ if (hasattr(match_against, '__iter__')
+ and not isinstance(match_against, basestring)):
+ # We're asked to match against an iterable of items.
+ # The markup must be match at least one item in the
+ # iterable. We'll try each one in turn.
+ #
+ # To avoid infinite recursion we need to keep track of
+ # items we've already seen.
+ if not already_tried:
+ already_tried = set()
+ for item in match_against:
+ if item.__hash__:
+ key = item
+ else:
+ key = id(item)
+ if key in already_tried:
+ continue
+ else:
+ already_tried.add(key)
+ if self._matches(original_markup, item, already_tried):
+ return True
+ else:
+ return False
+
+ # Beyond this point we might need to run the test twice: once against
+ # the tag's name and once against its prefixed name.
+ match = False
+
+ if not match and isinstance(match_against, unicode):
# Exact string match
- return markup == match_against
+ match = markup == match_against
- if hasattr(match_against, 'match'):
+ if not match and hasattr(match_against, 'search'):
# Regexp match
return match_against.search(markup)
- if hasattr(match_against, '__iter__'):
- # The markup must be an exact match against something
- # in the iterable.
- return markup in match_against
+ if (not match
+ and isinstance(original_markup, Tag)
+ and original_markup.prefix):
+ # Try the whole thing again with the prefixed tag name.
+ return self._matches(
+ original_markup.prefix + ':' + original_markup.name, match_against
+ )
+
+ return match
class ResultSet(list):