summaryrefslogtreecommitdiff
path: root/bs4/element.py
diff options
context:
space:
mode:
authorLeonard Richardson <leonard.richardson@canonical.com>2012-02-15 17:03:37 -0500
committerLeonard Richardson <leonard.richardson@canonical.com>2012-02-15 17:03:37 -0500
commit87747b712cfe63d173332f06ee1ba2bf9adf9ce5 (patch)
tree1829d574032666de65230c2c9df6f1aa522c1b05 /bs4/element.py
parentac197c5ad0ffe0795436cb54e0766640d12c6a31 (diff)
Added a kind of hacky way to interpret the restriction class='foo bar'. Stop generating a space before the slash that closes an empty-element tag.
Diffstat (limited to 'bs4/element.py')
-rw-r--r--bs4/element.py23
1 files changed, 17 insertions, 6 deletions
diff --git a/bs4/element.py b/bs4/element.py
index cf1ed32..a0f64ba 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -782,7 +782,7 @@ class Tag(PageElement):
close = ''
closeTag = ''
if self.is_empty_element:
- close = ' /'
+ close = '/'
else:
closeTag = '</%s>' % self.name
@@ -1013,11 +1013,22 @@ class SoupStrainer(object):
result = False
if isinstance(markup, list) or isinstance(markup, tuple):
- # This should only happen when searching the 'class'
- # attribute of a tag with multiple CSS classes.
- for item in markup:
- if self._matches(item, match_against):
- result = True
+ # This should only happen when searching a multi-valued
+ # attribute, e.g. 'class'.
+ if (isinstance(match_against, basestring)
+ and ' ' in match_against):
+ # A bit of a special case. If the caller tries to match "foo
+ # bar" against a multi-valued attribute, only accept the
+ # literal value "foo bar" (the same values, in the same order).
+ #
+ # XXX This is going to be pretty slow because we keep
+ # splitting match_against. But it shouldn't come up
+ # too often.
+ result = (whitespace_re.split(match_against) == markup)
+ else:
+ for item in markup:
+ if self._matches(item, match_against):
+ result = True
elif match_against is True:
result = markup is not None
elif isinstance(match_against, collections.Callable):