diff options
author | Leonard Richardson <leonard.richardson@canonical.com> | 2012-02-15 17:03:37 -0500 |
---|---|---|
committer | Leonard Richardson <leonard.richardson@canonical.com> | 2012-02-15 17:03:37 -0500 |
commit | 87747b712cfe63d173332f06ee1ba2bf9adf9ce5 (patch) | |
tree | 1829d574032666de65230c2c9df6f1aa522c1b05 /bs4/element.py | |
parent | ac197c5ad0ffe0795436cb54e0766640d12c6a31 (diff) |
Added a kind of hacky way to interpret the restriction class='foo bar'. Stop generating a space before the slash that closes an empty-element tag.
Diffstat (limited to 'bs4/element.py')
-rw-r--r-- | bs4/element.py | 23 |
1 file changed, 17 insertions, 6 deletions
```diff
diff --git a/bs4/element.py b/bs4/element.py
index cf1ed32..a0f64ba 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -782,7 +782,7 @@ class Tag(PageElement):
         close = ''
         closeTag = ''
         if self.is_empty_element:
-            close = ' /'
+            close = '/'
         else:
             closeTag = '</%s>' % self.name
 
@@ -1013,11 +1013,22 @@ class SoupStrainer(object):
         result = False
 
         if isinstance(markup, list) or isinstance(markup, tuple):
-            # This should only happen when searching the 'class'
-            # attribute of a tag with multiple CSS classes.
-            for item in markup:
-                if self._matches(item, match_against):
-                    result = True
+            # This should only happen when searching, e.g. the 'class'
+            # attribute.
+            if (isinstance(match_against, basestring)
+                and ' ' in match_against):
+                # A bit of a special case. If they try to match "foo
+                # bar" on a multivalue attribute's value, only accept
+                # the literal value "foo bar"
+                #
+                # XXX This is going to be pretty slow because we keep
+                # splitting match_against. But it shouldn't come up
+                # too often.
+                result = (whitespace_re.split(match_against) == markup)
+            else:
+                for item in markup:
+                    if self._matches(item, match_against):
+                        result = True
         elif match_against is True:
             result = markup is not None
         elif isinstance(match_against, collections.Callable):
```