diff options
author | Leonard Richardson <leonardr@segfault.org> | 2016-07-18 21:33:24 -0400 |
---|---|---|
committer | Leonard Richardson <leonardr@segfault.org> | 2016-07-18 21:33:24 -0400 |
commit | 1126b39cf68d7b75b7f12a185ab9f6983526fa3a (patch) | |
tree | e957681992412e0bd19c91e1719d3ec5fc605235 /bs4/element.py | |
parent | d152430325e8ebbf5a5be46c4c0a3f8489c119ff (diff) |
Added support for CSS selector values that contain quoted spaces,
such as tag[style="display: foo"]. [bug=1540588]
Diffstat (limited to 'bs4/element.py')
-rw-r--r-- | bs4/element.py | 14 |
1 file changed, 6 insertions, 8 deletions
```diff
diff --git a/bs4/element.py b/bs4/element.py
index 7a3aa52..ad13533 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -4,6 +4,7 @@ __license__ = "MIT"
 
 import collections
 import re
+import shlex
 import sys
 import warnings
 from bs4.dammit import EntitySubstitution
@@ -1319,6 +1320,7 @@ class Tag(PageElement):
 
     _selector_combinators = ['>', '+', '~']
     _select_debug = False
+    quoted_colon = re.compile('"[^"]*:[^"]*"')
     def select_one(self, selector):
         """Perform a CSS selection operation on the current element."""
         value = self.select(selector, limit=1)
@@ -1344,8 +1346,7 @@ class Tag(PageElement):
                 if limit and len(context) >= limit:
                     break
             return context
-
-        tokens = selector.split()
+        tokens = shlex.split(selector)
         current_context = [self]
 
         if tokens[-1] in self._selector_combinators:
@@ -1397,7 +1398,7 @@ class Tag(PageElement):
                     return classes.issubset(candidate.get('class', []))
                 checker = classes_match
 
-            elif ':' in token:
+            elif ':' in token and not self.quoted_colon.search(token):
                 # Pseudo-class
                 tag_name, pseudo = token.split(':', 1)
                 if tag_name == '':
@@ -1428,11 +1429,8 @@ class Tag(PageElement):
                                 self.count += 1
                                 if self.count == self.destination:
                                     return True
-                                if self.count > self.destination:
-                                    # Stop the generator that's sending us
-                                    # these things.
-                                    raise StopIteration()
-                                return False
+                                else:
+                                    return False
                         checker = Counter(pseudo_value).nth_child_of_type
                     else:
                         raise NotImplementedError(
```