diff options
Diffstat (limited to 'bs4/css.py')
-rw-r--r-- | bs4/css.py | 251 |
1 files changed, 251 insertions, 0 deletions
diff --git a/bs4/css.py b/bs4/css.py new file mode 100644 index 0000000..5d60267 --- /dev/null +++ b/bs4/css.py @@ -0,0 +1,251 @@ +"""Integration code for CSS selectors using Soup Sieve (pypi: soupsieve).""" + +try: + import soupsieve +except ImportError as e: + soupsieve = None + warnings.warn( + 'The soupsieve package is not installed. CSS selectors cannot be used.' + ) + + +class CSS(object): + """A proxy object against the soupsieve library, to simplify its + CSS selector API. + + Acquire this object through the .css attribute on the + BeautifulSoup object, or on the Tag you want to use as the + starting point for a CSS selector. + + Specifically, the element to be selected against doesn't need to + be explicitly specified in the function call, since you access + this object through a specific tag. + + """ + + def __init__(self, tag): + """Constructor. + + You don't need to instantiate this class yourself; instead, + access the .css attribute on the BeautifulSoup object, or on + the Tag you want to use as the starting point for your CSS + selector. + + :param tag: All CSS selectors will use this as their starting + point. + + """ + if soupsieve is None: + raise NotImplementedError( + "Cannot execute CSS selectors because the soupsieve package is not installed." + ) + self.tag = tag + + @classmethod + def escape(cls, ident): + """Escape a CSS selector. + + This is a simple wrapper around soupselect.escape(). + """ + if soupsieve is None: + raise NotImplementedError( + "Cannot escape CSS selectors because the soupsieve package is not installed." + ) + return soupsieve.escape(ident) + + def _ns(self, ns): + """Normalize a dictionary of namespaces.""" + if ns is None: + ns = self.tag._namespaces + return ns + + def _rs(self, results): + """Normalize a list of results to a Resultset. + + A ResultSet is more consistent with the rest of Beautiful + Soup, and ResultSet.__getattr__ has a helpful error message if + you try to treat a list of results as a single result (a + common mistake). + """ + # Import here to avoid circular import + from bs4.element import ResultSet + return ResultSet(None, results) + + def select_one(self, select, namespaces=None, flags=0, **kwargs): + """Perform a CSS selection operation on the current Tag and return the + first result. + + This uses the Soup Sieve library. For more information, see + that library's documentation for the soupsieve.select_one() + method. + + :param selector: A CSS selector. + + :param namespaces: A dictionary mapping namespace prefixes + used in the CSS selector to namespace URIs. By default, + Beautiful Soup will use the prefixes it encountered while + parsing the document. + + :param flags: Flags to be passed into Soup Sieve's + soupsieve.select_one() method. + + :param kwargs: Keyword arguments to be passed into SoupSieve's + soupsieve.select_one() method. + + :return: A Tag, or None if the selector has no match. + :rtype: bs4.element.Tag + + """ + return soupsieve.select_one( + select, self.tag, self._ns(namespaces), flags, **kwargs + ) + + def select(self, select, namespaces=None, limit=0, flags=0, **kwargs): + """Perform a CSS selection operation on the current Tag. + + This uses the Soup Sieve library. For more information, see + that library's documentation for the soupsieve.select() + method. + + :param selector: A string containing a CSS selector. + + :param namespaces: A dictionary mapping namespace prefixes + used in the CSS selector to namespace URIs. By default, + Beautiful Soup will pass in the prefixes it encountered while + parsing the document. + + :param limit: After finding this number of results, stop looking. + + :param flags: Flags to be passed into Soup Sieve's + soupsieve.select() method. + + :param kwargs: Keyword arguments to be passed into SoupSieve's + soupsieve.select() method. + + :return: A ResultSet of Tag objects. + :rtype: bs4.element.ResultSet + + """ + if limit is None: + limit = 0 + + return self._rs( + soupsieve.select( + select, self.tag, self._ns(namespaces), limit, flags, + **kwargs + ) + ) + + def iselect(self, select, namespaces=None, limit=0, flags=0, **kwargs): + """Perform a CSS selection operation on the current Tag. + + This uses the Soup Sieve library. For more information, see + that library's documentation for the soupsieve.iselect() + method. It is the same as select(), but it returns a generator + instead of a list. + + :param selector: A string containing a CSS selector. + + :param namespaces: A dictionary mapping namespace prefixes + used in the CSS selector to namespace URIs. By default, + Beautiful Soup will pass in the prefixes it encountered while + parsing the document. + + :param limit: After finding this number of results, stop looking. + + :param flags: Flags to be passed into Soup Sieve's + soupsieve.iselect() method. + + :param kwargs: Keyword arguments to be passed into SoupSieve's + soupsieve.iselect() method. + + :return: A generator + :rtype: types.GeneratorType + """ + return soupsieve.iselect( + select, self.tag, self._ns(namespaces), limit, flags, **kwargs + ) + + def closest(self, select, namespaces=None, flags=0, **kwargs): + """Find the Tag closest to this one that matches the given selector. + + This uses the Soup Sieve library. For more information, see + that library's documentation for the soupsieve.closest() + method. + + :param selector: A string containing a CSS selector. + + :param namespaces: A dictionary mapping namespace prefixes + used in the CSS selector to namespace URIs. By default, + Beautiful Soup will pass in the prefixes it encountered while + parsing the document. + + :param flags: Flags to be passed into Soup Sieve's + soupsieve.closest() method. + + :param kwargs: Keyword arguments to be passed into SoupSieve's + soupsieve.closest() method. + + :return: A Tag, or None if there is no match. + :rtype: bs4.Tag + + """ + return soupsieve.closest( + select, self.tag, self._ns(namespaces), flags, **kwargs + ) + + def match(self, select, namespaces=None, flags=0, **kwargs): + """Check whether this Tag matches the given CSS selector. + + This uses the Soup Sieve library. For more information, see + that library's documentation for the soupsieve.match() + method. + + :param: a CSS selector. + + :param namespaces: A dictionary mapping namespace prefixes + used in the CSS selector to namespace URIs. By default, + Beautiful Soup will pass in the prefixes it encountered while + parsing the document. + + :param flags: Flags to be passed into Soup Sieve's + soupsieve.match() method. + + :param kwargs: Keyword arguments to be passed into SoupSieve's + soupsieve.match() method. + + :return: True if this Tag matches the selector; False otherwise. + :rtype: bool + """ + return soupsieve.match( + select, self.tag, self._ns(namespaces), flags, **kwargs + ) + + def filter(self, select, namespaces=None, flags=0, **kwargs): + """Filter this Tag's direct children based on the given CSS selector. + + This uses the Soup Sieve library. It works the same way as + passing this Tag into that library's soupsieve.filter() + method. More information, for more information see the + documentation for soupsieve.filter(). + + :param namespaces: A dictionary mapping namespace prefixes + used in the CSS selector to namespace URIs. By default, + Beautiful Soup will pass in the prefixes it encountered while + parsing the document. + + :param flags: Flags to be passed into Soup Sieve's + soupsieve.filter() method. + + :param kwargs: Keyword arguments to be passed into SoupSieve's + soupsieve.filter() method. + + :return: A ResultSet of Tag objects. + :rtype: bs4.element.ResultSet + + """ + return self._rs( + soupsieve.filter( + select, self.tag, self._ns(namespaces), flags, **kwargs + ) + ) |