summaryrefslogtreecommitdiff
path: root/bs4/css.py
diff options
context:
space:
mode:
Diffstat (limited to 'bs4/css.py')
-rw-r--r--bs4/css.py251
1 files changed, 251 insertions, 0 deletions
diff --git a/bs4/css.py b/bs4/css.py
new file mode 100644
index 0000000..5d60267
--- /dev/null
+++ b/bs4/css.py
@@ -0,0 +1,251 @@
+"""Integration code for CSS selectors using Soup Sieve (pypi: soupsieve)."""
+
import warnings

# soupsieve is an optional dependency. If it cannot be imported, bind
# the name to None so the rest of this module can detect its absence,
# and tell the user why CSS selectors will not work.
try:
    import soupsieve
except ImportError:
    soupsieve = None
    warnings.warn(
        'The soupsieve package is not installed. CSS selectors cannot be used.'
    )
+
+
class CSS(object):
    """A proxy object against the soupsieve library, to simplify its
    CSS selector API.

    Acquire this object through the .css attribute on the
    BeautifulSoup object, or on the Tag you want to use as the
    starting point for a CSS selector.

    Specifically, the element to be selected against doesn't need to
    be explicitly specified in the function call, since you access
    this object through a specific tag.

    """

    def __init__(self, tag):
        """Constructor.

        You don't need to instantiate this class yourself; instead,
        access the .css attribute on the BeautifulSoup object, or on
        the Tag you want to use as the starting point for your CSS
        selector.

        :param tag: All CSS selectors will use this as their starting
         point.

        :raise NotImplementedError: If the soupsieve package is not
         installed.

        """
        if soupsieve is None:
            raise NotImplementedError(
                "Cannot execute CSS selectors because the soupsieve package is not installed."
            )
        self.tag = tag

    @classmethod
    def escape(cls, ident):
        """Escape a CSS identifier.

        This is a simple wrapper around soupsieve.escape().

        :param ident: The string to escape.

        :return: Whatever soupsieve.escape() returns for `ident`.

        :raise NotImplementedError: If the soupsieve package is not
         installed.
        """
        if soupsieve is None:
            raise NotImplementedError(
                "Cannot escape CSS selectors because the soupsieve package is not installed."
            )
        return soupsieve.escape(ident)

    def _ns(self, ns):
        """Normalize a dictionary of namespaces.

        :param ns: A namespace dictionary supplied by the caller, or
         None.

        :return: `ns` itself if it is not None; otherwise the
         `_namespaces` attribute of the tag this object wraps.
        """
        if ns is None:
            ns = self.tag._namespaces
        return ns

    def _rs(self, results):
        """Normalize a list of results to a ResultSet.

        A ResultSet is more consistent with the rest of Beautiful
        Soup, and ResultSet.__getattr__ has a helpful error message if
        you try to treat a list of results as a single result (a
        common mistake).

        :param results: The results to wrap.

        :return: A ResultSet built from `results`, with None as its
         first constructor argument.
        """
        # Import here to avoid circular import
        from bs4.element import ResultSet
        return ResultSet(None, results)

    def select_one(self, select, namespaces=None, flags=0, **kwargs):
        """Perform a CSS selection operation on the current Tag and return the
        first result.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the soupsieve.select_one()
        method.

        :param select: A CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
         used in the CSS selector to namespace URIs. By default,
         Beautiful Soup will use the prefixes it encountered while
         parsing the document.

        :param flags: Flags to be passed into Soup Sieve's
         soupsieve.select_one() method.

        :param kwargs: Keyword arguments to be passed into SoupSieve's
         soupsieve.select_one() method.

        :return: A Tag, or None if the selector has no match.
        :rtype: bs4.element.Tag

        """
        return soupsieve.select_one(
            select, self.tag, self._ns(namespaces), flags, **kwargs
        )

    def select(self, select, namespaces=None, limit=0, flags=0, **kwargs):
        """Perform a CSS selection operation on the current Tag.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the soupsieve.select()
        method.

        :param select: A string containing a CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
         used in the CSS selector to namespace URIs. By default,
         Beautiful Soup will pass in the prefixes it encountered while
         parsing the document.

        :param limit: After finding this number of results, stop looking.
         None is treated the same as 0.

        :param flags: Flags to be passed into Soup Sieve's
         soupsieve.select() method.

        :param kwargs: Keyword arguments to be passed into SoupSieve's
         soupsieve.select() method.

        :return: A ResultSet of Tag objects.
        :rtype: bs4.element.ResultSet

        """
        if limit is None:
            limit = 0

        return self._rs(
            soupsieve.select(
                select, self.tag, self._ns(namespaces), limit, flags,
                **kwargs
            )
        )

    def iselect(self, select, namespaces=None, limit=0, flags=0, **kwargs):
        """Perform a CSS selection operation on the current Tag.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the soupsieve.iselect()
        method. It is the same as select(), but it returns a generator
        instead of a list.

        :param select: A string containing a CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
         used in the CSS selector to namespace URIs. By default,
         Beautiful Soup will pass in the prefixes it encountered while
         parsing the document.

        :param limit: After finding this number of results, stop looking.
         None is treated the same as 0.

        :param flags: Flags to be passed into Soup Sieve's
         soupsieve.iselect() method.

        :param kwargs: Keyword arguments to be passed into SoupSieve's
         soupsieve.iselect() method.

        :return: A generator
        :rtype: types.GeneratorType
        """
        # Mirror select(): accept limit=None and hand soupsieve the
        # integer 0 instead, so both methods treat None the same way.
        if limit is None:
            limit = 0

        return soupsieve.iselect(
            select, self.tag, self._ns(namespaces), limit, flags, **kwargs
        )

    def closest(self, select, namespaces=None, flags=0, **kwargs):
        """Find the Tag closest to this one that matches the given selector.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the soupsieve.closest()
        method.

        :param select: A string containing a CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
         used in the CSS selector to namespace URIs. By default,
         Beautiful Soup will pass in the prefixes it encountered while
         parsing the document.

        :param flags: Flags to be passed into Soup Sieve's
         soupsieve.closest() method.

        :param kwargs: Keyword arguments to be passed into SoupSieve's
         soupsieve.closest() method.

        :return: A Tag, or None if there is no match.
        :rtype: bs4.element.Tag

        """
        return soupsieve.closest(
            select, self.tag, self._ns(namespaces), flags, **kwargs
        )

    def match(self, select, namespaces=None, flags=0, **kwargs):
        """Check whether this Tag matches the given CSS selector.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the soupsieve.match()
        method.

        :param select: A CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
         used in the CSS selector to namespace URIs. By default,
         Beautiful Soup will pass in the prefixes it encountered while
         parsing the document.

        :param flags: Flags to be passed into Soup Sieve's
         soupsieve.match() method.

        :param kwargs: Keyword arguments to be passed into SoupSieve's
         soupsieve.match() method.

        :return: True if this Tag matches the selector; False otherwise.
        :rtype: bool
        """
        return soupsieve.match(
            select, self.tag, self._ns(namespaces), flags, **kwargs
        )

    def filter(self, select, namespaces=None, flags=0, **kwargs):
        """Filter this Tag's direct children based on the given CSS selector.

        This uses the Soup Sieve library. It works the same way as
        passing this Tag into that library's soupsieve.filter()
        method. For more information, see the documentation for
        soupsieve.filter().

        :param select: A string containing a CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
         used in the CSS selector to namespace URIs. By default,
         Beautiful Soup will pass in the prefixes it encountered while
         parsing the document.

        :param flags: Flags to be passed into Soup Sieve's
         soupsieve.filter() method.

        :param kwargs: Keyword arguments to be passed into SoupSieve's
         soupsieve.filter() method.

        :return: A ResultSet of Tag objects.
        :rtype: bs4.element.ResultSet

        """
        return self._rs(
            soupsieve.filter(
                select, self.tag, self._ns(namespaces), flags, **kwargs
            )
        )