Removed Soup Sieve fallback method, added documentation.

author: Leonard Richardson <leonardr@segfault.org> 2023-02-07 10:37:50 -0500
committer: Leonard Richardson <leonardr@segfault.org> 2023-02-07 10:37:50 -0500
commit: 7eedde44d45f99340bcf98384dfb11295ffcebdd (patch)
tree: a1398bc82bb843b90631aa2941787a37e0e5ab98
parent: 6d70cafddd4a265feec5a30cc5b302fd6fbaeb83 (diff)
4 files changed, 159 insertions, 84 deletions
diff --git a/CHANGELOG b/CHANGELOG
index 086be15..6d83cf7 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -3,6 +3,30 @@ Note: Beautiful Soup's official support for Python 2 ended on January 1st,
 4.9.3. In the Launchpad Git repository, the final revision to support
 Python 2 was revision 70f546b1e689a70e2f103795efce6d261a3dadf7.
 
+= 4.12.0 (Unreleased)
+
+* Introduced the .css property, which centralizes all access to
+  the Soup Sieve API. This allows Beautiful Soup to give direct
+  access to as much of Soup Sieve as makes sense, without cluttering
+  the BeautifulSoup and Tag classes with a lot of new methods.
+
+  This does mean one addition to the BeautifulSoup and Tag classes
+  (the .css property itself), so this might be a breaking change if you
+  happen to use Beautiful Soup to parse XML that includes a tag called
+  <css>. In particular, code like this will not work in 4.12.0:
+
+    soup.css['id']
+
+  Code like this will work just as before:
+
+    soup.find('css')['id']
+
+  The Soup Sieve methods supported through the .css property are
+  select(), select_one(), iselect(), closest(), match(), filter(),
+  and escape(). The BeautifulSoup and Tag classes still support the
+  select() and select_one() methods; they have not been deprecated,
+  but they have been demoted to convenience methods.
+
 = 4.11.2 (20230131)
 
 * Fixed test failures caused by nondeterministic behavior of
diff --git a/bs4/css.py b/bs4/css.py
index 8b76139..b237051 100644
--- a/bs4/css.py
+++ b/bs4/css.py
@@ -33,6 +33,8 @@ class CSS(object):
         :param tag: All CSS selectors will use this as their starting
         point.
 
+        :param api: A plug-in replacement for the soupsieve module,
+        designed mainly for use in tests.
         """
         if api is None:
             raise NotImplementedError(
@@ -63,9 +65,9 @@ class CSS(object):
         """Normalize a list of results to a Resultset.
 
         A ResultSet is more consistent with the rest of Beautiful
-        Soup, and ResultSet.__getattr__ has a helpful error message if
-        you try to treat a list of results as a single result (a
-        common mistake).
+        Soup's API, and ResultSet.__getattr__ has a helpful error
+        message if you try to treat a list of results as a single
+        result (a common mistake).
         """
         # Import here to avoid circular import
         from bs4.element import ResultSet
@@ -249,19 +251,3 @@ class CSS(object):
                 select, self.tag, self._ns(namespaces), flags, **kwargs
             )
         )
-
-    def __getattr__(self, __name):
-        """Catch-all method that has a chance of giving access to future
-        methods to be added to Soup Sieve without needing a Beautiful Soup
-        API change.
-
-        Basically, if you call tag.css.somemethod(selector), this code will
-        turn that into soupsieve.somemethod(selector, tag).
-        """
-        attr = getattr(self.api, __name)
-        if callable(attr):
-            return (
-                lambda pattern, *args, __tag=self.tag, __attr=attr, **kwargs:
-                attr(pattern, __tag, *args, **kwargs)
-            )
-        return attr
diff --git a/bs4/tests/test_css.py b/bs4/tests/test_css.py
index a6c17de..cf73831 100644
--- a/bs4/tests/test_css.py
+++ b/bs4/tests/test_css.py
@@ -474,25 +474,20 @@ class TestCSSSelectors(SoupTest):
         assert m(".foo#bar") == '\\.foo\\#bar'
         assert m("()[]{}") == '\\(\\)\\[\\]\\{\\}'
         assert m(".foo") == self.soup.css.escape(".foo")
-
-    def test_fallback(self):
+        
+    def test_api_replacement(self):
+        # You can pass in another object to act as a drop-in
+        # replacement for the soupsieve module.
         class Mock():
             attribute = "value"
             pass
         mock_soupsieve = Mock()
-        mock_soupsieve.some_other_method = MagicMock()
+        mock_soupsieve.escape = MagicMock()
 
         # If an unknown method turns out to be present in Soup Sieve,
         # we may still be able to call it.
         css = CSS(self.soup, api=mock_soupsieve)
-        css.some_other_method("selector", 1, flags=0)
-        mock_soupsieve.some_other_method.assert_called_with(
+        css.escape("identifier")
+        mock_soupsieve.escape.assert_called_with(
             "selector", self.soup, 1, flags=0
         )
-
-        # If the attribute is not callable, getattr is a passthrough.
-        assert mock_soupsieve.attribute == "value"
-
-        # If the method just isn't there, too bad.
-        with pytest.raises(AttributeError):
-            mock_soupsieve.no_such_method()
diff --git a/doc/source/index.rst b/doc/source/index.rst
index 007e75f..5152929 100644
--- a/doc/source/index.rst
+++ b/doc/source/index.rst
@@ -36,7 +36,7 @@ Beautiful Soup users:
 * `이 문서는 한국어 번역도 가능합니다. <https://www.crummy.com/software/BeautifulSoup/bs4/doc.ko/>`_
 * `Este documento também está disponível em Português do Brasil. <https://www.crummy.com/software/BeautifulSoup/bs4/doc.ptbr>`_
 * `Эта документация доступна на русском языке. <https://www.crummy.com/software/BeautifulSoup/bs4/doc.ru/>`_
-
+ 
 Getting help
 ------------
 
@@ -47,6 +47,9 @@ your problem involves parsing an HTML document, be sure to mention
 :ref:`what the diagnose() function says <diagnose>` about
 that document.
 
+When reporting an error in this documentation, please mention which
+translation you're reading.
+
 Quick Start
 ===========
 
@@ -1670,126 +1673,188 @@ that show up earlier in the document than the one we started with. A
 <p> tag that contains an <a> tag must have shown up before the <a>
 tag it contains.
 
-CSS selectors
--------------
-
-``BeautifulSoup`` has a ``.select()`` method which uses the `SoupSieve
-<https://facelessuser.github.io/soupsieve/>`_ package to run a CSS
-selector against a parsed document and return all the matching
-elements. ``Tag`` has a similar method which runs a CSS selector
-against the contents of a single tag.
+The ``.css`` property and CSS selectors
+---------------------------------------
 
-(The SoupSieve integration was added in Beautiful Soup 4.7.0. Earlier
-versions also have the ``.select()`` method, but only the most
-commonly-used CSS selectors are supported. If you installed Beautiful
-Soup through ``pip``, SoupSieve was installed at the same time, so you
-don't have to do anything extra.)
+``BeautifulSoup`` and ``Tag`` objects support CSS selectors through
+their ``.css`` property. The actual selector implementation is handled
+by the `Soup Sieve <https://facelessuser.github.io/soupsieve/>`_
+package, available on PyPI as ``soupsieve``. If you installed
+Beautiful Soup through ``pip``, Soup Sieve was installed at the same
+time, so you don't have to do anything extra.
 
-The SoupSieve `documentation
+`The Soup Sieve documentation
 <https://facelessuser.github.io/soupsieve/>`_ lists all the currently
-supported CSS selectors, but here are some of the basics:
-
-You can find tags::
+supported CSS selectors, but here are some of the basics. You can find
+tags::
 
- soup.select("title")
+ soup.css.select("title")
  # [<title>The Dormouse's story</title>]
 
- soup.select("p:nth-of-type(3)")
+ soup.css.select("p:nth-of-type(3)")
  # [<p class="story">...</p>]
 
 Find tags beneath other tags::
 
- soup.select("body a")
+ soup.css.select("body a")
  # [<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
  #  <a class="sister" href="http://example.com/lacie"  id="link2">Lacie</a>,
  #  <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>]
 
- soup.select("html head title")
+ soup.css.select("html head title")
  # [<title>The Dormouse's story</title>]
 
 Find tags `directly` beneath other tags::
 
- soup.select("head > title")
+ soup.css.select("head > title")
  # [<title>The Dormouse's story</title>]
 
- soup.select("p > a")
+ soup.css.select("p > a")
  # [<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
  #  <a class="sister" href="http://example.com/lacie"  id="link2">Lacie</a>,
  #  <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>]
 
- soup.select("p > a:nth-of-type(2)")
+ soup.css.select("p > a:nth-of-type(2)")
  # [<a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>]
 
- soup.select("p > #link1")
+ soup.css.select("p > #link1")
  # [<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>]
 
- soup.select("body > a")
+ soup.css.select("body > a")
  # []
 
 Find the siblings of tags::
 
- soup.select("#link1 ~ .sister")
+ soup.css.select("#link1 ~ .sister")
  # [<a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>,
  #  <a class="sister" href="http://example.com/tillie"  id="link3">Tillie</a>]
 
- soup.select("#link1 + .sister")
+ soup.css.select("#link1 + .sister")
  # [<a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>]
 
 Find tags by CSS class::
 
- soup.select(".sister")
+ soup.css.select(".sister")
  # [<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
  #  <a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>,
  #  <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>]
 
- soup.select("[class~=sister]")
+ soup.css.select("[class~=sister]")
  # [<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
  #  <a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>,
  #  <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>]
 
 Find tags by ID::
 
- soup.select("#link1")
+ soup.css.select("#link1")
  # [<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>]
 
- soup.select("a#link2")
+ soup.css.select("a#link2")
  # [<a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>]
 
 Find tags that match any selector from a list of selectors::
 
- soup.select("#link1,#link2")
+ soup.css.select("#link1,#link2")
  # [<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
  #  <a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>]
 
 Test for the existence of an attribute::
 
- soup.select('a[href]')
+ soup.css.select('a[href]')
  # [<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
  #  <a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>,
  #  <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>]
 
 Find tags by attribute value::
 
- soup.select('a[href="http://example.com/elsie"]')
+ soup.css.select('a[href="http://example.com/elsie"]')
  # [<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>]
 
- soup.select('a[href^="http://example.com/"]')
+ soup.css.select('a[href^="http://example.com/"]')
  # [<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
  #  <a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>,
  #  <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>]
 
- soup.select('a[href$="tillie"]')
+ soup.css.select('a[href$="tillie"]')
  # [<a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>]
 
- soup.select('a[href*=".com/el"]')
+ soup.css.select('a[href*=".com/el"]')
  # [<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>]
 
 There's also a method called ``select_one()``, which finds only the
 first tag that matches a selector::
 
+ soup.css.select_one(".sister")
+ # <a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>
+
+As a convenience, you can call ``select()`` and ``select_one()``
+directly on the ``BeautifulSoup`` or ``Tag`` object::
+
+ soup.select('a[href$="tillie"]')
+ # [<a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>]
+
  soup.select_one(".sister")
  # <a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>
 
+CSS selector support is a convenience for people who already know the
+CSS selector syntax. You can do all of this with the Beautiful Soup
+API. If CSS selectors are all you need, you should skip Beautiful Soup
+altogether and parse the document with ``lxml``: it's a lot
+faster. But Soup Sieve lets you `combine` CSS selectors with the
+Beautiful Soup API.
+
+Advanced Soup Sieve features
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Soup Sieve offers a substantial API beyond the ``select()`` and
+``select_one()`` methods, and you can access most of that API through
+the ``.css`` attribute of ``Tag`` or ``BeautifulSoup``. What follows
+is just a list of the supported methods; see `the Soup Sieve
+documentation <https://facelessuser.github.io/soupsieve/>`_ for full
+documentation.
+
+The ``iselect()`` method works the same as ``select()``, but it
+returns a generator instead of a list::
+
+ [tag['id'] for tag in soup.css.iselect(".sister")]
+ # ['link1', 'link2', 'link3']
+
+The ``closest()`` method returns the nearest parent of a given ``Tag``
+that matches a CSS selector, similar to Beautiful Soup's
+``find_parent()`` method::
+
+ elsie = soup.css.select_one(".sister")
+ elsie.css.closest("p.story")
+ # <p class="story">Once upon a time there were three little sisters; and their names were
+ #  <a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
+ #  <a class="sister" href="http://example.com/lacie" id="link2">Lacie</a> and
+ #  <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>;
+ #  and they lived at the bottom of a well.</p>
+
+The ``match()`` method returns a boolean depending on whether or not a
+specific ``Tag`` matches a selector::
+ 
+ elsie.css.match("#link1")
+ # True
+
+ elsie.css.match("#link2")
+ # False
+
+The ``filter()`` method returns the subset of a tag's direct children
+that match a selector::
+ 
+ [tag.string for tag in soup.find('p', 'story').css.filter('a')]
+ # ['Elsie', 'Lacie', 'Tillie']
+
+The ``escape()`` method escapes CSS identifiers that would otherwise
+be invalid::
+ 
+ soup.css.escape("1-strange-identifier")
+ # '\\31 -strange-identifier'
+
+Namespaces in CSS selectors
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
 If you've parsed XML that defines namespaces, you can use them in CSS
 selectors.::
 
@@ -1798,28 +1863,33 @@ selectors.::
   <ns1:child>I'm in namespace 1</ns1:child>
   <ns2:child>I'm in namespace 2</ns2:child>
  </tag> """
- soup = BeautifulSoup(xml, "xml")
+ namespace_soup = BeautifulSoup(xml, "xml")
 
- soup.select("child")
+ namespace_soup.css.select("child")
  # [<ns1:child>I'm in namespace 1</ns1:child>, <ns2:child>I'm in namespace 2</ns2:child>]
 
- soup.select("ns1|child")
+ namespace_soup.css.select("ns1|child")
  # [<ns1:child>I'm in namespace 1</ns1:child>]
- 
-When handling a CSS selector that uses namespaces, Beautiful Soup
-always tries to use namespace prefixes that make sense based on what
-it saw while parsing the document. You can always provide your own
-dictionary of abbreviations::
+
+Beautiful Soup tries to use namespace prefixes that make sense based
+on what it saw while parsing the document, but you can always provide
+your own dictionary of abbreviations::
 
  namespaces = dict(first="http://namespace1/", second="http://namespace2/")
- soup.select("second|child", namespaces=namespaces)
+ namespace_soup.css.select("second|child", namespaces=namespaces)
  # [<ns1:child>I'm in namespace 2</ns1:child>]
+
+History of CSS selector support
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The ``.css`` property was added in Beautiful Soup 4.12.0. Prior to this,
+only the ``.select()`` and ``.select_one()`` convenience methods were
+supported.
+
+The Soup Sieve integration was added in Beautiful Soup 4.7.0. Earlier
+versions had the ``.select()`` method, but only the most commonly-used
+CSS selectors were supported.
  
-All this CSS selector stuff is a convenience for people who already
-know the CSS selector syntax. You can do all of this with the
-Beautiful Soup API. And if CSS selectors are all you need, you should
-parse the document with lxml: it's a lot faster. But this lets you
-`combine` CSS selectors with the Beautiful Soup API.
 
 Modifying the tree
 ==================
author	Leonard Richardson <leonardr@segfault.org>	2023-02-07 10:37:50 -0500
committer	Leonard Richardson <leonardr@segfault.org>	2023-02-07 10:37:50 -0500
commit	7eedde44d45f99340bcf98384dfb11295ffcebdd (patch)
tree	a1398bc82bb843b90631aa2941787a37e0e5ab98
parent	6d70cafddd4a265feec5a30cc5b302fd6fbaeb83 (diff)