Added docstrings to all methods in __init__.py

author: Leonard Richardson <leonardr@segfault.org> 2019-12-20 15:07:27 -0500
committer: Leonard Richardson <leonardr@segfault.org> 2019-12-20 15:07:27 -0500
commit: ee3f30b0e778747fb701856a33713369f778834f (patch)
tree: 1b7aeedc410a2bbd4262dfd984d70c640d5cec38
parent: f5f0e1ea521f813964b4f9cbea53fcdfc3942f56 (diff)
1 files changed, 120 insertions, 58 deletions
diff --git a/bs4/__init__.py b/bs4/__init__.py
index 1ea7b97..0f3a0e3 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -52,18 +52,26 @@ from .element import (
 'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work.'<>'You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).'
 
 class BeautifulSoup(Tag):
-    """
-    This class defines the basic interface called by the tree builders.
+    """A data structure representing a parsed HTML or XML document.
+
+    Most of the methods you'll call on a BeautifulSoup object are inherited from
+    PageElement or Tag.
+
+    Internally, this class defines the basic interface called by the
+    tree builders when converting an HTML/XML document into a data
+    structure. The interface abstracts away the differences between
+    parsers. To write a new tree builder, you'll need to understand
+    these methods as a whole.
 
-    These methods will be called by the parser:
-      reset()
-      feed(markup)
+    These methods will be called by the BeautifulSoup constructor:
+      * reset()
+      * feed(markup)
 
     The tree builder may call these methods from its feed() implementation:
-      handle_starttag(name, attrs) # See note about return value
-      handle_endtag(name)
-      handle_data(data) # Appends to the current data node
-      endData(containerClass) # Ends the current data node
+      * handle_starttag(name, namespace, nsprefix, attrs) # See note about return value
+      * handle_endtag(name)
+      * handle_data(data) # Appends to the current data node
+      * endData(containerClass) # Ends the current data node
 
     No matter how complicated the underlying parser is, you should be
     able to build a tree using 'start tag' events, 'end tag' events,
@@ -73,13 +81,19 @@ class BeautifulSoup(Tag):
     like HTML's <br> tag), call handle_starttag and then
     handle_endtag.
     """
+
+    # Since BeautifulSoup subclasses Tag, it's possible to treat it as
+    # a Tag with a .name. This name makes it clear the BeautifulSoup
+    # object isn't a real markup tag.
     ROOT_TAG_NAME = u'[document]'
 
     # If the end-user gives no indication which tree builder they
     # want, look for one with these features.
     DEFAULT_BUILDER_FEATURES = ['html', 'fast']
-   
-    ASCII_SPACES = '\x20\x0a\x09\x0c\x0d'
+
+    # A bytestring containing all ASCII whitespace characters, used in
+    # endData() to detect data chunks that seem 'empty'.
+    ASCII_SPACES = b'\x20\x0a\x09\x0c\x0d'
 
     NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nThe code that caused this warning is on line %(line_number)s of the file %(filename)s. To get rid of this warning, pass the additional argument 'features=\"%(parser)s\"' to the BeautifulSoup constructor.\n"
 
@@ -89,50 +103,49 @@ class BeautifulSoup(Tag):
         """Constructor.
 
         :param markup: A string or a file-like object representing
-        markup to be parsed.
+         markup to be parsed.
 
         :param features: Desirable features of the parser to be used. This
-        may be the name of a specific parser ("lxml", "lxml-xml",
-        "html.parser", or "html5lib") or it may be the type of markup
-        to be used ("html", "html5", "xml"). It's recommended that you
-        name a specific parser, so that Beautiful Soup gives you the
-        same results across platforms and virtual environments.
+         may be the name of a specific parser ("lxml", "lxml-xml",
+         "html.parser", or "html5lib") or it may be the type of markup
+         to be used ("html", "html5", "xml"). It's recommended that you
+         name a specific parser, so that Beautiful Soup gives you the
+         same results across platforms and virtual environments.
 
         :param builder: A TreeBuilder subclass to instantiate (or
-        instance to use) instead of looking one up based on
-        `features`. You only need to use this if you've implemented a
-        custom TreeBuilder.
+         instance to use) instead of looking one up based on
+         `features`. You only need to use this if you've implemented a
+         custom TreeBuilder.
 
         :param parse_only: A SoupStrainer. Only parts of the document
-        matching the SoupStrainer will be considered. This is useful
-        when parsing part of a document that would otherwise be too
-        large to fit into memory.
+         matching the SoupStrainer will be considered. This is useful
+         when parsing part of a document that would otherwise be too
+         large to fit into memory.
 
         :param from_encoding: A string indicating the encoding of the
-        document to be parsed. Pass this in if Beautiful Soup is
-        guessing wrongly about the document's encoding.
+         document to be parsed. Pass this in if Beautiful Soup is
+         guessing wrongly about the document's encoding.
 
         :param exclude_encodings: A list of strings indicating
-        encodings known to be wrong. Pass this in if you don't know
-        the document's encoding but you know Beautiful Soup's guess is
-        wrong.
+         encodings known to be wrong. Pass this in if you don't know
+         the document's encoding but you know Beautiful Soup's guess is
+         wrong.
 
         :param element_classes: A dictionary mapping BeautifulSoup
-        classes like Tag and NavigableString to other classes you'd
-        like to be instantiated instead as the parse tree is
-        built. This is useful for using subclasses to modify the
-        default behavior of Tag or NavigableString.
+         classes like Tag and NavigableString, to other classes you'd
+         like to be instantiated instead as the parse tree is
+         built. This is useful for subclassing Tag or NavigableString
+         to modify default behavior.
 
         :param kwargs: For backwards compatibility purposes, the
-        constructor accepts certain keyword arguments used in
-        Beautiful Soup 3. None of these arguments do anything in
-        Beautiful Soup 4; they will result in a warning and then be ignored.
-
-        Apart from this, any keyword arguments passed into the BeautifulSoup
-        constructor are propagated to the TreeBuilder constructor. This
-        makes it possible to configure a TreeBuilder beyond saying
-        which one to use.
-
+         constructor accepts certain keyword arguments used in
+         Beautiful Soup 3. None of these arguments do anything in
+         Beautiful Soup 4; they will result in a warning and then be ignored.
+         
+         Apart from this, any keyword arguments passed into the BeautifulSoup
+         constructor are propagated to the TreeBuilder constructor. This
+         makes it possible to configure a TreeBuilder by passing in arguments,
+         not just by saying which one to use.
         """
 
         if 'convertEntities' in kwargs:
@@ -329,6 +342,9 @@ class BeautifulSoup(Tag):
         self.builder.soup = None
 
     def __copy__(self):
+        """A copy of a BeautifulSoup object is created by converting the
+        document to a string and parsing it again.
+        """
         copy = type(self)(
             self.encode('utf-8'), builder=self.builder, from_encoding='utf-8'
         )
@@ -349,9 +365,10 @@ class BeautifulSoup(Tag):
 
     @staticmethod
     def _check_markup_is_url(markup):
-        """ 
-        Check if markup looks like it's actually a url and raise a warning 
-        if so. Markup can be unicode or str (py2) / bytes (py3).
+        """Error-handling method to raise a warning if incoming markup looks
+        like a URL.
+
+        :param markup: A string or bytestring.
         """
         if isinstance(markup, bytes):
             space = b' '
@@ -376,6 +393,9 @@ class BeautifulSoup(Tag):
                 )
 
     def _feed(self):
+        """Internal method that parses previously set markup, creating a large
+        number of Tag and NavigableString objects.
+        """
         # Convert the document to Unicode.
         self.builder.reset()
 
@@ -386,6 +406,9 @@ class BeautifulSoup(Tag):
             self.popTag()
 
     def reset(self):
+        """Reset this object to a state as though it had never parsed any
+        markup.
+        """
         Tag.__init__(self, self, self.builder, self.ROOT_TAG_NAME)
         self.hidden = 1
         self.builder.reset()
@@ -397,7 +420,7 @@ class BeautifulSoup(Tag):
 
     def new_tag(self, name, namespace=None, nsprefix=None, attrs={},
                 sourceline=None, sourcepos=None, **kwattrs):
-        """Create a new tag associated with this soup."""
+        """Create a new Tag associated with this BeautifulSoup object."""
         kwattrs.update(attrs)
         return self.element_classes.get(Tag, Tag)(
             None, self.builder, name, namespace, nsprefix, kwattrs,
@@ -405,19 +428,28 @@ class BeautifulSoup(Tag):
         )
 
     def new_string(self, s, subclass=None):
-        """Create a new NavigableString associated with this soup."""
+        """Create a new NavigableString associated with this BeautifulSoup
+        object.
+        """
         subclass = subclass or self.element_classes.get(
             NavigableString, NavigableString
         )
         return subclass(s)
 
     def insert_before(self, successor):
+        """This method is part of the PageElement API, but `BeautifulSoup` doesn't implement
+        it because there is nothing before or after it in the parse tree.
+        """
         raise NotImplementedError("BeautifulSoup objects don't support insert_before().")
 
     def insert_after(self, successor):
+        """This method is part of the PageElement API, but `BeautifulSoup` doesn't implement
+        it because there is nothing before or after it in the parse tree.
+        """
         raise NotImplementedError("BeautifulSoup objects don't support insert_after().")
 
     def popTag(self):
+        """Internal method called by _popToTag when a tag is closed."""
         tag = self.tagStack.pop()
         if self.preserve_whitespace_tag_stack and tag == self.preserve_whitespace_tag_stack[-1]:
             self.preserve_whitespace_tag_stack.pop()
@@ -427,6 +459,7 @@ class BeautifulSoup(Tag):
         return self.currentTag
 
     def pushTag(self, tag):
+        """Internal method called by handle_starttag when a tag is opened."""
         #print "Push", tag.name
         if self.currentTag is not None:
             self.currentTag.contents.append(tag)
@@ -436,7 +469,9 @@ class BeautifulSoup(Tag):
             self.preserve_whitespace_tag_stack.append(tag)
 
     def endData(self, containerClass=None):
-
+        """Method called by the TreeBuilder when the end of a data segment
+        occurs.
+        """
         # Default container is NavigableString.
         containerClass = containerClass or NavigableString
 
@@ -476,7 +511,7 @@ class BeautifulSoup(Tag):
             self.object_was_parsed(o)
 
     def object_was_parsed(self, o, parent=None, most_recent_element=None):
-        """Add an object to the parse tree."""
+        """Method called by the TreeBuilder to integrate an object into the parse tree."""
         if parent is None:
             parent = self.currentTag
         if most_recent_element is not None:
@@ -545,9 +580,14 @@ class BeautifulSoup(Tag):
 
     def _popToTag(self, name, nsprefix=None, inclusivePop=True):
         """Pops the tag stack up to and including the most recent
-        instance of the given tag. If inclusivePop is false, pops the tag
-        stack up to but *not* including the most recent instqance of
-        the given tag."""
+        instance of the given tag. 
+
+        :param name: Pop up to the most recent tag with this name.
+        :param nsprefix: The namespace prefix that goes with `name`.
+        :param inclusivePop: If this is false, pops the tag stack up
+          to but *not* including the most recent instance of the
+          given tag.
+        """
         #print "Popping to %s" % name
         if name == self.ROOT_TAG_NAME:
             # The BeautifulSoup object itself can never be popped.
@@ -568,14 +608,21 @@ class BeautifulSoup(Tag):
 
     def handle_starttag(self, name, namespace, nsprefix, attrs, sourceline=None,
                         sourcepos=None):
-        """Push a start tag on to the stack.
+        """Called by the tree builder when a new tag is encountered.
 
-        If this method returns None, the tag was rejected by the
+        :param name: Name of the tag.
+        :param nsprefix: Namespace prefix for the tag.
+        :param attrs: A dictionary of attribute values.
+        :param sourceline: The line number where this tag was found in its
+            source document.
+        :param sourcepos: The character position within `sourceline` where this
+            tag was found.
+
+        If this method returns None, the tag was rejected by an active
         SoupStrainer. You should proceed as if the tag had not occurred
         in the document. For instance, if this was a self-closing tag,
         don't call handle_endtag.
         """
-
         # print "Start tag %s: %s" % (name, attrs)
         self.endData()
 
@@ -598,19 +645,30 @@ class BeautifulSoup(Tag):
         return tag
 
     def handle_endtag(self, name, nsprefix=None):
+        """Called by the tree builder when an ending tag is encountered.
+
+        :param name: Name of the tag.
+        :param nsprefix: Namespace prefix for the tag.
+        """
         #print "End tag: " + name
         self.endData()
         self._popToTag(name, nsprefix)
 
     def handle_data(self, data):
+        """Called by the tree builder when a chunk of textual data is encountered."""
         self.current_data.append(data)
 
     def decode(self, pretty_print=False,
                eventual_encoding=DEFAULT_OUTPUT_ENCODING,
                formatter="minimal"):
-        """Returns a string or Unicode representation of this document.
-        To get Unicode, pass None for encoding."""
+        """Returns a string or Unicode representation of the parse tree
+        as an HTML or XML document.
 
+        :param pretty_print: If this is True, indentation will be used to
+            make the document more readable.
+        :param eventual_encoding: The encoding of the final document.
+            If this is None, the document will be a Unicode string.
+        """
         if self.is_xml:
             # Print the XML declaration
             encoding_part = ''
@@ -626,7 +684,7 @@ class BeautifulSoup(Tag):
         return prefix + super(BeautifulSoup, self).decode(
             indent_level, eventual_encoding, formatter)
 
-# Alias to make it easier to type import: 'from bs4 import _soup'
+# Aliases to make it easier to get started quickly, e.g. 'from bs4 import _soup'
 _s = BeautifulSoup
 _soup = BeautifulSoup
 
@@ -642,13 +700,17 @@ class BeautifulStoneSoup(BeautifulSoup):
 
 
 class StopParsing(Exception):
+    """Exception raised by a TreeBuilder if it's unable to continue parsing."""
     pass
 
 class FeatureNotFound(ValueError):
+    """Exception raised by the BeautifulSoup constructor if no parser with the
+    requested features is found.
+    """
     pass
 
 
-#By default, act as an HTML pretty-printer.
+#If this file is run as a script, act as an HTML pretty-printer.
 if __name__ == '__main__':
     import sys
     soup = BeautifulSoup(sys.stdin)
author	Leonard Richardson <leonardr@segfault.org>	2019-12-20 15:07:27 -0500
committer	Leonard Richardson <leonardr@segfault.org>	2019-12-20 15:07:27 -0500
commit	ee3f30b0e778747fb701856a33713369f778834f (patch)
tree	1b7aeedc410a2bbd4262dfd984d70c640d5cec38
parent	f5f0e1ea521f813964b4f9cbea53fcdfc3942f56 (diff)