Added docstrings for some but not all tree builders.

author: Leonard Richardson <leonardr@segfault.org> 2019-12-24 10:41:57 -0500
committer: Leonard Richardson <leonardr@segfault.org> 2019-12-24 10:41:57 -0500
commit: bef726b23d0770860cd347b03009ffb027159572 (patch)
tree: 325b698568a6fcb63018753db4830a579254f6ca /bs4/builder/_lxml.py
parent: 5952879a2458fdeb74673d3ccd61fd312c7d66df (diff)
1 file changed, 37 insertions, 9 deletions
diff --git a/bs4/builder/_lxml.py b/bs4/builder/_lxml.py
index c83dd2d..1b44d75 100644
--- a/bs4/builder/_lxml.py
+++ b/bs4/builder/_lxml.py
@@ -62,10 +62,13 @@ class LXMLTreeBuilderForXML(TreeBuilder):
     # But instead we build an XMLParser or HTMLParser object to serve
     # as the target of parse messages, and those messages don't include
     # line numbers.
+    # See: https://bugs.launchpad.net/lxml/+bug/1846906
     
     def initialize_soup(self, soup):
         """Let the BeautifulSoup object know about the standard namespace
         mapping.
+
+        :param soup: A `BeautifulSoup`.
         """
         super(LXMLTreeBuilderForXML, self).initialize_soup(soup)
         self._register_namespaces(self.DEFAULT_NSMAPS)
@@ -75,6 +78,8 @@ class LXMLTreeBuilderForXML(TreeBuilder):
         while parsing the document.
 
         This might be useful later on when creating CSS selectors.
+
+        :param mapping: A dictionary mapping namespace prefixes to URIs.
         """
         for key, value in mapping.items():
             if key and key not in self.soup._namespaces:
@@ -84,14 +89,23 @@ class LXMLTreeBuilderForXML(TreeBuilder):
                 self.soup._namespaces[key] = value
 
     def default_parser(self, encoding):
-        # This can either return a parser object or a class, which
-        # will be instantiated with default arguments.
+        """Find the default parser for the given encoding.
+
+        :param encoding: A string.
+        :return: Either a parser object or a class, which
+          will be instantiated with default arguments.
+        """
         if self._default_parser is not None:
             return self._default_parser
         return etree.XMLParser(
             target=self, strip_cdata=False, recover=True, encoding=encoding)
 
     def parser_for(self, encoding):
+        """Instantiate an appropriate parser for the given encoding.
+
+        :param encoding: A string.
+        :return: A parser object such as an `etree.XMLParser`.
+        """
         # Use the default parser.
         parser = self.default_parser(encoding)
 
@@ -124,17 +138,31 @@ class LXMLTreeBuilderForXML(TreeBuilder):
     def prepare_markup(self, markup, user_specified_encoding=None,
                        exclude_encodings=None,
                        document_declared_encoding=None):
-        """
-        :yield: A series of 4-tuples.
+        """Run any preliminary steps necessary to make incoming markup
+        acceptable to the parser.
+
+        lxml really wants to get a bytestring and convert it to
+        Unicode itself. So instead of using UnicodeDammit to convert
+        the bytestring to Unicode using different encodings, this
+        implementation uses EncodingDetector to iterate over the
+        encodings, and tell lxml to try to parse the document as each
+        one in turn.
+
+        :param markup: Some markup -- hopefully a bytestring.
+        :param user_specified_encoding: The user asked to try this encoding.
+        :param document_declared_encoding: The markup itself claims to be
+            in this encoding.
+        :param exclude_encodings: The user asked _not_ to try any of
+            these encodings.
+
+        :yield: A series of 4-tuples:
          (markup, encoding, declared encoding,
           has undergone character replacement)
 
-        Each 4-tuple represents a strategy for parsing the document.
+         Each 4-tuple represents a strategy for converting the
+         document to Unicode and parsing it. Each strategy will be tried 
+         in turn.
         """
-        # Instead of using UnicodeDammit to convert the bytestring to
-        # Unicode using different encodings, use EncodingDetector to
-        # iterate over the encodings, and tell lxml to try to parse
-        # the document as each one in turn.
         is_html = not self.is_xml
         if is_html:
             self.processing_instruction_class = ProcessingInstruction
author	Leonard Richardson <leonardr@segfault.org>	2019-12-24 10:41:57 -0500
committer	Leonard Richardson <leonardr@segfault.org>	2019-12-24 10:41:57 -0500
commit	bef726b23d0770860cd347b03009ffb027159572 (patch)
tree	325b698568a6fcb63018753db4830a579254f6ca /bs4/builder/_lxml.py
parent	5952879a2458fdeb74673d3ccd61fd312c7d66df (diff)