summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CHANGELOG2
-rw-r--r--bs4/formatter.py63
2 files changed, 58 insertions, 7 deletions
diff --git a/CHANGELOG b/CHANGELOG
index 261b576..d24559c 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,4 +1,4 @@
-= Unreleased
+= 4.8.2 (20191224)
* Added Python docstrings to most public methods.
diff --git a/bs4/formatter.py b/bs4/formatter.py
index f2724db..c907ea8 100644
--- a/bs4/formatter.py
+++ b/bs4/formatter.py
@@ -5,6 +5,27 @@ class Formatter(EntitySubstitution):
Some parts of this strategy come from the distinction between
HTML4, HTML5, and XML. Others are configurable by the user.
+
+ Formatters are passed in as the `formatter` argument to methods
+ like `Element.encode`. Most people won't need to think about
+ formatters, and most people who need to think about them can pass
+ in one of these predefined strings as `formatter` rather than
+ making a new Formatter object:
+
+ For HTML documents:
+ * 'html' - HTML entity substitution for generic HTML documents. (default)
+ * 'html5' - HTML entity substitution for HTML5 documents.
+ * 'minimal' - Only make the substitutions necessary to guarantee
+ valid HTML.
+ * None - Do not perform any substitution. This will be faster
+ but may result in invalid markup.
+
+ For XML documents:
+ * 'html' - Entity substitution for XHTML documents.
+ * 'minimal' - Only make the substitutions necessary to guarantee
+ valid XML. (default)
+ * None - Do not perform any substitution. This will be faster
+ but may result in invalid markup.
"""
# Registries of XML and HTML formatters.
XML_FORMATTERS = {}
@@ -28,10 +49,21 @@ class Formatter(EntitySubstitution):
self, language=None, entity_substitution=None,
void_element_close_prefix='/', cdata_containing_tags=None,
):
- """
+ """Constructor.
+
+ :param language: This should be Formatter.XML if you are formatting
+ XML markup and Formatter.HTML if you are formatting HTML markup.
- :param void_element_close_prefix: By default, represent void
- elements as <tag/> rather than <tag>
+ :param entity_substitution: A function to call to replace special
+ characters with XML/HTML entities. For examples, see
+ bs4.dammit.EntitySubstitution.substitute_html and substitute_xml.
+ :param void_element_close_prefix: By default, void elements
+ are represented as <tag/> (XML rules) rather than <tag>
+ (HTML rules). To get <tag>, pass in the empty string.
+ :param cdata_containing_tags: The list of tags that are defined
+ as containing CDATA in this dialect. For example, in HTML,
+ <script> and <style> tags are defined as containing CDATA,
+ and their contents should not be formatted.
"""
self.language = language
self.entity_substitution = entity_substitution
@@ -41,7 +73,14 @@ class Formatter(EntitySubstitution):
)
def substitute(self, ns):
- """Process a string that needs to undergo entity substitution."""
+ """Process a string that needs to undergo entity substitution.
+ This may be a string encountered in an attribute value or as
+ text.
+
+ :param ns: A string.
+ :return: A string with certain characters replaced by named
+ or numeric entities.
+ """
if not self.entity_substitution:
return ns
from element import NavigableString
@@ -54,21 +93,33 @@ class Formatter(EntitySubstitution):
return self.entity_substitution(ns)
def attribute_value(self, value):
- """Process the value of an attribute."""
+ """Process the value of an attribute.
+
+ :param ns: A string.
+ :return: A string with certain characters replaced by named
+ or numeric entities.
+ """
return self.substitute(value)
def attributes(self, tag):
- """Reorder a tag's attributes however you want."""
+ """Reorder a tag's attributes however you want.
+
+ By default, attributes are sorted alphabetically. This makes
+ behavior consistent between Python 2 and Python 3, and preserves
+ backwards compatibility with older versions of Beautiful Soup.
+ """
return sorted(tag.attrs.items())
class HTMLFormatter(Formatter):
+ """A generic Formatter for HTML."""
REGISTRY = {}
def __init__(self, *args, **kwargs):
return super(HTMLFormatter, self).__init__(self.HTML, *args, **kwargs)
class XMLFormatter(Formatter):
+ """A generic Formatter for XML."""
REGISTRY = {}
def __init__(self, *args, **kwargs):
return super(XMLFormatter, self).__init__(self.XML, *args, **kwargs)