summary | refs | log | tree | commit | diff
path: root/bs4/formatter.py
diff options
context:
space:
mode:
author Leonard Richardson <leonardr@segfault.org> 2021-02-14 15:34:04 -0500
committer Leonard Richardson <leonardr@segfault.org> 2021-02-14 15:34:04 -0500
commit 7201eecc09b51df5a0fb704670aa66bcc9d8e635 (patch)
tree 30dd9d9df4d81eff431a53f5c47093934b06dfd1 /bs4/formatter.py
parent c876fbf402f15d924b7c0d9a9be5ba80769444a3 (diff)
The 'html5' formatter now treats attributes whose values are the
empty string as HTML boolean attributes. Previously (and in other formatters), an attribute value must be set as None to be treated as a boolean attribute. In a future release, I plan to also give this behavior to the 'html' formatter. Patch by Isaac Muse. [bug=1915424]
Diffstat (limited to 'bs4/formatter.py')
-rw-r--r-- bs4/formatter.py 23
1 files changed, 18 insertions, 5 deletions
diff --git a/bs4/formatter.py b/bs4/formatter.py
index 9a692ec..82d4689 100644
--- a/bs4/formatter.py
+++ b/bs4/formatter.py
@@ -14,7 +14,8 @@ class Formatter(EntitySubstitution):
For HTML documents:
* 'html' - HTML entity substitution for generic HTML documents. (default)
- * 'html5' - HTML entity substitution for HTML5 documents.
+ * 'html5' - HTML entity substitution for HTML5 documents, as
+ well as some optimizations in the way tags are rendered.
* 'minimal' - Only make the substitutions necessary to guarantee
valid HTML.
* None - Do not perform any substitution. This will be faster
@@ -48,6 +49,7 @@ class Formatter(EntitySubstitution):
def __init__(
self, language=None, entity_substitution=None,
void_element_close_prefix='/', cdata_containing_tags=None,
+ empty_attributes_are_booleans=False,
):
"""Constructor.
@@ -64,6 +66,9 @@ class Formatter(EntitySubstitution):
as containing CDATA in this dialect. For example, in HTML,
<script> and <style> tags are defined as containing CDATA,
and their contents should not be formatted.
+ :param empty_attributes_are_booleans: Render attributes whose value
+ is the empty string as HTML-style boolean attributes.
+ (Attributes whose value is None are always rendered this way.)
"""
self.language = language
self.entity_substitution = entity_substitution
@@ -71,7 +76,8 @@ class Formatter(EntitySubstitution):
self.cdata_containing_tags = self._default(
language, cdata_containing_tags, 'cdata_containing_tags'
)
-
+ self.empty_attributes_are_booleans=empty_attributes_are_booleans
+
def substitute(self, ns):
"""Process a string that needs to undergo entity substitution.
This may be a string encountered in an attribute value or as
@@ -107,11 +113,17 @@ class Formatter(EntitySubstitution):
By default, attributes are sorted alphabetically. This makes
behavior consistent between Python 2 and Python 3, and preserves
backwards compatibility with older versions of Beautiful Soup.
+
+ If `empty_attributes_are_booleans` is True, then attributes whose
+ values are set to the empty string will be treated as boolean
+ attributes.
"""
if tag.attrs is None:
return []
- return sorted(tag.attrs.items())
-
+ return sorted(
+ (k, (None if self.empty_attributes_are_booleans and v == '' else v))
+ for k, v in tag.attrs.items()
+ )
class HTMLFormatter(Formatter):
"""A generic Formatter for HTML."""
@@ -133,7 +145,8 @@ HTMLFormatter.REGISTRY['html'] = HTMLFormatter(
)
HTMLFormatter.REGISTRY["html5"] = HTMLFormatter(
entity_substitution=EntitySubstitution.substitute_html,
- void_element_close_prefix = None
+ void_element_close_prefix=None,
+ empty_attributes_are_booleans=True,
)
HTMLFormatter.REGISTRY["minimal"] = HTMLFormatter(
entity_substitution=EntitySubstitution.substitute_xml