5 files changed, 117 insertions, 46 deletions
diff --git a/CHANGELOG b/CHANGELOG
index 9cddc55..dd62294 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -18,7 +18,13 @@
   may now return a different result than calling get_text() on the tag
   itself. That's because different tags now have different
   understandings of what counts as 'text'. [bug=1906226] [bug=1868861]
-	
+
+* The 'html5' formatter now treats attributes whose values are the
+  empty string as HTML boolean attributes. Previously (and still in other
+  formatters), an attribute value had to be set to None to be treated as
+  a boolean attribute. In a future release, I plan to also give this
+  behavior to the 'html' formatter. Patch by Isaac Muse. [bug=1915424]
+
 * Corrected output when the namespace prefix associated with a
   namespaced attribute is the empty string, as opposed to
   None. [bug=1915583]
diff --git a/bs4/formatter.py b/bs4/formatter.py
index 9a692ec..82d4689 100644
--- a/bs4/formatter.py
+++ b/bs4/formatter.py
@@ -14,7 +14,8 @@ class Formatter(EntitySubstitution):
 
     For HTML documents:
      * 'html' - HTML entity substitution for generic HTML documents. (default)
-     * 'html5' - HTML entity substitution for HTML5 documents.
+     * 'html5' - HTML entity substitution for HTML5 documents, as
+                 well as some optimizations in the way tags are rendered.
      * 'minimal' - Only make the substitutions necessary to guarantee
                    valid HTML.
      * None - Do not perform any substitution. This will be faster
@@ -48,6 +49,7 @@ class Formatter(EntitySubstitution):
     def __init__(
             self, language=None, entity_substitution=None,
             void_element_close_prefix='/', cdata_containing_tags=None,
+            empty_attributes_are_booleans=False,
     ):
         """Constructor.
 
@@ -64,6 +66,9 @@ class Formatter(EntitySubstitution):
            as containing CDATA in this dialect. For example, in HTML,
            <script> and <style> tags are defined as containing CDATA,
            and their contents should not be formatted.
+        :param empty_attributes_are_booleans: Render attributes whose value
+            is the empty string as HTML-style boolean attributes.
+            (Attributes whose value is None are always rendered this way.)
         """
         self.language = language
         self.entity_substitution = entity_substitution
@@ -71,7 +76,8 @@ class Formatter(EntitySubstitution):
         self.cdata_containing_tags = self._default(
             language, cdata_containing_tags, 'cdata_containing_tags'
         )
-            
+        self.empty_attributes_are_booleans=empty_attributes_are_booleans
+        
     def substitute(self, ns):
         """Process a string that needs to undergo entity substitution.
         This may be a string encountered in an attribute value or as
@@ -107,11 +113,17 @@ class Formatter(EntitySubstitution):
         By default, attributes are sorted alphabetically. This makes
         behavior consistent between Python 2 and Python 3, and preserves
         backwards compatibility with older versions of Beautiful Soup.
+
+        If `empty_attributes_are_booleans` is True, then attributes whose
+        values are set to the empty string will be treated as boolean
+        attributes.
         """
         if tag.attrs is None:
             return []
-        return sorted(tag.attrs.items())
-
+        return sorted(
+            (k, (None if self.empty_attributes_are_booleans and v == '' else v))
+            for k, v in tag.attrs.items()
+        )
    
 class HTMLFormatter(Formatter):
     """A generic Formatter for HTML."""
@@ -133,7 +145,8 @@ HTMLFormatter.REGISTRY['html'] = HTMLFormatter(
 )
 HTMLFormatter.REGISTRY["html5"] = HTMLFormatter(
     entity_substitution=EntitySubstitution.substitute_html,
-    void_element_close_prefix = None
+    void_element_close_prefix=None,
+    empty_attributes_are_booleans=True,
 )
 HTMLFormatter.REGISTRY["minimal"] = HTMLFormatter(
     entity_substitution=EntitySubstitution.substitute_xml
diff --git a/bs4/tests/test_formatter.py b/bs4/tests/test_formatter.py
new file mode 100644
index 0000000..718989b
--- /dev/null
+++ b/bs4/tests/test_formatter.py
@@ -0,0 +1,81 @@
+from bs4.element import Tag
+from bs4.testing import SoupTest
+from bs4.formatter import (
+    Formatter,
+    HTMLFormatter,
+    XMLFormatter,
+)
+
+class TestFormatter(SoupTest):
+
+    def test_default_attributes(self):
+        # Test the default behavior of Formatter.attributes().
+        formatter = Formatter()
+        tag = Tag(name="tag")
+        tag['b'] = 1
+        tag['a'] = 2
+
+        # Attributes come out sorted by name. In Python 3, attributes
+        # normally come out of a dictionary in the order they were
+        # added.
+        self.assertEquals([('a', 2), ('b', 1)], formatter.attributes(tag))
+
+        # This works even if Tag.attrs is None, though this shouldn't
+        # normally happen.
+        tag.attrs = None
+        self.assertEquals([], formatter.attributes(tag))
+        
+    def test_sort_attributes(self):
+        # Test the ability to override Formatter.attributes() to,
+        # e.g., disable the normal sorting of attributes.
+        class UnsortedFormatter(Formatter):
+            def attributes(self, tag):
+                self.called_with = tag
+                for k, v in sorted(tag.attrs.items()):
+                    if k == 'ignore':
+                        continue
+                    yield k,v
+
+        soup = self.soup('<p cval="1" aval="2" ignore="ignored"></p>')
+        formatter = UnsortedFormatter()
+        decoded = soup.decode(formatter=formatter)
+
+        # attributes() was called on the <p> tag. It filtered out one
+        # attribute and sorted the other two.
+        self.assertEquals(formatter.called_with, soup.p)
+        self.assertEquals(u'<p aval="2" cval="1"></p>', decoded)
+
+    def test_empty_attributes_are_booleans(self):
+        # Test the behavior of empty_attributes_are_booleans as well
+        # as which Formatters have it enabled.
+        
+        for name in ('html', 'minimal', None):
+            formatter = HTMLFormatter.REGISTRY[name]
+            self.assertEquals(False, formatter.empty_attributes_are_booleans)
+
+        formatter = XMLFormatter.REGISTRY[None]
+        self.assertEquals(False, formatter.empty_attributes_are_booleans)
+
+        formatter = HTMLFormatter.REGISTRY['html5']
+        self.assertEquals(True, formatter.empty_attributes_are_booleans)
+
+        # Verify that the constructor sets the value.
+        formatter = Formatter(empty_attributes_are_booleans=True)
+        self.assertEquals(True, formatter.empty_attributes_are_booleans)
+
+        # Now demonstrate what it does to markup.
+        for markup in (
+                "<option selected></option>",
+                '<option selected=""></option>'
+        ):
+            soup = self.soup(markup)
+            for formatter in ('html', 'minimal', 'xml', None):
+                self.assertEquals(
+                    b'<option selected=""></option>',
+                    soup.option.encode(formatter='html')
+                )
+                self.assertEquals(
+                    b'<option selected></option>',
+                    soup.option.encode(formatter='html5')
+                )
+
diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py
index 9267a8f..d1ca5ea 100644
--- a/bs4/tests/test_tree.py
+++ b/bs4/tests/test_tree.py
@@ -1846,45 +1846,6 @@ class TestEncoding(SoupTest):
         else:
             self.assertEqual(b'<b>\\u2603</b>', repr(soup))
 
-class TestFormatter(SoupTest):
-
-    def test_default_attributes(self):
-        # Test the default behavior of Formatter.attributes().
-        formatter = Formatter()
-        tag = Tag(name="tag")
-        tag['b'] = 1
-        tag['a'] = 2
-
-        # Attributes come out sorted by name. In Python 3, attributes
-        # normally come out of a dictionary in the order they were
-        # added.
-        self.assertEquals([('a', 2), ('b', 1)], formatter.attributes(tag))
-
-        # This works even if Tag.attrs is None, though this shouldn't
-        # normally happen.
-        tag.attrs = None
-        self.assertEquals([], formatter.attributes(tag))
-        
-    def test_sort_attributes(self):
-        # Test the ability to override Formatter.attributes() to,
-        # e.g., disable the normal sorting of attributes.
-        class UnsortedFormatter(Formatter):
-            def attributes(self, tag):
-                self.called_with = tag
-                for k, v in sorted(tag.attrs.items()):
-                    if k == 'ignore':
-                        continue
-                    yield k,v
-
-        soup = self.soup('<p cval="1" aval="2" ignore="ignored"></p>')
-        formatter = UnsortedFormatter()
-        decoded = soup.decode(formatter=formatter)
-
-        # attributes() was called on the <p> tag. It filtered out one
-        # attribute and sorted the other two.
-        self.assertEquals(formatter.called_with, soup.p)
-        self.assertEquals(u'<p aval="2" cval="1"></p>', decoded)
-
 
 class TestNavigableStringSubclasses(SoupTest):
 
diff --git a/doc/source/index.rst b/doc/source/index.rst
index 8a1a2d5..2b5843d 100644
--- a/doc/source/index.rst
+++ b/doc/source/index.rst
@@ -2299,7 +2299,7 @@ Unicode characters to HTML entities whenever possible::
  #  Il a dit &lt;&lt;Sacr&eacute; bleu!&gt;&gt;
  # </p>
 
-If you pass in ``formatter="html5"``, it's the same as
+If you pass in ``formatter="html5"``, it's similar to
 ``formatter="html"``, but Beautiful Soup will
 omit the closing slash in HTML void tags like "br"::
 
@@ -2310,7 +2310,17 @@ omit the closing slash in HTML void tags like "br"::
  
  print(br.encode(formatter="html5"))
  # b'<br>'
+
+In addition, any attributes whose values are the empty string
+will become HTML-style boolean attributes::
+
+ option = BeautifulSoup('<option selected=""></option>').option
+ print(option.encode(formatter="html"))
+ # b'<option selected=""></option>'
  
+ print(option.encode(formatter="html5"))
+ # b'<option selected></option>'
+
 If you pass in ``formatter=None``, Beautiful Soup will not modify
 strings at all on output. This is the fastest option, but it may lead
 to Beautiful Soup generating invalid HTML/XML, as in these examples::