summary | refs | log | tree | commit | diff
path: root/bs4/tests/test_formatter.py
diff options
context:
space:
mode:
author Leonard Richardson <leonardr@segfault.org> 2021-02-14 15:34:04 -0500
committer Leonard Richardson <leonardr@segfault.org> 2021-02-14 15:34:04 -0500
commit 7201eecc09b51df5a0fb704670aa66bcc9d8e635 (patch)
tree 30dd9d9df4d81eff431a53f5c47093934b06dfd1 /bs4/tests/test_formatter.py
parent c876fbf402f15d924b7c0d9a9be5ba80769444a3 (diff)
The 'html5' formatter now treats attributes whose values are the
empty string as HTML boolean attributes. Previously (and still in the other formatters), an attribute value had to be set to None to be treated as a boolean attribute. In a future release, I plan to also give this behavior to the 'html' formatter. Patch by Isaac Muse. [bug=1915424]
Diffstat (limited to 'bs4/tests/test_formatter.py')
-rw-r--r-- bs4/tests/test_formatter.py 81
1 files changed, 81 insertions, 0 deletions
diff --git a/bs4/tests/test_formatter.py b/bs4/tests/test_formatter.py
new file mode 100644
index 0000000..718989b
--- /dev/null
+++ b/bs4/tests/test_formatter.py
@@ -0,0 +1,81 @@
+from bs4.element import Tag
+from bs4.testing import SoupTest
+from bs4.formatter import (
+ Formatter,
+ HTMLFormatter,
+ XMLFormatter,
+)
+
+class TestFormatter(SoupTest):
+
+ def test_default_attributes(self):
+ # Test the default behavior of Formatter.attributes().
+ formatter = Formatter()
+ tag = Tag(name="tag")
+ tag['b'] = 1
+ tag['a'] = 2
+
+ # Attributes come out sorted by name. In Python 3, attributes
+ # normally come out of a dictionary in the order they were
+ # added.
+ self.assertEquals([('a', 2), ('b', 1)], formatter.attributes(tag))
+
+ # This works even if Tag.attrs is None, though this shouldn't
+ # normally happen.
+ tag.attrs = None
+ self.assertEquals([], formatter.attributes(tag))
+
+ def test_sort_attributes(self):
+ # Test the ability to override Formatter.attributes() to,
+ # e.g., disable the normal sorting of attributes.
+ class UnsortedFormatter(Formatter):
+ def attributes(self, tag):
+ self.called_with = tag
+ for k, v in sorted(tag.attrs.items()):
+ if k == 'ignore':
+ continue
+ yield k,v
+
+ soup = self.soup('<p cval="1" aval="2" ignore="ignored"></p>')
+ formatter = UnsortedFormatter()
+ decoded = soup.decode(formatter=formatter)
+
+ # attributes() was called on the <p> tag. It filtered out one
+ # attribute and sorted the other two.
+ self.assertEquals(formatter.called_with, soup.p)
+ self.assertEquals(u'<p aval="2" cval="1"></p>', decoded)
+
+ def test_empty_attributes_are_booleans(self):
+ # Test the behavior of empty_attributes_are_booleans as well
+ # as which Formatters have it enabled.
+
+ for name in ('html', 'minimal', None):
+ formatter = HTMLFormatter.REGISTRY[name]
+ self.assertEquals(False, formatter.empty_attributes_are_booleans)
+
+ formatter = XMLFormatter.REGISTRY[None]
+ self.assertEquals(False, formatter.empty_attributes_are_booleans)
+
+ formatter = HTMLFormatter.REGISTRY['html5']
+ self.assertEquals(True, formatter.empty_attributes_are_booleans)
+
+ # Verify that the constructor sets the value.
+ formatter = Formatter(empty_attributes_are_booleans=True)
+ self.assertEquals(True, formatter.empty_attributes_are_booleans)
+
+ # Now demonstrate what it does to markup.
+ for markup in (
+ "<option selected></option>",
+ '<option selected=""></option>'
+ ):
+ soup = self.soup(markup)
+ for formatter in ('html', 'minimal', 'xml', None):
+ self.assertEquals(
+ b'<option selected=""></option>',
+ soup.option.encode(formatter='html')
+ )
+ self.assertEquals(
+ b'<option selected></option>',
+ soup.option.encode(formatter='html5')
+ )
+