# bs4/tests/test_formatter.py
#
# Created by commit 7201eecc09b51df5a0fb704670aa66bcc9d8e635
# Author: Leonard Richardson
# Date:   Sun, 14 Feb 2021 15:34:04 -0500
#
#     The 'html5' formatter now treats attributes whose values are the
#     empty string as HTML boolean attributes. Previously (and in other
#     formatters), an attribute value must be set as None to be treated
#     as a boolean attribute. In a future release, I plan to also give
#     this behavior to the 'html' formatter. Patch by Isaac Muse.
#     [bug=1915424]
#
# NOTE(review): this module was recovered from a rendered patch page in
# which line breaks were collapsed and HTML markup inside string literals
# was stripped. Every literal marked "reconstructed" below must be
# confirmed against the upstream repository before relying on it.

from bs4.element import Tag
from bs4.testing import SoupTest
from bs4.formatter import (
    Formatter,
    HTMLFormatter,
    XMLFormatter,
)


class TestFormatter(SoupTest):
    """Tests for Formatter.attributes() and empty_attributes_are_booleans."""

    def test_default_attributes(self):
        """Formatter.attributes() returns a tag's attributes sorted by name."""
        # Test the default behavior of Formatter.attributes().
        formatter = Formatter()
        tag = Tag(name="tag")
        tag['b'] = 1
        tag['a'] = 2

        # Attributes come out sorted by name. In Python 3, attributes
        # normally come out of a dictionary in the order they were
        # added.
        self.assertEqual([('a', 2), ('b', 1)], formatter.attributes(tag))

        # This works even if Tag.attrs is None, though this shouldn't
        # normally happen.
        tag.attrs = None
        self.assertEqual([], formatter.attributes(tag))

    def test_sort_attributes(self):
        """A Formatter subclass can override attributes() to filter/reorder."""
        # Test the ability to override Formatter.attributes() to,
        # e.g., disable the normal sorting of attributes.
        class UnsortedFormatter(Formatter):
            def attributes(self, tag):
                # Record the tag so the test can check which tag
                # attributes() was invoked on.
                self.called_with = tag
                for k, v in sorted(tag.attrs.items()):
                    if k == 'ignore':
                        continue
                    yield k, v

        # NOTE(review): reconstructed literal -- the markup was stripped
        # from the recovered patch. The surrounding code requires a <p>
        # tag with an 'ignore' attribute plus two others given out of
        # alphabetical order; confirm the exact names/values upstream.
        soup = self.soup('<p cval="1" aval="2" ignore="ignored"></p>')
        formatter = UnsortedFormatter()
        decoded = soup.decode(formatter=formatter)

        # attributes() was called on the <p> tag. It filtered out one
        # attribute and sorted the other two.
        self.assertEqual(formatter.called_with, soup.p)
        # NOTE(review): reconstructed literal -- confirm upstream.
        self.assertEqual(u'<p aval="2" cval="1"></p>', decoded)

    def test_empty_attributes_are_booleans(self):
        """Only the 'html5' formatter renders empty attributes as booleans."""
        # Test the behavior of empty_attributes_are_booleans as well
        # as which Formatters have it enabled.

        for name in ('html', 'minimal', None):
            formatter = HTMLFormatter.REGISTRY[name]
            self.assertEqual(False, formatter.empty_attributes_are_booleans)

        formatter = XMLFormatter.REGISTRY[None]
        self.assertEqual(False, formatter.empty_attributes_are_booleans)

        formatter = HTMLFormatter.REGISTRY['html5']
        self.assertEqual(True, formatter.empty_attributes_are_booleans)

        # Verify that the constructor sets the value.
        formatter = Formatter(empty_attributes_are_booleans=True)
        self.assertEqual(True, formatter.empty_attributes_are_booleans)

        # Now demonstrate what it does to markup.
        # NOTE(review): the two markup literals and the two expected byte
        # strings below are reconstructed -- the originals were stripped
        # from the recovered patch. Confirm upstream.
        for markup in (
                "<option selected></option>",
                '<option selected=""></option>'
        ):
            soup = self.soup(markup)
            # NOTE(review): the loop variable is never used; both
            # assertions hard-code their formatter, so every pass repeats
            # the same two checks. Left as in the original -- confirm the
            # intent before passing `formatter` through to encode().
            for formatter in ('html', 'minimal', 'xml', None):
                self.assertEqual(
                    b'<option selected=""></option>',
                    soup.option.encode(formatter='html')
                )
                self.assertEqual(
                    b'<option selected></option>',
                    soup.option.encode(formatter='html5')
                )