summary refs log tree commit diff
diff options
context:
space:
mode:
author Leonard Richardson <leonardr@segfault.org> 2019-07-07 22:57:04 -0400
committer Leonard Richardson <leonardr@segfault.org> 2019-07-07 22:57:04 -0400
commit 519afbe269b671e15a1f1d2aecfe4fc579b61efc (patch)
tree 34009e19c95cae9245678451f3d7dc783f75f59a
parent 2fcaeb6e916a09fa87b4b2ab57167c39db6cef8c (diff)
A Formatter can now decide how (or whether) to order the attributes
inside a tag. [bug=1812422]
-rw-r--r-- CHANGELOG 3
-rw-r--r-- bs4/element.py 15
-rw-r--r-- bs4/tests/test_tree.py 22
3 files changed, 37 insertions, 3 deletions
diff --git a/CHANGELOG b/CHANGELOG
index b60b5b5..019ace4 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -6,6 +6,9 @@
attributes are treated -- you can do this with the
`multi_valued_attributes` argument. [bug=1832978]
+* A Formatter can now decide how (or whether) to order the attributes
+ inside a tag. [bug=1812422]
+
* &apos; (which is valid in XML and XHTML, but not HTML 4) is now
recognized as a named entity and converted to a single quote. [bug=1818721]
diff --git a/bs4/element.py b/bs4/element.py
index 1183f77..e8e48df 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -142,15 +142,20 @@ class Formatter(object):
# By default, represent void elements as <tag/> rather than <tag>
void_element_close_prefix = '/'
- def substitute_entities(self, *args, **kwargs):
+ def substitute(self, *args, **kwargs):
"""Transform certain characters into named entities."""
raise NotImplementedError()
+
+ def sort_attributes(self, attributes):
+ """Reorder a tag's attributes however you want."""
+ return sorted(attributes.items())
+
class HTMLFormatter(Formatter):
"""The default HTML formatter."""
def substitute(self, *args, **kwargs):
return HTMLAwareEntitySubstitution.substitute_html(*args, **kwargs)
-
+
class MinimalHTMLFormatter(Formatter):
"""A minimal HTML formatter."""
def substitute(self, *args, **kwargs):
@@ -1157,7 +1162,11 @@ class Tag(PageElement):
formatter = self._formatter_for_name(formatter)
attrs = []
if self.attrs:
- for key, val in sorted(self.attrs.items()):
+ if isinstance(formatter, Formatter):
+ sorted_attrs = formatter.sort_attributes(self.attrs)
+ else:
+ sorted_attrs = self.attrs.items()
+ for key, val in sorted_attrs:
if val is None:
decoded = key
else:
diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py
index a14928e..f7c5e2f 100644
--- a/bs4/tests/test_tree.py
+++ b/bs4/tests/test_tree.py
@@ -24,6 +24,7 @@ from bs4.element import (
CData,
Comment,
Declaration,
+ MinimalHTMLFormatter,
Doctype,
NavigableString,
SoupStrainer,
@@ -1683,6 +1684,27 @@ class TestEncoding(SoupTest):
else:
self.assertEqual(b'<b>\\u2603</b>', repr(soup))
+class TestFormatter(SoupTest):
+
+ def test_sort_attributes(self):
+ class UnsortedFormatter(MinimalHTMLFormatter):
+ def sort_attributes(self, attributes):
+ self.called_with = attributes
+ for k, v in sorted(attributes.items()):
+ if k == 'ignore':
+ continue
+ yield k,v
+
+ soup = self.soup('<p cval="1" aval="2" ignore="ignored"></p>')
+ formatter = UnsortedFormatter()
+ decoded = soup.decode(formatter=formatter)
+
+ # sort_attributes() was called with all three attributes. It removed one and
+ # sorted the other two.
+ self.assertEquals(formatter.called_with, dict(cval="1", aval="2", ignore="ignored"))
+ self.assertEquals(u'<p aval="2" cval="1"></p>', decoded)
+
+
class TestNavigableStringSubclasses(SoupTest):
def test_cdata(self):