diff options
author | Leonard Richardson <leonard.richardson@canonical.com> | 2012-02-07 17:19:43 -0500 |
---|---|---|
committer | Leonard Richardson <leonard.richardson@canonical.com> | 2012-02-07 17:19:43 -0500 |
commit | f23376fdeee206cbf24d4b3aff43a307fb3786a6 (patch) | |
tree | cf37f1842d033e1cef46d6954495096c3d5661f5 /bs4/tests/test_tree.py | |
parent | 1cc507c9ac0154904a65f7352736cb665686e4a4 (diff) |
Made the formatter something you pass in rather than a variable you set.
Diffstat (limited to 'bs4/tests/test_tree.py')
-rw-r--r-- | bs4/tests/test_tree.py | 56 |
1 file changed, 50 insertions, 6 deletions
diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py index 82a3bfa..5552347 100644 --- a/bs4/tests/test_tree.py +++ b/bs4/tests/test_tree.py @@ -934,12 +934,57 @@ class TestPersistence(SoupTest): class TestSubstitutions(SoupTest): - def test_html_entity_substitution(self): - soup = self.soup( - u"<b>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</b>") - decoded = soup.decode(substitute_html_entities=True) + def test_default_formatter_is_minimal(self): + markup = u"<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>" + soup = self.soup(markup) + decoded = soup.decode(formatter="minimal") + # The < is converted back into &lt; but the e-with-acute is left alone. + self.assertEqual( + decoded, + self.document_for( + u"<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>")) + + def test_formatter_html(self): + markup = u"<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>" + soup = self.soup(markup) + decoded = soup.decode(formatter="html") + self.assertEqual( + decoded, + self.document_for("<b>&lt;&lt;Sacr&eacute; bleu!&gt;&gt;</b>")) + + def test_formatter_minimal(self): + markup = u"<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>" + soup = self.soup(markup) + decoded = soup.decode(formatter="minimal") + # The < is converted back into &lt; but the e-with-acute is left alone. + self.assertEqual( + decoded, + self.document_for( + u"<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>")) + + def test_formatter_null(self): + markup = u"<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>" + soup = self.soup(markup) + decoded = soup.decode(formatter=None) + # Neither the angle brackets nor the e-with-acute are converted. + # This is not valid HTML, but it's what the user wanted. 
self.assertEqual(decoded, - self.document_for("<b>Sacr&eacute; bleu!</b>")) + self.document_for(u"<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>")) + + def test_formatter_custom(self): + markup = u"<b>&lt;foo&gt;</b><b>bar</b>" + soup = self.soup(markup) + decoded = soup.decode(formatter = lambda x: x.upper()) + # Instead of normal entity conversion code, the custom + # callable is called on every string. + self.assertEqual( + decoded, + self.document_for(u"<b><FOO></b><b>BAR</b>")) + + def test_prettify_accepts_formatter(self): + soup = BeautifulSoup("<html><body>foo</body></html>") + pretty = soup.prettify(formatter = lambda x: x.upper()) + self.assertTrue(b"FOO" in pretty) def test_html_entity_substitution_off_by_default(self): markup = u"<b>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</b>" @@ -984,7 +1029,6 @@ class TestSubstitutions(SoupTest): soup = self.soup(markup, parse_only=strainer) self.assertEqual(soup.contents[0].name, 'pre') - class TestEncoding(SoupTest): """Test the ability to encode objects into strings.""" |