diff options
author | Leonard Richardson <leonard.richardson@canonical.com> | 2012-02-16 13:55:20 -0500 |
---|---|---|
committer | Leonard Richardson <leonard.richardson@canonical.com> | 2012-02-16 13:55:20 -0500 |
commit | 1a50d9623831990ae0a78ea3a7e66fa098fe92ac (patch) | |
tree | d31578ac86c753c6e3427f574408a1ad960d80ac /bs4/tests | |
parent | ffcebc274b84b85a0b8c93c2aca8756df4baa236 (diff) |
By default, turn unrecognized characters into numeric XML entity refs.
Diffstat (limited to 'bs4/tests')
-rw-r--r-- | bs4/tests/test_tree.py | 10 |
1 file changed, 10 insertions, 0 deletions
diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py index 9e57d54..70a7da1 100644 --- a/bs4/tests/test_tree.py +++ b/bs4/tests/test_tree.py @@ -1242,6 +1242,16 @@ class TestEncoding(SoupTest): self.assertEqual( soup.b.encode("utf-8"), html.encode("utf-8")) + def test_encoding_substitutes_unrecognized_characters_by_default(self): + html = u"<b>\N{SNOWMAN}</b>" + soup = self.soup(html) + self.assertEqual(soup.b.encode("ascii"), b"<b>&#9731;</b>") + + def test_encoding_can_be_made_strict(self): + html = u"<b>\N{SNOWMAN}</b>" + soup = self.soup(html) + self.assertRaises( + UnicodeEncodeError, soup.encode, "ascii", errors="strict") class TestNavigableStringSubclasses(SoupTest): |