summary | refs | log | tree | commit | diff
path: root/bs4/tests
diff options
context:
space:
mode:
author: Leonard Richardson <leonard.richardson@canonical.com> 2012-02-16 13:55:20 -0500
committer: Leonard Richardson <leonard.richardson@canonical.com> 2012-02-16 13:55:20 -0500
commit: 1a50d9623831990ae0a78ea3a7e66fa098fe92ac (patch)
tree: d31578ac86c753c6e3427f574408a1ad960d80ac /bs4/tests
parent: ffcebc274b84b85a0b8c93c2aca8756df4baa236 (diff)
By default, turn unrecognized characters into numeric XML entity refs.
Diffstat (limited to 'bs4/tests')
-rw-r--r-- bs4/tests/test_tree.py 10
1 files changed, 10 insertions, 0 deletions
diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py
index 9e57d54..70a7da1 100644
--- a/bs4/tests/test_tree.py
+++ b/bs4/tests/test_tree.py
@@ -1242,6 +1242,16 @@ class TestEncoding(SoupTest):
self.assertEqual(
soup.b.encode("utf-8"), html.encode("utf-8"))
+ def test_encoding_substitutes_unrecognized_characters_by_default(self):
+ html = u"<b>\N{SNOWMAN}</b>"
+ soup = self.soup(html)
+ self.assertEqual(soup.b.encode("ascii"), b"<b>&#9731;</b>")
+
+ def test_encoding_can_be_made_strict(self):
+ html = u"<b>\N{SNOWMAN}</b>"
+ soup = self.soup(html)
+ self.assertRaises(
+ UnicodeEncodeError, soup.encode, "ascii", errors="strict")
class TestNavigableStringSubclasses(SoupTest):