diff options
author | Leonard Richardson <leonardr@segfault.org> | 2023-01-25 15:25:00 -0500 |
---|---|---|
committer | Leonard Richardson <leonardr@segfault.org> | 2023-01-25 15:25:00 -0500 |
commit | f54ed875b72db32d773e928e90413fcce422dea1 (patch) | |
tree | 7fd219d6d2f68e4d6d17a2e759397d6e8ba01b70 | |
parent | 5fb5a9564741d4351fde8d828a6fd1944734807d (diff) |
Made the ISO-8859-1 smoke test more robust.
-rw-r--r-- | bs4/tests/__init__.py | 7 |
1 files changed, 4 insertions, 3 deletions
```diff
diff --git a/bs4/tests/__init__.py b/bs4/tests/__init__.py
index 7f8b620..f2fbb08 100644
--- a/bs4/tests/__init__.py
+++ b/bs4/tests/__init__.py
@@ -827,12 +827,13 @@ Hello, world!
         # Here it is in Unicode. Note that it claims to be in ISO-Latin-1.
         unicode_html = '<html><head><meta content="text/html; charset=ISO-Latin-1" http-equiv="Content-type"/></head><body><p>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</p></body></html>'
 
-        # That's because we're going to encode it into ISO-Latin-1, and use
-        # that to test.
+        # That's because we're going to encode it into ISO-Latin-1,
+        # a.k.a ISO-8859-1, and use that to test.
         iso_latin_html = unicode_html.encode("iso-8859-1")
 
         # Parse the ISO-Latin-1 HTML.
-        soup = self.soup(iso_latin_html)
+        soup = self.soup(iso_latin_html, from_encoding="iso-8859-1")
+
         # Encode it to UTF-8.
         result = soup.encode("utf-8")
 
```