Fixed a test failure when cchardet is not installed but charset_normalizer is. [bug=1973072]
author: Leonard Richardson <leonardr@segfault.org> 2022-05-15 15:08:17 -0400
committer: Leonard Richardson <leonardr@segfault.org> 2022-05-15 15:08:17 -0400
commit: d4189dccc847d87eed573c7ab2948db588aed4c0 (patch)
tree: 53987f4962660c1dcd7cb2b07578110be6c09e3b
parent: 92b50fc6ae57038a09e9cace20604613b1b345e8 (diff)
2 files changed, 36 insertions, 34 deletions
diff --git a/CHANGELOG b/CHANGELOG
index b1dbf7d..ac7882b 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -5,6 +5,9 @@ Python 2 was revision 605.
 
 = Unreleased
 
+* Fixed a test failure when cchardet is not installed but
+  charset_normalizer is. [bug=1973072]
+
 * Fixed another crash when overriding multi_valued_attributes and using the
   html5lib parser. [bug=1948488]
 
diff --git a/bs4/tests/test_dammit.py b/bs4/tests/test_dammit.py
index 9971234..9aad0ac 100644
--- a/bs4/tests/test_dammit.py
+++ b/bs4/tests/test_dammit.py
@@ -17,26 +17,24 @@ class TestUnicodeDammit(object):
         dammit = UnicodeDammit(markup)
         assert dammit.unicode_markup == markup
 
-    def test_smart_quotes_to_unicode(self):
+    @pytest.mark.parametrize(
+        "smart_quotes_to,expect_converted",
+        [(None, "\u2018\u2019\u201c\u201d"),
+         ("xml", "&#x2018;&#x2019;&#x201C;&#x201D;"),
+         ("html", "&lsquo;&rsquo;&ldquo;&rdquo;"),
+         ("ascii", "''" + '""'),
+        ]
+    )
+    def test_smart_quotes_to(self, smart_quotes_to, expect_converted):
+        """Verify the functionality of the smart_quotes_to argument
+        to the UnicodeDammit constructor."""
         markup = b"<foo>\x91\x92\x93\x94</foo>"
-        dammit = UnicodeDammit(markup)
-        assert dammit.unicode_markup == "<foo>\u2018\u2019\u201c\u201d</foo>"
-
-    def test_smart_quotes_to_xml_entities(self):
-        markup = b"<foo>\x91\x92\x93\x94</foo>"
-        dammit = UnicodeDammit(markup, smart_quotes_to="xml")
-        assert dammit.unicode_markup == "<foo>&#x2018;&#x2019;&#x201C;&#x201D;</foo>"
-
-    def test_smart_quotes_to_html_entities(self):
-        markup = b"<foo>\x91\x92\x93\x94</foo>"
-        dammit = UnicodeDammit(markup, smart_quotes_to="html")
-        assert dammit.unicode_markup == "<foo>&lsquo;&rsquo;&ldquo;&rdquo;</foo>"
-
-    def test_smart_quotes_to_ascii(self):
-        markup = b"<foo>\x91\x92\x93\x94</foo>"
-        dammit = UnicodeDammit(markup, smart_quotes_to="ascii")
-        assert dammit.unicode_markup == """<foo>''""</foo>"""
-
+        converted = UnicodeDammit(
+            markup, known_definite_encodings=["windows-1252"],
+            smart_quotes_to=smart_quotes_to
+        ).unicode_markup
+        assert converted == "<foo>{}</foo>".format(expect_converted)
+        
     def test_detect_utf8(self):
         utf8 = b"Sacr\xc3\xa9 bleu! \xe2\x98\x83"
         dammit = UnicodeDammit(utf8)
@@ -275,23 +273,24 @@ class TestEntitySubstitution(object):
     def setup_method(self):
         self.sub = EntitySubstitution
 
-    def test_simple_html_substitution(self):
-        # Unicode characters corresponding to named HTML entites
-        # are substituted, and no others.
-        s = "foo\u2200\N{SNOWMAN}\u00f5bar"
-        assert self.sub.substitute_html(s) == "foo&forall;\N{SNOWMAN}&otilde;bar"
-
-    def test_smart_quote_substitution(self):
-        # MS smart quotes are a common source of frustration, so we
-        # give them a special test.
-        quotes = b"\x91\x92foo\x93\x94"
-        dammit = UnicodeDammit(quotes)
-        assert self.sub.substitute_html(dammit.markup) == "&lsquo;&rsquo;foo&ldquo;&rdquo;"
 
+    @pytest.mark.parametrize(
+        "original,substituted",
+        [
+            # Basic case. Unicode characters corresponding to named
+            # HTML entities are substituted; others are not.
+            ("foo\u2200\N{SNOWMAN}\u00f5bar",
+             "foo&forall;\N{SNOWMAN}&otilde;bar"),
+
+            # MS smart quotes are a common source of frustration, so we
+            # give them a special test.
+            ('‘’foo“”', "&lsquo;&rsquo;foo&ldquo;&rdquo;"),           
+        ]
+    )
+    def test_substitute_html(self, original, substituted):
+        assert self.sub.substitute_html(original) == substituted
+        
     def test_html5_entity(self):
-        # Some HTML5 entities correspond to single- or multi-character
-        # Unicode sequences.
-
         for entity, u in (
             # A few spot checks of our ability to recognize
             # special character sequences and convert them
author	Leonard Richardson <leonardr@segfault.org>	2022-05-15 15:08:17 -0400
committer	Leonard Richardson <leonardr@segfault.org>	2022-05-15 15:08:17 -0400
commit	d4189dccc847d87eed573c7ab2948db588aed4c0 (patch)
tree	53987f4962660c1dcd7cb2b07578110be6c09e3b
parent	92b50fc6ae57038a09e9cace20604613b1b345e8 (diff)