Goodbye, Python 2. [bug=1942919]

author: Leonard Richardson <leonardr@segfault.org> 2021-09-07 20:09:32 -0400
committer: Leonard Richardson <leonardr@segfault.org> 2021-09-07 20:09:32 -0400
commit: 9d68e443978afda17f59f0ff9e73af2b9b0921c2 (patch)
tree: c23b00ad1379e3c10212c048ef84fc40c9321da3 /bs4/element.py
parent: 70f546b1e689a70e2f103795efce6d261a3dadf7 (diff)
1 files changed, 55 insertions, 55 deletions
diff --git a/bs4/element.py b/bs4/element.py
index e7867a9..82a986e 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -3,14 +3,14 @@ __license__ = "MIT"
 
 try:
     from collections.abc import Callable # Python 3.6
-except ImportError , e:
+except ImportError as e:
     from collections import Callable
 import re
 import sys
 import warnings
 try:
     import soupsieve
-except ImportError, e:
+except ImportError as e:
     soupsieve = None
     warnings.warn(
         'The soupsieve package is not installed. CSS selectors cannot be used.'
@@ -57,22 +57,22 @@ def _alias(attr):
 # Source:
 # https://docs.python.org/3/library/codecs.html#python-specific-encodings
 PYTHON_SPECIFIC_ENCODINGS = set([
-    u"idna",
-    u"mbcs",
-    u"oem",
-    u"palmos",
-    u"punycode",
-    u"raw_unicode_escape",
-    u"undefined",
-    u"unicode_escape",
-    u"raw-unicode-escape",
-    u"unicode-escape",
-    u"string-escape",
-    u"string_escape",
+    "idna",
+    "mbcs",
+    "oem",
+    "palmos",
+    "punycode",
+    "raw_unicode_escape",
+    "undefined",
+    "unicode_escape",
+    "raw-unicode-escape",
+    "unicode-escape",
+    "string-escape",
+    "string_escape",
 ])
     
 
-class NamespacedAttribute(unicode):
+class NamespacedAttribute(str):
     """A namespaced string (e.g. 'xml:lang') that remembers the namespace
     ('xml') and the name ('lang') that were used to create it.
     """
@@ -84,18 +84,18 @@ class NamespacedAttribute(unicode):
             name = None
 
         if not name:
-            obj = unicode.__new__(cls, prefix)
+            obj = str.__new__(cls, prefix)
         elif not prefix:
             # Not really namespaced.
-            obj = unicode.__new__(cls, name)
+            obj = str.__new__(cls, name)
         else:
-            obj = unicode.__new__(cls, prefix + ":" + name)
+            obj = str.__new__(cls, prefix + ":" + name)
         obj.prefix = prefix
         obj.name = name
         obj.namespace = namespace
         return obj
 
-class AttributeValueWithCharsetSubstitution(unicode):
+class AttributeValueWithCharsetSubstitution(str):
     """A stand-in object for a character encoding specified in HTML."""
 
 class CharsetMetaAttributeValue(AttributeValueWithCharsetSubstitution):
@@ -106,7 +106,7 @@ class CharsetMetaAttributeValue(AttributeValueWithCharsetSubstitution):
     """
 
     def __new__(cls, original_value):
-        obj = unicode.__new__(cls, original_value)
+        obj = str.__new__(cls, original_value)
         obj.original_value = original_value
         return obj
 
@@ -134,9 +134,9 @@ class ContentMetaAttributeValue(AttributeValueWithCharsetSubstitution):
         match = cls.CHARSET_RE.search(original_value)
         if match is None:
             # No substitution necessary.
-            return unicode.__new__(unicode, original_value)
+            return str.__new__(str, original_value)
 
-        obj = unicode.__new__(cls, original_value)
+        obj = str.__new__(cls, original_value)
         obj.original_value = original_value
         return obj
 
@@ -272,7 +272,7 @@ class PageElement(object):
         for string in self._all_strings(True):
             yield string
 
-    def get_text(self, separator=u"", strip=False,
+    def get_text(self, separator="", strip=False,
                  types=default):
         """Get all child strings of this PageElement, concatenated using the
         given separator.
@@ -418,7 +418,7 @@ class PageElement(object):
             raise ValueError("Cannot insert None into a tag.")
         if new_child is self:
             raise ValueError("Cannot insert a tag into itself.")
-        if (isinstance(new_child, basestring)
+        if (isinstance(new_child, str)
             and not isinstance(new_child, NavigableString)):
             new_child = NavigableString(new_child)
 
@@ -795,7 +795,7 @@ class PageElement(object):
                 result = (element for element in generator
                           if isinstance(element, Tag))
                 return ResultSet(strainer, result)
-            elif isinstance(name, basestring):
+            elif isinstance(name, str):
                 # Optimization to find all tags with a given name.
                 if name.count(':') == 1:
                     # This is a name with a prefix. If this is a namespace-aware document,
@@ -914,7 +914,7 @@ class PageElement(object):
         return self.parents
 
 
-class NavigableString(unicode, PageElement):
+class NavigableString(str, PageElement):
     """A Python Unicode string that is part of a parse tree.
 
     When Beautiful Soup parses the markup <b>penguin</b>, it will
@@ -937,10 +937,10 @@ class NavigableString(unicode, PageElement):
         passed in to the superclass's __new__ or the superclass won't know
         how to handle non-ASCII characters.
         """
-        if isinstance(value, unicode):
-            u = unicode.__new__(cls, value)
+        if isinstance(value, str):
+            u = str.__new__(cls, value)
         else:
-            u = unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING)
+            u = str.__new__(cls, value, DEFAULT_OUTPUT_ENCODING)
         u.setup()
         return u
 
@@ -951,7 +951,7 @@ class NavigableString(unicode, PageElement):
         return type(self)(self)
 
     def __getnewargs__(self):
-        return (unicode(self),)
+        return (str(self),)
 
     def __getattr__(self, attr):
         """text.string gives you text. This is for backwards
@@ -1059,30 +1059,30 @@ class PreformattedString(NavigableString):
 
 class CData(PreformattedString):
     """A CDATA block."""
-    PREFIX = u'<![CDATA['
-    SUFFIX = u']]>'
+    PREFIX = '<![CDATA['
+    SUFFIX = ']]>'
 
 class ProcessingInstruction(PreformattedString):
     """A SGML processing instruction."""
 
-    PREFIX = u'<?'
-    SUFFIX = u'>'
+    PREFIX = '<?'
+    SUFFIX = '>'
 
 class XMLProcessingInstruction(ProcessingInstruction):
     """An XML processing instruction."""
-    PREFIX = u'<?'
-    SUFFIX = u'?>'
+    PREFIX = '<?'
+    SUFFIX = '?>'
 
 class Comment(PreformattedString):
     """An HTML or XML comment."""
-    PREFIX = u'<!--'
-    SUFFIX = u'-->'
+    PREFIX = '<!--'
+    SUFFIX = '-->'
 
 
 class Declaration(PreformattedString):
     """An XML declaration."""
-    PREFIX = u'<?'
-    SUFFIX = u'?>'
+    PREFIX = '<?'
+    SUFFIX = '?>'
 
 
 class Doctype(PreformattedString):
@@ -1110,8 +1110,8 @@ class Doctype(PreformattedString):
 
         return Doctype(value)
 
-    PREFIX = u'<!DOCTYPE '
-    SUFFIX = u'>\n'
+    PREFIX = '<!DOCTYPE '
+    SUFFIX = '>\n'
 
 
 class Stylesheet(NavigableString):
@@ -1496,7 +1496,7 @@ class Tag(PageElement):
     def __contains__(self, x):
         return x in self.contents
 
-    def __nonzero__(self):
+    def __bool__(self):
         "A tag is non-None even if it has no contents."
         return True
 
@@ -1645,8 +1645,8 @@ class Tag(PageElement):
             else:
                 if isinstance(val, list) or isinstance(val, tuple):
                     val = ' '.join(val)
-                elif not isinstance(val, basestring):
-                    val = unicode(val)
+                elif not isinstance(val, str):
+                    val = str(val)
                 elif (
                         isinstance(val, AttributeValueWithCharsetSubstitution)
                         and eventual_encoding is not None
@@ -1655,7 +1655,7 @@ class Tag(PageElement):
 
                 text = formatter.attribute_value(val)
                 decoded = (
-                    unicode(key) + '='
+                    str(key) + '='
                     + formatter.quoted_attribute_value(text))
             attrs.append(decoded)
         close = ''
@@ -2014,7 +2014,7 @@ class SoupStrainer(object):
             else:
                 attrs = kwargs
         normalized_attrs = {}
-        for key, value in attrs.items():
+        for key, value in list(attrs.items()):
             normalized_attrs[key] = self._normalize_search_value(value)
 
         self.attrs = normalized_attrs
@@ -2023,7 +2023,7 @@ class SoupStrainer(object):
     def _normalize_search_value(self, value):
         # Leave it alone if it's a Unicode string, a callable, a
         # regular expression, a boolean, or None.
-        if (isinstance(value, unicode) or isinstance(value, Callable) or hasattr(value, 'match')
+        if (isinstance(value, str) or isinstance(value, Callable) or hasattr(value, 'match')
             or isinstance(value, bool) or value is None):
             return value
 
@@ -2036,7 +2036,7 @@ class SoupStrainer(object):
             new_value = []
             for v in value:
                 if (hasattr(v, '__iter__') and not isinstance(v, bytes)
-                    and not isinstance(v, unicode)):
+                    and not isinstance(v, str)):
                     # This is almost certainly the user's mistake. In the
                     # interests of avoiding infinite loops, we'll let
                     # it through as-is rather than doing a recursive call.
@@ -2048,7 +2048,7 @@ class SoupStrainer(object):
         # Otherwise, convert it into a Unicode string.
         # The unicode(str()) thing is so this will do the same thing on Python 2
         # and Python 3.
-        return unicode(str(value))
+        return str(str(value))
 
     def __str__(self):
         """A human-readable representation of this SoupStrainer."""
@@ -2076,7 +2076,7 @@ class SoupStrainer(object):
             markup = markup_name
             markup_attrs = markup
 
-        if isinstance(self.name, basestring):
+        if isinstance(self.name, str):
             # Optimization for a very common case where the user is
             # searching for a tag with one specific name, and we're
             # looking at a tag with a different name.
@@ -2132,7 +2132,7 @@ class SoupStrainer(object):
         found = None
         # If given a list of items, scan it for a text element that
         # matches.
-        if hasattr(markup, '__iter__') and not isinstance(markup, (Tag, basestring)):
+        if hasattr(markup, '__iter__') and not isinstance(markup, (Tag, str)):
             for element in markup:
                 if isinstance(element, NavigableString) \
                        and self.search(element):
@@ -2145,7 +2145,7 @@ class SoupStrainer(object):
                 found = self.search_tag(markup)
         # If it's text, make sure the text matches.
         elif isinstance(markup, NavigableString) or \
-                 isinstance(markup, basestring):
+                 isinstance(markup, str):
             if not self.name and not self.attrs and self._matches(markup, self.text):
                 found = markup
         else:
@@ -2190,7 +2190,7 @@ class SoupStrainer(object):
             return not match_against
 
         if (hasattr(match_against, '__iter__')
-            and not isinstance(match_against, basestring)):
+            and not isinstance(match_against, str)):
             # We're asked to match against an iterable of items.
             # The markup must be match at least one item in the
             # iterable. We'll try each one in turn.
@@ -2217,7 +2217,7 @@ class SoupStrainer(object):
         # the tag's name and once against its prefixed name.
         match = False
         
-        if not match and isinstance(match_against, unicode):
+        if not match and isinstance(match_against, str):
             # Exact string match
             match = markup == match_against
author	Leonard Richardson <leonardr@segfault.org>	2021-09-07 20:09:32 -0400
committer	Leonard Richardson <leonardr@segfault.org>	2021-09-07 20:09:32 -0400
commit	9d68e443978afda17f59f0ff9e73af2b9b0921c2 (patch)
tree	c23b00ad1379e3c10212c048ef84fc40c9321da3 /bs4/element.py
parent	70f546b1e689a70e2f103795efce6d261a3dadf7 (diff)