Goodbye, Python 2. [bug=1942919]

author: Leonard Richardson <leonardr@segfault.org> 2021-09-07 20:09:32 -0400
committer: Leonard Richardson <leonardr@segfault.org> 2021-09-07 20:09:32 -0400
commit: 9d68e443978afda17f59f0ff9e73af2b9b0921c2 (patch)
tree: c23b00ad1379e3c10212c048ef84fc40c9321da3 /bs4/__init__.py
parent: 70f546b1e689a70e2f103795efce6d261a3dadf7 (diff)
1 file changed, 24 insertions, 22 deletions
diff --git a/bs4/__init__.py b/bs4/__init__.py
index 7c6044a..2a436d3 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -7,7 +7,7 @@ Beautiful Soup uses a pluggable XML or HTML parser to parse a
 provides methods and Pythonic idioms that make it easy to navigate,
 search, and modify the parse tree.
 
-Beautiful Soup works with Python 2.7 and up. It works better if lxml
+Beautiful Soup works with Python 3.5 and up. It works better if lxml
 and/or html5lib is installed.
 
 For more than you ever wanted to know about Beautiful Soup, see the
@@ -15,13 +15,14 @@ documentation: http://www.crummy.com/software/BeautifulSoup/bs4/doc/
 """
 
 __author__ = "Leonard Richardson (leonardr@segfault.org)"
-__version__ = "4.9.3"
-__copyright__ = "Copyright (c) 2004-2020 Leonard Richardson"
+__version__ = "4.10.0"
+__copyright__ = "Copyright (c) 2004-2021 Leonard Richardson"
 # Use of this source code is governed by the MIT license.
 __license__ = "MIT"
 
 __all__ = ['BeautifulSoup']
 
+
 from collections import Counter
 import os
 import re
@@ -29,6 +30,11 @@ import sys
 import traceback
 import warnings
 
+# The very first thing we do is give a useful error if someone is
+# running this code under Python 2.
+if sys.version_info.major < 3:
+    raise ImportError('You are trying to use a Python 3-specific version of Beautiful Soup under Python 2. This will not work. The final version of Beautiful Soup to support Python 2 was 4.9.3.')
+
 from .builder import builder_registry, ParserRejectedMarkup
 from .dammit import UnicodeDammit
 from .element import (
@@ -49,10 +55,6 @@ from .element import (
     TemplateString,
     )
 
-# The very first thing we do is give a useful error if someone is
-# running this code under Python 3 without converting it.
-'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work.'<>'You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).'
-
 # Define some custom warnings.
 class GuessedAtParserWarning(UserWarning):
     """The warning issued when BeautifulSoup has to guess what parser to
@@ -100,7 +102,7 @@ class BeautifulSoup(Tag):
     # Since BeautifulSoup subclasses Tag, it's possible to treat it as
     # a Tag with a .name. This name makes it clear the BeautifulSoup
     # object isn't a real markup tag.
-    ROOT_TAG_NAME = u'[document]'
+    ROOT_TAG_NAME = '[document]'
 
     # If the end-user gives no indication which tree builder they
     # want, look for one with these features.
@@ -217,7 +219,7 @@ class BeautifulSoup(Tag):
         from_encoding = from_encoding or deprecated_argument(
             "fromEncoding", "from_encoding")
 
-        if from_encoding and isinstance(markup, unicode):
+        if from_encoding and isinstance(markup, str):
             warnings.warn("You provided Unicode markup but also provided a value for from_encoding. Your from_encoding will be ignored.")
             from_encoding = None
 
@@ -234,7 +236,7 @@ class BeautifulSoup(Tag):
             builder_class = builder
             builder = None
         elif builder is None:
-            if isinstance(features, basestring):
+            if isinstance(features, str):
                 features = [features]
             if features is None or len(features) == 0:
                 features = self.DEFAULT_BUILDER_FEATURES
@@ -309,13 +311,13 @@ class BeautifulSoup(Tag):
             markup = markup.read()
         elif len(markup) <= 256 and (
                 (isinstance(markup, bytes) and not b'<' in markup)
-                or (isinstance(markup, unicode) and not u'<' in markup)
+                or (isinstance(markup, str) and not '<' in markup)
         ):
             # Print out warnings for a couple beginner problems
             # involving passing non-markup to Beautiful Soup.
             # Beautiful Soup will still parse the input as markup,
             # just in case that's what the user really wants.
-            if (isinstance(markup, unicode)
+            if (isinstance(markup, str)
                 and not os.path.supports_unicode_filenames):
                 possible_filename = markup.encode("utf8")
             else:
@@ -326,7 +328,7 @@ class BeautifulSoup(Tag):
                 is_file = os.path.exists(possible_filename)
                 if is_file:
                     is_directory = os.path.isdir(possible_filename)
-            except Exception, e:
+            except Exception as e:
                 # This is almost certainly a problem involving
                 # characters not valid in filenames on this
                 # system. Just let it go.
@@ -365,9 +367,9 @@ class BeautifulSoup(Tag):
                 pass
 
         if not success:
-            other_exceptions = [unicode(e) for e in rejections]
+            other_exceptions = [str(e) for e in rejections]
             raise ParserRejectedMarkup(
-                u"The markup you provided was rejected by the parser. Trying a different parser or a different encoding may help.\n\nOriginal exception(s) from parser:\n " + "\n ".join(other_exceptions)
+                "The markup you provided was rejected by the parser. Trying a different parser or a different encoding may help.\n\nOriginal exception(s) from parser:\n " + "\n ".join(other_exceptions)
             )
 
         # Clear out the markup and remove the builder's circular
@@ -418,9 +420,9 @@ class BeautifulSoup(Tag):
         if isinstance(markup, bytes):
             space = b' '
             cant_start_with = (b"http:", b"https:")
-        elif isinstance(markup, unicode):
-            space = u' '
-            cant_start_with = (u"http:", u"https:")
+        elif isinstance(markup, str):
+            space = ' '
+            cant_start_with = ("http:", "https:")
         else:
             return
 
@@ -555,7 +557,7 @@ class BeautifulSoup(Tag):
         occurs.
         """       
         if self.current_data:
-            current_data = u''.join(self.current_data)
+            current_data = ''.join(self.current_data)
             # If whitespace is not preserved, and this string contains
             # nothing but ASCII spaces, replace it with a single space
             # or newline.
@@ -759,9 +761,9 @@ class BeautifulSoup(Tag):
                 eventual_encoding = None
             if eventual_encoding != None:
                 encoding_part = ' encoding="%s"' % eventual_encoding
-            prefix = u'<?xml version="1.0"%s?>\n' % encoding_part
+            prefix = '<?xml version="1.0"%s?>\n' % encoding_part
         else:
-            prefix = u''
+            prefix = ''
         if not pretty_print:
             indent_level = None
         else:
@@ -799,4 +801,4 @@ class FeatureNotFound(ValueError):
 if __name__ == '__main__':
     import sys
     soup = BeautifulSoup(sys.stdin)
-    print(soup.prettify())
+    print((soup.prettify()))
author	Leonard Richardson <leonardr@segfault.org>	2021-09-07 20:09:32 -0400
committer	Leonard Richardson <leonardr@segfault.org>	2021-09-07 20:09:32 -0400
commit	9d68e443978afda17f59f0ff9e73af2b9b0921c2 (patch)
tree	c23b00ad1379e3c10212c048ef84fc40c9321da3 /bs4/__init__.py
parent	70f546b1e689a70e2f103795efce6d261a3dadf7 (diff)