summary | refs | log | tree | commit | diff
path: root/bs4/__init__.py
diff options
context:
space:
mode:
author: Leonard Richardson <leonardr@segfault.org> 2021-09-07 20:09:32 -0400
committer: Leonard Richardson <leonardr@segfault.org> 2021-09-07 20:09:32 -0400
commit: 9d68e443978afda17f59f0ff9e73af2b9b0921c2 (patch)
tree: c23b00ad1379e3c10212c048ef84fc40c9321da3 /bs4/__init__.py
parent: 70f546b1e689a70e2f103795efce6d261a3dadf7 (diff)
Goodbye, Python 2. [bug=1942919]
Diffstat (limited to 'bs4/__init__.py')
-rw-r--r-- bs4/__init__.py 46
1 files changed, 24 insertions, 22 deletions
diff --git a/bs4/__init__.py b/bs4/__init__.py
index 7c6044a..2a436d3 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -7,7 +7,7 @@ Beautiful Soup uses a pluggable XML or HTML parser to parse a
provides methods and Pythonic idioms that make it easy to navigate,
search, and modify the parse tree.
-Beautiful Soup works with Python 2.7 and up. It works better if lxml
+Beautiful Soup works with Python 3.5 and up. It works better if lxml
and/or html5lib is installed.
For more than you ever wanted to know about Beautiful Soup, see the
@@ -15,13 +15,14 @@ documentation: http://www.crummy.com/software/BeautifulSoup/bs4/doc/
"""
__author__ = "Leonard Richardson (leonardr@segfault.org)"
-__version__ = "4.9.3"
-__copyright__ = "Copyright (c) 2004-2020 Leonard Richardson"
+__version__ = "4.10.0"
+__copyright__ = "Copyright (c) 2004-2021 Leonard Richardson"
# Use of this source code is governed by the MIT license.
__license__ = "MIT"
__all__ = ['BeautifulSoup']
+
from collections import Counter
import os
import re
@@ -29,6 +30,11 @@ import sys
import traceback
import warnings
+# The very first thing we do is give a useful error if someone is
+# running this code under Python 2.
+if sys.version_info.major < 3:
+ raise ImportError('You are trying to use a Python 3-specific version of Beautiful Soup under Python 2. This will not work. The final version of Beautiful Soup to support Python 2 was 4.9.3.')
+
from .builder import builder_registry, ParserRejectedMarkup
from .dammit import UnicodeDammit
from .element import (
@@ -49,10 +55,6 @@ from .element import (
TemplateString,
)
-# The very first thing we do is give a useful error if someone is
-# running this code under Python 3 without converting it.
-'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work.'<>'You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).'
-
# Define some custom warnings.
class GuessedAtParserWarning(UserWarning):
"""The warning issued when BeautifulSoup has to guess what parser to
@@ -100,7 +102,7 @@ class BeautifulSoup(Tag):
# Since BeautifulSoup subclasses Tag, it's possible to treat it as
# a Tag with a .name. This name makes it clear the BeautifulSoup
# object isn't a real markup tag.
- ROOT_TAG_NAME = u'[document]'
+ ROOT_TAG_NAME = '[document]'
# If the end-user gives no indication which tree builder they
# want, look for one with these features.
@@ -217,7 +219,7 @@ class BeautifulSoup(Tag):
from_encoding = from_encoding or deprecated_argument(
"fromEncoding", "from_encoding")
- if from_encoding and isinstance(markup, unicode):
+ if from_encoding and isinstance(markup, str):
warnings.warn("You provided Unicode markup but also provided a value for from_encoding. Your from_encoding will be ignored.")
from_encoding = None
@@ -234,7 +236,7 @@ class BeautifulSoup(Tag):
builder_class = builder
builder = None
elif builder is None:
- if isinstance(features, basestring):
+ if isinstance(features, str):
features = [features]
if features is None or len(features) == 0:
features = self.DEFAULT_BUILDER_FEATURES
@@ -309,13 +311,13 @@ class BeautifulSoup(Tag):
markup = markup.read()
elif len(markup) <= 256 and (
(isinstance(markup, bytes) and not b'<' in markup)
- or (isinstance(markup, unicode) and not u'<' in markup)
+ or (isinstance(markup, str) and not '<' in markup)
):
# Print out warnings for a couple beginner problems
# involving passing non-markup to Beautiful Soup.
# Beautiful Soup will still parse the input as markup,
# just in case that's what the user really wants.
- if (isinstance(markup, unicode)
+ if (isinstance(markup, str)
and not os.path.supports_unicode_filenames):
possible_filename = markup.encode("utf8")
else:
@@ -326,7 +328,7 @@ class BeautifulSoup(Tag):
is_file = os.path.exists(possible_filename)
if is_file:
is_directory = os.path.isdir(possible_filename)
- except Exception, e:
+ except Exception as e:
# This is almost certainly a problem involving
# characters not valid in filenames on this
# system. Just let it go.
@@ -365,9 +367,9 @@ class BeautifulSoup(Tag):
pass
if not success:
- other_exceptions = [unicode(e) for e in rejections]
+ other_exceptions = [str(e) for e in rejections]
raise ParserRejectedMarkup(
- u"The markup you provided was rejected by the parser. Trying a different parser or a different encoding may help.\n\nOriginal exception(s) from parser:\n " + "\n ".join(other_exceptions)
+ "The markup you provided was rejected by the parser. Trying a different parser or a different encoding may help.\n\nOriginal exception(s) from parser:\n " + "\n ".join(other_exceptions)
)
# Clear out the markup and remove the builder's circular
@@ -418,9 +420,9 @@ class BeautifulSoup(Tag):
if isinstance(markup, bytes):
space = b' '
cant_start_with = (b"http:", b"https:")
- elif isinstance(markup, unicode):
- space = u' '
- cant_start_with = (u"http:", u"https:")
+ elif isinstance(markup, str):
+ space = ' '
+ cant_start_with = ("http:", "https:")
else:
return
@@ -555,7 +557,7 @@ class BeautifulSoup(Tag):
occurs.
"""
if self.current_data:
- current_data = u''.join(self.current_data)
+ current_data = ''.join(self.current_data)
# If whitespace is not preserved, and this string contains
# nothing but ASCII spaces, replace it with a single space
# or newline.
@@ -759,9 +761,9 @@ class BeautifulSoup(Tag):
eventual_encoding = None
if eventual_encoding != None:
encoding_part = ' encoding="%s"' % eventual_encoding
- prefix = u'<?xml version="1.0"%s?>\n' % encoding_part
+ prefix = '<?xml version="1.0"%s?>\n' % encoding_part
else:
- prefix = u''
+ prefix = ''
if not pretty_print:
indent_level = None
else:
@@ -799,4 +801,4 @@ class FeatureNotFound(ValueError):
if __name__ == '__main__':
import sys
soup = BeautifulSoup(sys.stdin)
- print(soup.prettify())
+ print((soup.prettify()))