summary | refs | log | tree | commit | diff
path: root/bs4/element.py
diff options
context:
space:
mode:
author: Leonard Richardson <leonardr@segfault.org> 2021-09-07 20:09:32 -0400
committer: Leonard Richardson <leonardr@segfault.org> 2021-09-07 20:09:32 -0400
commit: 9d68e443978afda17f59f0ff9e73af2b9b0921c2 (patch)
tree: c23b00ad1379e3c10212c048ef84fc40c9321da3 /bs4/element.py
parent: 70f546b1e689a70e2f103795efce6d261a3dadf7 (diff)
Goodbye, Python 2. [bug=1942919]
Diffstat (limited to 'bs4/element.py')
-rw-r--r-- bs4/element.py 110
1 files changed, 55 insertions, 55 deletions
diff --git a/bs4/element.py b/bs4/element.py
index e7867a9..82a986e 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -3,14 +3,14 @@ __license__ = "MIT"
try:
from collections.abc import Callable # Python 3.6
-except ImportError , e:
+except ImportError as e:
from collections import Callable
import re
import sys
import warnings
try:
import soupsieve
-except ImportError, e:
+except ImportError as e:
soupsieve = None
warnings.warn(
'The soupsieve package is not installed. CSS selectors cannot be used.'
@@ -57,22 +57,22 @@ def _alias(attr):
# Source:
# https://docs.python.org/3/library/codecs.html#python-specific-encodings
PYTHON_SPECIFIC_ENCODINGS = set([
- u"idna",
- u"mbcs",
- u"oem",
- u"palmos",
- u"punycode",
- u"raw_unicode_escape",
- u"undefined",
- u"unicode_escape",
- u"raw-unicode-escape",
- u"unicode-escape",
- u"string-escape",
- u"string_escape",
+ "idna",
+ "mbcs",
+ "oem",
+ "palmos",
+ "punycode",
+ "raw_unicode_escape",
+ "undefined",
+ "unicode_escape",
+ "raw-unicode-escape",
+ "unicode-escape",
+ "string-escape",
+ "string_escape",
])
-class NamespacedAttribute(unicode):
+class NamespacedAttribute(str):
"""A namespaced string (e.g. 'xml:lang') that remembers the namespace
('xml') and the name ('lang') that were used to create it.
"""
@@ -84,18 +84,18 @@ class NamespacedAttribute(unicode):
name = None
if not name:
- obj = unicode.__new__(cls, prefix)
+ obj = str.__new__(cls, prefix)
elif not prefix:
# Not really namespaced.
- obj = unicode.__new__(cls, name)
+ obj = str.__new__(cls, name)
else:
- obj = unicode.__new__(cls, prefix + ":" + name)
+ obj = str.__new__(cls, prefix + ":" + name)
obj.prefix = prefix
obj.name = name
obj.namespace = namespace
return obj
-class AttributeValueWithCharsetSubstitution(unicode):
+class AttributeValueWithCharsetSubstitution(str):
"""A stand-in object for a character encoding specified in HTML."""
class CharsetMetaAttributeValue(AttributeValueWithCharsetSubstitution):
@@ -106,7 +106,7 @@ class CharsetMetaAttributeValue(AttributeValueWithCharsetSubstitution):
"""
def __new__(cls, original_value):
- obj = unicode.__new__(cls, original_value)
+ obj = str.__new__(cls, original_value)
obj.original_value = original_value
return obj
@@ -134,9 +134,9 @@ class ContentMetaAttributeValue(AttributeValueWithCharsetSubstitution):
match = cls.CHARSET_RE.search(original_value)
if match is None:
# No substitution necessary.
- return unicode.__new__(unicode, original_value)
+ return str.__new__(str, original_value)
- obj = unicode.__new__(cls, original_value)
+ obj = str.__new__(cls, original_value)
obj.original_value = original_value
return obj
@@ -272,7 +272,7 @@ class PageElement(object):
for string in self._all_strings(True):
yield string
- def get_text(self, separator=u"", strip=False,
+ def get_text(self, separator="", strip=False,
types=default):
"""Get all child strings of this PageElement, concatenated using the
given separator.
@@ -418,7 +418,7 @@ class PageElement(object):
raise ValueError("Cannot insert None into a tag.")
if new_child is self:
raise ValueError("Cannot insert a tag into itself.")
- if (isinstance(new_child, basestring)
+ if (isinstance(new_child, str)
and not isinstance(new_child, NavigableString)):
new_child = NavigableString(new_child)
@@ -795,7 +795,7 @@ class PageElement(object):
result = (element for element in generator
if isinstance(element, Tag))
return ResultSet(strainer, result)
- elif isinstance(name, basestring):
+ elif isinstance(name, str):
# Optimization to find all tags with a given name.
if name.count(':') == 1:
# This is a name with a prefix. If this is a namespace-aware document,
@@ -914,7 +914,7 @@ class PageElement(object):
return self.parents
-class NavigableString(unicode, PageElement):
+class NavigableString(str, PageElement):
"""A Python Unicode string that is part of a parse tree.
When Beautiful Soup parses the markup <b>penguin</b>, it will
@@ -937,10 +937,10 @@ class NavigableString(unicode, PageElement):
passed in to the superclass's __new__ or the superclass won't know
how to handle non-ASCII characters.
"""
- if isinstance(value, unicode):
- u = unicode.__new__(cls, value)
+ if isinstance(value, str):
+ u = str.__new__(cls, value)
else:
- u = unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING)
+ u = str.__new__(cls, value, DEFAULT_OUTPUT_ENCODING)
u.setup()
return u
@@ -951,7 +951,7 @@ class NavigableString(unicode, PageElement):
return type(self)(self)
def __getnewargs__(self):
- return (unicode(self),)
+ return (str(self),)
def __getattr__(self, attr):
"""text.string gives you text. This is for backwards
@@ -1059,30 +1059,30 @@ class PreformattedString(NavigableString):
class CData(PreformattedString):
"""A CDATA block."""
- PREFIX = u'<![CDATA['
- SUFFIX = u']]>'
+ PREFIX = '<![CDATA['
+ SUFFIX = ']]>'
class ProcessingInstruction(PreformattedString):
"""A SGML processing instruction."""
- PREFIX = u'<?'
- SUFFIX = u'>'
+ PREFIX = '<?'
+ SUFFIX = '>'
class XMLProcessingInstruction(ProcessingInstruction):
"""An XML processing instruction."""
- PREFIX = u'<?'
- SUFFIX = u'?>'
+ PREFIX = '<?'
+ SUFFIX = '?>'
class Comment(PreformattedString):
"""An HTML or XML comment."""
- PREFIX = u'<!--'
- SUFFIX = u'-->'
+ PREFIX = '<!--'
+ SUFFIX = '-->'
class Declaration(PreformattedString):
"""An XML declaration."""
- PREFIX = u'<?'
- SUFFIX = u'?>'
+ PREFIX = '<?'
+ SUFFIX = '?>'
class Doctype(PreformattedString):
@@ -1110,8 +1110,8 @@ class Doctype(PreformattedString):
return Doctype(value)
- PREFIX = u'<!DOCTYPE '
- SUFFIX = u'>\n'
+ PREFIX = '<!DOCTYPE '
+ SUFFIX = '>\n'
class Stylesheet(NavigableString):
@@ -1496,7 +1496,7 @@ class Tag(PageElement):
def __contains__(self, x):
return x in self.contents
- def __nonzero__(self):
+ def __bool__(self):
"A tag is non-None even if it has no contents."
return True
@@ -1645,8 +1645,8 @@ class Tag(PageElement):
else:
if isinstance(val, list) or isinstance(val, tuple):
val = ' '.join(val)
- elif not isinstance(val, basestring):
- val = unicode(val)
+ elif not isinstance(val, str):
+ val = str(val)
elif (
isinstance(val, AttributeValueWithCharsetSubstitution)
and eventual_encoding is not None
@@ -1655,7 +1655,7 @@ class Tag(PageElement):
text = formatter.attribute_value(val)
decoded = (
- unicode(key) + '='
+ str(key) + '='
+ formatter.quoted_attribute_value(text))
attrs.append(decoded)
close = ''
@@ -2014,7 +2014,7 @@ class SoupStrainer(object):
else:
attrs = kwargs
normalized_attrs = {}
- for key, value in attrs.items():
+ for key, value in list(attrs.items()):
normalized_attrs[key] = self._normalize_search_value(value)
self.attrs = normalized_attrs
@@ -2023,7 +2023,7 @@ class SoupStrainer(object):
def _normalize_search_value(self, value):
# Leave it alone if it's a Unicode string, a callable, a
# regular expression, a boolean, or None.
- if (isinstance(value, unicode) or isinstance(value, Callable) or hasattr(value, 'match')
+ if (isinstance(value, str) or isinstance(value, Callable) or hasattr(value, 'match')
or isinstance(value, bool) or value is None):
return value
@@ -2036,7 +2036,7 @@ class SoupStrainer(object):
new_value = []
for v in value:
if (hasattr(v, '__iter__') and not isinstance(v, bytes)
- and not isinstance(v, unicode)):
+ and not isinstance(v, str)):
# This is almost certainly the user's mistake. In the
# interests of avoiding infinite loops, we'll let
# it through as-is rather than doing a recursive call.
@@ -2048,7 +2048,7 @@ class SoupStrainer(object):
# Otherwise, convert it into a Unicode string.
# The unicode(str()) thing is so this will do the same thing on Python 2
# and Python 3.
- return unicode(str(value))
+ return str(str(value))
def __str__(self):
"""A human-readable representation of this SoupStrainer."""
@@ -2076,7 +2076,7 @@ class SoupStrainer(object):
markup = markup_name
markup_attrs = markup
- if isinstance(self.name, basestring):
+ if isinstance(self.name, str):
# Optimization for a very common case where the user is
# searching for a tag with one specific name, and we're
# looking at a tag with a different name.
@@ -2132,7 +2132,7 @@ class SoupStrainer(object):
found = None
# If given a list of items, scan it for a text element that
# matches.
- if hasattr(markup, '__iter__') and not isinstance(markup, (Tag, basestring)):
+ if hasattr(markup, '__iter__') and not isinstance(markup, (Tag, str)):
for element in markup:
if isinstance(element, NavigableString) \
and self.search(element):
@@ -2145,7 +2145,7 @@ class SoupStrainer(object):
found = self.search_tag(markup)
# If it's text, make sure the text matches.
elif isinstance(markup, NavigableString) or \
- isinstance(markup, basestring):
+ isinstance(markup, str):
if not self.name and not self.attrs and self._matches(markup, self.text):
found = markup
else:
@@ -2190,7 +2190,7 @@ class SoupStrainer(object):
return not match_against
if (hasattr(match_against, '__iter__')
- and not isinstance(match_against, basestring)):
+ and not isinstance(match_against, str)):
# We're asked to match against an iterable of items.
# The markup must be match at least one item in the
# iterable. We'll try each one in turn.
@@ -2217,7 +2217,7 @@ class SoupStrainer(object):
# the tag's name and once against its prefixed name.
match = False
- if not match and isinstance(match_against, unicode):
+ if not match and isinstance(match_against, str):
# Exact string match
match = markup == match_against