author    Leonard Richardson <leonardr@segfault.org>  2020-05-17 13:58:15 -0400
committer Leonard Richardson <leonardr@segfault.org>  2020-05-17 13:58:15 -0400
commit    8ddf0953746cb25cd900038067ee5a469940e972 (patch)
tree      851985846258032d58df53192514a87a3aeda014
parent    56d128279162d3a5696cfba767891c843393e372 (diff)
Switch entirely to Python 3-style print statements, even in Python 2.
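Background note (not part of the commit): the converted calls behave the same under both interpreters only because of how Python 2 parses them. Below is a minimal sketch, with an illustrative variable name, of the cases this change relies on; the `from __future__ import print_function` line is an assumption about how a Python 2 module would normally opt in to the function form, not something shown in this diff.

# Illustrative sketch only -- not code from this commit.
# In plain Python 2, `print("Push", tag_name)` is the print *statement*
# applied to a parenthesized tuple and would output: ('Push', 'div').
# With the future import, print() is a real function, so the same call
# prints "Push div" on Python 2 and Python 3 alike.
from __future__ import print_function

tag_name = "div"               # hypothetical example value
print("Push", tag_name)        # -> Push div (identical on 2 and 3)
print("End tag: " + tag_name)  # single argument: safe even without the import
print("")                      # replaces the bare Python 2 statement `print`
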
-rw-r--r--  bs4/__init__.py             14
-rw-r--r--  bs4/builder/__init__.py      4
-rw-r--r--  bs4/builder/_html5lib.py    12
-rw-r--r--  bs4/builder/_htmlparser.py   6
-rw-r--r--  bs4/dammit.py               10
-rw-r--r--  bs4/diagnose.py             44
-rw-r--r--  bs4/element.py               6
-rw-r--r--  setup.py                     2
8 files changed, 49 insertions, 49 deletions
diff --git a/bs4/__init__.py b/bs4/__init__.py
index e2d0d61..980c0ce 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -15,7 +15,7 @@ documentation: http://www.crummy.com/software/BeautifulSoup/bs4/doc/
"""
__author__ = "Leonard Richardson (leonardr@segfault.org)"
-__version__ = "4.9.0"
+__version__ = "4.9.1"
__copyright__ = "Copyright (c) 2004-2020 Leonard Richardson"
# Use of this source code is governed by the MIT license.
__license__ = "MIT"
@@ -513,14 +513,14 @@ class BeautifulSoup(Tag):
self.preserve_whitespace_tag_stack.pop()
if self.string_container_stack and tag == self.string_container_stack[-1]:
self.string_container_stack.pop()
- #print "Pop", tag.name
+ #print("Pop", tag.name)
if self.tagStack:
self.currentTag = self.tagStack[-1]
return self.currentTag
def pushTag(self, tag):
"""Internal method called by handle_starttag when a tag is opened."""
- #print "Push", tag.name
+ #print("Push", tag.name)
if self.currentTag is not None:
self.currentTag.contents.append(tag)
self.tagStack.append(tag)
@@ -643,7 +643,7 @@ class BeautifulSoup(Tag):
to but *not* including the most recent instance of the
given tag.
"""
- #print "Popping to %s" % name
+ #print("Popping to %s" % name)
if name == self.ROOT_TAG_NAME:
# The BeautifulSoup object itself can never be popped.
return
@@ -678,7 +678,7 @@ class BeautifulSoup(Tag):
in the document. For instance, if this was a self-closing tag,
don't call handle_endtag.
"""
- # print "Start tag %s: %s" % (name, attrs)
+ # print("Start tag %s: %s" % (name, attrs))
self.endData()
if (self.parse_only and len(self.tagStack) <= 1
@@ -705,7 +705,7 @@ class BeautifulSoup(Tag):
:param name: Name of the tag.
:param nsprefix: Namespace prefix for the tag.
"""
- #print "End tag: " + name
+ #print("End tag: " + name)
self.endData()
self._popToTag(name, nsprefix)
@@ -774,4 +774,4 @@ class FeatureNotFound(ValueError):
if __name__ == '__main__':
import sys
soup = BeautifulSoup(sys.stdin)
- print soup.prettify()
+ print(soup.prettify())
diff --git a/bs4/builder/__init__.py b/bs4/builder/__init__.py
index 7d3a6eb..e319625 100644
--- a/bs4/builder/__init__.py
+++ b/bs4/builder/__init__.py
@@ -334,11 +334,11 @@ class SAXTreeBuilder(TreeBuilder):
def startElement(self, name, attrs):
attrs = dict((key[1], value) for key, value in list(attrs.items()))
- #print "Start %s, %r" % (name, attrs)
+ #print("Start %s, %r" % (name, attrs))
self.soup.handle_starttag(name, attrs)
def endElement(self, name):
- #print "End %s" % name
+ #print("End %s" % name)
self.soup.handle_endtag(name)
def startElementNS(self, nsTuple, nodeName, attrs):
diff --git a/bs4/builder/_html5lib.py b/bs4/builder/_html5lib.py
index b36189d..a1c6134 100644
--- a/bs4/builder/_html5lib.py
+++ b/bs4/builder/_html5lib.py
@@ -375,9 +375,9 @@ class Element(treebuilder_base.Node):
def reparentChildren(self, new_parent):
"""Move all of this tag's children into another tag."""
- # print "MOVE", self.element.contents
- # print "FROM", self.element
- # print "TO", new_parent.element
+ # print("MOVE", self.element.contents)
+ # print("FROM", self.element)
+ # print("TO", new_parent.element)
element = self.element
new_parent_element = new_parent.element
@@ -435,9 +435,9 @@ class Element(treebuilder_base.Node):
element.contents = []
element.next_element = final_next_element
- # print "DONE WITH MOVE"
- # print "FROM", self.element
- # print "TO", new_parent_element
+ # print("DONE WITH MOVE")
+ # print("FROM", self.element)
+ # print("TO", new_parent_element)
def cloneNode(self):
tag = self.soup.new_tag(self.element.name, self.namespace)
diff --git a/bs4/builder/_htmlparser.py b/bs4/builder/_htmlparser.py
index 1d96a66..1cb84ff 100644
--- a/bs4/builder/_htmlparser.py
+++ b/bs4/builder/_htmlparser.py
@@ -146,7 +146,7 @@ class BeautifulSoupHTMLParser(HTMLParser):
else:
attr_dict[key] = value
attrvalue = '""'
- #print "START", name
+ #print("START", name)
sourceline, sourcepos = self.getpos()
tag = self.soup.handle_starttag(
name, None, None, attr_dict, sourceline=sourceline,
@@ -176,12 +176,12 @@ class BeautifulSoupHTMLParser(HTMLParser):
be the closing portion of an empty-element tag,
e.g. '<tag></tag>'.
"""
- #print "END", name
+ #print("END", name)
if check_already_closed and name in self.already_closed_empty_element:
# This is a redundant end tag for an empty-element tag.
# We've already called handle_endtag() for it, so just
# check it off the list.
- # print "ALREADY CLOSED", name
+ # print("ALREADY CLOSED", name)
self.already_closed_empty_element.remove(name)
else:
self.soup.handle_endtag(name)
diff --git a/bs4/dammit.py b/bs4/dammit.py
index eb08568..33f7b7d 100644
--- a/bs4/dammit.py
+++ b/bs4/dammit.py
@@ -506,16 +506,16 @@ class UnicodeDammit:
markup = smart_quotes_compiled.sub(self._sub_ms_char, markup)
try:
- #print "Trying to convert document to %s (errors=%s)" % (
- # proposed, errors)
+ #print("Trying to convert document to %s (errors=%s)" % (
+ # proposed, errors))
u = self._to_unicode(markup, proposed, errors)
self.markup = u
self.original_encoding = proposed
except Exception as e:
- #print "That didn't work!"
- #print e
+ #print("That didn't work!")
+ #print(e)
return None
- #print "Correct encoding: %s" % proposed
+ #print("Correct encoding: %s" % proposed)
return self.markup
def _to_unicode(self, data, encoding, errors="strict"):
diff --git a/bs4/diagnose.py b/bs4/diagnose.py
index c58d610..e4f2f47 100644
--- a/bs4/diagnose.py
+++ b/bs4/diagnose.py
@@ -25,8 +25,8 @@ def diagnose(data):
:param data: A string containing markup that needs to be explained.
:return: None; diagnostics are printed to standard output.
"""
- print "Diagnostic running on Beautiful Soup %s" % __version__
- print "Python version %s" % sys.version
+ print("Diagnostic running on Beautiful Soup %s" % __version__)
+ print("Python version %s" % sys.version)
basic_parsers = ["html.parser", "html5lib", "lxml"]
for name in basic_parsers:
@@ -35,7 +35,7 @@ def diagnose(data):
break
else:
basic_parsers.remove(name)
- print (
+ print(
"I noticed that %s is not installed. Installing it may help." %
name)
@@ -43,52 +43,52 @@ def diagnose(data):
basic_parsers.append("lxml-xml")
try:
from lxml import etree
- print "Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION))
+ print("Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION)))
except ImportError, e:
- print (
+ print(
"lxml is not installed or couldn't be imported.")
if 'html5lib' in basic_parsers:
try:
import html5lib
- print "Found html5lib version %s" % html5lib.__version__
+ print("Found html5lib version %s" % html5lib.__version__)
except ImportError, e:
- print (
+ print(
"html5lib is not installed or couldn't be imported.")
if hasattr(data, 'read'):
data = data.read()
elif data.startswith("http:") or data.startswith("https:"):
- print '"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data
- print "You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup."
+ print('"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data)
+ print("You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup.")
return
else:
try:
if os.path.exists(data):
- print '"%s" looks like a filename. Reading data from the file.' % data
+ print('"%s" looks like a filename. Reading data from the file.' % data)
with open(data) as fp:
data = fp.read()
except ValueError:
# This can happen on some platforms when the 'filename' is
# too long. Assume it's data and not a filename.
pass
- print
+ print("")
for parser in basic_parsers:
- print "Trying to parse your markup with %s" % parser
+ print("Trying to parse your markup with %s" % parser)
success = False
try:
soup = BeautifulSoup(data, features=parser)
success = True
except Exception, e:
- print "%s could not parse the markup." % parser
+ print("%s could not parse the markup." % parser)
traceback.print_exc()
if success:
- print "Here's what %s did with the markup:" % parser
- print soup.prettify()
+ print("Here's what %s did with the markup:" % parser)
+ print(soup.prettify())
- print "-" * 80
+ print("-" * 80)
def lxml_trace(data, html=True, **kwargs):
"""Print out the lxml events that occur during parsing.
@@ -193,9 +193,9 @@ def rdoc(num_elements=1000):
def benchmark_parsers(num_elements=100000):
"""Very basic head-to-head performance benchmark."""
- print "Comparative parser benchmark on Beautiful Soup %s" % __version__
+ print("Comparative parser benchmark on Beautiful Soup %s" % __version__)
data = rdoc(num_elements)
- print "Generated a large invalid HTML document (%d bytes)." % len(data)
+ print("Generated a large invalid HTML document (%d bytes)." % len(data))
for parser in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]:
success = False
@@ -205,23 +205,23 @@ def benchmark_parsers(num_elements=100000):
b = time.time()
success = True
except Exception, e:
- print "%s could not parse the markup." % parser
+ print("%s could not parse the markup." % parser)
traceback.print_exc()
if success:
- print "BS4+%s parsed the markup in %.2fs." % (parser, b-a)
+ print("BS4+%s parsed the markup in %.2fs." % (parser, b-a))
from lxml import etree
a = time.time()
etree.HTML(data)
b = time.time()
- print "Raw lxml parsed the markup in %.2fs." % (b-a)
+ print("Raw lxml parsed the markup in %.2fs." % (b-a))
import html5lib
parser = html5lib.HTMLParser()
a = time.time()
parser.parse(data)
b = time.time()
- print "Raw html5lib parsed the markup in %.2fs." % (b-a)
+ print("Raw html5lib parsed the markup in %.2fs." % (b-a))
def profile(num_elements=100000, parser="lxml"):
"""Use Python's profiler on a randomly generated document."""
diff --git a/bs4/element.py b/bs4/element.py
index 4947be9..130dfa9 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -1432,7 +1432,7 @@ class Tag(PageElement):
def __getattr__(self, tag):
"""Calling tag.subtag is the same as calling tag.find(name="subtag")"""
- #print "Getattr %s.%s" % (self.__class__, tag)
+ #print("Getattr %s.%s" % (self.__class__, tag))
if len(tag) > 3 and tag.endswith('Tag'):
# BS3: soup.aTag -> "soup.find("a")
tag_name = tag[:-3]
@@ -2035,7 +2035,7 @@ class SoupStrainer(object):
:param markup: A PageElement or a list of them.
"""
- # print 'looking for %s in %s' % (self, markup)
+ # print('looking for %s in %s' % (self, markup))
found = None
# If given a list of items, scan it for a text element that
# matches.
@@ -2061,7 +2061,7 @@ class SoupStrainer(object):
return found
def _matches(self, markup, match_against, already_tried=None):
- # print u"Matching %s against %s" % (markup, match_against)
+ # print(u"Matching %s against %s" % (markup, match_against))
result = False
if isinstance(markup, list) or isinstance(markup, tuple):
# This should only happen when searching a multi-valued attribute
diff --git a/setup.py b/setup.py
index 29167dd..e8244ed 100644
--- a/setup.py
+++ b/setup.py
@@ -16,7 +16,7 @@ setup(
# NOTE: We can't import __version__ from bs4 because bs4/__init__.py is Python 2 code,
# and converting it to Python 3 means going through this code to run 2to3.
# So we have to specify it twice for the time being.
- version = '4.9.0',
+ version = '4.9.1',
author="Leonard Richardson",
author_email='leonardr@segfault.org',
url="http://www.crummy.com/software/BeautifulSoup/bs4/",