summary | refs | log | tree | commit | diff
path: root/bs4/element.py
diff options
context:
space:
mode:
author: Leonard Richardson <leonard.richardson@canonical.com> 2011-08-11 09:54:55 -0400
committer: Leonard Richardson <leonard.richardson@canonical.com> 2011-08-11 09:54:55 -0400
commit: b8827dc6e99eea433d6fef9518b332d008f7a905 (patch)
tree: 002cbc5a0d2f6ca80a55eb08b6ecb8fcb60bbd8d /bs4/element.py
parent: adbc7ecfea5b3e3349cdb4b4eac702d1e2b42e63 (diff)
parent: 74fef7e4d1d3154f7a0c3b53c60d760bfeeadbf8 (diff)
[r=leonardr] Changes to make Beautiful Soup work with Python 3. Code by Thomas Kluyver.
Diffstat (limited to 'bs4/element.py')
-rw-r--r-- bs4/element.py 16
1 files changed, 11 insertions, 5 deletions
diff --git a/bs4/element.py b/bs4/element.py
index 95661ae..5db5b36 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -1,8 +1,10 @@
import collections
import re
+import sys
from bs4.dammit import EntitySubstitution
DEFAULT_OUTPUT_ENCODING = "utf-8"
+PY3K = (sys.version_info[0] > 2)
def _match_css_class(str):
@@ -523,7 +525,7 @@ class Tag(PageElement):
self.extract()
i = self
while i is not None:
- next = i.next
+ next = i.next_element
i.__dict__.clear()
i = next
@@ -599,7 +601,8 @@ class Tag(PageElement):
#print "Getattr %s.%s" % (self.__class__, tag)
if len(tag) > 3 and tag.endswith('Tag'):
return self.find(tag[:-3])
- elif not tag.startswith("__"):
+ # We special case contents to avoid recursion.
+ elif not tag.startswith("__") and not tag=="contents":
return self.find(tag)
raise AttributeError(
"'%s' object has no attribute '%s'" % (self.__class__, tag))
@@ -635,6 +638,9 @@ class Tag(PageElement):
def __str__(self):
return self.encode()
+
+ if PY3K:
+ __str__ = __repr__ = __unicode__
def encode(self, encoding=DEFAULT_OUTPUT_ENCODING,
indent_level=None, substitute_html_entities=False):
@@ -872,7 +878,7 @@ class SoupStrainer(object):
found = None
# If given a list of items, scan it for a text element that
# matches.
- if hasattr(markup, '__iter__') and not isinstance(markup, Tag):
+ if hasattr(markup, '__iter__') and not isinstance(markup, (Tag, basestring)):
for element in markup:
if isinstance(element, NavigableString) \
and self.search(element):
@@ -912,8 +918,8 @@ class SoupStrainer(object):
# It's a regexp object.
result = markup and match_against.search(markup)
elif (hasattr(match_against, '__iter__')
- and (markup is not None
- or not isinstance(match_against, basestring))):
+ and markup is not None
+ and not isinstance(match_against, basestring)):
result = markup in match_against
elif hasattr(match_against, 'items'):
result = match_against in markup