diff options
author | Leonard Richardson <leonard.richardson@canonical.com> | 2011-08-11 09:54:55 -0400 |
---|---|---|
committer | Leonard Richardson <leonard.richardson@canonical.com> | 2011-08-11 09:54:55 -0400 |
commit | b8827dc6e99eea433d6fef9518b332d008f7a905 (patch) | |
tree | 002cbc5a0d2f6ca80a55eb08b6ecb8fcb60bbd8d /bs4/element.py | |
parent | adbc7ecfea5b3e3349cdb4b4eac702d1e2b42e63 (diff) | |
parent | 74fef7e4d1d3154f7a0c3b53c60d760bfeeadbf8 (diff) |
[r=leonardr] Changes to make Beautiful Soup work with Python 3. Code by Thomas Kluyver.
Diffstat (limited to 'bs4/element.py')
-rw-r--r-- | bs4/element.py | 16 |
1 files changed, 11 insertions, 5 deletions
```diff
diff --git a/bs4/element.py b/bs4/element.py
index 95661ae..5db5b36 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -1,8 +1,10 @@
 import collections
 import re
+import sys
 from bs4.dammit import EntitySubstitution
 
 DEFAULT_OUTPUT_ENCODING = "utf-8"
+PY3K = (sys.version_info[0] > 2)
 
 
 def _match_css_class(str):
@@ -523,7 +525,7 @@ class Tag(PageElement):
         self.extract()
         i = self
         while i is not None:
-            next = i.next
+            next = i.next_element
             i.__dict__.clear()
             i = next
 
@@ -599,7 +601,8 @@ class Tag(PageElement):
         #print "Getattr %s.%s" % (self.__class__, tag)
         if len(tag) > 3 and tag.endswith('Tag'):
             return self.find(tag[:-3])
-        elif not tag.startswith("__"):
+        # We special case contents to avoid recursion.
+        elif not tag.startswith("__") and not tag=="contents":
             return self.find(tag)
         raise AttributeError(
             "'%s' object has no attribute '%s'" % (self.__class__, tag))
@@ -635,6 +638,9 @@ class Tag(PageElement):
 
     def __str__(self):
         return self.encode()
+
+    if PY3K:
+        __str__ = __repr__ = __unicode__
 
     def encode(self, encoding=DEFAULT_OUTPUT_ENCODING,
                indent_level=None, substitute_html_entities=False):
@@ -872,7 +878,7 @@ class SoupStrainer(object):
         found = None
         # If given a list of items, scan it for a text element that
         # matches.
-        if hasattr(markup, '__iter__') and not isinstance(markup, Tag):
+        if hasattr(markup, '__iter__') and not isinstance(markup, (Tag, basestring)):
             for element in markup:
                 if isinstance(element, NavigableString) \
                        and self.search(element):
@@ -912,8 +918,8 @@ class SoupStrainer(object):
                 # It's a regexp object.
                 result = markup and match_against.search(markup)
             elif (hasattr(match_against, '__iter__')
-                  and (markup is not None
-                       or not isinstance(match_against, basestring))):
+                  and markup is not None
+                  and not isinstance(match_against, basestring)):
                 result = markup in match_against
             elif hasattr(match_against, 'items'):
                 result = match_against in markup
```