6 files changed, 84 insertions, 77 deletions
diff --git a/bs4/__init__.py b/bs4/__init__.py
index 8baeec4..6406bef 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -16,7 +16,6 @@ For more than you ever wanted to know about Beautiful Soup, see the
 documentation:
 http://www.crummy.com/software/BeautifulSoup/documentation.html
 """
-from __future__ import generators
 
 __author__ = "Leonard Richardson (leonardr@segfault.org)"
 __version__ = "4.0.0a"
@@ -27,9 +26,9 @@ __all__ = ['BeautifulSoup']
 
 import re
 
-from builder import builder_registry
-from dammit import UnicodeDammit
-from element import DEFAULT_OUTPUT_ENCODING, NavigableString, Tag
+from .builder import builder_registry
+from .dammit import UnicodeDammit
+from .element import DEFAULT_OUTPUT_ENCODING, NavigableString, Tag
 
 
 class BeautifulSoup(Tag):
@@ -160,10 +159,10 @@ class BeautifulSoup(Tag):
 
     def object_was_parsed(self, o):
         """Add an object to the parse tree."""
-        o.setup(self.currentTag, self.previous)
-        if self.previous:
-            self.previous.next = o
-        self.previous = o
+        o.setup(self.currentTag, self.previous_element)
+        if self.previous_element:
+            self.previous_element.next_element = o
+        self.previous_element = o
         self.currentTag.contents.append(o)
 
 
@@ -207,12 +206,12 @@ class BeautifulSoup(Tag):
             return None
 
         tag = Tag(self, self.builder, name, attrs, self.currentTag,
-                  self.previous)
+                  self.previous_element)
         if tag is None:
             return tag
-        if self.previous:
-            self.previous.next = tag
-        self.previous = tag
+        if self.previous_element:
+            self.previous_element.next_element = tag
+        self.previous_element = tag
         self.pushTag(tag)
         return tag
 
diff --git a/bs4/builder/__init__.py b/bs4/builder/__init__.py
index afd49b9..e6d4fa1 100644
--- a/bs4/builder/__init__.py
+++ b/bs4/builder/__init__.py
@@ -144,7 +144,7 @@ class SAXTreeBuilder(TreeBuilder):
         pass
 
     def startElement(self, name, attrs):
-        attrs = dict((key[1], value) for key, value in attrs.items())
+        attrs = dict((key[1], value) for key, value in list(attrs.items()))
         #print "Start %s, %r" % (name, attrs)
         self.soup.handle_starttag(name, attrs)
 
@@ -247,16 +247,16 @@ def register_treebuilders_from(module):
 # builder registrations will take precedence. In general, we want
 # html5lib to take precedence over lxml, because it's more
 # reliable. And we only want to use HTMLParser as a last result.
-import _htmlparser
+from . import _htmlparser
 register_treebuilders_from(_htmlparser)
 try:
-    import _lxml
+    from . import _lxml
     register_treebuilders_from(_lxml)
 except ImportError:
     # They don't have lxml installed.
     pass
 try:
-    import _html5lib
+    from . import _html5lib
     register_treebuilders_from(_html5lib)
 except ImportError:
     # They don't have html5lib installed.
diff --git a/bs4/builder/_html5lib.py b/bs4/builder/_html5lib.py
index d74c4b0..e9d7f58 100644
--- a/bs4/builder/_html5lib.py
+++ b/bs4/builder/_html5lib.py
@@ -102,18 +102,18 @@ class AttrList(object):
         self.element = element
         self.attrs = dict(self.element.attrs)
     def __iter__(self):
-        return self.attrs.items().__iter__()
+        return iter(list(self.attrs.items()))
     def __setitem__(self, name, value):
         "set attr", name, value
         self.element[name] = value
     def items(self):
-        return self.attrs.items()
+        return list(self.attrs.items())
     def keys(self):
-        return self.attrs.keys()
+        return list(self.attrs.keys())
     def __getitem__(self, name):
         return self.attrs[name]
     def __contains__(self, name):
-        return name in self.attrs.keys()
+        return name in self.attrs
 
 
 class Element(html5lib.treebuilders._base.Node):
@@ -155,7 +155,7 @@ class Element(html5lib.treebuilders._base.Node):
 
     def setAttributes(self, attributes):
         if attributes is not None and attributes != {}:
-            for name, value in attributes.items():
+            for name, value in list(attributes.items()):
                 self.element[name] =  value
             # The attributes may contain variables that need substitution.
             # Call set_up_substitutions manually.
diff --git a/bs4/builder/_lxml.py b/bs4/builder/_lxml.py
index 57798f6..07b2032 100644
--- a/bs4/builder/_lxml.py
+++ b/bs4/builder/_lxml.py
@@ -3,6 +3,7 @@ __all__ = [
     'LXMLTreeBuilder',
     ]
 
+import collections
 from lxml import etree
 from bs4.element import Comment, Doctype
 from bs4.builder import (
@@ -36,7 +37,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
         if parser is None:
             # Use the default parser.
             parser = self.default_parser
-        if callable(parser):
+        if isinstance(parser, collections.Callable):
             # Instantiate the parser with default arguments
             parser = parser(target=self, strip_cdata=False)
         self.parser = parser
diff --git a/bs4/dammit.py b/bs4/dammit.py
index 75d445e..4aafe81 100644
--- a/bs4/dammit.py
+++ b/bs4/dammit.py
@@ -9,7 +9,6 @@ encoding; that's the tree builder's job.
 import codecs
 from htmlentitydefs import codepoint2name
 import re
-import types
 
 # Autodetects character encodings. Very useful.
 # Download from http://chardet.feedparser.org/
@@ -37,7 +36,7 @@ class EntitySubstitution(object):
         lookup = {}
         reverse_lookup = {}
         characters = []
-        for codepoint, name in codepoint2name.items():
+        for codepoint, name in list(codepoint2name.items()):
             if codepoint == 34:
                 # There's no point in turning the quotation mark into
                 # &quot;, unless it happens within an attribute value, which
@@ -175,7 +174,7 @@ class UnicodeDammit:
         self.tried_encodings = []
         if markup == '' or isinstance(markup, unicode):
             self.original_encoding = None
-            self.unicode = unicode(markup)
+            self.unicode_markup = unicode(markup)
             return
 
         u = None
@@ -197,7 +196,7 @@ class UnicodeDammit:
                 if u:
                     break
 
-        self.unicode = u
+        self.unicode_markup = u
         if not u: self.original_encoding = None
 
     def _sub_ms_char(self, match):
@@ -205,7 +204,7 @@ class UnicodeDammit:
         entity."""
         orig = match.group(1)
         sub = self.MS_CHARS.get(orig)
-        if type(sub) == types.TupleType:
+        if type(sub) == tuple:
             if self.smart_quotes_to == 'xml':
                 sub = '&#x'.encode() + sub[1].encode() + ';'.encode()
             else:
@@ -234,7 +233,7 @@ class UnicodeDammit:
             u = self._to_unicode(markup, proposed)
             self.markup = u
             self.original_encoding = proposed
-        except Exception, e:
+        except Exception as e:
             # print "That didn't work!"
             # print e
             return None
@@ -375,7 +374,7 @@ class UnicodeDammit:
                     250,251,252,253,254,255)
             import string
             c.EBCDIC_TO_ASCII_MAP = string.maketrans( \
-            ''.join(map(chr, range(256))), ''.join(map(chr, emap)))
+            ''.join(map(chr, list(range(256)))), ''.join(map(chr, emap)))
         return s.translate(c.EBCDIC_TO_ASCII_MAP)
 
     MS_CHARS = { '\x80' : ('euro', '20AC'),
diff --git a/bs4/element.py b/bs4/element.py
index 587078c..a9814e1 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -1,9 +1,5 @@
+import collections
 import re
-import types
-try:
-    from htmlentitydefs import name2codepoint
-except ImportError:
-    name2codepoint = {}
 from bs4.dammit import EntitySubstitution
 
 DEFAULT_OUTPUT_ENCODING = "utf-8"
@@ -13,12 +9,12 @@ class PageElement(object):
     """Contains the navigational information for some part of the page
     (either a tag or a piece of text)"""
 
-    def setup(self, parent=None, previous=None):
+    def setup(self, parent=None, previous_element=None):
         """Sets up the initial relations between this element and
         other elements."""
         self.parent = parent
-        self.previous = previous
-        self.next = None
+        self.previous_element = previous_element
+        self.next_element = None
         self.previousSibling = None
         self.nextSibling = None
         if self.parent and self.parent.contents:
@@ -52,14 +48,14 @@ class PageElement(object):
         #this element (and any children) hadn't been parsed. Connect
         #the two.
         lastChild = self._last_recursive_child()
-        nextElement = lastChild.next
+        nextElement = lastChild.next_element
 
-        if self.previous:
-            self.previous.next = nextElement
+        if self.previous_element:
+            self.previous_element.next_element = nextElement
         if nextElement:
-            nextElement.previous = self.previous
-        self.previous = None
-        lastChild.next = None
+            nextElement.previous_element = self.previous_element
+        self.previous_element = None
+        lastChild.next_element = None
 
         self.parent = None
         if self.previousSibling:
@@ -78,8 +74,7 @@ class PageElement(object):
 
     def insert(self, position, newChild):
         if (isinstance(newChild, basestring)
-            or isinstance(newChild, unicode)) \
-            and not isinstance(newChild, NavigableString):
+            and not isinstance(newChild, NavigableString)):
             newChild = NavigableString(newChild)
 
         position =  min(position, len(self.contents))
@@ -100,14 +95,14 @@ class PageElement(object):
         previousChild = None
         if position == 0:
             newChild.previousSibling = None
-            newChild.previous = self
+            newChild.previous_element = self
         else:
             previousChild = self.contents[position-1]
             newChild.previousSibling = previousChild
             newChild.previousSibling.nextSibling = newChild
-            newChild.previous = previousChild._last_recursive_child()
-        if newChild.previous:
-            newChild.previous.next = newChild
+            newChild.previous_element = previousChild._last_recursive_child()
+        if newChild.previous_element:
+            newChild.previous_element.next_element = newChild
 
         newChildsLastElement = newChild._last_recursive_child()
 
@@ -122,18 +117,18 @@ class PageElement(object):
                 if not parent: # This is the last element in the document.
                     break
             if parentsNextSibling:
-                newChildsLastElement.next = parentsNextSibling
+                newChildsLastElement.next_element = parentsNextSibling
             else:
-                newChildsLastElement.next = None
+                newChildsLastElement.next_element = None
         else:
             nextChild = self.contents[position]
             newChild.nextSibling = nextChild
             if newChild.nextSibling:
                 newChild.nextSibling.previousSibling = newChild
-            newChildsLastElement.next = nextChild
+            newChildsLastElement.next_element = nextChild
 
-        if newChildsLastElement.next:
-            newChildsLastElement.next.previous = newChildsLastElement
+        if newChildsLastElement.next_element:
+            newChildsLastElement.next_element.previous_element = newChildsLastElement
         self.contents.insert(position, newChild)
 
     def append(self, tag):
@@ -223,6 +218,14 @@ class PageElement(object):
     findParents = find_parents  # BS3
     fetchParents = find_parents # BS2
 
+    @property
+    def next(self):
+        return self.next_element
+
+    @property
+    def previous(self):
+        return self.previous_element
+
     #These methods do the real heavy lifting.
 
     def _find_one(self, method, name, attrs, text, **kwargs):
@@ -243,7 +246,7 @@ class PageElement(object):
         results = ResultSet(strainer)
         while True:
             try:
-                i = generator.next()
+                i = next(generator)
             except StopIteration:
                 break
             if i:
@@ -260,7 +263,7 @@ class PageElement(object):
     def next_elements(self):
         i = self
         while i:
-            i = i.next
+            i = i.next_element
             yield i
 
     @property
@@ -274,7 +277,7 @@ class PageElement(object):
     def previous_elements(self):
         i = self
         while i:
-            i = i.previous
+            i = i.previous_element
             yield i
 
     @property
@@ -341,7 +344,9 @@ class NavigableString(unicode, PageElement):
         if attr == 'string':
             return self
         else:
-            raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr)
+            raise AttributeError(
+                "'%s' object has no attribute '%s'" % (
+                    self.__class__.__name__, attr))
 
     def output_ready(self, substitute_html_entities=False):
         if substitute_html_entities:
@@ -458,8 +463,8 @@ class Tag(PageElement):
         attribute."""
         return self.attrs.get(key, default)
 
-    def has_key(self, key):
-        return self.attrs.has_key(key)
+    def has_attr(self, key):
+        return key in self.attrs
 
     def __getitem__(self, key):
         """tag[key] returns the value of the 'key' attribute for the tag,
@@ -488,14 +493,14 @@ class Tag(PageElement):
 
     def __delitem__(self, key):
         "Deleting tag[key] deletes all 'key' attributes for the tag."
-        if self.attrs.has_key(key):
+        if key in self.attrs:
             del self.attrs[key]
 
     def __call__(self, *args, **kwargs):
         """Calling a tag like a function is the same as calling its
         find_all() method. Eg. tag('a') returns a list of all the A tags
         found within this tag."""
-        return apply(self.find_all, args, kwargs)
+        return self.find_all(*args, **kwargs)
 
     def __getattr__(self, tag):
         #print "Getattr %s.%s" % (self.__class__, tag)
@@ -503,7 +508,8 @@ class Tag(PageElement):
             return self.find(tag[:-3])
         elif tag.find('__') != 0:
             return self.find(tag)
-        raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__, tag)
+        raise AttributeError(
+            "'%s' object has no attribute '%s'" % (self.__class__, tag))
 
     def __eq__(self, other):
         """Returns true iff this tag has the same name, the same attributes,
@@ -688,11 +694,11 @@ class Tag(PageElement):
     def recursive_children(self):
         if not len(self.contents):
             raise StopIteration # XXX return instead?
-        stopNode = self._last_recursive_child().next
+        stopNode = self._last_recursive_child().next_element
         current = self.contents[0]
         while current is not stopNode:
             yield current
-            current = current.next
+            current = current.next_element
 
     # Old names for backwards compatibility
     def childGenerator(self):
@@ -701,6 +707,10 @@ class Tag(PageElement):
     def recursiveChildGenerator(self):
         return self.recursive_children
 
+    # This was kind of misleading because has_key() (attributes) was
+    # different from __contains__ (contents). has_key() is gone in
+    # Python 3, anyway.
+    has_key = has_attr
 
 # Next, a couple classes to represent queries and their results.
 class SoupStrainer(object):
@@ -733,8 +743,9 @@ class SoupStrainer(object):
         if isinstance(markupName, Tag):
             markup = markupName
             markupAttrs = markup
-        callFunctionWithTagData = callable(self.name) \
-                                and not isinstance(markupName, Tag)
+        callFunctionWithTagData = (
+            isinstance(self.name, collections.Callable)
+            and not isinstance(markupName, Tag))
 
         if (not self.name) \
                or callFunctionWithTagData \
@@ -745,7 +756,7 @@ class SoupStrainer(object):
             else:
                 match = True
                 markupAttrMap = None
-                for attr, matchAgainst in self.attrs.items():
+                for attr, matchAgainst in list(self.attrs.items()):
                     if not markupAttrMap:
                          if hasattr(markupAttrs, 'get'):
                             markupAttrMap = markupAttrs
@@ -786,16 +797,16 @@ class SoupStrainer(object):
             if self._matches(markup, self.text):
                 found = markup
         else:
-            raise Exception, "I don't know how to match against a %s" \
-                  % markup.__class__
+            raise Exception(
+                "I don't know how to match against a %s" % markup.__class__)
         return found
 
     def _matches(self, markup, matchAgainst):
         #print "Matching %s against %s" % (markup, matchAgainst)
         result = False
-        if matchAgainst == True and type(matchAgainst) == types.BooleanType:
+        if matchAgainst == True and isinstance(matchAgainst, bool):
             result = markup != None
-        elif callable(matchAgainst):
+        elif isinstance(matchAgainst, collections.Callable):
             result = matchAgainst(markup)
         else:
             #Custom match methods take the tag as an argument, but all
@@ -813,12 +824,9 @@ class SoupStrainer(object):
                        or not isinstance(matchAgainst, basestring))):
                 result = markup in matchAgainst
             elif hasattr(matchAgainst, 'items'):
-                result = markup.has_key(matchAgainst)
+                result = matchAgainst in markup
             elif matchAgainst and isinstance(markup, basestring):
-                if isinstance(markup, unicode):
-                    matchAgainst = unicode(matchAgainst)
-                else:
-                    matchAgainst = str(matchAgainst)
+                matchAgainst = markup.__class__(matchAgainst)
 
             if not result:
                 result = matchAgainst == markup