From 927e5de747ed2d87b55eb6e8dad46d8598f27e09 Mon Sep 17 00:00:00 2001
From: Leonard Richardson <leonard.richardson@canonical.com>
Date: Sat, 21 May 2011 11:23:05 -0400
Subject: Changed dammit.py to require fewer changes to be Python 3 compatible.

---
 CHANGELOG          |  1 +
 bs4/dammit.py      | 13 ++++++-------
 tests/test_soup.py | 12 ++++++------
 3 files changed, 13 insertions(+), 13 deletions(-)
diff --git a/CHANGELOG b/CHANGELOG
index 00d80da..cd01b3b 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -38,6 +38,7 @@ work. Here are the renames:
 Some attributes have also been renamed:
 
  * Tag.isSelfClosing -> Tag.is_empty_element
+ * UnicodeDammit.unicode -> UnicodeDammit.unicode_markup
 
 So have some arguments to popular methods:
 
diff --git a/bs4/dammit.py b/bs4/dammit.py
index 75d445e..4aafe81 100644
--- a/bs4/dammit.py
+++ b/bs4/dammit.py
@@ -9,7 +9,6 @@ encoding; that's the tree builder's job.
 import codecs
 from htmlentitydefs import codepoint2name
 import re
-import types
 
 # Autodetects character encodings. Very useful.
 # Download from http://chardet.feedparser.org/
@@ -37,7 +36,7 @@ class EntitySubstitution(object):
         lookup = {}
         reverse_lookup = {}
         characters = []
-        for codepoint, name in codepoint2name.items():
+        for codepoint, name in list(codepoint2name.items()):
             if codepoint == 34:
                 # There's no point in turning the quotation mark into
                 # &quot;, unless it happens within an attribute value, which
@@ -175,7 +174,7 @@ class UnicodeDammit:
         self.tried_encodings = []
         if markup == '' or isinstance(markup, unicode):
             self.original_encoding = None
-            self.unicode = unicode(markup)
+            self.unicode_markup = unicode(markup)
             return
 
         u = None
@@ -197,7 +196,7 @@ class UnicodeDammit:
                 if u:
                     break
 
-        self.unicode = u
+        self.unicode_markup = u
         if not u: self.original_encoding = None
 
     def _sub_ms_char(self, match):
@@ -205,7 +204,7 @@ class UnicodeDammit:
         entity."""
         orig = match.group(1)
         sub = self.MS_CHARS.get(orig)
-        if type(sub) == types.TupleType:
+        if type(sub) == tuple:
             if self.smart_quotes_to == 'xml':
                 sub = '&#x'.encode() + sub[1].encode() + ';'.encode()
             else:
@@ -234,7 +233,7 @@ class UnicodeDammit:
             u = self._to_unicode(markup, proposed)
             self.markup = u
             self.original_encoding = proposed
-        except Exception, e:
+        except Exception as e:
             # print "That didn't work!"
             # print e
             return None
@@ -375,7 +374,7 @@ class UnicodeDammit:
                     250,251,252,253,254,255)
             import string
             c.EBCDIC_TO_ASCII_MAP = string.maketrans( \
-            ''.join(map(chr, range(256))), ''.join(map(chr, emap)))
+            ''.join(map(chr, list(range(256)))), ''.join(map(chr, emap)))
         return s.translate(c.EBCDIC_TO_ASCII_MAP)
 
     MS_CHARS = { '\x80' : ('euro', '20AC'),
diff --git a/tests/test_soup.py b/tests/test_soup.py
index d283b8a..87d6f3b 100644
--- a/tests/test_soup.py
+++ b/tests/test_soup.py
@@ -86,37 +86,37 @@ class TestUnicodeDammit(unittest.TestCase):
         markup = "<foo>\x91\x92\x93\x94</foo>"
         dammit = UnicodeDammit(markup)
         self.assertEquals(
-            dammit.unicode, u"<foo>\u2018\u2019\u201c\u201d</foo>")
+            dammit.unicode_markup, u"<foo>\u2018\u2019\u201c\u201d</foo>")
 
     def test_smart_quotes_to_xml_entities(self):
         markup = "<foo>\x91\x92\x93\x94</foo>"
         dammit = UnicodeDammit(markup, smart_quotes_to="xml")
         self.assertEquals(
-            dammit.unicode, "<foo>&#x2018;&#x2019;&#x201C;&#x201D;</foo>")
+            dammit.unicode_markup, "<foo>&#x2018;&#x2019;&#x201C;&#x201D;</foo>")
 
     def test_smart_quotes_to_html_entities(self):
         markup = "<foo>\x91\x92\x93\x94</foo>"
         dammit = UnicodeDammit(markup, smart_quotes_to="html")
         self.assertEquals(
-            dammit.unicode, "<foo>&lsquo;&rsquo;&ldquo;&rdquo;</foo>")
+            dammit.unicode_markup, "<foo>&lsquo;&rsquo;&ldquo;&rdquo;</foo>")
 
     def test_detect_utf8(self):
         utf8 = "\xc3\xa9"
         dammit = UnicodeDammit(utf8)
-        self.assertEquals(dammit.unicode, u'\xe9')
+        self.assertEquals(dammit.unicode_markup, u'\xe9')
         self.assertEquals(dammit.original_encoding, 'utf-8')
 
     def test_convert_hebrew(self):
         hebrew = "\xed\xe5\xec\xf9"
         dammit = UnicodeDammit(hebrew, ["iso-8859-8"])
         self.assertEquals(dammit.original_encoding, 'iso-8859-8')
-        self.assertEquals(dammit.unicode, u'\u05dd\u05d5\u05dc\u05e9')
+        self.assertEquals(dammit.unicode_markup, u'\u05dd\u05d5\u05dc\u05e9')
 
     def test_dont_see_smart_quotes_where_there_are_none(self):
         utf_8 = "\343\202\261\343\203\274\343\202\277\343\202\244 Watch"
         dammit = UnicodeDammit(utf_8)
         self.assertEquals(dammit.original_encoding, 'utf-8')
-        self.assertEquals(dammit.unicode.encode("utf-8"), utf_8)
+        self.assertEquals(dammit.unicode_markup.encode("utf-8"), utf_8)
 
     def test_ignore_inappropriate_codecs(self):
         utf8_data = u"Räksmörgås".encode("utf-8")
-- 
cgit v1.2.3


From 5045b8cfcdd7556a1e2c4a2999d4ed1108b0425a Mon Sep 17 00:00:00 2001
From: Leonard Richardson <leonard.richardson@canonical.com>
Date: Sat, 21 May 2011 11:26:28 -0400
Subject: Reduced the difference between Python 2's __init__.py and Python 3's
 __init__.py.

---
 bs4/__init__.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/bs4/__init__.py b/bs4/__init__.py
index 8baeec4..22ecc43 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -16,7 +16,6 @@ For more than you ever wanted to know about Beautiful Soup, see the
 documentation:
 http://www.crummy.com/software/BeautifulSoup/documentation.html
 """
-from __future__ import generators
 
 __author__ = "Leonard Richardson (leonardr@segfault.org)"
 __version__ = "4.0.0a"
@@ -27,9 +26,9 @@ __all__ = ['BeautifulSoup']
 
 import re
 
-from builder import builder_registry
-from dammit import UnicodeDammit
-from element import DEFAULT_OUTPUT_ENCODING, NavigableString, Tag
+from .builder import builder_registry
+from .dammit import UnicodeDammit
+from .element import DEFAULT_OUTPUT_ENCODING, NavigableString, Tag
 
 
 class BeautifulSoup(Tag):
-- 
cgit v1.2.3


From bc568d5a817c9104d984215e036dad87506f6bfc Mon Sep 17 00:00:00 2001
From: Leonard Richardson <leonard.richardson@canonical.com>
Date: Sat, 21 May 2011 11:57:46 -0400
Subject: Renamed .next and .previous to .next_element and .previous_element.

---
 CHANGELOG          |  2 ++
 bs4/__init__.py    | 16 +++++++-------
 bs4/element.py     | 64 +++++++++++++++++++++++++++++-------------------------
 tests/test_tree.py | 62 ++++++++++++++++++++++++++--------------------------
 4 files changed, 76 insertions(+), 68 deletions(-)

diff --git a/CHANGELOG b/CHANGELOG
index cd01b3b..a636544 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -39,6 +39,8 @@ Some attributes have also been renamed:
 
  * Tag.isSelfClosing -> Tag.is_empty_element
  * UnicodeDammit.unicode -> UnicodeDammit.unicode_markup
+ * Tag.next -> Tag.next_element
+ * Tag.previous -> Tag.previous_element
 
 So have some arguments to popular methods:
 
diff --git a/bs4/__init__.py b/bs4/__init__.py
index 22ecc43..6406bef 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -159,10 +159,10 @@ class BeautifulSoup(Tag):
 
     def object_was_parsed(self, o):
         """Add an object to the parse tree."""
-        o.setup(self.currentTag, self.previous)
-        if self.previous:
-            self.previous.next = o
-        self.previous = o
+        o.setup(self.currentTag, self.previous_element)
+        if self.previous_element:
+            self.previous_element.next_element = o
+        self.previous_element = o
         self.currentTag.contents.append(o)
 
 
@@ -206,12 +206,12 @@ class BeautifulSoup(Tag):
             return None
 
         tag = Tag(self, self.builder, name, attrs, self.currentTag,
-                  self.previous)
+                  self.previous_element)
         if tag is None:
             return tag
-        if self.previous:
-            self.previous.next = tag
-        self.previous = tag
+        if self.previous_element:
+            self.previous_element.next_element = tag
+        self.previous_element = tag
         self.pushTag(tag)
         return tag
 
diff --git a/bs4/element.py b/bs4/element.py
index 587078c..729ab36 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -1,9 +1,6 @@
+import collections
 import re
 import types
-try:
-    from htmlentitydefs import name2codepoint
-except ImportError:
-    name2codepoint = {}
 from bs4.dammit import EntitySubstitution
 
 DEFAULT_OUTPUT_ENCODING = "utf-8"
@@ -13,12 +10,12 @@ class PageElement(object):
     """Contains the navigational information for some part of the page
     (either a tag or a piece of text)"""
 
-    def setup(self, parent=None, previous=None):
+    def setup(self, parent=None, previous_element=None):
         """Sets up the initial relations between this element and
         other elements."""
         self.parent = parent
-        self.previous = previous
-        self.next = None
+        self.previous_element = previous_element
+        self.next_element = None
         self.previousSibling = None
         self.nextSibling = None
         if self.parent and self.parent.contents:
@@ -52,14 +49,14 @@ class PageElement(object):
         #this element (and any children) hadn't been parsed. Connect
         #the two.
         lastChild = self._last_recursive_child()
-        nextElement = lastChild.next
+        nextElement = lastChild.next_element
 
-        if self.previous:
-            self.previous.next = nextElement
+        if self.previous_element:
+            self.previous_element.next_element = nextElement
         if nextElement:
-            nextElement.previous = self.previous
-        self.previous = None
-        lastChild.next = None
+            nextElement.previous_element = self.previous_element
+        self.previous_element = None
+        lastChild.next_element = None
 
         self.parent = None
         if self.previousSibling:
@@ -100,14 +97,14 @@ class PageElement(object):
         previousChild = None
         if position == 0:
             newChild.previousSibling = None
-            newChild.previous = self
+            newChild.previous_element = self
         else:
             previousChild = self.contents[position-1]
             newChild.previousSibling = previousChild
             newChild.previousSibling.nextSibling = newChild
-            newChild.previous = previousChild._last_recursive_child()
-        if newChild.previous:
-            newChild.previous.next = newChild
+            newChild.previous_element = previousChild._last_recursive_child()
+        if newChild.previous_element:
+            newChild.previous_element.next_element = newChild
 
         newChildsLastElement = newChild._last_recursive_child()
 
@@ -122,18 +119,18 @@ class PageElement(object):
                 if not parent: # This is the last element in the document.
                     break
             if parentsNextSibling:
-                newChildsLastElement.next = parentsNextSibling
+                newChildsLastElement.next_element = parentsNextSibling
             else:
-                newChildsLastElement.next = None
+                newChildsLastElement.next_element = None
         else:
             nextChild = self.contents[position]
             newChild.nextSibling = nextChild
             if newChild.nextSibling:
                 newChild.nextSibling.previousSibling = newChild
-            newChildsLastElement.next = nextChild
+            newChildsLastElement.next_element = nextChild
 
-        if newChildsLastElement.next:
-            newChildsLastElement.next.previous = newChildsLastElement
+        if newChildsLastElement.next_element:
+            newChildsLastElement.next_element.previous_element = newChildsLastElement
         self.contents.insert(position, newChild)
 
     def append(self, tag):
@@ -223,6 +220,14 @@ class PageElement(object):
     findParents = find_parents  # BS3
     fetchParents = find_parents # BS2
 
+    @property
+    def next(self):
+        return self.next_element
+
+    @property
+    def previous(self):
+        return self.previous_element
+
     #These methods do the real heavy lifting.
 
     def _find_one(self, method, name, attrs, text, **kwargs):
@@ -260,7 +265,7 @@ class PageElement(object):
     def next_elements(self):
         i = self
         while i:
-            i = i.next
+            i = i.next_element
             yield i
 
     @property
@@ -274,7 +279,7 @@ class PageElement(object):
     def previous_elements(self):
         i = self
         while i:
-            i = i.previous
+            i = i.previous_element
             yield i
 
     @property
@@ -688,11 +693,11 @@ class Tag(PageElement):
     def recursive_children(self):
         if not len(self.contents):
             raise StopIteration # XXX return instead?
-        stopNode = self._last_recursive_child().next
+        stopNode = self._last_recursive_child().next_element
         current = self.contents[0]
         while current is not stopNode:
             yield current
-            current = current.next
+            current = current.next_element
 
     # Old names for backwards compatibility
     def childGenerator(self):
@@ -733,8 +738,9 @@ class SoupStrainer(object):
         if isinstance(markupName, Tag):
             markup = markupName
             markupAttrs = markup
-        callFunctionWithTagData = callable(self.name) \
-                                and not isinstance(markupName, Tag)
+        callFunctionWithTagData = (
+            isinstance(self.name, collections.Callable)
+            and not isinstance(markupName, Tag))
 
         if (not self.name) \
                or callFunctionWithTagData \
@@ -795,7 +801,7 @@ class SoupStrainer(object):
         result = False
         if matchAgainst == True and type(matchAgainst) == types.BooleanType:
             result = markup != None
-        elif callable(matchAgainst):
+        elif isinstance(matchAgainst, collections.Callable):
             result = matchAgainst(markup)
         else:
             #Custom match methods take the tag as an argument, but all
diff --git a/tests/test_tree.py b/tests/test_tree.py
index f2989fe..87a7e3a 100644
--- a/tests/test_tree.py
+++ b/tests/test_tree.py
@@ -307,16 +307,16 @@ class TestNextOperations(ProximityTest):
         self.start = self.tree.b
 
     def test_next(self):
-        self.assertEquals(self.start.next, "One")
-        self.assertEquals(self.start.next.next['id'], "2")
+        self.assertEquals(self.start.next_element, "One")
+        self.assertEquals(self.start.next_element.next_element['id'], "2")
 
     def test_next_of_last_item_is_none(self):
         last = self.tree.find(text="Three")
-        self.assertEquals(last.next, None)
+        self.assertEquals(last.next_element, None)
 
     def test_next_of_root_is_none(self):
         # The document root is outside the next/previous chain.
-        self.assertEquals(self.tree.next, None)
+        self.assertEquals(self.tree.next_element, None)
 
     def test_find_all_next(self):
         self.assertSelects(self.start.find_all_next('b'), ["Two", "Three"])
@@ -352,17 +352,17 @@ class TestPreviousOperations(ProximityTest):
         self.end = self.tree.find(text="Three")
 
     def test_previous(self):
-        self.assertEquals(self.end.previous['id'], "3")
-        self.assertEquals(self.end.previous.previous, "Two")
+        self.assertEquals(self.end.previous_element['id'], "3")
+        self.assertEquals(self.end.previous_element.previous_element, "Two")
 
     def test_previous_of_first_item_is_none(self):
         first = self.tree.find('html')
-        self.assertEquals(first.previous, None)
+        self.assertEquals(first.previous_element, None)
 
     def test_previous_of_root_is_none(self):
         # The document root is outside the next/previous chain.
         # XXX This is broken!
-        #self.assertEquals(self.tree.previous, None)
+        #self.assertEquals(self.tree.previous_element, None)
         pass
 
     def test_find_all_previous(self):
@@ -436,7 +436,7 @@ class TestNextSibling(SiblingTest):
         self.assertEquals(self.start.nextSibling.nextSibling['id'], '3')
 
         # Note the difference between nextSibling and next.
-        self.assertEquals(self.start.next['id'], '1.1')
+        self.assertEquals(self.start.next_element['id'], '1.1')
 
     def test_next_sibling_may_not_exist(self):
         self.assertEquals(self.tree.html.nextSibling, None)
@@ -481,7 +481,7 @@ class TestPreviousSibling(SiblingTest):
         self.assertEquals(self.end.previousSibling.previousSibling['id'], '2')
 
         # Note the difference between previousSibling and previous.
-        self.assertEquals(self.end.previous['id'], '3.1')
+        self.assertEquals(self.end.previous_element['id'], '3.1')
 
     def test_previous_sibling_may_not_exist(self):
         self.assertEquals(self.tree.html.previousSibling, None)
@@ -565,10 +565,10 @@ class TestTreeModification(SoupTest):
         soup.find(text="Argh!").replace_with("Hooray!")
         new_text = soup.find(text="Hooray!")
         b = soup.b
-        self.assertEqual(new_text.previous, b)
+        self.assertEqual(new_text.previous_element, b)
         self.assertEqual(new_text.parent, b)
-        self.assertEqual(new_text.previous.next, new_text)
-        self.assertEqual(new_text.next, None)
+        self.assertEqual(new_text.previous_element.next_element, new_text)
+        self.assertEqual(new_text.next_element, None)
 
     def test_consecutive_text_nodes(self):
         # A builder should never create two consecutive text nodes,
@@ -582,14 +582,14 @@ class TestTreeModification(SoupTest):
                 "<a><b>Argh!Hooray!</b><c></c></a>"))
 
         new_text = soup.find(text="Hooray!")
-        self.assertEqual(new_text.previous, "Argh!")
-        self.assertEqual(new_text.previous.next, new_text)
+        self.assertEqual(new_text.previous_element, "Argh!")
+        self.assertEqual(new_text.previous_element.next_element, new_text)
 
         self.assertEqual(new_text.previousSibling, "Argh!")
         self.assertEqual(new_text.previousSibling.nextSibling, new_text)
 
         self.assertEqual(new_text.nextSibling, None)
-        self.assertEqual(new_text.next, soup.c)
+        self.assertEqual(new_text.next_element, soup.c)
 
 
     def test_insert_tag(self):
@@ -610,8 +610,8 @@ class TestTreeModification(SoupTest):
         self.assertEqual(magic_tag.previousSibling, b_tag)
 
         find = b_tag.find(text="Find")
-        self.assertEqual(find.next, magic_tag)
-        self.assertEqual(magic_tag.previous, find)
+        self.assertEqual(find.next_element, magic_tag)
+        self.assertEqual(magic_tag.previous_element, find)
 
         c_tag = soup.c
         self.assertEqual(magic_tag.nextSibling, c_tag)
@@ -619,8 +619,8 @@ class TestTreeModification(SoupTest):
 
         the = magic_tag.find(text="the")
         self.assertEqual(the.parent, magic_tag)
-        self.assertEqual(the.next, c_tag)
-        self.assertEqual(c_tag.previous, the)
+        self.assertEqual(the.next_element, c_tag)
+        self.assertEqual(c_tag.previous_element, the)
 
     def test_insert_works_on_empty_element_tag(self):
         # This is a little strange, since most HTML parsers don't allow
@@ -643,7 +643,7 @@ class TestTreeModification(SoupTest):
 
         self.assertEquals(show.parent, None)
         self.assertEquals(no.parent, soup.p)
-        self.assertEquals(no.next, "no")
+        self.assertEquals(no.next_element, "no")
         self.assertEquals(no.nextSibling, " business")
 
     def test_nested_tag_replace_with(self):
@@ -662,24 +662,24 @@ class TestTreeModification(SoupTest):
 
         # The <b> tag is now an orphan.
         self.assertEqual(remove_tag.parent, None)
-        self.assertEqual(remove_tag.find(text="right").next, None)
-        self.assertEqual(remove_tag.previous, None)
+        self.assertEqual(remove_tag.find(text="right").next_element, None)
+        self.assertEqual(remove_tag.previous_element, None)
         self.assertEqual(remove_tag.nextSibling, None)
         self.assertEqual(remove_tag.previousSibling, None)
 
         # The <f> tag is now connected to the <a> tag.
         self.assertEqual(move_tag.parent, soup.a)
-        self.assertEqual(move_tag.previous, "We")
-        self.assertEqual(move_tag.next.next, soup.e)
+        self.assertEqual(move_tag.previous_element, "We")
+        self.assertEqual(move_tag.next_element.next_element, soup.e)
         self.assertEqual(move_tag.nextSibling, None)
 
         # The gap where the <f> tag used to be has been mended, and
         # the word "to" is now connected to the <g> tag.
         to_text = soup.find(text="to")
         g_tag = soup.g
-        self.assertEqual(to_text.next, g_tag)
+        self.assertEqual(to_text.next_element, g_tag)
         self.assertEqual(to_text.nextSibling, g_tag)
-        self.assertEqual(g_tag.previous, to_text)
+        self.assertEqual(g_tag.previous_element, to_text)
         self.assertEqual(g_tag.previousSibling, to_text)
 
     def test_extract(self):
@@ -696,15 +696,15 @@ class TestTreeModification(SoupTest):
         # The extracted tag is now an orphan.
         self.assertEqual(len(soup.body.contents), 2)
         self.assertEqual(extracted.parent, None)
-        self.assertEqual(extracted.previous, None)
-        self.assertEqual(extracted.next.next, None)
+        self.assertEqual(extracted.previous_element, None)
+        self.assertEqual(extracted.next_element.next_element, None)
 
         # The gap where the extracted tag used to be has been mended.
         content_1 = soup.find(text="Some content. ")
         content_2 = soup.find(text=" More content.")
-        self.assertEquals(content_1.next, content_2)
+        self.assertEquals(content_1.next_element, content_2)
         self.assertEquals(content_1.nextSibling, content_2)
-        self.assertEquals(content_2.previous, content_1)
+        self.assertEquals(content_2.previous_element, content_1)
         self.assertEquals(content_2.previousSibling, content_1)
 
 
-- 
cgit v1.2.3


From 7103a5f5ebcf655f9f8288eb54663b2485e197a9 Mon Sep 17 00:00:00 2001
From: Leonard Richardson <leonard.richardson@canonical.com>
Date: Sat, 21 May 2011 12:00:16 -0400
Subject: More Python 3 compatibility.

---
 bs4/element.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/bs4/element.py b/bs4/element.py
index 729ab36..c7dbd6b 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -75,8 +75,7 @@ class PageElement(object):
 
     def insert(self, position, newChild):
         if (isinstance(newChild, basestring)
-            or isinstance(newChild, unicode)) \
-            and not isinstance(newChild, NavigableString):
+            and not isinstance(newChild, NavigableString)):
             newChild = NavigableString(newChild)
 
         position =  min(position, len(self.contents))
@@ -248,7 +247,7 @@ class PageElement(object):
         results = ResultSet(strainer)
         while True:
             try:
-                i = generator.next()
+                i = next(generator)
             except StopIteration:
                 break
             if i:
@@ -346,7 +345,9 @@ class NavigableString(unicode, PageElement):
         if attr == 'string':
             return self
         else:
-            raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr)
+            raise AttributeError(
+                "'%s' object has no attribute '%s'" % (
+                    self.__class__.__name__, attr))
 
     def output_ready(self, substitute_html_entities=False):
         if substitute_html_entities:
@@ -464,7 +465,7 @@ class Tag(PageElement):
         return self.attrs.get(key, default)
 
     def has_key(self, key):
-        return self.attrs.has_key(key)
+        return key in self.attrs
 
     def __getitem__(self, key):
         """tag[key] returns the value of the 'key' attribute for the tag,
@@ -493,7 +494,7 @@ class Tag(PageElement):
 
     def __delitem__(self, key):
         "Deleting tag[key] deletes all 'key' attributes for the tag."
-        if self.attrs.has_key(key):
+        if key in self.attrs:
             del self.attrs[key]
 
     def __call__(self, *args, **kwargs):
-- 
cgit v1.2.3


From f2f5df1563c3861a1f28bcfc0532d2e54de50cab Mon Sep 17 00:00:00 2001
From: Leonard Richardson <leonard.richardson@canonical.com>
Date: Sat, 21 May 2011 12:01:25 -0400
Subject: More Python 3 compatibility.

---
 bs4/element.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/bs4/element.py b/bs4/element.py
index c7dbd6b..a10e615 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -501,7 +501,7 @@ class Tag(PageElement):
         """Calling a tag like a function is the same as calling its
         find_all() method. Eg. tag('a') returns a list of all the A tags
         found within this tag."""
-        return apply(self.find_all, args, kwargs)
+        return self.find_all(*args, **kwargs)
 
     def __getattr__(self, tag):
         #print "Getattr %s.%s" % (self.__class__, tag)
@@ -509,7 +509,8 @@ class Tag(PageElement):
             return self.find(tag[:-3])
         elif tag.find('__') != 0:
             return self.find(tag)
-        raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__, tag)
+        raise AttributeError(
+            "'%s' object has no attribute '%s'" % (self.__class__, tag))
 
     def __eq__(self, other):
         """Returns true iff this tag has the same name, the same attributes,
-- 
cgit v1.2.3


From 60b789089df25026b48d0a63b054bfa1e347aac9 Mon Sep 17 00:00:00 2001
From: Leonard Richardson <leonard.richardson@canonical.com>
Date: Sat, 21 May 2011 12:04:42 -0400
Subject: More Python 3 compatibility.

---
 bs4/element.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/bs4/element.py b/bs4/element.py
index a10e615..fb768d1 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -1,6 +1,5 @@
 import collections
 import re
-import types
 from bs4.dammit import EntitySubstitution
 
 DEFAULT_OUTPUT_ENCODING = "utf-8"
@@ -753,7 +752,7 @@ class SoupStrainer(object):
             else:
                 match = True
                 markupAttrMap = None
-                for attr, matchAgainst in self.attrs.items():
+                for attr, matchAgainst in list(self.attrs.items()):
                     if not markupAttrMap:
                          if hasattr(markupAttrs, 'get'):
                             markupAttrMap = markupAttrs
@@ -794,14 +793,14 @@ class SoupStrainer(object):
             if self._matches(markup, self.text):
                 found = markup
         else:
-            raise Exception, "I don't know how to match against a %s" \
-                  % markup.__class__
+            raise Exception(
+                "I don't know how to match against a %s" % markup.__class__)
         return found
 
     def _matches(self, markup, matchAgainst):
         #print "Matching %s against %s" % (markup, matchAgainst)
         result = False
-        if matchAgainst == True and type(matchAgainst) == types.BooleanType:
+        if matchAgainst == True and isinstance(matchAgainst, bool):
             result = markup != None
         elif isinstance(matchAgainst, collections.Callable):
             result = matchAgainst(markup)
@@ -821,7 +820,7 @@ class SoupStrainer(object):
                        or not isinstance(matchAgainst, basestring))):
                 result = markup in matchAgainst
             elif hasattr(matchAgainst, 'items'):
-                result = markup.has_key(matchAgainst)
+                result = matchAgainst in markup
             elif matchAgainst and isinstance(markup, basestring):
                 if isinstance(markup, unicode):
                     matchAgainst = unicode(matchAgainst)
-- 
cgit v1.2.3


From 581e6d941036081ad7d09d51b3469eb8de891e09 Mon Sep 17 00:00:00 2001
From: Leonard Richardson <leonard.richardson@canonical.com>
Date: Sat, 21 May 2011 12:06:15 -0400
Subject: More Python 3 compatibility.

---
 bs4/element.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/bs4/element.py b/bs4/element.py
index fb768d1..10377e9 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -822,10 +822,7 @@ class SoupStrainer(object):
             elif hasattr(matchAgainst, 'items'):
                 result = matchAgainst in markup
             elif matchAgainst and isinstance(markup, basestring):
-                if isinstance(markup, unicode):
-                    matchAgainst = unicode(matchAgainst)
-                else:
-                    matchAgainst = str(matchAgainst)
+                matchAgainst = markup.__class__(matchAgainst)
 
             if not result:
                 result = matchAgainst == markup
-- 
cgit v1.2.3


From fc53a81aa12a520af7144f17796c5d74c5aaff0a Mon Sep 17 00:00:00 2001
From: Leonard Richardson <leonard.richardson@canonical.com>
Date: Sat, 21 May 2011 12:08:01 -0400
Subject: More Python 3 compatibility.

---
 bs4/builder/__init__.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/bs4/builder/__init__.py b/bs4/builder/__init__.py
index afd49b9..222eb5b 100644
--- a/bs4/builder/__init__.py
+++ b/bs4/builder/__init__.py
@@ -144,7 +144,7 @@ class SAXTreeBuilder(TreeBuilder):
         pass
 
     def startElement(self, name, attrs):
-        attrs = dict((key[1], value) for key, value in attrs.items())
+        attrs = dict((key[1], value) for key, value in list(attrs.items()))
         #print "Start %s, %r" % (name, attrs)
         self.soup.handle_starttag(name, attrs)
 
@@ -247,16 +247,16 @@ def register_treebuilders_from(module):
 # builder registrations will take precedence. In general, we want
 # html5lib to take precedence over lxml, because it's more
 # reliable. And we only want to use HTMLParser as a last result.
-import _htmlparser
+from . import _htmlparser
 register_treebuilders_from(_htmlparser)
 try:
-    import _lxml
+    from . import _lxml
     register_treebuilders_from(_lxml)
 except ImportError:
     # They don't have lxml installed.
     pass
 try:
-    import _html5lib
+    from . import _html5lib
     register_treebuilders_from(_html5lib)
 except ImportError:
     # They don't have html5lib installed.
-- 
cgit v1.2.3


From 60630ce1ccd988bd449394c68d6eb817832c6e54 Mon Sep 17 00:00:00 2001
From: Leonard Richardson <leonard.richardson@canonical.com>
Date: Sat, 21 May 2011 12:13:26 -0400
Subject: More Python 3 compatibility.

---
 bs4/builder/__init__.py  |  2 +-
 bs4/builder/_html5lib.py | 10 +++++-----
 bs4/builder/_lxml.py     |  3 ++-
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/bs4/builder/__init__.py b/bs4/builder/__init__.py
index 222eb5b..e6d4fa1 100644
--- a/bs4/builder/__init__.py
+++ b/bs4/builder/__init__.py
@@ -247,7 +247,7 @@ def register_treebuilders_from(module):
 # builder registrations will take precedence. In general, we want
 # html5lib to take precedence over lxml, because it's more
 # reliable. And we only want to use HTMLParser as a last result.
-from . import _htmlparser
+from .import _htmlparser
 register_treebuilders_from(_htmlparser)
 try:
     from . import _lxml
diff --git a/bs4/builder/_html5lib.py b/bs4/builder/_html5lib.py
index d74c4b0..e9d7f58 100644
--- a/bs4/builder/_html5lib.py
+++ b/bs4/builder/_html5lib.py
@@ -102,18 +102,18 @@ class AttrList(object):
         self.element = element
         self.attrs = dict(self.element.attrs)
     def __iter__(self):
-        return self.attrs.items().__iter__()
+        return list(self.attrs.items()).__iter__()
     def __setitem__(self, name, value):
         "set attr", name, value
         self.element[name] = value
     def items(self):
-        return self.attrs.items()
+        return list(self.attrs.items())
     def keys(self):
-        return self.attrs.keys()
+        return list(self.attrs.keys())
     def __getitem__(self, name):
         return self.attrs[name]
     def __contains__(self, name):
-        return name in self.attrs.keys()
+        return name in list(self.attrs.keys())
 
 
 class Element(html5lib.treebuilders._base.Node):
@@ -155,7 +155,7 @@ class Element(html5lib.treebuilders._base.Node):
 
     def setAttributes(self, attributes):
         if attributes is not None and attributes != {}:
-            for name, value in attributes.items():
+            for name, value in list(attributes.items()):
                 self.element[name] =  value
             # The attributes may contain variables that need substitution.
             # Call set_up_substitutions manually.
diff --git a/bs4/builder/_lxml.py b/bs4/builder/_lxml.py
index 57798f6..07b2032 100644
--- a/bs4/builder/_lxml.py
+++ b/bs4/builder/_lxml.py
@@ -3,6 +3,7 @@ __all__ = [
     'LXMLTreeBuilder',
     ]
 
+import collections
 from lxml import etree
 from bs4.element import Comment, Doctype
 from bs4.builder import (
@@ -36,7 +37,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
         if parser is None:
             # Use the default parser.
             parser = self.default_parser
-        if callable(parser):
+        if isinstance(parser, collections.Callable):
             # Instantiate the parser with default arguments
             parser = parser(target=self, strip_cdata=False)
         self.parser = parser
-- 
cgit v1.2.3


From 728ff36cd480a02c562976c5279e5a41240c1bfb Mon Sep 17 00:00:00 2001
From: Leonard Richardson <leonard.richardson@canonical.com>
Date: Sat, 21 May 2011 12:26:35 -0400
Subject: Deprecated has_key in favor of has_attr.

---
 CHANGELOG          |  7 +++++++
 bs4/element.py     |  6 +++++-
 tests/test_tree.py | 18 ++++++++++++------
 3 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/CHANGELOG b/CHANGELOG
index a636544..4d1d075 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -35,6 +35,13 @@ work. Here are the renames:
  * findPreviousSibling -> find_previous_sibling
  * findPreviousSiblings -> find_previous_siblings
 
+Methods have been renamed for compatibility with Python 3.
+
+ * Tag.has_key() -> Tag.has_attr()
+
+   (This was misleading, anyway, because has_key() looked at
+   a tag's attributes and __contains__ looked at a tag's contents.)
+
 Some attributes have also been renamed:
 
  * Tag.isSelfClosing -> Tag.is_empty_element
diff --git a/bs4/element.py b/bs4/element.py
index 10377e9..a9814e1 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -463,7 +463,7 @@ class Tag(PageElement):
         attribute."""
         return self.attrs.get(key, default)
 
-    def has_key(self, key):
+    def has_attr(self, key):
         return key in self.attrs
 
     def __getitem__(self, key):
@@ -707,6 +707,10 @@ class Tag(PageElement):
     def recursiveChildGenerator(self):
         return self.recursive_children
 
+    # This was kind of misleading because has_key() (attributes) was
+    # different from __contains__ (contents). has_key() is gone in Python 3,
+    # anyway.
+    has_key = has_attr
 
 # Next, a couple classes to represent queries and their results.
 class SoupStrainer(object):
diff --git a/tests/test_tree.py b/tests/test_tree.py
index 87a7e3a..cf14d0c 100644
--- a/tests/test_tree.py
+++ b/tests/test_tree.py
@@ -10,7 +10,7 @@ methods tested here.
 """
 
 import copy
-import cPickle as pickle
+import pickle
 import re
 from bs4 import BeautifulSoup
 from bs4.builder import builder_registry
@@ -288,7 +288,7 @@ class TestParentOperations(TreeTest):
 
     def test_parent_generator(self):
         parents = [parent['id'] for parent in self.start.parents
-                   if parent is not None and parent.has_key('id')]
+                   if parent is not None and 'id' in parent.attrs]
         self.assertEquals(parents, ['bottom', 'middle', 'top'])
 
 
@@ -735,11 +735,17 @@ class TestElementObjects(SoupTest):
         self.assertEqual(soup.a, None)
         self.assertEqual(soup.aTag, None)
 
-    def test_has_key(self):
-        """has_key() checks for the presence of an attribute."""
+    def test_has_attr(self):
+        """has_attr() checks for the presence of an attribute.
+
+        Please note: has_attr() is different from
+        __contains__. has_attr() checks the tag's attributes and
+        __contains__ checks the tag's children.
+        """
         soup = self.soup("<foo attr='bar'>")
-        self.assertTrue(soup.foo.has_key('attr'))
-        self.assertFalse(soup.foo.has_key('attr2'))
+        self.assertTrue(soup.foo.has_attr('attr'))
+        self.assertFalse(soup.foo.has_attr('attr2'))
+
 
     def test_attributes_come_out_in_alphabetical_order(self):
         markup = '<b a="1" z="5" m="3" f="2" y="4"></b>'
-- 
cgit v1.2.3