20 files changed, 45 insertions, 490 deletions
diff --git a/src/beautifulsoup/AUTHORS b/AUTHORS
index d353253..d353253 100644
--- a/src/beautifulsoup/AUTHORS
+++ b/AUTHORS
diff --git a/src/beautifulsoup/CHANGELOG b/CHANGELOG
index 4e97e1b..4e97e1b 100644
--- a/src/beautifulsoup/CHANGELOG
+++ b/CHANGELOG
diff --git a/src/beautifulsoup/README.txt b/README.txt
index ff83212..ff83212 100644
--- a/src/beautifulsoup/README.txt
+++ b/README.txt
diff --git a/TODO b/TODO
index 75f1ca6..208638d 100644
--- a/TODO
+++ b/TODO
@@ -6,3 +6,48 @@ Calculate tag.string dynamically rather than when creating the
 tree. The html5lib builder doesn't use popTag, and adding/removing
 things from the tree after the fact may also change the
 value/availability of tag.string.
+
+---
+
+Here are some unit tests that fail with HTMLParser.
+
+    def testValidButBogusDeclarationFAILS(self):
+        self.assertSoupEquals('<! Foo >a', '<!Foo >a')
+
+    def testIncompleteDeclarationAtEndFAILS(self):
+        self.assertSoupEquals('a<!b')
+
+    def testIncompleteEntityAtEndFAILS(self):
+        self.assertSoupEquals('&lt;Hello&gt')
+
+        # This is not what the original author had in mind, but it's
+        # a legitimate interpretation of what they wrote.
+        self.assertSoupEquals("""<a href="foo</a>, </a><a href="bar">baz</a>""",
+        '<a href="foo&lt;/a&gt;, &lt;/a&gt;&lt;a href="></a>, <a href="bar">baz</a>')
+        # SGMLParser generates bogus parse events when attribute values
+        # contain embedded brackets, but at least Beautiful Soup fixes
+        # it up a little.
+        self.assertSoupEquals('<a b="<a>">', '<a b="&lt;a&gt;"></a><a>"></a>')
+        self.assertSoupEquals('<a href="http://foo.com/<a> and blah and blah',
+                              """<a href='"http://foo.com/'></a><a> and blah and blah</a>""")
+
+        invalidEntity = "foo&#bar;baz"
+        soup = BeautifulStoneSoup\
+               (invalidEntity,
+                convertEntities=htmlEnt)
+        self.assertEquals(str(soup), invalidEntity)
+
+
+Tag names that contain Unicode characters crash the parser:
+    def testUnicodeTagNamesFAILS(self):
+        self.assertSoupEquals("<デダ芻デダtext>2PM</デダ芻デダtext>")
+
+Here's the implementation of NavigableString.__unicode__:
+
+    def __unicode__(self):
+        return unicode(str(self))
+
+It converts the Unicode object to a byte string and then back to
+Unicode. I can't find any other way of turning an instance of a Unicode
+subclass into a plain Unicode object. This round trip is pretty bad,
+and a better technique is welcome.
diff --git a/src/beautifulsoup/__init__.py b/__init__.py
index 8817164..8817164 100644
--- a/src/beautifulsoup/__init__.py
+++ b/__init__.py
diff --git a/_bootstrap/COPYRIGHT.txt b/_bootstrap/COPYRIGHT.txt
deleted file mode 100644
index 0e07bd9..0000000
--- a/_bootstrap/COPYRIGHT.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-Copyright (c) 2004-2009 Zope Corporation and Contributors.
-All Rights Reserved.
-
-This software is subject to the provisions of the Zope Public License,
-Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
-THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-FOR A PARTICULAR PURPOSE.
diff --git a/_bootstrap/LICENSE.txt b/_bootstrap/LICENSE.txt
deleted file mode 100644
index eeb9ddf..0000000
--- a/_bootstrap/LICENSE.txt
+++ /dev/null
@@ -1,54 +0,0 @@
-Zope Public License (ZPL) Version 2.1
--------------------------------------
-
-A copyright notice accompanies this license document that
-identifies the copyright holders.
-
-This license has been certified as open source. It has also
-been designated as GPL compatible by the Free Software
-Foundation (FSF).
-
-Redistribution and use in source and binary forms, with or
-without modification, are permitted provided that the
-following conditions are met:
-
-1. Redistributions in source code must retain the
-   accompanying copyright notice, this list of conditions,
-   and the following disclaimer.
-
-2. Redistributions in binary form must reproduce the accompanying
-   copyright notice, this list of conditions, and the
-   following disclaimer in the documentation and/or other
-   materials provided with the distribution.
-
-3. Names of the copyright holders must not be used to
-   endorse or promote products derived from this software
-   without prior written permission from the copyright
-   holders.
-
-4. The right to distribute this software or to use it for
-   any purpose does not give you the right to use
-   Servicemarks (sm) or Trademarks (tm) of the copyright
-   holders. Use of them is covered by separate agreement
-   with the copyright holders.
-
-5. If any files are modified, you must cause the modified
-   files to carry prominent notices stating that you changed
-   the files and the date of any change.
-
-Disclaimer
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS''
-  AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT
-  NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
-  AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN
-  NO EVENT SHALL THE COPYRIGHT HOLDERS BE
-  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-  HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
-  OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
-  DAMAGE.
-\ No newline at end of file
diff --git a/_bootstrap/bootstrap.py b/_bootstrap/bootstrap.py
deleted file mode 100644
index 7728587..0000000
--- a/_bootstrap/bootstrap.py
+++ /dev/null
@@ -1,77 +0,0 @@
-##############################################################################
-#
-# Copyright (c) 2006 Zope Corporation and Contributors.
-# All Rights Reserved.
-#
-# This software is subject to the provisions of the Zope Public License,
-# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
-# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-# FOR A PARTICULAR PURPOSE.
-#
-##############################################################################
-"""Bootstrap a buildout-based project
-
-Simply run this script in a directory containing a buildout.cfg.
-The script accepts buildout command-line options, so you can
-use the -c option to specify an alternate configuration file.
-
-$Id$
-"""
-
-import os, shutil, sys, tempfile, urllib2
-
-tmpeggs = tempfile.mkdtemp()
-
-is_jython = sys.platform.startswith('java')
-
-try:
-    import pkg_resources
-except ImportError:
-    ez = {}
-    exec urllib2.urlopen('http://peak.telecommunity.com/dist/ez_setup.py'
-                         ).read() in ez
-    ez['use_setuptools'](to_dir=tmpeggs, download_delay=0)
-
-    import pkg_resources
-
-if sys.platform == 'win32':
-    def quote(c):
-        if ' ' in c:
-            return '"%s"' % c # work around spawn lamosity on windows
-        else:
-            return c
-else:
-    def quote (c):
-        return c
-
-cmd = 'from setuptools.command.easy_install import main; main()'
-ws  = pkg_resources.working_set
-
-if is_jython:
-    import subprocess
-    
-    assert subprocess.Popen([sys.executable] + ['-c', quote(cmd), '-mqNxd', 
-           quote(tmpeggs), 'zc.buildout'], 
-           env=dict(os.environ,
-               PYTHONPATH=
-               ws.find(pkg_resources.Requirement.parse('setuptools')).location
-               ),
-           ).wait() == 0
-
-else:
-    assert os.spawnle(
-        os.P_WAIT, sys.executable, quote (sys.executable),
-        '-c', quote (cmd), '-mqNxd', quote (tmpeggs), 'zc.buildout',
-        dict(os.environ,
-            PYTHONPATH=
-            ws.find(pkg_resources.Requirement.parse('setuptools')).location
-            ),
-        ) == 0
-
-ws.add_entry(tmpeggs)
-ws.require('zc.buildout')
-import zc.buildout.buildout
-zc.buildout.buildout.main(sys.argv[1:] + ['bootstrap'])
-shutil.rmtree(tmpeggs)
diff --git a/bootstrap.py b/bootstrap.py
deleted file mode 120000
index 44defc0..0000000
--- a/bootstrap.py
+++ /dev/null
@@ -1 +0,0 @@
-_bootstrap/bootstrap.py
-\ No newline at end of file
diff --git a/buildout.cfg b/buildout.cfg
deleted file mode 100644
index 14850fe..0000000
--- a/buildout.cfg
+++ /dev/null
@@ -1,31 +0,0 @@
-[buildout]
-parts =
-    interpreter
-    test
-    docs
-    tags
-unzip = true
-
-develop = .
-
-[test]
-recipe = zc.recipe.testrunner
-eggs = beautifulsoup
-defaults = '--tests-pattern ^tests --exit-with-status --suite-name additional_tests'.split()
-
-[docs]
-recipe = z3c.recipe.sphinxdoc
-eggs = beautifulsoup [docs]
-index-doc = README
-default.css =
-layout.html =
-
-[interpreter]
-recipe = zc.recipe.egg
-interpreter = py
-eggs = beautifulsoup
-       docutils
-
-[tags]
-recipe = z3c.recipe.tag:tags
-eggs = beautifulsoup
diff --git a/src/beautifulsoup/dammit.py b/dammit.py
index 78bd4b2..78bd4b2 100644
--- a/src/beautifulsoup/dammit.py
+++ b/dammit.py
diff --git a/src/beautifulsoup/element.py b/element.py
index 7649b4c..7649b4c 100644
--- a/src/beautifulsoup/element.py
+++ b/element.py
diff --git a/lxml_test.py b/lxml_test.py
deleted file mode 100644
index 2e25c06..0000000
--- a/lxml_test.py
+++ /dev/null
@@ -1,13 +0,0 @@
-from BeautifulSoup import BeautifulStoneSoup, BeautifulSoup
-from lxml_builder import LXMLTreeBuilder
-from lxml import etree
-builder = LXMLTreeBuilder(parser_class=etree.XMLParser)
-soup = BeautifulStoneSoup("<foo>bar</foo>", builder=builder)
-print soup.prettify()
-
-soup = BeautifulSoup("<foo>bar</foo>", builder=builder)
-print soup.prettify()
-
-builder = LXMLTreeBuilder(parser_class=etree.HTMLParser, self_closing_tags=['br'])
-soup = BeautifulSoup("<html><head><title>test<body><h1>page<!--Comment--><script>foo<b>bar</script><br />title</h1>", builder=builder)
-print soup.prettify()
diff --git a/setup.py b/setup.py
deleted file mode 100644
index bd8619c..0000000
--- a/setup.py
+++ /dev/null
@@ -1,44 +0,0 @@
-#!/usr/bin/env python
-
-import ez_setup
-ez_setup.use_setuptools()
-
-import sys
-from setuptools import setup, find_packages
-
-sys.path.insert(0, 'src')
-from beautifulsoup import __version__
-
-setup(
-    name='beautifulsoup',
-    version=__version__,
-    packages=find_packages('src'),
-    package_dir={'':'src'},
-    include_package_data=True,
-    zip_safe=False,
-    maintainer='Leonard Richardson',
-    maintainer_email='leonardr@segfault.org',
-    long_description="""Beautiful Soup parses arbitrarily invalid XML/HTML and provides a variety of methods and Pythonic idioms for iterating and searching the parse tree.""",
-    license='New-style BSD',
-    install_requires=[
-        'setuptools',
-        'zope.interface',
-        ],
-    url='https://launchpad.net/beautifulsoup',
-    download_url= 'https://launchpad.net/beautifulsoup/+download',
-    classifiers=["Development Status :: 5 - Production/Stable",
-                 "Intended Audience :: Developers",
-                 "License :: OSI Approved :: Python Software Foundation License",
-                 "Programming Language :: Python",
-                 "Topic :: Text Processing :: Markup :: HTML",
-                 "Topic :: Text Processing :: Markup :: XML",
-                 "Topic :: Text Processing :: Markup :: SGML",
-                 "Topic :: Software Development :: Libraries :: Python Modules",
-                 ],
-    extras_require=dict(
-        docs=['Sphinx',
-              'z3c.recipe.sphinxdoc']
-    ),
-    setup_requires=['eggtestinfo', 'setuptools_bzr'],
-    test_suite='beautifulsoup.tests',
-    )
diff --git a/src/beautifulsoup/TODO b/src/beautifulsoup/TODO
deleted file mode 100644
index 84fa273..0000000
--- a/src/beautifulsoup/TODO
+++ /dev/null
@@ -1,42 +0,0 @@
-Here are some unit tests that fail with HTMLParser.
-
-    def testValidButBogusDeclarationFAILS(self):
-        self.assertSoupEquals('<! Foo >a', '<!Foo >a')
-
-    def testIncompleteDeclarationAtEndFAILS(self):
-        self.assertSoupEquals('a<!b')
-
-    def testIncompleteEntityAtEndFAILS(self):
-        self.assertSoupEquals('&lt;Hello&gt')
-
-        # This is not what the original author had in mind, but it's
-        # a legitimate interpretation of what they wrote.
-        self.assertSoupEquals("""<a href="foo</a>, </a><a href="bar">baz</a>""",
-        '<a href="foo&lt;/a&gt;, &lt;/a&gt;&lt;a href="></a>, <a href="bar">baz</a>')
-        # SGMLParser generates bogus parse events when attribute values
-        # contain embedded brackets, but at least Beautiful Soup fixes
-        # it up a little.
-        self.assertSoupEquals('<a b="<a>">', '<a b="&lt;a&gt;"></a><a>"></a>')
-        self.assertSoupEquals('<a href="http://foo.com/<a> and blah and blah',
-                              """<a href='"http://foo.com/'></a><a> and blah and blah</a>""")
-
-        invalidEntity = "foo&#bar;baz"
-        soup = BeautifulStoneSoup\
-               (invalidEntity,
-                convertEntities=htmlEnt)
-        self.assertEquals(str(soup), invalidEntity)
-
-
-Tag names that contain Unicode characters crash the parser:
-    def testUnicodeTagNamesFAILS(self):
-	self.assertSoupEquals("<�f�_䍃f�_text>2PM</�f�_䍃f�_text>")
-
-Here's the implementation of NavigableString.__unicode__:
-
-    def __unicode__(self):
-        return unicode(str(self))
-
-It converts the Unicode to a string, and then back to Unicode. I can't
-find any other way of turning an element of a Unicode subclass into a
-normal Unicode object. This is pretty bad and a better technique is
-welcome.
diff --git a/src/beautifulsoup/python3.diff b/src/beautifulsoup/python3.diff
deleted file mode 100644
index 142f2b1..0000000
--- a/src/beautifulsoup/python3.diff
+++ /dev/null
@@ -1,208 +0,0 @@
-=== modified file 'src/beautifulsoup/builder.py'
---- src/beautifulsoup/builder.py	2009-04-10 15:22:53 +0000
-+++ src/beautifulsoup/builder.py	2009-04-10 17:12:49 +0000
-@@ -6,7 +6,7 @@
- from element import name2codepoint
- from element import (
-     CData, Comment, Declaration, Entities, ProcessingInstruction)
--from HTMLParser import HTMLParser, HTMLParseError
-+from html.parser import HTMLParser, HTMLParseError
- 
- __all__ = ['TreeBuilder',
-            'HTMLParserXMLTreeBuilder',
-
-=== modified file 'src/beautifulsoup/element.py'
---- src/beautifulsoup/element.py	2009-04-10 15:22:53 +0000
-+++ src/beautifulsoup/element.py	2009-04-10 17:12:49 +0000
-@@ -1,7 +1,7 @@
- import re
- import types
- try:
--    from htmlentitydefs import name2codepoint
-+    from html.entities import name2codepoint
- except ImportError:
-     name2codepoint = {}
- 
-@@ -254,7 +254,7 @@
-         g = generator()
-         while True:
-             try:
--                i = g.next()
-+                i = g.__next__()
-             except StopIteration:
-                 break
-             if i:
-
-=== modified file 'src/beautifulsoup/tests/test_soup.py'
---- src/beautifulsoup/tests/test_soup.py	2009-04-10 15:45:04 +0000
-+++ src/beautifulsoup/tests/test_soup.py	2009-04-10 17:15:31 +0000
-@@ -635,9 +635,9 @@
-         self.assertSoupEquals('<b>hello&nbsp;there</b>')
- 
-     def testEntitiesInAttributeValues(self):
--        self.assertSoupEquals('<x t="x&#241;">', '<x t="x\xc3\xb1"></x>',
-+        self.assertSoupEquals('<x t="x&#241;">', b'<x t="x\xc3\xb1"></x>',
-                               encoding='utf-8')
--        self.assertSoupEquals('<x t="x&#xf1;">', '<x t="x\xc3\xb1"></x>',
-+        self.assertSoupEquals('<x t="x&#xf1;">', b'<x t="x\xc3\xb1"></x>',
-                               encoding='utf-8')
- 
-         builder = HTMLParserTreeBuilder(convertEntities=Entities.HTML_ENTITIES)
-@@ -681,11 +681,11 @@
-     smart quote fixes."""
- 
-     def testUnicodeDammitStandalone(self):
--        markup = "<foo>\x92</foo>"
-+        markup = b"<foo>\x92</foo>"
-         dammit = UnicodeDammit(markup)
-         self.assertEquals(dammit.unicode, "<foo>&#x2019;</foo>")
- 
--        hebrew = "\xed\xe5\xec\xf9"
-+        hebrew = b"\xed\xe5\xec\xf9"
-         dammit = UnicodeDammit(hebrew, ["iso-8859-8"])
-         self.assertEquals(dammit.unicode, u'\u05dd\u05d5\u05dc\u05e9')
-         self.assertEquals(dammit.originalEncoding, 'iso-8859-8')
-@@ -697,7 +697,7 @@
- 
-         unicodeData = u"<foo>\u00FC</foo>"
-         utf8 = unicodeData.encode("utf-8")
--        self.assertEquals(utf8, '<foo>\xc3\xbc</foo>')
-+        self.assertEquals(utf8, b'<foo>\xc3\xbc</foo>')
- 
-         unicodeSoup = BeautifulStoneSoup(unicodeData)
-         self.assertEquals(unicodeData, unicodeSoup.decode())
-@@ -724,8 +724,8 @@
-         self.assertEqual(soup.find(text=u'Räksmörgås'),u'Räksmörgås')
- 
-     def testRewrittenXMLHeader(self):
--        euc_jp = '<?xml version="1.0 encoding="euc-jp"?>\n<foo>\n\xa4\xb3\xa4\xec\xa4\xcfEUC-JP\xa4\xc7\xa5\xb3\xa1\xbc\xa5\xc7\xa5\xa3\xa5\xf3\xa5\xb0\xa4\xb5\xa4\xec\xa4\xbf\xc6\xfc\xcb\xdc\xb8\xec\xa4\xce\xa5\xd5\xa5\xa1\xa5\xa4\xa5\xeb\xa4\xc7\xa4\xb9\xa1\xa3\n</foo>\n'
--        utf8 = "<?xml version='1.0' encoding='utf-8'?>\n<foo>\n\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafEUC-JP\xe3\x81\xa7\xe3\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n</foo>\n"
-+        euc_jp = b'<?xml version="1.0 encoding="euc-jp"?>\n<foo>\n\xa4\xb3\xa4\xec\xa4\xcfEUC-JP\xa4\xc7\xa5\xb3\xa1\xbc\xa5\xc7\xa5\xa3\xa5\xf3\xa5\xb0\xa4\xb5\xa4\xec\xa4\xbf\xc6\xfc\xcb\xdc\xb8\xec\xa4\xce\xa5\xd5\xa5\xa1\xa5\xa4\xa5\xeb\xa4\xc7\xa4\xb9\xa1\xa3\n</foo>\n'
-+        utf8 = b"<?xml version='1.0' encoding='utf-8'?>\n<foo>\n\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafEUC-JP\xe3\x81\xa7\xe3\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n</foo>\n"
-         soup = BeautifulStoneSoup(euc_jp)
-         if soup.originalEncoding != "euc-jp":
-             raise Exception("Test failed when parsing euc-jp document. "
-@@ -736,12 +736,12 @@
-         self.assertEquals(soup.originalEncoding, "euc-jp")
-         self.assertEquals(soup.renderContents('utf-8'), utf8)
- 
--        old_text = "<?xml encoding='windows-1252'><foo>\x92</foo>"
-+        old_text = b"<?xml encoding='windows-1252'><foo>\x92</foo>"
-         new_text = "<?xml version='1.0' encoding='utf-8'?><foo>&rsquo;</foo>"
-         self.assertSoupEquals(old_text, new_text)
- 
-     def testRewrittenMetaTag(self):
--        no_shift_jis_html = '''<html><head>\n<meta http-equiv="Content-language" content="ja" /></head><body><pre>\n\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n</pre></body></html>'''
-+        no_shift_jis_html = b'''<html><head>\n<meta http-equiv="Content-language" content="ja" /></head><body><pre>\n\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n</pre></body></html>'''
-         soup = BeautifulSoup(no_shift_jis_html)
- 
-         # Beautiful Soup used to try to rewrite the meta tag even if the
-@@ -751,16 +751,16 @@
-         soup = BeautifulSoup(no_shift_jis_html, parseOnlyThese=strainer)
-         self.assertEquals(soup.contents[0].name, 'pre')
- 
--        meta_tag = ('<meta content="text/html; charset=x-sjis" '
--                    'http-equiv="Content-type" />')
-+        meta_tag = (b'<meta content="text/html; charset=x-sjis" '
-+                    b'http-equiv="Content-type" />')
-         shift_jis_html = (
--            '<html><head>\n%s\n'
--            '<meta http-equiv="Content-language" content="ja" />'
--            '</head><body><pre>\n'
--            '\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f'
--            '\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c'
--            '\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n'
--            '</pre></body></html>') % meta_tag
-+            b'<html><head>\n' + meta_tag + b'\n'
-+            b'<meta http-equiv="Content-language" content="ja" />'
-+            b'</head><body><pre>\n'
-+            b'\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f'
-+            b'\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c'
-+            b'\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n'
-+            b'</pre></body></html>')
-         soup = BeautifulSoup(shift_jis_html)
-         if soup.originalEncoding != "shift-jis":
-             raise Exception("Test failed when parsing shift-jis document "
-@@ -773,60 +773,60 @@
-         content_type_tag = soup.meta['content']
-         self.assertEquals(content_type_tag[content_type_tag.find('charset='):],
-                           'charset=%SOUP-ENCODING%')
--        content_type = str(soup.meta)
-+        content_type = soup.meta.decode()
-         index = content_type.find('charset=')
-         self.assertEqual(content_type[index:index+len('charset=utf8')+1],
-                          'charset=utf-8')
-         content_type = soup.meta.encode('shift-jis')
--        index = content_type.find('charset=')
-+        index = content_type.find(b'charset=')
-         self.assertEqual(content_type[index:index+len('charset=shift-jis')],
-                          'charset=shift-jis'.encode())
- 
-         self.assertEquals(soup.encode('utf-8'), (
--                '<html><head>\n'
--                '<meta content="text/html; charset=utf-8" '
--                'http-equiv="Content-type" />\n'
--                '<meta http-equiv="Content-language" content="ja" />'
--                '</head><body><pre>\n'
--                '\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafShift-JIS\xe3\x81\xa7\xe3'
--                '\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3'
--                '\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6'
--                '\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3'
--                '\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n'
--                '</pre></body></html>'))
-+                b'<html><head>\n'
-+                b'<meta content="text/html; charset=utf-8" '
-+                b'http-equiv="Content-type" />\n'
-+                b'<meta http-equiv="Content-language" content="ja" />'
-+                b'</head><body><pre>\n'
-+                b'\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafShift-JIS\xe3\x81\xa7\xe3'
-+                b'\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3'
-+                b'\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6'
-+                b'\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3'
-+                b'\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n'
-+                b'</pre></body></html>'))
-         self.assertEquals(soup.encode("shift-jis"),
-                           shift_jis_html.replace('x-sjis'.encode(),
-                                                  'shift-jis'.encode()))
- 
--        isolatin = """<html><meta http-equiv="Content-type" content="text/html; charset=ISO-Latin-1" />Sacr\xe9 bleu!</html>"""
-+        isolatin = b"""<html><meta http-equiv="Content-type" content="text/html; charset=ISO-Latin-1" />Sacr\xe9 bleu!</html>"""
-         soup = BeautifulSoup(isolatin)
- 
-         utf8 = isolatin.replace("ISO-Latin-1".encode(), "utf-8".encode())
--        utf8 = utf8.replace("\xe9", "\xc3\xa9")
-+        utf8 = utf8.replace(b"\xe9", b"\xc3\xa9")
-         self.assertSoupEquals(soup.encode("utf-8"), utf8, encoding='utf-8')
- 
-     def testHebrew(self):
--        iso_8859_8= '<HEAD>\n<TITLE>Hebrew (ISO 8859-8) in Visual Directionality</TITLE>\n\n\n\n</HEAD>\n<BODY>\n<H1>Hebrew (ISO 8859-8) in Visual Directionality</H1>\n\xed\xe5\xec\xf9\n</BODY>\n'
--        utf8 = '<head>\n<title>Hebrew (ISO 8859-8) in Visual Directionality</title>\n</head>\n<body>\n<h1>Hebrew (ISO 8859-8) in Visual Directionality</h1>\n\xd7\x9d\xd7\x95\xd7\x9c\xd7\xa9\n</body>\n'
-+        iso_8859_8= b'<HEAD>\n<TITLE>Hebrew (ISO 8859-8) in Visual Directionality</TITLE>\n\n\n\n</HEAD>\n<BODY>\n<H1>Hebrew (ISO 8859-8) in Visual Directionality</H1>\n\xed\xe5\xec\xf9\n</BODY>\n'
-+        utf8 = b'<head>\n<title>Hebrew (ISO 8859-8) in Visual Directionality</title>\n</head>\n<body>\n<h1>Hebrew (ISO 8859-8) in Visual Directionality</h1>\n\xd7\x9d\xd7\x95\xd7\x9c\xd7\xa9\n</body>\n'
-         soup = BeautifulStoneSoup(iso_8859_8, fromEncoding="iso-8859-8")
-         self.assertEquals(soup.encode('utf-8'), utf8)
- 
-     def testSmartQuotesNotSoSmartAnymore(self):
--        self.assertSoupEquals("\x91Foo\x92 <!--blah-->",
-+        self.assertSoupEquals(b"\x91Foo\x92 <!--blah-->",
-                               '&lsquo;Foo&rsquo; <!--blah-->')
- 
-     def testDontConvertSmartQuotesWhenAlsoConvertingEntities(self):
--        smartQuotes = "Il a dit, \x8BSacr&eacute; bl&#101;u!\x9b"
-+        smartQuotes = b"Il a dit, \x8BSacr&eacute; bl&#101;u!\x9b"
-         soup = BeautifulSoup(smartQuotes)
-         self.assertEquals(soup.decode(),
-                           'Il a dit, &lsaquo;Sacr&eacute; bl&#101;u!&rsaquo;')
-         builder = HTMLParserTreeBuilder(convertEntities="html")
-         soup = BeautifulSoup(smartQuotes, builder)
-         self.assertEquals(soup.encode('utf-8'),
--                          'Il a dit, \xe2\x80\xb9Sacr\xc3\xa9 bleu!\xe2\x80\xba')
-+                          b'Il a dit, \xe2\x80\xb9Sacr\xc3\xa9 bleu!\xe2\x80\xba')
- 
-     def testDontSeeSmartQuotesWhereThereAreNone(self):
--        utf_8 = "\343\202\261\343\203\274\343\202\277\343\202\244 Watch"
-+        utf_8 = b"\343\202\261\343\203\274\343\202\277\343\202\244 Watch"
-         self.assertSoupEquals(utf_8, encoding='utf-8')
- 
- 
-
diff --git a/testall.sh b/testall.sh
deleted file mode 100755
index 801124f..0000000
--- a/testall.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/bin/sh
-python BeautifulSoupTests.py && sh to3.sh && cd python3 && python3 BeautifulSoupTests.py
diff --git a/src/beautifulsoup/testing.py b/testing.py
index 20d087e..20d087e 100644
--- a/src/beautifulsoup/testing.py
+++ b/testing.py
diff --git a/to3.sh b/to3.sh
deleted file mode 100755
index 26b3246..0000000
--- a/to3.sh
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/bin/sh
-mkdir python3
-for i in BeautifulSoupTests.py builder.py element.py dammit.py
-do
-    cp $i python3/
-    2to3-3.0 -x next $i | patch -p0 python3/$i
-    cp python3/$i python3/$i.orig
-    patch -p0 python3/$i < $i.3.diff
-done
-\ No newline at end of file
diff --git a/src/beautifulsoup/util.py b/util.py
index 693a7e2..693a7e2 100644
--- a/src/beautifulsoup/util.py
+++ b/util.py