summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- AUTHORS (renamed from src/beautifulsoup/AUTHORS) 0
-rw-r--r-- CHANGELOG (renamed from src/beautifulsoup/CHANGELOG) 0
-rw-r--r-- README.txt (renamed from src/beautifulsoup/README.txt) 0
-rw-r--r-- TODO 45
-rw-r--r-- __init__.py (renamed from src/beautifulsoup/__init__.py) 0
-rw-r--r-- _bootstrap/COPYRIGHT.txt 9
-rw-r--r-- _bootstrap/LICENSE.txt 54
-rw-r--r-- _bootstrap/bootstrap.py 77
l--------- bootstrap.py 1
-rw-r--r-- buildout.cfg 31
-rw-r--r-- dammit.py (renamed from src/beautifulsoup/dammit.py) 0
-rw-r--r-- element.py (renamed from src/beautifulsoup/element.py) 0
-rw-r--r-- lxml_test.py 13
-rw-r--r-- setup.py 44
-rw-r--r-- src/beautifulsoup/TODO 42
-rw-r--r-- src/beautifulsoup/python3.diff 208
-rwxr-xr-x testall.sh 2
-rw-r--r-- testing.py (renamed from src/beautifulsoup/testing.py) 0
-rwxr-xr-x to3.sh 9
-rw-r--r-- util.py (renamed from src/beautifulsoup/util.py) 0
20 files changed, 45 insertions, 490 deletions
diff --git a/src/beautifulsoup/AUTHORS b/AUTHORS
index d353253..d353253 100644
--- a/src/beautifulsoup/AUTHORS
+++ b/AUTHORS
diff --git a/src/beautifulsoup/CHANGELOG b/CHANGELOG
index 4e97e1b..4e97e1b 100644
--- a/src/beautifulsoup/CHANGELOG
+++ b/CHANGELOG
diff --git a/src/beautifulsoup/README.txt b/README.txt
index ff83212..ff83212 100644
--- a/src/beautifulsoup/README.txt
+++ b/README.txt
diff --git a/TODO b/TODO
index 75f1ca6..208638d 100644
--- a/TODO
+++ b/TODO
@@ -6,3 +6,48 @@ Calculate tag.string dynamically rather than when creating the
tree. The html5lib builder doesn't use popTag, and adding/removing
things from the tree after the fact may also change the
value/availability of tag.string.
+
+---
+
+Here are some unit tests that fail with HTMLParser.
+
+ def testValidButBogusDeclarationFAILS(self):
+ self.assertSoupEquals('<! Foo >a', '<!Foo >a')
+
+ def testIncompleteDeclarationAtEndFAILS(self):
+ self.assertSoupEquals('a<!b')
+
+ def testIncompleteEntityAtEndFAILS(self):
+ self.assertSoupEquals('&lt;Hello&gt')
+
+ # This is not what the original author had in mind, but it's
+ # a legitimate interpretation of what they wrote.
+ self.assertSoupEquals("""<a href="foo</a>, </a><a href="bar">baz</a>""",
+ '<a href="foo&lt;/a&gt;, &lt;/a&gt;&lt;a href="></a>, <a href="bar">baz</a>')
+ # SGMLParser generates bogus parse events when attribute values
+ # contain embedded brackets, but at least Beautiful Soup fixes
+ # it up a little.
+ self.assertSoupEquals('<a b="<a>">', '<a b="&lt;a&gt;"></a><a>"></a>')
+ self.assertSoupEquals('<a href="http://foo.com/<a> and blah and blah',
+ """<a href='"http://foo.com/'></a><a> and blah and blah</a>""")
+
+ invalidEntity = "foo&#bar;baz"
+ soup = BeautifulStoneSoup\
+ (invalidEntity,
+ convertEntities=htmlEnt)
+ self.assertEquals(str(soup), invalidEntity)
+
+
+Tag names that contain Unicode characters crash the parser:
+ def testUnicodeTagNamesFAILS(self):
+ self.assertSoupEquals("<デダ芻デダtext>2PM</デダ芻デダtext>")
+
+Here's the implementation of NavigableString.__unicode__:
+
+ def __unicode__(self):
+ return unicode(str(self))
+
+It converts the Unicode to a string, and then back to Unicode. I can't
+find any other way of turning an element of a Unicode subclass into a
+normal Unicode object. This is pretty bad and a better technique is
+welcome.
diff --git a/src/beautifulsoup/__init__.py b/__init__.py
index 8817164..8817164 100644
--- a/src/beautifulsoup/__init__.py
+++ b/__init__.py
diff --git a/_bootstrap/COPYRIGHT.txt b/_bootstrap/COPYRIGHT.txt
deleted file mode 100644
index 0e07bd9..0000000
--- a/_bootstrap/COPYRIGHT.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-Copyright (c) 2004-2009 Zope Corporation and Contributors.
-All Rights Reserved.
-
-This software is subject to the provisions of the Zope Public License,
-Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
-THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-FOR A PARTICULAR PURPOSE.
diff --git a/_bootstrap/LICENSE.txt b/_bootstrap/LICENSE.txt
deleted file mode 100644
index eeb9ddf..0000000
--- a/_bootstrap/LICENSE.txt
+++ /dev/null
@@ -1,54 +0,0 @@
-Zope Public License (ZPL) Version 2.1
--------------------------------------
-
-A copyright notice accompanies this license document that
-identifies the copyright holders.
-
-This license has been certified as open source. It has also
-been designated as GPL compatible by the Free Software
-Foundation (FSF).
-
-Redistribution and use in source and binary forms, with or
-without modification, are permitted provided that the
-following conditions are met:
-
-1. Redistributions in source code must retain the
- accompanying copyright notice, this list of conditions,
- and the following disclaimer.
-
-2. Redistributions in binary form must reproduce the accompanying
- copyright notice, this list of conditions, and the
- following disclaimer in the documentation and/or other
- materials provided with the distribution.
-
-3. Names of the copyright holders must not be used to
- endorse or promote products derived from this software
- without prior written permission from the copyright
- holders.
-
-4. The right to distribute this software or to use it for
- any purpose does not give you the right to use
- Servicemarks (sm) or Trademarks (tm) of the copyright
- holders. Use of them is covered by separate agreement
- with the copyright holders.
-
-5. If any files are modified, you must cause the modified
- files to carry prominent notices stating that you changed
- the files and the date of any change.
-
-Disclaimer
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS''
- AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT
- NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
- AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
- NO EVENT SHALL THE COPYRIGHT HOLDERS BE
- LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
- OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
- DAMAGE.
\ No newline at end of file
diff --git a/_bootstrap/bootstrap.py b/_bootstrap/bootstrap.py
deleted file mode 100644
index 7728587..0000000
--- a/_bootstrap/bootstrap.py
+++ /dev/null
@@ -1,77 +0,0 @@
-##############################################################################
-#
-# Copyright (c) 2006 Zope Corporation and Contributors.
-# All Rights Reserved.
-#
-# This software is subject to the provisions of the Zope Public License,
-# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
-# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-# FOR A PARTICULAR PURPOSE.
-#
-##############################################################################
-"""Bootstrap a buildout-based project
-
-Simply run this script in a directory containing a buildout.cfg.
-The script accepts buildout command-line options, so you can
-use the -c option to specify an alternate configuration file.
-
-$Id$
-"""
-
-import os, shutil, sys, tempfile, urllib2
-
-tmpeggs = tempfile.mkdtemp()
-
-is_jython = sys.platform.startswith('java')
-
-try:
- import pkg_resources
-except ImportError:
- ez = {}
- exec urllib2.urlopen('http://peak.telecommunity.com/dist/ez_setup.py'
- ).read() in ez
- ez['use_setuptools'](to_dir=tmpeggs, download_delay=0)
-
- import pkg_resources
-
-if sys.platform == 'win32':
- def quote(c):
- if ' ' in c:
- return '"%s"' % c # work around spawn lamosity on windows
- else:
- return c
-else:
- def quote (c):
- return c
-
-cmd = 'from setuptools.command.easy_install import main; main()'
-ws = pkg_resources.working_set
-
-if is_jython:
- import subprocess
-
- assert subprocess.Popen([sys.executable] + ['-c', quote(cmd), '-mqNxd',
- quote(tmpeggs), 'zc.buildout'],
- env=dict(os.environ,
- PYTHONPATH=
- ws.find(pkg_resources.Requirement.parse('setuptools')).location
- ),
- ).wait() == 0
-
-else:
- assert os.spawnle(
- os.P_WAIT, sys.executable, quote (sys.executable),
- '-c', quote (cmd), '-mqNxd', quote (tmpeggs), 'zc.buildout',
- dict(os.environ,
- PYTHONPATH=
- ws.find(pkg_resources.Requirement.parse('setuptools')).location
- ),
- ) == 0
-
-ws.add_entry(tmpeggs)
-ws.require('zc.buildout')
-import zc.buildout.buildout
-zc.buildout.buildout.main(sys.argv[1:] + ['bootstrap'])
-shutil.rmtree(tmpeggs)
diff --git a/bootstrap.py b/bootstrap.py
deleted file mode 120000
index 44defc0..0000000
--- a/bootstrap.py
+++ /dev/null
@@ -1 +0,0 @@
-_bootstrap/bootstrap.py
\ No newline at end of file
diff --git a/buildout.cfg b/buildout.cfg
deleted file mode 100644
index 14850fe..0000000
--- a/buildout.cfg
+++ /dev/null
@@ -1,31 +0,0 @@
-[buildout]
-parts =
- interpreter
- test
- docs
- tags
-unzip = true
-
-develop = .
-
-[test]
-recipe = zc.recipe.testrunner
-eggs = beautifulsoup
-defaults = '--tests-pattern ^tests --exit-with-status --suite-name additional_tests'.split()
-
-[docs]
-recipe = z3c.recipe.sphinxdoc
-eggs = beautifulsoup [docs]
-index-doc = README
-default.css =
-layout.html =
-
-[interpreter]
-recipe = zc.recipe.egg
-interpreter = py
-eggs = beautifulsoup
- docutils
-
-[tags]
-recipe = z3c.recipe.tag:tags
-eggs = beautifulsoup
diff --git a/src/beautifulsoup/dammit.py b/dammit.py
index 78bd4b2..78bd4b2 100644
--- a/src/beautifulsoup/dammit.py
+++ b/dammit.py
diff --git a/src/beautifulsoup/element.py b/element.py
index 7649b4c..7649b4c 100644
--- a/src/beautifulsoup/element.py
+++ b/element.py
diff --git a/lxml_test.py b/lxml_test.py
deleted file mode 100644
index 2e25c06..0000000
--- a/lxml_test.py
+++ /dev/null
@@ -1,13 +0,0 @@
-from BeautifulSoup import BeautifulStoneSoup, BeautifulSoup
-from lxml_builder import LXMLTreeBuilder
-from lxml import etree
-builder = LXMLTreeBuilder(parser_class=etree.XMLParser)
-soup = BeautifulStoneSoup("<foo>bar</foo>", builder=builder)
-print soup.prettify()
-
-soup = BeautifulSoup("<foo>bar</foo>", builder=builder)
-print soup.prettify()
-
-builder = LXMLTreeBuilder(parser_class=etree.HTMLParser, self_closing_tags=['br'])
-soup = BeautifulSoup("<html><head><title>test<body><h1>page<!--Comment--><script>foo<b>bar</script><br />title</h1>", builder=builder)
-print soup.prettify()
diff --git a/setup.py b/setup.py
deleted file mode 100644
index bd8619c..0000000
--- a/setup.py
+++ /dev/null
@@ -1,44 +0,0 @@
-#!/usr/bin/env python
-
-import ez_setup
-ez_setup.use_setuptools()
-
-import sys
-from setuptools import setup, find_packages
-
-sys.path.insert(0, 'src')
-from beautifulsoup import __version__
-
-setup(
- name='beautifulsoup',
- version=__version__,
- packages=find_packages('src'),
- package_dir={'':'src'},
- include_package_data=True,
- zip_safe=False,
- maintainer='Leonard Richardson',
- maintainer_email='leonardr@segfault.org',
- long_description="""Beautiful Soup parses arbitrarily invalid XML/HTML and provides a variety of methods and Pythonic idioms for iterating and searching the parse tree.""",
- license='New-style BSD',
- install_requires=[
- 'setuptools',
- 'zope.interface',
- ],
- url='https://launchpad.net/beautifulsoup',
- download_url= 'https://launchpad.net/beautifulsoup/+download',
- classifiers=["Development Status :: 5 - Production/Stable",
- "Intended Audience :: Developers",
- "License :: OSI Approved :: Python Software Foundation License",
- "Programming Language :: Python",
- "Topic :: Text Processing :: Markup :: HTML",
- "Topic :: Text Processing :: Markup :: XML",
- "Topic :: Text Processing :: Markup :: SGML",
- "Topic :: Software Development :: Libraries :: Python Modules",
- ],
- extras_require=dict(
- docs=['Sphinx',
- 'z3c.recipe.sphinxdoc']
- ),
- setup_requires=['eggtestinfo', 'setuptools_bzr'],
- test_suite='beautifulsoup.tests',
- )
diff --git a/src/beautifulsoup/TODO b/src/beautifulsoup/TODO
deleted file mode 100644
index 84fa273..0000000
--- a/src/beautifulsoup/TODO
+++ /dev/null
@@ -1,42 +0,0 @@
-Here are some unit tests that fail with HTMLParser.
-
- def testValidButBogusDeclarationFAILS(self):
- self.assertSoupEquals('<! Foo >a', '<!Foo >a')
-
- def testIncompleteDeclarationAtEndFAILS(self):
- self.assertSoupEquals('a<!b')
-
- def testIncompleteEntityAtEndFAILS(self):
- self.assertSoupEquals('&lt;Hello&gt')
-
- # This is not what the original author had in mind, but it's
- # a legitimate interpretation of what they wrote.
- self.assertSoupEquals("""<a href="foo</a>, </a><a href="bar">baz</a>""",
- '<a href="foo&lt;/a&gt;, &lt;/a&gt;&lt;a href="></a>, <a href="bar">baz</a>')
- # SGMLParser generates bogus parse events when attribute values
- # contain embedded brackets, but at least Beautiful Soup fixes
- # it up a little.
- self.assertSoupEquals('<a b="<a>">', '<a b="&lt;a&gt;"></a><a>"></a>')
- self.assertSoupEquals('<a href="http://foo.com/<a> and blah and blah',
- """<a href='"http://foo.com/'></a><a> and blah and blah</a>""")
-
- invalidEntity = "foo&#bar;baz"
- soup = BeautifulStoneSoup\
- (invalidEntity,
- convertEntities=htmlEnt)
- self.assertEquals(str(soup), invalidEntity)
-
-
-Tag names that contain Unicode characters crash the parser:
- def testUnicodeTagNamesFAILS(self):
- self.assertSoupEquals("<f_䍃f_text>2PM</f_䍃f_text>")
-
-Here's the implementation of NavigableString.__unicode__:
-
- def __unicode__(self):
- return unicode(str(self))
-
-It converts the Unicode to a string, and then back to Unicode. I can't
-find any other way of turning an element of a Unicode subclass into a
-normal Unicode object. This is pretty bad and a better technique is
-welcome.
diff --git a/src/beautifulsoup/python3.diff b/src/beautifulsoup/python3.diff
deleted file mode 100644
index 142f2b1..0000000
--- a/src/beautifulsoup/python3.diff
+++ /dev/null
@@ -1,208 +0,0 @@
-=== modified file 'src/beautifulsoup/builder.py'
---- src/beautifulsoup/builder.py 2009-04-10 15:22:53 +0000
-+++ src/beautifulsoup/builder.py 2009-04-10 17:12:49 +0000
-@@ -6,7 +6,7 @@
- from element import name2codepoint
- from element import (
- CData, Comment, Declaration, Entities, ProcessingInstruction)
--from HTMLParser import HTMLParser, HTMLParseError
-+from html.parser import HTMLParser, HTMLParseError
-
- __all__ = ['TreeBuilder',
- 'HTMLParserXMLTreeBuilder',
-
-=== modified file 'src/beautifulsoup/element.py'
---- src/beautifulsoup/element.py 2009-04-10 15:22:53 +0000
-+++ src/beautifulsoup/element.py 2009-04-10 17:12:49 +0000
-@@ -1,7 +1,7 @@
- import re
- import types
- try:
-- from htmlentitydefs import name2codepoint
-+ from html.entities import name2codepoint
- except ImportError:
- name2codepoint = {}
-
-@@ -254,7 +254,7 @@
- g = generator()
- while True:
- try:
-- i = g.next()
-+ i = g.__next__()
- except StopIteration:
- break
- if i:
-
-=== modified file 'src/beautifulsoup/tests/test_soup.py'
---- src/beautifulsoup/tests/test_soup.py 2009-04-10 15:45:04 +0000
-+++ src/beautifulsoup/tests/test_soup.py 2009-04-10 17:15:31 +0000
-@@ -635,9 +635,9 @@
- self.assertSoupEquals('<b>hello&nbsp;there</b>')
-
- def testEntitiesInAttributeValues(self):
-- self.assertSoupEquals('<x t="x&#241;">', '<x t="x\xc3\xb1"></x>',
-+ self.assertSoupEquals('<x t="x&#241;">', b'<x t="x\xc3\xb1"></x>',
- encoding='utf-8')
-- self.assertSoupEquals('<x t="x&#xf1;">', '<x t="x\xc3\xb1"></x>',
-+ self.assertSoupEquals('<x t="x&#xf1;">', b'<x t="x\xc3\xb1"></x>',
- encoding='utf-8')
-
- builder = HTMLParserTreeBuilder(convertEntities=Entities.HTML_ENTITIES)
-@@ -681,11 +681,11 @@
- smart quote fixes."""
-
- def testUnicodeDammitStandalone(self):
-- markup = "<foo>\x92</foo>"
-+ markup = b"<foo>\x92</foo>"
- dammit = UnicodeDammit(markup)
- self.assertEquals(dammit.unicode, "<foo>&#x2019;</foo>")
-
-- hebrew = "\xed\xe5\xec\xf9"
-+ hebrew = b"\xed\xe5\xec\xf9"
- dammit = UnicodeDammit(hebrew, ["iso-8859-8"])
- self.assertEquals(dammit.unicode, u'\u05dd\u05d5\u05dc\u05e9')
- self.assertEquals(dammit.originalEncoding, 'iso-8859-8')
-@@ -697,7 +697,7 @@
-
- unicodeData = u"<foo>\u00FC</foo>"
- utf8 = unicodeData.encode("utf-8")
-- self.assertEquals(utf8, '<foo>\xc3\xbc</foo>')
-+ self.assertEquals(utf8, b'<foo>\xc3\xbc</foo>')
-
- unicodeSoup = BeautifulStoneSoup(unicodeData)
- self.assertEquals(unicodeData, unicodeSoup.decode())
-@@ -724,8 +724,8 @@
- self.assertEqual(soup.find(text=u'Räksmörgås'),u'Räksmörgås')
-
- def testRewrittenXMLHeader(self):
-- euc_jp = '<?xml version="1.0 encoding="euc-jp"?>\n<foo>\n\xa4\xb3\xa4\xec\xa4\xcfEUC-JP\xa4\xc7\xa5\xb3\xa1\xbc\xa5\xc7\xa5\xa3\xa5\xf3\xa5\xb0\xa4\xb5\xa4\xec\xa4\xbf\xc6\xfc\xcb\xdc\xb8\xec\xa4\xce\xa5\xd5\xa5\xa1\xa5\xa4\xa5\xeb\xa4\xc7\xa4\xb9\xa1\xa3\n</foo>\n'
-- utf8 = "<?xml version='1.0' encoding='utf-8'?>\n<foo>\n\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafEUC-JP\xe3\x81\xa7\xe3\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n</foo>\n"
-+ euc_jp = b'<?xml version="1.0 encoding="euc-jp"?>\n<foo>\n\xa4\xb3\xa4\xec\xa4\xcfEUC-JP\xa4\xc7\xa5\xb3\xa1\xbc\xa5\xc7\xa5\xa3\xa5\xf3\xa5\xb0\xa4\xb5\xa4\xec\xa4\xbf\xc6\xfc\xcb\xdc\xb8\xec\xa4\xce\xa5\xd5\xa5\xa1\xa5\xa4\xa5\xeb\xa4\xc7\xa4\xb9\xa1\xa3\n</foo>\n'
-+ utf8 = b"<?xml version='1.0' encoding='utf-8'?>\n<foo>\n\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafEUC-JP\xe3\x81\xa7\xe3\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n</foo>\n"
- soup = BeautifulStoneSoup(euc_jp)
- if soup.originalEncoding != "euc-jp":
- raise Exception("Test failed when parsing euc-jp document. "
-@@ -736,12 +736,12 @@
- self.assertEquals(soup.originalEncoding, "euc-jp")
- self.assertEquals(soup.renderContents('utf-8'), utf8)
-
-- old_text = "<?xml encoding='windows-1252'><foo>\x92</foo>"
-+ old_text = b"<?xml encoding='windows-1252'><foo>\x92</foo>"
- new_text = "<?xml version='1.0' encoding='utf-8'?><foo>&rsquo;</foo>"
- self.assertSoupEquals(old_text, new_text)
-
- def testRewrittenMetaTag(self):
-- no_shift_jis_html = '''<html><head>\n<meta http-equiv="Content-language" content="ja" /></head><body><pre>\n\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n</pre></body></html>'''
-+ no_shift_jis_html = b'''<html><head>\n<meta http-equiv="Content-language" content="ja" /></head><body><pre>\n\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n</pre></body></html>'''
- soup = BeautifulSoup(no_shift_jis_html)
-
- # Beautiful Soup used to try to rewrite the meta tag even if the
-@@ -751,16 +751,16 @@
- soup = BeautifulSoup(no_shift_jis_html, parseOnlyThese=strainer)
- self.assertEquals(soup.contents[0].name, 'pre')
-
-- meta_tag = ('<meta content="text/html; charset=x-sjis" '
-- 'http-equiv="Content-type" />')
-+ meta_tag = (b'<meta content="text/html; charset=x-sjis" '
-+ b'http-equiv="Content-type" />')
- shift_jis_html = (
-- '<html><head>\n%s\n'
-- '<meta http-equiv="Content-language" content="ja" />'
-- '</head><body><pre>\n'
-- '\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f'
-- '\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c'
-- '\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n'
-- '</pre></body></html>') % meta_tag
-+ b'<html><head>\n' + meta_tag + b'\n'
-+ b'<meta http-equiv="Content-language" content="ja" />'
-+ b'</head><body><pre>\n'
-+ b'\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f'
-+ b'\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c'
-+ b'\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n'
-+ b'</pre></body></html>')
- soup = BeautifulSoup(shift_jis_html)
- if soup.originalEncoding != "shift-jis":
- raise Exception("Test failed when parsing shift-jis document "
-@@ -773,60 +773,60 @@
- content_type_tag = soup.meta['content']
- self.assertEquals(content_type_tag[content_type_tag.find('charset='):],
- 'charset=%SOUP-ENCODING%')
-- content_type = str(soup.meta)
-+ content_type = soup.meta.decode()
- index = content_type.find('charset=')
- self.assertEqual(content_type[index:index+len('charset=utf8')+1],
- 'charset=utf-8')
- content_type = soup.meta.encode('shift-jis')
-- index = content_type.find('charset=')
-+ index = content_type.find(b'charset=')
- self.assertEqual(content_type[index:index+len('charset=shift-jis')],
- 'charset=shift-jis'.encode())
-
- self.assertEquals(soup.encode('utf-8'), (
-- '<html><head>\n'
-- '<meta content="text/html; charset=utf-8" '
-- 'http-equiv="Content-type" />\n'
-- '<meta http-equiv="Content-language" content="ja" />'
-- '</head><body><pre>\n'
-- '\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafShift-JIS\xe3\x81\xa7\xe3'
-- '\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3'
-- '\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6'
-- '\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3'
-- '\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n'
-- '</pre></body></html>'))
-+ b'<html><head>\n'
-+ b'<meta content="text/html; charset=utf-8" '
-+ b'http-equiv="Content-type" />\n'
-+ b'<meta http-equiv="Content-language" content="ja" />'
-+ b'</head><body><pre>\n'
-+ b'\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafShift-JIS\xe3\x81\xa7\xe3'
-+ b'\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3'
-+ b'\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6'
-+ b'\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3'
-+ b'\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n'
-+ b'</pre></body></html>'))
- self.assertEquals(soup.encode("shift-jis"),
- shift_jis_html.replace('x-sjis'.encode(),
- 'shift-jis'.encode()))
-
-- isolatin = """<html><meta http-equiv="Content-type" content="text/html; charset=ISO-Latin-1" />Sacr\xe9 bleu!</html>"""
-+ isolatin = b"""<html><meta http-equiv="Content-type" content="text/html; charset=ISO-Latin-1" />Sacr\xe9 bleu!</html>"""
- soup = BeautifulSoup(isolatin)
-
- utf8 = isolatin.replace("ISO-Latin-1".encode(), "utf-8".encode())
-- utf8 = utf8.replace("\xe9", "\xc3\xa9")
-+ utf8 = utf8.replace(b"\xe9", b"\xc3\xa9")
- self.assertSoupEquals(soup.encode("utf-8"), utf8, encoding='utf-8')
-
- def testHebrew(self):
-- iso_8859_8= '<HEAD>\n<TITLE>Hebrew (ISO 8859-8) in Visual Directionality</TITLE>\n\n\n\n</HEAD>\n<BODY>\n<H1>Hebrew (ISO 8859-8) in Visual Directionality</H1>\n\xed\xe5\xec\xf9\n</BODY>\n'
-- utf8 = '<head>\n<title>Hebrew (ISO 8859-8) in Visual Directionality</title>\n</head>\n<body>\n<h1>Hebrew (ISO 8859-8) in Visual Directionality</h1>\n\xd7\x9d\xd7\x95\xd7\x9c\xd7\xa9\n</body>\n'
-+ iso_8859_8= b'<HEAD>\n<TITLE>Hebrew (ISO 8859-8) in Visual Directionality</TITLE>\n\n\n\n</HEAD>\n<BODY>\n<H1>Hebrew (ISO 8859-8) in Visual Directionality</H1>\n\xed\xe5\xec\xf9\n</BODY>\n'
-+ utf8 = b'<head>\n<title>Hebrew (ISO 8859-8) in Visual Directionality</title>\n</head>\n<body>\n<h1>Hebrew (ISO 8859-8) in Visual Directionality</h1>\n\xd7\x9d\xd7\x95\xd7\x9c\xd7\xa9\n</body>\n'
- soup = BeautifulStoneSoup(iso_8859_8, fromEncoding="iso-8859-8")
- self.assertEquals(soup.encode('utf-8'), utf8)
-
- def testSmartQuotesNotSoSmartAnymore(self):
-- self.assertSoupEquals("\x91Foo\x92 <!--blah-->",
-+ self.assertSoupEquals(b"\x91Foo\x92 <!--blah-->",
- '&lsquo;Foo&rsquo; <!--blah-->')
-
- def testDontConvertSmartQuotesWhenAlsoConvertingEntities(self):
-- smartQuotes = "Il a dit, \x8BSacr&eacute; bl&#101;u!\x9b"
-+ smartQuotes = b"Il a dit, \x8BSacr&eacute; bl&#101;u!\x9b"
- soup = BeautifulSoup(smartQuotes)
- self.assertEquals(soup.decode(),
- 'Il a dit, &lsaquo;Sacr&eacute; bl&#101;u!&rsaquo;')
- builder = HTMLParserTreeBuilder(convertEntities="html")
- soup = BeautifulSoup(smartQuotes, builder)
- self.assertEquals(soup.encode('utf-8'),
-- 'Il a dit, \xe2\x80\xb9Sacr\xc3\xa9 bleu!\xe2\x80\xba')
-+ b'Il a dit, \xe2\x80\xb9Sacr\xc3\xa9 bleu!\xe2\x80\xba')
-
- def testDontSeeSmartQuotesWhereThereAreNone(self):
-- utf_8 = "\343\202\261\343\203\274\343\202\277\343\202\244 Watch"
-+ utf_8 = b"\343\202\261\343\203\274\343\202\277\343\202\244 Watch"
- self.assertSoupEquals(utf_8, encoding='utf-8')
-
-
-
diff --git a/testall.sh b/testall.sh
deleted file mode 100755
index 801124f..0000000
--- a/testall.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/bin/sh
-python BeautifulSoupTests.py && sh to3.sh && cd python3 && python3 BeautifulSoupTests.py
diff --git a/src/beautifulsoup/testing.py b/testing.py
index 20d087e..20d087e 100644
--- a/src/beautifulsoup/testing.py
+++ b/testing.py
diff --git a/to3.sh b/to3.sh
deleted file mode 100755
index 26b3246..0000000
--- a/to3.sh
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/bin/sh
-mkdir python3
-for i in BeautifulSoupTests.py builder.py element.py dammit.py
-do
- cp $i python3/
- 2to3-3.0 -x next $i | patch -p0 python3/$i
- cp python3/$i python3/$i.orig
- patch -p0 python3/$i < $i.3.diff
-done
\ No newline at end of file
diff --git a/src/beautifulsoup/util.py b/util.py
index 693a7e2..693a7e2 100644
--- a/src/beautifulsoup/util.py
+++ b/util.py