summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/beautifulsoup/builder.py.3.diff 4
-rw-r--r-- src/beautifulsoup/dammit.py.3.diff 70
-rw-r--r-- src/beautifulsoup/element.py.3.diff 8
-rw-r--r-- src/beautifulsoup/python3.diff 208
-rw-r--r-- src/beautifulsoup/tests/test_soup.py.3.diff 122
5 files changed, 208 insertions, 204 deletions
diff --git a/src/beautifulsoup/builder.py.3.diff b/src/beautifulsoup/builder.py.3.diff
deleted file mode 100644
index 91b510d..0000000
--- a/src/beautifulsoup/builder.py.3.diff
+++ /dev/null
@@ -1,4 +0,0 @@
-90c90
-< from HTMLParser import HTMLParser, HTMLParseError
----
-> from html.parser import HTMLParser, HTMLParseError
diff --git a/src/beautifulsoup/dammit.py.3.diff b/src/beautifulsoup/dammit.py.3.diff
deleted file mode 100644
index f6bab68..0000000
--- a/src/beautifulsoup/dammit.py.3.diff
+++ /dev/null
@@ -1,70 +0,0 @@
-1800c1800
-< smart_quotes_re = "([\x80-\x9f])"
----
-> smart_quotes_re = b"([\x80-\x9f])"
-1952,1983c1952,1983
-< MS_CHARS = { '\x80' : ('euro', '20AC'),
-< '\x81' : ' ',
-< '\x82' : ('sbquo', '201A'),
-< '\x83' : ('fnof', '192'),
-< '\x84' : ('bdquo', '201E'),
-< '\x85' : ('hellip', '2026'),
-< '\x86' : ('dagger', '2020'),
-< '\x87' : ('Dagger', '2021'),
-< '\x88' : ('circ', '2C6'),
-< '\x89' : ('permil', '2030'),
-< '\x8A' : ('Scaron', '160'),
-< '\x8B' : ('lsaquo', '2039'),
-< '\x8C' : ('OElig', '152'),
-< '\x8D' : '?',
-< '\x8E' : ('#x17D', '17D'),
-< '\x8F' : '?',
-< '\x90' : '?',
-< '\x91' : ('lsquo', '2018'),
-< '\x92' : ('rsquo', '2019'),
-< '\x93' : ('ldquo', '201C'),
-< '\x94' : ('rdquo', '201D'),
-< '\x95' : ('bull', '2022'),
-< '\x96' : ('ndash', '2013'),
-< '\x97' : ('mdash', '2014'),
-< '\x98' : ('tilde', '2DC'),
-< '\x99' : ('trade', '2122'),
-< '\x9a' : ('scaron', '161'),
-< '\x9b' : ('rsaquo', '203A'),
-< '\x9c' : ('oelig', '153'),
-< '\x9d' : '?',
-< '\x9e' : ('#x17E', '17E'),
-< '\x9f' : ('Yuml', ''),}
----
-> MS_CHARS = { b'\x80' : ('euro', '20AC'),
-> b'\x81' : ' ',
-> b'\x82' : ('sbquo', '201A'),
-> b'\x83' : ('fnof', '192'),
-> b'\x84' : ('bdquo', '201E'),
-> b'\x85' : ('hellip', '2026'),
-> b'\x86' : ('dagger', '2020'),
-> b'\x87' : ('Dagger', '2021'),
-> b'\x88' : ('circ', '2C6'),
-> b'\x89' : ('permil', '2030'),
-> b'\x8A' : ('Scaron', '160'),
-> b'\x8B' : ('lsaquo', '2039'),
-> b'\x8C' : ('OElig', '152'),
-> b'\x8D' : '?',
-> b'\x8E' : ('#x17D', '17D'),
-> b'\x8F' : '?',
-> b'\x90' : '?',
-> b'\x91' : ('lsquo', '2018'),
-> b'\x92' : ('rsquo', '2019'),
-> b'\x93' : ('ldquo', '201C'),
-> b'\x94' : ('rdquo', '201D'),
-> b'\x95' : ('bull', '2022'),
-> b'\x96' : ('ndash', '2013'),
-> b'\x97' : ('mdash', '2014'),
-> b'\x98' : ('tilde', '2DC'),
-> b'\x99' : ('trade', '2122'),
-> b'\x9a' : ('scaron', '161'),
-> b'\x9b' : ('rsaquo', '203A'),
-> b'\x9c' : ('oelig', '153'),
-> b'\x9d' : '?',
-> b'\x9e' : ('#x17E', '17E'),
-> b'\x9f' : ('Yuml', ''),}
diff --git a/src/beautifulsoup/element.py.3.diff b/src/beautifulsoup/element.py.3.diff
deleted file mode 100644
index 4549edd..0000000
--- a/src/beautifulsoup/element.py.3.diff
+++ /dev/null
@@ -1,8 +0,0 @@
-92c92
-< from htmlentitydefs import name2codepoint
----
-> from html.entities import name2codepoint
-337c337
-< i = g.next()
----
-> i = g.__next__()
diff --git a/src/beautifulsoup/python3.diff b/src/beautifulsoup/python3.diff
new file mode 100644
index 0000000..142f2b1
--- /dev/null
+++ b/src/beautifulsoup/python3.diff
@@ -0,0 +1,208 @@
+=== modified file 'src/beautifulsoup/builder.py'
+--- src/beautifulsoup/builder.py 2009-04-10 15:22:53 +0000
++++ src/beautifulsoup/builder.py 2009-04-10 17:12:49 +0000
+@@ -6,7 +6,7 @@
+ from element import name2codepoint
+ from element import (
+ CData, Comment, Declaration, Entities, ProcessingInstruction)
+-from HTMLParser import HTMLParser, HTMLParseError
++from html.parser import HTMLParser, HTMLParseError
+
+ __all__ = ['TreeBuilder',
+ 'HTMLParserXMLTreeBuilder',
+
+=== modified file 'src/beautifulsoup/element.py'
+--- src/beautifulsoup/element.py 2009-04-10 15:22:53 +0000
++++ src/beautifulsoup/element.py 2009-04-10 17:12:49 +0000
+@@ -1,7 +1,7 @@
+ import re
+ import types
+ try:
+- from htmlentitydefs import name2codepoint
++ from html.entities import name2codepoint
+ except ImportError:
+ name2codepoint = {}
+
+@@ -254,7 +254,7 @@
+ g = generator()
+ while True:
+ try:
+- i = g.next()
++ i = g.__next__()
+ except StopIteration:
+ break
+ if i:
+
+=== modified file 'src/beautifulsoup/tests/test_soup.py'
+--- src/beautifulsoup/tests/test_soup.py 2009-04-10 15:45:04 +0000
++++ src/beautifulsoup/tests/test_soup.py 2009-04-10 17:15:31 +0000
+@@ -635,9 +635,9 @@
+ self.assertSoupEquals('<b>hello&nbsp;there</b>')
+
+ def testEntitiesInAttributeValues(self):
+- self.assertSoupEquals('<x t="x&#241;">', '<x t="x\xc3\xb1"></x>',
++ self.assertSoupEquals('<x t="x&#241;">', b'<x t="x\xc3\xb1"></x>',
+ encoding='utf-8')
+- self.assertSoupEquals('<x t="x&#xf1;">', '<x t="x\xc3\xb1"></x>',
++ self.assertSoupEquals('<x t="x&#xf1;">', b'<x t="x\xc3\xb1"></x>',
+ encoding='utf-8')
+
+ builder = HTMLParserTreeBuilder(convertEntities=Entities.HTML_ENTITIES)
+@@ -681,11 +681,11 @@
+ smart quote fixes."""
+
+ def testUnicodeDammitStandalone(self):
+- markup = "<foo>\x92</foo>"
++ markup = b"<foo>\x92</foo>"
+ dammit = UnicodeDammit(markup)
+ self.assertEquals(dammit.unicode, "<foo>&#x2019;</foo>")
+
+- hebrew = "\xed\xe5\xec\xf9"
++ hebrew = b"\xed\xe5\xec\xf9"
+ dammit = UnicodeDammit(hebrew, ["iso-8859-8"])
+ self.assertEquals(dammit.unicode, u'\u05dd\u05d5\u05dc\u05e9')
+ self.assertEquals(dammit.originalEncoding, 'iso-8859-8')
+@@ -697,7 +697,7 @@
+
+ unicodeData = u"<foo>\u00FC</foo>"
+ utf8 = unicodeData.encode("utf-8")
+- self.assertEquals(utf8, '<foo>\xc3\xbc</foo>')
++ self.assertEquals(utf8, b'<foo>\xc3\xbc</foo>')
+
+ unicodeSoup = BeautifulStoneSoup(unicodeData)
+ self.assertEquals(unicodeData, unicodeSoup.decode())
+@@ -724,8 +724,8 @@
+ self.assertEqual(soup.find(text=u'Räksmörgås'),u'Räksmörgås')
+
+ def testRewrittenXMLHeader(self):
+- euc_jp = '<?xml version="1.0 encoding="euc-jp"?>\n<foo>\n\xa4\xb3\xa4\xec\xa4\xcfEUC-JP\xa4\xc7\xa5\xb3\xa1\xbc\xa5\xc7\xa5\xa3\xa5\xf3\xa5\xb0\xa4\xb5\xa4\xec\xa4\xbf\xc6\xfc\xcb\xdc\xb8\xec\xa4\xce\xa5\xd5\xa5\xa1\xa5\xa4\xa5\xeb\xa4\xc7\xa4\xb9\xa1\xa3\n</foo>\n'
+- utf8 = "<?xml version='1.0' encoding='utf-8'?>\n<foo>\n\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafEUC-JP\xe3\x81\xa7\xe3\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n</foo>\n"
++ euc_jp = b'<?xml version="1.0 encoding="euc-jp"?>\n<foo>\n\xa4\xb3\xa4\xec\xa4\xcfEUC-JP\xa4\xc7\xa5\xb3\xa1\xbc\xa5\xc7\xa5\xa3\xa5\xf3\xa5\xb0\xa4\xb5\xa4\xec\xa4\xbf\xc6\xfc\xcb\xdc\xb8\xec\xa4\xce\xa5\xd5\xa5\xa1\xa5\xa4\xa5\xeb\xa4\xc7\xa4\xb9\xa1\xa3\n</foo>\n'
++ utf8 = b"<?xml version='1.0' encoding='utf-8'?>\n<foo>\n\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafEUC-JP\xe3\x81\xa7\xe3\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n</foo>\n"
+ soup = BeautifulStoneSoup(euc_jp)
+ if soup.originalEncoding != "euc-jp":
+ raise Exception("Test failed when parsing euc-jp document. "
+@@ -736,12 +736,12 @@
+ self.assertEquals(soup.originalEncoding, "euc-jp")
+ self.assertEquals(soup.renderContents('utf-8'), utf8)
+
+- old_text = "<?xml encoding='windows-1252'><foo>\x92</foo>"
++ old_text = b"<?xml encoding='windows-1252'><foo>\x92</foo>"
+ new_text = "<?xml version='1.0' encoding='utf-8'?><foo>&rsquo;</foo>"
+ self.assertSoupEquals(old_text, new_text)
+
+ def testRewrittenMetaTag(self):
+- no_shift_jis_html = '''<html><head>\n<meta http-equiv="Content-language" content="ja" /></head><body><pre>\n\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n</pre></body></html>'''
++ no_shift_jis_html = b'''<html><head>\n<meta http-equiv="Content-language" content="ja" /></head><body><pre>\n\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n</pre></body></html>'''
+ soup = BeautifulSoup(no_shift_jis_html)
+
+ # Beautiful Soup used to try to rewrite the meta tag even if the
+@@ -751,16 +751,16 @@
+ soup = BeautifulSoup(no_shift_jis_html, parseOnlyThese=strainer)
+ self.assertEquals(soup.contents[0].name, 'pre')
+
+- meta_tag = ('<meta content="text/html; charset=x-sjis" '
+- 'http-equiv="Content-type" />')
++ meta_tag = (b'<meta content="text/html; charset=x-sjis" '
++ b'http-equiv="Content-type" />')
+ shift_jis_html = (
+- '<html><head>\n%s\n'
+- '<meta http-equiv="Content-language" content="ja" />'
+- '</head><body><pre>\n'
+- '\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f'
+- '\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c'
+- '\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n'
+- '</pre></body></html>') % meta_tag
++ b'<html><head>\n' + meta_tag + b'\n'
++ b'<meta http-equiv="Content-language" content="ja" />'
++ b'</head><body><pre>\n'
++ b'\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f'
++ b'\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c'
++ b'\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n'
++ b'</pre></body></html>')
+ soup = BeautifulSoup(shift_jis_html)
+ if soup.originalEncoding != "shift-jis":
+ raise Exception("Test failed when parsing shift-jis document "
+@@ -773,60 +773,60 @@
+ content_type_tag = soup.meta['content']
+ self.assertEquals(content_type_tag[content_type_tag.find('charset='):],
+ 'charset=%SOUP-ENCODING%')
+- content_type = str(soup.meta)
++ content_type = soup.meta.decode()
+ index = content_type.find('charset=')
+ self.assertEqual(content_type[index:index+len('charset=utf8')+1],
+ 'charset=utf-8')
+ content_type = soup.meta.encode('shift-jis')
+- index = content_type.find('charset=')
++ index = content_type.find(b'charset=')
+ self.assertEqual(content_type[index:index+len('charset=shift-jis')],
+ 'charset=shift-jis'.encode())
+
+ self.assertEquals(soup.encode('utf-8'), (
+- '<html><head>\n'
+- '<meta content="text/html; charset=utf-8" '
+- 'http-equiv="Content-type" />\n'
+- '<meta http-equiv="Content-language" content="ja" />'
+- '</head><body><pre>\n'
+- '\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafShift-JIS\xe3\x81\xa7\xe3'
+- '\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3'
+- '\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6'
+- '\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3'
+- '\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n'
+- '</pre></body></html>'))
++ b'<html><head>\n'
++ b'<meta content="text/html; charset=utf-8" '
++ b'http-equiv="Content-type" />\n'
++ b'<meta http-equiv="Content-language" content="ja" />'
++ b'</head><body><pre>\n'
++ b'\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafShift-JIS\xe3\x81\xa7\xe3'
++ b'\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3'
++ b'\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6'
++ b'\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3'
++ b'\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n'
++ b'</pre></body></html>'))
+ self.assertEquals(soup.encode("shift-jis"),
+ shift_jis_html.replace('x-sjis'.encode(),
+ 'shift-jis'.encode()))
+
+- isolatin = """<html><meta http-equiv="Content-type" content="text/html; charset=ISO-Latin-1" />Sacr\xe9 bleu!</html>"""
++ isolatin = b"""<html><meta http-equiv="Content-type" content="text/html; charset=ISO-Latin-1" />Sacr\xe9 bleu!</html>"""
+ soup = BeautifulSoup(isolatin)
+
+ utf8 = isolatin.replace("ISO-Latin-1".encode(), "utf-8".encode())
+- utf8 = utf8.replace("\xe9", "\xc3\xa9")
++ utf8 = utf8.replace(b"\xe9", b"\xc3\xa9")
+ self.assertSoupEquals(soup.encode("utf-8"), utf8, encoding='utf-8')
+
+ def testHebrew(self):
+- iso_8859_8= '<HEAD>\n<TITLE>Hebrew (ISO 8859-8) in Visual Directionality</TITLE>\n\n\n\n</HEAD>\n<BODY>\n<H1>Hebrew (ISO 8859-8) in Visual Directionality</H1>\n\xed\xe5\xec\xf9\n</BODY>\n'
+- utf8 = '<head>\n<title>Hebrew (ISO 8859-8) in Visual Directionality</title>\n</head>\n<body>\n<h1>Hebrew (ISO 8859-8) in Visual Directionality</h1>\n\xd7\x9d\xd7\x95\xd7\x9c\xd7\xa9\n</body>\n'
++ iso_8859_8= b'<HEAD>\n<TITLE>Hebrew (ISO 8859-8) in Visual Directionality</TITLE>\n\n\n\n</HEAD>\n<BODY>\n<H1>Hebrew (ISO 8859-8) in Visual Directionality</H1>\n\xed\xe5\xec\xf9\n</BODY>\n'
++ utf8 = b'<head>\n<title>Hebrew (ISO 8859-8) in Visual Directionality</title>\n</head>\n<body>\n<h1>Hebrew (ISO 8859-8) in Visual Directionality</h1>\n\xd7\x9d\xd7\x95\xd7\x9c\xd7\xa9\n</body>\n'
+ soup = BeautifulStoneSoup(iso_8859_8, fromEncoding="iso-8859-8")
+ self.assertEquals(soup.encode('utf-8'), utf8)
+
+ def testSmartQuotesNotSoSmartAnymore(self):
+- self.assertSoupEquals("\x91Foo\x92 <!--blah-->",
++ self.assertSoupEquals(b"\x91Foo\x92 <!--blah-->",
+ '&lsquo;Foo&rsquo; <!--blah-->')
+
+ def testDontConvertSmartQuotesWhenAlsoConvertingEntities(self):
+- smartQuotes = "Il a dit, \x8BSacr&eacute; bl&#101;u!\x9b"
++ smartQuotes = b"Il a dit, \x8BSacr&eacute; bl&#101;u!\x9b"
+ soup = BeautifulSoup(smartQuotes)
+ self.assertEquals(soup.decode(),
+ 'Il a dit, &lsaquo;Sacr&eacute; bl&#101;u!&rsaquo;')
+ builder = HTMLParserTreeBuilder(convertEntities="html")
+ soup = BeautifulSoup(smartQuotes, builder)
+ self.assertEquals(soup.encode('utf-8'),
+- 'Il a dit, \xe2\x80\xb9Sacr\xc3\xa9 bleu!\xe2\x80\xba')
++ b'Il a dit, \xe2\x80\xb9Sacr\xc3\xa9 bleu!\xe2\x80\xba')
+
+ def testDontSeeSmartQuotesWhereThereAreNone(self):
+- utf_8 = "\343\202\261\343\203\274\343\202\277\343\202\244 Watch"
++ utf_8 = b"\343\202\261\343\203\274\343\202\277\343\202\244 Watch"
+ self.assertSoupEquals(utf_8, encoding='utf-8')
+
+
+
diff --git a/src/beautifulsoup/tests/test_soup.py.3.diff b/src/beautifulsoup/tests/test_soup.py.3.diff
deleted file mode 100644
index fc9636c..0000000
--- a/src/beautifulsoup/tests/test_soup.py.3.diff
+++ /dev/null
@@ -1,122 +0,0 @@
-433c433
-< self.assertTrue('attr' in BeautifulSoup(text).foo)
----
-> self.assertTrue(BeautifulSoup(text).foo.has_key('attr'))
-622c622
-< self.assertSoupEquals('<x t="x&#241;">', '<x t="x\xc3\xb1"></x>',
----
-> self.assertSoupEquals('<x t="x&#241;">', b'<x t="x\xc3\xb1"></x>',
-624c624
-< self.assertSoupEquals('<x t="x&#xf1;">', '<x t="x\xc3\xb1"></x>',
----
-> self.assertSoupEquals('<x t="x&#xf1;">', b'<x t="x\xc3\xb1"></x>',
-671c671
-< markup = "<foo>\x92</foo>"
----
-> markup = b"<foo>\x92</foo>"
-675c675
-< hebrew = "\xed\xe5\xec\xf9"
----
-> hebrew = b"\xed\xe5\xec\xf9"
-687c687
-< self.assertEquals(utf8, '<foo>\xc3\xbc</foo>')
----
-> self.assertEquals(utf8, b'<foo>\xc3\xbc</foo>')
-714,715c714,715
-< euc_jp = '<?xml version="1.0 encoding="euc-jp"?>\n<foo>\n\xa4\xb3\xa4\xec\xa4\xcfEUC-JP\xa4\xc7\xa5\xb3\xa1\xbc\xa5\xc7\xa5\xa3\xa5\xf3\xa5\xb0\xa4\xb5\xa4\xec\xa4\xbf\xc6\xfc\xcb\xdc\xb8\xec\xa4\xce\xa5\xd5\xa5\xa1\xa5\xa4\xa5\xeb\xa4\xc7\xa4\xb9\xa1\xa3\n</foo>\n'
-< utf8 = "<?xml version='1.0' encoding='utf-8'?>\n<foo>\n\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafEUC-JP\xe3\x81\xa7\xe3\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n</foo>\n"
----
-> euc_jp = b'<?xml version="1.0 encoding="euc-jp"?>\n<foo>\n\xa4\xb3\xa4\xec\xa4\xcfEUC-JP\xa4\xc7\xa5\xb3\xa1\xbc\xa5\xc7\xa5\xa3\xa5\xf3\xa5\xb0\xa4\xb5\xa4\xec\xa4\xbf\xc6\xfc\xcb\xdc\xb8\xec\xa4\xce\xa5\xd5\xa5\xa1\xa5\xa4\xa5\xeb\xa4\xc7\xa4\xb9\xa1\xa3\n</foo>\n'
-> utf8 = b"<?xml version='1.0' encoding='utf-8'?>\n<foo>\n\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafEUC-JP\xe3\x81\xa7\xe3\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n</foo>\n"
-726c726
-< old_text = "<?xml encoding='windows-1252'><foo>\x92</foo>"
----
-> old_text = b"<?xml encoding='windows-1252'><foo>\x92</foo>"
-731c731
-< no_shift_jis_html = '''<html><head>\n<meta http-equiv="Content-language" content="ja" /></head><body><pre>\n\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n</pre></body></html>'''
----
-> no_shift_jis_html = b'''<html><head>\n<meta http-equiv="Content-language" content="ja" /></head><body><pre>\n\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n</pre></body></html>'''
-741,742c741,742
-< meta_tag = ('<meta content="text/html; charset=x-sjis" '
-< 'http-equiv="Content-type" />')
----
-> meta_tag = (b'<meta content="text/html; charset=x-sjis" '
-> b'http-equiv="Content-type" />')
-744,750c744,750
-< '<html><head>\n%s\n'
-< '<meta http-equiv="Content-language" content="ja" />'
-< '</head><body><pre>\n'
-< '\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f'
-< '\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c'
-< '\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n'
-< '</pre></body></html>') % meta_tag
----
-> b'<html><head>\n' + meta_tag + b'\n'
-> b'<meta http-equiv="Content-language" content="ja" />'
-> b'</head><body><pre>\n'
-> b'\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f'
-> b'\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c'
-> b'\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n'
-> b'</pre></body></html>')
-763c763
-< content_type = str(soup.meta)
----
-> content_type = soup.meta.decode()
-768c768
-< index = content_type.find('charset=')
----
-> index = content_type.find(b'charset=')
-773,783c773,783
-< '<html><head>\n'
-< '<meta content="text/html; charset=utf-8" '
-< 'http-equiv="Content-type" />\n'
-< '<meta http-equiv="Content-language" content="ja" />'
-< '</head><body><pre>\n'
-< '\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafShift-JIS\xe3\x81\xa7\xe3'
-< '\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3'
-< '\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6'
-< '\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3'
-< '\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n'
-< '</pre></body></html>'))
----
-> b'<html><head>\n'
-> b'<meta content="text/html; charset=utf-8" '
-> b'http-equiv="Content-type" />\n'
-> b'<meta http-equiv="Content-language" content="ja" />'
-> b'</head><body><pre>\n'
-> b'\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafShift-JIS\xe3\x81\xa7\xe3'
-> b'\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3'
-> b'\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6'
-> b'\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3'
-> b'\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n'
-> b'</pre></body></html>'))
-788c788
-< isolatin = """<html><meta http-equiv="Content-type" content="text/html; charset=ISO-Latin-1" />Sacr\xe9 bleu!</html>"""
----
-> isolatin = b"""<html><meta http-equiv="Content-type" content="text/html; charset=ISO-Latin-1" />Sacr\xe9 bleu!</html>"""
-792c792
-< utf8 = utf8.replace("\xe9", "\xc3\xa9")
----
-> utf8 = utf8.replace(b"\xe9", b"\xc3\xa9")
-796,797c796,797
-< iso_8859_8= '<HEAD>\n<TITLE>Hebrew (ISO 8859-8) in Visual Directionality</TITLE>\n\n\n\n</HEAD>\n<BODY>\n<H1>Hebrew (ISO 8859-8) in Visual Directionality</H1>\n\xed\xe5\xec\xf9\n</BODY>\n'
-< utf8 = '<head>\n<title>Hebrew (ISO 8859-8) in Visual Directionality</title>\n</head>\n<body>\n<h1>Hebrew (ISO 8859-8) in Visual Directionality</h1>\n\xd7\x9d\xd7\x95\xd7\x9c\xd7\xa9\n</body>\n'
----
-> iso_8859_8= b'<HEAD>\n<TITLE>Hebrew (ISO 8859-8) in Visual Directionality</TITLE>\n\n\n\n</HEAD>\n<BODY>\n<H1>Hebrew (ISO 8859-8) in Visual Directionality</H1>\n\xed\xe5\xec\xf9\n</BODY>\n'
-> utf8 = b'<head>\n<title>Hebrew (ISO 8859-8) in Visual Directionality</title>\n</head>\n<body>\n<h1>Hebrew (ISO 8859-8) in Visual Directionality</h1>\n\xd7\x9d\xd7\x95\xd7\x9c\xd7\xa9\n</body>\n'
-802c802
-< self.assertSoupEquals("\x91Foo\x92 <!--blah-->",
----
-> self.assertSoupEquals(b"\x91Foo\x92 <!--blah-->",
-806c806
-< smartQuotes = "Il a dit, \x8BSacr&eacute; bl&#101;u!\x9b"
----
-> smartQuotes = b"Il a dit, \x8BSacr&eacute; bl&#101;u!\x9b"
-812c812
-< 'Il a dit, \xe2\x80\xb9Sacr\xc3\xa9 bleu!\xe2\x80\xba')
----
-> b'Il a dit, \xe2\x80\xb9Sacr\xc3\xa9 bleu!\xe2\x80\xba')
-815c815
-< utf_8 = "\343\202\261\343\203\274\343\202\277\343\202\244 Watch"
----
-> utf_8 = b"\343\202\261\343\203\274\343\202\277\343\202\244 Watch"