diff options
Diffstat (limited to 'src/beautifulsoup/python3.diff')
-rw-r--r-- | src/beautifulsoup/python3.diff | 208 |
1 files changed, 0 insertions, 208 deletions
diff --git a/src/beautifulsoup/python3.diff b/src/beautifulsoup/python3.diff deleted file mode 100644 index 142f2b1..0000000 --- a/src/beautifulsoup/python3.diff +++ /dev/null @@ -1,208 +0,0 @@ -=== modified file 'src/beautifulsoup/builder.py' ---- src/beautifulsoup/builder.py 2009-04-10 15:22:53 +0000 -+++ src/beautifulsoup/builder.py 2009-04-10 17:12:49 +0000 -@@ -6,7 +6,7 @@ - from element import name2codepoint - from element import ( - CData, Comment, Declaration, Entities, ProcessingInstruction) --from HTMLParser import HTMLParser, HTMLParseError -+from html.parser import HTMLParser, HTMLParseError - - __all__ = ['TreeBuilder', - 'HTMLParserXMLTreeBuilder', - -=== modified file 'src/beautifulsoup/element.py' ---- src/beautifulsoup/element.py 2009-04-10 15:22:53 +0000 -+++ src/beautifulsoup/element.py 2009-04-10 17:12:49 +0000 -@@ -1,7 +1,7 @@ - import re - import types - try: -- from htmlentitydefs import name2codepoint -+ from html.entities import name2codepoint - except ImportError: - name2codepoint = {} - -@@ -254,7 +254,7 @@ - g = generator() - while True: - try: -- i = g.next() -+ i = g.__next__() - except StopIteration: - break - if i: - -=== modified file 'src/beautifulsoup/tests/test_soup.py' ---- src/beautifulsoup/tests/test_soup.py 2009-04-10 15:45:04 +0000 -+++ src/beautifulsoup/tests/test_soup.py 2009-04-10 17:15:31 +0000 -@@ -635,9 +635,9 @@ - self.assertSoupEquals('<b>hello there</b>') - - def testEntitiesInAttributeValues(self): -- self.assertSoupEquals('<x t="xñ">', '<x t="x\xc3\xb1"></x>', -+ self.assertSoupEquals('<x t="xñ">', b'<x t="x\xc3\xb1"></x>', - encoding='utf-8') -- self.assertSoupEquals('<x t="xñ">', '<x t="x\xc3\xb1"></x>', -+ self.assertSoupEquals('<x t="xñ">', b'<x t="x\xc3\xb1"></x>', - encoding='utf-8') - - builder = HTMLParserTreeBuilder(convertEntities=Entities.HTML_ENTITIES) -@@ -681,11 +681,11 @@ - smart quote fixes.""" - - def testUnicodeDammitStandalone(self): -- markup = "<foo>\x92</foo>" -+ markup = b"<foo>\x92</foo>" - dammit = UnicodeDammit(markup) - self.assertEquals(dammit.unicode, "<foo>’</foo>") - -- hebrew = "\xed\xe5\xec\xf9" -+ hebrew = b"\xed\xe5\xec\xf9" - dammit = UnicodeDammit(hebrew, ["iso-8859-8"]) - self.assertEquals(dammit.unicode, u'\u05dd\u05d5\u05dc\u05e9') - self.assertEquals(dammit.originalEncoding, 'iso-8859-8') -@@ -697,7 +697,7 @@ - - unicodeData = u"<foo>\u00FC</foo>" - utf8 = unicodeData.encode("utf-8") -- self.assertEquals(utf8, '<foo>\xc3\xbc</foo>') -+ self.assertEquals(utf8, b'<foo>\xc3\xbc</foo>') - - unicodeSoup = BeautifulStoneSoup(unicodeData) - self.assertEquals(unicodeData, unicodeSoup.decode()) -@@ -724,8 +724,8 @@ - self.assertEqual(soup.find(text=u'Räksmörgås'),u'Räksmörgås') - - def testRewrittenXMLHeader(self): -- euc_jp = '<?xml version="1.0 encoding="euc-jp"?>\n<foo>\n\xa4\xb3\xa4\xec\xa4\xcfEUC-JP\xa4\xc7\xa5\xb3\xa1\xbc\xa5\xc7\xa5\xa3\xa5\xf3\xa5\xb0\xa4\xb5\xa4\xec\xa4\xbf\xc6\xfc\xcb\xdc\xb8\xec\xa4\xce\xa5\xd5\xa5\xa1\xa5\xa4\xa5\xeb\xa4\xc7\xa4\xb9\xa1\xa3\n</foo>\n' -- utf8 = "<?xml version='1.0' encoding='utf-8'?>\n<foo>\n\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafEUC-JP\xe3\x81\xa7\xe3\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n</foo>\n" -+ euc_jp = b'<?xml version="1.0 encoding="euc-jp"?>\n<foo>\n\xa4\xb3\xa4\xec\xa4\xcfEUC-JP\xa4\xc7\xa5\xb3\xa1\xbc\xa5\xc7\xa5\xa3\xa5\xf3\xa5\xb0\xa4\xb5\xa4\xec\xa4\xbf\xc6\xfc\xcb\xdc\xb8\xec\xa4\xce\xa5\xd5\xa5\xa1\xa5\xa4\xa5\xeb\xa4\xc7\xa4\xb9\xa1\xa3\n</foo>\n' -+ utf8 = b"<?xml version='1.0' encoding='utf-8'?>\n<foo>\n\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafEUC-JP\xe3\x81\xa7\xe3\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n</foo>\n" - soup = BeautifulStoneSoup(euc_jp) - if soup.originalEncoding != "euc-jp": - raise Exception("Test failed when parsing euc-jp document. " -@@ -736,12 +736,12 @@ - self.assertEquals(soup.originalEncoding, "euc-jp") - self.assertEquals(soup.renderContents('utf-8'), utf8) - -- old_text = "<?xml encoding='windows-1252'><foo>\x92</foo>" -+ old_text = b"<?xml encoding='windows-1252'><foo>\x92</foo>" - new_text = "<?xml version='1.0' encoding='utf-8'?><foo>’</foo>" - self.assertSoupEquals(old_text, new_text) - - def testRewrittenMetaTag(self): -- no_shift_jis_html = '''<html><head>\n<meta http-equiv="Content-language" content="ja" /></head><body><pre>\n\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n</pre></body></html>''' -+ no_shift_jis_html = b'''<html><head>\n<meta http-equiv="Content-language" content="ja" /></head><body><pre>\n\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n</pre></body></html>''' - soup = BeautifulSoup(no_shift_jis_html) - - # Beautiful Soup used to try to rewrite the meta tag even if the -@@ -751,16 +751,16 @@ - soup = BeautifulSoup(no_shift_jis_html, parseOnlyThese=strainer) - self.assertEquals(soup.contents[0].name, 'pre') - -- meta_tag = ('<meta content="text/html; charset=x-sjis" ' -- 'http-equiv="Content-type" />') -+ meta_tag = (b'<meta content="text/html; charset=x-sjis" ' -+ b'http-equiv="Content-type" />') - shift_jis_html = ( -- '<html><head>\n%s\n' -- '<meta http-equiv="Content-language" content="ja" />' -- '</head><body><pre>\n' -- '\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f' -- '\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c' -- '\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n' -- '</pre></body></html>') % meta_tag -+ b'<html><head>\n' + meta_tag + b'\n' -+ b'<meta http-equiv="Content-language" content="ja" />' -+ b'</head><body><pre>\n' -+ b'\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f' -+ b'\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c' -+ b'\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n' -+ b'</pre></body></html>') - soup = BeautifulSoup(shift_jis_html) - if soup.originalEncoding != "shift-jis": - raise Exception("Test failed when parsing shift-jis document " -@@ -773,60 +773,60 @@ - content_type_tag = soup.meta['content'] - self.assertEquals(content_type_tag[content_type_tag.find('charset='):], - 'charset=%SOUP-ENCODING%') -- content_type = str(soup.meta) -+ content_type = soup.meta.decode() - index = content_type.find('charset=') - self.assertEqual(content_type[index:index+len('charset=utf8')+1], - 'charset=utf-8') - content_type = soup.meta.encode('shift-jis') -- index = content_type.find('charset=') -+ index = content_type.find(b'charset=') - self.assertEqual(content_type[index:index+len('charset=shift-jis')], - 'charset=shift-jis'.encode()) - - self.assertEquals(soup.encode('utf-8'), ( -- '<html><head>\n' -- '<meta content="text/html; charset=utf-8" ' -- 'http-equiv="Content-type" />\n' -- '<meta http-equiv="Content-language" content="ja" />' -- '</head><body><pre>\n' -- '\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafShift-JIS\xe3\x81\xa7\xe3' -- '\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3' -- '\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6' -- '\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3' -- '\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n' -- '</pre></body></html>')) -+ b'<html><head>\n' -+ b'<meta content="text/html; charset=utf-8" ' -+ b'http-equiv="Content-type" />\n' -+ b'<meta http-equiv="Content-language" content="ja" />' -+ b'</head><body><pre>\n' -+ b'\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafShift-JIS\xe3\x81\xa7\xe3' -+ b'\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3' -+ b'\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6' -+ b'\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3' -+ b'\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n' -+ b'</pre></body></html>')) - self.assertEquals(soup.encode("shift-jis"), - shift_jis_html.replace('x-sjis'.encode(), - 'shift-jis'.encode())) - -- isolatin = """<html><meta http-equiv="Content-type" content="text/html; charset=ISO-Latin-1" />Sacr\xe9 bleu!</html>""" -+ isolatin = b"""<html><meta http-equiv="Content-type" content="text/html; charset=ISO-Latin-1" />Sacr\xe9 bleu!</html>""" - soup = BeautifulSoup(isolatin) - - utf8 = isolatin.replace("ISO-Latin-1".encode(), "utf-8".encode()) -- utf8 = utf8.replace("\xe9", "\xc3\xa9") -+ utf8 = utf8.replace(b"\xe9", b"\xc3\xa9") - self.assertSoupEquals(soup.encode("utf-8"), utf8, encoding='utf-8') - - def testHebrew(self): -- iso_8859_8= '<HEAD>\n<TITLE>Hebrew (ISO 8859-8) in Visual Directionality</TITLE>\n\n\n\n</HEAD>\n<BODY>\n<H1>Hebrew (ISO 8859-8) in Visual Directionality</H1>\n\xed\xe5\xec\xf9\n</BODY>\n' -- utf8 = '<head>\n<title>Hebrew (ISO 8859-8) in Visual Directionality</title>\n</head>\n<body>\n<h1>Hebrew (ISO 8859-8) in Visual Directionality</h1>\n\xd7\x9d\xd7\x95\xd7\x9c\xd7\xa9\n</body>\n' -+ iso_8859_8= b'<HEAD>\n<TITLE>Hebrew (ISO 8859-8) in Visual Directionality</TITLE>\n\n\n\n</HEAD>\n<BODY>\n<H1>Hebrew (ISO 8859-8) in Visual Directionality</H1>\n\xed\xe5\xec\xf9\n</BODY>\n' -+ utf8 = b'<head>\n<title>Hebrew (ISO 8859-8) in Visual Directionality</title>\n</head>\n<body>\n<h1>Hebrew (ISO 8859-8) in Visual Directionality</h1>\n\xd7\x9d\xd7\x95\xd7\x9c\xd7\xa9\n</body>\n' - soup = BeautifulStoneSoup(iso_8859_8, fromEncoding="iso-8859-8") - self.assertEquals(soup.encode('utf-8'), utf8) - - def testSmartQuotesNotSoSmartAnymore(self): -- self.assertSoupEquals("\x91Foo\x92 <!--blah-->", -+ self.assertSoupEquals(b"\x91Foo\x92 <!--blah-->", - '‘Foo’ <!--blah-->') - - def testDontConvertSmartQuotesWhenAlsoConvertingEntities(self): -- smartQuotes = "Il a dit, \x8BSacré bleu!\x9b" -+ smartQuotes = b"Il a dit, \x8BSacré bleu!\x9b" - soup = BeautifulSoup(smartQuotes) - self.assertEquals(soup.decode(), - 'Il a dit, ‹Sacré bleu!›') - builder = HTMLParserTreeBuilder(convertEntities="html") - soup = BeautifulSoup(smartQuotes, builder) - self.assertEquals(soup.encode('utf-8'), -- 'Il a dit, \xe2\x80\xb9Sacr\xc3\xa9 bleu!\xe2\x80\xba') -+ b'Il a dit, \xe2\x80\xb9Sacr\xc3\xa9 bleu!\xe2\x80\xba') - - def testDontSeeSmartQuotesWhereThereAreNone(self): -- utf_8 = "\343\202\261\343\203\274\343\202\277\343\202\244 Watch" -+ utf_8 = b"\343\202\261\343\203\274\343\202\277\343\202\244 Watch" - self.assertSoupEquals(utf_8, encoding='utf-8') - - - |