Diffstat (limited to 'bs4/tests')
-rw-r--r--  bs4/tests/test_dammit.py           |  58
-rw-r--r--  bs4/tests/test_formatter.py        |  20
-rw-r--r--  bs4/tests/test_html5lib.py         |  44
-rw-r--r--  bs4/tests/test_htmlparser.py       |  48
-rw-r--r--  bs4/tests/test_lxml.py             |   4
-rw-r--r--  bs4/tests/test_navigablestring.py  |   2
-rw-r--r--  bs4/tests/test_soup.py             |  50
-rw-r--r--  bs4/tests/test_tree.py             | 130
8 files changed, 178 insertions(+), 178 deletions(-)
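
The diff below is a mechanical Python 2 to Python 3 cleanup of the test suite: u''-prefixed string literals become plain literals, the deprecated assertEquals/assertRaisesRegexp aliases become assertEqual/assertRaisesRegex, "except ImportError, e:" becomes "except ImportError as e:", and unicode() becomes str(). As a rough sketch of the target style (illustrative only; the ExampleTest class and its method are invented, not code from this commit):

    import unittest

    try:
        import lxml  # optional dependency, mirroring the guarded imports below
        LXML_PRESENT = True
    except ImportError as e:  # Python 3 syntax; Python 2 also allowed "except ImportError, e:"
        LXML_PRESENT = False

    class ExampleTest(unittest.TestCase):
        def test_plain_string_literals(self):
            markup = "Räksmörgås"                  # no u"" prefix; str is Unicode in Python 3
            self.assertEqual(markup, str(markup))  # assertEqual, not assertEquals; str(), not unicode()

    if __name__ == "__main__":
        unittest.main()
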
diff --git a/bs4/tests/test_dammit.py b/bs4/tests/test_dammit.py
index b477df8..5177503 100644
--- a/bs4/tests/test_dammit.py
+++ b/bs4/tests/test_dammit.py
@@ -12,7 +12,7 @@ class TestUnicodeDammit(unittest.TestCase):
     """Standalone tests of UnicodeDammit."""

     def test_unicode_input(self):
-        markup = u"I'm already Unicode! \N{SNOWMAN}"
+        markup = "I'm already Unicode! \N{SNOWMAN}"
         dammit = UnicodeDammit(markup)
         self.assertEqual(dammit.unicode_markup, markup)

@@ -20,7 +20,7 @@ class TestUnicodeDammit(unittest.TestCase):
         markup = b"<foo>\x91\x92\x93\x94</foo>"
         dammit = UnicodeDammit(markup)
         self.assertEqual(
-            dammit.unicode_markup, u"<foo>\u2018\u2019\u201c\u201d</foo>")
+            dammit.unicode_markup, "<foo>\u2018\u2019\u201c\u201d</foo>")

     def test_smart_quotes_to_xml_entities(self):
         markup = b"<foo>\x91\x92\x93\x94</foo>"
@@ -44,14 +44,14 @@ class TestUnicodeDammit(unittest.TestCase):
         utf8 = b"Sacr\xc3\xa9 bleu! \xe2\x98\x83"
         dammit = UnicodeDammit(utf8)
         self.assertEqual(dammit.original_encoding.lower(), 'utf-8')
-        self.assertEqual(dammit.unicode_markup, u'Sacr\xe9 bleu! \N{SNOWMAN}')
+        self.assertEqual(dammit.unicode_markup, 'Sacr\xe9 bleu! \N{SNOWMAN}')

     def test_convert_hebrew(self):
         hebrew = b"\xed\xe5\xec\xf9"
         dammit = UnicodeDammit(hebrew, ["iso-8859-8"])
         self.assertEqual(dammit.original_encoding.lower(), 'iso-8859-8')
-        self.assertEqual(dammit.unicode_markup, u'\u05dd\u05d5\u05dc\u05e9')
+        self.assertEqual(dammit.unicode_markup, '\u05dd\u05d5\u05dc\u05e9')

     def test_dont_see_smart_quotes_where_there_are_none(self):
         utf_8 = b"\343\202\261\343\203\274\343\202\277\343\202\244 Watch"
@@ -60,19 +60,19 @@ class TestUnicodeDammit(unittest.TestCase):
         self.assertEqual(dammit.unicode_markup.encode("utf-8"), utf_8)

     def test_ignore_inappropriate_codecs(self):
-        utf8_data = u"Räksmörgås".encode("utf-8")
+        utf8_data = "Räksmörgås".encode("utf-8")
         dammit = UnicodeDammit(utf8_data, ["iso-8859-8"])
         self.assertEqual(dammit.original_encoding.lower(), 'utf-8')

     def test_ignore_invalid_codecs(self):
-        utf8_data = u"Räksmörgås".encode("utf-8")
+        utf8_data = "Räksmörgås".encode("utf-8")
         for bad_encoding in ['.utf8', '...', 'utF---16.!']:
             dammit = UnicodeDammit(utf8_data, [bad_encoding])
             self.assertEqual(dammit.original_encoding.lower(), 'utf-8')

     def test_exclude_encodings(self):
         # This is UTF-8.
-        utf8_data = u"Räksmörgås".encode("utf-8")
+        utf8_data = "Räksmörgås".encode("utf-8")

         # But if we exclude UTF-8 from consideration, the guess is
         # Windows-1252.
@@ -90,7 +90,7 @@ class TestEncodingDetector(unittest.TestCase):
         detected = EncodingDetector(
             b'<?xml version="1.0" encoding="UTF-\xdb" ?>')
         encodings = list(detected.encodings)
-        assert u'utf-\N{REPLACEMENT CHARACTER}' in encodings
+        assert 'utf-\N{REPLACEMENT CHARACTER}' in encodings

     def test_detect_html5_style_meta_tag(self):

@@ -130,7 +130,7 @@ class TestEncodingDetector(unittest.TestCase):
             bs4.dammit.chardet_dammit = noop
             dammit = UnicodeDammit(doc)
             self.assertEqual(True, dammit.contains_replacement_characters)
-            self.assertTrue(u"\ufffd" in dammit.unicode_markup)
+            self.assertTrue("\ufffd" in dammit.unicode_markup)

             soup = BeautifulSoup(doc, "html.parser")
             self.assertTrue(soup.contains_replacement_characters)
@@ -142,7 +142,7 @@ class TestEncodingDetector(unittest.TestCase):
         # A document written in UTF-16LE will have its byte order marker stripped.
         data = b'\xff\xfe<\x00a\x00>\x00\xe1\x00\xe9\x00<\x00/\x00a\x00>\x00'
         dammit = UnicodeDammit(data)
-        self.assertEqual(u"<a>áé</a>", dammit.unicode_markup)
+        self.assertEqual("<a>áé</a>", dammit.unicode_markup)
         self.assertEqual("utf-16le", dammit.original_encoding)

     def test_known_definite_versus_user_encodings(self):
@@ -201,12 +201,12 @@ class TestEncodingDetector(unittest.TestCase):
     def test_detwingle(self):
         # Here's a UTF8 document.
-        utf8 = (u"\N{SNOWMAN}" * 3).encode("utf8")
+        utf8 = ("\N{SNOWMAN}" * 3).encode("utf8")

         # Here's a Windows-1252 document.
         windows_1252 = (
-            u"\N{LEFT DOUBLE QUOTATION MARK}Hi, I like Windows!"
-            u"\N{RIGHT DOUBLE QUOTATION MARK}").encode("windows_1252")
+            "\N{LEFT DOUBLE QUOTATION MARK}Hi, I like Windows!"
+            "\N{RIGHT DOUBLE QUOTATION MARK}").encode("windows_1252")

         # Through some unholy alchemy, they've been stuck together.
         doc = utf8 + windows_1252 + utf8
@@ -221,7 +221,7 @@ class TestEncodingDetector(unittest.TestCase):
         fixed = UnicodeDammit.detwingle(doc)
         self.assertEqual(
-            u"☃☃☃“Hi, I like Windows!”☃☃☃", fixed.decode("utf8"))
+            "☃☃☃“Hi, I like Windows!”☃☃☃", fixed.decode("utf8"))

     def test_detwingle_ignores_multibyte_characters(self):
         # Each of these characters has a UTF-8 representation ending
@@ -229,9 +229,9 @@ class TestEncodingDetector(unittest.TestCase):
         # Windows-1252. But our code knows to skip over multibyte
         # UTF-8 characters, so they'll survive the process unscathed.
         for tricky_unicode_char in (
-            u"\N{LATIN SMALL LIGATURE OE}", # 2-byte char '\xc5\x93'
-            u"\N{LATIN SUBSCRIPT SMALL LETTER X}", # 3-byte char '\xe2\x82\x93'
-            u"\xf0\x90\x90\x93", # This is a CJK character, not sure which one.
+            "\N{LATIN SMALL LIGATURE OE}", # 2-byte char '\xc5\x93'
+            "\N{LATIN SUBSCRIPT SMALL LETTER X}", # 3-byte char '\xe2\x82\x93'
+            "\xf0\x90\x90\x93", # This is a CJK character, not sure which one.
             ):
             input = tricky_unicode_char.encode("utf8")
             self.assertTrue(input.endswith(b'\x93'))
@@ -246,29 +246,29 @@ class TestEncodingDetector(unittest.TestCase):
         # interesting to know what encoding was claimed
         # originally.

-        html_unicode = u'<html><head><meta charset="utf-8"></head></html>'
+        html_unicode = '<html><head><meta charset="utf-8"></head></html>'
         html_bytes = html_unicode.encode("ascii")

-        xml_unicode= u'<?xml version="1.0" encoding="ISO-8859-1" ?>'
+        xml_unicode= '<?xml version="1.0" encoding="ISO-8859-1" ?>'
         xml_bytes = xml_unicode.encode("ascii")

         m = EncodingDetector.find_declared_encoding
-        self.assertEquals(None, m(html_unicode, is_html=False))
-        self.assertEquals("utf-8", m(html_unicode, is_html=True))
-        self.assertEquals("utf-8", m(html_bytes, is_html=True))
+        self.assertEqual(None, m(html_unicode, is_html=False))
+        self.assertEqual("utf-8", m(html_unicode, is_html=True))
+        self.assertEqual("utf-8", m(html_bytes, is_html=True))

-        self.assertEquals("iso-8859-1", m(xml_unicode))
-        self.assertEquals("iso-8859-1", m(xml_bytes))
+        self.assertEqual("iso-8859-1", m(xml_unicode))
+        self.assertEqual("iso-8859-1", m(xml_bytes))

         # Normally, only the first few kilobytes of a document are checked for
         # an encoding.
         spacer = b' ' * 5000
-        self.assertEquals(None, m(spacer + html_bytes))
-        self.assertEquals(None, m(spacer + xml_bytes))
+        self.assertEqual(None, m(spacer + html_bytes))
+        self.assertEqual(None, m(spacer + xml_bytes))

         # But you can tell find_declared_encoding to search an entire
         # HTML document.
-        self.assertEquals(
+        self.assertEqual(
             "utf-8",
             m(spacer + html_bytes, is_html=True, search_entire_document=True)
         )

@@ -276,11 +276,11 @@ class TestEncodingDetector(unittest.TestCase):
         # The XML encoding declaration has to be the very first thing
         # in the document. We'll allow whitespace before the document
         # starts, but nothing else.
-        self.assertEquals(
+        self.assertEqual(
             "iso-8859-1",
             m(xml_bytes, search_entire_document=True)
         )

-        self.assertEquals(
+        self.assertEqual(
             None, m(b'a' + xml_bytes, search_entire_document=True)
         )

diff --git a/bs4/tests/test_formatter.py b/bs4/tests/test_formatter.py
index 718989b..c188e18 100644
--- a/bs4/tests/test_formatter.py
+++ b/bs4/tests/test_formatter.py
@@ -18,12 +18,12 @@ class TestFormatter(SoupTest):
         # Attributes come out sorted by name. In Python 3, attributes
         # normally come out of a dictionary in the order they were
         # added.
-        self.assertEquals([('a', 2), ('b', 1)], formatter.attributes(tag))
+        self.assertEqual([('a', 2), ('b', 1)], formatter.attributes(tag))

         # This works even if Tag.attrs is None, though this shouldn't
         # normally happen.
         tag.attrs = None
-        self.assertEquals([], formatter.attributes(tag))
+        self.assertEqual([], formatter.attributes(tag))

     def test_sort_attributes(self):
         # Test the ability to override Formatter.attributes() to,
@@ -42,8 +42,8 @@ class TestFormatter(SoupTest):
         # attributes() was called on the <p> tag. It filtered out one
         # attribute and sorted the other two.
-        self.assertEquals(formatter.called_with, soup.p)
-        self.assertEquals(u'<p aval="2" cval="1"></p>', decoded)
+        self.assertEqual(formatter.called_with, soup.p)
+        self.assertEqual('<p aval="2" cval="1"></p>', decoded)

     def test_empty_attributes_are_booleans(self):
         # Test the behavior of empty_attributes_are_booleans as well
@@ -51,17 +51,17 @@ class TestFormatter(SoupTest):
         for name in ('html', 'minimal', None):
             formatter = HTMLFormatter.REGISTRY[name]
-            self.assertEquals(False, formatter.empty_attributes_are_booleans)
+            self.assertEqual(False, formatter.empty_attributes_are_booleans)

         formatter = XMLFormatter.REGISTRY[None]
-        self.assertEquals(False, formatter.empty_attributes_are_booleans)
+        self.assertEqual(False, formatter.empty_attributes_are_booleans)

         formatter = HTMLFormatter.REGISTRY['html5']
-        self.assertEquals(True, formatter.empty_attributes_are_booleans)
+        self.assertEqual(True, formatter.empty_attributes_are_booleans)

         # Verify that the constructor sets the value.
         formatter = Formatter(empty_attributes_are_booleans=True)
-        self.assertEquals(True, formatter.empty_attributes_are_booleans)
+        self.assertEqual(True, formatter.empty_attributes_are_booleans)

         # Now demonstrate what it does to markup.
         for markup in (
@@ -70,11 +70,11 @@ class TestFormatter(SoupTest):
         ):
             soup = self.soup(markup)
             for formatter in ('html', 'minimal', 'xml', None):
-                self.assertEquals(
+                self.assertEqual(
                     b'<option selected=""></option>',
                     soup.option.encode(formatter='html')
                 )
-                self.assertEquals(
+                self.assertEqual(
                     b'<option selected></option>',
                     soup.option.encode(formatter='html5')
                 )
diff --git a/bs4/tests/test_html5lib.py b/bs4/tests/test_html5lib.py
index 2adebc8..f8902ad 100644
--- a/bs4/tests/test_html5lib.py
+++ b/bs4/tests/test_html5lib.py
@@ -5,7 +5,7 @@ import warnings
 try:
     from bs4.builder import HTML5TreeBuilder
     HTML5LIB_PRESENT = True
-except ImportError, e:
+except ImportError as e:
     HTML5LIB_PRESENT = False
 from bs4.element import SoupStrainer
 from bs4.testing import (
@@ -74,14 +74,14 @@ class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest):
     def test_reparented_markup(self):
         markup = '<p><em>foo</p>\n<p>bar<a></a></em></p>'
         soup = self.soup(markup)
-        self.assertEqual(u"<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p></body>", soup.body.decode())
+        self.assertEqual("<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p></body>", soup.body.decode())
         self.assertEqual(2, len(soup.find_all('p')))

     def test_reparented_markup_ends_with_whitespace(self):
         markup = '<p><em>foo</p>\n<p>bar<a></a></em></p>\n'
         soup = self.soup(markup)
-        self.assertEqual(u"<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p>\n</body>", soup.body.decode())
+        self.assertEqual("<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p>\n</body>", soup.body.decode())
         self.assertEqual(2, len(soup.find_all('p')))

     def test_reparented_markup_containing_identical_whitespace_nodes(self):
@@ -127,7 +127,7 @@ class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest):
     def test_foster_parenting(self):
         markup = b"""<table><td></tbody>A"""
         soup = self.soup(markup)
-        self.assertEqual(u"<body>A<table><tbody><tr><td></td></tr></tbody></table></body>", soup.body.decode())
+        self.assertEqual("<body>A<table><tbody><tr><td></td></tr></tbody></table></body>", soup.body.decode())

     def test_extraction(self):
         """
@@ -199,28 +199,28 @@ class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest):
         # HTMLParserTreeBuilderSmokeTest. It's not in the superclass
         # because the lxml HTML TreeBuilder _doesn't_ work this way.
         for input_element, output_unicode, output_element in (
-            ("⇄", u'\u21c4', b'⇄'),
-            ('⊧', u'\u22a7', b'⊧'),
-            ('𝔑', u'\U0001d511', b'𝔑'),
-            ('≧̸', u'\u2267\u0338', b'≧̸'),
-            ('¬', u'\xac', b'¬'),
-            ('⫬', u'\u2aec', b'⫬'),
-            ('"', u'"', b'"'),
-            ('∴', u'\u2234', b'∴'),
-            ('∴', u'\u2234', b'∴'),
-            ('∴', u'\u2234', b'∴'),
-            ("fj", u'fj', b'fj'),
-            ("⊔", u'\u2294', b'⊔'),
-            ("⊔︀", u'\u2294\ufe00', b'⊔︀'),
-            ("'", u"'", b"'"),
-            ("|", u"|", b"|"),
+            ("⇄", '\u21c4', b'⇄'),
+            ('⊧', '\u22a7', b'⊧'),
+            ('𝔑', '\U0001d511', b'𝔑'),
+            ('≧̸', '\u2267\u0338', b'≧̸'),
+            ('¬', '\xac', b'¬'),
+            ('⫬', '\u2aec', b'⫬'),
+            ('"', '"', b'"'),
+            ('∴', '\u2234', b'∴'),
+            ('∴', '\u2234', b'∴'),
+            ('∴', '\u2234', b'∴'),
+            ("fj", 'fj', b'fj'),
+            ("⊔", '\u2294', b'⊔'),
+            ("⊔︀", '\u2294\ufe00', b'⊔︀'),
+            ("'", "'", b"'"),
+            ("|", "|", b"|"),
         ):
-            markup = u'<div>%s</div>' % input_element
+            markup = '<div>%s</div>' % input_element
             div = self.soup(markup).div
             without_element = div.encode()
             expect = b"<div>%s</div>" % output_unicode.encode("utf8")
-            self.assertEquals(without_element, expect)
+            self.assertEqual(without_element, expect)

             with_element = div.encode(formatter="html")
             expect = b"<div>%s</div>" % output_element
-            self.assertEquals(with_element, expect)
+            self.assertEqual(with_element, expect)
diff --git a/bs4/tests/test_htmlparser.py b/bs4/tests/test_htmlparser.py
index e84eced..0d8161e 100644
--- a/bs4/tests/test_htmlparser.py
+++ b/bs4/tests/test_htmlparser.py
@@ -61,20 +61,20 @@ class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
         # If you don't provide any particular value for
         # on_duplicate_attribute, later values replace earlier values.
         soup = self.soup(markup)
-        self.assertEquals("url3", soup.a['href'])
-        self.assertEquals(["cls"], soup.a['class'])
-        self.assertEquals("id", soup.a['id'])
+        self.assertEqual("url3", soup.a['href'])
+        self.assertEqual(["cls"], soup.a['class'])
+        self.assertEqual("id", soup.a['id'])

         # You can also get this behavior explicitly.
         def assert_attribute(on_duplicate_attribute, expected):
             soup = self.soup(
                 markup, on_duplicate_attribute=on_duplicate_attribute
             )
-            self.assertEquals(expected, soup.a['href'])
+            self.assertEqual(expected, soup.a['href'])

             # Verify that non-duplicate attributes are treated normally.
-            self.assertEquals(["cls"], soup.a['class'])
-            self.assertEquals("id", soup.a['id'])
+            self.assertEqual(["cls"], soup.a['class'])
+            self.assertEqual("id", soup.a['id'])
         assert_attribute(None, "url3")
         assert_attribute(BeautifulSoupHTMLParser.REPLACE, "url3")
@@ -94,31 +94,31 @@ class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
         # convert those Unicode characters to a (potentially
         # different) named entity on the way out.
         for input_element, output_unicode, output_element in (
-            ("⇄", u'\u21c4', b'⇄'),
-            ('⊧', u'\u22a7', b'⊧'),
-            ('𝔑', u'\U0001d511', b'𝔑'),
-            ('≧̸', u'\u2267\u0338', b'≧̸'),
-            ('¬', u'\xac', b'¬'),
-            ('⫬', u'\u2aec', b'⫬'),
-            ('"', u'"', b'"'),
-            ('∴', u'\u2234', b'∴'),
-            ('∴', u'\u2234', b'∴'),
-            ('∴', u'\u2234', b'∴'),
-            ("fj", u'fj', b'fj'),
-            ("⊔", u'\u2294', b'⊔'),
-            ("⊔︀", u'\u2294\ufe00', b'⊔︀'),
-            ("'", u"'", b"'"),
-            ("|", u"|", b"|"),
+            ("⇄", '\u21c4', b'⇄'),
+            ('⊧', '\u22a7', b'⊧'),
+            ('𝔑', '\U0001d511', b'𝔑'),
+            ('≧̸', '\u2267\u0338', b'≧̸'),
+            ('¬', '\xac', b'¬'),
+            ('⫬', '\u2aec', b'⫬'),
+            ('"', '"', b'"'),
+            ('∴', '\u2234', b'∴'),
+            ('∴', '\u2234', b'∴'),
+            ('∴', '\u2234', b'∴'),
+            ("fj", 'fj', b'fj'),
+            ("⊔", '\u2294', b'⊔'),
+            ("⊔︀", '\u2294\ufe00', b'⊔︀'),
+            ("'", "'", b"'"),
+            ("|", "|", b"|"),
         ):
-            markup = u'<div>%s</div>' % input_element
+            markup = '<div>%s</div>' % input_element
             div = self.soup(markup).div
             without_element = div.encode()
             expect = b"<div>%s</div>" % output_unicode.encode("utf8")
-            self.assertEquals(without_element, expect)
+            self.assertEqual(without_element, expect)

             with_element = div.encode(formatter="html")
             expect = b"<div>%s</div>" % output_element
-            self.assertEquals(with_element, expect)
+            self.assertEqual(with_element, expect)

 class TestHTMLParserSubclass(SoupTest):
diff --git a/bs4/tests/test_lxml.py b/bs4/tests/test_lxml.py
index d8dada4..71931ff 100644
--- a/bs4/tests/test_lxml.py
+++ b/bs4/tests/test_lxml.py
@@ -7,7 +7,7 @@ try:
     import lxml.etree
     LXML_PRESENT = True
     LXML_VERSION = lxml.etree.LXML_VERSION
-except ImportError, e:
+except ImportError as e:
     LXML_PRESENT = False
     LXML_VERSION = (0,)

@@ -68,7 +68,7 @@ class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
         # if one is installed.
         with warnings.catch_warnings(record=True) as w:
             soup = BeautifulStoneSoup("<b />")
-        self.assertEqual(u"<b/>", unicode(soup.b))
+        self.assertEqual("<b/>", str(soup.b))
         self.assertTrue("BeautifulStoneSoup class is deprecated" in str(w[0].message))

     def test_tracking_line_numbers(self):
diff --git a/bs4/tests/test_navigablestring.py b/bs4/tests/test_navigablestring.py
index 8b903ea..89e92b7 100644
--- a/bs4/tests/test_navigablestring.py
+++ b/bs4/tests/test_navigablestring.py
@@ -15,7 +15,7 @@ class TestNavigableString(SoupTest):
     def test_text_acquisition_methods(self):
         # These methods are intended for use against Tag, but they
         # work on NavigableString as well,
-        eq_ = self.assertEquals
+        eq_ = self.assertEqual

         s = NavigableString("fee ")
         cdata = CData("fie ")
diff --git a/bs4/tests/test_soup.py b/bs4/tests/test_soup.py
index 9074bdb..4d00845 100644
--- a/bs4/tests/test_soup.py
+++ b/bs4/tests/test_soup.py
@@ -51,17 +51,17 @@ PYTHON_3_PRE_3_2 = (sys.version_info[0] == 3 and sys.version_info < (3,2))
 class TestConstructor(SoupTest):

     def test_short_unicode_input(self):
-        data = u"<h1>éé</h1>"
+        data = "<h1>éé</h1>"
         soup = self.soup(data)
-        self.assertEqual(u"éé", soup.h1.string)
+        self.assertEqual("éé", soup.h1.string)

     def test_embedded_null(self):
-        data = u"<h1>foo\0bar</h1>"
+        data = "<h1>foo\0bar</h1>"
         soup = self.soup(data)
-        self.assertEqual(u"foo\0bar", soup.h1.string)
+        self.assertEqual("foo\0bar", soup.h1.string)

     def test_exclude_encodings(self):
-        utf8_data = u"Räksmörgås".encode("utf-8")
+        utf8_data = "Räksmörgås".encode("utf-8")
         soup = self.soup(utf8_data, exclude_encodings=["utf-8"])
         self.assertEqual("windows-1252", soup.original_encoding)

@@ -127,7 +127,7 @@ class TestConstructor(SoupTest):
             yield markup, None, None, False

         import re
-        self.assertRaisesRegexp(
+        self.assertRaisesRegex(
             ParserRejectedMarkup,
             "The markup you provided was rejected by the parser. Trying a different parser or a different encoding may help.",
             BeautifulSoup, '', builder=Mock,
@@ -318,7 +318,7 @@ class TestWarnings(SoupTest):
         with warnings.catch_warnings(record=True) as warning_list:
             # note - this url must differ from the bytes one otherwise
             # python's warnings system swallows the second warning
-            soup = self.soup(u"http://www.crummyunicode.com/")
+            soup = self.soup("http://www.crummyunicode.com/")
         warning = self._assert_warning(
             warning_list, MarkupResemblesLocatorWarning
         )
@@ -334,7 +334,7 @@ class TestWarnings(SoupTest):
     def test_url_warning_with_unicode_and_space(self):
         with warnings.catch_warnings(record=True) as warning_list:
-            soup = self.soup(u"http://www.crummyuncode.com/ is great")
+            soup = self.soup("http://www.crummyuncode.com/ is great")
         self.assertFalse(any("looks like a URL" in str(w.message)
             for w in warning_list))

@@ -356,9 +356,9 @@ class TestEntitySubstitution(unittest.TestCase):
     def test_simple_html_substitution(self):
         # Unicode characters corresponding to named HTML entites
         # are substituted, and no others.
-        s = u"foo\u2200\N{SNOWMAN}\u00f5bar"
+        s = "foo\u2200\N{SNOWMAN}\u00f5bar"
         self.assertEqual(self.sub.substitute_html(s),
-                         u"foo∀\N{SNOWMAN}õbar")
+                         "foo∀\N{SNOWMAN}õbar")

     def test_smart_quote_substitution(self):
         # MS smart quotes are a common source of frustration, so we
@@ -376,11 +376,11 @@ class TestEntitySubstitution(unittest.TestCase):
             # A few spot checks of our ability to recognize
             # special character sequences and convert them
             # to named entities.
-            ('⊧', u'\u22a7'),
-            ('𝔑', u'\U0001d511'),
-            ('≧̸', u'\u2267\u0338'),
-            ('¬', u'\xac'),
-            ('⫬', u'\u2aec'),
+            ('⊧', '\u22a7'),
+            ('𝔑', '\U0001d511'),
+            ('≧̸', '\u2267\u0338'),
+            ('¬', '\xac'),
+            ('⫬', '\u2aec'),

             # We _could_ convert | to &verbarr;, but we don't, because
             # | is an ASCII character.
@@ -396,7 +396,7 @@ class TestEntitySubstitution(unittest.TestCase):
             ('<', '<'),
             ('&', '&'),
         ):
-            template = u'3 %s 4'
+            template = '3 %s 4'
             raw = template % u
             with_entities = template % entity
             self.assertEqual(self.sub.substitute_html(raw), with_entities)
@@ -405,12 +405,12 @@ class TestEntitySubstitution(unittest.TestCase):
         # Some HTML5 entities correspond either to a single-character
         # Unicode sequence _or_ to the same character plus U+FE00,
         # VARIATION SELECTOR 1. We can handle this.
-        data = u"fjords \u2294 penguins"
-        markup = u"fjords ⊔ penguins"
+        data = "fjords \u2294 penguins"
+        markup = "fjords ⊔ penguins"
         self.assertEqual(self.sub.substitute_html(data), markup)

-        data = u"fjords \u2294\ufe00 penguins"
-        markup = u"fjords ⊔︀ penguins"
+        data = "fjords \u2294\ufe00 penguins"
+        markup = "fjords ⊔︀ penguins"
         self.assertEqual(self.sub.substitute_html(data), markup)

     def test_xml_converstion_includes_no_quotes_if_make_quoted_attribute_is_false(self):
@@ -468,7 +468,7 @@ class TestEncodingConversion(SoupTest):
     def setUp(self):
         super(TestEncodingConversion, self).setUp()
-        self.unicode_data = u'<html><head><meta charset="utf-8"/></head><body><foo>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</foo></body></html>'
+        self.unicode_data = '<html><head><meta charset="utf-8"/></head><body><foo>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</foo></body></html>'
         self.utf8_data = self.unicode_data.encode("utf-8")
         # Just so you know what it looks like.
         self.assertEqual(
@@ -488,7 +488,7 @@ class TestEncodingConversion(SoupTest):
             ascii = b"<foo>a</foo>"
             soup_from_ascii = self.soup(ascii)
             unicode_output = soup_from_ascii.decode()
-            self.assertTrue(isinstance(unicode_output, unicode))
+            self.assertTrue(isinstance(unicode_output, str))
             self.assertEqual(unicode_output, self.document_for(ascii.decode()))
             self.assertEqual(soup_from_ascii.original_encoding.lower(), "utf-8")
         finally:
@@ -500,7 +500,7 @@ class TestEncodingConversion(SoupTest):
         # is not set.
         soup_from_unicode = self.soup(self.unicode_data)
         self.assertEqual(soup_from_unicode.decode(), self.unicode_data)
-        self.assertEqual(soup_from_unicode.foo.string, u'Sacr\xe9 bleu!')
+        self.assertEqual(soup_from_unicode.foo.string, 'Sacr\xe9 bleu!')
         self.assertEqual(soup_from_unicode.original_encoding, None)

     def test_utf8_in_unicode_out(self):
@@ -508,7 +508,7 @@ class TestEncodingConversion(SoupTest):
         # attribute is set.
         soup_from_utf8 = self.soup(self.utf8_data)
         self.assertEqual(soup_from_utf8.decode(), self.unicode_data)
-        self.assertEqual(soup_from_utf8.foo.string, u'Sacr\xe9 bleu!')
+        self.assertEqual(soup_from_utf8.foo.string, 'Sacr\xe9 bleu!')

     def test_utf8_out(self):
         # The internal data structures can be encoded as UTF-8.
@@ -519,7 +519,7 @@ class TestEncodingConversion(SoupTest):
         PYTHON_3_PRE_3_2, "Bad HTMLParser detected; skipping test of non-ASCII characters in attribute name.")
     def test_attribute_name_containing_unicode_characters(self):
-        markup = u'<div><a \N{SNOWMAN}="snowman"></a></div>'
+        markup = '<div><a \N{SNOWMAN}="snowman"></a></div>'
         self.assertEqual(self.soup(markup).div.encode("utf8"), markup.encode("utf8"))

diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py
index 26004ce..59b51d0 100644
--- a/bs4/tests/test_tree.py
+++ b/bs4/tests/test_tree.py
@@ -75,13 +75,13 @@ class TestFind(TreeTest):
         self.assertEqual(soup.find("b").string, "2")

     def test_unicode_text_find(self):
-        soup = self.soup(u'<h1>Räksmörgås</h1>')
-        self.assertEqual(soup.find(string=u'Räksmörgås'), u'Räksmörgås')
+        soup = self.soup('<h1>Räksmörgås</h1>')
+        self.assertEqual(soup.find(string='Räksmörgås'), 'Räksmörgås')

     def test_unicode_attribute_find(self):
-        soup = self.soup(u'<h1 id="Räksmörgås">here it is</h1>')
+        soup = self.soup('<h1 id="Räksmörgås">here it is</h1>')
         str(soup)
-        self.assertEqual("here it is", soup.find(id=u'Räksmörgås').text)
+        self.assertEqual("here it is", soup.find(id='Räksmörgås').text)

     def test_find_everything(self):
@@ -101,17 +101,17 @@ class TestFindAll(TreeTest):
         """You can search the tree for text nodes."""
         soup = self.soup("<html>Foo<b>bar</b>\xbb</html>")
         # Exact match.
-        self.assertEqual(soup.find_all(string="bar"), [u"bar"])
-        self.assertEqual(soup.find_all(text="bar"), [u"bar"])
+        self.assertEqual(soup.find_all(string="bar"), ["bar"])
+        self.assertEqual(soup.find_all(text="bar"), ["bar"])
         # Match any of a number of strings.
         self.assertEqual(
-            soup.find_all(text=["Foo", "bar"]), [u"Foo", u"bar"])
+            soup.find_all(text=["Foo", "bar"]), ["Foo", "bar"])
         # Match a regular expression.
         self.assertEqual(soup.find_all(text=re.compile('.*')),
-                         [u"Foo", u"bar", u'\xbb'])
+                         ["Foo", "bar", '\xbb'])
         # Match anything.
         self.assertEqual(soup.find_all(text=True),
-                         [u"Foo", u"bar", u'\xbb'])
+                         ["Foo", "bar", '\xbb'])

     def test_find_all_limit(self):
         """You can limit the number of items returned by find_all."""
@@ -254,8 +254,8 @@ class TestFindAllByAttribute(TreeTest):
                          ["Matching a.", "Matching b."])

     def test_find_all_by_utf8_attribute_value(self):
-        peace = u"םולש".encode("utf8")
-        data = u'<a title="םולש"></a>'.encode("utf8")
+        peace = "םולש".encode("utf8")
+        data = '<a title="םולש"></a>'.encode("utf8")
         soup = self.soup(data)
         self.assertEqual([soup.a], soup.find_all(title=peace))
         self.assertEqual([soup.a], soup.find_all(title=peace.decode("utf8")))
@@ -444,7 +444,7 @@ class TestSmooth(TreeTest):
         # output.

         # Since the <span> tag has two children, its .string is None.
-        self.assertEquals(None, div.span.string)
+        self.assertEqual(None, div.span.string)

         self.assertEqual(7, len(div.contents))
         div.smooth()
@@ -755,18 +755,18 @@ class TestTag(SoupTest):
         # No list of whitespace-preserving tags -> pretty-print
         tag._preserve_whitespace_tags = None
-        self.assertEquals(True, tag._should_pretty_print(0))
+        self.assertEqual(True, tag._should_pretty_print(0))

         # List exists but tag is not on the list -> pretty-print
         tag.preserve_whitespace_tags = ["some_other_tag"]
-        self.assertEquals(True, tag._should_pretty_print(1))
+        self.assertEqual(True, tag._should_pretty_print(1))

         # Indent level is None -> don't pretty-print
-        self.assertEquals(False, tag._should_pretty_print(None))
+        self.assertEqual(False, tag._should_pretty_print(None))

         # Tag is on the whitespace-preserving list -> don't pretty-print
         tag.preserve_whitespace_tags = ["some_other_tag", "a_tag"]
-        self.assertEquals(False, tag._should_pretty_print(1))
+        self.assertEqual(False, tag._should_pretty_print(1))

 class TestTagCreation(SoupTest):
@@ -905,10 +905,10 @@ class TestTreeModification(SoupTest):
             assert not isinstance(i, BeautifulSoup)

         p1, p2, p3, p4 = list(soup.children)
-        self.assertEquals("And now, a word:", p1.string)
-        self.assertEquals("p2", p2.string)
-        self.assertEquals("p3", p3.string)
-        self.assertEquals("And we're back.", p4.string)
+        self.assertEqual("And now, a word:", p1.string)
+        self.assertEqual("p2", p2.string)
+        self.assertEqual("p3", p3.string)
+        self.assertEqual("And we're back.", p4.string)

     def test_replace_with_maintains_next_element_throughout(self):

@@ -1015,8 +1015,8 @@ class TestTreeModification(SoupTest):
         d1 = soup.find('div', id='d1')
         d2 = soup.find('div', id='d2')
         d2.extend(d1)
-        self.assertEqual(u'<div id="d1"></div>', d1.decode())
-        self.assertEqual(u'<div id="d2"><a>1</a><a>2</a><a>3</a><a>4</a></div>', d2.decode())
+        self.assertEqual('<div id="d1"></div>', d1.decode())
+        self.assertEqual('<div id="d2"><a>1</a><a>2</a><a>3</a><a>4</a></div>', d2.decode())

     def test_move_tag_to_beginning_of_parent(self):
         data = "<a><b></b><c></c><d></d></a>"
@@ -1293,7 +1293,7 @@ class TestTreeModification(SoupTest):
         <script>baz</script>
         </html>""")
         [soup.script.extract() for i in soup.find_all("script")]
-        self.assertEqual("<body>\n\n<a></a>\n</body>", unicode(soup.body))
+        self.assertEqual("<body>\n\n<a></a>\n</body>", str(soup.body))

     def test_extract_works_when_element_is_surrounded_by_identical_strings(self):

@@ -1589,7 +1589,7 @@ class TestPersistence(SoupTest):
         soup = BeautifulSoup(b'<p> </p>', 'html.parser')
         encoding = soup.original_encoding
         copy = soup.__copy__()
-        self.assertEqual(u"<p> </p>", unicode(copy))
+        self.assertEqual("<p> </p>", str(copy))
         self.assertEqual(encoding, copy.original_encoding)

     def test_copy_preserves_builder_information(self):
@@ -1619,14 +1619,14 @@ class TestPersistence(SoupTest):
     def test_unicode_pickle(self):
         # A tree containing Unicode characters can be pickled.
-        html = u"<b>\N{SNOWMAN}</b>"
+        html = "<b>\N{SNOWMAN}</b>"
         soup = self.soup(html)
         dumped = pickle.dumps(soup, pickle.HIGHEST_PROTOCOL)
         loaded = pickle.loads(dumped)
         self.assertEqual(loaded.decode(), soup.decode())

     def test_copy_navigablestring_is_not_attached_to_tree(self):
-        html = u"<b>Foo<a></a></b><b>Bar</b>"
+        html = "<b>Foo<a></a></b><b>Bar</b>"
         soup = self.soup(html)
         s1 = soup.find(string="Foo")
         s2 = copy.copy(s1)
@@ -1638,7 +1638,7 @@ class TestPersistence(SoupTest):
         self.assertEqual(None, s2.previous_element)

     def test_copy_navigablestring_subclass_has_same_type(self):
-        html = u"<b><!--Foo--></b>"
+        html = "<b><!--Foo--></b>"
         soup = self.soup(html)
         s1 = soup.string
         s2 = copy.copy(s1)
@@ -1646,19 +1646,19 @@ class TestPersistence(SoupTest):
         self.assertTrue(isinstance(s2, Comment))

     def test_copy_entire_soup(self):
-        html = u"<div><b>Foo<a></a></b><b>Bar</b></div>end"
+        html = "<div><b>Foo<a></a></b><b>Bar</b></div>end"
         soup = self.soup(html)
         soup_copy = copy.copy(soup)
         self.assertEqual(soup, soup_copy)

     def test_copy_tag_copies_contents(self):
-        html = u"<div><b>Foo<a></a></b><b>Bar</b></div>end"
+        html = "<div><b>Foo<a></a></b><b>Bar</b></div>end"
         soup = self.soup(html)
         div = soup.div
         div_copy = copy.copy(div)

         # The two tags look the same, and evaluate to equal.
-        self.assertEqual(unicode(div), unicode(div_copy))
+        self.assertEqual(str(div), str(div_copy))
         self.assertEqual(div, div_copy)

         # But they're not the same object.
@@ -1674,17 +1674,17 @@ class TestPersistence(SoupTest):
 class TestSubstitutions(SoupTest):

     def test_default_formatter_is_minimal(self):
-        markup = u"<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>"
+        markup = "<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>"
         soup = self.soup(markup)
         decoded = soup.decode(formatter="minimal")
         # The < is converted back into < but the e-with-acute is left alone.
         self.assertEqual(
             decoded,
             self.document_for(
-                u"<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>"))
+                "<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>"))

     def test_formatter_html(self):
-        markup = u"<br><b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>"
+        markup = "<br><b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>"
         soup = self.soup(markup)
         decoded = soup.decode(formatter="html")
         self.assertEqual(
@@ -1692,7 +1692,7 @@ class TestSubstitutions(SoupTest):
             self.document_for("<br/><b><<Sacré bleu!>></b>"))

     def test_formatter_html5(self):
-        markup = u"<br><b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>"
+        markup = "<br><b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>"
         soup = self.soup(markup)
         decoded = soup.decode(formatter="html5")
         self.assertEqual(
@@ -1700,49 +1700,49 @@ class TestSubstitutions(SoupTest):
             self.document_for("<br><b><<Sacré bleu!>></b>"))

     def test_formatter_minimal(self):
-        markup = u"<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>"
+        markup = "<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>"
         soup = self.soup(markup)
         decoded = soup.decode(formatter="minimal")
         # The < is converted back into < but the e-with-acute is left alone.
         self.assertEqual(
             decoded,
             self.document_for(
-                u"<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>"))
+                "<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>"))

     def test_formatter_null(self):
-        markup = u"<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>"
+        markup = "<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>"
         soup = self.soup(markup)
         decoded = soup.decode(formatter=None)
         # Neither the angle brackets nor the e-with-acute are converted.
         # This is not valid HTML, but it's what the user wanted.
         self.assertEqual(decoded,
-                         self.document_for(u"<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>"))
+                         self.document_for("<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>"))

     def test_formatter_custom(self):
-        markup = u"<b><foo></b><b>bar</b><br/>"
+        markup = "<b><foo></b><b>bar</b><br/>"
         soup = self.soup(markup)
         decoded = soup.decode(formatter = lambda x: x.upper())
         # Instead of normal entity conversion code, the custom
         # callable is called on every string.
         self.assertEqual(
             decoded,
-            self.document_for(u"<b><FOO></b><b>BAR</b><br/>"))
+            self.document_for("<b><FOO></b><b>BAR</b><br/>"))

     def test_formatter_is_run_on_attribute_values(self):
-        markup = u'<a href="http://a.com?a=b&c=é">e</a>'
+        markup = '<a href="http://a.com?a=b&c=é">e</a>'
         soup = self.soup(markup)
         a = soup.a

-        expect_minimal = u'<a href="http://a.com?a=b&c=é">e</a>'
+        expect_minimal = '<a href="http://a.com?a=b&c=é">e</a>'

         self.assertEqual(expect_minimal, a.decode())
         self.assertEqual(expect_minimal, a.decode(formatter="minimal"))

-        expect_html = u'<a href="http://a.com?a=b&c=é">e</a>'
+        expect_html = '<a href="http://a.com?a=b&c=é">e</a>'
         self.assertEqual(expect_html, a.decode(formatter="html"))

         self.assertEqual(markup, a.decode(formatter=None))

-        expect_upper = u'<a href="HTTP://A.COM?A=B&C=É">E</a>'
+        expect_upper = '<a href="HTTP://A.COM?A=B&C=É">E</a>'
         self.assertEqual(expect_upper, a.decode(formatter=lambda x: x.upper()))

     def test_formatter_skips_script_tag_for_html_documents(self):
@@ -1768,7 +1768,7 @@ class TestSubstitutions(SoupTest):
         # Everything outside the <pre> tag is reformatted, but everything
         # inside is left alone.
         self.assertEqual(
-            u'<div>\n foo\n <pre> \tbar\n \n </pre>\n baz\n <textarea> eee\nfff\t</textarea>\n</div>',
+            '<div>\n foo\n <pre> \tbar\n \n </pre>\n baz\n <textarea> eee\nfff\t</textarea>\n</div>',
             soup.div.prettify())

     def test_prettify_accepts_formatter_function(self):
@@ -1778,14 +1778,14 @@ class TestSubstitutions(SoupTest):
     def test_prettify_outputs_unicode_by_default(self):
         soup = self.soup("<a></a>")
-        self.assertEqual(unicode, type(soup.prettify()))
+        self.assertEqual(str, type(soup.prettify()))

     def test_prettify_can_encode_data(self):
         soup = self.soup("<a></a>")
         self.assertEqual(bytes, type(soup.prettify("utf-8")))

     def test_html_entity_substitution_off_by_default(self):
-        markup = u"<b>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</b>"
+        markup = "<b>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</b>"
         soup = self.soup(markup)
         encoded = soup.b.encode("utf-8")
         self.assertEqual(encoded, markup.encode('utf-8'))
@@ -1829,48 +1829,48 @@ class TestEncoding(SoupTest):
     """Test the ability to encode objects into strings."""

     def test_unicode_string_can_be_encoded(self):
-        html = u"<b>\N{SNOWMAN}</b>"
+        html = "<b>\N{SNOWMAN}</b>"
         soup = self.soup(html)
         self.assertEqual(soup.b.string.encode("utf-8"),
-                         u"\N{SNOWMAN}".encode("utf-8"))
+                         "\N{SNOWMAN}".encode("utf-8"))

     def test_tag_containing_unicode_string_can_be_encoded(self):
-        html = u"<b>\N{SNOWMAN}</b>"
+        html = "<b>\N{SNOWMAN}</b>"
         soup = self.soup(html)
         self.assertEqual(
             soup.b.encode("utf-8"), html.encode("utf-8"))

     def test_encoding_substitutes_unrecognized_characters_by_default(self):
-        html = u"<b>\N{SNOWMAN}</b>"
+        html = "<b>\N{SNOWMAN}</b>"
         soup = self.soup(html)
         self.assertEqual(soup.b.encode("ascii"), b"<b>☃</b>")

     def test_encoding_can_be_made_strict(self):
-        html = u"<b>\N{SNOWMAN}</b>"
+        html = "<b>\N{SNOWMAN}</b>"
         soup = self.soup(html)
         self.assertRaises(
             UnicodeEncodeError, soup.encode, "ascii", errors="strict")

     def test_decode_contents(self):
-        html = u"<b>\N{SNOWMAN}</b>"
+        html = "<b>\N{SNOWMAN}</b>"
         soup = self.soup(html)
-        self.assertEqual(u"\N{SNOWMAN}", soup.b.decode_contents())
+        self.assertEqual("\N{SNOWMAN}", soup.b.decode_contents())

     def test_encode_contents(self):
-        html = u"<b>\N{SNOWMAN}</b>"
+        html = "<b>\N{SNOWMAN}</b>"
         soup = self.soup(html)
         self.assertEqual(
-            u"\N{SNOWMAN}".encode("utf8"), soup.b.encode_contents(
+            "\N{SNOWMAN}".encode("utf8"), soup.b.encode_contents(
                 encoding="utf8"))

     def test_deprecated_renderContents(self):
-        html = u"<b>\N{SNOWMAN}</b>"
+        html = "<b>\N{SNOWMAN}</b>"
         soup = self.soup(html)
         self.assertEqual(
-            u"\N{SNOWMAN}".encode("utf8"), soup.b.renderContents())
+            "\N{SNOWMAN}".encode("utf8"), soup.b.renderContents())

     def test_repr(self):
-        html = u"<b>\N{SNOWMAN}</b>"
+        html = "<b>\N{SNOWMAN}</b>"
         soup = self.soup(html)
         if PY3K:
             self.assertEqual(html, repr(soup))
@@ -1952,7 +1952,7 @@ class TestSoupSelector(TreeTest):
         els = self.soup.select('title')
         self.assertEqual(len(els), 1)
         self.assertEqual(els[0].name, 'title')
-        self.assertEqual(els[0].contents, [u'The title'])
+        self.assertEqual(els[0].contents, ['The title'])

     def test_one_tag_many(self):
         els = self.soup.select('div')
@@ -1998,7 +1998,7 @@ class TestSoupSelector(TreeTest):
         self.assertEqual(dashed[0]['id'], 'dash2')

     def test_dashed_tag_text(self):
-        self.assertEqual(self.soup.select('body > custom-dashed-tag')[0].text, u'Hello there.')
+        self.assertEqual(self.soup.select('body > custom-dashed-tag')[0].text, 'Hello there.')

     def test_select_dashed_matches_find_all(self):
         self.assertEqual(self.soup.select('custom-dashed-tag'),
                          self.soup.find_all('custom-dashed-tag'))

@@ -2184,12 +2184,12 @@ class TestSoupSelector(TreeTest):
         # Try to select first paragraph
         els = self.soup.select('div#inner p:nth-of-type(1)')
         self.assertEqual(len(els), 1)
-        self.assertEqual(els[0].string, u'Some text')
+        self.assertEqual(els[0].string, 'Some text')

         # Try to select third paragraph
         els = self.soup.select('div#inner p:nth-of-type(3)')
         self.assertEqual(len(els), 1)
-        self.assertEqual(els[0].string, u'Another')
+        self.assertEqual(els[0].string, 'Another')

         # Try to select (non-existent!) fourth paragraph
         els = self.soup.select('div#inner p:nth-of-type(4)')
@@ -2202,7 +2202,7 @@ class TestSoupSelector(TreeTest):
     def test_nth_of_type_direct_descendant(self):
         els = self.soup.select('div#inner > p:nth-of-type(1)')
         self.assertEqual(len(els), 1)
-        self.assertEqual(els[0].string, u'Some text')
+        self.assertEqual(els[0].string, 'Some text')

     def test_id_child_selector_nth_of_type(self):
         self.assertSelects('#inner > p:nth-of-type(2)', ['p1'])
@@ -2283,7 +2283,7 @@ class TestSoupSelector(TreeTest):
         markup = '<div class="c1"/><div class="c2"/><div class="c1"/>'
         soup = BeautifulSoup(markup, 'html.parser')
         selected = soup.select(".c1, .c2")
-        self.assertEquals(3, len(selected))
+        self.assertEqual(3, len(selected))

         # Verify that find_all finds the same elements, though because
         # of an implementation detail it finds them in a different