diff options
Diffstat (limited to 'bs4/testing.py')
-rw-r--r-- | bs4/testing.py | 56 |
1 file changed, 28 insertions, 28 deletions
diff --git a/bs4/testing.py b/bs4/testing.py index 5b0eb7c..2f9046a 100644 --- a/bs4/testing.py +++ b/bs4/testing.py @@ -26,7 +26,7 @@ from bs4.element import ( from bs4.builder import HTMLParserTreeBuilder default_builder = HTMLParserTreeBuilder -BAD_DOCUMENT = u"""A bare string +BAD_DOCUMENT = """A bare string <!DOCTYPE xsl:stylesheet SYSTEM "htmlent.dtd"> <!DOCTYPE xsl:stylesheet PUBLIC "htmlent.dtd"> <div><![CDATA[A CDATA section where it doesn't belong]]></div> @@ -95,7 +95,7 @@ class SoupTest(unittest.TestCase): # Verify that every tag that was opened was eventually closed. # There are no tags in the open tag counter. - assert all(v==0 for v in obj.open_tag_counter.values()) + assert all(v==0 for v in list(obj.open_tag_counter.values())) # The only tag in the tag stack is the one for the root # document. @@ -407,7 +407,7 @@ class HTMLTreeBuilderSmokeTest(TreeBuilderSmokeTest): # process_markup correctly sets processing_instruction_class # even when the markup is already Unicode and there is no # need to process anything. - markup = u"""<?PITarget PIContent?>""" + markup = """<?PITarget PIContent?>""" soup = self.soup(markup) self.assertEqual(markup, soup.decode()) @@ -579,14 +579,14 @@ Hello, world! # "&T" and "&p" look like incomplete character entities, but they are # not. self.assertSoupEquals( - u"<p>• AT&T is in the s&p 500</p>", - u"<p>\u2022 AT&T is in the s&p 500</p>" + "<p>• AT&T is in the s&p 500</p>", + "<p>\u2022 AT&T is in the s&p 500</p>" ) def test_apos_entity(self): self.assertSoupEquals( - u"<p>Bob's Bar</p>", - u"<p>Bob's Bar</p>", + "<p>Bob's Bar</p>", + "<p>Bob's Bar</p>", ) def test_entities_in_foreign_document_encoding(self): @@ -599,17 +599,17 @@ Hello, world! # characters. 
markup = "<p>“Hello” -☃</p>" soup = self.soup(markup) - self.assertEquals(u"“Hello” -☃", soup.p.string) + self.assertEqual("“Hello” -☃", soup.p.string) def test_entities_in_attributes_converted_to_unicode(self): - expect = u'<p id="pi\N{LATIN SMALL LETTER N WITH TILDE}ata"></p>' + expect = '<p id="pi\N{LATIN SMALL LETTER N WITH TILDE}ata"></p>' self.assertSoupEquals('<p id="piñata"></p>', expect) self.assertSoupEquals('<p id="piñata"></p>', expect) self.assertSoupEquals('<p id="piñata"></p>', expect) self.assertSoupEquals('<p id="piñata"></p>', expect) def test_entities_in_text_converted_to_unicode(self): - expect = u'<p>pi\N{LATIN SMALL LETTER N WITH TILDE}ata</p>' + expect = '<p>pi\N{LATIN SMALL LETTER N WITH TILDE}ata</p>' self.assertSoupEquals("<p>piñata</p>", expect) self.assertSoupEquals("<p>piñata</p>", expect) self.assertSoupEquals("<p>piñata</p>", expect) @@ -620,7 +620,7 @@ Hello, world! '<p>I said "good day!"</p>') def test_out_of_range_entity(self): - expect = u"\N{REPLACEMENT CHARACTER}" + expect = "\N{REPLACEMENT CHARACTER}" self.assertSoupEquals("�", expect) self.assertSoupEquals("�", expect) self.assertSoupEquals("�", expect) @@ -698,9 +698,9 @@ Hello, world! # A seemingly innocuous document... but it's in Unicode! And # it contains characters that can't be represented in the # encoding found in the declaration! The horror! - markup = u'<html><head><meta encoding="euc-jp"></head><body>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</body>' + markup = '<html><head><meta encoding="euc-jp"></head><body>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</body>' soup = self.soup(markup) - self.assertEqual(u'Sacr\xe9 bleu!', soup.body.string) + self.assertEqual('Sacr\xe9 bleu!', soup.body.string) def test_soupstrainer(self): """Parsers should be able to work with SoupStrainers.""" @@ -740,7 +740,7 @@ Hello, world! # Both XML and HTML entities are converted to Unicode characters # during parsing. 
text = "<p><<sacré bleu!>></p>" - expected = u"<p><<sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></p>" + expected = "<p><<sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></p>" self.assertSoupEquals(text, expected) def test_smart_quotes_converted_on_the_way_in(self): @@ -750,15 +750,15 @@ Hello, world! soup = self.soup(quote) self.assertEqual( soup.p.string, - u"\N{LEFT SINGLE QUOTATION MARK}Foo\N{RIGHT SINGLE QUOTATION MARK}") + "\N{LEFT SINGLE QUOTATION MARK}Foo\N{RIGHT SINGLE QUOTATION MARK}") def test_non_breaking_spaces_converted_on_the_way_in(self): soup = self.soup("<a> </a>") - self.assertEqual(soup.a.string, u"\N{NO-BREAK SPACE}" * 2) + self.assertEqual(soup.a.string, "\N{NO-BREAK SPACE}" * 2) def test_entities_converted_on_the_way_out(self): text = "<p><<sacré bleu!>></p>" - expected = u"<p><<sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></p>".encode("utf-8") + expected = "<p><<sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></p>".encode("utf-8") soup = self.soup(text) self.assertEqual(soup.p.encode("utf-8"), expected) @@ -767,7 +767,7 @@ Hello, world! # easy-to-understand document. # Here it is in Unicode. Note that it claims to be in ISO-Latin-1. - unicode_html = u'<html><head><meta content="text/html; charset=ISO-Latin-1" http-equiv="Content-type"/></head><body><p>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</p></body></html>' + unicode_html = '<html><head><meta content="text/html; charset=ISO-Latin-1" http-equiv="Content-type"/></head><body><p>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</p></body></html>' # That's because we're going to encode it into ISO-Latin-1, and use # that to test. @@ -883,8 +883,8 @@ Hello, world! 
soup = self.soup(markup) for encoding in PYTHON_SPECIFIC_ENCODINGS: if encoding in ( - u'idna', u'mbcs', u'oem', u'undefined', - u'string_escape', u'string-escape' + 'idna', 'mbcs', 'oem', 'undefined', + 'string_escape', 'string-escape' ): # For one reason or another, these will raise an # exception if we actually try to use them, so don't @@ -945,8 +945,8 @@ class XMLTreeBuilderSmokeTest(TreeBuilderSmokeTest): soup = self.soup(markup) for encoding in PYTHON_SPECIFIC_ENCODINGS: if encoding in ( - u'idna', u'mbcs', u'oem', u'undefined', - u'string_escape', u'string-escape' + 'idna', 'mbcs', 'oem', 'undefined', + 'string_escape', 'string-escape' ): # For one reason or another, these will raise an # exception if we actually try to use them, so don't @@ -997,15 +997,15 @@ class XMLTreeBuilderSmokeTest(TreeBuilderSmokeTest): self.assertTrue(b"< < hey > >" in encoded) def test_can_parse_unicode_document(self): - markup = u'<?xml version="1.0" encoding="euc-jp"><root>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</root>' + markup = '<?xml version="1.0" encoding="euc-jp"><root>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</root>' soup = self.soup(markup) - self.assertEqual(u'Sacr\xe9 bleu!', soup.root.string) + self.assertEqual('Sacr\xe9 bleu!', soup.root.string) def test_popping_namespaced_tag(self): markup = '<rss xmlns:dc="foo"><dc:creator>b</dc:creator><dc:date>2012-07-02T20:33:42Z</dc:date><dc:rights>c</dc:rights><image>d</image></rss>' soup = self.soup(markup) self.assertEqual( - unicode(soup.rss), markup) + str(soup.rss), markup) def test_docstring_includes_correct_encoding(self): soup = self.soup("<root/>") @@ -1036,17 +1036,17 @@ class XMLTreeBuilderSmokeTest(TreeBuilderSmokeTest): def test_closing_namespaced_tag(self): markup = '<p xmlns:dc="http://purl.org/dc/elements/1.1/"><dc:date>20010504</dc:date></p>' soup = self.soup(markup) - self.assertEqual(unicode(soup.p), markup) + self.assertEqual(str(soup.p), markup) def test_namespaced_attributes(self): markup = 
'<foo xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"><bar xsi:schemaLocation="http://www.example.com"/></foo>' soup = self.soup(markup) - self.assertEqual(unicode(soup.foo), markup) + self.assertEqual(str(soup.foo), markup) def test_namespaced_attributes_xml_namespace(self): markup = '<foo xml:lang="fr">bar</foo>' soup = self.soup(markup) - self.assertEqual(unicode(soup.foo), markup) + self.assertEqual(str(soup.foo), markup) def test_find_by_prefixed_name(self): doc = """<?xml version="1.0" encoding="utf-8"?> |