diff options
Diffstat (limited to 'bs4/tests')
-rw-r--r-- | bs4/tests/test_html5lib.py | 11 | ||||
-rw-r--r-- | bs4/tests/test_soup.py | 40 | ||||
-rw-r--r-- | bs4/tests/test_tree.py | 45 |
3 files changed, 79 insertions, 17 deletions
diff --git a/bs4/tests/test_html5lib.py b/bs4/tests/test_html5lib.py index 65536c2..8e3cba6 100644 --- a/bs4/tests/test_html5lib.py +++ b/bs4/tests/test_html5lib.py @@ -84,6 +84,17 @@ class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest): self.assertEqual(u"<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p>\n</body>", soup.body.decode()) self.assertEqual(2, len(soup.find_all('p'))) + def test_reparented_markup_containing_identical_whitespace_nodes(self): + """Verify that we keep the two whitespace nodes in this + document distinct when reparenting the adjacent <tbody> tags. + """ + markup = '<table> <tbody><tbody><ims></tbody> </table>' + soup = self.soup(markup) + space1, space2 = soup.find_all(string=' ') + tbody1, tbody2 = soup.find_all('tbody') + assert space1.next_element is tbody1 + assert tbody2.next_element is space2 + def test_processing_instruction(self): """Processing instructions become comments.""" markup = b"""<?PITarget PIContent?>""" diff --git a/bs4/tests/test_soup.py b/bs4/tests/test_soup.py index 1238af2..f3e69ed 100644 --- a/bs4/tests/test_soup.py +++ b/bs4/tests/test_soup.py @@ -35,7 +35,6 @@ try: except ImportError, e: LXML_PRESENT = False -PYTHON_2_PRE_2_7 = (sys.version_info < (2,7)) PYTHON_3_PRE_3_2 = (sys.version_info[0] == 3 and sys.version_info < (3,2)) class TestConstructor(SoupTest): @@ -77,7 +76,7 @@ class TestWarnings(SoupTest): def test_no_warning_if_explicit_parser_specified(self): with warnings.catch_warnings(record=True) as w: soup = self.soup("<a><b></b></a>", "html.parser") - self.assertEquals([], w) + self.assertEqual([], w) def test_parseOnlyThese_renamed_to_parse_only(self): with warnings.catch_warnings(record=True) as w: @@ -118,15 +117,34 @@ class TestWarnings(SoupTest): soup = self.soup(filename) self.assertEqual(0, len(w)) - def test_url_warning(self): - with warnings.catch_warnings(record=True) as w: - soup = self.soup("http://www.crummy.com/") - msg = str(w[0].message) - self.assertTrue("looks like a URL" in msg) + def test_url_warning_with_bytes_url(self): + with warnings.catch_warnings(record=True) as warning_list: + soup = self.soup(b"http://www.crummybytes.com/") + # Be aware this isn't the only warning that can be raised during + # execution.. + self.assertTrue(any("looks like a URL" in str(w.message) + for w in warning_list)) + + def test_url_warning_with_unicode_url(self): + with warnings.catch_warnings(record=True) as warning_list: + # note - this url must differ from the bytes one otherwise + # python's warnings system swallows the second warning + soup = self.soup(u"http://www.crummyunicode.com/") + self.assertTrue(any("looks like a URL" in str(w.message) + for w in warning_list)) + + def test_url_warning_with_bytes_and_space(self): + with warnings.catch_warnings(record=True) as warning_list: + soup = self.soup(b"http://www.crummybytes.com/ is great") + self.assertFalse(any("looks like a URL" in str(w.message) + for w in warning_list)) + + def test_url_warning_with_unicode_and_space(self): + with warnings.catch_warnings(record=True) as warning_list: + soup = self.soup(u"http://www.crummyuncode.com/ is great") + self.assertFalse(any("looks like a URL" in str(w.message) + for w in warning_list)) - with warnings.catch_warnings(record=True) as w: - soup = self.soup("http://www.crummy.com/ is great") - self.assertEqual(0, len(w)) class TestSelectiveParsing(SoupTest): @@ -260,7 +278,7 @@ class TestEncodingConversion(SoupTest): self.assertEqual(soup_from_unicode.encode('utf-8'), self.utf8_data) @skipIf( - PYTHON_2_PRE_2_7 or PYTHON_3_PRE_3_2, + PYTHON_3_PRE_3_2, "Bad HTMLParser detected; skipping test of non-ASCII characters in attribute name.") def test_attribute_name_containing_unicode_characters(self): markup = u'<div><a \N{SNOWMAN}="snowman"></a></div>' diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py index 6b2a123..a4fe0b1 100644 --- a/bs4/tests/test_tree.py +++ b/bs4/tests/test_tree.py @@ -222,6 +222,17 @@ class TestFindAllByName(TreeTest): self.assertSelects( tree.find_all(id_matches_name), ["Match 1.", "Match 2."]) + def test_find_with_multi_valued_attribute(self): + soup = self.soup( + "<div class='a b'>1</div><div class='a c'>2</div><div class='a d'>3</div>" + ) + r1 = soup.find('div', 'a d'); + r2 = soup.find('div', re.compile(r'a d')); + r3, r4 = soup.find_all('div', ['a b', 'a d']); + self.assertEqual('3', r1.string) + self.assertEqual('3', r2.string) + self.assertEqual('1', r3.string) + self.assertEqual('3', r4.string) class TestFindAllByAttribute(TreeTest): @@ -294,10 +305,10 @@ class TestFindAllByAttribute(TreeTest): f = tree.find_all("gar", class_=re.compile("a")) self.assertSelects(f, ["Found it"]) - # Since the class is not the string "foo bar", but the two - # strings "foo" and "bar", this will not find anything. + # If the search fails to match the individual strings "foo" and "bar", + # it will be tried against the combined string "foo bar". f = tree.find_all("gar", class_=re.compile("o b")) - self.assertSelects(f, []) + self.assertSelects(f, ["Found it"]) def test_find_all_with_non_dictionary_for_attrs_finds_by_class(self): soup = self.soup("<a class='bar'>Found it</a>") @@ -335,7 +346,7 @@ class TestFindAllByAttribute(TreeTest): strainer = SoupStrainer(attrs={'id' : 'first'}) self.assertSelects(tree.find_all(strainer), ['Match.']) - def test_find_all_with_missing_atribute(self): + def test_find_all_with_missing_attribute(self): # You can pass in None as the value of an attribute to find_all. # This will match tags that do not have that attribute set. tree = self.soup("""<a id="1">ID present.</a> @@ -1328,6 +1339,13 @@ class TestPersistence(SoupTest): copied = copy.deepcopy(self.tree) self.assertEqual(copied.decode(), self.tree.decode()) + def test_copy_preserves_encoding(self): + soup = BeautifulSoup(b'<p> </p>', 'html.parser') + encoding = soup.original_encoding + copy = soup.__copy__() + self.assertEqual(u"<p> </p>", unicode(copy)) + self.assertEqual(encoding, copy.original_encoding) + def test_unicode_pickle(self): # A tree containing Unicode characters can be pickled. html = u"<b>\N{SNOWMAN}</b>" @@ -1676,8 +1694,8 @@ class TestSoupSelector(TreeTest): def setUp(self): self.soup = BeautifulSoup(self.HTML, 'html.parser') - def assertSelects(self, selector, expected_ids): - el_ids = [el['id'] for el in self.soup.select(selector)] + def assertSelects(self, selector, expected_ids, **kwargs): + el_ids = [el['id'] for el in self.soup.select(selector, **kwargs)] el_ids.sort() expected_ids.sort() self.assertEqual(expected_ids, el_ids, @@ -1720,6 +1738,13 @@ class TestSoupSelector(TreeTest): for selector in ('html div', 'html body div', 'body div'): self.assertSelects(selector, ['data1', 'main', 'inner', 'footer']) + + def test_limit(self): + self.assertSelects('html div', ['main'], limit=1) + self.assertSelects('html body div', ['inner', 'main'], limit=2) + self.assertSelects('body div', ['data1', 'main', 'inner', 'footer'], + limit=10) + def test_tag_no_match(self): self.assertEqual(len(self.soup.select('del')), 0) @@ -1902,6 +1927,14 @@ class TestSoupSelector(TreeTest): ('div[data-tag]', ['data1']) ) + def test_quoted_space_in_selector_name(self): + html = """<div style="display: wrong">nope</div> + <div style="display: right">yes</div> + """ + soup = BeautifulSoup(html, 'html.parser') + [chosen] = soup.select('div[style="display: right"]') + self.assertEqual("yes", chosen.string) + def test_unsupported_pseudoclass(self): self.assertRaises( NotImplementedError, self.soup.select, "a:no-such-pseudoclass") |