From 36a4d3f2c6b7ddb967d885ba36f850a668029d9e Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Sun, 12 Sep 2021 20:59:43 -0400 Subject: Ported unit tests to use pytest. --- bs4/tests/test_tree.py | 1160 ++++++++++++++++++++++++------------------------ 1 file changed, 573 insertions(+), 587 deletions(-) (limited to 'bs4/tests/test_tree.py') diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py index 59b51d0..aab773f 100644 --- a/bs4/tests/test_tree.py +++ b/bs4/tests/test_tree.py @@ -12,6 +12,7 @@ methods tested here. from pdb import set_trace import copy import pickle +import pytest import re import warnings from bs4 import BeautifulSoup @@ -44,23 +45,23 @@ LXML_PRESENT = (builder_registry.lookup("lxml") is not None) class TreeTest(SoupTest): - def assertSelects(self, tags, should_match): + def assert_selects(self, tags, should_match): """Make sure that the given tags have the correct text. This is used in tests that define a bunch of tags, each containing a single string, and then select certain strings by some mechanism. """ - self.assertEqual([tag.string for tag in tags], should_match) + assert [tag.string for tag in tags] == should_match - def assertSelectsIDs(self, tags, should_match): + def assert_selects_ids(self, tags, should_match): """Make sure that the given tags have the correct IDs. This is used in tests that define a bunch of tags, each containing a single string, and then select certain strings by some mechanism. """ - self.assertEqual([tag['id'] for tag in tags], should_match) + assert [tag['id'] for tag in tags] == should_match class TestFind(TreeTest): @@ -72,27 +73,27 @@ class TestFind(TreeTest): def test_find_tag(self): soup = self.soup("1234") - self.assertEqual(soup.find("b").string, "2") + assert soup.find("b").string == "2" def test_unicode_text_find(self): soup = self.soup('

Räksmörgås

') - self.assertEqual(soup.find(string='Räksmörgås'), 'Räksmörgås') + assert soup.find(string='Räksmörgås') == 'Räksmörgås' def test_unicode_attribute_find(self): soup = self.soup('

here it is

') str(soup) - self.assertEqual("here it is", soup.find(id='Räksmörgås').text) + assert "here it is" == soup.find(id='Räksmörgås').text def test_find_everything(self): """Test an optimization that finds all tags.""" soup = self.soup("foobar") - self.assertEqual(2, len(soup.find_all())) + assert 2 == len(soup.find_all()) def test_find_everything_with_name(self): """Test an optimization that finds all tags with a given name.""" soup = self.soup("foobarbaz") - self.assertEqual(2, len(soup.find_all('a'))) + assert 2 == len(soup.find_all('a')) class TestFindAll(TreeTest): """Basic tests of the find_all() method.""" @@ -101,34 +102,31 @@ class TestFindAll(TreeTest): """You can search the tree for text nodes.""" soup = self.soup("Foobar\xbb") # Exact match. - self.assertEqual(soup.find_all(string="bar"), ["bar"]) - self.assertEqual(soup.find_all(text="bar"), ["bar"]) + assert soup.find_all(string="bar") == ["bar"] + assert soup.find_all(text="bar") == ["bar"] # Match any of a number of strings. - self.assertEqual( - soup.find_all(text=["Foo", "bar"]), ["Foo", "bar"]) + assert soup.find_all(text=["Foo", "bar"]) == ["Foo", "bar"] # Match a regular expression. - self.assertEqual(soup.find_all(text=re.compile('.*')), - ["Foo", "bar", '\xbb']) + assert soup.find_all(text=re.compile('.*')) == ["Foo", "bar", '\xbb'] # Match anything. - self.assertEqual(soup.find_all(text=True), - ["Foo", "bar", '\xbb']) + assert soup.find_all(text=True) == ["Foo", "bar", '\xbb'] def test_find_all_limit(self): """You can limit the number of items returned by find_all.""" soup = self.soup("12345") - self.assertSelects(soup.find_all('a', limit=3), ["1", "2", "3"]) - self.assertSelects(soup.find_all('a', limit=1), ["1"]) - self.assertSelects( + self.assert_selects(soup.find_all('a', limit=3), ["1", "2", "3"]) + self.assert_selects(soup.find_all('a', limit=1), ["1"]) + self.assert_selects( soup.find_all('a', limit=10), ["1", "2", "3", "4", "5"]) # A limit of 0 means no limit. - self.assertSelects( + self.assert_selects( soup.find_all('a', limit=0), ["1", "2", "3", "4", "5"]) def test_calling_a_tag_is_calling_findall(self): soup = self.soup("123") - self.assertSelects(soup('a', limit=1), ["1"]) - self.assertSelects(soup.b(id="foo"), ["3"]) + self.assert_selects(soup('a', limit=1), ["1"]) + self.assert_selects(soup.b(id="foo"), ["3"]) def test_find_all_with_self_referential_data_structure_does_not_cause_infinite_recursion(self): soup = self.soup("") @@ -138,79 +136,78 @@ class TestFindAll(TreeTest): # Without special code in _normalize_search_value, this would cause infinite # recursion. - self.assertEqual([], soup.find_all(l)) + assert [] == soup.find_all(l) def test_find_all_resultset(self): """All find_all calls return a ResultSet""" soup = self.soup("") result = soup.find_all("a") - self.assertTrue(hasattr(result, "source")) + assert hasattr(result, "source") result = soup.find_all(True) - self.assertTrue(hasattr(result, "source")) + assert hasattr(result, "source") result = soup.find_all(text="foo") - self.assertTrue(hasattr(result, "source")) + assert hasattr(result, "source") class TestFindAllBasicNamespaces(TreeTest): def test_find_by_namespaced_name(self): soup = self.soup('4') - self.assertEqual("4", soup.find("mathml:msqrt").string) - self.assertEqual("a", soup.find(attrs= { "svg:fill" : "red" }).name) + assert "4" == soup.find("mathml:msqrt").string + assert "a" == soup.find(attrs= { "svg:fill" : "red" }).name class TestFindAllByName(TreeTest): """Test ways of finding tags by tag name.""" - def setUp(self): - super(TreeTest, self).setUp() + def setup_method(self): self.tree = self.soup("""First tag. Second tag. Third Nested tag. tag.""") def test_find_all_by_tag_name(self): # Find all the tags. - self.assertSelects( + self.assert_selects( self.tree.find_all('a'), ['First tag.', 'Nested tag.']) def test_find_all_by_name_and_text(self): - self.assertSelects( + self.assert_selects( self.tree.find_all('a', text='First tag.'), ['First tag.']) - self.assertSelects( + self.assert_selects( self.tree.find_all('a', text=True), ['First tag.', 'Nested tag.']) - self.assertSelects( + self.assert_selects( self.tree.find_all('a', text=re.compile("tag")), ['First tag.', 'Nested tag.']) def test_find_all_on_non_root_element(self): # You can call find_all on any node, not just the root. - self.assertSelects(self.tree.c.find_all('a'), ['Nested tag.']) + self.assert_selects(self.tree.c.find_all('a'), ['Nested tag.']) def test_calling_element_invokes_find_all(self): - self.assertSelects(self.tree('a'), ['First tag.', 'Nested tag.']) + self.assert_selects(self.tree('a'), ['First tag.', 'Nested tag.']) def test_find_all_by_tag_strainer(self): - self.assertSelects( + self.assert_selects( self.tree.find_all(SoupStrainer('a')), ['First tag.', 'Nested tag.']) def test_find_all_by_tag_names(self): - self.assertSelects( + self.assert_selects( self.tree.find_all(['a', 'b']), ['First tag.', 'Second tag.', 'Nested tag.']) def test_find_all_by_tag_dict(self): - self.assertSelects( + self.assert_selects( self.tree.find_all({'a' : True, 'b' : True}), ['First tag.', 'Second tag.', 'Nested tag.']) def test_find_all_by_tag_re(self): - self.assertSelects( + self.assert_selects( self.tree.find_all(re.compile('^[ab]$')), ['First tag.', 'Second tag.', 'Nested tag.']) @@ -224,7 +221,7 @@ class TestFindAllByName(TreeTest): Does not match. Match 2.""") - self.assertSelects( + self.assert_selects( tree.find_all(id_matches_name), ["Match 1.", "Match 2."]) def test_find_with_multi_valued_attribute(self): @@ -234,10 +231,10 @@ class TestFindAllByName(TreeTest): r1 = soup.find('div', 'a d'); r2 = soup.find('div', re.compile(r'a d')); r3, r4 = soup.find_all('div', ['a b', 'a d']); - self.assertEqual('3', r1.string) - self.assertEqual('3', r2.string) - self.assertEqual('1', r3.string) - self.assertEqual('3', r4.string) + assert '3' == r1.string + assert '3' == r2.string + assert '1' == r3.string + assert '3' == r4.string class TestFindAllByAttribute(TreeTest): @@ -250,16 +247,16 @@ class TestFindAllByAttribute(TreeTest): Non-matching Matching b.a. """) - self.assertSelects(tree.find_all(id='first'), + self.assert_selects(tree.find_all(id='first'), ["Matching a.", "Matching b."]) def test_find_all_by_utf8_attribute_value(self): peace = "םולש".encode("utf8") data = ''.encode("utf8") soup = self.soup(data) - self.assertEqual([soup.a], soup.find_all(title=peace)) - self.assertEqual([soup.a], soup.find_all(title=peace.decode("utf8"))) - self.assertEqual([soup.a], soup.find_all(title=[peace, "something else"])) + assert [soup.a] == soup.find_all(title=peace) + assert [soup.a] == soup.find_all(title=peace.decode("utf8")) + assert [soup.a], soup.find_all(title=[peace, "something else"]) def test_find_all_by_attribute_dict(self): # You can pass in a dictionary as the argument 'attrs'. This @@ -273,13 +270,13 @@ class TestFindAllByAttribute(TreeTest): """) # This doesn't do what you want. - self.assertSelects(tree.find_all(name='name1'), + self.assert_selects(tree.find_all(name='name1'), ["A tag called 'name1'."]) # This does what you want. - self.assertSelects(tree.find_all(attrs={'name' : 'name1'}), + self.assert_selects(tree.find_all(attrs={'name' : 'name1'}), ["Name match."]) - self.assertSelects(tree.find_all(attrs={'class' : 'class2'}), + self.assert_selects(tree.find_all(attrs={'class' : 'class2'}), ["Class match."]) def test_find_all_by_class(self): @@ -292,57 +289,57 @@ class TestFindAllByAttribute(TreeTest): # Passing in the class_ keyword argument will search against # the 'class' attribute. - self.assertSelects(tree.find_all('a', class_='1'), ['Class 1.']) - self.assertSelects(tree.find_all('c', class_='3'), ['Class 3 and 4.']) - self.assertSelects(tree.find_all('c', class_='4'), ['Class 3 and 4.']) + self.assert_selects(tree.find_all('a', class_='1'), ['Class 1.']) + self.assert_selects(tree.find_all('c', class_='3'), ['Class 3 and 4.']) + self.assert_selects(tree.find_all('c', class_='4'), ['Class 3 and 4.']) # Passing in a string to 'attrs' will also search the CSS class. - self.assertSelects(tree.find_all('a', '1'), ['Class 1.']) - self.assertSelects(tree.find_all(attrs='1'), ['Class 1.', 'Class 1.']) - self.assertSelects(tree.find_all('c', '3'), ['Class 3 and 4.']) - self.assertSelects(tree.find_all('c', '4'), ['Class 3 and 4.']) + self.assert_selects(tree.find_all('a', '1'), ['Class 1.']) + self.assert_selects(tree.find_all(attrs='1'), ['Class 1.', 'Class 1.']) + self.assert_selects(tree.find_all('c', '3'), ['Class 3 and 4.']) + self.assert_selects(tree.find_all('c', '4'), ['Class 3 and 4.']) def test_find_by_class_when_multiple_classes_present(self): tree = self.soup("Found it") f = tree.find_all("gar", class_=re.compile("o")) - self.assertSelects(f, ["Found it"]) + self.assert_selects(f, ["Found it"]) f = tree.find_all("gar", class_=re.compile("a")) - self.assertSelects(f, ["Found it"]) + self.assert_selects(f, ["Found it"]) # If the search fails to match the individual strings "foo" and "bar", # it will be tried against the combined string "foo bar". f = tree.find_all("gar", class_=re.compile("o b")) - self.assertSelects(f, ["Found it"]) + self.assert_selects(f, ["Found it"]) def test_find_all_with_non_dictionary_for_attrs_finds_by_class(self): soup = self.soup("Found it") - self.assertSelects(soup.find_all("a", re.compile("ba")), ["Found it"]) + self.assert_selects(soup.find_all("a", re.compile("ba")), ["Found it"]) def big_attribute_value(value): return len(value) > 3 - self.assertSelects(soup.find_all("a", big_attribute_value), []) + self.assert_selects(soup.find_all("a", big_attribute_value), []) def small_attribute_value(value): return len(value) <= 3 - self.assertSelects( + self.assert_selects( soup.find_all("a", small_attribute_value), ["Found it"]) def test_find_all_with_string_for_attrs_finds_multiple_classes(self): soup = self.soup('') a, a2 = soup.find_all("a") - self.assertEqual([a, a2], soup.find_all("a", "foo")) - self.assertEqual([a], soup.find_all("a", "bar")) + assert [a, a2], soup.find_all("a", "foo") + assert [a], soup.find_all("a", "bar") # If you specify the class as a string that contains a # space, only that specific value will be found. - self.assertEqual([a], soup.find_all("a", class_="foo bar")) - self.assertEqual([a], soup.find_all("a", "foo bar")) - self.assertEqual([], soup.find_all("a", "bar foo")) + assert [a] == soup.find_all("a", class_="foo bar") + assert [a] == soup.find_all("a", "foo bar") + assert [] == soup.find_all("a", "bar foo") def test_find_all_by_attribute_soupstrainer(self): tree = self.soup(""" @@ -350,7 +347,7 @@ class TestFindAllByAttribute(TreeTest): Non-match.""") strainer = SoupStrainer(attrs={'id' : 'first'}) - self.assertSelects(tree.find_all(strainer), ['Match.']) + self.assert_selects(tree.find_all(strainer), ['Match.']) def test_find_all_with_missing_attribute(self): # You can pass in None as the value of an attribute to find_all. @@ -358,7 +355,7 @@ class TestFindAllByAttribute(TreeTest): tree = self.soup("""ID present. No ID present. ID is empty.""") - self.assertSelects(tree.find_all('a', id=None), ["No ID present."]) + self.assert_selects(tree.find_all('a', id=None), ["No ID present."]) def test_find_all_with_defined_attribute(self): # You can pass in None as the value of an attribute to find_all. @@ -366,7 +363,7 @@ class TestFindAllByAttribute(TreeTest): tree = self.soup("""ID present. No ID present. ID is empty.""") - self.assertSelects( + self.assert_selects( tree.find_all(id=True), ["ID present.", "ID is empty."]) def test_find_all_with_numeric_attribute(self): @@ -375,8 +372,8 @@ class TestFindAllByAttribute(TreeTest): Quoted attribute.""") expected = ["Unquoted attribute.", "Quoted attribute."] - self.assertSelects(tree.find_all(id=1), expected) - self.assertSelects(tree.find_all(id="1"), expected) + self.assert_selects(tree.find_all(id=1), expected) + self.assert_selects(tree.find_all(id="1"), expected) def test_find_all_with_list_attribute_values(self): # You can pass a list of attribute values instead of just one, @@ -385,7 +382,7 @@ class TestFindAllByAttribute(TreeTest): 2 3 No ID.""") - self.assertSelects(tree.find_all(id=["1", "3", "4"]), + self.assert_selects(tree.find_all(id=["1", "3", "4"]), ["1", "3"]) def test_find_all_with_regular_expression_attribute_value(self): @@ -398,27 +395,26 @@ class TestFindAllByAttribute(TreeTest): One b. No ID.""") - self.assertSelects(tree.find_all(id=re.compile("^a+$")), + self.assert_selects(tree.find_all(id=re.compile("^a+$")), ["One a.", "Two as."]) def test_find_by_name_and_containing_string(self): soup = self.soup("foobarfoo") a = soup.a - self.assertEqual([a], soup.find_all("a", text="foo")) - self.assertEqual([], soup.find_all("a", text="bar")) - self.assertEqual([], soup.find_all("a", text="bar")) + assert [a] == soup.find_all("a", text="foo") + assert [] == soup.find_all("a", text="bar") def test_find_by_name_and_containing_string_when_string_is_buried(self): soup = self.soup("foofoo") - self.assertEqual(soup.find_all("a"), soup.find_all("a", text="foo")) + assert soup.find_all("a") == soup.find_all("a", text="foo") def test_find_by_attribute_and_containing_string(self): soup = self.soup('foofoo') a = soup.a - self.assertEqual([a], soup.find_all(id=2, text="foo")) - self.assertEqual([], soup.find_all(id=1, text="bar")) + assert [a] == soup.find_all(id=2, text="foo") + assert [] == soup.find_all(id=1, text="bar") class TestSmooth(TreeTest): @@ -444,25 +440,25 @@ class TestSmooth(TreeTest): # output. # Since the tag has two children, its .string is None. - self.assertEqual(None, div.span.string) + assert None == div.span.string - self.assertEqual(7, len(div.contents)) + assert 7 == len(div.contents) div.smooth() - self.assertEqual(5, len(div.contents)) + assert 5 == len(div.contents) # The three strings at the beginning of div.contents have been # merged into on string. # - self.assertEqual('abc', div.contents[0]) + assert 'abc' == div.contents[0] # The call is recursive -- the tag was also smoothed. - self.assertEqual('12', div.span.string) + assert '12' == div.span.string # The two comments have _not_ been merged, even though # comments are strings. Merging comments would change the # meaning of the HTML. - self.assertEqual('Comment 1', div.contents[1]) - self.assertEqual('Comment 2', div.contents[2]) + assert 'Comment 1' == div.contents[1] + assert 'Comment 2' == div.contents[2] class TestIndex(TreeTest): @@ -479,15 +475,15 @@ class TestIndex(TreeTest): """) div = tree.div for i, element in enumerate(div.contents): - self.assertEqual(i, div.index(element)) - self.assertRaises(ValueError, tree.index, 1) + assert i == div.index(element) + with pytest.raises(ValueError): + tree.index(1) class TestParentOperations(TreeTest): """Test navigation and searching through an element's parents.""" - def setUp(self): - super(TestParentOperations, self).setUp() + def setup_method(self): self.tree = self.soup('''