summaryrefslogtreecommitdiff
path: root/bs4/tests
diff options
context:
space:
mode:
Diffstat (limited to 'bs4/tests')
-rw-r--r--bs4/tests/test_html5lib.py11
-rw-r--r--bs4/tests/test_soup.py40
-rw-r--r--bs4/tests/test_tree.py45
3 files changed, 79 insertions, 17 deletions
diff --git a/bs4/tests/test_html5lib.py b/bs4/tests/test_html5lib.py
index 65536c2..8e3cba6 100644
--- a/bs4/tests/test_html5lib.py
+++ b/bs4/tests/test_html5lib.py
@@ -84,6 +84,17 @@ class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest):
self.assertEqual(u"<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p>\n</body>", soup.body.decode())
self.assertEqual(2, len(soup.find_all('p')))
+ def test_reparented_markup_containing_identical_whitespace_nodes(self):
+ """Verify that we keep the two whitespace nodes in this
+ document distinct when reparenting the adjacent <tbody> tags.
+ """
+ markup = '<table> <tbody><tbody><ims></tbody> </table>'
+ soup = self.soup(markup)
+ space1, space2 = soup.find_all(string=' ')
+ tbody1, tbody2 = soup.find_all('tbody')
+ assert space1.next_element is tbody1
+ assert tbody2.next_element is space2
+
def test_processing_instruction(self):
"""Processing instructions become comments."""
markup = b"""<?PITarget PIContent?>"""
diff --git a/bs4/tests/test_soup.py b/bs4/tests/test_soup.py
index 1238af2..f3e69ed 100644
--- a/bs4/tests/test_soup.py
+++ b/bs4/tests/test_soup.py
@@ -35,7 +35,6 @@ try:
except ImportError, e:
LXML_PRESENT = False
-PYTHON_2_PRE_2_7 = (sys.version_info < (2,7))
PYTHON_3_PRE_3_2 = (sys.version_info[0] == 3 and sys.version_info < (3,2))
class TestConstructor(SoupTest):
@@ -77,7 +76,7 @@ class TestWarnings(SoupTest):
def test_no_warning_if_explicit_parser_specified(self):
with warnings.catch_warnings(record=True) as w:
soup = self.soup("<a><b></b></a>", "html.parser")
- self.assertEquals([], w)
+ self.assertEqual([], w)
def test_parseOnlyThese_renamed_to_parse_only(self):
with warnings.catch_warnings(record=True) as w:
@@ -118,15 +117,34 @@ class TestWarnings(SoupTest):
soup = self.soup(filename)
self.assertEqual(0, len(w))
- def test_url_warning(self):
- with warnings.catch_warnings(record=True) as w:
- soup = self.soup("http://www.crummy.com/")
- msg = str(w[0].message)
- self.assertTrue("looks like a URL" in msg)
+ def test_url_warning_with_bytes_url(self):
+ with warnings.catch_warnings(record=True) as warning_list:
+ soup = self.soup(b"http://www.crummybytes.com/")
+ # Be aware this isn't the only warning that can be raised during
+ # execution..
+ self.assertTrue(any("looks like a URL" in str(w.message)
+ for w in warning_list))
+
+ def test_url_warning_with_unicode_url(self):
+ with warnings.catch_warnings(record=True) as warning_list:
+ # note - this url must differ from the bytes one otherwise
+ # python's warnings system swallows the second warning
+ soup = self.soup(u"http://www.crummyunicode.com/")
+ self.assertTrue(any("looks like a URL" in str(w.message)
+ for w in warning_list))
+
+ def test_url_warning_with_bytes_and_space(self):
+ with warnings.catch_warnings(record=True) as warning_list:
+ soup = self.soup(b"http://www.crummybytes.com/ is great")
+ self.assertFalse(any("looks like a URL" in str(w.message)
+ for w in warning_list))
+
+ def test_url_warning_with_unicode_and_space(self):
+ with warnings.catch_warnings(record=True) as warning_list:
+ soup = self.soup(u"http://www.crummyuncode.com/ is great")
+ self.assertFalse(any("looks like a URL" in str(w.message)
+ for w in warning_list))
- with warnings.catch_warnings(record=True) as w:
- soup = self.soup("http://www.crummy.com/ is great")
- self.assertEqual(0, len(w))
class TestSelectiveParsing(SoupTest):
@@ -260,7 +278,7 @@ class TestEncodingConversion(SoupTest):
self.assertEqual(soup_from_unicode.encode('utf-8'), self.utf8_data)
@skipIf(
- PYTHON_2_PRE_2_7 or PYTHON_3_PRE_3_2,
+ PYTHON_3_PRE_3_2,
"Bad HTMLParser detected; skipping test of non-ASCII characters in attribute name.")
def test_attribute_name_containing_unicode_characters(self):
markup = u'<div><a \N{SNOWMAN}="snowman"></a></div>'
diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py
index 6b2a123..a4fe0b1 100644
--- a/bs4/tests/test_tree.py
+++ b/bs4/tests/test_tree.py
@@ -222,6 +222,17 @@ class TestFindAllByName(TreeTest):
self.assertSelects(
tree.find_all(id_matches_name), ["Match 1.", "Match 2."])
+ def test_find_with_multi_valued_attribute(self):
+ soup = self.soup(
+ "<div class='a b'>1</div><div class='a c'>2</div><div class='a d'>3</div>"
+ )
+ r1 = soup.find('div', 'a d');
+ r2 = soup.find('div', re.compile(r'a d'));
+ r3, r4 = soup.find_all('div', ['a b', 'a d']);
+ self.assertEqual('3', r1.string)
+ self.assertEqual('3', r2.string)
+ self.assertEqual('1', r3.string)
+ self.assertEqual('3', r4.string)
class TestFindAllByAttribute(TreeTest):
@@ -294,10 +305,10 @@ class TestFindAllByAttribute(TreeTest):
f = tree.find_all("gar", class_=re.compile("a"))
self.assertSelects(f, ["Found it"])
- # Since the class is not the string "foo bar", but the two
- # strings "foo" and "bar", this will not find anything.
+ # If the search fails to match the individual strings "foo" and "bar",
+ # it will be tried against the combined string "foo bar".
f = tree.find_all("gar", class_=re.compile("o b"))
- self.assertSelects(f, [])
+ self.assertSelects(f, ["Found it"])
def test_find_all_with_non_dictionary_for_attrs_finds_by_class(self):
soup = self.soup("<a class='bar'>Found it</a>")
@@ -335,7 +346,7 @@ class TestFindAllByAttribute(TreeTest):
strainer = SoupStrainer(attrs={'id' : 'first'})
self.assertSelects(tree.find_all(strainer), ['Match.'])
- def test_find_all_with_missing_atribute(self):
+ def test_find_all_with_missing_attribute(self):
# You can pass in None as the value of an attribute to find_all.
# This will match tags that do not have that attribute set.
tree = self.soup("""<a id="1">ID present.</a>
@@ -1328,6 +1339,13 @@ class TestPersistence(SoupTest):
copied = copy.deepcopy(self.tree)
self.assertEqual(copied.decode(), self.tree.decode())
+ def test_copy_preserves_encoding(self):
+ soup = BeautifulSoup(b'<p>&nbsp;</p>', 'html.parser')
+ encoding = soup.original_encoding
+ copy = soup.__copy__()
+ self.assertEqual(u"<p> </p>", unicode(copy))
+ self.assertEqual(encoding, copy.original_encoding)
+
def test_unicode_pickle(self):
# A tree containing Unicode characters can be pickled.
html = u"<b>\N{SNOWMAN}</b>"
@@ -1676,8 +1694,8 @@ class TestSoupSelector(TreeTest):
def setUp(self):
self.soup = BeautifulSoup(self.HTML, 'html.parser')
- def assertSelects(self, selector, expected_ids):
- el_ids = [el['id'] for el in self.soup.select(selector)]
+ def assertSelects(self, selector, expected_ids, **kwargs):
+ el_ids = [el['id'] for el in self.soup.select(selector, **kwargs)]
el_ids.sort()
expected_ids.sort()
self.assertEqual(expected_ids, el_ids,
@@ -1720,6 +1738,13 @@ class TestSoupSelector(TreeTest):
for selector in ('html div', 'html body div', 'body div'):
self.assertSelects(selector, ['data1', 'main', 'inner', 'footer'])
+
+ def test_limit(self):
+ self.assertSelects('html div', ['main'], limit=1)
+ self.assertSelects('html body div', ['inner', 'main'], limit=2)
+ self.assertSelects('body div', ['data1', 'main', 'inner', 'footer'],
+ limit=10)
+
def test_tag_no_match(self):
self.assertEqual(len(self.soup.select('del')), 0)
@@ -1902,6 +1927,14 @@ class TestSoupSelector(TreeTest):
('div[data-tag]', ['data1'])
)
+ def test_quoted_space_in_selector_name(self):
+ html = """<div style="display: wrong">nope</div>
+ <div style="display: right">yes</div>
+ """
+ soup = BeautifulSoup(html, 'html.parser')
+ [chosen] = soup.select('div[style="display: right"]')
+ self.assertEqual("yes", chosen.string)
+
def test_unsupported_pseudoclass(self):
self.assertRaises(
NotImplementedError, self.soup.select, "a:no-such-pseudoclass")