diff options
Diffstat (limited to 'bs4')
-rw-r--r-- | bs4/element.py | 15 |
-rw-r--r-- | bs4/tests/test_tree.py | 10 |
2 files changed, 21 insertions, 4 deletions
diff --git a/bs4/element.py b/bs4/element.py
index 99a3540..6fb89ea 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -1287,15 +1287,24 @@ class SoupStrainer(object):
                 result = markup and match_against.search(markup)
             elif (hasattr(match_against, '__iter__')
                   and markup is not None
-                  and not isinstance(match_against, basestring)):
+                  and not isinstance(match_against, bytes)
+                  and not isinstance(match_against, unicode)):
                 result = markup in match_against
             elif hasattr(match_against, 'items'):
                 if markup is None:
                     result = len(match_against.items()) == 0
                 else:
                     result = match_against in markup
-            elif match_against and isinstance(markup, basestring):
-                match_against = markup.__class__(match_against)
+            elif match_against is not None:
+                if isinstance(match_against, unicode):
+                    # Unicode is fine.
+                    pass
+                elif isinstance(match_against, bytes):
+                    # A bytestring should be converted into Unicode.
+                    match_against = match_against.decode("utf8")
+                else:
+                    # Anything else should be converted into a string, then to Unicode.
+                    match_against = str(match_against)
 
             if not result:
                 result = match_against == markup
diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py
index 1e24c29..1bb479e 100644
--- a/bs4/tests/test_tree.py
+++ b/bs4/tests/test_tree.py
@@ -192,6 +192,14 @@ class TestFindAllByAttribute(TreeTest):
         self.assertSelects(tree.find_all(id='first'),
                            ["Matching a.", "Matching b."])
 
+    def test_find_all_by_utf8_attribute_value(self):
+        peace = u"םולש".encode("utf8")
+        data = u'<a title="םולש"></a>'.encode("utf8")
+        soup = self.soup(data)
+        self.assertEqual([soup.a], soup.find_all(title=peace))
+        self.assertEqual([soup.a], soup.find_all(title=peace.decode("utf8")))
+        self.assertEqual([soup.a], soup.find_all(title=[peace, "something else"]))
+
     def test_find_all_by_attribute_dict(self):
         # You can pass in a dictionary as the argument 'attrs'. This
         # lets you search for attributes like 'name' (a fixed argument
@@ -825,7 +833,7 @@ class TestTreeModification(SoupTest):
         data = "<a><b></b></a>"
         soup = self.soup(data)
         soup.a.append(soup.b)
-        self.assertEquals(data, soup.decode())
+        self.assertEqual(data, soup.decode())
 
     def test_move_tag_to_beginning_of_parent(self):
         data = "<a><b></b><c></c><d></d></a>"