diff options
author | Leonard Richardson <leonardr@segfault.org> | 2013-08-12 12:02:18 -0400 |
---|---|---|
committer | Leonard Richardson <leonardr@segfault.org> | 2013-08-12 12:02:18 -0400 |
commit | c22a10a31aa0bc6671ce22509dade90496736b32 (patch) | |
tree | 8916b29ca7e2bcc6fd7315068444cb88147db7be | |
parent | b7fae1bd115492eb489359715ed74a742e664f46 (diff) |
All find_all calls should now return a ResultSet object. Patch by
Aaron DeVore. [bug=1194034]
-rw-r--r-- | NEWS.txt | 20 |
-rw-r--r-- | bs4/element.py | 24 |
-rw-r--r-- | bs4/tests/test_tree.py | 13 |
3 files changed, 37 insertions, 20 deletions
@@ -1,14 +1,15 @@ = 4.3.0 (Unreleased) = * Instead of converting incoming data to Unicode and feeding it to the - lxml tree builder, Beautiful Soup now makes successive guesses at - the encoding of the incoming data, and tells lxml to parse the data - as that encoding. Giving lxml more control over the parsing process - improves performance and avoids a number of bugs and issues with the - lxml parser which had previously required elaborate workarounds: + lxml tree builder in chunks, Beautiful Soup now makes successive + guesses at the encoding of the incoming data, and tells lxml to + parse the data as that encoding. Giving lxml more control over the + parsing process improves performance and avoids a number of bugs and + issues with the lxml parser which had previously required elaborate + workarounds: - - An issue in which lxml refuses to parse Unicode strings. - [bug=1180527] + - An issue in which lxml refuses to parse Unicode strings on some + systems. [bug=1180527] - A returning bug that truncated documents longer than a (very small) size. [bug=963880] @@ -26,12 +27,15 @@ undocumented features have also been removed. * Beautiful Soup will issue a warning if instead of markup you pass it - a URL or the name of a file on disk (a common beginner mistake). + a URL or the name of a file on disk (a common beginner's mistake). * A number of optimizations improve the performance of the lxml tree builder by about 33%, the html.parser tree builder by about 20%, and the html5lib tree builder by about 15%. +* All find_all calls should now return a ResultSet object. Patch by + Aaron DeVore. 
[bug=1194034] + = 4.2.1 (20130531) = * The default XML formatter will now replace ampersands even if they diff --git a/bs4/element.py b/bs4/element.py index f248895..2484853 100644 --- a/bs4/element.py +++ b/bs4/element.py @@ -477,20 +477,20 @@ class PageElement(object): if isinstance(name, SoupStrainer): strainer = name - elif text is None and not limit and not attrs and not kwargs: + else: + strainer = SoupStrainer(name, attrs, text, **kwargs) + + if text is None and not limit and not attrs and not kwargs: # Optimization to find all tags. if name is True or name is None: - return [element for element in generator - if isinstance(element, Tag)] + result = (element for element in generator + if isinstance(element, Tag)) + ResultSet(strainer, result) # Optimization to find all tags with a given name. elif isinstance(name, basestring): - return [element for element in generator - if isinstance(element, Tag) and element.name == name] - else: - strainer = SoupStrainer(name, attrs, text, **kwargs) - else: - # Build a SoupStrainer - strainer = SoupStrainer(name, attrs, text, **kwargs) + result = (element for element in generator + if isinstance(element, Tag) + and element.name == name) results = ResultSet(strainer) while True: try: @@ -1602,6 +1602,6 @@ class SoupStrainer(object): class ResultSet(list): """A ResultSet is just a list that keeps track of the SoupStrainer that created it.""" - def __init__(self, source): - list.__init__([]) + def __init__(self, source, result=()): + super(list, self).__init__(result) self.source = source diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py index 0acc092..1c2c93b 100644 --- a/bs4/tests/test_tree.py +++ b/bs4/tests/test_tree.py @@ -115,6 +115,19 @@ class TestFindAll(TreeTest): # recursion. 
self.assertEqual([], soup.find_all(l)) + def test_find_all_resultset(self): + """All find_all calls return a ResultSet""" + soup = self.soup("<a></a>") + result = soup.find_all("a") + self.assertTrue(hasattr(result, "source")) + + result = soup.find_all(True) + self.assertTrue(hasattr(result, "source")) + + result = soup.find_all(text="foo") + self.assertTrue(hasattr(result, "source")) + + class TestFindAllBasicNamespaces(TreeTest): def test_find_by_namespaced_name(self): |