diff options
-rw-r--r-- | CHANGELOG | 51 | ||||
-rw-r--r-- | beautifulsoup/__init__.py | 2 | ||||
-rw-r--r-- | beautifulsoup/builder/__init__.py | 98 | ||||
-rw-r--r-- | beautifulsoup/element.py | 180 | ||||
-rw-r--r-- | tests/test_tree.py | 136 |
5 files changed, 313 insertions, 154 deletions
@@ -1,5 +1,49 @@ = 4.0 = +== Better method names == + +Methods have been renamed to comply with PEP 8. The old names still +work. Here are the renames: + + * findAll -> find_all + * findAllNext -> find_all_next + * findAllPrevious -> find_all_previous + * findNext -> find_next + * findNextSibling -> find_next_sibling + * findNextSiblings -> find_next_siblings + * findParent -> find_parent + * findParents -> find_parents + * findPrevious -> find_previous + * findPreviousSibling -> find_previous_sibling + * findPreviousSiblings -> find_previous_siblings + +== Generators are now properties == + +The generators have been given more sensible (and PEP 8-compliant) +names, and turned into properties: + + * childGenerator() -> children + * nextGenerator() -> next_elements + * nextSiblingGenerator() -> next_siblings + * previousGenerator() -> previous_elements + * previousSiblingGenerator() -> previous_siblings + * recursiveChildGenerator() -> recursive_children + * parentGenerator() -> parents + +So instead of this: + + for parent in tag.parentGenerator(): + ... + +You can write this: + + for parent in tag.parents: + ... + +(But the old code will still work.) + +== tag.string is recursive == + tag.string now operates recursively. If tag A contains a single tag B and nothing else, then A.string is the same as B.string. So: @@ -7,6 +51,13 @@ and nothing else, then A.string is the same as B.string. So: The value of a.string used to be None, and now it's "foo". +== Entities are always converted to Unicode == + +An HTML or XML entity is always converted into the corresponding +Unicode character. There are no longer any smartQuotesTo or +convertEntities arguments. (Unicode Dammit still has smartQuotesTo, +though that may change.) + = 3.1.0 = A hybrid version that supports 2.4 and can be automatically converted diff --git a/beautifulsoup/__init__.py b/beautifulsoup/__init__.py index ddf51f9..e23c9d9 100644 --- a/beautifulsoup/__init__.py +++ b/beautifulsoup/__init__.py @@ -171,7 +171,7 @@ class BeautifulStoneSoup(Tag): else: dammit = UnicodeDammit\ (markup, [self.fromEncoding, inDocumentEncoding], - smartQuotesTo=self.builder.smart_quotes_to, isHTML=isHTML) + isHTML=isHTML) markup = dammit.unicode self.originalEncoding = dammit.originalEncoding self.declaredHTMLEncoding = dammit.declaredHTMLEncoding diff --git a/beautifulsoup/builder/__init__.py b/beautifulsoup/builder/__init__.py new file mode 100644 index 0000000..86de5ec --- /dev/null +++ b/beautifulsoup/builder/__init__.py @@ -0,0 +1,98 @@ +from beautifulsoup.element import Entities + +__all__ = [ + 'HTMLTreeBuilder', + 'SAXTreeBuilder', + 'TreeBuilder', + ] + + +class TreeBuilder(Entities): + """Turn a document into a Beautiful Soup object tree.""" + + assume_html = False + + def __init__(self): + self.soup = None + + def isSelfClosingTag(self, name): + return name in self.self_closing_tags + + def reset(self): + pass + + def feed(self, markup): + raise NotImplementedError() + + def test_fragment_to_document(self, fragment): + """Wrap an HTML fragment to make it look like a document. + + Different parsers do this differently. For instance, lxml + introduces an empty <head> tag, and html5lib + doesn't. Abstracting this away lets us write simple tests + which run HTML fragments through the parser and compare the + results against other HTML fragments. + + This method should not be used outside of tests. + """ + return fragment + + +class SAXTreeBuilder(TreeBuilder): + """A Beautiful Soup treebuilder that listens for SAX events.""" + + def feed(self, markup): + raise NotImplementedError() + + def close(self): + pass + + def startElement(self, name, attrs): + attrs = dict((key[1], value) for key, value in attrs.items()) + #print "Start %s, %r" % (name, attrs) + self.soup.handle_starttag(name, attrs) + + def endElement(self, name): + #print "End %s" % name + self.soup.handle_endtag(name) + + def startElementNS(self, nsTuple, nodeName, attrs): + # Throw away (ns, nodeName) for now. + self.startElement(nodeName, attrs) + + def endElementNS(self, nsTuple, nodeName): + # Throw away (ns, nodeName) for now. + self.endElement(nodeName) + #handler.endElementNS((ns, node.nodeName), node.nodeName) + + def startPrefixMapping(self, prefix, nodeValue): + # Ignore the prefix for now. + pass + + def endPrefixMapping(self, prefix): + # Ignore the prefix for now. + # handler.endPrefixMapping(prefix) + pass + + def characters(self, content): + self.soup.handle_data(content) + + def startDocument(self): + pass + + def endDocument(self): + pass + + +class HTMLTreeBuilder(TreeBuilder): + """This TreeBuilder knows facts about HTML. + + Such as which tags are self-closing tags. + """ + + assume_html = True + + preserve_whitespace_tags = set(['pre', 'textarea']) + self_closing_tags = set(['br' , 'hr', 'input', 'img', 'meta', + 'spacer', 'link', 'frame', 'base']) + diff --git a/beautifulsoup/element.py b/beautifulsoup/element.py index 8749114..39e0e06 100644 --- a/beautifulsoup/element.py +++ b/beautifulsoup/element.py @@ -161,77 +161,88 @@ class PageElement: """Appends the given tag to the contents of this tag.""" self.insert(len(self.contents), tag) - def findNext(self, name=None, attrs={}, text=None, **kwargs): + def find_next(self, name=None, attrs={}, text=None, **kwargs): """Returns the first item that matches the given criteria and appears after this Tag in the document.""" - return self._findOne(self.findAllNext, name, attrs, text, **kwargs) + return self._findOne(self.find_all_next, name, attrs, text, **kwargs) + findNext = find_next # BS3 - def findAllNext(self, name=None, attrs={}, text=None, limit=None, + def find_all_next(self, name=None, attrs={}, text=None, limit=None, **kwargs): """Returns all items that match the given criteria and appear after this Tag in the document.""" - return self._findAll(name, attrs, text, limit, self.nextGenerator, + return self._find_all(name, attrs, text, limit, self.next_elements, **kwargs) + findAllNext = find_all_next # BS3 - def findNextSibling(self, name=None, attrs={}, text=None, **kwargs): + def find_next_sibling(self, name=None, attrs={}, text=None, **kwargs): """Returns the closest sibling to this Tag that matches the given criteria and appears after this Tag in the document.""" - return self._findOne(self.findNextSiblings, name, attrs, text, + return self._findOne(self.find_next_siblings, name, attrs, text, **kwargs) + findNextSibling = find_next_sibling # BS3 - def findNextSiblings(self, name=None, attrs={}, text=None, limit=None, - **kwargs): + def find_next_siblings(self, name=None, attrs={}, text=None, limit=None, + **kwargs): """Returns the siblings of this Tag that match the given criteria and appear after this Tag in the document.""" - return self._findAll(name, attrs, text, limit, - self.nextSiblingGenerator, **kwargs) - fetchNextSiblings = findNextSiblings # Compatibility with pre-3.x + return self._find_all(name, attrs, text, limit, + self.next_siblings, **kwargs) + findNextSiblings = find_next_siblings # BS3 + fetchNextSiblings = find_next_siblings # BS2 - def findPrevious(self, name=None, attrs={}, text=None, **kwargs): + def find_previous(self, name=None, attrs={}, text=None, **kwargs): """Returns the first item that matches the given criteria and appears before this Tag in the document.""" - return self._findOne(self.findAllPrevious, name, attrs, text, **kwargs) + return self._findOne( + self.find_all_previous, name, attrs, text, **kwargs) + findPrevious = find_previous # BS3 - def findAllPrevious(self, name=None, attrs={}, text=None, limit=None, + def find_all_previous(self, name=None, attrs={}, text=None, limit=None, **kwargs): """Returns all items that match the given criteria and appear before this Tag in the document.""" - return self._findAll(name, attrs, text, limit, self.previousGenerator, + return self._find_all(name, attrs, text, limit, self.previous_elements, **kwargs) - fetchPrevious = findAllPrevious # Compatibility with pre-3.x + findAllPrevious = find_all_previous # BS3 + fetchPrevious = find_all_previous # BS2 - def findPreviousSibling(self, name=None, attrs={}, text=None, **kwargs): + def find_previous_sibling(self, name=None, attrs={}, text=None, **kwargs): """Returns the closest sibling to this Tag that matches the given criteria and appears before this Tag in the document.""" - return self._findOne(self.findPreviousSiblings, name, attrs, text, + return self._findOne(self.find_previous_siblings, name, attrs, text, **kwargs) + findPreviousSibling = find_previous_sibling # BS3 - def findPreviousSiblings(self, name=None, attrs={}, text=None, - limit=None, **kwargs): + def find_previous_siblings(self, name=None, attrs={}, text=None, + limit=None, **kwargs): """Returns the siblings of this Tag that match the given criteria and appear before this Tag in the document.""" - return self._findAll(name, attrs, text, limit, - self.previousSiblingGenerator, **kwargs) - fetchPreviousSiblings = findPreviousSiblings # Compatibility with pre-3.x + return self._find_all(name, attrs, text, limit, + self.previous_siblings, **kwargs) + findPreviousSiblings = find_previous_siblings # BS3 + fetchPreviousSiblings = find_previous_siblings # BS2 - def findParent(self, name=None, attrs={}, **kwargs): + def find_parent(self, name=None, attrs={}, **kwargs): """Returns the closest parent of this Tag that matches the given criteria.""" # NOTE: We can't use _findOne because findParents takes a different # set of arguments. r = None - l = self.findParents(name, attrs, 1) + l = self.find_parents(name, attrs, 1) if l: r = l[0] return r + findParent = find_parent # BS3 - def findParents(self, name=None, attrs={}, limit=None, **kwargs): + def find_parents(self, name=None, attrs={}, limit=None, **kwargs): """Returns the parents of this Tag that match the given criteria.""" - return self._findAll(name, attrs, None, limit, self.parentGenerator, + return self._find_all(name, attrs, None, limit, self.parents, **kwargs) - fetchParents = findParents # Compatibility with pre-3.x + findParents = find_parents # BS3 + fetchParents = find_parents # BS2 #These methods do the real heavy lifting. @@ -242,7 +253,7 @@ class PageElement: r = l[0] return r - def _findAll(self, name, attrs, text, limit, generator, **kwargs): + def _find_all(self, name, attrs, text, limit, generator, **kwargs): "Iterates over a generator looking for things that match." if isinstance(name, SoupStrainer): @@ -251,10 +262,9 @@ class PageElement: # Build a SoupStrainer strainer = SoupStrainer(name, attrs, text, **kwargs) results = ResultSet(strainer) - g = generator() while True: try: - i = g.next() + i = generator.next() except StopIteration: break if i: @@ -265,38 +275,60 @@ class PageElement: break return results - #These Generators can be used to navigate starting from both + #These generators can be used to navigate starting from both #NavigableStrings and Tags. - def nextGenerator(self): + @property + def next_elements(self): i = self while i: i = i.next yield i - def nextSiblingGenerator(self): + @property + def next_siblings(self): i = self while i: i = i.nextSibling yield i - def previousGenerator(self): + @property + def previous_elements(self): i = self while i: i = i.previous yield i - def previousSiblingGenerator(self): + @property + def previous_siblings(self): i = self while i: i = i.previousSibling yield i - def parentGenerator(self): + @property + def parents(self): i = self while i: i = i.parent yield i + # Old non-property versions of the generators, for backwards + # compatibility with BS3. + def nextGenerator(self): + return self.next_elements + + def nextSiblingGenerator(self): + return self.next_siblings + + def previousGenerator(self): + return self.previous_elements + + def previousSiblingGenerator(self): + return self.previous_siblings + + def parentGenerator(self): + return self.parents + # Utility methods def substituteEncoding(self, str, encoding=None): encoding = encoding or "utf-8" @@ -389,37 +421,12 @@ class Tag(PageElement, Entities): """Represents a found HTML tag with its attributes and contents.""" - def _convertEntities(self, builder, match): - """Used in a call to re.sub to replace HTML, XML, and numeric - entities with the appropriate Unicode characters. If HTML - entities are being converted, any unrecognized entities are - escaped.""" - x = match.group(1) - if builder.convert_html_entities and x in name2codepoint: - return unichr(name2codepoint[x]) - elif x in self.XML_ENTITIES_TO_SPECIAL_CHARS: - if builder.convert_xml_entities: - return self.XML_ENTITIES_TO_SPECIAL_CHARS[x] - else: - return u'&%s;' % x - elif len(x) > 0 and x[0] == '#': - # Handle numeric entities - if len(x) > 1 and x[1] == 'x': - return unichr(int(x[2:], 16)) - else: - return unichr(int(x[1:])) - - elif self.escapeUnrecognizedEntities: - return u'&%s;' % x - else: - return u'&%s;' % x - def __init__(self, parser, builder, name, attrs=None, parent=None, previous=None): "Basic constructor." # We don't actually store the parser object: that lets extracted - # chunks be garbage-collected + # chunks be garbage-collected. self.parserClass = parser.__class__ self.name = name self.isSelfClosing = builder.isSelfClosingTag(name) @@ -432,19 +439,11 @@ class Tag(PageElement, Entities): self.setup(parent, previous) self.hidden = False self.containsSubstitutions = False - self.escapeUnrecognizedEntities = parser.escapeUnrecognizedEntities - - # Convert any HTML, XML, or numeric entities in the attribute values. - convert_one = lambda x: self._convertEntities(parser.builder, x) - def convert(kval): - k, val = kval - if val is None: - return kval - return (k, re.sub("&(#\d+|#x[0-9a-fA-F]+|\w+);", convert_one, val)) + if isinstance(attrs, types.DictType): - self.attrs = [convert(kv) for kv in attrs.items()] + self.attrs = [kv for kv in attrs.items()] else: - self.attrs = map(convert, attrs) + self.attrs = list(attrs) @property def string(self): @@ -519,9 +518,9 @@ class Tag(PageElement, Entities): def __call__(self, *args, **kwargs): """Calling a tag like a function is the same as calling its - findAll() method. Eg. tag('a') returns a list of all the A tags + find_all() method. Eg. tag('a') returns a list of all the A tags found within this tag.""" - return apply(self.findAll, args, kwargs) + return apply(self.find_all, args, kwargs) def __getattr__(self, tag): #print "Getattr %s.%s" % (self.__class__, tag) @@ -702,14 +701,14 @@ class Tag(PageElement, Entities): """Return only the first child of this Tag matching the given criteria.""" r = None - l = self.findAll(name, attrs, recursive, text, 1, **kwargs) + l = self.find_all(name, attrs, recursive, text, 1, **kwargs) if l: r = l[0] return r findChild = find - def findAll(self, name=None, attrs={}, recursive=True, text=None, - limit=None, **kwargs): + def find_all(self, name=None, attrs={}, recursive=True, text=None, + limit=None, **kwargs): """Extracts a list of Tag objects that match the given criteria. You can specify the name of the Tag and any attributes you want the Tag to have. @@ -719,11 +718,12 @@ class Tag(PageElement, Entities): callable that takes a string and returns whether or not the string matches for some custom definition of 'matches'. The same is true of the tag name.""" - generator = self.recursiveChildGenerator + generator = self.recursive_children if not recursive: - generator = self.childGenerator - return self._findAll(name, attrs, text, limit, generator, **kwargs) - findChildren = findAll + generator = self.children + return self._find_all(name, attrs, text, limit, generator, **kwargs) + findAll = find_all # BS3 + findChildren = find_all # BS2 #Private methods @@ -737,12 +737,14 @@ class Tag(PageElement, Entities): return self.attrMap #Generator methods - def childGenerator(self): + @property + def children(self): for i in range(0, len(self.contents)): yield self.contents[i] raise StopIteration - def recursiveChildGenerator(self): + @property + def recursive_children(self): if not len(self.contents): raise StopIteration stopNode = self._lastRecursiveChild().next @@ -751,6 +753,14 @@ class Tag(PageElement, Entities): yield current current = current.next + # Old names for backwards compatibility + def childGenerator(self): + return self.children + + def recursiveChildGenerator(self): + return self.recursive_children + + # Next, a couple classes to represent queries and their results. class SoupStrainer: """Encapsulates a number of ways of matching a markup element (tag or diff --git a/tests/test_tree.py b/tests/test_tree.py index 367489e..e424e0b 100644 --- a/tests/test_tree.py +++ b/tests/test_tree.py @@ -40,7 +40,7 @@ class TreeTest(SoupTest): class TestFind(TreeTest): """Basic tests of the find() method. - find() just calls findAll() with limit=1, so it's not tested all + find() just calls find_all() with limit=1, so it's not tested all that thouroughly here. """ @@ -54,34 +54,34 @@ class TestFind(TreeTest): class TestFindAll(TreeTest): - """Basic tests of the findAll() method.""" + """Basic tests of the find_all() method.""" def test_find_all_text_nodes(self): """You can search the tree for text nodes.""" soup = self.soup("<html>Foo<b>bar</b>\xbb</html>") # Exact match. - self.assertEqual(soup.findAll(text="bar"), [u"bar"]) + self.assertEqual(soup.find_all(text="bar"), [u"bar"]) # Match any of a number of strings. self.assertEqual( - soup.findAll(text=["Foo", "bar"]), [u"Foo", u"bar"]) + soup.find_all(text=["Foo", "bar"]), [u"Foo", u"bar"]) # Match a regular expression. - self.assertEqual(soup.findAll(text=re.compile('.*')), + self.assertEqual(soup.find_all(text=re.compile('.*')), [u"Foo", u"bar", u'\xbb']) # Match anything. - self.assertEqual(soup.findAll(text=True), + self.assertEqual(soup.find_all(text=True), [u"Foo", u"bar", u'\xbb']) def test_find_all_limit(self): - """You can limit the number of items returned by findAll.""" + """You can limit the number of items returned by find_all.""" soup = self.soup("<a>1</a><a>2</a><a>3</a><a>4</a><a>5</a>") - self.assertSelects(soup.findAll('a', limit=3), ["1", "2", "3"]) - self.assertSelects(soup.findAll('a', limit=1), ["1"]) + self.assertSelects(soup.find_all('a', limit=3), ["1", "2", "3"]) + self.assertSelects(soup.find_all('a', limit=1), ["1"]) self.assertSelects( - soup.findAll('a', limit=10), ["1", "2", "3", "4", "5"]) + soup.find_all('a', limit=10), ["1", "2", "3", "4", "5"]) # A limit of 0 means no limit. self.assertSelects( - soup.findAll('a', limit=0), ["1", "2", "3", "4", "5"]) + soup.find_all('a', limit=0), ["1", "2", "3", "4", "5"]) class TestFindAllByName(TreeTest): """Test ways of finding tags by tag name.""" @@ -95,33 +95,33 @@ class TestFindAllByName(TreeTest): def test_find_all_by_tag_name(self): # Find all the <a> tags. self.assertSelects( - self.tree.findAll('a'), ['First tag.', 'Nested tag.']) + self.tree.find_all('a'), ['First tag.', 'Nested tag.']) def test_find_all_on_non_root_element(self): # You can call find_all on any node, not just the root. - self.assertSelects(self.tree.c.findAll('a'), ['Nested tag.']) + self.assertSelects(self.tree.c.find_all('a'), ['Nested tag.']) def test_calling_element_invokes_find_all(self): self.assertSelects(self.tree('a'), ['First tag.', 'Nested tag.']) def test_find_all_by_tag_strainer(self): self.assertSelects( - self.tree.findAll(SoupStrainer('a')), + self.tree.find_all(SoupStrainer('a')), ['First tag.', 'Nested tag.']) def test_find_all_by_tag_names(self): self.assertSelects( - self.tree.findAll(['a', 'b']), + self.tree.find_all(['a', 'b']), ['First tag.', 'Second tag.', 'Nested tag.']) def test_find_all_by_tag_dict(self): self.assertSelects( - self.tree.findAll({'a' : True, 'b' : True}), + self.tree.find_all({'a' : True, 'b' : True}), ['First tag.', 'Second tag.', 'Nested tag.']) def test_find_all_by_tag_re(self): self.assertSelects( - self.tree.findAll(re.compile('^[ab]$')), + self.tree.find_all(re.compile('^[ab]$')), ['First tag.', 'Second tag.', 'Nested tag.']) def test_find_all_with_tags_matching_method(self): @@ -135,26 +135,26 @@ class TestFindAllByName(TreeTest): <b id="b">Match 2.</a>""") self.assertSelects( - tree.findAll(id_matches_name), ["Match 1.", "Match 2."]) + tree.find_all(id_matches_name), ["Match 1.", "Match 2."]) class TestFindAllByAttribute(TreeTest): def test_find_all_by_attribute_name(self): - # You can pass in keyword arguments to findAll to search by + # You can pass in keyword arguments to find_all to search by # attribute. tree = self.soup(""" <a id="first">Matching a.</a> <a id="second"> Non-matching <b id="first">Matching b.</b>a. </a>""") - self.assertSelects(tree.findAll(id='first'), + self.assertSelects(tree.find_all(id='first'), ["Matching a.", "Matching b."]) def test_find_all_by_attribute_dict(self): # You can pass in a dictionary as the argument 'attrs'. This # lets you search for attributes like 'name' (a fixed argument - # to findAll) and 'class' (a reserved word in Python.) + # to find_all) and 'class' (a reserved word in Python.) tree = self.soup(""" <a name="name1" class="class1">Name match.</a> <a name="name2" class="class2">Class match.</a> @@ -163,14 +163,14 @@ class TestFindAllByAttribute(TreeTest): """) # This doesn't do what you want. - self.assertSelects(tree.findAll(name='name1'), + self.assertSelects(tree.find_all(name='name1'), ["A tag called 'name1'."]) # This does what you want. - self.assertSelects(tree.findAll(attrs={'name' : 'name1'}), + self.assertSelects(tree.find_all(attrs={'name' : 'name1'}), ["Name match."]) # Passing class='class2' would cause a syntax error. - self.assertSelects(tree.findAll(attrs={'class' : 'class2'}), + self.assertSelects(tree.find_all(attrs={'class' : 'class2'}), ["Class match."]) def test_find_all_by_class(self): @@ -180,8 +180,8 @@ class TestFindAllByAttribute(TreeTest): <a class="2">Class 2.</a> <b class="1">Class 1.</a> """) - self.assertSelects(tree.findAll('a', '1'), ['Class 1.']) - self.assertSelects(tree.findAll(attrs='1'), ['Class 1.', 'Class 1.']) + self.assertSelects(tree.find_all('a', '1'), ['Class 1.']) + self.assertSelects(tree.find_all(attrs='1'), ['Class 1.', 'Class 1.']) def test_find_all_by_attribute_soupstrainer(self): tree = self.soup(""" @@ -189,24 +189,24 @@ class TestFindAllByAttribute(TreeTest): <a id="second">Non-match.</a>""") strainer = SoupStrainer(attrs={'id' : 'first'}) - self.assertSelects(tree.findAll(strainer), ['Match.']) + self.assertSelects(tree.find_all(strainer), ['Match.']) def test_find_all_with_missing_atribute(self): - # You can pass in None as the value of an attribute to findAll. + # You can pass in None as the value of an attribute to find_all. # This will match tags that do not have that attribute set. tree = self.soup("""<a id="1">ID present.</a> <a>No ID present.</a> <a id="">ID is empty.</a>""") - self.assertSelects(tree.findAll('a', id=None), ["No ID present."]) + self.assertSelects(tree.find_all('a', id=None), ["No ID present."]) def test_find_all_with_defined_attribute(self): - # You can pass in None as the value of an attribute to findAll. + # You can pass in None as the value of an attribute to find_all. # This will match tags that have that attribute set to any value. tree = self.soup("""<a id="1">ID present.</a> <a>No ID present.</a> <a id="">ID is empty.</a>""") self.assertSelects( - tree.findAll(id=True), ["ID present.", "ID is empty."]) + tree.find_all(id=True), ["ID present.", "ID is empty."]) def test_find_all_with_numeric_attribute(self): # If you search for a number, it's treated as a string. @@ -214,8 +214,8 @@ class TestFindAllByAttribute(TreeTest): <a id="1">Quoted attribute.</a>""") expected = ["Unquoted attribute.", "Quoted attribute."] - self.assertSelects(tree.findAll(id=1), expected) - self.assertSelects(tree.findAll(id="1"), expected) + self.assertSelects(tree.find_all(id=1), expected) + self.assertSelects(tree.find_all(id="1"), expected) def test_find_all_with_list_attribute_values(self): # You can pass a list of attribute values instead of just one, @@ -224,7 +224,7 @@ class TestFindAllByAttribute(TreeTest): <a id="2">2</a> <a id="3">3</a> <a>No ID.</a>""") - self.assertSelects(tree.findAll(id=["1", "3", "4"]), + self.assertSelects(tree.find_all(id=["1", "3", "4"]), ["1", "3"]) def test_find_all_with_regular_expression_attribute_value(self): @@ -237,7 +237,7 @@ class TestFindAllByAttribute(TreeTest): <a id="b">One b.</a> <a>No ID.</a>""") - self.assertSelects(tree.findAll(id=re.compile("^a+$")), + self.assertSelects(tree.find_all(id=re.compile("^a+$")), ["One a.", "Two as."]) @@ -270,12 +270,12 @@ class TestParentOperations(TreeTest): def test_find_parents(self): self.assertSelectsIDs( - self.start.findParents('ul'), ['bottom', 'middle', 'top']) + self.start.find_parents('ul'), ['bottom', 'middle', 'top']) self.assertSelectsIDs( - self.start.findParents('ul', id="middle"), ['middle']) + self.start.find_parents('ul', id="middle"), ['middle']) def test_find_parent(self): - self.assertEquals(self.start.findParent('ul')['id'], 'bottom') + self.assertEquals(self.start.find_parent('ul')['id'], 'bottom') def test_parent_of_text_element(self): text = self.tree.find(text="Start here") @@ -283,10 +283,10 @@ class TestParentOperations(TreeTest): def test_text_element_find_parent(self): text = self.tree.find(text="Start here") - self.assertEquals(text.findParent('ul')['id'], 'bottom') + self.assertEquals(text.find_parent('ul')['id'], 'bottom') def test_parent_generator(self): - parents = [parent['id'] for parent in self.start.parentGenerator() + parents = [parent['id'] for parent in self.start.parents if parent is not None and parent.has_key('id')] self.assertEquals(parents, ['bottom', 'middle', 'top']) @@ -318,21 +318,21 @@ class TestNextOperations(ProximityTest): self.assertEquals(self.tree.next, None) def test_find_all_next(self): - self.assertSelects(self.start.findAllNext('b'), ["Two", "Three"]) - self.assertSelects(self.start.findAllNext(id=3), ["Three"]) + self.assertSelects(self.start.find_all_next('b'), ["Two", "Three"]) + self.assertSelects(self.start.find_all_next(id=3), ["Three"]) def test_find_next(self): - self.assertEquals(self.start.findNext('b')['id'], '2') - self.assertEquals(self.start.findNext(text="Three"), "Three") + self.assertEquals(self.start.find_next('b')['id'], '2') + self.assertEquals(self.start.find_next(text="Three"), "Three") def test_find_next_for_text_element(self): text = self.tree.find(text="One") - self.assertEquals(text.findNext("b").string, "Two") - self.assertSelects(text.findAllNext("b"), ["Two", "Three"]) + self.assertEquals(text.find_next("b").string, "Two") + self.assertSelects(text.find_all_next("b"), ["Two", "Three"]) def test_next_generator(self): start = self.tree.find(text="Two") - successors = [node for node in start.nextGenerator()] + successors = [node for node in start.next_elements] # There are two successors: the final <b> tag and its text contents. # Then we go off the end. tag, contents, none = successors @@ -340,7 +340,7 @@ class TestNextOperations(ProximityTest): self.assertEquals(contents, "Three") self.assertEquals(none, None) - # XXX Should nextGenerator really return None? Seems like it + # XXX Should next_elements really return None? Seems like it # should just stop. @@ -369,22 +369,22 @@ class TestPreviousOperations(ProximityTest): # of the "Three" node itself, which is why "Three" shows up # here. self.assertSelects( - self.end.findAllPrevious('b'), ["Three", "Two", "One"]) - self.assertSelects(self.end.findAllPrevious(id=1), ["One"]) + self.end.find_all_previous('b'), ["Three", "Two", "One"]) + self.assertSelects(self.end.find_all_previous(id=1), ["One"]) def test_find_previous(self): - self.assertEquals(self.end.findPrevious('b')['id'], '3') - self.assertEquals(self.end.findPrevious(text="One"), "One") + self.assertEquals(self.end.find_previous('b')['id'], '3') + self.assertEquals(self.end.find_previous(text="One"), "One") def test_find_previous_for_text_element(self): text = self.tree.find(text="Three") - self.assertEquals(text.findPrevious("b").string, "Three") + self.assertEquals(text.find_previous("b").string, "Three") self.assertSelects( - text.findAllPrevious("b"), ["Three", "Two", "One"]) + text.find_all_previous("b"), ["Three", "Two", "One"]) def test_previous_generator(self): start = self.tree.find(text="One") - predecessors = [node for node in start.previousGenerator()] + predecessors = [node for node in start.previous_elements] # There are four predecessors: the <b> tag containing "One" # the <body> tag, the <head> tag, and the <html> tag. Then we @@ -447,13 +447,13 @@ class TestNextSibling(SiblingTest): self.assertEquals(last_span.nextSibling, None) def test_find_next_sibling(self): - self.assertEquals(self.start.findNextSibling('span')['id'], '2') + self.assertEquals(self.start.find_next_sibling('span')['id'], '2') def test_next_siblings(self): - self.assertSelectsIDs(self.start.findNextSiblings("span"), + self.assertSelectsIDs(self.start.find_next_siblings("span"), ['2', '3', '4']) - self.assertSelectsIDs(self.start.findNextSiblings(id='3'), ['3']) + self.assertSelectsIDs(self.start.find_next_siblings(id='3'), ['3']) def test_next_sibling_for_text_element(self): soup = self.soup("Foo<b>bar</b>baz") @@ -461,9 +461,9 @@ class TestNextSibling(SiblingTest): self.assertEquals(start.nextSibling.name, 'b') self.assertEquals(start.nextSibling.nextSibling, 'baz') - self.assertSelects(start.findNextSiblings('b'), ['bar']) - self.assertEquals(start.findNextSibling(text="baz"), "baz") - self.assertEquals(start.findNextSibling(text="nonesuch"), None) + self.assertSelects(start.find_next_siblings('b'), ['bar']) + self.assertEquals(start.find_next_sibling(text="baz"), "baz") + self.assertEquals(start.find_next_sibling(text="nonesuch"), None) class TestPreviousSibling(SiblingTest): @@ -492,13 +492,13 @@ class TestPreviousSibling(SiblingTest): self.assertEquals(first_span.previousSibling, None) def test_find_previous_sibling(self): - self.assertEquals(self.end.findPreviousSibling('span')['id'], '3') + self.assertEquals(self.end.find_previous_sibling('span')['id'], '3') def test_previous_siblings(self): - self.assertSelectsIDs(self.end.findPreviousSiblings("span"), + self.assertSelectsIDs(self.end.find_previous_siblings("span"), ['3', '2', '1']) - self.assertSelectsIDs(self.end.findPreviousSiblings(id='1'), ['1']) + self.assertSelectsIDs(self.end.find_previous_siblings(id='1'), ['1']) def test_previous_sibling_for_text_element(self): soup = self.soup("Foo<b>bar</b>baz") @@ -506,9 +506,9 @@ class TestPreviousSibling(SiblingTest): self.assertEquals(start.previousSibling.name, 'b') self.assertEquals(start.previousSibling.previousSibling, 'Foo') - self.assertSelects(start.findPreviousSiblings('b'), ['bar']) - self.assertEquals(start.findPreviousSibling(text="Foo"), "Foo") - self.assertEquals(start.findPreviousSibling(text="nonesuch"), None) + self.assertSelects(start.find_previous_siblings('b'), ['bar']) + self.assertEquals(start.find_previous_sibling(text="Foo"), "Foo") + self.assertEquals(start.find_previous_sibling(text="nonesuch"), None) class TestTreeModification(SoupTest): @@ -623,7 +623,7 @@ class TestTreeModification(SoupTest): def test_replace_with(self): soup = self.soup( "<p>There's <b>no</b> business like <b>show</b> business</p>") - no, show = soup.findAll('b') + no, show = soup.find_all('b') show.replaceWith(no) self.assertEquals( soup.decode(), |