summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- CHANGELOG 51
-rw-r--r-- beautifulsoup/__init__.py 2
-rw-r--r-- beautifulsoup/builder/__init__.py 98
-rw-r--r-- beautifulsoup/element.py 180
-rw-r--r-- tests/test_tree.py 136
5 files changed, 313 insertions, 154 deletions
diff --git a/CHANGELOG b/CHANGELOG
index 0bfe72a..dffab7c 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,5 +1,49 @@
= 4.0 =
+== Better method names ==
+
+Methods have been renamed to comply with PEP 8. The old names still
+work. Here are the renames:
+
+ * findAll -> find_all
+ * findAllNext -> find_all_next
+ * findAllPrevious -> find_all_previous
+ * findNext -> find_next
+ * findNextSibling -> find_next_sibling
+ * findNextSiblings -> find_next_siblings
+ * findParent -> find_parent
+ * findParents -> find_parents
+ * findPrevious -> find_previous
+ * findPreviousSibling -> find_previous_sibling
+ * findPreviousSiblings -> find_previous_siblings
+
+== Generators are now properties ==
+
+The generators have been given more sensible (and PEP 8-compliant)
+names, and turned into properties:
+
+ * childGenerator() -> children
+ * nextGenerator() -> next_elements
+ * nextSiblingGenerator() -> next_siblings
+ * previousGenerator() -> previous_elements
+ * previousSiblingGenerator() -> previous_siblings
+ * recursiveChildGenerator() -> recursive_children
+ * parentGenerator() -> parents
+
+So instead of this:
+
+ for parent in tag.parentGenerator():
+ ...
+
+You can write this:
+
+ for parent in tag.parents:
+ ...
+
+(But the old code will still work.)
+
+== tag.string is recursive ==
+
tag.string now operates recursively. If tag A contains a single tag B
and nothing else, then A.string is the same as B.string. So:
@@ -7,6 +51,13 @@ and nothing else, then A.string is the same as B.string. So:
The value of a.string used to be None, and now it's "foo".
+== Entities are always converted to Unicode ==
+
+An HTML or XML entity is always converted into the corresponding
+Unicode character. There are no longer any smartQuotesTo or
+convertEntities arguments. (Unicode Dammit still has smartQuotesTo,
+though that may change.)
+
= 3.1.0 =
A hybrid version that supports 2.4 and can be automatically converted
diff --git a/beautifulsoup/__init__.py b/beautifulsoup/__init__.py
index ddf51f9..e23c9d9 100644
--- a/beautifulsoup/__init__.py
+++ b/beautifulsoup/__init__.py
@@ -171,7 +171,7 @@ class BeautifulStoneSoup(Tag):
else:
dammit = UnicodeDammit\
(markup, [self.fromEncoding, inDocumentEncoding],
- smartQuotesTo=self.builder.smart_quotes_to, isHTML=isHTML)
+ isHTML=isHTML)
markup = dammit.unicode
self.originalEncoding = dammit.originalEncoding
self.declaredHTMLEncoding = dammit.declaredHTMLEncoding
diff --git a/beautifulsoup/builder/__init__.py b/beautifulsoup/builder/__init__.py
new file mode 100644
index 0000000..86de5ec
--- /dev/null
+++ b/beautifulsoup/builder/__init__.py
@@ -0,0 +1,98 @@
+from beautifulsoup.element import Entities
+
+__all__ = [
+ 'HTMLTreeBuilder',
+ 'SAXTreeBuilder',
+ 'TreeBuilder',
+ ]
+
+
+class TreeBuilder(Entities):
+ """Turn a document into a Beautiful Soup object tree."""
+
+ assume_html = False
+
+ def __init__(self):
+ self.soup = None
+
+ def isSelfClosingTag(self, name):
+ return name in self.self_closing_tags
+
+ def reset(self):
+ pass
+
+ def feed(self, markup):
+ raise NotImplementedError()
+
+ def test_fragment_to_document(self, fragment):
+ """Wrap an HTML fragment to make it look like a document.
+
+ Different parsers do this differently. For instance, lxml
+ introduces an empty <head> tag, and html5lib
+ doesn't. Abstracting this away lets us write simple tests
+ which run HTML fragments through the parser and compare the
+ results against other HTML fragments.
+
+ This method should not be used outside of tests.
+ """
+ return fragment
+
+
+class SAXTreeBuilder(TreeBuilder):
+ """A Beautiful Soup treebuilder that listens for SAX events."""
+
+ def feed(self, markup):
+ raise NotImplementedError()
+
+ def close(self):
+ pass
+
+ def startElement(self, name, attrs):
+ attrs = dict((key[1], value) for key, value in attrs.items())
+ #print "Start %s, %r" % (name, attrs)
+ self.soup.handle_starttag(name, attrs)
+
+ def endElement(self, name):
+ #print "End %s" % name
+ self.soup.handle_endtag(name)
+
+ def startElementNS(self, nsTuple, nodeName, attrs):
+ # Throw away (ns, nodeName) for now.
+ self.startElement(nodeName, attrs)
+
+ def endElementNS(self, nsTuple, nodeName):
+ # Throw away (ns, nodeName) for now.
+ self.endElement(nodeName)
+ #handler.endElementNS((ns, node.nodeName), node.nodeName)
+
+ def startPrefixMapping(self, prefix, nodeValue):
+ # Ignore the prefix for now.
+ pass
+
+ def endPrefixMapping(self, prefix):
+ # Ignore the prefix for now.
+ # handler.endPrefixMapping(prefix)
+ pass
+
+ def characters(self, content):
+ self.soup.handle_data(content)
+
+ def startDocument(self):
+ pass
+
+ def endDocument(self):
+ pass
+
+
+class HTMLTreeBuilder(TreeBuilder):
+ """This TreeBuilder knows facts about HTML.
+
+ Such as which tags are self-closing tags.
+ """
+
+ assume_html = True
+
+ preserve_whitespace_tags = set(['pre', 'textarea'])
+ self_closing_tags = set(['br' , 'hr', 'input', 'img', 'meta',
+ 'spacer', 'link', 'frame', 'base'])
+
diff --git a/beautifulsoup/element.py b/beautifulsoup/element.py
index 8749114..39e0e06 100644
--- a/beautifulsoup/element.py
+++ b/beautifulsoup/element.py
@@ -161,77 +161,88 @@ class PageElement:
"""Appends the given tag to the contents of this tag."""
self.insert(len(self.contents), tag)
- def findNext(self, name=None, attrs={}, text=None, **kwargs):
+ def find_next(self, name=None, attrs={}, text=None, **kwargs):
"""Returns the first item that matches the given criteria and
appears after this Tag in the document."""
- return self._findOne(self.findAllNext, name, attrs, text, **kwargs)
+ return self._findOne(self.find_all_next, name, attrs, text, **kwargs)
+ findNext = find_next # BS3
- def findAllNext(self, name=None, attrs={}, text=None, limit=None,
+ def find_all_next(self, name=None, attrs={}, text=None, limit=None,
**kwargs):
"""Returns all items that match the given criteria and appear
after this Tag in the document."""
- return self._findAll(name, attrs, text, limit, self.nextGenerator,
+ return self._find_all(name, attrs, text, limit, self.next_elements,
**kwargs)
+ findAllNext = find_all_next # BS3
- def findNextSibling(self, name=None, attrs={}, text=None, **kwargs):
+ def find_next_sibling(self, name=None, attrs={}, text=None, **kwargs):
"""Returns the closest sibling to this Tag that matches the
given criteria and appears after this Tag in the document."""
- return self._findOne(self.findNextSiblings, name, attrs, text,
+ return self._findOne(self.find_next_siblings, name, attrs, text,
**kwargs)
+ findNextSibling = find_next_sibling # BS3
- def findNextSiblings(self, name=None, attrs={}, text=None, limit=None,
- **kwargs):
+ def find_next_siblings(self, name=None, attrs={}, text=None, limit=None,
+ **kwargs):
"""Returns the siblings of this Tag that match the given
criteria and appear after this Tag in the document."""
- return self._findAll(name, attrs, text, limit,
- self.nextSiblingGenerator, **kwargs)
- fetchNextSiblings = findNextSiblings # Compatibility with pre-3.x
+ return self._find_all(name, attrs, text, limit,
+ self.next_siblings, **kwargs)
+ findNextSiblings = find_next_siblings # BS3
+ fetchNextSiblings = find_next_siblings # BS2
- def findPrevious(self, name=None, attrs={}, text=None, **kwargs):
+ def find_previous(self, name=None, attrs={}, text=None, **kwargs):
"""Returns the first item that matches the given criteria and
appears before this Tag in the document."""
- return self._findOne(self.findAllPrevious, name, attrs, text, **kwargs)
+ return self._findOne(
+ self.find_all_previous, name, attrs, text, **kwargs)
+ findPrevious = find_previous # BS3
- def findAllPrevious(self, name=None, attrs={}, text=None, limit=None,
+ def find_all_previous(self, name=None, attrs={}, text=None, limit=None,
**kwargs):
"""Returns all items that match the given criteria and appear
before this Tag in the document."""
- return self._findAll(name, attrs, text, limit, self.previousGenerator,
+ return self._find_all(name, attrs, text, limit, self.previous_elements,
**kwargs)
- fetchPrevious = findAllPrevious # Compatibility with pre-3.x
+ findAllPrevious = find_all_previous # BS3
+ fetchPrevious = find_all_previous # BS2
- def findPreviousSibling(self, name=None, attrs={}, text=None, **kwargs):
+ def find_previous_sibling(self, name=None, attrs={}, text=None, **kwargs):
"""Returns the closest sibling to this Tag that matches the
given criteria and appears before this Tag in the document."""
- return self._findOne(self.findPreviousSiblings, name, attrs, text,
+ return self._findOne(self.find_previous_siblings, name, attrs, text,
**kwargs)
+ findPreviousSibling = find_previous_sibling # BS3
- def findPreviousSiblings(self, name=None, attrs={}, text=None,
- limit=None, **kwargs):
+ def find_previous_siblings(self, name=None, attrs={}, text=None,
+ limit=None, **kwargs):
"""Returns the siblings of this Tag that match the given
criteria and appear before this Tag in the document."""
- return self._findAll(name, attrs, text, limit,
- self.previousSiblingGenerator, **kwargs)
- fetchPreviousSiblings = findPreviousSiblings # Compatibility with pre-3.x
+ return self._find_all(name, attrs, text, limit,
+ self.previous_siblings, **kwargs)
+ findPreviousSiblings = find_previous_siblings # BS3
+ fetchPreviousSiblings = find_previous_siblings # BS2
- def findParent(self, name=None, attrs={}, **kwargs):
+ def find_parent(self, name=None, attrs={}, **kwargs):
"""Returns the closest parent of this Tag that matches the given
criteria."""
# NOTE: We can't use _findOne because findParents takes a different
# set of arguments.
r = None
- l = self.findParents(name, attrs, 1)
+ l = self.find_parents(name, attrs, 1)
if l:
r = l[0]
return r
+ findParent = find_parent # BS3
- def findParents(self, name=None, attrs={}, limit=None, **kwargs):
+ def find_parents(self, name=None, attrs={}, limit=None, **kwargs):
"""Returns the parents of this Tag that match the given
criteria."""
- return self._findAll(name, attrs, None, limit, self.parentGenerator,
+ return self._find_all(name, attrs, None, limit, self.parents,
**kwargs)
- fetchParents = findParents # Compatibility with pre-3.x
+ findParents = find_parents # BS3
+ fetchParents = find_parents # BS2
#These methods do the real heavy lifting.
@@ -242,7 +253,7 @@ class PageElement:
r = l[0]
return r
- def _findAll(self, name, attrs, text, limit, generator, **kwargs):
+ def _find_all(self, name, attrs, text, limit, generator, **kwargs):
"Iterates over a generator looking for things that match."
if isinstance(name, SoupStrainer):
@@ -251,10 +262,9 @@ class PageElement:
# Build a SoupStrainer
strainer = SoupStrainer(name, attrs, text, **kwargs)
results = ResultSet(strainer)
- g = generator()
while True:
try:
- i = g.next()
+ i = generator.next()
except StopIteration:
break
if i:
@@ -265,38 +275,60 @@ class PageElement:
break
return results
- #These Generators can be used to navigate starting from both
+ #These generators can be used to navigate starting from both
#NavigableStrings and Tags.
- def nextGenerator(self):
+ @property
+ def next_elements(self):
i = self
while i:
i = i.next
yield i
- def nextSiblingGenerator(self):
+ @property
+ def next_siblings(self):
i = self
while i:
i = i.nextSibling
yield i
- def previousGenerator(self):
+ @property
+ def previous_elements(self):
i = self
while i:
i = i.previous
yield i
- def previousSiblingGenerator(self):
+ @property
+ def previous_siblings(self):
i = self
while i:
i = i.previousSibling
yield i
- def parentGenerator(self):
+ @property
+ def parents(self):
i = self
while i:
i = i.parent
yield i
+ # Old non-property versions of the generators, for backwards
+ # compatibility with BS3.
+ def nextGenerator(self):
+ return self.next_elements
+
+ def nextSiblingGenerator(self):
+ return self.next_siblings
+
+ def previousGenerator(self):
+ return self.previous_elements
+
+ def previousSiblingGenerator(self):
+ return self.previous_siblings
+
+ def parentGenerator(self):
+ return self.parents
+
# Utility methods
def substituteEncoding(self, str, encoding=None):
encoding = encoding or "utf-8"
@@ -389,37 +421,12 @@ class Tag(PageElement, Entities):
"""Represents a found HTML tag with its attributes and contents."""
- def _convertEntities(self, builder, match):
- """Used in a call to re.sub to replace HTML, XML, and numeric
- entities with the appropriate Unicode characters. If HTML
- entities are being converted, any unrecognized entities are
- escaped."""
- x = match.group(1)
- if builder.convert_html_entities and x in name2codepoint:
- return unichr(name2codepoint[x])
- elif x in self.XML_ENTITIES_TO_SPECIAL_CHARS:
- if builder.convert_xml_entities:
- return self.XML_ENTITIES_TO_SPECIAL_CHARS[x]
- else:
- return u'&%s;' % x
- elif len(x) > 0 and x[0] == '#':
- # Handle numeric entities
- if len(x) > 1 and x[1] == 'x':
- return unichr(int(x[2:], 16))
- else:
- return unichr(int(x[1:]))
-
- elif self.escapeUnrecognizedEntities:
- return u'&amp;%s;' % x
- else:
- return u'&%s;' % x
-
def __init__(self, parser, builder, name, attrs=None, parent=None,
previous=None):
"Basic constructor."
# We don't actually store the parser object: that lets extracted
- # chunks be garbage-collected
+ # chunks be garbage-collected.
self.parserClass = parser.__class__
self.name = name
self.isSelfClosing = builder.isSelfClosingTag(name)
@@ -432,19 +439,11 @@ class Tag(PageElement, Entities):
self.setup(parent, previous)
self.hidden = False
self.containsSubstitutions = False
- self.escapeUnrecognizedEntities = parser.escapeUnrecognizedEntities
-
- # Convert any HTML, XML, or numeric entities in the attribute values.
- convert_one = lambda x: self._convertEntities(parser.builder, x)
- def convert(kval):
- k, val = kval
- if val is None:
- return kval
- return (k, re.sub("&(#\d+|#x[0-9a-fA-F]+|\w+);", convert_one, val))
+
if isinstance(attrs, types.DictType):
- self.attrs = [convert(kv) for kv in attrs.items()]
+ self.attrs = [kv for kv in attrs.items()]
else:
- self.attrs = map(convert, attrs)
+ self.attrs = list(attrs)
@property
def string(self):
@@ -519,9 +518,9 @@ class Tag(PageElement, Entities):
def __call__(self, *args, **kwargs):
"""Calling a tag like a function is the same as calling its
- findAll() method. Eg. tag('a') returns a list of all the A tags
+ find_all() method. Eg. tag('a') returns a list of all the A tags
found within this tag."""
- return apply(self.findAll, args, kwargs)
+ return apply(self.find_all, args, kwargs)
def __getattr__(self, tag):
#print "Getattr %s.%s" % (self.__class__, tag)
@@ -702,14 +701,14 @@ class Tag(PageElement, Entities):
"""Return only the first child of this Tag matching the given
criteria."""
r = None
- l = self.findAll(name, attrs, recursive, text, 1, **kwargs)
+ l = self.find_all(name, attrs, recursive, text, 1, **kwargs)
if l:
r = l[0]
return r
findChild = find
- def findAll(self, name=None, attrs={}, recursive=True, text=None,
- limit=None, **kwargs):
+ def find_all(self, name=None, attrs={}, recursive=True, text=None,
+ limit=None, **kwargs):
"""Extracts a list of Tag objects that match the given
criteria. You can specify the name of the Tag and any
attributes you want the Tag to have.
@@ -719,11 +718,12 @@ class Tag(PageElement, Entities):
callable that takes a string and returns whether or not the
string matches for some custom definition of 'matches'. The
same is true of the tag name."""
- generator = self.recursiveChildGenerator
+ generator = self.recursive_children
if not recursive:
- generator = self.childGenerator
- return self._findAll(name, attrs, text, limit, generator, **kwargs)
- findChildren = findAll
+ generator = self.children
+ return self._find_all(name, attrs, text, limit, generator, **kwargs)
+ findAll = find_all # BS3
+ findChildren = find_all # BS2
#Private methods
@@ -737,12 +737,14 @@ class Tag(PageElement, Entities):
return self.attrMap
#Generator methods
- def childGenerator(self):
+ @property
+ def children(self):
for i in range(0, len(self.contents)):
yield self.contents[i]
raise StopIteration
- def recursiveChildGenerator(self):
+ @property
+ def recursive_children(self):
if not len(self.contents):
raise StopIteration
stopNode = self._lastRecursiveChild().next
@@ -751,6 +753,14 @@ class Tag(PageElement, Entities):
yield current
current = current.next
+ # Old names for backwards compatibility
+ def childGenerator(self):
+ return self.children
+
+ def recursiveChildGenerator(self):
+ return self.recursive_children
+
+
# Next, a couple classes to represent queries and their results.
class SoupStrainer:
"""Encapsulates a number of ways of matching a markup element (tag or
diff --git a/tests/test_tree.py b/tests/test_tree.py
index 367489e..e424e0b 100644
--- a/tests/test_tree.py
+++ b/tests/test_tree.py
@@ -40,7 +40,7 @@ class TreeTest(SoupTest):
class TestFind(TreeTest):
"""Basic tests of the find() method.
- find() just calls findAll() with limit=1, so it's not tested all
+ find() just calls find_all() with limit=1, so it's not tested all
that thoroughly here.
"""
@@ -54,34 +54,34 @@ class TestFind(TreeTest):
class TestFindAll(TreeTest):
- """Basic tests of the findAll() method."""
+ """Basic tests of the find_all() method."""
def test_find_all_text_nodes(self):
"""You can search the tree for text nodes."""
soup = self.soup("<html>Foo<b>bar</b>\xbb</html>")
# Exact match.
- self.assertEqual(soup.findAll(text="bar"), [u"bar"])
+ self.assertEqual(soup.find_all(text="bar"), [u"bar"])
# Match any of a number of strings.
self.assertEqual(
- soup.findAll(text=["Foo", "bar"]), [u"Foo", u"bar"])
+ soup.find_all(text=["Foo", "bar"]), [u"Foo", u"bar"])
# Match a regular expression.
- self.assertEqual(soup.findAll(text=re.compile('.*')),
+ self.assertEqual(soup.find_all(text=re.compile('.*')),
[u"Foo", u"bar", u'\xbb'])
# Match anything.
- self.assertEqual(soup.findAll(text=True),
+ self.assertEqual(soup.find_all(text=True),
[u"Foo", u"bar", u'\xbb'])
def test_find_all_limit(self):
- """You can limit the number of items returned by findAll."""
+ """You can limit the number of items returned by find_all."""
soup = self.soup("<a>1</a><a>2</a><a>3</a><a>4</a><a>5</a>")
- self.assertSelects(soup.findAll('a', limit=3), ["1", "2", "3"])
- self.assertSelects(soup.findAll('a', limit=1), ["1"])
+ self.assertSelects(soup.find_all('a', limit=3), ["1", "2", "3"])
+ self.assertSelects(soup.find_all('a', limit=1), ["1"])
self.assertSelects(
- soup.findAll('a', limit=10), ["1", "2", "3", "4", "5"])
+ soup.find_all('a', limit=10), ["1", "2", "3", "4", "5"])
# A limit of 0 means no limit.
self.assertSelects(
- soup.findAll('a', limit=0), ["1", "2", "3", "4", "5"])
+ soup.find_all('a', limit=0), ["1", "2", "3", "4", "5"])
class TestFindAllByName(TreeTest):
"""Test ways of finding tags by tag name."""
@@ -95,33 +95,33 @@ class TestFindAllByName(TreeTest):
def test_find_all_by_tag_name(self):
# Find all the <a> tags.
self.assertSelects(
- self.tree.findAll('a'), ['First tag.', 'Nested tag.'])
+ self.tree.find_all('a'), ['First tag.', 'Nested tag.'])
def test_find_all_on_non_root_element(self):
# You can call find_all on any node, not just the root.
- self.assertSelects(self.tree.c.findAll('a'), ['Nested tag.'])
+ self.assertSelects(self.tree.c.find_all('a'), ['Nested tag.'])
def test_calling_element_invokes_find_all(self):
self.assertSelects(self.tree('a'), ['First tag.', 'Nested tag.'])
def test_find_all_by_tag_strainer(self):
self.assertSelects(
- self.tree.findAll(SoupStrainer('a')),
+ self.tree.find_all(SoupStrainer('a')),
['First tag.', 'Nested tag.'])
def test_find_all_by_tag_names(self):
self.assertSelects(
- self.tree.findAll(['a', 'b']),
+ self.tree.find_all(['a', 'b']),
['First tag.', 'Second tag.', 'Nested tag.'])
def test_find_all_by_tag_dict(self):
self.assertSelects(
- self.tree.findAll({'a' : True, 'b' : True}),
+ self.tree.find_all({'a' : True, 'b' : True}),
['First tag.', 'Second tag.', 'Nested tag.'])
def test_find_all_by_tag_re(self):
self.assertSelects(
- self.tree.findAll(re.compile('^[ab]$')),
+ self.tree.find_all(re.compile('^[ab]$')),
['First tag.', 'Second tag.', 'Nested tag.'])
def test_find_all_with_tags_matching_method(self):
@@ -135,26 +135,26 @@ class TestFindAllByName(TreeTest):
<b id="b">Match 2.</a>""")
self.assertSelects(
- tree.findAll(id_matches_name), ["Match 1.", "Match 2."])
+ tree.find_all(id_matches_name), ["Match 1.", "Match 2."])
class TestFindAllByAttribute(TreeTest):
def test_find_all_by_attribute_name(self):
- # You can pass in keyword arguments to findAll to search by
+ # You can pass in keyword arguments to find_all to search by
# attribute.
tree = self.soup("""
<a id="first">Matching a.</a>
<a id="second">
Non-matching <b id="first">Matching b.</b>a.
</a>""")
- self.assertSelects(tree.findAll(id='first'),
+ self.assertSelects(tree.find_all(id='first'),
["Matching a.", "Matching b."])
def test_find_all_by_attribute_dict(self):
# You can pass in a dictionary as the argument 'attrs'. This
# lets you search for attributes like 'name' (a fixed argument
- # to findAll) and 'class' (a reserved word in Python.)
+ # to find_all) and 'class' (a reserved word in Python.)
tree = self.soup("""
<a name="name1" class="class1">Name match.</a>
<a name="name2" class="class2">Class match.</a>
@@ -163,14 +163,14 @@ class TestFindAllByAttribute(TreeTest):
""")
# This doesn't do what you want.
- self.assertSelects(tree.findAll(name='name1'),
+ self.assertSelects(tree.find_all(name='name1'),
["A tag called 'name1'."])
# This does what you want.
- self.assertSelects(tree.findAll(attrs={'name' : 'name1'}),
+ self.assertSelects(tree.find_all(attrs={'name' : 'name1'}),
["Name match."])
# Passing class='class2' would cause a syntax error.
- self.assertSelects(tree.findAll(attrs={'class' : 'class2'}),
+ self.assertSelects(tree.find_all(attrs={'class' : 'class2'}),
["Class match."])
def test_find_all_by_class(self):
@@ -180,8 +180,8 @@ class TestFindAllByAttribute(TreeTest):
<a class="2">Class 2.</a>
<b class="1">Class 1.</a>
""")
- self.assertSelects(tree.findAll('a', '1'), ['Class 1.'])
- self.assertSelects(tree.findAll(attrs='1'), ['Class 1.', 'Class 1.'])
+ self.assertSelects(tree.find_all('a', '1'), ['Class 1.'])
+ self.assertSelects(tree.find_all(attrs='1'), ['Class 1.', 'Class 1.'])
def test_find_all_by_attribute_soupstrainer(self):
tree = self.soup("""
@@ -189,24 +189,24 @@ class TestFindAllByAttribute(TreeTest):
<a id="second">Non-match.</a>""")
strainer = SoupStrainer(attrs={'id' : 'first'})
- self.assertSelects(tree.findAll(strainer), ['Match.'])
+ self.assertSelects(tree.find_all(strainer), ['Match.'])
def test_find_all_with_missing_atribute(self):
- # You can pass in None as the value of an attribute to findAll.
+ # You can pass in None as the value of an attribute to find_all.
# This will match tags that do not have that attribute set.
tree = self.soup("""<a id="1">ID present.</a>
<a>No ID present.</a>
<a id="">ID is empty.</a>""")
- self.assertSelects(tree.findAll('a', id=None), ["No ID present."])
+ self.assertSelects(tree.find_all('a', id=None), ["No ID present."])
def test_find_all_with_defined_attribute(self):
- # You can pass in None as the value of an attribute to findAll.
+ # You can pass in True as the value of an attribute to find_all.
# This will match tags that have that attribute set to any value.
tree = self.soup("""<a id="1">ID present.</a>
<a>No ID present.</a>
<a id="">ID is empty.</a>""")
self.assertSelects(
- tree.findAll(id=True), ["ID present.", "ID is empty."])
+ tree.find_all(id=True), ["ID present.", "ID is empty."])
def test_find_all_with_numeric_attribute(self):
# If you search for a number, it's treated as a string.
@@ -214,8 +214,8 @@ class TestFindAllByAttribute(TreeTest):
<a id="1">Quoted attribute.</a>""")
expected = ["Unquoted attribute.", "Quoted attribute."]
- self.assertSelects(tree.findAll(id=1), expected)
- self.assertSelects(tree.findAll(id="1"), expected)
+ self.assertSelects(tree.find_all(id=1), expected)
+ self.assertSelects(tree.find_all(id="1"), expected)
def test_find_all_with_list_attribute_values(self):
# You can pass a list of attribute values instead of just one,
@@ -224,7 +224,7 @@ class TestFindAllByAttribute(TreeTest):
<a id="2">2</a>
<a id="3">3</a>
<a>No ID.</a>""")
- self.assertSelects(tree.findAll(id=["1", "3", "4"]),
+ self.assertSelects(tree.find_all(id=["1", "3", "4"]),
["1", "3"])
def test_find_all_with_regular_expression_attribute_value(self):
@@ -237,7 +237,7 @@ class TestFindAllByAttribute(TreeTest):
<a id="b">One b.</a>
<a>No ID.</a>""")
- self.assertSelects(tree.findAll(id=re.compile("^a+$")),
+ self.assertSelects(tree.find_all(id=re.compile("^a+$")),
["One a.", "Two as."])
@@ -270,12 +270,12 @@ class TestParentOperations(TreeTest):
def test_find_parents(self):
self.assertSelectsIDs(
- self.start.findParents('ul'), ['bottom', 'middle', 'top'])
+ self.start.find_parents('ul'), ['bottom', 'middle', 'top'])
self.assertSelectsIDs(
- self.start.findParents('ul', id="middle"), ['middle'])
+ self.start.find_parents('ul', id="middle"), ['middle'])
def test_find_parent(self):
- self.assertEquals(self.start.findParent('ul')['id'], 'bottom')
+ self.assertEquals(self.start.find_parent('ul')['id'], 'bottom')
def test_parent_of_text_element(self):
text = self.tree.find(text="Start here")
@@ -283,10 +283,10 @@ class TestParentOperations(TreeTest):
def test_text_element_find_parent(self):
text = self.tree.find(text="Start here")
- self.assertEquals(text.findParent('ul')['id'], 'bottom')
+ self.assertEquals(text.find_parent('ul')['id'], 'bottom')
def test_parent_generator(self):
- parents = [parent['id'] for parent in self.start.parentGenerator()
+ parents = [parent['id'] for parent in self.start.parents
if parent is not None and parent.has_key('id')]
self.assertEquals(parents, ['bottom', 'middle', 'top'])
@@ -318,21 +318,21 @@ class TestNextOperations(ProximityTest):
self.assertEquals(self.tree.next, None)
def test_find_all_next(self):
- self.assertSelects(self.start.findAllNext('b'), ["Two", "Three"])
- self.assertSelects(self.start.findAllNext(id=3), ["Three"])
+ self.assertSelects(self.start.find_all_next('b'), ["Two", "Three"])
+ self.assertSelects(self.start.find_all_next(id=3), ["Three"])
def test_find_next(self):
- self.assertEquals(self.start.findNext('b')['id'], '2')
- self.assertEquals(self.start.findNext(text="Three"), "Three")
+ self.assertEquals(self.start.find_next('b')['id'], '2')
+ self.assertEquals(self.start.find_next(text="Three"), "Three")
def test_find_next_for_text_element(self):
text = self.tree.find(text="One")
- self.assertEquals(text.findNext("b").string, "Two")
- self.assertSelects(text.findAllNext("b"), ["Two", "Three"])
+ self.assertEquals(text.find_next("b").string, "Two")
+ self.assertSelects(text.find_all_next("b"), ["Two", "Three"])
def test_next_generator(self):
start = self.tree.find(text="Two")
- successors = [node for node in start.nextGenerator()]
+ successors = [node for node in start.next_elements]
# There are two successors: the final <b> tag and its text contents.
# Then we go off the end.
tag, contents, none = successors
@@ -340,7 +340,7 @@ class TestNextOperations(ProximityTest):
self.assertEquals(contents, "Three")
self.assertEquals(none, None)
- # XXX Should nextGenerator really return None? Seems like it
+ # XXX Should next_elements really return None? Seems like it
# should just stop.
@@ -369,22 +369,22 @@ class TestPreviousOperations(ProximityTest):
# of the "Three" node itself, which is why "Three" shows up
# here.
self.assertSelects(
- self.end.findAllPrevious('b'), ["Three", "Two", "One"])
- self.assertSelects(self.end.findAllPrevious(id=1), ["One"])
+ self.end.find_all_previous('b'), ["Three", "Two", "One"])
+ self.assertSelects(self.end.find_all_previous(id=1), ["One"])
def test_find_previous(self):
- self.assertEquals(self.end.findPrevious('b')['id'], '3')
- self.assertEquals(self.end.findPrevious(text="One"), "One")
+ self.assertEquals(self.end.find_previous('b')['id'], '3')
+ self.assertEquals(self.end.find_previous(text="One"), "One")
def test_find_previous_for_text_element(self):
text = self.tree.find(text="Three")
- self.assertEquals(text.findPrevious("b").string, "Three")
+ self.assertEquals(text.find_previous("b").string, "Three")
self.assertSelects(
- text.findAllPrevious("b"), ["Three", "Two", "One"])
+ text.find_all_previous("b"), ["Three", "Two", "One"])
def test_previous_generator(self):
start = self.tree.find(text="One")
- predecessors = [node for node in start.previousGenerator()]
+ predecessors = [node for node in start.previous_elements]
# There are four predecessors: the <b> tag containing "One",
# the <body> tag, the <head> tag, and the <html> tag. Then we
@@ -447,13 +447,13 @@ class TestNextSibling(SiblingTest):
self.assertEquals(last_span.nextSibling, None)
def test_find_next_sibling(self):
- self.assertEquals(self.start.findNextSibling('span')['id'], '2')
+ self.assertEquals(self.start.find_next_sibling('span')['id'], '2')
def test_next_siblings(self):
- self.assertSelectsIDs(self.start.findNextSiblings("span"),
+ self.assertSelectsIDs(self.start.find_next_siblings("span"),
['2', '3', '4'])
- self.assertSelectsIDs(self.start.findNextSiblings(id='3'), ['3'])
+ self.assertSelectsIDs(self.start.find_next_siblings(id='3'), ['3'])
def test_next_sibling_for_text_element(self):
soup = self.soup("Foo<b>bar</b>baz")
@@ -461,9 +461,9 @@ class TestNextSibling(SiblingTest):
self.assertEquals(start.nextSibling.name, 'b')
self.assertEquals(start.nextSibling.nextSibling, 'baz')
- self.assertSelects(start.findNextSiblings('b'), ['bar'])
- self.assertEquals(start.findNextSibling(text="baz"), "baz")
- self.assertEquals(start.findNextSibling(text="nonesuch"), None)
+ self.assertSelects(start.find_next_siblings('b'), ['bar'])
+ self.assertEquals(start.find_next_sibling(text="baz"), "baz")
+ self.assertEquals(start.find_next_sibling(text="nonesuch"), None)
class TestPreviousSibling(SiblingTest):
@@ -492,13 +492,13 @@ class TestPreviousSibling(SiblingTest):
self.assertEquals(first_span.previousSibling, None)
def test_find_previous_sibling(self):
- self.assertEquals(self.end.findPreviousSibling('span')['id'], '3')
+ self.assertEquals(self.end.find_previous_sibling('span')['id'], '3')
def test_previous_siblings(self):
- self.assertSelectsIDs(self.end.findPreviousSiblings("span"),
+ self.assertSelectsIDs(self.end.find_previous_siblings("span"),
['3', '2', '1'])
- self.assertSelectsIDs(self.end.findPreviousSiblings(id='1'), ['1'])
+ self.assertSelectsIDs(self.end.find_previous_siblings(id='1'), ['1'])
def test_previous_sibling_for_text_element(self):
soup = self.soup("Foo<b>bar</b>baz")
@@ -506,9 +506,9 @@ class TestPreviousSibling(SiblingTest):
self.assertEquals(start.previousSibling.name, 'b')
self.assertEquals(start.previousSibling.previousSibling, 'Foo')
- self.assertSelects(start.findPreviousSiblings('b'), ['bar'])
- self.assertEquals(start.findPreviousSibling(text="Foo"), "Foo")
- self.assertEquals(start.findPreviousSibling(text="nonesuch"), None)
+ self.assertSelects(start.find_previous_siblings('b'), ['bar'])
+ self.assertEquals(start.find_previous_sibling(text="Foo"), "Foo")
+ self.assertEquals(start.find_previous_sibling(text="nonesuch"), None)
class TestTreeModification(SoupTest):
@@ -623,7 +623,7 @@ class TestTreeModification(SoupTest):
def test_replace_with(self):
soup = self.soup(
"<p>There's <b>no</b> business like <b>show</b> business</p>")
- no, show = soup.findAll('b')
+ no, show = soup.find_all('b')
show.replaceWith(no)
self.assertEquals(
soup.decode(),