summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r--  CHANGELOG  1
-rw-r--r--  TODO  8
-rw-r--r--  bs4/element.py  64
-rw-r--r--  tests/test_tree.py  8
4 files changed, 35 insertions, 46 deletions
diff --git a/CHANGELOG b/CHANGELOG
index 6d05ae4..00d80da 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -22,6 +22,7 @@ version of the API is in use, the module is now called 'bs4':
Methods have been renamed to comply with PEP 8. The old names still
work. Here are the renames:
+ * replaceWith -> replace_with
* findAll -> find_all
* findAllNext -> find_all_next
* findAllPrevious -> find_all_previous
diff --git a/TODO b/TODO
index a6f444f..2606566 100644
--- a/TODO
+++ b/TODO
@@ -1,3 +1,11 @@
+3.0.8 optimized findAll('tag-name') and findAll(True), bypassing the
+soupstrainer process.
+
+define a setter for Tag.string which replaces the tag's contents with
+a string.
+
+Tag.text: concatenate all strings and return them
+
Bare ampersands should be converted to HTML entities upon output.
Add namespace support.
diff --git a/bs4/element.py b/bs4/element.py
index 315ee63..6fb6210 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -27,19 +27,20 @@ class PageElement(object):
self.previousSibling = self.parent.contents[-1]
self.previousSibling.nextSibling = self
- def replaceWith(self, replaceWith):
+ def replace_with(self, replace_with):
oldParent = self.parent
myIndex = self.parent.contents.index(self)
- if hasattr(replaceWith, 'parent') and replaceWith.parent == self.parent:
+ if hasattr(replace_with, 'parent') and replace_with.parent == self.parent:
# We're replacing this element with one of its siblings.
- index = self.parent.contents.index(replaceWith)
+ index = self.parent.contents.index(replace_with)
if index and index < myIndex:
# Furthermore, it comes before this element. That
# means that when we extract it, the index of this
# element will change.
myIndex = myIndex - 1
self.extract()
- oldParent.insert(myIndex, replaceWith)
+ oldParent.insert(myIndex, replace_with)
+ replaceWith = replace_with # BS4
def extract(self):
"""Destructively rips this element out of the tree."""
@@ -52,7 +53,7 @@ class PageElement(object):
#Find the two elements that would be next to each other if
#this element (and any children) hadn't been parsed. Connect
#the two.
- lastChild = self._lastRecursiveChild()
+ lastChild = self._last_recursive_child()
nextElement = lastChild.next
if self.previous:
@@ -70,7 +71,7 @@ class PageElement(object):
self.previousSibling = self.nextSibling = None
return self
- def _lastRecursiveChild(self):
+ def _last_recursive_child(self):
"Finds the last element beneath this object to be parsed."
lastChild = self
while hasattr(lastChild, 'contents') and lastChild.contents:
@@ -106,11 +107,11 @@ class PageElement(object):
previousChild = self.contents[position-1]
newChild.previousSibling = previousChild
newChild.previousSibling.nextSibling = newChild
- newChild.previous = previousChild._lastRecursiveChild()
+ newChild.previous = previousChild._last_recursive_child()
if newChild.previous:
newChild.previous.next = newChild
- newChildsLastElement = newChild._lastRecursiveChild()
+ newChildsLastElement = newChild._last_recursive_child()
if position >= len(self.contents):
newChild.nextSibling = None
@@ -144,7 +145,7 @@ class PageElement(object):
def find_next(self, name=None, attrs={}, text=None, **kwargs):
"""Returns the first item that matches the given criteria and
appears after this Tag in the document."""
- return self._findOne(self.find_all_next, name, attrs, text, **kwargs)
+ return self._find_one(self.find_all_next, name, attrs, text, **kwargs)
findNext = find_next # BS3
def find_all_next(self, name=None, attrs={}, text=None, limit=None,
@@ -158,7 +159,7 @@ class PageElement(object):
def find_next_sibling(self, name=None, attrs={}, text=None, **kwargs):
"""Returns the closest sibling to this Tag that matches the
given criteria and appears after this Tag in the document."""
- return self._findOne(self.find_next_siblings, name, attrs, text,
+ return self._find_one(self.find_next_siblings, name, attrs, text,
**kwargs)
findNextSibling = find_next_sibling # BS3
@@ -174,7 +175,7 @@ class PageElement(object):
def find_previous(self, name=None, attrs={}, text=None, **kwargs):
"""Returns the first item that matches the given criteria and
appears before this Tag in the document."""
- return self._findOne(
+ return self._find_one(
self.find_all_previous, name, attrs, text, **kwargs)
findPrevious = find_previous # BS3
@@ -190,7 +191,7 @@ class PageElement(object):
def find_previous_sibling(self, name=None, attrs={}, text=None, **kwargs):
"""Returns the closest sibling to this Tag that matches the
given criteria and appears before this Tag in the document."""
- return self._findOne(self.find_previous_siblings, name, attrs, text,
+ return self._find_one(self.find_previous_siblings, name, attrs, text,
**kwargs)
findPreviousSibling = find_previous_sibling # BS3
@@ -206,7 +207,7 @@ class PageElement(object):
def find_parent(self, name=None, attrs={}, **kwargs):
"""Returns the closest parent of this Tag that matches the given
criteria."""
- # NOTE: We can't use _findOne because findParents takes a different
+ # NOTE: We can't use _find_one because findParents takes a different
# set of arguments.
r = None
l = self.find_parents(name, attrs, 1)
@@ -226,7 +227,7 @@ class PageElement(object):
#These methods do the real heavy lifting.
- def _findOne(self, method, name, attrs, text, **kwargs):
+ def _find_one(self, method, name, attrs, text, **kwargs):
r = None
l = method(name, attrs, text, 1, **kwargs)
if l:
@@ -310,27 +311,10 @@ class PageElement(object):
return self.parents
# Utility methods
- def substituteEncoding(self, str, encoding=None):
+ def substitute_encoding(self, str, encoding=None):
encoding = encoding or "utf-8"
return str.replace("%SOUP-ENCODING%", encoding)
- def toEncoding(self, s, encoding=None):
- """Encodes an object to a string in some encoding, or to Unicode.
- ."""
- if isinstance(s, unicode):
- if encoding:
- s = s.encode(encoding)
- elif isinstance(s, str):
- if encoding:
- s = s.encode(encoding)
- else:
- s = unicode(s)
- else:
- if encoding:
- s = self.toEncoding(str(s), encoding)
- else:
- s = unicode(s)
- return s
class NavigableString(unicode, PageElement):
@@ -386,6 +370,7 @@ class Comment(NavigableString):
PREFIX = u'<!--'
SUFFIX = u'-->'
+
class Declaration(NavigableString):
PREFIX = u'<!'
SUFFIX = u'!>'
@@ -524,10 +509,7 @@ class Tag(PageElement):
def __eq__(self, other):
"""Returns true iff this tag has the same name, the same attributes,
- and the same contents (recursively) as the given tag.
-
- XXX: right now this will return false if two tags have the
- same attributes in a different order. Should this be fixed?"""
+ and the same contents (recursively) as the given tag."""
if not hasattr(other, 'name') or not hasattr(other, 'attrs') or not hasattr(other, 'contents') or self.name != other.name or self.attrs != other.attrs or len(self) != len(other):
return False
for i in range(0, len(self.contents)):
@@ -578,7 +560,7 @@ class Tag(PageElement):
if (self.contains_substitutions
and eventual_encoding is not None
and '%SOUP-ENCODING%' in val):
- val = self.substituteEncoding(val, eventual_encoding)
+ val = self.substitute_encoding(val, eventual_encoding)
decoded = (key + '='
+ EntitySubstitution.substitute_xml(val, True))
@@ -702,15 +684,13 @@ class Tag(PageElement):
#Generator methods
@property
def children(self):
- for i in range(0, len(self.contents)):
- yield self.contents[i]
- raise StopIteration
+ return iter(self.contents) # XXX This seems to be untested.
@property
def recursive_children(self):
if not len(self.contents):
- raise StopIteration
- stopNode = self._lastRecursiveChild().next
+ raise StopIteration # XXX return instead?
+ stopNode = self._last_recursive_child().next
current = self.contents[0]
while current is not stopNode:
yield current
diff --git a/tests/test_tree.py b/tests/test_tree.py
index c61b326..f2989fe 100644
--- a/tests/test_tree.py
+++ b/tests/test_tree.py
@@ -557,12 +557,12 @@ class TestTreeModification(SoupTest):
text = "<a><b></b><c>Foo<d></d></c></a><a><e></e></a>"
soup = self.soup(text)
c = soup.c
- soup.c.replaceWith(c)
+ soup.c.replace_with(c)
self.assertEquals(soup.decode(), self.document_for(text))
def test_replace_final_node(self):
soup = self.soup("<b>Argh!</b>")
- soup.find(text="Argh!").replaceWith("Hooray!")
+ soup.find(text="Argh!").replace_with("Hooray!")
new_text = soup.find(text="Hooray!")
b = soup.b
self.assertEqual(new_text.previous, b)
@@ -635,7 +635,7 @@ class TestTreeModification(SoupTest):
soup = self.soup(
"<p>There's <b>no</b> business like <b>show</b> business</p>")
no, show = soup.find_all('b')
- show.replaceWith(no)
+ show.replace_with(no)
self.assertEquals(
soup.decode(),
self.document_for(
@@ -654,7 +654,7 @@ class TestTreeModification(SoupTest):
# right") with the <f> tag ("refuse").
remove_tag = soup.b
move_tag = soup.f
- remove_tag.replaceWith(move_tag)
+ remove_tag.replace_with(move_tag)
self.assertEqual(
soup.decode(), self.document_for(