summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- CHANGELOG 2
-rw-r--r-- bs4/__init__.py 16
-rw-r--r-- bs4/element.py 64
-rw-r--r-- tests/test_tree.py 62
4 files changed, 76 insertions, 68 deletions
diff --git a/CHANGELOG b/CHANGELOG
index cd01b3b..a636544 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -39,6 +39,8 @@ Some attributes have also been renamed:
* Tag.isSelfClosing -> Tag.is_empty_element
* UnicodeDammit.unicode -> UnicodeDammit.unicode_markup
+ * Tag.next -> Tag.next_element
+ * Tag.previous -> Tag.previous_element
So have some arguments to popular methods:
diff --git a/bs4/__init__.py b/bs4/__init__.py
index 22ecc43..6406bef 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -159,10 +159,10 @@ class BeautifulSoup(Tag):
def object_was_parsed(self, o):
"""Add an object to the parse tree."""
- o.setup(self.currentTag, self.previous)
- if self.previous:
- self.previous.next = o
- self.previous = o
+ o.setup(self.currentTag, self.previous_element)
+ if self.previous_element:
+ self.previous_element.next_element = o
+ self.previous_element = o
self.currentTag.contents.append(o)
@@ -206,12 +206,12 @@ class BeautifulSoup(Tag):
return None
tag = Tag(self, self.builder, name, attrs, self.currentTag,
- self.previous)
+ self.previous_element)
if tag is None:
return tag
- if self.previous:
- self.previous.next = tag
- self.previous = tag
+ if self.previous_element:
+ self.previous_element.next_element = tag
+ self.previous_element = tag
self.pushTag(tag)
return tag
diff --git a/bs4/element.py b/bs4/element.py
index 587078c..729ab36 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -1,9 +1,6 @@
+import collections
import re
import types
-try:
- from htmlentitydefs import name2codepoint
-except ImportError:
- name2codepoint = {}
from bs4.dammit import EntitySubstitution
DEFAULT_OUTPUT_ENCODING = "utf-8"
@@ -13,12 +10,12 @@ class PageElement(object):
"""Contains the navigational information for some part of the page
(either a tag or a piece of text)"""
- def setup(self, parent=None, previous=None):
+ def setup(self, parent=None, previous_element=None):
"""Sets up the initial relations between this element and
other elements."""
self.parent = parent
- self.previous = previous
- self.next = None
+ self.previous_element = previous_element
+ self.next_element = None
self.previousSibling = None
self.nextSibling = None
if self.parent and self.parent.contents:
@@ -52,14 +49,14 @@ class PageElement(object):
#this element (and any children) hadn't been parsed. Connect
#the two.
lastChild = self._last_recursive_child()
- nextElement = lastChild.next
+ nextElement = lastChild.next_element
- if self.previous:
- self.previous.next = nextElement
+ if self.previous_element:
+ self.previous_element.next_element = nextElement
if nextElement:
- nextElement.previous = self.previous
- self.previous = None
- lastChild.next = None
+ nextElement.previous_element = self.previous_element
+ self.previous_element = None
+ lastChild.next_element = None
self.parent = None
if self.previousSibling:
@@ -100,14 +97,14 @@ class PageElement(object):
previousChild = None
if position == 0:
newChild.previousSibling = None
- newChild.previous = self
+ newChild.previous_element = self
else:
previousChild = self.contents[position-1]
newChild.previousSibling = previousChild
newChild.previousSibling.nextSibling = newChild
- newChild.previous = previousChild._last_recursive_child()
- if newChild.previous:
- newChild.previous.next = newChild
+ newChild.previous_element = previousChild._last_recursive_child()
+ if newChild.previous_element:
+ newChild.previous_element.next_element = newChild
newChildsLastElement = newChild._last_recursive_child()
@@ -122,18 +119,18 @@ class PageElement(object):
if not parent: # This is the last element in the document.
break
if parentsNextSibling:
- newChildsLastElement.next = parentsNextSibling
+ newChildsLastElement.next_element = parentsNextSibling
else:
- newChildsLastElement.next = None
+ newChildsLastElement.next_element = None
else:
nextChild = self.contents[position]
newChild.nextSibling = nextChild
if newChild.nextSibling:
newChild.nextSibling.previousSibling = newChild
- newChildsLastElement.next = nextChild
+ newChildsLastElement.next_element = nextChild
- if newChildsLastElement.next:
- newChildsLastElement.next.previous = newChildsLastElement
+ if newChildsLastElement.next_element:
+ newChildsLastElement.next_element.previous_element = newChildsLastElement
self.contents.insert(position, newChild)
def append(self, tag):
@@ -223,6 +220,14 @@ class PageElement(object):
findParents = find_parents # BS3
fetchParents = find_parents # BS2
+ @property
+ def next(self):
+ return self.next_element
+
+ @property
+ def previous(self):
+ return self.previous_element
+
#These methods do the real heavy lifting.
def _find_one(self, method, name, attrs, text, **kwargs):
@@ -260,7 +265,7 @@ class PageElement(object):
def next_elements(self):
i = self
while i:
- i = i.next
+ i = i.next_element
yield i
@property
@@ -274,7 +279,7 @@ class PageElement(object):
def previous_elements(self):
i = self
while i:
- i = i.previous
+ i = i.previous_element
yield i
@property
@@ -688,11 +693,11 @@ class Tag(PageElement):
def recursive_children(self):
if not len(self.contents):
raise StopIteration # XXX return instead?
- stopNode = self._last_recursive_child().next
+ stopNode = self._last_recursive_child().next_element
current = self.contents[0]
while current is not stopNode:
yield current
- current = current.next
+ current = current.next_element
# Old names for backwards compatibility
def childGenerator(self):
@@ -733,8 +738,9 @@ class SoupStrainer(object):
if isinstance(markupName, Tag):
markup = markupName
markupAttrs = markup
- callFunctionWithTagData = callable(self.name) \
- and not isinstance(markupName, Tag)
+ callFunctionWithTagData = (
+ isinstance(self.name, collections.Callable)
+ and not isinstance(markupName, Tag))
if (not self.name) \
or callFunctionWithTagData \
@@ -795,7 +801,7 @@ class SoupStrainer(object):
result = False
if matchAgainst == True and type(matchAgainst) == types.BooleanType:
result = markup != None
- elif callable(matchAgainst):
+ elif isinstance(matchAgainst, collections.Callable):
result = matchAgainst(markup)
else:
#Custom match methods take the tag as an argument, but all
diff --git a/tests/test_tree.py b/tests/test_tree.py
index f2989fe..87a7e3a 100644
--- a/tests/test_tree.py
+++ b/tests/test_tree.py
@@ -307,16 +307,16 @@ class TestNextOperations(ProximityTest):
self.start = self.tree.b
def test_next(self):
- self.assertEquals(self.start.next, "One")
- self.assertEquals(self.start.next.next['id'], "2")
+ self.assertEquals(self.start.next_element, "One")
+ self.assertEquals(self.start.next_element.next_element['id'], "2")
def test_next_of_last_item_is_none(self):
last = self.tree.find(text="Three")
- self.assertEquals(last.next, None)
+ self.assertEquals(last.next_element, None)
def test_next_of_root_is_none(self):
# The document root is outside the next/previous chain.
- self.assertEquals(self.tree.next, None)
+ self.assertEquals(self.tree.next_element, None)
def test_find_all_next(self):
self.assertSelects(self.start.find_all_next('b'), ["Two", "Three"])
@@ -352,17 +352,17 @@ class TestPreviousOperations(ProximityTest):
self.end = self.tree.find(text="Three")
def test_previous(self):
- self.assertEquals(self.end.previous['id'], "3")
- self.assertEquals(self.end.previous.previous, "Two")
+ self.assertEquals(self.end.previous_element['id'], "3")
+ self.assertEquals(self.end.previous_element.previous_element, "Two")
def test_previous_of_first_item_is_none(self):
first = self.tree.find('html')
- self.assertEquals(first.previous, None)
+ self.assertEquals(first.previous_element, None)
def test_previous_of_root_is_none(self):
# The document root is outside the next/previous chain.
# XXX This is broken!
- #self.assertEquals(self.tree.previous, None)
+ #self.assertEquals(self.tree.previous_element, None)
pass
def test_find_all_previous(self):
@@ -436,7 +436,7 @@ class TestNextSibling(SiblingTest):
self.assertEquals(self.start.nextSibling.nextSibling['id'], '3')
# Note the difference between nextSibling and next.
- self.assertEquals(self.start.next['id'], '1.1')
+ self.assertEquals(self.start.next_element['id'], '1.1')
def test_next_sibling_may_not_exist(self):
self.assertEquals(self.tree.html.nextSibling, None)
@@ -481,7 +481,7 @@ class TestPreviousSibling(SiblingTest):
self.assertEquals(self.end.previousSibling.previousSibling['id'], '2')
# Note the difference between previousSibling and previous.
- self.assertEquals(self.end.previous['id'], '3.1')
+ self.assertEquals(self.end.previous_element['id'], '3.1')
def test_previous_sibling_may_not_exist(self):
self.assertEquals(self.tree.html.previousSibling, None)
@@ -565,10 +565,10 @@ class TestTreeModification(SoupTest):
soup.find(text="Argh!").replace_with("Hooray!")
new_text = soup.find(text="Hooray!")
b = soup.b
- self.assertEqual(new_text.previous, b)
+ self.assertEqual(new_text.previous_element, b)
self.assertEqual(new_text.parent, b)
- self.assertEqual(new_text.previous.next, new_text)
- self.assertEqual(new_text.next, None)
+ self.assertEqual(new_text.previous_element.next_element, new_text)
+ self.assertEqual(new_text.next_element, None)
def test_consecutive_text_nodes(self):
# A builder should never create two consecutive text nodes,
@@ -582,14 +582,14 @@ class TestTreeModification(SoupTest):
"<a><b>Argh!Hooray!</b><c></c></a>"))
new_text = soup.find(text="Hooray!")
- self.assertEqual(new_text.previous, "Argh!")
- self.assertEqual(new_text.previous.next, new_text)
+ self.assertEqual(new_text.previous_element, "Argh!")
+ self.assertEqual(new_text.previous_element.next_element, new_text)
self.assertEqual(new_text.previousSibling, "Argh!")
self.assertEqual(new_text.previousSibling.nextSibling, new_text)
self.assertEqual(new_text.nextSibling, None)
- self.assertEqual(new_text.next, soup.c)
+ self.assertEqual(new_text.next_element, soup.c)
def test_insert_tag(self):
@@ -610,8 +610,8 @@ class TestTreeModification(SoupTest):
self.assertEqual(magic_tag.previousSibling, b_tag)
find = b_tag.find(text="Find")
- self.assertEqual(find.next, magic_tag)
- self.assertEqual(magic_tag.previous, find)
+ self.assertEqual(find.next_element, magic_tag)
+ self.assertEqual(magic_tag.previous_element, find)
c_tag = soup.c
self.assertEqual(magic_tag.nextSibling, c_tag)
@@ -619,8 +619,8 @@ class TestTreeModification(SoupTest):
the = magic_tag.find(text="the")
self.assertEqual(the.parent, magic_tag)
- self.assertEqual(the.next, c_tag)
- self.assertEqual(c_tag.previous, the)
+ self.assertEqual(the.next_element, c_tag)
+ self.assertEqual(c_tag.previous_element, the)
def test_insert_works_on_empty_element_tag(self):
# This is a little strange, since most HTML parsers don't allow
@@ -643,7 +643,7 @@ class TestTreeModification(SoupTest):
self.assertEquals(show.parent, None)
self.assertEquals(no.parent, soup.p)
- self.assertEquals(no.next, "no")
+ self.assertEquals(no.next_element, "no")
self.assertEquals(no.nextSibling, " business")
def test_nested_tag_replace_with(self):
@@ -662,24 +662,24 @@ class TestTreeModification(SoupTest):
# The <b> tag is now an orphan.
self.assertEqual(remove_tag.parent, None)
- self.assertEqual(remove_tag.find(text="right").next, None)
- self.assertEqual(remove_tag.previous, None)
+ self.assertEqual(remove_tag.find(text="right").next_element, None)
+ self.assertEqual(remove_tag.previous_element, None)
self.assertEqual(remove_tag.nextSibling, None)
self.assertEqual(remove_tag.previousSibling, None)
# The <f> tag is now connected to the <a> tag.
self.assertEqual(move_tag.parent, soup.a)
- self.assertEqual(move_tag.previous, "We")
- self.assertEqual(move_tag.next.next, soup.e)
+ self.assertEqual(move_tag.previous_element, "We")
+ self.assertEqual(move_tag.next_element.next_element, soup.e)
self.assertEqual(move_tag.nextSibling, None)
# The gap where the <f> tag used to be has been mended, and
# the word "to" is now connected to the <g> tag.
to_text = soup.find(text="to")
g_tag = soup.g
- self.assertEqual(to_text.next, g_tag)
+ self.assertEqual(to_text.next_element, g_tag)
self.assertEqual(to_text.nextSibling, g_tag)
- self.assertEqual(g_tag.previous, to_text)
+ self.assertEqual(g_tag.previous_element, to_text)
self.assertEqual(g_tag.previousSibling, to_text)
def test_extract(self):
@@ -696,15 +696,15 @@ class TestTreeModification(SoupTest):
# The extracted tag is now an orphan.
self.assertEqual(len(soup.body.contents), 2)
self.assertEqual(extracted.parent, None)
- self.assertEqual(extracted.previous, None)
- self.assertEqual(extracted.next.next, None)
+ self.assertEqual(extracted.previous_element, None)
+ self.assertEqual(extracted.next_element.next_element, None)
# The gap where the extracted tag used to be has been mended.
content_1 = soup.find(text="Some content. ")
content_2 = soup.find(text=" More content.")
- self.assertEquals(content_1.next, content_2)
+ self.assertEquals(content_1.next_element, content_2)
self.assertEquals(content_1.nextSibling, content_2)
- self.assertEquals(content_2.previous, content_1)
+ self.assertEquals(content_2.previous_element, content_1)
self.assertEquals(content_2.previousSibling, content_1)