summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- CHANGELOG 10
-rw-r--r-- bs4/__init__.py 23
-rw-r--r-- bs4/builder/__init__.py 8
-rw-r--r-- bs4/builder/_html5lib.py 10
-rw-r--r-- bs4/builder/_lxml.py 3
-rw-r--r-- bs4/dammit.py 13
-rw-r--r-- bs4/element.py 104
-rw-r--r-- tests/test_soup.py 12
-rw-r--r-- tests/test_tree.py 80
9 files changed, 143 insertions, 120 deletions
diff --git a/CHANGELOG b/CHANGELOG
index 00d80da..4d1d075 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -35,9 +35,19 @@ work. Here are the renames:
* findPreviousSibling -> find_previous_sibling
* findPreviousSiblings -> find_previous_siblings
+Methods have been renamed for compatibility with Python 3.
+
+ * Tag.has_key() -> Tag.has_attr()
+
+ (This was misleading, anyway, because has_key() looked at
+ a tag's attributes and __contains__ looked at a tag's contents.)
+
Some attributes have also been renamed:
* Tag.isSelfClosing -> Tag.is_empty_element
+ * UnicodeDammit.unicode -> UnicodeDammit.unicode_markup
+ * Tag.next -> Tag.next_element
+ * Tag.previous -> Tag.previous_element
So have some arguments to popular methods:
diff --git a/bs4/__init__.py b/bs4/__init__.py
index 8baeec4..6406bef 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -16,7 +16,6 @@ For more than you ever wanted to know about Beautiful Soup, see the
documentation:
http://www.crummy.com/software/BeautifulSoup/documentation.html
"""
-from __future__ import generators
__author__ = "Leonard Richardson (leonardr@segfault.org)"
__version__ = "4.0.0a"
@@ -27,9 +26,9 @@ __all__ = ['BeautifulSoup']
import re
-from builder import builder_registry
-from dammit import UnicodeDammit
-from element import DEFAULT_OUTPUT_ENCODING, NavigableString, Tag
+from .builder import builder_registry
+from .dammit import UnicodeDammit
+from .element import DEFAULT_OUTPUT_ENCODING, NavigableString, Tag
class BeautifulSoup(Tag):
@@ -160,10 +159,10 @@ class BeautifulSoup(Tag):
def object_was_parsed(self, o):
"""Add an object to the parse tree."""
- o.setup(self.currentTag, self.previous)
- if self.previous:
- self.previous.next = o
- self.previous = o
+ o.setup(self.currentTag, self.previous_element)
+ if self.previous_element:
+ self.previous_element.next_element = o
+ self.previous_element = o
self.currentTag.contents.append(o)
@@ -207,12 +206,12 @@ class BeautifulSoup(Tag):
return None
tag = Tag(self, self.builder, name, attrs, self.currentTag,
- self.previous)
+ self.previous_element)
if tag is None:
return tag
- if self.previous:
- self.previous.next = tag
- self.previous = tag
+ if self.previous_element:
+ self.previous_element.next_element = tag
+ self.previous_element = tag
self.pushTag(tag)
return tag
diff --git a/bs4/builder/__init__.py b/bs4/builder/__init__.py
index afd49b9..e6d4fa1 100644
--- a/bs4/builder/__init__.py
+++ b/bs4/builder/__init__.py
@@ -144,7 +144,7 @@ class SAXTreeBuilder(TreeBuilder):
pass
def startElement(self, name, attrs):
- attrs = dict((key[1], value) for key, value in attrs.items())
+ attrs = dict((key[1], value) for key, value in list(attrs.items()))
#print "Start %s, %r" % (name, attrs)
self.soup.handle_starttag(name, attrs)
@@ -247,16 +247,16 @@ def register_treebuilders_from(module):
# builder registrations will take precedence. In general, we want
# html5lib to take precedence over lxml, because it's more
# reliable. And we only want to use HTMLParser as a last resort.
-import _htmlparser
+from .import _htmlparser
register_treebuilders_from(_htmlparser)
try:
- import _lxml
+ from . import _lxml
register_treebuilders_from(_lxml)
except ImportError:
# They don't have lxml installed.
pass
try:
- import _html5lib
+ from . import _html5lib
register_treebuilders_from(_html5lib)
except ImportError:
# They don't have html5lib installed.
diff --git a/bs4/builder/_html5lib.py b/bs4/builder/_html5lib.py
index d74c4b0..e9d7f58 100644
--- a/bs4/builder/_html5lib.py
+++ b/bs4/builder/_html5lib.py
@@ -102,18 +102,18 @@ class AttrList(object):
self.element = element
self.attrs = dict(self.element.attrs)
def __iter__(self):
- return self.attrs.items().__iter__()
+ return list(self.attrs.items()).__iter__()
def __setitem__(self, name, value):
"set attr", name, value
self.element[name] = value
def items(self):
- return self.attrs.items()
+ return list(self.attrs.items())
def keys(self):
- return self.attrs.keys()
+ return list(self.attrs.keys())
def __getitem__(self, name):
return self.attrs[name]
def __contains__(self, name):
- return name in self.attrs.keys()
+ return name in list(self.attrs.keys())
class Element(html5lib.treebuilders._base.Node):
@@ -155,7 +155,7 @@ class Element(html5lib.treebuilders._base.Node):
def setAttributes(self, attributes):
if attributes is not None and attributes != {}:
- for name, value in attributes.items():
+ for name, value in list(attributes.items()):
self.element[name] = value
# The attributes may contain variables that need substitution.
# Call set_up_substitutions manually.
diff --git a/bs4/builder/_lxml.py b/bs4/builder/_lxml.py
index 57798f6..07b2032 100644
--- a/bs4/builder/_lxml.py
+++ b/bs4/builder/_lxml.py
@@ -3,6 +3,7 @@ __all__ = [
'LXMLTreeBuilder',
]
+import collections
from lxml import etree
from bs4.element import Comment, Doctype
from bs4.builder import (
@@ -36,7 +37,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
if parser is None:
# Use the default parser.
parser = self.default_parser
- if callable(parser):
+ if isinstance(parser, collections.Callable):
# Instantiate the parser with default arguments
parser = parser(target=self, strip_cdata=False)
self.parser = parser
diff --git a/bs4/dammit.py b/bs4/dammit.py
index 75d445e..4aafe81 100644
--- a/bs4/dammit.py
+++ b/bs4/dammit.py
@@ -9,7 +9,6 @@ encoding; that's the tree builder's job.
import codecs
from htmlentitydefs import codepoint2name
import re
-import types
# Autodetects character encodings. Very useful.
# Download from http://chardet.feedparser.org/
@@ -37,7 +36,7 @@ class EntitySubstitution(object):
lookup = {}
reverse_lookup = {}
characters = []
- for codepoint, name in codepoint2name.items():
+ for codepoint, name in list(codepoint2name.items()):
if codepoint == 34:
# There's no point in turning the quotation mark into
# &quot;, unless it happens within an attribute value, which
@@ -175,7 +174,7 @@ class UnicodeDammit:
self.tried_encodings = []
if markup == '' or isinstance(markup, unicode):
self.original_encoding = None
- self.unicode = unicode(markup)
+ self.unicode_markup = unicode(markup)
return
u = None
@@ -197,7 +196,7 @@ class UnicodeDammit:
if u:
break
- self.unicode = u
+ self.unicode_markup = u
if not u: self.original_encoding = None
def _sub_ms_char(self, match):
@@ -205,7 +204,7 @@ class UnicodeDammit:
entity."""
orig = match.group(1)
sub = self.MS_CHARS.get(orig)
- if type(sub) == types.TupleType:
+ if type(sub) == tuple:
if self.smart_quotes_to == 'xml':
sub = '&#x'.encode() + sub[1].encode() + ';'.encode()
else:
@@ -234,7 +233,7 @@ class UnicodeDammit:
u = self._to_unicode(markup, proposed)
self.markup = u
self.original_encoding = proposed
- except Exception, e:
+ except Exception as e:
# print "That didn't work!"
# print e
return None
@@ -375,7 +374,7 @@ class UnicodeDammit:
250,251,252,253,254,255)
import string
c.EBCDIC_TO_ASCII_MAP = string.maketrans( \
- ''.join(map(chr, range(256))), ''.join(map(chr, emap)))
+ ''.join(map(chr, list(range(256)))), ''.join(map(chr, emap)))
return s.translate(c.EBCDIC_TO_ASCII_MAP)
MS_CHARS = { '\x80' : ('euro', '20AC'),
diff --git a/bs4/element.py b/bs4/element.py
index 587078c..a9814e1 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -1,9 +1,5 @@
+import collections
import re
-import types
-try:
- from htmlentitydefs import name2codepoint
-except ImportError:
- name2codepoint = {}
from bs4.dammit import EntitySubstitution
DEFAULT_OUTPUT_ENCODING = "utf-8"
@@ -13,12 +9,12 @@ class PageElement(object):
"""Contains the navigational information for some part of the page
(either a tag or a piece of text)"""
- def setup(self, parent=None, previous=None):
+ def setup(self, parent=None, previous_element=None):
"""Sets up the initial relations between this element and
other elements."""
self.parent = parent
- self.previous = previous
- self.next = None
+ self.previous_element = previous_element
+ self.next_element = None
self.previousSibling = None
self.nextSibling = None
if self.parent and self.parent.contents:
@@ -52,14 +48,14 @@ class PageElement(object):
#this element (and any children) hadn't been parsed. Connect
#the two.
lastChild = self._last_recursive_child()
- nextElement = lastChild.next
+ nextElement = lastChild.next_element
- if self.previous:
- self.previous.next = nextElement
+ if self.previous_element:
+ self.previous_element.next_element = nextElement
if nextElement:
- nextElement.previous = self.previous
- self.previous = None
- lastChild.next = None
+ nextElement.previous_element = self.previous_element
+ self.previous_element = None
+ lastChild.next_element = None
self.parent = None
if self.previousSibling:
@@ -78,8 +74,7 @@ class PageElement(object):
def insert(self, position, newChild):
if (isinstance(newChild, basestring)
- or isinstance(newChild, unicode)) \
- and not isinstance(newChild, NavigableString):
+ and not isinstance(newChild, NavigableString)):
newChild = NavigableString(newChild)
position = min(position, len(self.contents))
@@ -100,14 +95,14 @@ class PageElement(object):
previousChild = None
if position == 0:
newChild.previousSibling = None
- newChild.previous = self
+ newChild.previous_element = self
else:
previousChild = self.contents[position-1]
newChild.previousSibling = previousChild
newChild.previousSibling.nextSibling = newChild
- newChild.previous = previousChild._last_recursive_child()
- if newChild.previous:
- newChild.previous.next = newChild
+ newChild.previous_element = previousChild._last_recursive_child()
+ if newChild.previous_element:
+ newChild.previous_element.next_element = newChild
newChildsLastElement = newChild._last_recursive_child()
@@ -122,18 +117,18 @@ class PageElement(object):
if not parent: # This is the last element in the document.
break
if parentsNextSibling:
- newChildsLastElement.next = parentsNextSibling
+ newChildsLastElement.next_element = parentsNextSibling
else:
- newChildsLastElement.next = None
+ newChildsLastElement.next_element = None
else:
nextChild = self.contents[position]
newChild.nextSibling = nextChild
if newChild.nextSibling:
newChild.nextSibling.previousSibling = newChild
- newChildsLastElement.next = nextChild
+ newChildsLastElement.next_element = nextChild
- if newChildsLastElement.next:
- newChildsLastElement.next.previous = newChildsLastElement
+ if newChildsLastElement.next_element:
+ newChildsLastElement.next_element.previous_element = newChildsLastElement
self.contents.insert(position, newChild)
def append(self, tag):
@@ -223,6 +218,14 @@ class PageElement(object):
findParents = find_parents # BS3
fetchParents = find_parents # BS2
+ @property
+ def next(self):
+ return self.next_element
+
+ @property
+ def previous(self):
+ return self.previous_element
+
#These methods do the real heavy lifting.
def _find_one(self, method, name, attrs, text, **kwargs):
@@ -243,7 +246,7 @@ class PageElement(object):
results = ResultSet(strainer)
while True:
try:
- i = generator.next()
+ i = next(generator)
except StopIteration:
break
if i:
@@ -260,7 +263,7 @@ class PageElement(object):
def next_elements(self):
i = self
while i:
- i = i.next
+ i = i.next_element
yield i
@property
@@ -274,7 +277,7 @@ class PageElement(object):
def previous_elements(self):
i = self
while i:
- i = i.previous
+ i = i.previous_element
yield i
@property
@@ -341,7 +344,9 @@ class NavigableString(unicode, PageElement):
if attr == 'string':
return self
else:
- raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr)
+ raise AttributeError(
+ "'%s' object has no attribute '%s'" % (
+ self.__class__.__name__, attr))
def output_ready(self, substitute_html_entities=False):
if substitute_html_entities:
@@ -458,8 +463,8 @@ class Tag(PageElement):
attribute."""
return self.attrs.get(key, default)
- def has_key(self, key):
- return self.attrs.has_key(key)
+ def has_attr(self, key):
+ return key in self.attrs
def __getitem__(self, key):
"""tag[key] returns the value of the 'key' attribute for the tag,
@@ -488,14 +493,14 @@ class Tag(PageElement):
def __delitem__(self, key):
"Deleting tag[key] deletes all 'key' attributes for the tag."
- if self.attrs.has_key(key):
+ if key in self.attrs:
del self.attrs[key]
def __call__(self, *args, **kwargs):
"""Calling a tag like a function is the same as calling its
find_all() method. Eg. tag('a') returns a list of all the A tags
found within this tag."""
- return apply(self.find_all, args, kwargs)
+ return self.find_all(*args, **kwargs)
def __getattr__(self, tag):
#print "Getattr %s.%s" % (self.__class__, tag)
@@ -503,7 +508,8 @@ class Tag(PageElement):
return self.find(tag[:-3])
elif tag.find('__') != 0:
return self.find(tag)
- raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__, tag)
+ raise AttributeError(
+ "'%s' object has no attribute '%s'" % (self.__class__, tag))
def __eq__(self, other):
"""Returns true iff this tag has the same name, the same attributes,
@@ -688,11 +694,11 @@ class Tag(PageElement):
def recursive_children(self):
if not len(self.contents):
raise StopIteration # XXX return instead?
- stopNode = self._last_recursive_child().next
+ stopNode = self._last_recursive_child().next_element
current = self.contents[0]
while current is not stopNode:
yield current
- current = current.next
+ current = current.next_element
# Old names for backwards compatibility
def childGenerator(self):
@@ -701,6 +707,10 @@ class Tag(PageElement):
def recursiveChildGenerator(self):
return self.recursive_children
+ # This was kind of misleading because has_key() (attributes) was
+ # different from __contains__ (contents). has_key() is gone in Python 3,
+ # anyway.
+ has_key = has_attr
# Next, a couple classes to represent queries and their results.
class SoupStrainer(object):
@@ -733,8 +743,9 @@ class SoupStrainer(object):
if isinstance(markupName, Tag):
markup = markupName
markupAttrs = markup
- callFunctionWithTagData = callable(self.name) \
- and not isinstance(markupName, Tag)
+ callFunctionWithTagData = (
+ isinstance(self.name, collections.Callable)
+ and not isinstance(markupName, Tag))
if (not self.name) \
or callFunctionWithTagData \
@@ -745,7 +756,7 @@ class SoupStrainer(object):
else:
match = True
markupAttrMap = None
- for attr, matchAgainst in self.attrs.items():
+ for attr, matchAgainst in list(self.attrs.items()):
if not markupAttrMap:
if hasattr(markupAttrs, 'get'):
markupAttrMap = markupAttrs
@@ -786,16 +797,16 @@ class SoupStrainer(object):
if self._matches(markup, self.text):
found = markup
else:
- raise Exception, "I don't know how to match against a %s" \
- % markup.__class__
+ raise Exception(
+ "I don't know how to match against a %s" % markup.__class__)
return found
def _matches(self, markup, matchAgainst):
#print "Matching %s against %s" % (markup, matchAgainst)
result = False
- if matchAgainst == True and type(matchAgainst) == types.BooleanType:
+ if matchAgainst == True and isinstance(matchAgainst, bool):
result = markup != None
- elif callable(matchAgainst):
+ elif isinstance(matchAgainst, collections.Callable):
result = matchAgainst(markup)
else:
#Custom match methods take the tag as an argument, but all
@@ -813,12 +824,9 @@ class SoupStrainer(object):
or not isinstance(matchAgainst, basestring))):
result = markup in matchAgainst
elif hasattr(matchAgainst, 'items'):
- result = markup.has_key(matchAgainst)
+ result = matchAgainst in markup
elif matchAgainst and isinstance(markup, basestring):
- if isinstance(markup, unicode):
- matchAgainst = unicode(matchAgainst)
- else:
- matchAgainst = str(matchAgainst)
+ matchAgainst = markup.__class__(matchAgainst)
if not result:
result = matchAgainst == markup
diff --git a/tests/test_soup.py b/tests/test_soup.py
index d283b8a..87d6f3b 100644
--- a/tests/test_soup.py
+++ b/tests/test_soup.py
@@ -86,37 +86,37 @@ class TestUnicodeDammit(unittest.TestCase):
markup = "<foo>\x91\x92\x93\x94</foo>"
dammit = UnicodeDammit(markup)
self.assertEquals(
- dammit.unicode, u"<foo>\u2018\u2019\u201c\u201d</foo>")
+ dammit.unicode_markup, u"<foo>\u2018\u2019\u201c\u201d</foo>")
def test_smart_quotes_to_xml_entities(self):
markup = "<foo>\x91\x92\x93\x94</foo>"
dammit = UnicodeDammit(markup, smart_quotes_to="xml")
self.assertEquals(
- dammit.unicode, "<foo>&#x2018;&#x2019;&#x201C;&#x201D;</foo>")
+ dammit.unicode_markup, "<foo>&#x2018;&#x2019;&#x201C;&#x201D;</foo>")
def test_smart_quotes_to_html_entities(self):
markup = "<foo>\x91\x92\x93\x94</foo>"
dammit = UnicodeDammit(markup, smart_quotes_to="html")
self.assertEquals(
- dammit.unicode, "<foo>&lsquo;&rsquo;&ldquo;&rdquo;</foo>")
+ dammit.unicode_markup, "<foo>&lsquo;&rsquo;&ldquo;&rdquo;</foo>")
def test_detect_utf8(self):
utf8 = "\xc3\xa9"
dammit = UnicodeDammit(utf8)
- self.assertEquals(dammit.unicode, u'\xe9')
+ self.assertEquals(dammit.unicode_markup, u'\xe9')
self.assertEquals(dammit.original_encoding, 'utf-8')
def test_convert_hebrew(self):
hebrew = "\xed\xe5\xec\xf9"
dammit = UnicodeDammit(hebrew, ["iso-8859-8"])
self.assertEquals(dammit.original_encoding, 'iso-8859-8')
- self.assertEquals(dammit.unicode, u'\u05dd\u05d5\u05dc\u05e9')
+ self.assertEquals(dammit.unicode_markup, u'\u05dd\u05d5\u05dc\u05e9')
def test_dont_see_smart_quotes_where_there_are_none(self):
utf_8 = "\343\202\261\343\203\274\343\202\277\343\202\244 Watch"
dammit = UnicodeDammit(utf_8)
self.assertEquals(dammit.original_encoding, 'utf-8')
- self.assertEquals(dammit.unicode.encode("utf-8"), utf_8)
+ self.assertEquals(dammit.unicode_markup.encode("utf-8"), utf_8)
def test_ignore_inappropriate_codecs(self):
utf8_data = u"Räksmörgås".encode("utf-8")
diff --git a/tests/test_tree.py b/tests/test_tree.py
index f2989fe..cf14d0c 100644
--- a/tests/test_tree.py
+++ b/tests/test_tree.py
@@ -10,7 +10,7 @@ methods tested here.
"""
import copy
-import cPickle as pickle
+import pickle
import re
from bs4 import BeautifulSoup
from bs4.builder import builder_registry
@@ -288,7 +288,7 @@ class TestParentOperations(TreeTest):
def test_parent_generator(self):
parents = [parent['id'] for parent in self.start.parents
- if parent is not None and parent.has_key('id')]
+ if parent is not None and 'id' in parent.attrs]
self.assertEquals(parents, ['bottom', 'middle', 'top'])
@@ -307,16 +307,16 @@ class TestNextOperations(ProximityTest):
self.start = self.tree.b
def test_next(self):
- self.assertEquals(self.start.next, "One")
- self.assertEquals(self.start.next.next['id'], "2")
+ self.assertEquals(self.start.next_element, "One")
+ self.assertEquals(self.start.next_element.next_element['id'], "2")
def test_next_of_last_item_is_none(self):
last = self.tree.find(text="Three")
- self.assertEquals(last.next, None)
+ self.assertEquals(last.next_element, None)
def test_next_of_root_is_none(self):
# The document root is outside the next/previous chain.
- self.assertEquals(self.tree.next, None)
+ self.assertEquals(self.tree.next_element, None)
def test_find_all_next(self):
self.assertSelects(self.start.find_all_next('b'), ["Two", "Three"])
@@ -352,17 +352,17 @@ class TestPreviousOperations(ProximityTest):
self.end = self.tree.find(text="Three")
def test_previous(self):
- self.assertEquals(self.end.previous['id'], "3")
- self.assertEquals(self.end.previous.previous, "Two")
+ self.assertEquals(self.end.previous_element['id'], "3")
+ self.assertEquals(self.end.previous_element.previous_element, "Two")
def test_previous_of_first_item_is_none(self):
first = self.tree.find('html')
- self.assertEquals(first.previous, None)
+ self.assertEquals(first.previous_element, None)
def test_previous_of_root_is_none(self):
# The document root is outside the next/previous chain.
# XXX This is broken!
- #self.assertEquals(self.tree.previous, None)
+ #self.assertEquals(self.tree.previous_element, None)
pass
def test_find_all_previous(self):
@@ -436,7 +436,7 @@ class TestNextSibling(SiblingTest):
self.assertEquals(self.start.nextSibling.nextSibling['id'], '3')
# Note the difference between nextSibling and next.
- self.assertEquals(self.start.next['id'], '1.1')
+ self.assertEquals(self.start.next_element['id'], '1.1')
def test_next_sibling_may_not_exist(self):
self.assertEquals(self.tree.html.nextSibling, None)
@@ -481,7 +481,7 @@ class TestPreviousSibling(SiblingTest):
self.assertEquals(self.end.previousSibling.previousSibling['id'], '2')
# Note the difference between previousSibling and previous.
- self.assertEquals(self.end.previous['id'], '3.1')
+ self.assertEquals(self.end.previous_element['id'], '3.1')
def test_previous_sibling_may_not_exist(self):
self.assertEquals(self.tree.html.previousSibling, None)
@@ -565,10 +565,10 @@ class TestTreeModification(SoupTest):
soup.find(text="Argh!").replace_with("Hooray!")
new_text = soup.find(text="Hooray!")
b = soup.b
- self.assertEqual(new_text.previous, b)
+ self.assertEqual(new_text.previous_element, b)
self.assertEqual(new_text.parent, b)
- self.assertEqual(new_text.previous.next, new_text)
- self.assertEqual(new_text.next, None)
+ self.assertEqual(new_text.previous_element.next_element, new_text)
+ self.assertEqual(new_text.next_element, None)
def test_consecutive_text_nodes(self):
# A builder should never create two consecutive text nodes,
@@ -582,14 +582,14 @@ class TestTreeModification(SoupTest):
"<a><b>Argh!Hooray!</b><c></c></a>"))
new_text = soup.find(text="Hooray!")
- self.assertEqual(new_text.previous, "Argh!")
- self.assertEqual(new_text.previous.next, new_text)
+ self.assertEqual(new_text.previous_element, "Argh!")
+ self.assertEqual(new_text.previous_element.next_element, new_text)
self.assertEqual(new_text.previousSibling, "Argh!")
self.assertEqual(new_text.previousSibling.nextSibling, new_text)
self.assertEqual(new_text.nextSibling, None)
- self.assertEqual(new_text.next, soup.c)
+ self.assertEqual(new_text.next_element, soup.c)
def test_insert_tag(self):
@@ -610,8 +610,8 @@ class TestTreeModification(SoupTest):
self.assertEqual(magic_tag.previousSibling, b_tag)
find = b_tag.find(text="Find")
- self.assertEqual(find.next, magic_tag)
- self.assertEqual(magic_tag.previous, find)
+ self.assertEqual(find.next_element, magic_tag)
+ self.assertEqual(magic_tag.previous_element, find)
c_tag = soup.c
self.assertEqual(magic_tag.nextSibling, c_tag)
@@ -619,8 +619,8 @@ class TestTreeModification(SoupTest):
the = magic_tag.find(text="the")
self.assertEqual(the.parent, magic_tag)
- self.assertEqual(the.next, c_tag)
- self.assertEqual(c_tag.previous, the)
+ self.assertEqual(the.next_element, c_tag)
+ self.assertEqual(c_tag.previous_element, the)
def test_insert_works_on_empty_element_tag(self):
# This is a little strange, since most HTML parsers don't allow
@@ -643,7 +643,7 @@ class TestTreeModification(SoupTest):
self.assertEquals(show.parent, None)
self.assertEquals(no.parent, soup.p)
- self.assertEquals(no.next, "no")
+ self.assertEquals(no.next_element, "no")
self.assertEquals(no.nextSibling, " business")
def test_nested_tag_replace_with(self):
@@ -662,24 +662,24 @@ class TestTreeModification(SoupTest):
# The <b> tag is now an orphan.
self.assertEqual(remove_tag.parent, None)
- self.assertEqual(remove_tag.find(text="right").next, None)
- self.assertEqual(remove_tag.previous, None)
+ self.assertEqual(remove_tag.find(text="right").next_element, None)
+ self.assertEqual(remove_tag.previous_element, None)
self.assertEqual(remove_tag.nextSibling, None)
self.assertEqual(remove_tag.previousSibling, None)
# The <f> tag is now connected to the <a> tag.
self.assertEqual(move_tag.parent, soup.a)
- self.assertEqual(move_tag.previous, "We")
- self.assertEqual(move_tag.next.next, soup.e)
+ self.assertEqual(move_tag.previous_element, "We")
+ self.assertEqual(move_tag.next_element.next_element, soup.e)
self.assertEqual(move_tag.nextSibling, None)
# The gap where the <f> tag used to be has been mended, and
# the word "to" is now connected to the <g> tag.
to_text = soup.find(text="to")
g_tag = soup.g
- self.assertEqual(to_text.next, g_tag)
+ self.assertEqual(to_text.next_element, g_tag)
self.assertEqual(to_text.nextSibling, g_tag)
- self.assertEqual(g_tag.previous, to_text)
+ self.assertEqual(g_tag.previous_element, to_text)
self.assertEqual(g_tag.previousSibling, to_text)
def test_extract(self):
@@ -696,15 +696,15 @@ class TestTreeModification(SoupTest):
# The extracted tag is now an orphan.
self.assertEqual(len(soup.body.contents), 2)
self.assertEqual(extracted.parent, None)
- self.assertEqual(extracted.previous, None)
- self.assertEqual(extracted.next.next, None)
+ self.assertEqual(extracted.previous_element, None)
+ self.assertEqual(extracted.next_element.next_element, None)
# The gap where the extracted tag used to be has been mended.
content_1 = soup.find(text="Some content. ")
content_2 = soup.find(text=" More content.")
- self.assertEquals(content_1.next, content_2)
+ self.assertEquals(content_1.next_element, content_2)
self.assertEquals(content_1.nextSibling, content_2)
- self.assertEquals(content_2.previous, content_1)
+ self.assertEquals(content_2.previous_element, content_1)
self.assertEquals(content_2.previousSibling, content_1)
@@ -735,11 +735,17 @@ class TestElementObjects(SoupTest):
self.assertEqual(soup.a, None)
self.assertEqual(soup.aTag, None)
- def test_has_key(self):
- """has_key() checks for the presence of an attribute."""
+ def test_has_attr(self):
+ """has_attr() checks for the presence of an attribute.
+
+ Please note: has_attr() is different from
+ __contains__. has_attr() checks the tag's attributes and
+ __contains__ checks the tag's children.
+ """
soup = self.soup("<foo attr='bar'>")
- self.assertTrue(soup.foo.has_key('attr'))
- self.assertFalse(soup.foo.has_key('attr2'))
+ self.assertTrue(soup.foo.has_attr('attr'))
+ self.assertFalse(soup.foo.has_attr('attr2'))
+
def test_attributes_come_out_in_alphabetical_order(self):
markup = '<b a="1" z="5" m="3" f="2" y="4"></b>'