summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- CHANGELOG 10
-rw-r--r-- bs4/__init__.py 26
-rw-r--r-- bs4/builder/__init__.py 8
-rw-r--r-- bs4/builder/_html5lib.py 10
-rw-r--r-- bs4/builder/_lxml.py 3
-rw-r--r-- bs4/dammit.py 13
-rw-r--r-- bs4/element.py 109
-rw-r--r-- bs4/util.py 23
-rw-r--r-- tests/test_lxml.py 13
-rw-r--r-- tests/test_soup.py 12
-rw-r--r-- tests/test_tree.py 85
11 files changed, 155 insertions, 157 deletions
diff --git a/CHANGELOG b/CHANGELOG
index 1e8b449..d2a1c08 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -38,9 +38,19 @@ still work. Here are the renames:
* nextSibling -> next_sibling
* previousSibling -> previous_sibling
+Methods have been renamed for compatibility with Python 3.
+
+ * Tag.has_key() -> Tag.has_attr()
+
+ (This was misleading, anyway, because has_key() looked at
+ a tag's attributes and __contains__ looked at a tag's contents.)
+
Some attributes have also been renamed:
* Tag.isSelfClosing -> Tag.is_empty_element
+ * UnicodeDammit.unicode -> UnicodeDammit.unicode_markup
+ * Tag.next -> Tag.next_element
+ * Tag.previous -> Tag.previous_element
So have some arguments to popular methods:
diff --git a/bs4/__init__.py b/bs4/__init__.py
index 66a1c02..c036521 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -16,7 +16,6 @@ For more than you ever wanted to know about Beautiful Soup, see the
documentation:
http://www.crummy.com/software/BeautifulSoup/documentation.html
"""
-from __future__ import generators
__author__ = "Leonard Richardson (leonardr@segfault.org)"
__version__ = "4.0.0a"
@@ -27,10 +26,9 @@ __all__ = ['BeautifulSoup']
import re
-from util import isList, buildSet
-from builder import builder_registry
-from dammit import UnicodeDammit
-from element import DEFAULT_OUTPUT_ENCODING, NavigableString, Tag
+from .builder import builder_registry
+from .dammit import UnicodeDammit
+from .element import DEFAULT_OUTPUT_ENCODING, NavigableString, Tag
class BeautifulSoup(Tag):
@@ -145,7 +143,7 @@ class BeautifulSoup(Tag):
if self.currentData:
currentData = u''.join(self.currentData)
if (currentData.translate(self.STRIP_ASCII_SPACES) == '' and
- not buildSet([tag.name for tag in self.tagStack]).intersection(
+ not set([tag.name for tag in self.tagStack]).intersection(
self.builder.preserve_whitespace_tags)):
if '\n' in currentData:
currentData = '\n'
@@ -161,10 +159,10 @@ class BeautifulSoup(Tag):
def object_was_parsed(self, o):
"""Add an object to the parse tree."""
- o.setup(self.currentTag, self.previous)
- if self.previous:
- self.previous.next = o
- self.previous = o
+ o.setup(self.currentTag, self.previous_element)
+ if self.previous_element:
+ self.previous_element.next_element = o
+ self.previous_element = o
self.currentTag.contents.append(o)
def _popToTag(self, name, inclusivePop=True):
@@ -208,12 +206,12 @@ class BeautifulSoup(Tag):
return None
tag = Tag(self, self.builder, name, attrs, self.currentTag,
- self.previous)
+ self.previous_element)
if tag is None:
return tag
- if self.previous:
- self.previous.next = tag
- self.previous = tag
+ if self.previous_element:
+ self.previous_element.next_element = tag
+ self.previous_element = tag
self.pushTag(tag)
return tag
diff --git a/bs4/builder/__init__.py b/bs4/builder/__init__.py
index afd49b9..e6d4fa1 100644
--- a/bs4/builder/__init__.py
+++ b/bs4/builder/__init__.py
@@ -144,7 +144,7 @@ class SAXTreeBuilder(TreeBuilder):
pass
def startElement(self, name, attrs):
- attrs = dict((key[1], value) for key, value in attrs.items())
+ attrs = dict((key[1], value) for key, value in list(attrs.items()))
#print "Start %s, %r" % (name, attrs)
self.soup.handle_starttag(name, attrs)
@@ -247,16 +247,16 @@ def register_treebuilders_from(module):
# builder registrations will take precedence. In general, we want
# html5lib to take precedence over lxml, because it's more
# reliable. And we only want to use HTMLParser as a last resort.
-import _htmlparser
+from .import _htmlparser
register_treebuilders_from(_htmlparser)
try:
- import _lxml
+ from . import _lxml
register_treebuilders_from(_lxml)
except ImportError:
# They don't have lxml installed.
pass
try:
- import _html5lib
+ from . import _html5lib
register_treebuilders_from(_html5lib)
except ImportError:
# They don't have html5lib installed.
diff --git a/bs4/builder/_html5lib.py b/bs4/builder/_html5lib.py
index d74c4b0..e9d7f58 100644
--- a/bs4/builder/_html5lib.py
+++ b/bs4/builder/_html5lib.py
@@ -102,18 +102,18 @@ class AttrList(object):
self.element = element
self.attrs = dict(self.element.attrs)
def __iter__(self):
- return self.attrs.items().__iter__()
+ return list(self.attrs.items()).__iter__()
def __setitem__(self, name, value):
"set attr", name, value
self.element[name] = value
def items(self):
- return self.attrs.items()
+ return list(self.attrs.items())
def keys(self):
- return self.attrs.keys()
+ return list(self.attrs.keys())
def __getitem__(self, name):
return self.attrs[name]
def __contains__(self, name):
- return name in self.attrs.keys()
+ return name in list(self.attrs.keys())
class Element(html5lib.treebuilders._base.Node):
@@ -155,7 +155,7 @@ class Element(html5lib.treebuilders._base.Node):
def setAttributes(self, attributes):
if attributes is not None and attributes != {}:
- for name, value in attributes.items():
+ for name, value in list(attributes.items()):
self.element[name] = value
# The attributes may contain variables that need substitution.
# Call set_up_substitutions manually.
diff --git a/bs4/builder/_lxml.py b/bs4/builder/_lxml.py
index 57798f6..07b2032 100644
--- a/bs4/builder/_lxml.py
+++ b/bs4/builder/_lxml.py
@@ -3,6 +3,7 @@ __all__ = [
'LXMLTreeBuilder',
]
+import collections
from lxml import etree
from bs4.element import Comment, Doctype
from bs4.builder import (
@@ -36,7 +37,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
if parser is None:
# Use the default parser.
parser = self.default_parser
- if callable(parser):
+ if isinstance(parser, collections.Callable):
# Instantiate the parser with default arguments
parser = parser(target=self, strip_cdata=False)
self.parser = parser
diff --git a/bs4/dammit.py b/bs4/dammit.py
index f3e770e..ed5dc29 100644
--- a/bs4/dammit.py
+++ b/bs4/dammit.py
@@ -9,7 +9,6 @@ encoding; that's the tree builder's job.
import codecs
from htmlentitydefs import codepoint2name
import re
-import types
# Autodetects character encodings. Very useful.
# Download from http://chardet.feedparser.org/
@@ -37,7 +36,7 @@ class EntitySubstitution(object):
lookup = {}
reverse_lookup = {}
characters = []
- for codepoint, name in codepoint2name.items():
+ for codepoint, name in list(codepoint2name.items()):
if codepoint == 34:
# There's no point in turning the quotation mark into
# &quot;, unless it happens within an attribute value, which
@@ -174,7 +173,7 @@ class UnicodeDammit:
self.tried_encodings = []
if markup == '' or isinstance(markup, unicode):
self.original_encoding = None
- self.unicode = unicode(markup)
+ self.unicode_markup = unicode(markup)
return
u = None
@@ -196,7 +195,7 @@ class UnicodeDammit:
if u:
break
- self.unicode = u
+ self.unicode_markup = u
if not u:
self.original_encoding = None
@@ -205,7 +204,7 @@ class UnicodeDammit:
entity."""
orig = match.group(1)
sub = self.MS_CHARS.get(orig)
- if type(sub) == types.TupleType:
+ if type(sub) == tuple:
if self.smart_quotes_to == 'xml':
sub = '&#x'.encode() + sub[1].encode() + ';'.encode()
else:
@@ -234,7 +233,7 @@ class UnicodeDammit:
u = self._to_unicode(markup, proposed)
self.markup = u
self.original_encoding = proposed
- except Exception, e:
+ except Exception as e:
# print "That didn't work!"
# print e
return None
@@ -376,7 +375,7 @@ class UnicodeDammit:
250,251,252,253,254,255)
import string
c.EBCDIC_TO_ASCII_MAP = string.maketrans(
- ''.join(map(chr, range(256))), ''.join(map(chr, emap)))
+ ''.join(map(chr, list(range(256)))), ''.join(map(chr, emap)))
return s.translate(c.EBCDIC_TO_ASCII_MAP)
MS_CHARS = {'\x80': ('euro', '20AC'),
diff --git a/bs4/element.py b/bs4/element.py
index e141aa8..95661ae 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -1,13 +1,7 @@
+import collections
import re
-import types
-try:
- from htmlentitydefs import name2codepoint
-except ImportError:
- name2codepoint = {}
from bs4.dammit import EntitySubstitution
-from util import isList
-
DEFAULT_OUTPUT_ENCODING = "utf-8"
@@ -32,12 +26,12 @@ class PageElement(object):
"""Contains the navigational information for some part of the page
(either a tag or a piece of text)"""
- def setup(self, parent=None, previous=None):
+ def setup(self, parent=None, previous_element=None):
"""Sets up the initial relations between this element and
other elements."""
self.parent = parent
- self.previous = previous
- self.next = None
+ self.previous_element = previous_element
+ self.next_element = None
self.previous_sibling = None
self.next_sibling = None
if self.parent and self.parent.contents:
@@ -81,14 +75,14 @@ class PageElement(object):
#this element (and any children) hadn't been parsed. Connect
#the two.
last_child = self._last_recursive_child()
- next_element = last_child.next
+ next_element = last_child.next_element
- if self.previous:
- self.previous.next = next_element
+ if self.previous_element:
+ self.previous_element.next_element = next_element
if next_element:
- next_element.previous = self.previous
- self.previous = None
- last_child.next = None
+ next_element.previous_element = self.previous_element
+ self.previous_element = None
+ last_child.next_element = None
self.parent = None
if self.previous_sibling:
@@ -129,14 +123,14 @@ class PageElement(object):
previous_child = None
if position == 0:
new_child.previous_sibling = None
- new_child.previous = self
+ new_child.previous_element = self
else:
previous_child = self.contents[position - 1]
new_child.previous_sibling = previous_child
new_child.previous_sibling.next_sibling = new_child
- new_child.previous = previous_child._last_recursive_child()
+ new_child.previous_element = previous_child._last_recursive_child()
if new_child.previous:
- new_child.previous.next = new_child
+ new_child.previous_element.next_element = new_child
new_childs_last_element = new_child._last_recursive_child()
@@ -151,18 +145,18 @@ class PageElement(object):
if not parent: # This is the last element in the document.
break
if parents_next_sibling:
- new_childs_last_element.next = parents_next_sibling
+ new_childs_last_element.next_element = parents_next_sibling
else:
- new_childs_last_element.next = None
+ new_childs_last_element.next_element = None
else:
next_child = self.contents[position]
new_child.next_sibling = next_child
if new_child.next_sibling:
new_child.next_sibling.previous_sibling = new_child
- new_childs_last_element.next = next_child
+ new_childs_last_element.next_element = next_child
- if new_childs_last_element.next:
- new_childs_last_element.next.previous = new_childs_last_element
+ if new_childs_last_element.next_element:
+ new_childs_last_element.next_element.previous_element = new_childs_last_element
self.contents.insert(position, new_child)
def append(self, tag):
@@ -252,6 +246,14 @@ class PageElement(object):
findParents = find_parents # BS3
fetchParents = find_parents # BS2
+ @property
+ def next(self):
+ return self.next_element
+
+ @property
+ def previous(self):
+ return self.previous_element
+
#These methods do the real heavy lifting.
def _find_one(self, method, name, attrs, text, **kwargs):
@@ -283,7 +285,7 @@ class PageElement(object):
results = ResultSet(strainer)
while True:
try:
- i = generator.next()
+ i = next(generator)
except StopIteration:
break
if i:
@@ -300,7 +302,7 @@ class PageElement(object):
def next_elements(self):
i = self
while i is not None:
- i = i.next
+ i = i.next_element
yield i
@property
@@ -314,7 +316,7 @@ class PageElement(object):
def previous_elements(self):
i = self
while i is not None:
- i = i.previous
+ i = i.previous_element
yield i
@property
@@ -381,8 +383,9 @@ class NavigableString(unicode, PageElement):
if attr == 'string':
return self
else:
- raise AttributeError("'%s' object has no attribute '%s'" %
- (self.__class__.__name__, attr))
+ raise AttributeError(
+ "'%s' object has no attribute '%s'" % (
+ self.__class__.__name__, attr))
def output_ready(self, substitute_html_entities=False):
if substitute_html_entities:
@@ -554,7 +557,7 @@ class Tag(PageElement):
attribute."""
return self.attrs.get(key, default)
- def has_key(self, key):
+ def has_attr(self, key):
return key in self.attrs
def __getitem__(self, key):
@@ -590,7 +593,7 @@ class Tag(PageElement):
"""Calling a tag like a function is the same as calling its
find_all() method. Eg. tag('a') returns a list of all the A tags
found within this tag."""
- return apply(self.find_all, args, kwargs)
+ return self.find_all(args, kwargs)
def __getattr__(self, tag):
#print "Getattr %s.%s" % (self.__class__, tag)
@@ -598,8 +601,8 @@ class Tag(PageElement):
return self.find(tag[:-3])
elif not tag.startswith("__"):
return self.find(tag)
- raise AttributeError("'%s' object has no attribute '%s'" %
- (self.__class__, tag))
+ raise AttributeError(
+ "'%s' object has no attribute '%s'" % (self.__class__, tag))
def __eq__(self, other):
"""Returns true iff this tag has the same name, the same attributes,
@@ -782,11 +785,11 @@ class Tag(PageElement):
def recursive_children(self):
if not len(self.contents):
return
- stopNode = self._last_recursive_child().next
+ stopNode = self._last_recursive_child().next_element
current = self.contents[0]
while current is not stopNode:
yield current
- current = current.next
+ current = current.next_element
# Old names for backwards compatibility
def childGenerator(self):
@@ -795,6 +798,10 @@ class Tag(PageElement):
def recursiveChildGenerator(self):
return self.recursive_children
+ # This was kind of misleading because has_key() (attributes) was
+ # different from __contains__ (contents). has_key() is gone in Python 3,
+ # anyway.
+ has_key = has_attr
# Next, a couple classes to represent queries and their results.
class SoupStrainer(object):
@@ -827,19 +834,20 @@ class SoupStrainer(object):
if isinstance(markup_name, Tag):
markup = markup_name
markup_attrs = markup
- call_function_with_tag_data = callable(self.name) \
- and not isinstance(markup_name, Tag)
-
- if (not self.name) \
- or call_function_with_tag_data \
- or (markup and self._matches(markup, self.name)) \
- or (not markup and self._matches(markup_name, self.name)):
+ call_function_with_tag_data = (
+ isinstance(self.name, collections.Callable)
+ and not isinstance(markup_name, Tag))
+
+ if ((not self.name)
+ or call_function_with_tag_data
+ or (markup and self._matches(markup, self.name))
+ or (not markup and self._matches(markup_name, self.name))):
if call_function_with_tag_data:
match = self.name(markup_name, markup_attrs)
else:
match = True
markup_attr_map = None
- for attr, match_against in self.attrs.items():
+ for attr, match_against in list(self.attrs.items()):
if not markup_attr_map:
if hasattr(markup_attrs, 'get'):
markup_attr_map = markup_attrs
@@ -864,7 +872,7 @@ class SoupStrainer(object):
found = None
# If given a list of items, scan it for a text element that
# matches.
- if isList(markup) and not isinstance(markup, Tag):
+ if hasattr(markup, '__iter__') and not isinstance(markup, Tag):
for element in markup:
if isinstance(element, NavigableString) \
and self.search(element):
@@ -881,8 +889,8 @@ class SoupStrainer(object):
if self._matches(markup, self.text):
found = markup
else:
- raise Exception("I don't know how to match against a %s"
- % markup.__class__)
+ raise Exception(
+ "I don't know how to match against a %s" % markup.__class__)
return found
def _matches(self, markup, match_against):
@@ -890,7 +898,7 @@ class SoupStrainer(object):
result = False
if match_against is True:
result = markup is not None
- elif callable(match_against):
+ elif isinstance(match_against, collections.Callable):
result = match_against(markup)
else:
#Custom match methods take the tag as an argument, but all
@@ -903,17 +911,14 @@ class SoupStrainer(object):
if hasattr(match_against, 'match'):
# It's a regexp object.
result = markup and match_against.search(markup)
- elif (isList(match_against)
+ elif (hasattr(match_against, '__iter__')
and (markup is not None
or not isinstance(match_against, basestring))):
result = markup in match_against
elif hasattr(match_against, 'items'):
result = match_against in markup
elif match_against and isinstance(markup, basestring):
- if isinstance(markup, unicode):
- match_against = unicode(match_against)
- else:
- match_against = str(match_against)
+ match_against = markup.__class__(match_against)
if not result:
result = match_against == markup
diff --git a/bs4/util.py b/bs4/util.py
deleted file mode 100644
index 8e33273..0000000
--- a/bs4/util.py
+++ /dev/null
@@ -1,23 +0,0 @@
-# Helper functions and mixin classes for Beautiful Soup
-
-import types
-try:
- set
-except NameError:
- from sets import Set as set
-
-
-def isList(l):
- """Convenience method that works with all 2.x versions of Python
- to determine whether or not something is listlike."""
- return ((hasattr(l, '__iter__') and not isinstance(l, basestring))
- or (type(l) in (types.ListType, types.TupleType)))
-
-
-def buildSet(args=None):
- """Turns a list or a string into a set."""
- if isinstance(args, str):
- return set([args])
- if args is None:
- return set()
- return set(args)
diff --git a/tests/test_lxml.py b/tests/test_lxml.py
index 65c8719..7ce33e8 100644
--- a/tests/test_lxml.py
+++ b/tests/test_lxml.py
@@ -523,6 +523,13 @@ class TestLXMLXMLBuilder(SoupTest):
def default_builder(self):
return LXMLTreeBuilderForXML()
+ def test_mixed_case_tags(self):
+ # Mixed-case tags are *not* folded to lowercase, but the
+ # end tag is always the same case as the start tag.
+ self.assertSoupEquals(
+ "<a><B><Cd><EFG /></CD></b></A>",
+ "<a><B><Cd><EFG /></Cd></B></a>")
+
def test_cdata_becomes_text(self):
# LXML sends CData sections as 'data' events, so we can't
@@ -535,12 +542,6 @@ class TestLXMLXMLBuilder(SoupTest):
self.assertEquals(cdata.__class__.__name__, 'NavigableString')
- def test_mixed_case_tags(self):
- # Mixed-case tags are folded to lowercase.
- self.assertSoupEquals(
- "<a><B><Cd><EFG></efg></CD></b></A>",
- "<a><b><cd><efg></efg></cd></b></a>")
-
def test_can_handle_invalid_xml(self):
self.assertSoupEquals("<a><b>", "<a><b /></a>")
diff --git a/tests/test_soup.py b/tests/test_soup.py
index d283b8a..87d6f3b 100644
--- a/tests/test_soup.py
+++ b/tests/test_soup.py
@@ -86,37 +86,37 @@ class TestUnicodeDammit(unittest.TestCase):
markup = "<foo>\x91\x92\x93\x94</foo>"
dammit = UnicodeDammit(markup)
self.assertEquals(
- dammit.unicode, u"<foo>\u2018\u2019\u201c\u201d</foo>")
+ dammit.unicode_markup, u"<foo>\u2018\u2019\u201c\u201d</foo>")
def test_smart_quotes_to_xml_entities(self):
markup = "<foo>\x91\x92\x93\x94</foo>"
dammit = UnicodeDammit(markup, smart_quotes_to="xml")
self.assertEquals(
- dammit.unicode, "<foo>&#x2018;&#x2019;&#x201C;&#x201D;</foo>")
+ dammit.unicode_markup, "<foo>&#x2018;&#x2019;&#x201C;&#x201D;</foo>")
def test_smart_quotes_to_html_entities(self):
markup = "<foo>\x91\x92\x93\x94</foo>"
dammit = UnicodeDammit(markup, smart_quotes_to="html")
self.assertEquals(
- dammit.unicode, "<foo>&lsquo;&rsquo;&ldquo;&rdquo;</foo>")
+ dammit.unicode_markup, "<foo>&lsquo;&rsquo;&ldquo;&rdquo;</foo>")
def test_detect_utf8(self):
utf8 = "\xc3\xa9"
dammit = UnicodeDammit(utf8)
- self.assertEquals(dammit.unicode, u'\xe9')
+ self.assertEquals(dammit.unicode_markup, u'\xe9')
self.assertEquals(dammit.original_encoding, 'utf-8')
def test_convert_hebrew(self):
hebrew = "\xed\xe5\xec\xf9"
dammit = UnicodeDammit(hebrew, ["iso-8859-8"])
self.assertEquals(dammit.original_encoding, 'iso-8859-8')
- self.assertEquals(dammit.unicode, u'\u05dd\u05d5\u05dc\u05e9')
+ self.assertEquals(dammit.unicode_markup, u'\u05dd\u05d5\u05dc\u05e9')
def test_dont_see_smart_quotes_where_there_are_none(self):
utf_8 = "\343\202\261\343\203\274\343\202\277\343\202\244 Watch"
dammit = UnicodeDammit(utf_8)
self.assertEquals(dammit.original_encoding, 'utf-8')
- self.assertEquals(dammit.unicode.encode("utf-8"), utf_8)
+ self.assertEquals(dammit.unicode_markup.encode("utf-8"), utf_8)
def test_ignore_inappropriate_codecs(self):
utf8_data = u"Räksmörgås".encode("utf-8")
diff --git a/tests/test_tree.py b/tests/test_tree.py
index 1718c6a..8d0027c 100644
--- a/tests/test_tree.py
+++ b/tests/test_tree.py
@@ -10,7 +10,7 @@ methods tested here.
"""
import copy
-import cPickle as pickle
+import pickle
import re
from bs4 import BeautifulSoup
from bs4.builder import builder_registry
@@ -309,7 +309,7 @@ class TestParentOperations(TreeTest):
def test_parent_generator(self):
parents = [parent['id'] for parent in self.start.parents
- if parent is not None and parent.has_key('id')]
+ if parent is not None and 'id' in parent.attrs]
self.assertEquals(parents, ['bottom', 'middle', 'top'])
@@ -328,19 +328,20 @@ class TestNextOperations(ProximityTest):
self.start = self.tree.b
def test_next(self):
- self.assertEquals(self.start.next, "One")
- self.assertEquals(self.start.next.next['id'], "2")
+ self.assertEquals(self.start.next_element, "One")
+ self.assertEquals(self.start.next_element.next_element['id'], "2")
def test_next_of_last_item_is_none(self):
last = self.tree.find(text="Three")
- self.assertEquals(last.next, None)
+ self.assertEquals(last.next_element, None)
def test_next_of_root_is_none(self):
# The document root is outside the next/previous chain.
- self.assertEquals(self.tree.next, None)
+ self.assertEquals(self.tree.next_element, None)
def test_find_all_next(self):
self.assertSelects(self.start.find_all_next('b'), ["Two", "Three"])
+ self.start.find_all_next(id=3)
self.assertSelects(self.start.find_all_next(id=3), ["Three"])
def test_find_next(self):
@@ -373,17 +374,17 @@ class TestPreviousOperations(ProximityTest):
self.end = self.tree.find(text="Three")
def test_previous(self):
- self.assertEquals(self.end.previous['id'], "3")
- self.assertEquals(self.end.previous.previous, "Two")
+ self.assertEquals(self.end.previous_element['id'], "3")
+ self.assertEquals(self.end.previous_element.previous_element, "Two")
def test_previous_of_first_item_is_none(self):
first = self.tree.find('html')
- self.assertEquals(first.previous, None)
+ self.assertEquals(first.previous_element, None)
def test_previous_of_root_is_none(self):
# The document root is outside the next/previous chain.
# XXX This is broken!
- #self.assertEquals(self.tree.previous, None)
+ #self.assertEquals(self.tree.previous_element, None)
pass
def test_find_all_previous(self):
@@ -456,8 +457,8 @@ class TestNextSibling(SiblingTest):
self.assertEquals(self.start.next_sibling['id'], '2')
self.assertEquals(self.start.next_sibling.next_sibling['id'], '3')
- # Note the difference between next_sibling and next.
- self.assertEquals(self.start.next['id'], '1.1')
+ # Note the difference between next_sibling and next_element.
+ self.assertEquals(self.start.next_element['id'], '1.1')
def test_next_sibling_may_not_exist(self):
self.assertEquals(self.tree.html.next_sibling, None)
@@ -501,8 +502,8 @@ class TestPreviousSibling(SiblingTest):
self.assertEquals(self.end.previous_sibling['id'], '3')
self.assertEquals(self.end.previous_sibling.previous_sibling['id'], '2')
- # Note the difference between previous_sibling and previous.
- self.assertEquals(self.end.previous['id'], '3.1')
+ # Note the difference between previous_sibling and previous_element.
+ self.assertEquals(self.end.previous_element['id'], '3.1')
def test_previous_sibling_may_not_exist(self):
self.assertEquals(self.tree.html.previous_sibling, None)
@@ -586,10 +587,10 @@ class TestTreeModification(SoupTest):
soup.find(text="Argh!").replace_with("Hooray!")
new_text = soup.find(text="Hooray!")
b = soup.b
- self.assertEqual(new_text.previous, b)
+ self.assertEqual(new_text.previous_element, b)
self.assertEqual(new_text.parent, b)
- self.assertEqual(new_text.previous.next, new_text)
- self.assertEqual(new_text.next, None)
+ self.assertEqual(new_text.previous_element.next_element, new_text)
+ self.assertEqual(new_text.next_element, None)
def test_consecutive_text_nodes(self):
# A builder should never create two consecutive text nodes,
@@ -603,14 +604,14 @@ class TestTreeModification(SoupTest):
"<a><b>Argh!Hooray!</b><c></c></a>"))
new_text = soup.find(text="Hooray!")
- self.assertEqual(new_text.previous, "Argh!")
- self.assertEqual(new_text.previous.next, new_text)
+ self.assertEqual(new_text.previous_element, "Argh!")
+ self.assertEqual(new_text.previous_element.next_element, new_text)
self.assertEqual(new_text.previous_sibling, "Argh!")
self.assertEqual(new_text.previous_sibling.next_sibling, new_text)
self.assertEqual(new_text.next_sibling, None)
- self.assertEqual(new_text.next, soup.c)
+ self.assertEqual(new_text.next_element, soup.c)
def test_insert_tag(self):
builder = self.default_builder
@@ -630,8 +631,8 @@ class TestTreeModification(SoupTest):
self.assertEqual(magic_tag.previous_sibling, b_tag)
find = b_tag.find(text="Find")
- self.assertEqual(find.next, magic_tag)
- self.assertEqual(magic_tag.previous, find)
+ self.assertEqual(find.next_element, magic_tag)
+ self.assertEqual(magic_tag.previous_element, find)
c_tag = soup.c
self.assertEqual(magic_tag.next_sibling, c_tag)
@@ -639,8 +640,8 @@ class TestTreeModification(SoupTest):
the = magic_tag.find(text="the")
self.assertEqual(the.parent, magic_tag)
- self.assertEqual(the.next, c_tag)
- self.assertEqual(c_tag.previous, the)
+ self.assertEqual(the.next_element, c_tag)
+ self.assertEqual(c_tag.previous_element, the)
def test_insert_works_on_empty_element_tag(self):
# This is a little strange, since most HTML parsers don't allow
@@ -663,7 +664,7 @@ class TestTreeModification(SoupTest):
self.assertEquals(show.parent, None)
self.assertEquals(no.parent, soup.p)
- self.assertEquals(no.next, "no")
+ self.assertEquals(no.next_element, "no")
self.assertEquals(no.next_sibling, " business")
def test_nested_tag_replace_with(self):
@@ -682,24 +683,24 @@ class TestTreeModification(SoupTest):
# The <b> tag is now an orphan.
self.assertEqual(remove_tag.parent, None)
- self.assertEqual(remove_tag.find(text="right").next, None)
- self.assertEqual(remove_tag.previous, None)
+ self.assertEqual(remove_tag.find(text="right").next_element, None)
+ self.assertEqual(remove_tag.previous_element, None)
self.assertEqual(remove_tag.next_sibling, None)
self.assertEqual(remove_tag.previous_sibling, None)
# The <f> tag is now connected to the <a> tag.
self.assertEqual(move_tag.parent, soup.a)
- self.assertEqual(move_tag.previous, "We")
- self.assertEqual(move_tag.next.next, soup.e)
+ self.assertEqual(move_tag.previous_element, "We")
+ self.assertEqual(move_tag.next_element.next_element, soup.e)
self.assertEqual(move_tag.next_sibling, None)
# The gap where the <f> tag used to be has been mended, and
# the word "to" is now connected to the <g> tag.
to_text = soup.find(text="to")
g_tag = soup.g
- self.assertEqual(to_text.next, g_tag)
+ self.assertEqual(to_text.next_element, g_tag)
self.assertEqual(to_text.next_sibling, g_tag)
- self.assertEqual(g_tag.previous, to_text)
+ self.assertEqual(g_tag.previous_element, to_text)
self.assertEqual(g_tag.previous_sibling, to_text)
def test_replace_with_children(self):
@@ -724,15 +725,15 @@ class TestTreeModification(SoupTest):
# The extracted tag is now an orphan.
self.assertEqual(len(soup.body.contents), 2)
self.assertEqual(extracted.parent, None)
- self.assertEqual(extracted.previous, None)
- self.assertEqual(extracted.next.next, None)
+ self.assertEqual(extracted.previous_element, None)
+ self.assertEqual(extracted.next_element.next_element, None)
# The gap where the extracted tag used to be has been mended.
content_1 = soup.find(text="Some content. ")
content_2 = soup.find(text=" More content.")
- self.assertEquals(content_1.next, content_2)
+ self.assertEquals(content_1.next_element, content_2)
self.assertEquals(content_1.next_sibling, content_2)
- self.assertEquals(content_2.previous, content_1)
+ self.assertEquals(content_2.previous_element, content_1)
self.assertEquals(content_2.previous_sibling, content_1)
def test_clear(self):
@@ -785,11 +786,17 @@ class TestElementObjects(SoupTest):
self.assertEqual(soup.a, None)
self.assertEqual(soup.aTag, None)
- def test_has_key(self):
- """has_key() checks for the presence of an attribute."""
+ def test_has_attr(self):
+ """has_attr() checks for the presence of an attribute.
+
+ Please note: has_attr() is different from
+ __contains__. has_attr() checks the tag's attributes and __contains__
+ checks the tag's children.
+ """
soup = self.soup("<foo attr='bar'>")
- self.assertTrue(soup.foo.has_key('attr'))
- self.assertFalse(soup.foo.has_key('attr2'))
+ self.assertTrue(soup.foo.has_attr('attr'))
+ self.assertFalse(soup.foo.has_attr('attr2'))
+
def test_attributes_come_out_in_alphabetical_order(self):
markup = '<b a="1" z="5" m="3" f="2" y="4"></b>'