summaryrefslogtreecommitdiff
path: root/beautifulsoup/element.py
diff options
context:
space:
mode:
authorLeonard Richardson <leonardr@segfault.org>2023-01-25 15:04:42 -0500
committerLeonard Richardson <leonardr@segfault.org>2023-01-25 15:04:42 -0500
commit9b4eb2db8a830ff4522de4b744548039fdf5a2e6 (patch)
tree702868259331cb80ea710c56626105b92ee840d1 /beautifulsoup/element.py
parent02e7c7271829655781feb4dd5476814511096111 (diff)
Removed a copy of the code that was imported as part of the bzr import but never removed.
Diffstat (limited to 'beautifulsoup/element.py')
-rw-r--r--beautifulsoup/element.py855
1 files changed, 0 insertions, 855 deletions
diff --git a/beautifulsoup/element.py b/beautifulsoup/element.py
deleted file mode 100644
index 61ed4ab..0000000
--- a/beautifulsoup/element.py
+++ /dev/null
@@ -1,855 +0,0 @@
-import re
-import types
-try:
- from htmlentitydefs import name2codepoint
-except ImportError:
- name2codepoint = {}
-from beautifulsoup.dammit import EntitySubstitution
-
-from util import isList
-
-DEFAULT_OUTPUT_ENCODING = "utf-8"
-
-
class PageElement(object):
    """Contains the navigational information for some part of the page
    (either a tag or a piece of text).

    Each element carries five links: ``parent``, the document-order
    neighbours ``previous`` / ``next``, and the sibling-order neighbours
    ``previousSibling`` / ``nextSibling``.  The tree-editing methods
    below keep all five consistent.
    """

    def setup(self, parent=None, previous=None):
        """Sets up the initial relations between this element and
        other elements.

        :param parent: The element that will contain this one, or None.
        :param previous: The element parsed immediately before this one.
        """
        self.parent = parent
        self.previous = previous
        self.next = None
        self.previousSibling = None
        self.nextSibling = None
        if self.parent is not None and self.parent.contents:
            # The parent's current last child becomes our previous sibling.
            self.previousSibling = self.parent.contents[-1]
            self.previousSibling.nextSibling = self

    @staticmethod
    def _identityIndex(elements, target):
        """Returns the position of `target` in `elements`, comparing by
        identity, or None if it is not there.

        list.index() and list.remove() compare with ==, and strings
        compare by value, so they can pick an equal-looking sibling
        instead of the element itself.
        """
        for position, candidate in enumerate(elements):
            if candidate is target:
                return position
        return None

    def replaceWith(self, replaceWith):
        """Puts `replaceWith` in this element's place in the tree.

        :raises ValueError: if this element is not among its parent's
            contents.
        """
        oldParent = self.parent
        myIndex = self._identityIndex(oldParent.contents, self)
        if myIndex is None:
            raise ValueError("element is not in its parent's contents")
        self.extract()
        # If the replacement is an earlier sibling, insert() accounts
        # for the index shift caused by extracting it, so no correction
        # is made here (a second correction would land one slot early).
        oldParent.insert(myIndex, replaceWith)

    def extract(self):
        """Destructively rips this element out of the tree.

        :return: This element, now detached from parent, siblings and
            the document-order chain (its own children stay attached).
        """
        if self.parent is not None:
            # Remove by identity; an element that is already missing
            # from the parent's contents is silently tolerated.
            myIndex = self._identityIndex(self.parent.contents, self)
            if myIndex is not None:
                del self.parent.contents[myIndex]

        #Find the two elements that would be next to each other if
        #this element (and any children) hadn't been parsed. Connect
        #the two.
        lastChild = self._lastRecursiveChild()
        nextElement = lastChild.next

        if self.previous is not None:
            self.previous.next = nextElement
        if nextElement is not None:
            nextElement.previous = self.previous
        self.previous = None
        lastChild.next = None

        self.parent = None
        if self.previousSibling is not None:
            self.previousSibling.nextSibling = self.nextSibling
        if self.nextSibling is not None:
            self.nextSibling.previousSibling = self.previousSibling
        self.previousSibling = self.nextSibling = None
        return self

    def _lastRecursiveChild(self):
        "Finds the last element beneath this object to be parsed."
        lastChild = self
        while hasattr(lastChild, 'contents') and lastChild.contents:
            lastChild = lastChild.contents[-1]
        return lastChild

    def insert(self, position, newChild):
        """Inserts `newChild` into this element's contents.

        :param position: Index in the current contents list; values
            past the end are clamped to an append.
        :param newChild: An element or a plain string.  Plain strings
            are wrapped in NavigableString; an element that already has
            a parent is extracted from it first.
        """
        if isinstance(newChild, basestring) \
                and not isinstance(newChild, NavigableString):
            newChild = NavigableString(newChild)

        position = min(position, len(self.contents))
        if getattr(newChild, 'parent', None) is not None:
            # We're 'inserting' an element that's already one
            # of this object's children.
            if newChild.parent is self:
                currentIndex = self._identityIndex(self.contents, newChild)
                if currentIndex is not None and currentIndex < position:
                    # Furthermore we're moving it further down the
                    # list of this object's children. That means that
                    # when we extract this element, our target index
                    # will jump down one.
                    position = position - 1
            newChild.extract()

        newChild.parent = self
        previousChild = None
        if position == 0:
            newChild.previousSibling = None
            newChild.previous = self
        else:
            previousChild = self.contents[position-1]
            newChild.previousSibling = previousChild
            newChild.previousSibling.nextSibling = newChild
            # In document order we follow the deepest last descendant
            # of the sibling that now precedes us.
            newChild.previous = previousChild._lastRecursiveChild()
        if newChild.previous is not None:
            newChild.previous.next = newChild

        newChildsLastElement = newChild._lastRecursiveChild()

        if position >= len(self.contents):
            newChild.nextSibling = None

            # Appending: the next element in document order is the
            # next sibling of the nearest ancestor that has one.
            ancestor = self
            parentsNextSibling = None
            while parentsNextSibling is None and ancestor is not None:
                parentsNextSibling = ancestor.nextSibling
                ancestor = ancestor.parent
            # Stays None when this is the last element in the document.
            newChildsLastElement.next = parentsNextSibling
        else:
            nextChild = self.contents[position]
            newChild.nextSibling = nextChild
            if newChild.nextSibling is not None:
                newChild.nextSibling.previousSibling = newChild
            newChildsLastElement.next = nextChild

        if newChildsLastElement.next is not None:
            newChildsLastElement.next.previous = newChildsLastElement
        self.contents.insert(position, newChild)

    def append(self, tag):
        """Appends the given tag to the contents of this tag."""
        self.insert(len(self.contents), tag)

    # NOTE: the ``attrs={}`` defaults below are part of the public
    # signatures and are never mutated by this class.

    def find_next(self, name=None, attrs={}, text=None, **kwargs):
        """Returns the first item that matches the given criteria and
        appears after this Tag in the document."""
        return self._findOne(self.find_all_next, name, attrs, text, **kwargs)
    findNext = find_next  # BS3

    def find_all_next(self, name=None, attrs={}, text=None, limit=None,
                      **kwargs):
        """Returns all items that match the given criteria and appear
        after this Tag in the document."""
        return self._find_all(name, attrs, text, limit, self.next_elements,
                              **kwargs)
    findAllNext = find_all_next  # BS3

    def find_next_sibling(self, name=None, attrs={}, text=None, **kwargs):
        """Returns the closest sibling to this Tag that matches the
        given criteria and appears after this Tag in the document."""
        return self._findOne(self.find_next_siblings, name, attrs, text,
                             **kwargs)
    findNextSibling = find_next_sibling  # BS3

    def find_next_siblings(self, name=None, attrs={}, text=None, limit=None,
                           **kwargs):
        """Returns the siblings of this Tag that match the given
        criteria and appear after this Tag in the document."""
        return self._find_all(name, attrs, text, limit,
                              self.next_siblings, **kwargs)
    findNextSiblings = find_next_siblings   # BS3
    fetchNextSiblings = find_next_siblings  # BS2

    def find_previous(self, name=None, attrs={}, text=None, **kwargs):
        """Returns the first item that matches the given criteria and
        appears before this Tag in the document."""
        return self._findOne(
            self.find_all_previous, name, attrs, text, **kwargs)
    findPrevious = find_previous  # BS3

    def find_all_previous(self, name=None, attrs={}, text=None, limit=None,
                          **kwargs):
        """Returns all items that match the given criteria and appear
        before this Tag in the document."""
        return self._find_all(name, attrs, text, limit,
                              self.previous_elements, **kwargs)
    findAllPrevious = find_all_previous  # BS3
    fetchPrevious = find_all_previous    # BS2

    def find_previous_sibling(self, name=None, attrs={}, text=None, **kwargs):
        """Returns the closest sibling to this Tag that matches the
        given criteria and appears before this Tag in the document."""
        return self._findOne(self.find_previous_siblings, name, attrs, text,
                             **kwargs)
    findPreviousSibling = find_previous_sibling  # BS3

    def find_previous_siblings(self, name=None, attrs={}, text=None,
                               limit=None, **kwargs):
        """Returns the siblings of this Tag that match the given
        criteria and appear before this Tag in the document."""
        return self._find_all(name, attrs, text, limit,
                              self.previous_siblings, **kwargs)
    findPreviousSiblings = find_previous_siblings   # BS3
    fetchPreviousSiblings = find_previous_siblings  # BS2

    def find_parent(self, name=None, attrs={}, **kwargs):
        """Returns the closest parent of this Tag that matches the given
        criteria, or None if there is no such parent."""
        # NOTE: We can't use _findOne because find_parents takes a
        # different set of arguments (no `text`).
        matches = self.find_parents(name, attrs, 1, **kwargs)
        if matches:
            return matches[0]
        return None
    findParent = find_parent  # BS3

    def find_parents(self, name=None, attrs={}, limit=None, **kwargs):
        """Returns the parents of this Tag that match the given
        criteria."""
        return self._find_all(name, attrs, None, limit, self.parents,
                              **kwargs)
    findParents = find_parents   # BS3
    fetchParents = find_parents  # BS2

    #These methods do the real heavy lifting.

    def _findOne(self, method, name, attrs, text, **kwargs):
        """Calls a find-all style `method` with a limit of 1 and returns
        its only result, or None."""
        matches = method(name, attrs, text, 1, **kwargs)
        if matches:
            return matches[0]
        return None

    def _find_all(self, name, attrs, text, limit, generator, **kwargs):
        "Iterates over a generator looking for things that match."

        if isinstance(name, SoupStrainer):
            strainer = name
        else:
            # Build a SoupStrainer
            strainer = SoupStrainer(name, attrs, text, **kwargs)
        results = ResultSet(strainer)
        for element in generator:
            # The navigation generators end with a trailing None;
            # falsy elements are not searched.
            if not element:
                continue
            found = strainer.search(element)
            if found:
                results.append(found)
                if limit and len(results) >= limit:
                    break
        return results

    #These generators can be used to navigate starting from both
    #NavigableStrings and Tags.  Each one yields a final None once it
    #runs off the end of its chain.  They test ``is not None`` rather
    #than truthiness so that an empty string in the chain does not end
    #the walk early.
    @property
    def next_elements(self):
        i = self
        while i is not None:
            i = i.next
            yield i

    @property
    def next_siblings(self):
        i = self
        while i is not None:
            i = i.nextSibling
            yield i

    @property
    def previous_elements(self):
        i = self
        while i is not None:
            i = i.previous
            yield i

    @property
    def previous_siblings(self):
        i = self
        while i is not None:
            i = i.previousSibling
            yield i

    @property
    def parents(self):
        i = self
        while i is not None:
            i = i.parent
            yield i

    # Old non-property versions of the generators, for backwards
    # compatibility with BS3.
    def nextGenerator(self):
        return self.next_elements

    def nextSiblingGenerator(self):
        return self.next_siblings

    def previousGenerator(self):
        return self.previous_elements

    def previousSiblingGenerator(self):
        return self.previous_siblings

    def parentGenerator(self):
        return self.parents

    # Utility methods
    def substituteEncoding(self, str, encoding=None):
        """Replaces the %SOUP-ENCODING% placeholder in `str` with
        `encoding` ("utf-8" when no encoding is given)."""
        encoding = encoding or "utf-8"
        return str.replace("%SOUP-ENCODING%", encoding)

    def toEncoding(self, s, encoding=None):
        """Encodes an object to a string in some encoding, or to Unicode.

        With an `encoding`, the result is `s` encoded in it; without
        one, the result is a Unicode string.  Objects that are not
        strings are converted with str() first.
        """
        if isinstance(s, unicode):
            if encoding:
                s = s.encode(encoding)
        elif isinstance(s, str):
            if encoding:
                s = s.encode(encoding)
            else:
                s = unicode(s)
        else:
            if encoding:
                s = self.toEncoding(str(s), encoding)
            else:
                s = unicode(s)
        return s
-
class NavigableString(unicode, PageElement):
    """A piece of document text that is also a navigable PageElement.

    Subclasses change how the text is rendered by overriding PREFIX
    and SUFFIX, which output_ready() wraps around the text.
    """

    PREFIX = ''
    SUFFIX = ''

    def __new__(cls, value):
        """Create a new NavigableString.

        When unpickling a NavigableString, this method is called with
        the string in DEFAULT_OUTPUT_ENCODING. That encoding needs to be
        passed in to the superclass's __new__ or the superclass won't know
        how to handle non-ASCII characters.
        """
        if isinstance(value, unicode):
            return unicode.__new__(cls, value)
        return unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING)

    def __getnewargs__(self):
        # Arguments handed back to __new__ when unpickling.
        return (unicode(self),)

    def __getattr__(self, attr):
        """text.string gives you text. This is for backwards
        compatibility for Navigable*String, but for CData* it lets you
        get the string without the CData wrapper.

        :raises AttributeError: for any attribute other than 'string'.
        """
        if attr == 'string':
            return self
        # Call form of raise: valid in both Python 2 and Python 3.
        raise AttributeError(
            "'%s' object has no attribute '%s'"
            % (self.__class__.__name__, attr))

    def output_ready(self, substitute_html_entities=False):
        """Returns this string wrapped in PREFIX and SUFFIX.

        :param substitute_html_entities: If true, the text is first
            passed through EntitySubstitution.substitute_html.
        """
        if substitute_html_entities:
            output = EntitySubstitution.substitute_html(self)
        else:
            output = self
        return self.PREFIX + output + self.SUFFIX
-
-
class CData(NavigableString):
    """A string rendered inside a CDATA section."""

    # Markers placed around the text when it is rendered.
    PREFIX, SUFFIX = u'<![CDATA[', u']]>'
-
-
class ProcessingInstruction(NavigableString):
    """A string rendered as a processing instruction."""

    # Markers placed around the text when it is rendered.
    PREFIX, SUFFIX = u'<?', u'?>'
-
-
class Comment(NavigableString):
    """A string rendered as a markup comment."""

    # Markers placed around the text when it is rendered.
    PREFIX, SUFFIX = u'<!--', u'-->'
-
class Declaration(NavigableString):
    """A string rendered as a markup declaration."""

    # Markers placed around the text when it is rendered.
    # NOTE(review): the '!>' closing marker looks unusual for a
    # declaration -- confirm it is intended before relying on it.
    PREFIX, SUFFIX = u'<!', u'!>'
-
-
class Doctype(NavigableString):
    """A string rendered as a document type declaration."""

    PREFIX = u'<!DOCTYPE '
    SUFFIX = u'>'

    @classmethod
    def for_name_and_ids(cls, name, pub_id, system_id):
        """Builds a doctype from a root name and its external ids.

        :param name: Root element name; None is treated as empty.
        :param pub_id: Public identifier, or None.
        :param system_id: System identifier, or None.
        :return: An instance of `cls` holding the declaration text.
        """
        value = name or ''
        if pub_id is not None:
            value += ' PUBLIC "%s"' % pub_id
            if system_id is not None:
                # After PUBLIC the system literal follows directly,
                # with no SYSTEM keyword.
                value += ' "%s"' % system_id
        elif system_id is not None:
            value += ' SYSTEM "%s"' % system_id

        return cls(value)
-
-
class Tag(PageElement):

    """Represents a found HTML tag with its attributes and contents."""

    def __init__(self, parser, builder, name, attrs=None, parent=None,
                 previous=None):
        """Basic constructor.

        :param parser: Only its class is remembered (see below).
        :param builder: Asked whether `name` can be an empty-element
            tag and to set up any substitutions for this tag.
        :param name: The tag name.
        :param attrs: Anything dict() accepts, or None for no attributes.
        :param parent: The containing element, if any.
        :param previous: The element parsed just before this one.
        """

        # We don't actually store the parser object: that lets extracted
        # chunks be garbage-collected.
        self.parserClass = parser.__class__
        self.name = name
        if attrs is None:
            attrs = {}
        else:
            attrs = dict(attrs)
        self.attrs = attrs
        self.contents = []
        self.setup(parent, previous)
        self.hidden = False

        # Set up any substitutions, such as the charset in a META tag.
        self.contains_substitutions = builder.set_up_substitutions(self)

        self.can_be_empty_element = builder.can_be_empty_element(name)

    @property
    def is_empty_element(self):
        """Is this tag an empty-element tag? (aka a self-closing tag)

        A tag that has contents is never an empty-element tag.

        A tag that has no contents may or may not be an empty-element
        tag. It depends on the builder used to create the tag. If the
        builder has a designated list of empty-element tags, then only
        a tag whose name shows up in that list is considered an
        empty-element tag.

        If the builder has no designated list of empty-element tags,
        then any tag with no contents is an empty-element tag.
        """
        return len(self.contents) == 0 and self.can_be_empty_element
    isSelfClosing = is_empty_element  # BS3

    @property
    def string(self):
        """Convenience property to get the single string within this tag.

        :Return: If this tag has a single string child, return value
         is that string. If this tag has no children, or more than one
         child, return value is None. If this tag has one child tag,
         return value is the 'string' attribute of the child tag,
         recursively.
        """
        if len(self.contents) != 1:
            return None
        child = self.contents[0]
        if isinstance(child, NavigableString):
            return child
        return child.string

    def get(self, key, default=None):
        """Returns the value of the 'key' attribute for the tag, or
        the value given for 'default' if it doesn't have that
        attribute."""
        return self.attrs.get(key, default)

    def has_key(self, key):
        "Returns whether the tag has an attribute called 'key'."
        return key in self.attrs

    def __getitem__(self, key):
        """tag[key] returns the value of the 'key' attribute for the tag,
        and throws an exception if it's not there."""
        return self.attrs[key]

    def __iter__(self):
        "Iterating over a tag iterates over its contents."
        return iter(self.contents)

    def __len__(self):
        "The length of a tag is the length of its list of contents."
        return len(self.contents)

    def __contains__(self, x):
        return x in self.contents

    def __nonzero__(self):
        "A tag is non-None even if it has no contents."
        return True

    def __setitem__(self, key, value):
        """Setting tag[key] sets the value of the 'key' attribute for the
        tag."""
        self.attrs[key] = value

    def __delitem__(self, key):
        """Deleting tag[key] deletes the 'key' attribute for the tag.
        A missing attribute is ignored."""
        if key in self.attrs:
            del self.attrs[key]

    def __call__(self, *args, **kwargs):
        """Calling a tag like a function is the same as calling its
        find_all() method. Eg. tag('a') returns a list of all the A tags
        found within this tag."""
        return self.find_all(*args, **kwargs)

    def __getattr__(self, tag):
        """Treats an unknown attribute as a search for a child tag:
        tag.b and tag.bTag both mean tag.find('b').

        :raises AttributeError: for names that start with '__'.
        """
        if len(tag) > 3 and tag.endswith('Tag'):
            return self.find(tag[:-3])
        elif not tag.startswith('__'):
            return self.find(tag)
        # Call form of raise: valid in both Python 2 and Python 3.
        raise AttributeError(
            "'%s' object has no attribute '%s'" % (self.__class__, tag))

    def __eq__(self, other):
        """Returns true iff this tag has the same name, the same attributes,
        and the same contents (recursively) as the given tag.

        Attributes are compared as dictionaries."""
        for required in ('name', 'attrs', 'contents'):
            if not hasattr(other, required):
                return False
        if (self.name != other.name or self.attrs != other.attrs
                or len(self) != len(other)):
            return False
        for i in range(0, len(self.contents)):
            if self.contents[i] != other.contents[i]:
                return False
        return True

    def __ne__(self, other):
        """Returns true iff this tag is not identical to the other tag,
        as defined in __eq__."""
        return not self == other

    def __repr__(self, encoding=DEFAULT_OUTPUT_ENCODING):
        """Renders this tag as a string."""
        return self.encode(encoding)

    def __unicode__(self):
        return self.decode()

    def __str__(self):
        return self.encode()

    def encode(self, encoding=DEFAULT_OUTPUT_ENCODING,
               indent_level=None, substitute_html_entities=False):
        """Renders this tag and its contents, encoded in `encoding`."""
        return self.decode(indent_level, encoding,
                           substitute_html_entities).encode(encoding)

    def decode(self, indent_level=None,
               eventual_encoding=DEFAULT_OUTPUT_ENCODING,
               substitute_html_entities=False):
        """Returns a Unicode representation of this tag and its contents.

        :param indent_level: None for compact output; otherwise the
          nesting depth used for pretty-printing.
        :param eventual_encoding: The tag is destined to be
          encoded into this encoding. This method is _not_
          responsible for performing that encoding. This information
          is passed in so that it can be substituted in if the
          document contains a <META> tag that mentions the document's
          encoding.
        :param substitute_html_entities: Passed on to the strings
          inside this tag when they are rendered.
        """
        attrs = []
        if self.attrs:
            for key, val in sorted(self.attrs.items()):
                if val is None:
                    # Attribute with no value: emit the bare name.
                    decoded = key
                else:
                    if not isinstance(val, basestring):
                        val = str(val)
                    if (self.contains_substitutions
                            and eventual_encoding is not None
                            and '%SOUP-ENCODING%' in val):
                        val = self.substituteEncoding(val, eventual_encoding)

                    decoded = (key + '='
                               + EntitySubstitution.substitute_xml(val, True))
                attrs.append(decoded)
        close = ''
        closeTag = ''
        if self.is_empty_element:
            close = ' /'
        else:
            closeTag = '</%s>' % self.name

        pretty_print = (indent_level is not None)
        if pretty_print:
            space = (' ' * (indent_level-1))
            indent_contents = indent_level + 1
        else:
            space = ''
            indent_contents = None
        contents = self.decode_contents(
            indent_contents, eventual_encoding, substitute_html_entities)

        if self.hidden:
            # This is the 'document root' object.
            s = contents
        else:
            s = []
            attributeString = ''
            if attrs:
                attributeString = ' ' + ' '.join(attrs)
            if pretty_print:
                s.append(space)
            s.append('<%s%s%s>' % (self.name, attributeString, close))
            if pretty_print:
                s.append("\n")
            s.append(contents)
            if pretty_print and contents and contents[-1] != "\n":
                s.append("\n")
            if pretty_print and closeTag:
                s.append(space)
            s.append(closeTag)
            if pretty_print and closeTag and self.nextSibling:
                s.append("\n")
            s = ''.join(s)
        return s

    def decompose(self):
        """Recursively destroys the contents of this tree."""
        # Iterate over a copy: extract() edits self.contents.
        for child in list(self.contents):
            if isinstance(child, Tag):
                child.decompose()
            else:
                child.extract()
        self.extract()

    def prettify(self, encoding=DEFAULT_OUTPUT_ENCODING):
        """Renders this tag pretty-printed and encoded in `encoding`."""
        # True is passed as the indent level, i.e. depth 1.
        return self.encode(encoding, True)

    def decode_contents(self, indent_level=None,
                        eventual_encoding=DEFAULT_OUTPUT_ENCODING,
                        substitute_html_entities=False):
        """Renders the contents of this tag as a Unicode string.

        :param indent_level: None for compact output; otherwise the
          nesting depth used for pretty-printing.
        :param eventual_encoding: The tag is destined to be
          encoded into this encoding. This method is _not_
          responsible for performing that encoding. This information
          is passed in so that it can be substituted in if the
          document contains a <META> tag that mentions the document's
          encoding.
        :param substitute_html_entities: Passed on to each string's
          output_ready().
        """
        pretty_print = (indent_level is not None)
        s = []
        for c in self:
            text = None
            if isinstance(c, NavigableString):
                text = c.output_ready(substitute_html_entities)
            elif isinstance(c, Tag):
                s.append(c.decode(indent_level, eventual_encoding,
                                  substitute_html_entities))
            if text and indent_level:
                text = text.strip()
            if text:
                if pretty_print:
                    s.append(" " * (indent_level-1))
                s.append(text)
                if pretty_print:
                    s.append("\n")
        return ''.join(s)

    #Soup methods

    def find(self, name=None, attrs={}, recursive=True, text=None,
             **kwargs):
        """Return only the first child of this Tag matching the given
        criteria, or None if nothing matches."""
        matches = self.find_all(name, attrs, recursive, text, 1, **kwargs)
        if matches:
            return matches[0]
        return None
    findChild = find

    def find_all(self, name=None, attrs={}, recursive=True, text=None,
                 limit=None, **kwargs):
        """Extracts a list of Tag objects that match the given
        criteria.  You can specify the name of the Tag and any
        attributes you want the Tag to have.

        The value of a key-value pair in the 'attrs' map can be a
        string, a list of strings, a regular expression object, or a
        callable that takes a string and returns whether or not the
        string matches for some custom definition of 'matches'. The
        same is true of the tag name."""
        generator = self.recursive_children
        if not recursive:
            generator = self.children
        return self._find_all(name, attrs, text, limit, generator, **kwargs)
    findAll = find_all       # BS3
    findChildren = find_all  # BS2

    #Generator methods
    @property
    def children(self):
        "Yields this tag's direct children."
        # Ends by falling off the end; an explicit `raise StopIteration`
        # inside a generator becomes a RuntimeError under PEP 479.
        for i in range(0, len(self.contents)):
            yield self.contents[i]

    @property
    def recursive_children(self):
        "Yields every element beneath this tag, in document order."
        if not len(self.contents):
            return
        # The element just past our last descendant marks the end.
        stopNode = self._lastRecursiveChild().next
        current = self.contents[0]
        while current is not stopNode:
            yield current
            current = current.next

    # Old names for backwards compatibility
    def childGenerator(self):
        return self.children

    def recursiveChildGenerator(self):
        return self.recursive_children
-
-
-# Next, a couple classes to represent queries and their results.
class SoupStrainer(object):
    """Encapsulates a number of ways of matching a markup element (tag or
    text)."""

    def __init__(self, name=None, attrs={}, text=None, **kwargs):
        """Records the criteria to match against.

        :param name: Criterion for the tag name.
        :param attrs: Mapping of attribute name to criterion.  A plain
            string is treated as a criterion for the 'class' attribute.
        :param text: Criterion for text elements.
        :param kwargs: Extra attribute criteria, merged over `attrs`.
        """
        self.name = name
        if isinstance(attrs, basestring):
            kwargs['class'] = attrs
            attrs = None
        if kwargs:
            if attrs:
                attrs = attrs.copy()
                attrs.update(kwargs)
            else:
                attrs = kwargs
        elif not attrs:
            # Always store a private dict: searchTag() calls
            # self.attrs.items(), and the shared default argument must
            # not end up on the instance.
            attrs = {}
        self.attrs = attrs
        self.text = text

    def __str__(self):
        if self.text:
            return self.text
        else:
            return "%s|%s" % (self.name, self.attrs)

    def searchTag(self, markupName=None, markupAttrs={}):
        """Checks a tag against the name and attribute criteria.

        :param markupName: A Tag, or a tag name.
        :param markupAttrs: The attributes that go with a tag name:
            an object with a get() method, or an iterable of
            (key, value) pairs.  Ignored when `markupName` is a Tag.
        :return: The matching Tag or name, or None.
        """
        found = None
        markup = None
        if isinstance(markupName, Tag):
            markup = markupName
            markupAttrs = markup
        # A callable name criterion is handed (name, attrs) directly,
        # but only when we were not given a Tag.
        callFunctionWithTagData = callable(self.name) \
            and not isinstance(markupName, Tag)

        if (not self.name) \
                or callFunctionWithTagData \
                or (markup and self._matches(markup, self.name)) \
                or (not markup and self._matches(markupName, self.name)):
            if callFunctionWithTagData:
                match = self.name(markupName, markupAttrs)
            else:
                match = True
                markupAttrMap = None
                for attr, matchAgainst in self.attrs.items():
                    if not markupAttrMap:
                        if hasattr(markupAttrs, 'get'):
                            markupAttrMap = markupAttrs
                        else:
                            markupAttrMap = {}
                            for k, v in markupAttrs:
                                markupAttrMap[k] = v
                    attrValue = markupAttrMap.get(attr)
                    if not self._matches(attrValue, matchAgainst):
                        match = False
                        break
            if match:
                if markup:
                    found = markup
                else:
                    found = markupName
        return found

    def search(self, markup):
        """Checks one element, or a list of them, against the criteria.

        :return: The matching element, or None.
        :raises Exception: if `markup` is not a list, a Tag or a string.
        """
        found = None
        # If given a list of items, scan it for a text element that
        # matches.
        if isList(markup) and not isinstance(markup, Tag):
            for element in markup:
                if isinstance(element, NavigableString) \
                        and self.search(element):
                    found = element
                    break
        # If it's a Tag, make sure its name or attributes match.
        # Don't bother with Tags if we're searching for text.
        elif isinstance(markup, Tag):
            if not self.text:
                found = self.searchTag(markup)
        # If it's text, make sure the text matches.
        elif isinstance(markup, NavigableString) or \
                isinstance(markup, basestring):
            if self._matches(markup, self.text):
                found = markup
        else:
            # Call form of raise: valid in both Python 2 and Python 3.
            raise Exception("I don't know how to match against a %s"
                            % markup.__class__)
        return found

    def _matches(self, markup, matchAgainst):
        """Tests `markup` against a single criterion.

        The criterion may be True (anything that is not None matches),
        a callable, an object with a match() method (used as a regular
        expression), a list, or a value compared as a string.
        """
        result = False
        if matchAgainst is True:
            result = markup is not None
        elif callable(matchAgainst):
            result = matchAgainst(markup)
        else:
            #Custom match methods take the tag as an argument, but all
            #other ways of matching match the tag name as a string.
            if isinstance(markup, Tag):
                markup = markup.name
            if markup is not None and not isinstance(markup, basestring):
                markup = unicode(markup)
            #Now we know that markup is either a string, or None.
            if hasattr(matchAgainst, 'match'):
                # It's a regexp object.
                result = markup and matchAgainst.search(markup)
            elif (isList(matchAgainst)
                  and (markup is not None
                       or not isinstance(matchAgainst, basestring))):
                result = markup in matchAgainst
            elif hasattr(matchAgainst, 'items'):
                # NOTE(review): markup is a string or None at this
                # point, so this call looks like it cannot succeed --
                # confirm the intent before changing it.
                result = markup.has_key(matchAgainst)
            elif matchAgainst and isinstance(markup, basestring):
                # Compare like with like: coerce the criterion to the
                # same string type as the markup.
                if isinstance(markup, unicode):
                    matchAgainst = unicode(matchAgainst)
                else:
                    matchAgainst = str(matchAgainst)

            if not result:
                result = matchAgainst == markup
        return result
-
-
class ResultSet(list):
    """A ResultSet is just a list that keeps track of the SoupStrainer
    that created it."""

    def __init__(self, source):
        """Creates an empty result list.

        :param source: The object that produced these results; stored
            as-is on the `source` attribute.
        """
        # Initialise this list itself, not a throwaway `[]`.
        list.__init__(self)
        self.source = source