8 files changed, 0 insertions, 2218 deletions
diff --git a/beautifulsoup/__init__.py b/beautifulsoup/__init__.py
deleted file mode 100644
index 518e95f..0000000
--- a/beautifulsoup/__init__.py
+++ /dev/null
@@ -1,295 +0,0 @@
-"""Beautiful Soup
-Elixir and Tonic
-"The Screen-Scraper's Friend"
-http://www.crummy.com/software/BeautifulSoup/
-
-Beautiful Soup uses a plug-in parser to parse a (possibly invalid) XML
-or HTML document into a tree representation. The parser does the work
-of building a parse tree, and Beautiful Soup provides provides methods
-and Pythonic idioms that make it easy to navigate, search, and modify
-the parse tree.
-
-Beautiful Soup works with Python 2.5 and up. To get it to work, you
-must install either lxml or html5lib.
-
-For more than you ever wanted to know about Beautiful Soup, see the
-documentation:
-http://www.crummy.com/software/BeautifulSoup/documentation.html
-
-Here, have some legalese:
-
-Copyright (c) 2004-2011, Leonard Richardson
-
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
-  * Redistributions of source code must retain the above copyright
-    notice, this list of conditions and the following disclaimer.
-
-  * Redistributions in binary form must reproduce the above
-    copyright notice, this list of conditions and the following
-    disclaimer in the documentation and/or other materials provided
-    with the distribution.
-
-  * Neither the name of the the Beautiful Soup Consortium and All
-    Night Kosher Bakery nor the names of its contributors may be
-    used to endorse or promote products derived from this software
-    without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE, DAMMIT.
-
-"""
-from __future__ import generators
-
-__author__ = "Leonard Richardson (leonardr@segfault.org)"
-__version__ = "4.0.0"
-__copyright__ = "Copyright (c) 2004-2011 Leonard Richardson"
-__license__ = "New-style BSD"
-
-__all__ = ['BeautifulSoup']
-
-import re
-
-from util import isList, buildSet
-from builder import builder_registry
-from dammit import UnicodeDammit
-from element import DEFAULT_OUTPUT_ENCODING, NavigableString, Tag
-
-
-class BeautifulSoup(Tag):
-    """
-    This class defines the basic interface called by the tree builders.
-
-    These methods will be called by the parser:
-      reset()
-      feed(markup)
-
-    The tree builder may call these methods from its feed() implementation:
-      handle_starttag(name, attrs) # See note about return value
-      handle_endtag(name)
-      handle_data(data) # Appends to the current data node
-      endData(containerClass=NavigableString) # Ends the current data node
-
-    No matter how complicated the underlying parser is, you should be
-    able to build a tree using 'start tag' events, 'end tag' events,
-    'data' events, and "done with data" events.
-
-    If you encounter an empty-element tag (aka a self-closing tag,
-    like HTML's <br> tag), call handle_starttag and then
-    handle_endtag.
-    """
-    ROOT_TAG_NAME = u'[document]'
-
-    # If the end-user gives no indication which tree builder they
-    # want, look for one with these features.
-    DEFAULT_BUILDER_FEATURES = ['html']
-
-    # Used when determining whether a text node is all whitespace and
-    # can be replaced with a single space. A text node that contains
-    # fancy Unicode spaces (usually non-breaking) should be left
-    # alone.
-    STRIP_ASCII_SPACES = { 9: None, 10: None, 12: None, 13: None, 32: None, }
-
-    def __init__(self, markup="", features=None, builder=None,
-                 parse_only=None, from_encoding=None):
-        """The Soup object is initialized as the 'root tag', and the
-        provided markup (which can be a string or a file-like object)
-        is fed into the underlying parser."""
-
-        if builder is None:
-            if isinstance(features, basestring):
-                features = [features]
-            if features is None or len(features) == 0:
-                features = self.DEFAULT_BUILDER_FEATURES
-            builder_class = builder_registry.lookup(*features)
-            if builder_class is None:
-                raise ValueError(
-                    "Couldn't find a tree builder with the features you "
-                    "requested: %s. Do you need to install a parser library?"
-                    % ",".join(features))
-            builder = builder_class()
-        self.builder = builder
-        self.is_xml = builder.is_xml
-        self.builder.soup = self
-
-        self.parse_only = parse_only
-
-        self.reset()
-
-        if hasattr(markup, 'read'):        # It's a file-type object.
-            markup = markup.read()
-        self.markup, self.original_encoding, self.declared_html_encoding = (
-            self.builder.prepare_markup(markup, from_encoding))
-
-        try:
-            self._feed()
-        except StopParsing:
-            pass
-
-        # Clear out the markup and the builder so they can be CGed.
-        self.markup = None
-        self.builder.soup = None
-        self.builder = None
-
-    def _feed(self):
-        # Convert the document to Unicode.
-        self.builder.reset()
-
-        self.builder.feed(self.markup)
-        # Close out any unfinished strings and close all the open tags.
-        self.endData()
-        while self.currentTag.name != self.ROOT_TAG_NAME:
-            self.popTag()
-
-    def reset(self):
-        Tag.__init__(self, self, self.builder, self.ROOT_TAG_NAME)
-        self.hidden = 1
-        self.builder.reset()
-        self.currentData = []
-        self.currentTag = None
-        self.tagStack = []
-        self.pushTag(self)
-
-    def popTag(self):
-        tag = self.tagStack.pop()
-        #print "Pop", tag.name
-        if self.tagStack:
-            self.currentTag = self.tagStack[-1]
-        return self.currentTag
-
-    def pushTag(self, tag):
-        #print "Push", tag.name
-        if self.currentTag:
-            self.currentTag.contents.append(tag)
-        self.tagStack.append(tag)
-        self.currentTag = self.tagStack[-1]
-
-    def endData(self, containerClass=NavigableString):
-        if self.currentData:
-            currentData = u''.join(self.currentData)
-            if (currentData.translate(self.STRIP_ASCII_SPACES) == '' and
-                not buildSet([tag.name for tag in self.tagStack]).intersection(
-                    self.builder.preserve_whitespace_tags)):
-                if '\n' in currentData:
-                    currentData = '\n'
-                else:
-                    currentData = ' '
-            self.currentData = []
-            if self.parse_only and len(self.tagStack) <= 1 and \
-                   (not self.parse_only.text or \
-                    not self.parse_only.search(currentData)):
-                return
-            o = containerClass(currentData)
-            self.object_was_parsed(o)
-
-    def object_was_parsed(self, o):
-        """Add an object to the parse tree."""
-        o.setup(self.currentTag, self.previous)
-        if self.previous:
-            self.previous.next = o
-        self.previous = o
-        self.currentTag.contents.append(o)
-
-
-    def _popToTag(self, name, inclusivePop=True):
-        """Pops the tag stack up to and including the most recent
-        instance of the given tag. If inclusivePop is false, pops the tag
-        stack up to but *not* including the most recent instqance of
-        the given tag."""
-        #print "Popping to %s" % name
-        if name == self.ROOT_TAG_NAME:
-            return
-
-        numPops = 0
-        mostRecentTag = None
-        for i in range(len(self.tagStack)-1, 0, -1):
-            if name == self.tagStack[i].name:
-                numPops = len(self.tagStack)-i
-                break
-        if not inclusivePop:
-            numPops = numPops - 1
-
-        for i in range(0, numPops):
-            mostRecentTag = self.popTag()
-        return mostRecentTag
-
-    def handle_starttag(self, name, attrs):
-        """Push a start tag on to the stack.
-
-        If this method returns None, the tag was rejected by the
-        SoupStrainer. You should proceed as if the tag had not occured
-        in the document. For instance, if this was a self-closing tag,
-        don't call handle_endtag.
-        """
-
-        #print "Start tag %s: %s" % (name, attrs)
-        self.endData()
-
-        if (self.parse_only and len(self.tagStack) <= 1
-            and (self.parse_only.text
-                 or not self.parse_only.searchTag(name, attrs))):
-            return None
-
-        tag = Tag(self, self.builder, name, attrs, self.currentTag,
-                  self.previous)
-        if tag is None:
-            return tag
-        if self.previous:
-            self.previous.next = tag
-        self.previous = tag
-        self.pushTag(tag)
-        return tag
-
-
-    def handle_endtag(self, name):
-        #print "End tag: " + name
-        self.endData()
-        self._popToTag(name)
-
-    def handle_data(self, data):
-        self.currentData.append(data)
-
-    def decode(self, pretty_print=False,
-               eventual_encoding=DEFAULT_OUTPUT_ENCODING,
-               substitute_html_entities=False):
-        """Returns a string or Unicode representation of this document.
-        To get Unicode, pass None for encoding."""
-        if self.is_xml:
-            # Print the XML declaration
-            encoding_part = ''
-            if eventual_encoding != None:
-                encoding_part = ' encoding="%s"' % eventual_encoding
-            prefix = u'<?xml version="1.0"%s>\n' % encoding_part
-        else:
-            prefix = u''
-        if not pretty_print:
-            indent_level = None
-        else:
-            indent_level = 0
-        return prefix + super(BeautifulSoup, self).decode(
-            indent_level, eventual_encoding,
-            substitute_html_entities)
-
-
-class StopParsing(Exception):
-    pass
-
-
-#By default, act as an HTML pretty-printer.
-if __name__ == '__main__':
-    import sys
-    soup = BeautifulSoup(sys.stdin)
-    print soup.prettify()
diff --git a/beautifulsoup/builder/__init__.py b/beautifulsoup/builder/__init__.py
deleted file mode 100644
index 10c6b7f..0000000
--- a/beautifulsoup/builder/__init__.py
+++ /dev/null
@@ -1,259 +0,0 @@
-from collections import defaultdict
-import re
-import sys
-
-__all__ = [
-    'HTMLTreeBuilder',
-    'SAXTreeBuilder',
-    'TreeBuilder',
-    'TreeBuilderRegistry',
-    ]
-
-# Some useful features for a TreeBuilder to have.
-FAST = 'fast'
-PERMISSIVE = 'permissive'
-XML = 'xml'
-HTML = 'html'
-HTML_5 = 'html5'
-
-
-class TreeBuilderRegistry(object):
-
-    def __init__(self):
-        self.builders_for_feature = defaultdict(list)
-        self.builders = []
-
-    def register(self, treebuilder_class):
-        """Register a treebuilder based on its advertised features."""
-        for feature in treebuilder_class.features:
-            self.builders_for_feature[feature].insert(0, treebuilder_class)
-        self.builders.insert(0, treebuilder_class)
-
-    def lookup(self, *features):
-        if len(self.builders) == 0:
-            # There are no builders at all.
-            return None
-
-        if len(features) == 0:
-            # They didn't ask for any features. Give them the most
-            # recently registered builder.
-            return self.builders[0]
-
-        # Go down the list of features in order, and eliminate any builders
-        # that don't match every feature.
-        features = list(features)
-        features.reverse()
-        candidates = None
-        candidate_set = None
-        while len(features) > 0:
-            feature = features.pop()
-            we_have_the_feature = self.builders_for_feature.get(feature, [])
-            if len(we_have_the_feature) > 0:
-                if candidates is None:
-                    candidates = we_have_the_feature
-                    candidate_set = set(candidates)
-                else:
-                    # Eliminate any candidates that don't have this feature.
-                    candidate_set = candidate_set.intersection(
-                        set(we_have_the_feature))
-
-        # The only valid candidates are the ones in candidate_set.
-        # Go through the original list of candidates and pick the first one
-        # that's in candidate_set.
-        if candidate_set is None:
-            return None
-        for candidate in candidates:
-            if candidate in candidate_set:
-                return candidate
-        return None
-
-# The BeautifulSoup class will take feature lists from developers and use them
-# to look up builders in this registry.
-builder_registry = TreeBuilderRegistry()
-
-
-class TreeBuilder(object):
-    """Turn a document into a Beautiful Soup object tree."""
-
-    features = []
-
-    is_xml = False
-    preserve_whitespace_tags = set()
-    empty_element_tags = None # A tag will be considered an empty-element
-                              # tag when and only when it has no contents.
-
-    def __init__(self):
-        self.soup = None
-
-    def reset(self):
-        pass
-
-    def can_be_empty_element(self, tag_name):
-        """Might a tag with this name be an empty-element tag?
-
-        The final markup may or may not actually present this tag as
-        self-closing.
-
-        For instance: an HTMLBuilder does not consider a <p> tag to be
-        an empty-element tag (it's not in
-        HTMLBuilder.empty_element_tags). This means an empty <p> tag
-        will be presented as "<p></p>", not "<p />".
-
-        The default implementation has no opinion about which tags are
-        empty-element tags, so a tag will be presented as an
-        empty-element tag if and only if it has no contents.
-        "<foo></foo>" will become "<foo />", and "<foo>bar</foo>" will
-        be left alone.
-        """
-        if self.empty_element_tags is None:
-            return True
-        return tag_name in self.empty_element_tags
-
-    def feed(self, markup):
-        raise NotImplementedError()
-
-    def prepare_markup(self, markup, user_specified_encoding=None,
-                       document_declared_encoding=None):
-        return markup, None, None
-
-    def test_fragment_to_document(self, fragment):
-        """Wrap an HTML fragment to make it look like a document.
-
-        Different parsers do this differently. For instance, lxml
-        introduces an empty <head> tag, and html5lib
-        doesn't. Abstracting this away lets us write simple tests
-        which run HTML fragments through the parser and compare the
-        results against other HTML fragments.
-
-        This method should not be used outside of tests.
-        """
-        return fragment
-
-    def set_up_substitutions(self, tag):
-        pass
-
-
-class SAXTreeBuilder(TreeBuilder):
-    """A Beautiful Soup treebuilder that listens for SAX events."""
-
-    def feed(self, markup):
-        raise NotImplementedError()
-
-    def close(self):
-        pass
-
-    def startElement(self, name, attrs):
-        attrs = dict((key[1], value) for key, value in attrs.items())
-        #print "Start %s, %r" % (name, attrs)
-        self.soup.handle_starttag(name, attrs)
-
-    def endElement(self, name):
-        #print "End %s" % name
-        self.soup.handle_endtag(name)
-
-    def startElementNS(self, nsTuple, nodeName, attrs):
-        # Throw away (ns, nodeName) for now.
-        self.startElement(nodeName, attrs)
-
-    def endElementNS(self, nsTuple, nodeName):
-        # Throw away (ns, nodeName) for now.
-        self.endElement(nodeName)
-        #handler.endElementNS((ns, node.nodeName), node.nodeName)
-
-    def startPrefixMapping(self, prefix, nodeValue):
-        # Ignore the prefix for now.
-        pass
-
-    def endPrefixMapping(self, prefix):
-        # Ignore the prefix for now.
-        # handler.endPrefixMapping(prefix)
-        pass
-
-    def characters(self, content):
-        self.soup.handle_data(content)
-
-    def startDocument(self):
-        pass
-
-    def endDocument(self):
-        pass
-
-
-class HTMLTreeBuilder(TreeBuilder):
-    """This TreeBuilder knows facts about HTML.
-
-    Such as which tags are empty-element tags.
-    """
-
-    preserve_whitespace_tags = set(['pre', 'textarea'])
-    empty_element_tags = set(['br' , 'hr', 'input', 'img', 'meta',
-                              'spacer', 'link', 'frame', 'base'])
-
-    # Used by set_up_substitutions to detect the charset in a META tag
-    CHARSET_RE = re.compile("((^|;)\s*charset=)([^;]*)", re.M)
-
-    def set_up_substitutions(self, tag):
-        if tag.name != 'meta':
-            return False
-
-        http_equiv = tag.get('http-equiv')
-        content = tag.get('content')
-
-        if (http_equiv is not None
-            and content is not None
-            and http_equiv.lower() == 'content-type'):
-            # This is an interesting meta tag.
-            match = self.CHARSET_RE.search(content)
-            if match:
-                if (self.soup.declared_html_encoding is not None or
-                    self.soup.original_encoding == self.soup.from_encoding):
-                    # An HTML encoding was sniffed while converting
-                    # the document to Unicode, or an HTML encoding was
-                    # sniffed during a previous pass through the
-                    # document, or an encoding was specified
-                    # explicitly and it worked. Rewrite the meta tag.
-                    def rewrite(match):
-                        return match.group(1) + "%SOUP-ENCODING%"
-                    tag['content'] = self.CHARSET_RE.sub(rewrite, content)
-                    return True
-                else:
-                    # This is our first pass through the document.
-                    # Go through it again with the encoding information.
-                    new_charset = match.group(3)
-                    if (new_charset is not None
-                        and new_charset != self.soup.original_encoding):
-                        self.soup.declared_html_encoding = new_charset
-                        self.soup._feed(self.soup.declared_html_encoding)
-                        raise StopParsing
-                    pass
-        return False
-
-
-def register_treebuilders_from(module):
-    """Copy TreeBuilders from the given module into this module."""
-    # I'm fairly sure this is not the best way to do this.
-    this_module = sys.modules[__package__]
-    for name in module.__all__:
-        obj = getattr(module, name)
-
-        if issubclass(obj, TreeBuilder):
-            setattr(this_module, name, obj)
-            this_module.__all__.append(name)
-            # Register the builder while we're at it.
-            this_module.builder_registry.register(obj)
-
-# Builders are registered in reverse order of priority, so that custom
-# builder registrations will take precedence. In general, we want
-# html5lib to take precedence over lxml, because it's more reliable.
-try:
-    import _lxml
-    register_treebuilders_from(_lxml)
-except ImportError:
-    # They don't have lxml installed.
-    pass
-try:
-    import _html5lib
-    register_treebuilders_from(_html5lib)
-except ImportError:
-    # They don't have html5lib installed.
-    pass
diff --git a/beautifulsoup/builder/_html5lib.py b/beautifulsoup/builder/_html5lib.py
deleted file mode 100644
index f8a7a40..0000000
--- a/beautifulsoup/builder/_html5lib.py
+++ /dev/null
@@ -1,233 +0,0 @@
-__all__ = [
-    'HTML5TreeBuilder',
-    ]
-
-from beautifulsoup.builder import (
-    PERMISSIVE,
-    HTML,
-    HTML_5,
-    HTMLTreeBuilder,
-    )
-import html5lib
-from html5lib.constants import DataLossWarning
-import warnings
-from beautifulsoup.element import (
-    Comment,
-    Doctype,
-    NavigableString,
-    Tag,
-    )
-
-class HTML5TreeBuilder(HTMLTreeBuilder):
-    """Use html5lib to build a tree."""
-
-    features = ['html5lib', PERMISSIVE, HTML_5, HTML]
-
-    def prepare_markup(self, markup, user_specified_encoding):
-        # Store the user-specified encoding for use later on.
-        self.user_specified_encoding = user_specified_encoding
-        return markup, None, None
-
-    # These methods are defined by Beautiful Soup.
-    def feed(self, markup):
-        parser = html5lib.HTMLParser(tree=self.create_treebuilder)
-        doc = parser.parse(markup, encoding=self.user_specified_encoding)
-
-        # Set the character encoding detected by the tokenizer.
-        if isinstance(markup, unicode):
-            # We need to special-case this because html5lib sets
-            # charEncoding to UTF-8 if it gets Unicode input.
-            doc.original_encoding = None
-        else:
-            doc.original_encoding = parser.tokenizer.stream.charEncoding[0]
-
-    def create_treebuilder(self, namespaceHTMLElements):
-        self.underlying_builder = TreeBuilderForHtml5lib(
-            self.soup, namespaceHTMLElements)
-        return self.underlying_builder
-
-    def test_fragment_to_document(self, fragment):
-        """See `TreeBuilder`."""
-        return u'<html><head></head><body>%s</body></html>' % fragment
-
-
-class TreeBuilderForHtml5lib(html5lib.treebuilders._base.TreeBuilder):
-
-    def __init__(self, soup, namespaceHTMLElements):
-        self.soup = soup
-        if namespaceHTMLElements:
-            warnings.warn("namespaceHTMLElements not supported yet",
-                          DataLossWarning)
-        super(TreeBuilderForHtml5lib, self).__init__(namespaceHTMLElements)
-
-    def documentClass(self):
-        self.soup.reset()
-        return Element(self.soup, self.soup, None)
-
-    def insertDoctype(self, token):
-        name = token["name"]
-        publicId = token["publicId"]
-        systemId = token["systemId"]
-
-        doctype = Doctype.for_name_and_ids(name, publicId, systemId)
-        self.soup.object_was_parsed(doctype)
-
-    def elementClass(self, name, namespace):
-        if namespace is not None:
-            warnings.warn("BeautifulSoup cannot represent elements in any namespace", DataLossWarning)
-        return Element(Tag(self.soup, self.soup.builder, name), self.soup, namespace)
-
-    def commentClass(self, data):
-        return TextNode(Comment(data), self.soup)
-
-    def fragmentClass(self):
-        self.soup = BeautifulSoup("")
-        self.soup.name = "[document_fragment]"
-        return Element(self.soup, self.soup, None)
-
-    def appendChild(self, node):
-        self.soup.insert(len(self.soup.contents), node.element)
-
-    def testSerializer(self, element):
-        return testSerializer(element)
-
-    def getDocument(self):
-        return self.soup
-
-    def getFragment(self):
-        return html5lib.treebuilders._base.TreeBuilder.getFragment(self).element
-
-class AttrList(object):
-    def __init__(self, element):
-        self.element = element
-        self.attrs = dict(self.element.attrs)
-    def __iter__(self):
-        return self.attrs.items().__iter__()
-    def __setitem__(self, name, value):
-        "set attr", name, value
-        self.element[name] = value
-    def items(self):
-        return self.attrs.items()
-    def keys(self):
-        return self.attrs.keys()
-    def __getitem__(self, name):
-        return self.attrs[name]
-    def __contains__(self, name):
-        return name in self.attrs.keys()
-
-
-class Element(html5lib.treebuilders._base.Node):
-    def __init__(self, element, soup, namespace):
-        html5lib.treebuilders._base.Node.__init__(self, element.name)
-        self.element = element
-        self.soup = soup
-        self.namespace = namespace
-
-    def _nodeIndex(self, node, refNode):
-        # Finds a node by identity rather than equality
-        for index in range(len(self.element.contents)):
-            if id(self.element.contents[index]) == id(refNode.element):
-                return index
-        return None
-
-    def appendChild(self, node):
-        if (node.element.__class__ == NavigableString and self.element.contents
-            and self.element.contents[-1].__class__ == NavigableString):
-            # Concatenate new text onto old text node
-            # (TODO: This has O(n^2) performance, for input like "a</a>a</a>a</a>...")
-            newStr = NavigableString(self.element.contents[-1]+node.element)
-
-            # Remove the old text node
-            # (Can't simply use .extract() by itself, because it fails if
-            # an equal text node exists within the parent node)
-            oldElement = self.element.contents[-1]
-            del self.element.contents[-1]
-            oldElement.parent = None
-            oldElement.extract()
-
-            self.element.insert(len(self.element.contents), newStr)
-        else:
-            self.element.insert(len(self.element.contents), node.element)
-            node.parent = self
-
-    def getAttributes(self):
-        return AttrList(self.element)
-
-    def setAttributes(self, attributes):
-        if attributes is not None and attributes != {}:
-            for name, value in attributes.items():
-                self.element[name] =  value
-            # The attributes may contain variables that need substitution.
-            # Call set_up_substitutions manually.
-            # The Tag constructor calls this method automatically,
-            # but html5lib creates a Tag object before setting up
-            # the attributes.
-            self.element.contains_substitutions = (
-                self.soup.builder.set_up_substitutions(
-                    self.element))
-    attributes = property(getAttributes, setAttributes)
-
-    def insertText(self, data, insertBefore=None):
-        text = TextNode(NavigableString(data), self.soup)
-        if insertBefore:
-            self.insertBefore(text, insertBefore)
-        else:
-            self.appendChild(text)
-
-    def insertBefore(self, node, refNode):
-        index = self._nodeIndex(node, refNode)
-        if (node.element.__class__ == NavigableString and self.element.contents
-            and self.element.contents[index-1].__class__ == NavigableString):
-            # (See comments in appendChild)
-            newStr = NavigableString(self.element.contents[index-1]+node.element)
-            oldNode = self.element.contents[index-1]
-            del self.element.contents[index-1]
-            oldNode.parent = None
-            oldNode.extract()
-
-            self.element.insert(index-1, newStr)
-        else:
-            self.element.insert(index, node.element)
-            node.parent = self
-
-    def removeChild(self, node):
-        index = self._nodeIndex(node.parent, node)
-        del node.parent.element.contents[index]
-        node.element.parent = None
-        node.element.extract()
-        node.parent = None
-
-    def reparentChildren(self, newParent):
-        while self.element.contents:
-            child = self.element.contents[0]
-            child.extract()
-            if isinstance(child, Tag):
-                newParent.appendChild(Element(child, self.soup, namespaces["html"]))
-            else:
-                newParent.appendChild(TextNode(child, self.soup))
-
-    def cloneNode(self):
-        node = Element(Tag(self.soup, self.soup.builder, self.element.name), self.soup, self.namespace)
-        for key,value in self.attributes:
-            node.attributes[key] = value
-        return node
-
-    def hasContent(self):
-        return self.element.contents
-
-    def getNameTuple(self):
-        if self.namespace == None:
-            return namespaces["html"], self.name
-        else:
-            return self.namespace, self.name
-
-    nameTuple = property(getNameTuple)
-
-class TextNode(Element):
-    def __init__(self, element, soup):
-        html5lib.treebuilders._base.Node.__init__(self, None)
-        self.element = element
-        self.soup = soup
-
-    def cloneNode(self):
-        raise NotImplementedError
diff --git a/beautifulsoup/builder/_lxml.py b/beautifulsoup/builder/_lxml.py
deleted file mode 100644
index 23ac485..0000000
--- a/beautifulsoup/builder/_lxml.py
+++ /dev/null
@@ -1,108 +0,0 @@
-__all__ = [
-    'LXMLTreeBuilderForXML',
-    'LXMLTreeBuilder',
-    ]
-
-from lxml import etree
-from beautifulsoup.element import Comment, Doctype
-from beautifulsoup.builder import (
-    FAST,
-    HTML,
-    HTMLTreeBuilder,
-    PERMISSIVE,
-    TreeBuilder,
-    XML)
-from beautifulsoup.dammit import UnicodeDammit
-import types
-
-LXML = 'lxml'
-
-class LXMLTreeBuilderForXML(TreeBuilder):
-    DEFAULT_PARSER_CLASS = etree.XMLParser
-
-    is_xml = True
-
-    # Well, it's permissive by XML parser standards.
-    features = [LXML, XML, FAST, PERMISSIVE]
-
-    @property
-    def default_parser(self):
-        # This can either return a parser object or a class, which
-        # will be instantiated with default arguments.
-        return etree.XMLParser(target=self, strip_cdata=False, recover=True)
-
-    def __init__(self, parser=None, empty_element_tags=None):
-        if empty_element_tags is not None:
-            self.empty_element_tags = set(empty_element_tags)
-        if parser is None:
-            # Use the default parser.
-            parser = self.default_parser
-        if callable(parser):
-            # Instantiate the parser with default arguments
-            parser = parser(target=self, strip_cdata=False)
-        self.parser = parser
-        self.soup = None
-
-    def prepare_markup(self, markup, user_specified_encoding=None,
-                       document_declared_encoding=None):
-        """
-        :return: A 3-tuple (markup, original encoding, encoding
-        declared within markup).
-        """
-        if isinstance(markup, unicode):
-            return markup, None, None
-
-        try_encodings = [user_specified_encoding, document_declared_encoding]
-        dammit = UnicodeDammit(markup, try_encodings, isHTML=True)
-        return (dammit.markup, dammit.original_encoding,
-                dammit.declared_html_encoding)
-
-    def feed(self, markup):
-        self.parser.feed(markup)
-        self.parser.close()
-
-    def close(self):
-        pass
-
-    def start(self, name, attrs):
-        self.soup.handle_starttag(name, attrs)
-
-    def end(self, name):
-        self.soup.endData()
-        completed_tag = self.soup.tagStack[-1]
-        self.soup.handle_endtag(name)
-
-    def pi(self, target, data):
-        pass
-
-    def data(self, content):
-        self.soup.handle_data(content)
-
-    def doctype(self, name, pubid, system):
-        self.soup.endData()
-        doctype = Doctype.for_name_and_ids(name, pubid, system)
-        self.soup.object_was_parsed(doctype)
-
-    def comment(self, content):
-        "Handle comments as Comment objects."
-        self.soup.endData()
-        self.soup.handle_data(content)
-        self.soup.endData(Comment)
-
-    def test_fragment_to_document(self, fragment):
-        """See `TreeBuilder`."""
-        return u'<?xml version="1.0" encoding="utf-8">\n%s' % fragment
-
-
-class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
-
-    features = [LXML, HTML, FAST]
-    is_xml = False
-
-    @property
-    def default_parser(self):
-        return etree.HTMLParser
-
-    def test_fragment_to_document(self, fragment):
-        """See `TreeBuilder`."""
-        return u'<html><body>%s</body></html>' % fragment
diff --git a/beautifulsoup/dammit.py b/beautifulsoup/dammit.py
deleted file mode 100644
index 4483118..0000000
--- a/beautifulsoup/dammit.py
+++ /dev/null
@@ -1,410 +0,0 @@
-"""Beautiful Soup bonus library: Unicode, Dammit
-
-This class forces XML data into a standard format (usually to UTF-8 or
-Unicode).  It is heavily based on code from Mark Pilgrim's Universal
-Feed Parser. It does not rewrite the XML or HTML to reflect a new
-encoding; that's the tree builder's job.
-"""
-
-import codecs
-from htmlentitydefs import codepoint2name
-import re
-import types
-
-# Autodetects character encodings. Very useful.
-# Download from http://chardet.feedparser.org/
-#  or 'apt-get install python-chardet'
-#  or 'easy_install chardet'
-try:
-    import chardet
-    #import chardet.constants
-    #chardet.constants._debug = 1
-except ImportError:
-    chardet = None
-
-# Available from http://cjkpython.i18n.org/.
-try:
-    import iconv_codec
-except ImportError:
-    pass
-
-
-class EntitySubstitution(object):
-
-    """Substitute XML or HTML entities for the corresponding characters."""
-
-    def _populate_class_variables():
-        """Build the character-to-entity-name table and a regular
-        expression that matches any character in that table."""
-        # Codepoint 34 (the quotation mark) is deliberately left out:
-        # there's no point in turning it into &quot; unless it occurs
-        # within an attribute value, which is handled elsewhere.
-        lookup = dict((unichr(codepoint), name)
-                      for codepoint, name in codepoint2name.items()
-                      if codepoint != 34)
-        re_definition = "[%s]" % "".join(lookup.keys())
-        return lookup, re.compile(re_definition)
-    CHARACTER_TO_HTML_ENTITY, CHARACTER_TO_HTML_ENTITY_RE = (
-        _populate_class_variables())
-
-
-    CHARACTER_TO_XML_ENTITY = {
-        "'" : "apos",
-        '"' : "quot",
-        "&" : "amp",
-        "<" : "lt",
-        ">" : "gt",
-        }
-
-    # Matches an angle bracket, or an ampersand that does not start a
-    # numeric, hexadecimal or named entity reference.
-    BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|"
-                                           "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)"
-                                           ")")
-
-    @classmethod
-    def _substitute_html_entity(cls, matchobj):
-        """Used with a regular expression to substitute the named
-        HTML entity for the matched character."""
-        return "&%s;" % cls.CHARACTER_TO_HTML_ENTITY.get(matchobj.group(0))
-
-    @classmethod
-    def _substitute_xml_entity(cls, matchobj):
-        """Used with a regular expression to substitute the
-        appropriate XML entity for an XML special character."""
-        return "&%s;" % cls.CHARACTER_TO_XML_ENTITY[matchobj.group(0)]
-
-    @classmethod
-    def substitute_xml(cls, value, make_quoted_attribute=False):
-        """Substitute XML entities for special XML characters.
-
-        :param value: A string to be substituted. The less-than sign will
-          become &lt;, the greater-than sign will become &gt;, and any
-          ampersands that are not part of an entity definition will
-          become &amp;.
-
-        :param make_quoted_attribute: If True, then the string will be
-         quoted, as befits an attribute value.
-
-         Ordinarily, the string will be quoted using double quotes.
-
-          Bob's Bar -> "Bob's Bar"
-
-         If the string contains double quotes, it will be quoted using
-         single quotes.
-
-          Welcome to "my bar" -> 'Welcome to "my bar"'
-
-         If the string contains both single and double quotes, the
-         double quotes will be escaped, and the string will be quoted
-         using double quotes.
-
-          Welcome to "Bob's Bar" -> "Welcome to &quot;Bob's Bar&quot;"
-
-        :return: The substituted string, wrapped in quotes if
-         make_quoted_attribute is True.
-        """
-        quote_with = None
-        if make_quoted_attribute:
-            has_double_quote = '"' in value
-            if has_double_quote and "'" in value:
-                # Both kinds of quote are present. Turn the double
-                # quotes into entities and quote with double quotes.
-                # We escape the double quotes rather than the single
-                # quotes because the entity name is "&quot;" whether
-                # this is HTML or XML. If we escaped the single
-                # quotes, we'd have to decide between &apos; and
-                # &squot;.
-                value = value.replace('"', "&quot;")
-                quote_with = '"'
-            elif has_double_quote:
-                # Double quotes but no single quotes: single quotes
-                # can safely delimit the attribute.
-                quote_with = "'"
-            else:
-                quote_with = '"'
-
-        # Escape angle brackets, and ampersands that aren't part of
-        # entities.
-        escaped = cls.BARE_AMPERSAND_OR_BRACKET.sub(
-            cls._substitute_xml_entity, value)
-        if not make_quoted_attribute:
-            return escaped
-        return quote_with + escaped + quote_with
-
-    @classmethod
-    def substitute_html(cls, s):
-        """Replace certain Unicode characters with named HTML entities.
-
-        This differs from data.encode(encoding, 'xmlcharrefreplace')
-        in that the goal is to make the result more readable (to those
-        with ASCII displays) rather than to recover from
-        errors. There's absolutely nothing wrong with a UTF-8 string
-        containing a LATIN SMALL LETTER E WITH ACUTE, but replacing that
-        character with "&eacute;" will make it more readable to some
-        people.
-        """
-        entity_re = cls.CHARACTER_TO_HTML_ENTITY_RE
-        return entity_re.sub(cls._substitute_html_entity, s)
-
-
-class UnicodeDammit:
-    """A class for detecting the encoding of a *ML document and
-    converting it to a Unicode string. If the source encoding is
-    windows-1252, can replace MS smart quotes with their HTML or XML
-    equivalents.
-
-    After construction, `unicode` holds the decoded document (or a
-    false value if no encoding worked), `original_encoding` names the
-    codec that succeeded (or is None), and `tried_encodings` lists
-    every codec that was attempted.
-    """
-
-    # This dictionary maps commonly seen values for "charset" in HTML
-    # meta tags to the corresponding Python codec names. It only covers
-    # values that aren't in Python's aliases and can't be determined
-    # by the heuristics in find_codec.
-    CHARSET_ALIASES = { "macintosh" : "mac-roman",
-                        "x-sjis" : "shift-jis" }
-
-    ENCODINGS_WITH_SMART_QUOTES = [
-        "windows-1252",
-        "iso-8859-1",
-        "iso-8859-2",
-        ]
-
-    def __init__(self, markup, override_encodings=None,
-                 smart_quotes_to=None, isHTML=False):
-        """Detect the encoding of `markup` and decode it.
-
-        :param markup: The document, as a bytestring or a Unicode string.
-        :param override_encodings: Encodings to try, in order, before
-         the encoding declared in the document and the one sniffed
-         from its first bytes. Any iterable, or None, is accepted.
-        :param smart_quotes_to: If 'xml', bytes \\x80-\\x9f are replaced
-         with hexadecimal character references; if any other non-None
-         value, with named entities. If None they are left alone.
-        :param isHTML: If True, a <meta> charset is also honored and
-         recorded in `declared_html_encoding`.
-        """
-        self.declared_html_encoding = None
-        self.original_encoding = None
-        self.markup, document_encoding, sniffed_encoding = \
-                     self._detectEncoding(markup, isHTML)
-        self.smart_quotes_to = smart_quotes_to
-        self.tried_encodings = []
-        if markup == '' or isinstance(markup, unicode):
-            self.original_encoding = None
-            self.unicode = unicode(markup)
-            return
-
-        # Copy the caller's encodings into a fresh list so that tuples
-        # (and None) work and the argument is never shared or mutated.
-        candidates = list(override_encodings or [])
-        candidates.extend([document_encoding, sniffed_encoding])
-
-        u = None
-        for proposed_encoding in candidates:
-            if proposed_encoding is not None:
-                u = self._convert_from(proposed_encoding)
-                if u:
-                    break
-
-        # If no luck and we have auto-detection library, try that:
-        if not u and chardet and not isinstance(self.markup, unicode):
-            u = self._convert_from(chardet.detect(self.markup)['encoding'])
-
-        # As a last resort, try utf-8 and windows-1252:
-        if not u:
-            for proposed_encoding in ("utf-8", "windows-1252"):
-                u = self._convert_from(proposed_encoding)
-                if u:
-                    break
-
-        self.unicode = u
-        if not u: self.original_encoding = None
-
-    def _sub_ms_char(self, match):
-        """Changes a MS smart quote character to an XML or HTML
-        entity."""
-        orig = match.group(1)
-        sub = self.MS_CHARS.get(orig)
-        if isinstance(sub, tuple):
-            # (entity name, hexadecimal codepoint)
-            if self.smart_quotes_to == 'xml':
-                sub = '&#x'.encode() + sub[1].encode() + ';'.encode()
-            else:
-                sub = '&'.encode() + sub[0].encode() + ';'.encode()
-        else:
-            # A plain replacement string.
-            sub = sub.encode()
-        return sub
-
-    def _convert_from(self, proposed):
-        """Try to decode self.markup using the proposed encoding.
-
-        On success, self.markup becomes the Unicode result,
-        self.original_encoding the codec name, and the result is
-        returned. Returns None if the codec is unknown, was already
-        tried, or the conversion raised an exception.
-        """
-        proposed = self.find_codec(proposed)
-        if not proposed or proposed in self.tried_encodings:
-            return None
-        self.tried_encodings.append(proposed)
-        markup = self.markup
-
-        # Convert smart quotes to HTML if coming from an encoding
-        # that might have them.
-        if (self.smart_quotes_to is not None
-            and proposed.lower() in self.ENCODINGS_WITH_SMART_QUOTES):
-            smart_quotes_re = "([\x80-\x9f])"
-            smart_quotes_compiled = re.compile(smart_quotes_re)
-            markup = smart_quotes_compiled.sub(self._sub_ms_char, markup)
-
-        try:
-            # print "Trying to convert document to %s" % proposed
-            u = self._to_unicode(markup, proposed)
-            self.markup = u
-            self.original_encoding = proposed
-        except Exception:
-            # Deliberately best-effort: a failed conversion just means
-            # the caller moves on to the next candidate encoding.
-            return None
-        #print "Correct encoding: %s" % proposed
-        return self.markup
-
-    def _to_unicode(self, data, encoding):
-        '''Given a string and its encoding, decodes the string into Unicode.
-        %encoding is a string recognized by encodings.aliases
-
-        If the data starts with a byte order mark, the mark is removed
-        and the encoding it implies is used instead of %encoding.'''
-
-        # strip Byte Order Mark (if present)
-        if (len(data) >= 4) and (data[:2] == '\xfe\xff') \
-               and (data[2:4] != '\x00\x00'):
-            encoding = 'utf-16be'
-            data = data[2:]
-        elif (len(data) >= 4) and (data[:2] == '\xff\xfe') \
-                 and (data[2:4] != '\x00\x00'):
-            # The check on bytes 2-3 keeps a UTF-32LE BOM from being
-            # mistaken for a UTF-16LE one.
-            encoding = 'utf-16le'
-            data = data[2:]
-        elif data[:3] == '\xef\xbb\xbf':
-            encoding = 'utf-8'
-            data = data[3:]
-        elif data[:4] == '\x00\x00\xfe\xff':
-            encoding = 'utf-32be'
-            data = data[4:]
-        elif data[:4] == '\xff\xfe\x00\x00':
-            encoding = 'utf-32le'
-            data = data[4:]
-        newdata = unicode(data, encoding)
-        return newdata
-
-    def _detectEncoding(self, xml_data, isHTML=False):
-        """Given a document, tries to detect its XML encoding.
-
-        :return: A 3-tuple (data, declared encoding, sniffed encoding).
-         When a multi-byte encoding is recognized from the first bytes,
-         the data is re-encoded as UTF-8 so the declaration can be
-         searched for with a regular expression.
-        """
-        xml_encoding = sniffed_xml_encoding = None
-        try:
-            if xml_data[:4] == '\x4c\x6f\xa7\x94':
-                # EBCDIC
-                xml_data = self._ebcdic_to_ascii(xml_data)
-            elif xml_data[:4] == '\x00\x3c\x00\x3f':
-                # UTF-16BE
-                sniffed_xml_encoding = 'utf-16be'
-                xml_data = unicode(xml_data, 'utf-16be').encode('utf-8')
-            elif (len(xml_data) >= 4) and (xml_data[:2] == '\xfe\xff') \
-                     and (xml_data[2:4] != '\x00\x00'):
-                # UTF-16BE with BOM
-                sniffed_xml_encoding = 'utf-16be'
-                xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8')
-            elif xml_data[:4] == '\x3c\x00\x3f\x00':
-                # UTF-16LE
-                sniffed_xml_encoding = 'utf-16le'
-                xml_data = unicode(xml_data, 'utf-16le').encode('utf-8')
-            elif (len(xml_data) >= 4) and (xml_data[:2] == '\xff\xfe') and \
-                     (xml_data[2:4] != '\x00\x00'):
-                # UTF-16LE with BOM
-                sniffed_xml_encoding = 'utf-16le'
-                xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8')
-            elif xml_data[:4] == '\x00\x00\x00\x3c':
-                # UTF-32BE
-                sniffed_xml_encoding = 'utf-32be'
-                xml_data = unicode(xml_data, 'utf-32be').encode('utf-8')
-            elif xml_data[:4] == '\x3c\x00\x00\x00':
-                # UTF-32LE
-                sniffed_xml_encoding = 'utf-32le'
-                xml_data = unicode(xml_data, 'utf-32le').encode('utf-8')
-            elif xml_data[:4] == '\x00\x00\xfe\xff':
-                # UTF-32BE with BOM
-                sniffed_xml_encoding = 'utf-32be'
-                xml_data = unicode(xml_data[4:], 'utf-32be').encode('utf-8')
-            elif xml_data[:4] == '\xff\xfe\x00\x00':
-                # UTF-32LE with BOM
-                sniffed_xml_encoding = 'utf-32le'
-                xml_data = unicode(xml_data[4:], 'utf-32le').encode('utf-8')
-            elif xml_data[:3] == '\xef\xbb\xbf':
-                # UTF-8 with BOM
-                sniffed_xml_encoding = 'utf-8'
-                xml_data = unicode(xml_data[3:], 'utf-8').encode('utf-8')
-            else:
-                sniffed_xml_encoding = 'ascii'
-        except Exception:
-            # Best-effort sniffing: if the data can't be transcoded,
-            # carry on with whatever was established before the error.
-            pass
-        xml_encoding_re = '^<\?.*encoding=[\'"](.*?)[\'"].*\?>'.encode()
-        xml_encoding_match = re.compile(xml_encoding_re).match(xml_data)
-        if not xml_encoding_match and isHTML:
-            meta_re = '<\s*meta[^>]+charset=([^>]*?)[;\'">]'.encode()
-            regexp = re.compile(meta_re, re.I)
-            xml_encoding_match = regexp.search(xml_data)
-        if xml_encoding_match is not None:
-            xml_encoding = xml_encoding_match.groups()[0].decode(
-                'ascii').lower()
-            if isHTML:
-                self.declared_html_encoding = xml_encoding
-            # A declaration that only names a family of multi-byte
-            # encodings is less specific than what the first bytes
-            # told us, so prefer the sniffed encoding.
-            if sniffed_xml_encoding and \
-               (xml_encoding in ('iso-10646-ucs-2', 'ucs-2', 'csunicode',
-                                 'iso-10646-ucs-4', 'ucs-4', 'csucs4',
-                                 'utf-16', 'utf-32', 'utf_16', 'utf_32',
-                                 'utf16', 'u16')):
-                xml_encoding = sniffed_xml_encoding
-        return xml_data, xml_encoding, sniffed_xml_encoding
-
-
-    def find_codec(self, charset):
-        """Map a charset name to one Python's codec registry knows.
-
-        Tries the name (after CHARSET_ALIASES), then the name with
-        hyphens removed, then with hyphens turned into underscores.
-        If none of those is a known codec, returns charset unchanged.
-        """
-        return self._codec(self.CHARSET_ALIASES.get(charset, charset)) \
-               or (charset and self._codec(charset.replace("-", ""))) \
-               or (charset and self._codec(charset.replace("-", "_"))) \
-               or charset
-
-    def _codec(self, charset):
-        """Return charset if codecs.lookup() accepts it, else None.
-        A false charset is returned as-is."""
-        if not charset: return charset
-        codec = None
-        try:
-            codecs.lookup(charset)
-            codec = charset
-        except (LookupError, ValueError):
-            pass
-        return codec
-
-    # Translation table built on first use by _ebcdic_to_ascii and
-    # cached on the class.
-    EBCDIC_TO_ASCII_MAP = None
-    def _ebcdic_to_ascii(self, s):
-        """Translate the bytestring s through the EBCDIC table below."""
-        c = self.__class__
-        if not c.EBCDIC_TO_ASCII_MAP:
-            emap = (0,1,2,3,156,9,134,127,151,141,142,11,12,13,14,15,
-                    16,17,18,19,157,133,8,135,24,25,146,143,28,29,30,31,
-                    128,129,130,131,132,10,23,27,136,137,138,139,140,5,6,7,
-                    144,145,22,147,148,149,150,4,152,153,154,155,20,21,158,26,
-                    32,160,161,162,163,164,165,166,167,168,91,46,60,40,43,33,
-                    38,169,170,171,172,173,174,175,176,177,93,36,42,41,59,94,
-                    45,47,178,179,180,181,182,183,184,185,124,44,37,95,62,63,
-                    186,187,188,189,190,191,192,193,194,96,58,35,64,39,61,34,
-                    195,97,98,99,100,101,102,103,104,105,196,197,198,199,200,
-                    201,202,106,107,108,109,110,111,112,113,114,203,204,205,
-                    206,207,208,209,126,115,116,117,118,119,120,121,122,210,
-                    211,212,213,214,215,216,217,218,219,220,221,222,223,224,
-                    225,226,227,228,229,230,231,123,65,66,67,68,69,70,71,72,
-                    73,232,233,234,235,236,237,125,74,75,76,77,78,79,80,81,
-                    82,238,239,240,241,242,243,92,159,83,84,85,86,87,88,89,
-                    90,244,245,246,247,248,249,48,49,50,51,52,53,54,55,56,57,
-                    250,251,252,253,254,255)
-            import string
-            c.EBCDIC_TO_ASCII_MAP = string.maketrans( \
-            ''.join(map(chr, range(256))), ''.join(map(chr, emap)))
-        return s.translate(c.EBCDIC_TO_ASCII_MAP)
-
-    # Replacements for bytes \x80-\x9f. A tuple is (HTML entity name,
-    # hexadecimal Unicode codepoint); a plain string is used verbatim.
-    MS_CHARS = { '\x80' : ('euro', '20AC'),
-                 '\x81' : ' ',
-                 '\x82' : ('sbquo', '201A'),
-                 '\x83' : ('fnof', '192'),
-                 '\x84' : ('bdquo', '201E'),
-                 '\x85' : ('hellip', '2026'),
-                 '\x86' : ('dagger', '2020'),
-                 '\x87' : ('Dagger', '2021'),
-                 '\x88' : ('circ', '2C6'),
-                 '\x89' : ('permil', '2030'),
-                 '\x8A' : ('Scaron', '160'),
-                 '\x8B' : ('lsaquo', '2039'),
-                 '\x8C' : ('OElig', '152'),
-                 '\x8D' : '?',
-                 '\x8E' : ('#x17D', '17D'),
-                 '\x8F' : '?',
-                 '\x90' : '?',
-                 '\x91' : ('lsquo', '2018'),
-                 '\x92' : ('rsquo', '2019'),
-                 '\x93' : ('ldquo', '201C'),
-                 '\x94' : ('rdquo', '201D'),
-                 '\x95' : ('bull', '2022'),
-                 '\x96' : ('ndash', '2013'),
-                 '\x97' : ('mdash', '2014'),
-                 '\x98' : ('tilde', '2DC'),
-                 '\x99' : ('trade', '2122'),
-                 '\x9a' : ('scaron', '161'),
-                 '\x9b' : ('rsaquo', '203A'),
-                 '\x9c' : ('oelig', '153'),
-                 '\x9d' : '?',
-                 '\x9e' : ('#x17E', '17E'),
-                 # Was ('Yuml', ''), which produced the invalid
-                 # reference "&#x;" when converting to XML.
-                 '\x9f' : ('Yuml', '178'),}
diff --git a/beautifulsoup/element.py b/beautifulsoup/element.py
deleted file mode 100644
index 61ed4ab..0000000
--- a/beautifulsoup/element.py
+++ /dev/null
@@ -1,855 +0,0 @@
-import re
-import types
-try:
-    from htmlentitydefs import name2codepoint
-except ImportError:
-    name2codepoint = {}
-from beautifulsoup.dammit import EntitySubstitution
-
-from util import isList
-
-DEFAULT_OUTPUT_ENCODING = "utf-8"
-
-
-class PageElement(object):
-    """Contains the navigational information for some part of the page
-    (either a tag or a piece of text)"""
-
-    def setup(self, parent=None, previous=None):
-        """Sets up the initial relations between this element and
-        other elements."""
-        self.parent = parent
-        self.previous = previous
-        self.next = None
-        self.previousSibling = None
-        self.nextSibling = None
-        if self.parent and self.parent.contents:
-            self.previousSibling = self.parent.contents[-1]
-            self.previousSibling.nextSibling = self
-
-    def _index_of_child(self, element):
-        """Returns the position of `element` in self.contents.
-
-        Children are compared by identity, not equality: string
-        elements compare equal whenever their text matches, so
-        list.index() could pick the wrong sibling.
-
-        Raises ValueError if `element` is not one of the children.
-        """
-        for position, child in enumerate(self.contents):
-            if child is element:
-                return position
-        raise ValueError("element is not a child of this object")
-
-    def replaceWith(self, replaceWith):
-        """Replaces this element in the tree with `replaceWith`.
-
-        `replaceWith` may be a string or an element; if it is already
-        part of a tree, it is moved from its current position.
-        """
-        if replaceWith is self:
-            return
-        oldParent = self.parent
-        # Detach the replacement first. That way the index computed
-        # below is already correct even when the replacement was an
-        # earlier sibling of this element (including the first one).
-        if getattr(replaceWith, 'parent', None) is not None:
-            replaceWith.extract()
-        myIndex = oldParent._index_of_child(self)
-        self.extract()
-        oldParent.insert(myIndex, replaceWith)
-
-    def extract(self):
-        """Destructively rips this element out of the tree."""
-        if self.parent:
-            try:
-                # Remove by identity rather than with list.remove(),
-                # which would drop the first *equal* child instead.
-                del self.parent.contents[self.parent._index_of_child(self)]
-            except ValueError:
-                pass
-
-        #Find the two elements that would be next to each other if
-        #this element (and any children) hadn't been parsed. Connect
-        #the two.
-        lastChild = self._lastRecursiveChild()
-        nextElement = lastChild.next
-
-        if self.previous:
-            self.previous.next = nextElement
-        if nextElement:
-            nextElement.previous = self.previous
-        self.previous = None
-        lastChild.next = None
-
-        self.parent = None
-        if self.previousSibling:
-            self.previousSibling.nextSibling = self.nextSibling
-        if self.nextSibling:
-            self.nextSibling.previousSibling = self.previousSibling
-        self.previousSibling = self.nextSibling = None
-        return self
-
-    def _lastRecursiveChild(self):
-        "Finds the last element beneath this object to be parsed."
-        lastChild = self
-        while hasattr(lastChild, 'contents') and lastChild.contents:
-            lastChild = lastChild.contents[-1]
-        return lastChild
-
-    def insert(self, position, newChild):
-        """Inserts `newChild` into self.contents at `position`.
-
-        A plain string is wrapped in a NavigableString. An element
-        that already has a parent is extracted from it first. The
-        parent, sibling and next/previous links of the affected
-        elements are updated.
-        """
-        if isinstance(newChild, basestring) \
-            and not isinstance(newChild, NavigableString):
-            newChild = NavigableString(newChild)
-
-        position =  min(position, len(self.contents))
-        if hasattr(newChild, 'parent') and newChild.parent is not None:
-            # We're 'inserting' an element that's already one
-            # of this object's children.
-            if newChild.parent is self:
-                index = self._index_of_child(newChild)
-                if index < position:
-                    # Furthermore we're moving it further down the
-                    # list of this object's children. That means that
-                    # when we extract this element, our target index
-                    # will jump down one.
-                    position = position - 1
-            newChild.extract()
-
-        newChild.parent = self
-        previousChild = None
-        if position == 0:
-            newChild.previousSibling = None
-            newChild.previous = self
-        else:
-            previousChild = self.contents[position-1]
-            newChild.previousSibling = previousChild
-            newChild.previousSibling.nextSibling = newChild
-            newChild.previous = previousChild._lastRecursiveChild()
-        if newChild.previous:
-            newChild.previous.next = newChild
-
-        newChildsLastElement = newChild._lastRecursiveChild()
-
-        if position >= len(self.contents):
-            newChild.nextSibling = None
-
-            # The element that follows the new child in document order
-            # is the next sibling of the nearest ancestor that has one.
-            parent = self
-            parentsNextSibling = None
-            while not parentsNextSibling:
-                parentsNextSibling = parent.nextSibling
-                parent = parent.parent
-                if not parent: # This is the last element in the document.
-                    break
-            if parentsNextSibling:
-                newChildsLastElement.next = parentsNextSibling
-            else:
-                newChildsLastElement.next = None
-        else:
-            nextChild = self.contents[position]
-            newChild.nextSibling = nextChild
-            if newChild.nextSibling:
-                newChild.nextSibling.previousSibling = newChild
-            newChildsLastElement.next = nextChild
-
-        if newChildsLastElement.next:
-            newChildsLastElement.next.previous = newChildsLastElement
-        self.contents.insert(position, newChild)
-
-    def append(self, tag):
-        """Appends the given tag to the contents of this tag."""
-        self.insert(len(self.contents), tag)
-
-    def find_next(self, name=None, attrs={}, text=None, **kwargs):
-        """Returns the first item that matches the given criteria and
-        appears after this Tag in the document."""
-        return self._findOne(self.find_all_next, name, attrs, text, **kwargs)
-    findNext = find_next # BS3
-
-    def find_all_next(self, name=None, attrs={}, text=None, limit=None,
-                    **kwargs):
-        """Returns all items that match the given criteria and appear
-        after this Tag in the document."""
-        return self._find_all(name, attrs, text, limit, self.next_elements,
-                             **kwargs)
-    findAllNext = find_all_next # BS3
-
-    def find_next_sibling(self, name=None, attrs={}, text=None, **kwargs):
-        """Returns the closest sibling to this Tag that matches the
-        given criteria and appears after this Tag in the document."""
-        return self._findOne(self.find_next_siblings, name, attrs, text,
-                             **kwargs)
-    findNextSibling = find_next_sibling # BS3
-
-    def find_next_siblings(self, name=None, attrs={}, text=None, limit=None,
-                           **kwargs):
-        """Returns the siblings of this Tag that match the given
-        criteria and appear after this Tag in the document."""
-        return self._find_all(name, attrs, text, limit,
-                              self.next_siblings, **kwargs)
-    findNextSiblings = find_next_siblings  # BS3
-    fetchNextSiblings = find_next_siblings # BS2
-
-    def find_previous(self, name=None, attrs={}, text=None, **kwargs):
-        """Returns the first item that matches the given criteria and
-        appears before this Tag in the document."""
-        return self._findOne(
-            self.find_all_previous, name, attrs, text, **kwargs)
-    findPrevious = find_previous # BS3
-
-    def find_all_previous(self, name=None, attrs={}, text=None, limit=None,
-                        **kwargs):
-        """Returns all items that match the given criteria and appear
-        before this Tag in the document."""
-        return self._find_all(name, attrs, text, limit, self.previous_elements,
-                           **kwargs)
-    findAllPrevious = find_all_previous # BS3
-    fetchPrevious = find_all_previous   # BS2
-
-    def find_previous_sibling(self, name=None, attrs={}, text=None, **kwargs):
-        """Returns the closest sibling to this Tag that matches the
-        given criteria and appears before this Tag in the document."""
-        return self._findOne(self.find_previous_siblings, name, attrs, text,
-                             **kwargs)
-    findPreviousSibling = find_previous_sibling # BS3
-
-    def find_previous_siblings(self, name=None, attrs={}, text=None,
-                               limit=None, **kwargs):
-        """Returns the siblings of this Tag that match the given
-        criteria and appear before this Tag in the document."""
-        return self._find_all(name, attrs, text, limit,
-                              self.previous_siblings, **kwargs)
-    findPreviousSiblings = find_previous_siblings  # BS3
-    fetchPreviousSiblings = find_previous_siblings # BS2
-
-    def find_parent(self, name=None, attrs={}, **kwargs):
-        """Returns the closest parent of this Tag that matches the given
-        criteria."""
-        # NOTE: We can't use _findOne because findParents takes a different
-        # set of arguments.
-        r = None
-        # Keyword criteria used to be silently dropped here; pass them on.
-        l = self.find_parents(name, attrs, 1, **kwargs)
-        if l:
-            r = l[0]
-        return r
-    findParent = find_parent # BS3
-
-    def find_parents(self, name=None, attrs={}, limit=None, **kwargs):
-        """Returns the parents of this Tag that match the given
-        criteria."""
-
-        return self._find_all(name, attrs, None, limit, self.parents,
-                             **kwargs)
-    findParents = find_parents  # BS3
-    fetchParents = find_parents # BS2
-
-    #These methods do the real heavy lifting.
-
-    def _findOne(self, method, name, attrs, text, **kwargs):
-        """Calls `method` with a limit of 1 and returns its only
-        result, or None if there was no match."""
-        r = None
-        l = method(name, attrs, text, 1, **kwargs)
-        if l:
-            r = l[0]
-        return r
-
-    def _find_all(self, name, attrs, text, limit, generator, **kwargs):
-        "Iterates over a generator looking for things that match."
-
-        if isinstance(name, SoupStrainer):
-            strainer = name
-        else:
-            # Build a SoupStrainer
-            strainer = SoupStrainer(name, attrs, text, **kwargs)
-        results = ResultSet(strainer)
-        for i in generator:
-            # The generators end by yielding None; skip it (and any
-            # other false element).
-            if i:
-                found = strainer.search(i)
-                if found:
-                    results.append(found)
-                    if limit and len(results) >= limit:
-                        break
-        return results
-
-    #These generators can be used to navigate starting from both
-    #NavigableStrings and Tags.
-    #
-    #Each one yields the elements it passes over followed by a final
-    #None. The loops test for None explicitly so that an element that
-    #happens to be false (such as an empty string) doesn't cut the
-    #iteration short.
-    @property
-    def next_elements(self):
-        i = self
-        while i is not None:
-            i = i.next
-            yield i
-
-    @property
-    def next_siblings(self):
-        i = self
-        while i is not None:
-            i = i.nextSibling
-            yield i
-
-    @property
-    def previous_elements(self):
-        i = self
-        while i is not None:
-            i = i.previous
-            yield i
-
-    @property
-    def previous_siblings(self):
-        i = self
-        while i is not None:
-            i = i.previousSibling
-            yield i
-
-    @property
-    def parents(self):
-        i = self
-        while i is not None:
-            i = i.parent
-            yield i
-
-    # Old non-property versions of the generators, for backwards
-    # compatibility with BS3.
-    def nextGenerator(self):
-        return self.next_elements
-
-    def nextSiblingGenerator(self):
-        return self.next_siblings
-
-    def previousGenerator(self):
-        return self.previous_elements
-
-    def previousSiblingGenerator(self):
-        return self.previous_siblings
-
-    def parentGenerator(self):
-        return self.parents
-
-    # Utility methods
-    def substituteEncoding(self, str, encoding=None):
-        """Replaces the %SOUP-ENCODING% placeholder in `str` with
-        `encoding`, or with "utf-8" if no encoding is given."""
-        encoding = encoding or "utf-8"
-        return str.replace("%SOUP-ENCODING%", encoding)
-
-    def toEncoding(self, s, encoding=None):
-        """Encodes an object to a string in some encoding, or to Unicode.
-
-        With an encoding, the result is s (stringified first if it is
-        not already a string) encoded in that encoding. Without one,
-        the result is s converted to Unicode.
-        """
-        if isinstance(s, unicode):
-            if encoding:
-                s = s.encode(encoding)
-        elif isinstance(s, str):
-            if encoding:
-                s = s.encode(encoding)
-            else:
-                s = unicode(s)
-        else:
-            if encoding:
-                s  = self.toEncoding(str(s), encoding)
-            else:
-                s = unicode(s)
-        return s
-
-class NavigableString(unicode, PageElement):
-    """A Unicode string that is also an element of the parse tree."""
-
-    PREFIX = ''
-    SUFFIX = ''
-
-    def __new__(cls, value):
-        """Create a new NavigableString.
-
-        When unpickling a NavigableString, this method is called with
-        the string in DEFAULT_OUTPUT_ENCODING. That encoding needs to be
-        passed in to the superclass's __new__ or the superclass won't know
-        how to handle non-ASCII characters.
-        """
-        if not isinstance(value, unicode):
-            return unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING)
-        return unicode.__new__(cls, value)
-
-    def __getnewargs__(self):
-        """Return the plain Unicode value as the sole argument for
-        __new__."""
-        return (unicode(self),)
-
-    def __getattr__(self, attr):
-        """text.string gives you text. This is for backwards
-        compatibility for Navigable*String, but for CData* it lets you
-        get the string without the CData wrapper."""
-        if attr != 'string':
-            message = "'%s' object has no attribute '%s'" % (
-                self.__class__.__name__, attr)
-            raise AttributeError(message)
-        return self
-
-    def output_ready(self, substitute_html_entities=False):
-        """Return this string wrapped in PREFIX and SUFFIX, optionally
-        with named HTML entities substituted into the text first."""
-        output = self
-        if substitute_html_entities:
-            output = EntitySubstitution.substitute_html(self)
-        return self.PREFIX + output + self.SUFFIX
-
-
-class CData(NavigableString):
-    """A string that output_ready() wraps in CDATA section markers."""
-
-    PREFIX = u'<![CDATA['
-    SUFFIX = u']]>'
-
-
-class ProcessingInstruction(NavigableString):
-    """A string that output_ready() wraps in <? and ?>."""
-
-    PREFIX = u'<?'
-    SUFFIX = u'?>'
-
-
-class Comment(NavigableString):
-    """A string that output_ready() wraps in comment delimiters."""
-
-    PREFIX = u'<!--'
-    SUFFIX = u'-->'
-
-class Declaration(NavigableString):
-    """A string that output_ready() wraps in <! and !>."""
-    # NOTE(review): markup declarations normally end with a bare ">";
-    # the "!>" suffix looks unintended -- confirm before relying on it.
-    PREFIX = u'<!'
-    SUFFIX = u'!>'
-
-
-class Doctype(NavigableString):
-    """A string that output_ready() renders as a DOCTYPE declaration."""
-
-    @classmethod
-    def for_name_and_ids(cls, name, pub_id, system_id):
-        """Build a Doctype from a root element name and external IDs.
-
-        :param name: The name that follows "<!DOCTYPE ".
-        :param pub_id: The public identifier, or None.
-        :param system_id: The system identifier, or None.
-        :return: A Doctype whose text is the name followed by
-         ' PUBLIC "pub"', ' PUBLIC "pub" "sys"' or ' SYSTEM "sys"',
-         depending on which identifiers are present.
-        """
-        value = name
-        if pub_id is not None:
-            value += ' PUBLIC "%s"' % pub_id
-            if system_id is not None:
-                # After PUBLIC, the system identifier follows directly
-                # with no SYSTEM keyword.
-                value += ' "%s"' % system_id
-        elif system_id is not None:
-            value += ' SYSTEM "%s"' % system_id
-
-        return Doctype(value)
-
-    PREFIX = u'<!DOCTYPE '
-    SUFFIX = u'>'
-
-
-class Tag(PageElement):
-
-    """Represents a found HTML tag with its attributes and contents."""
-
-    def __init__(self, parser, builder, name, attrs=None, parent=None,
-                 previous=None):
-        """Basic constructor.
-
-        :param parser: Only its class is remembered, as parserClass.
-        :param builder: Asked to set up substitutions for this tag and
-         whether a tag with this name can be an empty-element tag.
-        :param name: The tag name.
-        :param attrs: The attributes, as anything dict() accepts; None
-         means no attributes. The tag keeps its own copy.
-        :param parent: Passed to setup().
-        :param previous: Passed to setup().
-        """
-
-        # We don't actually store the parser object: that lets extracted
-        # chunks be garbage-collected.
-        self.parserClass = parser.__class__
-        self.name = name
-        # Identity test: an attribute container that merely compares
-        # equal to None must not be discarded.
-        if attrs is None:
-            attrs = {}
-        else:
-            attrs = dict(attrs)
-        self.attrs = attrs
-        self.contents = []
-        self.setup(parent, previous)
-        self.hidden = False
-
-        # Set up any substitutions, such as the charset in a META tag.
-        self.contains_substitutions = builder.set_up_substitutions(self)
-
-        self.can_be_empty_element = builder.can_be_empty_element(name)
-
-    @property
-    def is_empty_element(self):
-        """Is this tag an empty-element tag? (aka a self-closing tag)
-
-        A tag that has contents is never an empty-element tag.
-
-        A tag that has no contents may or may not be an empty-element
-        tag. It depends on the builder used to create the tag. If the
-        builder has a designated list of empty-element tags, then only
-        a tag whose name shows up in that list is considered an
-        empty-element tag.
-
-        If the builder has no designated list of empty-element tags,
-        then any tag with no contents is an empty-element tag.
-        """
-        return len(self.contents) == 0 and self.can_be_empty_element
-    isSelfClosing = is_empty_element # BS3
-
-
-    @property
-    def string(self):
-        """Convenience property to get the single string within this tag.
-
-        :Return: If this tag has a single string child, return value
-         is that string. If this tag has no children, or more than one
-         child, return value is None. If this tag has one child tag,
-         return value is the 'string' attribute of the child tag,
-         recursively.
-        """
-        if len(self.contents) != 1:
-            return None
-        child = self.contents[0]
-        if isinstance(child, NavigableString):
-            return child
-        return child.string
-
-    def get(self, key, default=None):
-        """Returns the value of the 'key' attribute for the tag, or
-        the value given for 'default' if it doesn't have that
-        attribute."""
-        return self.attrs.get(key, default)
-
-    def has_key(self, key):
-        return self.attrs.has_key(key)
-
-    def __getitem__(self, key):
-        """tag[key] returns the value of the 'key' attribute for the tag,
-        and throws an exception if it's not there."""
-        return self.attrs[key]
-
-    def __iter__(self):
-        "Iterating over a tag iterates over its contents."
-        return iter(self.contents)
-
-    def __len__(self):
-        "The length of a tag is the length of its list of contents."
-        return len(self.contents)
-
-    def __contains__(self, x):
-        return x in self.contents
-
-    def __nonzero__(self):
-        "A tag is non-None even if it has no contents."
-        return True
-
-    def __setitem__(self, key, value):
-        """Setting tag[key] sets the value of the 'key' attribute for the
-        tag."""
-        self.attrs[key] = value
-
-    def __delitem__(self, key):
-        "Deleting tag[key] deletes all 'key' attributes for the tag."
-        if self.attrs.has_key(key):
-            del self.attrs[key]
-
-    def __call__(self, *args, **kwargs):
-        """Calling a tag like a function is the same as calling its
-        find_all() method. Eg. tag('a') returns a list of all the A tags
-        found within this tag."""
-        return apply(self.find_all, args, kwargs)
-
-    def __getattr__(self, tag):
-        #print "Getattr %s.%s" % (self.__class__, tag)
-        if len(tag) > 3 and tag.rfind('Tag') == len(tag)-3:
-            return self.find(tag[:-3])
-        elif tag.find('__') != 0:
-            return self.find(tag)
-        raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__, tag)
-
-    def __eq__(self, other):
-        """Returns true iff this tag has the same name, the same attributes,
-        and the same contents (recursively) as the given tag.
-
-        XXX: right now this will return false if two tags have the
-        same attributes in a different order. Should this be fixed?"""
-        if not hasattr(other, 'name') or not hasattr(other, 'attrs') or not hasattr(other, 'contents') or self.name != other.name or self.attrs != other.attrs or len(self) != len(other):
-            return False
-        for i in range(0, len(self.contents)):
-            if self.contents[i] != other.contents[i]:
-                return False
-        return True
-
-    def __ne__(self, other):
-        """Returns true iff this tag is not identical to the other tag,
-        as defined in __eq__."""
-        return not self == other
-
-    def __repr__(self, encoding=DEFAULT_OUTPUT_ENCODING):
-        """Renders this tag as a string."""
-        return self.encode(encoding)
-
-    def __unicode__(self):
-        return self.decode()
-
-    def __str__(self):
-        return self.encode()
-
-    def encode(self, encoding=DEFAULT_OUTPUT_ENCODING,
-               indent_level=None, substitute_html_entities=False):
-        return self.decode(indent_level, encoding,
-                           substitute_html_entities).encode(encoding)
-
-    def decode(self, indent_level=None,
-               eventual_encoding=DEFAULT_OUTPUT_ENCODING,
-               substitute_html_entities=False):
-        """Returns a Unicode representation of this tag and its contents.
-
-        :param eventual_encoding: The tag is destined to be
-           encoded into this encoding. This method is _not_
-           responsible for performing that encoding. This information
-           is passed in so that it can be substituted in if the
-           document contains a <META> tag that mentions the document's
-           encoding.
-        """
-        attrs = []
-        if self.attrs:
-            for key, val in sorted(self.attrs.items()):
-                if val is None:
-                    decoded = key
-                else:
-                    if not isinstance(val, basestring):
-                        val = str(val)
-                    if (self.contains_substitutions
-                        and eventual_encoding is not None
-                        and '%SOUP-ENCODING%' in val):
-                        val = self.substituteEncoding(val, eventual_encoding)
-
-                    decoded = (key + '='
-                               + EntitySubstitution.substitute_xml(val, True))
-                attrs.append(decoded)
-        close = ''
-        closeTag = ''
-        if self.is_empty_element:
-            close = ' /'
-        else:
-            closeTag = '</%s>' % self.name
-
-        pretty_print = (indent_level is not None)
-        if pretty_print:
-            space = (' ' * (indent_level-1))
-            indent_contents = indent_level + 1
-        else:
-            space = ''
-            indent_contents = None
-        contents = self.decode_contents(
-            indent_contents, eventual_encoding, substitute_html_entities)
-
-        if self.hidden:
-            # This is the 'document root' object.
-            s = contents
-        else:
-            s = []
-            attributeString = ''
-            if attrs:
-                attributeString = ' ' + ' '.join(attrs)
-            if pretty_print:
-                s.append(space)
-            s.append('<%s%s%s>' % (self.name, attributeString, close))
-            if pretty_print:
-                s.append("\n")
-            s.append(contents)
-            if pretty_print and contents and contents[-1] != "\n":
-                s.append("\n")
-            if pretty_print and closeTag:
-                s.append(space)
-            s.append(closeTag)
-            if pretty_print and closeTag and self.nextSibling:
-                s.append("\n")
-            s = ''.join(s)
-        return s
-
-    def decompose(self):
-        """Recursively destroys the contents of this tree."""
-        contents = [i for i in self.contents]
-        for i in contents:
-            if isinstance(i, Tag):
-                i.decompose()
-            else:
-                i.extract()
-        self.extract()
-
-    def prettify(self, encoding=DEFAULT_OUTPUT_ENCODING):
-        return self.encode(encoding, True)
-
-    def decode_contents(self, indent_level=None,
-                       eventual_encoding=DEFAULT_OUTPUT_ENCODING,
-                       substitute_html_entities=False):
-        """Renders the contents of this tag as a Unicode string.
-
-        :param eventual_encoding: The tag is destined to be
-           encoded into this encoding. This method is _not_
-           responsible for performing that encoding. This information
-           is passed in so that it can be substituted in if the
-           document contains a <META> tag that mentions the document's
-           encoding.
-        """
-        pretty_print = (indent_level is not None)
-        s=[]
-        for c in self:
-            text = None
-            if isinstance(c, NavigableString):
-                text = c.output_ready(substitute_html_entities)
-            elif isinstance(c, Tag):
-                s.append(c.decode(indent_level, eventual_encoding,
-                                  substitute_html_entities))
-            if text and indent_level:
-                text = text.strip()
-            if text:
-                if pretty_print:
-                    s.append(" " * (indent_level-1))
-                s.append(text)
-                if pretty_print:
-                    s.append("\n")
-        return ''.join(s)
-
-    #Soup methods
-
-    def find(self, name=None, attrs={}, recursive=True, text=None,
-             **kwargs):
-        """Return only the first child of this Tag matching the given
-        criteria."""
-        r = None
-        l = self.find_all(name, attrs, recursive, text, 1, **kwargs)
-        if l:
-            r = l[0]
-        return r
-    findChild = find
-
-    def find_all(self, name=None, attrs={}, recursive=True, text=None,
-                 limit=None, **kwargs):
-        """Extracts a list of Tag objects that match the given
-        criteria.  You can specify the name of the Tag and any
-        attributes you want the Tag to have.
-
-        The value of a key-value pair in the 'attrs' map can be a
-        string, a list of strings, a regular expression object, or a
-        callable that takes a string and returns whether or not the
-        string matches for some custom definition of 'matches'. The
-        same is true of the tag name."""
-        generator = self.recursive_children
-        if not recursive:
-            generator = self.children
-        return self._find_all(name, attrs, text, limit, generator, **kwargs)
-    findAll = find_all      # BS3
-    findChildren = find_all # BS2
-
-    #Generator methods
-    @property
-    def children(self):
-        for i in range(0, len(self.contents)):
-            yield self.contents[i]
-        raise StopIteration
-
-    @property
-    def recursive_children(self):
-        if not len(self.contents):
-            raise StopIteration
-        stopNode = self._lastRecursiveChild().next
-        current = self.contents[0]
-        while current is not stopNode:
-            yield current
-            current = current.next
-
-    # Old names for backwards compatibility
-    def childGenerator(self):
-        return self.children
-
-    def recursiveChildGenerator(self):
-        return self.recursive_children
-
-
-# Next, a couple classes to represent queries and their results.
-class SoupStrainer(object):
-    """Encapsulates a number of ways of matching a markup element (tag or
-    text)."""
-
-    def __init__(self, name=None, attrs={}, text=None, **kwargs):
-        self.name = name
-        if isinstance(attrs, basestring):
-            kwargs['class'] = attrs
-            attrs = None
-        if kwargs:
-            if attrs:
-                attrs = attrs.copy()
-                attrs.update(kwargs)
-            else:
-                attrs = kwargs
-        self.attrs = attrs
-        self.text = text
-
-    def __str__(self):
-        if self.text:
-            return self.text
-        else:
-            return "%s|%s" % (self.name, self.attrs)
-
-    def searchTag(self, markupName=None, markupAttrs={}):
-        found = None
-        markup = None
-        if isinstance(markupName, Tag):
-            markup = markupName
-            markupAttrs = markup
-        callFunctionWithTagData = callable(self.name) \
-                                and not isinstance(markupName, Tag)
-
-        if (not self.name) \
-               or callFunctionWithTagData \
-               or (markup and self._matches(markup, self.name)) \
-               or (not markup and self._matches(markupName, self.name)):
-            if callFunctionWithTagData:
-                match = self.name(markupName, markupAttrs)
-            else:
-                match = True
-                markupAttrMap = None
-                for attr, matchAgainst in self.attrs.items():
-                    if not markupAttrMap:
-                         if hasattr(markupAttrs, 'get'):
-                            markupAttrMap = markupAttrs
-                         else:
-                            markupAttrMap = {}
-                            for k,v in markupAttrs:
-                                markupAttrMap[k] = v
-                    attrValue = markupAttrMap.get(attr)
-                    if not self._matches(attrValue, matchAgainst):
-                        match = False
-                        break
-            if match:
-                if markup:
-                    found = markup
-                else:
-                    found = markupName
-        return found
-
-    def search(self, markup):
-        #print 'looking for %s in %s' % (self, markup)
-        found = None
-        # If given a list of items, scan it for a text element that
-        # matches.
-        if isList(markup) and not isinstance(markup, Tag):
-            for element in markup:
-                if isinstance(element, NavigableString) \
-                       and self.search(element):
-                    found = element
-                    break
-        # If it's a Tag, make sure its name or attributes match.
-        # Don't bother with Tags if we're searching for text.
-        elif isinstance(markup, Tag):
-            if not self.text:
-                found = self.searchTag(markup)
-        # If it's text, make sure the text matches.
-        elif isinstance(markup, NavigableString) or \
-                 isinstance(markup, basestring):
-            if self._matches(markup, self.text):
-                found = markup
-        else:
-            raise Exception, "I don't know how to match against a %s" \
-                  % markup.__class__
-        return found
-
-    def _matches(self, markup, matchAgainst):
-        #print "Matching %s against %s" % (markup, matchAgainst)
-        result = False
-        if matchAgainst == True and type(matchAgainst) == types.BooleanType:
-            result = markup != None
-        elif callable(matchAgainst):
-            result = matchAgainst(markup)
-        else:
-            #Custom match methods take the tag as an argument, but all
-            #other ways of matching match the tag name as a string.
-            if isinstance(markup, Tag):
-                markup = markup.name
-            if markup is not None and not isinstance(markup, basestring):
-                markup = unicode(markup)
-            #Now we know that chunk is either a string, or None.
-            if hasattr(matchAgainst, 'match'):
-                # It's a regexp object.
-                result = markup and matchAgainst.search(markup)
-            elif (isList(matchAgainst)
-                  and (markup is not None
-                       or not isinstance(matchAgainst, basestring))):
-                result = markup in matchAgainst
-            elif hasattr(matchAgainst, 'items'):
-                result = markup.has_key(matchAgainst)
-            elif matchAgainst and isinstance(markup, basestring):
-                if isinstance(markup, unicode):
-                    matchAgainst = unicode(matchAgainst)
-                else:
-                    matchAgainst = str(matchAgainst)
-
-            if not result:
-                result = matchAgainst == markup
-        return result
-
-
-class ResultSet(list):
-    """A ResultSet is just a list that keeps track of the SoupStrainer
-    that created it."""
-    def __init__(self, source):
-        list.__init__([])
-        self.source = source
diff --git a/beautifulsoup/testing.py b/beautifulsoup/testing.py
deleted file mode 100644
index 8fd9abf..0000000
--- a/beautifulsoup/testing.py
+++ /dev/null
@@ -1,37 +0,0 @@
-"""Helper classes for tests."""
-
-import unittest
-from beautifulsoup import BeautifulSoup
-from beautifulsoup.element import Comment, SoupStrainer
-from beautifulsoup.builder import LXMLTreeBuilder
-
-class SoupTest(unittest.TestCase):
-
-    @property
-    def default_builder(self):
-        return LXMLTreeBuilder()
-
-    def soup(self, markup, **kwargs):
-        """Build a Beautiful Soup object from markup."""
-        builder = kwargs.pop('builder', self.default_builder)
-        return BeautifulSoup(markup, builder=builder, **kwargs)
-
-    def document_for(self, markup):
-        """Turn an HTML fragment into a document.
-
-        The details depend on the builder.
-        """
-        return self.default_builder.test_fragment_to_document(markup)
-
-    def assertSoupEquals(self, to_parse, compare_parsed_to=None):
-        builder = self.default_builder
-        obj = BeautifulSoup(to_parse, builder=builder)
-        if compare_parsed_to is None:
-            compare_parsed_to = to_parse
-
-        self.assertEquals(obj.decode(), self.document_for(compare_parsed_to))
-
-
-
-
-
diff --git a/beautifulsoup/util.py b/beautifulsoup/util.py
deleted file mode 100644
index 5978865..0000000
--- a/beautifulsoup/util.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# Helper functions and mixin classes for Beautiful Soup
-
-import types
-try:
-    set
-except NameError:
-    from sets import Set as set
-
-def isList(l):
-    """Convenience method that works with all 2.x versions of Python
-    to determine whether or not something is listlike."""
-    return ((hasattr(l, '__iter__') and not isinstance(l, basestring))
-            or (type(l) in (types.ListType, types.TupleType)))
-
-def buildSet(args=None):
-    """Turns a list or a string into a set."""
-    if isinstance(args, str):
-        return set([args])
-    if args is None:
-        return set()
-    return set(args)