Diffstat (limited to 'beautifulsoup/builder')
-rw-r--r-- | beautifulsoup/builder/__init__.py  | 259 |
-rw-r--r-- | beautifulsoup/builder/_html5lib.py | 233 |
-rw-r--r-- | beautifulsoup/builder/_lxml.py     | 108 |
3 files changed, 0 insertions, 600 deletions
diff --git a/beautifulsoup/builder/__init__.py b/beautifulsoup/builder/__init__.py
deleted file mode 100644
index 10c6b7f..0000000
--- a/beautifulsoup/builder/__init__.py
+++ /dev/null
@@ -1,259 +0,0 @@
-from collections import defaultdict
-import re
-import sys
-
-__all__ = [
-    'HTMLTreeBuilder',
-    'SAXTreeBuilder',
-    'TreeBuilder',
-    'TreeBuilderRegistry',
-    ]
-
-# Some useful features for a TreeBuilder to have.
-FAST = 'fast'
-PERMISSIVE = 'permissive'
-XML = 'xml'
-HTML = 'html'
-HTML_5 = 'html5'
-
-
-class TreeBuilderRegistry(object):
-
-    def __init__(self):
-        self.builders_for_feature = defaultdict(list)
-        self.builders = []
-
-    def register(self, treebuilder_class):
-        """Register a treebuilder based on its advertised features."""
-        for feature in treebuilder_class.features:
-            self.builders_for_feature[feature].insert(0, treebuilder_class)
-        self.builders.insert(0, treebuilder_class)
-
-    def lookup(self, *features):
-        if len(self.builders) == 0:
-            # There are no builders at all.
-            return None
-
-        if len(features) == 0:
-            # They didn't ask for any features. Give them the most
-            # recently registered builder.
-            return self.builders[0]
-
-        # Go down the list of features in order, and eliminate any builders
-        # that don't match every feature.
-        features = list(features)
-        features.reverse()
-        candidates = None
-        candidate_set = None
-        while len(features) > 0:
-            feature = features.pop()
-            we_have_the_feature = self.builders_for_feature.get(feature, [])
-            if len(we_have_the_feature) > 0:
-                if candidates is None:
-                    candidates = we_have_the_feature
-                    candidate_set = set(candidates)
-                else:
-                    # Eliminate any candidates that don't have this feature.
-                    candidate_set = candidate_set.intersection(
-                        set(we_have_the_feature))
-
-        # The only valid candidates are the ones in candidate_set.
-        # Go through the original list of candidates and pick the first one
-        # that's in candidate_set.
-        if candidate_set is None:
-            return None
-        for candidate in candidates:
-            if candidate in candidate_set:
-                return candidate
-        return None
-
-# The BeautifulSoup class will take feature lists from developers and use them
-# to look up builders in this registry.
-builder_registry = TreeBuilderRegistry()
-
-
-class TreeBuilder(object):
-    """Turn a document into a Beautiful Soup object tree."""
-
-    features = []
-
-    is_xml = False
-    preserve_whitespace_tags = set()
-    empty_element_tags = None # A tag will be considered an empty-element
-                              # tag when and only when it has no contents.
-
-    def __init__(self):
-        self.soup = None
-
-    def reset(self):
-        pass
-
-    def can_be_empty_element(self, tag_name):
-        """Might a tag with this name be an empty-element tag?
-
-        The final markup may or may not actually present this tag as
-        self-closing.
-
-        For instance: an HTMLBuilder does not consider a <p> tag to be
-        an empty-element tag (it's not in
-        HTMLBuilder.empty_element_tags). This means an empty <p> tag
-        will be presented as "<p></p>", not "<p />".
-
-        The default implementation has no opinion about which tags are
-        empty-element tags, so a tag will be presented as an
-        empty-element tag if and only if it has no contents.
-        "<foo></foo>" will become "<foo />", and "<foo>bar</foo>" will
-        be left alone.
- """ - if self.empty_element_tags is None: - return True - return tag_name in self.empty_element_tags - - def feed(self, markup): - raise NotImplementedError() - - def prepare_markup(self, markup, user_specified_encoding=None, - document_declared_encoding=None): - return markup, None, None - - def test_fragment_to_document(self, fragment): - """Wrap an HTML fragment to make it look like a document. - - Different parsers do this differently. For instance, lxml - introduces an empty <head> tag, and html5lib - doesn't. Abstracting this away lets us write simple tests - which run HTML fragments through the parser and compare the - results against other HTML fragments. - - This method should not be used outside of tests. - """ - return fragment - - def set_up_substitutions(self, tag): - pass - - -class SAXTreeBuilder(TreeBuilder): - """A Beautiful Soup treebuilder that listens for SAX events.""" - - def feed(self, markup): - raise NotImplementedError() - - def close(self): - pass - - def startElement(self, name, attrs): - attrs = dict((key[1], value) for key, value in attrs.items()) - #print "Start %s, %r" % (name, attrs) - self.soup.handle_starttag(name, attrs) - - def endElement(self, name): - #print "End %s" % name - self.soup.handle_endtag(name) - - def startElementNS(self, nsTuple, nodeName, attrs): - # Throw away (ns, nodeName) for now. - self.startElement(nodeName, attrs) - - def endElementNS(self, nsTuple, nodeName): - # Throw away (ns, nodeName) for now. - self.endElement(nodeName) - #handler.endElementNS((ns, node.nodeName), node.nodeName) - - def startPrefixMapping(self, prefix, nodeValue): - # Ignore the prefix for now. - pass - - def endPrefixMapping(self, prefix): - # Ignore the prefix for now. - # handler.endPrefixMapping(prefix) - pass - - def characters(self, content): - self.soup.handle_data(content) - - def startDocument(self): - pass - - def endDocument(self): - pass - - -class HTMLTreeBuilder(TreeBuilder): - """This TreeBuilder knows facts about HTML. - - Such as which tags are empty-element tags. - """ - - preserve_whitespace_tags = set(['pre', 'textarea']) - empty_element_tags = set(['br' , 'hr', 'input', 'img', 'meta', - 'spacer', 'link', 'frame', 'base']) - - # Used by set_up_substitutions to detect the charset in a META tag - CHARSET_RE = re.compile("((^|;)\s*charset=)([^;]*)", re.M) - - def set_up_substitutions(self, tag): - if tag.name != 'meta': - return False - - http_equiv = tag.get('http-equiv') - content = tag.get('content') - - if (http_equiv is not None - and content is not None - and http_equiv.lower() == 'content-type'): - # This is an interesting meta tag. - match = self.CHARSET_RE.search(content) - if match: - if (self.soup.declared_html_encoding is not None or - self.soup.original_encoding == self.soup.from_encoding): - # An HTML encoding was sniffed while converting - # the document to Unicode, or an HTML encoding was - # sniffed during a previous pass through the - # document, or an encoding was specified - # explicitly and it worked. Rewrite the meta tag. - def rewrite(match): - return match.group(1) + "%SOUP-ENCODING%" - tag['content'] = self.CHARSET_RE.sub(rewrite, content) - return True - else: - # This is our first pass through the document. - # Go through it again with the encoding information. 
-                    new_charset = match.group(3)
-                    if (new_charset is not None
-                        and new_charset != self.soup.original_encoding):
-                        self.soup.declared_html_encoding = new_charset
-                        self.soup._feed(self.soup.declared_html_encoding)
-                        raise StopParsing
-                    pass
-        return False
-
-
-def register_treebuilders_from(module):
-    """Copy TreeBuilders from the given module into this module."""
-    # I'm fairly sure this is not the best way to do this.
-    this_module = sys.modules[__package__]
-    for name in module.__all__:
-        obj = getattr(module, name)
-
-        if issubclass(obj, TreeBuilder):
-            setattr(this_module, name, obj)
-            this_module.__all__.append(name)
-            # Register the builder while we're at it.
-            this_module.builder_registry.register(obj)
-
-# Builders are registered in reverse order of priority, so that custom
-# builder registrations will take precedence. In general, we want
-# html5lib to take precedence over lxml, because it's more reliable.
-try:
-    import _lxml
-    register_treebuilders_from(_lxml)
-except ImportError:
-    # They don't have lxml installed.
-    pass
-try:
-    import _html5lib
-    register_treebuilders_from(_html5lib)
-except ImportError:
-    # They don't have html5lib installed.
-    pass
diff --git a/beautifulsoup/builder/_html5lib.py b/beautifulsoup/builder/_html5lib.py
deleted file mode 100644
index f8a7a40..0000000
--- a/beautifulsoup/builder/_html5lib.py
+++ /dev/null
@@ -1,233 +0,0 @@
-__all__ = [
-    'HTML5TreeBuilder',
-    ]
-
-from beautifulsoup.builder import (
-    PERMISSIVE,
-    HTML,
-    HTML_5,
-    HTMLTreeBuilder,
-    )
-import html5lib
-from html5lib.constants import DataLossWarning
-import warnings
-from beautifulsoup.element import (
-    Comment,
-    Doctype,
-    NavigableString,
-    Tag,
-    )
-
-class HTML5TreeBuilder(HTMLTreeBuilder):
-    """Use html5lib to build a tree."""
-
-    features = ['html5lib', PERMISSIVE, HTML_5, HTML]
-
-    def prepare_markup(self, markup, user_specified_encoding):
-        # Store the user-specified encoding for use later on.
-        self.user_specified_encoding = user_specified_encoding
-        return markup, None, None
-
-    # These methods are defined by Beautiful Soup.
-    def feed(self, markup):
-        parser = html5lib.HTMLParser(tree=self.create_treebuilder)
-        doc = parser.parse(markup, encoding=self.user_specified_encoding)
-
-        # Set the character encoding detected by the tokenizer.
-        if isinstance(markup, unicode):
-            # We need to special-case this because html5lib sets
-            # charEncoding to UTF-8 if it gets Unicode input.
-            doc.original_encoding = None
-        else:
-            doc.original_encoding = parser.tokenizer.stream.charEncoding[0]
-
-    def create_treebuilder(self, namespaceHTMLElements):
-        self.underlying_builder = TreeBuilderForHtml5lib(
-            self.soup, namespaceHTMLElements)
-        return self.underlying_builder
-
-    def test_fragment_to_document(self, fragment):
-        """See `TreeBuilder`."""
-        return u'<html><head></head><body>%s</body></html>' % fragment
-
-
-class TreeBuilderForHtml5lib(html5lib.treebuilders._base.TreeBuilder):
-
-    def __init__(self, soup, namespaceHTMLElements):
-        self.soup = soup
-        if namespaceHTMLElements:
-            warnings.warn("namespaceHTMLElements not supported yet",
-                          DataLossWarning)
-        super(TreeBuilderForHtml5lib, self).__init__(namespaceHTMLElements)
-
-    def documentClass(self):
-        self.soup.reset()
-        return Element(self.soup, self.soup, None)
-
-    def insertDoctype(self, token):
-        name = token["name"]
-        publicId = token["publicId"]
-        systemId = token["systemId"]
-
-        doctype = Doctype.for_name_and_ids(name, publicId, systemId)
-        self.soup.object_was_parsed(doctype)
-
-    def elementClass(self, name, namespace):
-        if namespace is not None:
-            warnings.warn("BeautifulSoup cannot represent elements in any namespace", DataLossWarning)
-        return Element(Tag(self.soup, self.soup.builder, name), self.soup, namespace)
-
-    def commentClass(self, data):
-        return TextNode(Comment(data), self.soup)
-
-    def fragmentClass(self):
-        self.soup = BeautifulSoup("")
-        self.soup.name = "[document_fragment]"
-        return Element(self.soup, self.soup, None)
-
-    def appendChild(self, node):
-        self.soup.insert(len(self.soup.contents), node.element)
-
-    def testSerializer(self, element):
-        return testSerializer(element)
-
-    def getDocument(self):
-        return self.soup
-
-    def getFragment(self):
-        return html5lib.treebuilders._base.TreeBuilder.getFragment(self).element
-
-class AttrList(object):
-    def __init__(self, element):
-        self.element = element
-        self.attrs = dict(self.element.attrs)
-    def __iter__(self):
-        return self.attrs.items().__iter__()
-    def __setitem__(self, name, value):
-        "set attr", name, value
-        self.element[name] = value
-    def items(self):
-        return self.attrs.items()
-    def keys(self):
-        return self.attrs.keys()
-    def __getitem__(self, name):
-        return self.attrs[name]
-    def __contains__(self, name):
-        return name in self.attrs.keys()
-
-
-class Element(html5lib.treebuilders._base.Node):
-    def __init__(self, element, soup, namespace):
-        html5lib.treebuilders._base.Node.__init__(self, element.name)
-        self.element = element
-        self.soup = soup
-        self.namespace = namespace
-
-    def _nodeIndex(self, node, refNode):
-        # Finds a node by identity rather than equality
-        for index in range(len(self.element.contents)):
-            if id(self.element.contents[index]) == id(refNode.element):
-                return index
-        return None
-
-    def appendChild(self, node):
-        if (node.element.__class__ == NavigableString and self.element.contents
-            and self.element.contents[-1].__class__ == NavigableString):
-            # Concatenate new text onto old text node
-            # (TODO: This has O(n^2) performance, for input like "a</a>a</a>a</a>...")
-            newStr = NavigableString(self.element.contents[-1]+node.element)
-
-            # Remove the old text node
-            # (Can't simply use .extract() by itself, because it fails if
-            # an equal text node exists within the parent node)
-            oldElement = self.element.contents[-1]
-            del self.element.contents[-1]
-            oldElement.parent = None
-            oldElement.extract()
-
-            self.element.insert(len(self.element.contents), newStr)
-        else:
-            self.element.insert(len(self.element.contents), node.element)
-            node.parent = self
-
-    def getAttributes(self):
-        return AttrList(self.element)
-
-    def setAttributes(self, attributes):
-        if attributes is not None and attributes != {}:
-            for name, value in attributes.items():
-                self.element[name] = value
-            # The attributes may contain variables that need substitution.
-            # Call set_up_substitutions manually.
-            # The Tag constructor calls this method automatically,
-            # but html5lib creates a Tag object before setting up
-            # the attributes.
-            self.element.contains_substitutions = (
-                self.soup.builder.set_up_substitutions(
-                    self.element))
-    attributes = property(getAttributes, setAttributes)
-
-    def insertText(self, data, insertBefore=None):
-        text = TextNode(NavigableString(data), self.soup)
-        if insertBefore:
-            self.insertBefore(text, insertBefore)
-        else:
-            self.appendChild(text)
-
-    def insertBefore(self, node, refNode):
-        index = self._nodeIndex(node, refNode)
-        if (node.element.__class__ == NavigableString and self.element.contents
-            and self.element.contents[index-1].__class__ == NavigableString):
-            # (See comments in appendChild)
-            newStr = NavigableString(self.element.contents[index-1]+node.element)
-            oldNode = self.element.contents[index-1]
-            del self.element.contents[index-1]
-            oldNode.parent = None
-            oldNode.extract()
-
-            self.element.insert(index-1, newStr)
-        else:
-            self.element.insert(index, node.element)
-            node.parent = self
-
-    def removeChild(self, node):
-        index = self._nodeIndex(node.parent, node)
-        del node.parent.element.contents[index]
-        node.element.parent = None
-        node.element.extract()
-        node.parent = None
-
-    def reparentChildren(self, newParent):
-        while self.element.contents:
-            child = self.element.contents[0]
-            child.extract()
-            if isinstance(child, Tag):
-                newParent.appendChild(Element(child, self.soup, namespaces["html"]))
-            else:
-                newParent.appendChild(TextNode(child, self.soup))
-
-    def cloneNode(self):
-        node = Element(Tag(self.soup, self.soup.builder, self.element.name), self.soup, self.namespace)
-        for key,value in self.attributes:
-            node.attributes[key] = value
-        return node
-
-    def hasContent(self):
-        return self.element.contents
-
-    def getNameTuple(self):
-        if self.namespace == None:
-            return namespaces["html"], self.name
-        else:
-            return self.namespace, self.name
-
-    nameTuple = property(getNameTuple)
-
-class TextNode(Element):
-    def __init__(self, element, soup):
-        html5lib.treebuilders._base.Node.__init__(self, None)
-        self.element = element
-        self.soup = soup
-
-    def cloneNode(self):
-        raise NotImplementedError
diff --git a/beautifulsoup/builder/_lxml.py b/beautifulsoup/builder/_lxml.py
deleted file mode 100644
index 23ac485..0000000
--- a/beautifulsoup/builder/_lxml.py
+++ /dev/null
@@ -1,108 +0,0 @@
-__all__ = [
-    'LXMLTreeBuilderForXML',
-    'LXMLTreeBuilder',
-    ]
-
-from lxml import etree
-from beautifulsoup.element import Comment, Doctype
-from beautifulsoup.builder import (
-    FAST,
-    HTML,
-    HTMLTreeBuilder,
-    PERMISSIVE,
-    TreeBuilder,
-    XML)
-from beautifulsoup.dammit import UnicodeDammit
-import types
-
-LXML = 'lxml'
-
-class LXMLTreeBuilderForXML(TreeBuilder):
-    DEFAULT_PARSER_CLASS = etree.XMLParser
-
-    is_xml = True
-
-    # Well, it's permissive by XML parser standards.
-    features = [LXML, XML, FAST, PERMISSIVE]
-
-    @property
-    def default_parser(self):
-        # This can either return a parser object or a class, which
-        # will be instantiated with default arguments.
-        return etree.XMLParser(target=self, strip_cdata=False, recover=True)
-
-    def __init__(self, parser=None, empty_element_tags=None):
-        if empty_element_tags is not None:
-            self.empty_element_tags = set(empty_element_tags)
-        if parser is None:
-            # Use the default parser.
-            parser = self.default_parser
-        if callable(parser):
-            # Instantiate the parser with default arguments
-            parser = parser(target=self, strip_cdata=False)
-        self.parser = parser
-        self.soup = None
-
-    def prepare_markup(self, markup, user_specified_encoding=None,
-                       document_declared_encoding=None):
-        """
-        :return: A 3-tuple (markup, original encoding, encoding
-        declared within markup).
-        """
-        if isinstance(markup, unicode):
-            return markup, None, None
-
-        try_encodings = [user_specified_encoding, document_declared_encoding]
-        dammit = UnicodeDammit(markup, try_encodings, isHTML=True)
-        return (dammit.markup, dammit.original_encoding,
-                dammit.declared_html_encoding)
-
-    def feed(self, markup):
-        self.parser.feed(markup)
-        self.parser.close()
-
-    def close(self):
-        pass
-
-    def start(self, name, attrs):
-        self.soup.handle_starttag(name, attrs)
-
-    def end(self, name):
-        self.soup.endData()
-        completed_tag = self.soup.tagStack[-1]
-        self.soup.handle_endtag(name)
-
-    def pi(self, target, data):
-        pass
-
-    def data(self, content):
-        self.soup.handle_data(content)
-
-    def doctype(self, name, pubid, system):
-        self.soup.endData()
-        doctype = Doctype.for_name_and_ids(name, pubid, system)
-        self.soup.object_was_parsed(doctype)
-
-    def comment(self, content):
-        "Handle comments as Comment objects."
-        self.soup.endData()
-        self.soup.handle_data(content)
-        self.soup.endData(Comment)
-
-    def test_fragment_to_document(self, fragment):
-        """See `TreeBuilder`."""
-        return u'<?xml version="1.0" encoding="utf-8">\n%s' % fragment
-
-
-class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
-
-    features = [LXML, HTML, FAST]
-    is_xml = False
-
-    @property
-    def default_parser(self):
-        return etree.HTMLParser
-
-    def test_fragment_to_document(self, fragment):
-        """See `TreeBuilder`."""
-        return u'<html><body>%s</body></html>' % fragment
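
For reference, the following minimal sketch shows how TreeBuilderRegistry.lookup() (deleted from __init__.py above) resolved a builder from a list of features. It assumes the pre-removal beautifulsoup.builder package is still importable; the two Fake* builder classes are hypothetical stand-ins used only for illustration.

    from beautifulsoup.builder import (
        FAST, HTML, HTML_5, PERMISSIVE, XML,
        HTMLTreeBuilder, TreeBuilderRegistry)

    # Two toy builders; only their advertised feature lists matter here.
    class FakeLXMLBuilder(HTMLTreeBuilder):
        features = ['lxml', HTML, FAST]

    class FakeHTML5Builder(HTMLTreeBuilder):
        features = ['html5lib', PERMISSIVE, HTML_5, HTML]

    registry = TreeBuilderRegistry()
    registry.register(FakeLXMLBuilder)   # registered first: lower priority
    registry.register(FakeHTML5Builder)  # registered last: wins ties

    # No features requested: the most recently registered builder is returned.
    assert registry.lookup() is FakeHTML5Builder
    # Only the toy lxml builder advertises FAST.
    assert registry.lookup(FAST) is FakeLXMLBuilder
    # Both advertise HTML, but only the html5lib one is also PERMISSIVE.
    assert registry.lookup(HTML, PERMISSIVE) is FakeHTML5Builder
    # No registered builder advertises XML, so the lookup returns None.
    assert registry.lookup(XML) is None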
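
Similarly, the meta-charset rewriting that HTMLTreeBuilder.set_up_substitutions performed (also deleted from __init__.py above) can be summarized with a small sketch of its CHARSET_RE at work; the content string below is made up for illustration.

    import re

    # The regex removed above: group 1 ends with "charset=", group 3 is the charset value.
    CHARSET_RE = re.compile("((^|;)\s*charset=)([^;]*)", re.M)

    content = "text/html; charset=ISO-8859-1"   # hypothetical <meta> content attribute
    match = CHARSET_RE.search(content)
    assert match.group(3) == "ISO-8859-1"

    # On a later pass the charset value is replaced with a placeholder token,
    # so the document can be re-serialized in whatever encoding is chosen.
    rewritten = CHARSET_RE.sub(lambda m: m.group(1) + "%SOUP-ENCODING%", content)
    assert rewritten == "text/html; charset=%SOUP-ENCODING%"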