diff options
Diffstat (limited to 'src/beautifulsoup/builder/__init__.py')
-rw-r--r-- | src/beautifulsoup/builder/__init__.py | 54 |
1 files changed, 51 insertions, 3 deletions
class SAXTreeBuilder(TreeBuilder):
    """A Beautiful Soup treebuilder that listens for SAX events.

    The methods mirror the callbacks of a SAX content handler and forward
    element and character events to ``self.soup``.  Namespaces and prefix
    mappings are ignored for now.

    ``self.soup`` is not assigned by this class; it must be set before any
    event is delivered.
    """

    def feed(self, markup):
        """Parse `markup`; subclasses must supply the actual SAX driver.

        :raise NotImplementedError: always, in this base implementation.
        """
        raise NotImplementedError()

    def close(self):
        """Finish parsing. Nothing needs to be released here."""
        pass

    def startElement(self, name, attrs):
        """Open the tag `name` with the given SAX attributes.

        :param name: The tag name to hand to the soup.
        :param attrs: A mapping-like object exposing ``items()``.
            Namespace-aware events key it by (namespace, local name)
            tuples, of which only the local name is kept.  Plain string
            keys, as delivered by non-namespace events, are used unchanged.
        """
        plain_attrs = {}
        for key, value in attrs.items():
            # Only tuple keys carry a namespace to strip.  Indexing a plain
            # string here would keep just its second character.
            if isinstance(key, tuple):
                key = key[1]
            plain_attrs[key] = value
        self.soup.handle_starttag(name, plain_attrs)

    def endElement(self, name):
        """Close the tag `name`."""
        self.soup.handle_endtag(name)

    def startElementNS(self, nsTuple, nodeName, attrs):
        """Open a namespaced tag, discarding the namespace for now.

        :param nsTuple: A (namespace, local name) pair.
        :param nodeName: The qualified name.  SAX parsers may pass None
            here, in which case the local name from `nsTuple` is used.
        :param attrs: Attributes keyed by (namespace, local name).
        """
        if nodeName is None:
            nodeName = nsTuple[1]
        self.startElement(nodeName, attrs)

    def endElementNS(self, nsTuple, nodeName):
        """Close a namespaced tag, discarding the namespace for now.

        :param nsTuple: A (namespace, local name) pair.
        :param nodeName: The qualified name.  SAX parsers may pass None
            here, in which case the local name from `nsTuple` is used.
        """
        if nodeName is None:
            nodeName = nsTuple[1]
        self.endElement(nodeName)

    def startPrefixMapping(self, prefix, nodeValue):
        """Ignore the start of a namespace prefix mapping for now."""
        pass

    def endPrefixMapping(self, prefix):
        """Ignore the end of a namespace prefix mapping for now."""
        pass

    def characters(self, content):
        """Pass a run of character data on to the soup."""
        self.soup.handle_data(content)

    def startDocument(self):
        """Do nothing at the start of the document."""
        pass

    def endDocument(self):
        """Do nothing at the end of the document."""
        pass