summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/beautifulsoup/builder/__init__.py54
-rw-r--r--src/beautifulsoup/builder/html5lib_builder.py48
-rw-r--r--src/beautifulsoup/tests/test_soup.py2
3 files changed, 53 insertions, 51 deletions
diff --git a/src/beautifulsoup/builder/__init__.py b/src/beautifulsoup/builder/__init__.py
index 544e896..cf54c9c 100644
--- a/src/beautifulsoup/builder/__init__.py
+++ b/src/beautifulsoup/builder/__init__.py
@@ -1,8 +1,10 @@
from beautifulsoup.element import Entities
-__all__ = ['TreeBuilder',
- 'HTMLTreeBuilder',
- ]
+__all__ = [
+ 'HTMLTreeBuilder',
+ 'SAXTreeBuilder',
+ 'TreeBuilder',
+ ]
class TreeBuilder(Entities):
"""Turn a document into a Beautiful Soup object tree."""
@@ -25,6 +27,52 @@ class TreeBuilder(Entities):
raise NotImplementedError()
+class SAXTreeBuilder(TreeBuilder):
+ """A Beautiful Soup treebuilder that listens for SAX events."""
+
+ def feed(self, markup):
+ raise NotImplementedError()
+
+ def close(self):
+ pass
+
+ def startElement(self, name, attrs):
+ attrs = dict((key[1], value) for key, value in attrs.items())
+ #print "Start %s, %r" % (name, attrs)
+ self.soup.handle_starttag(name, attrs)
+
+ def endElement(self, name):
+ #print "End %s" % name
+ self.soup.handle_endtag(name)
+
+ def startElementNS(self, nsTuple, nodeName, attrs):
+ # Throw away (ns, nodeName) for now.
+ self.startElement(nodeName, attrs)
+
+ def endElementNS(self, nsTuple, nodeName):
+ # Throw away (ns, nodeName) for now.
+ self.endElement(nodeName)
+ #handler.endElementNS((ns, node.nodeName), node.nodeName)
+
+ def startPrefixMapping(self, prefix, nodeValue):
+ # Ignore the prefix for now.
+ pass
+
+ def endPrefixMapping(self, prefix):
+ # Ignore the prefix for now.
+ # handler.endPrefixMapping(prefix)
+ pass
+
+ def characters(self, content):
+ self.soup.handle_data(content)
+
+ def startDocument(self):
+ pass
+
+ def endDocument(self):
+ pass
+
+
class HTMLTreeBuilder(TreeBuilder):
"""This TreeBuilder knows facts about HTML.
diff --git a/src/beautifulsoup/builder/html5lib_builder.py b/src/beautifulsoup/builder/html5lib_builder.py
index 4f3f686..b4ef4de 100644
--- a/src/beautifulsoup/builder/html5lib_builder.py
+++ b/src/beautifulsoup/builder/html5lib_builder.py
@@ -1,54 +1,8 @@
from html5lib.treebuilders.dom import dom2sax
from html5lib import treewalkers
-from beautifulsoup.element import Comment
-from beautifulsoup.builder import HTMLTreeBuilder, TreeBuilder
+from beautifulsoup.builder import HTMLTreeBuilder, SAXTreeBuilder
import html5lib
-class SAXTreeBuilder(TreeBuilder):
- """A Beautiful Soup treebuilder that listens for SAX events."""
-
- def feed(self, markup):
- raise NotImplementedError()
-
- def close(self):
- pass
-
- def startElement(self, name, attrs):
- attrs = dict((key[1], value) for key, value in attrs.items())
- #print "Start %s, %r" % (name, attrs)
- self.soup.handle_starttag(name, attrs)
-
- def endElement(self, name):
- #print "End %s" % name
- self.soup.handle_endtag(name)
-
- def startElementNS(self, nsTuple, nodeName, attrs):
- # Throw away (ns, nodeName) for now.
- self.startElement(nodeName, attrs)
-
- def endElementNS(self, nsTuple, nodeName):
- # Throw away (ns, nodeName) for now.
- self.endElement(nodeName)
- #handler.endElementNS((ns, node.nodeName), node.nodeName)
-
- def startPrefixMapping(self, prefix, nodeValue):
- # Ignore the prefix for now.
- pass
-
- def endPrefixMapping(self, prefix):
- # Ignore the prefix for now.
- # handler.endPrefixMapping(prefix)
- pass
-
- def characters(self, content):
- self.soup.handle_data(content)
-
- def startDocument(self):
- pass
-
- def endDocument(self):
- pass
-
class HTML5TreeBuilder(SAXTreeBuilder, HTMLTreeBuilder):
"""Use html5lib to build a tree, then turn the parsed tree into
diff --git a/src/beautifulsoup/tests/test_soup.py b/src/beautifulsoup/tests/test_soup.py
index 90201a5..80357f0 100644
--- a/src/beautifulsoup/tests/test_soup.py
+++ b/src/beautifulsoup/tests/test_soup.py
@@ -10,7 +10,7 @@ import unittest
from beautifulsoup import *
from beautifulsoup.element import CData, Comment, Declaration, SoupStrainer, Tag
from beautifulsoup.dammit import UnicodeDammit
-from beautifulsoup.builder.html5_builder import HTML5TreeBuilder
+from beautifulsoup.builder.html5lib_builder import HTML5TreeBuilder
def additional_tests():
return unittest.TestLoader().loadTestsFromName(__name__)