summary refs log tree commit diff
diff options
context:
space:
mode:
author Leonard Richardson <leonard.richardson@canonical.com> 2009-04-09 12:14:58 -0400
committer Leonard Richardson <leonard.richardson@canonical.com> 2009-04-09 12:14:58 -0400
commit 4440f31504605ac7572ca8d84d4cbfd7af19aa1e (patch)
tree 8fc50a3382e442f6234d5e0ea8c38df04f9857cd
parent 09516183e935676302431fcdd04b08d40f0ed2e7 (diff)
Minor cleanup.
-rw-r--r-- BeautifulSoup.py 10
-rw-r--r-- lxml_builder.py 14
-rw-r--r-- lxml_test.py 2
3 files changed, 13 insertions, 13 deletions
diff --git a/BeautifulSoup.py b/BeautifulSoup.py
index a16448c..99ce460 100644
--- a/BeautifulSoup.py
+++ b/BeautifulSoup.py
@@ -1012,13 +1012,16 @@ class TreeBuilder(Entities):
self_closing_tags = set()
assume_html = False
+ def __init__(self):
+ self.soup = None
+
def isSelfClosingTag(self, name):
return name in self.self_closing_tags
def reset(self):
pass
- def close(self):
+ def feed(self):
pass
@@ -1469,7 +1472,6 @@ class BeautifulStoneSoup(Tag):
except StopParsing:
pass
self.markup = None # The markup can now be GCed.
- self.builder.close()
self.builder.soup = None
self.builder = None # So can the builder.
@@ -1654,7 +1656,7 @@ class BeautifulStoneSoup(Tag):
class BeautifulSoup(BeautifulStoneSoup):
-
+ """A convenience class for parsing HTML without creating a builder."""
def _defaultBuilder(self):
return HTMLParserBuilder()
@@ -1670,7 +1672,7 @@ class StopParsing(Exception):
# or Unicode). It is heavily based on code from Mark Pilgrim's
# Universal Feed Parser. It does not rewrite the XML or HTML to
# reflect a new encoding: that happens in BeautifulStoneSoup.handle_pi
-# (XML) and BeautifulSoup.start_meta (HTML).
+# (XML) and BeautifulSoup.handleSpecialMetaTag (HTML).
# Autodetects character encodings.
# Download from http://chardet.feedparser.org/
diff --git a/lxml_builder.py b/lxml_builder.py
index 95988e4..16c60f4 100644
--- a/lxml_builder.py
+++ b/lxml_builder.py
@@ -1,5 +1,5 @@
from lxml import etree
-from BeautifulSoup import TreeBuilder
+from BeautifulSoup import TreeBuilder, Comment
class LXMLBuilder(TreeBuilder):
@@ -15,6 +15,9 @@ class LXMLBuilder(TreeBuilder):
self.parser.feed(markup)
self.parser.close()
+ def close(self):
+ pass
+
def start(self, name, attrs):
self.soup.handle_starttag(name, attrs)
@@ -26,11 +29,6 @@ class LXMLBuilder(TreeBuilder):
def comment(self, content):
"Handle comments as Comment objects."
- self._toStringSubclass(content, Comment)
-
- def _toStringSubclass(self, text, subclass):
- """Adds a certain piece of text to the tree as a NavigableString
- subclass."""
self.soup.endData()
- self.data(text)
- self.soup.endData(subclass)
+ self.soup.handle_data(content)
+ self.soup.endData(Comment)
diff --git a/lxml_test.py b/lxml_test.py
index 77c04e8..35880fb 100644
--- a/lxml_test.py
+++ b/lxml_test.py
@@ -9,5 +9,5 @@ soup = BeautifulSoup("<foo>bar</foo>", builder=builder)
print soup.prettify()
builder = LXMLBuilder(parser_class=etree.HTMLParser, self_closing_tags=["br"])
-soup = BeautifulSoup("<html><head><title>test<body><h1>page<script>foo<b>bar</script><br />title</h1>", builder=builder)
+soup = BeautifulSoup("<html><head><title>test<body><h1>page<!--Comment--><script>foo<b>bar</script><br />title</h1>", builder=builder)
print soup.prettify()