diff options
author | Leonard Richardson <leonard.richardson@canonical.com> | 2009-04-09 12:14:58 -0400 |
---|---|---|
committer | Leonard Richardson <leonard.richardson@canonical.com> | 2009-04-09 12:14:58 -0400 |
commit | 4440f31504605ac7572ca8d84d4cbfd7af19aa1e (patch) | |
tree | 8fc50a3382e442f6234d5e0ea8c38df04f9857cd | |
parent | 09516183e935676302431fcdd04b08d40f0ed2e7 (diff) |
Minor cleanup.
-rw-r--r-- | BeautifulSoup.py | 10 | ||||
-rw-r--r-- | lxml_builder.py | 14 | ||||
-rw-r--r-- | lxml_test.py | 2 |
3 files changed, 13 insertions, 13 deletions
diff --git a/BeautifulSoup.py b/BeautifulSoup.py index a16448c..99ce460 100644 --- a/BeautifulSoup.py +++ b/BeautifulSoup.py @@ -1012,13 +1012,16 @@ class TreeBuilder(Entities): self_closing_tags = set() assume_html = False + def __init__(self): + self.soup = None + def isSelfClosingTag(self, name): return name in self.self_closing_tags def reset(self): pass - def close(self): + def feed(self): pass @@ -1469,7 +1472,6 @@ class BeautifulStoneSoup(Tag): except StopParsing: pass self.markup = None # The markup can now be GCed. - self.builder.close() self.builder.soup = None self.builder = None # So can the builder. @@ -1654,7 +1656,7 @@ class BeautifulStoneSoup(Tag): class BeautifulSoup(BeautifulStoneSoup): - + """A convenience class for parsing HTML without creating a builder.""" def _defaultBuilder(self): return HTMLParserBuilder() @@ -1670,7 +1672,7 @@ class StopParsing(Exception): # or Unicode). It is heavily based on code from Mark Pilgrim's # Universal Feed Parser. It does not rewrite the XML or HTML to # reflect a new encoding: that happens in BeautifulStoneSoup.handle_pi -# (XML) and BeautifulSoup.start_meta (HTML). +# (XML) and BeautifulSoup.handleSpecialMetaTag (HTML). # Autodetects character encodings. # Download from http://chardet.feedparser.org/ diff --git a/lxml_builder.py b/lxml_builder.py index 95988e4..16c60f4 100644 --- a/lxml_builder.py +++ b/lxml_builder.py @@ -1,5 +1,5 @@ from lxml import etree -from BeautifulSoup import TreeBuilder +from BeautifulSoup import TreeBuilder, Comment class LXMLBuilder(TreeBuilder): @@ -15,6 +15,9 @@ class LXMLBuilder(TreeBuilder): self.parser.feed(markup) self.parser.close() + def close(self): + pass + def start(self, name, attrs): self.soup.handle_starttag(name, attrs) @@ -26,11 +29,6 @@ class LXMLBuilder(TreeBuilder): def comment(self, content): "Handle comments as Comment objects." - self._toStringSubclass(content, Comment) - - def _toStringSubclass(self, text, subclass): - """Adds a certain piece of text to the tree as a NavigableString - subclass.""" self.soup.endData() - self.data(text) - self.soup.endData(subclass) + self.soup.handle_data(content) + self.soup.endData(Comment) diff --git a/lxml_test.py b/lxml_test.py index 77c04e8..35880fb 100644 --- a/lxml_test.py +++ b/lxml_test.py @@ -9,5 +9,5 @@ soup = BeautifulSoup("<foo>bar</foo>", builder=builder) print soup.prettify() builder = LXMLBuilder(parser_class=etree.HTMLParser, self_closing_tags=["br"]) -soup = BeautifulSoup("<html><head><title>test<body><h1>page<script>foo<b>bar</script><br />title</h1>", builder=builder) +soup = BeautifulSoup("<html><head><title>test<body><h1>page<!--Comment--><script>foo<b>bar</script><br />title</h1>", builder=builder) print soup.prettify() |