summary | refs | log | tree | commit | diff
path: root/beautifulsoup
diff options
context:
space:
mode:
author Leonard Richardson <leonard.richardson@canonical.com> 2011-02-11 09:10:56 -0500
committer Leonard Richardson <leonard.richardson@canonical.com> 2011-02-11 09:10:56 -0500
commit d0531c4204a67a4289025bf7108a922f680fa057 (patch)
tree cdad3f97812e658d84a611b6017b7198fd97d818 /beautifulsoup
parent 3366ad67dc2dfdd508267efc87dfc851b612fb0d (diff)
parent d89c8878ea86a2575c87e9fad8081cfcd81e0bcd (diff)
Ported some more tests, fixed an encoding problem, and added rudimentary doctype handling.
Diffstat (limited to 'beautifulsoup')
-rw-r--r-- beautifulsoup/builder/lxml_builder.py 41
-rw-r--r-- beautifulsoup/element.py 8
2 files changed, 46 insertions, 3 deletions
diff --git a/beautifulsoup/builder/lxml_builder.py b/beautifulsoup/builder/lxml_builder.py
new file mode 100644
index 0000000..8336ab4
--- /dev/null
+++ b/beautifulsoup/builder/lxml_builder.py
@@ -0,0 +1,41 @@
+from lxml import etree
+from beautifulsoup.element import Comment, Doctype
+from beautifulsoup.builder import HTMLTreeBuilder
+
+class LXMLTreeBuilder(HTMLTreeBuilder):
+
+ def __init__(self, parser_class=etree.HTMLParser):
+ self.parser = parser_class(target=self)
+ self.soup = None
+
+ def feed(self, markup):
+ self.parser.feed(markup)
+ self.parser.close()
+
+ def close(self):
+ pass
+
+ def start(self, name, attrs):
+ self.soup.handle_starttag(name, attrs)
+
+ def end(self, name):
+ self.soup.handle_endtag(name)
+
+ def data(self, content):
+ self.soup.handle_data(content)
+
+ def doctype(self, name, pubid, system):
+ self.soup.endData()
+ self.soup.handle_data(name)
+ self.soup.endData(Doctype)
+
+ def comment(self, content):
+ "Handle comments as Comment objects."
+ self.soup.endData()
+ self.soup.handle_data(content)
+ self.soup.endData(Comment)
+
+ def test_fragment_to_document(self, fragment):
+ """See `TreeBuilder`."""
+ return u'<html><body>%s</body></html>' % fragment
+
diff --git a/beautifulsoup/element.py b/beautifulsoup/element.py
index bd9bcbf..b2e0e12 100644
--- a/beautifulsoup/element.py
+++ b/beautifulsoup/element.py
@@ -346,9 +346,6 @@ class NavigableString(unicode, PageElement):
else:
raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr)
- def encode(self, encoding=DEFAULT_OUTPUT_ENCODING):
- return self.decode().encode(encoding)
-
def decodeGivenEventualEncoding(self, eventualEncoding):
return self
@@ -373,6 +370,11 @@ class Declaration(NavigableString):
def decodeGivenEventualEncoding(self, eventualEncoding):
return u'<!' + self + u'>'
+class Doctype(NavigableString):
+
+ def decodeGivenEventualEncoding(self, eventualEncoding):
+ return u'<!DOCTYPE ' + self + u'>'
+
class Tag(PageElement, Entities):
"""Represents a found HTML tag with its attributes and contents."""