summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/beautifulsoup/builder/lxml_builder.py | 35
-rw-r--r-- src/beautifulsoup/tests/test_lxml.py | 6
-rw-r--r-- src/beautifulsoup/tests/treebuilder.py | 30
3 files changed, 71 insertions, 0 deletions
diff --git a/src/beautifulsoup/builder/lxml_builder.py b/src/beautifulsoup/builder/lxml_builder.py
new file mode 100644
index 0000000..2963a02
--- /dev/null
+++ b/src/beautifulsoup/builder/lxml_builder.py
@@ -0,0 +1,35 @@
+from lxml import etree
+from beautifulsoup.element import Comment
+from beautifulsoup.builder import TreeBuilder
+
+class LXMLTreeBuilder(TreeBuilder):
+    """Tree builder that drives an lxml parser and relays its events.
+
+    The instance passes itself to the parser as ``target``, so the parser
+    calls ``start``, ``end``, ``data``, ``comment`` and ``close`` on it.
+    Each event is forwarded to ``self.soup``, which starts out as None and
+    has to be assigned by code outside this class before ``feed`` is used.
+    """
+
+    def __init__(self, parser_class=etree.HTMLParser, self_closing_tags=None):
+        """Create the underlying parser with this object as its target.
+
+        parser_class -- callable accepting a ``target`` keyword argument
+            and returning an object with ``feed`` and ``close`` methods.
+        self_closing_tags -- container of tag names that
+            ``isSelfClosingTag`` should report as self-closing. Defaults
+            to an empty list.
+        """
+        # Use a None sentinel rather than a literal ``[]`` default: a list
+        # default is created once and would be shared (and mutated) across
+        # every builder constructed without this argument.
+        if self_closing_tags is None:
+            self_closing_tags = []
+        self.parser = parser_class(target=self)
+        self.self_closing_tags = self_closing_tags
+        self.soup = None
+
+    def isSelfClosingTag(self, name):
+        """Return True if ``name`` is one of the configured self-closing tags."""
+        return name in self.self_closing_tags
+
+    def feed(self, markup):
+        """Parse ``markup`` in one go: feed it to the parser, then close it."""
+        self.parser.feed(markup)
+        self.parser.close()
+
+    def close(self):
+        """Parser-target hook for end of input; nothing to do here."""
+        pass
+
+    def start(self, name, attrs):
+        """Parser-target hook: forward an opening tag to the soup."""
+        self.soup.handle_starttag(name, attrs)
+
+    def end(self, name):
+        """Parser-target hook: forward a closing tag to the soup."""
+        self.soup.handle_endtag(name)
+
+    def data(self, content):
+        """Parser-target hook: forward text content to the soup."""
+        self.soup.handle_data(content)
+
+    def comment(self, content):
+        """Handle comments as Comment objects."""
+        # End whatever data was pending first, so that only the comment's
+        # own content is ended with the Comment class below.
+        self.soup.endData()
+        self.soup.handle_data(content)
+        self.soup.endData(Comment)
diff --git a/src/beautifulsoup/tests/test_lxml.py b/src/beautifulsoup/tests/test_lxml.py
new file mode 100644
index 0000000..5211301
--- /dev/null
+++ b/src/beautifulsoup/tests/test_lxml.py
@@ -0,0 +1,6 @@
+from treebuilder import CompatibilityTest
+from beautifulsoup.builder.lxml_builder import LXMLTreeBuilder
+import unittest
+
+def additional_tests():
+    """Return a suite holding one CompatibilityTest bound to a new LXMLTreeBuilder."""
+    suite = unittest.TestSuite()
+    lxml_case = CompatibilityTest(LXMLTreeBuilder())
+    suite.addTest(lxml_case)
+    return suite
diff --git a/src/beautifulsoup/tests/treebuilder.py b/src/beautifulsoup/tests/treebuilder.py
new file mode 100644
index 0000000..25be325
--- /dev/null
+++ b/src/beautifulsoup/tests/treebuilder.py
@@ -0,0 +1,30 @@
+"""Tree builder compatibility suite.
+
+If you create a tree builder class, also create a test suite that
+subclasses this one. This test suite will parse various bits of
+well-formed HTML with your tree builder. Not every tree builder will
+handle bad HTML in the same way, but every builder should be able to
+handle _good_ HTML in the same way.
+"""
+
+import unittest
+from beautifulsoup import BeautifulSoup
+from test_soup import SoupTest
+
+class CompatibilityTest(SoupTest):
+    """Run the shared well-formed-HTML checks against a single tree builder.
+
+    Each instance is bound to one builder and exposes a single runnable
+    method, ``test``, which calls every individual check in turn.
+    """
+
+    # Name of the one entry-point method a runner should invoke.
+    _testMethodName = "test"
+
+    def __init__(self, builder):
+        """Bind this test case to ``builder``.
+
+        builder -- the tree builder instance under test; stored on
+            ``self.builder``.
+        """
+        # Run the base initializer instead of skipping it, so the per-instance
+        # state the test framework sets up there exists before a runner
+        # touches this case.
+        # NOTE(review): assumes SoupTest (defined in test_soup, not shown
+        # here) keeps unittest.TestCase's ``__init__(methodName)`` signature.
+        SoupTest.__init__(self, self._testMethodName)
+        self.builder = builder
+
+    def test(self):
+        """Entry point: run every compatibility check."""
+        self.test_bare_string()
+        self.test_tag_nesting()
+
+    def test_bare_string(self):
+        """Check a string that contains no markup at all."""
+        self.assertSoupEquals("A bare string")
+
+    def test_tag_nesting(self):
+        """Check a single tag and a three-level nesting of tags."""
+        self.assertSoupEquals("<b>Inside a B tag</b>")
+        self.assertSoupEquals("<p>A <i>nested <b>tag</b></i></p>")