From 522c72ef59e2b544a30a6bfbc3001dc1e84fed45 Mon Sep 17 00:00:00 2001
From: Leonard Richardson
Date: Fri, 10 Apr 2009 15:20:58 -0400
Subject: Added a simple compatibility test suite for tree builders.

---
Review notes (commentary only; ignored when the patch is applied):

* This text was recovered from a rendered web page. Line breaks and
  indentation were restored to match the hunk sizes (35 / 6 / 30 lines).
  The HTML markup inside the two string literals of test_tag_nesting had
  been stripped and was reconstructed; verify those two lines against
  blob 25be325 before relying on them. The author's e-mail address was
  lost in the same way and has not been guessed.
* LXMLTreeBuilder.__init__ uses a mutable default (self_closing_tags=[])
  and stores it on the instance, so every builder created without that
  argument shares one list object.
* CompatibilityTest.__init__ does not call the base-class initializer.
  SoupTest is presumably a unittest.TestCase subclass; confirm that the
  test runner does not need the state that initializer sets up.

 src/beautifulsoup/builder/lxml_builder.py | 35 +++++++++++++++++++++++++++++++
 src/beautifulsoup/tests/test_lxml.py      |  6 ++++++
 src/beautifulsoup/tests/treebuilder.py    | 30 ++++++++++++++++++++++++++
 3 files changed, 71 insertions(+)
 create mode 100644 src/beautifulsoup/builder/lxml_builder.py
 create mode 100644 src/beautifulsoup/tests/test_lxml.py
 create mode 100644 src/beautifulsoup/tests/treebuilder.py

(limited to 'src')

diff --git a/src/beautifulsoup/builder/lxml_builder.py b/src/beautifulsoup/builder/lxml_builder.py
new file mode 100644
index 0000000..2963a02
--- /dev/null
+++ b/src/beautifulsoup/builder/lxml_builder.py
@@ -0,0 +1,35 @@
+from lxml import etree
+from beautifulsoup.element import Comment
+from beautifulsoup.builder import TreeBuilder
+
+class LXMLTreeBuilder(TreeBuilder):
+
+    def __init__(self, parser_class=etree.HTMLParser, self_closing_tags=[]):
+        self.parser = parser_class(target=self)
+        self.self_closing_tags = self_closing_tags
+        self.soup = None
+
+    def isSelfClosingTag(self, name):
+        return name in self.self_closing_tags
+
+    def feed(self, markup):
+        self.parser.feed(markup)
+        self.parser.close()
+
+    def close(self):
+        pass
+
+    def start(self, name, attrs):
+        self.soup.handle_starttag(name, attrs)
+
+    def end(self, name):
+        self.soup.handle_endtag(name)
+
+    def data(self, content):
+        self.soup.handle_data(content)
+
+    def comment(self, content):
+        "Handle comments as Comment objects."
+        self.soup.endData()
+        self.soup.handle_data(content)
+        self.soup.endData(Comment)
diff --git a/src/beautifulsoup/tests/test_lxml.py b/src/beautifulsoup/tests/test_lxml.py
new file mode 100644
index 0000000..5211301
--- /dev/null
+++ b/src/beautifulsoup/tests/test_lxml.py
@@ -0,0 +1,6 @@
+from treebuilder import CompatibilityTest
+from beautifulsoup.builder.lxml_builder import LXMLTreeBuilder
+import unittest
+
+def additional_tests():
+    return unittest.TestSuite([CompatibilityTest(LXMLTreeBuilder())])
diff --git a/src/beautifulsoup/tests/treebuilder.py b/src/beautifulsoup/tests/treebuilder.py
new file mode 100644
index 0000000..25be325
--- /dev/null
+++ b/src/beautifulsoup/tests/treebuilder.py
@@ -0,0 +1,30 @@
+"""Tree builder compatibility suite.
+
+If you create a tree builder class, also create a test suite that
+subclasses this one. This test suite will parse various bits of
+well-formed HTML with your tree builder. Not every tree builder will
+handle bad HTML in the same way, but every builder should be able to
+handle _good_ HTML in the same way.
+"""
+
+import unittest
+from beautifulsoup import BeautifulSoup
+from test_soup import SoupTest
+
+class CompatibilityTest(SoupTest):
+
+    def __init__(self, builder):
+        self.builder = builder
+
+    _testMethodName = "test"
+
+    def test(self):
+        self.test_bare_string()
+        self.test_tag_nesting()
+
+    def test_bare_string(self):
+        self.assertSoupEquals("A bare string")
+
+    def test_tag_nesting(self):
+        self.assertSoupEquals("<b>Inside a B tag</b>")
+        self.assertSoupEquals("<p>A <i>nested <b>tag</b></i></p>")
-- 
cgit v1.2.3