diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/beautifulsoup/builder/lxml_builder.py | 62 | ||||
-rw-r--r-- | src/beautifulsoup/tests/test_lxml.py | 7 | ||||
-rw-r--r-- | src/beautifulsoup/tests/treebuilder.py | 41 |
3 files changed, 110 insertions, 0 deletions
diff --git a/src/beautifulsoup/builder/lxml_builder.py b/src/beautifulsoup/builder/lxml_builder.py
new file mode 100644
index 0000000..2963a02
--- /dev/null
+++ b/src/beautifulsoup/builder/lxml_builder.py
@@ -0,0 +1,62 @@
+from lxml import etree
+from beautifulsoup.element import Comment
+from beautifulsoup.builder import TreeBuilder
+
+class LXMLTreeBuilder(TreeBuilder):
+    """Tree builder that is driven as the target of an lxml parser.
+
+    The parser is created with ``target=self``; under lxml's parser
+    target interface it then calls start(), end(), data(), comment()
+    and close() on this object. All but close() forward to ``self.soup``.
+    """
+
+    def __init__(self, parser_class=etree.HTMLParser, self_closing_tags=None):
+        """Create the parser with this builder as its target.
+
+        parser_class -- callable that accepts a ``target`` keyword.
+        self_closing_tags -- tag names for which isSelfClosingTag()
+            returns True; None (the default) means an empty list.
+        """
+        self.parser = parser_class(target=self)
+        # Build the default per instance: a literal [] in the signature
+        # would be a single list shared by every builder.
+        if self_closing_tags is None:
+            self_closing_tags = []
+        self.self_closing_tags = self_closing_tags
+        # Only ever set to None in this module, yet every event handler
+        # dereferences it -- presumably assigned by whoever owns the builder
+        # before feed() is called (TODO confirm).
+        self.soup = None
+
+    def isSelfClosingTag(self, name):
+        """Return True if name is one of the configured self-closing tags."""
+        return name in self.self_closing_tags
+
+    def feed(self, markup):
+        """Parse markup in one shot: feed all of it, then close the parser."""
+        self.parser.feed(markup)
+        self.parser.close()
+
+    def close(self):
+        """Parser-target hook; this builder has nothing to do here."""
+        pass
+
+    def start(self, name, attrs):
+        """Parser-target hook: pass an opening tag on to the soup."""
+        self.soup.handle_starttag(name, attrs)
+
+    def end(self, name):
+        """Parser-target hook: pass a closing tag on to the soup."""
+        self.soup.handle_endtag(name)
+
+    def data(self, content):
+        """Parser-target hook: pass text content on to the soup."""
+        self.soup.handle_data(content)
+
+    def comment(self, content):
+        "Handle comments as Comment objects."
+        # NOTE(review): presumably the first endData() flushes pending text
+        # and endData(Comment) stores this text as a Comment -- confirm.
+        self.soup.endData()
+        self.soup.handle_data(content)
+        self.soup.endData(Comment)
diff --git a/src/beautifulsoup/tests/test_lxml.py b/src/beautifulsoup/tests/test_lxml.py
new file mode 100644
index 0000000..5211301
--- /dev/null
+++ b/src/beautifulsoup/tests/test_lxml.py
@@ -0,0 +1,7 @@
+from treebuilder import CompatibilityTest
+from beautifulsoup.builder.lxml_builder import LXMLTreeBuilder
+import unittest
+
+def additional_tests():
+    """Return a suite that runs CompatibilityTest on an LXMLTreeBuilder."""
+    return unittest.TestSuite([CompatibilityTest(LXMLTreeBuilder())])
diff --git a/src/beautifulsoup/tests/treebuilder.py b/src/beautifulsoup/tests/treebuilder.py
new file mode 100644
index 0000000..25be325
--- /dev/null
+++ b/src/beautifulsoup/tests/treebuilder.py
@@ -0,0 +1,41 @@
+"""Tree builder compatibility suite.
+
+If you create a tree builder class, also create a test suite that
+subclasses this one. This test suite will parse various bits of
+well-formed HTML with your tree builder. Not every tree builder will
+handle bad HTML in the same way, but every builder should be able to
+handle _good_ HTML in the same way.
+"""
+
+import unittest
+from beautifulsoup import BeautifulSoup
+from test_soup import SoupTest
+
+class CompatibilityTest(SoupTest):
+    """Runs the compatibility checks with one tree builder instance."""
+
+    def __init__(self, builder):
+        """Store the tree builder under test."""
+        # NOTE(review): the base-class __init__ is not called; the
+        # _testMethodName attribute below is set by hand instead. Confirm
+        # that SoupTest needs no further initialisation.
+        self.builder = builder
+
+    # test() is the single entry point and calls every check itself.
+    # NOTE(review): presumably unittest reads this private attribute to
+    # find the method to run -- confirm for the Python version in use.
+    _testMethodName = "test"
+
+    def test(self):
+        """Run every compatibility check, in order."""
+        self.test_bare_string()
+        self.test_tag_nesting()
+
+    def test_bare_string(self):
+        """Check a string that contains no markup."""
+        self.assertSoupEquals("A bare string")
+
+    def test_tag_nesting(self):
+        """Check a single tag, then tags nested three deep."""
+        self.assertSoupEquals("<b>Inside a B tag</b>")
+        self.assertSoupEquals("<p>A <i>nested <b>tag</b></i></p>")