diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/beautifulsoup/builder/lxml_builder.py | 6 | ||||
-rw-r--r-- | src/beautifulsoup/tests/helpers.py | 18 | ||||
-rw-r--r-- | src/beautifulsoup/tests/test_lxml.py | 56 | ||||
-rw-r--r-- | src/beautifulsoup/tests/test_soup.py | 6 | ||||
-rw-r--r-- | src/beautifulsoup/tests/treebuilder.py | 46 |
5 files changed, 79 insertions, 53 deletions
diff --git a/src/beautifulsoup/builder/lxml_builder.py b/src/beautifulsoup/builder/lxml_builder.py index 2963a02..d8bf3f0 100644 --- a/src/beautifulsoup/builder/lxml_builder.py +++ b/src/beautifulsoup/builder/lxml_builder.py @@ -1,11 +1,13 @@ from lxml import etree from beautifulsoup.element import Comment -from beautifulsoup.builder import TreeBuilder +from beautifulsoup.builder import HTMLParserTreeBuilder, TreeBuilder class LXMLTreeBuilder(TreeBuilder): - def __init__(self, parser_class=etree.HTMLParser, self_closing_tags=[]): + def __init__(self, parser_class=etree.HTMLParser, self_closing_tags=None): self.parser = parser_class(target=self) + if self_closing_tags is None: + self_closing_tags = HTMLParserTreeBuilder.self_closing_tags self.self_closing_tags = self_closing_tags self.soup = None diff --git a/src/beautifulsoup/tests/helpers.py b/src/beautifulsoup/tests/helpers.py new file mode 100644 index 0000000..b9de4f2 --- /dev/null +++ b/src/beautifulsoup/tests/helpers.py @@ -0,0 +1,18 @@ +"""Helper classes for tests.""" + +import unittest +from beautifulsoup import BeautifulSoup +from beautifulsoup.element import SoupStrainer +from test_soup import SoupTest + +class SoupTest(unittest.TestCase): + + default_builder = None + + def assertSoupEquals(self, to_parse, compare_parsed_to=None): + obj = BeautifulSoup(to_parse, builder=self.default_builder) + if compare_parsed_to is None: + compare_parsed_to = to_parse + + self.assertEquals(obj.decode(), compare_parsed_to) + diff --git a/src/beautifulsoup/tests/test_lxml.py b/src/beautifulsoup/tests/test_lxml.py index 5211301..a69b9aa 100644 --- a/src/beautifulsoup/tests/test_lxml.py +++ b/src/beautifulsoup/tests/test_lxml.py @@ -1,6 +1,56 @@ -from treebuilder import CompatibilityTest +from helpers import SoupTest +from beautifulsoup import BeautifulSoup +from beautifulsoup.element import SoupStrainer from beautifulsoup.builder.lxml_builder import LXMLTreeBuilder import unittest -def additional_tests(): - return unittest.TestSuite([CompatibilityTest(LXMLTreeBuilder())]) +class TestLXMLBuilder(SoupTest): + + def __init__(self, builder): + super(TestLXMLBuilder, self).__init__() + self.default_builder = LXMLTreeBuilder() + + def runTest(self): + self.test_bare_string() + self.test_tag_nesting() + self.test_self_closing() + self.test_soupstrainer() + + def document_for(self, s): + """Turn a fragment into an HTML document. + + lxml does this to HTML fragments it receives, so we need to do it + if we're going to understand what comes out of lxml. + """ + return u'<html><body>%s</body></html>' % s + + def test_bare_string(self): + self.assertSoupEquals( + "A bare string", self.document_for("<p>A bare string</p>")) + + def test_tag_nesting(self): + b_tag = "<b>Inside a B tag</b>" + self.assertSoupEquals(b_tag, self.document_for(b_tag)) + + nested_b_tag = "<p>A <i>nested <b>tag</b></i></p>" + self.assertSoupEquals(nested_b_tag, self.document_for(nested_b_tag)) + + def test_self_closing(self): + self.assertSoupEquals( + "<p>A <meta> tag</p>", self.document_for("<p>A <meta /> tag</p>")) + + def test_soupstrainer(self): + strainer = SoupStrainer("b") + soup = BeautifulSoup("A <b>bold</b> <i>statement</i>", + self.default_builder, + parseOnlyThese=strainer) + self.assertEquals(soup.decode(), "<b>bold</b>") + + soup = BeautifulSoup("A <b>bold</b> <meta> <i>statement</i>", + self.default_builder, + parseOnlyThese=strainer) + self.assertEquals(soup.decode(), "<b>bold</b>") + + +def test_suite(): + unittest.TestLoader().loadTestsFromName('__name__') diff --git a/src/beautifulsoup/tests/test_soup.py b/src/beautifulsoup/tests/test_soup.py index ff91104..c5a02b6 100644 --- a/src/beautifulsoup/tests/test_soup.py +++ b/src/beautifulsoup/tests/test_soup.py @@ -19,13 +19,15 @@ def additional_tests(): class SoupTest(unittest.TestCase): - def assertSoupEquals(self, toParse, rep=None, c=BeautifulSoup, + default_builder = HTMLParserXMLTreeBuilder() + + def assertSoupEquals(self, toParse, rep=None, builder=None, encoding=None): """Parse the given text and make sure its string rep is the other given text.""" if rep == None: rep = toParse - obj = c(toParse) + obj = BeautifulSoup(toParse, builder=self.default_builder) if encoding is None: rep2 = obj.decode() else: diff --git a/src/beautifulsoup/tests/treebuilder.py b/src/beautifulsoup/tests/treebuilder.py deleted file mode 100644 index 489708d..0000000 --- a/src/beautifulsoup/tests/treebuilder.py +++ /dev/null @@ -1,46 +0,0 @@ -"""Tree builder compatibility suite. - -If you create a tree builder class, also create a test suite that -subclasses this one. This test suite will parse various bits of -well-formed HTML with your tree builder. Not every tree builder will -handle bad HTML in the same way, but every builder should be able to -handle _good_ HTML in the same way. -""" - -import unittest -from beautifulsoup import BeautifulSoup -from beautifulsoup.element import SoupStrainer -from test_soup import SoupTest - -class CompatibilityTest(SoupTest): - - def __init__(self, builder): - self.builder = builder - - _testMethodName = "test" - - def test(self): - self.test_bare_string() - self.test_tag_nesting() - self.test_self_closing() - self.test_soupstrainer() - - def test_bare_string(self): - self.assertSoupEquals("A bare string") - - def test_tag_nesting(self): - self.assertSoupEquals("<b>Inside a B tag</b>") - self.assertSoupEquals("<p>A <i>nested <b>tag</b></i></p>") - - def test_self_closing(self): - self.assertSoupEquals("A <meta> tag", "A <meta /> tag") - - def test_soupstrainer(self): - strainer = SoupStrainer("b") - soup = BeautifulSoup("A <b>bold</b> <i>statement</i>", - parseOnlyThese=strainer) - self.assertEquals(soup.decode(), "<b>bold</b>") - - soup = BeautifulSoup("A <b>bold</b> <meta> <i>statement</i>", - parseOnlyThese=strainer) - self.assertEquals(soup.decode(), "<b>bold</b>") |