From 7ba9e49cada146978c1f02d9c28430fdcf56ab72 Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Wed, 29 Dec 2010 10:38:46 -0500 Subject: Refactored the code that turns HTML fragments into parser-specific documents for test purposes. --- src/beautifulsoup/builder/__init__.py | 13 +++++++++++++ src/beautifulsoup/builder/html5lib_builder.py | 3 +++ src/beautifulsoup/builder/lxml_builder.py | 5 +++++ src/beautifulsoup/tests/helpers.py | 10 ++++++++-- src/beautifulsoup/tests/test_lxml.py | 20 ++++---------------- src/beautifulsoup/tests/test_soup.py | 6 +++--- 6 files changed, 36 insertions(+), 21 deletions(-) (limited to 'src') diff --git a/src/beautifulsoup/builder/__init__.py b/src/beautifulsoup/builder/__init__.py index cf54c9c..b7db8db 100644 --- a/src/beautifulsoup/builder/__init__.py +++ b/src/beautifulsoup/builder/__init__.py @@ -26,6 +26,19 @@ class TreeBuilder(Entities): def feed(self, markup): raise NotImplementedError() + def test_fragment_to_document(self, fragment): + """Wrap an HTML fragment to make it look like a document. + + Different parsers do this differently. For instance, lxml + introduces an empty <head> tag, and html5lib + doesn't. Abstracting this away lets us write simple tests + which run HTML fragments through the parser and compare the + results against other HTML fragments. + + This method should not be used outside of tests. 
+ """ + return fragment + class SAXTreeBuilder(TreeBuilder): """A Beautiful Soup treebuilder that listens for SAX events.""" diff --git a/src/beautifulsoup/builder/html5lib_builder.py b/src/beautifulsoup/builder/html5lib_builder.py index b4ef4de..80c3e6d 100644 --- a/src/beautifulsoup/builder/html5lib_builder.py +++ b/src/beautifulsoup/builder/html5lib_builder.py @@ -21,4 +21,7 @@ class HTML5TreeBuilder(SAXTreeBuilder, HTMLTreeBuilder): walker = treewalkers.getTreeWalker('dom') dom2sax(doc, self) + def test_fragment_to_document(self, fragment): + """See `TreeBuilder`.""" + return u'%s' % fragment diff --git a/src/beautifulsoup/builder/lxml_builder.py b/src/beautifulsoup/builder/lxml_builder.py index 4949fea..3e1de5f 100644 --- a/src/beautifulsoup/builder/lxml_builder.py +++ b/src/beautifulsoup/builder/lxml_builder.py @@ -29,3 +29,8 @@ class LXMLTreeBuilder(HTMLTreeBuilder): self.soup.endData() self.soup.handle_data(content) self.soup.endData(Comment) + + def test_fragment_to_document(self, fragment): + """See `TreeBuilder`.""" + return u'%s' % fragment + diff --git a/src/beautifulsoup/tests/helpers.py b/src/beautifulsoup/tests/helpers.py index 219d95c..d237556 100644 --- a/src/beautifulsoup/tests/helpers.py +++ b/src/beautifulsoup/tests/helpers.py @@ -3,15 +3,21 @@ import unittest from beautifulsoup import BeautifulSoup from beautifulsoup.element import SoupStrainer +from beautifulsoup.builder.lxml_builder import LXMLTreeBuilder class SoupTest(unittest.TestCase): default_builder = None def assertSoupEquals(self, to_parse, compare_parsed_to=None): - obj = BeautifulSoup(to_parse, builder=self.default_builder) + builder = self.default_builder + if builder is None: + builder = LXMLTreeBuilder() + obj = BeautifulSoup(to_parse, builder=builder) if compare_parsed_to is None: compare_parsed_to = to_parse - self.assertEquals(obj.decode(), compare_parsed_to) + self.assertEquals( + obj.decode(), + builder.test_fragment_to_document(compare_parsed_to)) diff --git 
a/src/beautifulsoup/tests/test_lxml.py b/src/beautifulsoup/tests/test_lxml.py index bbbbe18..12fd31e 100644 --- a/src/beautifulsoup/tests/test_lxml.py +++ b/src/beautifulsoup/tests/test_lxml.py @@ -6,38 +6,26 @@ import unittest class TestLXMLBuilder(SoupTest): - def __init__(self, builder): - super(TestLXMLBuilder, self).__init__() - self.default_builder = LXMLTreeBuilder() - def runTest(self): self.test_bare_string() self.test_tag_nesting() self.test_self_closing() self.test_soupstrainer() - def document_for(self, s): - """Turn a fragment into an HTML document. - - lxml does this to HTML fragments it receives, so we need to do it - if we're going to understand what comes out of lxml. - """ - return u'%s' % s - def test_bare_string(self): self.assertSoupEquals( - "A bare string", self.document_for("

A bare string

")) + "A bare string", "

A bare string

") def test_tag_nesting(self): b_tag = "Inside a B tag" - self.assertSoupEquals(b_tag, self.document_for(b_tag)) + self.assertSoupEquals(b_tag) nested_b_tag = "

A nested tag

" - self.assertSoupEquals(nested_b_tag, self.document_for(nested_b_tag)) + self.assertSoupEquals(nested_b_tag) def test_self_closing(self): self.assertSoupEquals( - "

A tag

", self.document_for("

A tag

")) + "

A tag

", "

A tag

") def test_soupstrainer(self): strainer = SoupStrainer("b") diff --git a/src/beautifulsoup/tests/test_soup.py b/src/beautifulsoup/tests/test_soup.py index 80357f0..571164a 100644 --- a/src/beautifulsoup/tests/test_soup.py +++ b/src/beautifulsoup/tests/test_soup.py @@ -41,9 +41,9 @@ class FollowThatTag(SoupTest): def setUp(self): ml = """ 1 - 2 - 3 - 4 + 2 + 3 + 4 4""" self.soup = BeautifulStoneSoup(ml) -- cgit v1.2.3