diff options
author | Leonard Richardson <leonard.richardson@canonical.com> | 2010-12-29 10:38:46 -0500 |
---|---|---|
committer | Leonard Richardson <leonard.richardson@canonical.com> | 2010-12-29 10:38:46 -0500 |
commit | 7ba9e49cada146978c1f02d9c28430fdcf56ab72 (patch) | |
tree | 77fd6a9d2ab9940569e21e0bcd6b29fed33b03ee /src | |
parent | 017a21625f347665ad23da6dd109b9af29b4b443 (diff) |
Refactored the code that turns HTML fragments into parser-specific documents for test purposes.
Diffstat (limited to 'src')
-rw-r--r-- | src/beautifulsoup/builder/__init__.py | 13 | ||||
-rw-r--r-- | src/beautifulsoup/builder/html5lib_builder.py | 3 | ||||
-rw-r--r-- | src/beautifulsoup/builder/lxml_builder.py | 5 | ||||
-rw-r--r-- | src/beautifulsoup/tests/helpers.py | 10 | ||||
-rw-r--r-- | src/beautifulsoup/tests/test_lxml.py | 20 | ||||
-rw-r--r-- | src/beautifulsoup/tests/test_soup.py | 6 |
6 files changed, 36 insertions, 21 deletions
```diff
diff --git a/src/beautifulsoup/builder/__init__.py b/src/beautifulsoup/builder/__init__.py
index cf54c9c..b7db8db 100644
--- a/src/beautifulsoup/builder/__init__.py
+++ b/src/beautifulsoup/builder/__init__.py
@@ -26,6 +26,19 @@ class TreeBuilder(Entities):
     def feed(self, markup):
         raise NotImplementedError()
 
+    def test_fragment_to_document(self, fragment):
+        """Wrap an HTML fragment to make it look like a document.
+
+        Different parsers do this differently. For instance, lxml
+        introduces an empty <head> tag, and html5lib
+        doesn't. Abstracting this away lets us write simple tests
+        which run HTML fragments through the parser and compare the
+        results against other HTML fragments.
+
+        This method should not be used outside of tests.
+        """
+        return fragment
+
 
 class SAXTreeBuilder(TreeBuilder):
     """A Beautiful Soup treebuilder that listens for SAX events."""
diff --git a/src/beautifulsoup/builder/html5lib_builder.py b/src/beautifulsoup/builder/html5lib_builder.py
index b4ef4de..80c3e6d 100644
--- a/src/beautifulsoup/builder/html5lib_builder.py
+++ b/src/beautifulsoup/builder/html5lib_builder.py
@@ -21,4 +21,7 @@ class HTML5TreeBuilder(SAXTreeBuilder, HTMLTreeBuilder):
         walker = treewalkers.getTreeWalker('dom')
         dom2sax(doc, self)
 
+    def test_fragment_to_document(self, fragment):
+        """See `TreeBuilder`."""
+        return u'<html><head></head><body>%s</body></html>' % fragment
 
diff --git a/src/beautifulsoup/builder/lxml_builder.py b/src/beautifulsoup/builder/lxml_builder.py
index 4949fea..3e1de5f 100644
--- a/src/beautifulsoup/builder/lxml_builder.py
+++ b/src/beautifulsoup/builder/lxml_builder.py
@@ -29,3 +29,8 @@ class LXMLTreeBuilder(HTMLTreeBuilder):
         self.soup.endData()
         self.soup.handle_data(content)
         self.soup.endData(Comment)
+
+    def test_fragment_to_document(self, fragment):
+        """See `TreeBuilder`."""
+        return u'<html><body>%s</body></html>' % fragment
+
diff --git a/src/beautifulsoup/tests/helpers.py b/src/beautifulsoup/tests/helpers.py
index 219d95c..d237556 100644
--- a/src/beautifulsoup/tests/helpers.py
+++ b/src/beautifulsoup/tests/helpers.py
@@ -3,15 +3,21 @@ import unittest
 
 from beautifulsoup import BeautifulSoup
 from beautifulsoup.element import SoupStrainer
+from beautifulsoup.builder.lxml_builder import LXMLTreeBuilder
 
 class SoupTest(unittest.TestCase):
     default_builder = None
 
     def assertSoupEquals(self, to_parse, compare_parsed_to=None):
-        obj = BeautifulSoup(to_parse, builder=self.default_builder)
+        builder = self.default_builder
+        if builder is None:
+            builder = LXMLTreeBuilder()
+        obj = BeautifulSoup(to_parse, builder=builder)
         if compare_parsed_to is None:
             compare_parsed_to = to_parse
 
-        self.assertEquals(obj.decode(), compare_parsed_to)
+        self.assertEquals(
+            obj.decode(),
+            builder.test_fragment_to_document(compare_parsed_to))
 
 
diff --git a/src/beautifulsoup/tests/test_lxml.py b/src/beautifulsoup/tests/test_lxml.py
index bbbbe18..12fd31e 100644
--- a/src/beautifulsoup/tests/test_lxml.py
+++ b/src/beautifulsoup/tests/test_lxml.py
@@ -6,38 +6,26 @@ import unittest
 
 
 class TestLXMLBuilder(SoupTest):
-    def __init__(self, builder):
-        super(TestLXMLBuilder, self).__init__()
-        self.default_builder = LXMLTreeBuilder()
-
     def runTest(self):
         self.test_bare_string()
         self.test_tag_nesting()
         self.test_self_closing()
         self.test_soupstrainer()
 
-    def document_for(self, s):
-        """Turn a fragment into an HTML document.
-
-        lxml does this to HTML fragments it receives, so we need to do it
-        if we're going to understand what comes out of lxml.
-        """
-        return u'<html><body>%s</body></html>' % s
-
     def test_bare_string(self):
         self.assertSoupEquals(
-            "A bare string", self.document_for("<p>A bare string</p>"))
+            "A bare string", "<p>A bare string</p>")
 
     def test_tag_nesting(self):
         b_tag = "<b>Inside a B tag</b>"
-        self.assertSoupEquals(b_tag, self.document_for(b_tag))
+        self.assertSoupEquals(b_tag)
 
         nested_b_tag = "<p>A <i>nested <b>tag</b></i></p>"
-        self.assertSoupEquals(nested_b_tag, self.document_for(nested_b_tag))
+        self.assertSoupEquals(nested_b_tag)
 
     def test_self_closing(self):
         self.assertSoupEquals(
-            "<p>A <meta> tag</p>", self.document_for("<p>A <meta /> tag</p>"))
+            "<p>A <meta> tag</p>", "<p>A <meta /> tag</p>")
 
     def test_soupstrainer(self):
         strainer = SoupStrainer("b")
diff --git a/src/beautifulsoup/tests/test_soup.py b/src/beautifulsoup/tests/test_soup.py
index 80357f0..571164a 100644
--- a/src/beautifulsoup/tests/test_soup.py
+++ b/src/beautifulsoup/tests/test_soup.py
@@ -41,9 +41,9 @@ class FollowThatTag(SoupTest):
     def setUp(self):
         ml = """
         <a id="x">1</a>
-        <A id="a">2</a>
-        <b id="b">3</a>
-        <b href="foo" id="x">4</a>
+        <A id="a">2</A>
+        <b id="b">3</b>
+        <b href="foo" id="x">4</b>
         <ac width=100>4</ac>"""
         self.soup = BeautifulStoneSoup(ml)
 
```