summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author Leonard Richardson <leonard.richardson@canonical.com> 2010-12-29 10:38:46 -0500
committer Leonard Richardson <leonard.richardson@canonical.com> 2010-12-29 10:38:46 -0500
commit 7ba9e49cada146978c1f02d9c28430fdcf56ab72 (patch)
tree 77fd6a9d2ab9940569e21e0bcd6b29fed33b03ee /src
parent 017a21625f347665ad23da6dd109b9af29b4b443 (diff)
Refactored the code that turns HTML fragments into parser-specific documents for test purposes.
Diffstat (limited to 'src')
-rw-r--r-- src/beautifulsoup/builder/__init__.py 13
-rw-r--r-- src/beautifulsoup/builder/html5lib_builder.py 3
-rw-r--r-- src/beautifulsoup/builder/lxml_builder.py 5
-rw-r--r-- src/beautifulsoup/tests/helpers.py 10
-rw-r--r-- src/beautifulsoup/tests/test_lxml.py 20
-rw-r--r-- src/beautifulsoup/tests/test_soup.py 6
6 files changed, 36 insertions, 21 deletions
diff --git a/src/beautifulsoup/builder/__init__.py b/src/beautifulsoup/builder/__init__.py
index cf54c9c..b7db8db 100644
--- a/src/beautifulsoup/builder/__init__.py
+++ b/src/beautifulsoup/builder/__init__.py
@@ -26,6 +26,19 @@ class TreeBuilder(Entities):
def feed(self, markup):
raise NotImplementedError()
+ def test_fragment_to_document(self, fragment):
+ """Wrap an HTML fragment to make it look like a document.
+
+ Different parsers do this differently. For instance, lxml
+ introduces an empty <head> tag, and html5lib
+ doesn't. Abstracting this away lets us write simple tests
+ which run HTML fragments through the parser and compare the
+ results against other HTML fragments.
+
+ This method should not be used outside of tests.
+ """
+ return fragment
+
class SAXTreeBuilder(TreeBuilder):
"""A Beautiful Soup treebuilder that listens for SAX events."""
diff --git a/src/beautifulsoup/builder/html5lib_builder.py b/src/beautifulsoup/builder/html5lib_builder.py
index b4ef4de..80c3e6d 100644
--- a/src/beautifulsoup/builder/html5lib_builder.py
+++ b/src/beautifulsoup/builder/html5lib_builder.py
@@ -21,4 +21,7 @@ class HTML5TreeBuilder(SAXTreeBuilder, HTMLTreeBuilder):
walker = treewalkers.getTreeWalker('dom')
dom2sax(doc, self)
+ def test_fragment_to_document(self, fragment):
+ """See `TreeBuilder`."""
+ return u'<html><head></head><body>%s</body></html>' % fragment
diff --git a/src/beautifulsoup/builder/lxml_builder.py b/src/beautifulsoup/builder/lxml_builder.py
index 4949fea..3e1de5f 100644
--- a/src/beautifulsoup/builder/lxml_builder.py
+++ b/src/beautifulsoup/builder/lxml_builder.py
@@ -29,3 +29,8 @@ class LXMLTreeBuilder(HTMLTreeBuilder):
self.soup.endData()
self.soup.handle_data(content)
self.soup.endData(Comment)
+
+ def test_fragment_to_document(self, fragment):
+ """See `TreeBuilder`."""
+ return u'<html><body>%s</body></html>' % fragment
+
diff --git a/src/beautifulsoup/tests/helpers.py b/src/beautifulsoup/tests/helpers.py
index 219d95c..d237556 100644
--- a/src/beautifulsoup/tests/helpers.py
+++ b/src/beautifulsoup/tests/helpers.py
@@ -3,15 +3,21 @@
import unittest
from beautifulsoup import BeautifulSoup
from beautifulsoup.element import SoupStrainer
+from beautifulsoup.builder.lxml_builder import LXMLTreeBuilder
class SoupTest(unittest.TestCase):
default_builder = None
def assertSoupEquals(self, to_parse, compare_parsed_to=None):
- obj = BeautifulSoup(to_parse, builder=self.default_builder)
+ builder = self.default_builder
+ if builder is None:
+ builder = LXMLTreeBuilder()
+ obj = BeautifulSoup(to_parse, builder=builder)
if compare_parsed_to is None:
compare_parsed_to = to_parse
- self.assertEquals(obj.decode(), compare_parsed_to)
+ self.assertEquals(
+ obj.decode(),
+ builder.test_fragment_to_document(compare_parsed_to))
diff --git a/src/beautifulsoup/tests/test_lxml.py b/src/beautifulsoup/tests/test_lxml.py
index bbbbe18..12fd31e 100644
--- a/src/beautifulsoup/tests/test_lxml.py
+++ b/src/beautifulsoup/tests/test_lxml.py
@@ -6,38 +6,26 @@ import unittest
class TestLXMLBuilder(SoupTest):
- def __init__(self, builder):
- super(TestLXMLBuilder, self).__init__()
- self.default_builder = LXMLTreeBuilder()
-
def runTest(self):
self.test_bare_string()
self.test_tag_nesting()
self.test_self_closing()
self.test_soupstrainer()
- def document_for(self, s):
- """Turn a fragment into an HTML document.
-
- lxml does this to HTML fragments it receives, so we need to do it
- if we're going to understand what comes out of lxml.
- """
- return u'<html><body>%s</body></html>' % s
-
def test_bare_string(self):
self.assertSoupEquals(
- "A bare string", self.document_for("<p>A bare string</p>"))
+ "A bare string", "<p>A bare string</p>")
def test_tag_nesting(self):
b_tag = "<b>Inside a B tag</b>"
- self.assertSoupEquals(b_tag, self.document_for(b_tag))
+ self.assertSoupEquals(b_tag)
nested_b_tag = "<p>A <i>nested <b>tag</b></i></p>"
- self.assertSoupEquals(nested_b_tag, self.document_for(nested_b_tag))
+ self.assertSoupEquals(nested_b_tag)
def test_self_closing(self):
self.assertSoupEquals(
- "<p>A <meta> tag</p>", self.document_for("<p>A <meta /> tag</p>"))
+ "<p>A <meta> tag</p>", "<p>A <meta /> tag</p>")
def test_soupstrainer(self):
strainer = SoupStrainer("b")
diff --git a/src/beautifulsoup/tests/test_soup.py b/src/beautifulsoup/tests/test_soup.py
index 80357f0..571164a 100644
--- a/src/beautifulsoup/tests/test_soup.py
+++ b/src/beautifulsoup/tests/test_soup.py
@@ -41,9 +41,9 @@ class FollowThatTag(SoupTest):
def setUp(self):
ml = """
<a id="x">1</a>
- <A id="a">2</a>
- <b id="b">3</a>
- <b href="foo" id="x">4</a>
+ <A id="a">2</A>
+ <b id="b">3</b>
+ <b href="foo" id="x">4</b>
<ac width=100>4</ac>"""
self.soup = BeautifulStoneSoup(ml)