summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/beautifulsoup/builder/lxml_builder.py | 6
-rw-r--r-- src/beautifulsoup/tests/helpers.py | 18
-rw-r--r-- src/beautifulsoup/tests/test_lxml.py | 56
-rw-r--r-- src/beautifulsoup/tests/test_soup.py | 6
-rw-r--r-- src/beautifulsoup/tests/treebuilder.py | 46
5 files changed, 79 insertions, 53 deletions
diff --git a/src/beautifulsoup/builder/lxml_builder.py b/src/beautifulsoup/builder/lxml_builder.py
index 2963a02..d8bf3f0 100644
--- a/src/beautifulsoup/builder/lxml_builder.py
+++ b/src/beautifulsoup/builder/lxml_builder.py
@@ -1,11 +1,13 @@
from lxml import etree
from beautifulsoup.element import Comment
-from beautifulsoup.builder import TreeBuilder
+from beautifulsoup.builder import HTMLParserTreeBuilder, TreeBuilder
class LXMLTreeBuilder(TreeBuilder):
- def __init__(self, parser_class=etree.HTMLParser, self_closing_tags=[]):
+ def __init__(self, parser_class=etree.HTMLParser, self_closing_tags=None):
self.parser = parser_class(target=self)
+ if self_closing_tags is None:
+ self_closing_tags = HTMLParserTreeBuilder.self_closing_tags
self.self_closing_tags = self_closing_tags
self.soup = None
diff --git a/src/beautifulsoup/tests/helpers.py b/src/beautifulsoup/tests/helpers.py
new file mode 100644
index 0000000..b9de4f2
--- /dev/null
+++ b/src/beautifulsoup/tests/helpers.py
@@ -0,0 +1,18 @@
+"""Helper classes for tests."""
+
+import unittest
+from beautifulsoup import BeautifulSoup
+from beautifulsoup.element import SoupStrainer
+from test_soup import SoupTest
+
+class SoupTest(unittest.TestCase):
+
+ default_builder = None
+
+ def assertSoupEquals(self, to_parse, compare_parsed_to=None):
+ obj = BeautifulSoup(to_parse, builder=self.default_builder)
+ if compare_parsed_to is None:
+ compare_parsed_to = to_parse
+
+ self.assertEquals(obj.decode(), compare_parsed_to)
+
diff --git a/src/beautifulsoup/tests/test_lxml.py b/src/beautifulsoup/tests/test_lxml.py
index 5211301..a69b9aa 100644
--- a/src/beautifulsoup/tests/test_lxml.py
+++ b/src/beautifulsoup/tests/test_lxml.py
@@ -1,6 +1,56 @@
-from treebuilder import CompatibilityTest
+from helpers import SoupTest
+from beautifulsoup import BeautifulSoup
+from beautifulsoup.element import SoupStrainer
from beautifulsoup.builder.lxml_builder import LXMLTreeBuilder
import unittest
-def additional_tests():
- return unittest.TestSuite([CompatibilityTest(LXMLTreeBuilder())])
+class TestLXMLBuilder(SoupTest):
+
+ def __init__(self, builder):
+ super(TestLXMLBuilder, self).__init__()
+ self.default_builder = LXMLTreeBuilder()
+
+ def runTest(self):
+ self.test_bare_string()
+ self.test_tag_nesting()
+ self.test_self_closing()
+ self.test_soupstrainer()
+
+ def document_for(self, s):
+ """Turn a fragment into an HTML document.
+
+ lxml does this to HTML fragments it receives, so we need to do it
+ if we're going to understand what comes out of lxml.
+ """
+ return u'<html><body>%s</body></html>' % s
+
+ def test_bare_string(self):
+ self.assertSoupEquals(
+ "A bare string", self.document_for("<p>A bare string</p>"))
+
+ def test_tag_nesting(self):
+ b_tag = "<b>Inside a B tag</b>"
+ self.assertSoupEquals(b_tag, self.document_for(b_tag))
+
+ nested_b_tag = "<p>A <i>nested <b>tag</b></i></p>"
+ self.assertSoupEquals(nested_b_tag, self.document_for(nested_b_tag))
+
+ def test_self_closing(self):
+ self.assertSoupEquals(
+ "<p>A <meta> tag</p>", self.document_for("<p>A <meta /> tag</p>"))
+
+ def test_soupstrainer(self):
+ strainer = SoupStrainer("b")
+ soup = BeautifulSoup("A <b>bold</b> <i>statement</i>",
+ self.default_builder,
+ parseOnlyThese=strainer)
+ self.assertEquals(soup.decode(), "<b>bold</b>")
+
+ soup = BeautifulSoup("A <b>bold</b> <meta> <i>statement</i>",
+ self.default_builder,
+ parseOnlyThese=strainer)
+ self.assertEquals(soup.decode(), "<b>bold</b>")
+
+
+def test_suite():
+ unittest.TestLoader().loadTestsFromName('__name__')
diff --git a/src/beautifulsoup/tests/test_soup.py b/src/beautifulsoup/tests/test_soup.py
index ff91104..c5a02b6 100644
--- a/src/beautifulsoup/tests/test_soup.py
+++ b/src/beautifulsoup/tests/test_soup.py
@@ -19,13 +19,15 @@ def additional_tests():
class SoupTest(unittest.TestCase):
- def assertSoupEquals(self, toParse, rep=None, c=BeautifulSoup,
+ default_builder = HTMLParserXMLTreeBuilder()
+
+ def assertSoupEquals(self, toParse, rep=None, builder=None,
encoding=None):
"""Parse the given text and make sure its string rep is the other
given text."""
if rep == None:
rep = toParse
- obj = c(toParse)
+ obj = BeautifulSoup(toParse, builder=self.default_builder)
if encoding is None:
rep2 = obj.decode()
else:
diff --git a/src/beautifulsoup/tests/treebuilder.py b/src/beautifulsoup/tests/treebuilder.py
deleted file mode 100644
index 489708d..0000000
--- a/src/beautifulsoup/tests/treebuilder.py
+++ /dev/null
@@ -1,46 +0,0 @@
-"""Tree builder compatibility suite.
-
-If you create a tree builder class, also create a test suite that
-subclasses this one. This test suite will parse various bits of
-well-formed HTML with your tree builder. Not every tree builder will
-handle bad HTML in the same way, but every builder should be able to
-handle _good_ HTML in the same way.
-"""
-
-import unittest
-from beautifulsoup import BeautifulSoup
-from beautifulsoup.element import SoupStrainer
-from test_soup import SoupTest
-
-class CompatibilityTest(SoupTest):
-
- def __init__(self, builder):
- self.builder = builder
-
- _testMethodName = "test"
-
- def test(self):
- self.test_bare_string()
- self.test_tag_nesting()
- self.test_self_closing()
- self.test_soupstrainer()
-
- def test_bare_string(self):
- self.assertSoupEquals("A bare string")
-
- def test_tag_nesting(self):
- self.assertSoupEquals("<b>Inside a B tag</b>")
- self.assertSoupEquals("<p>A <i>nested <b>tag</b></i></p>")
-
- def test_self_closing(self):
- self.assertSoupEquals("A <meta> tag", "A <meta /> tag")
-
- def test_soupstrainer(self):
- strainer = SoupStrainer("b")
- soup = BeautifulSoup("A <b>bold</b> <i>statement</i>",
- parseOnlyThese=strainer)
- self.assertEquals(soup.decode(), "<b>bold</b>")
-
- soup = BeautifulSoup("A <b>bold</b> <meta> <i>statement</i>",
- parseOnlyThese=strainer)
- self.assertEquals(soup.decode(), "<b>bold</b>")