diff options
author | Leonard Richardson <leonard.richardson@canonical.com> | 2011-02-20 19:50:45 -0500 |
---|---|---|
committer | Leonard Richardson <leonard.richardson@canonical.com> | 2011-02-20 19:50:45 -0500 |
commit | 2fa73e2cb99b0816148ade6150f378993907534e (patch) | |
tree | cf7e2371881c680990157cae621f6045f5941f56 /tests | |
parent | e6320fad4cd162ab6c7dfe02be5206f5c3f8c25b (diff) | |
parent | ce3742abd4c7fe39247569e82e2b3acdd6052bb1 (diff) |
Added a registry for tree builders, and made it possible to ask the BeautifulSoup constructor for a tree builder that has the features you want.
Diffstat (limited to 'tests')
-rw-r--r-- | tests/test_builder_registry.py | 115 | ||||
-rw-r--r-- | tests/test_html5lib.py | 4 | ||||
-rw-r--r-- | tests/test_lxml.py | 4 | ||||
-rw-r--r-- | tests/test_soup.py | 2 | ||||
-rw-r--r-- | tests/test_tree.py | 5 |
5 files changed, 123 insertions, 7 deletions
diff --git a/tests/test_builder_registry.py b/tests/test_builder_registry.py new file mode 100644 index 0000000..17e3fb1 --- /dev/null +++ b/tests/test_builder_registry.py @@ -0,0 +1,115 @@ +"""Tests of the builder registry.""" + +import unittest + +from beautifulsoup import BeautifulSoup +from beautifulsoup.builder import ( + builder_registry as registry, + LXMLTreeBuilderForXML, + LXMLTreeBuilder, + TreeBuilderRegistry, + HTML5TreeBuilder +) + + + +class BuiltInRegistryTest(unittest.TestCase): + """Test the built-in registry with the default builders registered.""" + + def test_combination(self): + self.assertEquals(registry.lookup('fast', 'html'), + LXMLTreeBuilder) + self.assertEquals(registry.lookup('permissive', 'xml'), + LXMLTreeBuilderForXML) + self.assertEquals(registry.lookup('permissive', 'html'), + HTML5TreeBuilder) + + def test_lookup_by_markup_type(self): + self.assertEquals(registry.lookup('html'), HTML5TreeBuilder) + self.assertEquals(registry.lookup('xml'), LXMLTreeBuilderForXML) + + def test_named_library(self): + self.assertEquals(registry.lookup('lxml', 'xml'), + LXMLTreeBuilderForXML) + self.assertEquals(registry.lookup('lxml', 'html'), + LXMLTreeBuilder) + self.assertEquals(registry.lookup('html5lib'), + HTML5TreeBuilder) + + def test_unimplemented_combinations(self): + self.assertEquals(registry.lookup('fast', 'permissive', 'html'), + None) + + def test_beautifulsoup_constructor_does_lookup(self): + # You can pass in a string. + BeautifulSoup("", features="html") + # Or a list of strings. + BeautifulSoup("", features=["html", "permissive"]) + + # You'll get an exception if BS can't find an appropriate + # builder. 
+ self.assertRaises(ValueError, BeautifulSoup, + "", features="no-such-feature") + +class RegistryTest(unittest.TestCase): + """Test the TreeBuilderRegistry class in general.""" + + def setUp(self): + self.registry = TreeBuilderRegistry() + + def builder_for_features(self, *feature_list): + cls = type('Builder_' + '_'.join(feature_list), + (object,), {'features' : feature_list}) + + self.registry.register(cls) + return cls + + def test_register_with_no_features(self): + builder = self.builder_for_features() + + # Since the builder advertises no features, you can't find it + # by looking up features. + self.assertEquals(self.registry.lookup('foo'), None) + + # But you can find it by doing a lookup with no features, if + # this happens to be the only registered builder. + self.assertEquals(self.registry.lookup(), builder) + + def test_register_with_features_makes_lookup_succeed(self): + builder = self.builder_for_features('foo', 'bar') + self.assertEquals(self.registry.lookup('foo'), builder) + self.assertEquals(self.registry.lookup('bar'), builder) + + def test_lookup_fails_when_no_builder_implements_feature(self): + builder = self.builder_for_features('foo', 'bar') + self.assertEquals(self.registry.lookup('baz'), None) + + def test_lookup_gets_most_recent_registration_when_no_feature_specified(self): + builder1 = self.builder_for_features('foo') + builder2 = self.builder_for_features('bar') + self.assertEquals(self.registry.lookup(), builder2) + + def test_lookup_fails_when_no_tree_builders_registered(self): + self.assertEquals(self.registry.lookup(), None) + + def test_lookup_gets_most_recent_builder_supporting_all_features(self): + has_one = self.builder_for_features('foo') + has_the_other = self.builder_for_features('bar') + has_both_early = self.builder_for_features('foo', 'bar', 'baz') + has_both_late = self.builder_for_features('foo', 'bar', 'quux') + lacks_one = self.builder_for_features('bar') + has_the_other = self.builder_for_features('foo') + + # There 
are two builders featuring 'foo' and 'bar', but + # the one that also features 'quux' was registered later. + self.assertEquals(self.registry.lookup('foo', 'bar'), + has_both_late) + + # There is only one builder featuring 'foo', 'bar', and 'baz'. + self.assertEquals(self.registry.lookup('foo', 'bar', 'baz'), + has_both_early) + + def test_lookup_fails_when_cannot_reconcile_requested_features(self): + builder1 = self.builder_for_features('foo', 'bar') + builder2 = self.builder_for_features('foo', 'baz') + self.assertEquals(self.registry.lookup('bar', 'baz'), None) diff --git a/tests/test_html5lib.py b/tests/test_html5lib.py index aa0bad2..5abc29d 100644 --- a/tests/test_html5lib.py +++ b/tests/test_html5lib.py @@ -18,7 +18,7 @@ class TestHTML5Builder(TestLXMLBuilder): strainer = SoupStrainer("b") markup = "<p>A <b>bold</b> statement.</p>" soup = self.soup(markup, - parseOnlyThese=strainer) + parse_only=strainer) self.assertEquals( soup.decode(), self.document_for(markup)) @@ -210,7 +210,7 @@ class TestHTML5LibEncodingConversion(TestLXMLBuilderEncodingConversion): # A real-world test to make sure we can convert ISO-8859-9 (a # Hebrew encoding) to UTF-8. soup = self.soup(self.HEBREW_DOCUMENT, - fromEncoding="iso-8859-8") + from_encoding="iso-8859-8") self.assertEquals(soup.original_encoding, 'iso8859-8') self.assertEquals( soup.encode('utf-8'), diff --git a/tests/test_lxml.py b/tests/test_lxml.py index 9d08aef..df2f341 100644 --- a/tests/test_lxml.py +++ b/tests/test_lxml.py @@ -325,7 +325,7 @@ class TestLXMLBuilder(SoupTest): def test_soupstrainer(self): strainer = SoupStrainer("b") soup = self.soup("A <b>bold</b> <meta /> <i>statement</i>", - parseOnlyThese=strainer) + parse_only=strainer) self.assertEquals(soup.decode(), "<b>bold</b>") @@ -506,7 +506,7 @@ class TestLXMLBuilderEncodingConversion(SoupTest): # A real-world test to make sure we can convert ISO-8859-9 (a # Hebrew encoding) to UTF-8. 
soup = self.soup(self.HEBREW_DOCUMENT, - fromEncoding="iso-8859-8") + from_encoding="iso-8859-8") self.assertEquals(soup.original_encoding, 'iso-8859-8') self.assertEquals( soup.encode('utf-8'), diff --git a/tests/test_soup.py b/tests/test_soup.py index 01dff53..bb2262a 100644 --- a/tests/test_soup.py +++ b/tests/test_soup.py @@ -12,7 +12,7 @@ class TestSelectiveParsing(SoupTest): def test_parse_with_soupstrainer(self): markup = "No<b>Yes</b><a>No<b>Yes <c>Yes</c></b>" strainer = SoupStrainer("b") - soup = self.soup(markup, parseOnlyThese=strainer) + soup = self.soup(markup, parse_only=strainer) self.assertEquals(soup.encode(), "<b>Yes</b><b>Yes <c>Yes</c></b>") diff --git a/tests/test_tree.py b/tests/test_tree.py index 384d518..0b3d72e 100644 --- a/tests/test_tree.py +++ b/tests/test_tree.py @@ -13,6 +13,7 @@ import copy import cPickle as pickle import re from beautifulsoup import BeautifulSoup +from beautifulsoup.builder import builder_registry from beautifulsoup.element import CData, SoupStrainer, Tag from beautifulsoup.testing import SoupTest @@ -523,7 +524,7 @@ class TestTreeModification(SoupTest): self.assertEqual(soup.decode(), self.document_for('<a id2="foo"></a>')) def test_new_tag_creation(self): - builder = BeautifulSoup.default_builder() + builder = builder_registry.lookup('html5lib')() soup = self.soup("<body></body>", builder=builder) a = Tag(soup, builder, 'a') ol = Tag(soup, builder, 'ol') @@ -863,7 +864,7 @@ class TestSubstitutions(SoupTest): # meta tag got filtered out by the strainer. This test makes # sure that doesn't happen. strainer = SoupStrainer('pre') - soup = self.soup(markup, parseOnlyThese=strainer) + soup = self.soup(markup, parse_only=strainer) self.assertEquals(soup.contents[0].name, 'pre') |