diff options
author | Leonard Richardson <leonard.richardson@canonical.com> | 2011-02-20 19:48:13 -0500 |
---|---|---|
committer | Leonard Richardson <leonard.richardson@canonical.com> | 2011-02-20 19:48:13 -0500 |
commit | ce3742abd4c7fe39247569e82e2b3acdd6052bb1 (patch) | |
tree | cf7e2371881c680990157cae621f6045f5941f56 | |
parent | 4afd4ca029aed4c5e2bc225e3938f4f4879ba155 (diff) |
Fixed bug in the BS constructor lookup, and added the test file I've been working on this whole time.
-rw-r--r-- | beautifulsoup/__init__.py | 11 | ||||
-rw-r--r-- | tests/test_builder_registry.py | 115 |
2 files changed, 122 insertions, 4 deletions
```diff
diff --git a/beautifulsoup/__init__.py b/beautifulsoup/__init__.py
index 234cb89..c998924 100644
--- a/beautifulsoup/__init__.py
+++ b/beautifulsoup/__init__.py
@@ -103,21 +103,24 @@ class BeautifulSoup(Tag):
     # alone.
     STRIP_ASCII_SPACES = { 9: None, 10: None, 12: None, 13: None, 32: None, }
 
-    def __init__(self, markup="", parse_only=None, from_encoding=None,
-                 builder=None, *features):
+    def __init__(self, markup="", features=None, builder=None,
+                 parse_only=None, from_encoding=None):
         """The Soup object is initialized as the 'root tag', and the
         provided markup (which can be a string or a file-like object)
         is fed into the underlying parser."""
 
         if builder is None:
+            if isinstance(features, basestring):
+                features = [features]
             if len(features) == 0:
                 features = self.DEFAULT_BUILDER_FEATURES
-            builder = builder_registry.lookup(*features)
-            if builder is None:
+            builder_class = builder_registry.lookup(*features)
+            if builder_class is None:
                 raise ValueError(
                     "Couldn't find a tree builder with the features you "
                     "requested: %s. Do you need to install a parser library?"
                     % ",".join(features))
+            builder = builder_class()
         self.builder = builder
         self.builder.soup = self
 
diff --git a/tests/test_builder_registry.py b/tests/test_builder_registry.py
new file mode 100644
index 0000000..17e3fb1
--- /dev/null
+++ b/tests/test_builder_registry.py
@@ -0,0 +1,115 @@
+"""Tests of the builder registry."""
+
+import unittest
+
+from beautifulsoup import BeautifulSoup
+from beautifulsoup.builder import (
+    builder_registry as registry,
+    LXMLTreeBuilderForXML,
+    LXMLTreeBuilder,
+    TreeBuilderRegistry,
+    HTML5TreeBuilder
+)
+
+
+
+class BuiltInRegistryTest(unittest.TestCase):
+    """Test the built-in registry with the default builders registered."""
+
+    def test_combination(self):
+        self.assertEquals(registry.lookup('fast', 'html'),
+                          LXMLTreeBuilder)
+        self.assertEquals(registry.lookup('permissive', 'xml'),
+                          LXMLTreeBuilderForXML)
+        self.assertEquals(registry.lookup('permissive', 'html'),
+                          HTML5TreeBuilder)
+
+    def test_lookup_by_markup_type(self):
+        self.assertEquals(registry.lookup('html'), HTML5TreeBuilder)
+        self.assertEquals(registry.lookup('xml'), LXMLTreeBuilderForXML)
+
+    def test_named_library(self):
+        self.assertEquals(registry.lookup('lxml', 'xml'),
+                          LXMLTreeBuilderForXML)
+        self.assertEquals(registry.lookup('lxml', 'html'),
+                          LXMLTreeBuilder)
+        self.assertEquals(registry.lookup('html5lib'),
+                          HTML5TreeBuilder)
+
+    def test_unimplemented_combinations(self):
+        self.assertEquals(registry.lookup('fast', 'permissive', 'html'),
+                          None)
+
+    def test_beautifulsoup_constructor_does_lookup(self):
+        # You can pass in a string.
+        BeautifulSoup("", features="html")
+        # Or a list of strings.
+        BeautifulSoup("", features=["html", "permissive"])
+
+        # You'll get an exception if BS can't find an appropriate
+        # builder.
+        self.assertRaises(ValueError, BeautifulSoup,
+                          "", features="no-such-feature")
+
+class RegistryTest(unittest.TestCase):
+    """Test the TreeBuilderRegistry class in general."""
+
+    def setUp(self):
+        self.registry = TreeBuilderRegistry()
+
+    def builder_for_features(self, *feature_list):
+        cls = type('Builder_' + '_'.join(feature_list),
+                   (object,), {'features' : feature_list})
+
+        self.registry.register(cls)
+        return cls
+
+    def test_register_with_no_features(self):
+        builder = self.builder_for_features()
+
+        # Since the builder advertises no features, you can't find it
+        # by looking up features.
+        self.assertEquals(self.registry.lookup('foo'), None)
+
+        # But you can find it by doing a lookup with no features, if
+        # this happens to be the only registered builder.
+        self.assertEquals(self.registry.lookup(), builder)
+
+    def test_register_with_features_makes_lookup_succeed(self):
+        builder = self.builder_for_features('foo', 'bar')
+        self.assertEquals(self.registry.lookup('foo'), builder)
+        self.assertEquals(self.registry.lookup('bar'), builder)
+
+    def test_lookup_fails_when_no_builder_implements_feature(self):
+        builder = self.builder_for_features('foo', 'bar')
+        self.assertEquals(self.registry.lookup('baz'), None)
+
+    def test_lookup_gets_most_recent_registration_when_no_feature_specified(self):
+        builder1 = self.builder_for_features('foo')
+        builder2 = self.builder_for_features('bar')
+        self.assertEquals(self.registry.lookup(), builder2)
+
+    def test_lookup_fails_when_no_tree_builders_registered(self):
+        self.assertEquals(self.registry.lookup(), None)
+
+    def test_lookup_gets_most_recent_builder_supporting_all_features(self):
+        has_one = self.builder_for_features('foo')
+        has_the_other = self.builder_for_features('bar')
+        has_both_early = self.builder_for_features('foo', 'bar', 'baz')
+        has_both_late = self.builder_for_features('foo', 'bar', 'quux')
+        lacks_one = self.builder_for_features('bar')
+        has_the_other = self.builder_for_features('foo')
+
+        # There are two builders featuring 'foo' and 'bar', but
+        # the one that also features 'quux' was registered later.
+        self.assertEquals(self.registry.lookup('foo', 'bar'),
+                          has_both_late)
+
+        # There is only one builder featuring 'foo', 'bar', and 'baz'.
+        self.assertEquals(self.registry.lookup('foo', 'bar', 'baz'),
+                          has_both_early)
+
+    def test_lookup_fails_when_cannot_reconcile_requested_features(self):
+        builder1 = self.builder_for_features('foo', 'bar')
+        builder2 = self.builder_for_features('foo', 'baz')
+        self.assertEquals(self.registry.lookup('bar', 'baz'), None)
```