diff options
author | Leonard Richardson <leonard.richardson@canonical.com> | 2011-02-20 18:28:15 -0500 |
---|---|---|
committer | Leonard Richardson <leonard.richardson@canonical.com> | 2011-02-20 18:28:15 -0500 |
commit | 2f0b16b8ea82b09034dff0e747e29d89e97b5680 (patch) | |
tree | f35913725a819cdd0068180357468dee5bb89240 /beautifulsoup/builder/__init__.py | |
parent | 1fa18e957db92cfa056151f4e0d93c44243df1d9 (diff) |
Tree builders now advertise their features.
Diffstat (limited to 'beautifulsoup/builder/__init__.py')
-rw-r--r-- | beautifulsoup/builder/__init__.py | 33 |
1 files changed, 24 insertions, 9 deletions
```diff
diff --git a/beautifulsoup/builder/__init__.py b/beautifulsoup/builder/__init__.py
index 0888cef..4178730 100644
--- a/beautifulsoup/builder/__init__.py
+++ b/beautifulsoup/builder/__init__.py
@@ -1,3 +1,4 @@
+from collections import defaultdict
 import re
 import sys
 from beautifulsoup.element import Entities
@@ -8,17 +9,20 @@ __all__ = [
     'TreeBuilder',
     ]
 
-# Some useful keywords.
+# Some useful features for a TreeBuilder to have.
 FAST = 'fast'
-ACCURATE = 'accurate'
+PERMISSIVE = 'permissive'
 XML = 'xml'
 HTML = 'html'
 
-builders_for_tag = {}
+# Which builders have a given feature?
+treebuilders_for_feature = defaultdict(list)
 
 class TreeBuilder(Entities):
     """Turn a document into a Beautiful Soup object tree."""
 
+    features = []
+
     assume_html = False
     preserve_whitespace_tags = set()
     empty_element_tags = None # A tag will be considered an empty-element
@@ -173,27 +177,38 @@ class HTMLTreeBuilder(TreeBuilder):
         return False
 
 
-def register_builders_from(module):
-    """Copy everything in __all___ from the given module into this module."""
+def register_treebuilder(treebuilder_class):
+    """Register a treebuilder based on its advertised features."""
+    for feature in treebuilder_class.features:
+        treebuilders_for_feature[feature].append(treebuilder_class)
+
+
+def register_treebuilders_from(module):
+    """Copy TreeBuilder subclasses from the given module into this module."""
     # I'm fairly sure this is not the best way to do this.
     this_module = sys.modules[__package__]
     for name in module.__all__:
         obj = getattr(module, name)
-        setattr(this_module, name, obj)
-        this_module.__all__.append(name)
+
+        if issubclass(obj, TreeBuilder):
+            setattr(this_module, name, obj)
+            this_module.__all__.append(name)
+            register_treebuilder(obj)
 
 # Builders are registered in reverse order of priority, so that custom
 # builder registrations will take precedence. In general, we want
 # html5lib to take precedence over lxml, because it's more reliable.
 try:
     import _lxml
-    register_builders_from(_lxml)
+    register_treebuilders_from(_lxml)
 except ImportError:
     # They don't have lxml installed.
     pass
 try:
     import _html5lib
-    register_builders_from(_html5lib)
+    register_treebuilders_from(_html5lib)
 except ImportError:
     # They don't have html5lib installed.
     pass
+
+print treebuilders_for_feature
```