summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- beautifulsoup/builder/__init__.py 41
-rw-r--r-- beautifulsoup/builder/_html5lib.py 9
2 files changed, 35 insertions, 15 deletions
diff --git a/beautifulsoup/builder/__init__.py b/beautifulsoup/builder/__init__.py
index 5e55f7f..0888cef 100644
--- a/beautifulsoup/builder/__init__.py
+++ b/beautifulsoup/builder/__init__.py
@@ -8,6 +8,14 @@ __all__ = [
'TreeBuilder',
]
+# Some useful keywords.
+FAST = 'fast'
+ACCURATE = 'accurate'
+XML = 'xml'
+HTML = 'html'
+
+builders_for_tag = {}
+
class TreeBuilder(Entities):
"""Turn a document into a Beautiful Soup object tree."""
@@ -166,19 +174,26 @@ class HTMLTreeBuilder(TreeBuilder):
def register_builders_from(module):
+ """Copy everything in __all__ from the given module into this module."""
# I'm fairly sure this is not the best way to do this.
-
- # Copy everything mentioned in the builder module's __all__ into
- # this module.
this_module = sys.modules[__package__]
for name in module.__all__:
- setattr(this_module, name, getattr(module, name))
-
- # Add all names from the builder module's __all__ to this module's
- # __all__.
- this_module.__all__ += module.__all__
-
-import _lxml
-register_builders_from(_lxml)
-import _html5lib
-register_builders_from(_html5lib)
+ obj = getattr(module, name)
+ setattr(this_module, name, obj)
+ this_module.__all__.append(name)
+
+# Builders are registered in reverse order of priority, so that custom
+# builder registrations will take precedence. In general, we want
+# html5lib to take precedence over lxml, because it's more reliable.
+try:
+ import _lxml
+ register_builders_from(_lxml)
+except ImportError:
+ # They don't have lxml installed.
+ pass
+try:
+ import _html5lib
+ register_builders_from(_html5lib)
+except ImportError:
+ # They don't have html5lib installed.
+ pass
diff --git a/beautifulsoup/builder/_html5lib.py b/beautifulsoup/builder/_html5lib.py
index 9cca0b0..020b7ea 100644
--- a/beautifulsoup/builder/_html5lib.py
+++ b/beautifulsoup/builder/_html5lib.py
@@ -2,7 +2,11 @@ __all__ = [
'HTML5TreeBuilder',
]
-from beautifulsoup.builder import HTMLTreeBuilder, SAXTreeBuilder
+from beautifulsoup.builder import (
+ ACCURATE,
+ HTML,
+ HTMLTreeBuilder,
+ )
import html5lib
from html5lib.constants import DataLossWarning
import warnings
@@ -13,10 +17,11 @@ from beautifulsoup.element import (
Tag,
)
-
class HTML5TreeBuilder(HTMLTreeBuilder):
"""Use html5lib to build a tree."""
+ tags = [ACCURATE, HTML]
+
def prepare_markup(self, markup, user_specified_encoding):
# Store the user-specified encoding for use later on.
self.user_specified_encoding = user_specified_encoding