summary | refs | log | tree | commit | diff
path: root/bs4/builder
diff options
context:
space:
mode:
Diffstat (limited to 'bs4/builder')
-rw-r--r-- bs4/builder/__init__.py 15
-rw-r--r-- bs4/builder/_lxml.py 4
2 files changed, 13 insertions, 6 deletions
diff --git a/bs4/builder/__init__.py b/bs4/builder/__init__.py
index e28242b..7efbf89 100644
--- a/bs4/builder/__init__.py
+++ b/bs4/builder/__init__.py
@@ -175,8 +175,8 @@ class TreeBuilder(object):
raise NotImplementedError()
def prepare_markup(self, markup, user_specified_encoding=None,
- document_declared_encoding=None):
- return markup, None, None, False
+ document_declared_encoding=None, exclude_encodings=None):
+ yield markup, None, None, False
def test_fragment_to_document(self, fragment):
"""Wrap an HTML fragment to make it look like a document.
@@ -363,8 +363,15 @@ def register_treebuilders_from(module):
this_module.builder_registry.register(obj)
class ParserRejectedMarkup(Exception):
- pass
-
+ def __init__(self, message_or_exception):
+ """Explain why the parser rejected the given markup, either
+ with a textual explanation or another exception.
+ """
+ if isinstance(message_or_exception, Exception):
+ e = message_or_exception
+ message_or_exception = "%s: %s" % (e.__class__.__name__, unicode(e))
+ super(ParserRejectedMarkup, self).__init__(message_or_exception)
+
# Builders are registered in reverse order of priority, so that custom
# builder registrations will take precedence. In general, we want lxml
# to take precedence over html5lib, because it's faster. And we only
diff --git a/bs4/builder/_lxml.py b/bs4/builder/_lxml.py
index 85be1b5..ea66d8b 100644
--- a/bs4/builder/_lxml.py
+++ b/bs4/builder/_lxml.py
@@ -175,7 +175,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
self.parser.feed(data)
self.parser.close()
except (UnicodeDecodeError, LookupError, etree.ParserError), e:
- raise ParserRejectedMarkup(str(e))
+ raise ParserRejectedMarkup(e)
def close(self):
self.nsmaps = [self.DEFAULT_NSMAPS_INVERTED]
@@ -294,7 +294,7 @@ class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
self.parser.feed(markup)
self.parser.close()
except (UnicodeDecodeError, LookupError, etree.ParserError), e:
- raise ParserRejectedMarkup(str(e))
+ raise ParserRejectedMarkup(e)
def test_fragment_to_document(self, fragment):