summary | refs | log | tree | commit | diff
path: root/bs4/__init__.py
diff options
context:
space:
mode:
Diffstat (limited to 'bs4/__init__.py')
-rw-r--r-- bs4/__init__.py 12
1 files changed, 11 insertions, 1 deletions
diff --git a/bs4/__init__.py b/bs4/__init__.py
index e27ca6f..e85a0bf 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -302,6 +302,8 @@ class BeautifulSoup(Tag):
' Beautiful Soup.' % markup)
self._check_markup_is_url(markup)
+ rejections = []
+ success = False
for (self.markup, self.original_encoding, self.declared_html_encoding,
self.contains_replacement_characters) in (
self.builder.prepare_markup(
@@ -309,10 +311,18 @@ class BeautifulSoup(Tag):
self.reset()
try:
self._feed()
+ success = True
break
- except ParserRejectedMarkup:
+ except ParserRejectedMarkup as e:
+ rejections.append(e)
pass
+ if not success:
+ other_exceptions = [unicode(e) for e in rejections]
+ raise ParserRejectedMarkup(
+ u"The markup you provided was rejected by the parser. Trying a different parser or a different encoding may help.\n\nOriginal exception(s) from parser:\n " + "\n ".join(other_exceptions)
+ )
+
# Clear out the markup and remove the builder's circular
# reference to this object.
self.markup = None