summary | refs | log | tree | commit | diff
path: root/bs4/__init__.py
diff options
context:
space:
mode:
author: Leonard Richardson <leonard.richardson@canonical.com>, 2013-06-02 22:19:37 -0400
committer: Leonard Richardson <leonard.richardson@canonical.com>, 2013-06-02 22:19:37 -0400
commit: 4a9444ac0b74fbf84cf86b9fcf6055c85e65f62a (patch)
tree: 570cbcb2c9ab9cf458edee87490afeffd8377560 /bs4/__init__.py
parent: 11dad27424b319a2034f59f5a7f48286551102d0 (diff)
parent: 4f9a654766df9ddd05e3ef274b4715b42668724f (diff)
Merged in big encoding-detection refactoring branch.
Diffstat (limited to 'bs4/__init__.py')
-rw-r--r-- bs4/__init__.py 22
1 files changed, 10 insertions, 12 deletions
diff --git a/bs4/__init__.py b/bs4/__init__.py
index 03b2416..7b5964a 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -26,7 +26,7 @@ __all__ = ['BeautifulSoup']
import re
import warnings
-from .builder import builder_registry
+from .builder import builder_registry, ParserRejectedMarkup
from .dammit import UnicodeDammit
from .element import (
CData,
@@ -160,18 +160,17 @@ class BeautifulSoup(Tag):
self.parse_only = parse_only
- self.reset()
-
if hasattr(markup, 'read'): # It's a file-type object.
markup = markup.read()
- (self.markup, self.original_encoding, self.declared_html_encoding,
- self.contains_replacement_characters) = (
- self.builder.prepare_markup(markup, from_encoding))
-
- try:
- self._feed()
- except StopParsing:
- pass
+ for (self.markup, self.original_encoding, self.declared_html_encoding,
+ self.contains_replacement_characters) in (
+ self.builder.prepare_markup(markup, from_encoding)):
+ self.reset()
+ try:
+ self._feed()
+ break
+ except ParserRejectedMarkup, e:
+ pass
# Clear out the markup and remove the builder's circular
# reference to this object.
@@ -353,7 +352,6 @@ class BeautifulStoneSoup(BeautifulSoup):
class StopParsing(Exception):
pass
-
class FeatureNotFound(ValueError):
pass