summary | refs | log | tree | commit | diff
path: root/bs4/__init__.py
diff options
context:
space:
mode:
author: Leonard Richardson <leonardr@segfault.org> 2013-05-31 09:17:11 -0400
committer: Leonard Richardson <leonardr@segfault.org> 2013-05-31 09:17:11 -0400
commit: 19f05a586c79b86be8ebe06a3728ab9a94162bee (patch)
tree: 295326e49419a40a8942dc3b0552e51f97e18abb /bs4/__init__.py
parent: 342da7818966498e1fc2100c0b920cbc242c9831 (diff)
Create a new lxml parser object for every new parsing strategy.
Diffstat (limited to 'bs4/__init__.py')
-rw-r--r-- bs4/__init__.py 22
1 files changed, 10 insertions, 12 deletions
diff --git a/bs4/__init__.py b/bs4/__init__.py
index a949d6d..956f26e 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -26,7 +26,7 @@ __all__ = ['BeautifulSoup']
import re
import warnings
-from .builder import builder_registry
+from .builder import builder_registry, ParserRejectedMarkup
from .dammit import UnicodeDammit
from .element import (
CData,
@@ -160,18 +160,17 @@ class BeautifulSoup(Tag):
self.parse_only = parse_only
- self.reset()
-
if hasattr(markup, 'read'): # It's a file-type object.
markup = markup.read()
- (self.markup, self.original_encoding, self.declared_html_encoding,
- self.contains_replacement_characters) = (
- self.builder.prepare_markup(markup, from_encoding))
-
- try:
- self._feed()
- except StopParsing:
- pass
+ for (self.markup, self.original_encoding, self.declared_html_encoding,
+ self.contains_replacement_characters) in (
+ self.builder.prepare_markup(markup, from_encoding)):
+ self.reset()
+ try:
+ self._feed()
+ break
+ except ParserRejectedMarkup, e:
+ pass
# Clear out the markup and remove the builder's circular
# reference to this object.
@@ -353,7 +352,6 @@ class BeautifulStoneSoup(BeautifulSoup):
class StopParsing(Exception):
pass
-
class FeatureNotFound(ValueError):
pass