diff options
author | Leonard Richardson <leonardr@segfault.org> | 2013-05-31 09:17:11 -0400 |
---|---|---|
committer | Leonard Richardson <leonardr@segfault.org> | 2013-05-31 09:17:11 -0400 |
commit | 19f05a586c79b86be8ebe06a3728ab9a94162bee (patch) | |
tree | 295326e49419a40a8942dc3b0552e51f97e18abb /bs4/__init__.py | |
parent | 342da7818966498e1fc2100c0b920cbc242c9831 (diff) |
Create a new lxml parser object for every new parsing strategy.
Diffstat (limited to 'bs4/__init__.py')
-rw-r--r-- | bs4/__init__.py | 22 |
1 file changed, 10 insertions, 12 deletions
```diff
diff --git a/bs4/__init__.py b/bs4/__init__.py
index a949d6d..956f26e 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -26,7 +26,7 @@ __all__ = ['BeautifulSoup']
 import re
 import warnings
 
-from .builder import builder_registry
+from .builder import builder_registry, ParserRejectedMarkup
 from .dammit import UnicodeDammit
 from .element import (
     CData,
@@ -160,18 +160,17 @@ class BeautifulSoup(Tag):
 
         self.parse_only = parse_only
 
-        self.reset()
-
         if hasattr(markup, 'read'):        # It's a file-type object.
             markup = markup.read()
-        (self.markup, self.original_encoding, self.declared_html_encoding,
-         self.contains_replacement_characters) = (
-            self.builder.prepare_markup(markup, from_encoding))
-
-        try:
-            self._feed()
-        except StopParsing:
-            pass
+        for (self.markup, self.original_encoding, self.declared_html_encoding,
+         self.contains_replacement_characters) in (
+            self.builder.prepare_markup(markup, from_encoding)):
+            self.reset()
+            try:
+                self._feed()
+                break
+            except ParserRejectedMarkup, e:
+                pass
 
         # Clear out the markup and remove the builder's circular
         # reference to this object.
@@ -353,7 +352,6 @@ class BeautifulStoneSoup(BeautifulSoup):
 class StopParsing(Exception):
     pass
 
-
 class FeatureNotFound(ValueError):
     pass
 
```