From 19f05a586c79b86be8ebe06a3728ab9a94162bee Mon Sep 17 00:00:00 2001
From: Leonard Richardson
Date: Fri, 31 May 2013 09:17:11 -0400
Subject: Create a new lxml parser object for every new parsing strategy.

---
 bs4/builder/_htmlparser.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'bs4/builder/_htmlparser.py')

diff --git a/bs4/builder/_htmlparser.py b/bs4/builder/_htmlparser.py
index e34c9fa..2b98969 100644
--- a/bs4/builder/_htmlparser.py
+++ b/bs4/builder/_htmlparser.py
@@ -133,13 +133,14 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
         replaced with REPLACEMENT CHARACTER).
         """
         if isinstance(markup, unicode):
-            return markup, None, None, False
+            yield (markup, None, None, False)
+            return
 
         try_encodings = [user_specified_encoding, document_declared_encoding]
         dammit = UnicodeDammit(markup, try_encodings, is_html=True)
-        return (dammit.markup, dammit.original_encoding,
-                dammit.declared_html_encoding,
-                dammit.contains_replacement_characters)
+        yield (dammit.markup, dammit.original_encoding,
+               dammit.declared_html_encoding,
+               dammit.contains_replacement_characters)
 
     def feed(self, markup):
         args, kwargs = self.parser_args
-- 
cgit v1.2.3