diff options
author | Leonard Richardson <leonardr@segfault.org> | 2013-05-30 12:43:22 -0400 |
---|---|---|
committer | Leonard Richardson <leonardr@segfault.org> | 2013-05-30 12:43:22 -0400 |
commit | 342da7818966498e1fc2100c0b920cbc242c9831 (patch) | |
tree | 528231e14ea162e78c20c4202606b568add638af /bs4/dammit.py | |
parent | ea23194367fb36d201cf6b8134601a73070dff63 (diff) |
Refactored code a bit.
Diffstat (limited to 'bs4/dammit.py')
-rw-r--r-- | bs4/dammit.py | 27 |
1 files changed, 13 insertions, 14 deletions
```diff
diff --git a/bs4/dammit.py b/bs4/dammit.py
index 71281d2..cb6d354 100644
--- a/bs4/dammit.py
+++ b/bs4/dammit.py
@@ -223,24 +223,25 @@ class EncodingDetector:
         self.declared_encoding = None
         self.sniffed_encoding = None
 
-    def _yield(self, encoding, tried):
+    def _usable(self, encoding, tried):
         if encoding not in tried and encoding is not None:
-            yield encoding
             tried.add(encoding)
+            return True
+        return False
 
     @property
     def encodings(self):
         """Yield a number of encodings that might work for this markup."""
         tried = set()
         for e in self.override_encodings:
-            for x in self._yield(e, tried):
-                yield x
+            if self._usable(e, tried):
+                yield e
 
         if self.sniffed_encoding is None:
             (self.markup,
              self.sniffed_encoding) = self.strip_byte_order_mark(self.markup)
-        for x in self._yield(self.sniffed_encoding, tried):
-            yield x
+        if self._usable(self.sniffed_encoding, tried):
+            yield self.sniffed_encoding
 
         if self.declared_encoding is None:
             self.declared_encoding = self.find_declared_encoding(
@@ -261,20 +262,18 @@ class EncodingDetector:
             # only called if the sniffed encoding didn't work.
             self.declared_encoding = self.sniffed_encoding
 
-        if self.declared_encoding is not None:
-            for x in self._yield(self.declared_encoding, tried):
-                yield x
+        if self._usable(self.declared_encoding, tried):
+            yield self.declared_encoding
 
         if self.chardet_encoding is None:
             self.chardet_encoding = chardet_dammit(self.markup)
-        if self.chardet_encoding is not None:
-            for x in self._yield(self.chardet_encoding, tried):
-                yield x
+        if self._usable(self.chardet_encoding, tried):
+            yield self.chardet_encoding
 
         # As a last-ditch effort, try utf-8 and windows-1252.
         for e in ('utf-8', 'windows-1252'):
-            for x in self._yield(e, tried):
-                yield x
+            if self._usable(e, tried):
+                yield e
 
     @classmethod
     def strip_byte_order_mark(cls, markup):
```