summary refs log tree commit diff
path: root/bs4/dammit.py
diff options
context:
space:
mode:
author Leonard Richardson <leonardr@segfault.org> 2013-05-30 12:43:22 -0400
committer Leonard Richardson <leonardr@segfault.org> 2013-05-30 12:43:22 -0400
commit 342da7818966498e1fc2100c0b920cbc242c9831 (patch)
tree 528231e14ea162e78c20c4202606b568add638af /bs4/dammit.py
parent ea23194367fb36d201cf6b8134601a73070dff63 (diff)
Refactored code a bit.
Diffstat (limited to 'bs4/dammit.py')
-rw-r--r-- bs4/dammit.py 27
1 files changed, 13 insertions, 14 deletions
diff --git a/bs4/dammit.py b/bs4/dammit.py
index 71281d2..cb6d354 100644
--- a/bs4/dammit.py
+++ b/bs4/dammit.py
@@ -223,24 +223,25 @@ class EncodingDetector:
self.declared_encoding = None
self.sniffed_encoding = None
- def _yield(self, encoding, tried):
+ def _usable(self, encoding, tried):
if encoding not in tried and encoding is not None:
- yield encoding
tried.add(encoding)
+ return True
+ return False
@property
def encodings(self):
"""Yield a number of encodings that might work for this markup."""
tried = set()
for e in self.override_encodings:
- for x in self._yield(e, tried):
- yield x
+ if self._usable(e, tried):
+ yield e
if self.sniffed_encoding is None:
(self.markup,
self.sniffed_encoding) = self.strip_byte_order_mark(self.markup)
- for x in self._yield(self.sniffed_encoding, tried):
- yield x
+ if self._usable(self.sniffed_encoding, tried):
+ yield self.sniffed_encoding
if self.declared_encoding is None:
self.declared_encoding = self.find_declared_encoding(
@@ -261,20 +262,18 @@ class EncodingDetector:
# only called if the sniffed encoding didn't work.
self.declared_encoding = self.sniffed_encoding
- if self.declared_encoding is not None:
- for x in self._yield(self.declared_encoding, tried):
- yield x
+ if self._usable(self.declared_encoding, tried):
+ yield self.declared_encoding
if self.chardet_encoding is None:
self.chardet_encoding = chardet_dammit(self.markup)
- if self.chardet_encoding is not None:
- for x in self._yield(self.chardet_encoding, tried):
- yield x
+ if self._usable(self.chardet_encoding, tried):
+ yield self.chardet_encoding
# As a last-ditch effort, try utf-8 and windows-1252.
for e in ('utf-8', 'windows-1252'):
- for x in self._yield(e, tried):
- yield x
+ if self._usable(e, tried):
+ yield e
@classmethod
def strip_byte_order_mark(cls, markup):