summary | refs | log | tree | commit | diff
path: root/bs4/dammit.py
diff options
context:
space:
mode:
author: Leonard Richardson <leonardr@segfault.org> 2015-07-03 09:20:54 -0400
committer: Leonard Richardson <leonardr@segfault.org> 2015-07-03 09:20:54 -0400
commit: fdfe4bfbfe66420cd33b318d033de09a35649416 (patch)
tree: 291dae45c303d30206f632e02e9b381656e090c1 /bs4/dammit.py
parent: b4a56ad4d0dafdf90fb28bbcdcf24019d78a0ef4 (diff)
Unicode data cannot have a byte-order mark. Returning early stops a warning from happening.
Diffstat (limited to 'bs4/dammit.py')
-rw-r--r-- bs4/dammit.py 3
1 files changed, 3 insertions, 0 deletions
diff --git a/bs4/dammit.py b/bs4/dammit.py
index 8e6b347..317ad6d 100644
--- a/bs4/dammit.py
+++ b/bs4/dammit.py
@@ -272,6 +272,9 @@ class EncodingDetector:
def strip_byte_order_mark(cls, data):
"""If a byte-order mark is present, strip it and return the encoding it implies."""
encoding = None
+ if isinstance(data, unicode):
+ # Unicode data cannot have a byte-order mark.
+ return data, encoding
if (len(data) >= 4) and (data[:2] == b'\xfe\xff') \
and (data[2:4] != '\x00\x00'):
encoding = 'utf-16be'