diff options
author | Leonard Richardson <leonardr@segfault.org> | 2015-07-03 09:20:54 -0400 |
---|---|---|
committer | Leonard Richardson <leonardr@segfault.org> | 2015-07-03 09:20:54 -0400 |
commit | fdfe4bfbfe66420cd33b318d033de09a35649416 (patch) | |
tree | 291dae45c303d30206f632e02e9b381656e090c1 /bs4/dammit.py | |
parent | b4a56ad4d0dafdf90fb28bbcdcf24019d78a0ef4 (diff) |
Unicode data cannot have a byte-order mark. Returning early stops a warning from happening.
Diffstat (limited to 'bs4/dammit.py')
-rw-r--r-- | bs4/dammit.py | 3 |
1 file changed, 3 insertions, 0 deletions
diff --git a/bs4/dammit.py b/bs4/dammit.py
index 8e6b347..317ad6d 100644
--- a/bs4/dammit.py
+++ b/bs4/dammit.py
@@ -272,6 +272,9 @@ class EncodingDetector:
     def strip_byte_order_mark(cls, data):
         """If a byte-order mark is present, strip it and return the encoding it implies."""
         encoding = None
+        if isinstance(data, unicode):
+            # Unicode data cannot have a byte-order mark.
+            return data, encoding
         if (len(data) >= 4) and (data[:2] == b'\xfe\xff') \
                and (data[2:4] != '\x00\x00'):
             encoding = 'utf-16be'