summary | refs | log | tree | commit | diff
path: root/bs4/dammit.py
diff options
context:
space:
mode:
Diffstat (limited to 'bs4/dammit.py')
-rw-r--r-- bs4/dammit.py 41
1 files changed, 23 insertions, 18 deletions
diff --git a/bs4/dammit.py b/bs4/dammit.py
index e017408..de016ca 100644
--- a/bs4/dammit.py
+++ b/bs4/dammit.py
@@ -16,32 +16,37 @@ import re
import logging
import string
-# Import a library to autodetect character encodings.
-chardet_type = None
+# Import a library to autodetect character encodings. We'll support
+# any of a number of libraries that all support the same API:
+#
+# * cchardet
+# * chardet
+# * charset-normalizer
+chardet_module = None
try:
- # First try the fast C implementation.
# PyPI package: cchardet
- import cchardet
- def chardet_dammit(s):
- if isinstance(s, str):
- return None
- return cchardet.detect(s)['encoding']
+ import cchardet as chardet_module
except ImportError:
try:
- # Fall back to the pure Python implementation
# Debian package: python-chardet
# PyPI package: chardet
- import chardet
- def chardet_dammit(s):
- if isinstance(s, str):
- return None
- return chardet.detect(s)['encoding']
- #import chardet.constants
- #chardet.constants._debug = 1
+ import chardet as chardet_module
except ImportError:
- # No chardet available.
- def chardet_dammit(s):
+ try:
+ # PyPI package: charset-normalizer
+ import charset_normalizer as chardet_module
+ except ImportError:
+ # No chardet available.
+ chardet_module = None
+
+if chardet_module:
+ def chardet_dammit(s):
+ if isinstance(s, str):
return None
+ return chardet_module.detect(s)['encoding']
+else:
+ def chardet_dammit(s):
+ return None
# Available from http://cjkpython.i18n.org/.
#