diff options
Diffstat (limited to 'bs4/dammit.py')
-rw-r--r-- | bs4/dammit.py | 41 |
1 files changed, 23 insertions, 18 deletions
diff --git a/bs4/dammit.py b/bs4/dammit.py
index e017408..de016ca 100644
--- a/bs4/dammit.py
+++ b/bs4/dammit.py
@@ -16,32 +16,37 @@ import re
 import logging
 import string
 
-# Import a library to autodetect character encodings.
-chardet_type = None
+# Import a library to autodetect character encodings. We'll support
+# any of a number of libraries that all support the same API:
+#
+# * cchardet
+# * chardet
+# * charset-normalizer
+chardet_module = None
 try:
-    # First try the fast C implementation.
     # PyPI package: cchardet
-    import cchardet
-    def chardet_dammit(s):
-        if isinstance(s, str):
-            return None
-        return cchardet.detect(s)['encoding']
+    import cchardet as chardet_module
 except ImportError:
     try:
-        # Fall back to the pure Python implementation
         # Debian package: python-chardet
         # PyPI package: chardet
-        import chardet
-        def chardet_dammit(s):
-            if isinstance(s, str):
-                return None
-            return chardet.detect(s)['encoding']
-        #import chardet.constants
-        #chardet.constants._debug = 1
+        import chardet as chardet_module
     except ImportError:
-        # No chardet available.
-        def chardet_dammit(s):
+        try:
+            # PyPI package: charset-normalizer
+            import charset_normalizer as chardet_module
+        except ImportError:
+            # No chardet available.
+            chardet_module = None
+
+if chardet_module:
+    def chardet_dammit(s):
+        if isinstance(s, str):
             return None
+        return chardet_module.detect(s)['encoding']
+else:
+    def chardet_dammit(s):
+        return None
 
 # Available from http://cjkpython.i18n.org/.
 #