diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/htscharset.c | 22 | ||||
-rw-r--r-- | src/htscharset.h | 5 |
2 files changed, 27 insertions, 0 deletions
diff --git a/src/htscharset.c b/src/htscharset.c index 6386c09..614f8b4 100644 --- a/src/htscharset.c +++ b/src/htscharset.c @@ -1192,6 +1192,28 @@ hts_UCS4* hts_convertUTF8StringToUCS4(const char *s, size_t size, size_t *nChars return dest; } +int hts_isStringUTF8(const char *s, size_t size) { /* Returns 1 when the loop consumes all `size` bytes of `s` without WR ever seeing -1, 0 otherwise; an empty input (size == 0) returns 1. */ + const unsigned char *const data = (const unsigned char*) s; + size_t i; + + for(i = 0 ; i < size ; ) { + hts_UCS4 uc; /* not referenced directly here; presumably used inside READ_UNICODE -- TODO confirm */ + + /* Reader: yields the next byte and advances i, or -1 once all `size` bytes have been consumed */ +#define RD ( i < size ? data[i++] : -1 ) + + /* Writer: nothing is stored; when handed -1 the whole function returns 0 (not UTF-8). Presumably READ_UNICODE passes -1 on a decoding error -- confirm against its definition. */ +#define WR(C) if ((C) == -1) { return 0; } + + /* Read one Unicode character; READ_UNICODE is defined outside this hunk and advances i through RD. */ + READ_UNICODE(RD, WR); +#undef RD +#undef WR + } + + return 1; +} + char *hts_convertUCS4StringToUTF8(const hts_UCS4 *s, size_t nChars) { size_t i; char *dest = NULL; diff --git a/src/htscharset.h b/src/htscharset.h index 92f8b7c..0b5b4f2 100644 --- a/src/htscharset.h +++ b/src/htscharset.h @@ -87,6 +87,11 @@ extern char *hts_getCharsetFromMeta(const char *html, size_t size); extern int hts_isStringAscii(const char *s, size_t size); /** + * Is the given string (s, of size bytes) a valid UTF-8 string ? Returns 1 if so, 0 otherwise. + **/ +extern int hts_isStringUTF8(const char *s, size_t size); + +/** + * Is the given charset the UTF-8 charset ? **/ extern int hts_isCharsetUTF8(const char *charset); |