diff options
author | Xavier Roche <xroche@users.noreply.github.com> | 2013-07-12 15:24:59 +0000 |
---|---|---|
committer | Xavier Roche <xroche@users.noreply.github.com> | 2013-07-12 15:24:59 +0000 |
commit | 676776e99be7a347eab5d4bbf4196204467571f5 (patch) | |
tree | 304523ad7f4b08713c4f4837bc8e93c0a9f45e79 /src/htscharset.c | |
parent | 98ea5be208a248bb7006748a64c1f5f59b2f5c6d (diff) |
Added hts_isStringUTF8() function.
Diffstat (limited to 'src/htscharset.c')
-rw-r--r-- | src/htscharset.c | 22 |
1 files changed, 22 insertions, 0 deletions
diff --git a/src/htscharset.c b/src/htscharset.c index 6386c09..614f8b4 100644 --- a/src/htscharset.c +++ b/src/htscharset.c @@ -1192,6 +1192,28 @@ hts_UCS4* hts_convertUTF8StringToUCS4(const char *s, size_t size, size_t *nChars return dest; } +/* Scan the size bytes at s with READ_UNICODE (macro defined elsewhere in this file). Returns 0 as soon as WR is handed -1 (presumably a malformed or truncated UTF-8 sequence -- confirm against READ_UNICODE), otherwise 1 once every byte has been consumed; an empty input (size == 0) returns 1. */ +int hts_isStringUTF8(const char *s, size_t size) { + const unsigned char *const data = (const unsigned char*) s; + size_t i; + + for(i = 0 ; i < size ; ) { + hts_UCS4 uc; /* not referenced directly here; presumably used by READ_UNICODE -- confirm */ + + /* Reader: yields the next byte of data and advances i, or -1 once i has reached size */ +#define RD ( i < size ? data[i++] : -1 ) + + /* Writer: nothing is stored; a value of -1 makes the function return 0, any other value is ignored */ +#define WR(C) if ((C) == -1) { return 0; } + + /* Read Unicode character. */ + READ_UNICODE(RD, WR); +#undef RD +#undef WR + } + + return 1; +} + char *hts_convertUCS4StringToUTF8(const hts_UCS4 *s, size_t nChars) { size_t i; char *dest = NULL; |