summaryrefslogtreecommitdiff
path: root/src/htscharset.c
diff options
context:
space:
mode:
author Xavier Roche <xroche@users.noreply.github.com> 2013-07-12 15:24:59 +0000
committer Xavier Roche <xroche@users.noreply.github.com> 2013-07-12 15:24:59 +0000
commit 676776e99be7a347eab5d4bbf4196204467571f5 (patch)
tree 304523ad7f4b08713c4f4837bc8e93c0a9f45e79 /src/htscharset.c
parent 98ea5be208a248bb7006748a64c1f5f59b2f5c6d (diff)
Added hts_isStringUTF8() function.
Diffstat (limited to 'src/htscharset.c')
-rw-r--r-- src/htscharset.c 22
1 files changed, 22 insertions, 0 deletions
diff --git a/src/htscharset.c b/src/htscharset.c
index 6386c09..614f8b4 100644
--- a/src/htscharset.c
+++ b/src/htscharset.c
@@ -1192,6 +1192,28 @@ hts_UCS4* hts_convertUTF8StringToUCS4(const char *s, size_t size, size_t *nChars
return dest;
}
+int hts_isStringUTF8(const char *s, size_t size) {
+ const unsigned char *const data = (const unsigned char*) s;
+ size_t i;
+ /* Returns 1 when READ_UNICODE never hands -1 to WR over the 'size' bytes of 's' (also when size is 0), 0 otherwise. */
+ for(i = 0 ; i < size ; ) {
+ hts_UCS4 uc; /* NOTE(review): not referenced in the visible code -- check whether READ_UNICODE needs it */
+ /* NOTE(review): READ_UNICODE is defined elsewhere; presumably it decodes one UTF-8 sequence via RD and reports the code point, or -1 on error, via WR -- confirm. */
+ /* Reader: yields the next byte of 's' and advances i, or -1 once all 'size' bytes have been consumed */
+#define RD ( i < size ? data[i++] : -1 )
+
+ /* Writer: a value of -1 makes the function return 0; any other value is ignored */
+#define WR(C) if ((C) == -1) { return 0; }
+
+ /* Read Unicode character; in the visible code i only advances through RD, as the for statement has no increment clause. */
+ READ_UNICODE(RD, WR);
+#undef RD
+#undef WR
+ }
+
+ return 1;
+}
+
char *hts_convertUCS4StringToUTF8(const hts_UCS4 *s, size_t nChars) {
size_t i;
char *dest = NULL;