summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/htscharset.c22
-rw-r--r--src/htscharset.h5
2 files changed, 27 insertions, 0 deletions
diff --git a/src/htscharset.c b/src/htscharset.c
index 6386c09..614f8b4 100644
--- a/src/htscharset.c
+++ b/src/htscharset.c
@@ -1192,6 +1192,28 @@ hts_UCS4* hts_convertUTF8StringToUCS4(const char *s, size_t size, size_t *nChars
return dest;
}
+int hts_isStringUTF8(const char *s, size_t size) {
+ const unsigned char *const data = (const unsigned char*) s;
+ size_t i;
+
+ for(i = 0 ; i < size ; ) {
+ hts_UCS4 uc; /* NOTE(review): not referenced in this function's own macros; confirm whether READ_UNICODE needs it */
+
+ /* Reader: yields the next byte while i < size, or -1 once the input is exhausted */
+#define RD ( i < size ? data[i++] : -1 )
+
+ /* Writer: a value of -1 makes the function return 0; any other value is discarded */
+#define WR(C) if ((C) == -1) { return 0; }
+
+ /* Read one Unicode character via RD and hand it to WR (READ_UNICODE is defined elsewhere; presumably it passes -1 for a malformed sequence - confirm). */
+ READ_UNICODE(RD, WR);
+#undef RD
+#undef WR
+ }
+
+ return 1; /* the whole input was consumed without WR receiving -1 */
+}
+
char *hts_convertUCS4StringToUTF8(const hts_UCS4 *s, size_t nChars) {
size_t i;
char *dest = NULL;
diff --git a/src/htscharset.h b/src/htscharset.h
index 92f8b7c..0b5b4f2 100644
--- a/src/htscharset.h
+++ b/src/htscharset.h
@@ -87,6 +87,11 @@ extern char *hts_getCharsetFromMeta(const char *html, size_t size);
extern int hts_isStringAscii(const char *s, size_t size);
/**
+ * Is the given string a UTF-8 string ? (returns 1 if so, 0 otherwise)
+ **/
+extern int hts_isStringUTF8(const char *s, size_t size);
+
+/**
* Is the given charset the UTF-8 charset ?
**/
extern int hts_isCharsetUTF8(const char *charset);