summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Xavier Roche <xroche@users.noreply.github.com> 2013-05-21 19:08:12 +0000
committer Xavier Roche <xroche@users.noreply.github.com> 2013-05-21 19:08:12 +0000
commit 8c00d09bb4be779e888dbc78b688462e7bb6dbdf (patch)
tree 8a219d571a73fc641a7b0c57e730b5c105b6da82
parent 03d57e2e73891f28f09a721d5d58108912d7157f (diff)
Fixed hts_isCharsetUTF8()
Added hts_copyStringUTF8()
-rw-r--r-- src/htscharset.c 18
-rw-r--r-- src/htscharset.h 10
2 files changed, 28 insertions, 0 deletions
diff --git a/src/htscharset.c b/src/htscharset.c
index 54465ae..1e46562 100644
--- a/src/htscharset.c
+++ b/src/htscharset.c
@@ -547,6 +547,22 @@ size_t hts_stringLengthUTF8(const char *s) {
return len;
}
+/**
+ * Copy at most 'size' bytes of the NUL-terminated string 'src' into 'dest',
+ * never cutting a multi-byte UTF-8 sequence in half, and terminate 'dest'
+ * with a \0.
+ *
+ * The terminating \0 may be stored at dest[size], so 'dest' must provide
+ * room for at least size + 1 bytes.
+ *
+ * Returns the number of bytes copied, not including the terminating \0.
+ **/
+size_t hts_copyStringUTF8(char *dest, const char *src, size_t size) {
+  const unsigned char *const bytes = (const unsigned char *) src;
+  size_t len, i;
+
+  /* Length of src, capped at 'size'. The terminator is tested before
+     advancing, so no byte past the end of src is ever read. */
+  for(len = 0; len < size && bytes[len] != '\0'; len++) {
+  }
+
+  /* src is longer than 'size': bytes[len] is the first byte that would be
+     dropped. If it is not the start of a sequence, the cut falls inside a
+     multi-byte sequence; step back until the cut sits on a sequence start,
+     so that the partial sequence is dropped as a whole.
+     NOTE(review): relies on HTS_IS_LEADING_UTF8() (defined elsewhere in
+     this file) being false only for UTF-8 continuation bytes - confirm. */
+  if (bytes[len] != '\0') {
+    while(len > 0 && !HTS_IS_LEADING_UTF8(bytes[len])) {
+      len--;
+    }
+  }
+
+  /* Copy the retained prefix, then terminate. */
+  for(i = 0; i < len; i++) {
+    dest[i] = src[i];
+  }
+  dest[len] = '\0';
+
+  return len;
+}
+
int hts_isCharsetUTF8(const char *charset) {
return charset != NULL
&& ( strcasecmp(charset, "utf-8") == 0
@@ -981,6 +997,8 @@ int hts_isStringIDNA(const char *s, size_t size) {
&& strncasecmp(&s[startSeg], "xn--", 4) == 0) {
return 1;
}
+ /* next segment start */
+ startSeg = i + 1;
}
}
return 0;
diff --git a/src/htscharset.h b/src/htscharset.h
index c992d23..0551e88 100644
--- a/src/htscharset.h
+++ b/src/htscharset.h
@@ -93,6 +93,16 @@ extern int hts_isCharsetUTF8(const char *charset);
**/
extern size_t hts_stringLengthUTF8(const char *s);
+/**
+ * Copy at most 'nBytes' bytes from src to dest, not truncating UTF-8
+ * sequences.
+ * The terminating \0 may be written at dest[nBytes], so dest must provide
+ * room for at least nBytes + 1 bytes.
+ * Returns the number of bytes copied, not including the terminating \0.
+ **/
+extern size_t hts_copyStringUTF8(char *dest, const char *src,
+ size_t nBytes);
+
+/* WIN32 specific. */
+
#ifdef _WIN32
/**