summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/htscharset.c 18
-rw-r--r-- src/htscharset.h 10
2 files changed, 28 insertions, 0 deletions
diff --git a/src/htscharset.c b/src/htscharset.c
index 54465ae..1e46562 100644
--- a/src/htscharset.c
+++ b/src/htscharset.c
@@ -547,6 +547,22 @@ size_t hts_stringLengthUTF8(const char *s) {
return len;
}
+/*
+ * Copy at most 'size' bytes of the NUL-terminated string 'src' into 'dest',
+ * then append a terminating \0. If the byte limit falls inside a multi-byte
+ * UTF-8 sequence, the whole sequence is dropped so that 'dest' never ends
+ * with a truncated character.
+ *
+ * 'dest' must have room for at least 'size' + 1 bytes (payload plus \0).
+ * Returns the number of bytes copied, not including the terminating \0.
+ */
+size_t hts_copyStringUTF8(char *dest, const char *src, size_t size) {
+  const unsigned char *const bytes = (const unsigned char *) src;
+  size_t len;
+
+  /* Raw bounded copy: stop at the end of the string or at the byte limit. */
+  for(len = 0; len < size && bytes[len] != '\0'; len++) {
+    dest[len] = src[len];
+  }
+
+  /* bytes[len] is the first byte that was NOT copied. If the source goes on
+     and that byte is not the start of a character, the limit split a
+     sequence: back up to the leading byte of that sequence and cut there. */
+  if (bytes[len] != '\0') {
+    while(len > 0 && !HTS_IS_LEADING_UTF8(bytes[len])) {
+      len--;
+    }
+  }
+
+  dest[len] = '\0';
+
+  return len;
+}
+
int hts_isCharsetUTF8(const char *charset) {
return charset != NULL
&& ( strcasecmp(charset, "utf-8") == 0
@@ -981,6 +997,8 @@ int hts_isStringIDNA(const char *s, size_t size) {
&& strncasecmp(&s[startSeg], "xn--", 4) == 0) {
return 1;
}
+ /* next segment start */
+ startSeg = i + 1;
}
}
return 0;
diff --git a/src/htscharset.h b/src/htscharset.h
index c992d23..0551e88 100644
--- a/src/htscharset.h
+++ b/src/htscharset.h
@@ -93,6 +93,16 @@ extern int hts_isCharsetUTF8(const char *charset);
**/
extern size_t hts_stringLengthUTF8(const char *s);
+/**
+ * Copy at most 'nBytes' bytes from src to dest, not truncating UTF-8
+ * sequences.
+ * 'dest' is the destination buffer; the terminating \0 may be written at
+ * index 'nBytes', so it must have room for at least 'nBytes' + 1 bytes.
+ * 'src' is the \0-terminated source string.
+ * 'nBytes' is the maximum number of bytes to copy, terminating \0 excluded.
+ * Returns the number of bytes copied, not including the terminating \0.
+ **/
+extern size_t hts_copyStringUTF8(char *dest, const char *src,
+ size_t nBytes);
+
+/* WIN32 specific. */
+
#ifdef _WIN32
/**