summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/htscharset.c16
-rw-r--r--src/htscharset.h5
2 files changed, 21 insertions, 0 deletions
diff --git a/src/htscharset.c b/src/htscharset.c
index a65e299..54465ae 100644
--- a/src/htscharset.c
+++ b/src/htscharset.c
@@ -970,6 +970,22 @@ char *hts_convertStringUTF8ToIDNA(const char *s, size_t size) {
return dest;
}
/**
 * Tell whether the string "s" (of "size" bytes) contains at least one
 * IDNA-encoded segment, that is, a segment longer than four characters
 * that starts with the "xn--" prefix (compared case-insensitively).
 * Segments are delimited by '.', ':', '/', '?', a NUL byte, or the end
 * of the buffer.
 * Returns 1 if such a segment is found, 0 otherwise.
 **/
int hts_isStringIDNA(const char *s, size_t size) {
  size_t i, startSeg;

  /* i == size is visited on purpose: it acts as a virtual terminator
     that closes the last segment. */
  for(i = startSeg = 0 ; i <= size ; i++) {
    const unsigned char c = i < size ? (unsigned char) s[i] : 0;

    if (c == 0 || c == '.' || c == ':' || c == '/' || c == '?') {
      const size_t segSize = i - startSeg;

      /* IDNA segment ? */
      if (segSize > 4
        && strncasecmp(&s[startSeg], "xn--", 4) == 0) {
        return 1;
      }
      /* The next segment begins right after this delimiter; without
         this, every test would be made against the start of the string
         and segments past the first one would never be examined. */
      startSeg = i + 1;
    }
  }
  return 0;
}
+
char *hts_convertStringIDNAToUTF8(const char *s, size_t size) {
char *dest = NULL;
size_t capa = 0, destSize = 0;
diff --git a/src/htscharset.h b/src/htscharset.h
index 5851ba3..c992d23 100644
--- a/src/htscharset.h
+++ b/src/htscharset.h
@@ -69,6 +69,11 @@ extern char *hts_convertStringUTF8ToIDNA(const char *s, size_t size);
extern char *hts_convertStringIDNAToUTF8(const char *s, size_t size);
/**
+ * Check whether the string "s" (of "size" bytes) has an IDNA segment,
+ * that is, a segment starting with the "xn--" prefix (case-insensitive).
+ * Returns 1 if one is found, 0 otherwise.
+ **/
+extern int hts_isStringIDNA(const char *s, size_t size);
+
+/**
* Extract the charset from the HTML buffer "html"
**/
extern char *hts_getCharsetFromMeta(const char *html, size_t size);