diff options
author | Xavier Roche <xroche@users.noreply.github.com> | 2013-05-19 10:07:45 +0000 |
---|---|---|
committer | Xavier Roche <xroche@users.noreply.github.com> | 2013-05-19 10:07:45 +0000 |
commit | 003cb47b95eb7676ca27791c97de7d884e83d058 (patch) | |
tree | 9ebb74dbf56c2efa3d1400a10f1922edcc64ab9f /src/htstools.c | |
parent | bb90afc68247b6158abd62430ae7ad4102683c47 (diff) |
Added support for IDNA / RFC 3492 (Punycode) handling within URLs.
Diffstat (limited to 'src/htstools.c')
-rw-r--r-- | src/htstools.c | 16 |
1 files changed, 16 insertions, 0 deletions
```diff
diff --git a/src/htstools.c b/src/htstools.c
index 885bdc8..3a5ca70 100644
--- a/src/htstools.c
+++ b/src/htstools.c
@@ -40,6 +40,7 @@ Please visit our Website: http://www.httrack.com
 #include "htscore.h"
 #include "htstools.h"
 #include "htsstrings.h"
+#include "htscharset.h"
 #ifdef _WIN32
 #include "windows.h"
 #else
@@ -276,6 +277,21 @@ int ident_url_relatif(const char *lien, const char *origin_adr,
     }
   }
 
+  // IDNA / RFC 3492 (Punycode) handling for HTTP(s)
+  if (!link_has_authority(adr) || strfield(adr, "https:")) {
+    char *const a = jump_identification(adr);
+    // Non-ASCII characters (theorically forbidden, but browsers are lenient)
+    if (!hts_isStringAscii(a, strlen(a))) {
+      char *const idna = hts_convertStringUTF8ToIDNA(a, strlen(a));
+      if (idna != NULL) {
+        if (strlen(idna) < HTS_URLMAXSIZE) {
+          strcpybuff(a, idna);
+        }
+        free(idna);
+      }
+    }
+  }
+
   return ok;
 }
 
```