diff options
author | Xavier Roche <xroche@users.noreply.github.com> | 2013-05-31 11:38:53 +0000 |
---|---|---|
committer | Xavier Roche <xroche@users.noreply.github.com> | 2013-05-31 11:38:53 +0000 |
commit | bc31ec0da9573d482de24f27241482f50e46e60c (patch) | |
tree | e5e80dd055b2e4790802728d4e3b4b5b8c361277 /src/htscoremain.c | |
parent | 8767fd0e750b70a121d95e3ecf7e59bcec499d95 (diff) |
Fixed issue 14 (http://code.google.com/p/httrack/issues/detail?id=14)
Rationale:
* hostname is ASCII, non-ascii characters shall be encoded with IDNA
* URI filenames may embed non-ascii characters, which MUST be UTF-8 encoded
* query string may embed non-ascii characters, which are encoded with the page charset into %xx codes
Diffstat (limited to 'src/htscoremain.c')
-rw-r--r-- | src/htscoremain.c | 7 |
1 files changed, 5 insertions, 2 deletions
diff --git a/src/htscoremain.c b/src/htscoremain.c index 534c469..c2ff520 100644 --- a/src/htscoremain.c +++ b/src/htscoremain.c @@ -2342,10 +2342,13 @@ HTSEXT_API int hts_main2(int argc, char **argv, httrackp * opt) { htsmain_free(); return 0; break; - case '6': // entities: httrack -#6 + case '6': // entities: httrack -#6 "&foo;" ["encoding"] if (++na < argc) { char *const s = strdup(argv[na]); - if (s != NULL && hts_unescape_entities(s, s, strlen(s)) == 0) { + const char *const enc = na + 1 < argc ? argv[na + 1] : "UTF-8"; + if (s != NULL + && hts_unescapeEntitiesWithCharset(s, s, strlen(s), + enc) == 0) { printf("%s\n", s); free(s); } else { |