From bc31ec0da9573d482de24f27241482f50e46e60c Mon Sep 17 00:00:00 2001 From: Xavier Roche Date: Fri, 31 May 2013 11:38:53 +0000 Subject: Fixed issue 14 (http://code.google.com/p/httrack/issues/detail?id=14) Rationale: * hostname is ASCII, non-ascii characters shall be encoded with IDNA * URI filenames may embed non-ascii characters, which MUST be UTF-8 encoded * query string may embed non-ascii characters, which are encoded with the page charset into %xx codes --- src/htscoremain.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'src/htscoremain.c') diff --git a/src/htscoremain.c b/src/htscoremain.c index 534c469..c2ff520 100644 --- a/src/htscoremain.c +++ b/src/htscoremain.c @@ -2342,10 +2342,13 @@ HTSEXT_API int hts_main2(int argc, char **argv, httrackp * opt) { htsmain_free(); return 0; break; - case '6': // entities: httrack -#6 + case '6': // entities: httrack -#6 "&foo;" ["encoding"] if (++na < argc) { char *const s = strdup(argv[na]); - if (s != NULL && hts_unescape_entities(s, s, strlen(s)) == 0) { + const char *const enc = na + 1 < argc ? argv[na + 1] : "UTF-8"; + if (s != NULL + && hts_unescapeEntitiesWithCharset(s, s, strlen(s), + enc) == 0) { printf("%s\n", s); free(s); } else { -- cgit v1.2.3