From 7b5c1c5a8487fe9dfcd2799359a5395ccf797372 Mon Sep 17 00:00:00 2001 From: Xavier Roche Date: Sat, 1 Jun 2013 09:32:38 +0000 Subject: Do not magically detect UTF-8 pages as "utf-8" charset, because it changes the way links are decoded. --- src/htscore.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/htscore.c b/src/htscore.c index abf390a..e0293fa 100644 --- a/src/htscore.c +++ b/src/htscore.c @@ -1480,11 +1480,13 @@ int httpmirror(char *url1, httrackp * opt) { free(charset); } /* Could not detect charset: could it be UTF-8 ? */ - if (page_charset[0] == '\0') { - if (is_unicode_utf8(r.adr, r.size)) { - strcpy(page_charset, "utf-8"); - } - } + /* No, we can not do that: browsers do not do it + (and it would break links). */ + //if (page_charset[0] == '\0') { + // if (is_unicode_utf8(r.adr, r.size)) { + // strcpy(page_charset, "utf-8"); + // } + //} /* Could not detect charset */ if (page_charset[0] == '\0') { hts_log_print(opt, LOG_INFO, -- cgit v1.2.3