summaryrefslogtreecommitdiff
path: root/src/htscore.c
diff options
context:
space:
mode:
authorXavier Roche <xroche@users.noreply.github.com>2013-06-01 09:32:38 +0000
committerXavier Roche <xroche@users.noreply.github.com>2013-06-01 09:32:38 +0000
commit7b5c1c5a8487fe9dfcd2799359a5395ccf797372 (patch)
treefc54b6a2abcd0dd018d351d6290f498d15bf0d4c /src/htscore.c
parent36f1288c807a40db2dc96faf2096ae37d63cbf09 (diff)
Do not magically detect UTF-8 pages as "utf-8" charset, because it changes the way links are decoded.
Diffstat (limited to 'src/htscore.c')
-rw-r--r--src/htscore.c12
1 files changed, 7 insertions, 5 deletions
diff --git a/src/htscore.c b/src/htscore.c
index abf390a..e0293fa 100644
--- a/src/htscore.c
+++ b/src/htscore.c
@@ -1480,11 +1480,13 @@ int httpmirror(char *url1, httrackp * opt) {
free(charset);
}
/* Could not detect charset: could it be UTF-8 ? */
- if (page_charset[0] == '\0') {
- if (is_unicode_utf8(r.adr, r.size)) {
- strcpy(page_charset, "utf-8");
- }
- }
+ /* No, we can not do that: browsers do not do it
+ (and it would break links). */
+ //if (page_charset[0] == '\0') {
+ // if (is_unicode_utf8(r.adr, r.size)) {
+ // strcpy(page_charset, "utf-8");
+ // }
+ //}
/* Could not detect charset */
if (page_charset[0] == '\0') {
hts_log_print(opt, LOG_INFO,