diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/htsname.c | 49 | ||||
-rw-r--r-- | src/htsname.h | 6 | ||||
-rw-r--r-- | src/htsparse.c | 68 |
3 files changed, 57 insertions, 66 deletions
diff --git a/src/htsname.c b/src/htsname.c index 9f5d753..44fd6fd 100644 --- a/src/htsname.c +++ b/src/htsname.c @@ -112,27 +112,14 @@ static void cleanDoubleSlash(char *s) { } } -// legacy version, without page charset +// forme le nom du fichier à sauver (save) à partir de fil et adr +// système intelligent, qui renomme en cas de besoin (exemple: deux INDEX.HTML et index.html) int url_savename(char *adr_complete, char *fil_complete, char *save, char *former_adr, char *former_fil, char *referer_adr, char *referer_fil, httrackp * opt, lien_url ** liens, int lien_tot, struct_back * sback, cache_back * cache, hash_struct * hash, int ptr, int numero_passe, const lien_back * headers) { - return url_savename2(adr_complete, fil_complete, save, former_adr, former_fil, - referer_adr, referer_fil, opt, liens, lien_tot, sback, - cache, hash, ptr, numero_passe, headers, /* unknown */ - NULL); -} - -// forme le nom du fichier à sauver (save) à partir de fil et adr -// système intelligent, qui renomme en cas de besoin (exemple: deux INDEX.HTML et index.html) -int url_savename2(char *adr_complete, char *fil_complete, char *save, - char *former_adr, char *former_fil, char *referer_adr, - char *referer_fil, httrackp * opt, lien_url ** liens, - int lien_tot, struct_back * sback, cache_back * cache, - hash_struct * hash, int ptr, int numero_passe, - const lien_back * headers, const char *charset) { char catbuff[CATBUFF_SIZE]; const char *mime_type = (headers && !HTTP_IS_REDIRECT(headers->r. 
@@ -670,10 +657,10 @@ int url_savename2(char *adr_complete, char *fil_complete, char *save, strcpybuff(fil_complete, curr_fil); // copier adr, fil - return url_savename2(curr_adr, curr_fil, save, NULL, NULL, - referer_adr, referer_fil, opt, liens, - lien_tot, sback, cache, hash, ptr, - numero_passe, NULL, charset); + return url_savename(curr_adr, curr_fil, save, NULL, NULL, + referer_adr, referer_fil, opt, liens, + lien_tot, sback, cache, hash, ptr, + numero_passe, NULL); } // --- --- --- @@ -1374,18 +1361,18 @@ int url_savename2(char *adr_complete, char *fil_complete, char *save, /* ensure that there is no ../ (potential vulnerability) */ fil_simplifie(save); - /* convert name to UTF-8 ? */ - if (charset != NULL && charset[0] != '\0') { - char *const s = hts_convertStringToUTF8(save, (int) strlen(save), charset); - - if (s != NULL) { - hts_log_print(opt, LOG_DEBUG, - "engine: save-name: charset conversion from '%s' to '%s' using charset '%s'", - save, s, charset); - strcpybuff(save, s); - free(s); - } - } + /* convert name to UTF-8 ? Note: already done while parsing. 
*/ + //if (charset != NULL && charset[0] != '\0') { + // char *const s = hts_convertStringToUTF8(save, (int) strlen(save), charset); + + // if (s != NULL) { + // hts_log_print(opt, LOG_DEBUG, + // "engine: save-name: charset conversion from '%s' to '%s' using charset '%s'", + // save, s, charset); + // strcpybuff(save, s); + // free(s); + // } + //} /* callback */ RUN_CALLBACK5(opt, savename, adr_complete, fil_complete, referer_adr, diff --git a/src/htsname.h b/src/htsname.h index 433c75e..f7949b7 100644 --- a/src/htsname.h +++ b/src/htsname.h @@ -89,12 +89,6 @@ int url_savename(char *adr_complete, char *fil_complete, char *save, int lien_tot, struct_back * sback, cache_back * cache, hash_struct * hash, int ptr, int numero_passe, const lien_back * headers); -int url_savename2(char *adr_complete, char *fil_complete, char *save, - char *former_adr, char *former_fil, char *referer_adr, - char *referer_fil, httrackp * opt, lien_url ** liens, - int lien_tot, struct_back * sback, cache_back * cache, - hash_struct * hash, int ptr, int numero_passe, - const lien_back * headers, const char *charset); void standard_name(char *b, char *dot_pos, char *nom_pos, char *fil_complete, int short_ver); void url_savename_addstr(char *d, char *s); diff --git a/src/htsparse.c b/src/htsparse.c index 1dd6e5f..fc3b6e1 100644 --- a/src/htsparse.c +++ b/src/htsparse.c @@ -2082,6 +2082,9 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { /* Unescape/escape %20 and other */ { + const char *const charset = str->page_charset_; + const int hasCharset = charset != NULL + && *charset != '\0'; char BIGSTK query[HTS_URLMAXSIZE * 2]; char *a = strchr(lien, '?'); @@ -2094,13 +2097,24 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { unescape_amp(lien); unescape_amp(query); // décoder l'inutile (%2E par exemple) et coder espaces - // Bad: strcpybuff(lien,unescape_http(lien)); - // Bad: strcpybuff(lien,unescape_http_unharm(lien, (no_esc_utf)?0:1)); - /* Never 
unescape high-chars (we don't know the encoding!!) */ - strcpybuff(lien, unescape_http_unharm(catbuff, lien, 1)); /* note: '%' is still escaped */ + // Unescape high-chars for UTF-8 conversion + strcpybuff(lien, unescape_http_unharm(catbuff, lien, !hasCharset)); /* note: '%' is still escaped */ escape_remove_control(lien); // ???? No! escape_spc_url(lien); strcatbuff(lien, query); /* restore */ + + // Charset conversion for the URI filename + // (not for the query string!) + if (hasCharset) { + char *const s = hts_convertStringToUTF8(lien, (int) strlen(lien), charset); + if (s != NULL) { + hts_log_print(opt, LOG_DEBUG, + "engine: save-name: charset conversion from '%s' to '%s' using charset '%s'", + lien, s, charset); + strcpybuff(lien, s); + free(s); + } + } } // convertir les éventuels \ en des / pour éviter des problèmes de reconnaissance! @@ -2495,10 +2509,10 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { strcpybuff(last_adr, adr); // ancienne adresse //strcpybuff(last_fil,fil); // ancien chemin r_sv = - url_savename2(adr, fil, save, former_adr, former_fil, - liens[ptr]->adr, liens[ptr]->fil, opt, - liens, lien_tot, sback, cache, hash, ptr, - numero_passe, NULL, str->page_charset_); + url_savename(adr, fil, save, former_adr, former_fil, + liens[ptr]->adr, liens[ptr]->fil, opt, + liens, lien_tot, sback, cache, hash, ptr, + numero_passe, NULL); if (strcmp(jump_identification(last_adr), jump_identification(adr)) != 0) { // a changé // 2e test si moved @@ -3585,12 +3599,11 @@ int hts_mirror_check_moved(htsmoduleStruct * str, char BIGSTK mov_sav[HTS_URLMAXSIZE * 2]; // calculer lien et éventuellement modifier addresse/fichier - if (url_savename2 + if (url_savename (mov_adr, mov_fil, mov_sav, NULL, NULL, liens[liens[ptr]->precedent]->adr, liens[liens[ptr]->precedent]->fil, opt, liens, lien_tot, - sback, cache, hash, ptr, numero_passe, NULL, - str->page_charset_) != -1) { if 
(hash_read(hash, mov_sav, "", 0, 0) < 0) { // n'existe pas déja // enregistrer lien (MACRO) avec SAV IDENTIQUE liens_record(mov_adr, mov_fil, liens[ptr]->sav, "", ""); @@ -4026,10 +4039,9 @@ void hts_mirror_process_user_interaction(htsmoduleStruct * str, char BIGSTK add_sav[HTS_URLMAXSIZE * 2]; // calculer lien et éventuellement modifier addresse/fichier - if (url_savename2 + if (url_savename (add_adr, add_fil, add_sav, NULL, NULL, NULL, NULL, opt, liens, - lien_tot, sback, cache, hash, ptr, numero_passe, NULL, - str->page_charset_) != -1) { + lien_tot, sback, cache, hash, ptr, numero_passe, NULL) != -1) { if (hash_read(hash, add_sav, "", 0, 0) < 0) { // n'existe pas déja // enregistrer lien (MACRO) liens_record(add_adr, add_fil, add_sav, "", ""); @@ -4552,9 +4564,9 @@ int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save, /* Recompute filename with MIME type */ save[0] = '\0'; - url_savename2(adr, fil, save, former_adr, former_fil, liens[ptr]->adr, - liens[ptr]->fil, opt, liens, lien_tot, sback, cache, - hash, ptr, numero_passe, &back, str->page_charset_); + url_savename(adr, fil, save, former_adr, former_fil, liens[ptr]->adr, + liens[ptr]->fil, opt, liens, lien_tot, sback, cache, + hash, ptr, numero_passe, &back); /* Recompute authorization with MIME type */ { @@ -4625,10 +4637,9 @@ int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save, /* Recompute filename with MIME type */ save[0] = '\0'; - url_savename2(adr, fil, save, former_adr, former_fil, liens[ptr]->adr, - liens[ptr]->fil, opt, liens, lien_tot, sback, cache, - hash, ptr, numero_passe, &delayed_back, - str->page_charset_); + url_savename(adr, fil, save, former_adr, former_fil, liens[ptr]->adr, + liens[ptr]->fil, opt, liens, lien_tot, sback, cache, + hash, ptr, numero_passe, &delayed_back); /* Recompute authorization with MIME type */ { @@ -4814,10 +4825,10 @@ int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save, /* Recompute filename 
for hash lookup */ save[0] = '\0'; - url_savename2(adr, fil, save, former_adr, former_fil, - liens[ptr]->adr, liens[ptr]->fil, opt, liens, - lien_tot, sback, cache, hash, ptr, numero_passe, - &delayed_back, str->page_charset_); + url_savename(adr, fil, save, former_adr, former_fil, + liens[ptr]->adr, liens[ptr]->fil, opt, liens, + lien_tot, sback, cache, hash, ptr, numero_passe, + &delayed_back); } else { hts_log_print(opt, LOG_WARNING, "Unable to test %s%s (loop to same filename)", @@ -4833,10 +4844,9 @@ int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save, if (!continue_loop) { /* Recompute filename with MIME type */ save[0] = '\0'; - url_savename2(adr, fil, save, former_adr, former_fil, - liens[ptr]->adr, liens[ptr]->fil, opt, liens, lien_tot, - sback, cache, hash, ptr, numero_passe, &delayed_back, - str->page_charset_); + url_savename(adr, fil, save, former_adr, former_fil, + liens[ptr]->adr, liens[ptr]->fil, opt, liens, lien_tot, + sback, cache, hash, ptr, numero_passe, &delayed_back); /* Recompute authorization with MIME type */ { |