summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/htsname.c49
-rw-r--r--src/htsname.h6
-rw-r--r--src/htsparse.c68
3 files changed, 57 insertions, 66 deletions
diff --git a/src/htsname.c b/src/htsname.c
index 9f5d753..44fd6fd 100644
--- a/src/htsname.c
+++ b/src/htsname.c
@@ -112,27 +112,14 @@ static void cleanDoubleSlash(char *s) {
}
}
-// legacy version, without page charset
+// forme le nom du fichier à sauver (save) à partir de fil et adr
+// système intelligent, qui renomme en cas de besoin (exemple: deux INDEX.HTML et index.html)
int url_savename(char *adr_complete, char *fil_complete, char *save,
char *former_adr, char *former_fil, char *referer_adr,
char *referer_fil, httrackp * opt, lien_url ** liens,
int lien_tot, struct_back * sback, cache_back * cache,
hash_struct * hash, int ptr, int numero_passe,
const lien_back * headers) {
- return url_savename2(adr_complete, fil_complete, save, former_adr, former_fil,
- referer_adr, referer_fil, opt, liens, lien_tot, sback,
- cache, hash, ptr, numero_passe, headers, /* unknown */
- NULL);
-}
-
-// forme le nom du fichier à sauver (save) à partir de fil et adr
-// système intelligent, qui renomme en cas de besoin (exemple: deux INDEX.HTML et index.html)
-int url_savename2(char *adr_complete, char *fil_complete, char *save,
- char *former_adr, char *former_fil, char *referer_adr,
- char *referer_fil, httrackp * opt, lien_url ** liens,
- int lien_tot, struct_back * sback, cache_back * cache,
- hash_struct * hash, int ptr, int numero_passe,
- const lien_back * headers, const char *charset) {
char catbuff[CATBUFF_SIZE];
const char *mime_type = (headers
&& !HTTP_IS_REDIRECT(headers->r.
@@ -670,10 +657,10 @@ int url_savename2(char *adr_complete, char *fil_complete, char *save,
strcpybuff(fil_complete, curr_fil);
// copier adr, fil
- return url_savename2(curr_adr, curr_fil, save, NULL, NULL,
- referer_adr, referer_fil, opt, liens,
- lien_tot, sback, cache, hash, ptr,
- numero_passe, NULL, charset);
+ return url_savename(curr_adr, curr_fil, save, NULL, NULL,
+ referer_adr, referer_fil, opt, liens,
+ lien_tot, sback, cache, hash, ptr,
+ numero_passe, NULL);
}
// --- --- ---
@@ -1374,18 +1361,18 @@ int url_savename2(char *adr_complete, char *fil_complete, char *save,
/* ensure that there is no ../ (potential vulnerability) */
fil_simplifie(save);
- /* convert name to UTF-8 ? */
- if (charset != NULL && charset[0] != '\0') {
- char *const s = hts_convertStringToUTF8(save, (int) strlen(save), charset);
-
- if (s != NULL) {
- hts_log_print(opt, LOG_DEBUG,
- "engine: save-name: charset conversion from '%s' to '%s' using charset '%s'",
- save, s, charset);
- strcpybuff(save, s);
- free(s);
- }
- }
+ /* convert name to UTF-8 ? Note: already done while parsing. */
+ //if (charset != NULL && charset[0] != '\0') {
+ // char *const s = hts_convertStringToUTF8(save, (int) strlen(save), charset);
+
+ // if (s != NULL) {
+ // hts_log_print(opt, LOG_DEBUG,
+ // "engine: save-name: charset conversion from '%s' to '%s' using charset '%s'",
+ // save, s, charset);
+ // strcpybuff(save, s);
+ // free(s);
+ // }
+ //}
/* callback */
RUN_CALLBACK5(opt, savename, adr_complete, fil_complete, referer_adr,
diff --git a/src/htsname.h b/src/htsname.h
index 433c75e..f7949b7 100644
--- a/src/htsname.h
+++ b/src/htsname.h
@@ -89,12 +89,6 @@ int url_savename(char *adr_complete, char *fil_complete, char *save,
int lien_tot, struct_back * sback, cache_back * cache,
hash_struct * hash, int ptr, int numero_passe,
const lien_back * headers);
-int url_savename2(char *adr_complete, char *fil_complete, char *save,
- char *former_adr, char *former_fil, char *referer_adr,
- char *referer_fil, httrackp * opt, lien_url ** liens,
- int lien_tot, struct_back * sback, cache_back * cache,
- hash_struct * hash, int ptr, int numero_passe,
- const lien_back * headers, const char *charset);
void standard_name(char *b, char *dot_pos, char *nom_pos, char *fil_complete,
int short_ver);
void url_savename_addstr(char *d, char *s);
diff --git a/src/htsparse.c b/src/htsparse.c
index 1dd6e5f..fc3b6e1 100644
--- a/src/htsparse.c
+++ b/src/htsparse.c
@@ -2082,6 +2082,9 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
/* Unescape/escape %20 and other   */
{
+ const char *const charset = str->page_charset_;
+ const int hasCharset = charset != NULL
+ && *charset != '\0';
char BIGSTK query[HTS_URLMAXSIZE * 2];
char *a = strchr(lien, '?');
@@ -2094,13 +2097,24 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
unescape_amp(lien);
unescape_amp(query);
// décoder l'inutile (%2E par exemple) et coder espaces
- // Bad: strcpybuff(lien,unescape_http(lien));
- // Bad: strcpybuff(lien,unescape_http_unharm(lien, (no_esc_utf)?0:1));
- /* Never unescape high-chars (we don't know the encoding!!) */
- strcpybuff(lien, unescape_http_unharm(catbuff, lien, 1)); /* note: '%' is still escaped */
+ // Unescape high-chars only when the page charset is known, so they can be converted to UTF-8
+ strcpybuff(lien, unescape_http_unharm(catbuff, lien, !hasCharset)); /* note: '%' is still escaped */
escape_remove_control(lien);
// ???? No! escape_spc_url(lien);
strcatbuff(lien, query); /* restore */
+
+ // Charset conversion for the URI filename
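+ // (intended: not for the query string! NOTE(review): query was already re-appended to lien above, so it is converted too -- confirm)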
+ if (hasCharset) {
+ char *const s = hts_convertStringToUTF8(lien, (int) strlen(lien), charset);
+ if (s != NULL) {
+ hts_log_print(opt, LOG_DEBUG,
+ "engine: save-name: charset conversion from '%s' to '%s' using charset '%s'",
+ lien, s, charset);
+ strcpybuff(lien, s);
+ free(s);
+ }
+ }
}
// convertir les éventuels \ en des / pour éviter des problèmes de reconnaissance!
@@ -2495,10 +2509,10 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
strcpybuff(last_adr, adr); // ancienne adresse
//strcpybuff(last_fil,fil); // ancien chemin
r_sv =
- url_savename2(adr, fil, save, former_adr, former_fil,
- liens[ptr]->adr, liens[ptr]->fil, opt,
- liens, lien_tot, sback, cache, hash, ptr,
- numero_passe, NULL, str->page_charset_);
+ url_savename(adr, fil, save, former_adr, former_fil,
+ liens[ptr]->adr, liens[ptr]->fil, opt,
+ liens, lien_tot, sback, cache, hash, ptr,
+ numero_passe, NULL);
if (strcmp(jump_identification(last_adr), jump_identification(adr)) != 0) { // a changé
// 2e test si moved
@@ -3585,12 +3599,11 @@ int hts_mirror_check_moved(htsmoduleStruct * str,
char BIGSTK mov_sav[HTS_URLMAXSIZE * 2];
// calculer lien et éventuellement modifier addresse/fichier
- if (url_savename2
+ if (url_savename
(mov_adr, mov_fil, mov_sav, NULL, NULL,
liens[liens[ptr]->precedent]->adr,
liens[liens[ptr]->precedent]->fil, opt, liens, lien_tot,
- sback, cache, hash, ptr, numero_passe, NULL,
- str->page_charset_) != -1) {
+ sback, cache, hash, ptr, numero_passe, NULL) != -1) {
if (hash_read(hash, mov_sav, "", 0, 0) < 0) { // n'existe pas déja
// enregistrer lien (MACRO) avec SAV IDENTIQUE
liens_record(mov_adr, mov_fil, liens[ptr]->sav, "", "");
@@ -4026,10 +4039,9 @@ void hts_mirror_process_user_interaction(htsmoduleStruct * str,
char BIGSTK add_sav[HTS_URLMAXSIZE * 2];
// calculer lien et éventuellement modifier addresse/fichier
- if (url_savename2
+ if (url_savename
(add_adr, add_fil, add_sav, NULL, NULL, NULL, NULL, opt, liens,
- lien_tot, sback, cache, hash, ptr, numero_passe, NULL,
- str->page_charset_) != -1) {
+ lien_tot, sback, cache, hash, ptr, numero_passe, NULL) != -1) {
if (hash_read(hash, add_sav, "", 0, 0) < 0) { // n'existe pas déja
// enregistrer lien (MACRO)
liens_record(add_adr, add_fil, add_sav, "", "");
@@ -4552,9 +4564,9 @@ int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save,
/* Recompute filename with MIME type */
save[0] = '\0';
- url_savename2(adr, fil, save, former_adr, former_fil, liens[ptr]->adr,
- liens[ptr]->fil, opt, liens, lien_tot, sback, cache,
- hash, ptr, numero_passe, &back, str->page_charset_);
+ url_savename(adr, fil, save, former_adr, former_fil, liens[ptr]->adr,
+ liens[ptr]->fil, opt, liens, lien_tot, sback, cache,
+ hash, ptr, numero_passe, &back);
/* Recompute authorization with MIME type */
{
@@ -4625,10 +4637,9 @@ int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save,
/* Recompute filename with MIME type */
save[0] = '\0';
- url_savename2(adr, fil, save, former_adr, former_fil, liens[ptr]->adr,
- liens[ptr]->fil, opt, liens, lien_tot, sback, cache,
- hash, ptr, numero_passe, &delayed_back,
- str->page_charset_);
+ url_savename(adr, fil, save, former_adr, former_fil, liens[ptr]->adr,
+ liens[ptr]->fil, opt, liens, lien_tot, sback, cache,
+ hash, ptr, numero_passe, &delayed_back);
/* Recompute authorization with MIME type */
{
@@ -4814,10 +4825,10 @@ int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save,
/* Recompute filename for hash lookup */
save[0] = '\0';
- url_savename2(adr, fil, save, former_adr, former_fil,
- liens[ptr]->adr, liens[ptr]->fil, opt, liens,
- lien_tot, sback, cache, hash, ptr, numero_passe,
- &delayed_back, str->page_charset_);
+ url_savename(adr, fil, save, former_adr, former_fil,
+ liens[ptr]->adr, liens[ptr]->fil, opt, liens,
+ lien_tot, sback, cache, hash, ptr, numero_passe,
+ &delayed_back);
} else {
hts_log_print(opt, LOG_WARNING,
"Unable to test %s%s (loop to same filename)",
@@ -4833,10 +4844,9 @@ int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save,
if (!continue_loop) {
/* Recompute filename with MIME type */
save[0] = '\0';
- url_savename2(adr, fil, save, former_adr, former_fil,
- liens[ptr]->adr, liens[ptr]->fil, opt, liens, lien_tot,
- sback, cache, hash, ptr, numero_passe, &delayed_back,
- str->page_charset_);
+ url_savename(adr, fil, save, former_adr, former_fil,
+ liens[ptr]->adr, liens[ptr]->fil, opt, liens, lien_tot,
+ sback, cache, hash, ptr, numero_passe, &delayed_back);
/* Recompute authorization with MIME type */
{