diff options
author | Xavier Roche <xroche@users.noreply.github.com> | 2014-05-29 15:42:53 +0000 |
---|---|---|
committer | Xavier Roche <xroche@users.noreply.github.com> | 2014-05-29 15:42:53 +0000 |
commit | 4f5776a8fac840619f01a61a3655a4608e04d9fd (patch) | |
tree | b9bb0b6ce0eb39118ed5646f66687a616890248a | |
parent | 8adeadbdb63e1cb8e4d62fe400ffefada9122d86 (diff) |
Big cleanup: introduce the lien_adrfil and lien_adrfilsave structures, which hold address/URI and address/URI/save-filename respectively, rather than passing opaque char* buffers of unknown size.
-rw-r--r-- | src/htscore.c | 44 | ||||
-rw-r--r-- | src/htscore.h | 21 | ||||
-rw-r--r-- | src/htscoremain.c | 39 | ||||
-rw-r--r-- | src/htslib.c | 94 | ||||
-rw-r--r-- | src/htslib.h | 11 | ||||
-rw-r--r-- | src/htsname.c | 332 | ||||
-rw-r--r-- | src/htsname.h | 23 | ||||
-rw-r--r-- | src/htsparse.c | 812 | ||||
-rw-r--r-- | src/htsparse.h | 6 | ||||
-rw-r--r-- | src/htstools.c | 57 | ||||
-rw-r--r-- | src/htstools.h | 13 |
11 files changed, 730 insertions, 722 deletions
diff --git a/src/htscore.c b/src/htscore.c index c20c2a0..d2d396d 100644 --- a/src/htscore.c +++ b/src/htscore.c @@ -998,6 +998,10 @@ int httpmirror(char *url1, httrackp * opt) { } ptr++; } + // We're done! + if (ptr == opt->lien_tot) { + goto jump_if_done; + } } if (heap(ptr) != NULL) { // on a qq chose à récupérer? @@ -3716,8 +3720,7 @@ int htsAddLink(htsmoduleStruct * str, char *link) { if (link != NULL && str != NULL && link[0] != '\0') { ENGINE_LOAD_CONTEXT_BASE(); /* */ - char BIGSTK adr[HTS_URLMAXSIZE * 2], fil[HTS_URLMAXSIZE * 2], - save[HTS_URLMAXSIZE * 2]; + lien_adrfilsave afs; char BIGSTK codebase[HTS_URLMAXSIZE * 2]; /* */ @@ -3788,13 +3791,13 @@ int htsAddLink(htsmoduleStruct * str, char *link) { if (strnotempty(lien) && strlen(lien) < HTS_URLMAXSIZE) { // calculer les chemins et noms de sauvegarde - if (ident_url_relatif(lien, urladr(), codebase, adr, fil) >= 0) { // reformage selon chemin + if (ident_url_relatif(lien, urladr(), codebase, &afs.af) >= 0) { // reformage selon chemin int r; int set_prio_to = 0; int just_test_it = 0; forbidden_url = - hts_acceptlink(opt, ptr, adr, fil, NULL, NULL, &set_prio_to, &just_test_it); + hts_acceptlink(opt, ptr, afs.af.adr, afs.af.fil, NULL, NULL, &set_prio_to, &just_test_it); hts_log_print(opt, LOG_DEBUG, "result for wizard external module link: %d", forbidden_url); @@ -3811,29 +3814,26 @@ int htsAddLink(htsmoduleStruct * str, char *link) { opt->savename_83 = 0; // note: adr,fil peuvent être patchés r = - url_savename(adr, fil, save, NULL, NULL, NULL, NULL, opt, opt->liens, - opt->lien_tot, sback, cache, hashptr, ptr, numero_passe, + url_savename(&afs, NULL, NULL, NULL, opt, sback, cache, hashptr, ptr, numero_passe, NULL); // resolve unresolved type - if (r != -1 && forbidden_url == 0 && IS_DELAYED_EXT(save) + if (r != -1 && forbidden_url == 0 && IS_DELAYED_EXT(afs.save) ) { // pas d'erreur, on continue - char BIGSTK former_adr[HTS_URLMAXSIZE * 2]; - char BIGSTK former_fil[HTS_URLMAXSIZE * 2]; + lien_adrfil 
former; - former_adr[0] = former_fil[0] = '\0'; + former.adr[0] = former.fil[0] = '\0'; r = - hts_wait_delayed(str, adr, fil, save, NULL, NULL, former_adr, - former_fil, &forbidden_url); + hts_wait_delayed(str, &afs, NULL, NULL, &former, &forbidden_url); } // end resolve unresolved type opt->savename_type = a; opt->savename_83 = b; if (r != -1 && !forbidden_url) { if (savename()) { - if (lienrelatif(tempo, save, savename()) == 0) { + if (lienrelatif(tempo, afs.save, savename()) == 0) { hts_log_print(opt, LOG_DEBUG, "(module): relative link at %s build with %s and %s: %s", - adr, save, savename(), tempo); + afs.af.adr, afs.save, savename(), tempo); if (str->localLink && str->localLinkSize > (int) strlen(tempo) + 1) { strcpybuff(str->localLink, tempo); @@ -3847,19 +3847,19 @@ int htsAddLink(htsmoduleStruct * str, char *link) { hts_log_print(opt, LOG_DEBUG, "(module): file not caught: %s", lien); if (str->localLink - && str->localLinkSize > (int) (strlen(adr) + strlen(fil) + 8)) { + && str->localLinkSize > (int) (strlen(afs.af.adr) + strlen(afs.af.fil) + 8)) { str->localLink[0] = '\0'; - if (!link_has_authority(adr)) + if (!link_has_authority(afs.af.adr)) strcpybuff(str->localLink, "http://"); - strcatbuff(str->localLink, adr); - strcatbuff(str->localLink, fil); + strcatbuff(str->localLink, afs.af.adr); + strcatbuff(str->localLink, afs.af.fil); } r = -1; } // if (r != -1) { - hts_log_print(opt, LOG_DEBUG, "(module): %s%s -> %s (base %s)", adr, - fil, save, codebase); + hts_log_print(opt, LOG_DEBUG, "(module): %s%s -> %s (base %s)", afs.af.adr, + afs.af.fil, afs.save, codebase); // modifié par rapport à l'autre version (cf prio_fix notamment et save2) @@ -3869,7 +3869,7 @@ int htsAddLink(htsmoduleStruct * str, char *link) { // // On part de la fin et on essaye de se presser (économise temps machine) { - int i = hash_read(hashptr, save, NULL, HASH_STRUCT_FILENAME ); // lecture type 0 (sav) + int i = hash_read(hashptr, afs.save, NULL, HASH_STRUCT_FILENAME ); // lecture 
type 0 (sav) if (i >= 0) { heap(i)->depth = maximum(heap(i)->depth, prio_fix); @@ -3882,7 +3882,7 @@ int htsAddLink(htsmoduleStruct * str, char *link) { // >>>> CREER LE LIEN JAVA <<<< // enregistrer fichier (MACRO) - if (!hts_record_link(opt, adr, fil, save, "", "", "")) { // erreur, pas de place réservée + if (!hts_record_link(opt, afs.af.adr, afs.af.fil, afs.save, "", "", "")) { // erreur, pas de place réservée printf("PANIC! : Not enough memory [%d]\n", __LINE__); hts_log_print(opt, LOG_PANIC, "Not enough memory"); opt->state.exit_xh = -1; /* fatal error -> exit */ diff --git a/src/htscore.h b/src/htscore.h index 35593d1..774b1d7 100644 --- a/src/htscore.h +++ b/src/htscore.h @@ -152,6 +152,26 @@ struct lien_url { int testmode; // mode test uniquement, envoyer juste un head! }; +// adr, fil +#ifndef HTS_DEF_FWSTRUCT_lien_adrfil +#define HTS_DEF_FWSTRUCT_lien_adrfil +typedef struct lien_adrfil lien_adrfil; +#endif +struct lien_adrfil { + char adr[HTS_URLMAXSIZE * 2]; // adresse + char fil[HTS_URLMAXSIZE * 2]; // nom du fichier distant +}; + +// adr, fil, save +#ifndef HTS_DEF_FWSTRUCT_lien_adrfilsave +#define HTS_DEF_FWSTRUCT_lien_adrfilsave +typedef struct lien_adrfilsave lien_adrfilsave; +#endif +struct lien_adrfilsave { + lien_adrfil af; + char save[HTS_URLMAXSIZE * 2]; // nom à sauver sur disque (avec chemin éventuel) +}; + // chargement de fichiers en 'arrière plan' #ifndef HTS_DEF_FWSTRUCT_lien_back #define HTS_DEF_FWSTRUCT_lien_back @@ -398,7 +418,6 @@ int htsAddLink(htsmoduleStruct * str, char *link); // Void void voidf(void); - #define HTS_TOPINDEX "TOP_INDEX_HTTRACK" #endif diff --git a/src/htscoremain.c b/src/htscoremain.c index 1ca82e8..e5af672 100644 --- a/src/htscoremain.c +++ b/src/htscoremain.c @@ -2007,8 +2007,7 @@ HTSEXT_API int hts_main2(int argc, char **argv, httrackp * opt) { cache.hashtable = (void *) cache_hashtable; /* copy backcache hash */ cache.ro = 1; /* read only */ if (cache.hashtable) { - char BIGSTK adr[HTS_URLMAXSIZE * 2]; - char 
BIGSTK fil[HTS_URLMAXSIZE * 2]; + lien_adrfilsave afs; char BIGSTK url[HTS_URLMAXSIZE * 2]; char linepos[256]; int pos; @@ -2031,52 +2030,50 @@ HTSEXT_API int hts_main2(int argc, char **argv, httrackp * opt) { /* */ a++; /* read "host/file" */ - a += binput(a, adr, HTS_URLMAXSIZE); - a += binput(a, fil, HTS_URLMAXSIZE); + a += binput(a, afs.af.adr, HTS_URLMAXSIZE); + a += binput(a, afs.af.fil, HTS_URLMAXSIZE); url[0] = '\0'; - if (!link_has_authority(adr)) + if (!link_has_authority(afs.af.adr)) strcatbuff(url, "http://"); - strcatbuff(url, adr); - strcatbuff(url, fil); + strcatbuff(url, afs.af.adr); + strcatbuff(url, afs.af.fil); /* read position */ a += binput(a, linepos, 200); sscanf(linepos, "%d", &pos); if (!hasFilter || (strjoker(url, filter, NULL, NULL) != NULL) ) { - r = cache_read_ro(opt, &cache, adr, fil, "", NULL); // lire entrée cache + data + r = cache_read_ro(opt, &cache, afs.af.adr, afs.af.fil, "", NULL); // lire entrée cache + data if (r.statuscode != -1) { // No errors found++; if (!hasFilter) { fprintf(stdout, "%s%s%s\r\n", - (link_has_authority(adr)) ? "" : - "http://", adr, fil); + (link_has_authority(afs.af.adr)) ? "" : + "http://", afs.af.adr, afs.af.fil); } else { char msg[256], cdate[256]; - char BIGSTK sav[HTS_URLMAXSIZE * 2]; infostatuscode(msg, r.statuscode); time_gmt_rfc822(cdate); fprintf(stdout, "HTTP/1.1 %d %s\r\n", r.statuscode, r.msg[0] ? r.msg : msg); - fprintf(stdout, "X-Host: %s\r\n", adr); - fprintf(stdout, "X-File: %s\r\n", fil); + fprintf(stdout, "X-Host: %s\r\n", afs.af.adr); + fprintf(stdout, "X-File: %s\r\n", afs.af.fil); fprintf(stdout, "X-URL: %s%s%s\r\n", - (link_has_authority(adr)) ? "" : - "http://", adr, fil); + (link_has_authority(afs.af.adr)) ? 
"" : + "http://", afs.af.adr, afs.af.fil); if (url_savename - (adr, fil, sav, /*former_adr */ NULL, - /*former_fil */ NULL, /*referer_adr */ + (&afs, /*former */ NULL, + /*referer_adr */ NULL, /*referer_fil */ NULL, - /*opt */ opt, /*liens */ NULL, - /*lien_tot */ 0, /*sback */ NULL, + /*opt */ opt, /*sback */ NULL, /*cache */ &cache, /*hash */ NULL, /*ptr */ 0, /*numero_passe */ 0, /*mime_type */ NULL) != -1) { - if (fexist(sav)) { + if (fexist(afs.save)) { fprintf(stdout, "Content-location: %s\r\n", - sav); + afs.save); } } fprintf(stdout, "Date: %s\r\n", cdate); diff --git a/src/htslib.c b/src/htslib.c index c058733..fe2863e 100644 --- a/src/htslib.c +++ b/src/htslib.c @@ -1984,36 +1984,6 @@ LLint http_xfread1(htsblk * r, int bufl) { } } -// teste une adresse, et suit l'éventuel chemin "moved" -// retourne 200 ou le code d'erreur (404=NOT FOUND, etc) -// copie dans loc la véritable adresse si celle-ci est différente -htsblk http_location(httrackp * opt, char *adr, char *fil, char *loc) { - htsblk retour; - int retry = 0; - int tryagain; - - // note: "RFC says" - // 5 boucles au plus, on en teste au plus 8 ici - // sinon abandon.. - do { - tryagain = 0; - switch ((retour = http_test(opt, adr, fil, loc)).statuscode) { - case HTTP_OK: - break; // ok! - case HTTP_MOVED_PERMANENTLY: - case HTTP_FOUND: - case HTTP_SEE_OTHER: - case HTTP_TEMPORARY_REDIRECT: // moved! - // recalculer adr et fil! - if (ident_url_absolute(loc, adr, fil) != -1) { - tryagain = 1; // retenter - retry++; // ..encore une fois - } - } - } while((tryagain) && (retry < 5 + 3)); - return retour; -} - // teste si une URL (validité, header, taille) // retourne 200 ou le code d'erreur (404=NOT FOUND, etc) // en cas de moved xx, dans location @@ -2340,12 +2310,12 @@ T_SOC newhttp(httrackp * opt, const char *_iadr, htsblk * retour, int port, // couper http://www.truc.fr/pub/index.html -> www.truc.fr /pub/index.html // retour=-1 si erreur. // si file://... 
alors adresse=file:// (et coupe le ?query dans ce cas) -int ident_url_absolute(const char *url, char *adr, char *fil) { +int ident_url_absolute(const char *url, lien_adrfil *adrfil) { int pos = 0; int scheme = 0; - // effacer adr et fil - adr[0] = fil[0] = '\0'; + // effacer adrfil->adr et adrfil->fil + adrfil->adr[0] = adrfil->fil[0] = '\0'; #if HDEBUG printf("protocol: %s\n", url); @@ -2364,15 +2334,15 @@ int ident_url_absolute(const char *url, char *adr, char *fil) { // 1. optional scheme ":" if ((pos = strfield(url, "file:"))) { // fichier local!! (pour les tests) //!!p+=3; - strcpybuff(adr, "file://"); + strcpybuff(adrfil->adr, "file://"); } else if ((pos = strfield(url, "http:"))) { // HTTP //!!p+=3; } else if ((pos = strfield(url, "ftp:"))) { // FTP - strcpybuff(adr, "ftp://"); // FTP!! + strcpybuff(adrfil->adr, "ftp://"); // FTP!! //!!p+=3; #if HTS_USEOPENSSL } else if ((pos = strfield(url, "https:"))) { // HTTPS - strcpybuff(adr, "https://"); + strcpybuff(adrfil->adr, "https://"); #endif } else if (scheme) { return -1; // erreur non reconnu @@ -2385,13 +2355,13 @@ int ident_url_absolute(const char *url, char *adr, char *fil) { // (url+pos) now points to the path (not net path) - //## if (adr[0]!=lOCAL_CHAR) { // adresse normale http - if (!strfield(adr, "file:")) { // PAS file:// + //## if (adrfil->adr[0]!=lOCAL_CHAR) { // adrfil->adresse normale http + if (!strfield(adrfil->adr, "file:")) { // PAS adrfil->file:// const char *p, *q; p = url + pos; - // p pointe sur le début de l'adresse, ex: www.truc.fr/sommaire/index.html + // p pointe sur le début de l'adrfil->adresse, ex: www.truc.fr/sommaire/index.html q = strchr(jump_identification(p), '/'); if (q == 0) q = strchr(jump_identification(p), '?'); // http://www.foo.com?bar=1 @@ -2404,53 +2374,53 @@ int ident_url_absolute(const char *url, char *adr, char *fil) { //strcpybuff(retour.msg,"Path too long"); return -1; // erreur } - // recopier adresse www.. 
- strncatbuff(adr, p, ((int) (q - p))); - // *( adr+( ((int) q) - ((int) p) ) )=0; // faut arrêter la fumette! + // recopier adrfil->adresse www.. + strncatbuff(adrfil->adr, p, ((int) (q - p))); + // *( adrfil->adr+( ((int) q) - ((int) p) ) )=0; // faut arrêter la fumette! // recopier chemin /pub/.. if (q[0] != '/') // page par défaut (/) - strcatbuff(fil, "/"); - strcatbuff(fil, q); + strcatbuff(adrfil->fil, "/"); + strcatbuff(adrfil->fil, q); // SECURITE: // simplifier url pour les ../ - fil_simplifie(fil); - } else { // localhost file:// + fil_simplifie(adrfil->fil); + } else { // localhost adrfil->file:// const char *p; size_t i; char *a; p = url + pos; - if (*p == '/' || *p == '\\') { /* file:///.. */ - strcatbuff(fil, p); // fichier local ; adr="#" + if (*p == '/' || *p == '\\') { /* adrfil->file:///.. */ + strcatbuff(adrfil->fil, p); // fichier local ; adrfil->adr="#" } else { if (p[1] != ':') { - strcatbuff(fil, "//"); /* file://server/foo */ - strcatbuff(fil, p); + strcatbuff(adrfil->fil, "//"); /* adrfil->file://server/foo */ + strcatbuff(adrfil->fil, p); } else { - strcatbuff(fil, p); // file://C:\.. + strcatbuff(adrfil->fil, p); // adrfil->file://C:\.. } } - a = strchr(fil, '?'); + a = strchr(adrfil->fil, '?'); if (a) - *a = '\0'; /* couper query (inutile pour file:// lors de la requête) */ - // filtrer les \\ -> / pour les fichiers DOS - for(i = 0; fil[i] != '\0'; i++) - if (fil[i] == '\\') - fil[i] = '/'; + *a = '\0'; /* couper query (inutile pour adrfil->file:// lors de la requête) */ + // adrfil->filtrer les \\ -> / pour les fichiers DOS + for(i = 0; adrfil->fil[i] != '\0'; i++) + if (adrfil->fil[i] == '\\') + adrfil->fil[i] = '/'; } // no hostname - if (!strnotempty(adr)) + if (!strnotempty(adrfil->adr)) return -1; // erreur non reconnu // nommer au besoin.. 
(non utilisé normalement) - if (!strnotempty(fil)) - strcpybuff(fil, "default-index.html"); + if (!strnotempty(adrfil->fil)) + strcpybuff(adrfil->fil, "default-index.html"); - // case insensitive pour adresse + // case insensitive pour adrfil->adresse { - char *a = jump_identification(adr); + char *a = jump_identification(adrfil->adr); while(*a) { if ((*a >= 'A') && (*a <= 'Z')) diff --git a/src/htslib.h b/src/htslib.h index 84c5666..cbd71aa 100644 --- a/src/htslib.h +++ b/src/htslib.h @@ -50,6 +50,14 @@ typedef struct htsblk htsblk; #define HTS_DEF_FWSTRUCT_t_dnscache typedef struct t_dnscache t_dnscache; #endif +#ifndef HTS_DEF_FWSTRUCT_lien_adrfil +#define HTS_DEF_FWSTRUCT_lien_adrfil +typedef struct lien_adrfil lien_adrfil; +#endif +#ifndef HTS_DEF_FWSTRUCT_lien_adrfilsave +#define HTS_DEF_FWSTRUCT_lien_adrfilsave +typedef struct lien_adrfilsave lien_adrfilsave; +#endif /* définitions globales */ #include "htsglobal.h" @@ -264,7 +272,6 @@ HTS_INLINE void deletehttp(htsblk * r); HTS_INLINE int deleteaddr(htsblk * r); HTS_INLINE void deletesoc(T_SOC soc); HTS_INLINE void deletesoc_r(htsblk * r); -htsblk http_location(httrackp * opt, char *adr, char *fil, char *loc); htsblk http_test(httrackp * opt, const char *adr, const char *fil, char *loc); int check_readinput(htsblk * r); int check_readinput_t(T_SOC soc, int timeout); @@ -317,7 +324,7 @@ int linput_trim(FILE * fp, char *s, int max); int linput_cpp(FILE * fp, char *s, int max); void rawlinput(FILE * fp, char *s, int max); char *strstrcase(char *s, const char *o); -int ident_url_absolute(const char *url, char *adr, char *fil); +int ident_url_absolute(const char *url, lien_adrfil *adrfil); void fil_simplifie(char *f); int is_unicode_utf8(const char *buffer, const size_t size); void map_characters(unsigned char *buffer, unsigned int size, diff --git a/src/htsname.c b/src/htsname.c index ace3a33..ec6a839 100644 --- a/src/htsname.c +++ b/src/htsname.c @@ -47,15 +47,15 @@ Please visit our Website: 
http://www.httrack.com { /* ajout nom */\ char BIGSTK buff[HTS_URLMAXSIZE*2];\ buff[0]='\0';\ - strncatbuff(buff,start_pos,(int) (nom_pos - start_pos));\ - url_savename_addstr(save,buff);\ + strncatbuff(buff,start_pos,nom_pos - start_pos);\ + url_savename_addstr(afs->save, buff);\ } #define ADD_STANDARD_NAME(shortname) \ { /* ajout nom */\ char BIGSTK buff[HTS_URLMAXSIZE*2];\ standard_name(buff,dot_pos,nom_pos,fil_complete,(shortname));\ - url_savename_addstr(save,buff);\ + url_savename_addstr(afs->save, buff);\ } /* Avoid stupid DOS system folders/file such as 'nul' */ @@ -82,11 +82,11 @@ static const char *hts_tbdev[] = { HTS_STAT.stat_errors=fspc(opt,NULL,"error"); \ HTS_STAT.stat_warnings=fspc(opt,NULL,"warning"); \ HTS_STAT.stat_infos=fspc(opt,NULL,"info"); \ - HTS_STAT.nbk=backlinks_done(sback,liens,lien_tot,ptr); \ + HTS_STAT.nbk=backlinks_done(sback,opt->liens,opt->lien_tot,ptr); \ HTS_STAT.nb=back_transferred(HTS_STAT.stat_bytes,sback); \ /* Check */ \ { \ - if (!RUN_CALLBACK7(opt, loop, sback->lnk, sback->count,-1,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) { \ + if (!RUN_CALLBACK7(opt, loop, sback->lnk, sback->count,-1,ptr,opt->lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) { \ return -1; \ } \ } \ @@ -139,10 +139,10 @@ static void cleanEndingSpaceOrDot(char *s) { // forme le nom du fichier à sauver (save) à partir de fil et adr // système intelligent, qui renomme en cas de besoin (exemple: deux INDEX.HTML et index.html) -int url_savename(char *adr_complete, char *fil_complete, char *save, - char *former_adr, char *former_fil, const char *referer_adr, - const char *referer_fil, httrackp * opt, lien_url ** liens, - int lien_tot, struct_back * sback, cache_back * cache, +int url_savename(lien_adrfilsave *const afs, + lien_adrfil *const former, + const char *referer_adr, const char *referer_fil, + httrackp * opt, struct_back * sback, cache_back * cache, hash_struct * hash, int ptr, int numero_passe, const 
lien_back * headers) { char catbuff[CATBUFF_SIZE]; @@ -152,7 +152,10 @@ int url_savename(char *adr_complete, char *fil_complete, char *save, lien_back *const back = sback->lnk; /* */ - char BIGSTK newfil[HTS_URLMAXSIZE * 2]; /* ="" */ + char BIGSTK fil[HTS_URLMAXSIZE * 2]; /* ="" */ + + const char *const adr_complete = afs->af.adr; + const char *const fil_complete = afs->af.fil; /*char BIGSTK normadr_[HTS_URLMAXSIZE*2]; */ char BIGSTK normadr_[HTS_URLMAXSIZE * 2], normfil_[HTS_URLMAXSIZE * 2]; @@ -162,12 +165,11 @@ int url_savename(char *adr_complete, char *fil_complete, char *save, { "http", "https", "ftp", "file", "unknown" }; int protocol = PROTOCOL_HTTP; const char *const adr = jump_identification(adr_complete); - char *fil = fil_complete; // copy of fil, used for lookups (see urlhack) const char *normadr = adr; - const char *normfil = fil; + const char *normfil = fil_complete; const char *const print_adr = jump_protocol(adr); - char *start_pos = NULL, *nom_pos = NULL, *dot_pos = NULL; // Position nom et point + const char *start_pos = NULL, *nom_pos = NULL, *dot_pos = NULL; // Position nom et point // pour changement d'extension ou de nom (content-disposition) int ext_chg = 0, ext_chg_delayed = 0; @@ -176,8 +178,8 @@ int url_savename(char *adr_complete, char *fil_complete, char *save, int max_char = 0; //CLEAR - newfil[0] = ext[0] = '\0'; - save[0] = '\0'; + fil[0] = ext[0] = '\0'; + afs->save[0] = '\0'; /* 8-3 ? 
*/ switch (opt->savename_83) { @@ -199,7 +201,7 @@ int url_savename(char *adr_complete, char *fil_complete, char *save, if (opt->urlhack) { // copy of adr (without protocol), used for lookups (see urlhack) normadr = adr_normalized(adr, normadr_); - normfil = fil_normalized(fil, normfil_); + normfil = fil_normalized(fil_complete, normfil_); } else { if (link_has_authority(adr_complete)) { // https or other protocols : in "http/" subfolder char *pos = strchr(adr_complete, ':'); @@ -227,8 +229,8 @@ int url_savename(char *adr_complete, char *fil_complete, char *save, // court-circuit pour lien primaire if (strnotempty(adr) == 0) { - if (strcmp(fil, "primary") == 0) { - strcatbuff(save, "primary.html"); + if (strcmp(fil_complete, "primary") == 0) { + strcatbuff(afs->save, "primary.html"); return 0; } } @@ -259,21 +261,21 @@ int url_savename(char *adr_complete, char *fil_complete, char *save, // vérifier que le nom n'a pas déja été calculé (si oui le renvoyer tel que) // vérifier que le nom n'est pas déja pris... // NOTE: si on cherche /toto/ et que /toto est trouvé on le prend (et réciproquqment) ** // ** - if (liens != NULL) { + if (opt->liens != NULL) { int i; i = hash_read(hash, normadr, normfil, HASH_STRUCT_ADR_PATH); // recherche table 1 (adr+fil) if (i >= 0) { // ok, trouvé - strcpybuff(save, liens[i]->sav); + strcpybuff(afs->save, heap(i)->sav); return 0; } - i = hash_read(hash, normadr, normfil, HASH_STRUCT_ORIGINAL_ADR_PATH); // recherche table 2 (former_adr+former_fil) + i = hash_read(hash, normadr, normfil, HASH_STRUCT_ORIGINAL_ADR_PATH); // recherche table 2 (former->adr+former->fil) if (i >= 0) { // ok, trouvé // copier location moved! - strcpybuff(adr_complete, liens[i]->adr); - strcpybuff(fil_complete, liens[i]->fil); + strcpybuff(afs->af.adr, heap(i)->adr); + strcpybuff(afs->af.fil, heap(i)->fil); // et save - strcpybuff(save, liens[i]->sav); // copier (formé à partir du nouveau lien!) 
+ strcpybuff(afs->save, heap(i)->sav); // copier (formé à partir du nouveau lien!) return 0; } // chercher sans / ou avec / dans former @@ -286,13 +288,13 @@ int url_savename(char *adr_complete, char *fil_complete, char *save, fil_complete_patche[strlen(fil_complete_patche) - 1] = '\0'; else strcatbuff(fil_complete_patche, "/"); - i = hash_read(hash, normadr, fil_complete_patche, HASH_STRUCT_ORIGINAL_ADR_PATH); // recherche table 2 (former_adr+former_fil) + i = hash_read(hash, normadr, fil_complete_patche, HASH_STRUCT_ORIGINAL_ADR_PATH); // recherche table 2 (former->adr+former->fil) if (i >= 0) { - // écraser fil et adr (pas former_fil?????) - strcpybuff(adr_complete, liens[i]->adr); - strcpybuff(fil_complete, liens[i]->fil); + // écraser fil et adr (pas former->fil?????) + strcpybuff(afs->af.adr, heap(i)->adr); + strcpybuff(afs->af.fil, heap(i)->fil); // écrire save - strcpybuff(save, liens[i]->sav); + strcpybuff(afs->save, heap(i)->sav); return 0; } } @@ -303,13 +305,12 @@ int url_savename(char *adr_complete, char *fil_complete, char *save, { char *a; - a = strchr(fil, '?'); + a = strchr(fil_complete, '?'); if (a != NULL) { - strncatbuff(newfil, fil, (int) (a - fil)); + strncatbuff(fil, fil_complete, a - fil_complete); } else { - strcpybuff(newfil, fil); + strcpybuff(fil, fil_complete); } - fil = newfil; } // decode remaining % (normally not necessary; already done in htsparse.c) @@ -465,8 +466,7 @@ int url_savename(char *adr_complete, char *fil_complete, char *save, // int hihp = opt->state._hts_in_html_parsing; int has_been_moved = 0; - char BIGSTK curr_adr[HTS_URLMAXSIZE * 2], - curr_fil[HTS_URLMAXSIZE * 2]; + lien_adrfil current; /* Ensure we don't use too many sockets by using a "testing" one If we have only 1 simultaneous connection authorized, wait for pending download @@ -475,20 +475,20 @@ int url_savename(char *adr_complete, char *fil_complete, char *save, URLSAVENAME_WAIT_FOR_AVAILABLE_SOCKET(); /* Rock'in */ - curr_adr[0] = curr_fil[0] = '\0'; + 
current.adr[0] = current.fil[0] = '\0'; opt->state._hts_in_html_parsing = 2; // test hts_log_print(opt, LOG_DEBUG, "Testing link type %s%s", adr_complete, fil_complete); - strcpybuff(curr_adr, adr_complete); - strcpybuff(curr_fil, fil_complete); + strcpybuff(current.adr, adr_complete); + strcpybuff(current.fil, fil_complete); // ajouter dans le backing le fichier en mode test // savename: rien car en mode test if (back_add - (sback, opt, cache, curr_adr, curr_fil, BACK_ADD_TEST, + (sback, opt, cache, current.adr, current.fil, BACK_ADD_TEST, referer_adr, referer_fil, 1) != -1) { int b; - b = back_index(opt, sback, curr_adr, curr_fil, BACK_ADD_TEST); + b = back_index(opt, sback, current.adr, current.fil, BACK_ADD_TEST); if (b >= 0) { int stop_looping = 0; int petits_tours = 0; @@ -511,11 +511,11 @@ int url_savename(char *adr_complete, char *fil_complete, char *save, HTS_STAT.stat_errors = fspc(opt, NULL, "error"); HTS_STAT.stat_warnings = fspc(opt, NULL, "warning"); HTS_STAT.stat_infos = fspc(opt, NULL, "info"); - HTS_STAT.nbk = backlinks_done(sback, liens, lien_tot, ptr); + HTS_STAT.nbk = backlinks_done(sback, opt->liens, opt->lien_tot, ptr); HTS_STAT.nb = back_transferred(HTS_STAT.stat_bytes, sback); if (!RUN_CALLBACK7 - (opt, loop, sback->lnk, sback->count, b, ptr, lien_tot, + (opt, loop, sback->lnk, sback->count, b, ptr, opt->lien_tot, (int) (time_local() - HTS_STAT.stat_timestart), &HTS_STAT)) { return -1; @@ -526,42 +526,40 @@ int url_savename(char *adr_complete, char *fil_complete, char *save, // traitement des 304,303.. if (back[b].status <= 0) { if (HTTP_IS_REDIRECT(back[b].r.statuscode)) { // agh moved.. un tit tour de plus - if ((petits_tours < 5) && (former_adr) && (former_fil)) { // on va pas tourner en rond non plus! - if ((int) strnotempty(back[b].r.location)) { // location existe! 
- char BIGSTK mov_url[HTS_URLMAXSIZE * 2], - mov_adr[HTS_URLMAXSIZE * 2], - mov_fil[HTS_URLMAXSIZE * 2]; - mov_url[0] = mov_adr[0] = mov_fil[0] = '\0'; + if ((petits_tours < 5) && former != NULL) { // on va pas tourner en rond non plus! + if (strnotempty(back[b].r.location)) { // location existe! + char BIGSTK mov_url[HTS_URLMAXSIZE * 2]; + lien_adrfil moved; + mov_url[0] = moved.adr[0] = moved.fil[0] = '\0'; // strcpybuff(mov_url, back[b].r.location); // copier URL if (ident_url_relatif - (mov_url, curr_adr, curr_fil, mov_adr, - mov_fil) >= 0) { + (mov_url, current.adr, current.fil, &moved) >= 0) { // si non bouclage sur soi même, ou si test avec GET non testé - if ((strcmp(mov_adr, curr_adr)) - || (strcmp(mov_fil, curr_fil)) + if ((strcmp(moved.adr, current.adr)) + || (strcmp(moved.fil, current.fil)) || (get_test_request == 0)) { // bouclage? - if ((!strcmp(mov_adr, curr_adr)) - && (!strcmp(mov_fil, curr_fil))) + if ((!strcmp(moved.adr, current.adr)) + && (!strcmp(moved.fil, current.fil))) get_test_request = 1; // faire requète avec GET - // recopier former_adr/fil? - if ((former_adr) && (former_fil)) { - if (strnotempty(former_adr) == 0) { // Pas déja noté - strcpybuff(former_adr, curr_adr); - strcpybuff(former_fil, curr_fil); + // recopier former->adr/fil? 
+ if (former != NULL) { + if (strnotempty(former->adr) == 0) { // Pas déja noté + strcpybuff(former->adr, current.adr); + strcpybuff(former->fil, current.fil); } } // check explicit forbidden - don't follow 3xx in this case { int set_prio_to = 0; - if (hts_acceptlink(opt, ptr, lien_tot, liens, mov_adr, mov_fil, NULL, NULL, &set_prio_to, NULL) == 1) { /* forbidden */ + if (hts_acceptlink(opt, ptr, moved.adr, moved.fil, NULL, NULL, &set_prio_to, NULL) == 1) { /* forbidden */ has_been_moved = 1; back_maydelete(opt, cache, sback, b); // ok - strcpybuff(curr_adr, mov_adr); - strcpybuff(curr_fil, mov_fil); + strcpybuff(current.adr, moved.adr); + strcpybuff(current.fil, moved.fil); mov_url[0] = '\0'; stop_looping = 1; } @@ -572,8 +570,8 @@ int url_savename(char *adr_complete, char *fil_complete, char *save, ) { // ftp, ok on arrête has_been_moved = 1; back_maydelete(opt, cache, sback, b); // ok - strcpybuff(curr_adr, mov_adr); - strcpybuff(curr_fil, mov_fil); + strcpybuff(current.adr, moved.adr); + strcpybuff(current.fil, moved.fil); stop_looping = 1; } else if (*mov_url) { const char *methode; @@ -584,24 +582,24 @@ int url_savename(char *adr_complete, char *fil_complete, char *save, methode = BACK_ADD_TEST2; // tester avec GET hts_log_print(opt, LOG_WARNING, "Loop with HEAD request (during prefetch) at %s%s", - curr_adr, curr_fil); + current.adr, current.fil); } // Ajouter URLSAVENAME_WAIT_FOR_AVAILABLE_SOCKET(); - if (back_add(sback, opt, cache, mov_adr, mov_fil, methode, referer_adr, referer_fil, 1) != -1) { // OK + if (back_add(sback, opt, cache, moved.adr, moved.fil, methode, referer_adr, referer_fil, 1) != -1) { // OK hts_log_print(opt, LOG_DEBUG, "(during prefetch) %s (%d) to link %s at %s%s", back[b].r.msg, back[b].r.statuscode, - back[b].r.location, curr_adr, - curr_fil); + back[b].r.location, current.adr, + current.fil); // libérer emplacement backing actuel et attendre le prochain back_maydelete(opt, cache, sback, b); - strcpybuff(curr_adr, mov_adr); - 
strcpybuff(curr_fil, mov_fil); + strcpybuff(current.adr, moved.adr); + strcpybuff(current.fil, moved.fil); b = - back_index(opt, sback, curr_adr, curr_fil, + back_index(opt, sback, current.adr, current.fil, methode); if (!get_test_request) has_been_moved = 1; // sinon ne pas forcer has_been_moved car non déplacé @@ -610,7 +608,7 @@ int url_savename(char *adr_complete, char *fil_complete, char *save, } else { // sinon on fait rien et on s'en va.. (ftp etc) hts_log_print(opt, LOG_DEBUG, "Warning: Savename redirect backing error at %s%s", - mov_adr, mov_fil); + moved.adr, moved.fil); } } } else { @@ -672,13 +670,13 @@ int url_savename(char *adr_complete, char *fil_complete, char *save, // oops, a été déplacé.. on recalcule en récursif (osons!) if (has_been_moved) { // copier adr, fil (optionnel, mais sinon marche pas pour le rip) - strcpybuff(adr_complete, curr_adr); - strcpybuff(fil_complete, curr_fil); + strcpybuff(afs->af.adr, current.adr); + strcpybuff(afs->af.fil, current.fil); // copier adr, fil - return url_savename(curr_adr, curr_fil, save, NULL, NULL, - referer_adr, referer_fil, opt, liens, - lien_tot, sback, cache, hash, ptr, + return url_savename(afs, NULL, + referer_adr, referer_fil, opt, + sback, cache, hash, ptr, numero_passe, NULL); } // --- --- --- @@ -748,7 +746,7 @@ int url_savename(char *adr_complete, char *fil_complete, char *save, } // Rechercher premier / et dernier . { - char *a = fil + strlen(fil) - 1; + const char *a = fil + strlen(fil) - 1; // passer structures start_pos = fil; @@ -769,7 +767,7 @@ int url_savename(char *adr_complete, char *fil_complete, char *save, // ajouter nom du site éventuellement en premier if (opt->savename_type == -1) { // utiliser savename_userdef! 
(%h%p/%n%q.%t) const char *a = StringBuff(opt->savename_userdef); - char *b = save; + char *b = afs->save; /*char *nom_pos=NULL,*dot_pos=NULL; // Position nom et point */ char tok; @@ -790,7 +788,7 @@ int url_savename(char *adr_complete, char *fil_complete, char *save, */ // Construire nom - while((*a) && (((int) (b - save)) < HTS_URLMAXSIZE)) { // parser, et pas trop long.. + while((*a) && (((int) (b - afs->save)) < HTS_URLMAXSIZE)) { // parser, et pas trop long.. if (*a == '%') { int short_ver = 0; @@ -1047,26 +1045,26 @@ int url_savename(char *adr_complete, char *fil_complete, char *save, // adresse url if (!opt->savename_83) { // noms longs (et pas de .) - strcatbuff(save, final_adr); + strcatbuff(afs->save, final_adr); } else { // noms 8-3 if (strlen(final_adr) > 4) { if (strfield(final_adr, "www.")) - hts_appendStringUTF8(save, final_adr + 4, max_char); + hts_appendStringUTF8(afs->save, final_adr + 4, max_char); else - hts_appendStringUTF8(save, final_adr, max_char); + hts_appendStringUTF8(afs->save, final_adr, max_char); } else - hts_appendStringUTF8(save, final_adr, max_char); + hts_appendStringUTF8(afs->save, final_adr, max_char); } /* release */ RELEASE_ADR(); if (*fil != '/') - strcatbuff(save, "/"); + strcatbuff(afs->save, "/"); } } - hts_lowcase(save); + hts_lowcase(afs->save); /* // ne sert à rien car a déja été filtré normalement @@ -1092,42 +1090,42 @@ int url_savename(char *adr_complete, char *fil_complete, char *save, DECLARE_ADR(final_adr); if (!opt->savename_83) { // noms longs - strcatbuff(save, final_adr); - strcatbuff(save, "/"); + strcatbuff(afs->save, final_adr); + strcatbuff(afs->save, "/"); } else { // noms 8-3 if (strlen(final_adr) > 4) { if (strfield(final_adr, "www.")) - hts_appendStringUTF8(save, final_adr + 4, max_char); + hts_appendStringUTF8(afs->save, final_adr + 4, max_char); else - hts_appendStringUTF8(save, final_adr, max_char); - strcatbuff(save, "/"); + hts_appendStringUTF8(afs->save, final_adr, max_char); + 
strcatbuff(afs->save, "/"); } else { - hts_appendStringUTF8(save, final_adr, max_char); - strcatbuff(save, "/"); + hts_appendStringUTF8(afs->save, final_adr, max_char); + strcatbuff(afs->save, "/"); } } /* release */ RELEASE_ADR(); } else { - strcatbuff(save, "web/"); // répertoire général + strcatbuff(afs->save, "web/"); // répertoire général } } // si un html à coup sûr if ((ext_chg != 0) ? (ishtml_ext(ext) == 1) : (ishtml(opt, fil) == 1)) { if (opt->savename_type % 100 == 2) { // html/ - strcatbuff(save, "html/"); + strcatbuff(afs->save, "html/"); } } else { if ((opt->savename_type % 100 == 1) || (opt->savename_type % 100 == 2)) { // html & images - strcatbuff(save, "images/"); + strcatbuff(afs->save, "images/"); } } switch (opt->savename_type % 100) { case 4: case 5:{ // séparer par types - char *a = fil + strlen(fil) - 1; + const char *a = fil + strlen(fil) - 1; // passer structures while((a > fil) && (*a != '/') && (*a != '\\')) @@ -1138,17 +1136,17 @@ int url_savename(char *adr_complete, char *fil_complete, char *save, // html? if ((ext_chg != 0) ? (ishtml_ext(ext) == 1) : (ishtml(opt, fil) == 1)) { if (opt->savename_type % 100 == 5) - strcatbuff(save, "html/"); + strcatbuff(afs->save, "html/"); } else { - char *a = fil + strlen(fil) - 1; + const char *a = fil + strlen(fil) - 1; while((a > fil) && (*a != '/') && (*a != '.')) a--; if (*a != '.') - strcatbuff(save, "other"); + strcatbuff(afs->save, "other"); else - strcatbuff(save, a + 1); - strcatbuff(save, "/"); + strcatbuff(afs->save, a + 1); + strcatbuff(afs->save, "/"); } /*strcatbuff(save,a); */ /* add name */ @@ -1157,8 +1155,8 @@ int url_savename(char *adr_complete, char *fil_complete, char *save, break; case 99:{ // 'codé' .. 
c'est un gadget size_t i; - int j; - char *a; + size_t j; + const char *a; char C[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-"; int L; @@ -1174,19 +1172,19 @@ int url_savename(char *adr_complete, char *fil_complete, char *save, } srand(s); - j = (int) strlen(save); + j = strlen(afs->save); for(i = 0; i < 8; i++) { char c = C[(rand() % L)]; - save[i + j] = c; + afs->save[i + j] = c; } - save[i + j] = '\0'; + afs->save[i + j] = '\0'; // ajouter extension a = fil + strlen(fil) - 1; while((a > fil) && (*a != '/') && (*a != '.')) a--; if (*a == '.') { - strcatbuff(save, a); // ajouter + strcatbuff(afs->save, a); // ajouter } } break; @@ -1205,10 +1203,10 @@ int url_savename(char *adr_complete, char *fil_complete, char *save, break; } - hts_lowcase(save); + hts_lowcase(afs->save); - if (save[strlen(save) - 1] == '/') - strcatbuff(save, DEFAULT_HTML); // nommer page par défaut!! + if (afs->save[strlen(afs->save) - 1] == '/') + strcatbuff(afs->save, DEFAULT_HTML); // nommer page par défaut!! } // vérifier qu'on ne doit pas forcer l'extension @@ -1226,23 +1224,23 @@ int url_savename(char *adr_complete, char *fil_complete, char *save, // de même en cas de manque d'extension on en place une de manière forcée.. // cela évite les /chez/toto et les /chez/toto/index.html incompatibles if (opt->savename_type != -1 && opt->savename_delayed != 2) { - char *a = save + strlen(save) - 1; + char *a = afs->save + strlen(afs->save) - 1; - while((a > save) && (*a != '.') && (*a != '/')) + while((a > afs->save) && (*a != '.') && (*a != '/')) a--; if (*a != '.') { // agh pas de point //strcatbuff(save,".none"); // a éviter - strcatbuff(save, ".html"); // préférable! + strcatbuff(afs->save, ".html"); // préférable! 
hts_log_print(opt, LOG_DEBUG, "Default HTML type set for %s%s => %s", - adr_complete, fil_complete, save); + adr_complete, fil_complete, afs->save); } } // effacer pass au besoin pour les autentifications // (plus la peine : masqué au début) /* { - char* a=jump_identification(save); - if (a!=save) { + char* a=jump_identification(afs->save); + if (a!=afs->save) { char BIGSTK tempo[HTS_URLMAXSIZE*2]; char *b; tempo[0]='\0'; @@ -1259,18 +1257,18 @@ int url_savename(char *adr_complete, char *fil_complete, char *save, */ // éviter les / au début (cause: N100) - if (save[0] == '/') { + if (afs->save[0] == '/') { char BIGSTK tempo[HTS_URLMAXSIZE * 2]; - strcpybuff(tempo, save + 1); - strcpybuff(save, tempo); + strcpybuff(tempo, afs->save + 1); + strcpybuff(afs->save, tempo); } /* Cleanup reserved or forbidden characters. */ { size_t i; - for(i = 0 ; save[i] != '\0' ; i++) { - unsigned char c = (unsigned char) save[i]; + for(i = 0 ; afs->save[i] != '\0' ; i++) { + unsigned char c = (unsigned char) afs->save[i]; if (c < 32 // control || c == 127 // unwise || c == '~' // unix unwise @@ -1295,13 +1293,13 @@ int url_savename(char *adr_complete, char *fil_complete, char *save, ) ) { - save[i] = '_'; + afs->save[i] = '_'; } } } // éliminer les // (comme ftp://) - cleanDoubleSlash(save); + cleanDoubleSlash(afs->save); #if HTS_OVERRIDE_DOS_FOLDERS /* Replace /foo/nul/bar by /foo/nul_/bar */ @@ -1309,7 +1307,7 @@ int url_savename(char *adr_complete, char *fil_complete, char *save, int i = 0; while(hts_tbdev[i][0]) { - char *a = save; + char *a = afs->save; while((a = strstrcase(a, (char *) hts_tbdev[i]))) { switch ((int) a[strlen(hts_tbdev[i])]) { @@ -1320,10 +1318,10 @@ int url_savename(char *adr_complete, char *fil_complete, char *save, char BIGSTK tempo[HTS_URLMAXSIZE * 2]; tempo[0] = '\0'; - strncatbuff(tempo, save, (int) (a - save) + strlen(hts_tbdev[i])); + strncatbuff(tempo, afs->save, (int) (a - afs->save) + strlen(hts_tbdev[i])); strcatbuff(tempo, "_"); strcatbuff(tempo, a 
+ strlen(hts_tbdev[i])); - strcpybuff(save, tempo); + strcpybuff(afs->save, tempo); } break; } @@ -1334,7 +1332,7 @@ int url_savename(char *adr_complete, char *fil_complete, char *save, } /* Strip ending . or ' ' forbidden on windoz */ - cleanEndingSpaceOrDot(save); + cleanEndingSpaceOrDot(afs->save); #endif @@ -1342,8 +1340,8 @@ int url_savename(char *adr_complete, char *fil_complete, char *save, if (opt->savename_83) { char BIGSTK n83[HTS_URLMAXSIZE * 2]; - long_to_83(opt->savename_83, n83, save); - strcpybuff(save, n83); + long_to_83(opt->savename_83, n83, afs->save); + strcpybuff(afs->save, n83); } // enforce stricter ISO9660 compliance (bug reported by Steffo Carlsson) // Level 1 File names are restricted to 8 characters with a 3 character extension, @@ -1354,12 +1352,12 @@ int url_savename(char *adr_complete, char *fil_complete, char *save, if (opt->savename_83 > 0) { char *a, *last; - for(last = save + strlen(save) - 1; - last != save && *last != '/' && *last != '\\' && *last != '.'; last--) ; + for(last = afs->save + strlen(afs->save) - 1; + last != afs->save && *last != '/' && *last != '\\' && *last != '.'; last--) ; if (*last != '.') { last = NULL; } - for(a = save; *a != '\0'; a++) { + for(a = afs->save; *a != '\0'; a++) { if (*a >= 'a' && *a <= 'z') { *a -= 'a' - 'A'; } else if (*a == '.') { @@ -1376,7 +1374,7 @@ int url_savename(char *adr_complete, char *fil_complete, char *save, } /* ensure that there is no ../ (potential vulnerability) */ - fil_simplifie(save); + fil_simplifie(afs->save); /* convert name to UTF-8 ? Note: already done while parsing. 
*/ //if (charset != NULL && charset[0] != '\0') { @@ -1393,17 +1391,17 @@ int url_savename(char *adr_complete, char *fil_complete, char *save, /* callback */ RUN_CALLBACK5(opt, savename, adr_complete, fil_complete, referer_adr, - referer_fil, save); + referer_fil, afs->save); hts_log_print(opt, LOG_DEBUG, "engine: save-name: local name: %s%s -> %s", - adr, fil, save); + adr, fil, afs->save); /* Ensure that the MANDATORY "temporary" extension is set */ if (ext_chg_delayed) { char *ptr; char *lastDot = NULL; - for(ptr = save; *ptr != 0; ptr++) { + for(ptr = afs->save; *ptr != 0; ptr++) { if (*ptr == '.') { lastDot = ptr; } else if (*ptr == '/' || *ptr == '\\') { @@ -1411,8 +1409,8 @@ int url_savename(char *adr_complete, char *fil_complete, char *save, } } if (lastDot == NULL) { - strcatbuff(save, "." DELAYED_EXT); - } else if (!IS_DELAYED_EXT(save)) { + strcatbuff(afs->save, "." DELAYED_EXT); + } else if (!IS_DELAYED_EXT(afs->save)) { strcatbuff(lastDot, "." DELAYED_EXT); } } @@ -1425,12 +1423,12 @@ int url_savename(char *adr_complete, char *fil_complete, char *save, #define MIN_LAST_SEG_RESERVE 12 #define MAX_LAST_SEG_RESERVE 24 #define MAX_SEG_LEN 48 - if (hts_stringLengthUTF8(save) + + if (hts_stringLengthUTF8(afs->save) + hts_stringLengthUTF8(StringBuff(opt->path_html_utf8)) >= HTS_MAX_PATH_LEN) { // convert to Unicode (much simpler) size_t wsaveLen; - hts_UCS4 *const wsave = hts_convertUTF8StringToUCS4(save, strlen(save), &wsaveLen); + hts_UCS4 *const wsave = hts_convertUTF8StringToUCS4(afs->save, strlen(afs->save), &wsaveLen); if (wsave != NULL) { const size_t parentLen = hts_stringLengthUTF8(StringBuff(opt->path_html_utf8)); @@ -1485,7 +1483,7 @@ int url_savename(char *adr_complete, char *fil_complete, char *save, // copy final name and cleanup saveFinal = hts_convertUCS4StringToUTF8(wsave, j); if (saveFinal != NULL) { - strcpybuff(save, saveFinal); + strcpybuff(afs->save, saveFinal); free(saveFinal); } else { hts_log_print(opt, LOG_ERROR, "Could not revert to 
UTF-8: %s%s", @@ -1495,13 +1493,13 @@ int url_savename(char *adr_complete, char *fil_complete, char *save, // log in debug hts_log_print(opt, LOG_DEBUG, "Too long filename shortened: %s%s => %s", - adr_complete, fil_complete, save); + adr_complete, fil_complete, afs->save); } else { - hts_log_print(opt, LOG_ERROR, "Could not read UTF-8: %s", save); + hts_log_print(opt, LOG_ERROR, "Could not read UTF-8: %s", afs->save); } // Re-check again ending space or dot after cut (see bug #5) - cleanEndingSpaceOrDot(save); + cleanEndingSpaceOrDot(afs->save); } #undef MAX_UTF8_SEQ_CHARS #undef MIN_LAST_SEG_RESERVE @@ -1512,11 +1510,11 @@ int url_savename(char *adr_complete, char *fil_complete, char *save, char BIGSTK tempo[HTS_URLMAXSIZE * 2]; strcpybuff(tempo, StringBuff(opt->path_html_utf8)); - strcatbuff(tempo, save); - strcpybuff(save, tempo); + strcatbuff(tempo, afs->save); + strcpybuff(afs->save, tempo); } // vérifier que le nom n'est pas déja pris... - if (liens != NULL) { + if (opt->liens != NULL) { int nom_ok; do { @@ -1530,20 +1528,20 @@ int url_savename(char *adr_complete, char *fil_complete, char *save, printf("\nStart search\n"); #endif - i = hash_read(hash, save, NULL, HASH_STRUCT_FILENAME); // lecture type 0 (sav) + i = hash_read(hash, afs->save, NULL, HASH_STRUCT_FILENAME); // lecture type 0 (sav) if (i >= 0) { - int sameAdr = (strfield2(liens[i]->adr, normadr) != 0); + int sameAdr = (strfield2(heap(i)->adr, normadr) != 0); int sameFil; // NO - URL hack is only for stripping // and www. 
//if (opt->urlhack != 0) - // sameFil = ( strfield2(liens[i]->fil, normfil) != 0); + // sameFil = ( strfield2(heap(i)->fil, normfil) != 0); //else - sameFil = (strcmp(liens[i]->fil, normfil) == 0); + sameFil = (strcmp(heap(i)->fil, normfil) == 0); if (sameAdr && sameFil) { // ok c'est le même lien, adresse déja définie /* Take the existing name not to screw up with cAsE sEnSiTiViTy of Linux/Unix */ - if (strcmp(liens[i]->sav, save) != 0) { - strcpybuff(save, liens[i]->sav); + if (strcmp(heap(i)->sav, afs->save) != 0) { + strcpybuff(afs->save, heap(i)->sav); } i = 0; #if DEBUG_SAVENAME @@ -1551,7 +1549,7 @@ int url_savename(char *adr_complete, char *fil_complete, char *save, #endif } else { // utilisé par un AUTRE, changer de nom char BIGSTK tempo[HTS_URLMAXSIZE * 2]; - char *a = save + strlen(save) - 1; + char *a = afs->save + strlen(afs->save) - 1; char *b; int n = 2; char collisionSeparator = ((opt->savename_83 != 2) ? '-' : '_'); @@ -1559,18 +1557,18 @@ int url_savename(char *adr_complete, char *fil_complete, char *save, tempo[0] = '\0'; #if DEBUG_SAVENAME - printf("\nWRONG CASE UNMATCH : \n%s\n%s, REDEFINE\n", liens[i]->fil, + printf("\nWRONG CASE UNMATCH : \n%s\n%s, REDEFINE\n", heap(i)->fil, fil_complete); #endif nom_ok = 0; i = 0; - while((a > save) && (*a != '.') && (*a != '\\') && (*a != '/')) + while((a > afs->save) && (*a != '.') && (*a != '\\') && (*a != '/')) a--; if (*a == '.') - strncatbuff(tempo, save, (int) (a - save)); + strncatbuff(tempo, afs->save, a - afs->save); else - strcatbuff(tempo, save); + strcatbuff(tempo, afs->save); // tester la présence d'un -xx (ex: index-2.html -> index-3.html) b = tempo + strlen(tempo) - 1; @@ -1601,7 +1599,7 @@ int url_savename(char *adr_complete, char *fil_complete, char *save, if (*a == '.') strcatbuff(tempo, a); - strcpybuff(save, tempo); + strcpybuff(afs->save, tempo); //printf("switched: %s\n",save); @@ -1619,7 +1617,7 @@ int url_savename(char *adr_complete, char *fil_complete, char *save, } /* nom avec md5 
urilisé partout */ -void standard_name(char *b, char *dot_pos, char *nom_pos, char *fil_complete, +void standard_name(char *b, const char *dot_pos, const char *nom_pos, const char *fil, int short_ver) { char md5[32 + 2]; @@ -1627,9 +1625,9 @@ void standard_name(char *b, char *dot_pos, char *nom_pos, char *fil_complete, /* Nom */ if (dot_pos) { if (!short_ver) // Noms longs - strncatbuff(b, nom_pos, (int) (dot_pos - nom_pos)); + strncatbuff(b, nom_pos, (dot_pos - nom_pos)); else - strncatbuff(b, nom_pos, min((int) (dot_pos - nom_pos), 8)); + strncatbuff(b, nom_pos, min(dot_pos - nom_pos, 8)); } else { if (!short_ver) // Noms longs strcatbuff(b, nom_pos); @@ -1637,7 +1635,7 @@ void standard_name(char *b, char *dot_pos, char *nom_pos, char *fil_complete, strncatbuff(b, nom_pos, 8); } /* MD5 - 16 bits */ - strncatbuff(b, url_md5(md5, fil_complete), 4); + strncatbuff(b, url_md5(md5, fil), 4); /* Ext */ if (dot_pos) { strcatbuff(b, "."); @@ -1658,11 +1656,11 @@ void standard_name(char *b, char *dot_pos, char *nom_pos, char *fil_complete, } /* Petit md5 */ -char *url_md5(char *digest, char *fil_complete) { +char *url_md5(char *digest, const char *fil) { char *a; digest[0] = '\0'; - a = strchr(fil_complete, '?'); + a = strchr(fil, '?'); if (a) { if (strlen(a)) { char BIGSTK buff[HTS_URLMAXSIZE * 2]; @@ -1677,7 +1675,7 @@ char *url_md5(char *digest, char *fil_complete) { } // interne à url_savename: ajoute une chaîne à une autre avec \ -> / -void url_savename_addstr(char *d, char *s) { +void url_savename_addstr(char *d, const char *s) { int i = (int) strlen(d); while(*s) { diff --git a/src/htsname.h b/src/htsname.h index 4c67893..675b0cb 100644 --- a/src/htsname.h +++ b/src/htsname.h @@ -80,18 +80,27 @@ typedef struct hash_struct hash_struct; #define HTS_DEF_FWSTRUCT_lien_back typedef struct lien_back lien_back; #endif +#ifndef HTS_DEF_FWSTRUCT_lien_adrfil +#define HTS_DEF_FWSTRUCT_lien_adrfil +typedef struct lien_adrfil lien_adrfil; +#endif +#ifndef 
HTS_DEF_FWSTRUCT_lien_adrfilsave +#define HTS_DEF_FWSTRUCT_lien_adrfilsave +typedef struct lien_adrfilsave lien_adrfilsave; +#endif // note: 'headers' can either be null, or incomplete (only r member filled) -int url_savename(char *adr_complete, char *fil_complete, char *save, - char *former_adr, char *former_fil, const char *referer_adr, - const char *referer_fil, httrackp * opt, lien_url ** liens, - int lien_tot, struct_back * sback, cache_back * cache, +int url_savename(lien_adrfilsave *const afs, + lien_adrfil *const former, + const char *referer_adr, const char *referer_fil, + httrackp * opt, struct_back * sback, cache_back * cache, hash_struct * hash, int ptr, int numero_passe, const lien_back * headers); -void standard_name(char *b, char *dot_pos, char *nom_pos, char *fil_complete, +void standard_name(char *b, const char *dot_pos, const char *nom_pos, + const char *fil_complete, int short_ver); -void url_savename_addstr(char *d, char *s); -char *url_md5(char *digest_buffer, char *fil_complete); +void url_savename_addstr(char *d, const char *s); +char *url_md5(char *digest_buffer, const char *fil_complete); void url_savename_refname(const char *adr, const char *fil, char *filename); char *url_savename_refname_fullpath(httrackp * opt, const char *adr, const char *fil); diff --git a/src/htsparse.c b/src/htsparse.c index 820af38..6269450 100644 --- a/src/htsparse.c +++ b/src/htsparse.c @@ -78,12 +78,12 @@ Please visit our Website: http://www.httrack.com } \ } \ ht_len+=A; -#define HT_ADD_ADR \ +#define HT_add_adr \ if ((opt->getmode & 1) && (ptr>0)) { \ - size_t i = ((size_t) (adr - lastsaved)),j=ht_len; HT_ADD_CHK(i) \ + size_t i = ((html - lastsaved)),j=ht_len; HT_ADD_CHK(i) \ memcpy(ht_buff+j, lastsaved, i); \ ht_buff[j+i]='\0'; \ - lastsaved=adr; \ + lastsaved=html; \ } #define HT_ADD(A) \ if ((opt->getmode & 1) && (ptr>0)) { \ @@ -283,7 +283,7 @@ Please visit our Website: http://www.httrack.com #define AUTOMATE_LOOKUP_CURRENT_ADR() do { \ if (inscript) { \ 
int new_state_pos; \ - new_state_pos=inscript_state[inscript_state_pos][(unsigned char)*adr]; \ + new_state_pos=inscript_state[inscript_state_pos][(unsigned char)*html]; \ if (new_state_pos < 0) { \ new_state_pos=inscript_state[inscript_state_pos][INSCRIPT_DEFAULT]; \ } \ @@ -297,7 +297,7 @@ Please visit our Website: http://www.httrack.com #define INCREMENT_CURRENT_ADR(steps) do { \ int steps__ = (int) ( steps ); \ while(steps__ > 0) { \ - adr++; \ + html++; \ AUTOMATE_LOOKUP_CURRENT_ADR(); \ steps__ --; \ } \ @@ -322,13 +322,26 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { } } if (RUN_CALLBACK4(opt, check_html, r->adr, (int) r->size, urladr(), urlfil())) { - FILE *fp = NULL; // fichier écrit localement - char *adr = r->adr; // pointeur (on parcourt) - char *lastsaved; // adresse du dernier octet sauvé + 1 + FILE *fp = NULL; // fichier écrit localement + const char *html = r->adr; // pointeur (on parcours) + const char *lastsaved; // adresse du dernier octet sauvé + 1 hts_log_print(opt, LOG_DEBUG, "scanning file %s%s (%s)..", urladr(), urlfil(), savename()); + /* Hack to avoid NULL char problems with C syntax */ + /* Yes, some bogus HTML pages can embed null chars + and therefore can not be properly handled if this hack is not done + */ + if (r->adr != NULL) { + size_t i; + for(i = 0 ; i < (size_t) r->size ; i++) { + if (r->adr[i] == '\0') { + r->adr[i] = ' '; + } + } + } + // Indexing! #if HTS_MAKE_KEYWORD_INDEX if (opt->kindex) { @@ -392,7 +405,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { //int parseall_incomment=0; // dans un /* */ (exemple: a = /* URL */ "img.gif";) // - const char *intag_start = adr; + const char *intag_start = html; const char *intag_name = NULL; const char *intag_startattr = NULL; int intag_start_valid = 0; @@ -405,7 +418,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { int parent_relative = 0; // the parent is the base path (.js, .css..) 
HT_ADD_START; // débuter - lastsaved = adr; + lastsaved = html; /* Initialize script automate for comments, quotes.. */ memset(inscript_state, 0xff, sizeof(inscript_state)); @@ -491,15 +504,15 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { else if (compare_mime(opt, r->contenttype, str->url_file, "text/xml") != 0 || compare_mime(opt, r->contenttype, str->url_file, "application/xml") != 0) { - if (strstr(adr, "http://purl.org/rss/") != NULL) // Hmm, this is a bit lame ; will have to cleanup + if (strstr(html, "http://purl.org/rss/") != NULL) // Hmm, this is a bit lame ; will have to cleanup { /* RSS file */ inscript = intag = 0; intag_start_valid = 0; in_media = NULL; // regular XML } else { // cancel: write all - adr = r->adr + r->size; - HT_ADD_ADR; - lastsaved = adr; + html = r->adr + r->size; + HT_add_adr; + lastsaved = html; } } // Detect UTF8 format @@ -531,18 +544,9 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { error = 0; /* Break if we are done yet */ - if ((adr - r->adr) >= r->size) + if (html - r->adr >= r->size) break; - /* Hack to avoid NULL char problems with C syntax */ - /* Yes, some bogus HTML pages can embed null chars - and therefore can not be properly handled if this hack is not done - */ - if (!(*adr)) { - if (((int) (adr - r->adr)) < r->size) - *adr = ' '; - } - /* index.html built here */ @@ -555,18 +559,18 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { if (!in_media) { if (opt->makeindex && (ptr > 0)) { if (opt->getmode & 1) { // autorisation d'écrire - p = strfield(adr, "title"); + p = strfield(html, "title"); if (p) { - if (*(adr - 1) == '/') + if (*(html - 1) == '/') p = 0; // /title } else { - if (strfield(adr, "/html")) + if (strfield(html, "/html")) p = -1; // noter, mais sans titre - else if (strfield(adr, "body")) + else if (strfield(html, "body")) p = -1; // noter, mais sans titre - else if (((int) (adr - r->adr)) >= (r->size - 1)) + else if (((int) (html 
- r->adr)) >= (r->size - 1)) p = -1; // noter, mais sans titre - else if ((int) (adr - r->adr) >= r->size - 2) // we got to hurry + else if ((int) (html - r->adr) >= r->size - 2) // we got to hurry p = -1; // xxc xxc xxc } } else @@ -603,7 +607,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { s[0] = '\0'; if (p > 0) { - a = strchr(adr, '>'); + a = strchr(html, '>'); if (a != NULL) { a++; while(is_space(*a)) @@ -669,7 +673,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { */ /* Parse */ - if ((*adr == '<') /* No starting tag */ + if ((*html == '<') /* No starting tag */ &&(!inscript) /* Not in (java)script */ &&(!incomment) /* Not in comment (<!--) */ &&(!in_media) /* Not in media */ @@ -678,8 +682,8 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { intag_ctype = 0; //parseall_incomment=0; //inquote=0; // effacer quote - intag_start = adr; - for(intag_name = adr + 1; is_realspace(*intag_name); intag_name++) ; + intag_start = html; + for(intag_name = html + 1; is_realspace(*intag_name); intag_name++) ; intag_start_valid = 1; codebase[0] = '\0'; // effacer éventuel codebase @@ -688,9 +692,9 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { int pos; // <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" /> - if ((pos = rech_tageq_all(adr, "http-equiv"))) { + if ((pos = rech_tageq_all(html, "http-equiv"))) { const char *token = NULL; - int len = rech_endtoken(adr + pos, &token); + int len = rech_endtoken(html + pos, &token); if (len > 0) { if (strfield(token, "content-type")) { @@ -711,11 +715,11 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { // We are looking for the first head so that we can declare the HTTP-headers charset early // Emit as soon as we see the first <head>, <meta>, or <body> tag. 
// FIXME: we currently emit the tag BEFORE the <head> tag, actually, which is not clean - if ((p = strfield(adr, "<head>")) != 0 - || ((p = strfield(adr, "<head")) != 0 && isspace(adr[p])) - || (p = strfield(adr, "<body>")) != 0 - || ((p = strfield(adr, "<body")) != 0 && isspace(adr[p])) - || ((p = strfield(adr, "<meta")) != 0 && isspace(adr[p])) + if ((p = strfield(html, "<head>")) != 0 + || ((p = strfield(html, "<head")) != 0 && isspace(html[p])) + || (p = strfield(html, "<body>")) != 0 + || ((p = strfield(html, "<body")) != 0 && isspace(html[p])) + || ((p = strfield(html, "<meta")) != 0 && isspace(html[p])) ) { emited_footer++; } else { @@ -724,7 +728,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { break; case 1: // And the closing comment info tag - if ((p = strfield(adr, "</html") != 0)) { + if ((p = strfield(html, "</html") != 0)) { emited_footer++; } else { p = 0; @@ -768,15 +772,15 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { } } // éliminer les <!-- (commentaires) : intag dévalidé - if (*(adr + 1) == '!') - if (*(adr + 2) == '-') - if (*(adr + 3) == '-') { + if (*(html + 1) == '!') + if (*(html + 2) == '-') + if (*(html + 3) == '-') { intag = 0; incomment = 1; intag_start_valid = 0; } - } else if ((*adr == '>') /* ending tag */ + } else if ((*html == '>') /* ending tag */ &&((!inscript && !in_media) || (inscript_tag)) /* and in tag (or in script) */ ) { if (inscript_tag) { @@ -817,7 +821,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { } } else { /* end of comment? */ // vérifier fermeture correcte - if ((*(adr - 1) == '-') && (*(adr - 2) == '-')) { + if ((*(html - 1) == '-') && (*(html - 2) == '-')) { intag = 0; incomment = 0; intag_start_valid = 0; @@ -830,7 +834,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { <!-- foo > example <!-- bar > is sometimes accepted by browsers when no --> is used somewhere else.. 
darn those browsers are dirty */ - if (!strstr(adr, "-->")) { + if (!strstr(html, "-->")) { intag = 0; incomment = 0; intag_start_valid = 0; @@ -850,18 +854,18 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { int p_searchMETAURL = 0; // chercher ..URL=<url> int add_class = 0; // ajouter .class int add_class_dots_to_patch = 0; // number of '.' in code="x.y.z<realname>" - char *p_flush = NULL; + const char *p_flush = NULL; // ------------------------------------------------------------ // parsing évolé // ------------------------------------------------------------ - if (((isalpha((unsigned char) *adr)) || (*adr == '/') || (inscript) || (in_media) || (inscriptgen))) { // sinon pas la peine de tester.. + if (((isalpha((unsigned char) *html)) || (*html == '/') || (inscript) || (in_media) || (inscriptgen))) { // sinon pas la peine de tester.. /* caractère de terminaison pour "miniparsing" javascript=.. ? (ex: <a href="javascript:()" action="foo"> ) */ if (inscript_tag) { if (inscript_tag_lastc) { - if (*adr == inscript_tag_lastc) { + if (*html == inscript_tag_lastc) { /* sortir */ inscript_tag = inscript = 0; incomment = 0; @@ -888,9 +892,9 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { p = 0; valid_p = 1; } else if (strcmp(in_media, "AAM") == 0) { // AAM - if (is_space((unsigned char) adr[0]) - && !is_space((unsigned char) adr[1])) { - char *a = adr + 1; + if (is_space((unsigned char) html[0]) + && !is_space((unsigned char) html[1])) { + const char *a = html + 1; int n = 0; int ok = 0; int dot = 0; @@ -909,7 +913,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { tmp[0] = '\0'; strncat(tmp, a + dot + 1, n - dot - 1); if (is_knowntype(opt, tmp) || ishtml_ext(tmp) != -1) { - adr++; + html++; p = 0; valid_p = 1; unquoted_script = 1; @@ -926,21 +930,21 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { // note: inscript==1 donc on sautera après les \" if (inscript) { if 
(inscriptgen) { // on est déja dans un objet générant.. - if (*adr == scriptgen_q) { // fermeture des " ou ' - if (*(adr - 1) != '\\') { // non + if (*html == scriptgen_q) { // fermeture des " ou ' + if (*(html - 1) != '\\') { // non inscriptgen = 0; // ok parsing terminé } } } else { - char *a = NULL; + const char *a = NULL; char check_this_fking_line = 0; // parsing code javascript.. char must_be_terminated = 0; // caractère obligatoire de terminaison! int token_size; - if (!(token_size = strfield(adr, ".writeln"))) // détection ...objet.write[ln]("code html")... - token_size = strfield(adr, ".write"); + if (!(token_size = strfield(html, ".writeln"))) // détection ...objet.write[ln]("code html")... + token_size = strfield(html, ".write"); if (token_size) { - a = adr + token_size; + a = html + token_size; while(is_realspace(*a)) a++; // sauter espaces if (*a == '(') { // début parenthèse @@ -966,7 +970,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { while(is_realspace(*a)) a++; if ((*a == '\'') || (*a == '"')) { // départ de '' ou "" - char *b; + const char *b; scriptgen_q = *a; // quote b = a + 1; // départ de la chaîne @@ -997,10 +1001,10 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { // NOTE: le code javascript autogénéré n'est pas pris en compte!! // (et ne marche pas dans 50% des cas de toute facon!) if (check_this_fking_line == 1) { - p = (int) (b - adr); // calculer saut! + p = (int) (b - html); // calculer saut! } else { inscriptgen = 1; // SCRIPTGEN actif - adr = b; // jump + html = b; // jump } if ((opt->debug > 1) && (opt->log != NULL)) { @@ -1029,9 +1033,9 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { if (!p) { // si dans un tag, et pas dans un script - sauf si on analyse un obj.write(".. 
if ((intag && (!inscript)) || inscriptgen) { - if ((*(adr - 1) == '<') || (is_space(*(adr - 1)))) { // <tag < tag etc + if ((*(html - 1) == '<') || (is_space(*(html - 1)))) { // <tag < tag etc // <A HREF=.. pour les liens HTML - p = rech_tageq(adr, "href"); + p = rech_tageq(html, "href"); if (p) { // href.. tester si c'est une bas href! if ((intag_start_valid) && check_tag(intag_start, "base")) { // oui! // ** note: base href et codebase ne font pas bon ménage.. @@ -1044,7 +1048,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { int i = 0; while((p == 0) && (strnotempty(hts_detect[i]))) { - p = rech_tageq(adr, hts_detect[i]); + p = rech_tageq(html, hts_detect[i]); if (p) { /* This is a temporary hack to avoid archive=foo.jar,bar.jar .. */ if (strcmp(hts_detect[i], "archive") == 0) { @@ -1060,7 +1064,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { int i = 0; while((p == 0) && (strnotempty(hts_detectbeg[i]))) { - p = rech_tageqbegdigits(adr, hts_detectbeg[i]); + p = rech_tageqbegdigits(html, hts_detectbeg[i]); i++; } } @@ -1070,17 +1074,17 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { int i = 0; while((p == 0) && (strnotempty(hts_detectURL[i]))) { - p = rech_tageq(adr, hts_detectURL[i]); + p = rech_tageq(html, hts_detectURL[i]); i++; } if (p) { if (intag_ctype == 1) { p = 0; #if 0 - //if ((pos=rech_tageq(adr, "content"))) { + //if ((pos=rech_tageq(html, "content"))) { char temp[256]; char *token = NULL; - int len = rech_endtoken(adr + pos, &token); + int len = rech_endtoken(html + pos, &token); if (len > 0 && len < sizeof(temp) - 2) { char *chpos; @@ -1113,7 +1117,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { int i = 0; while((p == 0) && (strnotempty(hts_detectandleave[i]))) { - p = rech_tageq(adr, hts_detectandleave[i]); + p = rech_tageq(html, hts_detectandleave[i]); i++; } if (p) @@ -1127,20 +1131,20 @@ int htsparse(htsmoduleStruct * str, 
htsmoduleStructExtended * stre) { /* détection onLoad etc */ while((p == 0) && (strnotempty(hts_detect_js[i]))) { - p = rech_tageq(adr, hts_detect_js[i]); + p = rech_tageq(html, hts_detect_js[i]); i++; } /* non détecté - détecter également les onXxxxx= */ if (p == 0) { - if ((*adr == 'o') && (*(adr + 1) == 'n') - && isUpperLetter(*(adr + 2))) { + if ((*html == 'o') && (*(html + 1) == 'n') + && isUpperLetter(*(html + 2))) { p = 0; - while(isalpha((unsigned char) adr[p]) && (p < 64)) + while(isalpha((unsigned char) html[p]) && (p < 64)) p++; if (p < 64) { - while(is_space(adr[p])) + while(is_space(html[p])) p++; - if (adr[p] == '=') + if (html[p] == '=') p++; else p = 0; @@ -1150,8 +1154,8 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { } /* OK, événement repéré */ if (p) { - inscript_tag_lastc = *(adr + p); /* à attendre à la fin */ - adr += p /*+ 1*/; /* saut */ + inscript_tag_lastc = *(html + p); /* à attendre à la fin */ + html += p /*+ 1*/; /* saut */ /* On est désormais dans du code javascript */ @@ -1166,7 +1170,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { } // <APPLET CODE=.. pour les applet java.. [CODEBASE (chemin..) à faire] if (p == 0) { - p = rech_tageq(adr, "code"); + p = rech_tageq(html, "code"); if (p) { if ((intag_start_valid) && check_tag(intag_start, "applet")) { // dans un <applet ! p_type = -1; // juste le nom de fichier+dossier, écire avant codebase @@ -1177,9 +1181,9 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { // pas très propre mais c'est ce qu'il y a de plus simple à faire!! 
{ - char *a; + const char *a; - a = adr; + a = html; while((*a) && (*a != '>') && (!rech_tageq(a, "codebase"))) a++; @@ -1187,20 +1191,23 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { char *b; b = strchr(a, '>'); - if (b) { - if (((int) (b - adr)) < 1000) { // au total < 1Ko + if (b != NULL) { + if (b - html < 1000) { // au total < 1Ko char BIGSTK tempo[HTS_URLMAXSIZE * 2]; + const size_t offset = html - r->adr; + char *const modify = &r->adr[offset]; + assertf(modify == html); tempo[0] = '\0'; - strncatbuff(tempo, a, (int) (b - a)); + strncatbuff(tempo, a, b - a); strcatbuff(tempo, " "); - strncatbuff(tempo, adr, (int) (a - adr - 1)); + strncatbuff(tempo, html, a - html - 1); // éventuellement remplire par des espaces pour avoir juste la taille - while((int) strlen(tempo) < ((int) (b - adr))) + while(strlen(tempo) < (size_t) (b - html)) strcatbuff(tempo, " "); // pas d'erreur? - if ((int) strlen(tempo) == ((int) (b - adr))) { - strncpy(adr, tempo, strlen(tempo)); // PAS d'octet nul à la fin! + if (strlen(tempo) == b - html) { + strncpy(modify, tempo, strlen(tempo)); // PAS d'octet nul à la fin! p = 0; // DEVALIDER!! p_type = 0; add_class = 0; @@ -1215,7 +1222,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { } // liens à patcher mais pas à charger (ex: codebase) if (p == 0) { // note: si non chargé (ex: ignorer .class) patché tout de même - p = rech_tageq(adr, "codebase"); + p = rech_tageq(html, "codebase"); if (p) { if ((intag_start_valid) && check_tag(intag_start, "applet")) { // dans un <applet ! 
p_type = -2; @@ -1229,18 +1236,18 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { if (opt->robots) { if ((intag_start_valid) && check_tag(intag_start, "meta")) { - if (rech_tageq(adr, "name")) { // name=robots.txt + if (rech_tageq(html, "name")) { // name=robots.txt char tempo[1100]; char *a; tempo[0] = '\0'; - a = strchr(adr, '>'); + a = strchr(html, '>'); #if DEBUG_ROBOTS printf("robots.txt meta tag detected\n"); #endif if (a) { - if (((int) (a - adr)) < 999) { - strncatbuff(tempo, adr, (int) (a - adr)); + if (((int) (a - html)) < 999) { + strncatbuff(tempo, html, (int) (a - html)); if (strstrcase(tempo, "content")) { if (strstrcase(tempo, "robots")) { if (strstrcase(tempo, "nofollow")) { @@ -1265,7 +1272,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { // entrée dans une applet javascript /*if (!inscript) { // sinon on est dans un obj.write(".. if (p==0) - if (rech_sampletag(adr,"script")) + if (rech_sampletag(html,"script")) if (check_tag(intag_start,"script")) { inscript=1; } @@ -1280,13 +1287,13 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { #if 0 /* Check // javascript comments */ - if (*adr == 10 || *adr == 13) { + if (*html == 10 || *html == 13) { inscript_check_comments = 1; inscript_in_comments = 0; } else if (inscript_check_comments) { - if (!is_realspace(*adr)) { + if (!is_realspace(*html)) { inscript_check_comments = 0; - if (adr[0] == '/' && adr[1] == '/') { + if (html[0] == '/' && html[1] == '/') { inscript_in_comments = 1; } } @@ -1295,15 +1302,15 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { /* Parse */ assertf(inscript_name != NULL); - if (*adr == '/' + if (*html == '/' && - ((strfield(adr, "/script") + ((strfield(html, "/script") && strfield(inscript_name, "script")) - || (strfield(adr, "/style") + || (strfield(html, "/style") && strfield(inscript_name, "style")) ) && inscript_locked == 0) { - char *a = adr; + const char *a = html; 
//while(is_realspace(*(--a))); while(is_realspace(*a)) @@ -1336,54 +1343,54 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { if ((opt->parsejava & HTSPARSE_NO_JAVASCRIPT) == 0) { int nc; - nc = strfield(adr, ".src"); // nom.src="image"; - if (!nc && inscript_tag && inscript_tag_lastc == *(adr - 1)) - nc = strfield(adr, "src"); // onXXX='src="image";' + nc = strfield(html, ".src"); // nom.src="image"; + if (!nc && inscript_tag && inscript_tag_lastc == *(html - 1)) + nc = strfield(html, "src"); // onXXX='src="image";' if (!nc) - nc = strfield(adr, ".location"); // document.location="doc" + nc = strfield(html, ".location"); // document.location="doc" if (!nc) - nc = strfield(adr, ":location"); // javascript:location="doc" + nc = strfield(html, ":location"); // javascript:location="doc" if (!nc) { // location="doc" - if ((nc = strfield(adr, "location")) - && !isspace(*(adr - 1)) + if ((nc = strfield(html, "location")) + && !isspace(*(html - 1)) ) nc = 0; } if (!nc) - nc = strfield(adr, ".href"); // document.location="doc" + nc = strfield(html, ".href"); // document.location="doc" if (!nc) - if ((nc = strfield(adr, ".open"))) { // window.open("doc",.. + if ((nc = strfield(html, ".open"))) { // window.open("doc",.. 
expected = '('; // parenthèse expected_end = "),"; // fin: virgule ou parenthèse ensure_not_mime = 1; //* ensure the url is not a mime type */ } if (!nc) - if ((nc = strfield(adr, ".replace"))) { // window.replace("url") + if ((nc = strfield(html, ".replace"))) { // window.replace("url") expected = '('; // parenthèse expected_end = ")"; // fin: parenthèse } if (!nc) - if ((nc = strfield(adr, ".link"))) { // window.link("url") + if ((nc = strfield(html, ".link"))) { // window.link("url") expected = '('; // parenthèse expected_end = ")"; // fin: parenthèse } - if (!nc && (nc = strfield(adr, "url")) && (!isalnum(*(adr - 1))) && *(adr - 1) != '_') { // url(url) + if (!nc && (nc = strfield(html, "url")) && (!isalnum(*(html - 1))) && *(html - 1) != '_') { // url(url) expected = '('; // parenthèse expected_end = ")"; // fin: parenthèse can_avoid_quotes = 1; quotes_replacement = ')'; } if (!nc) - if ((nc = strfield(adr, "import"))) { // import "url" - if (is_space(*(adr + nc))) { + if ((nc = strfield(html, "import"))) { // import "url" + if (is_space(*(html + nc))) { expected = 0; // no char expected } else nc = 0; } if (nc) { - char *a; + const char *a; - a = adr + nc; + a = html + nc; while(is_realspace(*a)) a++; if ((*a == expected) || (!expected)) { @@ -1392,7 +1399,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { while(is_realspace(*a)) a++; if ((*a == 34) || (*a == '\'') || (can_avoid_quotes)) { - char *b, *c; + const char *b, *c; int ndelim = 1; if ((*a == 34) || (*a == '\'')) @@ -1465,7 +1472,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { "link detected in javascript: %s", str); } - p = (int) (a - adr); // p non nul: TRAITER CHAINE COMME FICHIER + p = (int) (a - html); // p non nul: TRAITER CHAINE COMME FICHIER if (can_avoid_quotes) { ending_p = quotes_replacement; } @@ -1487,14 +1494,14 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { //p=rech_tageq(adr,"primary"); // lien primaire, yeah 
p = 0; // No stupid tag anymore, raw link valid_p = 1; // Valid even if p==0 - while((adr[p] == '\r') || (adr[p] == '\n')) + while((html[p] == '\r') || (html[p] == '\n')) p++; //can_avoid_quotes=1; ending_p = '\r'; } - } else if (isspace((unsigned char) *adr)) { - intag_startattr = adr + 1; // attribute in tag (for dirty parsing) + } else if (isspace((unsigned char) *html)) { + intag_startattr = html + 1; // attribute in tag (for dirty parsing) } // ------------------------------------------------------------ @@ -1504,18 +1511,18 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { // ------------------------------------------------------------ if (opt->parseall && (opt->parsejava & HTSPARSE_NO_AGGRESSIVE) == 0 && (ptr > 0) && (!in_media) /* && (!inscript_in_comments) */ ) { // option parsing "brut" //int incomment_justquit=0; - if (!is_realspace(*adr)) { + if (!is_realspace(*html)) { int noparse = 0; // Gestion des /* */ #if 0 if (inscript) { if (parseall_incomment) { - if ((*adr == '/') && (*(adr - 1) == '*')) + if ((*html == '/') && (*(html - 1) == '*')) parseall_incomment = 0; incomment_justquit = 1; // ne pas noter dernier caractère } else { - if ((*adr == '/') && (*(adr + 1) == '*')) + if ((*html == '/') && (*(html + 1) == '*')) parseall_incomment = 1; } } else @@ -1536,12 +1543,12 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { if (!noparse) { //if ((!parseall_incomment) && (!noparse)) { if (!p) { // non déja trouvé - if (adr != r->adr) { // >1 caractère + if (html != r->adr) { // >1 caractère // scanner les chaines - if ((*adr == '\"') || (*adr == '\'')) { // "xx.gif" 'xx.gif' + if ((*html == '\"') || (*html == '\'')) { // "xx.gif" 'xx.gif' if (strchr("=(,", parseall_lastc)) { // exemple: a="img.gif.. 
(handles comments) - char *a = adr; - char stop = *adr; // " ou ' + const char *a = html; + char stop = *html; // " ou ' int count = 0; // sauter caractères @@ -1574,7 +1581,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { tempo[0] = '\0'; type[0] = '\0'; // - strncatbuff(tempo, adr + 1, count); + strncatbuff(tempo, html + 1, count); // if ((!strchr(tempo, ' ')) || inscript) { // espace dedans: méfiance! (sauf dans code javascript) int invalid_url = 0; @@ -1702,7 +1709,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { // plus dans un commentaire if (inscript_state_pos == INSCRIPT_START && inscript_state_pos_prev == INSCRIPT_START) { - parseall_lastc = *adr; // caractère avant le prochain + parseall_lastc = *html; // caractère avant le prochain } } // if realspace @@ -1714,14 +1721,14 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { // if ((p > 0) || (valid_p)) { // on a repéré un lien //int lien_valide=0; - char *eadr = NULL; /* fin de l'URL */ + const char *eadr = NULL; /* fin de l'URL */ //char* quote_adr=NULL; /* adresse du ? 
dans l'adresse */ int ok = 1; char quote = '\0'; int quoteinscript = 0; int noquote = 0; - char *tag_attr_start = adr; + const char *tag_attr_start = html; // si nofollow ou un stop a été déclenché, réécrire tous les liens en externe if ((nofollow) @@ -1734,27 +1741,27 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { // écrire codebase avant, flusher avant code if ((p_type == -1) || (p_type == -2)) { if ((opt->getmode & 1) && (ptr > 0)) { - HT_ADD_ADR; // refresh + HT_add_adr; // refresh } - lastsaved = adr; // dernier écrit+1 + lastsaved = html; // dernier écrit+1 } // sauter espaces // adr+=p; INCREMENT_CURRENT_ADR(p); - while((is_space(*adr) - || (inscriptgen && adr[0] == '\\' && is_space(adr[1]) + while((is_space(*html) + || (inscriptgen && html[0] == '\\' && is_space(html[1]) ) ) && quote == '\0') { if (!quote) - if ((*adr == '\"') || (*adr == '\'')) { - quote = *adr; // on doit attendre cela à la fin - if (inscriptgen && *(adr - 1) == '\\') { + if ((*html == '\"') || (*html == '\'')) { + quote = *html; // on doit attendre cela à la fin + if (inscriptgen && *(html - 1) == '\\') { quoteinscript = 1; /* will wait for \" */ } } // puis quitter - // adr++; // sauter les espaces, "" et cie + // html++; // sauter les espaces, "" et cie INCREMENT_CURRENT_ADR(1); } @@ -1767,9 +1774,9 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { // sauter éventuel \" ou \' javascript if (inscript) { // on est dans un obj.write(".. 
- if (*adr == '\\') { - if ((*(adr + 1) == '\'') || (*(adr + 1) == '"')) { // \" ou \' - // adr+=2; // sauter + if (*html == '\\') { + if ((*(html + 1) == '\'') || (*(html + 1) == '"')) { // \" ou \' + // html+=2; // sauter INCREMENT_CURRENT_ADR(2); } } @@ -1778,19 +1785,19 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { if (p_searchMETAURL) { int l = 0; - while((adr + l + 4 < r->adr + r->size) - && (!strfield(adr + l, "URL=")) + while((html + l + 4 < r->adr + r->size) + && (!strfield(html + l, "URL=")) && (l < 128)) l++; - if (!strfield(adr + l, "URL=")) + if (!strfield(html + l, "URL=")) ok = -1; else - adr += (l + 4); + html += (l + 4); } /* éviter les javascript:document.location=.. : les parser, plutôt */ if (ok != -1) { - if (strfield(adr, "javascript:") + if (strfield(html, "javascript:") && !inscript /* we don't want to parse 'javascript:' inside document.write inside scripts */ ) { ok = -1; @@ -1808,22 +1815,22 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { } if (p_type == 1) { - if (*adr == '#') { - adr++; // sauter # pour usemap etc + if (*html == '#') { + html++; // sauter # pour usemap etc } } - eadr = adr; + eadr = html; // ne pas flusher après code si on doit écrire le codebase avant! if ((p_type != -1) && (p_type != 2) && (p_type != -2)) { if ((opt->getmode & 1) && (ptr > 0)) { - HT_ADD_ADR; // refresh + HT_add_adr; // refresh } - lastsaved = adr; // dernier écrit+1 + lastsaved = html; // dernier écrit+1 // après on écrira soit les données initiales, // soir une URL/lien modifié! 
} else if (p_type == -1) - p_flush = adr; // flusher jusqu'à adr ensuite + p_flush = html; // flusher jusqu'à adr ensuite if (ok != -1) { // continuer // découper le lien @@ -1832,7 +1839,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { if (!is_space(*eadr)) ok = 0; } - if ((((int) (eadr - adr))) > HTS_URLMAXSIZE) // ** trop long, >HTS_URLMAXSIZE caractères (on prévoit HTS_URLMAXSIZE autres pour path) + if ((((int) (eadr - html))) > HTS_URLMAXSIZE) // ** trop long, >HTS_URLMAXSIZE caractères (on prévoit HTS_URLMAXSIZE autres pour path) ok = -1; // ne pas traiter ce lien if (ok > 0) { @@ -1877,10 +1884,10 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { } while(ok == 1); // Empty link detected - if ((((int) (eadr - adr))) <= 1) { // link empty + if ((((int) (eadr - html))) <= 1) { // link empty ok = -1; // No - if (*adr != '#') { // Not empty+unique # - if ((((int) (eadr - adr)) == 1)) { // 1=link empty with delim (end_adr-start_adr) + if (*html != '#') { // Not empty+unique # + if ((((int) (eadr - html)) == 1)) { // 1=link empty with delim (end_adr-start_adr) if (quote) { if ((opt->getmode & 1) && (ptr > 0)) { HT_ADD("#"); // We add this for a <href=""> @@ -1890,7 +1897,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { } } // This is a dirty and horrible hack to avoid parsing an Adobe GoLive bogus tag - if (strfield(adr, "(Empty Reference!)")) { + if (strfield(html, "(Empty Reference!)")) { ok = -1; // No } @@ -1900,13 +1907,13 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { char BIGSTK lien[HTS_URLMAXSIZE * 2]; int meme_adresse = 0; // 0 par défaut pour primary - //char *copie_de_adr=adr; + //char *copie_de_adr=html; //char* p; // construire lien (découpage) - if ((((int) (eadr - adr)) - 1) < HTS_URLMAXSIZE) { // pas trop long? 
- strncpy(lien, adr, ((int) (eadr - adr)) - 1); - *(lien + (((int) (eadr - adr))) - 1) = '\0'; + if ((((int) (eadr - html)) - 1) < HTS_URLMAXSIZE) { // pas trop long? + strncpy(lien, html, ((int) (eadr - html)) - 1); + *(lien + (((int) (eadr - html))) - 1) = '\0'; //printf("link: %s\n",lien); // supprimer les espaces while((lien[strlen(lien) - 1] == ' ') && (strnotempty(lien))) @@ -1918,15 +1925,16 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { // ------------------------------------------------------ // Lien repéré et extrait if (strnotempty(lien) > 0) { // construction du lien - char BIGSTK adr[HTS_URLMAXSIZE * 2], fil[HTS_URLMAXSIZE * 2]; // ATTENTION adr cache le "vrai" adr + lien_adrfilsave afs; int forbidden_url = -1; // lien non interdit (mais non autorisé..) int just_test_it = 0; // mode de test des liens int set_prio_to = 0; // pour capture de page isolée int import_done = 0; // lien importé (ne pas scanner ensuite *à priori*) // - adr[0] = '\0'; - fil[0] = '\0'; + afs.af.adr[0] = '\0'; + afs.af.fil[0] = '\0'; + afs.save[0] = '\0'; // // 0: autorisé // 1: interdit (patcher tout de même adresse) @@ -2207,7 +2215,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { // Vérifier les codebase=applet (au lieu de applet/) if (p_type == -2) { // codebase if (strnotempty(lien)) { - if (fil[strlen(lien) - 1] != '/') { // pas répertoire + if (lien[strlen(lien) - 1] != '/') { // pas répertoire strcatbuff(lien, "/"); } } @@ -2231,17 +2239,16 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { } // copier nom host si besoin est if (!link_has_authority(lien)) { // pas de http:// - char BIGSTK adr2[HTS_URLMAXSIZE * 2], fil2[HTS_URLMAXSIZE * 2]; // ** euh ident_url_relatif?? + lien_adrfil af2; // ** euh ident_url_relatif?? 
- if (ident_url_relatif(lien, urladr(), urlfil(), adr2, fil2) < - 0) { + if (ident_url_relatif(lien, urladr(), urlfil(), &af2) < 0) { error = 1; } else { strcpybuff(lien, "http://"); - strcatbuff(lien, adr2); - if (*fil2 != '/') + strcatbuff(lien, af2.adr); + if (*af2.fil != '/') strcatbuff(lien, "/"); - strcatbuff(lien, fil2); + strcatbuff(lien, af2.fil); { char *a; @@ -2321,7 +2328,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { HTS_URLMAXSIZE) { // mailto: and co: do NOT add base if (ident_url_relatif - (lien, urladr(), urlfil(), adr, fil) >= 0) { + (lien, urladr(), urlfil(), &afs.af) >= 0) { char BIGSTK tempo[HTS_URLMAXSIZE * 2]; // base est absolue @@ -2342,19 +2349,18 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { lien); } } else { - char BIGSTK badr[HTS_URLMAXSIZE * 2], - bfil[HTS_URLMAXSIZE * 2]; - if (ident_url_absolute(_base, badr, bfil) >= 0) { - if (((int) strlen(badr) + (int) strlen(lien)) < + lien_adrfil baseaf; + if (ident_url_absolute(_base, &baseaf) >= 0) { + if (((int) strlen(baseaf.adr) + (int) strlen(lien)) < HTS_URLMAXSIZE) { char BIGSTK tempo[HTS_URLMAXSIZE * 2]; // base est absolue tempo[0] = '\0'; - if (!link_has_authority(badr)) { + if (!link_has_authority(baseaf.adr)) { strcatbuff(tempo, "http://"); } - strcatbuff(tempo, badr); + strcatbuff(tempo, baseaf.adr); strcatbuff(tempo, lien); strcpybuff(lien, tempo); // patcher en considérant base @@ -2384,8 +2390,8 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { relativeurladr(), relativeurlfil()); if ((reponse = ident_url_relatif(lien, relativeurladr(), relativeurlfil(), - adr, fil)) < 0) { - adr[0] = '\0'; // erreur + &afs.af)) < 0) { + afs.af.adr[0] = '\0'; // erreur if (reponse == -2) { hts_log_print(opt, LOG_WARNING, "Link %s not caught (unknown protocol)", @@ -2398,14 +2404,14 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { } else { hts_log_print(opt, LOG_DEBUG, "built relative link %s 
with %s%s -> %s%s", - lien, relativeurladr(), relativeurlfil(), adr, - fil); + lien, relativeurladr(), relativeurlfil(), afs.af.adr, + afs.af.fil); } } else { hts_log_print(opt, LOG_DEBUG, "link %s not build, error detected before", lien); - adr[0] = '\0'; + afs.af.adr[0] = '\0'; } // Le lien doit juste être réécrit, mais ne doit pas générer un lien @@ -2413,7 +2419,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { if (p_nocatch) { forbidden_url = 1; // interdire récupération du lien hts_log_print(opt, LOG_DEBUG, "link forced external at %s%s", - adr, fil); + afs.af.adr, afs.af.fil); } // Tester si un lien doit être accepté ou refusé (wizard) // forbidden_url=1 : lien refusé @@ -2421,11 +2427,11 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { //if ((ptr>0) && (p_type!=2) && (p_type!=-2)) { // tester autorisations? if ((p_type != 2) && (p_type != -2)) { // tester autorisations? if (!p_nocatch) { - if (adr[0] != '\0') { + if (afs.af.adr[0] != '\0') { hts_log_print(opt, LOG_DEBUG, - "wizard link test at %s%s..", adr, fil); + "wizard link test at %s%s..", afs.af.adr, afs.af.fil); forbidden_url = - hts_acceptlink(opt, ptr, adr, fil, + hts_acceptlink(opt, ptr, afs.af.adr, afs.af.fil, intag_name ? intag_name : NULL, intag_name ? 
tag_attr_start : NULL, &set_prio_to, &just_test_it); @@ -2437,25 +2443,23 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { } // calculer meme_adresse meme_adresse = - strfield2(jump_identification(adr), + strfield2(jump_identification(afs.af.adr), jump_identification(urladr())); // Début partie sauvegarde // ici on forme le nom du fichier à sauver, et on patche l'URL - if (adr[0] != '\0') { + if (afs.af.adr[0] != '\0') { // savename(): simplifier les ../ et autres joyeusetés - char BIGSTK save[HTS_URLMAXSIZE * 2]; int r_sv = 0; // En cas de moved, adresse première - char BIGSTK former_adr[HTS_URLMAXSIZE * 2]; - char BIGSTK former_fil[HTS_URLMAXSIZE * 2]; + lien_adrfil former; // - save[0] = '\0'; - former_adr[0] = '\0'; - former_fil[0] = '\0'; + afs.save[0] = '\0'; + former.adr[0] = '\0'; + former.fil[0] = '\0'; // // nom du chemin à sauver si on doit le calculer @@ -2471,14 +2475,14 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { /* Calc */ last_adr[0] = '\0'; //char last_fil[HTS_URLMAXSIZE*2]=""; - strcpybuff(last_adr, adr); // ancienne adresse + strcpybuff(last_adr, afs.af.adr); // ancienne adresse //strcpybuff(last_fil,fil); // ancien chemin r_sv = - url_savename(adr, fil, save, former_adr, former_fil, - heap(ptr)->adr, heap(ptr)->fil, opt, - opt->liens, opt->lien_tot, sback, cache, hash, ptr, + url_savename(&afs, &former, heap(ptr)->adr, heap(ptr)->fil, opt, + sback, cache, hash, ptr, numero_passe, NULL); - if (strcmp(jump_identification(last_adr), jump_identification(adr)) != 0) { // a changé + if (strcmp(jump_identification(last_adr), + jump_identification(afs.af.adr)) != 0) { // a changé // 2e test si moved @@ -2487,12 +2491,12 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { // forbidden_url=0 : lien accepté if ((ptr > 0) && (p_type != 2) && (p_type != -2)) { // tester autorisations? 
if (!p_nocatch) { - if (adr[0] != '\0') { + if (afs.af.adr[0] != '\0') { hts_log_print(opt, LOG_DEBUG, "wizard moved link retest at %s%s..", - adr, fil); + afs.af.adr, afs.af.fil); forbidden_url = - hts_acceptlink(opt, ptr, adr, fil, + hts_acceptlink(opt, ptr, afs.af.adr, afs.af.fil, intag_name ? intag_name : NULL, intag_name ? tag_attr_start : NULL, &set_prio_to, @@ -2507,19 +2511,19 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { meme_adresse = 0; // on a changé } } else { - strcpybuff(save, ""); // dummy + strcpybuff(afs.save, ""); // dummy } } // resolve unresolved type if (r_sv != -1 && p_type != 2 && p_type != -2 - && forbidden_url == 0 && IS_DELAYED_EXT(save) + && forbidden_url == 0 && IS_DELAYED_EXT(afs.save) ) { time_t t; // pas d'erreur, on continue r_sv = - hts_wait_delayed(str, adr, fil, save, heap(ptr)->adr, - heap(ptr)->fil, former_adr, former_fil, + hts_wait_delayed(str, &afs, heap(ptr)->adr, + heap(ptr)->fil, &former, &forbidden_url); /* User interaction, because hts_wait_delayed can be slow.. 
(3.43) */ @@ -2541,22 +2545,22 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { if (forbidden_url != 1) { // le lien va être chargé if ((p_type == 2) || (p_type == -2)) { // base href ou codebase, pas un lien hts_log_print(opt, LOG_DEBUG, "Code/Codebase: %s%s", - adr, fil); + afs.af.adr, afs.af.fil); } else if ((opt->getmode & 4) == 0) { hts_log_print(opt, LOG_DEBUG, "Record: %s%s -> %s", - adr, fil, save); + afs.af.adr, afs.af.fil, afs.save); } else { - if (!ishtml(opt, fil)) + if (!ishtml(opt, afs.af.fil)) hts_log_print(opt, LOG_DEBUG, - "Record after: %s%s -> %s", adr, fil, - save); + "Record after: %s%s -> %s", afs.af.adr, afs.af.fil, + afs.save); else hts_log_print(opt, LOG_DEBUG, "Record: %s%s -> %s", - adr, fil, save); + afs.af.adr, afs.af.fil, afs.save); } } else - hts_log_print(opt, LOG_DEBUG, "External: %s%s", adr, - fil); + hts_log_print(opt, LOG_DEBUG, "External: %s%s", afs.af.adr, + afs.af.fil); } /* FIN log */ @@ -2567,29 +2571,29 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { /* */ else if (opt->urlmode == 0) { // URL absolue dans tous les cas if ((opt->getmode & 1) && (ptr > 0)) { // ecrire les html - if (!link_has_authority(adr)) { + if (!link_has_authority(afs.af.adr)) { HT_ADD("http://"); } else { - char *aut = strstr(adr, "//"); + char *aut = strstr(afs.af.adr, "//"); if (aut) { char tmp[256]; tmp[0] = '\0'; - strncatbuff(tmp, adr, (int) (aut - adr)); // scheme + strncatbuff(tmp, afs.af.adr, aut - afs.af.adr); // scheme HT_ADD(tmp); // Protocol HT_ADD("//"); } } if (!opt->passprivacy) { - HT_ADD_HTMLESCAPED(jump_protocol(adr)); // Password + HT_ADD_HTMLESCAPED(jump_protocol(afs.af.adr)); // Password } else { - HT_ADD_HTMLESCAPED(jump_identification(adr)); // No Password + HT_ADD_HTMLESCAPED(jump_identification(afs.af.adr)); // No Password } - if (*fil != '/') + if (afs.af.fil[0] != '/') HT_ADD("/"); - HT_ADD_HTMLESCAPED(fil); + HT_ADD_HTMLESCAPED(afs.af.fil); } lastsaved = eadr - 1; // dernier 
écrit+1 (enfin euh apres on fait un ++ alors hein) /* */ @@ -2601,34 +2605,34 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { if ((opt->getmode & 1) && (ptr > 0)) { if (p_type != -1) { // pas que le nom de fichier (pas classe java) if (!opt->external) { - if (!link_has_authority(adr)) { + if (!link_has_authority(afs.af.adr)) { HT_ADD("http://"); if (!opt->passprivacy) { - HT_ADD_HTMLESCAPED(adr); // Password + HT_ADD_HTMLESCAPED(afs.af.adr); // Password } else { - HT_ADD_HTMLESCAPED(jump_identification(adr)); // No Password + HT_ADD_HTMLESCAPED(jump_identification(afs.af.adr)); // No Password } - if (*fil != '/') + if (afs.af.fil[0] != '/') HT_ADD("/"); - HT_ADD_HTMLESCAPED(fil); + HT_ADD_HTMLESCAPED(afs.af.fil); } else { - char *aut = strstr(adr, "//"); + char *aut = strstr(afs.af.adr, "//"); if (aut) { char tmp[256]; tmp[0] = '\0'; - strncatbuff(tmp, adr, (int) (aut - adr)); // scheme + strncatbuff(tmp, afs.af.adr, (aut - afs.af.adr)); // scheme HT_ADD(tmp); // Protocol HT_ADD("//"); if (!opt->passprivacy) { - HT_ADD_HTMLESCAPED(jump_protocol(adr)); // Password + HT_ADD_HTMLESCAPED(jump_protocol(afs.af.adr)); // Password } else { - HT_ADD_HTMLESCAPED(jump_identification(adr)); // No Password + HT_ADD_HTMLESCAPED(jump_identification(afs.af.adr)); // No Password } - if (*fil != '/') + if (afs.af.fil[0] != '/') HT_ADD("/"); - HT_ADD_HTMLESCAPED(fil); + HT_ADD_HTMLESCAPED(afs.af.fil); } } // @@ -2642,9 +2646,9 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { int cat_data_len = 0; // ajouter lien external - switch ((link_has_authority(adr)) ? 1 - : ((fil[strlen(fil) - 1] == - '/') ? 1 : (ishtml(opt, fil)))) { + switch ((link_has_authority(afs.af.adr)) ? 1 + : ((afs.af.fil[strlen(afs.af.fil) - 1] == + '/') ? 
1 : (ishtml(opt, afs.af.fil)))) { case 1: case -2: // html ou répertoire if (opt->getmode & 1) { // sauver html @@ -2659,15 +2663,15 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { default: // inconnu // asp, cgi.. if ((strfield2 - (fil + max(0, (int) strlen(fil) - 4), + (afs.af.fil + max(0, (int) strlen(afs.af.fil) - 4), ".gif")) || (strfield2 - (fil + max(0, (int) strlen(fil) - 4), + (afs.af.fil + max(0, (int) strlen(afs.af.fil) - 4), ".jpg")) || (strfield2 - (fil + max(0, (int) strlen(fil) - 4), + (afs.af.fil + max(0, (int) strlen(afs.af.fil) - 4), ".xbm")) /*|| (ishtml(opt,fil)!=0) */ ) { @@ -2690,13 +2694,12 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { } // html,gif if (patch_it) { - char BIGSTK save[HTS_URLMAXSIZE * 2]; + char BIGSTK save[HTS_URLMAXSIZE * 2];
char BIGSTK tempo[HTS_URLMAXSIZE * 2]; strcpybuff(save, StringBuff(opt->path_html_utf8)); strcatbuff(save, cat_name); - if (lienrelatif(tempo, save, relativesavename()) == - 0) { + if (lienrelatif(tempo, save, relativesavename()) == 0) { /* Never escape high-chars (we don't know the encoding!!) */ inplace_escape_uri_utf(tempo, sizeof(tempo)); // escape with %xx //if (!no_esc_utf) @@ -2708,33 +2711,33 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { HT_ADD("?link="); // page externe // same as above - if (!link_has_authority(adr)) { + if (!link_has_authority(afs.af.adr)) { HT_ADD("http://"); if (!opt->passprivacy) { - HT_ADD_HTMLESCAPED(adr); // Password + HT_ADD_HTMLESCAPED(afs.af.adr); // Password } else { - HT_ADD_HTMLESCAPED(jump_identification(adr)); // No Password + HT_ADD_HTMLESCAPED(jump_identification(afs.af.adr)); // No Password } - if (*fil != '/') + if (afs.af.fil[0] != '/') HT_ADD("/"); - HT_ADD_HTMLESCAPED(fil); + HT_ADD_HTMLESCAPED(afs.af.fil); } else { - char *aut = strstr(adr, "//"); + char *aut = strstr(afs.af.adr, "//"); if (aut) { char tmp[256]; tmp[0] = '\0'; - strncatbuff(tmp, adr, (int) (aut - adr) + 2); // scheme + strncatbuff(tmp, afs.af.adr, (aut - afs.af.adr) + 2); // scheme HT_ADD(tmp); if (!opt->passprivacy) { - HT_ADD_HTMLESCAPED(jump_protocol(adr)); // Password + HT_ADD_HTMLESCAPED(jump_protocol(afs.af.adr)); // Password } else { - HT_ADD_HTMLESCAPED(jump_identification(adr)); // No Password + HT_ADD_HTMLESCAPED(jump_identification(afs.af.adr)); // No Password } - if (*fil != '/') + if (afs.af.fil[0] != '/') HT_ADD("/"); - HT_ADD_HTMLESCAPED(fil); + HT_ADD_HTMLESCAPED(afs.af.fil); } } // @@ -2773,13 +2776,13 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { } else { // écrire normalement le nom de fichier HT_ADD("http://"); if (!opt->passprivacy) { - HT_ADD_HTMLESCAPED(adr); // Password + HT_ADD_HTMLESCAPED(afs.af.adr); // Password } else { - HT_ADD_HTMLESCAPED(jump_identification(adr)); 
// No Password + HT_ADD_HTMLESCAPED(jump_identification(afs.af.adr)); // No Password } - if (*fil != '/') + if (afs.af.fil[0] != '/') HT_ADD("/"); - HT_ADD_HTMLESCAPED(fil); + HT_ADD_HTMLESCAPED(afs.af.fil); } // patcher? } // external } else { // que le nom de fichier (classe java) @@ -2790,7 +2793,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { // Calculer chemin tempo_pat[0] = '\0'; - strcpybuff(tempo, fil); // <-- ajouté + strcpybuff(tempo, afs.af.fil); // <-- ajouté { char *a = strrchr(tempo, '/'); @@ -2814,7 +2817,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { char BIGSTK tempo2[HTS_URLMAXSIZE * 2]; strcpybuff(tempo2, a + 1); // FICHIER - strncatbuff(tempo_pat, tempo, (int) (a - tempo) + 1); // chemin + strncatbuff(tempo_pat, tempo, (a - tempo) + 1); // chemin strcpybuff(tempo, tempo2); // fichier } } @@ -2828,9 +2831,9 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { if (strnotempty(tempo_pat)) { HT_ADD("codebase=\"http://"); if (!opt->passprivacy) { - HT_ADD_HTMLESCAPED(adr); // Password + HT_ADD_HTMLESCAPED(afs.af.adr); // Password } else { - HT_ADD_HTMLESCAPED(jump_identification(adr)); // No Password + HT_ADD_HTMLESCAPED(jump_identification(afs.af.adr)); // No Password } if (*tempo_pat != '/') HT_ADD("/"); @@ -2866,12 +2869,12 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { char BIGSTK cid[HTS_URLMAXSIZE * 3]; HT_ADD("cid:"); - make_content_id(adr, fil, cid, sizeof(cid)); + make_content_id(afs.af.adr, afs.af.fil, cid, sizeof(cid)); HT_ADD_HTMLESCAPED(cid); lastsaved = eadr - 1; // dernier écrit+1 (enfin euh apres on fait un ++ alors hein) } else if (opt->urlmode == 3) { // URI absolue / if ((opt->getmode & 1) && (ptr > 0)) { // ecrire les html - HT_ADD_HTMLESCAPED(fil); + HT_ADD_HTMLESCAPED(afs.af.fil); } lastsaved = eadr - 1; // dernier écrit+1 (enfin euh apres on fait un ++ alors hein) } else if (opt->urlmode == 5) { // transparent proxy URL @@ 
-2881,23 +2884,23 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { char *pos; if ((opt->getmode & 1) && (ptr > 0)) { // ecrire les html - if (!link_has_authority(adr)) { + if (!link_has_authority(afs.af.adr)) { HT_ADD("http://"); } else { - char *aut = strstr(adr, "//"); + char *aut = strstr(afs.af.adr, "//"); if (aut) { char tmp[256]; tmp[0] = '\0'; - strncatbuff(tmp, adr, (int) (aut - adr)); // scheme + strncatbuff(tmp, afs.af.adr, (aut - afs.af.adr)); // scheme HT_ADD(tmp); // Protocol HT_ADD("//"); } } // filename is taken as URI (ex: "C:\My Website\www.example.com\foo4242.html) - uri = save; + uri = afs.save; // .. after stripping the path prefix (ex: "www.example.com\foo4242.html) if (strnotempty(StringBuff(opt->path_html_utf8))) { @@ -2913,7 +2916,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { } // put original query string if any (ex: "www.example.com/foo4242.html?q=45) - pos = strchr(fil, '?'); + pos = strchr(afs.af.fil, '?'); if (pos != NULL) { strcatbuff(tempo, pos); } @@ -2927,7 +2930,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { tempo[0] = '\0'; // calculer le lien relatif - if (lienrelatif(tempo, save, relativesavename()) == 0) { + if (lienrelatif(tempo, afs.save, relativesavename()) == 0) { if (!in_media) { // In media (such as real audio): don't patch /* Never escape high-chars (we don't know the encoding!!) 
*/ inplace_escape_uri_utf(tempo, sizeof(tempo)); @@ -2944,7 +2947,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { } hts_log_print(opt, LOG_DEBUG, "relative link at %s build with %s and %s: %s", - adr, save, relativesavename(), tempo); + afs.af.adr, afs.save, relativesavename(), tempo); // lien applet (code) - il faut placer un codebase avant if (p_type == -1) { // que le nom de fichier @@ -3033,7 +3036,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { } else { hts_log_print(opt, LOG_WARNING, "Error building relative link %s and %s", - save, relativesavename()); + afs.save, relativesavename()); } } // sinon le lien sera écrit normalement @@ -3048,13 +3051,13 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { #endif /* Security check */ - if (strlen(save) >= HTS_URLMAXSIZE) { - adr[0] = '\0'; + if (strlen(afs.save) >= HTS_URLMAXSIZE) { + afs.af.adr[0] = '\0'; hts_log_print(opt, LOG_WARNING, "Link is too long: %s", - save); + afs.save); } - if ((adr[0] != '\0') && (p_type != 2) && (p_type != -2) && (forbidden_url != 1)) { // si le fichier n'existe pas, ajouter à la liste + if ((afs.af.adr[0] != '\0') && (p_type != 2) && (p_type != -2) && (forbidden_url != 1)) { // si le fichier n'existe pas, ajouter à la liste // n'y a-t-il pas trop de liens? 
if (0) { // CLEANUP @@ -3083,7 +3086,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { if ((opt->getmode & 4) == 0) { // traiter html après pass_fix = 0; } else { // vérifier que ce n'est pas un !html - if (!ishtml(opt, fil)) + if (!ishtml(opt, afs.af.fil)) pass_fix = 1; // priorité inférieure (traiter après) else pass_fix = max(0, numero_passe); // priorité normale @@ -3106,15 +3109,15 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { // // On part de la fin et on essaye de se presser (économise temps machine) { - int i = hash_read(hash, save, NULL, 0); // lecture type 0 (sav) + int i = hash_read(hash, afs.save, NULL, 0); // lecture type 0 (sav) if (i >= 0) { if ((opt->debug > 1) && (opt->log != NULL)) { - if (strcmp(adr, heap(i)->adr) != 0 - || strcmp(fil, heap(i)->fil) != 0) { + if (strcmp(afs.af.adr, heap(i)->adr) != 0 + || strcmp(afs.af.fil, heap(i)->fil) != 0) { hts_log_print(opt, LOG_DEBUG, "merging similar links %s%s and %s%s", - adr, fil, heap(i)->adr, + afs.af.adr, afs.af.fil, heap(i)->adr, heap(i)->fil); } } @@ -3136,16 +3139,16 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { // DEBUT ROBOTS.TXT AJOUT if (!just_test_it) { - if ((!strfield(adr, "ftp://")) // non ftp - && (!strfield(adr, "file://")) + if ((!strfield(afs.af.adr, "ftp://")) // non ftp + && (!strfield(afs.af.adr, "file://")) ) { // non file if (opt->robots) { // récupérer robots - if (ishtml(opt, fil) != 0) { // pas la peine pour des fichiers isolés - if (checkrobots(_ROBOTS, adr, "") != -1) { // robots.txt ? - checkrobots_set(_ROBOTS, adr, ""); // ajouter entrée vide - if (checkrobots(_ROBOTS, adr, "") == -1) { // robots.txt ? + if (ishtml(opt, afs.af.fil) != 0) { // pas la peine pour des fichiers isolés + if (checkrobots(_ROBOTS, afs.af.adr, "") != -1) { // robots.txt ? + checkrobots_set(_ROBOTS, afs.af.adr, ""); // ajouter entrée vide + if (checkrobots(_ROBOTS, afs.af.adr, "") == -1) { // robots.txt ? 
// enregistrer robots.txt (MACRO) - if (!hts_record_link(opt, adr, "/robots.txt", "", "", "", NULL)) { + if (!hts_record_link(opt, afs.af.adr, "/robots.txt", "", "", "", NULL)) { printf ("PANIC! : Not enough memory [%d]\n", __LINE__); @@ -3173,7 +3176,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { #endif hts_log_print(opt, LOG_DEBUG, "robots.txt added at %s", - adr); + afs.af.adr); } else { hts_log_print(opt, LOG_ERROR, "Unexpected robots.txt error at %d", @@ -3187,7 +3190,8 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { // FIN ROBOTS.TXT AJOUT // enregistrer - if (!hts_record_link(opt, adr, fil, save, former_adr, former_fil, codebase)) { + if (!hts_record_link(opt, afs.af.adr, afs.af.fil, afs.save, + former.adr, former.fil, codebase)) { printf("PANIC! : Not enough memory [%d]\n", __LINE__); hts_log_print(opt, LOG_PANIC, "Not enough memory"); @@ -3245,7 +3249,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { } else { // if !dejafait hts_log_print(opt, LOG_DEBUG, "link has already been recorded, cancelled: %s", - save); + afs.save); } @@ -3260,9 +3264,9 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { } // if ok==0 - assertf(eadr - adr >= 0); // Should not go back - if (eadr > adr) { - INCREMENT_CURRENT_ADR(eadr - 1 - adr); + assertf(eadr - html >= 0); // Should not go back + if (eadr > html) { + INCREMENT_CURRENT_ADR(eadr - 1 - html); } // adr=eadr-1; // ** sauter @@ -3276,7 +3280,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { } // si '<' ou '>' // plus loin - adr++; // automate will be checked next loop + html++; // automate will be checked next loop /* Otimization: if we are scanning in HTML data (not in tag or script), then jump to the next starting tag */ @@ -3288,25 +3292,25 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { &&(!inscript_tag) /* Not in tag with script inside */ ) { /* Not at the end */ - if 
((((int) (adr - r->adr))) < r->size) { + if ((((int) (html - r->adr))) < r->size) { /* Not on a starting tag yet */ - if (*adr != '<') { + if (*html != '<') { /* strchr does not well behave with null chrs.. */ /* char* adr_next = strchr(adr,'<'); */ - char *adr_next = adr; + const char *adr_next = html; while(*adr_next != '<' && (adr_next - r->adr) < r->size) { adr_next++; } /* Jump to near end (index hack) */ if (!adr_next || *adr_next != '<') { - if (((int) (adr - r->adr) < (r->size - 4)) + if (((int) (html - r->adr) < (r->size - 4)) && (r->size > 4) ) { - adr = r->adr + r->size - 2; + html = r->adr + r->size - 2; } } else { - adr = adr_next; + html = adr_next; } } } @@ -3315,8 +3319,8 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { // ---------- // écrire peu à peu if ((opt->getmode & 1) && (ptr > 0)) - HT_ADD_ADR; - lastsaved = adr; // dernier écrit+1 + HT_add_adr; + lastsaved = html; // dernier écrit+1 // ---------- // Checks @@ -3325,13 +3329,13 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { // Check max time if (!back_checkmirror(opt)) { - adr = r->adr + r->size; + html = r->adr + r->size; } } // pour les stats du shell si parsing trop long if (r->size) opt->state._hts_in_html_done = - (100 * ((int) (adr - r->adr))) / (int) (r->size); + (100 * ((int) (html - r->adr))) / (int) (r->size); if (opt->state._hts_in_html_poll) { opt->state._hts_in_html_poll = 0; // temps à attendre, et remplir autant que l'on peut le cache (backing) @@ -3369,7 +3373,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { back_wait(sback, opt, cache, HTS_STAT.stat_timestart); back_fillmax(sback, opt, cache, ptr, numero_passe); } - } while((((int) (adr - r->adr))) < r->size); + } while(html - r->adr < r->size); opt->state._hts_in_html_parsing = 0; // flag opt->state._hts_cancel = 0; // pas de cancel @@ -3440,22 +3444,23 @@ int hts_mirror_check_moved(htsmoduleStruct * str, hts_log_print(opt, LOG_WARNING, "%s for 
%s%s", r->msg, urladr(), urlfil()); { - char BIGSTK mov_url[HTS_URLMAXSIZE * 2], mov_adr[HTS_URLMAXSIZE * 2], - mov_fil[HTS_URLMAXSIZE * 2]; + char BIGSTK mov_url[HTS_URLMAXSIZE * 2]; + lien_adrfilsave savedmoved; + lien_adrfil *const moved = &savedmoved.af; int get_it = 0; // ne pas prendre le fichier à la même adresse par défaut int reponse = 0; mov_url[0] = '\0'; - mov_adr[0] = '\0'; - mov_fil[0] = '\0'; + moved->adr[0] = '\0'; + moved->fil[0] = '\0'; + savedmoved.save[0] = '\0'; // strcpybuff(mov_url, r->location); // url qque -> adresse+fichier if ((reponse = - ident_url_relatif(mov_url, urladr(), urlfil(), mov_adr, - mov_fil)) >= 0) { + ident_url_relatif(mov_url, urladr(), urlfil(), moved)) >= 0) { int set_prio_to = 0; // pas de priotité fixéd par wizard // check whether URLHack is harmless or not @@ -3464,24 +3469,24 @@ int hts_mirror_check_moved(htsmoduleStruct * str, char BIGSTK pn_adr[HTS_URLMAXSIZE * 2], pn_fil[HTS_URLMAXSIZE * 2]; n_adr[0] = n_fil[0] = '\0'; - (void) adr_normalized(mov_adr, n_adr); - (void) fil_normalized(mov_fil, n_fil); + (void) adr_normalized(moved->adr, n_adr); + (void) fil_normalized(moved->fil, n_fil); (void) adr_normalized(urladr(), pn_adr); (void) fil_normalized(urlfil(), pn_fil); if (strcasecmp(n_adr, pn_adr) == 0 && strcasecmp(n_fil, pn_fil) == 0) { hts_log_print(opt, LOG_WARNING, "Redirected link is identical because of 'URL Hack' option: %s%s and %s%s", - urladr(), urlfil(), mov_adr, mov_fil); + urladr(), urlfil(), moved->adr, moved->fil); } } - //if (ident_url_absolute(mov_url,mov_adr,mov_fil)!=-1) { // ok URL reconnue + //if (ident_url_absolute(mov_url,moved->adr,moved->fil)!=-1) { // ok URL reconnue // c'est (en gros) la même URL.. // si c'est un problème de casse dans le host c'est que le serveur est buggé // ("RFC says.." 
: host name IS case insensitive) - if ((strfield2(mov_adr, urladr()) != 0) && (strfield2(mov_fil, urlfil()) != 0)) { // identique à casse près + if ((strfield2(moved->adr, urladr()) != 0) && (strfield2(moved->fil, urlfil()) != 0)) { // identique à casse près // on tourne en rond - if (strcmp(mov_fil, urlfil()) == 0) { + if (strcmp(moved->fil, urlfil()) == 0) { error = 1; get_it = -1; // ne rien faire hts_log_print(opt, LOG_WARNING, @@ -3495,23 +3500,23 @@ int hts_mirror_check_moved(htsmoduleStruct * str, // -> on prend à cette adresse, le lien sera enregistré avec lien_record() (hash) hts_log_print(opt, LOG_DEBUG, "wizard link test for moved file at %s%s..", - mov_adr, mov_fil); + moved->adr, moved->fil); // accepté? - if (hts_acceptlink(opt, ptr, mov_adr, mov_fil, NULL, NULL, &set_prio_to, NULL) != 1) { /* nouvelle adresse non refusée ? */ + if (hts_acceptlink(opt, ptr, moved->adr, moved->fil, NULL, NULL, &set_prio_to, NULL) != 1) { /* nouvelle adresse non refusée ? */ get_it = 1; hts_log_print(opt, LOG_DEBUG, "moved link accepted: %s%s", - mov_adr, mov_fil); + moved->adr, moved->fil); } } /* sinon traité normalement */ } - //if ((strfield2(mov_adr,urladr())!=0) && (strfield2(mov_fil,urlfil())!=0)) { // identique à casse près + //if ((strfield2(moved->adr,urladr())!=0) && (strfield2(moved->fil,urlfil())!=0)) { // identique à casse près if (get_it == 1) { // court-circuiter le reste du traitement // et reculer pour mieux sauter hts_log_print(opt, LOG_WARNING, "Warning moved treated for %s%s (real one is %s%s)", - urladr(), urlfil(), mov_adr, mov_fil); + urladr(), urlfil(), moved->adr, moved->fil); // canceller lien actuel error = 1; hash_invalidate_entry(hashptr, ptr); // invalidate hashtable entry @@ -3520,17 +3525,14 @@ int hts_mirror_check_moved(htsmoduleStruct * str, // set_prio_to=0+1; // protection if the moved URL is an html page!! 
//xxc xxc { - char BIGSTK mov_sav[HTS_URLMAXSIZE * 2]; - // calculer lien et éventuellement modifier addresse/fichier - if (url_savename - (mov_adr, mov_fil, mov_sav, NULL, NULL, + if (url_savename(&savedmoved, NULL, heap(heap(ptr)->precedent)->adr, - heap(heap(ptr)->precedent)->fil, opt, opt->liens, opt->lien_tot, + heap(heap(ptr)->precedent)->fil, opt, sback, cache, hash, ptr, numero_passe, NULL) != -1) { - if (hash_read(hash, mov_sav, NULL, HASH_STRUCT_FILENAME) < 0) { // n'existe pas déja + if (hash_read(hash, savedmoved.save, NULL, HASH_STRUCT_FILENAME) < 0) { // n'existe pas déja // enregistrer lien avec SAV IDENTIQUE - if (hts_record_link(opt, mov_adr, mov_fil, heap(ptr)->sav, "", "", NULL)) { + if (hts_record_link(opt, moved->adr, moved->fil, heap(ptr)->sav, "", "", NULL)) { // mode test? heap_top()->testmode = heap(ptr)->testmode; heap_top()->link_import = 0; // mode normal @@ -3577,7 +3579,7 @@ int hts_mirror_check_moved(htsmoduleStruct * str, inplace_escape_uri(mov_url, sizeof(mov_url)); } else { char BIGSTK cid[HTS_URLMAXSIZE * 3]; - make_content_id(mov_adr, mov_fil, cid, sizeof(cid)); + make_content_id(moved->adr, moved->fil, cid, sizeof(cid)); strcpybuff(mov_url, "cid:"); strcatbuff(mov_url, cid); } @@ -3918,28 +3920,24 @@ void hts_mirror_process_user_interaction(htsmoduleStruct * str, // changement dans les préférences if (opt->state._hts_addurl) { - char BIGSTK add_adr[HTS_URLMAXSIZE * 2]; - char BIGSTK add_fil[HTS_URLMAXSIZE * 2]; + lien_adrfilsave add; while(*opt->state._hts_addurl) { char BIGSTK add_url[HTS_URLMAXSIZE * 2]; - add_adr[0] = add_fil[0] = add_url[0] = '\0'; + add.af.adr[0] = add.af.fil[0] = add_url[0] = '\0'; if (!link_has_authority(*opt->state._hts_addurl)) strcpybuff(add_url, "http://"); // ajouter http:// strcatbuff(add_url, *opt->state._hts_addurl); - if (ident_url_absolute(add_url, add_adr, add_fil) >= 0) { + if (ident_url_absolute(add_url, &add.af) >= 0) { // ----Ajout---- - // noter NOUVEAU lien - char BIGSTK 
add_sav[HTS_URLMAXSIZE * 2]; // calculer lien et éventuellement modifier addresse/fichier if (url_savename - (add_adr, add_fil, add_sav, NULL, NULL, NULL, NULL, opt, opt->liens, - opt->lien_tot, sback, cache, hash, ptr, numero_passe, NULL) != -1) { - if (hash_read(hash, add_sav, NULL, HASH_STRUCT_FILENAME) < 0) { // n'existe pas déja + (&add, NULL, NULL, NULL, opt, sback, cache, hash, ptr, numero_passe, NULL) != -1) { + if (hash_read(hash, add.save, NULL, HASH_STRUCT_FILENAME) < 0) { // n'existe pas déja // enregistrer lien - if (hts_record_link(opt, add_adr, add_fil, add_sav, "", "", NULL)) { + if (hts_record_link(opt, add.af.adr, add.af.fil, add.save, "", "", NULL)) { heap_top()->testmode = 0; // mode test? heap_top()->link_import = 0; // mode normal heap_top()->depth = opt->depth; @@ -3948,8 +3946,8 @@ void hts_mirror_process_user_interaction(htsmoduleStruct * str, heap_top()->premier = heap_top_index(); heap_top()->precedent = heap_top_index(); // - hts_log_print(opt, LOG_INFO, "Link added by user: %s%s", add_adr, - add_fil); + hts_log_print(opt, LOG_INFO, "Link added by user: %s%s", add.af.adr, + add.af.fil); // } else { // oups erreur, plus de mémoire!! printf("PANIC! 
: Not enough memory [%d]\n", __LINE__); @@ -3961,7 +3959,7 @@ void hts_mirror_process_user_interaction(htsmoduleStruct * str, } else { hts_log_print(opt, LOG_NOTICE, "Existing link %s%s not added after user request", - add_adr, add_fil); + add.af.adr, add.af.fil); } } @@ -4415,9 +4413,9 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct * str, } /* Wait for delayed types */ -int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save, - char *parent_adr, char *parent_fil, char *former_adr, - char *former_fil, int *forbidden_url) { +int hts_wait_delayed(htsmoduleStruct * str, lien_adrfilsave *afs, + char *parent_adr, char *parent_fil, lien_adrfil *former, + int *forbidden_url) { ENGINE_LOAD_CONTEXT_BASE(); hash_struct *const hash = hashptr; @@ -4426,17 +4424,17 @@ int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save, char in_error_msg[32]; // resolve unresolved type - if (opt->savename_delayed != 0 && *forbidden_url == 0 && IS_DELAYED_EXT(save) + if (opt->savename_delayed != 0 && *forbidden_url == 0 && IS_DELAYED_EXT(afs->save) && !opt->state.stop) { int loops; int continue_loop; - hts_log_print(opt, LOG_DEBUG, "Waiting for type to be known: %s%s", adr, - fil); + hts_log_print(opt, LOG_DEBUG, "Waiting for type to be known: %s%s", afs->af.adr, + afs->af.fil); /* Follow while type is unknown and redirects occurs */ for(loops = 0, continue_loop = 1; - IS_DELAYED_EXT(save) && continue_loop && loops < 7; loops++) { + IS_DELAYED_EXT(afs->save) && continue_loop && loops < 7; loops++) { continue_loop = 0; /* @@ -4449,22 +4447,22 @@ int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save, lien_back back; memset(&back, 0, sizeof(back)); - back.r = cache_read(opt, cache, adr, fil, NULL, NULL); // test uniquement + back.r = cache_read(opt, cache, afs->af.adr, afs->af.fil, NULL, NULL); // test uniquement if (back.r.statuscode == HTTP_OK && strnotempty(back.r.contenttype)) { // cache found, and aswer is 'OK' 
hts_log_print(opt, LOG_DEBUG, "Direct type lookup in cache (-%%D1): %s", back.r.contenttype); /* Recompute filename with MIME type */ - save[0] = '\0'; - url_savename(adr, fil, save, former_adr, former_fil, heap(ptr)->adr, - heap(ptr)->fil, opt, opt->liens, opt->lien_tot, sback, cache, + afs->save[0] = '\0'; + url_savename(afs, former, heap(ptr)->adr, + heap(ptr)->fil, opt, sback, cache, hash, ptr, numero_passe, &back); /* Recompute authorization with MIME type */ { int new_forbidden_url = - hts_acceptmime(opt, ptr, adr, fil, back.r.contenttype); + hts_acceptmime(opt, ptr, afs->af.adr, afs->af.fil, back.r.contenttype); if (new_forbidden_url != -1) { hts_log_print(opt, LOG_DEBUG, "result for wizard mime test: %d", new_forbidden_url); @@ -4472,7 +4470,7 @@ int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save, *forbidden_url = new_forbidden_url; hts_log_print(opt, LOG_DEBUG, "link forbidden because of MIME types restrictions: %s%s", - adr, fil); + afs->af.adr, afs->af.fil); break; // exit loop } } @@ -4484,11 +4482,11 @@ int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save, } /* Check if the file was recorded already (necessary for redirects) */ - if (hash_read(hash, save, NULL, HASH_STRUCT_FILENAME) >= 0) { + if (hash_read(hash, afs->save, NULL, HASH_STRUCT_FILENAME) >= 0) { if (loops == 0) { /* Should not happend */ hts_log_print(opt, LOG_ERROR, "Duplicate entry in hts_wait_delayed() cancelled: %s%s -> %s", - adr, fil, save); + afs->af.adr, afs->af.fil, afs->save); } /* Exit loop (we're done) */ continue_loop = 0; @@ -4497,11 +4495,11 @@ int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save, /* Add in backing (back_index() will respond correctly) */ if (back_add_if_not_exists - (sback, opt, cache, adr, fil, save, parent_adr, parent_fil, + (sback, opt, cache, afs->af.adr, afs->af.fil, afs->save, parent_adr, parent_fil, 0) != -1) { int b; - b = back_index(opt, sback, adr, fil, save); + b = 
back_index(opt, sback, afs->af.adr, afs->af.fil, afs->save); if (b < 0) { printf("PANIC! : Crash adding error, unexpected error found.. [%d]\n", __LINE__); @@ -4529,15 +4527,15 @@ int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save, b = -1; /* Recompute filename with MIME type */ - save[0] = '\0'; - url_savename(adr, fil, save, former_adr, former_fil, heap(ptr)->adr, - heap(ptr)->fil, opt, opt->liens, opt->lien_tot, sback, cache, + afs->save[0] = '\0'; + url_savename(afs, former, heap(ptr)->adr, + heap(ptr)->fil, opt, sback, cache, hash, ptr, numero_passe, &delayed_back); /* Recompute authorization with MIME type */ { int new_forbidden_url = - hts_acceptmime(opt, ptr, adr, fil, delayed_back.r.contenttype); + hts_acceptmime(opt, ptr, afs->af.adr, afs->af.fil, delayed_back.r.contenttype); if (new_forbidden_url != -1) { hts_log_print(opt, LOG_DEBUG, "result for wizard mime test: %d", *forbidden_url); @@ -4545,7 +4543,7 @@ int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save, *forbidden_url = new_forbidden_url; hts_log_print(opt, LOG_DEBUG, "link forbidden because of MIME types restrictions: %s%s", - adr, fil); + afs->af.adr, afs->af.fil); break; // exit loop } } @@ -4553,9 +4551,9 @@ int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save, /* Re-Add wiht correct type */ if (back_add_if_not_exists - (sback, opt, cache, adr, fil, save, parent_adr, parent_fil, + (sback, opt, cache, afs->af.adr, afs->af.fil, afs->save, parent_adr, parent_fil, 0) != -1) { - b = back_index(opt, sback, adr, fil, save); + b = back_index(opt, sback, afs->af.adr, afs->af.fil, afs->save); } if (b < 0) { printf @@ -4664,36 +4662,35 @@ int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save, /* Handle redirect */ if ((int) strnotempty(mov_url)) { // location existe! 
- char BIGSTK mov_adr[HTS_URLMAXSIZE * 2], - mov_fil[HTS_URLMAXSIZE * 2]; - mov_adr[0] = mov_fil[0] = '\0'; + lien_adrfil moved; + moved.adr[0] = moved.fil[0] = '\0'; // - if (ident_url_relatif(mov_url, adr, fil, mov_adr, mov_fil) >= 0) { + if (ident_url_relatif(mov_url, afs->af.adr, afs->af.fil, &moved) >= 0) { hts_log_print(opt, LOG_DEBUG, "Redirect while resolving type: %s%s -> %s%s", - adr, fil, mov_adr, mov_fil); + afs->af.adr, afs->af.fil, moved.adr, moved.fil); // si non bouclage sur soi même, ou si test avec GET non testé - if (strcmp(mov_adr, adr) != 0 || strcmp(mov_fil, fil) != 0) { + if (strcmp(moved.adr, afs->af.adr) != 0 || strcmp(moved.fil, afs->af.fil) != 0) { - // recopier former_adr/fil? - if ((former_adr) && (former_fil)) { - if (strnotempty(former_adr) == 0) { // Pas déja noté - strcpybuff(former_adr, adr); - strcpybuff(former_fil, fil); + // recopier former->adr/fil? + if (former != NULL) { + if (strnotempty(former->adr) == 0) { // Pas déja noté + strcpybuff(former->adr, afs->af.adr); + strcpybuff(former->fil, afs->af.fil); } } // check explicit forbidden - don't follow 3xx in this case { int set_prio_to = 0; - if (hts_acceptlink(opt, ptr, mov_adr, mov_fil, NULL, NULL, &set_prio_to, NULL) == 1) { /* forbidden */ + if (hts_acceptlink(opt, ptr, moved.adr, moved.fil, NULL, NULL, &set_prio_to, NULL) == 1) { /* forbidden */ /* Note: the cache 'cached_tests' system will remember this error, and we'll only issue ONE request */ *forbidden_url = 1; /* Forbidden! */ hts_log_print(opt, LOG_DEBUG, "link forbidden because of redirect beyond the mirror scope at %s%s -> %s%s", - adr, fil, mov_adr, mov_fil); - strcpybuff(adr, mov_adr); - strcpybuff(fil, mov_fil); + afs->af.adr, afs->af.fil, moved.adr, moved.fil); + strcpybuff(afs->af.adr, moved.adr); + strcpybuff(afs->af.fil, moved.fil); mov_url[0] = '\0'; break; } @@ -4701,45 +4698,44 @@ int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save, // ftp: stop! 
if (strfield(mov_url, "ftp://")) { - strcpybuff(adr, mov_adr); - strcpybuff(fil, mov_fil); + strcpybuff(afs->af.adr, moved.adr); + strcpybuff(afs->af.fil, moved.fil); break; } /* ok, continue */ - strcpybuff(adr, mov_adr); - strcpybuff(fil, mov_fil); + strcpybuff(afs->af.adr, moved.adr); + strcpybuff(afs->af.fil, moved.fil); continue_loop = 1; /* Recompute filename for hash lookup */ - save[0] = '\0'; - url_savename(adr, fil, save, former_adr, former_fil, - heap(ptr)->adr, heap(ptr)->fil, opt, opt->liens, - opt->lien_tot, sback, cache, hash, ptr, numero_passe, + afs->save[0] = '\0'; + url_savename(afs, former, heap(ptr)->adr, heap(ptr)->fil, + opt, sback, cache, hash, ptr, numero_passe, &delayed_back); } else { hts_log_print(opt, LOG_WARNING, "Unable to test %s%s (loop to same filename)", - adr, fil); + afs->af.adr, afs->af.fil); } // loop to same location } // ident_url_relatif() } // location } // redirect - hts_log_print(opt, LOG_DEBUG, "Final type for %s%s: '%s'", adr, fil, + hts_log_print(opt, LOG_DEBUG, "Final type for %s%s: '%s'", afs->af.adr, afs->af.fil, delayed_back.r.contenttype); /* If we are done, do additional checks with final type and authorizations */ if (!continue_loop) { /* Recompute filename with MIME type */ - save[0] = '\0'; - url_savename(adr, fil, save, former_adr, former_fil, - heap(ptr)->adr, heap(ptr)->fil, opt, opt->liens, opt->lien_tot, + afs->save[0] = '\0'; + url_savename(afs, former, + heap(ptr)->adr, heap(ptr)->fil, opt, sback, cache, hash, ptr, numero_passe, &delayed_back); /* Recompute authorization with MIME type */ { int new_forbidden_url = - hts_acceptmime(opt, ptr, adr, fil, delayed_back.r.contenttype); + hts_acceptmime(opt, ptr, afs->af.adr, afs->af.fil, delayed_back.r.contenttype); if (new_forbidden_url != -1) { hts_log_print(opt, LOG_DEBUG, "result for wizard mime test: %d", *forbidden_url); @@ -4747,7 +4743,7 @@ int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save, *forbidden_url = 
new_forbidden_url; hts_log_print(opt, LOG_DEBUG, "link forbidden because of MIME types restrictions: %s%s", - adr, fil); + afs->af.adr, afs->af.fil); break; // exit loop } } @@ -4764,7 +4760,7 @@ int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save, } } /* Patch destination filename for direct-to-disk mode */ - strcpybuff(back[b].url_sav, save); + strcpybuff(back[b].url_sav, afs->save); } } // b >= 0 @@ -4791,25 +4787,25 @@ int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save, if (in_error == STATUSCODE_TOO_BIG) { hts_log_print(opt, LOG_INFO, "link not taken because of its size (%d bytes) at %s%s", - (int) in_error_size, adr, fil); + (int) in_error_size, afs->af.adr, afs->af.fil); } else { hts_log_print(opt, LOG_INFO, "link not taken because of error (%d '%s') at %s%s", - in_error, in_error_msg, adr, fil); + in_error, in_error_msg, afs->af.adr, afs->af.fil); } } } // error - if (*forbidden_url != 1 && IS_DELAYED_EXT(save)) { + if (*forbidden_url != 1 && IS_DELAYED_EXT(afs->save)) { *forbidden_url = 1; if (in_error) { hts_log_print(opt, LOG_WARNING, "link in error (%d '%s'), type unknown, aborting: %s%s", - in_error, in_error_msg, adr, fil); + in_error, in_error_msg, afs->af.adr, afs->af.fil); } else { hts_log_print(opt, LOG_WARNING, "link is probably looping, type unknown, aborting: %s%s", - adr, fil); + afs->af.adr, afs->af.fil); } } diff --git a/src/htsparse.h b/src/htsparse.h index 2d1abd4..01eb953 100644 --- a/src/htsparse.h +++ b/src/htsparse.h @@ -135,9 +135,9 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct * str, to be ready for naming, having its header MIME type If the final URL is to be forbidden, sets 'forbidden_url' to the corresponding value */ -int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save, - char *parent_adr, char *parent_fil, char *former_adr, - char *former_fil, int *forbidden_url); +int hts_wait_delayed(htsmoduleStruct * str, lien_adrfilsave *afs, + char 
*parent_adr, char *parent_fil, lien_adrfil *former, + int *forbidden_url); /* Context state */ diff --git a/src/htstools.c b/src/htstools.c index 1780a53..1422df4 100644 --- a/src/htstools.c +++ b/src/htstools.c @@ -121,13 +121,16 @@ static void unescapehttp(const char *s, String * tempo) { // 0 : ok // -1 : erreur // -2 : protocole non supporté (ftp) -int ident_url_relatif(const char *lien, const char *origin_adr, - const char *origin_fil, char *adr, char *fil) { +int ident_url_relatif(const char *lien, const char *origin_adr,
+ const char *origin_fil, + lien_adrfil* const adrfil) { int ok = 0; int scheme = 0; - adr[0] = '\0'; - fil[0] = '\0'; //effacer buffers + assertf(adrfil != NULL); + + adrfil->adr[0] = '\0'; + adrfil->fil[0] = '\0'; //effacer buffers // lien non vide! if (strnotempty(lien) == 0) @@ -149,13 +152,13 @@ int ident_url_relatif(const char *lien, const char *origin_adr, || (strfield(lien, "file://")) // scheme+// || (strncmp(lien, "//", 2) == 0) // // sans scheme (-> default) ) { - if (ident_url_absolute(lien, adr, fil) == -1) { + if (ident_url_absolute(lien, adrfil) == -1) { ok = -1; // erreur URL } } else if (strfield(lien, "ftp://")) { // Note: ftp:foobar.gif is not valid if (ftp_available()) { // ftp supporté - if (ident_url_absolute(lien, adr, fil) == -1) { + if (ident_url_absolute(lien, adrfil) == -1) { ok = -1; // erreur URL } } else { @@ -164,7 +167,7 @@ int ident_url_relatif(const char *lien, const char *origin_adr, #if HTS_USEOPENSSL } else if (strfield(lien, "https://")) { // Note: ftp:foobar.gif is not valid - if (ident_url_absolute(lien, adr, fil) == -1) { + if (ident_url_absolute(lien, adrfil) == -1) { ok = -1; // erreur URL } #endif @@ -191,30 +194,30 @@ int ident_url_relatif(const char *lien, const char *origin_adr, /* patch scheme if necessary */ if (strfield(lien, "http:")) { lien += 5; - strcpybuff(adr, jump_protocol(origin_adr)); // même adresse ; protocole vide (http) + strcpybuff(adrfil->adr, jump_protocol(origin_adr)); // même adresse ; protocole vide (http) } else if (strfield(lien, "https:")) { lien += 6; - strcpybuff(adr, "https://"); // même adresse forcée en https - strcatbuff(adr, jump_protocol(origin_adr)); + strcpybuff(adrfil->adr, "https://"); // même adresse forcée en https + strcatbuff(adrfil->adr, jump_protocol(origin_adr)); } else if (strfield(lien, "ftp:")) { lien += 4; - strcpybuff(adr, "ftp://"); // même adresse forcée en ftp - strcatbuff(adr, jump_protocol(origin_adr)); + strcpybuff(adrfil->adr, "ftp://"); // même adresse forcée en 
ftp + strcatbuff(adrfil->adr, jump_protocol(origin_adr)); } else { - strcpybuff(adr, origin_adr); // même adresse ; et même éventuel protocole + strcpybuff(adrfil->adr, origin_adr); // même adresse ; et même éventuel protocole } if (*lien != '/') { // sinon c'est un lien absolu if (*lien == '\0') { - strcpybuff(fil, origin_fil); + strcpybuff(adrfil->fil, origin_fil); } else if (*lien == '?') { // example: a href="?page=2" char *a; - strcpybuff(fil, origin_fil); - a = strchr(fil, '?'); + strcpybuff(adrfil->fil, origin_fil); + a = strchr(adrfil->fil, '?'); if (a) *a = '\0'; - strcatbuff(fil, lien); + strcatbuff(adrfil->fil, lien); } else { const char *a = strchr(origin_fil, '?'); @@ -225,14 +228,14 @@ int ident_url_relatif(const char *lien, const char *origin_adr, if (*a == '/') { // ok on a un '/' if ((((int) (a - origin_fil)) + 1 + strlen(lien)) < HTS_URLMAXSIZE) { // copier chemin - strncpy(fil, origin_fil, ((int) (a - origin_fil)) + 1); - *(fil + ((int) (a - origin_fil)) + 1) = '\0'; + strncpy(adrfil->fil, origin_fil, ((int) (a - origin_fil)) + 1); + *(adrfil->fil + ((int) (a - origin_fil)) + 1) = '\0'; // copier chemin relatif - if (((int) strlen(fil) + (int) strlen(lien)) < HTS_URLMAXSIZE) { - strcatbuff(fil, lien + ((*lien == '/') ? 1 : 0)); + if (((int) strlen(adrfil->fil) + (int) strlen(lien)) < HTS_URLMAXSIZE) { + strcatbuff(adrfil->fil, lien + ((*lien == '/') ? 
1 : 0)); // simplifier url pour les ../ - fil_simplifie(fil); + fil_simplifie(adrfil->fil); } else ok = -1; // erreur } else { // erreur @@ -244,8 +247,8 @@ int ident_url_relatif(const char *lien, const char *origin_adr, } } else { // chemin absolu // copier chemin directement - strcatbuff(fil, lien); - fil_simplifie(fil); + strcatbuff(adrfil->fil, lien); + fil_simplifie(adrfil->fil); } // *lien!='/' } else ok = -1; @@ -254,7 +257,7 @@ int ident_url_relatif(const char *lien, const char *origin_adr, // case insensitive pour adresse { - char *a = jump_identification(adr); + char *a = jump_identification(adrfil->adr); while(*a) { if ((*a >= 'A') && (*a <= 'Z')) @@ -264,8 +267,8 @@ int ident_url_relatif(const char *lien, const char *origin_adr, } // IDNA / RFC 3492 (Punycode) handling for HTTP(s) - if (!link_has_authority(adr) || strfield(adr, "https:")) { - char *const a = jump_identification(adr); + if (!link_has_authority(adrfil->adr) || strfield(adrfil->adr, "https:")) { + char *const a = jump_identification(adrfil->adr); // Non-ASCII characters (theorically forbidden, but browsers are lenient) if (!hts_isStringAscii(a, strlen(a))) { char *const idna = hts_convertStringUTF8ToIDNA(a, strlen(a)); diff --git a/src/htstools.h b/src/htstools.h index 7198fef..94f7c4a 100644 --- a/src/htstools.h +++ b/src/htstools.h @@ -47,11 +47,20 @@ typedef struct httrackp httrackp; typedef struct find_handle_struct find_handle_struct; typedef find_handle_struct *find_handle; #endif +#ifndef HTS_DEF_FWSTRUCT_lien_adrfil +#define HTS_DEF_FWSTRUCT_lien_adrfil +typedef struct lien_adrfil lien_adrfil; +#endif +#ifndef HTS_DEF_FWSTRUCT_lien_adrfilsave +#define HTS_DEF_FWSTRUCT_lien_adrfilsave +typedef struct lien_adrfilsave lien_adrfilsave; +#endif /* Library internal definictions */ #ifdef HTS_INTERNAL_BYTECODE -int ident_url_relatif(const char *lien, const char *urladr, const char *urlfil, - char *adr, char *fil); +int ident_url_relatif(const char *lien, const char *origin_adr,
+ const char *origin_fil, + lien_adrfil* const adrfil); int lienrelatif(char *s, const char *link, const char *curr); int link_has_authority(const char *lien); int link_has_authorization(const char *lien); |