diff options
author | Xavier Roche <xroche@users.noreply.github.com> | 2014-05-29 15:42:53 +0000 |
---|---|---|
committer | Xavier Roche <xroche@users.noreply.github.com> | 2014-05-29 15:42:53 +0000 |
commit | 4f5776a8fac840619f01a61a3655a4608e04d9fd (patch) | |
tree | b9bb0b6ce0eb39118ed5646f66687a616890248a /src/htsparse.c | |
parent | 8adeadbdb63e1cb8e4d62fe400ffefada9122d86 (diff) |
Big cleanup: introducing cleaner lien_adrfil and lien_adrfilsave structures, holding address/uri and address/uri/filename respectively, rather than passing opaque char* buffers of unknown size.
Diffstat (limited to 'src/htsparse.c')
-rw-r--r-- | src/htsparse.c | 812 |
1 files changed, 404 insertions, 408 deletions
diff --git a/src/htsparse.c b/src/htsparse.c index 820af38..6269450 100644 --- a/src/htsparse.c +++ b/src/htsparse.c @@ -78,12 +78,12 @@ Please visit our Website: http://www.httrack.com } \ } \ ht_len+=A; -#define HT_ADD_ADR \ +#define HT_add_adr \ if ((opt->getmode & 1) && (ptr>0)) { \ - size_t i = ((size_t) (adr - lastsaved)),j=ht_len; HT_ADD_CHK(i) \ + size_t i = ((html - lastsaved)),j=ht_len; HT_ADD_CHK(i) \ memcpy(ht_buff+j, lastsaved, i); \ ht_buff[j+i]='\0'; \ - lastsaved=adr; \ + lastsaved=html; \ } #define HT_ADD(A) \ if ((opt->getmode & 1) && (ptr>0)) { \ @@ -283,7 +283,7 @@ Please visit our Website: http://www.httrack.com #define AUTOMATE_LOOKUP_CURRENT_ADR() do { \ if (inscript) { \ int new_state_pos; \ - new_state_pos=inscript_state[inscript_state_pos][(unsigned char)*adr]; \ + new_state_pos=inscript_state[inscript_state_pos][(unsigned char)*html]; \ if (new_state_pos < 0) { \ new_state_pos=inscript_state[inscript_state_pos][INSCRIPT_DEFAULT]; \ } \ @@ -297,7 +297,7 @@ Please visit our Website: http://www.httrack.com #define INCREMENT_CURRENT_ADR(steps) do { \ int steps__ = (int) ( steps ); \ while(steps__ > 0) { \ - adr++; \ + html++; \ AUTOMATE_LOOKUP_CURRENT_ADR(); \ steps__ --; \ } \ @@ -322,13 +322,26 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { } } if (RUN_CALLBACK4(opt, check_html, r->adr, (int) r->size, urladr(), urlfil())) { - FILE *fp = NULL; // fichier écrit localement - char *adr = r->adr; // pointeur (on parcourt) - char *lastsaved; // adresse du dernier octet sauvé + 1 + FILE *fp = NULL; // fichier écrit localement + const char *html = r->adr; // pointeur (on parcours) + const char *lastsaved; // adresse du dernier octet sauvé + 1 hts_log_print(opt, LOG_DEBUG, "scanning file %s%s (%s)..", urladr(), urlfil(), savename()); + /* Hack to avoid NULL char problems with C syntax */ + /* Yes, some bogus HTML pages can embed null chars + and therefore can not be properly handled if this hack is not done + */ + if 
(r->adr != NULL) { + size_t i; + for(i = 0 ; i < (size_t) r->size ; i++) { + if (r->adr[i] == '\0') { + r->adr[i] = ' '; + } + } + } + // Indexing! #if HTS_MAKE_KEYWORD_INDEX if (opt->kindex) { @@ -392,7 +405,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { //int parseall_incomment=0; // dans un /* */ (exemple: a = /* URL */ "img.gif";) // - const char *intag_start = adr; + const char *intag_start = html; const char *intag_name = NULL; const char *intag_startattr = NULL; int intag_start_valid = 0; @@ -405,7 +418,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { int parent_relative = 0; // the parent is the base path (.js, .css..) HT_ADD_START; // débuter - lastsaved = adr; + lastsaved = html; /* Initialize script automate for comments, quotes.. */ memset(inscript_state, 0xff, sizeof(inscript_state)); @@ -491,15 +504,15 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { else if (compare_mime(opt, r->contenttype, str->url_file, "text/xml") != 0 || compare_mime(opt, r->contenttype, str->url_file, "application/xml") != 0) { - if (strstr(adr, "http://purl.org/rss/") != NULL) // Hmm, this is a bit lame ; will have to cleanup + if (strstr(html, "http://purl.org/rss/") != NULL) // Hmm, this is a bit lame ; will have to cleanup { /* RSS file */ inscript = intag = 0; intag_start_valid = 0; in_media = NULL; // regular XML } else { // cancel: write all - adr = r->adr + r->size; - HT_ADD_ADR; - lastsaved = adr; + html = r->adr + r->size; + HT_add_adr; + lastsaved = html; } } // Detect UTF8 format @@ -531,18 +544,9 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { error = 0; /* Break if we are done yet */ - if ((adr - r->adr) >= r->size) + if (html - r->adr >= r->size) break; - /* Hack to avoid NULL char problems with C syntax */ - /* Yes, some bogus HTML pages can embed null chars - and therefore can not be properly handled if this hack is not done - */ - if (!(*adr)) { - if (((int) 
(adr - r->adr)) < r->size) - *adr = ' '; - } - /* index.html built here */ @@ -555,18 +559,18 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { if (!in_media) { if (opt->makeindex && (ptr > 0)) { if (opt->getmode & 1) { // autorisation d'écrire - p = strfield(adr, "title"); + p = strfield(html, "title"); if (p) { - if (*(adr - 1) == '/') + if (*(html - 1) == '/') p = 0; // /title } else { - if (strfield(adr, "/html")) + if (strfield(html, "/html")) p = -1; // noter, mais sans titre - else if (strfield(adr, "body")) + else if (strfield(html, "body")) p = -1; // noter, mais sans titre - else if (((int) (adr - r->adr)) >= (r->size - 1)) + else if (((int) (html - r->adr)) >= (r->size - 1)) p = -1; // noter, mais sans titre - else if ((int) (adr - r->adr) >= r->size - 2) // we got to hurry + else if ((int) (html - r->adr) >= r->size - 2) // we got to hurry p = -1; // xxc xxc xxc } } else @@ -603,7 +607,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { s[0] = '\0'; if (p > 0) { - a = strchr(adr, '>'); + a = strchr(html, '>'); if (a != NULL) { a++; while(is_space(*a)) @@ -669,7 +673,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { */ /* Parse */ - if ((*adr == '<') /* No starting tag */ + if ((*html == '<') /* No starting tag */ &&(!inscript) /* Not in (java)script */ &&(!incomment) /* Not in comment (<!--) */ &&(!in_media) /* Not in media */ @@ -678,8 +682,8 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { intag_ctype = 0; //parseall_incomment=0; //inquote=0; // effacer quote - intag_start = adr; - for(intag_name = adr + 1; is_realspace(*intag_name); intag_name++) ; + intag_start = html; + for(intag_name = html + 1; is_realspace(*intag_name); intag_name++) ; intag_start_valid = 1; codebase[0] = '\0'; // effacer éventuel codebase @@ -688,9 +692,9 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { int pos; // <meta http-equiv="Content-Type" 
content="text/html; charset=UTF-8" /> - if ((pos = rech_tageq_all(adr, "http-equiv"))) { + if ((pos = rech_tageq_all(html, "http-equiv"))) { const char *token = NULL; - int len = rech_endtoken(adr + pos, &token); + int len = rech_endtoken(html + pos, &token); if (len > 0) { if (strfield(token, "content-type")) { @@ -711,11 +715,11 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { // We are looking for the first head so that we can declare the HTTP-headers charset early // Emit as soon as we see the first <head>, <meta>, or <body> tag. // FIXME: we currently emit the tag BEFORE the <head> tag, actually, which is not clean - if ((p = strfield(adr, "<head>")) != 0 - || ((p = strfield(adr, "<head")) != 0 && isspace(adr[p])) - || (p = strfield(adr, "<body>")) != 0 - || ((p = strfield(adr, "<body")) != 0 && isspace(adr[p])) - || ((p = strfield(adr, "<meta")) != 0 && isspace(adr[p])) + if ((p = strfield(html, "<head>")) != 0 + || ((p = strfield(html, "<head")) != 0 && isspace(html[p])) + || (p = strfield(html, "<body>")) != 0 + || ((p = strfield(html, "<body")) != 0 && isspace(html[p])) + || ((p = strfield(html, "<meta")) != 0 && isspace(html[p])) ) { emited_footer++; } else { @@ -724,7 +728,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { break; case 1: // And the closing comment info tag - if ((p = strfield(adr, "</html") != 0)) { + if ((p = strfield(html, "</html") != 0)) { emited_footer++; } else { p = 0; @@ -768,15 +772,15 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { } } // éliminer les <!-- (commentaires) : intag dévalidé - if (*(adr + 1) == '!') - if (*(adr + 2) == '-') - if (*(adr + 3) == '-') { + if (*(html + 1) == '!') + if (*(html + 2) == '-') + if (*(html + 3) == '-') { intag = 0; incomment = 1; intag_start_valid = 0; } - } else if ((*adr == '>') /* ending tag */ + } else if ((*html == '>') /* ending tag */ &&((!inscript && !in_media) || (inscript_tag)) /* and in tag (or in script) */ 
) { if (inscript_tag) { @@ -817,7 +821,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { } } else { /* end of comment? */ // vérifier fermeture correcte - if ((*(adr - 1) == '-') && (*(adr - 2) == '-')) { + if ((*(html - 1) == '-') && (*(html - 2) == '-')) { intag = 0; incomment = 0; intag_start_valid = 0; @@ -830,7 +834,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { <!-- foo > example <!-- bar > is sometimes accepted by browsers when no --> is used somewhere else.. darn those browsers are dirty */ - if (!strstr(adr, "-->")) { + if (!strstr(html, "-->")) { intag = 0; incomment = 0; intag_start_valid = 0; @@ -850,18 +854,18 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { int p_searchMETAURL = 0; // chercher ..URL=<url> int add_class = 0; // ajouter .class int add_class_dots_to_patch = 0; // number of '.' in code="x.y.z<realname>" - char *p_flush = NULL; + const char *p_flush = NULL; // ------------------------------------------------------------ // parsing évolé // ------------------------------------------------------------ - if (((isalpha((unsigned char) *adr)) || (*adr == '/') || (inscript) || (in_media) || (inscriptgen))) { // sinon pas la peine de tester.. + if (((isalpha((unsigned char) *html)) || (*html == '/') || (inscript) || (in_media) || (inscriptgen))) { // sinon pas la peine de tester.. /* caractère de terminaison pour "miniparsing" javascript=.. ? 
(ex: <a href="javascript:()" action="foo"> ) */ if (inscript_tag) { if (inscript_tag_lastc) { - if (*adr == inscript_tag_lastc) { + if (*html == inscript_tag_lastc) { /* sortir */ inscript_tag = inscript = 0; incomment = 0; @@ -888,9 +892,9 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { p = 0; valid_p = 1; } else if (strcmp(in_media, "AAM") == 0) { // AAM - if (is_space((unsigned char) adr[0]) - && !is_space((unsigned char) adr[1])) { - char *a = adr + 1; + if (is_space((unsigned char) html[0]) + && !is_space((unsigned char) html[1])) { + const char *a = html + 1; int n = 0; int ok = 0; int dot = 0; @@ -909,7 +913,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { tmp[0] = '\0'; strncat(tmp, a + dot + 1, n - dot - 1); if (is_knowntype(opt, tmp) || ishtml_ext(tmp) != -1) { - adr++; + html++; p = 0; valid_p = 1; unquoted_script = 1; @@ -926,21 +930,21 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { // note: inscript==1 donc on sautera après les \" if (inscript) { if (inscriptgen) { // on est déja dans un objet générant.. - if (*adr == scriptgen_q) { // fermeture des " ou ' - if (*(adr - 1) != '\\') { // non + if (*html == scriptgen_q) { // fermeture des " ou ' + if (*(html - 1) != '\\') { // non inscriptgen = 0; // ok parsing terminé } } } else { - char *a = NULL; + const char *a = NULL; char check_this_fking_line = 0; // parsing code javascript.. char must_be_terminated = 0; // caractère obligatoire de terminaison! int token_size; - if (!(token_size = strfield(adr, ".writeln"))) // détection ...objet.write[ln]("code html")... - token_size = strfield(adr, ".write"); + if (!(token_size = strfield(html, ".writeln"))) // détection ...objet.write[ln]("code html")... 
+ token_size = strfield(html, ".write"); if (token_size) { - a = adr + token_size; + a = html + token_size; while(is_realspace(*a)) a++; // sauter espaces if (*a == '(') { // début parenthèse @@ -966,7 +970,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { while(is_realspace(*a)) a++; if ((*a == '\'') || (*a == '"')) { // départ de '' ou "" - char *b; + const char *b; scriptgen_q = *a; // quote b = a + 1; // départ de la chaîne @@ -997,10 +1001,10 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { // NOTE: le code javascript autogénéré n'est pas pris en compte!! // (et ne marche pas dans 50% des cas de toute facon!) if (check_this_fking_line == 1) { - p = (int) (b - adr); // calculer saut! + p = (int) (b - html); // calculer saut! } else { inscriptgen = 1; // SCRIPTGEN actif - adr = b; // jump + html = b; // jump } if ((opt->debug > 1) && (opt->log != NULL)) { @@ -1029,9 +1033,9 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { if (!p) { // si dans un tag, et pas dans un script - sauf si on analyse un obj.write(".. if ((intag && (!inscript)) || inscriptgen) { - if ((*(adr - 1) == '<') || (is_space(*(adr - 1)))) { // <tag < tag etc + if ((*(html - 1) == '<') || (is_space(*(html - 1)))) { // <tag < tag etc // <A HREF=.. pour les liens HTML - p = rech_tageq(adr, "href"); + p = rech_tageq(html, "href"); if (p) { // href.. tester si c'est une bas href! if ((intag_start_valid) && check_tag(intag_start, "base")) { // oui! // ** note: base href et codebase ne font pas bon ménage.. @@ -1044,7 +1048,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { int i = 0; while((p == 0) && (strnotempty(hts_detect[i]))) { - p = rech_tageq(adr, hts_detect[i]); + p = rech_tageq(html, hts_detect[i]); if (p) { /* This is a temporary hack to avoid archive=foo.jar,bar.jar .. 
*/ if (strcmp(hts_detect[i], "archive") == 0) { @@ -1060,7 +1064,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { int i = 0; while((p == 0) && (strnotempty(hts_detectbeg[i]))) { - p = rech_tageqbegdigits(adr, hts_detectbeg[i]); + p = rech_tageqbegdigits(html, hts_detectbeg[i]); i++; } } @@ -1070,17 +1074,17 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { int i = 0; while((p == 0) && (strnotempty(hts_detectURL[i]))) { - p = rech_tageq(adr, hts_detectURL[i]); + p = rech_tageq(html, hts_detectURL[i]); i++; } if (p) { if (intag_ctype == 1) { p = 0; #if 0 - //if ((pos=rech_tageq(adr, "content"))) { + //if ((pos=rech_tageq(html, "content"))) { char temp[256]; char *token = NULL; - int len = rech_endtoken(adr + pos, &token); + int len = rech_endtoken(html + pos, &token); if (len > 0 && len < sizeof(temp) - 2) { char *chpos; @@ -1113,7 +1117,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { int i = 0; while((p == 0) && (strnotempty(hts_detectandleave[i]))) { - p = rech_tageq(adr, hts_detectandleave[i]); + p = rech_tageq(html, hts_detectandleave[i]); i++; } if (p) @@ -1127,20 +1131,20 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { /* détection onLoad etc */ while((p == 0) && (strnotempty(hts_detect_js[i]))) { - p = rech_tageq(adr, hts_detect_js[i]); + p = rech_tageq(html, hts_detect_js[i]); i++; } /* non détecté - détecter également les onXxxxx= */ if (p == 0) { - if ((*adr == 'o') && (*(adr + 1) == 'n') - && isUpperLetter(*(adr + 2))) { + if ((*html == 'o') && (*(html + 1) == 'n') + && isUpperLetter(*(html + 2))) { p = 0; - while(isalpha((unsigned char) adr[p]) && (p < 64)) + while(isalpha((unsigned char) html[p]) && (p < 64)) p++; if (p < 64) { - while(is_space(adr[p])) + while(is_space(html[p])) p++; - if (adr[p] == '=') + if (html[p] == '=') p++; else p = 0; @@ -1150,8 +1154,8 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { } /* OK, événement 
repéré */ if (p) { - inscript_tag_lastc = *(adr + p); /* à attendre à la fin */ - adr += p /*+ 1*/; /* saut */ + inscript_tag_lastc = *(html + p); /* à attendre à la fin */ + html += p /*+ 1*/; /* saut */ /* On est désormais dans du code javascript */ @@ -1166,7 +1170,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { } // <APPLET CODE=.. pour les applet java.. [CODEBASE (chemin..) à faire] if (p == 0) { - p = rech_tageq(adr, "code"); + p = rech_tageq(html, "code"); if (p) { if ((intag_start_valid) && check_tag(intag_start, "applet")) { // dans un <applet ! p_type = -1; // juste le nom de fichier+dossier, écire avant codebase @@ -1177,9 +1181,9 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { // pas très propre mais c'est ce qu'il y a de plus simple à faire!! { - char *a; + const char *a; - a = adr; + a = html; while((*a) && (*a != '>') && (!rech_tageq(a, "codebase"))) a++; @@ -1187,20 +1191,23 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { char *b; b = strchr(a, '>'); - if (b) { - if (((int) (b - adr)) < 1000) { // au total < 1Ko + if (b != NULL) { + if (b - html < 1000) { // au total < 1Ko char BIGSTK tempo[HTS_URLMAXSIZE * 2]; + const size_t offset = html - r->adr; + char *const modify = &r->adr[offset]; + assertf(modify == html); tempo[0] = '\0'; - strncatbuff(tempo, a, (int) (b - a)); + strncatbuff(tempo, a, b - a); strcatbuff(tempo, " "); - strncatbuff(tempo, adr, (int) (a - adr - 1)); + strncatbuff(tempo, html, a - html - 1); // éventuellement remplire par des espaces pour avoir juste la taille - while((int) strlen(tempo) < ((int) (b - adr))) + while(strlen(tempo) < (size_t) (b - html)) strcatbuff(tempo, " "); // pas d'erreur? - if ((int) strlen(tempo) == ((int) (b - adr))) { - strncpy(adr, tempo, strlen(tempo)); // PAS d'octet nul à la fin! + if (strlen(tempo) == b - html) { + strncpy(modify, tempo, strlen(tempo)); // PAS d'octet nul à la fin! p = 0; // DEVALIDER!! 
p_type = 0; add_class = 0; @@ -1215,7 +1222,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { } // liens à patcher mais pas à charger (ex: codebase) if (p == 0) { // note: si non chargé (ex: ignorer .class) patché tout de même - p = rech_tageq(adr, "codebase"); + p = rech_tageq(html, "codebase"); if (p) { if ((intag_start_valid) && check_tag(intag_start, "applet")) { // dans un <applet ! p_type = -2; @@ -1229,18 +1236,18 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { if (opt->robots) { if ((intag_start_valid) && check_tag(intag_start, "meta")) { - if (rech_tageq(adr, "name")) { // name=robots.txt + if (rech_tageq(html, "name")) { // name=robots.txt char tempo[1100]; char *a; tempo[0] = '\0'; - a = strchr(adr, '>'); + a = strchr(html, '>'); #if DEBUG_ROBOTS printf("robots.txt meta tag detected\n"); #endif if (a) { - if (((int) (a - adr)) < 999) { - strncatbuff(tempo, adr, (int) (a - adr)); + if (((int) (a - html)) < 999) { + strncatbuff(tempo, html, (int) (a - html)); if (strstrcase(tempo, "content")) { if (strstrcase(tempo, "robots")) { if (strstrcase(tempo, "nofollow")) { @@ -1265,7 +1272,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { // entrée dans une applet javascript /*if (!inscript) { // sinon on est dans un obj.write(".. 
if (p==0) - if (rech_sampletag(adr,"script")) + if (rech_sampletag(html,"script")) if (check_tag(intag_start,"script")) { inscript=1; } @@ -1280,13 +1287,13 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { #if 0 /* Check // javascript comments */ - if (*adr == 10 || *adr == 13) { + if (*html == 10 || *html == 13) { inscript_check_comments = 1; inscript_in_comments = 0; } else if (inscript_check_comments) { - if (!is_realspace(*adr)) { + if (!is_realspace(*html)) { inscript_check_comments = 0; - if (adr[0] == '/' && adr[1] == '/') { + if (html[0] == '/' && html[1] == '/') { inscript_in_comments = 1; } } @@ -1295,15 +1302,15 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { /* Parse */ assertf(inscript_name != NULL); - if (*adr == '/' + if (*html == '/' && - ((strfield(adr, "/script") + ((strfield(html, "/script") && strfield(inscript_name, "script")) - || (strfield(adr, "/style") + || (strfield(html, "/style") && strfield(inscript_name, "style")) ) && inscript_locked == 0) { - char *a = adr; + const char *a = html; //while(is_realspace(*(--a))); while(is_realspace(*a)) @@ -1336,54 +1343,54 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { if ((opt->parsejava & HTSPARSE_NO_JAVASCRIPT) == 0) { int nc; - nc = strfield(adr, ".src"); // nom.src="image"; - if (!nc && inscript_tag && inscript_tag_lastc == *(adr - 1)) - nc = strfield(adr, "src"); // onXXX='src="image";' + nc = strfield(html, ".src"); // nom.src="image"; + if (!nc && inscript_tag && inscript_tag_lastc == *(html - 1)) + nc = strfield(html, "src"); // onXXX='src="image";' if (!nc) - nc = strfield(adr, ".location"); // document.location="doc" + nc = strfield(html, ".location"); // document.location="doc" if (!nc) - nc = strfield(adr, ":location"); // javascript:location="doc" + nc = strfield(html, ":location"); // javascript:location="doc" if (!nc) { // location="doc" - if ((nc = strfield(adr, "location")) - && !isspace(*(adr - 1)) + if ((nc 
= strfield(html, "location")) + && !isspace(*(html - 1)) ) nc = 0; } if (!nc) - nc = strfield(adr, ".href"); // document.location="doc" + nc = strfield(html, ".href"); // document.location="doc" if (!nc) - if ((nc = strfield(adr, ".open"))) { // window.open("doc",.. + if ((nc = strfield(html, ".open"))) { // window.open("doc",.. expected = '('; // parenthèse expected_end = "),"; // fin: virgule ou parenthèse ensure_not_mime = 1; //* ensure the url is not a mime type */ } if (!nc) - if ((nc = strfield(adr, ".replace"))) { // window.replace("url") + if ((nc = strfield(html, ".replace"))) { // window.replace("url") expected = '('; // parenthèse expected_end = ")"; // fin: parenthèse } if (!nc) - if ((nc = strfield(adr, ".link"))) { // window.link("url") + if ((nc = strfield(html, ".link"))) { // window.link("url") expected = '('; // parenthèse expected_end = ")"; // fin: parenthèse } - if (!nc && (nc = strfield(adr, "url")) && (!isalnum(*(adr - 1))) && *(adr - 1) != '_') { // url(url) + if (!nc && (nc = strfield(html, "url")) && (!isalnum(*(html - 1))) && *(html - 1) != '_') { // url(url) expected = '('; // parenthèse expected_end = ")"; // fin: parenthèse can_avoid_quotes = 1; quotes_replacement = ')'; } if (!nc) - if ((nc = strfield(adr, "import"))) { // import "url" - if (is_space(*(adr + nc))) { + if ((nc = strfield(html, "import"))) { // import "url" + if (is_space(*(html + nc))) { expected = 0; // no char expected } else nc = 0; } if (nc) { - char *a; + const char *a; - a = adr + nc; + a = html + nc; while(is_realspace(*a)) a++; if ((*a == expected) || (!expected)) { @@ -1392,7 +1399,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { while(is_realspace(*a)) a++; if ((*a == 34) || (*a == '\'') || (can_avoid_quotes)) { - char *b, *c; + const char *b, *c; int ndelim = 1; if ((*a == 34) || (*a == '\'')) @@ -1465,7 +1472,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { "link detected in javascript: %s", str); } - p = 
(int) (a - adr); // p non nul: TRAITER CHAINE COMME FICHIER + p = (int) (a - html); // p non nul: TRAITER CHAINE COMME FICHIER if (can_avoid_quotes) { ending_p = quotes_replacement; } @@ -1487,14 +1494,14 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { //p=rech_tageq(adr,"primary"); // lien primaire, yeah p = 0; // No stupid tag anymore, raw link valid_p = 1; // Valid even if p==0 - while((adr[p] == '\r') || (adr[p] == '\n')) + while((html[p] == '\r') || (html[p] == '\n')) p++; //can_avoid_quotes=1; ending_p = '\r'; } - } else if (isspace((unsigned char) *adr)) { - intag_startattr = adr + 1; // attribute in tag (for dirty parsing) + } else if (isspace((unsigned char) *html)) { + intag_startattr = html + 1; // attribute in tag (for dirty parsing) } // ------------------------------------------------------------ @@ -1504,18 +1511,18 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { // ------------------------------------------------------------ if (opt->parseall && (opt->parsejava & HTSPARSE_NO_AGGRESSIVE) == 0 && (ptr > 0) && (!in_media) /* && (!inscript_in_comments) */ ) { // option parsing "brut" //int incomment_justquit=0; - if (!is_realspace(*adr)) { + if (!is_realspace(*html)) { int noparse = 0; // Gestion des /* */ #if 0 if (inscript) { if (parseall_incomment) { - if ((*adr == '/') && (*(adr - 1) == '*')) + if ((*html == '/') && (*(html - 1) == '*')) parseall_incomment = 0; incomment_justquit = 1; // ne pas noter dernier caractère } else { - if ((*adr == '/') && (*(adr + 1) == '*')) + if ((*html == '/') && (*(html + 1) == '*')) parseall_incomment = 1; } } else @@ -1536,12 +1543,12 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { if (!noparse) { //if ((!parseall_incomment) && (!noparse)) { if (!p) { // non déja trouvé - if (adr != r->adr) { // >1 caractère + if (html != r->adr) { // >1 caractère // scanner les chaines - if ((*adr == '\"') || (*adr == '\'')) { // "xx.gif" 'xx.gif' + if 
((*html == '\"') || (*html == '\'')) { // "xx.gif" 'xx.gif' if (strchr("=(,", parseall_lastc)) { // exemple: a="img.gif.. (handles comments) - char *a = adr; - char stop = *adr; // " ou ' + const char *a = html; + char stop = *html; // " ou ' int count = 0; // sauter caractères @@ -1574,7 +1581,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { tempo[0] = '\0'; type[0] = '\0'; // - strncatbuff(tempo, adr + 1, count); + strncatbuff(tempo, html + 1, count); // if ((!strchr(tempo, ' ')) || inscript) { // espace dedans: méfiance! (sauf dans code javascript) int invalid_url = 0; @@ -1702,7 +1709,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { // plus dans un commentaire if (inscript_state_pos == INSCRIPT_START && inscript_state_pos_prev == INSCRIPT_START) { - parseall_lastc = *adr; // caractère avant le prochain + parseall_lastc = *html; // caractère avant le prochain } } // if realspace @@ -1714,14 +1721,14 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { // if ((p > 0) || (valid_p)) { // on a repéré un lien //int lien_valide=0; - char *eadr = NULL; /* fin de l'URL */ + const char *eadr = NULL; /* fin de l'URL */ //char* quote_adr=NULL; /* adresse du ? 
dans l'adresse */ int ok = 1; char quote = '\0'; int quoteinscript = 0; int noquote = 0; - char *tag_attr_start = adr; + const char *tag_attr_start = html; // si nofollow ou un stop a été déclenché, réécrire tous les liens en externe if ((nofollow) @@ -1734,27 +1741,27 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { // écrire codebase avant, flusher avant code if ((p_type == -1) || (p_type == -2)) { if ((opt->getmode & 1) && (ptr > 0)) { - HT_ADD_ADR; // refresh + HT_add_adr; // refresh } - lastsaved = adr; // dernier écrit+1 + lastsaved = html; // dernier écrit+1 } // sauter espaces // adr+=p; INCREMENT_CURRENT_ADR(p); - while((is_space(*adr) - || (inscriptgen && adr[0] == '\\' && is_space(adr[1]) + while((is_space(*html) + || (inscriptgen && html[0] == '\\' && is_space(html[1]) ) ) && quote == '\0') { if (!quote) - if ((*adr == '\"') || (*adr == '\'')) { - quote = *adr; // on doit attendre cela à la fin - if (inscriptgen && *(adr - 1) == '\\') { + if ((*html == '\"') || (*html == '\'')) { + quote = *html; // on doit attendre cela à la fin + if (inscriptgen && *(html - 1) == '\\') { quoteinscript = 1; /* will wait for \" */ } } // puis quitter - // adr++; // sauter les espaces, "" et cie + // html++; // sauter les espaces, "" et cie INCREMENT_CURRENT_ADR(1); } @@ -1767,9 +1774,9 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { // sauter éventuel \" ou \' javascript if (inscript) { // on est dans un obj.write(".. 
- if (*adr == '\\') { - if ((*(adr + 1) == '\'') || (*(adr + 1) == '"')) { // \" ou \' - // adr+=2; // sauter + if (*html == '\\') { + if ((*(html + 1) == '\'') || (*(html + 1) == '"')) { // \" ou \' + // html+=2; // sauter INCREMENT_CURRENT_ADR(2); } } @@ -1778,19 +1785,19 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { if (p_searchMETAURL) { int l = 0; - while((adr + l + 4 < r->adr + r->size) - && (!strfield(adr + l, "URL=")) + while((html + l + 4 < r->adr + r->size) + && (!strfield(html + l, "URL=")) && (l < 128)) l++; - if (!strfield(adr + l, "URL=")) + if (!strfield(html + l, "URL=")) ok = -1; else - adr += (l + 4); + html += (l + 4); } /* éviter les javascript:document.location=.. : les parser, plutôt */ if (ok != -1) { - if (strfield(adr, "javascript:") + if (strfield(html, "javascript:") && !inscript /* we don't want to parse 'javascript:' inside document.write inside scripts */ ) { ok = -1; @@ -1808,22 +1815,22 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { } if (p_type == 1) { - if (*adr == '#') { - adr++; // sauter # pour usemap etc + if (*html == '#') { + html++; // sauter # pour usemap etc } } - eadr = adr; + eadr = html; // ne pas flusher après code si on doit écrire le codebase avant! if ((p_type != -1) && (p_type != 2) && (p_type != -2)) { if ((opt->getmode & 1) && (ptr > 0)) { - HT_ADD_ADR; // refresh + HT_add_adr; // refresh } - lastsaved = adr; // dernier écrit+1 + lastsaved = html; // dernier écrit+1 // après on écrira soit les données initiales, // soir une URL/lien modifié! 
} else if (p_type == -1) - p_flush = adr; // flusher jusqu'à adr ensuite + p_flush = html; // flusher jusqu'à adr ensuite if (ok != -1) { // continuer // découper le lien @@ -1832,7 +1839,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { if (!is_space(*eadr)) ok = 0; } - if ((((int) (eadr - adr))) > HTS_URLMAXSIZE) // ** trop long, >HTS_URLMAXSIZE caractères (on prévoit HTS_URLMAXSIZE autres pour path) + if ((((int) (eadr - html))) > HTS_URLMAXSIZE) // ** trop long, >HTS_URLMAXSIZE caractères (on prévoit HTS_URLMAXSIZE autres pour path) ok = -1; // ne pas traiter ce lien if (ok > 0) { @@ -1877,10 +1884,10 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { } while(ok == 1); // Empty link detected - if ((((int) (eadr - adr))) <= 1) { // link empty + if ((((int) (eadr - html))) <= 1) { // link empty ok = -1; // No - if (*adr != '#') { // Not empty+unique # - if ((((int) (eadr - adr)) == 1)) { // 1=link empty with delim (end_adr-start_adr) + if (*html != '#') { // Not empty+unique # + if ((((int) (eadr - html)) == 1)) { // 1=link empty with delim (end_adr-start_adr) if (quote) { if ((opt->getmode & 1) && (ptr > 0)) { HT_ADD("#"); // We add this for a <href=""> @@ -1890,7 +1897,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { } } // This is a dirty and horrible hack to avoid parsing an Adobe GoLive bogus tag - if (strfield(adr, "(Empty Reference!)")) { + if (strfield(html, "(Empty Reference!)")) { ok = -1; // No } @@ -1900,13 +1907,13 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { char BIGSTK lien[HTS_URLMAXSIZE * 2]; int meme_adresse = 0; // 0 par défaut pour primary - //char *copie_de_adr=adr; + //char *copie_de_adr=html; //char* p; // construire lien (découpage) - if ((((int) (eadr - adr)) - 1) < HTS_URLMAXSIZE) { // pas trop long? 
- strncpy(lien, adr, ((int) (eadr - adr)) - 1); - *(lien + (((int) (eadr - adr))) - 1) = '\0'; + if ((((int) (eadr - html)) - 1) < HTS_URLMAXSIZE) { // pas trop long? + strncpy(lien, html, ((int) (eadr - html)) - 1); + *(lien + (((int) (eadr - html))) - 1) = '\0'; //printf("link: %s\n",lien); // supprimer les espaces while((lien[strlen(lien) - 1] == ' ') && (strnotempty(lien))) @@ -1918,15 +1925,16 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { // ------------------------------------------------------ // Lien repéré et extrait if (strnotempty(lien) > 0) { // construction du lien - char BIGSTK adr[HTS_URLMAXSIZE * 2], fil[HTS_URLMAXSIZE * 2]; // ATTENTION adr cache le "vrai" adr + lien_adrfilsave afs; int forbidden_url = -1; // lien non interdit (mais non autorisé..) int just_test_it = 0; // mode de test des liens int set_prio_to = 0; // pour capture de page isolée int import_done = 0; // lien importé (ne pas scanner ensuite *à priori*) // - adr[0] = '\0'; - fil[0] = '\0'; + afs.af.adr[0] = '\0'; + afs.af.fil[0] = '\0'; + afs.save[0] = '\0'; // // 0: autorisé // 1: interdit (patcher tout de même adresse) @@ -2207,7 +2215,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { // Vérifier les codebase=applet (au lieu de applet/) if (p_type == -2) { // codebase if (strnotempty(lien)) { - if (fil[strlen(lien) - 1] != '/') { // pas répertoire + if (lien[strlen(lien) - 1] != '/') { // pas répertoire strcatbuff(lien, "/"); } } @@ -2231,17 +2239,16 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { } // copier nom host si besoin est if (!link_has_authority(lien)) { // pas de http:// - char BIGSTK adr2[HTS_URLMAXSIZE * 2], fil2[HTS_URLMAXSIZE * 2]; // ** euh ident_url_relatif?? + lien_adrfil af2; // ** euh ident_url_relatif?? 
- if (ident_url_relatif(lien, urladr(), urlfil(), adr2, fil2) < - 0) { + if (ident_url_relatif(lien, urladr(), urlfil(), &af2) < 0) { error = 1; } else { strcpybuff(lien, "http://"); - strcatbuff(lien, adr2); - if (*fil2 != '/') + strcatbuff(lien, af2.adr); + if (*af2.fil != '/') strcatbuff(lien, "/"); - strcatbuff(lien, fil2); + strcatbuff(lien, af2.fil); { char *a; @@ -2321,7 +2328,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { HTS_URLMAXSIZE) { // mailto: and co: do NOT add base if (ident_url_relatif - (lien, urladr(), urlfil(), adr, fil) >= 0) { + (lien, urladr(), urlfil(), &afs.af) >= 0) { char BIGSTK tempo[HTS_URLMAXSIZE * 2]; // base est absolue @@ -2342,19 +2349,18 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { lien); } } else { - char BIGSTK badr[HTS_URLMAXSIZE * 2], - bfil[HTS_URLMAXSIZE * 2]; - if (ident_url_absolute(_base, badr, bfil) >= 0) { - if (((int) strlen(badr) + (int) strlen(lien)) < + lien_adrfil baseaf; + if (ident_url_absolute(_base, &baseaf) >= 0) { + if (((int) strlen(baseaf.adr) + (int) strlen(lien)) < HTS_URLMAXSIZE) { char BIGSTK tempo[HTS_URLMAXSIZE * 2]; // base est absolue tempo[0] = '\0'; - if (!link_has_authority(badr)) { + if (!link_has_authority(baseaf.adr)) { strcatbuff(tempo, "http://"); } - strcatbuff(tempo, badr); + strcatbuff(tempo, baseaf.adr); strcatbuff(tempo, lien); strcpybuff(lien, tempo); // patcher en considérant base @@ -2384,8 +2390,8 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { relativeurladr(), relativeurlfil()); if ((reponse = ident_url_relatif(lien, relativeurladr(), relativeurlfil(), - adr, fil)) < 0) { - adr[0] = '\0'; // erreur + &afs.af)) < 0) { + afs.af.adr[0] = '\0'; // erreur if (reponse == -2) { hts_log_print(opt, LOG_WARNING, "Link %s not caught (unknown protocol)", @@ -2398,14 +2404,14 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { } else { hts_log_print(opt, LOG_DEBUG, "built relative link %s 
with %s%s -> %s%s", - lien, relativeurladr(), relativeurlfil(), adr, - fil); + lien, relativeurladr(), relativeurlfil(), afs.af.adr, + afs.af.fil); } } else { hts_log_print(opt, LOG_DEBUG, "link %s not build, error detected before", lien); - adr[0] = '\0'; + afs.af.adr[0] = '\0'; } // Le lien doit juste être réécrit, mais ne doit pas générer un lien @@ -2413,7 +2419,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { if (p_nocatch) { forbidden_url = 1; // interdire récupération du lien hts_log_print(opt, LOG_DEBUG, "link forced external at %s%s", - adr, fil); + afs.af.adr, afs.af.fil); } // Tester si un lien doit être accepté ou refusé (wizard) // forbidden_url=1 : lien refusé @@ -2421,11 +2427,11 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { //if ((ptr>0) && (p_type!=2) && (p_type!=-2)) { // tester autorisations? if ((p_type != 2) && (p_type != -2)) { // tester autorisations? if (!p_nocatch) { - if (adr[0] != '\0') { + if (afs.af.adr[0] != '\0') { hts_log_print(opt, LOG_DEBUG, - "wizard link test at %s%s..", adr, fil); + "wizard link test at %s%s..", afs.af.adr, afs.af.fil); forbidden_url = - hts_acceptlink(opt, ptr, adr, fil, + hts_acceptlink(opt, ptr, afs.af.adr, afs.af.fil, intag_name ? intag_name : NULL, intag_name ? 
tag_attr_start : NULL, &set_prio_to, &just_test_it); @@ -2437,25 +2443,23 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { } // calculer meme_adresse meme_adresse = - strfield2(jump_identification(adr), + strfield2(jump_identification(afs.af.adr), jump_identification(urladr())); // Début partie sauvegarde // ici on forme le nom du fichier à sauver, et on patche l'URL - if (adr[0] != '\0') { + if (afs.af.adr[0] != '\0') { // savename(): simplifier les ../ et autres joyeusetés - char BIGSTK save[HTS_URLMAXSIZE * 2]; int r_sv = 0; // En cas de moved, adresse première - char BIGSTK former_adr[HTS_URLMAXSIZE * 2]; - char BIGSTK former_fil[HTS_URLMAXSIZE * 2]; + lien_adrfil former; // - save[0] = '\0'; - former_adr[0] = '\0'; - former_fil[0] = '\0'; + afs.save[0] = '\0'; + former.adr[0] = '\0'; + former.fil[0] = '\0'; // // nom du chemin à sauver si on doit le calculer @@ -2471,14 +2475,14 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { /* Calc */ last_adr[0] = '\0'; //char last_fil[HTS_URLMAXSIZE*2]=""; - strcpybuff(last_adr, adr); // ancienne adresse + strcpybuff(last_adr, afs.af.adr); // ancienne adresse //strcpybuff(last_fil,fil); // ancien chemin r_sv = - url_savename(adr, fil, save, former_adr, former_fil, - heap(ptr)->adr, heap(ptr)->fil, opt, - opt->liens, opt->lien_tot, sback, cache, hash, ptr, + url_savename(&afs, &former, heap(ptr)->adr, heap(ptr)->fil, opt, + sback, cache, hash, ptr, numero_passe, NULL); - if (strcmp(jump_identification(last_adr), jump_identification(adr)) != 0) { // a changé + if (strcmp(jump_identification(last_adr), + jump_identification(afs.af.adr)) != 0) { // a changé // 2e test si moved @@ -2487,12 +2491,12 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { // forbidden_url=0 : lien accepté if ((ptr > 0) && (p_type != 2) && (p_type != -2)) { // tester autorisations? 
if (!p_nocatch) { - if (adr[0] != '\0') { + if (afs.af.adr[0] != '\0') { hts_log_print(opt, LOG_DEBUG, "wizard moved link retest at %s%s..", - adr, fil); + afs.af.adr, afs.af.fil); forbidden_url = - hts_acceptlink(opt, ptr, adr, fil, + hts_acceptlink(opt, ptr, afs.af.adr, afs.af.fil, intag_name ? intag_name : NULL, intag_name ? tag_attr_start : NULL, &set_prio_to, @@ -2507,19 +2511,19 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { meme_adresse = 0; // on a changé } } else { - strcpybuff(save, ""); // dummy + strcpybuff(afs.save, ""); // dummy } } // resolve unresolved type if (r_sv != -1 && p_type != 2 && p_type != -2 - && forbidden_url == 0 && IS_DELAYED_EXT(save) + && forbidden_url == 0 && IS_DELAYED_EXT(afs.save) ) { time_t t; // pas d'erreur, on continue r_sv = - hts_wait_delayed(str, adr, fil, save, heap(ptr)->adr, - heap(ptr)->fil, former_adr, former_fil, + hts_wait_delayed(str, &afs, heap(ptr)->adr, + heap(ptr)->fil, &former, &forbidden_url); /* User interaction, because hts_wait_delayed can be slow.. 
(3.43) */ @@ -2541,22 +2545,22 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { if (forbidden_url != 1) { // le lien va être chargé if ((p_type == 2) || (p_type == -2)) { // base href ou codebase, pas un lien hts_log_print(opt, LOG_DEBUG, "Code/Codebase: %s%s", - adr, fil); + afs.af.adr, afs.af.fil); } else if ((opt->getmode & 4) == 0) { hts_log_print(opt, LOG_DEBUG, "Record: %s%s -> %s", - adr, fil, save); + afs.af.adr, afs.af.fil, afs.save); } else { - if (!ishtml(opt, fil)) + if (!ishtml(opt, afs.af.fil)) hts_log_print(opt, LOG_DEBUG, - "Record after: %s%s -> %s", adr, fil, - save); + "Record after: %s%s -> %s", afs.af.adr, afs.af.fil, + afs.save); else hts_log_print(opt, LOG_DEBUG, "Record: %s%s -> %s", - adr, fil, save); + afs.af.adr, afs.af.fil, afs.save); } } else - hts_log_print(opt, LOG_DEBUG, "External: %s%s", adr, - fil); + hts_log_print(opt, LOG_DEBUG, "External: %s%s", afs.af.adr, + afs.af.fil); } /* FIN log */ @@ -2567,29 +2571,29 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { /* */ else if (opt->urlmode == 0) { // URL absolue dans tous les cas if ((opt->getmode & 1) && (ptr > 0)) { // ecrire les html - if (!link_has_authority(adr)) { + if (!link_has_authority(afs.af.adr)) { HT_ADD("http://"); } else { - char *aut = strstr(adr, "//"); + char *aut = strstr(afs.af.adr, "//"); if (aut) { char tmp[256]; tmp[0] = '\0'; - strncatbuff(tmp, adr, (int) (aut - adr)); // scheme + strncatbuff(tmp, afs.af.adr, aut - afs.af.adr); // scheme HT_ADD(tmp); // Protocol HT_ADD("//"); } } if (!opt->passprivacy) { - HT_ADD_HTMLESCAPED(jump_protocol(adr)); // Password + HT_ADD_HTMLESCAPED(jump_protocol(afs.af.adr)); // Password } else { - HT_ADD_HTMLESCAPED(jump_identification(adr)); // No Password + HT_ADD_HTMLESCAPED(jump_identification(afs.af.adr)); // No Password } - if (*fil != '/') + if (afs.af.fil[0] != '/') HT_ADD("/"); - HT_ADD_HTMLESCAPED(fil); + HT_ADD_HTMLESCAPED(afs.af.fil); } lastsaved = eadr - 1; // dernier 
écrit+1 (enfin euh apres on fait un ++ alors hein) /* */ @@ -2601,34 +2605,34 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { if ((opt->getmode & 1) && (ptr > 0)) { if (p_type != -1) { // pas que le nom de fichier (pas classe java) if (!opt->external) { - if (!link_has_authority(adr)) { + if (!link_has_authority(afs.af.adr)) { HT_ADD("http://"); if (!opt->passprivacy) { - HT_ADD_HTMLESCAPED(adr); // Password + HT_ADD_HTMLESCAPED(afs.af.adr); // Password } else { - HT_ADD_HTMLESCAPED(jump_identification(adr)); // No Password + HT_ADD_HTMLESCAPED(jump_identification(afs.af.adr)); // No Password } - if (*fil != '/') + if (afs.af.fil[0] != '/') HT_ADD("/"); - HT_ADD_HTMLESCAPED(fil); + HT_ADD_HTMLESCAPED(afs.af.fil); } else { - char *aut = strstr(adr, "//"); + char *aut = strstr(afs.af.adr, "//"); if (aut) { char tmp[256]; tmp[0] = '\0'; - strncatbuff(tmp, adr, (int) (aut - adr)); // scheme + strncatbuff(tmp, afs.af.adr, (aut - afs.af.adr)); // scheme HT_ADD(tmp); // Protocol HT_ADD("//"); if (!opt->passprivacy) { - HT_ADD_HTMLESCAPED(jump_protocol(adr)); // Password + HT_ADD_HTMLESCAPED(jump_protocol(afs.af.adr)); // Password } else { - HT_ADD_HTMLESCAPED(jump_identification(adr)); // No Password + HT_ADD_HTMLESCAPED(jump_identification(afs.af.adr)); // No Password } - if (*fil != '/') + if (afs.af.fil[0] != '/') HT_ADD("/"); - HT_ADD_HTMLESCAPED(fil); + HT_ADD_HTMLESCAPED(afs.af.fil); } } // @@ -2642,9 +2646,9 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { int cat_data_len = 0; // ajouter lien external - switch ((link_has_authority(adr)) ? 1 - : ((fil[strlen(fil) - 1] == - '/') ? 1 : (ishtml(opt, fil)))) { + switch ((link_has_authority(afs.af.adr)) ? 1 + : ((afs.af.fil[strlen(afs.af.fil) - 1] == + '/') ? 
1 : (ishtml(opt, afs.af.fil)))) { case 1: case -2: // html ou répertoire if (opt->getmode & 1) { // sauver html @@ -2659,15 +2663,15 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { default: // inconnu // asp, cgi.. if ((strfield2 - (fil + max(0, (int) strlen(fil) - 4), + (afs.af.fil + max(0, (int) strlen(afs.af.fil) - 4), ".gif")) || (strfield2 - (fil + max(0, (int) strlen(fil) - 4), + (afs.af.fil + max(0, (int) strlen(afs.af.fil) - 4), ".jpg")) || (strfield2 - (fil + max(0, (int) strlen(fil) - 4), + (afs.af.fil + max(0, (int) strlen(afs.af.fil) - 4), ".xbm")) /*|| (ishtml(opt,fil)!=0) */ ) { @@ -2690,13 +2694,12 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { } // html,gif if (patch_it) { - char BIGSTK save[HTS_URLMAXSIZE * 2]; + char BIGSTK save[HTS_URLMAXSIZE * 2];
char BIGSTK tempo[HTS_URLMAXSIZE * 2]; strcpybuff(save, StringBuff(opt->path_html_utf8)); strcatbuff(save, cat_name); - if (lienrelatif(tempo, save, relativesavename()) == - 0) { + if (lienrelatif(tempo, save, relativesavename()) == 0) { /* Never escape high-chars (we don't know the encoding!!) */ inplace_escape_uri_utf(tempo, sizeof(tempo)); // escape with %xx //if (!no_esc_utf) @@ -2708,33 +2711,33 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { HT_ADD("?link="); // page externe // same as above - if (!link_has_authority(adr)) { + if (!link_has_authority(afs.af.adr)) { HT_ADD("http://"); if (!opt->passprivacy) { - HT_ADD_HTMLESCAPED(adr); // Password + HT_ADD_HTMLESCAPED(afs.af.adr); // Password } else { - HT_ADD_HTMLESCAPED(jump_identification(adr)); // No Password + HT_ADD_HTMLESCAPED(jump_identification(afs.af.adr)); // No Password } - if (*fil != '/') + if (afs.af.fil[0] != '/') HT_ADD("/"); - HT_ADD_HTMLESCAPED(fil); + HT_ADD_HTMLESCAPED(afs.af.fil); } else { - char *aut = strstr(adr, "//"); + char *aut = strstr(afs.af.adr, "//"); if (aut) { char tmp[256]; tmp[0] = '\0'; - strncatbuff(tmp, adr, (int) (aut - adr) + 2); // scheme + strncatbuff(tmp, afs.af.adr, (aut - afs.af.adr) + 2); // scheme HT_ADD(tmp); if (!opt->passprivacy) { - HT_ADD_HTMLESCAPED(jump_protocol(adr)); // Password + HT_ADD_HTMLESCAPED(jump_protocol(afs.af.adr)); // Password } else { - HT_ADD_HTMLESCAPED(jump_identification(adr)); // No Password + HT_ADD_HTMLESCAPED(jump_identification(afs.af.adr)); // No Password } - if (*fil != '/') + if (afs.af.fil[0] != '/') HT_ADD("/"); - HT_ADD_HTMLESCAPED(fil); + HT_ADD_HTMLESCAPED(afs.af.fil); } } // @@ -2773,13 +2776,13 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { } else { // écrire normalement le nom de fichier HT_ADD("http://"); if (!opt->passprivacy) { - HT_ADD_HTMLESCAPED(adr); // Password + HT_ADD_HTMLESCAPED(afs.af.adr); // Password } else { - HT_ADD_HTMLESCAPED(jump_identification(adr)); 
// No Password + HT_ADD_HTMLESCAPED(jump_identification(afs.af.adr)); // No Password } - if (*fil != '/') + if (afs.af.fil[0] != '/') HT_ADD("/"); - HT_ADD_HTMLESCAPED(fil); + HT_ADD_HTMLESCAPED(afs.af.fil); } // patcher? } // external } else { // que le nom de fichier (classe java) @@ -2790,7 +2793,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { // Calculer chemin tempo_pat[0] = '\0'; - strcpybuff(tempo, fil); // <-- ajouté + strcpybuff(tempo, afs.af.fil); // <-- ajouté { char *a = strrchr(tempo, '/'); @@ -2814,7 +2817,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { char BIGSTK tempo2[HTS_URLMAXSIZE * 2]; strcpybuff(tempo2, a + 1); // FICHIER - strncatbuff(tempo_pat, tempo, (int) (a - tempo) + 1); // chemin + strncatbuff(tempo_pat, tempo, (a - tempo) + 1); // chemin strcpybuff(tempo, tempo2); // fichier } } @@ -2828,9 +2831,9 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { if (strnotempty(tempo_pat)) { HT_ADD("codebase=\"http://"); if (!opt->passprivacy) { - HT_ADD_HTMLESCAPED(adr); // Password + HT_ADD_HTMLESCAPED(afs.af.adr); // Password } else { - HT_ADD_HTMLESCAPED(jump_identification(adr)); // No Password + HT_ADD_HTMLESCAPED(jump_identification(afs.af.adr)); // No Password } if (*tempo_pat != '/') HT_ADD("/"); @@ -2866,12 +2869,12 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { char BIGSTK cid[HTS_URLMAXSIZE * 3]; HT_ADD("cid:"); - make_content_id(adr, fil, cid, sizeof(cid)); + make_content_id(afs.af.adr, afs.af.fil, cid, sizeof(cid)); HT_ADD_HTMLESCAPED(cid); lastsaved = eadr - 1; // dernier écrit+1 (enfin euh apres on fait un ++ alors hein) } else if (opt->urlmode == 3) { // URI absolue / if ((opt->getmode & 1) && (ptr > 0)) { // ecrire les html - HT_ADD_HTMLESCAPED(fil); + HT_ADD_HTMLESCAPED(afs.af.fil); } lastsaved = eadr - 1; // dernier écrit+1 (enfin euh apres on fait un ++ alors hein) } else if (opt->urlmode == 5) { // transparent proxy URL @@ 
-2881,23 +2884,23 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { char *pos; if ((opt->getmode & 1) && (ptr > 0)) { // ecrire les html - if (!link_has_authority(adr)) { + if (!link_has_authority(afs.af.adr)) { HT_ADD("http://"); } else { - char *aut = strstr(adr, "//"); + char *aut = strstr(afs.af.adr, "//"); if (aut) { char tmp[256]; tmp[0] = '\0'; - strncatbuff(tmp, adr, (int) (aut - adr)); // scheme + strncatbuff(tmp, afs.af.adr, (aut - afs.af.adr)); // scheme HT_ADD(tmp); // Protocol HT_ADD("//"); } } // filename is taken as URI (ex: "C:\My Website\www.example.com\foo4242.html) - uri = save; + uri = afs.save; // .. after stripping the path prefix (ex: "www.example.com\foo4242.html) if (strnotempty(StringBuff(opt->path_html_utf8))) { @@ -2913,7 +2916,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { } // put original query string if any (ex: "www.example.com/foo4242.html?q=45) - pos = strchr(fil, '?'); + pos = strchr(afs.af.fil, '?'); if (pos != NULL) { strcatbuff(tempo, pos); } @@ -2927,7 +2930,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { tempo[0] = '\0'; // calculer le lien relatif - if (lienrelatif(tempo, save, relativesavename()) == 0) { + if (lienrelatif(tempo, afs.save, relativesavename()) == 0) { if (!in_media) { // In media (such as real audio): don't patch /* Never escape high-chars (we don't know the encoding!!) 
*/ inplace_escape_uri_utf(tempo, sizeof(tempo)); @@ -2944,7 +2947,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { } hts_log_print(opt, LOG_DEBUG, "relative link at %s build with %s and %s: %s", - adr, save, relativesavename(), tempo); + afs.af.adr, afs.save, relativesavename(), tempo); // lien applet (code) - il faut placer un codebase avant if (p_type == -1) { // que le nom de fichier @@ -3033,7 +3036,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { } else { hts_log_print(opt, LOG_WARNING, "Error building relative link %s and %s", - save, relativesavename()); + afs.save, relativesavename()); } } // sinon le lien sera écrit normalement @@ -3048,13 +3051,13 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { #endif /* Security check */ - if (strlen(save) >= HTS_URLMAXSIZE) { - adr[0] = '\0'; + if (strlen(afs.save) >= HTS_URLMAXSIZE) { + afs.af.adr[0] = '\0'; hts_log_print(opt, LOG_WARNING, "Link is too long: %s", - save); + afs.save); } - if ((adr[0] != '\0') && (p_type != 2) && (p_type != -2) && (forbidden_url != 1)) { // si le fichier n'existe pas, ajouter à la liste + if ((afs.af.adr[0] != '\0') && (p_type != 2) && (p_type != -2) && (forbidden_url != 1)) { // si le fichier n'existe pas, ajouter à la liste // n'y a-t-il pas trop de liens? 
if (0) { // CLEANUP @@ -3083,7 +3086,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { if ((opt->getmode & 4) == 0) { // traiter html après pass_fix = 0; } else { // vérifier que ce n'est pas un !html - if (!ishtml(opt, fil)) + if (!ishtml(opt, afs.af.fil)) pass_fix = 1; // priorité inférieure (traiter après) else pass_fix = max(0, numero_passe); // priorité normale @@ -3106,15 +3109,15 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { // // On part de la fin et on essaye de se presser (économise temps machine) { - int i = hash_read(hash, save, NULL, 0); // lecture type 0 (sav) + int i = hash_read(hash, afs.save, NULL, 0); // lecture type 0 (sav) if (i >= 0) { if ((opt->debug > 1) && (opt->log != NULL)) { - if (strcmp(adr, heap(i)->adr) != 0 - || strcmp(fil, heap(i)->fil) != 0) { + if (strcmp(afs.af.adr, heap(i)->adr) != 0 + || strcmp(afs.af.fil, heap(i)->fil) != 0) { hts_log_print(opt, LOG_DEBUG, "merging similar links %s%s and %s%s", - adr, fil, heap(i)->adr, + afs.af.adr, afs.af.fil, heap(i)->adr, heap(i)->fil); } } @@ -3136,16 +3139,16 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { // DEBUT ROBOTS.TXT AJOUT if (!just_test_it) { - if ((!strfield(adr, "ftp://")) // non ftp - && (!strfield(adr, "file://")) + if ((!strfield(afs.af.adr, "ftp://")) // non ftp + && (!strfield(afs.af.adr, "file://")) ) { // non file if (opt->robots) { // récupérer robots - if (ishtml(opt, fil) != 0) { // pas la peine pour des fichiers isolés - if (checkrobots(_ROBOTS, adr, "") != -1) { // robots.txt ? - checkrobots_set(_ROBOTS, adr, ""); // ajouter entrée vide - if (checkrobots(_ROBOTS, adr, "") == -1) { // robots.txt ? + if (ishtml(opt, afs.af.fil) != 0) { // pas la peine pour des fichiers isolés + if (checkrobots(_ROBOTS, afs.af.adr, "") != -1) { // robots.txt ? + checkrobots_set(_ROBOTS, afs.af.adr, ""); // ajouter entrée vide + if (checkrobots(_ROBOTS, afs.af.adr, "") == -1) { // robots.txt ? 
// enregistrer robots.txt (MACRO) - if (!hts_record_link(opt, adr, "/robots.txt", "", "", "", NULL)) { + if (!hts_record_link(opt, afs.af.adr, "/robots.txt", "", "", "", NULL)) { printf ("PANIC! : Not enough memory [%d]\n", __LINE__); @@ -3173,7 +3176,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { #endif hts_log_print(opt, LOG_DEBUG, "robots.txt added at %s", - adr); + afs.af.adr); } else { hts_log_print(opt, LOG_ERROR, "Unexpected robots.txt error at %d", @@ -3187,7 +3190,8 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { // FIN ROBOTS.TXT AJOUT // enregistrer - if (!hts_record_link(opt, adr, fil, save, former_adr, former_fil, codebase)) { + if (!hts_record_link(opt, afs.af.adr, afs.af.fil, afs.save, + former.adr, former.fil, codebase)) { printf("PANIC! : Not enough memory [%d]\n", __LINE__); hts_log_print(opt, LOG_PANIC, "Not enough memory"); @@ -3245,7 +3249,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { } else { // if !dejafait hts_log_print(opt, LOG_DEBUG, "link has already been recorded, cancelled: %s", - save); + afs.save); } @@ -3260,9 +3264,9 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { } // if ok==0 - assertf(eadr - adr >= 0); // Should not go back - if (eadr > adr) { - INCREMENT_CURRENT_ADR(eadr - 1 - adr); + assertf(eadr - html >= 0); // Should not go back + if (eadr > html) { + INCREMENT_CURRENT_ADR(eadr - 1 - html); } // adr=eadr-1; // ** sauter @@ -3276,7 +3280,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { } // si '<' ou '>' // plus loin - adr++; // automate will be checked next loop + html++; // automate will be checked next loop /* Otimization: if we are scanning in HTML data (not in tag or script), then jump to the next starting tag */ @@ -3288,25 +3292,25 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { &&(!inscript_tag) /* Not in tag with script inside */ ) { /* Not at the end */ - if 
((((int) (adr - r->adr))) < r->size) { + if ((((int) (html - r->adr))) < r->size) { /* Not on a starting tag yet */ - if (*adr != '<') { + if (*html != '<') { /* strchr does not well behave with null chrs.. */ /* char* adr_next = strchr(adr,'<'); */ - char *adr_next = adr; + const char *adr_next = html; while(*adr_next != '<' && (adr_next - r->adr) < r->size) { adr_next++; } /* Jump to near end (index hack) */ if (!adr_next || *adr_next != '<') { - if (((int) (adr - r->adr) < (r->size - 4)) + if (((int) (html - r->adr) < (r->size - 4)) && (r->size > 4) ) { - adr = r->adr + r->size - 2; + html = r->adr + r->size - 2; } } else { - adr = adr_next; + html = adr_next; } } } @@ -3315,8 +3319,8 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { // ---------- // écrire peu à peu if ((opt->getmode & 1) && (ptr > 0)) - HT_ADD_ADR; - lastsaved = adr; // dernier écrit+1 + HT_add_adr; + lastsaved = html; // dernier écrit+1 // ---------- // Checks @@ -3325,13 +3329,13 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { // Check max time if (!back_checkmirror(opt)) { - adr = r->adr + r->size; + html = r->adr + r->size; } } // pour les stats du shell si parsing trop long if (r->size) opt->state._hts_in_html_done = - (100 * ((int) (adr - r->adr))) / (int) (r->size); + (100 * ((int) (html - r->adr))) / (int) (r->size); if (opt->state._hts_in_html_poll) { opt->state._hts_in_html_poll = 0; // temps à attendre, et remplir autant que l'on peut le cache (backing) @@ -3369,7 +3373,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) { back_wait(sback, opt, cache, HTS_STAT.stat_timestart); back_fillmax(sback, opt, cache, ptr, numero_passe); } - } while((((int) (adr - r->adr))) < r->size); + } while(html - r->adr < r->size); opt->state._hts_in_html_parsing = 0; // flag opt->state._hts_cancel = 0; // pas de cancel @@ -3440,22 +3444,23 @@ int hts_mirror_check_moved(htsmoduleStruct * str, hts_log_print(opt, LOG_WARNING, "%s for 
%s%s", r->msg, urladr(), urlfil()); { - char BIGSTK mov_url[HTS_URLMAXSIZE * 2], mov_adr[HTS_URLMAXSIZE * 2], - mov_fil[HTS_URLMAXSIZE * 2]; + char BIGSTK mov_url[HTS_URLMAXSIZE * 2]; + lien_adrfilsave savedmoved; + lien_adrfil *const moved = &savedmoved.af; int get_it = 0; // ne pas prendre le fichier à la même adresse par défaut int reponse = 0; mov_url[0] = '\0'; - mov_adr[0] = '\0'; - mov_fil[0] = '\0'; + moved->adr[0] = '\0'; + moved->fil[0] = '\0'; + savedmoved.save[0] = '\0'; // strcpybuff(mov_url, r->location); // url qque -> adresse+fichier if ((reponse = - ident_url_relatif(mov_url, urladr(), urlfil(), mov_adr, - mov_fil)) >= 0) { + ident_url_relatif(mov_url, urladr(), urlfil(), moved)) >= 0) { int set_prio_to = 0; // pas de priotité fixéd par wizard // check whether URLHack is harmless or not @@ -3464,24 +3469,24 @@ int hts_mirror_check_moved(htsmoduleStruct * str, char BIGSTK pn_adr[HTS_URLMAXSIZE * 2], pn_fil[HTS_URLMAXSIZE * 2]; n_adr[0] = n_fil[0] = '\0'; - (void) adr_normalized(mov_adr, n_adr); - (void) fil_normalized(mov_fil, n_fil); + (void) adr_normalized(moved->adr, n_adr); + (void) fil_normalized(moved->fil, n_fil); (void) adr_normalized(urladr(), pn_adr); (void) fil_normalized(urlfil(), pn_fil); if (strcasecmp(n_adr, pn_adr) == 0 && strcasecmp(n_fil, pn_fil) == 0) { hts_log_print(opt, LOG_WARNING, "Redirected link is identical because of 'URL Hack' option: %s%s and %s%s", - urladr(), urlfil(), mov_adr, mov_fil); + urladr(), urlfil(), moved->adr, moved->fil); } } - //if (ident_url_absolute(mov_url,mov_adr,mov_fil)!=-1) { // ok URL reconnue + //if (ident_url_absolute(mov_url,moved->adr,moved->fil)!=-1) { // ok URL reconnue // c'est (en gros) la même URL.. // si c'est un problème de casse dans le host c'est que le serveur est buggé // ("RFC says.." 
: host name IS case insensitive) - if ((strfield2(mov_adr, urladr()) != 0) && (strfield2(mov_fil, urlfil()) != 0)) { // identique à casse près + if ((strfield2(moved->adr, urladr()) != 0) && (strfield2(moved->fil, urlfil()) != 0)) { // identique à casse près // on tourne en rond - if (strcmp(mov_fil, urlfil()) == 0) { + if (strcmp(moved->fil, urlfil()) == 0) { error = 1; get_it = -1; // ne rien faire hts_log_print(opt, LOG_WARNING, @@ -3495,23 +3500,23 @@ int hts_mirror_check_moved(htsmoduleStruct * str, // -> on prend à cette adresse, le lien sera enregistré avec lien_record() (hash) hts_log_print(opt, LOG_DEBUG, "wizard link test for moved file at %s%s..", - mov_adr, mov_fil); + moved->adr, moved->fil); // accepté? - if (hts_acceptlink(opt, ptr, mov_adr, mov_fil, NULL, NULL, &set_prio_to, NULL) != 1) { /* nouvelle adresse non refusée ? */ + if (hts_acceptlink(opt, ptr, moved->adr, moved->fil, NULL, NULL, &set_prio_to, NULL) != 1) { /* nouvelle adresse non refusée ? */ get_it = 1; hts_log_print(opt, LOG_DEBUG, "moved link accepted: %s%s", - mov_adr, mov_fil); + moved->adr, moved->fil); } } /* sinon traité normalement */ } - //if ((strfield2(mov_adr,urladr())!=0) && (strfield2(mov_fil,urlfil())!=0)) { // identique à casse près + //if ((strfield2(moved->adr,urladr())!=0) && (strfield2(moved->fil,urlfil())!=0)) { // identique à casse près if (get_it == 1) { // court-circuiter le reste du traitement // et reculer pour mieux sauter hts_log_print(opt, LOG_WARNING, "Warning moved treated for %s%s (real one is %s%s)", - urladr(), urlfil(), mov_adr, mov_fil); + urladr(), urlfil(), moved->adr, moved->fil); // canceller lien actuel error = 1; hash_invalidate_entry(hashptr, ptr); // invalidate hashtable entry @@ -3520,17 +3525,14 @@ int hts_mirror_check_moved(htsmoduleStruct * str, // set_prio_to=0+1; // protection if the moved URL is an html page!! 
//xxc xxc { - char BIGSTK mov_sav[HTS_URLMAXSIZE * 2]; - // calculer lien et éventuellement modifier addresse/fichier - if (url_savename - (mov_adr, mov_fil, mov_sav, NULL, NULL, + if (url_savename(&savedmoved, NULL, heap(heap(ptr)->precedent)->adr, - heap(heap(ptr)->precedent)->fil, opt, opt->liens, opt->lien_tot, + heap(heap(ptr)->precedent)->fil, opt, sback, cache, hash, ptr, numero_passe, NULL) != -1) { - if (hash_read(hash, mov_sav, NULL, HASH_STRUCT_FILENAME) < 0) { // n'existe pas déja + if (hash_read(hash, savedmoved.save, NULL, HASH_STRUCT_FILENAME) < 0) { // n'existe pas déja // enregistrer lien avec SAV IDENTIQUE - if (hts_record_link(opt, mov_adr, mov_fil, heap(ptr)->sav, "", "", NULL)) { + if (hts_record_link(opt, moved->adr, moved->fil, heap(ptr)->sav, "", "", NULL)) { // mode test? heap_top()->testmode = heap(ptr)->testmode; heap_top()->link_import = 0; // mode normal @@ -3577,7 +3579,7 @@ int hts_mirror_check_moved(htsmoduleStruct * str, inplace_escape_uri(mov_url, sizeof(mov_url)); } else { char BIGSTK cid[HTS_URLMAXSIZE * 3]; - make_content_id(mov_adr, mov_fil, cid, sizeof(cid)); + make_content_id(moved->adr, moved->fil, cid, sizeof(cid)); strcpybuff(mov_url, "cid:"); strcatbuff(mov_url, cid); } @@ -3918,28 +3920,24 @@ void hts_mirror_process_user_interaction(htsmoduleStruct * str, // changement dans les préférences if (opt->state._hts_addurl) { - char BIGSTK add_adr[HTS_URLMAXSIZE * 2]; - char BIGSTK add_fil[HTS_URLMAXSIZE * 2]; + lien_adrfilsave add; while(*opt->state._hts_addurl) { char BIGSTK add_url[HTS_URLMAXSIZE * 2]; - add_adr[0] = add_fil[0] = add_url[0] = '\0'; + add.af.adr[0] = add.af.fil[0] = add_url[0] = '\0'; if (!link_has_authority(*opt->state._hts_addurl)) strcpybuff(add_url, "http://"); // ajouter http:// strcatbuff(add_url, *opt->state._hts_addurl); - if (ident_url_absolute(add_url, add_adr, add_fil) >= 0) { + if (ident_url_absolute(add_url, &add.af) >= 0) { // ----Ajout---- - // noter NOUVEAU lien - char BIGSTK 
add_sav[HTS_URLMAXSIZE * 2]; // calculer lien et éventuellement modifier addresse/fichier if (url_savename - (add_adr, add_fil, add_sav, NULL, NULL, NULL, NULL, opt, opt->liens, - opt->lien_tot, sback, cache, hash, ptr, numero_passe, NULL) != -1) { - if (hash_read(hash, add_sav, NULL, HASH_STRUCT_FILENAME) < 0) { // n'existe pas déja + (&add, NULL, NULL, NULL, opt, sback, cache, hash, ptr, numero_passe, NULL) != -1) { + if (hash_read(hash, add.save, NULL, HASH_STRUCT_FILENAME) < 0) { // n'existe pas déja // enregistrer lien - if (hts_record_link(opt, add_adr, add_fil, add_sav, "", "", NULL)) { + if (hts_record_link(opt, add.af.adr, add.af.fil, add.save, "", "", NULL)) { heap_top()->testmode = 0; // mode test? heap_top()->link_import = 0; // mode normal heap_top()->depth = opt->depth; @@ -3948,8 +3946,8 @@ void hts_mirror_process_user_interaction(htsmoduleStruct * str, heap_top()->premier = heap_top_index(); heap_top()->precedent = heap_top_index(); // - hts_log_print(opt, LOG_INFO, "Link added by user: %s%s", add_adr, - add_fil); + hts_log_print(opt, LOG_INFO, "Link added by user: %s%s", add.af.adr, + add.af.fil); // } else { // oups erreur, plus de mémoire!! printf("PANIC! 
: Not enough memory [%d]\n", __LINE__); @@ -3961,7 +3959,7 @@ void hts_mirror_process_user_interaction(htsmoduleStruct * str, } else { hts_log_print(opt, LOG_NOTICE, "Existing link %s%s not added after user request", - add_adr, add_fil); + add.af.adr, add.af.fil); } } @@ -4415,9 +4413,9 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct * str, } /* Wait for delayed types */ -int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save, - char *parent_adr, char *parent_fil, char *former_adr, - char *former_fil, int *forbidden_url) { +int hts_wait_delayed(htsmoduleStruct * str, lien_adrfilsave *afs, + char *parent_adr, char *parent_fil, lien_adrfil *former, + int *forbidden_url) { ENGINE_LOAD_CONTEXT_BASE(); hash_struct *const hash = hashptr; @@ -4426,17 +4424,17 @@ int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save, char in_error_msg[32]; // resolve unresolved type - if (opt->savename_delayed != 0 && *forbidden_url == 0 && IS_DELAYED_EXT(save) + if (opt->savename_delayed != 0 && *forbidden_url == 0 && IS_DELAYED_EXT(afs->save) && !opt->state.stop) { int loops; int continue_loop; - hts_log_print(opt, LOG_DEBUG, "Waiting for type to be known: %s%s", adr, - fil); + hts_log_print(opt, LOG_DEBUG, "Waiting for type to be known: %s%s", afs->af.adr, + afs->af.fil); /* Follow while type is unknown and redirects occurs */ for(loops = 0, continue_loop = 1; - IS_DELAYED_EXT(save) && continue_loop && loops < 7; loops++) { + IS_DELAYED_EXT(afs->save) && continue_loop && loops < 7; loops++) { continue_loop = 0; /* @@ -4449,22 +4447,22 @@ int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save, lien_back back; memset(&back, 0, sizeof(back)); - back.r = cache_read(opt, cache, adr, fil, NULL, NULL); // test uniquement + back.r = cache_read(opt, cache, afs->af.adr, afs->af.fil, NULL, NULL); // test uniquement if (back.r.statuscode == HTTP_OK && strnotempty(back.r.contenttype)) { // cache found, and aswer is 'OK' 
hts_log_print(opt, LOG_DEBUG, "Direct type lookup in cache (-%%D1): %s", back.r.contenttype); /* Recompute filename with MIME type */ - save[0] = '\0'; - url_savename(adr, fil, save, former_adr, former_fil, heap(ptr)->adr, - heap(ptr)->fil, opt, opt->liens, opt->lien_tot, sback, cache, + afs->save[0] = '\0'; + url_savename(afs, former, heap(ptr)->adr, + heap(ptr)->fil, opt, sback, cache, hash, ptr, numero_passe, &back); /* Recompute authorization with MIME type */ { int new_forbidden_url = - hts_acceptmime(opt, ptr, adr, fil, back.r.contenttype); + hts_acceptmime(opt, ptr, afs->af.adr, afs->af.fil, back.r.contenttype); if (new_forbidden_url != -1) { hts_log_print(opt, LOG_DEBUG, "result for wizard mime test: %d", new_forbidden_url); @@ -4472,7 +4470,7 @@ int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save, *forbidden_url = new_forbidden_url; hts_log_print(opt, LOG_DEBUG, "link forbidden because of MIME types restrictions: %s%s", - adr, fil); + afs->af.adr, afs->af.fil); break; // exit loop } } @@ -4484,11 +4482,11 @@ int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save, } /* Check if the file was recorded already (necessary for redirects) */ - if (hash_read(hash, save, NULL, HASH_STRUCT_FILENAME) >= 0) { + if (hash_read(hash, afs->save, NULL, HASH_STRUCT_FILENAME) >= 0) { if (loops == 0) { /* Should not happend */ hts_log_print(opt, LOG_ERROR, "Duplicate entry in hts_wait_delayed() cancelled: %s%s -> %s", - adr, fil, save); + afs->af.adr, afs->af.fil, afs->save); } /* Exit loop (we're done) */ continue_loop = 0; @@ -4497,11 +4495,11 @@ int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save, /* Add in backing (back_index() will respond correctly) */ if (back_add_if_not_exists - (sback, opt, cache, adr, fil, save, parent_adr, parent_fil, + (sback, opt, cache, afs->af.adr, afs->af.fil, afs->save, parent_adr, parent_fil, 0) != -1) { int b; - b = back_index(opt, sback, adr, fil, save); + b = 
back_index(opt, sback, afs->af.adr, afs->af.fil, afs->save); if (b < 0) { printf("PANIC! : Crash adding error, unexpected error found.. [%d]\n", __LINE__); @@ -4529,15 +4527,15 @@ int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save, b = -1; /* Recompute filename with MIME type */ - save[0] = '\0'; - url_savename(adr, fil, save, former_adr, former_fil, heap(ptr)->adr, - heap(ptr)->fil, opt, opt->liens, opt->lien_tot, sback, cache, + afs->save[0] = '\0'; + url_savename(afs, former, heap(ptr)->adr, + heap(ptr)->fil, opt, sback, cache, hash, ptr, numero_passe, &delayed_back); /* Recompute authorization with MIME type */ { int new_forbidden_url = - hts_acceptmime(opt, ptr, adr, fil, delayed_back.r.contenttype); + hts_acceptmime(opt, ptr, afs->af.adr, afs->af.fil, delayed_back.r.contenttype); if (new_forbidden_url != -1) { hts_log_print(opt, LOG_DEBUG, "result for wizard mime test: %d", *forbidden_url); @@ -4545,7 +4543,7 @@ int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save, *forbidden_url = new_forbidden_url; hts_log_print(opt, LOG_DEBUG, "link forbidden because of MIME types restrictions: %s%s", - adr, fil); + afs->af.adr, afs->af.fil); break; // exit loop } } @@ -4553,9 +4551,9 @@ int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save, /* Re-Add wiht correct type */ if (back_add_if_not_exists - (sback, opt, cache, adr, fil, save, parent_adr, parent_fil, + (sback, opt, cache, afs->af.adr, afs->af.fil, afs->save, parent_adr, parent_fil, 0) != -1) { - b = back_index(opt, sback, adr, fil, save); + b = back_index(opt, sback, afs->af.adr, afs->af.fil, afs->save); } if (b < 0) { printf @@ -4664,36 +4662,35 @@ int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save, /* Handle redirect */ if ((int) strnotempty(mov_url)) { // location existe! 
- char BIGSTK mov_adr[HTS_URLMAXSIZE * 2], - mov_fil[HTS_URLMAXSIZE * 2]; - mov_adr[0] = mov_fil[0] = '\0'; + lien_adrfil moved; + moved.adr[0] = moved.fil[0] = '\0'; // - if (ident_url_relatif(mov_url, adr, fil, mov_adr, mov_fil) >= 0) { + if (ident_url_relatif(mov_url, afs->af.adr, afs->af.fil, &moved) >= 0) { hts_log_print(opt, LOG_DEBUG, "Redirect while resolving type: %s%s -> %s%s", - adr, fil, mov_adr, mov_fil); + afs->af.adr, afs->af.fil, moved.adr, moved.fil); // si non bouclage sur soi même, ou si test avec GET non testé - if (strcmp(mov_adr, adr) != 0 || strcmp(mov_fil, fil) != 0) { + if (strcmp(moved.adr, afs->af.adr) != 0 || strcmp(moved.fil, afs->af.fil) != 0) { - // recopier former_adr/fil? - if ((former_adr) && (former_fil)) { - if (strnotempty(former_adr) == 0) { // Pas déja noté - strcpybuff(former_adr, adr); - strcpybuff(former_fil, fil); + // recopier former->adr/fil? + if (former != NULL) { + if (strnotempty(former->adr) == 0) { // Pas déja noté + strcpybuff(former->adr, afs->af.adr); + strcpybuff(former->fil, afs->af.fil); } } // check explicit forbidden - don't follow 3xx in this case { int set_prio_to = 0; - if (hts_acceptlink(opt, ptr, mov_adr, mov_fil, NULL, NULL, &set_prio_to, NULL) == 1) { /* forbidden */ + if (hts_acceptlink(opt, ptr, moved.adr, moved.fil, NULL, NULL, &set_prio_to, NULL) == 1) { /* forbidden */ /* Note: the cache 'cached_tests' system will remember this error, and we'll only issue ONE request */ *forbidden_url = 1; /* Forbidden! */ hts_log_print(opt, LOG_DEBUG, "link forbidden because of redirect beyond the mirror scope at %s%s -> %s%s", - adr, fil, mov_adr, mov_fil); - strcpybuff(adr, mov_adr); - strcpybuff(fil, mov_fil); + afs->af.adr, afs->af.fil, moved.adr, moved.fil); + strcpybuff(afs->af.adr, moved.adr); + strcpybuff(afs->af.fil, moved.fil); mov_url[0] = '\0'; break; } @@ -4701,45 +4698,44 @@ int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save, // ftp: stop! 
if (strfield(mov_url, "ftp://")) { - strcpybuff(adr, mov_adr); - strcpybuff(fil, mov_fil); + strcpybuff(afs->af.adr, moved.adr); + strcpybuff(afs->af.fil, moved.fil); break; } /* ok, continue */ - strcpybuff(adr, mov_adr); - strcpybuff(fil, mov_fil); + strcpybuff(afs->af.adr, moved.adr); + strcpybuff(afs->af.fil, moved.fil); continue_loop = 1; /* Recompute filename for hash lookup */ - save[0] = '\0'; - url_savename(adr, fil, save, former_adr, former_fil, - heap(ptr)->adr, heap(ptr)->fil, opt, opt->liens, - opt->lien_tot, sback, cache, hash, ptr, numero_passe, + afs->save[0] = '\0'; + url_savename(afs, former, heap(ptr)->adr, heap(ptr)->fil, + opt, sback, cache, hash, ptr, numero_passe, &delayed_back); } else { hts_log_print(opt, LOG_WARNING, "Unable to test %s%s (loop to same filename)", - adr, fil); + afs->af.adr, afs->af.fil); } // loop to same location } // ident_url_relatif() } // location } // redirect - hts_log_print(opt, LOG_DEBUG, "Final type for %s%s: '%s'", adr, fil, + hts_log_print(opt, LOG_DEBUG, "Final type for %s%s: '%s'", afs->af.adr, afs->af.fil, delayed_back.r.contenttype); /* If we are done, do additional checks with final type and authorizations */ if (!continue_loop) { /* Recompute filename with MIME type */ - save[0] = '\0'; - url_savename(adr, fil, save, former_adr, former_fil, - heap(ptr)->adr, heap(ptr)->fil, opt, opt->liens, opt->lien_tot, + afs->save[0] = '\0'; + url_savename(afs, former, + heap(ptr)->adr, heap(ptr)->fil, opt, sback, cache, hash, ptr, numero_passe, &delayed_back); /* Recompute authorization with MIME type */ { int new_forbidden_url = - hts_acceptmime(opt, ptr, adr, fil, delayed_back.r.contenttype); + hts_acceptmime(opt, ptr, afs->af.adr, afs->af.fil, delayed_back.r.contenttype); if (new_forbidden_url != -1) { hts_log_print(opt, LOG_DEBUG, "result for wizard mime test: %d", *forbidden_url); @@ -4747,7 +4743,7 @@ int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save, *forbidden_url = 
new_forbidden_url; hts_log_print(opt, LOG_DEBUG, "link forbidden because of MIME types restrictions: %s%s", - adr, fil); + afs->af.adr, afs->af.fil); break; // exit loop } } @@ -4764,7 +4760,7 @@ int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save, } } /* Patch destination filename for direct-to-disk mode */ - strcpybuff(back[b].url_sav, save); + strcpybuff(back[b].url_sav, afs->save); } } // b >= 0 @@ -4791,25 +4787,25 @@ int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save, if (in_error == STATUSCODE_TOO_BIG) { hts_log_print(opt, LOG_INFO, "link not taken because of its size (%d bytes) at %s%s", - (int) in_error_size, adr, fil); + (int) in_error_size, afs->af.adr, afs->af.fil); } else { hts_log_print(opt, LOG_INFO, "link not taken because of error (%d '%s') at %s%s", - in_error, in_error_msg, adr, fil); + in_error, in_error_msg, afs->af.adr, afs->af.fil); } } } // error - if (*forbidden_url != 1 && IS_DELAYED_EXT(save)) { + if (*forbidden_url != 1 && IS_DELAYED_EXT(afs->save)) { *forbidden_url = 1; if (in_error) { hts_log_print(opt, LOG_WARNING, "link in error (%d '%s'), type unknown, aborting: %s%s", - in_error, in_error_msg, adr, fil); + in_error, in_error_msg, afs->af.adr, afs->af.fil); } else { hts_log_print(opt, LOG_WARNING, "link is probably looping, type unknown, aborting: %s%s", - adr, fil); + afs->af.adr, afs->af.fil); } } |