summaryrefslogtreecommitdiff
path: root/src/htsparse.c
diff options
context:
space:
mode:
authorXavier Roche <xroche@users.noreply.github.com>2014-05-26 19:27:46 +0000
committerXavier Roche <xroche@users.noreply.github.com>2014-05-26 19:27:46 +0000
commit5364514f340287bcce591955fce500ed57d2da79 (patch)
treeca2097bd351fe9d4d137a2d7fe687dd0b815d56b /src/htsparse.c
parentf078a39677953cf8d3cdaedcb1b3c4db65473308 (diff)
Big links heap handling cleanup, and removed very old and legacy macros
Diffstat (limited to 'src/htsparse.c')
-rw-r--r--src/htsparse.c587
1 files changed, 238 insertions, 349 deletions
diff --git a/src/htsparse.c b/src/htsparse.c
index a8ee0f3..e49b00f 100644
--- a/src/htsparse.c
+++ b/src/htsparse.c
@@ -60,17 +60,6 @@ Please visit our Website: http://www.httrack.com
#include "htsparse.h"
#include "htsback.h"
-// specific defines
-#define urladr (liens[ptr]->adr)
-#define urlfil (liens[ptr]->fil)
-#define savename (liens[ptr]->sav)
-#define parenturladr (liens[liens[ptr]->precedent]->adr)
-#define parenturlfil (liens[liens[ptr]->precedent]->fil)
-#define parentsavename (liens[liens[ptr]->precedent]->sav)
-#define relativeurladr ((!parent_relative)?urladr:parenturladr)
-#define relativeurlfil ((!parent_relative)?urlfil:parenturlfil)
-#define relativesavename ((!parent_relative)?savename:parentsavename)
-
// does nothing
#define XH_uninit do {} while(0)
@@ -148,25 +137,25 @@ Please visit our Website: http://www.httrack.com
int ok=0;\
if (ht_buff) { \
char digest[32+2];\
- off_t fsize_old = fsize(fconv(OPT_GET_BUFF(opt),OPT_GET_BUFF_SIZE(opt),savename));\
+ off_t fsize_old = fsize(fconv(OPT_GET_BUFF(opt),OPT_GET_BUFF_SIZE(opt),savename()));\
digest[0]='\0';\
domd5mem(ht_buff,ht_len,digest,1);\
if (fsize_old==ht_len) { \
int mlen = 0;\
char* mbuff;\
- cache_readdata(cache,"//[HTML-MD5]//",savename,&mbuff,&mlen);\
+ cache_readdata(cache,"//[HTML-MD5]//",savename(),&mbuff,&mlen);\
if (mlen) \
mbuff[mlen]='\0';\
if ((mlen == 32) && (strcmp(((mbuff!=NULL)?mbuff:""),digest)==0)) {\
ok=1;\
- hts_log_print(opt, LOG_DEBUG, "File not re-written (md5): %s",savename);\
+ hts_log_print(opt, LOG_DEBUG, "File not re-written (md5): %s",savename());\
} else {\
ok=0;\
} \
}\
if (!ok) { \
- file_notify(opt,urladr, urlfil, savename, 1, 1, r->notmodified); \
- fp=filecreate(&opt->state.strc, savename); \
+ file_notify(opt,urladr(), urlfil(), savename(), 1, 1, r->notmodified); \
+ fp=filecreate(&opt->state.strc, savename()); \
if (fp) { \
if (ht_len>0) {\
if (fwrite(ht_buff,1,ht_len,fp) != ht_len) { \
@@ -175,7 +164,7 @@ Please visit our Website: http://www.httrack.com
opt->state.exit_xh=-1;\
}\
if (opt->log) { \
- hts_log_print(opt, LOG_ERROR | LOG_ERRNO, "Unable to write HTML file %s", savename);\
+ hts_log_print(opt, LOG_ERROR | LOG_ERRNO, "Unable to write HTML file %s", savename());\
if (fcheck) {\
hts_log_print(opt, LOG_ERROR, "* * Fatal write error, giving up");\
}\
@@ -184,24 +173,24 @@ Please visit our Website: http://www.httrack.com
}\
fclose(fp); fp=NULL; \
if (strnotempty(r->lastmodified)) \
- set_filetime_rfc822(savename,r->lastmodified); \
+ set_filetime_rfc822(savename(),r->lastmodified); \
} else {\
int fcheck;\
if ((fcheck=check_fatal_io_errno())) {\
hts_log_print(opt, LOG_ERROR, "Mirror aborted: disk full or filesystem problems"); \
opt->state.exit_xh=-1;\
}\
- hts_log_print(opt, LOG_ERROR | LOG_ERRNO, "Unable to save file %s", savename);\
+ hts_log_print(opt, LOG_ERROR | LOG_ERRNO, "Unable to save file %s", savename());\
if (fcheck) {\
hts_log_print(opt, LOG_ERROR, "* * Fatal write error, giving up");\
}\
}\
} else {\
- file_notify(opt,urladr, urlfil, savename, 0, 0, r->notmodified); \
- filenote(&opt->state.strc, savename,NULL); \
+ file_notify(opt,urladr(), urlfil(), savename(), 0, 0, r->notmodified); \
+ filenote(&opt->state.strc, savename(),NULL); \
}\
if (cache->ndx)\
- cache_writedata(cache->ndx,cache->dat,"//[HTML-MD5]//",savename,digest,(int)strlen(digest));\
+ cache_writedata(cache->ndx,cache->dat,"//[HTML-MD5]//",savename(),digest,(int)strlen(digest));\
} \
freet(ht_buff); ht_buff=NULL; \
}
@@ -231,74 +220,6 @@ Please visit our Website: http://www.httrack.com
makeindex_done=1; /* ok c'est fait */ \
} while(0)
-// Enregistrement d'un lien:
-// on calcule la taille nécessaire: taille des 3 chaînes à stocker (taille forcée paire, plus 2 octets de sécurité)
-// puis on vérifie qu'on a assez de marge dans le buffer - sinon on en réalloue un autre
-// enfin on écrit à l'adresse courante du buffer, qu'on incrémente. on décrémente la taille dispo d'autant ensuite
-// codebase: si non nul et si .class stockee on le note pour chemin primaire pour classes
-// FA,FS: former_adr et former_fil, lien original
-#define liens_record_sav_len(A)
-
-// COPIE DE HTSCORE.C
-
-#define liens_record(A,F,S,FA,FF) { \
- int notecode=0; \
- size_t lienurl_len=((sizeof(lien_url)+HTS_ALIGN-1)/HTS_ALIGN)*HTS_ALIGN,\
- adr_len=strlen(A),\
- fil_len=strlen(F),\
- sav_len=strlen(S),\
- cod_len=0,\
- former_adr_len=strlen(FA),\
- former_fil_len=strlen(FF); \
- if (former_adr_len>0) {\
- former_adr_len=(former_adr_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; \
- former_fil_len=(former_fil_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; \
- } else \
- former_adr_len=former_fil_len=0;\
- if (strlen(F)>6) if (strnotempty(codebase)) if (strfield(F+strlen(F)-6,".class")) {\
- notecode=1; \
- cod_len=strlen(codebase); \
- cod_len=(cod_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; \
- } \
- adr_len=(adr_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; \
- fil_len=(fil_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; \
- sav_len=(sav_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; \
- if ((int) lien_size < (int) (adr_len+fil_len+sav_len+cod_len+former_adr_len+former_fil_len+lienurl_len)) { \
- lien_buffer=(char*) ((void*) calloct(add_tab_alloc,1)); \
- lien_size=add_tab_alloc; \
- if (lien_buffer!=NULL) { \
- liens[lien_tot]=(lien_url*) (void*) lien_buffer; lien_buffer+=lienurl_len; lien_size-=lienurl_len; \
- liens[lien_tot]->firstblock=1; \
- } \
- } else { \
- liens[lien_tot]=(lien_url*) (void*) lien_buffer; lien_buffer+=lienurl_len; lien_size-=lienurl_len; \
- liens[lien_tot]->firstblock=0; \
- } \
- if (liens[lien_tot]!=NULL) { \
- liens[lien_tot]->adr=lien_buffer; lien_buffer+=adr_len; lien_size-=adr_len; \
- liens[lien_tot]->fil=lien_buffer; lien_buffer+=fil_len; lien_size-=fil_len; \
- liens[lien_tot]->sav=lien_buffer; lien_buffer+=sav_len; lien_size-=sav_len; \
- liens[lien_tot]->cod=NULL; \
- if (notecode) { \
- liens[lien_tot]->cod=lien_buffer; \
- lien_buffer+=cod_len; \
- lien_size-=cod_len; \
- strcpybuff(liens[lien_tot]->cod,codebase); \
- } \
- if (former_adr_len>0) {\
- liens[lien_tot]->former_adr=lien_buffer; lien_buffer+=former_adr_len; lien_size-=former_adr_len; \
- liens[lien_tot]->former_fil=lien_buffer; lien_buffer+=former_fil_len; lien_size-=former_fil_len; \
- strcpybuff(liens[lien_tot]->former_adr,FA); \
- strcpybuff(liens[lien_tot]->former_fil,FF); \
- }\
- strcpybuff(liens[lien_tot]->adr,A); \
- strcpybuff(liens[lien_tot]->fil,F); \
- strcpybuff(liens[lien_tot]->sav,S); \
- liens_record_sav_len(liens[lien_tot]); \
- hash_write(hashptr,lien_tot); \
- } \
-}
-
#define ENGINE_DEFINE_CONTEXT() \
ENGINE_DEFINE_CONTEXT_BASE(); \
/* */ \
@@ -316,7 +237,6 @@ Please visit our Website: http://www.httrack.com
/* */ \
int error = * stre->error_; \
int store_errpage = * stre->store_errpage_; \
- int lien_max = *stre->lien_max_; \
/* */ \
int makeindex_done = *stre->makeindex_done_; \
FILE* makeindex_fp = *stre->makeindex_fp_; \
@@ -331,7 +251,6 @@ Please visit our Website: http://www.httrack.com
/* */ \
error = * stre->error_; \
store_errpage = * stre->store_errpage_; \
- lien_max = *stre->lien_max_; \
/* */ \
makeindex_done = *stre->makeindex_done_; \
makeindex_fp = *stre->makeindex_fp_; \
@@ -349,7 +268,6 @@ Please visit our Website: http://www.httrack.com
/* */ \
* stre->error_ = error; \
* stre->store_errpage_ = store_errpage; \
- * stre->lien_max_ = lien_max; \
/* */ \
*stre->makeindex_done_ = makeindex_done; \
*stre->makeindex_fp_ = makeindex_fp; \
@@ -396,26 +314,26 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
char *cAddr = r->adr;
int cSize = (int) r->size;
- hts_log_print(opt, LOG_DEBUG, "engine: preprocess-html: %s%s", urladr,
- urlfil);
- if (RUN_CALLBACK4(opt, preprocess, &cAddr, &cSize, urladr, urlfil) == 1) {
+ hts_log_print(opt, LOG_DEBUG, "engine: preprocess-html: %s%s", urladr(),
+ urlfil());
+ if (RUN_CALLBACK4(opt, preprocess, &cAddr, &cSize, urladr(), urlfil()) == 1) {
r->adr = cAddr;
r->size = cSize;
}
}
- if (RUN_CALLBACK4(opt, check_html, r->adr, (int) r->size, urladr, urlfil)) {
+ if (RUN_CALLBACK4(opt, check_html, r->adr, (int) r->size, urladr(), urlfil())) {
FILE *fp = NULL; // fichier écrit localement
char *adr = r->adr; // pointeur (on parcourt)
char *lastsaved; // adresse du dernier octet sauvé + 1
- hts_log_print(opt, LOG_DEBUG, "scanning file %s%s (%s)..", urladr, urlfil,
- savename);
+ hts_log_print(opt, LOG_DEBUG, "scanning file %s%s (%s)..", urladr(), urlfil(),
+ savename());
// Indexing!
#if HTS_MAKE_KEYWORD_INDEX
if (opt->kindex) {
if (index_keyword
- (r->adr, r->size, r->contenttype, savename,
+ (r->adr, r->size, r->contenttype, savename(),
StringBuff(opt->path_html_utf8))) {
hts_log_print(opt, LOG_DEBUG, "indexing file..done");
} else {
@@ -545,7 +463,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
(opt, r->contenttype, str->url_file,
"application/x-javascript") != 0) {
// all links must be checked against parent, not this link
- if (liens[ptr]->precedent != 0) {
+ if (heap(ptr)->precedent != 0) {
parent_relative = 1;
}
}
@@ -633,7 +551,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
// Ici on teste si l'on doit construire l'index vers le(s) site(s) miroir(s)
if (!makeindex_done) { // autoriation d'écrire un index
if (!detect_title) {
- if (opt->depth == liens[ptr]->depth) { // on note toujours les premiers liens
+ if (opt->depth == heap(ptr)->depth) { // on note toujours les premiers liens
if (!in_media) {
if (opt->makeindex && (ptr > 0)) {
if (opt->getmode & 1) { // autorisation d'écrire
@@ -694,7 +612,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
}
}
if (lienrelatif
- (tempo, liens[ptr]->sav,
+ (tempo, heap(ptr)->sav,
concat(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt),
StringBuff(opt->path_html_utf8),
"index.html")) == 0) {
@@ -740,7 +658,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
}
}
- } else if (liens[ptr]->depth < opt->depth) { // on a sauté level1+1 et level1
+ } else if (heap(ptr)->depth < opt->depth) { // on a sauté level1+1 et level1
HT_INDEX_END;
}
} // if (opt->makeindex)
@@ -831,7 +749,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
time_gmt_rfc822(gmttime);
strcatbuff(tempo, eol);
sprintf(tempo + strlen(tempo), StringBuff(opt->footer),
- jump_identification(urladr), urlfil, gmttime,
+ jump_identification(urladr()), urlfil(), gmttime,
HTTRACK_VERSIONID, "", "", "", "", "", "", "");
strcatbuff(tempo, eol);
//fwrite(tempo,1,strlen(tempo),fp);
@@ -1328,12 +1246,12 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
#if DEBUG_ROBOTS
printf
("robots.txt meta tag: nofollow in %s%s\n",
- urladr, urlfil);
+ urladr(), urlfil());
#endif
nofollow = 1; // NE PLUS suivre liens dans cette page
hts_log_print(opt, LOG_WARNING,
"Link %s%s not scanned (follow robots meta tag)",
- urladr, urlfil);
+ urladr(), urlfil());
}
}
}
@@ -2314,7 +2232,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
if (!link_has_authority(lien)) { // pas de http://
char BIGSTK adr2[HTS_URLMAXSIZE * 2], fil2[HTS_URLMAXSIZE * 2]; // ** euh ident_url_relatif??
- if (ident_url_relatif(lien, urladr, urlfil, adr2, fil2) <
+ if (ident_url_relatif(lien, urladr(), urlfil(), adr2, fil2) <
0) {
error = 1;
} else {
@@ -2335,7 +2253,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
}
//char BIGSTK tempo[HTS_URLMAXSIZE*2];
//strcpybuff(tempo,"http://");
- //strcatbuff(tempo,urladr); // host
+ //strcatbuff(tempo,urladr()); // host
//if (*lien!='/')
// strcatbuff(tempo,"/");
//strcatbuff(tempo,lien);
@@ -2402,7 +2320,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
HTS_URLMAXSIZE) {
// mailto: and co: do NOT add base
if (ident_url_relatif
- (lien, urladr, urlfil, adr, fil) >= 0) {
+ (lien, urladr(), urlfil(), adr, fil) >= 0) {
char BIGSTK tempo[HTS_URLMAXSIZE * 2];
// base est absolue
@@ -2462,9 +2380,9 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
hts_log_print(opt, LOG_DEBUG,
"build relative link %s with %s%s", lien,
- relativeurladr, relativeurlfil);
+ relativeurladr(), relativeurlfil());
if ((reponse =
- ident_url_relatif(lien, relativeurladr, relativeurlfil,
+ ident_url_relatif(lien, relativeurladr(), relativeurlfil(),
adr, fil)) < 0) {
adr[0] = '\0'; // erreur
if (reponse == -2) {
@@ -2474,12 +2392,12 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
} else {
hts_log_print(opt, LOG_DEBUG,
"ident_url_relatif failed for %s with %s%s",
- lien, relativeurladr, relativeurlfil);
+ lien, relativeurladr(), relativeurlfil());
}
} else {
hts_log_print(opt, LOG_DEBUG,
"built relative link %s with %s%s -> %s%s",
- lien, relativeurladr, relativeurlfil, adr,
+ lien, relativeurladr(), relativeurlfil(), adr,
fil);
}
} else {
@@ -2506,7 +2424,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
hts_log_print(opt, LOG_DEBUG,
"wizard link test at %s%s..", adr, fil);
forbidden_url =
- hts_acceptlink(opt, ptr, lien_tot, liens, adr, fil,
+ hts_acceptlink(opt, ptr, opt->lien_tot, opt->liens, adr, fil,
intag_name ? intag_name : NULL,
intag_name ? tag_attr_start : NULL,
&set_prio_to, &just_test_it);
@@ -2519,13 +2437,13 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
// calculer meme_adresse
meme_adresse =
strfield2(jump_identification(adr),
- jump_identification(urladr));
+ jump_identification(urladr()));
// Début partie sauvegarde
// ici on forme le nom du fichier à sauver, et on patche l'URL
if (adr[0] != '\0') {
- // savename: simplifier les ../ et autres joyeusetés
+ // savename(): simplifier les ../ et autres joyeusetés
char BIGSTK save[HTS_URLMAXSIZE * 2];
int r_sv = 0;
@@ -2556,8 +2474,8 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
//strcpybuff(last_fil,fil); // ancien chemin
r_sv =
url_savename(adr, fil, save, former_adr, former_fil,
- liens[ptr]->adr, liens[ptr]->fil, opt,
- liens, lien_tot, sback, cache, hash, ptr,
+ heap(ptr)->adr, heap(ptr)->fil, opt,
+ opt->liens, opt->lien_tot, sback, cache, hash, ptr,
numero_passe, NULL);
if (strcmp(jump_identification(last_adr), jump_identification(adr)) != 0) { // a changé
@@ -2573,7 +2491,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
"wizard moved link retest at %s%s..",
adr, fil);
forbidden_url =
- hts_acceptlink(opt, ptr, lien_tot, liens, adr,
+ hts_acceptlink(opt, ptr, opt->lien_tot, opt->liens, adr,
fil,
intag_name ? intag_name : NULL,
intag_name ? tag_attr_start :
@@ -2600,8 +2518,8 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
// pas d'erreur, on continue
r_sv =
- hts_wait_delayed(str, adr, fil, save, liens[ptr]->adr,
- liens[ptr]->fil, former_adr, former_fil,
+ hts_wait_delayed(str, adr, fil, save, heap(ptr)->adr,
+ heap(ptr)->fil, former_adr, former_fil,
&forbidden_url);
/* User interaction, because hts_wait_delayed can be slow.. (3.43) */
@@ -2777,7 +2695,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
strcpybuff(save, StringBuff(opt->path_html_utf8));
strcatbuff(save, cat_name);
- if (lienrelatif(tempo, save, relativesavename) ==
+ if (lienrelatif(tempo, save, relativesavename()) ==
0) {
/* Never escape high-chars (we don't know the encoding!!) */
inplace_escape_uri_utf(tempo, sizeof(tempo)); // escape with %xx
@@ -3009,7 +2927,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
tempo[0] = '\0';
// calculer le lien relatif
- if (lienrelatif(tempo, save, relativesavename) == 0) {
+ if (lienrelatif(tempo, save, relativesavename()) == 0) {
if (!in_media) { // In media (such as real audio): don't patch
/* Never escape high-chars (we don't know the encoding!!) */
inplace_escape_uri_utf(tempo, sizeof(tempo));
@@ -3026,7 +2944,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
}
hts_log_print(opt, LOG_DEBUG,
"relative link at %s build with %s and %s: %s",
- adr, save, relativesavename, tempo);
+ adr, save, relativesavename(), tempo);
// lien applet (code) - il faut placer un codebase avant
if (p_type == -1) { // que le nom de fichier
@@ -3115,7 +3033,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
} else {
hts_log_print(opt, LOG_WARNING,
"Error building relative link %s and %s",
- save, relativesavename);
+ save, relativesavename());
}
} // sinon le lien sera écrit normalement
@@ -3138,7 +3056,10 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
if ((adr[0] != '\0') && (p_type != 2) && (p_type != -2) && (forbidden_url != 1)) { // si le fichier n'existe pas, ajouter à la liste
// n'y a-t-il pas trop de liens?
- if (lien_tot + 1 >= lien_max - 4) { // trop de liens!
+ if (0) {
+ // CLEANUP
+#if 0
+ if (opt->lien_tot + 1 >= lien_max - 4) { // trop de liens!
printf("PANIC! : Too many URLs : >%d [%d]\n", lien_tot,
__LINE__);
hts_log_print(opt, LOG_PANIC,
@@ -3154,7 +3075,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
}
XH_uninit; // désallocation mémoire & buffers
return -1;
-
+#endif
} else { // noter le lien sur la listes des liens à charger
int pass_fix, dejafait = 0;
@@ -3189,16 +3110,16 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
if (i >= 0) {
if ((opt->debug > 1) && (opt->log != NULL)) {
- if (strcmp(adr, liens[i]->adr) != 0
- || strcmp(fil, liens[i]->fil) != 0) {
+ if (strcmp(adr, heap(i)->adr) != 0
+ || strcmp(fil, heap(i)->fil) != 0) {
hts_log_print(opt, LOG_DEBUG,
"merging similar links %s%s and %s%s",
- adr, fil, liens[i]->adr,
- liens[i]->fil);
+ adr, fil, heap(i)->adr,
+ heap(i)->fil);
}
}
- liens[i]->depth =
- maximum(liens[i]->depth, liens[ptr]->depth - 1);
+ heap(i)->depth =
+ maximum(heap(i)->depth, heap(ptr)->depth - 1);
dejafait = 1;
}
}
@@ -3210,7 +3131,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
// >>>> CREER LE LIEN <<<<
//
// enregistrer lien à charger
- //liens[lien_tot]->adr[0]=liens[lien_tot]->fil[0]=liens[lien_tot]->sav[0]='\0';
+ //heap_top()->adr[0]=heap_top()->fil[0]=heap_top()->sav[0]='\0';
// même adresse: l'objet père est l'objet père de l'actuel
// DEBUT ROBOTS.TXT AJOUT
@@ -3224,17 +3145,11 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
checkrobots_set(_ROBOTS, adr, ""); // ajouter entrée vide
if (checkrobots(_ROBOTS, adr, "") == -1) { // robots.txt ?
// enregistrer robots.txt (MACRO)
- liens_record(adr, "/robots.txt", "", "",
- "");
- if (liens[lien_tot] == NULL) { // erreur, pas de place réservée
+ if (!hts_record_link(opt, adr, "/robots.txt", "", "", "", NULL)) {
printf
("PANIC! : Not enough memory [%d]\n",
__LINE__);
- hts_log_print(opt, LOG_PANIC,
- "Not enough memory, can not re-allocate %d bytes",
- (int) ((add_tab_alloc +
- 1) *
- sizeof(lien_url)));
+ hts_log_print(opt, LOG_PANIC, "Not enough memory");
if ((opt->getmode & 1) && (ptr > 0)) {
if (fp) {
fclose(fp);
@@ -3244,15 +3159,13 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
XH_uninit; // désallocation mémoire & buffers
return -1;
}
- liens[lien_tot]->testmode = 0; // pas mode test
- liens[lien_tot]->link_import = 0; // pas mode import
- liens[lien_tot]->premier = lien_tot;
- liens[lien_tot]->precedent = ptr;
- liens[lien_tot]->depth = 0;
- liens[lien_tot]->pass2 =
- max(0, numero_passe);
- liens[lien_tot]->retry = 0;
- lien_tot++; // UN LIEN DE PLUS
+ heap_top()->testmode = 0; // pas mode test
+ heap_top()->link_import = 0; // pas mode import
+ heap_top()->premier = heap_top_index();
+ heap_top()->precedent = ptr;
+ heap_top()->depth = 0;
+ heap_top()->pass2 = max(0, numero_passe);
+ heap_top()->retry = 0;
#if DEBUG_ROBOTS
printf
("robots.txt: added file robots.txt for %s\n",
@@ -3273,15 +3186,11 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
}
// FIN ROBOTS.TXT AJOUT
- // enregistrer (MACRO)
- liens_record(adr, fil, save, former_adr, former_fil);
- if (liens[lien_tot] == NULL) { // erreur, pas de place réservée
+ // enregistrer
+ if (!hts_record_link(opt, adr, fil, save, former_adr, former_fil, codebase)) {
printf("PANIC! : Not enough memory [%d]\n",
__LINE__);
- hts_log_print(opt, LOG_PANIC,
- "Not enough memory, can not re-allocate %d bytes",
- (int) ((add_tab_alloc +
- 1) * sizeof(lien_url)));
+ hts_log_print(opt, LOG_PANIC, "Not enough memory");
if ((opt->getmode & 1) && (ptr > 0)) {
if (fp) {
fclose(fp);
@@ -3293,47 +3202,46 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
}
// mode test?
if (!just_test_it)
- liens[lien_tot]->testmode = 0; // pas mode test
+ heap_top()->testmode = 0; // pas mode test
else
- liens[lien_tot]->testmode = 1; // mode test
+ heap_top()->testmode = 1; // mode test
if (!import_done)
- liens[lien_tot]->link_import = 0; // pas mode import
+ heap_top()->link_import = 0; // pas mode import
else
- liens[lien_tot]->link_import = 1; // mode import
+ heap_top()->link_import = 1; // mode import
// écrire autres paramètres de la structure-lien
if ((meme_adresse) && (!import_done)
- && (liens[ptr]->premier != 0))
- liens[lien_tot]->premier = liens[ptr]->premier;
+ && (heap(ptr)->premier != 0))
+ heap_top()->premier = heap(ptr)->premier;
else // sinon l'objet père est le précédent lui même
- liens[lien_tot]->premier = lien_tot;
- // liens[lien_tot]->premier=ptr;
+ heap_top()->premier = heap_top_index();
+ // heap_top()->premier=ptr;
- liens[lien_tot]->precedent = ptr;
+ heap_top()->precedent = ptr;
// noter la priorité
if (!set_prio_to)
- liens[lien_tot]->depth = liens[ptr]->depth - 1;
+ heap_top()->depth = heap(ptr)->depth - 1;
else
- liens[lien_tot]->depth = max(0, min(liens[ptr]->depth - 1, set_prio_to - 1)); // PRIORITE NULLE (catch page)
+ heap_top()->depth = max(0, min(heap(ptr)->depth - 1, set_prio_to - 1)); // PRIORITE NULLE (catch page)
// noter pass
- liens[lien_tot]->pass2 = pass_fix;
- liens[lien_tot]->retry = opt->retry;
+ heap_top()->pass2 = pass_fix;
+ heap_top()->retry = opt->retry;
- //strcpybuff(liens[lien_tot]->adr,adr);
- //strcpybuff(liens[lien_tot]->fil,fil);
- //strcpybuff(liens[lien_tot]->sav,save);
+ //strcpybuff(heap_top()->adr,adr);
+ //strcpybuff(heap_top()->fil,fil);
+ //strcpybuff(heap_top()->sav,save);
if (!just_test_it) {
hts_log_print(opt, LOG_DEBUG,
"OK, NOTE: %s%s -> %s",
- liens[lien_tot]->adr,
- liens[lien_tot]->fil,
- liens[lien_tot]->sav);
+ heap_top()->adr,
+ heap_top()->fil,
+ heap_top()->sav);
} else {
hts_log_print(opt, LOG_DEBUG, "OK, TEST: %s%s",
- liens[lien_tot]->adr,
- liens[lien_tot]->fil);
+ heap_top()->adr,
+ heap_top()->fil);
}
- lien_tot++; // UN LIEN DE PLUS
} else { // if !dejafait
hts_log_print(opt, LOG_DEBUG,
"link has already been recorded, cancelled: %s",
@@ -3428,7 +3336,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
opt->state._hts_in_html_poll = 0;
// temps à attendre, et remplir autant que l'on peut le cache (backing)
back_wait(sback, opt, cache, HTS_STAT.stat_timestart);
- back_fillmax(sback, opt, cache, liens, ptr, numero_passe, lien_tot);
+ back_fillmax(sback, opt, cache, opt->liens, ptr, numero_passe, opt->lien_tot);
// Transfer rate
engine_stats();
@@ -3438,11 +3346,11 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
HTS_STAT.stat_errors = fspc(opt, NULL, "error");
HTS_STAT.stat_warnings = fspc(opt, NULL, "warning");
HTS_STAT.stat_infos = fspc(opt, NULL, "info");
- HTS_STAT.nbk = backlinks_done(sback, liens, lien_tot, ptr);
+ HTS_STAT.nbk = backlinks_done(sback, opt->liens, opt->lien_tot, ptr);
HTS_STAT.nb = back_transferred(HTS_STAT.stat_bytes, sback);
if (!RUN_CALLBACK7
- (opt, loop, sback->lnk, sback->count, 0, ptr, lien_tot,
+ (opt, loop, sback->lnk, sback->count, 0, ptr, opt->lien_tot,
(int) (time_local() - HTS_STAT.stat_timestart), &HTS_STAT)) {
hts_log_print(opt, LOG_ERROR, "Exit requested by shell or user");
*stre->exit_xh_ = 1; // exit requested
@@ -3459,7 +3367,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
// refresh the backing system each 2 seconds
if (engine_stats()) {
back_wait(sback, opt, cache, HTS_STAT.stat_timestart);
- back_fillmax(sback, opt, cache, liens, ptr, numero_passe, lien_tot);
+ back_fillmax(sback, opt, cache, opt->liens, ptr, numero_passe, opt->lien_tot);
}
} while((((int) (adr - r->adr))) < r->size);
@@ -3472,8 +3380,8 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
int cSize = (int) ht_len;
hts_log_print(opt, LOG_DEBUG, "engine: postprocess-html: %s%s",
- urladr, urlfil);
- if (RUN_CALLBACK4(opt, postprocess, &cAddr, &cSize, urladr, urlfil) ==
+ urladr(), urlfil());
+ if (RUN_CALLBACK4(opt, postprocess, &cAddr, &cSize, urladr(), urlfil()) ==
1) {
ht_buff = cAddr;
ht_len = cSize;
@@ -3495,8 +3403,8 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
}
}
// sauver fichier
- //structcheck(savename);
- //filesave(opt,r->adr,r->size,savename);
+ //structcheck(savename());
+ //filesave(opt,r->adr,r->size,savename());
} // analyse OK
@@ -3529,7 +3437,7 @@ int hts_mirror_check_moved(htsmoduleStruct * str,
//int i=0;
// char* p;
- hts_log_print(opt, LOG_WARNING, "%s for %s%s", r->msg, urladr, urlfil);
+ hts_log_print(opt, LOG_WARNING, "%s for %s%s", r->msg, urladr(), urlfil());
{
char BIGSTK mov_url[HTS_URLMAXSIZE * 2], mov_adr[HTS_URLMAXSIZE * 2],
@@ -3546,7 +3454,7 @@ int hts_mirror_check_moved(htsmoduleStruct * str,
// url qque -> adresse+fichier
if ((reponse =
- ident_url_relatif(mov_url, urladr, urlfil, mov_adr,
+ ident_url_relatif(mov_url, urladr(), urlfil(), mov_adr,
mov_fil)) >= 0) {
int set_prio_to = 0; // pas de priotité fixéd par wizard
@@ -3558,27 +3466,27 @@ int hts_mirror_check_moved(htsmoduleStruct * str,
n_adr[0] = n_fil[0] = '\0';
(void) adr_normalized(mov_adr, n_adr);
(void) fil_normalized(mov_fil, n_fil);
- (void) adr_normalized(urladr, pn_adr);
- (void) fil_normalized(urlfil, pn_fil);
+ (void) adr_normalized(urladr(), pn_adr);
+ (void) fil_normalized(urlfil(), pn_fil);
if (strcasecmp(n_adr, pn_adr) == 0
&& strcasecmp(n_fil, pn_fil) == 0) {
hts_log_print(opt, LOG_WARNING,
"Redirected link is identical because of 'URL Hack' option: %s%s and %s%s",
- urladr, urlfil, mov_adr, mov_fil);
+ urladr(), urlfil(), mov_adr, mov_fil);
}
}
//if (ident_url_absolute(mov_url,mov_adr,mov_fil)!=-1) { // ok URL reconnue
// c'est (en gros) la même URL..
// si c'est un problème de casse dans le host c'est que le serveur est buggé
// ("RFC says.." : host name IS case insensitive)
- if ((strfield2(mov_adr, urladr) != 0) && (strfield2(mov_fil, urlfil) != 0)) { // identique à casse près
+ if ((strfield2(mov_adr, urladr()) != 0) && (strfield2(mov_fil, urlfil()) != 0)) { // identique à casse près
// on tourne en rond
- if (strcmp(mov_fil, urlfil) == 0) {
+ if (strcmp(mov_fil, urlfil()) == 0) {
error = 1;
get_it = -1; // ne rien faire
hts_log_print(opt, LOG_WARNING,
"Can not bear crazy server (%s) for %s%s", r->msg,
- urladr, urlfil);
+ urladr(), urlfil());
} else { // mauvaise casse, effacer entrée dans la pile et rejouer une fois
get_it = 1;
}
@@ -3589,7 +3497,7 @@ int hts_mirror_check_moved(htsmoduleStruct * str,
"wizard link test for moved file at %s%s..",
mov_adr, mov_fil);
// accepté?
- if (hts_acceptlink(opt, ptr, lien_tot, liens, mov_adr, mov_fil, NULL, NULL, &set_prio_to, NULL) != 1) { /* nouvelle adresse non refusée ? */
+ if (hts_acceptlink(opt, ptr, opt->lien_tot, opt->liens, mov_adr, mov_fil, NULL, NULL, &set_prio_to, NULL) != 1) { /* nouvelle adresse non refusée ? */
get_it = 1;
hts_log_print(opt, LOG_DEBUG, "moved link accepted: %s%s",
mov_adr, mov_fil);
@@ -3597,13 +3505,13 @@ int hts_mirror_check_moved(htsmoduleStruct * str,
} /* sinon traité normalement */
}
- //if ((strfield2(mov_adr,urladr)!=0) && (strfield2(mov_fil,urlfil)!=0)) { // identique à casse près
+ //if ((strfield2(mov_adr,urladr())!=0) && (strfield2(mov_fil,urlfil())!=0)) { // identique à casse près
if (get_it == 1) {
// court-circuiter le reste du traitement
// et reculer pour mieux sauter
hts_log_print(opt, LOG_WARNING,
"Warning moved treated for %s%s (real one is %s%s)",
- urladr, urlfil, mov_adr, mov_fil);
+ urladr(), urlfil(), mov_adr, mov_fil);
// canceller lien actuel
error = 1;
hash_invalidate_entry(hashptr, ptr); // invalidate hashtable entry
@@ -3617,33 +3525,27 @@ int hts_mirror_check_moved(htsmoduleStruct * str,
// calculer lien et éventuellement modifier addresse/fichier
if (url_savename
(mov_adr, mov_fil, mov_sav, NULL, NULL,
- liens[liens[ptr]->precedent]->adr,
- liens[liens[ptr]->precedent]->fil, opt, liens, lien_tot,
+ heap(heap(ptr)->precedent)->adr,
+ heap(heap(ptr)->precedent)->fil, opt, opt->liens, opt->lien_tot,
sback, cache, hash, ptr, numero_passe, NULL) != -1) {
if (hash_read(hash, mov_sav, NULL, HASH_STRUCT_FILENAME) < 0) { // n'existe pas déja
- // enregistrer lien (MACRO) avec SAV IDENTIQUE
- liens_record(mov_adr, mov_fil, liens[ptr]->sav, "", "");
- //liens_record(mov_adr,mov_fil,mov_sav,"","");
- if (liens[lien_tot] != NULL) { // OK, pas d'erreur
+ // enregistrer lien avec SAV IDENTIQUE
+ if (hts_record_link(opt, mov_adr, mov_fil, heap(ptr)->sav, "", "", NULL)) {
// mode test?
- liens[lien_tot]->testmode = liens[ptr]->testmode;
- liens[lien_tot]->link_import = 0; // mode normal
+ heap_top()->testmode = heap(ptr)->testmode;
+ heap_top()->link_import = 0; // mode normal
if (!set_prio_to)
- liens[lien_tot]->depth = liens[ptr]->depth;
+ heap_top()->depth = heap(ptr)->depth;
else
- liens[lien_tot]->depth = max(0, min(set_prio_to - 1, liens[ptr]->depth)); // PRIORITE NULLE (catch page)
- liens[lien_tot]->pass2 =
- max(liens[ptr]->pass2, numero_passe);
- liens[lien_tot]->retry = liens[ptr]->retry;
- liens[lien_tot]->premier = liens[ptr]->premier;
- liens[lien_tot]->precedent = liens[ptr]->precedent;
- lien_tot++;
+ heap_top()->depth = max(0, min(set_prio_to - 1, heap(ptr)->depth)); // PRIORITE NULLE (catch page)
+ heap_top()->pass2 =
+ max(heap(ptr)->pass2, numero_passe);
+ heap_top()->retry = heap(ptr)->retry;
+ heap_top()->premier = heap(ptr)->premier;
+ heap_top()->precedent = heap(ptr)->precedent;
} else { // oups erreur, plus de mémoire!!
printf("PANIC! : Not enough memory [%d]\n", __LINE__);
- hts_log_print(opt, LOG_PANIC,
- "Not enough memory, can not re-allocate %d bytes",
- (int) ((add_tab_alloc +
- 1) * sizeof(lien_url)));
+ hts_log_print(opt, LOG_PANIC, "Not enough memory");
//if (opt->getmode & 1) { if (fp) { fclose(fp); fp=NULL; } }
XH_uninit; // désallocation mémoire & buffers
return 0;
@@ -3651,7 +3553,7 @@ int hts_mirror_check_moved(htsmoduleStruct * str,
} else {
hts_log_print(opt, LOG_INFO,
"moving %s to an existing file %s",
- liens[ptr]->fil, urlfil);
+ heap(ptr)->fil, urlfil());
}
}
@@ -3670,7 +3572,7 @@ int hts_mirror_check_moved(htsmoduleStruct * str,
char *const rn = (char *) malloct(rn_size);
if (rn != NULL) {
hts_log_print(opt, LOG_WARNING, "File has moved from %s%s to %s",
- urladr, urlfil, mov_url);
+ urladr(), urlfil(), mov_url);
if (!opt->mimehtml) {
inplace_escape_uri(mov_url, sizeof(mov_url));
} else {
@@ -3712,33 +3614,30 @@ int hts_mirror_check_moved(htsmoduleStruct * str,
} else if ((r->statuscode == HTTP_PRECONDITION_FAILED)
|| (r->statuscode == HTTP_REQUESTED_RANGE_NOT_SATISFIABLE)
) { // Precondition Failed, c'est à dire pour nous redemander TOUT le fichier
- if (fexist_utf8(liens[ptr]->sav)) {
- remove(liens[ptr]->sav); // Eliminer
+ if (fexist_utf8(heap(ptr)->sav)) {
+ remove(heap(ptr)->sav); // Eliminer
} else {
hts_log_print(opt, LOG_WARNING,
"Unexpected 412/416 error (%s) for %s%s, '%s' could not be found on disk",
- r->msg, urladr, urlfil,
- liens[ptr]->sav != NULL ? liens[ptr]->sav : "");
+ r->msg, urladr(), urlfil(),
+ heap(ptr)->sav != NULL ? heap(ptr)->sav : "");
}
- if (!fexist_utf8(liens[ptr]->sav)) { // Bien éliminé? (sinon on boucle..)
+ if (!fexist_utf8(heap(ptr)->sav)) { // Bien éliminé? (sinon on boucle..)
#if HDEBUG
printf("Partial content NOT up-to-date, reget all file for %s\n",
- liens[ptr]->sav);
+ heap(ptr)->sav);
#endif
hts_log_print(opt, LOG_DEBUG, "Partial file reget (%s) for %s%s",
- r->msg, urladr, urlfil);
- // enregistrer le MEME lien (MACRO)
- liens_record(liens[ptr]->adr, liens[ptr]->fil, liens[ptr]->sav, "",
- "");
- if (liens[lien_tot] != NULL) { // OK, pas d'erreur
- liens[lien_tot]->testmode = liens[ptr]->testmode; // mode test?
- liens[lien_tot]->link_import = 0; // pas mode import
- liens[lien_tot]->depth = liens[ptr]->depth;
- liens[lien_tot]->pass2 = max(liens[ptr]->pass2, numero_passe);
- liens[lien_tot]->retry = liens[ptr]->retry;
- liens[lien_tot]->premier = liens[ptr]->premier;
- liens[lien_tot]->precedent = ptr;
- lien_tot++;
+ r->msg, urladr(), urlfil());
+ // enregistrer le MEME lien
+ if (hts_record_link(opt, heap(ptr)->adr, heap(ptr)->fil, heap(ptr)->sav, "", "", NULL)) {
+ heap_top()->testmode = heap(ptr)->testmode; // mode test?
+ heap_top()->link_import = 0; // pas mode import
+ heap_top()->depth = heap(ptr)->depth;
+ heap_top()->pass2 = max(heap(ptr)->pass2, numero_passe);
+ heap_top()->retry = heap(ptr)->retry;
+ heap_top()->premier = heap(ptr)->premier;
+ heap_top()->precedent = ptr;
//
// canceller lien actuel
error = 1;
@@ -3746,15 +3645,13 @@ int hts_mirror_check_moved(htsmoduleStruct * str,
//
} else { // oups erreur, plus de mémoire!!
printf("PANIC! : Not enough memory [%d]\n", __LINE__);
- hts_log_print(opt, LOG_PANIC,
- "Not enough memory, can not re-allocate %d bytes",
- (int) ((add_tab_alloc + 1) * sizeof(lien_url)));
+ hts_log_print(opt, LOG_PANIC, "Not enough memory");
//if (opt->getmode & 1) { if (fp) { fclose(fp); fp=NULL; } }
XH_uninit; // désallocation mémoire & buffers
return 0;
}
} else {
- hts_log_print(opt, LOG_ERROR, "Can not remove old file %s", urlfil);
+ hts_log_print(opt, LOG_ERROR, "Can not remove old file %s", urlfil());
error = 1;
}
@@ -3775,27 +3672,27 @@ int hts_mirror_check_moved(htsmoduleStruct * str,
//case -1: can_retry=1; break;
case STATUSCODE_TIMEOUT:
if (opt->hostcontrol) { // timeout et retry épuisés
- if ((opt->hostcontrol & 1) && (liens[ptr]->retry <= 0)) {
- hts_log_print(opt, LOG_DEBUG, "Link banned: %s%s", urladr, urlfil);
- host_ban(opt, liens, ptr, lien_tot, sback,
- jump_identification(urladr));
+ if ((opt->hostcontrol & 1) && (heap(ptr)->retry <= 0)) {
+ hts_log_print(opt, LOG_DEBUG, "Link banned: %s%s", urladr(), urlfil());
+ host_ban(opt, opt->liens, ptr, opt->lien_tot, sback,
+ jump_identification(urladr()));
hts_log_print(opt, LOG_DEBUG,
- "Info: previous log - link banned: %s%s", urladr,
- urlfil);
+ "Info: previous log - link banned: %s%s", urladr(),
+ urlfil());
} else
can_retry = 1;
} else
can_retry = 1;
break;
case STATUSCODE_SLOW:
- if ((opt->hostcontrol) && (liens[ptr]->retry <= 0)) { // too slow
+ if ((opt->hostcontrol) && (heap(ptr)->retry <= 0)) { // too slow
if (opt->hostcontrol & 2) {
- hts_log_print(opt, LOG_DEBUG, "Link banned: %s%s", urladr, urlfil);
- host_ban(opt, liens, ptr, lien_tot, sback,
- jump_identification(urladr));
+ hts_log_print(opt, LOG_DEBUG, "Link banned: %s%s", urladr(), urlfil());
+ host_ban(opt, opt->liens, ptr, opt->lien_tot, sback,
+ jump_identification(urladr()));
hts_log_print(opt, LOG_DEBUG,
- "Info: previous log - link banned: %s%s", urladr,
- urlfil);
+ "Info: previous log - link banned: %s%s", urladr(),
+ urlfil());
} else
can_retry = 1;
} else
@@ -3819,29 +3716,29 @@ int hts_mirror_check_moved(htsmoduleStruct * str,
break;
}
- if (strcmp(liens[ptr]->fil, "/primary") != 0) { // no primary (internal page 0)
- if ((liens[ptr]->retry <= 0) || (!can_retry)) { // retry épuisés (ou retry impossible)
+ if (strcmp(heap(ptr)->fil, "/primary") != 0) { // no primary (internal page 0)
+ if ((heap(ptr)->retry <= 0) || (!can_retry)) { // retry épuisés (ou retry impossible)
if ((opt->retry > 0) && (can_retry)) {
hts_log_print(opt, LOG_ERROR,
"\"%s\" (%d) after %d retries at link %s%s (from %s%s)",
- r->msg, r->statuscode, opt->retry, urladr, urlfil,
- liens[liens[ptr]->precedent]->adr,
- liens[liens[ptr]->precedent]->fil);
+ r->msg, r->statuscode, opt->retry, urladr(), urlfil(),
+ heap(heap(ptr)->precedent)->adr,
+ heap(heap(ptr)->precedent)->fil);
} else {
if (r->statuscode == STATUSCODE_TEST_OK) { // test OK
hts_log_print(opt, LOG_INFO, "Test OK at link %s%s (from %s%s)",
- urladr, urlfil, liens[liens[ptr]->precedent]->adr,
- liens[liens[ptr]->precedent]->fil);
+ urladr(), urlfil(), heap(heap(ptr)->precedent)->adr,
+ heap(heap(ptr)->precedent)->fil);
} else {
- if (strcmp(urlfil, "/robots.txt")) { // ne pas afficher d'infos sur robots.txt par défaut
+ if (strcmp(urlfil(), "/robots.txt")) { // ne pas afficher d'infos sur robots.txt par défaut
hts_log_print(opt, LOG_ERROR,
"\"%s\" (%d) at link %s%s (from %s%s)", r->msg,
- r->statuscode, urladr, urlfil,
- liens[liens[ptr]->precedent]->adr,
- liens[liens[ptr]->precedent]->fil);
+ r->statuscode, urladr(), urlfil(),
+ heap(heap(ptr)->precedent)->adr,
+ heap(heap(ptr)->precedent)->fil);
} else {
hts_log_print(opt, LOG_DEBUG, "No robots.txt rules at %s",
- urladr);
+ urladr());
}
}
}
@@ -3850,13 +3747,13 @@ int hts_mirror_check_moved(htsmoduleStruct * str,
// due to the "no connection -> previous restored" hack
// This prevent the engine from wiping all data if the website has been deleted (or moved)
// since last time (which is quite annoying)
- if (liens[ptr]->precedent != 0) {
+ if (heap(ptr)->precedent != 0) {
// ici on teste si on doit enregistrer la page tout de même
if (opt->errpage) {
store_errpage = 1;
}
} else {
- if (strcmp(urlfil, "/robots.txt") != 0) {
+ if (strcmp(urlfil(), "/robots.txt") != 0) {
/*
This is an error caused by a link entered by the user
That is, link(s) entered by user are invalid (404, 500, connect error, proxy error->.)
@@ -3873,33 +3770,29 @@ int hts_mirror_check_moved(htsmoduleStruct * str,
} else { // retry!!
hts_log_print(opt, LOG_NOTICE,
"Retry after error %d (%s) at link %s%s (from %s%s)",
- r->statuscode, r->msg, urladr, urlfil,
- liens[liens[ptr]->precedent]->adr,
- liens[liens[ptr]->precedent]->fil);
+ r->statuscode, r->msg, urladr(), urlfil(),
+ heap(heap(ptr)->precedent)->adr,
+ heap(heap(ptr)->precedent)->fil);
// redemander fichier
- liens_record(urladr, urlfil, savename, "", "");
- if (liens[lien_tot] != NULL) { // OK, pas d'erreur
- liens[lien_tot]->testmode = liens[ptr]->testmode; // mode test?
- liens[lien_tot]->link_import = 0; // pas mode import
- liens[lien_tot]->depth = liens[ptr]->depth;
- liens[lien_tot]->pass2 = max(liens[ptr]->pass2, numero_passe);
- liens[lien_tot]->retry = liens[ptr]->retry - 1; // moins 1 retry!
- liens[lien_tot]->premier = liens[ptr]->premier;
- liens[lien_tot]->precedent = liens[ptr]->precedent;
- lien_tot++;
+ if (hts_record_link(opt, urladr(), urlfil(), savename(), "", "", codebase)) {
+ heap_top()->testmode = heap(ptr)->testmode; // mode test?
+ heap_top()->link_import = 0; // pas mode import
+ heap_top()->depth = heap(ptr)->depth;
+ heap_top()->pass2 = max(heap(ptr)->pass2, numero_passe);
+ heap_top()->retry = heap(ptr)->retry - 1; // moins 1 retry!
+ heap_top()->premier = heap(ptr)->premier;
+ heap_top()->precedent = heap(ptr)->precedent;
} else { // oups erreur, plus de mémoire!!
printf("PANIC! : Not enough memory [%d]\n", __LINE__);
- hts_log_print(opt, LOG_PANIC,
- "Not enough memory, can not re-allocate %d bytes",
- (int) ((add_tab_alloc + 1) * sizeof(lien_url)));
+ hts_log_print(opt, LOG_PANIC, "Not enough memory");
//if (opt->getmode & 1) { if (fp) { fclose(fp); fp=NULL; } }
XH_uninit; // désallocation mémoire & buffers
return 0;
}
}
} else {
- hts_log_print(opt, LOG_DEBUG, "Info: no robots.txt at %s%s", urladr,
- urlfil);
+ hts_log_print(opt, LOG_DEBUG, "Info: no robots.txt at %s%s", urladr(),
+ urlfil());
}
if (!store_errpage) {
if (r->adr) { // désalloc
@@ -3979,12 +3872,12 @@ void hts_mirror_process_user_interaction(htsmoduleStruct * str,
HTS_STAT.stat_errors = fspc(opt, NULL, "error");
HTS_STAT.stat_warnings = fspc(opt, NULL, "warning");
HTS_STAT.stat_infos = fspc(opt, NULL, "info");
- HTS_STAT.nbk = backlinks_done(sback, liens, lien_tot, ptr);
+ HTS_STAT.nbk = backlinks_done(sback, opt->liens, opt->lien_tot, ptr);
HTS_STAT.nb = back_transferred(HTS_STAT.stat_bytes, sback);
b = 0;
if (!RUN_CALLBACK7
- (opt, loop, sback->lnk, sback->count, b, ptr, lien_tot,
+ (opt, loop, sback->lnk, sback->count, b, ptr, opt->lien_tot,
(int) (time_local() - HTS_STAT.stat_timestart), &HTS_STAT)
|| !back_checkmirror(opt)) {
hts_log_print(opt, LOG_ERROR, "Exit requested by shell or user");
@@ -4044,29 +3937,25 @@ void hts_mirror_process_user_interaction(htsmoduleStruct * str,
// calculer lien et éventuellement modifier addresse/fichier
if (url_savename
- (add_adr, add_fil, add_sav, NULL, NULL, NULL, NULL, opt, liens,
- lien_tot, sback, cache, hash, ptr, numero_passe, NULL) != -1) {
+ (add_adr, add_fil, add_sav, NULL, NULL, NULL, NULL, opt, opt->liens,
+ opt->lien_tot, sback, cache, hash, ptr, numero_passe, NULL) != -1) {
if (hash_read(hash, add_sav, NULL, HASH_STRUCT_FILENAME) < 0) { // n'existe pas déja
- // enregistrer lien (MACRO)
- liens_record(add_adr, add_fil, add_sav, "", "");
- if (liens[lien_tot] != NULL) { // OK, pas d'erreur
- liens[lien_tot]->testmode = 0; // mode test?
- liens[lien_tot]->link_import = 0; // mode normal
- liens[lien_tot]->depth = opt->depth;
- liens[lien_tot]->pass2 = max(0, numero_passe);
- liens[lien_tot]->retry = opt->retry;
- liens[lien_tot]->premier = lien_tot;
- liens[lien_tot]->precedent = lien_tot;
- lien_tot++;
+ // enregistrer lien
+ if (hts_record_link(opt, add_adr, add_fil, add_sav, "", "", NULL)) {
+ heap_top()->testmode = 0; // mode test?
+ heap_top()->link_import = 0; // mode normal
+ heap_top()->depth = opt->depth;
+ heap_top()->pass2 = max(0, numero_passe);
+ heap_top()->retry = opt->retry;
+ heap_top()->premier = heap_top_index();
+ heap_top()->precedent = heap_top_index();
//
hts_log_print(opt, LOG_INFO, "Link added by user: %s%s", add_adr,
add_fil);
//
} else { // oups erreur, plus de mémoire!!
printf("PANIC! : Not enough memory [%d]\n", __LINE__);
- hts_log_print(opt, LOG_PANIC,
- "Not enough memory, can not re-allocate %d bytes",
- (int) ((add_tab_alloc + 1) * sizeof(lien_url)));
+ hts_log_print(opt, LOG_PANIC, "Not enough memory");
//if (opt->getmode & 1) { if (fp) { fclose(fp); fp=NULL; } }
XH_uninit; // désallocation mémoire & buffers
return;
@@ -4091,7 +3980,7 @@ void hts_mirror_process_user_interaction(htsmoduleStruct * str,
if (opt->state._hts_setpause
|| back_pluggable_sockets_strict(sback, opt) <= 0) {
// index du lien actuel
- int b = back_index(opt, sback, urladr, urlfil, savename);
+ int b = back_index(opt, sback, urladr(), urlfil(), savename());
int prev = opt->state._hts_in_html_parsing;
if (b < 0)
@@ -4108,11 +3997,11 @@ void hts_mirror_process_user_interaction(htsmoduleStruct * str,
HTS_STAT.stat_errors = fspc(opt, NULL, "error");
HTS_STAT.stat_warnings = fspc(opt, NULL, "warning");
HTS_STAT.stat_infos = fspc(opt, NULL, "info");
- HTS_STAT.nbk = backlinks_done(sback, liens, lien_tot, ptr);
+ HTS_STAT.nbk = backlinks_done(sback, opt->liens, opt->lien_tot, ptr);
HTS_STAT.nb = back_transferred(HTS_STAT.stat_bytes, sback);
if (!RUN_CALLBACK7
- (opt, loop, sback->lnk, sback->count, b, ptr, lien_tot,
+ (opt, loop, sback->lnk, sback->count, b, ptr, opt->lien_tot,
(int) (time_local() - HTS_STAT.stat_timestart), &HTS_STAT)) {
hts_log_print(opt, LOG_ERROR, "Exit requested by shell or user");
*stre->exit_xh_ = 1; // exit requested
@@ -4139,9 +4028,9 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct * str,
int n;
/* This is not supposed to hapen. */
- if (liens[ptr]->pass2 == -1) {
- hts_log_print(opt, LOG_WARNING, "Link is already ready %s%s", urladr,
- urlfil);
+ if (heap(ptr)->pass2 == -1) {
+ hts_log_print(opt, LOG_WARNING, "Link is already ready %s%s", urladr(),
+ urlfil());
}
/* User interaction */
@@ -4152,27 +4041,27 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct * str,
ENGINE_SET_CONTEXT();
/* Done while processing user interactions ? */
- if (liens[ptr]->pass2 == -1) {
- hts_log_print(opt, LOG_DEBUG, "Link is now ready %s%s", urladr, urlfil);
+ if (heap(ptr)->pass2 == -1) {
+ hts_log_print(opt, LOG_DEBUG, "Link is now ready %s%s", urladr(), urlfil());
// We are ready
return 2; // goto jump_if_done;
}
// si le fichier n'est pas en backing, le mettre..
- if (!back_exist(str->sback, str->opt, urladr, urlfil, savename)) {
+ if (!back_exist(str->sback, str->opt, urladr(), urlfil(), savename())) {
#if BDEBUG==1
- printf("crash backing: %s%s\n", liens[ptr]->adr, liens[ptr]->fil);
+ printf("crash backing: %s%s\n", heap(ptr)->adr, heap(ptr)->fil);
#endif
if (back_add
- (sback, opt, cache, urladr, urlfil, savename,
- liens[liens[ptr]->precedent]->adr, liens[liens[ptr]->precedent]->fil,
- liens[ptr]->testmode) == -1) {
+ (sback, opt, cache, urladr(), urlfil(), savename(),
+ heap(heap(ptr)->precedent)->adr, heap(heap(ptr)->precedent)->fil,
+ heap(ptr)->testmode) == -1) {
printf("PANIC! : Crash adding error, unexpected error found.. [%d]\n",
__LINE__);
#if BDEBUG==1
printf("error while crash adding\n");
#endif
- hts_log_print(opt, LOG_ERROR, "Unexpected backing error for %s%s", urladr,
- urlfil);
+ hts_log_print(opt, LOG_ERROR, "Unexpected backing error for %s%s", urladr(),
+ urlfil());
}
}
@@ -4188,7 +4077,7 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct * str,
if ((n > 0) && (!opt->state._hts_setpause)) { // si sockets libre et pas en pause, ajouter
// remplir autant que l'on peut le cache (backing)
- back_fillmax(sback, opt, cache, liens, ptr, numero_passe, lien_tot);
+ back_fillmax(sback, opt, cache, opt->liens, ptr, numero_passe, opt->lien_tot);
}
// index du lien actuel
{
@@ -4203,7 +4092,7 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct * str,
ENGINE_SET_CONTEXT();
// index du lien actuel
- b = back_index(opt, sback, urladr, urlfil, savename);
+ b = back_index(opt, sback, urladr(), urlfil(), savename());
#if BDEBUG==1
printf("back index %d, waiting\n", b);
#endif
@@ -4216,7 +4105,7 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct * str,
back_wait(sback, opt, cache, HTS_STAT.stat_timestart);
// Continue to the loop if link still present
- b = back_index(opt, sback, urladr, urlfil, savename);
+ b = back_index(opt, sback, urladr(), urlfil(), savename());
if (b < 0)
break;
@@ -4229,10 +4118,10 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct * str,
}
// And fill the backing stack
if (back[b].status > 0)
- back_fillmax(sback, opt, cache, liens, ptr, numero_passe, lien_tot);
+ back_fillmax(sback, opt, cache, opt->liens, ptr, numero_passe, opt->lien_tot);
// Continue to the loop if link still present
- b = back_index(opt, sback, urladr, urlfil, savename);
+ b = back_index(opt, sback, urladr(), urlfil(), savename());
if (b < 0)
break;
@@ -4248,10 +4137,10 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct * str,
(int) ((HTS_STAT.HTS_TOTAL_RECV -
*stre->makestat_total_) / (l - makestat_time)),
(LLint) HTS_STAT.HTS_TOTAL_RECV,
- (int) lien_tot - *stre->makestat_lnk_, (int) lien_tot);
+ (int) opt->lien_tot - *stre->makestat_lnk_, (int) opt->lien_tot);
fflush(makestat_fp);
*stre->makestat_total_ = HTS_STAT.HTS_TOTAL_RECV;
- *stre->makestat_lnk_ = lien_tot;
+ *stre->makestat_lnk_ = heap_top_index();
}
if (stre->maketrack_fp != NULL) {
int i;
@@ -4308,11 +4197,11 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct * str,
HTS_STAT.stat_errors = fspc(opt, NULL, "error");
HTS_STAT.stat_warnings = fspc(opt, NULL, "warning");
HTS_STAT.stat_infos = fspc(opt, NULL, "info");
- HTS_STAT.nbk = backlinks_done(sback, liens, lien_tot, ptr);
+ HTS_STAT.nbk = backlinks_done(sback, opt->liens, opt->lien_tot, ptr);
HTS_STAT.nb = back_transferred(HTS_STAT.stat_bytes, sback);
if (!RUN_CALLBACK7
- (opt, loop, sback->lnk, sback->count, b, ptr, lien_tot,
+ (opt, loop, sback->lnk, sback->count, b, ptr, opt->lien_tot,
(int) (time_local() - HTS_STAT.stat_timestart), &HTS_STAT)) {
hts_log_print(opt, LOG_ERROR, "Exit requested by shell or user");
*stre->exit_xh_ = 1; // exit requested
@@ -4377,7 +4266,7 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct * str,
(int) (HTS_STAT.HTS_TOTAL_RECV /
(tl - HTS_STAT.stat_timestart)));
fprintf(fp, "SOCKET %d" LF, back_nsoc(sback));
- fprintf(fp, "LINK %d" LF, lien_tot);
+ fprintf(fp, "LINK %d" LF, opt->lien_tot);
{
LLint mem = 0;
@@ -4410,7 +4299,7 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct * str,
if (b < 0) {
hts_log_print(opt, LOG_DEBUG,
"link #%d is ready, no more on the stack, skipping: %s%s..",
- ptr, urladr, urlfil);
+ ptr, urladr(), urlfil());
// prochain lien
// ptr++;
@@ -4451,10 +4340,10 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct * str,
} else if (opt->verbosedisplay == 1) {
if (b >= 0) {
if (back[b].r.statuscode == HTTP_OK)
- printf("%d/%d: %s%s (" LLintP " bytes) - OK\33[K\r", ptr, lien_tot,
+ printf("%d/%d: %s%s (" LLintP " bytes) - OK\33[K\r", ptr, opt->lien_tot,
back[b].url_adr, back[b].url_fil, (LLint) back[b].r.size);
else
- printf("%d/%d: %s%s (" LLintP " bytes) - %d\33[K\r", ptr, lien_tot,
+ printf("%d/%d: %s%s (" LLintP " bytes) - %d\33[K\r", ptr, opt->lien_tot,
back[b].url_adr, back[b].url_fil, (LLint) back[b].r.size,
back[b].r.statuscode);
} else {
@@ -4570,14 +4459,14 @@ int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save,
/* Recompute filename with MIME type */
save[0] = '\0';
- url_savename(adr, fil, save, former_adr, former_fil, liens[ptr]->adr,
- liens[ptr]->fil, opt, liens, lien_tot, sback, cache,
+ url_savename(adr, fil, save, former_adr, former_fil, heap(ptr)->adr,
+ heap(ptr)->fil, opt, opt->liens, opt->lien_tot, sback, cache,
hash, ptr, numero_passe, &back);
/* Recompute authorization with MIME type */
{
int new_forbidden_url =
- hts_acceptmime(opt, ptr, lien_tot, liens, adr, fil,
+ hts_acceptmime(opt, ptr, opt->lien_tot, opt->liens, adr, fil,
back.r.contenttype);
if (new_forbidden_url != -1) {
hts_log_print(opt, LOG_DEBUG, "result for wizard mime test: %d",
@@ -4644,14 +4533,14 @@ int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save,
/* Recompute filename with MIME type */
save[0] = '\0';
- url_savename(adr, fil, save, former_adr, former_fil, liens[ptr]->adr,
- liens[ptr]->fil, opt, liens, lien_tot, sback, cache,
+ url_savename(adr, fil, save, former_adr, former_fil, heap(ptr)->adr,
+ heap(ptr)->fil, opt, opt->liens, opt->lien_tot, sback, cache,
hash, ptr, numero_passe, &delayed_back);
/* Recompute authorization with MIME type */
{
int new_forbidden_url =
- hts_acceptmime(opt, ptr, lien_tot, liens, adr, fil,
+ hts_acceptmime(opt, ptr, opt->lien_tot, opt->liens, adr, fil,
delayed_back.r.contenttype);
if (new_forbidden_url != -1) {
hts_log_print(opt, LOG_DEBUG, "result for wizard mime test: %d",
@@ -4701,7 +4590,7 @@ int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save,
back_wait(sback, opt, cache, 0);
}
if (ptr >= 0) {
- back_fillmax(sback, opt, cache, liens, ptr, numero_passe, lien_tot);
+ back_fillmax(sback, opt, cache, opt->liens, ptr, numero_passe, opt->lien_tot);
}
// on est obligé d'appeler le shell pour le refresh..
{
@@ -4714,11 +4603,11 @@ int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save,
HTS_STAT.stat_errors = fspc(opt, NULL, "error");
HTS_STAT.stat_warnings = fspc(opt, NULL, "warning");
HTS_STAT.stat_infos = fspc(opt, NULL, "info");
- HTS_STAT.nbk = backlinks_done(sback, liens, lien_tot, ptr);
+ HTS_STAT.nbk = backlinks_done(sback, opt->liens, opt->lien_tot, ptr);
HTS_STAT.nb = back_transferred(HTS_STAT.stat_bytes, sback);
if (!RUN_CALLBACK7
- (opt, loop, sback->lnk, sback->count, b, ptr, lien_tot,
+ (opt, loop, sback->lnk, sback->count, b, ptr, opt->lien_tot,
(int) (time_local() - HTS_STAT.stat_timestart), &HTS_STAT)) {
return -1;
} else if (opt->state._hts_cancel || !back_checkmirror(opt)) { // cancel 2 ou 1 (cancel parsing)
@@ -4801,7 +4690,7 @@ int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save,
{
int set_prio_to = 0;
- if (hts_acceptlink(opt, ptr, lien_tot, liens, mov_adr, mov_fil, NULL, NULL, &set_prio_to, NULL) == 1) { /* forbidden */
+ if (hts_acceptlink(opt, ptr, opt->lien_tot, opt->liens, mov_adr, mov_fil, NULL, NULL, &set_prio_to, NULL) == 1) { /* forbidden */
/* Note: the cache 'cached_tests' system will remember this error, and we'll only issue ONE request */
*forbidden_url = 1; /* Forbidden! */
hts_log_print(opt, LOG_DEBUG,
@@ -4829,8 +4718,8 @@ int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save,
/* Recompute filename for hash lookup */
save[0] = '\0';
url_savename(adr, fil, save, former_adr, former_fil,
- liens[ptr]->adr, liens[ptr]->fil, opt, liens,
- lien_tot, sback, cache, hash, ptr, numero_passe,
+ heap(ptr)->adr, heap(ptr)->fil, opt, opt->liens,
+ opt->lien_tot, sback, cache, hash, ptr, numero_passe,
&delayed_back);
} else {
hts_log_print(opt, LOG_WARNING,
@@ -4848,13 +4737,13 @@ int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save,
/* Recompute filename with MIME type */
save[0] = '\0';
url_savename(adr, fil, save, former_adr, former_fil,
- liens[ptr]->adr, liens[ptr]->fil, opt, liens, lien_tot,
+ heap(ptr)->adr, heap(ptr)->fil, opt, opt->liens, opt->lien_tot,
sback, cache, hash, ptr, numero_passe, &delayed_back);
/* Recompute authorization with MIME type */
{
int new_forbidden_url =
- hts_acceptmime(opt, ptr, lien_tot, liens, adr, fil,
+ hts_acceptmime(opt, ptr, opt->lien_tot, opt->liens, adr, fil,
delayed_back.r.contenttype);
if (new_forbidden_url != -1) {
hts_log_print(opt, LOG_DEBUG, "result for wizard mime test: %d",