summaryrefslogtreecommitdiff
path: root/src/htsback.c
diff options
context:
space:
mode:
authorXavier Roche <xroche@users.noreply.github.com>2012-03-19 12:57:43 +0000
committerXavier Roche <xroche@users.noreply.github.com>2012-03-19 12:57:43 +0000
commit64cc4a88da8887ef1f7f4d90be0158d2cc76222d (patch)
treee72af709fbce8bc495f51e7f0518de9a9a2c3b7f /src/htsback.c
parent844ecc37072d515513177c65a8c9dc35c9cdfc1a (diff)
httrack 3.40.4
Diffstat (limited to 'src/htsback.c')
-rw-r--r--src/htsback.c1294
1 files changed, 893 insertions, 401 deletions
diff --git a/src/htsback.c b/src/htsback.c
index 317d4e7..8a9aac5 100644
--- a/src/htsback.c
+++ b/src/htsback.c
@@ -66,25 +66,91 @@ Please visit our Website: http://www.httrack.com
#else
#endif
+#if HTS_USEMMS
+#include "htsmms.h"
+#endif
+
#undef test_flush
#define test_flush if (opt->flush) { if (opt->log) { fflush(opt->log); } if (opt->errlog) { fflush(opt->errlog); } }
#define VT_CLREOL "\33[K"
+struct_back* back_new(int back_max) {
+ int i;
+ struct_back* sback = calloct(1, sizeof(struct_back));
+ sback->count = back_max;
+ sback->lnk = (lien_back*) calloct((back_max + 1), sizeof(lien_back));
+ sback->ready = inthash_new(8191);
+ inthash_value_is_malloc(sback->ready, 1);
+ // init
+ for(i = 0 ; i < sback->count ; i++){
+ sback->lnk[i].r.location = sback->lnk[i].location_buffer;
+ sback->lnk[i].status = -1;
+ sback->lnk[i].r.soc = INVALID_SOCKET;
+ }
+ return sback;
+}
+
+void back_free(struct_back** sback) {
+ if (sback != NULL && *sback != NULL) {
+ if ((*sback)->lnk != NULL) {
+ freet((*sback)->lnk);
+ (*sback)->lnk = NULL;
+ }
+ if ((*sback)->ready != NULL) {
+ inthash_delete((inthash *)&(*sback)->ready);
+ }
+ freet(*sback);
+ *sback = NULL;
+ }
+}
+
+void back_delete_all(httrackp* opt, cache_back* cache, struct_back* sback) {
+ if (sback != NULL) {
+ int i;
+ // delete live slots
+ for(i = 0 ; i < sback->count ; i++) {
+ back_delete(opt, cache, sback, i);
+ }
+ // delete stored slots
+ if (sback->ready != NULL) {
+ struct_inthash_enum e = inthash_enum_new((inthash)sback->ready);
+ inthash_chain* item;
+ while((item = inthash_enum_next(&e))) {
+ struct_back back1;
+ back1.count = 1;
+ back1.lnk = (lien_back*) item->value.ptr;
+ back1.ready = NULL;
+ back_delete(opt, cache, &back1, 0);
+ }
+ }
+ }
+}
+
// ---
// routines de backing
+
+static int back_index_ready(struct_back* sback, char* adr, char* fil, char* sav, int getIndex);
+static int back_index_fetch(struct_back* sback, char* adr, char* fil, char* sav, int getIndex);
+
// retourne l'index d'un lien dans un tableau de backing
-int back_index(lien_back* back,int back_max,char* adr,char* fil,char* sav) {
+int back_index(struct_back* sback,char* adr,char* fil,char* sav) {
+ return back_index_fetch(sback, adr, fil, sav, 1);
+}
+
+static int back_index_fetch(struct_back* sback, char* adr, char* fil, char* sav, int getIndex) {
+ lien_back* const back = sback->lnk;
+ const int back_max = sback->count;
int i=0;
int index=-1;
- while( i<back_max ) {
+ while( i < back_max ) {
if (back[i].status>=0) // réception OU prêt
if (strfield2(back[i].url_adr,adr)) {
if (strcmp(back[i].url_fil,fil)==0) {
if (index==-1) /* first time we meet, store it */
index=i;
- else if (strcmp(back[i].url_sav,sav)==0) { /* oops, check sav too */
+ else if (sav != NULL && strcmp(back[i].url_sav, sav) == 0) { /* oops, check sav too */
index=i;
return index;
}
@@ -92,11 +158,83 @@ int back_index(lien_back* back,int back_max,char* adr,char* fil,char* sav) {
}
i++;
}
+ // not found in fast repository - search in the storage hashtable
+ if (index == -1 && sav != NULL) {
+ index = back_index_ready(sback, adr, fil, sav, getIndex);
+ }
+ return index;
+}
+
+static int back_index_ready(struct_back* sback, char* adr, char* fil, char* sav, int getIndex) {
+ lien_back* const back = sback->lnk;
+ const int back_max = sback->count;
+ int index=-1;
+ void* ptr = NULL;
+ if (inthash_read_pvoid((inthash)sback->ready, sav, &ptr)) {
+ lien_back* itemback = (lien_back*) ptr;
+ if (itemback != NULL) {
+ if (!getIndex) {
+ return sback->count; // positive (but invalid) result
+ } else {
+ // move from hashtable to fast repository
+ int q = back_search_quick(sback);
+ if (q != -1) {
+ deletehttp(&back[q].r); // security check
+ back_move(itemback, &back[q]);
+ inthash_remove((inthash)sback->ready, sav); // delete item
+ back[q].locked = 1; /* locked */
+ index = q;
+ }
+ }
+ }
+ }
return index;
}
+/* Put all backing entries that are ready in the storage hashtable to spare space and CPU */
+int back_cleanup_background(httrackp* opt,cache_back* cache,struct_back* sback) {
+ lien_back* const back = sback->lnk;
+ const int back_max = sback->count;
+ int nclean = 0;
+ int i;
+ for( i = 0 ; i < back_max ; i++ ) {
+ // ready, not locked and suitable
+ if (back[i].status == 0 && back[i].locked == 0
+ && back[i].url_sav[0] != '\0'
+ && strcmp(back[i].url_sav, BACK_ADD_TEST) != 0
+ && !IS_DELAYED_EXT(back[i].url_sav)
+ )
+ {
+ lien_back* itemback = calloct(1, sizeof(lien_back));
+ /* Security check */
+ int checkIndex = back_index_ready(sback, back[i].url_adr, back[i].url_fil, back[i].url_sav, 1);
+ if (checkIndex != -1) {
+ if (opt->log) {
+ fspc(opt->log,"warning");
+ fprintf(opt->log,"engine: unexpected duplicate file entry: %s%s -> %s (%d '%s') / %s%s -> %s (%d '%s')"LF,
+ back[checkIndex].url_adr, back[checkIndex].url_fil, back[checkIndex].url_sav, back[checkIndex].r.statuscode, back[checkIndex].r.msg,
+ back[i].url_adr, back[i].url_fil, back[i].url_sav, back[i].r.statuscode, back[i].r.msg
+ );
+ test_flush;
+ }
+ back_delete(NULL, NULL, sback, checkIndex);
+#ifdef _DEBUG
+ /* This should NOT happend! */
+ { int duplicateEntryInBacklog = 1; assertf(!duplicateEntryInBacklog); }
+#endif
+ }
+ back_move(&back[i], itemback);
+ inthash_add_pvoid((inthash)sback->ready, itemback->url_sav, itemback);
+ nclean++;
+ }
+ }
+ return nclean;
+}
+
// nombre d'entrées libres dans le backing
-int back_available(lien_back* back,int back_max) {
+int back_available(struct_back* sback) {
+ lien_back* const back = sback->lnk;
+ const int back_max = sback->count;
int i;
int nb=0;
for(i=0;i<back_max;i++)
@@ -106,23 +244,61 @@ int back_available(lien_back* back,int back_max) {
}
// retourne estimation de la taille des html et fichiers stockés en mémoire
-LLint back_incache(lien_back* back,int back_max) {
+LLint back_incache(struct_back* sback) {
+ lien_back* const back = sback->lnk;
+ const int back_max = sback->count;
int i;
LLint sum=0;
for(i=0;i<back_max;i++)
if (back[i].status!=-1)
if (back[i].r.adr) // ne comptabilier que les blocs en mémoire
sum+=max(back[i].r.size,back[i].r.totalsize);
+ // stored (ready) slots
+ if (sback->ready != NULL) {
+ struct_inthash_enum e = inthash_enum_new((inthash)sback->ready);
+ inthash_chain* item;
+ while((item = inthash_enum_next(&e))) {
+ lien_back* ritem = (lien_back*) item->value.ptr;
+ if (ritem->status!=-1)
+ if (ritem->r.adr) // ne comptabilier que les blocs en mémoire
+ sum+=max(ritem->r.size,ritem->r.totalsize);
+ }
+ }
return sum;
}
+// retourne estimation de la taille des html et fichiers stockés en mémoire
+int back_done_incache(struct_back* sback) {
+ lien_back* const back = sback->lnk;
+ const int back_max = sback->count;
+ int i;
+ int n=0;
+ for(i=0;i<back_max;i++)
+ if (back[i].status==0)
+ n++;
+ // stored (ready) slots
+ if (sback->ready != NULL) {
+ struct_inthash_enum e = inthash_enum_new((inthash)sback->ready);
+ inthash_chain* item;
+ while((item = inthash_enum_next(&e))) {
+ lien_back* ritem = (lien_back*) item->value.ptr;
+ if (ritem->status==0)
+ n++;
+ }
+ }
+ return n;
+}
+
+
// le lien a-t-il été mis en backing?
-HTS_INLINE int back_exist(lien_back* back,int back_max,char* adr,char* fil,char* sav) {
- return (back_index(back,back_max,adr,fil,sav)>=0);
+HTS_INLINE int back_exist(struct_back* sback,char* adr,char* fil,char* sav) {
+ return (back_index_fetch(sback, adr, fil, sav, /*don't fetch*/0) >= 0);
}
// nombre de sockets en tâche de fond
-int back_nsoc(lien_back* back,int back_max) {
+int back_nsoc(struct_back* sback) {
+ lien_back* const back = sback->lnk;
+ const int back_max = sback->count;
int n=0;
int i;
for(i=0;i<back_max;i++)
@@ -131,7 +307,9 @@ int back_nsoc(lien_back* back,int back_max) {
return n;
}
-int back_nsoc_overall(lien_back* back,int back_max) {
+int back_nsoc_overall(struct_back* sback) {
+ lien_back* const back = sback->lnk;
+ const int back_max = sback->count;
int n=0;
int i;
for(i=0;i<back_max;i++)
@@ -145,250 +323,328 @@ int back_nsoc_overall(lien_back* back,int back_max) {
//
// fermer les paramètres de transfert,
// et notamment vérifier les fichiers compressés (décompresser), callback etc.
-int back_finalize(httrackp* opt,cache_back* cache,lien_back* back,int p) {
- /* Don't store broken files */
- if (back[p].r.totalsize > 0 && back[p].r.size != back[p].r.totalsize && ! opt->tolerant) {
- return -1;
- }
-
- /* Store ? */
- if (!back[p].finalized) {
- back[p].finalized = 1;
- if (
- (back[p].status == 0) // ready
- &&
- (back[p].r.statuscode>0) // not internal error
- ) {
- if (!back[p].testmode) { // not test mode
- char* state="unknown";
-
- /* décompression */
+int back_finalize(httrackp* opt,cache_back* cache,struct_back* sback,int p) {
+ lien_back* const back = sback->lnk;
+ const int back_max = sback->count;
+ assertf(p >= 0 && p < back_max);
+
+ /* Store ? */
+ if (!back[p].finalized) {
+ back[p].finalized = 1;
+
+ /* Don't store broken files */
+ if (back[p].r.totalsize > 0 && back[p].r.size != back[p].r.totalsize && ! opt->tolerant) {
+ return -1;
+ }
+
+ if (
+ (back[p].status == 0) // ready
+ &&
+ (back[p].r.statuscode>0) // not internal error
+ )
+ {
+ if (!back[p].testmode) { // not test mode
+ char* state="unknown";
+
+ /* décompression */
#if HTS_USEZLIB
- if (gz_is_available && back[p].r.compressed) {
- if (back[p].r.size > 0) {
- //if ( (back[p].r.adr) && (back[p].r.size>0) ) {
- // stats
- back[p].compressed_size=back[p].r.size;
- // en mémoire -> passage sur disque
- if (!back[p].r.is_write) {
- back[p].tmpfile_buffer[0]='\0';
- back[p].tmpfile=tmpnam(back[p].tmpfile_buffer);
- if (back[p].tmpfile != NULL && back[p].tmpfile[0] != '\0') {
- back[p].r.out=fopen(back[p].tmpfile,"wb");
- if (back[p].r.out) {
- if ((back[p].r.adr) && (back[p].r.size>0)) {
- if (fwrite(back[p].r.adr,1,(INTsys)back[p].r.size,back[p].r.out) != back[p].r.size) {
- back[p].r.statuscode=-1;
- strcpybuff(back[p].r.msg,"Write error when decompressing");
- }
- } else {
- back[p].tmpfile[0]='\0';
- back[p].r.statuscode=-1;
- strcpybuff(back[p].r.msg,"Empty compressed file");
- }
- } else {
- back[p].tmpfile[0]='\0';
- back[p].r.statuscode=-1;
- strcpybuff(back[p].r.msg,"Open error when decompressing");
- }
- }
- }
- // fermer fichier sortie
- if (back[p].r.out!=NULL) {
- fclose(back[p].r.out);
- back[p].r.out=NULL;
- }
- // décompression
- if (back[p].tmpfile != NULL && back[p].tmpfile[0] != '\0' && back[p].url_sav[0]) {
- LLint size;
- filecreateempty(back[p].url_sav); // filenote & co
- if ((size = hts_zunpack(back[p].tmpfile,back[p].url_sav))>=0) {
- back[p].r.size=back[p].r.totalsize=size;
- // fichier -> mémoire
- if (!back[p].r.is_write) {
- deleteaddr(&back[p].r);
- back[p].r.adr=readfile(back[p].url_sav);
- if (!back[p].r.adr) {
- back[p].r.statuscode=-1;
- strcpybuff(back[p].r.msg,"Read error when decompressing");
- }
- remove(back[p].url_sav);
- }
- }
- remove(back[p].tmpfile);
- }
- // stats
- HTS_STAT.total_packed+=back[p].compressed_size;
- HTS_STAT.total_unpacked+=back[p].r.size;
- HTS_STAT.total_packedfiles++;
- // unflag
- }
- }
- back[p].r.compressed=0;
-#endif
-
- /* Stats */
- if (cache->txt) {
- char flags[32];
- char s[256];
- time_t tt;
- struct tm* A;
- tt=time(NULL);
- A=localtime(&tt);
- if (A == NULL) {
- int localtime_returned_null=0;
- assert(localtime_returned_null);
- }
- strftime(s,250,"%H:%M:%S",A);
-
- flags[0]='\0';
- /* input flags */
- if (back[p].is_update)
- strcatbuff(flags, "U"); // update request
- else
- strcatbuff(flags, "-");
- if (back[p].range_req_size)
- strcatbuff(flags, "R"); // range request
- else
- strcatbuff(flags, "-");
- /* state flags */
- if (back[p].r.is_file) // direct to disk
- strcatbuff(flags, "F");
- else
- strcatbuff(flags, "-");
- /* output flags */
- if (!back[p].r.notmodified)
- strcatbuff(flags, "M"); // modified
- else
- strcatbuff(flags, "-");
- if (back[p].r.is_chunk) // chunked
- strcatbuff(flags, "C");
- else
- strcatbuff(flags, "-");
- if (back[p].r.compressed)
- strcatbuff(flags, "Z"); // gzip
- else
- strcatbuff(flags, "-");
- /* Err I had to split these.. */
- fprintf(cache->txt,"%s\t", s);
- fprintf(cache->txt,LLintP"/", (LLint)back[p].r.size);
- fprintf(cache->txt,LLintP,(LLint)back[p].r.totalsize);
- fprintf(cache->txt,"\t%s\t",flags);
- }
- if (back[p].r.statuscode==200) {
- if (back[p].r.size>=0) {
- if (strcmp(back[p].url_fil,"/robots.txt") !=0 ) {
- HTS_STAT.stat_bytes+=back[p].r.size;
- HTS_STAT.stat_files++;
- }
- if ( (!back[p].r.notmodified) && (opt->is_update) ) {
- HTS_STAT.stat_updated_files++; // page modifiée
- if (opt->log!=NULL) {
- fspc(opt->log,"info");
- if (back[p].is_update) {
- fprintf(opt->log,"engine: transfer-status: link updated: %s%s -> %s"LF,back[p].url_adr,back[p].url_fil,back[p].url_sav);
- } else {
- fprintf(opt->log,"engine: transfer-status: link added: %s%s -> %s"LF,back[p].url_adr,back[p].url_fil,back[p].url_sav);
- }
- test_flush;
- }
- if (cache->txt) {
- if (back[p].is_update) {
- state="updated";
- } else {
- state="added";
- }
- }
- } else {
- if ( (opt->debug>0) && (opt->log!=NULL) ) {
- fspc(opt->log,"info"); fprintf(opt->log,"engine: transfer-status: link recorded: %s%s -> %s"LF,back[p].url_adr,back[p].url_fil,back[p].url_sav);
- test_flush;
- }
- if (cache->txt) {
- if (opt->is_update)
- state="untouched";
- else
- state="added";
- }
- }
- } else {
- if ( (opt->debug>0) && (opt->log!=NULL) ) {
- fspc(opt->log,"info"); fprintf(opt->log,"engine: transfer-status: empty file? (%d, '%s'): %s%s"LF,back[p].r.statuscode,back[p].r.msg,back[p].url_adr,back[p].url_fil);
- test_flush;
- }
- if (cache->txt) {
- state="empty";
- }
- }
- } else {
- if ( (opt->debug>0) && (opt->log!=NULL) ) {
- fspc(opt->log,"info"); fprintf(opt->log,"engine: transfer-status: link error (%d, '%s'): %s%s"LF,back[p].r.statuscode,back[p].r.msg,back[p].url_adr,back[p].url_fil);
- }
- if (cache->txt) {
- state="error";
- }
- }
- if (cache->txt) {
- fprintf(cache->txt,
- "%d\t"
- "%s ('%s')\t"
- "%s\t"
- "%s%s\t"
- "%s%s\t%s\t"
- "(from %s%s)"
- LF,
- back[p].r.statuscode,
- state, escape_check_url_addr(back[p].r.msg),
- escape_check_url_addr(back[p].r.contenttype),
- ((back[p].r.etag[0])?"etag:":((back[p].r.lastmodified[0])?"date:":"")), escape_check_url_addr((back[p].r.etag[0])?back[p].r.etag:(back[p].r.lastmodified)),
- escape_check_url_addr(back[p].url_adr),escape_check_url_addr(back[p].url_fil),escape_check_url_addr(back[p].url_sav),
- escape_check_url_addr(back[p].referer_adr),escape_check_url_addr(back[p].referer_fil)
- );
- if (opt->flush)
- fflush(cache->txt);
- }
-
- /* Cache */
- cache_mayadd(opt,cache,&back[p].r,back[p].url_adr,back[p].url_fil,back[p].url_sav);
-
- // status finished callback
+ if (gz_is_available && back[p].r.compressed) {
+ if (back[p].r.size > 0) {
+ //if ( (back[p].r.adr) && (back[p].r.size>0) ) {
+ // stats
+ back[p].compressed_size=back[p].r.size;
+ // en mémoire -> passage sur disque
+ if (!back[p].r.is_write) {
+ back[p].tmpfile_buffer[0]='\0';
+ back[p].tmpfile=tmpnam(back[p].tmpfile_buffer);
+ if (back[p].tmpfile != NULL && back[p].tmpfile[0] != '\0') {
+ back[p].r.out=fopen(back[p].tmpfile,"wb");
+ if (back[p].r.out) {
+ if ((back[p].r.adr) && (back[p].r.size>0)) {
+ if (fwrite(back[p].r.adr,1,(INTsys)back[p].r.size,back[p].r.out) != back[p].r.size) {
+ back[p].r.statuscode=STATUSCODE_INVALID;
+ strcpybuff(back[p].r.msg,"Write error when decompressing");
+ }
+ } else {
+ back[p].tmpfile[0]='\0';
+ back[p].r.statuscode=STATUSCODE_INVALID;
+ strcpybuff(back[p].r.msg,"Empty compressed file");
+ }
+ } else {
+ back[p].tmpfile[0]='\0';
+ back[p].r.statuscode=STATUSCODE_INVALID;
+ strcpybuff(back[p].r.msg,"Open error when decompressing");
+ }
+ }
+ }
+ // fermer fichier sortie
+ if (back[p].r.out!=NULL) {
+ fclose(back[p].r.out);
+ back[p].r.out=NULL;
+ }
+ // décompression
+ if (back[p].tmpfile != NULL && back[p].tmpfile[0] != '\0') {
+ if (back[p].url_sav[0]) {
+ LLint size;
+ file_notify(back[p].url_adr, back[p].url_fil, back[p].url_sav, 1, 1, back[p].r.notmodified);
+ filecreateempty(back[p].url_sav); // filenote & co
+ if ((size = hts_zunpack(back[p].tmpfile,back[p].url_sav))>=0) {
+ back[p].r.size=back[p].r.totalsize=size;
+ // fichier -> mémoire
+ if (!back[p].r.is_write) {
+ deleteaddr(&back[p].r);
+ back[p].r.adr=readfile(back[p].url_sav);
+ if (!back[p].r.adr) {
+ back[p].r.statuscode=STATUSCODE_INVALID;
+ strcpybuff(back[p].r.msg,"Read error when decompressing");
+ }
+ unlink(back[p].url_sav);
+ }
+ }
+ }
+ /* encore that no remaining temporary file exists */
+ unlink(back[p].tmpfile);
+ back[p].tmpfile = NULL;
+ }
+ // stats
+ HTS_STAT.total_packed+=back[p].compressed_size;
+ HTS_STAT.total_unpacked+=back[p].r.size;
+ HTS_STAT.total_packedfiles++;
+ // unflag
+ }
+ }
+ back[p].r.compressed=0;
+#endif
+
+ /* Write mode to disk */
+ if (back[p].r.is_write && back[p].r.adr != NULL) {
+ freet(back[p].r.adr);
+ back[p].r.adr = NULL;
+ }
+
+ /* ************************************************************************
+ REAL MEDIA HACK
+ Check if we have to load locally the file
+ ************************************************************************ */
+ if (back[p].r.statuscode == 200) { // OK (ou 304 en backing)
+ if (back[p].r.is_write) { // Written file
+ if (may_be_hypertext_mime(back[p].r.contenttype, back[p].url_fil)) { // to parse!
+ LLint sz;
+ sz=fsize(back[p].url_sav);
+ if (sz>0) { // ok, exists!
+ if (sz < 8192) { // ok, small file --> to parse!
+ FILE* fp=fopen(back[p].url_sav,"rb");
+ if (fp) {
+ back[p].r.adr=malloct((int)sz + 2);
+ if (back[p].r.adr) {
+ if (fread(back[p].r.adr,1,(INTsys)sz,fp) == sz) {
+ back[p].r.size=sz;
+ back[p].r.adr[sz] = '\0';
+ back[p].r.is_write = 0; /* not anymore a direct-to-disk file */
+ } else {
+ freet(back[p].r.adr);
+ back[p].r.size=0;
+ back[p].r.adr = NULL;
+ back[p].r.statuscode=STATUSCODE_INVALID;
+ strcpybuff(back[p].r.msg, ".RAM read error");
+ }
+ fclose(fp);
+ fp=NULL;
+ // remove (temporary) file!
+ unlink(fconv(back[p].url_sav));
+ }
+ if (fp)
+ fclose(fp);
+ }
+ }
+ }
+ }
+ }
+ }
+ /* EN OF REAL MEDIA HACK */
+
+
+ /* Stats */
+ if (cache->txt) {
+ char flags[32];
+ char s[256];
+ time_t tt;
+ struct tm* A;
+ tt=time(NULL);
+ A=localtime(&tt);
+ if (A == NULL) {
+ int localtime_returned_null=0;
+ assert(localtime_returned_null);
+ }
+ strftime(s,250,"%H:%M:%S",A);
+
+ flags[0]='\0';
+ /* input flags */
+ if (back[p].is_update)
+ strcatbuff(flags, "U"); // update request
+ else
+ strcatbuff(flags, "-");
+ if (back[p].range_req_size)
+ strcatbuff(flags, "R"); // range request
+ else
+ strcatbuff(flags, "-");
+ /* state flags */
+ if (back[p].r.is_file) // direct to disk
+ strcatbuff(flags, "F");
+ else
+ strcatbuff(flags, "-");
+ /* output flags */
+ if (!back[p].r.notmodified)
+ strcatbuff(flags, "M"); // modified
+ else
+ strcatbuff(flags, "-");
+ if (back[p].r.is_chunk) // chunked
+ strcatbuff(flags, "C");
+ else
+ strcatbuff(flags, "-");
+ if (back[p].r.compressed)
+ strcatbuff(flags, "Z"); // gzip
+ else
+ strcatbuff(flags, "-");
+ /* Err I had to split these.. */
+ fprintf(cache->txt,"%s\t", s);
+ fprintf(cache->txt,LLintP"/", (LLint)back[p].r.size);
+ fprintf(cache->txt,LLintP,(LLint)back[p].r.totalsize);
+ fprintf(cache->txt,"\t%s\t",flags);
+ }
+ if (back[p].r.statuscode == 200) {
+ if (back[p].r.size>=0) {
+ if (strcmp(back[p].url_fil,"/robots.txt") !=0 ) {
+ HTS_STAT.stat_bytes+=back[p].r.size;
+ HTS_STAT.stat_files++;
+ }
+ if ( (!back[p].r.notmodified) && (opt->is_update) ) {
+ HTS_STAT.stat_updated_files++; // page modifiée
+ if (opt->log!=NULL) {
+ fspc(opt->log,"info");
+ if (back[p].is_update) {
+ fprintf(opt->log,"engine: transfer-status: link updated: %s%s -> %s"LF,back[p].url_adr,back[p].url_fil,back[p].url_sav);
+ } else {
+ fprintf(opt->log,"engine: transfer-status: link added: %s%s -> %s"LF,back[p].url_adr,back[p].url_fil,back[p].url_sav);
+ }
+ test_flush;
+ }
+ if (cache->txt) {
+ if (back[p].is_update) {
+ state="updated";
+ } else {
+ state="added";
+ }
+ }
+ } else {
+ if ( (opt->debug>0) && (opt->log!=NULL) ) {
+ fspc(opt->log,"info"); fprintf(opt->log,"engine: transfer-status: link recorded: %s%s -> %s"LF,back[p].url_adr,back[p].url_fil,back[p].url_sav);
+ test_flush;
+ }
+ if (cache->txt) {
+ if (opt->is_update)
+ state="untouched";
+ else
+ state="added";
+ }
+ }
+ } else {
+ if ( (opt->debug>0) && (opt->log!=NULL) ) {
+ fspc(opt->log,"info"); fprintf(opt->log,"engine: transfer-status: empty file? (%d, '%s'): %s%s"LF,back[p].r.statuscode,back[p].r.msg,back[p].url_adr,back[p].url_fil);
+ test_flush;
+ }
+ if (cache->txt) {
+ state="empty";
+ }
+ }
+ } else {
+ if ( (opt->debug>0) && (opt->log!=NULL) ) {
+ fspc(opt->log,"info"); fprintf(opt->log,"engine: transfer-status: link error (%d, '%s'): %s%s"LF,back[p].r.statuscode,back[p].r.msg,back[p].url_adr,back[p].url_fil);
+ }
+ if (cache->txt) {
+ state="error";
+ }
+ }
+ if (cache->txt) {
+ fprintf(cache->txt,
+ "%d\t"
+ "%s ('%s')\t"
+ "%s\t"
+ "%s%s\t"
+ "%s%s%s\t%s\t"
+ "(from %s%s%s)"
+ LF,
+ back[p].r.statuscode,
+ state, escape_check_url_addr(back[p].r.msg),
+ escape_check_url_addr(back[p].r.contenttype),
+ ((back[p].r.etag[0])?"etag:":((back[p].r.lastmodified[0])?"date:":"")), escape_check_url_addr((back[p].r.etag[0])?back[p].r.etag:(back[p].r.lastmodified)),
+ (link_has_authority(back[p].url_adr) ? "" : "http://"),escape_check_url_addr(back[p].url_adr),escape_check_url_addr(back[p].url_fil),escape_check_url_addr(back[p].url_sav),
+ (link_has_authority(back[p].referer_adr) || !back[p].referer_adr[0]) ? "" : "http://",escape_check_url_addr(back[p].referer_adr),escape_check_url_addr(back[p].referer_fil)
+ );
+ if (opt->flush)
+ fflush(cache->txt);
+ }
+
+ /* Cache */
+ if (!IS_DELAYED_EXT(back[p].url_sav)) {
+ cache_mayadd(opt,cache,&back[p].r,back[p].url_adr,back[p].url_fil,back[p].url_sav);
+ } else {
+ if (!HTTP_IS_OK(back[p].r.statuscode)) {
+ if ( (opt->debug>0) && (opt->log!=NULL) ) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"redirect to %s%s"LF,back[p].url_adr,back[p].url_fil);
+ }
+ /* Store only header reference */
+ cache_mayadd(opt,cache,&back[p].r,back[p].url_adr,back[p].url_fil,NULL);
+ } else {
+ if (opt->log!=NULL) {
+ fspc(opt->log,"warning"); fprintf(opt->log,"file not stored in cache due to bogus state (incomplete type): %s%s"LF,back[p].url_adr,back[p].url_fil);
+ }
+ }
+ }
+
+ // status finished callback
#if HTS_ANALYSTE
- hts_htmlcheck_xfrstatus(&back[p]);
-#endif
- return 0;
- } else { // testmode
- if (back[p].r.statuscode / 100 >= 3) { /* Store 3XX, 4XX, 5XX test response codes, but NOT 2XX */
- /* Cache */
- cache_mayadd(opt,cache,&back[p].r,back[p].url_adr,back[p].url_fil,NULL);
- }
- }
- }
- }
- return -1;
+ if (hts_htmlcheck_xfrstatus != NULL) {
+ hts_htmlcheck_xfrstatus(&back[p]);
+ }
+#endif
+ return 0;
+ } else { // testmode
+ if (back[p].r.statuscode / 100 >= 3) { /* Store 3XX, 4XX, 5XX test response codes, but NOT 2XX */
+ /* Cache */
+ cache_mayadd(opt,cache,&back[p].r,back[p].url_adr,back[p].url_fil,NULL);
+ }
+ }
+ }
+ }
+ return -1;
}
/* try to keep the connection alive */
-int back_letlive(httrackp* opt, cache_back* cache, lien_back* back, int p) {
- int checkerror;
- htsblk* src = &back[p].r;
- if (src && !src->is_file
- && src->soc != INVALID_SOCKET
- && src->statuscode >= 0 /* no timeout errors & co */
- && src->keep_alive_trailers == 0 /* not yet supported (chunk trailers) */
- && ! ( checkerror = check_sockerror(src->soc) )
- /*&& !check_sockdata(src->soc)*/ /* no unexpected data */
- ) {
- htsblk tmp;
- memset(&tmp, 0, sizeof(tmp));
- /* clear everything but connection: switch, close, and reswitch */
- back_connxfr(src, &tmp);
- back_delete(opt, cache, back, p);
- //deletehttp(src);
- back_connxfr(&tmp, src);
- src->req.flush_garbage=1; /* ignore CRLF garbage */
- return 1;
- }
- return 0;
+int back_letlive(httrackp* opt, cache_back* cache, struct_back* sback, int p) {
+ lien_back* const back = sback->lnk;
+ const int back_max = sback->count;
+ int checkerror;
+ htsblk* src = &back[p].r;
+ assertf(p >= 0 && p < back_max);
+ if (src && !src->is_file
+ && src->soc != INVALID_SOCKET
+ && src->statuscode >= 0 /* no timeout errors & co */
+ && src->keep_alive_trailers == 0 /* not yet supported (chunk trailers) */
+ && ! ( checkerror = check_sockerror(src->soc) )
+ /*&& !check_sockdata(src->soc)*/ /* no unexpected data */
+ ) {
+ htsblk tmp;
+ memset(&tmp, 0, sizeof(tmp));
+ /* clear everything but connection: switch, close, and reswitch */
+ back_connxfr(src, &tmp);
+ back_delete(opt, cache, sback, p);
+ //deletehttp(src);
+ back_connxfr(&tmp, src);
+ src->req.flush_garbage=1; /* ignore CRLF garbage */
+ return 1;
+ }
+ return 0;
}
void back_connxfr(htsblk* src, htsblk* dst) {
@@ -410,9 +666,34 @@ void back_connxfr(htsblk* src, htsblk* dst) {
src->debugid = 0;
}
+void back_move(lien_back* src, lien_back* dst) {
+ memcpy(dst, src, sizeof(lien_back));
+ memset(src, 0, sizeof(lien_back));
+ src->r.soc=INVALID_SOCKET;
+ src->status=-1;
+ src->r.location = src->location_buffer;
+ dst->r.location = dst->location_buffer;
+}
+
+void back_copy_static(const lien_back* src, lien_back* dst) {
+ memcpy(dst, src, sizeof(lien_back));
+ dst->r.soc=INVALID_SOCKET;
+ dst->r.adr = NULL;
+ dst->r.headers = NULL;
+ dst->r.out = NULL;
+ dst->r.location = dst->location_buffer;
+ dst->r.fp = NULL;
+#if HTS_USEOPENSSL
+ dst->r.ssl_con = NULL;
+#endif
+}
+
// clear, or leave for keep-alive
-int back_maydelete(httrackp* opt,cache_back* cache,lien_back* back, int p) {
- if (p>=0) { // on sait jamais..
+int back_maydelete(httrackp* opt,cache_back* cache,struct_back* sback, int p) {
+ lien_back* const back = sback->lnk;
+ const int back_max = sback->count;
+ assertf(p >= 0 && p < back_max);
+ if (p >= 0 && p < back_max) { // on sait jamais..
if (
/* Keep-alive authorized by user */
!opt->nokeepalive
@@ -429,7 +710,7 @@ int back_maydelete(httrackp* opt,cache_back* cache,lien_back* back, int p) {
) {
lien_back tmp;
strcpybuff(tmp.url_adr, back[p].url_adr);
- if (back_letlive(opt, cache, back, p)) {
+ if (back_letlive(opt, cache, sback, p)) {
strcpybuff(back[p].url_adr, tmp.url_adr);
back[p].status = -103; // alive & waiting
if ((opt->debug>1) && (opt->log!=NULL)) {
@@ -440,14 +721,17 @@ int back_maydelete(httrackp* opt,cache_back* cache,lien_back* back, int p) {
return 1;
}
}
- back_delete(opt,cache,back, p);
+ back_delete(opt,cache,sback, p);
}
return 0;
}
// clear, or leave for keep-alive
-void back_maydeletehttp(httrackp* opt, cache_back* cache, lien_back* back, int back_max, int p) {
+void back_maydeletehttp(httrackp* opt, cache_back* cache, struct_back* sback, int p) {
+ lien_back* const back = sback->lnk;
+ const int back_max = sback->count;
TStamp lt = 0;
+ assertf(p >= 0 && p < back_max);
if (back[p].r.soc!=INVALID_SOCKET) {
int q;
if (
@@ -470,7 +754,7 @@ void back_maydeletehttp(httrackp* opt, cache_back* cache, lien_back* back, int b
/* Connection delay must not exceed keep-alive timeout */
&& ( opt->maxconn <= 0 || ( back[p].r.keep_alive_t > ( 1.0 / opt->maxconn ) ) )
/* Available slot in backing */
- && ( q = back_search(opt, cache, back, back_max) ) >= 0
+ && ( q = back_search(opt, cache, sback) ) >= 0
)
{
lien_back tmp;
@@ -495,13 +779,16 @@ void back_maydeletehttp(httrackp* opt, cache_back* cache, lien_back* back, int b
/* attempt to attach a live connection to this slot */
-int back_trylive(httrackp* opt,cache_back* cache,lien_back* back, int back_max, int p) {
+int back_trylive(httrackp* opt,cache_back* cache,struct_back* sback, int p) {
+ lien_back* const back = sback->lnk;
+ const int back_max = sback->count;
+ assertf(p >= 0 && p < back_max);
if (p>=0 && back[p].status != -103) { // we never know..
- int i = back_searchlive(opt,back, back_max, back[p].url_adr); // search slot
+ int i = back_searchlive(opt,sback, back[p].url_adr); // search slot
if (i >= 0 && i != p) {
deletehttp(&back[p].r); // security check
back_connxfr(&back[i].r, &back[p].r); // transfer live connection settings from i to p
- back_delete(opt,cache,back, i); // delete old slot
+ back_delete(opt,cache,sback, i); // delete old slot
back[p].status=100; // ready to connect
return 1; // success: will reuse live connection
}
@@ -510,7 +797,9 @@ int back_trylive(httrackp* opt,cache_back* cache,lien_back* back, int back_max,
}
/* search for a live position, or, if not possible, try to return a new one */
-int back_searchlive(httrackp* opt, lien_back* back, int back_max, char* search_addr) {
+int back_searchlive(httrackp* opt, struct_back* sback, char* search_addr) {
+ lien_back* const back = sback->lnk;
+ const int back_max = sback->count;
int i;
/* search for a live socket */
@@ -526,7 +815,9 @@ int back_searchlive(httrackp* opt, lien_back* back, int back_max, char* search_a
return -1;
}
-int back_search(httrackp* opt,cache_back* cache,lien_back* back, int back_max) {
+int back_search_quick(struct_back* sback) {
+ lien_back* const back = sback->lnk;
+ const int back_max = sback->count;
int i;
/* try to find an empty place */
@@ -536,11 +827,24 @@ int back_search(httrackp* opt,cache_back* cache,lien_back* back, int back_max) {
}
}
+ /* oops, can't find a place */
+ return -1;
+}
+
+int back_search(httrackp* opt,cache_back* cache,struct_back* sback) {
+ lien_back* const back = sback->lnk;
+ const int back_max = sback->count;
+ int i;
+
+ /* try to find an empty place */
+ if ( ( i = back_search_quick(sback) ) != -1)
+ return i;
+
/* couldn't find an empty place, try to requisition a keep-alive place */
for(i = 0 ; i < back_max ; i++ ) {
if (back[i].status == -103) {
/* close this place */
- back_delete(opt,cache,back, i);
+ back_delete(opt,cache,sback, i);
return i;
}
}
@@ -549,17 +853,84 @@ int back_search(httrackp* opt,cache_back* cache,lien_back* back, int back_max) {
return -1;
}
+void back_set_finished(struct_back* sback, int p) {
+ lien_back* const back = sback->lnk;
+ const int back_max = sback->count;
+ assertf(p >= 0 && p < back_max);
+ if (p >= 0 && p < sback->count) { // we never know..
+ /* status: finished (waiting to be validated) */
+ back[p].status=0; /* finished */
+ /* close open r/w streams, if any */
+ if (back[p].r.fp!=NULL) {
+ fclose(back[p].r.fp);
+ back[p].r.fp=NULL;
+ }
+ if (back[p].r.out!=NULL) { // fermer fichier sortie
+ fclose(back[p].r.out);
+ back[p].r.out=NULL;
+ }
+ }
+}
+
+int back_flush_output(httrackp* opt, cache_back* cache, struct_back* sback, int p) {
+ lien_back* const back = sback->lnk;
+ const int back_max = sback->count;
+ assertf(p >= 0 && p < back_max);
+ if (p >= 0 && p < sback->count) { // on sait jamais..
+ /* close input file */
+ if (back[p].r.fp!=NULL) {
+ fclose(back[p].r.fp);
+ back[p].r.fp=NULL;
+ }
+ /* fichier de sortie */
+ if (back[p].r.out!=NULL) { // fermer fichier sortie
+ fclose(back[p].r.out);
+ back[p].r.out=NULL;
+ }
+ /* set file time */
+ if (back[p].r.is_write) { // ecriture directe
+ /* écrire date "remote" */
+ if (strnotempty(back[p].url_sav)
+ && strnotempty(back[p].r.lastmodified)
+ && fexist(back[p].url_sav)) // normalement existe si on a un fichier de sortie
+ {
+ set_filetime_rfc822(back[p].url_sav,back[p].r.lastmodified);
+ }
+ /* executer commande utilisateur après chargement du fichier */
+ //xx usercommand(opt,0,NULL,back[p].url_sav, back[p].url_adr, back[p].url_fil);
+ back[p].r.is_write=0;
+ }
+ return 1;
+ }
+ return 0;
+}
+
// effacer entrée
-int back_delete(httrackp* opt, cache_back* cache, lien_back* back, int p) {
- if (p>=0) { // on sait jamais..
+int back_set_passe2_ptr(httrackp* opt, cache_back* cache, struct_back* sback, int p, int* pass2_ptr) {
+ lien_back* const back = sback->lnk;
+ const int back_max = sback->count;
+ assertf(p >= 0 && p < back_max);
+ if (p >= 0 && p < sback->count) { // on sait jamais..
+ back[p].pass2_ptr = pass2_ptr;
+ return 1;
+ }
+ return 0;
+}
+
+// effacer entrée
+int back_delete(httrackp* opt, cache_back* cache, struct_back* sback, int p) {
+ lien_back* const back = sback->lnk;
+ const int back_max = sback->count;
+ assertf(p >= 0 && p < back_max);
+ if (p >= 0 && p < sback->count) { // on sait jamais..
// Vérificateur d'intégrité
- #if DEBUG_CHECKINT
+#if DEBUG_CHECKINT
_CHECKINT(&back[p],"Appel back_delete")
- #endif
+#endif
#if HTS_DEBUG_CLOSESOCK
- DEBUG_W("back_delete: #%d\n" _ (int) p);
+ DEBUG_W("back_delete: #%d\n" _ (int) p);
#endif
-
+
// Finalize
if (!back[p].finalized) {
if (
@@ -569,13 +940,18 @@ int back_delete(httrackp* opt, cache_back* cache, lien_back* back, int p) {
&&
(back[p].r.statuscode>0) // not internal error
) {
- if ((opt->debug>1) && (opt->log!=NULL)) {
- fspc(opt->log,"debug"); fprintf(opt->log,"File '%s%s' -> %s not yet saved in cache - saving now"LF, back[p].url_adr, back[p].url_fil, back[p].url_sav); test_flush;
+ if (opt != NULL && opt->debug>1 && opt->log!=NULL) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"File '%s%s' -> %s not yet saved in cache - saving now"LF, back[p].url_adr, back[p].url_fil, back[p].url_sav); test_flush;
+ }
+ }
+ if (cache != NULL) {
+ back_finalize(opt, cache, sback, p);
}
- }
- back_finalize(opt, cache, back, p);
}
back[p].finalized = 0;
+
+ // flush output buffers
+ (void) back_flush_output(opt, cache, sback, p);
// Libérer tous les sockets, handles, buffers..
if (back[p].r.soc!=INVALID_SOCKET) {
@@ -597,12 +973,12 @@ int back_delete(httrackp* opt, cache_back* cache, lien_back* back, int p) {
back[p].chunk_blocksize=0;
back[p].is_chunk=0;
}
- // if (back[p].r.is_file) { // fermer fichier entrée
- if (back[p].r.fp!=NULL) {
- fclose(back[p].r.fp);
- back[p].r.fp=NULL;
- }
- // }
+
+ // only for security
+ if (back[p].tmpfile && back[p].tmpfile[0] != '\0') {
+ (void) unlink(back[p].tmpfile);
+ back[p].tmpfile = NULL;
+ }
// headers
if (back[p].r.headers != NULL) {
@@ -610,36 +986,21 @@ int back_delete(httrackp* opt, cache_back* cache, lien_back* back, int p) {
back[p].r.headers = NULL;
}
- /* fichier de sortie */
- if (back[p].r.out!=NULL) { // fermer fichier sortie
- fclose(back[p].r.out);
- back[p].r.out=NULL;
- }
-
- if (back[p].r.is_write) { // ecriture directe
- /* écrire date "remote" */
- if (strnotempty(back[p].url_sav)) // normalement existe si on a un fichier de sortie
- if (strnotempty(back[p].r.lastmodified)) // last-modified existe
- if (fexist(back[p].url_sav)) // ainsi que le fichier
- set_filetime_rfc822(back[p].url_sav,back[p].r.lastmodified);
-
- /* executer commande utilisateur après chargement du fichier */
- //xx usercommand(opt,0,NULL,back[p].url_sav, back[p].url_adr, back[p].url_fil);
- back[p].r.is_write=0;
- }
-
// Tout nettoyer
memset(&back[p], 0, sizeof(lien_back));
back[p].r.soc=INVALID_SOCKET; back[p].r.location=back[p].location_buffer;
// Le plus important: libérer le champ
back[p].status=-1;
+ return 1;
}
return 0;
}
/* Space left on backing stack */
-int back_stack_available(lien_back* back,int back_max) {
+int back_stack_available(struct_back* sback) {
+ lien_back* const back = sback->lnk;
+ const int back_max = sback->count;
int p=0,n=0;
for( ; p < back_max ; p++ )
if ( back[p].status == -1 )
@@ -648,7 +1009,19 @@ int back_stack_available(lien_back* back,int back_max) {
}
// ajouter un lien en backing
-int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* referer_adr,char* referer_fil,int test,int* pass2_ptr) {
+int back_add_if_not_exists(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* referer_adr,char* referer_fil,int test,int* pass2_ptr) {
+ int index = back_index(sback, adr, fil, save);
+ if (index < 0) {
+ return back_add(sback, opt, cache, adr, fil, save, referer_adr, referer_fil, test, pass2_ptr);
+ } else {
+ /* Ensure that the reference to pass2_ptr is set */
+ return back_set_passe2_ptr(opt,cache,sback,index,pass2_ptr);
+ }
+}
+
+int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* referer_adr,char* referer_fil,int test,int* pass2_ptr) {
+ lien_back* const back = sback->lnk;
+ const int back_max = sback->count;
int p=0;
// vérifier cohérence de adr et fil (non vide!)
@@ -670,8 +1043,8 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char*
opt->state.back_add_stats++;
// rechercher emplacement
- back_clean(opt, cache, back, back_max);
- if ( ( p = back_search(opt, cache, back, back_max) ) >= 0) {
+ back_clean(opt, cache, sback);
+ if ( ( p = back_search(opt, cache, sback) ) >= 0) {
back[p].send_too[0]='\0'; // éventuels paramètres supplémentaires à transmettre au serveur
// clear r
@@ -719,9 +1092,10 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char*
/* Stop requested - abort backing */
if (opt->state.stop) {
- back[p].r.statuscode=-1; // fatal
+ back[p].r.statuscode=STATUSCODE_INVALID; // fatal
strcpybuff(back[p].r.msg,"mirror stopped by user");
back[p].status=0; // terminé
+ back_set_finished(sback, p);
if ((opt->debug>0) && (opt->log!=NULL)) {
fspc(opt->log,"warning"); fprintf(opt->log,"File not added due to mirror cancel: %s%s"LF,adr,fil); test_flush;
}
@@ -751,8 +1125,6 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char*
if ((strcmp(adr,"file://")) /* pas fichier */
&& ( (!test) || (cache->type==1) ) /* cache prioritaire, laisser passer en test! */
&& ( (strnotempty(save)) || (strcmp(fil,"/robots.txt")==0) ) ) { // si en test on ne doit pas utiliser le cache sinon telescopage avec le 302..
- //if ((!test) && (strcmp(adr,"file://"))
- //if ((!test) && (strncmp(adr,"ftp://",6)) && (strcmp(adr,"file://"))
#if HTS_FAST_CACHE
long int hash_pos;
int hash_pos_return=0;
@@ -788,7 +1160,8 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char*
sscanf(a,"%d",&pos); // lire position
#endif
if (pos<0) { // pas de mise en cache data, vérifier existence
- if (fsize(fconv(save)) <= 0) { // fichier existe pas ou est vide!
+ /* note: no check with IS_DELAYED_EXT() enabled - postcheck by client please! */
+ if (!IS_DELAYED_EXT(save) && fsize(fconv(save)) <= 0) { // fichier existe pas ou est vide!
int found=0;
/* It is possible that the file has been moved due to changes in build structure */
@@ -830,7 +1203,7 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char*
}
}
}
- }
+ } // fsize() <= 0
}
}
}
@@ -863,7 +1236,8 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char*
if (back[p].r.statuscode != -1) { // pas d'erreur de lecture
if (!back_checksize(opt,&back[p],0)) {
back[p].status=0; // FINI
- back[p].r.statuscode=-1;
+ back_set_finished(sback, p);
+ back[p].r.statuscode=STATUSCODE_TOO_BIG;
if (!back[p].testmode)
strcpybuff(back[p].r.msg,"Cached file skipped (too big)");
else
@@ -872,7 +1246,7 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char*
}
}
- if (back[p].r.statuscode != -1) { // pas d'erreur de lecture
+ if (back[p].r.statuscode != -1 || IS_DELAYED_EXT(save)) { // pas d'erreur de lecture ou test retardé
if ((opt->debug>0) && (opt->log!=NULL)) {
if (!test) {
fspc(opt->log,"debug"); fprintf(opt->log,"File immediately loaded from cache: %s%s"LF,back[p].url_adr,back[p].url_fil); test_flush;
@@ -882,11 +1256,13 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char*
}
back[p].r.notmodified=1; // fichier non modifié
back[p].status=0; // OK prêt
+ //file_notify(back[p].url_adr, back[p].url_fil, back[p].url_sav, 0, 0, back[p].r.notmodified); // not modified
+ back_set_finished(sback, p);
// finalize transfer
if (!test) {
if (back[p].r.statuscode>0) {
- back_finalize(opt,cache,back,p);
+ back_finalize(opt,cache,sback,p);
}
}
@@ -906,9 +1282,11 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char*
LLint save_totalsize=back[p].r.totalsize;
back[p].r.totalsize=r.totalsize;
if (!back_checksize(opt,&back[p],1)) {
- r.statuscode = -1;
+ r.statuscode = STATUSCODE_INVALID;
//
back[p].status=0; // FINI
+ back_set_finished(sback, p);
+ back[p].r.statuscode=STATUSCODE_TOO_BIG;
deletehttp(&back[p].r); back[p].r.soc=INVALID_SOCKET;
if (!back[p].testmode)
strcpybuff(back[p].r.msg,"File too big");
@@ -1010,8 +1388,10 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char*
}
} else if (opt->norecatch) { // tester norecatch
filenote(save,NULL); // ne pas purger tout de même
+ file_notify(back[p].url_adr, back[p].url_fil, back[p].url_sav, 0, 0, back[p].r.notmodified);
back[p].status=0; // OK prêt
- back[p].r.statuscode=-1; // erreur
+ back_set_finished(sback, p);
+ back[p].r.statuscode=STATUSCODE_INVALID; // erreur
strcpybuff(back[p].r.msg,"Null-size file not recaught");
return 0;
}
@@ -1072,6 +1452,25 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char*
return 0;
}
}
+#if HTS_USEMMS
+ else if (strfield(back[p].url_adr,"mms://")) {
+ MMSDownloadStruct str;
+ if (back[p].testmode) {
+ if ((opt->debug>1) && (opt->errlog!=NULL)) {
+ fspc(opt->errlog,"debug"); fprintf(opt->errlog,"error: forbidden test with mms link for back_add"LF);
+ }
+ return -1; // erreur pas de test permis
+ }
+ if (back[p].r.req.proxy.active) {
+ fspc(opt->errlog,"warning"); fprintf(opt->errlog,"warning: direct connection for mms links (proxy settings ignored)"LF);
+ }
+ back[p].status=1000; // connexion externe
+ str.pBack = &back[p];
+ str.pOpt = opt;
+ launch_mms(&str);
+ return 0;
+ }
+#endif
#if HTS_USEOPENSSL
else if (SSL_is_available && strfield(back[p].url_adr,"https://")) { // let's rock
back[p].r.ssl = 1;
@@ -1080,7 +1479,7 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char*
}
#endif
- if (!back_trylive(opt, cache, back, back_max, p)) {
+ if (!back_trylive(opt, cache, sback, p)) {
#if HTS_XGETHOST
#if HDEBUG
printf("back_solve..\n");
@@ -1095,6 +1494,7 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char*
soc=http_xfopen(0,0,0,back[p].send_too,adr,fil,&(back[p].r));
if (soc==INVALID_SOCKET) {
back[p].status=0; // fini, erreur
+ back_set_finished(sback, p);
}
}
//
@@ -1161,6 +1561,7 @@ printf("Xfopen ok, poll..\n");
#else
if (soc==INVALID_SOCKET) { // erreur socket
back[p].status=0; // FINI
+ back_set_finished(sback, p);
//if (back[p].soc!=INVALID_SOCKET) deletehttp(back[p].soc);
back[p].r.soc=INVALID_SOCKET;
} else {
@@ -1275,7 +1676,12 @@ PTHREAD_TYPE PTHREAD_TYPE_FNC Hostlookup(void* iadr_p) {
// si c'est un fichier, la résolution est immédiate
// idem pour ftp://
void back_solve(lien_back* back) {
- if ((!strfield(back->url_adr,"file://")) && (!strfield(back->url_adr,"ftp://"))) {
+ if ((!strfield(back->url_adr,"file://"))
+ && ! strfield(back->url_adr,"ftp://")
+#if HTS_USEMMS
+ && ! strfield(back->url_adr,"mms://")
+#endif
+ ) {
//## if (back->url_adr[0]!=lOCAL_CHAR) { // qq chose à préparer
char* a;
if (!(back->r.req.proxy.active))
@@ -1318,7 +1724,12 @@ void back_solve(lien_back* back) {
// détermine si le host a pu être résolu
int host_wait(lien_back* back) {
- if ((!strfield(back->url_adr,"file://")) && (!strfield(back->url_adr,"ftp://"))) {
+ if ((!strfield(back->url_adr,"file://"))
+ && (!strfield(back->url_adr,"ftp://"))
+#if HTS_USEMMS
+ && (!strfield(back->url_adr,"mms://"))
+#endif
+ ) {
//## if (back->url_adr[0]!=lOCAL_CHAR) {
if (!(back->r.req.proxy.active)) {
return (hts_dnstest(back->url_adr));
@@ -1334,7 +1745,9 @@ int host_wait(lien_back* back) {
// cleanup non-html files in backing to save backing space
// and allow faster "save in cache" operation
// also cleanup keep-alive sockets and ensure that not too many sockets are being opened
-void back_clean(httrackp* opt,cache_back* cache,lien_back* back,int back_max) {
+void back_clean(httrackp* opt,cache_back* cache,struct_back* sback) {
+ lien_back* const back = sback->lnk;
+ const int back_max = sback->count;
#if HTS_ANALYSTE
int oneMore = ( (_hts_in_html_parsing == 2 && opt->maxsoc >= 2) || (_hts_in_html_parsing == 1 && opt->maxsoc >= 4) ) ? 1 : 0; // testing links
#endif
@@ -1344,28 +1757,22 @@ void back_clean(httrackp* opt,cache_back* cache,lien_back* back,int back_max) {
/* Check autoclean */
if (!back[i].testmode) { // not test mode
if (strnotempty(back[i].url_sav)) { // filename exists
- if (back[i].r.statuscode==200) { // HTTP "OK"
+ if (back[i].r.statuscode==200) { // HTTP "OK"
if (back[i].r.size>0) { // size>0
- if (back[i].r.is_write // not in memory (on disk, ready)
+ if (back[i].r.is_write // not in memory (on disk, ready)
&& !is_hypertext_mime(back[i].r.contenttype, back[i].url_fil) // not HTML/hypertext
&& !may_be_hypertext_mime(back[i].r.contenttype, back[i].url_fil) // may NOT be parseable mime type
- ) {
+ )
+ {
if (back[i].pass2_ptr) {
- // finalize
- // // back_finalize(opt,cache,back,i);
- // stats
- //HTS_STAT.stat_bytes+=back[i].r.size;
- //HTS_STAT.stat_files++;
- //if ( (!back[i].r.notmodified) && (opt->is_update) ) {
- // HTS_STAT.stat_updated_files++; // page modifiée
- //}
- //xxxcache_mayadd(opt,cache,&back[i].r,back[i].url_adr,back[i].url_fil,back[i].url_sav);
+ (void) back_flush_output(opt, cache, sback, i); // flush output buffers
usercommand(opt, 0, NULL, back[i].url_sav, back[i].url_adr, back[i].url_fil);
*back[i].pass2_ptr=-1; // Done!
+ HTS_STAT.stat_background++;
if ((opt->debug>0) && (opt->log!=NULL)) {
fspc(opt->log,"info"); fprintf(opt->log,"File successfully written in background: %s"LF,back[i].url_sav); test_flush;
}
- back_maydelete(opt,cache,back,i); // May delete backing entry
+ back_maydelete(opt,cache,sback,i); // May delete backing entry
}
} else {
if (!back[i].finalized) {
@@ -1375,19 +1782,19 @@ void back_clean(httrackp* opt,cache_back* cache,lien_back* back,int back_max) {
if ( (opt->debug>1) && (opt->log!=NULL) ) {
fspc(opt->log,"debug"); fprintf(opt->log,"file %s%s validated (cached, left in memory)"LF,back[i].url_adr,back[i].url_fil); test_flush;
}
- back_maydeletehttp(opt, cache, back, back_max, i);
+ back_maydeletehttp(opt, cache, sback, i);
} else {
- /*
- NOT YET HANDLED CORRECTLY (READ IN NEW CACHE TO DO)
+ /*
+ NOT YET HANDLED CORRECTLY (READ IN NEW CACHE TO DO)
*/
/* Lock the entry but do not keep the html data in memory (in cache) */
if (opt->cache) {
htsblk r;
-
+
/* Ensure deleted or recycled socket */
- back_maydeletehttp(opt, cache, back, back_max, i);
+ back_maydeletehttp(opt, cache, sback, i);
assertf(back[i].r.soc == INVALID_SOCKET);
-
+
/* Check header */
cache_header(opt,cache,back[i].url_adr,back[i].url_fil,&r);
if (r.statuscode == 200) {
@@ -1426,7 +1833,7 @@ void back_clean(httrackp* opt,cache_back* cache,lien_back* back,int back_max) {
back[i].url_adr);
test_flush;
}
- back_delete(opt,cache,back, i); // delete backing entry
+ back_delete(opt,cache,sback, i); // delete backing entry
}
}
}
@@ -1434,15 +1841,14 @@ void back_clean(httrackp* opt,cache_back* cache,lien_back* back,int back_max) {
for(i=0;i<back_max;i++) {
if (back[i].status == 0) { // ready
if (back[i].r.soc != INVALID_SOCKET) {
- back_maydeletehttp(opt,cache,back, back_max, i);
+ back_maydeletehttp(opt,cache,sback, i);
}
-
}
}
/* delete sockets if too many keep-alive'd sockets in background */
if (opt->maxsoc > 0) {
int max = opt->maxsoc + oneMore;
- int curr = back_nsoc_overall(back, back_max);
+ int curr = back_nsoc_overall(sback);
if (curr > max) {
if ((opt->debug>1) && (opt->log!=NULL)) {
fspc(opt->log,"debug"); fprintf(opt->log,"(Keep-Alive): deleting #%d sockets"LF,
@@ -1451,16 +1857,26 @@ void back_clean(httrackp* opt,cache_back* cache,lien_back* back,int back_max) {
}
for(i = 0 ; i < back_max && curr > max ; i++) {
if (back[i].status == -103) {
- back_delete(opt,cache,back, i); // delete backing entry
+ back_delete(opt,cache,sback, i); // delete backing entry
curr--;
}
}
}
+ /* transfer ready slots to the storage hashtable */
+ {
+ int nxfr = back_cleanup_background(opt,cache,sback);
+ if (nxfr > 0 && (opt->debug>0) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"(htsback): %d slots ready moved to background"LF, nxfr);
+ test_flush;
+ }
+ }
}
// attente (gestion des buffers des sockets)
-void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TStamp stat_timestart) {
+void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_timestart) {
+ lien_back* const back = sback->lnk;
+ const int back_max = sback->count;
unsigned int i_mod;
T_SOC nfds=INVALID_SOCKET;
fd_set fds,fds_c,fds_e; // fds pour lecture, connect (write), et erreur
@@ -1482,7 +1898,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
#if 1
// Cleanup the stack to save space!
- back_clean(opt,cache,back,back_max);
+ back_clean(opt,cache,sback);
#endif
// recevoir tant qu'il y a des données (avec un maximum de max_loop boucles)
@@ -1567,12 +1983,13 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
nfds=back[i].r.soc;
}
} else {
- back[i].r.statuscode=-4;
+ back[i].r.statuscode=STATUSCODE_CONNERROR;
if (back[i].status==100)
strcpybuff(back[i].r.msg,"Connect Error");
else
strcpybuff(back[i].r.msg,"Receive Error");
back[i].status=0; // terminé
+ back_set_finished(sback, i);
if ((opt->debug>0) && (opt->log!=NULL)) {
fspc(opt->log,"warning"); fprintf(opt->log,"Unexpected socket error during pre-loop"LF); test_flush;
}
@@ -1640,15 +2057,16 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
deletehttp(&back[i].r);
}
back[i].r.soc=INVALID_SOCKET;
- back[i].r.statuscode=-4;
+ back[i].r.statuscode=STATUSCODE_CONNERROR;
if (back[i].status==100)
strcpybuff(back[i].r.msg,"Connect Error");
else
strcpybuff(back[i].r.msg,"Receive Error");
if (back[i].status == -103) { /* Keep-alive socket */
- back_delete(opt,cache,back, i);
+ back_delete(opt,cache,sback, i);
} else {
back[i].status=0; // terminé
+ back_set_finished(sback, i);
}
}
}
@@ -1682,17 +2100,18 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
SSL_set_connect_state(back[i].r.ssl_con);
back[i].status = 102; /* handshake wait */
} else
- back[i].r.statuscode=-6;
+ back[i].r.statuscode=STATUSCODE_SSL_HANDSHAKE;
} else
- back[i].r.statuscode=-6;
+ back[i].r.statuscode=STATUSCODE_SSL_HANDSHAKE;
}
/* Error */
- if (back[i].r.statuscode == -6) {
+ if (back[i].r.statuscode == STATUSCODE_SSL_HANDSHAKE) {
strcpybuff(back[i].r.msg, "bad SSL/TLS handshake");
deletehttp(&back[i].r);
back[i].r.soc=INVALID_SOCKET;
- back[i].r.statuscode=-5;
+ back[i].r.statuscode=STATUSCODE_NON_FATAL;
back[i].status=0;
+ back_set_finished(sback, i);
}
}
@@ -1753,9 +2172,10 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
}
deletehttp(&back[i].r);
back[i].r.soc=INVALID_SOCKET;
- back[i].r.statuscode=-5;
+ back[i].r.statuscode=STATUSCODE_NON_FATAL;
back[i].status=0;
- }
+ back_set_finished(sback, i);
+ }
} else { /* got it! */
back[i].status=100; // back to waitconnect
}
@@ -1763,8 +2183,9 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
strcpybuff(back[i].r.msg, "unexpected SSL/TLS error");
deletehttp(&back[i].r);
back[i].r.soc=INVALID_SOCKET;
- back[i].r.statuscode=-5;
+ back[i].r.statuscode=STATUSCODE_NON_FATAL;
back[i].status=0;
+ back_set_finished(sback, i);
}
}
@@ -1791,6 +2212,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
back[i].r.soc=http_xfopen(0,0,0,back[i].send_too,back[i].url_adr,back[i].url_fil,&(back[i].r));
if (back[i].r.soc==INVALID_SOCKET) {
back[i].status=0; // fini, erreur
+ back_set_finished(sback, i);
if (back[i].r.soc!=INVALID_SOCKET) {
#if HTS_DEBUG_CLOSESOCK
DEBUG_W("back_wait(2): deletehttp\n");
@@ -1798,7 +2220,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
deletehttp(&back[i].r);
}
back[i].r.soc=INVALID_SOCKET;
- back[i].r.statuscode=-5;
+ back[i].r.statuscode=STATUSCODE_NON_FATAL;
if (strnotempty(back[i].r.msg)==0)
strcpybuff(back[i].r.msg,"Unable to resolve host name");
}
@@ -1825,25 +2247,27 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
}
back[i].r.msg[j++]='\0';
fclose(fp);
- remove(fconcat(back[i].location_buffer,".ok"));
+ unlink(fconcat(back[i].location_buffer,".ok"));
strcpybuff(fconcat(back[i].location_buffer,".ok"),"");
} else {
strcpybuff(back[i].r.msg,"Unknown ftp result, check if file is ok");
- back[i].r.statuscode=-1;
+ back[i].r.statuscode=STATUSCODE_INVALID;
}
back[i].status=0;
+ back_set_finished(sback, i);
// finalize transfer
if (back[i].r.statuscode>0) {
- back_finalize(opt,cache,back,i);
+ back_finalize(opt,cache,sback,i);
}
}
}
#endif
else if (back[i].status==1001) { // ftp ready
back[i].status=0;
+ back_set_finished(sback, i);
// finalize transfer
if (back[i].r.statuscode>0) {
- back_finalize(opt,cache,back,i);
+ back_finalize(opt,cache,sback,i);
}
}
else if ((back[i].status>0) && (back[i].status<1000)) { // en réception http
@@ -1888,8 +2312,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
if (strnotempty(back[i].url_sav)) {
if (strcmp(back[i].url_fil,"/robots.txt")) {
if (back[i].r.statuscode==200) { // 'OK'
- if (!is_hypertext_mime(back[i].r.contenttype, back[i].url_fil)
- ) { // pas HTML
+ if (!is_hypertext_mime(back[i].r.contenttype, back[i].url_fil)) { // pas HTML
if (opt->getmode&2) { // on peut ecrire des non html
int fcheck=0;
back[i].r.is_write=1; // écrire
@@ -1903,11 +2326,13 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
if (back[i].tmpfile != NULL && back[i].tmpfile[0])
back[i].r.out=fopen(back[i].tmpfile,"wb");
} else {
+ file_notify(back[i].url_adr, back[i].url_fil, back[i].url_sav, 1, 1, back[i].r.notmodified);
back[i].r.compressed=0;
back[i].r.out=filecreate(back[i].url_sav);
}
if (back[i].r.out==NULL) {
if ((fcheck=check_fatal_io_errno())) {
+ fspc(opt->log,"error"); fprintf(opt->log,"Mirror aborted: disk full or filesystem problems"LF); test_flush;
opt->state.exit_xh=-1; /* fatal error */
}
}
@@ -1941,10 +2366,11 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
fspc(opt->log,"debug"); fprintf(opt->log,"File cancelled (non HTML): %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush;
}
back[i].status=0; // terminé
+ back_set_finished(sback, i);
if (!back[i].testmode)
- back[i].r.statuscode=-10; // EUHH CANCEL
+ back[i].r.statuscode=STATUSCODE_INVALID; // EUHH CANCEL
else
- back[i].r.statuscode=-10; // "TEST OK"
+ back[i].r.statuscode=STATUSCODE_TEST_OK; // "TEST OK"
if (back[i].r.soc!=INVALID_SOCKET) {
#if HTS_DEBUG_CLOSESOCK
DEBUG_W("back_wait(3): deletehttp\n");
@@ -2021,6 +2447,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
if (retour_fread < 0) { // fin réception
back[i].status=0; // terminé
+ back_set_finished(sback, i);
/*KA back[i].r.soc=INVALID_SOCKET; */
#if CHUNKDEBUG==1
if (back[i].is_chunk)
@@ -2031,7 +2458,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
strcatbuff(back[i].r.msg, "Interrupted transfer");
else
strcatbuff(back[i].r.msg, "No data (connection closed)");
- back[i].r.statuscode=-4;
+ back[i].r.statuscode=STATUSCODE_CONNERROR;
} else if ((back[i].r.statuscode <= 0) && (strnotempty(back[i].r.msg)==0)) {
#if HDEBUG
printf("error interruped: %s\n",back[i].r.adr);
@@ -2040,7 +2467,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
strcatbuff(back[i].r.msg,"Interrupted transfer");
else
strcatbuff(back[i].r.msg,"No data (connection closed)");
- back[i].r.statuscode=-4;
+ back[i].r.statuscode=STATUSCODE_CONNERROR;
}
// Close socket
@@ -2049,12 +2476,14 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
DEBUG_W("back_wait(4): deletehttp\n");
#endif
/*KA deletehttp(&back[i].r);*/
- back_maydeletehttp(opt, cache, back, back_max, i);
+ back_maydeletehttp(opt, cache, sback, i);
}
// finalize transfer
- if (back[i].r.statuscode>0) {
- back_finalize(opt,cache,back,i);
+ if (back[i].r.statuscode>0
+ && !IS_DELAYED_EXT(back[i].url_sav)
+ ) {
+ back_finalize(opt,cache,sback,i);
}
if (back[i].r.totalsize>0) { // tester totalsize
@@ -2064,7 +2493,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
//#if HTS_CL_IS_FATAL
deleteaddr(&back[i].r);
if (back[i].r.size<back[i].r.totalsize)
- back[i].r.statuscode=-4; // recatch
+ back[i].r.statuscode=STATUSCODE_CONNERROR; // recatch
sprintf(back[i].r.msg,"Incorrect length ("LLintP" Bytes, "LLintP" expected)",(LLint)back[i].r.size,(LLint)back[i].r.totalsize);
} else {
//#else
@@ -2109,15 +2538,17 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
if (sscanf(chunk_data,"%x",&chunk_size) == 1) {
if (chunk_size > 0)
back[i].chunk_blocksize = chunk_size; /* the data block chunk size */
- else
- back[i].chunk_blocksize = -1; /* ending */
- back[i].r.totalsize+=chunk_size; // noter taille
- back[i].r.adr=(char*) realloct(back[i].r.adr,(INTsys) back[i].r.totalsize + 1);
- if (!back[i].r.adr) {
- if (cache->errlog!=NULL) {
- fprintf(cache->errlog,"Error: Not enough memory ("LLintP") for %s%s"LF,(LLint)back[i].r.totalsize,back[i].url_adr,back[i].url_fil);
- }
- }
+ else
+ back[i].chunk_blocksize = -1; /* ending */
+ back[i].r.totalsize+=chunk_size; // noter taille
+ if (back[i].r.adr != NULL || !back[i].r.is_write) { // Not to disk
+ back[i].r.adr=(char*) realloct(back[i].r.adr,(INTsys) back[i].r.totalsize + 1);
+ if (!back[i].r.adr) {
+ if (cache->errlog!=NULL) {
+ fprintf(cache->errlog,"Error: Not enough memory ("LLintP") for %s%s"LF,(LLint)back[i].r.totalsize,back[i].url_adr,back[i].url_fil);
+ }
+ }
+ }
#if CHUNKDEBUG==1
printf("[%d] chunk length: %d - next total "LLintP":\n",(int)back[i].r.soc,(int)chunk_size,(LLint)back[i].r.totalsize);
#endif
@@ -2177,10 +2608,19 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
/* End */
//if (back[i].status==97) {
back[i].status=0; // fin
+ back_set_finished(sback, i);
//}
- // finalize transfer
- back_finalize(opt,cache,back,i);
+ // finalize transfer if not temporary
+ if (!IS_DELAYED_EXT(back[i].url_sav)) {
+ back_finalize(opt,cache,sback,i);
+ } else {
+ if (back[i].r.statuscode == 200) {
+ if (cache->errlog!=NULL) {
+ fspc(cache->errlog,"warning"); fprintf(cache->errlog,"Unexpected incomplete type with 200 code at %s%s"LF, back[i].url_adr, back[i].url_fil);
+ }
+ }
+ }
if (back[i].r.soc!=INVALID_SOCKET) {
#if HTS_DEBUG_CLOSESOCK
DEBUG_W("back_wait(5): deletehttp\n");
@@ -2189,7 +2629,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
if (chunk_size < 0) {
deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET;
deleteaddr(&back[i].r);
- back[i].r.statuscode=-1;
+ back[i].r.statuscode=STATUSCODE_INVALID;
strcpybuff(back[i].r.msg,"Invalid chunk");
#if CHUNKDEBUG==1
printf("[%d] chunk error\n", (int)back[i].r.soc);
@@ -2204,7 +2644,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
if (back[i].r.totalsize!=back[i].r.size) { // pas la même!
if (!opt->tolerant) {
deleteaddr(&back[i].r);
- back[i].r.statuscode=-1;
+ back[i].r.statuscode=STATUSCODE_INVALID;
strcpybuff(back[i].r.msg,"Incorrect length");
} else {
// Un warning suffira..
@@ -2323,9 +2763,10 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
fspc(opt->log,"warning"); fprintf(opt->log,"External wrapper aborted transfer, breaking connection: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush;
}
back[i].status=0; // FINI
+ back_set_finished(sback, i);
deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET;
strcpybuff(back[i].r.msg,"External wrapper aborted transfer");
- back[i].r.statuscode = -1;
+ back[i].r.statuscode = STATUSCODE_INVALID;
}
}
#endif
@@ -2352,7 +2793,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
back[i].r.size=0;
back[i].r.totalsize=0;
back[i].chunk_size=0;
- back[i].r.statuscode=-1;
+ back[i].r.statuscode=STATUSCODE_INVALID;
back[i].r.msg[0]='\0';
if ((opt->debug>1) && (opt->log!=NULL)) {
fspc(opt->log,"debug"); fprintf(opt->log,"Status 100 detected for %s%s, continuing headers"LF,back[i].url_adr,back[i].url_fil); test_flush;
@@ -2370,9 +2811,10 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
// Content-Range: bytes */2830
if (back[i].range_req_size == back[i].r.crange) {
filenote(back[i].url_sav,NULL);
- //xxusercommand(opt,0,NULL,back[i].url_sav,back[i].url_adr,back[i].url_fil);
+ file_notify(back[i].url_adr, back[i].url_fil, back[i].url_sav, 0, 0, back[i].r.notmodified);
deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET;
back[i].status=0; // READY
+ back_set_finished(sback, i);
back[i].r.size=back[i].r.totalsize=back[i].range_req_size;
back[i].r.statuscode=304; // NOT MODIFIED
if ((opt->debug>1) && (opt->log!=NULL)) {
@@ -2386,7 +2828,22 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
back[i].r.statuscode=200;
}
- // Various hacks to limit re-transfers when updating a mirror
+ // 'do not erase already downloaded file'
+ // on an updated file
+ // with an error : consider a 304 error
+ if (!opt->delete_old) {
+ if (HTTP_IS_ERROR(back[i].r.statuscode) && back[i].is_update && !back[i].testmode) {
+ if (back[i].url_sav[0] && fexist(back[i].url_sav)) {
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"Error ignored %d (%s) because of 'no purge' option for %s%s"LF,back[i].r.statuscode,back[i].r.msg,back[i].url_adr,back[i].url_fil); test_flush;
+ }
+ back[i].r.statuscode = 304;
+ deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET;
+ }
+ }
+ }
+
+ // Various hacks to limit re-transfers when updating a mirror
// Force update if same size detected
if (opt->sizehack) {
// We already have the file
@@ -2421,6 +2878,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
}
if (r.adr) {
freet(r.adr);
+ r.adr = NULL;
}
}
}
@@ -2438,9 +2896,10 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
if (back[i].r.totalsize == size) { // same size!
deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET;
back[i].status=0; // READY
+ back_set_finished(sback, i);
back[i].r.size=back[i].r.totalsize;
filenote(back[i].url_sav,NULL);
- //xxusercommand(opt,0,NULL,back[i].url_sav,back[i].url_adr,back[i].url_fil);
+ file_notify(back[i].url_adr, back[i].url_fil, back[i].url_sav, 0, 0, back[i].r.notmodified);
back[i].r.statuscode=304; // NOT MODIFIED
if ((opt->debug>1) && (opt->log!=NULL)) {
fspc(opt->log,"debug"); fprintf(opt->log,"File seems complete (same size file discovered), breaking connection: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush;
@@ -2475,9 +2934,10 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
#endif
deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET;
back[i].status=0; // READY
+ back_set_finished(sback, i);
back[i].r.size=back[i].r.totalsize;
filenote(back[i].url_sav,NULL);
- //xxusercommand(opt,0,NULL,back[i].url_sav,back[i].url_adr,back[i].url_fil);
+ file_notify(back[i].url_adr, back[i].url_fil, back[i].url_sav, 0, 0, back[i].r.notmodified);
back[i].r.statuscode=304; // NOT MODIFIED
if ((opt->debug>1) && (opt->log!=NULL)) {
fspc(opt->log,"debug"); fprintf(opt->log,"File seems complete (reget failed), breaking connection: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush;
@@ -2506,6 +2966,8 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
if (back[i].r.soc!=INVALID_SOCKET) {
if (!back_checksize(opt,&back[i],1)) {
back[i].status=0; // FINI
+ back_set_finished(sback, i);
+ back[i].r.statuscode=STATUSCODE_TOO_BIG;
deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET;
if (!back[i].testmode)
strcpybuff(back[i].r.msg,"File too big");
@@ -2529,6 +2991,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET;
}
back[i].status=0; // terminé
+ back_set_finished(sback, i);
}
// traiter une éventuelle erreur 304 (cache à jour utilisable)
else if (back[i].r.statuscode==304) { // document à jour dans le cache
@@ -2550,7 +3013,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
// hack:
// In case of 'if-unmodified-since' hack, a 304 status can be sent
// then, force 'ok' status
- if (back[i].r.statuscode == -1) {
+ if (back[i].r.statuscode == STATUSCODE_INVALID) {
if (fexist(back[i].url_sav)) {
back[i].r.statuscode=200; // OK
strcpybuff(back[i].r.msg, "OK (cached)");
@@ -2566,14 +3029,16 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
// Status is okay?
if (back[i].r.statuscode!=-1) { // pas d'erreur de lecture
back[i].status=0; // OK prêt
+ back_set_finished(sback, i);
back[i].r.notmodified=1; // NON modifié!
if ((opt->debug>0) && (opt->log!=NULL)) {
fspc(opt->log,"debug"); fprintf(opt->log,"File loaded after test from cache: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush;
}
// finalize
+ //file_notify(back[i].url_adr, back[i].url_fil, back[i].url_sav, 0, 0, back[i].r.notmodified); // not modified
if (back[i].r.statuscode>0) {
- back_finalize(opt,cache,back,i);
+ back_finalize(opt,cache,sback,i);
}
#if DEBUGCA
@@ -2583,16 +3048,14 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
//printf(">%s status %d\n",back[p].r.contenttype,back[p].r.statuscode);
} else { // erreur
back[i].status=0; // terminé
+ back_set_finished(sback, i);
//printf("erreur cache\n");
}
/********** NO - must complete the body! ********** */
#if 0
- } else if ((back[i].r.statuscode==301)
- || (back[i].r.statuscode==302)
- || (back[i].r.statuscode==303)
- || (back[i].r.statuscode==307)
+ } else if (HTTP_IS_REDIRECT(back[i].r.statuscode)
|| (back[i].r.statuscode==412)
|| (back[i].r.statuscode==416)
) { // Ne pas prendre le html, erreurs connues et gérées
@@ -2601,12 +3064,13 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
#endif
// Couper connexion
/*KA deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET;*/
- back_maydeletehttp(opt, cache, back, back_max, i);
+ back_maydeletehttp(opt, cache, sback, i);
back[i].status=0; // terminé
+ back_set_finished(sback, i);
// finalize
if (back[i].r.statuscode>0) {
- back_finalize(opt,cache,back,i);
+ back_finalize(opt,cache,sback,i);
}
#endif
/********** **************************** ********** */
@@ -2629,6 +3093,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
if (!is_hypertext_mime(back[i].r.contenttype, back[i].url_sav)) { // pas HTML
if (opt->getmode&2) { // on peut ecrire des non html **sinon ben euhh sera intercepté plus loin, donc rap sur ce qui va sortir**
filenote(back[i].url_sav,NULL); // noter fichier comme connu
+ file_notify(back[i].url_adr, back[i].url_fil, back[i].url_sav, 0, 1, back[i].r.notmodified);
back[i].r.out=fopen(fconv(back[i].url_sav),"ab"); // append
if (back[i].r.out) {
back[i].r.is_write=1; // écrire
@@ -2642,6 +3107,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
#endif
} else { // On est dans la m**
back[i].status=0; // terminé (voir plus loin)
+ back_set_finished(sback, i);
strcpybuff(back[i].r.msg,"Can not open partial file");
}
}
@@ -2657,6 +3123,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
back[i].r.totalsize+=sz; // plus en fait
if (( fread(back[i].r.adr,1,(INTsys)sz,fp)) != sz) {
back[i].status=0; // terminé (voir plus loin)
+ back_set_finished(sback, i);
strcpybuff(back[i].r.msg,"Can not read partial file");
} else {
back[i].r.statuscode=200; // Forcer 'OK'
@@ -2666,16 +3133,19 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
}
} else {
back[i].status=0; // terminé (voir plus loin)
+ back_set_finished(sback, i);
strcpybuff(back[i].r.msg,"No memory for partial file");
}
fclose(fp);
} else { // Argh..
back[i].status=0; // terminé (voir plus loin)
+ back_set_finished(sback, i);
strcpybuff(back[i].r.msg,"Can not open partial file");
}
}
} else { // Non trouvé??
back[i].status=0; // terminé (voir plus loin)
+ back_set_finished(sback, i);
strcpybuff(back[i].r.msg,"Can not find partial file");
}
// Erreur?
@@ -2688,7 +3158,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
}
back[i].r.soc=INVALID_SOCKET;
//back[i].r.statuscode=206; ????????
- back[i].r.statuscode=-5;
+ back[i].r.statuscode=STATUSCODE_NON_FATAL;
if (strnotempty(back[i].r.msg))
strcpybuff(back[i].r.msg,"Error attempting to solve status 206 (partial file)");
}
@@ -2699,13 +3169,14 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
if (back[i].r.empty /* ?? && back[i].r.statuscode==200 */) { // empty response
// Couper connexion
- back_maydeletehttp(opt, cache, back, back_max, i);
+ back_maydeletehttp(opt, cache, sback, i);
/* KA deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET; */
back[i].status=0; // terminé
+ back_set_finished(sback, i);
if ( deleteaddr(&back[i].r) && (back[i].r.adr=(char*) malloct((INTsys) 2)) ) {
back[i].r.adr[0] = 0;
}
- back_finalize(opt,cache,back,i);
+ back_finalize(opt,cache,sback,i);
}
else if (!back[i].r.is_chunk) { // pas de chunk
//if (back[i].r.http11!=2) { // pas de chunk
@@ -2730,13 +3201,14 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
#endif
} else { // mode test, ne pas passer en 1!!
back[i].status=0; // READY
+ back_set_finished(sback, i);
#if HTS_DEBUG_CLOSESOCK
DEBUG_W("back_wait(test ok): deletehttp\n");
#endif
deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET;
if (back[i].r.statuscode==200) {
strcpybuff(back[i].r.msg,"Test: OK");
- back[i].r.statuscode=-10; // test réussi
+ back[i].r.statuscode=STATUSCODE_TEST_OK; // test réussi
}
else { // test a échoué, on ne change rien sauf que l'erreur est à titre indicatif
char tempo[1000];
@@ -2766,7 +3238,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
#if HTS_REMOVE_BAD_FILES
if (back[i].status<0) {
if (!back[i].testmode) { // pas en test
- remove(back[i].url_sav); // éliminer fichier (endommagé)
+ unlink(back[i].url_sav); // éliminer fichier (endommagé)
//printf("&& %s\n",back[i].url_sav);
}
}
@@ -2808,7 +3280,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
deletehttp(&back[i].r);
}
back[i].r.soc=INVALID_SOCKET;
- back[i].r.statuscode=-2;
+ back[i].r.statuscode=STATUSCODE_TIMEOUT;
if (back[i].status==100)
strcpybuff(back[i].r.msg,"Connect Time Out");
else if (back[i].status==101)
@@ -2816,10 +3288,12 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
else
strcpybuff(back[i].r.msg,"Receive Time Out");
back[i].status=0; // terminé
+ back_set_finished(sback, i);
} else if ((back[i].rateout>0) && (back[i].status<99)) {
if (((int) (act-back[i].rateout_time))>=HTS_WATCHRATE) { // checker au bout de 15s
if ( (int) ((back[i].r.size)/(act-back[i].rateout_time)) < back[i].rateout ) { // trop lent
back[i].status=0; // terminé
+ back_set_finished(sback, i);
if (back[i].r.soc!=INVALID_SOCKET) {
#if HTS_DEBUG_CLOSESOCK
DEBUG_W("back_wait(rateout): deletehttp\n");
@@ -2827,7 +3301,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
deletehttp(&back[i].r);
}
back[i].r.soc=INVALID_SOCKET;
- back[i].r.statuscode=-3;
+ back[i].r.statuscode=STATUSCODE_SLOW;
strcpybuff(back[i].r.msg,"Transfer Rate Too Low");
}
}
@@ -2891,22 +3365,37 @@ int back_checkmirror(httrackp* opt) {
// octets transférés + add
-LLint back_transfered(LLint nb,lien_back* back,int back_max) {
+LLint back_transfered(LLint nb,struct_back* sback) {
+ lien_back* const back = sback->lnk;
+ const int back_max = sback->count;
int i;
// ajouter octets en instance
for(i=0;i<back_max;i++)
if ((back[i].status>0) && (back[i].status<99 || back[i].status>=1000))
nb+=back[i].r.size;
+ // stored (ready) slots
+ if (sback->ready != NULL) {
+ struct_inthash_enum e = inthash_enum_new((inthash)sback->ready);
+ inthash_chain* item;
+ while((item = inthash_enum_next(&e))) {
+ lien_back* ritem = (lien_back*) item->value.ptr;
+ if ((ritem->status>0) && (ritem->status<99 || ritem->status>=1000))
+ nb+=ritem->r.size;
+ }
+ }
return nb;
}
// infos backing
// j: 1 afficher sockets 2 afficher autres 3 tout afficher
-void back_info(lien_back* back,int i,int j,FILE* fp) {
+void back_info(struct_back* sback,int i,int j,FILE* fp) {
+ lien_back* const back = sback->lnk;
+ const int back_max = sback->count;
+ assertf(i >= 0 && i < back_max);
if (back[i].status>=0) {
char BIGSTK s[HTS_URLMAXSIZE*2+1024];
s[0]='\0';
- back_infostr(back,i,j,s);
+ back_infostr(sback,i,j,s);
strcatbuff(s,LF);
fprintf(fp,"%s",s);
}
@@ -2914,7 +3403,10 @@ void back_info(lien_back* back,int i,int j,FILE* fp) {
// infos backing
// j: 1 afficher sockets 2 afficher autres 3 tout afficher
-void back_infostr(lien_back* back,int i,int j,char* s) {
+void back_infostr(struct_back* sback,int i,int j,char* s) {
+ lien_back* const back = sback->lnk;
+ const int back_max = sback->count;
+ assertf(i >= 0 && i < back_max);
if (back[i].status>=0) {
int aff=0;
if (j & 1) {