summaryrefslogtreecommitdiff
path: root/src/htsback.c
diff options
context:
space:
mode:
authorXavier Roche <xroche@users.noreply.github.com>2012-03-19 13:00:51 +0000
committerXavier Roche <xroche@users.noreply.github.com>2012-03-19 13:00:51 +0000
commit1ecdfbe3b855b1f13bcddccc20135f7e35f171a6 (patch)
tree253b431bc06ce806ee47aa7b7c4285da8c5503d6 /src/htsback.c
parent4aec03f2cbabc19cf31e7d6f9fdcd6c84cfa861e (diff)
httrack 3.43.12
Diffstat (limited to 'src/htsback.c')
-rw-r--r--src/htsback.c356
1 files changed, 247 insertions, 109 deletions
diff --git a/src/htsback.c b/src/htsback.c
index 91245a7..723ad60 100644
--- a/src/htsback.c
+++ b/src/htsback.c
@@ -479,8 +479,23 @@ int back_finalize(httrackp* opt,cache_back* cache,struct_back* sback,int p) {
back[p].compressed_size=back[p].r.size;
// en mémoire -> passage sur disque
if (!back[p].r.is_write) {
+#if 1
+#ifdef _WIN32
+#undef tempnam
+#define tempnam _tempnam
+#endif
+ char *const tmp = tempnam(NULL, "httrack_temporaryGzipFile_");
+ if (tmp != NULL) {
+ strcpybuff(back[p].tmpfile_buffer, tmp);
+ free(tmp);
+ back[p].tmpfile = back[p].tmpfile_buffer;
+ } else {
+ back[p].tmpfile = NULL;
+ }
+#else
back[p].tmpfile_buffer[0]='\0';
- back[p].tmpfile=tmpnam(back[p].tmpfile_buffer);
+ back[p].tmpfile=tmpnam(back[p].tmpfile_buffer);
+#endif
if (back[p].tmpfile != NULL && back[p].tmpfile[0] != '\0') {
back[p].r.out=fopen(back[p].tmpfile,"wb");
if (back[p].r.out) {
@@ -497,7 +512,7 @@ int back_finalize(httrackp* opt,cache_back* cache,struct_back* sback,int p) {
} else {
back[p].tmpfile[0]='\0';
back[p].r.statuscode=STATUSCODE_INVALID;
- strcpybuff(back[p].r.msg,"Open error when decompressing");
+ strcpybuff(back[p].r.msg,"Open error when decompressing (can not create a temporary file)");
}
}
}
@@ -546,6 +561,11 @@ int back_finalize(httrackp* opt,cache_back* cache,struct_back* sback,int p) {
back[p].r.adr = NULL;
}
+ /* remove reference file, if any */
+ if (back[p].r.is_write) {
+ url_savename_refname_remove(opt, back[p].url_adr, back[p].url_fil);
+ }
+
/* ************************************************************************
REAL MEDIA HACK
Check if we have to load locally the file
@@ -884,6 +904,46 @@ int back_unserialize(FILE *fp, lien_back** dst) {
return 1; /* error */
}
+/* serialize a reference ; used to store references of files being downloaded in case of broken download */
+int back_serialize_ref(httrackp* opt, const lien_back* src) {
+ char *filename = url_savename_refname_fullpath(opt, src->url_adr, src->url_fil);
+ FILE *fp = fopen(filename, "wb");
+ if (fp == NULL) {
+#ifdef _WIN32
+ if (mkdir(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log), CACHE_REFNAME)) == 0)
+#else
+ if (mkdir(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log), CACHE_REFNAME), S_IRWXU | S_IRWXG | S_IRWXO) == 0)
+#endif
+ {
+ filename = url_savename_refname_fullpath(opt, src->url_adr, src->url_fil);
+ fp = fopen(filename, "wb");
+ }
+ }
+ if (fp != NULL) {
+ int ser = back_serialize(fp, src);
+ fclose(fp);
+ return ser;
+ }
+ return 1;
+}
+
+/* unserialize a reference ; used to store references of files being downloaded in case of broken download */
+int back_unserialize_ref(httrackp* opt, const char *adr, const char *fil, lien_back** dst) {
+ char *filename = url_savename_refname_fullpath(opt, adr, fil);
+ FILE *fp = fopen(filename, "rb");
+ if (fp != NULL) {
+ int ser = back_unserialize(fp, dst);
+ fclose(fp);
+ if (ser != 0) { /* back_unserialize_ref() != 0 does not need cleaning up */
+ back_clear_entry(*dst); /* delete entry content */
+ freet(*dst); /* delete item */
+ *dst = NULL;
+ }
+ return ser;
+ }
+ return 1;
+}
+
// clear, or leave for keep-alive
int back_maydelete(httrackp* opt,cache_back* cache,struct_back* sback, int p) {
lien_back* const back = sback->lnk;
@@ -1253,6 +1313,7 @@ int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char*
int p=0;
char catbuff[CATBUFF_SIZE];
char catbuff2[CATBUFF_SIZE];
+ lien_back* itemback = NULL;
#if (defined(_DEBUG) || defined(DEBUG))
if (!test && back_exist(sback,opt,adr,fil,save)) {
@@ -1399,31 +1460,41 @@ int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char*
a+=strlen(buff);
sscanf(a,"%d",&pos); // lire position
#endif
+
+#if HTS_FAST_CACHE==0
if (pos<0) { // pas de mise en cache data, vérifier existence
+#endif
/* note: no check with IS_DELAYED_EXT() enabled - postcheck by client please! */
- if (!IS_DELAYED_EXT(save) && fsize(fconv(catbuff,save)) <= 0) { // fichier existe pas ou est vide!
+ if (save[0] != '\0' && !IS_DELAYED_EXT(save) && fsize(fconv(catbuff,save)) <= 0) { // fichier final n'existe pas ou est vide!
int found=0;
/* It is possible that the file has been moved due to changes in build structure */
{
char BIGSTK previous_save[HTS_URLMAXSIZE*2];
+ htsblk r;
previous_save[0] = '\0';
- back[p].r = cache_readex(opt, cache, adr, fil, NULL, back[p].location_buffer, previous_save, 0);
- if (previous_save[0] != '\0' && fexist(fconv(catbuff,previous_save))) {
- rename(fconv(catbuff,previous_save), fconv(catbuff2,save));
- if (fexist(fconv(catbuff,save))) {
- found = 1;
- if ((opt->debug>1) && (opt->log!=NULL)) {
- HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"File '%s' has been renamed since last mirror to '%s' ; applying changes"LF, previous_save, save); test_flush;
- }
- } else {
- if ((opt->debug>0) && (opt->log!=NULL)) {
- HTS_LOG(opt,LOG_ERROR); fprintf(opt->log,"Could not rename '%s' to '%s' ; will have to retransfer it"LF, previous_save, save); test_flush;
+ r = cache_readex(opt, cache, adr, fil, /*head*/NULL, /*bound to back[p] (temporary)*/back[p].location_buffer, previous_save, /*ro*/1);
+ /* Is supposed to be on disk only */
+ if (r.is_write && previous_save[0] != '\0') {
+ /* Exists, but with another (old) filename: rename (almost) silently */
+ if (strcmp(previous_save, save) != 0 && fexist(fconv(catbuff, previous_save))) {
+ rename(fconv(catbuff, previous_save), fconv(catbuff2,save));
+ if (fexist(fconv(catbuff,save))) {
+ found = 1;
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"File '%s' has been renamed since last mirror to '%s' ; applying changes"LF, previous_save, save); test_flush;
+ }
+ } else {
+ if ((opt->debug>0) && (opt->log!=NULL)) {
+ HTS_LOG(opt,LOG_ERROR); fprintf(opt->log,"Could not rename '%s' to '%s' ; will have to retransfer it"LF, previous_save, save); test_flush;
+ }
}
}
}
+ back[p].location_buffer[0] = '\0';
}
+ /* Not found ? */
if (!found) {
#if HTS_FAST_CACHE
hash_pos_return=0;
@@ -1438,22 +1509,29 @@ int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char*
FILE* fp=fopen(fconv(catbuff,save),"wb");
if (fp) fclose(fp);
if (opt->log!=NULL) {
- HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"File must have been erased by user, ignoring: %s%s"LF,back[p].url_adr,back[p].url_fil); test_flush;
+ HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Previous file not found (erased by user ?), ignoring: %s%s"LF,back[p].url_adr,back[p].url_fil); test_flush;
}
}
+ } else {
+ if (opt->log!=NULL) {
+ HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Previous file not found (erased by user ?), recatching: %s%s"LF,back[p].url_adr,back[p].url_fil); test_flush;
+ }
}
}
} // fsize() <= 0
+#if HTS_FAST_CACHE==0
}
+#endif
}
}
//
- } else
+ } else {
#if HTS_FAST_CACHE
hash_pos_return=0;
#else
a=NULL;
#endif
+ }
// Existe pas en cache, ou bien pas de cache présent
#if HTS_FAST_CACHE
@@ -1567,83 +1645,113 @@ int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char*
printf("..is modified test %s\n",back[p].send_too);
#endif
}
- // Okay, pas trouvé dans le cache
- // Et si le fichier existe sur disque?
- // Pas dans le cache: fichier n'a pas été transféré du tout, donc pas sur disque?
- } else {
- if (fexist(save)) { // fichier existe? aghl!
- off_t sz=fsize(save);
- // Bon, là il est possible que le fichier ait été partiellement transféré
- // (s'il l'avait été en totalité il aurait été inscrit dans le cache ET existerait sur disque)
- // PAS de If-Modified-Since, on a pas connaissance des données à la date du cache
- // On demande juste les données restantes si le date est valide (206), tout sinon (200)
- if ((ishtml(opt,save) != 1) && (ishtml(opt,back[p].url_fil)!=1)) { // NON HTML (liens changés!!)
- if (sz>0) { // Fichier non vide? (question bête, sinon on transfert tout!)
- char lastmodified[256];
- get_filetime_rfc822(save, lastmodified);
- if (strnotempty(lastmodified)) { /* pas de If-.. possible */
+ }
+ /* Not in cache ; maybe in temporary cache ? Warning: non-movable "url_sav" */
+ else if (back_unserialize_ref(opt, adr, fil, &itemback) == 0) {
+ const long file_size = fsize(itemback->url_sav);
+ /* Found file on disk */
+ if (file_size > 0) {
+ char *send_too = back[p].send_too;
+ sprintf(send_too, "Range: bytes="LLintP"-\r\n", (LLint) file_size);
+ send_too += strlen(send_too);
+ /* add etag information */
+ if (strnotempty(itemback->r.etag)) {
+ sprintf(send_too,"If-Match: %s\r\n", itemback->r.etag);
+ send_too += strlen(send_too);
+ }
+ /* add date information */
+ if (strnotempty(itemback->r.lastmodified)) {
+ sprintf(send_too,"If-Unmodified-Since: %s\r\n", itemback->r.lastmodified);
+ send_too += strlen(send_too);
+ }
+ back[p].http11=1; /* 1.1 */
+ back[p].range_req_size=(LLint) file_size;
+ back[p].r.req.range_used=1;
+ back[p].is_update=1; /* this is an update of a file */
+ back[p].r.req.nocompression=1; /* Do not compress when updating! */
+ } else {
+ /* broken ; remove */
+ url_savename_refname_remove(opt, adr, fil);
+ }
+ /* cleanup */
+ back_clear_entry(itemback); /* delete entry content */
+ freet(itemback); /* delete item */
+ itemback = NULL;
+ }
+ /* Not in cache or temporary cache ; found on disk ? (hack) */
+ else if (fexist(save)) {
+ off_t sz=fsize(save);
+ // Bon, là il est possible que le fichier ait été partiellement transféré
+ // (s'il l'avait été en totalité il aurait été inscrit dans le cache ET existerait sur disque)
+ // PAS de If-Modified-Since, on a pas connaissance des données à la date du cache
+ // On demande juste les données restantes si le date est valide (206), tout sinon (200)
+ if ((ishtml(opt,save) != 1) && (ishtml(opt,back[p].url_fil)!=1)) { // NON HTML (liens changés!!)
+ if (sz>0) { // Fichier non vide? (question bête, sinon on transfert tout!)
+ char lastmodified[256];
+ get_filetime_rfc822(save, lastmodified);
+ if (strnotempty(lastmodified)) { /* pas de If-.. possible */
#if DEBUGCA
- printf("..if unmodified since %s size "LLintP"\n", lastmodified, (LLint)sz);
+ printf("..if unmodified since %s size "LLintP"\n", lastmodified, (LLint)sz);
#endif
- if ((opt->debug>1) && (opt->log!=NULL)) {
- HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"File partially present ("LLintP" bytes): %s%s"LF,(LLint)sz,back[p].url_adr,back[p].url_fil); test_flush;
- }
-
- /* impossible - don't have etag or date
- if (strnotempty(back[p].r.etag)) { // ETag (RFC2616)
- sprintf(back[p].send_too,"If-None-Match: %s\r\n",back[p].r.etag);
- back[p].http11=1; // En tête 1.1
- } else if (strnotempty(back[p].r.lastmodified)) {
- sprintf(back[p].send_too,"If-Unmodified-Since: %s\r\n",back[p].r.lastmodified);
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"File partially present ("LLintP" bytes): %s%s"LF,(LLint)sz,back[p].url_adr,back[p].url_fil); test_flush;
+ }
+
+ /* impossible - don't have etag or date
+ if (strnotempty(back[p].r.etag)) { // ETag (RFC2616)
+ sprintf(back[p].send_too,"If-None-Match: %s\r\n",back[p].r.etag);
+ back[p].http11=1; // En tête 1.1
+ } else if (strnotempty(back[p].r.lastmodified)) {
+ sprintf(back[p].send_too,"If-Unmodified-Since: %s\r\n",back[p].r.lastmodified);
+ back[p].http11=1; // En tête 1.1
+ } else
+ */
+ if (strlen(lastmodified)) {
+ sprintf(back[p].send_too,
+ "If-Unmodified-Since: %s\r\nRange: bytes="LLintP"-\r\n"
+ , lastmodified, (LLint)sz);
back[p].http11=1; // En tête 1.1
- } else
- */
- if (strlen(lastmodified)) {
- sprintf(back[p].send_too,
- "If-Unmodified-Since: %s\r\nRange: bytes="LLintP"-\r\n"
- , lastmodified, (LLint)sz);
- back[p].http11=1; // En tête 1.1
- back[p].range_req_size=sz;
- back[p].r.req.range_used=1;
- back[p].r.req.nocompression=1;
- } else {
- if ((opt->debug>0) && (opt->log!=NULL)) {
- HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Could not find timestamp for partially present file, restarting (lost "LLintP" bytes): %s%s"LF,(LLint)sz,back[p].url_adr,back[p].url_fil); test_flush;
- }
- }
-
- } else {
+ back[p].is_update=1; /* this is an update of a file */
+ back[p].range_req_size=sz;
+ back[p].r.req.range_used=1;
+ back[p].r.req.nocompression=1;
+ } else {
if ((opt->debug>0) && (opt->log!=NULL)) {
- HTS_LOG(opt,LOG_WARNING);
- /*
- if (opt->http10)
- fprintf(opt->log,"File partially present (%d bytes) retransfered due to HTTP/1.0 settings: %s%s"LF,sz,back[p].url_adr,back[p].url_fil);
- else
- */
- fprintf(opt->log,"File partially present ("LLintP" bytes) retransfered due to lack of cache: %s%s"LF,(LLint)sz,back[p].url_adr,back[p].url_fil);
- test_flush;
+ HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Could not find timestamp for partially present file, restarting (lost "LLintP" bytes): %s%s"LF,(LLint)sz,back[p].url_adr,back[p].url_fil); test_flush;
}
- /* Sinon requête normale... */
- back[p].http11=0;
}
- } else if (opt->norecatch) { // tester norecatch
- filenote(&opt->state.strc,save,NULL); // ne pas purger tout de même
- file_notify(opt,back[p].url_adr, back[p].url_fil, back[p].url_sav, 0, 0, back[p].r.notmodified);
- back[p].status=STATUS_READY; // OK prêt
- back_set_finished(sback, p);
- back[p].r.statuscode=STATUSCODE_INVALID; // erreur
- strcpybuff(back[p].r.msg,"Null-size file not recaught");
- return 0;
- }
- } else {
- if ((opt->debug>0) && (opt->log!=NULL)) {
- HTS_LOG(opt,LOG_WARNING);
- fprintf(opt->log,"HTML file ("LLintP" bytes) retransfered due to lack of cache: %s%s"LF,(LLint)sz,back[p].url_adr,back[p].url_fil);
- test_flush;
+
+ } else {
+ if ((opt->debug>0) && (opt->log!=NULL)) {
+ HTS_LOG(opt,LOG_WARNING);
+ /*
+ if (opt->http10)
+ fprintf(opt->log,"File partially present (%d bytes) retransfered due to HTTP/1.0 settings: %s%s"LF,sz,back[p].url_adr,back[p].url_fil);
+ else
+ */
+ fprintf(opt->log,"File partially present ("LLintP" bytes) retransfered due to lack of cache: %s%s"LF,(LLint)sz,back[p].url_adr,back[p].url_fil);
+ test_flush;
+ }
+ /* Sinon requête normale... */
+ back[p].http11=0;
}
- /* Sinon requête normale... */
- back[p].http11=0;
+ } else if (opt->norecatch) { // tester norecatch
+ filenote(&opt->state.strc,save,NULL); // ne pas purger tout de même
+ file_notify(opt,back[p].url_adr, back[p].url_fil, back[p].url_sav, 0, 0, back[p].r.notmodified);
+ back[p].status=STATUS_READY; // OK prêt
+ back_set_finished(sback, p);
+ back[p].r.statuscode=STATUSCODE_INVALID; // erreur
+ strcpybuff(back[p].r.msg,"Null-size file not recaught");
+ return 0;
}
+ } else {
+ if ((opt->debug>0) && (opt->log!=NULL)) {
+ HTS_LOG(opt,LOG_WARNING);
+ fprintf(opt->log,"HTML file ("LLintP" bytes) retransfered due to lack of cache: %s%s"LF,(LLint)sz,back[p].url_adr,back[p].url_fil);
+ test_flush;
+ }
+ /* Sinon requête normale... */
+ back[p].http11=0;
}
}
}
@@ -2264,24 +2372,27 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti
// vérification de sécurité
if (back[i].r.soc!=INVALID_SOCKET) { // hey, you never know..
- do_wait=1;
-
- // noter socket read
- FD_SET(back[i].r.soc,&fds);
-
- // noter socket error
- FD_SET(back[i].r.soc,&fds_e);
-
- // incrémenter nombre de sockets
- nsockets++;
-
- // calculer max
- if (max_c) {
- max_c=0;
- nfds=back[i].r.soc;
- } else if (back[i].r.soc>nfds) {
- // ID socket la plus élevée
- nfds=back[i].r.soc;
+ // Do not endlessly wait when receiving SSL http data (Patrick Pfeifer)
+ if (!(back[i].r.ssl && back[i].status > 0 && back[i].status < 1000)) {
+ do_wait=1;
+
+ // noter socket read
+ FD_SET(back[i].r.soc,&fds);
+
+ // noter socket error
+ FD_SET(back[i].r.soc,&fds_e);
+
+ // incrémenter nombre de sockets
+ nsockets++;
+
+ // calculer max
+ if (max_c) {
+ max_c=0;
+ nfds=back[i].r.soc;
+ } else if (back[i].r.soc>nfds) {
+ // ID socket la plus élevée
+ nfds=back[i].r.soc;
+ }
}
} else {
back[i].r.statuscode=STATUSCODE_CONNERROR;
@@ -2623,8 +2734,23 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti
/* .gz are *NOT* depacked!! */
(strfield(get_ext(catbuff,back[i].url_sav),"gz") == 0)
) {
+#if 1
+#ifdef _WIN32
+#undef tempnam
+#define tempnam _tempnam
+#endif
+ char *const tmp = tempnam(NULL, "httrack_temporaryGzipFile_");
+ if (tmp != NULL) {
+ strcpybuff(back[i].tmpfile_buffer, tmp);
+ free(tmp);
+ back[i].tmpfile = back[i].tmpfile_buffer;
+ } else {
+ back[i].tmpfile = NULL;
+ }
+#else
back[i].tmpfile_buffer[0]='\0';
- back[i].tmpfile=tmpnam(back[i].tmpfile_buffer);
+ back[i].tmpfile=tmpnam(back[p].tmpfile_buffer);
+#endif
if (back[i].tmpfile != NULL && back[i].tmpfile[0]) {
if ((back[i].r.out=fopen(back[i].tmpfile,"wb")) == NULL) {
last_errno = errno;
@@ -2662,13 +2788,19 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti
test_flush;
}
back[i].r.is_write=0; // erreur, abandonner
-#if HDEBUG
- printf("..error!\n");
-#endif
- }
+ } else {
#ifndef _WIN32
- else chmod(back[i].url_sav,HTS_ACCESS_FILE);
+ chmod(back[i].url_sav, HTS_ACCESS_FILE);
#endif
+ /* create a temporary reference file in case of broken mirror */
+ if (back[i].r.out != NULL) {
+ if (back_serialize_ref(opt, &back[i]) != 0) {
+ if (opt->log != NULL) {
+ HTS_LOG(opt,LOG_WARNING); fprintf(opt->log, "Could not create temporary reference file for %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush;
+ }
+ }
+ }
+ }
} else { // on coupe tout!
if ((opt->debug>1) && (opt->log!=NULL)) {
HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"File cancelled (non HTML): %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush;
@@ -3409,6 +3541,12 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti
if (back[i].r.totalsize>0)
back[i].r.totalsize+=sz; // plus en fait
fseek(back[i].r.out,0,SEEK_END); // à la fin
+ /* create a temporary reference file in case of broken mirror */
+ if (back_serialize_ref(opt, &back[i]) != 0) {
+ if (opt->log != NULL) {
+ HTS_LOG(opt,LOG_WARNING); fprintf(opt->log, "Could not create temporary reference file for %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush;
+ }
+ }
#if HDEBUG
printf("continue interrupted file\n");
#endif