summaryrefslogtreecommitdiff
path: root/src/htscache.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/htscache.c')
-rw-r--r--src/htscache.c864
1 files changed, 788 insertions, 76 deletions
diff --git a/src/htscache.c b/src/htscache.c
index b90fa67..aa9a6c8 100644
--- a/src/htscache.c
+++ b/src/htscache.c
@@ -35,15 +35,19 @@ Please visit our Website: http://www.httrack.com
/* Author: Xavier Roche */
/* ------------------------------------------------------------ */
+/* Internal engine bytecode */
+#define HTS_INTERNAL_BYTECODE
+
#include "htscache.h"
/* specific definitions */
#include "htsbase.h"
#include "htsbasenet.h"
#include "htsmd5.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
+#include <time.h>
+
+#include "htszlib.h"
+
#include "htsnostatic.h"
/* END specific definitions */
@@ -116,10 +120,15 @@ void cache_mayadd(httrackp* opt,cache_back* cache,htsblk* r,char* url_adr,char*
// ---stockage en cache---
// stocker dans le cache?
if (opt->cache) {
- if (cache->dat!=NULL) {
+ if (cache_writable(cache)) {
// c'est le seul endroit ou l'on ajoute des elements dans le cache (fichier entier ou header)
// on stocke tout fichier "ok", mais également les réponses 404,301,302...
- if ((r->statuscode==200) /* stocker réponse standard, plus */
+ if (
+#if 1
+ r->statuscode > 0
+#else
+ /* We don't store 5XX errors, because it might be a server problem */
+ (r->statuscode==200) /* stocker réponse standard, plus */
|| (r->statuscode==204) /* no content */
|| (r->statuscode==301) /* moved perm */
|| (r->statuscode==302) /* moved temp */
@@ -129,13 +138,33 @@ void cache_mayadd(httrackp* opt,cache_back* cache,htsblk* r,char* url_adr,char*
|| (r->statuscode==403) /* unauthorized */
|| (r->statuscode==404) /* not found */
|| (r->statuscode==410) /* gone */
+#endif
)
{ /* ne pas stocker si la page générée est une erreur */
if (!r->is_file) {
// stocker fichiers (et robots.txt)
- if ( (strnotempty(url_save)) || (strcmp(url_fil,"/robots.txt")==0)) {
+ if ( url_save == NULL || (strnotempty(url_save)) || (strcmp(url_fil,"/robots.txt")==0)) {
// ajouter le fichier au cache
- cache_add(*r,url_adr,url_fil,url_save,cache->ndx,cache->dat,opt->all_in_cache);
+ cache_add(cache,*r,url_adr,url_fil,url_save,opt->all_in_cache);
+ //
+ // store a reference NOT to redo the same test zillions of times!
+ // (problem reported by Lars Clausen)
+ // we just store statuscode + location (if any)
+ if (url_save == NULL && r->statuscode / 100 >= 3) {
+              // cached "fast" header doesn't exist yet
+ if (inthash_read((inthash)cache->cached_tests, concat(url_adr, url_fil), NULL) == 0) {
+ char BIGSTK tempo[HTS_URLMAXSIZE*2];
+ sprintf(tempo, "%d", (int)r->statuscode);
+ if (r->location != NULL && r->location[0] != '\0') {
+ strcatbuff(tempo, "\n");
+ strcatbuff(tempo, r->location);
+ }
+ if ((opt->debug>0) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log, "Cached fast-header response: %s%s is %d"LF, url_adr, url_fil, (int)r->statuscode);
+ }
+ inthash_add((inthash)cache->cached_tests, concat(url_adr, url_fil), (long int)strdupt(tempo));
+ }
+ }
}
}
}
@@ -145,13 +174,222 @@ void cache_mayadd(httrackp* opt,cache_back* cache,htsblk* r,char* url_adr,char*
}
+
+#if 01
+
+/* test only - to be removed */
+
+/*
+ * Helpers appending one "Field: value\r\n" line to a header buffer.
+ *   headers     : destination character buffer
+ *   headersSize : int lvalue holding the current length of 'headers';
+ *                 it is advanced by the length of the appended line
+ *   field       : field name (C string)
+ *   value       : value to store
+ * Every argument is parenthesized so that expressions can be passed safely.
+ * NOTE: the write is unbounded (sprintf); the caller must size 'headers'
+ * generously enough for all fields.
+ */
+
+/* Append a string field; skipped when 'value' is NULL or empty */
+#define ZIP_FIELD_STRING(headers, headersSize, field, value) do { \
+  if ( ((value) != NULL) && (value)[0] != '\0') { \
+    sprintf((headers) + (headersSize), "%s: %s\r\n", (field), (value)); \
+    (headersSize) += (int) strlen((headers) + (headersSize)); \
+  } \
+} while(0)
+/* Append an integer field; skipped when 'value' is zero */
+#define ZIP_FIELD_INT(headers, headersSize, field, value) do { \
+  if ( (value) != 0 ) { \
+    sprintf((headers) + (headersSize), "%s: "LLintP"\r\n", (field), (LLint)(value)); \
+    (headersSize) += (int) strlen((headers) + (headersSize)); \
+  } \
+} while(0)
+/* Append an integer field unconditionally (even when 'value' is zero) */
+#define ZIP_FIELD_INT_FORCE(headers, headersSize, field, value) do { \
+  sprintf((headers) + (headersSize), "%s: "LLintP"\r\n", (field), (LLint)(value)); \
+  (headersSize) += (int) strlen((headers) + (headersSize)); \
+} while(0)
+
+/*
+ * Descriptor of one ZIP cache entry.
+ * NOTE(review): not referenced by any code visible in this change; the
+ * field meanings below are inferred from their names only - confirm.
+ */
+struct cache_back_zip_entry {
+  unsigned long int hdrPos;   /* presumably the position of the entry header in the archive */
+  unsigned long int size;     /* presumably the entry size in bytes */
+  int compressionMethod;      /* presumably the ZIP compression method of the entry */
+};
+
+/*
+ * Helpers decoding one already-split header line.
+ *   line     : field name part of the line (writable); it is emptied once
+ *              the field has been consumed, so later helpers skip it
+ *   value    : value part of the line (C string)
+ *   refline  : field name this helper is responsible for
+ *   refvalue : destination receiving the decoded value
+ * The name comparison is delegated to strfield2().
+ */
+
+/* Copy the value into the string buffer 'refvalue' when the field matches */
+#define ZIP_READFIELD_STRING(line, value, refline, refvalue) do { \
+  if ((line)[0] != '\0' && strfield2((line), (refline))) { \
+    strcpybuff(refvalue, value); \
+    (line)[0] = '\0'; \
+  } \
+} while(0)
+/* Parse the value as an integer into 'refvalue' when the field matches.   \
+   The value is scanned as an LLint (same type/format used when writing)   \
+   so that sizes which do not fit in an int are not mangled. */
+#define ZIP_READFIELD_INT(line, value, refline, refvalue) do { \
+  if ((line)[0] != '\0' && strfield2((line), (refline))) { \
+    LLint intval = 0; \
+    sscanf((value), LLintP, &intval); \
+    (refvalue) = intval; \
+    (line)[0] = '\0'; \
+  } \
+} while(0)
+
+
+/*
+ * Add an entry to the ZIP cache.
+ *   cache        : cache context; the entry is appended to cache->zipOutput
+ *   r            : response to store (status, headers fields, body or file flag)
+ *   url_adr      : original address
+ *   url_fil      : original URI filename
+ *   url_save     : local save name; NULL means "link test" (header only),
+ *                  an empty name is only accepted for "/robots.txt"
+ *   all_in_cache : when non-zero, the body is stored whatever the MIME type
+ * The response fields are serialized as text lines and stored in the local
+ * extra field of the ZIP entry; the body (if any) is stored as entry data.
+ * Nothing is stored when r.size is negative.
+ */
+void cache_add(cache_back* cache,htsblk r,char* url_adr,char* url_fil,char* url_save,int all_in_cache) {
+  char BIGSTK filename[HTS_URLMAXSIZE*4];
+  int dataincache=0;    // put data in cache ?
+  char BIGSTK headers[8192];
+  int headersSize = 0;
+  zip_fileinfo fi;
+
+  // robots.txt hack
+  if (url_save == NULL) {
+    dataincache=0;    // testing links
+  }
+  else {
+    if ( (strnotempty(url_save)==0) ) {
+      if (strcmp(url_fil,"/robots.txt")==0)      // robots.txt
+        dataincache=1;
+      else
+        return;   // error (except robots.txt)
+    }
+
+    /* Data in cache ? */
+    if (is_hypertext_mime(r.contenttype, url_fil))
+      dataincache=1;
+    else if (all_in_cache)
+      dataincache=1;
+  }
+
+  if (r.size < 0)   // error
+    return;
+
+  // data in cache: the body size is later passed to the ZIP layer as an int
+  if (dataincache) {
+    assertf(((int) r.size) == r.size);
+  }
+
+  /* Fields */
+  headers[0] = '\0';
+  headersSize = 0;
+  /* */
+  {
+    char* message;
+    if (strlen(r.msg) < 32) {
+      message = r.msg;
+    } else {
+      message = "(See X-StatusMessage)";
+    }
+    /* 64 characters MAX for first line: use the shortened message, the
+       full one is stored in X-StatusMessage below */
+    sprintf(headers + headersSize, "HTTP/1.%c %d %s\r\n", '1', r.statuscode, message);
+  }
+  headersSize += (int) strlen(headers + headersSize);
+  /* Second line MUST ALWAYS be X-In-Cache */
+  ZIP_FIELD_INT_FORCE(headers, headersSize, "X-In-Cache", dataincache);
+  ZIP_FIELD_INT(headers, headersSize, "X-StatusCode", r.statuscode);
+  ZIP_FIELD_STRING(headers, headersSize, "X-StatusMessage", r.msg);
+  ZIP_FIELD_INT(headers, headersSize, "X-Size", r.size);           // size
+  ZIP_FIELD_STRING(headers, headersSize, "Content-Type", r.contenttype);      // contenttype
+  ZIP_FIELD_STRING(headers, headersSize, "X-Charset", r.charset);          // charset
+  ZIP_FIELD_STRING(headers, headersSize, "Last-Modified", r.lastmodified);     // last-modified
+  ZIP_FIELD_STRING(headers, headersSize, "Etag", r.etag);             // Etag
+  ZIP_FIELD_STRING(headers, headersSize, "Location", r.location);         // 'location' for moved responses
+  ZIP_FIELD_STRING(headers, headersSize, "Content-Disposition", r.cdispo);           // Content-disposition
+  ZIP_FIELD_STRING(headers, headersSize, "X-Addr", url_adr);            // Original address
+  ZIP_FIELD_STRING(headers, headersSize, "X-Fil", url_fil);            // Original URI filename
+  ZIP_FIELD_STRING(headers, headersSize, "X-Save", url_save);           // Original save filename
+
+  /* Entry name: address + URI, prefixed by "http://" when the address
+     carries no authority */
+  if (!link_has_authority(url_adr)) {
+    strcpybuff(filename, "http://");
+  } else {
+    strcpybuff(filename, "");
+  }
+  strcatbuff(filename, url_adr);
+  strcatbuff(filename, url_fil);
+
+  /* Time: use Last-Modified as the entry date when it can be parsed */
+  memset(&fi, 0, sizeof(fi));
+  if (r.lastmodified[0] != '\0') {
+    struct tm* tm_s=convert_time_rfc822(r.lastmodified);
+    if (tm_s) {
+      fi.tmz_date.tm_sec  = (uInt) tm_s->tm_sec;
+      fi.tmz_date.tm_min  = (uInt) tm_s->tm_min;
+      fi.tmz_date.tm_hour = (uInt) tm_s->tm_hour;
+      fi.tmz_date.tm_mday = (uInt) tm_s->tm_mday;
+      fi.tmz_date.tm_mon  = (uInt) tm_s->tm_mon;
+      fi.tmz_date.tm_year = (uInt) tm_s->tm_year;
+    }
+  }
+
+  /* Open file - NOTE: headers are stored in the local extra field */
+  if (zipOpenNewFileInZip((zipFile) cache->zipOutput,
+    filename,
+    &fi,
+    /* 
+    Store headers in realtime in the local file directory as extra field
+    In case of crash, we'll be able to recover the whole ZIP file by rescanning it
+    */
+    headers,
+    (uInt) strlen(headers),
+    NULL,
+    0,
+    NULL, /* comment */
+    Z_DEFLATED,
+    Z_DEFAULT_COMPRESSION) != Z_OK)
+  {
+    int zip_disk_write_failed = 0;
+    assertf(zip_disk_write_failed);
+  }
+
+  /* Write data in cache */
+  if (dataincache) {
+    if (r.is_write == 0) {
+      /* Body is held in memory */
+      if (r.size > 0 && r.adr != NULL) {
+        if (zipWriteInFileInZip((zipFile) cache->zipOutput, r.adr, (int) r.size) != Z_OK) {
+          int zip_disk_write_failed = 0;
+          assertf(zip_disk_write_failed);
+        }
+      }
+    } else {
+      FILE* fp;
+      // Body was written to disk: copy the saved file into the entry
+      LLint file_size=fsize(fconv(url_save));
+      if (file_size>=0) {
+        fp=fopen(fconv(url_save),"rb");
+        if (fp!=NULL) {
+          char BIGSTK buff[32768];
+          INTsys nl;
+          do {
+            nl=fread(buff,1,32768,fp);
+            if (nl>0) { 
+              if (zipWriteInFileInZip((zipFile) cache->zipOutput, buff, (int) nl) != Z_OK) {
+                int zip_disk_write_failed = 0;
+                assertf(zip_disk_write_failed);
+              }
+            }
+          } while(nl>0);
+          fclose(fp);
+        } else {
+          /* Err FIXME - lost file */
+        }
+      } /* Empty files are OK */
+    }
+  }
+
+  /* Close */
+  if (zipCloseFileInZip((zipFile) cache->zipOutput) != Z_OK) {
+    int zip_disk_write_failed = 0;
+    assertf(zip_disk_write_failed);
+  }
+
+  /* Flush */
+  if (zipFlush((zipFile) cache->zipOutput) != 0) {
+    int zip_disk_write_failed = 0;
+    assertf(zip_disk_write_failed);
+  }
+}
+
+#else
+
/* Ajout d'un fichier en cache */
-void cache_add(htsblk r,char* url_adr,char* url_fil,char* url_save,FILE* cache_ndx,FILE* cache_dat,int all_in_cache) {
+void cache_add(cache_back* cache,htsblk r,char* url_adr,char* url_fil,char* url_save,int all_in_cache) {
int pos;
char s[256];
- char buff[HTS_URLMAXSIZE*4];
+ char BIGSTK buff[HTS_URLMAXSIZE*4];
int ok=1;
int dataincache=0; // donnée en cache?
+ FILE* cache_ndx = cache->ndx;
+ FILE* cache_dat = cache->dat;
/*char digest[32+2];*/
/*digest[0]='\0';*/
@@ -159,6 +397,8 @@ void cache_add(htsblk r,char* url_adr,char* url_fil,char* url_save,FILE* cache_n
if ( (strnotempty(url_save)==0) ) {
if (strcmp(url_fil,"/robots.txt")==0) // robots.txt
dataincache=1;
+ else if (strcmp(url_fil,"/test")==0) // testing links
+ dataincache=0;
else
return; // erreur (sauf robots.txt)
}
@@ -167,7 +407,7 @@ void cache_add(htsblk r,char* url_adr,char* url_fil,char* url_save,FILE* cache_n
return; // refusé..
// Mettre les *donées* en cache ?
- if (is_hypertext_mime(r.contenttype)) // html, mise en cache des données et
+ if (is_hypertext_mime(r.contenttype, url_fil)) // html, mise en cache des données et
dataincache=1; // pas uniquement de l'en tête
else if (all_in_cache)
dataincache=1; // forcer tout en cache
@@ -209,6 +449,7 @@ void cache_add(htsblk r,char* url_adr,char* url_fil,char* url_save,FILE* cache_n
&& cache_wstr(cache_dat,url_adr) != -1 // Original address
&& cache_wstr(cache_dat,url_fil) != -1 // Original URI filename
&& cache_wstr(cache_dat,url_save) != -1 // Original save filename
+ && cache_wstr(cache_dat,r.headers) != -1 // Full HTTP Headers
&& cache_wstr(cache_dat,"HTS") != -1 // end of header
) {
ok=1; /* ok */
@@ -238,7 +479,7 @@ void cache_add(htsblk r,char* url_adr,char* url_fil,char* url_save,FILE* cache_n
if (cache_wLLint(cache_dat,file_size)!=-1) {
fp=fopen(fconv(url_save),"rb");
if (fp!=NULL) {
- char buff[32768];
+ char BIGSTK buff[32768];
INTsys nl;
do {
nl=fread(buff,1,32768,fp);
@@ -275,6 +516,8 @@ void cache_add(htsblk r,char* url_adr,char* url_fil,char* url_save,FILE* cache_n
fflush(cache_dat); fflush(cache_ndx);
}
+#endif
+
htsblk cache_read(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* location) {
return cache_readex(opt,cache,adr,fil,save,location,NULL,0);
@@ -284,19 +527,274 @@ htsblk cache_read_ro(httrackp* opt,cache_back* cache,char* adr,char* fil,char* s
return cache_readex(opt,cache,adr,fil,save,location,NULL,1);
}
+static htsblk cache_readex_old(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* location,
+ char* return_save, int readonly);
+
+static htsblk cache_readex_new(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* location,
+ char* return_save, int readonly);
+
// Read a file from the cache
// if save==NULL, only test (headers only, no body is loaded)
htsblk cache_readex(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* location,
char* return_save, int readonly) {
+  // Dispatch on the cache opened for reading: a ZIP input selects the ZIP
+  // reader, otherwise the older reader is used
+  if (cache->zipInput != NULL) {
+    return cache_readex_new(opt, cache, adr, fil, save, location, return_save, readonly);
+  } else {
+    return cache_readex_old(opt, cache, adr, fil, save, location, return_save, readonly);
+  }
+}
+
+// Read an entry from the ZIP cache
+//   adr, fil    : address and URI filename; their concatenation is the lookup key
+//   save        : if NULL, only the stored header fields are read (test only);
+//                 if non-empty, name of the file to (re)create on disk
+//   location    : optional buffer receiving the stored 'Location' field
+//   return_save : optional (may be NULL) name of a file on disk holding the
+//                 body when it was not stored in the cache
+//   readonly    : when non-zero, never write the body directly to disk
+// Returns a htsblk; on failure statuscode is -1 and msg describes the error
+static htsblk cache_readex_new(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* location,
+                               char* return_save, int readonly) {
+  char BIGSTK location_default[HTS_URLMAXSIZE*2];
+  char BIGSTK buff[HTS_URLMAXSIZE*2];
+  char BIGSTK previous_save[HTS_URLMAXSIZE*2];
+  long int hash_pos = 0;
+  int hash_pos_return;
+  htsblk r;
+  memset(&r, 0, sizeof(htsblk)); r.soc=INVALID_SOCKET;
+  previous_save[0] = '\0';
+
+  if (location) {
+    r.location = location;
+  } else {
+    r.location = location_default;
+  }
+  strcpybuff(r.location, ""); 
+  strcpybuff(buff, adr);
+  strcatbuff(buff,fil);
+  hash_pos_return = inthash_read((inthash)cache->hashtable, buff, (long int*)&hash_pos);
+  /* avoid errors on data entries */
+  /* (this reader always goes through the hash table, so the entry is
+     simply reported as not found whatever HTS_FAST_CACHE is) */
+  if (adr[0] == '/' && adr[1] == '/' && adr[2] == '[') {
+    hash_pos_return = 0;
+  }
+
+  if (hash_pos_return) {
+    uLong posInZip;
+    /* The sign only flags whether the body is in the cache; the magnitude
+       is the offset of the entry */
+    if (hash_pos > 0) {
+      posInZip = (uLong) hash_pos;
+    } else {
+      posInZip = (uLong) -hash_pos;
+    }
+    if (unzSetOffset((unzFile) cache->zipInput, posInZip) == Z_OK) {
+      /* Read header (Max 8KiB) */
+      if (unzOpenCurrentFile((unzFile) cache->zipInput) == Z_OK) {
+        char BIGSTK headerBuff[8192 + 2];
+        int readSizeHeader;
+        int dataincache = 0;
+        
+        /* For BIG comments */
+        headerBuff[0] 
+          = headerBuff[sizeof(headerBuff) - 1] 
+          = headerBuff[sizeof(headerBuff) - 2] 
+          = headerBuff[sizeof(headerBuff) - 3] = '\0';
+
+        if ( (readSizeHeader = unzGetLocalExtrafield((unzFile) cache->zipInput, headerBuff, sizeof(headerBuff) - 2)) > 0) 
+        /*if (unzGetCurrentFileInfo((unzFile) cache->zipInput, NULL,
+          NULL, 0, NULL, 0, headerBuff, sizeof(headerBuff) - 2) == Z_OK ) */
+        {
+          int offset = 0;
+          char BIGSTK line[HTS_URLMAXSIZE + 2];
+          int lineEof = 0;
+          /*readSizeHeader = (int) strlen(headerBuff);*/
+          headerBuff[readSizeHeader] = '\0';
+          /* Decode the stored fields, one "Name: value" line at a time */
+          do {
+            char* value;
+            line[0] = '\0';
+            offset += binput(headerBuff + offset, line, sizeof(line) - 2);
+            if (line[0] == '\0') {
+              lineEof = 1;
+            }
+            value = strchr(line, ':');
+            if (value != NULL) {
+              *value++ = '\0';
+              if (*value == ' ' || *value == '\t') value++;
+              ZIP_READFIELD_INT(line, value, "X-In-Cache", dataincache);
+              ZIP_READFIELD_INT(line, value, "X-Statuscode", r.statuscode);
+              ZIP_READFIELD_STRING(line, value, "X-StatusMessage", r.msg);              // msg
+              ZIP_READFIELD_INT(line, value, "X-Size", r.size);           // size
+              ZIP_READFIELD_STRING(line, value, "Content-Type", r.contenttype);      // contenttype
+              ZIP_READFIELD_STRING(line, value, "X-Charset", r.charset);          // charset
+              ZIP_READFIELD_STRING(line, value, "Last-Modified", r.lastmodified);     // last-modified
+              ZIP_READFIELD_STRING(line, value, "Etag", r.etag);             // Etag
+              ZIP_READFIELD_STRING(line, value, "Location", r.location);         // 'location' for moved responses
+              ZIP_READFIELD_STRING(line, value, "Content-Disposition", r.cdispo);           // Content-disposition
+              ZIP_READFIELD_STRING(line, value, "X-Addr", previous_save);            // Original address
+              ZIP_READFIELD_STRING(line, value, "X-Fil", previous_save);            // Original URI filename
+              ZIP_READFIELD_STRING(line, value, "X-Save", previous_save);           // Original save filename
+            }
+          } while(offset < readSizeHeader && !lineEof);
+
+          /* Complete fields */
+          r.totalsize=r.size;
+          r.adr=NULL;
+          r.out=NULL;
+          r.fp=NULL;
+
+          if (save != NULL) {     /* not a header-only read */
+            int ok = 0;
+            
+#if HTS_DIRECTDISK
+            // Shortcut:
+            // can the file be stored directly on disk?
+            if (ok) {
+              if (r.msg[0] == '\0') {
+                strcpybuff(r.msg,"Cache Read Error : Unexpected error");
+              }
+            }
+            else if (!readonly && r.statuscode==200 && !is_hypertext_mime(r.contenttype, fil) && strnotempty(save)) {    // not HTML, write to disk directly
+              
+              r.is_write=1;    // write
+              if (fexist(fconv(save))) {  // a file already exists
+                //if (fsize(fconv(save))==r.size) {  // same size -- NO, never mind (size badly declared)
+                ok=1;    // nothing more to do
+                filenote(save,NULL);        // note as known
+              }
+              
+              if (!dataincache && !ok) { // No data in cache and file not found: error!
+                if (opt->norecatch) {
+                  filecreateempty(save);
+                  //
+                  r.statuscode=-1;
+                  strcpybuff(r.msg,"File deleted by user not recaught");
+                  ok=1;     // do not fetch again (and no error)
+                } else {
+                  r.statuscode=-1;
+                  strcpybuff(r.msg,"Previous cache file not found");
+                  ok=1;    // do not fetch again
+                }
+              }
+              
+              if (!ok) {     
+                r.out=filecreate(save);
+#if HDEBUG
+                printf("direct-disk: %s\n",save);
+#endif
+                if (r.out!=NULL) {
+                  char BIGSTK buff[32768+4];
+                  LLint size = r.size;
+                  if (size > 0) {
+                    INTsys nl;
+                    do {
+                      nl = unzReadCurrentFile((unzFile) cache->zipInput, buff, (int)minimum(size, 32768));
+                      if (nl>0) {
+                        size-=nl;
+                        if ((INTsys)fwrite(buff,1,(INTsys)nl,r.out)!=nl) {  // error
+                          r.statuscode=-1;
+                          strcpybuff(r.msg,"Cache Read Error : Read To Disk");
+                        }
+                      }
+                    } while((nl>0) && (size>0) && (r.statuscode!=-1));
+                  }
+                  
+                  fclose(r.out);
+                  r.out=NULL;
+#if HTS_WIN==0
+                  chmod(save,HTS_ACCESS_FILE);      
+#endif          
+                  //xxusercommand(opt,0,NULL,fconv(save), adr, fil);
+                } else {
+                  r.statuscode=-1;
+                  strcpybuff(r.msg,"Cache Write Error : Unable to Create File");
+                  //printf("%s\n",save);
+                }
+              }
+              
+            } else
+#endif
+            { // read into memory
+              
+              if (!dataincache) {
+                if (strnotempty(save)) { // No data in cache, odd since this is html!!!
+                  r.statuscode=-1;
+                  strcpybuff(r.msg,"Previous cache file not found (2)");
+                } else {                 /* Read in memory from cache */
+                  /* return_save is optional: callers such as cache_read() pass NULL */
+                  if (return_save != NULL && strnotempty(return_save) && fexist(return_save)) {
+                    FILE* fp = fopen(fconv(return_save), "rb");
+                    if (fp != NULL) {
+                      r.adr=(char*) malloct((INTsys)r.size + 4);
+                      /* check the allocation itself (not the 'adr' URL argument) */
+                      if (r.adr != NULL) {
+                        if (r.size > 0 && fread(r.adr, 1, (INTsys) r.size, fp) != r.size) {
+                          r.statuscode=-1;
+                          strcpybuff(r.msg,"Read error in cache disk data");
+                        } else {
+                          /* terminate the buffer, as done for data read from the ZIP */
+                          *(r.adr+r.size)='\0';
+                        }
+                      } else {
+                        r.statuscode=-1;
+                        strcpybuff(r.msg,"Read error (memory exhausted) from cache");
+                      }
+                      fclose(fp);
+                    }
+                  } else {
+                    r.statuscode=-1;
+                    strcpybuff(r.msg,"Cache file not found on disk");
+                  }
+                }
+              } else {
+                // read the whole body at once
+                r.adr=(char*) malloct((INTsys)r.size+4);
+                if (r.adr!=NULL) {
+                  if (unzReadCurrentFile((unzFile) cache->zipInput, r.adr, (INTsys)r.size) != r.size) {  // error
+                    freet(r.adr);
+                    r.adr=NULL;
+                    r.statuscode=-1;
+                    strcpybuff(r.msg,"Cache Read Error : Read Data");
+                  } else
+                    *(r.adr+r.size)='\0';
+                  //printf(">%s status %d\n",back[p].r.contenttype,back[p].r.statuscode);
+                } else {  // error
+                  r.statuscode=-1;
+                  strcpybuff(r.msg,"Cache Memory Error");
+                }
+              }
+            }
+          }    // if save==null, load nothing (header only)
+
+
+        } else {
+          r.statuscode=-1;
+          strcpybuff(r.msg,"Cache Read Error : Read Header Data");
+        }
+        unzCloseCurrentFile((unzFile) cache->zipInput);
+      } else {
+        r.statuscode=-1;
+        strcpybuff(r.msg,"Cache Read Error : Open File");
+      }
+
+    } else {
+      r.statuscode=-1;
+      strcpybuff(r.msg,"Cache Read Error : Bad Offset");
+    }
+  } else {
+    r.statuscode=-1;
+    strcpybuff(r.msg,"File Cache Entry Not Found");
+  }
+  if (!location) {   /* don't export internal buffer */
+    r.location = NULL;
+  }
+  return r;
+}
+
+
+// lecture d'un fichier dans le cache
+// si save==null alors test unqiquement
+static htsblk cache_readex_old(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* location,
+ char* return_save, int readonly) {
#if HTS_FAST_CACHE
long int hash_pos;
int hash_pos_return;
#else
char* a;
#endif
- char buff[HTS_URLMAXSIZE*2];
- char location_default[HTS_URLMAXSIZE*2];
- char previous_save[HTS_URLMAXSIZE*2];
+ char BIGSTK buff[HTS_URLMAXSIZE*2];
+ char BIGSTK location_default[HTS_URLMAXSIZE*2];
+ char BIGSTK previous_save[HTS_URLMAXSIZE*2];
htsblk r;
int ok=0;
int header_only=0;
@@ -388,6 +886,9 @@ htsblk cache_readex(httrackp* opt,cache_back* cache,char* adr,char* fil,char* sa
strcpybuff(return_save, previous_save);
}
}
+ if (cache->version >= 5) {
+ r.headers = cache_rstr_addr(cache->olddat);
+ }
//
cache_rstr(cache->olddat,check);
if (strcmp(check,"HTS")==0) { /* intégrité OK */
@@ -425,7 +926,7 @@ htsblk cache_readex(httrackp* opt,cache_back* cache,char* adr,char* fil,char* sa
#if HTS_DIRECTDISK
// Court-circuit:
// Peut-on stocker le fichier directement sur disque?
- if (!readonly && r.statuscode==200 && !is_hypertext_mime(r.contenttype) && strnotempty(save)) { // pas HTML, écrire sur disk directement
+ if (!readonly && r.statuscode==200 && !is_hypertext_mime(r.contenttype, fil) && strnotempty(save)) { // pas HTML, écrire sur disk directement
int ok=0;
r.is_write=1; // écrire
@@ -457,7 +958,7 @@ htsblk cache_readex(httrackp* opt,cache_back* cache,char* adr,char* fil,char* sa
printf("direct-disk: %s\n",save);
#endif
if (r.out!=NULL) {
- char buff[32768+4];
+ char BIGSTK buff[32768+4];
LLint size = r.size;
if (size > 0) {
INTsys nl;
@@ -572,7 +1073,7 @@ htsblk cache_readex(httrackp* opt,cache_back* cache,char* adr,char* fil,char* sa
/* 0 if failed */
int cache_writedata(FILE* cache_ndx,FILE* cache_dat,char* str1,char* str2,char* outbuff,int len) {
if (cache_dat) {
- char buff[HTS_URLMAXSIZE*4];
+ char BIGSTK buff[HTS_URLMAXSIZE*4];
char s[256];
int pos;
fflush(cache_dat); fflush(cache_ndx);
@@ -599,7 +1100,7 @@ int cache_writedata(FILE* cache_ndx,FILE* cache_dat,char* str1,char* str2,char*
int cache_readdata(cache_back* cache,char* str1,char* str2,char** inbuff,int* inlen) {
#if HTS_FAST_CACHE
if (cache->hashtable) {
- char buff[HTS_URLMAXSIZE*4];
+ char BIGSTK buff[HTS_URLMAXSIZE*4];
long int pos;
strcpybuff(buff,str1); strcatbuff(buff,str2);
if (inthash_read((inthash)cache->hashtable,buff,(long int*)&pos)) {
@@ -651,7 +1152,29 @@ void cache_init(cache_back* cache,httrackp* opt) {
#else
mkdir(fconcat(opt->path_log,"hts-cache"),HTS_PROTECT_FOLDER);
#endif
- if ((fexist(fconcat(opt->path_log,"hts-cache/new.dat"))) && (fexist(fconcat(opt->path_log,"hts-cache/new.ndx")))) { // il existe déja un cache précédent.. renommer
+ if ((fexist(fconcat(opt->path_log,"hts-cache/new.zip")))) { // il existe déja un cache précédent.. renommer
+ /* Previous cache from the previous cache version */
+#if 0
+ /* No.. reuse with old httrack releases! */
+ if (fexist(fconcat(opt->path_log,"hts-cache/old.dat")))
+ remove(fconcat(opt->path_log,"hts-cache/old.dat"));
+ if (fexist(fconcat(opt->path_log,"hts-cache/old.ndx")))
+ remove(fconcat(opt->path_log,"hts-cache/old.ndx"));
+#endif
+ /* Previous cache version */
+ if ((fexist(fconcat(opt->path_log,"hts-cache/new.dat"))) && (fexist(fconcat(opt->path_log,"hts-cache/new.ndx")))) { // il existe déja un cache précédent.. renommer
+ rename(fconcat(opt->path_log,"hts-cache/new.dat"),fconcat(opt->path_log,"hts-cache/old.dat"));
+ rename(fconcat(opt->path_log,"hts-cache/new.ndx"),fconcat(opt->path_log,"hts-cache/old.ndx"));
+ }
+
+ /* Remove OLD cache */
+ if (fexist(fconcat(opt->path_log,"hts-cache/old.zip")))
+ remove(fconcat(opt->path_log,"hts-cache/old.zip"));
+
+ /* Rename */
+ rename(fconcat(opt->path_log,"hts-cache/new.zip"),fconcat(opt->path_log,"hts-cache/old.zip"));
+ }
+ else if ((fexist(fconcat(opt->path_log,"hts-cache/new.dat"))) && (fexist(fconcat(opt->path_log,"hts-cache/new.ndx")))) { // il existe déja un cache précédent.. renommer
#if DEBUGCA
printf("work with former cache\n");
#endif
@@ -677,6 +1200,136 @@ void cache_init(cache_back* cache,httrackp* opt) {
if (
(
!cache->ro &&
+ fsize(fconcat(opt->path_log,"hts-cache/old.zip")) > 0
+ )
+ ||
+ (
+ cache->ro &&
+ fsize(fconcat(opt->path_log,"hts-cache/new.zip")) > 0
+ )
+ )
+ {
+ if (!cache->ro) {
+ cache->zipInput = unzOpen(fconcat(opt->path_log,"hts-cache/old.zip"));
+ } else {
+ cache->zipInput = unzOpen(fconcat(opt->path_log,"hts-cache/new.zip"));
+ }
+
+ // Corrupted ZIP file ? Try to repair!
+ if (cache->zipInput == NULL && !cache->ro) {
+ char* name;
+ uLong repaired = 0;
+ uLong repairedBytes = 0;
+ if (!cache->ro) {
+ name = fconcat(opt->path_log,"hts-cache/old.zip");
+ } else {
+ name = fconcat(opt->path_log,"hts-cache/new.zip");
+ }
+ if (opt->log) {
+ fspc(opt->log,"warning"); fprintf(opt->log,"Cache: damaged cache, trying to repair"LF);
+ fflush(opt->log);
+ }
+ if (unzRepair(name,
+ fconcat(opt->path_log,"hts-cache/repair.zip"),
+ fconcat(opt->path_log,"hts-cache/repair.tmp"),
+ &repaired, &repairedBytes
+ ) == Z_OK) {
+ unlink(name);
+ rename(fconcat(opt->path_log,"hts-cache/repair.zip"), name);
+ cache->zipInput = unzOpen(name);
+ if (opt->log) {
+ fspc(opt->log,"warning"); fprintf(opt->log,"Cache: %d bytes successfully recovered in %d entries"LF,
+ (int) repairedBytes, (int) repaired);
+ fflush(opt->log);
+ }
+ } else {
+ if (opt->log) {
+ fspc(opt->log,"warning"); fprintf(opt->log,"Cache: could not repair the cache"LF);
+ fflush(opt->log);
+ }
+ }
+ }
+
+ // Opened ?
+ if (cache->zipInput!=NULL) {
+
+ /* Ready directory entries */
+ if (unzGoToFirstFile((unzFile) cache->zipInput) == Z_OK) {
+ char comment[128];
+ char BIGSTK filename[HTS_URLMAXSIZE * 4];
+ int entries = 0;
+ memset(comment, 0, sizeof(comment)); // for truncated reads
+ do {
+ int readSizeHeader = 0;
+ filename[0] = '\0';
+ comment[0] = '\0';
+ if (unzOpenCurrentFile((unzFile) cache->zipInput) == Z_OK) {
+ if (
+ (readSizeHeader = unzGetLocalExtrafield((unzFile) cache->zipInput, comment, sizeof(comment) - 2)) > 0
+ &&
+ unzGetCurrentFileInfo((unzFile) cache->zipInput, NULL, filename, sizeof(filename) - 2, NULL, 0, NULL, 0) == Z_OK
+ )
+ {
+ long int pos = (long int) unzGetOffset((unzFile) cache->zipInput);
+ assertf(readSizeHeader < sizeof(comment));
+ comment[readSizeHeader] = '\0';
+ entries++;
+ if (pos > 0) {
+ int dataincache = 0; // data in cache ?
+ char* filenameIndex = filename;
+ if (strfield(filenameIndex, "http://")) {
+ filenameIndex += 7;
+ }
+ if (comment[0] != '\0') {
+ int maxLine = 2;
+ char* a = comment;
+ while(*a && maxLine-- > 0) { // parse only few first lines
+ char BIGSTK line[1024];
+ line[0] = '\0';
+ a+=binput(a, line, sizeof(line) - 2);
+ if (strfield(line, "X-In-Cache:")) {
+ if (strfield2(line, "X-In-Cache: 1")) {
+ dataincache = 1;
+ } else {
+ dataincache = 0;
+ }
+ break;
+ }
+ }
+ }
+ if (dataincache)
+ inthash_add((inthash)cache->hashtable, filenameIndex, pos);
+ else
+ inthash_add((inthash)cache->hashtable, filenameIndex, -pos);
+ } else {
+ if (opt->log!=NULL) {
+ fspc(opt->log,"warning"); fprintf(opt->log,"Corrupted cache meta entry #%d"LF, (int)entries);
+ }
+ }
+ } else {
+ if (opt->log!=NULL) {
+ fspc(opt->log,"warning"); fprintf(opt->log,"Corrupted cache entry #%d"LF, (int)entries);
+ }
+ }
+ unzCloseCurrentFile((unzFile) cache->zipInput);
+ } else {
+ if (opt->log!=NULL) {
+ fspc(opt->log,"warning"); fprintf(opt->log,"Corrupted cache entry #%d"LF, (int)entries);
+ }
+ }
+ } while( unzGoToNextFile((unzFile) cache->zipInput) == Z_OK );
+ if ((opt->debug>0) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"Cache index loaded: %d entries loaded"LF, (int)entries);
+ }
+ opt->is_update=1; // signaler comme update
+
+ }
+
+ }
+
+ } else if (
+ (
+ !cache->ro &&
fsize(fconcat(opt->path_log,"hts-cache/old.dat")) >=0 && fsize(fconcat(opt->path_log,"hts-cache/old.ndx")) >0
)
||
@@ -724,7 +1377,7 @@ void cache_init(cache_back* cache,httrackp* opt) {
if (strncmp(firstline,"CACHE-",6)==0) { // Nouvelle version du cache
if (strncmp(firstline,"CACHE-1.",8)==0) { // Version 1.1x
cache->version=(int)(firstline[8]-'0'); // cache 1.x
- if (cache->version <= 4) {
+ if (cache->version <= 5) {
a+=cache_brstr(a,firstline);
strcpybuff(cache->lastmodified,firstline);
} else {
@@ -762,7 +1415,7 @@ void cache_init(cache_back* cache,httrackp* opt) {
/* Create hash table for the cache (MUCH FASTER!) */
#if HTS_FAST_CACHE
if (cache->use) {
- char line[HTS_URLMAXSIZE*2];
+ char BIGSTK line[HTS_URLMAXSIZE*2];
char linepos[256];
int pos;
while ( (a!=NULL) && (a < (cache->use+buffl) ) ) {
@@ -793,60 +1446,96 @@ void cache_init(cache_back* cache,httrackp* opt) {
if (!cache->ro) {
// ouvrir caches actuels
structcheck(fconcat(opt->path_log, "hts-cache/"));
- cache->dat=fopen(fconcat(opt->path_log,"hts-cache/new.dat"),"wb");
- cache->ndx=fopen(fconcat(opt->path_log,"hts-cache/new.ndx"),"wb");
- // les deux doivent être ouvrables
- if ((cache->dat==NULL) && (cache->ndx!=NULL)) {
- fclose(cache->ndx);
- cache->ndx=NULL;
- }
- if ((cache->dat!=NULL) && (cache->ndx==NULL)) {
- fclose(cache->dat);
- cache->dat=NULL;
- }
- if (cache->ndx!=NULL) {
- char s[256];
-
- cache_wstr(cache->dat,"CACHE-1.4");
- fflush(cache->dat);
- cache_wstr(cache->ndx,"CACHE-1.4");
- fflush(cache->ndx);
- //
- time_gmt_rfc822(s); // date et heure actuelle GMT pour If-Modified-Since..
- cache_wstr(cache->ndx,s);
- fflush(cache->ndx); // un petit fflush au cas où
-
- // supprimer old.lst
- if (fexist(fconcat(opt->path_log,"hts-cache/old.lst")))
- remove(fconcat(opt->path_log,"hts-cache/old.lst"));
- // renommer
- if (fexist(fconcat(opt->path_log,"hts-cache/new.lst")))
- rename(fconcat(opt->path_log,"hts-cache/new.lst"),fconcat(opt->path_log,"hts-cache/old.lst"));
- // ouvrir
- cache->lst=fopen(fconcat(opt->path_log,"hts-cache/new.lst"),"wb");
- {
- filecreate_params tmp;
- strcpybuff(tmp.path,opt->path_html); // chemin
- tmp.lst=cache->lst; // fichier lst
- filenote("",&tmp); // initialiser filecreate
+ if (1) {
+ /* Create ZIP file cache */
+ cache->zipOutput = (void*) zipOpen(fconcat(opt->path_log,"hts-cache/new.zip"), 0);
+
+ if (cache->zipOutput != NULL) {
+ // supprimer old.lst
+ if (fexist(fconcat(opt->path_log,"hts-cache/old.lst")))
+ remove(fconcat(opt->path_log,"hts-cache/old.lst"));
+ // renommer
+ if (fexist(fconcat(opt->path_log,"hts-cache/new.lst")))
+ rename(fconcat(opt->path_log,"hts-cache/new.lst"),fconcat(opt->path_log,"hts-cache/old.lst"));
+ // ouvrir
+ cache->lst=fopen(fconcat(opt->path_log,"hts-cache/new.lst"),"wb");
+ {
+ filecreate_params tmp;
+ strcpybuff(tmp.path,opt->path_html); // chemin
+ tmp.lst=cache->lst; // fichier lst
+ filenote("",&tmp); // initialiser filecreate
+ }
+
+ // supprimer old.txt
+ if (fexist(fconcat(opt->path_log,"hts-cache/old.txt")))
+ remove(fconcat(opt->path_log,"hts-cache/old.txt"));
+ // renommer
+ if (fexist(fconcat(opt->path_log,"hts-cache/new.txt")))
+ rename(fconcat(opt->path_log,"hts-cache/new.txt"),fconcat(opt->path_log,"hts-cache/old.txt"));
+ // ouvrir
+ cache->txt=fopen(fconcat(opt->path_log,"hts-cache/new.txt"),"wb");
+ if (cache->txt) {
+ fprintf(cache->txt,"date\tsize'/'remotesize\tflags(request:Update,Range state:File response:Modified,Chunked,gZipped)\t");
+ fprintf(cache->txt,"statuscode\tstatus ('servermsg')\tMIME\tEtag|Date\tURL\tlocalfile\t(from URL)"LF);
+ }
}
-
- // supprimer old.txt
- if (fexist(fconcat(opt->path_log,"hts-cache/old.txt")))
- remove(fconcat(opt->path_log,"hts-cache/old.txt"));
- // renommer
- if (fexist(fconcat(opt->path_log,"hts-cache/new.txt")))
- rename(fconcat(opt->path_log,"hts-cache/new.txt"),fconcat(opt->path_log,"hts-cache/old.txt"));
- // ouvrir
- cache->txt=fopen(fconcat(opt->path_log,"hts-cache/new.txt"),"wb");
- if (cache->txt) {
- fprintf(cache->txt,"date\tsize'/'remotesize\tflags(request:Update,Range state:File response:Modified,Chunked,gZipped)\t");
- fprintf(cache->txt,"statuscode\tstatus ('servermsg')\tMIME\tEtag|Date\tURL\tlocalfile\t(from URL)"LF);
+ } else {
+ cache->dat=fopen(fconcat(opt->path_log,"hts-cache/new.dat"),"wb");
+ cache->ndx=fopen(fconcat(opt->path_log,"hts-cache/new.ndx"),"wb");
+ // les deux doivent être ouvrables
+ if ((cache->dat==NULL) && (cache->ndx!=NULL)) {
+ fclose(cache->ndx);
+ cache->ndx=NULL;
+ }
+ if ((cache->dat!=NULL) && (cache->ndx==NULL)) {
+ fclose(cache->dat);
+ cache->dat=NULL;
}
- // test
- // cache_writedata(cache->ndx,cache->dat,"//[TEST]//","test1","TEST PIPO",9);
+ if (cache->ndx!=NULL) {
+ char s[256];
+
+ cache_wstr(cache->dat,"CACHE-1.5");
+ fflush(cache->dat);
+ cache_wstr(cache->ndx,"CACHE-1.5");
+ fflush(cache->ndx);
+ //
+ time_gmt_rfc822(s); // date et heure actuelle GMT pour If-Modified-Since..
+ cache_wstr(cache->ndx,s);
+ fflush(cache->ndx); // un petit fflush au cas où
+
+ // supprimer old.lst
+ if (fexist(fconcat(opt->path_log,"hts-cache/old.lst")))
+ remove(fconcat(opt->path_log,"hts-cache/old.lst"));
+ // renommer
+ if (fexist(fconcat(opt->path_log,"hts-cache/new.lst")))
+ rename(fconcat(opt->path_log,"hts-cache/new.lst"),fconcat(opt->path_log,"hts-cache/old.lst"));
+ // ouvrir
+ cache->lst=fopen(fconcat(opt->path_log,"hts-cache/new.lst"),"wb");
+ {
+ filecreate_params tmp;
+ strcpybuff(tmp.path,opt->path_html); // chemin
+ tmp.lst=cache->lst; // fichier lst
+ filenote("",&tmp); // initialiser filecreate
+ }
+
+ // supprimer old.txt
+ if (fexist(fconcat(opt->path_log,"hts-cache/old.txt")))
+ remove(fconcat(opt->path_log,"hts-cache/old.txt"));
+ // renommer
+ if (fexist(fconcat(opt->path_log,"hts-cache/new.txt")))
+ rename(fconcat(opt->path_log,"hts-cache/new.txt"),fconcat(opt->path_log,"hts-cache/old.txt"));
+ // ouvrir
+ cache->txt=fopen(fconcat(opt->path_log,"hts-cache/new.txt"),"wb");
+ if (cache->txt) {
+ fprintf(cache->txt,"date\tsize'/'remotesize\tflags(request:Update,Range state:File response:Modified,Chunked,gZipped)\t");
+ fprintf(cache->txt,"statuscode\tstatus ('servermsg')\tMIME\tEtag|Date\tURL\tlocalfile\t(from URL)"LF);
+ }
+
+ // test
+ // cache_writedata(cache->ndx,cache->dat,"//[TEST]//","test1","TEST PIPO",9);
+ }
}
} else {
@@ -906,12 +1595,11 @@ char* readfile_or(char* fil,char* defaultdata) {
int cache_wstr(FILE* fp,char* s) {
INTsys i;
char buff[256+4];
- i=strlen(s);
+ i = s != NULL ? strlen(s) : 0;
sprintf(buff,INTsysP "\n",i);
if (fwrite(buff,1,(INTsys)strlen(buff),fp) != strlen(buff))
return -1;
- if (i>0)
- if ((INTsys)fwrite(s,1,i,fp) != i)
+ if (i > 0 && (INTsys)fwrite(s,1,i,fp) != i)
return -1;
return 0;
}
@@ -922,10 +1610,34 @@ void cache_rstr(FILE* fp,char* s) {
sscanf(buff,INTsysP,&i);
if (i < 0 || i > 32768) /* error, something nasty happened */
i=0;
- if (i>0)
- fread(s,1,i,fp);
+ if (i>0) {
+ if ((int) fread(s,1,i,fp) != i) {
+ int fread_cache_failed = 0;
+ assertf(fread_cache_failed);
+ }
+ }
*(s+i)='\0';
}
+/*
+ * Read a length-prefixed string from 'fp' into a newly allocated buffer.
+ * The stream is expected to hold a decimal length on its own line followed
+ * by that many bytes (the layout produced by cache_wstr()).
+ * Returns a NUL-terminated buffer allocated with malloct(), or NULL when the
+ * length is missing, zero, out of range (more than 32768) or when the
+ * allocation fails. The caller owns the returned buffer.
+ */
+char* cache_rstr_addr(FILE* fp) {
+  INTsys i = 0;
+  char* addr = NULL;
+  char buff[256+4];
+  linput(fp,buff,256);
+  /* a line that does not start with a number counts as an empty string */
+  if (sscanf(buff,INTsysP,&i) != 1)
+    i=0;
+  if (i < 0 || i > 32768)    /* error, something nasty happened */
+    i=0;
+  if (i > 0) {
+    addr = malloct(i + 1);
+    if (addr != NULL) {
+      if ((int) fread(addr,1,i,fp) != i) {
+        int fread_cache_failed = 0;
+        assertf(fread_cache_failed);
+      }
+      *(addr+i)='\0';
+    }
+  }
+  return addr;
+}
int cache_brstr(char* adr,char* s) {
int i;
int off;