summaryrefslogtreecommitdiff
path: root/src/proxy/store.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/proxy/store.c')
-rw-r--r--src/proxy/store.c3554
1 files changed, 1847 insertions, 1707 deletions
diff --git a/src/proxy/store.c b/src/proxy/store.c
index e5d0c8f..2f2b92c 100644
--- a/src/proxy/store.c
+++ b/src/proxy/store.c
@@ -34,7 +34,7 @@ Please visit our Website: http://www.httrack.com
/* Locking */
#ifdef _WIN32
-#include <process.h> /* _beginthread, _endthread */
+#include <process.h> /* _beginthread, _endthread */
#else
#include <pthread.h>
#endif
@@ -57,49 +57,52 @@ Please visit our Website: http://www.httrack.com
/* Unlocked functions */
-static int PT_LookupCache__New_u(PT_Index index, const char* url);
-static PT_Element PT_ReadCache__New_u(PT_Index index, const char* url, int flags);
+static int PT_LookupCache__New_u(PT_Index index, const char *url);
+static PT_Element PT_ReadCache__New_u(PT_Index index, const char *url,
+ int flags);
-static int PT_LookupCache__Old_u(PT_Index index, const char* url);
-static PT_Element PT_ReadCache__Old_u(PT_Index index, const char* url, int flags);
+static int PT_LookupCache__Old_u(PT_Index index, const char *url);
+static PT_Element PT_ReadCache__Old_u(PT_Index index, const char *url,
+ int flags);
-static int PT_LookupCache__Arc_u(PT_Index index, const char* url);
-static PT_Element PT_ReadCache__Arc_u(PT_Index index, const char* url, int flags);
+static int PT_LookupCache__Arc_u(PT_Index index, const char *url);
+static PT_Element PT_ReadCache__Arc_u(PT_Index index, const char *url,
+ int flags);
/* Locking */
#ifdef _WIN32
-void MutexInit(PT_Mutex *pMutex) {
- *pMutex = CreateMutex(NULL,FALSE,NULL);
+void MutexInit(PT_Mutex * pMutex) {
+ *pMutex = CreateMutex(NULL, FALSE, NULL);
}
-void MutexLock(PT_Mutex *pMutex) {
- WaitForSingleObject(*pMutex, INFINITE);
+void MutexLock(PT_Mutex * pMutex) {
+ WaitForSingleObject(*pMutex, INFINITE);
}
-void MutexUnlock(PT_Mutex *pMutex) {
- ReleaseMutex(*pMutex);
+void MutexUnlock(PT_Mutex * pMutex) {
+ ReleaseMutex(*pMutex);
}
-void MutexFree(PT_Mutex *pMutex) {
- CloseHandle(*pMutex);
- *pMutex = NULL;
+void MutexFree(PT_Mutex * pMutex) {
+ CloseHandle(*pMutex);
+ *pMutex = NULL;
}
#else
-void MutexInit(PT_Mutex *pMutex) {
- (void) pthread_mutex_init(pMutex, 0);
+void MutexInit(PT_Mutex * pMutex) {
+ (void) pthread_mutex_init(pMutex, 0);
}
-void MutexLock(PT_Mutex *pMutex) {
- pthread_mutex_lock(pMutex);
+void MutexLock(PT_Mutex * pMutex) {
+ pthread_mutex_lock(pMutex);
}
-void MutexUnlock(PT_Mutex *pMutex) {
- pthread_mutex_unlock(pMutex);
+void MutexUnlock(PT_Mutex * pMutex) {
+ pthread_mutex_unlock(pMutex);
}
-void MutexFree(PT_Mutex *pMutex) {
- pthread_mutex_destroy(pMutex);
+void MutexFree(PT_Mutex * pMutex) {
+ pthread_mutex_destroy(pMutex);
}
#endif
@@ -116,47 +119,48 @@ typedef struct _PT_Index__Arc *PT_Index__Arc;
typedef struct _PT_Index_Functions *PT_Index_Functions;
enum {
- PT_CACHE_UNDEFINED = -1,
- PT_CACHE_MIN = 0,
- PT_CACHE__NEW = PT_CACHE_MIN,
- PT_CACHE__OLD,
- PT_CACHE__ARC,
- PT_CACHE_MAX = PT_CACHE__ARC
+ PT_CACHE_UNDEFINED = -1,
+ PT_CACHE_MIN = 0,
+ PT_CACHE__NEW = PT_CACHE_MIN,
+ PT_CACHE__OLD,
+ PT_CACHE__ARC,
+ PT_CACHE_MAX = PT_CACHE__ARC
};
static int PT_LoadCache__New(PT_Index index, const char *filename);
-static void PT_Index_Delete__New(PT_Index *pindex);
-static PT_Element PT_ReadCache__New(PT_Index index, const char* url, int flags);
-static int PT_LookupCache__New(PT_Index index, const char* url);
+static void PT_Index_Delete__New(PT_Index * pindex);
+static PT_Element PT_ReadCache__New(PT_Index index, const char *url, int flags);
+static int PT_LookupCache__New(PT_Index index, const char *url);
static int PT_SaveCache__New(PT_Indexes indexes, const char *filename);
-/**/
-static int PT_LoadCache__Old(PT_Index index, const char *filename);
-static void PT_Index_Delete__Old(PT_Index *pindex);
-static PT_Element PT_ReadCache__Old(PT_Index index, const char* url, int flags);
-static int PT_LookupCache__Old(PT_Index index, const char* url);
-/**/
-static int PT_LoadCache__Arc(PT_Index index, const char *filename);
-static void PT_Index_Delete__Arc(PT_Index *pindex);
-static PT_Element PT_ReadCache__Arc(PT_Index index, const char* url, int flags);
-static int PT_LookupCache__Arc(PT_Index index, const char* url);
+ /**/ static int PT_LoadCache__Old(PT_Index index, const char *filename);
+static void PT_Index_Delete__Old(PT_Index * pindex);
+static PT_Element PT_ReadCache__Old(PT_Index index, const char *url, int flags);
+static int PT_LookupCache__Old(PT_Index index, const char *url);
+ /**/ static int PT_LoadCache__Arc(PT_Index index, const char *filename);
+static void PT_Index_Delete__Arc(PT_Index * pindex);
+static PT_Element PT_ReadCache__Arc(PT_Index index, const char *url, int flags);
+static int PT_LookupCache__Arc(PT_Index index, const char *url);
static int PT_SaveCache__Arc(PT_Indexes indexes, const char *filename);
struct _PT_Index_Functions {
- /* Mandatory services */
- int (*PT_LoadCache)(PT_Index index, const char *filename);
- void (*PT_Index_Delete)(PT_Index *pindex);
- PT_Element (*PT_ReadCache)(PT_Index index, const char* url, int flags);
- int (*PT_LookupCache)(PT_Index index, const char* url);
-
- /* Optional services */
- int (*PT_SaveCache)(PT_Indexes indexes, const char *filename);
+ /* Mandatory services */
+ int (*PT_LoadCache) (PT_Index index, const char *filename);
+ void (*PT_Index_Delete) (PT_Index * pindex);
+ PT_Element(*PT_ReadCache) (PT_Index index, const char *url, int flags);
+ int (*PT_LookupCache) (PT_Index index, const char *url);
+
+ /* Optional services */
+ int (*PT_SaveCache) (PT_Indexes indexes, const char *filename);
};
static _PT_Index_Functions _IndexFuncts[] = {
- { PT_LoadCache__New, PT_Index_Delete__New, PT_ReadCache__New, PT_LookupCache__New, PT_SaveCache__New },
- { PT_LoadCache__Old, PT_Index_Delete__Old, PT_ReadCache__Old, PT_LookupCache__Old, NULL },
- { PT_LoadCache__Arc, PT_Index_Delete__Arc, PT_ReadCache__Arc, PT_LookupCache__Arc, PT_SaveCache__Arc },
- { NULL, NULL, NULL, NULL }
+ {PT_LoadCache__New, PT_Index_Delete__New, PT_ReadCache__New,
+ PT_LookupCache__New, PT_SaveCache__New},
+ {PT_LoadCache__Old, PT_Index_Delete__Old, PT_ReadCache__Old,
+ PT_LookupCache__Old, NULL},
+ {PT_LoadCache__Arc, PT_Index_Delete__Arc, PT_ReadCache__Arc,
+ PT_LookupCache__Arc, PT_SaveCache__Arc},
+ {NULL, NULL, NULL, NULL}
};
#define PT_INDEX_COMMON_STRUCTURE \
@@ -165,90 +169,91 @@ static _PT_Index_Functions _IndexFuncts[] = {
char startUrl[1024]
struct _PT_Index__New {
- PT_INDEX_COMMON_STRUCTURE;
- char path[1024]; /* either empty, or must include ending / */
- int fixedPath;
- int safeCache;
- unzFile zFile;
- PT_Mutex zFileLock;
+ PT_INDEX_COMMON_STRUCTURE;
+ char path[1024]; /* either empty, or must include ending / */
+ int fixedPath;
+ int safeCache;
+ unzFile zFile;
+ PT_Mutex zFileLock;
};
struct _PT_Index__Old {
- PT_INDEX_COMMON_STRUCTURE;
- char filenameDat[1024];
- char filenameNdx[1024];
- FILE *dat,*ndx;
- PT_Mutex fileLock;
- int version;
- char lastmodified[1024];
- char path[1024]; /* either empty, or must include ending / */
- int fixedPath;
- int safeCache;
+ PT_INDEX_COMMON_STRUCTURE;
+ char filenameDat[1024];
+ char filenameNdx[1024];
+ FILE *dat, *ndx;
+ PT_Mutex fileLock;
+ int version;
+ char lastmodified[1024];
+ char path[1024]; /* either empty, or must include ending / */
+ int fixedPath;
+ int safeCache;
};
struct _PT_Index__Arc {
- PT_INDEX_COMMON_STRUCTURE;
- FILE *file;
- PT_Mutex fileLock;
- int version;
- char lastmodified[1024];
- char line[2048];
- char filenameIndexBuff[2048];
+ PT_INDEX_COMMON_STRUCTURE;
+ FILE *file;
+ PT_Mutex fileLock;
+ int version;
+ char lastmodified[1024];
+ char line[2048];
+ char filenameIndexBuff[2048];
};
struct _PT_Index {
- int type;
- union {
- _PT_Index__New formatNew;
- _PT_Index__Old formatOld;
- _PT_Index__Arc formatArc;
- struct {
- PT_INDEX_COMMON_STRUCTURE;
- } common;
- } slots;
+ int type;
+ union {
+ _PT_Index__New formatNew;
+ _PT_Index__Old formatOld;
+ _PT_Index__Arc formatArc;
+ struct {
+ PT_INDEX_COMMON_STRUCTURE;
+ } common;
+ } slots;
};
struct _PT_Indexes {
- inthash cil;
- struct _PT_Index **index;
- int index_size;
+ inthash cil;
+ struct _PT_Index **index;
+ int index_size;
};
struct _PT_CacheItem {
- time_t lastUsed;
- size_t size;
- void* data;
+ time_t lastUsed;
+ size_t size;
+ void *data;
};
struct _PT_Cache {
- inthash index;
- size_t maxSize;
- size_t totalSize;
- int count;
+ inthash index;
+ size_t maxSize;
+ size_t totalSize;
+ int count;
};
PT_Indexes PT_New(void) {
- PT_Indexes index = (PT_Indexes) calloc(sizeof(_PT_Indexes), 1);
- index->cil = inthash_new(127);
- index->index_size = 0;
- index->index = NULL;
- return index;
+ PT_Indexes index = (PT_Indexes) calloc(sizeof(_PT_Indexes), 1);
+
+ index->cil = inthash_new(127);
+ index->index_size = 0;
+ index->index = NULL;
+ return index;
}
void PT_Delete(PT_Indexes index) {
- if (index != NULL) {
- inthash_delete(&index->cil);
- free(index);
- }
+ if (index != NULL) {
+ inthash_delete(&index->cil);
+ free(index);
+ }
}
int PT_RemoveIndex(PT_Indexes index, int indexId) {
- return 0;
+ return 0;
}
#define assertf(exp)
-static int binput(char* buff,char* s,int max) {
+static int binput(char *buff, char *s, int max) {
int count = 0;
int destCount = 0;
@@ -257,7 +262,7 @@ static int binput(char* buff,char* s,int max) {
if (buff[count] != '\r') {
s[destCount++] = buff[count];
}
- count++;
+ count++;
}
s[destCount] = '\0';
@@ -265,529 +270,573 @@ static int binput(char* buff,char* s,int max) {
return count + 1;
}
-static time_t file_timestamp(const char* file) {
+static time_t file_timestamp(const char *file) {
struct stat buf;
+
if (stat(file, &buf) == 0) {
time_t tt = buf.st_mtime;
- if (tt != (time_t) 0 && tt != (time_t) -1) {
- return tt;
- }
+
+ if (tt != (time_t) 0 && tt != (time_t) - 1) {
+ return tt;
+ }
}
return (time_t) 0;
}
-static int PT_Index_Check__(PT_Index index, const char* file, int line) {
- if (index == NULL)
- return 0;
- if (index->type >= PT_CACHE_MIN && index->type <= PT_CACHE_MAX)
- return 1;
- proxytrack_print_log(CRITICAL, "index corrupted in memory at %s:%d", file, line);
- return 0;
+static int PT_Index_Check__(PT_Index index, const char *file, int line) {
+ if (index == NULL)
+ return 0;
+ if (index->type >= PT_CACHE_MIN && index->type <= PT_CACHE_MAX)
+ return 1;
+ proxytrack_print_log(CRITICAL, "index corrupted in memory at %s:%d", file,
+ line);
+ return 0;
}
-#define SAFE_INDEX(index) PT_Index_Check__(index, __FILE__, __LINE__)
+#define SAFE_INDEX(index) PT_Index_Check__(index, __FILE__, __LINE__)
/* ------------------------------------------------------------ */
/* Generic cache dispatch */
/* ------------------------------------------------------------ */
-void PT_Index_Delete(PT_Index *pindex) {
- if (pindex != NULL && (*pindex) != NULL) {
- PT_Index index = *pindex;
- if (SAFE_INDEX(index)) {
- _IndexFuncts[index->type].PT_Index_Delete(pindex);
- }
- free(index);
- *pindex = NULL;
- }
-}
-
-static void PT_Index_Delete__New(PT_Index *pindex) {
- if (pindex != NULL && (*pindex) != NULL) {
- PT_Index__New index = &(*pindex)->slots.formatNew;
- if (index->zFile != NULL) {
- unzClose(index->zFile);
- index->zFile = NULL;
- }
- if (index->hash != NULL) {
- inthash_delete(&index->hash);
- index->hash = NULL;
- }
- MutexFree(&index->zFileLock);
- }
-}
-
-static void PT_Index_Delete__Old(PT_Index *pindex) {
- if (pindex != NULL && (*pindex) != NULL) {
- PT_Index__Old index = &(*pindex)->slots.formatOld;
- if (index->dat != NULL) {
- fclose(index->dat);
- }
- if (index->ndx != NULL) {
- fclose(index->ndx);
- }
- if (index->hash != NULL) {
- inthash_delete(&index->hash);
- index->hash = NULL;
- }
- MutexFree(&index->fileLock);
- }
-}
-
-static void PT_Index_Delete__Arc(PT_Index *pindex) {
- if (pindex != NULL && (*pindex) != NULL) {
- PT_Index__Arc index = &(*pindex)->slots.formatArc;
- if (index->file != NULL) {
- fclose(index->file);
- }
- MutexFree(&index->fileLock);
- }
+void PT_Index_Delete(PT_Index * pindex) {
+ if (pindex != NULL && (*pindex) != NULL) {
+ PT_Index index = *pindex;
+
+ if (SAFE_INDEX(index)) {
+ _IndexFuncts[index->type].PT_Index_Delete(pindex);
+ }
+ free(index);
+ *pindex = NULL;
+ }
+}
+
+static void PT_Index_Delete__New(PT_Index * pindex) {
+ if (pindex != NULL && (*pindex) != NULL) {
+ PT_Index__New index = &(*pindex)->slots.formatNew;
+
+ if (index->zFile != NULL) {
+ unzClose(index->zFile);
+ index->zFile = NULL;
+ }
+ if (index->hash != NULL) {
+ inthash_delete(&index->hash);
+ index->hash = NULL;
+ }
+ MutexFree(&index->zFileLock);
+ }
+}
+
+static void PT_Index_Delete__Old(PT_Index * pindex) {
+ if (pindex != NULL && (*pindex) != NULL) {
+ PT_Index__Old index = &(*pindex)->slots.formatOld;
+
+ if (index->dat != NULL) {
+ fclose(index->dat);
+ }
+ if (index->ndx != NULL) {
+ fclose(index->ndx);
+ }
+ if (index->hash != NULL) {
+ inthash_delete(&index->hash);
+ index->hash = NULL;
+ }
+ MutexFree(&index->fileLock);
+ }
+}
+
+static void PT_Index_Delete__Arc(PT_Index * pindex) {
+ if (pindex != NULL && (*pindex) != NULL) {
+ PT_Index__Arc index = &(*pindex)->slots.formatArc;
+
+ if (index->file != NULL) {
+ fclose(index->file);
+ }
+ MutexFree(&index->fileLock);
+ }
}
int PT_AddIndex(PT_Indexes indexes, const char *path) {
- PT_Index index = PT_LoadCache(path);
- if (index != NULL) {
- int ret = PT_IndexMerge(indexes, &index);
- if (index != NULL) {
- PT_Index_Delete(&index);
- }
- return ret;
- }
- return -1;
+ PT_Index index = PT_LoadCache(path);
+
+ if (index != NULL) {
+ int ret = PT_IndexMerge(indexes, &index);
+
+ if (index != NULL) {
+ PT_Index_Delete(&index);
+ }
+ return ret;
+ }
+ return -1;
}
PT_Element PT_Index_HTML_BuildRootInfo(PT_Indexes indexes) {
- if (indexes != NULL) {
- PT_Element elt = PT_ElementNew();
- int i;
- String html = STRING_EMPTY;
- StringClear(html);
- StringCat(html,
- "<html>"
- PROXYTRACK_COMMENT_HEADER
- DISABLE_IE_FRIENDLY_HTTP_ERROR_MESSAGES
- "<head>\r\n"
- "<title>ProxyTrack " PROXYTRACK_VERSION " Catalog</title>"
- "</head>\r\n"
- "<body>\r\n"
- "<h3>Available sites in this cache:</h3><br />"
- "<br />"
- );
- StringCat(html, "<ul>\r\n");
- for(i = 0 ; i < indexes->index_size ; i++) {
- if (indexes->index[i] != NULL
- && indexes->index[i]->slots.common.startUrl[0] != '\0')
- {
- const char * url = indexes->index[i]->slots.common.startUrl;
- StringCat(html, "<li>\r\n");
- StringCat(html, "<a href=\"");
- StringCat(html, url);
- StringCat(html, "\">");
- StringCat(html, url);
- StringCat(html, "</a>\r\n");
- StringCat(html, "</li>\r\n");
- }
- }
- StringCat(html, "</ul>\r\n");
- StringCat(html, "</body></html>\r\n");
- elt->size = StringLength(html);
- elt->adr = StringAcquire(&html);
- elt->statuscode = HTTP_OK;
- strcpy(elt->charset, "iso-8859-1");
- strcpy(elt->contenttype, "text/html");
- strcpy(elt->msg, "OK");
- StringFree(html);
- return elt;
- }
- return NULL;
-}
-
-static char* strchr_stop(char* str, char c, char stop) {
- for( ; *str != 0 && *str != stop && *str != c ; str++);
- if (*str == c)
- return str;
- return NULL;
-}
-
-char ** PT_Enumerate(PT_Indexes indexes, const char *url, int subtree) {
- // should be cached!
- if (indexes != NULL && indexes->cil != NULL) {
- unsigned int urlSize;
- String list = STRING_EMPTY;
- String listindexes = STRING_EMPTY;
- String subitem = STRING_EMPTY;
- unsigned int listCount = 0;
- struct_inthash_enum en = inthash_enum_new(indexes->cil);
- inthash_chain* chain;
- inthash hdupes = NULL;
- if (!subtree)
- hdupes= inthash_new(127);
- StringClear(list);
- StringClear(listindexes);
- StringClear(subitem);
- if (strncmp(url, "http://", 7) == 0)
- url += 7;
- urlSize = (unsigned int) strlen(url);
- while((chain = inthash_enum_next(&en))) {
- long int index = (long int)chain->value.intg;
- if (urlSize == 0 || strncmp(chain->name, url, urlSize) == 0) {
- if (index >= 0 && index < indexes->index_size) {
- char * item = chain->name + urlSize;
- if (*item == '/')
- item++;
- {
- char * pos = subtree ? 0 : strchr_stop(item, '/', '?');
- unsigned int len = pos ? (unsigned int)( pos - item ) : (unsigned int)strlen(item);
- if (len > 0 /* default document */ || *item == 0) {
- int isFolder = ( item[len] == '/' );
- StringClear(subitem);
- if (len > 0)
- StringMemcat(subitem, item, len);
- if (len == 0 || !inthash_exists(hdupes, StringBuff(subitem))) {
- char* ptr = NULL;
- ptr += StringLength(list);
- if (len > 0)
- StringCat(list, StringBuff(subitem));
- if (isFolder)
- StringCat(list, "/");
- StringMemcat(list, "\0", 1); /* NULL terminated strings */
- StringMemcat(listindexes, &ptr, sizeof(ptr));
- listCount++;
- inthash_write(hdupes, StringBuff(subitem), 0);
- }
- }
- }
- } else {
- proxytrack_print_log(CRITICAL, "PT_Enumerate:Corrupted central index locator");
- }
- }
- }
- StringFree(subitem);
- inthash_delete(&hdupes);
- if (listCount > 0) {
- unsigned int i;
- void* blk;
- char *nullPointer = NULL;
- char* startStrings;
- /* NULL terminated index */
- StringMemcat(listindexes, &nullPointer, sizeof(nullPointer));
- /* start of all strings (index) */
- startStrings = nullPointer + StringLength(listindexes);
- /* copy list of URLs after indexes */
- StringMemcat(listindexes, StringBuff(list), StringLength(list));
- /* ---- no reallocation beyond this point (fixed addresses) ---- */
- /* start of all strings (pointer) */
- startStrings = (startStrings - nullPointer) + StringBuffRW(listindexes);
- /* transform indexes into references */
- for(i = 0 ; i < listCount ; i++) {
- char *ptr = NULL;
- unsigned int ndx;
- memcpy(&ptr, &StringBuff(listindexes)[i*sizeof(char*)], sizeof(char*));
- ndx = (unsigned int) (ptr - nullPointer);
- ptr = startStrings + ndx;
- memcpy(&StringBuffRW(listindexes)[i*sizeof(char*)], &ptr, sizeof(char*));
- }
- blk = StringAcquire(&listindexes);
- StringFree(list);
- StringFree(listindexes);
- return (char **)blk;
- }
- }
- return NULL;
+ if (indexes != NULL) {
+ PT_Element elt = PT_ElementNew();
+ int i;
+ String html = STRING_EMPTY;
+
+ StringClear(html);
+ StringCat(html,
+ "<html>" PROXYTRACK_COMMENT_HEADER
+ DISABLE_IE_FRIENDLY_HTTP_ERROR_MESSAGES "<head>\r\n"
+ "<title>ProxyTrack " PROXYTRACK_VERSION " Catalog</title>"
+ "</head>\r\n" "<body>\r\n"
+ "<h3>Available sites in this cache:</h3><br />" "<br />");
+ StringCat(html, "<ul>\r\n");
+ for(i = 0; i < indexes->index_size; i++) {
+ if (indexes->index[i] != NULL
+ && indexes->index[i]->slots.common.startUrl[0] != '\0') {
+ const char *url = indexes->index[i]->slots.common.startUrl;
+
+ StringCat(html, "<li>\r\n");
+ StringCat(html, "<a href=\"");
+ StringCat(html, url);
+ StringCat(html, "\">");
+ StringCat(html, url);
+ StringCat(html, "</a>\r\n");
+ StringCat(html, "</li>\r\n");
+ }
+ }
+ StringCat(html, "</ul>\r\n");
+ StringCat(html, "</body></html>\r\n");
+ elt->size = StringLength(html);
+ elt->adr = StringAcquire(&html);
+ elt->statuscode = HTTP_OK;
+ strcpy(elt->charset, "iso-8859-1");
+ strcpy(elt->contenttype, "text/html");
+ strcpy(elt->msg, "OK");
+ StringFree(html);
+ return elt;
+ }
+ return NULL;
+}
+
+static char *strchr_stop(char *str, char c, char stop) {
+ for(; *str != 0 && *str != stop && *str != c; str++) ;
+ if (*str == c)
+ return str;
+ return NULL;
+}
+
+char **PT_Enumerate(PT_Indexes indexes, const char *url, int subtree) {
+ // should be cached!
+ if (indexes != NULL && indexes->cil != NULL) {
+ unsigned int urlSize;
+ String list = STRING_EMPTY;
+ String listindexes = STRING_EMPTY;
+ String subitem = STRING_EMPTY;
+ unsigned int listCount = 0;
+ struct_inthash_enum en = inthash_enum_new(indexes->cil);
+ inthash_chain *chain;
+ inthash hdupes = NULL;
+
+ if (!subtree)
+ hdupes = inthash_new(127);
+ StringClear(list);
+ StringClear(listindexes);
+ StringClear(subitem);
+ if (strncmp(url, "http://", 7) == 0)
+ url += 7;
+ urlSize = (unsigned int) strlen(url);
+ while((chain = inthash_enum_next(&en))) {
+ long int index = (long int) chain->value.intg;
+
+ if (urlSize == 0 || strncmp(chain->name, url, urlSize) == 0) {
+ if (index >= 0 && index < indexes->index_size) {
+ char *item = chain->name + urlSize;
+
+ if (*item == '/')
+ item++;
+ {
+ char *pos = subtree ? 0 : strchr_stop(item, '/', '?');
+ unsigned int len =
+ pos ? (unsigned int) (pos - item) : (unsigned int) strlen(item);
+ if (len > 0 /* default document */ || *item == 0) {
+ int isFolder = (item[len] == '/');
+
+ StringClear(subitem);
+ if (len > 0)
+ StringMemcat(subitem, item, len);
+ if (len == 0 || !inthash_exists(hdupes, StringBuff(subitem))) {
+ char *ptr = NULL;
+
+ ptr += StringLength(list);
+ if (len > 0)
+ StringCat(list, StringBuff(subitem));
+ if (isFolder)
+ StringCat(list, "/");
+ StringMemcat(list, "\0", 1); /* NULL terminated strings */
+ StringMemcat(listindexes, &ptr, sizeof(ptr));
+ listCount++;
+ inthash_write(hdupes, StringBuff(subitem), 0);
+ }
+ }
+ }
+ } else {
+ proxytrack_print_log(CRITICAL,
+ "PT_Enumerate:Corrupted central index locator");
+ }
+ }
+ }
+ StringFree(subitem);
+ inthash_delete(&hdupes);
+ if (listCount > 0) {
+ unsigned int i;
+ void *blk;
+ char *nullPointer = NULL;
+ char *startStrings;
+
+ /* NULL terminated index */
+ StringMemcat(listindexes, &nullPointer, sizeof(nullPointer));
+ /* start of all strings (index) */
+ startStrings = nullPointer + StringLength(listindexes);
+ /* copy list of URLs after indexes */
+ StringMemcat(listindexes, StringBuff(list), StringLength(list));
+ /* ---- no reallocation beyond this point (fixed addresses) ---- */
+ /* start of all strings (pointer) */
+ startStrings = (startStrings - nullPointer) + StringBuffRW(listindexes);
+ /* transform indexes into references */
+ for(i = 0; i < listCount; i++) {
+ char *ptr = NULL;
+ unsigned int ndx;
+
+ memcpy(&ptr, &StringBuff(listindexes)[i * sizeof(char *)],
+ sizeof(char *));
+ ndx = (unsigned int) (ptr - nullPointer);
+ ptr = startStrings + ndx;
+ memcpy(&StringBuffRW(listindexes)[i * sizeof(char *)], &ptr,
+ sizeof(char *));
+ }
+ blk = StringAcquire(&listindexes);
+ StringFree(list);
+ StringFree(listindexes);
+ return (char **) blk;
+ }
+ }
+ return NULL;
}
void PT_Enumerate_Delete(char ***plist) {
- if (plist != NULL && *plist != NULL) {
- free(*plist);
- *plist = NULL;
- }
+ if (plist != NULL && *plist != NULL) {
+ free(*plist);
+ *plist = NULL;
+ }
}
static int PT_GetType(const char *filename) {
- char * dot = strrchr(filename, '.');
- if (dot != NULL) {
- if (strcasecmp(dot, ".zip") == 0) {
- return PT_CACHE__NEW;
- } else if (strcasecmp(dot, ".ndx") == 0 || strcasecmp(dot, ".dat") == 0) {
- return PT_CACHE__OLD;
- } else if (strcasecmp(dot, ".arc") == 0) {
- return PT_CACHE__ARC;
- }
- }
- return PT_CACHE_UNDEFINED;
+ char *dot = strrchr(filename, '.');
+
+ if (dot != NULL) {
+ if (strcasecmp(dot, ".zip") == 0) {
+ return PT_CACHE__NEW;
+ } else if (strcasecmp(dot, ".ndx") == 0 || strcasecmp(dot, ".dat") == 0) {
+ return PT_CACHE__OLD;
+ } else if (strcasecmp(dot, ".arc") == 0) {
+ return PT_CACHE__ARC;
+ }
+ }
+ return PT_CACHE_UNDEFINED;
}
PT_Index PT_LoadCache(const char *filename) {
- int type = PT_GetType(filename);
- if (type != PT_CACHE_UNDEFINED) {
- PT_Index index = calloc(sizeof(_PT_Index), 1);
- if (index != NULL) {
- index->type = type;
- index->slots.common.timestamp = (time_t) time(NULL);
- index->slots.common.startUrl[0] = '\0';
- index->slots.common.hash = inthash_new(8191);
- if (!_IndexFuncts[type].PT_LoadCache(index, filename)) {
- proxytrack_print_log(DEBUG, "reading httrack cache (format #%d) %s : error", type, filename );
- free(index);
- index = NULL;
- return NULL;
- } else {
- proxytrack_print_log(DEBUG, "reading httrack cache (format #%d) %s : success", type, filename );
- }
- /* default starting URL is the first hash entry */
- if (index->slots.common.startUrl[0] == '\0') {
- struct_inthash_enum en = inthash_enum_new(index->slots.common.hash);
- inthash_chain* chain;
- chain = inthash_enum_next(&en);
- if (chain != NULL
- && strstr(chain->name, "/robots.txt") != NULL)
- {
- chain = inthash_enum_next(&en);
- }
- if (chain != NULL) {
- if (!link_has_authority(chain->name))
- strcat(index->slots.common.startUrl, "http://");
- strcat(index->slots.common.startUrl, chain->name);
- }
- }
- }
- return index;
- }
- return NULL;
-}
-
-
-static long int filesize(const char* filename) {
+ int type = PT_GetType(filename);
+
+ if (type != PT_CACHE_UNDEFINED) {
+ PT_Index index = calloc(sizeof(_PT_Index), 1);
+
+ if (index != NULL) {
+ index->type = type;
+ index->slots.common.timestamp = (time_t) time(NULL);
+ index->slots.common.startUrl[0] = '\0';
+ index->slots.common.hash = inthash_new(8191);
+ if (!_IndexFuncts[type].PT_LoadCache(index, filename)) {
+ proxytrack_print_log(DEBUG,
+ "reading httrack cache (format #%d) %s : error",
+ type, filename);
+ free(index);
+ index = NULL;
+ return NULL;
+ } else {
+ proxytrack_print_log(DEBUG,
+ "reading httrack cache (format #%d) %s : success",
+ type, filename);
+ }
+ /* default starting URL is the first hash entry */
+ if (index->slots.common.startUrl[0] == '\0') {
+ struct_inthash_enum en = inthash_enum_new(index->slots.common.hash);
+ inthash_chain *chain;
+
+ chain = inthash_enum_next(&en);
+ if (chain != NULL && strstr(chain->name, "/robots.txt") != NULL) {
+ chain = inthash_enum_next(&en);
+ }
+ if (chain != NULL) {
+ if (!link_has_authority(chain->name))
+ strcat(index->slots.common.startUrl, "http://");
+ strcat(index->slots.common.startUrl, chain->name);
+ }
+ }
+ }
+ return index;
+ }
+ return NULL;
+}
+
+static long int filesize(const char *filename) {
struct stat st;
+
memset(&st, 0, sizeof(st));
if (stat(filename, &st) == 0) {
- return (long int)st.st_size;
+ return (long int) st.st_size;
}
return -1;
-}
+}
-int PT_LookupCache(PT_Index index, const char* url) {
- if (index != NULL && SAFE_INDEX(index)) {
- return _IndexFuncts[index->type].PT_LookupCache(index, url);
- }
- return 0;
+int PT_LookupCache(PT_Index index, const char *url) {
+ if (index != NULL && SAFE_INDEX(index)) {
+ return _IndexFuncts[index->type].PT_LookupCache(index, url);
+ }
+ return 0;
}
int PT_SaveCache(PT_Indexes indexes, const char *filename) {
- int type = PT_GetType(filename);
- if (type != PT_CACHE_UNDEFINED) {
- if (_IndexFuncts[type].PT_SaveCache != NULL) {
- int ret = _IndexFuncts[type].PT_SaveCache(indexes, filename);
- if (ret == 0) {
- (void) set_filetime_time_t(filename, PT_GetTimeIndex(indexes));
- return 0;
- }
- }
- }
- return -1;
-}
-
-int PT_EnumCache(PT_Indexes indexes, int (*callback)(void *, const char *url, PT_Element), void *arg) {
- if (indexes != NULL && indexes->cil != NULL) {
- struct_inthash_enum en = inthash_enum_new(indexes->cil);
- inthash_chain* chain;
- while((chain = inthash_enum_next(&en))) {
- const long int index_id = (long int)chain->value.intg;
- const char *const url = chain->name;
- if (index_id >= 0 && index_id <= indexes->index_size) {
- PT_Element item = PT_ReadCache(indexes->index[index_id], url, FETCH_HEADERS | FETCH_BODY);
- if (item != NULL) {
- int ret = callback(arg, url, item);
- PT_Element_Delete(&item);
- if (ret != 0)
- return ret;
- }
- } else {
- proxytrack_print_log(CRITICAL, "PT_ReadCache:Corrupted central index locator");
- return -1;
- }
- }
- }
- return 0;
+ int type = PT_GetType(filename);
+
+ if (type != PT_CACHE_UNDEFINED) {
+ if (_IndexFuncts[type].PT_SaveCache != NULL) {
+ int ret = _IndexFuncts[type].PT_SaveCache(indexes, filename);
+
+ if (ret == 0) {
+ (void) set_filetime_time_t(filename, PT_GetTimeIndex(indexes));
+ return 0;
+ }
+ }
+ }
+ return -1;
+}
+
+int PT_EnumCache(PT_Indexes indexes,
+ int (*callback) (void *, const char *url, PT_Element),
+ void *arg) {
+ if (indexes != NULL && indexes->cil != NULL) {
+ struct_inthash_enum en = inthash_enum_new(indexes->cil);
+ inthash_chain *chain;
+
+ while((chain = inthash_enum_next(&en))) {
+ const long int index_id = (long int) chain->value.intg;
+ const char *const url = chain->name;
+
+ if (index_id >= 0 && index_id <= indexes->index_size) {
+ PT_Element item =
+ PT_ReadCache(indexes->index[index_id], url,
+ FETCH_HEADERS | FETCH_BODY);
+ if (item != NULL) {
+ int ret = callback(arg, url, item);
+
+ PT_Element_Delete(&item);
+ if (ret != 0)
+ return ret;
+ }
+ } else {
+ proxytrack_print_log(CRITICAL,
+ "PT_ReadCache:Corrupted central index locator");
+ return -1;
+ }
+ }
+ }
+ return 0;
}
time_t PT_Index_Timestamp(PT_Index index) {
- return index->slots.common.timestamp;
-}
-
-static int PT_LookupCache__New(PT_Index index, const char* url) {
- int retCode;
- MutexLock(&index->slots.formatNew.zFileLock);
- {
- retCode = PT_LookupCache__New_u(index, url);
- }
- MutexUnlock(&index->slots.formatNew.zFileLock);
- return retCode;
-}
-
-static int PT_LookupCache__New_u(PT_Index index_, const char* url) {
- if (index_ != NULL) {
- PT_Index__New index = &index_->slots.formatNew;
- if (index->hash != NULL && index->zFile != NULL && url != NULL && *url != 0) {
- int hash_pos_return;
- if (strncmp(url, "http://", 7) == 0)
- url += 7;
- hash_pos_return = inthash_read(index->hash, url, NULL);
- if (hash_pos_return)
- return 1;
- }
- }
- return 0;
-}
-
-int PT_IndexMerge(PT_Indexes indexes, PT_Index *pindex)
-{
- if (pindex != NULL && *pindex != NULL && (*pindex)->slots.common.hash != NULL
- && indexes != NULL)
- {
- PT_Index index = *pindex;
- struct_inthash_enum en = inthash_enum_new(index->slots.common.hash);
- inthash_chain* chain;
- int index_id = indexes->index_size++;
- int nMerged = 0;
- if ((indexes->index = realloc(indexes->index, sizeof(struct _PT_Index)*indexes->index_size)) != NULL) {
- indexes->index[index_id] = index;
- *pindex = NULL;
- while((chain = inthash_enum_next(&en)) != NULL) {
- const char * url = chain->name;
- if (url != NULL && url[0] != '\0') {
- intptr_t previous_index_id = 0;
- if (inthash_read(indexes->cil, url, &previous_index_id)) {
- if (previous_index_id >= 0 && previous_index_id < indexes->index_size) {
- if (indexes->index[previous_index_id]->slots.common.timestamp > index->slots.common.timestamp) // existing entry is newer
- break;
- } else {
- proxytrack_print_log(CRITICAL, "PT_IndexMerge:Corrupted central index locator");
- }
- }
- inthash_write(indexes->cil, chain->name, index_id);
- nMerged++;
- }
- }
- } else {
- proxytrack_print_log(CRITICAL, "PT_IndexMerge:Memory exhausted");
- }
- return nMerged;
- }
- return -1;
-}
-
-void PT_Element_Delete(PT_Element *pentry) {
- if (pentry != NULL) {
- PT_Element entry = *pentry;
- if (entry != NULL) {
- if (entry->adr != NULL) {
- free(entry->adr);
- entry->adr = NULL;
- }
- if (entry->headers != NULL) {
- free(entry->headers);
- entry->headers = NULL;
- }
- if (entry->location != NULL) {
- free(entry->location);
- entry->location = NULL;
- }
- free(entry);
- }
- *pentry = NULL;
- }
-}
-
-PT_Element PT_ReadIndex(PT_Indexes indexes, const char* url, int flags)
-{
- if (indexes != NULL)
- {
- intptr_t index_id;
- if (strncmp(url, "http://", 7) == 0)
- url += 7;
- if (inthash_read(indexes->cil, url, &index_id)) {
- if (index_id >= 0 && index_id <= indexes->index_size) {
- PT_Element item = PT_ReadCache(indexes->index[index_id], url, flags);
- if (item != NULL) {
- item->indexId = (int) index_id;
- return item;
- }
- } else {
- proxytrack_print_log(CRITICAL, "PT_ReadCache:Corrupted central index locator");
- }
- }
- }
- return NULL;
-}
-
-int PT_LookupIndex(PT_Indexes indexes, const char* url) {
- if (indexes != NULL)
- {
- intptr_t index_id;
- if (strncmp(url, "http://", 7) == 0)
- url += 7;
- if (inthash_read(indexes->cil, url, &index_id)) {
- if (index_id >= 0 && index_id <= indexes->index_size) {
- return 1;
- } else {
- proxytrack_print_log(CRITICAL, "PT_ReadCache:Corrupted central index locator");
- }
- }
- }
- return 0;
+ return index->slots.common.timestamp;
+}
+
+/* Locked front-end for PT_LookupCache__New_u(): the lookup runs while the
+   index's zFileLock is held, and its result is returned unchanged. */
+static int PT_LookupCache__New(PT_Index index, const char *url) {
+  PT_Mutex *const lock = &index->slots.formatNew.zFileLock;
+  int found;
+
+  MutexLock(lock);
+  found = PT_LookupCache__New_u(index, url);
+  MutexUnlock(lock);
+  return found;
+}
+
+/* Unlocked lookup in a new-format (zip) index.
+   Returns 1 when 'url' (an optional leading "http://" is ignored) is present
+   in the index hash table, 0 otherwise - including when the index, its hash
+   table or its zip handle is missing, or when 'url' is NULL or empty. */
+static int PT_LookupCache__New_u(PT_Index index_, const char *url) {
+  PT_Index__New cache;
+
+  if (index_ == NULL)
+    return 0;
+  cache = &index_->slots.formatNew;
+  if (cache->hash == NULL || cache->zFile == NULL)
+    return 0;
+  if (url == NULL || *url == '\0')
+    return 0;
+  /* Keys are stored without the scheme prefix */
+  if (strncmp(url, "http://", 7) == 0)
+    url += 7;
+  return inthash_read(cache->hash, url, NULL) ? 1 : 0;
+}
+
+/*
+ * Merge one loaded cache index into a set of indexes.
+ *
+ * On success the index is appended to indexes->index, *pindex is set to NULL
+ * (the set now holds the index), and every non-empty URL enumerated from the
+ * index's hash table is written to the central index locator (indexes->cil)
+ * with the new slot number.
+ *
+ * Returns the number of URLs written to the locator; 0 when the index array
+ * could not be grown (the set and *pindex are then left untouched); -1 when
+ * an argument is NULL or the index has no hash table.
+ */
+int PT_IndexMerge(PT_Indexes indexes, PT_Index * pindex) {
+  if (pindex != NULL && *pindex != NULL && (*pindex)->slots.common.hash != NULL
+      && indexes != NULL) {
+    PT_Index index = *pindex;
+    struct_inthash_enum en = inthash_enum_new(index->slots.common.hash);
+    inthash_chain *chain;
+    const int index_id = indexes->index_size;
+    int nMerged = 0;
+    /* Grow through a temporary and only commit pointer and size on success:
+       a failed realloc() leaves the old array valid, so it must not be
+       overwritten with NULL nor the size bumped. The slots hold PT_Index
+       handles, hence the element size is taken from the array itself. */
+    void *const grown = realloc(indexes->index,
+                                sizeof(*indexes->index)
+                                * ((size_t) index_id + 1));
+
+    if (grown == NULL) {
+      proxytrack_print_log(CRITICAL, "PT_IndexMerge:Memory exhausted");
+      return nMerged;
+    }
+    indexes->index = grown;
+    indexes->index_size = index_id + 1;
+    indexes->index[index_id] = index;
+    *pindex = NULL;
+    while((chain = inthash_enum_next(&en)) != NULL) {
+      const char *url = chain->name;
+
+      if (url != NULL && url[0] != '\0') {
+        intptr_t previous_index_id = 0;
+
+        if (inthash_read(indexes->cil, url, &previous_index_id)) {
+          if (previous_index_id >= 0
+              && previous_index_id < indexes->index_size) {
+            /* NOTE(review): this 'break' stops the whole merge at the first
+               URL already owned by a newer index, so the remaining URLs of
+               this index are never registered - confirm whether 'continue'
+               was intended. */
+            if (indexes->index[previous_index_id]->slots.common.timestamp > index->slots.common.timestamp)  // existing entry is newer
+              break;
+          } else {
+            proxytrack_print_log(CRITICAL,
+                                 "PT_IndexMerge:Corrupted central index locator");
+          }
+        }
+        inthash_write(indexes->cil, chain->name, index_id);
+        nMerged++;
+      }
+    }
+    return nMerged;
+  }
+  return -1;
+}
+
+/* Release an element and the buffers it owns (adr, headers, location), then
+   clear the caller's handle. A NULL 'pentry' or a NULL '*pentry' is accepted. */
+void PT_Element_Delete(PT_Element * pentry) {
+  PT_Element victim;
+
+  if (pentry == NULL)
+    return;
+  victim = *pentry;
+  if (victim != NULL) {
+    /* free(NULL) is a no-op, so the members need no individual guard */
+    free(victim->adr);
+    free(victim->headers);
+    free(victim->location);
+    free(victim);
+  }
+  *pentry = NULL;
+}
+
+/*
+ * Read the cache entry for 'url' from whichever index owns it.
+ *
+ * The URL (an optional leading "http://" is ignored) is looked up in the
+ * central index locator (indexes->cil) to find the owning slot; the entry is
+ * then read with PT_ReadCache(), 'flags' being passed through unchanged.
+ *
+ * Returns the element with indexId set to the owning slot, or NULL when
+ * 'indexes' or 'url' is NULL, the URL is unknown, the locator holds an
+ * out-of-range slot (logged as CRITICAL), or PT_ReadCache() returns NULL.
+ */
+PT_Element PT_ReadIndex(PT_Indexes indexes, const char *url, int flags) {
+  if (indexes != NULL && url != NULL) {
+    intptr_t index_id;
+
+    if (strncmp(url, "http://", 7) == 0)
+      url += 7;
+    if (inthash_read(indexes->cil, url, &index_id)) {
+      /* Valid slots are 0 .. index_size - 1; accepting index_size itself
+         would read one element past the end of indexes->index. */
+      if (index_id >= 0 && index_id < indexes->index_size) {
+        PT_Element item = PT_ReadCache(indexes->index[index_id], url, flags);
+
+        if (item != NULL) {
+          item->indexId = (int) index_id;
+          return item;
+        }
+      } else {
+        proxytrack_print_log(CRITICAL,
+                             "PT_ReadCache:Corrupted central index locator");
+      }
+    }
+  }
+  return NULL;
+}
+
+/*
+ * Tell whether 'url' is known to the central index locator.
+ *
+ * The URL (an optional leading "http://" is ignored) is looked up in
+ * indexes->cil. Returns 1 when it maps to a valid slot, 0 when 'indexes' or
+ * 'url' is NULL, the URL is unknown, or the stored slot is out of range (the
+ * latter is logged as CRITICAL).
+ */
+int PT_LookupIndex(PT_Indexes indexes, const char *url) {
+  if (indexes != NULL && url != NULL) {
+    intptr_t index_id;
+
+    if (strncmp(url, "http://", 7) == 0)
+      url += 7;
+    if (inthash_read(indexes->cil, url, &index_id)) {
+      /* Valid slots are 0 .. index_size - 1, the same bound PT_GetIndex()
+         applies; index_size itself is not a usable slot. */
+      if (index_id >= 0 && index_id < indexes->index_size) {
+        return 1;
+      } else {
+        proxytrack_print_log(CRITICAL,
+                             "PT_ReadCache:Corrupted central index locator");
+      }
+    }
+  }
+  return 0;
}
+/* Return the largest slots.common.timestamp found among the indexes held by
+   'indexes', or (time_t) -1 when 'indexes' is NULL or holds no index. */
time_t PT_GetTimeIndex(PT_Indexes indexes) {
- if (indexes != NULL && indexes->index_size > 0)
- {
- int i;
- time_t maxt = indexes->index[0]->slots.common.timestamp;
- for(i = 1 ; i < indexes->index_size ; i++) {
- const time_t currt = indexes->index[i]->slots.common.timestamp;
- if (currt > maxt) {
- maxt = currt;
- }
- }
- return maxt;
- }
- return (time_t) -1;
+ if (indexes != NULL && indexes->index_size > 0) {
+ /* Seed the maximum with slot 0, then scan the remaining slots */
+ int i;
+ time_t maxt = indexes->index[0]->slots.common.timestamp;
+
+ for(i = 1; i < indexes->index_size; i++) {
+ const time_t currt = indexes->index[i]->slots.common.timestamp;
+
+ if (currt > maxt) {
+ maxt = currt;
+ }
+ }
+ return maxt;
+ }
+ return (time_t) - 1;
}
+/* Return the index stored in slot 'indexId', or NULL when 'indexes' is NULL
+   or 'indexId' is outside 0 .. index_size - 1. */
PT_Index PT_GetIndex(PT_Indexes indexes, int indexId) {
- if (indexes != NULL && indexId >= 0 && indexId < indexes->index_size)
- {
- return indexes->index[indexId];
- }
- return NULL;
+ if (indexes != NULL && indexId >= 0 && indexId < indexes->index_size) {
+ return indexes->index[indexId];
+ }
+ return NULL;
}
+/* Allocate a zero-filled element with statuscode set to STATUSCODE_INVALID
+   and indexId set to -1. Returns NULL when the allocation fails. */
PT_Element PT_ElementNew(void) {
- PT_Element r = NULL;
- if ((r = calloc(sizeof(_PT_Element), 1)) == NULL)
- return NULL;
- r->statuscode=STATUSCODE_INVALID;
- r->indexId = -1;
- return r;
-}
+ PT_Element r = NULL;
-PT_Element PT_ReadCache(PT_Index index, const char* url, int flags) {
- if (index != NULL && SAFE_INDEX(index)) {
- return _IndexFuncts[index->type].PT_ReadCache(index, url, flags);
- }
- return NULL;
+ if ((r = calloc(sizeof(_PT_Element), 1)) == NULL)
+ return NULL;
+ r->statuscode = STATUSCODE_INVALID;
+ r->indexId = -1;
+ return r;
}
-static PT_Element PT_ReadCache__New(PT_Index index, const char* url, int flags) {
- PT_Element retCode;
- MutexLock(&index->slots.formatNew.zFileLock);
- {
- retCode = PT_ReadCache__New_u(index, url, flags);
- }
- MutexUnlock(&index->slots.formatNew.zFileLock);
- return retCode;
+/* Read the entry for 'url' through the PT_ReadCache handler that
+   _IndexFuncts holds for index->type; 'url' and 'flags' are passed through.
+   Returns NULL when 'index' is NULL or SAFE_INDEX(index) is false. */
+PT_Element PT_ReadCache(PT_Index index, const char *url, int flags) {
+ if (index != NULL && SAFE_INDEX(index)) {
+ return _IndexFuncts[index->type].PT_ReadCache(index, url, flags);
+ }
+ return NULL;
}
+/* Locked front-end for PT_ReadCache__New_u(): the read runs while the
+   index's zFileLock is held, and its result is returned unchanged. */
+static PT_Element PT_ReadCache__New(PT_Index index, const char *url, int flags) {
+  PT_Mutex *const lock = &index->slots.formatNew.zFileLock;
+  PT_Element element;
+
+  MutexLock(lock);
+  element = PT_ReadCache__New_u(index, url, flags);
+  MutexUnlock(lock);
+  return element;
+}
/* ------------------------------------------------------------ */
/* New HTTrack cache (new.zip) format */
@@ -825,167 +874,182 @@ static PT_Element PT_ReadCache__New(PT_Index index, const char* url, int flags)
} while(0)
int PT_LoadCache__New(PT_Index index_, const char *filename) {
- if (index_ != NULL && filename != NULL) {
- PT_Index__New index = &index_->slots.formatNew;
- unzFile zFile = index->zFile = unzOpen(filename);
- index->timestamp = file_timestamp(filename);
- MutexInit(&index->zFileLock);
-
- // Opened ?
- if (zFile!=NULL) {
- const char * abpath;
- int slashes;
- inthash hashtable = index->hash;
-
- /* Compute base path for this index - the filename MUST be absolute! */
- for(slashes = 2, abpath = filename + (int)strlen(filename) - 1
- ; abpath > filename && ( ( *abpath != '/'&& *abpath != '\\' ) || --slashes > 0)
- ; abpath--);
- index->path[0] = '\0';
- if (slashes == 0 && *abpath != 0) {
- int i;
- strncat(index->path, filename, (int) ( abpath - filename ) + 1 );
- for(i = 0 ; index->path[i] != 0 ; i++) {
- if (index->path[i] == '\\') {
- index->path[i] = '/';
- }
- }
- }
-
- /* Ready directory entries */
- if (unzGoToFirstFile(zFile) == Z_OK) {
- char comment[128];
- char filename[HTS_URLMAXSIZE * 4];
- int entries = 0;
- int firstSeen = 0;
- memset(comment, 0, sizeof(comment)); // for truncated reads
- do {
- int readSizeHeader = 0;
- filename[0] = '\0';
- comment[0] = '\0';
- if (unzOpenCurrentFile(zFile) == Z_OK) {
- if (
- (readSizeHeader = unzGetLocalExtrafield(zFile, comment, sizeof(comment) - 2)) > 0
- &&
- unzGetCurrentFileInfo(zFile, NULL, filename, sizeof(filename) - 2, NULL, 0, NULL, 0) == Z_OK
- )
- {
- long int pos = (long int) unzGetOffset(zFile);
- assertf(readSizeHeader < sizeof(comment));
- comment[readSizeHeader] = '\0';
- entries++;
- if (pos > 0) {
- int dataincache = 0; // data in cache ?
- char* filenameIndex = filename;
- if (strncmp(filenameIndex, "http://", 7) == 0) {
- filenameIndex += 7;
- }
- if (comment[0] != '\0') {
- int maxLine = 2;
- char* a = comment;
- while(*a && maxLine-- > 0) { // parse only few first lines
- char line[1024];
- line[0] = '\0';
- a+=binput(a, line, sizeof(line) - 2);
- if (strncmp(line, "X-In-Cache:", 11) == 0) {
- if (strcmp(line, "X-In-Cache: 1") == 0) {
- dataincache = 1;
- } else {
- dataincache = 0;
- }
- break;
- }
- }
- }
- if (dataincache)
- inthash_add(hashtable, filenameIndex, pos);
- else
- inthash_add(hashtable, filenameIndex, -pos);
-
- /* First link as starting URL */
- if (!firstSeen) {
- if (strstr(filenameIndex, "/robots.txt") == NULL) {
- firstSeen = 1;
- if (!link_has_authority(filenameIndex))
- strcat(index->startUrl, "http://");
- strcat(index->startUrl, filenameIndex);
- }
- }
- } else {
- fprintf(stderr, "Corrupted cache meta entry #%d"LF, (int)entries);
- }
- } else {
- fprintf(stderr, "Corrupted cache entry #%d"LF, (int)entries);
- }
- unzCloseCurrentFile(zFile);
- } else {
- fprintf(stderr, "Corrupted cache entry #%d"LF, (int)entries);
- }
- } while( unzGoToNextFile(zFile) == Z_OK );
- return 1;
- } else {
- inthash_delete(&index->hash);
- index = NULL;
- }
- } else {
- index = NULL;
- }
- }
- return 0;
-}
-
-static PT_Element PT_ReadCache__New_u(PT_Index index_, const char* url, int flags)
-{
- PT_Index__New index = (PT_Index__New) &index_->slots.formatNew;
- char location_default[HTS_URLMAXSIZE*2];
- char previous_save[HTS_URLMAXSIZE*2];
- char previous_save_[HTS_URLMAXSIZE*2];
- char catbuff[CATBUFF_SIZE];
+ if (index_ != NULL && filename != NULL) {
+ PT_Index__New index = &index_->slots.formatNew;
+ unzFile zFile = index->zFile = unzOpen(filename);
+
+ index->timestamp = file_timestamp(filename);
+ MutexInit(&index->zFileLock);
+
+ // Opened ?
+ if (zFile != NULL) {
+ const char *abpath;
+ int slashes;
+ inthash hashtable = index->hash;
+
+ /* Compute base path for this index - the filename MUST be absolute! */
+ for(slashes = 2, abpath = filename + (int) strlen(filename) - 1;
+ abpath > filename && ((*abpath != '/' && *abpath != '\\')
+ || --slashes > 0);
+ abpath--) ;
+ index->path[0] = '\0';
+ if (slashes == 0 && *abpath != 0) {
+ int i;
+
+ strncat(index->path, filename, (int) (abpath - filename) + 1);
+ for(i = 0; index->path[i] != 0; i++) {
+ if (index->path[i] == '\\') {
+ index->path[i] = '/';
+ }
+ }
+ }
+
+ /* Ready directory entries */
+ if (unzGoToFirstFile(zFile) == Z_OK) {
+ char comment[128];
+ char filename[HTS_URLMAXSIZE * 4];
+ int entries = 0;
+ int firstSeen = 0;
+
+ memset(comment, 0, sizeof(comment)); // for truncated reads
+ do {
+ int readSizeHeader = 0;
+
+ filename[0] = '\0';
+ comment[0] = '\0';
+ if (unzOpenCurrentFile(zFile) == Z_OK) {
+ if ((readSizeHeader =
+ unzGetLocalExtrafield(zFile, comment, sizeof(comment) - 2)) > 0
+ && unzGetCurrentFileInfo(zFile, NULL, filename,
+ sizeof(filename) - 2, NULL, 0, NULL,
+ 0) == Z_OK) {
+ long int pos = (long int) unzGetOffset(zFile);
+
+ assertf(readSizeHeader < sizeof(comment));
+ comment[readSizeHeader] = '\0';
+ entries++;
+ if (pos > 0) {
+ int dataincache = 0; // data in cache ?
+ char *filenameIndex = filename;
+
+ if (strncmp(filenameIndex, "http://", 7) == 0) {
+ filenameIndex += 7;
+ }
+ if (comment[0] != '\0') {
+ int maxLine = 2;
+ char *a = comment;
+
+ while(*a && maxLine-- > 0) { // parse only few first lines
+ char line[1024];
+
+ line[0] = '\0';
+ a += binput(a, line, sizeof(line) - 2);
+ if (strncmp(line, "X-In-Cache:", 11) == 0) {
+ if (strcmp(line, "X-In-Cache: 1") == 0) {
+ dataincache = 1;
+ } else {
+ dataincache = 0;
+ }
+ break;
+ }
+ }
+ }
+ if (dataincache)
+ inthash_add(hashtable, filenameIndex, pos);
+ else
+ inthash_add(hashtable, filenameIndex, -pos);
+
+ /* First link as starting URL */
+ if (!firstSeen) {
+ if (strstr(filenameIndex, "/robots.txt") == NULL) {
+ firstSeen = 1;
+ if (!link_has_authority(filenameIndex))
+ strcat(index->startUrl, "http://");
+ strcat(index->startUrl, filenameIndex);
+ }
+ }
+ } else {
+ fprintf(stderr, "Corrupted cache meta entry #%d" LF,
+ (int) entries);
+ }
+ } else {
+ fprintf(stderr, "Corrupted cache entry #%d" LF, (int) entries);
+ }
+ unzCloseCurrentFile(zFile);
+ } else {
+ fprintf(stderr, "Corrupted cache entry #%d" LF, (int) entries);
+ }
+ } while(unzGoToNextFile(zFile) == Z_OK);
+ return 1;
+ } else {
+ inthash_delete(&index->hash);
+ index = NULL;
+ }
+ } else {
+ index = NULL;
+ }
+ }
+ return 0;
+}
+
+static PT_Element PT_ReadCache__New_u(PT_Index index_, const char *url,
+ int flags) {
+ PT_Index__New index = (PT_Index__New) & index_->slots.formatNew;
+ char location_default[HTS_URLMAXSIZE * 2];
+ char previous_save[HTS_URLMAXSIZE * 2];
+ char previous_save_[HTS_URLMAXSIZE * 2];
+ char catbuff[CATBUFF_SIZE];
intptr_t hash_pos;
int hash_pos_return;
- PT_Element r = NULL;
- if (index == NULL || index->hash == NULL || index->zFile == NULL || url == NULL || *url == 0)
- return NULL;
- if ((r = PT_ElementNew()) == NULL)
- return NULL;
- location_default[0] = '\0';
- previous_save[0] = previous_save_[0] = '\0';
+ PT_Element r = NULL;
+
+ if (index == NULL || index->hash == NULL || index->zFile == NULL
+ || url == NULL || *url == 0)
+ return NULL;
+ if ((r = PT_ElementNew()) == NULL)
+ return NULL;
+ location_default[0] = '\0';
+ previous_save[0] = previous_save_[0] = '\0';
memset(r, 0, sizeof(_PT_Element));
r->location = location_default;
- strcpy(r->location, "");
- if (strncmp(url, "http://", 7) == 0)
- url += 7;
+ strcpy(r->location, "");
+ if (strncmp(url, "http://", 7) == 0)
+ url += 7;
hash_pos_return = inthash_read(index->hash, url, &hash_pos);
if (hash_pos_return) {
uLong posInZip;
+
if (hash_pos > 0) {
posInZip = (uLong) hash_pos;
} else {
- posInZip = (uLong) -hash_pos;
+ posInZip = (uLong) - hash_pos;
}
- if (unzSetOffset(index->zFile, posInZip) == Z_OK) {
+ if (unzSetOffset(index->zFile, posInZip) == Z_OK) {
/* Read header (Max 8KiB) */
if (unzOpenCurrentFile(index->zFile) == Z_OK) {
char headerBuff[8192 + 2];
int readSizeHeader;
int totalHeader = 0;
int dataincache = 0;
-
+
/* For BIG comments */
- headerBuff[0]
- = headerBuff[sizeof(headerBuff) - 1]
- = headerBuff[sizeof(headerBuff) - 2]
+ headerBuff[0]
+ = headerBuff[sizeof(headerBuff) - 1]
+ = headerBuff[sizeof(headerBuff) - 2]
= headerBuff[sizeof(headerBuff) - 3] = '\0';
- if ( (readSizeHeader = unzGetLocalExtrafield(index->zFile, headerBuff, sizeof(headerBuff) - 2)) > 0)
- {
+ if ((readSizeHeader =
+ unzGetLocalExtrafield(index->zFile, headerBuff,
+ sizeof(headerBuff) - 2)) > 0) {
int offset = 0;
char line[HTS_URLMAXSIZE + 2];
int lineEof = 0;
+
headerBuff[readSizeHeader] = '\0';
do {
- char* value;
+ char *value;
+
line[0] = '\0';
offset += binput(headerBuff + offset, line, sizeof(line) - 2);
if (line[0] == '\0') {
@@ -994,175 +1058,187 @@ static PT_Element PT_ReadCache__New_u(PT_Index index_, const char* url, int flag
value = strchr(line, ':');
if (value != NULL) {
*value++ = '\0';
- if (*value == ' ' || *value == '\t') value++;
+ if (*value == ' ' || *value == '\t')
+ value++;
ZIP_READFIELD_INT(line, value, "X-In-Cache", dataincache);
ZIP_READFIELD_INT(line, value, "X-Statuscode", r->statuscode);
- ZIP_READFIELD_STRING(line, value, "X-StatusMessage", r->msg); // msg
- ZIP_READFIELD_INT(line, value, "X-Size", r->size); // size
- ZIP_READFIELD_STRING(line, value, "Content-Type", r->contenttype); // contenttype
- ZIP_READFIELD_STRING(line, value, "X-Charset", r->charset); // contenttype
- ZIP_READFIELD_STRING(line, value, "Last-Modified", r->lastmodified); // last-modified
- ZIP_READFIELD_STRING(line, value, "Etag", r->etag); // Etag
- ZIP_READFIELD_STRING(line, value, "Location", r->location); // 'location' pour moved
- ZIP_READFIELD_STRING(line, value, "Content-Disposition", r->cdispo); // Content-disposition
+ ZIP_READFIELD_STRING(line, value, "X-StatusMessage", r->msg); // msg
+ ZIP_READFIELD_INT(line, value, "X-Size", r->size); // size
+ ZIP_READFIELD_STRING(line, value, "Content-Type", r->contenttype); // contenttype
+ ZIP_READFIELD_STRING(line, value, "X-Charset", r->charset); // contenttype
+ ZIP_READFIELD_STRING(line, value, "Last-Modified", r->lastmodified); // last-modified
+ ZIP_READFIELD_STRING(line, value, "Etag", r->etag); // Etag
+ ZIP_READFIELD_STRING(line, value, "Location", r->location); // 'location' pour moved
+ ZIP_READFIELD_STRING(line, value, "Content-Disposition", r->cdispo); // Content-disposition
//ZIP_READFIELD_STRING(line, value, "X-Addr", ..); // Original address
//ZIP_READFIELD_STRING(line, value, "X-Fil", ..); // Original URI filename
- ZIP_READFIELD_STRING(line, value, "X-Save", previous_save_); // Original save filename
- if (line[0] != '\0') {
- int len = r->headers ? ((int) strlen(r->headers)) : 0;
- int nlen = (int) ( strlen(line) + 2 + strlen(value) + sizeof("\r\n") + 1 );
- r->headers = realloc(r->headers, len + nlen);
- r->headers[len] = '\0';
- strcat(r->headers, line);
- strcat(r->headers, ": ");
- strcat(r->headers, value);
- strcat(r->headers, "\r\n");
- }
+ ZIP_READFIELD_STRING(line, value, "X-Save", previous_save_); // Original save filename
+ if (line[0] != '\0') {
+ int len = r->headers ? ((int) strlen(r->headers)) : 0;
+ int nlen =
+ (int) (strlen(line) + 2 + strlen(value) + sizeof("\r\n") + 1);
+ r->headers = realloc(r->headers, len + nlen);
+ r->headers[len] = '\0';
+ strcat(r->headers, line);
+ strcat(r->headers, ": ");
+ strcat(r->headers, value);
+ strcat(r->headers, "\r\n");
+ }
}
} while(offset < readSizeHeader && !lineEof);
totalHeader = offset;
- /* Previous entry */
- if (previous_save_[0] != '\0') {
- int pathLen = (int) strlen(index->path);
- if (pathLen > 0 && strncmp(previous_save_, index->path, pathLen) == 0) { // old (<3.40) buggy format
- strcpy(previous_save, previous_save_);
- }
- // relative ? (hack)
- else if (index->safeCache
- || (previous_save_[0] != '/' // /home/foo/bar.gif
- && ( !isalpha(previous_save_[0]) || previous_save_[1] != ':' ) ) // c:/home/foo/bar.gif
- )
- {
- index->safeCache = 1;
- sprintf(previous_save, "%s%s", index->path, previous_save_);
- }
- // bogus format (includes buggy absolute path)
- else {
- /* guess previous path */
- if (index->fixedPath == 0) {
- const char * start = jump_protocol_and_auth(url);
- const char * end = start ? strchr(start, '/') : NULL;
- int len = (int) (end - start);
- if (start != NULL && end != NULL && len > 0 && len < 128) {
- char piece[128 + 2];
- const char * where;
- piece[0] = '\0';
- strncat(piece, start, len);
- if ((where = strstr(previous_save_, piece)) != NULL) {
- index->fixedPath = (int) (where - previous_save_); // offset to relative path
- }
- }
- }
- if (index->fixedPath > 0) {
- int saveLen = (int) strlen(previous_save_);
- if (index->fixedPath < saveLen) {
- sprintf(previous_save, "%s%s", index->path, previous_save_ + index->fixedPath);
- } else {
- sprintf(r->msg, "Bogus fixePath prefix for %s (prefixLen=%d)", previous_save_, (int)index->fixedPath);
- r->statuscode = STATUSCODE_INVALID;
- }
- } else {
- sprintf(previous_save, "%s%s", index->path, previous_save_);
- }
- }
- }
+ /* Previous entry */
+ if (previous_save_[0] != '\0') {
+ int pathLen = (int) strlen(index->path);
+
+ if (pathLen > 0 && strncmp(previous_save_, index->path, pathLen) == 0) { // old (<3.40) buggy format
+ strcpy(previous_save, previous_save_);
+ }
+ // relative ? (hack)
+ else if (index->safeCache || (previous_save_[0] != '/' // /home/foo/bar.gif
+ && (!isalpha(previous_save_[0]) || previous_save_[1] != ':')) // c:/home/foo/bar.gif
+ ) {
+ index->safeCache = 1;
+ sprintf(previous_save, "%s%s", index->path, previous_save_);
+ }
+ // bogus format (includes buggy absolute path)
+ else {
+ /* guess previous path */
+ if (index->fixedPath == 0) {
+ const char *start = jump_protocol_and_auth(url);
+ const char *end = start ? strchr(start, '/') : NULL;
+ int len = (int) (end - start);
+
+ if (start != NULL && end != NULL && len > 0 && len < 128) {
+ char piece[128 + 2];
+ const char *where;
+
+ piece[0] = '\0';
+ strncat(piece, start, len);
+ if ((where = strstr(previous_save_, piece)) != NULL) {
+ index->fixedPath = (int) (where - previous_save_); // offset to relative path
+ }
+ }
+ }
+ if (index->fixedPath > 0) {
+ int saveLen = (int) strlen(previous_save_);
+
+ if (index->fixedPath < saveLen) {
+ sprintf(previous_save, "%s%s", index->path,
+ previous_save_ + index->fixedPath);
+ } else {
+ sprintf(r->msg, "Bogus fixePath prefix for %s (prefixLen=%d)",
+ previous_save_, (int) index->fixedPath);
+ r->statuscode = STATUSCODE_INVALID;
+ }
+ } else {
+ sprintf(previous_save, "%s%s", index->path, previous_save_);
+ }
+ }
+ }
/* Complete fields */
- r->adr=NULL;
- if (r->statuscode != STATUSCODE_INVALID) { /* Can continue */
+ r->adr = NULL;
+ if (r->statuscode != STATUSCODE_INVALID) { /* Can continue */
int ok = 0;
-
+
// Court-circuit:
// Peut-on stocker le fichier directement sur disque?
if (ok) {
if (r->msg[0] == '\0') {
- strcpy(r->msg,"Cache Read Error : Unexpected error");
+ strcpy(r->msg, "Cache Read Error : Unexpected error");
}
- } else { // lire en mémoire
-
+ } else { // lire en mémoire
+
if (!dataincache) {
- /* Read in memory from cache */
- if (flags & FETCH_BODY) {
- if (strnotempty(previous_save)) {
- FILE* fp = fopen(fconv(catbuff,previous_save), "rb");
- if (fp != NULL) {
- r->adr = (char*) malloc(r->size + 4);
- if (r->adr != NULL) {
- if (r->size > 0 && fread(r->adr, 1, r->size, fp) != r->size) {
+ /* Read in memory from cache */
+ if (flags & FETCH_BODY) {
+ if (strnotempty(previous_save)) {
+ FILE *fp = fopen(fconv(catbuff, previous_save), "rb");
+
+ if (fp != NULL) {
+ r->adr = (char *) malloc(r->size + 4);
+ if (r->adr != NULL) {
+ if (r->size > 0
+ && fread(r->adr, 1, r->size, fp) != r->size) {
int last_errno = errno;
- r->statuscode=STATUSCODE_INVALID;
- sprintf(r->msg,"Read error in cache disk data: %s", strerror(last_errno));
- }
- } else {
- r->statuscode=STATUSCODE_INVALID;
- strcpy(r->msg,"Read error (memory exhausted) from cache");
- }
- fclose(fp);
- } else {
- r->statuscode=STATUSCODE_INVALID;
- sprintf(r->msg, "Read error (can't open '%s') from cache", fconv(catbuff,previous_save));
- }
- } else {
- r->statuscode=STATUSCODE_INVALID;
- strcpy(r->msg,"Cached file name is invalid");
- }
- }
+
+ r->statuscode = STATUSCODE_INVALID;
+ sprintf(r->msg, "Read error in cache disk data: %s",
+ strerror(last_errno));
+ }
+ } else {
+ r->statuscode = STATUSCODE_INVALID;
+ strcpy(r->msg,
+ "Read error (memory exhausted) from cache");
+ }
+ fclose(fp);
+ } else {
+ r->statuscode = STATUSCODE_INVALID;
+ sprintf(r->msg, "Read error (can't open '%s') from cache",
+ fconv(catbuff, previous_save));
+ }
+ } else {
+ r->statuscode = STATUSCODE_INVALID;
+ strcpy(r->msg, "Cached file name is invalid");
+ }
+ }
} else {
- // lire fichier (d'un coup)
- if (flags & FETCH_BODY) {
- r->adr=(char*) malloc(r->size+1);
- if (r->adr!=NULL) {
- if (unzReadCurrentFile(index->zFile, r->adr, (unsigned int) r->size) != r->size) { // erreur
- free(r->adr);
- r->adr=NULL;
- r->statuscode=STATUSCODE_INVALID;
- strcpy(r->msg,"Cache Read Error : Read Data");
- } else
- *(r->adr+r->size)='\0';
- //printf(">%s status %d\n",back[p].r->contenttype,back[p].r->statuscode);
- } else { // erreur
- r->statuscode=STATUSCODE_INVALID;
- strcpy(r->msg,"Cache Memory Error");
- }
- }
- }
+ // lire fichier (d'un coup)
+ if (flags & FETCH_BODY) {
+ r->adr = (char *) malloc(r->size + 1);
+ if (r->adr != NULL) {
+ if (unzReadCurrentFile(index->zFile, r->adr, (unsigned int) r->size) != r->size) { // erreur
+ free(r->adr);
+ r->adr = NULL;
+ r->statuscode = STATUSCODE_INVALID;
+ strcpy(r->msg, "Cache Read Error : Read Data");
+ } else
+ *(r->adr + r->size) = '\0';
+ //printf(">%s status %d\n",back[p].r->contenttype,back[p].r->statuscode);
+ } else { // erreur
+ r->statuscode = STATUSCODE_INVALID;
+ strcpy(r->msg, "Cache Memory Error");
+ }
+ }
+ }
}
- } // si save==null, ne rien charger (juste en tête)
+ } // si save==null, ne rien charger (juste en tête)
} else {
- r->statuscode=STATUSCODE_INVALID;
- strcpy(r->msg,"Cache Read Error : Read Header Data");
+ r->statuscode = STATUSCODE_INVALID;
+ strcpy(r->msg, "Cache Read Error : Read Header Data");
}
unzCloseCurrentFile(index->zFile);
} else {
- r->statuscode=STATUSCODE_INVALID;
- strcpy(r->msg,"Cache Read Error : Open File");
+ r->statuscode = STATUSCODE_INVALID;
+ strcpy(r->msg, "Cache Read Error : Open File");
}
} else {
- r->statuscode=STATUSCODE_INVALID;
- strcpy(r->msg,"Cache Read Error : Bad Offset");
+ r->statuscode = STATUSCODE_INVALID;
+ strcpy(r->msg, "Cache Read Error : Bad Offset");
}
} else {
- r->statuscode=STATUSCODE_INVALID;
- strcpy(r->msg,"File Cache Entry Not Found");
- }
- if (r->location[0] != '\0') {
- r->location = strdup(r->location);
- } else {
- r->location = NULL;
- }
+ r->statuscode = STATUSCODE_INVALID;
+ strcpy(r->msg, "File Cache Entry Not Found");
+ }
+ if (r->location[0] != '\0') {
+ r->location = strdup(r->location);
+ } else {
+ r->location = NULL;
+ }
return r;
}
static int PT_SaveCache__New_Fun(void *arg, const char *url, PT_Element element) {
- zipFile zFileOut = (zipFile) arg;
- char headers[8192];
+ zipFile zFileOut = (zipFile) arg;
+ char headers[8192];
int headersSize;
zip_fileinfo fi;
- int zErr;
- const char *url_adr = "";
- const char *url_fil = "";
+ int zErr;
+ const char *url_adr = "";
+ const char *url_fil = "";
headers[0] = '\0';
headersSize = 0;
@@ -1172,14 +1248,16 @@ static int PT_SaveCache__New_Fun(void *arg, const char *url, PT_Element element)
headersSize = 0;
/* */
{
- char* message;
+ char *message;
+
if (strlen(element->msg) < 32) {
message = element->msg;
} else {
message = "(See X-StatusMessage)";
}
/* 64 characters MAX for first line */
- sprintf(headers + headersSize, "HTTP/1.%c %d %s\r\n", '1', element->statuscode, element->msg);
+ sprintf(headers + headersSize, "HTTP/1.%c %d %s\r\n", '1',
+ element->statuscode, element->msg);
}
headersSize += (int) strlen(headers + headersSize);
@@ -1187,22 +1265,23 @@ static int PT_SaveCache__New_Fun(void *arg, const char *url, PT_Element element)
ZIP_FIELD_INT_FORCE(headers, headersSize, "X-In-Cache", 1);
ZIP_FIELD_INT(headers, headersSize, "X-StatusCode", element->statuscode);
ZIP_FIELD_STRING(headers, headersSize, "X-StatusMessage", element->msg);
- ZIP_FIELD_INT(headers, headersSize, "X-Size", element->size); // size
- ZIP_FIELD_STRING(headers, headersSize, "Content-Type", element->contenttype); // contenttype
- ZIP_FIELD_STRING(headers, headersSize, "X-Charset", element->charset); // contenttype
- ZIP_FIELD_STRING(headers, headersSize, "Last-Modified", element->lastmodified); // last-modified
- ZIP_FIELD_STRING(headers, headersSize, "Etag", element->etag); // Etag
- ZIP_FIELD_STRING(headers, headersSize, "Location", element->location); // 'location' pour moved
- ZIP_FIELD_STRING(headers, headersSize, "Content-Disposition", element->cdispo); // Content-disposition
- ZIP_FIELD_STRING(headers, headersSize, "X-Addr", url_adr); // Original address
- ZIP_FIELD_STRING(headers, headersSize, "X-Fil", url_fil); // Original URI filename
- ZIP_FIELD_STRING(headers, headersSize, "X-Save", ""); // Original save filename
-
+ ZIP_FIELD_INT(headers, headersSize, "X-Size", element->size); // size
+ ZIP_FIELD_STRING(headers, headersSize, "Content-Type", element->contenttype); // contenttype
+ ZIP_FIELD_STRING(headers, headersSize, "X-Charset", element->charset); // contenttype
+ ZIP_FIELD_STRING(headers, headersSize, "Last-Modified", element->lastmodified); // last-modified
+ ZIP_FIELD_STRING(headers, headersSize, "Etag", element->etag); // Etag
+ ZIP_FIELD_STRING(headers, headersSize, "Location", element->location); // 'location' pour moved
+ ZIP_FIELD_STRING(headers, headersSize, "Content-Disposition", element->cdispo); // Content-disposition
+ ZIP_FIELD_STRING(headers, headersSize, "X-Addr", url_adr); // Original address
+ ZIP_FIELD_STRING(headers, headersSize, "X-Fil", url_fil); // Original URI filename
+ ZIP_FIELD_STRING(headers, headersSize, "X-Save", ""); // Original save filename
+
/* Time */
memset(&fi, 0, sizeof(fi));
if (element->lastmodified[0] != '\0') {
- struct tm buffer;
- struct tm* tm_s = convert_time_rfc822(&buffer, element->lastmodified);
+ struct tm buffer;
+ struct tm *tm_s = convert_time_rfc822(&buffer, element->lastmodified);
+
if (tm_s) {
fi.tmz_date.tm_sec = (uInt) tm_s->tm_sec;
fi.tmz_date.tm_min = (uInt) tm_s->tm_min;
@@ -1214,333 +1293,355 @@ static int PT_SaveCache__New_Fun(void *arg, const char *url, PT_Element element)
}
/* Open file - NOTE: headers in "comment" */
- if ((zErr = zipOpenNewFileInZip(zFileOut,
- url,
- &fi,
- /*
- Store headers in realtime in the local file directory as extra field
- In case of crash, we'll be able to recover the whole ZIP file by rescanning it
- */
- headers,
- (uInt) strlen(headers),
- NULL,
- 0,
- NULL, /* comment */
- Z_DEFLATED,
- Z_DEFAULT_COMPRESSION)) != Z_OK)
- {
+ if ((zErr = zipOpenNewFileInZip(zFileOut, url, &fi,
+ /*
+ Store headers in realtime in the local file directory as extra field
+ In case of crash, we'll be able to recover the whole ZIP file by rescanning it
+ */
+ headers, (uInt) strlen(headers), NULL, 0, NULL, /* comment */
+ Z_DEFLATED, Z_DEFAULT_COMPRESSION)) != Z_OK) {
int zip_zipOpenNewFileInZip_failed = 0;
+
assertf(zip_zipOpenNewFileInZip_failed);
}
-
- /* Write data in cache */
- if (element->size > 0 && element->adr != NULL) {
- if ((zErr = zipWriteInFileInZip(zFileOut, element->adr, (int) element->size)) != Z_OK) {
- int zip_zipWriteInFileInZip_failed = 0;
- assertf(zip_zipWriteInFileInZip_failed);
- }
- }
-
- /* Close */
- if ((zErr = zipCloseFileInZip(zFileOut)) != Z_OK) {
- int zip_zipCloseFileInZip_failed = 0;
+
+ /* Write data in cache */
+ if (element->size > 0 && element->adr != NULL) {
+ if ((zErr =
+ zipWriteInFileInZip(zFileOut, element->adr,
+ (int) element->size)) != Z_OK) {
+ int zip_zipWriteInFileInZip_failed = 0;
+
+ assertf(zip_zipWriteInFileInZip_failed);
+ }
+ }
+
+ /* Close */
+ if ((zErr = zipCloseFileInZip(zFileOut)) != Z_OK) {
+ int zip_zipCloseFileInZip_failed = 0;
+
assertf(zip_zipCloseFileInZip_failed);
}
/* Flush */
if ((zErr = zipFlush(zFileOut)) != 0) {
int zip_zipFlush_failed = 0;
+
assertf(zip_zipFlush_failed);
}
- return 0;
+ return 0;
}
+/* Write every entry of 'indexes' into a new zip archive 'filename':
+   PT_EnumCache() calls PT_SaveCache__New_Fun for the entries with the open
+   archive as its argument. Returns PT_EnumCache()'s result, or -1 when the
+   archive cannot be opened. */
static int PT_SaveCache__New(PT_Indexes indexes, const char *filename) {
- zipFile zFileOut = zipOpen(filename, 0);
- if (zFileOut != NULL) {
- int ret = PT_EnumCache(indexes, PT_SaveCache__New_Fun, (void *) zFileOut);
- zipClose(zFileOut, "Created by HTTrack Website Copier/ProxyTrack "PROXYTRACK_VERSION);
- zFileOut = NULL;
- if (ret != 0)
- (void) unlink(filename);
- return ret;
- }
- return -1;
+ zipFile zFileOut = zipOpen(filename, 0);
+
+ if (zFileOut != NULL) {
+ int ret = PT_EnumCache(indexes, PT_SaveCache__New_Fun, (void *) zFileOut);
+
+ /* The archive is always closed; the string becomes the zip global comment */
+ zipClose(zFileOut,
+ "Created by HTTrack Website Copier/ProxyTrack "
+ PROXYTRACK_VERSION);
+ zFileOut = NULL;
+ /* A non-zero enumeration result removes the archive just written */
+ if (ret != 0)
+ (void) unlink(filename);
+ return ret;
+ }
+ return -1;
}
-
-
/* ------------------------------------------------------------ */
/* Old HTTrack cache (dat/ndx) format */
/* ------------------------------------------------------------ */
-static int cache_brstr(char* adr,char* s) {
+/* Read a length-prefixed string from the memory buffer 'adr' into 's'.
+   binput() fills 'buff' from 'adr' and its return value is used as the
+   offset of the payload; 'buff' is parsed as a decimal length i, i bytes are
+   copied from the payload into 's', and s[i] is set to NUL.
+   Returns the binput() offset plus i.
+   NOTE(review): 'i' stays uninitialised when sscanf() converts nothing, and a
+   negative 'i' makes s[i] a write before the buffer - confirm the input is
+   trusted. The capacity of 's' is not known here. */
+static int cache_brstr(char *adr, char *s) {
int i;
int off;
char buff[256 + 1];
- off=binput(adr,buff,256);
- adr+=off;
- sscanf(buff,"%d",&i);
- if (i>0)
- strncpy(s,adr,i);
- *(s+i)='\0';
- off+=i;
+
+ off = binput(adr, buff, 256);
+ adr += off;
+ sscanf(buff, "%d", &i);
+ if (i > 0)
+ strncpy(s, adr, i);
+ *(s + i) = '\0';
+ off += i;
return off;
}
-static void cache_rstr(FILE* fp,char* s) {
+/* Read a length-prefixed string from 'fp' into 's'.
+   linput() fills 'buff' from the file (presumably one text line - confirm),
+   which is parsed with INTsysP as the length i. A length outside 0..32768 is
+   treated as 0. Then i bytes are read into 's' and s[i] is set to NUL; a
+   short read trips assertf().
+   NOTE(review): 's' therefore needs room for up to 32769 bytes, and 'i' is
+   read uninitialised when sscanf() converts nothing. */
+static void cache_rstr(FILE * fp, char *s) {
INTsys i;
- char buff[256+4];
- linput(fp,buff,256);
- sscanf(buff,INTsysP,&i);
- if (i < 0 || i > 32768) /* error, something nasty happened */
- i=0;
- if (i>0) {
- if ((int) fread(s,1,i,fp) != i) {
+ char buff[256 + 4];
+
+ linput(fp, buff, 256);
+ sscanf(buff, INTsysP, &i);
+ if (i < 0 || i > 32768) /* error, something nasty happened */
+ i = 0;
+ if (i > 0) {
+ if ((int) fread(s, 1, i, fp) != i) {
int fread_cache_failed = 0;
+
assertf(fread_cache_failed);
}
}
- *(s+i)='\0';
+ *(s + i) = '\0';
}
-static char* cache_rstr_addr(FILE* fp) {
+/* Read a length-prefixed string from 'fp' into a freshly malloc()ed buffer.
+   linput() fills 'buff' from the file, which is parsed with INTsysP (the
+   conversion cache_rstr() uses for the same INTsys variable) as the length i.
+   A failed conversion or a length outside 0..32768 is treated as 0.
+   Returns the NUL-terminated buffer, which the caller must free(), or NULL
+   when the length is 0 or the allocation fails. A short read trips
+   assertf(). */
+static char *cache_rstr_addr(FILE * fp) {
INTsys i;
- char* addr = NULL;
- char buff[256+4];
- linput(fp,buff,256);
- sscanf(buff,"%d",&i);
- if (i < 0 || i > 32768) /* error, something nasty happened */
- i=0;
+ char *addr = NULL;
+ char buff[256 + 4];
+
+ linput(fp, buff, 256);
+ /* INTsysP matches INTsys; an unconverted 'i' must not be read below */
+ if (sscanf(buff, INTsysP, &i) != 1)
+ i = 0;
+ if (i < 0 || i > 32768) /* error, something nasty happened */
+ i = 0;
if (i > 0) {
addr = malloc(i + 1);
if (addr != NULL) {
- if ((int) fread(addr,1,i,fp) != i) {
+ if ((int) fread(addr, 1, i, fp) != i) {
int fread_cache_failed = 0;
+
assertf(fread_cache_failed);
}
- *(addr+i)='\0';
+ *(addr + i) = '\0';
}
}
return addr;
}
-static void cache_rint(FILE* fp,int* i) {
+static void cache_rint(FILE * fp, int *i) {
char s[256];
- cache_rstr(fp,s);
- sscanf(s,"%d",i);
+
+ cache_rstr(fp, s);
+ sscanf(s, "%d", i);
}
-static void cache_rLLint(FILE* fp,unsigned long* i) {
- int l;
+static void cache_rLLint(FILE * fp, unsigned long *i) {
+ int l;
char s[256];
- cache_rstr(fp,s);
- sscanf(s,"%d",&l);
- *i = (unsigned long)l;
+
+ cache_rstr(fp, s);
+ sscanf(s, "%d", &l);
+ *i = (unsigned long) l;
}
static int PT_LoadCache__Old(PT_Index index_, const char *filename) {
- if (index_ != NULL && filename != NULL) {
- char * pos = strrchr(filename, '.');
- PT_Index__Old cache = &index_->slots.formatOld;
- long int ndxSize;
- cache->filenameDat[0] = '\0';
- cache->filenameNdx[0] = '\0';
- cache->path[0] = '\0';
-
- {
- PT_Index__Old index = cache;
- const char * abpath;
- int slashes;
- /* -------------------- COPY OF THE __New() CODE -------------------- */
- /* Compute base path for this index - the filename MUST be absolute! */
- for(slashes = 2, abpath = filename + (int)strlen(filename) - 1
- ; abpath > filename && ( ( *abpath != '/'&& *abpath != '\\' ) || --slashes > 0)
- ; abpath--);
- index->path[0] = '\0';
- if (slashes == 0 && *abpath != 0) {
- int i;
- strncat(index->path, filename, (int) ( abpath - filename ) + 1 );
- for(i = 0 ; index->path[i] != 0 ; i++) {
- if (index->path[i] == '\\') {
- index->path[i] = '/';
- }
- }
- }
- /* -------------------- END OF COPY OF THE __New() CODE -------------------- */
- }
-
- /* Index/data filenames */
- if (pos != NULL) {
- int nLen = (int) (pos - filename);
- strncat(cache->filenameDat, filename, nLen);
- strncat(cache->filenameNdx, filename, nLen);
- strcat(cache->filenameDat, ".dat");
- strcat(cache->filenameNdx, ".ndx");
- }
- ndxSize = filesize(cache->filenameNdx);
- cache->timestamp = file_timestamp(cache->filenameDat);
- cache->dat = fopen(cache->filenameDat, "rb");
- cache->ndx = fopen(cache->filenameNdx, "rb");
- if (cache->dat != NULL && cache->ndx != NULL && ndxSize > 0) {
- char * use = malloc(ndxSize + 1);
- if (fread(use, 1, ndxSize, cache->ndx) == ndxSize) {
- char firstline[256];
- char* a=use;
- use[ndxSize] = '\0';
- a += cache_brstr(a, firstline);
- if (strncmp(firstline,"CACHE-",6)==0) { // Nouvelle version du cache
- if (strncmp(firstline,"CACHE-1.",8)==0) { // Version 1.1x
- cache->version=(int)(firstline[8]-'0'); // cache 1.x
- if (cache->version <= 5) {
- a+=cache_brstr(a,firstline);
- strcpy(cache->lastmodified,firstline);
- } else {
- // fprintf(opt->errlog,"Cache: version 1.%d not supported, ignoring current cache"LF,cache->version);
- fclose(cache->dat);
- cache->dat=NULL;
- free(use);
- use=NULL;
- }
- } else { // non supporté
- // fspc(opt->errlog,"error"); fprintf(opt->errlog,"Cache: %s not supported, ignoring current cache"LF,firstline);
- fclose(cache->dat);
- cache->dat=NULL;
- free(use);
- use=NULL;
- }
- /* */
- } else { // Vieille version du cache
- /* */
- // hts_log_print(opt, LOG_WARNING, "Cache: importing old cache format");
- cache->version=0; // cache 1.0
- strcpy(cache->lastmodified,firstline);
- }
-
- /* Create hash table for the cache (MUCH FASTER!) */
- if (use) {
- char line[HTS_URLMAXSIZE*2];
- char linepos[256];
- int pos;
- int firstSeen = 0;
- while ( (a!=NULL) && (a < (use + ndxSize) ) ) {
- a=strchr(a+1,'\n'); /* start of line */
- if (a) {
- a++;
- /* read "host/file" */
- a+=binput(a,line,HTS_URLMAXSIZE);
- a+=binput(a,line+strlen(line),HTS_URLMAXSIZE);
- /* read position */
- a+=binput(a,linepos,200);
- sscanf(linepos,"%d",&pos);
-
- /* Add entry */
- inthash_add(cache->hash,line,pos);
-
- /* First link as starting URL */
- if (!firstSeen) {
- if (strstr(line, "/robots.txt") == NULL) {
- PT_Index__Old index = cache;
- firstSeen = 1;
- if (!link_has_authority(line))
- strcat(index->startUrl, "http://");
- strcat(index->startUrl, line);
- }
- }
-
- }
- }
- /* Not needed anymore! */
- free(use);
- use=NULL;
- return 1;
- }
- }
- }
- }
- return 0;
-}
-
-static String DecodeUrl(const char * url) {
- int i;
- String s = STRING_EMPTY;
- StringClear(s);
- for(i = 0 ; url[i] != '\0' ; i++) {
- if (url[i] == '+') {
- StringAddchar(s, ' ');
- } else if (url[i] == '%') {
- if (url[i + 1] == '%') {
- StringAddchar(s, '%');
- i++;
- } else if (url[i + 1] != 0 && url[i + 2] != 0) {
- char tmp[3];
- int codepoint = 0;
- tmp[0] = url[i + 1];
- tmp[1] = url[i + 2];
- tmp[2] = 0;
- if (sscanf(tmp, "%x", &codepoint) == 1) {
- StringAddchar(s, (char)codepoint);
- }
- i += 2;
- }
- } else {
- StringAddchar(s, url[i]);
- }
- }
- return s;
-}
-
-static PT_Element PT_ReadCache__Old(PT_Index index, const char* url, int flags) {
- PT_Element retCode;
- MutexLock(&index->slots.formatOld.fileLock);
- {
- retCode = PT_ReadCache__Old_u(index, url, flags);
- }
- MutexUnlock(&index->slots.formatOld.fileLock);
- return retCode;
-}
-
-static PT_Element PT_ReadCache__Old_u(PT_Index index_, const char* url, int flags) {
- PT_Index__Old cache = (PT_Index__Old) &index_->slots.formatOld;
+ if (index_ != NULL && filename != NULL) {
+ char *pos = strrchr(filename, '.');
+ PT_Index__Old cache = &index_->slots.formatOld;
+ long int ndxSize;
+
+ cache->filenameDat[0] = '\0';
+ cache->filenameNdx[0] = '\0';
+ cache->path[0] = '\0';
+
+ {
+ PT_Index__Old index = cache;
+ const char *abpath;
+ int slashes;
+
+ /* -------------------- COPY OF THE __New() CODE -------------------- */
+ /* Compute base path for this index - the filename MUST be absolute! */
+ for(slashes = 2, abpath = filename + (int) strlen(filename) - 1;
+ abpath > filename && ((*abpath != '/' && *abpath != '\\')
+ || --slashes > 0);
+ abpath--) ;
+ index->path[0] = '\0';
+ if (slashes == 0 && *abpath != 0) {
+ int i;
+
+ strncat(index->path, filename, (int) (abpath - filename) + 1);
+ for(i = 0; index->path[i] != 0; i++) {
+ if (index->path[i] == '\\') {
+ index->path[i] = '/';
+ }
+ }
+ }
+ /* -------------------- END OF COPY OF THE __New() CODE -------------------- */
+ }
+
+ /* Index/data filenames */
+ if (pos != NULL) {
+ int nLen = (int) (pos - filename);
+
+ strncat(cache->filenameDat, filename, nLen);
+ strncat(cache->filenameNdx, filename, nLen);
+ strcat(cache->filenameDat, ".dat");
+ strcat(cache->filenameNdx, ".ndx");
+ }
+ ndxSize = filesize(cache->filenameNdx);
+ cache->timestamp = file_timestamp(cache->filenameDat);
+ cache->dat = fopen(cache->filenameDat, "rb");
+ cache->ndx = fopen(cache->filenameNdx, "rb");
+ if (cache->dat != NULL && cache->ndx != NULL && ndxSize > 0) {
+ char *use = malloc(ndxSize + 1);
+
+ if (fread(use, 1, ndxSize, cache->ndx) == ndxSize) {
+ char firstline[256];
+ char *a = use;
+
+ use[ndxSize] = '\0';
+ a += cache_brstr(a, firstline);
+ if (strncmp(firstline, "CACHE-", 6) == 0) { // Nouvelle version du cache
+ if (strncmp(firstline, "CACHE-1.", 8) == 0) { // Version 1.1x
+ cache->version = (int) (firstline[8] - '0'); // cache 1.x
+ if (cache->version <= 5) {
+ a += cache_brstr(a, firstline);
+ strcpy(cache->lastmodified, firstline);
+ } else {
+ // fprintf(opt->errlog,"Cache: version 1.%d not supported, ignoring current cache"LF,cache->version);
+ fclose(cache->dat);
+ cache->dat = NULL;
+ free(use);
+ use = NULL;
+ }
+ } else { // non supporté
+ // fspc(opt->errlog,"error"); fprintf(opt->errlog,"Cache: %s not supported, ignoring current cache"LF,firstline);
+ fclose(cache->dat);
+ cache->dat = NULL;
+ free(use);
+ use = NULL;
+ }
+ /* */
+ } else { // Vieille version du cache
+ /* */
+ // hts_log_print(opt, LOG_WARNING, "Cache: importing old cache format");
+ cache->version = 0; // cache 1.0
+ strcpy(cache->lastmodified, firstline);
+ }
+
+ /* Create hash table for the cache (MUCH FASTER!) */
+ if (use) {
+ char line[HTS_URLMAXSIZE * 2];
+ char linepos[256];
+ int pos;
+ int firstSeen = 0;
+
+ while((a != NULL) && (a < (use + ndxSize))) {
+ a = strchr(a + 1, '\n'); /* start of line */
+ if (a) {
+ a++;
+ /* read "host/file" */
+ a += binput(a, line, HTS_URLMAXSIZE);
+ a += binput(a, line + strlen(line), HTS_URLMAXSIZE);
+ /* read position */
+ a += binput(a, linepos, 200);
+ sscanf(linepos, "%d", &pos);
+
+ /* Add entry */
+ inthash_add(cache->hash, line, pos);
+
+ /* First link as starting URL */
+ if (!firstSeen) {
+ if (strstr(line, "/robots.txt") == NULL) {
+ PT_Index__Old index = cache;
+
+ firstSeen = 1;
+ if (!link_has_authority(line))
+ strcat(index->startUrl, "http://");
+ strcat(index->startUrl, line);
+ }
+ }
+
+ }
+ }
+ /* Not needed anymore! */
+ free(use);
+ use = NULL;
+ return 1;
+ }
+ }
+ }
+ }
+ return 0;
+}
+
+static String DecodeUrl(const char *url) {
+ int i;
+ String s = STRING_EMPTY;
+
+ StringClear(s);
+ for(i = 0; url[i] != '\0'; i++) {
+ if (url[i] == '+') {
+ StringAddchar(s, ' ');
+ } else if (url[i] == '%') {
+ if (url[i + 1] == '%') {
+ StringAddchar(s, '%');
+ i++;
+ } else if (url[i + 1] != 0 && url[i + 2] != 0) {
+ char tmp[3];
+ int codepoint = 0;
+
+ tmp[0] = url[i + 1];
+ tmp[1] = url[i + 2];
+ tmp[2] = 0;
+ if (sscanf(tmp, "%x", &codepoint) == 1) {
+ StringAddchar(s, (char) codepoint);
+ }
+ i += 2;
+ }
+ } else {
+ StringAddchar(s, url[i]);
+ }
+ }
+ return s;
+}
+
+static PT_Element PT_ReadCache__Old(PT_Index index, const char *url, int flags) {
+ PT_Element retCode;
+
+ MutexLock(&index->slots.formatOld.fileLock);
+ {
+ retCode = PT_ReadCache__Old_u(index, url, flags);
+ }
+ MutexUnlock(&index->slots.formatOld.fileLock);
+ return retCode;
+}
+
+static PT_Element PT_ReadCache__Old_u(PT_Index index_, const char *url,
+ int flags) {
+ PT_Index__Old cache = (PT_Index__Old) & index_->slots.formatOld;
intptr_t hash_pos;
int hash_pos_return;
- char location_default[HTS_URLMAXSIZE*2];
- char previous_save[HTS_URLMAXSIZE*2];
- char previous_save_[HTS_URLMAXSIZE*2];
+ char location_default[HTS_URLMAXSIZE * 2];
+ char previous_save[HTS_URLMAXSIZE * 2];
+ char previous_save_[HTS_URLMAXSIZE * 2];
PT_Element r;
- int ok=0;
-
- if (cache == NULL || cache->hash == NULL || url == NULL || *url == 0)
- return NULL;
- if ((r = PT_ElementNew()) == NULL)
- return NULL;
- location_default[0] = '\0';
- previous_save[0] = previous_save_[0] = '\0';
+ int ok = 0;
+
+ if (cache == NULL || cache->hash == NULL || url == NULL || *url == 0)
+ return NULL;
+ if ((r = PT_ElementNew()) == NULL)
+ return NULL;
+ location_default[0] = '\0';
+ previous_save[0] = previous_save_[0] = '\0';
memset(r, 0, sizeof(_PT_Element));
r->location = location_default;
- strcpy(r->location, "");
- if (strncmp(url, "http://", 7) == 0)
- url += 7;
- hash_pos_return=inthash_read(cache->hash, url, &hash_pos);
+ strcpy(r->location, "");
+ if (strncmp(url, "http://", 7) == 0)
+ url += 7;
+ hash_pos_return = inthash_read(cache->hash, url, &hash_pos);
if (hash_pos_return) {
- int pos = (int) hash_pos; /* simply */
-
- if (fseek(cache->dat, (pos>0) ? pos : (-pos), SEEK_SET) == 0) {
- /* Importer cache1.0 */
- if (cache->version==0) {
- OLD_htsblk old_r;
- if (fread((char*) &old_r,1,sizeof(old_r),cache->dat) == sizeof(old_r)) { // lire tout (y compris statuscode etc)
- int i;
- String urlDecoded;
- r->statuscode = old_r.statuscode;
- r->size = old_r.size; // taille fichier
- strcpy(r->msg, old_r.msg);
- strcpy(r->contenttype, old_r.contenttype);
-
- /* Guess the destination filename.. this sucks, because this method is not reliable.
- Yes, the old 1.0 cache format was *that* bogus. /rx */
+ int pos = (int) hash_pos; /* simply */
+
+ if (fseek(cache->dat, (pos > 0) ? pos : (-pos), SEEK_SET) == 0) {
+ /* Importer cache1.0 */
+ if (cache->version == 0) {
+ OLD_htsblk old_r;
+
+ if (fread((char *) &old_r, 1, sizeof(old_r), cache->dat) == sizeof(old_r)) { // lire tout (y compris statuscode etc)
+ int i;
+ String urlDecoded;
+
+ r->statuscode = old_r.statuscode;
+ r->size = old_r.size; // taille fichier
+ strcpy(r->msg, old_r.msg);
+ strcpy(r->contenttype, old_r.contenttype);
+
+ /* Guess the destination filename.. this sucks, because this method is not reliable.
+ Yes, the old 1.0 cache format was *that* bogus. /rx */
#define FORBIDDEN_CHAR(c) (c == '~' \
|| c == '\\' \
|| c == ':' \
@@ -1554,203 +1655,211 @@ static PT_Element PT_ReadCache__Old_u(PT_Index index_, const char* url, int flag
|| ((unsigned char) c ) <= 31 \
|| ((unsigned char) c ) == 127 \
)
- urlDecoded = DecodeUrl(jump_protocol_and_auth(url));
- strcpy(previous_save_, StringBuff(urlDecoded));
- StringFree(urlDecoded);
- for(i = 0 ; previous_save_[i] != '\0' && previous_save_[i] != '?' ; i++) {
- if (FORBIDDEN_CHAR(previous_save_[i])) {
- previous_save_[i] = '_';
- }
- }
- previous_save_[i] = '\0';
+ urlDecoded = DecodeUrl(jump_protocol_and_auth(url));
+ strcpy(previous_save_, StringBuff(urlDecoded));
+ StringFree(urlDecoded);
+ for(i = 0; previous_save_[i] != '\0' && previous_save_[i] != '?'; i++) {
+ if (FORBIDDEN_CHAR(previous_save_[i])) {
+ previous_save_[i] = '_';
+ }
+ }
+ previous_save_[i] = '\0';
#undef FORBIDDEN_CHAR
- ok = 1; /* import ok */
- }
- /* */
- /* Cache 1.1 */
+ ok = 1; /* import ok */
+ }
+ /* */
+ /* Cache 1.1 */
} else {
char check[256];
unsigned long size_read;
unsigned long int size_;
- check[0]='\0';
+
+ check[0] = '\0';
//
- cache_rint(cache->dat,&r->statuscode);
- cache_rLLint(cache->dat,&size_);
+ cache_rint(cache->dat, &r->statuscode);
+ cache_rLLint(cache->dat, &size_);
r->size = (size_t) size_;
- cache_rstr(cache->dat,r->msg);
- cache_rstr(cache->dat,r->contenttype);
+ cache_rstr(cache->dat, r->msg);
+ cache_rstr(cache->dat, r->contenttype);
if (cache->version >= 3)
- cache_rstr(cache->dat,r->charset);
- cache_rstr(cache->dat,r->lastmodified);
- cache_rstr(cache->dat,r->etag);
- cache_rstr(cache->dat,r->location);
+ cache_rstr(cache->dat, r->charset);
+ cache_rstr(cache->dat, r->lastmodified);
+ cache_rstr(cache->dat, r->etag);
+ cache_rstr(cache->dat, r->location);
if (cache->version >= 2)
- cache_rstr(cache->dat,r->cdispo);
+ cache_rstr(cache->dat, r->cdispo);
if (cache->version >= 4) {
- cache_rstr(cache->dat, previous_save_); // adr
- cache_rstr(cache->dat, previous_save_); // fil
+ cache_rstr(cache->dat, previous_save_); // adr
+ cache_rstr(cache->dat, previous_save_); // fil
previous_save[0] = '\0';
- cache_rstr(cache->dat, previous_save_); // save
+ cache_rstr(cache->dat, previous_save_); // save
}
if (cache->version >= 5) {
r->headers = cache_rstr_addr(cache->dat);
}
//
- cache_rstr(cache->dat,check);
- if (strcmp(check,"HTS")==0) { /* intégrité OK */
- ok=1;
+ cache_rstr(cache->dat, check);
+ if (strcmp(check, "HTS") == 0) { /* intégrité OK */
+ ok = 1;
}
- cache_rLLint(cache->dat, &size_read); /* lire size pour être sûr de la taille déclarée (réécrire) */
- if (size_read > 0) { /* si inscrite ici */
+ cache_rLLint(cache->dat, &size_read); /* lire size pour être sûr de la taille déclarée (réécrire) */
+ if (size_read > 0) { /* si inscrite ici */
r->size = size_read;
- } else { /* pas de données directement dans le cache, fichier présent? */
- r->size = 0;
+ } else { /* pas de données directement dans le cache, fichier présent? */
+ r->size = 0;
}
}
- /* Check destination filename */
-
- {
- PT_Index__Old index = cache;
- /* -------------------- COPY OF THE __New() CODE -------------------- */
- if (previous_save_[0] != '\0') {
- int pathLen = (int) strlen(index->path);
- if (pathLen > 0 && strncmp(previous_save_, index->path, pathLen) == 0) { // old (<3.40) buggy format
- strcpy(previous_save, previous_save_);
- }
- // relative ? (hack)
- else if (index->safeCache
- || (previous_save_[0] != '/' // /home/foo/bar.gif
- && ( !isalpha(previous_save_[0]) || previous_save_[1] != ':' ) ) // c:/home/foo/bar.gif
- )
- {
- index->safeCache = 1;
- sprintf(previous_save, "%s%s", index->path, previous_save_);
- }
- // bogus format (includes buggy absolute path)
- else {
- /* guess previous path */
- if (index->fixedPath == 0) {
- const char * start = jump_protocol_and_auth(url);
- const char * end = start ? strchr(start, '/') : NULL;
- int len = (int) (end - start);
- if (start != NULL && end != NULL && len > 0 && len < 128) {
- char piece[128 + 2];
- const char * where;
- piece[0] = '\0';
- strncat(piece, start, len);
- if ((where = strstr(previous_save_, piece)) != NULL) {
- index->fixedPath = (int) (where - previous_save_); // offset to relative path
- }
- }
- }
- if (index->fixedPath > 0) {
- int saveLen = (int) strlen(previous_save_);
- if (index->fixedPath < saveLen) {
- sprintf(previous_save, "%s%s", index->path, previous_save_ + index->fixedPath);
- } else {
- sprintf(r->msg, "Bogus fixePath prefix for %s (prefixLen=%d)", previous_save_, (int)index->fixedPath);
- r->statuscode = STATUSCODE_INVALID;
- }
- } else {
- sprintf(previous_save, "%s%s", index->path, previous_save_);
- }
- }
- }
- /* -------------------- END OF COPY OF THE __New() CODE -------------------- */
- }
+ /* Check destination filename */
+
+ {
+ PT_Index__Old index = cache;
+
+ /* -------------------- COPY OF THE __New() CODE -------------------- */
+ if (previous_save_[0] != '\0') {
+ int pathLen = (int) strlen(index->path);
+
+ if (pathLen > 0 && strncmp(previous_save_, index->path, pathLen) == 0) { // old (<3.40) buggy format
+ strcpy(previous_save, previous_save_);
+ }
+ // relative ? (hack)
+ else if (index->safeCache || (previous_save_[0] != '/' // /home/foo/bar.gif
+ && (!isalpha(previous_save_[0]) || previous_save_[1] != ':')) // c:/home/foo/bar.gif
+ ) {
+ index->safeCache = 1;
+ sprintf(previous_save, "%s%s", index->path, previous_save_);
+ }
+ // bogus format (includes buggy absolute path)
+ else {
+ /* guess previous path */
+ if (index->fixedPath == 0) {
+ const char *start = jump_protocol_and_auth(url);
+ const char *end = start ? strchr(start, '/') : NULL;
+ int len = (int) (end - start);
+
+ if (start != NULL && end != NULL && len > 0 && len < 128) {
+ char piece[128 + 2];
+ const char *where;
+
+ piece[0] = '\0';
+ strncat(piece, start, len);
+ if ((where = strstr(previous_save_, piece)) != NULL) {
+ index->fixedPath = (int) (where - previous_save_); // offset to relative path
+ }
+ }
+ }
+ if (index->fixedPath > 0) {
+ int saveLen = (int) strlen(previous_save_);
+
+ if (index->fixedPath < saveLen) {
+ sprintf(previous_save, "%s%s", index->path,
+ previous_save_ + index->fixedPath);
+ } else {
+ sprintf(r->msg, "Bogus fixePath prefix for %s (prefixLen=%d)",
+ previous_save_, (int) index->fixedPath);
+ r->statuscode = STATUSCODE_INVALID;
+ }
+ } else {
+ sprintf(previous_save, "%s%s", index->path, previous_save_);
+ }
+ }
+ }
+ /* -------------------- END OF COPY OF THE __New() CODE -------------------- */
+ }
/* Read data */
- if (ok) {
- r->adr = NULL;
- if ( (r->statuscode>=0) && (r->statuscode<=999)) {
- r->adr = NULL;
- if (pos<0) {
- if (flags & FETCH_BODY) {
- FILE* fp = fopen(previous_save, "rb");
- if (fp != NULL) {
- r->adr = (char*) malloc(r->size + 1);
- if (r->adr != NULL) {
- if (r->size > 0 && fread(r->adr, 1, r->size, fp) != r->size) {
- r->statuscode=STATUSCODE_INVALID;
- strcpy(r->msg,"Read error in cache disk data");
- }
- r->adr[r->size] = '\0';
- } else {
- r->statuscode=STATUSCODE_INVALID;
- strcpy(r->msg,"Read error (memory exhausted) from cache");
- }
- fclose(fp);
- } else {
- r->statuscode = STATUSCODE_INVALID;
- strcpy(r->msg, "Previous cache file not found (2)");
- }
- }
- } else {
- // lire fichier (d'un coup)
- if (flags & FETCH_BODY) {
- r->adr=(char*) malloc(r->size + 1);
- if (r->adr!=NULL) {
- if (fread(r->adr, 1, r->size,cache->dat) != r->size) { // erreur
- free(r->adr);
- r->adr=NULL;
- r->statuscode=STATUSCODE_INVALID;
- strcpy(r->msg,"Cache Read Error : Read Data");
- } else
- r->adr[r->size] = '\0';
- } else { // erreur
- r->statuscode=STATUSCODE_INVALID;
- strcpy(r->msg,"Cache Memory Error");
- }
- }
- }
- } else {
- r->statuscode=STATUSCODE_INVALID;
- strcpy(r->msg,"Cache Read Error : Bad Data");
+ if (ok) {
+ r->adr = NULL;
+ if ((r->statuscode >= 0) && (r->statuscode <= 999)) {
+ r->adr = NULL;
+ if (pos < 0) {
+ if (flags & FETCH_BODY) {
+ FILE *fp = fopen(previous_save, "rb");
+
+ if (fp != NULL) {
+ r->adr = (char *) malloc(r->size + 1);
+ if (r->adr != NULL) {
+ if (r->size > 0 && fread(r->adr, 1, r->size, fp) != r->size) {
+ r->statuscode = STATUSCODE_INVALID;
+ strcpy(r->msg, "Read error in cache disk data");
+ }
+ r->adr[r->size] = '\0';
+ } else {
+ r->statuscode = STATUSCODE_INVALID;
+ strcpy(r->msg, "Read error (memory exhausted) from cache");
+ }
+ fclose(fp);
+ } else {
+ r->statuscode = STATUSCODE_INVALID;
+ strcpy(r->msg, "Previous cache file not found (2)");
+ }
+ }
+ } else {
+ // lire fichier (d'un coup)
+ if (flags & FETCH_BODY) {
+ r->adr = (char *) malloc(r->size + 1);
+ if (r->adr != NULL) {
+ if (fread(r->adr, 1, r->size, cache->dat) != r->size) { // erreur
+ free(r->adr);
+ r->adr = NULL;
+ r->statuscode = STATUSCODE_INVALID;
+ strcpy(r->msg, "Cache Read Error : Read Data");
+ } else
+ r->adr[r->size] = '\0';
+ } else { // erreur
+ r->statuscode = STATUSCODE_INVALID;
+ strcpy(r->msg, "Cache Memory Error");
+ }
+ }
+ }
+ } else {
+ r->statuscode = STATUSCODE_INVALID;
+ strcpy(r->msg, "Cache Read Error : Bad Data");
}
- } else { // erreur
- r->statuscode=STATUSCODE_INVALID;
- strcpy(r->msg,"Cache Read Error : Read Header");
+ } else { // erreur
+ r->statuscode = STATUSCODE_INVALID;
+ strcpy(r->msg, "Cache Read Error : Read Header");
}
} else {
- r->statuscode=STATUSCODE_INVALID;
- strcpy(r->msg,"Cache Read Error : Seek Failed");
+ r->statuscode = STATUSCODE_INVALID;
+ strcpy(r->msg, "Cache Read Error : Seek Failed");
}
} else {
- r->statuscode=STATUSCODE_INVALID;
- strcpy(r->msg,"File Cache Entry Not Found");
- }
- if (r->location[0] != '\0') {
- r->location = strdup(r->location);
- } else {
- r->location = NULL;
- }
+ r->statuscode = STATUSCODE_INVALID;
+ strcpy(r->msg, "File Cache Entry Not Found");
+ }
+ if (r->location[0] != '\0') {
+ r->location = strdup(r->location);
+ } else {
+ r->location = NULL;
+ }
return r;
}
-static int PT_LookupCache__Old(PT_Index index, const char* url) {
- int retCode;
- MutexLock(&index->slots.formatOld.fileLock);
- {
- retCode = PT_LookupCache__Old_u(index, url);
- }
- MutexUnlock(&index->slots.formatOld.fileLock);
- return retCode;
-}
+static int PT_LookupCache__Old(PT_Index index, const char *url) {
+ int retCode;
-static int PT_LookupCache__Old_u(PT_Index index_, const char* url) {
- if (index_ != NULL) {
- PT_Index__New cache = (PT_Index__New) &index_->slots.formatNew;
- if (cache == NULL || cache->hash == NULL || url == NULL || *url == 0)
- return 0;
- if (strncmp(url, "http://", 7) == 0)
- url += 7;
- if (inthash_read(cache->hash, url, NULL))
- return 1;
- }
- return 0;
+ MutexLock(&index->slots.formatOld.fileLock);
+ {
+ retCode = PT_LookupCache__Old_u(index, url);
+ }
+ MutexUnlock(&index->slots.formatOld.fileLock);
+ return retCode;
}
+static int PT_LookupCache__Old_u(PT_Index index_, const char *url) {
+ if (index_ != NULL) {
+ PT_Index__New cache = (PT_Index__New) & index_->slots.formatNew;
+
+ if (cache == NULL || cache->hash == NULL || url == NULL || *url == 0)
+ return 0;
+ if (strncmp(url, "http://", 7) == 0)
+ url += 7;
+ if (inthash_read(cache->hash, url, NULL))
+ return 1;
+ }
+ return 0;
+}
/* ------------------------------------------------------------ */
/* Internet Archive Arc 1.0 (arc) format */
@@ -1760,199 +1869,221 @@ static int PT_LookupCache__Old_u(PT_Index index_, const char* url) {
#define ARC_SP ' '
-static const char* getArcField(const char *line, int pos) {
- int i;
- for(i = 0 ; line[i] != '\0' && pos > 0 ; i++) {
- if (line[i] == ARC_SP)
- pos--;
- }
- if (pos == 0)
- return &line[i];
- return NULL;
-}
-
-static char* copyArcField(const char *line, int npos, char *dest, int destMax) {
- const char *pos;
- if ((pos = getArcField(line, npos)) != NULL) {
- int i;
- for(i = 0 ; pos[i] != '\0' && pos[i] != ARC_SP && ( --destMax ) > 0; i++) {
- dest[i] = pos[i];
- }
- dest[i] = 0;
- return dest;
- }
- dest[0] = 0;
- return NULL;
+static const char *getArcField(const char *line, int pos) {
+ int i;
+
+ for(i = 0; line[i] != '\0' && pos > 0; i++) {
+ if (line[i] == ARC_SP)
+ pos--;
+ }
+ if (pos == 0)
+ return &line[i];
+ return NULL;
+}
+
+static char *copyArcField(const char *line, int npos, char *dest, int destMax) {
+ const char *pos;
+
+ if ((pos = getArcField(line, npos)) != NULL) {
+ int i;
+
+ for(i = 0; pos[i] != '\0' && pos[i] != ARC_SP && (--destMax) > 0; i++) {
+ dest[i] = pos[i];
+ }
+ dest[i] = 0;
+ return dest;
+ }
+ dest[0] = 0;
+ return NULL;
}
static int getArcLength(const char *line) {
- const char *pos;
- if ((pos = getArcField(line, 9)) != NULL
- || (pos = getArcField(line, 4)) != NULL
- || (pos = getArcField(line, 2)) != NULL
- ) {
- int length;
- if (sscanf(pos, "%d", &length) == 1) {
- return length;
- }
- }
- return -1;
-}
-
-static int skipArcNl(FILE* file) {
- if (fgetc(file) == 0x0a) {
- return 0;
- }
- return -1;
-}
-
-static int skipArcData(FILE* file, const char *line) {
- int jump = getArcLength(line);
- if (jump != -1) {
- if (fseek(file, jump, SEEK_CUR) == 0 /* && skipArcNl(file) == 0 */) {
- return 0;
- }
- }
- return -1;
+ const char *pos;
+
+ if ((pos = getArcField(line, 9)) != NULL
+ || (pos = getArcField(line, 4)) != NULL
+ || (pos = getArcField(line, 2)) != NULL) {
+ int length;
+
+ if (sscanf(pos, "%d", &length) == 1) {
+ return length;
+ }
+ }
+ return -1;
+}
+
+static int skipArcNl(FILE * file) {
+ if (fgetc(file) == 0x0a) {
+ return 0;
+ }
+ return -1;
+}
+
+static int skipArcData(FILE * file, const char *line) {
+ int jump = getArcLength(line);
+
+ if (jump != -1) {
+ if (fseek(file, jump, SEEK_CUR) == 0 /* && skipArcNl(file) == 0 */ ) {
+ return 0;
+ }
+ }
+ return -1;
}
static int getDigit(const char digit) {
- return (int) ( digit - '0' );
+ return (int) (digit - '0');
}
-static int getDigit2(const char * const pos) {
- return getDigit(pos[0])*10 + getDigit(pos[1]);
+static int getDigit2(const char *const pos) {
+ return getDigit(pos[0]) * 10 + getDigit(pos[1]);
}
-static int getDigit4(const char * const pos) {
- return getDigit(pos[0])*1000 + getDigit(pos[1])*100 + getDigit(pos[2])*10 + getDigit(pos[3]);
+static int getDigit4(const char *const pos) {
+ return getDigit(pos[0]) * 1000 + getDigit(pos[1]) * 100 +
+ getDigit(pos[2]) * 10 + getDigit(pos[3]);
}
-static time_t getGMT(struct tm *tm) { /* hey, time_t is local! */
- time_t t = mktime(tm);
- if (t != (time_t) -1 && t != (time_t) 0) {
+static time_t getGMT(struct tm *tm) { /* hey, time_t is local! */
+ time_t t = mktime(tm);
+
+ if (t != (time_t) - 1 && t != (time_t) 0) {
/* BSD does not have static "timezone" declared */
#if (defined(BSD) || defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) || defined(__FreeBSD_kernel__))
- time_t now = time(NULL);
- time_t timezone = - localtime(&now)->tm_gmtoff;
+ time_t now = time(NULL);
+ time_t timezone = -localtime(&now)->tm_gmtoff;
#endif
- return (time_t) (t - timezone);
- }
- return (time_t) -1;
-}
-
-static time_t getArcTimestamp(const char * const line) {
- const char *pos;
- if ((pos = getArcField(line, 2)) != NULL) {
- int i;
- /* date == YYYYMMDDhhmmss (Greenwich Mean Time) */
- /* example: 20050405154029 */
- for(i = 0 ; pos[i] >= '0' && pos[i] <= '9' ; i++);
- if (i == 14) {
- struct tm tm;
- memset(&tm, 0, sizeof(tm));
- tm.tm_year = getDigit4(pos + 0) - 1900; /* current year minus 1900 */
- tm.tm_mon = getDigit2(pos + 4) - 1; /* 0 – 11 */
- tm.tm_mday = getDigit2(pos + 6); /* 1 – 31 */
- tm.tm_hour = getDigit2(pos + 8); /* 0 – 23 */
- tm.tm_min = getDigit2(pos + 10); /* 0 – 59 */
- tm.tm_sec = getDigit2(pos + 12); /* 0 – 59 */
- tm.tm_isdst = 0;
- return getGMT(&tm);
- }
- }
- return (time_t) -1;
+ return (time_t) (t - timezone);
+ }
+ return (time_t) - 1;
+}
+
+static time_t getArcTimestamp(const char *const line) {
+ const char *pos;
+
+ if ((pos = getArcField(line, 2)) != NULL) {
+ int i;
+
+ /* date == YYYYMMDDhhmmss (Greenwich Mean Time) */
+ /* example: 20050405154029 */
+ for(i = 0; pos[i] >= '0' && pos[i] <= '9'; i++) ;
+ if (i == 14) {
+ struct tm tm;
+
+ memset(&tm, 0, sizeof(tm));
+ tm.tm_year = getDigit4(pos + 0) - 1900; /* current year minus 1900 */
+ tm.tm_mon = getDigit2(pos + 4) - 1; /* 0 – 11 */
+ tm.tm_mday = getDigit2(pos + 6); /* 1 – 31 */
+ tm.tm_hour = getDigit2(pos + 8); /* 0 – 23 */
+ tm.tm_min = getDigit2(pos + 10); /* 0 – 59 */
+ tm.tm_sec = getDigit2(pos + 12); /* 0 – 59 */
+ tm.tm_isdst = 0;
+ return getGMT(&tm);
+ }
+ }
+ return (time_t) - 1;
}
static int readArcURLRecord(PT_Index__Arc index) {
- index->line[0] = '\0';
- if (linput(index->file, index->line, sizeof(index->line) - 1)) {
- return 0;
- }
- return -1;
+ index->line[0] = '\0';
+ if (linput(index->file, index->line, sizeof(index->line) - 1)) {
+ return 0;
+ }
+ return -1;
}
#define str_begins(str, sstr) ( strncmp(str, sstr, sizeof(sstr) - 1) == 0 )
static int PT_CompatibleScheme(const char *url) {
- return (str_begins(url, "http:")
- || str_begins(url, "https:")
- || str_begins(url, "ftp:")
- || str_begins(url, "file:"));
+ return (str_begins(url, "http:")
+ || str_begins(url, "https:")
+ || str_begins(url, "ftp:")
+ || str_begins(url, "file:"));
}
int PT_LoadCache__Arc(PT_Index index_, const char *filename) {
- if (index_ != NULL && filename != NULL) {
- PT_Index__Arc index = &index_->slots.formatArc;
- index->timestamp = file_timestamp(filename);
- MutexInit(&index->fileLock);
- index->file = fopen(filename, "rb");
-
- // Opened ?
- if (index->file != NULL) {
- inthash hashtable = index->hash;
- if (readArcURLRecord(index) == 0) {
- int entries = 0;
- /* Read first line */
- if (strncmp(index->line, "filedesc://", sizeof("filedesc://") - 1) != 0) {
- fprintf(stderr, "Unexpected bad signature #%s"LF, index->line);
- fclose(index->file);
- index->file = NULL;
- return 0;
- }
- /* Timestamp */
- index->timestamp = getArcTimestamp(index->line);
- /* Skip first entry */
- if (skipArcData(index->file, index->line) != 0 || skipArcNl(index->file) != 0) {
- fprintf(stderr, "Unexpected bad data offset size first entry"LF);
- fclose(index->file);
- index->file = NULL;
- return 0;
- }
- /* Read all meta-entries (not data) */
- while(!feof(index->file)) {
- unsigned long int fpos = ftell(index->file);
- if (skipArcNl(index->file) == 0 && readArcURLRecord(index) == 0) {
- int length = getArcLength(index->line);
- if (length >= 0) {
- const char * filenameIndex = copyArcField(index->line, 0,
- index->filenameIndexBuff, sizeof(index->filenameIndexBuff) - 1); /* can not be NULL */
- if (strncmp(filenameIndex, "http://", 7) == 0) {
- filenameIndex += 7;
- }
- if (*filenameIndex != 0) {
- if (skipArcData(index->file, index->line) != 0) {
- fprintf(stderr, "Corrupted cache data entry #%d (truncated file?), aborting read"LF, (int)entries);
- }
- /*fprintf(stdout, "adding %s [%d]\n", filenameIndex, (int)fpos);*/
- if (PT_CompatibleScheme(index->filenameIndexBuff)) {
- inthash_add(hashtable, filenameIndex, fpos); /* position of meta-data */
- entries++;
- }
- } else {
- fprintf(stderr, "Corrupted cache meta entry #%d"LF, (int)entries);
- }
- } else {
- fprintf(stderr, "Corrupted cache meta entry #%d, aborting read"LF, (int)entries);
- break ;
- }
- } else {
- break ;
- }
- }
-
- /* OK */
- return 1;
- } else {
- fprintf(stderr, "Bad file (empty ?)"LF);
- }
- } else {
- fprintf(stderr, "Unable to open file"LF);
- index = NULL;
- }
- } else {
- fprintf(stderr, "Bad arguments"LF);
- }
- return 0;
+ if (index_ != NULL && filename != NULL) {
+ PT_Index__Arc index = &index_->slots.formatArc;
+
+ index->timestamp = file_timestamp(filename);
+ MutexInit(&index->fileLock);
+ index->file = fopen(filename, "rb");
+
+ // Opened ?
+ if (index->file != NULL) {
+ inthash hashtable = index->hash;
+
+ if (readArcURLRecord(index) == 0) {
+ int entries = 0;
+
+ /* Read first line */
+ if (strncmp(index->line, "filedesc://", sizeof("filedesc://") - 1) != 0) {
+ fprintf(stderr, "Unexpected bad signature #%s" LF, index->line);
+ fclose(index->file);
+ index->file = NULL;
+ return 0;
+ }
+ /* Timestamp */
+ index->timestamp = getArcTimestamp(index->line);
+ /* Skip first entry */
+ if (skipArcData(index->file, index->line) != 0
+ || skipArcNl(index->file) != 0) {
+ fprintf(stderr, "Unexpected bad data offset size first entry" LF);
+ fclose(index->file);
+ index->file = NULL;
+ return 0;
+ }
+ /* Read all meta-entries (not data) */
+ while(!feof(index->file)) {
+ unsigned long int fpos = ftell(index->file);
+
+ if (skipArcNl(index->file) == 0 && readArcURLRecord(index) == 0) {
+ int length = getArcLength(index->line);
+
+ if (length >= 0) {
+ const char *filenameIndex = copyArcField(index->line, 0,
+ index->filenameIndexBuff, sizeof(index->filenameIndexBuff) - 1); /* can not be NULL */
+
+ if (strncmp(filenameIndex, "http://", 7) == 0) {
+ filenameIndex += 7;
+ }
+ if (*filenameIndex != 0) {
+ if (skipArcData(index->file, index->line) != 0) {
+ fprintf(stderr,
+ "Corrupted cache data entry #%d (truncated file?), aborting read"
+ LF, (int) entries);
+ }
+ /*fprintf(stdout, "adding %s [%d]\n", filenameIndex, (int)fpos); */
+ if (PT_CompatibleScheme(index->filenameIndexBuff)) {
+ inthash_add(hashtable, filenameIndex, fpos); /* position of meta-data */
+ entries++;
+ }
+ } else {
+ fprintf(stderr, "Corrupted cache meta entry #%d" LF,
+ (int) entries);
+ }
+ } else {
+ fprintf(stderr,
+ "Corrupted cache meta entry #%d, aborting read" LF,
+ (int) entries);
+ break;
+ }
+ } else {
+ break;
+ }
+ }
+
+ /* OK */
+ return 1;
+ } else {
+ fprintf(stderr, "Bad file (empty ?)" LF);
+ }
+ } else {
+ fprintf(stderr, "Unable to open file" LF);
+ index = NULL;
+ }
+ } else {
+ fprintf(stderr, "Bad arguments" LF);
+ }
+ return 0;
}
#define HTTP_READFIELD_STRING(line, value, refline, refvalue) do { \
@@ -1970,278 +2101,287 @@ int PT_LoadCache__Arc(PT_Index index_, const char *filename) {
} \
} while(0)
-static PT_Element PT_ReadCache__Arc(PT_Index index, const char* url, int flags) {
- PT_Element retCode;
- MutexLock(&index->slots.formatArc.fileLock);
- {
- retCode = PT_ReadCache__Arc_u(index, url, flags);
- }
- MutexUnlock(&index->slots.formatArc.fileLock);
- return retCode;
+/* Locked front-end of PT_ReadCache__Arc_u(): the ARC slot's fileLock is held
+   for the whole read, and the element produced by the unlocked reader
+   (possibly NULL) is returned as-is. */
+static PT_Element PT_ReadCache__Arc(PT_Index index, const char *url, int flags) {
+  PT_Element element;
+
+  MutexLock(&index->slots.formatArc.fileLock);
+  element = PT_ReadCache__Arc_u(index, url, flags);
+  MutexUnlock(&index->slots.formatArc.fileLock);
+
+  return element;
}
-static PT_Element PT_ReadCache__Arc_u(PT_Index index_, const char* url, int flags)
-{
- PT_Index__Arc index = (PT_Index__Arc) &index_->slots.formatArc;
- char location_default[HTS_URLMAXSIZE*2];
+/* Unlocked read of one entry from an ARC cache file.
+
+   index_  cache index; its formatArc slot supplies the hash table, the open
+           file and the line buffer used here.
+   url     entry to read; a leading "http://" is ignored for the lookup.
+   flags   when FETCH_BODY is set the body is loaded into r->adr / r->size.
+
+   Returns NULL when the arguments are unusable or the element can not be
+   allocated. Otherwise a new element is always returned: on failure its
+   statuscode is STATUSCODE_INVALID and msg holds the reason. location is
+   either a strdup()'ed copy of the "Location" header or NULL. */
+static PT_Element PT_ReadCache__Arc_u(PT_Index index_, const char *url,
+                                      int flags) {
+  PT_Index__Arc index = (PT_Index__Arc) & index_->slots.formatArc;
+  char location_default[HTS_URLMAXSIZE * 2];
intptr_t hash_pos;
int hash_pos_return;
- PT_Element r = NULL;
- if (index == NULL || index->hash == NULL || url == NULL || *url == 0)
- return NULL;
- if ((r = PT_ElementNew()) == NULL)
- return NULL;
- location_default[0] = '\0';
+  PT_Element r = NULL;
+
+  if (index == NULL || index->hash == NULL || url == NULL || *url == 0)
+    return NULL;
+  if ((r = PT_ElementNew()) == NULL)
+    return NULL;
+  location_default[0] = '\0';
memset(r, 0, sizeof(_PT_Element));
r->location = location_default;
- strcpy(r->location, "");
- if (strncmp(url, "http://", 7) == 0)
- url += 7;
+  /* location temporarily points at the stack buffer; it is replaced by a
+     heap copy (or NULL) just before returning */
+  strcpy(r->location, "");
+  if (strncmp(url, "http://", 7) == 0)
+    url += 7;
hash_pos_return = inthash_read(index->hash, url, &hash_pos);
- if (hash_pos_return) {
- if (fseek(index->file, (long)hash_pos, SEEK_SET) == 0) {
- if (skipArcNl(index->file) == 0 && readArcURLRecord(index) == 0) {
- long int fposMeta = ftell(index->file);
- int dataLength = getArcLength(index->line);
- const char *pos;
-
- /* Read HTTP headers */
- /* HTTP/1.1 404 Not Found */
- if (linput(index->file, index->line, sizeof(index->line) - 1)) {
- if ((pos = getArcField(index->line, 1)) != NULL) {
- if (sscanf(pos, "%d", &r->statuscode) != 1) {
- r->statuscode = STATUSCODE_INVALID;
- }
- }
- if ((pos = getArcField(index->line, 2)) != NULL) {
- r->msg[0] = '\0';
- strncat(r->msg, pos, sizeof(pos) - 1);
- }
- while (linput(index->file, index->line, sizeof(index->line) - 1) && index->line[0] != '\0') {
- char* const line = index->line;
- char* value = strchr(line, ':');
- if (value != NULL) {
- *value = '\0';
- for( value++ ; *value == ' ' || *value == '\t' ; value++);
- HTTP_READFIELD_INT(line, value, "Content-Length", r->size); // size
- HTTP_READFIELD_STRING(line, value, "Content-Type", r->contenttype); // contenttype
- HTTP_READFIELD_STRING(line, value, "Last-Modified", r->lastmodified); // last-modified
- HTTP_READFIELD_STRING(line, value, "Etag", r->etag); // Etag
- HTTP_READFIELD_STRING(line, value, "Location", r->location); // 'location' pour moved
- HTTP_READFIELD_STRING(line, value, "Content-Disposition", r->cdispo); // Content-disposition
- if (line[0] != '\0') {
- int len = r->headers ? ((int) strlen(r->headers)) : 0;
- int nlen = (int) ( strlen(line) + 2 + strlen(value) + sizeof("\r\n") + 1 );
- r->headers = realloc(r->headers, len + nlen);
- r->headers[len] = '\0';
- strcat(r->headers, line);
- strcat(r->headers, ": ");
- strcat(r->headers, value);
- strcat(r->headers, "\r\n");
- }
- }
- }
-
- /* FIXME charset */
- if (r->contenttype[0] != '\0') {
- char *pos = strchr(r->contenttype, ';');
- if (pos != NULL) {
- /*char *chs = strchr(pos, "charset=");*/
- /*HTTP_READFIELD_STRING(line, value, "X-Charset", r->charset);*/
- *pos = 0;
- if ((pos = strchr(r->contenttype, ' ')) != NULL) {
- *pos = 0;
- }
- }
- }
-
- /* Read data */
- if (r->statuscode != STATUSCODE_INVALID) { /* Can continue */
- if (flags & FETCH_BODY) {
- long int fposCurrent = ftell(index->file);
- long int metaSize = fposCurrent - fposMeta;
- long int fetchSize = (long int) r->size;
- if (fetchSize <= 0) {
- fetchSize = dataLength - metaSize;
- } else if (fetchSize > dataLength - metaSize) {
- r->statuscode=STATUSCODE_INVALID;
- strcpy(r->msg, "Cache Read Error : Truncated Data");
- }
- r->size = 0;
- if (r->statuscode != STATUSCODE_INVALID) {
- r->adr = (char*) malloc(fetchSize);
- if (r->adr != NULL) {
- if (fetchSize > 0 && ( r->size = (int) fread(r->adr, 1, fetchSize, index->file) ) != fetchSize) {
+  if (hash_pos_return) {
+    if (fseek(index->file, (long) hash_pos, SEEK_SET) == 0) {
+      if (skipArcNl(index->file) == 0 && readArcURLRecord(index) == 0) {
+        long int fposMeta = ftell(index->file);
+        int dataLength = getArcLength(index->line);
+        const char *pos;
+
+        /* Read HTTP headers */
+        /* HTTP/1.1 404 Not Found */
+        if (linput(index->file, index->line, sizeof(index->line) - 1)) {
+          if ((pos = getArcField(index->line, 1)) != NULL) {
+            if (sscanf(pos, "%d", &r->statuscode) != 1) {
+              r->statuscode = STATUSCODE_INVALID;
+            }
+          }
+          if ((pos = getArcField(index->line, 2)) != NULL) {
+            r->msg[0] = '\0';
+            /* The bound is the destination buffer; sizeof(pos) would only be
+               the size of a pointer and cut the message after a few bytes */
+            strncat(r->msg, pos, sizeof(r->msg) - 1);
+          }
+          while(linput(index->file, index->line, sizeof(index->line) - 1)
+                && index->line[0] != '\0') {
+            char *const line = index->line;
+            char *value = strchr(line, ':');
+
+            if (value != NULL) {
+              /* Split "Name: value" in place and skip blanks before value */
+              *value = '\0';
+              for(value++; *value == ' ' || *value == '\t'; value++) ;
+              HTTP_READFIELD_INT(line, value, "Content-Length", r->size);       // size
+              HTTP_READFIELD_STRING(line, value, "Content-Type", r->contenttype);       // contenttype
+              HTTP_READFIELD_STRING(line, value, "Last-Modified", r->lastmodified);     // last-modified
+              HTTP_READFIELD_STRING(line, value, "Etag", r->etag);      // Etag
+              HTTP_READFIELD_STRING(line, value, "Location", r->location);      // 'location' for moved
+              HTTP_READFIELD_STRING(line, value, "Content-Disposition", r->cdispo);     // Content-disposition
+              /* A name that is still non-empty here is appended verbatim to
+                 the raw header block */
+              if (line[0] != '\0') {
+                int len = r->headers ? ((int) strlen(r->headers)) : 0;
+                int nlen =
+                  (int) (strlen(line) + 2 + strlen(value) + sizeof("\r\n") + 1);
+                char *grown = realloc(r->headers, len + nlen);
+
+                /* On allocation failure the previous block is kept intact
+                   and this header is dropped instead of writing through a
+                   NULL pointer */
+                if (grown != NULL) {
+                  r->headers = grown;
+                  r->headers[len] = '\0';
+                  strcat(r->headers, line);
+                  strcat(r->headers, ": ");
+                  strcat(r->headers, value);
+                  strcat(r->headers, "\r\n");
+                }
+              }
+            }
+          }
+
+          /* FIXME charset */
+          if (r->contenttype[0] != '\0') {
+            char *pos = strchr(r->contenttype, ';');
+
+            if (pos != NULL) {
+              /*char *chs = strchr(pos, "charset="); */
+              /*HTTP_READFIELD_STRING(line, value, "X-Charset", r->charset); */
+              *pos = 0;
+              if ((pos = strchr(r->contenttype, ' ')) != NULL) {
+                *pos = 0;
+              }
+            }
+          }
+
+          /* Read data */
+          if (r->statuscode != STATUSCODE_INVALID) {    /* Can continue */
+            if (flags & FETCH_BODY) {
+              long int fposCurrent = ftell(index->file);
+              long int metaSize = fposCurrent - fposMeta;
+              long int fetchSize = (long int) r->size;
+
+              /* Without a usable Content-Length, take what is left of the
+                 record after the headers */
+              if (fetchSize <= 0) {
+                fetchSize = dataLength - metaSize;
+              }
+              /* Also rejects a negative size derived from a bad record
+                 length, which must never reach malloc() */
+              if (fetchSize < 0 || fetchSize > dataLength - metaSize) {
+                r->statuscode = STATUSCODE_INVALID;
+                strcpy(r->msg, "Cache Read Error : Truncated Data");
+              }
+              r->size = 0;
+              if (r->statuscode != STATUSCODE_INVALID) {
+                /* malloc(0) may legally return NULL; ask for one byte so an
+                   empty body is not reported as memory exhaustion */
+                r->adr = (char *) malloc(fetchSize > 0 ? (size_t) fetchSize : 1);
+                if (r->adr != NULL) {
+                  if (fetchSize > 0
+                      && (r->size =
+                          (int) fread(r->adr, 1, fetchSize,
+                                      index->file)) != fetchSize) {
int last_errno = errno;
- r->statuscode=STATUSCODE_INVALID;
- sprintf(r->msg,"Read error in cache disk data: %s", strerror(last_errno));
- }
- } else {
- r->statuscode=STATUSCODE_INVALID;
- strcpy(r->msg,"Read error (memory exhausted) from cache");
- }
- }
- }
- }
-
- } else {
- r->statuscode=STATUSCODE_INVALID;
- strcpy(r->msg, "Cache Read Error : Read Header Error");
- }
-
- } else {
- r->statuscode=STATUSCODE_INVALID;
- strcpy(r->msg, "Cache Read Error : Read Header Error");
- }
- } else {
- r->statuscode=STATUSCODE_INVALID;
- strcpy(r->msg, "Cache Read Error : Seek Error");
- }
-
- } else {
- r->statuscode=STATUSCODE_INVALID;
- strcpy(r->msg,"File Cache Entry Not Found");
- }
- if (r->location[0] != '\0') {
- r->location = strdup(r->location);
- } else {
- r->location = NULL;
- }
+
+                    r->statuscode = STATUSCODE_INVALID;
+                    sprintf(r->msg, "Read error in cache disk data: %s",
+                            strerror(last_errno));
+                  }
+                } else {
+                  r->statuscode = STATUSCODE_INVALID;
+                  strcpy(r->msg, "Read error (memory exhausted) from cache");
+                }
+              }
+            }
+          }
+
+        } else {
+          r->statuscode = STATUSCODE_INVALID;
+          strcpy(r->msg, "Cache Read Error : Read Header Error");
+        }
+
+      } else {
+        r->statuscode = STATUSCODE_INVALID;
+        strcpy(r->msg, "Cache Read Error : Read Header Error");
+      }
+    } else {
+      r->statuscode = STATUSCODE_INVALID;
+      strcpy(r->msg, "Cache Read Error : Seek Error");
+    }
+
+  } else {
+    r->statuscode = STATUSCODE_INVALID;
+    strcpy(r->msg, "File Cache Entry Not Found");
+  }
+  /* Detach location from the stack buffer before handing r to the caller */
+  if (r->location[0] != '\0') {
+    r->location = strdup(r->location);
+  } else {
+    r->location = NULL;
+  }
return r;
}
-static int PT_LookupCache__Arc(PT_Index index, const char* url) {
- int retCode;
- MutexLock(&index->slots.formatArc.fileLock);
- {
- retCode = PT_LookupCache__Arc_u(index, url);
- }
- MutexUnlock(&index->slots.formatArc.fileLock);
- return retCode;
+/* Locked front-end of PT_LookupCache__Arc_u(): takes the ARC slot's fileLock
+   around the lookup and forwards its result unchanged. */
+static int PT_LookupCache__Arc(PT_Index index, const char *url) {
+  int found;
+
+  MutexLock(&index->slots.formatArc.fileLock);
+  found = PT_LookupCache__Arc_u(index, url);
+  MutexUnlock(&index->slots.formatArc.fileLock);
+
+  return found;
}
-static int PT_LookupCache__Arc_u(PT_Index index_, const char* url) {
- if (index_ != NULL) {
- PT_Index__New cache = (PT_Index__New) &index_->slots.formatNew;
- if (cache == NULL || cache->hash == NULL || url == NULL || *url == 0)
- return 0;
- if (strncmp(url, "http://", 7) == 0)
- url += 7;
- if (inthash_read(cache->hash, url, NULL))
- return 1;
- }
- return 0;
+/* Unlocked lookup in an ARC index.
+
+   Returns 1 when `url` (with any leading "http://" ignored) is present in
+   the index hash table, 0 otherwise. A NULL index, a missing hash table and
+   a NULL or empty url all yield 0. */
+static int PT_LookupCache__Arc_u(PT_Index index_, const char *url) {
+  if (index_ != NULL) {
+    /* ARC indexes keep their state in the formatArc slot, the same one the
+       ARC reader and the locking wrappers use */
+    PT_Index__Arc cache = (PT_Index__Arc) & index_->slots.formatArc;
+
+    if (cache->hash == NULL || url == NULL || *url == 0)
+      return 0;
+    if (strncmp(url, "http://", 7) == 0)
+      url += 7;
+    if (inthash_read(cache->hash, url, NULL))
+      return 1;
+  }
+  return 0;
}
typedef struct PT_SaveCache__Arc_t {
- PT_Indexes indexes;
- FILE *fp;
- time_t t;
- char filename[64];
- struct tm buff;
- char headers[8192];
+ PT_Indexes indexes; /* indexes being enumerated; set by PT_SaveCache__Arc() */
+ FILE *fp; /* output stream every record is written to */
+ time_t t; /* time index obtained from PT_GetTimeIndex() */
+ char filename[64]; /* "httrack_<t>.arc" name repeated in each URL record */
+ struct tm buff; /* scratch storage handed to convert_time_rfc822() */
+ char headers[8192]; /* HTTP header block rebuilt for the current element */
char md5[32 + 2];
} PT_SaveCache__Arc_t;
+/* PT_EnumCache() callback used by PT_SaveCache__Arc(): appends one element
+   to the ARC file as <nl><URL-record><nl><headers><body>.
+
+   arg      the PT_SaveCache__Arc_t state (output stream, scratch buffers).
+   url      URL of the element; "http://" is prepended in the record when
+            link_has_authority() reports no authority.
+   element  cached entry to serialise.
+
+   Returns 0 on success, 1 when the headers or the body could not be fully
+   written. */
static int PT_SaveCache__Arc_Fun(void *arg, const char *url, PT_Element element) {
- PT_SaveCache__Arc_t *st = (PT_SaveCache__Arc_t*) arg;
- FILE * const fp = st->fp;
- struct tm* tm = convert_time_rfc822(&st->buff, element->lastmodified);
- int size_headers;
-
- sprintf(st->headers,
- "HTTP/1.0 %d %s" "\r\n"
- "X-Server: ProxyTrack " PROXYTRACK_VERSION "\r\n"
- "Content-type: %s%s%s%s" "\r\n"
- "Last-modified: %s" "\r\n"
- "Content-length: %d" "\r\n"
- ,
- element->statuscode, element->msg,
- /**/
- element->contenttype,
- (element->charset[0] ? "; charset=\"" : ""),
- (element->charset[0] ? element->charset : ""),
- (element->charset[0] ? "\"" : ""),
- /**/
- element->lastmodified,
- (int) element->size
- );
- if (element->location != NULL && element->location[0] != '\0') {
- sprintf(st->headers + strlen(st->headers), "Location: %s" "\r\n", element->location);
- }
- if (element->headers != NULL) {
- if ( strlen(element->headers) < sizeof(st->headers) - strlen(element->headers) - 1 ) {
- strcat(st->headers, element->headers);
- }
- }
- strcat(st->headers, "\r\n");
- size_headers = (int) strlen(st->headers);
-
- /* doc == <nl><URL-record><nl><network_doc> */
+  PT_SaveCache__Arc_t *st = (PT_SaveCache__Arc_t *) arg;
+  FILE *const fp = st->fp;
+  /* NOTE(review): tm is dereferenced below without a NULL check; confirm
+     convert_time_rfc822() can not fail on an empty or malformed date */
+  struct tm *tm = convert_time_rfc822(&st->buff, element->lastmodified);
+  int size_headers;
+
+  /* NOTE(review): the two sprintf() calls below are unbounded; they rely on
+     the element fields fitting in st->headers - confirm the field sizes */
+  sprintf(st->headers,
+          "HTTP/1.0 %d %s" "\r\n" "X-Server: ProxyTrack " PROXYTRACK_VERSION
+          "\r\n" "Content-type: %s%s%s%s" "\r\n" "Last-modified: %s" "\r\n"
+          "Content-length: %d" "\r\n", element->statuscode, element->msg,
+          /**/ element->contenttype,
+          (element->charset[0] ? "; charset=\"" : ""),
+          (element->charset[0] ? element->charset : ""),
+          (element->charset[0] ? "\"" : ""), /**/ element->lastmodified,
+          (int) element->size);
+  if (element->location != NULL && element->location[0] != '\0') {
+    sprintf(st->headers + strlen(st->headers), "Location: %s" "\r\n",
+            element->location);
+  }
+  if (element->headers != NULL) {
+    const size_t used = strlen(st->headers);
+    const size_t extra = strlen(element->headers);
+
+    /* Append the raw headers only when they fit after what is already in
+       the buffer, leaving room for the closing CRLF and the terminator
+       (sizeof("\r\n") counts both) */
+    if (used + extra + sizeof("\r\n") <= sizeof(st->headers)) {
+      strcat(st->headers, element->headers);
+    }
+  }
+  strcat(st->headers, "\r\n");
+  size_headers = (int) strlen(st->headers);
+
+  /* doc == <nl><URL-record><nl><network_doc> */
/* Format: URL IP date mime result checksum location offset filename length */
- if (element->adr != NULL) {
- domd5mem(element->adr, element->size, st->md5, 1);
- } else {
- strcpy(st->md5, "-");
- }
- fprintf(fp,
- /* nl */
- "\n"
- /* URL-record */
- "%s%s %s %04d%02d%02d%02d%02d%02d %s %d %s %s %ld %s %ld"
- /* nl */
- "\n",
- /* args */
- ( link_has_authority(url) ? "" : "http://" ), url,
- "0.0.0.0",
- tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday, tm->tm_hour, tm->tm_min, tm->tm_sec,
- element->contenttype,
- element->statuscode,
- st->md5, ( element->location ? element->location : "-" ),
- (long int)ftell(fp), st->filename,
- (long int)( size_headers + element->size ));
- /* network_doc */
- if (fwrite(st->headers, 1, size_headers, fp) != size_headers
- || ( element->size > 0 && fwrite(element->adr, 1, element->size, fp) != element->size )
- ) {
- return 1; /* Error */
- }
-
- return 0;
+  /* Checksum column: MD5 of the body, or "-" when there is no body */
+  if (element->adr != NULL) {
+    domd5mem(element->adr, element->size, st->md5, 1);
+  } else {
+    strcpy(st->md5, "-");
+  }
+  fprintf(fp,
+          /* nl */
+          "\n"
+          /* URL-record */
+          "%s%s %s %04d%02d%02d%02d%02d%02d %s %d %s %s %ld %s %ld"
+          /* nl */
+          "\n",
+          /* args */
+          (link_has_authority(url) ? "" : "http://"), url, "0.0.0.0",
+          tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday, tm->tm_hour,
+          tm->tm_min, tm->tm_sec, element->contenttype, element->statuscode,
+          st->md5, (element->location ? element->location : "-"),
+          (long int) ftell(fp), st->filename,
+          (long int) (size_headers + element->size));
+  /* network_doc */
+  if (fwrite(st->headers, 1, size_headers, fp) != size_headers
+      || (element->size > 0
+          && fwrite(element->adr, 1, element->size, fp) != element->size)
+    ) {
+    return 1;                   /* Error */
+  }
+
+  return 0;
}
+/* Writes every entry of `indexes` to `filename` as an ARC (version 2) file.
+
+   The file starts with the filedesc:// version block, then one record per
+   entry is emitted through PT_EnumCache() / PT_SaveCache__Arc_Fun().
+
+   Returns 0 on success, -1 when the file can not be created or the final
+   flush fails, or the non-zero value reported by PT_EnumCache(). On any
+   failure after creation the partial file is removed. */
static int PT_SaveCache__Arc(PT_Indexes indexes, const char *filename) {
- FILE *fp = fopen(filename, "wb");
- if (fp != NULL) {
- PT_SaveCache__Arc_t st;
- int ret;
- time_t t = PT_GetTimeIndex(indexes);
- struct tm tm = PT_GetTime(t);
-
- /* version-2-block ==
- filedesc://<path><sp><ip_address><sp><date><sp>text/plain<sp>200<sp>-<sp>-<sp>0<sp><filename><sp><length><nl>
- 2<sp><reserved><sp><origin-code><nl>
- URL<sp>IP-address<sp>Archive-date<sp>Content-type<sp>Result-code<sp>Checksum<sp>Location<sp> Offset<sp>Filename<sp>Archive-length<nl>
- <nl> */
- const char* prefix =
- "2 0 HTTrack Website Copier" "\n"
- "URL IP-address Archive-Date Content-Type Result-code Checksum Location Offset Filename Archive-length" "\n" "\n";
- sprintf(st.filename, "httrack_%d.arc", (int) t);
- fprintf(fp, "filedesc://%s 0.0.0.0 %04d%02d%02d%02d%02d%02d text/plain 200 - - 0 %s %d" "\n"
- "%s",
- st.filename,
- tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec,
- st.filename, (int)strlen(prefix), prefix);
- st.fp = fp;
- st.indexes = indexes;
- st.t = t;
- ret = PT_EnumCache(indexes, PT_SaveCache__Arc_Fun, (void *)&st);
- fclose(fp);
- if (ret != 0)
- (void) unlink(filename);
- return ret;
- }
- return -1;
+  FILE *fp = fopen(filename, "wb");
+
+  if (fp != NULL) {
+    PT_SaveCache__Arc_t st;
+    int ret;
+    time_t t = PT_GetTimeIndex(indexes);
+    struct tm tm = PT_GetTime(t);
+
+    /* version-2-block ==
+       filedesc://<path><sp><ip_address><sp><date><sp>text/plain<sp>200<sp>-<sp>-<sp>0<sp><filename><sp><length><nl>
+       2<sp><reserved><sp><origin-code><nl>
+       URL<sp>IP-address<sp>Archive-date<sp>Content-type<sp>Result-code<sp>Checksum<sp>Location<sp> Offset<sp>Filename<sp>Archive-length<nl>
+       <nl> */
+    const char *prefix =
+      "2 0 HTTrack Website Copier" "\n"
+      "URL IP-address Archive-Date Content-Type Result-code Checksum Location Offset Filename Archive-length"
+      "\n" "\n";
+    /* The archive name recorded inside the file is derived from the time
+       index, independently of the path actually written to */
+    sprintf(st.filename, "httrack_%d.arc", (int) t);
+    fprintf(fp,
+            "filedesc://%s 0.0.0.0 %04d%02d%02d%02d%02d%02d text/plain 200 - - 0 %s %d"
+            "\n" "%s", st.filename, tm.tm_year + 1900, tm.tm_mon + 1,
+            tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec, st.filename,
+            (int) strlen(prefix), prefix);
+    st.fp = fp;
+    st.indexes = indexes;
+    st.t = t;
+    ret = PT_EnumCache(indexes, PT_SaveCache__Arc_Fun, (void *) &st);
+    /* fclose() flushes the buffered records; if that fails the archive on
+       disk is incomplete even though every callback succeeded */
+    if (fclose(fp) != 0 && ret == 0)
+      ret = -1;
+    if (ret != 0)
+      (void) unlink(filename);
+    return ret;
+  }
+  return -1;
}