diff options
Diffstat (limited to 'src/proxy')
-rw-r--r-- | src/proxy/changelog.txt | 4 | ||||
-rw-r--r-- | src/proxy/main.c | 29 | ||||
-rwxr-xr-x | src/proxy/proxystrings.h | 114 | ||||
-rw-r--r-- | src/proxy/proxytrack.c | 202 | ||||
-rw-r--r-- | src/proxy/proxytrack.h | 172 | ||||
-rw-r--r-- | src/proxy/store.c | 822 | ||||
-rw-r--r-- | src/proxy/store.h | 16 |
7 files changed, 1086 insertions, 273 deletions
diff --git a/src/proxy/changelog.txt b/src/proxy/changelog.txt index f5ae48b..b50a8d6 100644 --- a/src/proxy/changelog.txt +++ b/src/proxy/changelog.txt @@ -1,3 +1,7 @@ +0.5 - May 8 2006
+- added ARC (Internet Archive 1.0) file format
+- first index output routines (proxytrack --convert ..)
+
0.4 - Sept 18 2005
- implemented very limited WebDAV (RFC2518) primitives
- index enumeration fixes
diff --git a/src/proxy/main.c b/src/proxy/main.c index e48b51d..bf35377 100644 --- a/src/proxy/main.c +++ b/src/proxy/main.c @@ -64,7 +64,7 @@ int main(int argc, char* argv[]) { int i; int ret = 0; - int proxyPort, icpPort; + int proxyPort = 0, icpPort = 0; char proxyAddr[256 + 1], icpAddr[256 + 1]; PT_Indexes index; @@ -98,10 +98,21 @@ int main(int argc, char* argv[]) printf("*** This version is a development release ***\n"); printf("\n"); if (argc < 3 - || !scanHostPort(argv[1], proxyAddr, &proxyPort) - || !scanHostPort(argv[2], icpAddr, &icpPort)) + || ( + strcmp(argv[1], "--convert") != 0 + && + ( + !scanHostPort(argv[1], proxyAddr, &proxyPort) + || !scanHostPort(argv[2], icpAddr, &icpPort) + ) + ) + ) { - fprintf(stderr, "usage: %s <proxy-addr:proxy-port> <ICP-addr:ICP-port> [ ( <new.zip path> | <new.ndx path> | --list <file-list> ) ..]\n", argv[0]); + fprintf(stderr, "proxy mode:\n"); + fprintf(stderr, "usage: %s <proxy-addr:proxy-port> <ICP-addr:ICP-port> [ ( <new.zip path> | <new.ndx path> | <archive.arc path> | --list <file-list> ) ..]\n", argv[0]); + fprintf(stderr, "\texample:%s proxy:8080 localhost:3130 /home/archives/www-archive-01.zip /home/old-archives/www-archive-02.ndx\n", argv[0]); + fprintf(stderr, "convert mode:\n"); + fprintf(stderr, "usage: %s --convert <archive-output-path> [ ( <new.zip path> | <new.ndx path> | <archive.arc path> | --list <file-list> ) ..]\n", argv[0]); fprintf(stderr, "\texample:%s proxy:8080 localhost:3130 /home/archives/www-archive-01.zip /home/old-archives/www-archive-02.ndx\n", argv[0]); return 1; } @@ -150,7 +161,15 @@ int main(int argc, char* argv[]) #endif /* Go */ - ret = proxytrack_main(proxyAddr, proxyPort, icpAddr, icpPort, index); + if (strcmp(argv[1], "--convert") != 0) { + ret = proxytrack_main(proxyAddr, proxyPort, icpAddr, icpPort, index); + } else { + if ((ret = PT_SaveCache(index, argv[2])) == 0) { + fprintf(stderr, "processed: '%s'\n", argv[2]); + } else { + fprintf(stderr, "error: could not save '%s'\n", argv[2]); + } + } /* Wipe */ PT_Delete(index); diff --git a/src/proxy/proxystrings.h b/src/proxy/proxystrings.h index 87bcf34..9ee8fa7 100755 --- a/src/proxy/proxystrings.h +++ b/src/proxy/proxystrings.h @@ -28,77 +28,11 @@ Please visit our Website: http://www.httrack.com // Strings a bit safer than static buffers -#ifndef HTS_STRINGS_DEFSTATIC -#define HTS_STRINGS_DEFSTATIC - -typedef struct String { - char* buff; - int len; - int capa; -} String; - -#define STRING_EMPTY {NULL, 0, 0} -#define STRING_BLK_SIZE 256 -#define StringBuff(blk) ((blk).buff) -#define StringLength(blk) ((blk).len) -#define StringCapacity(blk) ((blk).capa) -#define StringRoom(blk, size) do { \ - if ((blk).len + (int)(size) + 1 > (blk).capa) { \ - (blk).capa = ((blk).len + (size) + 1) * 2; \ - (blk).buff = (char*) realloc((blk).buff, (blk).capa); \ - } \ -} while(0) -#define StringBuffN(blk, size) StringBuffN_(&(blk), size) -static char* StringBuffN_(String* blk, int size) { - StringRoom(*blk, (blk->len) + size); - return StringBuff(*blk); -} -#define StringClear(blk) do { \ - StringRoom(blk, 0); \ - (blk).buff[0] = '\0'; \ - (blk).len = 0; \ -} while(0) -#define StringFree(blk) do { \ - if ((blk).buff != NULL) { \ - free((blk).buff); \ - (blk).buff = NULL; \ - } \ - (blk).capa = 0; \ - (blk).len = 0; \ -} while(0) -#define StringMemcat(blk, str, size) do { \ - StringRoom(blk, size); \ - if ((int)(size) > 0) { \ - memcpy((blk).buff + (blk).len, (str), (size)); \ - (blk).len += (size); \ - } \ - *((blk).buff + (blk).len) = '\0'; \ -} while(0) -#define StringAddchar(blk, c) do { \ - char __c = (c); \ - StringMemcat(blk, &__c, 1); \ -} while(0) -static void* StringAcquire(String* blk) { - void* buff = blk->buff; - blk->buff = NULL; - blk->capa = 0; - blk->len = 0; - return buff; -} -static StringAttach(String* blk, char** str) { - StringFree(*blk); - if (str != NULL && *str != NULL) { - blk->buff = *str; - blk->capa = (int)strlen(blk->buff); - blk->len = blk->capa; - *str = NULL; - } -} -#define StringStrcat(blk, str) StringMemcat(blk, str, ((str) != NULL) ? (int)strlen(str) : 0) -#define StringStrcpy(blk, str) do { \ - StringClear(blk); \ - StringStrcat(blk, str); \ -} while(0) +#ifndef HTS_PROXYSTRINGS_DEFSTATIC +#define HTS_PROXYSTRINGS_DEFSTATIC + +#include "htsstrings.h" + /* Tools */ @@ -138,16 +72,46 @@ static void escapexml(const char* s, String* tempo) { int i; for (i=0 ; s[i] != '\0' ; i++) { if (s[i] == '&') - StringStrcat(*tempo, "&"); + StringCat(*tempo, "&"); else if (s[i] == '<') - StringStrcat(*tempo, "<"); + StringCat(*tempo, "<"); else if (s[i] == '>') - StringStrcat(*tempo, ">"); + StringCat(*tempo, ">"); else if (s[i] == '\"') - StringStrcat(*tempo, """); + StringCat(*tempo, """); else StringAddchar(*tempo, s[i]); } } +static char* concat(char *catbuff,const char* a,const char* b) { + if (a != NULL && a[0] != '\0') { + strcpy(catbuff, a); + } else { + catbuff[0] = '\0'; + } + if (b != NULL && b[0] != '\0') { + strcat(catbuff, b); + } + return catbuff; +} + +static char* __fconv(char* a) { +#ifdef WIN32 + int i; + for(i = 0 ; a[i] != 0 ; i++) + if (a[i] == '/') // Unix-to-DOS style + a[i] = '\\'; +#endif + return a; +} + +static char* fconcat(char *catbuff, const char* a, const char* b) { + return __fconv(concat(catbuff,a,b)); +} + +static char* fconv(char *catbuff, const char* a) { + return __fconv(concat(catbuff,a,"")); +} + #endif diff --git a/src/proxy/proxytrack.c b/src/proxy/proxytrack.c index 7604804..195c1a5 100644 --- a/src/proxy/proxytrack.c +++ b/src/proxy/proxytrack.c @@ -119,13 +119,10 @@ Remark: If no cache newer than the added one is found, all entries can be added #include <string.h> #include <time.h> #include <fcntl.h> -#if HTS_WIN +#ifdef _WIN32 #else #include <arpa/inet.h> #endif -#ifndef _WIN32 -#include <signal.h> -#endif /* END specific definitions */ /* String */ @@ -137,12 +134,6 @@ Remark: If no cache newer than the added one is found, all entries can be added /* définitions globales */ #include "htsglobal.h" -/* htslib */ -/*#include "htslib.h"*/ - -/* HTTrack Website Copier Library */ -#include "httrack-library.h" - /* htsweb */ #include "htsinthash.h" @@ -228,27 +219,6 @@ static int linputsoc_t(T_SOC soc, char* s, int max, int timeout) { return -1; } -static void unescapeini(char* s, String* tempo) { - int i; - char lastc=0; - for (i=0;i<(int) strlen(s);i++) { - if (s[i]=='%' && s[i+1]=='%') { - i++; - StringAddchar(*tempo, lastc = '%'); - } else if (s[i]=='%') { - char hc; - i++; - hc = (char) ehex(s+i); - if (!is_retorsep(hc) || !is_retorsep(lastc)) { - StringAddchar(*tempo, lastc = (char) hc); - } - i++; // sauter 2 caractères finalement - } - else - StringAddchar(*tempo, lastc = s[i]); - } -} - static int gethost(const char* hostname, SOCaddr *server, size_t server_size) { if (hostname != NULL && *hostname != '\0') { #if HTS_INET6==0 @@ -257,8 +227,8 @@ static int gethost(const char* hostname, SOCaddr *server, size_t server_size) { */ t_hostent* hp=gethostbyname(hostname); if (hp!=NULL) { - if ( (hp->h_length) && ( ((unsigned int) hp->h_length) <= buffer->addr_maxlen) ) { - SOCaddr_copyaddr(server, server_size, hp->h_addr_list[0], hp->h_length); + if (hp->h_length) { + SOCaddr_copyaddr(*server, server_size, hp->h_addr_list[0], hp->h_length); return 1; } } @@ -325,7 +295,7 @@ static T_SOC smallserver_init(const char* adr, int port, int family) { SOCaddr_initany(server, server_size); if (gethost(adr, &server, server_size)) { // host name T_SOC soc = INVALID_SOCKET; - if ( (soc = socket(SOCaddr_sinfamily(server), family, 0)) != INVALID_SOCKET) { + if ( (soc = (T_SOC) socket(SOCaddr_sinfamily(server), family, 0)) != INVALID_SOCKET) { SOCaddr_initport(server, port); if ( bind(soc,(struct sockaddr*) &server, (int)server_size) == 0 ) { if (family != SOCK_STREAM @@ -379,7 +349,8 @@ int proxytrack_main(char* proxyAddr, int proxyPort, fflush(stderr); // if (!proxytrack_start(index, soc, socICP)) { - fprintf(stderr, "Unable to create the server: %s\n", strerror(errno)); + int last_errno = errno; + fprintf(stderr, "Unable to create the server: %s\n", strerror(last_errno)); #ifdef _WIN32 closesocket(soc); #else @@ -391,7 +362,8 @@ int proxytrack_main(char* proxyAddr, int proxyPort, returncode = 0; } } else { - fprintf(stderr, "Unable to initialize a temporary server : %s\n", strerror(errno)); + int last_errno = errno; + fprintf(stderr, "Unable to initialize a temporary server : %s\n", strerror(last_errno)); returncode = 1; } printf("EXITED\n"); @@ -452,7 +424,7 @@ static const char* GetHttpMessage(int statuscode) { #ifndef NO_WEBDAV static void proxytrack_add_DAV_Item(String *item, String *buff, const char* filename, - unsigned long int size, + size_t size, time_t timestamp, const char* mime, int isDir, @@ -485,7 +457,7 @@ static void proxytrack_add_DAV_Item(String *item, String *buff, } StringRoom(*item, 1024); - sprintf(StringBuff(*item), + sprintf(StringBuffRW(*item), "<response xmlns=\"DAV:\">\r\n" "<href>/webdav%s%s</href>\r\n" "<propstat>\r\n" @@ -644,17 +616,17 @@ static PT_Element proxytrack_process_DAV_Request(PT_Indexes indexes, const char StringClear(buff); /* Canonize URL */ - StringStrcpy(url, file + ((file[0] == '/') ? 1 : 0)); + StringCopy(url, file + ((file[0] == '/') ? 1 : 0)); if (StringLength(url) > 0) { if (StringBuff(url)[StringLength(url) - 1] == '/') { - StringBuff(url)[StringLength(url) - 1] = '\0'; + StringBuffRW(url)[StringLength(url) - 1] = '\0'; StringLength(url)--; } } /* Form response */ StringRoom(response, 1024); - sprintf(StringBuff(response), + sprintf(StringBuffRW(response), "<?xml version=\"1.0\" encoding=\"utf-8\"?>\r\n" "<multistatus xmlns=\"DAV:\">\r\n"); StringLength(response) = (int) strlen(StringBuff(response)); @@ -683,22 +655,22 @@ static PT_Element proxytrack_process_DAV_Request(PT_Indexes indexes, const char /* Item URL */ StringRoom(itemUrl, thisUrlLen + prefixLen + sizeof("/webdav/") + 1); StringClear(itemUrl); - sprintf(StringBuff(itemUrl), "/%s/%s", prefix, thisUrl); + sprintf(StringBuffRW(itemUrl), "/%s/%s", prefix, thisUrl); if (!thisIsDir) StringLength(itemUrl) = (int) strlen(StringBuff(itemUrl)); else StringLength(itemUrl) = (int) strlen(StringBuff(itemUrl)) - 1; - StringBuff(itemUrl)[StringLength(itemUrl)] = '\0'; + StringBuffRW(itemUrl)[StringLength(itemUrl)] = '\0'; if (thisIsDir == isDir) { - unsigned long size = 0; + size_t size = 0; time_t timestamp = (time_t) 0; PT_Element file = NULL; /* Item stats */ if (!isDir) { file = PT_ReadIndex(indexes, StringBuff(itemUrl) + 1, FETCH_HEADERS); - if (file != NULL && file->statuscode == 200 ) { + if (file != NULL && file->statuscode == HTTP_OK ) { size = file->size; if (file->lastmodified) { timestamp = get_time_rfc822(file->lastmodified); @@ -735,7 +707,7 @@ static PT_Element proxytrack_process_DAV_Request(PT_Indexes indexes, const char } /* Depth > 0 */ /* End of responses */ - StringStrcat(response, + StringCat(response, "</multistatus>\r\n" ); @@ -767,7 +739,7 @@ static PT_Element proxytrack_process_HTTP_List(PT_Indexes indexes, const char * int i, isDir; String html = STRING_EMPTY; StringClear(html); - StringStrcat(html, + StringCat(html, "<html>" PROXYTRACK_COMMENT_HEADER DISABLE_IE_FRIENDLY_HTTP_ERROR_MESSAGES @@ -787,28 +759,28 @@ static PT_Element proxytrack_process_HTTP_List(PT_Indexes indexes, const char * int thisIsDir = (thisUrl[thisUrlLen - 1] == '/') ? 1 : 0; if (thisIsDir == isDir) { if (isDir) - StringStrcat(html, "<tt>[DIR] "); + StringCat(html, "<tt>[DIR] "); else - StringStrcat(html, "<tt> "); - StringStrcat(html, "<a href=\""); + StringCat(html, "<tt> "); + StringCat(html, "<a href=\""); if (isDir) { - StringStrcat(html, "http://proxytrack/"); + StringCat(html, "http://proxytrack/"); } - StringStrcat(html, url); - StringStrcat(html, list[i]); - StringStrcat(html, "\">"); - StringStrcat(html, list[i]); - StringStrcat(html, "</a></tt><br />"); + StringCat(html, url); + StringCat(html, list[i]); + StringCat(html, "\">"); + StringCat(html, list[i]); + StringCat(html, "</a></tt><br />"); } } } - StringStrcat(html, + StringCat(html, "</body>" "</html>"); PT_Enumerate_Delete(&list); elt->size = StringLength(html); elt->adr = StringAcquire(&html); - elt->statuscode = 200; + elt->statuscode = HTTP_OK; strcpy(elt->charset, "iso-8859-1"); strcpy(elt->contenttype, "text/html"); strcpy(elt->msg, "OK"); @@ -843,10 +815,10 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) { #endif StringRoom(localhost, 256); - if (gethostname(StringBuff(localhost), StringCapacity(localhost) - 1) == 0) { + if (gethostname(StringBuffRW(localhost), (int) StringCapacity(localhost) - 1) == 0) { StringLength(localhost) = (int) strlen(StringBuff(localhost)); } else { - StringStrcpy(localhost, "localhost"); + StringCopy(localhost, "localhost"); } #ifdef _DEBUG @@ -855,7 +827,7 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) { if (buffer == NULL || line == NULL || line1 == NULL) { CRITICAL("proxytrack_process_HTTP:memory exhausted"); -#if HTS_WIN +#ifdef _WIN32 closesocket(soc_c); #else close(soc_c); @@ -918,7 +890,7 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) { char* chost = line + p; if (*chost == ' ') chost++; - StringStrcpy(host, chost); + StringCopy(host, chost); } #ifndef NO_WEBDAV else if ((p = strfield(line, "Depth: "))) { @@ -935,8 +907,8 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) { if (length > 0) { if (length < 32768) { StringRoom(davRequest, length + 1); - if (recv(soc_c, StringBuff(davRequest), length, 0) == length) { - StringBuff(davRequest)[length] = 0; + if (recv(soc_c, StringBuffRW(davRequest), length, 0) == length) { + StringBuffRW(davRequest)[length] = 0; } else { msgCode = 500; msgError = "Posted Data Read Error"; @@ -970,9 +942,9 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) { else if (strcasecmp(command, "options") == 0) { const char * options = "GET, HEAD, OPTIONS, POST, PROPFIND, TRACE" ", MKCOL, DELETE, PUT"; /* Not supported */ - msgCode = 200; + msgCode = HTTP_OK; StringRoom(headers, 8192); - sprintf(StringBuff(headers), + sprintf(StringBuffRW(headers), "HTTP/1.1 %d %s\r\n" "DAV: 1, 2\r\n" "MS-Author-Via: DAV\r\n" @@ -1020,11 +992,10 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) { /* Post-process request */ if (link_has_authority(surl)) { - const unsigned int prefixLen = sizeof("http://proxytrack/") - 1; - if (strncasecmp(surl, "http://proxytrack/", prefixLen) == 0) { + if (strncasecmp(surl, "http://proxytrack/", sizeof("http://proxytrack/") - 1) == 0) { directHit = 1; /* Another direct hit hack */ } - StringStrcpy(url, surl); + StringCopy(url, surl); } else { if (StringLength(host) > 0) { /* Direct hit */ @@ -1046,15 +1017,23 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) { } /* Direct hit */ directHit = 1; - StringStrcpy(url, ""); + StringCopy(url, ""); + if (!link_has_authority(toHit)) + StringCat(url, "http://"); + StringCat(url, toHit); + } else if (strncasecmp(surl, "/proxytrack/", sizeof("/proxytrack/") - 1) == 0) { + const char * toHit = surl + sizeof("/proxytrack/") - 1; + /* Direct hit */ + directHit = 1; + StringCopy(url, ""); if (!link_has_authority(toHit)) - StringStrcat(url, "http://"); - StringStrcat(url, toHit); + StringCat(url, "http://"); + StringCat(url, toHit); } else { /* Transparent proxy */ - StringStrcpy(url, "http://"); - StringStrcat(url, StringBuff(host)); - StringStrcat(url, surl); + StringCopy(url, "http://"); + StringCat(url, StringBuff(host)); + StringCat(url, surl); } } else { msgCode = 500; @@ -1073,7 +1052,7 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) { if ((element = proxytrack_process_DAV_Request(indexes, StringBuff(url), davDepth)) != NULL) { msgCode = element->statuscode; StringRoom(davHeaders, 1024); - sprintf(StringBuff(davHeaders), + sprintf(StringBuffRW(davHeaders), "DAV: 1, 2\r\n" "MS-Author-Via: DAV\r\n" "Cache-Control: private\r\n"); @@ -1100,7 +1079,7 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) { if (element != NULL) { msgCode = element->statuscode; StringRoom(headers, 8192); - sprintf(StringBuff(headers), + sprintf(StringBuffRW(headers), "HTTP/1.1 %d %s\r\n" #ifndef NO_WEBDAV "%s" @@ -1138,12 +1117,12 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) { } else { /* No query string, no ending / : check the the <url>/ page */ if (StringLength(url) > 0 && StringBuff(url)[StringLength(url) - 1] != '/' && strchr(StringBuff(url), '?') == NULL) { - StringStrcpy(urlRedirect, StringBuff(url)); - StringStrcat(urlRedirect, "/"); + StringCopy(urlRedirect, StringBuff(url)); + StringCat(urlRedirect, "/"); if (PT_LookupIndex(indexes, StringBuff(urlRedirect))) { msgCode = 301; /* Moved Permanently */ StringRoom(headers, 8192); - sprintf(StringBuff(headers), + sprintf(StringBuffRW(headers), "HTTP/1.1 %d %s\r\n" "Content-Type: text/html\r\n" "Location: %s\r\n", @@ -1155,7 +1134,7 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) { StringLength(headers) = (int) strlen(StringBuff(headers)); /* */ StringRoom(output, 1024 + sizeof(PROXYTRACK_COMMENT_HEADER) + sizeof(DISABLE_IE_FRIENDLY_HTTP_ERROR_MESSAGES)); - sprintf(StringBuff(output), + sprintf(StringBuffRW(output), "<html>" PROXYTRACK_COMMENT_HEADER DISABLE_IE_FRIENDLY_HTTP_ERROR_MESSAGES @@ -1195,14 +1174,14 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) { msgError = GetHttpMessage(msgCode); } StringRoom(headers, 256); - sprintf(StringBuff(headers), + sprintf(StringBuffRW(headers), "HTTP/1.1 %d %s\r\n" "Content-type: text/html\r\n", msgCode, msgError); StringLength(headers) = (int) strlen(StringBuff(headers)); StringRoom(output, 1024 + sizeof(PROXYTRACK_COMMENT_HEADER) + sizeof(DISABLE_IE_FRIENDLY_HTTP_ERROR_MESSAGES)); - sprintf(StringBuff(output), + sprintf(StringBuffRW(output), "<html>" PROXYTRACK_COMMENT_HEADER DISABLE_IE_FRIENDLY_HTTP_ERROR_MESSAGES @@ -1225,7 +1204,7 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) { } { char tmp[20 + 1]; /* 2^64 = 18446744073709551616 */ - unsigned int dataSize = 0; + size_t dataSize = 0; if (!headRequest) { dataSize = StringLength(output); if (dataSize == 0 && element != NULL) { @@ -1233,30 +1212,30 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) { } } sprintf(tmp, "%d", (int) dataSize); - StringStrcat(headers, "Content-length: "); - StringStrcat(headers, tmp); - StringStrcat(headers, "\r\n"); + StringCat(headers, "Content-length: "); + StringCat(headers, tmp); + StringCat(headers, "\r\n"); } if (keepAlive) { - StringStrcat(headers, + StringCat(headers, "Connection: Keep-Alive\r\n" "Proxy-Connection: Keep-Alive\r\n"); } else { - StringStrcat(headers, + StringCat(headers, "Connection: Close\r\n" "Proxy-Connection: Close\r\n"); } if (msgCode != 500) - StringStrcat(headers, "X-Cache: HIT from "); + StringCat(headers, "X-Cache: HIT from "); else - StringStrcat(headers, "X-Cache: MISS from "); - StringStrcat(headers, StringBuff(localhost)); - StringStrcat(headers, "\r\n"); + StringCat(headers, "X-Cache: MISS from "); + StringCat(headers, StringBuff(localhost)); + StringCat(headers, "\r\n"); /* Logging */ { const char * contentType = "text/html"; - unsigned long int size = StringLength(output) ? StringLength(output) : ( element ? element->size : 0 ); + size_t size = StringLength(output) ? StringLength(output) : ( element ? element->size : 0 ); /* */ String ip = STRING_EMPTY; SOCaddr serverClient; @@ -1265,7 +1244,7 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) { if (getsockname(soc_c, (struct sockaddr*) &serverClient, &lenServerClient) == 0) { ip = getip(&serverClient, lenServerClient); } else { - StringStrcpy(ip, "unknown"); + StringCopy(ip, "unknown"); } if (element != NULL && element->contenttype[0] != '\0') { contentType = element->contenttype; @@ -1275,11 +1254,11 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) { } /* Send reply */ - StringStrcat(headers, "Server: ProxyTrack " PROXYTRACK_VERSION " (HTTrack " HTTRACK_VERSIONID ")\r\n"); - StringStrcat(headers, "\r\n"); /* Headers separator */ - if (send(soc_c, StringBuff(headers), StringLength(headers), 0) != StringLength(headers) - || ( !headRequest && StringLength(output) > 0 && send(soc_c, StringBuff(output), StringLength(output), 0) != StringLength(output)) - || ( !headRequest && StringLength(output) == 0 && element != NULL && element->adr != NULL && send(soc_c, element->adr, element->size, 0) != element->size) + StringCat(headers, "Server: ProxyTrack " PROXYTRACK_VERSION " (HTTrack " HTTRACK_VERSIONID ")\r\n"); + StringCat(headers, "\r\n"); /* Headers separator */ + if (send(soc_c, StringBuff(headers), (int)StringLength(headers), 0) != StringLength(headers) + || ( !headRequest && StringLength(output) > 0 && send(soc_c, StringBuff(output), (int)StringLength(output), 0) != StringLength(output)) + || ( !headRequest && StringLength(output) == 0 && element != NULL && element->adr != NULL && send(soc_c, element->adr, (int)element->size, 0) != element->size) ) { keepAlive = 0; /* Error, abort connection */ @@ -1298,7 +1277,7 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) { } } while(keepAlive); -#if HTS_WIN +#ifdef _WIN32 closesocket(soc_c); #else close(soc_c); @@ -1314,19 +1293,8 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) { free(buffer); } -#ifdef _WIN32 -#define PTHREAD_RETURN -#define PTHREAD_TYPE void -#define PTHREAD_TYPE_FNC __cdecl -#else -#define PTHREAD_RETURN NULL -#define PTHREAD_TYPE void* -#define PTHREAD_TYPE_FNC -#endif - /* Generic threaded function start */ -static int startThread(PTHREAD_TYPE (PTHREAD_TYPE_FNC * funct)(void* ), - void* param) +static int startThread(void (*funct)(void*), void* param) { if (param != NULL) { #ifdef _WIN32 @@ -1361,14 +1329,14 @@ typedef struct proxytrack_process_th_p { } proxytrack_process_th_p; /* Generic socket/index function stub */ -static PTHREAD_TYPE PTHREAD_TYPE_FNC proxytrack_process_th(void* param_) { +static void proxytrack_process_th(void* param_) { proxytrack_process_th_p *param = (proxytrack_process_th_p *) param_; T_SOC soc_c = param->soc_c; PT_Indexes indexes = param->indexes; void (*process)(PT_Indexes indexes, T_SOC soc_c) = param->process; free(param); process(indexes, soc_c); - return PTHREAD_RETURN ; + return ; } /* Process generic socket/index operation */ @@ -1400,7 +1368,7 @@ static int proxytrack_start_HTTP(PT_Indexes indexes, T_SOC soc) { struct sockaddr clientAddr; int clientAddrLen = sizeof(struct sockaddr); memset(&clientAddr, 0, sizeof(clientAddr)); - if ( (soc_c = accept(soc, &clientAddr, &clientAddrLen)) != INVALID_SOCKET) { + if ( (soc_c = (T_SOC) accept(soc, &clientAddr, &clientAddrLen)) != INVALID_SOCKET) { if (!proxytrack_process_HTTP_threaded(indexes, soc_c)) { CRITICAL("proxytrack_start_HTTP::Can not fork a thread"); } @@ -1587,7 +1555,7 @@ static int proxytrack_start_ICP(PT_Indexes indexes, T_SOC soc) { if (lenServerClient > 0) { ip = getip(&serverClient, lenServerClient); } else { - StringStrcpy(ip, "unknown"); + StringCopy(ip, "unknown"); } LOG("ICP %s %s/%s %s" _ StringBuff(ip) _ LogRequest _ LogReply _ (UrlRequest ? UrlRequest : "-") ); StringFree(ip); diff --git a/src/proxy/proxytrack.h b/src/proxy/proxytrack.h index 498f4d8..737c5ea 100644 --- a/src/proxy/proxytrack.h +++ b/src/proxy/proxytrack.h @@ -29,13 +29,26 @@ Please visit our Website: http://www.httrack.com #define WEBHTTRACK_PROXYTRACK /* Version */ -#define PROXYTRACK_VERSION "0.4" +#define PROXYTRACK_VERSION "0.5" /* Store manager */ #include "../minizip/mztools.h" #include "store.h" #include <sys/stat.h> +#ifndef HTS_DO_NOT_USE_FTIME +#ifdef _WIN32 +#include <sys/utime.h> +#else +#include <utime.h> +#endif +#include <sys/timeb.h> +#else +#include <utime.h> +#endif +#ifndef _WIN32 +#include <pthread.h> +#endif /* generic */ @@ -233,23 +246,6 @@ static int linput_trim(FILE* fp,char* s,int max) { return rlen; } -// copy of concat -#define HTS_URLMAXSIZE 1024 -typedef struct concat_strc { - char buff[16][HTS_URLMAXSIZE*2*2]; - int rol; -} concat_strc; -static char* concat(const char* a,const char* b) { - static concat_strc* strc = NULL; - if (strc == NULL) { - strc = (concat_strc*) calloc(16, sizeof(concat_strc)); - } - strc->rol=((strc->rol+1)%16); // roving pointer - strcpy(strc->buff[strc->rol],a); - if (b) strcat(strc->buff[strc->rol],b); - return strc->buff[strc->rol]; -} - #ifndef S_ISREG #define S_ISREG(m) ((m) & _S_IFREG) #endif @@ -264,25 +260,135 @@ static int fexist(char* s) { return 0; } -#ifndef _WIN32 -#define fconv(a) (a) -#define fconcat(a,b) concat(a,b) -#endif - -#ifdef _WIN32 -static char* __fconv(char* a) { +/* convertir une chaine en temps */ +static void set_lowcase(char* s) { int i; - for(i=0;i<(int) strlen(a);i++) - if (a[i]=='/') // convertir - a[i]='\\'; - return a; + for(i=0;i<(int) strlen(s);i++) + if ((s[i]>='A') && (s[i]<='Z')) + s[i]+=('a'-'A'); } -static char* fconcat(char* a,char* b) { - return __fconv(concat(a,b)); +static struct tm* convert_time_rfc822(struct tm *result,const char* s) { + char months[]="jan feb mar apr may jun jul aug sep oct nov dec"; + char str[256]; + char* a; + /* */ + int result_mm=-1; + int result_dd=-1; + int result_n1=-1; + int result_n2=-1; + int result_n3=-1; + int result_n4=-1; + /* */ + + if ((int) strlen(s) > 200) + return NULL; + strcpy(str,s); + set_lowcase(str); + /* éliminer :,- */ + while( (a=strchr(str,'-')) ) *a=' '; + while( (a=strchr(str,':')) ) *a=' '; + while( (a=strchr(str,',')) ) *a=' '; + /* tokeniser */ + a=str; + while(*a) { + char *first, *last; + char tok[256]; + /* découper mot */ + while(*a==' ') a++; /* sauter espaces */ + first=a; + while((*a) && (*a!=' ')) a++; + last=a; + tok[0]='\0'; + if (first!=last) { + char* pos; + strncat(tok,first,(int) (last - first)); + /* analyser */ + if ( (pos=strstr(months,tok)) ) { /* month always in letters */ + result_mm=((int) (pos - months))/4; + } else { + int number; + if (sscanf(tok,"%d",&number) == 1) { /* number token */ + if (result_dd<0) /* day always first number */ + result_dd=number; + else if (result_n1<0) + result_n1=number; + else if (result_n2<0) + result_n2=number; + else if (result_n3<0) + result_n3=number; + else if (result_n4<0) + result_n4=number; + } /* sinon, bruit de fond(+1GMT for exampel) */ + } + } + } + if ((result_n1>=0) && (result_mm>=0) && (result_dd>=0) && (result_n2>=0) && (result_n3>=0) && (result_n4>=0)) { + if (result_n4>=1000) { /* Sun Nov 6 08:49:37 1994 */ + result->tm_year=result_n4-1900; + result->tm_hour=result_n1; + result->tm_min=result_n2; + result->tm_sec=max(result_n3,0); + } else { /* Sun, 06 Nov 1994 08:49:37 GMT or Sunday, 06-Nov-94 08:49:37 GMT */ + result->tm_hour=result_n2; + result->tm_min=result_n3; + result->tm_sec=max(result_n4,0); + if (result_n1<=50) /* 00 means 2000 */ + result->tm_year=result_n1+100; + else if (result_n1<1000) /* 99 means 1999 */ + result->tm_year=result_n1; + else /* 2000 */ + result->tm_year=result_n1-1900; + } + result->tm_isdst=0; /* assume GMT */ + result->tm_yday=-1; /* don't know */ + result->tm_wday=-1; /* don't know */ + result->tm_mon=result_mm; + result->tm_mday=result_dd; + return result; + } + return NULL; } -static char* fconv(char* a) { - return __fconv(concat(a,"")); +static struct tm PT_GetTime(time_t t) { + struct tm tmbuf; +#ifdef _WIN32 + struct tm * tm = gmtime(&t); +#else + struct tm * tm = gmtime_r(&t, &tmbuf); +#endif + if (tm != NULL) + return *tm; + else { + memset(&tmbuf, 0, sizeof(tmbuf)); + return tmbuf; + } } +static int set_filetime(const char* file, struct tm* tm_time) { + struct utimbuf tim; +#ifndef HTS_DO_NOT_USE_FTIME + struct timeb B; + memset(&B, 0, sizeof(B)); + B.timezone=0; + ftime( &B ); + tim.actime = tim.modtime = mktime(tm_time) - B.timezone*60; +#else + // bogus time (GMT/local).. + tim.actime=tim.modtime=mktime(tm_time); #endif + return utime(file, &tim); +} +static int set_filetime_time_t(const char* file, time_t t) { + if (t != (time_t) 0 && t != (time_t) -1) { + struct tm tm = PT_GetTime(t); + return set_filetime(file, &tm); + } + return -1; +} +static int set_filetime_rfc822(const char* file, const char* date) { + struct tm buffer; + struct tm* tm_s = convert_time_rfc822(&buffer,date); + if (tm_s) { + return set_filetime(file,tm_s); + } else return -1; +} #endif diff --git a/src/proxy/store.c b/src/proxy/store.c index 1d17574..b8233a8 100644 --- a/src/proxy/store.c +++ b/src/proxy/store.c @@ -20,6 +20,8 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. Please visit our Website: http://www.httrack.com */ +/* Parts (inside ARC format routines) by Lars Clausen (lc@statsbiblioteket.dk) */ + /* ------------------------------------------------------------ */ /* File: Cache manager for ProxyTrack */ /* Author: Xavier Roche */ @@ -28,6 +30,7 @@ Please visit our Website: http://www.httrack.com #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <time.h> /* Locking */ #ifdef _WIN32 @@ -40,8 +43,10 @@ Please visit our Website: http://www.httrack.com #define HTS_INTERNAL_BYTECODE #include "htsinthash.h" +#include "htsmd5.h" #undef HTS_INTERNAL_BYTECODE #include "../minizip/mztools.h" +#include "../minizip/zip.h" #include "htscore.h" #include "htsback.h" @@ -58,6 +63,8 @@ static PT_Element PT_ReadCache__New_u(PT_Index index, const char* url, int flags static int PT_LookupCache__Old_u(PT_Index index, const char* url); static PT_Element PT_ReadCache__Old_u(PT_Index index, const char* url, int flags); +static int PT_LookupCache__Arc_u(PT_Index index, const char* url); +static PT_Element PT_ReadCache__Arc_u(PT_Index index, const char* url, int flags); /* Locking */ @@ -100,10 +107,12 @@ void MutexFree(PT_Mutex *pMutex) { typedef struct _PT_Index__New _PT_Index__New; typedef struct _PT_Index__Old _PT_Index__Old; +typedef struct _PT_Index__Arc _PT_Index__Arc; typedef struct _PT_Index_Functions _PT_Index_Functions; typedef struct _PT_Index__New *PT_Index__New; typedef struct _PT_Index__Old *PT_Index__Old; +typedef struct _PT_Index__Arc *PT_Index__Arc; typedef struct _PT_Index_Functions *PT_Index_Functions; enum { @@ -111,29 +120,42 @@ enum { PT_CACHE_MIN = 0, PT_CACHE__NEW = PT_CACHE_MIN, PT_CACHE__OLD, - PT_CACHE_MAX = PT_CACHE__OLD + PT_CACHE__ARC, + PT_CACHE_MAX = PT_CACHE__ARC }; static int PT_LoadCache__New(PT_Index index, const char *filename); static void PT_Index_Delete__New(PT_Index *pindex); static PT_Element PT_ReadCache__New(PT_Index index, const char* url, int flags); static int PT_LookupCache__New(PT_Index index, const char* url); +static int PT_SaveCache__New(PT_Indexes indexes, const char *filename); /**/ static int PT_LoadCache__Old(PT_Index index, const char *filename); static void PT_Index_Delete__Old(PT_Index *pindex); static PT_Element PT_ReadCache__Old(PT_Index index, const char* url, int flags); static int PT_LookupCache__Old(PT_Index index, const char* url); +/**/ +static int PT_LoadCache__Arc(PT_Index index, const char *filename); +static void PT_Index_Delete__Arc(PT_Index *pindex); +static PT_Element PT_ReadCache__Arc(PT_Index index, const char* url, int flags); +static int PT_LookupCache__Arc(PT_Index index, const char* url); +static int PT_SaveCache__Arc(PT_Indexes indexes, const char *filename); struct _PT_Index_Functions { + /* Mandatory services */ int (*PT_LoadCache)(PT_Index index, const char *filename); void (*PT_Index_Delete)(PT_Index *pindex); PT_Element (*PT_ReadCache)(PT_Index index, const char* url, int flags); int (*PT_LookupCache)(PT_Index index, const char* url); + + /* Optional services */ + int (*PT_SaveCache)(PT_Indexes indexes, const char *filename); }; static _PT_Index_Functions _IndexFuncts[] = { - { PT_LoadCache__New, PT_Index_Delete__New, PT_ReadCache__New, PT_LookupCache__New }, - { PT_LoadCache__Old, PT_Index_Delete__Old, PT_ReadCache__Old, PT_LookupCache__Old }, + { PT_LoadCache__New, PT_Index_Delete__New, PT_ReadCache__New, PT_LookupCache__New, PT_SaveCache__New }, + { PT_LoadCache__Old, PT_Index_Delete__Old, PT_ReadCache__Old, PT_LookupCache__Old, NULL }, + { PT_LoadCache__Arc, PT_Index_Delete__Arc, PT_ReadCache__Arc, PT_LookupCache__Arc, PT_SaveCache__Arc }, { NULL, NULL, NULL, NULL } }; @@ -164,11 +186,22 @@ struct _PT_Index__Old { int safeCache; }; +struct _PT_Index__Arc { + PT_INDEX_COMMON_STRUCTURE; + FILE *file; + PT_Mutex fileLock; + int version; + char lastmodified[1024]; + char line[2048]; + char filenameIndexBuff[2048]; +}; + struct _PT_Index { int type; union { _PT_Index__New formatNew; _PT_Index__Old formatOld; + _PT_Index__Arc formatArc; struct { PT_INDEX_COMMON_STRUCTURE; } common; @@ -194,7 +227,7 @@ struct _PT_Cache { int count; }; -PT_Indexes PT_New() { +PT_Indexes PT_New(void) { PT_Indexes index = (PT_Indexes) calloc(sizeof(_PT_Indexes), 1); index->cil = inthash_new(127); index->index_size = 0; @@ -301,6 +334,16 @@ static void PT_Index_Delete__Old(PT_Index *pindex) { } } +static void PT_Index_Delete__Arc(PT_Index *pindex) { + if (pindex != NULL && (*pindex) != NULL) { + PT_Index__Arc index = &(*pindex)->slots.formatArc; + if (index->file != NULL) { + fclose(index->file); + } + MutexFree(&index->fileLock); + } +} + int PT_AddIndex(PT_Indexes indexes, const char *path) { PT_Index index = PT_LoadCache(path); if (index != NULL) { @@ -319,7 +362,7 @@ PT_Element PT_Index_HTML_BuildRootInfo(PT_Indexes indexes) { int i; String html = STRING_EMPTY; StringClear(html); - StringStrcat(html, + StringCat(html, "<html>" PROXYTRACK_COMMENT_HEADER DISABLE_IE_FRIENDLY_HTTP_ERROR_MESSAGES @@ -330,26 +373,26 @@ PT_Element PT_Index_HTML_BuildRootInfo(PT_Indexes indexes) { "<h3>Available sites in this cache:</h3><br />" "<br />" ); - StringStrcat(html, "<ul>\r\n"); + StringCat(html, "<ul>\r\n"); for(i = 0 ; i < indexes->index_size ; i++) { if (indexes->index[i] != NULL && indexes->index[i]->slots.common.startUrl[0] != '\0') { const char * url = indexes->index[i]->slots.common.startUrl; - StringStrcat(html, "<li>\r\n"); - StringStrcat(html, "<a href=\""); - StringStrcat(html, url); - StringStrcat(html, "\">"); - StringStrcat(html, url); - StringStrcat(html, "</a>\r\n"); - StringStrcat(html, "</li>\r\n"); + StringCat(html, "<li>\r\n"); + StringCat(html, "<a href=\""); + StringCat(html, url); + StringCat(html, "\">"); + StringCat(html, url); + StringCat(html, "</a>\r\n"); + StringCat(html, "</li>\r\n"); } } - StringStrcat(html, "</ul>\r\n"); - StringStrcat(html, "</body></html>\r\n"); + StringCat(html, "</ul>\r\n"); + StringCat(html, "</body></html>\r\n"); elt->size = StringLength(html); elt->adr = StringAcquire(&html); - elt->statuscode = 200; + elt->statuscode = HTTP_OK; strcpy(elt->charset, "iso-8859-1"); strcpy(elt->contenttype, "text/html"); strcpy(elt->msg, "OK"); @@ -404,9 +447,9 @@ char ** PT_Enumerate(PT_Indexes indexes, const char *url, int subtree) { char* ptr = NULL; ptr += StringLength(list); if (len > 0) - StringStrcat(list, StringBuff(subitem)); + StringCat(list, StringBuff(subitem)); if (isFolder) - StringStrcat(list, "/"); + StringCat(list, "/"); StringMemcat(list, "\0", 1); /* NULL terminated strings */ StringMemcat(listindexes, &ptr, sizeof(ptr)); listCount++; @@ -434,7 +477,7 @@ char ** PT_Enumerate(PT_Indexes indexes, const char *url, int subtree) { StringMemcat(listindexes, StringBuff(list), StringLength(list)); /* ---- no reallocation beyond this point (fixed addresses) ---- */ /* start of all strings (pointer) */ - startStrings = (startStrings - nullPointer) + StringBuff(listindexes); + startStrings = (startStrings - nullPointer) + StringBuffRW(listindexes); /* transform indexes into references */ for(i = 0 ; i < listCount ; i++) { char *ptr = NULL; @@ -442,7 +485,7 @@ char ** PT_Enumerate(PT_Indexes indexes, const char *url, int subtree) { memcpy(&ptr, &StringBuff(listindexes)[i*sizeof(char*)], sizeof(char*)); ndx = (unsigned int) (ptr - nullPointer); ptr = startStrings + ndx; - memcpy(&StringBuff(listindexes)[i*sizeof(char*)], &ptr, sizeof(char*)); + memcpy(&StringBuffRW(listindexes)[i*sizeof(char*)], &ptr, sizeof(char*)); } blk = StringAcquire(&listindexes); StringFree(list); @@ -460,16 +503,22 @@ void PT_Enumerate_Delete(char ***plist) { } } -PT_Index PT_LoadCache(const char *filename) { - int type = PT_CACHE_UNDEFINED; +static int PT_GetType(const char *filename) { char * dot = strrchr(filename, '.'); if (dot != NULL) { if (strcasecmp(dot, ".zip") == 0) { - type = PT_CACHE__NEW; + return PT_CACHE__NEW; } else if (strcasecmp(dot, ".ndx") == 0 || strcasecmp(dot, ".dat") == 0) { - type = PT_CACHE__OLD; + return PT_CACHE__OLD; + } else if (strcasecmp(dot, ".arc") == 0) { + return PT_CACHE__ARC; } } + return PT_CACHE_UNDEFINED; +} + +PT_Index PT_LoadCache(const char *filename) { + int type = PT_GetType(filename); if (type != PT_CACHE_UNDEFINED) { PT_Index index = calloc(sizeof(_PT_Index), 1); if (index != NULL) { @@ -524,6 +573,44 @@ int PT_LookupCache(PT_Index index, const char* url) { return 0; } +int PT_SaveCache(PT_Indexes indexes, const char *filename) { + int type = PT_GetType(filename); + if (type != PT_CACHE_UNDEFINED) { + if (_IndexFuncts[type].PT_SaveCache != NULL) { + int ret = _IndexFuncts[type].PT_SaveCache(indexes, filename); + if (ret == 0) { + (void) set_filetime_time_t(filename, PT_GetTimeIndex(indexes)); + return 0; + } + } + } + return -1; +} + +int PT_EnumCache(PT_Indexes indexes, int (*callback)(void *, const char *url, PT_Element), void *arg) { + if (indexes != NULL && indexes->cil != NULL) { + struct_inthash_enum en = inthash_enum_new(indexes->cil); + inthash_chain* chain; + while((chain = inthash_enum_next(&en))) { + const long int index_id = (long int)chain->value.intg; + const char *const url = chain->name; + if (index_id >= 0 && index_id <= indexes->index_size) { + PT_Element item = PT_ReadCache(indexes->index[index_id], url, FETCH_HEADERS | FETCH_BODY); + if (item != NULL) { + int ret = callback(arg, url, item); + PT_Element_Delete(&item); + if (ret != 0) + return ret; + } + } else { + CRITICAL("PT_ReadCache:Corrupted central index locator"); + return -1; + } + } + } + return 0; +} + time_t PT_Index_Timestamp(PT_Index index) { return index->slots.common.timestamp; } @@ -569,8 +656,8 @@ int PT_IndexMerge(PT_Indexes indexes, PT_Index *pindex) while((chain = inthash_enum_next(&en)) != NULL) { const char * url = chain->name; if (url != NULL && url[0] != '\0') { - long int previous_index_id = 0; - if (inthash_read(indexes->cil, url, (long int*)&previous_index_id)) { + intptr_t previous_index_id = 0; + if (inthash_read(indexes->cil, url, &previous_index_id)) { if (previous_index_id >= 0 && previous_index_id < indexes->index_size) { if (indexes->index[previous_index_id]->slots.common.timestamp > index->slots.common.timestamp) // existing entry is newer break; @@ -616,14 +703,14 @@ PT_Element PT_ReadIndex(PT_Indexes indexes, const char* url, int flags) { if (indexes != NULL) { - long int index_id; + intptr_t index_id; if (strncmp(url, "http://", 7) == 0) url += 7; if (inthash_read(indexes->cil, url, &index_id)) { if (index_id >= 0 && index_id <= indexes->index_size) { PT_Element item = PT_ReadCache(indexes->index[index_id], url, flags); if (item != NULL) { - item->indexId = index_id; + item->indexId = (int) index_id; return item; } } else { @@ -637,7 +724,7 @@ PT_Element PT_ReadIndex(PT_Indexes indexes, const char* url, int flags) int PT_LookupIndex(PT_Indexes indexes, const char* url) { if (indexes != NULL) { - long int index_id; + intptr_t index_id; if (strncmp(url, "http://", 7) == 0) url += 7; if (inthash_read(indexes->cil, url, &index_id)) { @@ -651,6 +738,22 @@ int PT_LookupIndex(PT_Indexes indexes, const char* url) { return 0; } +time_t PT_GetTimeIndex(PT_Indexes indexes) { + if (indexes != NULL && indexes->index_size > 0) + { + int i; + time_t maxt = indexes->index[0]->slots.common.timestamp; + for(i = 1 ; i < indexes->index_size ; i++) { + const time_t currt = indexes->index[i]->slots.common.timestamp; + if (currt > maxt) { + maxt = currt; + } + } + return maxt; + } + return (time_t) -1; +} + PT_Index PT_GetIndex(PT_Indexes indexes, int indexId) { if (indexes != NULL && indexId >= 0 && indexId < indexes->index_size) { @@ -659,7 +762,7 @@ PT_Index PT_GetIndex(PT_Indexes indexes, int indexId) { return NULL; } -PT_Element PT_ElementNew() { +PT_Element PT_ElementNew(void) { PT_Element r = NULL; if ((r = calloc(sizeof(_PT_Element), 1)) == NULL) return NULL; @@ -690,6 +793,22 @@ static PT_Element PT_ReadCache__New(PT_Index index, const char* url, int flags) /* New HTTrack cache (new.zip) format */ /* ------------------------------------------------------------ */ +#define ZIP_FIELD_STRING(headers, headersSize, field, value) do { \ + if ( (value != NULL) && (value)[0] != '\0') { \ + sprintf(headers + headersSize, "%s: %s\r\n", field, (value != NULL) ? (value) : ""); \ + (headersSize) += (int) strlen(headers + headersSize); \ + } \ +} while(0) +#define ZIP_FIELD_INT(headers, headersSize, field, value) do { \ + if ( (value != 0) ) { \ + sprintf(headers + headersSize, "%s: "LLintP"\r\n", field, (LLint)(value)); \ + (headersSize) += (int) strlen(headers + headersSize); \ + } \ +} while(0) +#define ZIP_FIELD_INT_FORCE(headers, headersSize, field, value) do { \ + sprintf(headers + headersSize, "%s: "LLintP"\r\n", field, (LLint)(value)); \ + (headersSize) += (int) strlen(headers + headersSize); \ +} while(0) #define ZIP_READFIELD_STRING(line, value, refline, refvalue) do { \ if (line[0] != '\0' && strfield2(line, refline)) { \ strcpy(refvalue, value); \ @@ -821,7 +940,8 @@ static PT_Element PT_ReadCache__New_u(PT_Index index_, const char* url, int flag char location_default[HTS_URLMAXSIZE*2]; char previous_save[HTS_URLMAXSIZE*2]; char previous_save_[HTS_URLMAXSIZE*2]; - long int hash_pos; + char catbuff[CATBUFF_SIZE]; + intptr_t hash_pos; int hash_pos_return; PT_Element r = NULL; if (index == NULL || index->hash == NULL || index->zFile == NULL || url == NULL || *url == 0) @@ -835,7 +955,7 @@ static PT_Element PT_ReadCache__New_u(PT_Index index_, const char* url, int flag strcpy(r->location, ""); if (strncmp(url, "http://", 7) == 0) url += 7; - hash_pos_return = inthash_read(index->hash, url, (long int*)&hash_pos); + hash_pos_return = inthash_read(index->hash, url, &hash_pos); if (hash_pos_return) { uLong posInZip; @@ -888,6 +1008,16 @@ static PT_Element PT_ReadCache__New_u(PT_Index index_, const char* url, int flag //ZIP_READFIELD_STRING(line, value, "X-Addr", ..); // Original address //ZIP_READFIELD_STRING(line, value, "X-Fil", ..); // Original URI filename ZIP_READFIELD_STRING(line, value, "X-Save", previous_save_); // Original save filename + if (line[0] != '\0') { + int len = r->headers ? ((int) strlen(r->headers)) : 0; + int nlen = (int) ( strlen(line) + 2 + strlen(value) + sizeof("\r\n") + 1 ); + r->headers = realloc(r->headers, len + nlen); + r->headers[len] = '\0'; + strcat(r->headers, line); + strcat(r->headers, ": "); + strcat(r->headers, value); + strcat(r->headers, "\r\n"); + } } } while(offset < readSizeHeader && !lineEof); totalHeader = offset; @@ -955,13 +1085,14 @@ static PT_Element PT_ReadCache__New_u(PT_Index index_, const char* url, int flag /* Read in memory from cache */ if (flags & FETCH_BODY) { if (strnotempty(previous_save)) { - FILE* fp = fopen(fconv(previous_save), "rb"); + FILE* fp = fopen(fconv(catbuff,previous_save), "rb"); if (fp != NULL) { r->adr = (char*) malloc(r->size + 4); if (r->adr != NULL) { if (r->size > 0 && fread(r->adr, 1, r->size, fp) != r->size) { + int last_errno = errno; r->statuscode=STATUSCODE_INVALID; - sprintf(r->msg,"Read error in cache disk data: %s", strerror(errno)); + sprintf(r->msg,"Read error in cache disk data: %s", strerror(last_errno)); } } else { r->statuscode=STATUSCODE_INVALID; @@ -970,7 +1101,7 @@ static PT_Element PT_ReadCache__New_u(PT_Index index_, const char* url, int flag fclose(fp); } else { r->statuscode=STATUSCODE_INVALID; - sprintf(r->msg, "Read error (can't open '%s') from cache", fconv(previous_save)); + sprintf(r->msg, "Read error (can't open '%s') from cache", fconv(catbuff,previous_save)); } } else { r->statuscode=STATUSCODE_INVALID; @@ -982,7 +1113,7 @@ static PT_Element PT_ReadCache__New_u(PT_Index index_, const char* url, int flag if (flags & FETCH_BODY) { r->adr=(char*) malloc(r->size+1); if (r->adr!=NULL) { - if (unzReadCurrentFile(index->zFile, r->adr, r->size) != r->size) { // erreur + if (unzReadCurrentFile(index->zFile, r->adr, (unsigned int) r->size) != r->size) { // erreur free(r->adr); r->adr=NULL; r->statuscode=STATUSCODE_INVALID; @@ -1024,6 +1155,121 @@ static PT_Element PT_ReadCache__New_u(PT_Index index_, const char* url, int flag return r; } +static int PT_SaveCache__New_Fun(void *arg, const char *url, PT_Element element) { + zipFile zFileOut = (zipFile) arg; + char headers[8192]; + int headersSize; + zip_fileinfo fi; + int zErr; + const char *url_adr = ""; + const char *url_fil = ""; + + headers[0] = '\0'; + headersSize = 0; + + /* Fields */ + headers[0] = '\0'; + headersSize = 0; + /* */ + { + char* message; + if (strlen(element->msg) < 32) { + message = element->msg; + } else { + message = "(See X-StatusMessage)"; + } + /* 64 characters MAX for first line */ + sprintf(headers + headersSize, "HTTP/1.%c %d %s\r\n", '1', element->statuscode, element->msg); + } + headersSize += (int) strlen(headers + headersSize); + + /* Second line MUST ALWAYS be X-In-Cache */ + ZIP_FIELD_INT_FORCE(headers, headersSize, "X-In-Cache", 1); + ZIP_FIELD_INT(headers, headersSize, "X-StatusCode", element->statuscode); + ZIP_FIELD_STRING(headers, headersSize, "X-StatusMessage", element->msg); + ZIP_FIELD_INT(headers, headersSize, "X-Size", element->size); // size + ZIP_FIELD_STRING(headers, headersSize, "Content-Type", element->contenttype); // contenttype + ZIP_FIELD_STRING(headers, headersSize, "X-Charset", element->charset); // contenttype + ZIP_FIELD_STRING(headers, headersSize, "Last-Modified", element->lastmodified); // last-modified + ZIP_FIELD_STRING(headers, headersSize, "Etag", element->etag); // Etag + ZIP_FIELD_STRING(headers, headersSize, "Location", element->location); // 'location' pour moved + ZIP_FIELD_STRING(headers, headersSize, "Content-Disposition", element->cdispo); // Content-disposition + ZIP_FIELD_STRING(headers, headersSize, "X-Addr", url_adr); // Original address + ZIP_FIELD_STRING(headers, headersSize, "X-Fil", url_fil); // Original URI filename + ZIP_FIELD_STRING(headers, headersSize, "X-Save", ""); // Original save filename + + /* Time */ + memset(&fi, 0, sizeof(fi)); + if (element->lastmodified[0] != '\0') { + struct tm buffer; + struct tm* tm_s = convert_time_rfc822(&buffer, element->lastmodified); + if (tm_s) { + fi.tmz_date.tm_sec = (uInt) tm_s->tm_sec; + fi.tmz_date.tm_min = (uInt) tm_s->tm_min; + fi.tmz_date.tm_hour = (uInt) tm_s->tm_hour; + fi.tmz_date.tm_mday = (uInt) tm_s->tm_mday; + fi.tmz_date.tm_mon = (uInt) tm_s->tm_mon; + fi.tmz_date.tm_year = (uInt) tm_s->tm_year; + } + } + + /* Open file - NOTE: headers in "comment" */ + if ((zErr = zipOpenNewFileInZip(zFileOut, + url, + &fi, + /* + Store headers in realtime in the local file directory as extra field + In case of crash, we'll be able to recover the whole ZIP file by rescanning it + */ + headers, + (uInt) strlen(headers), + NULL, + 0, + NULL, /* comment */ + Z_DEFLATED, + Z_DEFAULT_COMPRESSION)) != Z_OK) + { + int zip_zipOpenNewFileInZip_failed = 0; + assertf(zip_zipOpenNewFileInZip_failed); + } + + /* Write data in cache */ + if (element->size > 0 && element->adr != NULL) { + if ((zErr = zipWriteInFileInZip(zFileOut, element->adr, (int) element->size)) != Z_OK) { + int zip_zipWriteInFileInZip_failed = 0; + assertf(zip_zipWriteInFileInZip_failed); + } + } + + /* Close */ + if ((zErr = zipCloseFileInZip(zFileOut)) != Z_OK) { + int zip_zipCloseFileInZip_failed = 0; + assertf(zip_zipCloseFileInZip_failed); + } + + /* Flush */ + if ((zErr = zipFlush(zFileOut)) != 0) { + int zip_zipFlush_failed = 0; + assertf(zip_zipFlush_failed); + } + + return 0; +} + +static int PT_SaveCache__New(PT_Indexes indexes, const char *filename) { + zipFile zFileOut = zipOpen(filename, 0); + if (zFileOut != NULL) { + int ret = PT_EnumCache(indexes, PT_SaveCache__New_Fun, (void *) zFileOut); + zipClose(zFileOut, "Created by HTTrack Website Copier/ProxyTrack "PROXYTRACK_VERSION); + zFileOut = NULL; + if (ret != 0) + (void) unlink(filename); + return ret; + } + return -1; +} + + /* ------------------------------------------------------------ */ /* Old HTTrack cache (dat/ndx) format */ @@ -1167,7 +1413,7 @@ static int PT_LoadCache__Old(PT_Index index_, const char *filename) { /* */ } else { // Vieille version du cache /* */ - // fspc(opt->log,"warning"); fprintf(opt->log,"Cache: importing old cache format"LF); + // HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Cache: importing old cache format"LF); cache->version=0; // cache 1.0 strcpy(cache->lastmodified,firstline); } @@ -1257,7 +1503,7 @@ static PT_Element PT_ReadCache__Old(PT_Index index, const char* url, int flags) static PT_Element PT_ReadCache__Old_u(PT_Index index_, const char* url, int flags) { PT_Index__Old cache = (PT_Index__Old) &index_->slots.formatOld; - long int hash_pos; + intptr_t hash_pos; int hash_pos_return; char location_default[HTS_URLMAXSIZE*2]; char previous_save[HTS_URLMAXSIZE*2]; @@ -1276,7 +1522,7 @@ static PT_Element PT_ReadCache__Old_u(PT_Index index_, const char* url, int flag strcpy(r->location, ""); if (strncmp(url, "http://", 7) == 0) url += 7; - hash_pos_return=inthash_read(cache->hash, url, (long int*)&hash_pos); + hash_pos_return=inthash_read(cache->hash, url, &hash_pos); if (hash_pos_return) { int pos = (int) hash_pos; /* simply */ @@ -1325,10 +1571,12 @@ static PT_Element PT_ReadCache__Old_u(PT_Index index_, const char* url, int flag } else { char check[256]; unsigned long size_read; + unsigned long int size_; check[0]='\0'; // cache_rint(cache->dat,&r->statuscode); - cache_rLLint(cache->dat,&r->size); + cache_rLLint(cache->dat,&size_); + r->size = (size_t) size_; cache_rstr(cache->dat,r->msg); cache_rstr(cache->dat,r->contenttype); if (cache->version >= 3) @@ -1503,3 +1751,497 @@ static int PT_LookupCache__Old_u(PT_Index index_, const char* url) { return 0; } + +/* ------------------------------------------------------------ */ +/* Internet Archive Arc 1.0 (arc) format */ +/* Xavier Roche (roche@httrack.com) */ +/* Lars Clausen (lc@statsbiblioteket.dk) */ +/* ------------------------------------------------------------ */ + +#define ARC_SP ' ' + +static const char* getArcField(const char *line, int pos) { + int i; + for(i = 0 ; line[i] != '\0' && pos > 0 ; i++) { + if (line[i] == ARC_SP) + pos--; + } + if (pos == 0) + return &line[i]; + return NULL; +} + +static char* copyArcField(const char *line, int npos, char *dest, int destMax) { + const char *pos; + if ((pos = getArcField(line, npos)) != NULL) { + int i; + for(i = 0 ; pos[i] != '\0' && pos[i] != ARC_SP && ( --destMax ) > 0; i++) { + dest[i] = pos[i]; + } + dest[i] = 0; + return dest; + } + dest[0] = 0; + return NULL; +} + +static int getArcLength(const char *line) { + const char *pos; + if ((pos = getArcField(line, 9)) != NULL + || (pos = getArcField(line, 4)) != NULL + || (pos = getArcField(line, 2)) != NULL + ) { + int length; + if (sscanf(pos, "%d", &length) == 1) { + return length; + } + } + return -1; +} + +static int skipArcNl(FILE* file) { + if (fgetc(file) == 0x0a) { + return 0; + } + return -1; +} + +static int skipArcData(FILE* file, const char *line) { + int jump = getArcLength(line); + if (jump != -1) { + if (fseek(file, jump, SEEK_CUR) == 0 /* && skipArcNl(file) == 0 */) { + return 0; + } + } + return -1; +} + +static int getDigit(const char digit) { + return (int) ( digit - '0' ); +} + +static int getDigit2(const char * const pos) { + return getDigit(pos[0])*10 + getDigit(pos[1]); +} + +static int getDigit4(const char * const pos) { + return getDigit(pos[0])*1000 + getDigit(pos[1])*100 + getDigit(pos[2])*10 + getDigit(pos[3]); +} + +static time_t getGMT(struct tm *tm) { /* hey, time_t is local! */ + time_t t = mktime(tm); + if (t != (time_t) -1 && t != (time_t) 0) { + /* BSD does not have static "timezone" declared */ +#if (defined(BSD) || defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) || defined(__FreeBSD_kernel__)) + time_t now = time(NULL); + time_t timezone = - localtime(&now)->tm_gmtoff; +#endif + return (time_t) (t - timezone); + } + return (time_t) -1; +} + +static time_t getArcTimestamp(const char * const line) { + const char *pos; + if ((pos = getArcField(line, 2)) != NULL) { + int i; + /* date == YYYYMMDDhhmmss (Greenwich Mean Time) */ + /* example: 20050405154029 */ + for(i = 0 ; pos[i] >= '0' && pos[i] <= '9' ; i++); + if (i == 14) { + struct tm tm; + memset(&tm, 0, sizeof(tm)); + tm.tm_year = getDigit4(pos + 0) - 1900; /* current year minus 1900 */ + tm.tm_mon = getDigit2(pos + 4) - 1; /* 0 – 11 */ + tm.tm_mday = getDigit2(pos + 6); /* 1 – 31 */ + tm.tm_hour = getDigit2(pos + 8); /* 0 – 23 */ + tm.tm_min = getDigit2(pos + 10); /* 0 – 59 */ + tm.tm_sec = getDigit2(pos + 12); /* 0 – 59 */ + tm.tm_isdst = 0; + return getGMT(&tm); + } + } + return (time_t) -1; +} + +static int readArcURLRecord(PT_Index__Arc index) { + index->line[0] = '\0'; + if (linput(index->file, index->line, sizeof(index->line) - 1)) { + return 0; + } + return -1; +} + +#define str_begins(str, sstr) ( strncmp(str, sstr, sizeof(sstr) - 1) == 0 ) +static int PT_CompatibleScheme(const char *url) { + return (str_begins(url, "http:") + || str_begins(url, "https:") + || str_begins(url, "ftp:") + || str_begins(url, "file:")); +} + +int PT_LoadCache__Arc(PT_Index index_, const char *filename) { + if (index_ != NULL && filename != NULL) { + PT_Index__Arc index = &index_->slots.formatArc; + index->timestamp = file_timestamp(filename); + MutexInit(&index->fileLock); + index->file = fopen(filename, "rb"); + + // Opened ? + if (index->file != NULL) { + inthash hashtable = index->hash; + if (readArcURLRecord(index) == 0) { + int entries = 0; + /* Read first line */ + if (strncmp(index->line, "filedesc://", sizeof("filedesc://") - 1) != 0) { + fprintf(stderr, "Unexpected bad signature #%s"LF, index->line); + fclose(index->file); + index->file = NULL; + return 0; + } + /* Timestamp */ + index->timestamp = getArcTimestamp(index->line); + /* Skip first entry */ + if (skipArcData(index->file, index->line) != 0 || skipArcNl(index->file) != 0) { + fprintf(stderr, "Unexpected bad data offset size first entry"LF); + fclose(index->file); + index->file = NULL; + return 0; + } + /* Read all meta-entries (not data) */ + while(!feof(index->file)) { + unsigned long int fpos = ftell(index->file); + if (skipArcNl(index->file) == 0 && readArcURLRecord(index) == 0) { + int length = getArcLength(index->line); + if (length >= 0) { + const char * filenameIndex = copyArcField(index->line, 0, + index->filenameIndexBuff, sizeof(index->filenameIndexBuff) - 1); /* can not be NULL */ + if (strncmp(filenameIndex, "http://", 7) == 0) { + filenameIndex += 7; + } + if (*filenameIndex != 0) { + if (skipArcData(index->file, index->line) != 0) { + fprintf(stderr, "Corrupted cache data entry #%d (truncated file?), aborting read"LF, (int)entries); + } + /*fprintf(stdout, "adding %s [%d]\n", filenameIndex, (int)fpos);*/ + if (PT_CompatibleScheme(index->filenameIndexBuff)) { + inthash_add(hashtable, filenameIndex, fpos); /* position of meta-data */ + entries++; + } + } else { + fprintf(stderr, "Corrupted cache meta entry #%d"LF, (int)entries); + } + } else { + fprintf(stderr, "Corrupted cache meta entry #%d, aborting read"LF, (int)entries); + break ; + } + } else { + break ; + } + } + + /* OK */ + return 1; + } else { + fprintf(stderr, "Bad file (empty ?)"LF); + } + } else { + fprintf(stderr, "Unable to open file"LF); + index = NULL; + } + } else { + fprintf(stderr, "Bad arguments"LF); + } + return 0; +} + +#define HTTP_READFIELD_STRING(line, value, refline, refvalue) do { \ + if (line[0] != '\0' && strfield2(line, refline)) { \ + strcpy(refvalue, value); \ + line[0] = '\0'; \ + } \ +} while(0) +#define HTTP_READFIELD_INT(line, value, refline, refvalue) do { \ + if (line[0] != '\0' && strfield2(line, refline)) { \ + int intval = 0; \ + sscanf(value, "%d", &intval); \ + (refvalue) = intval; \ + line[0] = '\0'; \ + } \ +} while(0) + +static PT_Element PT_ReadCache__Arc(PT_Index index, const char* url, int flags) { + PT_Element retCode; + MutexLock(&index->slots.formatArc.fileLock); + { + retCode = PT_ReadCache__Arc_u(index, url, flags); + } + MutexUnlock(&index->slots.formatArc.fileLock); + return retCode; +} + +static PT_Element PT_ReadCache__Arc_u(PT_Index index_, const char* url, int flags) +{ + PT_Index__Arc index = (PT_Index__Arc) &index_->slots.formatArc; + char location_default[HTS_URLMAXSIZE*2]; + intptr_t hash_pos; + int hash_pos_return; + PT_Element r = NULL; + if (index == NULL || index->hash == NULL || url == NULL || *url == 0) + return NULL; + if ((r = PT_ElementNew()) == NULL) + return NULL; + location_default[0] = '\0'; + memset(r, 0, sizeof(_PT_Element)); + r->location = location_default; + strcpy(r->location, ""); + if (strncmp(url, "http://", 7) == 0) + url += 7; + hash_pos_return = inthash_read(index->hash, url, &hash_pos); + + if (hash_pos_return) { + if (fseek(index->file, (long)hash_pos, SEEK_SET) == 0) { + if (skipArcNl(index->file) == 0 && readArcURLRecord(index) == 0) { + long int fposMeta = ftell(index->file); + int dataLength = getArcLength(index->line); + const char *pos; + + /* Read HTTP headers */ + /* HTTP/1.1 404 Not Found */ + if (linput(index->file, index->line, sizeof(index->line) - 1)) { + if ((pos = getArcField(index->line, 1)) != NULL) { + if (sscanf(pos, "%d", &r->statuscode) != 1) { + r->statuscode = STATUSCODE_INVALID; + } + } + if ((pos = getArcField(index->line, 2)) != NULL) { + r->msg[0] = '\0'; + strncat(r->msg, pos, sizeof(pos) - 1); + } + while (linput(index->file, index->line, sizeof(index->line) - 1) && index->line[0] != '\0') { + char* const line = index->line; + char* value = strchr(line, ':'); + if (value != NULL) { + *value = '\0'; + for( value++ ; *value == ' ' || *value == '\t' ; value++); + HTTP_READFIELD_INT(line, value, "Content-Length", r->size); // size + HTTP_READFIELD_STRING(line, value, "Content-Type", r->contenttype); // contenttype + HTTP_READFIELD_STRING(line, value, "Last-Modified", r->lastmodified); // last-modified + HTTP_READFIELD_STRING(line, value, "Etag", r->etag); // Etag + HTTP_READFIELD_STRING(line, value, "Location", r->location); // 'location' pour moved + HTTP_READFIELD_STRING(line, value, "Content-Disposition", r->cdispo); // Content-disposition + if (line[0] != '\0') { + int len = r->headers ? ((int) strlen(r->headers)) : 0; + int nlen = (int) ( strlen(line) + 2 + strlen(value) + sizeof("\r\n") + 1 ); + r->headers = realloc(r->headers, len + nlen); + r->headers[len] = '\0'; + strcat(r->headers, line); + strcat(r->headers, ": "); + strcat(r->headers, value); + strcat(r->headers, "\r\n"); + } + } + } + + /* FIXME charset */ + if (r->contenttype[0] != '\0') { + char *pos = strchr(r->contenttype, ';'); + if (pos != NULL) { + /*char *chs = strchr(pos, "charset=");*/ + /*HTTP_READFIELD_STRING(line, value, "X-Charset", r->charset);*/ + *pos = 0; + if ((pos = strchr(r->contenttype, ' ')) != NULL) { + *pos = 0; + } + } + } + + /* Read data */ + if (r->statuscode != STATUSCODE_INVALID) { /* Can continue */ + if (flags & FETCH_BODY) { + long int fposCurrent = ftell(index->file); + long int metaSize = fposCurrent - fposMeta; + long int fetchSize = (long int) r->size; + if (fetchSize <= 0) { + fetchSize = dataLength - metaSize; + } else if (fetchSize > dataLength - metaSize) { + r->statuscode=STATUSCODE_INVALID; + strcpy(r->msg, "Cache Read Error : Truncated Data"); + } + r->size = 0; + if (r->statuscode != STATUSCODE_INVALID) { + r->adr = (char*) malloc(fetchSize); + if (r->adr != NULL) { + if (fetchSize > 0 && ( r->size = (int) fread(r->adr, 1, fetchSize, index->file) ) != fetchSize) { + int last_errno = errno; + r->statuscode=STATUSCODE_INVALID; + sprintf(r->msg,"Read error in cache disk data: %s", strerror(last_errno)); + } + } else { + r->statuscode=STATUSCODE_INVALID; + strcpy(r->msg,"Read error (memory exhausted) from cache"); + } + } + } + } + + } else { + r->statuscode=STATUSCODE_INVALID; + strcpy(r->msg, "Cache Read Error : Read Header Error"); + } + + } else { + r->statuscode=STATUSCODE_INVALID; + strcpy(r->msg, "Cache Read Error : Read Header Error"); + } + } else { + r->statuscode=STATUSCODE_INVALID; + strcpy(r->msg, "Cache Read Error : Seek Error"); + } + + } else { + r->statuscode=STATUSCODE_INVALID; + strcpy(r->msg,"File Cache Entry Not Found"); + } + if (r->location[0] != '\0') { + r->location = strdup(r->location); + } else { + r->location = NULL; + } + return r; +} + +static int PT_LookupCache__Arc(PT_Index index, const char* url) { + int retCode; + MutexLock(&index->slots.formatArc.fileLock); + { + retCode = PT_LookupCache__Arc_u(index, url); + } + MutexUnlock(&index->slots.formatArc.fileLock); + return retCode; +} + +static int PT_LookupCache__Arc_u(PT_Index index_, const char* url) { + if (index_ != NULL) { + PT_Index__New cache = (PT_Index__New) &index_->slots.formatNew; + if (cache == NULL || cache->hash == NULL || url == NULL || *url == 0) + return 0; + if (strncmp(url, "http://", 7) == 0) + url += 7; + if (inthash_read(cache->hash, url, NULL)) + return 1; + } + return 0; +} + +typedef struct PT_SaveCache__Arc_t { + PT_Indexes indexes; + FILE *fp; + time_t t; + char filename[64]; + struct tm buff; + char headers[8192]; + char md5[32 + 2]; +} PT_SaveCache__Arc_t; + +static int PT_SaveCache__Arc_Fun(void *arg, const char *url, PT_Element element) { + PT_SaveCache__Arc_t *st = (PT_SaveCache__Arc_t*) arg; + FILE * const fp = st->fp; + struct tm* tm = convert_time_rfc822(&st->buff, element->lastmodified); + int size_headers; + + sprintf(st->headers, + "HTTP/1.0 %d %s" "\r\n" + "X-Server: ProxyTrack " PROXYTRACK_VERSION "\r\n" + "Content-type: %s%s%s%s" "\r\n" + "Last-modified: %s" "\r\n" + "Content-length: %d" "\r\n" + , + element->statuscode, element->msg, + /**/ + element->contenttype, + (element->charset[0] ? "; charset=\"" : ""), + (element->charset[0] ? element->charset : ""), + (element->charset[0] ? "\"" : ""), + /**/ + element->lastmodified, + (int) element->size + ); + if (element->location != NULL && element->location[0] != '\0') { + sprintf(st->headers + strlen(st->headers), "Location: %s" "\r\n", element->location); + } + if (element->headers != NULL) { + if ( strlen(element->headers) < sizeof(st->headers) - strlen(element->headers) - 1 ) { + strcat(st->headers, element->headers); + } + } + strcat(st->headers, "\r\n"); + size_headers = (int) strlen(st->headers); + + /* doc == <nl><URL-record><nl><network_doc> */ + + /* Format: URL IP date mime result checksum location offset filename length */ + if (element->adr != NULL) { + domd5mem(element->adr, element->size, st->md5, 1); + } else { + strcpy(st->md5, "-"); + } + fprintf(fp, + /* nl */ + "\n" + /* URL-record */ + "%s%s %s %04d%02d%02d%02d%02d%02d %s %d %s %s %ld %s %ld" + /* nl */ + "\n", + /* args */ + ( link_has_authority(url) ? "" : "http://" ), url, + "0.0.0.0", + tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday, tm->tm_hour, tm->tm_min, tm->tm_sec, + element->contenttype, + element->statuscode, + st->md5, ( element->location ? element->location : "-" ), + (long int)ftell(fp), st->filename, + (long int)( size_headers + element->size )); + /* network_doc */ + if (fwrite(st->headers, 1, size_headers, fp) != size_headers + || ( element->size > 0 && fwrite(element->adr, 1, element->size, fp) != element->size ) + ) { + return 1; /* Error */ + } + + return 0; +} + +static int PT_SaveCache__Arc(PT_Indexes indexes, const char *filename) { + FILE *fp = fopen(filename, "wb"); + if (fp != NULL) { + PT_SaveCache__Arc_t st; + int ret; + time_t t = PT_GetTimeIndex(indexes); + struct tm tm = PT_GetTime(t); + + /* version-2-block == + filedesc://<path><sp><ip_address><sp><date><sp>text/plain<sp>200<sp>-<sp>-<sp>0<sp><filename><sp><length><nl> + 2<sp><reserved><sp><origin-code><nl> + URL<sp>IP-address<sp>Archive-date<sp>Content-type<sp>Result-code<sp>Checksum<sp>Location<sp> Offset<sp>Filename<sp>Archive-length<nl> + <nl> */ + const char* prefix = + "2 0 HTTrack Website Copier" "\n" + "URL IP-address Archive-Date Content-Type Result-code Checksum Location Offset Filename Archive-length" "\n" "\n"; + sprintf(st.filename, "httrack_%d.arc", (int) t); + fprintf(fp, "filedesc://%s 0.0.0.0 %04d%02d%02d%02d%02d%02d text/plain 200 - - 0 %s %d" "\n" + "%s", + st.filename, + tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec, + st.filename, (int)strlen(prefix), prefix); + st.fp = fp; + st.indexes = indexes; + st.t = t; + ret = PT_EnumCache(indexes, PT_SaveCache__Arc_Fun, (void *)&st); + fclose(fp); + if (ret != 0) + (void) unlink(filename); + return ret; + } + return -1; +} diff --git a/src/proxy/store.h b/src/proxy/store.h index 805bc20..5d4a76e 100644 --- a/src/proxy/store.h +++ b/src/proxy/store.h @@ -28,6 +28,13 @@ Please visit our Website: http://www.httrack.com #ifndef WEBHTTRACK_PROXYTRACK_STORE #define WEBHTTRACK_PROXYTRACK_STORE +/* Includes */ +#ifndef _WIN32 +#include <pthread.h> +#else +#include "windows.h" +#endif + /* Proxy */ typedef struct _PT_Index _PT_Index; @@ -43,12 +50,12 @@ typedef struct _PT_CacheItem _PT_CacheItem; typedef struct _PT_CacheItem *PT_CacheItem; typedef struct _PT_Element { - int indexId; // index identifier, if suitable (!= -1) + int indexId; // index identifier, if suitable (!= -1) // int statuscode; // status-code, -1=erreur, 200=OK,201=..etc (cf RFC1945) char* adr; // adresse du bloc de mémoire, NULL=vide - char* headers; // adresse des en têtes si présents - unsigned long int size; // taille fichier + char* headers; // adresse des en têtes si présents (RFC822 format) + size_t size; // taille fichier char msg[1024]; // error message ("\0"=undefined) char contenttype[64]; // content-type ("text/html" par exemple) char charset[64]; // charset ("iso-8859-1" par exemple) @@ -85,11 +92,14 @@ int PT_AddIndex(PT_Indexes index, const char *path); int PT_RemoveIndex(PT_Indexes index, int indexId); int PT_IndexMerge(PT_Indexes indexes, PT_Index *pindex); PT_Index PT_GetIndex(PT_Indexes indexes, int indexId); +time_t PT_GetTimeIndex(PT_Indexes indexes); /* Indexes list */ PT_Element PT_Index_HTML_BuildRootInfo(PT_Indexes indexes); char ** PT_Enumerate(PT_Indexes indexes, const char *url, int subtree); void PT_Enumerate_Delete(char ***plist); +int PT_EnumCache(PT_Indexes indexes, int (*callback)(void *, const char *url, PT_Element), void *arg); +int PT_SaveCache(PT_Indexes indexes, const char *filename); /* Index */ PT_Index PT_LoadCache(const char *filename); |