summary | refs | log | tree | commit | diff
path: root/src/proxy
diff options
context:
space:
mode:
author Xavier Roche <xroche@users.noreply.github.com> 2012-03-19 12:59:03 +0000
committer Xavier Roche <xroche@users.noreply.github.com> 2012-03-19 12:59:03 +0000
commit 660b569b0980fc8f71b03ed666dd02eec8388b4c (patch)
tree 8ad02b5f0bebdd4cd1d2ba01005d6f3f71a0a7fb /src/proxy
parent 64cc4a88da8887ef1f7f4d90be0158d2cc76222d (diff)
httrack 3.41.2
Diffstat (limited to 'src/proxy')
-rw-r--r-- src/proxy/changelog.txt 4
-rw-r--r-- src/proxy/main.c 29
-rwxr-xr-x src/proxy/proxystrings.h 114
-rw-r--r-- src/proxy/proxytrack.c 202
-rw-r--r-- src/proxy/proxytrack.h 172
-rw-r--r-- src/proxy/store.c 822
-rw-r--r-- src/proxy/store.h 16
7 files changed, 1086 insertions, 273 deletions
diff --git a/src/proxy/changelog.txt b/src/proxy/changelog.txt
index f5ae48b..b50a8d6 100644
--- a/src/proxy/changelog.txt
+++ b/src/proxy/changelog.txt
@@ -1,3 +1,7 @@
+0.5 - May 8 2006
+- added ARC (Internet Archive 1.0) file format
+- first index output routines (proxytrack --convert ..)
+
0.4 - Sept 18 2005
- implemented very limited WebDAV (RFC2518) primitives
- index enumeration fixes
diff --git a/src/proxy/main.c b/src/proxy/main.c
index e48b51d..bf35377 100644
--- a/src/proxy/main.c
+++ b/src/proxy/main.c
@@ -64,7 +64,7 @@ int main(int argc, char* argv[])
{
int i;
int ret = 0;
- int proxyPort, icpPort;
+ int proxyPort = 0, icpPort = 0;
char proxyAddr[256 + 1], icpAddr[256 + 1];
PT_Indexes index;
@@ -98,10 +98,21 @@ int main(int argc, char* argv[])
printf("*** This version is a development release ***\n");
printf("\n");
if (argc < 3
- || !scanHostPort(argv[1], proxyAddr, &proxyPort)
- || !scanHostPort(argv[2], icpAddr, &icpPort))
+ || (
+ strcmp(argv[1], "--convert") != 0
+ &&
+ (
+ !scanHostPort(argv[1], proxyAddr, &proxyPort)
+ || !scanHostPort(argv[2], icpAddr, &icpPort)
+ )
+ )
+ )
{
- fprintf(stderr, "usage: %s <proxy-addr:proxy-port> <ICP-addr:ICP-port> [ ( <new.zip path> | <new.ndx path> | --list <file-list> ) ..]\n", argv[0]);
+ fprintf(stderr, "proxy mode:\n");
+ fprintf(stderr, "usage: %s <proxy-addr:proxy-port> <ICP-addr:ICP-port> [ ( <new.zip path> | <new.ndx path> | <archive.arc path> | --list <file-list> ) ..]\n", argv[0]);
+ fprintf(stderr, "\texample:%s proxy:8080 localhost:3130 /home/archives/www-archive-01.zip /home/old-archives/www-archive-02.ndx\n", argv[0]);
+ fprintf(stderr, "convert mode:\n");
+ fprintf(stderr, "usage: %s --convert <archive-output-path> [ ( <new.zip path> | <new.ndx path> | <archive.arc path> | --list <file-list> ) ..]\n", argv[0]);
fprintf(stderr, "\texample:%s proxy:8080 localhost:3130 /home/archives/www-archive-01.zip /home/old-archives/www-archive-02.ndx\n", argv[0]);
return 1;
}
@@ -150,7 +161,15 @@ int main(int argc, char* argv[])
#endif
/* Go */
- ret = proxytrack_main(proxyAddr, proxyPort, icpAddr, icpPort, index);
+ if (strcmp(argv[1], "--convert") != 0) {
+ ret = proxytrack_main(proxyAddr, proxyPort, icpAddr, icpPort, index);
+ } else {
+ if ((ret = PT_SaveCache(index, argv[2])) == 0) {
+ fprintf(stderr, "processed: '%s'\n", argv[2]);
+ } else {
+ fprintf(stderr, "error: could not save '%s'\n", argv[2]);
+ }
+ }
/* Wipe */
PT_Delete(index);
diff --git a/src/proxy/proxystrings.h b/src/proxy/proxystrings.h
index 87bcf34..9ee8fa7 100755
--- a/src/proxy/proxystrings.h
+++ b/src/proxy/proxystrings.h
@@ -28,77 +28,11 @@ Please visit our Website: http://www.httrack.com
// Strings a bit safer than static buffers
-#ifndef HTS_STRINGS_DEFSTATIC
-#define HTS_STRINGS_DEFSTATIC
-
-typedef struct String {
- char* buff;
- int len;
- int capa;
-} String;
-
-#define STRING_EMPTY {NULL, 0, 0}
-#define STRING_BLK_SIZE 256
-#define StringBuff(blk) ((blk).buff)
-#define StringLength(blk) ((blk).len)
-#define StringCapacity(blk) ((blk).capa)
-#define StringRoom(blk, size) do { \
- if ((blk).len + (int)(size) + 1 > (blk).capa) { \
- (blk).capa = ((blk).len + (size) + 1) * 2; \
- (blk).buff = (char*) realloc((blk).buff, (blk).capa); \
- } \
-} while(0)
-#define StringBuffN(blk, size) StringBuffN_(&(blk), size)
-static char* StringBuffN_(String* blk, int size) {
- StringRoom(*blk, (blk->len) + size);
- return StringBuff(*blk);
-}
-#define StringClear(blk) do { \
- StringRoom(blk, 0); \
- (blk).buff[0] = '\0'; \
- (blk).len = 0; \
-} while(0)
-#define StringFree(blk) do { \
- if ((blk).buff != NULL) { \
- free((blk).buff); \
- (blk).buff = NULL; \
- } \
- (blk).capa = 0; \
- (blk).len = 0; \
-} while(0)
-#define StringMemcat(blk, str, size) do { \
- StringRoom(blk, size); \
- if ((int)(size) > 0) { \
- memcpy((blk).buff + (blk).len, (str), (size)); \
- (blk).len += (size); \
- } \
- *((blk).buff + (blk).len) = '\0'; \
-} while(0)
-#define StringAddchar(blk, c) do { \
- char __c = (c); \
- StringMemcat(blk, &__c, 1); \
-} while(0)
-static void* StringAcquire(String* blk) {
- void* buff = blk->buff;
- blk->buff = NULL;
- blk->capa = 0;
- blk->len = 0;
- return buff;
-}
-static StringAttach(String* blk, char** str) {
- StringFree(*blk);
- if (str != NULL && *str != NULL) {
- blk->buff = *str;
- blk->capa = (int)strlen(blk->buff);
- blk->len = blk->capa;
- *str = NULL;
- }
-}
-#define StringStrcat(blk, str) StringMemcat(blk, str, ((str) != NULL) ? (int)strlen(str) : 0)
-#define StringStrcpy(blk, str) do { \
- StringClear(blk); \
- StringStrcat(blk, str); \
-} while(0)
+#ifndef HTS_PROXYSTRINGS_DEFSTATIC
+#define HTS_PROXYSTRINGS_DEFSTATIC
+
+#include "htsstrings.h"
+
/* Tools */
@@ -138,16 +72,46 @@ static void escapexml(const char* s, String* tempo) {
int i;
for (i=0 ; s[i] != '\0' ; i++) {
if (s[i] == '&')
- StringStrcat(*tempo, "&amp;");
+ StringCat(*tempo, "&amp;");
else if (s[i] == '<')
- StringStrcat(*tempo, "&lt;");
+ StringCat(*tempo, "&lt;");
else if (s[i] == '>')
- StringStrcat(*tempo, "&gt;");
+ StringCat(*tempo, "&gt;");
else if (s[i] == '\"')
- StringStrcat(*tempo, "&quot;");
+ StringCat(*tempo, "&quot;");
else
StringAddchar(*tempo, s[i]);
}
}
+static char* concat(char *catbuff,const char* a,const char* b) {
+ if (a != NULL && a[0] != '\0') {
+ strcpy(catbuff, a);
+ } else {
+ catbuff[0] = '\0';
+ }
+ if (b != NULL && b[0] != '\0') {
+ strcat(catbuff, b);
+ }
+ return catbuff;
+}
+
+static char* __fconv(char* a) {
+#ifdef WIN32
+ int i;
+ for(i = 0 ; a[i] != 0 ; i++)
+ if (a[i] == '/') // Unix-to-DOS style
+ a[i] = '\\';
+#endif
+ return a;
+}
+
+static char* fconcat(char *catbuff, const char* a, const char* b) {
+ return __fconv(concat(catbuff,a,b));
+}
+
+static char* fconv(char *catbuff, const char* a) {
+ return __fconv(concat(catbuff,a,""));
+}
+
#endif
diff --git a/src/proxy/proxytrack.c b/src/proxy/proxytrack.c
index 7604804..195c1a5 100644
--- a/src/proxy/proxytrack.c
+++ b/src/proxy/proxytrack.c
@@ -119,13 +119,10 @@ Remark: If no cache newer than the added one is found, all entries can be added
#include <string.h>
#include <time.h>
#include <fcntl.h>
-#if HTS_WIN
+#ifdef _WIN32
#else
#include <arpa/inet.h>
#endif
-#ifndef _WIN32
-#include <signal.h>
-#endif
/* END specific definitions */
/* String */
@@ -137,12 +134,6 @@ Remark: If no cache newer than the added one is found, all entries can be added
/* définitions globales */
#include "htsglobal.h"
-/* htslib */
-/*#include "htslib.h"*/
-
-/* HTTrack Website Copier Library */
-#include "httrack-library.h"
-
/* htsweb */
#include "htsinthash.h"
@@ -228,27 +219,6 @@ static int linputsoc_t(T_SOC soc, char* s, int max, int timeout) {
return -1;
}
-static void unescapeini(char* s, String* tempo) {
- int i;
- char lastc=0;
- for (i=0;i<(int) strlen(s);i++) {
- if (s[i]=='%' && s[i+1]=='%') {
- i++;
- StringAddchar(*tempo, lastc = '%');
- } else if (s[i]=='%') {
- char hc;
- i++;
- hc = (char) ehex(s+i);
- if (!is_retorsep(hc) || !is_retorsep(lastc)) {
- StringAddchar(*tempo, lastc = (char) hc);
- }
- i++; // sauter 2 caractères finalement
- }
- else
- StringAddchar(*tempo, lastc = s[i]);
- }
-}
-
static int gethost(const char* hostname, SOCaddr *server, size_t server_size) {
if (hostname != NULL && *hostname != '\0') {
#if HTS_INET6==0
@@ -257,8 +227,8 @@ static int gethost(const char* hostname, SOCaddr *server, size_t server_size) {
*/
t_hostent* hp=gethostbyname(hostname);
if (hp!=NULL) {
- if ( (hp->h_length) && ( ((unsigned int) hp->h_length) <= buffer->addr_maxlen) ) {
- SOCaddr_copyaddr(server, server_size, hp->h_addr_list[0], hp->h_length);
+ if (hp->h_length) {
+ SOCaddr_copyaddr(*server, server_size, hp->h_addr_list[0], hp->h_length);
return 1;
}
}
@@ -325,7 +295,7 @@ static T_SOC smallserver_init(const char* adr, int port, int family) {
SOCaddr_initany(server, server_size);
if (gethost(adr, &server, server_size)) { // host name
T_SOC soc = INVALID_SOCKET;
- if ( (soc = socket(SOCaddr_sinfamily(server), family, 0)) != INVALID_SOCKET) {
+ if ( (soc = (T_SOC) socket(SOCaddr_sinfamily(server), family, 0)) != INVALID_SOCKET) {
SOCaddr_initport(server, port);
if ( bind(soc,(struct sockaddr*) &server, (int)server_size) == 0 ) {
if (family != SOCK_STREAM
@@ -379,7 +349,8 @@ int proxytrack_main(char* proxyAddr, int proxyPort,
fflush(stderr);
//
if (!proxytrack_start(index, soc, socICP)) {
- fprintf(stderr, "Unable to create the server: %s\n", strerror(errno));
+ int last_errno = errno;
+ fprintf(stderr, "Unable to create the server: %s\n", strerror(last_errno));
#ifdef _WIN32
closesocket(soc);
#else
@@ -391,7 +362,8 @@ int proxytrack_main(char* proxyAddr, int proxyPort,
returncode = 0;
}
} else {
- fprintf(stderr, "Unable to initialize a temporary server : %s\n", strerror(errno));
+ int last_errno = errno;
+ fprintf(stderr, "Unable to initialize a temporary server : %s\n", strerror(last_errno));
returncode = 1;
}
printf("EXITED\n");
@@ -452,7 +424,7 @@ static const char* GetHttpMessage(int statuscode) {
#ifndef NO_WEBDAV
static void proxytrack_add_DAV_Item(String *item, String *buff,
const char* filename,
- unsigned long int size,
+ size_t size,
time_t timestamp,
const char* mime,
int isDir,
@@ -485,7 +457,7 @@ static void proxytrack_add_DAV_Item(String *item, String *buff,
}
StringRoom(*item, 1024);
- sprintf(StringBuff(*item),
+ sprintf(StringBuffRW(*item),
"<response xmlns=\"DAV:\">\r\n"
"<href>/webdav%s%s</href>\r\n"
"<propstat>\r\n"
@@ -644,17 +616,17 @@ static PT_Element proxytrack_process_DAV_Request(PT_Indexes indexes, const char
StringClear(buff);
/* Canonize URL */
- StringStrcpy(url, file + ((file[0] == '/') ? 1 : 0));
+ StringCopy(url, file + ((file[0] == '/') ? 1 : 0));
if (StringLength(url) > 0) {
if (StringBuff(url)[StringLength(url) - 1] == '/') {
- StringBuff(url)[StringLength(url) - 1] = '\0';
+ StringBuffRW(url)[StringLength(url) - 1] = '\0';
StringLength(url)--;
}
}
/* Form response */
StringRoom(response, 1024);
- sprintf(StringBuff(response),
+ sprintf(StringBuffRW(response),
"<?xml version=\"1.0\" encoding=\"utf-8\"?>\r\n"
"<multistatus xmlns=\"DAV:\">\r\n");
StringLength(response) = (int) strlen(StringBuff(response));
@@ -683,22 +655,22 @@ static PT_Element proxytrack_process_DAV_Request(PT_Indexes indexes, const char
/* Item URL */
StringRoom(itemUrl, thisUrlLen + prefixLen + sizeof("/webdav/") + 1);
StringClear(itemUrl);
- sprintf(StringBuff(itemUrl), "/%s/%s", prefix, thisUrl);
+ sprintf(StringBuffRW(itemUrl), "/%s/%s", prefix, thisUrl);
if (!thisIsDir)
StringLength(itemUrl) = (int) strlen(StringBuff(itemUrl));
else
StringLength(itemUrl) = (int) strlen(StringBuff(itemUrl)) - 1;
- StringBuff(itemUrl)[StringLength(itemUrl)] = '\0';
+ StringBuffRW(itemUrl)[StringLength(itemUrl)] = '\0';
if (thisIsDir == isDir) {
- unsigned long size = 0;
+ size_t size = 0;
time_t timestamp = (time_t) 0;
PT_Element file = NULL;
/* Item stats */
if (!isDir) {
file = PT_ReadIndex(indexes, StringBuff(itemUrl) + 1, FETCH_HEADERS);
- if (file != NULL && file->statuscode == 200 ) {
+ if (file != NULL && file->statuscode == HTTP_OK ) {
size = file->size;
if (file->lastmodified) {
timestamp = get_time_rfc822(file->lastmodified);
@@ -735,7 +707,7 @@ static PT_Element proxytrack_process_DAV_Request(PT_Indexes indexes, const char
} /* Depth > 0 */
/* End of responses */
- StringStrcat(response,
+ StringCat(response,
"</multistatus>\r\n"
);
@@ -767,7 +739,7 @@ static PT_Element proxytrack_process_HTTP_List(PT_Indexes indexes, const char *
int i, isDir;
String html = STRING_EMPTY;
StringClear(html);
- StringStrcat(html,
+ StringCat(html,
"<html>"
PROXYTRACK_COMMENT_HEADER
DISABLE_IE_FRIENDLY_HTTP_ERROR_MESSAGES
@@ -787,28 +759,28 @@ static PT_Element proxytrack_process_HTTP_List(PT_Indexes indexes, const char *
int thisIsDir = (thisUrl[thisUrlLen - 1] == '/') ? 1 : 0;
if (thisIsDir == isDir) {
if (isDir)
- StringStrcat(html, "<tt>[DIR] ");
+ StringCat(html, "<tt>[DIR] ");
else
- StringStrcat(html, "<tt>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;");
- StringStrcat(html, "<a href=\"");
+ StringCat(html, "<tt>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;");
+ StringCat(html, "<a href=\"");
if (isDir) {
- StringStrcat(html, "http://proxytrack/");
+ StringCat(html, "http://proxytrack/");
}
- StringStrcat(html, url);
- StringStrcat(html, list[i]);
- StringStrcat(html, "\">");
- StringStrcat(html, list[i]);
- StringStrcat(html, "</a></tt><br />");
+ StringCat(html, url);
+ StringCat(html, list[i]);
+ StringCat(html, "\">");
+ StringCat(html, list[i]);
+ StringCat(html, "</a></tt><br />");
}
}
}
- StringStrcat(html,
+ StringCat(html,
"</body>"
"</html>");
PT_Enumerate_Delete(&list);
elt->size = StringLength(html);
elt->adr = StringAcquire(&html);
- elt->statuscode = 200;
+ elt->statuscode = HTTP_OK;
strcpy(elt->charset, "iso-8859-1");
strcpy(elt->contenttype, "text/html");
strcpy(elt->msg, "OK");
@@ -843,10 +815,10 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) {
#endif
StringRoom(localhost, 256);
- if (gethostname(StringBuff(localhost), StringCapacity(localhost) - 1) == 0) {
+ if (gethostname(StringBuffRW(localhost), (int) StringCapacity(localhost) - 1) == 0) {
StringLength(localhost) = (int) strlen(StringBuff(localhost));
} else {
- StringStrcpy(localhost, "localhost");
+ StringCopy(localhost, "localhost");
}
#ifdef _DEBUG
@@ -855,7 +827,7 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) {
if (buffer == NULL || line == NULL || line1 == NULL) {
CRITICAL("proxytrack_process_HTTP:memory exhausted");
-#if HTS_WIN
+#ifdef _WIN32
closesocket(soc_c);
#else
close(soc_c);
@@ -918,7 +890,7 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) {
char* chost = line + p;
if (*chost == ' ')
chost++;
- StringStrcpy(host, chost);
+ StringCopy(host, chost);
}
#ifndef NO_WEBDAV
else if ((p = strfield(line, "Depth: "))) {
@@ -935,8 +907,8 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) {
if (length > 0) {
if (length < 32768) {
StringRoom(davRequest, length + 1);
- if (recv(soc_c, StringBuff(davRequest), length, 0) == length) {
- StringBuff(davRequest)[length] = 0;
+ if (recv(soc_c, StringBuffRW(davRequest), length, 0) == length) {
+ StringBuffRW(davRequest)[length] = 0;
} else {
msgCode = 500;
msgError = "Posted Data Read Error";
@@ -970,9 +942,9 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) {
else if (strcasecmp(command, "options") == 0) {
const char * options = "GET, HEAD, OPTIONS, POST, PROPFIND, TRACE"
", MKCOL, DELETE, PUT"; /* Not supported */
- msgCode = 200;
+ msgCode = HTTP_OK;
StringRoom(headers, 8192);
- sprintf(StringBuff(headers),
+ sprintf(StringBuffRW(headers),
"HTTP/1.1 %d %s\r\n"
"DAV: 1, 2\r\n"
"MS-Author-Via: DAV\r\n"
@@ -1020,11 +992,10 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) {
/* Post-process request */
if (link_has_authority(surl)) {
- const unsigned int prefixLen = sizeof("http://proxytrack/") - 1;
- if (strncasecmp(surl, "http://proxytrack/", prefixLen) == 0) {
+ if (strncasecmp(surl, "http://proxytrack/", sizeof("http://proxytrack/") - 1) == 0) {
directHit = 1; /* Another direct hit hack */
}
- StringStrcpy(url, surl);
+ StringCopy(url, surl);
} else {
if (StringLength(host) > 0) {
/* Direct hit */
@@ -1046,15 +1017,23 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) {
}
/* Direct hit */
directHit = 1;
- StringStrcpy(url, "");
+ StringCopy(url, "");
+ if (!link_has_authority(toHit))
+ StringCat(url, "http://");
+ StringCat(url, toHit);
+ } else if (strncasecmp(surl, "/proxytrack/", sizeof("/proxytrack/") - 1) == 0) {
+ const char * toHit = surl + sizeof("/proxytrack/") - 1;
+ /* Direct hit */
+ directHit = 1;
+ StringCopy(url, "");
if (!link_has_authority(toHit))
- StringStrcat(url, "http://");
- StringStrcat(url, toHit);
+ StringCat(url, "http://");
+ StringCat(url, toHit);
} else {
/* Transparent proxy */
- StringStrcpy(url, "http://");
- StringStrcat(url, StringBuff(host));
- StringStrcat(url, surl);
+ StringCopy(url, "http://");
+ StringCat(url, StringBuff(host));
+ StringCat(url, surl);
}
} else {
msgCode = 500;
@@ -1073,7 +1052,7 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) {
if ((element = proxytrack_process_DAV_Request(indexes, StringBuff(url), davDepth)) != NULL) {
msgCode = element->statuscode;
StringRoom(davHeaders, 1024);
- sprintf(StringBuff(davHeaders),
+ sprintf(StringBuffRW(davHeaders),
"DAV: 1, 2\r\n"
"MS-Author-Via: DAV\r\n"
"Cache-Control: private\r\n");
@@ -1100,7 +1079,7 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) {
if (element != NULL) {
msgCode = element->statuscode;
StringRoom(headers, 8192);
- sprintf(StringBuff(headers),
+ sprintf(StringBuffRW(headers),
"HTTP/1.1 %d %s\r\n"
#ifndef NO_WEBDAV
"%s"
@@ -1138,12 +1117,12 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) {
} else {
/* No query string, no ending / : check the the <url>/ page */
if (StringLength(url) > 0 && StringBuff(url)[StringLength(url) - 1] != '/' && strchr(StringBuff(url), '?') == NULL) {
- StringStrcpy(urlRedirect, StringBuff(url));
- StringStrcat(urlRedirect, "/");
+ StringCopy(urlRedirect, StringBuff(url));
+ StringCat(urlRedirect, "/");
if (PT_LookupIndex(indexes, StringBuff(urlRedirect))) {
msgCode = 301; /* Moved Permanently */
StringRoom(headers, 8192);
- sprintf(StringBuff(headers),
+ sprintf(StringBuffRW(headers),
"HTTP/1.1 %d %s\r\n"
"Content-Type: text/html\r\n"
"Location: %s\r\n",
@@ -1155,7 +1134,7 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) {
StringLength(headers) = (int) strlen(StringBuff(headers));
/* */
StringRoom(output, 1024 + sizeof(PROXYTRACK_COMMENT_HEADER) + sizeof(DISABLE_IE_FRIENDLY_HTTP_ERROR_MESSAGES));
- sprintf(StringBuff(output),
+ sprintf(StringBuffRW(output),
"<html>"
PROXYTRACK_COMMENT_HEADER
DISABLE_IE_FRIENDLY_HTTP_ERROR_MESSAGES
@@ -1195,14 +1174,14 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) {
msgError = GetHttpMessage(msgCode);
}
StringRoom(headers, 256);
- sprintf(StringBuff(headers),
+ sprintf(StringBuffRW(headers),
"HTTP/1.1 %d %s\r\n"
"Content-type: text/html\r\n",
msgCode,
msgError);
StringLength(headers) = (int) strlen(StringBuff(headers));
StringRoom(output, 1024 + sizeof(PROXYTRACK_COMMENT_HEADER) + sizeof(DISABLE_IE_FRIENDLY_HTTP_ERROR_MESSAGES));
- sprintf(StringBuff(output),
+ sprintf(StringBuffRW(output),
"<html>"
PROXYTRACK_COMMENT_HEADER
DISABLE_IE_FRIENDLY_HTTP_ERROR_MESSAGES
@@ -1225,7 +1204,7 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) {
}
{
char tmp[20 + 1]; /* 2^64 = 18446744073709551616 */
- unsigned int dataSize = 0;
+ size_t dataSize = 0;
if (!headRequest) {
dataSize = StringLength(output);
if (dataSize == 0 && element != NULL) {
@@ -1233,30 +1212,30 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) {
}
}
sprintf(tmp, "%d", (int) dataSize);
- StringStrcat(headers, "Content-length: ");
- StringStrcat(headers, tmp);
- StringStrcat(headers, "\r\n");
+ StringCat(headers, "Content-length: ");
+ StringCat(headers, tmp);
+ StringCat(headers, "\r\n");
}
if (keepAlive) {
- StringStrcat(headers,
+ StringCat(headers,
"Connection: Keep-Alive\r\n"
"Proxy-Connection: Keep-Alive\r\n");
} else {
- StringStrcat(headers,
+ StringCat(headers,
"Connection: Close\r\n"
"Proxy-Connection: Close\r\n");
}
if (msgCode != 500)
- StringStrcat(headers, "X-Cache: HIT from ");
+ StringCat(headers, "X-Cache: HIT from ");
else
- StringStrcat(headers, "X-Cache: MISS from ");
- StringStrcat(headers, StringBuff(localhost));
- StringStrcat(headers, "\r\n");
+ StringCat(headers, "X-Cache: MISS from ");
+ StringCat(headers, StringBuff(localhost));
+ StringCat(headers, "\r\n");
/* Logging */
{
const char * contentType = "text/html";
- unsigned long int size = StringLength(output) ? StringLength(output) : ( element ? element->size : 0 );
+ size_t size = StringLength(output) ? StringLength(output) : ( element ? element->size : 0 );
/* */
String ip = STRING_EMPTY;
SOCaddr serverClient;
@@ -1265,7 +1244,7 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) {
if (getsockname(soc_c, (struct sockaddr*) &serverClient, &lenServerClient) == 0) {
ip = getip(&serverClient, lenServerClient);
} else {
- StringStrcpy(ip, "unknown");
+ StringCopy(ip, "unknown");
}
if (element != NULL && element->contenttype[0] != '\0') {
contentType = element->contenttype;
@@ -1275,11 +1254,11 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) {
}
/* Send reply */
- StringStrcat(headers, "Server: ProxyTrack " PROXYTRACK_VERSION " (HTTrack " HTTRACK_VERSIONID ")\r\n");
- StringStrcat(headers, "\r\n"); /* Headers separator */
- if (send(soc_c, StringBuff(headers), StringLength(headers), 0) != StringLength(headers)
- || ( !headRequest && StringLength(output) > 0 && send(soc_c, StringBuff(output), StringLength(output), 0) != StringLength(output))
- || ( !headRequest && StringLength(output) == 0 && element != NULL && element->adr != NULL && send(soc_c, element->adr, element->size, 0) != element->size)
+ StringCat(headers, "Server: ProxyTrack " PROXYTRACK_VERSION " (HTTrack " HTTRACK_VERSIONID ")\r\n");
+ StringCat(headers, "\r\n"); /* Headers separator */
+ if (send(soc_c, StringBuff(headers), (int)StringLength(headers), 0) != StringLength(headers)
+ || ( !headRequest && StringLength(output) > 0 && send(soc_c, StringBuff(output), (int)StringLength(output), 0) != StringLength(output))
+ || ( !headRequest && StringLength(output) == 0 && element != NULL && element->adr != NULL && send(soc_c, element->adr, (int)element->size, 0) != element->size)
)
{
keepAlive = 0; /* Error, abort connection */
@@ -1298,7 +1277,7 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) {
}
} while(keepAlive);
-#if HTS_WIN
+#ifdef _WIN32
closesocket(soc_c);
#else
close(soc_c);
@@ -1314,19 +1293,8 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) {
free(buffer);
}
-#ifdef _WIN32
-#define PTHREAD_RETURN
-#define PTHREAD_TYPE void
-#define PTHREAD_TYPE_FNC __cdecl
-#else
-#define PTHREAD_RETURN NULL
-#define PTHREAD_TYPE void*
-#define PTHREAD_TYPE_FNC
-#endif
-
/* Generic threaded function start */
-static int startThread(PTHREAD_TYPE (PTHREAD_TYPE_FNC * funct)(void* ),
- void* param)
+static int startThread(void (*funct)(void*), void* param)
{
if (param != NULL) {
#ifdef _WIN32
@@ -1361,14 +1329,14 @@ typedef struct proxytrack_process_th_p {
} proxytrack_process_th_p;
/* Generic socket/index function stub */
-static PTHREAD_TYPE PTHREAD_TYPE_FNC proxytrack_process_th(void* param_) {
+static void proxytrack_process_th(void* param_) {
proxytrack_process_th_p *param = (proxytrack_process_th_p *) param_;
T_SOC soc_c = param->soc_c;
PT_Indexes indexes = param->indexes;
void (*process)(PT_Indexes indexes, T_SOC soc_c) = param->process;
free(param);
process(indexes, soc_c);
- return PTHREAD_RETURN ;
+ return ;
}
/* Process generic socket/index operation */
@@ -1400,7 +1368,7 @@ static int proxytrack_start_HTTP(PT_Indexes indexes, T_SOC soc) {
struct sockaddr clientAddr;
int clientAddrLen = sizeof(struct sockaddr);
memset(&clientAddr, 0, sizeof(clientAddr));
- if ( (soc_c = accept(soc, &clientAddr, &clientAddrLen)) != INVALID_SOCKET) {
+ if ( (soc_c = (T_SOC) accept(soc, &clientAddr, &clientAddrLen)) != INVALID_SOCKET) {
if (!proxytrack_process_HTTP_threaded(indexes, soc_c)) {
CRITICAL("proxytrack_start_HTTP::Can not fork a thread");
}
@@ -1587,7 +1555,7 @@ static int proxytrack_start_ICP(PT_Indexes indexes, T_SOC soc) {
if (lenServerClient > 0) {
ip = getip(&serverClient, lenServerClient);
} else {
- StringStrcpy(ip, "unknown");
+ StringCopy(ip, "unknown");
}
LOG("ICP %s %s/%s %s" _ StringBuff(ip) _ LogRequest _ LogReply _ (UrlRequest ? UrlRequest : "-") );
StringFree(ip);
diff --git a/src/proxy/proxytrack.h b/src/proxy/proxytrack.h
index 498f4d8..737c5ea 100644
--- a/src/proxy/proxytrack.h
+++ b/src/proxy/proxytrack.h
@@ -29,13 +29,26 @@ Please visit our Website: http://www.httrack.com
#define WEBHTTRACK_PROXYTRACK
/* Version */
-#define PROXYTRACK_VERSION "0.4"
+#define PROXYTRACK_VERSION "0.5"
/* Store manager */
#include "../minizip/mztools.h"
#include "store.h"
#include <sys/stat.h>
+#ifndef HTS_DO_NOT_USE_FTIME
+#ifdef _WIN32
+#include <sys/utime.h>
+#else
+#include <utime.h>
+#endif
+#include <sys/timeb.h>
+#else
+#include <utime.h>
+#endif
+#ifndef _WIN32
+#include <pthread.h>
+#endif
/* generic */
@@ -233,23 +246,6 @@ static int linput_trim(FILE* fp,char* s,int max) {
return rlen;
}
-// copy of concat
-#define HTS_URLMAXSIZE 1024
-typedef struct concat_strc {
- char buff[16][HTS_URLMAXSIZE*2*2];
- int rol;
-} concat_strc;
-static char* concat(const char* a,const char* b) {
- static concat_strc* strc = NULL;
- if (strc == NULL) {
- strc = (concat_strc*) calloc(16, sizeof(concat_strc));
- }
- strc->rol=((strc->rol+1)%16); // roving pointer
- strcpy(strc->buff[strc->rol],a);
- if (b) strcat(strc->buff[strc->rol],b);
- return strc->buff[strc->rol];
-}
-
#ifndef S_ISREG
#define S_ISREG(m) ((m) & _S_IFREG)
#endif
@@ -264,25 +260,135 @@ static int fexist(char* s) {
return 0;
}
-#ifndef _WIN32
-#define fconv(a) (a)
-#define fconcat(a,b) concat(a,b)
-#endif
-
-#ifdef _WIN32
-static char* __fconv(char* a) {
+/* convertir une chaine en temps */
+static void set_lowcase(char* s) {
int i;
- for(i=0;i<(int) strlen(a);i++)
- if (a[i]=='/') // convertir
- a[i]='\\';
- return a;
+ for(i=0;i<(int) strlen(s);i++)
+ if ((s[i]>='A') && (s[i]<='Z'))
+ s[i]+=('a'-'A');
}
-static char* fconcat(char* a,char* b) {
- return __fconv(concat(a,b));
+static struct tm* convert_time_rfc822(struct tm *result,const char* s) {
+ char months[]="jan feb mar apr may jun jul aug sep oct nov dec";
+ char str[256];
+ char* a;
+ /* */
+ int result_mm=-1;
+ int result_dd=-1;
+ int result_n1=-1;
+ int result_n2=-1;
+ int result_n3=-1;
+ int result_n4=-1;
+ /* */
+
+ if ((int) strlen(s) > 200)
+ return NULL;
+ strcpy(str,s);
+ set_lowcase(str);
+ /* éliminer :,- */
+ while( (a=strchr(str,'-')) ) *a=' ';
+ while( (a=strchr(str,':')) ) *a=' ';
+ while( (a=strchr(str,',')) ) *a=' ';
+ /* tokeniser */
+ a=str;
+ while(*a) {
+ char *first, *last;
+ char tok[256];
+ /* découper mot */
+ while(*a==' ') a++; /* sauter espaces */
+ first=a;
+ while((*a) && (*a!=' ')) a++;
+ last=a;
+ tok[0]='\0';
+ if (first!=last) {
+ char* pos;
+ strncat(tok,first,(int) (last - first));
+ /* analyser */
+ if ( (pos=strstr(months,tok)) ) { /* month always in letters */
+ result_mm=((int) (pos - months))/4;
+ } else {
+ int number;
+ if (sscanf(tok,"%d",&number) == 1) { /* number token */
+ if (result_dd<0) /* day always first number */
+ result_dd=number;
+ else if (result_n1<0)
+ result_n1=number;
+ else if (result_n2<0)
+ result_n2=number;
+ else if (result_n3<0)
+ result_n3=number;
+ else if (result_n4<0)
+ result_n4=number;
+ } /* sinon, bruit de fond(+1GMT for exampel) */
+ }
+ }
+ }
+ if ((result_n1>=0) && (result_mm>=0) && (result_dd>=0) && (result_n2>=0) && (result_n3>=0) && (result_n4>=0)) {
+ if (result_n4>=1000) { /* Sun Nov 6 08:49:37 1994 */
+ result->tm_year=result_n4-1900;
+ result->tm_hour=result_n1;
+ result->tm_min=result_n2;
+ result->tm_sec=max(result_n3,0);
+ } else { /* Sun, 06 Nov 1994 08:49:37 GMT or Sunday, 06-Nov-94 08:49:37 GMT */
+ result->tm_hour=result_n2;
+ result->tm_min=result_n3;
+ result->tm_sec=max(result_n4,0);
+ if (result_n1<=50) /* 00 means 2000 */
+ result->tm_year=result_n1+100;
+ else if (result_n1<1000) /* 99 means 1999 */
+ result->tm_year=result_n1;
+ else /* 2000 */
+ result->tm_year=result_n1-1900;
+ }
+ result->tm_isdst=0; /* assume GMT */
+ result->tm_yday=-1; /* don't know */
+ result->tm_wday=-1; /* don't know */
+ result->tm_mon=result_mm;
+ result->tm_mday=result_dd;
+ return result;
+ }
+ return NULL;
}
-static char* fconv(char* a) {
- return __fconv(concat(a,""));
+static struct tm PT_GetTime(time_t t) {
+ struct tm tmbuf;
+#ifdef _WIN32
+ struct tm * tm = gmtime(&t);
+#else
+ struct tm * tm = gmtime_r(&t, &tmbuf);
+#endif
+ if (tm != NULL)
+ return *tm;
+ else {
+ memset(&tmbuf, 0, sizeof(tmbuf));
+ return tmbuf;
+ }
}
+static int set_filetime(const char* file, struct tm* tm_time) {
+ struct utimbuf tim;
+#ifndef HTS_DO_NOT_USE_FTIME
+ struct timeb B;
+ memset(&B, 0, sizeof(B));
+ B.timezone=0;
+ ftime( &B );
+ tim.actime = tim.modtime = mktime(tm_time) - B.timezone*60;
+#else
+ // bogus time (GMT/local)..
+ tim.actime=tim.modtime=mktime(tm_time);
#endif
+ return utime(file, &tim);
+}
+static int set_filetime_time_t(const char* file, time_t t) {
+ if (t != (time_t) 0 && t != (time_t) -1) {
+ struct tm tm = PT_GetTime(t);
+ return set_filetime(file, &tm);
+ }
+ return -1;
+}
+static int set_filetime_rfc822(const char* file, const char* date) {
+ struct tm buffer;
+ struct tm* tm_s = convert_time_rfc822(&buffer,date);
+ if (tm_s) {
+ return set_filetime(file,tm_s);
+ } else return -1;
+}
#endif
diff --git a/src/proxy/store.c b/src/proxy/store.c
index 1d17574..b8233a8 100644
--- a/src/proxy/store.c
+++ b/src/proxy/store.c
@@ -20,6 +20,8 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
Please visit our Website: http://www.httrack.com
*/
+/* Parts (inside ARC format routines) by Lars Clausen (lc@statsbiblioteket.dk) */
+
/* ------------------------------------------------------------ */
/* File: Cache manager for ProxyTrack */
/* Author: Xavier Roche */
@@ -28,6 +30,7 @@ Please visit our Website: http://www.httrack.com
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <time.h>
/* Locking */
#ifdef _WIN32
@@ -40,8 +43,10 @@ Please visit our Website: http://www.httrack.com
#define HTS_INTERNAL_BYTECODE
#include "htsinthash.h"
+#include "htsmd5.h"
#undef HTS_INTERNAL_BYTECODE
#include "../minizip/mztools.h"
+#include "../minizip/zip.h"
#include "htscore.h"
#include "htsback.h"
@@ -58,6 +63,8 @@ static PT_Element PT_ReadCache__New_u(PT_Index index, const char* url, int flags
static int PT_LookupCache__Old_u(PT_Index index, const char* url);
static PT_Element PT_ReadCache__Old_u(PT_Index index, const char* url, int flags);
+static int PT_LookupCache__Arc_u(PT_Index index, const char* url);
+static PT_Element PT_ReadCache__Arc_u(PT_Index index, const char* url, int flags);
/* Locking */
@@ -100,10 +107,12 @@ void MutexFree(PT_Mutex *pMutex) {
typedef struct _PT_Index__New _PT_Index__New;
typedef struct _PT_Index__Old _PT_Index__Old;
+typedef struct _PT_Index__Arc _PT_Index__Arc;
typedef struct _PT_Index_Functions _PT_Index_Functions;
typedef struct _PT_Index__New *PT_Index__New;
typedef struct _PT_Index__Old *PT_Index__Old;
+typedef struct _PT_Index__Arc *PT_Index__Arc;
typedef struct _PT_Index_Functions *PT_Index_Functions;
enum {
@@ -111,29 +120,42 @@ enum {
PT_CACHE_MIN = 0,
PT_CACHE__NEW = PT_CACHE_MIN,
PT_CACHE__OLD,
- PT_CACHE_MAX = PT_CACHE__OLD
+ PT_CACHE__ARC,
+ PT_CACHE_MAX = PT_CACHE__ARC
};
static int PT_LoadCache__New(PT_Index index, const char *filename);
static void PT_Index_Delete__New(PT_Index *pindex);
static PT_Element PT_ReadCache__New(PT_Index index, const char* url, int flags);
static int PT_LookupCache__New(PT_Index index, const char* url);
+static int PT_SaveCache__New(PT_Indexes indexes, const char *filename);
/**/
static int PT_LoadCache__Old(PT_Index index, const char *filename);
static void PT_Index_Delete__Old(PT_Index *pindex);
static PT_Element PT_ReadCache__Old(PT_Index index, const char* url, int flags);
static int PT_LookupCache__Old(PT_Index index, const char* url);
+/**/
+static int PT_LoadCache__Arc(PT_Index index, const char *filename);
+static void PT_Index_Delete__Arc(PT_Index *pindex);
+static PT_Element PT_ReadCache__Arc(PT_Index index, const char* url, int flags);
+static int PT_LookupCache__Arc(PT_Index index, const char* url);
+static int PT_SaveCache__Arc(PT_Indexes indexes, const char *filename);
struct _PT_Index_Functions {
+ /* Mandatory services */
int (*PT_LoadCache)(PT_Index index, const char *filename);
void (*PT_Index_Delete)(PT_Index *pindex);
PT_Element (*PT_ReadCache)(PT_Index index, const char* url, int flags);
int (*PT_LookupCache)(PT_Index index, const char* url);
+
+ /* Optional services */
+ int (*PT_SaveCache)(PT_Indexes indexes, const char *filename);
};
static _PT_Index_Functions _IndexFuncts[] = {
- { PT_LoadCache__New, PT_Index_Delete__New, PT_ReadCache__New, PT_LookupCache__New },
- { PT_LoadCache__Old, PT_Index_Delete__Old, PT_ReadCache__Old, PT_LookupCache__Old },
+ { PT_LoadCache__New, PT_Index_Delete__New, PT_ReadCache__New, PT_LookupCache__New, PT_SaveCache__New },
+ { PT_LoadCache__Old, PT_Index_Delete__Old, PT_ReadCache__Old, PT_LookupCache__Old, NULL },
+ { PT_LoadCache__Arc, PT_Index_Delete__Arc, PT_ReadCache__Arc, PT_LookupCache__Arc, PT_SaveCache__Arc },
{ NULL, NULL, NULL, NULL }
};
@@ -164,11 +186,22 @@ struct _PT_Index__Old {
int safeCache;
};
+/* Per-index state for an Internet Archive ARC (.arc) cache file. */
+struct _PT_Index__Arc {
+ PT_INDEX_COMMON_STRUCTURE;
+ FILE *file; /* archive handle: opened by PT_LoadCache__Arc, closed by PT_Index_Delete__Arc */
+ PT_Mutex fileLock; /* held around PT_ReadCache__Arc / PT_LookupCache__Arc */
+ int version; /* NOTE(review): not referenced by the ARC routines of this file - confirm usage */
+ char lastmodified[1024]; /* NOTE(review): likewise not referenced by the ARC routines here */
+ char line[2048]; /* scratch buffer holding the line currently being parsed */
+ char filenameIndexBuff[2048]; /* copy of field 0 (the URL) of the current URL record */
+};
+
struct _PT_Index {
int type;
union {
_PT_Index__New formatNew;
_PT_Index__Old formatOld;
+ _PT_Index__Arc formatArc;
struct {
PT_INDEX_COMMON_STRUCTURE;
} common;
@@ -194,7 +227,7 @@ struct _PT_Cache {
int count;
};
-PT_Indexes PT_New() {
+PT_Indexes PT_New(void) {
PT_Indexes index = (PT_Indexes) calloc(sizeof(_PT_Indexes), 1);
index->cil = inthash_new(127);
index->index_size = 0;
@@ -301,6 +334,16 @@ static void PT_Index_Delete__Old(PT_Index *pindex) {
}
}
+/* Release the resources owned by an ARC index: close the archive file when
+   it is open, then free the file mutex. A NULL pindex or a NULL *pindex is
+   a no-op. */
+static void PT_Index_Delete__Arc(PT_Index *pindex) {
+  PT_Index__Arc arc;
+  if (pindex == NULL || *pindex == NULL)
+    return;
+  arc = &(*pindex)->slots.formatArc;
+  if (arc->file != NULL)
+    fclose(arc->file);
+  MutexFree(&arc->fileLock);
+}
+
int PT_AddIndex(PT_Indexes indexes, const char *path) {
PT_Index index = PT_LoadCache(path);
if (index != NULL) {
@@ -319,7 +362,7 @@ PT_Element PT_Index_HTML_BuildRootInfo(PT_Indexes indexes) {
int i;
String html = STRING_EMPTY;
StringClear(html);
- StringStrcat(html,
+ StringCat(html,
"<html>"
PROXYTRACK_COMMENT_HEADER
DISABLE_IE_FRIENDLY_HTTP_ERROR_MESSAGES
@@ -330,26 +373,26 @@ PT_Element PT_Index_HTML_BuildRootInfo(PT_Indexes indexes) {
"<h3>Available sites in this cache:</h3><br />"
"<br />"
);
- StringStrcat(html, "<ul>\r\n");
+ StringCat(html, "<ul>\r\n");
for(i = 0 ; i < indexes->index_size ; i++) {
if (indexes->index[i] != NULL
&& indexes->index[i]->slots.common.startUrl[0] != '\0')
{
const char * url = indexes->index[i]->slots.common.startUrl;
- StringStrcat(html, "<li>\r\n");
- StringStrcat(html, "<a href=\"");
- StringStrcat(html, url);
- StringStrcat(html, "\">");
- StringStrcat(html, url);
- StringStrcat(html, "</a>\r\n");
- StringStrcat(html, "</li>\r\n");
+ StringCat(html, "<li>\r\n");
+ StringCat(html, "<a href=\"");
+ StringCat(html, url);
+ StringCat(html, "\">");
+ StringCat(html, url);
+ StringCat(html, "</a>\r\n");
+ StringCat(html, "</li>\r\n");
}
}
- StringStrcat(html, "</ul>\r\n");
- StringStrcat(html, "</body></html>\r\n");
+ StringCat(html, "</ul>\r\n");
+ StringCat(html, "</body></html>\r\n");
elt->size = StringLength(html);
elt->adr = StringAcquire(&html);
- elt->statuscode = 200;
+ elt->statuscode = HTTP_OK;
strcpy(elt->charset, "iso-8859-1");
strcpy(elt->contenttype, "text/html");
strcpy(elt->msg, "OK");
@@ -404,9 +447,9 @@ char ** PT_Enumerate(PT_Indexes indexes, const char *url, int subtree) {
char* ptr = NULL;
ptr += StringLength(list);
if (len > 0)
- StringStrcat(list, StringBuff(subitem));
+ StringCat(list, StringBuff(subitem));
if (isFolder)
- StringStrcat(list, "/");
+ StringCat(list, "/");
StringMemcat(list, "\0", 1); /* NULL terminated strings */
StringMemcat(listindexes, &ptr, sizeof(ptr));
listCount++;
@@ -434,7 +477,7 @@ char ** PT_Enumerate(PT_Indexes indexes, const char *url, int subtree) {
StringMemcat(listindexes, StringBuff(list), StringLength(list));
/* ---- no reallocation beyond this point (fixed addresses) ---- */
/* start of all strings (pointer) */
- startStrings = (startStrings - nullPointer) + StringBuff(listindexes);
+ startStrings = (startStrings - nullPointer) + StringBuffRW(listindexes);
/* transform indexes into references */
for(i = 0 ; i < listCount ; i++) {
char *ptr = NULL;
@@ -442,7 +485,7 @@ char ** PT_Enumerate(PT_Indexes indexes, const char *url, int subtree) {
memcpy(&ptr, &StringBuff(listindexes)[i*sizeof(char*)], sizeof(char*));
ndx = (unsigned int) (ptr - nullPointer);
ptr = startStrings + ndx;
- memcpy(&StringBuff(listindexes)[i*sizeof(char*)], &ptr, sizeof(char*));
+ memcpy(&StringBuffRW(listindexes)[i*sizeof(char*)], &ptr, sizeof(char*));
}
blk = StringAcquire(&listindexes);
StringFree(list);
@@ -460,16 +503,22 @@ void PT_Enumerate_Delete(char ***plist) {
}
}
-PT_Index PT_LoadCache(const char *filename) {
- int type = PT_CACHE_UNDEFINED;
+static int PT_GetType(const char *filename) {
char * dot = strrchr(filename, '.');
if (dot != NULL) {
if (strcasecmp(dot, ".zip") == 0) {
- type = PT_CACHE__NEW;
+ return PT_CACHE__NEW;
} else if (strcasecmp(dot, ".ndx") == 0 || strcasecmp(dot, ".dat") == 0) {
- type = PT_CACHE__OLD;
+ return PT_CACHE__OLD;
+ } else if (strcasecmp(dot, ".arc") == 0) {
+ return PT_CACHE__ARC;
}
}
+ return PT_CACHE_UNDEFINED;
+}
+
+PT_Index PT_LoadCache(const char *filename) {
+ int type = PT_GetType(filename);
if (type != PT_CACHE_UNDEFINED) {
PT_Index index = calloc(sizeof(_PT_Index), 1);
if (index != NULL) {
@@ -524,6 +573,44 @@ int PT_LookupCache(PT_Index index, const char* url) {
return 0;
}
+/* Save every indexed entry into 'filename'. The output format is the one
+   PT_GetType() derives from the file name. On success the file time is set
+   to PT_GetTimeIndex(indexes).
+   Returns 0 on success, -1 otherwise (unknown type, format without a save
+   routine, or save routine reporting a failure). */
+int PT_SaveCache(PT_Indexes indexes, const char *filename) {
+  const int format = PT_GetType(filename);
+  if (format == PT_CACHE_UNDEFINED)
+    return -1;
+  if (_IndexFuncts[format].PT_SaveCache == NULL)
+    return -1;
+  if (_IndexFuncts[format].PT_SaveCache(indexes, filename) != 0)
+    return -1;
+  (void) set_filetime_time_t(filename, PT_GetTimeIndex(indexes));
+  return 0;
+}
+
+/* Enumerate every URL of the central index locator and hand each readable
+   entry (headers and body) to 'callback'.
+   callback(arg, url, element) must return 0 to continue; any other value
+   stops the enumeration and is returned as is. The element is deleted right
+   after the callback returns. Entries that cannot be read are skipped.
+   Returns 0 when the enumeration completed, -1 when the locator refers to
+   an index outside of [0, index_size). */
+int PT_EnumCache(PT_Indexes indexes, int (*callback)(void *, const char *url, PT_Element), void *arg) {
+  if (indexes != NULL && indexes->cil != NULL) {
+    struct_inthash_enum en = inthash_enum_new(indexes->cil);
+    inthash_chain* chain;
+    while((chain = inthash_enum_next(&en)) != NULL) {
+      const long int index_id = (long int)chain->value.intg;
+      const char *const url = chain->name;
+      /* valid slots are 0 .. index_size - 1 (index_size itself is out of bounds) */
+      if (index_id >= 0 && index_id < indexes->index_size) {
+        PT_Element item = PT_ReadCache(indexes->index[index_id], url, FETCH_HEADERS | FETCH_BODY);
+        if (item != NULL) {
+          int ret = callback(arg, url, item);
+          PT_Element_Delete(&item);
+          if (ret != 0)
+            return ret;
+        }
+      } else {
+        CRITICAL("PT_ReadCache:Corrupted central index locator");
+        return -1;
+      }
+    }
+  }
+  return 0;
+}
+
time_t PT_Index_Timestamp(PT_Index index) {
return index->slots.common.timestamp;
}
@@ -569,8 +656,8 @@ int PT_IndexMerge(PT_Indexes indexes, PT_Index *pindex)
while((chain = inthash_enum_next(&en)) != NULL) {
const char * url = chain->name;
if (url != NULL && url[0] != '\0') {
- long int previous_index_id = 0;
- if (inthash_read(indexes->cil, url, (long int*)&previous_index_id)) {
+ intptr_t previous_index_id = 0;
+ if (inthash_read(indexes->cil, url, &previous_index_id)) {
if (previous_index_id >= 0 && previous_index_id < indexes->index_size) {
if (indexes->index[previous_index_id]->slots.common.timestamp > index->slots.common.timestamp) // existing entry is newer
break;
@@ -616,14 +703,14 @@ PT_Element PT_ReadIndex(PT_Indexes indexes, const char* url, int flags)
{
if (indexes != NULL)
{
- long int index_id;
+ intptr_t index_id;
if (strncmp(url, "http://", 7) == 0)
url += 7;
if (inthash_read(indexes->cil, url, &index_id)) {
if (index_id >= 0 && index_id <= indexes->index_size) {
PT_Element item = PT_ReadCache(indexes->index[index_id], url, flags);
if (item != NULL) {
- item->indexId = index_id;
+ item->indexId = (int) index_id;
return item;
}
} else {
@@ -637,7 +724,7 @@ PT_Element PT_ReadIndex(PT_Indexes indexes, const char* url, int flags)
int PT_LookupIndex(PT_Indexes indexes, const char* url) {
if (indexes != NULL)
{
- long int index_id;
+ intptr_t index_id;
if (strncmp(url, "http://", 7) == 0)
url += 7;
if (inthash_read(indexes->cil, url, &index_id)) {
@@ -651,6 +738,22 @@ int PT_LookupIndex(PT_Indexes indexes, const char* url) {
return 0;
}
+/* Return the most recent timestamp among all loaded indexes, or (time_t) -1
+   when 'indexes' is NULL or holds no index. Empty (NULL) slots are ignored. */
+time_t PT_GetTimeIndex(PT_Indexes indexes) {
+  time_t maxt = (time_t) -1;
+  if (indexes != NULL)
+  {
+    int found = 0;
+    int i;
+    for(i = 0 ; i < indexes->index_size ; i++) {
+      /* a slot may be NULL: other enumerations of this table check for it too */
+      if (indexes->index[i] != NULL) {
+        const time_t currt = indexes->index[i]->slots.common.timestamp;
+        if (!found || currt > maxt) {
+          maxt = currt;
+          found = 1;
+        }
+      }
+    }
+  }
+  return maxt;
+}
+
PT_Index PT_GetIndex(PT_Indexes indexes, int indexId) {
if (indexes != NULL && indexId >= 0 && indexId < indexes->index_size)
{
@@ -659,7 +762,7 @@ PT_Index PT_GetIndex(PT_Indexes indexes, int indexId) {
return NULL;
}
-PT_Element PT_ElementNew() {
+PT_Element PT_ElementNew(void) {
PT_Element r = NULL;
if ((r = calloc(sizeof(_PT_Element), 1)) == NULL)
return NULL;
@@ -690,6 +793,22 @@ static PT_Element PT_ReadCache__New(PT_Index index, const char* url, int flags)
/* New HTTrack cache (new.zip) format */
/* ------------------------------------------------------------ */
+/* Append "field: value\r\n" to the 'headers' buffer at offset 'headersSize'
+   and advance 'headersSize'. Nothing is written for a NULL or empty value.
+   No bounds checking is done: the caller provides a large enough buffer. */
+#define ZIP_FIELD_STRING(headers, headersSize, field, value) do { \
+  if ( ((value) != NULL) && (value)[0] != '\0') { \
+    sprintf((headers) + (headersSize), "%s: %s\r\n", (field), (value)); \
+    (headersSize) += (int) strlen((headers) + (headersSize)); \
+  } \
+} while(0)
+/* Same as ZIP_FIELD_STRING for an integer value; a zero value is not written. */
+#define ZIP_FIELD_INT(headers, headersSize, field, value) do { \
+  if ( ((value) != 0) ) { \
+    sprintf((headers) + (headersSize), "%s: "LLintP"\r\n", (field), (LLint)(value)); \
+    (headersSize) += (int) strlen((headers) + (headersSize)); \
+  } \
+} while(0)
+/* Same as ZIP_FIELD_INT, but the field is written even when the value is zero. */
+#define ZIP_FIELD_INT_FORCE(headers, headersSize, field, value) do { \
+  sprintf((headers) + (headersSize), "%s: "LLintP"\r\n", (field), (LLint)(value)); \
+  (headersSize) += (int) strlen((headers) + (headersSize)); \
+} while(0)
#define ZIP_READFIELD_STRING(line, value, refline, refvalue) do { \
if (line[0] != '\0' && strfield2(line, refline)) { \
strcpy(refvalue, value); \
@@ -821,7 +940,8 @@ static PT_Element PT_ReadCache__New_u(PT_Index index_, const char* url, int flag
char location_default[HTS_URLMAXSIZE*2];
char previous_save[HTS_URLMAXSIZE*2];
char previous_save_[HTS_URLMAXSIZE*2];
- long int hash_pos;
+ char catbuff[CATBUFF_SIZE];
+ intptr_t hash_pos;
int hash_pos_return;
PT_Element r = NULL;
if (index == NULL || index->hash == NULL || index->zFile == NULL || url == NULL || *url == 0)
@@ -835,7 +955,7 @@ static PT_Element PT_ReadCache__New_u(PT_Index index_, const char* url, int flag
strcpy(r->location, "");
if (strncmp(url, "http://", 7) == 0)
url += 7;
- hash_pos_return = inthash_read(index->hash, url, (long int*)&hash_pos);
+ hash_pos_return = inthash_read(index->hash, url, &hash_pos);
if (hash_pos_return) {
uLong posInZip;
@@ -888,6 +1008,16 @@ static PT_Element PT_ReadCache__New_u(PT_Index index_, const char* url, int flag
//ZIP_READFIELD_STRING(line, value, "X-Addr", ..); // Original address
//ZIP_READFIELD_STRING(line, value, "X-Fil", ..); // Original URI filename
ZIP_READFIELD_STRING(line, value, "X-Save", previous_save_); // Original save filename
+ if (line[0] != '\0') {
+ int len = r->headers ? ((int) strlen(r->headers)) : 0;
+ int nlen = (int) ( strlen(line) + 2 + strlen(value) + sizeof("\r\n") + 1 );
+ r->headers = realloc(r->headers, len + nlen);
+ r->headers[len] = '\0';
+ strcat(r->headers, line);
+ strcat(r->headers, ": ");
+ strcat(r->headers, value);
+ strcat(r->headers, "\r\n");
+ }
}
} while(offset < readSizeHeader && !lineEof);
totalHeader = offset;
@@ -955,13 +1085,14 @@ static PT_Element PT_ReadCache__New_u(PT_Index index_, const char* url, int flag
/* Read in memory from cache */
if (flags & FETCH_BODY) {
if (strnotempty(previous_save)) {
- FILE* fp = fopen(fconv(previous_save), "rb");
+ FILE* fp = fopen(fconv(catbuff,previous_save), "rb");
if (fp != NULL) {
r->adr = (char*) malloc(r->size + 4);
if (r->adr != NULL) {
if (r->size > 0 && fread(r->adr, 1, r->size, fp) != r->size) {
+ int last_errno = errno;
r->statuscode=STATUSCODE_INVALID;
- sprintf(r->msg,"Read error in cache disk data: %s", strerror(errno));
+ sprintf(r->msg,"Read error in cache disk data: %s", strerror(last_errno));
}
} else {
r->statuscode=STATUSCODE_INVALID;
@@ -970,7 +1101,7 @@ static PT_Element PT_ReadCache__New_u(PT_Index index_, const char* url, int flag
fclose(fp);
} else {
r->statuscode=STATUSCODE_INVALID;
- sprintf(r->msg, "Read error (can't open '%s') from cache", fconv(previous_save));
+ sprintf(r->msg, "Read error (can't open '%s') from cache", fconv(catbuff,previous_save));
}
} else {
r->statuscode=STATUSCODE_INVALID;
@@ -982,7 +1113,7 @@ static PT_Element PT_ReadCache__New_u(PT_Index index_, const char* url, int flag
if (flags & FETCH_BODY) {
r->adr=(char*) malloc(r->size+1);
if (r->adr!=NULL) {
- if (unzReadCurrentFile(index->zFile, r->adr, r->size) != r->size) { // erreur
+ if (unzReadCurrentFile(index->zFile, r->adr, (unsigned int) r->size) != r->size) { // erreur
free(r->adr);
r->adr=NULL;
r->statuscode=STATUSCODE_INVALID;
@@ -1024,6 +1155,121 @@ static PT_Element PT_ReadCache__New_u(PT_Index index_, const char* url, int flag
return r;
}
+/* PT_EnumCache() callback writing one cache entry into a new.zip archive.
+   arg     : the zipFile being written
+   url     : entry name inside the archive
+   element : entry to store (meta-data and, when present, body)
+   The HTTP-like meta-data block is handed to zipOpenNewFileInZip() as the
+   local extra field of the entry; the body is stored deflated. Any failure
+   of the zip library trips assertf(). Returns 0. */
+static int PT_SaveCache__New_Fun(void *arg, const char *url, PT_Element element) {
+  zipFile zFileOut = (zipFile) arg;
+  char headers[8192];
+  int headersSize = 0;
+  zip_fileinfo fi;
+  int zErr;
+  const char *url_adr = "";
+  const char *url_fil = "";
+  const char *message;
+
+  headers[0] = '\0';
+
+  /* Status line: 64 characters MAX. A long message is replaced by a pointer
+     to the X-StatusMessage field written below, which carries it in full. */
+  if (strlen(element->msg) < 32) {
+    message = element->msg;
+  } else {
+    message = "(See X-StatusMessage)";
+  }
+  sprintf(headers + headersSize, "HTTP/1.%c %d %s\r\n", '1', element->statuscode, message);
+  headersSize += (int) strlen(headers + headersSize);
+
+  /* Second line MUST ALWAYS be X-In-Cache */
+  ZIP_FIELD_INT_FORCE(headers, headersSize, "X-In-Cache", 1);
+  ZIP_FIELD_INT(headers, headersSize, "X-StatusCode", element->statuscode);
+  ZIP_FIELD_STRING(headers, headersSize, "X-StatusMessage", element->msg);
+  ZIP_FIELD_INT(headers, headersSize, "X-Size", element->size); // size
+  ZIP_FIELD_STRING(headers, headersSize, "Content-Type", element->contenttype); // content type
+  ZIP_FIELD_STRING(headers, headersSize, "X-Charset", element->charset); // charset
+  ZIP_FIELD_STRING(headers, headersSize, "Last-Modified", element->lastmodified); // last-modified
+  ZIP_FIELD_STRING(headers, headersSize, "Etag", element->etag); // Etag
+  ZIP_FIELD_STRING(headers, headersSize, "Location", element->location); // 'location' for moved entries
+  ZIP_FIELD_STRING(headers, headersSize, "Content-Disposition", element->cdispo); // Content-disposition
+  ZIP_FIELD_STRING(headers, headersSize, "X-Addr", url_adr); // Original address (empty: not written)
+  ZIP_FIELD_STRING(headers, headersSize, "X-Fil", url_fil); // Original URI filename (empty: not written)
+  ZIP_FIELD_STRING(headers, headersSize, "X-Save", ""); // Original save filename (empty: not written)
+
+  /* Entry time: taken from Last-Modified when it can be parsed, zeroed otherwise */
+  memset(&fi, 0, sizeof(fi));
+  if (element->lastmodified[0] != '\0') {
+    struct tm buffer;
+    struct tm* tm_s = convert_time_rfc822(&buffer, element->lastmodified);
+    if (tm_s) {
+      fi.tmz_date.tm_sec = (uInt) tm_s->tm_sec;
+      fi.tmz_date.tm_min = (uInt) tm_s->tm_min;
+      fi.tmz_date.tm_hour = (uInt) tm_s->tm_hour;
+      fi.tmz_date.tm_mday = (uInt) tm_s->tm_mday;
+      fi.tmz_date.tm_mon = (uInt) tm_s->tm_mon;
+      fi.tmz_date.tm_year = (uInt) tm_s->tm_year;
+    }
+  }
+
+  /* Open file - NOTE: headers are passed as the local extra field */
+  if ((zErr = zipOpenNewFileInZip(zFileOut,
+    url,
+    &fi,
+    /*
+    Store headers in realtime in the local file directory as extra field
+    In case of crash, we'll be able to recover the whole ZIP file by rescanning it
+    */
+    headers,
+    (uInt) strlen(headers),
+    NULL,
+    0,
+    NULL, /* comment */
+    Z_DEFLATED,
+    Z_DEFAULT_COMPRESSION)) != Z_OK)
+  {
+    int zip_zipOpenNewFileInZip_failed = 0;
+    assertf(zip_zipOpenNewFileInZip_failed);
+  }
+
+  /* Write data in cache */
+  if (element->size > 0 && element->adr != NULL) {
+    if ((zErr = zipWriteInFileInZip(zFileOut, element->adr, (int) element->size)) != Z_OK) {
+      int zip_zipWriteInFileInZip_failed = 0;
+      assertf(zip_zipWriteInFileInZip_failed);
+    }
+  }
+
+  /* Close */
+  if ((zErr = zipCloseFileInZip(zFileOut)) != Z_OK) {
+    int zip_zipCloseFileInZip_failed = 0;
+    assertf(zip_zipCloseFileInZip_failed);
+  }
+
+  /* Flush */
+  if ((zErr = zipFlush(zFileOut)) != 0) {
+    int zip_zipFlush_failed = 0;
+    assertf(zip_zipFlush_failed);
+  }
+
+  return 0;
+}
+
+/* Write all indexed entries into a new.zip archive named 'filename'.
+   Returns 0 on success. On failure the partially written file is removed
+   and a non-zero value is returned (-1 when the archive could not be
+   created or closed, otherwise the value reported by the enumeration). */
+static int PT_SaveCache__New(PT_Indexes indexes, const char *filename) {
+  zipFile zFileOut = zipOpen(filename, 0);
+  int ret;
+  if (zFileOut == NULL)
+    return -1;
+  ret = PT_EnumCache(indexes, PT_SaveCache__New_Fun, (void *) zFileOut);
+  /* Closing writes the end of the archive: a failure here means a broken file */
+  if (zipClose(zFileOut, "Created by HTTrack Website Copier/ProxyTrack "PROXYTRACK_VERSION) != Z_OK
+      && ret == 0) {
+    ret = -1;
+  }
+  zFileOut = NULL;
+  if (ret != 0)
+    (void) unlink(filename);
+  return ret;
+}
+
+
/* ------------------------------------------------------------ */
/* Old HTTrack cache (dat/ndx) format */
@@ -1167,7 +1413,7 @@ static int PT_LoadCache__Old(PT_Index index_, const char *filename) {
/* */
} else { // Vieille version du cache
/* */
- // fspc(opt->log,"warning"); fprintf(opt->log,"Cache: importing old cache format"LF);
+ // HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Cache: importing old cache format"LF);
cache->version=0; // cache 1.0
strcpy(cache->lastmodified,firstline);
}
@@ -1257,7 +1503,7 @@ static PT_Element PT_ReadCache__Old(PT_Index index, const char* url, int flags)
static PT_Element PT_ReadCache__Old_u(PT_Index index_, const char* url, int flags) {
PT_Index__Old cache = (PT_Index__Old) &index_->slots.formatOld;
- long int hash_pos;
+ intptr_t hash_pos;
int hash_pos_return;
char location_default[HTS_URLMAXSIZE*2];
char previous_save[HTS_URLMAXSIZE*2];
@@ -1276,7 +1522,7 @@ static PT_Element PT_ReadCache__Old_u(PT_Index index_, const char* url, int flag
strcpy(r->location, "");
if (strncmp(url, "http://", 7) == 0)
url += 7;
- hash_pos_return=inthash_read(cache->hash, url, (long int*)&hash_pos);
+ hash_pos_return=inthash_read(cache->hash, url, &hash_pos);
if (hash_pos_return) {
int pos = (int) hash_pos; /* simply */
@@ -1325,10 +1571,12 @@ static PT_Element PT_ReadCache__Old_u(PT_Index index_, const char* url, int flag
} else {
char check[256];
unsigned long size_read;
+ unsigned long int size_;
check[0]='\0';
//
cache_rint(cache->dat,&r->statuscode);
- cache_rLLint(cache->dat,&r->size);
+ cache_rLLint(cache->dat,&size_);
+ r->size = (size_t) size_;
cache_rstr(cache->dat,r->msg);
cache_rstr(cache->dat,r->contenttype);
if (cache->version >= 3)
@@ -1503,3 +1751,497 @@ static int PT_LookupCache__Old_u(PT_Index index_, const char* url) {
return 0;
}
+
+/* ------------------------------------------------------------ */
+/* Internet Archive Arc 1.0 (arc) format */
+/* Xavier Roche (roche@httrack.com) */
+/* Lars Clausen (lc@statsbiblioteket.dk) */
+/* ------------------------------------------------------------ */
+
+#define ARC_SP ' '
+
+/* Return a pointer to the start of field number 'pos' (0-based) of a
+   space-separated record line, or NULL when the line holds fewer fields
+   (or 'pos' is negative). The field is not NUL-terminated: it runs up to
+   the next ARC_SP or the end of the line. */
+static const char* getArcField(const char *line, int pos) {
+  const char *cursor = line;
+  while (pos > 0 && *cursor != '\0') {
+    if (*cursor == ARC_SP)
+      pos--;
+    cursor++;
+  }
+  return (pos == 0) ? cursor : NULL;
+}
+
+/* Copy field number 'npos' of 'line' into 'dest' as a NUL-terminated
+   string, keeping at most destMax - 1 characters of it.
+   Returns 'dest', or NULL (with 'dest' set to the empty string) when the
+   field does not exist. */
+static char* copyArcField(const char *line, int npos, char *dest, int destMax) {
+  const char *src = getArcField(line, npos);
+  int n = 0;
+  if (src == NULL) {
+    dest[0] = 0;
+    return NULL;
+  }
+  while (n < destMax - 1 && src[n] != '\0' && src[n] != ARC_SP) {
+    dest[n] = src[n];
+    n++;
+  }
+  dest[n] = 0;
+  return dest;
+}
+
+/* Parse the length field of a URL record line: field 9 when present,
+   otherwise field 4, otherwise field 2.
+   Returns the parsed integer, or -1 when no such field exists or the
+   selected field is not a number. */
+static int getArcLength(const char *line) {
+  const char *field = getArcField(line, 9);
+  int value;
+  if (field == NULL)
+    field = getArcField(line, 4);
+  if (field == NULL)
+    field = getArcField(line, 2);
+  if (field != NULL && sscanf(field, "%d", &value) == 1)
+    return value;
+  return -1;
+}
+
+/* Consume one character from 'file'. Returns 0 when it is a line feed
+   (0x0a), -1 for any other character or at end of file. */
+static int skipArcNl(FILE* file) {
+  const int c = fgetc(file);
+  return (c == 0x0a) ? 0 : -1;
+}
+
+/* Skip the data that follows a URL record: seek forward by the length
+   announced in 'line'. The newline that follows the data is NOT consumed.
+   Returns 0 on success, -1 when the length is unavailable or the seek fails. */
+static int skipArcData(FILE* file, const char *line) {
+  const int length = getArcLength(line);
+  if (length == -1)
+    return -1;
+  return (fseek(file, length, SEEK_CUR) == 0) ? 0 : -1;
+}
+
+/* Numeric value of a decimal digit character ('0' maps to 0). The caller
+   is responsible for passing a digit. */
+static int getDigit(const char digit) {
+  const int value = digit - '0';
+  return value;
+}
+
+/* Value of the two-digit decimal number starting at 'pos'. */
+static int getDigit2(const char * const pos) {
+  const int tens = getDigit(pos[0]);
+  const int units = getDigit(pos[1]);
+  return tens*10 + units;
+}
+
+/* Value of the four-digit decimal number starting at 'pos'. */
+static int getDigit4(const char * const pos) {
+  int value = 0;
+  int i;
+  for(i = 0 ; i < 4 ; i++) {
+    value = value*10 + getDigit(pos[i]);
+  }
+  return value;
+}
+
+/* Convert a broken-down time expressed in GMT into a time_t.
+   mktime() interprets its argument as local time, so the result is
+   corrected by the 'timezone' offset.
+   Returns (time_t) -1 when mktime() fails or yields 0. */
+static time_t getGMT(struct tm *tm) { /* hey, time_t is local! */
+ time_t t = mktime(tm);
+ if (t != (time_t) -1 && t != (time_t) 0) {
+ /* BSD does not have static "timezone" declared */
+ /* On these systems a local 'timezone' is derived from tm_gmtoff of the
+    current local time (sign inverted). Elsewhere 'timezone' is presumably
+    the global declared by <time.h> - confirm for the target platform. */
+#if (defined(BSD) || defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) || defined(__FreeBSD_kernel__))
+ time_t now = time(NULL);
+ time_t timezone = - localtime(&now)->tm_gmtoff;
+#endif
+ return (time_t) (t - timezone);
+ }
+ return (time_t) -1;
+}
+
+/* Extract the archive date (field 2) of a URL record line.
+   The date must be exactly 14 digits: YYYYMMDDhhmmss (Greenwich Mean Time),
+   for example 20050405154029.
+   Returns the matching time_t, or (time_t) -1 when the field is missing or
+   malformed. */
+static time_t getArcTimestamp(const char * const line) {
+  const char *stamp = getArcField(line, 2);
+  struct tm tm;
+  if (stamp == NULL || strspn(stamp, "0123456789") != 14)
+    return (time_t) -1;
+  memset(&tm, 0, sizeof(tm));
+  tm.tm_sec = getDigit2(stamp + 12);          /* 0 - 59 */
+  tm.tm_min = getDigit2(stamp + 10);          /* 0 - 59 */
+  tm.tm_hour = getDigit2(stamp + 8);          /* 0 - 23 */
+  tm.tm_mday = getDigit2(stamp + 6);          /* 1 - 31 */
+  tm.tm_mon = getDigit2(stamp + 4) - 1;       /* 0 - 11 */
+  tm.tm_year = getDigit4(stamp + 0) - 1900;   /* current year minus 1900 */
+  tm.tm_isdst = 0;
+  return getGMT(&tm);
+}
+
+/* Read the next line of the archive into index->line (cleared first).
+   Returns 0 when linput() reports a non-zero result, -1 otherwise. */
+static int readArcURLRecord(PT_Index__Arc index) {
+  char * const buffer = index->line;
+  buffer[0] = '\0';
+  return linput(index->file, buffer, sizeof(index->line) - 1) ? 0 : -1;
+}
+
+#define str_begins(str, sstr) ( strncmp(str, sstr, sizeof(sstr) - 1) == 0 )
+/* Tell whether 'url' starts with one of the schemes kept in the index:
+   http:, https:, ftp: or file:. Returns 1 when it does, 0 otherwise. */
+static int PT_CompatibleScheme(const char *url) {
+  if (str_begins(url, "http:"))
+    return 1;
+  if (str_begins(url, "https:"))
+    return 1;
+  if (str_begins(url, "ftp:"))
+    return 1;
+  if (str_begins(url, "file:"))
+    return 1;
+  return 0;
+}
+
+/* Load an Internet Archive ARC file: check the "filedesc://" signature,
+   take the archive timestamp from the first record, then walk all URL
+   records and register the file position of each compatible URL in the
+   index hashtable (record data is skipped, not read).
+   The walk stops at the end of the file or at the first corrupted record;
+   entries registered so far are kept.
+   Returns 1 on success, 0 on failure (bad arguments, file that cannot be
+   opened, empty file, bad signature or bad first record). */
+static int PT_LoadCache__Arc(PT_Index index_, const char *filename) {
+  PT_Index__Arc index;
+  inthash hashtable;
+  int entries = 0;
+
+  if (index_ == NULL || filename == NULL) {
+    fprintf(stderr, "Bad arguments"LF);
+    return 0;
+  }
+  index = &index_->slots.formatArc;
+  index->timestamp = file_timestamp(filename);
+  MutexInit(&index->fileLock);
+  index->file = fopen(filename, "rb");
+
+  // Opened ?
+  if (index->file == NULL) {
+    fprintf(stderr, "Unable to open file"LF);
+    return 0;
+  }
+  hashtable = index->hash;
+
+  /* Read first line */
+  if (readArcURLRecord(index) != 0) {
+    fprintf(stderr, "Bad file (empty ?)"LF);
+    fclose(index->file);
+    index->file = NULL;
+    return 0;
+  }
+  if (strncmp(index->line, "filedesc://", sizeof("filedesc://") - 1) != 0) {
+    fprintf(stderr, "Unexpected bad signature #%s"LF, index->line);
+    fclose(index->file);
+    index->file = NULL;
+    return 0;
+  }
+  /* Timestamp */
+  index->timestamp = getArcTimestamp(index->line);
+  /* Skip first entry */
+  if (skipArcData(index->file, index->line) != 0 || skipArcNl(index->file) != 0) {
+    fprintf(stderr, "Unexpected bad data offset size first entry"LF);
+    fclose(index->file);
+    index->file = NULL;
+    return 0;
+  }
+
+  /* Read all meta-entries (not data) */
+  while(!feof(index->file)) {
+    const long int fpos = ftell(index->file);
+    const char * filenameIndex;
+    if (fpos < 0 || skipArcNl(index->file) != 0 || readArcURLRecord(index) != 0) {
+      break ;
+    }
+    if (getArcLength(index->line) < 0) {
+      fprintf(stderr, "Corrupted cache meta entry #%d, aborting read"LF, (int)entries);
+      break ;
+    }
+    /* Field 0 always exists, hence the result can not be NULL */
+    filenameIndex = copyArcField(index->line, 0,
+      index->filenameIndexBuff, sizeof(index->filenameIndexBuff) - 1);
+    if (strncmp(filenameIndex, "http://", 7) == 0) {
+      filenameIndex += 7;
+    }
+    /* Always step over the record data, so that the next iteration starts
+       on a record boundary even when this entry is not registered */
+    if (skipArcData(index->file, index->line) != 0) {
+      fprintf(stderr, "Corrupted cache data entry #%d (truncated file?), aborting read"LF, (int)entries);
+      break ;
+    }
+    if (*filenameIndex == 0) {
+      fprintf(stderr, "Corrupted cache meta entry #%d"LF, (int)entries);
+      continue ;
+    }
+    /*fprintf(stdout, "adding %s [%d]\n", filenameIndex, (int)fpos);*/
+    if (PT_CompatibleScheme(index->filenameIndexBuff)) {
+      inthash_add(hashtable, filenameIndex, fpos); /* position of meta-data */
+      entries++;
+    }
+  }
+
+  /* OK */
+  return 1;
+}
+
+/* Header helpers used while parsing the HTTP headers stored in an ARC record.
+   'line' is the header name and 'value' the header value. When 'line' is
+   not empty and strfield2(line, refline) reports a match, 'value' is stored
+   into 'refvalue' and 'line' is emptied, so that the following helpers and
+   the generic header copy ignore this header.
+   NOTE(review): the copy is an unbounded strcpy(); 'value' comes from the
+   archive file, so every destination should be checked to be large enough. */
+#define HTTP_READFIELD_STRING(line, value, refline, refvalue) do { \
+ if (line[0] != '\0' && strfield2(line, refline)) { \
+ strcpy(refvalue, value); \
+ line[0] = '\0'; \
+ } \
+} while(0)
+/* Same as HTTP_READFIELD_STRING for an integer value, parsed with "%d";
+   0 is stored when 'value' is not a number. */
+#define HTTP_READFIELD_INT(line, value, refline, refvalue) do { \
+ if (line[0] != '\0' && strfield2(line, refline)) { \
+ int intval = 0; \
+ sscanf(value, "%d", &intval); \
+ (refvalue) = intval; \
+ line[0] = '\0'; \
+ } \
+} while(0)
+
+/* Locked wrapper around PT_ReadCache__Arc_u(): the index file mutex is held
+   for the whole duration of the read. Returns what the unlocked reader
+   returned. */
+static PT_Element PT_ReadCache__Arc(PT_Index index, const char* url, int flags) {
+  PT_Element element;
+  MutexLock(&index->slots.formatArc.fileLock);
+  element = PT_ReadCache__Arc_u(index, url, flags);
+  MutexUnlock(&index->slots.formatArc.fileLock);
+  return element;
+}
+
+/* Read one entry from an ARC archive (caller holds the file lock).
+   index_ : ARC index
+   url    : URL to fetch; a leading "http://" is ignored
+   flags  : the body is read only when FETCH_BODY is set
+   Returns NULL when the index is unusable, the URL is empty or no element
+   can be allocated. Otherwise returns a new element: on failure its
+   statuscode is STATUSCODE_INVALID and msg describes the error. Headers not
+   mapped to a dedicated member are collected in r->headers ("Name: value"
+   lines). r->location is either NULL or a heap copy of the Location header. */
+static PT_Element PT_ReadCache__Arc_u(PT_Index index_, const char* url, int flags)
+{
+  PT_Index__Arc index = (PT_Index__Arc) &index_->slots.formatArc;
+  char location_default[HTS_URLMAXSIZE*2];
+  intptr_t hash_pos;
+  int hash_pos_return;
+  PT_Element r = NULL;
+  if (index == NULL || index->hash == NULL || index->file == NULL || url == NULL || *url == 0)
+    return NULL;
+  if ((r = PT_ElementNew()) == NULL)
+    return NULL;
+  location_default[0] = '\0';
+  memset(r, 0, sizeof(_PT_Element));
+  /* Location is parsed into a local buffer, and duplicated before returning */
+  r->location = location_default;
+  strcpy(r->location, "");
+  if (strncmp(url, "http://", 7) == 0)
+    url += 7;
+  hash_pos_return = inthash_read(index->hash, url, &hash_pos);
+
+  if (hash_pos_return) {
+    if (fseek(index->file, (long)hash_pos, SEEK_SET) == 0) {
+      if (skipArcNl(index->file) == 0 && readArcURLRecord(index) == 0) {
+        long int fposMeta = ftell(index->file);
+        int dataLength = getArcLength(index->line);
+        const char *pos;
+
+        /* Read HTTP headers */
+        /* HTTP/1.1 404 Not Found */
+        if (linput(index->file, index->line, sizeof(index->line) - 1)) {
+          pos = getArcField(index->line, 1);
+          if (pos == NULL || sscanf(pos, "%d", &r->statuscode) != 1) {
+            r->statuscode = STATUSCODE_INVALID;
+          }
+          if ((pos = getArcField(index->line, 2)) != NULL) {
+            /* bounded by the destination buffer (not by the pointer size) */
+            r->msg[0] = '\0';
+            strncat(r->msg, pos, sizeof(r->msg) - 1);
+          }
+          while (linput(index->file, index->line, sizeof(index->line) - 1) && index->line[0] != '\0') {
+            char* const line = index->line;
+            char* value = strchr(line, ':');
+            if (value != NULL) {
+              *value = '\0';
+              for( value++ ; *value == ' ' || *value == '\t' ; value++);
+              HTTP_READFIELD_INT(line, value, "Content-Length", r->size); // size
+              HTTP_READFIELD_STRING(line, value, "Content-Type", r->contenttype); // content type
+              HTTP_READFIELD_STRING(line, value, "Last-Modified", r->lastmodified); // last-modified
+              HTTP_READFIELD_STRING(line, value, "Etag", r->etag); // Etag
+              HTTP_READFIELD_STRING(line, value, "Location", r->location); // 'location' for moved entries
+              HTTP_READFIELD_STRING(line, value, "Content-Disposition", r->cdispo); // Content-disposition
+              /* Any other header is appended to the raw header block */
+              if (line[0] != '\0') {
+                const size_t len = r->headers ? strlen(r->headers) : 0;
+                const size_t nlen = strlen(line) + 2 + strlen(value) + sizeof("\r\n") + 1;
+                char *grown = realloc(r->headers, len + nlen);
+                /* out of memory: keep the headers collected so far, drop this one */
+                if (grown != NULL) {
+                  r->headers = grown;
+                  sprintf(r->headers + len, "%s: %s\r\n", line, value);
+                }
+              }
+            }
+          }
+
+          /* FIXME charset */
+          if (r->contenttype[0] != '\0') {
+            char *sep = strchr(r->contenttype, ';');
+            if (sep != NULL) {
+              /*char *chs = strchr(pos, "charset=");*/
+              /*HTTP_READFIELD_STRING(line, value, "X-Charset", r->charset);*/
+              *sep = 0;
+              if ((sep = strchr(r->contenttype, ' ')) != NULL) {
+                *sep = 0;
+              }
+            }
+          }
+
+          /* Read data */
+          if (r->statuscode != STATUSCODE_INVALID) { /* Can continue */
+            if (flags & FETCH_BODY) {
+              const long int fposCurrent = ftell(index->file);
+              const long int metaSize = fposCurrent - fposMeta;
+              const long int available = dataLength - metaSize;
+              long int fetchSize = (long int) r->size;
+              /* No usable Content-Length: take everything left in the record */
+              if (fetchSize <= 0) {
+                fetchSize = available;
+              }
+              if (fetchSize < 0 || fetchSize > available) {
+                r->statuscode=STATUSCODE_INVALID;
+                strcpy(r->msg, "Cache Read Error : Truncated Data");
+              }
+              r->size = 0;
+              if (r->statuscode != STATUSCODE_INVALID) {
+                /* one spare byte, so that an empty body is not mistaken for
+                   an allocation failure */
+                r->adr = (char*) malloc((size_t) fetchSize + 1);
+                if (r->adr != NULL) {
+                  if (fetchSize > 0) {
+                    r->size = fread(r->adr, 1, (size_t) fetchSize, index->file);
+                    if (r->size != (size_t) fetchSize) {
+                      int last_errno = errno;
+                      r->statuscode=STATUSCODE_INVALID;
+                      sprintf(r->msg,"Read error in cache disk data: %s", strerror(last_errno));
+                    }
+                  }
+                } else {
+                  r->statuscode=STATUSCODE_INVALID;
+                  strcpy(r->msg,"Read error (memory exhausted) from cache");
+                }
+              }
+            }
+          }
+
+        } else {
+          r->statuscode=STATUSCODE_INVALID;
+          strcpy(r->msg, "Cache Read Error : Read Header Error");
+        }
+
+      } else {
+        r->statuscode=STATUSCODE_INVALID;
+        strcpy(r->msg, "Cache Read Error : Read Header Error");
+      }
+    } else {
+      r->statuscode=STATUSCODE_INVALID;
+      strcpy(r->msg, "Cache Read Error : Seek Error");
+    }
+
+  } else {
+    r->statuscode=STATUSCODE_INVALID;
+    strcpy(r->msg,"File Cache Entry Not Found");
+  }
+  /* Never return a pointer to the local location buffer */
+  if (r->location[0] != '\0') {
+    r->location = strdup(r->location);
+  } else {
+    r->location = NULL;
+  }
+  return r;
+}
+
+/* Locked wrapper around PT_LookupCache__Arc_u(): the index file mutex is
+   held during the lookup. Returns what the unlocked lookup returned. */
+static int PT_LookupCache__Arc(PT_Index index, const char* url) {
+  int found;
+  MutexLock(&index->slots.formatArc.fileLock);
+  found = PT_LookupCache__Arc_u(index, url);
+  MutexUnlock(&index->slots.formatArc.fileLock);
+  return found;
+}
+
+/* Tell whether 'url' is registered in the ARC index (no file access).
+   A leading "http://" is ignored. Returns 1 when found, 0 otherwise
+   (including NULL index, missing hashtable, NULL or empty url). */
+static int PT_LookupCache__Arc_u(PT_Index index_, const char* url) {
+  if (index_ != NULL) {
+    /* use the ARC slot of the union, as the other ARC routines do */
+    PT_Index__Arc cache = &index_->slots.formatArc;
+    if (cache->hash == NULL || url == NULL || *url == 0)
+      return 0;
+    if (strncmp(url, "http://", 7) == 0)
+      url += 7;
+    if (inthash_read(cache->hash, url, NULL))
+      return 1;
+  }
+  return 0;
+}
+
+/* State shared between PT_SaveCache__Arc() and its per-entry callback
+   PT_SaveCache__Arc_Fun(). */
+typedef struct PT_SaveCache__Arc_t {
+ PT_Indexes indexes; /* indexes being saved */
+ FILE *fp; /* output ARC file */
+ time_t t; /* PT_GetTimeIndex(indexes) at the time the save started */
+ char filename[64]; /* "httrack_<t>.arc": name written in the filedesc line and in every URL record */
+ struct tm buff; /* storage handed to convert_time_rfc822() */
+ char headers[8192]; /* scratch buffer for the HTTP headers of the current entry */
+ char md5[32 + 2]; /* checksum string produced by domd5mem(), or "-" when the entry has no body */
+} PT_SaveCache__Arc_t;
+
+/* PT_EnumCache() callback appending one entry to an ARC file.
+   arg     : PT_SaveCache__Arc_t state (output file, archive name, buffers)
+   url     : entry URL; "http://" is prepended when it has no authority part
+   element : entry to store
+   Layout written: <nl><URL-record><nl><HTTP headers><empty line><body>.
+   The record date is the entry Last-Modified date, or the archive time
+   (st->t) when that date is missing or can not be parsed.
+   Returns 0 on success, 1 on write error. */
+static int PT_SaveCache__Arc_Fun(void *arg, const char *url, PT_Element element) {
+  PT_SaveCache__Arc_t *st = (PT_SaveCache__Arc_t*) arg;
+  FILE * const fp = st->fp;
+  const struct tm* tm = convert_time_rfc822(&st->buff, element->lastmodified);
+  const char *location = "-";
+  size_t size_headers;
+
+  /* No usable Last-Modified: fall back to the archive time rather than
+     dereferencing a NULL date */
+  if (tm == NULL) {
+    st->buff = PT_GetTime(st->t);
+    tm = &st->buff;
+  }
+
+  sprintf(st->headers,
+    "HTTP/1.0 %d %s" "\r\n"
+    "X-Server: ProxyTrack " PROXYTRACK_VERSION "\r\n"
+    "Content-type: %s%s%s%s" "\r\n"
+    "Last-modified: %s" "\r\n"
+    "Content-length: %d" "\r\n"
+    ,
+    element->statuscode, element->msg,
+    /**/
+    element->contenttype,
+    (element->charset[0] ? "; charset=\"" : ""),
+    (element->charset[0] ? element->charset : ""),
+    (element->charset[0] ? "\"" : ""),
+    /**/
+    element->lastmodified,
+    (int) element->size
+    );
+  if (element->location != NULL && element->location[0] != '\0') {
+    sprintf(st->headers + strlen(st->headers), "Location: %s" "\r\n", element->location);
+    location = element->location;
+  }
+  if (element->headers != NULL) {
+    /* Append the raw headers only if they fit, keeping room for the final
+       empty line ("\r\n") and the terminating NUL */
+    const size_t used = strlen(st->headers);
+    if (used + 2 < sizeof(st->headers)
+        && strlen(element->headers) < sizeof(st->headers) - used - 2) {
+      strcat(st->headers, element->headers);
+    }
+  }
+  strcat(st->headers, "\r\n");
+  size_headers = strlen(st->headers);
+
+  /* doc == <nl><URL-record><nl><network_doc> */
+
+  /* Format: URL IP date mime result checksum location offset filename length */
+  if (element->adr != NULL) {
+    domd5mem(element->adr, element->size, st->md5, 1);
+  } else {
+    strcpy(st->md5, "-");
+  }
+  if (fprintf(fp,
+    /* nl */
+    "\n"
+    /* URL-record */
+    "%s%s %s %04d%02d%02d%02d%02d%02d %s %d %s %s %ld %s %ld"
+    /* nl */
+    "\n",
+    /* args */
+    ( link_has_authority(url) ? "" : "http://" ), url,
+    "0.0.0.0",
+    tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday, tm->tm_hour, tm->tm_min, tm->tm_sec,
+    element->contenttype,
+    element->statuscode,
+    st->md5, location, /* "-" when there is no location: fields are space-separated */
+    (long int)ftell(fp), st->filename,
+    (long int)( size_headers + element->size )) < 0) {
+    return 1; /* Error */
+  }
+  /* network_doc */
+  if (fwrite(st->headers, 1, size_headers, fp) != size_headers
+    || ( element->size > 0 && fwrite(element->adr, 1, element->size, fp) != element->size )
+    ) {
+    return 1; /* Error */
+  }
+
+  return 0;
+}
+
+/* Write all indexed entries into an ARC (version 2 block) file named
+   'filename': the filedesc header first, then one record per entry.
+   Returns 0 on success. On failure the partially written file is removed
+   and a non-zero value is returned (-1 when the file could not be created
+   or closed, otherwise the value reported by the enumeration). */
+static int PT_SaveCache__Arc(PT_Indexes indexes, const char *filename) {
+  FILE *fp = fopen(filename, "wb");
+  if (fp != NULL) {
+    PT_SaveCache__Arc_t st;
+    int ret;
+    time_t t = PT_GetTimeIndex(indexes);
+    struct tm tm = PT_GetTime(t);
+
+    /* version-2-block ==
+    filedesc://<path><sp><ip_address><sp><date><sp>text/plain<sp>200<sp>-<sp>-<sp>0<sp><filename><sp><length><nl>
+    2<sp><reserved><sp><origin-code><nl>
+    URL<sp>IP-address<sp>Archive-date<sp>Content-type<sp>Result-code<sp>Checksum<sp>Location<sp> Offset<sp>Filename<sp>Archive-length<nl>
+    <nl> */
+    const char* prefix =
+      "2 0 HTTrack Website Copier" "\n"
+      "URL IP-address Archive-Date Content-Type Result-code Checksum Location Offset Filename Archive-length" "\n" "\n";
+    sprintf(st.filename, "httrack_%d.arc", (int) t);
+    fprintf(fp, "filedesc://%s 0.0.0.0 %04d%02d%02d%02d%02d%02d text/plain 200 - - 0 %s %d" "\n"
+      "%s",
+      st.filename,
+      tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec,
+      st.filename, (int)strlen(prefix), prefix);
+    st.fp = fp;
+    st.indexes = indexes;
+    st.t = t;
+    ret = PT_EnumCache(indexes, PT_SaveCache__Arc_Fun, (void *)&st);
+    /* fclose() flushes the buffered data: a failure means an incomplete file */
+    if (fclose(fp) != 0 && ret == 0) {
+      ret = -1;
+    }
+    if (ret != 0)
+      (void) unlink(filename);
+    return ret;
+  }
+  return -1;
+}
diff --git a/src/proxy/store.h b/src/proxy/store.h
index 805bc20..5d4a76e 100644
--- a/src/proxy/store.h
+++ b/src/proxy/store.h
@@ -28,6 +28,13 @@ Please visit our Website: http://www.httrack.com
#ifndef WEBHTTRACK_PROXYTRACK_STORE
#define WEBHTTRACK_PROXYTRACK_STORE
+/* Includes */
+#ifndef _WIN32
+#include <pthread.h>
+#else
+#include "windows.h"
+#endif
+
/* Proxy */
typedef struct _PT_Index _PT_Index;
@@ -43,12 +50,12 @@ typedef struct _PT_CacheItem _PT_CacheItem;
typedef struct _PT_CacheItem *PT_CacheItem;
typedef struct _PT_Element {
- int indexId; // index identifier, if suitable (!= -1)
+ int indexId; // index identifier, if suitable (!= -1)
//
int statuscode; // status-code, -1=erreur, 200=OK,201=..etc (cf RFC1945)
char* adr; // adresse du bloc de mémoire, NULL=vide
- char* headers; // adresse des en têtes si présents
- unsigned long int size; // taille fichier
+ char* headers; // adresse des en têtes si présents (RFC822 format)
+ size_t size; // taille fichier
char msg[1024]; // error message ("\0"=undefined)
char contenttype[64]; // content-type ("text/html" par exemple)
char charset[64]; // charset ("iso-8859-1" par exemple)
@@ -85,11 +92,14 @@ int PT_AddIndex(PT_Indexes index, const char *path);
int PT_RemoveIndex(PT_Indexes index, int indexId);
int PT_IndexMerge(PT_Indexes indexes, PT_Index *pindex);
PT_Index PT_GetIndex(PT_Indexes indexes, int indexId);
+time_t PT_GetTimeIndex(PT_Indexes indexes);
/* Indexes list */
PT_Element PT_Index_HTML_BuildRootInfo(PT_Indexes indexes);
char ** PT_Enumerate(PT_Indexes indexes, const char *url, int subtree);
void PT_Enumerate_Delete(char ***plist);
+int PT_EnumCache(PT_Indexes indexes, int (*callback)(void *, const char *url, PT_Element), void *arg);
+int PT_SaveCache(PT_Indexes indexes, const char *filename);
/* Index */
PT_Index PT_LoadCache(const char *filename);