diff options
author | Xavier Roche <xroche@users.noreply.github.com> | 2012-03-19 12:51:31 +0000 |
---|---|---|
committer | Xavier Roche <xroche@users.noreply.github.com> | 2012-03-19 12:51:31 +0000 |
commit | 25adbdabb47499fe641c7bd9595024ff82667058 (patch) | |
tree | 4200bb5e746bc1c0606de615ec99f0a247d4d9ba /src/htscoremain.c | |
parent | ad5b7acc19290ff91e0f42a0de448a26760fcf99 (diff) |
httrack 3.30.1
Diffstat (limited to 'src/htscoremain.c')
-rw-r--r-- | src/htscoremain.c | 569 |
1 files changed, 460 insertions, 109 deletions
diff --git a/src/htscoremain.c b/src/htscoremain.c index a03635f..1162c18 100644 --- a/src/htscoremain.c +++ b/src/htscoremain.c @@ -42,15 +42,19 @@ Please visit our Website: http://www.httrack.com #include "htsdefines.h" #include "htsalias.h" #include "htswrap.h" +#include "htsmodules.h" + #include <ctype.h> #if HTS_WIN #else #ifndef HTS_DO_NOT_USE_UID /* setuid */ #include <pwd.h> +#ifdef HAVE_UNISTD_H #include <unistd.h> #endif #endif +#endif extern int exit_xh; // sortir prématurément @@ -61,7 +65,7 @@ extern int IPV6_resolver; // Add a command in the argc/argv #define cmdl_add(token,argc,argv,buff,ptr) \ argv[argc]=(buff+ptr); \ - strcpy(argv[argc],token); \ + strcpybuff(argv[argc],token); \ ptr += (strlen(argv[argc])+2); \ argc++ @@ -73,15 +77,56 @@ extern int IPV6_resolver; argv[i]=argv[i-1];\ } \ argv[0]=(buff+ptr); \ - strcpy(argv[0],token); \ + strcpybuff(argv[0],token); \ ptr += (strlen(argv[0])+2); \ argc++ #define htsmain_free() do { if (url != NULL) { free(url); } } while(0) +#define ensureUrlCapacity(url, urlsize, size) do { \ + if (urlsize < size || url == NULL) { \ + urlsize = size; \ + if (url == NULL) { \ + url = (char*) malloct(urlsize); \ + if (url != NULL) url[0]='\0'; \ + } else { \ + url = (char*) realloct(url, urlsize); \ + } \ + if (url == NULL) { \ + HTS_PANIC_PRINTF("* memory exhausted"); \ + htsmain_free(); \ + return -1; \ + } \ + } \ +} while(0) + +static void set_wrappers(void) { +#if HTS_ANALYSTE + // custom wrappers + hts_htmlcheck_init = (t_hts_htmlcheck_init) htswrap_read("init"); + hts_htmlcheck_uninit = (t_hts_htmlcheck_uninit) htswrap_read("free"); + hts_htmlcheck_start = (t_hts_htmlcheck_start) htswrap_read("start"); + hts_htmlcheck_end = (t_hts_htmlcheck_end) htswrap_read("end"); + hts_htmlcheck_chopt = (t_hts_htmlcheck_chopt) htswrap_read("change-options"); + hts_htmlcheck = (t_hts_htmlcheck) htswrap_read("check-html"); + hts_htmlcheck_query = (t_hts_htmlcheck_query) htswrap_read("query"); + hts_htmlcheck_query2 = (t_hts_htmlcheck_query2) htswrap_read("query2"); + hts_htmlcheck_query3 = (t_hts_htmlcheck_query3) htswrap_read("query3"); + hts_htmlcheck_loop = (t_hts_htmlcheck_loop) htswrap_read("loop"); + hts_htmlcheck_check = (t_hts_htmlcheck_check) htswrap_read("check-link"); + hts_htmlcheck_pause = (t_hts_htmlcheck_pause) htswrap_read("pause"); + hts_htmlcheck_filesave = (t_hts_htmlcheck_filesave) htswrap_read("save-file"); + hts_htmlcheck_linkdetected = (t_hts_htmlcheck_linkdetected) htswrap_read("link-detected"); + hts_htmlcheck_xfrstatus = (t_hts_htmlcheck_xfrstatus) htswrap_read("transfer-status"); + hts_htmlcheck_savename = (t_hts_htmlcheck_savename) htswrap_read("save-name"); + hts_htmlcheck_sendhead = (t_hts_htmlcheck_sendhead) htswrap_read("send-header"); + hts_htmlcheck_receivehead = (t_hts_htmlcheck_receivehead) htswrap_read("receive-header"); +#endif +} + // Main, récupère les paramètres et appelle le robot #if HTS_ANALYSTE -int hts_main(int argc, char **argv) { +HTSEXT_API int hts_main(int argc, char **argv) { #else int main(int argc, char **argv) { #endif @@ -96,6 +141,7 @@ int main(int argc, char **argv) { int argv_url=-1; // ==0 : utiliser cache et doit.log char* argv_firsturl=NULL; // utilisé pour nommage par défaut char* url = NULL; // URLS séparées par un espace + int url_sz = 65535; //char url[65536]; // URLS séparées par un espace // the parametres httrackp httrack; @@ -113,33 +159,12 @@ int main(int argc, char **argv) { int switch_chroot=0; /* chroot ? */ #endif // - url = malloc(65536); - if (url == NULL) { - HTS_PANIC_PRINTF("* memory exhausted"); - htsmain_free(); - return -1; - } - url[0]='\0'; + ensureUrlCapacity(url, url_sz, 65536); // #if HTS_ANALYSTE // custom wrappers - hts_htmlcheck_init = (t_hts_htmlcheck_init) htswrap_read("init"); - hts_htmlcheck_uninit = (t_hts_htmlcheck_uninit) htswrap_read("free"); - hts_htmlcheck_start = (t_hts_htmlcheck_start) htswrap_read("start"); - hts_htmlcheck_end = (t_hts_htmlcheck_end) htswrap_read("end"); - hts_htmlcheck_chopt = (t_hts_htmlcheck_chopt) htswrap_read("change-options"); - hts_htmlcheck = (t_hts_htmlcheck) htswrap_read("check-html"); - hts_htmlcheck_query = (t_hts_htmlcheck_query) htswrap_read("query"); - hts_htmlcheck_query2 = (t_hts_htmlcheck_query2) htswrap_read("query2"); - hts_htmlcheck_query3 = (t_hts_htmlcheck_query3) htswrap_read("query3"); - hts_htmlcheck_loop = (t_hts_htmlcheck_loop) htswrap_read("loop"); - hts_htmlcheck_check = (t_hts_htmlcheck_check) htswrap_read("check-link"); - hts_htmlcheck_pause = (t_hts_htmlcheck_pause) htswrap_read("pause"); - hts_htmlcheck_filesave = (t_hts_htmlcheck_filesave) htswrap_read("save-file"); - hts_htmlcheck_linkdetected = (t_hts_htmlcheck_linkdetected) htswrap_read("link-detected"); - hts_htmlcheck_xfrstatus = (t_hts_htmlcheck_xfrstatus) htswrap_read("transfer-status"); - hts_htmlcheck_savename = (t_hts_htmlcheck_savename) htswrap_read("save-name"); + set_wrappers(); #endif // options par défaut @@ -157,7 +182,7 @@ int main(int argc, char **argv) { httrack.maxsite=-1; // taille max site (aucune) httrack.maxfile_nonhtml=-1; // taille max fichier non html httrack.maxfile_html=-1; // idem pour html - httrack.maxsoc=8; // nbre socket max + httrack.maxsoc=4; // nbre socket max httrack.fragment=-1; // pas de fragmentation httrack.nearlink=0; // ne pas prendre les liens non-html "adjacents" httrack.makeindex=1; // faire un index @@ -169,10 +194,12 @@ int main(int argc, char **argv) { httrack.cache=1; // cache prioritaire httrack.shell=0; // pas de shell par defaut httrack.proxy.active=0; // pas de proxy + strcpybuff(httrack.proxy.bindhost, ""); // bind default host httrack.user_agent_send=1; // envoyer un user-agent - strcpy(httrack.user_agent,"Mozilla/4.5 (compatible; HTTrack 3.0x; Windows 98)"); + strcpybuff(httrack.user_agent,"Mozilla/4.5 (compatible; HTTrack 3.0x; Windows 98)"); httrack.savename_83=0; // noms longs par défaut httrack.savename_type=0; // avec structure originale + httrack.mimehtml=0; // pas MIME-html httrack.parsejava=1; // parser classes httrack.hostcontrol=0; // PAS de control host pour timeout et traffic jammer httrack.retry=2; // 2 retry par défaut @@ -187,26 +214,29 @@ int main(int argc, char **argv) { httrack.accept_cookie=1; // gérer les cookies httrack.cookie=NULL; httrack.http10=0; // laisser http/1.1 + httrack.nokeepalive = 0; // pas keep-alive httrack.nocompression=0; // pas de compression httrack.tolerant=0; // ne pas accepter content-length incorrect httrack.parseall=1; // tout parser (tags inconnus, par exemple) httrack.norecatch=0; // ne pas reprendre les fichiers effacés par l'utilisateur httrack.verbosedisplay=0; // pas d'animation texte - strcpy(httrack.footer,HTS_DEFAULT_FOOTER); + httrack.sizehack=0; // size hack + httrack.urlhack=1; // url hack (normalizer) + strcpybuff(httrack.footer,HTS_DEFAULT_FOOTER); httrack.ftp_proxy=1; // proxy http pour ftp - strcpy(httrack.filelist,""); - strcpy(httrack.lang_iso,"en, *"); - strcpy(httrack.mimedefs,"\n"); // aucun filtre mime (\n IMPORTANT) + strcpybuff(httrack.filelist,""); + strcpybuff(httrack.lang_iso,"en, *"); + strcpybuff(httrack.mimedefs,"\n"); // aucun filtre mime (\n IMPORTANT) // httrack.log=stdout; httrack.errlog=stderr; httrack.flush=1; // flush sur les fichiers log - httrack.aff_progress=0; + //httrack.aff_progress=0; httrack.keyboard=0; // - strcpy(httrack.path_html,""); - strcpy(httrack.path_log,""); - strcpy(httrack.path_bin,""); + strcpybuff(httrack.path_html,""); + strcpybuff(httrack.path_log,""); + strcpybuff(httrack.path_bin,""); // httrack.maxlink=100000; // 100,000 liens max par défaut (400Kb) httrack.maxfilter=200; // 200 filtres max par défaut @@ -222,8 +252,10 @@ int main(int argc, char **argv) { httrack.dir_topindex=0; // do not built top index (yet) // httrack.state.stop=0; // stopper + httrack.state.exit_xh=0; // abort // _DEBUG_HEAD=0; // pas de debuggage en têtes + #if HTS_WIN #if HTS_ANALYSTE!=2 @@ -269,9 +301,9 @@ int main(int argc, char **argv) { lien_back r; char* path; FILE* fp; - strcpy(r.url_adr,argv[2]); - strcpy(r.url_fil,argv[3]); - strcpy(r.url_sav,argv[4]); + strcpybuff(r.url_adr,argv[2]); + strcpybuff(r.url_fil,argv[3]); + strcpybuff(r.url_sav,argv[4]); path=argv[5]; r.status=1000; run_launch_ftp(&r); @@ -298,11 +330,11 @@ int main(int argc, char **argv) { char* a; if ((a=strrchr(path,'/'))) { httrack.path_bin[0]='\0'; - strncat(httrack.path_bin,argv[0],(int) a - (int) path); + strncatbuff(httrack.path_bin,argv[0],(int) a - (int) path); } } #else - strcpy(httrack.path_bin,HTS_HTTRACKDIR); + strcpybuff(httrack.path_bin, HTS_HTTRACKDIR); #endif @@ -316,7 +348,7 @@ int main(int argc, char **argv) { while( (a=strchr(argv[na],9)) ) *a=' '; /* equivalent to "empty parameter" */ if ((strcmp(argv[na],HTS_NOPARAM)==0) || (strcmp(argv[na],HTS_NOPARAM2)==0)) // (none) - strcpy(argv[na],"\"\""); + strcpybuff(argv[na],"\"\""); if (strncmp(argv[na],"-&",2)==0) argv[na][1]='%'; } @@ -402,6 +434,11 @@ int main(int argc, char **argv) { argv_url=-1; /* forcer */ httrack.quiet=1; } + } else if (strcmp(tmp_argv[0] + 2,"quiet") == 0) { + httrack.quiet=1; // ne pas poser de questions! (nohup par exemple) + } else if (strcmp(tmp_argv[0] + 2,"continue") == 0) { + argv_url=-1; /* forcer */ + httrack.quiet=1; } } } @@ -436,7 +473,7 @@ int main(int argc, char **argv) { FILE* fp; int x_argc2; - //strcpy(x_argvblk2,"httrack "); + //strcpybuff(x_argvblk2,"httrack "); fp=fopen("config","rb"); if (fp) { linput(fp,x_argvblk2+strlen(x_argvblk2),32000); @@ -482,7 +519,7 @@ int main(int argc, char **argv) { if (argv[na][0]=='"') { char tempo[HTS_CDLMAXSIZE]; - strcpy(tempo,argv[na]+1); + strcpybuff(tempo,argv[na]+1); if (tempo[strlen(tempo)-1]!='"') { char s[HTS_CDLMAXSIZE]; sprintf(s,"Missing quote in %s",argv[na]); @@ -491,7 +528,7 @@ int main(int argc, char **argv) { return -1; } tempo[strlen(tempo)-1]='\0'; - strcpy(argv[na],tempo); + strcpybuff(argv[na],tempo); } if (cmdl_opt(argv[na])) { // option @@ -509,34 +546,34 @@ int main(int argc, char **argv) { } else { char* a; na++; - strcpy(httrack.path_html,""); - strcpy(httrack.path_log,""); + strcpybuff(httrack.path_html,""); + strcpybuff(httrack.path_log,""); a=strstr(argv[na],"\",\""); // rechercher en premier, au cas ou -O "c:\pipo,test","c:\test" if (!a) a=strchr(argv[na],','); // 2 path else a++; // position , if (a) { - strncat(httrack.path_html,argv[na],(int) (a-argv[na])); - strcat(httrack.path_log,a+1); + strncatbuff(httrack.path_html,argv[na],(int) (a-argv[na])); + strcatbuff(httrack.path_log,a+1); } else { - strcpy(httrack.path_log,argv[na]); - strcpy(httrack.path_html,argv[na]); + strcpybuff(httrack.path_log,argv[na]); + strcpybuff(httrack.path_html,argv[na]); } // Eliminer les cas comme -O "C:\mirror\" if (httrack.path_log[0]=='"') { // Guillemets char tmp[256]; - strcpy(tmp,httrack.path_log+1); + strcpybuff(tmp,httrack.path_log+1); if (tmp[strlen(tmp)-1]=='"') tmp[strlen(tmp)-1]='\0'; - strcpy(httrack.path_log,tmp); + strcpybuff(httrack.path_log,tmp); } if (httrack.path_html[0]=='"') { char tmp[256]; - strcpy(tmp,httrack.path_html+1); + strcpybuff(tmp,httrack.path_html+1); if (tmp[strlen(tmp)-1]=='"') tmp[strlen(tmp)-1]='\0'; - strcpy(httrack.path_html,tmp); + strcpybuff(httrack.path_html,tmp); } check_path(httrack.path_log,argv_firsturl); if (check_path(httrack.path_html,argv_firsturl)) { @@ -583,8 +620,6 @@ int main(int argc, char **argv) { } // traiter -O - - /* load doit.log and insert in current command line */ if ( fexist(fconcat(httrack.path_log,"hts-cache/doit.log")) && (argv_url<=0) ) { FILE* fp=fopen(fconcat(httrack.path_log,"hts-cache/doit.log"),"rb"); @@ -611,8 +646,21 @@ int main(int argc, char **argv) { /* Insert parameters BUT so that they can be in the same order */ if (lastp) { if (strnotempty(lastp)) { + //char* argv0; + //int len; insert_after_argc=argc-insert_after; + //argv0 = (argv+insert_after)[0]; cmdl_ins(lastp,insert_after_argc,(argv+insert_after),x_argvblk,x_ptr); + /* + DONE IN 'next_token' + len = strlen(argv0); + if (len >= 2 && argv0[0]=='\"' && argv0[len-1]=='\"') { // "foo" + char tempo[1024]; + tempo[0] = '\0'; + strncatbuff(tempo, argv0+1, len-2); + strcpybuff(argv0, tempo); + } + */ argc=insert_after_argc+insert_after; insert_after++; } @@ -668,7 +716,7 @@ int main(int argc, char **argv) { if (argv[i][0]=='-') { if (argv[i][1]=='-') { // --xxx if ((strfield2(argv[i]+2,"clean")) || (strfield2(argv[i]+2,"tide"))) { // nettoyer - strcpy(argv[i]+1,""); + strcpybuff(argv[i]+1,""); if (fexist(fconcat(httrack.path_log,"hts-log.txt"))) remove(fconcat(httrack.path_log,"hts-log.txt")); if (fexist(fconcat(httrack.path_log,"hts-err.txt"))) @@ -699,7 +747,7 @@ int main(int argc, char **argv) { // } else if (strfield2(argv[i]+2,"catchurl")) { // capture d'URL via proxy temporaire! argv_url=1; // forcer a passer les parametres - strcpy(argv[i]+1,"#P"); + strcpybuff(argv[i]+1,"#P"); // } else if (strfield2(argv[i]+2,"updatehttrack")) { #ifdef _WIN32 @@ -714,10 +762,10 @@ int main(int argc, char **argv) { char *args[8]; printf("Cheking for updates...\n"); - strcpy(_args[0],argv[0]); - strcpy(_args[1],"--get"); + strcpybuff(_args[0],argv[0]); + strcpybuff(_args[1],"--get"); sprintf(_args[2],HTS_UPDATE_WEBSITE,HTS_PLATFORM,""); - strcpy(_args[3],"--quickinfo"); + strcpybuff(_args[3],"--quickinfo"); args[0]=_args[0]; args[1]=_args[1]; args[2]=_args[2]; @@ -781,7 +829,7 @@ int main(int argc, char **argv) { FILE* fp; int x_argc; - //strcpy(x_argvblk,"httrack "); + //strcpybuff(x_argvblk,"httrack "); fp=fopen(fconcat(httrack.path_log,"hts-cache/doit.log"),"rb"); if (fp) { linput(fp,x_argvblk+strlen(x_argvblk),8192); @@ -892,6 +940,10 @@ int main(int argc, char **argv) { } } else { // aucune URL définie et pas de cache + if (argc > 1 && strcmp(argv[0], "-#h") == 0) { + printf("HTTrack version "HTTRACK_VERSION"%s\n", WHAT_is_available); + exit(0); + } #if HTS_ANALYSTE!=2 if (httrack.quiet) { #endif @@ -976,7 +1028,7 @@ int main(int argc, char **argv) { if (argv[na][0]=='"') { char tempo[HTS_CDLMAXSIZE]; - strcpy(tempo,argv[na]+1); + strcpybuff(tempo,argv[na]+1); if (tempo[strlen(tempo)-1]!='"') { char s[HTS_CDLMAXSIZE]; sprintf(s,"Missing quote in %s",argv[na]); @@ -985,7 +1037,7 @@ int main(int argc, char **argv) { return -1; } tempo[strlen(tempo)-1]='\0'; - strcpy(argv[na],tempo); + strcpybuff(argv[na],tempo); } if (cmdl_opt(argv[na])) { // option @@ -1009,6 +1061,7 @@ int main(int argc, char **argv) { httrack.savename_type=1003; // mettre dans le répertoire courant httrack.depth=0; // ne pas explorer la page httrack.accept_cookie=0; // pas de cookies + httrack.robots=0; // pas de robots break; case 'w': httrack.wizard=2; // wizard 'soft' (ne pose pas de questions) httrack.travel=0; @@ -1078,7 +1131,7 @@ int main(int argc, char **argv) { sscanf(com+1,"%d",&httrack.maxsoc); while(isdigit((unsigned char)*(com+1))) com++; httrack.maxsoc=max(httrack.maxsoc,1); // FORCER A 1 - } else httrack.maxsoc=8; + } else httrack.maxsoc=4; break; // @@ -1122,7 +1175,7 @@ int main(int argc, char **argv) { htsmain_free(); return -1; } - strcpy(httrack.savename_userdef,argv[na]); + strcpybuff(httrack.savename_userdef,argv[na]); if (strnotempty(httrack.savename_userdef)) httrack.savename_type = -1; // userdef! else @@ -1175,6 +1228,8 @@ int main(int argc, char **argv) { case '&': case '%': { // deuxième jeu d'options com++; switch(*com) { + case 'M': httrack.mimehtml = 1; if (*(com+1)=='0') { httrack.mimehtml=0; com++; } break; + case 'k': httrack.nokeepalive = 0; if (*(com+1)=='0') { httrack.nokeepalive = 1; com++; } break; case 'x': httrack.passprivacy=1; if (*(com+1)=='0') { httrack.passprivacy=0; com++; } break; // No passwords in html files case 'q': httrack.includequery=1; if (*(com+1)=='0') { httrack.includequery=0; com++; } break; // No passwords in html files case 'I': httrack.kindex=1; if (isdigit((unsigned char)*(com+1))) { sscanf(com+1,"%d",&httrack.kindex); while(isdigit((unsigned char)*(com+1))) com++; } @@ -1188,7 +1243,9 @@ int main(int argc, char **argv) { case 'P': httrack.parseall=1; if (*(com+1)=='0') { httrack.parseall=0; com++; } break; // tout parser case 'n': httrack.norecatch=1; if (*(com+1)=='0') { httrack.norecatch=0; com++; } break; // ne pas reprendre fichiers effacés localement case 's': httrack.sizehack=1; if (*(com+1)=='0') { httrack.sizehack=0; com++; } break; // hack sur content-length + case 'u': httrack.urlhack=1; if (*(com+1)=='0') { httrack.urlhack=0; com++; } break; // url hack case 'v': httrack.verbosedisplay=2; if (isdigit((unsigned char)*(com+1))) { sscanf(com+1,"%d",&httrack.verbosedisplay); while(isdigit((unsigned char)*(com+1))) com++; } break; + case 'i': httrack.dir_topindex = 1; if (*(com+1)=='0') { httrack.dir_topindex=0; com++; } break; // preserve: no footer, original links case 'p': @@ -1208,7 +1265,53 @@ int main(int argc, char **argv) { htsmain_free(); return -1; } - strcpy(httrack.filelist,argv[na]); + strcpybuff(httrack.filelist,argv[na]); + } + break; + case 'b': // bind + if ((na+1>=argc) || (argv[na+1][0]=='-')) { + HTS_PANIC_PRINTF("Option %b needs to be followed by a blank space, and a local hostname"); + printf("Example: -%%b \"ip4.localhost\"\n"); + htsmain_free(); + return -1; + } else{ + na++; + if (strlen(argv[na])>=254) { + HTS_PANIC_PRINTF("Hostname string too long"); + htsmain_free(); + return -1; + } + strcpybuff(httrack.proxy.bindhost,argv[na]); + } + break; + case 'S': // Scan Rules list + if ((na+1>=argc) || (argv[na+1][0]=='-')) { + HTS_PANIC_PRINTF("Option %S needs to be followed by a blank space, and a text filename"); + printf("Example: -%%S \"myfilterlist.txt\"\n"); + htsmain_free(); + return -1; + } else{ + INTsys fz; + na++; + fz = fsize(argv[na]); + if (fz < 0) { + HTS_PANIC_PRINTF("File url list could not be opened"); + htsmain_free(); + return -1; + } else { + FILE* fp = fopen(argv[na], "rb"); + if (fp != NULL) { + int cl = (int) strlen(url); + ensureUrlCapacity(url, url_sz, cl + fz + 8192); + if ((INTsys)fread(url + cl, 1, fz, fp) != fz) { + HTS_PANIC_PRINTF("File url list could not be read"); + htsmain_free(); + return -1; + } + fclose(fp); + *(url + cl + fz) = '\0'; + } + } } break; case 'A': // assume @@ -1227,12 +1330,12 @@ int main(int argc, char **argv) { } // --assume standard if (strcmp(argv[na],"standard") == 0) { - strcpy(httrack.mimedefs,"\n"); - strcat(httrack.mimedefs,HTS_ASSUME_STANDARD); - strcat(httrack.mimedefs,"\n"); + strcpybuff(httrack.mimedefs,"\n"); + strcatbuff(httrack.mimedefs,HTS_ASSUME_STANDARD); + strcatbuff(httrack.mimedefs,"\n"); } else { - strcat(httrack.mimedefs,argv[na]); - strcat(httrack.mimedefs,"\n"); + strcatbuff(httrack.mimedefs,argv[na]); + strcatbuff(httrack.mimedefs,"\n"); } a=httrack.mimedefs; while(*a) { @@ -1259,7 +1362,7 @@ int main(int argc, char **argv) { htsmain_free(); return -1; } - strcpy(httrack.lang_iso,argv[na]); + strcpybuff(httrack.lang_iso,argv[na]); } break; // @@ -1276,7 +1379,7 @@ int main(int argc, char **argv) { htsmain_free(); return -1; } - strcpy(httrack.footer,argv[na]); + strcpybuff(httrack.footer,argv[na]); } break; case 'H': // debug headers @@ -1316,6 +1419,81 @@ int main(int argc, char **argv) { } break; + case 'W': // Wrapper callback + // --wrapper check-link=obj.so:check_link + if ((na+1>=argc) || (argv[na+1][0]=='-')) { + HTS_PANIC_PRINTF("Option %W needs to be followed by a blank space, and a <callback-name>=<myfile.so>:<function-name> field"); + printf("Example: -%%W check-link=checklink.so:check\n"); + htsmain_free(); + return -1; + } else { + char callbackname[128]; + char* a = argv[na + 1]; + char* pos = strchr(a, '='); + na++; + if (pos != NULL && (pos - a) > 0 && (pos - a + 2) < sizeof(callbackname)) { + char* posf = strchr(pos + 1, ':'); + char filename[1024]; + callbackname[0] = '\0'; + strncatbuff(callbackname, a, pos - a); + pos++; + if (posf != NULL && (posf - pos) > 0 && (posf - pos + 2) < sizeof(filename)) { + void* userfunction; + filename[0] = '\0'; + strncatbuff(filename, pos, posf - pos); + posf++; + userfunction = getFunctionPtr(filename, posf); + if (userfunction != NULL) { + if ((void*)htswrap_read(callbackname) != NULL) { + if (htswrap_add(callbackname, userfunction)) { + if (!httrack.quiet) { + set_wrappers(); + if ((void*)htswrap_read(callbackname) == userfunction) { + printf("successfully plugged [%s -> %s:%s]\n", callbackname, posf, filename); + } else { + char tmp[1024 * 2]; + sprintf(tmp, "option %%W : unable to (re)plug the function %s from the file %s for the callback %s", posf, filename, callbackname); + HTS_PANIC_PRINTF(tmp); + htsmain_free(); + return -1; + } + } + } else { + char tmp[1024 * 2]; + sprintf(tmp, "option %%W : unable to plug the function %s from the file %s for the callback %s", posf, filename, callbackname); + HTS_PANIC_PRINTF(tmp); + htsmain_free(); + return -1; + } + } else { + char tmp[1024 * 2]; + sprintf(tmp, "option %%W : unknown or undefined callback %s", callbackname); + HTS_PANIC_PRINTF(tmp); + htsmain_free(); + return -1; + } + } else { + char tmp[1024 * 2]; + sprintf(tmp, "option %%W : unable to load the function %s in the file %s for the callback %s", posf, filename, callbackname); + HTS_PANIC_PRINTF(tmp); + htsmain_free(); + return -1; + } + } else { + HTS_PANIC_PRINTF("Syntax error in option %W : filename error : this function needs to be followed by a blank space, and a <callback-name>=<myfile.so>:<function-name> field"); + printf("Example: -%%W check-link=checklink.so:check\n"); + htsmain_free(); + return -1; + } + } else { + HTS_PANIC_PRINTF("Syntax error in option %W : this function needs to be followed by a blank space, and a <callback-name>=<myfile.so>:<function-name> field"); + printf("Example: -%%W check-link=checklink.so:check\n"); + htsmain_free(); + return -1; + } + } + break; + default: { char s[HTS_CDLMAXSIZE]; sprintf(s,"invalid option %%%c\n",*com); @@ -1376,17 +1554,185 @@ int main(int argc, char **argv) { } } break; - + // - case '#': { // non documenté (appel de l'interface) + case '#': { // non documenté com++; switch(*com) { + case 'C': // list cache files : httrack -#C '*spid*.gif' will attempt to find the matching file + { + int hasFilter = 0; + int found = 0; + char* filter=NULL; + cache_back cache; + inthash cache_hashtable=inthash_new(HTS_HASH_SIZE); + int backupXFR = htsMemoryFastXfr; + int sendb = 0; + if (isdigit((unsigned char)*(com+1))) { + sscanf(com+1,"%d",&sendb); + while(isdigit((unsigned char)*(com+1))) com++; + } else sendb=0; + if (!((na+1>=argc) || (argv[na+1][0]=='-'))) { + na++; + hasFilter = 1; + filter=argv[na]; + } + htsMemoryFastXfr = 1; /* fast load */ + + memset(&cache, 0, sizeof(cache_back)); + cache.type=1; // cache? + cache.log=stdout; // log? + cache.errlog=stderr; // err log? + cache.ptr_ant=cache.ptr_last=0; // pointeur pour anticiper + cache.hashtable=(void*)cache_hashtable; /* copy backcache hash */ + cache.ro = 1; /* read only */ + if (cache.hashtable) { + char adr[HTS_URLMAXSIZE*2]; + char fil[HTS_URLMAXSIZE*2]; + char url[HTS_URLMAXSIZE*2]; + char linepos[256]; + int pos; + char* cacheNdx = readfile(fconcat(httrack.path_log,"hts-cache/new.ndx")); + cache_init(&cache,&httrack); /* load cache */ + if (cacheNdx != NULL) { + char firstline[256]; + char* a = cacheNdx; + a+=cache_brstr(a, firstline); + a+=cache_brstr(a, firstline); + while ( a != NULL ) { + a=strchr(a+1,'\n'); /* start of line */ + if (a) { + htsblk r; + /* */ + a++; + /* read "host/file" */ + a+=binput(a,adr,HTS_URLMAXSIZE); + a+=binput(a,fil,HTS_URLMAXSIZE); + url[0]='\0'; + if (!link_has_authority(adr)) + strcatbuff(url, "http://"); + strcatbuff(url, adr); + strcatbuff(url, fil); + /* read position */ + a+=binput(a,linepos,200); + sscanf(linepos,"%d",&pos); + if (!hasFilter + || + (strjoker(url, filter, NULL, NULL) != NULL) + ) { + r = cache_read(&httrack, &cache, adr, fil, "", NULL); // lire entrée cache + data + if (r.statuscode != -1) { // No errors + found++; + if (!hasFilter) { + fprintf(stdout, "%s%s%s\r\n", + (link_has_authority(adr)) ? "" : "http://", + adr, fil); + } else { + char msg[256], cdate[256]; + char sav[HTS_URLMAXSIZE*2]; + infostatuscode(msg, r.statuscode); + time_gmt_rfc822(cdate); + + fprintf(stdout, "HTTP/1.1 %d %s\r\n", + r.statuscode, + r.msg[0] ? r.msg : msg + ); + fprintf(stdout, "X-Host: %s\r\n", adr); + fprintf(stdout, "X-File: %s\r\n", fil); + fprintf(stdout, "X-URL: %s%s%s\r\n", + (link_has_authority(adr)) ? "" : "http://", + adr, fil); + if (url_savename(adr, fil, sav, NULL, NULL, NULL, NULL, + &httrack, NULL, 0, NULL, 0, &cache, NULL, 0, 0)!=-1) { + if (fexist(sav)) { + fprintf(stdout, "Content-location: %s\r\n", sav); + } + } + fprintf(stdout, "Date: %s\r\n", cdate); + fprintf(stdout, "Server: HTTrack Website Copier/"HTTRACK_VERSION"\r\n"); + if (r.lastmodified[0]) { + fprintf(stdout, "Last-Modified: %s\r\n", r.lastmodified); + } + if (r.etag[0]) { + fprintf(stdout, "Etag: %s\r\n", r.etag); + } + if (r.totalsize >= 0) { + fprintf(stdout, "Content-Length: "LLintP"\r\n", r.totalsize); + } + fprintf(stdout, "X-Content-Length: "LLintP"\r\n", (r.size >= 0) ? r.size : (-r.size) ); + if (r.contenttype >= 0) { + fprintf(stdout, "Content-Type: %s\r\n", r.contenttype); + } + if (r.cdispo[0]) { + fprintf(stdout, "Content-Disposition: %s\r\n", r.cdispo); + } + if (r.contentencoding[0]) { + fprintf(stdout, "Content-Encoding: %s\r\n", r.contentencoding); + } + if (r.is_chunk) { + fprintf(stdout, "Transfer-Encoding: chunked\r\n"); + } +#if HTS_USEOPENSSL + if (r.ssl) { + fprintf(stdout, "X-SSL: yes\r\n"); + } +#endif + if (r.is_write) { + fprintf(stdout, "X-Direct-To-Disk: yes\r\n"); + } + if (r.compressed) { + fprintf(stdout, "X-Compressed: yes\r\n"); + } + if (r.notmodified) { + fprintf(stdout, "X-Not-Modified: yes\r\n"); + } + if (r.is_chunk) { + fprintf(stdout, "X-Chunked: yes\r\n"); + } + fprintf(stdout, "\r\n"); + /* Send the body */ + if (sendb && r.adr) { + fprintf(stdout, "%s\r\n", r.adr); + } + } + } + } + } + } + freet(cacheNdx); + } + } + if (!found) { + fprintf(stderr, "No cache entry found%s%s%s\r\n", + (hasFilter)?" for '":"", + (hasFilter)?filter:"", + (hasFilter)?"'":"" + ); + } + htsMemoryFastXfr = backupXFR; + return 0; + } + break; + case 'X': +#ifndef STRDEBUG + fprintf(stderr, "warning: no string debugging support built, option has no effect\n"); +#endif + htsMemoryFastXfr=1; + if (*(com+1)=='0') { htsMemoryFastXfr=0; com++; } + break; + case '~': /* internal lib test */ + { + char thisIsATestYouShouldSeeAnError[12]; + strcpybuff(thisIsATestYouShouldSeeAnError, "0123456789012345678901234567890123456789"); + return 0; + } + break; case 'f': httrack.flush=1; break; case 'h': - printf("HTTrack version "HTTRACK_VERSION"\n"); - exit(1); + printf("HTTrack version "HTTRACK_VERSION"%s\n", WHAT_is_available); + return 0; break; - case 'p': httrack.aff_progress=1; break; + case 'p': /* httrack.aff_progress=1; deprecated */ break; case 'S': httrack.shell=1; break; // stdin sur un shell case 'K': httrack.keyboard=1; break; // vérifier stdin // @@ -1458,10 +1804,10 @@ int main(int argc, char **argv) { if (*a == ':') { // un port est présent, <proxy>:port sscanf(a+1,"%d",&httrack.proxy.port); httrack.proxy.name[0]='\0'; - strncat(httrack.proxy.name,argv[na],(int) (a - argv[na])); + strncatbuff(httrack.proxy.name,argv[na],(int) (a - argv[na])); } else { // <proxy> httrack.proxy.port=8080; - strcpy(httrack.proxy.name,argv[na]); + strcpybuff(httrack.proxy.name,argv[na]); } } break; @@ -1478,7 +1824,7 @@ int main(int argc, char **argv) { htsmain_free(); return -1; } - strcpy(httrack.user_agent,argv[na]); + strcpybuff(httrack.user_agent,argv[na]); if (strnotempty(httrack.user_agent)) httrack.user_agent_send=1; else @@ -1499,7 +1845,7 @@ int main(int argc, char **argv) { htsmain_free(); return -1; } - strcpy(httrack.sys_com,argv[na]); + strcpybuff(httrack.sys_com,argv[na]); if (strnotempty(httrack.sys_com)) httrack.sys_com_exec=1; else @@ -1521,10 +1867,10 @@ int main(int argc, char **argv) { } else { // URL/filters char tempo[1024]; - if (strnotempty(url)) strcat(url," "); // espace de séparation - strcpy(tempo,unescape_http_unharm(argv[na],1)); + if (strnotempty(url)) strcatbuff(url," "); // espace de séparation + strcpybuff(tempo,unescape_http_unharm(argv[na],1)); escape_spc_url(tempo); - strcat(url,tempo); + strcatbuff(url,tempo); } // if argv=- etc. } // for @@ -1563,28 +1909,28 @@ int main(int argc, char **argv) { rpath[0]='\0'; if (c != httrack.path_html) { if (httrack.path_html[0]!='/') - strcat(rpath,"./"); - strncat(rpath,httrack.path_html,(int) (c - httrack.path_html)); + strcatbuff(rpath,"./"); + strncatbuff(rpath,httrack.path_html,(int) (c - httrack.path_html)); } { char tmp[1024]; - strcpy(tmp,c); strcpy(httrack.path_html,tmp); - strcpy(tmp,d); strcpy(httrack.path_log,tmp); + strcpybuff(tmp,c); strcpybuff(httrack.path_html,tmp); + strcpybuff(tmp,d); strcpybuff(httrack.path_log,tmp); } } else { - strcpy(rpath,"./"); - strcpy(httrack.path_html,"/"); - strcpy(httrack.path_log,"/"); + strcpybuff(rpath,"./"); + strcpybuff(httrack.path_html,"/"); + strcpybuff(httrack.path_log,"/"); } if (rpath[0]) { printf("[changing root path to %s (path_data=%s,path_log=%s)]\n",rpath,httrack.path_html,httrack.path_log); if (chroot(rpath)) { printf("ERROR! Can not chroot to %s!\n",rpath); - exit(0); + return -1; } if (chdir("/")) { /* new root */ printf("ERROR! Can not chdir to %s!\n",rpath); - exit(0); + return -1; } } else printf("WARNING: chroot not possible with these paths\n"); @@ -1668,6 +2014,9 @@ int main(int argc, char **argv) { if (fexist(fconcat(httrack.path_log,"hts-err.txt"))) remove(fconcat(httrack.path_log,"hts-err.txt")); + /* Check FS directory structure created */ + structcheck(httrack.path_log); + httrack.log=fopen(fconcat(httrack.path_log,"hts-log.txt"),"w"); if (httrack_logmode==2) httrack.errlog=fopen(fconcat(httrack.path_log,"hts-err.txt"),"w"); @@ -1705,7 +2054,7 @@ int main(int argc, char **argv) { if (fp) { fprintf(fp,"What's in this folder?"LF); fprintf(fp,""LF); - fprintf(fp,"This folder (hts-cache) has been generated by WinHTTrack "HTTRACK_VERSION""LF); + fprintf(fp,"This folder (hts-cache) has been generated by WinHTTrack "HTTRACK_VERSION"%s"LF, WHAT_is_available); fprintf(fp,"and is used for updating this website."LF); fprintf(fp,"(The HTML website structure is stored here to allow fast updates)"LF""LF); fprintf(fp,"DO NOT delete this folder unless you do not want to update the mirror in the future!!"LF); @@ -1732,8 +2081,8 @@ int main(int argc, char **argv) { }*/ // vérifier existence de la structure - structcheck(httrack.path_html); - structcheck(httrack.path_log); + structcheck(fconcat(httrack.path_html, "/")); + structcheck(fconcat(httrack.path_log, "/")); // reprise/update if (httrack.cache) { @@ -1799,7 +2148,9 @@ int main(int argc, char **argv) { // fichier log if (httrack.log) { int i; - fprintf(httrack.log,"HTTrack"HTTRACK_VERSION" launched on %s at %s"LF,t,url); + fprintf(httrack.log,"HTTrack"HTTRACK_VERSION"%s launched on %s at %s"LF, + WHAT_is_available, + t, url); fprintf(httrack.log,"("); for(i=0;i<argc;i++) { if ((strchr(argv[i],' ')==NULL) || (strchr(argv[i],'\"'))) @@ -1814,8 +2165,8 @@ int main(int argc, char **argv) { fprintf(httrack.log,LF); } - if (httrack_logmode) { - printf("Mirror launched on %s by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS""LF,t); + if (httrack_logmode) { + printf("Mirror launched on %s by HTTrack Website Copier/"HTTRACK_VERSION"%s "HTTRACK_AFF_AUTHORS""LF,t,WHAT_is_available); if (httrack.wizard==0) { printf("mirroring %s with %d levels, %d sockets,t=%d,s=%d,logm=%d,lnk=%d,mdg=%d\n",url,httrack.depth,httrack.maxsoc,httrack.travel,httrack.seeker,httrack_logmode,httrack.urlmode,httrack.getmode); } else { // the magic wizard @@ -1877,7 +2228,7 @@ deprecated - see SIGCHLD if (httrack.dir_topindex) { char rpath[1024*2]; char* a; - strcpy(rpath,httrack.path_html); + strcpybuff(rpath,httrack.path_html); if (rpath[0]) { if (rpath[strlen(rpath)-1]=='/') rpath[strlen(rpath)-1]='\0'; @@ -1885,7 +2236,7 @@ deprecated - see SIGCHLD a=strrchr(rpath,'/'); if (a) { *a='\0'; - hts_buildtopindex(rpath,httrack.path_bin); + hts_buildtopindex(&httrack,rpath,httrack.path_bin); if (httrack.log) { fspc(httrack.log,"info"); fprintf(httrack.log,"Top index rebuilt (done)"LF); } @@ -1931,7 +2282,7 @@ deprecated - see SIGCHLD // WSACleanup(); // ** non en cas de thread tjs présent!.. #endif #endif -#if HTS_TRACE_MALLOC +#ifdef HTS_TRACE_MALLOC hts_freeall(); #endif @@ -1968,9 +2319,9 @@ int check_path(char* s,char* defaultname) { char* a=strchr(defaultname,'#'); // we never know.. if (a) *a='\0'; tempo[0]='\0'; - strncat(tempo,s,i-1); - strcat(tempo,defaultname); - strcpy(s,tempo); + strncatbuff(tempo,s,i-1); + strcatbuff(tempo,defaultname); + strcpybuff(s,tempo); } else s[0]='\0'; // Clear path (no name/default url given) return_value=1; // expanded @@ -1980,7 +2331,7 @@ int check_path(char* s,char* defaultname) { // ending / if (strnotempty(s)) if (s[strlen(s)-1]!='/') // ajouter slash à la fin - strcat(s,"/"); + strcatbuff(s,"/"); return return_value; } |