From 660b569b0980fc8f71b03ed666dd02eec8388b4c Mon Sep 17 00:00:00 2001 From: Xavier Roche Date: Mon, 19 Mar 2012 12:59:03 +0000 Subject: httrack 3.41.2 --- src/Makefile.am | 29 +- src/Makefile.in | 52 +- src/hts-indextmpl.h | 12 +- src/htsalias.c | 15 +- src/htsalias.h | 2 +- src/htsback.c | 1339 +++++++++++++++++++++++++----------------- src/htsback.h | 64 ++- src/htsbase.h | 48 +- src/htsbasenet.h | 80 ++- src/htsbauth.c | 52 +- src/htsbauth.h | 22 +- src/htscache.c | 493 ++++++++-------- src/htscache.h | 41 +- src/htscatchurl.c | 10 +- src/htscore.c | 1439 +++++++++++++++++++--------------------------- src/htscore.h | 349 +++++------ src/htscoremain.c | 1227 ++++++++++++++++----------------------- src/htscoremain.h | 8 +- src/htsdefines.h | 244 +++++--- src/htsfilters.c | 2 +- src/htsfilters.h | 5 +- src/htsftp.c | 207 +++---- src/htsftp.h | 31 +- src/htsglobal.h | 118 ++-- src/htshash.c | 8 +- src/htshash.h | 11 +- src/htshelp.c | 47 +- src/htshelp.h | 13 +- src/htsindex.c | 22 +- src/htsindex.h | 5 +- src/htsinthash.c | 10 +- src/htsinthash.h | 52 +- src/htsjava.c | 359 +++++++----- src/htsjava.h | 30 +- src/htslib.c | 1248 ++++++++++++++++++++++++++-------------- src/htslib.h | 343 +++++++---- src/htsmd5.c | 12 +- src/htsmd5.h | 2 +- src/htsmms.c | 69 +-- src/htsmms.h | 25 +- src/htsmodules.c | 271 ++++----- src/htsmodules.h | 54 +- src/htsname.c | 228 ++++---- src/htsname.h | 45 +- src/htsnet.h | 27 +- src/htsnostatic.c | 264 --------- src/htsnostatic.h | 278 --------- src/htsopt.h | 261 +++++++-- src/htsparse.c | 985 +++++++++++++++---------------- src/htsparse.h | 72 ++- src/htsrobots.c | 4 +- src/htsrobots.h | 8 +- src/htsserver.c | 360 ++++++------ src/htsserver.h | 77 ++- src/htsstrings.h | 298 +++++++--- src/htssystem.h | 1 - src/htsthread.c | 200 +++---- src/htsthread.h | 88 ++- src/htstools.c | 234 +++++--- src/htstools.h | 61 +- src/htsweb.c | 185 +++--- src/htsweb.h | 48 +- src/htswizard.c | 146 ++--- src/htswizard.h | 16 +- src/htswrap.c | 30 +- 
src/htswrap.h | 22 +- src/htszlib.c | 17 +- src/htszlib.h | 2 +- src/httrack-library.h | 219 +++---- src/httrack.c | 377 +++++++----- src/httrack.h | 50 +- src/httrack.vcproj | 253 ++++++++ src/mmsrip/mms.h | 4 +- src/proxy/changelog.txt | 4 + src/proxy/main.c | 29 +- src/proxy/proxystrings.h | 114 ++-- src/proxy/proxytrack.c | 202 +++---- src/proxy/proxytrack.h | 172 ++++-- src/proxy/store.c | 822 ++++++++++++++++++++++++-- src/proxy/store.h | 16 +- src/webhttrack | 6 +- 81 files changed, 8140 insertions(+), 6555 deletions(-) delete mode 100644 src/htsnostatic.c delete mode 100644 src/htsnostatic.h delete mode 100644 src/htssystem.h create mode 100755 src/httrack.vcproj (limited to 'src') diff --git a/src/Makefile.am b/src/Makefile.am index 587535a..c609b07 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1,16 +1,17 @@ #SUBDIRS = swf DevIncludesdir = $(includedir)/httrack -DevIncludes_DATA = httrack-library.h \ +DevIncludes_DATA = \ + httrack-library.h \ htsglobal.h \ htsopt.h \ htswrap.h \ - htssystem.h \ htsconfig.h \ ../config.h \ htsmodules.h \ htsbasenet.h \ - htsbauth.h + htsbauth.h \ + htsdefines.h INCLUDES = \ @DEFAULT_CFLAGS@ \ @@ -29,7 +30,7 @@ htsserver_LDADD = $(THREADS_LIBS) $(SOCKET_LIBS) -lhttrack proxytrack_LDADD = $(THREADS_LIBS) $(SOCKET_LIBS) proxytrack_CFLAGS = $(AM_CFLAGS) -DNO_MALLOCT -lib_LTLIBRARIES = libhttrack.la +lib_LTLIBRARIES = libhttrack.la libhtsjava.la htsserver_SOURCES = htsserver.c htsserver.h htsweb.c htsweb.h proxytrack_SOURCES = proxy/main.c \ @@ -42,10 +43,10 @@ whttrackrun_SCRIPTS = webhttrack libhttrack_la_SOURCES = htscore.c htsparse.c htsback.c htscache.c \ htscatchurl.c htsfilters.c htsftp.c htshash.c htsinthash.c \ - htshelp.c htsjava.c htslib.c htscoremain.c \ + htshelp.c htslib.c htscoremain.c \ htsname.c htsrobots.c htstools.c htswizard.c \ htsalias.c htsthread.c htsindex.c htsbauth.c \ - htsmd5.c htszlib.c htsnostatic.c htswrap.c \ + htsmd5.c htszlib.c htswrap.c \ htsmodules.c \ md5.c \ htsmms.c \ @@ -55,22 
+56,23 @@ libhttrack_la_SOURCES = htscore.c htsparse.c htsback.c htscache.c \ htsbasenet.h htsbauth.h htscache.h htscatchurl.h \ htsconfig.h htscore.h htsparse.h htscoremain.h htsdefines.h \ htsfilters.h htsftp.h htsglobal.h htshash.h htsinthash.h \ - htshelp.h htsindex.h htsjava.h htslib.h htsmd5.h \ - htsmodules.h htsname.h htsnet.h htsnostatic.h \ - htsopt.h htsrobots.h htssystem.h htsthread.h \ + htshelp.h htsindex.h htslib.h htsmd5.h \ + htsmodules.h htsname.h htsnet.h \ + htsopt.h htsrobots.h htsthread.h \ htstools.h htswizard.h htswrap.h htszlib.h \ htsstrings.h httrack-library.h \ md5.h \ htsmms.h \ minizip/crypt.h minizip/ioapi.h minizip/mztools.h minizip/unzip.h minizip/zip.h \ mmsrip/error.h mmsrip/mms.h - libhttrack_la_LIBADD = $(THREADS_LIBS) $(ZLIB_LIBS) $(DL_LIBS) $(SOCKET_LIBS) libhttrack_la_LDFLAGS = -version-info $(VERSION_INFO) +libhtsjava_la_SOURCES = htsjava.c htsjava.h +libhtsjava_la_LIBADD = $(THREADS_LIBS) $(DL_LIBS) +libhtsjava_la_LDFLAGS = -version-info $(VERSION_INFO) + EXTRA_DIST = httrack.h webhttrack \ - httrack.dsp httrack.dsw \ - webhttrack.dsp webhttrack.dsw \ minizip/ChangeLogUnzip \ minizip/iowin32.c \ minizip/iowin32.h \ @@ -86,4 +88,5 @@ EXTRA_DIST = httrack.h webhttrack \ proxy/changelog.txt \ proxy/proxystrings.h \ proxy/proxytrack.h \ - proxy/store.h + proxy/store.h \ + *.dsw *.dsp *.vcproj diff --git a/src/Makefile.in b/src/Makefile.in index 888071a..b22b2fb 100644 --- a/src/Makefile.in +++ b/src/Makefile.in @@ -144,16 +144,17 @@ sysconfdir = @sysconfdir@ target_alias = @target_alias@ DevIncludesdir = $(includedir)/httrack -DevIncludes_DATA = httrack-library.h \ +DevIncludes_DATA = \ + httrack-library.h \ htsglobal.h \ htsopt.h \ htswrap.h \ - htssystem.h \ htsconfig.h \ ../config.h \ htsmodules.h \ htsbasenet.h \ - htsbauth.h + htsbauth.h \ + htsdefines.h INCLUDES = \ @@ -174,7 +175,7 @@ htsserver_LDADD = $(THREADS_LIBS) $(SOCKET_LIBS) -lhttrack proxytrack_LDADD = $(THREADS_LIBS) $(SOCKET_LIBS) proxytrack_CFLAGS = 
$(AM_CFLAGS) -DNO_MALLOCT -lib_LTLIBRARIES = libhttrack.la +lib_LTLIBRARIES = libhttrack.la libhtsjava.la htsserver_SOURCES = htsserver.c htsserver.h htsweb.c htsweb.h proxytrack_SOURCES = proxy/main.c \ @@ -188,10 +189,10 @@ whttrackrun_SCRIPTS = webhttrack libhttrack_la_SOURCES = htscore.c htsparse.c htsback.c htscache.c \ htscatchurl.c htsfilters.c htsftp.c htshash.c htsinthash.c \ - htshelp.c htsjava.c htslib.c htscoremain.c \ + htshelp.c htslib.c htscoremain.c \ htsname.c htsrobots.c htstools.c htswizard.c \ htsalias.c htsthread.c htsindex.c htsbauth.c \ - htsmd5.c htszlib.c htsnostatic.c htswrap.c \ + htsmd5.c htszlib.c htswrap.c \ htsmodules.c \ md5.c \ htsmms.c \ @@ -201,9 +202,9 @@ libhttrack_la_SOURCES = htscore.c htsparse.c htsback.c htscache.c \ htsbasenet.h htsbauth.h htscache.h htscatchurl.h \ htsconfig.h htscore.h htsparse.h htscoremain.h htsdefines.h \ htsfilters.h htsftp.h htsglobal.h htshash.h htsinthash.h \ - htshelp.h htsindex.h htsjava.h htslib.h htsmd5.h \ - htsmodules.h htsname.h htsnet.h htsnostatic.h \ - htsopt.h htsrobots.h htssystem.h htsthread.h \ + htshelp.h htsindex.h htslib.h htsmd5.h \ + htsmodules.h htsname.h htsnet.h \ + htsopt.h htsrobots.h htsthread.h \ htstools.h htswizard.h htswrap.h htszlib.h \ htsstrings.h httrack-library.h \ md5.h \ @@ -211,13 +212,14 @@ libhttrack_la_SOURCES = htscore.c htsparse.c htsback.c htscache.c \ minizip/crypt.h minizip/ioapi.h minizip/mztools.h minizip/unzip.h minizip/zip.h \ mmsrip/error.h mmsrip/mms.h - libhttrack_la_LIBADD = $(THREADS_LIBS) $(ZLIB_LIBS) $(DL_LIBS) $(SOCKET_LIBS) libhttrack_la_LDFLAGS = -version-info $(VERSION_INFO) +libhtsjava_la_SOURCES = htsjava.c htsjava.h +libhtsjava_la_LIBADD = $(THREADS_LIBS) $(DL_LIBS) +libhtsjava_la_LDFLAGS = -version-info $(VERSION_INFO) + EXTRA_DIST = httrack.h webhttrack \ - httrack.dsp httrack.dsw \ - webhttrack.dsp webhttrack.dsw \ minizip/ChangeLogUnzip \ minizip/iowin32.c \ minizip/iowin32.h \ @@ -233,7 +235,8 @@ EXTRA_DIST = httrack.h webhttrack \ 
proxy/changelog.txt \ proxy/proxystrings.h \ proxy/proxytrack.h \ - proxy/store.h + proxy/store.h \ + *.dsw *.dsp *.vcproj subdir = src ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 @@ -242,14 +245,17 @@ CONFIG_HEADER = $(top_builddir)/config.h CONFIG_CLEAN_FILES = LTLIBRARIES = $(lib_LTLIBRARIES) +libhtsjava_la_DEPENDENCIES = +am_libhtsjava_la_OBJECTS = htsjava.lo +libhtsjava_la_OBJECTS = $(am_libhtsjava_la_OBJECTS) libhttrack_la_DEPENDENCIES = am_libhttrack_la_OBJECTS = htscore.lo htsparse.lo htsback.lo htscache.lo \ htscatchurl.lo htsfilters.lo htsftp.lo htshash.lo htsinthash.lo \ - htshelp.lo htsjava.lo htslib.lo htscoremain.lo htsname.lo \ - htsrobots.lo htstools.lo htswizard.lo htsalias.lo htsthread.lo \ - htsindex.lo htsbauth.lo htsmd5.lo htszlib.lo htsnostatic.lo \ - htswrap.lo htsmodules.lo md5.lo htsmms.lo ioapi.lo mztools.lo \ - unzip.lo zip.lo error.lo mms.lo + htshelp.lo htslib.lo htscoremain.lo htsname.lo htsrobots.lo \ + htstools.lo htswizard.lo htsalias.lo htsthread.lo htsindex.lo \ + htsbauth.lo htsmd5.lo htszlib.lo htswrap.lo htsmodules.lo \ + md5.lo htsmms.lo ioapi.lo mztools.lo unzip.lo zip.lo error.lo \ + mms.lo libhttrack_la_OBJECTS = $(am_libhttrack_la_OBJECTS) bin_PROGRAMS = proxytrack$(EXEEXT) httrack$(EXEEXT) htsserver$(EXEEXT) PROGRAMS = $(bin_PROGRAMS) @@ -288,7 +294,6 @@ am__depfiles_maybe = depfiles @AMDEP_TRUE@ ./$(DEPDIR)/htsjava.Plo ./$(DEPDIR)/htslib.Plo \ @AMDEP_TRUE@ ./$(DEPDIR)/htsmd5.Plo ./$(DEPDIR)/htsmms.Plo \ @AMDEP_TRUE@ ./$(DEPDIR)/htsmodules.Plo ./$(DEPDIR)/htsname.Plo \ -@AMDEP_TRUE@ ./$(DEPDIR)/htsnostatic.Plo \ @AMDEP_TRUE@ ./$(DEPDIR)/htsparse.Plo ./$(DEPDIR)/htsrobots.Plo \ @AMDEP_TRUE@ ./$(DEPDIR)/htsserver.Po ./$(DEPDIR)/htsthread.Plo \ @AMDEP_TRUE@ ./$(DEPDIR)/htstools.Plo ./$(DEPDIR)/htsweb.Po \ @@ -314,12 +319,12 @@ LTCOMPILE = $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) \ CCLD = $(CC) LINK = $(LIBTOOL) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ $(AM_LDFLAGS) $(LDFLAGS) -o $@ -DIST_SOURCES = 
$(libhttrack_la_SOURCES) $(htsserver_SOURCES) httrack.c \ - $(proxytrack_SOURCES) +DIST_SOURCES = $(libhtsjava_la_SOURCES) $(libhttrack_la_SOURCES) \ + $(htsserver_SOURCES) httrack.c $(proxytrack_SOURCES) DATA = $(DevIncludes_DATA) DIST_COMMON = $(srcdir)/Makefile.in Makefile.am -SOURCES = $(libhttrack_la_SOURCES) $(htsserver_SOURCES) httrack.c $(proxytrack_SOURCES) +SOURCES = $(libhtsjava_la_SOURCES) $(libhttrack_la_SOURCES) $(htsserver_SOURCES) httrack.c $(proxytrack_SOURCES) all: all-am @@ -358,6 +363,8 @@ clean-libLTLIBRARIES: echo "rm -f \"$${dir}/so_locations\""; \ rm -f "$${dir}/so_locations"; \ done +libhtsjava.la: $(libhtsjava_la_OBJECTS) $(libhtsjava_la_DEPENDENCIES) + $(LINK) -rpath $(libdir) $(libhtsjava_la_LDFLAGS) $(libhtsjava_la_OBJECTS) $(libhtsjava_la_LIBADD) $(LIBS) libhttrack.la: $(libhttrack_la_OBJECTS) $(libhttrack_la_DEPENDENCIES) $(LINK) -rpath $(libdir) $(libhttrack_la_LDFLAGS) $(libhttrack_la_OBJECTS) $(libhttrack_la_LIBADD) $(LIBS) binPROGRAMS_INSTALL = $(INSTALL_PROGRAM) @@ -445,7 +452,6 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htsmms.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htsmodules.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htsname.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htsnostatic.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htsparse.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htsrobots.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htsserver.Po@am__quote@ diff --git a/src/hts-indextmpl.h b/src/hts-indextmpl.h index b9aff10..24706d5 100644 --- a/src/hts-indextmpl.h +++ b/src/hts-indextmpl.h @@ -174,7 +174,7 @@ regen: "
"LF\ "
"LF\ "
"LF\ - " Mirror and index made by HTTrack Website Copier [XR&CO'2006]"LF\ + " Mirror and index made by HTTrack Website Copier [XR&CO'2007]"LF\ "
"LF\ " %s"LF\ " "LF\ @@ -193,7 +193,7 @@ regen: ""LF\ ""LF\ " "LF\ - " "LF\ + " "LF\ " "LF\ "
© 2006 Xavier Roche & other contributors - Web Design: Kauler Leto.© 2007 Xavier Roche & other contributors - Web Design: Kauler Leto.
"LF\ ""LF\ @@ -324,7 +324,7 @@ regen: " "LF\ "
"LF\ "
"LF\ - " Mirror and index made by HTTrack Website Copier [XR&CO'2006]"LF\ + " Mirror and index made by HTTrack Website Copier [XR&CO'2007]"LF\ "
"LF\ " %s"LF\ " "LF\ @@ -342,7 +342,7 @@ regen: ""LF\ ""LF\ " "LF\ - " "LF\ + " "LF\ " "LF\ "
© 2006 Xavier Roche & other contributors - Web Design: Kauler Leto.© 2007 Xavier Roche & other contributors - Web Design: Kauler Leto.
"LF\ ""LF\ @@ -483,7 +483,7 @@ regen: ""LF\ ""LF\ " "LF\ - " "LF\ + " "LF\ " "LF\ "
© 2006 Xavier Roche & other contributors - Web Design: Kauler Leto.© 2007 Xavier Roche & other contributors - Web Design: Kauler Leto.
"LF\ ""LF\ @@ -620,7 +620,7 @@ regen: ""LF\ ""LF\ " "LF\ - " "LF\ + " "LF\ " "LF\ "
© 2006 Xavier Roche & other contributors - Web Design: Kauler Leto.© 2007 Xavier Roche & other contributors - Web Design: Kauler Leto.
"LF\ ""LF\ diff --git a/src/htsalias.c b/src/htsalias.c index c6bfef4..5845837 100644 --- a/src/htsalias.c +++ b/src/htsalias.c @@ -66,7 +66,7 @@ void hts_lowcase(char* s); } \ argv[0]=(buff+ptr); \ strcpybuff(argv[0],token); \ - ptr += (strlen(argv[0])+1); \ + ptr += (int) (strlen(argv[0])+1); \ argc++ // END OF COPY OF cmdl_ins in htsmain.c @@ -172,6 +172,7 @@ const char* hts_optalias[][4] = { {"display","-%v","single","show files transfered and other funny realtime information"}, {"dos83","-L0","single",""}, {"iso9660","-L2","single",""}, + {"disable-module","-%w","param1",""}, /* */ /* DEPRECATED */ @@ -193,7 +194,7 @@ const char* hts_optalias[][4] = { {"advanced-maxlinks","-#L","single",""}, {"advanced-progressinfo","-#p","single","deprecated"}, {"catch-url","-#P","single","catch complex URL through proxy"}, - {"debug-oldftp","-#R","single",""}, + /*{"debug-oldftp","-#R","single",""},*/ {"debug-xfrstats","-#T","single",""}, {"advanced-wait","-#u","single",""}, {"debug-ratestats","-#Z","single",""}, @@ -544,11 +545,11 @@ char* hts_gethome(void) { } /* Convert ~/foo into /home/smith/foo */ -void expand_home(char* str) { - if (str[0] == '~') { +void expand_home(String *str) { + if (StringSub(*str, 1) == '~') { char BIGSTK tempo[HTS_URLMAXSIZE*2]; - strcpybuff(tempo,hts_gethome()); - strcatbuff(tempo,str+1); - strcpybuff(str,tempo); + strcpybuff(tempo, hts_gethome()); + strcatbuff(tempo, StringBuff(*str) + 1); + StringCopy(*str, tempo); } } diff --git a/src/htsalias.h b/src/htsalias.h index 21c3142..bf52f3b 100644 --- a/src/htsalias.h +++ b/src/htsalias.h @@ -55,7 +55,7 @@ const char* optalias_value(int p); const char* opttype_value(int p); const char* opthelp_value(int p); char* hts_gethome(void); -void expand_home(char* str); +void expand_home(String *str); #endif #endif diff --git a/src/htsback.c b/src/htsback.c index 8a9aac5..2f06b09 100644 --- a/src/htsback.c +++ b/src/htsback.c @@ -38,16 +38,16 @@ Please visit our Website: http://www.httrack.com /* Internal 
engine bytecode */ #define HTS_INTERNAL_BYTECODE -#include "htsback.h" - /* specific definitions */ -#include "htsbase.h" #include "htsnet.h" +#include "htscore.h" #include "htsthread.h" #include /* END specific definitions */ -//#if HTS_WIN +#include "htsback.h" + +//#ifdef _WIN32 #include "htsftp.h" #if HTS_USEZLIB #include "htszlib.h" @@ -56,7 +56,7 @@ Please visit our Website: http://www.httrack.com #endif //#endif -#if HTS_WIN +#ifdef _WIN32 #ifndef __cplusplus // DOS #ifndef _WIN32_WCE @@ -71,22 +71,28 @@ Please visit our Website: http://www.httrack.com #endif #undef test_flush -#define test_flush if (opt->flush) { if (opt->log) { fflush(opt->log); } if (opt->errlog) { fflush(opt->errlog); } } +#define test_flush if (opt->flush) { if (opt->log) { fflush(opt->log); } if (opt->log) { fflush(opt->log); } } #define VT_CLREOL "\33[K" +/* Slot operations */ +static int slot_can_be_cached_on_disk(const lien_back* back); +static int slot_can_be_cleaned(const lien_back* back); +static int slot_can_be_finalized(httrackp* opt, const lien_back* back); + struct_back* back_new(int back_max) { int i; struct_back* sback = calloct(1, sizeof(struct_back)); sback->count = back_max; sback->lnk = (lien_back*) calloct((back_max + 1), sizeof(lien_back)); - sback->ready = inthash_new(8191); + sback->ready = inthash_new(32767); + sback->ready_size_bytes = 0; inthash_value_is_malloc(sback->ready, 1); // init for(i = 0 ; i < sback->count ; i++){ sback->lnk[i].r.location = sback->lnk[i].location_buffer; - sback->lnk[i].status = -1; + sback->lnk[i].status = STATUS_FREE; sback->lnk[i].r.soc = INVALID_SOCKET; } return sback; @@ -99,7 +105,8 @@ void back_free(struct_back** sback) { (*sback)->lnk = NULL; } if ((*sback)->ready != NULL) { - inthash_delete((inthash *)&(*sback)->ready); + inthash_delete(&(*sback)->ready); + (*sback)->ready_size_bytes = 0; } freet(*sback); *sback = NULL; @@ -115,15 +122,23 @@ void back_delete_all(httrackp* opt, cache_back* cache, struct_back* sback) { } // delete 
stored slots if (sback->ready != NULL) { - struct_inthash_enum e = inthash_enum_new((inthash)sback->ready); + struct_inthash_enum e = inthash_enum_new(sback->ready); inthash_chain* item; while((item = inthash_enum_next(&e))) { - struct_back back1; - back1.count = 1; - back1.lnk = (lien_back*) item->value.ptr; - back1.ready = NULL; - back_delete(opt, cache, &back1, 0); +#ifndef HTS_NO_BACK_ON_DISK + char *filename = (char*) item->value.ptr; + if (filename != NULL) { + (void) unlink(filename); + } +#else + /* clear entry content (but not yet the entry) */ + lien_back *back = (lien_back*) item->value.ptr; + back_clear_entry(back); +#endif } + /* delete hashtable & content */ + inthash_delete(&sback->ready); + sback->ready_size_bytes = 0; } } } @@ -131,64 +146,113 @@ void back_delete_all(httrackp* opt, cache_back* cache, struct_back* sback) { // --- // routines de backing -static int back_index_ready(struct_back* sback, char* adr, char* fil, char* sav, int getIndex); -static int back_index_fetch(struct_back* sback, char* adr, char* fil, char* sav, int getIndex); +static int back_index_ready(httrackp* opt, struct_back* sback, char* adr, char* fil, char* sav, int getIndex); +static int back_index_fetch(httrackp* opt, struct_back* sback, char* adr, char* fil, char* sav, int getIndex); // retourne l'index d'un lien dans un tableau de backing -int back_index(struct_back* sback,char* adr,char* fil,char* sav) { - return back_index_fetch(sback, adr, fil, sav, 1); +int back_index(httrackp* opt, struct_back* sback,char* adr,char* fil,char* sav) { + return back_index_fetch(opt,sback, adr, fil, sav, 1); } -static int back_index_fetch(struct_back* sback, char* adr, char* fil, char* sav, int getIndex) { +static int back_index_fetch(httrackp* opt, struct_back* sback, char* adr, char* fil, char* sav, int getIndex) { lien_back* const back = sback->lnk; const int back_max = sback->count; - int i=0; int index=-1; - while( i < back_max ) { - if (back[i].status>=0) // réception OU prêt - 
if (strfield2(back[i].url_adr,adr)) { - if (strcmp(back[i].url_fil,fil)==0) { - if (index==-1) /* first time we meet, store it */ - index=i; - else if (sav != NULL && strcmp(back[i].url_sav, sav) == 0) { /* oops, check sav too */ - index=i; - return index; - } - } + int i; + for( i = 0 ; i < back_max ; i++ ) { + if (back[i].status >= 0 /* not free or alive */ + && strfield2(back[i].url_adr,adr) + && strcmp(back[i].url_fil,fil)==0) + { + if (index==-1) /* first time we meet, store it */ + index=i; + else if (sav != NULL && strcmp(back[i].url_sav, sav) == 0) { /* oops, check sav too */ + index=i; + return index; } - i++; + } } // not found in fast repository - search in the storage hashtable if (index == -1 && sav != NULL) { - index = back_index_ready(sback, adr, fil, sav, getIndex); + index = back_index_ready(opt, sback, adr, fil, sav, getIndex); } return index; } -static int back_index_ready(struct_back* sback, char* adr, char* fil, char* sav, int getIndex) { +/* resurrect stored entry */ +static int back_index_ready(httrackp* opt, struct_back* sback, char* adr, char* fil, char* sav, int getIndex) { lien_back* const back = sback->lnk; - const int back_max = sback->count; - int index=-1; void* ptr = NULL; - if (inthash_read_pvoid((inthash)sback->ready, sav, &ptr)) { - lien_back* itemback = (lien_back*) ptr; - if (itemback != NULL) { - if (!getIndex) { - return sback->count; // positive (but invalid) result + if (inthash_read_pvoid(sback->ready, sav, &ptr)) { + if (!getIndex) { /* don't "pagefault" the entry */ + if (ptr != NULL) { + return sback->count; /* (invalid but) positive result */ + } else { + return -1; /* not found */ + } + } else if (ptr != NULL) { + lien_back* itemback = NULL; +#ifndef HTS_NO_BACK_ON_DISK + FILE *fp; + char* fileback = (char*) ptr; + char catbuff[CATBUFF_SIZE]; + if (( fp = fopen(fconv(catbuff, fileback), "rb") ) != NULL ) { + if (back_unserialize(fp, &itemback) != 0) { + if (itemback != NULL) { + back_clear_entry(itemback); + 
freet(itemback); + itemback = NULL; + } + if (opt->log != NULL) { + int last_errno = errno; + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"engine: warning: unserialize error for %s%s (%s): %s"LF,adr,fil,sav,strerror(last_errno)); + test_flush; + } + } + fclose(fp); } else { - // move from hashtable to fast repository - int q = back_search_quick(sback); - if (q != -1) { - deletehttp(&back[q].r); // security check - back_move(itemback, &back[q]); - inthash_remove((inthash)sback->ready, sav); // delete item - back[q].locked = 1; /* locked */ - index = q; + if (opt->log != NULL) { + int last_errno = errno; + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"engine: warning: unserialize error for %s%s (%s), file disappeared: %s"LF,adr,fil,sav,strerror(last_errno)); + test_flush; } } - } + (void) unlink(fileback); +#else + itemback = (lien_back*) ptr; +#endif + if (itemback != NULL) { + // move from hashtable to fast repository + int q = back_search(opt, sback); + if (q != -1) { + deletehttp(&back[q].r); // security check + back_move(itemback, &back[q]); + back_clear_entry(itemback); /* delete entry content */ + freet(itemback); /* delete item */ + itemback = NULL; + inthash_remove(sback->ready, sav); // delete item + sback->ready_size_bytes -= back[q].r.size; /* substract for stats */ + back_set_locked(sback, q); /* locked */ + return q; + } else { + if (opt->log != NULL) { + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"engine: warning: unserialize error for %s%s (%s): no more space to wakeup frozen slots"LF,adr,fil,sav); + test_flush; + } + } + } + } } - return index; + return -1; +} + +static int slot_can_be_cached_on_disk(const lien_back* back) { + return + (back->status == STATUS_READY && back->locked == 0 + && back->url_sav[0] != '\0' + && strcmp(back->url_sav, BACK_ADD_TEST) != 0 + ); + /* Note: not checking !IS_DELAYED_EXT(back->url_sav) or it will quickly cause the slots to be filled! 
*/ } /* Put all backing entries that are ready in the storage hashtable to spare space and CPU */ @@ -199,19 +263,15 @@ int back_cleanup_background(httrackp* opt,cache_back* cache,struct_back* sback) int i; for( i = 0 ; i < back_max ; i++ ) { // ready, not locked and suitable - if (back[i].status == 0 && back[i].locked == 0 - && back[i].url_sav[0] != '\0' - && strcmp(back[i].url_sav, BACK_ADD_TEST) != 0 - && !IS_DELAYED_EXT(back[i].url_sav) - ) - { - lien_back* itemback = calloct(1, sizeof(lien_back)); + if (slot_can_be_cached_on_disk(&back[i])) { +#ifdef HTS_NO_BACK_ON_DISK + lien_back* itemback; +#endif /* Security check */ - int checkIndex = back_index_ready(sback, back[i].url_adr, back[i].url_fil, back[i].url_sav, 1); + int checkIndex = back_index_ready(opt, sback, back[i].url_adr, back[i].url_fil, back[i].url_sav, 1); if (checkIndex != -1) { if (opt->log) { - fspc(opt->log,"warning"); - fprintf(opt->log,"engine: unexpected duplicate file entry: %s%s -> %s (%d '%s') / %s%s -> %s (%d '%s')"LF, + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"engine: unexpected duplicate file entry: %s%s -> %s (%d '%s') / %s%s -> %s (%d '%s')"LF, back[checkIndex].url_adr, back[checkIndex].url_fil, back[checkIndex].url_sav, back[checkIndex].r.statuscode, back[checkIndex].r.msg, back[i].url_adr, back[i].url_fil, back[i].url_sav, back[i].r.statuscode, back[i].r.msg ); @@ -222,10 +282,67 @@ int back_cleanup_background(httrackp* opt,cache_back* cache,struct_back* sback) /* This should NOT happend! 
*/ { int duplicateEntryInBacklog = 1; assertf(!duplicateEntryInBacklog); } #endif - } + } +#ifndef HTS_NO_BACK_ON_DISK + /* temporarily serialize the entry on disk */ + { + int fsz = (int) strlen(back[i].url_sav); + char *filename = malloc(fsz + 8 + 1); + if (filename != NULL) { + FILE *fp; + if (opt->getmode != 0) { + sprintf(filename, "%s.tmp", back[i].url_sav); + } else { + sprintf(filename, "%stmpfile%d.tmp", StringBuff(opt->path_html), opt->state.tmpnameid++); + } + /* Security check */ + if (fexist(filename)) { + if (opt->log != NULL) { + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"engine: warning: temporary file %s already exists"LF, filename); + test_flush; + } + } + /* Create file and serialize slot */ + if ((fp = filecreate(NULL, filename)) != NULL) + { + if (back_serialize(fp, &back[i]) == 0) + { + inthash_add_pvoid(sback->ready, back[i].url_sav, filename); + filename = NULL; + sback->ready_size_bytes += back[i].r.size; /* add for stats */ + nclean++; + back_clear_entry(&back[i]); /* entry is now recycled */ + } else { + if (opt->log != NULL) { + int last_errno = errno; + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"engine: warning: serialize error for %s%s to %s: write error: %s"LF,back[i].url_adr,back[i].url_fil,filename,strerror(last_errno)); + test_flush; + } + } + fclose(fp); + } else { + if (opt->log != NULL) { + int last_errno = errno; + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"engine: warning: serialize error for %s%s to %s: open error: %s (%s, %s)"LF, back[i].url_adr, back[i].url_fil, filename, strerror(last_errno), dir_exists(filename) ? "directory exists" : "directory does NOT exist!", fexist(filename) ? "file already exists!" 
: "file does not exist"); + test_flush; + } + } + if (filename != NULL) + free(filename); + } else { + if (opt->log != NULL) { + int last_errno = errno; + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"engine: warning: serialize error for %s%s to %s: memory full: %s"LF,back[i].url_adr,back[i].url_fil,filename,strerror(last_errno)); + test_flush; + } + } + } +#else + itemback = calloct(1, sizeof(lien_back)); back_move(&back[i], itemback); - inthash_add_pvoid((inthash)sback->ready, itemback->url_sav, itemback); + inthash_add_pvoid(sback->ready, itemback->url_sav, itemback); nclean++; +#endif } } return nclean; @@ -238,7 +355,7 @@ int back_available(struct_back* sback) { int i; int nb=0; for(i=0;iready != NULL) { - struct_inthash_enum e = inthash_enum_new((inthash)sback->ready); + struct_inthash_enum e = inthash_enum_new(sback->ready); inthash_chain* item; while((item = inthash_enum_next(&e))) { lien_back* ritem = (lien_back*) item->value.ptr; @@ -264,6 +382,7 @@ LLint back_incache(struct_back* sback) { sum+=max(ritem->r.size,ritem->r.totalsize); } } +#endif return sum; } @@ -272,27 +391,31 @@ int back_done_incache(struct_back* sback) { lien_back* const back = sback->lnk; const int back_max = sback->count; int i; - int n=0; - for(i=0;iready != NULL) { - struct_inthash_enum e = inthash_enum_new((inthash)sback->ready); +#ifndef HTS_NO_BACK_ON_DISK + n += inthash_nitems(sback->ready); +#else + struct_inthash_enum e = inthash_enum_new(sback->ready); inthash_chain* item; while((item = inthash_enum_next(&e))) { lien_back* ritem = (lien_back*) item->value.ptr; - if (ritem->status==0) + if (ritem->status==STATUS_READY) n++; } +#endif } return n; } // le lien a-t-il été mis en backing? 
-HTS_INLINE int back_exist(struct_back* sback,char* adr,char* fil,char* sav) { - return (back_index_fetch(sback, adr, fil, sav, /*don't fetch*/0) >= 0); +HTS_INLINE int back_exist(struct_back* sback,httrackp* opt,char* adr,char* fil,char* sav) { + return (back_index_fetch(opt, sback, adr, fil, sav, /*don't fetch*/0) >= 0); } // nombre de sockets en tâche de fond @@ -313,7 +436,7 @@ int back_nsoc_overall(struct_back* sback) { int n=0; int i; for(i=0;i 0 || back[i].status == -103) + if (back[i].status > 0 || back[i].status == STATUS_ALIVE) n++; return n; @@ -324,6 +447,7 @@ int back_nsoc_overall(struct_back* sback) { // fermer les paramètres de transfert, // et notamment vérifier les fichiers compressés (décompresser), callback etc. int back_finalize(httrackp* opt,cache_back* cache,struct_back* sback,int p) { + char catbuff[CATBUFF_SIZE]; lien_back* const back = sback->lnk; const int back_max = sback->count; assertf(p >= 0 && p < back_max); @@ -338,9 +462,9 @@ int back_finalize(httrackp* opt,cache_back* cache,struct_back* sback,int p) { } if ( - (back[p].status == 0) // ready + (back[p].status == STATUS_READY) // ready && - (back[p].r.statuscode>0) // not internal error + (back[p].r.statuscode > 0) // not internal error ) { if (!back[p].testmode) { // not test mode @@ -361,7 +485,7 @@ int back_finalize(httrackp* opt,cache_back* cache,struct_back* sback,int p) { back[p].r.out=fopen(back[p].tmpfile,"wb"); if (back[p].r.out) { if ((back[p].r.adr) && (back[p].r.size>0)) { - if (fwrite(back[p].r.adr,1,(INTsys)back[p].r.size,back[p].r.out) != back[p].r.size) { + if (fwrite(back[p].r.adr,1,(size_t)back[p].r.size,back[p].r.out) != back[p].r.size) { back[p].r.statuscode=STATUSCODE_INVALID; strcpybuff(back[p].r.msg,"Write error when decompressing"); } @@ -386,8 +510,8 @@ int back_finalize(httrackp* opt,cache_back* cache,struct_back* sback,int p) { if (back[p].tmpfile != NULL && back[p].tmpfile[0] != '\0') { if (back[p].url_sav[0]) { LLint size; - file_notify(back[p].url_adr, 
back[p].url_fil, back[p].url_sav, 1, 1, back[p].r.notmodified); - filecreateempty(back[p].url_sav); // filenote & co + file_notify(opt,back[p].url_adr, back[p].url_fil, back[p].url_sav, 1, 1, back[p].r.notmodified); + filecreateempty(&opt->state.strc, back[p].url_sav); // filenote & co if ((size = hts_zunpack(back[p].tmpfile,back[p].url_sav))>=0) { back[p].r.size=back[p].r.totalsize=size; // fichier -> mémoire @@ -426,10 +550,10 @@ int back_finalize(httrackp* opt,cache_back* cache,struct_back* sback,int p) { REAL MEDIA HACK Check if we have to load locally the file ************************************************************************ */ - if (back[p].r.statuscode == 200) { // OK (ou 304 en backing) + if (back[p].r.statuscode == HTTP_OK) { // OK (ou 304 en backing) if (back[p].r.is_write) { // Written file - if (may_be_hypertext_mime(back[p].r.contenttype, back[p].url_fil)) { // to parse! - LLint sz; + if (may_be_hypertext_mime(opt,back[p].r.contenttype, back[p].url_fil)) { // to parse! + off_t sz; sz=fsize(back[p].url_sav); if (sz>0) { // ok, exists! if (sz < 8192) { // ok, small file --> to parse! @@ -437,7 +561,7 @@ int back_finalize(httrackp* opt,cache_back* cache,struct_back* sback,int p) { if (fp) { back[p].r.adr=malloct((int)sz + 2); if (back[p].r.adr) { - if (fread(back[p].r.adr,1,(INTsys)sz,fp) == sz) { + if (fread(back[p].r.adr,1,sz,fp) == sz) { back[p].r.size=sz; back[p].r.adr[sz] = '\0'; back[p].r.is_write = 0; /* not anymore a direct-to-disk file */ @@ -451,7 +575,7 @@ int back_finalize(httrackp* opt,cache_back* cache,struct_back* sback,int p) { fclose(fp); fp=NULL; // remove (temporary) file! 
- unlink(fconv(back[p].url_sav)); + unlink(fconv(catbuff,back[p].url_sav)); } if (fp) fclose(fp); @@ -512,7 +636,7 @@ int back_finalize(httrackp* opt,cache_back* cache,struct_back* sback,int p) { fprintf(cache->txt,LLintP,(LLint)back[p].r.totalsize); fprintf(cache->txt,"\t%s\t",flags); } - if (back[p].r.statuscode == 200) { + if (back[p].r.statuscode == HTTP_OK) { if (back[p].r.size>=0) { if (strcmp(back[p].url_fil,"/robots.txt") !=0 ) { HTS_STAT.stat_bytes+=back[p].r.size; @@ -521,7 +645,7 @@ int back_finalize(httrackp* opt,cache_back* cache,struct_back* sback,int p) { if ( (!back[p].r.notmodified) && (opt->is_update) ) { HTS_STAT.stat_updated_files++; // page modifiée if (opt->log!=NULL) { - fspc(opt->log,"info"); + HTS_LOG(opt,LOG_INFO); if (back[p].is_update) { fprintf(opt->log,"engine: transfer-status: link updated: %s%s -> %s"LF,back[p].url_adr,back[p].url_fil,back[p].url_sav); } else { @@ -538,7 +662,7 @@ int back_finalize(httrackp* opt,cache_back* cache,struct_back* sback,int p) { } } else { if ( (opt->debug>0) && (opt->log!=NULL) ) { - fspc(opt->log,"info"); fprintf(opt->log,"engine: transfer-status: link recorded: %s%s -> %s"LF,back[p].url_adr,back[p].url_fil,back[p].url_sav); + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"engine: transfer-status: link recorded: %s%s -> %s"LF,back[p].url_adr,back[p].url_fil,back[p].url_sav); test_flush; } if (cache->txt) { @@ -550,7 +674,7 @@ int back_finalize(httrackp* opt,cache_back* cache,struct_back* sback,int p) { } } else { if ( (opt->debug>0) && (opt->log!=NULL) ) { - fspc(opt->log,"info"); fprintf(opt->log,"engine: transfer-status: empty file? (%d, '%s'): %s%s"LF,back[p].r.statuscode,back[p].r.msg,back[p].url_adr,back[p].url_fil); + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"engine: transfer-status: empty file? 
(%d, '%s'): %s%s"LF,back[p].r.statuscode,back[p].r.msg,back[p].url_adr,back[p].url_fil); test_flush; } if (cache->txt) { @@ -559,7 +683,7 @@ int back_finalize(httrackp* opt,cache_back* cache,struct_back* sback,int p) { } } else { if ( (opt->debug>0) && (opt->log!=NULL) ) { - fspc(opt->log,"info"); fprintf(opt->log,"engine: transfer-status: link error (%d, '%s'): %s%s"LF,back[p].r.statuscode,back[p].r.msg,back[p].url_adr,back[p].url_fil); + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"engine: transfer-status: link error (%d, '%s'): %s%s"LF,back[p].r.statuscode,back[p].r.msg,back[p].url_adr,back[p].url_fil); } if (cache->txt) { state="error"; @@ -575,11 +699,11 @@ int back_finalize(httrackp* opt,cache_back* cache,struct_back* sback,int p) { "(from %s%s%s)" LF, back[p].r.statuscode, - state, escape_check_url_addr(back[p].r.msg), - escape_check_url_addr(back[p].r.contenttype), - ((back[p].r.etag[0])?"etag:":((back[p].r.lastmodified[0])?"date:":"")), escape_check_url_addr((back[p].r.etag[0])?back[p].r.etag:(back[p].r.lastmodified)), - (link_has_authority(back[p].url_adr) ? "" : "http://"),escape_check_url_addr(back[p].url_adr),escape_check_url_addr(back[p].url_fil),escape_check_url_addr(back[p].url_sav), - (link_has_authority(back[p].referer_adr) || !back[p].referer_adr[0]) ? "" : "http://",escape_check_url_addr(back[p].referer_adr),escape_check_url_addr(back[p].referer_fil) + state, escape_check_url_addr(OPT_GET_BUFF(opt),back[p].r.msg), + escape_check_url_addr(OPT_GET_BUFF(opt),back[p].r.contenttype), + ((back[p].r.etag[0])?"etag:":((back[p].r.lastmodified[0])?"date:":"")), escape_check_url_addr(OPT_GET_BUFF(opt),(back[p].r.etag[0])?back[p].r.etag:(back[p].r.lastmodified)), + (link_has_authority(back[p].url_adr) ? "" : "http://"),escape_check_url_addr(OPT_GET_BUFF(opt),back[p].url_adr),escape_check_url_addr(OPT_GET_BUFF(opt),back[p].url_fil),escape_check_url_addr(OPT_GET_BUFF(opt),back[p].url_sav), + (link_has_authority(back[p].referer_adr) || !back[p].referer_adr[0]) ? 
"" : "http://",escape_check_url_addr(OPT_GET_BUFF(opt),back[p].referer_adr),escape_check_url_addr(OPT_GET_BUFF(opt),back[p].referer_fil) ); if (opt->flush) fflush(cache->txt); @@ -591,23 +715,21 @@ int back_finalize(httrackp* opt,cache_back* cache,struct_back* sback,int p) { } else { if (!HTTP_IS_OK(back[p].r.statuscode)) { if ( (opt->debug>0) && (opt->log!=NULL) ) { - fspc(opt->log,"debug"); fprintf(opt->log,"redirect to %s%s"LF,back[p].url_adr,back[p].url_fil); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"redirect to %s%s"LF,back[p].url_adr,back[p].url_fil); } /* Store only header reference */ cache_mayadd(opt,cache,&back[p].r,back[p].url_adr,back[p].url_fil,NULL); } else { + /* Partial file, but marked as "ok" ? */ if (opt->log!=NULL) { - fspc(opt->log,"warning"); fprintf(opt->log,"file not stored in cache due to bogus state (incomplete type): %s%s"LF,back[p].url_adr,back[p].url_fil); + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"file not stored in cache due to bogus state (incomplete type): %s%s"LF,back[p].url_adr,back[p].url_fil); } } } // status finished callback -#if HTS_ANALYSTE - if (hts_htmlcheck_xfrstatus != NULL) { - hts_htmlcheck_xfrstatus(&back[p]); - } -#endif + RUN_CALLBACK1(opt, xfrstatus, &back[p]); + return 0; } else { // testmode if (back[p].r.statuscode / 100 >= 3) { /* Store 3XX, 4XX, 5XX test response codes, but NOT 2XX */ @@ -670,7 +792,7 @@ void back_move(lien_back* src, lien_back* dst) { memcpy(dst, src, sizeof(lien_back)); memset(src, 0, sizeof(lien_back)); src->r.soc=INVALID_SOCKET; - src->status=-1; + src->status=STATUS_FREE; src->r.location = src->location_buffer; dst->r.location = dst->location_buffer; } @@ -688,6 +810,80 @@ void back_copy_static(const lien_back* src, lien_back* dst) { #endif } +static int back_data_serialize(FILE *fp, const void *data, size_t size) { + if ( fwrite(&size, 1, sizeof(size), fp) == sizeof(size) + && ( size == 0 || fwrite(data, 1, size, fp) == size ) + ) + return 0; + return 1; /* error */ +} + +static int 
back_string_serialize(FILE *fp, const char *str) { + size_t size = ( str != NULL ) ? ( strlen(str) + 1 ) : 0; + return back_data_serialize(fp, str, size); +} + +static int back_data_unserialize(FILE *fp, void **str, size_t *size) { + *str = NULL; + if (fread(size, 1, sizeof(*size), fp) == sizeof(*size)) { + if (*size == 0) /* serialized NULL ptr */ + return 0; + *str = malloct(*size + 1); + if (*str == NULL) + return 1; /* error */ + ((char*) *str)[*size] = 0; /* guard byte */ + if (fread(*str, 1, *size, fp) == *size) + return 0; + } + return 1; /* error */ +} + +static int back_string_unserialize(FILE *fp, char **str) { + size_t dummy; + return back_data_unserialize(fp, (void**) str, &dummy); +} + +int back_serialize(FILE *fp, const lien_back* src) { + if (back_data_serialize(fp, src, sizeof(lien_back)) == 0 + && back_data_serialize(fp, src->r.adr, src->r.adr ? (size_t)src->r.size : 0) == 0 + && back_string_serialize(fp, src->r.headers) == 0 + && fflush(fp) == 0) + return 0; + return 1; +} + +int back_unserialize(FILE *fp, lien_back** dst) { /* reads back an entry written by back_serialize(); returns 0 with *dst allocated, or 1 with *dst == NULL on error */ + size_t size; + *dst = NULL; + errno = 0; + if (back_data_unserialize(fp, (void**) dst, &size) == 0 && size == sizeof(lien_back)) { + (*dst)->tmpfile = NULL; + (*dst)->chunk_adr = NULL; + (*dst)->r.adr = NULL; + (*dst)->r.out = NULL; + (*dst)->r.location = (*dst)->location_buffer; + (*dst)->r.fp = NULL; +#if HTS_USEOPENSSL + (*dst)->r.ssl_con = NULL; +#endif + if (back_data_unserialize(fp, (void**) &(*dst)->r.adr, &size) == 0) + { + (*dst)->r.size = size; + (*dst)->r.headers = NULL; + if (back_string_unserialize(fp, &(*dst)->r.headers) == 0) + return 0; /* ok */ + if ((*dst)->r.headers != NULL) + freet((*dst)->r.headers); + } + if ((*dst)->r.adr != NULL) + freet((*dst)->r.adr); + } + if (*dst != NULL) /* release the entry itself: dst is the caller's slot, not an allocated block */ + freet(*dst); + *dst = NULL; + return 1; /* error */ +} + // clear, or leave for keep-alive int back_maydelete(httrackp* opt,cache_back* cache,struct_back* sback, int p) { lien_back* const back = sback->lnk; @@ -712,9 +908,9 @@
int back_maydelete(httrackp* opt,cache_back* cache,struct_back* sback, int p) { strcpybuff(tmp.url_adr, back[p].url_adr); if (back_letlive(opt, cache, sback, p)) { strcpybuff(back[p].url_adr, tmp.url_adr); - back[p].status = -103; // alive & waiting + back[p].status = STATUS_ALIVE; // alive & waiting if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"(Keep-Alive): successfully saved #%d (%s)"LF, + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(Keep-Alive): successfully saved #%d (%s)"LF, back[p].r.debugid, back[p].url_adr); test_flush; } @@ -754,7 +950,7 @@ void back_maydeletehttp(httrackp* opt, cache_back* cache, struct_back* sback, in /* Connection delay must not exceed keep-alive timeout */ && ( opt->maxconn <= 0 || ( back[p].r.keep_alive_t > ( 1.0 / opt->maxconn ) ) ) /* Available slot in backing */ - && ( q = back_search(opt, cache, sback) ) >= 0 + && ( q = back_search(opt, sback) ) >= 0 ) { lien_back tmp; @@ -764,9 +960,9 @@ void back_maydeletehttp(httrackp* opt, cache_back* cache, struct_back* sback, in back[q].ka_time_start = back[p].ka_time_start; // refresh back[p].r.soc = INVALID_SOCKET; strcpybuff(back[q].url_adr, tmp.url_adr); // address - back[q].status = -103; // alive & waiting + back[q].status = STATUS_ALIVE; // alive & waiting if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"(Keep-Alive): successfully preserved #%d (%s)"LF, + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(Keep-Alive): successfully preserved #%d (%s)"LF, back[q].r.debugid, back[q].url_adr); test_flush; } @@ -783,13 +979,13 @@ int back_trylive(httrackp* opt,cache_back* cache,struct_back* sback, int p) { lien_back* const back = sback->lnk; const int back_max = sback->count; assertf(p >= 0 && p < back_max); - if (p>=0 && back[p].status != -103) { // we never know.. + if (p>=0 && back[p].status != STATUS_ALIVE) { // we never know.. 
int i = back_searchlive(opt,sback, back[p].url_adr); // search slot if (i >= 0 && i != p) { deletehttp(&back[p].r); // security check back_connxfr(&back[i].r, &back[p].r); // transfer live connection settings from i to p - back_delete(opt,cache,sback, i); // delete old slot - back[p].status=100; // ready to connect + back_delete(opt,cache,sback, i); // delete old slot + back[p].status=STATUS_CONNECTING; // ready to connect return 1; // success: will reuse live connection } } @@ -804,7 +1000,7 @@ int back_searchlive(httrackp* opt, struct_back* sback, char* search_addr) { /* search for a live socket */ for(i = 0 ; i < back_max ; i++ ) { - if (back[i].status == -103) { + if (back[i].status == STATUS_ALIVE) { if (strfield2(back[i].url_adr, search_addr)) { /* same location (xxc: check also virtual hosts?) */ if (time_local() < back[i].ka_time_start + back[i].r.keep_alive_t) { return i; @@ -822,7 +1018,7 @@ int back_search_quick(struct_back* sback) { /* try to find an empty place */ for(i = 0 ; i < back_max ; i++ ) { - if (back[i].status == -1) { + if (back[i].status == STATUS_FREE) { return i; } } @@ -831,7 +1027,7 @@ int back_search_quick(struct_back* sback) { return -1; } -int back_search(httrackp* opt,cache_back* cache,struct_back* sback) { +int back_search(httrackp* opt,struct_back* sback) { lien_back* const back = sback->lnk; const int back_max = sback->count; int i; @@ -842,9 +1038,11 @@ int back_search(httrackp* opt,cache_back* cache,struct_back* sback) { /* couldn't find an empty place, try to requisition a keep-alive place */ for(i = 0 ; i < back_max ; i++ ) { - if (back[i].status == -103) { + if (back[i].status == STATUS_ALIVE) { + lien_back* const back = sback->lnk; /* close this place */ - back_delete(opt,cache,sback, i); + back_clear_entry(&back[i]); /* Already finalized (this is the night of the living dead) */ + /*back_delete(opt,cache,sback, i);*/ return i; } } @@ -859,7 +1057,7 @@ void back_set_finished(struct_back* sback, int p) { assertf(p >= 0 && p < 
back_max); if (p >= 0 && p < sback->count) { // we never know.. /* status: finished (waiting to be validated) */ - back[p].status=0; /* finished */ + back[p].status=STATUS_READY; /* finished */ /* close open r/w streams, if any */ if (back[p].r.fp!=NULL) { fclose(back[p].r.fp); @@ -872,6 +1070,26 @@ void back_set_finished(struct_back* sback, int p) { } } +void back_set_locked(struct_back* sback, int p) { + lien_back* const back = sback->lnk; + const int back_max = sback->count; + assertf(p >= 0 && p < back_max); + if (p >= 0 && p < sback->count) { + /* status: locked (in process, do not swap on disk) */ + back[p].locked = 1; /* locked */ + } +} + +void back_set_unlocked(struct_back* sback, int p) { + lien_back* const back = sback->lnk; + const int back_max = sback->count; + assertf(p >= 0 && p < back_max); + if (p >= 0 && p < sback->count) { + /* status: unlocked (can be swapped on disk) */ + back[p].locked = 0; /* unlocked */ + } +} + int back_flush_output(httrackp* opt, cache_back* cache, struct_back* sback, int p) { lien_back* const back = sback->lnk; const int back_max = sback->count; @@ -905,18 +1123,6 @@ int back_flush_output(httrackp* opt, cache_back* cache, struct_back* sback, int return 0; } -// effacer entrée -int back_set_passe2_ptr(httrackp* opt, cache_back* cache, struct_back* sback, int p, int* pass2_ptr) { - lien_back* const back = sback->lnk; - const int back_max = sback->count; - assertf(p >= 0 && p < back_max); - if (p >= 0 && p < sback->count) { // on sait jamais.. 
- back[p].pass2_ptr = pass2_ptr; - return 1; - } - return 0; -} - // effacer entrée int back_delete(httrackp* opt, cache_back* cache, struct_back* sback, int p) { lien_back* const back = sback->lnk; @@ -934,14 +1140,14 @@ int back_delete(httrackp* opt, cache_back* cache, struct_back* sback, int p) { // Finalize if (!back[p].finalized) { if ( - (back[p].status == 0) // ready + (back[p].status == STATUS_READY) // ready && (!back[p].testmode) // not test mode && (back[p].r.statuscode>0) // not internal error ) { if (opt != NULL && opt->debug>1 && opt->log!=NULL) { - fspc(opt->log,"debug"); fprintf(opt->log,"File '%s%s' -> %s not yet saved in cache - saving now"LF, back[p].url_adr, back[p].url_fil, back[p].url_sav); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"File '%s%s' -> %s not yet saved in cache - saving now"LF, back[p].url_adr, back[p].url_fil, back[p].url_sav); test_flush; } } if (cache != NULL) { @@ -952,46 +1158,70 @@ int back_delete(httrackp* opt, cache_back* cache, struct_back* sback, int p) { // flush output buffers (void) back_flush_output(opt, cache, sback, p); - + + return back_clear_entry(&back[p]); + } + return 0; +} + +/* ensure that the entry is not locked */ +void back_index_unlock(struct_back* sback, int p) { + lien_back* const back = sback->lnk; + if (back[p].locked) { + back[p].locked = 0; /* not locked anymore */ + } +} + +/* the entry is available again */ +static void back_set_free(lien_back* back) { + back->locked = 0; + back->status = STATUS_FREE; +} + +/* delete entry content (clear the entry), but don't unallocate the entry itself */ +int back_clear_entry(lien_back* back) { + if (back != NULL) { // Libérer tous les sockets, handles, buffers.. 
- if (back[p].r.soc!=INVALID_SOCKET) { + if (back->r.soc!=INVALID_SOCKET) { #if HTS_DEBUG_CLOSESOCK DEBUG_W("back_delete: deletehttp\n"); #endif - deletehttp(&back[p].r); - back[p].r.soc=INVALID_SOCKET; + deletehttp(&back->r); + back->r.soc=INVALID_SOCKET; } - if (back[p].r.adr!=NULL) { // reste un bloc à désallouer - freet(back[p].r.adr); - back[p].r.adr=NULL; + if (back->r.adr!=NULL) { // reste un bloc à désallouer + freet(back->r.adr); + back->r.adr=NULL; } - if (back[p].chunk_adr!=NULL) { // reste un bloc à désallouer - freet(back[p].chunk_adr); - back[p].chunk_adr=NULL; - back[p].chunk_size=0; - back[p].chunk_blocksize=0; - back[p].is_chunk=0; + if (back->chunk_adr!=NULL) { // reste un bloc à désallouer + freet(back->chunk_adr); + back->chunk_adr=NULL; + back->chunk_size=0; + back->chunk_blocksize=0; + back->is_chunk=0; } // only for security - if (back[p].tmpfile && back[p].tmpfile[0] != '\0') { - (void) unlink(back[p].tmpfile); - back[p].tmpfile = NULL; + if (back->tmpfile && back->tmpfile[0] != '\0') { + (void) unlink(back->tmpfile); + back->tmpfile = NULL; } // headers - if (back[p].r.headers != NULL) { - freet(back[p].r.headers); - back[p].r.headers = NULL; + if (back->r.headers != NULL) { + freet(back->r.headers); + back->r.headers = NULL; } // Tout nettoyer - memset(&back[p], 0, sizeof(lien_back)); - back[p].r.soc=INVALID_SOCKET; back[p].r.location=back[p].location_buffer; + memset(back, 0, sizeof(lien_back)); + back->r.soc = INVALID_SOCKET; + back->r.location = back->location_buffer; // Le plus important: libérer le champ - back[p].status=-1; + back_set_free(back); + return 1; } return 0; @@ -1003,37 +1233,46 @@ int back_stack_available(struct_back* sback) { const int back_max = sback->count; int p=0,n=0; for( ; p < back_max ; p++ ) - if ( back[p].status == -1 ) + if ( back[p].status == STATUS_FREE ) n++; return n; } // ajouter un lien en backing -int back_add_if_not_exists(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* fil,char* 
save,char* referer_adr,char* referer_fil,int test,int* pass2_ptr) { - int index = back_index(sback, adr, fil, save); - if (index < 0) { - return back_add(sback, opt, cache, adr, fil, save, referer_adr, referer_fil, test, pass2_ptr); - } else { - /* Ensure that the reference to pass2_ptr is set */ - return back_set_passe2_ptr(opt,cache,sback,index,pass2_ptr); +int back_add_if_not_exists(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* referer_adr,char* referer_fil,int test) { + back_clean(opt, cache, sback); /* first cleanup the backlog to ensure that we have some entry left */ + if (!back_exist(sback,opt,adr,fil,save)) { + return back_add(sback, opt, cache, adr, fil, save, referer_adr, referer_fil, test); } + return 0; } -int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* referer_adr,char* referer_fil,int test,int* pass2_ptr) { +int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* referer_adr,char* referer_fil,int test) { lien_back* const back = sback->lnk; const int back_max = sback->count; int p=0; + char catbuff[CATBUFF_SIZE]; + char catbuff2[CATBUFF_SIZE]; + +#if (defined(_DEBUG) || defined(DEBUG)) + if (!test && back_exist(sback,opt,adr,fil,save)) { + int already_there = 0; + if (opt->log!=NULL) { + HTS_LOG(opt,LOG_ERROR); fprintf(opt->log,"error: back_add(%s,%s,%s) duplicate"LF, adr, fil, save); + } + } +#endif // vérifier cohérence de adr et fil (non vide!) if (strnotempty(adr)==0) { - if ((opt->debug>1) && (opt->errlog!=NULL)) { - fspc(opt->errlog,"debug"); fprintf(opt->errlog,"error: adr is empty for back_add"LF); + if (opt->log!=NULL) { + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"error: adr is empty for back_add"LF); } return -1; // erreur! 
} if (strnotempty(fil)==0) { - if ((opt->debug>1) && (opt->errlog!=NULL)) { - fspc(opt->errlog,"debug"); fprintf(opt->errlog,"error: fil is empty for back_add"LF); + if (opt->log!=NULL) { + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"error: fil is empty for back_add"LF); } return -1; // erreur! } @@ -1044,7 +1283,7 @@ int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* // rechercher emplacement back_clean(opt, cache, sback); - if ( ( p = back_search(opt, cache, sback) ) >= 0) { + if ( ( p = back_search(opt, sback) ) >= 0) { back[p].send_too[0]='\0'; // éventuels paramètres supplémentaires à transmettre au serveur // clear r @@ -1059,7 +1298,7 @@ int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* strcpybuff(back[p].url_adr,adr); strcpybuff(back[p].url_fil,fil); strcpybuff(back[p].url_sav,save); - back[p].pass2_ptr=pass2_ptr; + //back[p].links_index = links_index; // copier referer si besoin strcpybuff(back[p].referer_adr,""); strcpybuff(back[p].referer_fil,""); @@ -1094,24 +1333,25 @@ int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* if (opt->state.stop) { back[p].r.statuscode=STATUSCODE_INVALID; // fatal strcpybuff(back[p].r.msg,"mirror stopped by user"); - back[p].status=0; // terminé + back[p].status=STATUS_READY; // terminé back_set_finished(sback, p); if ((opt->debug>0) && (opt->log!=NULL)) { - fspc(opt->log,"warning"); fprintf(opt->log,"File not added due to mirror cancel: %s%s"LF,adr,fil); test_flush; + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"File not added due to mirror cancel: %s%s"LF,adr,fil); test_flush; } return 0; } // test "fast header" cache ; that is, tests we did that lead to 3XX/4XX/5XX response codes if (cache->cached_tests != NULL) { - long int ptr = 0; - if (inthash_read((inthash)cache->cached_tests, concat(adr, fil), (long int*)&ptr)) { // gotcha + intptr_t ptr = 0; + if (inthash_read(cache->cached_tests, concat(OPT_GET_BUFF(opt), adr, fil), &ptr)) { // 
gotcha if (ptr != 0) { char* text = (char*) ptr; char* lf = strchr(text, '\n'); int code = 0; if (sscanf(text, "%d", &code) == 1) { // got code back[p].r.statuscode=code; + back[p].status=STATUS_READY; // terminé if (lf != NULL && *lf != '\0') { // got location ? strcpybuff(back[p].r.location, lf + 1); } @@ -1126,7 +1366,7 @@ int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* && ( (!test) || (cache->type==1) ) /* cache prioritaire, laisser passer en test! */ && ( (strnotempty(save)) || (strcmp(fil,"/robots.txt")==0) ) ) { // si en test on ne doit pas utiliser le cache sinon telescopage avec le 302.. #if HTS_FAST_CACHE - long int hash_pos; + intptr_t hash_pos; int hash_pos_return=0; #else char* a=NULL; @@ -1139,7 +1379,7 @@ int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* char BIGSTK buff[HTS_URLMAXSIZE*4]; #if HTS_FAST_CACHE strcpybuff(buff,adr); strcatbuff(buff,fil); - hash_pos_return=inthash_read((inthash)cache->hashtable,buff,(long int*)&hash_pos); + hash_pos_return=inthash_read(cache->hashtable,buff,&hash_pos); #else buff[0]='\0'; strcatbuff(buff,"\n"); strcatbuff(buff,adr); strcatbuff(buff,"\n"); strcatbuff(buff,fil); strcatbuff(buff,"\n"); a=strstr(cache->use,buff); @@ -1153,7 +1393,7 @@ int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* #endif if (!test) { // non mode test #if HTS_FAST_CACHE - int pos=hash_pos; + uintptr_t pos=hash_pos; #else int pos=-1; a+=strlen(buff); @@ -1161,7 +1401,7 @@ int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* #endif if (pos<0) { // pas de mise en cache data, vérifier existence /* note: no check with IS_DELAYED_EXT() enabled - postcheck by client please! */ - if (!IS_DELAYED_EXT(save) && fsize(fconv(save)) <= 0) { // fichier existe pas ou est vide! + if (!IS_DELAYED_EXT(save) && fsize(fconv(catbuff,save)) <= 0) { // fichier existe pas ou est vide! 
int found=0; /* It is possible that the file has been moved due to changes in build structure */ @@ -1169,16 +1409,16 @@ int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* char BIGSTK previous_save[HTS_URLMAXSIZE*2]; previous_save[0] = '\0'; back[p].r = cache_readex(opt, cache, adr, fil, NULL, back[p].location_buffer, previous_save, 0); - if (previous_save[0] != '\0' && fexist(fconv(previous_save))) { - rename(fconv(previous_save), fconv(save)); - if (fexist(fconv(save))) { + if (previous_save[0] != '\0' && fexist(fconv(catbuff,previous_save))) { + rename(fconv(catbuff,previous_save), fconv(catbuff2,save)); + if (fexist(fconv(catbuff,save))) { found = 1; if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"File '%s' has been renamed since last mirror to '%s' ; applying changes"LF, previous_save, save); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"File '%s' has been renamed since last mirror to '%s' ; applying changes"LF, previous_save, save); test_flush; } } else { if ((opt->debug>0) && (opt->log!=NULL)) { - fspc(opt->log,"error"); fprintf(opt->log,"Could not rename '%s' to '%s' ; will have to retransfer it"LF, previous_save, save); test_flush; + HTS_LOG(opt,LOG_ERROR); fprintf(opt->log,"Could not rename '%s' to '%s' ; will have to retransfer it"LF, previous_save, save); test_flush; } } } @@ -1194,11 +1434,11 @@ int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* // sinon, le fichier est ok à priori, mais on renverra un if-modified-since pour // en être sûr if (opt->norecatch) { // tester norecatch - if (!fexist(fconv(save))) { // fichier existe pas mais déclaré: on l'a effacé - FILE* fp=fopen(fconv(save),"wb"); + if (!fexist(fconv(catbuff,save))) { // fichier existe pas mais déclaré: on l'a effacé + FILE* fp=fopen(fconv(catbuff,save),"wb"); if (fp) fclose(fp); if (opt->log!=NULL) { - fspc(opt->log,"warning"); fprintf(opt->log,"File must have been erased by user, 
ignoring: %s%s"LF,back[p].url_adr,back[p].url_fil); test_flush; + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"File must have been erased by user, ignoring: %s%s"LF,back[p].url_adr,back[p].url_fil); test_flush; } } } @@ -1235,7 +1475,7 @@ int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* /* Interdiction taille par le wizard? --> détruire */ if (back[p].r.statuscode != -1) { // pas d'erreur de lecture if (!back_checksize(opt,&back[p],0)) { - back[p].status=0; // FINI + back[p].status=STATUS_READY; // FINI back_set_finished(sback, p); back[p].r.statuscode=STATUSCODE_TOO_BIG; if (!back[p].testmode) @@ -1249,13 +1489,13 @@ int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* if (back[p].r.statuscode != -1 || IS_DELAYED_EXT(save)) { // pas d'erreur de lecture ou test retardé if ((opt->debug>0) && (opt->log!=NULL)) { if (!test) { - fspc(opt->log,"debug"); fprintf(opt->log,"File immediately loaded from cache: %s%s"LF,back[p].url_adr,back[p].url_fil); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"File immediately loaded from cache: %s%s"LF,back[p].url_adr,back[p].url_fil); test_flush; } else { - fspc(opt->log,"debug"); fprintf(opt->log,"File immediately tested from cache: %s%s"LF,back[p].url_adr,back[p].url_fil); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"File immediately tested from cache: %s%s"LF,back[p].url_adr,back[p].url_fil); test_flush; } } back[p].r.notmodified=1; // fichier non modifié - back[p].status=0; // OK prêt + back[p].status=STATUS_READY; // OK prêt //file_notify(back[p].url_adr, back[p].url_fil, back[p].url_sav, 0, 0, back[p].r.notmodified); // not modified back_set_finished(sback, p); @@ -1284,7 +1524,7 @@ int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* if (!back_checksize(opt,&back[p],1)) { r.statuscode = STATUSCODE_INVALID; // - back[p].status=0; // FINI + back[p].status=STATUS_READY; // FINI back_set_finished(sback, p); 
back[p].r.statuscode=STATUSCODE_TOO_BIG; deletehttp(&back[p].r); back[p].r.soc=INVALID_SOCKET; @@ -1298,7 +1538,7 @@ int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* } if (r.statuscode != -1) { - if (r.statuscode==200) { // uniquement des 200 (OK) + if (r.statuscode==HTTP_OK) { // uniquement des 200 (OK) if (strnotempty(r.etag)) { // ETag (RFC2616) /* - If both an entity tag and a Last-Modified value have been @@ -1332,12 +1572,12 @@ int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* // Pas dans le cache: fichier n'a pas été transféré du tout, donc pas sur disque? } else { if (fexist(save)) { // fichier existe? aghl! - LLint sz=fsize(save); + off_t sz=fsize(save); // Bon, là il est possible que le fichier ait été partiellement transféré // (s'il l'avait été en totalité il aurait été inscrit dans le cache ET existerait sur disque) // PAS de If-Modified-Since, on a pas connaissance des données à la date du cache // On demande juste les données restantes si le date est valide (206), tout sinon (200) - if ((ishtml(save) != 1) && (ishtml(back[p].url_fil)!=1)) { // NON HTML (liens changés!!) + if ((ishtml(opt,save) != 1) && (ishtml(opt,back[p].url_fil)!=1)) { // NON HTML (liens changés!!) if (sz>0) { // Fichier non vide? (question bête, sinon on transfert tout!) 
char lastmodified[256]; get_filetime_rfc822(save, lastmodified); @@ -1346,7 +1586,7 @@ int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* printf("..if unmodified since %s size "LLintP"\n", lastmodified, (LLint)sz); #endif if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"File partially present ("LLintP" bytes): %s%s"LF,(LLint)sz,back[p].url_adr,back[p].url_fil); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"File partially present ("LLintP" bytes): %s%s"LF,(LLint)sz,back[p].url_adr,back[p].url_fil); test_flush; } /* impossible - don't have etag or date @@ -1368,37 +1608,37 @@ int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* back[p].r.req.nocompression=1; } else { if ((opt->debug>0) && (opt->log!=NULL)) { - fspc(opt->log,"warning"); fprintf(opt->log,"Could not find timestamp for partially present file, restarting (lost "LLintP" bytes): %s%s"LF,(LLint)sz,back[p].url_adr,back[p].url_fil); test_flush; + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Could not find timestamp for partially present file, restarting (lost "LLintP" bytes): %s%s"LF,(LLint)sz,back[p].url_adr,back[p].url_fil); test_flush; } } } else { - if ((opt->debug>0) && (opt->errlog!=NULL)) { - fspc(opt->errlog,"warning"); + if ((opt->debug>0) && (opt->log!=NULL)) { + HTS_LOG(opt,LOG_WARNING); /* if (opt->http10) - fprintf(opt->errlog,"File partially present (%d bytes) retransfered due to HTTP/1.0 settings: %s%s"LF,sz,back[p].url_adr,back[p].url_fil); + fprintf(opt->log,"File partially present (%d bytes) retransfered due to HTTP/1.0 settings: %s%s"LF,sz,back[p].url_adr,back[p].url_fil); else */ - fprintf(opt->errlog,"File partially present ("LLintP" bytes) retransfered due to lack of cache: %s%s"LF,(LLint)sz,back[p].url_adr,back[p].url_fil); + fprintf(opt->log,"File partially present ("LLintP" bytes) retransfered due to lack of cache: %s%s"LF,(LLint)sz,back[p].url_adr,back[p].url_fil); test_flush; } /* 
Sinon requête normale... */ back[p].http11=0; } } else if (opt->norecatch) { // tester norecatch - filenote(save,NULL); // ne pas purger tout de même - file_notify(back[p].url_adr, back[p].url_fil, back[p].url_sav, 0, 0, back[p].r.notmodified); - back[p].status=0; // OK prêt + filenote(&opt->state.strc,save,NULL); // ne pas purger tout de même + file_notify(opt,back[p].url_adr, back[p].url_fil, back[p].url_sav, 0, 0, back[p].r.notmodified); + back[p].status=STATUS_READY; // OK prêt back_set_finished(sback, p); back[p].r.statuscode=STATUSCODE_INVALID; // erreur strcpybuff(back[p].r.msg,"Null-size file not recaught"); return 0; } } else { - if ((opt->debug>0) && (opt->errlog!=NULL)) { - fspc(opt->errlog,"warning"); - fprintf(opt->errlog,"HTML file ("LLintP" bytes) retransfered due to lack of cache: %s%s"LF,(LLint)sz,back[p].url_adr,back[p].url_fil); + if ((opt->debug>0) && (opt->log!=NULL)) { + HTS_LOG(opt,LOG_WARNING); + fprintf(opt->log,"HTML file ("LLintP" bytes) retransfered due to lack of cache: %s%s"LF,(LLint)sz,back[p].url_adr,back[p].url_fil); test_flush; } /* Sinon requête normale... 
*/ @@ -1417,12 +1657,18 @@ int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* // ne pas traiter ou recevoir l'en tête immédiatement memset(&(back[p].r), 0, sizeof(htsblk)); back[p].r.soc=INVALID_SOCKET; back[p].r.location=back[p].location_buffer; // recopier proxy - memcpy(&(back[p].r.req.proxy), &opt->proxy, sizeof(opt->proxy)); + if ((back[p].r.req.proxy.active = opt->proxy.active)) { + if (StringBuff(opt->proxy.bindhost) != NULL) + strcpybuff(back[p].r.req.proxy.bindhost, StringBuff(opt->proxy.bindhost)); + if (StringBuff(opt->proxy.name) != NULL) + strcpybuff(back[p].r.req.proxy.name, StringBuff(opt->proxy.name)); + back[p].r.req.proxy.port = opt->proxy.port; + } // et user-agent - strcpybuff(back[p].r.req.user_agent,opt->user_agent); - strcpybuff(back[p].r.req.referer,opt->referer); - strcpybuff(back[p].r.req.from,opt->from); - strcpybuff(back[p].r.req.lang_iso,opt->lang_iso); + strcpy(back[p].r.req.user_agent,StringBuff(opt->user_agent)); + strcpy(back[p].r.req.referer,StringBuff(opt->referer)); + strcpy(back[p].r.req.from,StringBuff(opt->from)); + strcpy(back[p].r.req.lang_iso,StringBuff(opt->lang_iso)); back[p].r.req.user_agent_send=opt->user_agent_send; // et http11 back[p].r.req.http11=back[p].http11; @@ -1432,22 +1678,21 @@ int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* // mode ftp, court-circuit! 
if (strfield(back[p].url_adr,"ftp://")) { if (back[p].testmode) { - if ((opt->debug>1) && (opt->errlog!=NULL)) { - fspc(opt->errlog,"debug"); fprintf(opt->errlog,"error: forbidden test with ftp link for back_add"LF); + if (opt->log!=NULL) { + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"error: forbidden test with ftp link for back_add"LF); } return -1; // erreur pas de test permis } if (!(back[p].r.req.proxy.active && opt->ftp_proxy)) { // connexion directe, gérée en thread - back[p].status=1000; // connexion ftp + FTPDownloadStruct *str = (FTPDownloadStruct*) malloc(sizeof(FTPDownloadStruct)); + str->pBack = &back[p]; + str->pOpt = opt; + /* */ + back[p].status=STATUS_FTP_TRANSFER; // connexion ftp #if USE_BEGINTHREAD - launch_ftp(&(back[p])); + launch_ftp(str); #else - { - char nid[32]; - sprintf(nid,"htsftp%d-in_progress.lock",p); - strcpybuff(back[p].location_buffer,fconcat(opt->path_log,nid)); - } - launch_ftp(&(back[p]),back[p].location_buffer,opt->exec); +#error Must have pthreads #endif return 0; } @@ -1456,15 +1701,15 @@ int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* else if (strfield(back[p].url_adr,"mms://")) { MMSDownloadStruct str; if (back[p].testmode) { - if ((opt->debug>1) && (opt->errlog!=NULL)) { - fspc(opt->errlog,"debug"); fprintf(opt->errlog,"error: forbidden test with mms link for back_add"LF); + if (opt->log!=NULL) { + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"error: forbidden test with mms link for back_add"LF); } return -1; // erreur pas de test permis } if (back[p].r.req.proxy.active) { - fspc(opt->errlog,"warning"); fprintf(opt->errlog,"warning: direct connection for mms links (proxy settings ignored)"LF); + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"warning: direct connection for mms links (proxy settings ignored)"LF); } - back[p].status=1000; // connexion externe + back[p].status=STATUS_FTP_TRANSFER; // connexion externe str.pBack = &back[p]; str.pOpt = opt; launch_mms(&str); @@ -1484,16 +1729,16 @@ int 
back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* #if HDEBUG printf("back_solve..\n"); #endif - back[p].status=101; // tentative de résolution du nom de host + back[p].status=STATUS_WAIT_DNS; // tentative de résolution du nom de host soc=INVALID_SOCKET; // pas encore ouverte - back_solve(&back[p]); // préparer - if (host_wait(&back[p])) { // prêt, par ex fichier ou dispo dans dns + back_solve(opt, &back[p]); // préparer + if (host_wait(opt, &back[p])) { // prêt, par ex fichier ou dispo dans dns #if HDEBUG printf("ok, dns cache ready..\n"); #endif - soc=http_xfopen(0,0,0,back[p].send_too,adr,fil,&(back[p].r)); + soc=http_xfopen(opt,0,0,0,back[p].send_too,adr,fil,&(back[p].r)); if (soc==INVALID_SOCKET) { - back[p].status=0; // fini, erreur + back[p].status=STATUS_READY; // fini, erreur back_set_finished(sback, p); } } @@ -1521,7 +1766,7 @@ int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* soc = back[p].r.soc; if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"(Keep-Alive): successfully linked #%d (for %s%s)"LF, + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(Keep-Alive): successfully linked #%d (for %s%s)"LF, back[p].r.debugid, back[p].url_adr, back[p].url_fil); test_flush; } @@ -1543,7 +1788,7 @@ int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* // Note: on charge les code-page erreurs (erreur 404, etc) dans le cas où cela est // rattrapable (exemple: 301,302 moved xxx -> refresh sur la page!) 
- //if ((back[p].statuscode!=200) || (soc<0)) { // ERREUR HTTP/autre + //if ((back[p].statuscode!=HTTP_OK) || (soc<0)) { // ERREUR HTTP/autre #if CNXDEBUG printf("Xfopen ok, poll..\n"); @@ -1551,23 +1796,23 @@ printf("Xfopen ok, poll..\n"); #if HTS_XGETHOST if (soc!=INVALID_SOCKET) - if (back[p].status==101) { // pas d'erreur + if (back[p].status==STATUS_WAIT_DNS) { // pas d'erreur if (!back[p].r.is_file) - back[p].status=100; // connexion en cours + back[p].status=STATUS_CONNECTING; // connexion en cours else back[p].status=1; // fichier } #else if (soc==INVALID_SOCKET) { // erreur socket - back[p].status=0; // FINI + back[p].status=STATUS_READY; // FINI back_set_finished(sback, p); //if (back[p].soc!=INVALID_SOCKET) deletehttp(back[p].soc); back[p].r.soc=INVALID_SOCKET; } else { if (!back[p].r.is_file) #if HTS_XCONN - back[p].status=100; // connexion en cours + back[p].status=STATUS_CONNECTING; // connexion en cours #else back[p].status=99; // chargement en tête en cours #endif @@ -1589,8 +1834,45 @@ printf("Xfopen ok, poll..\n"); return 0; } else { - if ((opt->debug>1) && (opt->errlog!=NULL)) { - fspc(opt->errlog,"debug"); fprintf(opt->errlog,"error: no space left in stack for back_add"LF); + if (opt->log!=NULL) { + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"error: no space left in stack for back_add"LF); + if ( ( opt->state.debug_state & 1 ) == 0 ) { /* debug_state<0> == debug 'no space left in stack' */ + int i; + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"debug: DUMPING %d BLOCKS"LF, back_max); + opt->state.debug_state |= 1; /* once */ + /* OUTPUT FULL DEBUG INFORMATION THE FIRST TIME WE SEE THIS VERY ANNOYING BUG, + HOPING THAT SOME USER REPORT WILL QUICKLY SOLVE THIS PROBLEM :p */ + for(i = 0 ; i < back_max ; i++ ) { + if (back[i].status != -1) { + int may_clean = slot_can_be_cleaned(&back[i]); + int may_finalize = may_clean && slot_can_be_finalized(opt, &back[i]); + int may_serialize = slot_can_be_cached_on_disk(&back[i]); + HTS_LOG(opt,LOG_INFO); + 
fprintf(opt->log, + "debug: back[%03d]: may_clean=%d, may_finalize_disk=%d, may_serialize=%d:"LF + "\t" "finalized(%d), status(%d), locked(%d), delayed(%d), test(%d), "LF + "\t" "statuscode(%d), size(%d), is_write(%d), may_hypertext(%d), "LF + "\t" "contenttype(%s), url(%s%s), save(%s)"LF, + i, + may_clean, may_finalize, may_serialize, + back[i].finalized, + back[i].status, + back[i].locked, + IS_DELAYED_EXT(back[i].url_sav), + back[i].testmode, + back[i].r.statuscode, + (int) back[i].r.size, + back[i].r.is_write, + may_be_hypertext_mime(opt,back[i].r.contenttype, back[i].url_fil), + /* */ + back[i].r.contenttype, + back[i].url_adr, back[i].url_fil, + back[i].url_sav ? back[i].url_sav : "" + ); + } + } + } + } return -1; // plus de place } @@ -1601,9 +1883,14 @@ printf("Xfopen ok, poll..\n"); #if HTS_XGETHOST #if USE_BEGINTHREAD // lancement multithread du robot -PTHREAD_TYPE PTHREAD_TYPE_FNC Hostlookup(void* iadr_p) { +typedef struct { + char iadr_p[HTS_URLMAXSIZE]; + httrackp *opt; +} HostlookupStruct; +void Hostlookup(void* pP) { + HostlookupStruct *str = (HostlookupStruct*) pP; char iadr[256]; - t_dnscache* cache=_hts_cache(); // adresse du cache + t_dnscache* cache=_hts_cache(str->opt); // adresse du cache t_hostent* hp; int error_found=0; @@ -1611,18 +1898,17 @@ PTHREAD_TYPE PTHREAD_TYPE_FNC Hostlookup(void* iadr_p) { #if DEBUGDNS printf("resolv in background: %s\n",jump_identification(iadr_p)); #endif - strcpybuff(iadr,jump_identification(iadr_p)); + strcpybuff(iadr,jump_identification(str->iadr_p)); // couper éventuel : { char *a; if ( (a=jump_toport(iadr)) ) *a='\0'; // get rid of it } - freet(iadr_p); + freet(pP); + + hts_mutexlock(&dns_lock); - // attendre que le cache dns soit prêt - while(_hts_lockdns(-1)); // attendre libération - _hts_lockdns(1); // locker while(cache->n) { if (strcmp(cache->iadr,iadr)==0) { error_found=1; @@ -1641,7 +1927,6 @@ PTHREAD_TYPE PTHREAD_TYPE_FNC Hostlookup(void* iadr_p) { strcpybuff(cache->n->iadr,iadr); 
cache->n->host_length=0; /* pour le moment rien */ cache->n->n=NULL; - _hts_lockdns(0); // délocker /* resolve */ #if DEBUGDNS @@ -1654,28 +1939,25 @@ PTHREAD_TYPE PTHREAD_TYPE_FNC Hostlookup(void* iadr_p) { memcpy(cache->n->host_addr, hp->h_addr, hp->h_length); cache->n->host_length = hp->h_length; } - } else - _hts_lockdns(0); // délocker + } } else { #if DEBUGDNS printf("aborting resolv for %s (found)\n",iadr); #endif - _hts_lockdns(0); // délocker } - // fin de copie de hts_gethostbyname + + hts_mutexrelease(&dns_lock); #if DEBUGDNS printf("quitting resolv for %s (result: %d)\n",iadr,(cache->n!=NULL)?cache->n->host_length:(-999)); #endif - - return PTHREAD_RETURN; /* _endthread implied */ } #endif // attendre que le host (ou celui du proxy) ait été résolu // si c'est un fichier, la résolution est immédiate // idem pour ftp:// -void back_solve(lien_back* back) { +void back_solve(httrackp *opt, lien_back* back) { if ((!strfield(back->url_adr,"file://")) && ! strfield(back->url_adr,"ftp://") #if HTS_USEMMS @@ -1683,22 +1965,23 @@ void back_solve(lien_back* back) { #endif ) { //## if (back->url_adr[0]!=lOCAL_CHAR) { // qq chose à préparer - char* a; + const char* a; if (!(back->r.req.proxy.active)) a=back->url_adr; else a=back->r.req.proxy.name; a = jump_protocol(a); - if (!hts_dnstest(a)) { // non encore testé!.. + if (!hts_dnstest(opt, a)) { // non encore testé!.. 
// inscire en thread -#if HTS_WIN +#ifdef _WIN32 // Windows #if USE_BEGINTHREAD { - char* p = calloct(strlen(a)+2,1); - if (p) { - strcpybuff(p,a); - (void)hts_newthread( Hostlookup , 0, p ); + HostlookupStruct *str = (HostlookupStruct*)malloct(sizeof(HostlookupStruct)); + if (str) { + strcpybuff(str->iadr_p, a); + str->opt = opt; + hts_newthread(Hostlookup, str); } } #else @@ -1710,7 +1993,7 @@ void back_solve(lien_back* back) { char* p = calloct(strlen(a)+2,1); if (p) { strcpybuff(p,a); - (void)hts_newthread( Hostlookup , 0, p ); + hts_newthread( Hostlookup , p ); } #else // Sous Unix, le gethostbyname() est bloquant.. @@ -1723,7 +2006,7 @@ void back_solve(lien_back* back) { } // détermine si le host a pu être résolu -int host_wait(lien_back* back) { +int host_wait(httrackp *opt, lien_back* back) { if ((!strfield(back->url_adr,"file://")) && (!strfield(back->url_adr,"ftp://")) #if HTS_USEMMS @@ -1732,9 +2015,9 @@ int host_wait(lien_back* back) { ) { //## if (back->url_adr[0]!=lOCAL_CHAR) { if (!(back->r.req.proxy.active)) { - return (hts_dnstest(back->url_adr)); + return (hts_dnstest(opt, back->url_adr)); } else { - return (hts_dnstest(back->r.req.proxy.name)); + return (hts_dnstest(opt, back->r.req.proxy.name)); } } else return 1; // prêt, fichier local } @@ -1745,82 +2028,101 @@ int host_wait(lien_back* back) { // cleanup non-html files in backing to save backing space // and allow faster "save in cache" operation // also cleanup keep-alive sockets and ensure that not too many sockets are being opened + +static int slot_can_be_cleaned(const lien_back* back) { + return + (back->status == STATUS_READY) // ready + /* Check autoclean */ + && (!back->testmode) // not test mode + && (strnotempty(back->url_sav)) // filename exists + && (HTTP_IS_OK(back->r.statuscode)) // HTTP "OK" + && (back->r.size > 0) // size>0 + ; +} + +static int slot_can_be_finalized(httrackp* opt, const lien_back* back) { + return + (back->r.is_write // not in memory (on disk, ready) + && 
!is_hypertext_mime(opt,back->r.contenttype, back->url_fil) // not HTML/hypertext + && !may_be_hypertext_mime(opt,back->r.contenttype, back->url_fil) // may NOT be parseable mime type + ); +} + void back_clean(httrackp* opt,cache_back* cache,struct_back* sback) { lien_back* const back = sback->lnk; const int back_max = sback->count; -#if HTS_ANALYSTE - int oneMore = ( (_hts_in_html_parsing == 2 && opt->maxsoc >= 2) || (_hts_in_html_parsing == 1 && opt->maxsoc >= 4) ) ? 1 : 0; // testing links -#endif + int oneMore = ( (opt->state._hts_in_html_parsing == 2 && opt->maxsoc >= 2) || (opt->state._hts_in_html_parsing == 1 && opt->maxsoc >= 4) ) ? 1 : 0; // testing links int i; for(i=0;i0) { // size>0 - if (back[i].r.is_write // not in memory (on disk, ready) - && !is_hypertext_mime(back[i].r.contenttype, back[i].url_fil) // not HTML/hypertext - && !may_be_hypertext_mime(back[i].r.contenttype, back[i].url_fil) // may NOT be parseable mime type - ) - { - if (back[i].pass2_ptr) { - (void) back_flush_output(opt, cache, sback, i); // flush output buffers - usercommand(opt, 0, NULL, back[i].url_sav, back[i].url_adr, back[i].url_fil); - *back[i].pass2_ptr=-1; // Done! - HTS_STAT.stat_background++; - if ((opt->debug>0) && (opt->log!=NULL)) { - fspc(opt->log,"info"); fprintf(opt->log,"File successfully written in background: %s"LF,back[i].url_sav); test_flush; + if (slot_can_be_cleaned(&back[i])) { + if (slot_can_be_finalized(opt, &back[i])) { + (void) back_flush_output(opt, cache, sback, i); // flush output buffers + usercommand(opt, 0, NULL, back[i].url_sav, back[i].url_adr, back[i].url_fil); + //if (back[i].links_index >= 0) { + // assertf(back[i].links_index < opt->hash->max_lien); + // opt->hash->liens[back[i].links_index]->pass2 = -1; + // // *back[i].pass2_ptr=-1; // Done! + //} + /* MANDATORY if we don't want back_fill() to endlessly put the same file on download! 
*/ + { + int index = hash_read(opt->hash,back[i].url_sav,"",0,opt->urlhack); // lecture type 0 (sav) + if (index >= 0) { + opt->hash->liens[index]->pass2 = -1; /* DONE! */ + } else { + if (opt->log != NULL) { + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"engine: warning: entry cleaned up, but no trace on heap: %s%s (%s)"LF,back[i].url_adr, back[i].url_fil,back[i].url_sav); + test_flush; + } + } + } + HTS_STAT.stat_background++; + if ((opt->debug>0) && (opt->log!=NULL)) { + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"File successfully written in background: %s"LF,back[i].url_sav); test_flush; + } + back_maydelete(opt,cache,sback,i); // May delete backing entry + } else { + if (!back[i].finalized) { + if (1) { + /* Ensure deleted or recycled socket */ + /* BUT DO NOT YET WIPE back[i].r.adr */ + if ( (opt->debug>1) && (opt->log!=NULL) ) { + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"file %s%s validated (cached, left in memory)"LF,back[i].url_adr,back[i].url_fil); test_flush; + } + back_maydeletehttp(opt, cache, sback, i); + } else { + /* + NOT YET HANDLED CORRECTLY (READ IN NEW CACHE TO DO) + */ + /* Lock the entry but do not keep the html data in memory (in cache) */ + if (opt->cache) { + htsblk r; + + /* Ensure deleted or recycled socket */ + back_maydeletehttp(opt, cache, sback, i); + assertf(back[i].r.soc == INVALID_SOCKET); + + /* Check header */ + cache_header(opt,cache,back[i].url_adr,back[i].url_fil,&r); + if (r.statuscode == HTTP_OK) { + if (back[i].r.soc == INVALID_SOCKET) { + /* Delete buffer and sockets */ + deleteaddr(&back[i].r); + deletehttp(&back[i].r); + if ( (opt->debug>1) && (opt->log!=NULL) ) { + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"file %s%s temporarily left in cache to spare memory"LF,back[i].url_adr,back[i].url_fil); test_flush; } - back_maydelete(opt,cache,sback,i); // May delete backing entry } } else { - if (!back[i].finalized) { - if (1) { - /* Ensure deleted or recycled socket */ - /* BUT DO NOT YET WIPE back[i].r.adr */ - if ( (opt->debug>1) 
&& (opt->log!=NULL) ) { - fspc(opt->log,"debug"); fprintf(opt->log,"file %s%s validated (cached, left in memory)"LF,back[i].url_adr,back[i].url_fil); test_flush; - } - back_maydeletehttp(opt, cache, sback, i); - } else { - /* - NOT YET HANDLED CORRECTLY (READ IN NEW CACHE TO DO) - */ - /* Lock the entry but do not keep the html data in memory (in cache) */ - if (opt->cache) { - htsblk r; - - /* Ensure deleted or recycled socket */ - back_maydeletehttp(opt, cache, sback, i); - assertf(back[i].r.soc == INVALID_SOCKET); - - /* Check header */ - cache_header(opt,cache,back[i].url_adr,back[i].url_fil,&r); - if (r.statuscode == 200) { - if (back[i].r.soc == INVALID_SOCKET) { - /* Delete buffer and sockets */ - deleteaddr(&back[i].r); - deletehttp(&back[i].r); - if ( (opt->debug>1) && (opt->log!=NULL) ) { - fspc(opt->log,"debug"); fprintf(opt->log,"file %s%s temporarily left in cache to spare memory"LF,back[i].url_adr,back[i].url_fil); test_flush; - } - } - } else { - if ((opt->debug>0) && (opt->log!=NULL)) { - fspc(opt->log,"warning"); fprintf(opt->log,"Unexpected html cache lookup error during back clean"LF); test_flush; - } - } - // xxc xxc - } - } - } + if ((opt->debug>0) && (opt->log!=NULL)) { + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Unexpected html cache lookup error during back clean"LF); test_flush; + } } + // xxc xxc } } } } - } else if (back[i].status == -103) { // waiting (keep-alive) + } else if (back[i].status == STATUS_ALIVE) { // waiting (keep-alive) if ( ! 
back[i].r.keep_alive || back[i].r.soc == INVALID_SOCKET @@ -1828,7 +2130,7 @@ void back_clean(httrackp* opt,cache_back* cache,struct_back* sback) { || time_local() >= back[i].ka_time_start + back[i].r.keep_alive_t ) { if ((opt->debug>0) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"(Keep-Alive): live socket closed #%d (%s)"LF, + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(Keep-Alive): live socket closed #%d (%s)"LF, back[i].r.debugid, back[i].url_adr); test_flush; @@ -1839,7 +2141,7 @@ void back_clean(httrackp* opt,cache_back* cache,struct_back* sback) { } /* switch connections to live ones */ for(i=0;i max) { if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"(Keep-Alive): deleting #%d sockets"LF, + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(Keep-Alive): deleting #%d sockets"LF, curr - max); test_flush; } } for(i = 0 ; i < back_max && curr > max ; i++) { - if (back[i].status == -103) { + if (back[i].status == STATUS_ALIVE) { back_delete(opt,cache,sback, i); // delete backing entry curr--; } @@ -1866,7 +2168,7 @@ void back_clean(httrackp* opt,cache_back* cache,struct_back* sback) { { int nxfr = back_cleanup_background(opt,cache,sback); if (nxfr > 0 && (opt->debug>0) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"(htsback): %d slots ready moved to background"LF, nxfr); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(htsback): %d slots ready moved to background"LF, nxfr); test_flush; } } @@ -1875,6 +2177,7 @@ void back_clean(httrackp* opt,cache_back* cache,struct_back* sback) { // attente (gestion des buffers des sockets) void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_timestart) { + char catbuff[CATBUFF_SIZE]; lien_back* const back = sback->lnk; const int back_max = sback->count; unsigned int i_mod; @@ -1888,9 +2191,7 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti int busy_recv=0; // pas de données pour le moment int 
busy_state=0; // pas de connexions int max_loop; // nombre de boucles max à parcourir.. -#if HTS_ANALYSTE int max_loop_chk=0; -#endif unsigned int mod_random = (unsigned int) ( time_local() + HTS_STAT.HTS_TOTAL_RECV ); // max. number of loops @@ -1926,7 +2227,7 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti // en cas de gestion du connect préemptif #if HTS_XCONN - if (back[i].status==100) { // connexion + if (back[i].status==STATUS_CONNECTING) { // connexion do_wait=1; // noter socket write @@ -1947,7 +2248,7 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti } else #endif #if HTS_XGETHOST - if (back[i].status==101) { // attente + if (back[i].status==STATUS_WAIT_DNS) { // attente // rien à faire.. } else #endif @@ -1984,14 +2285,14 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti } } else { back[i].r.statuscode=STATUSCODE_CONNERROR; - if (back[i].status==100) + if (back[i].status==STATUS_CONNECTING) strcpybuff(back[i].r.msg,"Connect Error"); else strcpybuff(back[i].r.msg,"Receive Error"); - back[i].status=0; // terminé + back[i].status=STATUS_READY; // terminé back_set_finished(sback, i); if ((opt->debug>0) && (opt->log!=NULL)) { - fspc(opt->log,"warning"); fprintf(opt->log,"Unexpected socket error during pre-loop"LF); test_flush; + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Unexpected socket error during pre-loop"LF); test_flush; } } #if WIDE_DEBUG @@ -2058,14 +2359,14 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti } back[i].r.soc=INVALID_SOCKET; back[i].r.statuscode=STATUSCODE_CONNERROR; - if (back[i].status==100) + if (back[i].status==STATUS_CONNECTING) strcpybuff(back[i].r.msg,"Connect Error"); else strcpybuff(back[i].r.msg,"Receive Error"); - if (back[i].status == -103) { /* Keep-alive socket */ + if (back[i].status == STATUS_ALIVE) { /* Keep-alive socket */ back_delete(opt,cache,sback, i); } else { - back[i].status=0; 
// terminé + back[i].status=STATUS_READY; // terminé back_set_finished(sback, i); } } @@ -2074,7 +2375,7 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti } // ---- FLAG WRITE MIS A UN?: POUR LE CONNECT - if (back[i].status==100) { // attendre connect + if (back[i].status==STATUS_CONNECTING) { // attendre connect int dispo=0; // vérifier l'existance de timeout-check if (!gestion_timeout) @@ -2098,7 +2399,7 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti SSL_clear(back[i].r.ssl_con); if (SSL_set_fd(back[i].r.ssl_con, back[i].r.soc) == 1) { SSL_set_connect_state(back[i].r.ssl_con); - back[i].status = 102; /* handshake wait */ + back[i].status = STATUS_SSL_WAIT_HANDSHAKE; /* handshake wait */ } else back[i].r.statuscode=STATUSCODE_SSL_HANDSHAKE; } else @@ -2110,7 +2411,7 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET; back[i].r.statuscode=STATUSCODE_NON_FATAL; - back[i].status=0; + back[i].status=STATUS_READY; back_set_finished(sback, i); } } @@ -2121,7 +2422,7 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti printf("..connect ok on socket %d\n",back[i].r.soc); #endif - if ((back[i].r.soc != INVALID_SOCKET) && (back[i].status==100)) { + if ((back[i].r.soc != INVALID_SOCKET) && (back[i].status==STATUS_CONNECTING)) { /* limit nb. 
connections/seconds to avoid server overload */ /*if (opt->maxconn>0) { Sleep(1000/opt->maxconn); @@ -2138,11 +2439,11 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti //if (strcmp(back[i].url_sav,BACK_ADD_TEST)!=0) // vrai get HTS_STAT.stat_nrequests++; if (!back[i].head_request) - http_sendhead(opt->cookie,0,back[i].send_too,back[i].url_adr,back[i].url_fil,back[i].referer_adr,back[i].referer_fil,&back[i].r); + http_sendhead(opt, opt->cookie,0,back[i].send_too,back[i].url_adr,back[i].url_fil,back[i].referer_adr,back[i].referer_fil,&back[i].r); else if (back[i].head_request==2) // test en GET! - http_sendhead(opt->cookie,0,back[i].send_too,back[i].url_adr,back[i].url_fil,back[i].referer_adr,back[i].referer_fil,&back[i].r); + http_sendhead(opt, opt->cookie,0,back[i].send_too,back[i].url_adr,back[i].url_fil,back[i].referer_adr,back[i].referer_fil,&back[i].r); else // test! - http_sendhead(opt->cookie,1,back[i].send_too,back[i].url_adr,back[i].url_fil,back[i].referer_adr,back[i].referer_fil,&back[i].r); + http_sendhead(opt, opt->cookie,1,back[i].send_too,back[i].url_adr,back[i].url_fil,back[i].referer_adr,back[i].referer_fil,&back[i].r); back[i].status=99; // attendre en tête maintenant } } @@ -2150,7 +2451,7 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti // attente gethostbyname } #if HTS_USEOPENSSL - else if (SSL_is_available && back[i].status==102) { // wait for SSL handshake + else if (SSL_is_available && back[i].status == STATUS_SSL_WAIT_HANDSHAKE) { // wait for SSL handshake /* SSL mode */ if (back[i].r.ssl) { int conn_code; @@ -2173,25 +2474,25 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET; back[i].r.statuscode=STATUSCODE_NON_FATAL; - back[i].status=0; + back[i].status=STATUS_READY; back_set_finished(sback, i); } } else { /* got it! 
*/ - back[i].status=100; // back to waitconnect + back[i].status=STATUS_CONNECTING; // back to waitconnect } } else { strcpybuff(back[i].r.msg, "unexpected SSL/TLS error"); deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET; back[i].r.statuscode=STATUSCODE_NON_FATAL; - back[i].status=0; + back[i].status=STATUS_READY; back_set_finished(sback, i); } } #endif #if HTS_XGETHOST - else if (back[i].status==101) { // attendre gethostbyname + else if (back[i].status==STATUS_WAIT_DNS) { // attendre gethostbyname #if DEBUGDNS //printf("status 101 for %s\n",back[i].url_adr); #endif @@ -2200,8 +2501,8 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti if (back[i].timeout>0) gestion_timeout=1; - if (host_wait(&back[i])) { // prêt - back[i].status=100; // attente connexion + if (host_wait(opt, &back[i])) { // prêt + back[i].status=STATUS_CONNECTING; // attente connexion if (back[i].timeout>0) { // refresh timeout si besoin est back[i].timeout_refresh=time_local(); } @@ -2209,9 +2510,9 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti back[i].rateout_time=time_local(); } - back[i].r.soc=http_xfopen(0,0,0,back[i].send_too,back[i].url_adr,back[i].url_fil,&(back[i].r)); + back[i].r.soc=http_xfopen(opt,0,0,0,back[i].send_too,back[i].url_adr,back[i].url_fil,&(back[i].r)); if (back[i].r.soc==INVALID_SOCKET) { - back[i].status=0; // fini, erreur + back[i].status=STATUS_READY; // fini, erreur back_set_finished(sback, i); if (back[i].r.soc!=INVALID_SOCKET) { #if HTS_DEBUG_CLOSESOCK @@ -2233,10 +2534,10 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti #if USE_BEGINTHREAD // ..rien à faire, c'est magic les threads #else - else if (back[i].status==1000) { // en réception ftp + else if (back[i].status==STATUS_FTP_TRANSFER) { // en réception ftp if (!fexist(back[i].location_buffer)) { // terminé FILE* fp; - fp=fopen(fconcat(back[i].location_buffer,".ok"),"rb"); + 
fp=fopen(fconcat(OPT_GET_BUFF(opt), back[i].location_buffer,".ok"),"rb"); if (fp) { int j=0; fscanf(fp,"%d ",&(back[i].r.statuscode)); @@ -2247,13 +2548,13 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti } back[i].r.msg[j++]='\0'; fclose(fp); - unlink(fconcat(back[i].location_buffer,".ok")); - strcpybuff(fconcat(back[i].location_buffer,".ok"),""); + unlink(fconcat(OPT_GET_BUFF(opt), back[i].location_buffer,".ok")); + strcpybuff(fconcat(OPT_GET_BUFF(opt), back[i].location_buffer,".ok"),""); } else { strcpybuff(back[i].r.msg,"Unknown ftp result, check if file is ok"); back[i].r.statuscode=STATUSCODE_INVALID; } - back[i].status=0; + back[i].status=STATUS_READY; back_set_finished(sback, i); // finalize transfer if (back[i].r.statuscode>0) { @@ -2262,8 +2563,8 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti } } #endif - else if (back[i].status==1001) { // ftp ready - back[i].status=0; + else if (back[i].status==STATUS_FTP_READY) { // ftp ready + back[i].status=STATUS_READY; back_set_finished(sback, i); // finalize transfer if (back[i].r.statuscode>0) { @@ -2311,28 +2612,35 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti if (!back[i].testmode) { // pas mode test if (strnotempty(back[i].url_sav)) { if (strcmp(back[i].url_fil,"/robots.txt")) { - if (back[i].r.statuscode==200) { // 'OK' - if (!is_hypertext_mime(back[i].r.contenttype, back[i].url_fil)) { // pas HTML + if (back[i].r.statuscode==HTTP_OK) { // 'OK' + if (!is_hypertext_mime(opt,back[i].r.contenttype, back[i].url_fil)) { // pas HTML if (opt->getmode&2) { // on peut ecrire des non html int fcheck=0; + int last_errno = 0; back[i].r.is_write=1; // écrire if (back[i].r.compressed && /* .gz are *NOT* depacked!! 
*/ - (strfield(get_ext(back[i].url_sav),"gz") == 0) + (strfield(get_ext(catbuff,back[i].url_sav),"gz") == 0) ) { back[i].tmpfile_buffer[0]='\0'; back[i].tmpfile=tmpnam(back[i].tmpfile_buffer); - if (back[i].tmpfile != NULL && back[i].tmpfile[0]) - back[i].r.out=fopen(back[i].tmpfile,"wb"); + if (back[i].tmpfile != NULL && back[i].tmpfile[0]) { + if ((back[i].r.out=fopen(back[i].tmpfile,"wb")) == NULL) { + last_errno = errno; + } + } } else { - file_notify(back[i].url_adr, back[i].url_fil, back[i].url_sav, 1, 1, back[i].r.notmodified); + file_notify(opt,back[i].url_adr, back[i].url_fil, back[i].url_sav, 1, 1, back[i].r.notmodified); back[i].r.compressed=0; - back[i].r.out=filecreate(back[i].url_sav); + if ((back[i].r.out=filecreate(&opt->state.strc, back[i].url_sav)) == NULL) { + last_errno = errno; + } } if (back[i].r.out==NULL) { + errno = last_errno; if ((fcheck=check_fatal_io_errno())) { - fspc(opt->log,"error"); fprintf(opt->log,"Mirror aborted: disk full or filesystem problems"LF); test_flush; + HTS_LOG(opt,LOG_ERROR); fprintf(opt->log,"Mirror aborted: disk full or filesystem problems"LF); test_flush; opt->state.exit_xh=-1; /* fatal error */ } } @@ -2340,16 +2648,16 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti printf("direct-disk: %s\n",back[i].url_sav); #endif if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"File received from net to disk: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"File received from net to disk: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush; } if (back[i].r.out==NULL) { - if (opt->errlog) { - fspc(opt->errlog,"error"); - fprintf(opt->errlog,"Unable to save file %s : %s"LF,back[i].url_sav, strerror(errno)); + if (opt->log) { + HTS_LOG(opt,LOG_ERROR); + fprintf(opt->log,"Unable to save file %s : %s"LF,back[i].url_sav, strerror(last_errno)); if (fcheck) { - fspc(opt->errlog,"error"); - fprintf(opt->errlog,"* 
* Fatal write error, giving up"LF); + HTS_LOG(opt,LOG_ERROR); + fprintf(opt->log,"* * Fatal write error, giving up"LF); } test_flush; } @@ -2358,14 +2666,14 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti printf("..error!\n"); #endif } -#if HTS_WIN==0 +#ifndef _WIN32 else chmod(back[i].url_sav,HTS_ACCESS_FILE); #endif } else { // on coupe tout! if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"File cancelled (non HTML): %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"File cancelled (non HTML): %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush; } - back[i].status=0; // terminé + back[i].status=STATUS_READY; // terminé back_set_finished(sback, i); if (!back[i].testmode) back[i].r.statuscode=STATUSCODE_INVALID; // EUHH CANCEL @@ -2391,9 +2699,9 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti // réception de données depuis socket ou fichier if (back[i].status) { - if (back[i].status==99) // recevoir par bloc de lignes + if (back[i].status==STATUS_WAIT_HEADERS) // recevoir par bloc de lignes retour_fread=http_xfread1(&(back[i].r),0); - else if (back[i].status==98 || back[i].status==97) { // recevoir longueur chunk en hexa caractère par caractère + else if (back[i].status==STATUS_CHUNK_WAIT || back[i].status==STATUS_CHUNK_CR) { // recevoir longueur chunk en hexa caractère par caractère // backuper pour lire dans le buffer chunk htsblk r; memcpy(&r, &(back[i].r), sizeof(htsblk)); @@ -2427,7 +2735,7 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti if (back[i].is_chunk) { // attendre prochain chunk if (back[i].r.size==back[i].r.totalsize) { // fin chunk! 
//printf("chunk end at %d\n",back[i].r.size); - back[i].status=97; /* fetch ending CRLF */ + back[i].status=STATUS_CHUNK_CR; /* fetch ending CRLF */ if (back[i].chunk_adr!=NULL) { freet(back[i].chunk_adr); back[i].chunk_adr=NULL; @@ -2446,7 +2754,7 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti } if (retour_fread < 0) { // fin réception - back[i].status=0; // terminé + back[i].status=STATUS_READY; // terminé back_set_finished(sback, i); /*KA back[i].r.soc=INVALID_SOCKET; */ #if CHUNKDEBUG==1 @@ -2455,18 +2763,18 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti #endif if (retour_fread < 0 && retour_fread != READ_EOF) { if (back[i].r.size > 0) - strcatbuff(back[i].r.msg, "Interrupted transfer"); + strcpybuff(back[i].r.msg, "Interrupted transfer"); else - strcatbuff(back[i].r.msg, "No data (connection closed)"); + strcpybuff(back[i].r.msg, "No data (connection closed)"); back[i].r.statuscode=STATUSCODE_CONNERROR; } else if ((back[i].r.statuscode <= 0) && (strnotempty(back[i].r.msg)==0)) { #if HDEBUG printf("error interruped: %s\n",back[i].r.adr); #endif if (back[i].r.size>0) - strcatbuff(back[i].r.msg,"Interrupted transfer"); + strcpybuff(back[i].r.msg,"Interrupted transfer"); else - strcatbuff(back[i].r.msg,"No data (connection closed)"); + strcpybuff(back[i].r.msg,"No data (connection closed)"); back[i].r.statuscode=STATUSCODE_CONNERROR; } @@ -2487,7 +2795,7 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti } if (back[i].r.totalsize>0) { // tester totalsize - //if ((back[i].r.totalsize>0) && (back[i].status==99)) { // tester totalsize + //if ((back[i].r.totalsize>0) && (back[i].status==STATUS_WAIT_HEADERS)) { // tester totalsize if (back[i].r.totalsize!=back[i].r.size) { // pas la même! 
if (!opt->tolerant) { //#if HTS_CL_IS_FATAL @@ -2498,8 +2806,8 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti } else { //#else // Un warning suffira.. - if (cache->errlog!=NULL) { - fspc(cache->errlog,"warning"); fprintf(cache->errlog,"Incorrect length ("LLintP"!="LLintP" expected) for %s%s"LF,(LLint)back[i].r.size,(LLint)back[i].r.totalsize,back[i].url_adr,back[i].url_fil); + if (cache->log!=NULL) { + fspc(opt,cache->log,"warning"); fprintf(cache->log,"Incorrect length ("LLintP"!="LLintP" expected) for %s%s"LF,(LLint)back[i].r.size,(LLint)back[i].r.totalsize,back[i].url_adr,back[i].url_fil); } //#endif } @@ -2514,7 +2822,7 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti } // Traitement des en têtes chunks ou en têtes - if (back[i].status==98 || back[i].status==97) { // réception taille chunk en hexa ( après les en têtes, peut ne pas + if (back[i].status==STATUS_CHUNK_WAIT || back[i].status==STATUS_CHUNK_CR) { // réception taille chunk en hexa ( après les en têtes, peut ne pas if (back[i].chunk_size > 0 && back[i].chunk_adr[back[i].chunk_size-1]==10) { int chunk_size=-1; char chunk_data[64]; @@ -2534,7 +2842,7 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti #endif if (back[i].r.totalsize<0) back[i].r.totalsize=0; // initialiser à 0 - if (back[i].status==98) { // "real" chunk + if (back[i].status==STATUS_CHUNK_WAIT) { // "real" chunk if (sscanf(chunk_data,"%x",&chunk_size) == 1) { if (chunk_size > 0) back[i].chunk_blocksize = chunk_size; /* the data block chunk size */ @@ -2542,10 +2850,10 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti back[i].chunk_blocksize = -1; /* ending */ back[i].r.totalsize+=chunk_size; // noter taille if (back[i].r.adr != NULL || !back[i].r.is_write) { // Not to disk - back[i].r.adr=(char*) realloct(back[i].r.adr,(INTsys) back[i].r.totalsize + 1); + back[i].r.adr=(char*) realloct(back[i].r.adr, 
(size_t)back[i].r.totalsize + 1); if (!back[i].r.adr) { - if (cache->errlog!=NULL) { - fprintf(cache->errlog,"Error: Not enough memory ("LLintP") for %s%s"LF,(LLint)back[i].r.totalsize,back[i].url_adr,back[i].url_fil); + if (cache->log!=NULL) { + fprintf(cache->log,"Error: Not enough memory ("LLintP") for %s%s"LF,(LLint)back[i].r.totalsize,back[i].url_adr,back[i].url_fil); } } } @@ -2553,11 +2861,11 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti printf("[%d] chunk length: %d - next total "LLintP":\n",(int)back[i].r.soc,(int)chunk_size,(LLint)back[i].r.totalsize); #endif } else { - if (cache->errlog!=NULL) { - fprintf(cache->errlog,"Warning: Illegal chunk (%s) for %s%s"LF,back[i].chunk_adr,back[i].url_adr,back[i].url_fil); + if (cache->log!=NULL) { + fprintf(cache->log,"Warning: Illegal chunk (%s) for %s%s"LF,back[i].chunk_adr,back[i].url_adr,back[i].url_fil); } } - } else { /* back[i].status==97 : just receiving ending CRLF after data */ + } else { /* back[i].status==STATUS_CHUNK_CR : just receiving ending CRLF after data */ if (chunk_data[0] == '\0') { if (back[i].chunk_blocksize > 0) chunk_size=(int)back[i].chunk_blocksize; /* recent data chunk size */ @@ -2569,8 +2877,8 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti printf("[%d] chunk CRLF seen\n", (int)back[i].r.soc); #endif } else { - if (cache->errlog!=NULL) { - fprintf(cache->errlog,"Warning: Illegal chunk CRLF (%s) for %s%s"LF,back[i].chunk_adr,back[i].url_adr,back[i].url_fil); + if (cache->log!=NULL) { + fprintf(cache->log,"Warning: Illegal chunk CRLF (%s) for %s%s"LF,back[i].chunk_adr,back[i].url_adr,back[i].url_fil); } #if CHUNKDEBUG==1 printf("[%d] chunk CRLF ERROR!! 
: '%s'\n", (int)back[i].r.soc, chunk_data); @@ -2578,25 +2886,25 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti } } } else { - if (cache->errlog!=NULL) { - fprintf(cache->errlog,"Warning: Chunk too big ("LLintP") for %s%s"LF,(LLint)back[i].chunk_size,back[i].url_adr,back[i].url_fil); + if (cache->log!=NULL) { + fprintf(cache->log,"Warning: Chunk too big ("LLintP") for %s%s"LF,(LLint)back[i].chunk_size,back[i].url_adr,back[i].url_fil); } } // ok, continuer sur le body // si chunk non nul continuer (ou commencer) - if (back[i].status==97 && chunk_size > 0) { - back[i].status = 98; /* waiting for next chunk (NN\r\n\r\nNN\r\n..\r\n0\r\n\r\n) */ + if (back[i].status==STATUS_CHUNK_CR && chunk_size > 0) { + back[i].status = STATUS_CHUNK_WAIT; /* waiting for next chunk (NN\r\n\r\nNN\r\n..\r\n0\r\n\r\n) */ #if CHUNKDEBUG==1 printf("[%d] waiting for next chunk\n", (int)back[i].r.soc); #endif - } else if (back[i].status==98 && chunk_size == 0) { /* final chunk */ - back[i].status=97; /* final CRLF */ + } else if (back[i].status==STATUS_CHUNK_WAIT && chunk_size == 0) { /* final chunk */ + back[i].status=STATUS_CHUNK_CR; /* final CRLF */ #if CHUNKDEBUG==1 printf("[%d] waiting for final CRLF (chunk)\n", (int)back[i].r.soc); #endif - } else if (back[i].status==98 && chunk_size >= 0) { /* will fetch data now */ + } else if (back[i].status==STATUS_CHUNK_WAIT && chunk_size >= 0) { /* will fetch data now */ back[i].status=1; // continuer body #if CHUNKDEBUG==1 printf("[%d] waiting for body (chunk)\n", (int)back[i].r.soc); @@ -2606,8 +2914,8 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti printf("[%d] chunk end, total: %d\n",(int)back[i].r.soc,back[i].r.size); #endif /* End */ - //if (back[i].status==97) { - back[i].status=0; // fin + //if (back[i].status==STATUS_CHUNK_CR) { + back[i].status=STATUS_READY; // fin back_set_finished(sback, i); //} @@ -2615,9 +2923,9 @@ void back_wait(struct_back* sback,httrackp* 
opt,cache_back* cache,TStamp stat_ti if (!IS_DELAYED_EXT(back[i].url_sav)) { back_finalize(opt,cache,sback,i); } else { - if (back[i].r.statuscode == 200) { - if (cache->errlog!=NULL) { - fspc(cache->errlog,"warning"); fprintf(cache->errlog,"Unexpected incomplete type with 200 code at %s%s"LF, back[i].url_adr, back[i].url_fil); + if (back[i].r.statuscode == HTTP_OK) { + if (cache->log!=NULL) { + fspc(opt,cache->log,"warning"); fprintf(cache->log,"Unexpected incomplete type with 200 code at %s%s"LF, back[i].url_adr, back[i].url_fil); } } } @@ -2648,8 +2956,8 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti strcpybuff(back[i].r.msg,"Incorrect length"); } else { // Un warning suffira.. - if (cache->errlog!=NULL) { - fspc(cache->errlog,"warning"); fprintf(cache->errlog,"Incorrect length ("LLintP"!="LLintP" expected) for %s%s"LF,(LLint)back[i].r.size,(LLint)back[i].r.totalsize,back[i].url_adr,back[i].url_fil); + if (cache->log!=NULL) { + fspc(opt,cache->log,"warning"); fprintf(cache->log,"Incorrect length ("LLintP"!="LLintP" expected) for %s%s"LF,(LLint)back[i].r.size,(LLint)back[i].r.totalsize,back[i].url_adr,back[i].url_fil); } } } @@ -2676,7 +2984,7 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti } // taille buffer chunk > 1 && LF // - } else if (back[i].status==99) { // en têtes (avant le chunk si il est présent) + } else if (back[i].status==STATUS_WAIT_HEADERS) { // en têtes (avant le chunk si il est présent) // if (back[i].r.size>=2) { // double LF @@ -2741,7 +3049,7 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti treathead(opt->cookie,back[i].url_adr,back[i].url_fil,&back[i].r,rcvd); // traiter // parfois les serveurs buggés renvoient un content-range avec un 200 - if (back[i].r.statuscode==200) // 'OK' + if (back[i].r.statuscode==HTTP_OK) // 'OK' if (strfield(rcvd,"content-range:")) // Avec un content-range: relisez les RFC.. 
back[i].r.statuscode=206; // FORCER A 206 !!!!! @@ -2755,21 +3063,20 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti } // Callback -#if HTS_ANALYSTE - if (hts_htmlcheck_receivehead != NULL) { - int test_head=hts_htmlcheck_receivehead(back[i].r.adr, back[i].url_adr, back[i].url_fil, back[i].referer_adr, back[i].referer_fil, &back[i].r); + { + int test_head = RUN_CALLBACK6(opt, receivehead, + back[i].r.adr, back[i].url_adr, back[i].url_fil, back[i].referer_adr, back[i].referer_fil, &back[i].r); if (test_head!=1) { if ((opt->debug>0) && (opt->log!=NULL)) { - fspc(opt->log,"warning"); fprintf(opt->log,"External wrapper aborted transfer, breaking connection: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush; + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"External wrapper aborted transfer, breaking connection: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush; } - back[i].status=0; // FINI + back[i].status=STATUS_READY; // FINI back_set_finished(sback, i); deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET; strcpybuff(back[i].r.msg,"External wrapper aborted transfer"); back[i].r.statuscode = STATUSCODE_INVALID; } } -#endif // Free headers memory now // Actually, save them for informational purpose @@ -2786,17 +3093,17 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti // Check response : 203 == 200 - if (back[i].r.statuscode==203) { // 'Non-Authoritative Information' - back[i].r.statuscode=200; // forcer "OK" - } else if (back[i].r.statuscode == 100) { - back[i].status=99; + if (back[i].r.statuscode==HTTP_NON_AUTHORITATIVE_INFORMATION) { + back[i].r.statuscode=HTTP_OK; // forcer "OK" + } else if (back[i].r.statuscode == HTTP_CONTINUE) { + back[i].status=STATUS_WAIT_HEADERS; back[i].r.size=0; back[i].r.totalsize=0; back[i].chunk_size=0; back[i].r.statuscode=STATUSCODE_INVALID; back[i].r.msg[0]='\0'; if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"Status 100 
detected for %s%s, continuing headers"LF,back[i].url_adr,back[i].url_fil); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"Status 100 detected for %s%s, continuing headers"LF,back[i].url_adr,back[i].url_fil); test_flush; } continue; } @@ -2810,22 +3117,22 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti // -> // Content-Range: bytes */2830 if (back[i].range_req_size == back[i].r.crange) { - filenote(back[i].url_sav,NULL); - file_notify(back[i].url_adr, back[i].url_fil, back[i].url_sav, 0, 0, back[i].r.notmodified); + filenote(&opt->state.strc,back[i].url_sav,NULL); + file_notify(opt,back[i].url_adr, back[i].url_fil, back[i].url_sav, 0, 0, back[i].r.notmodified); deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET; - back[i].status=0; // READY + back[i].status=STATUS_READY; // READY back_set_finished(sback, i); back[i].r.size=back[i].r.totalsize=back[i].range_req_size; - back[i].r.statuscode=304; // NOT MODIFIED + back[i].r.statuscode=HTTP_NOT_MODIFIED; // NOT MODIFIED if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"File seems complete (good 416 message), breaking connection: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"File seems complete (good 416 message), breaking connection: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush; } } } // transform 406 into 200 ; we'll catch embedded links inside the choice page if (back[i].r.statuscode==406) { // 'Not Acceptable' - back[i].r.statuscode=200; + back[i].r.statuscode=HTTP_OK; } // 'do not erase already downloaded file' @@ -2835,9 +3142,9 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti if (HTTP_IS_ERROR(back[i].r.statuscode) && back[i].is_update && !back[i].testmode) { if (back[i].url_sav[0] && fexist(back[i].url_sav)) { if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"Error ignored %d (%s) because of 'no purge' 
option for %s%s"LF,back[i].r.statuscode,back[i].r.msg,back[i].url_adr,back[i].url_fil); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"Error ignored %d (%s) because of 'no purge' option for %s%s"LF,back[i].r.statuscode,back[i].r.msg,back[i].url_adr,back[i].url_fil); test_flush; } - back[i].r.statuscode = 304; + back[i].r.statuscode = HTTP_NOT_MODIFIED; deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET; } } @@ -2854,9 +3161,9 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti // If the size is the same, and the option has been set, we assume // that the file is identical - and therefore let's break the connection if (back[i].is_update) { // mise à jour - if (back[i].r.statuscode==200 && !back[i].testmode) { // 'OK' + if (back[i].r.statuscode==HTTP_OK && !back[i].testmode) { // 'OK' htsblk r = cache_read(opt,cache,back[i].url_adr,back[i].url_fil,NULL,NULL); // lire entrée cache - if (r.statuscode == 200) { // OK pas d'erreur cache + if (r.statuscode == HTTP_OK) { // OK pas d'erreur cache LLint len1,len2; len1=r.totalsize; len2=back[i].r.totalsize; @@ -2864,16 +3171,16 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti len1=r.size; if (len1>0) { if (len1 == len2) { // tailles identiques - back[i].r.statuscode=304; // forcer NOT MODIFIED + back[i].r.statuscode=HTTP_NOT_MODIFIED; // forcer NOT MODIFIED deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET; if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"File seems complete (same size), breaking connection: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"File seems complete (same size), breaking connection: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush; } } } } else { - if (opt->errlog!=NULL) { - fspc(opt->errlog,"warning"); fprintf(opt->errlog,"File seems complete (same size), but there was a cache read error: %s%s"LF,back[i].url_adr,back[i].url_fil); 
test_flush; + if (opt->log!=NULL) { + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"File seems complete (same size), but there was a cache read error: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush; } } if (r.adr) { @@ -2888,21 +3195,21 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti // Detect already downloaded file (with another browser, for example) if (opt->sizehack) { if (!back[i].is_update) { // mise à jour - if (back[i].r.statuscode==200 && !back[i].testmode) { // 'OK' - if (!is_hypertext_mime(back[i].r.contenttype, back[i].url_fil)) { // not HTML + if (back[i].r.statuscode==HTTP_OK && !back[i].testmode) { // 'OK' + if (!is_hypertext_mime(opt,back[i].r.contenttype, back[i].url_fil)) { // not HTML if (strnotempty(back[i].url_sav)) { // target found int size = fsize(back[i].url_sav); // target size if (size >= 0) { if (back[i].r.totalsize == size) { // same size! deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET; - back[i].status=0; // READY + back[i].status=STATUS_READY; // READY back_set_finished(sback, i); back[i].r.size=back[i].r.totalsize; - filenote(back[i].url_sav,NULL); - file_notify(back[i].url_adr, back[i].url_fil, back[i].url_sav, 0, 0, back[i].r.notmodified); - back[i].r.statuscode=304; // NOT MODIFIED + filenote(&opt->state.strc,back[i].url_sav,NULL); + file_notify(opt,back[i].url_adr, back[i].url_fil, back[i].url_sav, 0, 0, back[i].r.notmodified); + back[i].r.statuscode=HTTP_NOT_MODIFIED; // NOT MODIFIED if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"File seems complete (same size file discovered), breaking connection: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"File seems complete (same size file discovered), breaking connection: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush; } } } @@ -2924,23 +3231,23 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti if 
(!back[i].testmode) { // pas mode test if (strnotempty(back[i].url_sav)) { if (strcmp(back[i].url_fil,"/robots.txt")) { - if (back[i].r.statuscode==200) { // 'OK' - if (!is_hypertext_mime(back[i].r.contenttype, back[i].url_fil)) { // pas HTML - if (back[i].r.statuscode==200) { // "OK" + if (back[i].r.statuscode==HTTP_OK) { // 'OK' + if (!is_hypertext_mime(opt,back[i].r.contenttype, back[i].url_fil)) { // pas HTML + if (back[i].r.statuscode==HTTP_OK) { // "OK" if (back[i].range_req_size>0) { // but Range: requested if (back[i].range_req_size == back[i].r.totalsize) { // And same size #if HTS_DEBUG_CLOSESOCK DEBUG_W("back_wait(skip_range): deletehttp\n"); #endif deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET; - back[i].status=0; // READY + back[i].status=STATUS_READY; // READY back_set_finished(sback, i); back[i].r.size=back[i].r.totalsize; - filenote(back[i].url_sav,NULL); - file_notify(back[i].url_adr, back[i].url_fil, back[i].url_sav, 0, 0, back[i].r.notmodified); - back[i].r.statuscode=304; // NOT MODIFIED + filenote(&opt->state.strc,back[i].url_sav,NULL); + file_notify(opt,back[i].url_adr, back[i].url_fil, back[i].url_sav, 0, 0, back[i].r.notmodified); + back[i].r.statuscode=HTTP_NOT_MODIFIED; // NOT MODIFIED if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"File seems complete (reget failed), breaking connection: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"File seems complete (reget failed), breaking connection: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush; } } } @@ -2965,7 +3272,7 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti /* Interdiction taille par le wizard? 
*/ if (back[i].r.soc!=INVALID_SOCKET) { if (!back_checksize(opt,&back[i],1)) { - back[i].status=0; // FINI + back[i].status=STATUS_READY; // FINI back_set_finished(sback, i); back[i].r.statuscode=STATUSCODE_TOO_BIG; deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET; @@ -2981,7 +3288,7 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti // head: terminé if (back[i].head_request) { if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"Tested file: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"Tested file: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush; } #if HTS_DEBUG_CLOSESOCK DEBUG_W("back_wait(head request): deletehttp\n"); @@ -2990,11 +3297,11 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti if (!back[i].http11) { /* NO KA */ deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET; } - back[i].status=0; // terminé + back[i].status=STATUS_READY; // terminé back_set_finished(sback, i); } // traiter une éventuelle erreur 304 (cache à jour utilisable) - else if (back[i].r.statuscode==304) { // document à jour dans le cache + else if (back[i].r.statuscode==HTTP_NOT_MODIFIED) { // document à jour dans le cache // lire dans le cache // ** NOTE: pas de vérif de la taille ici!! 
#if HTS_DEBUG_CLOSESOCK @@ -3015,24 +3322,24 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti // then, force 'ok' status if (back[i].r.statuscode == STATUSCODE_INVALID) { if (fexist(back[i].url_sav)) { - back[i].r.statuscode=200; // OK + back[i].r.statuscode=HTTP_OK; // OK strcpybuff(back[i].r.msg, "OK (cached)"); back[i].r.is_file=1; back[i].r.totalsize = back[i].r.size = fsize(back[i].url_sav); - get_httptype(back[i].r.contenttype, back[i].url_sav, 1); + get_httptype(opt,back[i].r.contenttype, back[i].url_sav, 1); if ((opt->debug>0) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"Not-modified status without cache guessed: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"Not-modified status without cache guessed: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush; } } } // Status is okay? if (back[i].r.statuscode!=-1) { // pas d'erreur de lecture - back[i].status=0; // OK prêt + back[i].status=STATUS_READY; // OK prêt back_set_finished(sback, i); back[i].r.notmodified=1; // NON modifié! 
if ((opt->debug>0) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"File loaded after test from cache: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"File loaded after test from cache: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush; } // finalize @@ -3047,7 +3354,7 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti //printf(">%s status %d\n",back[p].r.contenttype,back[p].r.statuscode); } else { // erreur - back[i].status=0; // terminé + back[i].status=STATUS_READY; // terminé back_set_finished(sback, i); //printf("erreur cache\n"); @@ -3066,7 +3373,7 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti /*KA deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET;*/ back_maydeletehttp(opt, cache, sback, i); - back[i].status=0; // terminé + back[i].status=STATUS_READY; // terminé back_set_finished(sback, i); // finalize if (back[i].r.statuscode>0) { @@ -3085,20 +3392,20 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti // traiter 206 (partial content) // xxc SI CHUNK VERIFIER QUE CA MARCHE?? if (back[i].r.statuscode==206) { // on nous envoie un morceau (la fin) coz une partie sur disque! 
- LLint sz=fsize(back[i].url_sav); + off_t sz=fsize(back[i].url_sav); #if HDEBUG printf("partial content: "LLintP" on disk..\n",(LLint)sz); #endif if (sz>=0) { - if (!is_hypertext_mime(back[i].r.contenttype, back[i].url_sav)) { // pas HTML + if (!is_hypertext_mime(opt,back[i].r.contenttype, back[i].url_sav)) { // pas HTML if (opt->getmode&2) { // on peut ecrire des non html **sinon ben euhh sera intercepté plus loin, donc rap sur ce qui va sortir** - filenote(back[i].url_sav,NULL); // noter fichier comme connu - file_notify(back[i].url_adr, back[i].url_fil, back[i].url_sav, 0, 1, back[i].r.notmodified); - back[i].r.out=fopen(fconv(back[i].url_sav),"ab"); // append + filenote(&opt->state.strc,back[i].url_sav,NULL); // noter fichier comme connu + file_notify(opt,back[i].url_adr, back[i].url_fil, back[i].url_sav, 0, 1, back[i].r.notmodified); + back[i].r.out=fopen(fconv(catbuff,back[i].url_sav),"ab"); // append if (back[i].r.out) { back[i].r.is_write=1; // écrire back[i].r.size=sz; // déja écrit - back[i].r.statuscode=200; // Forcer 'OK' + back[i].r.statuscode=HTTP_OK; // Forcer 'OK' if (back[i].r.totalsize>0) back[i].r.totalsize+=sz; // plus en fait fseek(back[i].r.out,0,SEEK_END); // à la fin @@ -3106,50 +3413,50 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti printf("continue interrupted file\n"); #endif } else { // On est dans la m** - back[i].status=0; // terminé (voir plus loin) + back[i].status=STATUS_READY; // terminé (voir plus loin) back_set_finished(sback, i); strcpybuff(back[i].r.msg,"Can not open partial file"); } } } else { // mémoire - FILE* fp=fopen(fconv(back[i].url_sav),"rb"); + FILE* fp=fopen(fconv(catbuff,back[i].url_sav),"rb"); if (fp) { LLint alloc_mem=sz + 1; if (back[i].r.totalsize>0) alloc_mem+=back[i].r.totalsize; // AJOUTER RESTANT! 
- if ( deleteaddr(&back[i].r) && (back[i].r.adr=(char*) malloct((INTsys) alloc_mem)) ) { + if ( deleteaddr(&back[i].r) && (back[i].r.adr=(char*) malloct((size_t)alloc_mem)) ) { back[i].r.size=sz; if (back[i].r.totalsize>0) back[i].r.totalsize+=sz; // plus en fait - if (( fread(back[i].r.adr,1,(INTsys)sz,fp)) != sz) { - back[i].status=0; // terminé (voir plus loin) + if (( fread(back[i].r.adr,1,sz,fp)) != sz) { + back[i].status=STATUS_READY; // terminé (voir plus loin) back_set_finished(sback, i); strcpybuff(back[i].r.msg,"Can not read partial file"); } else { - back[i].r.statuscode=200; // Forcer 'OK' + back[i].r.statuscode=HTTP_OK; // Forcer 'OK' #if HDEBUG printf("continue in mem interrupted file\n"); #endif } } else { - back[i].status=0; // terminé (voir plus loin) + back[i].status=STATUS_READY; // terminé (voir plus loin) back_set_finished(sback, i); strcpybuff(back[i].r.msg,"No memory for partial file"); } fclose(fp); } else { // Argh.. - back[i].status=0; // terminé (voir plus loin) + back[i].status=STATUS_READY; // terminé (voir plus loin) back_set_finished(sback, i); strcpybuff(back[i].r.msg,"Can not open partial file"); } } } else { // Non trouvé?? - back[i].status=0; // terminé (voir plus loin) + back[i].status=STATUS_READY; // terminé (voir plus loin) back_set_finished(sback, i); strcpybuff(back[i].r.msg,"Can not find partial file"); } // Erreur? - if (back[i].status==0) { + if (back[i].status==STATUS_READY) { if (back[i].r.soc!=INVALID_SOCKET) { #if HTS_DEBUG_CLOSESOCK DEBUG_W("back_wait(206 solve problems): deletehttp\n"); @@ -3167,13 +3474,13 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti if (back[i].status!=0) { // non terminé (erreur) if (!back[i].testmode) { // fichier normal - if (back[i].r.empty /* ?? && back[i].r.statuscode==200 */) { // empty response + if (back[i].r.empty /* ?? 
&& back[i].r.statuscode==HTTP_OK */) { // empty response // Couper connexion back_maydeletehttp(opt, cache, sback, i); /* KA deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET; */ - back[i].status=0; // terminé + back[i].status=STATUS_READY; // terminé back_set_finished(sback, i); - if ( deleteaddr(&back[i].r) && (back[i].r.adr=(char*) malloct((INTsys) 2)) ) { + if ( deleteaddr(&back[i].r) && (back[i].r.adr=(char*) malloct( 2)) ) { back[i].r.adr[0] = 0; } back_finalize(opt,cache,sback,i); @@ -3190,7 +3497,7 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti back[i].chunk_adr=NULL; back[i].chunk_size=0; back[i].chunk_blocksize=0; - back[i].status=98; // start body wait chunk + back[i].status=STATUS_CHUNK_WAIT; // start body wait chunk back[i].r.totalsize=0; /* devalidate size! (rfc) */ } if (back[i].rateout>0) { @@ -3200,13 +3507,13 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti printf("(buffer) start body!\n"); #endif } else { // mode test, ne pas passer en 1!! 
- back[i].status=0; // READY + back[i].status=STATUS_READY; // READY back_set_finished(sback, i); #if HTS_DEBUG_CLOSESOCK DEBUG_W("back_wait(test ok): deletehttp\n"); #endif deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET; - if (back[i].r.statuscode==200) { + if (back[i].r.statuscode==HTTP_OK) { strcpybuff(back[i].r.msg,"Test: OK"); back[i].r.statuscode=STATUSCODE_TEST_OK; // test réussi } @@ -3248,8 +3555,8 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti //if (!opt->quiet) { // petite animation if (opt->verbosedisplay==1) { - if (back[i].status==0) { - if (back[i].r.statuscode==200) + if (back[i].status==STATUS_READY) { + if (back[i].r.statuscode==HTTP_OK) printf("* %s%s ("LLintP" bytes) - OK"VT_CLREOL"\r",back[i].url_adr,back[i].url_fil,(LLint)back[i].r.size); else printf("* %s%s ("LLintP" bytes) - %d"VT_CLREOL"\r",back[i].url_adr,back[i].url_fil,(LLint)back[i].r.size,back[i].r.statuscode); @@ -3281,18 +3588,18 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti } back[i].r.soc=INVALID_SOCKET; back[i].r.statuscode=STATUSCODE_TIMEOUT; - if (back[i].status==100) + if (back[i].status==STATUS_CONNECTING) strcpybuff(back[i].r.msg,"Connect Time Out"); - else if (back[i].status==101) + else if (back[i].status==STATUS_WAIT_DNS) strcpybuff(back[i].r.msg,"DNS Time Out"); else strcpybuff(back[i].r.msg,"Receive Time Out"); - back[i].status=0; // terminé + back[i].status=STATUS_READY; // terminé back_set_finished(sback, i); } else if ((back[i].rateout>0) && (back[i].status<99)) { if (((int) (act-back[i].rateout_time))>=HTS_WATCHRATE) { // checker au bout de 15s if ( (int) ((back[i].r.size)/(act-back[i].rateout_time)) < back[i].rateout ) { // trop lent - back[i].status=0; // terminé + back[i].status=STATUS_READY; // terminé back_set_finished(sback, i); if (back[i].r.soc!=INVALID_SOCKET) { #if HTS_DEBUG_CLOSESOCK @@ -3311,17 +3618,13 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* 
cache,TStamp stat_ti } } max_loop--; -#if HTS_ANALYSTE max_loop_chk++; -#endif } while((busy_state) && (busy_recv) && (max_loop>0)); -#if HTS_ANALYSTE if ((!busy_recv) && (!busy_state)) { if (max_loop_chk>=1) { Sleep(10); // un tite pause pour éviter les lag.. } } -#endif } int back_checksize(httrackp* opt,lien_back* eback,int check_only_totalsize) { @@ -3338,7 +3641,7 @@ int back_checksize(httrackp* opt,lien_back* eback,int check_only_totalsize) { } /* vérifier taille classique (heml et non html) */ - if ((istoobig(size_to_test,eback->maxfile_html,eback->maxfile_nonhtml,eback->r.contenttype))) { + if ((istoobig(opt,size_to_test,eback->maxfile_html,eback->maxfile_nonhtml,eback->r.contenttype))) { return 0; /* interdit */ } } @@ -3348,14 +3651,14 @@ int back_checksize(httrackp* opt,lien_back* eback,int check_only_totalsize) { int back_checkmirror(httrackp* opt) { // Check max time if ((opt->maxsite>0) && (HTS_STAT.stat_bytes >= opt->maxsite)) { - if (opt->errlog) { - fprintf(opt->errlog,"More than "LLintP" bytes have been transfered.. giving up"LF,(LLint)opt->maxsite); + if (opt->log) { + fprintf(opt->log,"More than "LLintP" bytes have been transfered.. giving up"LF,(LLint)opt->maxsite); test_flush; } return 0; } else if ((opt->maxtime>0) && ((time_local()-HTS_STAT.stat_timestart)>opt->maxtime)) { - if (opt->errlog) { - fprintf(opt->errlog,"More than %d seconds passed.. giving up"LF,opt->maxtime); + if (opt->log) { + fprintf(opt->log,"More than %d seconds passed.. 
giving up"LF,opt->maxtime); test_flush; } return 0; @@ -3372,16 +3675,20 @@ LLint back_transfered(LLint nb,struct_back* sback) { // ajouter octets en instance for(i=0;i0) && (back[i].status<99 || back[i].status>=1000)) - nb+=back[i].r.size; + nb += back[i].r.size; // stored (ready) slots if (sback->ready != NULL) { - struct_inthash_enum e = inthash_enum_new((inthash)sback->ready); +#ifndef HTS_NO_BACK_ON_DISK + nb += sback->ready_size_bytes; +#else + struct_inthash_enum e = inthash_enum_new(sback->ready); inthash_chain* item; while((item = inthash_enum_next(&e))) { lien_back* ritem = (lien_back*) item->value.ptr; if ((ritem->status>0) && (ritem->status<99 || ritem->status>=1000)) - nb+=ritem->r.size; + nb += ritem->r.size; } +#endif } return nb; } @@ -3410,36 +3717,27 @@ void back_infostr(struct_back* sback,int i,int j,char* s) { if (back[i].status>=0) { int aff=0; if (j & 1) { - if (back[i].status==100) { + if (back[i].status==STATUS_CONNECTING) { strcatbuff(s,"CONNECT "); - } else if (back[i].status==99) { + } else if (back[i].status==STATUS_WAIT_HEADERS) { strcatbuff(s,"INFOS "); aff=1; - } else if (back[i].status==98 || back[i].status==97) { + } else if (back[i].status==STATUS_CHUNK_WAIT || back[i].status==STATUS_CHUNK_CR) { strcatbuff(s,"INFOSC"); // infos chunk aff=1; } else if (back[i].status>0) { -#if HTS_ANALYSTE==2 - strcatbuff(s,"WAIT "); -#else strcatbuff(s,"RECEIVE "); -#endif aff=1; } } if (j & 2) { - if (back[i].status==0) { + if (back[i].status==STATUS_READY) { switch (back[i].r.statuscode) { case 200: strcatbuff(s,"READY "); aff=1; break; -#if HTS_ANALYSTE==2 - default: - strcatbuff(s,"ERROR "); - break; -#else case -1: strcatbuff(s,"ERROR "); aff=1; @@ -3475,7 +3773,6 @@ void back_infostr(struct_back* sback,int i,int j,char* s) { strcatbuff(s,s2); } aff=1; -#endif } } } diff --git a/src/htsback.h b/src/htsback.h index 90c36db..57751d1 100644 --- a/src/htsback.h +++ b/src/htsback.h @@ -39,19 +39,34 @@ Please visit our Website: 
http://www.httrack.com #define HTSBACK_DEFH #include "htsglobal.h" -#include "htsbasenet.h" -#include "htscore.h" - -typedef enum BackStatusCode { - STATUSCODE_INVALID = -1, - STATUSCODE_TIMEOUT = -2, - STATUSCODE_SLOW = -3, - STATUSCODE_CONNERROR = -4, - STATUSCODE_NON_FATAL = -5, - STATUSCODE_SSL_HANDSHAKE = -6, - STATUSCODE_TOO_BIG = -7, - STATUSCODE_TEST_OK = -10 -} BackStatusCode; + +#if HTS_XGETHOST +#if USE_BEGINTHREAD +#include "htsthread.h" +#endif +#endif + +/* Forward definitions */ +#ifndef HTS_DEF_FWSTRUCT_httrackp +#define HTS_DEF_FWSTRUCT_httrackp +typedef struct httrackp httrackp; +#endif +#ifndef HTS_DEF_FWSTRUCT_struct_back +#define HTS_DEF_FWSTRUCT_struct_back +typedef struct struct_back struct_back; +#endif +#ifndef HTS_DEF_FWSTRUCT_cache_back +#define HTS_DEF_FWSTRUCT_cache_back +typedef struct cache_back cache_back; +#endif +#ifndef HTS_DEF_FWSTRUCT_lien_back +#define HTS_DEF_FWSTRUCT_lien_back +typedef struct lien_back lien_back; +#endif +#ifndef HTS_DEF_FWSTRUCT_htsblk +#define HTS_DEF_FWSTRUCT_htsblk +typedef struct htsblk htsblk; +#endif /* Library internal definictions */ #ifdef HTS_INTERNAL_BYTECODE @@ -63,17 +78,17 @@ void back_free(struct_back** sback); // backing #define BACK_ADD_TEST "(dummy)" #define BACK_ADD_TEST2 "(dummy2)" -int back_index(struct_back* sback,char* adr,char* fil,char* sav); +int back_index(httrackp* opt, struct_back* sback,char* adr,char* fil,char* sav); int back_available(struct_back* sback); LLint back_incache(struct_back* sback); int back_done_incache(struct_back* sback); -HTS_INLINE int back_exist(struct_back* sback,char* adr,char* fil,char* sav); +HTS_INLINE int back_exist(struct_back* sback,httrackp* opt,char* adr,char* fil,char* sav); int back_nsoc(struct_back* sback); int back_nsoc_overall(struct_back* sback); -int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* referer_adr,char* referer_fil,int test,int* pass2_ptr); -int back_add_if_not_exists(struct_back* 
sback,httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* referer_adr,char* referer_fil,int test,int* pass2_ptr); +int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* referer_adr,char* referer_fil,int test); +int back_add_if_not_exists(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* referer_adr,char* referer_fil,int test); int back_stack_available(struct_back* sback); -int back_search(httrackp* opt, cache_back* cache, struct_back* sback); +int back_search(httrackp* opt, struct_back* sback); int back_search_quick(struct_back* sback); void back_clean(httrackp* opt,cache_back* cache,struct_back* sback); int back_cleanup_background(httrackp* opt,cache_back* cache,struct_back* sback); @@ -83,10 +98,15 @@ int back_searchlive(httrackp* opt, struct_back* sback, char* search_addr); void back_connxfr(htsblk* src, htsblk* dst); void back_move(lien_back* src, lien_back* dst); void back_copy_static(const lien_back* src, lien_back* dst); +int back_serialize(FILE *fp, const lien_back* src); +int back_unserialize(FILE *fp, lien_back** dst); void back_set_finished(struct_back* sback, int p); +void back_set_locked(struct_back* sback, int p); +void back_set_unlocked(struct_back* sback, int p); int back_delete(httrackp* opt,cache_back* cache,struct_back* sback,int p); +void back_index_unlock(struct_back* sback, int p); +int back_clear_entry(lien_back* back); int back_flush_output(httrackp* opt, cache_back* cache, struct_back* sback, int p); -int back_set_passe2_ptr(httrackp* opt, cache_back* cache, struct_back* sback, int p, int* pass2_ptr); void back_delete_all(httrackp* opt, cache_back* cache, struct_back* sback); int back_maydelete(httrackp* opt, cache_back* cache, struct_back* sback, int p); void back_maydeletehttp(httrackp* opt, cache_back* cache, struct_back* sback, int p); @@ -97,15 +117,15 @@ void back_infostr(struct_back* sback,int i,int j,char* s); LLint 
back_transfered(LLint add,struct_back* sback); // hostback #if HTS_XGETHOST -void back_solve(lien_back* sback); -int host_wait(lien_back* sback); +void back_solve(httrackp *opt,lien_back* sback); +int host_wait(httrackp *opt, lien_back* sback); #endif int back_checksize(httrackp* opt,lien_back* eback,int check_only_totalsize); int back_checkmirror(httrackp* opt); #if HTS_XGETHOST #if USE_BEGINTHREAD -PTHREAD_TYPE PTHREAD_TYPE_FNC Hostlookup(void* iadr_p); +void Hostlookup(void* iadr_p); #endif #endif diff --git a/src/htsbase.h b/src/htsbase.h index 7faec95..e3aec57 100644 --- a/src/htsbase.h +++ b/src/htsbase.h @@ -43,6 +43,7 @@ extern "C" { #endif #include "htsglobal.h" +#include "htsstrings.h" #include #include @@ -50,7 +51,7 @@ extern "C" { #ifdef HAVE_UNISTD_H #include #endif -#ifdef HAVE_SYS_TYPES_H +#if ( defined(_WIN32) ||defined(HAVE_SYS_TYPES_H) ) #include #endif #ifdef HAVE_SYS_STAT_H @@ -60,16 +61,25 @@ extern "C" { #include #endif -#ifndef _WIN32 #include -#endif -#if HTS_WIN +#ifdef _WIN32 #else #include #endif #include +/* GCC extension */ +#ifndef HTS_UNUSED +#ifdef __GNUC__ +#define HTS_UNUSED __attribute__ ((unused)) +#define HTS_STATIC static __attribute__ ((unused)) +#else +#define HTS_UNUSED +#define HTS_STATIC static +#endif +#endif + #undef min #undef max #define min(a,b) ((a)>(b)?(b):(a)) @@ -149,6 +159,35 @@ extern HTSEXT_API t_abortLog abortLog__; } \ } while(0) +/* logging */ +typedef enum { + LOG_DEBUG, + LOG_INFO, + LOG_WARNING, + LOG_ERROR, + LOG_PANIC +} HTS_LogType; +#define HTS_LOG(OPT,TYPE) do { \ + int last_errno = errno; \ + switch(TYPE) { \ + case LOG_DEBUG: \ + fspc(OPT,(OPT)->log, "debug"); \ + break; \ + case LOG_INFO: \ + fspc(OPT,(OPT)->log, "info"); \ + break; \ + case LOG_WARNING: \ + fspc(OPT,(OPT)->log, "warning"); \ + break; \ + case LOG_ERROR: \ + fspc(OPT,(OPT)->log, "error"); \ + break; \ + case LOG_PANIC: \ + fspc(OPT,(OPT)->log, "panic"); \ + break; \ + } \ + errno = last_errno; \ +} while(0) /* regular malloc's() 
*/ #ifndef HTS_TRACE_MALLOC @@ -194,6 +233,7 @@ extern HTSEXT_API int htsMemoryFastXfr; /* */ +#define stringdup() #ifdef STRDEBUG diff --git a/src/htsbasenet.h b/src/htsbasenet.h index f2a6c53..26f1392 100644 --- a/src/htsbasenet.h +++ b/src/htsbasenet.h @@ -31,14 +31,14 @@ Please visit our Website: http://www.httrack.com /* ------------------------------------------------------------ */ /* File: Basic net definitions */ -/* Used in .c and .h files that needs T_SOC and so */ +/* Used in .c and .h files that needs hostent and so */ /* Author: Xavier Roche */ /* ------------------------------------------------------------ */ #ifndef HTS_DEFBASENETH #define HTS_DEFBASENETH -#if HTS_WIN +#ifdef _WIN32 #if HTS_INET6==0 #include @@ -57,13 +57,11 @@ Please visit our Website: http://www.httrack.com #endif -typedef SOCKET T_SOC; - typedef struct hostent FAR t_hostent; +typedef struct hostent FAR t_hostent; #else #define HTS_USESCOPEID #define INVALID_SOCKET -1 - typedef int T_SOC; typedef struct hostent t_hostent; #endif @@ -178,4 +176,76 @@ extern SSL_CTX *openssl_ctx; #endif #endif +/** RFC2616 status-codes ('statuscode' member of htsblk) **/ +typedef enum HTTPStatusCode { + HTTP_CONTINUE = 100, + HTTP_SWITCHING_PROTOCOLS = 101, + HTTP_OK = 200, + HTTP_CREATED = 201, + HTTP_ACCEPTED = 202, + HTTP_NON_AUTHORITATIVE_INFORMATION = 203, + HTTP_NO_CONTENT = 204, + HTTP_RESET_CONTENT = 205, + HTTP_PARTIAL_CONTENT = 206, + HTTP_MULTIPLE_CHOICES = 300, + HTTP_MOVED_PERMANENTLY = 301, + HTTP_FOUND = 302, + HTTP_SEE_OTHER = 303, + HTTP_NOT_MODIFIED = 304, + HTTP_USE_PROXY = 305, + HTTP_TEMPORARY_REDIRECT = 307, + HTTP_BAD_REQUEST = 400, + HTTP_UNAUTHORIZED = 401, + HTTP_PAYMENT_REQUIRED = 402, + HTTP_FORBIDDEN = 403, + HTTP_NOT_FOUND = 404, + HTTP_METHOD_NOT_ALLOWED = 405, + HTTP_NOT_ACCEPTABLE = 406, + HTTP_PROXY_AUTHENTICATION_REQUIRED = 407, + HTTP_REQUEST_TIME_OUT = 408, + HTTP_CONFLICT = 409, + HTTP_GONE = 410, + HTTP_LENGTH_REQUIRED = 411, + HTTP_PRECONDITION_FAILED = 412, + 
HTTP_REQUEST_ENTITY_TOO_LARGE = 413, + HTTP_REQUEST_URI_TOO_LARGE = 414, + HTTP_UNSUPPORTED_MEDIA_TYPE = 415, + HTTP_REQUESTED_RANGE_NOT_SATISFIABLE = 416, + HTTP_EXPECTATION_FAILED = 417, + HTTP_INTERNAL_SERVER_ERROR = 500, + HTTP_NOT_IMPLEMENTED = 501, + HTTP_BAD_GATEWAY = 502, + HTTP_SERVICE_UNAVAILABLE = 503, + HTTP_GATEWAY_TIME_OUT = 504, + HTTP_HTTP_VERSION_NOT_SUPPORTED = 505 +} HTTPStatusCode; + +/** Internal HTTrack status-codes ('statuscode' member of htsblk) **/ +typedef enum BackStatusCode { + STATUSCODE_INVALID = -1, + STATUSCODE_TIMEOUT = -2, + STATUSCODE_SLOW = -3, + STATUSCODE_CONNERROR = -4, + STATUSCODE_NON_FATAL = -5, + STATUSCODE_SSL_HANDSHAKE = -6, + STATUSCODE_TOO_BIG = -7, + STATUSCODE_TEST_OK = -10 +} BackStatusCode; + +/** HTTrack status ('status' member of of 'lien_back') **/ +typedef enum HTTrackStatus { + STATUS_ALIVE = -103, + STATUS_FREE = -1, + STATUS_READY = 0, + STATUS_TRANSFER = 1, + STATUS_CHUNK_CR = 97, + STATUS_CHUNK_WAIT = 98, + STATUS_WAIT_HEADERS = 99, + STATUS_CONNECTING = 100, + STATUS_WAIT_DNS = 101, + STATUS_SSL_WAIT_HANDSHAKE = 102, + STATUS_FTP_TRANSFER = 1000, + STATUS_FTP_READY = 1001 +} HTTrackStatus; + #endif diff --git a/src/htsbauth.c b/src/htsbauth.c index cdc7f1c..df47d13 100644 --- a/src/htsbauth.c +++ b/src/htsbauth.c @@ -44,14 +44,13 @@ Please visit our Website: http://www.httrack.com #include "htsglobal.h" #include "htslib.h" -#include "htsnostatic.h" - /* END specific definitions */ // gestion des cookie // ajoute, dans l'ordre // !=0 : erreur int cookie_add(t_cookie* cookie,char* cook_name,char* cook_value,char* domain,char* path) { + char buffer[8192]; char* a=cookie->data; char* insert; char cook[16384]; @@ -72,7 +71,7 @@ int cookie_add(t_cookie* cookie,char* cook_name,char* cook_value,char* domain,ch insert=a; // insérer ici while (*a) { - if ( strlen(cookie_get(a,2)) < strlen(path) ) // long. path (le + long est prioritaire) + if ( strlen(cookie_get(buffer, a,2)) < strlen(path) ) // long. 
path (le + long est prioritaire) a=cookie->data+strlen(cookie->data); // fin else { a=strchr(a,'\n'); // prochain champ @@ -127,20 +126,21 @@ int cookie_del(t_cookie* cookie,char* cook_name,char* domain,char* path) { // path est aligné à droite et cook_name peut être vide (chercher alors tout cookie) // .doubleclick.net TRUE / FALSE 1999999999 id A char* cookie_find(char* s,char* cook_name,char* domain,char* path) { + char buffer[8192]; char* a=s; while (*a) { int t; if (strnotempty(cook_name)==0) t=1; // accepter par défaut else - t=( strcmp(cookie_get(a,5),cook_name)==0 ); // tester si même nom + t=( strcmp(cookie_get(buffer, a,5),cook_name)==0 ); // tester si même nom if (t) { // même nom ou nom qualconque // - char* chk_dom=cookie_get(a,0); // domaine concerné par le cookie + char* chk_dom=cookie_get(buffer,a,0); // domaine concerné par le cookie if ((int) strlen(chk_dom) <= (int) strlen(domain)) { if ( strcmp(chk_dom,domain+strlen(domain)-strlen(chk_dom))==0 ) { // même domaine // - char* chk_path=cookie_get(a,2); // chemin concerné par le cookie + char* chk_path=cookie_get(buffer,a,2); // chemin concerné par le cookie if ((int) strlen(chk_path) <= (int) strlen(path)) { if (strncmp(path,chk_path,strlen(chk_path))==0 ) { // même chemin return a; @@ -169,11 +169,13 @@ char* cookie_nextfield(char* a) { // lire cookies.txt // lire également (Windows seulement) les *@*.txt (cookies IE copiés) // !=0 : erreur -int cookie_load(t_cookie* cookie,char* fpath,char* name) { +int cookie_load(t_cookie* cookie, const char* fpath, const char* name) { + char catbuff[CATBUFF_SIZE]; + char buffer[8192]; // cookie->data[0]='\0'; // Fusionner d'abord les éventuels cookies IE -#if HTS_WIN +#ifdef _WIN32 { WIN32_FIND_DATAA find; HANDLE h; @@ -185,7 +187,7 @@ int cookie_load(t_cookie* cookie,char* fpath,char* name) { do { if (!(find.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY )) if (!(find.dwFileAttributes & FILE_ATTRIBUTE_SYSTEM )) { - FILE* 
fp=fopen(fconcat(fpath,find.cFileName),"rb"); + FILE* fp=fopen(fconcat(catbuff, fpath, find.cFileName),"rb"); if (fp) { char cook_name[256]; char cook_value[1000]; @@ -223,7 +225,7 @@ int cookie_load(t_cookie* cookie,char* fpath,char* name) { } fclose(fp); if (cookie_merged) - remove(fconcat(fpath,find.cFileName)); + remove(fconcat(catbuff,fpath,find.cFileName)); } // if fp } } while(FindNextFileA(h,&find)); @@ -234,7 +236,7 @@ int cookie_load(t_cookie* cookie,char* fpath,char* name) { // Ensuite, cookies.txt { - FILE* fp = fopen(fconcat(fpath,name),"rb"); + FILE* fp = fopen(fconcat(catbuff, fpath, name),"rb"); if (fp) { char BIGSTK line[8192]; while( (!feof(fp)) && (((int) strlen(cookie->data)) < cookie->max_len)) { @@ -246,10 +248,10 @@ int cookie_load(t_cookie* cookie,char* fpath,char* name) { char path[256]; // chemin (/) char cook_name[256]; // nom cookie (MYCOOK) char BIGSTK cook_value[8192]; // valeur (ID=toto,S=1234) - strcpybuff(domain,cookie_get(line,0)); // host - strcpybuff(path,cookie_get(line,2)); // path - strcpybuff(cook_name,cookie_get(line,5)); // name - strcpybuff(cook_value,cookie_get(line,6)); // value + strcpybuff(domain,cookie_get(buffer,line,0)); // host + strcpybuff(path,cookie_get(buffer,line,2)); // path + strcpybuff(cook_name,cookie_get(buffer,line,5)); // name + strcpybuff(cook_value,cookie_get(buffer,line,6)); // value #if DEBUG_COOK printf("%s\n",line); #endif @@ -268,9 +270,10 @@ int cookie_load(t_cookie* cookie,char* fpath,char* name) { // écrire cookies.txt // !=0 : erreur int cookie_save(t_cookie* cookie,char* name) { + char catbuff[CATBUFF_SIZE]; if (strnotempty(cookie->data)) { char BIGSTK line[8192]; - FILE* fp = fopen(fconv(name),"wb"); + FILE* fp = fopen(fconv(catbuff,name),"wb"); if (fp) { char* a=cookie->data; fprintf(fp,"# HTTrack Website Copier Cookie File"LF"# This file format is compatible with Netscape cookies"LF); @@ -318,11 +321,8 @@ void cookie_delete(char* s,int pos) { // renvoie champ param de la chaine 
cookie_base // ex: cookie_get("ceci estunexemple",1) renvoi "un" -char* cookie_get(char* cookie_base,int param) { - char* buffer; - // +char* cookie_get(char *buffer,char* cookie_base,int param) { char * limit; - NOSTATIC_RESERVE(buffer, char, 8192); while(*cookie_base=='\n') cookie_base++; limit = strchr(cookie_base,'\n'); @@ -359,10 +359,11 @@ char* cookie_get(char* cookie_base,int param) { /* déclarer un répertoire comme possédant une authentification propre */ int bauth_add(t_cookie* cookie,char* adr,char* fil,char* auth) { - if (cookie) { + char buffer[HTS_URLMAXSIZE*2]; + if (cookie) { if (!bauth_check(cookie,adr,fil)) { // n'existe pas déja bauth_chain* chain=&cookie->auth; - char* prefix=bauth_prefix(adr,fil); + char* prefix=bauth_prefix(buffer,adr,fil); /* fin de la chaine */ while(chain->next) chain=chain->next; @@ -382,9 +383,10 @@ int bauth_add(t_cookie* cookie,char* adr,char* fil,char* auth) { /* tester adr et fil, et retourner authentification si nécessaire */ /* sinon, retourne NULL */ char* bauth_check(t_cookie* cookie,char* adr,char* fil) { + char buffer[HTS_URLMAXSIZE*2]; if (cookie) { bauth_chain* chain=&cookie->auth; - char* prefix=bauth_prefix(adr,fil); + char* prefix=bauth_prefix(buffer,adr,fil); while(chain) { if (strnotempty(chain->prefix)) { if (strncmp(prefix,chain->prefix,strlen(chain->prefix))==0) { @@ -397,10 +399,8 @@ char* bauth_check(t_cookie* cookie,char* adr,char* fil) { return NULL; } -char* bauth_prefix(char* adr,char* fil) { - char* prefix; +char* bauth_prefix(char *prefix,char* adr,char* fil) { char* a; - NOSTATIC_RESERVE(prefix, char, HTS_URLMAXSIZE*2); strcpybuff(prefix,jump_identification(adr)); strcatbuff(prefix,fil); a=strchr(prefix,'?'); diff --git a/src/htsbauth.h b/src/htsbauth.h index 4066ece..2f585da 100644 --- a/src/htsbauth.h +++ b/src/htsbauth.h @@ -40,19 +40,27 @@ Please visit our Website: http://www.httrack.com #define HTSBAUTH_DEFH // robots wizard -typedef struct bauth_chain { +#ifndef 
HTS_DEF_FWSTRUCT_bauth_chain +#define HTS_DEF_FWSTRUCT_bauth_chain +typedef struct bauth_chain bauth_chain; +#endif +struct bauth_chain { char prefix[1024]; /* www.foo.com/secure/ */ char auth[1024]; /* base-64 encoded user:pass */ struct bauth_chain* next; /* next element */ -} bauth_chain; +}; // buffer pour les cookies et authentification -typedef struct t_cookie { +#ifndef HTS_DEF_FWSTRUCT_t_cookie +#define HTS_DEF_FWSTRUCT_t_cookie +typedef struct t_cookie t_cookie; +#endif +struct t_cookie { int max_len; char data[32768]; bauth_chain auth; -} t_cookie; +}; /* Library internal definictions */ @@ -61,18 +69,18 @@ typedef struct t_cookie { // cookies int cookie_add(t_cookie* cookie,char* cook_name,char* cook_value,char* domain,char* path); int cookie_del(t_cookie* cookie,char* cook_name,char* domain,char* path); -int cookie_load(t_cookie* cookie,char* path,char* name); +int cookie_load(t_cookie* cookie, const char* path, const char* name); int cookie_save(t_cookie* cookie,char* name); void cookie_insert(char* s,char* ins); void cookie_delete(char* s,int pos); -char* cookie_get(char* cookie_base,int param); +char* cookie_get(char *buffer, char* cookie_base,int param); char* cookie_find(char* s,char* cook_name,char* domain,char* path); char* cookie_nextfield(char* a); // basic auth int bauth_add(t_cookie* cookie,char* adr,char* fil,char* auth); char* bauth_check(t_cookie* cookie,char* adr,char* fil); -char* bauth_prefix(char* adr,char* fil); +char* bauth_prefix(char *buffer, char* adr,char* fil); #endif diff --git a/src/htscache.c b/src/htscache.c index de2273c..98ba328 100644 --- a/src/htscache.c +++ b/src/htscache.c @@ -41,19 +41,16 @@ Please visit our Website: http://www.httrack.com #include "htscache.h" /* specific definitions */ -#include "htsbase.h" +#include "htscore.h" #include "htsbasenet.h" #include "htsmd5.h" #include #include "htszlib.h" - - -#include "htsnostatic.h" /* END specific definitions */ #undef test_flush -#define test_flush if (opt->flush) { 
fflush(opt->log); fflush(opt->errlog); } +#define test_flush if (opt->flush) { fflush(opt->log); fflush(opt->log); } // routines de mise en cache @@ -113,9 +110,9 @@ with // Nouveau: si != text/html ne stocke que la taille -void cache_mayadd(httrackp* opt,cache_back* cache,htsblk* r,char* url_adr,char* url_fil,char* url_save) { +void cache_mayadd(httrackp* opt,cache_back* cache,htsblk* r,const char* url_adr,const char* url_fil,const char* url_save) { if ((opt->debug>0) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"File checked by cache: %s"LF,url_adr); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"File checked by cache: %s"LF,url_adr); } // ---stockage en cache--- // stocker dans le cache? @@ -124,7 +121,7 @@ void cache_mayadd(httrackp* opt,cache_back* cache,htsblk* r,char* url_adr,char* // ensure not a temporary filename (should not happend ?!) if (IS_DELAYED_EXT(url_save)) { if (opt->log!=NULL) { - fspc(opt->log,"warning"); fprintf(opt->log, "aborted cache validation: %s%s still has temporary name %s"LF, url_adr, url_fil, url_save); + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log, "aborted cache validation: %s%s still has temporary name %s"LF, url_adr, url_fil, url_save); } return ; } @@ -136,7 +133,7 @@ void cache_mayadd(httrackp* opt,cache_back* cache,htsblk* r,char* url_adr,char* r->statuscode > 0 #else /* We don't store 5XX errors, because it might be a server problem */ - (r->statuscode==200) /* stocker réponse standard, plus */ + (r->statuscode==HTTP_OK) /* stocker réponse standard, plus */ || (r->statuscode==204) /* no content */ || HTTP_IS_REDIRECT(r->statuscode) /* redirect */ || (r->statuscode==401) /* authorization */ @@ -150,14 +147,14 @@ void cache_mayadd(httrackp* opt,cache_back* cache,htsblk* r,char* url_adr,char* // stocker fichiers (et robots.txt) if ( url_save == NULL || (strnotempty(url_save)) || (strcmp(url_fil,"/robots.txt")==0)) { // ajouter le fichier au cache - 
cache_add(cache,*r,url_adr,url_fil,url_save,opt->all_in_cache,opt->path_html); + cache_add(opt,cache,r,url_adr,url_fil,url_save,opt->all_in_cache,StringBuff(opt->path_html)); // // store a reference NOT to redo the same test zillions of times! // (problem reported by Lars Clausen) // we just store statuscode + location (if any) if (url_save == NULL && r->statuscode / 100 >= 3) { // cached "fast" header doesn't uet exists - if (inthash_read((inthash)cache->cached_tests, concat(url_adr, url_fil), NULL) == 0) { + if (inthash_read(cache->cached_tests, concat(OPT_GET_BUFF(opt), url_adr, url_fil), NULL) == 0) { char BIGSTK tempo[HTS_URLMAXSIZE*2]; sprintf(tempo, "%d", (int)r->statuscode); if (r->location != NULL && r->location[0] != '\0') { @@ -165,9 +162,9 @@ void cache_mayadd(httrackp* opt,cache_back* cache,htsblk* r,char* url_adr,char* strcatbuff(tempo, r->location); } if ((opt->debug>0) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log, "Cached fast-header response: %s%s is %d"LF, url_adr, url_fil, (int)r->statuscode); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log, "Cached fast-header response: %s%s is %d"LF, url_adr, url_fil, (int)r->statuscode); } - inthash_add((inthash)cache->cached_tests, concat(url_adr, url_fil), (long int)strdupt(tempo)); + inthash_add(cache->cached_tests, concat(OPT_GET_BUFF(opt), url_adr, url_fil), (intptr_t)strdupt(tempo)); } } } @@ -178,11 +175,7 @@ void cache_mayadd(httrackp* opt,cache_back* cache,htsblk* r,char* url_adr,char* // ---fin stockage en cache--- } - - -#if 01 - -/* test only - to be removed */ +#if 1 #define ZIP_FIELD_STRING(headers, headersSize, field, value) do { \ if ( (value != NULL) && (value)[0] != '\0') { \ @@ -224,15 +217,16 @@ struct cache_back_zip_entry { /* Ajout d'un fichier en cache */ -void cache_add(cache_back* cache,htsblk r,char* url_adr,char* url_fil,char* url_save,int all_in_cache,char* path_prefix) { +void cache_add(httrackp* opt,cache_back* cache,const htsblk *r,const char* url_adr,const char* 
url_fil,const char* url_save,int all_in_cache,const char* path_prefix) { char BIGSTK filename[HTS_URLMAXSIZE*4]; + char catbuff[CATBUFF_SIZE]; int dataincache=0; // put data in cache ? char BIGSTK headers[8192]; int headersSize = 0; int entryBodySize = 0; int entryFilenameSize = 0; zip_fileinfo fi; - char* url_save_suffix = url_save; + const char* url_save_suffix = url_save; int zErr; // robots.txt hack @@ -248,8 +242,8 @@ void cache_add(cache_back* cache,htsblk r,char* url_adr,char* url_fil,char* url_ } /* Data in cache ? */ - if (is_hypertext_mime(r.contenttype, url_fil) - || (may_be_hypertext_mime(r.contenttype, url_fil) && r.adr != NULL) + if (is_hypertext_mime(opt,r->contenttype, url_fil) + || (may_be_hypertext_mime(opt,r->contenttype, url_fil) && r->adr != NULL) ) { dataincache=1; @@ -258,13 +252,13 @@ void cache_add(cache_back* cache,htsblk r,char* url_adr,char* url_fil,char* url_ } } - if (r.size < 0) // error + if (r->size < 0) // error return; // data in cache if (dataincache) { - assertf(((int) r.size) == r.size); - entryBodySize = (int) r.size; + assertf(((int) r->size) == r->size); + entryBodySize = (int) r->size; } /* Fields */ @@ -272,14 +266,14 @@ void cache_add(cache_back* cache,htsblk r,char* url_adr,char* url_fil,char* url_ headersSize = 0; /* */ { - char* message; - if (strlen(r.msg) < 32) { - message = r.msg; + const char* message; + if (strlen(r->msg) < 32) { + message = r->msg; } else { message = "(See X-StatusMessage)"; } /* 64 characters MAX for first line */ - sprintf(headers + headersSize, "HTTP/1.%c %d %s\r\n", '1', r.statuscode, r.msg); + sprintf(headers + headersSize, "HTTP/1.%c %d %s\r\n", '1', r->statuscode, r->msg); } headersSize += (int) strlen(headers + headersSize); @@ -292,15 +286,15 @@ void cache_add(cache_back* cache,htsblk r,char* url_adr,char* url_fil,char* url_ /* Second line MUST ALWAYS be X-In-Cache */ ZIP_FIELD_INT_FORCE(headers, headersSize, "X-In-Cache", dataincache); - ZIP_FIELD_INT(headers, headersSize, 
"X-StatusCode", r.statuscode); - ZIP_FIELD_STRING(headers, headersSize, "X-StatusMessage", r.msg); - ZIP_FIELD_INT(headers, headersSize, "X-Size", r.size); // size - ZIP_FIELD_STRING(headers, headersSize, "Content-Type", r.contenttype); // contenttype - ZIP_FIELD_STRING(headers, headersSize, "X-Charset", r.charset); // contenttype - ZIP_FIELD_STRING(headers, headersSize, "Last-Modified", r.lastmodified); // last-modified - ZIP_FIELD_STRING(headers, headersSize, "Etag", r.etag); // Etag - ZIP_FIELD_STRING(headers, headersSize, "Location", r.location); // 'location' pour moved - ZIP_FIELD_STRING(headers, headersSize, "Content-Disposition", r.cdispo); // Content-disposition + ZIP_FIELD_INT(headers, headersSize, "X-StatusCode", r->statuscode); + ZIP_FIELD_STRING(headers, headersSize, "X-StatusMessage", r->msg); + ZIP_FIELD_INT(headers, headersSize, "X-Size", r->size); // size + ZIP_FIELD_STRING(headers, headersSize, "Content-Type", r->contenttype); // contenttype + ZIP_FIELD_STRING(headers, headersSize, "X-Charset", r->charset); // contenttype + ZIP_FIELD_STRING(headers, headersSize, "Last-Modified", r->lastmodified); // last-modified + ZIP_FIELD_STRING(headers, headersSize, "Etag", r->etag); // Etag + ZIP_FIELD_STRING(headers, headersSize, "Location", r->location); // 'location' pour moved + ZIP_FIELD_STRING(headers, headersSize, "Content-Disposition", r->cdispo); // Content-disposition ZIP_FIELD_STRING(headers, headersSize, "X-Addr", url_adr); // Original address ZIP_FIELD_STRING(headers, headersSize, "X-Fil", url_fil); // Original URI filename ZIP_FIELD_STRING(headers, headersSize, "X-Save", url_save_suffix); // Original save filename @@ -318,8 +312,9 @@ void cache_add(cache_back* cache,htsblk r,char* url_adr,char* url_fil,char* url_ /* Time */ memset(&fi, 0, sizeof(fi)); - if (r.lastmodified[0] != '\0') { - struct tm* tm_s=convert_time_rfc822(r.lastmodified); + if (r->lastmodified[0] != '\0') { + struct tm buffer; + struct tm* tm_s=convert_time_rfc822(&buffer, 
r->lastmodified); if (tm_s) { fi.tmz_date.tm_sec = (uInt) tm_s->tm_sec; fi.tmz_date.tm_min = (uInt) tm_s->tm_min; @@ -352,22 +347,22 @@ void cache_add(cache_back* cache,htsblk r,char* url_adr,char* url_fil,char* url_ /* Write data in cache */ if (dataincache) { - if (r.is_write == 0) { - if (r.size > 0 && r.adr != NULL) { - if ((zErr = zipWriteInFileInZip((zipFile) cache->zipOutput, r.adr, (int) r.size)) != Z_OK) { + if (r->is_write == 0) { + if (r->size > 0 && r->adr != NULL) { + if ((zErr = zipWriteInFileInZip((zipFile) cache->zipOutput, r->adr, (int) r->size)) != Z_OK) { int zip_zipWriteInFileInZip_failed = 0; assertf(zip_zipWriteInFileInZip_failed); } } } else { FILE* fp; - // On recopie le fichier.. - LLint file_size=fsize(fconv(url_save)); + // On recopie le fichier->. + off_t file_size=fsize(fconv(catbuff, url_save)); if (file_size>=0) { - fp=fopen(fconv(url_save),"rb"); + fp=fopen(fconv(catbuff, url_save),"rb"); if (fp!=NULL) { char BIGSTK buff[32768]; - INTsys nl; + size_t nl; do { nl=fread(buff,1,32768,fp); if (nl>0) { @@ -401,7 +396,7 @@ void cache_add(cache_back* cache,htsblk r,char* url_adr,char* url_fil,char* url_ #else /* Ajout d'un fichier en cache */ -void cache_add(cache_back* cache,htsblk r,char* url_adr,char* url_fil,char* url_save,int all_in_cache) { +void cache_add(httrackp* opt,cache_back* cache,const htsblk *r,char* url_adr,char* url_fil,char* url_save,int all_in_cache) { int pos; char s[256]; char BIGSTK buff[HTS_URLMAXSIZE*4]; @@ -422,20 +417,20 @@ void cache_add(cache_back* cache,htsblk r,char* url_adr,char* url_fil,char* url_ return; // erreur (sauf robots.txt) } - if (r.size <= 0) // taille <= 0 + if (r->size <= 0) // taille <= 0 return; // refusé.. // Mettre les *donées* en cache ? 
- if (is_hypertext_mime(r.contenttype, url_fil)) // html, mise en cache des données et + if (is_hypertext_mime(opt,r->contenttype, url_fil)) // html, mise en cache des données et dataincache=1; // pas uniquement de l'en tête else if (all_in_cache) dataincache=1; // forcer tout en cache /* calcul md5 ? */ /* - if (is_hypertext_mime(r.contenttype)) { // html, calcul MD5 - if (r.adr) { - domd5mem(r.adr,r.size,digest,1); + if (is_hypertext_mime(opt,r->contenttype)) { // html, calcul MD5 + if (r->adr) { + domd5mem(r->adr,r->size,digest,1); } }*/ @@ -451,24 +446,24 @@ void cache_add(cache_back* cache,htsblk r,char* url_adr,char* url_fil,char* url_ // data // écrire données en-tête, données fichier /*if (!dataincache) { // patcher - r.size=-r.size; // négatif + r->size=-r->size; // négatif }*/ // Construction header ok=0; - if (cache_wint(cache_dat,r.statuscode) != -1 // statuscode - && cache_wLLint(cache_dat,r.size) != -1 // size - && cache_wstr(cache_dat,r.msg) != -1 // msg - && cache_wstr(cache_dat,r.contenttype) != -1 // contenttype - && cache_wstr(cache_dat,r.charset) != -1 // contenttype - && cache_wstr(cache_dat,r.lastmodified) != -1 // last-modified - && cache_wstr(cache_dat,r.etag) != -1 // Etag - && cache_wstr(cache_dat,(r.location!=NULL)?r.location:"") != -1 // 'location' pour moved - && cache_wstr(cache_dat,r.cdispo) != -1 // Content-disposition + if (cache_wint(cache_dat,r->statuscode) != -1 // statuscode + && cache_wLLint(cache_dat,r->size) != -1 // size + && cache_wstr(cache_dat,r->msg) != -1 // msg + && cache_wstr(cache_dat,r->contenttype) != -1 // contenttype + && cache_wstr(cache_dat,r->charset) != -1 // contenttype + && cache_wstr(cache_dat,r->lastmodified) != -1 // last-modified + && cache_wstr(cache_dat,r->etag) != -1 // Etag + && cache_wstr(cache_dat,(r->location!=NULL)?r->location:"") != -1 // 'location' pour moved + && cache_wstr(cache_dat,r->cdispo) != -1 // Content-disposition && cache_wstr(cache_dat,url_adr) != -1 // Original address && 
cache_wstr(cache_dat,url_fil) != -1 // Original URI filename && cache_wstr(cache_dat,url_save) != -1 // Original save filename - && cache_wstr(cache_dat,r.headers) != -1 // Full HTTP Headers + && cache_wstr(cache_dat,r->headers) != -1 // Full HTTP Headers && cache_wstr(cache_dat,"HTS") != -1 // end of header ) { ok=1; /* ok */ @@ -478,13 +473,13 @@ void cache_add(cache_back* cache,htsblk r,char* url_adr,char* url_fil,char* url_ /*if ((int) fwrite((char*) &r,1,sizeof(htsblk),cache_dat) == sizeof(htsblk)) {*/ if (ok) { if (dataincache) { // mise en cache? - if (!r.adr) { /* taille nulle (parfois en cas de 301 */ + if (!r->adr) { /* taille nulle (parfois en cas de 301 */ if (cache_wLLint(cache_dat,0)==-1) /* 0 bytes */ ok=0; - } else if (r.is_write==0) { // en mémoire, recopie directe - if (cache_wLLint(cache_dat,r.size)!=-1) { - if (r.size>0) { // taille>0 - if (fwrite(r.adr,1,(INTsys)r.size,cache_dat)!=r.size) + } else if (r->is_write==0) { // en mémoire, recopie directe + if (cache_wLLint(cache_dat,r->size)!=-1) { + if (r->size>0) { // taille>0 + if (fwrite(r->adr,1,r->size,cache_dat)!=r->size) ok=0; } else // taille=0, ne rien écrire ok=0; @@ -492,18 +487,18 @@ void cache_add(cache_back* cache,htsblk r,char* url_adr,char* url_fil,char* url_ ok=0; } else { // recopier fichier dans cache FILE* fp; - // On recopie le fichier.. - LLint file_size=fsize(fconv(url_save)); + // On recopie le fichier->. 
+ off_t file_size=fsize(fconv(catbuff, url_save)); if (file_size>=0) { if (cache_wLLint(cache_dat,file_size)!=-1) { - fp=fopen(fconv(url_save),"rb"); + fp=fopen(fconv(catbuff, url_save),"rb"); if (fp!=NULL) { char BIGSTK buff[32768]; - INTsys nl; + ssize_t nl; do { nl=fread(buff,1,32768,fp); if (nl>0) { - if ((INTsys)fwrite(buff,1,(INTsys)nl,cache_dat)!=nl) { // erreur + if (fwrite(buff,1,nl,cache_dat)!=nl) { // erreur nl=-1; ok=0; } @@ -520,7 +515,7 @@ void cache_add(cache_back* cache,htsblk r,char* url_adr,char* url_fil,char* url_ } } else ok=0; /*if (!dataincache) { // dépatcher - r.size=-r.size; + r->size=-r->size; }*/ // index @@ -538,23 +533,23 @@ void cache_add(cache_back* cache,htsblk r,char* url_adr,char* url_fil,char* url_ #endif -htsblk cache_read(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* location) { +htsblk cache_read(httrackp* opt,cache_back* cache,const char* adr,const char* fil,const char* save,char* location) { return cache_readex(opt,cache,adr,fil,save,location,NULL,0); } -htsblk cache_read_ro(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* location) { +htsblk cache_read_ro(httrackp* opt,cache_back* cache,const char* adr,const char* fil,const char* save,char* location) { return cache_readex(opt,cache,adr,fil,save,location,NULL,1); } -static htsblk cache_readex_old(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* location, +static htsblk cache_readex_old(httrackp* opt,cache_back* cache,const char* adr,const char* fil,const char* save,char* location, char* return_save, int readonly); -static htsblk cache_readex_new(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* location, +static htsblk cache_readex_new(httrackp* opt,cache_back* cache,const char* adr,const char* fil,const char* save,char* location, char* return_save, int readonly); // lecture d'un fichier dans le cache // si save==null alors test unqiquement -htsblk cache_readex(httrackp* opt,cache_back* 
cache,char* adr,char* fil,char* save,char* location, +htsblk cache_readex(httrackp* opt,cache_back* cache,const char* adr,const char* fil,const char* save,char* location, char* return_save, int readonly) { if (cache->zipInput != NULL) { return cache_readex_new(opt, cache, adr, fil, save, location, return_save, readonly); @@ -565,13 +560,14 @@ htsblk cache_readex(httrackp* opt,cache_back* cache,char* adr,char* fil,char* sa // lecture d'un fichier dans le cache // si save==null alors test unqiquement -static htsblk cache_readex_new(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* location, +static htsblk cache_readex_new(httrackp* opt,cache_back* cache,const char* adr,const char* fil,const char* save,char* location, char* return_save, int readonly) { char BIGSTK location_default[HTS_URLMAXSIZE*2]; char BIGSTK buff[HTS_URLMAXSIZE*2]; char BIGSTK previous_save[HTS_URLMAXSIZE*2]; char BIGSTK previous_save_[HTS_URLMAXSIZE*2]; - long int hash_pos; + char catbuff[CATBUFF_SIZE]; + intptr_t hash_pos; int hash_pos_return; htsblk r; memset(&r, 0, sizeof(htsblk)); r.soc=INVALID_SOCKET; @@ -586,7 +582,7 @@ static htsblk cache_readex_new(httrackp* opt,cache_back* cache,char* adr,char* f strcpybuff(r.location, ""); strcpybuff(buff, adr); strcatbuff(buff,fil); - hash_pos_return = inthash_read((inthash)cache->hashtable, buff, (long int*)&hash_pos); + hash_pos_return = inthash_read(cache->hashtable, buff, &hash_pos); /* avoid errors on data entries */ if (adr[0] == '/' && adr[1] == '/' && adr[2] == '[') { #if HTS_FAST_CACHE @@ -656,9 +652,9 @@ static htsblk cache_readex_new(httrackp* opt,cache_back* cache,char* adr,char* f /* Previous entry */ if (previous_save_[0] != '\0') { - int pathLen = (int) strlen(opt->path_html); - if (pathLen != 0 && strncmp(previous_save_, opt->path_html, pathLen) != 0) { // old (<3.40) buggy format - sprintf(previous_save, "%s%s", opt->path_html, previous_save_); + int pathLen = (int) strlen(StringBuff(opt->path_html)); + if (pathLen != 
0 && strncmp(previous_save_, StringBuff(opt->path_html), pathLen) != 0) { // old (<3.40) buggy format + sprintf(previous_save, "%s%s", StringBuff(opt->path_html), previous_save_); } else { strcpy(previous_save, previous_save_); } @@ -684,28 +680,28 @@ static htsblk cache_readex_new(httrackp* opt,cache_back* cache,char* adr,char* f strcpybuff(r.msg,"Cache Read Error : Unexpected error"); } } - else if (!readonly && r.statuscode==200 && !is_hypertext_mime(r.contenttype, fil) && strnotempty(save)) { // pas HTML, écrire sur disk directement + else if (!readonly && r.statuscode==HTTP_OK && !is_hypertext_mime(opt,r.contenttype, fil) && strnotempty(save)) { // pas HTML, écrire sur disk directement r.is_write=1; // écrire if (!dataincache) { - if (fexist(fconv(save))) { // un fichier existe déja + if (fexist(fconv(catbuff, save))) { // un fichier existe déja //if (fsize(fconv(save))==r.size) { // même taille -- NON tant pis (taille mal declaree) ok=1; // plus rien à faire - filenote(save,NULL); // noter comme connu - file_notify(adr, fil, save, 0, 0, 1); // data in cache + filenote(&opt->state.strc,save,NULL); // noter comme connu + file_notify(opt,adr, fil, save, 0, 0, 1); // data in cache } } if (!dataincache && !ok) { // Pas de donnée en cache et fichier introuvable : erreur! 
if (opt->norecatch) { - file_notify(adr, fil, save, 1, 0, 0); - filecreateempty(save); + file_notify(opt,adr, fil, save, 1, 0, 0); + filecreateempty(&opt->state.strc, save); // r.statuscode=STATUSCODE_INVALID; strcpybuff(r.msg,"File deleted by user not recaught"); ok=1; // ne pas récupérer (et pas d'erreur) } else { - file_notify(adr, fil, save, 1, 1, 0); + file_notify(opt,adr, fil, save, 1, 1, 0); r.statuscode=STATUSCODE_INVALID; strcpybuff(r.msg,"Previous cache file not found"); ok=1; // ne pas récupérer @@ -713,8 +709,8 @@ static htsblk cache_readex_new(httrackp* opt,cache_back* cache,char* adr,char* f } if (!ok) { // load from cache - file_notify(adr, fil, save, 1, 1, 1); // data in cache - r.out=filecreate(save); + file_notify(opt,adr, fil, save, 1, 1, 1); // data in cache + r.out=filecreate(&opt->state.strc, save); #if HDEBUG printf("direct-disk: %s\n",save); #endif @@ -722,14 +718,15 @@ static htsblk cache_readex_new(httrackp* opt,cache_back* cache,char* adr,char* f char BIGSTK buff[32768+4]; LLint size = r.size; if (size > 0) { - INTsys nl; + size_t nl; do { nl = unzReadCurrentFile((unzFile) cache->zipInput, buff, (int)minimum(size, 32768)); if (nl>0) { size-=nl; - if ((INTsys)fwrite(buff,1,(INTsys)nl,r.out)!=nl) { // erreur + if (fwrite(buff,1,nl,r.out)!=nl) { // erreur + int last_errno = errno; r.statuscode=STATUSCODE_INVALID; - sprintf(r.msg,"Cache Read Error : Read To Disk: %s", strerror(errno)); + sprintf(r.msg,"Cache Read Error : Read To Disk: %s", strerror(last_errno)); } } } while((nl>0) && (size>0) && (r.statuscode!=-1)); @@ -737,7 +734,7 @@ static htsblk cache_readex_new(httrackp* opt,cache_back* cache,char* adr,char* f fclose(r.out); r.out=NULL; -#if HTS_WIN==0 +#ifndef _WIN32 chmod(save,HTS_ACCESS_FILE); #endif } else { @@ -757,13 +754,14 @@ static htsblk cache_readex_new(httrackp* opt,cache_back* cache,char* adr,char* f strcpybuff(r.msg,"Previous cache file not found (2)"); } else { /* Read in memory from cache */ if (strnotempty(previous_save) 
&& fexist(previous_save)) { - FILE* fp = fopen(fconv(previous_save), "rb"); + FILE* fp = fopen(fconv(catbuff, previous_save), "rb"); if (fp != NULL) { - r.adr=(char*) malloct((INTsys)r.size + 4); + r.adr = (char*) malloct((int) r.size + 4); if (r.adr != NULL) { - if (r.size > 0 && fread(r.adr, 1, (INTsys) r.size, fp) != r.size) { + if (r.size > 0 && fread(r.adr, 1, (int) r.size, fp) != r.size) { + int last_errno = errno; r.statuscode=STATUSCODE_INVALID; - sprintf(r.msg,"Read error in cache disk data: %s", strerror(errno)); + sprintf(r.msg,"Read error in cache disk data: %s", strerror(last_errno)); } } else { r.statuscode=STATUSCODE_INVALID; @@ -778,9 +776,9 @@ static htsblk cache_readex_new(httrackp* opt,cache_back* cache,char* adr,char* f } } else { // lire fichier (d'un coup) - r.adr=(char*) malloct((INTsys)r.size+4); + r.adr = (char*) malloct((int) r.size+4); if (r.adr!=NULL) { - if (unzReadCurrentFile((unzFile) cache->zipInput, r.adr, (INTsys)r.size) != r.size) { // erreur + if (unzReadCurrentFile((unzFile) cache->zipInput, r.adr, (int) r.size) != r.size) { // erreur freet(r.adr); r.adr=NULL; r.statuscode=STATUSCODE_INVALID; @@ -824,10 +822,10 @@ static htsblk cache_readex_new(httrackp* opt,cache_back* cache,char* adr,char* f // lecture d'un fichier dans le cache // si save==null alors test unqiquement -static htsblk cache_readex_old(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* location, +static htsblk cache_readex_old(httrackp* opt,cache_back* cache,const char* adr,const char* fil,const char* save,char* location, char* return_save, int readonly) { #if HTS_FAST_CACHE - long int hash_pos; + intptr_t hash_pos; int hash_pos_return; #else char* a; @@ -835,6 +833,7 @@ static htsblk cache_readex_old(httrackp* opt,cache_back* cache,char* adr,char* f char BIGSTK buff[HTS_URLMAXSIZE*2]; char BIGSTK location_default[HTS_URLMAXSIZE*2]; char BIGSTK previous_save[HTS_URLMAXSIZE*2]; + char catbuff[CATBUFF_SIZE]; htsblk r; int ok=0; int header_only=0; 
@@ -848,7 +847,7 @@ static htsblk cache_readex_old(httrackp* opt,cache_back* cache,char* adr,char* f strcpybuff(r.location, ""); #if HTS_FAST_CACHE strcpybuff(buff,adr); strcatbuff(buff,fil); - hash_pos_return=inthash_read((inthash)cache->hashtable,buff,(long int*)&hash_pos); + hash_pos_return=inthash_read(cache->hashtable,buff,&hash_pos); #else buff[0]='\0'; strcatbuff(buff,"\n"); strcatbuff(buff,adr); strcatbuff(buff,"\n"); strcatbuff(buff,fil); strcatbuff(buff,"\n"); if (cache->use) @@ -872,13 +871,13 @@ static htsblk cache_readex_old(httrackp* opt,cache_back* cache,char* adr,char* f #else if (a!=NULL) { // OK existe en cache! #endif - INTsys pos; + intptr_t pos; #if DEBUGCA fprintf(stdout,"..cache: %s%s at ",adr,fil); #endif #if HTS_FAST_CACHE - pos=hash_pos; /* simply */ + pos = hash_pos; /* simply */ #else a+=strlen(buff); sscanf(a,"%d",&pos); // lire position @@ -888,7 +887,7 @@ static htsblk cache_readex_old(httrackp* opt,cache_back* cache,char* adr,char* f #endif fflush(cache->olddat); - if (fseek(cache->olddat,((pos>0)?pos:(-pos)),SEEK_SET) == 0) { + if (fseek(cache->olddat,(long)((pos>0)?pos:(-pos)),SEEK_SET) == 0) { /* Importer cache1.0 */ if (cache->version==0) { OLD_htsblk old_r; @@ -938,7 +937,7 @@ static htsblk cache_readex_old(httrackp* opt,cache_back* cache,char* adr,char* f if (size_read>0) { /* si inscrite ici */ r.size=size_read; } else { /* pas de données directement dans le cache, fichier présent? */ - if (r.statuscode!=200) + if (r.statuscode!=HTTP_OK) header_only=1; /* que l'en tête ici! */ } } @@ -966,22 +965,22 @@ static htsblk cache_readex_old(httrackp* opt,cache_back* cache,char* adr,char* f #if HTS_DIRECTDISK // Court-circuit: // Peut-on stocker le fichier directement sur disque? 
- if (!readonly && r.statuscode==200 && !is_hypertext_mime(r.contenttype, fil) && strnotempty(save)) { // pas HTML, écrire sur disk directement + if (!readonly && r.statuscode==HTTP_OK && !is_hypertext_mime(opt,r.contenttype, fil) && strnotempty(save)) { // pas HTML, écrire sur disk directement int ok=0; r.is_write=1; // écrire - if (fexist(fconv(save))) { // un fichier existe déja + if (fexist(fconv(catbuff, save))) { // un fichier existe déja //if (fsize(fconv(save))==r.size) { // même taille -- NON tant pis (taille mal declaree) ok=1; // plus rien à faire - filenote(save,NULL); // noter comme connu - file_notify(adr, fil, save, 0, 0, 0); + filenote(&opt->state.strc,save,NULL); // noter comme connu + file_notify(opt,adr, fil, save, 0, 0, 0); //} } if ((pos<0) && (!ok)) { // Pas de donnée en cache et fichier introuvable : erreur! if (opt->norecatch) { - file_notify(adr, fil, save, 1, 0, 0); - filecreateempty(save); + file_notify(opt,adr, fil, save, 1, 0, 0); + filecreateempty(&opt->state.strc, save); // r.statuscode=STATUSCODE_INVALID; strcpybuff(r.msg,"File deleted by user not recaught"); @@ -994,20 +993,20 @@ static htsblk cache_readex_old(httrackp* opt,cache_back* cache,char* adr,char* f } if (!ok) { - r.out=filecreate(save); + r.out=filecreate(&opt->state.strc, save); #if HDEBUG printf("direct-disk: %s\n",save); #endif if (r.out!=NULL) { char BIGSTK buff[32768+4]; - LLint size = r.size; + size_t size = (size_t) r.size; if (size > 0) { - INTsys nl; + size_t nl; do { - nl=fread(buff,1,(INTsys) minimum(size,32768),cache->olddat); + nl=fread(buff,1,minimum(size,32768),cache->olddat); if (nl>0) { size-=nl; - if ((INTsys)fwrite(buff,1,(INTsys)nl,r.out)!=nl) { // erreur + if (fwrite(buff,1,nl,r.out)!=nl) { // erreur r.statuscode=STATUSCODE_INVALID; strcpybuff(r.msg,"Cache Read Error : Read To Disk"); } @@ -1017,7 +1016,7 @@ static htsblk cache_readex_old(httrackp* opt,cache_back* cache,char* adr,char* f fclose(r.out); r.out=NULL; -#if HTS_WIN==0 +#ifndef _WIN32 
chmod(save,HTS_ACCESS_FILE); #endif } else { @@ -1037,11 +1036,11 @@ static htsblk cache_readex_old(httrackp* opt,cache_back* cache,char* adr,char* f strcpybuff(r.msg,"Previous cache file not found (2)"); } else { /* Read in memory from cache */ if (strnotempty(return_save) && fexist(return_save)) { - FILE* fp = fopen(fconv(return_save), "rb"); + FILE* fp = fopen(fconv(catbuff, return_save), "rb"); if (fp != NULL) { - r.adr=(char*) malloct((INTsys)r.size + 4); + r.adr = (char*) malloct((size_t)r.size + 4); if (r.adr != NULL) { - if (r.size > 0 && fread(r.adr, 1, (INTsys) r.size, fp) != r.size) { + if (r.size > 0 && fread(r.adr, 1, (size_t)r.size, fp) != r.size) { r.statuscode=STATUSCODE_INVALID; strcpybuff(r.msg,"Read error in cache disk data"); } @@ -1058,9 +1057,9 @@ static htsblk cache_readex_old(httrackp* opt,cache_back* cache,char* adr,char* f } } else { // lire fichier (d'un coup) - r.adr=(char*) malloct((INTsys)r.size+4); + r.adr=(char*) malloct((size_t)r.size+4); if (r.adr!=NULL) { - if (fread(r.adr,1,(INTsys)r.size,cache->olddat)!=r.size) { // erreur + if (fread(r.adr,1,(size_t)r.size,cache->olddat)!=r.size) { // erreur freet(r.adr); r.adr=NULL; r.statuscode=STATUSCODE_INVALID; @@ -1111,7 +1110,7 @@ static htsblk cache_readex_old(httrackp* opt,cache_back* cache,char* adr,char* f /* write (string1-string2)-data in cache */ /* 0 if failed */ -int cache_writedata(FILE* cache_ndx,FILE* cache_dat,char* str1,char* str2,char* outbuff,int len) { +int cache_writedata(FILE* cache_ndx,FILE* cache_dat,const char* str1,const char* str2,char* outbuff,int len) { if (cache_dat) { char BIGSTK buff[HTS_URLMAXSIZE*4]; char s[256]; @@ -1120,12 +1119,12 @@ int cache_writedata(FILE* cache_ndx,FILE* cache_dat,char* str1,char* str2,char* pos=ftell(cache_dat); /* first write data */ if (cache_wint(cache_dat,len)!=-1) { // length - if ((INTsys)fwrite(outbuff,1,(INTsys)len,cache_dat) == (INTsys) len) { // data + if (fwrite(outbuff,1,len,cache_dat) == len) { // data /* then write 
index */ sprintf(s,"%d\n",pos); buff[0]='\0'; strcatbuff(buff,str1); strcatbuff(buff,"\n"); strcatbuff(buff,str2); strcatbuff(buff,"\n"); cache_wstr(cache_ndx,buff); - if (fwrite(s,1,(INTsys)strlen(s),cache_ndx) == strlen(s)) { + if (fwrite(s,1,strlen(s),cache_ndx) == strlen(s)) { fflush(cache_dat); fflush(cache_ndx); return 1; } @@ -1137,20 +1136,20 @@ int cache_writedata(FILE* cache_ndx,FILE* cache_dat,char* str1,char* str2,char* /* read the data corresponding to (string1-string2) in cache */ /* 0 if failed */ -int cache_readdata(cache_back* cache,char* str1,char* str2,char** inbuff,int* inlen) { +int cache_readdata(cache_back* cache,const char* str1,const char* str2,char** inbuff,int* inlen) { #if HTS_FAST_CACHE if (cache->hashtable) { char BIGSTK buff[HTS_URLMAXSIZE*4]; - long int pos; + intptr_t pos; strcpybuff(buff,str1); strcatbuff(buff,str2); - if (inthash_read((inthash)cache->hashtable,buff,(long int*)&pos)) { - if (fseek(cache->olddat,((pos>0)?pos:(-pos)),SEEK_SET) == 0) { + if (inthash_read(cache->hashtable,buff,&pos)) { + if (fseek(cache->olddat,(long)((pos>0)?pos:(-pos)),SEEK_SET) == 0) { INTsys len; cache_rint(cache->olddat,&len); if (len>0) { char* mem_buff=(char*)malloct(len+4); /* Plus byte 0 */ if (mem_buff) { - if ((INTsys)fread(mem_buff,1,len,cache->olddat)==len) { // lire tout (y compris statuscode etc)*/ + if (fread(mem_buff,1,len,cache->olddat)==len) { // lire tout (y compris statuscode etc)*/ *inbuff=mem_buff; *inlen=len; return 1; @@ -1169,7 +1168,7 @@ int cache_readdata(cache_back* cache,char* str1,char* str2,char** inbuff,int* in // renvoyer uniquement en tête, ou NULL si erreur // return NULL upon error, and set -1 to r.statuscode -htsblk* cache_header(httrackp* opt,cache_back* cache,char* adr,char* fil,htsblk* r) { +htsblk* cache_header(httrackp* opt,cache_back* cache,const char* adr,const char* fil,htsblk* r) { *r=cache_read(opt,cache,adr,fil,NULL,NULL); // test uniquement if (r->statuscode != -1) return r; @@ -1187,52 +1186,52 @@ void 
cache_init(cache_back* cache,httrackp* opt) { printf("cache init: "); #endif if (!cache->ro) { -#if HTS_WIN - mkdir(fconcat(opt->path_log,"hts-cache")); +#ifdef _WIN32 + mkdir(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache")); #else - mkdir(fconcat(opt->path_log,"hts-cache"),HTS_PROTECT_FOLDER); + mkdir(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache"),HTS_PROTECT_FOLDER); #endif - if ((fexist(fconcat(opt->path_log,"hts-cache/new.zip")))) { // il existe déja un cache précédent.. renommer + if ((fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.zip")))) { // il existe déja un cache précédent.. renommer /* Previous cache from the previous cache version */ #if 0 /* No.. reuse with old httrack releases! */ - if (fexist(fconcat(opt->path_log,"hts-cache/old.dat"))) - remove(fconcat(opt->path_log,"hts-cache/old.dat")); - if (fexist(fconcat(opt->path_log,"hts-cache/old.ndx"))) - remove(fconcat(opt->path_log,"hts-cache/old.ndx")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.dat"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.dat")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.ndx"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.ndx")); #endif /* Previous cache version */ - if ((fexist(fconcat(opt->path_log,"hts-cache/new.dat"))) && (fexist(fconcat(opt->path_log,"hts-cache/new.ndx")))) { // il existe déja un cache précédent.. renommer - rename(fconcat(opt->path_log,"hts-cache/new.dat"),fconcat(opt->path_log,"hts-cache/old.dat")); - rename(fconcat(opt->path_log,"hts-cache/new.ndx"),fconcat(opt->path_log,"hts-cache/old.ndx")); + if ((fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.dat"))) && (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.ndx")))) { // il existe déja un cache précédent.. 
renommer + rename(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.dat"),fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.dat")); + rename(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.ndx"),fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.ndx")); } /* Remove OLD cache */ - if (fexist(fconcat(opt->path_log,"hts-cache/old.zip"))) - remove(fconcat(opt->path_log,"hts-cache/old.zip")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.zip"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.zip")); /* Rename */ - rename(fconcat(opt->path_log,"hts-cache/new.zip"),fconcat(opt->path_log,"hts-cache/old.zip")); + rename(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.zip"),fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.zip")); } - else if ((fexist(fconcat(opt->path_log,"hts-cache/new.dat"))) && (fexist(fconcat(opt->path_log,"hts-cache/new.ndx")))) { // il existe déja un cache précédent.. renommer + else if ((fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.dat"))) && (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.ndx")))) { // il existe déja un cache précédent.. 
renommer #if DEBUGCA printf("work with former cache\n"); #endif - if (fexist(fconcat(opt->path_log,"hts-cache/old.dat"))) - remove(fconcat(opt->path_log,"hts-cache/old.dat")); - if (fexist(fconcat(opt->path_log,"hts-cache/old.ndx"))) - remove(fconcat(opt->path_log,"hts-cache/old.ndx")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.dat"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.dat")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.ndx"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.ndx")); - rename(fconcat(opt->path_log,"hts-cache/new.dat"),fconcat(opt->path_log,"hts-cache/old.dat")); - rename(fconcat(opt->path_log,"hts-cache/new.ndx"),fconcat(opt->path_log,"hts-cache/old.ndx")); + rename(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.dat"),fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.dat")); + rename(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.ndx"),fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.ndx")); } else { // un des deux (ou les deux) fichiers cache absents: effacer l'autre éventuel #if DEBUGCA printf("new cache\n"); #endif - if (fexist(fconcat(opt->path_log,"hts-cache/new.dat"))) - remove(fconcat(opt->path_log,"hts-cache/new.dat")); - if (fexist(fconcat(opt->path_log,"hts-cache/new.ndx"))) - remove(fconcat(opt->path_log,"hts-cache/new.ndx")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.dat"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.dat")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.ndx"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.ndx")); } } @@ -1240,19 +1239,19 @@ void cache_init(cache_back* cache,httrackp* opt) { if ( ( !cache->ro && - 
fsize(fconcat(opt->path_log,"hts-cache/old.zip")) > 0 + fsize(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.zip")) > 0 ) || ( cache->ro && - fsize(fconcat(opt->path_log,"hts-cache/new.zip")) > 0 + fsize(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.zip")) > 0 ) ) { if (!cache->ro) { - cache->zipInput = unzOpen(fconcat(opt->path_log,"hts-cache/old.zip")); + cache->zipInput = unzOpen(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.zip")); } else { - cache->zipInput = unzOpen(fconcat(opt->path_log,"hts-cache/new.zip")); + cache->zipInput = unzOpen(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.zip")); } // Corrupted ZIP file ? Try to repair! @@ -1261,30 +1260,30 @@ void cache_init(cache_back* cache,httrackp* opt) { uLong repaired = 0; uLong repairedBytes = 0; if (!cache->ro) { - name = fconcat(opt->path_log,"hts-cache/old.zip"); + name = fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.zip"); } else { - name = fconcat(opt->path_log,"hts-cache/new.zip"); + name = fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.zip"); } if (opt->log) { - fspc(opt->log,"warning"); fprintf(opt->log,"Cache: damaged cache, trying to repair"LF); + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Cache: damaged cache, trying to repair"LF); fflush(opt->log); } if (unzRepair(name, - fconcat(opt->path_log,"hts-cache/repair.zip"), - fconcat(opt->path_log,"hts-cache/repair.tmp"), + fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/repair.zip"), + fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/repair.tmp"), &repaired, &repairedBytes ) == Z_OK) { unlink(name); - rename(fconcat(opt->path_log,"hts-cache/repair.zip"), name); + rename(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/repair.zip"), name); cache->zipInput = unzOpen(name); if (opt->log) { - fspc(opt->log,"warning"); fprintf(opt->log,"Cache: %d bytes successfully recovered in %d 
entries"LF, + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Cache: %d bytes successfully recovered in %d entries"LF, (int) repairedBytes, (int) repaired); fflush(opt->log); } } else { if (opt->log) { - fspc(opt->log,"warning"); fprintf(opt->log,"Cache: could not repair the cache"LF); + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Cache: could not repair the cache"LF); fflush(opt->log); } } @@ -1338,28 +1337,28 @@ void cache_init(cache_back* cache,httrackp* opt) { } } if (dataincache) - inthash_add((inthash)cache->hashtable, filenameIndex, pos); + inthash_add(cache->hashtable, filenameIndex, pos); else - inthash_add((inthash)cache->hashtable, filenameIndex, -pos); + inthash_add(cache->hashtable, filenameIndex, -pos); } else { if (opt->log!=NULL) { - fspc(opt->log,"warning"); fprintf(opt->log,"Corrupted cache meta entry #%d"LF, (int)entries); + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Corrupted cache meta entry #%d"LF, (int)entries); } } } else { if (opt->log!=NULL) { - fspc(opt->log,"warning"); fprintf(opt->log,"Corrupted cache entry #%d"LF, (int)entries); + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Corrupted cache entry #%d"LF, (int)entries); } } unzCloseCurrentFile((unzFile) cache->zipInput); } else { if (opt->log!=NULL) { - fspc(opt->log,"warning"); fprintf(opt->log,"Corrupted cache entry #%d"LF, (int)entries); + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Corrupted cache entry #%d"LF, (int)entries); } } } while( unzGoToNextFile((unzFile) cache->zipInput) == Z_OK ); if ((opt->debug>0) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"Cache index loaded: %d entries loaded"LF, (int)entries); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"Cache index loaded: %d entries loaded"LF, (int)entries); } opt->is_update=1; // signaler comme update @@ -1370,12 +1369,12 @@ void cache_init(cache_back* cache,httrackp* opt) { } else if ( ( !cache->ro && - fsize(fconcat(opt->path_log,"hts-cache/old.dat")) >=0 && fsize(fconcat(opt->path_log,"hts-cache/old.ndx")) >0 
+ fsize(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.dat")) >=0 && fsize(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.ndx")) >0 ) || ( cache->ro && - fsize(fconcat(opt->path_log,"hts-cache/new.dat")) >=0 && fsize(fconcat(opt->path_log,"hts-cache/new.ndx")) > 0 + fsize(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.dat")) >=0 && fsize(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.ndx")) > 0 ) ) { FILE* oldndx=NULL; @@ -1383,11 +1382,11 @@ void cache_init(cache_back* cache,httrackp* opt) { printf("..load cache\n"); #endif if (!cache->ro) { - cache->olddat=fopen(fconcat(opt->path_log,"hts-cache/old.dat"),"rb"); - oldndx=fopen(fconcat(opt->path_log,"hts-cache/old.ndx"),"rb"); + cache->olddat=fopen(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.dat"),"rb"); + oldndx=fopen(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.ndx"),"rb"); } else { - cache->olddat=fopen(fconcat(opt->path_log,"hts-cache/new.dat"),"rb"); - oldndx=fopen(fconcat(opt->path_log,"hts-cache/new.ndx"),"rb"); + cache->olddat=fopen(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.dat"),"rb"); + oldndx=fopen(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.ndx"),"rb"); } // les deux doivent être ouvrables if ((cache->olddat==NULL) && (oldndx!=NULL)) { @@ -1404,11 +1403,11 @@ void cache_init(cache_back* cache,httrackp* opt) { fclose(oldndx); oldndx=NULL; // lire ndx, et lastmodified if (!cache->ro) { - buffl=fsize(fconcat(opt->path_log,"hts-cache/old.ndx")); - cache->use=readfile(fconcat(opt->path_log,"hts-cache/old.ndx")); + buffl=fsize(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.ndx")); + cache->use=readfile(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.ndx")); } else { - buffl=fsize(fconcat(opt->path_log,"hts-cache/new.ndx")); - cache->use=readfile(fconcat(opt->path_log,"hts-cache/new.ndx")); + 
buffl=fsize(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.ndx")); + cache->use=readfile(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.ndx")); } if (cache->use!=NULL) { char firstline[256]; @@ -1421,9 +1420,9 @@ void cache_init(cache_back* cache,httrackp* opt) { a+=cache_brstr(a,firstline); strcpybuff(cache->lastmodified,firstline); } else { - if (opt->errlog) { - fspc(opt->errlog,"error"); fprintf(opt->errlog,"Cache: version 1.%d not supported, ignoring current cache"LF,cache->version); - fflush(opt->errlog); + if (opt->log) { + HTS_LOG(opt,LOG_ERROR); fprintf(opt->log,"Cache: version 1.%d not supported, ignoring current cache"LF,cache->version); + fflush(opt->log); } fclose(cache->olddat); cache->olddat=NULL; @@ -1431,9 +1430,9 @@ void cache_init(cache_back* cache,httrackp* opt) { cache->use=NULL; } } else { // non supporté - if (opt->errlog) { - fspc(opt->errlog,"error"); fprintf(opt->errlog,"Cache: %s not supported, ignoring current cache"LF,firstline); - fflush(opt->errlog); + if (opt->log) { + HTS_LOG(opt,LOG_ERROR); fprintf(opt->log,"Cache: %s not supported, ignoring current cache"LF,firstline); + fflush(opt->log); } fclose(cache->olddat); cache->olddat=NULL; @@ -1444,7 +1443,7 @@ void cache_init(cache_back* cache,httrackp* opt) { } else { // Vieille version du cache /* */ if (opt->log) { - fspc(opt->log,"warning"); fprintf(opt->log,"Cache: importing old cache format"LF); + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Cache: importing old cache format"LF); fflush(opt->log); } cache->version=0; // cache 1.0 @@ -1468,7 +1467,7 @@ void cache_init(cache_back* cache,httrackp* opt) { /* read position */ a+=binput(a,linepos,200); sscanf(linepos,"%d",&pos); - inthash_add((inthash)cache->hashtable,line,pos); + inthash_add(cache->hashtable,line,pos); } } /* Not needed anymore! 
*/ @@ -1485,44 +1484,46 @@ void cache_init(cache_back* cache,httrackp* opt) { #endif if (!cache->ro) { // ouvrir caches actuels - structcheck(fconcat(opt->path_log, "hts-cache/")); + structcheck(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log), "hts-cache/")); if (1) { /* Create ZIP file cache */ - cache->zipOutput = (void*) zipOpen(fconcat(opt->path_log,"hts-cache/new.zip"), 0); + cache->zipOutput = (void*) zipOpen(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.zip"), 0); if (cache->zipOutput != NULL) { // supprimer old.lst - if (fexist(fconcat(opt->path_log,"hts-cache/old.lst"))) - remove(fconcat(opt->path_log,"hts-cache/old.lst")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.lst"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.lst")); // renommer - if (fexist(fconcat(opt->path_log,"hts-cache/new.lst"))) - rename(fconcat(opt->path_log,"hts-cache/new.lst"),fconcat(opt->path_log,"hts-cache/old.lst")); - // ouvrir - cache->lst=fopen(fconcat(opt->path_log,"hts-cache/new.lst"),"wb"); - { - filecreate_params tmp; - strcpybuff(tmp.path,opt->path_html); // chemin - tmp.lst=cache->lst; // fichier lst - filenote("",&tmp); // initialiser filecreate - } - + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.lst"))) + rename(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.lst"),fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.lst")); + // ouvrir + cache->lst=fopen(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.lst"),"wb"); + strcpybuff(opt->state.strc.path, StringBuff(opt->path_html)); + opt->state.strc.lst = cache->lst; + //{ + //filecreate_params tmp; + //strcpybuff(tmp.path,StringBuff(opt->path_html)); // chemin + //tmp.lst=cache->lst; // fichier lst + //filenote("",&tmp); // initialiser filecreate + //} + // supprimer old.txt - if (fexist(fconcat(opt->path_log,"hts-cache/old.txt"))) - 
remove(fconcat(opt->path_log,"hts-cache/old.txt")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.txt"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.txt")); // renommer - if (fexist(fconcat(opt->path_log,"hts-cache/new.txt"))) - rename(fconcat(opt->path_log,"hts-cache/new.txt"),fconcat(opt->path_log,"hts-cache/old.txt")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.txt"))) + rename(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.txt"),fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.txt")); // ouvrir - cache->txt=fopen(fconcat(opt->path_log,"hts-cache/new.txt"),"wb"); + cache->txt=fopen(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.txt"),"wb"); if (cache->txt) { fprintf(cache->txt,"date\tsize'/'remotesize\tflags(request:Update,Range state:File response:Modified,Chunked,gZipped)\t"); fprintf(cache->txt,"statuscode\tstatus ('servermsg')\tMIME\tEtag|Date\tURL\tlocalfile\t(from URL)"LF); } } } else { - cache->dat=fopen(fconcat(opt->path_log,"hts-cache/new.dat"),"wb"); - cache->ndx=fopen(fconcat(opt->path_log,"hts-cache/new.ndx"),"wb"); + cache->dat=fopen(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.dat"),"wb"); + cache->ndx=fopen(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.ndx"),"wb"); // les deux doivent être ouvrables if ((cache->dat==NULL) && (cache->ndx!=NULL)) { fclose(cache->ndx); @@ -1546,28 +1547,30 @@ void cache_init(cache_back* cache,httrackp* opt) { fflush(cache->ndx); // un petit fflush au cas où // supprimer old.lst - if (fexist(fconcat(opt->path_log,"hts-cache/old.lst"))) - remove(fconcat(opt->path_log,"hts-cache/old.lst")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.lst"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.lst")); // renommer - if 
(fexist(fconcat(opt->path_log,"hts-cache/new.lst"))) - rename(fconcat(opt->path_log,"hts-cache/new.lst"),fconcat(opt->path_log,"hts-cache/old.lst")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.lst"))) + rename(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.lst"),fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.lst")); // ouvrir - cache->lst=fopen(fconcat(opt->path_log,"hts-cache/new.lst"),"wb"); - { - filecreate_params tmp; - strcpybuff(tmp.path,opt->path_html); // chemin - tmp.lst=cache->lst; // fichier lst - filenote("",&tmp); // initialiser filecreate - } + cache->lst=fopen(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.lst"),"wb"); + strcpybuff(opt->state.strc.path, StringBuff(opt->path_html)); + opt->state.strc.lst = cache->lst; + //{ + // filecreate_params tmp; + // strcpybuff(tmp.path,StringBuff(opt->path_html)); // chemin + // tmp.lst=cache->lst; // fichier lst + // filenote("",&tmp); // initialiser filecreate + //} // supprimer old.txt - if (fexist(fconcat(opt->path_log,"hts-cache/old.txt"))) - remove(fconcat(opt->path_log,"hts-cache/old.txt")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.txt"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.txt")); // renommer - if (fexist(fconcat(opt->path_log,"hts-cache/new.txt"))) - rename(fconcat(opt->path_log,"hts-cache/new.txt"),fconcat(opt->path_log,"hts-cache/old.txt")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.txt"))) + rename(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.txt"),fconcat(OPT_GET_BUFF(opt),StringBuff(opt->path_log),"hts-cache/old.txt")); // ouvrir - cache->txt=fopen(fconcat(opt->path_log,"hts-cache/new.txt"),"wb"); + cache->txt=fopen(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.txt"),"wb"); if (cache->txt) { 
fprintf(cache->txt,"date\tsize'/'remotesize\tflags(request:Update,Range state:File response:Modified,Chunked,gZipped)\t"); fprintf(cache->txt,"statuscode\tstatus ('servermsg')\tMIME\tEtag|Date\tURL\tlocalfile\t(from URL)"LF); @@ -1596,17 +1599,18 @@ char* readfile(char* fil) { char* readfile2(char* fil, LLint* size) { char* adr=NULL; + char catbuff[CATBUFF_SIZE]; INTsys len=0; len=fsize(fil); if (len >= 0) { // exists FILE* fp; - fp=fopen(fconv(fil),"rb"); + fp=fopen(fconv(catbuff, fil),"rb"); if (fp!=NULL) { // n'existe pas (!) adr=(char*) malloct(len+1); if (size != NULL) *size = len; if (adr!=NULL) { - if (len > 0 && (INTsys)fread(adr,1,len,fp) != len) { // fichier endommagé ? + if (len > 0 && fread(adr,1,len,fp) != len) { // fichier endommagé ? freet(adr); adr=NULL; } else @@ -1621,8 +1625,9 @@ char* readfile2(char* fil, LLint* size) { char* readfile_or(char* fil,char* defaultdata) { char* realfile=fil; char* ret; + char catbuff[CATBUFF_SIZE]; if (!fexist(fil)) - realfile=fconcat(hts_rootdir(NULL),fil); + realfile=fconcat(catbuff,hts_rootdir(NULL),fil); ret=readfile(realfile); if (ret) return ret; @@ -1638,14 +1643,14 @@ char* readfile_or(char* fil,char* defaultdata) { // écriture/lecture d'une chaîne sur un fichier // -1 : erreur, sinon 0 -int cache_wstr(FILE* fp,char* s) { +int cache_wstr(FILE* fp,const char* s) { INTsys i; char buff[256+4]; - i = s != NULL ? strlen(s) : 0; + i = (s != NULL) ? 
((INTsys)strlen(s)) : 0; sprintf(buff,INTsysP "\n",i); - if (fwrite(buff,1,(INTsys)strlen(buff),fp) != strlen(buff)) + if (fwrite(buff,1,strlen(buff),fp) != strlen(buff)) return -1; - if (i > 0 && (INTsys)fwrite(s,1,i,fp) != i) + if (i > 0 && fwrite(s,1,i,fp) != i) return -1; return 0; } diff --git a/src/htscache.h b/src/htscache.h index b80a0ee..7a4bb5c 100644 --- a/src/htscache.h +++ b/src/htscache.h @@ -40,32 +40,49 @@ Please visit our Website: http://www.httrack.com #ifndef HTSCACHE_DEFH #define HTSCACHE_DEFH -#include "htscore.h" - /* Library internal definictions */ #ifdef HTS_INTERNAL_BYTECODE +#include "htsglobal.h" + +#include + +/* Forward definitions */ +#ifndef HTS_DEF_FWSTRUCT_httrackp +#define HTS_DEF_FWSTRUCT_httrackp +typedef struct httrackp httrackp; +#endif +#ifndef HTS_DEF_FWSTRUCT_cache_back +#define HTS_DEF_FWSTRUCT_cache_back +typedef struct cache_back cache_back; +#endif +#ifndef HTS_DEF_FWSTRUCT_htsblk +#define HTS_DEF_FWSTRUCT_htsblk +typedef struct htsblk htsblk; +#endif + // cache -void cache_mayadd(httrackp* opt,cache_back* cache,htsblk* r,char* url_adr,char* url_fil,char* url_save); -void cache_add(cache_back* cache,htsblk r,char* url_adr,char* url_fil,char* url_save,int all_in_cache,char* path_prefix); -htsblk cache_read(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* location); -htsblk cache_read_ro(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* location); -htsblk cache_readex(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* location,char* return_save,int readonly); -htsblk* cache_header(httrackp* opt,cache_back* cache,char* adr,char* fil,htsblk* r); +void cache_mayadd(httrackp* opt,cache_back* cache,htsblk* r,const char* url_adr,const char* url_fil,const char* url_save); +void cache_add(httrackp* opt,cache_back* cache,const htsblk *r,const char* url_adr,const char* url_fil,const char* url_save,int all_in_cache,const char* path_prefix); +htsblk cache_read(httrackp* 
opt,cache_back* cache,const char* adr,const char* fil,const char* save,char* location); +htsblk cache_read_ro(httrackp* opt,cache_back* cache,const char* adr,const char* fil,const char* save,char* location); +htsblk cache_readex(httrackp* opt,cache_back* cache,const char* adr,const char* fil,const char* save,char* location,char* return_save,int readonly); +htsblk* cache_header(httrackp* opt,cache_back* cache,const char* adr,const char* fil,htsblk* r); void cache_init(cache_back* cache,httrackp* opt); -int cache_writedata(FILE* cache_ndx,FILE* cache_dat,char* str1,char* str2,char* outbuff,int len); -int cache_readdata(cache_back* cache,char* str1,char* str2,char** inbuff,int* len); +int cache_writedata(FILE* cache_ndx,FILE* cache_dat,const char* str1,const char* str2,char* outbuff,int len); +int cache_readdata(cache_back* cache,const char* str1,const char* str2,char** inbuff,int* len); -int cache_wstr(FILE* fp,char* s); void cache_rstr(FILE* fp,char* s); char* cache_rstr_addr(FILE* fp); int cache_brstr(char* adr,char* s); int cache_quickbrstr(char* adr,char* s); int cache_brint(char* adr,int* i); void cache_rint(FILE* fp,int* i); -int cache_wint(FILE* fp,int i); void cache_rLLint(FILE* fp,LLint* i); + +int cache_wstr(FILE* fp,const char* s); +int cache_wint(FILE* fp,int i); int cache_wLLint(FILE* fp,LLint i); #endif diff --git a/src/htscatchurl.c b/src/htscatchurl.c index 3832019..74a2439 100644 --- a/src/htscatchurl.c +++ b/src/htscatchurl.c @@ -47,7 +47,7 @@ Please visit our Website: http://www.httrack.com #ifndef _WIN32_WCE #include #endif -#if HTS_WIN +#ifdef _WIN32 #else #include #endif @@ -102,7 +102,7 @@ HTSEXT_API T_SOC catch_url_init(int* port,char* adr) { // copie adresse SOCaddr_copyaddr(server, server_size, hp_loc->h_addr_list[0], hp_loc->h_length); - if ( (soc=socket(SOCaddr_sinfamily(server), SOCK_STREAM, 0)) != INVALID_SOCKET) { + if ( (soc = (T_SOC) socket(SOCaddr_sinfamily(server), SOCK_STREAM, 0)) != INVALID_SOCKET) { SOCaddr_initport(server, 
*port); if ( bind(soc,(struct sockaddr*) &server,server_size) == 0 ) { SOCaddr server2; @@ -162,7 +162,7 @@ HTSEXT_API int catch_url(T_SOC soc,char* url,char* method,char* data) { T_SOC soc2; struct sockaddr dummyaddr; int dummylen = sizeof(struct sockaddr); - while ( (soc2=accept(soc,&dummyaddr,&dummylen)) == INVALID_SOCKET); + while ( (soc2 = (T_SOC) accept(soc,&dummyaddr,&dummylen)) == INVALID_SOCKET); /* #ifdef _WIN32 closesocket(soc); @@ -224,7 +224,7 @@ HTSEXT_API int catch_url(T_SOC soc,char* url,char* method,char* data) { //strcatbuff(data,"\r\n"); if (blkretour.totalsize>0) { int len=(int)min(blkretour.totalsize,32000); - int pos=strlen(data); + int pos = (int) strlen(data); // Copier le reste (post éventuel) while((len>0) && ((r=recv(soc,(char*) data+pos,len,0))>0) ) { pos+=r; @@ -234,7 +234,7 @@ HTSEXT_API int catch_url(T_SOC soc,char* url,char* method,char* data) { } // Envoyer page sprintf(line,CATCH_RESPONSE); - send(soc,line,strlen(line),0); + send(soc,line,(int)strlen(line),0); // OK! 
retour=1; } diff --git a/src/htscore.c b/src/htscore.c index 48d776f..370f529 100644 --- a/src/htscore.c +++ b/src/htscore.c @@ -67,56 +67,10 @@ Please visit our Website: http://www.httrack.com /* END specific definitions */ - -/* HTML parsing */ -#if HTS_ANALYSTE - -t_hts_htmlcheck_init hts_htmlcheck_init = NULL; -t_hts_htmlcheck_uninit hts_htmlcheck_uninit = NULL; -t_hts_htmlcheck_start hts_htmlcheck_start = NULL; -t_hts_htmlcheck_end hts_htmlcheck_end = NULL; -t_hts_htmlcheck_chopt hts_htmlcheck_chopt = NULL; -t_hts_htmlcheck_process hts_htmlcheck_preprocess = NULL; -t_hts_htmlcheck_process hts_htmlcheck_postprocess = NULL; -t_hts_htmlcheck hts_htmlcheck = NULL; -t_hts_htmlcheck_query hts_htmlcheck_query = NULL; -t_hts_htmlcheck_query2 hts_htmlcheck_query2 = NULL; -t_hts_htmlcheck_query3 hts_htmlcheck_query3 = NULL; -t_hts_htmlcheck_loop hts_htmlcheck_loop = NULL; -t_hts_htmlcheck_check hts_htmlcheck_check = NULL; -t_hts_htmlcheck_check_mime hts_htmlcheck_check_mime = NULL; -t_hts_htmlcheck_pause hts_htmlcheck_pause = NULL; -t_hts_htmlcheck_filesave hts_htmlcheck_filesave = NULL; -t_hts_htmlcheck_filesave2 hts_htmlcheck_filesave2 = NULL; -t_hts_htmlcheck_linkdetected hts_htmlcheck_linkdetected = NULL; -t_hts_htmlcheck_linkdetected2 hts_htmlcheck_linkdetected2 = NULL; -t_hts_htmlcheck_xfrstatus hts_htmlcheck_xfrstatus = NULL; -t_hts_htmlcheck_savename hts_htmlcheck_savename = NULL; -t_hts_htmlcheck_sendhead hts_htmlcheck_sendhead = NULL; -t_hts_htmlcheck_receivehead hts_htmlcheck_receivehead = NULL; - -extern void set_wrappers(void); - -char _hts_errmsg[1100]=""; -int _hts_in_html_parsing=0; -int _hts_in_html_done=0; // % done -int _hts_in_html_poll=0; // parsing -int _hts_setpause=0; -//httrackp* _hts_setopt=NULL; -char** _hts_addurl=NULL; - /* external modules */ extern int hts_parse_externals(htsmoduleStruct* str); extern void htspe_init(void); -// -int _hts_cancel=0; -#endif - - - -int exit_xh; /* quick exit (fatal error or interrupt) */ - /* debug */ #if 
DEBUG_SHOWTYPES char REG[32768]="\n"; @@ -155,18 +109,11 @@ int nsocDEBUG=0; int longest_hash[3]={0,0,0},hashnumber=0; #endif -// demande d'interaction avec le shell -#if HTS_ANALYSTE -char HTbuff[2048]; -#endif - - - // Début de httpmirror, routines annexes // version 1 pour httpmirror // flusher si on doit lire peu à peu le fichier -#define test_flush if (opt.flush) { fflush(opt.log); fflush(opt.errlog); } +#define test_flush if (opt->flush) { fflush(opt->log); fflush(opt->log); } // pour alléger la syntaxe, des raccourcis sont créés #define urladr (liens[ptr]->adr) @@ -177,18 +124,12 @@ char HTbuff[2048]; // au cas où nous devons quitter rapidement xhttpmirror (plus de mémoire, etc) // note: partir de liens_max.. vers 0.. sinon erreur de violation de mémoire: les liens suivants // ne sont plus à nous.. agh! [dur celui-là] -#if HTS_ANALYSTE #define HTMLCHECK_UNINIT { \ -if ( (opt.debug>0) && (opt.log!=NULL) ) { \ -fspc(opt.log,"info"); fprintf(opt.log,"engine: end"LF); \ -} \ -if (hts_htmlcheck_end != NULL) { \ - hts_htmlcheck_end(); \ +if ( (opt->debug>0) && (opt->log!=NULL) ) { \ +HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"engine: end"LF); \ } \ +RUN_CALLBACK0(opt, end); \ } -#else - #define HTMLCHECK_UNINIT -#endif #define XH_extuninit do { \ int i; \ @@ -211,7 +152,7 @@ if (hts_htmlcheck_end != NULL) { \ if (filters) { \ freet(filters); filters=NULL; \ } \ - back_delete_all(&opt,&cache,sback); \ + back_delete_all(opt,&cache,sback); \ back_free(&sback); \ checkrobots_free(&robots);\ if (cache.use) { freet(cache.use); cache.use=NULL; } \ @@ -228,18 +169,18 @@ if (hts_htmlcheck_end != NULL) { \ if (cache.olddat) { fclose(cache.olddat); cache.olddat=NULL; } \ if (cache.lst) { fclose(cache.lst); cache.lst=NULL; } \ if (cache.txt) { fclose(cache.txt); cache.txt=NULL; } \ - if (opt.log) fflush(opt.log); \ - if (opt.errlog) fflush(opt.errlog);\ + if (opt->log) fflush(opt->log); \ + if (opt->log) fflush(opt->log);\ if (makestat_fp) { fclose(makestat_fp); 
makestat_fp=NULL; } \ if (maketrack_fp){ fclose(maketrack_fp); maketrack_fp=NULL; } \ - if (opt.accept_cookie) cookie_save(opt.cookie,fconcat(opt.path_log,"cookies.txt")); \ + if (opt->accept_cookie) cookie_save(opt->cookie,fconcat(OPT_GET_BUFF(opt),StringBuff(opt->path_log),"cookies.txt")); \ if (makeindex_fp) { fclose(makeindex_fp); makeindex_fp=NULL; } \ if (cache_hashtable) { inthash_delete(&cache_hashtable); } \ if (cache_tests) { inthash_delete(&cache_tests); } \ if (template_header) { freet(template_header); template_header=NULL; } \ if (template_body) { freet(template_body); template_body=NULL; } \ if (template_footer) { freet(template_footer); template_footer=NULL; } \ - clearCallbacks(&opt.state.callbacks); \ + clearCallbacks(&opt->state.callbacks); \ /*structcheck_init(-1);*/ \ } while(0) #define XH_uninit do { XH_extuninit; if (r.adr) { freet(r.adr); r.adr=NULL; } } while(0) @@ -254,7 +195,7 @@ if (hts_htmlcheck_end != NULL) { \ #define liens_record(A,F,S,FA,FF,NORM) { \ int notecode=0; \ -int lienurl_len=((sizeof(lien_url)+HTS_ALIGN-1)/HTS_ALIGN)*HTS_ALIGN,\ +size_t lienurl_len=((sizeof(lien_url)+HTS_ALIGN-1)/HTS_ALIGN)*HTS_ALIGN,\ adr_len=strlen(A),\ fil_len=strlen(F),\ sav_len=strlen(S),\ @@ -315,7 +256,7 @@ if (makeindex_fp) { \ fflush(makeindex_fp); \ fclose(makeindex_fp); /* à ne pas oublier sinon on passe une nuit blanche */ \ makeindex_fp=NULL; \ - usercommand(&opt,0,NULL,fconcat(opt.path_html,"index.html"),"",""); \ + usercommand(opt,0,NULL,fconcat(OPT_GET_BUFF(opt),StringBuff(opt->path_html),"index.html"),"",""); \ } \ } \ makeindex_done=1; /* ok c'est fait */ \ @@ -326,8 +267,7 @@ makeindex_done=1; /* ok c'est fait */ \ // Début de httpmirror, robot // url1 peut être multiple -int httpmirror(char* url1,httrackp* ptropt) { - httrackp BIGSTK opt; // structure d'options +int httpmirror(char* url1, httrackp* opt) { char* primary=NULL; // première page, contenant les liens à scanner int lien_tot=0; // nombre de liens pour le moment lien_url** 
liens=NULL; // les pointeurs sur les liens @@ -335,7 +275,7 @@ int httpmirror(char* url1,httrackp* ptropt) { hash_struct* hashptr = &hash; t_cookie BIGSTK cookie; // gestion des cookies int lien_max=0; - int lien_size=0; // octets restants dans buffer liens dispo + size_t lien_size=0; // octets restants dans buffer liens dispo char* lien_buffer=NULL; // buffer liens actuel int add_tab_alloc=256000; // +256K de liens à chaque fois //char* tab_alloc=NULL; @@ -344,7 +284,6 @@ int httpmirror(char* url1,httrackp* ptropt) { int numero_passe=0; // deux passes pour html puis images struct_back* sback=NULL; htsblk BIGSTK r; // retour de certaines fonctions - TStamp lastime=0; // pour affichage infos de tmp en tmp // pour les stats, nombre de fichiers & octets écrits LLint stat_fragment=0; // pour la fragmentation //TStamp istat_timestart; // départ pour calcul instantanné @@ -378,8 +317,6 @@ int httpmirror(char* url1,httrackp* ptropt) { // char *template_header=NULL,*template_body=NULL,*template_footer=NULL; // - opt = *ptropt; - // codebase[0]='\0'; base[0]='\0'; // cookie.auth.next=NULL; @@ -394,66 +331,56 @@ int httpmirror(char* url1,httrackp* ptropt) { /* reset stats */ HTS_STAT.HTS_TOTAL_RECV=0; HTS_STAT.istat_bytes[0]=HTS_STAT.istat_bytes[1]=0; - /* - if (opt.aff_progress) - lastime=HTS_STAT.stat_timestart; - */ - if (opt.shell) { + if (opt->shell) { last_info_shell=HTS_STAT.stat_timestart; } - if ((opt.makestat) || (opt.maketrack)){ + if ((opt->makestat) || (opt->maketrack)){ makestat_time=HTS_STAT.stat_timestart; } - // initialiser compteur erreurs - fspc(NULL,NULL); - // init external modules htspe_init(); // initialiser cookie - if (opt.accept_cookie) { - opt.cookie=&cookie; + if (opt->accept_cookie) { + opt->cookie=&cookie; cookie.max_len=30000; // max len strcpybuff(cookie.data,""); // Charger cookies.txt par défaut ou cookies.txt du miroir - cookie_load(opt.cookie,opt.path_log,"cookies.txt"); - cookie_load(opt.cookie,"","cookies.txt"); + 
cookie_load(opt->cookie,StringBuff(opt->path_log),"cookies.txt"); + cookie_load(opt->cookie,"","cookies.txt"); } else - opt.cookie=NULL; + opt->cookie=NULL; // initialiser exit_xh - exit_xh=0; // sortir prématurément (var globale) + opt->state.exit_xh=0; // sortir prématurément (var globale) // initialiser usercommand - usercommand(&opt,opt.sys_com_exec,opt.sys_com,"","",""); + usercommand(opt,opt->sys_com_exec,StringBuff(opt->sys_com),"","",""); // initialiser structcheck // structcheck_init(1); - // initialiser tableau options accessible par d'autres fonctions (signal) - hts_declareoptbuffer(&opt); - // initialiser verif_backblue - verif_backblue(&opt,NULL); - verif_external(0,0); - verif_external(1,0); + verif_backblue(opt,NULL); + verif_external(opt,0,0); + verif_external(opt,1,0); // et templates html - template_header=readfile_or(fconcat(opt.path_bin,"templates/index-header.html"),HTS_INDEX_HEADER); - template_body=readfile_or(fconcat(opt.path_bin,"templates/index-body.html"),HTS_INDEX_BODY); - template_footer=readfile_or(fconcat(opt.path_bin,"templates/index-footer.html"),HTS_INDEX_FOOTER); + template_header=readfile_or(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_bin),"templates/index-header.html"),HTS_INDEX_HEADER); + template_body=readfile_or(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_bin),"templates/index-body.html"),HTS_INDEX_BODY); + template_footer=readfile_or(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_bin),"templates/index-footer.html"),HTS_INDEX_FOOTER); // initialiser mimedefs - get_userhttptype(1,opt.mimedefs,NULL); + //get_userhttptype(opt,1,StringBuff(opt->mimedefs),NULL); // Initialiser indexation - if (opt.kindex) - index_init(opt.path_html); + if (opt->kindex) + index_init(StringBuff(opt->path_html)); // effacer bloc cache memset(&cache, 0, sizeof(cache_back)); - cache.type=opt.cache; // cache? - cache.errlog=opt.errlog; // err log? + cache.type=opt->cache; // cache? + cache.errlog=cache.log=opt->log; // err log? 
cache.ptr_ant=cache.ptr_last=0; // pointeur pour anticiper // initialiser hash cache @@ -471,32 +398,29 @@ int httpmirror(char* url1,httrackp* ptropt) { cache.hashtable=(void*)cache_hashtable; /* copy backcache hash */ cache.cached_tests=(void*)cache_tests; /* copy of cache_tests */ - // initialiser cache DNS - _hts_lockdns(-999); - // robots.txt strcpybuff(robots.adr,"!"); // dummy robots.token[0]='\0'; robots.next=NULL; // suivant - opt.robotsptr = &robots; + opt->robotsptr = &robots; // effacer filters - opt.maxfilter = maximum(opt.maxfilter, 128); - if (filters_init(&filters, opt.maxfilter, 0) == 0) { + opt->maxfilter = maximum(opt->maxfilter, 128); + if (filters_init(&filters, opt->maxfilter, 0) == 0) { printf("PANIC! : Not enough memory [%d]\n",__LINE__); XH_extuninit; return 0; } - opt.filters.filters=&filters; + opt->filters.filters=&filters; // - opt.filters.filptr=&filptr; - //opt.filters.filter_max=&filter_max; + opt->filters.filptr=&filptr; + //opt->filters.filter_max=&filter_max; // hash table - opt.hash = &hash; + opt->hash = &hash; // tableau de pointeurs sur les liens - lien_max=maximum(opt.maxlink,32); + lien_max=maximum(opt->maxlink,32); liens=(lien_url**) malloct(lien_max*sizeof(lien_url*)); // tableau de pointeurs sur les liens if (liens==NULL) { printf("PANIC! 
: Not enough memory [%d]\n",__LINE__); @@ -519,15 +443,15 @@ int httpmirror(char* url1,httrackp* ptropt) { hash.liens = liens; hash.max_lien=0; } - + // copier adresse(s) dans liste des adresses { char *a=url1; int primary_len=8192; - if (strnotempty(opt.filelist)) { - primary_len+=max(0,fsize(opt.filelist)*2); + if (StringNotEmpty(opt->filelist)) { + primary_len += max(0, fsize(StringBuff(opt->filelist))*2); } - primary_len+=strlen(url1)*2; + primary_len += (int) strlen(url1)*2; // création de la première page, qui contient les liens de base à scanner // c'est plus propre et plus logique que d'entrer à la main les liens dans la pile @@ -586,19 +510,19 @@ int httpmirror(char* url1,httrackp* ptropt) { filptr++; /* sanity check */ - if (filptr + 1 >= opt.maxfilter) { - opt.maxfilter += HTS_FILTERSINC; - if (filters_init(&filters, opt.maxfilter, HTS_FILTERSINC) == 0) { + if (filptr + 1 >= opt->maxfilter) { + opt->maxfilter += HTS_FILTERSINC; + if (filters_init(&filters, opt->maxfilter, HTS_FILTERSINC) == 0) { printf("PANIC! 
: Too many filters : >%d [%d]\n",filptr,__LINE__); - if (opt.errlog) { - fprintf(opt.errlog,LF"Too many filters, giving up..(>%d)"LF,filptr); - fprintf(opt.errlog,"To avoid that: use #F option for more filters (example: -#F5000)"LF); + if (opt->log) { + fprintf(opt->log,LF"Too many filters, giving up..(>%d)"LF,filptr); + fprintf(opt->log,"To avoid that: use #F option for more filters (example: -#F5000)"LF); test_flush; } XH_extuninit; return 0; } - //opt.filters.filters=filters; + //opt->filters.filters=filters; } } @@ -622,15 +546,15 @@ int httpmirror(char* url1,httrackp* ptropt) { /* load URL file list */ /* OPTIMIZED for fast load */ - if (strnotempty(opt.filelist)) { + if (StringNotEmpty(opt->filelist)) { char* filelist_buff=NULL; - INTsys filelist_sz=fsize(opt.filelist); + off_t filelist_sz = fsize(StringBuff(opt->filelist)); if (filelist_sz>0) { - FILE* fp=fopen(opt.filelist,"rb"); + FILE* fp=fopen(StringBuff(opt->filelist),"rb"); if (fp) { - filelist_buff=malloct(filelist_sz + 2); + filelist_buff = malloct(filelist_sz + 2); if (filelist_buff) { - if ((INTsys)fread(filelist_buff,1,filelist_sz,fp) != filelist_sz) { + if (fread(filelist_buff,1,filelist_sz,fp) != filelist_sz) { freet(filelist_buff); filelist_buff=NULL; } else { @@ -662,26 +586,26 @@ int httpmirror(char* url1,httrackp* ptropt) { } } // fclose(fp); - if (opt.log!=NULL) { - fspc(opt.log,"info"); fprintf(opt.log,"%d links added from %s"LF,n,opt.filelist); test_flush; + if (opt->log!=NULL) { + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"%d links added from %s"LF,n,StringBuff(opt->filelist)); test_flush; } // Free buffer freet(filelist_buff); } else { - if (opt.errlog!=NULL) { - fspc(opt.errlog,"error"); fprintf(opt.errlog,"Could not include URL list: %s"LF,opt.filelist); test_flush; + if (opt->log!=NULL) { + HTS_LOG(opt,LOG_ERROR); fprintf(opt->log,"Could not include URL list: %s"LF,StringBuff(opt->filelist)); test_flush; } } } // lien primaire - 
liens_record("primary","/primary",fslash(fconcat(opt.path_html,"index.html")),"","",opt.urlhack); + liens_record("primary","/primary",fslash(OPT_GET_BUFF(opt),fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_html),"index.html")),"","",opt->urlhack); if (liens[lien_tot]==NULL) { // erreur, pas de place réservée printf("PANIC! : Not enough memory [%d]\n",__LINE__); - if (opt.errlog) { - fprintf(opt.errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url))); + if (opt->log) { + fprintf(opt->log,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url))); test_flush; } XH_extuninit; // désallocation mémoire & buffers @@ -689,9 +613,9 @@ int httpmirror(char* url1,httrackp* ptropt) { } liens[lien_tot]->testmode=0; // pas mode test liens[lien_tot]->link_import=0; // pas mode import - liens[lien_tot]->depth=opt.depth+1; // lien de priorité maximale + liens[lien_tot]->depth=opt->depth+1; // lien de priorité maximale liens[lien_tot]->pass2=0; // 1ère passe - liens[lien_tot]->retry=opt.retry; // lien de priorité maximale + liens[lien_tot]->retry=opt->retry; // lien de priorité maximale liens[lien_tot]->premier=lien_tot; // premier lien, objet-père=objet liens[lien_tot]->precedent=lien_tot; // lien précédent lien_tot++; @@ -699,18 +623,14 @@ int httpmirror(char* url1,httrackp* ptropt) { // Initialiser cache { int backupXFR = htsMemoryFastXfr; -#if HTS_ANALYSTE - _hts_in_html_parsing=4; -#endif - if (!hts_htmlcheck_loop(NULL,0,0,0,lien_tot,0,NULL)) { - exit_xh=1; // exit requested + opt->state._hts_in_html_parsing=4; + if (!RUN_CALLBACK7(opt, loop, NULL,0,0,0,lien_tot,0,NULL)) { + opt->state.exit_xh=1; // exit requested } htsMemoryFastXfr = 1; /* fast load */ - cache_init(&cache,&opt); + cache_init(&cache,opt); htsMemoryFastXfr = backupXFR; -#if HTS_ANALYSTE - _hts_in_html_parsing=0; -#endif + opt->state._hts_in_html_parsing=0; } } @@ -728,8 +648,8 @@ int httpmirror(char* url1,httrackp* ptropt) { 
#endif // backing - //soc_max=opt.maxsoc; - if (opt.maxsoc>0) { + //soc_max=opt->maxsoc; + if (opt->maxsoc>0) { #if BDEBUG==2 _CLRSCR; #endif @@ -737,10 +657,10 @@ int httpmirror(char* url1,httrackp* ptropt) { // On prévoit large: les fichiers HTML ne prennent que peu de place en mémoire, et les // fichiers non html sont sauvés en direct sur disque. // --> 1024 entrées + 32 entrées par socket en supplément - sback = back_new(opt.maxsoc*32+1024); + sback = back_new(opt->maxsoc*32+1024); if (sback == NULL) { - if (opt.errlog) - fprintf(opt.errlog,"Not enough memory, can not allocate %d bytes"LF,(int)((opt.maxsoc+1)*sizeof(lien_back))); + if (opt->log) + fprintf(opt->log,"Not enough memory, can not allocate %d bytes"LF,(int)((opt->maxsoc+1)*sizeof(lien_back))); return 0; } } @@ -750,8 +670,8 @@ int httpmirror(char* url1,httrackp* ptropt) { test_flush; // statistiques - if (opt.makestat) { - makestat_fp=fopen(fconcat(opt.path_log,"hts-stats.txt"),"wb"); + if (opt->makestat) { + makestat_fp=fopen(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-stats.txt"),"wb"); if (makestat_fp != NULL) { fprintf(makestat_fp,"HTTrack statistics report, every minutes"LF LF); fflush(makestat_fp); @@ -759,8 +679,8 @@ int httpmirror(char* url1,httrackp* ptropt) { } // tracking -- débuggage - if (opt.maketrack) { - maketrack_fp=fopen(fconcat(opt.path_log,"hts-track.txt"),"wb"); + if (opt->maketrack) { + maketrack_fp=fopen(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-track.txt"),"wb"); if (maketrack_fp != NULL) { fprintf(maketrack_fp,"HTTrack tracking report, every minutes"LF LF); fflush(maketrack_fp); @@ -769,20 +689,16 @@ int httpmirror(char* url1,httrackp* ptropt) { // on n'a pas de liens!! (exemple: httrack www.* impossible sans départ..) if (lien_tot<=0) { - if (opt.errlog) { - fprintf(opt.errlog,"Error! You MUST specify at least one complete URL, and not only wildcards!"LF); + if (opt->log) { + fprintf(opt->log,"Error! 
You MUST specify at least one complete URL, and not only wildcards!"LF); } } /* Send options to callback functions */ -#if HTS_ANALYSTE - if (hts_htmlcheck_chopt != NULL) { - hts_htmlcheck_chopt(&opt); - } -#endif + RUN_CALLBACK0(opt, chopt); // attendre une certaine heure.. - if (opt.waittime>0) { + if (opt->waittime>0) { int rollover=0; int ok=0; { @@ -794,12 +710,12 @@ int httpmirror(char* url1,httrackp* ptropt) { tl+=A->tm_sec; tl+=A->tm_min*60; tl+=A->tm_hour*60*60; - if (tl>opt.waittime) // attendre minuit + if (tl>opt->waittime) // attendre minuit rollover=1; } // attendre.. - _hts_in_html_parsing=5; + opt->state._hts_in_html_parsing=5; do { TStamp tl=0; time_t tt; @@ -811,60 +727,49 @@ int httpmirror(char* url1,httrackp* ptropt) { tl+=A->tm_hour*60*60; if (rollover) { - if (tl<=opt.waittime) + if (tl<=opt->waittime) rollover=0; // attendre heure } else { - if (tl>opt.waittime) + if (tl>opt->waittime) ok=1; // ok! } -#if HTS_ANALYSTE - if (hts_htmlcheck_loop != NULL) { + { int r; if (rollover) - r=hts_htmlcheck_loop(sback->lnk, sback->count,0,0,lien_tot,(int) (opt.waittime-tl+24*3600),NULL); + r = RUN_CALLBACK7(opt, loop, sback->lnk, sback->count,0,0,lien_tot,(int) (opt->waittime-tl+24*3600),NULL); else - r=hts_htmlcheck_loop(sback->lnk, sback->count,0,0,lien_tot,(int) (opt.waittime-tl),NULL); + r = RUN_CALLBACK7(opt, loop, sback->lnk, sback->count,0,0,lien_tot,(int) (opt->waittime-tl),NULL); if (!r) { - exit_xh=1; // exit requested + opt->state.exit_xh=1; // exit requested ok=1; } else Sleep(100); } -#endif - } while(!ok); - _hts_in_html_parsing=0; + + } while(!ok); + opt->state._hts_in_html_parsing=0; // note: recopie de plus haut // noter heure actuelle de départ en secondes HTS_STAT.stat_timestart=time_local(); - /* - if (opt.aff_progress) - lastime=HTS_STAT.stat_timestart; - */ - if (opt.shell) { + if (opt->shell) { last_info_shell=HTS_STAT.stat_timestart; } - if ((opt.makestat) || (opt.maketrack)){ + if ((opt->makestat) || (opt->maketrack)){ 
makestat_time=HTS_STAT.stat_timestart; } } /* Info for wrappers */ - if ( (opt.debug>0) && (opt.log!=NULL) ) { - fspc(opt.log,"info"); fprintf(opt.log,"engine: start"LF); + if ( (opt->debug>0) && (opt->log!=NULL) ) { + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"engine: start"LF); } -#if HTS_ANALYSTE - if (hts_htmlcheck_start != NULL) { - if (!hts_htmlcheck_start(&opt)) { - XH_extuninit; - return 1; - } + if (!RUN_CALLBACK0(opt, start)) { + XH_extuninit; + return 1; } - set_wrappers(); // _start() is allowed to set other wrappers -#endif - // ------------------------------------------------------------ @@ -883,13 +788,19 @@ int httpmirror(char* url1,httrackp* ptropt) { memset(&r, 0, sizeof(htsblk)); r.soc=INVALID_SOCKET; r.location=loc; // en cas d'erreur 3xx (moved) // recopier proxy - memcpy(&(r.req.proxy), &opt.proxy, sizeof(opt.proxy)); + if ((r.req.proxy.active = opt->proxy.active)) { + if (StringBuff(opt->proxy.bindhost) != NULL) + strcpybuff(r.req.proxy.bindhost, StringBuff(opt->proxy.bindhost)); + if (StringBuff(opt->proxy.name) != NULL) + strcpybuff(r.req.proxy.name, StringBuff(opt->proxy.name)); + r.req.proxy.port = opt->proxy.port; + } // et user-agent - strcpybuff(r.req.user_agent,opt.user_agent); - strcpybuff(r.req.referer,opt.referer); - strcpybuff(r.req.from,opt.from); - strcpybuff(r.req.lang_iso,opt.lang_iso); - r.req.user_agent_send=opt.user_agent_send; + strcpy(r.req.user_agent,StringBuff(opt->user_agent)); + strcpy(r.req.referer,StringBuff(opt->referer)); + strcpy(r.req.from,StringBuff(opt->from)); + strcpy(r.req.lang_iso,StringBuff(opt->lang_iso)); + r.req.user_agent_send=opt->user_agent_send; if (!error) { @@ -901,11 +812,11 @@ int httpmirror(char* url1,httrackp* ptropt) { ( (liens[ptr]->pass2 == -1) ) ) ) { // sauter si lien annulé (ou fil vide) - if ((opt.debug>1) && (opt.log!=NULL)) { + if ((opt->debug>1) && (opt->log!=NULL)) { if (liens[ptr] != NULL && liens[ptr]->pass2 == -1) { - fspc(opt.log,"debug"); fprintf(opt.log,"link #%d is ready, 
skipping: %s%s.."LF,ptr,((urladr != NULL)?(urladr):(" ")),((urlfil != NULL)?(urlfil):(" "))); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"link #%d is ready, skipping: %s%s.."LF,ptr,((urladr != NULL)?(urladr):(" ")),((urlfil != NULL)?(urlfil):(" "))); } else { - fspc(opt.log,"debug"); fprintf(opt.log,"link #%d seems ready, skipping: %s%s.."LF,ptr,((urladr != NULL)?(urladr):(" ")),((urlfil != NULL)?(urlfil):(" "))); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"link #%d seems ready, skipping: %s%s.."LF,ptr,((urladr != NULL)?(urladr):(" ")),((urlfil != NULL)?(urlfil):(" "))); } test_flush; } @@ -918,8 +829,8 @@ int httpmirror(char* url1,httrackp* ptropt) { } if (liens[ptr]) { // on a qq chose à récupérer? - if ( (opt.debug>1) && (opt.log!=NULL) ) { - fspc(opt.log,"debug"); fprintf(opt.log,"Wait get: %s%s"LF,urladr,urlfil); + if ( (opt->debug>1) && (opt->log!=NULL) ) { + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"Wait get: %s%s"LF,urladr,urlfil); test_flush; #if DEBUG_ROBOTS if (strcmp(urlfil,"/robots.txt") == 0) { @@ -931,11 +842,11 @@ int httpmirror(char* url1,httrackp* ptropt) { // DEBUT --RECUPERATION LIEN--- if (ptr==0) { // premier lien à parcourir: lien primaire construit avant r.adr=primary; primary=NULL; - r.statuscode=200; + r.statuscode=HTTP_OK; r.size=strlen(r.adr); r.soc=INVALID_SOCKET; strcpybuff(r.contenttype,"text/html"); - /*} else if (opt.maxsoc<=0) { // fichiers 1 à 1 en attente (pas de backing) + /*} else if (opt->maxsoc<=0) { // fichiers 1 à 1 en attente (pas de backing) // charger le fichier en mémoire tout bêtement r=xhttpget(urladr,urlfil); // @@ -960,12 +871,12 @@ int httpmirror(char* url1,httrackp* ptropt) { str.mime = r.contenttype; str.url_host = urladr; str.url_file = urlfil; - str.size = (int) r.size; + str.size = (const int) r.size; /* */ str.addLink = htsAddLink; /* */ str.liens = liens; - str.opt = &opt; + str.opt = opt; str.sback = sback; str.cache = &cache; str.hashptr = hashptr; @@ -981,7 +892,7 @@ int httpmirror(char* url1,httrackp* 
ptropt) { stre.r_ = &r; /* */ stre.error_ = &error; - stre.exit_xh_ = &exit_xh; + stre.exit_xh_ = &opt->state.exit_xh; stre.store_errpage_ = &store_errpage; /* */ stre.base = base; @@ -1037,8 +948,8 @@ int httpmirror(char* url1,httrackp* ptropt) { } else { // lien vide.. - if (opt.errlog && opt.debug > 0) { - fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Warning, link #%d empty"LF,ptr); test_flush; + if (opt->log && opt->debug > 0) { + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Warning, link #%d empty"LF,ptr); test_flush; } error=1; goto jump_if_done; @@ -1061,11 +972,11 @@ int httpmirror(char* url1,httrackp* ptropt) { // error=1; // peut être que le fichier était trop gros? - if ((istoobig(r.totalsize,opt.maxfile_html,opt.maxfile_nonhtml,r.contenttype)) - || (istoobig(r.totalsize,opt.maxfile_html,opt.maxfile_nonhtml,r.contenttype))) { + if ((istoobig(opt,r.totalsize,opt->maxfile_html,opt->maxfile_nonhtml,r.contenttype)) + || (istoobig(opt,r.totalsize,opt->maxfile_html,opt->maxfile_nonhtml,r.contenttype))) { error=0; - if (opt.errlog) { - fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Big file cancelled according to user's preferences: %s%s"LF,urladr,urlfil); + if (opt->log) { + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Big file cancelled according to user's preferences: %s%s"LF,urladr,urlfil); test_flush; } } @@ -1084,14 +995,14 @@ int httpmirror(char* url1,httrackp* ptropt) { // Content-disposition="foo.jpg" // -------------------- if (!error) { - if (r.statuscode == 200) { // OK (ou 304 en backing) + if (r.statuscode == HTTP_OK) { // OK (ou 304 en backing) if (r.adr) { // Written file - if ( (is_hypertext_mime(r.contenttype, urlfil)) /* Is HTML or Js, .. */ + if ( (is_hypertext_mime(opt,r.contenttype, urlfil)) /* Is HTML or Js, .. */ /* NO - real media is real media, and mms is mms, not HTML */ /*|| (may_be_hypertext_mime(r.contenttype, urlfil) && (r.adr) )*/ /* Is real media, .. */ ) { if (strnotempty(r.cdispo)) { // Content-disposition set! 
- if (ishtml(savename) == 0) { // Non HTML!! + if (ishtml(opt, savename) == 0) { // Non HTML!! // patch it! strcpybuff(r.contenttype,"application/octet-stream"); } @@ -1103,8 +1014,8 @@ int httpmirror(char* url1,httrackp* ptropt) { // ------------------------------------ // BOGUS MIME TYPE HACK II (the revenge) // Check if we have a bogus MIME type - if ( (is_hypertext_mime(r.contenttype, urlfil)) /* Is HTML or Js, .. */ - || (may_be_hypertext_mime(r.contenttype, urlfil)) /* Is real media, .. */ + if ( (is_hypertext_mime(opt,r.contenttype, urlfil)) /* Is HTML or Js, .. */ + || (may_be_hypertext_mime(opt,r.contenttype, urlfil)) /* Is real media, .. */ ) { if ((r.adr) && (r.size)) { unsigned int map[256]; @@ -1150,7 +1061,7 @@ int httpmirror(char* url1,httrackp* ptropt) { #define CH_ADD_RNG2(c, r, r2, o) do { \ CH_ADD_RNG1(c, (r) * (r2), o); \ } while(0) - int new_capa = r.size / 2 + 1; + int new_capa = (int) ( r.size / 2 + 1 ); int new_offs = 0; unsigned char* prev_adr = (unsigned char*) r.adr; unsigned char* new_adr = (unsigned char*) malloct(new_capa); @@ -1203,7 +1114,7 @@ int httpmirror(char* url1,httrackp* ptropt) { /* U+D800..U+DFFF */ CH_ADD('?'); /* ill-formed */ - } else if (unic <= 0xFFFF) { + } else /* if (unic <= 0xFFFF) */ { /* U+E000..U+FFFF EE..EF 80..BF 80..BF */ unic -= 0xE000; CH_ADD_RNG2( unic, 0xbf - 0x80 + 1, 0xbf - 0x80 + 1, 0xee ); @@ -1211,8 +1122,8 @@ int httpmirror(char* url1,httrackp* ptropt) { CH_ADD_RNG0( unic, 0x80 ); } } - if (opt.errlog) { - fspc(opt.errlog,"warning"); fprintf(opt.errlog,"File %s%s converted from UCS2 to UTF-8 (old size: %d bytes, new size: %d bytes)"LF, urladr, urlfil, (int)r.size, new_offs); + if (opt->log) { + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"File %s%s converted from UCS2 to UTF-8 (old size: %d bytes, new size: %d bytes)"LF, urladr, urlfil, (int)r.size, new_offs); test_flush; } freet(r.adr); @@ -1226,8 +1137,8 @@ int httpmirror(char* url1,httrackp* ptropt) { #undef CH_ADD_RNG2 } else if ((nspec > r.size 
/ 100) && (nspec > 10)) { // too many special characters strcpybuff(r.contenttype,"application/octet-stream"); - if (opt.errlog) { - fspc(opt.errlog,"warning"); fprintf(opt.errlog,"File not parsed, looks like binary: %s%s"LF,urladr,urlfil); + if (opt->log) { + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"File not parsed, looks like binary: %s%s"LF,urladr,urlfil); test_flush; } } @@ -1250,7 +1161,7 @@ int httpmirror(char* url1,httrackp* ptropt) { // Check if we have to load locally the file // -------------------- //if (!error) { - // if (r.statuscode == 200) { // OK (ou 304 en backing) + // if (r.statuscode == HTTP_OK) { // OK (ou 304 en backing) // if (r.adr==NULL) { // Written file // if (may_be_hypertext_mime(r.contenttype, urlfil)) { // to parse! // LLint sz; @@ -1261,7 +1172,7 @@ int httpmirror(char* url1,httrackp* ptropt) { // if (fp) { // r.adr=malloct((int)sz + 2); // if (r.adr) { - // if (fread(r.adr,1,(INTsys)sz,fp) == sz) { + // if (fread(r.adr,1,sz,fp) == sz) { // r.size=sz; // r.adr[sz] = '\0'; // r.is_write = 0; @@ -1295,7 +1206,7 @@ int httpmirror(char* url1,httrackp* ptropt) { if (!error) { if (ptr>0) { if (liens[ptr]) { - xxcache_mayadd(&opt,&cache,&r,urladr,urlfil,savename); + xxcache_mayadd(opt,&cache,&r,urladr,urlfil,savename); } else error=1; } @@ -1328,7 +1239,7 @@ int httpmirror(char* url1,httrackp* ptropt) { str.addLink = htsAddLink; /* */ str.liens = liens; - str.opt = &opt; + str.opt = opt; str.sback = sback; str.cache = &cache; str.hashptr = hashptr; @@ -1344,7 +1255,7 @@ int httpmirror(char* url1,httrackp* ptropt) { stre.r_ = &r; /* */ stre.error_ = &error; - stre.exit_xh_ = &exit_xh; + stre.exit_xh_ = &opt->state.exit_xh; stre.store_errpage_ = &store_errpage; /* */ stre.base = base; @@ -1394,7 +1305,7 @@ int httpmirror(char* url1,httrackp* ptropt) { /* Load file if necessary */ if ( - is_hypertext_mime(r.contenttype, urlfil) /* Is HTML or Js, .. */ + may_be_hypertext_mime(opt,r.contenttype, urlfil) /* Is HTML or Js, .. 
*/ && (liens[ptr]->depth>0) /* Depth > 0 (recurse depth) */ && (r.adr==NULL) /* HTML Data exists */ && (!store_errpage) /* Not an html error page */ @@ -1402,15 +1313,15 @@ int httpmirror(char* url1,httrackp* ptropt) { ) { r.adr = readfile2(savename, &r.size); - (void) unlink(fconv(savename)); + (void) unlink(fconv(OPT_GET_BUFF(opt),savename)); if (r.adr != NULL) { - if ( (opt.debug>0) && (opt.log!=NULL) ) { - fspc(opt.log,"info"); fprintf(opt.log,"File successfully loaded for parsing: %s%s (%d bytes)"LF,urladr,urlfil,(int)r.size); + if ( (opt->debug>0) && (opt->log!=NULL) ) { + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"File successfully loaded for parsing: %s%s (%d bytes)"LF,urladr,urlfil,(int)r.size); test_flush; } } else { - if ( opt.log != NULL ) { - fspc(opt.log,"error"); fprintf(opt.log,"File could not be loaded for parsing: %s%s"LF,urladr,urlfil); + if ( opt->log != NULL ) { + HTS_LOG(opt,LOG_ERROR); fprintf(opt->log,"File could not be loaded for parsing: %s%s"LF,urladr,urlfil); test_flush; } } @@ -1437,19 +1348,19 @@ int httpmirror(char* url1,httrackp* ptropt) { /* if (ptr>0) { // "mis à jour" - if ((!r.notmodified) && (opt.is_update) && (!store_errpage)) { // page modifiée + if ((!r.notmodified) && (opt->is_update) && (!store_errpage)) { // page modifiée if (strnotempty(savename)) { HTS_STAT.stat_updated_files++; - if (opt.log!=NULL) { - //if ((opt.debug>0) && (opt.log!=NULL)) { - fspc(opt.log,"info"); fprintf(opt.log,"File updated: %s%s"LF,urladr,urlfil); + if (opt->log!=NULL) { + //if ((opt->debug>0) && (opt->log!=NULL)) { + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"File updated: %s%s"LF,urladr,urlfil); test_flush; } } } else { if (!store_errpage) { - if ( (opt.debug>0) && (opt.log!=NULL) ) { - fspc(opt.log,"info"); fprintf(opt.log,"File recorded: %s%s"LF,urladr,urlfil); + if ( (opt->debug>0) && (opt->log!=NULL) ) { + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"File recorded: %s%s"LF,urladr,urlfil); test_flush; } } @@ -1463,8 +1374,8 @@ int httpmirror(char* 
url1,httrackp* ptropt) { // traiter if ( - ( (is_hypertext_mime(r.contenttype, urlfil)) /* Is HTML or Js, .. */ - || (may_be_hypertext_mime(r.contenttype, urlfil) && r.adr != NULL ) /* Is real media, .. */ + ( (is_hypertext_mime(opt,r.contenttype, urlfil)) /* Is HTML or Js, .. */ + || (may_be_hypertext_mime(opt,r.contenttype, urlfil) && r.adr != NULL ) /* Is real media, .. */ ) && (liens[ptr]->depth>0) /* Depth > 0 (recurse depth) */ && (r.adr!=NULL) /* HTML Data exists */ @@ -1476,8 +1387,8 @@ int httpmirror(char* url1,httrackp* ptropt) { // Parsing HTML if (!error) { /* Info for wrappers */ - if ( (opt.debug>0) && (opt.log!=NULL) ) { - fspc(opt.log,"info"); fprintf(opt.log,"engine: check-html: %s%s"LF,urladr,urlfil); + if ( (opt->debug>0) && (opt->log!=NULL) ) { + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"engine: check-html: %s%s"LF,urladr,urlfil); } { char BIGSTK buff_err_msg[1024]; @@ -1497,7 +1408,7 @@ int httpmirror(char* url1,httrackp* ptropt) { str.addLink = htsAddLink; /* */ str.liens = liens; - str.opt = &opt; + str.opt = opt; str.sback = sback; str.cache = &cache; str.hashptr = hashptr; @@ -1513,7 +1424,7 @@ int httpmirror(char* url1,httrackp* ptropt) { stre.r_ = &r; /* */ stre.error_ = &error; - stre.exit_xh_ = &exit_xh; + stre.exit_xh_ = &opt->state.exit_xh; stre.store_errpage_ = &store_errpage; /* */ stre.base = base; @@ -1571,11 +1482,11 @@ int httpmirror(char* url1,httrackp* ptropt) { if (store_errpage) { // c'est une page d'erreur int create_html_warning=0; int create_gif_warning=0; - switch (ishtml(urlfil)) { /* pas fichier html */ + switch (ishtml(opt,urlfil)) { /* pas fichier html */ case 0: /* non html */ { char buff[256]; - guess_httptype(buff,urlfil); + guess_httptype(opt,buff,urlfil); if (strcmp(buff,"image/gif")==0) create_gif_warning=1; } @@ -1590,8 +1501,8 @@ int httpmirror(char* url1,httrackp* ptropt) { /* Créer message d'erreur ? 
*/ if (create_html_warning) { char* adr=(char*)malloct(strlen(HTS_DATA_ERROR_HTML)+1100); - if ( (opt.debug>0) && (opt.log!=NULL) ) { - fspc(opt.log,"info"); fprintf(opt.log,"Creating HTML warning file (%s)"LF,r.msg); + if ( (opt->debug>0) && (opt->log!=NULL) ) { + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"Creating HTML warning file (%s)"LF,r.msg); test_flush; } if (adr) { @@ -1604,8 +1515,8 @@ int httpmirror(char* url1,httrackp* ptropt) { } } else if (create_gif_warning) { char* adr=(char*)malloct(HTS_DATA_UNKNOWN_GIF_LEN); - if ( (opt.debug>0) && (opt.log!=NULL) ) { - fspc(opt.log,"info"); fprintf(opt.log,"Creating GIF dummy file (%s)"LF,r.msg); + if ( (opt->debug>0) && (opt->log!=NULL) ) { + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"Creating GIF dummy file (%s)"LF,r.msg); test_flush; } if (r.adr) { @@ -1642,7 +1553,7 @@ int httpmirror(char* url1,httrackp* ptropt) { *comm = '\0'; } /* strip spaces */ - llen=strlen(line); + llen = (int) strlen(line); while(llen > 0 && is_realspace(line[llen - 1])) { line[llen - 1] = '\0'; llen--; @@ -1670,7 +1581,7 @@ int httpmirror(char* url1,httrackp* ptropt) { a++; // sauter espace(s) if (strnotempty(a)) { #ifdef IGNORE_RESTRICTIVE_ROBOTS - if (strcmp(a,"/") != 0 || opt.robots >= 3) + if (strcmp(a,"/") != 0 || opt->robots >= 3) #endif { /* ignoring disallow: / */ if ( (strlen(buff) + strlen(a) + 8) < sizeof(buff)) { @@ -1684,8 +1595,8 @@ int httpmirror(char* url1,httrackp* ptropt) { } #ifdef IGNORE_RESTRICTIVE_ROBOTS else { - if (opt.errlog!=NULL) { - fspc(opt.errlog,"info"); fprintf(opt.errlog,"Note: %s robots.txt rules are too restrictive, ignoring /"LF,urladr); + if (opt->log!=NULL) { + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"Note: %s robots.txt rules are too restrictive, ignoring /"LF,urladr); test_flush; } } @@ -1696,14 +1607,14 @@ int httpmirror(char* url1,httrackp* ptropt) { } while( (bptrlog!=NULL) { + if (opt->log != opt->log) { + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"Note: robots.txt forbidden links for %s are: 
%s"LF,urladr,infobuff); test_flush; } } - if (opt.errlog!=NULL) { - fspc(opt.errlog,"info"); fprintf(opt.errlog,"Note: due to %s remote robots.txt rules, links begining with these path will be forbidden: %s (see in the options to disable this)"LF,urladr,infobuff); + if (opt->log!=NULL) { + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"Note: due to %s remote robots.txt rules, links begining with these path will be forbidden: %s (see in the options to disable this)"LF,urladr,infobuff); test_flush; } } @@ -1723,7 +1634,7 @@ int httpmirror(char* url1,httrackp* ptropt) { // Si par la suite on doit retraiter ce fichier avec un niveau de récursion plus // fort, on supprimera le readme, et on scannera le fichier html! // note: sauté si store_errpage (càd si page d'erreur, non à scanner!) - if ( (is_hypertext_mime(r.contenttype, urlfil)) && (!store_errpage) && (r.size>0)) { // c'est du html!! + if ( (is_hypertext_mime(opt,r.contenttype, urlfil)) && (!store_errpage) && (r.size>0)) { // c'est du html!! 
char BIGSTK tempo[HTS_URLMAXSIZE*2]; FILE* fp; tempo[0]='\0'; @@ -1744,32 +1655,32 @@ int httpmirror(char* url1,httrackp* ptropt) { #endif if ((fp=fopen(tempo,"wb"))!=NULL) { - fprintf(fp,"Info-file generated by HTTrack Website Copier "HTTRACK_VERSION"%s"CRLF""CRLF, WHAT_is_available); + fprintf(fp,"Info-file generated by HTTrack Website Copier "HTTRACK_VERSION"%s"CRLF""CRLF, hts_get_version_info(opt)); fprintf(fp,"The file %s has not been scanned by HTS"CRLF,savename); fprintf(fp,"Some links contained in it may be unreachable locally."CRLF); fprintf(fp,"If you want to get these files, you have to set an upper recurse level, "); fprintf(fp,"and to rescan the URL."CRLF); fclose(fp); -#if HTS_WIN==0 +#ifndef _WIN32 chmod(tempo,HTS_ACCESS_FILE); #endif - usercommand(&opt,0,NULL,fconv(tempo),"",""); + usercommand(opt,0,NULL,fconv(OPT_GET_BUFF(opt),tempo),"",""); } - if ( (opt.debug>0) && (opt.errlog!=NULL) ) { - fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Warning: store %s without scan: %s"LF,r.contenttype,savename); + if ( (opt->debug>0) && (opt->log!=NULL) ) { + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Warning: store %s without scan: %s"LF,r.contenttype,savename); test_flush; } } else { - if ((opt.getmode & 2)!=0) { // ok autorisé - if ( (opt.debug>1) && (opt.log!=NULL) ) { - fspc(opt.log,"debug"); fprintf(opt.log,"Store %s: %s"LF,r.contenttype,savename); + if ((opt->getmode & 2)!=0) { // ok autorisé + if ( (opt->debug>1) && (opt->log!=NULL) ) { + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"Store %s: %s"LF,r.contenttype,savename); test_flush; } } else { // lien non autorisé! 
(ex: cgi-bin en html) - if ((opt.debug>1) && (opt.log!=NULL)) { - fspc(opt.log,"debug"); fprintf(opt.log,"non-html file ignored after upload at %s : %s"LF,urladr,urlfil); + if ((opt->debug>1) && (opt->log!=NULL)) { + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"non-html file ignored after upload at %s : %s"LF,urladr,urlfil); test_flush; } if (r.adr) { @@ -1782,18 +1693,19 @@ int httpmirror(char* url1,httrackp* ptropt) { // ATTENTION C'EST ICI QU'ON SAUVE LE FICHIER!! if (r.adr) { - file_notify(urladr,urlfil, savename, 1, 1, r.notmodified); - if (filesave(&opt,r.adr,(int)r.size,savename,urladr,urlfil)!=0) { + file_notify(opt, urladr,urlfil, savename, 1, 1, r.notmodified); + if (filesave(opt,r.adr,(int)r.size,savename,urladr,urlfil)!=0) { int fcheck; if ((fcheck=check_fatal_io_errno())) { - fspc(opt.log,"error"); fprintf(opt.log,"Mirror aborted: disk full or filesystem problems"LF); test_flush; - exit_xh=-1; /* fatal error */ + HTS_LOG(opt,LOG_ERROR); fprintf(opt->log,"Mirror aborted: disk full or filesystem problems"LF); test_flush; + opt->state.exit_xh=-1; /* fatal error */ } - if (opt.errlog) { - fspc(opt.errlog,"error"); fprintf(opt.errlog,"Unable to save file %s : %s"LF, savename, strerror(errno)); + if (opt->log) { + int last_errno = errno; + HTS_LOG(opt,LOG_ERROR); fprintf(opt->log,"Unable to save file %s : %s"LF, savename, strerror(last_errno)); if (fcheck) { - fspc(opt.errlog,"error"); - fprintf(opt.errlog,"* * Fatal write error, giving up"LF); + HTS_LOG(opt,LOG_ERROR); + fprintf(opt->log,"* * Fatal write error, giving up"LF); } test_flush; } @@ -1812,8 +1724,8 @@ int httpmirror(char* url1,httrackp* ptropt) { /* Parsing of other media types (java, ram..) 
*/ /* if (strfield2(r.contenttype,"audio/x-pn-realaudio")) { - if ((opt.debug>1) && (opt.log!=NULL)) { - fspc(opt.log,"debug"); fprintf(opt.log,"(Real Media): parsing %s"LF,savename); test_flush; + if ((opt->debug>1) && (opt->log!=NULL)) { + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(Real Media): parsing %s"LF,savename); test_flush; } if (fexist(savename)) { // ok, existe bien! FILE* fp=fopen(savename,"r+b"); @@ -1822,8 +1734,8 @@ int httpmirror(char* url1,httrackp* ptropt) { char BIGSTK line[HTS_URLMAXSIZE*2]; linput(fp,line,HTS_URLMAXSIZE); if (strnotempty(line)) { - if ((opt.debug>1) && (opt.log!=NULL)) { - fspc(opt.log,"debug"); fprintf(opt.log,"(Real Media): detected %s"LF,line); test_flush; + if ((opt->debug>1) && (opt->log!=NULL)) { + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(Real Media): detected %s"LF,line); test_flush; } } } @@ -1834,7 +1746,7 @@ int httpmirror(char* url1,httrackp* ptropt) { /* External modules */ - if (opt.parsejava && fexist(savename)) { + if ( opt->parsejava && ( opt->parsejava & HTSPARSE_NO_CLASS ) == 0 && fexist(savename)) { char BIGSTK buff_err_msg[1024]; htsmoduleStruct BIGSTK str; buff_err_msg[0] = '\0'; @@ -1850,7 +1762,7 @@ int httpmirror(char* url1,httrackp* ptropt) { str.addLink = htsAddLink; /* */ str.liens = liens; - str.opt = &opt; + str.opt = opt; str.sback = sback; str.cache = &cache; str.hashptr = hashptr; @@ -1864,13 +1776,13 @@ int httpmirror(char* url1,httrackp* ptropt) { /* Parse if recognized */ switch(hts_parse_externals(&str)) { case 1: - if ((opt.debug>1) && (opt.log!=NULL)) { - fspc(opt.log,"debug"); fprintf(opt.log,"(External module): parsed successfully %s"LF,savename); test_flush; + if ((opt->debug>1) && (opt->log!=NULL)) { + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(External module): parsed successfully %s"LF,savename); test_flush; } break; case 0: - if ((opt.debug>1) && (opt.log!=NULL)) { - fspc(opt.log,"debug"); fprintf(opt.log,"(External module): couldn't parse successfully %s : %s"LF,savename, 
str.err_msg); test_flush; + if ((opt->debug>1) && (opt->log!=NULL)) { + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(External module): couldn't parse successfully %s : %s"LF,savename, str.err_msg); test_flush; } break; } @@ -1882,7 +1794,7 @@ int httpmirror(char* url1,httrackp* ptropt) { /* Post-processing */ if (fexist(savename)) { - usercommand(&opt, 0, NULL, savename, urladr, urlfil); + usercommand(opt, 0, NULL, savename, urladr, urlfil); } } // if !error @@ -1898,7 +1810,7 @@ jump_if_done: ptr++; // faut-il sauter le(s) lien(s) suivant(s)? (fichiers images à passer après les html) - if (opt.getmode & 4) { // sauver les non html après + if (opt->getmode & 4) { // sauver les non html après // sauter les fichiers selon la passe if (!numero_passe) { while((ptrpass2):0) ptr++; @@ -1907,8 +1819,8 @@ jump_if_done: } if (ptr>=lien_tot) { // fin de boucle if (!numero_passe) { // première boucle - if ((opt.debug>1) && (opt.log!=NULL)) { - fprintf(opt.log,LF"Now getting non-html files..."LF); + if ((opt->debug>1) && (opt->log!=NULL)) { + fprintf(opt->log,LF"Now getting non-html files..."LF); test_flush; } numero_passe=1; // seconde boucle @@ -1923,19 +1835,19 @@ jump_if_done: } // copy abort state if necessary from outside - if (!exit_xh && opt.state.exit_xh) { - exit_xh=opt.state.exit_xh; - } + //if (!exit_xh && opt->state.exit_xh) { + // exit_xh=opt->state.exit_xh; + //} // a-t-on dépassé le quota? 
- if (!back_checkmirror(&opt)) { + if (!back_checkmirror(opt)) { ptr=lien_tot; - } else if (exit_xh) { // sortir - if (opt.errlog) { - fspc(opt.errlog,"info"); - if (exit_xh==1) { - fprintf(opt.errlog,"Exit requested by shell or user"LF); + } else if (opt->state.exit_xh) { // sortir + if (opt->log) { + HTS_LOG(opt,LOG_INFO); + if (opt->state.exit_xh==1) { + fprintf(opt->log,"Exit requested by shell or user"LF); } else { - fprintf(opt.errlog,"Exit requested by engine"LF); + fprintf(opt->log,"Exit requested by engine"LF); } test_flush; } @@ -1963,22 +1875,22 @@ jump_if_done: && (HTS_STAT.HTS_TOTAL_RECV < 32768) /* should be fine */ ) { - if (opt.errlog) { - fspc(opt.errlog,"info"); fprintf(opt.errlog,"No data seems to have been transfered during this session! : restoring previous one!"LF); + if (opt->log) { + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"No data seems to have been transfered during this session! : restoring previous one!"LF); test_flush; } XH_uninit; - if ( (fexist(fconcat(opt.path_log,"hts-cache/old.dat"))) && (fexist(fconcat(opt.path_log,"hts-cache/old.ndx"))) ) { - remove(fconcat(opt.path_log,"hts-cache/new.dat")); - remove(fconcat(opt.path_log,"hts-cache/new.ndx")); - remove(fconcat(opt.path_log,"hts-cache/new.lst")); - remove(fconcat(opt.path_log,"hts-cache/new.txt")); - rename(fconcat(opt.path_log,"hts-cache/old.dat"),fconcat(opt.path_log,"hts-cache/new.dat")); - rename(fconcat(opt.path_log,"hts-cache/old.ndx"),fconcat(opt.path_log,"hts-cache/new.ndx")); - rename(fconcat(opt.path_log,"hts-cache/old.lst"),fconcat(opt.path_log,"hts-cache/new.lst")); - rename(fconcat(opt.path_log,"hts-cache/old.txt"),fconcat(opt.path_log,"hts-cache/new.txt")); - } - exit_xh=2; /* interrupted (no connection detected) */ + if ( (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.dat"))) && (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.ndx"))) ) { + remove(fconcat(OPT_GET_BUFF(opt), 
StringBuff(opt->path_log),"hts-cache/new.dat")); + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.ndx")); + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.lst")); + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.txt")); + rename(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.dat"),fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.dat")); + rename(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.ndx"),fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.ndx")); + rename(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.lst"),fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.lst")); + rename(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.txt"),fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.txt")); + } + opt->state.exit_xh=2; /* interrupted (no connection detected) */ return 1; } @@ -1990,33 +1902,31 @@ jump_if_done: // purger! 
if (cache.lst) { fclose(cache.lst); cache.lst=NULL; - if (opt.delete_old) { + if (opt->delete_old) { FILE *old_lst,*new_lst; // -#if HTS_ANALYSTE - _hts_in_html_parsing=3; -#endif + opt->state._hts_in_html_parsing=3; // - old_lst=fopen(fconcat(opt.path_log,"hts-cache/old.lst"),"rb"); + old_lst=fopen(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.lst"),"rb"); if (old_lst) { - LLint sz=fsize(fconcat(opt.path_log,"hts-cache/new.lst")); - new_lst=fopen(fconcat(opt.path_log,"hts-cache/new.lst"),"rb"); + off_t sz=fsize(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.lst")); + new_lst=fopen(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.lst"),"rb"); if ((new_lst) && (sz>0)) { - char* adr=(char*) malloct((INTsys)sz); + char* adr=(char*) malloct(sz); if (adr) { - if (fread(adr,1,(INTsys)sz,new_lst) == sz) { + if (fread(adr,1,sz,new_lst) == sz) { char line[1100]; int purge=0; while(!feof(old_lst)) { linput(old_lst,line,1000); if (!strstr(adr,line)) { // fichier non trouvé dans le nouveau? char BIGSTK file[HTS_URLMAXSIZE*2]; - strcpybuff(file,opt.path_html); + strcpybuff(file,StringBuff(opt->path_html)); strcatbuff(file,line+1); file[strlen(file)-1]='\0'; if (fexist(file)) { // toujours sur disque: virer - if (opt.log) { - fspc(opt.log,"info"); fprintf(opt.log,"Purging %s"LF,file); + if (opt->log) { + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"Purging %s"LF,file); } remove(file); purge=1; } @@ -2034,12 +1944,12 @@ jump_if_done: if (strnotempty(line)) if (!strstr(adr,line)) { // non trouvé? 
char BIGSTK file[HTS_URLMAXSIZE*2]; - strcpybuff(file,opt.path_html); + strcpybuff(file,StringBuff(opt->path_html)); strcatbuff(file,line+1); while ((strnotempty(file)) && (rmdir(file)==0)) { // ok, éliminé (existait) purge=1; - if (opt.log) { - fspc(opt.log,"info"); fprintf(opt.log,"Purging directory %s/"LF,file); + if (opt->log) { + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"Purging directory %s/"LF,file); while(strnotempty(file) && (file[strlen(file)-1]!='/') && (file[strlen(file)-1]!='\\')) { file[strlen(file)-1]='\0'; } @@ -2052,8 +1962,8 @@ jump_if_done: } // if (!purge) { - if (opt.log) { - fprintf(opt.log,"No files purged"LF); + if (opt->log) { + fprintf(opt->log,"No files purged"LF); } } } @@ -2064,23 +1974,21 @@ jump_if_done: fclose(old_lst); } // -#if HTS_ANALYSTE - _hts_in_html_parsing=0; -#endif + opt->state._hts_in_html_parsing=0; } } // fin purge! // Indexation - if (opt.kindex) - index_finish(opt.path_html,opt.kindex); + if (opt->kindex) + index_finish(StringBuff(opt->path_html),opt->kindex); // afficher résumé dans log - if (opt.log!=NULL) { + if (opt->log!=NULL) { char BIGSTK finalInfo[8192]; - int error = fspc(NULL,"error"); - int warning = fspc(NULL,"warning"); - int info = fspc(NULL,"info"); + int error = fspc(opt,NULL,"error"); + int warning = fspc(opt,NULL,"warning"); + int info = fspc(opt,NULL,"info"); char BIGSTK htstime[256]; char BIGSTK infoupdated[256]; // int n=(int) (stat_loaded/(time_local()-HTS_STAT.stat_timestart)); @@ -2089,7 +1997,7 @@ jump_if_done: sec2str(htstime,time_local()-HTS_STAT.stat_timestart); //sprintf(finalInfo + strlen(finalInfo),LF"HTS-mirror complete in %s : %d links scanned, %d files written (%d bytes overall) [%d bytes received at %d bytes/sec]"LF,htstime,lien_tot-1,HTS_STAT.stat_files,stat_bytes,stat_loaded,n); infoupdated[0] = '\0'; - if (opt.is_update) { + if (opt->is_update) { if (HTS_STAT.stat_updated_files > 0) { sprintf(infoupdated, ", %d files updated", (int)HTS_STAT.stat_updated_files); } else { @@ -2114,7 
+2022,7 @@ jump_if_done: int packed_ratio=(int)((LLint)(HTS_STAT.total_packed*100)/HTS_STAT.total_unpacked); sprintf(finalInfo + strlen(finalInfo),", "LLintP" bytes transfered using HTTP compression in %d files, ratio %d%%",(LLint)HTS_STAT.total_unpacked,HTS_STAT.total_packedfiles,(int)packed_ratio); } - if (!opt.nokeepalive && HTS_STAT.stat_sockid > 0 && HTS_STAT.stat_nrequests > HTS_STAT.stat_sockid) { + if (!opt->nokeepalive && HTS_STAT.stat_sockid > 0 && HTS_STAT.stat_nrequests > HTS_STAT.stat_sockid) { int rq = (HTS_STAT.stat_nrequests * 10) / HTS_STAT.stat_sockid; sprintf(finalInfo + strlen(finalInfo),", %d.%d requests per connection", rq/10, rq%10); } @@ -2125,7 +2033,7 @@ jump_if_done: sprintf(finalInfo + strlen(finalInfo),"(No errors, %d warnings, %d messages)"LF,warning,info); // Log - fprintf(opt.log,LF"%s", finalInfo); + fprintf(opt->log,LF"%s", finalInfo); // Close ZIP if (cache.zipOutput) { @@ -2162,7 +2070,7 @@ jump_if_done: // fin afficher résumé dans log // ending - usercommand(&opt,0,NULL,NULL,NULL,NULL); + usercommand(opt,0,NULL,NULL,NULL,NULL); // désallocation mémoire & buffers XH_uninit; @@ -2172,7 +2080,7 @@ jump_if_done: // version 2 pour le reste // flusher si on doit lire peu à peu le fichier #undef test_flush -#define test_flush if (opt->flush) { fflush(opt->log); fflush(opt->errlog); } +#define test_flush if (opt->flush) { fflush(opt->log); fflush(opt->log); } // Estimate transfer rate @@ -2260,10 +2168,10 @@ void host_ban(httrackp* opt,lien_url** liens,int ptr,int lien_tot,struct_back* s opt->maxfilter += HTS_FILTERSINC; if (filters_init(&_FILTERS, opt->maxfilter, HTS_FILTERSINC) == 0) { printf("PANIC! 
: Too many filters : >%d [%d]\n",*_FILTERS_PTR,__LINE__); - if (opt->errlog) { - fprintf(opt->errlog,LF"Too many filters, giving up..(>%d)"LF,*_FILTERS_PTR); - fprintf(opt->errlog,"To avoid that: use #F option for more filters (example: -#F5000)"LF); - fflush(opt->errlog); + if (opt->log) { + fprintf(opt->log,LF"Too many filters, giving up..(>%d)"LF,*_FILTERS_PTR); + fprintf(opt->log,"To avoid that: use #F option for more filters (example: -#F5000)"LF); + fflush(opt->log); } assertf("too many filters - giving up" == NULL); } @@ -2349,48 +2257,6 @@ void host_ban(httrackp* opt,lien_url** liens,int ptr,int lien_tot,struct_back* s } } - -#if 0 -/* Init structure */ -/* 1 : init */ -/* -1 : off */ -/* 0 : query */ -/* 2 : LOCK */ -/* -2 : UNLOCK */ -void* structcheck_init(int init) { - int structcheck_size = 1024; - inthash structcheck_hash=NULL; - /* */ - static PTHREAD_LOCK_TYPE structcheck_init_mutex; - static int structcheck_init_mutex_init=0; - - if (init == 1 || init == -1) { - if (init) { - if (structcheck_hash) - inthash_delete(&structcheck_hash); - structcheck_hash=NULL; - } - if (init != -1) { - if (structcheck_init_mutex_init == 0) { - htsSetLock(&structcheck_init_mutex, -999); - structcheck_init_mutex_init=1; - } - if (structcheck_hash==NULL) { - structcheck_hash=inthash_new(structcheck_size); // désalloué xh_xx - } - } - } - - /* Lock / Unlock */ - if (init == 2) { // Lock - htsSetLock(&structcheck_init_mutex, 1); - } else if (init == -2) { // Unlock - htsSetLock(&structcheck_init_mutex, 0); - } - return structcheck_hash; -} -#endif - int filters_init(char*** ptrfilters, int maxfilter, int filterinc) { char** filters = *ptrfilters; int filter_max=maximum(maxfilter, 128); @@ -2430,76 +2296,155 @@ int filters_init(char*** ptrfilters, int maxfilter, int filterinc) { return (filters != NULL) ? 
filter_max : 0; } -// vérifier présence de l'arbo -HTSEXT_API int structcheck(char* s) { - // vérifier la présence des dossier(s) - char *a=s; - char BIGSTK nom[HTS_URLMAXSIZE*2]; - char *b; - //inthash structcheck_hash=NULL; - if (strnotempty(s)==0) return 0; - if (strlen(s)>HTS_URLMAXSIZE) return 0; - - // Get buffer address - /* - structcheck_hash = (inthash)structcheck_init(0); - if (structcheck_hash == NULL) { - return -1; +static int mkdir_compat(const char *pathname) { +#ifdef _WIN32 + return mkdir(pathname); +#else + return mkdir(pathname, HTS_ACCESS_FOLDER); +#endif +} + +/* path must end with "/" or with the finename (/tmp/bar/ or /tmp/bar/foo.zip) */ +HTSEXT_API int dir_exists(const char* path) { + struct stat st; + char BIGSTK file[HTS_URLMAXSIZE*2]; + int i = 0; + if (strnotempty(path) == 0) { + errno = EINVAL; + return 0; + } + if (strlen(path) > HTS_URLMAXSIZE) { + errno = EINVAL; + return 0; } - */ - b=nom; - do { - if (*a) *b++=*a++; - while((*a!='/') && (*a!='\0')) *b++=*a++; - *b='\0'; // pas de ++ pour boucler - if (*a=='/') { // toujours dossier - if (strnotempty(nom)) { - //if (inthash_write(structcheck_hash, nom, 1)) { // non encore créé -#if HTS_WIN - if (mkdir(fconv(nom))!=0) -#else - if (mkdir(fconv(nom),HTS_ACCESS_FOLDER)!=0) + /* Get a copy */ + strcpybuff(file, path); +#ifdef _WIN32 + /* To system name */ + for(i = 0 ; file[i] != 0 ; i++) { + if (file[i] == '/') { + file[i] = PATH_SEPARATOR; + } + } #endif - { -#if HTS_REMOVE_ANNOYING_INDEX - // might be a filename with same name than this folder - // then, remove it to allow folder creation - // it happends when servers gives a folder index while - // requesting / page - // -> if the file can be opened (not a folder) then rename it - if (fexist(fconv(nom))) { - rename(fconv(nom),fconcat(fconv(nom),".txt")); - } - // if it fails, that's too bad -#if HTS_WIN - mkdir(fconv(nom)); -#else - mkdir(fconv(nom),HTS_ACCESS_FOLDER); + /* Get prefix (note: file can not be empty here) */ + for(i = 
(int) strlen(file) - 1 ; i > 0 && file[i] != PATH_SEPARATOR ; i--); + for( ; i > 0 && file[i] == PATH_SEPARATOR ; i--); + file[i + 1] = '\0'; + + /* Check the final dir */ + if (stat(file, &st) == 0 && S_ISDIR(st.st_mode)) { + errno = 0; + return 1; /* EXISTS */ + } + errno = 0; + return 0; /* DOES NOT EXISTS */ +} + +/* path must end with "/" or with the finename (/tmp/bar/ or /tmp/bar/foo.zip) */ +HTSEXT_API int structcheck(const char* path) { + struct stat st; + char BIGSTK tmpbuf[HTS_URLMAXSIZE*2]; + char BIGSTK file[HTS_URLMAXSIZE*2]; + int i = 0; + int npaths; + if (strnotempty(path) == 0) + return 0; + if (strlen(path) > HTS_URLMAXSIZE) { + errno = EINVAL; + return -1; + } + + /* Get a copy */ + strcpybuff(file, path); +#ifdef _WIN32 + /* To system name */ + for(i = 0 ; file[i] != 0 ; i++) { + if (file[i] == '/') { + file[i] = PATH_SEPARATOR; + } + } #endif + /* Get prefix (note: file can not be empty here) */ + for(i = (int) strlen(file) - 1 ; i > 0 && file[i] != PATH_SEPARATOR ; i--); + for( ; i > 0 && file[i] == PATH_SEPARATOR ; i--); + file[i + 1] = '\0'; + + /* First check the final dir */ + if (stat(file, &st) == 0 && S_ISDIR(st.st_mode)) { + return 0; /* OK */ + } + + /* Start from the begining */ + i = 0; + + /* Skip irrelevant part (the root slash, or the drive path) */ +#ifdef _WIN32 + if (file[0] != 0 && file[1] == ':') { /* f:\ */ + i+= 2; + if (file[i] == PATH_SEPARATOR) { /* f:\ */ + i++; + } + } else if (file[0] == PATH_SEPARATOR && file[1] == PATH_SEPARATOR) { /* \\mch */ + i+= 2; + } #endif - // Si existe déja renvoie une erreur.. 
tant pis + + /* Check paths */ + for(npaths = 1 ; ; npaths++) { + char end_char; + + /* Go to next path */ + + /* Skip separator(s) */ + for( ; file[i] == PATH_SEPARATOR ; i++); + /* Next separator */ + for( ; file[i] != 0 && file[i] != PATH_SEPARATOR ; i++); + + /* Check */ + end_char = file[i]; + if (end_char != 0) { + file[i] = '\0'; + } + if (stat(file, &st) == 0) { /* Something exists */ + if (!S_ISDIR(st.st_mode)) { +#if HTS_REMOVE_ANNOYING_INDEX + if (S_ISREG(st.st_mode)) { /* Regular file in place ; move it and create directory */ + sprintf(tmpbuf, "%s.txt", file); + if (rename(file, tmpbuf) != 0) { /* Can't rename regular file */ + return -1; } -#if HTS_WIN==0 - /*chmod(fconv(nom),HTS_ACCESS_FOLDER);*/ + if (mkdir_compat(file) != 0) { /* Can't create directory */ + return -1; + } + } +#else +#error Not implemented #endif - //} } - *b++=*a++; // slash - } - } while(*a); + } else { /* Nothing exists ; create directory */ + if (mkdir_compat(file) != 0) { /* Can't create directory */ + return -1; + } + } + if (end_char == 0) { /* End */ + break; + } else { + file[i] = end_char; /* Restore / */ + } + } return 0; } - // sauver un fichier -int filesave(httrackp* opt,char* adr,int len,char* s,char* url_adr,char* url_fil) { +int filesave(httrackp* opt,const char* adr,int len,const char* s,const char* url_adr,const char* url_fil) { FILE* fp; // écrire le fichier - if ((fp=filecreate(s))!=NULL) { + if ((fp = filecreate(&opt->state.strc, s))!=NULL) { int nl=0; if (len>0) { - nl=(int) fwrite(adr,1,(INTsys)len,fp); + nl=(int) fwrite(adr,1,len,fp); } fclose(fp); if (nl!=len) // erreur @@ -2530,17 +2475,18 @@ int check_fatal_io_errno(void) { // ouvrir un fichier (avec chemin Un*x) -FILE* filecreate(char* s) { +FILE* filecreate(filenote_strc *strc, const char* s) { char BIGSTK fname[HTS_URLMAXSIZE*2]; FILE* fp; + int last_errno = 0; fname[0]='\0'; // noter lst - filenote(s,NULL); + if (strc != NULL) { + filenote(strc, s, NULL); + } - // if (*s=='/') strcpybuff(fname,s+1); 
else strcpybuff(fname,s); // pas de / (root!!) // ** SIIIIIII!!! à cause de -O - strcpybuff(fname,s); - + strcpybuff(fname, s); #if HTS_DOSNAME // remplacer / par des slash arrière { @@ -2554,29 +2500,37 @@ FILE* filecreate(char* s) { // a partir d'ici le slash devient antislash #endif - // ouvrir - fp=fopen(fname,"wb"); + /* Try to open the file */ + fp = fopen(fname, "wb"); + + /* Error ? Check the directory structure and retry. */ if (fp == NULL) { - // construire le chemin si besoin est - (void)structcheck(s); - fp=fopen(fname,"wb"); + last_errno = errno; + if (structcheck(s) != 0) { + last_errno = errno; + } else { + last_errno = 0; + } + fp = fopen(fname, "wb"); } - -#if HTS_WIN==0 - if (fp!=NULL) chmod(fname,HTS_ACCESS_FILE); + if (fp == NULL && last_errno != 0) { + errno = last_errno; + } +#ifndef _WIN32 + if (fp != NULL) + chmod(fname, HTS_ACCESS_FILE); #endif - return fp; } // ouvrir un fichier (avec chemin Un*x) -FILE* fileappend(char* s) { +FILE* fileappend(filenote_strc *strc,const char* s) { char BIGSTK fname[HTS_URLMAXSIZE*2]; FILE* fp; fname[0]='\0'; // noter lst - filenote(s,NULL); + filenote(strc,s,NULL); // if (*s=='/') strcpybuff(fname,s+1); else strcpybuff(fname,s); // pas de / (root!!) // ** SIIIIIII!!! 
à cause de -O strcpybuff(fname,s); @@ -2597,7 +2551,7 @@ FILE* fileappend(char* s) { // ouvrir fp=fopen(fname,"ab"); -#if HTS_WIN==0 +#ifndef _WIN32 if (fp!=NULL) chmod(fname,HTS_ACCESS_FILE); #endif @@ -2606,9 +2560,9 @@ FILE* fileappend(char* s) { // create an empty file -int filecreateempty(char* filename) { +int filecreateempty(filenote_strc *strc, const char* filename) { FILE* fp; - fp=filecreate(filename); // filenote & co + fp=filecreate(strc, filename); // filenote & co if (fp) { fclose(fp); return 1; @@ -2617,14 +2571,7 @@ int filecreateempty(char* filename) { } // noter fichier -typedef struct { - FILE* lst; - char path[HTS_URLMAXSIZE*2]; -} filenote_strc; -int filenote(char* s,filecreate_params* params) { - filenote_strc* strc; - NOSTATIC_RESERVE(strc, filenote_strc, 1); - +int filenote(filenote_strc *strc, const char* s, filecreate_params* params) { // gestion du fichier liste liste if (params) { //filecreate_params* p = (filecreate_params*) params; @@ -2633,10 +2580,11 @@ int filenote(char* s,filecreate_params* params) { return 0; } else if (strc->lst) { char BIGSTK savelst[HTS_URLMAXSIZE*2]; - strcpybuff(savelst,fslash(s)); + char catbuff[CATBUFF_SIZE]; + strcpybuff(savelst,fslash(catbuff,s)); // couper chemin? 
if (strnotempty(strc->path)) { - if (strncmp(fslash(strc->path),savelst,strlen(strc->path))==0) { // couper + if (strncmp(fslash(catbuff,strc->path),savelst,strlen(strc->path))==0) { // couper strcpybuff(savelst,s+strlen(strc->path)); } } @@ -2646,23 +2594,14 @@ int filenote(char* s,filecreate_params* params) { return 1; } -void file_notify(char* adr,char* fil,char* save,int create,int modify,int not_updated) { -#if HTS_ANALYSTE - if (hts_htmlcheck_filesave2 != NULL) { - hts_htmlcheck_filesave2(adr, fil, save, create, modify, not_updated); - } -#endif +void file_notify(httrackp* opt,const char* adr,const char* fil,const char* save,int create,int modify,int not_updated) { + RUN_CALLBACK6(opt, filesave2, adr, fil, save, create, modify, not_updated); } // executer commande utilisateur -static void postprocess_file(httrackp* opt,char* save, char* adr, char* fil); -typedef struct { - int exe; - char cmd[2048]; -} usercommand_strc; -HTS_INLINE void usercommand(httrackp* opt,int _exe,char* _cmd,char* file,char* adr,char* fil) { - usercommand_strc* strc; - NOSTATIC_RESERVE(strc, usercommand_strc, 1); +static void postprocess_file(httrackp* opt, const char* save, const char* adr, const char* fil); +HTS_INLINE void usercommand(httrackp* opt,int _exe,const char* _cmd,const char* file,const char* adr,const char* fil) { + usercommand_strc* strc = &opt->state.usercmd; /* Callback */ if (_exe) { @@ -2676,12 +2615,9 @@ HTS_INLINE void usercommand(httrackp* opt,int _exe,char* _cmd,char* file,char* a /* post-processing */ postprocess_file(opt, file, adr, fil); -#if HTS_ANALYSTE - if (hts_htmlcheck_filesave != NULL) { - if (file != NULL && strnotempty(file)) - hts_htmlcheck_filesave(file); - } -#endif + if (file != NULL && strnotempty(file)) { + RUN_CALLBACK1(opt, filesave, file); + } if (strc->exe) { if (file != NULL && strnotempty(file)) { @@ -2691,7 +2627,7 @@ HTS_INLINE void usercommand(httrackp* opt,int _exe,char* _cmd,char* file,char* a } } } -void usercommand_exe(char* 
cmd,char* file) { +void usercommand_exe(const char* cmd,const char* file) { char BIGSTK temp[8192]; char c[2]=""; int i; @@ -2710,7 +2646,7 @@ void usercommand_exe(char* cmd,char* file) { } -static void postprocess_file(httrackp* opt,char* save, char* adr, char* fil) { +static void postprocess_file(httrackp* opt,const char* save, const char* adr, const char* fil) { int first = 0; /* MIME-html archive to build */ if (opt != NULL && opt->mimehtml) { @@ -2718,24 +2654,26 @@ static void postprocess_file(httrackp* opt,char* save, char* adr, char* fil) { adr = NULL; } if (save != NULL && opt != NULL && adr != NULL && adr[0] && strnotempty(save) && fexist(save)) { - char* rsc_save = save; - char* rsc_fil = strrchr(fil, '/'); + const char* rsc_save = save; + const char* rsc_fil = strrchr(fil, '/'); int n; if (rsc_fil == NULL) rsc_fil = fil; - if (strncmp(fslash(save), fslash(opt->path_html), (n = (int)strlen(opt->path_html))) == 0) { + if (strncmp(fslash(OPT_GET_BUFF(opt),save), fslash(OPT_GET_BUFF(opt),StringBuff(opt->path_html)), (n = (int)strlen(StringBuff(opt->path_html)))) == 0) { rsc_save += n; } if (!opt->state.mimehtml_created) { first = 1; - opt->state.mimefp = fopen(fconcat(opt->path_html,"index.mht"), "wb"); + opt->state.mimefp = fopen(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_html),"index.mht"), "wb"); if (opt->state.mimefp != NULL) { char BIGSTK rndtmp[1024], currtime[256]; - srand(time(NULL)); + srand((unsigned int)time(NULL)); time_gmt_rfc822(currtime); sprintf(rndtmp, "%d_%d", (int)time(NULL), (int) rand()); - sprintf(opt->state.mimemid, "----=_MIMEPart_%s_=----", rndtmp); + StringRoom(opt->state.mimemid, 256); + sprintf(StringBuffRW(opt->state.mimemid), "----=_MIMEPart_%s_=----", rndtmp); + StringSetLength(opt->state.mimemid, -1); fprintf(opt->state.mimefp, "From: HTTrack Website Copier \r\n" "Subject: Local mirror\r\n" "Date: %s\r\n" @@ -2746,12 +2684,12 @@ static void postprocess_file(httrackp* opt,char* save, char* adr, char* fil) { "MIME-Version: 
1.0\r\n" "\r\nThis message is a RFC MIME-compliant multipart message.\r\n" "\r\n" - , currtime, rndtmp, opt->state.mimemid); + , currtime, rndtmp, StringBuff(opt->state.mimemid)); opt->state.mimehtml_created = 1; } else { opt->state.mimehtml_created = -1; - if ( opt->errlog != NULL ) { - fspc(opt->errlog,"error"); fprintf(opt->log,"unable to create index.mht"LF); + if ( opt->log != NULL ) { + HTS_LOG(opt,LOG_ERROR); fprintf(opt->log,"unable to create index.mht"LF); } } } @@ -2761,8 +2699,8 @@ static void postprocess_file(httrackp* opt,char* save, char* adr, char* fil) { char buff[60*100 + 2]; char mimebuff[256]; char BIGSTK cid[HTS_URLMAXSIZE*3]; - int len; - int isHtml = ( ishtml(save) == 1 ); + size_t len; + int isHtml = ( ishtml(opt,save) == 1 ); mimebuff[0] = '\0'; /* CID */ @@ -2771,8 +2709,8 @@ static void postprocess_file(httrackp* opt,char* save, char* adr, char* fil) { escape_in_url(cid); { char* a = cid; while((a = strchr(a, '%'))) { *a = 'X'; a++; } } - guess_httptype(mimebuff, save); - fprintf(opt->state.mimefp, "--%s\r\n", opt->state.mimemid); + guess_httptype(opt,mimebuff, save); + fprintf(opt->state.mimefp, "--%s\r\n", StringBuff(opt->state.mimemid)); /*if (first) fprintf(opt->state.mimefp, "Content-disposition: inline\r\n"); else*/ @@ -2791,7 +2729,7 @@ static void postprocess_file(httrackp* opt,char* save, char* adr, char* fil) { buff[len] = '\0'; if (!isHtml) { char base64buff[60*100*2]; - code64((unsigned char*)buff, len, (unsigned char*)base64buff, 1); + code64((unsigned char*)buff, (int)len, (unsigned char*)base64buff, 1); fprintf(opt->state.mimefp, "%s", base64buff); } else { fprintf(opt->state.mimefp, "%s", buff); @@ -2804,7 +2742,7 @@ static void postprocess_file(httrackp* opt,char* save, char* adr, char* fil) { } else if (save == NULL) { if (opt->state.mimehtml_created == 1 && opt->state.mimefp != NULL) { fprintf(opt->state.mimefp, - "--%s--\r\n", opt->state.mimemid); + "--%s--\r\n", StringBuff(opt->state.mimemid)); 
fclose(opt->state.mimefp); opt->state.mimefp = NULL; } @@ -2813,17 +2751,9 @@ static void postprocess_file(httrackp* opt,char* save, char* adr, char* fil) { } // écrire n espaces dans fp -typedef struct { - int error; - int warning; - int info; -} fspc_strc; -HTS_INLINE int fspc(FILE* fp,char* type) { - fspc_strc* strc; - NOSTATIC_RESERVE(strc, fspc_strc, 1); // log.. - - // - if (fp) { +HTS_INLINE int fspc(httrackp *opt,FILE* fp,const char* type) { + fspc_strc* const strc = ( opt != NULL ) ? &opt->state.fspc : NULL; + if (fp != NULL) { char s[256]; time_t tt; struct tm* A; @@ -2835,19 +2765,25 @@ HTS_INLINE int fspc(FILE* fp,char* type) { } strftime(s,250,"%H:%M:%S",A); if (strnotempty(type)) - fprintf(fp,"%s\t%c%s: \t",s,hichar(*type),type+1); - else - fprintf(fp,"%s\t \t",s); - if (strcmp(type,"warning")==0) - strc->warning++; - else if (strcmp(type,"error")==0) - strc->error++; - else if (strcmp(type,"info")==0) - strc->info++; - } - else if (!type) - strc->error=strc->warning=strc->info=0; // reset - else if (strcmp(type,"warning")==0) + fprintf(fp,"%s\t%c%s: \t",s,hichar(*type),type+1); + else + fprintf(fp,"%s\t \t",s); + if (strc != NULL) { + if (strcmp(type,"warning")==0) + strc->warning++; + else if (strcmp(type,"error")==0) + strc->error++; + else if (strcmp(type,"info")==0) + strc->info++; + } + } + else if (strc == NULL) { + return 0; + } + else if (!type) { + strc->error=strc->warning=strc->info=0; // reset + } + else if (strcmp(type,"warning")==0) return strc->warning; else if (strcmp(type,"error")==0) return strc->error; @@ -2914,8 +2850,6 @@ HTS_INLINE int back_fillmax(struct_back* sback,httrackp* opt,cache_back* cache,l } int back_pluggable_sockets_strict(struct_back* sback, httrackp* opt) { - lien_back* const back = sback->lnk; - const int back_max = sback->count; int n = opt->maxsoc - back_nsoc(sback); // connect limiter @@ -2936,8 +2870,6 @@ int back_pluggable_sockets_strict(struct_back* sback, httrackp* opt) { } int 
back_pluggable_sockets(struct_back* sback, httrackp* opt) { - lien_back* const back = sback->lnk; - const int back_max = sback->count; int n; // ajouter autant de socket qu'on peut ajouter @@ -2955,8 +2887,6 @@ int back_pluggable_sockets(struct_back* sback, httrackp* opt) { // remplir backing int back_fill(struct_back* sback,httrackp* opt,cache_back* cache,lien_url** liens,int ptr,int numero_passe,int lien_tot) { - lien_back* const back = sback->lnk; - const int back_max = sback->count; int n = back_pluggable_sockets(sback, opt); if (opt->savename_delayed == 2 && !opt->delayed_cached) /* cancel (always delayed) */ return 0; @@ -2975,7 +2905,7 @@ int back_fill(struct_back* sback,httrackp* opt,cache_back* cache,lien_url** lien //while((p0) && (p < ptr+opt->maxcache_anticipate)) { int ok=1; - // on ne met pas le fichier en backing si il doit être traité après + // on ne met pas le fichier en backing si il doit être traité après ou s'il a déja été traité if (liens[p]->pass2) { // 2è passe if (numero_passe!=1) ok=0; @@ -2983,15 +2913,19 @@ int back_fill(struct_back* sback,httrackp* opt,cache_back* cache,lien_url** lien if (numero_passe!=0) ok=0; } + if (ok && liens[p]->sav != NULL && liens[p]->sav[0] != '\0' + && hash_read(opt->hash,liens[p]->sav,"",0,opt->urlhack) >= 0) // lookup in liens_record + { + ok = 0; + } // note: si un backing est fini, il reste en mémoire jusqu'à ce que // le ptr l'atteigne if (ok) { - int index = back_index(sback, liens[p]->adr,liens[p]->fil,liens[p]->sav); - if (index < 0) { - if (back_add(sback,opt,cache,liens[p]->adr,liens[p]->fil,liens[p]->sav,liens[liens[p]->precedent]->adr,liens[liens[p]->precedent]->fil,liens[p]->testmode,&liens[p]->pass2)==-1) { - if ( (opt->debug>1) && (opt->errlog!=NULL) ) { - fspc(opt->errlog,"debug"); fprintf(opt->errlog,"error: unable to add more links through back_add for back_fill"LF); + if (!back_exist(sback, opt, liens[p]->adr,liens[p]->fil,liens[p]->sav)) { + if 
(back_add(sback,opt,cache,liens[p]->adr,liens[p]->fil,liens[p]->sav,liens[liens[p]->precedent]->adr,liens[liens[p]->precedent]->fil,liens[p]->testmode)==-1) { + if ( (opt->debug>1) && (opt->log!=NULL) ) { + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"error: unable to add more links through back_add for back_fill"LF); test_flush; } #if BDEBUG==1 @@ -3004,8 +2938,6 @@ int back_fill(struct_back* sback,httrackp* opt,cache_back* cache,lien_url** lien printf("backing: %s%s\n",liens[p]->adr,liens[p]->fil); #endif } - } else { - back_set_passe2_ptr(opt,cache,sback,index,&liens[p]->pass2); } } p++; @@ -3035,116 +2967,6 @@ int back_fill(struct_back* sback,httrackp* opt,cache_back* cache,lien_url** lien -// routines de détournement de SIGHUP & co (Unix) -// -httrackp* hts_declareoptbuffer(httrackp* optdecl) { - static httrackp* opt=NULL; /* OK */ - if (optdecl) opt=optdecl; - return opt; -} -// -void sig_finish( int code ) { // finir et quitter - signal(code,sig_term); // quitter si encore - exit_xh=1; - fprintf(stderr,"\nExit requested to engine (signal %d)\n",code); -} -void sig_term( int code ) { // quitter brutalement - fprintf(stderr,"\nProgram terminated (signal %d)\n",code); - exit(0); -} -#if HTS_WIN -void sig_ask( int code ) { // demander - char s[256]; - signal(code,sig_term); // quitter si encore - printf("\nQuit program/Interrupt/Cancel? 
(Q/I/C) "); - fflush(stdout); - scanf("%s",s); - if ( (s[0]=='y') || (s[0]=='Y') || (s[0]=='o') || (s[0]=='O') || (s[0]=='q') || (s[0]=='Q')) - exit(0); // quitter - else if ( (s[0]=='i') || (s[0]=='I') ) { - httrackp* opt=hts_declareoptbuffer(NULL); - if (opt) { - // ask for stop - opt->state.stop=1; - } - } - signal(code,sig_ask); // remettre signal -} -#else -void sig_back( int code ) { // ignorer et mettre en backing - signal(code,sig_ignore); - sig_doback(0); -} -void sig_ask( int code ) { // demander - char s[256]; - signal(code,sig_term); // quitter si encore - printf("\nQuit program/Interrupt/Background/bLind background/Cancel? (Q/I/B/L/C) "); - fflush(stdout); - scanf("%s",s); - if ( (s[0]=='y') || (s[0]=='Y') || (s[0]=='o') || (s[0]=='O') || (s[0]=='q') || (s[0]=='Q')) - exit(0); // quitter - else if ( (s[0]=='b') || (s[0]=='B') || (s[0]=='a') || (s[0]=='A') ) - sig_doback(0); // arrière plan - else if ( (s[0]=='l') || (s[0]=='L') ) - sig_doback(1); // arrière plan - else if ( (s[0]=='i') || (s[0]=='I') ) { - httrackp* opt=hts_declareoptbuffer(NULL); - if (opt) { - // ask for stop - printf("finishing pending transfers.. 
please wait\n"); - opt->state.stop=1; - } - signal(code,sig_ask); // remettre signal - } - else { - printf("cancel..\n"); - signal(code,sig_ask); // remettre signal - } -} -void sig_ignore( int code ) { // ignorer signal -} -void sig_brpipe( int code ) { // treat if necessary - signal(code, sig_brpipe); -} -void sig_doback(int blind) { // mettre en backing - int out=-1; - // - printf("\nMoving into background to complete the mirror...\n"); fflush(stdout); - - { - httrackp* opt=hts_declareoptbuffer(NULL); - if (opt) { - // suppress logging and asking lousy questions - opt->quiet=1; - opt->verbosedisplay=0; - } - } - - if (!blind) - out = open("hts-nohup.out",O_CREAT|O_WRONLY,S_IRUSR|S_IWUSR); - if (out == -1) - out = open("/dev/null",O_WRONLY,S_IRUSR|S_IWUSR); - close(0); - close(1); - dup(out); - close(2); - dup(out); - // - switch (fork()) { - case 0: - break; - case -1: - fprintf(stderr,"Error: can not fork process\n"); - break; - default: // pere - usleep(100000); // pause 1/10s "A microsecond is .000001s" - _exit(0); - break; - } -} -#endif -// fin routines de détournement de SIGHUP & co - // Poll stdin.. 
si besoin #if HTS_POLL // lecture stdin des caractères disponibles @@ -3207,10 +3029,9 @@ HTS_INLINE int check_sockdata(T_SOC s) { } // Attente de touche -#if HTS_ANALYSTE -int ask_continue(void) { - char* s; - s=hts_htmlcheck_query2(HTbuff); +int ask_continue(httrackp *opt) { + const char* s; + s = RUN_CALLBACK1(opt, query2, opt->state.HTbuff); if (s) { if (strnotempty(s)) { if ((strfield2(s,"N")) || (strfield2(s,"NO")) || (strfield2(s,"NON"))) @@ -3220,19 +3041,6 @@ int ask_continue(void) { } return 1; } -#else -int ask_continue(void) { - char s[12]; - s[0]='\0'; - printf("Press to confirm, to abort\n"); - io_flush; linput(stdin,s,4); - if (strnotempty(s)) { - if ((strfield2(s,"N")) || (strfield2(s,"NO")) || (strfield2(s,"NON"))) - return 0; - } - return 1; -} -#endif // nombre de digits dans un nombre int nombre_digit(int n) { @@ -3289,166 +3097,130 @@ char* next_token(char* p,int flag) { return p; } -// routines annexes -#if HTS_ANALYSTE -// canceller un fichier (noter comme cancellable) -// !!NOT THREAD SAFE!! 
-HTSEXT_API char* hts_cancel_file(char * s) { - static char sav[HTS_URLMAXSIZE*2]=""; - if (s[0]!='\0') - if (sav[0]=='\0') - strcpybuff(sav,s); - return sav; -} -HTSEXT_API void hts_cancel_test(void) { - if (_hts_in_html_parsing==2) - _hts_cancel=2; +static int hts_cancel_file_push_(httrackp *opt, const char *url) { + if (url != NULL && url[0] != '\0') { + htsoptstatecancel **cancel; + /* search for available place to store a new htsoptstatecancel* */ + for( cancel = &opt->state.cancel ; *cancel != NULL ; cancel = & ( (*cancel)->next ) ) { + if (strcmp((*cancel)->url, url) == 0) { + return 1; /* already there */ + } + } + *cancel = malloct(sizeof(htsoptstatecancel)); + (*cancel)->next = NULL; + (*cancel)->url = strdupt(url); + return 0; + } + return 1; } -HTSEXT_API void hts_cancel_parsing(void) { - if (_hts_in_html_parsing) - _hts_cancel=1; + +/* cancel a file (locked) */ +HTSEXT_API int hts_cancel_file_push(httrackp *opt, const char *url) { + int ret; + hts_mutexlock(&opt->state.lock); + ret = hts_cancel_file_push_(opt, url); + hts_mutexrelease(&opt->state.lock); + return ret; } -#endif -// for(_i=0;(_i=0) { // signifie "lien actif" -#if 0 -/* -hts_add_file, add/get elements in the add chain for java parsing -if file_position >= 0 - push 'file/file_position' - return 1 (return 0 if exists) -else - pop file -> 'file' - return 'file_position' -else if empty/error - return -1; -*/ -typedef struct addfile_chain { - char name[1024]; - int pos; - struct addfile_chain* next; -} addfile_chain; -typedef addfile_chain* addfile_chain_ptr; -int opt->(char* file,int file_position) { - addfile_chain** chain; - NOSTATIC_RESERVE(chain, addfile_chain_ptr, 1); - - if (file_position>=0) { /* copy file to the chain */ - struct addfile_chain** current; - current=chain; /* start from */ - while(*current) { - if (strcmp((*current)->name,file)==0) - return 0; /* already exists */ - current=&( (*current)->next ); /* 'next' address */ - } - *current=calloct(1,sizeof(addfile_chain)); - if 
(*current) { - (*current)->next=NULL; - (*current)->pos=-1; - (*current)->name[0]='\0'; - } - if (*current) { - strcpybuff((*current)->name,file); - (*current)->pos=file_position; - return 1; - } else { - printf("PANIC! Too many Java files during parsing [1]\n"); - return -1; - } - } else { /* copy last element in file and delete it */ - if (file) - file[0]='\0'; - if (*chain) { - struct addfile_chain** current; - int pos=-1; - current=chain; /* start from */ - while( (*current)->next ) { - current=&( (*current)->next ); /* 'next' address */ - } - if (file) - strcpybuff(file,(*current)->name); - pos=(*current)->pos; - freet(*current); - *current=NULL; - return pos; - } - return -1; /* no more elements */ +static char* hts_cancel_file_pop_(httrackp *opt) { + if (opt->state.cancel != NULL) { + htsoptstatecancel **cancel; + htsoptstatecancel *ret; + for( cancel = &opt->state.cancel ; (*cancel)->next != NULL ; cancel = & ( (*cancel)->next ) ); + ret = *cancel; + *cancel = NULL; + return ret->url; } + return NULL; /* no entry */ +} - return 0; +char* hts_cancel_file_pop(httrackp *opt) { + char* ret; + hts_mutexlock(&opt->state.lock); + ret = hts_cancel_file_pop_(opt); + hts_mutexrelease(&opt->state.lock); + return ret; +} + +HTSEXT_API void hts_cancel_test(httrackp *opt) { + if (opt->state._hts_in_html_parsing==2) + opt->state._hts_cancel=2; +} +HTSEXT_API void hts_cancel_parsing(httrackp *opt) { + if (opt->state._hts_in_html_parsing) + opt->state._hts_cancel=1; } -#endif -#if HTS_ANALYSTE // en train de parser un fichier html? réponse: % effectués // flag>0 : refresh demandé -HTSEXT_API int hts_is_parsing(int flag) { - if (_hts_in_html_parsing) { // parsing? - if (flag>=0) _hts_in_html_poll=1; // faudrait un tit refresh - return max(_hts_in_html_done,1); // % effectués +HTSEXT_API int hts_is_parsing(httrackp *opt, int flag) { + if (opt->state._hts_in_html_parsing) { // parsing? 
+ if (flag >= 0) + opt->state._hts_in_html_poll = 1; // faudrait un tit refresh + return max(opt->state._hts_in_html_done, 1); // % effectués } else { return 0; // non } } -HTSEXT_API int hts_is_testing(void) { // 0 non 1 test 2 purge - if (_hts_in_html_parsing==2) +HTSEXT_API int hts_is_testing(httrackp *opt) { // 0 non 1 test 2 purge + if (opt->state._hts_in_html_parsing==2) return 1; - else if (_hts_in_html_parsing==3) + else if (opt->state._hts_in_html_parsing==3) return 2; - else if (_hts_in_html_parsing==4) + else if (opt->state._hts_in_html_parsing==4) return 3; - else if (_hts_in_html_parsing==5) // scheduling + else if (opt->state._hts_in_html_parsing==5) // scheduling return 4; - else if (_hts_in_html_parsing==6) // wait for slot + else if (opt->state._hts_in_html_parsing==6) // wait for slot return 5; return 0; } -HTSEXT_API int hts_is_exiting(void) { - return exit_xh; +HTSEXT_API int hts_is_exiting(httrackp *opt) { + return opt->state.exit_xh; } // message d'erreur? -char* hts_errmsg(void) { - return _hts_errmsg; +char* hts_errmsg(httrackp *opt) { + return opt->state._hts_errmsg; } // mode pause transfer -HTSEXT_API int hts_setpause(int p) { - if (p>=0) _hts_setpause=p; - return _hts_setpause; +HTSEXT_API int hts_setpause(httrackp *opt, int p) { + if (p >= 0) + opt->state._hts_setpause = p; + return opt->state._hts_setpause; } // ask for termination -HTSEXT_API int hts_request_stop(int force) { - httrackp* opt=hts_declareoptbuffer(NULL); - if (opt) { - opt->state.stop=1; +HTSEXT_API int hts_request_stop(httrackp* opt, int force) { + if (opt != NULL) { + opt->state.stop = 1; } return 0; } // régler en cours de route les paramètres réglables.. 
// -1 : erreur -HTSEXT_API int hts_setopt(httrackp* set_opt) { - if (set_opt) { - httrackp* engine_opt=hts_declareoptbuffer(NULL); - if (engine_opt) { - //_hts_setopt=opt; - copy_htsopt(set_opt,engine_opt); - } - } - return 0; -} +//HTSEXT_API int hts_setopt(httrackp* set_opt) { +// if (set_opt) { +// httrackp* engine_opt=hts_declareoptbuffer(NULL); +// if (engine_opt) { +// //_hts_setopt=opt; +// copy_htsopt(set_opt,engine_opt); +// } +// } +// return 0; +//} // ajout d'URL // -1 : erreur -HTSEXT_API int hts_addurl(char** url) { - if (url) _hts_addurl=url; - return (_hts_addurl!=NULL); +HTSEXT_API int hts_addurl(httrackp *opt, char** url) { + if (url) + opt->state._hts_addurl = url; + return (opt->state._hts_addurl != NULL); } -HTSEXT_API int hts_resetaddurl(void) { - _hts_addurl=NULL; - return (_hts_addurl!=NULL); +HTSEXT_API int hts_resetaddurl(httrackp *opt) { + opt->state._hts_addurl = NULL; + return (opt->state._hts_addurl != NULL); } // copier nouveaux paramètres si besoin -HTSEXT_API int copy_htsopt(httrackp* from,httrackp* to) { +HTSEXT_API int copy_htsopt(const httrackp* from,httrackp* to) { if (from->maxsite > -1) to->maxsite = from->maxsite; @@ -3484,8 +3256,8 @@ HTSEXT_API int copy_htsopt(httrackp* from,httrackp* to) { if (from->maxconn > 0) to->maxconn = from->maxconn; - if (strnotempty(from->user_agent)) - strcpybuff(to->user_agent , from->user_agent); + if (StringNotEmpty(from->user_agent)) + StringCopyS(to->user_agent, from->user_agent); if (from->retry > -1) to->retry = from->retry; @@ -3512,7 +3284,6 @@ HTSEXT_API int copy_htsopt(httrackp* from,httrackp* to) { return 0; } -#endif // /* External modules callback */ @@ -3532,27 +3303,25 @@ int htsAddLink(htsmoduleStruct* str, char* link) { codebase[0]='\0'; if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"(module): adding link : '%s'"LF, link); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(module): adding link : '%s'"LF, link); test_flush; } // 
recopie de "creer le lien" // -#if HTS_ANALYSTE - if (hts_htmlcheck_linkdetected != NULL && !hts_htmlcheck_linkdetected(link)) { - if (opt->errlog) { - fspc(opt->errlog,"error"); fprintf(opt->errlog,"Link %s refused by external wrapper"LF, link); + if (!RUN_CALLBACK1(opt, linkdetected, link)) { + if (opt->log) { + HTS_LOG(opt,LOG_ERROR); fprintf(opt->log,"Link %s refused by external wrapper"LF, link); test_flush; } return 0; } - if (hts_htmlcheck_linkdetected2 != NULL && !hts_htmlcheck_linkdetected2(link, NULL)) { - if (opt->errlog) { - fspc(opt->errlog,"error"); fprintf(opt->errlog,"Link %s refused by external wrapper(2)"LF, link); + if (!RUN_CALLBACK2(opt, linkdetected2, link, NULL)) { + if (opt->log) { + HTS_LOG(opt,LOG_ERROR); fprintf(opt->log,"Link %s refused by external wrapper(2)"LF, link); test_flush; } return 0; } -#endif // adr = c'est la même // fil et save: save2 et fil2 @@ -3578,8 +3347,8 @@ int htsAddLink(htsmoduleStruct* str, char* link) { strcpybuff(tempo,a); strcpybuff(codebase,tempo); // couper host } else { - if (opt->errlog) { - fprintf(opt->errlog,"Unexpected strstr error in base %s"LF,codebase); + if (opt->log) { + fprintf(opt->log,"Unexpected strstr error in base %s"LF,codebase); test_flush; } } @@ -3587,8 +3356,8 @@ int htsAddLink(htsmoduleStruct* str, char* link) { } if (!((int) strlen(codebase)errlog) { - fprintf(opt->errlog,"Codebase too long, parsing skipped (%s)"LF,codebase); + if (opt->log) { + fprintf(opt->log,"Codebase too long, parsing skipped (%s)"LF,codebase); test_flush; } } @@ -3610,7 +3379,7 @@ int htsAddLink(htsmoduleStruct* str, char* link) { &set_prio_to, &just_test_it); if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"result for wizard external module link: %d"LF,forbidden_url); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"result for wizard external module link: %d"LF,forbidden_url); test_flush; } @@ -3634,7 +3403,7 @@ int htsAddLink(htsmoduleStruct* str, char* link) { char BIGSTK 
former_adr[HTS_URLMAXSIZE*2]; char BIGSTK former_fil[HTS_URLMAXSIZE*2]; former_adr[0] = former_fil[0] = '\0'; - r = hts_wait_delayed(str, adr, fil, save, former_adr, former_fil, &forbidden_url); + r = hts_wait_delayed(str, adr, fil, save, NULL, NULL, former_adr, former_fil, &forbidden_url); } // end resolve unresolved type opt->savename_type=a; @@ -3643,7 +3412,7 @@ int htsAddLink(htsmoduleStruct* str, char* link) { if (savename) { if (lienrelatif(tempo,save,savename)==0) { if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"(module): relative link at %s build with %s and %s: %s"LF,adr,save,savename,tempo); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(module): relative link at %s build with %s and %s: %s"LF,adr,save,savename,tempo); test_flush; if (str->localLink && str->localLinkSize > (int) strlen(tempo) + 1) { strcpybuff(str->localLink, tempo); @@ -3656,7 +3425,7 @@ int htsAddLink(htsmoduleStruct* str, char* link) { if (forbidden_url) { if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"(module): file not caught: %s"LF,lien); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(module): file not caught: %s"LF,lien); test_flush; } if (str->localLink && str->localLinkSize > (int) ( strlen(adr) + strlen(fil) + 8 ) ) { str->localLink[0] = '\0'; @@ -3671,7 +3440,7 @@ int htsAddLink(htsmoduleStruct* str, char* link) { // if (r != -1) { if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"(module): %s%s -> %s (base %s)"LF,adr,fil,save,codebase); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(module): %s%s -> %s (base %s)"LF,adr,fil,save,codebase); test_flush; } // modifié par rapport à l'autre version (cf prio_fix notamment et save2) @@ -3697,11 +3466,11 @@ int htsAddLink(htsmoduleStruct* str, char* link) { liens_record(adr,fil,save,"","",opt->urlhack); if (liens[lien_tot]==NULL) { // erreur, pas de place réservée printf("PANIC! 
: Not enough memory [%d]\n",__LINE__); - if (opt->errlog) { - fprintf(opt->errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url))); + if (opt->log) { + fprintf(opt->log,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url))); test_flush; } - exit_xh=-1; /* fatal error -> exit */ + opt->state.exit_xh=-1; /* fatal error -> exit */ return 0; } @@ -3729,7 +3498,7 @@ int htsAddLink(htsmoduleStruct* str, char* link) { //strcpybuff(liens[lien_tot]->fil,fil); //strcpybuff(liens[lien_tot]->sav,save); if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"(module): OK, NOTE: %s%s -> %s"LF,liens[lien_tot]->adr,liens[lien_tot]->fil,liens[lien_tot]->sav); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(module): OK, NOTE: %s%s -> %s"LF,liens[lien_tot]->adr,liens[lien_tot]->fil,liens[lien_tot]->sav); test_flush; } diff --git a/src/htscore.h b/src/htscore.h index e1966d3..5e88313 100644 --- a/src/htscore.h +++ b/src/htscore.h @@ -35,16 +35,15 @@ Please visit our Website: http://www.httrack.com /* ------------------------------------------------------------ */ // Fichier librairie .h -#ifndef HTTRACK_DEFH -#define HTTRACK_DEFH - +#ifndef HTS_CORE_DEFH +#define HTS_CORE_DEFH #include "htsglobal.h" /* specific definitions */ #include "htsbase.h" // Includes & définitions -#ifdef HAVE_SYS_TYPES_H +#if ( defined(_WIN32) ||defined(HAVE_SYS_TYPES_H) ) #include #endif #ifdef HAVE_SYS_STAT_H @@ -55,29 +54,99 @@ Please visit our Website: http://www.httrack.com #include #endif #ifndef _WIN32_WCE -#include #include -#else -#ifndef HTS_CECOMPAT -#include "signal.h" -#endif #endif #else -#include #ifdef HAVE_UNISTD_H #include #endif #endif /* END specific definitions */ +/* Forward definitions */ +#ifndef HTS_DEF_FWSTRUCT_lien_url +#define HTS_DEF_FWSTRUCT_lien_url +typedef struct lien_url lien_url; +#endif +#ifndef HTS_DEF_FWSTRUCT_lien_back +#define HTS_DEF_FWSTRUCT_lien_back 
+typedef struct lien_back lien_back; +#endif +#ifndef HTS_DEF_FWSTRUCT_struct_back +#define HTS_DEF_FWSTRUCT_struct_back +typedef struct struct_back struct_back; +#endif +#ifndef HTS_DEF_FWSTRUCT_cache_back +#define HTS_DEF_FWSTRUCT_cache_back +typedef struct cache_back cache_back; +#endif +#ifndef HTS_DEF_FWSTRUCT_hash_struct +#define HTS_DEF_FWSTRUCT_hash_struct +typedef struct hash_struct hash_struct; +#endif +#ifndef HTS_DEF_FWSTRUCT_filecreate_params +#define HTS_DEF_FWSTRUCT_filecreate_params +typedef struct filecreate_params filecreate_params; +#endif // Include htslib.h for all types #include "htslib.h" +// options #include "htsopt.h" +// INCLUDES .H PARTIES DE CODE HTTRACK + +// routine main +#include "htscoremain.h" + +// core routines +#include "htscore.h" + +// divers outils pour httrack.c +#include "htstools.h" + +// aide pour la version en ligne de commande +#include "htshelp.h" + +// génération du nom de fichier à sauver +#include "htsname.h" + +// gestion ftp +#include "htsftp.h" + +// gestion interception d'URL +#include "htscatchurl.h" + +// gestion robots.txt +#include "htsrobots.h" + +// routines d'acceptation de liens +#include "htswizard.h" + +// routines de regexp +#include "htsfilters.h" + +// gestion backing +#include "htsback.h" + +// gestion cache +#include "htscache.h" + +// gestion hashage +#include "htshash.h" +#include "htsinthash.h" + +#include "htsdefines.h" + +#include "hts-indextmpl.h" + // structure d'un lien -typedef struct lien_url { +#ifndef HTS_DEF_FWSTRUCT_lien_url +#define HTS_DEF_FWSTRUCT_lien_url +typedef struct lien_url lien_url; +#endif +struct lien_url { char firstblock; // flag 1=premier malloc char link_import; // lien importé à la suite d'un moved - ne pas appliquer les règles classiques up/down int depth; // profondeur autorisée lien ; >0 forte 0=faible @@ -95,10 +164,14 @@ typedef struct lien_url { char* former_fil; // nom du fichier distant initial (avant éventuel moved), peut être nul // pour optimisation: int 
hash_next[3]; // prochain lien avec même valeur hash -} lien_url; +}; // chargement de fichiers en 'arrière plan' -typedef struct lien_back { +#ifndef HTS_DEF_FWSTRUCT_lien_back +#define HTS_DEF_FWSTRUCT_lien_back +typedef struct lien_back lien_back; +#endif +struct lien_back { #if DEBUG_CHECKINT char magic; #endif @@ -133,7 +206,7 @@ typedef struct lien_back { LLint chunk_blocksize; // taille data declaree par le chunk LLint compressed_size; // taille compressés (stats uniquement) // - int* pass2_ptr; // pointeur sur liens[ptr]->pass2 + //int links_index; // to access liens[links_index] // char info[256]; // éventuel status pour le ftp int stop_ftp; // flag stop pour ftp @@ -141,18 +214,27 @@ typedef struct lien_back { #if DEBUG_CHECKINT char magic2; #endif -} lien_back; +}; -typedef struct struct_back { +#ifndef HTS_DEF_FWSTRUCT_struct_back +#define HTS_DEF_FWSTRUCT_struct_back +typedef struct struct_back struct_back; +#endif +struct struct_back { lien_back* lnk; int count; - void* ready; -} struct_back; + inthash ready; + LLint ready_size_bytes; +}; typedef struct cache_back_zip_entry cache_back_zip_entry; // cache -typedef struct cache_back { +#ifndef HTS_DEF_FWSTRUCT_cache_back +#define HTS_DEF_FWSTRUCT_cache_back +typedef struct cache_back cache_back; +#endif +struct cache_back { int version; // 0 ou 1 /* */ int type; @@ -163,9 +245,9 @@ typedef struct cache_back { FILE *txt; // liste des fichiers (info) char lastmodified[256]; // HASH - void* hashtable; + inthash hashtable; // HASH for tests (naming subsystem) - void* cached_tests; + inthash cached_tests; // fichiers log optionnels FILE* log; FILE* errlog; @@ -173,32 +255,40 @@ typedef struct cache_back { int ptr_ant; // pointeur pour anticiper int ptr_last; // pointeur pour anticiper // - void* zipInput; - void* zipOutput; + void *zipInput; + void *zipOutput; cache_back_zip_entry* zipEntries; int zipEntriesOffs; int zipEntriesCapa; -} cache_back; +}; -typedef struct hash_struct { +#ifndef 
HTS_DEF_FWSTRUCT_hash_struct +#define HTS_DEF_FWSTRUCT_hash_struct +typedef struct hash_struct hash_struct; +#endif +struct hash_struct { lien_url** liens; // pointeur sur liens int max_lien; // indice le plus grand rencontré int hash[3][HTS_HASH_SIZE]; // tables pour sav/adr-fil/former_adr-former_fil -} hash_struct; +}; -typedef struct filecreate_params { +#ifndef HTS_DEF_FWSTRUCT_filecreate_params +#define HTS_DEF_FWSTRUCT_filecreate_params +typedef struct filecreate_params filecreate_params; +#endif +struct filecreate_params { FILE* lst; char path[HTS_URLMAXSIZE*2]; -} filecreate_params; +}; /* Library internal definictions */ #ifdef HTS_INTERNAL_BYTECODE -static int cache_writable(cache_back* cache) { +HTS_STATIC int cache_writable(cache_back* cache) { return (cache != NULL && ( cache->dat != NULL || cache->zipOutput != NULL ) ); } -static int cache_readable(cache_back* cache) { +HTS_STATIC int cache_readable(cache_back* cache) { return (cache != NULL && ( cache->olddat != NULL || cache->zipInput != NULL ) ); } @@ -208,146 +298,29 @@ static int cache_readable(cache_back* cache) { // INCLUDES .H PARTIES DE CODE HTTRACK -// routine main -#include "htscoremain.h" - -// divers outils pour httrack.c -#include "htstools.h" - -// aide pour la version en ligne de commande -#include "htshelp.h" - -// génération du nom de fichier à sauver -#include "htsname.h" - -// gestion ftp -#include "htsftp.h" - -// routine parser java -#include "htsjava.h" - -// gestion interception d'URL -#include "htscatchurl.h" - -// gestion robots.txt -#include "htsrobots.h" - -// routines d'acceptation de liens -#include "htswizard.h" - -// routines de regexp -#include "htsfilters.h" - -// gestion backing -#include "htsback.h" - -// gestion cache -#include "htscache.h" - -// gestion hashage -#include "htshash.h" -#include "htsinthash.h" - -// gestion réentrance -#include "htsnostatic.h" - -// infos console -#if HTS_ANALYSTE_CONSOLE -#include "httrack.h" -#endif - -#include "htsdefines.h" - 
-#include "hts-indextmpl.h" - -// INCLUDES .H PARTIES DE CODE HTTRACK - -// - -/* -typedef void (* t_hts_htmlcheck_init)(void); -typedef void (* t_hts_htmlcheck_uninit)(void); -typedef int (* t_hts_htmlcheck_start)(httrackp* opt); -typedef int (* t_hts_htmlcheck_end)(void); -typedef int (* t_hts_htmlcheck_chopt)(httrackp* opt); -typedef int (* t_hts_htmlcheck_process)(char** html,int* len,char* url_adresse,char* url_fichier); -typedef int (* t_hts_htmlcheck)(char* html,int len,char* url_adresse,char* url_fichier); -typedef char* (* t_hts_htmlcheck_query)(char* question); -typedef char* (* t_hts_htmlcheck_query2)(char* question); -typedef char* (* t_hts_htmlcheck_query3)(char* question); -typedef int (* t_hts_htmlcheck_loop)(struct_back* sback,int back_index,int lien_tot,int lien_ntot,int stat_time,hts_stat_struct* stats); -typedef int (* t_hts_htmlcheck_check)(char* adr,char* fil,int status); -typedef int (* t_hts_htmlcheck_check_mime)(char* adr,char* fil,char* mime,int status); -typedef void (* t_hts_htmlcheck_pause)(char* lockfile); -typedef void (* t_hts_htmlcheck_filesave)(char* file); -typedef void (* t_hts_htmlcheck_filesave2)(char* hostname,char* filename,char* localfile,int is_new,int is_modified, int not_updated); -typedef int (* t_hts_htmlcheck_linkdetected)(char* link); -typedef int (* t_hts_htmlcheck_linkdetected2)(char* link, char* tag_start); -typedef int (* t_hts_htmlcheck_xfrstatus)(lien_back* back); -typedef int (* t_hts_htmlcheck_savename)(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save); -typedef int (* t_hts_htmlcheck_sendhead)(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* outgoing); -typedef int (* t_hts_htmlcheck_receivehead)(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* incoming); -*/ - -// demande d'interaction avec le shell -#if HTS_ANALYSTE -//char HTbuff[1024]; -/* -extern t_hts_htmlcheck_init hts_htmlcheck_init; -extern 
t_hts_htmlcheck_uninit hts_htmlcheck_uninit; -extern t_hts_htmlcheck_start hts_htmlcheck_start; -extern t_hts_htmlcheck_end hts_htmlcheck_end; -extern t_hts_htmlcheck_chopt hts_htmlcheck_chopt; -extern t_hts_htmlcheck_process hts_htmlcheck_preprocess; -extern t_hts_htmlcheck_process hts_htmlcheck_postprocess; -extern t_hts_htmlcheck hts_htmlcheck; -extern t_hts_htmlcheck_query hts_htmlcheck_query; -extern t_hts_htmlcheck_query2 hts_htmlcheck_query2; -extern t_hts_htmlcheck_query3 hts_htmlcheck_query3; -extern t_hts_htmlcheck_loop hts_htmlcheck_loop; -extern t_hts_htmlcheck_check hts_htmlcheck_check; -extern t_hts_htmlcheck_check_mime hts_htmlcheck_check_mime; -extern t_hts_htmlcheck_pause hts_htmlcheck_pause; -extern t_hts_htmlcheck_filesave hts_htmlcheck_filesave; -extern t_hts_htmlcheck_filesave2 hts_htmlcheck_filesave2; -extern t_hts_htmlcheck_linkdetected hts_htmlcheck_linkdetected; -extern t_hts_htmlcheck_linkdetected2 hts_htmlcheck_linkdetected2; -extern t_hts_htmlcheck_xfrstatus hts_htmlcheck_xfrstatus; -extern t_hts_htmlcheck_savename hts_htmlcheck_savename; -extern t_hts_htmlcheck_sendhead hts_htmlcheck_sendhead; -extern t_hts_htmlcheck_receivehead hts_htmlcheck_receivehead; -*/ - /* Library internal definictions */ #ifdef HTS_INTERNAL_BYTECODE // #ifndef HTTRACK_DEFLIB -HTSEXT_API int hts_is_parsing(int flag); -HTSEXT_API int hts_is_testing(void); -HTSEXT_API int hts_is_exiting(void); -HTSEXT_API int hts_setopt(httrackp* opt); -HTSEXT_API int hts_addurl(char** url); -HTSEXT_API int hts_resetaddurl(void); -HTSEXT_API int copy_htsopt(httrackp* from,httrackp* to); -HTSEXT_API char* hts_errmsg(void); -HTSEXT_API int hts_setpause(int); // pause transfer -HTSEXT_API int hts_request_stop(int force); +HTSEXT_API int hts_is_parsing(httrackp *opt, int flag); +HTSEXT_API int hts_is_testing(httrackp *opt); +HTSEXT_API int hts_addurl(httrackp *opt, char** url); +HTSEXT_API int hts_resetaddurl(httrackp *opt); +HTSEXT_API int copy_htsopt(const httrackp* from,httrackp* 
to); +HTSEXT_API char* hts_errmsg(httrackp *opt); +HTSEXT_API int hts_setpause(httrackp *opt, int); // -HTSEXT_API char* hts_cancel_file(char * s); -HTSEXT_API void hts_cancel_test(void); -HTSEXT_API void hts_cancel_parsing(void); -#endif +HTSEXT_API int hts_is_exiting(httrackp *opt); +HTSEXT_API int hts_request_stop(httrackp* opt, int force); // -// Variables globales -extern int _hts_in_html_parsing; -extern int _hts_in_html_done; // % réalisés -extern int _hts_in_html_poll; // parsing -extern char _hts_errmsg[1100]; -extern int _hts_setpause; -//extern httrackp* _hts_setopt; -extern char** _hts_addurl; -extern int _hts_cancel; +HTSEXT_API int hts_cancel_file_push(httrackp *opt, const char *url); +HTSEXT_API void hts_cancel_test(httrackp *opt); +HTSEXT_API void hts_cancel_parsing(httrackp *opt); +#endif + +char* hts_cancel_file_pop(httrackp *opt); + #endif // @@ -355,23 +328,24 @@ extern int _hts_cancel; //int httpmirror(char* url,int level,httrackp opt); int httpmirror(char* url1,httrackp* opt); -int filesave(httrackp* opt,char* adr,int len,char* s,char* url_adr /* = NULL */,char* url_fil /* = NULL */); +int filesave(httrackp* opt,const char* adr,int len,const char* s,const char* url_adr /* = NULL */,const char* url_fil /* = NULL */); +char* hts_cancel_file_pop(httrackp *opt); int check_fatal_io_errno(void); int engine_stats(void); void host_ban(httrackp* opt,lien_url** liens,int ptr,int lien_tot,struct_back* sback,char* host); -FILE* filecreate(char* s); -FILE* fileappend(char* s); -int filecreateempty(char* filename); -int filenote(char* s,filecreate_params* params); -void file_notify(char* adr,char* fil,char* save,int create,int modify,int wasupdated); -HTS_INLINE void usercommand(httrackp* opt,int exe,char* cmd,char* file,char* adr,char* fil); -void usercommand_exe(char* cmd,char* file); -//void* structcheck_init(int init); +FILE* filecreate(filenote_strc *strct,const char* s); +FILE* fileappend(filenote_strc *strct,const char* s); +int 
filecreateempty(filenote_strc *strct, const char* filename); +int filenote(filenote_strc *strct,const char* s,filecreate_params* params); +void file_notify(httrackp* opt,const char* adr,const char* fil,const char* save,int create,int modify,int wasupdated); +HTS_INLINE void usercommand(httrackp* opt,int exe,const char* cmd,const char* file,const char* adr,const char* fil); +void usercommand_exe(const char* cmd,const char* file); int filters_init(char*** ptrfilters, int maxfilter, int filterinc); #ifndef HTTRACK_DEFLIB -HTSEXT_API int structcheck(char* s); +HTSEXT_API int structcheck(const char* path); +HTSEXT_API int dir_exists(const char* path); #endif -HTS_INLINE int fspc(FILE* fp,char* type); +HTS_INLINE int fspc(httrackp *opt,FILE* fp,const char* type); char* next_token(char* p,int flag); // char* readfile(char* fil); @@ -393,15 +367,13 @@ int backlinks_done(struct_back* sback,lien_url** liens,int lien_tot,int ptr); int back_fillmax(struct_back* sback,httrackp* opt,cache_back* cache,lien_url** liens,int ptr,int numero_passe,int lien_tot); // cancel file -#if HTS_ANALYSTE #ifndef HTTRACK_DEFLIB -HTSEXT_API char* hts_cancel_file(char * s); -HTSEXT_API void hts_cancel_test(void); -HTSEXT_API void hts_cancel_parsing(void); -#endif +HTSEXT_API int hts_cancel_file_push(httrackp *opt, const char *url); +HTSEXT_API void hts_cancel_test(httrackp *opt); +HTSEXT_API void hts_cancel_parsing(httrackp *opt); #endif -int ask_continue(void); +int ask_continue(httrackp *opt); int nombre_digit(int n); // Java @@ -418,19 +390,6 @@ int read_stdin(char* s,int max); HTS_INLINE int check_sockerror(T_SOC s); HTS_INLINE int check_sockdata(T_SOC s); -httrackp* hts_declareoptbuffer(httrackp* optdecl); -void sig_finish( int code ); // finir et quitter -void sig_term( int code ); // quitter -#if HTS_WIN -void sig_ask( int code ); // demander -#else -void sig_back( int code ); // ignorer et mettre en backing -void sig_ask( int code ); // demander -void sig_ignore( int code ); // ignorer 
signal -void sig_brpipe( int code ); // treat if necessary -void sig_doback(int); // mettre en arrière plan -#endif - /* external modules */ int htsAddLink(htsmoduleStruct* str, char* link); @@ -440,7 +399,3 @@ void voidf(void); #define HTS_TOPINDEX "TOP_INDEX_HTTRACK" #endif - -#endif - - diff --git a/src/htscoremain.c b/src/htscoremain.c index bdebfb0..4231ee0 100644 --- a/src/htscoremain.c +++ b/src/htscoremain.c @@ -30,7 +30,7 @@ Please visit our Website: http://www.httrack.com /* ------------------------------------------------------------ */ -/* File: httrack.c subroutines: */ +/* File: opt->c subroutines: */ /* main routine (first called) */ /* Author: Xavier Roche */ /* ------------------------------------------------------------ */ @@ -50,11 +50,11 @@ Please visit our Website: http://www.httrack.com #include #if USE_BEGINTHREAD -#if HTS_WIN +#ifdef _WIN32 #include #endif #endif -#if HTS_WIN +#ifdef _WIN32 #else #ifndef HTS_DO_NOT_USE_UID /* setuid */ @@ -65,8 +65,6 @@ Please visit our Website: http://www.httrack.com #endif #endif -extern int exit_xh; // sortir prématurément - /* Resolver */ extern int IPV6_resolver; @@ -75,7 +73,7 @@ extern int IPV6_resolver; #define cmdl_add(token,argc,argv,buff,ptr) \ argv[argc]=(buff+ptr); \ strcpybuff(argv[argc],token); \ - ptr += (strlen(argv[argc])+2); \ + ptr += (int) (strlen(argv[argc])+2); \ argc++ // Insert a command in the argc/argv @@ -87,7 +85,7 @@ extern int IPV6_resolver; } \ argv[0]=(buff+ptr); \ strcpybuff(argv[0],token); \ - ptr += (strlen(argv[0])+2); \ + ptr += (int) (strlen(argv[0])+2); \ argc++ #define htsmain_free() do { if (url != NULL) { free(url); } } while(0) @@ -109,41 +107,16 @@ extern int IPV6_resolver; } \ } while(0) -void set_wrappers(void) { -#if HTS_ANALYSTE - // custom wrappers - hts_htmlcheck_init = (t_hts_htmlcheck_init) htswrap_read("init"); - hts_htmlcheck_uninit = (t_hts_htmlcheck_uninit) htswrap_read("free"); - hts_htmlcheck_start = (t_hts_htmlcheck_start) htswrap_read("start"); - 
hts_htmlcheck_end = (t_hts_htmlcheck_end) htswrap_read("end"); - hts_htmlcheck_chopt = (t_hts_htmlcheck_chopt) htswrap_read("change-options"); - hts_htmlcheck_preprocess = (t_hts_htmlcheck_process) htswrap_read("preprocess-html"); - hts_htmlcheck_postprocess = (t_hts_htmlcheck_process) htswrap_read("postprocess-html"); - hts_htmlcheck = (t_hts_htmlcheck) htswrap_read("check-html"); - hts_htmlcheck_query = (t_hts_htmlcheck_query) htswrap_read("query"); - hts_htmlcheck_query2 = (t_hts_htmlcheck_query2) htswrap_read("query2"); - hts_htmlcheck_query3 = (t_hts_htmlcheck_query3) htswrap_read("query3"); - hts_htmlcheck_loop = (t_hts_htmlcheck_loop) htswrap_read("loop"); - hts_htmlcheck_check = (t_hts_htmlcheck_check) htswrap_read("check-link"); - hts_htmlcheck_check_mime = (t_hts_htmlcheck_check_mime) htswrap_read("check-mime"); - hts_htmlcheck_pause = (t_hts_htmlcheck_pause) htswrap_read("pause"); - hts_htmlcheck_filesave = (t_hts_htmlcheck_filesave) htswrap_read("save-file"); - hts_htmlcheck_filesave2 = (t_hts_htmlcheck_filesave2) htswrap_read("save-file2"); - hts_htmlcheck_linkdetected = (t_hts_htmlcheck_linkdetected) htswrap_read("link-detected"); - hts_htmlcheck_linkdetected2 = (t_hts_htmlcheck_linkdetected2) htswrap_read("link-detected2"); - hts_htmlcheck_xfrstatus = (t_hts_htmlcheck_xfrstatus) htswrap_read("transfer-status"); - hts_htmlcheck_savename = (t_hts_htmlcheck_savename) htswrap_read("save-name"); - hts_htmlcheck_sendhead = (t_hts_htmlcheck_sendhead) htswrap_read("send-header"); - hts_htmlcheck_receivehead = (t_hts_htmlcheck_receivehead) htswrap_read("receive-header"); -#endif +HTSEXT_API int hts_main(int argc, char **argv) +{ + httrackp *opt = hts_create_opt(); + int ret = hts_main2(argc, argv, opt); + hts_free_opt(opt); + return ret; } // Main, récupère les paramètres et appelle le robot -#if HTS_ANALYSTE -HTSEXT_API int hts_main(int argc, char **argv) { -#else -int main(int argc, char **argv) { -#endif +HTSEXT_API int hts_main2(int argc, char **argv, 
httrackp *opt) { char** x_argv=NULL; // Patch pour argv et argc: en cas de récupération de ligne de commande char* x_argvblk=NULL; // (reprise ou update) int x_ptr=0; // offset @@ -154,15 +127,9 @@ int main(int argc, char **argv) { int url_sz = 65535; //char url[65536]; // URLS séparées par un espace // the parametres - httrackp BIGSTK httrack; int httrack_logmode=3; // ONE log file int recuperer=0; // récupérer un plantage (n'arrive jamais, à supprimer) -#if HTS_WIN -#if HTS_ANALYSTE!=2 - WORD wVersionRequested; /* requested version WinSock API */ - WSADATA BIGSTK wsadata; /* Windows Sockets API data */ -#endif -#else +#ifndef _WIN32 #ifndef HTS_DO_NOT_USE_UID int switch_uid=-1,switch_gid=-1; /* setuid/setgid */ #endif @@ -170,207 +137,41 @@ int main(int argc, char **argv) { #endif // ensureUrlCapacity(url, url_sz, 65536); - // - -#if HTS_ANALYSTE - // custom wrappers - set_wrappers(); -#endif - - // options par défaut - memset(&httrack, 0, sizeof(httrackp)); - httrack.wizard=2; // wizard automatique - httrack.quiet=0; // questions - // - httrack.travel=0; // même adresse - httrack.depth=9999; // mirror total par défaut - httrack.extdepth=0; // mais pas à l'extérieur - httrack.seeker=1; // down - httrack.urlmode=2; // relatif par défaut - httrack.debug=0; // pas de débug en plus - httrack.getmode=3; // linear scan - httrack.maxsite=-1; // taille max site (aucune) - httrack.maxfile_nonhtml=-1; // taille max fichier non html - httrack.maxfile_html=-1; // idem pour html - httrack.maxsoc=4; // nbre socket max - httrack.fragment=-1; // pas de fragmentation - httrack.nearlink=0; // ne pas prendre les liens non-html "adjacents" - httrack.makeindex=1; // faire un index - httrack.kindex=0; // index 'keyword' - httrack.delete_old=1; // effacer anciens fichiers - httrack.makestat=0; // pas de fichier de stats - httrack.maketrack=0; // ni de tracking - httrack.timeout=120; // timeout par défaut (2 minutes) - httrack.cache=1; // cache prioritaire - httrack.shell=0; // pas de 
shell par defaut - httrack.proxy.active=0; // pas de proxy - strcpybuff(httrack.proxy.bindhost, ""); // bind default host - httrack.user_agent_send=1; // envoyer un user-agent - strcpybuff(httrack.user_agent,"Mozilla/4.5 (compatible; HTTrack 3.0x; Windows 98)"); - strcpybuff(httrack.referer, ""); - strcpybuff(httrack.from, ""); - httrack.savename_83=0; // noms longs par défaut - httrack.savename_type=0; // avec structure originale - httrack.savename_delayed=2;// hard delayed type (default) - httrack.delayed_cached=1; // cached delayed type (default) - httrack.mimehtml=0; // pas MIME-html - httrack.parsejava=1; // parser classes - httrack.hostcontrol=0; // PAS de control host pour timeout et traffic jammer - httrack.retry=2; // 2 retry par défaut - httrack.errpage=1; // copier ou générer une page d'erreur en cas d'erreur (404 etc.) - httrack.check_type=1; // vérifier type si inconnu (cgi,asp..) SAUF / considéré comme html - httrack.all_in_cache=0; // ne pas tout stocker en cache - httrack.robots=2; // traiter les robots.txt - httrack.external=0; // liens externes normaux - httrack.passprivacy=0; // mots de passe dans les fichiers - httrack.includequery=1; // include query-string par défaut - httrack.mirror_first_page=0; // pas mode mirror links - httrack.accept_cookie=1; // gérer les cookies - httrack.cookie=NULL; - httrack.http10=0; // laisser http/1.1 - httrack.nokeepalive = 0; // pas keep-alive - httrack.nocompression=0; // pas de compression - httrack.tolerant=0; // ne pas accepter content-length incorrect - httrack.parseall=1; // tout parser (tags inconnus, par exemple) - httrack.parsedebug=0; // pas de mode débuggage - httrack.norecatch=0; // ne pas reprendre les fichiers effacés par l'utilisateur - httrack.verbosedisplay=0; // pas d'animation texte - httrack.sizehack=0; // size hack - httrack.urlhack=1; // url hack (normalizer) - strcpybuff(httrack.footer,HTS_DEFAULT_FOOTER); - httrack.ftp_proxy=1; // proxy http pour ftp - strcpybuff(httrack.filelist,""); - 
strcpybuff(httrack.lang_iso,"en, *"); - strcpybuff(httrack.mimedefs,"\n"); // aucun filtre mime (\n IMPORTANT) - // - httrack.log=stdout; - httrack.errlog=stderr; - httrack.flush=1; // flush sur les fichiers log - //httrack.aff_progress=0; - httrack.keyboard=0; - // - strcpybuff(httrack.path_html,""); - strcpybuff(httrack.path_log,""); - strcpybuff(httrack.path_bin,""); - // -#if HTS_SPARE_MEMORY==0 - httrack.maxlink=100000; // 100,000 liens max par défaut (400Kb) - httrack.maxfilter=200; // 200 filtres max par défaut -#else - httrack.maxlink=10000; // 10,000 liens max par défaut (40Kb) - httrack.maxfilter=50; // 50 filtres max par défaut -#endif - httrack.maxcache=1048576*32; // a peu près 32Mo en cache max -- OPTION NON PARAMETRABLE POUR L'INSTANT -- - //httrack.maxcache_anticipate=256; // maximum de liens à anticiper - httrack.maxtime=-1; // temps max en secondes -#if HTS_USEMMS - httrack.mms_maxtime = 60*3600; // max time for mms streams (one hour) -#endif - httrack.maxrate=25000; // taux maxi - httrack.maxconn=5.0; // nombre connexions/s - httrack.waittime=-1; // wait until.. 
hh*3600+mm*60+ss - // - httrack.exec=argv[0]; - httrack.is_update=0; // not an update (yet) - httrack.dir_topindex=0; // do not built top index (yet) - // - httrack.bypass_limits=0; // enforce limits by default - httrack.state.stop=0; // stopper - httrack.state.exit_xh=0; // abort - // - _DEBUG_HEAD=0; // pas de debuggage en têtes + // Create options + _DEBUG_HEAD=0; // pas de debuggage en têtes -#if HTS_WIN -#if HTS_ANALYSTE!=2 - { - int stat; - wVersionRequested = 0x0101; - stat = WSAStartup( wVersionRequested, &wsadata ); - if (stat != 0) { - HTS_PANIC_PRINTF("Winsock not found!\n"); - htsmain_free(); - return -1; - } else if (LOBYTE(wsadata.wVersion) != 1 && HIBYTE(wsadata.wVersion) != 1) { - HTS_PANIC_PRINTF("WINSOCK.DLL does not support version 1.1\n"); - WSACleanup(); - htsmain_free(); - return -1; - } - } -#endif -#endif - /* Init root dir */ hts_rootdir(argv[0]); -#if HTS_WIN +#ifdef _WIN32 #else /* Terminal is a tty, may ask questions and display funny information */ if (isatty(1)) { - httrack.quiet=0; - httrack.verbosedisplay=1; + opt->quiet=0; + opt->verbosedisplay=1; } /* Not a tty, no stdin input or funny output! */ else { - httrack.quiet=1; - httrack.verbosedisplay=0; + opt->quiet=1; + opt->verbosedisplay=0; } #endif - /* First test: if -#R then only launch ftp */ - if (argc > 2) { - if (strcmp(argv[1],"-#R")==0) { - if (argc==6) { - lien_back r; - char* path; - FILE* fp; - strcpybuff(r.url_adr,argv[2]); - strcpybuff(r.url_fil,argv[3]); - strcpybuff(r.url_sav,argv[4]); - path=argv[5]; - r.status=1000; - run_launch_ftp(&r); - fp=fopen(fconv(path),"wb"); - if (fp) { - fprintf(fp,"%d %s",r.r.statuscode,r.r.msg); - fclose(fp); fp=NULL; - rename(fconv(path),fconcat(path,".ok")); - } else remove(fconv(path)); - } else { - printf("htsftp error, wrong parameter number (%d)\n",argc); - } - exit(0); // pas _exit() - } - } - - // ok, non ftp, continuer - - // Binary program path? 
#ifndef HTS_HTTRACKDIR { - char* path=fslash(argv[0]); + char catbuff[CATBUFF_SIZE]; + char* path=fslash(catbuff,argv[0]); char* a; if ((a=strrchr(path,'/'))) { - httrack.path_bin[0]='\0'; - strncatbuff(httrack.path_bin,argv[0],(int) a - (int) path); + StringCopyN(opt->path_bin,argv[0],a - path); } } #else - strcpybuff(httrack.path_bin, HTS_HTTRACKDIR); + StringCopy(opt->path_bin, HTS_HTTRACKDIR); #endif - /* libhttrack-plugin DLL preload (libhttrack-plugin.so or libhttrack-plugin.dll) */ - { - void* userfunction = getFunctionPtr(&httrack, "libhttrack-plugin", "plugin_init"); - if (userfunction != NULL) { - t_hts_htmlcheck_init initFnc = (t_hts_htmlcheck_init) userfunction; - initFnc(); - set_wrappers(); /* Re-read wrappers internal static functions */ - } - } - /* filter CR, LF, TAB.. */ { int na; @@ -387,15 +188,13 @@ int main(int argc, char **argv) { } } - - /* create x_argvblk buffer for transformed command line */ { int current_size=0; int size; int na; for(na=0;na0) current_size += size; x_argvblk=(char*) malloct(current_size+32768); @@ -459,22 +258,22 @@ int main(int argc, char **argv) { //} } else { if (strcmp(tmp_argv[0],"-h")==0) { - help(argv[0],!httrack.quiet); + help(argv[0],!opt->quiet); htsmain_free(); return 0; } else { if (strncmp(tmp_argv[0],"--",2)) { /* pas */ if ((strchr(tmp_argv[0],'q')!=NULL)) - httrack.quiet=1; // ne pas poser de questions! (nohup par exemple) + opt->quiet=1; // ne pas poser de questions! (nohup par exemple) if ((strchr(tmp_argv[0],'i')!=NULL)) { // doit.log! argv_url=-1; /* forcer */ - httrack.quiet=1; + opt->quiet=1; } } else if (strcmp(tmp_argv[0] + 2,"quiet") == 0) { - httrack.quiet=1; // ne pas poser de questions! (nohup par exemple) + opt->quiet=1; // ne pas poser de questions! 
(nohup par exemple) } else if (strcmp(tmp_argv[0] + 2,"continue") == 0) { argv_url=-1; /* forcer */ - httrack.quiet=1; + opt->quiet=1; } } } @@ -535,40 +334,41 @@ int main(int argc, char **argv) { } else { int i, j; int inQuote; - char* path; + String *path; int noDbl = 0; if (com[1] == '1') { /* only 1 arg */ com++; noDbl = 1; } na++; - httrack.path_html[0] = '\0'; - httrack.path_log[0] = '\0'; - for(i = 0, j = 0, inQuote = 0, path = httrack.path_html ; argv[na][i] != 0 ; i++) { + StringClear(opt->path_html); + StringClear(opt->path_log); + for(i = 0, j = 0, inQuote = 0, path = &opt->path_html ; argv[na][i] != 0 ; i++) { if (argv[na][i] == '"') { if (inQuote) inQuote = 0; else inQuote = 1; } else if (!inQuote && !noDbl && argv[na][i] == ',') { - path[j++] = '\0'; + //StringAddchar(path, '\0'); j = 0; - path = httrack.path_log; + path = &opt->path_log; } else { - path[j++] = argv[na][i]; + StringAddchar(*path, argv[na][i]); + //path[j++] = argv[na][i]; } } - path[j++] = '\0'; - if (httrack.path_log[0] == '\0') { - strcpybuff(httrack.path_log, httrack.path_html); + //path[j++] = '\0'; + if (StringLength(opt->path_log) == 0) { + StringCopyS(opt->path_log, opt->path_html); } - check_path(httrack.path_log,argv_firsturl); - if (check_path(httrack.path_html,argv_firsturl)) { - httrack.dir_topindex=1; // rebuilt top index + check_path(&opt->path_log, argv_firsturl); + if (check_path(&opt->path_html, argv_firsturl)) { + opt->dir_topindex=1; // rebuilt top index } - //printf("-->%s\n%s\n",httrack.path_html,httrack.path_log); + //printf("-->%s\n%s\n",StringBuff(opt->path_html),StringBuff(opt->path_log)); } break; } // switch @@ -586,13 +386,13 @@ int main(int argc, char **argv) { printf("Loading httrackrc/doit.log\n"); #endif /* recreate a doit.log (no old doit.log or new URLs (and parameters)) */ - if ((strnotempty(httrack.path_log)) || (strnotempty(httrack.path_html))) + if ((strnotempty(StringBuff(opt->path_log))) || (strnotempty(StringBuff(opt->path_html)))) loops++; // 
do not loop once again and do not include rc file (O option exists) else { - if ( (!fexist(fconcat(httrack.path_log,"hts-cache/doit.log"))) || (argv_url>0) ) { - if (!optinclude_file(fconcat(httrack.path_log,HTS_HTTRACKRC),&argc,argv,x_argvblk,&x_ptr)) + if ( (!fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/doit.log"))) || (argv_url>0) ) { + if (!optinclude_file(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),HTS_HTTRACKRC),&argc,argv,x_argvblk,&x_ptr)) if (!optinclude_file(HTS_HTTRACKRC,&argc,argv,x_argvblk,&x_ptr)) { - if (!optinclude_file(fconcat(hts_gethome(),"/"HTS_HTTRACKRC),&argc,argv,x_argvblk,&x_ptr)) { + if (!optinclude_file(fconcat(OPT_GET_BUFF(opt), hts_gethome(),"/"HTS_HTTRACKRC),&argc,argv,x_argvblk,&x_ptr)) { #ifdef HTS_HTTRACKCNF optinclude_file(HTS_HTTRACKCNF,&argc,argv,x_argvblk,&x_ptr); #endif @@ -608,8 +408,8 @@ int main(int argc, char **argv) { } // traiter -O /* load doit.log and insert in current command line */ - if ( fexist(fconcat(httrack.path_log,"hts-cache/doit.log")) && (argv_url<=0) ) { - FILE* fp=fopen(fconcat(httrack.path_log,"hts-cache/doit.log"),"rb"); + if ( fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/doit.log")) && (argv_url<=0) ) { + FILE* fp=fopen(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/doit.log"),"rb"); if (fp) { int insert_after=1; /* insérer après nom au début */ // @@ -649,30 +449,30 @@ int main(int argc, char **argv) { #if DEBUG_STEPS printf("Checking cache\n"); #endif - if (!fexist(fconcat(httrack.path_log,"hts-cache/new.zip"))) { - if ( fexist(fconcat(httrack.path_log,"hts-cache/old.zip")) ) { - rename(fconcat(httrack.path_log,"hts-cache/old.zip"),fconcat(httrack.path_log,"hts-cache/new.zip")); + if (!fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.zip"))) { + if ( fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.zip")) ) { + rename(fconcat(OPT_GET_BUFF(opt), 
StringBuff(opt->path_log),"hts-cache/old.zip"),fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.zip")); } - } else if ( (!fexist(fconcat(httrack.path_log,"hts-cache/new.dat"))) || (!fexist(fconcat(httrack.path_log,"hts-cache/new.ndx"))) ) { - if ( (fexist(fconcat(httrack.path_log,"hts-cache/old.dat"))) && (fexist(fconcat(httrack.path_log,"hts-cache/old.ndx"))) ) { - remove(fconcat(httrack.path_log,"hts-cache/new.dat")); - remove(fconcat(httrack.path_log,"hts-cache/new.ndx")); - //remove(fconcat(httrack.path_log,"hts-cache/new.lst")); - rename(fconcat(httrack.path_log,"hts-cache/old.dat"),fconcat(httrack.path_log,"hts-cache/new.dat")); - rename(fconcat(httrack.path_log,"hts-cache/old.ndx"),fconcat(httrack.path_log,"hts-cache/new.ndx")); - //rename(fconcat(httrack.path_log,"hts-cache/old.lst"),fconcat(httrack.path_log,"hts-cache/new.lst")); + } else if ( (!fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.dat"))) || (!fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.ndx"))) ) { + if ( (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.dat"))) && (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.ndx"))) ) { + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.dat")); + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.ndx")); + //remove(fconcat(StringBuff(opt->path_log),"hts-cache/new.lst")); + rename(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.dat"),fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.dat")); + rename(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.ndx"),fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.ndx")); + //rename(fconcat(StringBuff(opt->path_log),"hts-cache/old.lst"),fconcat(StringBuff(opt->path_log),"hts-cache/new.lst")); } } /* Interrupted mirror detected */ - if (!httrack.quiet) { - if 
(fexist(fconcat(httrack.path_log,"hts-in_progress.lock"))) { + if (!opt->quiet) { + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-in_progress.lock"))) { /* Old cache */ - if ( (fexist(fconcat(httrack.path_log,"hts-cache/old.dat"))) && (fexist(fconcat(httrack.path_log,"hts-cache/old.ndx"))) ) { - if (httrack.log != NULL) { - fprintf(httrack.log,"Warning!\n"); - fprintf(httrack.log,"An aborted mirror has been detected!\nThe current temporary cache is required for any update operation and only contains data downloaded during the last aborted session.\nThe former cache might contain more complete information; if you do not want to lose that information, you have to restore it and delete the current cache.\nThis can easily be done here by erasing the hts-cache/new.* files\n"); - fprintf(httrack.log,"Please restart HTTrack with --continue (-iC1) option to override this message!\n"); + if ( (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.dat"))) && (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.ndx"))) ) { + if (opt->log != NULL) { + fprintf(opt->log,"Warning!\n"); + fprintf(opt->log,"An aborted mirror has been detected!\nThe current temporary cache is required for any update operation and only contains data downloaded during the last aborted session.\nThe former cache might contain more complete information; if you do not want to lose that information, you have to restore it and delete the current cache.\nThis can easily be done here by erasing the hts-cache/new.* files\n"); + fprintf(opt->log,"Please restart HTTrack with --continue (-iC1) option to override this message!\n"); } exit(0); } @@ -695,38 +495,38 @@ int main(int argc, char **argv) { if (argv[i][1]=='-') { // --xxx if ((strfield2(argv[i]+2,"clean")) || (strfield2(argv[i]+2,"tide"))) { // nettoyer strcpybuff(argv[i]+1,""); - if (fexist(fconcat(httrack.path_log,"hts-log.txt"))) - remove(fconcat(httrack.path_log,"hts-log.txt")); - if 
(fexist(fconcat(httrack.path_log,"hts-err.txt"))) - remove(fconcat(httrack.path_log,"hts-err.txt")); - if (fexist(fconcat(httrack.path_html,"index.html"))) - remove(fconcat(httrack.path_html,"index.html")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-log.txt"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-log.txt")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-err.txt"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-err.txt")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_html),"index.html"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_html),"index.html")); /* */ - if (fexist(fconcat(httrack.path_log,"hts-cache/new.zip"))) - remove(fconcat(httrack.path_log,"hts-cache/new.zip")); - if (fexist(fconcat(httrack.path_log,"hts-cache/old.zip"))) - remove(fconcat(httrack.path_log,"hts-cache/old.zip")); - if (fexist(fconcat(httrack.path_log,"hts-cache/new.dat"))) - remove(fconcat(httrack.path_log,"hts-cache/new.dat")); - if (fexist(fconcat(httrack.path_log,"hts-cache/new.ndx"))) - remove(fconcat(httrack.path_log,"hts-cache/new.ndx")); - if (fexist(fconcat(httrack.path_log,"hts-cache/old.dat"))) - remove(fconcat(httrack.path_log,"hts-cache/old.dat")); - if (fexist(fconcat(httrack.path_log,"hts-cache/old.ndx"))) - remove(fconcat(httrack.path_log,"hts-cache/old.ndx")); - if (fexist(fconcat(httrack.path_log,"hts-cache/new.lst"))) - remove(fconcat(httrack.path_log,"hts-cache/new.lst")); - if (fexist(fconcat(httrack.path_log,"hts-cache/old.lst"))) - remove(fconcat(httrack.path_log,"hts-cache/old.lst")); - if (fexist(fconcat(httrack.path_log,"hts-cache/new.txt"))) - remove(fconcat(httrack.path_log,"hts-cache/new.txt")); - if (fexist(fconcat(httrack.path_log,"hts-cache/old.txt"))) - remove(fconcat(httrack.path_log,"hts-cache/old.txt")); - if (fexist(fconcat(httrack.path_log,"hts-cache/doit.log"))) - remove(fconcat(httrack.path_log,"hts-cache/doit.log")); 
- if (fexist(fconcat(httrack.path_log,"hts-in_progress.lock"))) - remove(fconcat(httrack.path_log,"hts-in_progress.lock")); - rmdir(fconcat(httrack.path_log,"hts-cache")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.zip"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.zip")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.zip"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.zip")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.dat"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.dat")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.ndx"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.ndx")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.dat"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.dat")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.ndx"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.ndx")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.lst"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.lst")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.lst"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.lst")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.txt"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.txt")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.txt"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.txt")); + if (fexist(fconcat(OPT_GET_BUFF(opt), 
StringBuff(opt->path_log),"hts-cache/doit.log"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/doit.log")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-in_progress.lock"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-in_progress.lock")); + rmdir(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache")); // } else if (strfield2(argv[i]+2,"catchurl")) { // capture d'URL via proxy temporaire! argv_url=1; // forcer a passer les parametres @@ -747,7 +547,7 @@ int main(int argc, char **argv) { printf("Cheking for updates...\n"); strcpybuff(_args[0],argv[0]); strcpybuff(_args[1],"--get"); - sprintf(_args[2],HTS_UPDATE_WEBSITE,HTS_PLATFORM,""); + sprintf(_args[2],HTS_UPDATE_WEBSITE,0,""); strcpybuff(_args[3],"--quickinfo"); args[0]=_args[0]; args[1]=_args[1]; @@ -784,12 +584,12 @@ int main(int argc, char **argv) { } else if (!cmdl_opt(argv[na])) { argv_url++; // un de plus } else if (strcmp(argv[na],"-h")==0) { - help(argv[0],!httrack.quiet); + help(argv[0],!opt->quiet); htsmain_free(); return 0; } else { if ((strchr(argv[na],'q')!=NULL)) - httrack.quiet=1; // ne pas poser de questions! (nohup par exemple) + opt->quiet=1; // ne pas poser de questions! (nohup par exemple) if ((strchr(argv[na],'i')!=NULL)) { // doit.log! argv_url=0; na=argc; @@ -803,8 +603,8 @@ int main(int argc, char **argv) { // Exemple: httrack www.truc.fr -L0 puis ^C puis httrack sans URL : ajouter URL précédente /* if (argv_url==0) { - //if ((fexist(fconcat(httrack.path_log,"hts-cache/new.dat"))) && (fexist(fconcat(httrack.path_log,"hts-cache/new.ndx")))) { // il existe déja un cache précédent.. renommer - if (fexist(fconcat(httrack.path_log,"hts-cache/doit.log"))) { // un cache est présent + //if ((fexist(fconcat(StringBuff(opt->path_log),"hts-cache/new.dat"))) && (fexist(fconcat(StringBuff(opt->path_log),"hts-cache/new.ndx")))) { // il existe déja un cache précédent.. 
renommer + if (fexist(fconcat(StringBuff(opt->path_log),"hts-cache/doit.log"))) { // un cache est présent x_argvblk=(char*) calloct(32768,1); @@ -813,7 +613,7 @@ int main(int argc, char **argv) { int x_argc; //strcpybuff(x_argvblk,"httrack "); - fp=fopen(fconcat(httrack.path_log,"hts-cache/doit.log"),"rb"); + fp=fopen(fconcat(StringBuff(opt->path_log),"hts-cache/doit.log"),"rb"); if (fp) { linput(fp,x_argvblk+strlen(x_argvblk),8192); fclose(fp); fp=NULL; @@ -865,7 +665,7 @@ int main(int argc, char **argv) { na++; // sauter nom de proxy } else { if ((strchr(argv[na],'q')!=NULL) || (strchr(argv[na],'i')!=NULL)) - httrack.quiet=1; // ne pas poser de questions! (nohup par exemple) + opt->quiet=1; // ne pas poser de questions! (nohup par exemple) } } } @@ -878,26 +678,26 @@ int main(int argc, char **argv) { if (argv_url==0) { // Présence d'un cache, que faire?.. if ( - ( fexist(fconcat(httrack.path_log,"hts-cache/new.zip")) ) + ( fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.zip")) ) || - ( fexist(fconcat(httrack.path_log,"hts-cache/new.dat")) && fexist(fconcat(httrack.path_log,"hts-cache/new.ndx")) ) + ( fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.dat")) && fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.ndx")) ) ) { // il existe déja un cache précédent.. 
renommer - if (fexist(fconcat(httrack.path_log,"hts-cache/doit.log"))) { // un cache est présent + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/doit.log"))) { // un cache est présent if (x_argvblk!=NULL) { int m; // établir mode - mode cache: 1 (cache valide) 2 (cache à vérifier) - if (fexist(fconcat(httrack.path_log,"hts-in_progress.lock"))) { // cache prioritaire + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-in_progress.lock"))) { // cache prioritaire m=1; recuperer=1; } else { m=2; } - httrack.cache=m; + opt->cache=m; - if (httrack.quiet==0) { // sinon on continue automatiquement + if (opt->quiet==0) { // sinon on continue automatiquement HT_REQUEST_START; HT_PRINT("A cache (hts-cache/) has been found in the directory "); - HT_PRINT(httrack.path_log); + HT_PRINT(StringBuff(opt->path_log)); HT_PRINT(LF); if (m==1) { HT_PRINT("That means that a transfer has been aborted"LF); @@ -908,7 +708,7 @@ int main(int argc, char **argv) { } HT_PRINT("httrack "); HT_PRINT(x_argvblk); HT_PRINT("?"LF); HT_REQUEST_END; - if (!ask_continue()) { + if (!ask_continue(opt)) { htsmain_free(); return 0; } @@ -928,79 +728,75 @@ int main(int argc, char **argv) { } else { // aucune URL définie et pas de cache if (argc > 1 && strcmp(argv[0], "-#h") == 0) { - printf("HTTrack version "HTTRACK_VERSION"%s\n", WHAT_is_available); + printf("HTTrack version "HTTRACK_VERSION"%s\n", hts_get_version_info(opt)); exit(0); } -#if HTS_ANALYSTE!=2 - if (httrack.quiet) { -#endif - help(argv[0],!httrack.quiet); + if (opt->quiet) { + help(argv[0],!opt->quiet); htsmain_free(); return -1; -#if HTS_ANALYSTE!=2 } else { - help_wizard(&httrack); + help_wizard(opt); htsmain_free(); return -1; } -#endif htsmain_free(); return 0; } } else { // plus de 2 paramètres // un fichier log existe? - if (fexist(fconcat(httrack.path_log,"hts-in_progress.lock"))) { // fichier lock? 
+ if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-in_progress.lock"))) { // fichier lock? //char s[32]; - httrack.cache=1; // cache prioritaire - if (httrack.quiet==0) { + opt->cache=1; // cache prioritaire + if (opt->quiet==0) { if ( - ( fexist(fconcat(httrack.path_log,"hts-cache/new.zip")) ) + ( fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.zip")) ) || - ( fexist(fconcat(httrack.path_log,"hts-cache/new.dat")) && fexist(fconcat(httrack.path_log,"hts-cache/new.ndx")) ) + ( fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.dat")) && fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.ndx")) ) ) { HT_REQUEST_START; HT_PRINT("There is a lock-file in the directory "); - HT_PRINT(httrack.path_log); + HT_PRINT(StringBuff(opt->path_log)); HT_PRINT(LF"That means that a mirror has not been terminated"LF); HT_PRINT("Be sure you call httrack with proper parameters"LF); HT_PRINT("(The cache allows you to restart faster the transfer)"LF); HT_REQUEST_END; - if (!ask_continue()) { + if (!ask_continue(opt)) { htsmain_free(); return 0; } } } - } else if (fexist(fconcat(httrack.path_html,"index.html"))) { + } else if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_html),"index.html"))) { //char s[32]; - httrack.cache=2; // cache vient après test de validité - if (httrack.quiet==0) { + opt->cache=2; // cache vient après test de validité + if (opt->quiet==0) { if ( - ( fexist(fconcat(httrack.path_log,"hts-cache/new.zip")) ) + ( fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.zip")) ) || - ( fexist(fconcat(httrack.path_log,"hts-cache/new.dat")) && fexist(fconcat(httrack.path_log,"hts-cache/new.ndx")) ) + ( fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.dat")) && fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.ndx")) ) ) { HT_REQUEST_START; HT_PRINT("There is an index.html and a hts-cache folder in the 
directory "); - HT_PRINT(httrack.path_log); + HT_PRINT(StringBuff(opt->path_log)); HT_PRINT(LF"A site may have been mirrored here, that could mean that you want to update it"LF); HT_PRINT("Be sure parameters are ok"LF); HT_REQUEST_END; - if (!ask_continue()) { + if (!ask_continue(opt)) { htsmain_free(); return 0; } } else { HT_REQUEST_START; HT_PRINT("There is an index.html in the directory "); - HT_PRINT(httrack.path_log); + HT_PRINT(StringBuff(opt->path_log)); HT_PRINT(" but no cache"LF); HT_PRINT("There is an index.html in the directory, but no cache"LF); HT_PRINT("A site may have been mirrored here, and erased.."LF); HT_PRINT("Be sure parameters are ok"LF); HT_REQUEST_END; - if (!ask_continue()) { + if (!ask_continue(opt)) { htsmain_free(); return 0; } @@ -1048,113 +844,119 @@ int main(int argc, char **argv) { return 0; // déja fait normalement // case 'g': // récupérer un (ou plusieurs) fichiers isolés - httrack.wizard=2; // le wizard on peut plus s'en passer.. - //httrack.wizard=0; // pas de wizard - httrack.cache=0; // ni de cache - httrack.makeindex=0; // ni d'index + opt->wizard=2; // le wizard on peut plus s'en passer.. 
+ //opt->wizard=0; // pas de wizard + opt->cache=0; // ni de cache + opt->makeindex=0; // ni d'index httrack_logmode=1; // erreurs à l'écran - httrack.savename_type=1003; // mettre dans le répertoire courant - httrack.depth=0; // ne pas explorer la page - httrack.accept_cookie=0; // pas de cookies - httrack.robots=0; // pas de robots + opt->savename_type=1003; // mettre dans le répertoire courant + opt->depth=0; // ne pas explorer la page + opt->accept_cookie=0; // pas de cookies + opt->robots=0; // pas de robots break; - case 'w': httrack.wizard=2; // wizard 'soft' (ne pose pas de questions) - httrack.travel=0; - httrack.seeker=1; + case 'w': opt->wizard=2; // wizard 'soft' (ne pose pas de questions) + opt->travel=0; + opt->seeker=1; break; - case 'W': httrack.wizard=1; // Wizard-Help (pose des questions) - httrack.travel=0; - httrack.seeker=1; + case 'W': opt->wizard=1; // Wizard-Help (pose des questions) + opt->travel=0; + opt->seeker=1; break; case 'r': // n'est plus le recurse get bestial mais wizard itou! 
if (isdigit((unsigned char)*(com+1))) { - sscanf(com+1,"%d",&httrack.depth); + sscanf(com+1,"%d",&opt->depth); while(isdigit((unsigned char)*(com+1))) com++; - } else httrack.depth=3; + } else opt->depth=3; break; /* - case 'r': httrack.wizard=0; + case 'r': opt->wizard=0; if (isdigit((unsigned char)*(com+1))) { - sscanf(com+1,"%d",&httrack.depth); + sscanf(com+1,"%d",&opt->depth); while(isdigit((unsigned char)*(com+1))) com++; - } else httrack.depth=3; + } else opt->depth=3; break; */ // - // note: les tests httrack.depth sont pour éviter de faire + // note: les tests opt->depth sont pour éviter de faire // un miroir du web (:-O) accidentelement ;-) - case 'a': /*if (httrack.depth==9999) httrack.depth=3;*/ - httrack.travel=0+(httrack.travel&256); break; - case 'd': /*if (httrack.depth==9999) httrack.depth=3;*/ - httrack.travel=1+(httrack.travel&256); break; - case 'l': /*if (httrack.depth==9999) httrack.depth=3;*/ - httrack.travel=2+(httrack.travel&256); break; - case 'e': /*if (httrack.depth==9999) httrack.depth=3;*/ - httrack.travel=7+(httrack.travel&256); break; - case 't': httrack.travel|=256; break; - case 'n': httrack.nearlink=1; break; - case 'x': httrack.external=1; break; + case 'a': /*if (opt->depth==9999) opt->depth=3;*/ + opt->travel=0+(opt->travel&256); break; + case 'd': /*if (opt->depth==9999) opt->depth=3;*/ + opt->travel=1+(opt->travel&256); break; + case 'l': /*if (opt->depth==9999) opt->depth=3;*/ + opt->travel=2+(opt->travel&256); break; + case 'e': /*if (opt->depth==9999) opt->depth=3;*/ + opt->travel=7+(opt->travel&256); break; + case 't': opt->travel|=256; break; + case 'n': opt->nearlink=1; break; + case 'x': opt->external=1; break; // - case 'U': httrack.seeker=2; break; - case 'D': httrack.seeker=1; break; - case 'S': httrack.seeker=0; break; - case 'B': httrack.seeker=3; break; + case 'U': opt->seeker=2; break; + case 'D': opt->seeker=1; break; + case 'S': opt->seeker=0; break; + case 'B': opt->seeker=3; break; // - case 'Y': 
httrack.mirror_first_page=1; break; + case 'Y': opt->mirror_first_page=1; break; // - case 'q': case 'i': httrack.quiet=1; break; + case 'q': case 'i': opt->quiet=1; break; // case 'Q': httrack_logmode=0; break; case 'v': httrack_logmode=1; break; case 'f': httrack_logmode=2; if (*(com+1)=='2') httrack_logmode=3; while(isdigit((unsigned char)*(com+1))) com++; break; // - //case 'A': httrack.urlmode=1; break; - //case 'R': httrack.urlmode=2; break; - case 'K': httrack.urlmode=0; + //case 'A': opt->urlmode=1; break; + //case 'R': opt->urlmode=2; break; + case 'K': opt->urlmode=0; if (isdigit((unsigned char)*(com+1))) { - sscanf(com+1,"%d",&httrack.urlmode); - if (httrack.urlmode == 0) { // in fact K0 ==> K2 + sscanf(com+1,"%d",&opt->urlmode); + if (opt->urlmode == 0) { // in fact K0 ==> K2 // and K ==> K0 - httrack.urlmode=2; + opt->urlmode=2; } while(isdigit((unsigned char)*(com+1))) com++; } - //if (*(com+1)=='0') { httrack.urlmode=2; com++; } break; + //if (*(com+1)=='0') { opt->urlmode=2; com++; } break; // case 'c': if (isdigit((unsigned char)*(com+1))) { - sscanf(com+1,"%d",&httrack.maxsoc); + sscanf(com+1,"%d",&opt->maxsoc); while(isdigit((unsigned char)*(com+1))) com++; - httrack.maxsoc=max(httrack.maxsoc,1); // FORCER A 1 - } else httrack.maxsoc=4; + opt->maxsoc=max(opt->maxsoc,1); // FORCER A 1 + } else opt->maxsoc=4; break; // - case 'p': sscanf(com+1,"%d",&httrack.getmode); while(isdigit((unsigned char)*(com+1))) com++; break; + case 'p': sscanf(com+1,"%d",&opt->getmode); while(isdigit((unsigned char)*(com+1))) com++; break; // - case 'G': sscanf(com+1,LLintP,&httrack.fragment); while(isdigit((unsigned char)*(com+1))) com++; break; - case 'M': sscanf(com+1,LLintP,&httrack.maxsite); while(isdigit((unsigned char)*(com+1))) com++; break; - case 'm': sscanf(com+1,LLintP,&httrack.maxfile_nonhtml); while(isdigit((unsigned char)*(com+1))) com++; + case 'G': sscanf(com+1,LLintP,&opt->fragment); while(isdigit((unsigned char)*(com+1))) com++; break; + case 'M': 
sscanf(com+1,LLintP,&opt->maxsite); while(isdigit((unsigned char)*(com+1))) com++; break; + case 'm': sscanf(com+1,LLintP,&opt->maxfile_nonhtml); while(isdigit((unsigned char)*(com+1))) com++; if (*(com+1)==',') { com++; - sscanf(com+1,LLintP,&httrack.maxfile_html); while(isdigit((unsigned char)*(com+1))) com++; - } else httrack.maxfile_html=-1; + sscanf(com+1,LLintP,&opt->maxfile_html); while(isdigit((unsigned char)*(com+1))) com++; + } else opt->maxfile_html=-1; break; // - case 'T': sscanf(com+1,"%d",&httrack.timeout); while(isdigit((unsigned char)*(com+1))) com++; break; - case 'J': sscanf(com+1,"%d",&httrack.rateout); while(isdigit((unsigned char)*(com+1))) com++; break; - case 'R': sscanf(com+1,"%d",&httrack.retry); while(isdigit((unsigned char)*(com+1))) com++; break; - case 'E': sscanf(com+1,"%d",&httrack.maxtime); while(isdigit((unsigned char)*(com+1))) com++; break; - case 'H': sscanf(com+1,"%d",&httrack.hostcontrol); while(isdigit((unsigned char)*(com+1))) com++; break; - case 'A': sscanf(com+1,"%d",&httrack.maxrate); while(isdigit((unsigned char)*(com+1))) com++; break; - - case 'j': httrack.parsejava=1; if (*(com+1)=='0') { httrack.parsejava=0; com++; } break; + case 'T': sscanf(com+1,"%d",&opt->timeout); while(isdigit((unsigned char)*(com+1))) com++; break; + case 'J': sscanf(com+1,"%d",&opt->rateout); while(isdigit((unsigned char)*(com+1))) com++; break; + case 'R': sscanf(com+1,"%d",&opt->retry); while(isdigit((unsigned char)*(com+1))) com++; break; + case 'E': sscanf(com+1,"%d",&opt->maxtime); while(isdigit((unsigned char)*(com+1))) com++; break; + case 'H': sscanf(com+1,"%d",&opt->hostcontrol); while(isdigit((unsigned char)*(com+1))) com++; break; + case 'A': sscanf(com+1,"%d",&opt->maxrate); while(isdigit((unsigned char)*(com+1))) com++; break; + + case 'j': + opt->parsejava = HTSPARSE_DEFAULT; + if (isdigit((unsigned char)*(com+1))) { + sscanf(com+1,"%d",&opt->parsejava); + while(isdigit((unsigned char)*(com+1))) com++; + } + break; // - case 
'I': httrack.makeindex=1; if (*(com+1)=='0') { httrack.makeindex=0; com++; } break; + case 'I': opt->makeindex=1; if (*(com+1)=='0') { opt->makeindex=0; com++; } break; // - case 'X': httrack.delete_old=1; if (*(com+1)=='0') { httrack.delete_old=0; com++; } break; + case 'X': opt->delete_old=1; if (*(com+1)=='0') { opt->delete_old=0; com++; } break; // - case 'b': sscanf(com+1,"%d",&httrack.accept_cookie); while(isdigit((unsigned char)*(com+1))) com++; break; + case 'b': sscanf(com+1,"%d",&opt->accept_cookie); while(isdigit((unsigned char)*(com+1))) com++; break; // case 'N': if (strcmp(argv[na],"-N")==0) { // Tout seul @@ -1170,28 +972,28 @@ int main(int argc, char **argv) { htsmain_free(); return -1; } - strcpybuff(httrack.savename_userdef,argv[na]); - if (strnotempty(httrack.savename_userdef)) - httrack.savename_type = -1; // userdef! + StringCopy(opt->savename_userdef, argv[na]); + if (StringLength(opt->savename_userdef) > 0) + opt->savename_type = -1; // userdef! else - httrack.savename_type = 0; // -N "" : par défaut + opt->savename_type = 0; // -N "" : par défaut } } else { - sscanf(com+1,"%d",&httrack.savename_type); while(isdigit((unsigned char)*(com+1))) com++; + sscanf(com+1,"%d",&opt->savename_type); while(isdigit((unsigned char)*(com+1))) com++; } break; case 'L': { - sscanf(com+1,"%d",&httrack.savename_83); - switch(httrack.savename_83) { + sscanf(com+1,"%d",&opt->savename_83); + switch(opt->savename_83) { case 0: // 8-3 (ISO9660 L1) - httrack.savename_83=1; + opt->savename_83=1; break; case 1: - httrack.savename_83=0; + opt->savename_83=0; break; default: // 2 == ISO9660 (ISO9660 L2) - httrack.savename_83=2; + opt->savename_83=2; break; } while(isdigit((unsigned char)*(com+1))) com++; @@ -1199,59 +1001,71 @@ int main(int argc, char **argv) { break; case 's': if (isdigit((unsigned char)*(com+1))) { - sscanf(com+1,"%d",&httrack.robots); + sscanf(com+1,"%d",&opt->robots); while(isdigit((unsigned char)*(com+1))) com++; - } else httrack.robots=1; + } else 
opt->robots=1; #if DEBUG_ROBOTS - printf("robots.txt mode set to %d\n",httrack.robots); + printf("robots.txt mode set to %d\n",opt->robots); #endif break; - case 'o': sscanf(com+1,"%d",&httrack.errpage); while(isdigit((unsigned char)*(com+1))) com++; break; - case 'u': sscanf(com+1,"%d",&httrack.check_type); while(isdigit((unsigned char)*(com+1))) com++; break; + case 'o': sscanf(com+1,"%d",&opt->errpage); while(isdigit((unsigned char)*(com+1))) com++; break; + case 'u': sscanf(com+1,"%d",&opt->check_type); while(isdigit((unsigned char)*(com+1))) com++; break; // case 'C': if (isdigit((unsigned char)*(com+1))) { - sscanf(com+1,"%d",&httrack.cache); + sscanf(com+1,"%d",&opt->cache); while(isdigit((unsigned char)*(com+1))) com++; - } else httrack.cache=1; + } else opt->cache=1; break; - case 'k': httrack.all_in_cache=1; break; + case 'k': opt->all_in_cache=1; break; // - case 'z': httrack.debug=1; break; // petit debug - case 'Z': httrack.debug=2; break; // GROS debug + case 'z': opt->debug=1; break; // petit debug + case 'Z': opt->debug=2; break; // GROS debug // case '&': case '%': { // deuxième jeu d'options com++; switch(*com) { - case 'M': httrack.mimehtml = 1; if (*(com+1)=='0') { httrack.mimehtml=0; com++; } break; - case 'k': httrack.nokeepalive = 0; if (*(com+1)=='0') { httrack.nokeepalive = 1; com++; } break; - case 'x': httrack.passprivacy=1; if (*(com+1)=='0') { httrack.passprivacy=0; com++; } break; // No passwords in html files - case 'q': httrack.includequery=1; if (*(com+1)=='0') { httrack.includequery=0; com++; } break; // No passwords in html files - case 'I': httrack.kindex=1; if (isdigit((unsigned char)*(com+1))) { sscanf(com+1,"%d",&httrack.kindex); while(isdigit((unsigned char)*(com+1))) com++; } + case 'M': opt->mimehtml = 1; if (*(com+1)=='0') { opt->mimehtml=0; com++; } break; + case 'k': opt->nokeepalive = 0; if (*(com+1)=='0') { opt->nokeepalive = 1; com++; } break; + case 'x': opt->passprivacy=1; if (*(com+1)=='0') { opt->passprivacy=0; 
com++; } break; // No passwords in html files + case 'q': opt->includequery=1; if (*(com+1)=='0') { opt->includequery=0; com++; } break; // No passwords in html files + case 'I': opt->kindex=1; if (isdigit((unsigned char)*(com+1))) { sscanf(com+1,"%d",&opt->kindex); while(isdigit((unsigned char)*(com+1))) com++; } break; // Keyword Index - case 'c': sscanf(com+1,"%f",&httrack.maxconn); while(isdigit((unsigned char)*(com+1)) || *(com+1) == '.') com++; break; - case 'e': sscanf(com+1,"%d",&httrack.extdepth); while(isdigit((unsigned char)*(com+1))) com++; break; - case 'B': httrack.tolerant=1; if (*(com+1)=='0') { httrack.tolerant=0; com++; } break; // HTTP/1.0 notamment - case 'h': httrack.http10=1; if (*(com+1)=='0') { httrack.http10=0; com++; } break; // HTTP/1.0 - case 'z': httrack.nocompression=1; if (*(com+1)=='0') { httrack.nocompression=0; com++; } break; // pas de compression - case 'f': httrack.ftp_proxy=1; if (*(com+1)=='0') { httrack.ftp_proxy=0; com++; } break; // proxy http pour ftp - case 'P': httrack.parseall=1; if (*(com+1)=='0') { httrack.parseall=0; com++; } break; // tout parser - case 'n': httrack.norecatch=1; if (*(com+1)=='0') { httrack.norecatch=0; com++; } break; // ne pas reprendre fichiers effacés localement - case 's': httrack.sizehack=1; if (*(com+1)=='0') { httrack.sizehack=0; com++; } break; // hack sur content-length - case 'u': httrack.urlhack=1; if (*(com+1)=='0') { httrack.urlhack=0; com++; } break; // url hack - case 'v': httrack.verbosedisplay=2; if (isdigit((unsigned char)*(com+1))) { sscanf(com+1,"%d",&httrack.verbosedisplay); while(isdigit((unsigned char)*(com+1))) com++; } break; - case 'i': httrack.dir_topindex = 1; if (*(com+1)=='0') { httrack.dir_topindex=0; com++; } break; - case 'N': httrack.savename_delayed = 2; if (isdigit((unsigned char)*(com+1))) { sscanf(com+1,"%d",&httrack.savename_delayed); while(isdigit((unsigned char)*(com+1))) com++; } break; - case 'D': httrack.delayed_cached=1; if (*(com+1)=='0') { 
httrack.delayed_cached=0; com++; } break; // url hack - case '!': httrack.bypass_limits = 1; if (*(com+1)=='0') { httrack.bypass_limits=0; com++; } break; + case 'c': sscanf(com+1,"%f",&opt->maxconn); while(isdigit((unsigned char)*(com+1)) || *(com+1) == '.') com++; break; + case 'e': sscanf(com+1,"%d",&opt->extdepth); while(isdigit((unsigned char)*(com+1))) com++; break; + case 'B': opt->tolerant=1; if (*(com+1)=='0') { opt->tolerant=0; com++; } break; // HTTP/1.0 notamment + case 'h': opt->http10=1; if (*(com+1)=='0') { opt->http10=0; com++; } break; // HTTP/1.0 + case 'z': opt->nocompression=1; if (*(com+1)=='0') { opt->nocompression=0; com++; } break; // pas de compression + case 'f': opt->ftp_proxy=1; if (*(com+1)=='0') { opt->ftp_proxy=0; com++; } break; // proxy http pour ftp + case 'P': opt->parseall=1; if (*(com+1)=='0') { opt->parseall=0; com++; } break; // tout parser + case 'n': opt->norecatch=1; if (*(com+1)=='0') { opt->norecatch=0; com++; } break; // ne pas reprendre fichiers effacés localement + case 's': opt->sizehack=1; if (*(com+1)=='0') { opt->sizehack=0; com++; } break; // hack sur content-length + case 'u': opt->urlhack=1; if (*(com+1)=='0') { opt->urlhack=0; com++; } break; // url hack + case 'v': opt->verbosedisplay=2; if (isdigit((unsigned char)*(com+1))) { sscanf(com+1,"%d",&opt->verbosedisplay); while(isdigit((unsigned char)*(com+1))) com++; } break; + case 'i': opt->dir_topindex = 1; if (*(com+1)=='0') { opt->dir_topindex=0; com++; } break; + case 'N': opt->savename_delayed = 2; if (isdigit((unsigned char)*(com+1))) { sscanf(com+1,"%d",&opt->savename_delayed); while(isdigit((unsigned char)*(com+1))) com++; } break; + case 'D': opt->delayed_cached=1; if (*(com+1)=='0') { opt->delayed_cached=0; com++; } break; // url hack + case '!': opt->bypass_limits = 1; if (*(com+1)=='0') { opt->bypass_limits=0; com++; } break; #if HTS_USEMMS - case 'm': sscanf(com+1,"%d",&httrack.mms_maxtime); while(isdigit((unsigned char)*(com+1))) com++; break; + 
case 'm': sscanf(com+1,"%d",&opt->mms_maxtime); while(isdigit((unsigned char)*(com+1))) com++; break; #endif + case 'w': // disable specific plugin + if ((na+1>=argc) || (argv[na+1][0]=='-')) { + HTS_PANIC_PRINTF("Option %w needs to be followed by a blank space, and a module name"); + printf("Example: -%%w htsswf\n"); + htsmain_free(); + return -1; + } else{ + na++; + StringCat(opt->mod_blacklist, argv[na]); + StringCat(opt->mod_blacklist, "\n"); + } + break; // preserve: no footer, original links case 'p': - httrack.footer[0]='\0'; - httrack.urlmode=4; + StringClear(opt->footer); + opt->urlmode=4; break; case 'L': // URL list if ((na+1>=argc) || (argv[na+1][0]=='-')) { @@ -1266,7 +1080,7 @@ int main(int argc, char **argv) { htsmain_free(); return -1; } - strcpybuff(httrack.filelist,argv[na]); + StringCopy(opt->filelist,argv[na]); } break; case 'b': // bind @@ -1282,7 +1096,7 @@ int main(int argc, char **argv) { htsmain_free(); return -1; } - strcpybuff(httrack.proxy.bindhost,argv[na]); + StringCopy(opt->proxy.bindhost, argv[na]); } break; case 'S': // Scan Rules list @@ -1292,7 +1106,7 @@ int main(int argc, char **argv) { htsmain_free(); return -1; } else{ - INTsys fz; + off_t fz; na++; fz = fsize(argv[na]); if (fz < 0) { @@ -1304,7 +1118,7 @@ int main(int argc, char **argv) { if (fp != NULL) { int cl = (int) strlen(url); ensureUrlCapacity(url, url_sz, cl + fz + 8192); - if ((INTsys)fread(url + cl, 1, fz, fp) != fz) { + if (fread(url + cl, 1, fz, fp) != fz) { HTS_PANIC_PRINTF("File url list could not be read"); htsmain_free(); return -1; @@ -1323,30 +1137,29 @@ int main(int argc, char **argv) { return -1; } else{ na++; - if ( (strlen(argv[na]) + strlen(httrack.mimedefs) + 4) >= sizeof(httrack.mimedefs)) { - HTS_PANIC_PRINTF("Mime definition string too long"); - htsmain_free(); - return -1; - } // --assume standard if (strcmp(argv[na], "standard") == 0) { - strcpybuff(httrack.mimedefs,"\n"); - strcatbuff(httrack.mimedefs,HTS_ASSUME_STANDARD); - 
strcatbuff(httrack.mimedefs,"\n"); + StringCopy(opt->mimedefs,"\n"); + StringCat(opt->mimedefs,HTS_ASSUME_STANDARD); + StringCat(opt->mimedefs,"\n"); } else { char* a; - char* b = httrack.mimedefs + strlen(httrack.mimedefs); + //char* b = StringBuff(opt->mimedefs) + StringLength(opt->mimedefs); for(a = argv[na] ; *a != '\0' ; a++) { if (*a == ';') { /* next one */ - *b++ = '\n'; + StringAddchar(opt->mimedefs, '\n'); + //*b++ = '\n'; } else if (*a == ',' || *a == '\n' || *a == '\r' || *a == '\t') { - *b++ = ' '; + StringAddchar(opt->mimedefs, ' '); + //*b++ = ' '; } else { - *b++ = *a; + StringAddchar(opt->mimedefs, *a); + //*b++ = *a; } } - *b++ = '\n'; /* next def */ - *b++ = '\0'; + StringAddchar(opt->mimedefs, '\n'); + //*b++ = '\n'; /* next def */ + //*b++ = '\0'; } } break; @@ -1364,7 +1177,7 @@ int main(int argc, char **argv) { htsmain_free(); return -1; } - strcpybuff(httrack.lang_iso,argv[na]); + StringCopy(opt->lang_iso,argv[na]); } break; // @@ -1381,14 +1194,14 @@ int main(int argc, char **argv) { htsmain_free(); return -1; } - strcpybuff(httrack.footer,argv[na]); + StringCopy(opt->footer,argv[na]); } break; case 'H': // debug headers _DEBUG_HEAD=1; break; case 'O': -#if HTS_WIN +#ifdef _WIN32 printf("Warning option -%%O has no effect in this system (chroot)\n"); #else switch_chroot=1; @@ -1402,7 +1215,7 @@ int main(int argc, char **argv) { return -1; } else { na++; -#if HTS_WIN +#ifdef _WIN32 printf("Warning option -%%U has no effect on this system (setuid)\n"); #else #ifndef HTS_DO_NOT_USE_UID @@ -1431,64 +1244,36 @@ int main(int argc, char **argv) { } else { char callbackname[128]; char* a = argv[na + 1]; - char* pos = strchr(a, '='); - na++; - if (pos != NULL && (pos - a) > 0 && (pos - a + 2) < sizeof(callbackname)) { - char* posf = strchr(pos + 1, ':'); - char BIGSTK filename[1024]; - callbackname[0] = '\0'; - strncatbuff(callbackname, a, pos - a); - pos++; - if (posf != NULL && (posf - pos) > 0 && (posf - pos + 2) < sizeof(filename)) { - void* 
userfunction; - filename[0] = '\0'; - strncatbuff(filename, pos, posf - pos); - posf++; - userfunction = getFunctionPtr(&httrack, filename, posf); - if (userfunction != NULL) { - if ((void*)htswrap_read(callbackname) != NULL) { - if (htswrap_add(callbackname, userfunction)) { - set_wrappers(); /* Re-read wrappers internal static functions */ - if ((void*)htswrap_read(callbackname) == userfunction) { - if (!httrack.quiet) { - fprintf(stderr, "successfully plugged [%s -> %s:%s]\n", callbackname, posf, filename); - } - } else { - char BIGSTK tmp[1024 * 2]; - sprintf(tmp, "option %%W : unable to (re)plug the function %s from the file %s for the callback %s", posf, filename, callbackname); - HTS_PANIC_PRINTF(tmp); - htsmain_free(); - return -1; - } - } else { - char BIGSTK tmp[1024 * 2]; - sprintf(tmp, "option %%W : unable to plug the function %s from the file %s for the callback %s", posf, filename, callbackname); - HTS_PANIC_PRINTF(tmp); - htsmain_free(); - return -1; - } - } else { - char BIGSTK tmp[1024 * 2]; - sprintf(tmp, "option %%W : unknown or undefined callback %s", callbackname); - HTS_PANIC_PRINTF(tmp); - htsmain_free(); - return -1; - } - } else { - char BIGSTK tmp[1024 * 2]; - sprintf(tmp, "option %%W : unable to load the function %s in the file %s for the callback %s", posf, filename, callbackname); - HTS_PANIC_PRINTF(tmp); - htsmain_free(); - return -1; - } - } else { - HTS_PANIC_PRINTF("Syntax error in option %W : filename error : this function needs to be followed by a blank space, and a =: field"); - printf("Example: -%%W check-link=checklink.so:check\n"); + char* pos; /* = strchr(a, '='); */ + for(pos = a ; *pos != '\0' && *pos != '=' && *pos != ',' && *pos != ':' ; pos++); + /* httrack --wrapper callback[,foo] */ + if (*pos == 0 || *pos == ',' || *pos == ':') { + int ret = plug_wrapper(opt, argv[na + 1], argv[na + 1]); + if (ret == 0) { + char BIGSTK tmp[1024 * 2]; + sprintf(tmp, "option %%W : unable to plug the module %s (returncode != 1)", a); + 
HTS_PANIC_PRINTF(tmp); + htsmain_free(); + return -1; + } else if (ret == -1) { + char BIGSTK tmp[1024 * 2]; + int last_errno = errno; + sprintf(tmp, "option %%W : unable to load the module %s: %s (check the library path ?)", a, strerror(last_errno)); + HTS_PANIC_PRINTF(tmp); htsmain_free(); return -1; } + } + /* Old style */ + /* httrack --wrapper save-name=callback:process,string */ + else if (*pos == '=' && (pos - a) > 0 && (pos - a + 2) < sizeof(callbackname)) { + fprintf(stderr, "Syntax error in option %%W : the old (<3.41) API is no more supported!\n"); + HTS_PANIC_PRINTF("Syntax error in option %W : this function needs to be followed by a blank space, and a module name"); + printf("Example: -%%W check-link=checklink.so:check\n"); + htsmain_free(); + return -1; } else { - HTS_PANIC_PRINTF("Syntax error in option %W : this function needs to be followed by a blank space, and a =: field"); + HTS_PANIC_PRINTF("Syntax error in option %W : this function needs to be followed by a blank space, and a module name"); printf("Example: -%%W check-link=checklink.so:check\n"); htsmain_free(); return -1; @@ -1509,7 +1294,7 @@ int main(int argc, char **argv) { htsmain_free(); return -1; } - strcpybuff(httrack.referer, argv[na]); + StringCopy(opt->referer, argv[na]); } break; case 'E': // From Email address @@ -1525,7 +1310,7 @@ int main(int argc, char **argv) { htsmain_free(); return -1; } - strcpybuff(httrack.from, argv[na]); + StringCopy(opt->from, argv[na]); } break; @@ -1585,7 +1370,7 @@ int main(int argc, char **argv) { } break; - //case 's': httrack.sslengine=1; if (isdigit((unsigned char)*(com+1))) { sscanf(com+1,"%d",&httrack.sslengine); while(isdigit((unsigned char)*(com+1))) com++; } break; + //case 's': opt->sslengine=1; if (isdigit((unsigned char)*(com+1))) { sscanf(com+1,"%d",&opt->sslengine); while(isdigit((unsigned char)*(com+1))) com++; } break; } } break; @@ -1627,8 +1412,8 @@ int main(int argc, char **argv) { char BIGSTK url[HTS_URLMAXSIZE*2]; char 
linepos[256]; int pos; - char* cacheNdx = readfile(fconcat(httrack.path_log,"hts-cache/new.ndx")); - cache_init(&cache,&httrack); /* load cache */ + char* cacheNdx = readfile(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.ndx")); + cache_init(&cache,opt); /* load cache */ if (cacheNdx != NULL) { char firstline[256]; char* a = cacheNdx; @@ -1655,7 +1440,7 @@ int main(int argc, char **argv) { || (strjoker(url, filter, NULL, NULL) != NULL) ) { - r = cache_read_ro(&httrack, &cache, adr, fil, "", NULL); // lire entrée cache + data + r = cache_read_ro(opt, &cache, adr, fil, "", NULL); // lire entrée cache + data if (r.statuscode != -1) { // No errors found++; if (!hasFilter) { @@ -1678,7 +1463,7 @@ int main(int argc, char **argv) { (link_has_authority(adr)) ? "" : "http://", adr, fil); if (url_savename(adr, fil, sav, /*former_adr*/NULL, /*former_fil*/NULL, /*referer_adr*/NULL, /*referer_fil*/NULL, - /*opt*/&httrack, /*liens*/NULL, /*lien_tot*/0, /*sback*/NULL, /*cache*/&cache, /*hash*/NULL, /*ptr*/0, /*numero_passe*/0, /*mime_type*/NULL)!=-1) { + /*opt*/opt, /*liens*/NULL, /*lien_tot*/0, /*sback*/NULL, /*cache*/&cache, /*hash*/NULL, /*ptr*/0, /*numero_passe*/0, /*mime_type*/NULL)!=-1) { if (fexist(sav)) { fprintf(stdout, "Content-location: %s\r\n", sav); } @@ -1749,7 +1534,7 @@ int main(int argc, char **argv) { } break; case 'E': // extract cache - if (!hts_extract_meta(httrack.path_log)) { + if (!hts_extract_meta(StringBuff(opt->path_log))) { fprintf(stderr, "* error extracting meta-data\n"); return 1; } @@ -1768,22 +1553,22 @@ int main(int argc, char **argv) { char* name; uLong repaired = 0; uLong repairedBytes = 0; - if (fexist(fconcat(httrack.path_log,"hts-cache/new.zip"))) { - name = fconcat(httrack.path_log,"hts-cache/new.zip"); - } else if (fexist(fconcat(httrack.path_log,"hts-cache/old.zip"))) { - name = fconcat(httrack.path_log,"hts-cache/old.zip"); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.zip"))) { + 
name = fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.zip"); + } else if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.zip"))) { + name = fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.zip"); } else { - fprintf(stderr, "* error: no cache found in %s\n", fconcat(httrack.path_log,"hts-cache/new.zip")); + fprintf(stderr, "* error: no cache found in %s\n", fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.zip")); return 1; } fprintf(stderr, "Cache: trying to repair %s\n", name); if (unzRepair(name, - fconcat(httrack.path_log,"hts-cache/repair.zip"), - fconcat(httrack.path_log,"hts-cache/repair.tmp"), + fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/repair.zip"), + fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/repair.tmp"), &repaired, &repairedBytes ) == Z_OK) { unlink(name); - rename(fconcat(httrack.path_log,"hts-cache/repair.zip"), name); + rename(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/repair.zip"), name); fprintf(stderr,"Cache: %d bytes successfully recovered in %d entries\n", (int) repairedBytes, (int) repaired); } else { fprintf(stderr, "Cache: could not repair the cache\n"); @@ -1798,20 +1583,20 @@ int main(int argc, char **argv) { return 0; } break; - case 'f': httrack.flush=1; break; + case 'f': opt->flush=1; break; case 'h': - printf("HTTrack version "HTTRACK_VERSION"%s\n", WHAT_is_available); + printf("HTTrack version "HTTRACK_VERSION"%s\n", hts_get_version_info(opt)); return 0; break; - case 'p': /* httrack.aff_progress=1; deprecated */ break; - case 'S': httrack.shell=1; break; // stdin sur un shell - case 'K': httrack.keyboard=1; break; // vérifier stdin + case 'p': /* opt->aff_progress=1; deprecated */ break; + case 'S': opt->shell=1; break; // stdin sur un shell + case 'K': opt->keyboard=1; break; // vérifier stdin // - case 'L': sscanf(com+1,"%d",&httrack.maxlink); while(isdigit((unsigned char)*(com+1))) 
com++; break; - case 'F': sscanf(com+1,"%d",&httrack.maxfilter); while(isdigit((unsigned char)*(com+1))) com++; break; - case 'Z': httrack.makestat=1; break; - case 'T': httrack.maketrack=1; break; - case 'u': sscanf(com+1,"%d",&httrack.waittime); while(isdigit((unsigned char)*(com+1))) com++; break; + case 'L': sscanf(com+1,"%d",&opt->maxlink); while(isdigit((unsigned char)*(com+1))) com++; break; + case 'F': sscanf(com+1,"%d",&opt->maxfilter); while(isdigit((unsigned char)*(com+1))) com++; break; + case 'Z': opt->makestat=1; break; + case 'T': opt->maketrack=1; break; + case 'u': sscanf(com+1,"%d",&opt->waittime); while(isdigit((unsigned char)*(com+1))) com++; break; /*case 'R': // ohh ftp, catch->ftpget HTS_PANIC_PRINTF("Unexpected internal error with -#R command"); @@ -1820,7 +1605,7 @@ int main(int argc, char **argv) { break; */ case 'P': { // catchurl - help_catchurl(httrack.path_log); + help_catchurl(StringBuff(opt->path_log)); htsmain_free(); return 0; } @@ -1863,10 +1648,10 @@ int main(int argc, char **argv) { } else { char mime[256]; // initialiser mimedefs - get_userhttptype(1,httrack.mimedefs,NULL); + //get_userhttptype(opt,1,opt->mimedefs,NULL); // check mime[0] = '\0'; - get_httptype(mime, argv[na+1], 0); + get_httptype(opt, mime, argv[na+1], 0); if (mime[0] != '\0') { char ext[256]; printf("%s is '%s'\n", argv[na+1], mime); @@ -1893,7 +1678,7 @@ int main(int argc, char **argv) { } break; case 'd': - httrack.parsedebug = 1; + opt->parsedebug = 1; break; /* autotest */ @@ -1921,18 +1706,17 @@ int main(int argc, char **argv) { } else { char* a; na++; - httrack.proxy.active=1; + opt->proxy.active=1; // Rechercher MAIS en partant de la fin à cause de user:pass@proxy:port a = argv[na] + strlen(argv[na]) -1; // a=strstr(argv[na],":"); // port while( (a > argv[na]) && (*a != ':') && (*a != '@') ) a--; if (*a == ':') { // un port est présent, :port - sscanf(a+1,"%d",&httrack.proxy.port); - httrack.proxy.name[0]='\0'; - 
strncatbuff(httrack.proxy.name,argv[na],(int) (a - argv[na])); + sscanf(a+1,"%d",&opt->proxy.port); + StringCopyN(opt->proxy.name,argv[na],(int) (a - argv[na])); } else { // - httrack.proxy.port=8080; - strcpybuff(httrack.proxy.name,argv[na]); + opt->proxy.port=8080; + StringCopy(opt->proxy.name,argv[na]); } } break; @@ -1949,11 +1733,11 @@ int main(int argc, char **argv) { htsmain_free(); return -1; } - strcpybuff(httrack.user_agent,argv[na]); - if (strnotempty(httrack.user_agent)) - httrack.user_agent_send=1; + StringCopy(opt->user_agent,argv[na]); + if (StringNotEmpty(opt->user_agent)) + opt->user_agent_send=1; else - httrack.user_agent_send=0; // -F "" désactive l'option + opt->user_agent_send=0; // -F "" désactive l'option } break; // @@ -1970,11 +1754,11 @@ int main(int argc, char **argv) { htsmain_free(); return -1; } - strcpybuff(httrack.sys_com,argv[na]); - if (strnotempty(httrack.sys_com)) - httrack.sys_com_exec=1; + StringCopy(opt->sys_com,argv[na]); + if (StringNotEmpty(opt->sys_com)) + opt->sys_com_exec=1; else - httrack.sys_com_exec=0; // -V "" désactive l'option + opt->sys_com_exec=0; // -V "" désactive l'option } break; // @@ -1991,9 +1775,10 @@ int main(int argc, char **argv) { } // while } else { // URL/filters + char catbuff[CATBUFF_SIZE]; char BIGSTK tempo[1024]; if (strnotempty(url)) strcatbuff(url," "); // espace de séparation - strcpybuff(tempo,unescape_http_unharm(argv[na],1)); + strcpybuff(tempo,unescape_http_unharm(catbuff,argv[na],1)); escape_spc_url(tempo); strcatbuff(url,tempo); } // if argv=- etc. 
@@ -2010,7 +1795,7 @@ int main(int argc, char **argv) { #endif -#if HTS_WIN +#ifdef _WIN32 #else #ifndef HTS_DO_NOT_USE_UID /* Chroot - xxc */ @@ -2021,9 +1806,9 @@ int main(int argc, char **argv) { if (!userid) { //if (strcmp(userdef->pw_name,"root")==0) { char BIGSTK rpath[1024]; - //printf("html=%s log=%s\n",httrack.path_html,httrack.path_log); // xxc - if ((httrack.path_html[0]) && (httrack.path_log[0])) { - char *a=httrack.path_html,*b=httrack.path_log,*c=NULL,*d=NULL; + //printf("html=%s log=%s\n",StringBuff(opt->path_html),StringBuff(opt->path_log)); // xxc + if ((StringBuff(opt->path_html)[0]) && (StringBuff(opt->path_log)[0])) { + const char *a=StringBuff(opt->path_html),*b=StringBuff(opt->path_log),*c=NULL,*d=NULL; c=a; d=b; while ((*a) && (*a == *b)) { if (*a=='/') { c=a; d=b; } @@ -2032,23 +1817,20 @@ int main(int argc, char **argv) { } rpath[0]='\0'; - if (c != httrack.path_html) { - if (httrack.path_html[0]!='/') + if (c != StringBuff(opt->path_html)) { + if (StringBuff(opt->path_html)[0]!='/') strcatbuff(rpath,"./"); - strncatbuff(rpath,httrack.path_html,(int) (c - httrack.path_html)); - } - { - char BIGSTK tmp[1024]; - strcpybuff(tmp,c); strcpybuff(httrack.path_html,tmp); - strcpybuff(tmp,d); strcpybuff(httrack.path_log,tmp); + strncatbuff(rpath,StringBuff(opt->path_html),(int) (c - StringBuff(opt->path_html))); } + StringCopyOverlapped(opt->path_html, c); + StringCopyOverlapped(opt->path_log, d); } else { strcpybuff(rpath,"./"); - strcpybuff(httrack.path_html,"/"); - strcpybuff(httrack.path_log,"/"); + StringCopy(opt->path_html,"/"); + StringCopy(opt->path_log,"/"); } if (rpath[0]) { - printf("[changing root path to %s (path_data=%s,path_log=%s)]\n",rpath,httrack.path_html,httrack.path_log); + printf("[changing root path to %s (path_data=%s,path_log=%s)]\n",rpath,StringBuff(opt->path_html),StringBuff(opt->path_log)); if (chroot(rpath)) { printf("ERROR! 
Can not chroot to %s!\n",rpath); return -1; @@ -2094,32 +1876,32 @@ int main(int argc, char **argv) { // on utilise le cache.. // en cas de présence des deux versions, garder la version la plus avancée, // cad la version contenant le plus de fichiers - if (httrack.cache) { - if (fexist(fconcat(httrack.path_log,"hts-in_progress.lock"))) { // problemes.. - if ( fexist(fconcat(httrack.path_log,"hts-cache/new.dat")) ) { - if ( fexist(fconcat(httrack.path_log,"hts-cache/old.zip")) ) { - if (fsize(fconcat(httrack.path_log,"hts-cache/new.zip"))<32768) { - if (fsize(fconcat(httrack.path_log,"hts-cache/old.zip"))>65536) { - if (fsize(fconcat(httrack.path_log,"hts-cache/old.zip")) > fsize(fconcat(httrack.path_log,"hts-cache/new.zip"))) { - remove(fconcat(httrack.path_log,"hts-cache/new.zip")); - rename(fconcat(httrack.path_log,"hts-cache/old.zip"), fconcat(httrack.path_log,"hts-cache/new.zip")); + if (opt->cache) { + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-in_progress.lock"))) { // problemes.. 
+ if ( fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.dat")) ) { + if ( fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.zip")) ) { + if (fsize(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.zip"))<32768) { + if (fsize(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.zip"))>65536) { + if (fsize(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.zip")) > fsize(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.zip"))) { + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.zip")); + rename(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.zip"), fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.zip")); } } } } } - else if (fexist(fconcat(httrack.path_log,"hts-cache/new.dat")) && fexist(fconcat(httrack.path_log,"hts-cache/new.ndx"))) { - if (fexist(fconcat(httrack.path_log,"hts-cache/old.dat")) && fexist(fconcat(httrack.path_log,"hts-cache/old.ndx"))) { + else if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.dat")) && fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.ndx"))) { + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.dat")) && fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.ndx"))) { // switcher si new<32Ko et old>65Ko (tailles arbitraires) ? 
// ce cas est peut être une erreur ou un crash d'un miroir ancien, prendre // alors l'ancien cache - if (fsize(fconcat(httrack.path_log,"hts-cache/new.dat"))<32768) { - if (fsize(fconcat(httrack.path_log,"hts-cache/old.dat"))>65536) { - if (fsize(fconcat(httrack.path_log,"hts-cache/old.dat")) > fsize(fconcat(httrack.path_log,"hts-cache/new.dat"))) { - remove(fconcat(httrack.path_log,"hts-cache/new.dat")); - remove(fconcat(httrack.path_log,"hts-cache/new.ndx")); - rename(fconcat(httrack.path_log,"hts-cache/old.dat"),fconcat(httrack.path_log,"hts-cache/new.dat")); - rename(fconcat(httrack.path_log,"hts-cache/old.ndx"),fconcat(httrack.path_log,"hts-cache/new.ndx")); + if (fsize(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.dat"))<32768) { + if (fsize(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.dat"))>65536) { + if (fsize(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.dat")) > fsize(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.dat"))) { + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.dat")); + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.ndx")); + rename(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.dat"),fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.dat")); + rename(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.ndx"),fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.ndx")); //} else { // ne rien faire // remove("hts-cache/old.dat"); // remove("hts-cache/old.ndx"); @@ -2133,7 +1915,7 @@ int main(int argc, char **argv) { // Débuggage des en têtes if (_DEBUG_HEAD) { - ioinfo=fopen(fconcat(httrack.path_log,"hts-ioinfo.txt"),"wb"); + ioinfo=fopen(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-ioinfo.txt"),"wb"); } { @@ -2141,41 +1923,41 @@ int main(int argc, char **argv) { // on peut pas avoir un affichage ET un fichier log // ca 
sera pour la version 2 if (httrack_logmode==1) { - httrack.log=stdout; - httrack.errlog=stderr; + opt->log=stdout; + opt->errlog=stderr; } else if (httrack_logmode>=2) { // deux fichiers log - structcheck(httrack.path_log); - if (fexist(fconcat(httrack.path_log,"hts-log.txt"))) - remove(fconcat(httrack.path_log,"hts-log.txt")); - if (fexist(fconcat(httrack.path_log,"hts-err.txt"))) - remove(fconcat(httrack.path_log,"hts-err.txt")); + structcheck(StringBuff(opt->path_log)); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-log.txt"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-log.txt")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-err.txt"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-err.txt")); /* Check FS directory structure created */ - structcheck(httrack.path_log); + structcheck(StringBuff(opt->path_log)); - httrack.log=fopen(fconcat(httrack.path_log,"hts-log.txt"),"w"); + opt->log=fopen(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-log.txt"),"w"); if (httrack_logmode==2) - httrack.errlog=fopen(fconcat(httrack.path_log,"hts-err.txt"),"w"); + opt->errlog=fopen(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-err.txt"),"w"); else - httrack.errlog=httrack.log; - if (httrack.log==NULL) { + opt->errlog=opt->log; + if (opt->log==NULL) { char s[HTS_CDLMAXSIZE]; - sprintf(s,"Unable to create log file %s",fconcat(httrack.path_log,"hts-log.txt")); + sprintf(s,"Unable to create log file %s",fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-log.txt")); HTS_PANIC_PRINTF(s); htsmain_free(); return -1; - } else if (httrack.errlog==NULL) { + } else if (opt->errlog==NULL) { char s[HTS_CDLMAXSIZE]; - sprintf(s,"Unable to create log file %s",fconcat(httrack.path_log,"hts-err.txt")); + sprintf(s,"Unable to create log file %s",fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-err.txt")); HTS_PANIC_PRINTF(s); htsmain_free(); return -1; } } else { - 
httrack.log=NULL; - httrack.errlog=NULL; + opt->log=NULL; + opt->errlog=NULL; } // un petit lock-file pour indiquer un miroir en cours, ainsi qu'un éventuel fichier log @@ -2187,11 +1969,11 @@ int main(int argc, char **argv) { /* readme for information purpose */ { - FILE* fp=fopen(fconcat(httrack.path_log,"hts-cache/readme.txt"),"wb"); + FILE* fp=fopen(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/readme.txt"),"wb"); if (fp) { fprintf(fp,"What's in this folder?"LF); fprintf(fp,""LF); - fprintf(fp,"This folder (hts-cache) has been generated by WinHTTrack "HTTRACK_VERSION"%s"LF, WHAT_is_available); + fprintf(fp,"This folder (hts-cache) has been generated by WinHTTrack "HTTRACK_VERSION"%s"LF, hts_get_version_info(opt)); fprintf(fp,"and is used for updating this website."LF); fprintf(fp,"(The HTML website structure is stored here to allow fast updates)"LF""LF); fprintf(fp,"DO NOT delete this folder unless you do not want to update the mirror in the future!!"LF); @@ -2202,15 +1984,15 @@ int main(int argc, char **argv) { } } - sprintf(n_lock,fconcat(httrack.path_log,"hts-in_progress.lock")); - //sprintf(n_lock,fconcat(httrack.path_log,"hts-in_progress.lock"),n); + sprintf(n_lock,fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-in_progress.lock")); + //sprintf(n_lock,fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-in_progress.lock"),n); /*do { if (!n) - sprintf(n_lock,fconcat(httrack.path_log,"hts-in_progress.lock"),n); + sprintf(n_lock,fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-in_progress.lock"),n); else - sprintf(n_lock,fconcat(httrack.path_log,"hts-in_progress%d.lock"),n); + sprintf(n_lock,fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-in_progress%d.lock"),n); n++; - } while((fexist(n_lock)) && httrack.quiet); + } while((fexist(n_lock)) && opt->quiet); if (fexist(n_lock)) { if (!recuperer) { remove(n_lock); @@ -2218,19 +2000,19 @@ int main(int argc, char **argv) { }*/ // vérifier existence de la structure 
- structcheck(fconcat(httrack.path_html, "/")); - structcheck(fconcat(httrack.path_log, "/")); + structcheck(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_html), "/")); + structcheck(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log), "/")); // reprise/update - if (httrack.cache) { + if (opt->cache) { FILE* fp; int i; -#if HTS_WIN - mkdir(fconcat(httrack.path_log,"hts-cache")); +#ifdef _WIN32 + mkdir(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache")); #else - mkdir(fconcat(httrack.path_log,"hts-cache"),HTS_PROTECT_FOLDER); + mkdir(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache"),HTS_PROTECT_FOLDER); #endif - fp=fopen(fconcat(httrack.path_log,"hts-cache/doit.log"),"wb"); + fp=fopen(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/doit.log"),"wb"); if (fp) { for(i=0+1;i1) { + //} else if (opt->debug>1) { // printf("! FileOpen error, \"%s\"\n",strerror(errno)); } } @@ -2291,29 +2073,29 @@ int main(int argc, char **argv) { } // fichier log - if (httrack.log) { + if (opt->log) { int i; - fprintf(httrack.log,"HTTrack"HTTRACK_VERSION"%s launched on %s at %s"LF, - WHAT_is_available, + fprintf(opt->log,"HTTrack"HTTRACK_VERSION"%s launched on %s at %s"LF, + hts_get_version_info(opt), t, url); - fprintf(httrack.log,"("); + fprintf(opt->log,"("); for(i=0;ilog,"%s ",argv[i]); else // entre "" (si espace(s) et pas déja de ") - fprintf(httrack.log,"\"%s\" ",argv[i]); + fprintf(opt->log,"\"%s\" ",argv[i]); } - fprintf(httrack.log,")"LF); - fprintf(httrack.log,LF); - fprintf(httrack.log,"Information, Warnings and Errors reported for this mirror:"LF); - fprintf(httrack.log,HTS_LOG_SECURITY_WARNING ); - fprintf(httrack.log,LF); + fprintf(opt->log,")"LF); + fprintf(opt->log,LF); + fprintf(opt->log,"Information, Warnings and Errors reported for this mirror:"LF); + fprintf(opt->log,HTS_LOG_SECURITY_WARNING ); + fprintf(opt->log,LF); } if (httrack_logmode) { - printf("Mirror launched on %s by HTTrack Website Copier/"HTTRACK_VERSION"%s 
"HTTRACK_AFF_AUTHORS""LF,t,WHAT_is_available); - if (httrack.wizard==0) { - printf("mirroring %s with %d levels, %d sockets,t=%d,s=%d,logm=%d,lnk=%d,mdg=%d\n",url,httrack.depth,httrack.maxsoc,httrack.travel,httrack.seeker,httrack_logmode,httrack.urlmode,httrack.getmode); + printf("Mirror launched on %s by HTTrack Website Copier/"HTTRACK_VERSION"%s "HTTRACK_AFF_AUTHORS""LF,t,hts_get_version_info(opt)); + if (opt->wizard==0) { + printf("mirroring %s with %d levels, %d sockets,t=%d,s=%d,logm=%d,lnk=%d,mdg=%d\n",url,opt->depth,opt->maxsoc,opt->travel,opt->seeker,httrack_logmode,opt->urlmode,opt->getmode); } else { // the magic wizard printf("mirroring %s with the wizard help..\n",url); } @@ -2323,73 +2105,50 @@ int main(int argc, char **argv) { io_flush; /* Enforce limits to avoid bandwith abuse. The bypass_limits should only be used by administrators and experts. */ - if (!httrack.bypass_limits) { - if (httrack.maxsoc <= 0 || httrack.maxsoc > 4) { - httrack.maxsoc = 4; - if (httrack.log != NULL) { - fspc(httrack.log,"warning"); fprintf(httrack.log,"* security warning: maximum number of simultaneous connections limited to %d to avoid server overload"LF, (int)httrack.maxsoc); + if (!opt->bypass_limits) { + if (opt->maxsoc <= 0 || opt->maxsoc > 4) { + opt->maxsoc = 4; + if (opt->log != NULL) { + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"* security warning: maximum number of simultaneous connections limited to %d to avoid server overload"LF, (int)opt->maxsoc); } } - if (httrack.maxrate <= 0 || httrack.maxrate > 100000) { - httrack.maxrate = 100000; - if (httrack.log != NULL) { - fspc(httrack.log,"warning"); fprintf(httrack.log,"* security warning: maximum bandwidth limited to %d to avoid server overload"LF, (int)httrack.maxrate); + if (opt->maxrate <= 0 || opt->maxrate > 100000) { + opt->maxrate = 100000; + if (opt->log != NULL) { + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"* security warning: maximum bandwidth limited to %d to avoid server overload"LF, 
(int)opt->maxrate); } } - if (httrack.maxconn <= 0 || httrack.maxconn > 5.0) { - httrack.maxconn = 5.0; - if (httrack.log != NULL) { - fspc(httrack.log,"warning"); fprintf(httrack.log,"* security warning: maximum number of connections per second limited to %f to avoid server overload"LF, (float)httrack.maxconn); + if (opt->maxconn <= 0 || opt->maxconn > 5.0) { + opt->maxconn = 5.0; + if (opt->log != NULL) { + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"* security warning: maximum number of connections per second limited to %f to avoid server overload"LF, (float)opt->maxconn); } } } else { - if (httrack.log != NULL) { - fspc(httrack.log,"warning"); fprintf(httrack.log,"* security warning: !!! BYPASSING SECURITY LIMITS - MONITOR THIS SESSION WITH EXTREME CARE !!!"LF); + if (opt->log != NULL) { + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"* security warning: !!! BYPASSING SECURITY LIMITS - MONITOR THIS SESSION WITH EXTREME CARE !!!"LF); } } /* Info for wrappers */ - if ( (httrack.debug>0) && (httrack.log!=NULL) ) { - fspc(httrack.log,"info"); fprintf(httrack.log,"engine: init"LF); + if ( (opt->debug>0) && (opt->log!=NULL) ) { + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"engine: init"LF); } -#if HTS_ANALYSTE - if (hts_htmlcheck_init != NULL) { - hts_htmlcheck_init(); - } - set_wrappers(); // init() is allowed to set other wrappers -#endif + + /* Init external */ + RUN_CALLBACK_NOARG(opt, init); // détourner SIGHUP etc. 
-#if HTS_WIN -#ifndef _WIN32_WCE - signal( SIGINT , sig_ask ); // ^C - signal( SIGTERM , sig_finish ); // kill -#endif -#else - signal( SIGHUP , sig_back ); // close window - signal( SIGTSTP , sig_back ); // ^Z - signal( SIGTERM , sig_finish ); // kill - signal( SIGINT , sig_ask ); // ^C - signal( SIGPIPE , sig_brpipe ); // broken pipe (write into non-opened socket) -/* -deprecated - see SIGCHLD -#ifndef HTS_DO_NOT_SIGCLD - signal( SIGCLD , sig_ignore ); // child change status -#endif -*/ - signal( SIGCHLD , sig_ignore ); // child change status -#endif #if DEBUG_STEPS printf("Launching the mirror\n"); #endif - // Lancement du miroir // ------------------------------------------------------------ - if (httpmirror(url, &httrack)==0) { + if (httpmirror(url, opt)==0) { printf("Error during operation (see log file), site has not been successfully mirrored\n"); } else { - if (httrack.shell) { + if (opt->shell) { HTT_REQUEST_START; HT_PRINT("TRANSFER DONE"LF); HTT_REQUEST_END @@ -2401,10 +2160,10 @@ deprecated - see SIGCHLD // // Build top index - if (httrack.dir_topindex) { + if (opt->dir_topindex) { char BIGSTK rpath[1024*2]; char* a; - strcpybuff(rpath,httrack.path_html); + strcpybuff(rpath,StringBuff(opt->path_html)); if (rpath[0]) { if (rpath[strlen(rpath)-1]=='/') rpath[strlen(rpath)-1]='\0'; @@ -2412,33 +2171,31 @@ deprecated - see SIGCHLD a=strrchr(rpath,'/'); if (a) { *a='\0'; - hts_buildtopindex(&httrack,rpath,httrack.path_bin); - if (httrack.log) { - fspc(httrack.log,"info"); fprintf(httrack.log,"Top index rebuilt (done)"LF); + hts_buildtopindex(opt,rpath,StringBuff(opt->path_bin)); + if (opt->log) { + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"Top index rebuilt (done)"LF); } } } - if (exit_xh ==1) { - if (httrack.log) { - fprintf(httrack.log,"* * MIRROR ABORTED! 
* *\nThe current temporary cache is required for any update operation and only contains data downloaded during the present aborted session.\nThe former cache might contain more complete information; if you do not want to lose that information, you have to restore it and delete the current cache.\nThis can easily be done here by erasing the hts-cache/new.* files]\n"); + if (opt->state.exit_xh ==1) { + if (opt->log) { + fprintf(opt->log,"* * MIRROR ABORTED! * *\nThe current temporary cache is required for any update operation and only contains data downloaded during the present aborted session.\nThe former cache might contain more complete information; if you do not want to lose that information, you have to restore it and delete the current cache.\nThis can easily be done here by erasing the hts-cache/new.* files]\n"); } } /* Info for wrappers */ - if ( (httrack.debug>0) && (httrack.log!=NULL) ) { - fspc(httrack.log,"info"); fprintf(httrack.log,"engine: free"LF); + if ( (opt->debug>0) && (opt->log!=NULL) ) { + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"engine: free"LF); } -#if HTS_ANALYSTE - if (hts_htmlcheck_uninit != NULL) { - hts_htmlcheck_uninit(); - } -#endif + + /* UnInit */ + RUN_CALLBACK_NOARG(opt, uninit); if (httrack_logmode!=1) { - if (httrack.errlog == httrack.log) httrack.errlog=NULL; - if (httrack.log) { fclose(httrack.log); httrack.log=NULL; } - if (httrack.errlog) { fclose(httrack.errlog); httrack.errlog=NULL; } + if (opt->errlog == opt->log) opt->errlog=NULL; + if (opt->log) { fclose(opt->log); opt->log=NULL; } + if (opt->errlog) { fclose(opt->errlog); opt->errlog=NULL; } } // Débuggage des en têtes @@ -2456,12 +2213,9 @@ deprecated - see SIGCHLD freet(x_argvblk); if (x_argv) freet(x_argv); - -#if HTS_WIN -#if HTS_ANALYSTE!=2 -// WSACleanup(); // ** non en cas de thread tjs présent!.. 
-#endif -#endif + if (url) + freet(url); + #ifdef HTS_TRACE_MALLOC hts_freeall(); #endif @@ -2476,42 +2230,39 @@ deprecated - see SIGCHLD // main() subroutines // vérifier chemin path -int check_path(char* s,char* defaultname) { +int check_path(String* s, char* defaultname) { int i; int return_value=0; // Replace name: ~/mywebsites/# -> /home/foo/mywebsites/# expand_home(s); - for(i=0;i<(int) strlen(s);i++) // conversion \ -> / - if (s[i]=='\\') - s[i]='/'; + for(i = 0 ; i < (int) StringLength(*s) ; i++) // conversion \ -> / + if (StringSub(*s, i) == '\\') + StringSubRW(*s, i) = '/'; // remove ending / - if (strnotempty(s)) - if (s[strlen(s)-1]=='/') - s[strlen(s)-1]='\0'; + if (StringNotEmpty(*s) && StringRight(*s, 1) == '/') + StringPopRight(*s); // Replace name: /home/foo/mywebsites/# -> /home/foo/mywebsites/wonderfulsite - if (strnotempty(s)) { - if (s[(i=strlen(s))-1]=='#') { + if (StringNotEmpty(*s)) { + if (StringRight(*s, 1) == '#') { if (strnotempty((defaultname?defaultname:""))) { - char BIGSTK tempo[HTS_URLMAXSIZE*2]; - char* a=strchr(defaultname,'#'); // we never know.. - if (a) *a='\0'; - tempo[0]='\0'; - strncatbuff(tempo,s,i-1); - strcatbuff(tempo,defaultname); - strcpybuff(s,tempo); - } else - s[0]='\0'; // Clear path (no name/default url given) + char* a = strchr(defaultname,'#'); // we never know.. 
+ if (a) + *a='\0'; + StringPopRight(*s); + StringCat(*s, defaultname); + } else { + StringClear(*s); // Clear path (no name/default url given) + } return_value=1; // expanded } } // ending / - if (strnotempty(s)) - if (s[strlen(s)-1]!='/') // ajouter slash à la fin - strcatbuff(s,"/"); + if (StringNotEmpty(*s) && StringRight(*s, 1) != '/') // ajouter slash à la fin + StringCat(*s, "/"); return return_value; } diff --git a/src/htscoremain.h b/src/htscoremain.h index db781eb..a3a4025 100644 --- a/src/htscoremain.h +++ b/src/htscoremain.h @@ -45,20 +45,18 @@ Please visit our Website: http://www.httrack.com "php2 php3 php4 php cgi asp jsp pl cfm nsf=text/html" #include "htsglobal.h" +#include "htsopt.h" /* Library internal definictions */ #ifdef HTS_INTERNAL_BYTECODE // Main, récupère les paramètres et appelle le robot -#if HTS_ANALYSTE #ifndef HTTRACK_DEFLIB HTSEXT_API int hts_main(int argc, char **argv); -#endif -#else -int main(int argc, char **argv); +HTSEXT_API int hts_main2(int argc, char **argv, httrackp *opt); #endif int cmdl_opt(char* s); -int check_path(char* s,char* defaultname); +int check_path(String* s,char* defaultname); #endif diff --git a/src/htsdefines.h b/src/htsdefines.h index 9f1de79..8a915ae 100644 --- a/src/htsdefines.h +++ b/src/htsdefines.h @@ -17,7 +17,6 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
- Important notes: - We hereby ask people using this source NOT to use it in purpose of grabbing @@ -38,78 +37,189 @@ Please visit our Website: http://www.httrack.com #ifndef HTS_DEFINES_DEFH #define HTS_DEFINES_DEFH -typedef void (* t_hts_htmlcheck_init)(void); -typedef void (* t_hts_htmlcheck_uninit)(void); -typedef int (* t_hts_htmlcheck_start)(httrackp* opt); -typedef int (* t_hts_htmlcheck_end)(void); -typedef int (* t_hts_htmlcheck_chopt)(httrackp* opt); -typedef int (* t_hts_htmlcheck_process)(char** html,int* len,char* url_adresse,char* url_fichier); -typedef int (* t_hts_htmlcheck)(char* html,int len,char* url_adresse,char* url_fichier); -typedef char* (* t_hts_htmlcheck_query)(char* question); -typedef char* (* t_hts_htmlcheck_query2)(char* question); -typedef char* (* t_hts_htmlcheck_query3)(char* question); -typedef int (* t_hts_htmlcheck_loop)(lien_back* back,int back_max,int back_index,int lien_tot,int lien_ntot,int stat_time,hts_stat_struct* stats); -typedef int (* t_hts_htmlcheck_check)(char* adr,char* fil,int status); -typedef int (* t_hts_htmlcheck_check_mime)(char* adr,char* fil,char* mime,int status); -typedef void (* t_hts_htmlcheck_pause)(char* lockfile); -typedef void (* t_hts_htmlcheck_filesave)(char* file); -typedef void (* t_hts_htmlcheck_filesave2)(char* hostname,char* filename,char* localfile,int is_new,int is_modified,int not_updated); -typedef int (* t_hts_htmlcheck_linkdetected)(char* link); -typedef int (* t_hts_htmlcheck_linkdetected2)(char* link, char* tag_start); -typedef int (* t_hts_htmlcheck_xfrstatus)(lien_back* back); -typedef int (* t_hts_htmlcheck_savename)(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save); -typedef int (* t_hts_htmlcheck_sendhead)(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* outgoing); -typedef int (* t_hts_htmlcheck_receivehead)(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* incoming); - -/* Library 
internal definictions */ +/* Forward definitions */ +#ifndef HTS_DEF_FWSTRUCT_httrackp +#define HTS_DEF_FWSTRUCT_httrackp +typedef struct httrackp httrackp; +#endif +#ifndef HTS_DEF_FWSTRUCT_lien_back +#define HTS_DEF_FWSTRUCT_lien_back +typedef struct lien_back lien_back; +#endif +#ifndef HTS_DEF_FWSTRUCT_htsblk +#define HTS_DEF_FWSTRUCT_htsblk +typedef struct htsblk htsblk; +#endif +#ifndef HTS_DEF_FWSTRUCT_hts_stat_struct +#define HTS_DEF_FWSTRUCT_hts_stat_struct +typedef struct hts_stat_struct hts_stat_struct; +#endif +#ifndef HTS_DEF_FWSTRUCT_htsmoduleStruct +#define HTS_DEF_FWSTRUCT_htsmoduleStruct +typedef struct htsmoduleStruct htsmoduleStruct; +#endif +#ifndef HTS_DEF_FWSTRUCT_t_hts_callbackarg +#define HTS_DEF_FWSTRUCT_t_hts_callbackarg +typedef struct t_hts_callbackarg t_hts_callbackarg; +#endif +#ifndef HTS_DEF_FWSTRUCT_t_hts_callbackarg +#define HTS_DEF_FWSTRUCT_t_hts_callbackarg +typedef struct t_hts_callbackarg t_hts_callbackarg; +#endif + +/* External callbacks */ +#ifndef EXTERNAL_FUNCTION +#ifdef _WIN32 +#define EXTERNAL_FUNCTION __declspec(dllexport) +#else +#define EXTERNAL_FUNCTION +#endif +#endif + +/* --wrapper plug function prototype */ + +typedef int (*t_hts_plug)(httrackp *opt, const char* argv); +typedef int (*t_hts_unplug)(httrackp *opt); + +/* htsopt function callbacks definitions */ + +typedef void (* t_hts_htmlcheck_init)(t_hts_callbackarg *carg); +typedef void (* t_hts_htmlcheck_uninit)(t_hts_callbackarg *carg); +typedef int (* t_hts_htmlcheck_start)(t_hts_callbackarg *carg, httrackp* opt); +typedef int (* t_hts_htmlcheck_end)(t_hts_callbackarg *carg, httrackp *opt); +typedef int (* t_hts_htmlcheck_chopt)(t_hts_callbackarg *carg, httrackp* opt); +typedef int (* t_hts_htmlcheck_process)(t_hts_callbackarg *carg, httrackp *opt, + char** html, int* len, const char* url_adresse, const char* url_fichier); +typedef t_hts_htmlcheck_process t_hts_htmlcheck_preprocess; +typedef t_hts_htmlcheck_process t_hts_htmlcheck_postprocess; +typedef int 
(* t_hts_htmlcheck_check_html)(t_hts_callbackarg *carg, httrackp *opt, + char* html, int len, const char* url_adresse, const char* url_fichier); +typedef const char* (* t_hts_htmlcheck_query)(t_hts_callbackarg *carg, httrackp *opt, + const char* question); +typedef const char* (* t_hts_htmlcheck_query2)(t_hts_callbackarg *carg, httrackp *opt, + const char* question); +typedef const char* (* t_hts_htmlcheck_query3)(t_hts_callbackarg *carg, httrackp *opt, + const char* question); +typedef int (* t_hts_htmlcheck_loop)(t_hts_callbackarg *carg, httrackp *opt, + lien_back* back, int back_max, int back_index, + int lien_tot, int lien_ntot, + int stat_time, hts_stat_struct* stats); +typedef int (* t_hts_htmlcheck_check_link)(t_hts_callbackarg *carg, httrackp *opt, + const char* adr, const char* fil, int status); +typedef int (* t_hts_htmlcheck_check_mime)(t_hts_callbackarg *carg, httrackp *opt, + const char* adr, const char* fil, const char* mime, int status); +typedef void (* t_hts_htmlcheck_pause)(t_hts_callbackarg *carg, httrackp *opt, + const char* lockfile); +typedef void (* t_hts_htmlcheck_filesave)(t_hts_callbackarg *carg, httrackp *opt, + const char* file); +typedef void (* t_hts_htmlcheck_filesave2)(t_hts_callbackarg *carg, httrackp *opt, + const char* hostname, const char* filename, const char* localfile, + int is_new, int is_modified, int not_updated); +typedef int (* t_hts_htmlcheck_linkdetected)(t_hts_callbackarg *carg, httrackp *opt, + char* link); +typedef int (* t_hts_htmlcheck_linkdetected2)(t_hts_callbackarg *carg, httrackp *opt, + char* link, const char* tag_start); +typedef int (* t_hts_htmlcheck_xfrstatus)(t_hts_callbackarg *carg, httrackp *opt, + lien_back* back); +typedef int (* t_hts_htmlcheck_savename)(t_hts_callbackarg *carg, httrackp *opt, + const char* adr_complete, const char* fil_complete, + const char* referer_adr, const char* referer_fil, char* save); +typedef int (* t_hts_htmlcheck_sendhead)(t_hts_callbackarg *carg, httrackp *opt, + char* 
buff, const char* adr, const char* fil, + const char* referer_adr, const char* referer_fil, + htsblk* outgoing); +typedef int (* t_hts_htmlcheck_receivehead)(t_hts_callbackarg *carg, httrackp *opt, + char* buff, const char* adr, const char* fil, + const char* referer_adr, const char* referer_fil, + htsblk* incoming); + +/* External additional parsing module(s) */ +typedef int (*t_hts_htmlcheck_detect)(t_hts_callbackarg *carg, httrackp *opt, htsmoduleStruct* str); +typedef int (*t_hts_htmlcheck_parse)(t_hts_callbackarg *carg, httrackp *opt, htsmoduleStruct* str); + +/* Callbacks */ +#ifndef HTS_DEF_FWSTRUCT_t_hts_htmlcheck_callbacks +#define HTS_DEF_FWSTRUCT_t_hts_htmlcheck_callbacks +typedef struct t_hts_htmlcheck_callbacks t_hts_htmlcheck_callbacks; +#endif + +/* Callabck array */ +#define DEFCALLBACK(NAME) \ + struct NAME { \ + t_hts_htmlcheck_ ##NAME fun; \ + t_hts_callbackarg *carg; \ + } NAME + +/* Callback items */ +typedef void* t_hts_htmlcheck_t_hts_htmlcheck_callbacks_item; +typedef DEFCALLBACK(t_hts_htmlcheck_callbacks_item); + +/* Linked list, which should be used for the 'arg' user-defined argument */ +struct t_hts_callbackarg { + /* User-defined agument for the called function */ + void *userdef; + + /* Previous function, if any (fun != NULL) */ + struct prev { + void *fun; + t_hts_callbackarg *carg; + } prev; +}; + +/* Callback structure */ +struct t_hts_htmlcheck_callbacks { + /* v3.41 */ + DEFCALLBACK(init); + DEFCALLBACK(uninit); + DEFCALLBACK(start); + DEFCALLBACK(end); + DEFCALLBACK(chopt); + DEFCALLBACK(preprocess); + DEFCALLBACK(postprocess); + DEFCALLBACK(check_html); + DEFCALLBACK(query); + DEFCALLBACK(query2); + DEFCALLBACK(query3); + DEFCALLBACK(loop); + DEFCALLBACK(check_link); + DEFCALLBACK(check_mime); + DEFCALLBACK(pause); + DEFCALLBACK(filesave); + DEFCALLBACK(filesave2); + DEFCALLBACK(linkdetected); + DEFCALLBACK(linkdetected2); + DEFCALLBACK(xfrstatus); + DEFCALLBACK(savename); + DEFCALLBACK(sendhead); + DEFCALLBACK(receivehead); + 
DEFCALLBACK(detect); + DEFCALLBACK(parse); + /* >3.41 */ +}; + +/* Library internal definitions */ #ifdef HTS_INTERNAL_BYTECODE -// demande d'interaction avec le shell -#if HTS_ANALYSTE -extern char HTbuff[2048]; -extern t_hts_htmlcheck_init hts_htmlcheck_init; -extern t_hts_htmlcheck_uninit hts_htmlcheck_uninit; -extern t_hts_htmlcheck_start hts_htmlcheck_start; -extern t_hts_htmlcheck_end hts_htmlcheck_end; -extern t_hts_htmlcheck_chopt hts_htmlcheck_chopt; -extern t_hts_htmlcheck_process hts_htmlcheck_preprocess; -extern t_hts_htmlcheck_process hts_htmlcheck_postprocess; -extern t_hts_htmlcheck hts_htmlcheck; -extern t_hts_htmlcheck_query hts_htmlcheck_query; -extern t_hts_htmlcheck_query2 hts_htmlcheck_query2; -extern t_hts_htmlcheck_query3 hts_htmlcheck_query3; -extern t_hts_htmlcheck_loop hts_htmlcheck_loop; -extern t_hts_htmlcheck_check hts_htmlcheck_check; -extern t_hts_htmlcheck_check_mime hts_htmlcheck_check_mime; -extern t_hts_htmlcheck_pause hts_htmlcheck_pause; -extern t_hts_htmlcheck_filesave hts_htmlcheck_filesave; -extern t_hts_htmlcheck_filesave2 hts_htmlcheck_filesave2; -extern t_hts_htmlcheck_linkdetected hts_htmlcheck_linkdetected; -extern t_hts_htmlcheck_linkdetected2 hts_htmlcheck_linkdetected2; -extern t_hts_htmlcheck_xfrstatus hts_htmlcheck_xfrstatus; -extern t_hts_htmlcheck_savename hts_htmlcheck_savename; -extern t_hts_htmlcheck_sendhead hts_htmlcheck_sendhead; -extern t_hts_htmlcheck_receivehead hts_htmlcheck_receivehead; + +#ifndef HTS_DEF_FWSTRUCT_t_hts_callback_ref +#define HTS_DEF_FWSTRUCT_t_hts_callback_ref +typedef struct t_hts_callback_ref t_hts_callback_ref; #endif +struct t_hts_callback_ref { + const char *name; + size_t offset; +}; + +extern const t_hts_htmlcheck_callbacks default_callbacks; +extern const t_hts_callback_ref default_callbacks_ref[]; -#if HTS_ANALYSTE -#define HT_PRINT(A) strcatbuff(HTbuff,A); -#define HT_REQUEST_START HTbuff[0]='\0'; +#define HT_PRINT(A) strcatbuff(opt->state.HTbuff,A); +#define HT_REQUEST_START 
opt->state.HTbuff[0]='\0'; #define HT_REQUEST_END -#define HTT_REQUEST_START HTbuff[0]='\0'; +#define HTT_REQUEST_START opt->state.HTbuff[0]='\0'; #define HTT_REQUEST_END -#define HTS_REQUEST_START HTbuff[0]='\0'; +#define HTS_REQUEST_START opt->state.HTbuff[0]='\0'; #define HTS_REQUEST_END -#define HTS_PANIC_PRINTF(S) strcpybuff(_hts_errmsg,S); -#else -#define HT_PRINT(A) printf("%s",A); -#define HT_REQUEST_START /*printf("§\n");*/ -#define HT_REQUEST_END /*printf("§\n");*/ -#define HTT_REQUEST_START /*if (httrack.shell) printf("§\n");*/ -#define HTT_REQUEST_END /*if (httrack.shell) printf("§\n");*/ -#define HTS_REQUEST_START if (opt->shell) { HT_REQUEST_START } -#define HTS_REQUEST_END if (opt->shell) { HT_REQUEST_END } -#define HTS_PANIC_PRINTF(S) printf("%s\n",S); -#endif +#define HTS_PANIC_PRINTF(S) strcpybuff(opt->state._hts_errmsg,S); #endif diff --git a/src/htsfilters.c b/src/htsfilters.c index cd7abdd..d78848f 100644 --- a/src/htsfilters.c +++ b/src/htsfilters.c @@ -265,7 +265,7 @@ HTS_INLINE char* strjoker(char* chaine,char* joker,LLint* size,int* size_flag) { // tester i=0; if (!unique) - max=strlen(chaine); + max = (int) strlen(chaine); else /* *(a) only match a (not aaaaa) */ max=1; while(i<(int) max) { diff --git a/src/htsfilters.h b/src/htsfilters.h index a1ba329..6526400 100644 --- a/src/htsfilters.h +++ b/src/htsfilters.h @@ -40,10 +40,11 @@ Please visit our Website: http://www.httrack.com #ifndef HTSFILT_DEFH #define HTSFILT_DEFH -#include "htsbase.h" - /* Library internal definictions */ #ifdef HTS_INTERNAL_BYTECODE + +#include "htsbase.h" + int fa_strjoker(int type,char** filters,int nfil,char* nom,LLint* size,int* size_flag,int* depth); HTS_INLINE char* strjoker(char* chaine,char* joker,LLint* size,int* size_flag); char* strjokerfind(char* chaine,char* joker); diff --git a/src/htsftp.c b/src/htsftp.c index 1084558..e8797ca 100644 --- a/src/htsftp.c +++ b/src/htsftp.c @@ -42,17 +42,15 @@ Please visit our Website: http://www.httrack.com #include 
"htsftp.h" -#include "htsglobal.h" -#include "htsbase.h" -#include "htsnet.h" +#include "htscore.h" #include "htsthread.h" -#if HTS_WIN +#ifdef _WIN32 #else //inet_ntoa #include #endif -#if HTS_WIN +#ifdef _WIN32 #ifndef __cplusplus // DOS #ifndef _WIN32_WCE @@ -72,17 +70,18 @@ Please visit our Website: http://www.httrack.com #define FTP_DEBUG 0 //#define FORK_DEBUG 0 -#define FTP_STATUS_READY 1001 - #if USE_BEGINTHREAD -PTHREAD_TYPE PTHREAD_TYPE_FNC back_launch_ftp( void* pP ) { - lien_back* back=(lien_back*) pP; - if (back == NULL) { +void back_launch_ftp( void* pP ) { + FTPDownloadStruct *pStruct = (FTPDownloadStruct*)pP; + if (pStruct == NULL) + return ; + + if (pStruct == NULL) { #if FTP_DEBUG printf("[ftp error: no args]\n"); #endif - return PTHREAD_RETURN; + return ; } /* Initialize */ @@ -92,72 +91,28 @@ PTHREAD_TYPE PTHREAD_TYPE_FNC back_launch_ftp( void* pP ) { #if FTP_DEBUG printf("[Launching main ftp routine]\n"); #endif - run_launch_ftp(back); + run_launch_ftp(pStruct); // prêt - back->status=FTP_STATUS_READY; - + pStruct->pBack->status=STATUS_FTP_READY; + + /* Delete structure */ + free(pP); + /* Uninitialize */ hts_uninit(); - return PTHREAD_RETURN; + return ; } // lancer en back -void launch_ftp(lien_back* back) { +void launch_ftp(FTPDownloadStruct* params) { // DOS #if FTP_DEBUG printf("[Launching main ftp thread]\n"); #endif - (void)hts_newthread(back_launch_ftp, 0, (void*) back); + hts_newthread(back_launch_ftp, (void*) params); } #else -// Unix sans pthread -int back_launch_ftp(lien_back* back) { - // lancer ftp - run_launch_ftp(back); - // prêt - back->status=FTP_STATUS_READY; - return 0; -} -void launch_ftp(lien_back* back,char* path,char* exec) { - FILE* fp = fopen(fconv(path),"wb"); - if (fp) { - char _args[8][256]; - char *args[8]; - fclose(fp); fp=NULL; - - strcpybuff(_args[0],exec); - strcpybuff(_args[1],"-#R"); - strcpybuff(_args[2],back->url_adr); - strcpybuff(_args[3],back->url_fil); - strcpybuff(_args[4],back->url_sav); - 
strcpybuff(_args[5],path); - //strcpybuff(_args[6],""); - args[0]=_args[0]; - args[1]=_args[1]; - args[2]=_args[2]; - args[3]=_args[3]; - args[4]=_args[4]; - args[5]=_args[5]; - args[6]=NULL; - switch (fork()) { // note: vfork déconne un max' - case -1: printf("Can not vfork() process\n"); break; - case 0: - if (execvp(args[0],args)==-1) { - fp=fopen(fconv(path),"wb"); - if (fp) { - fprintf(fp,"-1 unable to launch %s",args[0]); - fclose(fp); fp=NULL; - rename(path,concat(path,".ok")); - } else remove(path); - } - _exit(0); // exit 'propre' - break; - default: // parent - // bah on fait rien.. - break; - } - } -} +#error No more supported #endif // pour l'arrêt du ftp @@ -178,7 +133,9 @@ void launch_ftp(lien_back* back,char* path,char* exec) { } // la véritable fonction une fois lancées les routines thread/fork -int run_launch_ftp(lien_back* back) { +int run_launch_ftp(FTPDownloadStruct *pStruct) { + lien_back* back = pStruct->pBack; + httrackp *opt = pStruct->pOpt; char user[256]="anonymous"; char pass[256]="user@"; char line_retr[2048]; @@ -253,7 +210,8 @@ int run_launch_ftp(lien_back* back) { #endif ftp_filename=a; if (strnotempty(a)) { - char* ua=unescape_http(a); + char catbuff[CATBUFF_SIZE]; + char* ua=unescape_http(catbuff,a); int len_a = (int) strlen(ua); if (len_a > 0 && ua[len_a -1] == '/') { /* obviously a directory listing */ transfer_list=1; @@ -275,7 +233,7 @@ int run_launch_ftp(lien_back* back) { } } else { strcpybuff(back->r.msg,"Unexpected PORT error"); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; } } @@ -306,10 +264,10 @@ int run_launch_ftp(lien_back* back) { // récupérer adresse résolue strcpybuff(back->info,"host name"); - hp = hts_gethostbyname(_adr, &fullhostent_buffer); + hp = hts_gethostbyname(opt,_adr, &fullhostent_buffer); if (hp == NULL) { strcpybuff(back->r.msg,"Unable to get server's address"); - // back->status=FTP_STATUS_READY; // fini + // 
back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_NON_FATAL; _HALT_FTP return 0; @@ -323,10 +281,10 @@ int run_launch_ftp(lien_back* back) { // memcpy(&server.sin_addr, hp->h_addr, hp->h_length); // créer ("attachement") une socket (point d'accès) internet,en flot - soc_ctl=socket(SOCaddr_sinfamily(server), SOCK_STREAM, 0); + soc_ctl = (T_SOC) socket(SOCaddr_sinfamily(server), SOCK_STREAM, 0); if (soc_ctl==INVALID_SOCKET) { strcpybuff(back->r.msg,"Unable to create a socket"); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; _HALT_FTP return 0; @@ -338,17 +296,17 @@ int run_launch_ftp(lien_back* back) { // connexion (bloquante, on est en thread) strcpybuff(back->info,"connect"); -#if HTS_WIN +#ifdef _WIN32 if (connect(soc_ctl, (const struct sockaddr FAR *)&server, server_size) != 0) { #else if (connect(soc_ctl, (struct sockaddr *)&server, server_size) == -1) { #endif strcpybuff(back->r.msg,"Unable to connect to the server"); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; _HALT_FTP return 0; -#if HTS_WIN +#ifdef _WIN32 } #else } @@ -384,7 +342,7 @@ int run_launch_ftp(lien_back* back) { // ok } else { strcpybuff(back->r.msg,"TYPE I error"); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; } #if 0 @@ -411,34 +369,34 @@ int run_launch_ftp(lien_back* back) { // ok.. 
} else { strcpybuff(back->r.msg,"TYPE I error"); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; } } else { sprintf(back->r.msg,"CWD error: %s",linejmp(line)); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; } // sinon on est prêts } else { strcpybuff(back->r.msg,"Unexpected ftp error"); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; } #endif } else { sprintf(back->r.msg,"Bad password: %s",linejmp(line)); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; } } else { sprintf(back->r.msg,"Bad user name: %s",linejmp(line)); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; } } else { sprintf(back->r.msg,"Connection refused: %s",linejmp(line)); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; } @@ -497,7 +455,7 @@ int run_launch_ftp(lien_back* back) { // -- fin analyse de l'adresse IP et du port -- } else { sprintf(back->r.msg,"PASV incorrect: %s",linejmp(line)); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; } // sinon on est prêts } else { @@ -529,12 +487,12 @@ int run_launch_ftp(lien_back* back) { } } else { sprintf(back->r.msg,"EPSV incorrect: %s",linejmp(line)); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; } } else { sprintf(back->r.msg,"PASV/EPSV error: %s",linejmp(line)); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; } // sinon on est prêts } @@ -548,7 +506,8 @@ 
int run_launch_ftp(lien_back* back) { // SIZE if (back->r.statuscode != -1) { if (!transfer_list) { - char* ua=unescape_http(ftp_filename); + char catbuff[CATBUFF_SIZE]; + char* ua=unescape_http(catbuff,ftp_filename); if ( (strchr(ua, ' ')) || @@ -611,7 +570,7 @@ int run_launch_ftp(lien_back* back) { // résoudre if (adr_ip[0]) { - hp = hts_gethostbyname(adr_ip, &fullhostent_buffer); + hp = hts_gethostbyname(opt,adr_ip, &fullhostent_buffer); if (hp) { SOCaddr_copyaddr(server, server_size, hp->h_addr_list[0], hp->h_length); } else { @@ -629,12 +588,12 @@ int run_launch_ftp(lien_back* back) { #endif if (server_size > 0) { // socket - soc_dat=socket(SOCaddr_sinfamily(server), SOCK_STREAM, 0); + soc_dat = (T_SOC) socket(SOCaddr_sinfamily(server), SOCK_STREAM, 0); if (soc_dat != INVALID_SOCKET) { // structure: connexion au domaine internet, port 80 (ou autre) SOCaddr_initport(server, port_pasv); // server.sin_port = htons((unsigned short int) port_pasv); -#if HTS_WIN +#ifdef _WIN32 if (connect(soc_dat, (const struct sockaddr FAR *)&server, server_size) == 0) { #else if (connect(soc_dat, (struct sockaddr *)&server, server_size) != -1) { @@ -650,7 +609,7 @@ int run_launch_ftp(lien_back* back) { deletesoc(soc_dat); soc_dat=INVALID_SOCKET; // sprintf(back->r.msg,"RETR command errror: %s",linejmp(line)); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; } // sinon on est prêts } else { @@ -660,22 +619,22 @@ int run_launch_ftp(lien_back* back) { deletesoc(soc_dat); soc_dat=INVALID_SOCKET; // strcpybuff(back->r.msg,"Unable to connect"); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; } // sinon on est prêts } else { strcpybuff(back->r.msg,"Unable to create a socket"); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; } // sinon on est prêts } else { 
sprintf(back->r.msg,"Unable to resolve IP %s",adr_ip); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; } // sinon on est prêts } else { sprintf(back->r.msg,"PASV incorrect: %s",linejmp(line)); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; } // sinon on est prêts #else @@ -698,27 +657,27 @@ int run_launch_ftp(lien_back* back) { int dummylen = sizeof(struct sockaddr); if ( (soc_dat=accept(soc_servdat,&dummyaddr,&dummylen)) == INVALID_SOCKET) { strcpybuff(back->r.msg,"Unable to accept connection"); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; } } else { sprintf(back->r.msg,"RETR command errror: %s",linejmp(line)); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; } } else { sprintf(back->r.msg,"PORT command error: %s",linejmp(line)); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; } -#if HTS_WIN +#ifdef _WIN32 closesocket(soc_servdat); #else close(soc_servdat); #endif } else { strcpybuff(back->r.msg,"Unable to listen to a port"); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; } #endif @@ -728,11 +687,11 @@ int run_launch_ftp(lien_back* back) { // if (soc_dat != INVALID_SOCKET) { if (rest_understood) { // REST envoyée et comprise - file_notify(back->url_adr, back->url_fil, back->url_sav, 0, 1, 0); - back->r.fp = fileappend(back->url_sav); + file_notify(opt, back->url_adr, back->url_fil, back->url_sav, 0, 1, 0); + back->r.fp = fileappend(&opt->state.strc, back->url_sav); } else { - file_notify(back->url_adr, back->url_fil, back->url_sav, 1, 1, 0); - back->r.fp = filecreate(back->url_sav); + 
file_notify(opt, back->url_adr, back->url_fil, back->url_sav, 1, 1, 0); + back->r.fp = filecreate(&opt->state.strc, back->url_sav); } strcpybuff(back->info,"receiving"); if (back->r.fp != NULL) { @@ -747,13 +706,13 @@ int run_launch_ftp(lien_back* back) { switch(wait_socket_receive(soc_dat,timeout)) { case -1: strcpybuff(back->r.msg,"FTP read error"); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; len=0; // fin break; case 0: sprintf(back->r.msg,"Time out (%d)",timeout); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; len=0; // fin break; @@ -774,17 +733,17 @@ int run_launch_ftp(lien_back* back) { } */ strcpybuff(back->r.msg,"Write error"); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; len=0; // error } } else { strcpybuff(back->r.msg,"Unexpected write error"); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; } } else { // Erreur ou terminé - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=0; if (back->r.totalsize > 0 && back->r.size != back->r.totalsize) { back->r.statuscode=STATUSCODE_INVALID; @@ -801,10 +760,10 @@ int run_launch_ftp(lien_back* back) { } } else { strcpybuff(back->r.msg,"Unable to write file"); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; } -#if HTS_WIN +#ifdef _WIN32 closesocket(soc_dat); #else close(soc_dat); @@ -817,16 +776,16 @@ int run_launch_ftp(lien_back* back) { get_ftp_line(soc_ctl,line,timeout); if (line[0]=='2') { // OK strcpybuff(back->r.msg,"OK"); - // back->status=FTP_STATUS_READY; // fini - back->r.statuscode=200; + // back->status=STATUS_FTP_READY; // fini + 
back->r.statuscode=HTTP_OK; } else { sprintf(back->r.msg,"RETR incorrect: %s",linejmp(line)); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; } } else { strcpybuff(back->r.msg,"FTP read error"); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; } } @@ -844,7 +803,7 @@ int run_launch_ftp(lien_back* back) { strcpybuff(back->info,"quit"); send_line(soc_ctl,"QUIT"); // bye bye get_ftp_line(soc_ctl,NULL,timeout); -#if HTS_WIN +#ifdef _WIN32 closesocket(soc_ctl); #else close(soc_ctl); @@ -852,10 +811,10 @@ int run_launch_ftp(lien_back* back) { } if (back->r.statuscode!=-1) { - back->r.statuscode=200; + back->r.statuscode=HTTP_OK; strcpybuff(back->r.msg,"OK"); } - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini return 0; } @@ -881,7 +840,7 @@ T_SOC get_datasocket(char* to_send) { // copie adresse SOCaddr_copyaddr(server, server_size, hp_loc->h_addr_list[0], hp_loc->h_length); - if ( (soc=socket(SOCaddr_sinfamily(server), SOCK_STREAM, 0)) != INVALID_SOCKET) { + if ( (soc = (T_SOC) socket(SOCaddr_sinfamily(server), SOCK_STREAM, 0)) != INVALID_SOCKET) { if ( bind(soc,(struct sockaddr*) &server, server_size) == 0 ) { SOCaddr server2; @@ -923,7 +882,7 @@ T_SOC get_datasocket(char* to_send) { #endif } else { -#if HTS_WIN +#ifdef _WIN32 closesocket(soc); #else close(soc); @@ -933,7 +892,7 @@ T_SOC get_datasocket(char* to_send) { } else { -#if HTS_WIN +#ifdef _WIN32 closesocket(soc); #else close(soc); @@ -943,7 +902,7 @@ T_SOC get_datasocket(char* to_send) { } else { -#if HTS_WIN +#ifdef _WIN32 closesocket(soc); #else close(soc); @@ -991,7 +950,7 @@ int send_line(T_SOC soc,char* data) { return r; } #else - return (send(soc,line,strlen(line),0) == (int) strlen(line)); + return (send(soc,line,(int)strlen(line),0) == (int) strlen(line)); #endif } @@ -1140,8 +1099,8 @@ int 
wait_socket_receive(T_SOC soc,int timeout) { // cancel reçu? int stop_ftp(lien_back* back) { if (back->stop_ftp) { - strcpybuff(back->r.msg,"Cancelled by User"); - // back->status=FTP_STATUS_READY; // fini + strcpybuff(back->r.msg, "Cancelled by User"); + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; return 1; } diff --git a/src/htsftp.h b/src/htsftp.h index 08ab784..1899164 100644 --- a/src/htsftp.h +++ b/src/htsftp.h @@ -42,20 +42,37 @@ Please visit our Website: http://www.httrack.com #include "htsbasenet.h" #include "htsthread.h" -// lien_back -#include "htscore.h" +/* Forward definitions */ +#ifndef HTS_DEF_FWSTRUCT_lien_back +#define HTS_DEF_FWSTRUCT_lien_back +typedef struct lien_back lien_back; +#endif +#ifndef HTS_DEF_FWSTRUCT_httrackp +#define HTS_DEF_FWSTRUCT_httrackp +typedef struct httrackp httrackp; +#endif + +/* Download structure */ +#ifndef HTS_DEF_FWSTRUCT_FTPDownloadStruct +#define HTS_DEF_FWSTRUCT_FTPDownloadStruct +typedef struct FTPDownloadStruct FTPDownloadStruct; +#endif +struct FTPDownloadStruct { + lien_back *pBack; + httrackp *pOpt; +}; /* Library internal definictions */ #ifdef HTS_INTERNAL_BYTECODE #if USE_BEGINTHREAD -void launch_ftp(lien_back* back); -PTHREAD_TYPE PTHREAD_TYPE_FNC back_launch_ftp( void* pP ); +void launch_ftp(FTPDownloadStruct *params); +void back_launch_ftp( void* pP ); #else -void launch_ftp(lien_back* back,char* path,char* exec); -int back_launch_ftp(lien_back* back); +void launch_ftp(FTPDownloadStruct *params,char* path,char* exec); +int back_launch_ftp(FTPDownloadStruct *params); #endif -int run_launch_ftp(lien_back* back); +int run_launch_ftp(FTPDownloadStruct *params); int send_line(T_SOC soc,char* data); int get_ftp_line(T_SOC soc,char* line,int timeout); T_SOC get_datasocket(char* to_send); diff --git a/src/htsglobal.h b/src/htsglobal.h index eef3ab3..dc39198 100644 --- a/src/htsglobal.h +++ b/src/htsglobal.h @@ -40,10 +40,10 @@ Please visit our Website: 
http://www.httrack.com #define HTTRACK_GLOBAL_DEFH // Version -#define HTTRACK_VERSION "3.40-2" -#define HTTRACK_VERSIONID "3.40.4" +#define HTTRACK_VERSION "3.41" +#define HTTRACK_VERSIONID "3.41.20" #define HTTRACK_AFF_VERSION "3.x" -//#define HTTRACK_AFF_WARNING "This is a BETA release of WinHTTrack Website Copier ("HTTRACK_VERSION")\nPlease report any crashes, bugs or problems" +#define HTTRACK_LIB_VERSION "2.0" #ifndef HTS_NOINCLUDES #ifndef _WIN32_WCE @@ -61,7 +61,6 @@ Please visit our Website: http://www.httrack.com #endif // Définition plate-forme -#include "htssystem.h" #include "htsconfig.h" // WIN32 types @@ -90,6 +89,7 @@ Please visit our Website: http://www.httrack.com #endif #ifndef S_ISREG #define S_ISREG(m) ((m) & _S_IFREG) +#define S_ISDIR(m) ((m) & _S_IFDIR) #endif #else @@ -164,21 +164,6 @@ Please visit our Website: http://www.httrack.com #endif -// Socket windows ou socket unix -#ifdef _WIN32 -#undef HTS_PLATFORM -#define HTS_PLATFORM 1 -#define HTS_WIN 1 - -#else - -#define HTS_WIN 0 -#ifdef __linux -#undef HTS_PLATFORM -#define HTS_PLATFORM 3 -#endif -#endif - // don't spare memory usage by default #ifndef HTS_SPARE_MEMORY #define HTS_SPARE_MEMORY 0 @@ -189,7 +174,7 @@ Please visit our Website: http://www.httrack.com #endif // compatibilité DOS -#if HTS_WIN +#ifdef _WIN32 #define HTS_DOSNAME 1 #else #define HTS_DOSNAME 0 @@ -224,30 +209,13 @@ Please visit our Website: http://www.httrack.com #define HTS_USESWF 1 #endif -#if HTS_WIN +#ifdef _WIN32 #else #define __cdecl #endif -#ifdef HTS_ANALYSTE_CONSOLE -#undef HTS_ANALYSTE_CONSOLE -#define HTS_ANALYSTE_CONSOLE 1 -#endif - -#if HTS_ANALYSTE -#else -#if HTS_WIN -#else -#undef HTS_ANALYSTE -// Analyste -#define HTS_ANALYSTE 1 -#define HTS_ANALYSTE_CONSOLE 1 -#endif -#endif - - /* rc file */ -#if HTS_WIN +#ifdef _WIN32 #define HTS_HTTRACKRC "httrackrc" #else @@ -292,14 +260,14 @@ Please visit our Website: http://www.httrack.com #endif /* Copyright (C) Xavier Roche and other contributors */ 
-#define HTTRACK_AFF_AUTHORS "[XR&CO'2006]" +#define HTTRACK_AFF_AUTHORS "[XR&CO'2007]" #define HTS_DEFAULT_FOOTER "" #define HTTRACK_WEB "http://www.httrack.com" #define HTS_UPDATE_WEBSITE "http://www.httrack.com/update.php3?Product=HTTrack&Version="HTTRACK_VERSIONID"&VersionStr="HTTRACK_VERSION"&Platform=%d&Language=%s" #define H_CRLF "\x0d\x0a" #define CRLF "\x0d\x0a" -#if HTS_WIN +#ifdef _WIN32 #define LF "\x0d\x0a" #else #define LF "\x0a" @@ -350,22 +318,23 @@ Please visit our Website: http://www.httrack.com typedef LLINT_TYPE TStamp; #define LLintP LLINT_FORMAT #else - #if HTS_WIN - typedef __int64 LLint; - typedef __int64 TStamp; - #define LLintP "%I64d" - #else - #if HTS_PLATFORM==0 + +#ifdef _WIN32 + typedef __int64 LLint; + typedef __int64 TStamp; + #define LLintP "%I64d" +#elif (defined(__x86_64__) || defined(_LP64) || defined(__64BIT__)) + typedef unsigned long int LLint; + typedef unsigned long int TStamp; + #define LLintP "%ld" +#else typedef long long int LLint; typedef long long int TStamp; #define LLintP "%lld" - #else - typedef long long int LLint; - typedef long long int TStamp; - #define LLintP "%Ld" - #endif - #endif #endif + +#endif /* HTS_LONGLONG */ + #else typedef int LLint; #define LLintP "%d" @@ -383,6 +352,16 @@ typedef int INTsys; #define INTsysP "%d" #endif +#ifdef _WIN32 +#if defined(_WIN64) +typedef unsigned __int64 T_SOC; +#else +typedef unsigned __int32 T_SOC; +#endif +#else +typedef int T_SOC; +#endif + /* Default alignement */ #ifndef HTS_ALIGN #define HTS_ALIGN (sizeof(void*)) @@ -391,7 +370,7 @@ typedef int INTsys; /* IPV4, IPV6 and various unified structures */ #define HTS_MAXADDRLEN 64 -#if HTS_WIN +#ifdef _WIN32 #else #define __cdecl #endif @@ -440,43 +419,26 @@ typedef int INTsys; #define TAILLE_BUFFER 8192 #endif -#if HTS_WIN -#else -// use pthreads.h - -#ifndef THREADS -#define HTS_DO_NOT_USE_PTHREAD -#endif - #ifdef HTS_DO_NOT_USE_PTHREAD -#define USE_PTHREAD 0 -#else -#define USE_PTHREAD 1 +#error needs threads support 
#endif -#endif - -#if HTS_WIN #define USE_BEGINTHREAD 1 -#else -#if USE_PTHREAD -#define USE_BEGINTHREAD 1 -#else -/* sh*t.. */ -#define USE_BEGINTHREAD 0 -#endif -#endif #ifdef _DEBUG // trace mallocs //#define HTS_TRACE_MALLOC #ifdef HTS_TRACE_MALLOC typedef unsigned long int t_htsboundary; -typedef struct mlink { +#ifndef HTS_DEF_FWSTRUCT_mlink +#define HTS_DEF_FWSTRUCT_mlink +typedef struct mlink mlink; +#endif +struct mlink { char* adr; int len; int id; struct mlink* next; -} mlink; +}; static const t_htsboundary htsboundary = 0xDEADBEEF; #endif #endif diff --git a/src/htshash.c b/src/htshash.c index 67d34d0..cf6b3ec 100644 --- a/src/htshash.c +++ b/src/htshash.c @@ -42,8 +42,10 @@ Please visit our Website: http://www.httrack.com /* specific definitions */ #include "htsbase.h" +#include "htsopt.h" #include "htsglobal.h" #include "htsmd5.h" +#include "htscore.h" /* END specific definitions */ /* Specific macros */ @@ -63,6 +65,7 @@ Please visit our Website: http://www.httrack.com // retour: position ou -1 si non trouvé int hash_read(hash_struct* hash,char* nom1,char* nom2,int type,int normalized) { char BIGSTK normfil_[HTS_URLMAXSIZE*2]; + char catbuff[CATBUFF_SIZE]; char* normfil; char* normadr; unsigned int cle; @@ -71,7 +74,7 @@ int hash_read(hash_struct* hash,char* nom1,char* nom2,int type,int normalized) { if (type) cle = hash_cle(nom1,nom2); else - cle = hash_cle(convtolower(nom1),nom2); // case insensitive + cle = hash_cle(convtolower(catbuff,nom1),nom2); // case insensitive // la position se calcule en modulant pos = (int) (cle%HTS_HASH_SIZE); // entrée trouvée? 
@@ -199,6 +202,7 @@ int hash_read(hash_struct* hash,char* nom1,char* nom2,int type,int normalized) { // enregistrement lien lpos dans les 3 tables hash1..3 void hash_write(hash_struct* hash,int lpos,int normalized) { char BIGSTK normfil_[HTS_URLMAXSIZE*2]; + char catbuff[CATBUFF_SIZE]; char* normfil; unsigned int cle; int pos; @@ -212,7 +216,7 @@ void hash_write(hash_struct* hash,int lpos,int normalized) { // élément actuel sur -1 (fin de chaine) hash->liens[lpos]->hash_next[0]=hash->liens[lpos]->hash_next[1]=hash->liens[lpos]->hash_next[2]=-1; // - cle = hash_cle(convtolower(hash->liens[lpos]->sav),""); // CASE INSENSITIVE + cle = hash_cle(convtolower(catbuff,hash->liens[lpos]->sav),""); // CASE INSENSITIVE pos = (int) (cle%HTS_HASH_SIZE); ptr = hash_calc_chaine(hash,0,pos); // calculer adresse chaine *ptr = lpos; // noter dernier enregistré diff --git a/src/htshash.h b/src/htshash.h index 43b5003..15f111e 100644 --- a/src/htshash.h +++ b/src/htshash.h @@ -40,11 +40,16 @@ Please visit our Website: http://www.httrack.com #ifndef HTSHASH_DEFH #define HTSHASH_DEFH -#include "htscore.h" - /* Library internal definictions */ #ifdef HTS_INTERNAL_BYTECODE -// tables de hashage + +/* Forward definitions */ +#ifndef HTS_DEF_FWSTRUCT_hash_struct +#define HTS_DEF_FWSTRUCT_hash_struct +typedef struct hash_struct hash_struct; +#endif + +// tables de hachage int hash_read(hash_struct* hash,char* nom1,char* nom2,int type,int normalized); void hash_write(hash_struct* hash,int lpos,int normalized); int* hash_calc_chaine(hash_struct* hash,int type,int pos); diff --git a/src/htshelp.c b/src/htshelp.c index d1557ac..1aa0945 100644 --- a/src/htshelp.c +++ b/src/htshelp.c @@ -46,7 +46,8 @@ Please visit our Website: http://www.httrack.com #include "htscatchurl.h" #include "htslib.h" #include "htsalias.h" -#if HTS_WIN +#include "htsmodules.h" +#ifdef _WIN32 #else #ifdef HAVE_UNISTD_H #include @@ -83,7 +84,7 @@ void infomsg(char* msg) { while(cmd[p]==' ') p++; 
sscanf(msg+p,"%s",cmd+strlen(cmd)); /* clears cN -> c */ - if ((p=strlen(cmd))>2) + if ((p = (int) strlen(cmd))>2) if (cmd[p-1]=='N') cmd[p-1]='\0'; /* finds alias (if any) */ @@ -141,7 +142,7 @@ void help_wizard(httrackp* opt) { // printf("\n"); - printf("Welcome to HTTrack Website Copier (Offline Browser) "HTTRACK_VERSION"%s\n", WHAT_is_available); + printf("Welcome to HTTrack Website Copier (Offline Browser) "HTTRACK_VERSION"%s\n", hts_get_version_info(opt)); printf("Copyright (C) Xavier Roche and other contributors\n"); #ifdef _WIN32 printf("Note: You are running the commandline version,\n"); @@ -279,11 +280,7 @@ void help_wizard(httrackp* opt) { } i++; } -#if HTS_ANALYSTE hts_main(argc,argv); -#else - main(argc,argv); -#endif } //} else { // help("httrack",1); @@ -333,7 +330,7 @@ int help_query(char* list,int def) { } // Capture d'URL -void help_catchurl(char* dest_path) { +void help_catchurl(const char* dest_path) { char BIGSTK adr_prox[HTS_URLMAXSIZE*2]; int port_prox; T_SOC soc=catch_url_init_std(&port_prox,adr_prox); @@ -401,7 +398,7 @@ void help(char* app,int more) { if (more) infomsg("1"); if (more != 2) { - sprintf(info, "HTTrack version "HTTRACK_VERSION"%s (compiled "__DATE__")", WHAT_is_available); + sprintf(info, "HTTrack version "HTTRACK_VERSION"%s (compiled "__DATE__")", hts_is_available()); infomsg(info); #ifdef HTTRACK_AFF_WARNING infomsg("NOTE: "HTTRACK_AFF_WARNING); @@ -413,7 +410,7 @@ void help(char* app,int more) { } infomsg("General options:"); infomsg(" O path for mirror/logfiles+cache (-O path_mirror[,path_cache_and_logfiles])"); -#ifndef HTS_WIN +#ifndef _WIN32 infomsg(" %O chroot path to, must be r00t (-%O root_path)"); #endif infomsg(""); @@ -475,7 +472,7 @@ void help(char* app,int more) { infomsg("Spider options:"); infomsg(" bN accept cookies in cookies.txt (0=do not accept,* 1=accept)"); infomsg(" u check document type if unknown (cgi,asp..) 
(u0 don't check, * u1 check but /, u2 check always)"); - infomsg(" j *parse Java Classes (j0 don't parse)"); + infomsg(" j *parse Java Classes (j0 don't parse, bitmask: |1 parse default, |2 don't parse .class |4 don't parse .js |8 don't be aggressive)"); infomsg(" sN follow robots.txt and meta robots tags (0=never,1=sometimes,* 2=always, 3=always (even strict rules))"); infomsg(" %h force HTTP/1.0 requests (reduce update features, only for old servers or proxies)"); infomsg(" %k use keep-alive if possible, greately reducing latency for small files and test requests (%k0 don't use)"); @@ -486,6 +483,7 @@ void help(char* app,int more) { infomsg(" shortcut: '--assume standard' is equivalent to -%A "HTS_ASSUME_STANDARD); infomsg(" can also be used to force a specific file type: --assume foo.cgi=text/html"); infomsg(" @iN internet protocol (0=both ipv6+ipv4, 4=ipv4 only, 6=ipv6 only)"); + infomsg(" %w disable a specific external mime module (-%w htsswf -%w htsjava)"); infomsg(""); infomsg("Browser ID:"); infomsg(" F user-agent field sent in HTTP headers (-F \"user-agent name\")"); @@ -557,7 +555,7 @@ void help(char* app,int more) { infomsg("Command-line specific options:"); infomsg(" V execute system command after each files ($0 is the filename: -V \"rm \\$0\")"); infomsg(" %U run the engine with another id when called as root (-%U smith)"); - infomsg(" %W use an external library function as a wrapper (-%W link-detected=foo.so:myfunction[,myparameters])"); + infomsg(" %W use an external library function as a wrapper (-%W myfoo.so[,myparameters])"); /* infomsg(" %O do a chroot before setuid"); */ infomsg(""); infomsg("Details: Option N"); @@ -627,28 +625,7 @@ void help(char* app,int more) { infomsg("--http10 force http/1.0 requests (-%h)"); infomsg(""); infomsg("Details: Option %W: External callbacks prototypes"); - infomsg("'init' : void (* myfunction)(void);"); - infomsg("'free' : void (* myfunction)(void);"); - infomsg("'start' : int (* myfunction)(httrackp* opt);"); 
- infomsg("'end' : int (* myfunction)(void);"); - infomsg("'change-options' : int (* myfunction)(httrackp* opt);"); - infomsg("'preprocess-html' : int (* myfunction)(char** html,int* len,char* url_adresse,char* url_fichier);"); - infomsg("'postprocess-html' : int (* myfunction)(char** html,int* len,char* url_adresse,char* url_fichier);"); - infomsg("'check-html' : int (* myfunction)(char* html,int len,char* url_adresse,char* url_fichier);"); - infomsg("'query' : char* (* myfunction)(char* question);"); - infomsg("'query2' : char* (* myfunction)(char* question);"); - infomsg("'query3' : char* (* myfunction)(char* question);"); - infomsg("'loop' : int (* myfunction)(lien_back* back,int back_max,int back_index,int lien_tot,int lien_ntot,int stat_time,hts_stat_struct* stats);"); - infomsg("'check-link' : int (* myfunction)(char* adr,char* fil,int status);"); - infomsg("'pause' : void (* myfunction)(char* lockfile);"); - infomsg("'save-file' : void (* myfunction)(char* file);"); - infomsg("'save-file2' : void (* myfunction)(char* hostname,char* filename,char* localfile,int is_new,int is_modified);"); - infomsg("'link-detected' : int (* myfunction)(char* link);"); - infomsg("'link-detected2' : int (* myfunction)(char* link, char* start_tag);"); - infomsg("'transfer-status' : int (* myfunction)(lien_back* back);"); - infomsg("'save-name' : int (* myfunction)(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save);"); - infomsg("And _init() functions if defined, called upon plug"); - infomsg(""); + infomsg("see htsdefines.h"); infomsg(""); infomsg("example: httrack www.someweb.com/bob/"); infomsg("means: mirror site www.someweb.com/bob/ and only this site"); @@ -671,7 +648,7 @@ void help(char* app,int more) { infomsg("example: httrack --continue"); infomsg("continues a mirror in the current folder"); infomsg(""); - sprintf(info, "HTTrack version "HTTRACK_VERSION"%s (compiled "__DATE__")", WHAT_is_available); + sprintf(info, "HTTrack version 
"HTTRACK_VERSION"%s (compiled "__DATE__")", hts_is_available()); infomsg(info); infomsg("Copyright (C) Xavier Roche and other contributors"); #ifdef HTS_PLATFORM_NAME diff --git a/src/htshelp.h b/src/htshelp.h index 67354c7..1ec16e2 100644 --- a/src/htshelp.h +++ b/src/htshelp.h @@ -40,17 +40,22 @@ Please visit our Website: http://www.httrack.com #ifndef HTSHELP_DEFH #define HTSHELP_DEFH -#include "htsglobal.h" -#include "htscore.h" - /* Library internal definictions */ #ifdef HTS_INTERNAL_BYTECODE + +/* Forward definitions */ +#ifndef HTS_DEF_FWSTRUCT_httrackp +#define HTS_DEF_FWSTRUCT_httrackp +typedef struct httrackp httrackp; +#endif + void infomsg(char* msg); void help(char* app,int more); void make_empty_index(char* str); void help_wizard(httrackp* opt); int help_query(char* list,int def); -void help_catchurl(char* dest_path); +void help_catchurl(const char* dest_path); + #endif #endif diff --git a/src/htsindex.c b/src/htsindex.c index 0546b2f..4a7bd67 100644 --- a/src/htsindex.c +++ b/src/htsindex.c @@ -144,6 +144,7 @@ void index_init(const char* indexpath) { */ int index_keyword(const char* html_data,LLint size,const char* mime,const char* filename,const char* indexpath) { #if HTS_MAKE_KEYWORD_INDEX + char catbuff[CATBUFF_SIZE]; int intag=0,inscript=0,incomment=0; char keyword[KEYW_LEN+32]; int i=0; @@ -165,8 +166,8 @@ int index_keyword(const char* html_data,LLint size,const char* mime,const char* // Init ? if (hts_index_init) { - remove(concat(indexpath,"index.txt")); - remove(concat(indexpath,"sindex.html")); + remove(concat(catbuff,indexpath,"index.txt")); + remove(concat(catbuff,indexpath,"sindex.html")); hts_index_init=0; } @@ -236,7 +237,7 @@ int index_keyword(const char* html_data,LLint size,const char* mime,const char* if ( (!inscript) && (!incomment) && (!intag) ) { char cchar=html_data[i]; int pos; - int len=strlen(keyword); + int len = (int) strlen(keyword); // Replace (ignore case, and so on..) 
if ((pos=strcpos(KEYW_TRANSCODE_FROM,cchar))>=0) @@ -261,7 +262,7 @@ int index_keyword(const char* html_data,LLint size,const char* mime,const char* /* Strip ending . and so */ { int ok=0; - while((len=strlen(keyword)) && (!ok)) { + while((len = (int) strlen(keyword)) && (!ok)) { if (strchr(KEYW_STRIP_END,keyword[len-1])) { /* strip it */ keyword[len-1]='\0'; } else @@ -302,13 +303,13 @@ int index_keyword(const char* html_data,LLint size,const char* mime,const char* char line[KEYW_LEN + 32]; linput(tmpfp,line,KEYW_LEN + 2); if (strnotempty(line)) { - unsigned long int e=0; + intptr_t e=0; if (inthash_read(WordIndexHash,line,&e)) { //if (e) { char BIGSTK savelst[HTS_URLMAXSIZE*2]; e++; /* 0 means "once" */ - if (strncmp((const char*)fslash((char*)indexpath),filename,strlen(indexpath))==0) // couper + if (strncmp((const char*)fslash(catbuff,(char*)indexpath),filename,strlen(indexpath))==0) // couper strcpybuff(savelst,filename+strlen(indexpath)); else strcpybuff(savelst,filename); @@ -339,11 +340,10 @@ int index_keyword(const char* html_data,LLint size,const char* mime,const char* */ void index_finish(const char* indexpath,int mode) { #if HTS_MAKE_KEYWORD_INDEX + char catbuff[CATBUFF_SIZE]; char** tab; char* blk; - INTsys size; - - size=fpsize(fp_tmpproject); + off_t size = fpsize(fp_tmpproject); if (size>0) { //FILE* fp=fopen(concat(indexpath,"index.txt"),"rb"); if (fp_tmpproject) { @@ -373,9 +373,9 @@ void index_finish(const char* indexpath,int mode) { // Write new file if (mode == 1) // TEXT - fp=fopen(concat(indexpath,"index.txt"),"wb"); + fp=fopen(concat(catbuff,indexpath,"index.txt"),"wb"); else // HTML - fp=fopen(concat(indexpath,"sindex.html"),"wb"); + fp=fopen(concat(catbuff,indexpath,"sindex.html"),"wb"); if (fp) { char current_word[KEYW_LEN + 32]; char word[KEYW_LEN + 32]; diff --git a/src/htsindex.h b/src/htsindex.h index b773034..13e139d 100644 --- a/src/htsindex.h +++ b/src/htsindex.h @@ -39,10 +39,11 @@ Please visit our Website: http://www.httrack.com 
#ifndef HTSKINDEX_DEFH #define HTSKINDEX_DEFH -#include "htsglobal.h" - /* Library internal definictions */ #ifdef HTS_INTERNAL_BYTECODE + +#include "htsglobal.h" + int index_keyword(const char* html_data,LLint size,const char* mime,const char* filename,const char* indexpath); void index_init(const char* indexpath); void index_finish(const char* indexpath,int mode); diff --git a/src/htsinthash.c b/src/htsinthash.c index e81a74f..a8fcba9 100644 --- a/src/htsinthash.c +++ b/src/htsinthash.c @@ -94,7 +94,7 @@ void inthash_add_pvoid(inthash hashtable, const char* name, void* pvalue) { } // Check for duplicate entry (==1 : added) -int inthash_write(inthash hashtable,const char* name,long int intvalue) { +int inthash_write(inthash hashtable,const char* name,intptr_t intvalue) { inthash_value value = INTHASH_VALUE_NULL; value.intg = intvalue; return inthash_write_value(hashtable, name, value); @@ -129,7 +129,7 @@ int inthash_write_value(inthash hashtable,const char* name,inthash_value value) // Increment pos value, create one if necessary (=0) // (==1 : created) int inthash_inc(inthash hashtable,const char* name) { - long int value=0; + intptr_t value=0; int r=0; if (inthash_read(hashtable,name,&value)) { value++; @@ -144,7 +144,7 @@ int inthash_inc(inthash hashtable,const char* name) { // Does not check for duplicate entry -void inthash_add(inthash hashtable, const char* name, long int intvalue) { +void inthash_add(inthash hashtable, const char* name, intptr_t intvalue) { inthash_value value = INTHASH_VALUE_NULL; memset(&value, 0, sizeof(value)); value.intg = intvalue; @@ -195,7 +195,7 @@ void* inthash_addblk(inthash hashtable,const char* name,int blksize) { return NULL; } -int inthash_read(inthash hashtable,const char* name,long int* intvalue) { +int inthash_read(inthash hashtable,const char* name,intptr_t* intvalue) { inthash_value value = INTHASH_VALUE_NULL; int ret = inthash_read_value(hashtable, name, (intvalue != NULL) ? 
&value : NULL); if (intvalue != NULL) @@ -255,7 +255,7 @@ int inthash_remove(inthash hashtable,const char* name) { return 0; } -int inthash_readptr(inthash hashtable,const char* name,long int* value) { +int inthash_readptr(inthash hashtable,const char* name,intptr_t* value) { int ret; *value = 0; ret = inthash_read(hashtable, name, value); diff --git a/src/htsinthash.h b/src/htsinthash.h index b11b7ac..f839d2d 100644 --- a/src/htsinthash.h +++ b/src/htsinthash.h @@ -35,47 +35,65 @@ Please visit our Website: http://www.httrack.com /* Author: Xavier Roche */ /* ------------------------------------------------------------ */ - +// inthash -- simple hash table, using a key (char[]) and a value (uintptr_t) #ifndef HTSINTHASH_DEFH #define HTSINTHASH_DEFH -// inthash -- simple hash table, using a key (char[]) and a value (ulong int) +/* Includes */ +#ifdef _WIN32 +#include +#elif (defined(SOLARIS) || defined(sun) || defined(HAVE_INTTYPES_H) \ + || defined(BSD) || defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) || defined(__FreeBSD_kernel__)) +#include +#else +#include +#endif // value typedef union inthash_value { - unsigned long int intg; /* integer value */ + uintptr_t intg; /* integer value */ void* ptr; /* ptr value */ } inthash_value; #define INTHASH_VALUE_NULL { 0 } // simple hash table for other routines -typedef struct inthash_chain { +#ifndef HTS_DEF_FWSTRUCT_inthash_chain +#define HTS_DEF_FWSTRUCT_inthash_chain +typedef struct inthash_chain inthash_chain; +#endif +struct inthash_chain { char* name; /* key (name) */ inthash_value value; /* value */ struct inthash_chain* next; /* next element */ -} inthash_chain; +}; -// structure behind inthash typedef void (* t_inthash_freehandler)(void* value); -typedef struct struct_inthash { + +/* inthash structure */ +#ifndef HTS_DEF_FWSTRUCT_struct_inthash +#define HTS_DEF_FWSTRUCT_struct_inthash +typedef struct struct_inthash struct_inthash, *inthash; +#endif +struct struct_inthash { inthash_chain** 
hash; unsigned int nitems; t_inthash_freehandler free_handler; unsigned int hash_size; unsigned short flag_valueismalloc; -} struct_inthash; - -// main inthash type -typedef struct_inthash* inthash; +}; // enumeration -typedef struct struct_inthash_enum { +#ifndef HTS_DEF_FWSTRUCT_struct_inthash_enum +#define HTS_DEF_FWSTRUCT_struct_inthash_enum +typedef struct struct_inthash_enum struct_inthash_enum; +#endif +struct struct_inthash_enum { inthash table; int index; inthash_chain* item; -} struct_inthash_enum; +}; /* Library internal definictions */ #ifdef HTS_INTERNAL_BYTECODE @@ -91,8 +109,8 @@ void inthash_value_is_malloc(inthash hashtable,int flag); /* Is void inthash_value_set_free_handler(inthash hashtable, /* value free() handler (default one is 'free') */ t_inthash_freehandler free_handler); /* */ -int inthash_read(inthash hashtable,const char* name,long int* intvalue); /* Read entry from the hash table */ -int inthash_readptr(inthash hashtable,const char* name,long int* intvalue); /* Same function, but returns 0 upon null ptr */ +int inthash_read(inthash hashtable,const char* name,intptr_t* intvalue); /* Read entry from the hash table */ +int inthash_readptr(inthash hashtable,const char* name,intptr_t* intvalue); /* Same function, but returns 0 upon null ptr */ int inthash_exists(inthash hashtable, const char* name); /* Is the key existing ? 
*/ /* */ int inthash_read_value(inthash hashtable,const char* name,inthash_value* value); @@ -103,9 +121,9 @@ int inthash_read_pvoid(inthash hashtable,const char* name, void** value); int inthash_write_pvoid(inthash hashtable,const char* name, void* value); void inthash_add_pvoid(inthash hashtable, const char* name, void* value); /* */ -void inthash_add(inthash hashtable,const char* name,long int value); /* Add entry in the hash table */ +void inthash_add(inthash hashtable,const char* name,intptr_t value); /* Add entry in the hash table */ void* inthash_addblk(inthash hashtable,const char* name,int blksize); /* Add entry in the hash table and set value to a new memory block */ -int inthash_write(inthash hashtable,const char* name,long int value); /* Overwrite/add entry in the hash table */ +int inthash_write(inthash hashtable,const char* name,intptr_t value); /* Overwrite/add entry in the hash table */ int inthash_inc(inthash hashtable,const char* name); /* Increment entry in the hash table */ int inthash_remove(inthash hashtable,const char* name); /* Remove an entry from the hashtable */ /* */ diff --git a/src/htsjava.c b/src/htsjava.c index 3536b9b..a52aea2 100644 --- a/src/htsjava.c +++ b/src/htsjava.c @@ -35,23 +35,37 @@ Please visit our Website: http://www.httrack.com /* ------------------------------------------------------------ */ -/* Internal engine bytecode */ -#define HTS_INTERNAL_BYTECODE - /* Version: Oct/2000 */ /* Fixed: problems with class structure (10/2000) */ // htsjava.c - Parseur de classes java -#include "stdio.h" -#include "htsglobal.h" -#include "htscore.h" +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif +#include +#include +#include +#if ( defined(_WIN32) ||defined(HAVE_SYS_TYPES_H) ) +#include +#endif +#ifdef HAVE_UNISTD_H +#include +#endif -#include "htsjava.h" +/* Standard httrack module includes */ +#include "httrack-library.h" +#include "htsopt.h" +#include "htsdefines.h" -#include "htsnostatic.h" +/* Module structures */ +#include 
"htsmodules.h" -//#include +/* We link to libhttrack, we can use its functions */ +#include "httrack-library.h" + +/* This file */ +#include "htsjava.h" static int reverse_endian(void) { int endian = 1; @@ -62,6 +76,11 @@ static int reverse_endian(void) { #define hts_swap16(A) ( (((A) & 0xFF)<<8) | (((A) & 0xFF00)>>8) ) #define hts_swap32(A) ( (( (hts_swap16(A)) & 0xFFFF)<<16) | (( (hts_swap16(A>>16)) & 0xFFFF)) ) +/* Static definitions */ +static RESP_STRUCT readtable(htsmoduleStruct* str,FILE *fp,RESP_STRUCT,int*); +static unsigned short int readshort(FILE *fp); +static int tris(httrackp *opt,char*); +static char * printname(char [1024],char [1024]); // ** HTS_xx sinon pas pris par VC++ #define HTS_CLASS 7 @@ -79,148 +98,217 @@ static int reverse_endian(void) { #define JAVADEBUG 0 -int hts_detect_java(htsmoduleStruct* str) { - char* savename = str->filename; +static const char *libName = "htsjava"; + +#ifdef _WIN32 +#define strcasecmp(a,b) stricmp(a,b) +#define strncasecmp(a,b,n) strnicmp(a,b,n) +#endif + +static int detect_mime(htsmoduleStruct* str) { + const char* savename = str->filename; if (savename) { int len = (int) strlen(savename); - if (len > 6 && strfield(savename + len - 6,".class")) { + if (len > 6 && strcasecmp(savename + len - 6,".class") == 0) { return 1; } } return 0; } -int hts_parse_java(htsmoduleStruct* str) +static int hts_detect_java(t_hts_callbackarg *carg, httrackp *opt, + htsmoduleStruct* str) { - FILE *fpout; - JAVA_HEADER header; - RESP_STRUCT *tab; - char* file = str->filename; - - str->relativeToHtmlLink = 1; + /* Call parent functions if multiple callbacks are chained. 
*/ + if (CALLBACKARG_PREV_FUN(carg, detect) != NULL) { + if (CALLBACKARG_PREV_FUN(carg, detect)(CALLBACKARG_PREV_CARG(carg), opt, str)) { + return 1; /* Found before us, let them have the priority */ + } + } -#if JAVADEBUG - printf("fopen\n"); -#endif - if ((fpout = fopen(fconv(file), "r+b")) == NULL) - { - //fprintf(stderr, "Cannot open input file.\n"); - sprintf(str->err_msg,"Unable to open file %s",file); - return 0; // une erreur.. + /* Check MIME */ + if (detect_mime(str)) { + str->wrapper_name = libName; /* Our ID */ + return 1; /* Known format, we take it */ } - + + return 0; /* Unknown format */ +} + +static off_t fsize(const char* s) { + FILE* fp; + fp=fopen(s,"rb"); + if (fp!=NULL) { + off_t i; + fseek(fp,0,SEEK_END); + i = ftell(fp); + fclose(fp); + return i; + } else + return -1; +} + +static int hts_parse_java(t_hts_callbackarg *carg, httrackp *opt, + htsmoduleStruct* str) +{ + /* The wrapper_name memebr has changed: not for us anymore */ + if (str->wrapper_name == NULL || strcmp(str->wrapper_name, libName) != 0) { + /* Call parent functions if multiple callbacks are chained. */ + if (CALLBACKARG_PREV_FUN(carg, parse) != NULL) { + return CALLBACKARG_PREV_FUN(carg, parse)(CALLBACKARG_PREV_CARG(carg), opt, str); + } + strcpy(str->err_msg, "unexpected error: bad wrapper_name and no previous wrapper"); + return 0; /* Unexpected error */ + } else { + if (detect_mime(str)) { + + /* (Legacy code) */ + char catbuff[CATBUFF_SIZE]; + FILE *fpout; + JAVA_HEADER header; + RESP_STRUCT *tab; + const char* file = str->filename; + + str->relativeToHtmlLink = 1; + #if JAVADEBUG - printf("fread\n"); + printf("fopen\n"); #endif - //if (fread(&header,1,sizeof(JAVA_HEADER),fpout) != sizeof(JAVA_HEADER)) { // pas complet.. - if (fread(&header,1,10,fpout) != 10) { // pas complet.. 
- fclose(fpout); - sprintf(str->err_msg,"File header too small (file len = "LLintP")",(LLint)fsize(file)); - return 0; - } + if ((fpout = fopen(fconv(catbuff, file), "r+b")) == NULL) + { + //fprintf(stderr, "Cannot open input file.\n"); + sprintf(str->err_msg,"Unable to open file %s",file); + return 0; // une erreur.. + } #if JAVADEBUG - printf("header\n"); + printf("fread\n"); #endif - // tester en tête - if (reverse_endian()) { - header.magic = hts_swap32(header.magic); - header.count = hts_swap16(header.count); - } - if(header.magic!=0xCAFEBABE) { - sprintf(str->err_msg,"non java file"); - if (fpout) { fclose(fpout); fpout=NULL; } - return 0; - } - - tab =(RESP_STRUCT*)calloct(header.count,sizeof(RESP_STRUCT)); - if (!tab) { - sprintf(str->err_msg,"Unable to alloc %d bytes",(int)sizeof(RESP_STRUCT)); - if (fpout) { fclose(fpout); fpout=NULL; } - return 0; // erreur.. - } + //if (fread(&header,1,sizeof(JAVA_HEADER),fpout) != sizeof(JAVA_HEADER)) { // pas complet.. + if (fread(&header,1,10,fpout) != 10) { // pas complet.. + fclose(fpout); + sprintf(str->err_msg,"File header too small (file len = "LLintP")",(LLint)fsize(file)); + return 0; + } #if JAVADEBUG - printf("calchead\n"); + printf("header\n"); #endif - { - int i; - - for (i = 1; i < header.count; i++) { - int err=0; // ++ - tab[i]=readtable(str,fpout,tab[i],&err); - if (!err) { - if ((tab[i].type == HTS_LONG) ||(tab[i].type == HTS_DOUBLE)) i++; //2 element si double ou float - } else { // ++ une erreur est survenue! 
- if (strnotempty(str->err_msg)==0) - strcpybuff(str->err_msg,"Internal readtable error"); - freet(tab); + // tester en tête + if (reverse_endian()) { + header.magic = hts_swap32(header.magic); + header.count = hts_swap16(header.count); + } + if(header.magic!=0xCAFEBABE) { + sprintf(str->err_msg,"non java file"); if (fpout) { fclose(fpout); fpout=NULL; } return 0; } - } - - } - + tab =(RESP_STRUCT*)calloc(header.count,sizeof(RESP_STRUCT)); + if (!tab) { + sprintf(str->err_msg,"Unable to alloc %d bytes",(int)sizeof(RESP_STRUCT)); + if (fpout) { fclose(fpout); fpout=NULL; } + return 0; // erreur.. + } + #if JAVADEBUG - printf("addfiles\n"); + printf("calchead\n"); #endif - { - unsigned int acess; - unsigned int Class; - unsigned int SClass; - int i; - acess = readshort(fpout); - Class = readshort(fpout); - SClass = readshort(fpout); - - for (i = 1; i =0)) { - - - if((tab[i].index1!=SClass) && (tab[i].index1!=Class) && (tab[tab[i].index1].name[0]!='[')) { - - if(!strstr(tab[tab[i].index1].name,"java/")) { - char BIGSTK tempo[1024]; - tempo[0]='\0'; - - sprintf(tempo,"%s.class",tab[tab[i].index1].name); + { + int i; + + for (i = 1; i < header.count; i++) { + int err=0; // ++ + tab[i]=readtable(str,fpout,tab[i],&err); + if (!err) { + if ((tab[i].type == HTS_LONG) ||(tab[i].type == HTS_DOUBLE)) i++; //2 element si double ou float + } else { // ++ une erreur est survenue! 
+ if (strnotempty(str->err_msg)==0) + strcpy(str->err_msg,"Internal readtable error"); + free(tab); + if (fpout) { fclose(fpout); fpout=NULL; } + return 0; + } + } + + } + + +#if JAVADEBUG + printf("addfiles\n"); +#endif + { + unsigned int acess; + unsigned int Class; + unsigned int SClass; + int i; + acess = readshort(fpout); + Class = readshort(fpout); + SClass = readshort(fpout); + + for (i = 1; i =0)) { + + + if((tab[i].index1!=SClass) && (tab[i].index1!=Class) && (tab[tab[i].index1].name[0]!='[')) { + + if(!strstr(tab[tab[i].index1].name,"java/")) { + char BIGSTK tempo[1024]; + tempo[0]='\0'; + + sprintf(tempo,"%s.class",tab[tab[i].index1].name); #if JAVADEBUG - printf("add %s\n",tempo); + printf("add %s\n",tempo); #endif - if (tab[tab[i].index1].file_position >= 0) - str->addLink(str,tempo); /* tab[tab[i].index1].file_position */ + if (tab[tab[i].index1].file_position >= 0) + str->addLink(str,tempo); /* tab[tab[i].index1].file_position */ + } + + } + } else { + i=header.count; // exit } - } - } else { - i=header.count; // exit + } } - - } - } - - + + #if JAVADEBUG - printf("end\n"); + printf("end\n"); #endif - freet(tab); - if (fpout) { fclose(fpout); fpout=NULL; } - return 1; -} + free(tab); + if (fpout) { fclose(fpout); fpout=NULL; } + return 1; + } else { + strcpy(str->err_msg, "bad MIME type"); + } + } + return 0; /* Error */ +} +/* +module entry point +*/ +EXTERNAL_FUNCTION int hts_plug(httrackp *opt, const char* argv); +EXTERNAL_FUNCTION int hts_plug(httrackp *opt, const char* argv) { + /* Plug callback functions */ + CHAIN_FUNCTION(opt, detect, hts_detect_java, NULL); + CHAIN_FUNCTION(opt, parse, hts_parse_java, NULL); + return 1; /* success */ +} // error: !=0 si erreur fatale -RESP_STRUCT readtable(htsmoduleStruct* str, - FILE *fp, RESP_STRUCT trans, int* error) +static RESP_STRUCT readtable(htsmoduleStruct* str, + FILE *fp, RESP_STRUCT trans, int* error) { + char rname[1024]; unsigned short int length; int j; *error = 0; // pas d'erreur @@ -228,54 
+316,54 @@ RESP_STRUCT readtable(htsmoduleStruct* str, trans.type = (int)(unsigned char)fgetc(fp); switch (trans.type) { case HTS_CLASS: - strcpybuff(trans.name,"Class"); + strcpy(trans.name,"Class"); trans.index1 = readshort(fp); break; case HTS_FIELDREF: - strcpybuff(trans.name,"Field Reference"); + strcpy(trans.name,"Field Reference"); trans.index1 = readshort(fp); readshort(fp); break; case HTS_METHODREF: - strcpybuff(trans.name,"Method Reference"); + strcpy(trans.name,"Method Reference"); trans.index1 = readshort(fp); readshort(fp); break; case HTS_INTERFACE: - strcpybuff(trans.name,"Interface Method Reference"); + strcpy(trans.name,"Interface Method Reference"); trans.index1 =readshort(fp); readshort(fp); break; case HTS_NAMEANDTYPE: - strcpybuff(trans.name,"Name and Type"); + strcpy(trans.name,"Name and Type"); trans.index1 = readshort(fp); readshort(fp); break; case HTS_STRING: // CONSTANT_String - strcpybuff(trans.name,"String"); + strcpy(trans.name,"String"); trans.index1 = readshort(fp); break; case HTS_INTEGER: - strcpybuff(trans.name,"Integer"); + strcpy(trans.name,"Integer"); for(j=0;j<4;j++) fgetc(fp); break; case HTS_FLOAT: - strcpybuff(trans.name,"Float"); + strcpy(trans.name,"Float"); for(j=0;j<4;j++) fgetc(fp); break; case HTS_LONG: - strcpybuff(trans.name,"Long"); + strcpy(trans.name,"Long"); for(j=0;j<8;j++) fgetc(fp); break; case HTS_DOUBLE: - strcpybuff(trans.name,"Double"); + strcpy(trans.name,"Double"); for(j=0;j<8;j++) fgetc(fp); break; @@ -283,9 +371,9 @@ RESP_STRUCT readtable(htsmoduleStruct* str, case HTS_UNICODE: if (trans.type == HTS_ASCIZ) - strcpybuff(trans.name,"HTS_ASCIZ"); + strcpy(trans.name,"HTS_ASCIZ"); else - strcpybuff(trans.name,"HTS_UNICODE"); + strcpy(trans.name,"HTS_UNICODE"); { char BIGSTK buffer[1024]; @@ -309,10 +397,10 @@ RESP_STRUCT readtable(htsmoduleStruct* str, // if(tris(buffer)==1) printf("%s\n ",buffer); // if(tris(buffer)==2) printf("%s\n ",printname(buffer)); //#endif - if(tris(buffer)==1) str->addLink(str, 
buffer); /* trans.file_position */ - else if(tris(buffer)==2) str->addLink(str, printname(buffer)); + if(tris(str->opt,buffer)==1) str->addLink(str, buffer); /* trans.file_position */ + else if(tris(str->opt,buffer)==2) str->addLink(str, printname(rname,buffer)); - strcpybuff(trans.name,buffer); + strcpy(trans.name,buffer); } else { // gros pb while ( (length > 0) && (!feof(fp))) { fgetc(fp); @@ -340,7 +428,7 @@ RESP_STRUCT readtable(htsmoduleStruct* str, } -unsigned short int readshort(FILE *fp) +static unsigned short int readshort(FILE *fp) { unsigned short int valint; fread(&valint,sizeof(valint),1,fp); @@ -352,8 +440,9 @@ unsigned short int readshort(FILE *fp) } -int tris(char * buffer) +static int tris(httrackp *opt,char * buffer) { + char catbuff[CATBUFF_SIZE]; // // Java if((buffer[0]=='[') && buffer[1]=='L' && (!strstr(buffer,"java/")) ) @@ -365,25 +454,21 @@ int tris(char * buffer) { char type[256]; type[0]='\0'; - get_httptype(type,buffer,0); + get_httptype(opt,type,buffer,0); if (strnotempty(type)) // type reconnu! return 1; // ajout RX 05/2001 - else if (is_dyntype(get_ext(buffer))) // asp,cgi... + else if (is_dyntype(get_ext(catbuff, buffer))) // asp,cgi... 
return 1; } return 0; } - -char * printname(char name[1024]) +static char * printname(char rname[1024], char name[1024]) { - char* rname; - //char *rname; char *p; char *p1; int j; - NOSTATIC_RESERVE(rname, char, 1024); rname[0]='\0'; // @@ -396,7 +481,7 @@ char * printname(char name[1024]) for (j = 0; j < (int) strlen(name); j++,p++) { if (*p == '/') *p1='.'; if (*p==';'){*p1='\0'; - strcatbuff(rname,".class"); + strcat(rname,".class"); return (rname);} else *p1=*p; p1++; diff --git a/src/htsjava.h b/src/htsjava.h index 915824b..b95155e 100644 --- a/src/htsjava.h +++ b/src/htsjava.h @@ -38,35 +38,35 @@ Please visit our Website: http://www.httrack.com #ifndef HTSJAVA_DEFH #define HTSJAVA_DEFH -#include -#include "htsmodules.h" - -typedef struct { +#ifndef HTS_DEF_FWSTRUCT_JAVA_HEADER +#define HTS_DEF_FWSTRUCT_JAVA_HEADER +typedef struct JAVA_HEADER JAVA_HEADER; +#endif +struct JAVA_HEADER { unsigned long int magic; unsigned short int minor; unsigned short int major; unsigned short int count; -} JAVA_HEADER; +}; -typedef struct { +#ifndef HTS_DEF_FWSTRUCT_RESP_STRUCT +#define HTS_DEF_FWSTRUCT_RESP_STRUCT +typedef struct RESP_STRUCT RESP_STRUCT; +#endif +struct RESP_STRUCT { int file_position; // unsigned int index1; unsigned int type; char name[1024]; -} RESP_STRUCT; +}; /* Library internal definictions */ #ifdef HTS_INTERNAL_BYTECODE -int hts_detect_java(htsmoduleStruct* str); -int hts_parse_java(htsmoduleStruct* str); -RESP_STRUCT affecte(int i1,int i2,RESP_STRUCT *i3,RESP_STRUCT *i4,int i5); -//unsigned int swap(long int nomber,int digit); -RESP_STRUCT readtable(htsmoduleStruct* str,FILE *fp,RESP_STRUCT,int*); -unsigned short int readshort(FILE *fp); -int tris(char*); -char * printname(char [1024]); + +EXTERNAL_FUNCTION int hts_plug_java(httrackp *opt, const char* argv); + #endif #endif diff --git a/src/htslib.c b/src/htslib.c index 93119df..c398e3f 100644 --- a/src/htslib.c +++ b/src/htslib.c @@ -39,8 +39,7 @@ Please visit our Website: http://www.httrack.com // 
Fichier librairie .c -#include "htslib.h" -#include "htsbauth.h" +#include "htscore.h" #ifdef _WIN32_WCE #ifndef HTS_CECOMPAT @@ -53,10 +52,12 @@ Please visit our Website: http://www.httrack.com #include "htsnet.h" #include "htsbauth.h" #include "htsthread.h" -#include "htsnostatic.h" +#include "htsback.h" #include "htswrap.h" #include "htsmd5.h" -#if HTS_WIN +#include "htsmodules.h" + +#ifdef _WIN32 #ifndef _WIN32_WCE #include #endif @@ -70,21 +71,22 @@ Please visit our Website: http://www.httrack.com #ifdef HAVE_UNISTD_H #include #endif -#endif +#endif /* _WIN32 */ + #include #include + #ifndef _WIN32_WCE #include +#include #else #ifndef HTS_CECOMPAT #include #endif -#endif -#ifndef _WIN32_WCE -#include -#endif +#endif /* _WIN32_WCE */ + // pour utimbuf -#if HTS_WIN +#ifdef _WIN32 #ifndef _WIN32_WCE #include #else @@ -94,7 +96,8 @@ Please visit our Website: http://www.httrack.com #endif #else #include -#endif +#endif /* _WIN32 */ + #ifndef _WIN32_WCE #include #endif @@ -115,7 +118,6 @@ FILE* ioinfo; #endif int IPV6_resolver = 0; - /* détection complémentaire */ const char* hts_detect[] = { "archive", @@ -300,6 +302,7 @@ const char* hts_mime[][2] = { {"application/x-authorware-map","aam"}, {"application/x-authorware-seg","aas"}, {"application/x-authorware-bin","aab"}, + {"application/x-bzip2","bz2"}, {"application/x-cocoa","cco"}, {"application/x-csh","csh"}, {"application/x-director","dir"}, @@ -347,6 +350,7 @@ const char* hts_mime[][2] = { {"application/x-tar","tar"}, {"application/x-ustar","ustar"}, {"application/x-winhelp","hlp"}, + {"application/xml","xml"}, {"audio/midi","mid"}, {"audio/midi","midi"}, {"audio/midi","kar"}, @@ -437,17 +441,17 @@ const char* hts_mime[][2] = { { "application/pkix-crl", "crl" }, { "application/set-payment-initiation", "setpay" }, { "application/set-registration-initiation", "setreg" }, + { "application/vnd.ms-excel", "xls" }, { "application/vnd.ms-excel", "xla" }, { "application/vnd.ms-excel", "xlc" }, { 
"application/vnd.ms-excel", "xlm" }, - { "application/vnd.ms-excel", "xls" }, { "application/vnd.ms-excel", "xlt" }, { "application/vnd.ms-excel", "xlw" }, { "application/vnd.ms-pkicertstore", "sst" }, { "application/vnd.ms-pkiseccat", "cat" }, + { "application/vnd.ms-powerpoint", "ppt" }, { "application/vnd.ms-powerpoint", "pot" }, { "application/vnd.ms-powerpoint", "pps" }, - { "application/vnd.ms-powerpoint", "ppt" }, { "application/vnd.ms-project", "mpp" }, { "application/vnd.ms-works", "wcm" }, { "application/vnd.ms-works", "wdb" }, @@ -527,7 +531,7 @@ const char* hts_mime[][2] = { /* Various */ { "application/ogg", "ogg" }, - {"*","class"}, + {"application/x-java-vm","class"}, {"",""}}; @@ -588,15 +592,12 @@ const char* hts_mime[][2] = { // conversion éventuelle / vers antislash -#if HTS_WIN -char* antislash(char* s) { - char* buff; +#ifdef _WIN32 +char* antislash(char *catbuff, const char* s) { char* a; - NOSTATIC_RESERVE(buff, char, HTS_URLMAXSIZE*2); - - strcpybuff(buff,s); - while(a=strchr(buff,'/')) *a='\\'; - return buff; + strcpybuff(catbuff,s); + while(a=strchr(catbuff,'/')) *a='\\'; + return catbuff; } #endif @@ -612,7 +613,7 @@ char cwd[MAX_PATH+1] = ""; // suivre l'évolution du chargement si le process a été lancé // en background -htsblk httpget(char* url) { +htsblk httpget(httrackp *opt,char* url) { char BIGSTK adr[HTS_URLMAXSIZE*2]; // adresse char BIGSTK fil[HTS_URLMAXSIZE*2]; // chemin @@ -629,14 +630,14 @@ htsblk httpget(char* url) { return retour; } - return xhttpget(adr,fil); + return xhttpget(opt,adr,fil); } // ouvre une liaison http, envoie une requète GET et réceptionne le header // retour: socket -int http_fopen(char* adr,char* fil,htsblk* retour) { +int http_fopen(httrackp *opt,char* adr,char* fil,htsblk* retour) { // / GET, traiter en-tête - return http_xfopen(0,1,1,NULL,adr,fil,retour); + return http_xfopen(opt,0,1,1,NULL,adr,fil,retour); } // ouverture d'une liaison http, envoi d'une requète @@ -644,7 +645,7 @@ int http_fopen(char* 
adr,char* fil,htsblk* retour) { // treat: traiter header? // waitconnect: attendre le connect() // note: dans retour, on met les params du proxy -int http_xfopen(int mode,int treat,int waitconnect,char* xsend,char* adr,char* fil,htsblk* retour) { +int http_xfopen(httrackp *opt,int mode,int treat,int waitconnect,char* xsend,char* adr,char* fil,htsblk* retour) { //htsblk retour; //int bufl=TAILLE_BUFFER; // 8Ko de buffer T_SOC soc=INVALID_SOCKET; @@ -681,12 +682,12 @@ int http_xfopen(int mode,int treat,int waitconnect,char* xsend,char* adr,char* f (strncmp(adr,"https://", 8)==0) ) ) { /* pas de proxy, ou non utilisable ici */ - soc=newhttp(adr,retour,-1,waitconnect); + soc=newhttp(opt,adr,retour,-1,waitconnect); } else { - soc=newhttp(retour->req.proxy.name,retour,retour->req.proxy.port,waitconnect); // ouvrir sur le proxy à la place + soc=newhttp(opt, retour->req.proxy.name, retour,retour->req.proxy.port, waitconnect); // ouvrir sur le proxy à la place } } else { - soc=newhttp(adr,NULL,-1,waitconnect); + soc=newhttp(opt,adr,NULL,-1,waitconnect); } // copier index socket retour @@ -698,9 +699,11 @@ int http_xfopen(int mode,int treat,int waitconnect,char* xsend,char* adr,char* f if (retour->msg) { if (!strnotempty(retour->msg)) { #ifdef _WIN32 - sprintf(retour->msg,"Connect error: %s", strerror(WSAGetLastError())); + int last_errno = WSAGetLastError(); + sprintf(retour->msg,"Connect error: %s", strerror(last_errno)); #else - sprintf(retour->msg,"Connect error: %s", strerror(errno)); + int last_errno = errno; + sprintf(retour->msg,"Connect error: %s", strerror(last_errno)); #endif } } @@ -715,15 +718,15 @@ int http_xfopen(int mode,int treat,int waitconnect,char* xsend,char* adr,char* f if (mode==0) { // GET // Test en cas de file:///C|... 
- if (!fexist(fconv(unescape_http(fil)))) - if (fexist(fconv(unescape_http(fil+1)))) { + if (!fexist(fconv(OPT_GET_BUFF(opt), unescape_http(OPT_GET_BUFF(opt),fil)))) + if (fexist(fconv(OPT_GET_BUFF(opt), unescape_http(OPT_GET_BUFF(opt),fil+1)))) { char BIGSTK tempo[HTS_URLMAXSIZE*2]; strcpybuff(tempo,fil+1); strcpybuff(fil,tempo); } // Ouvrir - retour->totalsize=fsize(fconv(unescape_http(fil))); // taille du fichier + retour->totalsize=fsize(fconv(OPT_GET_BUFF(opt), unescape_http(OPT_GET_BUFF(opt),fil))); // taille du fichier retour->msg[0]='\0'; soc=INVALID_SOCKET; if (retour->totalsize<0) @@ -733,7 +736,7 @@ int http_xfopen(int mode,int treat,int waitconnect,char* xsend,char* adr,char* f else { // Note: On passe par un FILE* (plus propre) //soc=open(fil,O_RDONLY,0); // en lecture seule! - retour->fp=fopen(fconv(unescape_http(fil)),"rb"); // ouvrir + retour->fp=fopen(fconv(OPT_GET_BUFF(opt), unescape_http(OPT_GET_BUFF(opt),fil)),"rb"); // ouvrir if (retour->fp==NULL) soc=INVALID_SOCKET; else @@ -741,9 +744,9 @@ int http_xfopen(int mode,int treat,int waitconnect,char* xsend,char* adr,char* f } retour->soc=soc; if (soc!=INVALID_SOCKET) { - retour->statuscode=200; // OK + retour->statuscode=HTTP_OK; // OK strcpybuff(retour->msg,"OK"); - guess_httptype(retour->contenttype,fil); + guess_httptype(opt,retour->contenttype,fil); } else if (strnotempty(retour->msg)==0) strcpybuff(retour->msg,"Unable to open local file"); return soc; // renvoyer @@ -765,7 +768,7 @@ int http_xfopen(int mode,int treat,int waitconnect,char* xsend,char* adr,char* f // connecté? 
if (waitconnect) { - http_sendhead(NULL,mode,xsend,adr,fil,NULL,NULL,retour); + http_sendhead(opt,NULL,mode,xsend,adr,fil,NULL,NULL,retour); } if (soc!=INVALID_SOCKET) { @@ -826,7 +829,7 @@ int http_xfopen(int mode,int treat,int waitconnect,char* xsend,char* adr,char* f // envoi d'une requète -int http_sendhead(t_cookie* cookie,int mode,char* xsend,char* adr,char* fil,char* referer_adr,char* referer_fil,htsblk* retour) { +int http_sendhead(httrackp *opt,t_cookie* cookie,int mode,char* xsend,char* adr,char* fil,char* referer_adr,char* referer_fil,htsblk* retour) { char BIGSTK buff[8192]; //int use_11=0; // HTTP 1.1 utilisé int direct_url=0; // ne pas analyser l'url (exemple: ftp://) @@ -849,7 +852,7 @@ int http_sendhead(t_cookie* cookie,int mode,char* xsend,char* adr,char* fil,char search_tag=strstr(fil,POSTTOK"file:"); if (search_tag) { // postfile if (mode==0) { // GET! - FILE* fp=fopen(unescape_http(search_tag+strlen(POSTTOK)+5),"rb"); + FILE* fp=fopen(unescape_http(OPT_GET_BUFF(opt),search_tag+strlen(POSTTOK)+5),"rb"); if (fp) { char BIGSTK line[1100]; char BIGSTK protocol[256],url[HTS_URLMAXSIZE*2],method[256]; @@ -929,14 +932,14 @@ int http_sendhead(t_cookie* cookie,int mode,char* xsend,char* adr,char* fil,char // tester proxy authentication if (retour->req.proxy.active) { if (link_has_authorization(retour->req.proxy.name)) { // et hop, authentification proxy! 
- char* a=jump_identification(retour->req.proxy.name); - char* astart=jump_protocol(retour->req.proxy.name); + const char* a = jump_identification(retour->req.proxy.name); + const char* astart = jump_protocol(retour->req.proxy.name); char autorisation[1100]; char user_pass[256]; autorisation[0]=user_pass[0]='\0'; // strncatbuff(user_pass,astart,(int) (a - astart) - 1); - strcpybuff(user_pass,unescape_http(user_pass)); + strcpybuff(user_pass,unescape_http(OPT_GET_BUFF(opt),user_pass)); code64((unsigned char*)user_pass,(int)strlen(user_pass),(unsigned char*)autorisation,0); strcatbuff(buff,"Proxy-Authorization: Basic "); strcatbuff(buff,autorisation); @@ -978,17 +981,18 @@ int http_sendhead(t_cookie* cookie,int mode,char* xsend,char* adr,char* fil,char if (mode==0) { // GET! if (search_tag) { char clen[256]; - sprintf(clen,"Content-length: %d"H_CRLF,(int)(strlen(unescape_http(search_tag+strlen(POSTTOK)+1)))); + sprintf(clen,"Content-length: %d"H_CRLF,(int)(strlen(unescape_http(OPT_GET_BUFF(opt),search_tag+strlen(POSTTOK)+1)))); strcatbuff(buff,clen); } } // gestion cookies? 
if (cookie) { + char buffer[8192]; char* b=cookie->data; int cook=0; int max_cookies=8; - int max_size=2048; + size_t max_size=2048; max_size+=strlen(buff); do { b=cookie_find(b,"",jump_identification(adr),fil); // prochain cookie satisfaisant aux conditions @@ -1000,11 +1004,11 @@ int http_sendhead(t_cookie* cookie,int mode,char* xsend,char* adr,char* fil,char cook=1; } else strcatbuff(buff,"; "); - strcatbuff(buff,cookie_get(b,5)); + strcatbuff(buff,cookie_get(buffer,b,5)); strcatbuff(buff,"="); - strcatbuff(buff,cookie_get(b,6)); + strcatbuff(buff,cookie_get(buffer,b,6)); strcatbuff(buff,"; $Path="); - strcatbuff(buff,cookie_get(b,2)); + strcatbuff(buff,cookie_get(buffer,b,2)); b=cookie_nextfield(b); } } while( (b) && (max_cookies>0) && ((int)strlen(buff)msg,"Header refused by external wrapper"); retour->soc=INVALID_SOCKET; } } -#endif // Envoi HTS_STAT.last_request = mtime_local(); @@ -1203,7 +1205,7 @@ void treatfirstline(htsblk* retour,char* rcvd) { } else { if (*a == '<') { /* This is dirty .. */ - retour->statuscode=200; + retour->statuscode=HTTP_OK; retour->keep_alive=0; strcpybuff(retour->msg, "Unknown, assuming junky server"); strcpybuff(retour->contenttype,HTS_HYPERTEXT_DEFAULT_MIME); @@ -1212,7 +1214,7 @@ void treatfirstline(htsblk* retour,char* rcvd) { strcpybuff(retour->msg,"Unknown (not HTTP/xx) response structure"); } else { /* This is dirty .. */ - retour->statuscode=200; + retour->statuscode=HTTP_OK; retour->keep_alive=0; strcpybuff(retour->msg, "Unknown, assuming junky server"); strcpybuff(retour->contenttype,HTS_HYPERTEXT_DEFAULT_MIME); @@ -1224,7 +1226,7 @@ void treatfirstline(htsblk* retour,char* rcvd) { strcpybuff(retour->msg,"Empty reponse or internal error"); */ /* This is dirty .. 
*/ - retour->statuscode=200; + retour->statuscode=HTTP_OK; strcpybuff(retour->msg, "Unknown, assuming junky server"); strcpybuff(retour->contenttype,HTS_HYPERTEXT_DEFAULT_MIME); } @@ -1613,12 +1615,12 @@ HTSEXT_API void infostatuscode(char* msg,int statuscode) { // identique au précédent, sauf que l'on donne adr+fil et non url complète -htsblk xhttpget(char* adr,char* fil) { +htsblk xhttpget(httrackp *opt,char* adr,char* fil) { T_SOC soc; htsblk retour; memset(&retour, 0, sizeof(htsblk)); - soc=http_fopen(adr,fil,&retour); + soc=http_fopen(opt,adr,fil,&retour); if (soc!=INVALID_SOCKET) { http_fread(soc,&retour); @@ -1634,12 +1636,12 @@ htsblk xhttpget(char* adr,char* fil) { // variation sur un thème... // réceptionne uniquement un en-tête (HEAD) // retourne dans xx.adr l'adresse pointant sur le bloc de mémoire de l'en tête -htsblk http_gethead(char* adr,char* fil) { +htsblk http_gethead(httrackp *opt,char* adr,char* fil) { T_SOC soc; htsblk retour; memset(&retour, 0, sizeof(htsblk)); - soc=http_xfopen(1,0,1,NULL,adr,fil,&retour); // HEAD, pas de traitement en-tête + soc=http_xfopen(opt,1,0,1,NULL,adr,fil,&retour); // HEAD, pas de traitement en-tête if (soc!=INVALID_SOCKET) { http_fread(soc,&retour); // réception en-tête @@ -1745,10 +1747,10 @@ LLint http_xfread1(htsblk* r,int bufl) { if (!r->is_write) { // stocker en mémoire if (r->totalsize>0) { // totalsize déterminé ET ALLOUE if (r->adr==NULL) { - r->adr=(char*) malloct((INTsys) r->totalsize + 1); - r->size=0; + r->adr = (char*) malloct((size_t) r->totalsize + 1); + r->size = 0; } - if (r->adr!=NULL) { + if (r->adr != NULL) { // lecture nl = hts_read(r,r->adr + ((int) r->size),(int) (r->totalsize-r->size) ); /* NO 32 bit overlow possible here (no 4GB html!) 
*/ // nouvelle taille @@ -1812,7 +1814,7 @@ LLint http_xfread1(htsblk* r,int bufl) { // nouvelle taille if (nl > 0) { r->size+=nl; - if ((INTsys)fwrite(buff,1,nl,r->out)!=nl) { + if (fwrite(buff,1,nl,r->out)!=nl) { r->statuscode=STATUSCODE_INVALID; strcpybuff(r->msg,"Write error on disk"); nl=READ_ERROR; @@ -1905,7 +1907,7 @@ LLint http_xfread1(htsblk* r,int bufl) { // teste une adresse, et suit l'éventuel chemin "moved" // retourne 200 ou le code d'erreur (404=NOT FOUND, etc) // copie dans loc la véritable adresse si celle-ci est différente -htsblk http_location(char* adr,char* fil,char* loc) { +htsblk http_location(httrackp *opt,char* adr,char* fil,char* loc) { htsblk retour; int retry=0; int tryagain; @@ -1914,9 +1916,13 @@ htsblk http_location(char* adr,char* fil,char* loc) { // sinon abandon.. do { tryagain=0; - switch ((retour=http_test(adr,fil,loc)).statuscode) { - case 200: break; // ok! - case 301: case 302: case 303: case 307: // moved! + switch ((retour=http_test(opt,adr,fil,loc)).statuscode) { + case HTTP_OK: + break; // ok! + case HTTP_MOVED_PERMANENTLY: + case HTTP_FOUND: + case HTTP_SEE_OTHER: + case HTTP_TEMPORARY_REDIRECT: // moved! // recalculer adr et fil! if (ident_url_absolute(loc,adr,fil)!=-1) { tryagain=1; // retenter @@ -1933,7 +1939,7 @@ htsblk http_location(char* adr,char* fil,char* loc) { // en cas de moved xx, dans location // abandonne désormais au bout de 30 secondes (aurevoir les sites // qui nous font poireauter 5 heures..) 
-> -2=timeout -htsblk http_test(char* adr,char* fil,char* loc) { +htsblk http_test(httrackp *opt,char* adr,char* fil,char* loc) { T_SOC soc; htsblk retour; //int rcvsize=-1; @@ -1952,7 +1958,7 @@ htsblk http_test(char* adr,char* fil,char* loc) { //soc=http_fopen(adr,fil,&retour,NULL); // ouvrir, + header // on ouvre en head, et on traite l'en tête - soc=http_xfopen(1,0,1,NULL,adr,fil,&retour); // ouvrir HEAD, + envoi header + soc=http_xfopen(opt,1,0,1,NULL,adr,fil,&retour); // ouvrir HEAD, + envoi header if (soc!=INVALID_SOCKET) { int e=0; @@ -2033,7 +2039,7 @@ htsblk http_test(char* adr,char* fil,char* loc) { // Crée un lien (http) vers une adresse internet iadr // retour: structure (adresse, taille, message si erreur (si !adr)) // peut ouvrir avec des connect() non bloquants: waitconnect=0/1 -int newhttp(char* _iadr,htsblk* retour,int port,int waitconnect) { +int newhttp(httrackp *opt,const char* _iadr,htsblk* retour,int port,int waitconnect) { t_fullhostent fullhostent_buffer; // buffer pour resolver T_SOC soc; // descipteur de la socket char* iadr; @@ -2081,17 +2087,17 @@ int newhttp(char* _iadr,htsblk* retour,int port,int waitconnect) { strncatbuff(iadr2,iadr,(int) (a - iadr)); // adresse sans le :xx - hp = hts_gethostbyname(iadr2, &fullhostent_buffer); + hp = hts_gethostbyname(opt,iadr2, &fullhostent_buffer); } else { // adresse normale (port par défaut par la suite) - hp = hts_gethostbyname(iadr, &fullhostent_buffer); + hp = hts_gethostbyname(opt,iadr, &fullhostent_buffer); } } else // port défini - hp = hts_gethostbyname(iadr, &fullhostent_buffer); + hp = hts_gethostbyname(opt,iadr, &fullhostent_buffer); // Conversion iadr -> adresse @@ -2101,13 +2107,15 @@ int newhttp(char* _iadr,htsblk* retour,int port,int waitconnect) { #if DEBUG printf("erreur gethostbyname\n"); #endif - if (retour) - if (retour->msg) + if (retour && retour->msg) { #ifdef _WIN32 - sprintf(retour->msg,"Unable to get server's address: %s", strerror(WSAGetLastError())); + int last_errno = 
WSAGetLastError(); + sprintf(retour->msg,"Unable to get server's address: %s", strerror(last_errno)); #else - sprintf(retour->msg,"Unable to get server's address: %s", strerror(errno)); + int last_errno = errno; + sprintf(retour->msg,"Unable to get server's address: %s", strerror(last_errno)); #endif + } return INVALID_SOCKET; } // copie adresse @@ -2124,7 +2132,7 @@ int newhttp(char* _iadr,htsblk* retour,int port,int waitconnect) { #if HTS_WIDE_DEBUG DEBUG_W("socket\n"); #endif - soc=socket(SOCaddr_sinfamily(server), SOCK_STREAM, 0); + soc = (T_SOC) socket(SOCaddr_sinfamily(server), SOCK_STREAM, 0); if (retour != NULL) { retour->debugid = HTS_STAT.stat_sockid++; } @@ -2132,29 +2140,33 @@ int newhttp(char* _iadr,htsblk* retour,int port,int waitconnect) { DEBUG_W("socket()=%d\n" _ (int) soc); #endif if (soc==INVALID_SOCKET) { - if (retour) - if (retour->msg) + if (retour && retour->msg) { #ifdef _WIN32 - sprintf(retour->msg,"Unable to create a socket: %s", strerror(WSAGetLastError())); + int last_errno = WSAGetLastError(); + sprintf(retour->msg,"Unable to create a socket: %s", strerror(last_errno)); #else - sprintf(retour->msg,"Unable to create a socket: %s", strerror(errno)); + int last_errno = errno; + sprintf(retour->msg,"Unable to create a socket: %s", strerror(last_errno)); #endif + } return INVALID_SOCKET; // erreur création socket impossible } // bind this address - if (retour != NULL && retour->req.proxy.bindhost[0] != '\0') { + if (retour != NULL && retour->req.proxy.bindhost[0] != 0) { t_fullhostent bind_buffer; - hp = hts_gethostbyname(retour->req.proxy.bindhost, &bind_buffer); + hp = hts_gethostbyname(opt, retour->req.proxy.bindhost, &bind_buffer); if (hp == NULL || bind(soc, (struct sockaddr *)hp->h_addr_list[0], hp->h_length) != 0) { - if (retour) - if (retour->msg) + if (retour && retour->msg) { #ifdef _WIN32 - sprintf(retour->msg,"Unable to bind the specificied server address: %s", strerror(WSAGetLastError())); + int last_errno = WSAGetLastError(); + 
sprintf(retour->msg,"Unable to bind the specificied server address: %s", strerror(last_errno)); #else - sprintf(retour->msg,"Unable to bind the specificied server address: %s", strerror(errno)); + int last_errno = errno; + sprintf(retour->msg,"Unable to bind the specificied server address: %s", strerror(last_errno)); #endif + } deletesoc(soc); return INVALID_SOCKET; } @@ -2169,7 +2181,7 @@ int newhttp(char* _iadr,htsblk* retour,int port,int waitconnect) { // connexion non bloquante? if (!waitconnect ) { unsigned long p=1; // non bloquant -#if HTS_WIN +#ifdef _WIN32 ioctlsocket(soc,FIONBIO,&p); #else ioctl(soc,FIONBIO,&p); @@ -2185,7 +2197,7 @@ int newhttp(char* _iadr,htsblk* retour,int port,int waitconnect) { #if HTS_WIDE_DEBUG DEBUG_W("connect\n"); #endif -#if HTS_WIN +#ifdef _WIN32 if (connect(soc, (const struct sockaddr FAR *)&server, server_size) != 0) { #else if (connect(soc, (struct sockaddr *)&server, server_size) == -1) { @@ -2196,13 +2208,15 @@ int newhttp(char* _iadr,htsblk* retour,int port,int waitconnect) { #if HDEBUG printf("unable to connect!\n"); #endif - if (retour) - if (retour->msg) + if (retour && retour->msg) { #ifdef _WIN32 - sprintf(retour->msg,"Unable to connect to the server: %s", strerror(WSAGetLastError())); + int last_errno = WSAGetLastError(); + sprintf(retour->msg,"Unable to connect to the server: %s", strerror(last_errno)); #else - sprintf(retour->msg,"Unable to connect to the server: %s", strerror(errno)); + int last_errno = errno; + sprintf(retour->msg,"Unable to connect to the server: %s", strerror(last_errno)); #endif + } /* Close the socket and notify the error!!! */ deletesoc(soc); return INVALID_SOCKET; @@ -2236,7 +2250,7 @@ int newhttp(char* _iadr,htsblk* retour,int port,int waitconnect) { // couper http://www.truc.fr/pub/index.html -> www.truc.fr /pub/index.html // retour=-1 si erreur. // si file://... 
alors adresse=file:// (et coupe le ?query dans ce cas) -int ident_url_absolute(char* url,char* adr,char* fil) { +int ident_url_absolute(const char* url,char* adr,char* fil) { int pos=0; int scheme=0; @@ -2249,7 +2263,7 @@ int ident_url_absolute(char* url,char* adr,char* fil) { // Scheme? { - char* a=url; + const char* a=url; while (isalpha((unsigned char)*a)) a++; if (*a == ':') @@ -2286,7 +2300,7 @@ int ident_url_absolute(char* url,char* adr,char* fil) { //## if (adr[0]!=lOCAL_CHAR) { // adresse normale http if (!strfield(adr,"file:")) { // PAS file:// - char *p,*q; + const char *p,*q; p=url+pos; // p pointe sur le début de l'adresse, ex: www.truc.fr/sommaire/index.html @@ -2312,7 +2326,7 @@ int ident_url_absolute(char* url,char* adr,char* fil) { // simplifier url pour les ../ fil_simplifie(fil); } else { // localhost file:// - char *p; + const char *p; int i; char* a; @@ -2458,7 +2472,7 @@ HTS_INLINE void deletesoc(T_SOC soc) { #if HTS_WIDE_DEBUG DEBUG_W("close %d\n" _ (int) soc); #endif -#if HTS_WIN +#ifdef _WIN32 closesocket(soc); #else close(soc); @@ -2570,9 +2584,7 @@ void time_local_rfc822(char* s) { } /* convertir une chaine en temps */ -struct tm* convert_time_rfc822(char* s) { - struct tm* result; - /* */ +struct tm* convert_time_rfc822(struct tm *result, const char* s) { char months[]="jan feb mar apr may jun jul aug sep oct nov dec"; char str[256]; char* a; @@ -2584,7 +2596,6 @@ struct tm* convert_time_rfc822(char* s) { int result_n3=-1; int result_n4=-1; /* */ - NOSTATIC_RESERVE(result, struct tm, 1); if ((int) strlen(s) > 200) return NULL; @@ -2655,30 +2666,41 @@ struct tm* convert_time_rfc822(char* s) { return NULL; } -/* sets file time. -1 if error */ -int set_filetime(char* file,struct tm* tm_time) { - struct utimbuf tim; -#ifndef HTS_DO_NOT_USE_FTIME - struct timeb B; - B.timezone=0; - ftime( &B ); - tim.actime=tim.modtime=mktime(tm_time) - B.timezone*60; -#else - // bogus time (GMT/local).. 
- tim.actime=tim.modtime=mktime(tm_time); +static time_t getGMT(struct tm *tm) { /* hey, time_t is local! */ + time_t t = mktime(tm); + if (t != (time_t) -1 && t != (time_t) 0) { + /* BSD does not have static "timezone" declared */ +#if (defined(BSD) || defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) || defined(__FreeBSD_kernel__)) + time_t now = time(NULL); + time_t timezone = - localtime(&now)->tm_gmtoff; #endif - return utime(file,&tim); + return (time_t) (t - timezone); + } + return (time_t) -1; +} + +/* sets file time. -1 if error */ +int set_filetime(const char* file, struct tm* tm_time) { + time_t t = getGMT(tm_time); + if (t != (time_t) -1) { + struct utimbuf tim; + memset(&tim, 0, sizeof(tim)); + tim.actime = tim.modtime = t; + return utime(file, &tim); + } + return -1; } /* sets file time from RFC822 date+time, -1 if error*/ -int set_filetime_rfc822(char* file,char* date) { - struct tm* tm_s=convert_time_rfc822(date); +int set_filetime_rfc822(const char* file, const char* date) { + struct tm buffer; + struct tm* tm_s = convert_time_rfc822(&buffer, date); if (tm_s) { return set_filetime(file,tm_s); } else return -1; } -int get_filetime_rfc822(char* file,char* date) { +int get_filetime_rfc822(const char* file, char* date) { struct stat buf; date[0] = '\0'; if (stat(file, &buf) == 0) { @@ -2714,31 +2736,24 @@ HTS_INLINE void time_rfc822_local(char* s,struct tm * A) { } // conversion en b,Kb,Mb -HTSEXT_API char* int2bytes(LLint n) { - char** a=int2bytes2(n); - char* buff; - NOSTATIC_RESERVE(buff, char, 256); - - strcpybuff(buff,a[0]); - strcatbuff(buff,a[1]); - return concat(buff,""); +HTSEXT_API char* int2bytes(strc_int2bytes2* strc, LLint n) { + char** a = int2bytes2(strc, n); + strcpybuff(strc->catbuff, a[0]); + strcatbuff(strc->catbuff, a[1]); + return strc->catbuff; } // conversion en b/s,Kb/s,Mb/s -HTSEXT_API char* int2bytessec(long int n) { - char* buff; - char** a=int2bytes2(n); - NOSTATIC_RESERVE(buff, char, 256); - - 
strcpybuff(buff,a[0]); - strcatbuff(buff,a[1]); - return concat(buff,"/s"); +HTSEXT_API char* int2bytessec(strc_int2bytes2* strc, long int n) { + char buff[256]; + char** a = int2bytes2(strc, n); + strcpybuff(buff, a[0]); + strcatbuff(buff, a[1]); + return concat(strc->catbuff, buff, "/s"); } -HTSEXT_API char* int2char(int n) { - char* buffer; - NOSTATIC_RESERVE(buffer, char, 32); - sprintf(buffer,"%d",n); - return concat(buffer,""); +HTSEXT_API char* int2char(strc_int2bytes2* strc, int n) { + sprintf(strc->buff2, "%d", n); + return strc->buff2; } // conversion en b,Kb,Mb, nombre et type séparés @@ -2753,15 +2768,7 @@ HTSEXT_API char* int2char(int n) { #define ToLLintTiB (ToLLintKiB*ToLLintKiB*ToLLintKiB*ToLLintKiB) #define ToLLintPiB (ToLLintKiB*ToLLintKiB*ToLLintKiB*ToLLintKiB*ToLLintKiB) #endif -typedef struct { - char buff1[256]; - char buff2[32]; - char* buffadr[2]; -} strc_int2bytes2; -HTSEXT_API char** int2bytes2(LLint n) { - strc_int2bytes2* strc; - NOSTATIC_RESERVE(strc, strc_int2bytes2, 1); - +HTSEXT_API char** int2bytes2(strc_int2bytes2* strc, LLint n) { if (n < ToLLintKiB) { sprintf(strc->buff1,"%d",(int)(LLint)n); strcpybuff(strc->buff2,"B"); @@ -2794,7 +2801,7 @@ HTSEXT_API char** int2bytes2(LLint n) { return strc->buffadr; } -#if HTS_WIN +#ifdef _WIN32 #else // ignore sigpipe? 
int sig_ignore_flag( int setflag ) { // flag ignore @@ -2806,10 +2813,10 @@ int sig_ignore_flag( int setflag ) { // flag ignore #endif // envoi de texte (en têtes généralement) sur la socket soc -HTS_INLINE int sendc(htsblk* r, char* s) { +HTS_INLINE int sendc(htsblk* r, const char* s) { int n, ssz = (int)strlen(s); -#if HTS_WIN +#ifdef _WIN32 #else sig_ignore_flag(1); #endif @@ -2824,7 +2831,7 @@ HTS_INLINE int sendc(htsblk* r, char* s) { #endif n = send(r->soc,s,ssz,0); -#if HTS_WIN +#ifdef _WIN32 #else sig_ignore_flag(0); #endif @@ -3079,7 +3086,7 @@ void map_characters(unsigned char* buffer, unsigned int size, unsigned int* map) // 1 : oui // -1 : on sait pas // -2 : on sait pas, pas d'extension -int ishtml(const char* fil) { +int ishtml(httrackp *opt,const char* fil) { /* User-defined MIME types (overrides ishtml()) */ char BIGSTK fil_noquery[HTS_URLMAXSIZE*2]; char mime[256]; @@ -3088,7 +3095,7 @@ int ishtml(const char* fil) { if ((a = strchr(fil_noquery, '?')) != NULL) { *a = '\0'; } - if (get_userhttptype(0, mime, fil_noquery)) { + if (get_userhttptype(opt, mime, fil_noquery)) { if (strfield2(mime, "text/html")) { return 1; } else { @@ -3111,7 +3118,7 @@ int ishtml(const char* fil) { *b='\0'; ret = ishtml_ext(fil_noquery); // retour if (ret == -1) { - switch(is_knowntype(dotted)) { + switch(is_knowntype(opt,dotted)) { case 1: ret = 0; // connu, non html break; @@ -3174,33 +3181,33 @@ HTS_INLINE int ishttperror(int err) { // retourne le pointeur ou le pointeur + offset si il existe dans la chaine un @ signifiant // une identification -HTSEXT_API char* jump_identification(char* source) { - char *a,*trytofind; +HTSEXT_API char* jump_identification(const char* source) { + const char *a,*trytofind; if (strcmp(source, "file://") == 0) - return source; + return (char*) source; // rechercher dernier @ (car parfois email transmise dans adresse!) 
// mais sauter ftp:// éventuel a = jump_protocol(source); trytofind = strrchr_limit(a, '@', strchr(a,'/')); - return (trytofind != NULL)?trytofind:a; + return (char*) ( (trytofind != NULL) ? trytofind : a ); } -HTSEXT_API char* jump_normalized(char* source) { +HTSEXT_API char* jump_normalized(const char* source) { if (strcmp(source, "file://") == 0) - return source; + return (char*) source; source = jump_identification(source); if (strfield(source, "www") && source[3] != '\0') { if (source[3] == '.') { // www.foo.com -> foo.com source += 4; } else { // www-4.foo.com -> foo.com - char* a = source + 3; + const char* a = source + 3; while(*a && ( isdigit(*a) || *a == '-') ) a++; if (*a == '.') { source = a + 1; } } } - return source; + return (char*) source; } static int sortNormFnc(const void * a_, const void * b_) { @@ -3210,7 +3217,7 @@ static int sortNormFnc(const void * a_, const void * b_) { } -HTSEXT_API char* fil_normalized(char* source, char* dest) { +HTSEXT_API char* fil_normalized(const char* source, char* dest) { char lastc = 0; int gotquery=0; int ampargs=0; @@ -3280,7 +3287,7 @@ HTSEXT_API char* fil_normalized(char* source, char* dest) { } #define endwith(a) ( (len >= (sizeof(a)-1)) ? 
( strncmp(dest, a+len-(sizeof(a)-1), sizeof(a)-1) == 0 ) : 0 ); -HTSEXT_API char* adr_normalized(char* source, char* dest) { +HTSEXT_API char* adr_normalized(const char* source, char* dest) { /* not yet too aggressive (no com<->net<->org checkings) */ strcpybuff(dest, jump_normalized(source)); return dest; @@ -3290,47 +3297,47 @@ HTSEXT_API char* adr_normalized(char* source, char* dest) { // find port (:80) or NULL if not found // can handle IPV6 addresses -HTSEXT_API char* jump_toport(char* source) { - char *a,*trytofind; +HTSEXT_API char* jump_toport(const char* source) { + const char *a,*trytofind; a = jump_identification(source); trytofind = strrchr_limit(a, ']', strchr(source, '/')); // find last ] (http://[3ffe:b80:1234::1]:80/foo.html) a = strchr( (trytofind)?trytofind:a, ':'); - return a; + return (char*)a; } // strrchr, but not too far -char* strrchr_limit(char* s, char c, char* limit) { +char* strrchr_limit(const char* s, char c, const char* limit) { if (limit == NULL) { - char* p = strrchr(s, c); - return p?(p+1):NULL; + const char* p = strrchr(s, c); + return (char*) ( p ? (p+1) : NULL ); } else { - char *a=NULL, *p; + const char *a = NULL, *p; for(;;) { - p=strchr((a)?a:s, c); + p = strchr( (a) ? 
a : s, c); if ((p >= limit) || (p == NULL)) - return a; + return (char*) a; a=p+1; } } } // strrchr, but not too far -char* strstr_limit(char* s, char* sub, char* limit) { +char* strstr_limit(const char* s, const char* sub, const char* limit) { if (limit == NULL) { return strstr(s, sub); } else { - char* pos = strstr(s, sub); + const char* pos = strstr(s, sub); if (pos != NULL) { - char* farpos = strstr(s, limit); + const char* farpos = strstr(s, limit); if (farpos == NULL || pos < farpos) - return pos; + return (char*) pos; } } return NULL; } // retourner adr sans ftp:// -HTS_INLINE char* jump_protocol(char* source) { +HTS_INLINE char* jump_protocol(const char* source) { int p; // scheme // "Comparisons of scheme names MUST be case-insensitive" (RFC2616) @@ -3349,7 +3356,7 @@ HTS_INLINE char* jump_protocol(char* source) { // net_path if (strncmp(source,"//",2)==0) source+=2; - return source; + return (char*) source; } // codage base 64 a vers b @@ -3414,15 +3421,16 @@ HTSEXT_API void unescape_amp(char* s) { if (strcmpbeg(s, "&#") == 0) { int num=0; if ( (s[2] == 'x') || (s[2] == 'X')) { - if (sscanf(s+3, "%x", &num) == 1) { - c=(unsigned char)num; + if (sscanf(s+3, "%x", &num) == 1 && num <= 0xff) { + c=(unsigned char) num; } } else { - if (sscanf(s+2, "%d", &num) == 1) { - c=(unsigned char)num; + if (sscanf(s+2, "%d", &num) == 1 && num <= 0xff) { + c=(unsigned char) num; } } - } else if (strcmpbeg(s, " ")==0) + } + else if (strcmpbeg(s, " ")==0) c=32; // hack - c=160; else if (strcmpbeg(s, "¡")==0) c=161; @@ -3653,20 +3661,18 @@ static int ehexh(char c) { return 0; } -static int ehex(char* s) { +static int ehex(const char* s) { return 16*ehexh(*s)+ehexh(*(s+1)); } // remplacer %20 par ' ', | par : etc.. 
// buffer MAX 1Ko -HTSEXT_API char* unescape_http(char* s) { - char* tempo; +HTSEXT_API char* unescape_http(char *catbuff, const char* s) { int i,j=0; - NOSTATIC_RESERVE(tempo, char, HTS_URLMAXSIZE*2); for (i=0;i<(int) strlen(s);i++) { if (s[i]=='%') { i++; - tempo[j++]=(char) ehex(s+i); + catbuff[j++]=(char) ehex(s+i); i++; // sauter 2 caractères finalement } /* @@ -3676,18 +3682,16 @@ HTSEXT_API char* unescape_http(char* s) { } */ else - tempo[j++]=s[i]; + catbuff[j++]=s[i]; } - tempo[j++]='\0'; - return tempo; + catbuff[j++]='\0'; + return catbuff; } // unescape in URL/URI ONLY what has to be escaped, to form a standard URL/URI // DOES NOT DECODE %25 -HTSEXT_API char* unescape_http_unharm(char* s, int no_high) { - char* tempo; +HTSEXT_API char* unescape_http_unharm(char *catbuff, const char* s, int no_high) { int i,j=0; - NOSTATIC_RESERVE(tempo, char, HTS_URLMAXSIZE*2); for (i=0;i<(int) strlen(s);i++) { if (s[i]=='%') { int nchar=(char) ehex(s+i+1); @@ -3705,10 +3709,10 @@ HTSEXT_API char* unescape_http_unharm(char* s, int no_high) { ); if (!test) { - tempo[j++]=(char) ehex(s+i+1); + catbuff[j++]=(char) ehex(s+i+1); i+=2; } else { - tempo[j++]='%'; + catbuff[j++]='%'; } } /* @@ -3718,10 +3722,10 @@ HTSEXT_API char* unescape_http_unharm(char* s, int no_high) { } */ else - tempo[j++]=s[i]; + catbuff[j++]=s[i]; } - tempo[j++]='\0'; - return tempo; + catbuff[j++]='\0'; + return catbuff; } // remplacer " par %xx etc.. 
@@ -3744,9 +3748,9 @@ HTSEXT_API void escape_check_url(char* s) { x_escape_http(s,0); } // same as escape_check_url, but returns char* -HTSEXT_API char* escape_check_url_addr(char* s) { +HTSEXT_API char* escape_check_url_addr(char *catbuff, const char* s) { char* adr; - escape_check_url(adr = concat(s,"")); + escape_check_url(adr = concat(catbuff, s, "")); return adr; } @@ -3854,55 +3858,52 @@ HTSEXT_API void escape_for_html_print_full(char* s, char* d) { // concat, concatène deux chaines et renvoi le résultat // permet d'alléger grandement le code // il faut savoir qu'on ne peut mettre plus de 16 concat() dans une expression -typedef struct { - char buff[16][HTS_URLMAXSIZE*2*2]; - int rol; -} concat_strc; -char* concat(const char* a,const char* b) { - concat_strc* strc; - NOSTATIC_RESERVE(strc, concat_strc, 1); - strc->rol=((strc->rol+1)%16); // roving pointer - strcpybuff(strc->buff[strc->rol],a); - if (b) strcatbuff(strc->buff[strc->rol],b); - return strc->buff[strc->rol]; +HTSEXT_API char* concat(char *catbuff,const char* a,const char* b) { + if (a != NULL && a[0] != '\0') { + strcpybuff(catbuff, a); + } else { + catbuff[0] = '\0'; + } + if (b != NULL && b[0] != '\0') { + strcatbuff(catbuff, b); + } + return catbuff; } // conversion fichier / -> antislash +static char* __fconv(char* a) { #if HTS_DOSNAME -char* __fconv(char* a) { int i; - for(i=0;i<(int) strlen(a);i++) - if (a[i]=='/') // convertir - a[i]='\\'; + for(i = 0 ; a[i] != 0 ; i++) + if (a[i] == '/') // Unix-to-DOS style + a[i] = '\\'; +#endif return a; } -char* fconcat(char* a,char* b) { - return __fconv(concat(a,b)); + +HTSEXT_API char* fconcat(char *catbuff, const char* a, const char* b) { + return __fconv(concat(catbuff,a,b)); } -char* fconv(char* a) { - return __fconv(concat(a,"")); +HTSEXT_API char* fconv(char *catbuff, const char* a) { + return __fconv(concat(catbuff,a,"")); } -#endif /* / et \\ en / */ -char* __fslash(char* a) { +static char* __fslash(char* a) { int i; - for(i=0;i<(int) 
strlen(a);i++) - if (a[i]=='\\') // convertir - a[i]='/'; + for(i = 0 ; a[i] != 0 ; i++) + if (a[i] == '\\') // convertir + a[i] = '/'; return a; } -char* fslash(char* a) { - return __fslash(concat(a,"")); +char* fslash(char *catbuff, const char* a) { + return __fslash(concat(catbuff,a,NULL)); } // conversion minuscules, avec buffer -char* convtolower(char* a) { - concat_strc* strc; - NOSTATIC_RESERVE(strc, concat_strc, 1); - strc->rol=((strc->rol+1)%16); // roving pointer - strcpybuff(strc->buff[strc->rol],a); - hts_lowcase(strc->buff[strc->rol]); // lower case - return strc->buff[strc->rol]; +char* convtolower(char *catbuff, const char* a) { + strcpybuff(catbuff,a); + hts_lowcase(catbuff); // lower case + return catbuff; } // conversion en minuscules @@ -3957,18 +3958,18 @@ HTS_INLINE int is_realspace(char c) { // deviner type d'un fichier local.. // ex: fil="toto.gif" -> s="image/gif" -void guess_httptype(char *s,const char *fil) { - get_httptype(s, fil, 1); +void guess_httptype(httrackp *opt,char *s,const char *fil) { + get_httptype(opt,s, fil, 1); } // idem // flag: 1 si toujours renvoyer un type -void get_httptype(char *s,const char *fil,int flag) { +HTSEXT_API void get_httptype(httrackp *opt,char *s,const char *fil,int flag) { // userdef overrides get_httptype - if (get_userhttptype(0, s, fil)) { + if (get_userhttptype(opt, s, fil)) { return ; } // regular tests - if (ishtml(fil) == 1) { + if (ishtml(opt,fil) == 1) { strcpybuff(s,"text/html"); } else { /* Check html -> text/html */ @@ -3999,26 +4000,21 @@ void get_httptype(char *s,const char *fil,int flag) { // get type of fil (php) // s: buffer (text/html) or NULL // return: 1 if known by user -int get_userhttptype(int setdefs, char *s, const char *fil) { - char** buffer=NULL; - NOSTATIC_RESERVE(buffer, char*, 1); - if (setdefs) { - *buffer=s; - return 1; - } else { +int get_userhttptype(httrackp *opt, char *s, const char *fil) { + if (s != NULL) { if (s) s[0]='\0'; if (fil == NULL || *fil == '\0') return 
0; #if 1 - if (*buffer) { + if (StringLength(opt->mimedefs) > 0) { /* Check --assume foooo/foo/bar.cgi=text/html, then foo/bar.cgi=text/html, then bar.cgi=text/html */ /* also: --assume baz,bar,foooo/foo/bar.cgi=text/html */ /* start from path begining */ do { - char* next; - char* mimedefs = *buffer; /* loop through mime definitions : \nfoo=bar\nzoo=baz\n.. */ + const char* next; + const char* mimedefs = StringBuff(opt->mimedefs); /* loop through mime definitions : \nfoo=bar\nzoo=baz\n.. */ while(*mimedefs != '\0') { const char* segment = fil + 1; if (*mimedefs == '\n') { @@ -4139,12 +4135,13 @@ void give_mimext(char *s,const char *st) { // 0 : non // 1 : oui // 2 : html -int is_knowntype(const char *fil) { +HTSEXT_API int is_knowntype(httrackp *opt,const char *fil) { + char catbuff[CATBUFF_SIZE]; const char *ext; int j=0; if (!fil) return 0; - ext = get_ext(fil); + ext = get_ext(catbuff, fil); while(strnotempty(hts_mime[j][1])) { if (strfield2(hts_mime[j][1], ext)) { if (strfield2(hts_mime[j][0], "text/html")) @@ -4156,16 +4153,15 @@ int is_knowntype(const char *fil) { } // Known by user? - return (is_userknowntype(fil)); + return (is_userknowntype(opt,fil)); } // extension : html,gif.. 
-char* get_ext(const char *fil) { - char* fil_noquery; +HTSEXT_API char* get_ext(char *catbuff, const char *fil) { const char *a=fil+strlen(fil)-1; - NOSTATIC_RESERVE(fil_noquery, char, HTS_URLMAXSIZE*2); while ( (*a!='.') && (*a!='/') && (a>fil)) a--; if (*a=='.') { + char fil_noquery[HTS_URLMAXSIZE*2]; char* b; fil_noquery[0]='\0'; a++; // pointer sur extension @@ -4173,7 +4169,7 @@ char* get_ext(const char *fil) { b=strchr(fil_noquery,'?'); if (b) *b='\0'; - return concat(fil_noquery,""); + return concat(catbuff,fil_noquery,""); } else return ""; @@ -4184,14 +4180,14 @@ char* get_ext(const char *fil) { // 2 : html // setdefs : set mime buffer: // file=(char*) "asp=text/html\nphp=text/html\n" -int is_userknowntype(const char *fil) { +HTSEXT_API int is_userknowntype(httrackp *opt,const char *fil) { char BIGSTK mime[1024]; if (!fil) return 0; if (!strnotempty(fil)) return 0; mime[0]='\0'; - get_userhttptype(0,mime,fil); + get_userhttptype(opt, mime, fil); if (!strnotempty(mime)) return 0; else if (strfield2(mime,"text/html")) @@ -4202,7 +4198,7 @@ int is_userknowntype(const char *fil) { // page dynamique? // is_dyntype(get_ext("foo.asp")) -int is_dyntype(const char *fil) { +HTSEXT_API int is_dyntype(const char *fil) { int j=0; if (!fil) return 0; @@ -4219,10 +4215,10 @@ int is_dyntype(const char *fil) { // types critiques qui ne doivent pas être changés car renvoyés par des serveurs qui ne // connaissent pas le type -int may_unknown(const char* st) { +int may_unknown(httrackp *opt,const char* st) { int j=0; // types média - if (may_be_hypertext_mime(st, "")) { + if (may_be_hypertext_mime(opt,st, "")) { return 1; } while(strnotempty(hts_mime_keep[j])) { @@ -4258,10 +4254,11 @@ void fprintfio(FILE* fp,char* buff,char* prefix) { } /* Le fichier existe-t-il? (ou est-il accessible?) 
*/ -int fexist(char* s) { +int fexist(const char* s) { + char catbuff[CATBUFF_SIZE]; struct stat st; memset(&st, 0, sizeof(st)); - if (stat(s, &st) == 0) { + if (stat(fconv(catbuff,s), &st) == 0) { if (S_ISREG(st.st_mode)) { return 1; } @@ -4272,13 +4269,14 @@ int fexist(char* s) { /* Taille d'un fichier, -1 si n'existe pas */ /* fp->_cnt ne fonctionne pas sur toute les plate-formes :-(( */ /* Note: NOT YET READY FOR 64-bit */ -INTsys fsize(char* s) { - FILE* fp; +off_t fsize(const char* s) { + char catbuff[CATBUFF_SIZE]; + FILE* fp; if (strnotempty(s)==0) // nom vide: erreur return -1; - fp=fopen(fconv(s),"rb"); + fp=fopen(fconv(catbuff,s),"rb"); if (fp!=NULL) { - INTsys i; + off_t i; fseek(fp,0,SEEK_END); #ifdef HTS_FSEEKO i=ftello(fp); @@ -4287,11 +4285,12 @@ INTsys fsize(char* s) { #endif fclose(fp); return i; - } else return -1; + } else + return -1; } -INTsys fpsize(FILE* fp) { - INTsys oldpos,size; +off_t fpsize(FILE* fp) { + off_t oldpos,size; if (!fp) return -1; #ifdef HTS_FSEEKO @@ -4317,7 +4316,6 @@ typedef struct { } hts_rootdir_strc; HTSEXT_API char* hts_rootdir(char* file) { static hts_rootdir_strc strc = {"", 0}; - //NOSTATIC_RESERVE(strc, hts_rootdir_strc, 1); if (file) { if (!strc.init) { strc.path[0]='\0'; @@ -4470,10 +4468,12 @@ HTS_INLINE int hts_read(htsblk* r,char* buff,int size) { #if HTS_DNSCACHE // 'capsule' contenant uniquement le cache -t_dnscache* _hts_cache(void) { - t_dnscache* cache; - NOSTATIC_RESERVE(cache, t_dnscache, 1); - return cache; +t_dnscache* _hts_cache(httrackp *opt) { + if (opt->state.dns_cache == NULL) { + opt->state.dns_cache = (t_dnscache*)malloct(sizeof(t_dnscache)); + memset(opt->state.dns_cache, 0, sizeof(t_dnscache)); + } + return opt->state.dns_cache; } // free the cache static void hts_cache_free_(t_dnscache* cache) { @@ -4485,10 +4485,10 @@ static void hts_cache_free_(t_dnscache* cache) { } } void hts_cache_free(t_dnscache* cache) { - if (cache != NULL && cache->n != NULL) { - hts_cache_free_(cache->n); - 
cache->n = NULL; - } + if (cache != NULL && cache->n != NULL) { + hts_cache_free_(cache->n); + cache->n = NULL; + } } // lock le cache dns pour tout opération d'ajout @@ -4496,104 +4496,86 @@ void hts_cache_free(t_dnscache* cache) { // -1: status? 0: libérer 1:locker /* - Simple lock function for cache - - Return value: always 0 - Parameter: - 1 wait for lock (mutex) available and lock it - 0 unlock the mutex - [-1 check if locked (always return 0 with mutex)] - -999 initialize + Simple lock for cache */ -#if USE_BEGINTHREAD -int _hts_lockdns(int i) { - static PTHREAD_LOCK_TYPE hMutex; - return htsSetLock(&hMutex,i); -} -#else -int _hts_lockdns(int i) { - int l=0; - if (i>=0) - l=i; - return l; -} -#endif +htsmutex dns_lock = HTSMUTEX_INIT; // routine pour le cache - retour optionnel à donner à chaque fois // NULL: nom non encore testé dans le cache // si h_length==0 alors le nom n'existe pas dans le dns -t_hostent* _hts_ghbn(t_dnscache* cache,char* iadr,t_hostent* retour) { - // attendre que le cache dns soit prêt - //while(_hts_lockdns(-1)); // attendre libération - _hts_lockdns(1); // locker - - while(1) { - if (strcmp(cache->iadr,iadr)==0) { // ok trouvé - if (cache->host_length>0) { // entrée valide +t_hostent* _hts_ghbn(t_dnscache* cache,const char* iadr,t_hostent* retour) { + t_hostent* ret = NULL; + hts_mutexlock(&dns_lock); + for(;;) { + if (strcmp(cache->iadr,iadr) == 0) { // ok trouvé + if (cache->host_length > 0) { // entrée valide if (retour->h_addr_list[0]) memcpy(retour->h_addr_list[0], cache->host_addr, cache->host_length); retour->h_length=cache->host_length; - } else if (cache->host_length==0) { // en cours - _hts_lockdns(0); // délocker - return NULL; + } else if (cache->host_length == 0) { // en cours + ret = NULL; + break; } else { // erreur dans le dns, déja vérifié if (retour->h_addr_list[0]) retour->h_addr_list[0][0]='\0'; retour->h_length=0; // erreur, n'existe pas } - _hts_lockdns(0); // délocker - return retour; + ret = retour; + break; } 
else { // on a pas encore trouvé if (cache->n!=NULL) { // chercher encore - cache=cache->n; // suivant! + cache = cache->n; // suivant! } else { - _hts_lockdns(0); // délocker - return NULL; // non présent + ret = NULL; + break; } } } + hts_mutexrelease(&dns_lock); + return ret; } // tester si iadr a déja été testé (ou en cours de test) // 0 non encore // 1 ok // 2 non présent -int hts_dnstest(char* _iadr) { - char* iadr; - t_dnscache* cache=_hts_cache(); // adresse du cache - NOSTATIC_RESERVE(iadr, char, HTS_URLMAXSIZE*2); +int hts_dnstest(httrackp *opt, const char* _iadr) { + int ret = 0; + t_dnscache* cache=_hts_cache(opt); // adresse du cache + char iadr[HTS_URLMAXSIZE*2]; // sauter user:pass@ éventuel - strcpybuff(iadr,jump_identification(_iadr)); + strcpybuff(iadr, jump_identification(_iadr)); // couper éventuel : { char *a; - if ( (a=jump_toport(iadr)) ) + if ( (a = jump_toport(iadr)) ) *a='\0'; } -#if HTS_WIN +#ifdef _WIN32 if (inet_addr(iadr)!=INADDR_NONE) // numérique #else if (inet_addr(iadr)!=(in_addr_t) -1 ) // numérique #endif return 1; - // while(_hts_lockdns(-1)); // attendre libération - _hts_lockdns(1); // locker - while(1) { - if (strcmp(cache->iadr,iadr)==0) { // ok trouvé - _hts_lockdns(0); // délocker - return 1; // présent! + hts_mutexlock(&dns_lock); + for(;;) { + if (strcmp(cache->iadr, iadr)==0) { // ok trouvé + ret = 1; + break; } else { // on a pas encore trouvé if (cache->n!=NULL) { // chercher encore cache=cache->n; // suivant! 
} else { - _hts_lockdns(0); // délocker - return 2; // non présent + ret = 2; // non présent + break ; } } } + hts_mutexrelease(&dns_lock); + return ret; } @@ -4655,7 +4637,7 @@ HTSEXT_API t_hostent* vxgethostbyname(char* hostname, void* v_buffer) { if (res) { if ( (res->ai_addr) && (res->ai_addrlen) && (res->ai_addrlen <= buffer->addr_maxlen) ) { memcpy(buffer->hp.h_addr_list[0], res->ai_addr, res->ai_addrlen); - buffer->hp.h_length = res->ai_addrlen; + buffer->hp.h_length = (short) res->ai_addrlen; freeaddrinfo(res); return &(buffer->hp); } @@ -4671,10 +4653,10 @@ HTSEXT_API t_hostent* vxgethostbyname(char* hostname, void* v_buffer) { } // cache dns interne à HTS // ** FREE A FAIRE sur la chaine -t_hostent* hts_gethostbyname(char* _iadr, void* v_buffer) { +t_hostent* hts_gethostbyname(httrackp *opt,const char* _iadr, void* v_buffer) { char BIGSTK iadr[HTS_URLMAXSIZE*2]; t_fullhostent* buffer = (t_fullhostent*) v_buffer; - t_dnscache* cache=_hts_cache(); // adresse du cache + t_dnscache* cache=_hts_cache(opt); // adresse du cache t_hostent* hp; /* Clear */ @@ -4718,7 +4700,7 @@ t_hostent* hts_gethostbyname(char* _iadr, void* v_buffer) { #endif { unsigned long inetaddr; -#if HTS_WIN +#ifdef _WIN32 if ((inetaddr=inet_addr(iadr))==INADDR_NONE) { #else if ((inetaddr=inet_addr(iadr))==(in_addr_t) -1 ) { @@ -4755,7 +4737,7 @@ t_hostent* hts_gethostbyname(char* _iadr, void* v_buffer) { } #else -HTS_INLINE t_hostent* hts_gethostbyname(char* iadr, t_fullhostent* buffer) { +HTS_INLINE t_hostent* hts_gethostbyname(httrackp *opt,char* iadr, t_fullhostent* buffer) { t_hostent* retour; #if HTS_WIDE_DEBUG DEBUG_W("gethostbyname (2)\n"); @@ -4778,7 +4760,7 @@ HTS_INLINE t_hostent* hts_gethostbyname(char* iadr, t_fullhostent* buffer) { #define htsLocker(A, N) do {} while(0) static mlink trmalloc = {NULL,0,0,NULL}; static int trmalloc_id=0; -static PTHREAD_LOCK_TYPE* mallocMutex = NULL; +static htsmutex* mallocMutex = NULL; static void hts_meminit(void) { //if (mallocMutex == NULL) 
{ // mallocMutex = calloc(sizeof(*mallocMutex), 1); @@ -4966,30 +4948,32 @@ void cut_path(char* fullpath,char* path,char* pname) { // -- Gestion protocole ftp -- -#if HTS_WIN +#ifdef _WIN32 int ftp_available(void) { return 1; } #else int ftp_available(void) { return 1; // ok! - //return 0; // SOUS UNIX, PROBLEMES + //return 0; // SOUS UNIX, PROBLEMESs } #endif int hts_dgb_init = 0; FILE* hts_dgb_init_fp = NULL; -static void hts_dgb(char* msg); HTSEXT_API void hts_debug(int level) { hts_dgb_init = level; if (hts_dgb_init > 0) { - hts_dgb("hts_debug() called"); + HTS_DBG("hts_debug() called"); } } -static void hts_dgb(char* msg) { - if (hts_dgb_init > 0) { - if (hts_dgb_init_fp == NULL) { + +FILE *hts_dgb_(void) { + if (hts_dgb_init_fp == NULL) { + if ((hts_dgb_init & 0x80) == 0) { + hts_dgb_init_fp = stderr; + } else { #ifdef _WIN32_WCE hts_dgb_init_fp = fopen("\\Temp\\hts-debug.txt", "wb"); #else @@ -4999,17 +4983,28 @@ static void hts_dgb(char* msg) { fprintf(hts_dgb_init_fp, "* Creating file\r\n"); } } - if (hts_dgb_init_fp != NULL) { - fprintf(hts_dgb_init_fp, "%s\r\n", msg); - fflush(hts_dgb_init_fp); - } } + return hts_dgb_init_fp; } +static int hts_init_ok = 0; HTSEXT_API int hts_init(void) { - static int hts_init_ok = 0; + const char *dbg_env; + /* */ + if (hts_init_ok) + return 1; + hts_init_ok = 1; + + /* enable debugging ? 
*/ + dbg_env = getenv("HTS_LOG"); + if (dbg_env != NULL && *dbg_env != 0) { + int level = 0; + if (sscanf(dbg_env, "%d", &level) == 1) { + hts_debug(level); + } + } - hts_dgb("entering hts_init()"); /* debug */ + HTS_DBG("entering hts_init()"); /* debug */ #ifdef _WIN32_WCE #ifndef HTS_CECOMPAT @@ -5017,19 +5012,17 @@ HTSEXT_API int hts_init(void) { #endif #endif - /* Init threads */ - if (!hts_init_ok) { - htsthread_init(); - } + /* Init threads (lazy init) */ + htsthread_init(); /* Ensure external modules are loaded */ - hts_dgb("calling htspe_init()"); /* debug */ - htspe_init(); + HTS_DBG("calling htspe_init()"); /* debug */ + htspe_init(); /* module load (lazy) */ /* MD5 Auto-test */ { char digest[32 + 2]; - unsigned char* atest = (unsigned char*)"MD5 Checksum Autotest"; + const unsigned char* atest = (const unsigned char*)"MD5 Checksum Autotest"; digest[0] = '\0'; domd5mem(atest, strlen(atest), digest, 1); /* a42ec44369da07ace5ec1d660ba4a69a */ if (strcmp(digest, "a42ec44369da07ace5ec1d660ba4a69a") != 0) { @@ -5038,36 +5031,7 @@ HTSEXT_API int hts_init(void) { } } - hts_dgb("initializing default wrappers"); /* debug */ - if (!hts_init_ok) { - hts_init_ok = 1; - // default wrappers - htswrap_init(); - htswrap_add("init",htsdefault_init); - htswrap_add("free",htsdefault_uninit); - htswrap_add("start",htsdefault_start); - htswrap_add("change-options",htsdefault_chopt); - htswrap_add("end",htsdefault_end); - htswrap_add("preprocess-html",htsdefault_preprocesshtml); - htswrap_add("postprocess-html",htsdefault_postprocesshtml); - htswrap_add("check-html",htsdefault_checkhtml); - htswrap_add("loop",htsdefault_loop); - htswrap_add("query",htsdefault_query); - htswrap_add("query2",htsdefault_query2); - htswrap_add("query3",htsdefault_query3); - htswrap_add("check-link",htsdefault_check); - htswrap_add("pause",htsdefault_pause); - htswrap_add("save-file",htsdefault_filesave); - htswrap_add("save-file2",htsdefault_filesave2); - 
htswrap_add("link-detected",htsdefault_linkdetected); - htswrap_add("link-detected2",htsdefault_linkdetected2); - htswrap_add("transfer-status",htsdefault_xfrstatus); - htswrap_add("save-name",htsdefault_savename); - htswrap_add("send-header",htsdefault_sendheader); - htswrap_add("receive-header",htsdefault_receiveheader); - } - - hts_dgb("initializing SSL"); /* debug */ + HTS_DBG("initializing SSL"); /* debug */ #if HTS_USEOPENSSL /* Initialize the OpensSSL library @@ -5088,96 +5052,522 @@ HTSEXT_API int hts_init(void) { } #endif - /* Init vars and thread-specific values */ - hts_dgb("initializing variables"); /* debug */ - hts_initvar(); - - /* initialiser structcheck */ - // structcheck_init(1); - - hts_dgb("ending hts_init()"); /* debug */ + HTS_DBG("ending hts_init()"); /* debug */ return 1; } + +/* will not free thread env. */ HTSEXT_API int hts_uninit(void) { - //htsthread_uninit(); - hts_cache_free(_hts_cache()); - hts_freevar(); - /* htswrap_free(); */ + /* hts_init() is a lazy initializer, with a limited allocation (one or two mutexes) ; + we won't free anything here as the .h semantics were never very clear */ return 1; } +HTSEXT_API int hts_uninit_module(void) { + if (!hts_init_ok) + return 1; + htsthread_uninit(); + htspe_uninit(); + hts_init_ok = 0; + return 1; +} + +HTSEXT_API int hts_log(httrackp *opt, const char* prefix, const char *msg) { + if (opt->log != NULL) { + fspc(opt, opt->log, prefix); + fprintf(opt->log, "%s"LF, msg); + return 0; + } + return 1; /* Error */ +} + +HTSEXT_API void set_wrappers(httrackp *opt) { // LEGACY +} + +HTSEXT_API int plug_wrapper(httrackp *opt, const char *moduleName, const char* argv) { + void* handle = openFunctionLib(moduleName); + if (handle != NULL) { + t_hts_plug plug = (t_hts_plug) getFunctionPtr(handle, "hts_plug"); + t_hts_unplug unplug = (t_hts_unplug) getFunctionPtr(handle, "hts_unplug"); + if (plug != NULL) { + int ret = plug(opt, argv); + if (hts_dgb_init > 0 && opt->log != NULL) { + 
HTS_DBG("plugged module '%s' (return code=%d)" _ moduleName _ ret); + } + if (ret == 1) { /* Success! */ + opt->libHandles.handles = (htslibhandle*) realloct(opt->libHandles.handles, ( opt->libHandles.count + 1 )*sizeof(htslibhandle)); + opt->libHandles.handles[opt->libHandles.count].handle = handle; + opt->libHandles.handles[opt->libHandles.count].moduleName = strdupt(moduleName); + opt->libHandles.count++; + return 1; + } else { + HTS_DBG("* note: error while running entry point 'hts_plug' in %s"LF _ moduleName); + if (unplug) + unplug(opt); + } + } else { + int last_errno = errno; + HTS_DBG("* note: can't find entry point 'hts_plug' in %s: %s"LF _ moduleName _ strerror(last_errno)); + } + closeFunctionLib(handle); + return 0; + } else { + int last_errno = errno; + HTS_DBG("* note: can't load %s: %s"LF _ moduleName _ strerror(last_errno)); + } + return -1; +} + +static void unplug_wrappers(httrackp *opt) { + if (opt->libHandles.handles != NULL) { + int i; + for(i = 0 ; i < opt->libHandles.count ; i++) { + if (opt->libHandles.handles[i].handle != NULL) { + /* hts_unplug(), the dll exit point (finalizer) */ + t_hts_unplug unplug = (t_hts_unplug) getFunctionPtr(opt->libHandles.handles[i].handle, "hts_unplug"); + if (unplug != NULL) + unplug(opt); + closeFunctionLib(opt->libHandles.handles[i].handle); + opt->libHandles.handles[i].handle = NULL; + } + if (opt->libHandles.handles[i].moduleName != NULL) { + freet(opt->libHandles.handles[i].moduleName); + opt->libHandles.handles[i].moduleName = NULL; + } + } + freet(opt->libHandles.handles); + opt->libHandles.handles = NULL; + opt->libHandles.count = 0; + } +} + +int multipleStringMatch(const char *s, const char *match) { + int ret = 0; + String name = STRING_EMPTY; + if (match == NULL || s == NULL || *s == 0) + return 0; + for( ; *match != 0 ; match++) { + StringClear(name); + for( ; *match != 0 && *match != '\n' ; match++) { + StringAddchar(name, *match); + } + if (StringLength(name) > 0 && strstr(s, StringBuff(name)) 
!= NULL) { + ret = 1; + break ; + } + } + StringFree(name); + return ret; +} + +HTSEXT_API httrackp *hts_create_opt(void) { +#ifdef _WIN32 + static const char *defaultModules[] = { + "htsswf", "htsjava", "httrack-plugin", NULL + }; +#else + static const char *defaultModules[] = { + "libhtsswf.so.1", "libhtsjava.so.2", "httrack-plugin", NULL + }; +#endif + httrackp *opt = malloc(sizeof(httrackp)); + + /* default options */ + memset(opt, 0, sizeof(httrackp)); + opt->size_httrackp = sizeof(httrackp); + + /* mutexes */ + hts_mutexinit(&opt->state.lock); + + /* custom wrappers */ + opt->libHandles.count = 0; + + /* default settings */ + + opt->wizard=2; // wizard automatique + opt->quiet=0; // questions + // + opt->travel=0; // même adresse + opt->depth=9999; // mirror total par défaut + opt->extdepth=0; // mais pas à l'extérieur + opt->seeker=1; // down + opt->urlmode=2; // relatif par défaut + opt->debug=0; // pas de débug en plus + opt->getmode=3; // linear scan + opt->maxsite=-1; // taille max site (aucune) + opt->maxfile_nonhtml=-1; // taille max fichier non html + opt->maxfile_html=-1; // idem pour html + opt->maxsoc=4; // nbre socket max + opt->fragment=-1; // pas de fragmentation + opt->nearlink=0; // ne pas prendre les liens non-html "adjacents" + opt->makeindex=1; // faire un index + opt->kindex=0; // index 'keyword' + opt->delete_old=1; // effacer anciens fichiers + opt->makestat=0; // pas de fichier de stats + opt->maketrack=0; // ni de tracking + opt->timeout=120; // timeout par défaut (2 minutes) + opt->cache=1; // cache prioritaire + opt->shell=0; // pas de shell par defaut + opt->proxy.active=0; // pas de proxy + opt->user_agent_send=1; // envoyer un user-agent + StringCopy(opt->user_agent, "Mozilla/4.5 (compatible; HTTrack 3.0x; Windows 98)"); + StringCopy(opt->referer, ""); + StringCopy(opt->from, ""); + opt->savename_83=0; // noms longs par défaut + opt->savename_type=0; // avec structure originale + opt->savename_delayed=2;// hard delayed type 
(default) + opt->delayed_cached=1; // cached delayed type (default) + opt->mimehtml=0; // pas MIME-html + opt->parsejava=HTSPARSE_DEFAULT; // parser classes + opt->hostcontrol=0; // PAS de control host pour timeout et traffic jammer + opt->retry=2; // 2 retry par défaut + opt->errpage=1; // copier ou générer une page d'erreur en cas d'erreur (404 etc.) + opt->check_type=1; // vérifier type si inconnu (cgi,asp..) SAUF / considéré comme html + opt->all_in_cache=0; // ne pas tout stocker en cache + opt->robots=2; // traiter les robots.txt + opt->external=0; // liens externes normaux + opt->passprivacy=0; // mots de passe dans les fichiers + opt->includequery=1; // include query-string par défaut + opt->mirror_first_page=0; // pas mode mirror links + opt->accept_cookie=1; // gérer les cookies + opt->cookie=NULL; + opt->http10=0; // laisser http/1.1 + opt->nokeepalive = 0; // pas keep-alive + opt->nocompression=0; // pas de compression + opt->tolerant=0; // ne pas accepter content-length incorrect + opt->parseall=1; // tout parser (tags inconnus, par exemple) + opt->parsedebug=0; // pas de mode débuggage + opt->norecatch=0; // ne pas reprendre les fichiers effacés par l'utilisateur + opt->verbosedisplay=0; // pas d'animation texte + opt->sizehack=0; // size hack + opt->urlhack=1; // url hack (normalizer) + StringCopy(opt->footer,HTS_DEFAULT_FOOTER); + opt->ftp_proxy=1; // proxy http pour ftp + StringCopy(opt->filelist,""); + StringCopy(opt->lang_iso,"en, *"); + StringCopy(opt->mimedefs,"\n"); // aucun filtre mime (\n IMPORTANT) + StringClear(opt->mod_blacklist); + // + opt->log = stdout; + opt->errlog = stderr; + opt->flush = 1; // flush sur les fichiers log + //opt->aff_progress=0; + opt->keyboard=0; + // + StringCopy(opt->path_html,""); + StringCopy(opt->path_log,""); + StringCopy(opt->path_bin,""); + // +#if HTS_SPARE_MEMORY==0 + opt->maxlink=100000; // 100,000 liens max par défaut (400Kb) + opt->maxfilter=200; // 200 filtres max par défaut +#else + 
opt->maxlink=10000; // 10,000 liens max par défaut (40Kb) + opt->maxfilter=50; // 50 filtres max par défaut +#endif + opt->maxcache=1048576*32; // a peu près 32Mo en cache max -- OPTION NON PARAMETRABLE POUR L'INSTANT -- + //opt->maxcache_anticipate=256; // maximum de liens à anticiper + opt->maxtime=-1; // temps max en secondes +#if HTS_USEMMS + opt->mms_maxtime = 60*3600; // max time for mms streams (one hour) +#endif + opt->maxrate=25000; // taux maxi + opt->maxconn=5.0; // nombre connexions/s + opt->waittime=-1; // wait until.. hh*3600+mm*60+ss + // + opt->exec=""; + opt->is_update=0; // not an update (yet) + opt->dir_topindex=0; // do not build top index (yet) + // + opt->bypass_limits=0; // enforce limits by default + opt->state.stop=0; // stopper + opt->state.exit_xh=0; // abort + + /* Allocated buffers */ + + opt->callbacks_fun = (t_hts_htmlcheck_callbacks*) malloct(sizeof(t_hts_htmlcheck_callbacks)); + memset(opt->callbacks_fun, 0, sizeof(t_hts_htmlcheck_callbacks)); + + /* Preload callbacks : java and flash parser, and the automatic user-defined callback */ + + { + int i; + for(i = 0 ; defaultModules[i] != NULL ; i++) { + int ret = plug_wrapper(opt, defaultModules[i], defaultModules[i]); + if (ret == 0) { /* Module aborted initialization */ + /* Ignored. 
*/ + } + } + } + + return opt; +} + +HTSEXT_API void hts_free_opt(httrackp *opt) { + if (opt != NULL) { + + /* Allocated callbacks */ + + if (opt->callbacks_fun != NULL) { + int i; + t_hts_htmlcheck_callbacks_item *items = (t_hts_htmlcheck_callbacks_item*) opt->callbacks_fun; + const int size = (int) sizeof(t_hts_htmlcheck_callbacks) / sizeof(t_hts_htmlcheck_callbacks_item); + assertf(sizeof(t_hts_htmlcheck_callbacks_item)*size == sizeof(t_hts_htmlcheck_callbacks)); + + /* Free all linked lists */ + for(i = 0 ; i < size ; i++) { + t_hts_callbackarg *carg, *next_carg; + for(carg = items[i].carg ; carg != NULL && (next_carg = carg->prev.carg, carg != NULL) ; carg = next_carg ) { + hts_free(carg); + } + } + + freet(opt->callbacks_fun); + opt->callbacks_fun = NULL; + } + + /* Close library handles */ + unplug_wrappers(opt); + + /* Cache */ + if (opt->state.dns_cache != NULL) { + hts_cache_free(opt->state.dns_cache); + opt->state.dns_cache = NULL; + } + + /* Cancel chain */ + if (opt->state.cancel != NULL) { + htsoptstatecancel *cancel; + for(cancel = opt->state.cancel ; cancel != NULL ; ) { + htsoptstatecancel *next = cancel->next; + if (cancel->url != NULL) { + freet(cancel->url); + } + freet(cancel); + cancel = next; + } + opt->state.cancel = NULL; + } + + /* Free strings */ + + StringFree(opt->proxy.name); + StringFree(opt->proxy.bindhost); + + StringFree(opt->savename_userdef); + StringFree(opt->user_agent); + StringFree(opt->referer); + StringFree(opt->from); + StringFree(opt->lang_iso); + StringFree(opt->sys_com); + StringFree(opt->mimedefs); + StringFree(opt->filelist); + StringFree(opt->urllist); + StringFree(opt->footer); + StringFree(opt->mod_blacklist); + + StringFree(opt->path_html); + StringFree(opt->path_log); + StringFree(opt->path_bin); + + /* mutexes */ + hts_mutexfree(&opt->state.lock); + + /* Free structure */ + free(opt); + } +} + // defaut wrappers -void __cdecl htsdefault_init(void) { +static void __cdecl htsdefault_init(t_hts_callbackarg *carg) { 
} -void __cdecl htsdefault_uninit(void) { - hts_freevar(); +static void __cdecl htsdefault_uninit(t_hts_callbackarg *carg) { + // hts_freevar(); } -int __cdecl htsdefault_start(void* opt) { +static int __cdecl htsdefault_start(t_hts_callbackarg *carg, httrackp* opt) { return 1; } -int __cdecl htsdefault_chopt(void* opt) { +static int __cdecl htsdefault_chopt(t_hts_callbackarg *carg, httrackp* opt) { return 1; } -int __cdecl htsdefault_end(void) { +static int __cdecl htsdefault_end(t_hts_callbackarg *carg, httrackp* opt) { return 1; } -int __cdecl htsdefault_preprocesshtml(char** html,int* len,char* url_adresse,char* url_fichier) { +static int __cdecl htsdefault_preprocesshtml(t_hts_callbackarg *carg, httrackp *opt, char** html,int* len,const char* url_adresse,const char* url_fichier) { return 1; } -int __cdecl htsdefault_postprocesshtml(char** html,int* len,char* url_adresse,char* url_fichier) { +static int __cdecl htsdefault_postprocesshtml(t_hts_callbackarg *carg, httrackp *opt, char** html,int* len,const char* url_adresse,const char* url_fichier) { return 1; } -int __cdecl htsdefault_checkhtml(char* html,int len,char* url_adresse,char* url_fichier) { +static int __cdecl htsdefault_checkhtml(t_hts_callbackarg *carg, httrackp *opt, char* html,int len,const char* url_adresse,const char* url_fichier) { return 1; } -int __cdecl htsdefault_loop(void* back,int back_max,int back_index,int lien_n,int lien_tot,int stat_time,hts_stat_struct* stats) { // appelé à chaque boucle de HTTrack +static int __cdecl htsdefault_loop(t_hts_callbackarg *carg, httrackp *opt, lien_back* back,int back_max,int back_index,int lien_n,int lien_tot,int stat_time,hts_stat_struct* stats) { // appelé à chaque boucle de HTTrack return 1; } -char* __cdecl htsdefault_query(char* question) { +static const char* __cdecl htsdefault_query(t_hts_callbackarg *carg, httrackp *opt, const char* question) { return ""; } -char* __cdecl htsdefault_query2(char* question) { +static const char* __cdecl 
htsdefault_query2(t_hts_callbackarg *carg, httrackp *opt, const char* question) { return ""; } -char* __cdecl htsdefault_query3(char* question) { +static const char* __cdecl htsdefault_query3(t_hts_callbackarg *carg, httrackp *opt, const char* question) { return ""; } -int __cdecl htsdefault_check(char* adr,char* fil,int status) { +static int __cdecl htsdefault_check(t_hts_callbackarg *carg, httrackp *opt, const char* adr,const char* fil,int status) { return -1; } -int __cdecl htsdefault_check_mime(char* adr,char* fil,char* mime,int status) { +static int __cdecl htsdefault_check_mime(t_hts_callbackarg *carg, httrackp *opt, const char* adr,const char* fil,const char* mime,int status) { return -1; } -void __cdecl htsdefault_pause(char* lockfile) { +static void __cdecl htsdefault_pause(t_hts_callbackarg *carg, httrackp *opt, const char* lockfile) { while (fexist(lockfile)) { Sleep(1000); } } -void __cdecl htsdefault_filesave(char* file) { +static void __cdecl htsdefault_filesave(t_hts_callbackarg *carg, httrackp *opt, const char* file) { } -void __cdecl htsdefault_filesave2(char* adr, char* file, char* sav, int is_new, int is_modified, int not_updated) { +static void __cdecl htsdefault_filesave2(t_hts_callbackarg *carg, httrackp *opt, const char* adr, const char* file, const char* sav, int is_new, int is_modified, int not_updated) { } -int __cdecl htsdefault_linkdetected(char* link) { +static int __cdecl htsdefault_linkdetected(t_hts_callbackarg *carg, httrackp *opt, char* link) { return 1; } -int __cdecl htsdefault_linkdetected2(char* link, char* start_tag) { +static int __cdecl htsdefault_linkdetected2(t_hts_callbackarg *carg, httrackp *opt, char* link, const char* start_tag) { return 1; } -int __cdecl htsdefault_xfrstatus(void* back) { +static int __cdecl htsdefault_xfrstatus(t_hts_callbackarg *carg, httrackp *opt, lien_back* back) { return 1; } -int __cdecl htsdefault_savename(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save) { 
+static int __cdecl htsdefault_savename(t_hts_callbackarg *carg, httrackp *opt, const char* adr_complete,const char* fil_complete,const char* referer_adr,const char* referer_fil,char* save) { return 1; } -int __cdecl htsdefault_sendheader(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* outgoing) { +static int __cdecl htsdefault_sendhead(t_hts_callbackarg *carg, httrackp *opt, char* buff, const char* adr, const char* fil, const char* referer_adr, const char* referer_fil, htsblk* outgoing) { return 1; } -int __cdecl htsdefault_receiveheader(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* incoming) { +static int __cdecl htsdefault_receivehead(t_hts_callbackarg *carg, httrackp *opt, char* buff, const char* adr, const char* fil, const char* referer_adr, const char* referer_fil, htsblk* incoming) { return 1; } +static int __cdecl htsdefault_detect(t_hts_callbackarg *carg, httrackp *opt, htsmoduleStruct* str) { + return 0; +} +static int __cdecl htsdefault_parse(t_hts_callbackarg *carg, httrackp *opt, htsmoduleStruct* str) { + return 0; +} + + +/* Default internal dummy callbacks */ +const t_hts_htmlcheck_callbacks default_callbacks = { + { htsdefault_init, NULL }, + { htsdefault_uninit, NULL }, + { htsdefault_start, NULL }, + { htsdefault_end, NULL }, + { htsdefault_chopt, NULL }, + { htsdefault_preprocesshtml, NULL }, + { htsdefault_postprocesshtml, NULL }, + { htsdefault_checkhtml, NULL }, + { htsdefault_query, NULL }, + { htsdefault_query2, NULL }, + { htsdefault_query3, NULL }, + { htsdefault_loop, NULL }, + { htsdefault_check, NULL }, + { htsdefault_check_mime, NULL }, + { htsdefault_pause, NULL }, + { htsdefault_filesave, NULL }, + { htsdefault_filesave2, NULL }, + { htsdefault_linkdetected, NULL }, + { htsdefault_linkdetected2, NULL }, + { htsdefault_xfrstatus, NULL }, + { htsdefault_savename, NULL }, + { htsdefault_sendhead, NULL }, + { htsdefault_receivehead, NULL }, + { htsdefault_detect, NULL }, + 
{ htsdefault_parse, NULL } +}; + +#define CHARCAST(A) ( (char*) (A) ) +#define OFFSET_OF(TYPE, MEMBER) ( (size_t) ( CHARCAST(&(((TYPE*) NULL)->MEMBER)) - CHARCAST((TYPE*) NULL) ) ) +#define CALLBACK_REF(name, fun) \ + { name, OFFSET_OF(t_hts_htmlcheck_callbacks, fun) } +#define MEMBER_OF(STRUCT, OFFSET, TYPE) ( * ((TYPE*)((char*)(STRUCT) + (OFFSET))) ) + +const t_hts_callback_ref default_callbacks_ref[] = { + CALLBACK_REF("init", init), + CALLBACK_REF("free", uninit), + CALLBACK_REF("start", start), + CALLBACK_REF("end", end), + CALLBACK_REF("change-options", chopt), + CALLBACK_REF("preprocess-html", preprocess), + CALLBACK_REF("postprocess-html", postprocess), + CALLBACK_REF("check-html", check_html), + CALLBACK_REF("query", query), + CALLBACK_REF("query2", query2), + CALLBACK_REF("query3", query3), + CALLBACK_REF("loop", loop), + CALLBACK_REF("check-link", check_link), + CALLBACK_REF("check-mime", check_mime), + CALLBACK_REF("pause", pause), + CALLBACK_REF("save-file", filesave), + CALLBACK_REF("save-file2", filesave2), + CALLBACK_REF("link-detected", linkdetected), + CALLBACK_REF("link-detected2", linkdetected2), + CALLBACK_REF("transfer-status", xfrstatus), + CALLBACK_REF("save-name", savename), + CALLBACK_REF("send-header", sendhead), + CALLBACK_REF("receive-header", receivehead), + { NULL, 0 } +}; + +size_t hts_get_callback_offs(const char *name) { + const t_hts_callback_ref *ref; + for(ref = &default_callbacks_ref[0] ; ref->name != NULL ; ref++) { + if (strcmp(name, ref->name) == 0) { + return ref->offset; + } + } + return (size_t)(-1); +} + +int hts_set_callback(t_hts_htmlcheck_callbacks *callbacks, const char *name, void *function) { + size_t offs = hts_get_callback_offs(name); + if (offs != (size_t) -1) { + MEMBER_OF(callbacks, offs, void*) = function; + return 0; + } + return 1; +} + +void *hts_get_callback(t_hts_htmlcheck_callbacks *callbacks, const char *name) { + size_t offs = hts_get_callback_offs(name); + if (offs != (size_t) -1) { + return 
MEMBER_OF(callbacks, offs, void*); + } + return NULL; +} + // end defaut wrappers +/* libc stubs */ +HTSEXT_API char* hts_strdup(const char* str) { + return strdup(str); +} + +HTSEXT_API void* hts_malloc(size_t size) { + return malloc(size); +} + +HTSEXT_API void* hts_realloc(void* data, size_t size) { + return realloc(data, size); +} + +HTSEXT_API void hts_free(void* data) { + free(data); +} + +/* Dummy functions */ +HTSEXT_API int hts_resetvar(void) { + return 0; +} // Fin diff --git a/src/htslib.h b/src/htslib.h index 2a720da..1061aee 100644 --- a/src/htslib.h +++ b/src/htslib.h @@ -39,6 +39,20 @@ Please visit our Website: http://www.httrack.com #ifndef HTS_DEFH #define HTS_DEFH +/* Forward definitions */ +#ifndef HTS_DEF_FWSTRUCT_htsrequest +#define HTS_DEF_FWSTRUCT_htsrequest +typedef struct htsrequest htsrequest; +#endif +#ifndef HTS_DEF_FWSTRUCT_htsblk +#define HTS_DEF_FWSTRUCT_htsblk +typedef struct htsblk htsblk; +#endif +#ifndef HTS_DEF_FWSTRUCT_t_dnscache +#define HTS_DEF_FWSTRUCT_t_dnscache +typedef struct t_dnscache t_dnscache; +#endif + /* définitions globales */ #include "htsglobal.h" @@ -46,6 +60,7 @@ Please visit our Website: http://www.httrack.com #include "htsbase.h" #include "htsbasenet.h" #include "htsnet.h" +#include "htsdefines.h" /* cookies et auth */ #include "htsbauth.h" @@ -61,8 +76,29 @@ Please visit our Website: http://www.httrack.com #define READ_TIMEOUT (-3) #define READ_INTERNAL_ERROR (-4) +/* concat */ +HTS_STATIC char* getHtsOptBuff_(httrackp *opt) { + opt->state.concat.index = ( opt->state.concat.index + 1 ) % 16; + return opt->state.concat.buff[opt->state.concat.index]; +} +#define OPT_GET_BUFF(OPT) ( getHtsOptBuff_(OPT) ) + // structure pour paramètres supplémentaires lors de la requête -typedef struct htsrequest { +#ifndef HTS_DEF_FWSTRUCT_htsrequest_proxy +#define HTS_DEF_FWSTRUCT_htsrequest_proxy +typedef struct htsrequest_proxy htsrequest_proxy; +#endif +struct htsrequest_proxy { + int active; + char name[1024]; + int port; 
+ char bindhost[256]; // bind this host +}; +#ifndef HTS_DEF_FWSTRUCT_htsrequest +#define HTS_DEF_FWSTRUCT_htsrequest +typedef struct htsrequest htsrequest; +#endif +struct htsrequest { short int user_agent_send; // user agent (ex: httrack/1.0 [sun]) short int http11; // l'en tête peut (doit) être signé HTTP/1.1 et non HTTP/1.0 short int nokeepalive; // pas de keep-alive @@ -73,12 +109,16 @@ typedef struct htsrequest { char referer[256]; char from[256]; char lang_iso[64]; - t_proxy proxy; // proxy -} htsrequest; + htsrequest_proxy proxy; // proxy +}; // structure pour retour d'une connexion/prise d'en tête -typedef struct htsblk { +#ifndef HTS_DEF_FWSTRUCT_htsblk +#define HTS_DEF_FWSTRUCT_htsblk +typedef struct htsblk htsblk; +#endif +struct htsblk { int statuscode; // status-code, -1=erreur, 200=OK,201=..etc (cf RFC1945) short int notmodified; // page ou fichier NON modifié (transféré) short int is_write; // sortie sur disque (out) ou en mémoire (adr) @@ -117,16 +157,24 @@ typedef struct htsblk { /* */ htsrequest req; // paramètres pour la requête /*char digest[32+2]; // digest md5 généré par le moteur ("" si non généré)*/ -} htsblk; +}; /* ANCIENNE STURCTURE pour cache 1.0 */ -typedef struct { +#ifndef HTS_DEF_FWSTRUCT_OLD_t_proxy +#define HTS_DEF_FWSTRUCT_OLD_t_proxy +typedef struct OLD_t_proxy OLD_t_proxy; +#endif +struct OLD_t_proxy { int active; char name[1024]; int port; -} OLD_t_proxy; -typedef struct { +}; +#ifndef HTS_DEF_FWSTRUCT_OLD_htsblk +#define HTS_DEF_FWSTRUCT_OLD_htsblk +typedef struct OLD_htsblk OLD_htsblk; +#endif +struct OLD_htsblk { int statuscode; // ANCIENNE STURCTURE - status-code, -1=erreur, 200=OK,201=..etc (cf RFC1945) int notmodified; // ANCIENNE STURCTURE - page ou fichier NON modifié (transféré) int is_write; // ANCIENNE STURCTURE - sortie sur disque (out) ou en mémoire (adr) @@ -144,24 +192,29 @@ typedef struct { int user_agent_send; // ANCIENNE STURCTURE - user agent (ex: httrack/1.0 [sun]) char user_agent[64]; int http11; // 
ANCIENNE STURCTURE - l'en tête doit être signé HTTP/1.1 et non HTTP/1.0 -} OLD_htsblk; +}; /* fin ANCIENNE STURCTURE pour cache 1.0 */ // cache pour le dns, pour éviter de faire des gethostbyname sans arrêt -typedef struct t_dnscache { +#ifndef HTS_DEF_FWSTRUCT_t_dnscache +#define HTS_DEF_FWSTRUCT_t_dnscache +typedef struct t_dnscache t_dnscache; +#endif +struct t_dnscache { char iadr[1024]; struct t_dnscache* n; char host_addr[HTS_MAXADDRLEN]; // 4 octets (v4), ou 16 octets (v6) int host_length; // 4 normalement - ==0 alors en cours de résolution // ou >16 si sockaddr // ==-1 alors erreur (host n'éxiste pas) -} t_dnscache; - +}; /* Library internal definictions */ #ifdef HTS_INTERNAL_BYTECODE +extern htsmutex dns_lock; + // fonctions unix/winsock int hts_read(htsblk* r,char* buff,int size); //int HTS_TOTAL_RECV_CHECK(int var); @@ -170,21 +223,45 @@ LLint check_downloadable_bytes(int rate); #ifndef HTTRACK_DEFLIB HTSEXT_API int hts_init(void); HTSEXT_API int hts_uninit(void); +HTSEXT_API int hts_uninit_module(void); +HTSEXT_API int hts_resetvar(void); /* dummy */ +HTSEXT_API void hts_debug(int level); +HTSEXT_API httrackp* hts_create_opt(void); +HTSEXT_API void hts_free_opt(httrackp *opt); +HTSEXT_API void set_wrappers(httrackp *opt); /* LEGACY */ +HTSEXT_API int plug_wrapper(httrackp *opt, const char *moduleName, const char* argv); + +HTSEXT_API char* hts_strdup(const char* string); +HTSEXT_API void* hts_malloc(size_t size); +HTSEXT_API void* hts_realloc(void* data, size_t size); +HTSEXT_API void hts_free(void* data); #endif +extern int hts_dgb_init; +extern FILE* hts_dgb_(void); +#undef _ +#define _ , +#define HTS_DBG(FMT) do { \ + if (hts_dgb_init > 0) { \ + FILE *fp = hts_dgb_(); \ + fprintf(fp, FMT); \ + fprintf(fp, "\n"); \ + fflush(fp); \ + } \ +} while(0) // fonctions principales -int http_fopen(char* adr,char* fil,htsblk* retour); -int http_xfopen(int mode,int treat,int waitconnect,char* xsend,char* adr,char* fil,htsblk* retour); -int 
http_sendhead(t_cookie* cookie,int mode,char* xsend,char* adr,char* fil,char* referer_adr,char* referer_fil,htsblk* retour); -htsblk httpget(char* url); +int http_fopen(httrackp *opt,char* adr,char* fil,htsblk* retour); +int http_xfopen(httrackp *opt,int mode,int treat,int waitconnect,char* xsend,char* adr,char* fil,htsblk* retour); +int http_sendhead(httrackp *opt,t_cookie* cookie,int mode,char* xsend,char* adr,char* fil,char* referer_adr,char* referer_fil,htsblk* retour); +htsblk httpget(httrackp *opt,char* url); //int newhttp(char* iadr,char* err=NULL); -int newhttp(char* iadr,htsblk* retour,int port,int waitconnect); +int newhttp(httrackp *opt,const char* iadr,htsblk* retour,int port,int waitconnect); HTS_INLINE void deletehttp(htsblk* r); HTS_INLINE int deleteaddr(htsblk* r); HTS_INLINE void deletesoc(T_SOC soc); HTS_INLINE void deletesoc_r(htsblk* r); -htsblk http_location(char* adr,char* fil,char* loc); -htsblk http_test(char* adr,char* fil,char* loc); +htsblk http_location(httrackp *opt,char* adr,char* fil,char* loc); +htsblk http_test(httrackp *opt,char* adr,char* fil,char* loc); int check_readinput(htsblk* r); int check_readinput_t(T_SOC soc, int timeout); void http_fread(T_SOC soc,htsblk* retour); @@ -196,20 +273,19 @@ HTSEXT_API void infostatuscode(char* msg,int statuscode); #endif // sous-fonctions -htsblk xhttpget(char* adr,char* fil); -htsblk http_gethead(char* adr,char* fil); +htsblk xhttpget(httrackp *opt,char* adr,char* fil); +htsblk http_gethead(httrackp *opt,char* adr,char* fil); LLint http_xfread1(htsblk* r,int bufl); -HTS_INLINE t_hostent* hts_gethostbyname(char* iadr, void* v_buffer); +HTS_INLINE t_hostent* hts_gethostbyname(httrackp *opt,const char* iadr, void* v_buffer); #ifndef HTTRACK_DEFLIB HTSEXT_API t_hostent* vxgethostbyname(char* hostname, void* v_buffer); #endif -t_hostent* _hts_ghbn(t_dnscache* cache,char* iadr,t_hostent* retour); +t_hostent* _hts_ghbn(t_dnscache* cache,const char* iadr,t_hostent* retour); int ftp_available(void); 
#if HTS_DNSCACHE void hts_cache_free(t_dnscache* cache); -int hts_dnstest(char* _iadr); -t_dnscache* _hts_cache(void); -int _hts_lockdns(int i); +int hts_dnstest(httrackp *opt, const char* _iadr); +t_dnscache* _hts_cache(httrackp *opt); #endif // outils divers @@ -223,19 +299,19 @@ HTSEXT_API void qsec2str(char *st,TStamp t); #endif void time_gmt_rfc822(char* s); void time_local_rfc822(char* s); -struct tm* convert_time_rfc822(char* s); -int set_filetime(char* file,struct tm* tm_time); -int set_filetime_rfc822(char* file,char* date); -int get_filetime_rfc822(char* file,char* date); +struct tm* convert_time_rfc822(struct tm* buffer, const char* s); +int set_filetime(const char* file,struct tm* tm_time); +int set_filetime_rfc822(const char* file,const char* date); +int get_filetime_rfc822(const char* file,char* date); HTS_INLINE void time_rfc822(char* s,struct tm * A); HTS_INLINE void time_rfc822_local(char* s,struct tm * A); #ifndef HTTRACK_DEFLIB -HTSEXT_API char* int2char(int n); -HTSEXT_API char* int2bytes(LLint n); -HTSEXT_API char* int2bytessec(long int n); -HTSEXT_API char** int2bytes2(LLint n); +HTSEXT_API char* int2char(strc_int2bytes2* strc, int n); +HTSEXT_API char* int2bytes(strc_int2bytes2* strc, LLint n); +HTSEXT_API char* int2bytessec(strc_int2bytes2* strc, long int n); +HTSEXT_API char** int2bytes2(strc_int2bytes2* strc, LLint n); #endif -HTS_INLINE int sendc(htsblk* r, char* s); +HTS_INLINE int sendc(htsblk* r, const char* s); int finput(int fd,char* s,int max); int binput(char* buff,char* s,int max); int linput(FILE* fp,char* s,int max); @@ -245,32 +321,36 @@ int linput_trim(FILE* fp,char* s,int max); int linput_cpp(FILE* fp,char* s,int max); void rawlinput(FILE* fp,char* s,int max); char* strstrcase(char *s,char *o); -int ident_url_absolute(char* url,char* adr,char* fil); +int ident_url_absolute(const char* url,char* adr,char* fil); void fil_simplifie(char* f); int is_unicode_utf8(unsigned char* buffer, unsigned int size); void 
map_characters(unsigned char* buffer, unsigned int size, unsigned int* map); -int ishtml(const char* urlfil); +int ishtml(httrackp *opt,const char* urlfil); int ishtml_ext(const char* a); int ishttperror(int err); -void guess_httptype(char *s,const char *fil); -void get_httptype(char *s,const char *fil,int flag); -int get_userhttptype(int setdefs,char *s,const char *fil); +void guess_httptype(httrackp *opt,char *s,const char *fil); +#ifndef HTTRACK_DEFLIB +HTSEXT_API void get_httptype(httrackp *opt,char *s,const char *fil,int flag); +#endif +int get_userhttptype(httrackp *opt,char *s,const char *fil); void give_mimext(char *s,const char *st); -int is_knowntype(const char *fil); -int is_userknowntype(const char *fil); -int is_dyntype(const char *fil); -char* get_ext(const char *fil); -int may_unknown(const char* st); #ifndef HTTRACK_DEFLIB -HTSEXT_API char* jump_identification(char*); -HTSEXT_API char* jump_normalized(char*); -HTSEXT_API char* jump_toport(char*); -HTSEXT_API char* fil_normalized(char* source, char* dest); -HTSEXT_API char* adr_normalized(char* source, char* dest); +HTSEXT_API int is_knowntype(httrackp *opt,const char *fil); +HTSEXT_API int is_userknowntype(httrackp *opt,const char *fil); +HTSEXT_API int is_dyntype(const char *fil); +HTSEXT_API char* get_ext(char *catbuff, const char *fil); #endif -char* strrchr_limit(char* s, char c, char* limit); -char* strstr_limit(char* s, char* sub, char* limit); -HTS_INLINE char* jump_protocol(char* source); +int may_unknown(httrackp *opt,const char* st); +#ifndef HTTRACK_DEFLIB +HTSEXT_API char* jump_identification(const char*); +HTSEXT_API char* jump_normalized(const char*); +HTSEXT_API char* jump_toport(const char*); +HTSEXT_API char* fil_normalized(const char* source, char* dest); +HTSEXT_API char* adr_normalized(const char* source, char* dest); +#endif +char* strrchr_limit(const char* s, char c, const char* limit); +char* strstr_limit(const char* s, const char* sub, const char* limit); +HTS_INLINE char* 
jump_protocol(const char* source); void code64(unsigned char* a,int size_a,unsigned char* b,int crlf); #ifndef HTTRACK_DEFLIB HTSEXT_API void unescape_amp(char* s); @@ -279,7 +359,7 @@ HTSEXT_API void escape_in_url(char* s); HTSEXT_API void escape_uri(char* s); HTSEXT_API void escape_uri_utf(char* s); HTSEXT_API void escape_check_url(char* s); -HTSEXT_API char* escape_check_url_addr(char* s); +HTSEXT_API char* escape_check_url_addr(char *catbuff, const char* s); HTSEXT_API void x_escape_http(char* s,int mode); HTSEXT_API void x_escape_html(char* s); HTSEXT_API void escape_remove_control(char* s); @@ -287,57 +367,47 @@ HTSEXT_API void escape_for_html_print(char* s, char* d); HTSEXT_API void escape_for_html_print_full(char* s, char* d); #endif #ifndef HTTRACK_DEFLIB -HTSEXT_API char* unescape_http(char* s); -HTSEXT_API char* unescape_http_unharm(char* s, int no_high); -HTSEXT_API char* antislash_unescaped(char* s); +HTSEXT_API char* unescape_http(char *catbuff, const char* s); +HTSEXT_API char* unescape_http_unharm(char *catbuff, const char* s, int no_high); +HTSEXT_API char* antislash_unescaped(char *catbuff, const char* s); +HTSEXT_API char* concat(char *catbuff,const char* a,const char* b); +HTSEXT_API char* fconcat(char *catbuff, const char* a, const char* b); +HTSEXT_API char* fconv(char *catbuff, const char* a); #endif -char* concat(const char* a,const char* b); -#define copychar(a) concat((a),NULL) -#if HTS_DOSNAME -char* fconcat(char* a,char* b); -char* fconv(char* a); -#else -#define fconv(a) (a) -#define fconcat(a,b) concat(a,b) +#define copychar(catbuff,a) concat(catbuff,(a),NULL) +char* fslash(char *catbuff, const char* a); +#ifndef HTTRACK_DEFLIB +HTSEXT_API int hts_log(httrackp *opt, const char* prefix, const char *msg); #endif -char* fslash(char* a); -char* __fslash(char* a); -char* convtolower(char* a); -char* concat(const char* a,const char* b); +char* convtolower(char *catbuff, const char* a); void hts_lowcase(char* s); void hts_replace(char *s,char 
from,char to); +int multipleStringMatch(const char *s, const char *match); void fprintfio(FILE* fp,char* buff,char* prefix); -#if HTS_WIN +#ifdef _WIN32 #else int sig_ignore_flag( int setflag ); // flag ignore #endif void cut_path(char* fullpath,char* path,char* pname); -int fexist(char* s); -/*LLint fsize(char* s); */ -INTsys fpsize(FILE* fp); -INTsys fsize(char* s); +int fexist(const char* s); +/*LLint fsize(const char* s); */ +off_t fpsize(FILE* fp); +off_t fsize(const char* s); /* root dir */ #ifndef HTTRACK_DEFLIB HTSEXT_API char* hts_rootdir(char* file); #endif // Threads -#if USE_PTHREAD typedef void* ( *beginthread_type )( void * ); -unsigned long _beginthread( beginthread_type start_address, unsigned stack_size, void *arglist ); -#endif - - - +/*unsigned long _beginthread( beginthread_type start_address, unsigned stack_size, void *arglist );*/ /* variables globales */ -//extern LLint HTS_TOTAL_RECV; // flux entrant reçu -//extern int HTS_TOTAL_RECV_STATE; // status: 0 tout va bien 1: ralentir un peu 2: ralentir 3: beaucoup extern HTSEXT_API hts_stat_struct HTS_STAT; extern int _DEBUG_HEAD; extern FILE* ioinfo; @@ -353,42 +423,63 @@ extern const char* hts_detectURL[]; extern const char* hts_detectandleave[]; extern const char* hts_detect_js[]; -// defaut wrappers -void __cdecl htsdefault_init(void); -void __cdecl htsdefault_uninit(void); -int __cdecl htsdefault_start(void* opt); -int __cdecl htsdefault_chopt(void* opt); -int __cdecl htsdefault_end(void); -int __cdecl htsdefault_preprocesshtml(char** html,int* len,char* url_adresse,char* url_fichier); -int __cdecl htsdefault_postprocesshtml(char** html,int* len,char* url_adresse,char* url_fichier); -int __cdecl htsdefault_checkhtml(char* html,int len,char* url_adresse,char* url_fichier); -int __cdecl htsdefault_loop(void* back,int back_max,int back_index,int lien_n,int lien_tot,int stat_time,hts_stat_struct* stats); -char* __cdecl htsdefault_query(char* question); -char* __cdecl htsdefault_query2(char* 
question); -char* __cdecl htsdefault_query3(char* question); -int __cdecl htsdefault_check(char* adr,char* fil,int status); -int __cdecl htsdefault_check_mime(char* adr,char* fil,char* mime,int status); -void __cdecl htsdefault_pause(char* lockfile); -void __cdecl htsdefault_filesave(char*); -void __cdecl htsdefault_filesave2(char* adr, char* file, char* sav, int is_new, int is_modified,int not_updated); -int __cdecl htsdefault_linkdetected(char* link); -int __cdecl htsdefault_linkdetected2(char* link, char* tag_start); -int __cdecl htsdefault_xfrstatus(void* back); -int __cdecl htsdefault_savename(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save); -int __cdecl htsdefault_sendheader(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* outgoing); -int __cdecl htsdefault_receiveheader(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* incoming); - -// end defaut wrappers - - // htsmodule.c definitions -extern void* getFunctionPtr(httrackp* opt, char* file, char* fncname); -extern void clearCallbacks(htscallbacks* chain); +extern void* openFunctionLib(const char* file_); +extern void* getFunctionPtr(void* handle, const char* fncname); +extern void closeFunctionLib(void* handle); +extern void clearCallbacks(htscallbacks* chain); +extern size_t hts_get_callback_offs(const char *name); +int hts_set_callback(t_hts_htmlcheck_callbacks *callbacks, const char *name, void *function); +void *hts_get_callback(t_hts_htmlcheck_callbacks *callbacks, const char *name); + +#define CBSTRUCT(OPT) ((t_hts_htmlcheck_callbacks*) ((OPT)->callbacks_fun)) +#define GET_USERCALLBACK(OPT, NAME) ( CBSTRUCT(OPT)-> NAME .fun ) +#define GET_USERARG(OPT, NAME) ( CBSTRUCT(OPT)-> NAME .carg ) +#define GET_USERDEF(OPT, NAME) ( \ + (CBSTRUCT(OPT) != NULL && CBSTRUCT(OPT)-> NAME .fun != NULL) \ + ? ( GET_USERARG(OPT, NAME) ) \ + : ( default_callbacks. 
NAME .carg ) \ +) +#define GET_CALLBACK(OPT, NAME) ( \ + (CBSTRUCT(OPT) != NULL && CBSTRUCT(OPT)-> NAME .fun != NULL) \ + ? ( GET_USERCALLBACK(OPT, NAME ) ) \ + : ( default_callbacks. NAME .fun ) \ +) + +/* Predefined macros */ +#define RUN_CALLBACK_NOARG(OPT, NAME) GET_CALLBACK(OPT, NAME)(GET_USERARG(OPT, NAME)) +#define RUN_CALLBACK0(OPT, NAME) GET_CALLBACK(OPT, NAME)(GET_USERARG(OPT, NAME), OPT) +#define RUN_CALLBACK1(OPT, NAME, ARG1) GET_CALLBACK(OPT, NAME)(GET_USERARG(OPT, NAME), OPT, ARG1) +#define RUN_CALLBACK2(OPT, NAME, ARG1, ARG2) GET_CALLBACK(OPT, NAME)(GET_USERARG(OPT, NAME), OPT, ARG1, ARG2) +#define RUN_CALLBACK3(OPT, NAME, ARG1, ARG2, ARG3) GET_CALLBACK(OPT, NAME)(GET_USERARG(OPT, NAME), OPT, ARG1, ARG2, ARG3) +#define RUN_CALLBACK4(OPT, NAME, ARG1, ARG2, ARG3, ARG4) GET_CALLBACK(OPT, NAME)(GET_USERARG(OPT, NAME), OPT, ARG1, ARG2, ARG3, ARG4) +#define RUN_CALLBACK5(OPT, NAME, ARG1, ARG2, ARG3, ARG4, ARG5) GET_CALLBACK(OPT, NAME)(GET_USERARG(OPT, NAME), OPT, ARG1, ARG2, ARG3, ARG4, ARG5) +#define RUN_CALLBACK6(OPT, NAME, ARG1, ARG2, ARG3, ARG4, ARG5, ARG6) GET_CALLBACK(OPT, NAME)(GET_USERARG(OPT, NAME), OPT, ARG1, ARG2, ARG3, ARG4, ARG5, ARG6) +#define RUN_CALLBACK7(OPT, NAME, ARG1, ARG2, ARG3, ARG4, ARG5, ARG6, ARG7) GET_CALLBACK(OPT, NAME)(GET_USERARG(OPT, NAME), OPT, ARG1, ARG2, ARG3, ARG4, ARG5, ARG6, ARG7) +#define RUN_CALLBACK8(OPT, NAME, ARG1, ARG2, ARG3, ARG4, ARG5, ARG6, ARG7, ARG8) GET_CALLBACK(OPT, NAME)(GET_USERARG(OPT, NAME), OPT, ARG1, ARG2, ARG3, ARG4, ARG5, ARG6, ARG7, ARG8) +/* +#define GET_CALLBACK(OPT, NAME, ARG) ( \ + ( \ + ( ARG ) = GET_USERDEF(OPT, NAME), \ + ( \ + (CBSTRUCT(OPT) != NULL && CBSTRUCT(OPT)-> NAME .fun != NULL) \ + ? ( GET_USERCALLBACK(OPT, NAME ) ) \ + : ( default_callbacks. 
NAME .fun ) \ + ) \ + ) \ +) +*/ #endif // internals +#undef PATH_SEPARATOR +#ifdef _WIN32 +#define PATH_SEPARATOR '\\' +#else +#define PATH_SEPARATOR '/' +#endif /* Spaces: CR,LF,TAB,FF */ #define is_space(c) ( ((c)==' ') || ((c)=='\"') || ((c)==10) || ((c)==13) || ((c)==9) || ((c)==12) || ((c)==11) || ((c)=='\'') ) @@ -413,7 +504,7 @@ extern void clearCallbacks(htscallbacks* chain); // compare le début de f avec s et retourne la position de la fin // 'A=a' (case insensitive) -static int strfield(const char* f,const char* s) { +HTS_STATIC int strfield(const char* f,const char* s) { int r=0; while (streql(*f,*s) && ((*f)!=0) && ((*s)!=0)) { f++; s++; r++; } if (*s==0) @@ -421,7 +512,7 @@ static int strfield(const char* f,const char* s) { else return 0; } -static int strcmpnocase(char* a,char* b) { +HTS_STATIC int strcmpnocase(char* a,char* b) { while(*a) { int cmp = hichar(*a) - hichar(*b); if (cmp != 0) @@ -445,7 +536,7 @@ static int strcmpnocase(char* a,char* b) { #if HTS_USEMMS #define OPT_MMS(a) (strfield2((a), "video/x-ms-asf") != 0) #else -#define OPT_MMS(a) (false) +#define OPT_MMS(a) (0) #endif #define is_hypertext_mime__(a) \ ( (strfield2((a),"text/html")!=0)\ @@ -461,6 +552,7 @@ static int strcmpnocase(char* a,char* b) { (\ (strfield2((a),"audio/x-pn-realaudio")!=0) \ || (strfield2((a),"audio/x-mpegurl")!=0) \ + /*|| (strfield2((a),"text/xml")!=0) || (strfield2((a),"application/xml")!=0) : TODO: content check */ \ || OPT_MMS(a) \ ) @@ -469,39 +561,39 @@ static int strcmpnocase(char* a,char* b) { #ifdef HTS_INTERNAL_BYTECODE // check if (mime, file) is hypertext -static int is_hypertext_mime(const char* mime, const char* file) { +HTS_STATIC int is_hypertext_mime(httrackp *opt,const char* mime, const char* file) { if (is_hypertext_mime__(mime)) return 1; - if (may_unknown(mime)) { + if (may_unknown(opt,mime)) { char guessed[256]; guessed[0] = '\0'; - guess_httptype(guessed, file); + guess_httptype(opt,guessed, file); return is_hypertext_mime__(guessed); } 
return 0; } // check if (mime, file) might be "false" hypertext -static int may_be_hypertext_mime(const char* mime, const char* file) { +HTS_STATIC int may_be_hypertext_mime(httrackp *opt,const char* mime, const char* file) { if (may_be_hypertext_mime__(mime)) return 1; - if (file != NULL && file[0] != '\0' && may_unknown(mime)) { + if (file != NULL && file[0] != '\0' && may_unknown(opt,mime)) { char guessed[256]; guessed[0] = '\0'; - guess_httptype(guessed, file); + guess_httptype(opt,guessed, file); return may_be_hypertext_mime__(guessed); } return 0; } // compare (mime, file) with reference -static int compare_mime(const char* mime, const char* file, const char* reference) { +HTS_STATIC int compare_mime(httrackp *opt,const char* mime, const char* file, const char* reference) { if (is_hypertext_mime__(mime) || may_be_hypertext_mime__(mime)) return strfield2(mime, reference); - if (file != NULL && file[0] != '\0' && may_unknown(mime)) { + if (file != NULL && file[0] != '\0' && may_unknown(opt,mime)) { char guessed[256]; guessed[0] = '\0'; - guess_httptype(guessed, file); + guess_httptype(opt,guessed, file); return strfield2(guessed, reference); } return 0; @@ -511,8 +603,7 @@ static int compare_mime(const char* mime, const char* file, const char* referenc #ifdef _WIN32_WCE_XXC extern char cwd[MAX_PATH+1]; -static char *getcwd_ce(char *buffer, int maxlen) -{ +HTS_STATIC char *getcwd_ce(char *buffer, int maxlen) { TCHAR fileUnc[MAX_PATH+1]; char* plast; diff --git a/src/htsmd5.c b/src/htsmd5.c index adbdb67..114ae7a 100644 --- a/src/htsmd5.c +++ b/src/htsmd5.c @@ -42,12 +42,13 @@ Please visit our Website: http://www.httrack.com /* Internal engine bytecode */ #define HTS_INTERNAL_BYTECODE -#include "htsmd5.h" -#include "md5.h" #include #include +#include +#include "htsmd5.h" +#include "md5.h" -int domd5mem(const unsigned char * buf, int len, +int domd5mem(const unsigned char * buf, size_t len, unsigned char * digest, int asAscii) { int endian = 1; unsigned char 
bindigest[16]; @@ -56,7 +57,7 @@ int domd5mem(const unsigned char * buf, int len, MD5_CTX ctx; MD5Init(&ctx, * ( (char*) &endian)); - MD5Update(&ctx, buf, len); + MD5Update(&ctx, buf, (unsigned int) len); MD5Final(bindigest, &ctx); #else /* Broken md5.. temporary hack */ @@ -87,7 +88,6 @@ int domd5mem(const unsigned char * buf, int len, unsigned long int md5sum32(const char* buff) { unsigned char md5digest[16]; - unsigned char* md5digest_ = md5digest; - domd5mem(buff,strlen(buff),md5digest,0); + domd5mem(buff,(int)strlen(buff),md5digest,0); return *( (long int*)(char*)md5digest ); } diff --git a/src/htsmd5.h b/src/htsmd5.h index 8892895..3a8c9bb 100644 --- a/src/htsmd5.h +++ b/src/htsmd5.h @@ -44,7 +44,7 @@ Please visit our Website: http://www.httrack.com /* Library internal definictions */ #ifdef HTS_INTERNAL_BYTECODE -int domd5mem(const unsigned char * buf, int len, +int domd5mem(const unsigned char * buf, size_t len, unsigned char * digest, int asAscii); unsigned long int md5sum32(const char* buff); #endif diff --git a/src/htsmms.c b/src/htsmms.c index 3d76cda..3c25d80 100644 --- a/src/htsmms.c +++ b/src/htsmms.c @@ -47,9 +47,7 @@ Please visit our Website: http://www.httrack.com #if HTS_USEMMS -#include "htsbase.h" -#include "htsnet.h" -#include "htsthread.h" +#include "htscore.h" #include "htsmms.h" #include "mmsrip/mms.h" @@ -57,10 +55,10 @@ Please visit our Website: http://www.httrack.com #define FTP_STATUS_READY 1001 static int run_launch_mms(MMSDownloadStruct* back); -static PTHREAD_TYPE PTHREAD_TYPE_FNC back_launch_mms( void* pP ) { +static void back_launch_mms( void* pP ) { MMSDownloadStruct *pStruct = (MMSDownloadStruct*)pP; if (pStruct == NULL) - return PTHREAD_RETURN; + return ; /* Initialize */ hts_init(); @@ -79,7 +77,7 @@ static PTHREAD_TYPE PTHREAD_TYPE_FNC back_launch_mms( void* pP ) { /* Uninitialize */ hts_uninit(); - return PTHREAD_RETURN; + return ; } /* download cancelled */ @@ -96,7 +94,7 @@ static int stop_mms(lien_back* back) { void 
launch_mms(const MMSDownloadStruct* pStruct) { MMSDownloadStruct *pCopy = calloc(sizeof(MMSDownloadStruct), 1); memcpy(pCopy, pStruct, sizeof(*pCopy)); - (void) hts_newthread(back_launch_mms, 0, (void*) pCopy); + hts_newthread(back_launch_mms, (void*) pCopy); } /* Code mainly written by Nicolas BENOIT */ @@ -105,6 +103,8 @@ static int run_launch_mms(MMSDownloadStruct* pStruct) { httrackp* opt = pStruct->pOpt; /* */ char url[HTS_URLMAXSIZE*2]; + char catbuff[CATBUFF_SIZE]; + char catbuff2[CATBUFF_SIZE]; MMS *mms; FILE *f; ssize_t len_written; @@ -112,25 +112,25 @@ static int run_launch_mms(MMSDownloadStruct* pStruct) { int delay = opt->mms_maxtime; time_t end = time(NULL) + delay; short checkPending = 0; - INTsys existingSize = fsize(back->url_sav); + ssize_t existingSize = fsize(back->url_sav); // effacer strcpybuff(back->r.msg,""); - back->status=1000; - back->r.statuscode=200; + back->status=STATUS_FTP_TRANSFER; + back->r.statuscode=HTTP_OK; back->r.size=0; /* Create file */ if (existingSize > 0) { /* back->r.out = fileappend(back->url_sav); */ - (void) unlink(fconcat(back->url_sav, ".old")); - if (rename(fconcat(back->url_sav, ""), fconcat(back->url_sav, ".old")) == 0) { + (void) unlink(fconcat(catbuff,back->url_sav, ".old")); + if (rename(fconcat(catbuff,back->url_sav, ""), fconcat(catbuff2,back->url_sav, ".old")) == 0) { checkPending = 1; } - back->r.out = filecreate(back->url_sav); + back->r.out = filecreate(&pStruct->pOpt->state.strc, back->url_sav); } else { - back->r.out = filecreate(back->url_sav); + back->r.out = filecreate(&pStruct->pOpt->state.strc, back->url_sav); } if ((f = back->r.out) != NULL) { // create mms resource @@ -151,18 +151,18 @@ static int run_launch_mms(MMSDownloadStruct* pStruct) { { fclose(back->r.out); f = back->r.out = NULL; - if (unlink(fconcat(back->url_sav, "")) == 0 - && rename(fconcat(back->url_sav, ".old"), fconcat(back->url_sav, "")) == 0) + if (unlink(fconcat(catbuff, back->url_sav, "")) == 0 + && rename(fconcat(catbuff, 
back->url_sav, ".old"), fconcat(catbuff2, back->url_sav, "")) == 0) { back->r.notmodified = 1; - back->r.statuscode = 200; + back->r.statuscode = HTTP_OK; strcpybuff(back->r.msg, "Not modified"); } else { - back->r.statuscode = 500; + back->r.statuscode = HTTP_INTERNAL_SERVER_ERROR; strcpybuff(back->r.msg, "Unable to rename previous file (not updated)"); } } else { - (void) unlink(fconcat(back->url_sav, ".old")); + (void) unlink(fconcat(catbuff, back->url_sav, ".old")); } } @@ -180,8 +180,7 @@ static int run_launch_mms(MMSDownloadStruct* pStruct) { if ( len_written == 0 ) { break; } else if ( len_written == -1 ) { - back->r.statuscode = -1; - back->r.statuscode = 500; + back->r.statuscode = HTTP_INTERNAL_SERVER_ERROR; strcpybuff(back->r.msg, "Unable to write stream data"); break; } @@ -194,51 +193,39 @@ static int run_launch_mms(MMSDownloadStruct* pStruct) { if ( delay != 0 && end <= time(NULL) ) { delay = -1; - back->r.statuscode = 200; + back->r.statuscode = HTTP_OK; strcpybuff(back->r.msg, "Download interrupted"); break; } } // while - back->r.statuscode = 0; /* Finished */ + back->r.statuscode = HTTP_OK; /* Finished */ } else if (f != NULL) { - back->r.statuscode = -1; - back->r.statuscode = 500; + back->r.statuscode = HTTP_INTERNAL_SERVER_ERROR; strcpybuff(back->r.msg, "Can not begin ripping"); } } else { - back->r.statuscode = -1; - back->r.statuscode = 500; + back->r.statuscode = HTTP_INTERNAL_SERVER_ERROR; strcpybuff(back->r.msg, "Can not write stream header"); } } else { - back->r.statuscode = -1; - back->r.statuscode = 500; + back->r.statuscode = HTTP_INTERNAL_SERVER_ERROR; strcpybuff(back->r.msg, "Can not handshake"); } mms_disconnect ( mms ); } else { - back->r.statuscode = -1; - back->r.statuscode = 500; + back->r.statuscode = HTTP_INTERNAL_SERVER_ERROR; strcpybuff(back->r.msg, "Can not connect"); } mms_destroy ( mms ); } else { - back->r.statuscode = -1; - back->r.statuscode = 500; + back->r.statuscode = HTTP_INTERNAL_SERVER_ERROR; 
strcpybuff(back->r.msg, "Can not create mms resource"); } } else { - back->r.statuscode = -1; - back->r.statuscode = 500; + back->r.statuscode = HTTP_INTERNAL_SERVER_ERROR; strcpybuff(back->r.msg, "Unable to open local output file"); } - - // End - if (back->r.statuscode != -1) { - back->r.statuscode=200; - strcpybuff(back->r.msg, "OK"); - } return 0; } diff --git a/src/htsmms.h b/src/htsmms.h index 43a6c1e..7ae22c1 100644 --- a/src/htsmms.h +++ b/src/htsmms.h @@ -42,21 +42,26 @@ Please visit our Website: http://www.httrack.com #ifndef HTSMMS_DEFH #define HTSMMS_DEFH -#include "htsglobal.h" - #if HTS_USEMMS -#include "htsbase.h" -#include "htsbasenet.h" -#include "htsthread.h" - -// lien_back -#include "htscore.h" +/* Forware definitions */ +#ifndef HTS_DEF_FWSTRUCT_lien_back +#define HTS_DEF_FWSTRUCT_lien_back +typedef struct lien_back lien_back; +#endif +#ifndef HTS_DEF_FWSTRUCT_httrackp +#define HTS_DEF_FWSTRUCT_httrackp +typedef struct httrackp httrackp; +#endif -typedef struct MMSDownloadStruct { +#ifndef HTS_DEF_FWSTRUCT_MMSDownloadStruct +#define HTS_DEF_FWSTRUCT_MMSDownloadStruct +typedef struct MMSDownloadStruct MMSDownloadStruct; +#endif +struct MMSDownloadStruct { lien_back *pBack; httrackp *pOpt; -} MMSDownloadStruct; +}; void launch_mms(const MMSDownloadStruct* pStruct); #endif diff --git a/src/htsmodules.c b/src/htsmodules.c index ba3927e..1049d36 100644 --- a/src/htsmodules.c +++ b/src/htsmodules.c @@ -41,7 +41,10 @@ Please visit our Website: http://www.httrack.com #include "htsglobal.h" #include "htsmodules.h" #include "htsopt.h" -extern int fspc(FILE* fp,char* type); +#include "htsbasenet.h" +#include "htslib.h" + +extern int fspc(httrackp *opt,FILE* fp,const char* type); #ifndef _WIN32 #if HTS_DLOPEN @@ -52,23 +55,10 @@ extern int fspc(FILE* fp,char* type); /* >>> Put all modules definitions here */ #include "htszlib.h" #include "htsbase.h" - -typedef int (*t_hts_detect_swf)(htsmoduleStruct* str); -typedef int (*t_hts_parse_swf)(htsmoduleStruct* 
str); -/* <<< */ - -/* >>> Put all modules includes here */ -#include "htsjava.h" -#if HTS_USESWF -#endif /* <<< */ /* >>> Put all modules variables here */ -int swf_is_available = 0; -t_hts_detect_swf hts_detect_swf = NULL; -t_hts_parse_swf hts_parse_swf = NULL; - int gz_is_available = 0; #if 0 t_gzopen gzopen = NULL; @@ -98,9 +88,28 @@ t_SSL_load_error_strings SSL_load_error_strings = NULL; int V6_is_available = HTS_INET6; -char WHAT_is_available[64]=""; +static char WHAT_is_available[64]=""; /* <<< */ +HTSEXT_API const char* hts_get_version_info(httrackp *opt) { + size_t size; + int i; + strcpy(opt->state.HTbuff, WHAT_is_available); + size = strlen(opt->state.HTbuff); + for(i = 0 ; i < opt->libHandles.count ; i++) { + const char *name = opt->libHandles.handles[i].moduleName; + if (name != NULL) { + size_t nsize = strlen(name) + sizeof("+"); + size += nsize; + if (size + 1 >= sizeof(opt->state.HTbuff)) + break; + strcat(opt->state.HTbuff, "+"); + strcat(opt->state.HTbuff, name); + } + } + return opt->state.HTbuff; +} + /* memory checks */ HTSEXT_API htsErrorCallback htsCallbackErr = NULL; HTSEXT_API int htsMemoryFastXfr = 1; /* fast xfr by default */ @@ -119,44 +128,39 @@ void abortLog__fnc(char* msg, char* file, int line) { } HTSEXT_API t_abortLog abortLog__ = abortLog__fnc; /* avoid VC++ inlining */ -static void htspe_log(htsmoduleStruct* str, char* msg); +static void htspe_log(htsmoduleStruct* str, const char* msg); int hts_parse_externals(htsmoduleStruct* str) { - /* >>> Put all module calls here */ - - /* JAVA */ - if (hts_detect_java(str)) { - htspe_log(str, "java-lib"); - return hts_parse_java(str); - } - -#if HTS_USESWF - /* FLASH - (external module derivated from Macromedia(tm)'s classes) - */ - else if (swf_is_available && hts_detect_swf(str)) { - htspe_log(str, "swf-lib"); - return hts_parse_swf(str); + str->wrapper_name = "wrapper-lib"; + + /* External callback */ + if (RUN_CALLBACK1(str->opt, detect, str)) { + if (str->wrapper_name == NULL) + 
str->wrapper_name = "wrapper-lib"; + /* Blacklisted */ + if (multipleStringMatch(str->wrapper_name, StringBuff(str->opt->mod_blacklist))) { + return -1; + } else { + htspe_log(str, str->wrapper_name); + return RUN_CALLBACK1(str->opt, parse, str); + } } -#endif - /* <<< */ - /* Not detected */ return -1; } -static void addCallback(htscallbacks* chain, void* moduleHandle, htscallbacksfncptr exitFnc) { - while(chain->next != NULL) { - chain = chain->next; - } - chain->next = calloct(1, sizeof(htscallbacks)); - assertf(chain->next != NULL); - chain = chain->next; - memset(chain, 0, sizeof(*chain)); - chain->exitFnc = exitFnc; - chain->moduleHandle = moduleHandle; -} +//static void addCallback(htscallbacks* chain, void* moduleHandle, htscallbacksfncptr exitFnc) { +// while(chain->next != NULL) { +// chain = chain->next; +// } +// chain->next = calloct(1, sizeof(htscallbacks)); +// assertf(chain->next != NULL); +// chain = chain->next; +// memset(chain, 0, sizeof(*chain)); +// chain->exitFnc = exitFnc; +// chain->moduleHandle = moduleHandle; +//} void clearCallbacks(htscallbacks* chain_); void clearCallbacks(htscallbacks* chain_) { @@ -189,116 +193,67 @@ void clearCallbacks(htscallbacks* chain_) { chain_->next = NULL; // Empty } -void* getFunctionPtr(httrackp* opt, char* file_, char* fncname); -void* getFunctionPtr(httrackp* opt, char* file_, char* fncname) { - char BIGSTK file[1024]; +void* openFunctionLib(const char* file_) { void* handle; - void* userfunction = NULL; - strcpybuff(file, file_); + char *file = malloct(strlen(file_) + 32); + strcpy(file, file_); #ifdef _WIN32 - handle = LoadLibraryA((char*)file); + handle = LoadLibraryA(file); if (handle == NULL) { - strcatbuff(file, ".dll"); - handle = LoadLibraryA((char*)file); + sprintf(file, "%s.dll", file_); + handle = LoadLibraryA(file); } #else handle = dlopen(file, RTLD_LAZY); if (handle == NULL) { - strcatbuff(file, ".so"); + sprintf(file, "lib%s.so", file_); handle = dlopen(file, RTLD_LAZY); } #endif + 
freet(file); + return handle; +} + +void closeFunctionLib(void* handle) { +#ifdef _WIN32 + FreeLibrary(handle); +#else + dlclose(handle); +#endif +} + +void* getFunctionPtr(void* handle, const char* fncname_) { if (handle) { - /* Thanks to Lars Clausen for the "wrapper-init" patch */ - /* If given arguments, call "_init" */ - char BIGSTK tmpName[1024]; + void* userfunction = NULL; + char *fncname = strdupt(fncname_); + + /* Strip optional comma */ char *comma; if ((comma = strchr(fncname, ',')) != NULL) { /* empty arg */ *comma++ = '\0'; } - /* speficic plug init */ - { - t_htsWrapperPlugInit initfunction; - sprintf(tmpName, "%s_init", fncname); - initfunction = (t_htsWrapperPlugInit)DynamicGet(handle, (char*)tmpName); - if (initfunction != NULL) { - int result = (int) initfunction(comma); - if (!result) { - if (userfunction == NULL) { -#ifdef _WIN32 - FreeLibrary(handle); -#else - dlclose(handle); -#endif - } - return NULL; - } - } - } - /* wrapper_init() */ - { - t_htsWrapperInit initfunction = (t_htsWrapperInit)DynamicGet(handle, (char*)"wrapper_init"); - if (initfunction != NULL) { - if (! 
initfunction(fncname, comma)) { - if (userfunction == NULL) { -#ifdef _WIN32 - FreeLibrary(handle); -#else - dlclose(handle); -#endif - } - return NULL; - } - } - } /* the function itself */ userfunction = (void*) DynamicGet(handle, (char*)fncname); - if (userfunction == NULL) { -#ifdef _WIN32 - FreeLibrary(handle); -#else - dlclose(handle); -#endif - } else { - /* optional exit wrapper */ - t_htsWrapperExit exitFnc = (t_htsWrapperExit) DynamicGet(handle, (char*)"wrapper_exit"); - addCallback(&opt->state.callbacks, handle, exitFnc); // exitFnc can be null - } + + freet(fncname); + + return userfunction; } - return userfunction; + return NULL; } -void htspe_init() { +void* ssl_handle = NULL; +#ifdef _WIN32 +void* ssl_handle_2 = NULL; +#endif +void htspe_init(void) { static int initOk = 0; if (!initOk) { initOk = 1; - /* >>> Put all module initializations here */ - - - /* Zlib */ + /* Zlib is now statically linked */ gz_is_available = 1; - /* -#if HTS_DLOPEN - { - void* handle; -#ifdef _WIN32 - handle = LoadLibrary("zlib"); -#else - handle = dlopen("libz.so.1", RTLD_LAZY); -#endif - if (handle) { - gzopen = (t_gzopen) DynamicGet(handle, "gzopen"); - gzread = (t_gzread) DynamicGet(handle, "gzread"); - gzclose = (t_gzclose) DynamicGet(handle, "gzclose"); - if (gzopen && gzread && gzclose) { - gz_is_available = 1; - } - } - } -#endif - */ /* OpenSSL */ #if HTS_DLOPEN @@ -315,11 +270,16 @@ void htspe_init() { if (handle == NULL) { handle = dlopen("libssl.so.0.9.6", RTLD_LAZY); } + if (handle == NULL) { + /* Try harder */ + handle = dlopen("libssl.so", RTLD_LAZY); + } if (handle == NULL) { /* Try harder */ handle = dlopen("libssl.so.0", RTLD_LAZY); } #endif + ssl_handle = handle; if (handle) { SSL_shutdown = (t_SSL_shutdown) DynamicGet(handle, (char*)"SSL_shutdown"); SSL_free = (t_SSL_free) DynamicGet(handle, (char*)"SSL_free"); @@ -339,6 +299,7 @@ void htspe_init() { SSL_CTX_ctrl = (t_SSL_CTX_ctrl) DynamicGet(handle, (char*)"SSL_CTX_ctrl"); #ifdef _WIN32 handle = 
LoadLibraryA((char*)"libeay32"); + ssl_handle_2 = handle; #endif ERR_load_crypto_strings = (t_ERR_load_crypto_strings) DynamicGet(handle, (char*)"ERR_load_crypto_strings"); ERR_error_string = (t_ERR_error_string) DynamicGet(handle, (char*)"ERR_error_string"); @@ -354,53 +315,35 @@ void htspe_init() { #endif /* */ - /* - FLASH - Load the library on-the-fly, if available - If not, that's not a problem - */ -#if HTS_DLOPEN - { -#ifdef _WIN32 - void* handle = LoadLibraryA((char*)"htsswf"); -#else - void* handle = dlopen("libhtsswf.so.1", RTLD_LAZY); -#endif - if (handle) { - hts_detect_swf = (t_hts_detect_swf) DynamicGet(handle, "hts_detect_swf"); - hts_parse_swf = (t_hts_parse_swf) DynamicGet(handle, "hts_parse_swf"); - if (hts_detect_swf && hts_parse_swf) { - swf_is_available = 1; - } - } - // FreeLibrary(handle); - // dlclose(handle); - } -#endif - - /* <<< */ - /* Options availability */ - sprintf(WHAT_is_available, "%s%s%s%s", + sprintf(WHAT_is_available, "%s%s%s", V6_is_available ? "" : "-noV6", gz_is_available ? "" : "-nozip", - SSL_is_available ? "" : "-nossl", - swf_is_available ? "+swf" : ""); - - + SSL_is_available ? 
"" : "-nossl"); } } -static void htspe_log(htsmoduleStruct* str, char* msg) { - char* savename = str->filename; +void htspe_uninit(void) { +#ifdef _WIN32 + CloseHandle(ssl_handle); + CloseHandle(ssl_handle_2); + ssl_handle = NULL; + ssl_handle_2 = NULL; +#else + dlclose(ssl_handle); + ssl_handle = NULL; +#endif +} + +static void htspe_log(htsmoduleStruct* str, const char* msg) { + const char* savename = str->filename; httrackp* opt = (httrackp*) str->opt; if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"(External module): parsing %s using module %s"LF, + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(External module): parsing %s using module %s"LF, savename, msg); } } -HTSEXT_API const char* hts_is_available(void); HTSEXT_API const char* hts_is_available(void) { return WHAT_is_available; } diff --git a/src/htsmodules.h b/src/htsmodules.h index 6c4a305..5d0c6c1 100644 --- a/src/htsmodules.h +++ b/src/htsmodules.h @@ -38,24 +38,50 @@ Please visit our Website: http://www.httrack.com #ifndef HTS_MODULES #define HTS_MODULES +/* Forware definitions */ +#ifndef HTS_DEF_FWSTRUCT_lien_url +#define HTS_DEF_FWSTRUCT_lien_url +typedef struct lien_url lien_url; +#endif +#ifndef HTS_DEF_FWSTRUCT_httrackp +#define HTS_DEF_FWSTRUCT_httrackp +typedef struct httrackp httrackp; +#endif +#ifndef HTS_DEF_FWSTRUCT_struct_back +#define HTS_DEF_FWSTRUCT_struct_back +typedef struct struct_back struct_back; +#endif +#ifndef HTS_DEF_FWSTRUCT_cache_back +#define HTS_DEF_FWSTRUCT_cache_back +typedef struct cache_back cache_back; +#endif +#ifndef HTS_DEF_FWSTRUCT_hash_struct +#define HTS_DEF_FWSTRUCT_hash_struct +typedef struct hash_struct hash_struct; +#endif + /* Function type to add links inside the module link : link to add (absolute or relative) str : structure defined below Returns 1 if the link was added, 0 if not */ +#ifndef HTS_DEF_FWSTRUCT_htsmoduleStruct +#define HTS_DEF_FWSTRUCT_htsmoduleStruct typedef struct htsmoduleStruct htsmoduleStruct; +#endif 
typedef int (* t_htsAddLink)(htsmoduleStruct* str, char* link); /* Structure passed to the module */ struct htsmoduleStruct { /* Read-only elements */ - char* filename; /* filename (C:\My Web Sites\...) */ + const char* filename; /* filename (C:\My Web Sites\...) */ int size; /* size of filename (should be > 0) */ - char* mime; /* MIME type of the object */ - char* url_host; /* incoming hostname (www.foo.com) */ - char* url_file; /* incoming filename (/bar/bar.gny) */ + const char* mime; /* MIME type of the object */ + const char* url_host; /* incoming hostname (www.foo.com) */ + const char* url_file; /* incoming filename (/bar/bar.gny) */ /* Write-only */ + const char* wrapper_name; /* name of wrapper (static string) */ char* err_msg; /* if an error occured, the error message (max. 1KB) */ /* Read/Write */ @@ -78,20 +104,20 @@ struct htsmoduleStruct { void* userdef; /* can be used by callback routines */ - /* ---- ---- ---- */ + /* The parser httrackp structure (may be used) */ + httrackp* opt; /* Internal use - please don't touch */ - void* liens; - void* opt; - void* sback; - void* cache; - void* hashptr; + lien_url** liens; + struct_back* sback; + cache_back* cache; + hash_struct* hashptr; int numero_passe; int add_tab_alloc; /* */ int* lien_tot_; int* ptr_; - int* lien_size_; + size_t* lien_size_; char** lien_buffer_; /* Internal use - please don't touch */ @@ -105,14 +131,16 @@ typedef int (*t_htsWrapperPlugInit)(char *args); /* Library internal definictions */ #ifdef HTS_INTERNAL_BYTECODE +HTSEXT_API const char* hts_get_version_info(httrackp *opt); +HTSEXT_API const char* hts_is_available(void); extern void htspe_init(void); +extern void htspe_uninit(void); extern int hts_parse_externals(htsmoduleStruct* str); extern int gz_is_available; -extern int swf_is_available; +/*extern int swf_is_available;*/ extern int SSL_is_available; extern int V6_is_available; -extern char WHAT_is_available[64]; #endif #endif diff --git a/src/htsname.c b/src/htsname.c index 
0176c5c..c0f74d8 100644 --- a/src/htsname.c +++ b/src/htsname.c @@ -38,17 +38,14 @@ Please visit our Website: http://www.httrack.com /* Internal engine bytecode */ #define HTS_INTERNAL_BYTECODE +#include "htscore.h" #include "htsname.h" - -/* specific definitions */ -#include "htsbase.h" -#include "htstools.h" #include "htsmd5.h" +#include "htstools.h" #include -/* END specific definitions */ #undef test_flush -#define test_flush if (opt->flush) { fflush(opt->log); fflush(opt->errlog); } +#define test_flush if (opt->flush) { fflush(opt->log); fflush(opt->log); } #define ADD_STANDARD_PATH \ { /* ajout nom */\ @@ -80,26 +77,28 @@ static const char *hts_tbdev[] = #define URLSAVENAME_WAIT_FOR_AVAILABLE_SOCKET() do { \ - int prev = _hts_in_html_parsing; \ + int prev = opt->state._hts_in_html_parsing; \ while(back_pluggable_sockets_strict(sback, opt) <= 0) { \ - _hts_in_html_parsing = 6; \ + opt->state. _hts_in_html_parsing = 6; \ /* Wait .. */ \ back_wait(sback,opt,cache,0); \ /* Transfer rate */ \ engine_stats(); \ /* Refresh various stats */ \ HTS_STAT.stat_nsocket=back_nsoc(sback); \ - HTS_STAT.stat_errors=fspc(NULL,"error"); \ - HTS_STAT.stat_warnings=fspc(NULL,"warning"); \ - HTS_STAT.stat_infos=fspc(NULL,"info"); \ + HTS_STAT.stat_errors=fspc(opt,NULL,"error"); \ + HTS_STAT.stat_warnings=fspc(opt,NULL,"warning"); \ + HTS_STAT.stat_infos=fspc(opt,NULL,"info"); \ HTS_STAT.nbk=backlinks_done(sback,liens,lien_tot,ptr); \ HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,sback); \ /* Check */ \ - if (!hts_htmlcheck_loop(sback->lnk, sback->count,-1,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) { \ - return -1; \ + { \ + if (!RUN_CALLBACK7(opt, loop, sback->lnk, sback->count,-1,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) { \ + return -1; \ + } \ } \ } \ - _hts_in_html_parsing = prev; \ + opt->state._hts_in_html_parsing = prev; \ } while(0) @@ -112,9 +111,9 @@ int url_savename(char* adr_complete, char* fil_complete, char* 
save, lien_url** liens, int lien_tot, struct_back* sback, cache_back* cache, hash_struct* hash, int ptr, int numero_passe, const lien_back* headers) { - const char* mime_type = headers ? headers->r.contenttype : NULL; + char catbuff[CATBUFF_SIZE]; + const char* mime_type = ( headers && HTTP_IS_OK(headers->r.statuscode) ) ? headers->r.contenttype : NULL; lien_back* const back = sback->lnk; - const int back_max = sback->count; /* */ char BIGSTK newfil[HTS_URLMAXSIZE*2]; /* ="" */ /*char BIGSTK normadr_[HTS_URLMAXSIZE*2];*/ @@ -263,7 +262,7 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, fil=newfil; } // Decode remaining % - strcpybuff(fil,unescape_http(fil)); + strcpybuff(fil,unescape_http(catbuff,fil)); // , BUT do not decode high chars //strcpybuff(fil,unescape_http_unharm(fil, 1)); // YES (not server side, but fs/client side) @@ -272,7 +271,7 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, /* .asx hack */ if (headers != NULL && headers->r.cdispo[0] != 0 && strfield(headers->r.contenttype, "video/") - && strfield2(get_ext(headers->r.cdispo), "asx") == 0) + && strfield2(get_ext(OPT_GET_BUFF(opt),headers->r.cdispo), "asx") == 0) { ext_chg = 1; strcpybuff(ext, "asx"); @@ -280,7 +279,7 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, else if (headers != NULL && headers->r.contenttype[0] != 0 && strfield2(headers->r.contenttype, "video/x-ms-asf")) { - char *exts = get_ext(headers->url_fil); + char *exts = get_ext(OPT_GET_BUFF(opt),headers->url_fil); if (strfield2(exts, "wmv") == 0) { ext_chg = 1; @@ -308,12 +307,12 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, if (opt->savename_delayed == 2) is_html = -1; /* ALWAYS delay type */ else - is_html = ishtml(fil); + is_html = ishtml(opt,fil); switch ( is_html ) { /* .html,.shtml,.. 
*/ case 1: if ( - (strfield2(get_ext(fil),"html") == 0) - && (strfield2(get_ext(fil),"htm") == 0) + (strfield2(get_ext(OPT_GET_BUFF(opt),fil),"html") == 0) + && (strfield2(get_ext(OPT_GET_BUFF(opt),fil),"htm") == 0) ) { strcpybuff(ext,"html"); @@ -322,10 +321,10 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, break; case 0: if (!strnotempty(ext)) { - if (is_userknowntype(fil)) { // mime known by user + if (is_userknowntype(opt,fil)) { // mime known by user char BIGSTK mime[1024]; mime[0]=ext[0]='\0'; - get_userhttptype(0,mime,fil); + get_userhttptype(opt,mime,fil); if (strnotempty(mime)) { give_mimext(ext,mime); if (strnotempty(ext)) { @@ -348,20 +347,20 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, ) { // tester type avec requète HEAD si on ne connait pas le type du fichier if (!( (opt->check_type==1) && (fil[strlen(fil)-1]=='/') )) // slash doit être html? - if (opt->savename_delayed == 2 || (ishtest=ishtml(fil)) < 0) { // on ne sait pas si c'est un html ou un fichier.. + if ( opt->savename_delayed == 2 || (ishtest=ishtml(opt,fil)) < 0) { // on ne sait pas si c'est un html ou un fichier.. // lire dans le cache htsblk r = cache_read(opt,cache,adr,fil,NULL,NULL); // test uniquement if (r.statuscode != -1) { // pas d'erreur de lecture cache char s[16]; s[0]='\0'; if ( (opt->debug>1) && (opt->log!=NULL) ) { - fspc(opt->log,"debug"); fprintf(opt->log,"Testing link type (from cache) %s%s"LF,adr_complete,fil_complete); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"Testing link type (from cache) %s%s"LF,adr_complete,fil_complete); test_flush; } if (strnotempty(r.cdispo)) { /* filename given */ ext_chg=2; /* change filename */ strcpybuff(ext,r.cdispo); } - else if (!may_unknown(r.contenttype) || ishtest == -2) { // on peut patcher à priori? + else if (!may_unknown(opt,r.contenttype) || ishtest == -2) { // on peut patcher à priori? 
give_mimext(s,r.contenttype); // obtenir extension if (strnotempty(s)>0) { // on a reconnu l'extension ext_chg=1; @@ -369,14 +368,14 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, } } // - } else if (opt->savename_delayed != 2 && is_userknowntype(fil)) { /* PATCH BY BRIAN SCHRÖDER. + } else if ( opt->savename_delayed != 2 && is_userknowntype(opt,fil)) { /* PATCH BY BRIAN SCHRÖDER. Lookup mimetype not only by extension, but also by filename */ /* Note: "foo.cgi => text/html" means that foo.cgi shall have the text/html MIME file type, that is, ".html" */ char BIGSTK mime[1024]; mime[0]=ext[0]='\0'; - get_userhttptype(0, mime, fil); + get_userhttptype(opt, mime, fil); if (strnotempty(mime)) { give_mimext(ext, mime); if (strnotempty(ext)) { @@ -392,7 +391,14 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, give_mimext(ext, mime_type); } if (strnotempty(ext)) { - ext_chg = 1; + char mime_from_file[128]; + mime_from_file[0] = 0; + get_httptype(opt, mime_from_file, fil, 1); + if (!strnotempty(mime_from_file) || strcasecmp(mime_type, mime_from_file) != 0) { /* different mime for this type */ + ext_chg = 1; + } else { + ext_chg = 0; + } } } else { /* Avoid collisions (no collisionning detection) */ @@ -404,9 +410,7 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, // test imposible dans le cache, faire une requête else { // -#if HTS_ANALYSTE - int hihp=_hts_in_html_parsing; -#endif + int hihp = opt->state._hts_in_html_parsing; int has_been_moved=0; char BIGSTK curr_adr[HTS_URLMAXSIZE*2],curr_fil[HTS_URLMAXSIZE*2]; @@ -418,20 +422,18 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, /* Rock'in */ curr_adr[0]=curr_fil[0]='\0'; -#if HTS_ANALYSTE - _hts_in_html_parsing=2; // test -#endif + opt->state. 
_hts_in_html_parsing=2; // test if ( (opt->debug>1) && (opt->log!=NULL) ) { - fspc(opt->log,"debug"); fprintf(opt->log,"Testing link type %s%s"LF,adr_complete,fil_complete); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"Testing link type %s%s"LF,adr_complete,fil_complete); test_flush; } strcpybuff(curr_adr,adr_complete); strcpybuff(curr_fil,fil_complete); // ajouter dans le backing le fichier en mode test // savename: rien car en mode test - if (back_add(sback,opt,cache,curr_adr,curr_fil,BACK_ADD_TEST,referer_adr,referer_fil,1,NULL)!=-1) { + if (back_add(sback,opt,cache,curr_adr,curr_fil,BACK_ADD_TEST,referer_adr,referer_fil,1)!=-1) { int b; - b=back_index(sback,curr_adr,curr_fil,BACK_ADD_TEST); + b=back_index(opt,sback,curr_adr,curr_fil,BACK_ADD_TEST); if (b>=0) { int stop_looping=0; int petits_tours=0; @@ -442,34 +444,28 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, back_wait(sback,opt,cache,0); } if (ptr>=0) { - back_fillmax(sback,opt,cache,liens,ptr,numero_passe,lien_tot); + back_fillmax(sback,opt,cache,liens,ptr,numero_passe,lien_tot); + } + + // on est obligé d'appeler le shell pour le refresh.. + // Transfer rate + engine_stats(); + + // Refresh various stats + HTS_STAT.stat_nsocket=back_nsoc(sback); + HTS_STAT.stat_errors=fspc(opt,NULL,"error"); + HTS_STAT.stat_warnings=fspc(opt,NULL,"warning"); + HTS_STAT.stat_infos=fspc(opt,NULL,"info"); + HTS_STAT.nbk=backlinks_done(sback,liens,lien_tot,ptr); + HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,sback); + + if (!RUN_CALLBACK7(opt, loop, sback->lnk, sback->count,b,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) { + return -1; + } else if (opt->state._hts_cancel || !back_checkmirror(opt)) { // cancel 2 ou 1 (cancel parsing) + back_delete(opt,cache,sback,b); // cancel test + stop_looping = 1; } - - // on est obligé d'appeler le shell pour le refresh.. 
-#if HTS_ANALYSTE - { - - // Transfer rate - engine_stats(); - - // Refresh various stats - HTS_STAT.stat_nsocket=back_nsoc(sback); - HTS_STAT.stat_errors=fspc(NULL,"error"); - HTS_STAT.stat_warnings=fspc(NULL,"warning"); - HTS_STAT.stat_infos=fspc(NULL,"info"); - HTS_STAT.nbk=backlinks_done(sback,liens,lien_tot,ptr); - HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,sback); - - if (!hts_htmlcheck_loop(sback->lnk, sback->count,b,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) { - return -1; - } else if (_hts_cancel || !back_checkmirror(opt)) { // cancel 2 ou 1 (cancel parsing) - back_delete(opt,cache,sback,b); // cancel test - stop_looping = 1; - } - } -#endif - - + // traitement des 304,303.. if (back[b].status<=0) { if (HTTP_IS_REDIRECT(back[b].r.statuscode)) { // agh moved.. un tit tour de plus @@ -497,7 +493,6 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, // check explicit forbidden - don't follow 3xx in this case { int set_prio_to=0; - robots_wizard* robots = (robots_wizard*) opt->robotsptr; if (hts_acceptlink(opt,ptr,lien_tot,liens, mov_adr,mov_fil, NULL, NULL, @@ -531,16 +526,16 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, methode=BACK_ADD_TEST; // tester avec HEAD else { methode=BACK_ADD_TEST2; // tester avec GET - if ( opt->errlog!=NULL ) { - fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Loop with HEAD request (during prefetch) at %s%s"LF,curr_adr,curr_fil); + if ( opt->log!=NULL ) { + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Loop with HEAD request (during prefetch) at %s%s"LF,curr_adr,curr_fil); test_flush; } } // Ajouter URLSAVENAME_WAIT_FOR_AVAILABLE_SOCKET(); - if (back_add(sback,opt,cache,mov_adr,mov_fil,methode,referer_adr,referer_fil,1,NULL)!=-1) { // OK - if ( (opt->debug>1) && (opt->errlog!=NULL) ) { - fspc(opt->errlog,"warning"); fprintf(opt->errlog,"(during prefetch) %s (%d) to link %s at 
%s%s"LF,back[b].r.msg,back[b].r.statuscode,back[b].r.location,curr_adr,curr_fil); + if (back_add(sback,opt,cache,mov_adr,mov_fil,methode,referer_adr,referer_fil,1)!=-1) { // OK + if ( (opt->debug>1) && (opt->log!=NULL) ) { + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"(during prefetch) %s (%d) to link %s at %s%s"LF,back[b].r.msg,back[b].r.statuscode,back[b].r.location,curr_adr,curr_fil); test_flush; } @@ -548,21 +543,21 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, back_maydelete(opt,cache,sback,b); strcpybuff(curr_adr,mov_adr); strcpybuff(curr_fil,mov_fil); - b=back_index(sback,curr_adr,curr_fil,methode); + b=back_index(opt,sback,curr_adr,curr_fil,methode); if (!get_test_request) has_been_moved = 1; // sinon ne pas forcer has_been_moved car non déplacé petits_tours++; // } else {// sinon on fait rien et on s'en va.. (ftp etc) - if ( (opt->debug>1) && (opt->errlog)) { - fspc(opt->errlog,"debug"); fprintf(opt->errlog,"Warning: Savename redirect backing error at %s%s"LF,mov_adr,mov_fil); + if ( (opt->debug>1) && (opt->log)) { + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"Warning: Savename redirect backing error at %s%s"LF,mov_adr,mov_fil); test_flush; } } } } else { - if ( opt->errlog!=NULL ) { - fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Unable to test %s%s (loop to same filename)"LF,adr_complete,fil_complete); + if ( opt->log!=NULL ) { + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Unable to test %s%s (loop to same filename)"LF,adr_complete,fil_complete); test_flush; } } @@ -570,8 +565,8 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, } } } else{ // arrêter les frais - if ( opt->errlog!=NULL ) { - fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Unable to test %s%s (loop)"LF,adr_complete,fil_complete); + if ( opt->log!=NULL ) { + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Unable to test %s%s (loop)"LF,adr_complete,fil_complete); test_flush; } } @@ -586,8 +581,8 @@ int url_savename(char* adr_complete, char* 
fil_complete, char* save, strcpybuff(back[b].r.contenttype,"text/html"); // message d'erreur en html // Finalement on, renvoie un erreur, pour ne toucher à rien dans le code // libérer emplacement backing - /*if (opt->errlog!=NULL) { - fspc(opt->errlog,0); fprintf(opt->errlog,"Error: (during prefetch) %s (%d) to link %s at %s%s"LF,back[b].r.msg,back[b].r.statuscode,back[b].r.location,curr_adr,curr_fil); + /*if (opt->log!=NULL) { + fspc(opt->log,0); fprintf(opt->log,"Error: (during prefetch) %s (%d) to link %s at %s%s"LF,back[b].r.msg,back[b].r.statuscode,back[b].r.location,curr_adr,curr_fil); test_flush; } back_delete(opt,cache,sback,b); @@ -602,7 +597,7 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, ext_chg=2; /* change filename */ strcpybuff(ext,back[b].r.cdispo); } - else if (!may_unknown(back[b].r.contenttype) || ishtest == -2 ) { // on peut patcher à priori? (pas interdit ou pas de type) + else if (!may_unknown(opt,back[b].r.contenttype) || ishtest == -2 ) { // on peut patcher à priori? (pas interdit ou pas de type) give_mimext(s,back[b].r.contenttype); // obtenir extension if (strnotempty(s)>0) { // on a reconnu l'extension ext_chg=1; @@ -635,16 +630,14 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, #if BDEBUG==1 printf("error while savename crash adding\n"); #endif - if (opt->errlog) { - fspc(opt->errlog,"error"); fprintf(opt->errlog,"Unexpected savename backing error at %s%s"LF,adr,fil_complete); + if (opt->log) { + HTS_LOG(opt,LOG_ERROR); fprintf(opt->log,"Unexpected savename backing error at %s%s"LF,adr,fil_complete); test_flush; } } // restaurer -#if HTS_ANALYSTE - _hts_in_html_parsing=hihp; -#endif + opt->state._hts_in_html_parsing=hihp; } // caché? 
} } @@ -679,7 +672,7 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, if (ext_chg) { // changer ext char* a=fil+strlen(fil)-1; if ( (opt->debug>1) && (opt->log!=NULL) ) { - fspc(opt->log,"debug"); + HTS_LOG(opt,LOG_DEBUG); if (ext_chg==1) fprintf(opt->log,"Changing link extension %s%s to .%s"LF,adr_complete,fil_complete,ext); else @@ -720,7 +713,7 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, // ajouter nom du site éventuellement en premier if (opt->savename_type == -1) { // utiliser savename_userdef! (%h%p/%n%q.%t) - char* a = opt->savename_userdef; + const char* a = StringBuff(opt->savename_userdef); char* b = save; /*char *nom_pos=NULL,*dot_pos=NULL; // Position nom et point */ char tok; @@ -791,7 +784,7 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, *d++ = *c++; } *d = '\0'; - d = unescape_http(name[0]); + d = unescape_http(catbuff,name[0]); if (d && *d) { strcpybuff(b, d); /* value */ b += strlen(b); @@ -923,9 +916,12 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, break; case 'Q': case 'q': /* query MD5 (128-bits/16-bits) GENERATED ONLY IF query string exists! */ - *b='\0'; - strncatbuff(b,url_md5(fil_complete),(tok == 'Q')?32:4); - b+=strlen(b); // pointer à la fin + { + char md5[32 + 2]; + *b='\0'; + strncatbuff(b,url_md5(md5, fil_complete),(tok == 'Q')?32:4); + b+=strlen(b); // pointer à la fin + } break; case 'r': case 'R': // protocol *b='\0'; @@ -1035,7 +1031,7 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, } // si un html à coup sûr - if ( (ext_chg!=0) ? (ishtml_ext(ext) == 1) : (ishtml(fil)==1) ) { + if ( (ext_chg!=0) ? (ishtml_ext(ext) == 1) : (ishtml(opt,fil)==1) ) { if (opt->savename_type%100==2) { // html/ strcatbuff(save, "html/"); } @@ -1053,7 +1049,7 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, if ((*a=='/') || (*a=='\\')) a++; // html? - if ( (ext_chg!=0) ? 
(ishtml_ext(ext)==1) : (ishtml(fil)==1) ) { + if ( (ext_chg!=0) ? (ishtml_ext(ext)==1) : (ishtml(opt,fil)==1) ) { if (opt->savename_type%100==5) strcatbuff(save,"html/"); } else { @@ -1078,7 +1074,7 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, int L; // pseudo-CRC sur fil et adr pour initialiser générateur aléatoire.. unsigned int s=0; - L=strlen(C); + L = (int) strlen(C); for(i=0;i<(int) strlen(fil_complete);i++) { s+=(unsigned int) fil_complete[i]; } @@ -1087,14 +1083,14 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, } srand(s); - j=strlen(save); + j = (int) strlen(save); for(i=0;i<8;i++) { char c=C[(rand()%L)]; save[i+j]=c; } save[i+j]='\0'; // ajouter extension - a=fil+strlen(fil)-1; + a = fil + strlen(fil) - 1; while(( a > fil) && (*a != '/') && (*a != '.')) a--; if (*a=='.') { strcatbuff(save,a); // ajouter @@ -1141,8 +1137,8 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, if (*a!='.') { // agh pas de point //strcatbuff(save,".none"); // a éviter strcatbuff(save,".html"); // préférable! 
- if ( (opt->debug>1) && (opt->errlog!=NULL) ) { - fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Default HTML type set for %s%s"LF,adr_complete,fil_complete); + if ( (opt->debug>1) && (opt->log!=NULL) ) { + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Default HTML type set for %s%s"LF,adr_complete,fil_complete); test_flush; } } @@ -1288,15 +1284,13 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, /* ensure that there is no ../ (potential vulnerability) */ fil_simplifie(save); -#if HTS_ANALYSTE - { - hts_htmlcheck_savename(adr_complete,fil_complete,referer_adr,referer_fil,save); - if ( (opt->debug>0) && (opt->log!=NULL) ) { - fspc(opt->log,"info"); fprintf(opt->log,"engine: save-name: local name: %s%s -> %s"LF,adr,fil,save); - test_flush; - } - } -#endif + /* callback */ + RUN_CALLBACK5(opt, savename, adr_complete,fil_complete,referer_adr,referer_fil,save); + + if ( (opt->debug>0) && (opt->log!=NULL) ) { + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"engine: save-name: local name: %s%s -> %s"LF,adr,fil,save); + test_flush; + } /* Ensure that the MANDATORY "temporary" extension is set */ if (ext_chg_delayed) { @@ -1317,9 +1311,9 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, } // chemin primaire éventuel A METTRE AVANT - if (strnotempty(opt->path_html)) { + if (strnotempty(StringBuff(opt->path_html))) { char BIGSTK tempo[HTS_URLMAXSIZE*2]; - strcpybuff(tempo,opt->path_html); + strcpybuff(tempo,StringBuff(opt->path_html)); strcatbuff(tempo,save); strcpybuff(save,tempo); } @@ -1330,8 +1324,6 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, int nom_ok; do { int i; - int len; - len=strlen(save); // taille // nom_ok=1; // à priori bon // on part de la fin pour optimiser, plus les opti de taille pour aller encore plus vite.. 
@@ -1426,6 +1418,8 @@ printf("\nEnd search, %s\n",fil_complete); /* nom avec md5 urilisé partout */ void standard_name(char* b,char* dot_pos,char* nom_pos,char* fil_complete,int short_ver) { + char md5[32 + 2]; + b[0]='\0'; /* Nom */ if (dot_pos) { @@ -1440,7 +1434,7 @@ void standard_name(char* b,char* dot_pos,char* nom_pos,char* fil_complete,int sh strncatbuff(b,nom_pos,8); } /* MD5 - 16 bits */ - strncatbuff(b,url_md5(fil_complete),4); + strncatbuff(b,url_md5(md5,fil_complete),4); /* Ext */ if (dot_pos) { strcatbuff(b,"."); @@ -1458,10 +1452,8 @@ void standard_name(char* b,char* dot_pos,char* nom_pos,char* fil_complete,int sh /* Petit md5 */ -char* url_md5(char* fil_complete) { - char* digest; +char* url_md5(char* digest, char* fil_complete) { char* a; - NOSTATIC_RESERVE(digest, char, 32+2); digest[0]='\0'; a=strchr(fil_complete,'?'); if (a) { @@ -1478,7 +1470,7 @@ char* url_md5(char* fil_complete) { // interne à url_savename: ajoute une chaîne à une autre avec \ -> / void url_savename_addstr(char* d,char* s) { - int i=strlen(d); + int i = (int) strlen(d); while(*s) { if (*s=='\\') // remplacer \ par des / d[i++]='/'; diff --git a/src/htsname.h b/src/htsname.h index 908e8e6..9d98622 100644 --- a/src/htsname.h +++ b/src/htsname.h @@ -40,11 +40,11 @@ Please visit our Website: http://www.httrack.com #ifndef HTSNAME_DEFH #define HTSNAME_DEFH -#include "htscore.h" +#include "htsglobal.h" #define DELAYED_EXT "delayed" #define IS_DELAYED_EXT(a) ( ((a) != NULL) && ((a)[0] != 0) && strendwith_(a, "." 
DELAYED_EXT) ) -static int strendwith_(const char* a, const char* b) { +HTS_STATIC int strendwith_(const char* a, const char* b) { int i, j; for(i = 0 ; a[i] != 0 ; i++); for(j = 0 ; b[j] != 0 ; j++); @@ -55,14 +55,49 @@ static int strendwith_(const char* a, const char* b) { return (j == -1); } - /* Library internal definictions */ #ifdef HTS_INTERNAL_BYTECODE + +/* Forward definitions */ +#ifndef HTS_DEF_FWSTRUCT_httrackp +#define HTS_DEF_FWSTRUCT_httrackp +typedef struct httrackp httrackp; +#endif +#ifndef HTS_DEF_FWSTRUCT_lien_url +#define HTS_DEF_FWSTRUCT_lien_url +typedef struct lien_url lien_url; +#endif +#ifndef HTS_DEF_FWSTRUCT_struct_back +#define HTS_DEF_FWSTRUCT_struct_back +typedef struct struct_back struct_back; +#endif +#ifndef HTS_DEF_FWSTRUCT_cache_back +#define HTS_DEF_FWSTRUCT_cache_back +typedef struct cache_back cache_back; +#endif +#ifndef HTS_DEF_FWSTRUCT_hash_struct +#define HTS_DEF_FWSTRUCT_hash_struct +typedef struct hash_struct hash_struct; +#endif +#ifndef HTS_DEF_FWSTRUCT_lien_back +#define HTS_DEF_FWSTRUCT_lien_back +typedef struct lien_back lien_back; +#endif + // note: 'headers' can either be null, or incomplete (only r member filled) -int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_adr,char* former_fil,char* referer_adr,char* referer_fil,httrackp* opt,lien_url** liens,int lien_tot,struct_back* sback,cache_back* cache,hash_struct* hash,int ptr,int numero_passe,const lien_back* headers); +int url_savename(char* adr_complete, char* fil_complete, char* save, + char* former_adr, char* former_fil, + char* referer_adr, char* referer_fil, + httrackp* opt, + lien_url** liens, int lien_tot, + struct_back* sback, + cache_back* cache, + hash_struct* hash, + int ptr, int numero_passe, + const lien_back* headers); void standard_name(char* b,char* dot_pos,char* nom_pos,char* fil_complete,int short_ver); void url_savename_addstr(char* d,char* s); -char* url_md5(char* fil_complete); +char* url_md5(char* digest_buffer, 
char* fil_complete); #endif #endif diff --git a/src/htsnet.h b/src/htsnet.h index 4880721..7c7fbe3 100644 --- a/src/htsnet.h +++ b/src/htsnet.h @@ -40,10 +40,11 @@ Please visit our Website: http://www.httrack.com #define HTS_DEFNETH /* basic net definitions */ +#include "htsglobal.h" #include "htsbasenet.h" #include -#if HTS_WIN +#ifdef _WIN32 // pour read #ifndef _WIN32_WCE #include @@ -83,7 +84,11 @@ Please visit our Website: http://www.httrack.com /* Ipv4 structures */ typedef struct in_addr INaddr; /* This should handle all cases */ -typedef struct SOCaddr { +#ifndef HTS_DEF_FWSTRUCT_SOCaddr +#define HTS_DEF_FWSTRUCT_SOCaddr +typedef struct SOCaddr SOCaddr; +#endif +struct SOCaddr { union { struct sockaddr_in in; struct sockaddr sa; @@ -91,7 +96,7 @@ typedef struct SOCaddr { unsigned char v6data[16]; unsigned char pad[128]; } m_addr; -} SOCaddr; +}; /* Ipv4 structure members */ #define SOCaddr_sinaddr(server) ((server).m_addr.in.sin_addr) @@ -153,7 +158,11 @@ strcpy(namebuf, dot); \ /* Ipv4 structures */ typedef struct in6_addr INaddr; /* This should handle all cases */ -typedef struct SOCaddr { +#ifndef HTS_DEF_FWSTRUCT_SOCaddr +#define HTS_DEF_FWSTRUCT_SOCaddr +typedef struct SOCaddr SOCaddr; +#endif +struct SOCaddr { union { struct sockaddr_in6 in6; struct sockaddr_in in; @@ -162,7 +171,7 @@ typedef struct SOCaddr { unsigned char v6data[16]; unsigned char pad[128]; } m_addr; -} SOCaddr; +}; /* Ipv4 structure members */ #define SOCaddr_sinaddr(server) ((server).m_addr.in6.sin6_addr) @@ -239,12 +248,16 @@ getnameinfo((struct sockaddr *)&(ss), sslen, \ #endif /* Buffer structure to copy various hostent structures */ -typedef struct t_fullhostent { +#ifndef HTS_DEF_FWSTRUCT_t_fullhostent +#define HTS_DEF_FWSTRUCT_t_fullhostent +typedef struct t_fullhostent t_fullhostent; +#endif +struct t_fullhostent { t_hostent hp; char* list[2]; char addr[HTS_MAXADDRLEN]; /* various struct sockaddr structures */ unsigned int addr_maxlen; -} t_fullhostent; +}; /* Initialize a 
t_fullhostent structure */ #define fullhostent_init(h) do { \ diff --git a/src/htsnostatic.c b/src/htsnostatic.c deleted file mode 100644 index 22e7d7a..0000000 --- a/src/htsnostatic.c +++ /dev/null @@ -1,264 +0,0 @@ -/* ------------------------------------------------------------ */ -/* -HTTrack Website Copier, Offline Browser for Windows and Unix -Copyright (C) Xavier Roche and other contributors - -This program is free software; you can redistribute it and/or -modify it under the terms of the GNU General Public License -as published by the Free Software Foundation; either version 2 -of the License, or any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - - -Important notes: - -- We hereby ask people using this source NOT to use it in purpose of grabbing -emails addresses, or collecting any other private information on persons. -This would disgrace our work, and spoil the many hours we spent on it. 
- - -Please visit our Website: http://www.httrack.com -*/ - - -/* ------------------------------------------------------------ */ -/* File: htsnostatic.c subroutines: */ -/* thread-safe routines for reentrancy */ -/* Author: Xavier Roche */ -/* ------------------------------------------------------------ */ - -/* Internal engine bytecode */ -#define HTS_INTERNAL_BYTECODE - -#include "htsnostatic.h" - -#include "htsbase.h" -#include "htshash.h" -#include "htsinthash.h" - -typedef struct hts_varhash { - /* - inthash values; - */ - inthash blocks; -} hts_varhash; - -#if USE_BEGINTHREAD -static PTHREAD_LOCK_TYPE hts_static_Mutex; -#endif -static int hts_static_Mutex_init=0; -#if HTS_WIN -#else -static PTHREAD_KEY_TYPE hts_static_key; -#endif - -int hts_initvar() { - if (!hts_static_Mutex_init) { - /* Init done */ - hts_static_Mutex_init=1; -#if USE_BEGINTHREAD - /* Init mutex */ - htsSetLock(&hts_static_Mutex, -999); - -#if HTS_WIN -#else - /* Init hash */ - PTHREAD_KEY_CREATE(&hts_static_key, hts_destroyvar); -#endif -#endif - } - - /* Set specific thread value */ -#if USE_BEGINTHREAD -#if HTS_WIN -#else - { - void* thread_val; - hts_varhash* hts_static_hash = (hts_varhash*) malloc(sizeof(hts_static_hash)); - if (!hts_static_hash) - return 0; - /* - hts_static_hash->values = inthash_new(HTS_VAR_MAIN_HASH); - if (!hts_static_hash->values) - return 0; - */ - hts_static_hash->blocks = inthash_new(HTS_VAR_MAIN_HASH); - if (!hts_static_hash->blocks) - return 0; - /* inthash_value_is_malloc(hts_static_hash->values, 0); */ /* Regular values */ - inthash_value_is_malloc(hts_static_hash->blocks, 1); /* We'll have to free them upon term! 
*/ - inthash_value_set_free_handler(hts_static_hash->blocks, hts_destroyvar_key); /* free handler */ - thread_val = (void*) hts_static_hash; - - PTHREAD_KEY_SET(hts_static_key, thread_val, inthash); - } -#endif -#endif - - return 1; -} - -/* - hash table free handler to free all keys -*/ -void hts_destroyvar_key(void* adr) { -#if HTS_WIN -#else - hts_NostaticComplexKey* cKey = (hts_NostaticComplexKey*) adr; - if (cKey) { - void* block_address = NULL; - PTHREAD_KEY_GET(cKey->localKey, &block_address, void*); - /* Free block */ - if (block_address) { - free(block_address); - } - cKey->localInit = 0; - } -#endif -} - -void hts_destroyvar(void* ptrkey) { -#if HTS_WIN -#else - if (ptrkey) { - hts_varhash* hashtables = (hts_varhash*) ptrkey; - PTHREAD_KEY_SET(hts_static_key, NULL, inthash); /* unregister */ - - /* Destroy has table */ - inthash_delete(&(hashtables->blocks)); /* will magically call hts_destroyvar_key(), too */ - /* - inthash_delete(&(hashtables->values)); - */ - free(ptrkey); - } -#endif -} - -/* - destroy all key values (for the current thread) -*/ -int hts_freevar() { -#if HTS_WIN -#if 0 - void* thread_val = NULL; - PTHREAD_KEY_GET(hts_static_key, &thread_val, inthash); - hts_destroyvar(thread_val); - PTHREAD_KEY_SET(hts_static_key, NULL, inthash); /* unregister */ - /* - PTHREAD_KEY_DELETE(hts_static_key); NO - */ -#endif -#endif - return 1; -} - -HTSEXT_API int hts_resetvar() { - int r; - hts_lockvar(); - { - hts_freevar(); - r = hts_initvar(); - } - hts_unlockvar(); - return r; -} - -int hts_maylockvar() { - return hts_static_Mutex_init; -} - -int hts_lockvar() { -#if USE_BEGINTHREAD - htsSetLock(&hts_static_Mutex, 1); -#endif - return 1; -} - -int hts_unlockvar() { -#if USE_BEGINTHREAD - htsSetLock(&hts_static_Mutex, 0); -#endif - return 1; -} - -int hts_setvar(char* name, long int value) { - return hts_setextvar(name, (long int)value, 0); -} - -int hts_setblkvar(char* name, void* value) { - return hts_setextvar(name, (long int)value, 1); -} - -int 
hts_setextvar(char* name, long int value, int flag) { -#if HTS_WIN -#else - void* thread_val = NULL; - hts_varhash* hashtables; - - /* - hts_lockvar(); // NO - MUST be protected by caller - { - */ - PTHREAD_KEY_GET(hts_static_key, &thread_val, inthash); - hashtables = (hts_varhash*) thread_val; - if (hashtables) { // XXc XXC hack for win version - inthash_write(hashtables->blocks, name, value); - } -#endif - - return 1; -} - - -int hts_getvar(char* name, long int* ptrvalue) { - return hts_getextvar(name, (long int*)ptrvalue, 0); -} - -int hts_getblkvar(char* name, void** ptrvalue) { - return hts_getextvar(name, (long int*)ptrvalue, 1); -} - -int hts_getextvar(char* name, long int* ptrvalue, int flag) { -#if HTS_WIN -#else - void* thread_val = NULL; - hts_varhash* hashtables; - - hts_lockvar(); - { - PTHREAD_KEY_GET(hts_static_key, &thread_val, inthash); - hashtables = (hts_varhash*) thread_val; - /* if (flag) { - */ - inthash_read(hashtables->blocks, name, ptrvalue); - /* - } else { - inthash_read(hashtables->values, name, ptrvalue); - } - */ - } - hts_unlockvar(); -#endif - - return 1; -} - -long int hts_directgetvar(char* name) { - long int value=0; - hts_getvar(name, &value); - return value; -} - -void* hts_directgetblkvar(char* name) { - void* value=NULL; - hts_getblkvar(name, &value); - return value; -} diff --git a/src/htsnostatic.h b/src/htsnostatic.h deleted file mode 100644 index 3bf4ec9..0000000 --- a/src/htsnostatic.h +++ /dev/null @@ -1,278 +0,0 @@ -/* ------------------------------------------------------------ */ -/* -HTTrack Website Copier, Offline Browser for Windows and Unix -Copyright (C) Xavier Roche and other contributors - -This program is free software; you can redistribute it and/or -modify it under the terms of the GNU General Public License -as published by the Free Software Foundation; either version 2 -of the License, or any later version. 
- -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - - -Important notes: - -- We hereby ask people using this source NOT to use it in purpose of grabbing -emails addresses, or collecting any other private information on persons. -This would disgrace our work, and spoil the many hours we spent on it. - - -Please visit our Website: http://www.httrack.com -*/ - - -/* ------------------------------------------------------------ */ -/* File: htsnostatic.c subroutines: */ -/* thread-safe routines for reentrancy */ -/* Author: Xavier Roche */ -/* ------------------------------------------------------------ */ - -/* - Okay, with these routines, the engine should be fully reentrant (thread-safe) - All static references have been changed: - - from - function foo() { - static bartype bar; - } - to: - function foo() { - bartype* bar; - NOSTATIC_RESERVE(bar, bartype, 1); - } -*/ - -#ifndef HTSNOSTATIC_DEFH -#define HTSNOSTATIC_DEFH - -/* Library internal definictions */ -#ifdef HTS_INTERNAL_BYTECODE - -#include "htscore.h" -#include "htsthread.h" - -#define HTS_VAR_MAIN_HASH 127 - -/* - MutEx -*/ - - -/* Magic per-thread variables functions - - Example: - hts_lockvar(); - hts_setvar("MyFoo", (long int)(void*)&foo); - hts_unlockvar(); - .. 
- foo=(void*)(long int)hts_directgetvar("MyFoo"); - - Do not forget to initialize (hts_initvar()) the library once per thread -*/ -int hts_initvar(void); -int hts_freevar(void); -#ifndef HTTRACK_DEFLIB -HTSEXT_API int hts_resetvar(void); -#endif -int hts_maylockvar(void); -int hts_lockvar(void); -int hts_unlockvar(void); - -int hts_setvar(char* name, long int value); -int hts_getvar(char* name, long int* ptrvalue); -long int hts_directgetvar(char* name); - -int hts_setblkvar(char* name, void* value); -int hts_getblkvar(char* name, void** ptrvalue); -void* hts_directgetblkvar(char* name); - -/* Internal */ -int hts_setextvar(char* name, long int value, int flag); -int hts_getextvar(char* name, long int* ptrvalue, int flag); -void hts_destroyvar(void* ptrkey); -void hts_destroyvar_key(void* adr); - -/* - Ensure that the variable 'name' has 'nelts' of type 'type' reserved - fnc is an UNIQUE function name -*/ -#define NOSTATIC_RESERVE(name, type, nelt) NOSTATIC_XRESERVE(name, type, nelt) - -/* - Note: - Yes, we first read the localInit flag variable without MutEx protection, - for optimization purpose, because the flag is set once initialization DONE. - If the first read fails, we *securely* re-check and initialize *if* necessary. 
- The abort() things should NEVER be called, and are here for safety reasons -*/ -/* - function-specific static cKey: - cKey = { localKey, localInit } - || \ - \/ \ ==1 upon initialization - thread variable - || - \/ - void* - || - \/ - 'thread-static' value - - the function-specific static cKey is also referenced in the global - hashtable for free() purpose: (see hts_destroyvar()) - - global static key variable - 'hts_static_key' - || - \/ - thread variable - || - \/ - void* - || - \/ - hashtable - || - \/ - function-specific hash key - || - \/ - &cKey - -*/ -#ifdef _WIN32 - -#ifdef _WIN32_WCE - -/* Windows CE: static only */ -#define NOSTATIC_XRESERVE(name, type, nelt) do { \ - /*__declspec( thread )*/ static type thValue[nelt]; \ - /* __declspec( thread ) */ int static initValue = 0; \ - name = thValue; \ - if (!initValue) { \ - initValue = 1; \ - memset(&thValue, 0, sizeof(thValue)); \ - } \ -} while(0) - -#elif 1 - -/* New Windows version: TLS */ -/* Suggested by daan at zwif.com to be more gentle with LoadLibrary (04/2004) -See http://msdn.microsoft.com/library/en-us/vccore/html/_core_rules_and_limitations_for_tls.asp -And especially the "DLL declares any nonlocal data or object as __declspec( thread )" section -*/ -#define NOSTATIC_XRESERVE(name,type,nelt) do { \ - static DWORD tlsIndex = 0; \ - static int initValue = 0; \ - if (initValue == 0) \ - { \ - if (!hts_maylockvar()) { \ - abortLog("unable to lock mutex (not initialized?!)"); \ - abort(); \ - } \ - hts_lockvar(); \ - if (initValue == 0) { \ - tlsIndex = TlsAlloc(); \ - if (tlsIndex == 0xFFFFFFFF) { \ - abortLog("unable to allocate thread local storage (TLS) for variable!"); \ - abort(); \ - } \ - initValue = 1; \ - } \ - hts_unlockvar(); \ - } \ - name = (type*)TlsGetValue(tlsIndex); \ - if (name == NULL) { \ - name = (type*)malloc(sizeof(type)*nelt); \ - if (name == NULL) { \ - abortLog("unable to allocate memory for variable!"); \ - abort(); \ - } \ - memset(name, 0, sizeof(type)*nelt); \ - 
TlsSetValue(tlsIndex, name); \ - } \ -} while(0) - -#else - -/* Windows: handled by the compiler */ -#define NOSTATIC_XRESERVE(name, type, nelt) do { \ - __declspec( thread ) static type thValue[nelt]; \ - __declspec( thread ) int static initValue = 0; \ - name = thValue; \ - if (!initValue) { \ - initValue = 1; \ - memset(&thValue, 0, sizeof(thValue)); \ - } \ -} while(0) - -#endif - -#else - -/* Un*x : slightly more complex, we have to create a thread-key */ -typedef struct { - PTHREAD_KEY_TYPE localKey; - unsigned char localInit; -} hts_NostaticComplexKey; -#define NOSTATIC_XRESERVE(name, type, nelt) do { \ -static hts_NostaticComplexKey cKey={0,0}; \ -name = NULL; \ -if ( cKey.localInit ) { \ - PTHREAD_KEY_GET(cKey.localKey, &name, type*); \ -} \ -if ( ( ! cKey.localInit ) || ( name == NULL ) ) { \ - if (!hts_maylockvar()) { \ - abortLog("unable to lock mutex (not initialized?!)"); \ - abort(); \ - } \ - hts_lockvar(); \ - { \ - { \ - name = (type *) calloc((nelt), sizeof(type)); \ - if (name == NULL) { \ - abortLog("unable to allocate memory for variable!"); \ - abort(); \ - } \ - { \ - char elt_name[64+8]; \ - sprintf(elt_name, #name "_%d", (int) __LINE__); \ - PTHREAD_KEY_CREATE(&(cKey.localKey), NULL); \ - hts_setblkvar(elt_name, &cKey); \ - } \ - PTHREAD_KEY_SET(cKey.localKey, name, type*); \ - name = NULL; \ - PTHREAD_KEY_GET(cKey.localKey, &name, type*); \ - if (name == NULL) { \ - abortLog("unable to load thread key!"); \ - abort(); \ - } \ - if ( ! cKey.localInit ) { \ - cKey.localInit = 1; \ - } \ - } \ - } \ - hts_unlockvar(); \ -} \ -else { \ - PTHREAD_KEY_GET(cKey.localKey, &name, type*); \ - if (name == NULL) { \ - abortLog("unable to load thread key! 
(2)"); \ - abort(); \ - } \ -} \ -} while(0) -#endif - -#endif - -#endif diff --git a/src/htsopt.h b/src/htsopt.h index bf62c72..0b304f8 100644 --- a/src/htsopt.h +++ b/src/htsopt.h @@ -40,51 +40,231 @@ Please visit our Website: http://www.httrack.com #define HTTRACK_DEFOPT #include -#include "htsbasenet.h" -#include "htsbauth.h" +#include "htsglobal.h" -// structure proxy -typedef struct t_proxy { +/* Forward definitions */ +#ifndef HTS_DEF_FWSTRUCT_t_hts_htmlcheck_callbacks +#define HTS_DEF_FWSTRUCT_t_hts_htmlcheck_callbacks +typedef struct t_hts_htmlcheck_callbacks t_hts_htmlcheck_callbacks; +#endif +#ifndef HTS_DEF_FWSTRUCT_t_dnscache +#define HTS_DEF_FWSTRUCT_t_dnscache +typedef struct t_dnscache t_dnscache; +#endif +#ifndef HTS_DEF_FWSTRUCT_hash_struct +#define HTS_DEF_FWSTRUCT_hash_struct +typedef struct hash_struct hash_struct; +#endif +#ifndef HTS_DEF_FWSTRUCT_robots_wizard +#define HTS_DEF_FWSTRUCT_robots_wizard +typedef struct robots_wizard robots_wizard; +#endif +#ifndef HTS_DEF_FWSTRUCT_t_cookie +#define HTS_DEF_FWSTRUCT_t_cookie +typedef struct t_cookie t_cookie; +#endif + +/** Forward definitions **/ +#ifndef HTS_DEF_FWSTRUCT_String +#define HTS_DEF_FWSTRUCT_String +typedef struct String String; +#endif +#ifndef HTS_DEF_STRUCT_String +#define HTS_DEF_STRUCT_String +struct String { + char* buffer_; + size_t length_; + size_t capacity_; +}; +#endif + +/* Defines */ +#define CATBUFF_SIZE (STRING_SIZE*2*2) +#define STRING_SIZE 2048 + +/* Proxy structure */ +#ifndef HTS_DEF_FWSTRUCT_t_proxy +#define HTS_DEF_FWSTRUCT_t_proxy +typedef struct t_proxy t_proxy; +#endif +struct t_proxy { int active; - char name[1024]; + String name; int port; - char bindhost[256]; // bind this host -} t_proxy; + String bindhost; // bind this host +}; /* Structure utile pour copier en bloc les paramètres */ -typedef struct htsfilters { +#ifndef HTS_DEF_FWSTRUCT_htsfilters +#define HTS_DEF_FWSTRUCT_htsfilters +typedef struct htsfilters htsfilters; +#endif +struct htsfilters { 
char*** filters; int* filptr; //int* filter_max; -} htsfilters; +}; /* User callbacks chain */ typedef int (*htscallbacksfncptr)(void); typedef struct htscallbacks htscallbacks; struct htscallbacks { - char callbackName[128]; void* moduleHandle; htscallbacksfncptr exitFnc; htscallbacks * next; }; +/* filenote() internal file structure */ +#ifndef HTS_DEF_FWSTRUCT_filenote_strc +#define HTS_DEF_FWSTRUCT_filenote_strc +typedef struct filenote_strc filenote_strc; +#endif +struct filenote_strc { + FILE* lst; + char path[STRING_SIZE*2]; +}; + +/* concat() functions */ +#ifndef HTS_DEF_FWSTRUCT_concat_strc +#define HTS_DEF_FWSTRUCT_concat_strc +typedef struct concat_strc concat_strc; +#endif +struct concat_strc { + int index; + char buff[16][STRING_SIZE*2*2]; +}; + +/* int2 functions */ +#ifndef HTS_DEF_FWSTRUCT_strc_int2bytes2 +#define HTS_DEF_FWSTRUCT_strc_int2bytes2 +typedef struct strc_int2bytes2 strc_int2bytes2; +#endif +struct strc_int2bytes2 { + char catbuff[CATBUFF_SIZE]; + char buff1[256]; + char buff2[32]; + char* buffadr[2]; +}; + +/* cmd callback */ +#ifndef HTS_DEF_FWSTRUCT_usercommand_strc +#define HTS_DEF_FWSTRUCT_usercommand_strc +typedef struct usercommand_strc usercommand_strc; +#endif +struct usercommand_strc { + int exe; + char cmd[2048]; +}; + +/* error logging */ +#ifndef HTS_DEF_FWSTRUCT_fspc_strc +#define HTS_DEF_FWSTRUCT_fspc_strc +typedef struct fspc_strc fspc_strc; +#endif +struct fspc_strc { + int error; + int warning; + int info; +}; + /* Structure état du miroir */ -typedef struct htsoptstate { +#ifndef HTS_DEF_FWSTRUCT_htsoptstatecancel +#define HTS_DEF_FWSTRUCT_htsoptstatecancel +typedef struct htsoptstatecancel htsoptstatecancel; +#endif +struct htsoptstatecancel { + char *url; + htsoptstatecancel *next; +}; + +/* Mutexes */ +#ifndef HTS_DEF_FWSTRUCT_htsmutex_s +#define HTS_DEF_FWSTRUCT_htsmutex_s +typedef struct htsmutex_s htsmutex_s, *htsmutex; +#endif + +/* Hashtables */ +#ifndef HTS_DEF_FWSTRUCT_struct_inthash +#define 
HTS_DEF_FWSTRUCT_struct_inthash +typedef struct struct_inthash struct_inthash, *inthash; +#endif + +/* Structure état du miroir */ +#ifndef HTS_DEF_FWSTRUCT_htsoptstate +#define HTS_DEF_FWSTRUCT_htsoptstate +typedef struct htsoptstate htsoptstate; +#endif +struct htsoptstate { + htsmutex lock; /* 3.41 */ + /* */ int stop; int exit_xh; int back_add_stats; /* */ int mimehtml_created; - char mimemid[256]; + String mimemid; FILE* mimefp; int delayedId; /* */ + filenote_strc strc; + /* Functions context (avoir thread variables!) */ htscallbacks callbacks; -} htsoptstate; + concat_strc concat; + usercommand_strc usercmd; + fspc_strc fspc; + char *userhttptype; + int verif_backblue_done; + int verif_external_status; + t_dnscache *dns_cache; + /* HTML parsing state */ + char _hts_errmsg[1024]; + int _hts_in_html_parsing; + int _hts_in_html_done; + int _hts_in_html_poll; + int _hts_setpause; + char** _hts_addurl; + int _hts_cancel; + htsoptstatecancel *cancel; /* 3.41 */ + char HTbuff[2048]; + unsigned int debug_state; + unsigned int tmpnameid; /* 3.41 */ +}; +/* Library handles */ +#ifndef HTS_DEF_FWSTRUCT_htslibhandles +#define HTS_DEF_FWSTRUCT_htslibhandles +typedef struct htslibhandles htslibhandles; +#endif +#ifndef HTS_DEF_FWSTRUCT_htslibhandle +#define HTS_DEF_FWSTRUCT_htslibhandle +typedef struct htslibhandle htslibhandle; +#endif +struct htslibhandle { + char *moduleName; + void *handle; +}; +struct htslibhandles { + int count; + htslibhandle *handles; +}; + +/* Javascript parser flags */ +typedef enum htsparsejava_flags { + HTSPARSE_NONE = 0, // don't parse + HTSPARSE_DEFAULT = 1, // parse default (all) + HTSPARSE_NO_CLASS = 2, // don't parse .java + HTSPARSE_NO_JAVASCRIPT = 4, // don't parse .js + HTSPARSE_NO_AGGRESSIVE = 8 // don't aggressively parse .js or .java +} htsparsejava_flags; // paramètres httrack (options) -typedef struct httrackp { +#ifndef HTS_DEF_FWSTRUCT_httrackp +#define HTS_DEF_FWSTRUCT_httrackp +typedef struct httrackp httrackp; +#endif +struct 
httrackp { + size_t size_httrackp; // size of this structure + /* */ int wizard; // wizard aucun/grand/petit int flush; // fflush sur les fichiers log int travel; // type de déplacements (same domain etc) @@ -109,9 +289,7 @@ typedef struct httrackp { int rateout; // nombre d'octets minium pour le transfert int maxtime; // temps max en secondes int maxrate; // taux de transfert max -#if HTS_USEMMS int mms_maxtime; // max duration of a mms file -#endif float maxconn; // nombre max de connexions/s int waittime; // démarrage programmé int cache; // génération d'un cache @@ -120,21 +298,21 @@ typedef struct httrackp { t_proxy proxy; // configuration du proxy int savename_83; // conversion 8-3 pour les noms de fichiers int savename_type; // type de noms: structure originale/html-images en un seul niveau - char savename_userdef[256]; // structure userdef (ex: %h%p/%n%q.%t) + String savename_userdef; // structure userdef (ex: %h%p/%n%q.%t) int savename_delayed; // delayed type check int delayed_cached; // delayed type check can be cached to speedup updates int mimehtml; // MIME-html int user_agent_send; // user agent (ex: httrack/1.0 [sun]) - char user_agent[128]; - char referer[256]; // referer - char from[256]; // from - char path_log[1024]; // chemin pour cache et log - char path_html[1024]; // chemin pour miroir - char path_bin[1024]; // chemin pour templates + String user_agent; // + String referer; // referer + String from; // from + String path_log; // chemin pour cache et log + String path_html; // chemin pour miroir + String path_bin; // chemin pour templates int retry; // nombre d'essais supplémentaires en cas d'échec int makestat; // mettre à jour un fichier log de statistiques de transfert int maketrack; // mettre à jour un fichier log de statistiques d'opérations - int parsejava; // parsing des classes java pour récupérer les class, gif & cie + int parsejava; // parsing des classes java pour récupérer les class, gif & cie ; see htsparsejava_flags int 
hostcontrol; // abandon d'un host trop lent etc. int errpage; // générer une page d'erreur en cas de 404 etc. int check_type; // si type inconnu (cgi,asp,/) alors tester lien (et gérer moved éventuellement) @@ -144,7 +322,7 @@ typedef struct httrackp { int passprivacy; // pas de mot de pass dans les liens externes? int includequery; // include la query-string int mirror_first_page; // miroir des liens - char sys_com[2048]; // commande système + String sys_com; // commande système int sys_com_exec; // executer commande int accept_cookie; // gestion des cookies t_cookie* cookie; @@ -158,17 +336,18 @@ typedef struct httrackp { int parsedebug; // débugger parser (debug!) int norecatch; // ne pas reprendre les fichiers effacés localement par l'utilisateur int verbosedisplay; // animation textuelle - char footer[256]; // ligne d'infos + String footer; // ligne d'infos int maxcache; // maximum en mémoire au niveau du cache (backing) //int maxcache_anticipate; // maximum de liens à anticiper (majorant) int ftp_proxy; // proxy http pour ftp - char filelist[1024]; // fichier liste URL à inclure - char urllist[1024]; // fichier liste de filtres à inclure + String filelist; // fichier liste URL à inclure + String urllist; // fichier liste de filtres à inclure htsfilters filters; // contient les pointeurs pour les filtres - void* hash; // hash structure - void* robotsptr; // robots ptr - char lang_iso[64]; // en, fr .. - char mimedefs[2048]; // ext1=mimetype1\next2=mimetype2.. + hash_struct* hash; // hash structure + robots_wizard* robotsptr; // robots ptr + String lang_iso; // en, fr .. + String mimedefs; // ext1=mimetype1\next2=mimetype2.. 
+ String mod_blacklist; // (3.41) // int maxlink; // nombre max de liens int maxfilter; // nombre max de filtres @@ -182,11 +361,20 @@ typedef struct httrackp { int is_update; // c'est une update (afficher "File updated...") int dir_topindex; // reconstruire top index par la suite // - htsoptstate state; // état -} httrackp; + // callbacks + t_hts_htmlcheck_callbacks *callbacks_fun; + // store library handles + htslibhandles libHandles; + // + htsoptstate state; // state +}; // stats for httrack -typedef struct hts_stat_struct { +#ifndef HTS_DEF_FWSTRUCT_hts_stat_struct +#define HTS_DEF_FWSTRUCT_hts_stat_struct +typedef struct hts_stat_struct hts_stat_struct; +#endif +struct hts_stat_struct { LLint HTS_TOTAL_RECV; // flux entrant reçu LLint stat_bytes; // octets écrits sur disque // int HTS_TOTAL_RECV_STATE; // status: 0 tout va bien 1: ralentir un peu 2: ralentir 3: beaucoup @@ -219,8 +407,7 @@ typedef struct hts_stat_struct { // TStamp last_connect; // last connect() call TStamp last_request; // last request issued -} hts_stat_struct; - +}; #endif diff --git a/src/htsparse.c b/src/htsparse.c index 4aa1b7e..b39b41f 100644 --- a/src/htsparse.c +++ b/src/htsparse.c @@ -76,7 +76,7 @@ Please visit our Website: http://www.httrack.com #define relativeurlfil ((!parent_relative)?urlfil:parenturlfil) #define relativesavename ((!parent_relative)?savename:parentsavename) -#define test_flush if (opt->flush) { if (opt->log) { fflush(opt->log); } if (opt->errlog) { fflush(opt->errlog); } } +#define test_flush if (opt->flush) { if (opt->log) { fflush(opt->log); } if (opt->log) { fflush(opt->log); } } // does nothing #define XH_uninit do {} while(0) @@ -96,14 +96,14 @@ Please visit our Website: http://www.httrack.com ht_len+=A; #define HT_ADD_ADR \ if ((opt->getmode & 1) && (ptr>0)) { \ - int i=((int) (adr - lastsaved)),j=ht_len; HT_ADD_CHK(i) \ + size_t i = ((size_t) (adr - lastsaved)),j=ht_len; HT_ADD_CHK(i) \ memcpy(ht_buff+j, lastsaved, i); \ ht_buff[j+i]='\0'; \ 
lastsaved=adr; \ } #define HT_ADD(A) \ if ((opt->getmode & 1) && (ptr>0)) { \ - int i_=strlen(A),j_=ht_len; \ + size_t i_ = strlen(A), j_ = ht_len; \ if (i_) { \ HT_ADD_CHK(i_) \ memcpy(ht_buff+j_, A, i_); \ @@ -111,7 +111,7 @@ Please visit our Website: http://www.httrack.com } } #define HT_ADD_HTMLESCAPED(A) \ if ((opt->getmode & 1) && (ptr>0)) { \ - int i_, j_; \ + size_t i_, j_; \ char BIGSTK tempo_[HTS_URLMAXSIZE*2]; \ escape_for_html_print(A, tempo_); \ i_=strlen(tempo_); \ @@ -123,7 +123,7 @@ Please visit our Website: http://www.httrack.com } } #define HT_ADD_HTMLESCAPED_FULL(A) \ if ((opt->getmode & 1) && (ptr>0)) { \ - int i_, j_; \ + size_t i_, j_; \ char BIGSTK tempo_[HTS_URLMAXSIZE*2]; \ escape_for_html_print_full(A, tempo_); \ i_=strlen(tempo_); \ @@ -134,15 +134,15 @@ Please visit our Website: http://www.httrack.com ht_buff[j_+i_]='\0'; \ } } #define HT_ADD_START \ - int ht_size=(int)(r->size*5)/4+REALLOC_SIZE; \ - int ht_len=0; \ + size_t ht_size=(size_t)(r->size*5)/4+REALLOC_SIZE; \ + size_t ht_len=0; \ char* ht_buff=NULL; \ if ((opt->getmode & 1) && (ptr>0)) { \ ht_buff=(char*) malloct(ht_size); \ if (ht_buff==NULL) { \ printf("PANIC! 
: Not enough memory [%d]\n",__LINE__); \ XH_uninit; \ - abortLogFmt("not enough memory for current html document in HT_ADD_START : malloct(%d) failed" _ ht_size); \ + abortLogFmt("not enough memory for current html document in HT_ADD_START : malloct(%d) failed" _ (int) ht_size); \ exit(1); \ } \ ht_buff[0]='\0'; \ @@ -151,7 +151,7 @@ Please visit our Website: http://www.httrack.com int ok=0;\ if (ht_buff) { \ char digest[32+2];\ - INTsys fsize_old=fsize(fconv(savename));\ + off_t fsize_old = fsize(fconv(OPT_GET_BUFF(opt),savename));\ digest[0]='\0';\ domd5mem(ht_buff,ht_len,digest,1);\ if (fsize_old==ht_len) { \ @@ -163,7 +163,7 @@ Please visit our Website: http://www.httrack.com if ((mlen == 32) && (strcmp(((mbuff!=NULL)?mbuff:""),digest)==0)) {\ ok=1;\ if ( (opt->debug>1) && (opt->log!=NULL) ) {\ - fspc(opt->log,"debug"); fprintf(opt->log,"File not re-written (md5): %s"LF,savename);\ + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"File not re-written (md5): %s"LF,savename);\ test_flush;\ }\ } else {\ @@ -171,20 +171,21 @@ Please visit our Website: http://www.httrack.com } \ }\ if (!ok) { \ - file_notify(urladr, urlfil, savename, 1, 1, r->notmodified); \ - fp=filecreate(savename); \ + file_notify(opt,urladr, urlfil, savename, 1, 1, r->notmodified); \ + fp=filecreate(&opt->state.strc, savename); \ if (fp) { \ if (ht_len>0) {\ - if ((INTsys)fwrite(ht_buff,1,ht_len,fp) != ht_len) { \ + if (fwrite(ht_buff,1,ht_len,fp) != ht_len) { \ int fcheck;\ if ((fcheck=check_fatal_io_errno())) {\ opt->state.exit_xh=-1;\ }\ - if (opt->errlog) { \ - fspc(opt->errlog,"error"); fprintf(opt->errlog,"Unable to write HTML file %s: %s"LF, savename, strerror(errno));\ + if (opt->log) { \ + int last_errno = errno; \ + HTS_LOG(opt,LOG_ERROR); fprintf(opt->log,"Unable to write HTML file %s: %s"LF, savename, strerror(last_errno));\ if (fcheck) {\ - fspc(opt->errlog,"error");\ - fprintf(opt->errlog,"* * Fatal write error, giving up"LF);\ + HTS_LOG(opt,LOG_ERROR);\ + fprintf(opt->log,"* * Fatal 
write error, giving up"LF);\ }\ test_flush;\ }\ @@ -196,23 +197,24 @@ Please visit our Website: http://www.httrack.com } else {\ int fcheck;\ if ((fcheck=check_fatal_io_errno())) {\ - fspc(opt->log,"error"); fprintf(opt->log,"Mirror aborted: disk full or filesystem problems"LF); \ + HTS_LOG(opt,LOG_ERROR); fprintf(opt->log,"Mirror aborted: disk full or filesystem problems"LF); \ test_flush; \ opt->state.exit_xh=-1;\ }\ - if (opt->errlog) { \ - fspc(opt->errlog,"error");\ - fprintf(opt->errlog,"Unable to save file %s : %s"LF, savename, strerror(errno));\ + if (opt->log) { \ + int last_errno = errno; \ + HTS_LOG(opt,LOG_ERROR);\ + fprintf(opt->log,"Unable to save file %s : %s"LF, savename, strerror(last_errno));\ if (fcheck) {\ - fspc(opt->errlog,"error");\ - fprintf(opt->errlog,"* * Fatal write error, giving up"LF);\ + HTS_LOG(opt,LOG_ERROR);\ + fprintf(opt->log,"* * Fatal write error, giving up"LF);\ }\ test_flush;\ }\ }\ } else {\ - file_notify(urladr, urlfil, savename, 0, 0, r->notmodified); \ - filenote(savename,NULL); \ + file_notify(opt,urladr, urlfil, savename, 0, 0, r->notmodified); \ + filenote(&opt->state.strc, savename,NULL); \ }\ if (cache->ndx)\ cache_writedata(cache->ndx,cache->dat,"//[HTML-MD5]//",savename,digest,(int)strlen(digest));\ @@ -237,7 +239,7 @@ Please visit our Website: http://www.httrack.com fflush(makeindex_fp); \ fclose(makeindex_fp); /* à ne pas oublier sinon on passe une nuit blanche */ \ makeindex_fp=NULL; \ - usercommand(opt,0,NULL,fconcat(opt->path_html,"index.html"),"primary","primary"); \ + usercommand(opt,0,NULL,fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_html),"index.html"),"primary","primary"); \ } \ } \ makeindex_done=1; /* ok c'est fait */ \ @@ -255,7 +257,7 @@ Please visit our Website: http://www.httrack.com #define liens_record(A,F,S,FA,FF) { \ int notecode=0; \ - int lienurl_len=((sizeof(lien_url)+HTS_ALIGN-1)/HTS_ALIGN)*HTS_ALIGN,\ + size_t lienurl_len=((sizeof(lien_url)+HTS_ALIGN-1)/HTS_ALIGN)*HTS_ALIGN,\ 
adr_len=strlen(A),\ fil_len=strlen(F),\ sav_len=strlen(S),\ @@ -314,27 +316,27 @@ Please visit our Website: http://www.httrack.com #define ENGINE_LOAD_CONTEXT() \ ENGINE_LOAD_CONTEXT_BASE(); \ /* */ \ - htsblk* r = stre->r_; \ - hash_struct* hash = stre->hash_; \ - int lien_max = *stre->lien_max_; \ + htsblk* r HTS_UNUSED = stre->r_; \ + hash_struct* hash HTS_UNUSED = stre->hash_; \ + int lien_max HTS_UNUSED = *stre->lien_max_; \ /* */ \ - int error = * stre->error_; \ - int store_errpage = * stre->store_errpage_; \ - char* codebase = stre->codebase; \ - char* base = stre->base; \ + int error HTS_UNUSED = * stre->error_; \ + int store_errpage HTS_UNUSED = * stre->store_errpage_; \ + char* codebase HTS_UNUSED = stre->codebase; \ + char* base HTS_UNUSED = stre->base; \ /* */ \ - int makeindex_done = *stre->makeindex_done_; \ - FILE* makeindex_fp = *stre->makeindex_fp_; \ - int makeindex_links = *stre->makeindex_links_; \ - char* makeindex_firstlink = stre->makeindex_firstlink_; \ + int makeindex_done HTS_UNUSED = *stre->makeindex_done_; \ + FILE* makeindex_fp HTS_UNUSED = *stre->makeindex_fp_; \ + int makeindex_links HTS_UNUSED = *stre->makeindex_links_; \ + char* makeindex_firstlink HTS_UNUSED = stre->makeindex_firstlink_; \ /* */ \ - char *template_header = stre->template_header_; \ - char *template_body = stre->template_body_; \ - char *template_footer = stre->template_footer_; \ + char *template_header HTS_UNUSED = stre->template_header_; \ + char *template_body HTS_UNUSED = stre->template_body_; \ + char *template_footer HTS_UNUSED = stre->template_footer_; \ /* */ \ - LLint stat_fragment = *stre->stat_fragment_; \ - TStamp makestat_time = stre->makestat_time; \ - FILE* makestat_fp = stre->makestat_fp + LLint stat_fragment HTS_UNUSED = *stre->stat_fragment_; \ + TStamp makestat_time HTS_UNUSED = stre->makestat_time; \ + FILE* makestat_fp HTS_UNUSED = stre->makestat_fp #define ENGINE_SAVE_CONTEXT() \ ENGINE_SAVE_CONTEXT_BASE(); \ @@ -369,7 +371,7 @@ Please visit 
our Website: http://www.httrack.com /* Increment current pointer to 'steps' characters, modifying automate if necessary */ #define INCREMENT_CURRENT_ADR(steps) do { \ - int steps__ = (steps); \ + int steps__ = (int) ( steps ); \ while(steps__ > 0) { \ adr++; \ AUTOMATE_LOOKUP_CURRENT_ADR(); \ @@ -382,39 +384,38 @@ Please visit our Website: http://www.httrack.com int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { /* Load engine variables */ ENGINE_LOAD_CONTEXT(); + char catbuff[CATBUFF_SIZE]; -#if HTS_ANALYSTE { char* cAddr = r->adr; int cSize = (int) r->size; if ( (opt->debug>0) && (opt->log!=NULL) ) { - fspc(opt->log,"info"); fprintf(opt->log,"engine: preprocess-html: %s%s"LF, urladr, urlfil); + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"engine: preprocess-html: %s%s"LF, urladr, urlfil); } - if (hts_htmlcheck_preprocess(&cAddr, &cSize, urladr, urlfil) == 1) { + if (RUN_CALLBACK4(opt, preprocess, &cAddr, &cSize, urladr, urlfil) == 1) { r->adr = cAddr; r->size = cSize; } } - if (hts_htmlcheck(r->adr,(int)r->size,urladr,urlfil)) { -#endif + if (RUN_CALLBACK4(opt, check_html, r->adr,(int)r->size,urladr,urlfil)) { FILE* fp=NULL; // fichier écrit localement char* adr=r->adr; // pointeur (on parcourt) char* lastsaved; // adresse du dernier octet sauvé + 1 if ( (opt->debug>1) && (opt->log!=NULL) ) { - fspc(opt->log,"debug"); fprintf(opt->log,"scanning file %s%s (%s).."LF, urladr, urlfil, savename); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"scanning file %s%s (%s).."LF, urladr, urlfil, savename); test_flush; } // Indexing! 
#if HTS_MAKE_KEYWORD_INDEX if (opt->kindex) { - if (index_keyword(r->adr,r->size,r->contenttype,savename,opt->path_html)) { + if (index_keyword(r->adr,r->size,r->contenttype,savename,StringBuff(opt->path_html))) { if ( (opt->debug>1) && (opt->log!=NULL) ) { - fspc(opt->log,"debug"); fprintf(opt->log,"indexing file..done"LF); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"indexing file..done"LF); test_flush; } } else { if ( (opt->debug>1) && (opt->log!=NULL) ) { - fspc(opt->log,"debug"); fprintf(opt->log,"indexing file..error!"LF); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"indexing file..error!"LF); test_flush; } } } @@ -470,6 +471,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { // int parent_relative=0; // the parent is the base path (.js, .css..) HT_ADD_START; // débuter + lastsaved=adr; /* Initialize script automate for comments, quotes.. */ memset(inscript_state, 0xff, sizeof(inscript_state)); @@ -498,15 +500,6 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { inscript_state[INSCRIPT_ANTISLASH_IN_QUOTE][INSCRIPT_DEFAULT]=INSCRIPT_INQUOTE; /* #8: escape in "" */ inscript_state[INSCRIPT_ANTISLASH_IN_QUOTE2][INSCRIPT_DEFAULT]=INSCRIPT_INQUOTE2; /* #9: escape in '' */ - - /* statistics */ - if ((opt->getmode & 1) && (ptr>0)) { - /* - HTS_STAT.stat_files++; - HTS_STAT.stat_bytes+=r->size; - */ - } - /* Primary list or URLs */ if (ptr == 0) { intag=1; @@ -515,8 +508,8 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { } /* Check is the file is a .js file */ else if ( - (compare_mime(r->contenttype, str->url_file, "application/x-javascript")!=0) - || (compare_mime(r->contenttype, str->url_file, "text/css")!=0) + (compare_mime(opt,r->contenttype, str->url_file, "application/x-javascript")!=0) + || (compare_mime(opt,r->contenttype, str->url_file, "text/css")!=0) ) { /* JavaScript js file */ inscript=1; if (opt->parsedebug) { HT_ADD("<@@ inscript @@>"); } @@ -524,10 +517,10 @@ int 
htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { intag=1; // because après