diff options
Diffstat (limited to 'src')
84 files changed, 11710 insertions, 4061 deletions
diff --git a/src/Makefile.am b/src/Makefile.am index c391d3f..bc74182 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -42,6 +42,7 @@ libhttrack_la_SOURCES = htscore.c htsparse.c htsback.c htscache.c \ htsmd5.c htszlib.c htsnostatic.c htswrap.c \ htsmodules.c \ md5.c \ + minizip/ioapi.c minizip/mztools.c minizip/unzip.c minizip/zip.c \ hts-indextmpl.h htsalias.h htsback.h htsbase.h \ htsbasenet.h htsbauth.h htscache.h htscatchurl.h \ htsconfig.h htscore.h htsparse.h htscoremain.h htsdefines.h \ @@ -50,11 +51,16 @@ libhttrack_la_SOURCES = htscore.c htsparse.c htsback.c htscache.c \ htsmodules.h htsname.h htsnet.h htsnostatic.h \ htsopt.h htsrobots.h htssystem.h htsthread.h \ htstools.h htswizard.h htswrap.h htszlib.h \ - httrack-library.h md5.h + htsstrings.h httrack-library.h \ + md5.h \ + minizip/crypt.h minizip/ioapi.h minizip/mztools.h minizip/unzip.h minizip/zip.h + -libhttrack_la_LIBADD = $(THREADS_LIBS) $(DL_LIBS) $(SOCKET_LIBS) +libhttrack_la_LIBADD = $(THREADS_LIBS) $(ZLIB_LIBS) $(DL_LIBS) $(SOCKET_LIBS) libhttrack_la_LDFLAGS = -version-info $(VERSION_INFO) EXTRA_DIST = httrack.h webhttrack \ httrack.dsp httrack.dsw \ - webhttrack.dsp webhttrack.dsw + webhttrack.dsp webhttrack.dsw \ + minizip/ChangeLogUnzip minizip/iowin32.c minizip/iowin32.h + diff --git a/src/Makefile.in b/src/Makefile.in index 22590c6..aa5da2a 100644 --- a/src/Makefile.in +++ b/src/Makefile.in @@ -71,6 +71,7 @@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ LDFLAGS = @LDFLAGS@ LFS_FLAG = @LFS_FLAG@ +LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LIBTOOL = @LIBTOOL@ LN_S = @LN_S@ @@ -179,6 +180,7 @@ libhttrack_la_SOURCES = htscore.c htsparse.c htsback.c htscache.c \ htsmd5.c htszlib.c htsnostatic.c htswrap.c \ htsmodules.c \ md5.c \ + minizip/ioapi.c minizip/mztools.c minizip/unzip.c minizip/zip.c \ hts-indextmpl.h htsalias.h htsback.h htsbase.h \ htsbasenet.h htsbauth.h htscache.h htscatchurl.h \ htsconfig.h htscore.h htsparse.h htscoremain.h htsdefines.h \ @@ 
-187,15 +189,18 @@ libhttrack_la_SOURCES = htscore.c htsparse.c htsback.c htscache.c \ htsmodules.h htsname.h htsnet.h htsnostatic.h \ htsopt.h htsrobots.h htssystem.h htsthread.h \ htstools.h htswizard.h htswrap.h htszlib.h \ - httrack-library.h md5.h + htsstrings.h httrack-library.h \ + md5.h \ + minizip/crypt.h minizip/ioapi.h minizip/mztools.h minizip/unzip.h minizip/zip.h -libhttrack_la_LIBADD = $(THREADS_LIBS) $(DL_LIBS) $(SOCKET_LIBS) +libhttrack_la_LIBADD = $(THREADS_LIBS) $(ZLIB_LIBS) $(DL_LIBS) $(SOCKET_LIBS) libhttrack_la_LDFLAGS = -version-info $(VERSION_INFO) EXTRA_DIST = httrack.h webhttrack \ httrack.dsp httrack.dsw \ - webhttrack.dsp webhttrack.dsw + webhttrack.dsp webhttrack.dsw \ + minizip/ChangeLogUnzip minizip/iowin32.c minizip/iowin32.h subdir = src mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs @@ -209,7 +214,8 @@ am_libhttrack_la_OBJECTS = htscore.lo htsparse.lo htsback.lo htscache.lo \ htshelp.lo htsjava.lo htslib.lo htscoremain.lo htsname.lo \ htsrobots.lo htstools.lo htswizard.lo htsalias.lo htsthread.lo \ htsindex.lo htsbauth.lo htsmd5.lo htszlib.lo htsnostatic.lo \ - htswrap.lo htsmodules.lo md5.lo + htswrap.lo htsmodules.lo md5.lo ioapi.lo mztools.lo unzip.lo \ + zip.lo libhttrack_la_OBJECTS = $(am_libhttrack_la_OBJECTS) bin_PROGRAMS = httrack$(EXEEXT) htsserver$(EXEEXT) PROGRAMS = $(bin_PROGRAMS) @@ -243,7 +249,9 @@ am__depfiles_maybe = depfiles @AMDEP_TRUE@ ./$(DEPDIR)/htstools.Plo ./$(DEPDIR)/htsweb.Po \ @AMDEP_TRUE@ ./$(DEPDIR)/htswizard.Plo ./$(DEPDIR)/htswrap.Plo \ @AMDEP_TRUE@ ./$(DEPDIR)/htszlib.Plo ./$(DEPDIR)/httrack.Po \ -@AMDEP_TRUE@ ./$(DEPDIR)/md5.Plo +@AMDEP_TRUE@ ./$(DEPDIR)/ioapi.Plo ./$(DEPDIR)/md5.Plo \ +@AMDEP_TRUE@ ./$(DEPDIR)/mztools.Plo ./$(DEPDIR)/unzip.Plo \ +@AMDEP_TRUE@ ./$(DEPDIR)/zip.Plo COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) LTCOMPILE = $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) \ @@ -294,6 +302,10 @@ 
clean-libLTLIBRARIES: echo "rm -f \"$${dir}/so_locations\""; \ rm -f "$${dir}/so_locations"; \ done +ioapi.lo: minizip/ioapi.c +mztools.lo: minizip/mztools.c +unzip.lo: minizip/unzip.c +zip.lo: minizip/zip.c libhttrack.la: $(libhttrack_la_OBJECTS) $(libhttrack_la_DEPENDENCIES) $(LINK) -rpath $(libdir) $(libhttrack_la_LDFLAGS) $(libhttrack_la_OBJECTS) $(libhttrack_la_LIBADD) $(LIBS) binPROGRAMS_INSTALL = $(INSTALL_PROGRAM) @@ -387,7 +399,11 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htswrap.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htszlib.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/httrack.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ioapi.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/md5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mztools.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/unzip.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/zip.Plo@am__quote@ distclean-depend: -rm -rf ./$(DEPDIR) @@ -425,6 +441,138 @@ distclean-depend: @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ `test -f '$<' || echo '$(srcdir)/'`$< +ioapi.o: minizip/ioapi.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ioapi.o -MD -MP -MF "$(DEPDIR)/ioapi.Tpo" \ +@am__fastdepCC_TRUE@ -c -o ioapi.o `test -f 'minizip/ioapi.c' || echo '$(srcdir)/'`minizip/ioapi.c; \ +@am__fastdepCC_TRUE@ then mv "$(DEPDIR)/ioapi.Tpo" "$(DEPDIR)/ioapi.Po"; \ +@am__fastdepCC_TRUE@ else rm -f "$(DEPDIR)/ioapi.Tpo"; exit 1; \ +@am__fastdepCC_TRUE@ fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='minizip/ioapi.c' object='ioapi.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ depfile='$(DEPDIR)/ioapi.Po' tmpdepfile='$(DEPDIR)/ioapi.TPo' @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(CCDEPMODE) 
$(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ioapi.o `test -f 'minizip/ioapi.c' || echo '$(srcdir)/'`minizip/ioapi.c + +ioapi.obj: minizip/ioapi.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ioapi.obj -MD -MP -MF "$(DEPDIR)/ioapi.Tpo" \ +@am__fastdepCC_TRUE@ -c -o ioapi.obj `if test -f 'minizip/ioapi.c'; then $(CYGPATH_W) 'minizip/ioapi.c'; else $(CYGPATH_W) '$(srcdir)/minizip/ioapi.c'`; \ +@am__fastdepCC_TRUE@ then mv "$(DEPDIR)/ioapi.Tpo" "$(DEPDIR)/ioapi.Po"; \ +@am__fastdepCC_TRUE@ else rm -f "$(DEPDIR)/ioapi.Tpo"; exit 1; \ +@am__fastdepCC_TRUE@ fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='minizip/ioapi.c' object='ioapi.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ depfile='$(DEPDIR)/ioapi.Po' tmpdepfile='$(DEPDIR)/ioapi.TPo' @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ioapi.obj `if test -f 'minizip/ioapi.c'; then $(CYGPATH_W) 'minizip/ioapi.c'; else $(CYGPATH_W) '$(srcdir)/minizip/ioapi.c'` + +ioapi.lo: minizip/ioapi.c +@am__fastdepCC_TRUE@ if $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ioapi.lo -MD -MP -MF "$(DEPDIR)/ioapi.Tpo" \ +@am__fastdepCC_TRUE@ -c -o ioapi.lo `test -f 'minizip/ioapi.c' || echo '$(srcdir)/'`minizip/ioapi.c; \ +@am__fastdepCC_TRUE@ then mv "$(DEPDIR)/ioapi.Tpo" "$(DEPDIR)/ioapi.Plo"; \ +@am__fastdepCC_TRUE@ else rm -f "$(DEPDIR)/ioapi.Tpo"; exit 1; \ +@am__fastdepCC_TRUE@ fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='minizip/ioapi.c' object='ioapi.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ depfile='$(DEPDIR)/ioapi.Plo' tmpdepfile='$(DEPDIR)/ioapi.TPlo' 
@AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ioapi.lo `test -f 'minizip/ioapi.c' || echo '$(srcdir)/'`minizip/ioapi.c + +mztools.o: minizip/mztools.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT mztools.o -MD -MP -MF "$(DEPDIR)/mztools.Tpo" \ +@am__fastdepCC_TRUE@ -c -o mztools.o `test -f 'minizip/mztools.c' || echo '$(srcdir)/'`minizip/mztools.c; \ +@am__fastdepCC_TRUE@ then mv "$(DEPDIR)/mztools.Tpo" "$(DEPDIR)/mztools.Po"; \ +@am__fastdepCC_TRUE@ else rm -f "$(DEPDIR)/mztools.Tpo"; exit 1; \ +@am__fastdepCC_TRUE@ fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='minizip/mztools.c' object='mztools.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ depfile='$(DEPDIR)/mztools.Po' tmpdepfile='$(DEPDIR)/mztools.TPo' @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mztools.o `test -f 'minizip/mztools.c' || echo '$(srcdir)/'`minizip/mztools.c + +mztools.obj: minizip/mztools.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT mztools.obj -MD -MP -MF "$(DEPDIR)/mztools.Tpo" \ +@am__fastdepCC_TRUE@ -c -o mztools.obj `if test -f 'minizip/mztools.c'; then $(CYGPATH_W) 'minizip/mztools.c'; else $(CYGPATH_W) '$(srcdir)/minizip/mztools.c'`; \ +@am__fastdepCC_TRUE@ then mv "$(DEPDIR)/mztools.Tpo" "$(DEPDIR)/mztools.Po"; \ +@am__fastdepCC_TRUE@ else rm -f "$(DEPDIR)/mztools.Tpo"; exit 1; \ +@am__fastdepCC_TRUE@ fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='minizip/mztools.c' object='mztools.obj' libtool=no @AMDEPBACKSLASH@ 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ depfile='$(DEPDIR)/mztools.Po' tmpdepfile='$(DEPDIR)/mztools.TPo' @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mztools.obj `if test -f 'minizip/mztools.c'; then $(CYGPATH_W) 'minizip/mztools.c'; else $(CYGPATH_W) '$(srcdir)/minizip/mztools.c'` + +mztools.lo: minizip/mztools.c +@am__fastdepCC_TRUE@ if $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT mztools.lo -MD -MP -MF "$(DEPDIR)/mztools.Tpo" \ +@am__fastdepCC_TRUE@ -c -o mztools.lo `test -f 'minizip/mztools.c' || echo '$(srcdir)/'`minizip/mztools.c; \ +@am__fastdepCC_TRUE@ then mv "$(DEPDIR)/mztools.Tpo" "$(DEPDIR)/mztools.Plo"; \ +@am__fastdepCC_TRUE@ else rm -f "$(DEPDIR)/mztools.Tpo"; exit 1; \ +@am__fastdepCC_TRUE@ fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='minizip/mztools.c' object='mztools.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ depfile='$(DEPDIR)/mztools.Plo' tmpdepfile='$(DEPDIR)/mztools.TPlo' @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mztools.lo `test -f 'minizip/mztools.c' || echo '$(srcdir)/'`minizip/mztools.c + +unzip.o: minizip/unzip.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT unzip.o -MD -MP -MF "$(DEPDIR)/unzip.Tpo" \ +@am__fastdepCC_TRUE@ -c -o unzip.o `test -f 'minizip/unzip.c' || echo '$(srcdir)/'`minizip/unzip.c; \ +@am__fastdepCC_TRUE@ then mv "$(DEPDIR)/unzip.Tpo" "$(DEPDIR)/unzip.Po"; \ +@am__fastdepCC_TRUE@ else rm -f "$(DEPDIR)/unzip.Tpo"; exit 1; \ +@am__fastdepCC_TRUE@ fi 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='minizip/unzip.c' object='unzip.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ depfile='$(DEPDIR)/unzip.Po' tmpdepfile='$(DEPDIR)/unzip.TPo' @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o unzip.o `test -f 'minizip/unzip.c' || echo '$(srcdir)/'`minizip/unzip.c + +unzip.obj: minizip/unzip.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT unzip.obj -MD -MP -MF "$(DEPDIR)/unzip.Tpo" \ +@am__fastdepCC_TRUE@ -c -o unzip.obj `if test -f 'minizip/unzip.c'; then $(CYGPATH_W) 'minizip/unzip.c'; else $(CYGPATH_W) '$(srcdir)/minizip/unzip.c'`; \ +@am__fastdepCC_TRUE@ then mv "$(DEPDIR)/unzip.Tpo" "$(DEPDIR)/unzip.Po"; \ +@am__fastdepCC_TRUE@ else rm -f "$(DEPDIR)/unzip.Tpo"; exit 1; \ +@am__fastdepCC_TRUE@ fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='minizip/unzip.c' object='unzip.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ depfile='$(DEPDIR)/unzip.Po' tmpdepfile='$(DEPDIR)/unzip.TPo' @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o unzip.obj `if test -f 'minizip/unzip.c'; then $(CYGPATH_W) 'minizip/unzip.c'; else $(CYGPATH_W) '$(srcdir)/minizip/unzip.c'` + +unzip.lo: minizip/unzip.c +@am__fastdepCC_TRUE@ if $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT unzip.lo -MD -MP -MF "$(DEPDIR)/unzip.Tpo" \ +@am__fastdepCC_TRUE@ -c -o unzip.lo `test -f 'minizip/unzip.c' || echo '$(srcdir)/'`minizip/unzip.c; \ +@am__fastdepCC_TRUE@ then mv "$(DEPDIR)/unzip.Tpo" "$(DEPDIR)/unzip.Plo"; \ +@am__fastdepCC_TRUE@ else rm 
-f "$(DEPDIR)/unzip.Tpo"; exit 1; \ +@am__fastdepCC_TRUE@ fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='minizip/unzip.c' object='unzip.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ depfile='$(DEPDIR)/unzip.Plo' tmpdepfile='$(DEPDIR)/unzip.TPlo' @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o unzip.lo `test -f 'minizip/unzip.c' || echo '$(srcdir)/'`minizip/unzip.c + +zip.o: minizip/zip.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT zip.o -MD -MP -MF "$(DEPDIR)/zip.Tpo" \ +@am__fastdepCC_TRUE@ -c -o zip.o `test -f 'minizip/zip.c' || echo '$(srcdir)/'`minizip/zip.c; \ +@am__fastdepCC_TRUE@ then mv "$(DEPDIR)/zip.Tpo" "$(DEPDIR)/zip.Po"; \ +@am__fastdepCC_TRUE@ else rm -f "$(DEPDIR)/zip.Tpo"; exit 1; \ +@am__fastdepCC_TRUE@ fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='minizip/zip.c' object='zip.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ depfile='$(DEPDIR)/zip.Po' tmpdepfile='$(DEPDIR)/zip.TPo' @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o zip.o `test -f 'minizip/zip.c' || echo '$(srcdir)/'`minizip/zip.c + +zip.obj: minizip/zip.c +@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT zip.obj -MD -MP -MF "$(DEPDIR)/zip.Tpo" \ +@am__fastdepCC_TRUE@ -c -o zip.obj `if test -f 'minizip/zip.c'; then $(CYGPATH_W) 'minizip/zip.c'; else $(CYGPATH_W) '$(srcdir)/minizip/zip.c'`; \ +@am__fastdepCC_TRUE@ then mv "$(DEPDIR)/zip.Tpo" "$(DEPDIR)/zip.Po"; \ +@am__fastdepCC_TRUE@ else rm -f "$(DEPDIR)/zip.Tpo"; exit 1; \ 
+@am__fastdepCC_TRUE@ fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='minizip/zip.c' object='zip.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ depfile='$(DEPDIR)/zip.Po' tmpdepfile='$(DEPDIR)/zip.TPo' @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o zip.obj `if test -f 'minizip/zip.c'; then $(CYGPATH_W) 'minizip/zip.c'; else $(CYGPATH_W) '$(srcdir)/minizip/zip.c'` + +zip.lo: minizip/zip.c +@am__fastdepCC_TRUE@ if $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT zip.lo -MD -MP -MF "$(DEPDIR)/zip.Tpo" \ +@am__fastdepCC_TRUE@ -c -o zip.lo `test -f 'minizip/zip.c' || echo '$(srcdir)/'`minizip/zip.c; \ +@am__fastdepCC_TRUE@ then mv "$(DEPDIR)/zip.Tpo" "$(DEPDIR)/zip.Plo"; \ +@am__fastdepCC_TRUE@ else rm -f "$(DEPDIR)/zip.Tpo"; exit 1; \ +@am__fastdepCC_TRUE@ fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='minizip/zip.c' object='zip.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ depfile='$(DEPDIR)/zip.Plo' tmpdepfile='$(DEPDIR)/zip.TPlo' @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o zip.lo `test -f 'minizip/zip.c' || echo '$(srcdir)/'`minizip/zip.c + mostlyclean-libtool: -rm -f *.lo @@ -512,6 +660,7 @@ top_distdir = .. 
distdir = $(top_distdir)/$(PACKAGE)-$(VERSION) distdir: $(DISTFILES) + $(mkinstalldirs) $(distdir)/minizip @srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; \ list='$(DISTFILES)'; for file in $$list; do \ case $$file in \ diff --git a/src/hts-indextmpl.h b/src/hts-indextmpl.h index 29dd122..cddbefa 100644 --- a/src/hts-indextmpl.h +++ b/src/hts-indextmpl.h @@ -158,6 +158,13 @@ regen: " </TD>"LF\ " </TR>"LF +#define HTS_INDEX_BODYCAT \ + "<!-- Note: Template file not found, using internal one -->"LF\ + " <TH>"LF\ + " <BR/>"LF\ + " %s"LF\ + " </TH>"LF + /* %s = INFO */ /* %s = META REFRESH IF ANY */ #define HTS_INDEX_FOOTER \ @@ -167,7 +174,7 @@ regen: " <BR>"LF\ " <BR>"LF\ " <H6 ALIGN=\"RIGHT\">"LF\ - " <I>Mirror and index made by HTTrack Website Copier [XR&CO'2003]</I>"LF\ + " <I>Mirror and index made by HTTrack Website Copier [XR&CO'2005]</I>"LF\ " </H6>"LF\ " %s"LF\ " <!-- Thanks for using HTTrack Website Copier! -->"LF\ @@ -186,7 +193,7 @@ regen: ""LF\ "<table width=\"76%%\" border=\"0\" align=\"center\" valign=\"bottom\" cellspacing=\"0\" cellpadding=\"0\">"LF\ " <tr>"LF\ - " <td id=\"footer\"><small>© 2003 Xavier Roche & other contributors - Web Design: Kauler Leto.</small></td>"LF\ + " <td id=\"footer\"><small>© 2005 Xavier Roche & other contributors - Web Design: Kauler Leto.</small></td>"LF\ " </tr>"LF\ "</table>"LF\ ""LF\ @@ -317,7 +324,7 @@ regen: " </TABLE>"LF\ " <BR>"LF\ " <H6 ALIGN=\"RIGHT\">"LF\ - " <I>Mirror and index made by HTTrack Website Copier [XR&CO'2003]</I>"LF\ + " <I>Mirror and index made by HTTrack Website Copier [XR&CO'2005]</I>"LF\ " </H6>"LF\ " %s"LF\ " <!-- Thanks for using HTTrack Website Copier! 
-->"LF\ @@ -335,7 +342,7 @@ regen: ""LF\ "<table width=\"76%%\" border=\"0\" align=\"center\" valign=\"bottom\" cellspacing=\"0\" cellpadding=\"0\">"LF\ " <tr>"LF\ - " <td id=\"footer\"><small>© 2003 Xavier Roche & other contributors - Web Design: Kauler Leto.</small></td>"LF\ + " <td id=\"footer\"><small>© 2005 Xavier Roche & other contributors - Web Design: Kauler Leto.</small></td>"LF\ " </tr>"LF\ "</table>"LF\ ""LF\ @@ -476,7 +483,7 @@ regen: ""LF\ "<table width=\"76%%\" height=\"100%%\" border=\"0\" align=\"center\" valign=\"bottom\" cellspacing=\"0\" cellpadding=\"0\">"LF\ " <tr>"LF\ - " <td id=\"footer\"><small>© 2003 Xavier Roche & other contributors - Web Design: Kauler Leto.</small></td>"LF\ + " <td id=\"footer\"><small>© 2005 Xavier Roche & other contributors - Web Design: Kauler Leto.</small></td>"LF\ " </tr>"LF\ "</table>"LF\ ""LF\ @@ -613,7 +620,7 @@ regen: ""LF\ "<table width=\"76%%\" height=\"100%%\" border=\"0\" align=\"center\" valign=\"bottom\" cellspacing=\"0\" cellpadding=\"0\">"LF\ " <tr>"LF\ - " <td id=\"footer\"><small>© 2003 Xavier Roche & other contributors - Web Design: Kauler Leto.</small></td>"LF\ + " <td id=\"footer\"><small>© 2005 Xavier Roche & other contributors - Web Design: Kauler Leto.</small></td>"LF\ " </tr>"LF\ "</table>"LF\ ""LF\ diff --git a/src/htsalias.c b/src/htsalias.c index 1b65945..d2e09e1 100644 --- a/src/htsalias.c +++ b/src/htsalias.c @@ -35,12 +35,13 @@ Please visit our Website: http://www.httrack.com /* Author: Xavier Roche */ /* ------------------------------------------------------------ */ -#include <stdio.h> -#include <stdlib.h> -#include <string.h> +/* Internal engine bytecode */ +#define HTS_INTERNAL_BYTECODE + #include "htsbase.h" #include "htsalias.h" #include "htsglobal.h" + void linput(FILE* fp,char* s,int max); void hts_lowcase(char* s); @@ -108,6 +109,7 @@ const char* hts_optalias[][4] = { {"host-control","-H","param",""}, {"extended-parsing","-%P","param",""}, {"near","-n","single",""}, + 
{"disable-security-limits","-%!","single",""}, {"test","-t","single",""}, {"list","-%L","param1",""}, {"urllist","-%S","param1",""}, @@ -115,7 +117,7 @@ const char* hts_optalias[][4] = { {"structure","-N","param",""}, {"user-structure","-N","param1",""}, {"long-names","-L","param",""}, {"keep-links","-K","param",""}, - {"mime-html","-%M","param",""}, {"mht","-%M","param",""}, + {"mime-html","-%M","single",""}, {"mht","-%M","single",""}, {"replace-external","-x","single",""}, {"disable-passwords","-%x","single",""},{"disable-password","-%x","single",""}, {"include-query-string","-%q","single",""}, @@ -135,6 +137,8 @@ const char* hts_optalias[][4] = { {"updatehack","-%s","single",""}, {"sizehack","-%s","single",""}, {"urlhack","-%u","single",""}, {"user-agent","-F","param1","user-agent identity"}, + {"referer","-%R","param1","default referer URL"}, + {"from","-%E","param1","from email address"}, {"footer","-%F","param1",""}, {"cache","-C","param","number of retries for non-fatal errors"}, {"store-all-in-cache","-k","single",""}, @@ -150,7 +154,7 @@ const char* hts_optalias[][4] = { {"priority","-p","param",""}, {"debug-headers","-%H","single",""}, {"userdef-cmd","-V","param1",""}, - {"callback","-%W","param1",""}, {"wrapper","-%W","param1",""}, + {"callback","-%W","param1","plug an external callback"}, {"wrapper","-%W","param1","plug an external callback"}, {"structure","-N","param1","user-defined structure"}, {"usercommand","-V","param1","user-defined command"}, {"display","-%v","single","show files transfered and other funny realtime information"}, @@ -185,7 +189,10 @@ const char* hts_optalias[][4] = { {"fast-engine","-#X","single","Enable fast routines"}, {"debug-overflows","-#X0","single","Attempt to detect buffer overflows"}, {"debug-cache","-#C","param1","List files in the cache"}, - + {"extract-cache","-#C","single","Extract meta-data"}, + {"debug-parsing","-#d","single","debug: test parser"}, + {"repair-cache","-#R","single","repair the damaged cache ZIP 
file"}, {"repair","-#R","single",""}, + /* STANDARD ALIASES */ {"spider","-p0C0I0t","single",""}, {"testsite","-p0C0I0t","single",""}, @@ -226,6 +233,7 @@ const char* hts_optalias[][4] = { {"updatehttrack","--updatehttrack","single","update HTTrack Website Copier"}, {"clean","--clean","single","clean up log files and cache"}, {"tide","--clean","single","clean up log files and cache"}, + {"autotest","-#T","single",""}, /* */ {"","","",""} @@ -342,7 +350,7 @@ int optalias_check(int argc,const char * const * argv,int n_arg, return need_param; } - /* Check -P <path> */ + /* Check -O <path> */ { int pos; if ((pos=optreal_find(argv[n_arg]))>=0) { @@ -514,17 +522,19 @@ int optinclude_file(const char* name, /* Get home directory, '.' if failed */ /* example: /home/smith */ char* hts_gethome(void) { +#ifndef _WIN32_WCE char* home = getenv( "HOME" ); if (home) return home; else +#endif return "."; } /* Convert ~/foo into /home/smith/foo */ void expand_home(char* str) { if (str[0] == '~') { - char tempo[HTS_URLMAXSIZE*2]; + char BIGSTK tempo[HTS_URLMAXSIZE*2]; strcpybuff(tempo,hts_gethome()); strcatbuff(tempo,str+1); strcpybuff(str,tempo); diff --git a/src/htsalias.h b/src/htsalias.h index e5e8f82..21c3142 100644 --- a/src/htsalias.h +++ b/src/htsalias.h @@ -39,6 +39,8 @@ Please visit our Website: http://www.httrack.com #ifndef HTSALIAS_DEFH #define HTSALIAS_DEFH +/* Library internal definictions */ +#ifdef HTS_INTERNAL_BYTECODE extern const char* hts_optalias[][4]; int optalias_check(int argc,const char * const * argv,int n_arg, int* return_argc,char** return_argv, @@ -54,5 +56,6 @@ const char* opttype_value(int p); const char* opthelp_value(int p); char* hts_gethome(void); void expand_home(char* str); +#endif #endif diff --git a/src/htsback.c b/src/htsback.c index 6d0b119..317d4e7 100644 --- a/src/htsback.c +++ b/src/htsback.c @@ -35,15 +35,15 @@ Please visit our Website: http://www.httrack.com /* Author: Xavier Roche */ /* 
------------------------------------------------------------ */ +/* Internal engine bytecode */ +#define HTS_INTERNAL_BYTECODE + #include "htsback.h" /* specific definitions */ #include "htsbase.h" #include "htsnet.h" #include "htsthread.h" -#include <stdio.h> -#include <stdlib.h> -#include <string.h> #include <time.h> /* END specific definitions */ @@ -51,14 +51,18 @@ Please visit our Website: http://www.httrack.com #include "htsftp.h" #if HTS_USEZLIB #include "htszlib.h" +#else +#error HTS_USEZLIB not defined #endif //#endif #if HTS_WIN #ifndef __cplusplus // DOS +#ifndef _WIN32_WCE #include <process.h> /* _beginthread, _endthread */ #endif +#endif #else #endif @@ -142,228 +146,243 @@ int back_nsoc_overall(lien_back* back,int back_max) { // fermer les paramètres de transfert, // et notamment vérifier les fichiers compressés (décompresser), callback etc. int back_finalize(httrackp* opt,cache_back* cache,lien_back* back,int p) { - if ( + /* Don't store broken files */ + if (back[p].r.totalsize > 0 && back[p].r.size != back[p].r.totalsize && ! opt->tolerant) { + return -1; + } + + /* Store ? 
*/ + if (!back[p].finalized) { + back[p].finalized = 1; + if ( (back[p].status == 0) // ready && - (!back[p].testmode) // not test mode - && (back[p].r.statuscode>0) // not internal error ) { - char* state="unknown"; - - /* décompression */ + if (!back[p].testmode) { // not test mode + char* state="unknown"; + + /* décompression */ #if HTS_USEZLIB - if (gz_is_available && back[p].r.compressed) { - if (back[p].r.size > 0) { - //if ( (back[p].r.adr) && (back[p].r.size>0) ) { - // stats - back[p].compressed_size=back[p].r.size; - // en mémoire -> passage sur disque - if (!back[p].r.is_write) { - back[p].tmpfile_buffer[0]='\0'; - back[p].tmpfile=tmpnam(back[p].tmpfile_buffer); - if (back[p].tmpfile != NULL && back[p].tmpfile[0] != '\0') { - back[p].r.out=fopen(back[p].tmpfile,"wb"); - if (back[p].r.out) { - if ((back[p].r.adr) && (back[p].r.size>0)) { - if (fwrite(back[p].r.adr,1,(INTsys)back[p].r.size,back[p].r.out) != back[p].r.size) { + if (gz_is_available && back[p].r.compressed) { + if (back[p].r.size > 0) { + //if ( (back[p].r.adr) && (back[p].r.size>0) ) { + // stats + back[p].compressed_size=back[p].r.size; + // en mémoire -> passage sur disque + if (!back[p].r.is_write) { + back[p].tmpfile_buffer[0]='\0'; + back[p].tmpfile=tmpnam(back[p].tmpfile_buffer); + if (back[p].tmpfile != NULL && back[p].tmpfile[0] != '\0') { + back[p].r.out=fopen(back[p].tmpfile,"wb"); + if (back[p].r.out) { + if ((back[p].r.adr) && (back[p].r.size>0)) { + if (fwrite(back[p].r.adr,1,(INTsys)back[p].r.size,back[p].r.out) != back[p].r.size) { + back[p].r.statuscode=-1; + strcpybuff(back[p].r.msg,"Write error when decompressing"); + } + } else { + back[p].tmpfile[0]='\0'; back[p].r.statuscode=-1; - strcpybuff(back[p].r.msg,"Write error when decompressing"); + strcpybuff(back[p].r.msg,"Empty compressed file"); } } else { back[p].tmpfile[0]='\0'; back[p].r.statuscode=-1; - strcpybuff(back[p].r.msg,"Empty compressed file"); + strcpybuff(back[p].r.msg,"Open error when decompressing"); } - } 
else { - back[p].tmpfile[0]='\0'; - back[p].r.statuscode=-1; - strcpybuff(back[p].r.msg,"Open error when decompressing"); } } - } - // fermer fichier sortie - if (back[p].r.out!=NULL) { - fclose(back[p].r.out); - back[p].r.out=NULL; - } - // décompression - if (back[p].tmpfile != NULL && back[p].tmpfile[0] != '\0' && back[p].url_sav[0]) { - LLint size; - filecreateempty(back[p].url_sav); // filenote & co - if ((size = hts_zunpack(back[p].tmpfile,back[p].url_sav))>=0) { - back[p].r.size=back[p].r.totalsize=size; - // fichier -> mémoire - if (!back[p].r.is_write) { - deleteaddr(&back[p].r); - back[p].r.adr=readfile(back[p].url_sav); - if (!back[p].r.adr) { - back[p].r.statuscode=-1; - strcpybuff(back[p].r.msg,"Read error when decompressing"); + // fermer fichier sortie + if (back[p].r.out!=NULL) { + fclose(back[p].r.out); + back[p].r.out=NULL; + } + // décompression + if (back[p].tmpfile != NULL && back[p].tmpfile[0] != '\0' && back[p].url_sav[0]) { + LLint size; + filecreateempty(back[p].url_sav); // filenote & co + if ((size = hts_zunpack(back[p].tmpfile,back[p].url_sav))>=0) { + back[p].r.size=back[p].r.totalsize=size; + // fichier -> mémoire + if (!back[p].r.is_write) { + deleteaddr(&back[p].r); + back[p].r.adr=readfile(back[p].url_sav); + if (!back[p].r.adr) { + back[p].r.statuscode=-1; + strcpybuff(back[p].r.msg,"Read error when decompressing"); + } + remove(back[p].url_sav); } - remove(back[p].url_sav); } + remove(back[p].tmpfile); } - remove(back[p].tmpfile); + // stats + HTS_STAT.total_packed+=back[p].compressed_size; + HTS_STAT.total_unpacked+=back[p].r.size; + HTS_STAT.total_packedfiles++; + // unflag } - // stats - HTS_STAT.total_packed+=back[p].compressed_size; - HTS_STAT.total_unpacked+=back[p].r.size; - HTS_STAT.total_packedfiles++; - // unflag } - } - back[p].r.compressed=0; + back[p].r.compressed=0; #endif - - /* Stats */ - if (cache->txt) { - char flags[32]; - char s[256]; - time_t tt; - struct tm* A; - tt=time(NULL); - A=localtime(&tt); - if (A == 
NULL) { - int localtime_returned_null=0; - assert(localtime_returned_null); - } - strftime(s,250,"%H:%M:%S",A); - flags[0]='\0'; - /* input flags */ - if (back[p].is_update) - strcatbuff(flags, "U"); // update request - else - strcatbuff(flags, "-"); - if (back[p].range_req_size) - strcatbuff(flags, "R"); // range request - else - strcatbuff(flags, "-"); - /* state flags */ - if (back[p].r.is_file) // direct to disk - strcatbuff(flags, "F"); - else - strcatbuff(flags, "-"); - /* output flags */ - if (!back[p].r.notmodified) - strcatbuff(flags, "M"); // modified - else - strcatbuff(flags, "-"); - if (back[p].r.is_chunk) // chunked - strcatbuff(flags, "C"); - else - strcatbuff(flags, "-"); - if (back[p].r.compressed) - strcatbuff(flags, "Z"); // gzip - else - strcatbuff(flags, "-"); - /* Err I had to split these.. */ - fprintf(cache->txt,"%s\t", s); - fprintf(cache->txt,LLintP"/", (LLint)back[p].r.size); - fprintf(cache->txt,LLintP,(LLint)back[p].r.totalsize); - fprintf(cache->txt,"\t%s\t",flags); - } - if (back[p].r.statuscode==200) { - if (back[p].r.size>=0) { - if (strcmp(back[p].url_fil,"/robots.txt") !=0 ) { - HTS_STAT.stat_bytes+=back[p].r.size; - HTS_STAT.stat_files++; + /* Stats */ + if (cache->txt) { + char flags[32]; + char s[256]; + time_t tt; + struct tm* A; + tt=time(NULL); + A=localtime(&tt); + if (A == NULL) { + int localtime_returned_null=0; + assert(localtime_returned_null); } - if ( (!back[p].r.notmodified) && (opt->is_update) ) { - HTS_STAT.stat_updated_files++; // page modifiée - if (opt->log!=NULL) { - fspc(opt->log,"info"); - if (back[p].is_update) { - fprintf(opt->log,"engine: transfer-status: link updated: %s%s -> %s"LF,back[p].url_adr,back[p].url_fil,back[p].url_sav); - } else { - fprintf(opt->log,"engine: transfer-status: link added: %s%s -> %s"LF,back[p].url_adr,back[p].url_fil,back[p].url_sav); - } - test_flush; + strftime(s,250,"%H:%M:%S",A); + + flags[0]='\0'; + /* input flags */ + if (back[p].is_update) + strcatbuff(flags, "U"); // 
update request + else + strcatbuff(flags, "-"); + if (back[p].range_req_size) + strcatbuff(flags, "R"); // range request + else + strcatbuff(flags, "-"); + /* state flags */ + if (back[p].r.is_file) // direct to disk + strcatbuff(flags, "F"); + else + strcatbuff(flags, "-"); + /* output flags */ + if (!back[p].r.notmodified) + strcatbuff(flags, "M"); // modified + else + strcatbuff(flags, "-"); + if (back[p].r.is_chunk) // chunked + strcatbuff(flags, "C"); + else + strcatbuff(flags, "-"); + if (back[p].r.compressed) + strcatbuff(flags, "Z"); // gzip + else + strcatbuff(flags, "-"); + /* Err I had to split these.. */ + fprintf(cache->txt,"%s\t", s); + fprintf(cache->txt,LLintP"/", (LLint)back[p].r.size); + fprintf(cache->txt,LLintP,(LLint)back[p].r.totalsize); + fprintf(cache->txt,"\t%s\t",flags); + } + if (back[p].r.statuscode==200) { + if (back[p].r.size>=0) { + if (strcmp(back[p].url_fil,"/robots.txt") !=0 ) { + HTS_STAT.stat_bytes+=back[p].r.size; + HTS_STAT.stat_files++; } - if (cache->txt) { - if (back[p].is_update) { - state="updated"; - } else { - state="added"; + if ( (!back[p].r.notmodified) && (opt->is_update) ) { + HTS_STAT.stat_updated_files++; // page modifiée + if (opt->log!=NULL) { + fspc(opt->log,"info"); + if (back[p].is_update) { + fprintf(opt->log,"engine: transfer-status: link updated: %s%s -> %s"LF,back[p].url_adr,back[p].url_fil,back[p].url_sav); + } else { + fprintf(opt->log,"engine: transfer-status: link added: %s%s -> %s"LF,back[p].url_adr,back[p].url_fil,back[p].url_sav); + } + test_flush; + } + if (cache->txt) { + if (back[p].is_update) { + state="updated"; + } else { + state="added"; + } + } + } else { + if ( (opt->debug>0) && (opt->log!=NULL) ) { + fspc(opt->log,"info"); fprintf(opt->log,"engine: transfer-status: link recorded: %s%s -> %s"LF,back[p].url_adr,back[p].url_fil,back[p].url_sav); + test_flush; + } + if (cache->txt) { + if (opt->is_update) + state="untouched"; + else + state="added"; } } } else { if ( (opt->debug>0) && 
(opt->log!=NULL) ) { - fspc(opt->log,"info"); fprintf(opt->log,"engine: transfer-status: link recorded: %s%s -> %s"LF,back[p].url_adr,back[p].url_fil,back[p].url_sav); + fspc(opt->log,"info"); fprintf(opt->log,"engine: transfer-status: empty file? (%d, '%s'): %s%s"LF,back[p].r.statuscode,back[p].r.msg,back[p].url_adr,back[p].url_fil); test_flush; } if (cache->txt) { - if (opt->is_update) - state="untouched"; - else - state="added"; + state="empty"; } } } else { if ( (opt->debug>0) && (opt->log!=NULL) ) { - fspc(opt->log,"info"); fprintf(opt->log,"engine: transfer-status: empty file? (%d, '%s'): %s%s"LF,back[p].r.statuscode,back[p].r.msg,back[p].url_adr,back[p].url_fil); - test_flush; + fspc(opt->log,"info"); fprintf(opt->log,"engine: transfer-status: link error (%d, '%s'): %s%s"LF,back[p].r.statuscode,back[p].r.msg,back[p].url_adr,back[p].url_fil); } if (cache->txt) { - state="empty"; + state="error"; } } - } else { - if ( (opt->debug>0) && (opt->log!=NULL) ) { - fspc(opt->log,"info"); fprintf(opt->log,"engine: transfer-status: link error (%d, '%s'): %s%s"LF,back[p].r.statuscode,back[p].r.msg,back[p].url_adr,back[p].url_fil); - } if (cache->txt) { - state="error"; + fprintf(cache->txt, + "%d\t" + "%s ('%s')\t" + "%s\t" + "%s%s\t" + "%s%s\t%s\t" + "(from %s%s)" + LF, + back[p].r.statuscode, + state, escape_check_url_addr(back[p].r.msg), + escape_check_url_addr(back[p].r.contenttype), + ((back[p].r.etag[0])?"etag:":((back[p].r.lastmodified[0])?"date:":"")), escape_check_url_addr((back[p].r.etag[0])?back[p].r.etag:(back[p].r.lastmodified)), + escape_check_url_addr(back[p].url_adr),escape_check_url_addr(back[p].url_fil),escape_check_url_addr(back[p].url_sav), + escape_check_url_addr(back[p].referer_adr),escape_check_url_addr(back[p].referer_fil) + ); + if (opt->flush) + fflush(cache->txt); } - } - if (cache->txt) { - fprintf(cache->txt, - "%d\t" - "%s ('%s')\t" - "%s\t" - "%s%s\t" - "%s%s\t%s\t" - "(from %s%s)" - LF, - back[p].r.statuscode, - state, 
escape_check_url_addr(back[p].r.msg), - escape_check_url_addr(back[p].r.contenttype), - ((back[p].r.etag[0])?"etag:":((back[p].r.lastmodified[0])?"date:":"")), escape_check_url_addr((back[p].r.etag[0])?back[p].r.etag:(back[p].r.lastmodified)), - escape_check_url_addr(back[p].url_adr),escape_check_url_addr(back[p].url_fil),escape_check_url_addr(back[p].url_sav), - escape_check_url_addr(back[p].referer_adr),escape_check_url_addr(back[p].referer_fil) - ); - if (opt->flush) - fflush(cache->txt); - } - - /* Cache */ - cache_mayadd(opt,cache,&back[p].r,back[p].url_adr,back[p].url_fil,back[p].url_sav); - - // status finished callback + + /* Cache */ + cache_mayadd(opt,cache,&back[p].r,back[p].url_adr,back[p].url_fil,back[p].url_sav); + + // status finished callback #if HTS_ANALYSTE - hts_htmlcheck_xfrstatus(&back[p]); + hts_htmlcheck_xfrstatus(&back[p]); #endif - return 0; + return 0; + } else { // testmode + if (back[p].r.statuscode / 100 >= 3) { /* Store 3XX, 4XX, 5XX test response codes, but NOT 2XX */ + /* Cache */ + cache_mayadd(opt,cache,&back[p].r,back[p].url_adr,back[p].url_fil,NULL); + } + } + } } return -1; } /* try to keep the connection alive */ -int back_letlive(httrackp* opt, lien_back* back, int p) { +int back_letlive(httrackp* opt, cache_back* cache, lien_back* back, int p) { + int checkerror; htsblk* src = &back[p].r; if (src && !src->is_file && src->soc != INVALID_SOCKET && src->statuscode >= 0 /* no timeout errors & co */ && src->keep_alive_trailers == 0 /* not yet supported (chunk trailers) */ - && !check_sockerror(src->soc) + && ! 
( checkerror = check_sockerror(src->soc) ) /*&& !check_sockdata(src->soc)*/ /* no unexpected data */ ) { htsblk tmp; memset(&tmp, 0, sizeof(tmp)); /* clear everything but connection: switch, close, and reswitch */ back_connxfr(src, &tmp); - back_delete(opt, back, p); + back_delete(opt, cache, back, p); //deletehttp(src); back_connxfr(&tmp, src); src->req.flush_garbage=1; /* ignore CRLF garbage */ @@ -392,17 +411,25 @@ void back_connxfr(htsblk* src, htsblk* dst) { } // clear, or leave for keep-alive -int back_maydelete(httrackp* opt,lien_back* back, int p) { +int back_maydelete(httrackp* opt,cache_back* cache,lien_back* back, int p) { if (p>=0) { // on sait jamais.. - if (!opt->nokeepalive + if ( + /* Keep-alive authorized by user */ + !opt->nokeepalive + /* Socket currently is keep-alive! */ && back[p].r.keep_alive + /* Remaining authorized requests */ && back[p].r.keep_alive_max > 1 + /* Known keep-alive start (security) */ && back[p].ka_time_start + /* We're on time */ && time_local() < back[p].ka_time_start + back[p].r.keep_alive_t + /* Connection delay must not exceed keep-alive timeout */ + && ( opt->maxconn <= 0 || ( back[p].r.keep_alive_t > ( 1.0 / opt->maxconn ) ) ) ) { lien_back tmp; strcpybuff(tmp.url_adr, back[p].url_adr); - if (back_letlive(opt, back, p)) { + if (back_letlive(opt, cache, back, p)) { strcpybuff(back[p].url_adr, tmp.url_adr); back[p].status = -103; // alive & waiting if ((opt->debug>1) && (opt->log!=NULL)) { @@ -413,21 +440,37 @@ int back_maydelete(httrackp* opt,lien_back* back, int p) { return 1; } } - back_delete(opt,back, p); + back_delete(opt,cache,back, p); } return 0; } // clear, or leave for keep-alive -void back_maydeletehttp(httrackp* opt, lien_back* back, int back_max, int p) { +void back_maydeletehttp(httrackp* opt, cache_back* cache, lien_back* back, int back_max, int p) { + TStamp lt = 0; if (back[p].r.soc!=INVALID_SOCKET) { int q; - if (!opt->nokeepalive + if ( + back[p].r.soc != INVALID_SOCKET /* security check */ + && 
back[p].r.statuscode >= 0 /* no timeout errors & co */ + && back[p].r.keep_alive_trailers == 0 /* not yet supported (chunk trailers) */ + /* Socket not in I/O error status */ + && !back[p].r.is_file + && !check_sockerror(back[p].r.soc) + /* Keep-alive authorized by user */ + && !opt->nokeepalive + /* Socket currently is keep-alive! */ && back[p].r.keep_alive + /* Remaining authorized requests */ && back[p].r.keep_alive_max > 1 + /* Known keep-alive start (security) */ && back[p].ka_time_start - && time_local() < back[p].ka_time_start + back[p].r.keep_alive_t - && ( q = back_search(opt, back, back_max) ) >= 0 + /* We're on time */ + && ( lt = time_local() ) < back[p].ka_time_start + back[p].r.keep_alive_t + /* Connection delay must not exceed keep-alive timeout */ + && ( opt->maxconn <= 0 || ( back[p].r.keep_alive_t > ( 1.0 / opt->maxconn ) ) ) + /* Available slot in backing */ + && ( q = back_search(opt, cache, back, back_max) ) >= 0 ) { lien_back tmp; @@ -452,13 +495,13 @@ void back_maydeletehttp(httrackp* opt, lien_back* back, int back_max, int p) { /* attempt to attach a live connection to this slot */ -int back_trylive(httrackp* opt,lien_back* back, int back_max, int p) { +int back_trylive(httrackp* opt,cache_back* cache,lien_back* back, int back_max, int p) { if (p>=0 && back[p].status != -103) { // we never know.. 
int i = back_searchlive(opt,back, back_max, back[p].url_adr); // search slot if (i >= 0 && i != p) { deletehttp(&back[p].r); // security check back_connxfr(&back[i].r, &back[p].r); // transfer live connection settings from i to p - back_delete(opt,back, i); // delete old slot + back_delete(opt,cache,back, i); // delete old slot back[p].status=100; // ready to connect return 1; // success: will reuse live connection } @@ -483,7 +526,7 @@ int back_searchlive(httrackp* opt, lien_back* back, int back_max, char* search_a return -1; } -int back_search(httrackp* opt,lien_back* back, int back_max) { +int back_search(httrackp* opt,cache_back* cache,lien_back* back, int back_max) { int i; /* try to find an empty place */ @@ -497,7 +540,7 @@ int back_search(httrackp* opt,lien_back* back, int back_max) { for(i = 0 ; i < back_max ; i++ ) { if (back[i].status == -103) { /* close this place */ - back_delete(opt,back, i); + back_delete(opt,cache,back, i); return i; } } @@ -507,18 +550,33 @@ int back_search(httrackp* opt,lien_back* back, int back_max) { } // effacer entrée -int back_delete(httrackp* opt, lien_back* back, int p) { +int back_delete(httrackp* opt, cache_back* cache, lien_back* back, int p) { if (p>=0) { // on sait jamais.. 
// Vérificateur d'intégrité #if DEBUG_CHECKINT _CHECKINT(&back[p],"Appel back_delete") #endif #if HTS_DEBUG_CLOSESOCK - char info[256]; - sprintf(info,"back_delete: #%d\n",p); - DEBUG_W2(info); + DEBUG_W("back_delete: #%d\n" _ (int) p); #endif - + + // Finalize + if (!back[p].finalized) { + if ( + (back[p].status == 0) // ready + && + (!back[p].testmode) // not test mode + && + (back[p].r.statuscode>0) // not internal error + ) { + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"File '%s%s' -> %s not yet saved in cache - saving now"LF, back[p].url_adr, back[p].url_fil, back[p].url_sav); test_flush; + } + } + back_finalize(opt, cache, back, p); + } + back[p].finalized = 0; + // Libérer tous les sockets, handles, buffers.. if (back[p].r.soc!=INVALID_SOCKET) { #if HTS_DEBUG_CLOSESOCK @@ -546,6 +604,12 @@ int back_delete(httrackp* opt, lien_back* back, int p) { } // } + // headers + if (back[p].r.headers != NULL) { + freet(back[p].r.headers); + back[p].r.headers = NULL; + } + /* fichier de sortie */ if (back[p].r.out!=NULL) { // fermer fichier sortie fclose(back[p].r.out); @@ -607,7 +671,7 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char* // rechercher emplacement back_clean(opt, cache, back, back_max); - if ( ( p = back_search(opt, back, back_max) ) >= 0) { + if ( ( p = back_search(opt, cache, back, back_max) ) >= 0) { back[p].send_too[0]='\0'; // éventuels paramètres supplémentaires à transmettre au serveur // clear r @@ -664,6 +728,25 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char* return 0; } + // test "fast header" cache ; that is, tests we did that lead to 3XX/4XX/5XX response codes + if (cache->cached_tests != NULL) { + long int ptr = 0; + if (inthash_read((inthash)cache->cached_tests, concat(adr, fil), (long int*)&ptr)) { // gotcha + if (ptr != 0) { + char* text = (char*) ptr; + char* lf = strchr(text, '\n'); + int code = 0; + if (sscanf(text, "%d", &code) 
== 1) { // got code + back[p].r.statuscode=code; + if (lf != NULL && *lf != '\0') { // got location ? + strcpybuff(back[p].r.location, lf + 1); + } + return 0; + } + } + } + } + // tester cache if ((strcmp(adr,"file://")) /* pas fichier */ && ( (!test) || (cache->type==1) ) /* cache prioritaire, laisser passer en test! */ @@ -681,7 +764,7 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char* #else if (cache->use) { #endif - char buff[HTS_URLMAXSIZE*4]; + char BIGSTK buff[HTS_URLMAXSIZE*4]; #if HTS_FAST_CACHE strcpybuff(buff,adr); strcatbuff(buff,fil); hash_pos_return=inthash_read((inthash)cache->hashtable,buff,(long int*)&hash_pos); @@ -710,7 +793,7 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char* /* It is possible that the file has been moved due to changes in build structure */ { - char previous_save[HTS_URLMAXSIZE*2]; + char BIGSTK previous_save[HTS_URLMAXSIZE*2]; previous_save[0] = '\0'; back[p].r = cache_readex(opt, cache, adr, fil, NULL, back[p].location_buffer, previous_save, 0); if (previous_save[0] != '\0' && fexist(fconv(previous_save))) { @@ -861,9 +944,6 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char* back[p].r.req.nocompression=1; /* Do not compress when updating! */ } - /* else if (strnotempty(cache->lastmodified)) - sprintf(back[p].send_too,"If-Modified-Since: %s\r\n",cache->lastmodified); - */ } #if DEBUGCA printf("..is modified test %s\n",back[p].send_too); @@ -881,10 +961,11 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char* // On demande juste les données restantes si le date est valide (206), tout sinon (200) if ((ishtml(save) != 1) && (ishtml(back[p].url_fil)!=1)) { // NON HTML (liens changés!!) if (sz>0) { // Fichier non vide? (question bête, sinon on transfert tout!) - if (strnotempty(cache->lastmodified)) { /* pas de If-.. 
possible */ - /*if ( (!opt->http10) && (strnotempty(cache->lastmodified)) ) { */ /* ne pas forcer 1.0 */ + char lastmodified[256]; + get_filetime_rfc822(save, lastmodified); + if (strnotempty(lastmodified)) { /* pas de If-.. possible */ #if DEBUGCA - printf("..if unmodified since %s size "LLintP"\n",cache->lastmodified,(LLint)sz); + printf("..if unmodified since %s size "LLintP"\n", lastmodified, (LLint)sz); #endif if ((opt->debug>1) && (opt->log!=NULL)) { fspc(opt->log,"debug"); fprintf(opt->log,"File partially present ("LLintP" bytes): %s%s"LF,(LLint)sz,back[p].url_adr,back[p].url_fil); test_flush; @@ -899,10 +980,10 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char* back[p].http11=1; // En tête 1.1 } else */ - if (strlen(cache->lastmodified)) { + if (strlen(lastmodified)) { sprintf(back[p].send_too, "If-Unmodified-Since: %s\r\nRange: bytes="LLintP"-\r\n" - ,cache->lastmodified,(LLint)sz); + , lastmodified, (LLint)sz); back[p].http11=1; // En tête 1.1 back[p].range_req_size=sz; back[p].r.req.range_used=1; @@ -959,6 +1040,8 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char* memcpy(&(back[p].r.req.proxy), &opt->proxy, sizeof(opt->proxy)); // et user-agent strcpybuff(back[p].r.req.user_agent,opt->user_agent); + strcpybuff(back[p].r.req.referer,opt->referer); + strcpybuff(back[p].r.req.from,opt->from); strcpybuff(back[p].r.req.lang_iso,opt->lang_iso); back[p].r.req.user_agent_send=opt->user_agent_send; // et http11 @@ -997,7 +1080,7 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char* } #endif - if (!back_trylive(opt,back, back_max, p)) { + if (!back_trylive(opt, cache, back, back_max, p)) { #if HTS_XGETHOST #if HDEBUG printf("back_solve..\n"); @@ -1117,7 +1200,7 @@ printf("Xfopen ok, poll..\n"); #if HTS_XGETHOST #if USE_BEGINTHREAD // lancement multithread du robot -PTHREAD_TYPE Hostlookup(void* iadr_p) { +PTHREAD_TYPE PTHREAD_TYPE_FNC Hostlookup(void* iadr_p) { char 
iadr[256]; t_dnscache* cache=_hts_cache(); // adresse du cache t_hostent* hp; @@ -1209,7 +1292,7 @@ void back_solve(lien_back* back) { char* p = calloct(strlen(a)+2,1); if (p) { strcpybuff(p,a); - _beginthread( Hostlookup , 0, p ); + (void)hts_newthread( Hostlookup , 0, p ); } } #else @@ -1221,7 +1304,7 @@ void back_solve(lien_back* back) { char* p = calloct(strlen(a)+2,1); if (p) { strcpybuff(p,a); - _beginthread( Hostlookup , 0, p ); + (void)hts_newthread( Hostlookup , 0, p ); } #else // Sous Unix, le gethostbyname() est bloquant.. @@ -1264,8 +1347,8 @@ void back_clean(httrackp* opt,cache_back* cache,lien_back* back,int back_max) { if (back[i].r.statuscode==200) { // HTTP "OK" if (back[i].r.size>0) { // size>0 if (back[i].r.is_write // not in memory (on disk, ready) - && !is_hypertext_mime(back[i].r.contenttype) // not HTML/hypertext - && !may_be_hypertext_mime(back[i].r.contenttype) // may NOT be parseable mime type + && !is_hypertext_mime(back[i].r.contenttype, back[i].url_fil) // not HTML/hypertext + && !may_be_hypertext_mime(back[i].r.contenttype, back[i].url_fil) // may NOT be parseable mime type ) { if (back[i].pass2_ptr) { // finalize @@ -1279,20 +1362,20 @@ void back_clean(httrackp* opt,cache_back* cache,lien_back* back,int back_max) { //xxxcache_mayadd(opt,cache,&back[i].r,back[i].url_adr,back[i].url_fil,back[i].url_sav); usercommand(opt, 0, NULL, back[i].url_sav, back[i].url_adr, back[i].url_fil); *back[i].pass2_ptr=-1; // Done! 
- back_maydelete(opt,back,i); // May delete backing entry if ((opt->debug>0) && (opt->log!=NULL)) { fspc(opt->log,"info"); fprintf(opt->log,"File successfully written in background: %s"LF,back[i].url_sav); test_flush; } + back_maydelete(opt,cache,back,i); // May delete backing entry } } else { if (!back[i].finalized) { if (1) { /* Ensure deleted or recycled socket */ /* BUT DO NOT YET WIPE back[i].r.adr */ - back_maydeletehttp(opt, back, back_max, i); if ( (opt->debug>1) && (opt->log!=NULL) ) { fspc(opt->log,"debug"); fprintf(opt->log,"file %s%s validated (cached, left in memory)"LF,back[i].url_adr,back[i].url_fil); test_flush; } + back_maydeletehttp(opt, cache, back, back_max, i); } else { /* NOT YET HANDLED CORRECTLY (READ IN NEW CACHE TO DO) @@ -1302,7 +1385,7 @@ void back_clean(httrackp* opt,cache_back* cache,lien_back* back,int back_max) { htsblk r; /* Ensure deleted or recycled socket */ - back_maydeletehttp(opt, back, back_max, i); + back_maydeletehttp(opt, cache, back, back_max, i); assertf(back[i].r.soc == INVALID_SOCKET); /* Check header */ @@ -1312,7 +1395,6 @@ void back_clean(httrackp* opt,cache_back* cache,lien_back* back,int back_max) { /* Delete buffer and sockets */ deleteaddr(&back[i].r); deletehttp(&back[i].r); - back[i].finalized = 1; if ( (opt->debug>1) && (opt->log!=NULL) ) { fspc(opt->log,"debug"); fprintf(opt->log,"file %s%s temporarily left in cache to spare memory"LF,back[i].url_adr,back[i].url_fil); test_flush; } @@ -1344,7 +1426,7 @@ void back_clean(httrackp* opt,cache_back* cache,lien_back* back,int back_max) { back[i].url_adr); test_flush; } - back_delete(opt,back, i); // delete backing entry + back_delete(opt,cache,back, i); // delete backing entry } } } @@ -1352,7 +1434,7 @@ void back_clean(httrackp* opt,cache_back* cache,lien_back* back,int back_max) { for(i=0;i<back_max;i++) { if (back[i].status == 0) { // ready if (back[i].r.soc != INVALID_SOCKET) { - back_maydeletehttp(opt,back, back_max, i); + back_maydeletehttp(opt,cache,back, 
back_max, i); } } @@ -1369,7 +1451,7 @@ void back_clean(httrackp* opt,cache_back* cache,lien_back* back,int back_max) { } for(i = 0 ; i < back_max && curr > max ; i++) { if (back[i].status == -103) { - back_delete(opt,back, i); // delete backing entry + back_delete(opt,cache,back, i); // delete backing entry curr--; } } @@ -1379,7 +1461,7 @@ void back_clean(httrackp* opt,cache_back* cache,lien_back* back,int back_max) { // attente (gestion des buffers des sockets) void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TStamp stat_timestart) { - int i; + unsigned int i_mod; T_SOC nfds=INVALID_SOCKET; fd_set fds,fds_c,fds_e; // fds pour lecture, connect (write), et erreur int nsockets; // nbre sockets @@ -1393,7 +1475,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta #if HTS_ANALYSTE int max_loop_chk=0; #endif - + unsigned int mod_random = (unsigned int) ( time_local() + HTS_STAT.HTS_TOTAL_RECV ); // max. number of loops max_loop=8; @@ -1422,7 +1504,9 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta nfds=INVALID_SOCKET; max_c=1; - for(i=0;i<back_max;i++) { + for(i_mod = 0 ; i_mod < (unsigned int) back_max ; i_mod++) { + // for(i=0;i<back_max;i++) { + unsigned int i = ( i_mod + mod_random ) % ( back_max ); // en cas de gestion du connect préemptif #if HTS_XCONN @@ -1541,8 +1625,9 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta busy_recv=0; // recevoir les données arrivées - for(i=0;i<back_max;i++) { - + for(i_mod = 0 ; i_mod < (unsigned int) back_max ; i_mod++) { + // for(i=0;i<back_max;i++) { + unsigned int i = ( i_mod + mod_random ) % ( back_max ); if (back[i].status>0) { if (!back[i].r.is_file) { // not file.. if (back[i].r.soc!=INVALID_SOCKET) { // hey, you never know.. 
@@ -1561,7 +1646,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta else strcpybuff(back[i].r.msg,"Receive Error"); if (back[i].status == -103) { /* Keep-alive socket */ - back_delete(opt,back, i); + back_delete(opt,cache,back, i); } else { back[i].status=0; // terminé } @@ -1619,9 +1704,9 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta if ((back[i].r.soc != INVALID_SOCKET) && (back[i].status==100)) { /* limit nb. connections/seconds to avoid server overload */ - if (opt->maxconn>0) { + /*if (opt->maxconn>0) { Sleep(1000/opt->maxconn); - } + }*/ back[i].ka_time_start=time_local(); if (back[i].timeout>0) { // refresh timeout si besoin est @@ -1754,6 +1839,13 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta } } #endif + else if (back[i].status==1001) { // ftp ready + back[i].status=0; + // finalize transfer + if (back[i].r.statuscode>0) { + back_finalize(opt,cache,back,i); + } + } else if ((back[i].status>0) && (back[i].status<1000)) { // en réception http int dispo=0; @@ -1764,11 +1856,12 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta // données dispo? //## if (back[i].url_adr[0]!=lOCAL_CHAR) - if (!back[i].r.is_file) { - dispo=FD_ISSET(back[i].r.soc,&fds); - } - else + if (back[i].r.is_file) + dispo=1; + else if (back[i].r.ssl) dispo=1; + else + dispo=FD_ISSET(back[i].r.soc,&fds); // Check transfer rate! 
if (!max_read_bytes) @@ -1795,7 +1888,8 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta if (strnotempty(back[i].url_sav)) { if (strcmp(back[i].url_fil,"/robots.txt")) { if (back[i].r.statuscode==200) { // 'OK' - if (!is_hypertext_mime(back[i].r.contenttype)) { // pas HTML + if (!is_hypertext_mime(back[i].r.contenttype, back[i].url_fil) + ) { // pas HTML if (opt->getmode&2) { // on peut ecrire des non html int fcheck=0; back[i].r.is_write=1; // écrire @@ -1900,7 +1994,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta retour_fread=(int) http_xfread1(&(back[i].r),(int) max_read_bytes); // retour_fread=http_fread1(&(back[i].r)); } else - retour_fread=-1; // interruption ou annulation interne (peut ne pas être une erreur) + retour_fread=READ_EOF; // interruption ou annulation interne (peut ne pas être une erreur) // Si réception chunk, tester si on est pas à la fin! if (back[i].status==1) { @@ -1920,27 +2014,25 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta } } else if (back[i].r.keep_alive) { if (back[i].r.size==back[i].r.totalsize) { // fin! 
- retour_fread=-1; // end + retour_fread=READ_EOF; // end } } } if (retour_fread < 0) { // fin réception back[i].status=0; // terminé - if (back[i].r.soc!=INVALID_SOCKET) { -#if HTS_DEBUG_CLOSESOCK - DEBUG_W("back_wait(4): deletehttp\n"); -#endif - /*KA deletehttp(&back[i].r);*/ - back_maydeletehttp(opt, back, back_max, i); - } - /*KA back[i].r.soc=INVALID_SOCKET; */ + /*KA back[i].r.soc=INVALID_SOCKET; */ #if CHUNKDEBUG==1 if (back[i].is_chunk) printf("[%d] must be the last chunk for %s (connection closed) - %d/%d\n",(int)back[i].r.soc,back[i].url_fil,back[i].r.size,back[i].r.totalsize); #endif - //if ((back[i].r.statuscode==-1) && (strnotempty(back[i].r.msg)==0)) { - if ((back[i].r.statuscode <= 0) && (strnotempty(back[i].r.msg)==0)) { + if (retour_fread < 0 && retour_fread != READ_EOF) { + if (back[i].r.size > 0) + strcatbuff(back[i].r.msg, "Interrupted transfer"); + else + strcatbuff(back[i].r.msg, "No data (connection closed)"); + back[i].r.statuscode=-4; + } else if ((back[i].r.statuscode <= 0) && (strnotempty(back[i].r.msg)==0)) { #if HDEBUG printf("error interruped: %s\n",back[i].r.adr); #endif @@ -1951,6 +2043,15 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta back[i].r.statuscode=-4; } + // Close socket + if (back[i].r.soc!=INVALID_SOCKET) { +#if HTS_DEBUG_CLOSESOCK + DEBUG_W("back_wait(4): deletehttp\n"); +#endif + /*KA deletehttp(&back[i].r);*/ + back_maydeletehttp(opt, cache, back, back_max, i); + } + // finalize transfer if (back[i].r.statuscode>0) { back_finalize(opt,cache,back,i); @@ -2101,16 +2202,16 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta /* Tester totalsize en fin de chunk */ if ((back[i].r.totalsize>0)) { // tester totalsize if (back[i].r.totalsize!=back[i].r.size) { // pas la même! -#if HTS_CL_IS_FATAL - deleteaddr(&back[i].r); - back[i].r.statuscode=-1; - strcpybuff(back[i].r.msg,"Incorrect length"); -#else - // Un warning suffira.. 
- if (cache->errlog!=NULL) { - fspc(cache->errlog,"warning"); fprintf(cache->errlog,"Incorrect length ("LLintP"!="LLintP" expected) for %s%s"LF,(LLint)back[i].r.size,(LLint)back[i].r.totalsize,back[i].url_adr,back[i].url_fil); + if (!opt->tolerant) { + deleteaddr(&back[i].r); + back[i].r.statuscode=-1; + strcpybuff(back[i].r.msg,"Incorrect length"); + } else { + // Un warning suffira.. + if (cache->errlog!=NULL) { + fspc(cache->errlog,"warning"); fprintf(cache->errlog,"Incorrect length ("LLintP"!="LLintP" expected) for %s%s"LF,(LLint)back[i].r.size,(LLint)back[i].r.totalsize,back[i].url_adr,back[i].url_fil); + } } -#endif } } @@ -2153,22 +2254,6 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta #endif - // Callback -#if HTS_ANALYSTE - if (hts_htmlcheck_receivehead != NULL) { - int test_head=hts_htmlcheck_receivehead(back[i].r.adr, back[i].url_adr, back[i].url_fil, back[i].referer_adr, back[i].referer_fil, &back[i].r); - if (test_head!=1) { - if ((opt->debug>0) && (opt->log!=NULL)) { - fspc(opt->log,"warning"); fprintf(opt->log,"External wrapper aborted transfer, breaking connection: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush; - } - back[i].status=0; // FINI - deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET; - strcpybuff(back[i].r.msg,"External wrapper aborted transfer"); - back[i].r.statuscode = -1; - } - } -#endif - /* Hack for zero-length headers */ if (back[i].status != 0 && back[i].r.adr[0] != '<') { @@ -2223,15 +2308,36 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta } while(strnotempty(rcvd)); // ---------------------------------------- - // libérer mémoire -- après! 
-- - deleteaddr(&back[i].r); } else { // assume text/html, OK treatfirstline(&back[i].r, back[i].r.adr); noFreebuff=1; } - + // Callback +#if HTS_ANALYSTE + if (hts_htmlcheck_receivehead != NULL) { + int test_head=hts_htmlcheck_receivehead(back[i].r.adr, back[i].url_adr, back[i].url_fil, back[i].referer_adr, back[i].referer_fil, &back[i].r); + if (test_head!=1) { + if ((opt->debug>0) && (opt->log!=NULL)) { + fspc(opt->log,"warning"); fprintf(opt->log,"External wrapper aborted transfer, breaking connection: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush; + } + back[i].status=0; // FINI + deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET; + strcpybuff(back[i].r.msg,"External wrapper aborted transfer"); + back[i].r.statuscode = -1; + } + } +#endif + + // Free headers memory now + // Actually, save them for informational purpose + if (!noFreebuff) { + char* block = back[i].r.adr; + back[i].r.adr = NULL; + deleteaddr(&back[i].r); + back[i].r.headers = block; + } /* Status code and header-response hacks @@ -2325,7 +2431,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta if (opt->sizehack) { if (!back[i].is_update) { // mise à jour if (back[i].r.statuscode==200 && !back[i].testmode) { // 'OK' - if (!is_hypertext_mime(back[i].r.contenttype)) { // not HTML + if (!is_hypertext_mime(back[i].r.contenttype, back[i].url_fil)) { // not HTML if (strnotempty(back[i].url_sav)) { // target found int size = fsize(back[i].url_sav); // target size if (size >= 0) { @@ -2360,7 +2466,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta if (strnotempty(back[i].url_sav)) { if (strcmp(back[i].url_fil,"/robots.txt")) { if (back[i].r.statuscode==200) { // 'OK' - if (!is_hypertext_mime(back[i].r.contenttype)) { // pas HTML + if (!is_hypertext_mime(back[i].r.contenttype, back[i].url_fil)) { // pas HTML if (back[i].r.statuscode==200) { // "OK" if (back[i].range_req_size>0) { // but Range: requested if 
(back[i].range_req_size == back[i].r.totalsize) { // And same size @@ -2495,7 +2601,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta #endif // Couper connexion /*KA deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET;*/ - back_maydeletehttp(opt, back, back_max, i); + back_maydeletehttp(opt, cache, back, back_max, i); back[i].status=0; // terminé // finalize @@ -2520,7 +2626,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta printf("partial content: "LLintP" on disk..\n",(LLint)sz); #endif if (sz>=0) { - if (!is_hypertext_mime(back[i].r.contenttype)) { // pas HTML + if (!is_hypertext_mime(back[i].r.contenttype, back[i].url_sav)) { // pas HTML if (opt->getmode&2) { // on peut ecrire des non html **sinon ben euhh sera intercepté plus loin, donc rap sur ce qui va sortir** filenote(back[i].url_sav,NULL); // noter fichier comme connu back[i].r.out=fopen(fconv(back[i].url_sav),"ab"); // append @@ -2591,9 +2697,10 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta if (back[i].status!=0) { // non terminé (erreur) if (!back[i].testmode) { // fichier normal - if (back[i].r.empty && back[i].r.statuscode==200) { // empty response + if (back[i].r.empty /* ?? 
&& back[i].r.statuscode==200 */) { // empty response // Couper connexion - deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET; + back_maydeletehttp(opt, cache, back, back_max, i); + /* KA deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET; */ back[i].status=0; // terminé if ( deleteaddr(&back[i].r) && (back[i].r.adr=(char*) malloct((INTsys) 2)) ) { back[i].r.adr[0] = 0; @@ -2687,7 +2794,9 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta if (gestion_timeout) { TStamp act; act=time_local(); // temps en secondes - for(i=0;i<back_max;i++) { + for(i_mod = 0 ; i_mod < (unsigned int) back_max ; i_mod++) { + // for(i=0;i<back_max;i++) { + unsigned int i = ( i_mod + mod_random ) % ( back_max ); if (back[i].status>0) { // réception/connexion/.. if (back[i].timeout>0) { //printf("time check %d\n",((int) (act-back[i].timeout_refresh))-back[i].timeout); @@ -2795,7 +2904,7 @@ LLint back_transfered(LLint nb,lien_back* back,int back_max) { // j: 1 afficher sockets 2 afficher autres 3 tout afficher void back_info(lien_back* back,int i,int j,FILE* fp) { if (back[i].status>=0) { - char s[HTS_URLMAXSIZE*2+1024]; + char BIGSTK s[HTS_URLMAXSIZE*2+1024]; s[0]='\0'; back_infostr(back,i,j,s); strcatbuff(s,LF); @@ -2881,7 +2990,7 @@ void back_infostr(lien_back* back,int i,int j,char* s) { if (aff) { { - char s2[HTS_URLMAXSIZE*2+1024]; + char BIGSTK s2[HTS_URLMAXSIZE*2+1024]; sprintf(s2,"\"%s",back[i].url_adr); strcatbuff(s,s2); if (back[i].url_fil[0]!='/') strcatbuff(s,"/"); diff --git a/src/htsback.h b/src/htsback.h index 74fd540..9587d7e 100644 --- a/src/htsback.h +++ b/src/htsback.h @@ -42,6 +42,9 @@ Please visit our Website: http://www.httrack.com #include "htsbasenet.h" #include "htscore.h" +/* Library internal definictions */ +#ifdef HTS_INTERNAL_BYTECODE + // backing #define BACK_ADD_TEST "(dummy)" #define BACK_ADD_TEST2 "(dummy2)" @@ -53,16 +56,16 @@ int back_nsoc(lien_back* back,int back_max); int back_nsoc_overall(lien_back* back,int 
back_max); int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* referer_adr,char* referer_fil,int test,int* pass2_ptr); int back_stack_available(lien_back* back,int back_max); -int back_search(httrackp* opt, lien_back* back, int back_max); +int back_search(httrackp* opt, cache_back* cache, lien_back* back, int back_max); void back_clean(httrackp* opt,cache_back* cache,lien_back* back,int back_max); void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TStamp stat_timestart); -int back_letlive(httrackp* opt, lien_back* back, int p); +int back_letlive(httrackp* opt, cache_back* cache, lien_back* back, int p); int back_searchlive(httrackp* opt, lien_back* back, int back_max, char* search_addr); void back_connxfr(htsblk* src, htsblk* dst); -int back_delete(httrackp* opt,lien_back* back,int p); -int back_maydelete(httrackp* opt, lien_back* back, int p); -void back_maydeletehttp(httrackp* opt, lien_back* back, int back_max, int p); -int back_trylive(httrackp* opt,lien_back* back, int back_max, int p); +int back_delete(httrackp* opt,cache_back* cache,lien_back* back,int p); +int back_maydelete(httrackp* opt, cache_back* cache, lien_back* back, int p); +void back_maydeletehttp(httrackp* opt, cache_back* cache, lien_back* back, int back_max, int p); +int back_trylive(httrackp* opt,cache_back* cache,lien_back* back, int back_max, int p); int back_finalize(httrackp* opt,cache_back* cache,lien_back* back,int p); void back_info(lien_back* back,int i,int j,FILE* fp); void back_infostr(lien_back* back,int i,int j,char* s); @@ -77,8 +80,10 @@ int back_checkmirror(httrackp* opt); #if HTS_XGETHOST #if USE_BEGINTHREAD -PTHREAD_TYPE Hostlookup(void* iadr_p); +PTHREAD_TYPE PTHREAD_TYPE_FNC Hostlookup(void* iadr_p); +#endif #endif + #endif #endif diff --git a/src/htsbase.h b/src/htsbase.h index 139e3ed..9911d73 100644 --- a/src/htsbase.h +++ b/src/htsbase.h @@ -44,9 +44,8 @@ extern "C" { #include 
"htsglobal.h" -// size_t et mode_t -#include <stdio.h> -#include <stdlib.h> +#include <string.h> +#include <time.h> #ifdef HAVE_UNISTD_H #include <unistd.h> @@ -76,30 +75,24 @@ extern "C" { #define min(a,b) ((a)>(b)?(b):(a)) #define max(a,b) ((a)>(b)?(a):(b)) +#ifndef _WIN32 +#undef Sleep +#define min(a,b) ((a)>(b)?(b):(a)) +#define max(a,b) ((a)>(b)?(a):(b)) +#define Sleep(a) { if (((a)*1000)%1000000) usleep(((a)*1000)%1000000); if (((a)*1000)/1000000) sleep(((a)*1000)/1000000); } +#endif + // teste égalité de 2 chars, case insensitive #define hichar(a) ((((a)>='a') && ((a)<='z')) ? ((a)-('a'-'A')) : (a)) #define streql(a,b) (hichar(a)==hichar(b)) -// is this MIME an hypertext MIME (text/html), html/js-style or other script/text type? -#define HTS_HYPERTEXT_DEFAULT_MIME "text/html" -#define is_hypertext_mime(a) \ - ( (strfield2((a),"text/html")!=0)\ - || (strfield2((a),"application/x-javascript")!=0) \ - || (strfield2((a),"text/css")!=0) \ - /*|| (strfield2((a),"text/vnd.wap.wml")!=0)*/ \ - || (strfield2((a),"image/svg+xml")!=0) \ - || (strfield2((a),"image/svg-xml")!=0) \ - /*|| (strfield2((a),"audio/x-pn-realaudio")!=0) */\ - ) +// caractère maj +#define isUpperLetter(a) ( ((a) >= 'A') && ((a) <= 'Z') ) -#define may_be_hypertext_mime(a) \ - (\ - (strfield2((a),"audio/x-pn-realaudio")!=0) \ - ) +/* Library internal definictions */ +#ifdef HTS_INTERNAL_BYTECODE -// caractère maj -#define isUpperLetter(a) ( ((a) >= 'A') && ((a) <= 'Z') ) // functions #ifdef _WIN32 @@ -112,10 +105,15 @@ extern "C" { typedef void (*t_abortLog)(char* msg, char* file, int line); extern HTSEXT_API t_abortLog abortLog__; #define abortLog(a) abortLog__(a, __FILE__, __LINE__) +#define _ , +#ifndef _WIN32_WCE #define abortLogFmt(a) do { \ FILE* fp = fopen("CRASH.TXT", "wb"); \ if (!fp) fp = fopen("/tmp/CRASH.TXT", "wb"); \ if (!fp) fp = fopen("C:\\CRASH.TXT", "wb"); \ + if (!fp) fp = fopen("\\Temp\\CRASH.TXT", "wb"); \ + if (!fp) fp = fopen("\\CRASH.TXT", "wb"); \ + if (!fp) fp = 
fopen("CRASH.TXT", "wb"); \ if (fp) { \ fprintf(fp, "HTTrack " HTTRACK_VERSIONID " closed at '" __FILE__ "', line %d\r\n", __LINE__); \ fprintf(fp, "Reason:\r\n"); \ @@ -125,22 +123,12 @@ extern HTSEXT_API t_abortLog abortLog__; fclose(fp); \ } \ } while(0) - - -#define _ , +#else #define abortLogFmt(a) do { \ - FILE* fp = fopen("CRASH.TXT", "wb"); \ - if (!fp) fp = fopen("/tmp/CRASH.TXT", "wb"); \ - if (!fp) fp = fopen("C:\\CRASH.TXT", "wb"); \ - if (fp) { \ - fprintf(fp, "HTTrack " HTTRACK_VERSIONID " closed at '" __FILE__ "', line %d\r\n", __LINE__); \ - fprintf(fp, "Reason:\r\n"); \ - fprintf(fp, a); \ - fprintf(fp, "\r\n"); \ - fflush(fp); \ - fclose(fp); \ - } \ + XCEShowMessageA("HTTrack " HTTRACK_VERSIONID " closed at '" __FILE__ "', line %d\r\nReason:\r\n%s\r\n", __LINE__, a); \ } while(0) +#endif + #define assertf(exp) do { \ if (! ( exp ) ) { \ abortLog("assert failed: " #exp); \ @@ -167,17 +155,20 @@ extern HTSEXT_API t_abortLog abortLog__; #define malloct(A) malloc(A) #define calloct(A,B) calloc((A), (B)) #define freet(A) do { assertnf((A) != NULL); if ((A) != NULL) { free(A); (A) = NULL; } } while(0) +#define strdupt(A) strdup(A) #define realloct(A,B) ( ((A) != NULL) ? 
realloc((A), (B)) : malloc(B) ) #define memcpybuff(A, B, N) memcpy((A), (B), (N)) #else /* debug version */ #define malloct(A) hts_malloc(A) #define calloct(A,B) hts_calloc(A,B) +#define strdupt(A) hts_strdup(A) #define freet(A) do { hts_free(A); (A) = NULL; } while(0) #define realloct(A,B) hts_realloc(A,B) void hts_freeall(); void* hts_malloc (size_t); void* hts_calloc(size_t,size_t); +char* hts_strdup(char*); void* hts_xmalloc(size_t,size_t); void hts_free (void*); void* hts_realloc (void*,size_t); @@ -379,9 +370,10 @@ extern HTSEXT_API int htsMemoryFastXfr; #endif +#endif #ifdef __cplusplus - }; +} #endif #endif diff --git a/src/htsbasenet.h b/src/htsbasenet.h index 71ac9c9..f2a6c53 100644 --- a/src/htsbasenet.h +++ b/src/htsbasenet.h @@ -41,15 +41,23 @@ Please visit our Website: http://www.httrack.com #if HTS_WIN #if HTS_INET6==0 - #include <winsock.h> + #include <winsock2.h> #else + +#ifndef _WIN32_WCE #undef HTS_USESCOPEID #define WIN32_LEAN_AND_MEAN #include <winsock2.h> #include <ws2tcpip.h> #include <tpipv6.h> +#else + #include <winsock2.h> + #include <socket.h> +#endif + #endif - typedef SOCKET T_SOC; + +typedef SOCKET T_SOC; typedef struct hostent FAR t_hostent; #else @@ -67,9 +75,6 @@ Please visit our Website: http://www.httrack.com */ #ifndef HTS_OPENSSL_H_INCLUDED #define HTS_OPENSSL_H_INCLUDED -#ifdef __cplusplus -extern "C" { -#endif /* #include <openssl/ssl.h> @@ -77,6 +82,9 @@ extern "C" { #include <openssl/err.h> */ +/* Library internal definictions */ +#ifdef HTS_INTERNAL_BYTECODE + /* OpenSSL definitions */ #define SSL_shutdown hts_ptrfunc_SSL_shutdown #define SSL_free hts_ptrfunc_SSL_free @@ -96,6 +104,9 @@ extern "C" { #define ERR_error_string hts_ptrfunc_ERR_error_string #define SSL_load_error_strings hts_ptrfunc_SSL_load_error_strings #define SSL_CTX_ctrl hts_ptrfunc_SSL_CTX_ctrl + +#endif + /* */ typedef void SSL_CTX; typedef void* SSL; @@ -118,6 +129,10 @@ typedef SSL_CTX * (*t_SSL_CTX_new)(SSL_METHOD *method); typedef char * 
(*t_ERR_error_string)(unsigned long e, char *buf); typedef void (*t_SSL_load_error_strings)(void); typedef long (*t_SSL_CTX_ctrl)(SSL_CTX *ctx, int cmd, long larg, char *parg); + +/* Library internal definictions */ +#ifdef HTS_INTERNAL_BYTECODE + extern int SSL_is_available; extern t_SSL_shutdown SSL_shutdown; extern t_SSL_free SSL_free; @@ -137,6 +152,9 @@ extern t_SSL_CTX_new SSL_CTX_new; extern t_ERR_error_string ERR_error_string; extern t_SSL_load_error_strings SSL_load_error_strings; extern t_SSL_CTX_ctrl SSL_CTX_ctrl; + +#endif + /* From /usr/include/openssl/ssl.h */ @@ -154,9 +172,6 @@ From /usr/include/openssl/ssl.h SSL_CTX_ctrl(ctx,SSL_CTRL_OPTIONS,op,NULL) //#include <openssl/bio.h> -#ifdef __cplusplus - }; -#endif /* OpenSSL structure */ extern SSL_CTX *openssl_ctx; diff --git a/src/htsbauth.c b/src/htsbauth.c index 23a22af..cdc7f1c 100644 --- a/src/htsbauth.c +++ b/src/htsbauth.c @@ -35,15 +35,14 @@ Please visit our Website: http://www.httrack.com /* Author: Xavier Roche */ /* ------------------------------------------------------------ */ +/* Internal engine bytecode */ +#define HTS_INTERNAL_BYTECODE #include "htsbauth.h" /* specific definitions */ #include "htsglobal.h" #include "htslib.h" -#include <stdio.h> -#include <stdlib.h> -#include <string.h> #include "htsnostatic.h" @@ -171,17 +170,17 @@ char* cookie_nextfield(char* a) { // lire également (Windows seulement) les *@*.txt (cookies IE copiés) // !=0 : erreur int cookie_load(t_cookie* cookie,char* fpath,char* name) { - cookie->data[0]='\0'; + // cookie->data[0]='\0'; // Fusionner d'abord les éventuels cookies IE #if HTS_WIN { - WIN32_FIND_DATA find; + WIN32_FIND_DATAA find; HANDLE h; char pth[MAX_PATH + 32]; strcpybuff(pth,fpath); strcatbuff(pth,"*@*.txt"); - h = FindFirstFile(pth,&find); + h = FindFirstFileA((char*)pth,&find); if (h != INVALID_HANDLE_VALUE) { do { if (!(find.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY )) @@ -191,19 +190,33 @@ int cookie_load(t_cookie* cookie,char* fpath,char* 
name) { char cook_name[256]; char cook_value[1000]; char domainpathpath[512]; + char dummy[512]; // char domain[256]; // domaine cookie (.netscape.com) char path[256]; // chemin (/) int cookie_merged=0; - linput(fp,cook_name,250); - if (!feof(fp)) { - linput(fp,cook_value,250); - if ( (!feof(fp)) && (strnotempty(cook_value)) ) { - linput(fp,domainpathpath,500); - if (strnotempty(domainpathpath)) { - if (ident_url_absolute(domainpathpath,domain,path)>=0) { - cookie_add(cookie,cook_name,cook_value,domain,path); - cookie_merged=1; + // + // Read all cookies + while( ! feof(fp) ) { + cook_name[0] = cook_value[0] = domainpathpath[0] + = dummy[0] = domain[0] = path[0] = '\0'; + linput(fp,cook_name,250); + if ( ! feof(fp) ) { + linput(fp,cook_value,250); + if ( ! feof(fp) ) { + int i; + linput(fp,domainpathpath,500); + /* Read 6 other useless values */ + for(i = 0 ; ! feof(fp) && i < 6 ; i++) { + linput(fp,dummy,500); + } + if (strnotempty(cook_name) + && strnotempty(cook_value) + && strnotempty(domainpathpath)) { + if (ident_url_absolute(domainpathpath,domain,path)>=0) { + cookie_add(cookie,cook_name,cook_value,domain,path); + cookie_merged=1; + } } } } @@ -213,7 +226,7 @@ int cookie_load(t_cookie* cookie,char* fpath,char* name) { remove(fconcat(fpath,find.cFileName)); } // if fp } - } while(FindNextFile(h,&find)); + } while(FindNextFileA(h,&find)); FindClose(h); } } @@ -223,7 +236,7 @@ int cookie_load(t_cookie* cookie,char* fpath,char* name) { { FILE* fp = fopen(fconcat(fpath,name),"rb"); if (fp) { - char line[8192]; + char BIGSTK line[8192]; while( (!feof(fp)) && (((int) strlen(cookie->data)) < cookie->max_len)) { rawlinput(fp,line,8100); if (strnotempty(line)) { @@ -232,7 +245,7 @@ int cookie_load(t_cookie* cookie,char* fpath,char* name) { char domain[256]; // domaine cookie (.netscape.com) char path[256]; // chemin (/) char cook_name[256]; // nom cookie (MYCOOK) - char cook_value[8192]; // valeur (ID=toto,S=1234) + char BIGSTK cook_value[8192]; // valeur 
(ID=toto,S=1234) strcpybuff(domain,cookie_get(line,0)); // host strcpybuff(path,cookie_get(line,2)); // path strcpybuff(cook_name,cookie_get(line,5)); // name @@ -256,7 +269,7 @@ int cookie_load(t_cookie* cookie,char* fpath,char* name) { // !=0 : erreur int cookie_save(t_cookie* cookie,char* name) { if (strnotempty(cookie->data)) { - char line[8192]; + char BIGSTK line[8192]; FILE* fp = fopen(fconv(name),"wb"); if (fp) { char* a=cookie->data; diff --git a/src/htsbauth.h b/src/htsbauth.h index d361d83..4066ece 100644 --- a/src/htsbauth.h +++ b/src/htsbauth.h @@ -48,12 +48,16 @@ typedef struct bauth_chain { // buffer pour les cookies et authentification -typedef struct { +typedef struct t_cookie { int max_len; char data[32768]; bauth_chain auth; } t_cookie; + +/* Library internal definictions */ +#ifdef HTS_INTERNAL_BYTECODE + // cookies int cookie_add(t_cookie* cookie,char* cook_name,char* cook_value,char* domain,char* path); int cookie_del(t_cookie* cookie,char* cook_name,char* domain,char* path); @@ -70,5 +74,6 @@ int bauth_add(t_cookie* cookie,char* adr,char* fil,char* auth); char* bauth_check(t_cookie* cookie,char* adr,char* fil); char* bauth_prefix(char* adr,char* fil); +#endif #endif diff --git a/src/htscache.c b/src/htscache.c index b90fa67..aa9a6c8 100644 --- a/src/htscache.c +++ b/src/htscache.c @@ -35,15 +35,19 @@ Please visit our Website: http://www.httrack.com /* Author: Xavier Roche */ /* ------------------------------------------------------------ */ +/* Internal engine bytecode */ +#define HTS_INTERNAL_BYTECODE + #include "htscache.h" /* specific definitions */ #include "htsbase.h" #include "htsbasenet.h" #include "htsmd5.h" -#include <stdio.h> -#include <stdlib.h> -#include <string.h> +#include <time.h> + +#include "htszlib.h" + #include "htsnostatic.h" /* END specific definitions */ @@ -116,10 +120,15 @@ void cache_mayadd(httrackp* opt,cache_back* cache,htsblk* r,char* url_adr,char* // ---stockage en cache--- // stocker dans le cache? 
if (opt->cache) { - if (cache->dat!=NULL) { + if (cache_writable(cache)) { // c'est le seul endroit ou l'on ajoute des elements dans le cache (fichier entier ou header) // on stocke tout fichier "ok", mais également les réponses 404,301,302... - if ((r->statuscode==200) /* stocker réponse standard, plus */ + if ( +#if 1 + r->statuscode > 0 +#else + /* We don't store 5XX errors, because it might be a server problem */ + (r->statuscode==200) /* stocker réponse standard, plus */ || (r->statuscode==204) /* no content */ || (r->statuscode==301) /* moved perm */ || (r->statuscode==302) /* moved temp */ @@ -129,13 +138,33 @@ void cache_mayadd(httrackp* opt,cache_back* cache,htsblk* r,char* url_adr,char* || (r->statuscode==403) /* unauthorized */ || (r->statuscode==404) /* not found */ || (r->statuscode==410) /* gone */ +#endif ) { /* ne pas stocker si la page générée est une erreur */ if (!r->is_file) { // stocker fichiers (et robots.txt) - if ( (strnotempty(url_save)) || (strcmp(url_fil,"/robots.txt")==0)) { + if ( url_save == NULL || (strnotempty(url_save)) || (strcmp(url_fil,"/robots.txt")==0)) { // ajouter le fichier au cache - cache_add(*r,url_adr,url_fil,url_save,cache->ndx,cache->dat,opt->all_in_cache); + cache_add(cache,*r,url_adr,url_fil,url_save,opt->all_in_cache); + // + // store a reference NOT to redo the same test zillions of times! 
+ // (problem reported by Lars Clausen) + // we just store statuscode + location (if any) + if (url_save == NULL && r->statuscode / 100 >= 3) { + // cached "fast" header doesn't uet exists + if (inthash_read((inthash)cache->cached_tests, concat(url_adr, url_fil), NULL) == 0) { + char BIGSTK tempo[HTS_URLMAXSIZE*2]; + sprintf(tempo, "%d", (int)r->statuscode); + if (r->location != NULL && r->location[0] != '\0') { + strcatbuff(tempo, "\n"); + strcatbuff(tempo, r->location); + } + if ((opt->debug>0) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log, "Cached fast-header response: %s%s is %d"LF, url_adr, url_fil, (int)r->statuscode); + } + inthash_add((inthash)cache->cached_tests, concat(url_adr, url_fil), (long int)strdupt(tempo)); + } + } } } } @@ -145,13 +174,222 @@ void cache_mayadd(httrackp* opt,cache_back* cache,htsblk* r,char* url_adr,char* } + +#if 01 + +/* test only - to be removed */ + +#define ZIP_FIELD_STRING(headers, headersSize, field, value) do { \ + if ( (value != NULL) && (value)[0] != '\0') { \ + sprintf(headers + headersSize, "%s: %s\r\n", field, (value != NULL) ? 
(value) : ""); \ + (headersSize) += (int) strlen(headers + headersSize); \ + } \ +} while(0) +#define ZIP_FIELD_INT(headers, headersSize, field, value) do { \ + if ( (value != 0) ) { \ + sprintf(headers + headersSize, "%s: "LLintP"\r\n", field, (LLint)(value)); \ + (headersSize) += (int) strlen(headers + headersSize); \ + } \ +} while(0) +#define ZIP_FIELD_INT_FORCE(headers, headersSize, field, value) do { \ + sprintf(headers + headersSize, "%s: "LLintP"\r\n", field, (LLint)(value)); \ + (headersSize) += (int) strlen(headers + headersSize); \ +} while(0) + +struct cache_back_zip_entry { + unsigned long int hdrPos; + unsigned long int size; + int compressionMethod; +}; + +#define ZIP_READFIELD_STRING(line, value, refline, refvalue) do { \ + if (line[0] != '\0' && strfield2(line, refline)) { \ + strcpybuff(refvalue, value); \ + line[0] = '\0'; \ + } \ +} while(0) +#define ZIP_READFIELD_INT(line, value, refline, refvalue) do { \ + if (line[0] != '\0' && strfield2(line, refline)) { \ + int intval = 0; \ + sscanf(value, "%d", &intval); \ + (refvalue) = intval; \ + line[0] = '\0'; \ + } \ +} while(0) + + +/* Ajout d'un fichier en cache */ +void cache_add(cache_back* cache,htsblk r,char* url_adr,char* url_fil,char* url_save,int all_in_cache) { + char BIGSTK filemame[HTS_URLMAXSIZE*4]; + int dataincache=0; // put data in cache ? + char BIGSTK headers[8192]; + int headersSize = 0; + int entryBodySize = 0; + int entryFilenameSize = 0; + zip_fileinfo fi; + + // robots.txt hack + if (url_save == NULL) { + dataincache=0; // testing links + } + else { + if ( (strnotempty(url_save)==0) ) { + if (strcmp(url_fil,"/robots.txt")==0) // robots.txt + dataincache=1; + else + return; // error (except robots.txt) + } + + /* Data in cache ? 
*/ + if (is_hypertext_mime(r.contenttype, url_fil)) + dataincache=1; + else if (all_in_cache) + dataincache=1; + } + + if (r.size < 0) // error + return; + + // data in cache + if (dataincache) { + assertf(((int) r.size) == r.size); + entryBodySize = (int) r.size; + } + + /* Fields */ + headers[0] = '\0'; + headersSize = 0; + /* */ + { + char* message; + if (strlen(r.msg) < 32) { + message = r.msg; + } else { + message = "(See X-StatusMessage)"; + } + /* 64 characters MAX for first line */ + sprintf(headers + headersSize, "HTTP/1.%c %d %s\r\n", '1', r.statuscode, r.msg); + } + headersSize += (int) strlen(headers + headersSize); + /* Second line MUST ALWAYS be X-In-Cache */ + ZIP_FIELD_INT_FORCE(headers, headersSize, "X-In-Cache", dataincache); + ZIP_FIELD_INT(headers, headersSize, "X-StatusCode", r.statuscode); + ZIP_FIELD_STRING(headers, headersSize, "X-StatusMessage", r.msg); + ZIP_FIELD_INT(headers, headersSize, "X-Size", r.size); // size + ZIP_FIELD_STRING(headers, headersSize, "Content-Type", r.contenttype); // contenttype + ZIP_FIELD_STRING(headers, headersSize, "X-Charset", r.charset); // contenttype + ZIP_FIELD_STRING(headers, headersSize, "Last-Modified", r.lastmodified); // last-modified + ZIP_FIELD_STRING(headers, headersSize, "Etag", r.etag); // Etag + ZIP_FIELD_STRING(headers, headersSize, "Location", r.location); // 'location' pour moved + ZIP_FIELD_STRING(headers, headersSize, "Content-Disposition", r.cdispo); // Content-disposition + ZIP_FIELD_STRING(headers, headersSize, "X-Addr", url_adr); // Original address + ZIP_FIELD_STRING(headers, headersSize, "X-Fil", url_fil); // Original URI filename + ZIP_FIELD_STRING(headers, headersSize, "X-Save", url_save); // Original save filename + + entryFilenameSize = (int) ( strlen(url_adr) + strlen(url_fil)); + + /* Filename */ + if (!link_has_authority(url_adr)) { + strcpybuff(filemame, "http://"); + } else { + strcpybuff(filemame, ""); + } + strcatbuff(filemame, url_adr); + strcatbuff(filemame, url_fil); + + 
/* Time */ + memset(&fi, 0, sizeof(fi)); + if (r.lastmodified[0] != '\0') { + struct tm* tm_s=convert_time_rfc822(r.lastmodified); + if (tm_s) { + fi.tmz_date.tm_sec = (uInt) tm_s->tm_sec; + fi.tmz_date.tm_min = (uInt) tm_s->tm_min; + fi.tmz_date.tm_hour = (uInt) tm_s->tm_hour; + fi.tmz_date.tm_mday = (uInt) tm_s->tm_mday; + fi.tmz_date.tm_mon = (uInt) tm_s->tm_mon; + fi.tmz_date.tm_year = (uInt) tm_s->tm_year; + } + } + + /* Open file - NOTE: headers in "comment" */ + if (zipOpenNewFileInZip((zipFile) cache->zipOutput, + filemame, + &fi, + /* + Store headers in realtime in the local file directory as extra field + In case of crash, we'll be able to recover the whole ZIP file by rescanning it + */ + headers, + (uInt) strlen(headers), + NULL, + 0, + NULL, /* comment */ + Z_DEFLATED, + Z_DEFAULT_COMPRESSION) != Z_OK) + { + int zip_disk_write_failed = 0; + assertf(zip_disk_write_failed); + } + + /* Write data in cache */ + if (dataincache) { + if (r.is_write == 0) { + if (r.size > 0 && r.adr != NULL) { + if (zipWriteInFileInZip((zipFile) cache->zipOutput, r.adr, (int) r.size) != Z_OK) { + int zip_disk_write_failed = 0; + assertf(zip_disk_write_failed); + } + } + } else { + FILE* fp; + // On recopie le fichier.. 
+ LLint file_size=fsize(fconv(url_save)); + if (file_size>=0) { + fp=fopen(fconv(url_save),"rb"); + if (fp!=NULL) { + char BIGSTK buff[32768]; + INTsys nl; + do { + nl=fread(buff,1,32768,fp); + if (nl>0) { + if (zipWriteInFileInZip((zipFile) cache->zipOutput, buff, (int) nl) != Z_OK) { + int zip_disk_write_failed = 0; + assertf(zip_disk_write_failed); + } + } + } while(nl>0); + fclose(fp); + } else { + /* Err FIXME - lost file */ + } + } /* Empty files are OK */ + } + } + + /* Close */ + if (zipCloseFileInZip((zipFile) cache->zipOutput) != Z_OK) { + int zip_disk_write_failed = 0; + assertf(zip_disk_write_failed); + } + + /* Flush */ + if (zipFlush((zipFile) cache->zipOutput) != 0) { + int zip_disk_write_failed = 0; + assertf(zip_disk_write_failed); + } +} + +#else + /* Ajout d'un fichier en cache */ -void cache_add(htsblk r,char* url_adr,char* url_fil,char* url_save,FILE* cache_ndx,FILE* cache_dat,int all_in_cache) { +void cache_add(cache_back* cache,htsblk r,char* url_adr,char* url_fil,char* url_save,int all_in_cache) { int pos; char s[256]; - char buff[HTS_URLMAXSIZE*4]; + char BIGSTK buff[HTS_URLMAXSIZE*4]; int ok=1; int dataincache=0; // donnée en cache? + FILE* cache_ndx = cache->ndx; + FILE* cache_dat = cache->dat; /*char digest[32+2];*/ /*digest[0]='\0';*/ @@ -159,6 +397,8 @@ void cache_add(htsblk r,char* url_adr,char* url_fil,char* url_save,FILE* cache_n if ( (strnotempty(url_save)==0) ) { if (strcmp(url_fil,"/robots.txt")==0) // robots.txt dataincache=1; + else if (strcmp(url_fil,"/test")==0) // testing links + dataincache=0; else return; // erreur (sauf robots.txt) } @@ -167,7 +407,7 @@ void cache_add(htsblk r,char* url_adr,char* url_fil,char* url_save,FILE* cache_n return; // refusé.. // Mettre les *donées* en cache ? 
- if (is_hypertext_mime(r.contenttype)) // html, mise en cache des données et + if (is_hypertext_mime(r.contenttype, url_fil)) // html, mise en cache des données et dataincache=1; // pas uniquement de l'en tête else if (all_in_cache) dataincache=1; // forcer tout en cache @@ -209,6 +449,7 @@ void cache_add(htsblk r,char* url_adr,char* url_fil,char* url_save,FILE* cache_n && cache_wstr(cache_dat,url_adr) != -1 // Original address && cache_wstr(cache_dat,url_fil) != -1 // Original URI filename && cache_wstr(cache_dat,url_save) != -1 // Original save filename + && cache_wstr(cache_dat,r.headers) != -1 // Full HTTP Headers && cache_wstr(cache_dat,"HTS") != -1 // end of header ) { ok=1; /* ok */ @@ -238,7 +479,7 @@ void cache_add(htsblk r,char* url_adr,char* url_fil,char* url_save,FILE* cache_n if (cache_wLLint(cache_dat,file_size)!=-1) { fp=fopen(fconv(url_save),"rb"); if (fp!=NULL) { - char buff[32768]; + char BIGSTK buff[32768]; INTsys nl; do { nl=fread(buff,1,32768,fp); @@ -275,6 +516,8 @@ void cache_add(htsblk r,char* url_adr,char* url_fil,char* url_save,FILE* cache_n fflush(cache_dat); fflush(cache_ndx); } +#endif + htsblk cache_read(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* location) { return cache_readex(opt,cache,adr,fil,save,location,NULL,0); @@ -284,19 +527,274 @@ htsblk cache_read_ro(httrackp* opt,cache_back* cache,char* adr,char* fil,char* s return cache_readex(opt,cache,adr,fil,save,location,NULL,1); } +static htsblk cache_readex_old(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* location, + char* return_save, int readonly); + +static htsblk cache_readex_new(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* location, + char* return_save, int readonly); + // lecture d'un fichier dans le cache // si save==null alors test unqiquement htsblk cache_readex(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* location, char* return_save, int readonly) { + if (cache->zipInput != 
NULL) { + return cache_readex_new(opt, cache, adr, fil, save, location, return_save, readonly); + } else { + return cache_readex_old(opt, cache, adr, fil, save, location, return_save, readonly); + } +} + +// lecture d'un fichier dans le cache +// si save==null alors test unqiquement +static htsblk cache_readex_new(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* location, + char* return_save, int readonly) { + char BIGSTK location_default[HTS_URLMAXSIZE*2]; + char BIGSTK buff[HTS_URLMAXSIZE*2]; + char BIGSTK previous_save[HTS_URLMAXSIZE*2]; + long int hash_pos; + int hash_pos_return; + htsblk r; + memset(&r, 0, sizeof(htsblk)); r.soc=INVALID_SOCKET; + + if (location) { + r.location = location; + } else { + r.location = location_default; + } + strcpybuff(r.location, ""); + strcpybuff(buff, adr); + strcatbuff(buff,fil); + hash_pos_return = inthash_read((inthash)cache->hashtable, buff, (long int*)&hash_pos); + /* avoid errors on data entries */ + if (adr[0] == '/' && adr[1] == '/' && adr[2] == '[') { +#if HTS_FAST_CACHE + hash_pos_return = 0; +#else + a = NULL; +#endif + } + + if (hash_pos_return) { + uLong posInZip; + if (hash_pos > 0) { + posInZip = (uLong) hash_pos; + } else { + posInZip = (uLong) -hash_pos; + } + if (unzSetOffset((unzFile) cache->zipInput, posInZip) == Z_OK) { + /* Read header (Max 8KiB) */ + if (unzOpenCurrentFile((unzFile) cache->zipInput) == Z_OK) { + char BIGSTK headerBuff[8192 + 2]; + int readSizeHeader; + int totalHeader = 0; + int dataincache = 0; + + /* For BIG comments */ + headerBuff[0] + = headerBuff[sizeof(headerBuff) - 1] + = headerBuff[sizeof(headerBuff) - 2] + = headerBuff[sizeof(headerBuff) - 3] = '\0'; + + if ( (readSizeHeader = unzGetLocalExtrafield((unzFile) cache->zipInput, headerBuff, sizeof(headerBuff) - 2)) > 0) + /*if (unzGetCurrentFileInfo((unzFile) cache->zipInput, NULL, + NULL, 0, NULL, 0, headerBuff, sizeof(headerBuff) - 2) == Z_OK ) */ + { + int offset = 0; + char BIGSTK line[HTS_URLMAXSIZE + 2]; + 
int lineEof = 0; + /*readSizeHeader = (int) strlen(headerBuff);*/ + headerBuff[readSizeHeader] = '\0'; + do { + char* value; + line[0] = '\0'; + offset += binput(headerBuff + offset, line, sizeof(line) - 2); + if (line[0] == '\0') { + lineEof = 1; + } + value = strchr(line, ':'); + if (value != NULL) { + *value++ = '\0'; + if (*value == ' ' || *value == '\t') value++; + ZIP_READFIELD_INT(line, value, "X-In-Cache", dataincache); + ZIP_READFIELD_INT(line, value, "X-Statuscode", r.statuscode); + ZIP_READFIELD_STRING(line, value, "X-StatusMessage", r.msg); // msg + ZIP_READFIELD_INT(line, value, "X-Size", r.size); // size + ZIP_READFIELD_STRING(line, value, "Content-Type", r.contenttype); // contenttype + ZIP_READFIELD_STRING(line, value, "X-Charset", r.charset); // contenttype + ZIP_READFIELD_STRING(line, value, "Last-Modified", r.lastmodified); // last-modified + ZIP_READFIELD_STRING(line, value, "Etag", r.etag); // Etag + ZIP_READFIELD_STRING(line, value, "Location", r.location); // 'location' pour moved + ZIP_READFIELD_STRING(line, value, "Content-Disposition", r.cdispo); // Content-disposition + ZIP_READFIELD_STRING(line, value, "X-Addr", previous_save); // Original address + ZIP_READFIELD_STRING(line, value, "X-Fil", previous_save); // Original URI filename + ZIP_READFIELD_STRING(line, value, "X-Save", previous_save); // Original save filename + } + } while(offset < readSizeHeader && !lineEof); + totalHeader = offset; + + /* Complete fields */ + r.totalsize=r.size; + r.adr=NULL; + r.out=NULL; + r.fp=NULL; + + if (save != NULL) { /* ne pas lire uniquement header */ + int ok = 0; + +#if HTS_DIRECTDISK + // Court-circuit: + // Peut-on stocker le fichier directement sur disque? 
+ if (ok) { + if (r.msg[0] == '\0') { + strcpybuff(r.msg,"Cache Read Error : Unexpected error"); + } + } + else if (!readonly && r.statuscode==200 && !is_hypertext_mime(r.contenttype, fil) && strnotempty(save)) { // pas HTML, écrire sur disk directement + + r.is_write=1; // écrire + if (fexist(fconv(save))) { // un fichier existe déja + //if (fsize(fconv(save))==r.size) { // même taille -- NON tant pis (taille mal declaree) + ok=1; // plus rien à faire + filenote(save,NULL); // noter comme connu + } + + if (!dataincache && !ok) { // Pas de donnée en cache et fichier introuvable : erreur! + if (opt->norecatch) { + filecreateempty(save); + // + r.statuscode=-1; + strcpybuff(r.msg,"File deleted by user not recaught"); + ok=1; // ne pas récupérer (et pas d'erreur) + } else { + r.statuscode=-1; + strcpybuff(r.msg,"Previous cache file not found"); + ok=1; // ne pas récupérer + } + } + + if (!ok) { + r.out=filecreate(save); +#if HDEBUG + printf("direct-disk: %s\n",save); +#endif + if (r.out!=NULL) { + char BIGSTK buff[32768+4]; + LLint size = r.size; + if (size > 0) { + INTsys nl; + do { + nl = unzReadCurrentFile((unzFile) cache->zipInput, buff, (int)minimum(size, 32768)); + if (nl>0) { + size-=nl; + if ((INTsys)fwrite(buff,1,(INTsys)nl,r.out)!=nl) { // erreur + r.statuscode=-1; + strcpybuff(r.msg,"Cache Read Error : Read To Disk"); + } + } + } while((nl>0) && (size>0) && (r.statuscode!=-1)); + } + + fclose(r.out); + r.out=NULL; +#if HTS_WIN==0 + chmod(save,HTS_ACCESS_FILE); +#endif + //xxusercommand(opt,0,NULL,fconv(save), adr, fil); + } else { + r.statuscode=-1; + strcpybuff(r.msg,"Cache Write Error : Unable to Create File"); + //printf("%s\n",save); + } + } + + } else +#endif + { // lire en mémoire + + if (!dataincache) { + if (strnotempty(save)) { // Pas de donnée en cache, bizarre car html!!! 
+ r.statuscode=-1; + strcpybuff(r.msg,"Previous cache file not found (2)"); + } else { /* Read in memory from cache */ + if (strnotempty(return_save) && fexist(return_save)) { + FILE* fp = fopen(fconv(return_save), "rb"); + if (fp != NULL) { + r.adr=(char*) malloct((INTsys)r.size + 4); + if (adr != NULL) { + if (r.size > 0 && fread(r.adr, 1, (INTsys) r.size, fp) != r.size) { + r.statuscode=-1; + strcpybuff(r.msg,"Read error in cache disk data"); + } + } else { + r.statuscode=-1; + strcpybuff(r.msg,"Read error (memory exhausted) from cache"); + } + fclose(fp); + } + } else { + r.statuscode=-1; + strcpybuff(r.msg,"Cache file not found on disk"); + } + } + } else { + // lire fichier (d'un coup) + r.adr=(char*) malloct((INTsys)r.size+4); + if (r.adr!=NULL) { + if (unzReadCurrentFile((unzFile) cache->zipInput, r.adr, (INTsys)r.size) != r.size) { // erreur + freet(r.adr); + r.adr=NULL; + r.statuscode=-1; + strcpybuff(r.msg,"Cache Read Error : Read Data"); + } else + *(r.adr+r.size)='\0'; + //printf(">%s status %d\n",back[p].r.contenttype,back[p].r.statuscode); + } else { // erreur + r.statuscode=-1; + strcpybuff(r.msg,"Cache Memory Error"); + } + } + } + } // si save==null, ne rien charger (juste en tête) + + + } else { + r.statuscode=-1; + strcpybuff(r.msg,"Cache Read Error : Read Header Data"); + } + unzCloseCurrentFile((unzFile) cache->zipInput); + } else { + r.statuscode=-1; + strcpybuff(r.msg,"Cache Read Error : Open File"); + } + + } else { + r.statuscode=-1; + strcpybuff(r.msg,"Cache Read Error : Bad Offset"); + } + } else { + r.statuscode=-1; + strcpybuff(r.msg,"File Cache Entry Not Found"); + } + if (!location) { /* don't export internal buffer */ + r.location = NULL; + } + return r; +} + + +// lecture d'un fichier dans le cache +// si save==null alors test unqiquement +static htsblk cache_readex_old(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* location, + char* return_save, int readonly) { #if HTS_FAST_CACHE long int hash_pos; int 
hash_pos_return; #else char* a; #endif - char buff[HTS_URLMAXSIZE*2]; - char location_default[HTS_URLMAXSIZE*2]; - char previous_save[HTS_URLMAXSIZE*2]; + char BIGSTK buff[HTS_URLMAXSIZE*2]; + char BIGSTK location_default[HTS_URLMAXSIZE*2]; + char BIGSTK previous_save[HTS_URLMAXSIZE*2]; htsblk r; int ok=0; int header_only=0; @@ -388,6 +886,9 @@ htsblk cache_readex(httrackp* opt,cache_back* cache,char* adr,char* fil,char* sa strcpybuff(return_save, previous_save); } } + if (cache->version >= 5) { + r.headers = cache_rstr_addr(cache->olddat); + } // cache_rstr(cache->olddat,check); if (strcmp(check,"HTS")==0) { /* intégrité OK */ @@ -425,7 +926,7 @@ htsblk cache_readex(httrackp* opt,cache_back* cache,char* adr,char* fil,char* sa #if HTS_DIRECTDISK // Court-circuit: // Peut-on stocker le fichier directement sur disque? - if (!readonly && r.statuscode==200 && !is_hypertext_mime(r.contenttype) && strnotempty(save)) { // pas HTML, écrire sur disk directement + if (!readonly && r.statuscode==200 && !is_hypertext_mime(r.contenttype, fil) && strnotempty(save)) { // pas HTML, écrire sur disk directement int ok=0; r.is_write=1; // écrire @@ -457,7 +958,7 @@ htsblk cache_readex(httrackp* opt,cache_back* cache,char* adr,char* fil,char* sa printf("direct-disk: %s\n",save); #endif if (r.out!=NULL) { - char buff[32768+4]; + char BIGSTK buff[32768+4]; LLint size = r.size; if (size > 0) { INTsys nl; @@ -572,7 +1073,7 @@ htsblk cache_readex(httrackp* opt,cache_back* cache,char* adr,char* fil,char* sa /* 0 if failed */ int cache_writedata(FILE* cache_ndx,FILE* cache_dat,char* str1,char* str2,char* outbuff,int len) { if (cache_dat) { - char buff[HTS_URLMAXSIZE*4]; + char BIGSTK buff[HTS_URLMAXSIZE*4]; char s[256]; int pos; fflush(cache_dat); fflush(cache_ndx); @@ -599,7 +1100,7 @@ int cache_writedata(FILE* cache_ndx,FILE* cache_dat,char* str1,char* str2,char* int cache_readdata(cache_back* cache,char* str1,char* str2,char** inbuff,int* inlen) { #if HTS_FAST_CACHE if (cache->hashtable) 
{ - char buff[HTS_URLMAXSIZE*4]; + char BIGSTK buff[HTS_URLMAXSIZE*4]; long int pos; strcpybuff(buff,str1); strcatbuff(buff,str2); if (inthash_read((inthash)cache->hashtable,buff,(long int*)&pos)) { @@ -651,7 +1152,29 @@ void cache_init(cache_back* cache,httrackp* opt) { #else mkdir(fconcat(opt->path_log,"hts-cache"),HTS_PROTECT_FOLDER); #endif - if ((fexist(fconcat(opt->path_log,"hts-cache/new.dat"))) && (fexist(fconcat(opt->path_log,"hts-cache/new.ndx")))) { // il existe déja un cache précédent.. renommer + if ((fexist(fconcat(opt->path_log,"hts-cache/new.zip")))) { // il existe déja un cache précédent.. renommer + /* Previous cache from the previous cache version */ +#if 0 + /* No.. reuse with old httrack releases! */ + if (fexist(fconcat(opt->path_log,"hts-cache/old.dat"))) + remove(fconcat(opt->path_log,"hts-cache/old.dat")); + if (fexist(fconcat(opt->path_log,"hts-cache/old.ndx"))) + remove(fconcat(opt->path_log,"hts-cache/old.ndx")); +#endif + /* Previous cache version */ + if ((fexist(fconcat(opt->path_log,"hts-cache/new.dat"))) && (fexist(fconcat(opt->path_log,"hts-cache/new.ndx")))) { // il existe déja un cache précédent.. renommer + rename(fconcat(opt->path_log,"hts-cache/new.dat"),fconcat(opt->path_log,"hts-cache/old.dat")); + rename(fconcat(opt->path_log,"hts-cache/new.ndx"),fconcat(opt->path_log,"hts-cache/old.ndx")); + } + + /* Remove OLD cache */ + if (fexist(fconcat(opt->path_log,"hts-cache/old.zip"))) + remove(fconcat(opt->path_log,"hts-cache/old.zip")); + + /* Rename */ + rename(fconcat(opt->path_log,"hts-cache/new.zip"),fconcat(opt->path_log,"hts-cache/old.zip")); + } + else if ((fexist(fconcat(opt->path_log,"hts-cache/new.dat"))) && (fexist(fconcat(opt->path_log,"hts-cache/new.ndx")))) { // il existe déja un cache précédent.. 
renommer #if DEBUGCA printf("work with former cache\n"); #endif @@ -677,6 +1200,136 @@ void cache_init(cache_back* cache,httrackp* opt) { if ( ( !cache->ro && + fsize(fconcat(opt->path_log,"hts-cache/old.zip")) > 0 + ) + || + ( + cache->ro && + fsize(fconcat(opt->path_log,"hts-cache/new.zip")) > 0 + ) + ) + { + if (!cache->ro) { + cache->zipInput = unzOpen(fconcat(opt->path_log,"hts-cache/old.zip")); + } else { + cache->zipInput = unzOpen(fconcat(opt->path_log,"hts-cache/new.zip")); + } + + // Corrupted ZIP file ? Try to repair! + if (cache->zipInput == NULL && !cache->ro) { + char* name; + uLong repaired = 0; + uLong repairedBytes = 0; + if (!cache->ro) { + name = fconcat(opt->path_log,"hts-cache/old.zip"); + } else { + name = fconcat(opt->path_log,"hts-cache/new.zip"); + } + if (opt->log) { + fspc(opt->log,"warning"); fprintf(opt->log,"Cache: damaged cache, trying to repair"LF); + fflush(opt->log); + } + if (unzRepair(name, + fconcat(opt->path_log,"hts-cache/repair.zip"), + fconcat(opt->path_log,"hts-cache/repair.tmp"), + &repaired, &repairedBytes + ) == Z_OK) { + unlink(name); + rename(fconcat(opt->path_log,"hts-cache/repair.zip"), name); + cache->zipInput = unzOpen(name); + if (opt->log) { + fspc(opt->log,"warning"); fprintf(opt->log,"Cache: %d bytes successfully recovered in %d entries"LF, + (int) repairedBytes, (int) repaired); + fflush(opt->log); + } + } else { + if (opt->log) { + fspc(opt->log,"warning"); fprintf(opt->log,"Cache: could not repair the cache"LF); + fflush(opt->log); + } + } + } + + // Opened ? 
+ if (cache->zipInput!=NULL) { + + /* Ready directory entries */ + if (unzGoToFirstFile((unzFile) cache->zipInput) == Z_OK) { + char comment[128]; + char BIGSTK filename[HTS_URLMAXSIZE * 4]; + int entries = 0; + memset(comment, 0, sizeof(comment)); // for truncated reads + do { + int readSizeHeader = 0; + filename[0] = '\0'; + comment[0] = '\0'; + if (unzOpenCurrentFile((unzFile) cache->zipInput) == Z_OK) { + if ( + (readSizeHeader = unzGetLocalExtrafield((unzFile) cache->zipInput, comment, sizeof(comment) - 2)) > 0 + && + unzGetCurrentFileInfo((unzFile) cache->zipInput, NULL, filename, sizeof(filename) - 2, NULL, 0, NULL, 0) == Z_OK + ) + { + long int pos = (long int) unzGetOffset((unzFile) cache->zipInput); + assertf(readSizeHeader < sizeof(comment)); + comment[readSizeHeader] = '\0'; + entries++; + if (pos > 0) { + int dataincache = 0; // data in cache ? + char* filenameIndex = filename; + if (strfield(filenameIndex, "http://")) { + filenameIndex += 7; + } + if (comment[0] != '\0') { + int maxLine = 2; + char* a = comment; + while(*a && maxLine-- > 0) { // parse only few first lines + char BIGSTK line[1024]; + line[0] = '\0'; + a+=binput(a, line, sizeof(line) - 2); + if (strfield(line, "X-In-Cache:")) { + if (strfield2(line, "X-In-Cache: 1")) { + dataincache = 1; + } else { + dataincache = 0; + } + break; + } + } + } + if (dataincache) + inthash_add((inthash)cache->hashtable, filenameIndex, pos); + else + inthash_add((inthash)cache->hashtable, filenameIndex, -pos); + } else { + if (opt->log!=NULL) { + fspc(opt->log,"warning"); fprintf(opt->log,"Corrupted cache meta entry #%d"LF, (int)entries); + } + } + } else { + if (opt->log!=NULL) { + fspc(opt->log,"warning"); fprintf(opt->log,"Corrupted cache entry #%d"LF, (int)entries); + } + } + unzCloseCurrentFile((unzFile) cache->zipInput); + } else { + if (opt->log!=NULL) { + fspc(opt->log,"warning"); fprintf(opt->log,"Corrupted cache entry #%d"LF, (int)entries); + } + } + } while( unzGoToNextFile((unzFile) 
cache->zipInput) == Z_OK ); + if ((opt->debug>0) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"Cache index loaded: %d entries loaded"LF, (int)entries); + } + opt->is_update=1; // signaler comme update + + } + + } + + } else if ( + ( + !cache->ro && fsize(fconcat(opt->path_log,"hts-cache/old.dat")) >=0 && fsize(fconcat(opt->path_log,"hts-cache/old.ndx")) >0 ) || @@ -724,7 +1377,7 @@ void cache_init(cache_back* cache,httrackp* opt) { if (strncmp(firstline,"CACHE-",6)==0) { // Nouvelle version du cache if (strncmp(firstline,"CACHE-1.",8)==0) { // Version 1.1x cache->version=(int)(firstline[8]-'0'); // cache 1.x - if (cache->version <= 4) { + if (cache->version <= 5) { a+=cache_brstr(a,firstline); strcpybuff(cache->lastmodified,firstline); } else { @@ -762,7 +1415,7 @@ void cache_init(cache_back* cache,httrackp* opt) { /* Create hash table for the cache (MUCH FASTER!) */ #if HTS_FAST_CACHE if (cache->use) { - char line[HTS_URLMAXSIZE*2]; + char BIGSTK line[HTS_URLMAXSIZE*2]; char linepos[256]; int pos; while ( (a!=NULL) && (a < (cache->use+buffl) ) ) { @@ -793,60 +1446,96 @@ void cache_init(cache_back* cache,httrackp* opt) { if (!cache->ro) { // ouvrir caches actuels structcheck(fconcat(opt->path_log, "hts-cache/")); - cache->dat=fopen(fconcat(opt->path_log,"hts-cache/new.dat"),"wb"); - cache->ndx=fopen(fconcat(opt->path_log,"hts-cache/new.ndx"),"wb"); - // les deux doivent être ouvrables - if ((cache->dat==NULL) && (cache->ndx!=NULL)) { - fclose(cache->ndx); - cache->ndx=NULL; - } - if ((cache->dat!=NULL) && (cache->ndx==NULL)) { - fclose(cache->dat); - cache->dat=NULL; - } - if (cache->ndx!=NULL) { - char s[256]; - - cache_wstr(cache->dat,"CACHE-1.4"); - fflush(cache->dat); - cache_wstr(cache->ndx,"CACHE-1.4"); - fflush(cache->ndx); - // - time_gmt_rfc822(s); // date et heure actuelle GMT pour If-Modified-Since.. 
- cache_wstr(cache->ndx,s); - fflush(cache->ndx); // un petit fflush au cas où - - // supprimer old.lst - if (fexist(fconcat(opt->path_log,"hts-cache/old.lst"))) - remove(fconcat(opt->path_log,"hts-cache/old.lst")); - // renommer - if (fexist(fconcat(opt->path_log,"hts-cache/new.lst"))) - rename(fconcat(opt->path_log,"hts-cache/new.lst"),fconcat(opt->path_log,"hts-cache/old.lst")); - // ouvrir - cache->lst=fopen(fconcat(opt->path_log,"hts-cache/new.lst"),"wb"); - { - filecreate_params tmp; - strcpybuff(tmp.path,opt->path_html); // chemin - tmp.lst=cache->lst; // fichier lst - filenote("",&tmp); // initialiser filecreate + if (1) { + /* Create ZIP file cache */ + cache->zipOutput = (void*) zipOpen(fconcat(opt->path_log,"hts-cache/new.zip"), 0); + + if (cache->zipOutput != NULL) { + // supprimer old.lst + if (fexist(fconcat(opt->path_log,"hts-cache/old.lst"))) + remove(fconcat(opt->path_log,"hts-cache/old.lst")); + // renommer + if (fexist(fconcat(opt->path_log,"hts-cache/new.lst"))) + rename(fconcat(opt->path_log,"hts-cache/new.lst"),fconcat(opt->path_log,"hts-cache/old.lst")); + // ouvrir + cache->lst=fopen(fconcat(opt->path_log,"hts-cache/new.lst"),"wb"); + { + filecreate_params tmp; + strcpybuff(tmp.path,opt->path_html); // chemin + tmp.lst=cache->lst; // fichier lst + filenote("",&tmp); // initialiser filecreate + } + + // supprimer old.txt + if (fexist(fconcat(opt->path_log,"hts-cache/old.txt"))) + remove(fconcat(opt->path_log,"hts-cache/old.txt")); + // renommer + if (fexist(fconcat(opt->path_log,"hts-cache/new.txt"))) + rename(fconcat(opt->path_log,"hts-cache/new.txt"),fconcat(opt->path_log,"hts-cache/old.txt")); + // ouvrir + cache->txt=fopen(fconcat(opt->path_log,"hts-cache/new.txt"),"wb"); + if (cache->txt) { + fprintf(cache->txt,"date\tsize'/'remotesize\tflags(request:Update,Range state:File response:Modified,Chunked,gZipped)\t"); + fprintf(cache->txt,"statuscode\tstatus ('servermsg')\tMIME\tEtag|Date\tURL\tlocalfile\t(from URL)"LF); + } } - - // 
supprimer old.txt - if (fexist(fconcat(opt->path_log,"hts-cache/old.txt"))) - remove(fconcat(opt->path_log,"hts-cache/old.txt")); - // renommer - if (fexist(fconcat(opt->path_log,"hts-cache/new.txt"))) - rename(fconcat(opt->path_log,"hts-cache/new.txt"),fconcat(opt->path_log,"hts-cache/old.txt")); - // ouvrir - cache->txt=fopen(fconcat(opt->path_log,"hts-cache/new.txt"),"wb"); - if (cache->txt) { - fprintf(cache->txt,"date\tsize'/'remotesize\tflags(request:Update,Range state:File response:Modified,Chunked,gZipped)\t"); - fprintf(cache->txt,"statuscode\tstatus ('servermsg')\tMIME\tEtag|Date\tURL\tlocalfile\t(from URL)"LF); + } else { + cache->dat=fopen(fconcat(opt->path_log,"hts-cache/new.dat"),"wb"); + cache->ndx=fopen(fconcat(opt->path_log,"hts-cache/new.ndx"),"wb"); + // les deux doivent être ouvrables + if ((cache->dat==NULL) && (cache->ndx!=NULL)) { + fclose(cache->ndx); + cache->ndx=NULL; + } + if ((cache->dat!=NULL) && (cache->ndx==NULL)) { + fclose(cache->dat); + cache->dat=NULL; } - // test - // cache_writedata(cache->ndx,cache->dat,"//[TEST]//","test1","TEST PIPO",9); + if (cache->ndx!=NULL) { + char s[256]; + + cache_wstr(cache->dat,"CACHE-1.5"); + fflush(cache->dat); + cache_wstr(cache->ndx,"CACHE-1.5"); + fflush(cache->ndx); + // + time_gmt_rfc822(s); // date et heure actuelle GMT pour If-Modified-Since.. 
+ cache_wstr(cache->ndx,s); + fflush(cache->ndx); // un petit fflush au cas où + + // supprimer old.lst + if (fexist(fconcat(opt->path_log,"hts-cache/old.lst"))) + remove(fconcat(opt->path_log,"hts-cache/old.lst")); + // renommer + if (fexist(fconcat(opt->path_log,"hts-cache/new.lst"))) + rename(fconcat(opt->path_log,"hts-cache/new.lst"),fconcat(opt->path_log,"hts-cache/old.lst")); + // ouvrir + cache->lst=fopen(fconcat(opt->path_log,"hts-cache/new.lst"),"wb"); + { + filecreate_params tmp; + strcpybuff(tmp.path,opt->path_html); // chemin + tmp.lst=cache->lst; // fichier lst + filenote("",&tmp); // initialiser filecreate + } + + // supprimer old.txt + if (fexist(fconcat(opt->path_log,"hts-cache/old.txt"))) + remove(fconcat(opt->path_log,"hts-cache/old.txt")); + // renommer + if (fexist(fconcat(opt->path_log,"hts-cache/new.txt"))) + rename(fconcat(opt->path_log,"hts-cache/new.txt"),fconcat(opt->path_log,"hts-cache/old.txt")); + // ouvrir + cache->txt=fopen(fconcat(opt->path_log,"hts-cache/new.txt"),"wb"); + if (cache->txt) { + fprintf(cache->txt,"date\tsize'/'remotesize\tflags(request:Update,Range state:File response:Modified,Chunked,gZipped)\t"); + fprintf(cache->txt,"statuscode\tstatus ('servermsg')\tMIME\tEtag|Date\tURL\tlocalfile\t(from URL)"LF); + } + + // test + // cache_writedata(cache->ndx,cache->dat,"//[TEST]//","test1","TEST PIPO",9); + } } } else { @@ -906,12 +1595,11 @@ char* readfile_or(char* fil,char* defaultdata) { int cache_wstr(FILE* fp,char* s) { INTsys i; char buff[256+4]; - i=strlen(s); + i = s != NULL ? 
strlen(s) : 0; sprintf(buff,INTsysP "\n",i); if (fwrite(buff,1,(INTsys)strlen(buff),fp) != strlen(buff)) return -1; - if (i>0) - if ((INTsys)fwrite(s,1,i,fp) != i) + if (i > 0 && (INTsys)fwrite(s,1,i,fp) != i) return -1; return 0; } @@ -922,10 +1610,34 @@ void cache_rstr(FILE* fp,char* s) { sscanf(buff,INTsysP,&i); if (i < 0 || i > 32768) /* error, something nasty happened */ i=0; - if (i>0) - fread(s,1,i,fp); + if (i>0) { + if ((int) fread(s,1,i,fp) != i) { + int fread_cache_failed = 0; + assertf(fread_cache_failed); + } + } *(s+i)='\0'; } +char* cache_rstr_addr(FILE* fp) { + INTsys i; + char* addr = NULL; + char buff[256+4]; + linput(fp,buff,256); + sscanf(buff,INTsysP,&i); + if (i < 0 || i > 32768) /* error, something nasty happened */ + i=0; + if (i > 0) { + addr = malloct(i + 1); + if (addr != NULL) { + if ((int) fread(addr,1,i,fp) != i) { + int fread_cache_failed = 0; + assertf(fread_cache_failed); + } + *(addr+i)='\0'; + } + } + return addr; +} int cache_brstr(char* adr,char* s) { int i; int off; diff --git a/src/htscache.h b/src/htscache.h index ef897f1..51dd439 100644 --- a/src/htscache.h +++ b/src/htscache.h @@ -42,9 +42,12 @@ Please visit our Website: http://www.httrack.com #include "htscore.h" +/* Library internal definictions */ +#ifdef HTS_INTERNAL_BYTECODE + // cache void cache_mayadd(httrackp* opt,cache_back* cache,htsblk* r,char* url_adr,char* url_fil,char* url_save); -void cache_add(htsblk r,char* url_adr,char* url_fil,char* url_save,FILE* cache_ndx,FILE* cache_dat,int all_in_cache); +void cache_add(cache_back* cache,htsblk r,char* url_adr,char* url_fil,char* url_save,int all_in_cache); htsblk cache_read(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* location); htsblk cache_read_ro(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* location); htsblk cache_readex(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* location,char* return_save,int readonly); @@ -56,6 +59,7 @@ int 
cache_readdata(cache_back* cache,char* str1,char* str2,char** inbuff,int* le int cache_wstr(FILE* fp,char* s); void cache_rstr(FILE* fp,char* s); +char* cache_rstr_addr(FILE* fp); int cache_brstr(char* adr,char* s); int cache_quickbrstr(char* adr,char* s); int cache_brint(char* adr,int* i); @@ -63,4 +67,7 @@ void cache_rint(FILE* fp,int* i); int cache_wint(FILE* fp,int i); void cache_rLLint(FILE* fp,LLint* i); int cache_wLLint(FILE* fp,LLint i); + +#endif + #endif diff --git a/src/htscatchurl.c b/src/htscatchurl.c index 8455ea0..3832019 100644 --- a/src/htscatchurl.c +++ b/src/htscatchurl.c @@ -34,6 +34,9 @@ Please visit our Website: http://www.httrack.com /* Author: Xavier Roche */ /* ------------------------------------------------------------ */ +/* Internal engine bytecode */ +#define HTS_INTERNAL_BYTECODE + // Fichier intercepteur d'URL .c /* specific definitions */ @@ -41,11 +44,9 @@ Please visit our Website: http://www.httrack.com #include "htsbase.h" #include "htsnet.h" #include "htslib.h" -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <time.h> +#ifndef _WIN32_WCE #include <fcntl.h> +#endif #if HTS_WIN #else #include <arpa/inet.h> @@ -194,8 +195,8 @@ HTSEXT_API int catch_url(T_SOC soc,char* url,char* method,char* data) { socinput(soc,line,1000); if (strnotempty(line)) { if (sscanf(line,"%s %s %s",method,url,protocol) == 3) { - char url_adr[HTS_URLMAXSIZE*2]; - char url_fil[HTS_URLMAXSIZE*2]; + char BIGSTK url_adr[HTS_URLMAXSIZE*2]; + char BIGSTK url_fil[HTS_URLMAXSIZE*2]; // méthode en majuscule int i,r=0; url_adr[0]=url_fil[0]='\0'; @@ -207,7 +208,7 @@ HTSEXT_API int catch_url(T_SOC soc,char* url,char* method,char* data) { // adresse du lien if (ident_url_absolute(url,url_adr,url_fil)>=0) { // Traitement des en-têtes - char loc[HTS_URLMAXSIZE*2]; + char BIGSTK loc[HTS_URLMAXSIZE*2]; htsblk blkretour; memset(&blkretour, 0, sizeof(htsblk)); // effacer blkretour.location=loc; // si non nul, contiendra l'adresse véritable en cas de 
moved xx diff --git a/src/htscatchurl.h b/src/htscatchurl.h index a2514ef..cec7537 100644 --- a/src/htscatchurl.h +++ b/src/htscatchurl.h @@ -41,6 +41,9 @@ Please visit our Website: http://www.httrack.com #include "htsbasenet.h" +/* Library internal definictions */ +#ifdef HTS_INTERNAL_BYTECODE + // Fonctions void socinput(T_SOC soc,char* s,int max); #ifndef HTTRACK_DEFLIB @@ -74,5 +77,7 @@ HTSEXT_API int catch_url(T_SOC soc,char* url,char* method,char* data); #endif +#endif + diff --git a/src/htscore.c b/src/htscore.c index ba1e226..ff761ef 100644 --- a/src/htscore.c +++ b/src/htscore.c @@ -34,11 +34,12 @@ Please visit our Website: http://www.httrack.com /* Author: Xavier Roche */ /* ------------------------------------------------------------ */ -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <time.h> +/* Internal engine bytecode */ +#define HTS_INTERNAL_BYTECODE + +#ifndef _WIN32_WCE #include <fcntl.h> +#endif #include <ctype.h> /* File defs */ @@ -60,6 +61,10 @@ Please visit our Website: http://www.httrack.com // parser #include "htsparse.h" +/* Cache */ +#include "htszlib.h" + + /* END specific definitions */ @@ -71,6 +76,8 @@ t_hts_htmlcheck_uninit hts_htmlcheck_uninit = NULL; t_hts_htmlcheck_start hts_htmlcheck_start = NULL; t_hts_htmlcheck_end hts_htmlcheck_end = NULL; t_hts_htmlcheck_chopt hts_htmlcheck_chopt = NULL; +t_hts_htmlcheck_process hts_htmlcheck_preprocess = NULL; +t_hts_htmlcheck_process hts_htmlcheck_postprocess = NULL; t_hts_htmlcheck hts_htmlcheck = NULL; t_hts_htmlcheck_query hts_htmlcheck_query = NULL; t_hts_htmlcheck_query2 hts_htmlcheck_query2 = NULL; @@ -80,11 +87,13 @@ t_hts_htmlcheck_check hts_htmlcheck_check = NULL; t_hts_htmlcheck_pause hts_htmlcheck_pause = NULL; t_hts_htmlcheck_filesave hts_htmlcheck_filesave = NULL; t_hts_htmlcheck_linkdetected hts_htmlcheck_linkdetected = NULL; +t_hts_htmlcheck_linkdetected2 hts_htmlcheck_linkdetected2 = NULL; t_hts_htmlcheck_xfrstatus hts_htmlcheck_xfrstatus = NULL; 
t_hts_htmlcheck_savename hts_htmlcheck_savename = NULL; t_hts_htmlcheck_sendhead hts_htmlcheck_sendhead = NULL; t_hts_htmlcheck_receivehead hts_htmlcheck_receivehead = NULL; +extern void set_wrappers(void); char _hts_errmsg[1100]=""; int _hts_in_html_parsing=0; @@ -201,7 +210,7 @@ hts_htmlcheck_end(); \ if (back) { \ int i; \ for(i=0;i<back_max;i++) { \ - back_delete(&opt,back,i); \ + back_delete(&opt,&cache,back,i); \ } \ freet(back); back=NULL; \ } \ @@ -209,6 +218,14 @@ hts_htmlcheck_end(); \ if (cache.use) { freet(cache.use); cache.use=NULL; } \ if (cache.dat) { fclose(cache.dat); cache.dat=NULL; } \ if (cache.ndx) { fclose(cache.ndx); cache.ndx=NULL; } \ + if (cache.zipOutput) { \ + zipClose(cache.zipOutput, "Created by HTTrack Website Copier/"HTTRACK_VERSION); \ + cache.zipOutput = NULL; \ + } \ + if (cache.zipInput) { \ + unzClose(cache.zipInput); \ + cache.zipInput = NULL; \ + } \ if (cache.olddat) { fclose(cache.olddat); cache.olddat=NULL; } \ if (cache.lst) { fclose(cache.lst); cache.lst=NULL; } \ if (cache.txt) { fclose(cache.txt); cache.txt=NULL; } \ @@ -219,9 +236,11 @@ hts_htmlcheck_end(); \ if (opt.accept_cookie) cookie_save(opt.cookie,fconcat(opt.path_log,"cookies.txt")); \ if (makeindex_fp) { fclose(makeindex_fp); makeindex_fp=NULL; } \ if (cache_hashtable) { inthash_delete(&cache_hashtable); } \ + if (cache_tests) { inthash_delete(&cache_tests); } \ if (template_header) { freet(template_header); template_header=NULL; } \ if (template_body) { freet(template_body); template_body=NULL; } \ if (template_footer) { freet(template_footer); template_footer=NULL; } \ + clearCallbacks(&opt.state.callbacks); \ /*structcheck_init(-1);*/ \ } while(0) #define XH_uninit do { XH_extuninit; if (r.adr) { freet(r.adr); r.adr=NULL; } } while(0) @@ -289,7 +308,7 @@ hash_write(hashptr,lien_tot,NORM); \ #define HT_INDEX_END do { \ if (!makeindex_done) { \ if (makeindex_fp) { \ - char tempo[1024]; \ + char BIGSTK tempo[1024]; \ if (makeindex_links == 1) { \ 
sprintf(tempo,"<meta HTTP-EQUIV=\"Refresh\" CONTENT=\"0; URL=%s\">"CRLF,makeindex_firstlink); \ } else \ @@ -313,13 +332,13 @@ makeindex_done=1; /* ok c'est fait */ \ // Début de httpmirror, robot // url1 peut être multiple int httpmirror(char* url1,httrackp* ptropt) { - httrackp opt = *ptropt; // structure d'options + httrackp BIGSTK opt; // structure d'options char* primary=NULL; // première page, contenant les liens à scanner int lien_tot=0; // nombre de liens pour le moment lien_url** liens=NULL; // les pointeurs sur les liens hash_struct hash; // système de hachage, accélère la recherche dans les liens hash_struct* hashptr = &hash; - t_cookie cookie; // gestion des cookies + t_cookie BIGSTK cookie; // gestion des cookies int lien_max=0; int lien_size=0; // octets restants dans buffer liens dispo char* lien_buffer=NULL; // buffer liens actuel @@ -330,7 +349,7 @@ int httpmirror(char* url1,httrackp* ptropt) { int numero_passe=0; // deux passes pour html puis images int back_max=0; // fichiers qui peuvent être en local lien_back* back=NULL; // backing en local - htsblk r; // retour de certaines fonctions + htsblk BIGSTK r; // retour de certaines fonctions TStamp lastime=0; // pour affichage infos de tmp en tmp // pour les stats, nombre de fichiers & octets écrits LLint stat_fragment=0; // pour la fragmentation @@ -346,7 +365,7 @@ int httpmirror(char* url1,httrackp* ptropt) { int makeindex_done=0; // lorsque l'index sera fait FILE* makeindex_fp=NULL; int makeindex_links=0; - char makeindex_firstlink[HTS_URLMAXSIZE*2]; + char BIGSTK makeindex_firstlink[HTS_URLMAXSIZE*2]; // statistiques (mode #Z) FILE* makestat_fp=NULL; // fichier de stats taux transfert FILE* maketrack_fp=NULL; // idem pour le tracking @@ -354,16 +373,19 @@ int httpmirror(char* url1,httrackp* ptropt) { LLint makestat_total=0; // repère du nombre d'octets transférés depuis denrière stat int makestat_lnk=0; // idem, pour le nombre de liens // - char codebase[HTS_URLMAXSIZE*2]; // base pour applet 
java - char base[HTS_URLMAXSIZE*2]; // base pour les autres fichiers + char BIGSTK codebase[HTS_URLMAXSIZE*2]; // base pour applet java + char BIGSTK base[HTS_URLMAXSIZE*2]; // base pour les autres fichiers // - cache_back cache; - robots_wizard robots; // gestion robots.txt + cache_back BIGSTK cache; + robots_wizard BIGSTK robots; // gestion robots.txt inthash cache_hashtable=NULL; + inthash cache_tests=NULL; int cache_hash_size=0; // char *template_header=NULL,*template_body=NULL,*template_footer=NULL; // + opt = *ptropt; + // codebase[0]='\0'; base[0]='\0'; // cookie.auth.next=NULL; @@ -444,13 +466,16 @@ int httpmirror(char* url1,httrackp* ptropt) { if (!cache_hash_size) cache_hash_size=HTS_HASH_SIZE; cache_hashtable=inthash_new(cache_hash_size); - if (cache_hashtable==NULL) { + cache_tests=inthash_new(cache_hash_size); + if (cache_hashtable==NULL || cache_tests==NULL) { printf("PANIC! : Not enough memory [%d]\n",__LINE__); filters[0]=NULL; back_max=0; // uniquement a cause du warning de XH_extuninit XH_extuninit; return 0; } + inthash_value_is_malloc(cache_tests, 1); /* malloc */ cache.hashtable=(void*)cache_hashtable; /* copy backcache hash */ + cache.cached_tests=(void*)cache_tests; /* copy of cache_tests */ // initialiser cache DNS _hts_lockdns(-999); @@ -539,7 +564,7 @@ int httpmirror(char* url1,httrackp* ptropt) { if (joker) { // joker ou filters //char* p; - char tempo[HTS_URLMAXSIZE*2]; + char BIGSTK tempo[HTS_URLMAXSIZE*2]; int type; int plus=0; // noter joker (dans b) @@ -598,7 +623,7 @@ int httpmirror(char* url1,httrackp* ptropt) { } } else { // adresse normale - char url[HTS_URLMAXSIZE*2]; + char BIGSTK url[HTS_URLMAXSIZE*2]; // prochaine adresse i=0; while((*a!=0) && (!isspace((unsigned char)*a))) { url[i++]=*a; a++; } @@ -638,7 +663,7 @@ int httpmirror(char* url1,httrackp* ptropt) { if (filelist_buff) { int filelist_ptr=0; int n=0; - char line[HTS_URLMAXSIZE*2]; + char BIGSTK line[HTS_URLMAXSIZE*2]; char* primary_ptr = primary + strlen(primary); 
while( filelist_ptr < filelist_sz ) { int count=binput(filelist_buff+filelist_ptr,line,HTS_URLMAXSIZE); @@ -758,6 +783,7 @@ int httpmirror(char* url1,httrackp* ptropt) { makestat_fp=fopen(fconcat(opt.path_log,"hts-stats.txt"),"wb"); if (makestat_fp != NULL) { fprintf(makestat_fp,"HTTrack statistics report, every minutes"LF LF); + fflush(makestat_fp); } } @@ -766,6 +792,7 @@ int httpmirror(char* url1,httrackp* ptropt) { maketrack_fp=fopen(fconcat(opt.path_log,"hts-track.txt"),"wb"); if (maketrack_fp != NULL) { fprintf(maketrack_fp,"HTTrack tracking report, every minutes"LF LF); + fflush(maketrack_fp); } } @@ -776,6 +803,10 @@ int httpmirror(char* url1,httrackp* ptropt) { } } + /* Send options to callback functions */ +#if HTS_ANALYSTE + hts_htmlcheck_chopt(&opt); +#endif // attendre une certaine heure.. if (opt.waittime>0) { @@ -795,6 +826,7 @@ int httpmirror(char* url1,httrackp* ptropt) { } // attendre.. + _hts_in_html_parsing=5; do { TStamp tl=0; time_t tt; @@ -828,6 +860,7 @@ int httpmirror(char* url1,httrackp* ptropt) { } #endif } while(!ok); + _hts_in_html_parsing=0; // note: recopie de plus haut // noter heure actuelle de départ en secondes @@ -854,6 +887,7 @@ int httpmirror(char* url1,httrackp* ptropt) { XH_extuninit; return 1; } + set_wrappers(); // _start() is allowed to set other wrappers #endif @@ -865,7 +899,7 @@ int httpmirror(char* url1,httrackp* ptropt) { do { int error=0; // si error alors sauter int store_errpage=0; // c'est une erreur mais on enregistre le html - char loc[HTS_URLMAXSIZE*2]; // adresse de relocation + char BIGSTK loc[HTS_URLMAXSIZE*2]; // adresse de relocation // Ici on charge le fichier (html, gif..) 
en mémoire // Les HTMLs sont traités (si leur priorité est suffisante) @@ -877,6 +911,9 @@ int httpmirror(char* url1,httrackp* ptropt) { memcpy(&(r.req.proxy), &opt.proxy, sizeof(opt.proxy)); // et user-agent strcpybuff(r.req.user_agent,opt.user_agent); + strcpybuff(r.req.referer,opt.referer); + strcpybuff(r.req.from,opt.from); + strcpybuff(r.req.lang_iso,opt.lang_iso); r.req.user_agent_send=opt.user_agent_send; if (!error) { @@ -928,9 +965,9 @@ int httpmirror(char* url1,httrackp* ptropt) { Get the next link, waiting for other files, handling external callbacks */ { - char buff_err_msg[1024]; - htsmoduleStruct str; - htsmoduleStructExtended stre; + char BIGSTK buff_err_msg[1024]; + htsmoduleStruct BIGSTK str; + htsmoduleStructExtended BIGSTK stre; buff_err_msg[0] = '\0'; memset(&str, 0, sizeof(str)); memset(&stre, 0, sizeof(stre)); @@ -1018,7 +1055,7 @@ int httpmirror(char* url1,httrackp* ptropt) { } else { // lien vide.. - if (opt.errlog) { + if (opt.errlog && opt.debug > 0) { fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Warning, link #%d empty"LF,ptr); test_flush; } error=1; @@ -1067,8 +1104,8 @@ int httpmirror(char* url1,httrackp* ptropt) { if (!error) { if (r.statuscode == 200) { // OK (ou 304 en backing) if (r.adr) { // Written file - if ( (is_hypertext_mime(r.contenttype)) /* Is HTML or Js, .. */ - || (may_be_hypertext_mime(r.contenttype) && (r.adr) ) /* Is real media, .. */ + if ( (is_hypertext_mime(r.contenttype, urlfil)) /* Is HTML or Js, .. */ + || (may_be_hypertext_mime(r.contenttype, urlfil) && (r.adr) ) /* Is real media, .. */ ) { if (strnotempty(r.cdispo)) { // Content-disposition set! if (ishtml(savename) == 0) { // Non HTML!! @@ -1083,8 +1120,8 @@ int httpmirror(char* url1,httrackp* ptropt) { // ------------------------------------ // BOGUS MIME TYPE HACK II (the revenge) // Check if we have a bogus MIME type - if ( (is_hypertext_mime(r.contenttype)) /* Is HTML or Js, .. */ - || (may_be_hypertext_mime(r.contenttype)) /* Is real media, .. 
*/ + if ( (is_hypertext_mime(r.contenttype, urlfil)) /* Is HTML or Js, .. */ + || (may_be_hypertext_mime(r.contenttype, urlfil)) /* Is real media, .. */ ) { if ((r.adr) && (r.size)) { unsigned int map[256]; @@ -1159,11 +1196,11 @@ int httpmirror(char* url1,httrackp* ptropt) { if (!error) { if (r.statuscode == 200) { // OK (ou 304 en backing) if (r.adr==NULL) { // Written file - if (may_be_hypertext_mime(r.contenttype)) { // to parse! + if (may_be_hypertext_mime(r.contenttype, urlfil)) { // to parse! LLint sz; sz=fsize(savename); if (sz>0) { // ok, exists! - if (sz < 1024) { // ok, small file --> to parse! + if (sz < 8192) { // ok, small file --> to parse! FILE* fp=fopen(savename,"rb"); if (fp) { r.adr=malloct((int)sz + 2); @@ -1216,9 +1253,9 @@ int httpmirror(char* url1,httrackp* ptropt) { redirect pages. */ if (!error) { - char buff_err_msg[1024]; - htsmoduleStruct str; - htsmoduleStructExtended stre; + char BIGSTK buff_err_msg[1024]; + htsmoduleStruct BIGSTK str; + htsmoduleStructExtended BIGSTK stre; buff_err_msg[0] = '\0'; memset(&str, 0, sizeof(str)); memset(&stre, 0, sizeof(stre)); @@ -1346,8 +1383,8 @@ int httpmirror(char* url1,httrackp* ptropt) { // traiter if ( - ( (is_hypertext_mime(r.contenttype)) /* Is HTML or Js, .. */ - || (may_be_hypertext_mime(r.contenttype) && (r.adr) ) /* Is real media, .. */ + ( (is_hypertext_mime(r.contenttype, urlfil)) /* Is HTML or Js, .. */ + || (may_be_hypertext_mime(r.contenttype, urlfil) && (r.adr) ) /* Is real media, .. 
*/ ) && (liens[ptr]->depth>0) /* Depth > 0 (recurse depth) */ && (r.adr!=NULL) /* HTML Data exists */ @@ -1363,9 +1400,9 @@ int httpmirror(char* url1,httrackp* ptropt) { fspc(opt.log,"info"); fprintf(opt.log,"engine: check-html: %s%s"LF,urladr,urlfil); } { - char buff_err_msg[1024]; - htsmoduleStruct str; - htsmoduleStructExtended stre; + char BIGSTK buff_err_msg[1024]; + htsmoduleStruct BIGSTK str; + htsmoduleStructExtended BIGSTK stre; buff_err_msg[0] = '\0'; memset(&str, 0, sizeof(str)); memset(&stre, 0, sizeof(stre)); @@ -1507,9 +1544,9 @@ int httpmirror(char* url1,httrackp* ptropt) { if (strcmp(urlfil,"/robots.txt")==0) { // robots.txt if (r.adr) { int bptr=0; - char line[1024]; - char buff[8192]; - char infobuff[8192]; + char BIGSTK line[1024]; + char BIGSTK buff[8192]; + char BIGSTK infobuff[8192]; int record=0; line[0]='\0'; buff[0]='\0'; infobuff[0]='\0'; // @@ -1553,7 +1590,7 @@ int httpmirror(char* url1,httrackp* ptropt) { while(is_realspace(*a)) a++; // sauter espace(s) if (strnotempty(a)) { - if (strcmp(a,"/") != 0) { /* ignoring disallow: / */ + if (strcmp(a,"/") != 0 || opt.robots >= 3) { /* ignoring disallow: / */ if ( (strlen(buff) + strlen(a) + 8) < sizeof(buff)) { strcatbuff(buff,a); strcatbuff(buff,"\n"); @@ -1601,8 +1638,8 @@ int httpmirror(char* url1,httrackp* ptropt) { // Si par la suite on doit retraiter ce fichier avec un niveau de récursion plus // fort, on supprimera le readme, et on scannera le fichier html! // note: sauté si store_errpage (càd si page d'erreur, non à scanner!) - if ( (is_hypertext_mime(r.contenttype)) && (!store_errpage) && (r.size>0)) { // c'est du html!! - char tempo[HTS_URLMAXSIZE*2]; + if ( (is_hypertext_mime(r.contenttype, urlfil)) && (!store_errpage) && (r.size>0)) { // c'est du html!! 
+ char BIGSTK tempo[HTS_URLMAXSIZE*2]; FILE* fp; tempo[0]='\0'; strcpybuff(tempo,savename); @@ -1695,7 +1732,7 @@ int httpmirror(char* url1,httrackp* ptropt) { FILE* fp=fopen(savename,"r+b"); if (fp) { if (!fseek(fp,0,SEEK_SET)) { - char line[HTS_URLMAXSIZE*2]; + char BIGSTK line[HTS_URLMAXSIZE*2]; linput(fp,line,HTS_URLMAXSIZE); if (strnotempty(line)) { if ((opt.debug>1) && (opt.log!=NULL)) { @@ -1711,8 +1748,8 @@ int httpmirror(char* url1,httrackp* ptropt) { /* External modules */ if (opt.parsejava && fexist(savename)) { - char buff_err_msg[1024]; - htsmoduleStruct str; + char BIGSTK buff_err_msg[1024]; + htsmoduleStruct BIGSTK str; buff_err_msg[0] = '\0'; memset(&str, 0, sizeof(str)); /* */ @@ -1889,7 +1926,7 @@ jump_if_done: while(!feof(old_lst)) { linput(old_lst,line,1000); if (!strstr(adr,line)) { // fichier non trouvé dans le nouveau? - char file[HTS_URLMAXSIZE*2]; + char BIGSTK file[HTS_URLMAXSIZE*2]; strcpybuff(file,opt.path_html); strcatbuff(file,line+1); file[strlen(file)-1]='\0'; @@ -1912,7 +1949,7 @@ jump_if_done: line[strlen(line)-1]='\0'; if (strnotempty(line)) if (!strstr(adr,line)) { // non trouvé? 
- char file[HTS_URLMAXSIZE*2]; + char BIGSTK file[HTS_URLMAXSIZE*2]; strcpybuff(file,opt.path_html); strcatbuff(file,line+1); while ((strnotempty(file)) && (rmdir(file)==0)) { // ok, éliminé (existait) @@ -1956,26 +1993,28 @@ jump_if_done: // afficher résumé dans log if (opt.log!=NULL) { + char BIGSTK finalInfo[8192]; int error = fspc(NULL,"error"); int warning = fspc(NULL,"warning"); int info = fspc(NULL,"info"); - char htstime[256]; - char infoupdated[256]; + char BIGSTK htstime[256]; + char BIGSTK infoupdated[256]; // int n=(int) (stat_loaded/(time_local()-HTS_STAT.stat_timestart)); LLint n=(LLint) (HTS_STAT.HTS_TOTAL_RECV/(max(1,time_local()-HTS_STAT.stat_timestart))); sec2str(htstime,time_local()-HTS_STAT.stat_timestart); - //fprintf(opt.log,LF"HTS-mirror complete in %s : %d links scanned, %d files written (%d bytes overall) [%d bytes received at %d bytes/sec]"LF,htstime,lien_tot-1,HTS_STAT.stat_files,stat_bytes,stat_loaded,n); + //sprintf(finalInfo + strlen(finalInfo),LF"HTS-mirror complete in %s : %d links scanned, %d files written (%d bytes overall) [%d bytes received at %d bytes/sec]"LF,htstime,lien_tot-1,HTS_STAT.stat_files,stat_bytes,stat_loaded,n); infoupdated[0] = '\0'; if (opt.is_update) { - if (HTS_STAT.stat_updated_files < 0) { + if (HTS_STAT.stat_updated_files > 0) { sprintf(infoupdated, ", %d files updated", (int)HTS_STAT.stat_updated_files); } else { sprintf(infoupdated, ", no files updated"); } } - fprintf(opt.log,LF - "HTTrack mirror complete in %s : " + finalInfo[0] = '\0'; + sprintf(finalInfo + strlen(finalInfo), + "HTTrack Website Copier/"HTTRACK_VERSION" mirror complete in %s : " "%d links scanned, %d files written ("LLintP" bytes overall)%s " "["LLintP" bytes received at "LLintP" bytes/sec]", htstime, @@ -1985,20 +2024,31 @@ jump_if_done: infoupdated, (LLint)HTS_STAT.HTS_TOTAL_RECV, (LLint)n - ); + ); + if (HTS_STAT.total_packed > 0 && HTS_STAT.total_unpacked > 0) { int 
packed_ratio=(int)((LLint)(HTS_STAT.total_packed*100)/HTS_STAT.total_unpacked); - fprintf(opt.log,", "LLintP" bytes transfered using HTTP compression in %d files, ratio %d%%",(LLint)HTS_STAT.total_unpacked,HTS_STAT.total_packedfiles,(int)packed_ratio); + sprintf(finalInfo + strlen(finalInfo),", "LLintP" bytes transfered using HTTP compression in %d files, ratio %d%%",(LLint)HTS_STAT.total_unpacked,HTS_STAT.total_packedfiles,(int)packed_ratio); } if (!opt.nokeepalive && HTS_STAT.stat_sockid > 0 && HTS_STAT.stat_nrequests > HTS_STAT.stat_sockid) { int rq = (HTS_STAT.stat_nrequests * 10) / HTS_STAT.stat_sockid; - fprintf(opt.log,", %d.%d requests per connection", rq/10, rq%10); + sprintf(finalInfo + strlen(finalInfo),", %d.%d requests per connection", rq/10, rq%10); } - fprintf(opt.log,LF); + sprintf(finalInfo + strlen(finalInfo),LF); if (error) - fprintf(opt.log,"(%d errors, %d warnings, %d messages)"LF,error,warning,info); + sprintf(finalInfo + strlen(finalInfo),"(%d errors, %d warnings, %d messages)"LF,error,warning,info); else - fprintf(opt.log,"(No errors, %d warnings, %d messages)"LF,warning,info); + sprintf(finalInfo + strlen(finalInfo),"(No errors, %d warnings, %d messages)"LF,warning,info); + + // Log + fprintf(opt.log,LF"%s", finalInfo); + + // Close ZIP + if (cache.zipOutput) { + zipClose(cache.zipOutput, finalInfo); + cache.zipOutput = NULL; + } + test_flush; } #if DEBUG_HASH @@ -2301,7 +2351,7 @@ int filters_init(char*** ptrfilters, int maxfilter, int filterinc) { HTSEXT_API int structcheck(char* s) { // vérifier la présence des dossier(s) char *a=s; - char nom[HTS_URLMAXSIZE*2]; + char BIGSTK nom[HTS_URLMAXSIZE*2]; char *b; //inthash structcheck_hash=NULL; if (strnotempty(s)==0) return 0; @@ -2399,7 +2449,7 @@ int check_fatal_io_errno(void) { // ouvrir un fichier (avec chemin Un*x) FILE* filecreate(char* s) { - char fname[HTS_URLMAXSIZE*2]; + char BIGSTK fname[HTS_URLMAXSIZE*2]; FILE* fp; fname[0]='\0'; @@ -2464,7 +2514,7 @@ int filenote(char* 
s,filecreate_params* params) { strc->lst=params->lst; return 0; } else if (strc->lst) { - char savelst[HTS_URLMAXSIZE*2]; + char BIGSTK savelst[HTS_URLMAXSIZE*2]; strcpybuff(savelst,fslash(s)); // couper chemin? if (strnotempty(strc->path)) { @@ -2515,7 +2565,7 @@ HTS_INLINE void usercommand(httrackp* opt,int _exe,char* _cmd,char* file,char* a } } void usercommand_exe(char* cmd,char* file) { - char temp[8192]; + char BIGSTK temp[8192]; char c[2]=""; int i; temp[0]='\0'; @@ -2554,7 +2604,7 @@ static void postprocess_file(httrackp* opt,char* save, char* adr, char* fil) { first = 1; opt->state.mimefp = fopen(fconcat(opt->path_html,"index.mht"), "wb"); if (opt->state.mimefp != NULL) { - char rndtmp[1024], currtime[256]; + char BIGSTK rndtmp[1024], currtime[256]; srand(time(NULL)); time_gmt_rfc822(currtime); sprintf(rndtmp, "%d_%d", (int)time(NULL), (int) rand()); @@ -2583,7 +2633,7 @@ static void postprocess_file(httrackp* opt,char* save, char* adr, char* fil) { if (fp != NULL) { char buff[60*100 + 2]; char mimebuff[256]; - char cid[HTS_URLMAXSIZE*3]; + char BIGSTK cid[HTS_URLMAXSIZE*3]; int len; int isHtml = ( ishtml(save) == 1 ); mimebuff[0] = '\0'; @@ -2730,13 +2780,31 @@ HTS_INLINE int back_fillmax(lien_back* back,int back_max,httrackp* opt,cache_bac return -1; /* plus de place */ } -// remplir backing -int back_fill(lien_back* back,int back_max,httrackp* opt,cache_back* cache,lien_url** liens,int ptr,int numero_passe,int lien_tot) { +int back_pluggable_sockets_strict(lien_back* back, int back_max, httrackp* opt) { + int n = opt->maxsoc - back_nsoc(back, back_max); + + // connect limiter + if (n > 0 && opt->maxconn > 0 && HTS_STAT.last_connect > 0) { + TStamp opTime = HTS_STAT.last_request ? 
HTS_STAT.last_request : HTS_STAT.last_connect; + TStamp cTime = mtime_local(); + TStamp lap = ( cTime - opTime ); + TStamp minLap = (TStamp) ( 1000.0 / opt->maxconn ); + if (lap < minLap) { + n = 0; + } else { + int nMax = (int) ( lap / minLap ); + n = min(n, nMax); + } + } + + return n; +} + +int back_pluggable_sockets(lien_back* back, int back_max, httrackp* opt) { int n; - int oneLess = ( (_hts_in_html_parsing == 2 && opt->maxsoc >= 2) || (_hts_in_html_parsing == 1 && opt->maxsoc >= 4) ) ? 1 : 0; // testing links // ajouter autant de socket qu'on peut ajouter - n=opt->maxsoc-back_nsoc(back,back_max) - oneLess; + n=back_pluggable_sockets_strict(back, back_max, opt); // vérifier qu'il restera assez de place pour les tests ensuite (en théorie, 1 entrée libre restante suffirait) n=min( n, back_available(back,back_max) - 8 ); @@ -2745,6 +2813,12 @@ int back_fill(lien_back* back,int back_max,httrackp* opt,cache_back* cache,lien_ if (back_stack_available(back,back_max) <= 2) n=0; + return n; +} + +// remplir backing +int back_fill(lien_back* back,int back_max,httrackp* opt,cache_back* cache,lien_url** liens,int ptr,int numero_passe,int lien_tot) { + int n = back_pluggable_sockets(back, back_max, opt); if (n>0) { int p; @@ -2886,11 +2960,7 @@ void sig_ask( int code ) { // demander void sig_ignore( int code ) { // ignorer signal } void sig_brpipe( int code ) { // treat if necessary - /* - if (!sig_ignore_flag(-1)) { - sig_term(code); - } - */ + signal(code, sig_brpipe); } void sig_doback(int blind) { // mettre en backing int out=-1; @@ -2943,7 +3013,11 @@ int read_stdin(char* s,int max) { } #ifdef _WIN32 HTS_INLINE int check_stdin(void) { +#ifndef _WIN32_WCE return (_kbhit()); +#else + return 0; +#endif } #else HTS_INLINE int check_flot(T_SOC s) { @@ -3043,7 +3117,7 @@ char* next_token(char* p,int flag) { else if (*(p+1)=='"') c='"'; if (c) { - char tempo[8192]; + char BIGSTK tempo[8192]; tempo[0]=c; tempo[1]='\0'; strcatbuff(tempo,p+2); strcpybuff(p,tempo); @@ -3051,7 
+3125,7 @@ char* next_token(char* p,int flag) { } } else if (*p==34) { // guillemets (de fin) - char tempo[8192]; + char BIGSTK tempo[8192]; tempo[0]='\0'; strcatbuff(tempo,p+1); strcpybuff(p,tempo); /* wipe "" */ @@ -3181,6 +3255,10 @@ HTSEXT_API int hts_is_testing(void) { // 0 non 1 test 2 purge return 2; else if (_hts_in_html_parsing==4) return 3; + else if (_hts_in_html_parsing==5) // scheduling + return 4; + else if (_hts_in_html_parsing==6) // wait for slot + return 5; return 0; } HTSEXT_API int hts_is_exiting(void) { @@ -3254,6 +3332,9 @@ HTSEXT_API int copy_htsopt(httrackp* from,httrackp* to) { if (from->maxrate > -1) to->maxrate = from->maxrate; + if (from->maxconn > 0) + to->maxconn = from->maxconn; + if (strnotempty(from->user_agent)) strcpybuff(to->user_agent , from->user_agent); @@ -3303,10 +3384,10 @@ int htsAddLink(htsmoduleStruct* str, char* link) { char* lien_buffer = * ( (char**) (str->lien_buffer_) ); /* */ /* */ - char adr[HTS_URLMAXSIZE*2], + char BIGSTK adr[HTS_URLMAXSIZE*2], fil[HTS_URLMAXSIZE*2], save[HTS_URLMAXSIZE*2]; - char codebase[HTS_URLMAXSIZE*2]; + char BIGSTK codebase[HTS_URLMAXSIZE*2]; /* */ int pass_fix, prio_fix; /* */ @@ -3321,7 +3402,7 @@ int htsAddLink(htsmoduleStruct* str, char* link) { // #if HTS_ANALYSTE - if (!hts_htmlcheck_linkdetected(link)) { + if (!hts_htmlcheck_linkdetected(link) || !hts_htmlcheck_linkdetected2(link, NULL)) { if (opt->errlog) { fspc(opt->errlog,"error"); fprintf(opt->errlog,"Link %s refused by external wrapper"LF, link); test_flush; @@ -3347,7 +3428,7 @@ int htsAddLink(htsmoduleStruct* str, char* link) { *(a+1)='\0'; // couper } else { // couper http:// éventuel if (strfield(codebase,"http://")) { - char tempo[HTS_URLMAXSIZE*2]; + char BIGSTK tempo[HTS_URLMAXSIZE*2]; char* a=codebase+7; a=strchr(a,'/'); // après host if (a) { // ** msg erreur et vérifier? 
@@ -3382,6 +3463,7 @@ int htsAddLink(htsmoduleStruct* str, char* link) { int just_test_it = 0; forbidden_url = hts_acceptlink(opt, ptr, lien_tot, liens, adr,fil, + NULL, NULL, &set_prio_to, &just_test_it); if ((opt->debug>1) && (opt->log!=NULL)) { @@ -3391,7 +3473,7 @@ int htsAddLink(htsmoduleStruct* str, char* link) { /* Link accepted */ if (!forbidden_url) { - char tempo[HTS_URLMAXSIZE*2]; + char BIGSTK tempo[HTS_URLMAXSIZE*2]; int a,b; tempo[0]='\0'; a=opt->savename_type; diff --git a/src/htscore.h b/src/htscore.h index d9e5d0a..97c0127 100644 --- a/src/htscore.h +++ b/src/htscore.h @@ -44,16 +44,25 @@ Please visit our Website: http://www.httrack.com /* specific definitions */ #include "htsbase.h" // Includes & définitions -#include <stdio.h> -#include <stdlib.h> -#include <string.h> +#ifdef HAVE_SYS_TYPES_H #include <sys/types.h> +#endif +#ifdef HAVE_SYS_STAT_H #include <sys/stat.h> +#endif #ifdef _WIN32 +#ifndef _WIN32_WCE #include <conio.h> +#endif +#ifndef _WIN32_WCE #include <signal.h> #include <direct.h> #else +#ifndef HTS_CECOMPAT +#include "signal.h" +#endif +#endif +#else #include <signal.h> #ifdef HAVE_UNISTD_H #include <unistd.h> @@ -68,7 +77,7 @@ Please visit our Website: http://www.httrack.com #include "htsopt.h" // structure d'un lien -typedef struct { +typedef struct lien_url { char firstblock; // flag 1=premier malloc char link_import; // lien importé à la suite d'un moved - ne pas appliquer les règles classiques up/down int depth; // profondeur autorisée lien ; >0 forte 0=faible @@ -93,7 +102,7 @@ typedef struct { } lien_url; // chargement de fichiers en 'arrière plan' -typedef struct { +typedef struct lien_back { #if DEBUG_CHECKINT char magic; #endif @@ -137,8 +146,10 @@ typedef struct { #endif } lien_back; +typedef struct cache_back_zip_entry cache_back_zip_entry; + // cache -typedef struct { +typedef struct cache_back { int version; // 0 ou 1 /* */ int type; @@ -150,15 +161,23 @@ typedef struct { char lastmodified[256]; // HASH void* 
hashtable; + // HASH for tests (naming subsystem) + void* cached_tests; // fichiers log optionnels FILE* log; FILE* errlog; // variables int ptr_ant; // pointeur pour anticiper int ptr_last; // pointeur pour anticiper + // + void* zipInput; + void* zipOutput; + cache_back_zip_entry* zipEntries; + int zipEntriesOffs; + int zipEntriesCapa; } cache_back; -typedef struct { +typedef struct hash_struct { lien_url** liens; // pointeur sur liens int max_lien; // indice le plus grand rencontré int hash[3][HTS_HASH_SIZE]; // tables pour sav/adr-fil/former_adr-former_fil @@ -169,11 +188,24 @@ typedef struct { #define hash_write(A,B) #endif -typedef struct { +typedef struct filecreate_params { FILE* lst; char path[HTS_URLMAXSIZE*2]; } filecreate_params; +/* Library internal definictions */ +#ifdef HTS_INTERNAL_BYTECODE + +static int cache_writable(cache_back* cache) { + return (cache != NULL && ( cache->dat != NULL || cache->zipOutput != NULL ) ); +} + +static int cache_readable(cache_back* cache) { + return (cache != NULL && ( cache->olddat != NULL || cache->zipInput != NULL ) ); +} + +#endif + // Fonctions // INCLUDES .H PARTIES DE CODE HTTRACK @@ -240,6 +272,7 @@ typedef void (* t_hts_htmlcheck_uninit)(void); typedef int (* t_hts_htmlcheck_start)(httrackp* opt); typedef int (* t_hts_htmlcheck_end)(void); typedef int (* t_hts_htmlcheck_chopt)(httrackp* opt); +typedef int (* t_hts_htmlcheck_process)(char** html,int* len,char* url_adresse,char* url_fichier); typedef int (* t_hts_htmlcheck)(char* html,int len,char* url_adresse,char* url_fichier); typedef char* (* t_hts_htmlcheck_query)(char* question); typedef char* (* t_hts_htmlcheck_query2)(char* question); @@ -249,6 +282,7 @@ typedef int (* t_hts_htmlcheck_check)(char* adr,char* fil,int status); typedef void (* t_hts_htmlcheck_pause)(char* lockfile); typedef void (* t_hts_htmlcheck_filesave)(char* file); typedef int (* t_hts_htmlcheck_linkdetected)(char* link); +typedef int (* t_hts_htmlcheck_linkdetected2)(char* link, char* 
tag_start); typedef int (* t_hts_htmlcheck_xfrstatus)(lien_back* back); typedef int (* t_hts_htmlcheck_savename)(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save); typedef int (* t_hts_htmlcheck_sendhead)(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* outgoing); @@ -264,6 +298,8 @@ extern t_hts_htmlcheck_uninit hts_htmlcheck_uninit; extern t_hts_htmlcheck_start hts_htmlcheck_start; extern t_hts_htmlcheck_end hts_htmlcheck_end; extern t_hts_htmlcheck_chopt hts_htmlcheck_chopt; +extern t_hts_htmlcheck_process hts_htmlcheck_preprocess; +extern t_hts_htmlcheck_process hts_htmlcheck_postprocess; extern t_hts_htmlcheck hts_htmlcheck; extern t_hts_htmlcheck_query hts_htmlcheck_query; extern t_hts_htmlcheck_query2 hts_htmlcheck_query2; @@ -273,11 +309,16 @@ extern t_hts_htmlcheck_check hts_htmlcheck_check; extern t_hts_htmlcheck_pause hts_htmlcheck_pause; extern t_hts_htmlcheck_filesave hts_htmlcheck_filesave; extern t_hts_htmlcheck_linkdetected hts_htmlcheck_linkdetected; +extern t_hts_htmlcheck_linkdetected2 hts_htmlcheck_linkdetected2; extern t_hts_htmlcheck_xfrstatus hts_htmlcheck_xfrstatus; extern t_hts_htmlcheck_savename hts_htmlcheck_savename; extern t_hts_htmlcheck_sendhead hts_htmlcheck_sendhead; extern t_hts_htmlcheck_receivehead hts_htmlcheck_receivehead; */ + +/* Library internal definictions */ +#ifdef HTS_INTERNAL_BYTECODE + // #ifndef HTTRACK_DEFLIB HTSEXT_API int hts_is_parsing(int flag); @@ -307,8 +348,6 @@ extern char** _hts_addurl; extern int _hts_cancel; #endif - - // @@ -342,6 +381,8 @@ int liens_record(char* adr,char* fil,char* save,char* former_adr,char* former_fi // backing, routines externes +int back_pluggable_sockets(lien_back* back, int back_max, httrackp* opt); +int back_pluggable_sockets_strict(lien_back* back, int back_max, httrackp* opt); int back_fill(lien_back* back,int back_max,httrackp* opt,cache_back* cache,lien_url** liens,int ptr,int numero_passe,int lien_tot); int 
backlinks_done(lien_url** liens,int lien_tot,int ptr); int back_fillmax(lien_back* back,int back_max,httrackp* opt,cache_back* cache,lien_url** liens,int ptr,int numero_passe,int lien_tot); @@ -395,4 +436,6 @@ void voidf(void); #endif +#endif + diff --git a/src/htscoremain.c b/src/htscoremain.c index 1162c18..bd90593 100644 --- a/src/htscoremain.c +++ b/src/htscoremain.c @@ -35,6 +35,9 @@ Please visit our Website: http://www.httrack.com /* Author: Xavier Roche */ /* ------------------------------------------------------------ */ +/* Internal engine bytecode */ +#define HTS_INTERNAL_BYTECODE + #include "htscoremain.h" #include "htsglobal.h" @@ -43,6 +46,7 @@ Please visit our Website: http://www.httrack.com #include "htsalias.h" #include "htswrap.h" #include "htsmodules.h" +#include "htszlib.h" #include <ctype.h> #if HTS_WIN @@ -100,7 +104,7 @@ extern int IPV6_resolver; } \ } while(0) -static void set_wrappers(void) { +void set_wrappers(void) { #if HTS_ANALYSTE // custom wrappers hts_htmlcheck_init = (t_hts_htmlcheck_init) htswrap_read("init"); @@ -108,6 +112,8 @@ static void set_wrappers(void) { hts_htmlcheck_start = (t_hts_htmlcheck_start) htswrap_read("start"); hts_htmlcheck_end = (t_hts_htmlcheck_end) htswrap_read("end"); hts_htmlcheck_chopt = (t_hts_htmlcheck_chopt) htswrap_read("change-options"); + hts_htmlcheck_preprocess = (t_hts_htmlcheck_process) htswrap_read("preprocess-html"); + hts_htmlcheck_postprocess = (t_hts_htmlcheck_process) htswrap_read("postprocess-html"); hts_htmlcheck = (t_hts_htmlcheck) htswrap_read("check-html"); hts_htmlcheck_query = (t_hts_htmlcheck_query) htswrap_read("query"); hts_htmlcheck_query2 = (t_hts_htmlcheck_query2) htswrap_read("query2"); @@ -117,6 +123,7 @@ static void set_wrappers(void) { hts_htmlcheck_pause = (t_hts_htmlcheck_pause) htswrap_read("pause"); hts_htmlcheck_filesave = (t_hts_htmlcheck_filesave) htswrap_read("save-file"); hts_htmlcheck_linkdetected = (t_hts_htmlcheck_linkdetected) htswrap_read("link-detected"); + 
hts_htmlcheck_linkdetected2 = (t_hts_htmlcheck_linkdetected2) htswrap_read("link-detected2"); hts_htmlcheck_xfrstatus = (t_hts_htmlcheck_xfrstatus) htswrap_read("transfer-status"); hts_htmlcheck_savename = (t_hts_htmlcheck_savename) htswrap_read("save-name"); hts_htmlcheck_sendhead = (t_hts_htmlcheck_sendhead) htswrap_read("send-header"); @@ -130,13 +137,9 @@ HTSEXT_API int hts_main(int argc, char **argv) { #else int main(int argc, char **argv) { #endif - char* x_argv[999]; // Patch pour argv et argc: en cas de récupération de ligne de commande + char** x_argv=NULL; // Patch pour argv et argc: en cas de récupération de ligne de commande char* x_argvblk=NULL; // (reprise ou update) int x_ptr=0; // offset - /* - char* x_argv2[999]; // Patch pour config - char* x_argvblk2=NULL; - */ // int argv_url=-1; // ==0 : utiliser cache et doit.log char* argv_firsturl=NULL; // utilisé pour nommage par défaut @@ -144,13 +147,13 @@ int main(int argc, char **argv) { int url_sz = 65535; //char url[65536]; // URLS séparées par un espace // the parametres - httrackp httrack; + httrackp BIGSTK httrack; int httrack_logmode=3; // ONE log file - int recuperer=0; // récupérer un plantage (n'arrive jamais, à supprimer) + int recuperer=0; // récupérer un plantage (n'arrive jamais, à supprimer) #if HTS_WIN #if HTS_ANALYSTE!=2 WORD wVersionRequested; /* requested version WinSock API */ - WSADATA wsadata; /* Windows Sockets API data */ + WSADATA BIGSTK wsadata; /* Windows Sockets API data */ #endif #else #ifndef HTS_DO_NOT_USE_UID @@ -197,6 +200,8 @@ int main(int argc, char **argv) { strcpybuff(httrack.proxy.bindhost, ""); // bind default host httrack.user_agent_send=1; // envoyer un user-agent strcpybuff(httrack.user_agent,"Mozilla/4.5 (compatible; HTTrack 3.0x; Windows 98)"); + strcpybuff(httrack.referer, ""); + strcpybuff(httrack.from, ""); httrack.savename_83=0; // noms longs par défaut httrack.savename_type=0; // avec structure originale httrack.mimehtml=0; // pas MIME-html @@ -218,6 
+223,7 @@ int main(int argc, char **argv) { httrack.nocompression=0; // pas de compression httrack.tolerant=0; // ne pas accepter content-length incorrect httrack.parseall=1; // tout parser (tags inconnus, par exemple) + httrack.parsedebug=0; // pas de mode débuggage httrack.norecatch=0; // ne pas reprendre les fichiers effacés par l'utilisateur httrack.verbosedisplay=0; // pas d'animation texte httrack.sizehack=0; // size hack @@ -238,19 +244,25 @@ int main(int argc, char **argv) { strcpybuff(httrack.path_log,""); strcpybuff(httrack.path_bin,""); // +#if HTS_SPARE_MEMORY==0 httrack.maxlink=100000; // 100,000 liens max par défaut (400Kb) httrack.maxfilter=200; // 200 filtres max par défaut +#else + httrack.maxlink=10000; // 10,000 liens max par défaut (40Kb) + httrack.maxfilter=50; // 50 filtres max par défaut +#endif httrack.maxcache=1048576*32; // a peu près 32Mo en cache max -- OPTION NON PARAMETRABLE POUR L'INSTANT -- //httrack.maxcache_anticipate=256; // maximum de liens à anticiper httrack.maxtime=-1; // temps max en secondes - httrack.maxrate=-1; // pas de taux maxi - httrack.maxconn=10; // nombre connexions/s + httrack.maxrate=25000; // default max rate + httrack.maxconn=5.0; // nombre connexions/s httrack.waittime=-1; // wait until.. 
hh*3600+mm*60+ss // httrack.exec=argv[0]; httrack.is_update=0; // not an update (yet) httrack.dir_topindex=0; // do not built top index (yet) // + httrack.bypass_limits=0; // enforce limits by default httrack.state.stop=0; // stopper httrack.state.exit_xh=0; // abort // @@ -337,6 +349,15 @@ int main(int argc, char **argv) { strcpybuff(httrack.path_bin, HTS_HTTRACKDIR); #endif + /* libhttrack-plugin DLL preload (libhttrack-plugin.so or libhttrack-plugin.dll) */ + { + void* userfunction = getFunctionPtr(&httrack, "libhttrack-plugin", "plugin_init"); + if (userfunction != NULL) { + t_hts_htmlcheck_init initFnc = (t_hts_htmlcheck_init) userfunction; + initFnc(); + set_wrappers(); /* Re-read wrappers internal static functions */ + } + } /* filter CR, LF, TAB.. */ { @@ -373,13 +394,16 @@ int main(int argc, char **argv) { } x_argvblk[0]='\0'; x_ptr=0; + + /* Create argv */ + x_argv = (char**) malloct(sizeof(char*) * ( argc + 1024 )); } /* Create new argc/argv, replace alias, count URLs, treat -h, -q, -i */ { - char _tmp_argv[2][HTS_CDLMAXSIZE]; + char BIGSTK _tmp_argv[2][HTS_CDLMAXSIZE]; + char BIGSTK tmp_error[HTS_CDLMAXSIZE]; char* tmp_argv[2]; - char tmp_error[HTS_CDLMAXSIZE]; int tmp_argc; int x_argc=0; int na; @@ -461,53 +485,6 @@ int main(int argc, char **argv) { argc=x_argc; } - - - - // Ici on ajoute les arguments de config -/* - if (fexist("config")) { // configuration - x_argvblk2=(char*) calloct(32768,1); - - if (x_argvblk2!=NULL) { - FILE* fp; - int x_argc2; - - //strcpybuff(x_argvblk2,"httrack "); - fp=fopen("config","rb"); - if (fp) { - linput(fp,x_argvblk2+strlen(x_argvblk2),32000); - fclose(fp); fp=NULL; - - // calculer arguments selon derniers arguments - x_argv2[0]=argv[0]; - x_argc2=1; - { - char* p=x_argvblk2; - do { - x_argv2[x_argc2++]=p; - p=strchr(p,' '); - if (p) { - *p=0; // octet nul (tableau) - p++; - } - } while(p!=NULL); - } - // recopier arguments actuels (pointeurs uniquement) - { - int na; - for(na=1;na<argc;na++) { - 
x_argv2[x_argc2++]=argv[na]; - } - } - argc=x_argc2; // nouvel argc - argv=x_argv2; // nouvel argv - } - } - } -*/ - - // Option O and includerc { int loops=0; @@ -518,10 +495,10 @@ int main(int argc, char **argv) { for(na=1;na<argc;na++) { if (argv[na][0]=='"') { - char tempo[HTS_CDLMAXSIZE]; + char BIGSTK tempo[HTS_CDLMAXSIZE]; strcpybuff(tempo,argv[na]+1); if (tempo[strlen(tempo)-1]!='"') { - char s[HTS_CDLMAXSIZE]; + char BIGSTK s[HTS_CDLMAXSIZE]; sprintf(s,"Missing quote in %s",argv[na]); HTS_PANIC_PRINTF(s); htsmain_free(); @@ -626,7 +603,7 @@ int main(int argc, char **argv) { if (fp) { int insert_after=1; /* insérer après nom au début */ // - char buff[8192]; + char BIGSTK buff[8192]; char *p,*lastp; linput(fp,buff,8000); fclose(fp); fp=NULL; @@ -646,21 +623,8 @@ int main(int argc, char **argv) { /* Insert parameters BUT so that they can be in the same order */ if (lastp) { if (strnotempty(lastp)) { - //char* argv0; - //int len; insert_after_argc=argc-insert_after; - //argv0 = (argv+insert_after)[0]; cmdl_ins(lastp,insert_after_argc,(argv+insert_after),x_argvblk,x_ptr); - /* - DONE IN 'next_token' - len = strlen(argv0); - if (len >= 2 && argv0[0]=='\"' && argv0[len-1]=='\"') { // "foo" - char tempo[1024]; - tempo[0] = '\0'; - strncatbuff(tempo, argv0+1, len-2); - strcpybuff(argv0, tempo); - } - */ argc=insert_after_argc+insert_after; insert_after++; } @@ -675,7 +639,11 @@ int main(int argc, char **argv) { #if DEBUG_STEPS printf("Checking cache\n"); #endif - if ( (!fexist(fconcat(httrack.path_log,"hts-cache/new.dat"))) || (!fexist(fconcat(httrack.path_log,"hts-cache/new.ndx"))) ) { + if (!fexist(fconcat(httrack.path_log,"hts-cache/new.zip"))) { + if ( fexist(fconcat(httrack.path_log,"hts-cache/old.zip")) ) { + rename(fconcat(httrack.path_log,"hts-cache/old.zip"),fconcat(httrack.path_log,"hts-cache/new.zip")); + } + } else if ( (!fexist(fconcat(httrack.path_log,"hts-cache/new.dat"))) || (!fexist(fconcat(httrack.path_log,"hts-cache/new.ndx"))) ) { if ( 
(fexist(fconcat(httrack.path_log,"hts-cache/old.dat"))) && (fexist(fconcat(httrack.path_log,"hts-cache/old.ndx"))) ) { remove(fconcat(httrack.path_log,"hts-cache/new.dat")); remove(fconcat(httrack.path_log,"hts-cache/new.ndx")); @@ -723,6 +691,11 @@ int main(int argc, char **argv) { remove(fconcat(httrack.path_log,"hts-err.txt")); if (fexist(fconcat(httrack.path_html,"index.html"))) remove(fconcat(httrack.path_html,"index.html")); + /* */ + if (fexist(fconcat(httrack.path_log,"hts-cache/new.zip"))) + remove(fconcat(httrack.path_log,"hts-cache/new.zip")); + if (fexist(fconcat(httrack.path_log,"hts-cache/old.zip"))) + remove(fconcat(httrack.path_log,"hts-cache/old.zip")); if (fexist(fconcat(httrack.path_log,"hts-cache/new.dat"))) remove(fconcat(httrack.path_log,"hts-cache/new.dat")); if (fexist(fconcat(httrack.path_log,"hts-cache/new.ndx"))) @@ -894,7 +867,11 @@ int main(int argc, char **argv) { #endif if (argv_url==0) { // Présence d'un cache, que faire?.. - if ((fexist(fconcat(httrack.path_log,"hts-cache/new.dat"))) && (fexist(fconcat(httrack.path_log,"hts-cache/new.ndx")))) { // il existe déja un cache précédent.. renommer + if ( + ( fexist(fconcat(httrack.path_log,"hts-cache/new.zip")) ) + || + ( fexist(fconcat(httrack.path_log,"hts-cache/new.dat")) && fexist(fconcat(httrack.path_log,"hts-cache/new.ndx")) ) + ) { // il existe déja un cache précédent.. renommer if (fexist(fconcat(httrack.path_log,"hts-cache/doit.log"))) { // un cache est présent if (x_argvblk!=NULL) { int m; @@ -967,7 +944,11 @@ int main(int argc, char **argv) { httrack.cache=1; // cache prioritaire if (httrack.quiet==0) { - if ((fexist(fconcat(httrack.path_log,"hts-cache/new.dat"))) && (fexist(fconcat(httrack.path_log,"hts-cache/new.ndx")))) { // il existe déja un cache précédent.. 
renommer + if ( + ( fexist(fconcat(httrack.path_log,"hts-cache/new.zip")) ) + || + ( fexist(fconcat(httrack.path_log,"hts-cache/new.dat")) && fexist(fconcat(httrack.path_log,"hts-cache/new.ndx")) ) + ) { HT_REQUEST_START; HT_PRINT("There is a lock-file in the directory "); HT_PRINT(httrack.path_log); @@ -985,7 +966,11 @@ int main(int argc, char **argv) { //char s[32]; httrack.cache=2; // cache vient après test de validité if (httrack.quiet==0) { - if ((fexist(fconcat(httrack.path_log,"hts-cache/new.dat"))) && (fexist(fconcat(httrack.path_log,"hts-cache/new.ndx")))) { // il existe déja un cache précédent.. renommer + if ( + ( fexist(fconcat(httrack.path_log,"hts-cache/new.zip")) ) + || + ( fexist(fconcat(httrack.path_log,"hts-cache/new.dat")) && fexist(fconcat(httrack.path_log,"hts-cache/new.ndx")) ) + ) { HT_REQUEST_START; HT_PRINT("There is an index.html and a hts-cache folder in the directory "); HT_PRINT(httrack.path_log); @@ -1027,7 +1012,7 @@ int main(int argc, char **argv) { for(na=1;na<argc;na++) { if (argv[na][0]=='"') { - char tempo[HTS_CDLMAXSIZE]; + char BIGSTK tempo[HTS_CDLMAXSIZE]; strcpybuff(tempo,argv[na]+1); if (tempo[strlen(tempo)-1]!='"') { char s[HTS_CDLMAXSIZE]; @@ -1189,13 +1174,13 @@ int main(int argc, char **argv) { { sscanf(com+1,"%d",&httrack.savename_83); switch(httrack.savename_83) { - case 0: + case 0: // 8-3 (ISO9660 L1) httrack.savename_83=1; break; case 1: httrack.savename_83=0; break; - default: + default: // 2 == ISO9660 (ISO9660 L2) httrack.savename_83=2; break; } @@ -1234,7 +1219,7 @@ int main(int argc, char **argv) { case 'q': httrack.includequery=1; if (*(com+1)=='0') { httrack.includequery=0; com++; } break; // No passwords in html files case 'I': httrack.kindex=1; if (isdigit((unsigned char)*(com+1))) { sscanf(com+1,"%d",&httrack.kindex); while(isdigit((unsigned char)*(com+1))) com++; } break; // Keyword Index - case 'c': sscanf(com+1,"%d",&httrack.maxconn); while(isdigit((unsigned char)*(com+1))) com++; break; + case 'c': 
sscanf(com+1,"%f",&httrack.maxconn); while(isdigit((unsigned char)*(com+1)) || *(com+1) == '.') com++; break; case 'e': sscanf(com+1,"%d",&httrack.extdepth); while(isdigit((unsigned char)*(com+1))) com++; break; case 'B': httrack.tolerant=1; if (*(com+1)=='0') { httrack.tolerant=0; com++; } break; // HTTP/1.0 notamment case 'h': httrack.http10=1; if (*(com+1)=='0') { httrack.http10=0; com++; } break; // HTTP/1.0 @@ -1246,6 +1231,7 @@ int main(int argc, char **argv) { case 'u': httrack.urlhack=1; if (*(com+1)=='0') { httrack.urlhack=0; com++; } break; // url hack case 'v': httrack.verbosedisplay=2; if (isdigit((unsigned char)*(com+1))) { sscanf(com+1,"%d",&httrack.verbosedisplay); while(isdigit((unsigned char)*(com+1))) com++; } break; case 'i': httrack.dir_topindex = 1; if (*(com+1)=='0') { httrack.dir_topindex=0; com++; } break; + case '!': httrack.bypass_limits = 1; if (*(com+1)=='0') { httrack.bypass_limits=0; com++; } break; // preserve: no footer, original links case 'p': @@ -1433,7 +1419,7 @@ int main(int argc, char **argv) { na++; if (pos != NULL && (pos - a) > 0 && (pos - a + 2) < sizeof(callbackname)) { char* posf = strchr(pos + 1, ':'); - char filename[1024]; + char BIGSTK filename[1024]; callbackname[0] = '\0'; strncatbuff(callbackname, a, pos - a); pos++; @@ -1442,38 +1428,38 @@ int main(int argc, char **argv) { filename[0] = '\0'; strncatbuff(filename, pos, posf - pos); posf++; - userfunction = getFunctionPtr(filename, posf); + userfunction = getFunctionPtr(&httrack, filename, posf); if (userfunction != NULL) { if ((void*)htswrap_read(callbackname) != NULL) { if (htswrap_add(callbackname, userfunction)) { - if (!httrack.quiet) { - set_wrappers(); - if ((void*)htswrap_read(callbackname) == userfunction) { - printf("successfully plugged [%s -> %s:%s]\n", callbackname, posf, filename); - } else { - char tmp[1024 * 2]; - sprintf(tmp, "option %%W : unable to (re)plug the function %s from the file %s for the callback %s", posf, filename, callbackname); - 
HTS_PANIC_PRINTF(tmp); - htsmain_free(); - return -1; + set_wrappers(); /* Re-read wrappers internal static functions */ + if ((void*)htswrap_read(callbackname) == userfunction) { + if (!httrack.quiet) { + fprintf(stderr, "successfully plugged [%s -> %s:%s]\n", callbackname, posf, filename); } + } else { + char BIGSTK tmp[1024 * 2]; + sprintf(tmp, "option %%W : unable to (re)plug the function %s from the file %s for the callback %s", posf, filename, callbackname); + HTS_PANIC_PRINTF(tmp); + htsmain_free(); + return -1; } } else { - char tmp[1024 * 2]; + char BIGSTK tmp[1024 * 2]; sprintf(tmp, "option %%W : unable to plug the function %s from the file %s for the callback %s", posf, filename, callbackname); HTS_PANIC_PRINTF(tmp); htsmain_free(); return -1; } } else { - char tmp[1024 * 2]; + char BIGSTK tmp[1024 * 2]; sprintf(tmp, "option %%W : unknown or undefined callback %s", callbackname); HTS_PANIC_PRINTF(tmp); htsmain_free(); return -1; } } else { - char tmp[1024 * 2]; + char BIGSTK tmp[1024 * 2]; sprintf(tmp, "option %%W : unable to load the function %s in the file %s for the callback %s", posf, filename, callbackname); HTS_PANIC_PRINTF(tmp); htsmain_free(); @@ -1494,6 +1480,39 @@ int main(int argc, char **argv) { } break; + case 'R': // Referer + if ((na+1>=argc) || (argv[na+1][0]=='-')) { + HTS_PANIC_PRINTF("Option %R needs to be followed by a blank space, and a referer URL"); + printf("Example: -%%R \"http://www.example.com/\"\n"); + htsmain_free(); + return -1; + } else{ + na++; + if (strlen(argv[na])>=254) { + HTS_PANIC_PRINTF("Referer URL too long"); + htsmain_free(); + return -1; + } + strcpybuff(httrack.referer, argv[na]); + } + break; + case 'E': // From Email address + if ((na+1>=argc) || (argv[na+1][0]=='-')) { + HTS_PANIC_PRINTF("Option %E needs to be followed by a blank space, and an email"); + printf("Example: -%%E \"postmaster@example.com\"\n"); + htsmain_free(); + return -1; + } else{ + na++; + if (strlen(argv[na])>=254) { + 
HTS_PANIC_PRINTF("From email too long"); + htsmain_free(); + return -1; + } + strcpybuff(httrack.from, argv[na]); + } + break; + default: { char s[HTS_CDLMAXSIZE]; sprintf(s,"invalid option %%%c\n",*com); @@ -1587,9 +1606,9 @@ int main(int argc, char **argv) { cache.hashtable=(void*)cache_hashtable; /* copy backcache hash */ cache.ro = 1; /* read only */ if (cache.hashtable) { - char adr[HTS_URLMAXSIZE*2]; - char fil[HTS_URLMAXSIZE*2]; - char url[HTS_URLMAXSIZE*2]; + char BIGSTK adr[HTS_URLMAXSIZE*2]; + char BIGSTK fil[HTS_URLMAXSIZE*2]; + char BIGSTK url[HTS_URLMAXSIZE*2]; char linepos[256]; int pos; char* cacheNdx = readfile(fconcat(httrack.path_log,"hts-cache/new.ndx")); @@ -1620,7 +1639,7 @@ int main(int argc, char **argv) { || (strjoker(url, filter, NULL, NULL) != NULL) ) { - r = cache_read(&httrack, &cache, adr, fil, "", NULL); // lire entrée cache + data + r = cache_read_ro(&httrack, &cache, adr, fil, "", NULL); // lire entrée cache + data if (r.statuscode != -1) { // No errors found++; if (!hasFilter) { @@ -1629,7 +1648,7 @@ int main(int argc, char **argv) { adr, fil); } else { char msg[256], cdate[256]; - char sav[HTS_URLMAXSIZE*2]; + char BIGSTK sav[HTS_URLMAXSIZE*2]; infostatuscode(msg, r.statuscode); time_gmt_rfc822(cdate); @@ -1713,6 +1732,14 @@ int main(int argc, char **argv) { return 0; } break; + case 'E': // extract cache + if (!hts_extract_meta(httrack.path_log)) { + fprintf(stderr, "* error extracting meta-data\n"); + return 1; + } + fprintf(stderr, "* successfully extracted meta-data\n"); + return 0; + break; case 'X': #ifndef STRDEBUG fprintf(stderr, "warning: no string debugging support built, option has no effect\n"); @@ -1720,6 +1747,34 @@ int main(int argc, char **argv) { htsMemoryFastXfr=1; if (*(com+1)=='0') { htsMemoryFastXfr=0; com++; } break; + case 'R': + { + char* name; + uLong repaired = 0; + uLong repairedBytes = 0; + if (fexist(fconcat(httrack.path_log,"hts-cache/new.zip"))) { + name = 
fconcat(httrack.path_log,"hts-cache/new.zip"); + } else if (fexist(fconcat(httrack.path_log,"hts-cache/old.zip"))) { + name = fconcat(httrack.path_log,"hts-cache/old.zip"); + } else { + fprintf(stderr, "* error: no cache found in %s\n", fconcat(httrack.path_log,"hts-cache/new.zip")); + return 1; + } + fprintf(stderr, "Cache: trying to repair %s\n", name); + if (unzRepair(name, + fconcat(httrack.path_log,"hts-cache/repair.zip"), + fconcat(httrack.path_log,"hts-cache/repair.tmp"), + &repaired, &repairedBytes + ) == Z_OK) { + unlink(name); + rename(fconcat(httrack.path_log,"hts-cache/repair.zip"), name); + fprintf(stderr,"Cache: %d bytes successfully recovered in %d entries\n", (int) repairedBytes, (int) repaired); + } else { + fprintf(stderr, "Cache: could not repair the cache\n"); + } + } + return 0; + break; case '~': /* internal lib test */ { char thisIsATestYouShouldSeeAnError[12]; @@ -1742,11 +1797,12 @@ int main(int argc, char **argv) { case 'T': httrack.maketrack=1; break; case 'u': sscanf(com+1,"%d",&httrack.waittime); while(isdigit((unsigned char)*(com+1))) com++; break; - case 'R': // ohh ftp, catch->ftpget + /*case 'R': // ohh ftp, catch->ftpget HTS_PANIC_PRINTF("Unexpected internal error with -#R command"); htsmain_free(); return -1; break; + */ case 'P': { // catchurl help_catchurl(httrack.path_log); htsmain_free(); @@ -1769,6 +1825,19 @@ int main(int argc, char **argv) { return 0; } break; + case '1': /* test #1 : fil_simplifie */ + if (na+1>=argc) { + HTS_PANIC_PRINTF("Option #1 needs to be followed by an URL"); + printf("Example: '-#1' ./foo/bar/../foobar\n"); + htsmain_free(); + return -1; + } else { + fil_simplifie(argv[na+1]); + printf("simplified=%s\n", argv[na+1]); + htsmain_free(); + return 0; + } + break; case '!': if (na+1>=argc) { HTS_PANIC_PRINTF("Option #! 
needs to be followed by a commandline"); @@ -1779,6 +1848,15 @@ int main(int argc, char **argv) { system(argv[na+1]); } break; + case 'd': + httrack.parsedebug = 1; + break; + + /* autotest */ + case 't': /* not yet implemented */ + fprintf(stderr, "** AUTOCHECK OK\n"); + exit(0); + break; default: printf("Internal option %c not recognized\n",*com); break; } @@ -1866,7 +1944,7 @@ int main(int argc, char **argv) { } // while } else { // URL/filters - char tempo[1024]; + char BIGSTK tempo[1024]; if (strnotempty(url)) strcatbuff(url," "); // espace de séparation strcpybuff(tempo,unescape_http_unharm(argv[na],1)); escape_spc_url(tempo); @@ -1895,7 +1973,7 @@ int main(int argc, char **argv) { //if (userdef) { if (!userid) { //if (strcmp(userdef->pw_name,"root")==0) { - char rpath[1024]; + char BIGSTK rpath[1024]; //printf("html=%s log=%s\n",httrack.path_html,httrack.path_log); // xxc if ((httrack.path_html[0]) && (httrack.path_log[0])) { char *a=httrack.path_html,*b=httrack.path_log,*c=NULL,*d=NULL; @@ -1913,7 +1991,7 @@ int main(int argc, char **argv) { strncatbuff(rpath,httrack.path_html,(int) (c - httrack.path_html)); } { - char tmp[1024]; + char BIGSTK tmp[1024]; strcpybuff(tmp,c); strcpybuff(httrack.path_html,tmp); strcpybuff(tmp,d); strcpybuff(httrack.path_log,tmp); } @@ -1971,7 +2049,19 @@ int main(int argc, char **argv) { // cad la version contenant le plus de fichiers if (httrack.cache) { if (fexist(fconcat(httrack.path_log,"hts-in_progress.lock"))) { // problemes.. 
- if (fexist(fconcat(httrack.path_log,"hts-cache/new.dat")) && fexist(fconcat(httrack.path_log,"hts-cache/new.ndx"))) { + if ( fexist(fconcat(httrack.path_log,"hts-cache/new.zip")) ) { /* zip cache: if new.zip is tiny (<32KB) and old.zip is larger (>64KB), restore old.zip */ + if ( fexist(fconcat(httrack.path_log,"hts-cache/old.zip")) ) { + if (fsize(fconcat(httrack.path_log,"hts-cache/new.zip"))<32768) { + if (fsize(fconcat(httrack.path_log,"hts-cache/old.zip"))>65536) { + if (fsize(fconcat(httrack.path_log,"hts-cache/old.zip")) > fsize(fconcat(httrack.path_log,"hts-cache/new.zip"))) { + remove(fconcat(httrack.path_log,"hts-cache/new.zip")); + rename(fconcat(httrack.path_log,"hts-cache/old.zip"), fconcat(httrack.path_log,"hts-cache/new.zip")); + } + } + } + } + } + else if (fexist(fconcat(httrack.path_log,"hts-cache/new.dat")) && fexist(fconcat(httrack.path_log,"hts-cache/new.ndx"))) { if (fexist(fconcat(httrack.path_log,"hts-cache/old.dat")) && fexist(fconcat(httrack.path_log,"hts-cache/old.ndx"))) { // switcher si new<32Ko et old>65Ko (tailles arbitraires) ? // ce cas est peut être une erreur ou un crash d'un miroir ancien, prendre @@ -2058,7 +2148,7 @@ int main(int argc, char **argv) { fprintf(fp,"and is used for updating this website."LF); fprintf(fp,"(The HTML website structure is stored here to allow fast updates)"LF""LF); fprintf(fp,"DO NOT delete this folder unless you do not want to update the mirror in the future!!"LF); - fprintf(fp,"(you can safely delete old.dat, old.ndx and old.lst files, however)"LF); + fprintf(fp,"(you can safely delete old.zip and old.lst files, however)"LF); fprintf(fp,""LF); fprintf(fp,HTS_LOG_SECURITY_WARNING); fclose(fp); @@ -2177,18 +2267,47 @@ int main(int argc, char **argv) { io_flush; + /* Enforce limits to avoid bandwidth abuse. The bypass_limits should only be used by administrators and experts. 
*/ + if (!httrack.bypass_limits) { + if (httrack.maxsoc <= 0 || httrack.maxsoc > 4) { + httrack.maxsoc = 4; + if (httrack.log != NULL) { + fspc(httrack.log,"warning"); fprintf(httrack.log,"* security warning: maximum number of simultaneous connections limited to %d to avoid server overload"LF, (int)httrack.maxsoc); + } + } + if (httrack.maxrate <= 0 || httrack.maxrate > 100000) { + httrack.maxrate = 100000; + if (httrack.log != NULL) { + fspc(httrack.log,"warning"); fprintf(httrack.log,"* security warning: maximum bandwidth limited to %d to avoid server overload"LF, (int)httrack.maxrate); + } + } + if (httrack.maxconn <= 0 || httrack.maxconn > 5.0) { + httrack.maxconn = 5.0; + if (httrack.log != NULL) { + fspc(httrack.log,"warning"); fprintf(httrack.log,"* security warning: maximum number of connections per second limited to %f to avoid server overload"LF, (float)httrack.maxconn); + } + } + } else { + if (httrack.log != NULL) { + fspc(httrack.log,"warning"); fprintf(httrack.log,"* security warning: !!! BYPASSING SECURITY LIMITS - MONITOR THIS SESSION WITH EXTREME CARE !!!"LF); + } + } + /* Info for wrappers */ if ( (httrack.debug>0) && (httrack.log!=NULL) ) { fspc(httrack.log,"info"); fprintf(httrack.log,"engine: init"LF); } #if HTS_ANALYSTE hts_htmlcheck_init(); + set_wrappers(); // init() is allowed to set other wrappers #endif // détourner SIGHUP etc. 
#if HTS_WIN +#ifndef _WIN32_WCE signal( SIGINT , sig_ask ); // ^C signal( SIGTERM , sig_finish ); // kill <process> +#endif #else signal( SIGHUP , sig_back ); // close window signal( SIGTSTP , sig_back ); // ^Z @@ -2226,7 +2345,7 @@ deprecated - see SIGCHLD // // Build top index if (httrack.dir_topindex) { - char rpath[1024*2]; + char BIGSTK rpath[1024*2]; char* a; strcpybuff(rpath,httrack.path_html); if (rpath[0]) { @@ -2249,33 +2368,35 @@ deprecated - see SIGCHLD } } - /* Info for wrappers */ - if ( (httrack.debug>0) && (httrack.log!=NULL) ) { - fspc(httrack.log,"info"); fprintf(httrack.log,"engine: free"LF); - } + /* Info for wrappers */ + if ( (httrack.debug>0) && (httrack.log!=NULL) ) { + fspc(httrack.log,"info"); fprintf(httrack.log,"engine: free"LF); + } #if HTS_ANALYSTE - hts_htmlcheck_uninit(); + hts_htmlcheck_uninit(); #endif - + if (httrack_logmode!=1) { if (httrack.errlog == httrack.log) httrack.errlog=NULL; if (httrack.log) { fclose(httrack.log); httrack.log=NULL; } if (httrack.errlog) { fclose(httrack.errlog); httrack.errlog=NULL; } } - + // Débuggage des en têtes if (_DEBUG_HEAD) { if (ioinfo) { fclose(ioinfo); } } - + // supprimer lock remove(n_lock); } - + if (x_argvblk) freet(x_argvblk); + if (x_argv) + freet(x_argv); #if HTS_WIN #if HTS_ANALYSTE!=2 @@ -2315,7 +2436,7 @@ int check_path(char* s,char* defaultname) { if (strnotempty(s)) { if (s[(i=strlen(s))-1]=='#') { if (strnotempty((defaultname?defaultname:""))) { - char tempo[HTS_URLMAXSIZE*2]; + char BIGSTK tempo[HTS_URLMAXSIZE*2]; char* a=strchr(defaultname,'#'); // we never know.. 
if (a) *a='\0'; tempo[0]='\0'; @@ -2339,7 +2460,7 @@ int check_path(char* s,char* defaultname) { // détermine si l'argument est une option int cmdl_opt(char* s) { if (s[0]=='-') { // c'est peut être une option - if (strchr(s,'.')!=NULL) + if (strchr(s,'.')!=NULL && strchr(s,'%')==NULL) return 0; // sans doute un -www.truc.fr (note: -www n'est pas compris) else if (strchr(s,'/')!=NULL) return 0; // idem, -*cgi-bin/ diff --git a/src/htscoremain.h b/src/htscoremain.h index 3662793..548c7f6 100644 --- a/src/htscoremain.h +++ b/src/htscoremain.h @@ -46,6 +46,8 @@ Please visit our Website: http://www.httrack.com #include "htsglobal.h" +/* Library internal definictions */ +#ifdef HTS_INTERNAL_BYTECODE // Main, récupère les paramètres et appelle le robot #if HTS_ANALYSTE #ifndef HTTRACK_DEFLIB @@ -58,7 +60,7 @@ int main(int argc, char **argv); int cmdl_opt(char* s); int check_path(char* s,char* defaultname); - +#endif #endif diff --git a/src/htsdefines.h b/src/htsdefines.h index 0ab2cfa..e91b5b4 100644 --- a/src/htsdefines.h +++ b/src/htsdefines.h @@ -43,6 +43,7 @@ typedef void (* t_hts_htmlcheck_uninit)(void); typedef int (* t_hts_htmlcheck_start)(httrackp* opt); typedef int (* t_hts_htmlcheck_end)(void); typedef int (* t_hts_htmlcheck_chopt)(httrackp* opt); +typedef int (* t_hts_htmlcheck_process)(char** html,int* len,char* url_adresse,char* url_fichier); typedef int (* t_hts_htmlcheck)(char* html,int len,char* url_adresse,char* url_fichier); typedef char* (* t_hts_htmlcheck_query)(char* question); typedef char* (* t_hts_htmlcheck_query2)(char* question); @@ -52,11 +53,14 @@ typedef int (* t_hts_htmlcheck_check)(char* adr,char* fil,int status); typedef void (* t_hts_htmlcheck_pause)(char* lockfile); typedef void (* t_hts_htmlcheck_filesave)(char* file); typedef int (* t_hts_htmlcheck_linkdetected)(char* link); +typedef int (* t_hts_htmlcheck_linkdetected2)(char* link, char* tag_start); typedef int (* t_hts_htmlcheck_xfrstatus)(lien_back* back); typedef int (* 
t_hts_htmlcheck_savename)(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save); typedef int (* t_hts_htmlcheck_sendhead)(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* outgoing); typedef int (* t_hts_htmlcheck_receivehead)(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* incoming); +/* Library internal definictions */ +#ifdef HTS_INTERNAL_BYTECODE // demande d'interaction avec le shell #if HTS_ANALYSTE extern char HTbuff[2048]; @@ -65,6 +69,8 @@ extern t_hts_htmlcheck_uninit hts_htmlcheck_uninit; extern t_hts_htmlcheck_start hts_htmlcheck_start; extern t_hts_htmlcheck_end hts_htmlcheck_end; extern t_hts_htmlcheck_chopt hts_htmlcheck_chopt; +extern t_hts_htmlcheck_process hts_htmlcheck_preprocess; +extern t_hts_htmlcheck_process hts_htmlcheck_postprocess; extern t_hts_htmlcheck hts_htmlcheck; extern t_hts_htmlcheck_query hts_htmlcheck_query; extern t_hts_htmlcheck_query2 hts_htmlcheck_query2; @@ -74,6 +80,7 @@ extern t_hts_htmlcheck_check hts_htmlcheck_check; extern t_hts_htmlcheck_pause hts_htmlcheck_pause; extern t_hts_htmlcheck_filesave hts_htmlcheck_filesave; extern t_hts_htmlcheck_linkdetected hts_htmlcheck_linkdetected; +extern t_hts_htmlcheck_linkdetected2 hts_htmlcheck_linkdetected2; extern t_hts_htmlcheck_xfrstatus hts_htmlcheck_xfrstatus; extern t_hts_htmlcheck_savename hts_htmlcheck_savename; extern t_hts_htmlcheck_sendhead hts_htmlcheck_sendhead; @@ -102,3 +109,5 @@ extern t_hts_htmlcheck_receivehead hts_htmlcheck_receivehead; #endif +#endif + diff --git a/src/htsfilters.c b/src/htsfilters.c index be8b482..681b506 100644 --- a/src/htsfilters.c +++ b/src/htsfilters.c @@ -35,6 +35,9 @@ Please visit our Website: http://www.httrack.com /* Author: Xavier Roche */ /* ------------------------------------------------------------ */ +/* Internal engine bytecode */ +#define HTS_INTERNAL_BYTECODE + // *.gif match all gif files // *[file]/*[file].exe match all exe files 
with one folder structure @@ -49,9 +52,6 @@ Please visit our Website: http://www.httrack.com /* specific definitions */ #include "htsbase.h" #include "htslib.h" -#include <stdio.h> -#include <stdlib.h> -#include <string.h> #include <ctype.h> /* END specific definitions */ diff --git a/src/htsfilters.h b/src/htsfilters.h index 168d330..f963322 100644 --- a/src/htsfilters.h +++ b/src/htsfilters.h @@ -42,8 +42,11 @@ Please visit our Website: http://www.httrack.com #include "htsbase.h" +/* Library internal definictions */ +#ifdef HTS_INTERNAL_BYTECODE int fa_strjoker(char** filters,int nfil,char* nom,LLint* size,int* size_flag,int* depth); HTS_INLINE char* strjoker(char* chaine,char* joker,LLint* size,int* size_flag); char* strjokerfind(char* chaine,char* joker); +#endif #endif diff --git a/src/htsftp.c b/src/htsftp.c index 68a8af5..7b04052 100644 --- a/src/htsftp.c +++ b/src/htsftp.c @@ -34,6 +34,9 @@ Please visit our Website: http://www.httrack.com /* Author: Xavier Roche */ /* ------------------------------------------------------------ */ +/* Internal engine bytecode */ +#define HTS_INTERNAL_BYTECODE + // Gestion protocole ftp // Version .05 (01/2000) @@ -43,9 +46,6 @@ Please visit our Website: http://www.httrack.com #include "htsbase.h" #include "htsnet.h" #include "htsthread.h" -#include <stdio.h> -#include <stdlib.h> -#include <string.h> #if HTS_WIN #else //inet_ntoa @@ -55,9 +55,11 @@ Please visit our Website: http://www.httrack.com #if HTS_WIN #ifndef __cplusplus // DOS +#ifndef _WIN32_WCE #include <process.h> /* _beginthread, _endthread */ #endif #endif +#endif // ftp mode passif // #if HTS_INET6==0 @@ -73,31 +75,10 @@ Please visit our Website: http://www.httrack.com #define FTP_STATUS_READY 1001 #if USE_BEGINTHREAD -/* -#ifdef __cplusplus -// C++ -> Shell -UINT back_launch_ftp( LPVOID pP ) { - lien_back* back=(lien_back*) pP; - if (back == NULL) { - //back->status=FTP_STATUS_READY; // fini - //back->r.statuscode=-1; - return -1; - } - - // lancer ftp - 
run_launch_ftp(back); - // prêt - back->status=0; - return 0; // thread completed successfully -} -#else -*/ -PTHREAD_TYPE back_launch_ftp( void* pP ) { +PTHREAD_TYPE PTHREAD_TYPE_FNC back_launch_ftp( void* pP ) { lien_back* back=(lien_back*) pP; if (back == NULL) { - //back->status=FTP_STATUS_READY; // fini - //back->r.statuscode=-1; #if FTP_DEBUG printf("[ftp error: no args]\n"); #endif @@ -113,27 +94,19 @@ PTHREAD_TYPE back_launch_ftp( void* pP ) { #endif run_launch_ftp(back); // prêt - back->status=0; + back->status=FTP_STATUS_READY; /* Uninitialize */ hts_uninit(); return PTHREAD_RETURN; } -/*#endif*/ // lancer en back void launch_ftp(lien_back* back) { -/* -#ifdef __cplusplus - // C++ -> Shell - AfxBeginThread(back_launch_ftp,(LPVOID) back); -#else -*/ // DOS #if FTP_DEBUG printf("[Launching main ftp thread]\n"); #endif - _beginthread(back_launch_ftp, 0, (void*) back); -/*#endif*/ + (void)hts_newthread(back_launch_ftp, 0, (void*) back); } #else @@ -142,7 +115,7 @@ int back_launch_ftp(lien_back* back) { // lancer ftp run_launch_ftp(back); // prêt - back->status=0; + back->status=FTP_STATUS_READY; return 0; } void launch_ftp(lien_back* back,char* path,char* exec) { @@ -213,7 +186,7 @@ int run_launch_ftp(lien_back* back) { #if FTP_PASV int port_pasv=0; #endif - char adr_ip[1024]; + char BIGSTK adr_ip[1024]; char *adr,*real_adr; char* ftp_filename=""; int timeout = 300; // timeout @@ -281,7 +254,11 @@ int run_launch_ftp(lien_back* back) { ftp_filename=a; if (strnotempty(a)) { char* ua=unescape_http(a); - if ( + int len_a = (int) strlen(ua); + if (len_a > 0 && ua[len_a -1] == '/') { /* obviously a directory listing */ + transfer_list=1; + sprintf(line_retr,"LIST -A %s",ua); + } else if ( (strchr(ua, ' ')) || (strchr(ua, '\"')) @@ -298,7 +275,7 @@ int run_launch_ftp(lien_back* back) { } } else { strcpybuff(back->r.msg,"Unexpected PORT error"); - back->status=FTP_STATUS_READY; // fini + // back->status=FTP_STATUS_READY; // fini back->r.statuscode=-1; } } @@ -332,7 
+309,7 @@ int run_launch_ftp(lien_back* back) { hp = hts_gethostbyname(_adr, &fullhostent_buffer); if (hp == NULL) { strcpybuff(back->r.msg,"Unable to get server's address"); - back->status=FTP_STATUS_READY; // fini + // back->status=FTP_STATUS_READY; // fini back->r.statuscode=-5; _HALT_FTP return 0; @@ -349,7 +326,7 @@ int run_launch_ftp(lien_back* back) { soc_ctl=socket(SOCaddr_sinfamily(server), SOCK_STREAM, 0); if (soc_ctl==INVALID_SOCKET) { strcpybuff(back->r.msg,"Unable to create a socket"); - back->status=FTP_STATUS_READY; // fini + // back->status=FTP_STATUS_READY; // fini back->r.statuscode=-1; _HALT_FTP return 0; @@ -367,7 +344,7 @@ int run_launch_ftp(lien_back* back) { if (connect(soc_ctl, (struct sockaddr *)&server, server_size) == -1) { #endif strcpybuff(back->r.msg,"Unable to connect to the server"); - back->status=FTP_STATUS_READY; // fini + // back->status=FTP_STATUS_READY; // fini back->r.statuscode=-1; _HALT_FTP return 0; @@ -379,7 +356,7 @@ int run_launch_ftp(lien_back* back) { _CHECK_HALT_FTP; { - char line[1024]; + char BIGSTK line[1024]; // envoi du login // --USER-- @@ -400,13 +377,23 @@ int run_launch_ftp(lien_back* back) { get_ftp_line(soc_ctl,line,timeout); _CHECK_HALT_FTP; if (line[0]=='2') { // ok + send_line(soc_ctl,"TYPE I"); + get_ftp_line(soc_ctl,line,timeout); + _CHECK_HALT_FTP; + if (line[0]=='2') { + // ok + } else { + strcpybuff(back->r.msg,"TYPE I error"); + // back->status=FTP_STATUS_READY; // fini + back->r.statuscode=-1; + } #if 0 // --CWD-- char* a; a=back->url_fil + strlen(back->url_fil)-1; while( (a > back->url_fil) && (*a!='/')) a--; if (*a == '/') { // ok repéré - char target[1024]; + char BIGSTK target[1024]; target[0]='\0'; strncatbuff(target,back->url_fil,(int) (a - back->url_fil)); if (strnotempty(target)==0) @@ -424,34 +411,34 @@ int run_launch_ftp(lien_back* back) { // ok.. 
} else { strcpybuff(back->r.msg,"TYPE I error"); - back->status=FTP_STATUS_READY; // fini + // back->status=FTP_STATUS_READY; // fini back->r.statuscode=-1; } } else { sprintf(back->r.msg,"CWD error: %s",linejmp(line)); - back->status=FTP_STATUS_READY; // fini + // back->status=FTP_STATUS_READY; // fini back->r.statuscode=-1; } // sinon on est prêts } else { strcpybuff(back->r.msg,"Unexpected ftp error"); - back->status=FTP_STATUS_READY; // fini + // back->status=FTP_STATUS_READY; // fini back->r.statuscode=-1; } #endif } else { sprintf(back->r.msg,"Bad password: %s",linejmp(line)); - back->status=FTP_STATUS_READY; // fini + // back->status=FTP_STATUS_READY; // fini back->r.statuscode=-1; } } else { sprintf(back->r.msg,"Bad user name: %s",linejmp(line)); - back->status=FTP_STATUS_READY; // fini + // back->status=FTP_STATUS_READY; // fini back->r.statuscode=-1; } } else { sprintf(back->r.msg,"Connection refused: %s",linejmp(line)); - back->status=FTP_STATUS_READY; // fini + // back->status=FTP_STATUS_READY; // fini back->r.statuscode=-1; } @@ -510,7 +497,7 @@ int run_launch_ftp(lien_back* back) { // -- fin analyse de l'adresse IP et du port -- } else { sprintf(back->r.msg,"PASV incorrect: %s",linejmp(line)); - back->status=FTP_STATUS_READY; // fini + // back->status=FTP_STATUS_READY; // fini back->r.statuscode=-1; } // sinon on est prêts } else { @@ -542,12 +529,12 @@ int run_launch_ftp(lien_back* back) { } } else { sprintf(back->r.msg,"EPSV incorrect: %s",linejmp(line)); - back->status=FTP_STATUS_READY; // fini + // back->status=FTP_STATUS_READY; // fini back->r.statuscode=-1; } } else { sprintf(back->r.msg,"PASV/EPSV error: %s",linejmp(line)); - back->status=FTP_STATUS_READY; // fini + // back->status=FTP_STATUS_READY; // fini back->r.statuscode=-1; } // sinon on est prêts } @@ -663,7 +650,7 @@ int run_launch_ftp(lien_back* back) { deletesoc(soc_dat); soc_dat=INVALID_SOCKET; // sprintf(back->r.msg,"RETR command errror: %s",linejmp(line)); - 
back->status=FTP_STATUS_READY; // fini + // back->status=FTP_STATUS_READY; // fini back->r.statuscode=-1; } // sinon on est prêts } else { @@ -673,22 +660,22 @@ int run_launch_ftp(lien_back* back) { deletesoc(soc_dat); soc_dat=INVALID_SOCKET; // strcpybuff(back->r.msg,"Unable to connect"); - back->status=FTP_STATUS_READY; // fini + // back->status=FTP_STATUS_READY; // fini back->r.statuscode=-1; } // sinon on est prêts } else { strcpybuff(back->r.msg,"Unable to create a socket"); - back->status=FTP_STATUS_READY; // fini + // back->status=FTP_STATUS_READY; // fini back->r.statuscode=-1; } // sinon on est prêts } else { sprintf(back->r.msg,"Unable to resolve IP %s",adr_ip); - back->status=FTP_STATUS_READY; // fini + // back->status=FTP_STATUS_READY; // fini back->r.statuscode=-1; } // sinon on est prêts } else { sprintf(back->r.msg,"PASV incorrect: %s",linejmp(line)); - back->status=FTP_STATUS_READY; // fini + // back->status=FTP_STATUS_READY; // fini back->r.statuscode=-1; } // sinon on est prêts #else @@ -711,17 +698,17 @@ int run_launch_ftp(lien_back* back) { int dummylen = sizeof(struct sockaddr); if ( (soc_dat=accept(soc_servdat,&dummyaddr,&dummylen)) == INVALID_SOCKET) { strcpybuff(back->r.msg,"Unable to accept connection"); - back->status=FTP_STATUS_READY; // fini + // back->status=FTP_STATUS_READY; // fini back->r.statuscode=-1; } } else { sprintf(back->r.msg,"RETR command errror: %s",linejmp(line)); - back->status=FTP_STATUS_READY; // fini + // back->status=FTP_STATUS_READY; // fini back->r.statuscode=-1; } } else { sprintf(back->r.msg,"PORT command error: %s",linejmp(line)); - back->status=FTP_STATUS_READY; // fini + // back->status=FTP_STATUS_READY; // fini back->r.statuscode=-1; } #if HTS_WIN @@ -731,7 +718,7 @@ int run_launch_ftp(lien_back* back) { #endif } else { strcpybuff(back->r.msg,"Unable to listen to a port"); - back->status=FTP_STATUS_READY; // fini + // back->status=FTP_STATUS_READY; // fini back->r.statuscode=-1; } #endif @@ -747,7 +734,7 @@ 
int run_launch_ftp(lien_back* back) { back->r.fp = filecreate(back->url_sav); strcpybuff(back->info,"receiving"); if (back->r.fp != NULL) { - char buff[1024]; + char BIGSTK buff[1024]; int len=1; int read_len=1024; //HTS_TOTAL_RECV_CHECK(read_len); // Diminuer au besoin si trop de données reçues @@ -758,13 +745,13 @@ int run_launch_ftp(lien_back* back) { switch(wait_socket_receive(soc_dat,timeout)) { case -1: strcpybuff(back->r.msg,"FTP read error"); - back->status=FTP_STATUS_READY; // fini + // back->status=FTP_STATUS_READY; // fini back->r.statuscode=-1; len=0; // fin break; case 0: sprintf(back->r.msg,"Time out (%d)",timeout); - back->status=FTP_STATUS_READY; // fini + // back->status=FTP_STATUS_READY; // fini back->r.statuscode=-1; len=0; // fin break; @@ -785,17 +772,17 @@ int run_launch_ftp(lien_back* back) { } */ strcpybuff(back->r.msg,"Write error"); - back->status=FTP_STATUS_READY; // fini + // back->status=FTP_STATUS_READY; // fini back->r.statuscode=-1; len=0; // error } } else { strcpybuff(back->r.msg,"Unexpected write error"); - back->status=FTP_STATUS_READY; // fini + // back->status=FTP_STATUS_READY; // fini back->r.statuscode=-1; } } else { // Erreur ou terminé - back->status=FTP_STATUS_READY; // fini + // back->status=FTP_STATUS_READY; // fini back->r.statuscode=0; if (back->r.totalsize > 0 && back->r.size != back->r.totalsize) { back->r.statuscode=-1; @@ -812,7 +799,7 @@ int run_launch_ftp(lien_back* back) { } } else { strcpybuff(back->r.msg,"Unable to write file"); - back->status=FTP_STATUS_READY; // fini + // back->status=FTP_STATUS_READY; // fini back->r.statuscode=-1; } #if HTS_WIN @@ -828,16 +815,16 @@ int run_launch_ftp(lien_back* back) { get_ftp_line(soc_ctl,line,timeout); if (line[0]=='2') { // OK strcpybuff(back->r.msg,"OK"); - back->status=FTP_STATUS_READY; // fini + // back->status=FTP_STATUS_READY; // fini back->r.statuscode=200; } else { sprintf(back->r.msg,"RETR incorrect: %s",linejmp(line)); - back->status=FTP_STATUS_READY; // fini 
+ // back->status=FTP_STATUS_READY; // fini back->r.statuscode=-1; } } else { strcpybuff(back->r.msg,"FTP read error"); - back->status=FTP_STATUS_READY; // fini + // back->status=FTP_STATUS_READY; // fini back->r.statuscode=-1; } } @@ -866,7 +853,7 @@ int run_launch_ftp(lien_back* back) { back->r.statuscode=200; strcpybuff(back->r.msg,"OK"); } - back->status=FTP_STATUS_READY; // fini + // back->status=FTP_STATUS_READY; // fini return 0; } @@ -976,7 +963,7 @@ FILE* dd=NULL; // routines de réception/émission // 0 = ERROR int send_line(T_SOC soc,char* data) { - char line[1024]; + char BIGSTK line[1024]; if (_DEBUG_HEAD) { if (ioinfo) { fprintf(ioinfo,"---> %s\x0d\x0a",data); @@ -1007,7 +994,7 @@ int send_line(T_SOC soc,char* data) { } int get_ftp_line(T_SOC soc,char* line,int timeout) { - char data[1024]; + char BIGSTK data[1024]; int i,ok,multiline; #if FTP_DEBUG if (dd == NULL) dd = fopen("toto.txt","w"); @@ -1152,7 +1139,7 @@ int wait_socket_receive(T_SOC soc,int timeout) { int stop_ftp(lien_back* back) { if (back->stop_ftp) { strcpybuff(back->r.msg,"Cancelled by User"); - back->status=FTP_STATUS_READY; // fini + // back->status=FTP_STATUS_READY; // fini back->r.statuscode=-1; return 1; } diff --git a/src/htsftp.h b/src/htsftp.h index e24f1f3..08ab784 100644 --- a/src/htsftp.h +++ b/src/htsftp.h @@ -45,9 +45,11 @@ Please visit our Website: http://www.httrack.com // lien_back #include "htscore.h" +/* Library internal definictions */ +#ifdef HTS_INTERNAL_BYTECODE #if USE_BEGINTHREAD void launch_ftp(lien_back* back); -PTHREAD_TYPE back_launch_ftp( void* pP ); +PTHREAD_TYPE PTHREAD_TYPE_FNC back_launch_ftp( void* pP ); #else void launch_ftp(lien_back* back,char* path,char* exec); int back_launch_ftp(lien_back* back); @@ -62,7 +64,7 @@ char* linejmp(char* line); int check_socket(T_SOC soc); int check_socket_connect(T_SOC soc); int wait_socket_receive(T_SOC soc,int timeout); - +#endif #endif diff --git a/src/htsglobal.h b/src/htsglobal.h index 38faebc..d045f14 100644 --- 
a/src/htsglobal.h +++ b/src/htsglobal.h @@ -40,20 +40,45 @@ Please visit our Website: http://www.httrack.com #define HTTRACK_GLOBAL_DEFH // Version -#define HTTRACK_VERSION "3.30" -#define HTTRACK_VERSIONID "3.30.01" +#define HTTRACK_VERSION "3.33-2" +#define HTTRACK_VERSIONID "3.33.16" #define HTTRACK_AFF_VERSION "3.x" //#define HTTRACK_AFF_WARNING "This is a BETA release of WinHTTrack Website Copier ("HTTRACK_VERSION")\nPlease report any crashes, bugs or problems" - +#ifndef HTS_NOINCLUDES +#ifndef _WIN32_WCE +#include <stdio.h> +#include <stdlib.h> +#else +#include <stdio.h> +#include <stdlib.h> +#ifdef HTS_CECOMPAT +#include "cecompat.h" +#else +#include "celib.h" +#endif +#endif +#endif // Définition plate-forme #include "htssystem.h" #include "htsconfig.h" +// WIN32 types +#ifdef _WIN32 +#ifndef SIZEOF_LONG +#define SIZEOF_LONG 4 +#define SIZEOF_LONG_LONG 8 +#endif +#endif + + // config.h #ifdef _WIN32 +// WIN32 +#ifndef _WIN32_WCE + #define HAVE_SYS_STAT_H 1 #define HAVE_SYS_TYPES_H 1 #define HAVE_SYS_STAT_H 1 @@ -69,6 +94,35 @@ Please visit our Website: http://www.httrack.com #else +// Win32CE +//#pragma runtime_checks( "s", restore ) +#define HTS_SPARE_MEMORY 1 +#define HTS_ALIGN 8 +#define BIGSTK static +#undef DLLIB // LoadLibrary(libssl) crashes +#define NOSTRDEBUG 1 +#undef HTS_MAKE_KEYWORD_INDEX +#ifdef HTS_CECOMPAT +#define HTS_DO_NOT_USE_FTIME 1 +#undef HAVE_SYS_STAT_H +#undef HAVE_SYS_TYPES_H +#else +#undef HTS_DO_NOT_USE_FTIME +#define HAVE_SYS_STAT_H 1 +#define HAVE_SYS_TYPES_H 1 +#endif + +#define HTS_DLOPEN 0 +#undef HTS_INET6 +#ifndef S_ISREG +#define S_ISREG(m) ((m) & _S_IFREG) + +#endif + +#endif + +#else + #include "config.h" #ifndef FTIME @@ -110,7 +164,6 @@ Please visit our Website: http://www.httrack.com #endif - // Socket windows ou socket unix #ifdef _WIN32 #undef HTS_PLATFORM @@ -126,6 +179,15 @@ Please visit our Website: http://www.httrack.com #endif #endif +// don't spare memory usage by default +#ifndef HTS_SPARE_MEMORY +#define 
HTS_SPARE_MEMORY 0 +#endif + +#ifndef BIGSTK +#define BIGSTK +#endif + // compatibilité DOS #if HTS_WIN #define HTS_DOSNAME 1 @@ -208,14 +270,24 @@ Please visit our Website: http://www.httrack.com #endif +#if HTS_SPARE_MEMORY==0 /* Gestion des tables de hashage */ #define HTS_HASH_SIZE 20147 /* Taille max d'une URL */ #define HTS_URLMAXSIZE 1024 /* Taille max ligne de commande (>=HTS_URLMAXSIZE*2) */ #define HTS_CDLMAXSIZE 1024 +#else +/* Gestion des tables de hashage */ +#define HTS_HASH_SIZE 1023 +/* Taille max d'une URL */ +#define HTS_URLMAXSIZE 256 +/* Taille max ligne de commande (>=HTS_URLMAXSIZE*2) */ +#define HTS_CDLMAXSIZE 1024 +#endif + /* Copyright (C) Xavier Roche and other contributors */ -#define HTTRACK_AFF_AUTHORS "[XR&CO'2003]" +#define HTTRACK_AFF_AUTHORS "[XR&CO'2005]" #define HTS_DEFAULT_FOOTER "<!-- Mirrored from %s%s by HTTrack Website Copier/"HTTRACK_AFF_VERSION" "HTTRACK_AFF_AUTHORS", %s -->" #define HTTRACK_WEB "http://www.httrack.com" #define HTS_UPDATE_WEBSITE "http://www.httrack.com/update.php3?Product=HTTrack&Version="HTTRACK_VERSIONID"&VersionStr="HTTRACK_VERSION"&Platform=%d&Language=%s" @@ -357,7 +429,11 @@ typedef int INTsys; #define LOCAL_SOCKET_ID -500000 // taille de chaque buffer (10 sockets 650 ko) -#define TAILLE_BUFFER 65535 +#if HTS_SPARE_MEMORY==0 +#define TAILLE_BUFFER 65536 +#else +#define TAILLE_BUFFER 8192 +#endif #if HTS_WIN #else @@ -390,11 +466,11 @@ typedef int INTsys; //#define HTS_TRACE_MALLOC #ifdef HTS_TRACE_MALLOC typedef unsigned long int t_htsboundary; -typedef struct _mlink { +typedef struct mlink { char* adr; int len; int id; - struct _mlink* next; + struct mlink* next; } mlink; static const t_htsboundary htsboundary = 0xDEADBEEF; #endif @@ -449,5 +525,22 @@ static const t_htsboundary htsboundary = 0xDEADBEEF; // htsmain #define DEBUG_STEPS 0 + +// Débuggage de contrôle +#if HTS_DEBUG_CLOSESOCK +#define _HTS_WIDE 1 +#endif +#if HTS_WIDE_DEBUG +#define _HTS_WIDE 1 +#endif +#if _HTS_WIDE +extern FILE* 
DEBUG_fp; +#define DEBUG_W(A) { if (DEBUG_fp==NULL) DEBUG_fp=fopen("bug.out","wb"); fprintf(DEBUG_fp,":>"A); fflush(DEBUG_fp); } +#undef _ +#define _ , +#endif + + + #endif diff --git a/src/htshash.c b/src/htshash.c index 3cbdb5f..38a2d64 100644 --- a/src/htshash.c +++ b/src/htshash.c @@ -35,15 +35,15 @@ Please visit our Website: http://www.httrack.com /* Author: Xavier Roche */ /* ------------------------------------------------------------ */ +/* Internal engine bytecode */ +#define HTS_INTERNAL_BYTECODE + #include "htshash.h" /* specific definitions */ #include "htsbase.h" #include "htsglobal.h" #include "htsmd5.h" -#include <stdio.h> -#include <stdlib.h> -#include <string.h> /* END specific definitions */ /* Specific macros */ @@ -63,7 +63,7 @@ Please visit our Website: http://www.httrack.com // recherche dans la table selon nom1,nom2 et le no d'enregistrement // retour: position ou -1 si non trouvé int hash_read(hash_struct* hash,char* nom1,char* nom2,int type,int normalized) { - char normfil_[HTS_URLMAXSIZE*2]; + char BIGSTK normfil_[HTS_URLMAXSIZE*2]; char* normfil; char* normadr; unsigned int cle; @@ -199,7 +199,7 @@ int hash_read(hash_struct* hash,char* nom1,char* nom2,int type,int normalized) { // enregistrement lien lpos dans les 3 tables hash1..3 void hash_write(hash_struct* hash,int lpos,int normalized) { - char normfil_[HTS_URLMAXSIZE*2]; + char BIGSTK normfil_[HTS_URLMAXSIZE*2]; char* normfil; unsigned int cle; int pos; diff --git a/src/htshash.h b/src/htshash.h index c4acff1..43b5003 100644 --- a/src/htshash.h +++ b/src/htshash.h @@ -42,10 +42,13 @@ Please visit our Website: http://www.httrack.com #include "htscore.h" +/* Library internal definictions */ +#ifdef HTS_INTERNAL_BYTECODE // tables de hashage int hash_read(hash_struct* hash,char* nom1,char* nom2,int type,int normalized); void hash_write(hash_struct* hash,int lpos,int normalized); int* hash_calc_chaine(hash_struct* hash,int type,int pos); unsigned long int hash_cle(char* nom1,char* nom2); 
+#endif #endif diff --git a/src/htshelp.c b/src/htshelp.c index 7046929..af6f742 100644 --- a/src/htshelp.c +++ b/src/htshelp.c @@ -35,6 +35,9 @@ Please visit our Website: http://www.httrack.com /* Author: Xavier Roche */ /* ------------------------------------------------------------ */ +/* Internal engine bytecode */ +#define HTS_INTERNAL_BYTECODE + #include "htshelp.h" /* specific definitions */ @@ -43,9 +46,6 @@ Please visit our Website: http://www.httrack.com #include "htscatchurl.h" #include "htslib.h" #include "htsalias.h" -#include <stdio.h> -#include <stdlib.h> -#include <string.h> #if HTS_WIN #else #ifdef HAVE_UNISTD_H @@ -334,20 +334,20 @@ int help_query(char* list,int def) { // Capture d'URL void help_catchurl(char* dest_path) { - char adr_prox[HTS_URLMAXSIZE*2]; + char BIGSTK adr_prox[HTS_URLMAXSIZE*2]; int port_prox; T_SOC soc=catch_url_init_std(&port_prox,adr_prox); if (soc!=INVALID_SOCKET) { - char url[HTS_URLMAXSIZE*2]; + char BIGSTK url[HTS_URLMAXSIZE*2]; char method[32]; - char data[32768]; + char BIGSTK data[32768]; url[0]=method[0]=data[0]='\0'; // printf("Okay, temporary proxy installed.\nSet your browser's preferences to:\n\n"); printf("\tProxy's address: \t%s\n\tProxy's port: \t%d\n",adr_prox,port_prox); // if (catch_url(soc,url,method,data)) { - char dest[HTS_URLMAXSIZE*2]; + char BIGSTK dest[HTS_URLMAXSIZE*2]; int i=0; do { sprintf(dest,"%s%s%d",dest_path,"hts-post",i); @@ -362,7 +362,7 @@ void help_catchurl(char* dest_path) { } // former URL! { - char finalurl[HTS_URLMAXSIZE*2]; + char BIGSTK finalurl[HTS_URLMAXSIZE*2]; escape_check_url(dest); sprintf(finalurl,"%s"POSTTOK"file:%s",url,dest); printf("\nThe URL is: \"%s\"\n",finalurl); @@ -471,7 +471,7 @@ void help(char* app,int more) { infomsg(" bN accept cookies in cookies.txt (0=do not accept,* 1=accept)"); infomsg(" u check document type if unknown (cgi,asp..) 
(u0 don't check, * u1 check but /, u2 check always)"); infomsg(" j *parse Java Classes (j0 don't parse)"); - infomsg(" sN follow robots.txt and meta robots tags (0=never,1=sometimes,* 2=always)"); + infomsg(" sN follow robots.txt and meta robots tags (0=never,1=sometimes,* 2=always, 3=always (even strict rules))"); infomsg(" %h force HTTP/1.0 requests (reduce update features, only for old servers or proxies)"); infomsg(" %k use keep-alive if possible, greately reducing latency for small files and test requests (%k0 don't use)"); infomsg(" %B tolerant requests (accept bogus responses on some servers, but not standard!)"); @@ -479,10 +479,13 @@ void help(char* app,int more) { infomsg(" %u url hacks: various hacks to limit duplicate URLs (strip //, www.foo.com==foo.com..)"); infomsg(" %A assume that a type (cgi,asp..) is always linked with a mime type (-%A php3,cgi=text/html;dat,bin=application/x-zip)"); infomsg(" shortcut: '--assume standard' is equivalent to -%A "HTS_ASSUME_STANDARD); + infomsg(" can also be used to force a specific file type: --assume foo.cgi=text/html"); infomsg(" @iN internet protocol (0=both ipv6+ipv4, 4=ipv4 only, 6=ipv6 only)"); infomsg(""); infomsg("Browser ID:"); - infomsg(" F user-agent field (-F \"user-agent name\")"); + infomsg(" F user-agent field sent in HTTP headers (-F \"user-agent name\")"); + infomsg(" %R default referer field sent in HTTP headers"); + infomsg(" %E from email address sent in HTTP headers"); infomsg(" %F footer string in Html code (-%F \"Mirrored [from host %s [file %s [at %s]]]\""); infomsg(" %l preffered language (-%l \"fr, en, jp, *\""); infomsg(""); @@ -490,7 +493,7 @@ void help(char* app,int more) { infomsg(" C create/use a cache for updates and retries (C0 no cache,C1 cache is prioritary,* C2 test update before)"); infomsg(" k store all files in cache (not useful if files on disk)"); infomsg(" %n do not re-download locally erased files"); - infomsg(" %v display on screen filenames downloaded (in realtime) - * 
%v1 short version"); + infomsg(" %v display on screen filenames downloaded (in realtime) - * %v1 short version - %v2 full animation"); infomsg(" Q no log - quiet mode"); infomsg(" q no questions - quiet mode"); infomsg(" z log - extra infos"); @@ -523,6 +526,9 @@ void help(char* app,int more) { infomsg(" #X *use optimized engine (limited memory boundary checks)"); infomsg(" #0 filter test (-#0 '*.gif' 'www.bar.com/foo.gif')"); infomsg(" #C cache list (-#C '*.com/spider*.gif'"); + infomsg(" #R cache repair (damaged cache)"); + infomsg(" #d debug parser"); + infomsg(" #E extract new.zip cache meta-data in meta.zip"); infomsg(" #f always flush log files"); infomsg(" #FN maximum number of filters"); infomsg(" #h version info"); @@ -536,10 +542,15 @@ void help(char* app,int more) { infomsg(" #Z generate transfer rate statictics every minutes"); infomsg(" #! execute a shell command (-#! \"echo hello\")"); infomsg(""); + infomsg("Dangerous options: (do NOT use unless you exactly know what you are doing)"); + infomsg(" %! 
bypass built-in security limits aimed to avoid bandwith abuses (bandwidth, simultaneous connections)"); + infomsg(" IMPORTANT NOTE: DANGEROUS OPTION, ONLY SUITABLE FOR EXPERTS"); + infomsg(" USE IT WITH EXTREME CARE"); + infomsg(""); infomsg("Command-line specific options:"); infomsg(" V execute system command after each files ($0 is the filename: -V \"rm \\$0\")"); infomsg(" %U run the engine with another id when called as root (-%U smith)"); - infomsg(" %W use an external library function as a wrapper (-%W link-detected=foo.so:myfunction)"); + infomsg(" %W use an external library function as a wrapper (-%W link-detected=foo.so:myfunction[,myparameters])"); /* infomsg(" %O do a chroot before setuid"); */ infomsg(""); infomsg("Details: Option N"); @@ -571,6 +582,7 @@ void help(char* app,int more) { infomsg(" '%h' Host name (ex: www.someweb.com)"); infomsg(" '%M' URL MD5 (128 bits, 32 ascii bytes)"); infomsg(" '%Q' query string MD5 (128 bits, 32 ascii bytes)"); + infomsg(" '%r' protocol name (ex: http)"); infomsg(" '%q' small query string MD5 (16 bits, 4 ascii bytes)"); infomsg(" '%s?' 
Short name version (ex: %sN)"); infomsg(" '%[param]' param variable in query string"); @@ -613,6 +625,8 @@ void help(char* app,int more) { infomsg("'start' : int (* myfunction)(httrackp* opt);"); infomsg("'end' : int (* myfunction)(void);"); infomsg("'change-options' : int (* myfunction)(httrackp* opt);"); + infomsg("'preprocess-html' : int (* myfunction)(char** html,int* len,char* url_adresse,char* url_fichier);"); + infomsg("'postprocess-html' : int (* myfunction)(char** html,int* len,char* url_adresse,char* url_fichier);"); infomsg("'check-html' : int (* myfunction)(char* html,int len,char* url_adresse,char* url_fichier);"); infomsg("'query' : char* (* myfunction)(char* question);"); infomsg("'query2' : char* (* myfunction)(char* question);"); @@ -622,8 +636,10 @@ void help(char* app,int more) { infomsg("'pause' : void (* myfunction)(char* lockfile);"); infomsg("'save-file' : void (* myfunction)(char* file);"); infomsg("'link-detected' : int (* myfunction)(char* link);"); + infomsg("'link-detected2' : int (* myfunction)(char* link, char* start_tag);"); infomsg("'transfer-status' : int (* myfunction)(lien_back* back);"); infomsg("'save-name' : int (* myfunction)(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save);"); + infomsg("And <wrappername>_init() functions if defined, called upon plug"); infomsg(""); infomsg(""); infomsg("example: httrack www.someweb.com/bob/"); diff --git a/src/htshelp.h b/src/htshelp.h index 924a526..67354c7 100644 --- a/src/htshelp.h +++ b/src/htshelp.h @@ -43,11 +43,14 @@ Please visit our Website: http://www.httrack.com #include "htsglobal.h" #include "htscore.h" +/* Library internal definictions */ +#ifdef HTS_INTERNAL_BYTECODE void infomsg(char* msg); void help(char* app,int more); void make_empty_index(char* str); void help_wizard(httrackp* opt); int help_query(char* list,int def); void help_catchurl(char* dest_path); +#endif #endif diff --git a/src/htsindex.c b/src/htsindex.c index 1a75103..af87396 
100644 --- a/src/htsindex.c +++ b/src/htsindex.c @@ -35,9 +35,10 @@ Please visit our Website: http://www.httrack.com /* Author: Xavier Roche */ /* ------------------------------------------------------------ */ +/* Internal engine bytecode */ +#define HTS_INTERNAL_BYTECODE + -#include <stdio.h> -#include <stdlib.h> #include "htsindex.h" #include "htsglobal.h" #include "htslib.h" @@ -124,12 +125,14 @@ int hts_primindex_words=0; */ void index_init(const char* indexpath) { #if HTS_MAKE_KEYWORD_INDEX +#ifndef _WIN32_WCE /* remove(concat(indexpath,"index.txt")); */ hts_index_init=1; hts_primindex_size=0; hts_primindex_words=0; fp_tmpproject=tmpfile(); #endif +#endif } @@ -298,7 +301,7 @@ int index_keyword(const char* html_data,LLint size,const char* mime,const char* unsigned long int e=0; if (inthash_read(WordIndexHash,line,&e)) { //if (e) { - char savelst[HTS_URLMAXSIZE*2]; + char BIGSTK savelst[HTS_URLMAXSIZE*2]; e++; /* 0 means "once" */ if (strncmp((const char*)fslash((char*)indexpath),filename,strlen(indexpath))==0) // couper diff --git a/src/htsindex.h b/src/htsindex.h index 40a189b..b773034 100644 --- a/src/htsindex.h +++ b/src/htsindex.h @@ -41,8 +41,11 @@ Please visit our Website: http://www.httrack.com #include "htsglobal.h" +/* Library internal definictions */ +#ifdef HTS_INTERNAL_BYTECODE int index_keyword(const char* html_data,LLint size,const char* mime,const char* filename,const char* indexpath); void index_init(const char* indexpath); void index_finish(const char* indexpath,int mode); +#endif #endif diff --git a/src/htsinthash.c b/src/htsinthash.c index 95b8711..eb155cb 100644 --- a/src/htsinthash.c +++ b/src/htsinthash.c @@ -35,15 +35,15 @@ Please visit our Website: http://www.httrack.com /* Author: Xavier Roche */ /* ------------------------------------------------------------ */ +/* Internal engine bytecode */ +#define HTS_INTERNAL_BYTECODE + #include "htsinthash.h" /* specific definitions */ #include "htsbase.h" #include "htsglobal.h" #include 
"htsmd5.h" -#include <stdio.h> -#include <stdlib.h> -#include <string.h> /* END specific definitions */ /* Specific macros */ @@ -68,11 +68,12 @@ int inthash_write(inthash hashtable,char* name,long int value) { if (strcmp(h->name,name)==0) { /* Delete element */ if (hashtable->flag_valueismalloc) { - if (h->value.intg) { + void* ptr = (void*)h->value.intg; + if (ptr != NULL) { if (hashtable->free_handler) - hashtable->free_handler((void*)h->value.intg); + hashtable->free_handler(ptr); else - freet((void*)h->value.intg); + freet(ptr); } } /* Insert */ @@ -151,7 +152,8 @@ int inthash_read(inthash hashtable,char* name,long int* value) { inthash_chain* h=hashtable->hash[pos]; while (h) { if (strcmp(h->name,name)==0) { - *value=h->value.intg; + if (value != NULL) + *value=h->value.intg; return 1; } h=h->next; @@ -180,12 +182,13 @@ void inthash_delchain(inthash_chain* hash,t_inthash_freehandler free_handler) { inthash_delchain(hash->next,free_handler); if (free_handler) { // pos is a malloc() block, delete it! 
if (hash->value.intg) { + void* ptr = (void*)hash->value.intg; if (free_handler) - free_handler((void*)hash->value.intg); + free_handler(ptr); else - freet((void*)hash->value.intg); + freet(ptr); + hash->value.intg=0; } - hash->value.intg=0; } freet(hash); } diff --git a/src/htsinthash.h b/src/htsinthash.h index c667cd4..5d7b992 100644 --- a/src/htsinthash.h +++ b/src/htsinthash.h @@ -54,7 +54,7 @@ typedef struct inthash_chain { // structure behind inthash typedef void (* t_inthash_freehandler)(void* value); -typedef struct { +typedef struct struct_inthash { inthash_chain** hash; t_inthash_freehandler free_handler; unsigned int hash_size; @@ -64,6 +64,8 @@ typedef struct { // main inthash type typedef struct_inthash* inthash; +/* Library internal definictions */ +#ifdef HTS_INTERNAL_BYTECODE // subfunctions unsigned long int inthash_key(char* value); void inthash_init(inthash hashtable); @@ -72,7 +74,6 @@ void inthash_default_free_handler(void* value); // main functions: - /* Hash functions: */ inthash inthash_new(int size); /* Create a new hash table */ int inthash_created(inthash hashtable); /* Test if the hash table was successfully created */ @@ -89,6 +90,6 @@ void* inthash_addblk(inthash hashtable,char* name,int blksize); /* Add entr int inthash_write(inthash hashtable,char* name,long int value); /* Overwrite/add entry in the hash table */ int inthash_inc(inthash hashtable,char* name); /* Increment entry in the hash table */ /* End of hash functions: */ - +#endif #endif diff --git a/src/htsjava.c b/src/htsjava.c index afb166b..3536b9b 100644 --- a/src/htsjava.c +++ b/src/htsjava.c @@ -35,6 +35,9 @@ Please visit our Website: http://www.httrack.com /* ------------------------------------------------------------ */ +/* Internal engine bytecode */ +#define HTS_INTERNAL_BYTECODE + /* Version: Oct/2000 */ /* Fixed: problems with class structure (10/2000) */ @@ -46,10 +49,6 @@ Please visit our Website: http://www.httrack.com #include "htsjava.h" -#include <stdio.h> 
-#include <stdlib.h> -#include <string.h> - #include "htsnostatic.h" //#include <math.h> @@ -186,7 +185,7 @@ int hts_parse_java(htsmoduleStruct* str) if((tab[i].index1!=SClass) && (tab[i].index1!=Class) && (tab[tab[i].index1].name[0]!='[')) { if(!strstr(tab[tab[i].index1].name,"java/")) { - char tempo[1024]; + char BIGSTK tempo[1024]; tempo[0]='\0'; sprintf(tempo,"%s.class",tab[tab[i].index1].name); @@ -289,7 +288,7 @@ RESP_STRUCT readtable(htsmoduleStruct* str, strcpybuff(trans.name,"HTS_UNICODE"); { - char buffer[1024]; + char BIGSTK buffer[1024]; char *p; p=&buffer[0]; diff --git a/src/htsjava.h b/src/htsjava.h index b3d17d4..915824b 100644 --- a/src/htsjava.h +++ b/src/htsjava.h @@ -57,6 +57,8 @@ typedef struct { } RESP_STRUCT; +/* Library internal definictions */ +#ifdef HTS_INTERNAL_BYTECODE int hts_detect_java(htsmoduleStruct* str); int hts_parse_java(htsmoduleStruct* str); RESP_STRUCT affecte(int i1,int i2,RESP_STRUCT *i3,RESP_STRUCT *i4,int i5); @@ -65,6 +67,6 @@ RESP_STRUCT readtable(htsmoduleStruct* str,FILE *fp,RESP_STRUCT,int*); unsigned short int readshort(FILE *fp); int tris(char*); char * printname(char [1024]); - +#endif #endif diff --git a/src/htslib.c b/src/htslib.c index 3954f9c..9c389c8 100644 --- a/src/htslib.c +++ b/src/htslib.c @@ -34,11 +34,20 @@ Please visit our Website: http://www.httrack.com /* Author: Xavier Roche */ /* ------------------------------------------------------------ */ +/* Internal engine bytecode */ +#define HTS_INTERNAL_BYTECODE + // Fichier librairie .c #include "htslib.h" #include "htsbauth.h" +#ifdef _WIN32_WCE +#ifndef HTS_CECOMPAT +#pragma comment(lib, "celib.lib") //link with celib +#endif +#endif + /* specific definitions */ #include "htsbase.h" #include "htsnet.h" @@ -46,9 +55,11 @@ Please visit our Website: http://www.httrack.com #include "htsthread.h" #include "htsnostatic.h" #include "htswrap.h" -#include <stdio.h> +#include "htsmd5.h" #if HTS_WIN +#ifndef _WIN32_WCE #include <direct.h> +#endif #else #ifdef 
HAVE_SYS_TYPES_H #include <sys/types.h> @@ -60,32 +71,39 @@ Please visit our Website: http://www.httrack.com #include <unistd.h> #endif #endif -#include <stdlib.h> #include <string.h> #include <time.h> +#ifndef _WIN32_WCE +#include <sys/timeb.h> +#else +#ifndef HTS_CECOMPAT #include <sys/timeb.h> +#endif +#endif +#ifndef _WIN32_WCE #include <fcntl.h> +#endif // pour utimbuf #if HTS_WIN +#ifndef _WIN32_WCE +#include <sys/utime.h> +#else +#ifndef HTS_CECOMPAT #include <sys/utime.h> +#endif +#endif #else #include <utime.h> #endif +#ifndef _WIN32_WCE +#include <sys/stat.h> +#endif /* END specific definitions */ - -// Débuggage de contrôle -#if HTS_DEBUG_CLOSESOCK -#define _HTS_WIDE 1 -#endif -#if HTS_WIDE_DEBUG -#define _HTS_WIDE 1 -#endif +// Debugging #if _HTS_WIDE FILE* DEBUG_fp=NULL; -#define DEBUG_W(A) { if (DEBUG_fp==NULL) DEBUG_fp=fopen("bug.out","wb"); fprintf(DEBUG_fp,":>"A); fflush(DEBUG_fp); } -#define DEBUG_W2(A) { if (DEBUG_fp==NULL) DEBUG_fp=fopen("bug.out","wb"); fprintf(DEBUG_fp,A); fflush(DEBUG_fp); } #endif /* variables globales */ @@ -553,6 +571,7 @@ const char* hts_mime[][2] = { || CIS(c,'*') \ || CIS(c,'\'') \ || CIS(c,'\"') \ + || CIS(c,'&') \ || CIS(c,'!') ) //#define CHAR_XXAVOID(c) ( strchr(" *'\"!",(unsigned char)(c)) != 0 ) #define CHAR_MARK(c) ( CIS(c,'-') \ @@ -581,7 +600,9 @@ char* antislash(char* s) { } #endif - +#ifdef _WIN32_WCE +char cwd[MAX_PATH+1] = ""; +#endif // Récupération d'un fichier http sur le net. // Renvoie une adresse sur le bloc de mémoire, ou bien @@ -592,8 +613,8 @@ char* antislash(char* s) { // en background htsblk httpget(char* url) { - char adr[HTS_URLMAXSIZE*2]; // adresse - char fil[HTS_URLMAXSIZE*2]; // chemin + char BIGSTK adr[HTS_URLMAXSIZE*2]; // adresse + char BIGSTK fil[HTS_URLMAXSIZE*2]; // chemin // séparer URL en adresse+chemin if (ident_url_absolute(url,adr,fil)==-1) { @@ -692,7 +713,7 @@ int http_xfopen(int mode,int treat,int waitconnect,char* xsend,char* adr,char* f // Test en cas de file:///C|... 
if (!fexist(fconv(unescape_http(fil)))) if (fexist(fconv(unescape_http(fil+1)))) { - char tempo[HTS_URLMAXSIZE*2]; + char BIGSTK tempo[HTS_URLMAXSIZE*2]; strcpybuff(tempo,fil+1); strcpybuff(fil,tempo); } @@ -802,7 +823,7 @@ int http_xfopen(int mode,int treat,int waitconnect,char* xsend,char* adr,char* f // envoi d'une requète int http_sendhead(t_cookie* cookie,int mode,char* xsend,char* adr,char* fil,char* referer_adr,char* referer_fil,htsblk* retour) { - char buff[8192]; + char BIGSTK buff[8192]; //int use_11=0; // HTTP 1.1 utilisé int direct_url=0; // ne pas analyser l'url (exemple: ftp://) char* search_tag=NULL; @@ -826,8 +847,8 @@ int http_sendhead(t_cookie* cookie,int mode,char* xsend,char* adr,char* fil,char if (mode==0) { // GET! FILE* fp=fopen(unescape_http(search_tag+strlen(POSTTOK)+5),"rb"); if (fp) { - char line[1100]; - char protocol[256],url[HTS_URLMAXSIZE*2],method[256]; + char BIGSTK line[1100]; + char BIGSTK protocol[256],url[HTS_URLMAXSIZE*2],method[256]; linput(fp,line,1000); if (sscanf(line,"%s %s %s",method,url,protocol) == 3) { // selon que l'on a ou pas un proxy @@ -879,7 +900,7 @@ int http_sendhead(t_cookie* cookie,int mode,char* xsend,char* adr,char* fil,char // on slash doit être présent en début, sinon attention aux bad request! (400) if (*fil!='/') strcatbuff(buff,"/"); { - char tempo[HTS_URLMAXSIZE*2]; + char BIGSTK tempo[HTS_URLMAXSIZE*2]; tempo[0]='\0'; if (search_tag) strncatbuff(tempo,fil,(int) (search_tag - fil)); @@ -923,25 +944,31 @@ int http_sendhead(t_cookie* cookie,int mode,char* xsend,char* adr,char* fil,char } // Referer? 
- if ((referer_adr) && (referer_fil)) { // existe - if ((strnotempty(referer_adr)) && (strnotempty(referer_fil))) { // non vide - if ( - (strcmp(referer_adr,"file://") != 0) - && - ( /* no https referer to http urls */ - (strncmp(referer_adr, "https://", 8) != 0) /* referer is not https */ - || - (strncmp(adr, "https://", 8) == 0) /* or referer AND addresses are https */ - ) - ) { // PAS file:// - strcatbuff(buff,"Referer: "); - strcatbuff(buff,"http://"); - strcatbuff(buff,jump_identification(referer_adr)); - strcatbuff(buff,referer_fil); - strcatbuff(buff,H_CRLF); - } + if (referer_adr != NULL && referer_fil != NULL + && strnotempty(referer_adr) && strnotempty(referer_fil) + ) { // non vide + if ( + (strcmp(referer_adr,"file://") != 0) + && + ( /* no https referer to http urls */ + (strncmp(referer_adr, "https://", 8) != 0) /* referer is not https */ + || + (strncmp(adr, "https://", 8) == 0) /* or referer AND addresses are https */ + ) + ) { // PAS file:// + strcatbuff(buff,"Referer: "); + strcatbuff(buff,"http://"); + strcatbuff(buff,jump_identification(referer_adr)); + strcatbuff(buff,referer_fil); + strcatbuff(buff,H_CRLF); } } + // HTTP field: referer + else if (retour->req.referer[0] != '\0') { + strcatbuff(buff,"Referer: "); + strcatbuff(buff, retour->req.referer); + strcatbuff(buff, H_CRLF); + } // POST? if (mode==0) { // GET! @@ -1002,6 +1029,13 @@ int http_sendhead(t_cookie* cookie,int mode,char* xsend,char* adr,char* fil,char } //} + // HTTP field: from + if (retour->req.from[0] != '\0') { // HTTP from + strcatbuff(buff,"From: "); + strcatbuff(buff, retour->req.from); + strcatbuff(buff, H_CRLF); + } + // Présence d'un user-agent? if (retour->req.user_agent_send) { // ohh un user-agent char s[256]; @@ -1113,12 +1147,13 @@ int http_sendhead(t_cookie* cookie,int mode,char* xsend,char* adr,char* fil,char #endif // Envoi + HTS_STAT.last_request = mtime_local(); if (sendc(retour, buff)<0) { // ERREUR, socket rompue?... 
//if (sendc(retour->soc,buff) != strlen(buff)) { // ERREUR, socket rompue?... deletesoc_r(retour); // fermer tout de même // et tenter de reconnecter - strcpybuff(retour->msg,"Write error"); + strcpybuff(retour->msg, "Write error"); retour->soc=INVALID_SOCKET; } @@ -1411,7 +1446,7 @@ void treathead(t_cookie* cookie,char* adr,char* fil,htsblk* retour,char* rcvd) { char domain[256]; // domaine cookie (.netscape.com) char path[256]; // chemin (/) char cook_name[256]; // nom cookie (MYCOOK) - char cook_value[8192]; // valeur (ID=toto,S=1234) + char BIGSTK cook_value[8192]; // valeur (ID=toto,S=1234) #if DEBUG_COOK printf("set-cookie detected\n"); #endif @@ -1419,7 +1454,7 @@ void treathead(t_cookie* cookie,char* adr,char* fil,htsblk* retour,char* rcvd) { char *token_st,*token_end; char *value_st,*value_end; char name[256]; - char value[8192]; + char BIGSTK value[8192]; int next=0; name[0]=value[0]='\0'; // @@ -1679,6 +1714,11 @@ HTS_INLINE LLint http_fread1(htsblk* r) { LLint http_xfread1(htsblk* r,int bufl) { int nl=-1; + // EOF + if (r->totalsize > 0 && r->size == r->totalsize) { + return READ_EOF; + } + if (bufl>0) { if (!r->is_write) { // stocker en mémoire if (r->totalsize>0) { // totalsize déterminé ET ALLOUE @@ -1691,9 +1731,11 @@ LLint http_xfread1(htsblk* r,int bufl) { nl = hts_read(r,r->adr + ((int) r->size),(int) (r->totalsize-r->size) ); /* NO 32 bit overlow possible here (no 4GB html!) 
*/ // nouvelle taille if (nl >= 0) r->size+=nl; - - if ((nl < 0) || (r->size >= r->totalsize)) - nl=-1; // break + + /* + if (r->size >= r->totalsize) + nl = -1; // break + */ r->adr[r->size]='\0'; // caractère NULL en fin au cas où l'on traite des HTML } @@ -1717,7 +1759,7 @@ LLint http_xfread1(htsblk* r,int bufl) { if (r->adr!=NULL) { // lecture nl = hts_read(r,r->adr+(int)r->size,bufl); - if (nl>0) { + if (nl > 0) { // resize r->adr=(char*) realloct(r->adr,(int)r->size+nl + 1); // nouvelle taille @@ -1737,7 +1779,7 @@ LLint http_xfread1(htsblk* r,int bufl) { } // pas de adr=erreur - if (r->adr==NULL) nl=-1; + if (r->adr == NULL) nl = READ_ERROR; } else { // stocker sur disque char* buff; @@ -1751,17 +1793,17 @@ LLint http_xfread1(htsblk* r,int bufl) { if ((INTsys)fwrite(buff,1,nl,r->out)!=nl) { r->statuscode=-1; strcpybuff(r->msg,"Write error on disk"); - nl=-1; + nl=READ_ERROR; } } - if ((nl < 0) || ((r->totalsize>0) && (r->size >= r->totalsize))) - nl=-1; // break + //if ((nl < 0) || ((r->totalsize>0) && (r->size >= r->totalsize))) + // nl=-1; // break // libérer bloc tempo freet(buff); } else - nl=-1; + nl=READ_ERROR; if ((nl < 0) && (r->out!=NULL)) { fflush(r->out); @@ -1783,7 +1825,7 @@ LLint http_xfread1(htsblk* r,int bufl) { int lf_detected=0; int at_begining=1; do { - nl=-1; + nl = READ_INTERNAL_ERROR; count--; if (r->adr==NULL) { r->adr=(char*) malloct(8192); @@ -1793,7 +1835,7 @@ LLint http_xfread1(htsblk* r,int bufl) { if (r->size < 8190) { // lecture nl = hts_read(r,r->adr+r->size,1); - if (nl>0) { + if (nl > 0) { // exit if: // lf detected AND already detected before // or @@ -1825,18 +1867,16 @@ LLint http_xfread1(htsblk* r,int bufl) { count=-1; } } while((nl >= 0) && (count>0)); - nl = tot_nl; + if (nl >= 0) { + nl = tot_nl; + } } -#if HDEBUG - //printf("add to %d / %d\n",r->size,r->totalsize); -#endif - // nl == 0 may mean "no relevant data", for example is using cache or ssl -#if HTS_USEOPENSSL - if (r->ssl) + // EOF + if (r->totalsize > 0 && 
r->size == r->totalsize) { + return READ_EOF; + } else { return nl; - else -#endif - return ((nl > 0) ? nl : -1); // ==0 is fatal if direct read + } } @@ -1977,20 +2017,20 @@ int newhttp(char* _iadr,htsblk* retour,int port,int waitconnect) { char* iadr; // unsigned short int port; - // tester un éventuel id:pass et virer id:pass@ si détecté - iadr = jump_identification(_iadr); - // si iadr="#" alors c'est une fausse URL, mais un vrai fichier // local. // utile pour les tests! //## if (iadr[0]!=lOCAL_CHAR) { - if (strcmp(_iadr,"file://")) { /* non fichier */ + if (strcmp(_iadr,"file://") != 0) { /* non fichier */ SOCaddr server; int server_size=sizeof(server); t_hostent* hp; // effacer structure memset(&server, 0, sizeof(server)); + // tester un éventuel id:pass et virer id:pass@ si détecté + iadr = jump_identification(_iadr); + #if HDEBUG printf("gethostbyname\n"); #endif @@ -2007,7 +2047,7 @@ int newhttp(char* _iadr,htsblk* retour,int port,int waitconnect) { port=80; // port par défaut #endif if (a) { - char iadr2[HTS_URLMAXSIZE*2]; + char BIGSTK iadr2[HTS_URLMAXSIZE*2]; int i=-1; iadr2[0]='\0'; sscanf(a+1,"%d",&i); @@ -2046,6 +2086,9 @@ int newhttp(char* _iadr,htsblk* retour,int port,int waitconnect) { } // copie adresse SOCaddr_copyaddr(server, server_size, hp->h_addr_list[0], hp->h_length); + // make a copy for external clients + retour->address_size = sizeof(retour->address); + SOCaddr_copyaddr(retour->address, retour->address_size, hp->h_addr_list[0], hp->h_length); // memcpy(&SOCaddr_sinaddr(server), hp->h_addr_list[0], hp->h_length); // créer ("attachement") une socket (point d'accès) internet,en flot @@ -2060,7 +2103,7 @@ int newhttp(char* _iadr,htsblk* retour,int port,int waitconnect) { retour->debugid = HTS_STAT.stat_sockid++; } #if HTS_WIDE_DEBUG - DEBUG_W("socket done\n"); + DEBUG_W("socket()=%d\n" _ (int) soc); #endif if (soc==INVALID_SOCKET) { if (retour) @@ -2103,6 +2146,7 @@ int newhttp(char* _iadr,htsblk* retour,int port,int waitconnect) { #if 
HDEBUG printf("connect\n"); #endif + HTS_STAT.last_connect = mtime_local(); #if HTS_WIDE_DEBUG DEBUG_W("connect\n"); @@ -2113,10 +2157,6 @@ int newhttp(char* _iadr,htsblk* retour,int port,int waitconnect) { if (connect(soc, (struct sockaddr *)&server, server_size) == -1) { #endif - // no - non blocking - //deletesoc(soc); - //soc=INVALID_SOCKET; - // bloquant if (waitconnect) { #if HDEBUG @@ -2180,7 +2220,7 @@ int ident_url_absolute(char* url,char* adr,char* fil) { // 1. optional scheme ":" if ((pos=strfield(url,"file:"))) { // fichier local!! (pour les tests) - //!! p+=3; + //!!p+=3; strcpybuff(adr,"file://"); } else if ((pos=strfield(url,"http:"))) { // HTTP //!!p+=3; @@ -2233,15 +2273,19 @@ int ident_url_absolute(char* url,char* adr,char* fil) { char *p; int i; char* a; - + p=url+pos; if (*p == '/' || *p == '\\') { /* file:///.. */ strcatbuff(fil,p); // fichier local ; adr="#" } else { - strcatbuff(fil,"//"); /* file://server/foo */ - strcatbuff(fil,p); + if (p[1] != ':') { + strcatbuff(fil,"//"); /* file://server/foo */ + strcatbuff(fil,p); + } else { + strcatbuff(fil,p); // file://C:\.. + } } - + a=strchr(fil,'?'); if (a) *a='\0'; /* couper query (inutile pour file:// lors de la requête) */ @@ -2272,66 +2316,52 @@ int ident_url_absolute(char* url,char* adr,char* fil) { return 0; } -// simplification des ../ +/* simplify ../ and ./ */ void fil_simplifie(char* f) { - int i=0; - int last=0; - char* a; - - // éliminer ../ - while (f[i]) { - - if (f[i]=='/') { - if (f[i+1]=='.') - if (f[i+2]=='.') // couper dernier répertoire - if (f[i+3]=='/') // éviter les /tmp/..coolandlamedir/ - { // couper dernier répertoire - char tempo[HTS_URLMAXSIZE*2]; - tempo[0]='\0'; - // - if (!last) /* can't go upper.. 
*/ - strcpybuff(tempo,"/"); - else - strncpy(tempo,f,last+1); - tempo[last+1]='\0'; - strcatbuff(tempo,f+i+4); - strcpybuff(f,tempo); // remplacer - i=-1; // recommencer - last=0; + char *a, *b; + char *rollback[128]; + int rollid = 0; + char lc = '/'; + int query = 0; + for(a = b = f ; *a != '\0' ; ) { + if (*a == '?') + query = 1; + if (query == 0 && lc == '/' && a[0] == '.' && a[1] == '/') { /* foo/./bar or ./foo */ + a += 2; + } + else if (query == 0 && lc == '/' && a[0] == '.' && a[1] == '.' && a[2] == '/') { /* foo/../bar or ../foo */ + a += 3; + if (rollid > 1) { + rollid--; + b = rollback[rollid - 1]; + } else { + rollid = 0; + b = f; } - - if (i>=0) - last=i; - else - last=0; + } else { + *b++ = lc = *a; + if (*a == '/') { + rollback[rollid++] = b; + if (rollid >= 127) { + *f = '\0'; /* ERROR */ + break; + } + } + a++; } - - i++; } - - // éliminer ./ - while ( (a=strstr(f,"./")) ) { - char tempo[HTS_URLMAXSIZE*2]; - tempo[0]='\0'; - strcpybuff(tempo,a+2); - strcpybuff(a,tempo); - } - // delete all remaining ../ (potential threat) - while ( (a=strstr(f,"../")) ) { - char tempo[HTS_URLMAXSIZE*2]; - tempo[0]='\0'; - strcpybuff(tempo,a+3); - strcpybuff(a,tempo); + *b = '\0'; + if (*f == '\0') { + f[0] = '.'; + f[1] = '/'; + f[2] = '\0'; } - } // fermer liaison fichier ou socket HTS_INLINE void deletehttp(htsblk* r) { #if HTS_DEBUG_CLOSESOCK - char info[256]; - sprintf(info,"deletehttp: (htsblk*) %d\n",r); - DEBUG_W2(info); + DEBUG_W("deletehttp: (htsblk*) 0x%p\n" _ (void*) r); #endif #if HTS_USEOPENSSL /* Free OpenSSL structures */ @@ -2357,27 +2387,22 @@ HTS_INLINE void deletehttp(htsblk* r) { // free the addr buffer // always returns 1 HTS_INLINE int deleteaddr(htsblk* r) { - if (r->adr) { + if (r->adr != NULL) { freet(r->adr); r->adr = NULL; } + if (r->headers != NULL) { + freet(r->headers); + r->headers = NULL; + } return 1; } // fermer une socket HTS_INLINE void deletesoc(T_SOC soc) { - if (soc!=INVALID_SOCKET) { -// J'ai planté.. 
pas de shutdown -//#if HTS_WIDE_DEBUG -// DEBUG_W("shutdown\n"); -//#endif -// shutdown(soc,2); // shutdown -//#if HTS_WIDE_DEBUG -// DEBUG_W("shutdown done\n"); -//#endif - // Ne pas oublier de fermer la connexion avant de partir.. (plus propre) + if (soc!=INVALID_SOCKET && soc!=LOCAL_SOCKET_ID) { #if HTS_WIDE_DEBUG - DEBUG_W("close\n"); + DEBUG_W("close %d\n" _ (int) soc); #endif #if HTS_WIN closesocket(soc); @@ -2385,7 +2410,7 @@ HTS_INLINE void deletesoc(T_SOC soc) { close(soc); #endif #if HTS_WIDE_DEBUG - DEBUG_W("close done\n"); + DEBUG_W(".. done\n"); #endif } } @@ -2400,8 +2425,10 @@ HTS_INLINE void deletesoc_r(htsblk* r) { r->ssl_con=NULL; } #endif - deletesoc(r->soc); - r->soc=INVALID_SOCKET; + if (r->soc!=INVALID_SOCKET) { + deletesoc(r->soc); + r->soc=INVALID_SOCKET; + } } // renvoi le nombre de secondes depuis 1970 @@ -2597,6 +2624,20 @@ int set_filetime_rfc822(char* file,char* date) { } else return -1; } +int get_filetime_rfc822(char* file,char* date) { + struct stat buf; + date[0] = '\0'; + if (stat(file, &buf) == 0) { + struct tm* A; + time_t tt = buf.st_mtime; + A=gmtime(&tt); + if (A==NULL) + A=localtime(&tt); + time_rfc822(date, A); + return 1; + } + return 0; +} // heure au format rfc (taille buffer 256o) HTS_INLINE void time_rfc822(char* s,struct tm * A) { @@ -2758,33 +2799,21 @@ int finput(int fd,char* s,int max) { } // Like linput, but in memory (optimized) -int binput(char* buff,char* s,int max) { - char* end; - int count; - - // clear buffer - s[0]='\0'; - // end of buffer? 
- if ( *buff == '\0') - return 1; - // find ending \n - end=strchr(buff,'\n'); - // ..or end of buffer - if (!end) - end=buff+strlen(buff); - // then count number of bytes, maximum=max - count=min(max,end-buff); - // and strip annoying ending cr - while( (count>0) && (buff[count] == '\r')) - count--; - // copy - if (count > 0) { - strncatbuff(s, buff, count); +int binput(char* buff, char* s, int max) { + int count = 0; + int destCount = 0; + + // Note: \0 will return 1 + while(count < max && buff != NULL && buff[count] != '\0' && buff[count] != '\n') { + if (buff[count] != '\r') { + s[destCount++] = buff[count]; + } + count++; } - // and terminate with a null char - s[count]='\0'; + s[destCount] = '\0'; + // then return the supplemental jump offset - return (end-buff)+1; + return count + 1; } // Lecture d'une ligne (peut être unicode à priori) @@ -2894,18 +2923,6 @@ void rawlinput(FILE* fp,char* s,int max) { s[j++]='\0'; } - -// compare le début de f avec s et retourne la position de la fin -// 'A=a' (case insensitive) -int strfield(const char* f,const char* s) { - int r=0; - while (streql(*f,*s) && ((*f)!=0) && ((*s)!=0)) { f++; s++; r++; } - if (*s==0) - return r; - else - return 0; -} - //cherche chaine, case insensitive char* strstrcase(char *s,char *o) { while((*s) && (strfield(s,o)==0)) s++; @@ -3006,8 +3023,8 @@ void map_characters(unsigned char* buffer, unsigned int size, unsigned int* map) // 1 : oui // -1 : on sait pas // -2 : on sait pas, pas d'extension -int ishtml(char* fil) { - char *a; +int ishtml(const char* fil) { + const char *a; // patch pour les truc.html?Choix=toto if ( (a=strchr(fil,'?')) ) // paramètres? 
@@ -3020,19 +3037,20 @@ int ishtml(char* fil) { while ( (*a!='.') && (*a!='/') && ( a > fil)) a--; if (*a=='.') { // a une extension - char fil_noquery[HTS_URLMAXSIZE*2]; + char BIGSTK fil_noquery[HTS_URLMAXSIZE*2]; + char* b; fil_noquery[0]='\0'; a++; // pointer sur extension strncatbuff(fil_noquery,a,HTS_URLMAXSIZE); - a=strchr(fil_noquery,'?'); - if (a) - *a='\0'; + b=strchr(fil_noquery,'?'); + if (b) + *b='\0'; return ishtml_ext(fil_noquery); // retour } else return -2; // indéterminé, par exemple /truc } // idem, mais pour uniquement l'extension -int ishtml_ext(char* a) { +int ishtml_ext(const char* a) { int html=0; // if (strfield2(a,"html")) html = 1; @@ -3075,6 +3093,8 @@ HTS_INLINE int ishttperror(int err) { // une identification HTSEXT_API char* jump_identification(char* source) { char *a,*trytofind; + if (strcmp(source, "file://") == 0) + return source; // rechercher dernier @ (car parfois email transmise dans adresse!) // mais sauter ftp:// éventuel a = jump_protocol(source); @@ -3083,6 +3103,8 @@ HTSEXT_API char* jump_identification(char* source) { } HTSEXT_API char* jump_normalized(char* source) { + if (strcmp(source, "file://") == 0) + return source; source = jump_identification(source); if (strfield(source, "www") && source[3] != '\0') { if (source[3] == '.') { // www.foo.com -> foo.com @@ -3098,25 +3120,80 @@ HTSEXT_API char* jump_normalized(char* source) { return source; } -HTSEXT_API char* fil_normalized(char* source, char* dest_) { - char* dest=dest_; +static int sortNormFnc(const void * a_, const void * b_) { + char** a = (char**) a_; + char** b = (char**) b_; + return strcmp(*a+1, *b+1); +} + + +HTSEXT_API char* fil_normalized(char* source, char* dest) { char lastc = 0; int gotquery=0; - while(*source) { - if (*source == '?') - gotquery=1; + int ampargs=0; + int i,j; + char* query=NULL; + for(i=j=0 ; source[i] != '\0'; i++) { + if (!gotquery && source[i] == '?') + gotquery=ampargs=1; if ( - (!gotquery && lastc == '/' && *source == '/') // 
foo//bar -> foo/bar + (!gotquery && lastc == '/' && source[i] == '/') // foo//bar -> foo/bar ) { } else { - *dest++ = *source; + if (gotquery && source[i] == '&') { + ampargs++; + } + dest[j++] = source[i]; + } + lastc = source[i]; + } + dest[j++] = '\0'; + + /* Sort arguments (&foo=1&bar=2 == &bar=2&foo=1) */ + if (ampargs > 1) { + char** amps = malloct(ampargs * sizeof(char*)); + char* copyBuff = NULL; + int qLen=0; + assertf(amps != NULL); + gotquery = 0; + for(i=j=0 ; dest[i] != '\0'; i++) { + if ( (gotquery && dest[i] == '&') || ( !gotquery && dest[i] == '?') ) { + if (!gotquery) { + gotquery=1; + query = &dest[i]; + qLen = (int)strlen(query); + } + assertf(j < ampargs); + amps[j++] = &dest[i]; + dest[i] = '\0'; + } } - lastc = *source; - source++; + assertf(j == ampargs); + + /* Sort 'em all */ + qsort(amps, ampargs, sizeof(char*), sortNormFnc); + + /* Replace query by sorted query */ + copyBuff = malloct(qLen + 1); + assertf(copyBuff != NULL); + copyBuff[0] = '\0'; + for(i = 0 ; i < ampargs ; i++) { + if (i == 0) + strcatbuff(copyBuff, "?"); + else + strcatbuff(copyBuff, "&"); + strcatbuff(copyBuff, amps[i] + 1); + } + assert((int)strlen(copyBuff) <= qLen); + strcpybuff(query, copyBuff); + + /* Cleanup */ + freet(amps); + freet(copyBuff); } - *dest++ = '\0'; - return dest_; + + return dest; } #define endwith(a) ( (len >= (sizeof(a)-1)) ? ( strncmp(dest, a+len-(sizeof(a)-1), sizeof(a)-1) == 0 ) : 0 ); @@ -3154,6 +3231,21 @@ char* strrchr_limit(char* s, char c, char* limit) { } } +// strrchr, but not too far +char* strstr_limit(char* s, char* sub, char* limit) { + if (limit == NULL) { + return strstr(s, sub); + } else { + char* pos = strstr(s, sub); + if (pos != NULL) { + char* farpos = strstr(s, limit); + if (farpos == NULL || pos < farpos) + return pos; + } + } + return NULL; +} + // retourner adr sans ftp:// HTS_INLINE char* jump_protocol(char* source) { int p; @@ -3456,7 +3548,7 @@ HTSEXT_API void unescape_amp(char* s) { c='~'; // remplacer? 
if (c) { - char buff[HTS_URLMAXSIZE*2]; + char BIGSTK buff[HTS_URLMAXSIZE*2]; buff[0]=(char) c; strcpybuff(buff+1,end+1); strcpybuff(s,buff); @@ -3467,6 +3559,17 @@ HTSEXT_API void unescape_amp(char* s) { } } +static int ehexh(char c) { + if ((c>='0') && (c<='9')) return c-'0'; + if ((c>='a') && (c<='f')) c-=('a'-'A'); + if ((c>='A') && (c<='F')) return (c-'A'+10); + return 0; +} + +static int ehex(char* s) { + return 16*ehexh(*s)+ehexh(*(s+1)); +} + // remplacer %20 par ' ', | par : etc.. // buffer MAX 1Ko HTSEXT_API char* unescape_http(char* s) { @@ -3564,7 +3667,7 @@ HTSEXT_API void escape_remove_control(char* s) { unsigned char* ss = (unsigned char*) s; while(*ss) { if (*ss < 32) { /* CONTROL characters go away! */ - char tmp[HTS_URLMAXSIZE*2]; + char BIGSTK tmp[HTS_URLMAXSIZE*2]; strcpybuff(tmp, ss+1); strcpybuff(ss, tmp); } else { @@ -3573,6 +3676,25 @@ HTSEXT_API void escape_remove_control(char* s) { } } +HTSEXT_API void x_escape_html(char* s) { + while(*s) { + int test=0; + test = ( + CHAR_HIG(*s) + || CHAR_XXAVOID(*s) ); + + if (test) { + char BIGSTK buffer[HTS_URLMAXSIZE*3]; + int n; + n = (int)(unsigned char) *s; + strcpybuff(buffer, s+1); + sprintf(s,"&#x%02x;", n); + strcatbuff(s, buffer); + } + s++; + } +} + HTSEXT_API void x_escape_http(char* s,int mode) { while(*s) { @@ -3588,7 +3710,7 @@ HTSEXT_API void x_escape_http(char* s,int mode) { || CHAR_MARK(*s)); } else if (mode==2) - test=(strchr(" ",*s)!=0); // n'escaper que espace + test=(*s == ' '); // n'escaper que espace else if (mode==3) { // échapper que ce qui est nécessaire test = ( CHAR_SPECIAL(*s) @@ -3601,7 +3723,7 @@ HTSEXT_API void x_escape_http(char* s,int mode) { } if (test) { - char buffer[HTS_URLMAXSIZE*3]; + char BIGSTK buffer[HTS_URLMAXSIZE*3]; int n; n=(int)(unsigned char) *s; strcpybuff(buffer,s+1); @@ -3612,18 +3734,34 @@ HTSEXT_API void x_escape_http(char* s,int mode) { } } +HTSEXT_API void escape_for_html_print(char* s, char* d) { + for( ; *s ; s++) { + if (*s == '&') { + 
strcpybuff(d, "&"); + d += strlen(d); + } else { + *d++ = *s; + } + } + *d = '\0'; +} -HTS_INLINE int ehexh(char c) { - if ((c>='0') && (c<='9')) return c-'0'; - if ((c>='a') && (c<='f')) c-=('a'-'A'); - if ((c>='A') && (c<='F')) return (c-'A'+10); - return 0; +HTSEXT_API void escape_for_html_print_full(char* s, char* d) { + for( ; *s ; s++) { + if (*s == '&') { + strcpybuff(d, "&"); + d += strlen(d); + } else if (CHAR_HIG(*s)) { + sprintf(d, "&#x%02x;", (unsigned char) *s); + d += strlen(d); + } else { + *d++ = *s; + } + } + *d = '\0'; } -HTS_INLINE int ehex(char* s) { - return 16*ehexh(*s)+ehexh(*(s+1)); -} // concat, concatène deux chaines et renvoi le résultat // permet d'alléger grandement le code @@ -3731,18 +3869,18 @@ HTS_INLINE int is_realspace(char c) { // deviner type d'un fichier local.. // ex: fil="toto.gif" -> s="image/gif" -void guess_httptype(char *s,char *fil) { +void guess_httptype(char *s,const char *fil) { get_httptype(s,fil,1); } // idem // flag: 1 si toujours renvoyer un type -void get_httptype(char *s,char *fil,int flag) { +void get_httptype(char *s,const char *fil,int flag) { if (ishtml(fil)==1) strcpybuff(s,"text/html"); else { - char *a=fil+strlen(fil)-1; + const char *a=fil+strlen(fil)-1; while ( (*a!='.') && (*a!='/') && (a>fil)) a--; - if (*a=='.') { + if (*a=='.' 
&& strlen(a) < 32) { int ok=0; int j=0; a++; @@ -3766,7 +3904,7 @@ void get_httptype(char *s,char *fil,int flag) { // get type of fil (php) // s: buffer (text/html) or NULL // return: 1 if known by user -int get_userhttptype(int setdefs,char *s,char *ext) { +int get_userhttptype(int setdefs,char *s,const char *ext) { char** buffer=NULL; NOSTATIC_RESERVE(buffer, char*, 1); if (setdefs) { @@ -3778,7 +3916,7 @@ int get_userhttptype(int setdefs,char *s,char *ext) { if (!ext) return 0; if (*buffer) { - char search[1024]; + char BIGSTK search[1024]; char* detect; sprintf(search,"\n%s=",ext); // php=text/html detect=strstr(*buffer,search); @@ -3844,7 +3982,7 @@ void give_mimext(char *s,char *st) { // 0 : non // 1 : oui // 2 : html -int is_knowntype(char *fil) { +int is_knowntype(const char *fil) { int j=0; if (!fil) return 0; @@ -3862,19 +4000,20 @@ int is_knowntype(char *fil) { return (is_userknowntype(fil)); } // extension : html,gif.. -char* get_ext(char *fil) { +char* get_ext(const char *fil) { char* fil_noquery; - char *a=fil+strlen(fil)-1; + const char *a=fil+strlen(fil)-1; NOSTATIC_RESERVE(fil_noquery, char, HTS_URLMAXSIZE*2); while ( (*a!='.') && (*a!='/') && (a>fil)) a--; if (*a=='.') { + char* b; fil_noquery[0]='\0'; a++; // pointer sur extension strncatbuff(fil_noquery,a,HTS_URLMAXSIZE); - a=strchr(fil_noquery,'?'); - if (a) - *a='\0'; + b=strchr(fil_noquery,'?'); + if (b) + *b='\0'; return concat(fil_noquery,""); } else @@ -3886,8 +4025,8 @@ char* get_ext(char *fil) { // 2 : html // setdefs : set mime buffer: // file=(char*) "asp=text/html\nphp=text/html\n" -int is_userknowntype(char *fil) { - char mime[1024]; +int is_userknowntype(const char *fil) { + char BIGSTK mime[1024]; if (!fil) return 0; if (!strnotempty(fil)) @@ -3904,7 +4043,7 @@ int is_userknowntype(char *fil) { // page dynamique? 
// is_dyntype(get_ext("foo.asp")) -int is_dyntype(char *fil) { +int is_dyntype(const char *fil) { int j=0; if (!fil) return 0; @@ -3921,11 +4060,12 @@ int is_dyntype(char *fil) { // types critiques qui ne doivent pas être changés car renvoyés par des serveurs qui ne // connaissent pas le type -int may_unknown(char* st) { +int may_unknown(const char* st) { int j=0; // types média - if (may_be_hypertext_mime(st)) + if (may_be_hypertext_mime(st, "")) { return 1; + } while(strnotempty(hts_mime_keep[j])) { if (strfield2(hts_mime_keep[j],st)) { // trouvé return 1; @@ -3936,7 +4076,6 @@ int may_unknown(char* st) { } - // -- Utils fichiers // pretty print for i/o @@ -4106,23 +4245,24 @@ int HTS_TOTAL_RECV_CHECK(int var) { #endif // Lecture dans buff de size octets au maximum en utilisant la socket r (structure htsblk) +// returns: // >0 : data received // == 0 : not yet data -// <0 : no more data or error +// <0: error or no data: READ_ERROR, READ_EOF or READ_TIMEOUT HTS_INLINE int hts_read(htsblk* r,char* buff,int size) { int retour; // return read(soc,buff,size); if (r->is_file) { #if HTS_WIDE_DEBUG - DEBUG_W("read\n"); + DEBUG_W("read(%p, %d, %d)\n" _ (void*) buff _ (int) size _ (int) r->fp); #endif if (r->fp) - retour=(int)fread(buff,1,size,r->fp); + retour = (int)fread(buff,1,size,r->fp); else - retour=-1; + retour = READ_ERROR; } else { #if HTS_WIDE_DEBUG - DEBUG_W("recv\n"); + DEBUG_W("recv(%d, %p, %d)\n" _ (int) r->soc _ (void*) buff _ (int) size); if (r->soc==INVALID_SOCKET) printf("!!WIDE_DEBUG ERROR, soc==INVALID hts_read\n"); #endif @@ -4139,13 +4279,20 @@ HTS_INLINE int hts_read(htsblk* r,char* buff,int size) { ) { retour = 0; /* no data yet (ssl cache) */ + } else if (err_code == SSL_ERROR_ZERO_RETURN) { + retour = READ_EOF; /* completed */ } else { - retour = -1; /* eof or error */ + retour = READ_ERROR; /* eof or error */ } } } else { #endif retour=recv(r->soc,buff,size,0); + if (retour == 0) { + retour = READ_EOF; + } else if (retour < 0) { + retour = 
READ_ERROR; + } } if (retour > 0) // compter flux entrant HTS_STAT.HTS_TOTAL_RECV+=retour; @@ -4153,7 +4300,7 @@ HTS_INLINE int hts_read(htsblk* r,char* buff,int size) { } #endif #if HTS_WIDE_DEBUG - DEBUG_W("recv/read done\n"); + DEBUG_W("recv/read done (%d bytes)\n" _ (int) retour); #endif return retour; } @@ -4179,7 +4326,7 @@ static void hts_cache_free_(t_dnscache* cache) { } } void hts_cache_free(t_dnscache* cache) { - if (cache != NULL) { + if (cache != NULL && cache->n != NULL) { hts_cache_free_(cache->n); cache->n = NULL; } @@ -4218,7 +4365,7 @@ int _hts_lockdns(int i) { // si h_length==0 alors le nom n'existe pas dans le dns t_hostent* _hts_ghbn(t_dnscache* cache,char* iadr,t_hostent* retour) { // attendre que le cache dns soit prêt - while(_hts_lockdns(-1)); // attendre libération + //while(_hts_lockdns(-1)); // attendre libération _hts_lockdns(1); // locker while(1) { @@ -4273,7 +4420,7 @@ int hts_dnstest(char* _iadr) { #endif return 1; - while(_hts_lockdns(-1)); // attendre libération + // while(_hts_lockdns(-1)); // attendre libération _hts_lockdns(1); // locker while(1) { if (strcmp(cache->iadr,iadr)==0) { // ok trouvé @@ -4306,7 +4453,7 @@ HTSEXT_API t_hostent* vxgethostbyname(char* hostname, void* v_buffer) { The resolver doesn't seem to handle IP6 addresses in brackets */ if ((hostname[0] == '[') && (hostname[strlen(hostname)-1] == ']')) { - char tempo[HTS_URLMAXSIZE*2]; + char BIGSTK tempo[HTS_URLMAXSIZE*2]; tempo[0]='\0'; strncatbuff(tempo, hostname+1, strlen(hostname)-2); strcpybuff(hostname, tempo); @@ -4366,7 +4513,7 @@ HTSEXT_API t_hostent* vxgethostbyname(char* hostname, void* v_buffer) { // cache dns interne à HTS // ** FREE A FAIRE sur la chaine t_hostent* hts_gethostbyname(char* _iadr, void* v_buffer) { - char iadr[HTS_URLMAXSIZE*2]; + char BIGSTK iadr[HTS_URLMAXSIZE*2]; t_fullhostent* buffer = (t_fullhostent*) v_buffer; t_dnscache* cache=_hts_cache(); // adresse du cache t_hostent* hp; @@ -4499,6 +4646,13 @@ void* hts_calloc(size_t 
len,size_t len2) { memset(adr, 0, len * len2); return adr; } +void* hts_strdup(char* str) { + size_t size = str ? strlen(str) : 0; + char* adr = (char*) hts_malloc(size + 1); + fassert(adr != NULL); + strcpy(adr, str ? str : ""); + return adr; +} void* hts_xmalloc(size_t len,size_t len2) { mlink* lnk = (mlink*) calloc(1,sizeof(mlink)); fassert(lnk != NULL); @@ -4665,13 +4819,67 @@ int ftp_available(void) { #endif +int hts_dgb_init = 0; +FILE* hts_dgb_init_fp = NULL; +static void hts_dgb(char* msg); +HTSEXT_API void hts_debug(int level) { + hts_dgb_init = level; + if (hts_dgb_init > 0) { + hts_dgb("hts_debug() called"); + } +} +static void hts_dgb(char* msg) { + if (hts_dgb_init > 0) { + if (hts_dgb_init_fp == NULL) { +#ifdef _WIN32_WCE + hts_dgb_init_fp = fopen("\\Temp\\hts-debug.txt", "wb"); +#else + hts_dgb_init_fp = fopen("hts-debug.txt", "wb"); +#endif + if (hts_dgb_init_fp != NULL) { + fprintf(hts_dgb_init_fp, "* Creating file\r\n"); + } + } + if (hts_dgb_init_fp != NULL) { + fprintf(hts_dgb_init_fp, "%s\r\n", msg); + fflush(hts_dgb_init_fp); + } + } +} HTSEXT_API int hts_init(void) { static int hts_init_ok = 0; + hts_dgb("entering hts_init()"); /* debug */ + +#ifdef _WIN32_WCE +#ifndef HTS_CECOMPAT + xceinit(L""); +#endif +#endif + + /* Init threads */ + if (!hts_init_ok) { + htsthread_init(); + } + /* Ensure external modules are loaded */ + hts_dgb("calling htspe_init()"); /* debug */ htspe_init(); + /* MD5 Auto-test */ + { + char digest[32 + 2]; + unsigned char* atest = (unsigned char*)"MD5 Checksum Autotest"; + digest[0] = '\0'; + domd5mem(atest, strlen(atest), digest, 1); /* a42ec44369da07ace5ec1d660ba4a69a */ + if (strcmp(digest, "a42ec44369da07ace5ec1d660ba4a69a") != 0) { + int fatal_broken_md5 = 0; + assertf(fatal_broken_md5); + } + } + + hts_dgb("initializing default wrappers"); /* debug */ if (!hts_init_ok) { hts_init_ok = 1; // default wrappers @@ -4681,6 +4889,8 @@ HTSEXT_API int hts_init(void) { htswrap_add("start",htsdefault_start); 
htswrap_add("change-options",htsdefault_chopt); htswrap_add("end",htsdefault_end); + htswrap_add("preprocess-html",htsdefault_preprocesshtml); + htswrap_add("postprocess-html",htsdefault_postprocesshtml); htswrap_add("check-html",htsdefault_checkhtml); htswrap_add("loop",htsdefault_loop); htswrap_add("query",htsdefault_query); @@ -4690,10 +4900,14 @@ HTSEXT_API int hts_init(void) { htswrap_add("pause",htsdefault_pause); htswrap_add("save-file",htsdefault_filesave); htswrap_add("link-detected",htsdefault_linkdetected); + htswrap_add("link-detected2",htsdefault_linkdetected2); htswrap_add("transfer-status",htsdefault_xfrstatus); htswrap_add("save-name",htsdefault_savename); + htswrap_add("send-header",htsdefault_sendheader); + htswrap_add("receive-header",htsdefault_receiveheader); } + hts_dgb("initializing SSL"); /* debug */ #if HTS_USEOPENSSL /* Initialize the OpensSSL library @@ -4715,14 +4929,17 @@ HTSEXT_API int hts_init(void) { #endif /* Init vars and thread-specific values */ + hts_dgb("initializing variables"); /* debug */ hts_initvar(); /* initialiser structcheck */ // structcheck_init(1); + hts_dgb("ending hts_init()"); /* debug */ return 1; } HTSEXT_API int hts_uninit(void) { + //htsthread_uninit(); hts_cache_free(_hts_cache()); hts_freevar(); /* htswrap_free(); */ @@ -4744,6 +4961,12 @@ int __cdecl htsdefault_chopt(void* opt) { int __cdecl htsdefault_end(void) { return 1; } +int __cdecl htsdefault_preprocesshtml(char** html,int* len,char* url_adresse,char* url_fichier) { + return 1; +} +int __cdecl htsdefault_postprocesshtml(char** html,int* len,char* url_adresse,char* url_fichier) { + return 1; +} int __cdecl htsdefault_checkhtml(char* html,int len,char* url_adresse,char* url_fichier) { return 1; } @@ -4772,12 +4995,21 @@ void __cdecl htsdefault_filesave(char* file) { int __cdecl htsdefault_linkdetected(char* link) { return 1; } +int __cdecl htsdefault_linkdetected2(char* link, char* start_tag) { + return 1; +} int __cdecl htsdefault_xfrstatus(void* 
back) { return 1; } int __cdecl htsdefault_savename(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save) { return 1; } +int __cdecl htsdefault_sendheader(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* outgoing) { + return 1; +} +int __cdecl htsdefault_receiveheader(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* incoming) { + return 1; +} // end defaut wrappers diff --git a/src/htslib.h b/src/htslib.h index d3881d3..23a8400 100644 --- a/src/htslib.h +++ b/src/htslib.h @@ -43,7 +43,9 @@ Please visit our Website: http://www.httrack.com #include "htsglobal.h" /* basic net definitions */ +#include "htsbase.h" #include "htsbasenet.h" +#include "htsnet.h" /* cookies et auth */ #include "htsbauth.h" @@ -52,12 +54,15 @@ Please visit our Website: http://www.httrack.com // (à modifier avec celle-ci) #define POSTTOK "?>post" -#include <stdio.h> - #include "htsopt.h" +#define READ_ERROR (-1) +#define READ_EOF (-2) +#define READ_TIMEOUT (-3) +#define READ_INTERNAL_ERROR (-4) + // structure pour paramètres supplémentaires lors de la requête -typedef struct { +typedef struct htsrequest { short int user_agent_send; // user agent (ex: httrack/1.0 [sun]) short int http11; // l'en tête peut (doit) être signé HTTP/1.1 et non HTTP/1.0 short int nokeepalive; // pas de keep-alive @@ -65,13 +70,15 @@ typedef struct { short int nocompression; // Pas de compression short int flush_garbage; // recycled char user_agent[128]; + char referer[256]; + char from[256]; char lang_iso[64]; t_proxy proxy; // proxy } htsrequest; // structure pour retour d'une connexion/prise d'en tête -typedef struct { +typedef struct htsblk { int statuscode; // status-code, -1=erreur, 200=OK,201=..etc (cf RFC1945) short int notmodified; // page ou fichier NON modifié (transféré) short int is_write; // sortie sur disque (out) ou en mémoire (adr) @@ -83,6 +90,7 @@ typedef struct { int keep_alive_t; // KA timeout int 
keep_alive_max; // KA number of requests char* adr; // adresse du bloc de mémoire, NULL=vide + char* headers; // adresse des en têtes si présents FILE* out; // écriture directe sur disque (si is_write=1) LLint size; // taille fichier char msg[80]; // message éventuel si échec ("\0"=non précisé) @@ -93,6 +101,8 @@ typedef struct { LLint totalsize; // taille totale à télécharger (-1=inconnue) short int is_file; // ce n'est pas une socket mais un descripteur de fichier si 1 T_SOC soc; // ID socket + SOCaddr address; // IP address + int address_size; // IP address structure length FILE* fp; // fichier pour file:// #if HTS_USEOPENSSL short int ssl; // is this connection a SSL one? (https) @@ -105,8 +115,8 @@ typedef struct { LLint crange; // Content-Range int debugid; // debug connection /* */ - htsrequest req; // paramètres pour la requête - /*char digest[32+2]; // digest md5 généré par le moteur ("" si non généré)*/ + htsrequest req; // paramètres pour la requête + /*char digest[32+2]; // digest md5 généré par le moteur ("" si non généré)*/ } htsblk; @@ -144,12 +154,8 @@ typedef struct t_dnscache { - -/* -#ifdef __cplusplus -extern "C" { -#endif -*/ +/* Library internal definictions */ +#ifdef HTS_INTERNAL_BYTECODE // fonctions unix/winsock int hts_read(htsblk* r,char* buff,int size); @@ -215,6 +221,7 @@ void time_local_rfc822(char* s); struct tm* convert_time_rfc822(char* s); int set_filetime(char* file,struct tm* tm_time); int set_filetime_rfc822(char* file,char* date); +int get_filetime_rfc822(char* file,char* date); HTS_INLINE void time_rfc822(char* s,struct tm * A); HTS_INLINE void time_rfc822_local(char* s,struct tm * A); #ifndef HTTRACK_DEFLIB @@ -232,25 +239,23 @@ int linputsoc_t(T_SOC soc, char* s, int max, int timeout); int linput_trim(FILE* fp,char* s,int max); int linput_cpp(FILE* fp,char* s,int max); void rawlinput(FILE* fp,char* s,int max); -int strfield(const char* f,const char* s); -#define strfield2(f,s) ( (strlen(f)!=strlen(s)) ? 
0 : (strfield(f,s)) ) char* strstrcase(char *s,char *o); int ident_url_absolute(char* url,char* adr,char* fil); void fil_simplifie(char* f); int is_unicode_utf8(unsigned char* buffer, unsigned int size); void map_characters(unsigned char* buffer, unsigned int size, unsigned int* map); -int ishtml(char* urlfil); -int ishtml_ext(char* a); +int ishtml(const char* urlfil); +int ishtml_ext(const char* a); int ishttperror(int err); -void guess_httptype(char *s,char *fil); -void get_httptype(char *s,char *fil,int flag); -int get_userhttptype(int setdefs,char *s,char *ext); +void guess_httptype(char *s,const char *fil); +void get_httptype(char *s,const char *fil,int flag); +int get_userhttptype(int setdefs,char *s,const char *ext); void give_mimext(char *s,char *st); -int is_knowntype(char *fil); -int is_userknowntype(char *fil); -int is_dyntype(char *fil); -char* get_ext(char *fil); -int may_unknown(char* st); +int is_knowntype(const char *fil); +int is_userknowntype(const char *fil); +int is_dyntype(const char *fil); +char* get_ext(const char *fil); +int may_unknown(const char* st); #ifndef HTTRACK_DEFLIB HTSEXT_API char* jump_identification(char*); HTSEXT_API char* jump_normalized(char*); @@ -259,6 +264,7 @@ HTSEXT_API char* fil_normalized(char* source, char* dest); HTSEXT_API char* adr_normalized(char* source, char* dest); #endif char* strrchr_limit(char* s, char c, char* limit); +char* strstr_limit(char* s, char* sub, char* limit); HTS_INLINE char* jump_protocol(char* source); void code64(unsigned char* a,int size_a,unsigned char* b,int crlf); #ifndef HTTRACK_DEFLIB @@ -270,15 +276,16 @@ HTSEXT_API void escape_uri_utf(char* s); HTSEXT_API void escape_check_url(char* s); HTSEXT_API char* escape_check_url_addr(char* s); HTSEXT_API void x_escape_http(char* s,int mode); +HTSEXT_API void x_escape_html(char* s); HTSEXT_API void escape_remove_control(char* s); +HTSEXT_API void escape_for_html_print(char* s, char* d); +HTSEXT_API void escape_for_html_print_full(char* s, char* 
d); #endif -int ehexh(char c); #ifndef HTTRACK_DEFLIB HTSEXT_API char* unescape_http(char* s); HTSEXT_API char* unescape_http_unharm(char* s, int no_high); HTSEXT_API char* antislash_unescaped(char* s); #endif -int ehex(char* s); char* concat(const char* a,const char* b); #define copychar(a) concat((a),NULL) #if HTS_DOSNAME @@ -296,14 +303,6 @@ char* concat(const char* a,const char* b); void hts_lowcase(char* s); void hts_replace(char *s,char from,char to); -/* Spaces: CR,LF,TAB,FF */ -#define is_space(c) ( ((c)==' ') || ((c)=='\"') || ((c)==10) || ((c)==13) || ((c)==9) || ((c)==12) || ((c)==11) || ((c)=='\'') ) -#define is_realspace(c) ( ((c)==' ') || ((c)==10) || ((c)==13) || ((c)==9) || ((c)==12) || ((c)==11) ) -#define is_taborspace(c) ( ((c)==' ') || ((c)==9) ) -#define is_quote(c) ( ((c)=='\"') || ((c)=='\'') ) -#define is_retorsep(c) ( ((c)==10) || ((c)==13) || ((c)==9) ) -//HTS_INLINE int is_space(char); -//HTS_INLINE int is_realspace(char); void fprintfio(FILE* fp,char* buff,char* prefix); @@ -328,11 +327,6 @@ typedef void* ( *beginthread_type )( void * ); unsigned long _beginthread( beginthread_type start_address, unsigned stack_size, void *arglist ); #endif -/* -#ifdef __cplusplus -} -#endif -*/ @@ -360,6 +354,8 @@ void __cdecl htsdefault_uninit(void); int __cdecl htsdefault_start(void* opt); int __cdecl htsdefault_chopt(void* opt); int __cdecl htsdefault_end(void); +int __cdecl htsdefault_preprocesshtml(char** html,int* len,char* url_adresse,char* url_fichier); +int __cdecl htsdefault_postprocesshtml(char** html,int* len,char* url_adresse,char* url_fichier); int __cdecl htsdefault_checkhtml(char* html,int len,char* url_adresse,char* url_fichier); int __cdecl htsdefault_loop(void* back,int back_max,int back_index,int lien_n,int lien_tot,int stat_time,hts_stat_struct* stats); char* __cdecl htsdefault_query(char* question); @@ -369,10 +365,150 @@ int __cdecl htsdefault_check(char* adr,char* fil,int status); void __cdecl htsdefault_pause(char* lockfile); 
void __cdecl htsdefault_filesave(char*); int __cdecl htsdefault_linkdetected(char* link); +int __cdecl htsdefault_linkdetected2(char* link, char* tag_start); int __cdecl htsdefault_xfrstatus(void* back); int __cdecl htsdefault_savename(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save); +int __cdecl htsdefault_sendheader(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* outgoing); +int __cdecl htsdefault_receiveheader(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* incoming); + // end defaut wrappers + +// htsmodule.c definitions +extern void* getFunctionPtr(httrackp* opt, char* file, char* fncname); +extern void clearCallbacks(htscallbacks* chain); + + + +#endif // internals + + +/* Spaces: CR,LF,TAB,FF */ +#define is_space(c) ( ((c)==' ') || ((c)=='\"') || ((c)==10) || ((c)==13) || ((c)==9) || ((c)==12) || ((c)==11) || ((c)=='\'') ) +#define is_realspace(c) ( ((c)==' ') || ((c)==10) || ((c)==13) || ((c)==9) || ((c)==12) || ((c)==11) ) +#define is_taborspace(c) ( ((c)==' ') || ((c)==9) ) +#define is_quote(c) ( ((c)=='\"') || ((c)=='\'') ) +#define is_retorsep(c) ( ((c)==10) || ((c)==13) || ((c)==9) ) +//HTS_INLINE int is_space(char); +//HTS_INLINE int is_realspace(char); + +// compare le début de f avec s et retourne la position de la fin +// 'A=a' (case insensitive) +static int strfield(const char* f,const char* s) { + int r=0; + while (streql(*f,*s) && ((*f)!=0) && ((*s)!=0)) { f++; s++; r++; } + if (*s==0) + return r; + else + return 0; +} +static int strcmpnocase(char* a,char* b) { + while(*a) { + int cmp = hichar(*a) - hichar(*b); + if (cmp != 0) + return cmp; + a++; + b++; + } + return 0; +} + +#ifdef _WIN32 +#define strcasecmp(a,b) stricmp(a,b) +#define strncasecmp(a,b,n) strnicmp(a,b,n) +#endif + +#define strfield2(f,s) ( (strlen(f)!=strlen(s)) ? 0 : (strfield(f,s)) ) + +// is this MIME an hypertext MIME (text/html), html/js-style or other script/text type? 
+#define HTS_HYPERTEXT_DEFAULT_MIME "text/html" +#define is_hypertext_mime__(a) \ + ( (strfield2((a),"text/html")!=0)\ + || (strfield2((a),"application/x-javascript")!=0) \ + || (strfield2((a),"text/css")!=0) \ + /*|| (strfield2((a),"text/vnd.wap.wml")!=0)*/ \ + || (strfield2((a),"image/svg+xml")!=0) \ + || (strfield2((a),"image/svg-xml")!=0) \ + /*|| (strfield2((a),"audio/x-pn-realaudio")!=0) */\ + || (strfield2((a),"application/x-authorware-map")!=0) \ + ) +#define may_be_hypertext_mime__(a) \ + (\ + (strfield2((a),"audio/x-pn-realaudio")!=0) \ + || (strfield2((a),"audio/x-mpegurl")!=0) \ + ) + + +/* Library internal definictions */ +#ifdef HTS_INTERNAL_BYTECODE + +// check if (mime, file) is hypertext +static int is_hypertext_mime(const char* mime, const char* file) { + if (is_hypertext_mime__(mime)) + return 1; + if (may_unknown(mime)) { + char guessed[256]; + guessed[0] = '\0'; + guess_httptype(guessed, file); + return is_hypertext_mime__(guessed); + } + return 0; +} + +// check if (mime, file) might be "false" hypertext +static int may_be_hypertext_mime(const char* mime, const char* file) { + if (may_be_hypertext_mime__(mime)) + return 1; + if (file != NULL && file[0] != '\0' && may_unknown(mime)) { + char guessed[256]; + guessed[0] = '\0'; + guess_httptype(guessed, file); + return may_be_hypertext_mime__(guessed); + } + return 0; +} + +// compare (mime, file) with reference +static int compare_mime(const char* mime, const char* file, const char* reference) { + if (is_hypertext_mime__(mime) || may_be_hypertext_mime__(mime)) + return strfield2(mime, reference); + if (file != NULL && file[0] != '\0' && may_unknown(mime)) { + char guessed[256]; + guessed[0] = '\0'; + guess_httptype(guessed, file); + return strfield2(guessed, reference); + } + return 0; +} + +#endif + +#ifdef _WIN32_WCE_XXC +extern char cwd[MAX_PATH+1]; +static char *getcwd_ce(char *buffer, int maxlen) +{ + TCHAR fileUnc[MAX_PATH+1]; + char* plast; + + if(cwd[0] == 0) + { + 
GetModuleFileName(NULL, fileUnc, MAX_PATH); + WideCharToMultiByte(CP_ACP, 0, fileUnc, -1, cwd, MAX_PATH, NULL, NULL); + plast = strrchr(cwd, '\\'); + if(plast) + *plast = 0; + /* Special trick to keep start menu clean... */ + if(_stricmp(cwd, "\\windows\\start menu") == 0) + strcpy(cwd, "\\Apps"); + } + if(buffer) + strncpy(buffer, cwd, maxlen); + return cwd; +} +#undef getcwd +#define getcwd getcwd_ce +#endif + #endif diff --git a/src/htsmd5.c b/src/htsmd5.c index 47242d8..92aec5e 100644 --- a/src/htsmd5.c +++ b/src/htsmd5.c @@ -39,6 +39,9 @@ Please visit our Website: http://www.httrack.com /* Modified 2000 by Xavier Roche for domd5mem */ /* ------------------------------------------------------------ */ +/* Internal engine bytecode */ +#define HTS_INTERNAL_BYTECODE + #include "htsmd5.h" #include "md5.h" #include <string.h> @@ -48,12 +51,25 @@ int domd5mem(unsigned char * buf, int len, unsigned char * digest, int asAscii) { int endian = 1; unsigned char bindigest[16]; +#if 1 +//#ifndef _WIN32_WCE MD5_CTX ctx; MD5Init(&ctx, * ( (char*) &endian)); MD5Update(&ctx, buf, len); MD5Final(bindigest, &ctx); - +#else + /* Broken md5.. 
temporary hack */ + int i; + memset(bindigest, 0, 16); + if (len > 0) { + for(i = 0 ; i < len + 16 ; i++) { + bindigest[i % 16] ^= ( buf[i % len] + i + len ); + bindigest[(i - 1) % 16] ^= bindigest[ ( i + buf[i % len]*buf[(i-1) % len] ) % 16]; + } + } +#endif + if (!asAscii) { memcpy(digest, bindigest, 16); } else { @@ -70,7 +86,8 @@ int domd5mem(unsigned char * buf, int len, } unsigned long int md5sum32(char* buff) { - char digest[16]; - domd5mem(buff,strlen(buff),digest,0); - return *( (long int*)(char*)digest ); + unsigned char md5digest[16]; + unsigned char* md5digest_ = md5digest; + domd5mem(buff,strlen(buff),md5digest,0); + return *( (long int*)(char*)md5digest ); } diff --git a/src/htsmd5.h b/src/htsmd5.h index 84148bd..3e3b00c 100644 --- a/src/htsmd5.h +++ b/src/htsmd5.h @@ -42,9 +42,12 @@ Please visit our Website: http://www.httrack.com #ifndef HTSMD5_DEFH #define HTSMD5_DEFH +/* Library internal definictions */ +#ifdef HTS_INTERNAL_BYTECODE int domd5mem(unsigned char * buf, int len, unsigned char * digest, int asAscii); unsigned long int md5sum32(char* buff); +#endif #endif diff --git a/src/htsmodules.c b/src/htsmodules.c index 27ab855..3299c41 100644 --- a/src/htsmodules.c +++ b/src/htsmodules.c @@ -35,21 +35,20 @@ Please visit our Website: http://www.httrack.com /* Author: Xavier Roche */ /* ------------------------------------------------------------ */ -#ifndef _WIN32 -#if HTS_DLOPEN -#include <dlfcn.h> -#endif -#endif - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> +/* Internal engine bytecode */ +#define HTS_INTERNAL_BYTECODE #include "htsglobal.h" #include "htsmodules.h" #include "htsopt.h" extern int fspc(FILE* fp,char* type); +#ifndef _WIN32 +#if HTS_DLOPEN +#include <dlfcn.h> +#endif +#endif + /* >>> Put all modules definitions here */ #include "htszlib.h" #include "htsbase.h" @@ -71,9 +70,11 @@ t_hts_detect_swf hts_detect_swf = NULL; t_hts_parse_swf hts_parse_swf = NULL; int gz_is_available = 0; +#if 0 t_gzopen gzopen = NULL; 
t_gzread gzread = NULL; t_gzclose gzclose = NULL; +#endif int SSL_is_available = 0; t_SSL_shutdown SSL_shutdown = NULL; @@ -108,6 +109,7 @@ void abortLog__fnc(char* msg, char* file, int line) { FILE* fp = fopen("CRASH.TXT", "wb"); if (!fp) fp = fopen("/tmp/CRASH.TXT", "wb"); if (!fp) fp = fopen("C:\\CRASH.TXT", "wb"); + if (!fp) fp = fopen("CRASH.TXT", "wb"); if (fp) { fprintf(fp, "HTTrack " HTTRACK_VERSIONID " closed at '%s', line %d\r\n", file, line); fprintf(fp, "Reason:\r\n%s\r\n", msg); @@ -144,17 +146,60 @@ int hts_parse_externals(htsmoduleStruct* str) { return -1; } -/* NOTE: handled NOT closed */ -void* getFunctionPtr(char* file_, char* fncname) { - char file[1024]; +static void addCallback(htscallbacks* chain, void* moduleHandle, htscallbacksfncptr exitFnc) { + while(chain->next != NULL) { + chain = chain->next; + } + chain->next = calloct(1, sizeof(htscallbacks)); + assertf(chain->next != NULL); + chain = chain->next; + memset(chain, 0, sizeof(*chain)); + chain->exitFnc = exitFnc; + chain->moduleHandle = moduleHandle; +} + +void clearCallbacks(htscallbacks* chain_); +void clearCallbacks(htscallbacks* chain_) { + htscallbacks* chain; + chain = chain_; + while(chain != NULL) { + if (chain->exitFnc != NULL) { + (void) chain->exitFnc(); /* result ignored */ + chain->exitFnc = NULL; + } + chain = chain->next; + } + chain = chain_; + while(chain != NULL) { + if (chain->moduleHandle != NULL) { +#ifdef _WIN32 + FreeLibrary(chain->moduleHandle); +#else + dlclose(chain->moduleHandle); +#endif + } + chain = chain->next; + } + chain = chain_->next; // Don't free the block #0 + while(chain != NULL) { + htscallbacks* nextchain = chain->next; + freet(chain); + chain = nextchain; + } + chain_->next = NULL; // Empty +} + +void* getFunctionPtr(httrackp* opt, char* file_, char* fncname); +void* getFunctionPtr(httrackp* opt, char* file_, char* fncname) { + char BIGSTK file[1024]; void* handle; void* userfunction = NULL; strcpybuff(file, file_); #ifdef _WIN32 - handle = 
LoadLibrary(file); + handle = LoadLibraryA((char*)file); if (handle == NULL) { strcatbuff(file, ".dll"); - handle = LoadLibrary(file); + handle = LoadLibraryA((char*)file); } #else handle = dlopen(file, RTLD_LAZY); @@ -164,13 +209,61 @@ void* getFunctionPtr(char* file_, char* fncname) { } #endif if (handle) { - userfunction = (void*) DynamicGet(handle, fncname); + /* Thanks to Lars Clausen for the "wrapper-init" patch */ + /* If given arguments, call "<wrappername>_init" */ + char BIGSTK tmpName[1024]; + char *comma; + if ((comma = strchr(fncname, ',')) != NULL) { /* empty arg */ + *comma++ = '\0'; + } + + /* speficic plug init */ + { + t_htsWrapperPlugInit initfunction; + sprintf(tmpName, "%s_init", fncname); + initfunction = (t_htsWrapperPlugInit)DynamicGet(handle, (char*)tmpName); + if (initfunction != NULL) { + int result = (int) initfunction(comma); + if (!result) { + if (userfunction == NULL) { +#ifdef _WIN32 + FreeLibrary(handle); +#else + dlclose(handle); +#endif + } + return NULL; + } + } + } + /* wrapper_init() */ + { + t_htsWrapperInit initfunction = (t_htsWrapperInit)DynamicGet(handle, (char*)"wrapper_init"); + if (initfunction != NULL) { + if (! 
initfunction(fncname, comma)) { + if (userfunction == NULL) { +#ifdef _WIN32 + FreeLibrary(handle); +#else + dlclose(handle); +#endif + } + return NULL; + } + } + } + /* the function itself */ + userfunction = (void*) DynamicGet(handle, (char*)fncname); if (userfunction == NULL) { #ifdef _WIN32 FreeLibrary(handle); #else dlclose(handle); #endif + } else { + /* optional exit wrapper */ + t_htsWrapperExit exitFnc = (t_htsWrapperExit) DynamicGet(handle, (char*)"wrapper_exit"); + addCallback(&opt->state.callbacks, handle, exitFnc); // exitFnc can be null } } return userfunction; @@ -183,7 +276,10 @@ void htspe_init() { /* >>> Put all module initializations here */ + /* Zlib */ + gz_is_available = 1; + /* #if HTS_DLOPEN { void* handle; @@ -202,13 +298,14 @@ void htspe_init() { } } #endif + */ /* OpenSSL */ #if HTS_DLOPEN { void* handle; #ifdef _WIN32 - handle = LoadLibrary("ssleay32"); + handle = LoadLibraryA((char*)"ssleay32"); #else /* We are compatible with 0.9.6/7 and potentially above */ handle = dlopen("libssl.so.0.9.7", RTLD_LAZY); @@ -221,27 +318,27 @@ void htspe_init() { } #endif if (handle) { - SSL_shutdown = (t_SSL_shutdown) DynamicGet(handle, "SSL_shutdown"); - SSL_free = (t_SSL_free) DynamicGet(handle, "SSL_free"); - SSL_new = (t_SSL_new) DynamicGet(handle, "SSL_new"); - SSL_clear = (t_SSL_clear) DynamicGet(handle, "SSL_clear"); - SSL_set_fd = (t_SSL_set_fd) DynamicGet(handle, "SSL_set_fd"); - SSL_set_connect_state = (t_SSL_set_connect_state) DynamicGet(handle, "SSL_set_connect_state"); - SSL_connect = (t_SSL_connect) DynamicGet(handle, "SSL_connect"); - SSL_get_error = (t_SSL_get_error) DynamicGet(handle, "SSL_get_error"); - SSL_write = (t_SSL_write) DynamicGet(handle, "SSL_write"); - SSL_read = (t_SSL_read) DynamicGet(handle, "SSL_read"); - SSL_library_init = (t_SSL_library_init) DynamicGet(handle, "SSL_library_init"); - ERR_load_SSL_strings = (t_ERR_load_SSL_strings) DynamicGet(handle, "ERR_load_SSL_strings"); - SSLv23_client_method = 
(t_SSLv23_client_method) DynamicGet(handle, "SSLv23_client_method"); - SSL_CTX_new = (t_SSL_CTX_new) DynamicGet(handle, "SSL_CTX_new"); - SSL_load_error_strings = (t_SSL_load_error_strings) DynamicGet(handle, "SSL_load_error_strings"); - SSL_CTX_ctrl = (t_SSL_CTX_ctrl) DynamicGet(handle, "SSL_CTX_ctrl"); + SSL_shutdown = (t_SSL_shutdown) DynamicGet(handle, (char*)"SSL_shutdown"); + SSL_free = (t_SSL_free) DynamicGet(handle, (char*)"SSL_free"); + SSL_new = (t_SSL_new) DynamicGet(handle, (char*)"SSL_new"); + SSL_clear = (t_SSL_clear) DynamicGet(handle, (char*)"SSL_clear"); + SSL_set_fd = (t_SSL_set_fd) DynamicGet(handle, (char*)"SSL_set_fd"); + SSL_set_connect_state = (t_SSL_set_connect_state) DynamicGet(handle, (char*)"SSL_set_connect_state"); + SSL_connect = (t_SSL_connect) DynamicGet(handle, (char*)"SSL_connect"); + SSL_get_error = (t_SSL_get_error) DynamicGet(handle, (char*)"SSL_get_error"); + SSL_write = (t_SSL_write) DynamicGet(handle, (char*)"SSL_write"); + SSL_read = (t_SSL_read) DynamicGet(handle, (char*)"SSL_read"); + SSL_library_init = (t_SSL_library_init) DynamicGet(handle, (char*)"SSL_library_init"); + ERR_load_SSL_strings = (t_ERR_load_SSL_strings) DynamicGet(handle, (char*)"ERR_load_SSL_strings"); + SSLv23_client_method = (t_SSLv23_client_method) DynamicGet(handle, (char*)"SSLv23_client_method"); + SSL_CTX_new = (t_SSL_CTX_new) DynamicGet(handle, (char*)"SSL_CTX_new"); + SSL_load_error_strings = (t_SSL_load_error_strings) DynamicGet(handle, (char*)"SSL_load_error_strings"); + SSL_CTX_ctrl = (t_SSL_CTX_ctrl) DynamicGet(handle, (char*)"SSL_CTX_ctrl"); #ifdef _WIN32 - handle = LoadLibrary("libeay32"); + handle = LoadLibraryA((char*)"libeay32"); #endif - ERR_load_crypto_strings = (t_ERR_load_crypto_strings) DynamicGet(handle, "ERR_load_crypto_strings"); - ERR_error_string = (t_ERR_error_string) DynamicGet(handle, "ERR_error_string"); + ERR_load_crypto_strings = (t_ERR_load_crypto_strings) DynamicGet(handle, (char*)"ERR_load_crypto_strings"); + 
ERR_error_string = (t_ERR_error_string) DynamicGet(handle, (char*)"ERR_error_string"); if (SSL_shutdown && SSL_free && SSL_CTX_ctrl && SSL_new && SSL_clear && SSL_set_fd && SSL_set_connect_state && SSL_connect && SSL_get_error && SSL_write @@ -262,7 +359,7 @@ void htspe_init() { #if HTS_DLOPEN { #ifdef _WIN32 - void* handle = LoadLibrary("htsswf"); + void* handle = LoadLibraryA((char*)"htsswf"); #else void* handle = dlopen("libhtsswf.so.1", RTLD_LAZY); #endif @@ -300,6 +397,7 @@ static void htspe_log(htsmoduleStruct* str, char* msg) { } } +HTSEXT_API const char* hts_is_available(void); HTSEXT_API const char* hts_is_available(void) { return WHAT_is_available; } diff --git a/src/htsmodules.h b/src/htsmodules.h index 7d1154b..5d2b989 100644 --- a/src/htsmodules.h +++ b/src/htsmodules.h @@ -98,14 +98,22 @@ struct htsmoduleStruct { }; +/* Used to wrap module initialization */ +/* return 1 if init was ok */ +typedef int (*t_htsWrapperInit)(char *fn, char *args); +typedef int (*t_htsWrapperExit)(void); +typedef int (*t_htsWrapperPlugInit)(char *args); + +/* Library internal definictions */ +#ifdef HTS_INTERNAL_BYTECODE extern void htspe_init(void); extern int hts_parse_externals(htsmoduleStruct* str); -extern void* getFunctionPtr(char* file, char* fncname); extern int gz_is_available; extern int swf_is_available; extern int SSL_is_available; extern int V6_is_available; extern char WHAT_is_available[64]; +#endif #endif diff --git a/src/htsname.c b/src/htsname.c index 56fa6a6..8af2062 100644 --- a/src/htsname.c +++ b/src/htsname.c @@ -35,14 +35,15 @@ Please visit our Website: http://www.httrack.com /* Author: Xavier Roche */ /* ------------------------------------------------------------ */ +/* Internal engine bytecode */ +#define HTS_INTERNAL_BYTECODE + #include "htsname.h" /* specific definitions */ #include "htsbase.h" #include "htstools.h" #include "htsmd5.h" -#include <stdio.h> -#include <stdlib.h> #include <ctype.h> /* END specific definitions */ @@ -51,7 +52,7 @@ 
Please visit our Website: http://www.httrack.com #define ADD_STANDARD_PATH \ { /* ajout nom */\ - char buff[HTS_URLMAXSIZE*2];\ + char BIGSTK buff[HTS_URLMAXSIZE*2];\ buff[0]='\0';\ strncatbuff(buff,start_pos,(int) (nom_pos - start_pos));\ url_savename_addstr(save,buff);\ @@ -59,7 +60,7 @@ Please visit our Website: http://www.httrack.com #define ADD_STANDARD_NAME(shortname) \ { /* ajout nom */\ - char buff[HTS_URLMAXSIZE*2];\ + char BIGSTK buff[HTS_URLMAXSIZE*2];\ standard_name(buff,dot_pos,nom_pos,fil_complete,(shortname));\ url_savename_addstr(save,buff);\ } @@ -78,13 +79,38 @@ static const char *hts_tbdev[] = }; +#define URLSAVENAME_WAIT_FOR_AVAILABLE_SOCKET() do { \ + int prev = _hts_in_html_parsing; \ + while(back_pluggable_sockets_strict(back, back_max, opt) <= 0) { \ + _hts_in_html_parsing = 6; \ + /* Wait .. */ \ + back_wait(back,back_max,opt,cache,0); \ + /* Transfer rate */ \ + engine_stats(); \ + /* Refresh various stats */ \ + HTS_STAT.stat_nsocket=back_nsoc(back,back_max); \ + HTS_STAT.stat_errors=fspc(NULL,"error"); \ + HTS_STAT.stat_warnings=fspc(NULL,"warning"); \ + HTS_STAT.stat_infos=fspc(NULL,"info"); \ + HTS_STAT.nbk=backlinks_done(liens,lien_tot,ptr); \ + HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,back,back_max); \ + /* Check */ \ + if (!hts_htmlcheck_loop(back,back_max,-1,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) { \ + return -1; \ + } \ + } \ + _hts_in_html_parsing = prev; \ +} while(0) + // forme le nom du fichier à sauver (save) à partir de fil et adr // système intelligent, qui renomme en cas de besoin (exemple: deux INDEX.HTML et index.html) int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_adr,char* former_fil,char* referer_adr,char* referer_fil,httrackp* opt,lien_url** liens,int lien_tot,lien_back* back,int back_max,cache_back* cache,hash_struct* hash,int ptr,int numero_passe) { - char newfil[HTS_URLMAXSIZE*2]; /* ="" */ - /*char normadr_[HTS_URLMAXSIZE*2];*/ - char 
normfil_[HTS_URLMAXSIZE*2]; + char BIGSTK newfil[HTS_URLMAXSIZE*2]; /* ="" */ + /*char BIGSTK normadr_[HTS_URLMAXSIZE*2];*/ + char BIGSTK normadr_[HTS_URLMAXSIZE*2], normfil_[HTS_URLMAXSIZE*2]; + int protocol = 0; + static const char* protocol_str[] = {"http", "https", "ftp", "file", "unknown"}; char* normadr; char* normfil; char* fil; @@ -100,11 +126,11 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a /* 8-3 ? */ switch(opt->savename_83) { - case 1: + case 1: // 8-3 max_char=8; break; - case 2: - max_char=30; + case 2: // Level 2 File names may be up to 31 characters. + max_char=31; break; default: max_char=8; @@ -130,13 +156,33 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a // www-42.foo.com -> foo.com // foo.com/bar//foobar -> foo.com/bar/foobar if (opt->urlhack) { - // copy of adr (withiotu protocol), used for lookups (see urlhack) - normadr=jump_normalized(adr); + // copy of adr (without protocol), used for lookups (see urlhack) + normadr=adr_normalized(adr, normadr_); normfil=fil_normalized(fil,normfil_); + } else { + if (link_has_authority(adr_complete)) { // https or other protocols : in "http/" subfolder + char* pos = strchr(adr_complete, ':'); + if (pos != NULL) { + normadr_[0] = '\0'; + strncatbuff(normadr_, adr_complete, (int)(pos - adr_complete)); + strcatbuff(normadr_, "://"); + strcatbuff(normadr_, normadr); + normadr=normadr_; + } + } } // à afficher sans ftp:// print_adr=jump_protocol(adr); + if (strfield(adr_complete, "https:")) { + protocol = 1; + } else if (strfield(adr_complete, "ftp:")) { + protocol = 2; + } else if (strfield(adr_complete, "file:")) { + protocol = 3; + } else { + protocol = 0; + } // court-circuit pour lien primaire if (strnotempty(adr)==0) { @@ -199,7 +245,7 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a // chercher sans / ou avec / dans former { - char fil_complete_patche[HTS_URLMAXSIZE*2]; + char BIGSTK 
fil_complete_patche[HTS_URLMAXSIZE*2]; strcpybuff(fil_complete_patche,normfil); // Version avec ou sans / if (fil_complete_patche[strlen(fil_complete_patche)-1]=='/') @@ -254,30 +300,13 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a } // décoder % strcpybuff(fil,unescape_http(fil)); - /* - { - char tempo[HTS_URLMAXSIZE*2]; - int i,j=0; - for (i=0;i<(int) strlen(fil);i++) { - if (fil[i]=='%') { - i++; - tempo[j++]=(char) ehex(fil+i); - i++; // sauter 2 caractères finalement - } else - tempo[j++]=fil[i]; - } - tempo[j++]='\0'; - strcpybuff(fil,tempo); - } - */ - - + /* replace shtml to html.. */ switch (ishtml(fil)) { /* .html,.shtml,.. */ case 1: if ( - (strcmp(get_ext(fil),"html") != 0) - && (strcmp(get_ext(fil),"htm") != 0) + (strfield2(get_ext(fil),"html") == 0) + && (strfield2(get_ext(fil),"htm") == 0) ) { strcpybuff(ext,"html"); ext_chg=1; @@ -286,7 +315,7 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a case 0: if (!strnotempty(ext)) { if (is_userknowntype(get_ext(fil))) { // mime known by user - char mime[1024]; + char BIGSTK mime[1024]; mime[0]=ext[0]='\0'; get_userhttptype(0,mime,get_ext(fil)); if (strnotempty(mime)) { @@ -330,13 +359,35 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a } } // + } else if (is_userknowntype(fil)) { /* PATCH BY BRIAN SCHRÖDER. 
+ Lookup mimetype not only by extension, + but also by filename */ + /* Note: "foo.cgi => text/html" means that foo.cgi shall have the text/html MIME file type, + that is, ".html" */ + char BIGSTK mime[1024]; + mime[0]=ext[0]='\0'; + get_userhttptype(0, mime, fil); + if (strnotempty(mime)) { + give_mimext(ext, mime); + if (strnotempty(ext)) { + ext_chg=1; + } + } } else { // test imposible dans le cache, faire une requête // #if HTS_ANALYSTE int hihp=_hts_in_html_parsing; #endif int has_been_moved=0; - char curr_adr[HTS_URLMAXSIZE*2],curr_fil[HTS_URLMAXSIZE*2]; + char BIGSTK curr_adr[HTS_URLMAXSIZE*2],curr_fil[HTS_URLMAXSIZE*2]; + + /* Ensure we don't use too many sockets by using a "testing" one + If we have only 1 simultaneous connection authorized, wait for pending download + Wait for an available slot + */ + URLSAVENAME_WAIT_FOR_AVAILABLE_SOCKET(); + + /* Rock'in */ curr_adr[0]=curr_fil[0]='\0'; #if HTS_ANALYSTE _hts_in_html_parsing=2; // test @@ -383,7 +434,7 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a if (!hts_htmlcheck_loop(back,back_max,b,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) { return -1; } else if (_hts_cancel || !back_checkmirror(opt)) { // cancel 2 ou 1 (cancel parsing) - back_delete(opt,back,b); // cancel test + back_delete(opt,cache,back,b); // cancel test stop_looping = 1; } } @@ -399,7 +450,7 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a ) { // agh moved.. un tit tour de plus if ((petits_tours<5) && (former_adr) && (former_fil)) { // on va pas tourner en rond non plus! if ((int) strnotempty(back[b].r.location)) { // location existe! 
- char mov_url[HTS_URLMAXSIZE*2],mov_adr[HTS_URLMAXSIZE*2],mov_fil[HTS_URLMAXSIZE*2]; + char BIGSTK mov_url[HTS_URLMAXSIZE*2],mov_adr[HTS_URLMAXSIZE*2],mov_fil[HTS_URLMAXSIZE*2]; mov_url[0]=mov_adr[0]=mov_fil[0]='\0'; // strcpybuff(mov_url,back[b].r.location); // copier URL @@ -424,11 +475,12 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a robots_wizard* robots = (robots_wizard*) opt->robotsptr; if (hts_acceptlink(opt,ptr,lien_tot,liens, mov_adr,mov_fil, + NULL, NULL, &set_prio_to, NULL) == 1) { /* forbidden */ has_been_moved = 1; - back_maydelete(opt,back,b); // ok + back_maydelete(opt,cache,back,b); // ok strcpybuff(curr_adr,mov_adr); strcpybuff(curr_fil,mov_fil); mov_url[0]='\0'; @@ -439,7 +491,7 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a // ftp: stop! if (strfield(mov_url,"ftp://")) { // ftp, ok on arrête has_been_moved = 1; - back_maydelete(opt,back,b); // ok + back_maydelete(opt,cache,back,b); // ok strcpybuff(curr_adr,mov_adr); strcpybuff(curr_fil,mov_fil); stop_looping = 1; @@ -455,6 +507,7 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a } } // Ajouter + URLSAVENAME_WAIT_FOR_AVAILABLE_SOCKET(); if (back_add(back,back_max,opt,cache,mov_adr,mov_fil,methode,referer_adr,referer_fil,1,NULL)!=-1) { // OK if ( (opt->debug>1) && (opt->errlog!=NULL) ) { fspc(opt->errlog,"warning"); fprintf(opt->errlog,"(during prefetch) %s (%d) to link %s at %s%s"LF,back[b].r.msg,back[b].r.statuscode,back[b].r.location,curr_adr,curr_fil); @@ -462,7 +515,7 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a } // libérer emplacement backing actuel et attendre le prochain - back_maydelete(opt,back,b); + back_maydelete(opt,cache,back,b); strcpybuff(curr_adr,mov_adr); strcpybuff(curr_fil,mov_fil); b=back_index(back,back_max,curr_adr,curr_fil,methode); @@ -507,7 +560,7 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* 
former_a fspc(opt->errlog,0); fprintf(opt->errlog,"Error: (during prefetch) %s (%d) to link %s at %s%s"LF,back[b].r.msg,back[b].r.statuscode,back[b].r.location,curr_adr,curr_fil); test_flush; } - back_delete(opt,back,b); + back_delete(opt,cache,back,b); return -1; // ERREUR (404 par exemple) */ } @@ -531,7 +584,7 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a // FIN Si non déplacé, forcer type? // libérer emplacement backing - back_maydelete(opt,back,b); + back_maydelete(opt,cache,back,b); // --- --- --- // oops, a été déplacé.. on recalcule en récursif (osons!) @@ -787,7 +840,7 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a if (!short_ver) { // Noms longs strncatbuff(b,fil,(int) (nom_pos - fil) - 1); } else { - char pth[HTS_URLMAXSIZE*2],n83[HTS_URLMAXSIZE*2]; + char BIGSTK pth[HTS_URLMAXSIZE*2],n83[HTS_URLMAXSIZE*2]; pth[0]=n83[0]='\0'; // strncatbuff(pth,fil,(int) (nom_pos - fil) - 1); @@ -816,7 +869,7 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a *b='\0'; { char digest[32+2]; - char buff[HTS_URLMAXSIZE*2]; + char BIGSTK buff[HTS_URLMAXSIZE*2]; digest[0]=buff[0]='\0'; strcpybuff(buff,adr); strcatbuff(buff,fil_complete); @@ -831,6 +884,11 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a strncatbuff(b,url_md5(fil_complete),(tok == 'Q')?32:4); b+=strlen(b); // pointer à la fin break; + case 'r': case 'R': // protocol + *b='\0'; + strcatbuff(b, protocol_str[protocol]); + b+=strlen(b); // pointer à la fin + break; } } else *b++=*a++; @@ -1044,7 +1102,7 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a { char* a=jump_identification(save); if (a!=save) { - char tempo[HTS_URLMAXSIZE*2]; + char BIGSTK tempo[HTS_URLMAXSIZE*2]; char *b; tempo[0]='\0'; strcpybuff(tempo,"["); @@ -1061,7 +1119,7 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a // éviter les / au 
début (cause: N100) if (save[0]=='/') { - char tempo[HTS_URLMAXSIZE*2]; + char BIGSTK tempo[HTS_URLMAXSIZE*2]; strcpybuff(tempo,save+1); strcpybuff(save,tempo); } @@ -1110,7 +1168,7 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a case '/': case '.': { - char tempo[HTS_URLMAXSIZE*2]; tempo[0]='\0'; + char BIGSTK tempo[HTS_URLMAXSIZE*2]; tempo[0]='\0'; strncatbuff(tempo,save,(int) (a - save) + strlen(hts_tbdev[i])); strcatbuff(tempo,"_"); strcatbuff(tempo,a+strlen(hts_tbdev[i])); @@ -1123,15 +1181,57 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a i++; } } + /* Strip ending . or ' ' forbidden on windoz */ + { + int len; + char* a=save; + while((a=strstr(a,"./"))) { + *a = '_'; + } + a=save; + while((a=strstr(a," /"))) { + *a = '_'; + } + len = (int) strlen(save); + if (len > 0 && ( save[len - 1] == '.' || save[len - 1] == ' ') ) { + save[len - 1] = '_'; + } + } #endif // conversion 8-3 .. y compris pour les répertoires if (opt->savename_83) { - char n83[HTS_URLMAXSIZE*2]; + char BIGSTK n83[HTS_URLMAXSIZE*2]; long_to_83(opt->savename_83,n83,save); strcpybuff(save,n83); } + // enforce stricter ISO9660 compliance (bug reported by Steffo Carlsson) + // Level 1 File names are restricted to 8 characters with a 3 character extension, + // upper case letters, numbers and underscore; maximum depth of directories is 8. + // This will be our "DOS mode" + // L2: 31 characters + // A-Z,0-9,_ + if (opt->savename_83 > 0) { + char *a, *last; + for(last = save + strlen(save) - 1 ; last != save && *last != '/' && *last != '\\' && *last != '.' ; last--); + if (*last != '.') { + last = NULL; + } + for(a = save ; *a != '\0' ; a++) { + if (*a >= 'a' && *a <= 'z') { + *a -= 'a' - 'A'; + } + else if (*a == '.') { + if (a != last) { + *a = '_'; + } + } + else if ( ! 
( (*a >= 'A' && *a <= 'Z') || (*a >= '0' && *a <= '9') || *a == '_' || *a == '/' || *a == '\\') ) { + *a = '_'; + } + } + } /* ensure that there is no ../ (potential vulnerability) */ fil_simplifie(save); @@ -1148,7 +1248,7 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a // chemin primaire éventuel A METTRE AVANT if (strnotempty(opt->path_html)) { - char tempo[HTS_URLMAXSIZE*2]; + char BIGSTK tempo[HTS_URLMAXSIZE*2]; strcpybuff(tempo,opt->path_html); strcatbuff(tempo,save); strcpybuff(save,tempo); @@ -1189,17 +1289,22 @@ printf("%cParse: %d",13,i); #if HTS_CASSE if ((strcmp(liens[i]->adr,adr)==0) && (strcmp(liens[i]->fil,fil_complete)==0)) #else - if ((strfield2(liens[i]->adr,adr)) && (strfield2(liens[i]->fil,fil_complete))) + if ((strfield2(liens[i]->adr, normadr)) && (strfield2(liens[i]->fil, normfil))) + //if ((strfield2(liens[i]->adr,adr)) && (strfield2(liens[i]->fil,fil_complete))) #endif { // ok c'est le même lien, adresse déja définie - //printf("Ok, %s\n",save); - //i=lien_tot; // sortir + /* Take the existing name not to screw up with cAsE sEnSiTiViTy of Linux/Unix */ + if (strcmp(liens[i]->sav, save) != 0) { + strcpybuff(save, liens[i]->sav); + } i=0; #if DEBUG_SAVENAME printf("\nOK ALREADY DEFINED\n",13,i); #endif +#if HTS_CASSE +#endif } else { // utilisé par un AUTRE, changer de nom - char tempo[HTS_URLMAXSIZE*2]; + char BIGSTK tempo[HTS_URLMAXSIZE*2]; char* a=save+strlen(save)-1; char* b; int n=2; @@ -1310,7 +1415,7 @@ char* url_md5(char* fil_complete) { a=strchr(fil_complete,'?'); if (a) { if (strlen(a)) { - char buff[HTS_URLMAXSIZE*2]; + char BIGSTK buff[HTS_URLMAXSIZE*2]; a++; digest[0]=buff[0]='\0'; strcatbuff(buff,a); /* query string MD5 */ diff --git a/src/htsname.h b/src/htsname.h index aae5f99..61ed1de 100644 --- a/src/htsname.h +++ b/src/htsname.h @@ -42,9 +42,12 @@ Please visit our Website: http://www.httrack.com #include "htscore.h" +/* Library internal definictions */ +#ifdef HTS_INTERNAL_BYTECODE int 
url_savename(char* adr_complete,char* fil_complete,char* save,char* former_adr,char* former_fil,char* referer_adr,char* referer_fil,httrackp* opt,lien_url** liens,int lien_tot,lien_back* back,int back_max,cache_back* cache,hash_struct* hash,int ptr,int numero_passe); void standard_name(char* b,char* dot_pos,char* nom_pos,char* fil_complete,int short_ver); void url_savename_addstr(char* d,char* s); char* url_md5(char* fil_complete); +#endif #endif diff --git a/src/htsnet.h b/src/htsnet.h index dbdbcc6..7b7cc1a 100644 --- a/src/htsnet.h +++ b/src/htsnet.h @@ -45,7 +45,9 @@ Please visit our Website: http://www.httrack.com #include <ctype.h> #if HTS_WIN // pour read +#ifndef _WIN32_WCE #include <io.h> +#endif // pour FindFirstFile #include <winbase.h> #else @@ -71,12 +73,6 @@ Please visit our Website: http://www.httrack.com #ifndef HTS_DO_NOT_REDEFINE_in_addr_t typedef unsigned long in_addr_t; #endif -#undef min -#undef max -#undef Sleep -#define min(a,b) ((a)>(b)?(b):(a)) -#define max(a,b) ((a)>(b)?(a):(b)) -#define Sleep(a) { if (((a)*1000)%1000000) usleep(((a)*1000)%1000000); if (((a)*1000)/1000000) sleep(((a)*1000)/1000000); } #endif /* @@ -87,7 +83,7 @@ Please visit our Website: http://www.httrack.com /* Ipv4 structures */ typedef struct in_addr INaddr; /* This should handle all cases */ -typedef struct { +typedef struct SOCaddr { union { struct sockaddr_in in; struct sockaddr sa; @@ -155,7 +151,7 @@ strcpy(namebuf, dot); \ /* Ipv4 structures */ typedef struct in6_addr INaddr; /* This should handle all cases */ -typedef struct { +typedef struct SOCaddr { union { struct sockaddr_in6 in6; struct sockaddr_in in; @@ -236,7 +232,7 @@ getnameinfo((struct sockaddr *)&(ss), sslen, \ #endif /* Buffer structure to copy various hostent structures */ -typedef struct { +typedef struct t_fullhostent { t_hostent hp; char* list[2]; char addr[HTS_MAXADDRLEN]; /* various struct sockaddr structures */ diff --git a/src/htsnostatic.c b/src/htsnostatic.c index eff6184..22e7d7a 100644 
--- a/src/htsnostatic.c +++ b/src/htsnostatic.c @@ -35,13 +35,16 @@ Please visit our Website: http://www.httrack.com /* Author: Xavier Roche */ /* ------------------------------------------------------------ */ +/* Internal engine bytecode */ +#define HTS_INTERNAL_BYTECODE + #include "htsnostatic.h" #include "htsbase.h" #include "htshash.h" #include "htsinthash.h" -typedef struct { +typedef struct hts_varhash { /* inthash values; */ diff --git a/src/htsnostatic.h b/src/htsnostatic.h index f24f0ad..3bf4ec9 100644 --- a/src/htsnostatic.h +++ b/src/htsnostatic.h @@ -53,21 +53,12 @@ Please visit our Website: http://www.httrack.com #ifndef HTSNOSTATIC_DEFH #define HTSNOSTATIC_DEFH +/* Library internal definictions */ +#ifdef HTS_INTERNAL_BYTECODE + #include "htscore.h" #include "htsthread.h" -/* -#if USE_PTHREAD -#if HTS_WIN -#undef HTS_REENTRANT -#else -#define HTS_REENTRANT -#endif -#else -#undef HTS_REENTRANT -#endif -*/ - #define HTS_VAR_MAIN_HASH 127 /* @@ -157,7 +148,61 @@ void hts_destroyvar_key(void* adr); &cKey */ -#if HTS_WIN +#ifdef _WIN32 + +#ifdef _WIN32_WCE + +/* Windows CE: static only */ +#define NOSTATIC_XRESERVE(name, type, nelt) do { \ + /*__declspec( thread )*/ static type thValue[nelt]; \ + /* __declspec( thread ) */ int static initValue = 0; \ + name = thValue; \ + if (!initValue) { \ + initValue = 1; \ + memset(&thValue, 0, sizeof(thValue)); \ + } \ +} while(0) + +#elif 1 + +/* New Windows version: TLS */ +/* Suggested by daan at zwif.com to be more gentle with LoadLibrary (04/2004) +See http://msdn.microsoft.com/library/en-us/vccore/html/_core_rules_and_limitations_for_tls.asp +And especially the "DLL declares any nonlocal data or object as __declspec( thread )" section +*/ +#define NOSTATIC_XRESERVE(name,type,nelt) do { \ + static DWORD tlsIndex = 0; \ + static int initValue = 0; \ + if (initValue == 0) \ + { \ + if (!hts_maylockvar()) { \ + abortLog("unable to lock mutex (not initialized?!)"); \ + abort(); \ + } \ + hts_lockvar(); \ + if 
(initValue == 0) { \ + tlsIndex = TlsAlloc(); \ + if (tlsIndex == 0xFFFFFFFF) { \ + abortLog("unable to allocate thread local storage (TLS) for variable!"); \ + abort(); \ + } \ + initValue = 1; \ + } \ + hts_unlockvar(); \ + } \ + name = (type*)TlsGetValue(tlsIndex); \ + if (name == NULL) { \ + name = (type*)malloc(sizeof(type)*nelt); \ + if (name == NULL) { \ + abortLog("unable to allocate memory for variable!"); \ + abort(); \ + } \ + memset(name, 0, sizeof(type)*nelt); \ + TlsSetValue(tlsIndex, name); \ + } \ +} while(0) + +#else /* Windows: handled by the compiler */ #define NOSTATIC_XRESERVE(name, type, nelt) do { \ @@ -170,6 +215,8 @@ void hts_destroyvar_key(void* adr); } \ } while(0) +#endif + #else /* Un*x : slightly more complex, we have to create a thread-key */ @@ -227,3 +274,5 @@ else { \ #endif #endif + +#endif diff --git a/src/htsopt.h b/src/htsopt.h index 77910b6..3328ce0 100644 --- a/src/htsopt.h +++ b/src/htsopt.h @@ -44,7 +44,7 @@ Please visit our Website: http://www.httrack.com #include "htsbauth.h" // structure proxy -typedef struct { +typedef struct t_proxy { int active; char name[1024]; int port; @@ -52,14 +52,24 @@ typedef struct { } t_proxy; /* Structure utile pour copier en bloc les paramètres */ -typedef struct { +typedef struct htsfilters { char*** filters; int* filptr; //int* filter_max; } htsfilters; +/* User callbacks chain */ +typedef int (*htscallbacksfncptr)(void); +typedef struct htscallbacks htscallbacks; +struct htscallbacks { + char callbackName[128]; + void* moduleHandle; + htscallbacksfncptr exitFnc; + htscallbacks * next; +}; + /* Structure état du miroir */ -typedef struct { +typedef struct htsoptstate { int stop; int exit_xh; int back_add_stats; @@ -67,11 +77,13 @@ typedef struct { int mimehtml_created; char mimemid[256]; FILE* mimefp; + /* */ + htscallbacks callbacks; } htsoptstate; // paramètres httrack (options) -typedef struct { +typedef struct httrackp { int wizard; // wizard aucun/grand/petit int flush; // fflush sur 
les fichiers log int travel; // type de déplacements (same domain etc) @@ -96,7 +108,7 @@ typedef struct { int rateout; // nombre d'octets minium pour le transfert int maxtime; // temps max en secondes int maxrate; // taux de transfert max - int maxconn; // nombre max de connexions/s + float maxconn; // nombre max de connexions/s int waittime; // démarrage programmé int cache; // génération d'un cache //int aff_progress; // barre de progression @@ -108,6 +120,8 @@ typedef struct { int mimehtml; // MIME-html int user_agent_send; // user agent (ex: httrack/1.0 [sun]) char user_agent[128]; + char referer[256]; // referer + char from[256]; // from char path_log[1024]; // chemin pour cache et log char path_html[1024]; // chemin pour miroir char path_bin[1024]; // chemin pour templates @@ -135,6 +149,7 @@ typedef struct { int urlhack; // force "url normalization" to avoid loops int tolerant; // accepter content-length incorrect int parseall; // essayer de tout parser (tags inconnus contenant des liens, par exemple) + int parsedebug; // débugger parser (debug!) int norecatch; // ne pas reprendre les fichiers effacés localement par l'utilisateur int verbosedisplay; // animation textuelle char footer[256]; // ligne d'infos @@ -156,6 +171,7 @@ typedef struct { // int quiet; // poser des questions autres que wizard? 
int keyboard; // vérifier stdin + int bypass_limits; // bypass built-in limits // int is_update; // c'est une update (afficher "File updated...") int dir_topindex; // reconstruire top index par la suite @@ -164,7 +180,7 @@ typedef struct { } httrackp; // stats for httrack -typedef struct { +typedef struct hts_stat_struct { LLint HTS_TOTAL_RECV; // flux entrant reçu LLint stat_bytes; // octets écrits sur disque // int HTS_TOTAL_RECV_STATE; // status: 0 tout va bien 1: ralentir un peu 2: ralentir 3: beaucoup @@ -193,6 +209,9 @@ typedef struct { LLint nb; // données transférées actuellement (estimation) // LLint rate; + // + TStamp last_connect; // last connect() call + TStamp last_request; // last request issued } hts_stat_struct; diff --git a/src/htsparse.c b/src/htsparse.c index 3d35252..79cc1cc 100644 --- a/src/htsparse.c +++ b/src/htsparse.c @@ -37,12 +37,12 @@ Please visit our Website: http://www.httrack.com /* ------------------------------------------------------------ */ +/* Internal engine bytecode */ +#define HTS_INTERNAL_BYTECODE -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <time.h> +#ifndef _WIN32_WCE #include <fcntl.h> +#endif #include <ctype.h> /* File defs */ @@ -92,7 +92,7 @@ Please visit our Website: http://www.httrack.com abortLogFmt("not enough memory for current html document in HT_ADD_CHK : realloct(%d) failed" _ ht_size); \ exit(1); \ } \ - } \ +} \ ht_len+=A; #define HT_ADD_ADR \ if ((opt->getmode & 1) && (ptr>0)) { \ @@ -103,11 +103,35 @@ Please visit our Website: http://www.httrack.com } #define HT_ADD(A) \ if ((opt->getmode & 1) && (ptr>0)) { \ - int i=strlen(A),j=ht_len; \ - if (i) { \ - HT_ADD_CHK(i) \ - memcpy(ht_buff+j, A, i); \ - ht_buff[j+i]='\0'; \ + int i_=strlen(A),j_=ht_len; \ + if (i_) { \ + HT_ADD_CHK(i_) \ + memcpy(ht_buff+j_, A, i_); \ + ht_buff[j_+i_]='\0'; \ + } } +#define HT_ADD_HTMLESCAPED(A) \ + if ((opt->getmode & 1) && (ptr>0)) { \ + int i_, j_; \ + char BIGSTK tempo_[HTS_URLMAXSIZE*2]; \ + 
escape_for_html_print(A, tempo_); \ + i_=strlen(tempo_); \ + j_=ht_len; \ + if (i_) { \ + HT_ADD_CHK(i_) \ + memcpy(ht_buff+j_, tempo_, i_); \ + ht_buff[j_+i_]='\0'; \ + } } +#define HT_ADD_HTMLESCAPED_FULL(A) \ + if ((opt->getmode & 1) && (ptr>0)) { \ + int i_, j_; \ + char BIGSTK tempo_[HTS_URLMAXSIZE*2]; \ + escape_for_html_print_full(A, tempo_); \ + i_=strlen(tempo_); \ + j_=ht_len; \ + if (i_) { \ + HT_ADD_CHK(i_) \ + memcpy(ht_buff+j_, tempo_, i_); \ + ht_buff[j_+i_]='\0'; \ } } #define HT_ADD_START \ int ht_size=(int)(r->size*5)/4+REALLOC_SIZE; \ @@ -126,12 +150,11 @@ Please visit our Website: http://www.httrack.com #define HT_ADD_END { \ int ok=0;\ if (ht_buff) { \ - INTsys file_len=(INTsys) strlen(ht_buff);\ char digest[32+2];\ digest[0]='\0';\ - domd5mem(ht_buff,file_len,digest,1);\ - if (fsize(fconv(savename))==file_len) { \ - int mlen;\ + domd5mem(ht_buff,ht_len,digest,1);\ + if (fsize(fconv(savename))==ht_len) { \ + int mlen = 0;\ char* mbuff;\ cache_readdata(cache,"//[HTML-MD5]//",savename,&mbuff,&mlen);\ if (mlen) mbuff[mlen]='\0';\ @@ -148,8 +171,8 @@ Please visit our Website: http://www.httrack.com if (!ok) { \ fp=filecreate(savename); \ if (fp) { \ - if (file_len>0) {\ - if ((INTsys)fwrite(ht_buff,1,file_len,fp) != file_len) { \ + if (ht_len>0) {\ + if ((INTsys)fwrite(ht_buff,1,ht_len,fp) != ht_len) { \ int fcheck;\ if ((fcheck=check_fatal_io_errno())) {\ opt->state.exit_xh=-1;\ @@ -186,32 +209,32 @@ Please visit our Website: http://www.httrack.com filenote(savename,NULL); \ }\ if (cache->ndx)\ - cache_writedata(cache->ndx,cache->dat,"//[HTML-MD5]//",savename,digest,(int)strlen(digest));\ + cache_writedata(cache->ndx,cache->dat,"//[HTML-MD5]//",savename,digest,(int)strlen(digest));\ } \ freet(ht_buff); ht_buff=NULL; \ - } +} #define HT_ADD_FOP // COPY IN HTSCORE.C #define HT_INDEX_END do { \ -if (!makeindex_done) { \ -if (makeindex_fp) { \ - char tempo[1024]; \ + if (!makeindex_done) { \ + if (makeindex_fp) { \ + char BIGSTK tempo[1024]; \ if 
(makeindex_links == 1) { \ - sprintf(tempo,"<meta HTTP-EQUIV=\"Refresh\" CONTENT=\"0; URL=%s\">"CRLF,makeindex_firstlink); \ + sprintf(tempo,"<meta HTTP-EQUIV=\"Refresh\" CONTENT=\"0; URL=%s\">"CRLF,makeindex_firstlink); \ } else \ - tempo[0]='\0'; \ + tempo[0]='\0'; \ fprintf(makeindex_fp,template_footer, \ - "<!-- Mirror and index made by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->", \ - tempo \ - ); \ + "<!-- Mirror and index made by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->", \ + tempo \ + ); \ fflush(makeindex_fp); \ fclose(makeindex_fp); /* à ne pas oublier sinon on passe une nuit blanche */ \ makeindex_fp=NULL; \ usercommand(opt,0,NULL,fconcat(opt->path_html,"index.html"),"primary","primary"); \ -} \ -} \ -makeindex_done=1; /* ok c'est fait */ \ + } \ + } \ + makeindex_done=1; /* ok c'est fait */ \ } while(0) // Enregistrement d'un lien: @@ -228,50 +251,50 @@ makeindex_done=1; /* ok c'est fait */ \ // COPIE DE HTSCORE.C #define liens_record(A,F,S,FA,FF) { \ -int notecode=0; \ -int lienurl_len=((sizeof(lien_url)+HTS_ALIGN-1)/HTS_ALIGN)*HTS_ALIGN,\ + int notecode=0; \ + int lienurl_len=((sizeof(lien_url)+HTS_ALIGN-1)/HTS_ALIGN)*HTS_ALIGN,\ adr_len=strlen(A),\ fil_len=strlen(F),\ sav_len=strlen(S),\ cod_len=0,\ former_adr_len=strlen(FA),\ former_fil_len=strlen(FF); \ -if (former_adr_len>0) {\ + if (former_adr_len>0) {\ former_adr_len=(former_adr_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; \ former_fil_len=(former_fil_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; \ -} else former_adr_len=former_fil_len=0;\ -if (strlen(F)>6) if (strnotempty(codebase)) if (strfield(F+strlen(F)-6,".class")) { notecode=1; \ -cod_len=strlen(codebase); cod_len=(cod_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; } \ -adr_len=(adr_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; fil_len=(fil_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; sav_len=(sav_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; \ -if ((int) lien_size < (int) 
(adr_len+fil_len+sav_len+cod_len+former_adr_len+former_fil_len+lienurl_len)) { \ -lien_buffer=(char*) ((void*) calloct(add_tab_alloc,1)); \ -lien_size=add_tab_alloc; \ -if (lien_buffer!=NULL) { \ -liens[lien_tot]=(lien_url*) (void*) lien_buffer; lien_buffer+=lienurl_len; lien_size-=lienurl_len; \ -liens[lien_tot]->firstblock=1; \ -} \ -} else { \ -liens[lien_tot]=(lien_url*) (void*) lien_buffer; lien_buffer+=lienurl_len; lien_size-=lienurl_len; \ -liens[lien_tot]->firstblock=0; \ -} \ -if (liens[lien_tot]!=NULL) { \ -liens[lien_tot]->adr=lien_buffer; lien_buffer+=adr_len; lien_size-=adr_len; \ -liens[lien_tot]->fil=lien_buffer; lien_buffer+=fil_len; lien_size-=fil_len; \ -liens[lien_tot]->sav=lien_buffer; lien_buffer+=sav_len; lien_size-=sav_len; \ -liens[lien_tot]->cod=NULL; \ -if (notecode) { liens[lien_tot]->cod=lien_buffer; lien_buffer+=cod_len; lien_size-=cod_len; strcpybuff(liens[lien_tot]->cod,codebase); } \ -if (former_adr_len>0) {\ -liens[lien_tot]->former_adr=lien_buffer; lien_buffer+=former_adr_len; lien_size-=former_adr_len; \ -liens[lien_tot]->former_fil=lien_buffer; lien_buffer+=former_fil_len; lien_size-=former_fil_len; \ -strcpybuff(liens[lien_tot]->former_adr,FA); \ -strcpybuff(liens[lien_tot]->former_fil,FF); \ -}\ -strcpybuff(liens[lien_tot]->adr,A); \ -strcpybuff(liens[lien_tot]->fil,F); \ -strcpybuff(liens[lien_tot]->sav,S); \ -liens_record_sav_len(liens[lien_tot]); \ -hash_write(hashptr,lien_tot,opt->urlhack); \ -} \ + } else former_adr_len=former_fil_len=0;\ + if (strlen(F)>6) if (strnotempty(codebase)) if (strfield(F+strlen(F)-6,".class")) { notecode=1; \ + cod_len=strlen(codebase); cod_len=(cod_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; } \ + adr_len=(adr_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; fil_len=(fil_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; sav_len=(sav_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; \ + if ((int) lien_size < (int) (adr_len+fil_len+sav_len+cod_len+former_adr_len+former_fil_len+lienurl_len)) { \ + lien_buffer=(char*) ((void*) 
calloct(add_tab_alloc,1)); \ + lien_size=add_tab_alloc; \ + if (lien_buffer!=NULL) { \ + liens[lien_tot]=(lien_url*) (void*) lien_buffer; lien_buffer+=lienurl_len; lien_size-=lienurl_len; \ + liens[lien_tot]->firstblock=1; \ + } \ + } else { \ + liens[lien_tot]=(lien_url*) (void*) lien_buffer; lien_buffer+=lienurl_len; lien_size-=lienurl_len; \ + liens[lien_tot]->firstblock=0; \ + } \ + if (liens[lien_tot]!=NULL) { \ + liens[lien_tot]->adr=lien_buffer; lien_buffer+=adr_len; lien_size-=adr_len; \ + liens[lien_tot]->fil=lien_buffer; lien_buffer+=fil_len; lien_size-=fil_len; \ + liens[lien_tot]->sav=lien_buffer; lien_buffer+=sav_len; lien_size-=sav_len; \ + liens[lien_tot]->cod=NULL; \ + if (notecode) { liens[lien_tot]->cod=lien_buffer; lien_buffer+=cod_len; lien_size-=cod_len; strcpybuff(liens[lien_tot]->cod,codebase); } \ + if (former_adr_len>0) {\ + liens[lien_tot]->former_adr=lien_buffer; lien_buffer+=former_adr_len; lien_size-=former_adr_len; \ + liens[lien_tot]->former_fil=lien_buffer; lien_buffer+=former_fil_len; lien_size-=former_fil_len; \ + strcpybuff(liens[lien_tot]->former_adr,FA); \ + strcpybuff(liens[lien_tot]->former_fil,FF); \ + }\ + strcpybuff(liens[lien_tot]->adr,A); \ + strcpybuff(liens[lien_tot]->fil,F); \ + strcpybuff(liens[lien_tot]->sav,S); \ + liens_record_sav_len(liens[lien_tot]); \ + hash_write(hashptr,lien_tot,opt->urlhack); \ + } \ } #define ENGINE_LOAD_CONTEXT() \ @@ -314,32 +337,67 @@ hash_write(hashptr,lien_tot,opt->urlhack); \ #define ENGINE_SAVE_CONTEXT() \ /* Apply changes */ \ - * ( (int*) (str->lien_tot_) ) = lien_tot; \ - * ( (int*) (str->ptr_) ) = ptr; \ - * ( (int*) (str->lien_size_) ) = lien_size; \ - * ( (char**) (str->lien_buffer_) ) = lien_buffer; \ - /* */ \ - * stre->error_ = error; \ - * stre->store_errpage_ = store_errpage; \ - * stre->lien_max_ = lien_max; \ - /* */ \ - *stre->makeindex_done_ = makeindex_done; \ - *stre->makeindex_fp_ = makeindex_fp; \ - *stre->makeindex_links_ = makeindex_links; \ - /* */ \ - 
*stre->stat_fragment_ = stat_fragment + * ( (int*) (str->lien_tot_) ) = lien_tot; \ + * ( (int*) (str->ptr_) ) = ptr; \ + * ( (int*) (str->lien_size_) ) = lien_size; \ + * ( (char**) (str->lien_buffer_) ) = lien_buffer; \ + /* */ \ + * stre->error_ = error; \ + * stre->store_errpage_ = store_errpage; \ + * stre->lien_max_ = lien_max; \ + /* */ \ + *stre->makeindex_done_ = makeindex_done; \ + *stre->makeindex_fp_ = makeindex_fp; \ + *stre->makeindex_links_ = makeindex_links; \ + /* */ \ + *stre->stat_fragment_ = stat_fragment #define _FILTERS (*opt->filters.filters) #define _FILTERS_PTR (opt->filters.filptr) #define _ROBOTS ((robots_wizard*)opt->robotsptr) +/* Apply current *adr character for the script automate */ +#define AUTOMATE_LOOKUP_CURRENT_ADR() do { \ + if (inscript) { \ + int new_state_pos; \ + new_state_pos=inscript_state[inscript_state_pos][(unsigned char)*adr]; \ + if (new_state_pos < 0) { \ + new_state_pos=inscript_state[inscript_state_pos][INSCRIPT_DEFAULT]; \ + } \ + assertf(new_state_pos >= 0); \ + assertf(new_state_pos*sizeof(inscript_state[0]) < sizeof(inscript_state)); \ + inscript_state_pos=new_state_pos; \ + } \ +} while(0) + +/* Increment current pointer to 'steps' characters, modifying automate if necessary */ +#define INCREMENT_CURRENT_ADR(steps) do { \ + int steps__ = (steps); \ + while(steps__ > 0) { \ + adr++; \ + AUTOMATE_LOOKUP_CURRENT_ADR(); \ + steps__ --; \ + } \ +} while(0) + /* Main parser */ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { /* Load engine variables */ ENGINE_LOAD_CONTEXT(); - + #if HTS_ANALYSTE + { + char* cAddr = r->adr; + int cSize = (int) r->size; + if ( (opt->debug>0) && (opt->log!=NULL) ) { + fspc(opt->log,"info"); fprintf(opt->log,"engine: preprocess-html: %s%s"LF, urladr, urlfil); + } + if (hts_htmlcheck_preprocess(&cAddr, &cSize, urladr, urlfil) == 1) { + r->adr = cAddr; + r->size = cSize; + } + } if (hts_htmlcheck(r->adr,(int)r->size,urladr,urlfil)) { #endif FILE* fp=NULL; // fichier 
écrit localement @@ -348,8 +406,8 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { if ( (opt->debug>1) && (opt->log!=NULL) ) { fspc(opt->log,"debug"); fprintf(opt->log,"scan file.."LF); test_flush; } - - + + // Indexing! #if HTS_MAKE_KEYWORD_INDEX if (opt->kindex) { @@ -364,13 +422,13 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { } } #endif - + // Now, parsing if ((opt->getmode & 1) && (ptr>0)) { // récupérer les html sur disque // créer le fichier html local HT_ADD_FOP; // écrire peu à peu le fichier } - + if (!error) { int detect_title=0; // détection du title int back_add_stats = opt->state.back_add_stats; @@ -410,10 +468,11 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { char* intag_start=adr; char* intag_startattr=NULL; int intag_start_valid=0; + int intag_ctype=0; // int parent_relative=0; // the parent is the base path (.js, .css..) HT_ADD_START; // débuter - + /* Initialize script automate for comments, quotes.. */ memset(inscript_state, 0xff, sizeof(inscript_state)); inscript_state[INSCRIPT_START][INSCRIPT_DEFAULT]=INSCRIPT_START; /* by default, stay in START */ @@ -444,12 +503,12 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { /* statistics */ if ((opt->getmode & 1) && (ptr>0)) { - /* - HTS_STAT.stat_files++; - HTS_STAT.stat_bytes+=r->size; + /* + HTS_STAT.stat_files++; + HTS_STAT.stat_bytes+=r->size; */ } - + /* Primary list or URLs */ if (ptr == 0) { intag=1; @@ -457,28 +516,46 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { } /* Check is the file is a .js file */ else if ( - (strfield2(r->contenttype,"application/x-javascript")!=0) - || (strfield2(r->contenttype,"text/css")!=0) + (compare_mime(r->contenttype, str->url_file, "application/x-javascript")!=0) + || (compare_mime(r->contenttype, str->url_file, "text/css")!=0) ) { /* JavaScript js file */ - inscript=1; - inscript_name="script"; - intag=1; // because après <script> on y est .. 
- pas utile - intag_start_valid=0; // OUI car nous sommes dans du code, plus dans du "vrai" tag - if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"note: this file is a javascript file"LF); test_flush; - } - // all links must be checked against parent, not this link - if (liens[ptr]->precedent != 0) { - parent_relative=1; + inscript=1; + if (opt->parsedebug) { HT_ADD("<@@ inscript @@>"); } + inscript_name="script"; + intag=1; // because après <script> on y est .. - pas utile + intag_start_valid=0; // OUI car nous sommes dans du code, plus dans du "vrai" tag + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"note: this file is a javascript file"LF); test_flush; + } + // for javascript only + if (compare_mime(r->contenttype, str->url_file, "application/x-javascript") != 0) { + // all links must be checked against parent, not this link + if (liens[ptr]->precedent != 0) { + parent_relative=1; + } + } } - } - /* Or a real audio */ - else if (strfield2(r->contenttype,"audio/x-pn-realaudio")!=0) { /* realaudio link file */ - inscript=intag=1; + /* Or a real audio */ + else if (compare_mime(r->contenttype, str->url_file, "audio/x-pn-realaudio")!=0) { /* realaudio link file */ + inscript=intag=0; inscript_name="media"; intag_start_valid=0; - in_media="RAM"; // real media! - } + in_media="LNK"; // real media! -> links + } + /* Or a m3u playlist */ + else if (compare_mime(r->contenttype, str->url_file, "audio/x-mpegurl")!=0) { /* mp3 link file */ + inscript=intag=0; + inscript_name="media"; + intag_start_valid=0; + in_media="LNK"; // m3u! 
-> links + } + else if (compare_mime(r->contenttype, str->url_file, "application/x-authorware-map")!=0) { /* macromedia aam file */ + inscript=intag=0; + inscript_name="media"; + intag_start_valid=0; + in_media="AAM"; // aam + } + // Detect UTF8 format if (is_unicode_utf8((unsigned char*) r->adr, (unsigned int) r->size) == 1) { no_esc_utf=1; @@ -487,8 +564,8 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { } // Hack to prevent any problems with ram files of other files * ( r->adr + r->size ) = '\0'; - - + + // ------------------------------------------------------------ // analyser ce qu'il y a en mémoire (fichier html) // on scanne les balises @@ -505,9 +582,10 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { int valid_p=0; // force to take p even if == 0 int ending_p='\0'; // ending quote? int archivetag_p=0; // avoid multiple-archives with commas + int unquoted_script=0; INSCRIPT inscript_state_pos_prev=inscript_state_pos; error=0; - + /* Hack to avoid NULL char problems with C syntax */ /* Yes, some bogus HTML pages can embed null chars and therefore can not be properly handled if this hack is not done @@ -516,9 +594,9 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { if ( ((int) (adr - r->adr)) < r->size) *adr=' '; } - - - + + + /* index.html built here */ @@ -546,24 +624,24 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { } } else p=0; - + if (p) { // ok center if (makeindex_fp==NULL) { verif_backblue(opt,opt->path_html); // générer gif makeindex_fp=filecreate(fconcat(opt->path_html,"index.html")); if (makeindex_fp!=NULL) { - + // Header fprintf(makeindex_fp,template_header, "<!-- Mirror and index made by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->" ); - + } else makeindex_done=-1; // fait, erreur } - + if (makeindex_fp!=NULL) { - char tempo[HTS_URLMAXSIZE*2]; - char s[HTS_URLMAXSIZE*2]; + char BIGSTK tempo[HTS_URLMAXSIZE*2]; + char BIGSTK 
s[HTS_URLMAXSIZE*2]; char* a=NULL; char* b=NULL; s[0]='\0'; @@ -594,19 +672,19 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { strncpy(s,a,b-a+1); *(s+(b-a)+1)='\0'; } - + // Body fprintf(makeindex_fp,template_body, tempo, s ); - + } } } } } - + } else if (liens[ptr]->depth<opt->depth) { // on a sauté level1+1 et level1 HT_INDEX_END; } @@ -616,137 +694,141 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { /* end -- index.html built here */ - - - + + + /* Parse */ if ( (*adr=='<') /* No starting tag */ && (!inscript) /* Not in (java)script */ && (!incomment) /* Not in comment (<!--) */ + && (!in_media) /* Not in media */ ) { - intag=1; - //parseall_incomment=0; - //inquote=0; // effacer quote - intag_start=adr; intag_start_valid=1; - codebase[0]='\0'; // effacer éventuel codebase - - if (opt->getmode & 1) { // sauver html - p=strfield(adr,"</html"); - if (p==0) p=strfield(adr,"<head>"); - // if (p==0) p=strfield(adr,"<doctype"); - if (p) { - char* eol="\n"; - if (strchr(r->adr,'\r')) - eol="\r\n"; - if (strnotempty(opt->footer)) { - char tempo[1024+HTS_URLMAXSIZE*2]; - char gmttime[256]; - tempo[0]='\0'; - time_gmt_rfc822(gmttime); - strcatbuff(tempo,eol); - sprintf(tempo+strlen(tempo),opt->footer,jump_identification(urladr),urlfil,gmttime,HTTRACK_VERSIONID,"","","","","","",""); - strcatbuff(tempo,eol); - //fwrite(tempo,1,strlen(tempo),fp); - HT_ADD(tempo); - } - if (r->charset[0]) { - HT_ADD("<meta http-equiv=\"content-type\" content=\"text/html;charset="); - HT_ADD(r->charset); - HT_ADD("\">"); - HT_ADD(eol); - } - } - } - - // éliminer les <!-- (commentaires) : intag dévalidé - if (*(adr+1)=='!') - if (*(adr+2)=='-') - if (*(adr+3)=='-') { - intag=0; - incomment=1; - intag_start_valid=0; + intag=1; + intag_ctype=0; + //parseall_incomment=0; + //inquote=0; // effacer quote + intag_start=adr; intag_start_valid=1; + codebase[0]='\0'; // effacer éventuel codebase + + if (opt->getmode & 1) { // sauver html + 
p=strfield(adr,"</html"); + if (p==0) p=strfield(adr,"<head>"); + // if (p==0) p=strfield(adr,"<doctype"); + if (p) { + char* eol="\n"; + if (strchr(r->adr,'\r')) + eol="\r\n"; + if (strnotempty(opt->footer)) { + char BIGSTK tempo[1024+HTS_URLMAXSIZE*2]; + char gmttime[256]; + tempo[0]='\0'; + time_gmt_rfc822(gmttime); + strcatbuff(tempo,eol); + sprintf(tempo+strlen(tempo),opt->footer,jump_identification(urladr),urlfil,gmttime,HTTRACK_VERSIONID,"","","","","","",""); + strcatbuff(tempo,eol); + //fwrite(tempo,1,strlen(tempo),fp); + HT_ADD(tempo); + if (r->charset[0]) { + HT_ADD("<!-- Added by HTTrack --><meta http-equiv=\"content-type\" content=\"text/html;charset="); + HT_ADD(r->charset); + HT_ADD("\"><!-- /Added by HTTrack -->"); + HT_ADD(eol); + } + } } - - } + } + + // éliminer les <!-- (commentaires) : intag dévalidé + if (*(adr+1)=='!') + if (*(adr+2)=='-') + if (*(adr+3)=='-') { + intag=0; + incomment=1; + intag_start_valid=0; + } + + } else if ( (*adr=='>') /* ending tag */ - && ( (!inscript) || (inscript_tag) ) /* and in tag (or in script) */ + && ( (!inscript && !in_media) || (inscript_tag) ) /* and in tag (or in script) */ ) { - if (inscript_tag) { - inscript_tag=inscript=0; - intag=0; - incomment=0; - intag_start_valid=0; - } else if (!incomment) { - intag=0; //inquote=0; - - // entrée dans du javascript? - // on parse ICI car il se peut qu'on ait eu a parser les src=.. dedans - //if (!inscript) { // sinon on est dans un obj.write(".. - if ((intag_start_valid) && - ( - check_tag(intag_start,"script") - || - check_tag(intag_start,"style") - ) - ) { - char* a=intag_start; // < - // ** while(is_realspace(*(--a))); - if (*a=='<') { // sûr que c'est un tag? - if (check_tag(intag_start,"script")) - inscript_name="script"; - else - inscript_name="style"; - inscript=1; - inscript_state_pos=INSCRIPT_START; - intag=1; // because après <script> on y est .. 
- pas utile - intag_start_valid=0; // OUI car nous sommes dans du code, plus dans du "vrai" tag - } - } - } else { /* end of comment? */ - // vérifier fermeture correcte - if ( (*(adr-1)=='-') && (*(adr-2)=='-') ) { + if (inscript_tag) { + inscript_tag=inscript=0; intag=0; incomment=0; intag_start_valid=0; - } -#if GT_ENDS_COMMENT - /* wrong comment ending */ - else { - /* check if correct ending does not exists - <!-- foo > example <!-- bar > is sometimes accepted by browsers - when no --> is used somewhere else.. darn those browsers are dirty - */ - if (!strstr(adr,"-->")) { + if (opt->parsedebug) { HT_ADD("<@@ /inscript @@>"); } + } else if (!incomment) { + intag=0; //inquote=0; + + // entrée dans du javascript? + // on parse ICI car il se peut qu'on ait eu a parser les src=.. dedans + //if (!inscript) { // sinon on est dans un obj.write(".. + if ((intag_start_valid) && + ( + check_tag(intag_start,"script") + || + check_tag(intag_start,"style") + ) + ) { + char* a=intag_start; // < + // ** while(is_realspace(*(--a))); + if (*a=='<') { // sûr que c'est un tag? + if (check_tag(intag_start,"script")) + inscript_name="script"; + else + inscript_name="style"; + inscript=1; + inscript_state_pos=INSCRIPT_START; + intag=1; // because après <script> on y est .. - pas utile + intag_start_valid=0; // OUI car nous sommes dans du code, plus dans du "vrai" tag + if (opt->parsedebug) { HT_ADD("<@@ inscript @@>"); } + } + } + } else { /* end of comment? */ + // vérifier fermeture correcte + if ( (*(adr-1)=='-') && (*(adr-2)=='-') ) { intag=0; incomment=0; intag_start_valid=0; } - } +#if GT_ENDS_COMMENT + /* wrong comment ending */ + else { + /* check if correct ending does not exists + <!-- foo > example <!-- bar > is sometimes accepted by browsers + when no --> is used somewhere else.. 
darn those browsers are dirty + */ + if (!strstr(adr,"-->")) { + intag=0; + incomment=0; + intag_start_valid=0; + } + } #endif + } + //} } + //else if (*adr==34) { + // inquote=(inquote?0:1); //} - } - //else if (*adr==34) { - // inquote=(inquote?0:1); - //} - else if (intag || inscript) { // nous sommes dans un tag/commentaire, tester si on recoit un tag + else if (intag || inscript || in_media) { // nous sommes dans un tag/commentaire, tester si on recoit un tag int p_type=0; int p_nocatch=0; int p_searchMETAURL=0; // chercher ..URL=<url> int add_class=0; // ajouter .class int add_class_dots_to_patch=0; // number of '.' in code="x.y.z<realname>" char* p_flush=NULL; - - + + // ------------------------------------------------------------ // parsing évolé // ------------------------------------------------------------ - if (((isalpha((unsigned char)*adr)) || (*adr=='/') || (inscript) || (inscriptgen))) { // sinon pas la peine de tester.. - - - /* caractère de terminaison pour "miniparsing" javascript=.. ? + if (((isalpha((unsigned char)*adr)) || (*adr=='/') || (inscript) || (in_media) || (inscriptgen))) { // sinon pas la peine de tester.. + + + /* caractère de terminaison pour "miniparsing" javascript=.. ? 
(ex: <a href="javascript:()" action="foo"> ) */ if (inscript_tag) { if (inscript_tag_lastc) { @@ -754,39 +836,58 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { /* sortir */ inscript_tag=inscript=0; incomment=0; + if (opt->parsedebug) { HT_ADD("<@@ /inscript @@>"); } } } } /* automate */ - if (inscript) { - int new_state_pos; - new_state_pos=inscript_state[inscript_state_pos][(unsigned char)*adr]; - if (new_state_pos < 0) { - new_state_pos=inscript_state[inscript_state_pos][INSCRIPT_DEFAULT]; - } - assertf(new_state_pos >= 0); - assertf(new_state_pos*sizeof(inscript_state[0]) < sizeof(inscript_state)); - inscript_state_pos=new_state_pos; - } - - + AUTOMATE_LOOKUP_CURRENT_ADR(); + + // Note: // Certaines pages ne respectent pas le html // notamment les guillements ne sont pas fixés // Nous sommes dans un tag, donc on peut faire un test plus // large pour pouvoi prendre en compte ces particularités - + // à vérifier: ACTION, CODEBASE, VRML - + if (in_media) { - if (strcmp(in_media,"RAM")==0) { // real media + if (strcmp(in_media,"LNK")==0) { // real media p=0; valid_p=1; } + else if (strcmp(in_media,"AAM")==0) { // AAM + if (is_space((unsigned char)adr[0]) && ! is_space((unsigned char)adr[1])) { + char* a = adr + 1; + int n = 0; + int ok = 0; + int dot = 0; + while(n < HTS_URLMAXSIZE/2 && a[n] != '\0' && + ( ! is_space((unsigned char)a[n]) || ! ( ok = 1) ) + ) { + if (a[n] == '.') { + dot = n; + } + n++; + } + if (ok && dot > 0) { + char BIGSTK tmp[HTS_URLMAXSIZE/2 + 2]; + tmp[0] = '\0'; + strncat(tmp, a + dot + 1, n - dot - 1); + if (is_knowntype(tmp) || ishtml_ext(tmp) != -1) { + adr++; + p = 0; + valid_p = 1; + unquoted_script = 1; + } + } + } + } } else if (ptr>0) { /* pas première page 0 (primary) */ p=0; // saut pour le nom de fichier: adresse nom fichier=adr+p - + // ------------------------------ // détection d'écriture JavaScript. // osons les obj.write et les obj.href=.. ! osons! 
@@ -823,31 +924,31 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { must_be_terminated=';'; // et si t'as oublié le ; tu sais pas coder a++; // sauter = } - - }*/ - + + }*/ + // on a un truc du genre instruction"code généré" dont on parse le code if (check_this_fking_line) { while(is_realspace(*a)) a++; if ((*a=='\'') || (*a=='"')) { // départ de '' ou "" char *b; - int ex=0; scriptgen_q=*a; // quote b=a+1; // départ de la chaîne // vérifier forme ("code") et pas ("code"+var), ingérable do { - a++; // caractère suivant if (*a==scriptgen_q && *(a-1)!='\\') // quote non slash - ex=1; // sortie - if (*a==10 && *(a-1) != '\\' /* LF and no continue (\) character */ - && ( *(a-1) != '\r' || *(a-2) != '\\' ) ) /* and not CRLF and no .. */ - ex=1; - } while(!ex); + break; // sortie + else if (*a==10 && *(a-1) != '\\' /* LF and no continue (\) character */ + && ( *(a-1) != '\r' || *(a-2) != '\\' ) ) /* and not CRLF and no .. */ + break; + else + a++; // caractère suivant + } while((a-b) < HTS_URLMAXSIZE / 2); if (*a==scriptgen_q) { // fin du quote a++; while(is_realspace(*a)) a++; if (*a==must_be_terminated) { // parenthèse fermante: ("..") - + // bon, on doit parser une ligne javascript // 1) si check.. ==1 alors c'est un nom de fichier direct, donc // on fixe p sur le saut nécessaire pour atteindre le nom du fichier @@ -864,7 +965,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { inscriptgen=1; // SCRIPTGEN actif adr=b; // jump } - + if ((opt->debug>1) && (opt->log!=NULL)) { char str[512]; str[0]='\0'; @@ -872,19 +973,19 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { fspc(opt->log,"debug"); fprintf(opt->log,"active code (%s) detected in javascript: %s"LF,(check_this_fking_line==2)?"parse":"pickup",str); test_flush; } } - + } - + } - - + + } } } // fin detection code générant javascript vers html // ------------------------------ - - + + // analyse proprement dite, A HREF=.. etc.. 
if (!p) { // si dans un tag, et pas dans un script - sauf si on analyse un obj.write(".. @@ -898,7 +999,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { p_type=2; // c'est un chemin } } - + /* Tags supplémentaires à vérifier (<img src=..> etc) */ if (p==0) { int i=0; @@ -913,7 +1014,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { i++; } } - + /* Tags supplémentaires en début à vérifier (<object .. hotspot1=..> etc) */ if (p==0) { int i=0; @@ -922,7 +1023,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { i++; } } - + /* Tags supplémentaires à vérifier : URL=.. */ if (p==0) { int i=0; @@ -930,10 +1031,40 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { p=rech_tageq(adr,hts_detectURL[i]); i++; } - if (p) - p_searchMETAURL=1; + if (p) { + if (intag_ctype == 1) { + p = 0; +#if 0 + //if ((pos=rech_tageq(adr, "content"))) { + char temp[256]; + char* token = NULL; + int len = rech_endtoken(adr + pos, &token); + if (len > 0 && len < sizeof(temp) - 2) { + char* chpos; + temp[0] = '\0'; + strncat(temp, token, len); + if ((chpos = strstr(temp, "charset")) + && + (chpos = strchr(chpos, '=')) + ) { + chpos++; + while(is_space(*chpos)) chpod++; + chpos + } + } +#endif + } + // <META HTTP-EQUIV="Refresh" CONTENT="3;URL=http://www.example.com"> + else if (intag_ctype == 2) { + p_searchMETAURL=1; + } else { + p = 0; /* cancel */ + } + } + + } - + /* Tags supplémentaires à vérifier, mais à ne pas capturer */ if (p==0) { int i=0; @@ -944,125 +1075,127 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { if (p) p_nocatch=1; /* ne pas rechercher */ } - + /* Evénements */ if (p==0 && ! 
inscript /* we don't want events inside document.write */ ) { - int i=0; - /* détection onLoad etc */ - while( (p==0) && (strnotempty(hts_detect_js[i])) ) { - p=rech_tageq(adr,hts_detect_js[i]); - i++; - } - /* non détecté - détecter également les onXxxxx= */ - if (p==0) { - if ( (*adr=='o') && (*(adr+1)=='n') && isUpperLetter(*(adr+2)) ) { - p=0; - while(isalpha((unsigned char)adr[p]) && (p<64) ) p++; - if (p<64) { - while(is_space(adr[p])) p++; - if (adr[p]=='=') - p++; - else p=0; - } else p=0; + int i=0; + /* détection onLoad etc */ + while( (p==0) && (strnotempty(hts_detect_js[i])) ) { + p=rech_tageq(adr,hts_detect_js[i]); + i++; } + /* non détecté - détecter également les onXxxxx= */ + if (p==0) { + if ( (*adr=='o') && (*(adr+1)=='n') && isUpperLetter(*(adr+2)) ) { + p=0; + while(isalpha((unsigned char)adr[p]) && (p<64) ) p++; + if (p<64) { + while(is_space(adr[p])) p++; + if (adr[p]=='=') + p++; + else p=0; + } else p=0; + } + } + /* OK, événement repéré */ + if (p) { + inscript_tag_lastc=*(adr+p); /* à attendre à la fin */ + adr+=p+1; /* saut */ + /* + On est désormais dans du code javascript + */ + inscript_name=""; + inscript=inscript_tag=1; + inscript_state_pos=INSCRIPT_START; + if (opt->parsedebug) { HT_ADD("<@@ inscript @@>"); } + } + p=0; /* quoi qu'il arrive, ne rien démarrer ici */ } - /* OK, événement repéré */ - if (p) { - inscript_tag_lastc=*(adr+p); /* à attendre à la fin */ - adr+=p+1; /* saut */ - /* - On est désormais dans du code javascript - */ - inscript_name=""; - inscript=inscript_tag=1; - inscript_state_pos=INSCRIPT_START; - } - p=0; /* quoi qu'il arrive, ne rien démarrer ici */ - } - - // <APPLET CODE=.. pour les applet java.. [CODEBASE (chemin..) à faire] - if (p==0) { - p=rech_tageq(adr,"code"); - if (p) { - if ((intag_start_valid) && check_tag(intag_start,"applet")) { // dans un <applet ! 
- p_type=-1; // juste le nom de fichier+dossier, écire avant codebase - add_class=1; // ajouter .class au besoin - - // vérifier qu'il n'y a pas de codebase APRES - // sinon on swappe les deux. - // pas très propre mais c'est ce qu'il y a de plus simple à faire!! - - { - char *a; - a=adr; - while((*a) && (*a!='>') && (!rech_tageq(a,"codebase"))) a++; - if (rech_tageq(a,"codebase")) { // banzai! codebase= - char* b; - b=strchr(a,'>'); - if (b) { - if (((int) (b - adr)) < 1000) { // au total < 1Ko - char tempo[HTS_URLMAXSIZE*2]; - tempo[0]='\0'; - strncatbuff(tempo,a,(int) (b - a) ); - strcatbuff( tempo," "); - strncatbuff(tempo,adr,(int) (a - adr - 1)); - // éventuellement remplire par des espaces pour avoir juste la taille - while((int) strlen(tempo)<((int) (b - adr))) - strcatbuff(tempo," "); - // pas d'erreur? - if ((int) strlen(tempo) == ((int) (b - adr) )) { - strncpy(adr,tempo,strlen(tempo)); // PAS d'octet nul à la fin! - p=0; // DEVALIDER!! - p_type=0; - add_class=0; + + // <APPLET CODE=.. pour les applet java.. [CODEBASE (chemin..) à faire] + if (p==0) { + p=rech_tageq(adr,"code"); + if (p) { + if ((intag_start_valid) && check_tag(intag_start,"applet")) { // dans un <applet ! + p_type=-1; // juste le nom de fichier+dossier, écire avant codebase + add_class=1; // ajouter .class au besoin + + // vérifier qu'il n'y a pas de codebase APRES + // sinon on swappe les deux. + // pas très propre mais c'est ce qu'il y a de plus simple à faire!! + + { + char *a; + a=adr; + while((*a) && (*a!='>') && (!rech_tageq(a,"codebase"))) a++; + if (rech_tageq(a,"codebase")) { // banzai! 
codebase= + char* b; + b=strchr(a,'>'); + if (b) { + if (((int) (b - adr)) < 1000) { // au total < 1Ko + char BIGSTK tempo[HTS_URLMAXSIZE*2]; + tempo[0]='\0'; + strncatbuff(tempo,a,(int) (b - a) ); + strcatbuff( tempo," "); + strncatbuff(tempo,adr,(int) (a - adr - 1)); + // éventuellement remplire par des espaces pour avoir juste la taille + while((int) strlen(tempo)<((int) (b - adr))) + strcatbuff(tempo," "); + // pas d'erreur? + if ((int) strlen(tempo) == ((int) (b - adr) )) { + strncpy(adr,tempo,strlen(tempo)); // PAS d'octet nul à la fin! + p=0; // DEVALIDER!! + p_type=0; + add_class=0; + } } } } } + } - } } - } - - // liens à patcher mais pas à charger (ex: codebase) - if (p==0) { // note: si non chargé (ex: ignorer .class) patché tout de même - p=rech_tageq(adr,"codebase"); - if (p) { - if ((intag_start_valid) && check_tag(intag_start,"applet")) { // dans un <applet ! - p_type=-2; - } else p=-1; // ne plus chercher + + // liens à patcher mais pas à charger (ex: codebase) + if (p==0) { // note: si non chargé (ex: ignorer .class) patché tout de même + p=rech_tageq(adr,"codebase"); + if (p) { + if ((intag_start_valid) && check_tag(intag_start,"applet")) { // dans un <applet ! 
+ p_type=-2; + } else p=-1; // ne plus chercher + } } - } - - - // Meta tags pour robots - if (p==0) { - if (opt->robots) { - if ((intag_start_valid) && check_tag(intag_start,"meta")) { - if (rech_tageq(adr,"name")) { // name=robots.txt - char tempo[1100]; - char* a; - tempo[0]='\0'; - a=strchr(adr,'>'); + + + // Meta tags pour robots + if (p==0) { + if (opt->robots) { + if ((intag_start_valid) && check_tag(intag_start,"meta")) { + if (rech_tageq(adr,"name")) { // name=robots.txt + char tempo[1100]; + char* a; + tempo[0]='\0'; + a=strchr(adr,'>'); #if DEBUG_ROBOTS - printf("robots.txt meta tag detected\n"); + printf("robots.txt meta tag detected\n"); #endif - if (a) { - if (((int) (a - adr)) < 999 ) { - strncatbuff(tempo,adr,(int) (a - adr)); - if (strstrcase(tempo,"content")) { - if (strstrcase(tempo,"robots")) { - if (strstrcase(tempo,"nofollow")) { + if (a) { + if (((int) (a - adr)) < 999 ) { + strncatbuff(tempo,adr,(int) (a - adr)); + if (strstrcase(tempo,"content")) { + if (strstrcase(tempo,"robots")) { + if (strstrcase(tempo,"nofollow")) { #if DEBUG_ROBOTS - printf("robots.txt meta tag: nofollow in %s%s\n",urladr,urlfil); + printf("robots.txt meta tag: nofollow in %s%s\n",urladr,urlfil); #endif - nofollow=1; // NE PLUS suivre liens dans cette page - if (opt->errlog) { - fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Link %s%s not scanned (follow robots meta tag)"LF,urladr,urlfil); - test_flush; + nofollow=1; // NE PLUS suivre liens dans cette page + if (opt->errlog) { + fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Link %s%s not scanned (follow robots meta tag)"LF,urladr,urlfil); + test_flush; + } } } } @@ -1072,379 +1205,400 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { } } } - } - - // entrée dans une applet javascript - /*if (!inscript) { // sinon on est dans un obj.write(".. 
- if (p==0) - if (rech_sampletag(adr,"script")) - if (check_tag(intag_start,"script")) { - inscript=1; - } - }*/ - - // Ici on procède à une analyse du code javascript pour tenter de récupérer - // certains fichiers évidents. - // C'est devenu obligatoire vu le nombre de pages qui intègrent - // des images réactives par exemple - } - } else if (inscript) { + + // charset meta tags + if (p==0) { + if ((intag_start_valid) && check_tag(intag_start,"meta")) { + int pos; + // <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" /> + if ((pos=rech_tageq(adr, "http-equiv"))) { + const char* token = NULL; + int len = rech_endtoken(adr + pos, &token); + if (len > 0) { + if (strfield(token, "content-type")) { + intag_ctype=1; + } + else if (strfield(token, "refresh")) { + intag_ctype=2; + } + } + } + } + } + + // entrée dans une applet javascript + /*if (!inscript) { // sinon on est dans un obj.write(".. + if (p==0) + if (rech_sampletag(adr,"script")) + if (check_tag(intag_start,"script")) { + inscript=1; + } + }*/ + + // Ici on procède à une analyse du code javascript pour tenter de récupérer + // certains fichiers évidents. 
+ // C'est devenu obligatoire vu le nombre de pages qui intègrent + // des images réactives par exemple + } + } else if (inscript) { #if 0 - /* Check // javascript comments */ - if (*adr == 10 || *adr == 13) { - inscript_check_comments = 1; - inscript_in_comments = 0; - } - else if (inscript_check_comments) { - if (!is_realspace(*adr)) { - inscript_check_comments = 0; - if (adr[0] == '/' && adr[1] == '/') { - inscript_in_comments = 1; + /* Check // javascript comments */ + if (*adr == 10 || *adr == 13) { + inscript_check_comments = 1; + inscript_in_comments = 0; + } + else if (inscript_check_comments) { + if (!is_realspace(*adr)) { + inscript_check_comments = 0; + if (adr[0] == '/' && adr[1] == '/') { + inscript_in_comments = 1; + } } } - } #endif - /* Parse */ - assertf(inscript_name != NULL); - if ( - ( - (strfield(adr,"/script") && strfield(inscript_name, "script")) - || - (strfield(adr,"/style") && strfield(inscript_name, "style")) - ) - ) { - char* a=adr; - //while(is_realspace(*(--a))); - while( is_realspace(*a) ) a--; - a--; - if (*a=='<') { // sûr que c'est un tag? - inscript=0; - } - } else if (inscript_state_pos == INSCRIPT_START /*!inscript_in_comments*/) { - /* - Script Analyzing - different types supported: - foo="url" - foo("url") or foo(url) - foo "url" - */ - int nc; - char expected = '='; // caractère attendu après - char* expected_end = ";"; - int can_avoid_quotes=0; - char quotes_replacement='\0'; - int ensure_not_mime=0; - if (inscript_tag) - expected_end=";\"\'"; // voir a href="javascript:doc.location='foo'" - nc = strfield(adr,".src"); // nom.src="image"; - if (!nc) nc = strfield(adr,".location"); // document.location="doc" - if (!nc) nc = strfield(adr,":location"); // javascript:location="doc" - if (!nc) nc = strfield(adr,".href"); // document.location="doc" - if (!nc) if ( (nc = strfield(adr,".open")) ) { // window.open("doc",.. 
- expected='('; // parenthèse - expected_end="),"; // fin: virgule ou parenthèse - ensure_not_mime=1; //* ensure the url is not a mime type */ - } - if (!nc) if ( (nc = strfield(adr,".replace")) ) { // window.replace("url") - expected='('; // parenthèse - expected_end=")"; // fin: parenthèse - } - if (!nc) if ( (nc = strfield(adr,".link")) ) { // window.link("url") - expected='('; // parenthèse - expected_end=")"; // fin: parenthèse - } - if (!nc) if ( (nc = strfield(adr,"url")) && (!isalnum(*(adr - 1))) - && *(adr - 1) != '_' - ) { // url(url) - expected='('; // parenthèse - expected_end=")"; // fin: parenthèse - can_avoid_quotes=1; - quotes_replacement=')'; - } - if (!nc) if ( (nc = strfield(adr,"import")) ) { // import "url" - if (is_space(*(adr+nc))) { - expected=0; // no char expected - } else - nc=0; - } - if (nc) { - char *a; - a=adr+nc; - while(is_realspace(*a)) a++; - if ((*a == expected) || (!expected)) { - if (expected) - a++; - while(is_realspace(*a)) a++; - if ((*a==34) || (*a=='\'') || (can_avoid_quotes)) { - char *b,*c; - int ndelim=1; - if ((*a==34) || (*a=='\'')) - a++; - else - ndelim=0; - b=a; - if (ndelim) { - while((*b!=34) && (*b!='\'') && (*b!='\0')) b++; + /* Parse */ + assertf(inscript_name != NULL); + if ( + *adr == '/' && + ( + (strfield(adr,"/script") && strfield(inscript_name, "script")) + || + (strfield(adr,"/style") && strfield(inscript_name, "style")) + ) + ) { + char* a=adr; + //while(is_realspace(*(--a))); + while( is_realspace(*a) ) a--; + a--; + if (*a=='<') { // sûr que c'est un tag? 
+ inscript=0; + if (opt->parsedebug) { HT_ADD("<@@ /inscript @@>"); } + } + } else if (inscript_state_pos == INSCRIPT_START /*!inscript_in_comments*/) { + /* + Script Analyzing - different types supported: + foo="url" + foo("url") or foo(url) + foo "url" + */ + int nc; + char expected = '='; // caractère attendu après + char* expected_end = ";"; + int can_avoid_quotes=0; + char quotes_replacement='\0'; + int ensure_not_mime=0; + if (inscript_tag) + expected_end=";\"\'"; // voir a href="javascript:doc.location='foo'" + nc = strfield(adr,".src"); // nom.src="image"; + if (!nc) nc = strfield(adr,".location"); // document.location="doc" + if (!nc) nc = strfield(adr,":location"); // javascript:location="doc" + if (!nc) nc = strfield(adr,".href"); // document.location="doc" + if (!nc) if ( (nc = strfield(adr,".open")) ) { // window.open("doc",.. + expected='('; // parenthèse + expected_end="),"; // fin: virgule ou parenthèse + ensure_not_mime=1; //* ensure the url is not a mime type */ + } + if (!nc) if ( (nc = strfield(adr,".replace")) ) { // window.replace("url") + expected='('; // parenthèse + expected_end=")"; // fin: parenthèse + } + if (!nc) if ( (nc = strfield(adr,".link")) ) { // window.link("url") + expected='('; // parenthèse + expected_end=")"; // fin: parenthèse + } + if (!nc) if ( (nc = strfield(adr,"url")) && (!isalnum(*(adr - 1))) + && *(adr - 1) != '_' + ) { // url(url) + expected='('; // parenthèse + expected_end=")"; // fin: parenthèse + can_avoid_quotes=1; + quotes_replacement=')'; } - else { - while((*b != quotes_replacement) && (*b!='\0')) b++; + if (!nc) if ( (nc = strfield(adr,"import")) ) { // import "url" + if (is_space(*(adr+nc))) { + expected=0; // no char expected + } else + nc=0; } - c=b--; c+=ndelim; - while(*c==' ') c++; - if ((strchr(expected_end,*c)) || (*c=='\n') || (*c=='\r')) { - c-=(ndelim+1); - if ((int) (c - a + 1)) { - if (ensure_not_mime) { - int i = 0; - while(a != NULL && hts_main_mime[i] != NULL && hts_main_mime[i][0] != '\0') 
{ - int p; - if ((p=strfield(a, hts_main_mime[i])) && a[p] == '/') { - a=NULL; - } - i++; + if (nc) { + char *a; + a=adr+nc; + while(is_realspace(*a)) a++; + if ((*a == expected) || (!expected)) { + if (expected) + a++; + while(is_realspace(*a)) a++; + if ((*a==34) || (*a=='\'') || (can_avoid_quotes)) { + char *b,*c; + int ndelim=1; + if ((*a==34) || (*a=='\'')) + a++; + else + ndelim=0; + b=a; + if (ndelim) { + while((*b!=34) && (*b!='\'') && (*b!='\0')) b++; } - } - if (a != NULL) { - if ((opt->debug>1) && (opt->log!=NULL)) { - char str[512]; - str[0]='\0'; - strncatbuff(str,a,minimum((int) (c - a + 1),32)); - fspc(opt->log,"debug"); fprintf(opt->log,"link detected in javascript: %s"LF,str); test_flush; + else { + while((*b != quotes_replacement) && (*b!='\0')) b++; } - p=(int) (a - adr); // p non nul: TRAITER CHAINE COMME FICHIER - if (can_avoid_quotes) { - ending_p=quotes_replacement; + c=b--; c+=ndelim; + while(*c==' ') c++; + if ((strchr(expected_end,*c)) || (*c=='\n') || (*c=='\r')) { + c-=(ndelim+1); + if ((int) (c - a + 1)) { + if (ensure_not_mime) { + int i = 0; + while(a != NULL && hts_main_mime[i] != NULL && hts_main_mime[i][0] != '\0') { + int p; + if ((p=strfield(a, hts_main_mime[i])) && a[p] == '/') { + a=NULL; + } + i++; + } + } + if (a != NULL) { + if ((opt->debug>1) && (opt->log!=NULL)) { + char str[512]; + str[0]='\0'; + strncatbuff(str,a,minimum((int) (c - a + 1),32)); + fspc(opt->log,"debug"); fprintf(opt->log,"link detected in javascript: %s"LF,str); test_flush; + } + p=(int) (a - adr); // p non nul: TRAITER CHAINE COMME FICHIER + if (can_avoid_quotes) { + ending_p=quotes_replacement; + } + } + } } + + } } } - - - } + } - } - } } - } - - } else { // ptr == 0 - //p=rech_tageq(adr,"primary"); // lien primaire, yeah - p=0; // No stupid tag anymore, raw link - valid_p=1; // Valid even if p==0 - while ((adr[p] == '\r') || (adr[p] == '\n')) - p++; - //can_avoid_quotes=1; - ending_p='\r'; - } - - } else if (isspace((unsigned char)*adr)) { - 
intag_startattr=adr+1; // attribute in tag (for dirty parsing) - } - - - // ------------------------------------------------------------ - // dernier recours - parsing "sale" : détection systématique des .gif, etc. - // risque: générer de faux fichiers parazites - // fix: ne parse plus dans les commentaires - // ------------------------------------------------------------ - if ( (opt->parseall) && (ptr>0) && (!in_media) /* && (!inscript_in_comments)*/ ) { // option parsing "brut" - //int incomment_justquit=0; - if (!is_realspace(*adr)) { - int noparse=0; - - // Gestion des /* */ + + } else { // ptr == 0 + //p=rech_tageq(adr,"primary"); // lien primaire, yeah + p=0; // No stupid tag anymore, raw link + valid_p=1; // Valid even if p==0 + while ((adr[p] == '\r') || (adr[p] == '\n')) + p++; + //can_avoid_quotes=1; + ending_p='\r'; + } + + } else if (isspace((unsigned char)*adr)) { + intag_startattr=adr+1; // attribute in tag (for dirty parsing) + } + + + // ------------------------------------------------------------ + // dernier recours - parsing "sale" : détection systématique des .gif, etc. 
+ // risque: générer de faux fichiers parazites + // fix: ne parse plus dans les commentaires + // ------------------------------------------------------------ + if ( (opt->parseall) && (ptr>0) && (!in_media) /* && (!inscript_in_comments)*/ ) { // option parsing "brut" + //int incomment_justquit=0; + if (!is_realspace(*adr)) { + int noparse=0; + + // Gestion des /* */ #if 0 - if (inscript) { - if (parseall_incomment) { - if ((*adr=='/') && (*(adr-1)=='*')) - parseall_incomment=0; - incomment_justquit=1; // ne pas noter dernier caractère - } else { - if ((*adr=='/') && (*(adr+1)=='*')) - parseall_incomment=1; - } - } else - parseall_incomment=0; + if (inscript) { + if (parseall_incomment) { + if ((*adr=='/') && (*(adr-1)=='*')) + parseall_incomment=0; + incomment_justquit=1; // ne pas noter dernier caractère + } else { + if ((*adr=='/') && (*(adr+1)=='*')) + parseall_incomment=1; + } + } else + parseall_incomment=0; #endif - /* ensure automate state 0 (not in comments, quotes..) */ - if (inscript && ( - inscript_state_pos != INSCRIPT_INQUOTE && inscript_state_pos != INSCRIPT_INQUOTE2 - ) ) { - noparse=1; - } - - /* vérifier que l'on est pas dans un <!-- --> pur */ - if ( (!intag) && (incomment) && (!inscript)) - noparse=1; /* commentaire */ - - // recherche d'URLs - if (!noparse) { - //if ((!parseall_incomment) && (!noparse)) { - if (!p) { // non déja trouvé - if (adr != r->adr) { // >1 caractère - // scanner les chaines - if ((*adr == '\"') || (*adr=='\'')) { // "xx.gif" 'xx.gif' - if (strchr("=(,",parseall_lastc)) { // exemple: a="img.gif.. 
(handles comments) - char *a=adr; - char stop=*adr; // " ou ' - int count=0; - - // sauter caractères - a++; - // copier - while((*a) && (*a!='\'') && (*a!='\"') && (count<HTS_URLMAXSIZE)) { count++; a++; } - - // ok chaine terminée par " ou ' - if ((*a == stop) && (count<HTS_URLMAXSIZE) && (count>0)) { - char c; - char* aend; - // - aend=a; // sauver début - a++; - while(is_taborspace(*a)) a++; - c=*a; - if (strchr("),;>/+\r\n",c)) { // exemple: ..img.gif"; - // le / est pour funct("img.gif" /* URL */); - char tempo[HTS_URLMAXSIZE*2]; - char type[256]; - int url_ok=0; // url valide? - tempo[0]='\0'; type[0]='\0'; - // - strncatbuff(tempo,adr+1,count); - // - if ((!strchr(tempo,' ')) || inscript) { // espace dedans: méfiance! (sauf dans code javascript) - int invalid_url=0; - - // escape - unescape_amp(tempo); - - // Couper au # ou ? éventuel - { - char* a=strchr(tempo,'#'); - if (a) - *a='\0'; - a=strchr(tempo,'?'); - if (a) - *a='\0'; - } - - // vérifier qu'il n'y a pas de caractères spéciaux - if (!strnotempty(tempo)) - invalid_url=1; - else if (strchr(tempo,'*') - || strchr(tempo,'<') - || strchr(tempo,'>') - || strchr(tempo,',') /* list of files ? */ - || strchr(tempo,'\"') /* potential parsing bug */ - || strchr(tempo,'\'') /* potential parsing bug */ - ) - invalid_url=1; - else if (tempo[0] == '.' && isalnum(tempo[1])) // ".gif" - invalid_url=1; - - /* non invalide? */ - if (!invalid_url) { - // Un plus à la fin? Alors ne pas prendre sauf si extension ("/toto.html#"+tag) - if (c!='+') { // PAS de plus à la fin + /* ensure automate state 0 (not in comments, quotes..) 
*/ + if (inscript && ( + inscript_state_pos != INSCRIPT_INQUOTE && inscript_state_pos != INSCRIPT_INQUOTE2 + ) ) { + noparse=1; + } + + /* vérifier que l'on est pas dans un <!-- --> pur */ + if ( (!intag) && (incomment) && (!inscript)) + noparse=1; /* commentaire */ + + // recherche d'URLs + if (!noparse) { + //if ((!parseall_incomment) && (!noparse)) { + if (!p) { // non déja trouvé + if (adr != r->adr) { // >1 caractère + // scanner les chaines + if ((*adr == '\"') || (*adr=='\'')) { // "xx.gif" 'xx.gif' + if (strchr("=(,",parseall_lastc)) { // exemple: a="img.gif.. (handles comments) + char *a=adr; + char stop=*adr; // " ou ' + int count=0; + + // sauter caractères + a++; + // copier + while((*a) && (*a!='\'') && (*a!='\"') && (count<HTS_URLMAXSIZE)) { count++; a++; } + + // ok chaine terminée par " ou ' + if ((*a == stop) && (count<HTS_URLMAXSIZE) && (count>0)) { + char c; + char* aend; + // + aend=a; // sauver début + a++; + while(is_taborspace(*a)) a++; + c=*a; + if (strchr("),;>/+\r\n",c)) { // exemple: ..img.gif"; + // le / est pour funct("img.gif" /* URL */); + char BIGSTK tempo[HTS_URLMAXSIZE*2]; + char type[256]; + int url_ok=0; // url valide? + tempo[0]='\0'; type[0]='\0'; + // + strncatbuff(tempo,adr+1,count); + // + if ((!strchr(tempo,' ')) || inscript) { // espace dedans: méfiance! (sauf dans code javascript) + int invalid_url=0; + + // escape + unescape_amp(tempo); + + // Couper au # ou ? éventuel + { + char* a=strchr(tempo,'#'); + if (a) + *a='\0'; + a=strchr(tempo,'?'); + if (a) + *a='\0'; + } + + // vérifier qu'il n'y a pas de caractères spéciaux + if (!strnotempty(tempo)) + invalid_url=1; + else if (strchr(tempo,'*') + || strchr(tempo,'<') + || strchr(tempo,'>') + || strchr(tempo,',') /* list of files ? */ + || strchr(tempo,'\"') /* potential parsing bug */ + || strchr(tempo,'\'') /* potential parsing bug */ + ) + invalid_url=1; + else if (tempo[0] == '.' && isalnum(tempo[1])) // ".gif" + invalid_url=1; + + /* non invalide? 
*/ + if (!invalid_url) { + // Un plus à la fin? Alors ne pas prendre sauf si extension ("/toto.html#"+tag) + if (c!='+') { // PAS de plus à la fin #if 0 - char* a; + char* a; #endif - // "Comparisons of scheme names MUST be case-insensitive" (RFC2616) - //if ((strncmp(tempo,"http://",7)==0) || (strncmp(tempo,"ftp://",6)==0)) // ok pas de problème - if ( - (strfield(tempo,"http:")) - || (strfield(tempo,"ftp:")) + // "Comparisons of scheme names MUST be case-insensitive" (RFC2616) + //if ((strncmp(tempo,"http://",7)==0) || (strncmp(tempo,"ftp://",6)==0)) // ok pas de problème + if ( + (strfield(tempo,"http:")) + || (strfield(tempo,"ftp:")) #if HTS_USEOPENSSL - || ( - SSL_is_available && - (strfield(tempo,"https:")) - ) + || ( + SSL_is_available && + (strfield(tempo,"https:")) + ) #endif - ) // ok pas de problème - url_ok=1; - else if (tempo[strlen(tempo)-1]=='/') { // un slash: ok.. - if (inscript) // sinon si pas javascript, méfiance (répertoire style base?) - url_ok=1; - } + ) // ok pas de problème + url_ok=1; + else if (tempo[strlen(tempo)-1]=='/') { // un slash: ok.. + if (inscript) // sinon si pas javascript, méfiance (répertoire style base?) + url_ok=1; + } #if 0 - else if ((a=strchr(tempo,'/'))) { // un slash: ok.. - if (inscript) { // sinon si pas javascript, méfiance (style "text/css") - if (strchr(a+1,'/')) // un seul / : abandon (STYLE type='text/css') - if (!strchr(tempo,' ')) // avoid spaces (too dangerous for comments) + else if ((a=strchr(tempo,'/'))) { // un slash: ok.. + if (inscript) { // sinon si pas javascript, méfiance (style "text/css") + if (strchr(a+1,'/')) // un seul / : abandon (STYLE type='text/css') + if (!strchr(tempo,' ')) // avoid spaces (too dangerous for comments) + url_ok=1; + } + } +#endif + } + // Prendre si extension reconnue + if (!url_ok) { + get_httptype(type,tempo,0); + if (strnotempty(type)) // type reconnu! url_ok=1; + else if (is_dyntype(get_ext(tempo))) // reconnu php,cgi,asp.. 
+ url_ok=1; + // MAIS pas les foobar@aol.com !! + if (strchr(tempo,'@')) + url_ok=0; } - } -#endif - } - // Prendre si extension reconnue - if (!url_ok) { - get_httptype(type,tempo,0); - if (strnotempty(type)) // type reconnu! - url_ok=1; - else if (is_dyntype(get_ext(tempo))) // reconnu php,cgi,asp.. - url_ok=1; - // MAIS pas les foobar@aol.com !! - if (strchr(tempo,'@')) - url_ok=0; - } - // - // Ok, cela pourrait être une URL - if (url_ok) { - - // Check if not fodbidden tag (id,name..) - if (intag_start_valid) { - if (intag_start) - if (intag_startattr) - if (intag) - if (!inscript) - if (!incomment) { - int i=0,nop=0; - while( (nop==0) && (strnotempty(hts_nodetect[i])) ) { - nop=rech_tageq(intag_startattr,hts_nodetect[i]); - i++; - } - // Forbidden tag - if (nop) { - url_ok=0; - if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"dirty parsing: bad tag avoided: %s"LF,hts_nodetect[i-1]); test_flush; + // + // Ok, cela pourrait être une URL + if (url_ok) { + + // Check if not fodbidden tag (id,name..) + if (intag_start_valid) { + if (intag_start) + if (intag_startattr) + if (intag) + if (!inscript) + if (!incomment) { + int i=0,nop=0; + while( (nop==0) && (strnotempty(hts_nodetect[i])) ) { + nop=rech_tageq(intag_startattr,hts_nodetect[i]); + i++; + } + // Forbidden tag + if (nop) { + url_ok=0; + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"dirty parsing: bad tag avoided: %s"LF,hts_nodetect[i-1]); test_flush; + } + } } - } - } - } - - - // Accepter URL, on la traitera comme une URL normale!! - if (url_ok) { - valid_p = 1; - p = 0; + } + + + // Accepter URL, on la traitera comme une URL normale!! 
+ if (url_ok) { + valid_p = 1; + p = 0; + } + + } } - } } } - } } } } + } // p == 0 + + } // not in comment + + // plus dans un commentaire + if ( inscript_state_pos == INSCRIPT_START + && inscript_state_pos_prev == INSCRIPT_START) { + parseall_lastc=*adr; // caractère avant le prochain } - } // p == 0 - - } // not in comment - - // plus dans un commentaire - if ( inscript_state_pos == INSCRIPT_START - && inscript_state_pos_prev == INSCRIPT_START) { - parseall_lastc=*adr; // caractère avant le prochain - } } // if realspace } // if parseall - - + + // ------------------------------------------------------------ // p!=0 : on a repéré un éventuel lien // ------------------------------------------------------------ @@ -1457,11 +1611,11 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { char quote='\0'; int quoteinscript=0; int noquote=0; - + // si nofollow ou un stop a été déclenché, réécrire tous les liens en externe if ((nofollow) || (opt->state.stop)) p_nocatch=1; - + // écrire codebase avant, flusher avant code if ((p_type==-1) || (p_type==-2)) { if ((opt->getmode & 1) && (ptr>0)) { @@ -1469,116 +1623,120 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { } lastsaved=adr; // dernier écrit+1 } - + // sauter espaces - adr+=p; + // adr+=p; + INCREMENT_CURRENT_ADR(p); while( ( is_space(*adr) || ( - inscriptgen - && adr[0] == '\\' - && is_space(adr[1]) - ) - ) - && quote == '\0' - ) { - if (!quote) - if ((*adr=='\"') || (*adr=='\'')) { - quote=*adr; // on doit attendre cela à la fin - if (inscriptgen && *(adr - 1) == '\\') { - quoteinscript=1; /* will wait for \" */ + inscriptgen + && adr[0] == '\\' + && is_space(adr[1]) + ) + ) + && quote == '\0' + ) { + if (!quote) + if ((*adr=='\"') || (*adr=='\'')) { + quote=*adr; // on doit attendre cela à la fin + if (inscriptgen && *(adr - 1) == '\\') { + quoteinscript=1; /* will wait for \" */ + } + } + // puis quitter + // adr++; // sauter les espaces, "" et cie + 
INCREMENT_CURRENT_ADR(1); + } + + /* Stop at \n (LF) if primary links or link lists */ + if (ptr == 0 || (in_media && strcmp(in_media,"LNK")==0)) + quote='\n'; + /* s'arrêter que ce soit un ' ou un " : pour document.write('<img src="foo'+a); par exemple! */ + else if (inscript && ! unquoted_script) + noquote=1; + + // sauter éventuel \" ou \' javascript + if (inscript) { // on est dans un obj.write(".. + if (*adr=='\\') { + if ((*(adr+1)=='\'') || (*(adr+1)=='"')) { // \" ou \' + // adr+=2; // sauter + INCREMENT_CURRENT_ADR(2); } - } - // puis quitter - adr++; // sauter les espaces, "" et cie - } - - /* Stop at \n (LF) if primary links*/ - if (ptr == 0) - quote='\n'; - /* s'arrêter que ce soit un ' ou un " : pour document.write('<img src="foo'+a); par exemple! */ - else if (inscript) - noquote=1; - - // sauter éventuel \" ou \' javascript - if (inscript) { // on est dans un obj.write(".. - if (*adr=='\\') { - if ((*(adr+1)=='\'') || (*(adr+1)=='"')) { // \" ou \' - adr+=2; // sauter } } - } - - // sauter content="1;URL=http://.. - if (p_searchMETAURL) { - int l=0; - while( - (adr + l + 4 < r->adr + r->size) - && (!strfield(adr+l,"URL=")) - && (l<128) ) l++; - if (!strfield(adr+l,"URL=")) - ok=-1; - else - adr+=(l+4); - } - - /* éviter les javascript:document.location=.. : les parser, plutôt */ - if (ok!=-1) { - if (strfield(adr,"javascript:") - && ! inscript /* we don't want to parse 'javascript:' inside document.write inside scripts */ - ) { - ok=-1; - /* - On est désormais dans du code javascript - */ - inscript_name=""; - inscript_tag=inscript=1; - inscript_state_pos=INSCRIPT_START; - inscript_tag_lastc=quote; /* à attendre à la fin */ + + // sauter content="1;URL=http://.. 
+ if (p_searchMETAURL) { + int l=0; + while( + (adr + l + 4 < r->adr + r->size) + && (!strfield(adr+l,"URL=")) + && (l<128) ) l++; + if (!strfield(adr+l,"URL=")) + ok=-1; + else + adr+=(l+4); } - } - - if (p_type==1) { - if (*adr=='#') { - adr++; // sauter # pour usemap etc + + /* éviter les javascript:document.location=.. : les parser, plutôt */ + if (ok!=-1) { + if (strfield(adr,"javascript:") + && ! inscript /* we don't want to parse 'javascript:' inside document.write inside scripts */ + ) { + ok=-1; + /* + On est désormais dans du code javascript + */ + inscript_name=""; + inscript_tag=inscript=1; + inscript_state_pos=INSCRIPT_START; + inscript_tag_lastc=quote; /* à attendre à la fin */ + if (opt->parsedebug) { HT_ADD("<@@ inscript @@>"); } + } } - } - eadr=adr; - - // ne pas flusher après code si on doit écrire le codebase avant! - if ((p_type!=-1) && (p_type!=2) && (p_type!=-2)) { - if ((opt->getmode & 1) && (ptr>0)) { - HT_ADD_ADR; // refresh + + if (p_type==1) { + if (*adr=='#') { + adr++; // sauter # pour usemap etc + } } - lastsaved=adr; // dernier écrit+1 - // après on écrira soit les données initiales, - // soir une URL/lien modifié! - } else if (p_type==-1) p_flush=adr; // flusher jusqu'à adr ensuite - - if (ok!=-1) { // continuer - // découper le lien - do { - if ((* (unsigned char*) eadr)<32) { // caractère de contrôle (ou \0) - if (!is_space(*eadr)) - ok=0; + eadr=adr; + + // ne pas flusher après code si on doit écrire le codebase avant! 
+ if ((p_type!=-1) && (p_type!=2) && (p_type!=-2)) { + if ((opt->getmode & 1) && (ptr>0)) { + HT_ADD_ADR; // refresh } - if ( ( ((int) (eadr - adr)) ) > HTS_URLMAXSIZE) // ** trop long, >HTS_URLMAXSIZE caractères (on prévoit HTS_URLMAXSIZE autres pour path) - ok=-1; // ne pas traiter ce lien - - if (ok > 0) { - //if (*eadr!=' ') { - if (is_space(*eadr)) { // guillemets,CR, etc - if ( - ( *eadr == quote && ( !quoteinscript || *(eadr -1) == '\\') ) // end quote - || ( noquote && (*eadr == '\"' || *eadr == '\'') ) // end at any quote - || (!noquote && quote == '\0' && is_realspace(*eadr) ) // unquoted href - ) // si pas d'attente de quote spéciale ou si quote atteinte + lastsaved=adr; // dernier écrit+1 + // après on écrira soit les données initiales, + // soir une URL/lien modifié! + } else if (p_type==-1) p_flush=adr; // flusher jusqu'à adr ensuite + + if (ok!=-1) { // continuer + // découper le lien + do { + if ((* (unsigned char*) eadr)<32) { // caractère de contrôle (ou \0) + if (!is_space(*eadr)) ok=0; - } else if (ending_p && (*eadr==ending_p)) - ok=0; - else { - switch(*eadr) { + } + if ( ( ((int) (eadr - adr)) ) > HTS_URLMAXSIZE) // ** trop long, >HTS_URLMAXSIZE caractères (on prévoit HTS_URLMAXSIZE autres pour path) + ok=-1; // ne pas traiter ce lien + + if (ok > 0) { + //if (*eadr!=' ') { + if (is_space(*eadr)) { // guillemets,CR, etc + if ( + ( *eadr == quote && ( !quoteinscript || *(eadr -1) == '\\') ) // end quote + || ( noquote && (*eadr == '\"' || *eadr == '\'') ) // end at any quote + || (!noquote && quote == '\0' && is_realspace(*eadr) ) // unquoted href + ) // si pas d'attente de quote spéciale ou si quote atteinte + ok=0; + } else if (ending_p && (*eadr==ending_p)) + ok=0; + else { + switch(*eadr) { case '>': if (!quote) { - if (!inscript) { + if (!inscript && !in_media) { intag=0; // PLUS dans un tag! intag_start_valid=0; } @@ -1593,404 +1751,385 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { // case '?': non! 
case '\\': if (inscript) ok=0; break; // \" ou \' point d'arrêt case '?': quote_adr=adr; break; // noter position query + } } - } - //} - } - eadr++; - } while(ok==1); - - // Empty link detected - if ( (((int) (eadr - adr))) <= 1) { // link empty - ok=-1; // No - if (*adr != '#') { // Not empty+unique # - if ( (((int) (eadr - adr)) == 1)) { // 1=link empty with delim (end_adr-start_adr) - if (quote) { - if ((opt->getmode & 1) && (ptr>0)) { - HT_ADD("#"); // We add this for a <href=""> + //} + } + eadr++; + } while(ok==1); + + // Empty link detected + if ( (((int) (eadr - adr))) <= 1) { // link empty + ok=-1; // No + if (*adr != '#') { // Not empty+unique # + if ( (((int) (eadr - adr)) == 1)) { // 1=link empty with delim (end_adr-start_adr) + if (quote) { + if ((opt->getmode & 1) && (ptr>0)) { + HT_ADD("#"); // We add this for a <href=""> + } } } } } - } - // This is a dirty and horrible hack to avoid parsing an Adobe GoLive bogus tag - if (strfield(adr, "(Empty Reference!)")) { - ok=-1; // No + // This is a dirty and horrible hack to avoid parsing an Adobe GoLive bogus tag + if (strfield(adr, "(Empty Reference!)")) { + ok=-1; // No + } + } - - } - - if (ok==0) { // tester un lien - char lien[HTS_URLMAXSIZE*2]; - int meme_adresse=0; // 0 par défaut pour primary - //char *copie_de_adr=adr; - //char* p; - - // construire lien (découpage) - if ( (((int) (eadr - adr))-1) < HTS_URLMAXSIZE ) { // pas trop long? 
- strncpy(lien,adr,((int) (eadr - adr))-1); - *(lien+ (((int) (eadr - adr)))-1 )='\0'; - //printf("link: %s\n",lien); - // supprimer les espaces - while((lien[strlen(lien)-1]==' ') && (strnotempty(lien))) lien[strlen(lien)-1]='\0'; - - -#if HTS_STRIP_DOUBLE_SLASH - // supprimer les // en / (sauf pour http://) - { - char *a,*p,*q; - int done=0; - a=strchr(lien,':'); // http:// - if (a) { - a++; - while(*a=='/') a++; // position après http:// - } else { - a=lien; // début - while(*a=='/') a++; // position après http:// + + if (ok==0) { // tester un lien + char BIGSTK lien[HTS_URLMAXSIZE*2]; + int meme_adresse=0; // 0 par défaut pour primary + //char *copie_de_adr=adr; + //char* p; + + // construire lien (découpage) + if ( (((int) (eadr - adr))-1) < HTS_URLMAXSIZE ) { // pas trop long? + strncpy(lien,adr,((int) (eadr - adr))-1); + *(lien+ (((int) (eadr - adr)))-1 )='\0'; + //printf("link: %s\n",lien); + // supprimer les espaces + while((lien[strlen(lien)-1]==' ') && (strnotempty(lien))) lien[strlen(lien)-1]='\0'; + + + } else + lien[0]='\0'; // erreur + + + // ------------------------------------------------------ + // Lien repéré et extrait + if (strnotempty(lien)>0) { // construction du lien + char BIGSTK adr[HTS_URLMAXSIZE*2],fil[HTS_URLMAXSIZE*2]; // ATTENTION adr cache le "vrai" adr + int forbidden_url=-1; // lien non interdit (mais non autorisé..) + int just_test_it=0; // mode de test des liens + int set_prio_to=0; // pour capture de page isolée + int import_done=0; // lien importé (ne pas scanner ensuite *à priori*) + // + adr[0]='\0'; fil[0]='\0'; + // + // 0: autorisé + // 1: interdit (patcher tout de même adresse) + + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"link detected in html: %s"LF,lien); test_flush; } - q=strchr(a,'?'); // ne pas traiter après '?' - if (!q) - q=a+strlen(a)-1; - while(( p=strstr(a,"//")) && (!done) ) { // remplacer // par / - if ((int) p>(int) q) { // après le ? 
(toto.cgi?param=1//2.3) - done=1; // stopper - } else { - char tempo[HTS_URLMAXSIZE*2]; - tempo[0]='\0'; - strncatbuff(tempo,a,(int) p - (int) a); - strcatbuff (tempo,p+1); - strcpybuff(a,tempo); // recopier + + // external check +#if HTS_ANALYSTE + if (!hts_htmlcheck_linkdetected(lien) || !hts_htmlcheck_linkdetected2(lien, intag_start)) { + error=1; // erreur + if (opt->errlog) { + fspc(opt->errlog,"error"); fprintf(opt->errlog,"Link %s refused by external wrapper"LF,lien); + test_flush; } } - } #endif - - } else - lien[0]='\0'; // erreur - - // ------------------------------------------------------ - // Lien repéré et extrait - if (strnotempty(lien)>0) { // construction du lien - char adr[HTS_URLMAXSIZE*2],fil[HTS_URLMAXSIZE*2]; // ATTENTION adr cache le "vrai" adr - int forbidden_url=-1; // lien non interdit (mais non autorisé..) - int just_test_it=0; // mode de test des liens - int set_prio_to=0; // pour capture de page isolée - int import_done=0; // lien importé (ne pas scanner ensuite *à priori*) - // - adr[0]='\0'; fil[0]='\0'; - // - // 0: autorisé - // 1: interdit (patcher tout de même adresse) - - if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"link detected in html: %s"LF,lien); test_flush; - } - - // external check -#if HTS_ANALYSTE - if (!hts_htmlcheck_linkdetected(lien)) { - error=1; // erreur - if (opt->errlog) { - fspc(opt->errlog,"error"); fprintf(opt->errlog,"Link %s refused by external wrapper"LF,lien); - test_flush; + +#if HTS_STRIP_DOUBLE_SLASH + // supprimer les // en / (sauf pour http://) + if (opt->urlhack) { + char *a,*p,*q; + int done=0; + a=strchr(lien,':'); // http:// + if (a) { + a++; + while(*a=='/') a++; // position après http:// + } else { + a=lien; // début + while(*a=='/') a++; // position après http:// + } + q=strchr(a,'?'); // ne pas traiter après '?' + if (!q) + q=a+strlen(a)-1; + while(( p=strstr(a,"//")) && (!done) ) { // remplacer // par / + if ((int) p>(int) q) { // après le ? 
(toto.cgi?param=1//2.3) + done=1; // stopper + } else { + char BIGSTK tempo[HTS_URLMAXSIZE*2]; + tempo[0]='\0'; + strncatbuff(tempo,a,(int) p - (int) a); + strcatbuff (tempo,p+1); + strcpybuff(a,tempo); // recopier + } + } } - } #endif - - // purger espaces de début et fin, CR,LF résiduels - // (IMG SRC="foo.<\n><\t>gif<\t>") - { - char* a = lien; - int llen; - - // strip ending spaces - llen = ( *a != '\0' ) ? strlen(a) : 0; - while(llen > 0 && is_realspace(lien[llen - 1]) ) { - a[--llen]='\0'; - } - // skip leading ones - while(is_realspace(*a)) a++; - // strip cr, lf, tab inside URL - llen = 0; - while(*a) { - if (*a != '\n' && *a != '\r' && *a != '\t') { - lien[llen++] = *a; + + // purger espaces de début et fin, CR,LF résiduels + // (IMG SRC="foo.<\n><\t>gif<\t>") + { + char* a = lien; + int llen; + + // strip ending spaces + llen = ( *a != '\0' ) ? strlen(a) : 0; + while(llen > 0 && is_realspace(lien[llen - 1]) ) { + a[--llen]='\0'; + } + // skip leading ones + while(is_realspace(*a)) a++; + // strip cr, lf, tab inside URL + llen = 0; + while(*a) { + if (*a != '\n' && *a != '\r' && *a != '\t') { + lien[llen++] = *a; + } + a++; } - a++; + lien[llen] = '\0'; } - lien[llen] = '\0'; - } - // commas are forbidden - if (archivetag_p) { - if (strchr(lien, ',')) { - error=1; // erreur - if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"link rejected (multiple-archive) %s"LF,lien); test_flush; + // commas are forbidden + if (archivetag_p) { + if (strchr(lien, ',')) { + error=1; // erreur + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"link rejected (multiple-archive) %s"LF,lien); test_flush; + } } + } + + /* Unescape/escape %20 and other */ + { + char BIGSTK query[HTS_URLMAXSIZE*2]; + char* a=strchr(lien,'?'); + if (a) { + strcpybuff(query,a); + *a='\0'; + } else + query[0]='\0'; + // conversion & -> & et autres joyeusetés + unescape_amp(lien); + unescape_amp(query); + // décoder l'inutile 
(%2E par exemple) et coder espaces + // XXXXXXXXXXXXXXXXX strcpybuff(lien,unescape_http(lien)); + strcpybuff(lien,unescape_http_unharm(lien, (no_esc_utf)?0:1)); + escape_remove_control(lien); + escape_spc_url(lien); + strcatbuff(lien,query); /* restore */ } - } - - /* Unescape/escape %20 and other */ - { - char query[HTS_URLMAXSIZE*2]; - char* a=strchr(lien,'?'); - if (a) { - strcpybuff(query,a); - *a='\0'; - } else - query[0]='\0'; - // conversion & -> & et autres joyeusetés - unescape_amp(lien); - unescape_amp(query); - // décoder l'inutile (%2E par exemple) et coder espaces - // XXXXXXXXXXXXXXXXX strcpybuff(lien,unescape_http(lien)); - strcpybuff(lien,unescape_http_unharm(lien, (no_esc_utf)?0:1)); - escape_remove_control(lien); - escape_spc_url(lien); - strcatbuff(lien,query); /* restore */ - } - - // convertir les éventuels \ en des / pour éviter des problèmes de reconnaissance! - { - char* a=jump_identification(lien); - while( (a=strchr(a,'\\')) ) *a='/'; - } - - // supprimer le(s) ./ - while ((lien[0]=='.') && (lien[1]=='/')) { - char tempo[HTS_URLMAXSIZE*2]; - strcpybuff(tempo,lien+2); - strcpybuff(lien,tempo); - } - if (strnotempty(lien)==0) // sauf si plus de nom de fichier - strcpybuff(lien,"./"); - - // vérifie les /~machin -> /~machin/ - // supposition dangereuse? - // OUI!! + + // convertir les éventuels \ en des / pour éviter des problèmes de reconnaissance! + { + char* a; + for(a = jump_identification(lien) ; *a != '\0' && *a != '?' ; a++) { + if (*a == '\\') { + *a = '/'; + } + } + } + + // supprimer le(s) ./ + while ((lien[0]=='.') && (lien[1]=='/')) { + char BIGSTK tempo[HTS_URLMAXSIZE*2]; + strcpybuff(tempo,lien+2); + strcpybuff(lien,tempo); + } + if (strnotempty(lien)==0) // sauf si plus de nom de fichier + strcpybuff(lien,"./"); + + // vérifie les /~machin -> /~machin/ + // supposition dangereuse? + // OUI!! 
#if HTS_TILDE_SLASH - if (lien[strlen(lien)-1]!='/') { - char *a=lien+strlen(lien)-1; - // éviter aussi index~1.html - while (((int) a>(int) lien) && (*a!='~') && (*a!='/') && (*a!='.')) a--; - if (*a=='~') { - strcatbuff(lien,"/"); // ajouter slash + if (lien[strlen(lien)-1]!='/') { + char *a=lien+strlen(lien)-1; + // éviter aussi index~1.html + while (((int) a>(int) lien) && (*a!='~') && (*a!='/') && (*a!='.')) a--; + if (*a=='~') { + strcatbuff(lien,"/"); // ajouter slash + } } - } #endif - - // APPLET CODE="mixer.MixerApplet.class" --> APPLET CODE="mixer/MixerApplet.class" - // yes, this is dirty - // but I'm so lazzy.. - // and besides the java "code" convention is really a pain in html code - if (p_type==-1) { - char* a=strrchr(lien,'.'); - add_class_dots_to_patch=0; - if (a) { - char* b; - do { - b=strchr(lien,'.'); - if ((b != a) && (b)) { - add_class_dots_to_patch++; - *b='/'; - } - } while((b != a) && (b)); + + // APPLET CODE="mixer.MixerApplet.class" --> APPLET CODE="mixer/MixerApplet.class" + // yes, this is dirty + // but I'm so lazzy.. + // and besides the java "code" convention is really a pain in html code + if (p_type==-1) { + char* a=strrchr(lien,'.'); + add_class_dots_to_patch=0; + if (a) { + char* b; + do { + b=strchr(lien,'.'); + if ((b != a) && (b)) { + add_class_dots_to_patch++; + *b='/'; + } + } while((b != a) && (b)); + } } - } - // éliminer les éventuels :80 (port par défaut!) - if (link_has_authority(lien)) { - char * a; - a=strstr(lien,"//"); // "//" authority - if (a) - a+=2; - else - a=lien; - // while((*a) && (*a!='/') && (*a!=':')) a++; - a=jump_toport(a); - if (a) { // port - int port=0; - int defport=80; - char* b=a+1; + // éliminer les éventuels :80 (port par défaut!) 
+ if (link_has_authority(lien)) { + char * a; + a=strstr(lien,"//"); // "//" authority + if (a) + a+=2; + else + a=lien; + // while((*a) && (*a!='/') && (*a!=':')) a++; + a=jump_toport(a); + if (a) { // port + int port=0; + int defport=80; + char* b=a+1; #if HTS_USEOPENSSL - // FIXME - //if (strfield(adr, "https:")) { - //} + // FIXME + //if (strfield(adr, "https:")) { + //} #endif - while(isdigit((unsigned char)*b)) { port*=10; port+=(int) (*b-'0'); b++; } - if (port==defport) { // port 80, default - c'est débile - char tempo[HTS_URLMAXSIZE*2]; - tempo[0]='\0'; - strncatbuff(tempo,lien,(int) (a - lien)); - strcatbuff(tempo,a+3); // sauter :80 - strcpybuff(lien,tempo); + while(isdigit((unsigned char)*b)) { port*=10; port+=(int) (*b-'0'); b++; } + if (port==defport) { // port 80, default - c'est débile + char BIGSTK tempo[HTS_URLMAXSIZE*2]; + tempo[0]='\0'; + strncatbuff(tempo,lien,(int) (a - lien)); + strcatbuff(tempo,a+3); // sauter :80 + strcpybuff(lien,tempo); + } } } - } - - // filtrer les parazites (mailto & cie) - /* - if (strfield(lien,"mailto:")) { // ne pas traiter - error=1; - } else if (strfield(lien,"news:")) { // ne pas traiter - error=1; - } - */ - - // vérifier que l'on ne doit pas ajouter de .class - if (!error) { - if (add_class) { - char *a = lien+strlen(lien)-1; - while(( a > lien) && (*a!='/') && (*a!='.')) a--; - if (*a != '.') - strcatbuff(lien,".class"); // ajouter .class - else if (!strfield2(a,".class")) - strcatbuff(lien,".class"); // idem - } - } - - // si c'est un chemin, alors vérifier (toto/toto.html -> http://www/toto/) - if (!error) { - if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"position link check %s"LF,lien); test_flush; + + // filtrer les parazites (mailto & cie) + /* + if (strfield(lien,"mailto:")) { // ne pas traiter + error=1; + } else if (strfield(lien,"news:")) { // ne pas traiter + error=1; } - - if ((p_type==2) || (p_type==-2)) { // code ou codebase - // Vérifier les 
codebase=applet (au lieu de applet/) - if (p_type==-2) { // codebase - if (strnotempty(lien)) { - if (fil[strlen(lien)-1]!='/') { // pas répertoire - strcatbuff(lien,"/"); - } - } - } + */ - /* base has always authority */ - if (p_type==2 && !link_has_authority(lien)) { - char tmp[HTS_URLMAXSIZE*2]; - strcpybuff(tmp, "http://"); - strcatbuff(tmp, lien); - strcpybuff(lien, tmp); + // vérifier que l'on ne doit pas ajouter de .class + if (!error) { + if (add_class) { + char *a = lien+strlen(lien)-1; + while(( a > lien) && (*a!='/') && (*a!='.')) a--; + if (*a != '.') + strcatbuff(lien,".class"); // ajouter .class + else if (!strfield2(a,".class")) + strcatbuff(lien,".class"); // idem } + } - /* only one ending / (bug on some pages) */ - if ((int)strlen(lien)>2) { - int len = (int) strlen(lien); - while(len > 1 && lien[len-1] == '/' && lien[len-2] == '/' ) /* double // (bug) */ - lien[--len]='\0'; + // si c'est un chemin, alors vérifier (toto/toto.html -> http://www/toto/) + if (!error) { + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"position link check %s"LF,lien); test_flush; } - // copier nom host si besoin est - if (!link_has_authority(lien)) { // pas de http:// - char adr2[HTS_URLMAXSIZE*2],fil2[HTS_URLMAXSIZE*2]; // ** euh ident_url_relatif?? 
- if (ident_url_relatif(lien,urladr,urlfil,adr2,fil2)<0) { - error=1; - } else { - strcpybuff(lien,"http://"); - strcatbuff(lien,adr2); - if (*fil2!='/') - strcatbuff(lien,"/"); - strcatbuff(lien,fil2); - { - char* a; - a=lien+strlen(lien)-1; - while((*a) && (*a!='/') && ( a> lien)) a--; - if (*a=='/') { - *(a+1)='\0'; + + if ((p_type==2) || (p_type==-2)) { // code ou codebase + // Vérifier les codebase=applet (au lieu de applet/) + if (p_type==-2) { // codebase + if (strnotempty(lien)) { + if (fil[strlen(lien)-1]!='/') { // pas répertoire + strcatbuff(lien,"/"); } } - //char tempo[HTS_URLMAXSIZE*2]; - //strcpybuff(tempo,"http://"); - //strcatbuff(tempo,urladr); // host - //if (*lien!='/') - // strcatbuff(tempo,"/"); - //strcatbuff(tempo,lien); - //strcpybuff(lien,tempo); } - } - - if (!error) { // pas d'erreur? - if (p_type==2) { // code ET PAS codebase - char* a=lien+strlen(lien)-1; - while( (a > lien) && (*a) && (*a!='/')) a--; - if (*a=='/') // ok on a repéré le dernier / - *(a+1)='\0'; // couper - else { - *lien='\0'; // éliminer - error=1; // erreur, ne pas poursuivre - } + + /* base has always authority */ + if (p_type==2 && !link_has_authority(lien)) { + char BIGSTK tmp[HTS_URLMAXSIZE*2]; + strcpybuff(tmp, "http://"); + strcatbuff(tmp, lien); + strcpybuff(lien, tmp); } - - // stocker base ou codebase? - switch(p_type) { + + /* only one ending / (bug on some pages) */ + if ((int)strlen(lien)>2) { + int len = (int) strlen(lien); + while(len > 1 && lien[len-1] == '/' && lien[len-2] == '/' ) /* double // (bug) */ + lien[--len]='\0'; + } + // copier nom host si besoin est + if (!link_has_authority(lien)) { // pas de http:// + char BIGSTK adr2[HTS_URLMAXSIZE*2],fil2[HTS_URLMAXSIZE*2]; // ** euh ident_url_relatif?? 
+ if (ident_url_relatif(lien,urladr,urlfil,adr2,fil2)<0) { + error=1; + } else { + strcpybuff(lien,"http://"); + strcatbuff(lien,adr2); + if (*fil2!='/') + strcatbuff(lien,"/"); + strcatbuff(lien,fil2); + { + char* a; + a=lien+strlen(lien)-1; + while((*a) && (*a!='/') && ( a> lien)) a--; + if (*a=='/') { + *(a+1)='\0'; + } + } + //char BIGSTK tempo[HTS_URLMAXSIZE*2]; + //strcpybuff(tempo,"http://"); + //strcatbuff(tempo,urladr); // host + //if (*lien!='/') + // strcatbuff(tempo,"/"); + //strcatbuff(tempo,lien); + //strcpybuff(lien,tempo); + } + } + + if (!error) { // pas d'erreur? + if (p_type==2) { // code ET PAS codebase + char* a=lien+strlen(lien)-1; + while( (a > lien) && (*a) && (*a!='/')) a--; + if (*a=='/') // ok on a repéré le dernier / + *(a+1)='\0'; // couper + else { + *lien='\0'; // éliminer + error=1; // erreur, ne pas poursuivre + } + } + + // stocker base ou codebase? + switch(p_type) { case 2: { //if (*lien!='/') strcatbuff(base,"/"); strcpybuff(base,lien); } - break; // base + break; // base case -2: { //if (*lien!='/') strcatbuff(codebase,"/"); strcpybuff(codebase,lien); } - break; // base - } - - if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"code/codebase link %s base %s"LF,lien,base); test_flush; + break; // base + } + + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"code/codebase link %s base %s"LF,lien,base); test_flush; + } + //printf("base code: %s - %s\n",lien,base); } - //printf("base code: %s - %s\n",lien,base); - } - - } else { - char* _base; - if (p_type==-1) // code (applet) - _base=codebase; - else - _base=base; - - - // ajouter chemin de base href.. 
- if (strnotempty(_base)) { // considérer base - if (!link_has_authority(lien)) { // non absolue - if (*lien!='/') { // non absolu sur le site (/) - if ( ((int) strlen(_base)+(int) strlen(lien))<HTS_URLMAXSIZE) { - // mailto: and co: do NOT add base - if (ident_url_relatif(lien,urladr,urlfil,adr,fil)>=0) { - char tempo[HTS_URLMAXSIZE*2]; - // base est absolue - strcpybuff(tempo,_base); - strcatbuff(tempo,lien + ((*lien=='/')?1:0) ); - strcpybuff(lien,tempo); // patcher en considérant base - // ** vérifier que ../ fonctionne (ne doit pas arriver mais bon..) - - if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"link modified with code/codebase %s"LF,lien); test_flush; - } - } - } else { - error=1; // erreur - if (opt->errlog) { - fspc(opt->errlog,"error"); fprintf(opt->errlog,"Link %s too long with base href"LF,lien); - test_flush; - } - } - } else { - char badr[HTS_URLMAXSIZE*2], bfil[HTS_URLMAXSIZE*2]; - if (ident_url_absolute(_base, badr, bfil) >=0 ) { - if ( ((int) strlen(badr)+(int) strlen(lien)) < HTS_URLMAXSIZE) { - char tempo[HTS_URLMAXSIZE*2]; - // base est absolue - tempo[0] = '\0'; - if (!link_has_authority(badr)) { - strcatbuff(tempo, "http://"); - } - strcatbuff(tempo,badr); - strcatbuff(tempo,lien); - strcpybuff(lien,tempo); // patcher en considérant base - - if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"link modified with code/codebase %s"LF,lien); test_flush; + + } else { + char* _base; + if (p_type==-1) // code (applet) + _base=codebase; + else + _base=base; + + + // ajouter chemin de base href.. 
+ if (strnotempty(_base)) { // considérer base + if (!link_has_authority(lien)) { // non absolue + if (*lien!='/') { // non absolu sur le site (/) + if ( ((int) strlen(_base)+(int) strlen(lien))<HTS_URLMAXSIZE) { + // mailto: and co: do NOT add base + if (ident_url_relatif(lien,urladr,urlfil,adr,fil)>=0) { + char BIGSTK tempo[HTS_URLMAXSIZE*2]; + // base est absolue + strcpybuff(tempo,_base); + strcatbuff(tempo,lien + ((*lien=='/')?1:0) ); + strcpybuff(lien,tempo); // patcher en considérant base + // ** vérifier que ../ fonctionne (ne doit pas arriver mais bon..) + + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"link modified with code/codebase %s"LF,lien); test_flush; + } } } else { error=1; // erreur @@ -1999,74 +2138,98 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { test_flush; } } + } else { + char BIGSTK badr[HTS_URLMAXSIZE*2], bfil[HTS_URLMAXSIZE*2]; + if (ident_url_absolute(_base, badr, bfil) >=0 ) { + if ( ((int) strlen(badr)+(int) strlen(lien)) < HTS_URLMAXSIZE) { + char BIGSTK tempo[HTS_URLMAXSIZE*2]; + // base est absolue + tempo[0] = '\0'; + if (!link_has_authority(badr)) { + strcatbuff(tempo, "http://"); + } + strcatbuff(tempo,badr); + strcatbuff(tempo,lien); + strcpybuff(lien,tempo); // patcher en considérant base + + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"link modified with code/codebase %s"LF,lien); test_flush; + } + } else { + error=1; // erreur + if (opt->errlog) { + fspc(opt->errlog,"error"); fprintf(opt->errlog,"Link %s too long with base href"LF,lien); + test_flush; + } + } + } } } } + + } - - - } - } - - - // transformer lien quelconque (http, relatif, etc) en une adresse - // et un chemin+fichier (adr,fil) - if (!error) { - int reponse; - if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"build relative link %s with %s%s"LF,lien,relativeurladr,relativeurlfil); test_flush; } - if 
((reponse=ident_url_relatif(lien,relativeurladr,relativeurlfil,adr,fil))<0) { - adr[0]='\0'; // erreur - if (reponse==-2) { - if (opt->errlog) { - fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Link %s not caught (unknown protocol)"LF,lien); - test_flush; + + + // transformer lien quelconque (http, relatif, etc) en une adresse + // et un chemin+fichier (adr,fil) + if (!error) { + int reponse; + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"build relative link %s with %s%s"LF,lien,relativeurladr,relativeurlfil); test_flush; + } + if ((reponse=ident_url_relatif(lien,relativeurladr,relativeurlfil,adr,fil))<0) { + adr[0]='\0'; // erreur + if (reponse==-2) { + if (opt->errlog) { + fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Link %s not caught (unknown protocol)"LF,lien); + test_flush; + } + } else { + if ((opt->debug>1) && (opt->errlog!=NULL)) { + fspc(opt->errlog,"debug"); fprintf(opt->errlog,"ident_url_relatif failed for %s with %s%s"LF,lien,relativeurladr,relativeurlfil); test_flush; + } } } else { - if ((opt->debug>1) && (opt->errlog!=NULL)) { - fspc(opt->errlog,"debug"); fprintf(opt->errlog,"ident_url_relatif failed for %s with %s%s"LF,lien,relativeurladr,relativeurlfil); test_flush; + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"built relative link %s with %s%s -> %s%s"LF,lien,relativeurladr,relativeurlfil,adr,fil); test_flush; } } } else { if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"built relative link %s with %s%s -> %s%s"LF,lien,relativeurladr,relativeurlfil,adr,fil); test_flush; + fspc(opt->log,"debug"); fprintf(opt->log,"link %s not build, error detected before"LF,lien); test_flush; } + adr[0]='\0'; } - } else { - if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"link %s not build, error detected before"LF,lien); test_flush; - } - adr[0]='\0'; - } - + #if HTS_CHECK_STRANGEDIR - // !ATTENTION! 
- // Ici on teste les exotiques du genre www.truc.fr/machin (sans slash à la fin) - // je n'ai pas encore trouvé le moyen de faire la différence entre un répertoire - // et un fichier en http A PRIORI : je fais donc un test - // En cas de moved xxx, on recalcule adr et fil, tout simplement - // DEFAUT: test effectué plusieurs fois! à revoir!!! - if ((adr[0]!='\0') && (strcmp(adr,"file://") && (p_type!=2) && (p_type!=-2)) { - //## if ((adr[0]!='\0') && (adr[0]!=lOCAL_CHAR) && (p_type!=2) && (p_type!=-2)) { - if (fil[strlen(fil)-1]!='/') { // pas répertoire - if (ishtml(fil)==-2) { // pas d'extension - char loc[HTS_URLMAXSIZE*2]; // éventuelle nouvelle position - loc[0]='\0'; - if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"link-check-directory: %s%s"LF,adr,fil); - test_flush; - } - - // tester éventuelle nouvelle position - switch (http_location(adr,fil,loc).statuscode) { + // !ATTENTION! + // Ici on teste les exotiques du genre www.truc.fr/machin (sans slash à la fin) + // je n'ai pas encore trouvé le moyen de faire la différence entre un répertoire + // et un fichier en http A PRIORI : je fais donc un test + // En cas de moved xxx, on recalcule adr et fil, tout simplement + // DEFAUT: test effectué plusieurs fois! à revoir!!! 
+ if ((adr[0]!='\0') && (strcmp(adr,"file://") && (p_type!=2) && (p_type!=-2)) { + //## if ((adr[0]!='\0') && (adr[0]!=lOCAL_CHAR) && (p_type!=2) && (p_type!=-2)) { + if (fil[strlen(fil)-1]!='/') { // pas répertoire + if (ishtml(fil)==-2) { // pas d'extension + char BIGSTK loc[HTS_URLMAXSIZE*2]; // éventuelle nouvelle position + loc[0]='\0'; + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"link-check-directory: %s%s"LF,adr,fil); + test_flush; + } + + // tester éventuelle nouvelle position + switch (http_location(adr,fil,loc).statuscode) { case 200: // ok au final if (strnotempty(loc)) { // a changé d'adresse if (opt->errlog) { fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Link %s%s has moved to %s for %s%s"LF,adr,fil,loc,urladr,urlfil); test_flush; } - + // recalculer adr et fil! if (ident_url_absolute(loc,adr,fil)==-1) { adr[0]='\0'; // cancel @@ -2075,7 +2238,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { test_flush; } } - + } break; case -2: case -3: // timeout ou erreur grave @@ -2083,214 +2246,216 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Connection too slow for testing link %s%s (from %s%s)"LF,adr,fil,urladr,urlfil); test_flush; } - + break; + } + } - - } - } - } + } + } #endif - - // Le lien doit juste être réécrit, mais ne doit pas générer un lien - // exemple: <FORM ACTION="url_cgi"> - if (p_nocatch) { - forbidden_url=1; // interdire récupération du lien - if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"link forced external at %s%s"LF,adr,fil); - test_flush; + + // Le lien doit juste être réécrit, mais ne doit pas générer un lien + // exemple: <FORM ACTION="url_cgi"> + if (p_nocatch) { + forbidden_url=1; // interdire récupération du lien + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"link forced external at %s%s"LF,adr,fil); + 
test_flush; + } } - } - - // Tester si un lien doit être accepté ou refusé (wizard) - // forbidden_url=1 : lien refusé - // forbidden_url=0 : lien accepté - //if ((ptr>0) && (p_type!=2) && (p_type!=-2)) { // tester autorisations? - if ((p_type!=2) && (p_type!=-2)) { // tester autorisations? - if (!p_nocatch) { - if (adr[0]!='\0') { - if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"wizard link test at %s%s.."LF,adr,fil); - test_flush; - } - forbidden_url=hts_acceptlink(opt,ptr,lien_tot,liens, - adr,fil, - &set_prio_to, - &just_test_it); - if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"result for wizard link test: %d"LF,forbidden_url); - test_flush; + + // Tester si un lien doit être accepté ou refusé (wizard) + // forbidden_url=1 : lien refusé + // forbidden_url=0 : lien accepté + //if ((ptr>0) && (p_type!=2) && (p_type!=-2)) { // tester autorisations? + if ((p_type!=2) && (p_type!=-2)) { // tester autorisations? 
+ if (!p_nocatch) { + if (adr[0]!='\0') { + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"wizard link test at %s%s.."LF,adr,fil); + test_flush; + } + forbidden_url=hts_acceptlink(opt,ptr,lien_tot,liens, + adr,fil, + NULL, NULL, + &set_prio_to, + &just_test_it); + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"result for wizard link test: %d"LF,forbidden_url); + test_flush; + } } } } - } - - // calculer meme_adresse - meme_adresse=strfield2(jump_identification(adr),jump_identification(urladr)); - - - - // Début partie sauvegarde - - // ici on forme le nom du fichier à sauver, et on patche l'URL - if (adr[0]!='\0') { - // savename: simplifier les ../ et autres joyeusetés - char save[HTS_URLMAXSIZE*2]; - int r_sv=0; - // En cas de moved, adresse première - char former_adr[HTS_URLMAXSIZE*2]; - char former_fil[HTS_URLMAXSIZE*2]; - // - save[0]='\0'; former_adr[0]='\0'; former_fil[0]='\0'; - // - - // nom du chemin à sauver si on doit le calculer - // note: url_savename peut décider de tester le lien si il le trouve - // suspect, et modifier alors adr et fil - // dans ce cas on aura une référence directe au lieu des traditionnels - // moved en cascade (impossible à reproduire à priori en local, lorsque des fichiers - // gif sont impliqués par exemple) - if ((p_type!=2) && (p_type!=-2)) { // pas base href ou codebase - if (forbidden_url!=1) { - char last_adr[HTS_URLMAXSIZE*2]; - last_adr[0]='\0'; - //char last_fil[HTS_URLMAXSIZE*2]=""; - strcpybuff(last_adr,adr); // ancienne adresse - //strcpybuff(last_fil,fil); // ancien chemin - r_sv=url_savename(adr,fil,save,former_adr,former_fil,liens[ptr]->adr,liens[ptr]->fil,opt,liens,lien_tot,back,back_max,cache,hash,ptr,numero_passe); - if (strcmp(jump_identification(last_adr),jump_identification(adr)) != 0) { // a changé - - // 2e test si moved - - // Tester si un lien doit être accepté ou refusé (wizard) - // forbidden_url=1 : lien refusé - // 
forbidden_url=0 : lien accepté - if ((ptr>0) && (p_type!=2) && (p_type!=-2)) { // tester autorisations? - if (!p_nocatch) { - if (adr[0]!='\0') { - if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"wizard moved link retest at %s%s.."LF,adr,fil); - test_flush; - } - forbidden_url=hts_acceptlink(opt,ptr,lien_tot,liens, - adr,fil, - &set_prio_to, - &just_test_it); - if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"result for wizard moved link retest: %d"LF,forbidden_url); - test_flush; + + // calculer meme_adresse + meme_adresse=strfield2(jump_identification(adr),jump_identification(urladr)); + + + + // Début partie sauvegarde + + // ici on forme le nom du fichier à sauver, et on patche l'URL + if (adr[0]!='\0') { + // savename: simplifier les ../ et autres joyeusetés + char BIGSTK save[HTS_URLMAXSIZE*2]; + int r_sv=0; + // En cas de moved, adresse première + char BIGSTK former_adr[HTS_URLMAXSIZE*2]; + char BIGSTK former_fil[HTS_URLMAXSIZE*2]; + // + save[0]='\0'; former_adr[0]='\0'; former_fil[0]='\0'; + // + + // nom du chemin à sauver si on doit le calculer + // note: url_savename peut décider de tester le lien si il le trouve + // suspect, et modifier alors adr et fil + // dans ce cas on aura une référence directe au lieu des traditionnels + // moved en cascade (impossible à reproduire à priori en local, lorsque des fichiers + // gif sont impliqués par exemple) + if ((p_type!=2) && (p_type!=-2)) { // pas base href ou codebase + if (forbidden_url!=1) { + char BIGSTK last_adr[HTS_URLMAXSIZE*2]; + last_adr[0]='\0'; + //char last_fil[HTS_URLMAXSIZE*2]=""; + strcpybuff(last_adr,adr); // ancienne adresse + //strcpybuff(last_fil,fil); // ancien chemin + r_sv=url_savename(adr,fil,save,former_adr,former_fil,liens[ptr]->adr,liens[ptr]->fil,opt,liens,lien_tot,back,back_max,cache,hash,ptr,numero_passe); + if (strcmp(jump_identification(last_adr),jump_identification(adr)) != 0) { // a changé + + // 2e 
test si moved + + // Tester si un lien doit être accepté ou refusé (wizard) + // forbidden_url=1 : lien refusé + // forbidden_url=0 : lien accepté + if ((ptr>0) && (p_type!=2) && (p_type!=-2)) { // tester autorisations? + if (!p_nocatch) { + if (adr[0]!='\0') { + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"wizard moved link retest at %s%s.."LF,adr,fil); + test_flush; + } + forbidden_url=hts_acceptlink(opt,ptr,lien_tot,liens, + adr,fil, + NULL, NULL, + &set_prio_to, + &just_test_it); + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"result for wizard moved link retest: %d"LF,forbidden_url); + test_flush; + } } } } + + //import_done=1; // c'est un import! + meme_adresse=0; // on a changé } - - //import_done=1; // c'est un import! - meme_adresse=0; // on a changé + } else { + strcpybuff(save,""); // dummy } - } else { - strcpybuff(save,""); // dummy } - } - if (r_sv!=-1) { // pas d'erreur, on continue - /* log */ - if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); - if (forbidden_url!=1) { // le lien va être chargé - if ((p_type==2) || (p_type==-2)) { // base href ou codebase, pas un lien - fprintf(opt->log,"Code/Codebase: %s%s"LF,adr,fil); - } else if ((opt->getmode & 4)==0) { - fprintf(opt->log,"Record: %s%s -> %s"LF,adr,fil,save); - } else { - if (!ishtml(fil)) - fprintf(opt->log,"Record after: %s%s -> %s"LF,adr,fil,save); - else + if (r_sv!=-1) { // pas d'erreur, on continue + /* log */ + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); + if (forbidden_url!=1) { // le lien va être chargé + if ((p_type==2) || (p_type==-2)) { // base href ou codebase, pas un lien + fprintf(opt->log,"Code/Codebase: %s%s"LF,adr,fil); + } else if ((opt->getmode & 4)==0) { fprintf(opt->log,"Record: %s%s -> %s"LF,adr,fil,save); - } - } else - fprintf(opt->log,"External: %s%s"LF,adr,fil); - test_flush; - } - /* FIN log */ - - // écrire lien - if ((p_type==2) || 
(p_type==-2)) { // base href ou codebase, sauter - lastsaved=eadr-1+1; // sauter " - } - /* */ - else if (opt->urlmode==0) { // URL absolue dans tous les cas - if ((opt->getmode & 1) && (ptr>0)) { // ecrire les html - if (!link_has_authority(adr)) { - HT_ADD("http://"); - } else { - char* aut = strstr(adr, "//"); - if (aut) { - char tmp[256]; - tmp[0]='\0'; - strncatbuff(tmp, adr, (int) (aut - adr)); // scheme - HT_ADD(tmp); // Protocol - HT_ADD("//"); - } - } - - if (!opt->passprivacy) { - HT_ADD(jump_protocol(adr)); // Password - } else { - HT_ADD(jump_identification(adr)); // No Password - } - if (*fil!='/') - HT_ADD("/"); - HT_ADD(fil); + } else { + if (!ishtml(fil)) + fprintf(opt->log,"Record after: %s%s -> %s"LF,adr,fil,save); + else + fprintf(opt->log,"Record: %s%s -> %s"LF,adr,fil,save); + } + } else + fprintf(opt->log,"External: %s%s"LF,adr,fil); + test_flush; + } + /* FIN log */ + + // écrire lien + if ((p_type==2) || (p_type==-2)) { // base href ou codebase, sauter + lastsaved=eadr-1+1; // sauter " } - lastsaved=eadr-1; // dernier écrit+1 (enfin euh apres on fait un ++ alors hein) - /* */ - } else if (opt->urlmode >= 4) { // ne rien faire dans tous les cas! /* */ - /* leave the link 'as is' */ - /* Sinon, dépend de interne/externe */ - } else if (forbidden_url==1) { // le lien ne sera pas chargé, référence externe! 
- if ((opt->getmode & 1) && (ptr>0)) { - if (p_type!=-1) { // pas que le nom de fichier (pas classe java) - if (!opt->external) { - if (!link_has_authority(adr)) { - HT_ADD("http://"); - if (!opt->passprivacy) { - HT_ADD(adr); // Password - } else { - HT_ADD(jump_identification(adr)); // No Password - } - if (*fil!='/') - HT_ADD("/"); - HT_ADD(fil); - } else { - char* aut = strstr(adr, "//"); - if (aut) { - char tmp[256]; - tmp[0]='\0'; - strncatbuff(tmp, adr, (int) (aut - adr)); // scheme - HT_ADD(tmp); // Protocol - HT_ADD("//"); + else if (opt->urlmode==0) { // URL absolue dans tous les cas + if ((opt->getmode & 1) && (ptr>0)) { // ecrire les html + if (!link_has_authority(adr)) { + HT_ADD("http://"); + } else { + char* aut = strstr(adr, "//"); + if (aut) { + char tmp[256]; + tmp[0]='\0'; + strncatbuff(tmp, adr, (int) (aut - adr)); // scheme + HT_ADD(tmp); // Protocol + HT_ADD("//"); + } + } + + if (!opt->passprivacy) { + HT_ADD_HTMLESCAPED(jump_protocol(adr)); // Password + } else { + HT_ADD_HTMLESCAPED(jump_identification(adr)); // No Password + } + if (*fil!='/') + HT_ADD("/"); + HT_ADD_HTMLESCAPED(fil); + } + lastsaved=eadr-1; // dernier écrit+1 (enfin euh apres on fait un ++ alors hein) + /* */ + } else if (opt->urlmode >= 4) { // ne rien faire dans tous les cas! + /* */ + /* leave the link 'as is' */ + /* Sinon, dépend de interne/externe */ + } else if (forbidden_url==1) { // le lien ne sera pas chargé, référence externe! 
+ if ((opt->getmode & 1) && (ptr>0)) { + if (p_type!=-1) { // pas que le nom de fichier (pas classe java) + if (!opt->external) { + if (!link_has_authority(adr)) { + HT_ADD("http://"); if (!opt->passprivacy) { - HT_ADD(jump_protocol(adr)); // Password + HT_ADD_HTMLESCAPED(adr); // Password } else { - HT_ADD(jump_identification(adr)); // No Password + HT_ADD_HTMLESCAPED(jump_identification(adr)); // No Password } if (*fil!='/') HT_ADD("/"); - HT_ADD(fil); + HT_ADD_HTMLESCAPED(fil); + } else { + char* aut = strstr(adr, "//"); + if (aut) { + char tmp[256]; + tmp[0]='\0'; + strncatbuff(tmp, adr, (int) (aut - adr)); // scheme + HT_ADD(tmp); // Protocol + HT_ADD("//"); + if (!opt->passprivacy) { + HT_ADD_HTMLESCAPED(jump_protocol(adr)); // Password + } else { + HT_ADD_HTMLESCAPED(jump_identification(adr)); // No Password + } + if (*fil!='/') + HT_ADD("/"); + HT_ADD_HTMLESCAPED(fil); + } } - } - // - } else { // fichier/page externe, mais on veut générer une erreur - // - int patch_it=0; - int add_url=0; - char* cat_name=NULL; - char* cat_data=NULL; - int cat_nb=0; - int cat_data_len=0; - - // ajouter lien external - switch ( (link_has_authority(adr)) ? 1 : ( (fil[strlen(fil)-1]=='/')?1:(ishtml(fil)) ) ) { + // + } else { // fichier/page externe, mais on veut générer une erreur + // + int patch_it=0; + int add_url=0; + char* cat_name=NULL; + char* cat_data=NULL; + int cat_nb=0; + int cat_data_len=0; + + // ajouter lien external + switch ( (link_has_authority(adr)) ? 
1 : ( (fil[strlen(fil)-1]=='/')?1:(ishtml(fil)) ) ) { case 1: case -2: // html ou répertoire if (opt->getmode & 1) { // sauver html patch_it=1; // redirect @@ -2308,108 +2473,108 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { || (strfield2(fil+max(0,(int)strlen(fil)-4),".xbm")) /*|| (ishtml(fil)!=0)*/ ) { patch_it=1; // redirect - add_url=1; // avec link aussi - cat_name="external.gif"; - cat_nb=1; - cat_data=HTS_DATA_UNKNOWN_GIF; - cat_data_len=HTS_DATA_UNKNOWN_GIF_LEN; - } else /* if (is_dyntype(get_ext(fil))) */ { - patch_it=1; // redirect - add_url=1; // avec link? - cat_name="external.html"; - cat_nb=0; - cat_data=HTS_DATA_UNKNOWN_HTML; - cat_data_len=HTS_DATA_UNKNOWN_HTML_LEN; - } - break; - }// html,gif - - if (patch_it) { - char save[HTS_URLMAXSIZE*2]; - char tempo[HTS_URLMAXSIZE*2]; - strcpybuff(save,opt->path_html); - strcatbuff(save,cat_name); - if (lienrelatif(tempo,save, relativesavename)==0) { - if (!no_esc_utf) - escape_uri(tempo); // escape with %xx - else - escape_uri_utf(tempo); // escape with %xx - HT_ADD(tempo); // page externe - if (add_url) { - HT_ADD("?link="); // page externe - - // same as above - if (!link_has_authority(adr)) { - HT_ADD("http://"); - if (!opt->passprivacy) { - HT_ADD(adr); // Password - } else { - HT_ADD(jump_identification(adr)); // No Password - } - if (*fil!='/') - HT_ADD("/"); - HT_ADD(fil); - } else { - char* aut = strstr(adr, "//"); - if (aut) { - char tmp[256]; - tmp[0]='\0'; - strncatbuff(tmp, adr, (int) (aut - adr) + 2); // scheme - HT_ADD(tmp); + add_url=1; // avec link aussi + cat_name="external.gif"; + cat_nb=1; + cat_data=HTS_DATA_UNKNOWN_GIF; + cat_data_len=HTS_DATA_UNKNOWN_GIF_LEN; + } else /* if (is_dyntype(get_ext(fil))) */ { + patch_it=1; // redirect + add_url=1; // avec link? 
+ cat_name="external.html"; + cat_nb=0; + cat_data=HTS_DATA_UNKNOWN_HTML; + cat_data_len=HTS_DATA_UNKNOWN_HTML_LEN; + } + break; + }// html,gif + + if (patch_it) { + char BIGSTK save[HTS_URLMAXSIZE*2]; + char BIGSTK tempo[HTS_URLMAXSIZE*2]; + strcpybuff(save,opt->path_html); + strcatbuff(save,cat_name); + if (lienrelatif(tempo,save, relativesavename)==0) { + if (!no_esc_utf) + escape_uri(tempo); // escape with %xx + else + escape_uri_utf(tempo); // escape with %xx + HT_ADD_HTMLESCAPED(tempo); // page externe + if (add_url) { + HT_ADD("?link="); // page externe + + // same as above + if (!link_has_authority(adr)) { + HT_ADD("http://"); if (!opt->passprivacy) { - HT_ADD(jump_protocol(adr)); // Password + HT_ADD_HTMLESCAPED(adr); // Password } else { - HT_ADD(jump_identification(adr)); // No Password + HT_ADD_HTMLESCAPED(jump_identification(adr)); // No Password } if (*fil!='/') HT_ADD("/"); - HT_ADD(fil); + HT_ADD_HTMLESCAPED(fil); + } else { + char* aut = strstr(adr, "//"); + if (aut) { + char tmp[256]; + tmp[0]='\0'; + strncatbuff(tmp, adr, (int) (aut - adr) + 2); // scheme + HT_ADD(tmp); + if (!opt->passprivacy) { + HT_ADD_HTMLESCAPED(jump_protocol(adr)); // Password + } else { + HT_ADD_HTMLESCAPED(jump_identification(adr)); // No Password + } + if (*fil!='/') + HT_ADD("/"); + HT_ADD_HTMLESCAPED(fil); + } } + // + } - // - } - } - - // écrire fichier? - if (verif_external(cat_nb,1)) { - //if (!fexist(fconcat(opt->path_html,cat_name))) { - FILE* fp = filecreate(fconcat(opt->path_html,cat_name)); - if (fp) { - if (cat_data_len==0) { // texte - verif_backblue(opt,opt->path_html); - fprintf(fp,"%s%s","<!-- Created by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->"LF,cat_data); - } else { // data - fwrite(cat_data,cat_data_len,1,fp); + + // écrire fichier? 
+ if (verif_external(cat_nb,1)) { + //if (!fexist(fconcat(opt->path_html,cat_name))) { + FILE* fp = filecreate(fconcat(opt->path_html,cat_name)); + if (fp) { + if (cat_data_len==0) { // texte + verif_backblue(opt,opt->path_html); + fprintf(fp,"%s%s","<!-- Created by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->"LF,cat_data); + } else { // data + fwrite(cat_data,cat_data_len,1,fp); + } + fclose(fp); + usercommand(opt,0,NULL,fconcat(opt->path_html,cat_name),"",""); } - fclose(fp); - usercommand(opt,0,NULL,fconcat(opt->path_html,cat_name),"",""); } - } - } else { // écrire normalement le nom de fichier - HT_ADD("http://"); - if (!opt->passprivacy) { - HT_ADD(adr); // Password - } else { - HT_ADD(jump_identification(adr)); // No Password - } - if (*fil!='/') - HT_ADD("/"); - HT_ADD(fil); - }// patcher? + } else { // écrire normalement le nom de fichier + HT_ADD("http://"); + if (!opt->passprivacy) { + HT_ADD_HTMLESCAPED(adr); // Password + } else { + HT_ADD_HTMLESCAPED(jump_identification(adr)); // No Password + } + if (*fil!='/') + HT_ADD("/"); + HT_ADD_HTMLESCAPED(fil); + }// patcher? 
} // external } else { // que le nom de fichier (classe java) // en gros recopie de plus bas: copier codebase et base if (p_flush) { - char tempo[HTS_URLMAXSIZE*2]; // <-- ajouté - char tempo_pat[HTS_URLMAXSIZE*2]; - + char BIGSTK tempo[HTS_URLMAXSIZE*2]; // <-- ajouté + char BIGSTK tempo_pat[HTS_URLMAXSIZE*2]; + // Calculer chemin tempo_pat[0]='\0'; strcpybuff(tempo,fil); // <-- ajouté { char* a=strrchr(tempo,'/'); - + // Example: we converted code="x.y.z.foo.class" into "x/y/z/foo.class" // we have to do the contrary now if (add_class_dots_to_patch>0) { @@ -2426,33 +2591,33 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { } } } - + // Cut path/filename if (a) { - char tempo2[HTS_URLMAXSIZE*2]; + char BIGSTK tempo2[HTS_URLMAXSIZE*2]; strcpybuff(tempo2,a+1); // FICHIER strncatbuff(tempo_pat,tempo,(int) (a - tempo)+1); // chemin strcpybuff(tempo,tempo2); // fichier } } - + // érire codebase="chemin" if ((opt->getmode & 1) && (ptr>0)) { - char tempo4[HTS_URLMAXSIZE*2]; + char BIGSTK tempo4[HTS_URLMAXSIZE*2]; tempo4[0]='\0'; - + if (strnotempty(tempo_pat)) { HT_ADD("codebase=\"http://"); if (!opt->passprivacy) { - HT_ADD(adr); // Password + HT_ADD_HTMLESCAPED(adr); // Password } else { - HT_ADD(jump_identification(adr)); // No Password + HT_ADD_HTMLESCAPED(jump_identification(adr)); // No Password } if (*tempo_pat!='/') HT_ADD("/"); HT_ADD(tempo_pat); HT_ADD("\" "); } - + strncatbuff(tempo4,lastsaved,(int) (p_flush - lastsaved)); HT_ADD(tempo4); // refresh code=" HT_ADD(tempo); @@ -2476,46 +2641,53 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { lastsaved=eadr-1; // dernier écrit+1 (enfin euh apres on fait un ++ alors hein) } */ - else if (opt->mimehtml) { - char buff[HTS_URLMAXSIZE*3]; - HT_ADD("cid:"); - strcpybuff(buff, adr); - strcatbuff(buff, fil); - escape_in_url(buff); - { char* a = buff; while((a = strchr(a, '%'))) { *a = 'X'; a++; } } - HT_ADD(buff); - lastsaved=eadr-1; // dernier écrit+1 (enfin euh apres on fait 
un ++ alors hein) - } - else if (opt->urlmode==3) { // URI absolue / - if ((opt->getmode & 1) && (ptr>0)) { // ecrire les html - HT_ADD(fil); + else if (opt->mimehtml) { + char BIGSTK buff[HTS_URLMAXSIZE*3]; + HT_ADD("cid:"); + strcpybuff(buff, adr); + strcatbuff(buff, fil); + escape_in_url(buff); + { char* a = buff; while((a = strchr(a, '%'))) { *a = 'X'; a++; } } + HT_ADD_HTMLESCAPED(buff); + lastsaved=eadr-1; // dernier écrit+1 (enfin euh apres on fait un ++ alors hein) } - lastsaved=eadr-1; // dernier écrit+1 (enfin euh apres on fait un ++ alors hein) - } - else if (opt->urlmode==2) { // RELATIF - char tempo[HTS_URLMAXSIZE*2]; + else if (opt->urlmode==3) { // URI absolue / + if ((opt->getmode & 1) && (ptr>0)) { // ecrire les html + HT_ADD_HTMLESCAPED(fil); + } + lastsaved=eadr-1; // dernier écrit+1 (enfin euh apres on fait un ++ alors hein) + } + else if (opt->urlmode==2) { // RELATIF + char BIGSTK tempo[HTS_URLMAXSIZE*2]; tempo[0]='\0'; // calculer le lien relatif - + if (lienrelatif(tempo,save,relativesavename)==0) { - if (!no_esc_utf) - escape_uri(tempo); // escape with %xx - else - escape_uri_utf(tempo); // escape with %xx + if (!in_media) { // In media (such as real audio): don't patch + if (!no_esc_utf) + escape_uri(tempo); // escape with %xx + else { + /* No escaping at all - remaining upper chars will be escaped below */ + /* FIXME - Should be done in all local cases */ + //x_escape_html(tempo); + //escape_uri_utf(tempo); // FIXME - escape with %xx + //escape_uri(tempo); // escape with %xx + } + } if ((opt->debug>1) && (opt->log!=NULL)) { fspc(opt->log,"debug"); fprintf(opt->log,"relative link at %s build with %s and %s: %s"LF,adr,save,relativesavename,tempo); test_flush; } - + // lien applet (code) - il faut placer un codebase avant if (p_type==-1) { // que le nom de fichier - + if (p_flush) { - char tempo_pat[HTS_URLMAXSIZE*2]; + char BIGSTK tempo_pat[HTS_URLMAXSIZE*2]; tempo_pat[0]='\0'; { char* a=strrchr(tempo,'/'); - + // Example: we converted 
code="x.y.z.foo.class" into "x/y/z/foo.class" // we have to do the contrary now if (add_class_dots_to_patch>0) { @@ -2532,43 +2704,44 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { } } } - + if (a) { - char tempo2[HTS_URLMAXSIZE*2]; + char BIGSTK tempo2[HTS_URLMAXSIZE*2]; strcpybuff(tempo2,a+1); strncatbuff(tempo_pat,tempo,(int) (a - tempo)+1); // chemin strcpybuff(tempo,tempo2); // fichier } } - + // érire codebase="chemin" if ((opt->getmode & 1) && (ptr>0)) { - char tempo4[HTS_URLMAXSIZE*2]; + char BIGSTK tempo4[HTS_URLMAXSIZE*2]; tempo4[0]='\0'; - + if (strnotempty(tempo_pat)) { HT_ADD("codebase=\""); - HT_ADD(tempo_pat); + HT_ADD_HTMLESCAPED(tempo_pat); HT_ADD("\" "); } - + strncatbuff(tempo4,lastsaved,(int) (p_flush - lastsaved)); HT_ADD(tempo4); // refresh code=" } } //lastsaved=adr; // dernier écrit+1 } - + if ((opt->getmode & 1) && (ptr>0)) { // écrire le lien modifié, relatif - HT_ADD(tempo); - + // Note: escape all chars, even >127 (no UTF) + HT_ADD_HTMLESCAPED_FULL(tempo); + // Add query-string, for informational purpose only // Useless, because all parameters-pages are saved into different targets if (opt->includequery) { char* a=strchr(lien,'?'); if (a) { - HT_ADD(a); + HT_ADD_HTMLESCAPED(a); } } } @@ -2580,8 +2753,8 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { } } } // sinon le lien sera écrit normalement - - + + #if 0 if (fexist(save)) { // le fichier existe.. adr[0]='\0'; @@ -2592,7 +2765,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { } } #endif - + /* Security check */ if (strlen(save) >= HTS_URLMAXSIZE) { adr[0]='\0'; @@ -2601,7 +2774,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { test_flush; } } - + if ((adr[0]!='\0') && (p_type!=2) && (p_type!=-2) && (forbidden_url!=1) ) { // si le fichier n'existe pas, ajouter à la liste // n'y a-t-il pas trop de liens? if (lien_tot+1 >= lien_max-4) { // trop de liens! 
@@ -2614,10 +2787,10 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { if ((opt->getmode & 1) && (ptr>0)) { if (fp) { fclose(fp); fp=NULL; } } XH_uninit; // désallocation mémoire & buffers return -1; - + } else { // noter le lien sur la listes des liens à charger int pass_fix,dejafait=0; - + // Calculer la priorité de ce lien if ((opt->getmode & 4)==0) { // traiter html après pass_fix=0; @@ -2627,7 +2800,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { else pass_fix=max(0,numero_passe); // priorité normale } - + /* If the file seems to be an html file, get depth-1 */ /* if (strnotempty(save)) { @@ -2638,7 +2811,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { } } */ - + // vérifier que le lien n'a pas déja été noté // si c'est le cas, alors il faut s'assurer que la priorité associée // au fichier est la plus grande des deux priorités @@ -2653,9 +2826,9 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { strcmp(adr, liens[i]->adr) != 0 || strcmp(fil, liens[i]->fil) != 0 ) { - fspc(opt->log,"debug"); fprintf(opt->log,"merging similar links %s%s and %s%s"LF,adr,fil,liens[i]->adr,liens[i]->fil); - test_flush; - } + fspc(opt->log,"debug"); fprintf(opt->log,"merging similar links %s%s and %s%s"LF,adr,fil,liens[i]->adr,liens[i]->fil); + test_flush; + } } liens[i]->depth=maximum(liens[i]->depth,liens[ptr]->depth - 1); dejafait=1; @@ -2676,7 +2849,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { } } #endif - + // le lien n'a jamais été créé. // cette fois ci, on le crée! 
if (!dejafait) { @@ -2686,57 +2859,57 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { // enregistrer lien à charger //liens[lien_tot]->adr[0]=liens[lien_tot]->fil[0]=liens[lien_tot]->sav[0]='\0'; // même adresse: l'objet père est l'objet père de l'actuel - + // DEBUT ROBOTS.TXT AJOUT if (!just_test_it) { if ( (!strfield(adr,"ftp://")) // non ftp && (!strfield(adr,"file://")) ) { // non file - if (opt->robots) { // récupérer robots - if (ishtml(fil)!=0) { // pas la peine pour des fichiers isolés - if (checkrobots(_ROBOTS,adr,"") != -1) { // robots.txt ? - checkrobots_set(_ROBOTS ,adr,""); // ajouter entrée vide - if (checkrobots(_ROBOTS,adr,"") == -1) { // robots.txt ? - // enregistrer robots.txt (MACRO) - liens_record(adr,"/robots.txt","","",""); - if (liens[lien_tot]==NULL) { // erreur, pas de place réservée - printf("PANIC! : Not enough memory [%d]\n",__LINE__); - if (opt->errlog) { - fprintf(opt->errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url))); - test_flush; - } - if ((opt->getmode & 1) && (ptr>0)) { if (fp) { fclose(fp); fp=NULL; } } - XH_uninit; // désallocation mémoire & buffers - return -1; - } - liens[lien_tot]->testmode=0; // pas mode test - liens[lien_tot]->link_import=0; // pas mode import - liens[lien_tot]->premier=lien_tot; - liens[lien_tot]->precedent=ptr; - liens[lien_tot]->depth=0; - liens[lien_tot]->pass2=max(0,numero_passe); - liens[lien_tot]->retry=0; - lien_tot++; // UN LIEN DE PLUS + if (opt->robots) { // récupérer robots + if (ishtml(fil)!=0) { // pas la peine pour des fichiers isolés + if (checkrobots(_ROBOTS,adr,"") != -1) { // robots.txt ? + checkrobots_set(_ROBOTS ,adr,""); // ajouter entrée vide + if (checkrobots(_ROBOTS,adr,"") == -1) { // robots.txt ? + // enregistrer robots.txt (MACRO) + liens_record(adr,"/robots.txt","","",""); + if (liens[lien_tot]==NULL) { // erreur, pas de place réservée + printf("PANIC! 
: Not enough memory [%d]\n",__LINE__); + if (opt->errlog) { + fprintf(opt->errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url))); + test_flush; + } + if ((opt->getmode & 1) && (ptr>0)) { if (fp) { fclose(fp); fp=NULL; } } + XH_uninit; // désallocation mémoire & buffers + return -1; + } + liens[lien_tot]->testmode=0; // pas mode test + liens[lien_tot]->link_import=0; // pas mode import + liens[lien_tot]->premier=lien_tot; + liens[lien_tot]->precedent=ptr; + liens[lien_tot]->depth=0; + liens[lien_tot]->pass2=max(0,numero_passe); + liens[lien_tot]->retry=0; + lien_tot++; // UN LIEN DE PLUS #if DEBUG_ROBOTS - printf("robots.txt: added file robots.txt for %s\n",adr); + printf("robots.txt: added file robots.txt for %s\n",adr); #endif - if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"robots.txt added at %s"LF,adr); - test_flush; - } - } else { - if (opt->errlog) { - fprintf(opt->errlog,"Unexpected robots.txt error at %d"LF,__LINE__); - test_flush; + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"robots.txt added at %s"LF,adr); + test_flush; + } + } else { + if (opt->errlog) { + fprintf(opt->errlog,"Unexpected robots.txt error at %d"LF,__LINE__); + test_flush; + } } } } } } - } } // FIN ROBOTS.TXT AJOUT - + // enregistrer (MACRO) liens_record(adr,fil,save,former_adr,former_fil); if (liens[lien_tot]==NULL) { // erreur, pas de place réservée @@ -2749,7 +2922,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { XH_uninit; // désallocation mémoire & buffers return -1; } - + // mode test? 
if (!just_test_it) liens[lien_tot]->testmode=0; // pas mode test @@ -2765,7 +2938,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { else // sinon l'objet père est le précédent lui même liens[lien_tot]->premier=lien_tot; // liens[lien_tot]->premier=ptr; - + liens[lien_tot]->precedent=ptr; // noter la priorité if (!set_prio_to) @@ -2775,7 +2948,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { // noter pass liens[lien_tot]->pass2=pass_fix; liens[lien_tot]->retry=opt->retry; - + //strcpybuff(liens[lien_tot]->adr,adr); //strcpybuff(liens[lien_tot]->fil,fil); //strcpybuff(liens[lien_tot]->sav,save); @@ -2787,185 +2960,203 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { } test_flush; } - + lien_tot++; // UN LIEN DE PLUS } else { // if !dejafait if ((opt->debug>1) && (opt->log!=NULL)) { fspc(opt->log,"debug"); fprintf(opt->log,"link has already been recorded, cancelled: %s"LF,save); test_flush; } - + } - - + + } // si pas trop de liens } // si adr[0]!='\0' - - + + } // if adr[0]!='\0' - + } // if adr[0]!='\0' - + } // if strlen(lien)>0 - + } // if ok==0 - - adr=eadr-1; // ** sauter - /* We skipped bytes and skip the " : reset state */ - if (inscript) { - inscript_state_pos = INSCRIPT_START; + assertf(eadr - adr >= 0); // Should not go back + if (eadr > adr) { + INCREMENT_CURRENT_ADR(eadr - 1 - adr); } + // adr=eadr-1; // ** sauter - } // if (p) - - } // si '<' ou '>' - - // plus loin - adr++; - - - /* Otimization: if we are scanning in HTML data (not in tag or script), - then jump to the next starting tag */ - if (ptr>0) { - if ( (!intag) /* Not in tag */ - && (!inscript) /* Not in (java)script */ - && (!incomment) /* Not in comment (<!--) */ - && (!inscript_tag) /* Not in tag with script inside */ - ) - { - /* Not at the end */ - if (( ((int) (adr - r->adr)) ) < r->size) { - /* Not on a starting tag yet */ - if (*adr != '<') { - /* strchr does not well behave with null chrs.. 
*/ - /* char* adr_next = strchr(adr,'<'); */ - char* adr_next = adr; - while(*adr_next != '<' && (adr_next - r->adr) < r->size ) { - adr_next++; - } - /* Jump to near end (index hack) */ - if (!adr_next || *adr_next != '<') { - if ( - ( (int)(adr - r->adr) < (r->size - 4)) - && - (r->size > 4) - ) { + /* We skipped bytes and skip the " : reset state */ + /*if (inscript) { + inscript_state_pos = INSCRIPT_START; + }*/ + + } // if (p) + + } // si '<' ou '>' + + // plus loin + adr++; // automate will be checked next loop + + + /* Otimization: if we are scanning in HTML data (not in tag or script), + then jump to the next starting tag */ + if (ptr>0) { + if ( (!intag) /* Not in tag */ + && (!inscript) /* Not in (java)script */ + && (!in_media) /* Not in media */ + && (!incomment) /* Not in comment (<!--) */ + && (!inscript_tag) /* Not in tag with script inside */ + ) + { + /* Not at the end */ + if (( ((int) (adr - r->adr)) ) < r->size) { + /* Not on a starting tag yet */ + if (*adr != '<') { + /* strchr does not well behave with null chrs.. 
*/ + /* char* adr_next = strchr(adr,'<'); */ + char* adr_next = adr; + while(*adr_next != '<' && (adr_next - r->adr) < r->size ) { + adr_next++; + } + /* Jump to near end (index hack) */ + if (!adr_next || *adr_next != '<') { + if ( + ( (int)(adr - r->adr) < (r->size - 4)) + && + (r->size > 4) + ) { adr = r->adr + r->size - 2; } - } else { - adr = adr_next; - } + } else { + adr = adr_next; } } } } - - // ---------- - // écrire peu à peu - if ((opt->getmode & 1) && (ptr>0)) HT_ADD_ADR; - lastsaved=adr; // dernier écrit+1 - // ---------- - - // Checks - if (back_add_stats != opt->state.back_add_stats) { - back_add_stats = opt->state.back_add_stats; - - // Check max time - if (!back_checkmirror(opt)) { - adr = r->adr + r->size; - } + } + + // ---------- + // écrire peu à peu + if ((opt->getmode & 1) && (ptr>0)) HT_ADD_ADR; + lastsaved=adr; // dernier écrit+1 + // ---------- + + // Checks + if (back_add_stats != opt->state.back_add_stats) { + back_add_stats = opt->state.back_add_stats; + + // Check max time + if (!back_checkmirror(opt)) { + adr = r->adr + r->size; } + } - // pour les stats du shell si parsing trop long + // pour les stats du shell si parsing trop long #if HTS_ANALYSTE - if (r->size) - _hts_in_html_done=(100 * ((int) (adr - r->adr)) ) / (int)(r->size); - if (_hts_in_html_poll) { - _hts_in_html_poll=0; - // temps à attendre, et remplir autant que l'on peut le cache (backing) - back_wait(back,back_max,opt,cache,HTS_STAT.stat_timestart); - back_fillmax(back,back_max,opt,cache,liens,ptr,numero_passe,lien_tot); - - // Transfer rate - engine_stats(); - - // Refresh various stats - HTS_STAT.stat_nsocket=back_nsoc(back,back_max); - HTS_STAT.stat_errors=fspc(NULL,"error"); - HTS_STAT.stat_warnings=fspc(NULL,"warning"); - HTS_STAT.stat_infos=fspc(NULL,"info"); - HTS_STAT.nbk=backlinks_done(liens,lien_tot,ptr); - HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,back,back_max); - - if (!hts_htmlcheck_loop(back,back_max,0,ptr,lien_tot,(int) 
(time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) { - if (opt->errlog) { - fspc(opt->errlog,"info"); fprintf(opt->errlog,"Exit requested by shell or user"LF); - test_flush; - } - *stre->exit_xh_=1; // exit requested - XH_uninit; - return -1; - //adr = r->adr + r->size; // exit - } else if (_hts_cancel==1) { - // adr = r->adr + r->size; // exit - nofollow=1; // moins violent - _hts_cancel=0; - } - } - - // refresh the backing system each 2 seconds - if (engine_stats()) { - back_wait(back,back_max,opt,cache,HTS_STAT.stat_timestart); - back_fillmax(back,back_max,opt,cache,liens,ptr,numero_passe,lien_tot); + if (r->size) + _hts_in_html_done=(100 * ((int) (adr - r->adr)) ) / (int)(r->size); + if (_hts_in_html_poll) { + _hts_in_html_poll=0; + // temps à attendre, et remplir autant que l'on peut le cache (backing) + back_wait(back,back_max,opt,cache,HTS_STAT.stat_timestart); + back_fillmax(back,back_max,opt,cache,liens,ptr,numero_passe,lien_tot); + + // Transfer rate + engine_stats(); + + // Refresh various stats + HTS_STAT.stat_nsocket=back_nsoc(back,back_max); + HTS_STAT.stat_errors=fspc(NULL,"error"); + HTS_STAT.stat_warnings=fspc(NULL,"warning"); + HTS_STAT.stat_infos=fspc(NULL,"info"); + HTS_STAT.nbk=backlinks_done(liens,lien_tot,ptr); + HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,back,back_max); + + if (!hts_htmlcheck_loop(back,back_max,0,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) { + if (opt->errlog) { + fspc(opt->errlog,"info"); fprintf(opt->errlog,"Exit requested by shell or user"LF); + test_flush; + } + *stre->exit_xh_=1; // exit requested + XH_uninit; + return -1; + //adr = r->adr + r->size; // exit + } else if (_hts_cancel==1) { + // adr = r->adr + r->size; // exit + nofollow=1; // moins violent + _hts_cancel=0; } + } + + // refresh the backing system each 2 seconds + if (engine_stats()) { + back_wait(back,back_max,opt,cache,HTS_STAT.stat_timestart); + back_fillmax(back,back_max,opt,cache,liens,ptr,numero_passe,lien_tot); + } 
#endif - } while(( ((int) (adr - r->adr)) ) < r->size); + } while(( ((int) (adr - r->adr)) ) < r->size); #if HTS_ANALYSTE - _hts_in_html_parsing=0; // flag - _hts_cancel=0; // pas de cancel + _hts_in_html_parsing=0; // flag + _hts_cancel=0; // pas de cancel #endif - if ((opt->getmode & 1) && (ptr>0)) { - HT_ADD_END; // achever + if ((opt->getmode & 1) && (ptr>0)) { + { + char* cAddr = ht_buff; + int cSize = ht_len; + if ( (opt->debug>0) && (opt->log!=NULL) ) { + fspc(opt->log,"info"); fprintf(opt->log,"engine: postprocess-html: %s%s"LF, urladr, urlfil); + } + if (hts_htmlcheck_postprocess(&cAddr, &cSize, urladr, urlfil) == 1) { + ht_buff = cAddr; + ht_len = cSize; + } } - // - // - // - } // if !error - - - if (opt->getmode & 1) { if (fp) { fclose(fp); fp=NULL; } } - // sauver fichier - //structcheck(savename); - //filesave(opt,r->adr,r->size,savename); - + + /* Flush and save to disk */ + HT_ADD_END; // achever + } + // + // + // + } // if !error + + + if (opt->getmode & 1) { if (fp) { fclose(fp); fp=NULL; } } + // sauver fichier + //structcheck(savename); + //filesave(opt,r->adr,r->size,savename); + #if HTS_ANALYSTE - } // analyse OK + } // analyse OK #endif - /* Apply changes */ - ENGINE_SAVE_CONTEXT(); - - return 0; + /* Apply changes */ + ENGINE_SAVE_CONTEXT(); + + return 0; } /* - Check 301, 302, .. statuscodes (moved) +Check 301, 302, .. statuscodes (moved) */ int hts_mirror_check_moved(htsmoduleStruct* str, htsmoduleStructExtended* stre) { /* Load engine variables */ ENGINE_LOAD_CONTEXT(); - + // DEBUT rattrapage des 301,302,307.. 
// ------------------------------------------------------------ if (!error) { ////////{ // on a chargé un fichier en plus // if (!error) stat_loaded+=r.size; - + // ------------------------------------------------------------ // Rattrapage des 301,302,307 (moved) et 412,416 - les 304 le sont dans le backing // ------------------------------------------------------------ @@ -2974,187 +3165,205 @@ int hts_mirror_check_moved(htsmoduleStruct* str, htsmoduleStructExtended* stre) || (r->statuscode==303) || (r->statuscode==307) ) { - //if (r->adr!=NULL) { // adr==null si fichier direct. [catch: davename normalement si cgi] - //int i=0; - char *rn=NULL; - // char* p; - - if ( (opt->debug>0) && (opt->errlog!=NULL) ) { - //if (opt->errlog) { - fspc(opt->errlog,"warning"); fprintf(opt->errlog,"%s for %s%s"LF,r->msg,urladr,urlfil); - test_flush; - } - - - { - char mov_url[HTS_URLMAXSIZE*2],mov_adr[HTS_URLMAXSIZE*2],mov_fil[HTS_URLMAXSIZE*2]; - int get_it=0; // ne pas prendre le fichier à la même adresse par défaut - int reponse=0; - mov_url[0]='\0'; mov_adr[0]='\0'; mov_fil[0]='\0'; - // - - strcpybuff(mov_url,r->location); - - // url qque -> adresse+fichier - if ((reponse=ident_url_relatif(mov_url,urladr,urlfil,mov_adr,mov_fil))>=0) { - int set_prio_to=0; // pas de priotité fixéd par wizard - - //if (ident_url_absolute(mov_url,mov_adr,mov_fil)!=-1) { // ok URL reconnue - // c'est (en gros) la même URL.. - // si c'est un problème de casse dans le host c'est que le serveur est buggé - // ("RFC says.." : host name IS case insensitive) - if ((strfield2(mov_adr,urladr)!=0) && (strfield2(mov_fil,urlfil)!=0)) { // identique à casse près - // on tourne en rond - if (strcmp(mov_fil,urlfil)==0) { - error=1; - get_it=-1; // ne rien faire - if (opt->errlog) { - fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Can not bear crazy server (%s) for %s%s"LF,r->msg,urladr,urlfil); - test_flush; + //if (r->adr!=NULL) { // adr==null si fichier direct. 
[catch: davename normalement si cgi] + //int i=0; + char *rn=NULL; + // char* p; + + if ( (opt->debug>0) && (opt->errlog!=NULL) ) { + //if (opt->errlog) { + fspc(opt->errlog,"warning"); fprintf(opt->errlog,"%s for %s%s"LF,r->msg,urladr,urlfil); + test_flush; + } + + + { + char BIGSTK mov_url[HTS_URLMAXSIZE*2],mov_adr[HTS_URLMAXSIZE*2],mov_fil[HTS_URLMAXSIZE*2]; + int get_it=0; // ne pas prendre le fichier à la même adresse par défaut + int reponse=0; + mov_url[0]='\0'; mov_adr[0]='\0'; mov_fil[0]='\0'; + // + + strcpybuff(mov_url,r->location); + + // url qque -> adresse+fichier + if ((reponse=ident_url_relatif(mov_url,urladr,urlfil,mov_adr,mov_fil))>=0) { + int set_prio_to=0; // pas de priotité fixéd par wizard + + // check whether URLHack is harmless or not + if (opt->urlhack) { + char BIGSTK n_adr[HTS_URLMAXSIZE*2], n_fil[HTS_URLMAXSIZE*2]; + char BIGSTK pn_adr[HTS_URLMAXSIZE*2], pn_fil[HTS_URLMAXSIZE*2]; + n_adr[0] = n_fil[0] = '\0'; + (void) adr_normalized(mov_adr, n_adr); + (void) fil_normalized(mov_fil, n_fil); + (void) adr_normalized(urladr, pn_adr); + (void) fil_normalized(urlfil, pn_fil); + if (strcasecmp(n_adr, pn_adr) == 0 && strcasecmp(n_fil, pn_fil) == 0) { + if (opt->errlog) { + fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Redirected link is identical because of 'URL Hack' option: %s%s and %s%s"LF, urladr, urlfil, mov_adr, mov_fil); + test_flush; + } } - } else { // mauvaise casse, effacer entrée dans la pile et rejouer une fois - get_it=1; } - } else { // adresse différente - if (ishtml(mov_url)==0) { // pas même adresse MAIS c'est un fichier non html (pas de page moved possible) - // -> on prend à cette adresse, le lien sera enregistré avec lien_record() (hash) - if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"wizard link test for moved file at %s%s.."LF,mov_adr,mov_fil); - test_flush; - } - // accepté? 
- if (hts_acceptlink(opt,ptr,lien_tot,liens, - mov_adr,mov_fil, - &set_prio_to, - NULL) != 1) { /* nouvelle adresse non refusée ? */ + + //if (ident_url_absolute(mov_url,mov_adr,mov_fil)!=-1) { // ok URL reconnue + // c'est (en gros) la même URL.. + // si c'est un problème de casse dans le host c'est que le serveur est buggé + // ("RFC says.." : host name IS case insensitive) + if ((strfield2(mov_adr,urladr)!=0) && (strfield2(mov_fil,urlfil)!=0)) { // identique à casse près + // on tourne en rond + if (strcmp(mov_fil,urlfil)==0) { + error=1; + get_it=-1; // ne rien faire + if (opt->errlog) { + fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Can not bear crazy server (%s) for %s%s"LF,r->msg,urladr,urlfil); + test_flush; + } + } else { // mauvaise casse, effacer entrée dans la pile et rejouer une fois get_it=1; + } + } else { // adresse différente + if (ishtml(mov_url)==0) { // pas même adresse MAIS c'est un fichier non html (pas de page moved possible) + // -> on prend à cette adresse, le lien sera enregistré avec lien_record() (hash) if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"moved link accepted: %s%s"LF,mov_adr,mov_fil); + fspc(opt->log,"debug"); fprintf(opt->log,"wizard link test for moved file at %s%s.."LF,mov_adr,mov_fil); test_flush; } - } - } /* sinon traité normalement */ - } - - //if ((strfield2(mov_adr,urladr)!=0) && (strfield2(mov_fil,urlfil)!=0)) { // identique à casse près - if (get_it==1) { - // court-circuiter le reste du traitement - // et reculer pour mieux sauter - if (opt->errlog) { - fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Warning moved treated for %s%s (real one is %s%s)"LF,urladr,urlfil,mov_adr,mov_fil); - test_flush; - } - // canceller lien actuel - error=1; - strcpybuff(liens[ptr]->adr,"!"); // caractère bidon (invalide hash) + // accepté? + if (hts_acceptlink(opt,ptr,lien_tot,liens, + mov_adr,mov_fil, + NULL, NULL, + &set_prio_to, + NULL) != 1) { /* nouvelle adresse non refusée ? 
*/ + get_it=1; + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"moved link accepted: %s%s"LF,mov_adr,mov_fil); + test_flush; + } + } + } /* sinon traité normalement */ + } + + //if ((strfield2(mov_adr,urladr)!=0) && (strfield2(mov_fil,urlfil)!=0)) { // identique à casse près + if (get_it==1) { + // court-circuiter le reste du traitement + // et reculer pour mieux sauter + if (opt->errlog) { + fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Warning moved treated for %s%s (real one is %s%s)"LF,urladr,urlfil,mov_adr,mov_fil); + test_flush; + } + // canceller lien actuel + error=1; + strcpybuff(liens[ptr]->adr,"!"); // caractère bidon (invalide hash) #if HTS_HASH #else - liens[ptr]->sav_len=-1; // taille invalide + liens[ptr]->sav_len=-1; // taille invalide #endif - // noter NOUVEAU lien - //xxc xxc - // set_prio_to=0+1; // protection if the moved URL is an html page!! - //xxc xxc - { - char mov_sav[HTS_URLMAXSIZE*2]; - // calculer lien et éventuellement modifier addresse/fichier - if (url_savename(mov_adr,mov_fil,mov_sav,NULL,NULL,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil,opt,liens,lien_tot,back,back_max,cache,hash,ptr,numero_passe)!=-1) { - if (hash_read(hash,mov_sav,"",0,0)<0) { // n'existe pas déja - // enregistrer lien (MACRO) avec SAV IDENTIQUE - liens_record(mov_adr,mov_fil,liens[ptr]->sav,"",""); - //liens_record(mov_adr,mov_fil,mov_sav,"",""); - if (liens[lien_tot]!=NULL) { // OK, pas d'erreur - // mode test? 
- liens[lien_tot]->testmode=liens[ptr]->testmode; - liens[lien_tot]->link_import=0; // mode normal - if (!set_prio_to) - liens[lien_tot]->depth=liens[ptr]->depth; - else - liens[lien_tot]->depth=max(0,min(set_prio_to-1,liens[ptr]->depth)); // PRIORITE NULLE (catch page) - liens[lien_tot]->pass2=max(liens[ptr]->pass2,numero_passe); - liens[lien_tot]->retry=liens[ptr]->retry; - liens[lien_tot]->premier=liens[ptr]->premier; - liens[lien_tot]->precedent=liens[ptr]->precedent; - lien_tot++; - } else { // oups erreur, plus de mémoire!! - printf("PANIC! : Not enough memory [%d]\n",__LINE__); - if (opt->errlog) { - fprintf(opt->errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url))); + // noter NOUVEAU lien + //xxc xxc + // set_prio_to=0+1; // protection if the moved URL is an html page!! + //xxc xxc + { + char BIGSTK mov_sav[HTS_URLMAXSIZE*2]; + // calculer lien et éventuellement modifier addresse/fichier + if (url_savename(mov_adr,mov_fil,mov_sav,NULL,NULL,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil,opt,liens,lien_tot,back,back_max,cache,hash,ptr,numero_passe)!=-1) { + if (hash_read(hash,mov_sav,"",0,0)<0) { // n'existe pas déja + // enregistrer lien (MACRO) avec SAV IDENTIQUE + liens_record(mov_adr,mov_fil,liens[ptr]->sav,"",""); + //liens_record(mov_adr,mov_fil,mov_sav,"",""); + if (liens[lien_tot]!=NULL) { // OK, pas d'erreur + // mode test? + liens[lien_tot]->testmode=liens[ptr]->testmode; + liens[lien_tot]->link_import=0; // mode normal + if (!set_prio_to) + liens[lien_tot]->depth=liens[ptr]->depth; + else + liens[lien_tot]->depth=max(0,min(set_prio_to-1,liens[ptr]->depth)); // PRIORITE NULLE (catch page) + liens[lien_tot]->pass2=max(liens[ptr]->pass2,numero_passe); + liens[lien_tot]->retry=liens[ptr]->retry; + liens[lien_tot]->premier=liens[ptr]->premier; + liens[lien_tot]->precedent=liens[ptr]->precedent; + lien_tot++; + } else { // oups erreur, plus de mémoire!! + printf("PANIC! 
: Not enough memory [%d]\n",__LINE__); + if (opt->errlog) { + fprintf(opt->errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url))); + test_flush; + } + //if (opt->getmode & 1) { if (fp) { fclose(fp); fp=NULL; } } + XH_uninit; // désallocation mémoire & buffers + return 0; + } + } else { + if ( (opt->debug>0) && (opt->errlog!=NULL) ) { + fspc(opt->errlog,"warning"); fprintf(opt->errlog,"moving %s to an existing file %s"LF,liens[ptr]->fil,urlfil); test_flush; } - //if (opt->getmode & 1) { if (fp) { fclose(fp); fp=NULL; } } - XH_uninit; // désallocation mémoire & buffers - return 0; - } - } else { - if ( (opt->debug>0) && (opt->errlog!=NULL) ) { - fspc(opt->errlog,"warning"); fprintf(opt->errlog,"moving %s to an existing file %s"LF,liens[ptr]->fil,urlfil); - test_flush; } + } - } + + //printf("-> %s %s %s\n",liens[lien_tot-1]->adr,liens[lien_tot-1]->fil,liens[lien_tot-1]->sav); + + // note métaphysique: il se peut qu'il y ait un index.html et un INDEX.HTML + // sous DOS ca marche pas très bien... mais comme je suis génial url_savename() + // est à même de régler ce problème } - - //printf("-> %s %s %s\n",liens[lien_tot-1]->adr,liens[lien_tot-1]->fil,liens[lien_tot-1]->sav); - - // note métaphysique: il se peut qu'il y ait un index.html et un INDEX.HTML - // sous DOS ca marche pas très bien... 
mais comme je suis génial url_savename() - // est à même de régler ce problème - } - } // ident_url_xx - - if (get_it==0) { // adresse vraiment différente et potentiellement en html (pas de possibilité de bouger la page tel quel à cause des <img src..> et cie) - rn=(char*) calloct(8192,1); - if (rn!=NULL) { - if (opt->errlog) { - fspc(opt->errlog,"warning"); fprintf(opt->errlog,"File has moved from %s%s to %s"LF,urladr,urlfil,mov_url); - test_flush; - } - if (!opt->mimehtml) { - escape_uri(mov_url); - } else { - char buff[HTS_URLMAXSIZE*3]; - strcpybuff(buff, mov_adr); - strcatbuff(buff, mov_fil); - escape_in_url(buff); - { char* a = buff; while((a = strchr(a, '%'))) { *a = 'X'; a++; } } - strcpybuff(mov_url, "cid:"); - strcatbuff(mov_url, buff); - } - // On prépare une page qui sautera immédiatement sur la bonne URL - // Le scanner re-changera, ensuite, cette URL, pour la mirrorer! - strcpybuff(rn,"<HTML>"CRLF); - strcatbuff(rn,"<!-- Created by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->"CRLF); - strcatbuff(rn,"<HEAD>"CRLF"<TITLE>Page has moved</TITLE>"CRLF"</HEAD>"CRLF"<BODY>"CRLF); - strcatbuff(rn,"<META HTTP-EQUIV=\"Refresh\" CONTENT=\"0; URL="); - strcatbuff(rn,mov_url); // URL - strcatbuff(rn,"\">"CRLF); - strcatbuff(rn,"<A HREF=\""); - strcatbuff(rn,mov_url); - strcatbuff(rn,"\">"); - strcatbuff(rn,"<B>Click here...</B></A>"CRLF); - strcatbuff(rn,"</BODY>"CRLF); - strcatbuff(rn,"<!-- Created by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->"CRLF); - strcatbuff(rn,"</HTML>"CRLF); - - // changer la page - if (r->adr) { - freet(r->adr); - r->adr=NULL; - } - r->adr=rn; - r->size=strlen(r->adr); - strcpybuff(r->contenttype,"text/html"); + } // ident_url_xx + + if (get_it==0) { // adresse vraiment différente et potentiellement en html (pas de possibilité de bouger la page tel quel à cause des <img src..> et cie) + rn=(char*) calloct(8192,1); + if (rn!=NULL) { + if (opt->errlog) { + fspc(opt->errlog,"warning"); 
fprintf(opt->errlog,"File has moved from %s%s to %s"LF,urladr,urlfil,mov_url); + test_flush; + } + if (!opt->mimehtml) { + escape_uri(mov_url); + } else { + char BIGSTK buff[HTS_URLMAXSIZE*3]; + strcpybuff(buff, mov_adr); + strcatbuff(buff, mov_fil); + escape_in_url(buff); + { char* a = buff; while((a = strchr(a, '%'))) { *a = 'X'; a++; } } + strcpybuff(mov_url, "cid:"); + strcatbuff(mov_url, buff); + } + // On prépare une page qui sautera immédiatement sur la bonne URL + // Le scanner re-changera, ensuite, cette URL, pour la mirrorer! + strcpybuff(rn,"<HTML>"CRLF); + strcatbuff(rn,"<!-- Created by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->"CRLF); + strcatbuff(rn,"<HEAD>"CRLF"<TITLE>Page has moved</TITLE>"CRLF"</HEAD>"CRLF"<BODY>"CRLF); + strcatbuff(rn,"<META HTTP-EQUIV=\"Refresh\" CONTENT=\"0; URL="); + strcatbuff(rn,mov_url); // URL + strcatbuff(rn,"\">"CRLF); + strcatbuff(rn,"<A HREF=\""); + strcatbuff(rn,mov_url); + strcatbuff(rn,"\">"); + strcatbuff(rn,"<B>Click here...</B></A>"CRLF); + strcatbuff(rn,"</BODY>"CRLF); + strcatbuff(rn,"<!-- Created by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->"CRLF); + strcatbuff(rn,"</HTML>"CRLF); + + // changer la page + if (r->adr) { + freet(r->adr); + r->adr=NULL; } - } // get_it==0 - - } // bloc - // erreur HTTP (ex: 404, not found) - } else if ( - (r->statuscode==412) - || (r->statuscode==416) - ) { // Precondition Failed, c'est à dire pour nous redemander TOUT le fichier + r->adr=rn; + r->size=strlen(r->adr); + strcpybuff(r->contenttype, "text/html"); + } + } // get_it==0 + + } // bloc + // erreur HTTP (ex: 404, not found) + } else if ( + (r->statuscode==412) + || (r->statuscode==416) + ) { // Precondition Failed, c'est à dire pour nous redemander TOUT le fichier if (fexist(liens[ptr]->sav)) { remove(liens[ptr]->sav); // Eliminer if (!fexist(liens[ptr]->sav)) { // Bien éliminé? (sinon on boucle..) 
@@ -3210,7 +3419,7 @@ int hts_mirror_check_moved(htsmoduleStruct* str, htsmoduleStructExtended* stre) } } else if (r->statuscode!=200) { int can_retry=0; - + // cas où l'on peut reessayer // -2=timeout -3=rateout (interne à httrack) switch(r->statuscode) { @@ -3251,7 +3460,7 @@ int hts_mirror_check_moved(htsmoduleStruct* str, htsmoduleStructExtended* stre) case 408: case 409: case 500: case 502: case 504: can_retry=1; break; } - + if ( strcmp(liens[ptr]->fil,"/primary") != 0 ) { // no primary (internal page 0) if ((liens[ptr]->retry<=0) || (!can_retry) ) { // retry épuisés (ou retry impossible) if (opt->errlog) { @@ -3278,7 +3487,7 @@ int hts_mirror_check_moved(htsmoduleStruct* str, htsmoduleStructExtended* stre) } test_flush; } - + // NO error in trop level // due to the "no connection -> previous restored" hack // This prevent the engine from wiping all data if the website has been deleted (or moved) @@ -3290,19 +3499,19 @@ int hts_mirror_check_moved(htsmoduleStruct* str, htsmoduleStructExtended* stre) } } else { if (strcmp(urlfil,"/robots.txt") != 0) { - /* - This is an error caused by a link entered by the user - That is, link(s) entered by user are invalid (404, 500, connect error, proxy error->.) - If all links entered are invalid, the session failed and we will attempt to restore - the previous one - Example: Try to update a website which has been deleted remotely: this may delete - the website locally, which is really not desired (especially if the website disappeared!) - With this hack, the engine won't wipe local files (how clever) + /* + This is an error caused by a link entered by the user + That is, link(s) entered by user are invalid (404, 500, connect error, proxy error->.) + If all links entered are invalid, the session failed and we will attempt to restore + the previous one + Example: Try to update a website which has been deleted remotely: this may delete + the website locally, which is really not desired (especially if the website disappeared!) 
+ With this hack, the engine won't wipe local files (how clever) */ HTS_STAT.stat_errors_front++; } } - + } else { // retry!! if (opt->debug>0 && opt->errlog != NULL) { // on fera un alert si le retry échoue fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Retry after error %d (%s) at link %s%s (from %s%s)"LF,r->statuscode,r->msg,urladr,urlfil,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil); @@ -3349,23 +3558,23 @@ int hts_mirror_check_moved(htsmoduleStruct* str, htsmoduleStructExtended* stre) } // FIN rattrapage des 301,302,307.. // ------------------------------------------------------------ - - } // if !error - - - /* Apply changes */ - ENGINE_SAVE_CONTEXT(); - - return 0; - - + + } // if !error + + + /* Apply changes */ + ENGINE_SAVE_CONTEXT(); + + return 0; + + } /* - Wait for next file and - check 301, 302, .. statuscodes (moved) +Wait for next file and +check 301, 302, .. statuscodes (moved) */ int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended* stre) { /* Load engine variables */ @@ -3373,15 +3582,15 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended* /* */ int b; int n; - + #if BDEBUG==1 printf("\nBack test..\n"); #endif - + // pause/lock files { int do_pause=0; - + // user pause lockfile : create hts-paused.lock --> HTTrack will be paused if (fexist(fconcat(opt->path_log,"hts-stop.lock"))) { // remove lockfile @@ -3390,14 +3599,14 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended* do_pause=1; } } - + // after receving N bytes, pause if (opt->fragment>0) { if ((HTS_STAT.stat_bytes-stat_fragment) > opt->fragment) { do_pause=1; } } - + // pause? 
if (do_pause) { if ( (opt->debug>0) && (opt->log!=NULL) ) { @@ -3409,10 +3618,10 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended* #if HTS_ANALYSTE { back_wait(back,back_max,opt,cache,HTS_STAT.stat_timestart); - + // Transfer rate engine_stats(); - + // Refresh various stats HTS_STAT.stat_nsocket=back_nsoc(back,back_max); HTS_STAT.stat_errors=fspc(NULL,"error"); @@ -3420,18 +3629,18 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended* HTS_STAT.stat_infos=fspc(NULL,"info"); HTS_STAT.nbk=backlinks_done(liens,lien_tot,ptr); HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,back,back_max); - + b=0; if (!hts_htmlcheck_loop(back,back_max,b,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT) || !back_checkmirror(opt)) { - if (opt->errlog) { - fspc(opt->errlog,"info"); fprintf(opt->errlog,"Exit requested by shell or user"LF); - test_flush; + if (opt->errlog) { + fspc(opt->errlog,"info"); fprintf(opt->errlog,"Exit requested by shell or user"LF); + test_flush; + } + *stre->exit_xh_=1; // exit requested + XH_uninit; + return 0; } - *stre->exit_xh_=1; // exit requested - XH_uninit; - return 0; - } } #endif } @@ -3463,7 +3672,7 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended* // } // end of pause/lock files - + #if HTS_ANALYSTE // changement dans les préférences /* @@ -3473,10 +3682,10 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended* } */ if (_hts_addurl) { - char add_adr[HTS_URLMAXSIZE*2]; - char add_fil[HTS_URLMAXSIZE*2]; + char BIGSTK add_adr[HTS_URLMAXSIZE*2]; + char BIGSTK add_fil[HTS_URLMAXSIZE*2]; while(*_hts_addurl) { - char add_url[HTS_URLMAXSIZE*2]; + char BIGSTK add_url[HTS_URLMAXSIZE*2]; add_adr[0]=add_fil[0]=add_url[0]='\0'; if (!link_has_authority(*_hts_addurl)) strcpybuff(add_url,"http://"); // ajouter http:// @@ -3484,7 +3693,7 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended* if 
(ident_url_absolute(add_url,add_adr,add_fil)>=0) { // ----Ajout---- // noter NOUVEAU lien - char add_sav[HTS_URLMAXSIZE*2]; + char BIGSTK add_sav[HTS_URLMAXSIZE*2]; // calculer lien et éventuellement modifier addresse/fichier if (url_savename(add_adr,add_fil,add_sav,NULL,NULL,NULL,NULL,opt,liens,lien_tot,back,back_max,cache,hash,ptr,numero_passe)!=-1) { if (hash_read(hash,add_sav,"",0,0)<0) { // n'existe pas déja @@ -3520,7 +3729,7 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended* test_flush; } } - + } } else { if (opt->errlog) { @@ -3535,16 +3744,18 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended* _hts_addurl=NULL; // libérer _hts_addurl } // si une pause a été demandée - if (_hts_setpause) { + if (_hts_setpause || back_pluggable_sockets_strict(back, back_max, opt) <= 0) { // index du lien actuel int b=back_index(back,back_max,urladr,urlfil,savename); + int prev = _hts_in_html_parsing; if (b<0) b=0; // forcer pour les stats - while(_hts_setpause) { // on fait la pause.. + while(_hts_setpause || back_pluggable_sockets_strict(back, back_max, opt) <= 0) { // on fait la pause.. 
+ _hts_in_html_parsing = 6; back_wait(back,back_max,opt,cache,HTS_STAT.stat_timestart); - + // Transfer rate engine_stats(); - + // Refresh various stats HTS_STAT.stat_nsocket=back_nsoc(back,back_max); HTS_STAT.stat_errors=fspc(NULL,"error"); @@ -3552,7 +3763,7 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended* HTS_STAT.stat_infos=fspc(NULL,"info"); HTS_STAT.nbk=backlinks_done(liens,lien_tot,ptr); HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,back,back_max); - + if (!hts_htmlcheck_loop(back,back_max,b,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) { if (opt->errlog) { fspc(opt->errlog,"info"); fprintf(opt->errlog,"Exit requested by shell or user"LF); @@ -3562,12 +3773,12 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended* XH_uninit; return 0; } - if (back_nsoc(back,back_max)==0) - Sleep(250); // tite pause + Sleep(100); // pause } + _hts_in_html_parsing = prev; } #endif - + // si le fichier n'est pas en backing, le mettre.. 
if (!back_exist(back,back_max,urladr,urlfil,savename)) { #if BDEBUG==1 @@ -3582,382 +3793,385 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended* fspc(opt->errlog,"error"); fprintf(opt->errlog,"Unexpected backing error for %s%s"LF,urladr,urlfil); test_flush; } - + } } - + #if BDEBUG==1 printf("test number of socks\n"); #endif - + // ajouter autant de socket qu'on peut ajouter n=opt->maxsoc-back_nsoc(back,back_max); #if BDEBUG==1 printf("%d sockets available for backing\n",n); #endif - + #if HTS_ANALYSTE if ((n>0) && (!_hts_setpause)) { // si sockets libre et pas en pause, ajouter #else - if (n>0) { // si sockets libre + if (n>0) { // si sockets libre #endif - // remplir autant que l'on peut le cache (backing) - back_fillmax(back,back_max,opt,cache,liens,ptr,numero_passe,lien_tot); - } - - // index du lien actuel - /* - b=back_index(back,back_max,urladr,urlfil,savename); - - if (b>=0) - */ - { - // ------------------------------------------------------------ - // attendre que le fichier actuel soit prêt - BOUCLE D'ATTENTE - do { - - // index du lien actuel - b=back_index(back,back_max,urladr,urlfil,savename); + // remplir autant que l'on peut le cache (backing) + back_fillmax(back,back_max,opt,cache,liens,ptr,numero_passe,lien_tot); + } + + // index du lien actuel + /* + b=back_index(back,back_max,urladr,urlfil,savename); + + if (b>=0) + */ + { + // ------------------------------------------------------------ + // attendre que le fichier actuel soit prêt - BOUCLE D'ATTENTE + do { + + // index du lien actuel + b=back_index(back,back_max,urladr,urlfil,savename); #if BDEBUG==1 - printf("back index %d, waiting\n",b); + printf("back index %d, waiting\n",b); #endif - // Continue to the loop if link still present - if (b<0) - continue; - - // Receive data - if (back[b].status>0) - back_wait(back,back_max,opt,cache,HTS_STAT.stat_timestart); - - // Continue to the loop if link still present - b=back_index(back,back_max,urladr,urlfil,savename); 
- if (b<0) - continue; - - // Stop the mirror - if (!back_checkmirror(opt)) { - *stre->exit_xh_=1; // exit requested - XH_uninit; - return 0; - } - - // And fill the backing stack - if (back[b].status>0) - back_fillmax(back,back_max,opt,cache,liens,ptr,numero_passe,lien_tot); - - // Continue to the loop if link still present - b=back_index(back,back_max,urladr,urlfil,savename); - if (b<0) - continue; - - // autres occupations de HTTrack: statistiques, boucle d'attente, etc. - if ((opt->makestat) || (opt->maketrack)) { - TStamp l=time_local(); - if ((int) (l-makestat_time) >= 60) { - if (makestat_fp != NULL) { - fspc(makestat_fp,"info"); - fprintf(makestat_fp,"Rate= %d (/"LLintP") \11NewLinks= %d (/%d)"LF,(int) ((HTS_STAT.HTS_TOTAL_RECV-*stre->makestat_total_)/(l-makestat_time)), (LLint)HTS_STAT.HTS_TOTAL_RECV,(int) lien_tot-*stre->makestat_lnk_,(int) lien_tot); - fflush(makestat_fp); - *stre->makestat_total_=HTS_STAT.HTS_TOTAL_RECV; - *stre->makestat_lnk_=lien_tot; - } - if (stre->maketrack_fp != NULL) { - int i; - fspc(stre->maketrack_fp,"info"); fprintf(stre->maketrack_fp,LF); - for(i=0;i<back_max;i++) { - back_info(back,i,3,stre->maketrack_fp); - } - fprintf(stre->maketrack_fp,LF); - + // Continue to the loop if link still present + if (b<0) + continue; + + // Receive data + if (back[b].status>0) + back_wait(back,back_max,opt,cache,HTS_STAT.stat_timestart); + + // Continue to the loop if link still present + b=back_index(back,back_max,urladr,urlfil,savename); + if (b<0) + continue; + + // Stop the mirror + if (!back_checkmirror(opt)) { + *stre->exit_xh_=1; // exit requested + XH_uninit; + return 0; + } + + // And fill the backing stack + if (back[b].status>0) + back_fillmax(back,back_max,opt,cache,liens,ptr,numero_passe,lien_tot); + + // Continue to the loop if link still present + b=back_index(back,back_max,urladr,urlfil,savename); + if (b<0) + continue; + + // autres occupations de HTTrack: statistiques, boucle d'attente, etc. 
+ if ((opt->makestat) || (opt->maketrack)) { + TStamp l=time_local(); + if ((int) (l-makestat_time) >= 60) { + if (makestat_fp != NULL) { + fspc(makestat_fp,"info"); + fprintf(makestat_fp,"Rate= %d (/"LLintP") \11NewLinks= %d (/%d)"LF,(int) ((HTS_STAT.HTS_TOTAL_RECV-*stre->makestat_total_)/(l-makestat_time)), (LLint)HTS_STAT.HTS_TOTAL_RECV,(int) lien_tot-*stre->makestat_lnk_,(int) lien_tot); + fflush(makestat_fp); + *stre->makestat_total_=HTS_STAT.HTS_TOTAL_RECV; + *stre->makestat_lnk_=lien_tot; + } + if (stre->maketrack_fp != NULL) { + int i; + fspc(stre->maketrack_fp,"info"); fprintf(stre->maketrack_fp,LF); + for(i=0;i<back_max;i++) { + back_info(back,i,3,stre->maketrack_fp); } - makestat_time=l; + fprintf(stre->maketrack_fp,LF); + fflush(stre->maketrack_fp); + } + makestat_time=l; } + } #if HTS_ANALYSTE + { + int i; { - int i; - { - char* s=hts_cancel_file(""); - if (strnotempty(s)) { // fichier à canceller - for(i=0;i<back_max;i++) { - if ((back[i].status>0)) { - if (strcmp(back[i].url_sav,s)==0) { // ok trouvé - if (back[i].status != 1000) { + char* s=hts_cancel_file(""); + if (strnotempty(s)) { // fichier à canceller + for(i=0;i<back_max;i++) { + if ((back[i].status>0)) { + if (strcmp(back[i].url_sav,s)==0) { // ok trouvé + if (back[i].status != 1000) { #if HTS_DEBUG_CLOSESOCK - DEBUG_W("user cancel: deletehttp\n"); + DEBUG_W("user cancel: deletehttp\n"); #endif - if (back[i].r.soc!=INVALID_SOCKET) deletehttp(&back[i].r); - back[i].r.soc=INVALID_SOCKET; - back[i].r.statuscode=-1; - strcpybuff(back[i].r.msg,"Cancelled by User"); - back[i].status=0; // terminé - } else // cancel ftp.. flag à 1 - back[i].stop_ftp = 1; - } + if (back[i].r.soc!=INVALID_SOCKET) deletehttp(&back[i].r); + back[i].r.soc=INVALID_SOCKET; + back[i].r.statuscode=-1; + strcpybuff(back[i].r.msg,"Cancelled by User"); + back[i].status=0; // terminé + } else // cancel ftp.. 
flag à 1 + back[i].stop_ftp = 1; } } - s[0]='\0'; } + s[0]='\0'; } - - // Transfer rate - engine_stats(); - - // Refresh various stats - HTS_STAT.stat_nsocket=back_nsoc(back,back_max); - HTS_STAT.stat_errors=fspc(NULL,"error"); - HTS_STAT.stat_warnings=fspc(NULL,"warning"); - HTS_STAT.stat_infos=fspc(NULL,"info"); - HTS_STAT.nbk=backlinks_done(liens,lien_tot,ptr); - HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,back,back_max); - - if (!hts_htmlcheck_loop(back,back_max,b,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) { - if (opt->errlog) { - fspc(opt->errlog,"info"); fprintf(opt->errlog,"Exit requested by shell or user"LF); - test_flush; - } - *stre->exit_xh_=1; // exit requested - XH_uninit; - return 0; - } } - + + // Transfer rate + engine_stats(); + + // Refresh various stats + HTS_STAT.stat_nsocket=back_nsoc(back,back_max); + HTS_STAT.stat_errors=fspc(NULL,"error"); + HTS_STAT.stat_warnings=fspc(NULL,"warning"); + HTS_STAT.stat_infos=fspc(NULL,"info"); + HTS_STAT.nbk=backlinks_done(liens,lien_tot,ptr); + HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,back,back_max); + + if (!hts_htmlcheck_loop(back,back_max,b,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) { + if (opt->errlog) { + fspc(opt->errlog,"info"); fprintf(opt->errlog,"Exit requested by shell or user"LF); + test_flush; + } + *stre->exit_xh_=1; // exit requested + XH_uninit; + return 0; + } + } + #endif #if HTS_POLL - if ((opt->shell) || (opt->keyboard) || (opt->verbosedisplay) || (!opt->quiet)) { - TStamp tl; - *stre->info_shell_=1; - - /* Toggle with ENTER */ - if (!opt->quiet) { - if (check_stdin()) { - char com[256]; - linput(stdin,com,200); - if (opt->verbosedisplay==2) - opt->verbosedisplay=1; - else - opt->verbosedisplay=2; - /* Info for wrappers */ - if ( (opt->debug>0) && (opt->log!=NULL) ) { - fspc(opt->log,"info"); fprintf(opt->log,"engine: change-options"LF); - } + if ((opt->shell) || (opt->keyboard) || (opt->verbosedisplay) || (!opt->quiet)) { + 
TStamp tl; + *stre->info_shell_=1; + + /* Toggle with ENTER */ + if (!opt->quiet) { + if (check_stdin()) { + char com[256]; + linput(stdin,com,200); + if (opt->verbosedisplay==2) + opt->verbosedisplay=1; + else + opt->verbosedisplay=2; + /* Info for wrappers */ + if ( (opt->debug>0) && (opt->log!=NULL) ) { + fspc(opt->log,"info"); fprintf(opt->log,"engine: change-options"LF); + } #if HTS_ANALYSTE - hts_htmlcheck_chopt(opt); + hts_htmlcheck_chopt(opt); #endif - } } - - tl=time_local(); - - // générer un message d'infos sur l'état actuel - if (opt->shell) { // si shell - if ((tl-*stre->last_info_shell_)>0) { // toute les 1 sec - FILE* fp=stdout; - int a=0; - *stre->last_info_shell_=tl; - if (fexist(fconcat(opt->path_log,"hts-autopsy"))) { // débuggage: teste si le robot est vivant - // (oui je sais un robot vivant.. mais bon.. il a le droit de vivre lui aussi) - // (libérons les robots esclaves de l'internet!) - remove(fconcat(opt->path_log,"hts-autopsy")); - fp=fopen(fconcat(opt->path_log,"hts-isalive"),"wb"); - a=1; + } + + tl=time_local(); + + // générer un message d'infos sur l'état actuel + if (opt->shell) { // si shell + if ((tl-*stre->last_info_shell_)>0) { // toute les 1 sec + FILE* fp=stdout; + int a=0; + *stre->last_info_shell_=tl; + if (fexist(fconcat(opt->path_log,"hts-autopsy"))) { // débuggage: teste si le robot est vivant + // (oui je sais un robot vivant.. mais bon.. il a le droit de vivre lui aussi) + // (libérons les robots esclaves de l'internet!) 
+ remove(fconcat(opt->path_log,"hts-autopsy")); + fp=fopen(fconcat(opt->path_log,"hts-isalive"),"wb"); + a=1; + } + if ((*stre->info_shell_) || a) { + int i,j; + + fprintf(fp,"TIME %d"LF,(int) (tl-HTS_STAT.stat_timestart)); + fprintf(fp,"TOTAL %d"LF,(int) HTS_STAT.stat_bytes); + fprintf(fp,"RATE %d"LF,(int) (HTS_STAT.HTS_TOTAL_RECV/(tl-HTS_STAT.stat_timestart))); + fprintf(fp,"SOCKET %d"LF,back_nsoc(back,back_max)); + fprintf(fp,"LINK %d"LF,lien_tot); + { + LLint mem=0; + for(i=0;i<back_max;i++) + if (back[i].r.adr!=NULL) + mem+=back[i].r.size; + fprintf(fp,"INMEM "LLintP""LF,(LLint)mem); } - if ((*stre->info_shell_) || a) { - int i,j; - - fprintf(fp,"TIME %d"LF,(int) (tl-HTS_STAT.stat_timestart)); - fprintf(fp,"TOTAL %d"LF,(int) HTS_STAT.stat_bytes); - fprintf(fp,"RATE %d"LF,(int) (HTS_STAT.HTS_TOTAL_RECV/(tl-HTS_STAT.stat_timestart))); - fprintf(fp,"SOCKET %d"LF,back_nsoc(back,back_max)); - fprintf(fp,"LINK %d"LF,lien_tot); - { - LLint mem=0; - for(i=0;i<back_max;i++) - if (back[i].r.adr!=NULL) - mem+=back[i].r.size; - fprintf(fp,"INMEM "LLintP""LF,(LLint)mem); + for(j=0;j<2;j++) { // passes pour ready et wait + for(i=0;i<back_max;i++) { + back_info(back,i,j+1,stdout); // maketrack_fp a la place de stdout ?? // ** } - for(j=0;j<2;j++) { // passes pour ready et wait - for(i=0;i<back_max;i++) { - back_info(back,i,j+1,stdout); // maketrack_fp a la place de stdout ?? 
// ** - } - } - fprintf(fp,LF); - if (a) - fclose(fp); - io_flush; } + fprintf(fp,LF); + if (a) + fclose(fp); + io_flush; } - } // si shell - - } // si shell ou keyboard (option) - // + } + } // si shell + + } // si shell ou keyboard (option) + // #endif - } while((b>=0) && (back[max(b,0)].status>0)); - - - // If link not found on the stack, it's because it has already been downloaded - // in background - // Then, skip it and go to the next one - if (b<0) { - if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"link #%d is ready, no more on the stack, skipping: %s%s.."LF,ptr,urladr,urlfil); + } while((b>=0) && (back[max(b,0)].status>0)); + + + // If link not found on the stack, it's because it has already been downloaded + // in background + // Then, skip it and go to the next one + if (b<0) { + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"link #%d is ready, no more on the stack, skipping: %s%s.."LF,ptr,urladr,urlfil); + test_flush; + } + + // prochain lien + // ptr++; + + return 2; // goto jump_if_done; + + } +#if 0 + /* FIXME - finalized HAS NO MORE THIS MEANING */ + /* link put in cache by the backing system for memory spare - reclaim */ + else if (back[b].finalized) { + assertf(back[b].r.adr == NULL); + /* read file in cache */ + back[b].r = cache_read_ro(opt,cache,back[b].url_adr,back[b].url_fil,back[b].url_sav, back[b].location_buffer); + /* ensure correct location buffer set */ + back[b].r.location=back[b].location_buffer; + if (back[b].r.statuscode == -1) { + if (opt->errlog) { + fspc(opt->errlog,"error"); fprintf(opt->errlog,"Unexpected error: %s%s not found anymore in cache"LF,back[b].url_adr,back[b].url_fil); test_flush; } - - // prochain lien - // ptr++; - - return 2; // goto jump_if_done; - - } - /* link put in cache by the backing system for memory spare - reclaim */ - else if (back[b].finalized) { - assertf(back[b].r.adr == NULL); - /* read file in cache */ - back[b].r = 
cache_read_ro(opt,cache,back[b].url_adr,back[b].url_fil,back[b].url_sav, back[b].location_buffer); - /* ensure correct location buffer set */ - back[b].r.location=back[b].location_buffer; - if (back[b].r.statuscode == -1) { - if (opt->errlog) { - fspc(opt->errlog,"error"); fprintf(opt->errlog,"Unexpected error: %s%s not found anymore in cache"LF,back[b].url_adr,back[b].url_fil); - test_flush; - } - } else { - if ( (opt->debug>1) && (opt->log!=NULL) ) { - fspc(opt->log,"debug"); fprintf(opt->log,"reclaim file %s%s (%d)"LF,back[b].url_adr,back[b].url_fil,back[b].r.statuscode); test_flush; - } + } else { + if ( (opt->debug>1) && (opt->log!=NULL) ) { + fspc(opt->log,"debug"); fprintf(opt->log,"reclaim file %s%s (%d)"LF,back[b].url_adr,back[b].url_fil,back[b].r.statuscode); test_flush; } } - - + } +#endif + #if HTS_ANALYSTE==2 #else - //if (!opt->quiet) { // petite animation - if (!opt->verbosedisplay) { - if (!opt->quiet) { - static int roll=0; /* static: ok */ - roll=(roll+1)%4; - printf("%c\x0d",("/-\\|")[roll]); - fflush(stdout); - } - } else if (opt->verbosedisplay==1) { - if (back[b].r.statuscode==200) - printf("%d/%d: %s%s ("LLintP" bytes) - OK\33[K\r",ptr,lien_tot,back[b].url_adr,back[b].url_fil,(LLint)back[b].r.size); - else - printf("%d/%d: %s%s ("LLintP" bytes) - %d\33[K\r",ptr,lien_tot,back[b].url_adr,back[b].url_fil,(LLint)back[b].r.size,back[b].r.statuscode); + //if (!opt->quiet) { // petite animation + if (!opt->verbosedisplay) { + if (!opt->quiet) { + static int roll=0; /* static: ok */ + roll=(roll+1)%4; + printf("%c\x0d",("/-\\|")[roll]); fflush(stdout); } - //} + } else if (opt->verbosedisplay==1) { + if (back[b].r.statuscode==200) + printf("%d/%d: %s%s ("LLintP" bytes) - OK\33[K\r",ptr,lien_tot,back[b].url_adr,back[b].url_fil,(LLint)back[b].r.size); + else + printf("%d/%d: %s%s ("LLintP" bytes) - %d\33[K\r",ptr,lien_tot,back[b].url_adr,back[b].url_fil,(LLint)back[b].r.size,back[b].r.statuscode); + fflush(stdout); + } + //} #endif - // 
------------------------------------------------------------ - // Vérificateur d'intégrité + // ------------------------------------------------------------ + // Vérificateur d'intégrité #if DEBUG_CHECKINT - _CHECKINT(&back[b],"Retour de back_wait, après le while") - { - int i; - for(i=0;i<back_max;i++) { - char si[256]; - sprintf(si,"Test global après back_wait, index %d",i); - _CHECKINT(&back[i],si) - } + _CHECKINT(&back[b],"Retour de back_wait, après le while") + { + int i; + for(i=0;i<back_max;i++) { + char si[256]; + sprintf(si,"Test global après back_wait, index %d",i); + _CHECKINT(&back[i],si) } + } #endif - - // copier structure réponse htsblk - memcpy(r, &(back[b].r), sizeof(htsblk)); - r->location=stre->loc_; // ne PAS copier location!! adresse, pas de buffer - if (back[b].r.location) - strcpybuff(r->location,back[b].r.location); - back[b].r.adr=NULL; // ne pas faire de desalloc ensuite - - // libérer emplacement backing - back_maydelete(opt,back,b); - - // progression + + // copier structure réponse htsblk + memcpy(r, &(back[b].r), sizeof(htsblk)); + r->location=stre->loc_; // ne PAS copier location!! adresse, pas de buffer + if (back[b].r.location) + strcpybuff(r->location,back[b].r.location); + back[b].r.adr=NULL; // ne pas faire de desalloc ensuite + + // libérer emplacement backing + back_maydelete(opt,cache,back,b); + + // progression #if 0 - if (opt->aff_progress) { - TStamp tl=time_local(); - if ((tl-HTS_STAT.stat_timestart)>0) { - char s[32]; - int i=0; - lastime=tl; - _CLRSCR; _GOTOXY("1","1"); - printf("Rate=%d B/sec\n",(int) (HTS_STAT.HTS_TOTAL_RECV/(tl-HTS_STAT.stat_timestart))); - while(i<minimum(back_max,99)) { // ** - if (back[i].status>=0) { // loading.. 
- s[0]='\0'; - if (strlen(back[i].url_fil)>16) - strcatbuff(s,back[i].url_fil+strlen(back[i].url_fil)-16); - else - strncatbuff(s,back[i].url_fil,16); - printf("%s : ",s); - - printf("["); - if (back[i].r.totalsize>0) { - int p; - int j; - p=(int)((back[i].r.size*10)/back[i].r.totalsize); - p=minimum(10,p); - for(j=0;j<p;j++) printf("*"); - for(j=0;j<(10-p);j++) printf("-"); - } else { - printf(LLintP,(LLint)back[i].r.size); - } - printf("]"); - - //} else if (back[i].status==0) { - // strcpybuff(s,"ENDED"); - } - printf("\n"); - i++; - } - io_flush; + if (opt->aff_progress) { + TStamp tl=time_local(); + if ((tl-HTS_STAT.stat_timestart)>0) { + char s[32]; + int i=0; + lastime=tl; + _CLRSCR; _GOTOXY("1","1"); + printf("Rate=%d B/sec\n",(int) (HTS_STAT.HTS_TOTAL_RECV/(tl-HTS_STAT.stat_timestart))); + while(i<minimum(back_max,99)) { // ** + if (back[i].status>=0) { // loading.. + s[0]='\0'; + if (strlen(back[i].url_fil)>16) + strcatbuff(s,back[i].url_fil+strlen(back[i].url_fil)-16); + else + strncatbuff(s,back[i].url_fil,16); + printf("%s : ",s); + + printf("["); + if (back[i].r.totalsize>0) { + int p; + int j; + p=(int)((back[i].r.size*10)/back[i].r.totalsize); + p=minimum(10,p); + for(j=0;j<p;j++) printf("*"); + for(j=0;j<(10-p);j++) printf("-"); + } else { + printf(LLintP,(LLint)back[i].r.size); + } + printf("]"); + + //} else if (back[i].status==0) { + // strcpybuff(s,"ENDED"); + } + printf("\n"); + i++; } + io_flush; } + } #endif - - // débug graphique + + // débug graphique #if BDEBUG==2 - { - char s[12]; - int i=0; - _GOTOXY(1,1); - printf("Rate=%d B/sec\n",(int) (HTS_STAT.HTS_TOTAL_RECV/(time_local()-HTS_STAT.stat_timestart))); - while(i<minimum(back_max,160)) { - if (back[i].status>0) { - sprintf(s,"%d",back[i].r.size); - } else if (back[i].status==0) { - strcpybuff(s,"ENDED"); - } else - strcpybuff(s," - "); - while(strlen(s)<8) strcatbuff(s," "); - printf("%s",s); io_flush; - i++; - } + { + char s[12]; + int i=0; + _GOTOXY(1,1); + printf("Rate=%d 
B/sec\n",(int) (HTS_STAT.HTS_TOTAL_RECV/(time_local()-HTS_STAT.stat_timestart))); + while(i<minimum(back_max,160)) { + if (back[i].status>0) { + sprintf(s,"%d",back[i].r.size); + } else if (back[i].status==0) { + strcpybuff(s,"ENDED"); + } else + strcpybuff(s," - "); + while(strlen(s)<8) strcatbuff(s," "); + printf("%s",s); io_flush; + i++; } + } #endif - - + + #if BDEBUG==1 - printf("statuscode=%d with %s / msg=%s\n",r->statuscode,r->contenttype,r->msg); + printf("statuscode=%d with %s / msg=%s\n",r->statuscode,r->contenttype,r->msg); #endif - - } - /*else { - #if BDEBUG==1 - printf("back index error\n"); - #endif - } - */ - - - - ENGINE_SAVE_CONTEXT(); - - return 0; - - + + } + /*else { + #if BDEBUG==1 + printf("back index error\n"); + #endif + } + */ + + + + ENGINE_SAVE_CONTEXT(); + + return 0; + + } diff --git a/src/htsparse.h b/src/htsparse.h index 4efc386..d36217c 100644 --- a/src/htsparse.h +++ b/src/htsparse.h @@ -37,7 +37,7 @@ Please visit our Website: http://www.httrack.com /* ------------------------------------------------------------ */ -typedef struct { +typedef struct htsmoduleStructExtended { /* Main object */ htsblk* r_; @@ -84,6 +84,9 @@ typedef struct { } htsmoduleStructExtended; +/* Library internal definictions */ +#ifdef HTS_INTERNAL_BYTECODE + /* Main parser, attempt to scan links inside the html/css/js file Parameters: The public module structure, and the private module variables @@ -105,4 +108,4 @@ int hts_mirror_check_moved(htsmoduleStruct* str, htsmoduleStructExtended* stre); */ int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended* stre); - +#endif diff --git a/src/htsrobots.c b/src/htsrobots.c index 58e97fb..5ca7640 100644 --- a/src/htsrobots.c +++ b/src/htsrobots.c @@ -35,15 +35,14 @@ Please visit our Website: http://www.httrack.com /* Author: Xavier Roche */ /* ------------------------------------------------------------ */ +/* Internal engine bytecode */ +#define HTS_INTERNAL_BYTECODE #include "htsrobots.h" 
/* specific definitions */ #include "htsbase.h" #include "htslib.h" -#include <stdio.h> -#include <stdlib.h> -#include <string.h> /* END specific definitions */ diff --git a/src/htsrobots.h b/src/htsrobots.h index ef08183..195bbde 100644 --- a/src/htsrobots.h +++ b/src/htsrobots.h @@ -47,10 +47,11 @@ typedef struct robots_wizard { } robots_wizard; -// robots +/* Library internal definictions */ +#ifdef HTS_INTERNAL_BYTECODE int checkrobots(robots_wizard* robots,char* adr,char* fil); void checkrobots_free(robots_wizard* robots); int checkrobots_set(robots_wizard* robots,char* adr,char* data); - +#endif #endif diff --git a/src/htsserver.c b/src/htsserver.c index 0408976..0906b39 100644 --- a/src/htsserver.c +++ b/src/htsserver.c @@ -37,7 +37,12 @@ Please visit our Website: http://www.httrack.com /* specific definitions */ /* specific definitions */ -#include "htsbase.h" + +/* Bypass internal definition protection */ +#define HTS_INTERNAL_BYTECODE + #include "htsbase.h" +#undef HTS_INTERNAL_BYTECODE + #include "htsnet.h" #include "htslib.h" #include <stdio.h> @@ -64,7 +69,12 @@ Please visit our Website: http://www.httrack.com #include "httrack-library.h" /* Language files */ -#include "htsinthash.h" + +/* Bypass internal definition protection */ +#define HTS_INTERNAL_BYTECODE + #include "htsinthash.h" +#undef HTS_INTERNAL_BYTECODE + int NewLangStrSz=1024; inthash NewLangStr=NULL; int NewLangStrKeysSz=1024; @@ -73,7 +83,6 @@ int NewLangListSz=1024; inthash NewLangList=NULL; /* Language files */ - #include "htsserver.h" char* gethomedir(void); @@ -103,6 +112,15 @@ static void sig_brpipe( int code ) { /* ignore */ } +static int check_readinput_t(T_SOC soc, int timeout); +static int recv_bl(T_SOC soc, void* buffer, size_t len, int timeout); +static int linputsoc(T_SOC soc, char* s, int max); +static int check_readinput(htsblk* r); +static int linputsoc_t(T_SOC soc, char* s, int max, int timeout); + + +static int linput(FILE* fp,char* s,int max); + // URL Link catcher @@ 
-209,23 +227,8 @@ T_SOC smallserver_init(int* port,char* adr) { // 2 - Wait for URL -static int recv_bl(T_SOC soc, void* buffer, size_t len, int timeout) { - if (check_readinput_t(soc, timeout)) { - int n = 1; - size_t size = len; - size_t offs = 0; - while(n > 0 && size > 0) { - n = recv(soc, ((char*)buffer) + offs, (int) size, 0); - if (n > 0) { - offs += n; - size -= n; - } - } - return (int)offs; - } - return -1; -} +// check if data is available // smallserver // returns 0 if error @@ -241,20 +244,6 @@ typedef struct { char* value; } initStrElt; -int smallserver_setkey(char* key, char* value) { - return inthash_write(NewLangList, key, (unsigned long int)strdup(value)); -} -int smallserver_setkeyint(char* key, LLint value) { - char tmp[256]; - sprintf(tmp, LLintP, value); - return inthash_write(NewLangList, key, (unsigned long int)strdup(tmp)); -} -int smallserver_setkeyarr(char* key, int id, char* key2, char* value) { - char tmp[256]; - sprintf(tmp, "%s%d%s", key, id, key2); - return inthash_write(NewLangList, tmp, (unsigned long int)strdup(value)); -} - #define SET_ERROR(err) do { \ inthash_write(NewLangList, "error", (unsigned long int)strdup(err)); \ error_redirect = "/server/error.html"; \ @@ -269,6 +258,7 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { String headers = STRING_EMPTY; String output = STRING_EMPTY; String tmpbuff = STRING_EMPTY; + String tmpbuff2 = STRING_EMPTY; String fspath = STRING_EMPTY; /* Load strings */ @@ -283,7 +273,7 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { { char pth[1024]; char* initOn[] = { "parseall", "Cache", "ka", - "cookies", "parsejava", "testall", "updhack", "index", NULL }; + "cookies", "parsejava", "testall", "updhack", "urlhack", "index", NULL }; initIntElt initInt[] = { { "filter", 4 }, { "travel", 2 }, @@ -303,7 +293,7 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { }; initStrElt initStr[] = { { "user", "Mozilla/4.5 
(compatible; HTTrack 3.0x; Windows 98)" }, - { "footer", "<!-- Mirrored from %s%s by HTTrack Website Copier/3.x [XR&CO'2002], %s -->" }, + { "footer", "<!-- Mirrored from %s%s by HTTrack Website Copier/3.x [XR&CO'2005], %s -->" }, { "url2", "+*.png +*.gif +*.jpg +*.css +*.js -ad.doubleclick.net/*" }, { NULL, NULL } }; @@ -340,14 +330,16 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { line[0] = '\0'; buffer[0] = '\0'; - StringClear(&headers); - StringClear(&output); - StringClear(&tmpbuff); - StringClear(&fspath); - StringStrcat(&headers, ""); - StringStrcat(&output, ""); - StringStrcat(&tmpbuff, ""); - StringStrcat(&fspath, ""); + StringClear(headers); + StringClear(output); + StringClear(tmpbuff); + StringClear(tmpbuff2); + StringClear(fspath); + StringStrcat(headers, ""); + StringStrcat(output, ""); + StringStrcat(tmpbuff, ""); + StringStrcat(tmpbuff2, ""); + StringStrcat(fspath, ""); memset(&dummyaddr, 0, sizeof(dummyaddr)); /* UnLock */ @@ -473,8 +465,10 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { unsigned long int adr = 0; if (inthash_readptr(NewLangList, "lang", (long int *)&adr)) { int n = 0; - if (sscanf((char*)adr, "%d", &n) == 1 && n - 1 != LANG_T(path, -1)) { + if (sscanf((char*)adr, "%d", &n) == 1 && n > 0 && n - 1 != LANG_T(path, -1)) { LANG_T(path, n - 1); + /* make a backup, because the GUI will override it */ + inthash_write(NewLangList, "lang_", (unsigned long int)strdup((char*)adr)); } } @@ -487,24 +481,48 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { inthash_write(NewLangList, "loadprojname", (unsigned long int)NULL); doLoad=1; } - + else if (inthash_readptr(NewLangList, "loadprojcateg", (long int *)&adr)) { + char* pname = (char*) adr; + if (*pname) { + inthash_write(NewLangList, "projcateg", (unsigned long int)strdup(pname)); + } + inthash_write(NewLangList, "loadprojcateg", (unsigned long int)NULL); + } + + /* intial configuration */ + { + if 
(!inthash_read(NewLangList, "conf_file_loaded", NULL)) { + inthash_write(NewLangList, "conf_file_loaded", (unsigned long int)strdup("true")); + doLoad = 2; + } + } + /* path : <path>/<project> */ if (!commandRunning) { unsigned long int adrw = 0, adrpath = 0, adrprojname = 0; if (inthash_readptr(NewLangList, "path", (long int *)&adrpath) && inthash_readptr(NewLangList, "projname", (long int *)&adrprojname)) { - StringClear(&fspath); - StringStrcat(&fspath, (char*)adrpath); - StringStrcat(&fspath, "/"); - StringStrcat(&fspath, (char*)adrprojname); + StringClear(fspath); + StringStrcat(fspath, (char*)adrpath); + StringStrcat(fspath, "/"); + StringStrcat(fspath, (char*)adrprojname); } } /* Load existing project settings */ if (doLoad) { FILE* fp; - StringStrcat(&fspath, "/hts-cache/winprofile.ini"); - fp = fopen(StringBuff(&fspath), "rb"); + if (doLoad == 1) { + StringStrcat(fspath, "/hts-cache/winprofile.ini"); + } else if (doLoad == 2) { + StringStrcpy(fspath, gethomedir()); +#ifdef _WIN32 + StringStrcat(fspath, "/httrack.ini"); +#else + StringStrcat(fspath, "/.httrack.ini"); +#endif + } + fp = fopen(StringBuff(fspath), "rb"); if (fp) { /* Read file */ while(!feof(fp)) { @@ -579,15 +597,39 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { if (inthash_readptr(NewLangList, "command_do", (long int *)&adrcd)) { unsigned long int adrw = 0, adrpath = 0, adrprojname = 0; if (inthash_readptr(NewLangList, "winprofile", (long int *)&adrw)) { - StringClear(&tmpbuff); - StringStrcat(&tmpbuff, StringBuff(&fspath)); - StringStrcat(&tmpbuff, "/hts-cache/"); + + /* User general profile */ + unsigned long int adruserprofile = 0; + if (inthash_readptr(NewLangList, "userprofile", (long int *)&adruserprofile) + && adruserprofile != 0) { + int count = (int) strlen((char*)adruserprofile); + if (count > 0) { + FILE* fp; + StringClear(tmpbuff); + StringStrcpy(tmpbuff, gethomedir()); +#ifdef _WIN32 + StringStrcat(tmpbuff, "/httrack.ini"); +#else + 
StringStrcat(tmpbuff, "/.httrack.ini"); +#endif + fp = fopen(StringBuff(tmpbuff), "wb"); + if (fp != NULL) { + (void)((int)fwrite((void*)adruserprofile, 1, count, fp)); + fclose(fp); + } + } + } + + /* Profile */ + StringClear(tmpbuff); + StringStrcat(tmpbuff, StringBuff(fspath)); + StringStrcat(tmpbuff, "/hts-cache/"); /* Create minimal directory structure */ - if (!structcheck(StringBuff(&tmpbuff))) { + if (!structcheck(StringBuff(tmpbuff))) { FILE* fp; - StringStrcat(&tmpbuff, "winprofile.ini"); - fp = fopen(StringBuff(&tmpbuff), "wb"); + StringStrcat(tmpbuff, "winprofile.ini"); + fp = fopen(StringBuff(tmpbuff), "wb"); if (fp != NULL) { int count = (int) strlen((char*)adrw); if ((int)fwrite((void*)adrw, 1, count, fp) == count) { @@ -596,10 +638,10 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { even a bit annoying (duplicate/ghost options) The behaviour is exactly the same as in WinHTTrack */ - StringClear(&tmpbuff); - StringStrcat(&tmpbuff, StringBuff(&fspath)); - StringStrcat(&tmpbuff, "/hts-cache/doit.log"); - remove(StringBuff(&tmpbuff)); + StringClear(tmpbuff); + StringStrcat(tmpbuff, StringBuff(fspath)); + StringStrcat(tmpbuff, "/hts-cache/doit.log"); + remove(StringBuff(tmpbuff)); /* RUN THE SERVER @@ -612,18 +654,18 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { } } else { char tmp[1024]; - sprintf(tmp, "Unable to write %d bytes in the the init file %s", count, StringBuff(&fspath)); + sprintf(tmp, "Unable to write %d bytes in the the init file %s", count, StringBuff(fspath)); SET_ERROR(tmp); } fclose(fp); } else { char tmp[1024]; - sprintf(tmp, "Unable to create the init file %s", StringBuff(&fspath)); + sprintf(tmp, "Unable to create the init file %s", StringBuff(fspath)); SET_ERROR(tmp); } } else { char tmp[1024]; - sprintf(tmp, "Unable to create the directory structure in %s", StringBuff(&fspath)); + sprintf(tmp, "Unable to create the directory structure in %s", StringBuff(fspath)); 
SET_ERROR(tmp); } @@ -732,22 +774,22 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { newfile = newadr; } } - StringMemcat(&headers, redir, strlen(redir)); + StringMemcat(headers, redir, strlen(redir)); { char tmp[256]; if (strlen(file) < sizeof(tmp) - 32) { sprintf(tmp, "Location: %s\r\n", newfile); - StringMemcat(&headers, tmp, strlen(tmp)); + StringMemcat(headers, tmp, strlen(tmp)); } } inthash_write(NewLangList, "redirect", (unsigned long int)NULL); } else if (is_html(file)) { int outputmode = 0; - StringMemcat(&headers, ok, sizeof(ok) - 1); + StringMemcat(headers, ok, sizeof(ok) - 1); while(!feof(fp)) { char* str = line; - int prevlen = StringLength(&output); + int prevlen = StringLength(output); int nocr = 0; if (!linput(fp, line, sizeof(line) - 2)) { *str = '\0'; @@ -828,7 +870,7 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { } } else if (strcmp(name, "if-project-file-exists") == 0) { if (strstr(pos2, "..") == NULL) { - if (!fexist(fconcat(StringBuff(&fspath), pos2))) { + if (!fexist(fconcat(StringBuff(fspath), pos2))) { outputmode = -1; } } @@ -854,35 +896,17 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { unsigned long int adr = 0; if (inthash_readptr(NewLangList, "path", (long int *)&adr)) { char* rpath = (char*) adr; - find_handle h; + //find_handle h; if (rpath[0]) { if (rpath[strlen(rpath)-1]=='/') { rpath[strlen(rpath)-1]='\0'; /* note: patching stored (inhash) value */ } } - h = hts_findfirst(rpath); - if (h) { - struct topindex_chain * chain=NULL; - struct topindex_chain * startchain=NULL; - StringClear(&tmpbuff); - do { - if (hts_findisdir(h)) { - char iname[HTS_URLMAXSIZE*2]; - strcpybuff(iname,rpath); - strcatbuff(iname,"/"); - strcatbuff(iname,hts_findgetname(h)); - strcatbuff(iname,"/hts-cache/winprofile.ini"); - if (fexist(iname)) { - if (StringLength(&tmpbuff) > 0) { - StringStrcat(&tmpbuff, "\r\n"); - } - StringStrcat(&tmpbuff, hts_findgetname(h)); - } 
- - } - } while(hts_findnext(h)); - hts_findclose(h); - inthash_write(NewLangList, "winprofile", (unsigned long int)StringAcquire(&tmpbuff)); + { + char* profiles = hts_getcategories(rpath, 0); + char* categ = hts_getcategories(rpath,1 ); + inthash_write(NewLangList, "winprofile", (unsigned long int)profiles); + inthash_write(NewLangList, "wincateg", (unsigned long int)categ); } } } else if (strcmp(name, "copy") == 0) { @@ -1020,27 +1044,27 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { char c; if (sscanf(a+1, "%x", &n) == 1) { c = (char)n; - StringMemcat(&output, &c, 1); + StringMemcat(output, &c, 1); } a += 2; } else if (outputmode && a[0] == '<') { - StringStrcat(&output, "<"); + StringStrcat(output, "<"); } else if (outputmode && a[0] == '>') { - StringStrcat(&output, ">"); + StringStrcat(output, ">"); } else if (outputmode && a[0] == '&') { - StringStrcat(&output, "&"); + StringStrcat(output, "&"); } else if (outputmode == 3 && a[0] == ' ') { - StringStrcat(&output, "%20"); + StringStrcat(output, "%20"); } else if (outputmode >= 2 && ((unsigned char)a[0]) < 32) { char tmp[32]; sprintf(tmp, "%%%02x", (unsigned char)a[0]); - StringStrcat(&output, tmp); + StringStrcat(output, tmp); } else if (outputmode == 2 && a[0] == '%') { - StringStrcat(&output, "%%"); + StringStrcat(output, "%%"); } else if (outputmode == 3 && a[0] == '%') { - StringStrcat(&output, "%25"); + StringStrcat(output, "%25"); } else { - StringMemcat(&output, a, 1); + StringMemcat(output, a, 1); } a++; } @@ -1048,108 +1072,108 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { break; case 3: if (*langstr) { - StringStrcat(&output, "checked"); + StringStrcat(output, "checked"); } break; default: if (*langstr) { int id=1; char* fstr = langstr; - StringClear(&tmpbuff); + StringClear(tmpbuff); if (format == 2) { - StringStrcat(&output, "<option value=1>"); + StringStrcat(output, "<option value=1>"); } else if (format == -2) { - 
StringStrcat(&output, "<option value=\""); + StringStrcat(output, "<option value=\""); } while(*fstr) { switch(*fstr) { case 13: break; case 10: if (format == 1) { - StringStrcat(&output, StringBuff(&tmpbuff)); - StringStrcat(&output, "<br>\r\n"); + StringStrcat(output, StringBuff(tmpbuff)); + StringStrcat(output, "<br>\r\n"); } else if (format == -2) { - StringStrcat(&output, StringBuff(&tmpbuff)); - StringStrcat(&output, "\">"); - StringStrcat(&output, StringBuff(&tmpbuff)); - StringStrcat(&output, "</option>\r\n"); - StringStrcat(&output, "<option value=\""); + StringStrcat(output, StringBuff(tmpbuff)); + StringStrcat(output, "\">"); + StringStrcat(output, StringBuff(tmpbuff)); + StringStrcat(output, "</option>\r\n"); + StringStrcat(output, "<option value=\""); } else { char tmp[32]; sprintf(tmp, "%d", ++id); - StringStrcat(&output, StringBuff(&tmpbuff)); - StringStrcat(&output, "</option>\r\n"); - StringStrcat(&output, "<option value="); - StringStrcat(&output, tmp); + StringStrcat(output, StringBuff(tmpbuff)); + StringStrcat(output, "</option>\r\n"); + StringStrcat(output, "<option value="); + StringStrcat(output, tmp); if (listDefault == id) { - StringStrcat(&output, " selected"); + StringStrcat(output, " selected"); } - StringStrcat(&output, ">"); + StringStrcat(output, ">"); } - StringClear(&tmpbuff); + StringClear(tmpbuff); break; case '<': - StringStrcat(&tmpbuff, "<"); + StringStrcat(tmpbuff, "<"); break; case '>': - StringStrcat(&tmpbuff, ">"); + StringStrcat(tmpbuff, ">"); break; case '&': - StringStrcat(&tmpbuff, "&"); + StringStrcat(tmpbuff, "&"); break; default: - StringMemcat(&tmpbuff, fstr, 1); + StringMemcat(tmpbuff, fstr, 1); break; } fstr++; } if (format == 2) { - StringStrcat(&output, StringBuff(&tmpbuff)); - StringStrcat(&output, "</option>"); + StringStrcat(output, StringBuff(tmpbuff)); + StringStrcat(output, "</option>"); } else if (format == -2) { - StringStrcat(&output, StringBuff(&tmpbuff)); - StringStrcat(&output, "\">"); - 
StringStrcat(&output, StringBuff(&tmpbuff)); - StringStrcat(&output, "</option>"); + StringStrcat(output, StringBuff(tmpbuff)); + StringStrcat(output, "\">"); + StringStrcat(output, StringBuff(tmpbuff)); + StringStrcat(output, "</option>"); } else { - StringStrcat(&output, StringBuff(&tmpbuff)); + StringStrcat(output, StringBuff(tmpbuff)); } - StringClear(&tmpbuff); + StringClear(tmpbuff); } } } str = pos; } else { if (outputmode != -1) { - StringMemcat(&output, str, 1); + StringMemcat(output, str, 1); } } str++; } - if (!nocr && prevlen != StringLength(&output)) { - StringStrcat(&output, "\r\n"); + if (!nocr && prevlen != StringLength(output)) { + StringStrcat(output, "\r\n"); } } #ifdef _DEBUG { - int len = (int)strlen((char*)StringBuff(&output)); - assert(len == (int)StringLength(&output)); + int len = (int)strlen((char*)StringBuff(output)); + assert(len == (int)StringLength(output)); } #endif } else if (is_text(file)) { - StringMemcat(&headers, ok_text, sizeof(ok_text) - 1); + StringMemcat(headers, ok_text, sizeof(ok_text) - 1); while(!feof(fp)) { int n = fread(line, 1, sizeof(line) - 2, fp); if (n > 0) { - StringMemcat(&output, line, n); + StringMemcat(output, line, n); } } } else { - StringMemcat(&headers, ok_img, sizeof(ok_img) - 1); + StringMemcat(headers, ok_img, sizeof(ok_img) - 1); while(!feof(fp)) { int n = fread(line, 1, sizeof(line) - 2, fp); if (n > 0) { - StringMemcat(&output, line, n); + StringMemcat(output, line, n); } } } @@ -1160,8 +1184,8 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { "Content-type: text/html\r\n"; char error[] = "Page not found.\r\n"; - StringStrcat(&headers, error_hdr); - StringStrcat(&output, error); + StringStrcat(headers, error_hdr); + StringStrcat(output, error); //assert(file == NULL); } } @@ -1172,20 +1196,20 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { "Content-type: text/html\r\n"; char error[] = "Server error.\r\n"; - StringStrcat(&headers, error_hdr); - 
StringStrcat(&output, error); + StringStrcat(headers, error_hdr); + StringStrcat(output, error); #endif } { char tmp[256]; - sprintf(tmp, "Content-length: %d\r\n", (int) StringLength(&output)); - StringStrcat(&headers, tmp); + sprintf(tmp, "Content-length: %d\r\n", (int) StringLength(output)); + StringStrcat(headers, tmp); } - StringStrcat(&headers, "\r\n"); + StringStrcat(headers, "\r\n"); if ( - (send(soc_c, StringBuff(&headers), StringLength(&headers), 0) != StringLength(&headers)) + (send(soc_c, StringBuff(headers), StringLength(headers), 0) != StringLength(headers)) || - ( (meth == 1) && (send(soc_c, StringBuff(&output), StringLength(&output), 0) != StringLength(&output)) ) + ( (meth == 1) && (send(soc_c, StringBuff(output), StringLength(output), 0) != StringLength(output)) ) ) { #ifdef _DEBUG //assert(FALSE); @@ -1224,10 +1248,11 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { #endif } - StringFree(&headers); - StringFree(&output); - StringFree(&tmpbuff); - StringFree(&fspath); + StringFree(headers); + StringFree(output); + StringFree(tmpbuff); + StringFree(tmpbuff2); + StringFree(fspath); if (buffer) free(buffer); @@ -1270,7 +1295,21 @@ int htslang_uninit() { return 1; } -int htslang_load(char* limit_to, char* path) { +int smallserver_setkey(char* key, char* value) { + return inthash_write(NewLangList, key, (unsigned long int)strdup(value)); +} +int smallserver_setkeyint(char* key, LLint value) { + char tmp[256]; + sprintf(tmp, LLintP, value); + return inthash_write(NewLangList, key, (unsigned long int)strdup(tmp)); +} +int smallserver_setkeyarr(char* key, int id, char* key2, char* value) { + char tmp[256]; + sprintf(tmp, "%s%d%s", key, id, key2); + return inthash_write(NewLangList, tmp, (unsigned long int)strdup(value)); +} + +static int htslang_load(char* limit_to, char* path) { char* hashname; // int selected_lang=LANG_T(path, -1); @@ -1433,7 +1472,7 @@ int htslang_load(char* limit_to, char* path) { } /* NOTE : also contains 
the "webhttrack" hack */ -void conv_printf(char* from,char* to) { +static void conv_printf(char* from,char* to) { int i=0,j=0,len; len=strlen(from); while(i<len) { @@ -1475,13 +1514,13 @@ void conv_printf(char* from,char* to) { } } -void LANG_DELETE() { +static void LANG_DELETE() { inthash_delete(&NewLangStr); inthash_delete(&NewLangStrKeys); } // sélection de la langue -void LANG_INIT(char* path) { +static void LANG_INIT(char* path) { //CWinApp* pApp = AfxGetApp(); //if (pApp) { int test = 0; /* pApp->GetProfileInt("Language","IntId",0); */ @@ -1489,7 +1528,7 @@ void LANG_INIT(char* path) { //} } -int LANG_T(char* path, int l) { +static int LANG_T(char* path, int l) { if (l>=0) { QLANG_T(l); htslang_load(NULL, path); @@ -1497,7 +1536,7 @@ int LANG_T(char* path, int l) { return QLANG_T(-1); // 0=default (english) } -int LANG_SEARCH(char* path, char* iso) { +static int LANG_SEARCH(char* path, char* iso) { char lang_str[1024]; int i = 0; int curr_lng=LANG_T(path, -1); @@ -1516,7 +1555,7 @@ int LANG_SEARCH(char* path, char* iso) { return found; } -int LANG_LIST(char* path, char* buffer) { +static int LANG_LIST(char* path, char* buffer) { char lang_str[1024]; int i = 0; int curr_lng=LANG_T(path, -1); @@ -1537,7 +1576,7 @@ int LANG_LIST(char* path, char* buffer) { return i; } -int QLANG_T(int l) { +static int QLANG_T(int l) { static int lng=0; if (l>=0) { lng=l; @@ -1545,7 +1584,7 @@ int QLANG_T(int l) { return lng; // 0=default (english) } -char* LANGSEL(char* name) { +static char* LANGSEL(char* name) { unsigned long int adr = 0; if (NewLangStr) if (!inthash_read(NewLangStr,name,(long int *)&adr)) @@ -1556,7 +1595,7 @@ char* LANGSEL(char* name) { return ""; } -char* LANGINTKEY(char* name) { +static char* LANGINTKEY(char* name) { unsigned long int adr=0; if (NewLangStrKeys) if (!inthash_read(NewLangStrKeys,name,(long int *)&adr)) @@ -1567,123 +1606,48 @@ char* LANGINTKEY(char* name) { return ""; } -char* gethomedir(void) { - char* home = getenv( "HOME" ); - if (home) - 
return home; - else - return "."; -} -int linput_cpp(FILE* fp,char* s,int max) { - int rlen=0; - s[0]='\0'; - do { - int ret; - if (rlen>0) - if (s[rlen-1]=='\\') - s[--rlen]='\0'; // couper \ final - // lire ligne - ret=linput_trim(fp,s+rlen,max-rlen); - if (ret>0) - rlen+=ret; - } while((s[max(rlen-1,0)]=='\\') && (rlen<max)); - return rlen; -} -// copy of concat -typedef struct { - char buff[16][HTS_URLMAXSIZE*2*2]; - int rol; -} concat_strc; -char* concat(const char* a,const char* b) { - static concat_strc* strc = NULL; - if (strc == NULL) { - strc = (concat_strc*) calloc(16, sizeof(concat_strc)); - } - strc->rol=((strc->rol+1)%16); // roving pointer - strcpybuff(strc->buff[strc->rol],a); - if (b) strcatbuff(strc->buff[strc->rol],b); - return strc->buff[strc->rol]; -} -#ifdef _WIN32 -char* __fconv(char* a) { - int i; - for(i=0;i<(int) strlen(a);i++) - if (a[i]=='/') // convertir - a[i]='\\'; - return a; -} -char* fconcat(char* a,char* b) { - return __fconv(concat(a,b)); -} -char* fconv(char* a) { - return __fconv(concat(a,"")); -} -#endif /* *** Various functions *** */ -int fexist(char* s) { - struct stat st; - memset(&st, 0, sizeof(st)); - if (stat(s, &st) == 0) { - if (S_ISREG(st.st_mode)) { - return 1; - } - } - return 0; -} -int linput(FILE* fp,char* s,int max) { - int c; - int j=0; - do { - c=fgetc(fp); - if (c!=EOF) { - switch(c) { - case 13: break; // sauter CR - case 10: c=-1; break; - case 0: case 9: case 12: break; // sauter ces caractères - default: s[j++]=(char) c; break; - } - } - } while((c!=-1) && (c!=EOF) && (j<(max-1))); - s[j]='\0'; - return j; +static int check_readinput_t(T_SOC soc, int timeout) { + if (soc != INVALID_SOCKET) { + fd_set fds; // poll structures + struct timeval tv; // structure for select + FD_ZERO(&fds); + FD_SET(soc,&fds); + tv.tv_sec=timeout; + tv.tv_usec=0; + select(soc + 1,&fds,NULL,NULL,&tv); + if (FD_ISSET(soc,&fds)) + return 1; + else + return 0; + } else + return 0; } -int linput_trim(FILE* fp,char* s,int max) { - 
int rlen=0; - char* ls=(char*) malloct(max+2); - s[0]='\0'; - if (ls) { - char* a; - // lire ligne - rlen=linput(fp,ls,max); - if (rlen) { - // sauter espaces et tabs en fin - while( (rlen>0) && is_realspace(ls[max(rlen-1,0)]) ) - ls[--rlen]='\0'; - // sauter espaces en début - a=ls; - while((rlen>0) && ((*a==' ') || (*a=='\t'))) { - a++; - rlen--; - } - if (rlen>0) { - memcpy(s,a,rlen); // can copy \0 chars - s[rlen]='\0'; +static int recv_bl(T_SOC soc, void* buffer, size_t len, int timeout) { + if (check_readinput_t(soc, timeout)) { + int n = 1; + size_t size = len; + size_t offs = 0; + while(n > 0 && size > 0) { + n = recv(soc, ((char*)buffer) + offs, (int) size, 0); + if (n > 0) { + offs += n; + size -= n; } } - // - freet(ls); + return (int)offs; } - return rlen; + return -1; } -int linputsoc(T_SOC soc, char* s, int max) { +static int linputsoc(T_SOC soc, char* s, int max) { int c; int j=0; do { @@ -1706,15 +1670,8 @@ int linputsoc(T_SOC soc, char* s, int max) { return j; } -int linputsoc_t(T_SOC soc, char* s, int max, int timeout) { - if (check_readinput_t(soc, timeout)) { - return linputsoc(soc, s, max); - } - return -1; -} - // check if data is available -int check_readinput(htsblk* r) { +static int check_readinput(htsblk* r) { if (r->soc != INVALID_SOCKET) { fd_set fds; // poll structures struct timeval tv; // structure for select @@ -1731,84 +1688,21 @@ int check_readinput(htsblk* r) { return 0; } -// check if data is available -int check_readinput_t(T_SOC soc, int timeout) { - if (soc != INVALID_SOCKET) { - fd_set fds; // poll structures - struct timeval tv; // structure for select - FD_ZERO(&fds); - FD_SET(soc,&fds); - tv.tv_sec=timeout; - tv.tv_usec=0; - select(soc + 1,&fds,NULL,NULL,&tv); - if (FD_ISSET(soc,&fds)) - return 1; - else - return 0; - } else - return 0; +static int linputsoc_t(T_SOC soc, char* s, int max, int timeout) { + if (check_readinput_t(soc, timeout)) { + return linputsoc(soc, s, max); + } + return -1; } -int strfield(const char* 
f,const char* s) { +/*int strfield(const char* f,const char* s) { int r=0; while (streql(*f,*s) && ((*f)!=0) && ((*s)!=0)) { f++; s++; r++; } if (*s==0) return r; else return 0; -} - -int ehexh(char c) { - if ((c>='0') && (c<='9')) return c-'0'; - if ((c>='a') && (c<='f')) c-=('a'-'A'); - if ((c>='A') && (c<='F')) return (c-'A'+10); - return 0; -} - -int ehex(char* s) { - return 16*ehexh(*s)+ehexh(*(s+1)); -} - -void unescapehttp(char* s, String* tempo) { - int i; - for (i=0;i<(int) strlen(s);i++) { - if (s[i]=='%' && s[i+1]=='%') { - i++; - StringAddchar(tempo, '%'); - } else if (s[i]=='%') { - char hc; - i++; - hc = (char) ehex(s+i); - StringAddchar(tempo, (char) hc); - i++; // sauter 2 caractères finalement - } - else if (s[i]=='+') { - StringAddchar(tempo, ' '); - } - else - StringAddchar(tempo, s[i]); - } -} +}*/ /* same, except + */ -void unescapeini(char* s, String* tempo) { - int i; - char lastc=0; - for (i=0;i<(int) strlen(s);i++) { - if (s[i]=='%' && s[i+1]=='%') { - i++; - StringAddchar(tempo, lastc = '%'); - } else if (s[i]=='%') { - char hc; - i++; - hc = (char) ehex(s+i); - if (!is_retorsep(hc) || !is_retorsep(lastc)) { - StringAddchar(tempo, lastc = (char) hc); - } - i++; // sauter 2 caractères finalement - } - else - StringAddchar(tempo, lastc = s[i]); - } -} diff --git a/src/htsserver.h b/src/htsserver.h index 2818b34..acac908 100644 --- a/src/htsserver.h +++ b/src/htsserver.h @@ -41,6 +41,10 @@ Please visit our Website: http://www.httrack.com #include "htsbasenet.h" +/* String */ +#include "htsstrings.h" + + // Fonctions void socinput(T_SOC soc,char* s,int max); T_SOC smallserver_init_std(int* port_prox,char* adr_prox); @@ -70,79 +74,186 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path); "<!-- Generated by HTTrack Website Copier -->\r\n"\ "\r\n"\ +extern int NewLangStrSz; +extern inthash NewLangStr; +extern int NewLangStrKeysSz; +extern inthash NewLangStrKeys; +extern int NewLangListSz; +extern inthash NewLangList; -/* 
String */ - -typedef struct { - char* buff; - int len; - int capa; -} String; - -#define STRING_EMPTY {NULL, 0, 0} -#define BLK_SIZE 8192 -#define StringBuff(blk) ((blk)->buff) -#define StringLength(blk) ((blk)->len) -#define StringCapacity(blk) ((blk)->capa) -#define StringClear(blk) do { \ - if ((blk)->capa > 0) { \ - (blk)->buff[0] = '\0'; \ - }\ - (blk)->len = 0; \ -} while(0) -#define StringFree(blk) do { if ((blk)->buff != NULL) { freet((blk)->buff); (blk)->buff = NULL; } } while(0) -#define StringMemcat(blk, str, size) do { \ - if ((blk)->len + (int)(size) + 1 > (blk)->capa) { \ - (blk)->capa = (blk)->len + (size) + BLK_SIZE; \ - (blk)->buff = (char*) realloct((blk)->buff, (blk)->capa); \ - assertf((blk)->buff != NULL); \ - } \ - if ((int)(size) > 0) { \ - memcpy((blk)->buff + (blk)->len, (str), (size)); \ - (blk)->len += (size); \ - } \ - *((blk)->buff + (blk)->len) = '\0'; \ -} while(0) -#define StringAddchar(blk, c) do { \ - char __c = (c); \ - StringMemcat(blk, &__c, 1); \ -} while(0) -static void* StringAcquire(String* blk) { - void* buff = blk->buff; - blk->buff = NULL; - blk->capa = 0; - blk->len = 0; - return buff; -} +/* Spaces: CR,LF,TAB,FF */ +#define is_space(c) ( ((c)==' ') || ((c)=='\"') || ((c)==10) || ((c)==13) || ((c)==9) || ((c)==12) || ((c)==11) || ((c)=='\'') ) +#define is_realspace(c) ( ((c)==' ') || ((c)==10) || ((c)==13) || ((c)==9) || ((c)==12) || ((c)==11) ) +#define is_taborspace(c) ( ((c)==' ') || ((c)==9) ) +#define is_quote(c) ( ((c)=='\"') || ((c)=='\'') ) +#define is_retorsep(c) ( ((c)==10) || ((c)==13) || ((c)==9) ) -static void StringStrcat(String* blk, char* str) { - StringMemcat(blk, str, strlen(str)); -} +extern int smallserver_setkey(char* key, char* value); +extern int smallserver_setkeyint(char* key, LLint value); +extern int smallserver_setkeyarr(char* key, int id, char* key2, char* value); /* Language files */ -int htslang_load(char* limit_to, char* apppath); -void conv_printf(char* from,char* to); -void 
LANG_DELETE(void); -void LANG_INIT(char* path); -int LANG_T(char* path, int l); -int QLANG_T(int l); -char* LANGSEL(char* name); -char* LANGINTKEY(char* name); -int LANG_SEARCH(char* path, char* iso); -int LANG_LIST(char* path, char* buffer); +static int htslang_load(char* limit_to, char* apppath); +static void conv_printf(char* from,char* to); +static void LANG_DELETE(void); +static void LANG_INIT(char* path); +static int LANG_T(char* path, int l); +static int QLANG_T(int l); +static char* LANGSEL(char* name); +static char* LANGINTKEY(char* name); +static int LANG_SEARCH(char* path, char* iso); +static int LANG_LIST(char* path, char* buffer); int htslang_init(void); int htslang_uninit(void); -int linput_cpp(FILE* fp,char* s,int max); -void unescapehttp(char* s, String* tempo); -void unescapeini(char* s, String* tempo); +/* Static definitions */ + +static char* gethomedir(void); +static int linput_cpp(FILE* fp,char* s,int max); +static int linput_trim(FILE* fp,char* s,int max); +static char* concat(const char* a,const char* b); +static int fexist(char* s); +static int linput(FILE* fp,char* s,int max); +static int linputsoc_t(T_SOC soc, char* s, int max, int timeout); + +static char* gethomedir(void) { + char* home = getenv( "HOME" ); + if (home) + return home; + else + return "."; +} +static int linput_cpp(FILE* fp,char* s,int max) { + int rlen=0; + s[0]='\0'; + do { + int ret; + if (rlen>0) + if (s[rlen-1]=='\\') + s[--rlen]='\0'; // couper \ final + // lire ligne + ret=linput_trim(fp,s+rlen,max-rlen); + if (ret>0) + rlen+=ret; + } while((s[max(rlen-1,0)]=='\\') && (rlen<max)); + return rlen; +} +// copy of concat +typedef struct concat_strc { + char buff[16][HTS_URLMAXSIZE*2*2]; + int rol; +} concat_strc; +static char* concat(const char* a,const char* b) { + static concat_strc* strc = NULL; + if (strc == NULL) { + strc = (concat_strc*) calloc(16, sizeof(concat_strc)); + } + strc->rol=((strc->rol+1)%16); // roving pointer + strcpybuff(strc->buff[strc->rol],a); + 
if (b) strcatbuff(strc->buff[strc->rol],b); + return strc->buff[strc->rol]; +} + +static int fexist(char* s) { + struct stat st; + memset(&st, 0, sizeof(st)); + if (stat(s, &st) == 0) { + if (S_ISREG(st.st_mode)) { + return 1; + } + } + return 0; +} +static int linput(FILE* fp,char* s,int max) { + int c; + int j=0; + do { + c=fgetc(fp); + if (c!=EOF) { + switch(c) { + case 13: break; // sauter CR + case 10: c=-1; break; + case 0: case 9: case 12: break; // sauter ces caractères + default: s[j++]=(char) c; break; + } + } + } while((c!=-1) && (c!=EOF) && (j<(max-1))); + s[j]='\0'; + return j; +} +static int linput_trim(FILE* fp,char* s,int max) { + int rlen=0; + char* ls=(char*) malloct(max+2); + s[0]='\0'; + if (ls) { + char* a; + // lire ligne + rlen=linput(fp,ls,max); + if (rlen) { + // sauter espaces et tabs en fin + while( (rlen>0) && is_realspace(ls[max(rlen-1,0)]) ) + ls[--rlen]='\0'; + // sauter espaces en début + a=ls; + while((rlen>0) && ((*a==' ') || (*a=='\t'))) { + a++; + rlen--; + } + if (rlen>0) { + memcpy(s,a,rlen); // can copy \0 chars + s[rlen]='\0'; + } + } + // + freet(ls); + } + return rlen; +} + +static void unescapeini(char* s, String* tempo) { + int i; + char lastc=0; + for (i=0;i<(int) strlen(s);i++) { + if (s[i]=='%' && s[i+1]=='%') { + i++; + StringAddchar(*tempo, lastc = '%'); + } else if (s[i]=='%') { + char hc; + i++; + hc = (char) ehex(s+i); + if (!is_retorsep(hc) || !is_retorsep(lastc)) { + StringAddchar(*tempo, lastc = (char) hc); + } + i++; // sauter 2 caractères finalement + } + else + StringAddchar(*tempo, lastc = s[i]); + } +} + +#ifndef _WIN32 +#define fconv(a) (a) +#define fconcat(a,b) concat(a,b) +#endif + +#ifdef _WIN32 +static char* __fconv(char* a) { + int i; + for(i=0;i<(int) strlen(a);i++) + if (a[i]=='/') // convertir + a[i]='\\'; + return a; +} +static char* fconcat(char* a,char* b) { + return __fconv(concat(a,b)); +} +static char* fconv(char* a) { + return __fconv(concat(a,"")); +} +#endif -int smallserver_setkey(char* 
key, char* value); -int smallserver_setkeyint(char* key, LLint value); -int smallserver_setkeyarr(char* key, int id, char* key2, char* value); - #endif diff --git a/src/htsstrings.h b/src/htsstrings.h new file mode 100755 index 0000000..0c4998a --- /dev/null +++ b/src/htsstrings.h @@ -0,0 +1,138 @@ +/* ------------------------------------------------------------ */ +/* +HTTrack Website Copier, Offline Browser for Windows and Unix +Copyright (C) Xavier Roche and other contributors + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + + +Important notes: + +- We hereby ask people using this source NOT to use it in purpose of grabbing +emails addresses, or collecting any other private information on persons. +This would disgrace our work, and spoil the many hours we spent on it. 
+ + +Please visit our Website: http://www.httrack.com +*/ + + +/* ------------------------------------------------------------ */ +/* File: Strings */ +/* Author: Xavier Roche */ +/* ------------------------------------------------------------ */ + +// Strings a bit safer than static buffers + +#ifndef HTS_STRINGS_DEFSTATIC +#define HTS_STRINGS_DEFSTATIC + +typedef struct String { + char* buff; + int len; + int capa; +} String; + +#define STRING_EMPTY {NULL, 0, 0} +#define STRING_BLK_SIZE 256 +#define StringBuff(blk) ((blk).buff) +#define StringLength(blk) ((blk).len) +#define StringCapacity(blk) ((blk).capa) +#define StringRoom(blk, size) do { \ + if ((blk).len + (int)(size) + 1 > (blk).capa) { \ + (blk).capa = ((blk).len + (size) + 1) * 2; \ + (blk).buff = (char*) realloct((blk).buff, (blk).capa); \ + assertf((blk).buff != NULL); \ + } \ +} while(0) +#define StringBuffN(blk, size) StringBuffN_(&(blk), size) +static char* StringBuffN_(String* blk, int size) { + StringRoom(*blk, (blk->len) + size); + return StringBuff(*blk); +} +#define StringClear(blk) do { \ + StringRoom(blk, 0); \ + (blk).buff[0] = '\0'; \ + (blk).len = 0; \ +} while(0) +#define StringFree(blk) do { \ + if ((blk).buff != NULL) { \ + freet((blk).buff); \ + (blk).buff = NULL; \ + } \ + (blk).capa = 0; \ + (blk).len = 0; \ +} while(0) +#define StringMemcat(blk, str, size) do { \ + StringRoom(blk, size); \ + if ((int)(size) > 0) { \ + memcpy((blk).buff + (blk).len, (str), (size)); \ + (blk).len += (size); \ + } \ + *((blk).buff + (blk).len) = '\0'; \ +} while(0) +#define StringAddchar(blk, c) do { \ + char __c = (c); \ + StringMemcat(blk, &__c, 1); \ +} while(0) +static void* StringAcquire(String* blk) { + void* buff = blk->buff; + blk->buff = NULL; + blk->capa = 0; + blk->len = 0; + return buff; +} +#define StringStrcat(blk, str) StringMemcat(blk, str, ((str) != NULL) ? 
strlen(str) : 0) +#define StringStrcpy(blk, str) do { \ + StringClear(blk); \ + StringStrcat(blk, str); \ +} while(0) + +/* Tools */ + +static int ehexh(char c) { + if ((c>='0') && (c<='9')) return c-'0'; + if ((c>='a') && (c<='f')) c-=('a'-'A'); + if ((c>='A') && (c<='F')) return (c-'A'+10); + return 0; +} + +static int ehex(char* s) { + return 16*ehexh(*s)+ehexh(*(s+1)); +} + +static void unescapehttp(char* s, String* tempo) { + int i; + for (i=0;i<(int) strlen(s);i++) { + if (s[i]=='%' && s[i+1]=='%') { + i++; + StringAddchar(*tempo, '%'); + } else if (s[i]=='%') { + char hc; + i++; + hc = (char) ehex(s+i); + StringAddchar(*tempo, (char) hc); + i++; // sauter 2 caractères finalement + } + else if (s[i]=='+') { + StringAddchar(*tempo, ' '); + } + else + StringAddchar(*tempo, s[i]); + } +} + + +#endif diff --git a/src/htsthread.c b/src/htsthread.c index d403730..a766a40 100644 --- a/src/htsthread.c +++ b/src/htsthread.c @@ -34,15 +34,115 @@ Please visit our Website: http://www.httrack.com /* Author: Xavier Roche */ /* ------------------------------------------------------------ */ +/* Internal engine bytecode */ +#define HTS_INTERNAL_BYTECODE #include "htsglobal.h" +#include "htsbase.h" #include "htsthread.h" +#if USE_BEGINTHREAD +#if HTS_WIN +#include <process.h> +#endif +#endif + +static int process_chain = 0; +static PTHREAD_LOCK_TYPE process_chain_mutex; + +HTSEXT_API void htsthread_wait(void ) { + htsthread_wait_n(0); +} + +HTSEXT_API void htsthread_wait_n(int n_wait) { +#if USE_BEGINTHREAD + int wait = 0; + do { + htsSetLock(&process_chain_mutex, 1); + wait = (process_chain > n_wait ); + htsSetLock(&process_chain_mutex, 0); + if (wait) + Sleep(100); + } while(wait); +#endif +} + +HTSEXT_API void htsthread_init(void ) { +#if USE_BEGINTHREAD + assertf(process_chain == 0); + htsSetLock(&process_chain_mutex, -999); +#endif +} + +HTSEXT_API void htsthread_uninit(void ) { + htsthread_wait(); +#if USE_BEGINTHREAD + htsSetLock(&process_chain_mutex, -998); +#endif +} 
+ +typedef struct { + PTHREAD_TYPE ( PTHREAD_TYPE_FNC *start_address )( void * ); + void** arglist; +} execth_args; +static PTHREAD_TYPE PTHREAD_TYPE_FNC execth( void * arg ) +{ + execth_args* args = (execth_args*) arg; + assertf(args != NULL); + + htsSetLock(&process_chain_mutex, 1); + process_chain++; + assertf(process_chain > 0); + htsSetLock(&process_chain_mutex, 0); + + (void) args->start_address(args->arglist); + + htsSetLock(&process_chain_mutex, 1); + process_chain--; + assertf(process_chain >= 0); + htsSetLock(&process_chain_mutex, 0); + + free(arg); + return PTHREAD_RETURN; +} + + +HTSEXT_API int hts_newthread( PTHREAD_TYPE ( PTHREAD_TYPE_FNC *start_address )( void * ), unsigned stack_size, void *arglist ) +{ + execth_args* args = (execth_args*) malloc(sizeof(execth_args)); + assertf(args != NULL); + args->start_address = start_address; + args->arglist = arglist; + + /* create a thread */ +#ifdef _WIN32 + if (_beginthread(execth, stack_size, args) == -1) { + free(args); + return -1; + } +#else + { + PTHREAD_HANDLE handle = 0; + int retcode; + retcode = pthread_create(&handle, NULL, execth, args); + if (retcode != 0) { /* error */ + free(args); + return -1; + } else { + /* detach the thread from the main process so that is can be independent */ + pthread_detach(handle); + } + } +#endif + return 0; +} + + // Threads - emulate _beginthread under Linux/Unix using pthread_XX // Some changes will have to be done, see PTHREAD_RETURN,PTHREAD_TYPE #if USE_PTHREAD #include <pthread.h> /* _beginthread, _endthread */ - +#if 0 unsigned long _beginthread( void* ( *start_address )( void * ), unsigned stack_size, void *arglist ) { pthread_t th; @@ -56,6 +156,7 @@ unsigned long _beginthread( void* ( *start_address )( void * ), unsigned stack_s return 0; } #endif +#endif #if USE_BEGINTHREAD /* @@ -67,28 +168,49 @@ unsigned long _beginthread( void* ( *start_address )( void * ), unsigned stack_s 0 unlock the mutex [-1 check if locked (always return 0 with mutex)] -999 
initialize -*/ + -998 free + */ HTSEXT_API int htsSetLock(PTHREAD_LOCK_TYPE* hMutex,int lock) { #if HTS_WIN /* lock */ - if (lock==1) - WaitForSingleObject(*hMutex,INFINITE); - /* unlock */ - else if (lock==0) - ReleaseMutex(*hMutex); - /* create */ - else if (lock==-999) - *hMutex=CreateMutex(NULL,FALSE,NULL); + switch(lock) { + case 1: /* lock */ + assertf(*hMutex != NULL); + WaitForSingleObject(*hMutex,INFINITE); + break; + case 0: /* unlock */ + assertf(*hMutex != NULL); + ReleaseMutex(*hMutex); + break; + case -999: /* create */ + *hMutex=CreateMutex(NULL,FALSE,NULL); + break; + case -998: /* destroy */ + CloseHandle(*hMutex); + *hMutex = NULL; + break; + default: + assert(FALSE); + break; + } #else - /* lock */ - if (lock==1) - pthread_mutex_lock(hMutex); - /* unlock */ - else if (lock==0) - pthread_mutex_unlock(hMutex); - /* create */ - else if (lock==-999) - pthread_mutex_init(hMutex,0); + switch(lock) { + case 1: /* lock */ + pthread_mutex_lock(hMutex); + break; + case 0: /* unlock */ + pthread_mutex_unlock(hMutex); + break; + case -999: /* create */ + pthread_mutex_init(hMutex,0); + break; + case -998: /* destroy */ + pthread_mutex_destroy(hMutex); + break; + default: + assert(0); + break; + } #endif return 0; } diff --git a/src/htsthread.h b/src/htsthread.h index 326c8cb..cac48de 100644 --- a/src/htsthread.h +++ b/src/htsthread.h @@ -43,14 +43,23 @@ Please visit our Website: http://www.httrack.com #endif #if HTS_WIN #include "windows.h" +#ifdef _WIN32_WCE +#ifndef HTS_CECOMPAT +#include "cethread.h" +#endif +#endif #endif #if USE_BEGINTHREAD #if HTS_WIN #define PTHREAD_RETURN -#define PTHREAD_TYPE void __cdecl +#define PTHREAD_TYPE void +#define PTHREAD_TYPE_FNC __cdecl #define PTHREAD_LOCK_TYPE HANDLE +#define PTHREAD_HANDLE HANDLE +#define PTHREAD_WAIT_THREAD(A) do { WaitForSingleObject(A, INFINITE); CloseHandle(A); } while(0) + /* Useless - see '__declspec( thread )' */ /* @@ -65,12 +74,15 @@ Please visit our Website: http://www.httrack.com #define 
PTHREAD_RETURN NULL #define PTHREAD_TYPE void* +#define PTHREAD_TYPE_FNC #define PTHREAD_LOCK_TYPE pthread_mutex_t #define PTHREAD_KEY_TYPE pthread_key_t #define PTHREAD_KEY_CREATE(ptrkey, uninit) pthread_key_create(ptrkey, uninit) #define PTHREAD_KEY_DELETE(key) pthread_key_delete(key) #define PTHREAD_KEY_SET(key, val, ptrtype) pthread_setspecific(key, (void*)val) #define PTHREAD_KEY_GET(key, ptrval, ptrtype) do { *(ptrval)=(ptrtype)pthread_getspecific(key); } while(0) +#define PTHREAD_HANDLE pthread_t +#define PTHREAD_WAIT_THREAD(A) do { pthread_join(A, NULL); CloseHandle(A); } while(0) #endif @@ -82,13 +94,24 @@ Please visit our Website: http://www.httrack.com #define PTHREAD_KEY_DELETE(key) do { key=(void*)NULL; } while(0) #define PTHREAD_KEY_SET(key, val, ptrtype) do { key=(void*)(val); } while(0) #define PTHREAD_KEY_GET(key, ptrval, ptrtype) do { *(ptrval)=(ptrtype)(key); } while(0) +#define PTHREAD_HANDLE void #endif +/* Library internal definictions */ +HTSEXT_API int hts_newthread( PTHREAD_TYPE ( PTHREAD_TYPE_FNC *start_address )( void * ), unsigned stack_size, void *arglist ); +HTSEXT_API void htsthread_wait(void ); +HTSEXT_API void htsthread_wait_n(int n_wait); + +#ifdef HTS_INTERNAL_BYTECODE HTSEXT_API int htsSetLock(PTHREAD_LOCK_TYPE * hMutex,int lock); +HTSEXT_API void htsthread_init(void ); +HTSEXT_API void htsthread_uninit(void ); #if USE_PTHREAD -unsigned long _beginthread( void* ( *start_address )( void * ), unsigned stack_size, void *arglist ); +// unsigned long _beginthread( void* ( *start_address )( void * ), unsigned stack_size, void *arglist ); + +#endif #endif #endif diff --git a/src/htstools.c b/src/htstools.c index 44e5137..389bd3a 100644 --- a/src/htstools.c +++ b/src/htstools.c @@ -35,14 +35,16 @@ Please visit our Website: http://www.httrack.com /* Author: Xavier Roche */ /* ------------------------------------------------------------ */ +/* Internal engine bytecode */ +#define HTS_INTERNAL_BYTECODE + #include "htstools.h" /* specific 
definitions */ #include "htsbase.h" -#include <stdio.h> -#include <stdlib.h> -#include <string.h> #include <ctype.h> +/* String */ +#include "htsstrings.h" /* END specific definitions */ @@ -90,10 +92,14 @@ int ident_url_relatif(char *lien,char* origin_adr,char* origin_fil,char* adr,cha ok=-2; // non supporté } #if HTS_USEOPENSSL - } else if (SSL_is_available && strfield(lien,"https://")) { - // Note: ftp:foobar.gif is not valid - if (ident_url_absolute(lien,adr,fil)==-1) { - ok=-1; // erreur URL + } else if (strfield(lien,"https://")) { + if (SSL_is_available) { + // Note: ftp:foobar.gif is not valid + if (ident_url_absolute(lien,adr,fil)==-1) { + ok=-1; // erreur URL + } + } else { + ok=-1; } #endif } else if ((scheme) && ( @@ -190,8 +196,8 @@ int ident_url_relatif(char *lien,char* origin_adr,char* origin_fil,char* adr,cha // créer dans s, à partir du chemin courant curr_fil, le lien vers link (absolu) // un ident_url_relatif a déja été fait avant, pour que link ne soit pas un chemin relatif int lienrelatif(char* s,char* link,char* curr_fil) { - char _curr[HTS_URLMAXSIZE*2]; - char newcurr_fil[HTS_URLMAXSIZE*2],newlink[HTS_URLMAXSIZE*2]; + char BIGSTK _curr[HTS_URLMAXSIZE*2]; + char BIGSTK newcurr_fil[HTS_URLMAXSIZE*2],newlink[HTS_URLMAXSIZE*2]; char* curr; //int n=0; char* a; @@ -325,7 +331,7 @@ void longfile_to_83(int mode,char* n83,char* save) { max=8; break; case 2: - max=30; + max=31; break; default: max=8; @@ -348,21 +354,20 @@ void longfile_to_83(int mode,char* n83,char* save) { } } /* - Avoid: (ISO9660, but also suitable for 8-3) - (Thanks to jonat@cellcast.com for te hint) - /:;?\#*~ - 0x00-0x1f and 0x80-0xff + Avoid: (ISO9660, but also suitable for 8-3) + (Thanks to jonat@cellcast.com for te hint) + /:;?\#*~ + 0x00-0x1f and 0x80-0xff */ - for(i=0 ; i < (int) strlen(save) ; i++) { - if ( - (strchr("/:;?\\#*~", save[i])) - || - (save[i] < 32) - || - (save[i] >= 127) - ) { - save[i]='_'; + for(i = 0 ; save[i] != 0 ; i++) { + char a = save[i]; + if (a >= 
'a' && a <= 'z') { + a -= 'a' - 'A'; } + else if ( ! ( (a >= 'A' && a <= 'Z') || (a >= '0' && a <= '9') || a == '_' || a == '.') ) { + a = '_'; + } + save[i] = a; } i=j=0; @@ -475,6 +480,20 @@ HTS_INLINE int __rech_tageq(const char* adr,const char* s) { } return 0; } + +HTS_INLINE int rech_endtoken(const char* adr, const char** start) { + char quote = '\0'; + int length = 0; + while(is_space(*adr)) adr++; + if (*adr == '"' || *adr == '\'') + quote = *adr++; + *start = adr; + while(*adr != 0 && *adr != quote && (quote != '\0' || !is_space(*adr)) ) { + length++; + adr++; + } + return length; +} // same, but check begining of adr wirh s (for <object src="bar.mov" .. hotspot123="foo.html">) HTS_INLINE int __rech_tageqbegdigits(const char* adr,const char* s) { int p; @@ -519,7 +538,7 @@ HTS_INLINE int check_tag(char* from,const char* tag) { int istoobig(LLint size,LLint maxhtml,LLint maxnhtml,char* type) { int ok=1; if (size>0) { - if (is_hypertext_mime(type)) { + if (is_hypertext_mime(type, "")) { if (maxhtml>0) { if (size>maxhtml) ok=0; @@ -535,18 +554,34 @@ int istoobig(LLint size,LLint maxhtml,LLint maxnhtml,char* type) { } +static int sortTopIndexFnc(const void * a_, const void * b_) { + int cmp; + topindex_chain** a = (topindex_chain**) a_; + topindex_chain** b = (topindex_chain**) b_; + /* Category first, then name */ + if ((cmp = (*a)->level - (*b)->level) == 0) { + if ((cmp = strcmpnocase((*a)->category, (*b)->category)) == 0) { + cmp = strcmpnocase((*a)->name, (*b)->name); + } + } + return cmp; +} + +HTSEXT_API char* hts_getcategory(char* filename); + HTSEXT_API int hts_buildtopindex(httrackp* opt,char* path,char* binpath) { FILE* fpo; int retval=0; - char rpath[1024*2]; - char *toptemplate_header=NULL,*toptemplate_body=NULL,*toptemplate_footer=NULL; + char BIGSTK rpath[1024*2]; + char *toptemplate_header=NULL,*toptemplate_body=NULL,*toptemplate_footer=NULL,*toptemplate_bodycat=NULL; // et templates html 
toptemplate_header=readfile_or(fconcat(binpath,"templates/topindex-header.html"),HTS_INDEX_HEADER); toptemplate_body=readfile_or(fconcat(binpath,"templates/topindex-body.html"),HTS_INDEX_BODY); + toptemplate_bodycat=readfile_or(fconcat(binpath,"templates/topindex-bodycat.html"),HTS_INDEX_BODYCAT); toptemplate_footer=readfile_or(fconcat(binpath,"templates/topindex-footer.html"),HTS_INDEX_FOOTER); - if (toptemplate_header && toptemplate_body && toptemplate_footer) { + if (toptemplate_header && toptemplate_body && toptemplate_footer && toptemplate_bodycat) { strcpybuff(rpath,path); if (rpath[0]) { @@ -556,6 +591,7 @@ HTSEXT_API int hts_buildtopindex(httrackp* opt,char* path,char* binpath) { fpo=fopen(fconcat(rpath,"/index.html"),"wb"); if (fpo) { + String iname = STRING_EMPTY; find_handle h; verif_backblue(opt,concat(rpath,"/")); // générer gif // Header @@ -568,16 +604,40 @@ HTSEXT_API int hts_buildtopindex(httrackp* opt,char* path,char* binpath) { if (h) { struct topindex_chain * chain=NULL; struct topindex_chain * startchain=NULL; + String iname = STRING_EMPTY; + int chainSize = 0; do { if (hts_findisdir(h)) { - char iname[HTS_URLMAXSIZE*2]; - strcpybuff(iname,rpath); - strcatbuff(iname,"/"); - strcatbuff(iname,hts_findgetname(h)); - strcatbuff(iname,"/index.html"); - if (fexist(iname)) { + StringStrcpy(iname,rpath); + StringStrcat(iname,"/"); + StringStrcat(iname,hts_findgetname(h)); + StringStrcat(iname,"/index.html"); + if (fexist(StringBuff(iname))) { + int level = 0; + char* category = NULL; struct topindex_chain * oldchain=chain; + + /* Check for an existing category */ + StringStrcpy(iname,rpath); + StringStrcat(iname,"/"); + StringStrcat(iname,hts_findgetname(h)); + StringStrcat(iname,"/hts-cache/winprofile.ini"); + if (fexist(StringBuff(iname))) { + category = hts_getcategory(StringBuff(iname)); + if (category != NULL) { + if (*category == '\0') { + freet(category); + category = NULL; + } + } + } + if (category == NULL) { + category = strdupt("No 
categories"); + level = 1; + } + chain=calloc(sizeof(struct topindex_chain), 1); + chainSize++; if (!startchain) { startchain=chain; } @@ -587,29 +647,63 @@ HTSEXT_API int hts_buildtopindex(httrackp* opt,char* path,char* binpath) { } chain->next=NULL; strcpybuff(chain->name, hts_findgetname(h)); + chain->category = category; + chain->level = level; } } } } while(hts_findnext(h)); hts_findclose(h); - - /* Build sorted index */ - chain=startchain; - while(chain) { - char hname[HTS_URLMAXSIZE*2]; - strcpybuff(hname,chain->name); - escape_check_url(hname); - fprintf(fpo,toptemplate_body, - hname, - chain->name - ); - - chain=chain->next; + StringFree(iname); + + /* Sort chain */ + { + struct topindex_chain** sortedElts = (struct topindex_chain**) calloct(sizeof(topindex_chain*), chainSize); + assertf(sortedElts != NULL); + if (sortedElts != NULL) { + int i; + char* category = ""; + + /* Build array */ + struct topindex_chain * chain = startchain; + for(i = 0 ; i < chainSize ; i++) { + assertf(chain != NULL); + sortedElts[i] = chain; + chain = chain->next; + } + qsort(sortedElts, chainSize, sizeof(topindex_chain*), sortTopIndexFnc); + + /* Build sorted index */ + for(i = 0 ; i < chainSize ; i++) { + char BIGSTK hname[HTS_URLMAXSIZE*2]; + strcpybuff(hname,sortedElts[i]->name); + escape_check_url(hname); + + /* Changed category */ + if (strcmp(category, sortedElts[i]->category) != 0) { + category = sortedElts[i]->category; + fprintf(fpo,toptemplate_bodycat, category); + } + fprintf(fpo,toptemplate_body, + hname, + sortedElts[i]->name + ); + } + + /* Wipe elements */ + for(i = 0 ; i < chainSize ; i++) { + freet(sortedElts[i]->category); + freet(sortedElts[i]); + sortedElts[i] = NULL; + } + freet(sortedElts); + + /* Return value */ + retval=1; + } } - - - retval=1; + } // Footer @@ -629,10 +723,110 @@ HTSEXT_API int hts_buildtopindex(httrackp* opt,char* path,char* binpath) { freet(toptemplate_body); if (toptemplate_footer) freet(toptemplate_footer); + if (toptemplate_body) 
+ freet(toptemplate_body); return retval; } +HTSEXT_API char* hts_getcategory(char* filename) { + String categ = STRING_EMPTY; + if (fexist(filename)) { + FILE* fp = fopen(filename, "rb"); + if (fp != NULL) { + int done=0; + while(!feof(fp) && !done) { + char BIGSTK line[1024]; + int n = linput(fp, line, sizeof(line) - 2); + if (n > 0) { + if (strfield(line, "category=")) { + unescapehttp(line+9, &categ); + done=1; + } + } + } + fclose(fp); + } + } + return StringBuff(categ); +} + +HTSEXT_API char* hts_getcategories(char* path, int type) { + String categ = STRING_EMPTY; + String profiles = STRING_EMPTY; + char* rpath = path; + find_handle h; + inthash hashCateg = NULL; + if (rpath[0]) { + if (rpath[strlen(rpath)-1]=='/') { + rpath[strlen(rpath)-1]='\0'; /* note: patching stored (inhash) value */ + } + } + h = hts_findfirst(rpath); + if (h) { + struct topindex_chain * chain=NULL; + struct topindex_chain * startchain=NULL; + String iname = STRING_EMPTY; + if (type == 1) { + hashCateg = inthash_new(127); + StringStrcat(categ, "Test category 1"); + StringStrcat(categ, "\r\nTest category 2"); + } + do { + if (hts_findisdir(h)) { + char BIGSTK line2[1024]; + StringStrcpy(iname,rpath); + StringStrcat(iname,"/"); + StringStrcat(iname,hts_findgetname(h)); + StringStrcat(iname,"/hts-cache/winprofile.ini"); + if (fexist(StringBuff(iname))) { + if (type == 1) { + FILE* fp = fopen(StringBuff(iname), "rb"); + if (fp != NULL) { + int done=0; + while(!feof(fp) && !done) { + int n = linput(fp, line2, sizeof(line2) - 2); + if (n > 0) { + if (strfield(line2, "category=")) { + if (*(line2+9)) { + if (!inthash_read(hashCateg, line2+9, NULL)) { + inthash_write(hashCateg, line2+9, 0); + if (StringLength(categ) > 0) { + StringStrcat(categ, "\r\n"); + } + unescapehttp(line2+9, &categ); + } + } + done=1; + } + } + } + line2[0] = '\0'; + fclose(fp); + } + } else { + if (StringLength(profiles) > 0) { + StringStrcat(profiles, "\r\n"); + } + StringStrcat(profiles, hts_findgetname(h)); + } + } + 
+ } + } while(hts_findnext(h)); + hts_findclose(h); + StringFree(iname); + } + if (hashCateg) { + inthash_delete(&hashCateg); + hashCateg = NULL; + } + if (type == 1) + return StringBuff(categ); + else + return StringBuff(profiles); +} + @@ -658,14 +852,14 @@ HTSEXT_API find_handle hts_findfirst(char* path) { memset(find, 0, sizeof(find_handle_struct)); #if HTS_WIN { - char rpath[1024*2]; + char BIGSTK rpath[1024*2]; strcpybuff(rpath,path); if (rpath[0]) { if (rpath[strlen(rpath)-1]!='\\') strcatbuff(rpath,"\\"); } strcatbuff(rpath,"*.*"); - find->handle = FindFirstFile(rpath,&find->hdata); + find->handle = FindFirstFileA(rpath,&find->hdata); if (find->handle != INVALID_HANDLE_VALUE) return find; } @@ -693,7 +887,7 @@ HTSEXT_API find_handle hts_findfirst(char* path) { HTSEXT_API int hts_findnext(find_handle find) { if (find) { #if HTS_WIN - if ( (FindNextFile(find->handle,&find->hdata))) + if ( (FindNextFileA(find->handle,&find->hdata))) return 1; #else memset(&(find->filestat), 0, sizeof(find->filestat)); diff --git a/src/htstools.h b/src/htstools.h index e3f7dd7..c75d74d 100644 --- a/src/htstools.h +++ b/src/htstools.h @@ -40,8 +40,6 @@ Please visit our Website: http://www.httrack.com #define HTSTOOLS_DEFH /* specific definitions */ -#include <stdio.h> -#include <stdlib.h> #include "htsbase.h" #include "htscore.h" @@ -54,6 +52,33 @@ Please visit our Website: http://www.httrack.com #include <sys/stat.h> #endif +#ifndef HTTRACK_DEFLIB + +// Portable directory find functions +#ifdef _WIN32 +typedef struct find_handle_struct { + WIN32_FIND_DATAA hdata; + HANDLE handle; +} find_handle_struct; +#else +typedef struct find_handle_struct { + DIR * hdir; + struct dirent* dirp; + struct stat filestat; + char path[2048]; +} find_handle_struct; +#endif +typedef find_handle_struct* find_handle; +typedef struct topindex_chain { + int level; /* sort level */ + char* category; /* category */ + char name[2048]; /* path */ + struct topindex_chain* next; /* next element */ +} 
topindex_chain ; +#endif + +/* Library internal definictions */ +#ifdef HTS_INTERNAL_BYTECODE int ident_url_relatif(char *lien,char* urladr,char* urlfil,char* adr,char* fil); int lienrelatif(char* s,char* link,char* curr); int link_has_authority(char* lien); @@ -84,38 +109,15 @@ HTS_INLINE int __rech_tageqbegdigits(const char* adr,const char* s); ) //HTS_INLINE int rech_tageq(const char* adr,const char* s); HTS_INLINE int rech_sampletag(const char* adr,const char* s); +HTS_INLINE int rech_endtoken(const char* adr, const char** start); HTS_INLINE int check_tag(char* from,const char* tag); int verif_backblue(httrackp* opt,char* base); int verif_external(int nb,int test); int istoobig(LLint size,LLint maxhtml,LLint maxnhtml,char* type); - -#ifndef HTTRACK_DEFLIB HTSEXT_API int hts_buildtopindex(httrackp* opt,char* path,char* binpath); -#endif - // Portable directory find functions - -#ifndef HTTRACK_DEFLIB -#ifdef _WIN32 -typedef struct { - WIN32_FIND_DATA hdata; - HANDLE handle; -} find_handle_struct; -#else -typedef struct { - DIR * hdir; - struct dirent* dirp; - struct stat filestat; - char path[2048]; -} find_handle_struct; -#endif -typedef find_handle_struct* find_handle; -typedef struct topindex_chain { - char name[2048]; /* path */ - struct topindex_chain* next; /* next element */ -} topindex_chain ; // Directory find functions HTSEXT_API find_handle hts_findfirst(char* path); HTSEXT_API int hts_findnext(find_handle find); @@ -126,6 +128,7 @@ HTSEXT_API int hts_findgetsize(find_handle find); HTSEXT_API int hts_findisdir(find_handle find); HTSEXT_API int hts_findisfile(find_handle find); HTSEXT_API int hts_findissystem(find_handle find); + #endif #endif diff --git a/src/htsweb.c b/src/htsweb.c index a5e1902..8e33e34 100644 --- a/src/htsweb.c +++ b/src/htsweb.c @@ -120,7 +120,7 @@ int main(int argc, char* argv[]) if (argc < 2 || (argc % 2) != 0) { fprintf(stderr, "** Warning: use the webhttrack frontend if available\n"); fprintf(stderr, "usage: %s 
<path-to-html-root-dir> [key value [key value]..]\n", argv[0]); - fprintf(stderr, "example: %s /usr/share/httrack\n", argv[0]); + fprintf(stderr, "example: %s /usr/share/httrack/\n", argv[0]); return 1; } @@ -200,6 +200,7 @@ int main(int argc, char* argv[]) /* launch */ ret = help_server(argv[1]); + htsthread_wait(); hts_uninit(); #ifdef _WIN32 @@ -210,7 +211,7 @@ int main(int argc, char* argv[]) } static int webhttrack_runmain(int argc, char** argv); -static PTHREAD_TYPE back_launch_cmd( void* pP ) { +static PTHREAD_TYPE PTHREAD_TYPE_FNC back_launch_cmd( void* pP ) { char* cmd = (char*) pP; char** argv = (char**) malloct(1024 * sizeof(char*)); int argc = 0; @@ -267,9 +268,12 @@ static PTHREAD_TYPE back_launch_cmd( void* pP ) { void webhttrack_main(char* cmd) { commandRunning = 1; - _beginthread(back_launch_cmd, 0, (void*) strdup(cmd)); + (void)hts_newthread(back_launch_cmd, 0, (void*) strdup(cmd)); } +/* Internal locking */ +HTSEXT_API int htsSetLock(PTHREAD_LOCK_TYPE * hMutex,int lock); + void webhttrack_lock(int lock) { htsSetLock(&refreshMutex, lock); } @@ -281,6 +285,7 @@ static int webhttrack_runmain(int argc, char** argv) { htswrap_add("start",htsshow_start); htswrap_add("change-options",htsshow_chopt); htswrap_add("end",htsshow_end); + htswrap_add("preprocess-html",htsshow_preprocesshtml); htswrap_add("check-html",htsshow_checkhtml); htswrap_add("loop",htsshow_loop); htswrap_add("query",htsshow_query); @@ -290,8 +295,10 @@ static int webhttrack_runmain(int argc, char** argv) { htswrap_add("pause",htsshow_pause); htswrap_add("save-file",htsshow_filesave); htswrap_add("link-detected",htsshow_linkdetected); + htswrap_add("link-detected2",htsshow_linkdetected2); htswrap_add("transfer-status",htsshow_xfrstatus); htswrap_add("save-name",htsshow_savename); + htsthread_wait_n(1); hts_uninit(); return hts_main(argc,argv); @@ -358,6 +365,9 @@ int __cdecl htsshow_chopt(httrackp* opt) { int __cdecl htsshow_end(void) { return 1; } +int __cdecl 
htsshow_preprocesshtml(char** html,int* len,char* url_adresse,char* url_fichier) { + return 1; +} int __cdecl htsshow_checkhtml(char* html,int len,char* url_adresse,char* url_fichier) { return 1; } @@ -589,6 +599,15 @@ int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int lien_n, case 2: sprintf(tmp, "purging files"); break; + case 3: + sprintf(tmp, "loading cache"); + break; + case 4: + sprintf(tmp, "waiting (scheduler)"); + break; + case 5: + sprintf(tmp, "waiting (throttle)"); + break; } smallserver_setkey("info.currentjob", tmp); } @@ -643,11 +662,20 @@ void __cdecl htsshow_filesave(char* file) { int __cdecl htsshow_linkdetected(char* link) { return 1; } +int __cdecl htsshow_linkdetected2(char* link, char* start_tag) { + return 1; +} int __cdecl htsshow_xfrstatus(lien_back* back) { return 1; } int __cdecl htsshow_savename(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save) { return 1; } +int __cdecl htsshow_sendheader(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* outgoing) { + return 1; +} +int __cdecl htsshow_receiveheader(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* incoming) { + return 1; +} diff --git a/src/htsweb.h b/src/htsweb.h index 272b363..82293b4 100644 --- a/src/htsweb.h +++ b/src/htsweb.h @@ -43,7 +43,7 @@ Please visit our Website: http://www.httrack.com #define NStatsBuffer 14 #define MAX_LEN_INPROGRESS 40 -typedef struct { +typedef struct t_StatsBuffer { char name[1024]; char file[1024]; char state[256]; @@ -59,7 +59,7 @@ typedef struct { int actived; // pour disabled } t_StatsBuffer; -typedef struct { +typedef struct t_InpInfo { int ask_refresh; int refresh; LLint stat_bytes; @@ -85,6 +85,7 @@ void __cdecl htsshow_uninit(void); int __cdecl htsshow_start(httrackp* opt); int __cdecl htsshow_chopt(httrackp* opt); int __cdecl htsshow_end(void); +int __cdecl htsshow_preprocesshtml(char** html,int* len,char* url_adresse,char* 
url_fichier); int __cdecl htsshow_checkhtml(char* html,int len,char* url_adresse,char* url_fichier); int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int lien_n,int lien_tot,int stat_time,hts_stat_struct* stats); char* __cdecl htsshow_query(char* question); @@ -94,17 +95,14 @@ int __cdecl htsshow_check(char* adr,char* fil,int status); void __cdecl htsshow_pause(char* lockfile); void __cdecl htsshow_filesave(char* file); int __cdecl htsshow_linkdetected(char* link); +int __cdecl htsshow_linkdetected2(char* link, char* start_tag); int __cdecl htsshow_xfrstatus(lien_back* back); int __cdecl htsshow_savename(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save); - +int __cdecl htsshow_sendheader(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* outgoing); +int __cdecl htsshow_receiveheader(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* incoming); int main(int argc, char **argv); void webhttrack_main(char* cmd); void webhttrack_lock(int lock); -#ifndef _WIN32 -#define fconv(a) (a) -#define fconcat(a,b) concat(a,b) -#endif - #endif diff --git a/src/htswizard.c b/src/htswizard.c index e976ffd..366a23a 100644 --- a/src/htswizard.c +++ b/src/htswizard.c @@ -35,14 +35,14 @@ Please visit our Website: http://www.httrack.com /* Author: Xavier Roche */ /* ------------------------------------------------------------ */ +/* Internal engine bytecode */ +#define HTS_INTERNAL_BYTECODE + #include "htswizard.h" #include "htsdefines.h" /* specific definitions */ #include "htsbase.h" -#include <stdio.h> -#include <stdlib.h> -#include <string.h> #include <ctype.h> /* END specific definitions */ @@ -91,6 +91,7 @@ retour: int hts_acceptlink(httrackp* opt, int ptr,int lien_tot,lien_url** liens, char* adr,char* fil, + char* tag, char* attribute, int* set_prio_to, int* just_test_it) { @@ -166,8 +167,8 @@ int hts_acceptlink(httrackp* opt, { // tester interdiction de descendre // 
MODIFIE : en cas de remontée puis de redescente, il se pouvait qu'on ne puisse pas atteindre certains fichiers // problème: si un fichier est virtuellement accessible via une page mais dont le lien est sur une autre *uniquement*.. - char tempo[HTS_URLMAXSIZE*2]; - char tempo2[HTS_URLMAXSIZE*2]; + char BIGSTK tempo[HTS_URLMAXSIZE*2]; + char BIGSTK tempo2[HTS_URLMAXSIZE*2]; tempo[0] = tempo2[0] = '\0'; // note (up/down): on calcule à partir du lien primaire, ET du lien précédent. @@ -190,15 +191,17 @@ int hts_acceptlink(httrackp* opt, // (test même niveau (NOUVEAU à cause de certains problèmes de filtres non intégrés)) // NEW - if (tempo[0] != '\0' && tempo[1] != '\0') { - if ( (!strchr(tempo+1,'/')) || (!strchr(tempo2+1,'/')) ) { - if (!liens[ptr]->link_import) { // ne résulte pas d'un 'moved' - forbidden_url=0; - if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"same level link authorized: %s%s"LF,adr,fil); - test_flush; - } - } + if ( + (tempo[0] != '\0' && tempo[1] != '\0' && strchr(tempo+1,'/') == 0) + || + (tempo2[0] != '\0' && tempo2[1] != '\0' && strchr(tempo2+1,'/') == 0) + ) { + if (!liens[ptr]->link_import) { // ne résulte pas d'un 'moved' + forbidden_url=0; + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"same level link authorized: %s%s"LF,adr,fil); + test_flush; + } } } @@ -289,8 +292,8 @@ int hts_acceptlink(httrackp* opt, } // tester interdiction de descendre? 
{ // tester interdiction de monter - char tempo[HTS_URLMAXSIZE*2]; - char tempo2[HTS_URLMAXSIZE*2]; + char BIGSTK tempo[HTS_URLMAXSIZE*2]; + char BIGSTK tempo2[HTS_URLMAXSIZE*2]; if (lienrelatif(tempo,fil,liens[liens[ptr]->premier]->fil)==0) { if (lienrelatif(tempo2,fil,liens[ptr]->fil)==0) { } else { @@ -414,8 +417,8 @@ int hts_acceptlink(httrackp* opt, int question=1; // poser une question int force_mirror=0; // pour mirror links int filters_answer=0; // décision prise par les filtres - char l[HTS_URLMAXSIZE*2]; - char lfull[HTS_URLMAXSIZE*2]; + char BIGSTK l[HTS_URLMAXSIZE*2]; + char BIGSTK lfull[HTS_URLMAXSIZE*2]; if (forbidden_url!=-1) question=0; // pas de question, résolu @@ -618,7 +621,7 @@ int hts_acceptlink(httrackp* opt, HTS_REQUEST_END; #if HTS_ANALYSTE { - char tempo[HTS_URLMAXSIZE*2]; + char BIGSTK tempo[HTS_URLMAXSIZE*2]; tempo[0]='\0'; strcatbuff(tempo,adr); strcatbuff(tempo,"/"); @@ -851,8 +854,8 @@ int hts_testlinksize(httrackp* opt, LLint size) { int jok=0; if (size>=0) { - char l[HTS_URLMAXSIZE*2]; - char lfull[HTS_URLMAXSIZE*2]; + char BIGSTK l[HTS_URLMAXSIZE*2]; + char BIGSTK lfull[HTS_URLMAXSIZE*2]; if (size>=0) { LLint sz=size; int size_flag=0; diff --git a/src/htswizard.h b/src/htswizard.h index 147c7b7..a36940d 100644 --- a/src/htswizard.h +++ b/src/htswizard.h @@ -40,12 +40,18 @@ Please visit our Website: http://www.httrack.com #include "htscore.h" + +/* Library internal definictions */ +#ifdef HTS_INTERNAL_BYTECODE int hts_acceptlink(httrackp* opt, int ptr,int lien_tot,lien_url** liens, char* adr,char* fil, + char* tag, char* attribute, int* set_prio_to_0, int* just_test_it); int hts_testlinksize(httrackp* opt, char* adr,char* fil, LLint size); #endif + +#endif diff --git a/src/htswrap.c b/src/htswrap.c index 28c4c71..3150f1d 100644 --- a/src/htswrap.c +++ b/src/htswrap.c @@ -35,6 +35,9 @@ Please visit our Website: http://www.httrack.com /* Author: Xavier Roche */ /* ------------------------------------------------------------ */ +/* 
Internal engine bytecode */ +#define HTS_INTERNAL_BYTECODE + #include "htswrap.h" #include "htshash.h" #include "htsinthash.h" diff --git a/src/htswrap.h b/src/htswrap.h index b87bf11..f97157a 100644 --- a/src/htswrap.h +++ b/src/htswrap.h @@ -40,7 +40,8 @@ Please visit our Website: http://www.httrack.com #include "htsglobal.h" -#ifndef HTTRACK_DEFLIB +/* Library internal definictions */ +#ifdef HTS_INTERNAL_BYTECODE HTSEXT_API int htswrap_init(void); HTSEXT_API int htswrap_add(char* name,void* fct); HTSEXT_API int htswrap_free(void); diff --git a/src/htszlib.c b/src/htszlib.c index faf4e88..19e3abb 100644 --- a/src/htszlib.c +++ b/src/htszlib.c @@ -35,13 +35,12 @@ Please visit our Website: http://www.httrack.com /* Author: Xavier Roche */ /* ------------------------------------------------------------ */ +/* Internal engine bytecode */ +#define HTS_INTERNAL_BYTECODE /* specific definitions */ -#include <stdio.h> -#include <stdlib.h> #include "htsbase.h" #include "htscore.h" - #include "htszlib.h" #if HTS_USEZLIB @@ -65,7 +64,7 @@ int hts_zunpack(char* filename,char* newfile) { if (fpout) { int nr; do { - char buff[1024]; + char BIGSTK buff[1024]; nr=gzread (gz, buff, 1024); if (nr>0) { size+=nr; @@ -84,4 +83,65 @@ int hts_zunpack(char* filename,char* newfile) { return -1; } +int hts_extract_meta(char* path) { + unzFile zFile = unzOpen(fconcat(path,"hts-cache/new.zip")); + zipFile zFileOut = zipOpen(fconcat(path,"hts-cache/meta.zip"), 0); + if (zFile != NULL && zFileOut != NULL) { + if (unzGoToFirstFile(zFile) == Z_OK) { + zip_fileinfo fi; + unz_file_info ufi; + char BIGSTK filename[HTS_URLMAXSIZE * 4]; + char BIGSTK comment[8192]; + int entries = 0; + memset(comment, 0, sizeof(comment)); // for truncated reads + memset(&fi, 0, sizeof(fi)); + memset(&ufi, 0, sizeof(ufi)); + do { + int readSizeHeader; + filename[0] = '\0'; + comment[0] = '\0'; + + if (unzOpenCurrentFile(zFile) == Z_OK) { + if ( + (readSizeHeader = unzGetLocalExtrafield(zFile, comment, 
sizeof(comment) - 2)) > 0 + && + unzGetCurrentFileInfo(zFile, &ufi, filename, sizeof(filename) - 2, NULL, 0, NULL, 0) == Z_OK + ) + { + comment[readSizeHeader] = '\0'; + fi.dosDate = ufi.dosDate; + fi.internal_fa = ufi.internal_fa; + fi.external_fa = ufi.external_fa; + if (zipOpenNewFileInZip(zFileOut, + filename, + &fi, + NULL, + 0, + NULL, + 0, + NULL, /* comment */ + Z_DEFLATED, + Z_DEFAULT_COMPRESSION) == Z_OK) + { + if (zipWriteInFileInZip(zFileOut, comment, (int) strlen(comment)) != Z_OK) { + } + if (zipCloseFileInZip(zFileOut) != Z_OK) { + } + } + } + unzCloseCurrentFile(zFile); + } + } while( unzGoToNextFile(zFile) == Z_OK ); + } + zipClose(zFileOut, "Meta-data extracted by HTTrack/"HTTRACK_VERSION); + unzClose(zFile); + return 1; + } + return 0; +} + +#else + +#error HTS_USEZLIB not defined + #endif diff --git a/src/htszlib.h b/src/htszlib.h index 173d966..8f8b565 100644 --- a/src/htszlib.h +++ b/src/htszlib.h @@ -39,32 +39,20 @@ Please visit our Website: http://www.httrack.com #ifndef HTS_DEFZLIB #define HTS_DEFZLIB -#if HTS_USEZLIB +/* ZLib */ +#include "zlib.h" +//#include "zutil.h" -int hts_zunpack(char* filename,char* newfile); - -#define gzopen hts_ptrfunc_gzopen -#define gzread hts_ptrfunc_gzread -#define gzclose hts_ptrfunc_gzclose - -#ifdef _WIN32 -#define ZEXPORT WINAPI -#else -#define ZEXPORT -#endif - -typedef void* voidp; -typedef voidp gzFile; -typedef gzFile (ZEXPORT *t_gzopen)(const char *path, const char *mode); -typedef int (ZEXPORT *t_gzread)(gzFile file, voidp buf, unsigned len); -typedef int (ZEXPORT *t_gzclose)(gzFile file); +/* MiniZip */ +#include "minizip/zip.h" +#include "minizip/unzip.h" +#include "minizip/mztools.h" +/* Library internal definictions */ +#ifdef HTS_INTERNAL_BYTECODE extern int gz_is_available; -extern t_gzopen gzopen; -extern t_gzread gzread; -extern t_gzclose gzclose; - -#endif - +extern int hts_zunpack(char* filename,char* newfile); +extern int hts_extract_meta(char* path); #endif +#endif diff --git 
a/src/httrack-library.h b/src/httrack-library.h index aeea70f..2ee2511 100644 --- a/src/httrack-library.h +++ b/src/httrack-library.h @@ -57,6 +57,8 @@ HTSEXT_API const char* hts_is_available(void); /* Other functions */ HTSEXT_API int hts_resetvar(void); HTSEXT_API int hts_buildtopindex(httrackp* opt,char* path,char* binpath); +HTSEXT_API char* hts_getcategories(char* path, int type); +HTSEXT_API char* hts_getcategory(char* filename); /* Catch-URL */ HTSEXT_API T_SOC catch_url_init_std(int* port_prox,char* adr_prox); @@ -111,11 +113,17 @@ HTSEXT_API char* unescape_http_unharm(char* s, int no_high); HTSEXT_API char* antislash_unescaped(char* s); HTSEXT_API void escape_remove_control(char* s); +/* Debugging */ +HTSEXT_API void hts_debug(int level); + /* Portable directory API */ typedef struct find_handle_struct find_handle_struct; typedef find_handle_struct* find_handle; + typedef struct topindex_chain { + int level; /* sort level */ + char* category; /* category */ char name[2048]; /* path */ struct topindex_chain* next; /* next element */ } topindex_chain ; diff --git a/src/httrack.c b/src/httrack.c index c69a600..b3accb1 100644 --- a/src/httrack.c +++ b/src/httrack.c @@ -41,9 +41,18 @@ Please visit our Website: http://www.httrack.com #endif #endif +#include "httrack-library.h" + #include "htsglobal.h" +#include "htsbase.h" +#include "htsopt.h" #include "httrack.h" +/* Static definitions */ +static int fexist(char* s); +static int linput(FILE* fp,char* s,int max); + + // htswrap_add #include "htswrap.h" @@ -64,9 +73,6 @@ Please visit our Website: http://www.httrack.com #include <unistd.h> #endif #include <ctype.h> -#ifdef _WIN32 -//#include "Winsock.h" -#endif /* END specific definitions */ // ISO VT100/220 definitions @@ -105,10 +111,10 @@ Please visit our Website: http://www.httrack.com #define VT_CLRSCR "\33[2J" // #define csi(X) printf(s_csi( X )); -void vt_clear(void) { +static void vt_clear(void) { printf("%s%s%s",VT_RESET,VT_CLRSCR,VT_GOTOXY("1","0")); } 
-void vt_home(void) { +static void vt_home(void) { printf("%s%s",VT_RESET,VT_GOTOXY("1","0")); } // @@ -146,7 +152,13 @@ Log: "engine: end" hts_htmlcheck_chopt = (t_hts_htmlcheck_chopt) htswrap_read("change-options"); Log: "engine: change-options" - hts_htmlcheck = (t_hts_htmlcheck) htswrap_read("check-html"); + hts_htmlcheck_preprocess = (t_hts_htmlcheck_process) htswrap_read("preprocess-html"); +Log: "preprocess-html: <url>" + + hts_htmlcheck_postprocess = (t_hts_htmlcheck_process) htswrap_read("postprocess-html"); +Log: "postprocess-html: <url>" + +hts_htmlcheck = (t_hts_htmlcheck) htswrap_read("check-html"); Log: "check-html: <url>" hts_htmlcheck_query = (t_hts_htmlcheck_query) htswrap_read("query"); @@ -161,6 +173,7 @@ Log: "pause: <lockfile>" hts_htmlcheck_filesave = (t_hts_htmlcheck_filesave) htswrap_read("save-file"); hts_htmlcheck_linkdetected = (t_hts_htmlcheck_linkdetected) htswrap_read("link-detected"); + hts_htmlcheck_linkdetected2 = (t_hts_htmlcheck_linkdetected2) htswrap_read("link-detected2"); Log: none hts_htmlcheck_xfrstatus = (t_hts_htmlcheck_xfrstatus) htswrap_read("transfer-status"); @@ -179,6 +192,8 @@ Log: htswrap_add("start",htsshow_start); htswrap_add("change-options",htsshow_chopt); htswrap_add("end",htsshow_end); + htswrap_add("preprocess-html",htsshow_preprocesshtml); + htswrap_add("postprocess-html",htsshow_preprocesshtml); htswrap_add("check-html",htsshow_checkhtml); htswrap_add("loop",htsshow_loop); htswrap_add("query",htsshow_query); @@ -188,8 +203,11 @@ Log: htswrap_add("pause",htsshow_pause); htswrap_add("save-file",htsshow_filesave); htswrap_add("link-detected",htsshow_linkdetected); + htswrap_add("link-detected2",htsshow_linkdetected2); htswrap_add("transfer-status",htsshow_xfrstatus); htswrap_add("save-name",htsshow_savename); + htswrap_add("send-header", htsshow_sendheader); + htswrap_add("receive-header", htsshow_receiveheader); ret = hts_main(argc,argv); if (ret) { @@ -202,7 +220,7 @@ Log: /* CALLBACK FUNCTIONS */ /* 
Initialize the Winsock */ -void __cdecl htsshow_init(void) { +static void __cdecl htsshow_init(void) { #ifdef _WIN32 { WORD wVersionRequested; // requested version WinSock API @@ -222,12 +240,12 @@ void __cdecl htsshow_init(void) { #endif } -void __cdecl htsshow_uninit(void) { +static void __cdecl htsshow_uninit(void) { #ifdef _WIN32 WSACleanup(); #endif } -int __cdecl htsshow_start(httrackp* opt) { +static int __cdecl htsshow_start(httrackp* opt) { use_show=0; if (opt->verbosedisplay==2) { use_show=1; @@ -235,16 +253,19 @@ int __cdecl htsshow_start(httrackp* opt) { } return 1; } -int __cdecl htsshow_chopt(httrackp* opt) { +static int __cdecl htsshow_chopt(httrackp* opt) { return htsshow_start(opt); } -int __cdecl htsshow_end(void) { +static int __cdecl htsshow_end(void) { return 1; } -int __cdecl htsshow_checkhtml(char* html,int len,char* url_adresse,char* url_fichier) { +static int __cdecl htsshow_preprocesshtml(char** html,int* len,char* url_adresse,char* url_fichier) { return 1; } -int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int lien_n,int lien_tot,int stat_time, hts_stat_struct* stats) { // appelé à chaque boucle de HTTrack +static int __cdecl htsshow_checkhtml(char* html,int len,char* url_adresse,char* url_fichier) { + return 1; +} +static int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int lien_n,int lien_tot,int stat_time, hts_stat_struct* stats) { // appelé à chaque boucle de HTTrack static TStamp prev_mytime=0; /* ok */ static t_InpInfo SInfo; /* ok */ // @@ -436,7 +457,7 @@ int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int lien_n, } if (ok) { - char s[HTS_URLMAXSIZE*2]; + char BIGSTK s[HTS_URLMAXSIZE*2]; // StatsBuffer[index].back=i; // index pour + d'infos // @@ -508,6 +529,15 @@ int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int lien_n, case 2: printf("purging files"); break; + case 3: + printf("loading cache"); + break; + case 4: + printf("waiting (scheduler)"); 
+ break; + case 5: + printf("waiting (throttle)"); + break; } } printf("%s\n",VT_CLREOL); @@ -539,19 +569,19 @@ int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int lien_n, return 1; } -char* __cdecl htsshow_query(char* question) { +static char* __cdecl htsshow_query(char* question) { static char s[12]=""; /* ok */ printf("%s\nPress <Y><Enter> to confirm, <N><Enter> to abort\n",question); io_flush; linput(stdin,s,4); return s; } -char* __cdecl htsshow_query2(char* question) { +static char* __cdecl htsshow_query2(char* question) { static char s[12]=""; /* ok */ printf("%s\nPress <Y><Enter> to confirm, <N><Enter> to abort\n",question); io_flush; linput(stdin,s,4); return s; } -char* __cdecl htsshow_query3(char* question) { +static char* __cdecl htsshow_query3(char* question) { static char line[256]; /* ok */ do { io_flush; linput(stdin,line,206); @@ -559,31 +589,39 @@ char* __cdecl htsshow_query3(char* question) { printf("ok..\n"); return line; } -int __cdecl htsshow_check(char* adr,char* fil,int status) { +static int __cdecl htsshow_check(char* adr,char* fil,int status) { return -1; } -void __cdecl htsshow_pause(char* lockfile) { +static void __cdecl htsshow_pause(char* lockfile) { while (fexist(lockfile)) { Sleep(1000); } } -void __cdecl htsshow_filesave(char* file) { +static void __cdecl htsshow_filesave(char* file) { } -int __cdecl htsshow_linkdetected(char* link) { +static int __cdecl htsshow_linkdetected(char* link) { return 1; } -int __cdecl htsshow_xfrstatus(lien_back* back) { +static int __cdecl htsshow_linkdetected2(char* link, char* start_tag) { return 1; } -int __cdecl htsshow_savename(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save) { +static int __cdecl htsshow_xfrstatus(lien_back* back) { + return 1; +} +static int __cdecl htsshow_savename(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save) { + return 1; +} +static int __cdecl htsshow_sendheader(char* buff, char* 
adr, char* fil, char* referer_adr, char* referer_fil, htsblk* outgoing) { + return 1; +} +static int __cdecl htsshow_receiveheader(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* incoming) { return 1; } - /* *** Various functions *** */ -int fexist(char* s) { +static int fexist(char* s) { struct stat st; memset(&st, 0, sizeof(st)); if (stat(s, &st) == 0) { @@ -594,7 +632,7 @@ int fexist(char* s) { return 0; } -int linput(FILE* fp,char* s,int max) { +static int linput(FILE* fp,char* s,int max) { int c; int j=0; do { diff --git a/src/httrack.h b/src/httrack.h index f297e00..a3c82a4 100644 --- a/src/httrack.h +++ b/src/httrack.h @@ -46,9 +46,9 @@ typedef struct { char name[1024]; char file[1024]; char state[256]; - char url_sav[HTS_URLMAXSIZE*2]; // pour cancel - char url_adr[HTS_URLMAXSIZE*2]; - char url_fil[HTS_URLMAXSIZE*2]; + char BIGSTK url_sav[HTS_URLMAXSIZE*2]; // pour cancel + char BIGSTK url_adr[HTS_URLMAXSIZE*2]; + char BIGSTK url_fil[HTS_URLMAXSIZE*2]; LLint size; LLint sizetot; int offset; @@ -79,29 +79,220 @@ typedef struct { } t_InpInfo; // wrappers -void __cdecl htsshow_init(void); -void __cdecl htsshow_uninit(void); -int __cdecl htsshow_start(httrackp* opt); -int __cdecl htsshow_chopt(httrackp* opt); -int __cdecl htsshow_end(void); -int __cdecl htsshow_checkhtml(char* html,int len,char* url_adresse,char* url_fichier); -int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int lien_n,int lien_tot,int stat_time,hts_stat_struct* stats); -char* __cdecl htsshow_query(char* question); -char* __cdecl htsshow_query2(char* question); -char* __cdecl htsshow_query3(char* question); -int __cdecl htsshow_check(char* adr,char* fil,int status); -void __cdecl htsshow_pause(char* lockfile); -void __cdecl htsshow_filesave(char* file); -int __cdecl htsshow_linkdetected(char* link); -int __cdecl htsshow_xfrstatus(lien_back* back); -int __cdecl htsshow_savename(char* adr_complete,char* fil_complete,char* referer_adr,char* 
referer_fil,char* save); - +static void __cdecl htsshow_init(void); +static void __cdecl htsshow_uninit(void); +static int __cdecl htsshow_start(httrackp* opt); +static int __cdecl htsshow_chopt(httrackp* opt); +static int __cdecl htsshow_end(void); +static int __cdecl htsshow_preprocesshtml(char** html,int* len,char* url_adresse,char* url_fichier); +static int __cdecl htsshow_postprocesshtml(char** html,int* len,char* url_adresse,char* url_fichier); +static int __cdecl htsshow_checkhtml(char* html,int len,char* url_adresse,char* url_fichier); +static int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int lien_n,int lien_tot,int stat_time,hts_stat_struct* stats); +static char* __cdecl htsshow_query(char* question); +static char* __cdecl htsshow_query2(char* question); +static char* __cdecl htsshow_query3(char* question); +static int __cdecl htsshow_check(char* adr,char* fil,int status); +static void __cdecl htsshow_pause(char* lockfile); +static void __cdecl htsshow_filesave(char* file); +static int __cdecl htsshow_linkdetected(char* link); +static int __cdecl htsshow_linkdetected2(char* link, char* start_tag); +static int __cdecl htsshow_xfrstatus(lien_back* back); +static int __cdecl htsshow_savename(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save); +static int __cdecl htsshow_sendheader(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* outgoing); +static int __cdecl htsshow_receiveheader(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* incoming); + int main(int argc, char **argv); -void vt_color(int text,int back); -void vt_clear(void); -void vt_home(void); +static void vt_color(int text,int back); +static void vt_clear(void); +static void vt_home(void); #endif +/* */ + +// Engine internal variables +typedef void (* htsErrorCallback)(char* msg, char* file, int line); +extern HTSEXT_API htsErrorCallback htsCallbackErr; +extern HTSEXT_API int 
htsMemoryFastXfr; +/* */ +extern HTSEXT_API hts_stat_struct HTS_STAT; +extern int _DEBUG_HEAD; +extern FILE* ioinfo; + +// from htsbase.h + +/* protected strcat, strncat and strcpy - definitely useful */ +#define strcatbuff(A, B) do { \ + assertf( (A) != NULL ); \ + if ( ! (B) ) { assertf( 0 ); } \ + if (htsMemoryFastXfr) { \ + if (sizeof(A) != sizeof(char*)) { \ + (A)[sizeof(A) - 1] = '\0'; \ + } \ + strcat(A, B); \ + if (sizeof(A) != sizeof(char*)) { \ + assertf((A)[sizeof(A) - 1] == '\0'); \ + } \ + } else { \ + unsigned int sz = (unsigned int) strlen(A); \ + unsigned int szf = (unsigned int) strlen(B); \ + if (sizeof(A) != sizeof(char*)) { \ + assertf(sz + szf + 1 < sizeof(A)); \ + if (szf > 0) { \ + if (sz + szf + 1 < sizeof(A)) { \ + memcpy((A) + sz, (B), szf + 1); \ + } \ + } \ + } else if (szf > 0) { \ + memcpybuff((A) + sz, (B), szf + 1); \ + } \ + } \ +} while(0) +#define strncatbuff(A, B, N) do { \ + assertf( (A) != NULL ); \ + if ( ! (B) ) { assertf( 0 ); } \ + if (htsMemoryFastXfr) { \ + if (sizeof(A) != sizeof(char*)) { \ + (A)[sizeof(A) - 1] = '\0'; \ + } \ + strncat(A, B, N); \ + if (sizeof(A) != sizeof(char*)) { \ + assertf((A)[sizeof(A) - 1] == '\0'); \ + } \ + } else { \ + unsigned int sz = (unsigned int) strlen(A); \ + unsigned int szf = (unsigned int) strlen(B); \ + if (szf > (unsigned int) (N)) szf = (unsigned int) (N); \ + if (sizeof(A) != sizeof(char*)) { \ + assertf(sz + szf + 1 < sizeof(A)); \ + if (szf > 0) { \ + if (sz + szf + 1 < sizeof(A)) { \ + memcpy((A) + sz, (B), szf); \ + * ( (A) + sz + szf) = '\0'; \ + } \ + } \ + } else if (szf > 0) { \ + memcpybuff((A) + sz, (B), szf); \ + * ( (A) + sz + szf) = '\0'; \ + } \ + } \ +} while(0) +#define strcpybuff(A, B) do { \ + assertf( (A) != NULL ); \ + if ( ! 
(B) ) { assertf( 0 ); } \ + if (htsMemoryFastXfr) { \ + if (sizeof(A) != sizeof(char*)) { \ + (A)[sizeof(A) - 1] = '\0'; \ + } \ + strcpy(A, B); \ + if (sizeof(A) != sizeof(char*)) { \ + assertf((A)[sizeof(A) - 1] == '\0'); \ + } \ + } else { \ + unsigned int szf = (unsigned int) strlen(B); \ + if (sizeof(A) != sizeof(char*)) { \ + assertf(szf + 1 < sizeof(A)); \ + if (szf > 0) { \ + if (szf + 1 < sizeof(A)) { \ + memcpy((A), (B), szf + 1); \ + } else { \ + * (A) = '\0'; \ + } \ + } else { \ + * (A) = '\0'; \ + } \ + } else { \ + memcpybuff((A), (B), szf + 1); \ + } \ + } \ +} while(0) +#define strncpybuff(A, B, N) do { \ + assertf( (A) != NULL ); \ + if ( ! (B) ) { assertf( 0 ); } \ + if (htsMemoryFastXfr) { \ + if (sizeof(A) != sizeof(char*)) { \ + (A)[sizeof(A) - 1] = '\0'; \ + } \ + strncpy(A, B, N); \ + if (sizeof(A) != sizeof(char*)) { \ + assertf((A)[sizeof(A) - 1] == '\0'); \ + } \ + } else { \ + unsigned int szf = (unsigned int) strlen(B); \ + if (szf > (unsigned int) (N)) szf = (unsigned int) (N); \ + if (sizeof(A) != sizeof(char*)) { \ + assertf(szf + 1 < sizeof(A)); \ + if (szf > 0) { \ + if (szf + 1 < sizeof(A)) { \ + memcpy((A), (B), szf); \ + } \ + } \ + } else { \ + memcpybuff((A), (B), szf); \ + } \ + } \ +} while(0) + +// emergency log +typedef void (*t_abortLog)(char* msg, char* file, int line); +extern HTSEXT_API t_abortLog abortLog__; +#define abortLog(a) abortLog__(a, __FILE__, __LINE__) +#define abortLogFmt(a) do { \ + FILE* fp = fopen("CRASH.TXT", "wb"); \ + if (!fp) fp = fopen("/tmp/CRASH.TXT", "wb"); \ + if (!fp) fp = fopen("C:\\CRASH.TXT", "wb"); \ + if (fp) { \ + fprintf(fp, "HTTrack " HTTRACK_VERSIONID " closed at '" __FILE__ "', line %d\r\n", __LINE__); \ + fprintf(fp, "Reason:\r\n"); \ + fprintf(fp, a); \ + fprintf(fp, "\r\n"); \ + fflush(fp); \ + fclose(fp); \ + } \ +} while(0) + +#define _ , +#define abortLogFmt(a) do { \ + FILE* fp = fopen("CRASH.TXT", "wb"); \ + if (!fp) fp = fopen("/tmp/CRASH.TXT", "wb"); \ + if (!fp) fp = 
fopen("C:\\CRASH.TXT", "wb"); \ + if (fp) { \ + fprintf(fp, "HTTrack " HTTRACK_VERSIONID " closed at '" __FILE__ "', line %d\r\n", __LINE__); \ + fprintf(fp, "Reason:\r\n"); \ + fprintf(fp, a); \ + fprintf(fp, "\r\n"); \ + fflush(fp); \ + fclose(fp); \ + } \ +} while(0) +#define assertf(exp) do { \ + if (! ( exp ) ) { \ + abortLog("assert failed: " #exp); \ + if (htsCallbackErr != NULL) { \ + htsCallbackErr("assert failed: " #exp, __FILE__ , __LINE__ ); \ + } \ + assert(exp); \ + abort(); \ + } \ +} while(0) +/* non-fatal assert */ +#define assertnf(exp) do { \ + if (! ( exp ) ) { \ + abortLog("assert failed: " #exp); \ + if (htsCallbackErr != NULL) { \ + htsCallbackErr("assert failed: " #exp, __FILE__ , __LINE__ ); \ + } \ + } \ +} while(0) + +// + +#define malloct(A) malloc(A) +#define calloct(A,B) calloc((A), (B)) +#define freet(A) do { assertnf((A) != NULL); if ((A) != NULL) { free(A); (A) = NULL; } } while(0) +#define strdupt(A) strdup(A) +#define realloct(A,B) ( ((A) != NULL) ? realloc((A), (B)) : malloc(B) ) +#define memcpybuff(A, B, N) memcpy((A), (B), (N)) + #endif @@ -1,16 +1,29 @@ #ifndef MD5_H #define MD5_H -#ifdef __alpha -typedef unsigned int uint32; +#ifdef _WIN32 +#ifndef SIZEOF_LONG +#define SIZEOF_LONG 4 +#endif #else +#include "config.h" +#endif + +#if SIZEOF_LONG==8 +typedef unsigned int uint32; +#elif SIZEOF_LONG==4 typedef unsigned long uint32; +#else +#error undefined: SIZEOF_LONG #endif struct MD5Context { + unsigned char in[64]; uint32 buf[4]; uint32 bits[2]; - unsigned char in[64]; +#ifdef _WIN32_WCE + uint32 pad[2]; +#endif int doByteReverse; }; diff --git a/src/minizip/ChangeLogUnzip b/src/minizip/ChangeLogUnzip new file mode 100755 index 0000000..3508eb4 --- /dev/null +++ b/src/minizip/ChangeLogUnzip @@ -0,0 +1,55 @@ +Change in 1.00: (10 sept 03)
+- rename to 1.00
+- cosmetic code change
+
+Change in 0.22: (19 May 03)
+- crypting support (unless you define NOCRYPT)
+- append file in existing zipfile
+
+Change in 0.21: (10 Mar 03)
+- bug fixes
+
+Change in 0.17: (27 Jan 02)
+- bug fixes
+
+Change in 0.16: (19 Jan 02)
+- Support of ioapi to virtualize zip file access
+
+Change in 0.15: (19 Mar 98)
+- fix memory leak in minizip.c
+
+Change in 0.14: (10 Mar 98)
+- fix bugs in minizip.c sample for zipping big file
+- fix problem in month in date handling
+- fix bug in unzlocal_GetCurrentFileInfoInternal in unzip.c for
+ comment handling
+
+Change in 0.13: (6 Mar 98)
+- fix bugs in zip.c
+- add real minizip sample
+
+Change in 0.12: (4 Mar 98)
+- add zip.c and zip.h for creating .zip files
+- fix change_file_date in miniunz.c for Unix (Jean-loup Gailly)
+- fix miniunz.c for file without specific record for directory
+
+Change in 0.11: (3 Mar 98)
+- fix bug in unzGetCurrentFileInfo when getting the extra field and comment
+- enhance miniunz sample, remove the bad unztst.c sample
+
+Change in 0.10: (2 Mar 98)
+- fix bug in unzReadCurrentFile
+- rename unzip* to unz* function and structure
+- remove Windows-like Hungarian notation variable names
+- modify some structure in unzip.h
+- add some comments in source
+- remove unzipGetcCurrentFile function
+- replace ZUNZEXPORT by ZEXPORT
+- add unzGetLocalExtrafield to get the local extrafield info
+- add a new sample, miniunz.c
+
+Change in 0.4: (25 Feb 98)
+- suppress the type unzipFileInZip.
+ Only one file in the zipfile can be open at the same time
+- fix some typos in code
+- added tm_unz structure in unzip_file_info (date/time in readable format)
diff --git a/src/minizip/crypt.h b/src/minizip/crypt.h new file mode 100644 index 0000000..9c7a89c --- /dev/null +++ b/src/minizip/crypt.h @@ -0,0 +1,132 @@ +/* crypt.h -- base code for crypt/uncrypt ZIPfile + + + Version 1.00, September 10th, 2003 + + Copyright (C) 1998-2003 Gilles Vollant + + This code is a modified version of crypting code in Infozip distribution + + The encryption/decryption parts of this source code (as opposed to the + non-echoing password parts) were originally written in Europe. The + whole source package can be freely distributed, including from the USA. + (Prior to January 2000, re-export from the US was a violation of US law.) + + This encryption code is a direct transcription of the algorithm from + Roger Schlafly, described by Phil Katz in the file appnote.txt. This + file (appnote.txt) is distributed with the PKZIP program (even in the + version without encryption capabilities). + + If you don't need crypting in your application, just define symbols + NOCRYPT and NOUNCRYPT. + + This code support the "Traditional PKWARE Encryption". + + The new AES encryption added on Zip format by Winzip (see the page + http://www.winzip.com/aes_info.htm ) and PKWare PKZip 5.x Strong + Encryption is not supported. 
+*/ + +#define CRC32(c, b) ((*(pcrc_32_tab+(((int)(c) ^ (b)) & 0xff))) ^ ((c) >> 8)) + +/*********************************************************************** + * Return the next byte in the pseudo-random sequence + */ +static int decrypt_byte(unsigned long* pkeys, const unsigned long* pcrc_32_tab) +{ + unsigned temp; /* POTENTIAL BUG: temp*(temp^1) may overflow in an + * unpredictable manner on 16-bit systems; not a problem + * with any known compiler so far, though */ + + temp = ((unsigned)(*(pkeys+2)) & 0xffff) | 2; + return (int)(((temp * (temp ^ 1)) >> 8) & 0xff); +} + +/*********************************************************************** + * Update the encryption keys with the next byte of plain text + */ +static int update_keys(unsigned long* pkeys,const unsigned long* pcrc_32_tab,int c) +{ + (*(pkeys+0)) = CRC32((*(pkeys+0)), c); + (*(pkeys+1)) += (*(pkeys+0)) & 0xff; + (*(pkeys+1)) = (*(pkeys+1)) * 134775813L + 1; + { + register int keyshift = (int)((*(pkeys+1)) >> 24); + (*(pkeys+2)) = CRC32((*(pkeys+2)), keyshift); + } + return c; +} + + +/*********************************************************************** + * Initialize the encryption keys and the random header according to + * the given password. 
+ */ +static void init_keys(const char* passwd,unsigned long* pkeys,const unsigned long* pcrc_32_tab) +{ + *(pkeys+0) = 305419896L; + *(pkeys+1) = 591751049L; + *(pkeys+2) = 878082192L; + while (*passwd != '\0') { + update_keys(pkeys,pcrc_32_tab,(int)*passwd); + passwd++; + } +} + +#define zdecode(pkeys,pcrc_32_tab,c) \ + (update_keys(pkeys,pcrc_32_tab,c ^= decrypt_byte(pkeys,pcrc_32_tab))) + +#define zencode(pkeys,pcrc_32_tab,c,t) \ + (t=decrypt_byte(pkeys,pcrc_32_tab), update_keys(pkeys,pcrc_32_tab,c), t^(c)) + +#ifdef INCLUDECRYPTINGCODE_IFCRYPTALLOWED + +#define RAND_HEAD_LEN 12 + /* "last resort" source for second part of crypt seed pattern */ +# ifndef ZCR_SEED2 +# define ZCR_SEED2 3141592654UL /* use PI as default pattern */ +# endif + +static int crypthead(passwd, buf, bufSize, pkeys, pcrc_32_tab, crcForCrypting) + const char *passwd; /* password string */ + unsigned char *buf; /* where to write header */ + int bufSize; + unsigned long* pkeys; + const unsigned long* pcrc_32_tab; + unsigned long crcForCrypting; +{ + int n; /* index in random header */ + int t; /* temporary */ + int c; /* random byte */ + unsigned char header[RAND_HEAD_LEN-2]; /* random header */ + static unsigned calls = 0; /* ensure different random header each time */ + + if (bufSize<RAND_HEAD_LEN) + return 0; + + /* First generate RAND_HEAD_LEN-2 random bytes. We encrypt the + * output of rand() to get less predictability, since rand() is + * often poorly implemented. 
+ */ + if (++calls == 1) + { + srand((unsigned)(time(NULL) ^ ZCR_SEED2)); + } + init_keys(passwd, pkeys, pcrc_32_tab); + for (n = 0; n < RAND_HEAD_LEN-2; n++) + { + c = (rand() >> 7) & 0xff; + header[n] = (unsigned char)zencode(pkeys, pcrc_32_tab, c, t); + } + /* Encrypt random header (last two bytes is high word of crc) */ + init_keys(passwd, pkeys, pcrc_32_tab); + for (n = 0; n < RAND_HEAD_LEN-2; n++) + { + buf[n] = (unsigned char)zencode(pkeys, pcrc_32_tab, header[n], t); + } + buf[n++] = zencode(pkeys, pcrc_32_tab, (int)(crcForCrypting >> 16) & 0xff, t); + buf[n++] = zencode(pkeys, pcrc_32_tab, (int)(crcForCrypting >> 24) & 0xff, t); + return n; +} + +#endif diff --git a/src/minizip/ioapi.c b/src/minizip/ioapi.c new file mode 100644 index 0000000..53583ed --- /dev/null +++ b/src/minizip/ioapi.c @@ -0,0 +1,196 @@ +/* ioapi.c -- IO base function header for compress/uncompress .zip + files using zlib + zip or unzip API + + Version 1.00, September 10th, 2003 + + Copyright (C) 1998-2003 Gilles Vollant +*/ + +#ifndef _WIN32_WCE +#include <stdio.h> +#include <stdlib.h> +#else +#include <stdio.h> +//#include "celib.h" +#endif +#include <string.h> + +#include "zlib.h" +#include "ioapi.h" + + + +/* I've found an old Unix (a SunOS 4.1.3_U1) without all SEEK_* defined.... 
*/ + +#ifndef SEEK_CUR +#define SEEK_CUR 1 +#endif + +#ifndef SEEK_END +#define SEEK_END 2 +#endif + +#ifndef SEEK_SET +#define SEEK_SET 0 +#endif + +voidpf ZCALLBACK fopen_file_func OF(( + voidpf opaque, + const char* filename, + int mode)); + +uLong ZCALLBACK fread_file_func OF(( + voidpf opaque, + voidpf stream, + void* buf, + uLong size)); + +uLong ZCALLBACK fwrite_file_func OF(( + voidpf opaque, + voidpf stream, + const void* buf, + uLong size)); + +long ZCALLBACK ftell_file_func OF(( + voidpf opaque, + voidpf stream)); + +long ZCALLBACK fseek_file_func OF(( + voidpf opaque, + voidpf stream, + uLong offset, + int origin)); + +int ZCALLBACK fflush_file_func OF(( + voidpf opaque, + voidpf stream)); + +int ZCALLBACK fclose_file_func OF(( + voidpf opaque, + voidpf stream)); + +int ZCALLBACK ferror_file_func OF(( + voidpf opaque, + voidpf stream)); + + +voidpf ZCALLBACK fopen_file_func (opaque, filename, mode) + voidpf opaque; + const char* filename; + int mode; +{ + FILE* file = NULL; + const char* mode_fopen = NULL; + if ((mode & ZLIB_FILEFUNC_MODE_READWRITEFILTER)==ZLIB_FILEFUNC_MODE_READ) + mode_fopen = "rb"; + else + if (mode & ZLIB_FILEFUNC_MODE_EXISTING) + mode_fopen = "r+b"; + else + if (mode & ZLIB_FILEFUNC_MODE_CREATE) + mode_fopen = "wb"; + + if ((filename!=NULL) && (mode_fopen != NULL)) + file = fopen(filename, mode_fopen); + return file; +} + + +uLong ZCALLBACK fread_file_func (opaque, stream, buf, size) + voidpf opaque; + voidpf stream; + void* buf; + uLong size; +{ + uLong ret; + ret = fread(buf, 1, (size_t)size, (FILE *)stream); + return ret; +} + + +uLong ZCALLBACK fwrite_file_func (opaque, stream, buf, size) + voidpf opaque; + voidpf stream; + const void* buf; + uLong size; +{ + uLong ret; + ret = fwrite(buf, 1, (size_t)size, (FILE *)stream); + return ret; +} + +long ZCALLBACK ftell_file_func (opaque, stream) + voidpf opaque; + voidpf stream; +{ + long ret; + ret = ftell((FILE *)stream); + return ret; +} + +/* Move the position of 'stream'. 'origin' is one of ZLIB_FILEFUNC_SEEK_CUR, + ZLIB_FILEFUNC_SEEK_END or ZLIB_FILEFUNC_SEEK_SET. + Returns 0 on success, -1 when 'origin' is unknown or fseek() fails. */ +long ZCALLBACK fseek_file_func (opaque, 
stream, offset, origin) + voidpf opaque; + voidpf stream; + uLong offset; + int origin; +{ + int fseek_origin=0; + long ret; + switch (origin) + { + case ZLIB_FILEFUNC_SEEK_CUR : + fseek_origin = SEEK_CUR; + break; + case ZLIB_FILEFUNC_SEEK_END : + fseek_origin = SEEK_END; + break; + case ZLIB_FILEFUNC_SEEK_SET : + fseek_origin = SEEK_SET; + break; + default: return -1; + } + ret = 0; + /* propagate an fseek() failure instead of always reporting success */ + if (fseek((FILE *)stream, offset, fseek_origin) != 0) + ret = -1; + return ret; +} + +int ZCALLBACK fflush_file_func (opaque, stream) + voidpf opaque; + voidpf stream; +{ + int ret; + ret = fflush((FILE *)stream); + return ret; +} + +int ZCALLBACK fclose_file_func (opaque, stream) + voidpf opaque; + voidpf stream; +{ + int ret; + ret = fclose((FILE *)stream); + return ret; +} + +int ZCALLBACK ferror_file_func (opaque, stream) + voidpf opaque; + voidpf stream; +{ + int ret; + ret = ferror((FILE *)stream); + return ret; +} + +void fill_fopen_filefunc (pzlib_filefunc_def) + zlib_filefunc_def* pzlib_filefunc_def; +{ + pzlib_filefunc_def->zopen_file = fopen_file_func; + pzlib_filefunc_def->zread_file = fread_file_func; + pzlib_filefunc_def->zwrite_file = fwrite_file_func; + pzlib_filefunc_def->ztell_file = ftell_file_func; + pzlib_filefunc_def->zseek_file = fseek_file_func; + pzlib_filefunc_def->zflush_file = fflush_file_func; + pzlib_filefunc_def->zclose_file = fclose_file_func; + pzlib_filefunc_def->zerror_file = ferror_file_func; + pzlib_filefunc_def->opaque = NULL; +} diff --git a/src/minizip/ioapi.h b/src/minizip/ioapi.h new file mode 100644 index 0000000..ee92287 --- /dev/null +++ b/src/minizip/ioapi.h @@ -0,0 +1,78 @@ +/* ioapi.h -- IO base function header for compress/uncompress .zip + files using zlib + zip or unzip API + + Version 1.00, September 10th, 2003 + + Copyright (C) 1998-2003 Gilles Vollant +*/ + +#ifndef _ZLIBIOAPI_H +#define _ZLIBIOAPI_H + + +#define ZLIB_FILEFUNC_SEEK_CUR (1) +#define ZLIB_FILEFUNC_SEEK_END (2) +#define ZLIB_FILEFUNC_SEEK_SET (0) + +#define ZLIB_FILEFUNC_MODE_READ (1) 
+#define ZLIB_FILEFUNC_MODE_WRITE (2) +#define ZLIB_FILEFUNC_MODE_READWRITEFILTER (3) + +#define ZLIB_FILEFUNC_MODE_EXISTING (4) +#define ZLIB_FILEFUNC_MODE_CREATE (8) + + +#ifndef ZCALLBACK + +#if (defined(WIN32) || defined (WINDOWS) || defined (_WINDOWS)) && defined(CALLBACK) && defined (USEWINDOWS_CALLBACK) +#define ZCALLBACK CALLBACK +#else +#define ZCALLBACK +#endif +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +typedef voidpf (ZCALLBACK *open_file_func) OF((voidpf opaque, const char* filename, int mode)); +typedef uLong (ZCALLBACK *read_file_func) OF((voidpf opaque, voidpf stream, void* buf, uLong size)); +typedef uLong (ZCALLBACK *write_file_func) OF((voidpf opaque, voidpf stream, const void* buf, uLong size)); +typedef long (ZCALLBACK *tell_file_func) OF((voidpf opaque, voidpf stream)); +typedef long (ZCALLBACK *seek_file_func) OF((voidpf opaque, voidpf stream, uLong offset, int origin)); +typedef int (ZCALLBACK *flush_file_func) OF((voidpf opaque, voidpf stream)); +typedef int (ZCALLBACK *close_file_func) OF((voidpf opaque, voidpf stream)); +typedef int (ZCALLBACK *testerror_file_func) OF((voidpf opaque, voidpf stream)); + +typedef struct zlib_filefunc_def_s +{ + open_file_func zopen_file; + read_file_func zread_file; + write_file_func zwrite_file; + tell_file_func ztell_file; + seek_file_func zseek_file; + flush_file_func zflush_file; + close_file_func zclose_file; + testerror_file_func zerror_file; + voidpf opaque; +} zlib_filefunc_def; + + + +void fill_fopen_filefunc OF((zlib_filefunc_def* pzlib_filefunc_def)); + +#define ZREAD(filefunc,filestream,buf,size) ((*((filefunc).zread_file))((filefunc).opaque,filestream,buf,size)) +#define ZWRITE(filefunc,filestream,buf,size) ((*((filefunc).zwrite_file))((filefunc).opaque,filestream,buf,size)) +#define ZTELL(filefunc,filestream) ((*((filefunc).ztell_file))((filefunc).opaque,filestream)) +#define ZSEEK(filefunc,filestream,pos,mode) ((*((filefunc).zseek_file))((filefunc).opaque,filestream,pos,mode)) 
+#define ZFLUSH(filefunc,filestream) ((*((filefunc).zflush_file))((filefunc).opaque,filestream)) +#define ZCLOSE(filefunc,filestream) ((*((filefunc).zclose_file))((filefunc).opaque,filestream)) +#define ZERROR(filefunc,filestream) ((*((filefunc).zerror_file))((filefunc).opaque,filestream)) + + +#ifdef __cplusplus +} +#endif + +#endif + diff --git a/src/minizip/iowin32.c b/src/minizip/iowin32.c new file mode 100644 index 0000000..1afddaa --- /dev/null +++ b/src/minizip/iowin32.c @@ -0,0 +1,275 @@ +/* iowin32.c -- IO base function header for compress/uncompress .zip + files using zlib + zip or unzip API + This IO API version uses the Win32 API (for Microsoft Windows) + + Version 1.00, September 10th, 2003 + + Copyright (C) 1998-2003 Gilles Vollant +*/ + +#include <stdlib.h> +#ifndef _WIN32_WCE +#include <stdlib.h> +#else +//#include "celib.h" +#endif + +#include "zlib.h" +#include "ioapi.h" +#include "iowin32.h" + +#ifndef INVALID_HANDLE_VALUE +#define INVALID_HANDLE_VALUE (0xFFFFFFFF) +#endif + +#ifndef INVALID_SET_FILE_POINTER +#define INVALID_SET_FILE_POINTER ((DWORD)-1) +#endif + +voidpf ZCALLBACK win32_open_file_func OF(( + voidpf opaque, + const char* filename, + int mode)); + +uLong ZCALLBACK win32_read_file_func OF(( + voidpf opaque, + voidpf stream, + void* buf, + uLong size)); + +uLong ZCALLBACK win32_write_file_func OF(( + voidpf opaque, + voidpf stream, + const void* buf, + uLong size)); + +long ZCALLBACK win32_tell_file_func OF(( + voidpf opaque, + voidpf stream)); + +long ZCALLBACK win32_seek_file_func OF(( + voidpf opaque, + voidpf stream, + uLong offset, + int origin)); + +int ZCALLBACK win32_close_file_func OF(( + voidpf opaque, + voidpf stream)); + +int ZCALLBACK win32_error_file_func OF(( + voidpf opaque, + voidpf stream)); + +typedef struct +{ + HANDLE hf; + int error; +} WIN32FILE_IOWIN; + +voidpf ZCALLBACK win32_open_file_func (opaque, filename, mode) + voidpf opaque; + const char* filename; + int mode; +{ + const char* mode_fopen = NULL; + 
DWORD dwDesiredAccess,dwCreationDisposition,dwShareMode,dwFlagsAndAttributes ; + HANDLE hFile = 0; + voidpf ret=NULL; + + dwDesiredAccess = dwShareMode = dwFlagsAndAttributes = 0; + + if ((mode & ZLIB_FILEFUNC_MODE_READWRITEFILTER)==ZLIB_FILEFUNC_MODE_READ) + { + dwDesiredAccess = GENERIC_READ; + dwCreationDisposition = OPEN_EXISTING; + dwShareMode = FILE_SHARE_READ; + } + else + if (mode & ZLIB_FILEFUNC_MODE_EXISTING) + { + dwDesiredAccess = GENERIC_WRITE | GENERIC_READ; + dwCreationDisposition = OPEN_EXISTING; + } + else + if (mode & ZLIB_FILEFUNC_MODE_CREATE) + { + dwDesiredAccess = GENERIC_WRITE | GENERIC_READ; + dwCreationDisposition = CREATE_ALWAYS; + } + + if ((filename!=NULL) && (dwDesiredAccess != 0)) + hFile = CreateFile((LPCTSTR)filename, dwDesiredAccess, dwShareMode, NULL, + dwCreationDisposition, dwFlagsAndAttributes, NULL); + + if (hFile == INVALID_HANDLE_VALUE) + hFile = NULL; + + if (hFile != NULL) + { + WIN32FILE_IOWIN w32fiow; + w32fiow.hf = hFile; + w32fiow.error = 0; + ret = malloc(sizeof(WIN32FILE_IOWIN)); + if (ret==NULL) + CloseHandle(hFile); + else *((WIN32FILE_IOWIN*)ret) = w32fiow; + } + return ret; +} + + +uLong ZCALLBACK win32_read_file_func (opaque, stream, buf, size) + voidpf opaque; + voidpf stream; + void* buf; + uLong size; +{ + uLong ret=0; + HANDLE hFile = NULL; + if (stream!=NULL) + hFile = ((WIN32FILE_IOWIN*)stream) -> hf; + if (hFile != NULL) + if (!ReadFile(hFile, buf, size, &ret, NULL)) + { + DWORD dwErr = GetLastError(); + if (dwErr == ERROR_HANDLE_EOF) + dwErr = 0; + ((WIN32FILE_IOWIN*)stream) -> error=(int)dwErr; + } + + return ret; +} + + +uLong ZCALLBACK win32_write_file_func (opaque, stream, buf, size) + voidpf opaque; + voidpf stream; + const void* buf; + uLong size; +{ + uLong ret=0; + HANDLE hFile = NULL; + if (stream!=NULL) + hFile = ((WIN32FILE_IOWIN*)stream) -> hf; + + if (hFile !=NULL) + if (!WriteFile(hFile, buf, size, &ret, NULL)) + { + DWORD dwErr = GetLastError(); + if (dwErr == ERROR_HANDLE_EOF) + dwErr = 0; 
+ ((WIN32FILE_IOWIN*)stream) -> error=(int)dwErr; + } + + return ret; +} + +long ZCALLBACK win32_tell_file_func (opaque, stream) + voidpf opaque; + voidpf stream; +{ + long ret=-1; + HANDLE hFile = NULL; + if (stream!=NULL) + hFile = ((WIN32FILE_IOWIN*)stream) -> hf; + if (hFile != NULL) + { + DWORD dwSet = SetFilePointer(hFile, 0, NULL, FILE_CURRENT); + if (dwSet == INVALID_SET_FILE_POINTER) + { + DWORD dwErr = GetLastError(); + ((WIN32FILE_IOWIN*)stream) -> error=(int)dwErr; + ret = -1; + } + else + ret=(long)dwSet; + } + return ret; +} + +long ZCALLBACK win32_seek_file_func (opaque, stream, offset, origin) + voidpf opaque; + voidpf stream; + uLong offset; + int origin; +{ + DWORD dwMoveMethod=0xFFFFFFFF; + HANDLE hFile = NULL; + + long ret=-1; + if (stream!=NULL) + hFile = ((WIN32FILE_IOWIN*)stream) -> hf; + switch (origin) + { + case ZLIB_FILEFUNC_SEEK_CUR : + dwMoveMethod = FILE_CURRENT; + break; + case ZLIB_FILEFUNC_SEEK_END : + dwMoveMethod = FILE_END; + break; + case ZLIB_FILEFUNC_SEEK_SET : + dwMoveMethod = FILE_BEGIN; + break; + default: return -1; + } + + if (hFile != NULL) + { + DWORD dwSet = SetFilePointer(hFile, offset, NULL, dwMoveMethod); + if (dwSet == INVALID_SET_FILE_POINTER) + { + DWORD dwErr = GetLastError(); + ((WIN32FILE_IOWIN*)stream) -> error=(int)dwErr; + ret = -1; + } + else + ret=0; + } + return ret; +} + +int ZCALLBACK win32_close_file_func (opaque, stream) + voidpf opaque; + voidpf stream; +{ + int ret=-1; + + if (stream!=NULL) + { + HANDLE hFile; + hFile = ((WIN32FILE_IOWIN*)stream) -> hf; + if (hFile != NULL) + { + CloseHandle(hFile); + ret=0; + } + free(stream); + } + return ret; +} + +int ZCALLBACK win32_error_file_func (opaque, stream) + voidpf opaque; + voidpf stream; +{ + int ret=-1; + if (stream!=NULL) + { + ret = ((WIN32FILE_IOWIN*)stream) -> error; + } + return ret; +} + +void fill_win32_filefunc (pzlib_filefunc_def) + zlib_filefunc_def* pzlib_filefunc_def; +{ + pzlib_filefunc_def->zopen_file = win32_open_file_func; + 
pzlib_filefunc_def->zread_file = win32_read_file_func; + pzlib_filefunc_def->zwrite_file = win32_write_file_func; + pzlib_filefunc_def->ztell_file = win32_tell_file_func; + pzlib_filefunc_def->zseek_file = win32_seek_file_func; + pzlib_filefunc_def->zclose_file = win32_close_file_func; + pzlib_filefunc_def->zerror_file = win32_error_file_func; + pzlib_filefunc_def->opaque=NULL; +} diff --git a/src/minizip/iowin32.h b/src/minizip/iowin32.h new file mode 100644 index 0000000..c0ebd50 --- /dev/null +++ b/src/minizip/iowin32.h @@ -0,0 +1,21 @@ +/* iowin32.h -- IO base function header for compress/uncompress .zip + files using zlib + zip or unzip API + This IO API version uses the Win32 API (for Microsoft Windows) + + Version 1.00, September 10th, 2003 + + Copyright (C) 1998-2003 Gilles Vollant +*/ + +#include <windows.h> + + +#ifdef __cplusplus +extern "C" { +#endif + +void fill_win32_filefunc OF((zlib_filefunc_def* pzlib_filefunc_def)); + +#ifdef __cplusplus +} +#endif diff --git a/src/minizip/mztools.c b/src/minizip/mztools.c new file mode 100644 index 0000000..6021c49 --- /dev/null +++ b/src/minizip/mztools.c @@ -0,0 +1,287 @@ +/* + Additional tools for Minizip + Code: Xavier Roche '2004 + License: Same as ZLIB (www.gzip.org) +*/ + +/* Code */ +#include <string.h> +#ifndef _WIN32_WCE +#include <stdio.h> +#include <stdlib.h> +#else +#include <stdio.h> +#include <stdlib.h> +#include "celib.h" +#endif +#include "zlib.h" +#include "unzip.h" + +#define READ_8(adr) ((unsigned char)*(adr)) +#define READ_16(adr) ( READ_8(adr) | (READ_8(adr+1) << 8) ) +#define READ_32(adr) ( READ_16(adr) | (READ_16((adr)+2) << 16) ) + +#define WRITE_8(buff, n) do { \ + *((unsigned char*)(buff)) = (unsigned char) ((n) & 0xff); \ +} while(0) +#define WRITE_16(buff, n) do { \ + WRITE_8((unsigned char*)(buff), n); \ + WRITE_8(((unsigned char*)(buff)) + 1, (n) >> 8); \ +} while(0) +#define WRITE_32(buff, n) do { \ + WRITE_16((unsigned char*)(buff), (n) & 0xffff); \ + WRITE_16((unsigned 
char*)(buff) + 2, (n) >> 16); \ +} while(0) + +/* Rebuild a ZIP archive whose central directory is missing. + Consecutive local file entries are copied from the start of 'file' into + 'fileOut' until a header without the local-entry signature is met; a new + central directory is accumulated in 'fileOutTmp', appended to 'fileOut', + and 'fileOutTmp' is then deleted. + Returns Z_OK on success (entry and byte counts are stored through the + optional nRecovered / bytesRecovered pointers), or Z_ERRNO, Z_MEM_ERROR + or Z_STREAM_ERROR on failure. */ +extern int ZEXPORT unzRepair(file, fileOut, fileOutTmp, nRecovered, bytesRecovered) +const char* file; +const char* fileOut; +const char* fileOutTmp; +uLong* nRecovered; +uLong* bytesRecovered; +{ + int err = Z_OK; + FILE* fpZip = fopen(file, "rb"); + FILE* fpOut = fopen(fileOut, "wb"); + FILE* fpOutCD = fopen(fileOutTmp, "wb"); + /* all three streams are used unconditionally below, so all must be open */ + if (fpZip != NULL && fpOut != NULL && fpOutCD != NULL) { + int entries = 0; + uLong totalBytes = 0; + char header[30]; + char filename[256]; + char extra[1024]; + int offset = 0; + int offsetCD = 0; + while ( fread(header, 1, 30, fpZip) == 30 ) { + int currentOffset = offset; + + /* File entry */ + if (READ_32(header) == 0x04034b50) { + unsigned int version = READ_16(header + 4); + unsigned int gpflag = READ_16(header + 6); + unsigned int method = READ_16(header + 8); + unsigned int filetime = READ_16(header + 10); + unsigned int filedate = READ_16(header + 12); + unsigned int crc = READ_32(header + 14); /* crc */ + unsigned int cpsize = READ_32(header + 18); /* compressed size */ + unsigned int uncpsize = READ_32(header + 22); /* uncompressed sz */ + unsigned int fnsize = READ_16(header + 26); /* file name length */ + unsigned int extsize = READ_16(header + 28); /* extra field length */ + filename[0] = extra[0] = '\0'; + + /* Refuse entries whose name or extra field would overflow the + fixed-size local buffers (lengths come from the archive itself) */ + if (fnsize > sizeof(filename) || extsize > sizeof(extra)) { + err = Z_STREAM_ERROR; + break; + } + + /* Header */ + if (fwrite(header, 1, 30, fpOut) == 30) { + offset += 30; + } else { + err = Z_ERRNO; + break; + } + + /* Filename */ + if (fnsize > 0) { + if (fread(filename, 1, fnsize, fpZip) == fnsize) { + if (fwrite(filename, 1, fnsize, fpOut) == fnsize) { + offset += fnsize; + } else { + err = Z_ERRNO; + break; + } + } else { + err = Z_ERRNO; + break; + } + } else { + err = Z_STREAM_ERROR; + break; + } + + /* Extra field */ + if (extsize > 0) { + if (fread(extra, 1, extsize, fpZip) == extsize) { + if (fwrite(extra, 1, extsize, fpOut) == extsize) { + offset += extsize; + } else { + err = Z_ERRNO; + break; + } + } else { + err = Z_ERRNO; + break; + } + } + + /* Data */ + { + int dataSize = 
cpsize; + if (dataSize == 0) { + dataSize = uncpsize; + } + if (dataSize > 0) { + char* data = malloc(dataSize); + if (data != NULL) { + if ((int)fread(data, 1, dataSize, fpZip) == dataSize) { + if ((int)fwrite(data, 1, dataSize, fpOut) == dataSize) { + offset += dataSize; + totalBytes += dataSize; + } else { + err = Z_ERRNO; + } + } else { + err = Z_ERRNO; + } + free(data); + if (err != Z_OK) { + break; + } + } else { + err = Z_MEM_ERROR; + break; + } + } + } + + /* Central directory entry */ + { + char header[46]; + char* comment = ""; + int comsize = (int) strlen(comment); + WRITE_32(header, 0x02014b50); + WRITE_16(header + 4, version); + WRITE_16(header + 6, version); + WRITE_16(header + 8, gpflag); + WRITE_16(header + 10, method); + WRITE_16(header + 12, filetime); + WRITE_16(header + 14, filedate); + WRITE_32(header + 16, crc); + WRITE_32(header + 20, cpsize); + WRITE_32(header + 24, uncpsize); + WRITE_16(header + 28, fnsize); + WRITE_16(header + 30, extsize); + WRITE_16(header + 32, comsize); + WRITE_16(header + 34, 0); /* disk # */ + WRITE_16(header + 36, 0); /* int attrb */ + WRITE_32(header + 38, 0); /* ext attrb */ + WRITE_32(header + 42, currentOffset); + /* Header */ + if (fwrite(header, 1, 46, fpOutCD) == 46) { + offsetCD += 46; + + /* Filename */ + if (fnsize > 0) { + if (fwrite(filename, 1, fnsize, fpOutCD) == fnsize) { + offsetCD += fnsize; + } else { + err = Z_ERRNO; + break; + } + } else { + err = Z_STREAM_ERROR; + break; + } + + /* Extra field */ + if (extsize > 0) { + if (fwrite(extra, 1, extsize, fpOutCD) == extsize) { + offsetCD += extsize; + } else { + err = Z_ERRNO; + break; + } + } + + /* Comment field */ + if (comsize > 0) { + if ((int)fwrite(comment, 1, comsize, fpOutCD) == comsize) { + offsetCD += comsize; + } else { + err = Z_ERRNO; + break; + } + } + + + } else { + err = Z_ERRNO; + break; + } + } + + /* Success */ + entries++; + + } else { + break; + } + } + + /* Final central directory */ + { + int entriesZip = entries; + char 
header[22]; + char* comment = ""; // "ZIP File recovered by zlib/minizip/mztools"; + int comsize = (int) strlen(comment); + if (entriesZip > 0xffff) { + entriesZip = 0xffff; + } + WRITE_32(header, 0x06054b50); + WRITE_16(header + 4, 0); /* disk # */ + WRITE_16(header + 6, 0); /* disk # */ + WRITE_16(header + 8, entriesZip); /* hack */ + WRITE_16(header + 10, entriesZip); /* hack */ + WRITE_32(header + 12, offsetCD); /* size of CD */ + WRITE_32(header + 16, offset); /* offset to CD */ + WRITE_16(header + 20, comsize); /* comment */ + + /* Header */ + if (fwrite(header, 1, 22, fpOutCD) == 22) { + + /* Comment field */ + if (comsize > 0) { + if ((int)fwrite(comment, 1, comsize, fpOutCD) != comsize) { + err = Z_ERRNO; + } + } + + } else { + err = Z_ERRNO; + } + } + + /* Final merge (file + central directory) */ + fclose(fpOutCD); + if (err == Z_OK) { + fpOutCD = fopen(fileOutTmp, "rb"); + if (fpOutCD != NULL) { + int nRead; + char buffer[8192]; + while ( (nRead = fread(buffer, 1, sizeof(buffer), fpOutCD)) > 0) { + if ((int)fwrite(buffer, 1, nRead, fpOut) != nRead) { + err = Z_ERRNO; + break; + } + } + fclose(fpOutCD); + } else { + /* without the central directory the output is not a usable archive */ + err = Z_ERRNO; + } + } + + /* Close */ + fclose(fpZip); + fclose(fpOut); + + /* Wipe temporary file */ + (void)remove(fileOutTmp); + + /* Number of recovered entries */ + if (err == Z_OK) { + if (nRecovered != NULL) { + *nRecovered = entries; + } + if (bytesRecovered != NULL) { + *bytesRecovered = totalBytes; + } + } + } else { + /* Could not open every file: release the ones that did open */ + if (fpZip != NULL) fclose(fpZip); + if (fpOut != NULL) fclose(fpOut); + if (fpOutCD != NULL) fclose(fpOutCD); + err = Z_STREAM_ERROR; + } + return err; +} diff --git a/src/minizip/mztools.h b/src/minizip/mztools.h new file mode 100644 index 0000000..eee78dc --- /dev/null +++ b/src/minizip/mztools.h @@ -0,0 +1,31 @@ +/* + Additional tools for Minizip + Code: Xavier Roche '2004 + License: Same as ZLIB (www.gzip.org) +*/ + +#ifndef _zip_tools_H +#define _zip_tools_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef _ZLIB_H +#include "zlib.h" +#endif + +#include "unzip.h" + +/* Repair a ZIP file (missing central directory) + file: file to recover + 
fileOut: output file after recovery + fileOutTmp: temporary file name used for recovery +*/ +extern int ZEXPORT unzRepair(const char* file, + const char* fileOut, + const char* fileOutTmp, + uLong* nRecovered, + uLong* bytesRecovered); + +#endif diff --git a/src/minizip/unzip.c b/src/minizip/unzip.c new file mode 100644 index 0000000..1452a54 --- /dev/null +++ b/src/minizip/unzip.c @@ -0,0 +1,1591 @@ +/* unzip.c -- IO for uncompress .zip files using zlib + Version 1.00, September 10th, 2003 + + Copyright (C) 1998-2003 Gilles Vollant + + Read unzip.h for more info +*/ + +/* Decryption code comes from crypt.c by Info-ZIP but has been greatly reduced in terms of +compatibility with older software. The following is from the original crypt.c. Code +woven in by Terry Thorsen 1/2003. +*/ +/* + Copyright (c) 1990-2000 Info-ZIP. All rights reserved. + + See the accompanying file LICENSE, version 2000-Apr-09 or later + (the contents of which are also included in zip.h) for terms of use. + If, for some reason, all these files are missing, the Info-ZIP license + also may be found at: ftp://ftp.info-zip.org/pub/infozip/license.html +*/ +/* + crypt.c (full version) by Info-ZIP. Last revised: [see crypt.h] + + The encryption/decryption parts of this source code (as opposed to the + non-echoing password parts) were originally written in Europe. The + whole source package can be freely distributed, including from the USA. + (Prior to January 2000, re-export from the US was a violation of US law.) + */ + +/* + This encryption code is a direct transcription of the algorithm from + Roger Schlafly, described by Phil Katz in the file appnote.txt. This + file (appnote.txt) is distributed with the PKZIP program (even in the + version without encryption capabilities). 
+ */ + + +#ifndef _WIN32_WCE +#include <stdio.h> +#include <stdlib.h> +#else +#include <stdio.h> +#include "celib.h" +#endif +#include <string.h> + +#include "zlib.h" +#include "unzip.h" + +#ifdef STDC +# include <stddef.h> +# include <string.h> +# include <stdlib.h> +#endif +#ifdef NO_ERRNO_H + extern int errno; +#else +# include <errno.h> +#endif + + +#ifndef local +# define local static +#endif +/* compile with -Dlocal if your debugger can't find static symbols */ + + +#ifndef CASESENSITIVITYDEFAULT_NO +# if !defined(unix) && !defined(CASESENSITIVITYDEFAULT_YES) +# define CASESENSITIVITYDEFAULT_NO +# endif +#endif + + +#ifndef UNZ_BUFSIZE +#define UNZ_BUFSIZE (16384) +#endif + +#ifndef UNZ_MAXFILENAMEINZIP +#define UNZ_MAXFILENAMEINZIP (256) +#endif + +#ifndef ALLOC +# define ALLOC(size) (malloc(size)) +#endif +#ifndef TRYFREE +# define TRYFREE(p) {if (p) free(p);} +#endif + +#define SIZECENTRALDIRITEM (0x2e) +#define SIZEZIPLOCALHEADER (0x1e) + + + + +const char unz_copyright[] = + " unzip 1.00 Copyright 1998-2003 Gilles Vollant - http://www.winimage.com/zLibDll"; + +/* unz_file_info_interntal contain internal info about a file in zipfile*/ +typedef struct unz_file_info_internal_s +{ + uLong offset_curfile;/* relative offset of local header 4 bytes */ +} unz_file_info_internal; + + +/* file_in_zip_read_info_s contain internal information about a file in zipfile, + when reading and decompress it */ +typedef struct +{ + char *read_buffer; /* internal buffer for compressed data */ + z_stream stream; /* zLib stream structure for inflate */ + + uLong pos_in_zipfile; /* position in byte on the zipfile, for fseek*/ + uLong stream_initialised; /* flag set if stream structure is initialised*/ + + uLong offset_local_extrafield;/* offset of the local extra field */ + uInt size_local_extrafield;/* size of the local extra field */ + uLong pos_local_extrafield; /* position in the local extra field in read*/ + + uLong crc32; /* crc32 of all data uncompressed */ + uLong 
crc32_wait; /* crc32 we must obtain after decompress all */ + uLong rest_read_compressed; /* number of byte to be decompressed */ + uLong rest_read_uncompressed;/*number of byte to be obtained after decomp*/ + zlib_filefunc_def z_filefunc; + voidpf filestream; /* io structore of the zipfile */ + uLong compression_method; /* compression method (0==store) */ + uLong byte_before_the_zipfile;/* byte before the zipfile, (>0 for sfx)*/ + int raw; +} file_in_zip_read_info_s; + + +/* unz_s contain internal information about the zipfile +*/ +typedef struct +{ + zlib_filefunc_def z_filefunc; + voidpf filestream; /* io structore of the zipfile */ + unz_global_info gi; /* public global information */ + uLong byte_before_the_zipfile;/* byte before the zipfile, (>0 for sfx)*/ + uLong num_file; /* number of the current file in the zipfile*/ + uLong pos_in_central_dir; /* pos of the current file in the central dir*/ + uLong current_file_ok; /* flag about the usability of the current file*/ + uLong central_pos; /* position of the beginning of the central dir*/ + + uLong size_central_dir; /* size of the central directory */ + uLong offset_central_dir; /* offset of start of central directory with + respect to the starting disk number */ + + unz_file_info cur_file_info; /* public info about the current file in zip*/ + unz_file_info_internal cur_file_info_internal; /* private info about it*/ + file_in_zip_read_info_s* pfile_in_zip_read; /* structure about the current + file if we are decompressing it */ + int encrypted; +# ifndef NOUNCRYPT + unsigned long keys[3]; /* keys defining the pseudo-random sequence */ + const unsigned long* pcrc_32_tab; +# endif +} unz_s; + + +#ifndef NOUNCRYPT +#include "crypt.h" +#endif + +/* =========================================================================== + Read a byte from a gz_stream; update next_in and avail_in. Return EOF + for end of file. + IN assertion: the stream s has been sucessfully opened for reading. 
+*/ + + +local int unzlocal_getByte OF(( + const zlib_filefunc_def* pzlib_filefunc_def, + voidpf filestream, + int *pi)); + +local int unzlocal_getByte(pzlib_filefunc_def,filestream,pi) + const zlib_filefunc_def* pzlib_filefunc_def; + voidpf filestream; + int *pi; +{ + unsigned char c; + int err = (int)ZREAD(*pzlib_filefunc_def,filestream,&c,1); + if (err==1) + { + *pi = (int)c; + return UNZ_OK; + } + else + { + if (ZERROR(*pzlib_filefunc_def,filestream)) + return UNZ_ERRNO; + else + return UNZ_EOF; + } +} + + +/* =========================================================================== + Reads a long in LSB order from the given gz_stream. Sets +*/ +local int unzlocal_getShort OF(( + const zlib_filefunc_def* pzlib_filefunc_def, + voidpf filestream, + uLong *pX)); + +local int unzlocal_getShort (pzlib_filefunc_def,filestream,pX) + const zlib_filefunc_def* pzlib_filefunc_def; + voidpf filestream; + uLong *pX; +{ + uLong x ; + int i; + int err; + + err = unzlocal_getByte(pzlib_filefunc_def,filestream,&i); + x = (uLong)i; + + if (err==UNZ_OK) + err = unzlocal_getByte(pzlib_filefunc_def,filestream,&i); + x += ((uLong)i)<<8; + + if (err==UNZ_OK) + *pX = x; + else + *pX = 0; + return err; +} + +local int unzlocal_getLong OF(( + const zlib_filefunc_def* pzlib_filefunc_def, + voidpf filestream, + uLong *pX)); + +local int unzlocal_getLong (pzlib_filefunc_def,filestream,pX) + const zlib_filefunc_def* pzlib_filefunc_def; + voidpf filestream; + uLong *pX; +{ + uLong x ; + int i; + int err; + + err = unzlocal_getByte(pzlib_filefunc_def,filestream,&i); + x = (uLong)i; + + if (err==UNZ_OK) + err = unzlocal_getByte(pzlib_filefunc_def,filestream,&i); + x += ((uLong)i)<<8; + + if (err==UNZ_OK) + err = unzlocal_getByte(pzlib_filefunc_def,filestream,&i); + x += ((uLong)i)<<16; + + if (err==UNZ_OK) + err = unzlocal_getByte(pzlib_filefunc_def,filestream,&i); + x += ((uLong)i)<<24; + + if (err==UNZ_OK) + *pX = x; + else + *pX = 0; + return err; +} + + +/* My own strcmpi / strcasecmp */ 
+local int strcmpcasenosensitive_internal (fileName1,fileName2) + const char* fileName1; + const char* fileName2; +{ + for (;;) + { + char c1=*(fileName1++); + char c2=*(fileName2++); + if ((c1>='a') && (c1<='z')) + c1 -= 0x20; + if ((c2>='a') && (c2<='z')) + c2 -= 0x20; + if (c1=='\0') + return ((c2=='\0') ? 0 : -1); + if (c2=='\0') + return 1; + if (c1<c2) + return -1; + if (c1>c2) + return 1; + } +} + + +#ifdef CASESENSITIVITYDEFAULT_NO +#define CASESENSITIVITYDEFAULTVALUE 2 +#else +#define CASESENSITIVITYDEFAULTVALUE 1 +#endif + +#ifndef STRCMPCASENOSENTIVEFUNCTION +#define STRCMPCASENOSENTIVEFUNCTION strcmpcasenosensitive_internal +#endif + +/* + Compare two filename (fileName1,fileName2). + If iCaseSenisivity = 1, comparision is case sensitivity (like strcmp) + If iCaseSenisivity = 2, comparision is not case sensitivity (like strcmpi + or strcasecmp) + If iCaseSenisivity = 0, case sensitivity is defaut of your operating system + (like 1 on Unix, 2 on Windows) + +*/ +extern int ZEXPORT unzStringFileNameCompare (fileName1,fileName2,iCaseSensitivity) + const char* fileName1; + const char* fileName2; + int iCaseSensitivity; +{ + if (iCaseSensitivity==0) + iCaseSensitivity=CASESENSITIVITYDEFAULTVALUE; + + if (iCaseSensitivity==1) + return strcmp(fileName1,fileName2); + + return STRCMPCASENOSENTIVEFUNCTION(fileName1,fileName2); +} + +#ifndef BUFREADCOMMENT +#define BUFREADCOMMENT (0x400) +#endif + +/* + Locate the Central directory of a zipfile (at the end, just before + the global comment) +*/ +local uLong unzlocal_SearchCentralDir OF(( + const zlib_filefunc_def* pzlib_filefunc_def, + voidpf filestream)); + +local uLong unzlocal_SearchCentralDir(pzlib_filefunc_def,filestream) + const zlib_filefunc_def* pzlib_filefunc_def; + voidpf filestream; +{ + unsigned char* buf; + uLong uSizeFile; + uLong uBackRead; + uLong uMaxBack=0xffff; /* maximum size of global comment */ + uLong uPosFound=0; + + if (ZSEEK(*pzlib_filefunc_def,filestream,0,ZLIB_FILEFUNC_SEEK_END) != 0) + 
return 0; + + + uSizeFile = ZTELL(*pzlib_filefunc_def,filestream); + + if (uMaxBack>uSizeFile) + uMaxBack = uSizeFile; + + buf = (unsigned char*)ALLOC(BUFREADCOMMENT+4); + if (buf==NULL) + return 0; + + uBackRead = 4; + while (uBackRead<uMaxBack) + { + uLong uReadSize,uReadPos ; + int i; + if (uBackRead+BUFREADCOMMENT>uMaxBack) + uBackRead = uMaxBack; + else + uBackRead+=BUFREADCOMMENT; + uReadPos = uSizeFile-uBackRead ; + + uReadSize = ((BUFREADCOMMENT+4) < (uSizeFile-uReadPos)) ? + (BUFREADCOMMENT+4) : (uSizeFile-uReadPos); + if (ZSEEK(*pzlib_filefunc_def,filestream,uReadPos,ZLIB_FILEFUNC_SEEK_SET)!=0) + break; + + if (ZREAD(*pzlib_filefunc_def,filestream,buf,uReadSize)!=uReadSize) + break; + + for (i=(int)uReadSize-3; (i--)>0;) + if (((*(buf+i))==0x50) && ((*(buf+i+1))==0x4b) && + ((*(buf+i+2))==0x05) && ((*(buf+i+3))==0x06)) + { + uPosFound = uReadPos+i; + break; + } + + if (uPosFound!=0) + break; + } + TRYFREE(buf); + return uPosFound; +} + +/* + Open a Zip file. path contain the full pathname (by example, + on a Windows NT computer "c:\\test\\zlib114.zip" or on an Unix computer + "zlib/zlib114.zip". + If the zipfile cannot be opened (file doesn't exist or in not valid), the + return value is NULL. + Else, the return value is a unzFile Handle, usable with other function + of this unzip package. 
+*/ +extern unzFile ZEXPORT unzOpen2 (path, pzlib_filefunc_def) + const char *path; + zlib_filefunc_def* pzlib_filefunc_def; +{ + unz_s us; + unz_s *s; + uLong central_pos,uL; + + uLong number_disk; /* number of the current dist, used for + spaning ZIP, unsupported, always 0*/ + uLong number_disk_with_CD; /* number the the disk with central dir, used + for spaning ZIP, unsupported, always 0*/ + uLong number_entry_CD; /* total number of entries in + the central dir + (same than number_entry on nospan) */ + + int err=UNZ_OK; + + if (unz_copyright[0]!=' ') + return NULL; + + if (pzlib_filefunc_def==NULL) + fill_fopen_filefunc(&us.z_filefunc); + else + us.z_filefunc = *pzlib_filefunc_def; + + us.filestream= (*(us.z_filefunc.zopen_file))(us.z_filefunc.opaque, + path, + ZLIB_FILEFUNC_MODE_READ | + ZLIB_FILEFUNC_MODE_EXISTING); + if (us.filestream==NULL) + return NULL; + + central_pos = unzlocal_SearchCentralDir(&us.z_filefunc,us.filestream); + if (central_pos==0) + err=UNZ_ERRNO; + + if (ZSEEK(us.z_filefunc, us.filestream, + central_pos,ZLIB_FILEFUNC_SEEK_SET)!=0) + err=UNZ_ERRNO; + + /* the signature, already checked */ + if (unzlocal_getLong(&us.z_filefunc, us.filestream,&uL)!=UNZ_OK) + err=UNZ_ERRNO; + + /* number of this disk */ + if (unzlocal_getShort(&us.z_filefunc, us.filestream,&number_disk)!=UNZ_OK) + err=UNZ_ERRNO; + + /* number of the disk with the start of the central directory */ + if (unzlocal_getShort(&us.z_filefunc, us.filestream,&number_disk_with_CD)!=UNZ_OK) + err=UNZ_ERRNO; + + /* total number of entries in the central dir on this disk */ + if (unzlocal_getShort(&us.z_filefunc, us.filestream,&us.gi.number_entry)!=UNZ_OK) + err=UNZ_ERRNO; + + /* total number of entries in the central dir */ + if (unzlocal_getShort(&us.z_filefunc, us.filestream,&number_entry_CD)!=UNZ_OK) + err=UNZ_ERRNO; + + if ((number_entry_CD!=us.gi.number_entry) || + (number_disk_with_CD!=0) || + (number_disk!=0)) + err=UNZ_BADZIPFILE; + + /* size of the central directory */ + if 
(unzlocal_getLong(&us.z_filefunc, us.filestream,&us.size_central_dir)!=UNZ_OK) + err=UNZ_ERRNO; + + /* offset of start of central directory with respect to the + starting disk number */ + if (unzlocal_getLong(&us.z_filefunc, us.filestream,&us.offset_central_dir)!=UNZ_OK) + err=UNZ_ERRNO; + + /* zipfile comment length */ + if (unzlocal_getShort(&us.z_filefunc, us.filestream,&us.gi.size_comment)!=UNZ_OK) + err=UNZ_ERRNO; + + if ((central_pos<us.offset_central_dir+us.size_central_dir) && + (err==UNZ_OK)) + err=UNZ_BADZIPFILE; + + if (err!=UNZ_OK) + { + ZCLOSE(us.z_filefunc, us.filestream); + return NULL; + } + + us.byte_before_the_zipfile = central_pos - + (us.offset_central_dir+us.size_central_dir); + us.central_pos = central_pos; + us.pfile_in_zip_read = NULL; + us.encrypted = 0; + + + s=(unz_s*)ALLOC(sizeof(unz_s)); + *s=us; + unzGoToFirstFile((unzFile)s); + return (unzFile)s; +} + + +extern unzFile ZEXPORT unzOpen (path) + const char *path; +{ + return unzOpen2(path, NULL); +} + +/* + Close a ZipFile opened with unzipOpen. + If there is files inside the .Zip opened with unzipOpenCurrentFile (see later), + these files MUST be closed with unzipCloseCurrentFile before call unzipClose. + return UNZ_OK if there is no problem. */ +extern int ZEXPORT unzClose (file) + unzFile file; +{ + unz_s* s; + if (file==NULL) + return UNZ_PARAMERROR; + s=(unz_s*)file; + + if (s->pfile_in_zip_read!=NULL) + unzCloseCurrentFile(file); + + ZCLOSE(s->z_filefunc, s->filestream); + TRYFREE(s); + return UNZ_OK; +} + + +/* + Write info about the ZipFile in the *pglobal_info structure. + No preparation of the structure is needed + return UNZ_OK if there is no problem. 
*/ +extern int ZEXPORT unzGetGlobalInfo (file,pglobal_info) + unzFile file; + unz_global_info *pglobal_info; +{ + unz_s* s; + if (file==NULL) + return UNZ_PARAMERROR; + s=(unz_s*)file; + *pglobal_info=s->gi; + return UNZ_OK; +} + + +/* + Translate date/time from Dos format to tm_unz (readable more easilty) +*/ +local void unzlocal_DosDateToTmuDate (ulDosDate, ptm) + uLong ulDosDate; + tm_unz* ptm; +{ + uLong uDate; + uDate = (uLong)(ulDosDate>>16); + ptm->tm_mday = (uInt)(uDate&0x1f) ; + ptm->tm_mon = (uInt)((((uDate)&0x1E0)/0x20)-1) ; + ptm->tm_year = (uInt)(((uDate&0x0FE00)/0x0200)+1980) ; + + ptm->tm_hour = (uInt) ((ulDosDate &0xF800)/0x800); + ptm->tm_min = (uInt) ((ulDosDate&0x7E0)/0x20) ; + ptm->tm_sec = (uInt) (2*(ulDosDate&0x1f)) ; +} + +/* + Get Info about the current file in the zipfile, with internal only info +*/ +local int unzlocal_GetCurrentFileInfoInternal OF((unzFile file, + unz_file_info *pfile_info, + unz_file_info_internal + *pfile_info_internal, + char *szFileName, + uLong fileNameBufferSize, + void *extraField, + uLong extraFieldBufferSize, + char *szComment, + uLong commentBufferSize)); + +local int unzlocal_GetCurrentFileInfoInternal (file, + pfile_info, + pfile_info_internal, + szFileName, fileNameBufferSize, + extraField, extraFieldBufferSize, + szComment, commentBufferSize) + unzFile file; + unz_file_info *pfile_info; + unz_file_info_internal *pfile_info_internal; + char *szFileName; + uLong fileNameBufferSize; + void *extraField; + uLong extraFieldBufferSize; + char *szComment; + uLong commentBufferSize; +{ + unz_s* s; + unz_file_info file_info; + unz_file_info_internal file_info_internal; + int err=UNZ_OK; + uLong uMagic; + long lSeek=0; + + if (file==NULL) + return UNZ_PARAMERROR; + s=(unz_s*)file; + if (ZSEEK(s->z_filefunc, s->filestream, + s->pos_in_central_dir+s->byte_before_the_zipfile, + ZLIB_FILEFUNC_SEEK_SET)!=0) + err=UNZ_ERRNO; + + + /* we check the magic */ + if (err==UNZ_OK) + if (unzlocal_getLong(&s->z_filefunc, 
s->filestream,&uMagic) != UNZ_OK) + err=UNZ_ERRNO; + else if (uMagic!=0x02014b50) + err=UNZ_BADZIPFILE; + + if (unzlocal_getShort(&s->z_filefunc, s->filestream,&file_info.version) != UNZ_OK) + err=UNZ_ERRNO; + + if (unzlocal_getShort(&s->z_filefunc, s->filestream,&file_info.version_needed) != UNZ_OK) + err=UNZ_ERRNO; + + if (unzlocal_getShort(&s->z_filefunc, s->filestream,&file_info.flag) != UNZ_OK) + err=UNZ_ERRNO; + + if (unzlocal_getShort(&s->z_filefunc, s->filestream,&file_info.compression_method) != UNZ_OK) + err=UNZ_ERRNO; + + if (unzlocal_getLong(&s->z_filefunc, s->filestream,&file_info.dosDate) != UNZ_OK) + err=UNZ_ERRNO; + + unzlocal_DosDateToTmuDate(file_info.dosDate,&file_info.tmu_date); + + if (unzlocal_getLong(&s->z_filefunc, s->filestream,&file_info.crc) != UNZ_OK) + err=UNZ_ERRNO; + + if (unzlocal_getLong(&s->z_filefunc, s->filestream,&file_info.compressed_size) != UNZ_OK) + err=UNZ_ERRNO; + + if (unzlocal_getLong(&s->z_filefunc, s->filestream,&file_info.uncompressed_size) != UNZ_OK) + err=UNZ_ERRNO; + + if (unzlocal_getShort(&s->z_filefunc, s->filestream,&file_info.size_filename) != UNZ_OK) + err=UNZ_ERRNO; + + if (unzlocal_getShort(&s->z_filefunc, s->filestream,&file_info.size_file_extra) != UNZ_OK) + err=UNZ_ERRNO; + + if (unzlocal_getShort(&s->z_filefunc, s->filestream,&file_info.size_file_comment) != UNZ_OK) + err=UNZ_ERRNO; + + if (unzlocal_getShort(&s->z_filefunc, s->filestream,&file_info.disk_num_start) != UNZ_OK) + err=UNZ_ERRNO; + + if (unzlocal_getShort(&s->z_filefunc, s->filestream,&file_info.internal_fa) != UNZ_OK) + err=UNZ_ERRNO; + + if (unzlocal_getLong(&s->z_filefunc, s->filestream,&file_info.external_fa) != UNZ_OK) + err=UNZ_ERRNO; + + if (unzlocal_getLong(&s->z_filefunc, s->filestream,&file_info_internal.offset_curfile) != UNZ_OK) + err=UNZ_ERRNO; + + lSeek+=file_info.size_filename; + if ((err==UNZ_OK) && (szFileName!=NULL)) + { + uLong uSizeRead ; + if (file_info.size_filename<fileNameBufferSize) + { + 
*(szFileName+file_info.size_filename)='\0'; + uSizeRead = file_info.size_filename; + } + else + uSizeRead = fileNameBufferSize; + + if ((file_info.size_filename>0) && (fileNameBufferSize>0)) + if (ZREAD(s->z_filefunc, s->filestream,szFileName,uSizeRead)!=uSizeRead) + err=UNZ_ERRNO; + lSeek -= uSizeRead; + } + + + if ((err==UNZ_OK) && (extraField!=NULL)) + { + uLong uSizeRead ; + if (file_info.size_file_extra<extraFieldBufferSize) + uSizeRead = file_info.size_file_extra; + else + uSizeRead = extraFieldBufferSize; + + if (lSeek!=0) + if (ZSEEK(s->z_filefunc, s->filestream,lSeek,ZLIB_FILEFUNC_SEEK_CUR)==0) + lSeek=0; + else + err=UNZ_ERRNO; + if ((file_info.size_file_extra>0) && (extraFieldBufferSize>0)) + if (ZREAD(s->z_filefunc, s->filestream,extraField,uSizeRead)!=uSizeRead) + err=UNZ_ERRNO; + lSeek += file_info.size_file_extra - uSizeRead; + } + else + lSeek+=file_info.size_file_extra; + + + if ((err==UNZ_OK) && (szComment!=NULL)) + { + uLong uSizeRead ; + if (file_info.size_file_comment<commentBufferSize) + { + *(szComment+file_info.size_file_comment)='\0'; + uSizeRead = file_info.size_file_comment; + } + else + uSizeRead = commentBufferSize; + + if (lSeek!=0) + if (ZSEEK(s->z_filefunc, s->filestream,lSeek,ZLIB_FILEFUNC_SEEK_CUR)==0) + lSeek=0; + else + err=UNZ_ERRNO; + if ((file_info.size_file_comment>0) && (commentBufferSize>0)) + if (ZREAD(s->z_filefunc, s->filestream,szComment,uSizeRead)!=uSizeRead) + err=UNZ_ERRNO; + lSeek+=file_info.size_file_comment - uSizeRead; + } + else + lSeek+=file_info.size_file_comment; + + if ((err==UNZ_OK) && (pfile_info!=NULL)) + *pfile_info=file_info; + + if ((err==UNZ_OK) && (pfile_info_internal!=NULL)) + *pfile_info_internal=file_info_internal; + + return err; +} + + + +/* + Write info about the ZipFile in the *pglobal_info structure. + No preparation of the structure is needed + return UNZ_OK if there is no problem. 
+*/ +extern int ZEXPORT unzGetCurrentFileInfo (file, + pfile_info, + szFileName, fileNameBufferSize, + extraField, extraFieldBufferSize, + szComment, commentBufferSize) + unzFile file; + unz_file_info *pfile_info; + char *szFileName; + uLong fileNameBufferSize; + void *extraField; + uLong extraFieldBufferSize; + char *szComment; + uLong commentBufferSize; +{ + return unzlocal_GetCurrentFileInfoInternal(file,pfile_info,NULL, + szFileName,fileNameBufferSize, + extraField,extraFieldBufferSize, + szComment,commentBufferSize); +} + +/* + Set the current file of the zipfile to the first file. + return UNZ_OK if there is no problem +*/ +extern int ZEXPORT unzGoToFirstFile (file) + unzFile file; +{ + int err=UNZ_OK; + unz_s* s; + if (file==NULL) + return UNZ_PARAMERROR; + s=(unz_s*)file; + s->pos_in_central_dir=s->offset_central_dir; + s->num_file=0; + err=unzlocal_GetCurrentFileInfoInternal(file,&s->cur_file_info, + &s->cur_file_info_internal, + NULL,0,NULL,0,NULL,0); + s->current_file_ok = (err == UNZ_OK); + return err; +} + +/* + Set the current file of the zipfile to the next file. + return UNZ_OK if there is no problem + return UNZ_END_OF_LIST_OF_FILE if the actual file was the latest. +*/ +extern int ZEXPORT unzGoToNextFile (file) + unzFile file; +{ + unz_s* s; + int err; + + if (file==NULL) + return UNZ_PARAMERROR; + s=(unz_s*)file; + if (!s->current_file_ok) + return UNZ_END_OF_LIST_OF_FILE; + if (s->gi.number_entry != 0xffff) // 2^16 files overflow hack + if (s->num_file+1==s->gi.number_entry) + return UNZ_END_OF_LIST_OF_FILE; + + s->pos_in_central_dir += SIZECENTRALDIRITEM + s->cur_file_info.size_filename + + s->cur_file_info.size_file_extra + s->cur_file_info.size_file_comment ; + s->num_file++; + err = unzlocal_GetCurrentFileInfoInternal(file,&s->cur_file_info, + &s->cur_file_info_internal, + NULL,0,NULL,0,NULL,0); + s->current_file_ok = (err == UNZ_OK); + return err; +} + + +/* + Try locate the file szFileName in the zipfile. 
+ For the iCaseSensitivity signification, see unzipStringFileNameCompare + + return value : + UNZ_OK if the file is found. It becomes the current file. + UNZ_END_OF_LIST_OF_FILE if the file is not found +*/ +extern int ZEXPORT unzLocateFile (file, szFileName, iCaseSensitivity) + unzFile file; + const char *szFileName; + int iCaseSensitivity; +{ + unz_s* s; + int err; + + /* We remember the 'current' position in the file so that we can jump + * back there if we fail. + */ + unz_file_info cur_file_infoSaved; + unz_file_info_internal cur_file_info_internalSaved; + uLong num_fileSaved; + uLong pos_in_central_dirSaved; + + + if (file==NULL) + return UNZ_PARAMERROR; + + if (strlen(szFileName)>=UNZ_MAXFILENAMEINZIP) + return UNZ_PARAMERROR; + + s=(unz_s*)file; + if (!s->current_file_ok) + return UNZ_END_OF_LIST_OF_FILE; + + /* Save the current state */ + num_fileSaved = s->num_file; + pos_in_central_dirSaved = s->pos_in_central_dir; + cur_file_infoSaved = s->cur_file_info; + cur_file_info_internalSaved = s->cur_file_info_internal; + + err = unzGoToFirstFile(file); + + while (err == UNZ_OK) + { + char szCurrentFileName[UNZ_MAXFILENAMEINZIP+1]; + err = unzGetCurrentFileInfo(file,NULL, + szCurrentFileName,sizeof(szCurrentFileName)-1, + NULL,0,NULL,0); + if (err == UNZ_OK) + { + if (unzStringFileNameCompare(szCurrentFileName, + szFileName,iCaseSensitivity)==0) + return UNZ_OK; + err = unzGoToNextFile(file); + } + } + + /* We failed, so restore the state of the 'current file' to where we + * were. + */ + s->num_file = num_fileSaved ; + s->pos_in_central_dir = pos_in_central_dirSaved ; + s->cur_file_info = cur_file_infoSaved; + s->cur_file_info_internal = cur_file_info_internalSaved; + return err; +} + + +/* +/////////////////////////////////////////// +// Contributed by Ryan Haksi (mailto://cryogen@infoserve.net) +// I need random access +// +// Further optimization could be realized by adding an ability +// to cache the directory in memory. 
The goal being a single +// comprehensive file read to put the file I need in a memory. +*/ + +/* +typedef struct unz_file_pos_s +{ + uLong pos_in_zip_directory; // offset in file + uLong num_of_file; // # of file +} unz_file_pos; +*/ + +extern int ZEXPORT unzGetFilePos(file, file_pos) + unzFile file; + unz_file_pos* file_pos; +{ + unz_s* s; + + if (file==NULL || file_pos==NULL) + return UNZ_PARAMERROR; + s=(unz_s*)file; + if (!s->current_file_ok) + return UNZ_END_OF_LIST_OF_FILE; + + file_pos->pos_in_zip_directory = s->pos_in_central_dir; + file_pos->num_of_file = s->num_file; + + return UNZ_OK; +} + +extern int ZEXPORT unzGoToFilePos(file, file_pos) + unzFile file; + unz_file_pos* file_pos; +{ + unz_s* s; + int err; + + if (file==NULL || file_pos==NULL) + return UNZ_PARAMERROR; + s=(unz_s*)file; + + /* jump to the right spot */ + s->pos_in_central_dir = file_pos->pos_in_zip_directory; + s->num_file = file_pos->num_of_file; + + /* set the current file */ + err = unzlocal_GetCurrentFileInfoInternal(file,&s->cur_file_info, + &s->cur_file_info_internal, + NULL,0,NULL,0,NULL,0); + /* return results */ + s->current_file_ok = (err == UNZ_OK); + return err; +} + +/* +// Unzip Helper Functions - should be here? 
+/////////////////////////////////////////// +*/ + +/* + Read the local header of the current zipfile + Check the coherency of the local header and info in the end of central + directory about this file + store in *piSizeVar the size of extra info in local header + (filename and size of extra field data) +*/ +local int unzlocal_CheckCurrentFileCoherencyHeader (s,piSizeVar, + poffset_local_extrafield, + psize_local_extrafield) + unz_s* s; + uInt* piSizeVar; + uLong *poffset_local_extrafield; + uInt *psize_local_extrafield; +{ + uLong uMagic,uData,uFlags; + uLong size_filename; + uLong size_extra_field; + int err=UNZ_OK; + + *piSizeVar = 0; + *poffset_local_extrafield = 0; + *psize_local_extrafield = 0; + + if (ZSEEK(s->z_filefunc, s->filestream,s->cur_file_info_internal.offset_curfile + + s->byte_before_the_zipfile,ZLIB_FILEFUNC_SEEK_SET)!=0) + return UNZ_ERRNO; + + + if (err==UNZ_OK) + if (unzlocal_getLong(&s->z_filefunc, s->filestream,&uMagic) != UNZ_OK) + err=UNZ_ERRNO; + else if (uMagic!=0x04034b50) + err=UNZ_BADZIPFILE; + + if (unzlocal_getShort(&s->z_filefunc, s->filestream,&uData) != UNZ_OK) + err=UNZ_ERRNO; +/* + else if ((err==UNZ_OK) && (uData!=s->cur_file_info.wVersion)) + err=UNZ_BADZIPFILE; +*/ + if (unzlocal_getShort(&s->z_filefunc, s->filestream,&uFlags) != UNZ_OK) + err=UNZ_ERRNO; + + if (unzlocal_getShort(&s->z_filefunc, s->filestream,&uData) != UNZ_OK) + err=UNZ_ERRNO; + else if ((err==UNZ_OK) && (uData!=s->cur_file_info.compression_method)) + err=UNZ_BADZIPFILE; + + if ((err==UNZ_OK) && (s->cur_file_info.compression_method!=0) && + (s->cur_file_info.compression_method!=Z_DEFLATED)) + err=UNZ_BADZIPFILE; + + if (unzlocal_getLong(&s->z_filefunc, s->filestream,&uData) != UNZ_OK) /* date/time */ + err=UNZ_ERRNO; + + if (unzlocal_getLong(&s->z_filefunc, s->filestream,&uData) != UNZ_OK) /* crc */ + err=UNZ_ERRNO; + else if ((err==UNZ_OK) && (uData!=s->cur_file_info.crc) && + ((uFlags & 8)==0)) + err=UNZ_BADZIPFILE; + + if 
(unzlocal_getLong(&s->z_filefunc, s->filestream,&uData) != UNZ_OK) /* size compr */ + err=UNZ_ERRNO; + else if ((err==UNZ_OK) && (uData!=s->cur_file_info.compressed_size) && + ((uFlags & 8)==0)) + err=UNZ_BADZIPFILE; + + if (unzlocal_getLong(&s->z_filefunc, s->filestream,&uData) != UNZ_OK) /* size uncompr */ + err=UNZ_ERRNO; + else if ((err==UNZ_OK) && (uData!=s->cur_file_info.uncompressed_size) && + ((uFlags & 8)==0)) + err=UNZ_BADZIPFILE; + + + if (unzlocal_getShort(&s->z_filefunc, s->filestream,&size_filename) != UNZ_OK) + err=UNZ_ERRNO; + else if ((err==UNZ_OK) && (size_filename!=s->cur_file_info.size_filename)) + err=UNZ_BADZIPFILE; + + *piSizeVar += (uInt)size_filename; + + if (unzlocal_getShort(&s->z_filefunc, s->filestream,&size_extra_field) != UNZ_OK) + err=UNZ_ERRNO; + *poffset_local_extrafield= s->cur_file_info_internal.offset_curfile + + SIZEZIPLOCALHEADER + size_filename; + *psize_local_extrafield = (uInt)size_extra_field; + + *piSizeVar += (uInt)size_extra_field; + + return err; +} + +/* + Open for reading data the current file in the zipfile. + If there is no error and the file is opened, the return value is UNZ_OK. 
+*/ +extern int ZEXPORT unzOpenCurrentFile3 (file, method, level, raw, password) + unzFile file; + int* method; + int* level; + int raw; + const char* password; +{ + int err=UNZ_OK; + uInt iSizeVar; + unz_s* s; + file_in_zip_read_info_s* pfile_in_zip_read_info; + uLong offset_local_extrafield; /* offset of the local extra field */ + uInt size_local_extrafield; /* size of the local extra field */ +# ifndef NOUNCRYPT + char source[12]; +# else + if (password != NULL) + return UNZ_PARAMERROR; +# endif + + if (file==NULL) + return UNZ_PARAMERROR; + s=(unz_s*)file; + if (!s->current_file_ok) + return UNZ_PARAMERROR; + + if (s->pfile_in_zip_read != NULL) + unzCloseCurrentFile(file); + + if (unzlocal_CheckCurrentFileCoherencyHeader(s,&iSizeVar, + &offset_local_extrafield,&size_local_extrafield)!=UNZ_OK) + return UNZ_BADZIPFILE; + + pfile_in_zip_read_info = (file_in_zip_read_info_s*) + ALLOC(sizeof(file_in_zip_read_info_s)); + if (pfile_in_zip_read_info==NULL) + return UNZ_INTERNALERROR; + + pfile_in_zip_read_info->read_buffer=(char*)ALLOC(UNZ_BUFSIZE); + pfile_in_zip_read_info->offset_local_extrafield = offset_local_extrafield; + pfile_in_zip_read_info->size_local_extrafield = size_local_extrafield; + pfile_in_zip_read_info->pos_local_extrafield=0; + pfile_in_zip_read_info->raw=raw; + + if (pfile_in_zip_read_info->read_buffer==NULL) + { + TRYFREE(pfile_in_zip_read_info); + return UNZ_INTERNALERROR; + } + + pfile_in_zip_read_info->stream_initialised=0; + + if (method!=NULL) + *method = (int)s->cur_file_info.compression_method; + + if (level!=NULL) + { + *level = 6; + switch (s->cur_file_info.flag & 0x06) + { + case 6 : *level = 1; break; + case 4 : *level = 2; break; + case 2 : *level = 9; break; + } + } + + if ((s->cur_file_info.compression_method!=0) && + (s->cur_file_info.compression_method!=Z_DEFLATED)) + err=UNZ_BADZIPFILE; + + pfile_in_zip_read_info->crc32_wait=s->cur_file_info.crc; + pfile_in_zip_read_info->crc32=0; + pfile_in_zip_read_info->compression_method = + 
s->cur_file_info.compression_method; + pfile_in_zip_read_info->filestream=s->filestream; + pfile_in_zip_read_info->z_filefunc=s->z_filefunc; + pfile_in_zip_read_info->byte_before_the_zipfile=s->byte_before_the_zipfile; + + pfile_in_zip_read_info->stream.total_out = 0; + + if ((s->cur_file_info.compression_method==Z_DEFLATED) && + (!raw)) + { + pfile_in_zip_read_info->stream.zalloc = (alloc_func)0; + pfile_in_zip_read_info->stream.zfree = (free_func)0; + pfile_in_zip_read_info->stream.opaque = (voidpf)0; + pfile_in_zip_read_info->stream.next_in = (voidpf)0; + pfile_in_zip_read_info->stream.avail_in = 0; + + err=inflateInit2(&pfile_in_zip_read_info->stream, -MAX_WBITS); + if (err == Z_OK) + pfile_in_zip_read_info->stream_initialised=1; + else + return err; + /* windowBits is passed < 0 to tell that there is no zlib header. + * Note that in this case inflate *requires* an extra "dummy" byte + * after the compressed stream in order to complete decompression and + * return Z_STREAM_END. + * In unzip, i don't wait absolutely Z_STREAM_END because I known the + * size of both compressed and uncompressed data + */ + } + pfile_in_zip_read_info->rest_read_compressed = + s->cur_file_info.compressed_size ; + pfile_in_zip_read_info->rest_read_uncompressed = + s->cur_file_info.uncompressed_size ; + + + pfile_in_zip_read_info->pos_in_zipfile = + s->cur_file_info_internal.offset_curfile + SIZEZIPLOCALHEADER + + iSizeVar; + + pfile_in_zip_read_info->stream.avail_in = (uInt)0; + + s->pfile_in_zip_read = pfile_in_zip_read_info; + +# ifndef NOUNCRYPT + if (password != NULL) + { + int i; + s->pcrc_32_tab = get_crc_table(); + init_keys(password,s->keys,s->pcrc_32_tab); + if (ZSEEK(s->z_filefunc, s->filestream, + s->pfile_in_zip_read->pos_in_zipfile + + s->pfile_in_zip_read->byte_before_the_zipfile, + SEEK_SET)!=0) + return UNZ_INTERNALERROR; + if(ZREAD(s->z_filefunc, s->filestream,source, 12)<12) + return UNZ_INTERNALERROR; + + for (i = 0; i<12; i++) + 
zdecode(s->keys,s->pcrc_32_tab,source[i]); + + s->pfile_in_zip_read->pos_in_zipfile+=12; + s->encrypted=1; + } +# endif + + + return UNZ_OK; +} + +extern int ZEXPORT unzOpenCurrentFile (file) + unzFile file; +{ + return unzOpenCurrentFile3(file, NULL, NULL, 0, NULL); +} + +extern int ZEXPORT unzOpenCurrentFilePassword (file, password) + unzFile file; + const char* password; +{ + return unzOpenCurrentFile3(file, NULL, NULL, 0, password); +} + +extern int ZEXPORT unzOpenCurrentFile2 (file,method,level,raw) + unzFile file; + int* method; + int* level; + int raw; +{ + return unzOpenCurrentFile3(file, method, level, raw, NULL); +} + +/* + Read bytes from the current file. + buf contain buffer where data must be copied + len the size of buf. + + return the number of byte copied if somes bytes are copied + return 0 if the end of file was reached + return <0 with error code if there is an error + (UNZ_ERRNO for IO error, or zLib error for uncompress error) +*/ +extern int ZEXPORT unzReadCurrentFile (file, buf, len) + unzFile file; + voidp buf; + unsigned len; +{ + int err=UNZ_OK; + uInt iRead = 0; + unz_s* s; + file_in_zip_read_info_s* pfile_in_zip_read_info; + if (file==NULL) + return UNZ_PARAMERROR; + s=(unz_s*)file; + pfile_in_zip_read_info=s->pfile_in_zip_read; + + if (pfile_in_zip_read_info==NULL) + return UNZ_PARAMERROR; + + + if ((pfile_in_zip_read_info->read_buffer == NULL)) + return UNZ_END_OF_LIST_OF_FILE; + if (len==0) + return 0; + + pfile_in_zip_read_info->stream.next_out = (Bytef*)buf; + + pfile_in_zip_read_info->stream.avail_out = (uInt)len; + + if (len>pfile_in_zip_read_info->rest_read_uncompressed) + pfile_in_zip_read_info->stream.avail_out = + (uInt)pfile_in_zip_read_info->rest_read_uncompressed; + + while (pfile_in_zip_read_info->stream.avail_out>0) + { + if ((pfile_in_zip_read_info->stream.avail_in==0) && + (pfile_in_zip_read_info->rest_read_compressed>0)) + { + uInt uReadThis = UNZ_BUFSIZE; + if (pfile_in_zip_read_info->rest_read_compressed<uReadThis) 
+ uReadThis = (uInt)pfile_in_zip_read_info->rest_read_compressed; + if (uReadThis == 0) + return UNZ_EOF; + if (ZSEEK(pfile_in_zip_read_info->z_filefunc, + pfile_in_zip_read_info->filestream, + pfile_in_zip_read_info->pos_in_zipfile + + pfile_in_zip_read_info->byte_before_the_zipfile, + ZLIB_FILEFUNC_SEEK_SET)!=0) + return UNZ_ERRNO; + if (ZREAD(pfile_in_zip_read_info->z_filefunc, + pfile_in_zip_read_info->filestream, + pfile_in_zip_read_info->read_buffer, + uReadThis)!=uReadThis) + return UNZ_ERRNO; + + +# ifndef NOUNCRYPT + if(s->encrypted) + { + uInt i; + for(i=0;i<uReadThis;i++) + pfile_in_zip_read_info->read_buffer[i] = + zdecode(s->keys,s->pcrc_32_tab, + pfile_in_zip_read_info->read_buffer[i]); + } +# endif + + + pfile_in_zip_read_info->pos_in_zipfile += uReadThis; + + pfile_in_zip_read_info->rest_read_compressed-=uReadThis; + + pfile_in_zip_read_info->stream.next_in = + (Bytef*)pfile_in_zip_read_info->read_buffer; + pfile_in_zip_read_info->stream.avail_in = (uInt)uReadThis; + } + + if ((pfile_in_zip_read_info->compression_method==0) || (pfile_in_zip_read_info->raw)) + { + uInt uDoCopy,i ; + + if ((pfile_in_zip_read_info->stream.avail_in == 0) && + (pfile_in_zip_read_info->rest_read_compressed == 0)) + return (iRead==0) ? 
UNZ_EOF : iRead; + + if (pfile_in_zip_read_info->stream.avail_out < + pfile_in_zip_read_info->stream.avail_in) + uDoCopy = pfile_in_zip_read_info->stream.avail_out ; + else + uDoCopy = pfile_in_zip_read_info->stream.avail_in ; + + for (i=0;i<uDoCopy;i++) + *(pfile_in_zip_read_info->stream.next_out+i) = + *(pfile_in_zip_read_info->stream.next_in+i); + + pfile_in_zip_read_info->crc32 = crc32(pfile_in_zip_read_info->crc32, + pfile_in_zip_read_info->stream.next_out, + uDoCopy); + pfile_in_zip_read_info->rest_read_uncompressed-=uDoCopy; + pfile_in_zip_read_info->stream.avail_in -= uDoCopy; + pfile_in_zip_read_info->stream.avail_out -= uDoCopy; + pfile_in_zip_read_info->stream.next_out += uDoCopy; + pfile_in_zip_read_info->stream.next_in += uDoCopy; + pfile_in_zip_read_info->stream.total_out += uDoCopy; + iRead += uDoCopy; + } + else + { + uLong uTotalOutBefore,uTotalOutAfter; + const Bytef *bufBefore; + uLong uOutThis; + int flush=Z_SYNC_FLUSH; + + uTotalOutBefore = pfile_in_zip_read_info->stream.total_out; + bufBefore = pfile_in_zip_read_info->stream.next_out; + + /* + if ((pfile_in_zip_read_info->rest_read_uncompressed == + pfile_in_zip_read_info->stream.avail_out) && + (pfile_in_zip_read_info->rest_read_compressed == 0)) + flush = Z_FINISH; + */ + err=inflate(&pfile_in_zip_read_info->stream,flush); + + uTotalOutAfter = pfile_in_zip_read_info->stream.total_out; + uOutThis = uTotalOutAfter-uTotalOutBefore; + + pfile_in_zip_read_info->crc32 = + crc32(pfile_in_zip_read_info->crc32,bufBefore, + (uInt)(uOutThis)); + + pfile_in_zip_read_info->rest_read_uncompressed -= + uOutThis; + + iRead += (uInt)(uTotalOutAfter - uTotalOutBefore); + + if (err==Z_STREAM_END) + return (iRead==0) ? 
UNZ_EOF : iRead; + if (err!=Z_OK) + break; + } + } + + if (err==Z_OK) + return iRead; + return err; +} + + +/* + Give the current position in uncompressed data +*/ +extern z_off_t ZEXPORT unztell (file) + unzFile file; +{ + unz_s* s; + file_in_zip_read_info_s* pfile_in_zip_read_info; + if (file==NULL) + return UNZ_PARAMERROR; + s=(unz_s*)file; + pfile_in_zip_read_info=s->pfile_in_zip_read; + + if (pfile_in_zip_read_info==NULL) + return UNZ_PARAMERROR; + + return (z_off_t)pfile_in_zip_read_info->stream.total_out; +} + + +/* + return 1 if the end of file was reached, 0 elsewhere +*/ +extern int ZEXPORT unzeof (file) + unzFile file; +{ + unz_s* s; + file_in_zip_read_info_s* pfile_in_zip_read_info; + if (file==NULL) + return UNZ_PARAMERROR; + s=(unz_s*)file; + pfile_in_zip_read_info=s->pfile_in_zip_read; + + if (pfile_in_zip_read_info==NULL) + return UNZ_PARAMERROR; + + if (pfile_in_zip_read_info->rest_read_uncompressed == 0) + return 1; + else + return 0; +} + + + +/* + Read extra field from the current file (opened by unzOpenCurrentFile) + This is the local-header version of the extra field (sometimes, there is + more info in the local-header version than in the central-header) + + if buf==NULL, it return the size of the local extra field that can be read + + if buf!=NULL, len is the size of the buffer, the extra header is copied in + buf. 
+ the return value is the number of bytes copied in buf, or (if <0) + the error code +*/ +extern int ZEXPORT unzGetLocalExtrafield (file,buf,len) + unzFile file; + voidp buf; + unsigned len; +{ + unz_s* s; + file_in_zip_read_info_s* pfile_in_zip_read_info; + uInt read_now; + uLong size_to_read; + + if (file==NULL) + return UNZ_PARAMERROR; + s=(unz_s*)file; + pfile_in_zip_read_info=s->pfile_in_zip_read; + + if (pfile_in_zip_read_info==NULL) + return UNZ_PARAMERROR; + + size_to_read = (pfile_in_zip_read_info->size_local_extrafield - + pfile_in_zip_read_info->pos_local_extrafield); + + if (buf==NULL) + return (int)size_to_read; + + if (len>size_to_read) + read_now = (uInt)size_to_read; + else + read_now = (uInt)len ; + + if (read_now==0) + return 0; + + if (ZSEEK(pfile_in_zip_read_info->z_filefunc, + pfile_in_zip_read_info->filestream, + pfile_in_zip_read_info->offset_local_extrafield + + pfile_in_zip_read_info->pos_local_extrafield, + ZLIB_FILEFUNC_SEEK_SET)!=0) + return UNZ_ERRNO; + + if (ZREAD(pfile_in_zip_read_info->z_filefunc, + pfile_in_zip_read_info->filestream, + buf,read_now)!=read_now) + return UNZ_ERRNO; + + return (int)read_now; +} + +/* + Close the file in zip opened with unzipOpenCurrentFile + Return UNZ_CRCERROR if all the file was read but the CRC is not good +*/ +extern int ZEXPORT unzCloseCurrentFile (file) + unzFile file; +{ + int err=UNZ_OK; + + unz_s* s; + file_in_zip_read_info_s* pfile_in_zip_read_info; + if (file==NULL) + return UNZ_PARAMERROR; + s=(unz_s*)file; + pfile_in_zip_read_info=s->pfile_in_zip_read; + + if (pfile_in_zip_read_info==NULL) + return UNZ_PARAMERROR; + + + if ((pfile_in_zip_read_info->rest_read_uncompressed == 0) && + (!pfile_in_zip_read_info->raw)) + { + if (pfile_in_zip_read_info->crc32 != pfile_in_zip_read_info->crc32_wait) + err=UNZ_CRCERROR; + } + + + TRYFREE(pfile_in_zip_read_info->read_buffer); + pfile_in_zip_read_info->read_buffer = NULL; + if (pfile_in_zip_read_info->stream_initialised) + 
inflateEnd(&pfile_in_zip_read_info->stream); + + pfile_in_zip_read_info->stream_initialised = 0; + TRYFREE(pfile_in_zip_read_info); + + s->pfile_in_zip_read=NULL; + + return err; +} + + +/* + Get the global comment string of the ZipFile, in the szComment buffer. + uSizeBuf is the size of the szComment buffer. + return the number of byte copied or an error code <0 +*/ +extern int ZEXPORT unzGetGlobalComment (file, szComment, uSizeBuf) + unzFile file; + char *szComment; + uLong uSizeBuf; +{ + int err=UNZ_OK; + unz_s* s; + uLong uReadThis ; + if (file==NULL) + return UNZ_PARAMERROR; + s=(unz_s*)file; + + uReadThis = uSizeBuf; + if (uReadThis>s->gi.size_comment) + uReadThis = s->gi.size_comment; + + if (ZSEEK(s->z_filefunc,s->filestream,s->central_pos+22,ZLIB_FILEFUNC_SEEK_SET)!=0) + return UNZ_ERRNO; + + if (uReadThis>0) + { + *szComment='\0'; + if (ZREAD(s->z_filefunc,s->filestream,szComment,uReadThis)!=uReadThis) + return UNZ_ERRNO; + } + + if ((szComment != NULL) && (uSizeBuf > s->gi.size_comment)) + *(szComment+s->gi.size_comment)='\0'; + return (int)uReadThis; +} + +/* Additions by RX '2004 */ +extern uLong ZEXPORT unzGetOffset (file) + unzFile file; +{ + unz_s* s; + + if (file==NULL) + return UNZ_PARAMERROR; + s=(unz_s*)file; + if (!s->current_file_ok) + return 0; + if (s->gi.number_entry != 0 && s->gi.number_entry != 0xffff) + if (s->num_file==s->gi.number_entry) + return 0; + return s->pos_in_central_dir; +} + +extern int ZEXPORT unzSetOffset (file, pos) + unzFile file; + uLong pos; +{ + unz_s* s; + int err; + + if (file==NULL) + return UNZ_PARAMERROR; + s=(unz_s*)file; + + s->pos_in_central_dir = pos; + s->num_file = s->gi.number_entry; /* hack */ + err = unzlocal_GetCurrentFileInfoInternal(file,&s->cur_file_info, + &s->cur_file_info_internal, + NULL,0,NULL,0,NULL,0); + s->current_file_ok = (err == UNZ_OK); + return err; +} + diff --git a/src/minizip/unzip.h b/src/minizip/unzip.h new file mode 100644 index 0000000..cb6cb2e --- /dev/null +++ 
b/src/minizip/unzip.h @@ -0,0 +1,352 @@ +/* unzip.h -- IO for uncompress .zip files using zlib + Version 1.00, September 10th, 2003 + + Copyright (C) 1998-2003 Gilles Vollant + + This unzip package allow extract file from .ZIP file, compatible with PKZip 2.04g + WinZip, InfoZip tools and compatible. + Encryption and multi volume ZipFile (span) are not supported. + Old compressions used by old PKZip 1.x are not supported + + + I WAIT FEEDBACK at mail info@winimage.com + Visit also http://www.winimage.com/zLibDll/unzip.htm for evolution + + Condition of use and distribution are the same than zlib : + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. 
+ + +*/ + +/* for more info about .ZIP format, see + http://www.info-zip.org/pub/infozip/doc/appnote-981119-iz.zip + http://www.info-zip.org/pub/infozip/doc/ + PkWare has also a specification at : + ftp://ftp.pkware.com/probdesc.zip +*/ + +#ifndef _unz_H +#define _unz_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef _ZLIB_H +#include "zlib.h" +#endif + +#ifndef _ZLIBIOAPI_H +#include "ioapi.h" +#endif + +#if defined(STRICTUNZIP) || defined(STRICTZIPUNZIP) +/* like the STRICT of WIN32, we define a pointer that cannot be converted + from (void*) without cast */ +typedef struct TagunzFile__ { int unused; } unzFile__; +typedef unzFile__ *unzFile; +#else +typedef voidp unzFile; +#endif + + +#define UNZ_OK (0) +#define UNZ_END_OF_LIST_OF_FILE (-100) +#define UNZ_ERRNO (Z_ERRNO) +#define UNZ_EOF (0) +#define UNZ_PARAMERROR (-102) +#define UNZ_BADZIPFILE (-103) +#define UNZ_INTERNALERROR (-104) +#define UNZ_CRCERROR (-105) + +/* tm_unz contain date/time info */ +typedef struct tm_unz_s +{ + uInt tm_sec; /* seconds after the minute - [0,59] */ + uInt tm_min; /* minutes after the hour - [0,59] */ + uInt tm_hour; /* hours since midnight - [0,23] */ + uInt tm_mday; /* day of the month - [1,31] */ + uInt tm_mon; /* months since January - [0,11] */ + uInt tm_year; /* years - [1980..2044] */ +} tm_unz; + +/* unz_global_info structure contain global data about the ZIPfile + These data comes from the end of central dir */ +typedef struct unz_global_info_s +{ + uLong number_entry; /* total number of entries in + the central dir on this disk */ + uLong size_comment; /* size of the global comment of the zipfile */ +} unz_global_info; + + +/* unz_file_info contain information about a file in the zipfile */ +typedef struct unz_file_info_s +{ + uLong version; /* version made by 2 bytes */ + uLong version_needed; /* version needed to extract 2 bytes */ + uLong flag; /* general purpose bit flag 2 bytes */ + uLong compression_method; /* compression method 2 bytes */ + uLong dosDate; 
/* last mod file date in Dos fmt 4 bytes */ + uLong crc; /* crc-32 4 bytes */ + uLong compressed_size; /* compressed size 4 bytes */ + uLong uncompressed_size; /* uncompressed size 4 bytes */ + uLong size_filename; /* filename length 2 bytes */ + uLong size_file_extra; /* extra field length 2 bytes */ + uLong size_file_comment; /* file comment length 2 bytes */ + + uLong disk_num_start; /* disk number start 2 bytes */ + uLong internal_fa; /* internal file attributes 2 bytes */ + uLong external_fa; /* external file attributes 4 bytes */ + + tm_unz tmu_date; +} unz_file_info; + +extern int ZEXPORT unzStringFileNameCompare OF ((const char* fileName1, + const char* fileName2, + int iCaseSensitivity)); +/* + Compare two filename (fileName1,fileName2). + If iCaseSenisivity = 1, comparision is case sensitivity (like strcmp) + If iCaseSenisivity = 2, comparision is not case sensitivity (like strcmpi + or strcasecmp) + If iCaseSenisivity = 0, case sensitivity is defaut of your operating system + (like 1 on Unix, 2 on Windows) +*/ + + +extern unzFile ZEXPORT unzOpen OF((const char *path)); +/* + Open a Zip file. path contain the full pathname (by example, + on a Windows XP computer "c:\\zlib\\zlib113.zip" or on an Unix computer + "zlib/zlib113.zip". + If the zipfile cannot be opened (file don't exist or in not valid), the + return value is NULL. + Else, the return value is a unzFile Handle, usable with other function + of this unzip package. +*/ + +extern unzFile ZEXPORT unzOpen2 OF((const char *path, + zlib_filefunc_def* pzlib_filefunc_def)); +/* + Open a Zip file, like unzOpen, but provide a set of file low level API + for read/write the zip file (see ioapi.h) +*/ + +extern int ZEXPORT unzClose OF((unzFile file)); +/* + Close a ZipFile opened with unzipOpen. + If there is files inside the .Zip opened with unzOpenCurrentFile (see later), + these files MUST be closed with unzipCloseCurrentFile before call unzipClose. + return UNZ_OK if there is no problem. 
*/ + +extern int ZEXPORT unzGetGlobalInfo OF((unzFile file, + unz_global_info *pglobal_info)); +/* + Write info about the ZipFile in the *pglobal_info structure. + No preparation of the structure is needed + return UNZ_OK if there is no problem. */ + + +extern int ZEXPORT unzGetGlobalComment OF((unzFile file, + char *szComment, + uLong uSizeBuf)); +/* + Get the global comment string of the ZipFile, in the szComment buffer. + uSizeBuf is the size of the szComment buffer. + return the number of byte copied or an error code <0 +*/ + + +/***************************************************************************/ +/* Unzip package allow you browse the directory of the zipfile */ + +extern int ZEXPORT unzGoToFirstFile OF((unzFile file)); +/* + Set the current file of the zipfile to the first file. + return UNZ_OK if there is no problem +*/ + +extern int ZEXPORT unzGoToNextFile OF((unzFile file)); +/* + Set the current file of the zipfile to the next file. + return UNZ_OK if there is no problem + return UNZ_END_OF_LIST_OF_FILE if the actual file was the latest. +*/ + +extern int ZEXPORT unzLocateFile OF((unzFile file, + const char *szFileName, + int iCaseSensitivity)); +/* + Try locate the file szFileName in the zipfile. + For the iCaseSensitivity signification, see unzStringFileNameCompare + + return value : + UNZ_OK if the file is found. It becomes the current file. 
+ UNZ_END_OF_LIST_OF_FILE if the file is not found +*/ + + +/* ****************************************** */ +/* Ryan supplied functions */ +/* unz_file_info contain information about a file in the zipfile */ +typedef struct unz_file_pos_s +{ + uLong pos_in_zip_directory; /* offset in zip file directory */ + uLong num_of_file; /* # of file */ +} unz_file_pos; + +extern int ZEXPORT unzGetFilePos( + unzFile file, + unz_file_pos* file_pos); + +extern int ZEXPORT unzGoToFilePos( + unzFile file, + unz_file_pos* file_pos); + +/* ****************************************** */ + +extern int ZEXPORT unzGetCurrentFileInfo OF((unzFile file, + unz_file_info *pfile_info, + char *szFileName, + uLong fileNameBufferSize, + void *extraField, + uLong extraFieldBufferSize, + char *szComment, + uLong commentBufferSize)); +/* + Get Info about the current file + if pfile_info!=NULL, the *pfile_info structure will contain somes info about + the current file + if szFileName!=NULL, the filemane string will be copied in szFileName + (fileNameBufferSize is the size of the buffer) + if extraField!=NULL, the extra field information will be copied in extraField + (extraFieldBufferSize is the size of the buffer). + This is the Central-header version of the extra field + if szComment!=NULL, the comment string of the file will be copied in szComment + (commentBufferSize is the size of the buffer) +*/ + +/***************************************************************************/ +/* for reading the content of the current zipfile, you can open it, read data + from it, and close it (you can close it before reading all the file) + */ + +extern int ZEXPORT unzOpenCurrentFile OF((unzFile file)); +/* + Open for reading data the current file in the zipfile. + If there is no error, the return value is UNZ_OK. +*/ + +extern int ZEXPORT unzOpenCurrentFilePassword OF((unzFile file, + const char* password)); +/* + Open for reading data the current file in the zipfile. 
+ password is a crypting password + If there is no error, the return value is UNZ_OK. +*/ + +extern int ZEXPORT unzOpenCurrentFile2 OF((unzFile file, + int* method, + int* level, + int raw)); +/* + Same than unzOpenCurrentFile, but open for read raw the file (not uncompress) + if raw==1 + *method will receive method of compression, *level will receive level of + compression + note : you can set level parameter as NULL (if you did not want known level, + but you CANNOT set method parameter as NULL +*/ + +extern int ZEXPORT unzOpenCurrentFile3 OF((unzFile file, + int* method, + int* level, + int raw, + const char* password)); +/* + Same than unzOpenCurrentFile, but open for read raw the file (not uncompress) + if raw==1 + *method will receive method of compression, *level will receive level of + compression + note : you can set level parameter as NULL (if you did not want known level, + but you CANNOT set method parameter as NULL +*/ + + +extern int ZEXPORT unzCloseCurrentFile OF((unzFile file)); +/* + Close the file in zip opened with unzOpenCurrentFile + Return UNZ_CRCERROR if all the file was read but the CRC is not good +*/ + +extern int ZEXPORT unzReadCurrentFile OF((unzFile file, + voidp buf, + unsigned len)); +/* + Read bytes from the current file (opened by unzOpenCurrentFile) + buf contain buffer where data must be copied + len the size of buf. 
+ + return the number of byte copied if somes bytes are copied + return 0 if the end of file was reached + return <0 with error code if there is an error + (UNZ_ERRNO for IO error, or zLib error for uncompress error) +*/ + +extern z_off_t ZEXPORT unztell OF((unzFile file)); +/* + Give the current position in uncompressed data +*/ + +extern int ZEXPORT unzeof OF((unzFile file)); +/* + return 1 if the end of file was reached, 0 elsewhere +*/ + +extern int ZEXPORT unzGetLocalExtrafield OF((unzFile file, + voidp buf, + unsigned len)); +/* + Read extra field from the current file (opened by unzOpenCurrentFile) + This is the local-header version of the extra field (sometimes, there is + more info in the local-header version than in the central-header) + + if buf==NULL, it return the size of the local extra field + + if buf!=NULL, len is the size of the buffer, the extra header is copied in + buf. + the return value is the number of bytes copied in buf, or (if <0) + the error code +*/ + +/***************************************************************************/ + +/* Get the current file offset */ +extern uLong ZEXPORT unzGetOffset (unzFile file); + +/* Set the current file offset */ +extern int ZEXPORT unzSetOffset (unzFile file, uLong pos); + + + +#ifdef __cplusplus +} +#endif + +#endif /* _unz_H */ diff --git a/src/minizip/zip.c b/src/minizip/zip.c new file mode 100644 index 0000000..7dc767f --- /dev/null +++ b/src/minizip/zip.c @@ -0,0 +1,1199 @@ +/* zip.c -- IO on .zip files using zlib + Version 1.00, September 10th, 2003 + + Copyright (C) 1998-2003 Gilles Vollant + + Read zip.h for more info +*/ + + +#ifndef _WIN32_WCE +#include <stdio.h> +#include <stdlib.h> +#else +#include <stdio.h> +#include "celib.h" +#endif +#include <string.h> +#include <time.h> +#include "zlib.h" +#include "zip.h" + +#ifdef STDC +# include <stddef.h> +# include <string.h> +# include <stdlib.h> +#endif +#ifdef NO_ERRNO_H + extern int errno; +#else +# include <errno.h> +#endif + + +#ifndef 
local +# define local static +#endif +/* compile with -Dlocal if your debugger can't find static symbols */ + +#ifndef VERSIONMADEBY +# define VERSIONMADEBY (0x0) /* platform depedent */ +#endif + +#ifndef Z_BUFSIZE +#define Z_BUFSIZE (16384) +#endif + +#ifndef Z_MAXFILENAMEINZIP +#define Z_MAXFILENAMEINZIP (256) +#endif + +#ifndef ALLOC +# define ALLOC(size) (malloc(size)) +#endif +#ifndef TRYFREE +# define TRYFREE(p) {if (p) free(p);} +#endif + +/* +#define SIZECENTRALDIRITEM (0x2e) +#define SIZEZIPLOCALHEADER (0x1e) +*/ + +/* I've found an old Unix (a SunOS 4.1.3_U1) without all SEEK_* defined.... */ + +#ifndef SEEK_CUR +#define SEEK_CUR 1 +#endif + +#ifndef SEEK_END +#define SEEK_END 2 +#endif + +#ifndef SEEK_SET +#define SEEK_SET 0 +#endif + +#ifndef DEF_MEM_LEVEL +#if MAX_MEM_LEVEL >= 8 +# define DEF_MEM_LEVEL 8 +#else +# define DEF_MEM_LEVEL MAX_MEM_LEVEL +#endif +#endif +const char zip_copyright[] = + " zip 1.00 Copyright 1998-2003 Gilles Vollant - http://www.winimage.com/zLibDll"; + + +#define SIZEDATA_INDATABLOCK (4096-(4*4)) + +#define LOCALHEADERMAGIC (0x04034b50) +#define CENTRALHEADERMAGIC (0x02014b50) +#define ENDHEADERMAGIC (0x06054b50) + +#define FLAG_LOCALHEADER_OFFSET (0x06) +#define CRC_LOCALHEADER_OFFSET (0x0e) + +#define SIZECENTRALHEADER (0x2e) /* 46 */ + +typedef struct linkedlist_datablock_internal_s +{ + struct linkedlist_datablock_internal_s* next_datablock; + uLong avail_in_this_block; + uLong filled_in_this_block; + uLong unused; /* for future use and alignement */ + unsigned char data[SIZEDATA_INDATABLOCK]; +} linkedlist_datablock_internal; + +typedef struct linkedlist_data_s +{ + linkedlist_datablock_internal* first_block; + linkedlist_datablock_internal* last_block; +} linkedlist_data; + + +typedef struct +{ + z_stream stream; /* zLib stream structure for inflate */ + int stream_initialised; /* 1 is stream is initialised */ + uInt pos_in_buffered_data; /* last written byte in buffered_data */ + + uLong pos_local_header; /* offset of 
the local header of the file + currenty writing */ + char* central_header; /* central header data for the current file */ + uLong size_centralheader; /* size of the central header for cur file */ + uLong flag; /* flag of the file currently writing */ + + int method; /* compression method of file currenty wr.*/ + int raw; /* 1 for directly writing raw data */ + Byte buffered_data[Z_BUFSIZE];/* buffer contain compressed data to be writ*/ + uLong dosDate; + uLong crc32; + int encrypt; +#ifndef NOCRYPT + unsigned long keys[3]; /* keys defining the pseudo-random sequence */ + const unsigned long* pcrc_32_tab; + int crypt_header_size; +#endif +} curfile_info; + +typedef struct +{ + zlib_filefunc_def z_filefunc; + voidpf filestream; /* io structore of the zipfile */ + linkedlist_data central_dir;/* datablock with central dir in construction*/ + int in_opened_file_inzip; /* 1 if a file in the zip is currently writ.*/ + curfile_info ci; /* info on the file curretly writing */ + + uLong begin_pos; /* position of the beginning of the zipfile */ + uLong add_position_when_writting_offset; + uLong number_entry; +} zip_internal; + + + +#ifndef NOCRYPT +#define INCLUDECRYPTINGCODE_IFCRYPTALLOWED +#include "crypt.h" +#endif + +local linkedlist_datablock_internal* allocate_new_datablock() +{ + linkedlist_datablock_internal* ldi; + ldi = (linkedlist_datablock_internal*) + ALLOC(sizeof(linkedlist_datablock_internal)); + if (ldi!=NULL) + { + ldi->next_datablock = NULL ; + ldi->filled_in_this_block = 0 ; + ldi->avail_in_this_block = SIZEDATA_INDATABLOCK ; + } + return ldi; +} + +local void free_datablock(ldi) + linkedlist_datablock_internal* ldi; +{ + while (ldi!=NULL) + { + linkedlist_datablock_internal* ldinext = ldi->next_datablock; + TRYFREE(ldi); + ldi = ldinext; + } +} + +local void init_linkedlist(ll) + linkedlist_data* ll; +{ + ll->first_block = ll->last_block = NULL; +} + +local void free_linkedlist(ll) + linkedlist_data* ll; +{ + free_datablock(ll->first_block); + 
ll->first_block = ll->last_block = NULL; +} + + +local int add_data_in_datablock(ll,buf,len) + linkedlist_data* ll; + const void* buf; + uLong len; +{ + linkedlist_datablock_internal* ldi; + const unsigned char* from_copy; + + if (ll==NULL) + return ZIP_INTERNALERROR; + + if (ll->last_block == NULL) + { + ll->first_block = ll->last_block = allocate_new_datablock(); + if (ll->first_block == NULL) + return ZIP_INTERNALERROR; + } + + ldi = ll->last_block; + from_copy = (unsigned char*)buf; + + while (len>0) + { + uInt copy_this; + uInt i; + unsigned char* to_copy; + + if (ldi->avail_in_this_block==0) + { + ldi->next_datablock = allocate_new_datablock(); + if (ldi->next_datablock == NULL) + return ZIP_INTERNALERROR; + ldi = ldi->next_datablock ; + ll->last_block = ldi; + } + + if (ldi->avail_in_this_block < len) + copy_this = (uInt)ldi->avail_in_this_block; + else + copy_this = (uInt)len; + + to_copy = &(ldi->data[ldi->filled_in_this_block]); + + for (i=0;i<copy_this;i++) + *(to_copy+i)=*(from_copy+i); + + ldi->filled_in_this_block += copy_this; + ldi->avail_in_this_block -= copy_this; + from_copy += copy_this ; + len -= copy_this; + } + return ZIP_OK; +} + + + +/****************************************************************************/ + +#ifndef NO_ADDFILEINEXISTINGZIP +/* =========================================================================== + Inputs a long in LSB order to the given file + nbByte == 1, 2 or 4 (byte, short or long) +*/ + +local int ziplocal_putValue OF((const zlib_filefunc_def* pzlib_filefunc_def, + voidpf filestream, uLong x, int nbByte)); +local int ziplocal_putValue (pzlib_filefunc_def, filestream, x, nbByte) + const zlib_filefunc_def* pzlib_filefunc_def; + voidpf filestream; + uLong x; + int nbByte; +{ + unsigned char buf[4]; + int n; + for (n = 0; n < nbByte; n++) { + buf[n] = (unsigned char)(x & 0xff); + x >>= 8; + } + if (x != 0) { // data overflow - hack for ZIP64 + for (n = 0; n < nbByte; n++) { + buf[n] = 0xff; + } + } + if 
(ZWRITE(*pzlib_filefunc_def,filestream,buf,nbByte)!=(uLong)nbByte) + return ZIP_ERRNO; + else + return ZIP_OK; +} + +local void ziplocal_putValue_inmemory OF((void* dest, uLong x, int nbByte)); +local void ziplocal_putValue_inmemory (dest, x, nbByte) + void* dest; + uLong x; + int nbByte; +{ + unsigned char* buf=(unsigned char*)dest; + int n; + for (n = 0; n < nbByte; n++) { + buf[n] = (unsigned char)(x & 0xff); + x >>= 8; + } + if (x != 0) { // data overflow - hack for ZIP64 + for (n = 0; n < nbByte; n++) { + buf[n] = 0xff; + } + } +} +/****************************************************************************/ + + +local uLong ziplocal_TmzDateToDosDate(ptm,dosDate) + const tm_zip* ptm; + uLong dosDate; +{ + uLong year = (uLong)ptm->tm_year; + if (year>1980) + year-=1980; + else if (year>80) + year-=80; + return + (uLong) (((ptm->tm_mday) + (32 * (ptm->tm_mon+1)) + (512 * year)) << 16) | + ((ptm->tm_sec/2) + (32* ptm->tm_min) + (2048 * (uLong)ptm->tm_hour)); +} + + +/****************************************************************************/ + +local int ziplocal_getByte OF(( + const zlib_filefunc_def* pzlib_filefunc_def, + voidpf filestream, + int *pi)); + +local int ziplocal_getByte(pzlib_filefunc_def,filestream,pi) + const zlib_filefunc_def* pzlib_filefunc_def; + voidpf filestream; + int *pi; +{ + unsigned char c; + int err = (int)ZREAD(*pzlib_filefunc_def,filestream,&c,1); + if (err==1) + { + *pi = (int)c; + return ZIP_OK; + } + else + { + if (ZERROR(*pzlib_filefunc_def,filestream)) + return ZIP_ERRNO; + else + return ZIP_EOF; + } +} + + +/* =========================================================================== + Reads a long in LSB order from the given gz_stream. 
Sets +*/ +local int ziplocal_getShort OF(( + const zlib_filefunc_def* pzlib_filefunc_def, + voidpf filestream, + uLong *pX)); + +local int ziplocal_getShort (pzlib_filefunc_def,filestream,pX) + const zlib_filefunc_def* pzlib_filefunc_def; + voidpf filestream; + uLong *pX; +{ + uLong x ; + int i; + int err; + + err = ziplocal_getByte(pzlib_filefunc_def,filestream,&i); + x = (uLong)i; + + if (err==ZIP_OK) + err = ziplocal_getByte(pzlib_filefunc_def,filestream,&i); + x += ((uLong)i)<<8; + + if (err==ZIP_OK) + *pX = x; + else + *pX = 0; + return err; +} + +local int ziplocal_getLong OF(( + const zlib_filefunc_def* pzlib_filefunc_def, + voidpf filestream, + uLong *pX)); + +local int ziplocal_getLong (pzlib_filefunc_def,filestream,pX) + const zlib_filefunc_def* pzlib_filefunc_def; + voidpf filestream; + uLong *pX; +{ + uLong x ; + int i; + int err; + + err = ziplocal_getByte(pzlib_filefunc_def,filestream,&i); + x = (uLong)i; + + if (err==ZIP_OK) + err = ziplocal_getByte(pzlib_filefunc_def,filestream,&i); + x += ((uLong)i)<<8; + + if (err==ZIP_OK) + err = ziplocal_getByte(pzlib_filefunc_def,filestream,&i); + x += ((uLong)i)<<16; + + if (err==ZIP_OK) + err = ziplocal_getByte(pzlib_filefunc_def,filestream,&i); + x += ((uLong)i)<<24; + + if (err==ZIP_OK) + *pX = x; + else + *pX = 0; + return err; +} + +#ifndef BUFREADCOMMENT +#define BUFREADCOMMENT (0x400) +#endif +/* + Locate the Central directory of a zipfile (at the end, just before + the global comment) +*/ +local uLong ziplocal_SearchCentralDir OF(( + const zlib_filefunc_def* pzlib_filefunc_def, + voidpf filestream)); + +local uLong ziplocal_SearchCentralDir(pzlib_filefunc_def,filestream) + const zlib_filefunc_def* pzlib_filefunc_def; + voidpf filestream; +{ + unsigned char* buf; + uLong uSizeFile; + uLong uBackRead; + uLong uMaxBack=0xffff; /* maximum size of global comment */ + uLong uPosFound=0; + + if (ZSEEK(*pzlib_filefunc_def,filestream,0,ZLIB_FILEFUNC_SEEK_END) != 0) + return 0; + + + uSizeFile = 
ZTELL(*pzlib_filefunc_def,filestream); + + if (uMaxBack>uSizeFile) + uMaxBack = uSizeFile; + + buf = (unsigned char*)ALLOC(BUFREADCOMMENT+4); + if (buf==NULL) + return 0; + + uBackRead = 4; + while (uBackRead<uMaxBack) + { + uLong uReadSize,uReadPos ; + int i; + if (uBackRead+BUFREADCOMMENT>uMaxBack) + uBackRead = uMaxBack; + else + uBackRead+=BUFREADCOMMENT; + uReadPos = uSizeFile-uBackRead ; + + uReadSize = ((BUFREADCOMMENT+4) < (uSizeFile-uReadPos)) ? + (BUFREADCOMMENT+4) : (uSizeFile-uReadPos); + if (ZSEEK(*pzlib_filefunc_def,filestream,uReadPos,ZLIB_FILEFUNC_SEEK_SET)!=0) + break; + + if (ZREAD(*pzlib_filefunc_def,filestream,buf,uReadSize)!=uReadSize) + break; + + for (i=(int)uReadSize-3; (i--)>0;) + if (((*(buf+i))==0x50) && ((*(buf+i+1))==0x4b) && + ((*(buf+i+2))==0x05) && ((*(buf+i+3))==0x06)) + { + uPosFound = uReadPos+i; + break; + } + + if (uPosFound!=0) + break; + } + TRYFREE(buf); + return uPosFound; +} +#endif /* !NO_ADDFILEINEXISTINGZIP*/ + +/************************************************************/ +extern zipFile ZEXPORT zipOpen2 (pathname, append, globalcomment, pzlib_filefunc_def) + const char *pathname; + int append; + zipcharpc* globalcomment; + zlib_filefunc_def* pzlib_filefunc_def; +{ + zip_internal ziinit; + zip_internal* zi; + int err=ZIP_OK; + + + if (pzlib_filefunc_def==NULL) + fill_fopen_filefunc(&ziinit.z_filefunc); + else + ziinit.z_filefunc = *pzlib_filefunc_def; + + ziinit.filestream = (*(ziinit.z_filefunc.zopen_file)) + (ziinit.z_filefunc.opaque, + pathname, + (append == APPEND_STATUS_CREATE) ? 
+ (ZLIB_FILEFUNC_MODE_READ | ZLIB_FILEFUNC_MODE_WRITE | ZLIB_FILEFUNC_MODE_CREATE) : + (ZLIB_FILEFUNC_MODE_READ | ZLIB_FILEFUNC_MODE_WRITE | ZLIB_FILEFUNC_MODE_EXISTING)); + + if (ziinit.filestream == NULL) + return NULL; + ziinit.begin_pos = ZTELL(ziinit.z_filefunc,ziinit.filestream); + ziinit.in_opened_file_inzip = 0; + ziinit.ci.stream_initialised = 0; + ziinit.number_entry = 0; + ziinit.add_position_when_writting_offset = 0; + init_linkedlist(&(ziinit.central_dir)); + + + zi = (zip_internal*)ALLOC(sizeof(zip_internal)); + if (zi==NULL) + { + ZCLOSE(ziinit.z_filefunc,ziinit.filestream); + return NULL; + } + + /* now we add file in a zipfile */ +# ifndef NO_ADDFILEINEXISTINGZIP + if (append == APPEND_STATUS_ADDINZIP) + { + uLong byte_before_the_zipfile;/* byte before the zipfile, (>0 for sfx)*/ + + uLong size_central_dir; /* size of the central directory */ + uLong offset_central_dir; /* offset of start of central directory */ + uLong central_pos,uL; + + uLong number_disk; /* number of the current dist, used for + spaning ZIP, unsupported, always 0*/ + uLong number_disk_with_CD; /* number the the disk with central dir, used + for spaning ZIP, unsupported, always 0*/ + uLong number_entry; + uLong number_entry_CD; /* total number of entries in + the central dir + (same than number_entry on nospan) */ + uLong size_comment; + + central_pos = ziplocal_SearchCentralDir(&ziinit.z_filefunc,ziinit.filestream); + if (central_pos==0) + err=ZIP_ERRNO; + + if (ZSEEK(ziinit.z_filefunc, ziinit.filestream, + central_pos,ZLIB_FILEFUNC_SEEK_SET)!=0) + err=ZIP_ERRNO; + + /* the signature, already checked */ + if (ziplocal_getLong(&ziinit.z_filefunc, ziinit.filestream,&uL)!=ZIP_OK) + err=ZIP_ERRNO; + + /* number of this disk */ + if (ziplocal_getShort(&ziinit.z_filefunc, ziinit.filestream,&number_disk)!=ZIP_OK) + err=ZIP_ERRNO; + + /* number of the disk with the start of the central directory */ + if (ziplocal_getShort(&ziinit.z_filefunc, 
ziinit.filestream,&number_disk_with_CD)!=ZIP_OK) + err=ZIP_ERRNO; + + /* total number of entries in the central dir on this disk */ + if (ziplocal_getShort(&ziinit.z_filefunc, ziinit.filestream,&number_entry)!=ZIP_OK) + err=ZIP_ERRNO; + + /* total number of entries in the central dir */ + if (ziplocal_getShort(&ziinit.z_filefunc, ziinit.filestream,&number_entry_CD)!=ZIP_OK) + err=ZIP_ERRNO; + + if ((number_entry_CD!=number_entry) || + (number_disk_with_CD!=0) || + (number_disk!=0)) + err=ZIP_BADZIPFILE; + + /* size of the central directory */ + if (ziplocal_getLong(&ziinit.z_filefunc, ziinit.filestream,&size_central_dir)!=ZIP_OK) + err=ZIP_ERRNO; + + /* offset of start of central directory with respect to the + starting disk number */ + if (ziplocal_getLong(&ziinit.z_filefunc, ziinit.filestream,&offset_central_dir)!=ZIP_OK) + err=ZIP_ERRNO; + + /* zipfile comment length */ + if (ziplocal_getShort(&ziinit.z_filefunc, ziinit.filestream,&size_comment)!=ZIP_OK) + err=ZIP_ERRNO; + + if ((central_pos<offset_central_dir+size_central_dir) && + (err==ZIP_OK)) + err=ZIP_BADZIPFILE; + + if (err!=ZIP_OK) + { + ZCLOSE(ziinit.z_filefunc, ziinit.filestream); + return NULL; + } + + byte_before_the_zipfile = central_pos - + (offset_central_dir+size_central_dir); + ziinit.add_position_when_writting_offset = byte_before_the_zipfile ; + + { + uLong size_central_dir_to_read = size_central_dir; + size_t buf_size = SIZEDATA_INDATABLOCK; + void* buf_read = (void*)ALLOC(buf_size); + if (ZSEEK(ziinit.z_filefunc, ziinit.filestream, + offset_central_dir + byte_before_the_zipfile, + ZLIB_FILEFUNC_SEEK_SET) != 0) + err=ZIP_ERRNO; + + while ((size_central_dir_to_read>0) && (err==ZIP_OK)) + { + uLong read_this = SIZEDATA_INDATABLOCK; + if (read_this > size_central_dir_to_read) + read_this = size_central_dir_to_read; + if (ZREAD(ziinit.z_filefunc, ziinit.filestream,buf_read,read_this) != read_this) + err=ZIP_ERRNO; + + if (err==ZIP_OK) + err = add_data_in_datablock(&ziinit.central_dir,buf_read, + 
(uLong)read_this); + size_central_dir_to_read-=read_this; + } + TRYFREE(buf_read); + } + ziinit.begin_pos = byte_before_the_zipfile; + ziinit.number_entry = number_entry_CD; + + if (ZSEEK(ziinit.z_filefunc, ziinit.filestream, + offset_central_dir+byte_before_the_zipfile,ZLIB_FILEFUNC_SEEK_SET)!=0) + err=ZIP_ERRNO; + } +# endif /* !NO_ADDFILEINEXISTINGZIP*/ + + if (err != ZIP_OK) + { + TRYFREE(zi); + return NULL; + } + else + { + *zi = ziinit; + return (zipFile)zi; + } +} + +extern zipFile ZEXPORT zipOpen (pathname, append) + const char *pathname; + int append; +{ + return zipOpen2(pathname,append,NULL,NULL); +} + +extern int ZEXPORT zipOpenNewFileInZip3 (file, filename, zipfi, + extrafield_local, size_extrafield_local, + extrafield_global, size_extrafield_global, + comment, method, level, raw, + windowBits, memLevel, strategy, + password, crcForCrypting) + zipFile file; + const char* filename; + const zip_fileinfo* zipfi; + const void* extrafield_local; + uInt size_extrafield_local; + const void* extrafield_global; + uInt size_extrafield_global; + const char* comment; + int method; + int level; + int raw; + int windowBits; + int memLevel; + int strategy; + const char* password; + uLong crcForCrypting; +{ + zip_internal* zi; + uInt size_filename; + uInt size_comment; + uInt i; + int err = ZIP_OK; + +# ifdef NOCRYPT + if (password != NULL) + return ZIP_PARAMERROR; +# endif + + if (file == NULL) + return ZIP_PARAMERROR; + if ((method!=0) && (method!=Z_DEFLATED)) + return ZIP_PARAMERROR; + + zi = (zip_internal*)file; + + if (zi->in_opened_file_inzip == 1) + { + err = zipCloseFileInZip (file); + if (err != ZIP_OK) + return err; + } + + + if (filename==NULL) + filename="-"; + + if (comment==NULL) + size_comment = 0; + else + size_comment = strlen(comment); + + size_filename = strlen(filename); + + if (zipfi == NULL) + zi->ci.dosDate = 0; + else + { + if (zipfi->dosDate != 0) + zi->ci.dosDate = zipfi->dosDate; + else zi->ci.dosDate = 
ziplocal_TmzDateToDosDate(&zipfi->tmz_date,zipfi->dosDate); + } + + zi->ci.flag = 0; + if ((level==8) || (level==9)) + zi->ci.flag |= 2; + if ((level==2)) + zi->ci.flag |= 4; + if ((level==1)) + zi->ci.flag |= 6; + if (password != NULL) + zi->ci.flag |= 1; + + zi->ci.crc32 = 0; + zi->ci.method = method; + zi->ci.encrypt = 0; + zi->ci.stream_initialised = 0; + zi->ci.pos_in_buffered_data = 0; + zi->ci.raw = raw; + zi->ci.pos_local_header = ZTELL(zi->z_filefunc,zi->filestream) ; + zi->ci.size_centralheader = SIZECENTRALHEADER + size_filename + + size_extrafield_global + size_comment; + zi->ci.central_header = (char*)ALLOC((uInt)zi->ci.size_centralheader); + + ziplocal_putValue_inmemory(zi->ci.central_header,(uLong)CENTRALHEADERMAGIC,4); + /* version info */ + ziplocal_putValue_inmemory(zi->ci.central_header+4,(uLong)VERSIONMADEBY,2); + ziplocal_putValue_inmemory(zi->ci.central_header+6,(uLong)20,2); + ziplocal_putValue_inmemory(zi->ci.central_header+8,(uLong)zi->ci.flag,2); + ziplocal_putValue_inmemory(zi->ci.central_header+10,(uLong)zi->ci.method,2); + ziplocal_putValue_inmemory(zi->ci.central_header+12,(uLong)zi->ci.dosDate,4); + ziplocal_putValue_inmemory(zi->ci.central_header+16,(uLong)0,4); /*crc*/ + ziplocal_putValue_inmemory(zi->ci.central_header+20,(uLong)0,4); /*compr size*/ + ziplocal_putValue_inmemory(zi->ci.central_header+24,(uLong)0,4); /*uncompr size*/ + ziplocal_putValue_inmemory(zi->ci.central_header+28,(uLong)size_filename,2); + ziplocal_putValue_inmemory(zi->ci.central_header+30,(uLong)size_extrafield_global,2); + ziplocal_putValue_inmemory(zi->ci.central_header+32,(uLong)size_comment,2); + ziplocal_putValue_inmemory(zi->ci.central_header+34,(uLong)0,2); /*disk nm start*/ + + if (zipfi==NULL) + ziplocal_putValue_inmemory(zi->ci.central_header+36,(uLong)0,2); + else + ziplocal_putValue_inmemory(zi->ci.central_header+36,(uLong)zipfi->internal_fa,2); + + if (zipfi==NULL) + ziplocal_putValue_inmemory(zi->ci.central_header+38,(uLong)0,4); + else + 
ziplocal_putValue_inmemory(zi->ci.central_header+38,(uLong)zipfi->external_fa,4); + + ziplocal_putValue_inmemory(zi->ci.central_header+42,(uLong)zi->ci.pos_local_header- zi->add_position_when_writting_offset,4); + + for (i=0;i<size_filename;i++) + *(zi->ci.central_header+SIZECENTRALHEADER+i) = *(filename+i); + + for (i=0;i<size_extrafield_global;i++) + *(zi->ci.central_header+SIZECENTRALHEADER+size_filename+i) = + *(((const char*)extrafield_global)+i); + + for (i=0;i<size_comment;i++) + *(zi->ci.central_header+SIZECENTRALHEADER+size_filename+ + size_extrafield_global+i) = *(comment+i); + if (zi->ci.central_header == NULL) + return ZIP_INTERNALERROR; + + /* write the local header */ + err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,(uLong)LOCALHEADERMAGIC,4); + + if (err==ZIP_OK) + err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,(uLong)20,2);/* version needed to extract */ + if (err==ZIP_OK) + err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,(uLong)zi->ci.flag,2); + + if (err==ZIP_OK) + err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,(uLong)zi->ci.method,2); + + if (err==ZIP_OK) + err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,(uLong)zi->ci.dosDate,4); + + if (err==ZIP_OK) + err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,(uLong)0,4); /* crc 32, unknown */ + if (err==ZIP_OK) + err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,(uLong)0,4); /* compressed size, unknown */ + if (err==ZIP_OK) + err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,(uLong)0,4); /* uncompressed size, unknown */ + + if (err==ZIP_OK) + err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,(uLong)size_filename,2); + + if (err==ZIP_OK) + err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,(uLong)size_extrafield_local,2); + + if ((err==ZIP_OK) && (size_filename>0)) + if (ZWRITE(zi->z_filefunc,zi->filestream,filename,size_filename)!=size_filename) + err = ZIP_ERRNO; + + if ((err==ZIP_OK) && (size_extrafield_local>0)) + if 
(ZWRITE(zi->z_filefunc,zi->filestream,extrafield_local,size_extrafield_local) + !=size_extrafield_local) + err = ZIP_ERRNO; + + zi->ci.stream.avail_in = (uInt)0; + zi->ci.stream.avail_out = (uInt)Z_BUFSIZE; + zi->ci.stream.next_out = zi->ci.buffered_data; + zi->ci.stream.total_in = 0; + zi->ci.stream.total_out = 0; + + if ((err==ZIP_OK) && (zi->ci.method == Z_DEFLATED) && (!zi->ci.raw)) + { + zi->ci.stream.zalloc = (alloc_func)0; + zi->ci.stream.zfree = (free_func)0; + zi->ci.stream.opaque = (voidpf)0; + + if (windowBits>0) + windowBits = -windowBits; + + err = deflateInit2(&zi->ci.stream, level, + Z_DEFLATED, windowBits, memLevel, strategy); + + if (err==Z_OK) + zi->ci.stream_initialised = 1; + } +# ifndef NOCRYPT + zi->ci.crypt_header_size = 0; + if ((err==Z_OK) && (password != NULL)) + { + unsigned char bufHead[RAND_HEAD_LEN]; + unsigned int sizeHead; + zi->ci.encrypt = 1; + zi->ci.pcrc_32_tab = get_crc_table(); + /*init_keys(password,zi->ci.keys,zi->ci.pcrc_32_tab);*/ + + sizeHead=crypthead(password,bufHead,RAND_HEAD_LEN,zi->ci.keys,zi->ci.pcrc_32_tab,crcForCrypting); + zi->ci.crypt_header_size = sizeHead; + + if (ZWRITE(zi->z_filefunc,zi->filestream,bufHead,sizeHead) != sizeHead) + err = ZIP_ERRNO; + } +# endif + + if (err==Z_OK) + zi->in_opened_file_inzip = 1; + return err; +} + +extern int ZEXPORT zipOpenNewFileInZip2(file, filename, zipfi, + extrafield_local, size_extrafield_local, + extrafield_global, size_extrafield_global, + comment, method, level, raw) + zipFile file; + const char* filename; + const zip_fileinfo* zipfi; + const void* extrafield_local; + uInt size_extrafield_local; + const void* extrafield_global; + uInt size_extrafield_global; + const char* comment; + int method; + int level; + int raw; +{ + return zipOpenNewFileInZip3 (file, filename, zipfi, + extrafield_local, size_extrafield_local, + extrafield_global, size_extrafield_global, + comment, method, level, raw, + -MAX_WBITS, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY, + NULL, 0); +} + +extern int 
ZEXPORT zipOpenNewFileInZip (file, filename, zipfi, + extrafield_local, size_extrafield_local, + extrafield_global, size_extrafield_global, + comment, method, level) + zipFile file; + const char* filename; + const zip_fileinfo* zipfi; + const void* extrafield_local; + uInt size_extrafield_local; + const void* extrafield_global; + uInt size_extrafield_global; + const char* comment; + int method; + int level; +{ + return zipOpenNewFileInZip2 (file, filename, zipfi, + extrafield_local, size_extrafield_local, + extrafield_global, size_extrafield_global, + comment, method, level, 0); +} + +local int zipFlushWriteBuffer(zi) + zip_internal* zi; +{ + int err=ZIP_OK; + + if (zi->ci.encrypt != 0) + { +#ifndef NOCRYPT + uInt i; + int t; + for (i=0;i<zi->ci.pos_in_buffered_data;i++) + zi->ci.buffered_data[i] = zencode(zi->ci.keys, zi->ci.pcrc_32_tab, + zi->ci.buffered_data[i],t); +#endif + } + if (ZWRITE(zi->z_filefunc,zi->filestream,zi->ci.buffered_data,zi->ci.pos_in_buffered_data) + !=zi->ci.pos_in_buffered_data) + err = ZIP_ERRNO; + zi->ci.pos_in_buffered_data = 0; + return err; +} + +extern int ZEXPORT zipWriteInFileInZip (file, buf, len) + zipFile file; + const void* buf; + unsigned len; +{ + zip_internal* zi; + int err=ZIP_OK; + + if (file == NULL) + return ZIP_PARAMERROR; + zi = (zip_internal*)file; + + if (zi->in_opened_file_inzip == 0) + return ZIP_PARAMERROR; + + zi->ci.stream.next_in = (void*)buf; + zi->ci.stream.avail_in = len; + zi->ci.crc32 = crc32(zi->ci.crc32,buf,len); + + while ((err==ZIP_OK) && (zi->ci.stream.avail_in>0)) + { + if (zi->ci.stream.avail_out == 0) + { + if (zipFlushWriteBuffer(zi) == ZIP_ERRNO) + err = ZIP_ERRNO; + zi->ci.stream.avail_out = (uInt)Z_BUFSIZE; + zi->ci.stream.next_out = zi->ci.buffered_data; + } + + + if(err != ZIP_OK) + break; + + if ((zi->ci.method == Z_DEFLATED) && (!zi->ci.raw)) + { + uLong uTotalOutBefore = zi->ci.stream.total_out; + err=deflate(&zi->ci.stream, Z_NO_FLUSH); + zi->ci.pos_in_buffered_data += 
(uInt)(zi->ci.stream.total_out - uTotalOutBefore) ; + + } + else + { + uInt copy_this,i; + if (zi->ci.stream.avail_in < zi->ci.stream.avail_out) + copy_this = zi->ci.stream.avail_in; + else + copy_this = zi->ci.stream.avail_out; + for (i=0;i<copy_this;i++) + *(((char*)zi->ci.stream.next_out)+i) = + *(((const char*)zi->ci.stream.next_in)+i); + { + zi->ci.stream.avail_in -= copy_this; + zi->ci.stream.avail_out-= copy_this; + zi->ci.stream.next_in+= copy_this; + zi->ci.stream.next_out+= copy_this; + zi->ci.stream.total_in+= copy_this; + zi->ci.stream.total_out+= copy_this; + zi->ci.pos_in_buffered_data += copy_this; + } + } + } + + return err; +} + +extern int ZEXPORT zipCloseFileInZipRaw (file, uncompressed_size, crc32) + zipFile file; + uLong uncompressed_size; + uLong crc32; +{ + zip_internal* zi; + uLong compressed_size; + int err=ZIP_OK; + + if (file == NULL) + return ZIP_PARAMERROR; + zi = (zip_internal*)file; + + if (zi->in_opened_file_inzip == 0) + return ZIP_PARAMERROR; + zi->ci.stream.avail_in = 0; + + if ((zi->ci.method == Z_DEFLATED) && (!zi->ci.raw)) + while (err==ZIP_OK) + { + uLong uTotalOutBefore; + if (zi->ci.stream.avail_out == 0) + { + if (zipFlushWriteBuffer(zi) == ZIP_ERRNO) + err = ZIP_ERRNO; + zi->ci.stream.avail_out = (uInt)Z_BUFSIZE; + zi->ci.stream.next_out = zi->ci.buffered_data; + } + uTotalOutBefore = zi->ci.stream.total_out; + err=deflate(&zi->ci.stream, Z_FINISH); + zi->ci.pos_in_buffered_data += (uInt)(zi->ci.stream.total_out - uTotalOutBefore) ; + } + + if (err==Z_STREAM_END) + err=ZIP_OK; /* this is normal */ + + if ((zi->ci.pos_in_buffered_data>0) && (err==ZIP_OK)) + if (zipFlushWriteBuffer(zi)==ZIP_ERRNO) + err = ZIP_ERRNO; + + if ((zi->ci.method == Z_DEFLATED) && (!zi->ci.raw)) + { + err=deflateEnd(&zi->ci.stream); + zi->ci.stream_initialised = 0; + } + + if (!zi->ci.raw) + { + crc32 = (uLong)zi->ci.crc32; + uncompressed_size = (uLong)zi->ci.stream.total_in; + } + compressed_size = (uLong)zi->ci.stream.total_out; +# ifndef NOCRYPT 
+ compressed_size += zi->ci.crypt_header_size; +# endif + + ziplocal_putValue_inmemory(zi->ci.central_header+16,crc32,4); /*crc*/ + ziplocal_putValue_inmemory(zi->ci.central_header+20, + compressed_size,4); /*compr size*/ + if (zi->ci.stream.data_type == Z_ASCII) + ziplocal_putValue_inmemory(zi->ci.central_header+36,(uLong)Z_ASCII,2); + ziplocal_putValue_inmemory(zi->ci.central_header+24, + uncompressed_size,4); /*uncompr size*/ + + if (err==ZIP_OK) + err = add_data_in_datablock(&zi->central_dir,zi->ci.central_header, + (uLong)zi->ci.size_centralheader); + free(zi->ci.central_header); + + if (err==ZIP_OK) + { + long cur_pos_inzip = ZTELL(zi->z_filefunc,zi->filestream); + if (ZSEEK(zi->z_filefunc,zi->filestream, + zi->ci.pos_local_header + 14,ZLIB_FILEFUNC_SEEK_SET)!=0) + err = ZIP_ERRNO; + + if (err==ZIP_OK) + err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,crc32,4); /* crc 32, unknown */ + + if (err==ZIP_OK) /* compressed size, unknown */ + err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,compressed_size,4); + + if (err==ZIP_OK) /* uncompressed size, unknown */ + err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,uncompressed_size,4); + + if (ZSEEK(zi->z_filefunc,zi->filestream, + cur_pos_inzip,ZLIB_FILEFUNC_SEEK_SET)!=0) + err = ZIP_ERRNO; + } + + zi->number_entry ++; + zi->in_opened_file_inzip = 0; + + return err; +} + +extern int ZEXPORT zipCloseFileInZip (file) + zipFile file; +{ + return zipCloseFileInZipRaw (file,0,0); +} + +extern int ZEXPORT zipClose (file, global_comment) + zipFile file; + const char* global_comment; +{ + zip_internal* zi; + int err = 0; + uLong size_centraldir = 0; + uLong centraldir_pos_inzip ; + uInt size_global_comment; + if (file == NULL) + return ZIP_PARAMERROR; + zi = (zip_internal*)file; + + if (zi->in_opened_file_inzip == 1) + { + err = zipCloseFileInZip (file); + } + + if (global_comment==NULL) + size_global_comment = 0; + else + size_global_comment = strlen(global_comment); + + + centraldir_pos_inzip = 
ZTELL(zi->z_filefunc,zi->filestream); + if (err==ZIP_OK) + { + linkedlist_datablock_internal* ldi = zi->central_dir.first_block ; + while (ldi!=NULL) + { + if ((err==ZIP_OK) && (ldi->filled_in_this_block>0)) + if (ZWRITE(zi->z_filefunc,zi->filestream, + ldi->data,ldi->filled_in_this_block) + !=ldi->filled_in_this_block ) + err = ZIP_ERRNO; + + size_centraldir += ldi->filled_in_this_block; + ldi = ldi->next_datablock; + } + } + free_datablock(zi->central_dir.first_block); + + if (err==ZIP_OK) /* Magic End */ + err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,(uLong)ENDHEADERMAGIC,4); + + if (err==ZIP_OK) /* number of this disk */ + err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,(uLong)0,2); + + if (err==ZIP_OK) /* number of the disk with the start of the central directory */ + err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,(uLong)0,2); + + if (err==ZIP_OK) /* total number of entries in the central dir on this disk */ + err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,(uLong)zi->number_entry,2); + + if (err==ZIP_OK) /* total number of entries in the central dir */ + err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,(uLong)zi->number_entry,2); + + if (err==ZIP_OK) /* size of the central directory */ + err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,(uLong)size_centraldir,4); + + if (err==ZIP_OK) /* offset of start of central directory with respect to the + starting disk number */ + err = ziplocal_putValue(&zi->z_filefunc,zi->filestream, + (uLong)(centraldir_pos_inzip - zi->add_position_when_writting_offset),4); + + if (err==ZIP_OK) /* zipfile comment length */ + err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,(uLong)size_global_comment,2); + + if ((err==ZIP_OK) && (size_global_comment>0)) + if (ZWRITE(zi->z_filefunc,zi->filestream, + global_comment,size_global_comment) != size_global_comment) + err = ZIP_ERRNO; + + if (ZCLOSE(zi->z_filefunc,zi->filestream) != 0) + if (err == ZIP_OK) + err = ZIP_ERRNO; + + TRYFREE(zi); + + 
return err; +} + +extern int ZEXPORT zipFlush (file) + zipFile file; +{ + zip_internal* zi; + if (file == NULL) + return ZIP_PARAMERROR; + zi = (zip_internal*)file; + + if (zi->z_filefunc.zflush_file != NULL && zi->filestream != NULL) + return ZFLUSH(zi->z_filefunc,zi->filestream); + else + return EOF; +} diff --git a/src/minizip/zip.h b/src/minizip/zip.h new file mode 100644 index 0000000..d4337b7 --- /dev/null +++ b/src/minizip/zip.h @@ -0,0 +1,239 @@ +/* zip.h -- IO for compress .zip files using zlib + Version 1.00, September 10th, 2003 + + Copyright (C) 1998-2003 Gilles Vollant + + This unzip package allow creates .ZIP file, compatible with PKZip 2.04g + WinZip, InfoZip tools and compatible. + Encryption and multi volume ZipFile (span) are not supported. + Old compressions used by old PKZip 1.x are not supported + + For uncompress .zip file, look at unzip.h + + + I WAIT FEEDBACK at mail info@winimage.com + Visit also http://www.winimage.com/zLibDll/unzip.html for evolution + + Condition of use and distribution are the same than zlib : + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. 
+ + +*/ + +/* for more info about .ZIP format, see + http://www.info-zip.org/pub/infozip/doc/appnote-981119-iz.zip + http://www.info-zip.org/pub/infozip/doc/ + PkWare has also a specification at : + ftp://ftp.pkware.com/probdesc.zip +*/ + +#ifndef _zip_H +#define _zip_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef _ZLIB_H +#include "zlib.h" +#endif + +#ifndef _ZLIBIOAPI_H +#include "ioapi.h" +#endif + +#if defined(STRICTZIP) || defined(STRICTZIPUNZIP) +/* like the STRICT of WIN32, we define a pointer that cannot be converted + from (void*) without cast */ +typedef struct TagzipFile__ { int unused; } zipFile__; +typedef zipFile__ *zipFile; +#else +typedef voidp zipFile; +#endif + +#define ZIP_OK (0) +#define ZIP_EOF (0) +#define ZIP_ERRNO (Z_ERRNO) +#define ZIP_PARAMERROR (-102) +#define ZIP_BADZIPFILE (-103) +#define ZIP_INTERNALERROR (-104) + +#ifndef DEF_MEM_LEVEL +# if MAX_MEM_LEVEL >= 8 +# define DEF_MEM_LEVEL 8 +# else +# define DEF_MEM_LEVEL MAX_MEM_LEVEL +# endif +#endif +/* default memLevel */ + +/* tm_zip contain date/time info */ +typedef struct tm_zip_s +{ + uInt tm_sec; /* seconds after the minute - [0,59] */ + uInt tm_min; /* minutes after the hour - [0,59] */ + uInt tm_hour; /* hours since midnight - [0,23] */ + uInt tm_mday; /* day of the month - [1,31] */ + uInt tm_mon; /* months since January - [0,11] */ + uInt tm_year; /* years - [1980..2044] */ +} tm_zip; + +typedef struct +{ + tm_zip tmz_date; /* date in understandable format */ + uLong dosDate; /* if dos_date == 0, tmu_date is used */ +/* uLong flag; */ /* general purpose bit flag 2 bytes */ + + uLong internal_fa; /* internal file attributes 2 bytes */ + uLong external_fa; /* external file attributes 4 bytes */ +} zip_fileinfo; + +typedef const char* zipcharpc; + + +#define APPEND_STATUS_CREATE (0) +#define APPEND_STATUS_CREATEAFTER (1) +#define APPEND_STATUS_ADDINZIP (2) + +extern zipFile ZEXPORT zipOpen OF((const char *pathname, int append)); +/* + Create a zipfile. 
+ pathname contain on Windows XP a filename like "c:\\zlib\\zlib113.zip" or on + an Unix computer "zlib/zlib113.zip". + if the file pathname exist and append==APPEND_STATUS_CREATEAFTER, the zip + will be created at the end of the file. + (useful if the file contain a self extractor code) + if the file pathname exist and append==APPEND_STATUS_ADDINZIP, we will + add files in existing zip (be sure you don't add file that doesn't exist) + If the zipfile cannot be opened, the return value is NULL. + Else, the return value is a zipFile Handle, usable with other function + of this zip package. +*/ + +/* Note : there is no delete function into a zipfile. + If you want delete file into a zipfile, you must open a zipfile, and create another + Of couse, you can use RAW reading and writing to copy the file you did not want delte +*/ + +extern zipFile ZEXPORT zipOpen2 OF((const char *pathname, + int append, + zipcharpc* globalcomment, + zlib_filefunc_def* pzlib_filefunc_def)); + +extern int ZEXPORT zipOpenNewFileInZip OF((zipFile file, + const char* filename, + const zip_fileinfo* zipfi, + const void* extrafield_local, + uInt size_extrafield_local, + const void* extrafield_global, + uInt size_extrafield_global, + const char* comment, + int method, + int level)); +/* + Open a file in the ZIP for writing. 
+ filename : the filename in zip (if NULL, '-' without quote will be used + *zipfi contain supplemental information + if extrafield_local!=NULL and size_extrafield_local>0, extrafield_local + contains the extrafield data the the local header + if extrafield_global!=NULL and size_extrafield_global>0, extrafield_global + contains the extrafield data the the local header + if comment != NULL, comment contain the comment string + method contain the compression method (0 for store, Z_DEFLATED for deflate) + level contain the level of compression (can be Z_DEFAULT_COMPRESSION) +*/ + + +extern int ZEXPORT zipOpenNewFileInZip2 OF((zipFile file, + const char* filename, + const zip_fileinfo* zipfi, + const void* extrafield_local, + uInt size_extrafield_local, + const void* extrafield_global, + uInt size_extrafield_global, + const char* comment, + int method, + int level, + int raw)); + +/* + Same than zipOpenNewFileInZip, except if raw=1, we write raw file + */ + +extern int ZEXPORT zipOpenNewFileInZip3 OF((zipFile file, + const char* filename, + const zip_fileinfo* zipfi, + const void* extrafield_local, + uInt size_extrafield_local, + const void* extrafield_global, + uInt size_extrafield_global, + const char* comment, + int method, + int level, + int raw, + int windowBits, + int memLevel, + int strategy, + const char* password, + uLong crcForCtypting)); + +/* + Same than zipOpenNewFileInZip2, except + windowBits,memLevel,,strategy : see parameter strategy in deflateInit2 + password : crypting password (NULL for no crypting) + crcForCtypting : crc of file to compress (needed for crypting) + */ + + +extern int ZEXPORT zipWriteInFileInZip OF((zipFile file, + const void* buf, + unsigned len)); +/* + Write data in the zipfile +*/ + +extern int ZEXPORT zipCloseFileInZip OF((zipFile file)); +/* + Close the current file in the zipfile +*/ + +extern int ZEXPORT zipCloseFileInZipRaw OF((zipFile file, + uLong uncompressed_size, + uLong crc32)); +/* + Close the current file in the 
zipfile, for fiel opened with + parameter raw=1 in zipOpenNewFileInZip2 + uncompressed_size and crc32 are value for the uncompressed size +*/ + +extern int ZEXPORT zipClose OF((zipFile file, + const char* global_comment)); +/* + Close the zipfile +*/ + +extern int ZEXPORT zipFlush OF((zipFile file)); +/* + Flush the zipfile output +*/ + +#ifdef __cplusplus +} +#endif + +#endif /* _zip_H */ diff --git a/src/webhttrack b/src/webhttrack index 3b433bb..ca3f512 100755 --- a/src/webhttrack +++ b/src/webhttrack @@ -2,14 +2,16 @@ # # WebHTTrack launcher script # Initializes the htsserver GUI frontend and launch the default browser + BROWSEREXE= -SRCHBROWSEREXE="x-www-browser www-browser mozilla galeon konqueror opera netscape" +SRCHBROWSEREXE="x-www-browser www-browser mozilla firefox firebird galeon konqueror opera netscape" if test -n "${BROWSER}"; then # sensible-browser will f up if BROWSER is not set SRCHBROWSEREXE="sensible-browser ${SRCHBROWSEREXE}" fi -SRCHPATH="/usr/local/bin /usr/share/bin /usr/bin /usr/lib/httrack /usr/local/lib/httrack /usr/local/share/httrack ${HOME}/usr/bin ${HOME}/bin" -SRCHDISTPATH="/usr/share /usr/local /usr /local /usr/local/share ${HOME}/usr ${HOME}/usr/share ${HOME}/usr/local ${HOME}/usr/share" +SRCHPATH="/usr/local/bin /usr/share/bin /usr/bin /usr/lib/httrack /usr/local/lib/httrack /usr/local/share/httrack /sw/bin ${HOME}/usr/bin ${HOME}/bin" +SRCHPATH="$SRCHPATH "`echo $PATH | tr ":" " "` +SRCHDISTPATH="/usr/share /usr/local /usr /local /usr/local/share ${HOME}/usr ${HOME}/usr/share /sw ${HOME}/usr/local ${HOME}/usr/share" ### # And now some famous cuisine @@ -19,6 +21,74 @@ echo "$0($$): $@" >&2 return 0 } +function mozillabrowser { +# returns 0, if the browser is mozilla type +echo "$1" | grep -q "mozilla" +[ $? -eq 0 ] && return 0 +echo "$1" | grep -q "netscape" +[ $? -eq 0 ] && return 0 +echo "$1" | grep -q "firebird" +[ $? -eq 0 ] && return 0 +echo "$1" | grep -q "firefox" +[ $? 
-eq 0 ] && return 0 +return 1; +} +function mozillaloaded { +user_name=`logname 2>/dev/null` +if ! test -n "${user_name}"; then +user_name=`id -un` +fi +if test -n "${user_name}"; then +ps -e --user "$user_name" | grep -qE "(mozilla|netscape|firebird|firefox)" +else +false +fi +} + +function launch_browser { +log "launching $1" +start_t=`date +%s` +browser=$1 +url=$2 +moz= +if mozillaloaded; then +moz=1 +fi +# launch any browser +# if it is a mozilla like browser, check if the browser is running and use +# -remote if needed. Change the URL into openURL($url) too. +# (thanks to Torsten Werner for the patch) +# see http://www.mozilla.org/unix/remote.html +if mozillabrowser ${browser}; then + if ! ${browser} -remote "${url}"; then + log "spawning browser.." + ${browser} "${url}" + fi +else + log "spawning regular browser.." + ${browser} "${url}" +fi +# this is a real pain in the neck: browser can hiddenly use the -remote feature of +# mozilla and therefore return immediately +# this loop is the only reliable solution AFAIK +end_t=`date +%s` +if test -n "$start_t" -a -n "$end_t"; then + int_t=$[$end_t-$start_t] +else + int_t=0 +fi +if test -n "${int_t}" -a "${int_t}" -lt 60; then + if test -n "$moz"; then + log "waiting for browser to terminate.." + while mozillaloaded; do + sleep 3 + done + log "browser seems to have been closed.." + fi +fi +log "browser exited" +} + # First ensure that we can launch the server BINPATH= for i in ${SRCHPATH}; do @@ -45,6 +115,7 @@ LANGN=`grep "${HTSLANG}:" ${DISTPATH}/lang.indexes | cut -f2 -d':'` # Find the browser # note: not all systems have sensible-browser or www-browser alternative # thefeore, we have to find a bit more if sensible-browser could not be found + for i in ${SRCHBROWSEREXE}; do for j in ${SRCHPATH}; do if test -x ${j}/${i}; then @@ -58,12 +129,12 @@ test -n "$BROWSEREXE" || ! 
log "cound not find any suitable browser" || exit 1 # "browse" command if test "$1" = "browse"; then -${BROWSEREXE} "file://${HOME}/websites/index.html" +launch_browser "${BROWSEREXE}" "file://${HOME}/websites/index.html" exit $? fi # Create a temporary filename -TMPSRVFILE="/tmp/.webhttrack.$$.`/usr/bin/head -c16 /dev/random | /usr/bin/md5sum | /usr/bin/cut -f1 -d' '`" +TMPSRVFILE="/tmp/.webhttrack.$$.`head -c16 /dev/random | md5sum | cut -f1 -d' '`" >${TMPSRVFILE} || ! log "cound not create the temporary file ${TMPSRVFILE}" || exit 1 # Launch htsserver binary and setup the server (${BINPATH}/htsserver "${DISTPATH}/" path "${HOME}/websites" lang "${LANGN}" $@; echo SRVURL=error) > ${TMPSRVFILE}& @@ -74,15 +145,15 @@ while ! test -n "$SRVURL"; do MAXCOUNT=$[$MAXCOUNT - 1] test $MAXCOUNT -gt 0 || exit 1 test $MAXCOUNT -lt 50 && echo "waiting for server to reply.." -SRVURL=`/bin/grep -E URL= ${TMPSRVFILE} | /usr/bin/cut -f2- -d=` +SRVURL=`grep -E URL= ${TMPSRVFILE} | cut -f2- -d=` test ! "$SRVURL" = "error" || ! log "could not spawn htsserver" || exit 1 -test -n "$SRVURL" || /bin/sleep 1 +test -n "$SRVURL" || sleep 1 done # Cleanup function function cleanup { test -n "$1" && log "nasty signal caught, cleaning up.." -test -f ${TMPSRVFILE} && SRVPID=`/bin/grep -E PID= ${TMPSRVFILE} | /usr/bin/cut -f2- -d=` +test -f ${TMPSRVFILE} && SRVPID=`grep -E PID= ${TMPSRVFILE} | cut -f2- -d=` test -n "${SRVPID}" && kill -9 ${SRVPID} test -f ${TMPSRVFILE} && rm ${TMPSRVFILE} test -n "$1" && log "..done" @@ -93,7 +164,7 @@ return 0 trap "cleanup now; exit" 1 2 3 4 5 6 7 8 9 11 13 14 15 16 19 24 25 # Got SRVURL, launch browser -${BROWSEREXE} "${SRVURL}" +launch_browser "${BROWSEREXE}" "${SRVURL}" # That's all, folks! trap "" 1 2 3 4 5 6 7 8 9 11 13 14 15 16 19 24 25 diff --git a/src/webhttrack.dsp b/src/webhttrack.dsp index a5940e8..60e8ff9 100755 --- a/src/webhttrack.dsp +++ b/src/webhttrack.dsp @@ -42,7 +42,7 @@ RSC=rc.exe # PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /Yu"stdafx.h" /FD /c
-# ADD CPP /nologo /MD /W3 /GX /O2 /I "C:\Dev\\" /I "C:\Dev\IPv6Kit\inc\\" /I "C:\Dev\zlib\\" /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /FD /c
+# ADD CPP /nologo /MD /W3 /GX /O2 /I "C:\Dev\\" /I "C:\Dev\IPv6Kit\inc\\" /I "C:\Dev\zlib\\" /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /FR /FD /c
# SUBTRACT CPP /YX /Yc /Yu
# ADD BASE RSC /l 0x40c /d "NDEBUG"
# ADD RSC /l 0x40c /d "NDEBUG"
|