summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/Makefile.am12
-rw-r--r--src/Makefile.in159
-rw-r--r--src/hts-indextmpl.h19
-rw-r--r--src/htsalias.c26
-rw-r--r--src/htsalias.h3
-rw-r--r--src/htsback.c651
-rw-r--r--src/htsback.h19
-rw-r--r--src/htsbase.h62
-rw-r--r--src/htsbasenet.h31
-rw-r--r--src/htsbauth.c51
-rw-r--r--src/htsbauth.h7
-rw-r--r--src/htscache.c864
-rw-r--r--src/htscache.h9
-rw-r--r--src/htscatchurl.c15
-rw-r--r--src/htscatchurl.h5
-rw-r--r--src/htscore.c246
-rw-r--r--src/htscore.h63
-rw-r--r--src/htscoremain.c365
-rw-r--r--src/htscoremain.h4
-rw-r--r--src/htsdefines.h9
-rw-r--r--src/htsfilters.c6
-rw-r--r--src/htsfilters.h3
-rw-r--r--src/htsftp.c139
-rw-r--r--src/htsftp.h6
-rw-r--r--src/htsglobal.h109
-rw-r--r--src/htshash.c10
-rw-r--r--src/htshash.h3
-rw-r--r--src/htshelp.c40
-rw-r--r--src/htshelp.h3
-rw-r--r--src/htsindex.c9
-rw-r--r--src/htsindex.h3
-rw-r--r--src/htsinthash.c23
-rw-r--r--src/htsinthash.h7
-rw-r--r--src/htsjava.c11
-rw-r--r--src/htsjava.h4
-rw-r--r--src/htslib.c708
-rw-r--r--src/htslib.h214
-rw-r--r--src/htsmd5.c25
-rw-r--r--src/htsmd5.h3
-rw-r--r--src/htsmodules.c170
-rw-r--r--src/htsmodules.h10
-rw-r--r--src/htsname.c213
-rw-r--r--src/htsname.h3
-rw-r--r--src/htsnet.h14
-rw-r--r--src/htsnostatic.c5
-rw-r--r--src/htsnostatic.h75
-rw-r--r--src/htsopt.h31
-rw-r--r--src/htsparse.c4678
-rw-r--r--src/htsparse.h7
-rw-r--r--src/htsrobots.c5
-rw-r--r--src/htsrobots.h5
-rw-r--r--src/htsserver.c584
-rw-r--r--src/htsserver.h237
-rwxr-xr-xsrc/htsstrings.h138
-rw-r--r--src/htsthread.c160
-rw-r--r--src/htsthread.h27
-rw-r--r--src/htstools.c298
-rw-r--r--src/htstools.h55
-rw-r--r--src/htsweb.c34
-rw-r--r--src/htsweb.h14
-rw-r--r--src/htswizard.c45
-rw-r--r--src/htswizard.h6
-rw-r--r--src/htswrap.c3
-rw-r--r--src/htswrap.h3
-rw-r--r--src/htszlib.c68
-rw-r--r--src/htszlib.h36
-rw-r--r--src/httrack-library.h8
-rw-r--r--src/httrack.c90
-rw-r--r--src/httrack.h237
-rw-r--r--src/md5.h19
-rwxr-xr-xsrc/minizip/ChangeLogUnzip55
-rw-r--r--src/minizip/crypt.h132
-rw-r--r--src/minizip/ioapi.c196
-rw-r--r--src/minizip/ioapi.h78
-rw-r--r--src/minizip/iowin32.c275
-rw-r--r--src/minizip/iowin32.h21
-rw-r--r--src/minizip/mztools.c287
-rw-r--r--src/minizip/mztools.h31
-rw-r--r--src/minizip/unzip.c1591
-rw-r--r--src/minizip/unzip.h352
-rw-r--r--src/minizip/zip.c1199
-rw-r--r--src/minizip/zip.h239
-rwxr-xr-xsrc/webhttrack89
-rwxr-xr-xsrc/webhttrack.dsp2
84 files changed, 11710 insertions, 4061 deletions
diff --git a/src/Makefile.am b/src/Makefile.am
index c391d3f..bc74182 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -42,6 +42,7 @@ libhttrack_la_SOURCES = htscore.c htsparse.c htsback.c htscache.c \
htsmd5.c htszlib.c htsnostatic.c htswrap.c \
htsmodules.c \
md5.c \
+ minizip/ioapi.c minizip/mztools.c minizip/unzip.c minizip/zip.c \
hts-indextmpl.h htsalias.h htsback.h htsbase.h \
htsbasenet.h htsbauth.h htscache.h htscatchurl.h \
htsconfig.h htscore.h htsparse.h htscoremain.h htsdefines.h \
@@ -50,11 +51,16 @@ libhttrack_la_SOURCES = htscore.c htsparse.c htsback.c htscache.c \
htsmodules.h htsname.h htsnet.h htsnostatic.h \
htsopt.h htsrobots.h htssystem.h htsthread.h \
htstools.h htswizard.h htswrap.h htszlib.h \
- httrack-library.h md5.h
+ htsstrings.h httrack-library.h \
+ md5.h \
+ minizip/crypt.h minizip/ioapi.h minizip/mztools.h minizip/unzip.h minizip/zip.h
+
-libhttrack_la_LIBADD = $(THREADS_LIBS) $(DL_LIBS) $(SOCKET_LIBS)
+libhttrack_la_LIBADD = $(THREADS_LIBS) $(ZLIB_LIBS) $(DL_LIBS) $(SOCKET_LIBS)
libhttrack_la_LDFLAGS = -version-info $(VERSION_INFO)
EXTRA_DIST = httrack.h webhttrack \
httrack.dsp httrack.dsw \
- webhttrack.dsp webhttrack.dsw
+ webhttrack.dsp webhttrack.dsw \
+ minizip/ChangeLogUnzip minizip/iowin32.c minizip/iowin32.h
+
diff --git a/src/Makefile.in b/src/Makefile.in
index 22590c6..aa5da2a 100644
--- a/src/Makefile.in
+++ b/src/Makefile.in
@@ -71,6 +71,7 @@ INSTALL_SCRIPT = @INSTALL_SCRIPT@
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
LDFLAGS = @LDFLAGS@
LFS_FLAG = @LFS_FLAG@
+LIBOBJS = @LIBOBJS@
LIBS = @LIBS@
LIBTOOL = @LIBTOOL@
LN_S = @LN_S@
@@ -179,6 +180,7 @@ libhttrack_la_SOURCES = htscore.c htsparse.c htsback.c htscache.c \
htsmd5.c htszlib.c htsnostatic.c htswrap.c \
htsmodules.c \
md5.c \
+ minizip/ioapi.c minizip/mztools.c minizip/unzip.c minizip/zip.c \
hts-indextmpl.h htsalias.h htsback.h htsbase.h \
htsbasenet.h htsbauth.h htscache.h htscatchurl.h \
htsconfig.h htscore.h htsparse.h htscoremain.h htsdefines.h \
@@ -187,15 +189,18 @@ libhttrack_la_SOURCES = htscore.c htsparse.c htsback.c htscache.c \
htsmodules.h htsname.h htsnet.h htsnostatic.h \
htsopt.h htsrobots.h htssystem.h htsthread.h \
htstools.h htswizard.h htswrap.h htszlib.h \
- httrack-library.h md5.h
+ htsstrings.h httrack-library.h \
+ md5.h \
+ minizip/crypt.h minizip/ioapi.h minizip/mztools.h minizip/unzip.h minizip/zip.h
-libhttrack_la_LIBADD = $(THREADS_LIBS) $(DL_LIBS) $(SOCKET_LIBS)
+libhttrack_la_LIBADD = $(THREADS_LIBS) $(ZLIB_LIBS) $(DL_LIBS) $(SOCKET_LIBS)
libhttrack_la_LDFLAGS = -version-info $(VERSION_INFO)
EXTRA_DIST = httrack.h webhttrack \
httrack.dsp httrack.dsw \
- webhttrack.dsp webhttrack.dsw
+ webhttrack.dsp webhttrack.dsw \
+ minizip/ChangeLogUnzip minizip/iowin32.c minizip/iowin32.h
subdir = src
mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs
@@ -209,7 +214,8 @@ am_libhttrack_la_OBJECTS = htscore.lo htsparse.lo htsback.lo htscache.lo \
htshelp.lo htsjava.lo htslib.lo htscoremain.lo htsname.lo \
htsrobots.lo htstools.lo htswizard.lo htsalias.lo htsthread.lo \
htsindex.lo htsbauth.lo htsmd5.lo htszlib.lo htsnostatic.lo \
- htswrap.lo htsmodules.lo md5.lo
+ htswrap.lo htsmodules.lo md5.lo ioapi.lo mztools.lo unzip.lo \
+ zip.lo
libhttrack_la_OBJECTS = $(am_libhttrack_la_OBJECTS)
bin_PROGRAMS = httrack$(EXEEXT) htsserver$(EXEEXT)
PROGRAMS = $(bin_PROGRAMS)
@@ -243,7 +249,9 @@ am__depfiles_maybe = depfiles
@AMDEP_TRUE@ ./$(DEPDIR)/htstools.Plo ./$(DEPDIR)/htsweb.Po \
@AMDEP_TRUE@ ./$(DEPDIR)/htswizard.Plo ./$(DEPDIR)/htswrap.Plo \
@AMDEP_TRUE@ ./$(DEPDIR)/htszlib.Plo ./$(DEPDIR)/httrack.Po \
-@AMDEP_TRUE@ ./$(DEPDIR)/md5.Plo
+@AMDEP_TRUE@ ./$(DEPDIR)/ioapi.Plo ./$(DEPDIR)/md5.Plo \
+@AMDEP_TRUE@ ./$(DEPDIR)/mztools.Plo ./$(DEPDIR)/unzip.Plo \
+@AMDEP_TRUE@ ./$(DEPDIR)/zip.Plo
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
LTCOMPILE = $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) \
@@ -294,6 +302,10 @@ clean-libLTLIBRARIES:
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
+ioapi.lo: minizip/ioapi.c
+mztools.lo: minizip/mztools.c
+unzip.lo: minizip/unzip.c
+zip.lo: minizip/zip.c
libhttrack.la: $(libhttrack_la_OBJECTS) $(libhttrack_la_DEPENDENCIES)
$(LINK) -rpath $(libdir) $(libhttrack_la_LDFLAGS) $(libhttrack_la_OBJECTS) $(libhttrack_la_LIBADD) $(LIBS)
binPROGRAMS_INSTALL = $(INSTALL_PROGRAM)
@@ -387,7 +399,11 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htswrap.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htszlib.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/httrack.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ioapi.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/md5.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mztools.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/unzip.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/zip.Plo@am__quote@
distclean-depend:
-rm -rf ./$(DEPDIR)
@@ -425,6 +441,138 @@ distclean-depend:
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ `test -f '$<' || echo '$(srcdir)/'`$<
+ioapi.o: minizip/ioapi.c
+@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ioapi.o -MD -MP -MF "$(DEPDIR)/ioapi.Tpo" \
+@am__fastdepCC_TRUE@ -c -o ioapi.o `test -f 'minizip/ioapi.c' || echo '$(srcdir)/'`minizip/ioapi.c; \
+@am__fastdepCC_TRUE@ then mv "$(DEPDIR)/ioapi.Tpo" "$(DEPDIR)/ioapi.Po"; \
+@am__fastdepCC_TRUE@ else rm -f "$(DEPDIR)/ioapi.Tpo"; exit 1; \
+@am__fastdepCC_TRUE@ fi
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='minizip/ioapi.c' object='ioapi.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ depfile='$(DEPDIR)/ioapi.Po' tmpdepfile='$(DEPDIR)/ioapi.TPo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ioapi.o `test -f 'minizip/ioapi.c' || echo '$(srcdir)/'`minizip/ioapi.c
+
+ioapi.obj: minizip/ioapi.c
+@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ioapi.obj -MD -MP -MF "$(DEPDIR)/ioapi.Tpo" \
+@am__fastdepCC_TRUE@ -c -o ioapi.obj `if test -f 'minizip/ioapi.c'; then $(CYGPATH_W) 'minizip/ioapi.c'; else $(CYGPATH_W) '$(srcdir)/minizip/ioapi.c'`; \
+@am__fastdepCC_TRUE@ then mv "$(DEPDIR)/ioapi.Tpo" "$(DEPDIR)/ioapi.Po"; \
+@am__fastdepCC_TRUE@ else rm -f "$(DEPDIR)/ioapi.Tpo"; exit 1; \
+@am__fastdepCC_TRUE@ fi
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='minizip/ioapi.c' object='ioapi.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ depfile='$(DEPDIR)/ioapi.Po' tmpdepfile='$(DEPDIR)/ioapi.TPo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ioapi.obj `if test -f 'minizip/ioapi.c'; then $(CYGPATH_W) 'minizip/ioapi.c'; else $(CYGPATH_W) '$(srcdir)/minizip/ioapi.c'`
+
+ioapi.lo: minizip/ioapi.c
+@am__fastdepCC_TRUE@ if $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ioapi.lo -MD -MP -MF "$(DEPDIR)/ioapi.Tpo" \
+@am__fastdepCC_TRUE@ -c -o ioapi.lo `test -f 'minizip/ioapi.c' || echo '$(srcdir)/'`minizip/ioapi.c; \
+@am__fastdepCC_TRUE@ then mv "$(DEPDIR)/ioapi.Tpo" "$(DEPDIR)/ioapi.Plo"; \
+@am__fastdepCC_TRUE@ else rm -f "$(DEPDIR)/ioapi.Tpo"; exit 1; \
+@am__fastdepCC_TRUE@ fi
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='minizip/ioapi.c' object='ioapi.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ depfile='$(DEPDIR)/ioapi.Plo' tmpdepfile='$(DEPDIR)/ioapi.TPlo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ioapi.lo `test -f 'minizip/ioapi.c' || echo '$(srcdir)/'`minizip/ioapi.c
+
+mztools.o: minizip/mztools.c
+@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT mztools.o -MD -MP -MF "$(DEPDIR)/mztools.Tpo" \
+@am__fastdepCC_TRUE@ -c -o mztools.o `test -f 'minizip/mztools.c' || echo '$(srcdir)/'`minizip/mztools.c; \
+@am__fastdepCC_TRUE@ then mv "$(DEPDIR)/mztools.Tpo" "$(DEPDIR)/mztools.Po"; \
+@am__fastdepCC_TRUE@ else rm -f "$(DEPDIR)/mztools.Tpo"; exit 1; \
+@am__fastdepCC_TRUE@ fi
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='minizip/mztools.c' object='mztools.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ depfile='$(DEPDIR)/mztools.Po' tmpdepfile='$(DEPDIR)/mztools.TPo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mztools.o `test -f 'minizip/mztools.c' || echo '$(srcdir)/'`minizip/mztools.c
+
+mztools.obj: minizip/mztools.c
+@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT mztools.obj -MD -MP -MF "$(DEPDIR)/mztools.Tpo" \
+@am__fastdepCC_TRUE@ -c -o mztools.obj `if test -f 'minizip/mztools.c'; then $(CYGPATH_W) 'minizip/mztools.c'; else $(CYGPATH_W) '$(srcdir)/minizip/mztools.c'`; \
+@am__fastdepCC_TRUE@ then mv "$(DEPDIR)/mztools.Tpo" "$(DEPDIR)/mztools.Po"; \
+@am__fastdepCC_TRUE@ else rm -f "$(DEPDIR)/mztools.Tpo"; exit 1; \
+@am__fastdepCC_TRUE@ fi
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='minizip/mztools.c' object='mztools.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ depfile='$(DEPDIR)/mztools.Po' tmpdepfile='$(DEPDIR)/mztools.TPo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mztools.obj `if test -f 'minizip/mztools.c'; then $(CYGPATH_W) 'minizip/mztools.c'; else $(CYGPATH_W) '$(srcdir)/minizip/mztools.c'`
+
+mztools.lo: minizip/mztools.c
+@am__fastdepCC_TRUE@ if $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT mztools.lo -MD -MP -MF "$(DEPDIR)/mztools.Tpo" \
+@am__fastdepCC_TRUE@ -c -o mztools.lo `test -f 'minizip/mztools.c' || echo '$(srcdir)/'`minizip/mztools.c; \
+@am__fastdepCC_TRUE@ then mv "$(DEPDIR)/mztools.Tpo" "$(DEPDIR)/mztools.Plo"; \
+@am__fastdepCC_TRUE@ else rm -f "$(DEPDIR)/mztools.Tpo"; exit 1; \
+@am__fastdepCC_TRUE@ fi
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='minizip/mztools.c' object='mztools.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ depfile='$(DEPDIR)/mztools.Plo' tmpdepfile='$(DEPDIR)/mztools.TPlo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mztools.lo `test -f 'minizip/mztools.c' || echo '$(srcdir)/'`minizip/mztools.c
+
+unzip.o: minizip/unzip.c
+@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT unzip.o -MD -MP -MF "$(DEPDIR)/unzip.Tpo" \
+@am__fastdepCC_TRUE@ -c -o unzip.o `test -f 'minizip/unzip.c' || echo '$(srcdir)/'`minizip/unzip.c; \
+@am__fastdepCC_TRUE@ then mv "$(DEPDIR)/unzip.Tpo" "$(DEPDIR)/unzip.Po"; \
+@am__fastdepCC_TRUE@ else rm -f "$(DEPDIR)/unzip.Tpo"; exit 1; \
+@am__fastdepCC_TRUE@ fi
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='minizip/unzip.c' object='unzip.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ depfile='$(DEPDIR)/unzip.Po' tmpdepfile='$(DEPDIR)/unzip.TPo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o unzip.o `test -f 'minizip/unzip.c' || echo '$(srcdir)/'`minizip/unzip.c
+
+unzip.obj: minizip/unzip.c
+@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT unzip.obj -MD -MP -MF "$(DEPDIR)/unzip.Tpo" \
+@am__fastdepCC_TRUE@ -c -o unzip.obj `if test -f 'minizip/unzip.c'; then $(CYGPATH_W) 'minizip/unzip.c'; else $(CYGPATH_W) '$(srcdir)/minizip/unzip.c'`; \
+@am__fastdepCC_TRUE@ then mv "$(DEPDIR)/unzip.Tpo" "$(DEPDIR)/unzip.Po"; \
+@am__fastdepCC_TRUE@ else rm -f "$(DEPDIR)/unzip.Tpo"; exit 1; \
+@am__fastdepCC_TRUE@ fi
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='minizip/unzip.c' object='unzip.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ depfile='$(DEPDIR)/unzip.Po' tmpdepfile='$(DEPDIR)/unzip.TPo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o unzip.obj `if test -f 'minizip/unzip.c'; then $(CYGPATH_W) 'minizip/unzip.c'; else $(CYGPATH_W) '$(srcdir)/minizip/unzip.c'`
+
+unzip.lo: minizip/unzip.c
+@am__fastdepCC_TRUE@ if $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT unzip.lo -MD -MP -MF "$(DEPDIR)/unzip.Tpo" \
+@am__fastdepCC_TRUE@ -c -o unzip.lo `test -f 'minizip/unzip.c' || echo '$(srcdir)/'`minizip/unzip.c; \
+@am__fastdepCC_TRUE@ then mv "$(DEPDIR)/unzip.Tpo" "$(DEPDIR)/unzip.Plo"; \
+@am__fastdepCC_TRUE@ else rm -f "$(DEPDIR)/unzip.Tpo"; exit 1; \
+@am__fastdepCC_TRUE@ fi
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='minizip/unzip.c' object='unzip.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ depfile='$(DEPDIR)/unzip.Plo' tmpdepfile='$(DEPDIR)/unzip.TPlo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o unzip.lo `test -f 'minizip/unzip.c' || echo '$(srcdir)/'`minizip/unzip.c
+
+zip.o: minizip/zip.c
+@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT zip.o -MD -MP -MF "$(DEPDIR)/zip.Tpo" \
+@am__fastdepCC_TRUE@ -c -o zip.o `test -f 'minizip/zip.c' || echo '$(srcdir)/'`minizip/zip.c; \
+@am__fastdepCC_TRUE@ then mv "$(DEPDIR)/zip.Tpo" "$(DEPDIR)/zip.Po"; \
+@am__fastdepCC_TRUE@ else rm -f "$(DEPDIR)/zip.Tpo"; exit 1; \
+@am__fastdepCC_TRUE@ fi
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='minizip/zip.c' object='zip.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ depfile='$(DEPDIR)/zip.Po' tmpdepfile='$(DEPDIR)/zip.TPo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o zip.o `test -f 'minizip/zip.c' || echo '$(srcdir)/'`minizip/zip.c
+
+zip.obj: minizip/zip.c
+@am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT zip.obj -MD -MP -MF "$(DEPDIR)/zip.Tpo" \
+@am__fastdepCC_TRUE@ -c -o zip.obj `if test -f 'minizip/zip.c'; then $(CYGPATH_W) 'minizip/zip.c'; else $(CYGPATH_W) '$(srcdir)/minizip/zip.c'`; \
+@am__fastdepCC_TRUE@ then mv "$(DEPDIR)/zip.Tpo" "$(DEPDIR)/zip.Po"; \
+@am__fastdepCC_TRUE@ else rm -f "$(DEPDIR)/zip.Tpo"; exit 1; \
+@am__fastdepCC_TRUE@ fi
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='minizip/zip.c' object='zip.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ depfile='$(DEPDIR)/zip.Po' tmpdepfile='$(DEPDIR)/zip.TPo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o zip.obj `if test -f 'minizip/zip.c'; then $(CYGPATH_W) 'minizip/zip.c'; else $(CYGPATH_W) '$(srcdir)/minizip/zip.c'`
+
+zip.lo: minizip/zip.c
+@am__fastdepCC_TRUE@ if $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT zip.lo -MD -MP -MF "$(DEPDIR)/zip.Tpo" \
+@am__fastdepCC_TRUE@ -c -o zip.lo `test -f 'minizip/zip.c' || echo '$(srcdir)/'`minizip/zip.c; \
+@am__fastdepCC_TRUE@ then mv "$(DEPDIR)/zip.Tpo" "$(DEPDIR)/zip.Plo"; \
+@am__fastdepCC_TRUE@ else rm -f "$(DEPDIR)/zip.Tpo"; exit 1; \
+@am__fastdepCC_TRUE@ fi
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='minizip/zip.c' object='zip.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ depfile='$(DEPDIR)/zip.Plo' tmpdepfile='$(DEPDIR)/zip.TPlo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o zip.lo `test -f 'minizip/zip.c' || echo '$(srcdir)/'`minizip/zip.c
+
mostlyclean-libtool:
-rm -f *.lo
@@ -512,6 +660,7 @@ top_distdir = ..
distdir = $(top_distdir)/$(PACKAGE)-$(VERSION)
distdir: $(DISTFILES)
+ $(mkinstalldirs) $(distdir)/minizip
@srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; \
list='$(DISTFILES)'; for file in $$list; do \
case $$file in \
diff --git a/src/hts-indextmpl.h b/src/hts-indextmpl.h
index 29dd122..cddbefa 100644
--- a/src/hts-indextmpl.h
+++ b/src/hts-indextmpl.h
@@ -158,6 +158,13 @@ regen:
" </TD>"LF\
" </TR>"LF
+#define HTS_INDEX_BODYCAT \
+ "<!-- Note: Template file not found, using internal one -->"LF\
+ " <TH>"LF\
+ " <BR/>"LF\
+ " %s"LF\
+ " </TH>"LF
+
/* %s = INFO */
/* %s = META REFRESH IF ANY */
#define HTS_INDEX_FOOTER \
@@ -167,7 +174,7 @@ regen:
" <BR>"LF\
" <BR>"LF\
" <H6 ALIGN=\"RIGHT\">"LF\
- " <I>Mirror and index made by HTTrack Website Copier [XR&amp;CO'2003]</I>"LF\
+ " <I>Mirror and index made by HTTrack Website Copier [XR&amp;CO'2005]</I>"LF\
" </H6>"LF\
" %s"LF\
" <!-- Thanks for using HTTrack Website Copier! -->"LF\
@@ -186,7 +193,7 @@ regen:
""LF\
"<table width=\"76%%\" border=\"0\" align=\"center\" valign=\"bottom\" cellspacing=\"0\" cellpadding=\"0\">"LF\
" <tr>"LF\
- " <td id=\"footer\"><small>&copy; 2003 Xavier Roche & other contributors - Web Design: Kauler Leto.</small></td>"LF\
+ " <td id=\"footer\"><small>&copy; 2005 Xavier Roche & other contributors - Web Design: Kauler Leto.</small></td>"LF\
" </tr>"LF\
"</table>"LF\
""LF\
@@ -317,7 +324,7 @@ regen:
" </TABLE>"LF\
" <BR>"LF\
" <H6 ALIGN=\"RIGHT\">"LF\
- " <I>Mirror and index made by HTTrack Website Copier [XR&CO'2003]</I>"LF\
+ " <I>Mirror and index made by HTTrack Website Copier [XR&CO'2005]</I>"LF\
" </H6>"LF\
" %s"LF\
" <!-- Thanks for using HTTrack Website Copier! -->"LF\
@@ -335,7 +342,7 @@ regen:
""LF\
"<table width=\"76%%\" border=\"0\" align=\"center\" valign=\"bottom\" cellspacing=\"0\" cellpadding=\"0\">"LF\
" <tr>"LF\
- " <td id=\"footer\"><small>&copy; 2003 Xavier Roche & other contributors - Web Design: Kauler Leto.</small></td>"LF\
+ " <td id=\"footer\"><small>&copy; 2005 Xavier Roche & other contributors - Web Design: Kauler Leto.</small></td>"LF\
" </tr>"LF\
"</table>"LF\
""LF\
@@ -476,7 +483,7 @@ regen:
""LF\
"<table width=\"76%%\" height=\"100%%\" border=\"0\" align=\"center\" valign=\"bottom\" cellspacing=\"0\" cellpadding=\"0\">"LF\
" <tr>"LF\
- " <td id=\"footer\"><small>&copy; 2003 Xavier Roche & other contributors - Web Design: Kauler Leto.</small></td>"LF\
+ " <td id=\"footer\"><small>&copy; 2005 Xavier Roche & other contributors - Web Design: Kauler Leto.</small></td>"LF\
" </tr>"LF\
"</table>"LF\
""LF\
@@ -613,7 +620,7 @@ regen:
""LF\
"<table width=\"76%%\" height=\"100%%\" border=\"0\" align=\"center\" valign=\"bottom\" cellspacing=\"0\" cellpadding=\"0\">"LF\
" <tr>"LF\
- " <td id=\"footer\"><small>&copy; 2003 Xavier Roche & other contributors - Web Design: Kauler Leto.</small></td>"LF\
+ " <td id=\"footer\"><small>&copy; 2005 Xavier Roche & other contributors - Web Design: Kauler Leto.</small></td>"LF\
" </tr>"LF\
"</table>"LF\
""LF\
diff --git a/src/htsalias.c b/src/htsalias.c
index 1b65945..d2e09e1 100644
--- a/src/htsalias.c
+++ b/src/htsalias.c
@@ -35,12 +35,13 @@ Please visit our Website: http://www.httrack.com
/* Author: Xavier Roche */
/* ------------------------------------------------------------ */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
+/* Internal engine bytecode */
+#define HTS_INTERNAL_BYTECODE
+
#include "htsbase.h"
#include "htsalias.h"
#include "htsglobal.h"
+
void linput(FILE* fp,char* s,int max);
void hts_lowcase(char* s);
@@ -108,6 +109,7 @@ const char* hts_optalias[][4] = {
{"host-control","-H","param",""},
{"extended-parsing","-%P","param",""},
{"near","-n","single",""},
+ {"disable-security-limits","-%!","single",""},
{"test","-t","single",""},
{"list","-%L","param1",""},
{"urllist","-%S","param1",""},
@@ -115,7 +117,7 @@ const char* hts_optalias[][4] = {
{"structure","-N","param",""}, {"user-structure","-N","param1",""},
{"long-names","-L","param",""},
{"keep-links","-K","param",""},
- {"mime-html","-%M","param",""}, {"mht","-%M","param",""},
+ {"mime-html","-%M","single",""}, {"mht","-%M","single",""},
{"replace-external","-x","single",""},
{"disable-passwords","-%x","single",""},{"disable-password","-%x","single",""},
{"include-query-string","-%q","single",""},
@@ -135,6 +137,8 @@ const char* hts_optalias[][4] = {
{"updatehack","-%s","single",""}, {"sizehack","-%s","single",""},
{"urlhack","-%u","single",""},
{"user-agent","-F","param1","user-agent identity"},
+ {"referer","-%R","param1","default referer URL"},
+ {"from","-%E","param1","from email address"},
{"footer","-%F","param1",""},
{"cache","-C","param","number of retries for non-fatal errors"},
{"store-all-in-cache","-k","single",""},
@@ -150,7 +154,7 @@ const char* hts_optalias[][4] = {
{"priority","-p","param",""},
{"debug-headers","-%H","single",""},
{"userdef-cmd","-V","param1",""},
- {"callback","-%W","param1",""}, {"wrapper","-%W","param1",""},
+ {"callback","-%W","param1","plug an external callback"}, {"wrapper","-%W","param1","plug an external callback"},
{"structure","-N","param1","user-defined structure"},
{"usercommand","-V","param1","user-defined command"},
{"display","-%v","single","show files transfered and other funny realtime information"},
@@ -185,7 +189,10 @@ const char* hts_optalias[][4] = {
{"fast-engine","-#X","single","Enable fast routines"},
{"debug-overflows","-#X0","single","Attempt to detect buffer overflows"},
{"debug-cache","-#C","param1","List files in the cache"},
-
+ {"extract-cache","-#C","single","Extract meta-data"},
+ {"debug-parsing","-#d","single","debug: test parser"},
+ {"repair-cache","-#R","single","repair the damaged cache ZIP file"}, {"repair","-#R","single",""},
+
/* STANDARD ALIASES */
{"spider","-p0C0I0t","single",""},
{"testsite","-p0C0I0t","single",""},
@@ -226,6 +233,7 @@ const char* hts_optalias[][4] = {
{"updatehttrack","--updatehttrack","single","update HTTrack Website Copier"},
{"clean","--clean","single","clean up log files and cache"},
{"tide","--clean","single","clean up log files and cache"},
+ {"autotest","-#T","single",""},
/* */
{"","","",""}
@@ -342,7 +350,7 @@ int optalias_check(int argc,const char * const * argv,int n_arg,
return need_param;
}
- /* Check -P <path> */
+ /* Check -O <path> */
{
int pos;
if ((pos=optreal_find(argv[n_arg]))>=0) {
@@ -514,17 +522,19 @@ int optinclude_file(const char* name,
/* Get home directory, '.' if failed */
/* example: /home/smith */
char* hts_gethome(void) {
+#ifndef _WIN32_WCE
char* home = getenv( "HOME" );
if (home)
return home;
else
+#endif
return ".";
}
/* Convert ~/foo into /home/smith/foo */
void expand_home(char* str) {
if (str[0] == '~') {
- char tempo[HTS_URLMAXSIZE*2];
+ char BIGSTK tempo[HTS_URLMAXSIZE*2];
strcpybuff(tempo,hts_gethome());
strcatbuff(tempo,str+1);
strcpybuff(str,tempo);
diff --git a/src/htsalias.h b/src/htsalias.h
index e5e8f82..21c3142 100644
--- a/src/htsalias.h
+++ b/src/htsalias.h
@@ -39,6 +39,8 @@ Please visit our Website: http://www.httrack.com
#ifndef HTSALIAS_DEFH
#define HTSALIAS_DEFH
+/* Library internal definitions */
+#ifdef HTS_INTERNAL_BYTECODE
extern const char* hts_optalias[][4];
int optalias_check(int argc,const char * const * argv,int n_arg,
int* return_argc,char** return_argv,
@@ -54,5 +56,6 @@ const char* opttype_value(int p);
const char* opthelp_value(int p);
char* hts_gethome(void);
void expand_home(char* str);
+#endif
#endif
diff --git a/src/htsback.c b/src/htsback.c
index 6d0b119..317d4e7 100644
--- a/src/htsback.c
+++ b/src/htsback.c
@@ -35,15 +35,15 @@ Please visit our Website: http://www.httrack.com
/* Author: Xavier Roche */
/* ------------------------------------------------------------ */
+/* Internal engine bytecode */
+#define HTS_INTERNAL_BYTECODE
+
#include "htsback.h"
/* specific definitions */
#include "htsbase.h"
#include "htsnet.h"
#include "htsthread.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
#include <time.h>
/* END specific definitions */
@@ -51,14 +51,18 @@ Please visit our Website: http://www.httrack.com
#include "htsftp.h"
#if HTS_USEZLIB
#include "htszlib.h"
+#else
+#error HTS_USEZLIB not defined
#endif
//#endif
#if HTS_WIN
#ifndef __cplusplus
// DOS
+#ifndef _WIN32_WCE
#include <process.h> /* _beginthread, _endthread */
#endif
+#endif
#else
#endif
@@ -142,228 +146,243 @@ int back_nsoc_overall(lien_back* back,int back_max) {
// fermer les paramètres de transfert,
// et notamment vérifier les fichiers compressés (décompresser), callback etc.
int back_finalize(httrackp* opt,cache_back* cache,lien_back* back,int p) {
- if (
+ /* Don't store broken files */
+ if (back[p].r.totalsize > 0 && back[p].r.size != back[p].r.totalsize && ! opt->tolerant) {
+ return -1;
+ }
+
+ /* Store ? */
+ if (!back[p].finalized) {
+ back[p].finalized = 1;
+ if (
(back[p].status == 0) // ready
&&
- (!back[p].testmode) // not test mode
- &&
(back[p].r.statuscode>0) // not internal error
) {
- char* state="unknown";
-
- /* décompression */
+ if (!back[p].testmode) { // not test mode
+ char* state="unknown";
+
+ /* décompression */
#if HTS_USEZLIB
- if (gz_is_available && back[p].r.compressed) {
- if (back[p].r.size > 0) {
- //if ( (back[p].r.adr) && (back[p].r.size>0) ) {
- // stats
- back[p].compressed_size=back[p].r.size;
- // en mémoire -> passage sur disque
- if (!back[p].r.is_write) {
- back[p].tmpfile_buffer[0]='\0';
- back[p].tmpfile=tmpnam(back[p].tmpfile_buffer);
- if (back[p].tmpfile != NULL && back[p].tmpfile[0] != '\0') {
- back[p].r.out=fopen(back[p].tmpfile,"wb");
- if (back[p].r.out) {
- if ((back[p].r.adr) && (back[p].r.size>0)) {
- if (fwrite(back[p].r.adr,1,(INTsys)back[p].r.size,back[p].r.out) != back[p].r.size) {
+ if (gz_is_available && back[p].r.compressed) {
+ if (back[p].r.size > 0) {
+ //if ( (back[p].r.adr) && (back[p].r.size>0) ) {
+ // stats
+ back[p].compressed_size=back[p].r.size;
+ // en mémoire -> passage sur disque
+ if (!back[p].r.is_write) {
+ back[p].tmpfile_buffer[0]='\0';
+ back[p].tmpfile=tmpnam(back[p].tmpfile_buffer);
+ if (back[p].tmpfile != NULL && back[p].tmpfile[0] != '\0') {
+ back[p].r.out=fopen(back[p].tmpfile,"wb");
+ if (back[p].r.out) {
+ if ((back[p].r.adr) && (back[p].r.size>0)) {
+ if (fwrite(back[p].r.adr,1,(INTsys)back[p].r.size,back[p].r.out) != back[p].r.size) {
+ back[p].r.statuscode=-1;
+ strcpybuff(back[p].r.msg,"Write error when decompressing");
+ }
+ } else {
+ back[p].tmpfile[0]='\0';
back[p].r.statuscode=-1;
- strcpybuff(back[p].r.msg,"Write error when decompressing");
+ strcpybuff(back[p].r.msg,"Empty compressed file");
}
} else {
back[p].tmpfile[0]='\0';
back[p].r.statuscode=-1;
- strcpybuff(back[p].r.msg,"Empty compressed file");
+ strcpybuff(back[p].r.msg,"Open error when decompressing");
}
- } else {
- back[p].tmpfile[0]='\0';
- back[p].r.statuscode=-1;
- strcpybuff(back[p].r.msg,"Open error when decompressing");
}
}
- }
- // fermer fichier sortie
- if (back[p].r.out!=NULL) {
- fclose(back[p].r.out);
- back[p].r.out=NULL;
- }
- // décompression
- if (back[p].tmpfile != NULL && back[p].tmpfile[0] != '\0' && back[p].url_sav[0]) {
- LLint size;
- filecreateempty(back[p].url_sav); // filenote & co
- if ((size = hts_zunpack(back[p].tmpfile,back[p].url_sav))>=0) {
- back[p].r.size=back[p].r.totalsize=size;
- // fichier -> mémoire
- if (!back[p].r.is_write) {
- deleteaddr(&back[p].r);
- back[p].r.adr=readfile(back[p].url_sav);
- if (!back[p].r.adr) {
- back[p].r.statuscode=-1;
- strcpybuff(back[p].r.msg,"Read error when decompressing");
+ // fermer fichier sortie
+ if (back[p].r.out!=NULL) {
+ fclose(back[p].r.out);
+ back[p].r.out=NULL;
+ }
+ // décompression
+ if (back[p].tmpfile != NULL && back[p].tmpfile[0] != '\0' && back[p].url_sav[0]) {
+ LLint size;
+ filecreateempty(back[p].url_sav); // filenote & co
+ if ((size = hts_zunpack(back[p].tmpfile,back[p].url_sav))>=0) {
+ back[p].r.size=back[p].r.totalsize=size;
+ // fichier -> mémoire
+ if (!back[p].r.is_write) {
+ deleteaddr(&back[p].r);
+ back[p].r.adr=readfile(back[p].url_sav);
+ if (!back[p].r.adr) {
+ back[p].r.statuscode=-1;
+ strcpybuff(back[p].r.msg,"Read error when decompressing");
+ }
+ remove(back[p].url_sav);
}
- remove(back[p].url_sav);
}
+ remove(back[p].tmpfile);
}
- remove(back[p].tmpfile);
+ // stats
+ HTS_STAT.total_packed+=back[p].compressed_size;
+ HTS_STAT.total_unpacked+=back[p].r.size;
+ HTS_STAT.total_packedfiles++;
+ // unflag
}
- // stats
- HTS_STAT.total_packed+=back[p].compressed_size;
- HTS_STAT.total_unpacked+=back[p].r.size;
- HTS_STAT.total_packedfiles++;
- // unflag
}
- }
- back[p].r.compressed=0;
+ back[p].r.compressed=0;
#endif
-
- /* Stats */
- if (cache->txt) {
- char flags[32];
- char s[256];
- time_t tt;
- struct tm* A;
- tt=time(NULL);
- A=localtime(&tt);
- if (A == NULL) {
- int localtime_returned_null=0;
- assert(localtime_returned_null);
- }
- strftime(s,250,"%H:%M:%S",A);
- flags[0]='\0';
- /* input flags */
- if (back[p].is_update)
- strcatbuff(flags, "U"); // update request
- else
- strcatbuff(flags, "-");
- if (back[p].range_req_size)
- strcatbuff(flags, "R"); // range request
- else
- strcatbuff(flags, "-");
- /* state flags */
- if (back[p].r.is_file) // direct to disk
- strcatbuff(flags, "F");
- else
- strcatbuff(flags, "-");
- /* output flags */
- if (!back[p].r.notmodified)
- strcatbuff(flags, "M"); // modified
- else
- strcatbuff(flags, "-");
- if (back[p].r.is_chunk) // chunked
- strcatbuff(flags, "C");
- else
- strcatbuff(flags, "-");
- if (back[p].r.compressed)
- strcatbuff(flags, "Z"); // gzip
- else
- strcatbuff(flags, "-");
- /* Err I had to split these.. */
- fprintf(cache->txt,"%s\t", s);
- fprintf(cache->txt,LLintP"/", (LLint)back[p].r.size);
- fprintf(cache->txt,LLintP,(LLint)back[p].r.totalsize);
- fprintf(cache->txt,"\t%s\t",flags);
- }
- if (back[p].r.statuscode==200) {
- if (back[p].r.size>=0) {
- if (strcmp(back[p].url_fil,"/robots.txt") !=0 ) {
- HTS_STAT.stat_bytes+=back[p].r.size;
- HTS_STAT.stat_files++;
+ /* Stats */
+ if (cache->txt) {
+ char flags[32];
+ char s[256];
+ time_t tt;
+ struct tm* A;
+ tt=time(NULL);
+ A=localtime(&tt);
+ if (A == NULL) {
+ int localtime_returned_null=0;
+ assert(localtime_returned_null);
}
- if ( (!back[p].r.notmodified) && (opt->is_update) ) {
- HTS_STAT.stat_updated_files++; // page modifiée
- if (opt->log!=NULL) {
- fspc(opt->log,"info");
- if (back[p].is_update) {
- fprintf(opt->log,"engine: transfer-status: link updated: %s%s -> %s"LF,back[p].url_adr,back[p].url_fil,back[p].url_sav);
- } else {
- fprintf(opt->log,"engine: transfer-status: link added: %s%s -> %s"LF,back[p].url_adr,back[p].url_fil,back[p].url_sav);
- }
- test_flush;
+ strftime(s,250,"%H:%M:%S",A);
+
+ flags[0]='\0';
+ /* input flags */
+ if (back[p].is_update)
+ strcatbuff(flags, "U"); // update request
+ else
+ strcatbuff(flags, "-");
+ if (back[p].range_req_size)
+ strcatbuff(flags, "R"); // range request
+ else
+ strcatbuff(flags, "-");
+ /* state flags */
+ if (back[p].r.is_file) // direct to disk
+ strcatbuff(flags, "F");
+ else
+ strcatbuff(flags, "-");
+ /* output flags */
+ if (!back[p].r.notmodified)
+ strcatbuff(flags, "M"); // modified
+ else
+ strcatbuff(flags, "-");
+ if (back[p].r.is_chunk) // chunked
+ strcatbuff(flags, "C");
+ else
+ strcatbuff(flags, "-");
+ if (back[p].r.compressed)
+ strcatbuff(flags, "Z"); // gzip
+ else
+ strcatbuff(flags, "-");
+ /* Err I had to split these.. */
+ fprintf(cache->txt,"%s\t", s);
+ fprintf(cache->txt,LLintP"/", (LLint)back[p].r.size);
+ fprintf(cache->txt,LLintP,(LLint)back[p].r.totalsize);
+ fprintf(cache->txt,"\t%s\t",flags);
+ }
+ if (back[p].r.statuscode==200) {
+ if (back[p].r.size>=0) {
+ if (strcmp(back[p].url_fil,"/robots.txt") !=0 ) {
+ HTS_STAT.stat_bytes+=back[p].r.size;
+ HTS_STAT.stat_files++;
}
- if (cache->txt) {
- if (back[p].is_update) {
- state="updated";
- } else {
- state="added";
+ if ( (!back[p].r.notmodified) && (opt->is_update) ) {
+ HTS_STAT.stat_updated_files++; // page modifiée
+ if (opt->log!=NULL) {
+ fspc(opt->log,"info");
+ if (back[p].is_update) {
+ fprintf(opt->log,"engine: transfer-status: link updated: %s%s -> %s"LF,back[p].url_adr,back[p].url_fil,back[p].url_sav);
+ } else {
+ fprintf(opt->log,"engine: transfer-status: link added: %s%s -> %s"LF,back[p].url_adr,back[p].url_fil,back[p].url_sav);
+ }
+ test_flush;
+ }
+ if (cache->txt) {
+ if (back[p].is_update) {
+ state="updated";
+ } else {
+ state="added";
+ }
+ }
+ } else {
+ if ( (opt->debug>0) && (opt->log!=NULL) ) {
+ fspc(opt->log,"info"); fprintf(opt->log,"engine: transfer-status: link recorded: %s%s -> %s"LF,back[p].url_adr,back[p].url_fil,back[p].url_sav);
+ test_flush;
+ }
+ if (cache->txt) {
+ if (opt->is_update)
+ state="untouched";
+ else
+ state="added";
}
}
} else {
if ( (opt->debug>0) && (opt->log!=NULL) ) {
- fspc(opt->log,"info"); fprintf(opt->log,"engine: transfer-status: link recorded: %s%s -> %s"LF,back[p].url_adr,back[p].url_fil,back[p].url_sav);
+ fspc(opt->log,"info"); fprintf(opt->log,"engine: transfer-status: empty file? (%d, '%s'): %s%s"LF,back[p].r.statuscode,back[p].r.msg,back[p].url_adr,back[p].url_fil);
test_flush;
}
if (cache->txt) {
- if (opt->is_update)
- state="untouched";
- else
- state="added";
+ state="empty";
}
}
} else {
if ( (opt->debug>0) && (opt->log!=NULL) ) {
- fspc(opt->log,"info"); fprintf(opt->log,"engine: transfer-status: empty file? (%d, '%s'): %s%s"LF,back[p].r.statuscode,back[p].r.msg,back[p].url_adr,back[p].url_fil);
- test_flush;
+ fspc(opt->log,"info"); fprintf(opt->log,"engine: transfer-status: link error (%d, '%s'): %s%s"LF,back[p].r.statuscode,back[p].r.msg,back[p].url_adr,back[p].url_fil);
}
if (cache->txt) {
- state="empty";
+ state="error";
}
}
- } else {
- if ( (opt->debug>0) && (opt->log!=NULL) ) {
- fspc(opt->log,"info"); fprintf(opt->log,"engine: transfer-status: link error (%d, '%s'): %s%s"LF,back[p].r.statuscode,back[p].r.msg,back[p].url_adr,back[p].url_fil);
- }
if (cache->txt) {
- state="error";
+ fprintf(cache->txt,
+ "%d\t"
+ "%s ('%s')\t"
+ "%s\t"
+ "%s%s\t"
+ "%s%s\t%s\t"
+ "(from %s%s)"
+ LF,
+ back[p].r.statuscode,
+ state, escape_check_url_addr(back[p].r.msg),
+ escape_check_url_addr(back[p].r.contenttype),
+ ((back[p].r.etag[0])?"etag:":((back[p].r.lastmodified[0])?"date:":"")), escape_check_url_addr((back[p].r.etag[0])?back[p].r.etag:(back[p].r.lastmodified)),
+ escape_check_url_addr(back[p].url_adr),escape_check_url_addr(back[p].url_fil),escape_check_url_addr(back[p].url_sav),
+ escape_check_url_addr(back[p].referer_adr),escape_check_url_addr(back[p].referer_fil)
+ );
+ if (opt->flush)
+ fflush(cache->txt);
}
- }
- if (cache->txt) {
- fprintf(cache->txt,
- "%d\t"
- "%s ('%s')\t"
- "%s\t"
- "%s%s\t"
- "%s%s\t%s\t"
- "(from %s%s)"
- LF,
- back[p].r.statuscode,
- state, escape_check_url_addr(back[p].r.msg),
- escape_check_url_addr(back[p].r.contenttype),
- ((back[p].r.etag[0])?"etag:":((back[p].r.lastmodified[0])?"date:":"")), escape_check_url_addr((back[p].r.etag[0])?back[p].r.etag:(back[p].r.lastmodified)),
- escape_check_url_addr(back[p].url_adr),escape_check_url_addr(back[p].url_fil),escape_check_url_addr(back[p].url_sav),
- escape_check_url_addr(back[p].referer_adr),escape_check_url_addr(back[p].referer_fil)
- );
- if (opt->flush)
- fflush(cache->txt);
- }
-
- /* Cache */
- cache_mayadd(opt,cache,&back[p].r,back[p].url_adr,back[p].url_fil,back[p].url_sav);
-
- // status finished callback
+
+ /* Cache */
+ cache_mayadd(opt,cache,&back[p].r,back[p].url_adr,back[p].url_fil,back[p].url_sav);
+
+ // status finished callback
#if HTS_ANALYSTE
- hts_htmlcheck_xfrstatus(&back[p]);
+ hts_htmlcheck_xfrstatus(&back[p]);
#endif
- return 0;
+ return 0;
+ } else { // testmode
+ if (back[p].r.statuscode / 100 >= 3) { /* Store 3XX, 4XX, 5XX test response codes, but NOT 2XX */
+ /* Cache */
+ cache_mayadd(opt,cache,&back[p].r,back[p].url_adr,back[p].url_fil,NULL);
+ }
+ }
+ }
}
return -1;
}
/* try to keep the connection alive */
-int back_letlive(httrackp* opt, lien_back* back, int p) {
+int back_letlive(httrackp* opt, cache_back* cache, lien_back* back, int p) {
+ int checkerror;
htsblk* src = &back[p].r;
if (src && !src->is_file
&& src->soc != INVALID_SOCKET
&& src->statuscode >= 0 /* no timeout errors & co */
&& src->keep_alive_trailers == 0 /* not yet supported (chunk trailers) */
- && !check_sockerror(src->soc)
+ && ! ( checkerror = check_sockerror(src->soc) )
/*&& !check_sockdata(src->soc)*/ /* no unexpected data */
) {
htsblk tmp;
memset(&tmp, 0, sizeof(tmp));
/* clear everything but connection: switch, close, and reswitch */
back_connxfr(src, &tmp);
- back_delete(opt, back, p);
+ back_delete(opt, cache, back, p);
//deletehttp(src);
back_connxfr(&tmp, src);
src->req.flush_garbage=1; /* ignore CRLF garbage */
@@ -392,17 +411,25 @@ void back_connxfr(htsblk* src, htsblk* dst) {
}
// clear, or leave for keep-alive
-int back_maydelete(httrackp* opt,lien_back* back, int p) {
+int back_maydelete(httrackp* opt,cache_back* cache,lien_back* back, int p) {
if (p>=0) { // on sait jamais..
- if (!opt->nokeepalive
+ if (
+ /* Keep-alive authorized by user */
+ !opt->nokeepalive
+ /* Socket currently is keep-alive! */
&& back[p].r.keep_alive
+ /* Remaining authorized requests */
&& back[p].r.keep_alive_max > 1
+ /* Known keep-alive start (security) */
&& back[p].ka_time_start
+ /* We're on time */
&& time_local() < back[p].ka_time_start + back[p].r.keep_alive_t
+ /* Connection delay must not exceed keep-alive timeout */
+ && ( opt->maxconn <= 0 || ( back[p].r.keep_alive_t > ( 1.0 / opt->maxconn ) ) )
) {
lien_back tmp;
strcpybuff(tmp.url_adr, back[p].url_adr);
- if (back_letlive(opt, back, p)) {
+ if (back_letlive(opt, cache, back, p)) {
strcpybuff(back[p].url_adr, tmp.url_adr);
back[p].status = -103; // alive & waiting
if ((opt->debug>1) && (opt->log!=NULL)) {
@@ -413,21 +440,37 @@ int back_maydelete(httrackp* opt,lien_back* back, int p) {
return 1;
}
}
- back_delete(opt,back, p);
+ back_delete(opt,cache,back, p);
}
return 0;
}
// clear, or leave for keep-alive
-void back_maydeletehttp(httrackp* opt, lien_back* back, int back_max, int p) {
+void back_maydeletehttp(httrackp* opt, cache_back* cache, lien_back* back, int back_max, int p) {
+ TStamp lt = 0;
if (back[p].r.soc!=INVALID_SOCKET) {
int q;
- if (!opt->nokeepalive
+ if (
+ back[p].r.soc != INVALID_SOCKET /* security check */
+ && back[p].r.statuscode >= 0 /* no timeout errors & co */
+ && back[p].r.keep_alive_trailers == 0 /* not yet supported (chunk trailers) */
+ /* Socket not in I/O error status */
+ && !back[p].r.is_file
+ && !check_sockerror(back[p].r.soc)
+ /* Keep-alive authorized by user */
+ && !opt->nokeepalive
+ /* Socket currently is keep-alive! */
&& back[p].r.keep_alive
+ /* Remaining authorized requests */
&& back[p].r.keep_alive_max > 1
+ /* Known keep-alive start (security) */
&& back[p].ka_time_start
- && time_local() < back[p].ka_time_start + back[p].r.keep_alive_t
- && ( q = back_search(opt, back, back_max) ) >= 0
+ /* We're on time */
+ && ( lt = time_local() ) < back[p].ka_time_start + back[p].r.keep_alive_t
+ /* Connection delay must not exceed keep-alive timeout */
+ && ( opt->maxconn <= 0 || ( back[p].r.keep_alive_t > ( 1.0 / opt->maxconn ) ) )
+ /* Available slot in backing */
+ && ( q = back_search(opt, cache, back, back_max) ) >= 0
)
{
lien_back tmp;
@@ -452,13 +495,13 @@ void back_maydeletehttp(httrackp* opt, lien_back* back, int back_max, int p) {
/* attempt to attach a live connection to this slot */
-int back_trylive(httrackp* opt,lien_back* back, int back_max, int p) {
+int back_trylive(httrackp* opt,cache_back* cache,lien_back* back, int back_max, int p) {
if (p>=0 && back[p].status != -103) { // we never know..
int i = back_searchlive(opt,back, back_max, back[p].url_adr); // search slot
if (i >= 0 && i != p) {
deletehttp(&back[p].r); // security check
back_connxfr(&back[i].r, &back[p].r); // transfer live connection settings from i to p
- back_delete(opt,back, i); // delete old slot
+ back_delete(opt,cache,back, i); // delete old slot
back[p].status=100; // ready to connect
return 1; // success: will reuse live connection
}
@@ -483,7 +526,7 @@ int back_searchlive(httrackp* opt, lien_back* back, int back_max, char* search_a
return -1;
}
-int back_search(httrackp* opt,lien_back* back, int back_max) {
+int back_search(httrackp* opt,cache_back* cache,lien_back* back, int back_max) {
int i;
/* try to find an empty place */
@@ -497,7 +540,7 @@ int back_search(httrackp* opt,lien_back* back, int back_max) {
for(i = 0 ; i < back_max ; i++ ) {
if (back[i].status == -103) {
/* close this place */
- back_delete(opt,back, i);
+ back_delete(opt,cache,back, i);
return i;
}
}
@@ -507,18 +550,33 @@ int back_search(httrackp* opt,lien_back* back, int back_max) {
}
// effacer entrée
-int back_delete(httrackp* opt, lien_back* back, int p) {
+int back_delete(httrackp* opt, cache_back* cache, lien_back* back, int p) {
if (p>=0) { // on sait jamais..
// Vérificateur d'intégrité
#if DEBUG_CHECKINT
_CHECKINT(&back[p],"Appel back_delete")
#endif
#if HTS_DEBUG_CLOSESOCK
- char info[256];
- sprintf(info,"back_delete: #%d\n",p);
- DEBUG_W2(info);
+ DEBUG_W("back_delete: #%d\n" _ (int) p);
#endif
-
+
+ // Finalize
+ if (!back[p].finalized) {
+ if (
+ (back[p].status == 0) // ready
+ &&
+ (!back[p].testmode) // not test mode
+ &&
+ (back[p].r.statuscode>0) // not internal error
+ ) {
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"File '%s%s' -> %s not yet saved in cache - saving now"LF, back[p].url_adr, back[p].url_fil, back[p].url_sav); test_flush;
+ }
+ }
+ back_finalize(opt, cache, back, p);
+ }
+ back[p].finalized = 0;
+
// Libérer tous les sockets, handles, buffers..
if (back[p].r.soc!=INVALID_SOCKET) {
#if HTS_DEBUG_CLOSESOCK
@@ -546,6 +604,12 @@ int back_delete(httrackp* opt, lien_back* back, int p) {
}
// }
+ // headers
+ if (back[p].r.headers != NULL) {
+ freet(back[p].r.headers);
+ back[p].r.headers = NULL;
+ }
+
/* fichier de sortie */
if (back[p].r.out!=NULL) { // fermer fichier sortie
fclose(back[p].r.out);
@@ -607,7 +671,7 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char*
// rechercher emplacement
back_clean(opt, cache, back, back_max);
- if ( ( p = back_search(opt, back, back_max) ) >= 0) {
+ if ( ( p = back_search(opt, cache, back, back_max) ) >= 0) {
back[p].send_too[0]='\0'; // éventuels paramètres supplémentaires à transmettre au serveur
// clear r
@@ -664,6 +728,25 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char*
return 0;
}
+ // test "fast header" cache ; that is, tests we did that lead to 3XX/4XX/5XX response codes
+ if (cache->cached_tests != NULL) {
+ long int ptr = 0;
+ if (inthash_read((inthash)cache->cached_tests, concat(adr, fil), (long int*)&ptr)) { // gotcha
+ if (ptr != 0) {
+ char* text = (char*) ptr;
+ char* lf = strchr(text, '\n');
+ int code = 0;
+ if (sscanf(text, "%d", &code) == 1) { // got code
+ back[p].r.statuscode=code;
+ if (lf != NULL && *lf != '\0') { // got location ?
+ strcpybuff(back[p].r.location, lf + 1);
+ }
+ return 0;
+ }
+ }
+ }
+ }
+
// tester cache
if ((strcmp(adr,"file://")) /* pas fichier */
&& ( (!test) || (cache->type==1) ) /* cache prioritaire, laisser passer en test! */
@@ -681,7 +764,7 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char*
#else
if (cache->use) {
#endif
- char buff[HTS_URLMAXSIZE*4];
+ char BIGSTK buff[HTS_URLMAXSIZE*4];
#if HTS_FAST_CACHE
strcpybuff(buff,adr); strcatbuff(buff,fil);
hash_pos_return=inthash_read((inthash)cache->hashtable,buff,(long int*)&hash_pos);
@@ -710,7 +793,7 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char*
/* It is possible that the file has been moved due to changes in build structure */
{
- char previous_save[HTS_URLMAXSIZE*2];
+ char BIGSTK previous_save[HTS_URLMAXSIZE*2];
previous_save[0] = '\0';
back[p].r = cache_readex(opt, cache, adr, fil, NULL, back[p].location_buffer, previous_save, 0);
if (previous_save[0] != '\0' && fexist(fconv(previous_save))) {
@@ -861,9 +944,6 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char*
back[p].r.req.nocompression=1; /* Do not compress when updating! */
}
- /* else if (strnotempty(cache->lastmodified))
- sprintf(back[p].send_too,"If-Modified-Since: %s\r\n",cache->lastmodified);
- */
}
#if DEBUGCA
printf("..is modified test %s\n",back[p].send_too);
@@ -881,10 +961,11 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char*
// On demande juste les données restantes si le date est valide (206), tout sinon (200)
if ((ishtml(save) != 1) && (ishtml(back[p].url_fil)!=1)) { // NON HTML (liens changés!!)
if (sz>0) { // Fichier non vide? (question bête, sinon on transfert tout!)
- if (strnotempty(cache->lastmodified)) { /* pas de If-.. possible */
- /*if ( (!opt->http10) && (strnotempty(cache->lastmodified)) ) { */ /* ne pas forcer 1.0 */
+ char lastmodified[256];
+ get_filetime_rfc822(save, lastmodified);
+ if (strnotempty(lastmodified)) { /* pas de If-.. possible */
#if DEBUGCA
- printf("..if unmodified since %s size "LLintP"\n",cache->lastmodified,(LLint)sz);
+ printf("..if unmodified since %s size "LLintP"\n", lastmodified, (LLint)sz);
#endif
if ((opt->debug>1) && (opt->log!=NULL)) {
fspc(opt->log,"debug"); fprintf(opt->log,"File partially present ("LLintP" bytes): %s%s"LF,(LLint)sz,back[p].url_adr,back[p].url_fil); test_flush;
@@ -899,10 +980,10 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char*
back[p].http11=1; // En tête 1.1
} else
*/
- if (strlen(cache->lastmodified)) {
+ if (strlen(lastmodified)) {
sprintf(back[p].send_too,
"If-Unmodified-Since: %s\r\nRange: bytes="LLintP"-\r\n"
- ,cache->lastmodified,(LLint)sz);
+ , lastmodified, (LLint)sz);
back[p].http11=1; // En tête 1.1
back[p].range_req_size=sz;
back[p].r.req.range_used=1;
@@ -959,6 +1040,8 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char*
memcpy(&(back[p].r.req.proxy), &opt->proxy, sizeof(opt->proxy));
// et user-agent
strcpybuff(back[p].r.req.user_agent,opt->user_agent);
+ strcpybuff(back[p].r.req.referer,opt->referer);
+ strcpybuff(back[p].r.req.from,opt->from);
strcpybuff(back[p].r.req.lang_iso,opt->lang_iso);
back[p].r.req.user_agent_send=opt->user_agent_send;
// et http11
@@ -997,7 +1080,7 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char*
}
#endif
- if (!back_trylive(opt,back, back_max, p)) {
+ if (!back_trylive(opt, cache, back, back_max, p)) {
#if HTS_XGETHOST
#if HDEBUG
printf("back_solve..\n");
@@ -1117,7 +1200,7 @@ printf("Xfopen ok, poll..\n");
#if HTS_XGETHOST
#if USE_BEGINTHREAD
// lancement multithread du robot
-PTHREAD_TYPE Hostlookup(void* iadr_p) {
+PTHREAD_TYPE PTHREAD_TYPE_FNC Hostlookup(void* iadr_p) {
char iadr[256];
t_dnscache* cache=_hts_cache(); // adresse du cache
t_hostent* hp;
@@ -1209,7 +1292,7 @@ void back_solve(lien_back* back) {
char* p = calloct(strlen(a)+2,1);
if (p) {
strcpybuff(p,a);
- _beginthread( Hostlookup , 0, p );
+ (void)hts_newthread( Hostlookup , 0, p );
}
}
#else
@@ -1221,7 +1304,7 @@ void back_solve(lien_back* back) {
char* p = calloct(strlen(a)+2,1);
if (p) {
strcpybuff(p,a);
- _beginthread( Hostlookup , 0, p );
+ (void)hts_newthread( Hostlookup , 0, p );
}
#else
// Sous Unix, le gethostbyname() est bloquant..
@@ -1264,8 +1347,8 @@ void back_clean(httrackp* opt,cache_back* cache,lien_back* back,int back_max) {
if (back[i].r.statuscode==200) { // HTTP "OK"
if (back[i].r.size>0) { // size>0
if (back[i].r.is_write // not in memory (on disk, ready)
- && !is_hypertext_mime(back[i].r.contenttype) // not HTML/hypertext
- && !may_be_hypertext_mime(back[i].r.contenttype) // may NOT be parseable mime type
+ && !is_hypertext_mime(back[i].r.contenttype, back[i].url_fil) // not HTML/hypertext
+ && !may_be_hypertext_mime(back[i].r.contenttype, back[i].url_fil) // may NOT be parseable mime type
) {
if (back[i].pass2_ptr) {
// finalize
@@ -1279,20 +1362,20 @@ void back_clean(httrackp* opt,cache_back* cache,lien_back* back,int back_max) {
//xxxcache_mayadd(opt,cache,&back[i].r,back[i].url_adr,back[i].url_fil,back[i].url_sav);
usercommand(opt, 0, NULL, back[i].url_sav, back[i].url_adr, back[i].url_fil);
*back[i].pass2_ptr=-1; // Done!
- back_maydelete(opt,back,i); // May delete backing entry
if ((opt->debug>0) && (opt->log!=NULL)) {
fspc(opt->log,"info"); fprintf(opt->log,"File successfully written in background: %s"LF,back[i].url_sav); test_flush;
}
+ back_maydelete(opt,cache,back,i); // May delete backing entry
}
} else {
if (!back[i].finalized) {
if (1) {
/* Ensure deleted or recycled socket */
/* BUT DO NOT YET WIPE back[i].r.adr */
- back_maydeletehttp(opt, back, back_max, i);
if ( (opt->debug>1) && (opt->log!=NULL) ) {
fspc(opt->log,"debug"); fprintf(opt->log,"file %s%s validated (cached, left in memory)"LF,back[i].url_adr,back[i].url_fil); test_flush;
}
+ back_maydeletehttp(opt, cache, back, back_max, i);
} else {
/*
NOT YET HANDLED CORRECTLY (READ IN NEW CACHE TO DO)
@@ -1302,7 +1385,7 @@ void back_clean(httrackp* opt,cache_back* cache,lien_back* back,int back_max) {
htsblk r;
/* Ensure deleted or recycled socket */
- back_maydeletehttp(opt, back, back_max, i);
+ back_maydeletehttp(opt, cache, back, back_max, i);
assertf(back[i].r.soc == INVALID_SOCKET);
/* Check header */
@@ -1312,7 +1395,6 @@ void back_clean(httrackp* opt,cache_back* cache,lien_back* back,int back_max) {
/* Delete buffer and sockets */
deleteaddr(&back[i].r);
deletehttp(&back[i].r);
- back[i].finalized = 1;
if ( (opt->debug>1) && (opt->log!=NULL) ) {
fspc(opt->log,"debug"); fprintf(opt->log,"file %s%s temporarily left in cache to spare memory"LF,back[i].url_adr,back[i].url_fil); test_flush;
}
@@ -1344,7 +1426,7 @@ void back_clean(httrackp* opt,cache_back* cache,lien_back* back,int back_max) {
back[i].url_adr);
test_flush;
}
- back_delete(opt,back, i); // delete backing entry
+ back_delete(opt,cache,back, i); // delete backing entry
}
}
}
@@ -1352,7 +1434,7 @@ void back_clean(httrackp* opt,cache_back* cache,lien_back* back,int back_max) {
for(i=0;i<back_max;i++) {
if (back[i].status == 0) { // ready
if (back[i].r.soc != INVALID_SOCKET) {
- back_maydeletehttp(opt,back, back_max, i);
+ back_maydeletehttp(opt,cache,back, back_max, i);
}
}
@@ -1369,7 +1451,7 @@ void back_clean(httrackp* opt,cache_back* cache,lien_back* back,int back_max) {
}
for(i = 0 ; i < back_max && curr > max ; i++) {
if (back[i].status == -103) {
- back_delete(opt,back, i); // delete backing entry
+ back_delete(opt,cache,back, i); // delete backing entry
curr--;
}
}
@@ -1379,7 +1461,7 @@ void back_clean(httrackp* opt,cache_back* cache,lien_back* back,int back_max) {
// attente (gestion des buffers des sockets)
void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TStamp stat_timestart) {
- int i;
+ unsigned int i_mod;
T_SOC nfds=INVALID_SOCKET;
fd_set fds,fds_c,fds_e; // fds pour lecture, connect (write), et erreur
int nsockets; // nbre sockets
@@ -1393,7 +1475,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
#if HTS_ANALYSTE
int max_loop_chk=0;
#endif
-
+ unsigned int mod_random = (unsigned int) ( time_local() + HTS_STAT.HTS_TOTAL_RECV );
// max. number of loops
max_loop=8;
@@ -1422,7 +1504,9 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
nfds=INVALID_SOCKET;
max_c=1;
- for(i=0;i<back_max;i++) {
+ for(i_mod = 0 ; i_mod < (unsigned int) back_max ; i_mod++) {
+ // for(i=0;i<back_max;i++) {
+ unsigned int i = ( i_mod + mod_random ) % ( back_max );
// en cas de gestion du connect préemptif
#if HTS_XCONN
@@ -1541,8 +1625,9 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
busy_recv=0;
// recevoir les données arrivées
- for(i=0;i<back_max;i++) {
-
+ for(i_mod = 0 ; i_mod < (unsigned int) back_max ; i_mod++) {
+ // for(i=0;i<back_max;i++) {
+ unsigned int i = ( i_mod + mod_random ) % ( back_max );
if (back[i].status>0) {
if (!back[i].r.is_file) { // not file..
if (back[i].r.soc!=INVALID_SOCKET) { // hey, you never know..
@@ -1561,7 +1646,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
else
strcpybuff(back[i].r.msg,"Receive Error");
if (back[i].status == -103) { /* Keep-alive socket */
- back_delete(opt,back, i);
+ back_delete(opt,cache,back, i);
} else {
back[i].status=0; // terminé
}
@@ -1619,9 +1704,9 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
if ((back[i].r.soc != INVALID_SOCKET) && (back[i].status==100)) {
/* limit nb. connections/seconds to avoid server overload */
- if (opt->maxconn>0) {
+ /*if (opt->maxconn>0) {
Sleep(1000/opt->maxconn);
- }
+ }*/
back[i].ka_time_start=time_local();
if (back[i].timeout>0) { // refresh timeout si besoin est
@@ -1754,6 +1839,13 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
}
}
#endif
+ else if (back[i].status==1001) { // ftp ready
+ back[i].status=0;
+ // finalize transfer
+ if (back[i].r.statuscode>0) {
+ back_finalize(opt,cache,back,i);
+ }
+ }
else if ((back[i].status>0) && (back[i].status<1000)) { // en réception http
int dispo=0;
@@ -1764,11 +1856,12 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
// données dispo?
//## if (back[i].url_adr[0]!=lOCAL_CHAR)
- if (!back[i].r.is_file) {
- dispo=FD_ISSET(back[i].r.soc,&fds);
- }
- else
+ if (back[i].r.is_file)
+ dispo=1;
+ else if (back[i].r.ssl)
dispo=1;
+ else
+ dispo=FD_ISSET(back[i].r.soc,&fds);
// Check transfer rate!
if (!max_read_bytes)
@@ -1795,7 +1888,8 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
if (strnotempty(back[i].url_sav)) {
if (strcmp(back[i].url_fil,"/robots.txt")) {
if (back[i].r.statuscode==200) { // 'OK'
- if (!is_hypertext_mime(back[i].r.contenttype)) { // pas HTML
+ if (!is_hypertext_mime(back[i].r.contenttype, back[i].url_fil)
+ ) { // pas HTML
if (opt->getmode&2) { // on peut ecrire des non html
int fcheck=0;
back[i].r.is_write=1; // écrire
@@ -1900,7 +1994,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
retour_fread=(int) http_xfread1(&(back[i].r),(int) max_read_bytes);
// retour_fread=http_fread1(&(back[i].r));
} else
- retour_fread=-1; // interruption ou annulation interne (peut ne pas être une erreur)
+ retour_fread=READ_EOF; // interruption ou annulation interne (peut ne pas être une erreur)
// Si réception chunk, tester si on est pas à la fin!
if (back[i].status==1) {
@@ -1920,27 +2014,25 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
}
} else if (back[i].r.keep_alive) {
if (back[i].r.size==back[i].r.totalsize) { // fin!
- retour_fread=-1; // end
+ retour_fread=READ_EOF; // end
}
}
}
if (retour_fread < 0) { // fin réception
back[i].status=0; // terminé
- if (back[i].r.soc!=INVALID_SOCKET) {
-#if HTS_DEBUG_CLOSESOCK
- DEBUG_W("back_wait(4): deletehttp\n");
-#endif
- /*KA deletehttp(&back[i].r);*/
- back_maydeletehttp(opt, back, back_max, i);
- }
- /*KA back[i].r.soc=INVALID_SOCKET; */
+ /*KA back[i].r.soc=INVALID_SOCKET; */
#if CHUNKDEBUG==1
if (back[i].is_chunk)
printf("[%d] must be the last chunk for %s (connection closed) - %d/%d\n",(int)back[i].r.soc,back[i].url_fil,back[i].r.size,back[i].r.totalsize);
#endif
- //if ((back[i].r.statuscode==-1) && (strnotempty(back[i].r.msg)==0)) {
- if ((back[i].r.statuscode <= 0) && (strnotempty(back[i].r.msg)==0)) {
+ if (retour_fread < 0 && retour_fread != READ_EOF) {
+ if (back[i].r.size > 0)
+ strcatbuff(back[i].r.msg, "Interrupted transfer");
+ else
+ strcatbuff(back[i].r.msg, "No data (connection closed)");
+ back[i].r.statuscode=-4;
+ } else if ((back[i].r.statuscode <= 0) && (strnotempty(back[i].r.msg)==0)) {
#if HDEBUG
printf("error interruped: %s\n",back[i].r.adr);
#endif
@@ -1951,6 +2043,15 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
back[i].r.statuscode=-4;
}
+ // Close socket
+ if (back[i].r.soc!=INVALID_SOCKET) {
+#if HTS_DEBUG_CLOSESOCK
+ DEBUG_W("back_wait(4): deletehttp\n");
+#endif
+ /*KA deletehttp(&back[i].r);*/
+ back_maydeletehttp(opt, cache, back, back_max, i);
+ }
+
// finalize transfer
if (back[i].r.statuscode>0) {
back_finalize(opt,cache,back,i);
@@ -2101,16 +2202,16 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
/* Tester totalsize en fin de chunk */
if ((back[i].r.totalsize>0)) { // tester totalsize
if (back[i].r.totalsize!=back[i].r.size) { // pas la même!
-#if HTS_CL_IS_FATAL
- deleteaddr(&back[i].r);
- back[i].r.statuscode=-1;
- strcpybuff(back[i].r.msg,"Incorrect length");
-#else
- // Un warning suffira..
- if (cache->errlog!=NULL) {
- fspc(cache->errlog,"warning"); fprintf(cache->errlog,"Incorrect length ("LLintP"!="LLintP" expected) for %s%s"LF,(LLint)back[i].r.size,(LLint)back[i].r.totalsize,back[i].url_adr,back[i].url_fil);
+ if (!opt->tolerant) {
+ deleteaddr(&back[i].r);
+ back[i].r.statuscode=-1;
+ strcpybuff(back[i].r.msg,"Incorrect length");
+ } else {
+ // Un warning suffira..
+ if (cache->errlog!=NULL) {
+ fspc(cache->errlog,"warning"); fprintf(cache->errlog,"Incorrect length ("LLintP"!="LLintP" expected) for %s%s"LF,(LLint)back[i].r.size,(LLint)back[i].r.totalsize,back[i].url_adr,back[i].url_fil);
+ }
}
-#endif
}
}
@@ -2153,22 +2254,6 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
#endif
- // Callback
-#if HTS_ANALYSTE
- if (hts_htmlcheck_receivehead != NULL) {
- int test_head=hts_htmlcheck_receivehead(back[i].r.adr, back[i].url_adr, back[i].url_fil, back[i].referer_adr, back[i].referer_fil, &back[i].r);
- if (test_head!=1) {
- if ((opt->debug>0) && (opt->log!=NULL)) {
- fspc(opt->log,"warning"); fprintf(opt->log,"External wrapper aborted transfer, breaking connection: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush;
- }
- back[i].status=0; // FINI
- deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET;
- strcpybuff(back[i].r.msg,"External wrapper aborted transfer");
- back[i].r.statuscode = -1;
- }
- }
-#endif
-
/* Hack for zero-length headers */
if (back[i].status != 0 && back[i].r.adr[0] != '<') {
@@ -2223,15 +2308,36 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
} while(strnotempty(rcvd));
// ----------------------------------------
- // libérer mémoire -- après! --
- deleteaddr(&back[i].r);
} else {
// assume text/html, OK
treatfirstline(&back[i].r, back[i].r.adr);
noFreebuff=1;
}
-
+ // Callback
+#if HTS_ANALYSTE
+ if (hts_htmlcheck_receivehead != NULL) {
+ int test_head=hts_htmlcheck_receivehead(back[i].r.adr, back[i].url_adr, back[i].url_fil, back[i].referer_adr, back[i].referer_fil, &back[i].r);
+ if (test_head!=1) {
+ if ((opt->debug>0) && (opt->log!=NULL)) {
+ fspc(opt->log,"warning"); fprintf(opt->log,"External wrapper aborted transfer, breaking connection: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush;
+ }
+ back[i].status=0; // FINI
+ deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET;
+ strcpybuff(back[i].r.msg,"External wrapper aborted transfer");
+ back[i].r.statuscode = -1;
+ }
+ }
+#endif
+
+ // Free headers memory now
+ // Actually, save them for informational purpose
+ if (!noFreebuff) {
+ char* block = back[i].r.adr;
+ back[i].r.adr = NULL;
+ deleteaddr(&back[i].r);
+ back[i].r.headers = block;
+ }
/*
Status code and header-response hacks
@@ -2325,7 +2431,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
if (opt->sizehack) {
if (!back[i].is_update) { // mise à jour
if (back[i].r.statuscode==200 && !back[i].testmode) { // 'OK'
- if (!is_hypertext_mime(back[i].r.contenttype)) { // not HTML
+ if (!is_hypertext_mime(back[i].r.contenttype, back[i].url_fil)) { // not HTML
if (strnotempty(back[i].url_sav)) { // target found
int size = fsize(back[i].url_sav); // target size
if (size >= 0) {
@@ -2360,7 +2466,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
if (strnotempty(back[i].url_sav)) {
if (strcmp(back[i].url_fil,"/robots.txt")) {
if (back[i].r.statuscode==200) { // 'OK'
- if (!is_hypertext_mime(back[i].r.contenttype)) { // pas HTML
+ if (!is_hypertext_mime(back[i].r.contenttype, back[i].url_fil)) { // pas HTML
if (back[i].r.statuscode==200) { // "OK"
if (back[i].range_req_size>0) { // but Range: requested
if (back[i].range_req_size == back[i].r.totalsize) { // And same size
@@ -2495,7 +2601,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
#endif
// Couper connexion
/*KA deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET;*/
- back_maydeletehttp(opt, back, back_max, i);
+ back_maydeletehttp(opt, cache, back, back_max, i);
back[i].status=0; // terminé
// finalize
@@ -2520,7 +2626,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
printf("partial content: "LLintP" on disk..\n",(LLint)sz);
#endif
if (sz>=0) {
- if (!is_hypertext_mime(back[i].r.contenttype)) { // pas HTML
+ if (!is_hypertext_mime(back[i].r.contenttype, back[i].url_sav)) { // pas HTML
if (opt->getmode&2) { // on peut ecrire des non html **sinon ben euhh sera intercepté plus loin, donc rap sur ce qui va sortir**
filenote(back[i].url_sav,NULL); // noter fichier comme connu
back[i].r.out=fopen(fconv(back[i].url_sav),"ab"); // append
@@ -2591,9 +2697,10 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
if (back[i].status!=0) { // non terminé (erreur)
if (!back[i].testmode) { // fichier normal
- if (back[i].r.empty && back[i].r.statuscode==200) { // empty response
+ if (back[i].r.empty /* ?? && back[i].r.statuscode==200 */) { // empty response
// Couper connexion
- deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET;
+ back_maydeletehttp(opt, cache, back, back_max, i);
+ /* KA deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET; */
back[i].status=0; // terminé
if ( deleteaddr(&back[i].r) && (back[i].r.adr=(char*) malloct((INTsys) 2)) ) {
back[i].r.adr[0] = 0;
@@ -2687,7 +2794,9 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
if (gestion_timeout) {
TStamp act;
act=time_local(); // temps en secondes
- for(i=0;i<back_max;i++) {
+ for(i_mod = 0 ; i_mod < (unsigned int) back_max ; i_mod++) {
+ // for(i=0;i<back_max;i++) {
+ unsigned int i = ( i_mod + mod_random ) % ( back_max );
if (back[i].status>0) { // réception/connexion/..
if (back[i].timeout>0) {
//printf("time check %d\n",((int) (act-back[i].timeout_refresh))-back[i].timeout);
@@ -2795,7 +2904,7 @@ LLint back_transfered(LLint nb,lien_back* back,int back_max) {
// j: 1 afficher sockets 2 afficher autres 3 tout afficher
void back_info(lien_back* back,int i,int j,FILE* fp) {
if (back[i].status>=0) {
- char s[HTS_URLMAXSIZE*2+1024];
+ char BIGSTK s[HTS_URLMAXSIZE*2+1024];
s[0]='\0';
back_infostr(back,i,j,s);
strcatbuff(s,LF);
@@ -2881,7 +2990,7 @@ void back_infostr(lien_back* back,int i,int j,char* s) {
if (aff) {
{
- char s2[HTS_URLMAXSIZE*2+1024];
+ char BIGSTK s2[HTS_URLMAXSIZE*2+1024];
sprintf(s2,"\"%s",back[i].url_adr); strcatbuff(s,s2);
if (back[i].url_fil[0]!='/') strcatbuff(s,"/");
diff --git a/src/htsback.h b/src/htsback.h
index 74fd540..9587d7e 100644
--- a/src/htsback.h
+++ b/src/htsback.h
@@ -42,6 +42,9 @@ Please visit our Website: http://www.httrack.com
#include "htsbasenet.h"
#include "htscore.h"
+/* Library internal definitions */
+#ifdef HTS_INTERNAL_BYTECODE
+
// backing
#define BACK_ADD_TEST "(dummy)"
#define BACK_ADD_TEST2 "(dummy2)"
@@ -53,16 +56,16 @@ int back_nsoc(lien_back* back,int back_max);
int back_nsoc_overall(lien_back* back,int back_max);
int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* referer_adr,char* referer_fil,int test,int* pass2_ptr);
int back_stack_available(lien_back* back,int back_max);
-int back_search(httrackp* opt, lien_back* back, int back_max);
+int back_search(httrackp* opt, cache_back* cache, lien_back* back, int back_max);
void back_clean(httrackp* opt,cache_back* cache,lien_back* back,int back_max);
void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TStamp stat_timestart);
-int back_letlive(httrackp* opt, lien_back* back, int p);
+int back_letlive(httrackp* opt, cache_back* cache, lien_back* back, int p);
int back_searchlive(httrackp* opt, lien_back* back, int back_max, char* search_addr);
void back_connxfr(htsblk* src, htsblk* dst);
-int back_delete(httrackp* opt,lien_back* back,int p);
-int back_maydelete(httrackp* opt, lien_back* back, int p);
-void back_maydeletehttp(httrackp* opt, lien_back* back, int back_max, int p);
-int back_trylive(httrackp* opt,lien_back* back, int back_max, int p);
+int back_delete(httrackp* opt,cache_back* cache,lien_back* back,int p);
+int back_maydelete(httrackp* opt, cache_back* cache, lien_back* back, int p);
+void back_maydeletehttp(httrackp* opt, cache_back* cache, lien_back* back, int back_max, int p);
+int back_trylive(httrackp* opt,cache_back* cache,lien_back* back, int back_max, int p);
int back_finalize(httrackp* opt,cache_back* cache,lien_back* back,int p);
void back_info(lien_back* back,int i,int j,FILE* fp);
void back_infostr(lien_back* back,int i,int j,char* s);
@@ -77,8 +80,10 @@ int back_checkmirror(httrackp* opt);
#if HTS_XGETHOST
#if USE_BEGINTHREAD
-PTHREAD_TYPE Hostlookup(void* iadr_p);
+PTHREAD_TYPE PTHREAD_TYPE_FNC Hostlookup(void* iadr_p);
+#endif
#endif
+
#endif
#endif
diff --git a/src/htsbase.h b/src/htsbase.h
index 139e3ed..9911d73 100644
--- a/src/htsbase.h
+++ b/src/htsbase.h
@@ -44,9 +44,8 @@ extern "C" {
#include "htsglobal.h"
-// size_t et mode_t
-#include <stdio.h>
-#include <stdlib.h>
+#include <string.h>
+#include <time.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
@@ -76,30 +75,24 @@ extern "C" {
#define min(a,b) ((a)>(b)?(b):(a))
#define max(a,b) ((a)>(b)?(a):(b))
+#ifndef _WIN32
+/* Non-Windows replacement for Sleep(): 'a' is a duration in milliseconds.
+   The sub-second part is passed to usleep() and the whole seconds to sleep().
+   Wrapped in do { } while(0) so that the macro behaves as a single statement
+   (a bare { } block followed by ';' breaks "if (x) Sleep(n); else ...").
+   Note: 'a' is evaluated several times; do not pass expressions with
+   side effects. */
+#undef Sleep
+#define min(a,b) ((a)>(b)?(b):(a))
+#define max(a,b) ((a)>(b)?(a):(b))
+#define Sleep(a) do { if (((a)*1000)%1000000) usleep(((a)*1000)%1000000); if (((a)*1000)/1000000) sleep(((a)*1000)/1000000); } while(0)
+#endif
+
// teste égalité de 2 chars, case insensitive
#define hichar(a) ((((a)>='a') && ((a)<='z')) ? ((a)-('a'-'A')) : (a))
#define streql(a,b) (hichar(a)==hichar(b))
-// is this MIME an hypertext MIME (text/html), html/js-style or other script/text type?
-#define HTS_HYPERTEXT_DEFAULT_MIME "text/html"
-#define is_hypertext_mime(a) \
- ( (strfield2((a),"text/html")!=0)\
- || (strfield2((a),"application/x-javascript")!=0) \
- || (strfield2((a),"text/css")!=0) \
- /*|| (strfield2((a),"text/vnd.wap.wml")!=0)*/ \
- || (strfield2((a),"image/svg+xml")!=0) \
- || (strfield2((a),"image/svg-xml")!=0) \
- /*|| (strfield2((a),"audio/x-pn-realaudio")!=0) */\
- )
+// caractère maj
+#define isUpperLetter(a) ( ((a) >= 'A') && ((a) <= 'Z') )
-#define may_be_hypertext_mime(a) \
- (\
- (strfield2((a),"audio/x-pn-realaudio")!=0) \
- )
+/* Library internal definitions */
+#ifdef HTS_INTERNAL_BYTECODE
-// caractère maj
-#define isUpperLetter(a) ( ((a) >= 'A') && ((a) <= 'Z') )
// functions
#ifdef _WIN32
@@ -112,10 +105,15 @@ extern "C" {
typedef void (*t_abortLog)(char* msg, char* file, int line);
extern HTSEXT_API t_abortLog abortLog__;
#define abortLog(a) abortLog__(a, __FILE__, __LINE__)
+#define _ ,
+#ifndef _WIN32_WCE
#define abortLogFmt(a) do { \
FILE* fp = fopen("CRASH.TXT", "wb"); \
if (!fp) fp = fopen("/tmp/CRASH.TXT", "wb"); \
if (!fp) fp = fopen("C:\\CRASH.TXT", "wb"); \
+ if (!fp) fp = fopen("\\Temp\\CRASH.TXT", "wb"); \
+ if (!fp) fp = fopen("\\CRASH.TXT", "wb"); \
+ if (!fp) fp = fopen("CRASH.TXT", "wb"); \
if (fp) { \
fprintf(fp, "HTTrack " HTTRACK_VERSIONID " closed at '" __FILE__ "', line %d\r\n", __LINE__); \
fprintf(fp, "Reason:\r\n"); \
@@ -125,22 +123,12 @@ extern HTSEXT_API t_abortLog abortLog__;
fclose(fp); \
} \
} while(0)
-
-
-#define _ ,
+#else
#define abortLogFmt(a) do { \
- FILE* fp = fopen("CRASH.TXT", "wb"); \
- if (!fp) fp = fopen("/tmp/CRASH.TXT", "wb"); \
- if (!fp) fp = fopen("C:\\CRASH.TXT", "wb"); \
- if (fp) { \
- fprintf(fp, "HTTrack " HTTRACK_VERSIONID " closed at '" __FILE__ "', line %d\r\n", __LINE__); \
- fprintf(fp, "Reason:\r\n"); \
- fprintf(fp, a); \
- fprintf(fp, "\r\n"); \
- fflush(fp); \
- fclose(fp); \
- } \
+ XCEShowMessageA("HTTrack " HTTRACK_VERSIONID " closed at '" __FILE__ "', line %d\r\nReason:\r\n%s\r\n", __LINE__, a); \
} while(0)
+#endif
+
#define assertf(exp) do { \
if (! ( exp ) ) { \
abortLog("assert failed: " #exp); \
@@ -167,17 +155,20 @@ extern HTSEXT_API t_abortLog abortLog__;
#define malloct(A) malloc(A)
#define calloct(A,B) calloc((A), (B))
#define freet(A) do { assertnf((A) != NULL); if ((A) != NULL) { free(A); (A) = NULL; } } while(0)
+#define strdupt(A) strdup(A)
#define realloct(A,B) ( ((A) != NULL) ? realloc((A), (B)) : malloc(B) )
#define memcpybuff(A, B, N) memcpy((A), (B), (N))
#else
/* debug version */
#define malloct(A) hts_malloc(A)
#define calloct(A,B) hts_calloc(A,B)
+#define strdupt(A) hts_strdup(A)
#define freet(A) do { hts_free(A); (A) = NULL; } while(0)
#define realloct(A,B) hts_realloc(A,B)
void hts_freeall();
void* hts_malloc (size_t);
void* hts_calloc(size_t,size_t);
+char* hts_strdup(char*);
void* hts_xmalloc(size_t,size_t);
void hts_free (void*);
void* hts_realloc (void*,size_t);
@@ -379,9 +370,10 @@ extern HTSEXT_API int htsMemoryFastXfr;
#endif
+#endif
#ifdef __cplusplus
- };
+}
#endif
#endif
diff --git a/src/htsbasenet.h b/src/htsbasenet.h
index 71ac9c9..f2a6c53 100644
--- a/src/htsbasenet.h
+++ b/src/htsbasenet.h
@@ -41,15 +41,23 @@ Please visit our Website: http://www.httrack.com
#if HTS_WIN
#if HTS_INET6==0
- #include <winsock.h>
+ #include <winsock2.h>
#else
+
+#ifndef _WIN32_WCE
#undef HTS_USESCOPEID
#define WIN32_LEAN_AND_MEAN
#include <winsock2.h>
#include <ws2tcpip.h>
#include <tpipv6.h>
+#else
+ #include <winsock2.h>
+ #include <socket.h>
+#endif
+
#endif
- typedef SOCKET T_SOC;
+
+typedef SOCKET T_SOC;
typedef struct hostent FAR t_hostent;
#else
@@ -67,9 +75,6 @@ Please visit our Website: http://www.httrack.com
*/
#ifndef HTS_OPENSSL_H_INCLUDED
#define HTS_OPENSSL_H_INCLUDED
-#ifdef __cplusplus
-extern "C" {
-#endif
/*
#include <openssl/ssl.h>
@@ -77,6 +82,9 @@ extern "C" {
#include <openssl/err.h>
*/
+/* Library internal definitions */
+#ifdef HTS_INTERNAL_BYTECODE
+
/* OpenSSL definitions */
#define SSL_shutdown hts_ptrfunc_SSL_shutdown
#define SSL_free hts_ptrfunc_SSL_free
@@ -96,6 +104,9 @@ extern "C" {
#define ERR_error_string hts_ptrfunc_ERR_error_string
#define SSL_load_error_strings hts_ptrfunc_SSL_load_error_strings
#define SSL_CTX_ctrl hts_ptrfunc_SSL_CTX_ctrl
+
+#endif
+
/* */
typedef void SSL_CTX;
typedef void* SSL;
@@ -118,6 +129,10 @@ typedef SSL_CTX * (*t_SSL_CTX_new)(SSL_METHOD *method);
typedef char * (*t_ERR_error_string)(unsigned long e, char *buf);
typedef void (*t_SSL_load_error_strings)(void);
typedef long (*t_SSL_CTX_ctrl)(SSL_CTX *ctx, int cmd, long larg, char *parg);
+
+/* Library internal definitions */
+#ifdef HTS_INTERNAL_BYTECODE
+
extern int SSL_is_available;
extern t_SSL_shutdown SSL_shutdown;
extern t_SSL_free SSL_free;
@@ -137,6 +152,9 @@ extern t_SSL_CTX_new SSL_CTX_new;
extern t_ERR_error_string ERR_error_string;
extern t_SSL_load_error_strings SSL_load_error_strings;
extern t_SSL_CTX_ctrl SSL_CTX_ctrl;
+
+#endif
+
/*
From /usr/include/openssl/ssl.h
*/
@@ -154,9 +172,6 @@ From /usr/include/openssl/ssl.h
SSL_CTX_ctrl(ctx,SSL_CTRL_OPTIONS,op,NULL)
//#include <openssl/bio.h>
-#ifdef __cplusplus
- };
-#endif
/* OpenSSL structure */
extern SSL_CTX *openssl_ctx;
diff --git a/src/htsbauth.c b/src/htsbauth.c
index 23a22af..cdc7f1c 100644
--- a/src/htsbauth.c
+++ b/src/htsbauth.c
@@ -35,15 +35,14 @@ Please visit our Website: http://www.httrack.com
/* Author: Xavier Roche */
/* ------------------------------------------------------------ */
+/* Internal engine bytecode */
+#define HTS_INTERNAL_BYTECODE
#include "htsbauth.h"
/* specific definitions */
#include "htsglobal.h"
#include "htslib.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
#include "htsnostatic.h"
@@ -171,17 +170,17 @@ char* cookie_nextfield(char* a) {
// lire également (Windows seulement) les *@*.txt (cookies IE copiés)
// !=0 : erreur
int cookie_load(t_cookie* cookie,char* fpath,char* name) {
- cookie->data[0]='\0';
+ // cookie->data[0]='\0';
// Fusionner d'abord les éventuels cookies IE
#if HTS_WIN
{
- WIN32_FIND_DATA find;
+ WIN32_FIND_DATAA find;
HANDLE h;
char pth[MAX_PATH + 32];
strcpybuff(pth,fpath);
strcatbuff(pth,"*@*.txt");
- h = FindFirstFile(pth,&find);
+ h = FindFirstFileA((char*)pth,&find);
if (h != INVALID_HANDLE_VALUE) {
do {
if (!(find.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY ))
@@ -191,19 +190,33 @@ int cookie_load(t_cookie* cookie,char* fpath,char* name) {
char cook_name[256];
char cook_value[1000];
char domainpathpath[512];
+ char dummy[512];
//
char domain[256]; // domaine cookie (.netscape.com)
char path[256]; // chemin (/)
int cookie_merged=0;
- linput(fp,cook_name,250);
- if (!feof(fp)) {
- linput(fp,cook_value,250);
- if ( (!feof(fp)) && (strnotempty(cook_value)) ) {
- linput(fp,domainpathpath,500);
- if (strnotempty(domainpathpath)) {
- if (ident_url_absolute(domainpathpath,domain,path)>=0) {
- cookie_add(cookie,cook_name,cook_value,domain,path);
- cookie_merged=1;
+ //
+ // Read all cookies
+ while( ! feof(fp) ) {
+ cook_name[0] = cook_value[0] = domainpathpath[0]
+ = dummy[0] = domain[0] = path[0] = '\0';
+ linput(fp,cook_name,250);
+ if ( ! feof(fp) ) {
+ linput(fp,cook_value,250);
+ if ( ! feof(fp) ) {
+ int i;
+ linput(fp,domainpathpath,500);
+ /* Read 6 other useless values */
+ for(i = 0 ; ! feof(fp) && i < 6 ; i++) {
+ linput(fp,dummy,500);
+ }
+ if (strnotempty(cook_name)
+ && strnotempty(cook_value)
+ && strnotempty(domainpathpath)) {
+ if (ident_url_absolute(domainpathpath,domain,path)>=0) {
+ cookie_add(cookie,cook_name,cook_value,domain,path);
+ cookie_merged=1;
+ }
}
}
}
@@ -213,7 +226,7 @@ int cookie_load(t_cookie* cookie,char* fpath,char* name) {
remove(fconcat(fpath,find.cFileName));
} // if fp
}
- } while(FindNextFile(h,&find));
+ } while(FindNextFileA(h,&find));
FindClose(h);
}
}
@@ -223,7 +236,7 @@ int cookie_load(t_cookie* cookie,char* fpath,char* name) {
{
FILE* fp = fopen(fconcat(fpath,name),"rb");
if (fp) {
- char line[8192];
+ char BIGSTK line[8192];
while( (!feof(fp)) && (((int) strlen(cookie->data)) < cookie->max_len)) {
rawlinput(fp,line,8100);
if (strnotempty(line)) {
@@ -232,7 +245,7 @@ int cookie_load(t_cookie* cookie,char* fpath,char* name) {
char domain[256]; // domaine cookie (.netscape.com)
char path[256]; // chemin (/)
char cook_name[256]; // nom cookie (MYCOOK)
- char cook_value[8192]; // valeur (ID=toto,S=1234)
+ char BIGSTK cook_value[8192]; // valeur (ID=toto,S=1234)
strcpybuff(domain,cookie_get(line,0)); // host
strcpybuff(path,cookie_get(line,2)); // path
strcpybuff(cook_name,cookie_get(line,5)); // name
@@ -256,7 +269,7 @@ int cookie_load(t_cookie* cookie,char* fpath,char* name) {
// !=0 : erreur
int cookie_save(t_cookie* cookie,char* name) {
if (strnotempty(cookie->data)) {
- char line[8192];
+ char BIGSTK line[8192];
FILE* fp = fopen(fconv(name),"wb");
if (fp) {
char* a=cookie->data;
diff --git a/src/htsbauth.h b/src/htsbauth.h
index d361d83..4066ece 100644
--- a/src/htsbauth.h
+++ b/src/htsbauth.h
@@ -48,12 +48,16 @@ typedef struct bauth_chain {
// buffer pour les cookies et authentification
-typedef struct {
+typedef struct t_cookie {
int max_len;
char data[32768];
bauth_chain auth;
} t_cookie;
+
+/* Library internal definitions */
+#ifdef HTS_INTERNAL_BYTECODE
+
// cookies
int cookie_add(t_cookie* cookie,char* cook_name,char* cook_value,char* domain,char* path);
int cookie_del(t_cookie* cookie,char* cook_name,char* domain,char* path);
@@ -70,5 +74,6 @@ int bauth_add(t_cookie* cookie,char* adr,char* fil,char* auth);
char* bauth_check(t_cookie* cookie,char* adr,char* fil);
char* bauth_prefix(char* adr,char* fil);
+#endif
#endif
diff --git a/src/htscache.c b/src/htscache.c
index b90fa67..aa9a6c8 100644
--- a/src/htscache.c
+++ b/src/htscache.c
@@ -35,15 +35,19 @@ Please visit our Website: http://www.httrack.com
/* Author: Xavier Roche */
/* ------------------------------------------------------------ */
+/* Internal engine bytecode */
+#define HTS_INTERNAL_BYTECODE
+
#include "htscache.h"
/* specific definitions */
#include "htsbase.h"
#include "htsbasenet.h"
#include "htsmd5.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
+#include <time.h>
+
+#include "htszlib.h"
+
#include "htsnostatic.h"
/* END specific definitions */
@@ -116,10 +120,15 @@ void cache_mayadd(httrackp* opt,cache_back* cache,htsblk* r,char* url_adr,char*
// ---stockage en cache---
// stocker dans le cache?
if (opt->cache) {
- if (cache->dat!=NULL) {
+ if (cache_writable(cache)) {
// c'est le seul endroit ou l'on ajoute des elements dans le cache (fichier entier ou header)
// on stocke tout fichier "ok", mais également les réponses 404,301,302...
- if ((r->statuscode==200) /* stocker réponse standard, plus */
+ if (
+#if 1
+ r->statuscode > 0
+#else
+ /* We don't store 5XX errors, because it might be a server problem */
+ (r->statuscode==200) /* stocker réponse standard, plus */
|| (r->statuscode==204) /* no content */
|| (r->statuscode==301) /* moved perm */
|| (r->statuscode==302) /* moved temp */
@@ -129,13 +138,33 @@ void cache_mayadd(httrackp* opt,cache_back* cache,htsblk* r,char* url_adr,char*
|| (r->statuscode==403) /* unauthorized */
|| (r->statuscode==404) /* not found */
|| (r->statuscode==410) /* gone */
+#endif
)
{ /* ne pas stocker si la page générée est une erreur */
if (!r->is_file) {
// stocker fichiers (et robots.txt)
- if ( (strnotempty(url_save)) || (strcmp(url_fil,"/robots.txt")==0)) {
+ if ( url_save == NULL || (strnotempty(url_save)) || (strcmp(url_fil,"/robots.txt")==0)) {
// ajouter le fichier au cache
- cache_add(*r,url_adr,url_fil,url_save,cache->ndx,cache->dat,opt->all_in_cache);
+ cache_add(cache,*r,url_adr,url_fil,url_save,opt->all_in_cache);
+ //
+ // store a reference NOT to redo the same test zillions of times!
+ // (problem reported by Lars Clausen)
+ // we just store statuscode + location (if any)
+ if (url_save == NULL && r->statuscode / 100 >= 3) {
+ // cached "fast" header doesn't exist yet
+ if (inthash_read((inthash)cache->cached_tests, concat(url_adr, url_fil), NULL) == 0) {
+ char BIGSTK tempo[HTS_URLMAXSIZE*2];
+ sprintf(tempo, "%d", (int)r->statuscode);
+ if (r->location != NULL && r->location[0] != '\0') {
+ strcatbuff(tempo, "\n");
+ strcatbuff(tempo, r->location);
+ }
+ if ((opt->debug>0) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log, "Cached fast-header response: %s%s is %d"LF, url_adr, url_fil, (int)r->statuscode);
+ }
+ inthash_add((inthash)cache->cached_tests, concat(url_adr, url_fil), (long int)strdupt(tempo));
+ }
+ }
}
}
}
@@ -145,13 +174,222 @@ void cache_mayadd(httrackp* opt,cache_back* cache,htsblk* r,char* url_adr,char*
}
+
+#if 01
+
+/* test only - to be removed */
+
+/* Append "field: value\r\n" at offset headersSize of the headers buffer and
+   advance headersSize by the number of characters written. Nothing is
+   written when value is NULL or an empty string.
+   NOTE(review): sprintf() is unbounded here; the macro has no knowledge of
+   the capacity of 'headers' - the caller must guarantee enough room. */
+#define ZIP_FIELD_STRING(headers, headersSize, field, value) do { \
+ if ( (value != NULL) && (value)[0] != '\0') { \
+ sprintf(headers + headersSize, "%s: %s\r\n", field, (value != NULL) ? (value) : ""); \
+ (headersSize) += (int) strlen(headers + headersSize); \
+ } \
+} while(0)
+/* Same as ZIP_FIELD_STRING for an integer value (formatted with LLintP after
+   a cast to LLint); nothing is written when value is zero. */
+#define ZIP_FIELD_INT(headers, headersSize, field, value) do { \
+ if ( (value != 0) ) { \
+ sprintf(headers + headersSize, "%s: "LLintP"\r\n", field, (LLint)(value)); \
+ (headersSize) += (int) strlen(headers + headersSize); \
+ } \
+} while(0)
+/* Same as ZIP_FIELD_INT, but the field is always written, even when zero. */
+#define ZIP_FIELD_INT_FORCE(headers, headersSize, field, value) do { \
+ sprintf(headers + headersSize, "%s: "LLintP"\r\n", field, (LLint)(value)); \
+ (headersSize) += (int) strlen(headers + headersSize); \
+} while(0)
+
+/* Per-entry record for the ZIP cache.
+   NOTE(review): not referenced by any code visible in this patch section;
+   field meanings below are inferred from their names - confirm before use. */
+struct cache_back_zip_entry {
+ unsigned long int hdrPos;   /* presumably the entry header position in the archive */
+ unsigned long int size;     /* presumably the entry size */
+ int compressionMethod;      /* presumably the ZIP compression method */
+};
+
+/* If 'line' is not empty and strfield2(line, refline) is non-zero, copy
+   'value' into 'refvalue' (strcpybuff) and blank 'line' so that the
+   following ZIP_READFIELD_* macros skip it. */
+#define ZIP_READFIELD_STRING(line, value, refline, refvalue) do { \
+ if (line[0] != '\0' && strfield2(line, refline)) { \
+ strcpybuff(refvalue, value); \
+ line[0] = '\0'; \
+ } \
+} while(0)
+/* Integer counterpart of ZIP_READFIELD_STRING. The value is parsed as an
+   LLint with LLintP - the same type/format pair used by ZIP_FIELD_INT when
+   writing - so that large values such as X-Size are not mangled by a "%d"
+   parse into an int. The result is then converted to the type of
+   'refvalue' by plain assignment; 0 is stored when parsing fails. */
+#define ZIP_READFIELD_INT(line, value, refline, refvalue) do { \
+ if (line[0] != '\0' && strfield2(line, refline)) { \
+ LLint intval = 0; \
+ sscanf(value, LLintP, &intval); \
+ (refvalue) = intval; \
+ line[0] = '\0'; \
+ } \
+} while(0)
+
+
+/*
+ * Add one entry to the ZIP cache (cache->zipOutput).
+ *
+ * The entry name is the URL (url_adr + url_fil, prefixed with "http://" when
+ * link_has_authority(url_adr) is false). The response meta-data is stored as
+ * a block of "Field: value" lines in the local extra field of the entry; the
+ * body is stored as the entry data only when "data in cache" is selected:
+ *   - url_save == NULL (link test)            : headers only
+ *   - url_save empty and url_fil "/robots.txt": headers + data
+ *   - url_save empty, anything else           : nothing is stored (return)
+ *   - hypertext MIME type, or all_in_cache    : headers + data
+ *   - otherwise                               : headers only
+ * Nothing is stored when r.size < 0.
+ * Any failure reported by the zip layer triggers assertf().
+ *
+ * NOTE(review): the header lines are appended with sprintf() into a fixed
+ * 8192-byte buffer without bounds checking.
+ */
+void cache_add(cache_back* cache,htsblk r,char* url_adr,char* url_fil,char* url_save,int all_in_cache) {
+  char BIGSTK filemame[HTS_URLMAXSIZE*4];
+  int dataincache=0; // put data in cache ?
+  char BIGSTK headers[8192];
+  int headersSize = 0;
+  int entryBodySize = 0;
+  int entryFilenameSize = 0;
+  zip_fileinfo fi;
+
+  // robots.txt hack
+  if (url_save == NULL) {
+    dataincache=0; // testing links
+  }
+  else {
+    if ( (strnotempty(url_save)==0) ) {
+      if (strcmp(url_fil,"/robots.txt")==0) // robots.txt
+        dataincache=1;
+      else
+        return; // error (except robots.txt)
+    }
+
+    /* Data in cache ? */
+    if (is_hypertext_mime(r.contenttype, url_fil))
+      dataincache=1;
+    else if (all_in_cache)
+      dataincache=1;
+  }
+
+  if (r.size < 0) // error
+    return;
+
+  // data in cache
+  if (dataincache) {
+    assertf(((int) r.size) == r.size);
+    entryBodySize = (int) r.size;
+  }
+
+  /* Fields */
+  headers[0] = '\0';
+  headersSize = 0;
+  /* */
+  {
+    const char* message;
+    if (strlen(r.msg) < 32) {
+      message = r.msg;
+    } else {
+      message = "(See X-StatusMessage)";
+    }
+    /* 64 characters MAX for first line: use the shortened message computed
+       above (the full text is always stored in X-StatusMessage below) */
+    sprintf(headers + headersSize, "HTTP/1.%c %d %s\r\n", '1', r.statuscode, message);
+  }
+  headersSize += (int) strlen(headers + headersSize);
+  /* Second line MUST ALWAYS be X-In-Cache */
+  ZIP_FIELD_INT_FORCE(headers, headersSize, "X-In-Cache", dataincache);
+  ZIP_FIELD_INT(headers, headersSize, "X-StatusCode", r.statuscode);
+  ZIP_FIELD_STRING(headers, headersSize, "X-StatusMessage", r.msg);
+  ZIP_FIELD_INT(headers, headersSize, "X-Size", r.size); // size
+  ZIP_FIELD_STRING(headers, headersSize, "Content-Type", r.contenttype); // content type
+  ZIP_FIELD_STRING(headers, headersSize, "X-Charset", r.charset); // charset
+  ZIP_FIELD_STRING(headers, headersSize, "Last-Modified", r.lastmodified); // last-modified
+  ZIP_FIELD_STRING(headers, headersSize, "Etag", r.etag); // Etag
+  ZIP_FIELD_STRING(headers, headersSize, "Location", r.location); // 'location' for moved responses
+  ZIP_FIELD_STRING(headers, headersSize, "Content-Disposition", r.cdispo); // Content-disposition
+  ZIP_FIELD_STRING(headers, headersSize, "X-Addr", url_adr); // Original address
+  ZIP_FIELD_STRING(headers, headersSize, "X-Fil", url_fil); // Original URI filename
+  ZIP_FIELD_STRING(headers, headersSize, "X-Save", url_save); // Original save filename
+
+  entryFilenameSize = (int) ( strlen(url_adr) + strlen(url_fil));
+
+  /* Filename */
+  if (!link_has_authority(url_adr)) {
+    strcpybuff(filemame, "http://");
+  } else {
+    strcpybuff(filemame, "");
+  }
+  strcatbuff(filemame, url_adr);
+  strcatbuff(filemame, url_fil);
+
+  /* Time */
+  memset(&fi, 0, sizeof(fi));
+  if (r.lastmodified[0] != '\0') {
+    struct tm* tm_s=convert_time_rfc822(r.lastmodified);
+    if (tm_s) {
+      fi.tmz_date.tm_sec = (uInt) tm_s->tm_sec;
+      fi.tmz_date.tm_min = (uInt) tm_s->tm_min;
+      fi.tmz_date.tm_hour = (uInt) tm_s->tm_hour;
+      fi.tmz_date.tm_mday = (uInt) tm_s->tm_mday;
+      fi.tmz_date.tm_mon = (uInt) tm_s->tm_mon;
+      fi.tmz_date.tm_year = (uInt) tm_s->tm_year;
+    }
+  }
+
+  /* Open file - NOTE: headers are passed as the local extra field */
+  if (zipOpenNewFileInZip((zipFile) cache->zipOutput,
+    filemame,
+    &fi,
+    /*
+    Store headers in realtime in the local file directory as extra field
+    In case of crash, we'll be able to recover the whole ZIP file by rescanning it
+    */
+    headers,
+    (uInt) strlen(headers),
+    NULL,
+    0,
+    NULL, /* comment */
+    Z_DEFLATED,
+    Z_DEFAULT_COMPRESSION) != Z_OK)
+  {
+    int zip_disk_write_failed = 0;
+    assertf(zip_disk_write_failed);
+  }
+
+  /* Write data in cache */
+  if (dataincache) {
+    if (r.is_write == 0) {
+      /* body is held in memory */
+      if (r.size > 0 && r.adr != NULL) {
+        if (zipWriteInFileInZip((zipFile) cache->zipOutput, r.adr, (int) r.size) != Z_OK) {
+          int zip_disk_write_failed = 0;
+          assertf(zip_disk_write_failed);
+        }
+      }
+    } else {
+      FILE* fp;
+      // body was written to disk: copy the saved file into the archive
+      LLint file_size=fsize(fconv(url_save));
+      if (file_size>=0) {
+        fp=fopen(fconv(url_save),"rb");
+        if (fp!=NULL) {
+          char BIGSTK buff[32768];
+          INTsys nl;
+          do {
+            nl=fread(buff,1,32768,fp);
+            if (nl>0) {
+              if (zipWriteInFileInZip((zipFile) cache->zipOutput, buff, (int) nl) != Z_OK) {
+                int zip_disk_write_failed = 0;
+                assertf(zip_disk_write_failed);
+              }
+            }
+          } while(nl>0);
+          fclose(fp);
+        } else {
+          /* Err FIXME - lost file */
+        }
+      } /* Empty files are OK */
+    }
+  }
+
+  /* Close */
+  if (zipCloseFileInZip((zipFile) cache->zipOutput) != Z_OK) {
+    int zip_disk_write_failed = 0;
+    assertf(zip_disk_write_failed);
+  }
+
+  /* Flush */
+  if (zipFlush((zipFile) cache->zipOutput) != 0) {
+    int zip_disk_write_failed = 0;
+    assertf(zip_disk_write_failed);
+  }
+}
+
+#else
+
/* Ajout d'un fichier en cache */
-void cache_add(htsblk r,char* url_adr,char* url_fil,char* url_save,FILE* cache_ndx,FILE* cache_dat,int all_in_cache) {
+void cache_add(cache_back* cache,htsblk r,char* url_adr,char* url_fil,char* url_save,int all_in_cache) {
int pos;
char s[256];
- char buff[HTS_URLMAXSIZE*4];
+ char BIGSTK buff[HTS_URLMAXSIZE*4];
int ok=1;
int dataincache=0; // donnée en cache?
+ FILE* cache_ndx = cache->ndx;
+ FILE* cache_dat = cache->dat;
/*char digest[32+2];*/
/*digest[0]='\0';*/
@@ -159,6 +397,8 @@ void cache_add(htsblk r,char* url_adr,char* url_fil,char* url_save,FILE* cache_n
if ( (strnotempty(url_save)==0) ) {
if (strcmp(url_fil,"/robots.txt")==0) // robots.txt
dataincache=1;
+ else if (strcmp(url_fil,"/test")==0) // testing links
+ dataincache=0;
else
return; // erreur (sauf robots.txt)
}
@@ -167,7 +407,7 @@ void cache_add(htsblk r,char* url_adr,char* url_fil,char* url_save,FILE* cache_n
return; // refusé..
// Mettre les *donées* en cache ?
- if (is_hypertext_mime(r.contenttype)) // html, mise en cache des données et
+ if (is_hypertext_mime(r.contenttype, url_fil)) // html, mise en cache des données et
dataincache=1; // pas uniquement de l'en tête
else if (all_in_cache)
dataincache=1; // forcer tout en cache
@@ -209,6 +449,7 @@ void cache_add(htsblk r,char* url_adr,char* url_fil,char* url_save,FILE* cache_n
&& cache_wstr(cache_dat,url_adr) != -1 // Original address
&& cache_wstr(cache_dat,url_fil) != -1 // Original URI filename
&& cache_wstr(cache_dat,url_save) != -1 // Original save filename
+ && cache_wstr(cache_dat,r.headers) != -1 // Full HTTP Headers
&& cache_wstr(cache_dat,"HTS") != -1 // end of header
) {
ok=1; /* ok */
@@ -238,7 +479,7 @@ void cache_add(htsblk r,char* url_adr,char* url_fil,char* url_save,FILE* cache_n
if (cache_wLLint(cache_dat,file_size)!=-1) {
fp=fopen(fconv(url_save),"rb");
if (fp!=NULL) {
- char buff[32768];
+ char BIGSTK buff[32768];
INTsys nl;
do {
nl=fread(buff,1,32768,fp);
@@ -275,6 +516,8 @@ void cache_add(htsblk r,char* url_adr,char* url_fil,char* url_save,FILE* cache_n
fflush(cache_dat); fflush(cache_ndx);
}
+#endif
+
htsblk cache_read(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* location) {
return cache_readex(opt,cache,adr,fil,save,location,NULL,0);
@@ -284,19 +527,274 @@ htsblk cache_read_ro(httrackp* opt,cache_back* cache,char* adr,char* fil,char* s
return cache_readex(opt,cache,adr,fil,save,location,NULL,1);
}
+static htsblk cache_readex_old(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* location,
+ char* return_save, int readonly);
+
+static htsblk cache_readex_new(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* location,
+ char* return_save, int readonly);
+
// lecture d'un fichier dans le cache
// si save==null alors test unqiquement
htsblk cache_readex(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* location,
char* return_save, int readonly) {
+ if (cache->zipInput != NULL) {
+ return cache_readex_new(opt, cache, adr, fil, save, location, return_save, readonly);
+ } else {
+ return cache_readex_old(opt, cache, adr, fil, save, location, return_save, readonly);
+ }
+}
+
+/*
+ * Read one entry from the ZIP cache (cache->zipInput).
+ *
+ * adr, fil    : URL parts; adr+fil is the key looked up in cache->hashtable
+ * save        : NULL  -> only the stored headers are read (test mode)
+ *               ""    -> headers + body loaded in memory (r.adr)
+ *               other -> target filename; with HTS_DIRECTDISK, non-hypertext
+ *                        "200" entries are written straight to this file
+ *                        unless 'readonly' is set
+ * location    : optional buffer receiving the stored "Location" value; when
+ *               NULL an internal buffer is used and r.location is reset to
+ *               NULL before returning
+ * return_save : optional filename; may be NULL
+ * readonly    : non-zero to forbid the direct-to-disk path
+ *
+ * Returns the rebuilt htsblk; on any error r.statuscode is -1 and r.msg
+ * holds a description.
+ *
+ * NOTE(review): unlike what the visible part of cache_readex_old does,
+ * return_save is never filled in here, and previous_save is only a scratch
+ * target for the X-Addr / X-Fil / X-Save fields - confirm this is intended.
+ */
+static htsblk cache_readex_new(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* location,
+                               char* return_save, int readonly) {
+  char BIGSTK location_default[HTS_URLMAXSIZE*2];
+  char BIGSTK buff[HTS_URLMAXSIZE*2];
+  char BIGSTK previous_save[HTS_URLMAXSIZE*2];
+  long int hash_pos;
+  int hash_pos_return;
+  htsblk r;
+  memset(&r, 0, sizeof(htsblk)); r.soc=INVALID_SOCKET;
+
+  if (location) {
+    r.location = location;
+  } else {
+    r.location = location_default;
+  }
+  strcpybuff(r.location, "");
+  strcpybuff(buff, adr);
+  strcatbuff(buff,fil);
+  hash_pos_return = inthash_read((inthash)cache->hashtable, buff, (long int*)&hash_pos);
+  /* avoid errors on data entries */
+  if (adr[0] == '/' && adr[1] == '/' && adr[2] == '[') {
+    /* this function only uses the hash lookup (there is no 'a' pointer
+       here, unlike in the old reader), so always invalidate the lookup */
+    hash_pos_return = 0;
+  }
+
+  if (hash_pos_return) {
+    uLong posInZip;
+    if (hash_pos > 0) {
+      posInZip = (uLong) hash_pos;
+    } else {
+      posInZip = (uLong) -hash_pos;
+    }
+    if (unzSetOffset((unzFile) cache->zipInput, posInZip) == Z_OK) {
+      /* Read header (Max 8KiB) */
+      if (unzOpenCurrentFile((unzFile) cache->zipInput) == Z_OK) {
+        char BIGSTK headerBuff[8192 + 2];
+        int readSizeHeader;
+        int totalHeader = 0;
+        int dataincache = 0;
+
+        /* For BIG comments */
+        headerBuff[0]
+          = headerBuff[sizeof(headerBuff) - 1]
+          = headerBuff[sizeof(headerBuff) - 2]
+          = headerBuff[sizeof(headerBuff) - 3] = '\0';
+
+        if ( (readSizeHeader = unzGetLocalExtrafield((unzFile) cache->zipInput, headerBuff, sizeof(headerBuff) - 2)) > 0)
+          /*if (unzGetCurrentFileInfo((unzFile) cache->zipInput, NULL,
+          NULL, 0, NULL, 0, headerBuff, sizeof(headerBuff) - 2) == Z_OK ) */
+        {
+          int offset = 0;
+          char BIGSTK line[HTS_URLMAXSIZE + 2];
+          int lineEof = 0;
+          /*readSizeHeader = (int) strlen(headerBuff);*/
+          headerBuff[readSizeHeader] = '\0';
+          /* Parse the "Field: value" lines until an empty line or the end
+             of the header block */
+          do {
+            char* value;
+            line[0] = '\0';
+            offset += binput(headerBuff + offset, line, sizeof(line) - 2);
+            if (line[0] == '\0') {
+              lineEof = 1;
+            }
+            value = strchr(line, ':');
+            if (value != NULL) {
+              *value++ = '\0';
+              if (*value == ' ' || *value == '\t') value++;
+              ZIP_READFIELD_INT(line, value, "X-In-Cache", dataincache);
+              ZIP_READFIELD_INT(line, value, "X-Statuscode", r.statuscode);
+              ZIP_READFIELD_STRING(line, value, "X-StatusMessage", r.msg); // msg
+              ZIP_READFIELD_INT(line, value, "X-Size", r.size); // size
+              ZIP_READFIELD_STRING(line, value, "Content-Type", r.contenttype); // content type
+              ZIP_READFIELD_STRING(line, value, "X-Charset", r.charset); // charset
+              ZIP_READFIELD_STRING(line, value, "Last-Modified", r.lastmodified); // last-modified
+              ZIP_READFIELD_STRING(line, value, "Etag", r.etag); // Etag
+              ZIP_READFIELD_STRING(line, value, "Location", r.location); // 'location' for moved responses
+              ZIP_READFIELD_STRING(line, value, "Content-Disposition", r.cdispo); // Content-disposition
+              ZIP_READFIELD_STRING(line, value, "X-Addr", previous_save); // Original address
+              ZIP_READFIELD_STRING(line, value, "X-Fil", previous_save); // Original URI filename
+              ZIP_READFIELD_STRING(line, value, "X-Save", previous_save); // Original save filename
+            }
+          } while(offset < readSizeHeader && !lineEof);
+          totalHeader = offset;
+
+          /* Complete fields */
+          r.totalsize=r.size;
+          r.adr=NULL;
+          r.out=NULL;
+          r.fp=NULL;
+
+          if (save != NULL) { /* not a headers-only read */
+            int ok = 0;
+
+#if HTS_DIRECTDISK
+            // Shortcut:
+            // can the file be stored directly on disk?
+            if (ok) {
+              if (r.msg[0] == '\0') {
+                strcpybuff(r.msg,"Cache Read Error : Unexpected error");
+              }
+            }
+            else if (!readonly && r.statuscode==200 && !is_hypertext_mime(r.contenttype, fil) && strnotempty(save)) { // not HTML: write directly to disk
+
+              r.is_write=1; // write to disk
+              if (fexist(fconv(save))) { // a file already exists
+                //if (fsize(fconv(save))==r.size) { // same size -- NO, never mind (badly declared size)
+                ok=1; // nothing more to do
+                filenote(save,NULL); // note the file as known
+              }
+
+              if (!dataincache && !ok) { // no data in cache and file not found: error!
+                if (opt->norecatch) {
+                  filecreateempty(save);
+                  //
+                  r.statuscode=-1;
+                  strcpybuff(r.msg,"File deleted by user not recaught");
+                  ok=1; // do not fetch again (and no error)
+                } else {
+                  r.statuscode=-1;
+                  strcpybuff(r.msg,"Previous cache file not found");
+                  ok=1; // do not fetch
+                }
+              }
+
+              if (!ok) {
+                r.out=filecreate(save);
+#if HDEBUG
+                printf("direct-disk: %s\n",save);
+#endif
+                if (r.out!=NULL) {
+                  char BIGSTK buff[32768+4];
+                  LLint size = r.size;
+                  if (size > 0) {
+                    INTsys nl;
+                    do {
+                      nl = unzReadCurrentFile((unzFile) cache->zipInput, buff, (int)minimum(size, 32768));
+                      if (nl>0) {
+                        size-=nl;
+                        if ((INTsys)fwrite(buff,1,(INTsys)nl,r.out)!=nl) { // write error
+                          r.statuscode=-1;
+                          strcpybuff(r.msg,"Cache Read Error : Read To Disk");
+                        }
+                      }
+                    } while((nl>0) && (size>0) && (r.statuscode!=-1));
+                  }
+
+                  fclose(r.out);
+                  r.out=NULL;
+#if HTS_WIN==0
+                  chmod(save,HTS_ACCESS_FILE);
+#endif
+                  //xxusercommand(opt,0,NULL,fconv(save), adr, fil);
+                } else {
+                  r.statuscode=-1;
+                  strcpybuff(r.msg,"Cache Write Error : Unable to Create File");
+                  //printf("%s\n",save);
+                }
+              }
+
+            } else
+#endif
+            { // read in memory
+
+              if (!dataincache) {
+                if (strnotempty(save)) { // no data in cache: odd, as this should be html
+                  r.statuscode=-1;
+                  strcpybuff(r.msg,"Previous cache file not found (2)");
+                } else { /* Read in memory from cache */
+                  /* return_save may be NULL (cache_read / cache_read_ro) */
+                  if (return_save != NULL && strnotempty(return_save) && fexist(return_save)) {
+                    FILE* fp = fopen(fconv(return_save), "rb");
+                    if (fp != NULL) {
+                      r.adr=(char*) malloct((INTsys)r.size + 4);
+                      /* test the freshly allocated buffer (not the 'adr' URL argument) */
+                      if (r.adr != NULL) {
+                        if (r.size > 0 && fread(r.adr, 1, (INTsys) r.size, fp) != r.size) {
+                          r.statuscode=-1;
+                          strcpybuff(r.msg,"Read error in cache disk data");
+                        }
+                      } else {
+                        r.statuscode=-1;
+                        strcpybuff(r.msg,"Read error (memory exhausted) from cache");
+                      }
+                      fclose(fp);
+                    }
+                  } else {
+                    r.statuscode=-1;
+                    strcpybuff(r.msg,"Cache file not found on disk");
+                  }
+                }
+              } else {
+                // read the whole entry at once
+                r.adr=(char*) malloct((INTsys)r.size+4);
+                if (r.adr!=NULL) {
+                  if (unzReadCurrentFile((unzFile) cache->zipInput, r.adr, (INTsys)r.size) != r.size) { // read error
+                    freet(r.adr);
+                    r.adr=NULL;
+                    r.statuscode=-1;
+                    strcpybuff(r.msg,"Cache Read Error : Read Data");
+                  } else
+                    *(r.adr+r.size)='\0';
+                  //printf(">%s status %d\n",back[p].r.contenttype,back[p].r.statuscode);
+                } else { // allocation error
+                  r.statuscode=-1;
+                  strcpybuff(r.msg,"Cache Memory Error");
+                }
+              }
+            }
+          } // if save==null, load nothing (headers only)
+
+
+        } else {
+          r.statuscode=-1;
+          strcpybuff(r.msg,"Cache Read Error : Read Header Data");
+        }
+        unzCloseCurrentFile((unzFile) cache->zipInput);
+      } else {
+        r.statuscode=-1;
+        strcpybuff(r.msg,"Cache Read Error : Open File");
+      }
+
+    } else {
+      r.statuscode=-1;
+      strcpybuff(r.msg,"Cache Read Error : Bad Offset");
+    }
+  } else {
+    r.statuscode=-1;
+    strcpybuff(r.msg,"File Cache Entry Not Found");
+  }
+  if (!location) { /* don't export internal buffer */
+    r.location = NULL;
+  }
+  return r;
+}
+
+
+// lecture d'un fichier dans le cache
+// si save==null alors test unqiquement
+static htsblk cache_readex_old(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* location,
+ char* return_save, int readonly) {
#if HTS_FAST_CACHE
long int hash_pos;
int hash_pos_return;
#else
char* a;
#endif
- char buff[HTS_URLMAXSIZE*2];
- char location_default[HTS_URLMAXSIZE*2];
- char previous_save[HTS_URLMAXSIZE*2];
+ char BIGSTK buff[HTS_URLMAXSIZE*2];
+ char BIGSTK location_default[HTS_URLMAXSIZE*2];
+ char BIGSTK previous_save[HTS_URLMAXSIZE*2];
htsblk r;
int ok=0;
int header_only=0;
@@ -388,6 +886,9 @@ htsblk cache_readex(httrackp* opt,cache_back* cache,char* adr,char* fil,char* sa
strcpybuff(return_save, previous_save);
}
}
+ if (cache->version >= 5) {
+ r.headers = cache_rstr_addr(cache->olddat);
+ }
//
cache_rstr(cache->olddat,check);
if (strcmp(check,"HTS")==0) { /* intégrité OK */
@@ -425,7 +926,7 @@ htsblk cache_readex(httrackp* opt,cache_back* cache,char* adr,char* fil,char* sa
#if HTS_DIRECTDISK
// Court-circuit:
// Peut-on stocker le fichier directement sur disque?
- if (!readonly && r.statuscode==200 && !is_hypertext_mime(r.contenttype) && strnotempty(save)) { // pas HTML, écrire sur disk directement
+ if (!readonly && r.statuscode==200 && !is_hypertext_mime(r.contenttype, fil) && strnotempty(save)) { // pas HTML, écrire sur disk directement
int ok=0;
r.is_write=1; // écrire
@@ -457,7 +958,7 @@ htsblk cache_readex(httrackp* opt,cache_back* cache,char* adr,char* fil,char* sa
printf("direct-disk: %s\n",save);
#endif
if (r.out!=NULL) {
- char buff[32768+4];
+ char BIGSTK buff[32768+4];
LLint size = r.size;
if (size > 0) {
INTsys nl;
@@ -572,7 +1073,7 @@ htsblk cache_readex(httrackp* opt,cache_back* cache,char* adr,char* fil,char* sa
/* 0 if failed */
int cache_writedata(FILE* cache_ndx,FILE* cache_dat,char* str1,char* str2,char* outbuff,int len) {
if (cache_dat) {
- char buff[HTS_URLMAXSIZE*4];
+ char BIGSTK buff[HTS_URLMAXSIZE*4];
char s[256];
int pos;
fflush(cache_dat); fflush(cache_ndx);
@@ -599,7 +1100,7 @@ int cache_writedata(FILE* cache_ndx,FILE* cache_dat,char* str1,char* str2,char*
int cache_readdata(cache_back* cache,char* str1,char* str2,char** inbuff,int* inlen) {
#if HTS_FAST_CACHE
if (cache->hashtable) {
- char buff[HTS_URLMAXSIZE*4];
+ char BIGSTK buff[HTS_URLMAXSIZE*4];
long int pos;
strcpybuff(buff,str1); strcatbuff(buff,str2);
if (inthash_read((inthash)cache->hashtable,buff,(long int*)&pos)) {
@@ -651,7 +1152,29 @@ void cache_init(cache_back* cache,httrackp* opt) {
#else
mkdir(fconcat(opt->path_log,"hts-cache"),HTS_PROTECT_FOLDER);
#endif
- if ((fexist(fconcat(opt->path_log,"hts-cache/new.dat"))) && (fexist(fconcat(opt->path_log,"hts-cache/new.ndx")))) { // il existe déja un cache précédent.. renommer
+ if ((fexist(fconcat(opt->path_log,"hts-cache/new.zip")))) { // il existe déja un cache précédent.. renommer
+ /* Previous cache from the previous cache version */
+#if 0
+ /* No.. reuse with old httrack releases! */
+ if (fexist(fconcat(opt->path_log,"hts-cache/old.dat")))
+ remove(fconcat(opt->path_log,"hts-cache/old.dat"));
+ if (fexist(fconcat(opt->path_log,"hts-cache/old.ndx")))
+ remove(fconcat(opt->path_log,"hts-cache/old.ndx"));
+#endif
+ /* Previous cache version */
+ if ((fexist(fconcat(opt->path_log,"hts-cache/new.dat"))) && (fexist(fconcat(opt->path_log,"hts-cache/new.ndx")))) { // il existe déja un cache précédent.. renommer
+ rename(fconcat(opt->path_log,"hts-cache/new.dat"),fconcat(opt->path_log,"hts-cache/old.dat"));
+ rename(fconcat(opt->path_log,"hts-cache/new.ndx"),fconcat(opt->path_log,"hts-cache/old.ndx"));
+ }
+
+ /* Remove OLD cache */
+ if (fexist(fconcat(opt->path_log,"hts-cache/old.zip")))
+ remove(fconcat(opt->path_log,"hts-cache/old.zip"));
+
+ /* Rename */
+ rename(fconcat(opt->path_log,"hts-cache/new.zip"),fconcat(opt->path_log,"hts-cache/old.zip"));
+ }
+ else if ((fexist(fconcat(opt->path_log,"hts-cache/new.dat"))) && (fexist(fconcat(opt->path_log,"hts-cache/new.ndx")))) { // il existe déja un cache précédent.. renommer
#if DEBUGCA
printf("work with former cache\n");
#endif
@@ -677,6 +1200,136 @@ void cache_init(cache_back* cache,httrackp* opt) {
if (
(
!cache->ro &&
+ fsize(fconcat(opt->path_log,"hts-cache/old.zip")) > 0
+ )
+ ||
+ (
+ cache->ro &&
+ fsize(fconcat(opt->path_log,"hts-cache/new.zip")) > 0
+ )
+ )
+ {
+ if (!cache->ro) {
+ cache->zipInput = unzOpen(fconcat(opt->path_log,"hts-cache/old.zip"));
+ } else {
+ cache->zipInput = unzOpen(fconcat(opt->path_log,"hts-cache/new.zip"));
+ }
+
+ // Corrupted ZIP file ? Try to repair!
+ if (cache->zipInput == NULL && !cache->ro) {
+ char* name;
+ uLong repaired = 0;
+ uLong repairedBytes = 0;
+ if (!cache->ro) {
+ name = fconcat(opt->path_log,"hts-cache/old.zip");
+ } else {
+ name = fconcat(opt->path_log,"hts-cache/new.zip");
+ }
+ if (opt->log) {
+ fspc(opt->log,"warning"); fprintf(opt->log,"Cache: damaged cache, trying to repair"LF);
+ fflush(opt->log);
+ }
+ if (unzRepair(name,
+ fconcat(opt->path_log,"hts-cache/repair.zip"),
+ fconcat(opt->path_log,"hts-cache/repair.tmp"),
+ &repaired, &repairedBytes
+ ) == Z_OK) {
+ unlink(name);
+ rename(fconcat(opt->path_log,"hts-cache/repair.zip"), name);
+ cache->zipInput = unzOpen(name);
+ if (opt->log) {
+ fspc(opt->log,"warning"); fprintf(opt->log,"Cache: %d bytes successfully recovered in %d entries"LF,
+ (int) repairedBytes, (int) repaired);
+ fflush(opt->log);
+ }
+ } else {
+ if (opt->log) {
+ fspc(opt->log,"warning"); fprintf(opt->log,"Cache: could not repair the cache"LF);
+ fflush(opt->log);
+ }
+ }
+ }
+
+ // Opened ?
+ if (cache->zipInput!=NULL) {
+
+ /* Ready directory entries */
+ if (unzGoToFirstFile((unzFile) cache->zipInput) == Z_OK) {
+ char comment[128];
+ char BIGSTK filename[HTS_URLMAXSIZE * 4];
+ int entries = 0;
+ memset(comment, 0, sizeof(comment)); // for truncated reads
+ do {
+ int readSizeHeader = 0;
+ filename[0] = '\0';
+ comment[0] = '\0';
+ if (unzOpenCurrentFile((unzFile) cache->zipInput) == Z_OK) {
+ if (
+ (readSizeHeader = unzGetLocalExtrafield((unzFile) cache->zipInput, comment, sizeof(comment) - 2)) > 0
+ &&
+ unzGetCurrentFileInfo((unzFile) cache->zipInput, NULL, filename, sizeof(filename) - 2, NULL, 0, NULL, 0) == Z_OK
+ )
+ {
+ long int pos = (long int) unzGetOffset((unzFile) cache->zipInput);
+ assertf(readSizeHeader < sizeof(comment));
+ comment[readSizeHeader] = '\0';
+ entries++;
+ if (pos > 0) {
+ int dataincache = 0; // data in cache ?
+ char* filenameIndex = filename;
+ if (strfield(filenameIndex, "http://")) {
+ filenameIndex += 7;
+ }
+ if (comment[0] != '\0') {
+ int maxLine = 2;
+ char* a = comment;
+ while(*a && maxLine-- > 0) { // parse only few first lines
+ char BIGSTK line[1024];
+ line[0] = '\0';
+ a+=binput(a, line, sizeof(line) - 2);
+ if (strfield(line, "X-In-Cache:")) {
+ if (strfield2(line, "X-In-Cache: 1")) {
+ dataincache = 1;
+ } else {
+ dataincache = 0;
+ }
+ break;
+ }
+ }
+ }
+ if (dataincache)
+ inthash_add((inthash)cache->hashtable, filenameIndex, pos);
+ else
+ inthash_add((inthash)cache->hashtable, filenameIndex, -pos);
+ } else {
+ if (opt->log!=NULL) {
+ fspc(opt->log,"warning"); fprintf(opt->log,"Corrupted cache meta entry #%d"LF, (int)entries);
+ }
+ }
+ } else {
+ if (opt->log!=NULL) {
+ fspc(opt->log,"warning"); fprintf(opt->log,"Corrupted cache entry #%d"LF, (int)entries);
+ }
+ }
+ unzCloseCurrentFile((unzFile) cache->zipInput);
+ } else {
+ if (opt->log!=NULL) {
+ fspc(opt->log,"warning"); fprintf(opt->log,"Corrupted cache entry #%d"LF, (int)entries);
+ }
+ }
+ } while( unzGoToNextFile((unzFile) cache->zipInput) == Z_OK );
+ if ((opt->debug>0) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"Cache index loaded: %d entries loaded"LF, (int)entries);
+ }
+ opt->is_update=1; // signaler comme update
+
+ }
+
+ }
+
+ } else if (
+ (
+ !cache->ro &&
fsize(fconcat(opt->path_log,"hts-cache/old.dat")) >=0 && fsize(fconcat(opt->path_log,"hts-cache/old.ndx")) >0
)
||
@@ -724,7 +1377,7 @@ void cache_init(cache_back* cache,httrackp* opt) {
if (strncmp(firstline,"CACHE-",6)==0) { // Nouvelle version du cache
if (strncmp(firstline,"CACHE-1.",8)==0) { // Version 1.1x
cache->version=(int)(firstline[8]-'0'); // cache 1.x
- if (cache->version <= 4) {
+ if (cache->version <= 5) {
a+=cache_brstr(a,firstline);
strcpybuff(cache->lastmodified,firstline);
} else {
@@ -762,7 +1415,7 @@ void cache_init(cache_back* cache,httrackp* opt) {
/* Create hash table for the cache (MUCH FASTER!) */
#if HTS_FAST_CACHE
if (cache->use) {
- char line[HTS_URLMAXSIZE*2];
+ char BIGSTK line[HTS_URLMAXSIZE*2];
char linepos[256];
int pos;
while ( (a!=NULL) && (a < (cache->use+buffl) ) ) {
@@ -793,60 +1446,96 @@ void cache_init(cache_back* cache,httrackp* opt) {
if (!cache->ro) {
// ouvrir caches actuels
structcheck(fconcat(opt->path_log, "hts-cache/"));
- cache->dat=fopen(fconcat(opt->path_log,"hts-cache/new.dat"),"wb");
- cache->ndx=fopen(fconcat(opt->path_log,"hts-cache/new.ndx"),"wb");
- // les deux doivent être ouvrables
- if ((cache->dat==NULL) && (cache->ndx!=NULL)) {
- fclose(cache->ndx);
- cache->ndx=NULL;
- }
- if ((cache->dat!=NULL) && (cache->ndx==NULL)) {
- fclose(cache->dat);
- cache->dat=NULL;
- }
- if (cache->ndx!=NULL) {
- char s[256];
-
- cache_wstr(cache->dat,"CACHE-1.4");
- fflush(cache->dat);
- cache_wstr(cache->ndx,"CACHE-1.4");
- fflush(cache->ndx);
- //
- time_gmt_rfc822(s); // date et heure actuelle GMT pour If-Modified-Since..
- cache_wstr(cache->ndx,s);
- fflush(cache->ndx); // un petit fflush au cas où
-
- // supprimer old.lst
- if (fexist(fconcat(opt->path_log,"hts-cache/old.lst")))
- remove(fconcat(opt->path_log,"hts-cache/old.lst"));
- // renommer
- if (fexist(fconcat(opt->path_log,"hts-cache/new.lst")))
- rename(fconcat(opt->path_log,"hts-cache/new.lst"),fconcat(opt->path_log,"hts-cache/old.lst"));
- // ouvrir
- cache->lst=fopen(fconcat(opt->path_log,"hts-cache/new.lst"),"wb");
- {
- filecreate_params tmp;
- strcpybuff(tmp.path,opt->path_html); // chemin
- tmp.lst=cache->lst; // fichier lst
- filenote("",&tmp); // initialiser filecreate
+ if (1) {
+ /* Create ZIP file cache */
+ cache->zipOutput = (void*) zipOpen(fconcat(opt->path_log,"hts-cache/new.zip"), 0);
+
+ if (cache->zipOutput != NULL) {
+ // supprimer old.lst
+ if (fexist(fconcat(opt->path_log,"hts-cache/old.lst")))
+ remove(fconcat(opt->path_log,"hts-cache/old.lst"));
+ // renommer
+ if (fexist(fconcat(opt->path_log,"hts-cache/new.lst")))
+ rename(fconcat(opt->path_log,"hts-cache/new.lst"),fconcat(opt->path_log,"hts-cache/old.lst"));
+ // ouvrir
+ cache->lst=fopen(fconcat(opt->path_log,"hts-cache/new.lst"),"wb");
+ {
+ filecreate_params tmp;
+ strcpybuff(tmp.path,opt->path_html); // chemin
+ tmp.lst=cache->lst; // fichier lst
+ filenote("",&tmp); // initialiser filecreate
+ }
+
+ // supprimer old.txt
+ if (fexist(fconcat(opt->path_log,"hts-cache/old.txt")))
+ remove(fconcat(opt->path_log,"hts-cache/old.txt"));
+ // renommer
+ if (fexist(fconcat(opt->path_log,"hts-cache/new.txt")))
+ rename(fconcat(opt->path_log,"hts-cache/new.txt"),fconcat(opt->path_log,"hts-cache/old.txt"));
+ // ouvrir
+ cache->txt=fopen(fconcat(opt->path_log,"hts-cache/new.txt"),"wb");
+ if (cache->txt) {
+ fprintf(cache->txt,"date\tsize'/'remotesize\tflags(request:Update,Range state:File response:Modified,Chunked,gZipped)\t");
+ fprintf(cache->txt,"statuscode\tstatus ('servermsg')\tMIME\tEtag|Date\tURL\tlocalfile\t(from URL)"LF);
+ }
}
-
- // supprimer old.txt
- if (fexist(fconcat(opt->path_log,"hts-cache/old.txt")))
- remove(fconcat(opt->path_log,"hts-cache/old.txt"));
- // renommer
- if (fexist(fconcat(opt->path_log,"hts-cache/new.txt")))
- rename(fconcat(opt->path_log,"hts-cache/new.txt"),fconcat(opt->path_log,"hts-cache/old.txt"));
- // ouvrir
- cache->txt=fopen(fconcat(opt->path_log,"hts-cache/new.txt"),"wb");
- if (cache->txt) {
- fprintf(cache->txt,"date\tsize'/'remotesize\tflags(request:Update,Range state:File response:Modified,Chunked,gZipped)\t");
- fprintf(cache->txt,"statuscode\tstatus ('servermsg')\tMIME\tEtag|Date\tURL\tlocalfile\t(from URL)"LF);
+ } else {
+ cache->dat=fopen(fconcat(opt->path_log,"hts-cache/new.dat"),"wb");
+ cache->ndx=fopen(fconcat(opt->path_log,"hts-cache/new.ndx"),"wb");
+ // les deux doivent être ouvrables
+ if ((cache->dat==NULL) && (cache->ndx!=NULL)) {
+ fclose(cache->ndx);
+ cache->ndx=NULL;
+ }
+ if ((cache->dat!=NULL) && (cache->ndx==NULL)) {
+ fclose(cache->dat);
+ cache->dat=NULL;
}
- // test
- // cache_writedata(cache->ndx,cache->dat,"//[TEST]//","test1","TEST PIPO",9);
+ if (cache->ndx!=NULL) {
+ char s[256];
+
+ cache_wstr(cache->dat,"CACHE-1.5");
+ fflush(cache->dat);
+ cache_wstr(cache->ndx,"CACHE-1.5");
+ fflush(cache->ndx);
+ //
+ time_gmt_rfc822(s); // date et heure actuelle GMT pour If-Modified-Since..
+ cache_wstr(cache->ndx,s);
+ fflush(cache->ndx); // un petit fflush au cas où
+
+ // supprimer old.lst
+ if (fexist(fconcat(opt->path_log,"hts-cache/old.lst")))
+ remove(fconcat(opt->path_log,"hts-cache/old.lst"));
+ // renommer
+ if (fexist(fconcat(opt->path_log,"hts-cache/new.lst")))
+ rename(fconcat(opt->path_log,"hts-cache/new.lst"),fconcat(opt->path_log,"hts-cache/old.lst"));
+ // ouvrir
+ cache->lst=fopen(fconcat(opt->path_log,"hts-cache/new.lst"),"wb");
+ {
+ filecreate_params tmp;
+ strcpybuff(tmp.path,opt->path_html); // chemin
+ tmp.lst=cache->lst; // fichier lst
+ filenote("",&tmp); // initialiser filecreate
+ }
+
+ // supprimer old.txt
+ if (fexist(fconcat(opt->path_log,"hts-cache/old.txt")))
+ remove(fconcat(opt->path_log,"hts-cache/old.txt"));
+ // renommer
+ if (fexist(fconcat(opt->path_log,"hts-cache/new.txt")))
+ rename(fconcat(opt->path_log,"hts-cache/new.txt"),fconcat(opt->path_log,"hts-cache/old.txt"));
+ // ouvrir
+ cache->txt=fopen(fconcat(opt->path_log,"hts-cache/new.txt"),"wb");
+ if (cache->txt) {
+ fprintf(cache->txt,"date\tsize'/'remotesize\tflags(request:Update,Range state:File response:Modified,Chunked,gZipped)\t");
+ fprintf(cache->txt,"statuscode\tstatus ('servermsg')\tMIME\tEtag|Date\tURL\tlocalfile\t(from URL)"LF);
+ }
+
+ // test
+ // cache_writedata(cache->ndx,cache->dat,"//[TEST]//","test1","TEST PIPO",9);
+ }
}
} else {
@@ -906,12 +1595,11 @@ char* readfile_or(char* fil,char* defaultdata) {
int cache_wstr(FILE* fp,char* s) {
INTsys i;
char buff[256+4];
- i=strlen(s);
+ i = s != NULL ? strlen(s) : 0;
sprintf(buff,INTsysP "\n",i);
if (fwrite(buff,1,(INTsys)strlen(buff),fp) != strlen(buff))
return -1;
- if (i>0)
- if ((INTsys)fwrite(s,1,i,fp) != i)
+ if (i > 0 && (INTsys)fwrite(s,1,i,fp) != i)
return -1;
return 0;
}
@@ -922,10 +1610,34 @@ void cache_rstr(FILE* fp,char* s) {
sscanf(buff,INTsysP,&i);
if (i < 0 || i > 32768) /* error, something nasty happened */
i=0;
- if (i>0)
- fread(s,1,i,fp);
+ if (i>0) {
+ if ((int) fread(s,1,i,fp) != i) {
+ int fread_cache_failed = 0;
+ assertf(fread_cache_failed);
+ }
+ }
*(s+i)='\0';
}
+/* Read one length-prefixed string record from fp into a newly allocated buffer.
+   The record is a length line (parsed with the INTsysP format) followed by that
+   many raw bytes, i.e. the layout cache_wstr() writes.
+   Returns a NUL-terminated buffer allocated with malloct(), or NULL when the
+   length is missing, unparsable, zero, outside 1..32768, or when the allocation
+   fails. A short read triggers assertf().
+   NOTE(review): ownership of the returned buffer passes to the caller, which is
+   presumably expected to release it with freet() - confirm against callers. */
+char* cache_rstr_addr(FILE* fp) {
+  INTsys i = 0;   /* must start at 0: sscanf() leaves it untouched on a parse failure */
+  char* addr = NULL;
+  char buff[256+4];
+  linput(fp,buff,256);
+  sscanf(buff,INTsysP,&i);
+  if (i < 0 || i > 32768)    /* error, something nasty happened */
+    i=0;
+  if (i > 0) {
+    addr = (char*) malloct(i + 1);   /* +1 for the terminating NUL */
+    if (addr != NULL) {
+      if ((int) fread(addr,1,i,fp) != i) {
+        int fread_cache_failed = 0;
+        assertf(fread_cache_failed);
+      }
+      *(addr+i)='\0';
+    }
+  }
+  return addr;
+}
int cache_brstr(char* adr,char* s) {
int i;
int off;
diff --git a/src/htscache.h b/src/htscache.h
index ef897f1..51dd439 100644
--- a/src/htscache.h
+++ b/src/htscache.h
@@ -42,9 +42,12 @@ Please visit our Website: http://www.httrack.com
#include "htscore.h"
+/* Library internal definitions */
+#ifdef HTS_INTERNAL_BYTECODE
+
// cache
void cache_mayadd(httrackp* opt,cache_back* cache,htsblk* r,char* url_adr,char* url_fil,char* url_save);
-void cache_add(htsblk r,char* url_adr,char* url_fil,char* url_save,FILE* cache_ndx,FILE* cache_dat,int all_in_cache);
+void cache_add(cache_back* cache,htsblk r,char* url_adr,char* url_fil,char* url_save,int all_in_cache);
htsblk cache_read(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* location);
htsblk cache_read_ro(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* location);
htsblk cache_readex(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* location,char* return_save,int readonly);
@@ -56,6 +59,7 @@ int cache_readdata(cache_back* cache,char* str1,char* str2,char** inbuff,int* le
int cache_wstr(FILE* fp,char* s);
void cache_rstr(FILE* fp,char* s);
+char* cache_rstr_addr(FILE* fp);
int cache_brstr(char* adr,char* s);
int cache_quickbrstr(char* adr,char* s);
int cache_brint(char* adr,int* i);
@@ -63,4 +67,7 @@ void cache_rint(FILE* fp,int* i);
int cache_wint(FILE* fp,int i);
void cache_rLLint(FILE* fp,LLint* i);
int cache_wLLint(FILE* fp,LLint i);
+
+#endif
+
#endif
diff --git a/src/htscatchurl.c b/src/htscatchurl.c
index 8455ea0..3832019 100644
--- a/src/htscatchurl.c
+++ b/src/htscatchurl.c
@@ -34,6 +34,9 @@ Please visit our Website: http://www.httrack.com
/* Author: Xavier Roche */
/* ------------------------------------------------------------ */
+/* Internal engine bytecode */
+#define HTS_INTERNAL_BYTECODE
+
// Fichier intercepteur d'URL .c
/* specific definitions */
@@ -41,11 +44,9 @@ Please visit our Website: http://www.httrack.com
#include "htsbase.h"
#include "htsnet.h"
#include "htslib.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
+#ifndef _WIN32_WCE
#include <fcntl.h>
+#endif
#if HTS_WIN
#else
#include <arpa/inet.h>
@@ -194,8 +195,8 @@ HTSEXT_API int catch_url(T_SOC soc,char* url,char* method,char* data) {
socinput(soc,line,1000);
if (strnotempty(line)) {
if (sscanf(line,"%s %s %s",method,url,protocol) == 3) {
- char url_adr[HTS_URLMAXSIZE*2];
- char url_fil[HTS_URLMAXSIZE*2];
+ char BIGSTK url_adr[HTS_URLMAXSIZE*2];
+ char BIGSTK url_fil[HTS_URLMAXSIZE*2];
// méthode en majuscule
int i,r=0;
url_adr[0]=url_fil[0]='\0';
@@ -207,7 +208,7 @@ HTSEXT_API int catch_url(T_SOC soc,char* url,char* method,char* data) {
// adresse du lien
if (ident_url_absolute(url,url_adr,url_fil)>=0) {
// Traitement des en-têtes
- char loc[HTS_URLMAXSIZE*2];
+ char BIGSTK loc[HTS_URLMAXSIZE*2];
htsblk blkretour;
memset(&blkretour, 0, sizeof(htsblk)); // effacer
blkretour.location=loc; // si non nul, contiendra l'adresse véritable en cas de moved xx
diff --git a/src/htscatchurl.h b/src/htscatchurl.h
index a2514ef..cec7537 100644
--- a/src/htscatchurl.h
+++ b/src/htscatchurl.h
@@ -41,6 +41,9 @@ Please visit our Website: http://www.httrack.com
#include "htsbasenet.h"
+/* Library internal definitions */
+#ifdef HTS_INTERNAL_BYTECODE
+
// Fonctions
void socinput(T_SOC soc,char* s,int max);
#ifndef HTTRACK_DEFLIB
@@ -74,5 +77,7 @@ HTSEXT_API int catch_url(T_SOC soc,char* url,char* method,char* data);
#endif
+#endif
+
diff --git a/src/htscore.c b/src/htscore.c
index ba1e226..ff761ef 100644
--- a/src/htscore.c
+++ b/src/htscore.c
@@ -34,11 +34,12 @@ Please visit our Website: http://www.httrack.com
/* Author: Xavier Roche */
/* ------------------------------------------------------------ */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
+/* Internal engine bytecode */
+#define HTS_INTERNAL_BYTECODE
+
+#ifndef _WIN32_WCE
#include <fcntl.h>
+#endif
#include <ctype.h>
/* File defs */
@@ -60,6 +61,10 @@ Please visit our Website: http://www.httrack.com
// parser
#include "htsparse.h"
+/* Cache */
+#include "htszlib.h"
+
+
/* END specific definitions */
@@ -71,6 +76,8 @@ t_hts_htmlcheck_uninit hts_htmlcheck_uninit = NULL;
t_hts_htmlcheck_start hts_htmlcheck_start = NULL;
t_hts_htmlcheck_end hts_htmlcheck_end = NULL;
t_hts_htmlcheck_chopt hts_htmlcheck_chopt = NULL;
+t_hts_htmlcheck_process hts_htmlcheck_preprocess = NULL;
+t_hts_htmlcheck_process hts_htmlcheck_postprocess = NULL;
t_hts_htmlcheck hts_htmlcheck = NULL;
t_hts_htmlcheck_query hts_htmlcheck_query = NULL;
t_hts_htmlcheck_query2 hts_htmlcheck_query2 = NULL;
@@ -80,11 +87,13 @@ t_hts_htmlcheck_check hts_htmlcheck_check = NULL;
t_hts_htmlcheck_pause hts_htmlcheck_pause = NULL;
t_hts_htmlcheck_filesave hts_htmlcheck_filesave = NULL;
t_hts_htmlcheck_linkdetected hts_htmlcheck_linkdetected = NULL;
+t_hts_htmlcheck_linkdetected2 hts_htmlcheck_linkdetected2 = NULL;
t_hts_htmlcheck_xfrstatus hts_htmlcheck_xfrstatus = NULL;
t_hts_htmlcheck_savename hts_htmlcheck_savename = NULL;
t_hts_htmlcheck_sendhead hts_htmlcheck_sendhead = NULL;
t_hts_htmlcheck_receivehead hts_htmlcheck_receivehead = NULL;
+extern void set_wrappers(void);
char _hts_errmsg[1100]="";
int _hts_in_html_parsing=0;
@@ -201,7 +210,7 @@ hts_htmlcheck_end(); \
if (back) { \
int i; \
for(i=0;i<back_max;i++) { \
- back_delete(&opt,back,i); \
+ back_delete(&opt,&cache,back,i); \
} \
freet(back); back=NULL; \
} \
@@ -209,6 +218,14 @@ hts_htmlcheck_end(); \
if (cache.use) { freet(cache.use); cache.use=NULL; } \
if (cache.dat) { fclose(cache.dat); cache.dat=NULL; } \
if (cache.ndx) { fclose(cache.ndx); cache.ndx=NULL; } \
+ if (cache.zipOutput) { \
+ zipClose(cache.zipOutput, "Created by HTTrack Website Copier/"HTTRACK_VERSION); \
+ cache.zipOutput = NULL; \
+ } \
+ if (cache.zipInput) { \
+ unzClose(cache.zipInput); \
+ cache.zipInput = NULL; \
+ } \
if (cache.olddat) { fclose(cache.olddat); cache.olddat=NULL; } \
if (cache.lst) { fclose(cache.lst); cache.lst=NULL; } \
if (cache.txt) { fclose(cache.txt); cache.txt=NULL; } \
@@ -219,9 +236,11 @@ hts_htmlcheck_end(); \
if (opt.accept_cookie) cookie_save(opt.cookie,fconcat(opt.path_log,"cookies.txt")); \
if (makeindex_fp) { fclose(makeindex_fp); makeindex_fp=NULL; } \
if (cache_hashtable) { inthash_delete(&cache_hashtable); } \
+ if (cache_tests) { inthash_delete(&cache_tests); } \
if (template_header) { freet(template_header); template_header=NULL; } \
if (template_body) { freet(template_body); template_body=NULL; } \
if (template_footer) { freet(template_footer); template_footer=NULL; } \
+ clearCallbacks(&opt.state.callbacks); \
/*structcheck_init(-1);*/ \
} while(0)
#define XH_uninit do { XH_extuninit; if (r.adr) { freet(r.adr); r.adr=NULL; } } while(0)
@@ -289,7 +308,7 @@ hash_write(hashptr,lien_tot,NORM); \
#define HT_INDEX_END do { \
if (!makeindex_done) { \
if (makeindex_fp) { \
- char tempo[1024]; \
+ char BIGSTK tempo[1024]; \
if (makeindex_links == 1) { \
sprintf(tempo,"<meta HTTP-EQUIV=\"Refresh\" CONTENT=\"0; URL=%s\">"CRLF,makeindex_firstlink); \
} else \
@@ -313,13 +332,13 @@ makeindex_done=1; /* ok c'est fait */ \
// Début de httpmirror, robot
// url1 peut être multiple
int httpmirror(char* url1,httrackp* ptropt) {
- httrackp opt = *ptropt; // structure d'options
+ httrackp BIGSTK opt; // structure d'options
char* primary=NULL; // première page, contenant les liens à scanner
int lien_tot=0; // nombre de liens pour le moment
lien_url** liens=NULL; // les pointeurs sur les liens
hash_struct hash; // système de hachage, accélère la recherche dans les liens
hash_struct* hashptr = &hash;
- t_cookie cookie; // gestion des cookies
+ t_cookie BIGSTK cookie; // gestion des cookies
int lien_max=0;
int lien_size=0; // octets restants dans buffer liens dispo
char* lien_buffer=NULL; // buffer liens actuel
@@ -330,7 +349,7 @@ int httpmirror(char* url1,httrackp* ptropt) {
int numero_passe=0; // deux passes pour html puis images
int back_max=0; // fichiers qui peuvent être en local
lien_back* back=NULL; // backing en local
- htsblk r; // retour de certaines fonctions
+ htsblk BIGSTK r; // retour de certaines fonctions
TStamp lastime=0; // pour affichage infos de tmp en tmp
// pour les stats, nombre de fichiers & octets écrits
LLint stat_fragment=0; // pour la fragmentation
@@ -346,7 +365,7 @@ int httpmirror(char* url1,httrackp* ptropt) {
int makeindex_done=0; // lorsque l'index sera fait
FILE* makeindex_fp=NULL;
int makeindex_links=0;
- char makeindex_firstlink[HTS_URLMAXSIZE*2];
+ char BIGSTK makeindex_firstlink[HTS_URLMAXSIZE*2];
// statistiques (mode #Z)
FILE* makestat_fp=NULL; // fichier de stats taux transfert
FILE* maketrack_fp=NULL; // idem pour le tracking
@@ -354,16 +373,19 @@ int httpmirror(char* url1,httrackp* ptropt) {
LLint makestat_total=0; // repère du nombre d'octets transférés depuis denrière stat
int makestat_lnk=0; // idem, pour le nombre de liens
//
- char codebase[HTS_URLMAXSIZE*2]; // base pour applet java
- char base[HTS_URLMAXSIZE*2]; // base pour les autres fichiers
+ char BIGSTK codebase[HTS_URLMAXSIZE*2]; // base pour applet java
+ char BIGSTK base[HTS_URLMAXSIZE*2]; // base pour les autres fichiers
//
- cache_back cache;
- robots_wizard robots; // gestion robots.txt
+ cache_back BIGSTK cache;
+ robots_wizard BIGSTK robots; // gestion robots.txt
inthash cache_hashtable=NULL;
+ inthash cache_tests=NULL;
int cache_hash_size=0;
//
char *template_header=NULL,*template_body=NULL,*template_footer=NULL;
//
+ opt = *ptropt;
+ //
codebase[0]='\0'; base[0]='\0';
//
cookie.auth.next=NULL;
@@ -444,13 +466,16 @@ int httpmirror(char* url1,httrackp* ptropt) {
if (!cache_hash_size)
cache_hash_size=HTS_HASH_SIZE;
cache_hashtable=inthash_new(cache_hash_size);
- if (cache_hashtable==NULL) {
+ cache_tests=inthash_new(cache_hash_size);
+ if (cache_hashtable==NULL || cache_tests==NULL) {
printf("PANIC! : Not enough memory [%d]\n",__LINE__);
filters[0]=NULL; back_max=0; // uniquement a cause du warning de XH_extuninit
XH_extuninit;
return 0;
}
+ inthash_value_is_malloc(cache_tests, 1); /* malloc */
cache.hashtable=(void*)cache_hashtable; /* copy backcache hash */
+ cache.cached_tests=(void*)cache_tests; /* copy of cache_tests */
// initialiser cache DNS
_hts_lockdns(-999);
@@ -539,7 +564,7 @@ int httpmirror(char* url1,httrackp* ptropt) {
if (joker) { // joker ou filters
//char* p;
- char tempo[HTS_URLMAXSIZE*2];
+ char BIGSTK tempo[HTS_URLMAXSIZE*2];
int type; int plus=0;
// noter joker (dans b)
@@ -598,7 +623,7 @@ int httpmirror(char* url1,httrackp* ptropt) {
}
} else { // adresse normale
- char url[HTS_URLMAXSIZE*2];
+ char BIGSTK url[HTS_URLMAXSIZE*2];
// prochaine adresse
i=0;
while((*a!=0) && (!isspace((unsigned char)*a))) { url[i++]=*a; a++; }
@@ -638,7 +663,7 @@ int httpmirror(char* url1,httrackp* ptropt) {
if (filelist_buff) {
int filelist_ptr=0;
int n=0;
- char line[HTS_URLMAXSIZE*2];
+ char BIGSTK line[HTS_URLMAXSIZE*2];
char* primary_ptr = primary + strlen(primary);
while( filelist_ptr < filelist_sz ) {
int count=binput(filelist_buff+filelist_ptr,line,HTS_URLMAXSIZE);
@@ -758,6 +783,7 @@ int httpmirror(char* url1,httrackp* ptropt) {
makestat_fp=fopen(fconcat(opt.path_log,"hts-stats.txt"),"wb");
if (makestat_fp != NULL) {
fprintf(makestat_fp,"HTTrack statistics report, every minutes"LF LF);
+ fflush(makestat_fp);
}
}
@@ -766,6 +792,7 @@ int httpmirror(char* url1,httrackp* ptropt) {
maketrack_fp=fopen(fconcat(opt.path_log,"hts-track.txt"),"wb");
if (maketrack_fp != NULL) {
fprintf(maketrack_fp,"HTTrack tracking report, every minutes"LF LF);
+ fflush(maketrack_fp);
}
}
@@ -776,6 +803,10 @@ int httpmirror(char* url1,httrackp* ptropt) {
}
}
+ /* Send options to callback functions */
+#if HTS_ANALYSTE
+ hts_htmlcheck_chopt(&opt);
+#endif
// attendre une certaine heure..
if (opt.waittime>0) {
@@ -795,6 +826,7 @@ int httpmirror(char* url1,httrackp* ptropt) {
}
// attendre..
+ _hts_in_html_parsing=5;
do {
TStamp tl=0;
time_t tt;
@@ -828,6 +860,7 @@ int httpmirror(char* url1,httrackp* ptropt) {
}
#endif
} while(!ok);
+ _hts_in_html_parsing=0;
// note: recopie de plus haut
// noter heure actuelle de départ en secondes
@@ -854,6 +887,7 @@ int httpmirror(char* url1,httrackp* ptropt) {
XH_extuninit;
return 1;
}
+ set_wrappers(); // _start() is allowed to set other wrappers
#endif
@@ -865,7 +899,7 @@ int httpmirror(char* url1,httrackp* ptropt) {
do {
int error=0; // si error alors sauter
int store_errpage=0; // c'est une erreur mais on enregistre le html
- char loc[HTS_URLMAXSIZE*2]; // adresse de relocation
+ char BIGSTK loc[HTS_URLMAXSIZE*2]; // adresse de relocation
// Ici on charge le fichier (html, gif..) en mémoire
// Les HTMLs sont traités (si leur priorité est suffisante)
@@ -877,6 +911,9 @@ int httpmirror(char* url1,httrackp* ptropt) {
memcpy(&(r.req.proxy), &opt.proxy, sizeof(opt.proxy));
// et user-agent
strcpybuff(r.req.user_agent,opt.user_agent);
+ strcpybuff(r.req.referer,opt.referer);
+ strcpybuff(r.req.from,opt.from);
+ strcpybuff(r.req.lang_iso,opt.lang_iso);
r.req.user_agent_send=opt.user_agent_send;
if (!error) {
@@ -928,9 +965,9 @@ int httpmirror(char* url1,httrackp* ptropt) {
Get the next link, waiting for other files, handling external callbacks
*/
{
- char buff_err_msg[1024];
- htsmoduleStruct str;
- htsmoduleStructExtended stre;
+ char BIGSTK buff_err_msg[1024];
+ htsmoduleStruct BIGSTK str;
+ htsmoduleStructExtended BIGSTK stre;
buff_err_msg[0] = '\0';
memset(&str, 0, sizeof(str));
memset(&stre, 0, sizeof(stre));
@@ -1018,7 +1055,7 @@ int httpmirror(char* url1,httrackp* ptropt) {
} else { // lien vide..
- if (opt.errlog) {
+ if (opt.errlog && opt.debug > 0) {
fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Warning, link #%d empty"LF,ptr); test_flush;
}
error=1;
@@ -1067,8 +1104,8 @@ int httpmirror(char* url1,httrackp* ptropt) {
if (!error) {
if (r.statuscode == 200) { // OK (ou 304 en backing)
if (r.adr) { // Written file
- if ( (is_hypertext_mime(r.contenttype)) /* Is HTML or Js, .. */
- || (may_be_hypertext_mime(r.contenttype) && (r.adr) ) /* Is real media, .. */
+ if ( (is_hypertext_mime(r.contenttype, urlfil)) /* Is HTML or Js, .. */
+ || (may_be_hypertext_mime(r.contenttype, urlfil) && (r.adr) ) /* Is real media, .. */
) {
if (strnotempty(r.cdispo)) { // Content-disposition set!
if (ishtml(savename) == 0) { // Non HTML!!
@@ -1083,8 +1120,8 @@ int httpmirror(char* url1,httrackp* ptropt) {
// ------------------------------------
// BOGUS MIME TYPE HACK II (the revenge)
// Check if we have a bogus MIME type
- if ( (is_hypertext_mime(r.contenttype)) /* Is HTML or Js, .. */
- || (may_be_hypertext_mime(r.contenttype)) /* Is real media, .. */
+ if ( (is_hypertext_mime(r.contenttype, urlfil)) /* Is HTML or Js, .. */
+ || (may_be_hypertext_mime(r.contenttype, urlfil)) /* Is real media, .. */
) {
if ((r.adr) && (r.size)) {
unsigned int map[256];
@@ -1159,11 +1196,11 @@ int httpmirror(char* url1,httrackp* ptropt) {
if (!error) {
if (r.statuscode == 200) { // OK (ou 304 en backing)
if (r.adr==NULL) { // Written file
- if (may_be_hypertext_mime(r.contenttype)) { // to parse!
+ if (may_be_hypertext_mime(r.contenttype, urlfil)) { // to parse!
LLint sz;
sz=fsize(savename);
if (sz>0) { // ok, exists!
- if (sz < 1024) { // ok, small file --> to parse!
+ if (sz < 8192) { // ok, small file --> to parse!
FILE* fp=fopen(savename,"rb");
if (fp) {
r.adr=malloct((int)sz + 2);
@@ -1216,9 +1253,9 @@ int httpmirror(char* url1,httrackp* ptropt) {
redirect pages.
*/
if (!error) {
- char buff_err_msg[1024];
- htsmoduleStruct str;
- htsmoduleStructExtended stre;
+ char BIGSTK buff_err_msg[1024];
+ htsmoduleStruct BIGSTK str;
+ htsmoduleStructExtended BIGSTK stre;
buff_err_msg[0] = '\0';
memset(&str, 0, sizeof(str));
memset(&stre, 0, sizeof(stre));
@@ -1346,8 +1383,8 @@ int httpmirror(char* url1,httrackp* ptropt) {
// traiter
if (
- ( (is_hypertext_mime(r.contenttype)) /* Is HTML or Js, .. */
- || (may_be_hypertext_mime(r.contenttype) && (r.adr) ) /* Is real media, .. */
+ ( (is_hypertext_mime(r.contenttype, urlfil)) /* Is HTML or Js, .. */
+ || (may_be_hypertext_mime(r.contenttype, urlfil) && (r.adr) ) /* Is real media, .. */
)
&& (liens[ptr]->depth>0) /* Depth > 0 (recurse depth) */
&& (r.adr!=NULL) /* HTML Data exists */
@@ -1363,9 +1400,9 @@ int httpmirror(char* url1,httrackp* ptropt) {
fspc(opt.log,"info"); fprintf(opt.log,"engine: check-html: %s%s"LF,urladr,urlfil);
}
{
- char buff_err_msg[1024];
- htsmoduleStruct str;
- htsmoduleStructExtended stre;
+ char BIGSTK buff_err_msg[1024];
+ htsmoduleStruct BIGSTK str;
+ htsmoduleStructExtended BIGSTK stre;
buff_err_msg[0] = '\0';
memset(&str, 0, sizeof(str));
memset(&stre, 0, sizeof(stre));
@@ -1507,9 +1544,9 @@ int httpmirror(char* url1,httrackp* ptropt) {
if (strcmp(urlfil,"/robots.txt")==0) { // robots.txt
if (r.adr) {
int bptr=0;
- char line[1024];
- char buff[8192];
- char infobuff[8192];
+ char BIGSTK line[1024];
+ char BIGSTK buff[8192];
+ char BIGSTK infobuff[8192];
int record=0;
line[0]='\0'; buff[0]='\0'; infobuff[0]='\0';
//
@@ -1553,7 +1590,7 @@ int httpmirror(char* url1,httrackp* ptropt) {
while(is_realspace(*a))
a++; // sauter espace(s)
if (strnotempty(a)) {
- if (strcmp(a,"/") != 0) { /* ignoring disallow: / */
+ if (strcmp(a,"/") != 0 || opt.robots >= 3) { /* ignoring disallow: / */
if ( (strlen(buff) + strlen(a) + 8) < sizeof(buff)) {
strcatbuff(buff,a);
strcatbuff(buff,"\n");
@@ -1601,8 +1638,8 @@ int httpmirror(char* url1,httrackp* ptropt) {
// Si par la suite on doit retraiter ce fichier avec un niveau de récursion plus
// fort, on supprimera le readme, et on scannera le fichier html!
// note: sauté si store_errpage (càd si page d'erreur, non à scanner!)
- if ( (is_hypertext_mime(r.contenttype)) && (!store_errpage) && (r.size>0)) { // c'est du html!!
- char tempo[HTS_URLMAXSIZE*2];
+ if ( (is_hypertext_mime(r.contenttype, urlfil)) && (!store_errpage) && (r.size>0)) { // c'est du html!!
+ char BIGSTK tempo[HTS_URLMAXSIZE*2];
FILE* fp;
tempo[0]='\0';
strcpybuff(tempo,savename);
@@ -1695,7 +1732,7 @@ int httpmirror(char* url1,httrackp* ptropt) {
FILE* fp=fopen(savename,"r+b");
if (fp) {
if (!fseek(fp,0,SEEK_SET)) {
- char line[HTS_URLMAXSIZE*2];
+ char BIGSTK line[HTS_URLMAXSIZE*2];
linput(fp,line,HTS_URLMAXSIZE);
if (strnotempty(line)) {
if ((opt.debug>1) && (opt.log!=NULL)) {
@@ -1711,8 +1748,8 @@ int httpmirror(char* url1,httrackp* ptropt) {
/* External modules */
if (opt.parsejava && fexist(savename)) {
- char buff_err_msg[1024];
- htsmoduleStruct str;
+ char BIGSTK buff_err_msg[1024];
+ htsmoduleStruct BIGSTK str;
buff_err_msg[0] = '\0';
memset(&str, 0, sizeof(str));
/* */
@@ -1889,7 +1926,7 @@ jump_if_done:
while(!feof(old_lst)) {
linput(old_lst,line,1000);
if (!strstr(adr,line)) { // fichier non trouvé dans le nouveau?
- char file[HTS_URLMAXSIZE*2];
+ char BIGSTK file[HTS_URLMAXSIZE*2];
strcpybuff(file,opt.path_html);
strcatbuff(file,line+1);
file[strlen(file)-1]='\0';
@@ -1912,7 +1949,7 @@ jump_if_done:
line[strlen(line)-1]='\0';
if (strnotempty(line))
if (!strstr(adr,line)) { // non trouvé?
- char file[HTS_URLMAXSIZE*2];
+ char BIGSTK file[HTS_URLMAXSIZE*2];
strcpybuff(file,opt.path_html);
strcatbuff(file,line+1);
while ((strnotempty(file)) && (rmdir(file)==0)) { // ok, éliminé (existait)
@@ -1956,26 +1993,28 @@ jump_if_done:
// afficher résumé dans log
if (opt.log!=NULL) {
+ char BIGSTK finalInfo[8192];
int error = fspc(NULL,"error");
int warning = fspc(NULL,"warning");
int info = fspc(NULL,"info");
- char htstime[256];
- char infoupdated[256];
+ char BIGSTK htstime[256];
+ char BIGSTK infoupdated[256];
// int n=(int) (stat_loaded/(time_local()-HTS_STAT.stat_timestart));
LLint n=(LLint) (HTS_STAT.HTS_TOTAL_RECV/(max(1,time_local()-HTS_STAT.stat_timestart)));
sec2str(htstime,time_local()-HTS_STAT.stat_timestart);
- //fprintf(opt.log,LF"HTS-mirror complete in %s : %d links scanned, %d files written (%d bytes overall) [%d bytes received at %d bytes/sec]"LF,htstime,lien_tot-1,HTS_STAT.stat_files,stat_bytes,stat_loaded,n);
+ //sprintf(finalInfo + strlen(finalInfo),LF"HTS-mirror complete in %s : %d links scanned, %d files written (%d bytes overall) [%d bytes received at %d bytes/sec]"LF,htstime,lien_tot-1,HTS_STAT.stat_files,stat_bytes,stat_loaded,n);
infoupdated[0] = '\0';
if (opt.is_update) {
- if (HTS_STAT.stat_updated_files < 0) {
+ if (HTS_STAT.stat_updated_files > 0) {
sprintf(infoupdated, ", %d files updated", (int)HTS_STAT.stat_updated_files);
} else {
sprintf(infoupdated, ", no files updated");
}
}
- fprintf(opt.log,LF
- "HTTrack mirror complete in %s : "
+ finalInfo[0] = '\0';
+ sprintf(finalInfo + strlen(finalInfo),
+ "HTTrack Website Copier/"HTTRACK_VERSION" mirror complete in %s : "
"%d links scanned, %d files written ("LLintP" bytes overall)%s "
"["LLintP" bytes received at "LLintP" bytes/sec]",
htstime,
@@ -1985,20 +2024,31 @@ jump_if_done:
infoupdated,
(LLint)HTS_STAT.HTS_TOTAL_RECV,
(LLint)n
- );
+ );
+
if (HTS_STAT.total_packed > 0 && HTS_STAT.total_unpacked > 0) {
int packed_ratio=(int)((LLint)(HTS_STAT.total_packed*100)/HTS_STAT.total_unpacked);
- fprintf(opt.log,", "LLintP" bytes transfered using HTTP compression in %d files, ratio %d%%",(LLint)HTS_STAT.total_unpacked,HTS_STAT.total_packedfiles,(int)packed_ratio);
+ sprintf(finalInfo + strlen(finalInfo),", "LLintP" bytes transfered using HTTP compression in %d files, ratio %d%%",(LLint)HTS_STAT.total_unpacked,HTS_STAT.total_packedfiles,(int)packed_ratio);
}
if (!opt.nokeepalive && HTS_STAT.stat_sockid > 0 && HTS_STAT.stat_nrequests > HTS_STAT.stat_sockid) {
int rq = (HTS_STAT.stat_nrequests * 10) / HTS_STAT.stat_sockid;
- fprintf(opt.log,", %d.%d requests per connection", rq/10, rq%10);
+ sprintf(finalInfo + strlen(finalInfo),", %d.%d requests per connection", rq/10, rq%10);
}
- fprintf(opt.log,LF);
+ sprintf(finalInfo + strlen(finalInfo),LF);
if (error)
- fprintf(opt.log,"(%d errors, %d warnings, %d messages)"LF,error,warning,info);
+ sprintf(finalInfo + strlen(finalInfo),"(%d errors, %d warnings, %d messages)"LF,error,warning,info);
else
- fprintf(opt.log,"(No errors, %d warnings, %d messages)"LF,warning,info);
+ sprintf(finalInfo + strlen(finalInfo),"(No errors, %d warnings, %d messages)"LF,warning,info);
+
+ // Log
+ fprintf(opt.log,LF"%s", finalInfo);
+
+ // Close ZIP
+ if (cache.zipOutput) {
+ zipClose(cache.zipOutput, finalInfo);
+ cache.zipOutput = NULL;
+ }
+
test_flush;
}
#if DEBUG_HASH
@@ -2301,7 +2351,7 @@ int filters_init(char*** ptrfilters, int maxfilter, int filterinc) {
HTSEXT_API int structcheck(char* s) {
// vérifier la présence des dossier(s)
char *a=s;
- char nom[HTS_URLMAXSIZE*2];
+ char BIGSTK nom[HTS_URLMAXSIZE*2];
char *b;
//inthash structcheck_hash=NULL;
if (strnotempty(s)==0) return 0;
@@ -2399,7 +2449,7 @@ int check_fatal_io_errno(void) {
// ouvrir un fichier (avec chemin Un*x)
FILE* filecreate(char* s) {
- char fname[HTS_URLMAXSIZE*2];
+ char BIGSTK fname[HTS_URLMAXSIZE*2];
FILE* fp;
fname[0]='\0';
@@ -2464,7 +2514,7 @@ int filenote(char* s,filecreate_params* params) {
strc->lst=params->lst;
return 0;
} else if (strc->lst) {
- char savelst[HTS_URLMAXSIZE*2];
+ char BIGSTK savelst[HTS_URLMAXSIZE*2];
strcpybuff(savelst,fslash(s));
// couper chemin?
if (strnotempty(strc->path)) {
@@ -2515,7 +2565,7 @@ HTS_INLINE void usercommand(httrackp* opt,int _exe,char* _cmd,char* file,char* a
}
}
void usercommand_exe(char* cmd,char* file) {
- char temp[8192];
+ char BIGSTK temp[8192];
char c[2]="";
int i;
temp[0]='\0';
@@ -2554,7 +2604,7 @@ static void postprocess_file(httrackp* opt,char* save, char* adr, char* fil) {
first = 1;
opt->state.mimefp = fopen(fconcat(opt->path_html,"index.mht"), "wb");
if (opt->state.mimefp != NULL) {
- char rndtmp[1024], currtime[256];
+ char BIGSTK rndtmp[1024], currtime[256];
srand(time(NULL));
time_gmt_rfc822(currtime);
sprintf(rndtmp, "%d_%d", (int)time(NULL), (int) rand());
@@ -2583,7 +2633,7 @@ static void postprocess_file(httrackp* opt,char* save, char* adr, char* fil) {
if (fp != NULL) {
char buff[60*100 + 2];
char mimebuff[256];
- char cid[HTS_URLMAXSIZE*3];
+ char BIGSTK cid[HTS_URLMAXSIZE*3];
int len;
int isHtml = ( ishtml(save) == 1 );
mimebuff[0] = '\0';
@@ -2730,13 +2780,31 @@ HTS_INLINE int back_fillmax(lien_back* back,int back_max,httrackp* opt,cache_bac
return -1; /* plus de place */
}
-// remplir backing
-int back_fill(lien_back* back,int back_max,httrackp* opt,cache_back* cache,lien_url** liens,int ptr,int numero_passe,int lien_tot) {
+/*
+ * Compute how many new connections may be opened right now.
+ *
+ * back, back_max: background transfer table and its size (passed to back_nsoc()).
+ * opt:            engine options; reads opt->maxsoc (socket budget) and
+ *                 opt->maxconn (connection rate limit, connections per second).
+ *
+ * The starting value is the free socket budget, opt->maxsoc - back_nsoc().
+ * When a rate limit is set and a connection has already been recorded, the
+ * value is capped according to the time elapsed since the last request (or the
+ * last connect when no request was recorded).
+ * NOTE(review): timestamps are presumably milliseconds (1000.0 / maxconn) - confirm.
+ *
+ * Returns the number of pluggable sockets; the value may be zero or negative.
+ */
+int back_pluggable_sockets_strict(lien_back* back, int back_max, httrackp* opt) {
+  int n = opt->maxsoc - back_nsoc(back, back_max);
+
+  // connect limiter
+  if (n > 0 && opt->maxconn > 0 && HTS_STAT.last_connect > 0) {
+    TStamp opTime = HTS_STAT.last_request ? HTS_STAT.last_request : HTS_STAT.last_connect;
+    TStamp cTime = mtime_local();
+    TStamp lap = ( cTime - opTime );
+    TStamp minLap = (TStamp) ( 1000.0 / opt->maxconn );
+    if (lap < minLap) {
+      // not enough time elapsed since the last operation: no new connection yet
+      n = 0;
+    } else if (minLap > 0) {
+      // allow at most one new connection per elapsed minimum interval
+      int nMax = (int) ( lap / minLap );
+      n = min(n, nMax);
+    }
+    // minLap == 0 happens when maxconn exceeds 1000: the interval truncates to
+    // zero, so dividing by it must be avoided; the socket budget alone applies.
+  }
+
+  return n;
+}
+
+int back_pluggable_sockets(lien_back* back, int back_max, httrackp* opt) {
int n;
- int oneLess = ( (_hts_in_html_parsing == 2 && opt->maxsoc >= 2) || (_hts_in_html_parsing == 1 && opt->maxsoc >= 4) ) ? 1 : 0; // testing links
// ajouter autant de socket qu'on peut ajouter
- n=opt->maxsoc-back_nsoc(back,back_max) - oneLess;
+ n=back_pluggable_sockets_strict(back, back_max, opt);
// vérifier qu'il restera assez de place pour les tests ensuite (en théorie, 1 entrée libre restante suffirait)
n=min( n, back_available(back,back_max) - 8 );
@@ -2745,6 +2813,12 @@ int back_fill(lien_back* back,int back_max,httrackp* opt,cache_back* cache,lien_
if (back_stack_available(back,back_max) <= 2)
n=0;
+ return n;
+}
+
+// remplir backing
+int back_fill(lien_back* back,int back_max,httrackp* opt,cache_back* cache,lien_url** liens,int ptr,int numero_passe,int lien_tot) {
+ int n = back_pluggable_sockets(back, back_max, opt);
if (n>0) {
int p;
@@ -2886,11 +2960,7 @@ void sig_ask( int code ) { // demander
void sig_ignore( int code ) { // no-op handler: the received signal is ignored
}
void sig_brpipe( int code ) { // treat if necessary
- /*
- if (!sig_ignore_flag(-1)) {
- sig_term(code);
- }
- */
+ signal(code, sig_brpipe); /* re-register this same handler for the signal number just received */
}
void sig_doback(int blind) { // mettre en backing
int out=-1;
@@ -2943,7 +3013,11 @@ int read_stdin(char* s,int max) {
}
#ifdef _WIN32
HTS_INLINE int check_stdin(void) { /* Win32 variant: returns the result of _kbhit(), or always 0 when _WIN32_WCE is defined */
+#ifndef _WIN32_WCE
return (_kbhit());
+#else
+ return 0;
+#endif
}
#else
HTS_INLINE int check_flot(T_SOC s) {
@@ -3043,7 +3117,7 @@ char* next_token(char* p,int flag) {
else if (*(p+1)=='"')
c='"';
if (c) {
- char tempo[8192];
+ char BIGSTK tempo[8192];
tempo[0]=c; tempo[1]='\0';
strcatbuff(tempo,p+2);
strcpybuff(p,tempo);
@@ -3051,7 +3125,7 @@ char* next_token(char* p,int flag) {
}
}
else if (*p==34) { // guillemets (de fin)
- char tempo[8192];
+ char BIGSTK tempo[8192];
tempo[0]='\0';
strcatbuff(tempo,p+1);
strcpybuff(p,tempo); /* wipe "" */
@@ -3181,6 +3255,10 @@ HTSEXT_API int hts_is_testing(void) { // 0 non 1 test 2 purge
return 2;
else if (_hts_in_html_parsing==4)
return 3;
+ else if (_hts_in_html_parsing==5) // scheduling
+ return 4;
+ else if (_hts_in_html_parsing==6) // wait for slot
+ return 5;
return 0;
}
HTSEXT_API int hts_is_exiting(void) {
@@ -3254,6 +3332,9 @@ HTSEXT_API int copy_htsopt(httrackp* from,httrackp* to) {
if (from->maxrate > -1)
to->maxrate = from->maxrate;
+ if (from->maxconn > 0)
+ to->maxconn = from->maxconn;
+
if (strnotempty(from->user_agent))
strcpybuff(to->user_agent , from->user_agent);
@@ -3303,10 +3384,10 @@ int htsAddLink(htsmoduleStruct* str, char* link) {
char* lien_buffer = * ( (char**) (str->lien_buffer_) );
/* */
/* */
- char adr[HTS_URLMAXSIZE*2],
+ char BIGSTK adr[HTS_URLMAXSIZE*2],
fil[HTS_URLMAXSIZE*2],
save[HTS_URLMAXSIZE*2];
- char codebase[HTS_URLMAXSIZE*2];
+ char BIGSTK codebase[HTS_URLMAXSIZE*2];
/* */
int pass_fix, prio_fix;
/* */
@@ -3321,7 +3402,7 @@ int htsAddLink(htsmoduleStruct* str, char* link) {
//
#if HTS_ANALYSTE
- if (!hts_htmlcheck_linkdetected(link)) {
+ if (!hts_htmlcheck_linkdetected(link) || !hts_htmlcheck_linkdetected2(link, NULL)) {
if (opt->errlog) {
fspc(opt->errlog,"error"); fprintf(opt->errlog,"Link %s refused by external wrapper"LF, link);
test_flush;
@@ -3347,7 +3428,7 @@ int htsAddLink(htsmoduleStruct* str, char* link) {
*(a+1)='\0'; // couper
} else { // couper http:// éventuel
if (strfield(codebase,"http://")) {
- char tempo[HTS_URLMAXSIZE*2];
+ char BIGSTK tempo[HTS_URLMAXSIZE*2];
char* a=codebase+7;
a=strchr(a,'/'); // après host
if (a) { // ** msg erreur et vérifier?
@@ -3382,6 +3463,7 @@ int htsAddLink(htsmoduleStruct* str, char* link) {
int just_test_it = 0;
forbidden_url = hts_acceptlink(opt, ptr, lien_tot, liens,
adr,fil,
+ NULL, NULL,
&set_prio_to,
&just_test_it);
if ((opt->debug>1) && (opt->log!=NULL)) {
@@ -3391,7 +3473,7 @@ int htsAddLink(htsmoduleStruct* str, char* link) {
/* Link accepted */
if (!forbidden_url) {
- char tempo[HTS_URLMAXSIZE*2];
+ char BIGSTK tempo[HTS_URLMAXSIZE*2];
int a,b;
tempo[0]='\0';
a=opt->savename_type;
diff --git a/src/htscore.h b/src/htscore.h
index d9e5d0a..97c0127 100644
--- a/src/htscore.h
+++ b/src/htscore.h
@@ -44,16 +44,25 @@ Please visit our Website: http://www.httrack.com
/* specific definitions */
#include "htsbase.h"
// Includes & définitions
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
+#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
+#endif
+#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h>
+#endif
#ifdef _WIN32
+#ifndef _WIN32_WCE
#include <conio.h>
+#endif
+#ifndef _WIN32_WCE
#include <signal.h>
#include <direct.h>
#else
+#ifndef HTS_CECOMPAT
+#include "signal.h"
+#endif
+#endif
+#else
#include <signal.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
@@ -68,7 +77,7 @@ Please visit our Website: http://www.httrack.com
#include "htsopt.h"
// structure d'un lien
-typedef struct {
+typedef struct lien_url {
char firstblock; // flag 1=premier malloc
char link_import; // lien importé à la suite d'un moved - ne pas appliquer les règles classiques up/down
int depth; // profondeur autorisée lien ; >0 forte 0=faible
@@ -93,7 +102,7 @@ typedef struct {
} lien_url;
// chargement de fichiers en 'arrière plan'
-typedef struct {
+typedef struct lien_back {
#if DEBUG_CHECKINT
char magic;
#endif
@@ -137,8 +146,10 @@ typedef struct {
#endif
} lien_back;
+typedef struct cache_back_zip_entry cache_back_zip_entry;
+
// cache
-typedef struct {
+typedef struct cache_back {
int version; // 0 ou 1
/* */
int type;
@@ -150,15 +161,23 @@ typedef struct {
char lastmodified[256];
// HASH
void* hashtable;
+ // HASH for tests (naming subsystem)
+ void* cached_tests;
// fichiers log optionnels
FILE* log;
FILE* errlog;
// variables
int ptr_ant; // pointeur pour anticiper
int ptr_last; // pointeur pour anticiper
+ //
+ void* zipInput;
+ void* zipOutput;
+ cache_back_zip_entry* zipEntries;
+ int zipEntriesOffs;
+ int zipEntriesCapa;
} cache_back;
-typedef struct {
+typedef struct hash_struct {
lien_url** liens; // pointeur sur liens
int max_lien; // indice le plus grand rencontré
int hash[3][HTS_HASH_SIZE]; // tables pour sav/adr-fil/former_adr-former_fil
@@ -169,11 +188,24 @@ typedef struct {
#define hash_write(A,B)
#endif
-typedef struct {
+typedef struct filecreate_params {
FILE* lst;
char path[HTS_URLMAXSIZE*2];
} filecreate_params;
+/* Library internal definitions */
+#ifdef HTS_INTERNAL_BYTECODE
+
+/* Returns 1 when cache is non-NULL and either cache->dat or cache->zipOutput is set, 0 otherwise. */
+static int cache_writable(cache_back* cache) {
+  if (cache == NULL) {
+    return 0;
+  }
+  if (cache->dat != NULL) {
+    return 1;
+  }
+  return (cache->zipOutput != NULL) ? 1 : 0;
+}
+
+/* Returns 1 when cache is non-NULL and either cache->olddat or cache->zipInput is set, 0 otherwise. */
+static int cache_readable(cache_back* cache) {
+  if (cache == NULL) {
+    return 0;
+  }
+  if (cache->olddat != NULL) {
+    return 1;
+  }
+  return (cache->zipInput != NULL) ? 1 : 0;
+}
+
+#endif
+
// Fonctions
// INCLUDES .H PARTIES DE CODE HTTRACK
@@ -240,6 +272,7 @@ typedef void (* t_hts_htmlcheck_uninit)(void);
typedef int (* t_hts_htmlcheck_start)(httrackp* opt);
typedef int (* t_hts_htmlcheck_end)(void);
typedef int (* t_hts_htmlcheck_chopt)(httrackp* opt);
+typedef int (* t_hts_htmlcheck_process)(char** html,int* len,char* url_adresse,char* url_fichier);
typedef int (* t_hts_htmlcheck)(char* html,int len,char* url_adresse,char* url_fichier);
typedef char* (* t_hts_htmlcheck_query)(char* question);
typedef char* (* t_hts_htmlcheck_query2)(char* question);
@@ -249,6 +282,7 @@ typedef int (* t_hts_htmlcheck_check)(char* adr,char* fil,int status);
typedef void (* t_hts_htmlcheck_pause)(char* lockfile);
typedef void (* t_hts_htmlcheck_filesave)(char* file);
typedef int (* t_hts_htmlcheck_linkdetected)(char* link);
+typedef int (* t_hts_htmlcheck_linkdetected2)(char* link, char* tag_start);
typedef int (* t_hts_htmlcheck_xfrstatus)(lien_back* back);
typedef int (* t_hts_htmlcheck_savename)(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save);
typedef int (* t_hts_htmlcheck_sendhead)(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* outgoing);
@@ -264,6 +298,8 @@ extern t_hts_htmlcheck_uninit hts_htmlcheck_uninit;
extern t_hts_htmlcheck_start hts_htmlcheck_start;
extern t_hts_htmlcheck_end hts_htmlcheck_end;
extern t_hts_htmlcheck_chopt hts_htmlcheck_chopt;
+extern t_hts_htmlcheck_process hts_htmlcheck_preprocess;
+extern t_hts_htmlcheck_process hts_htmlcheck_postprocess;
extern t_hts_htmlcheck hts_htmlcheck;
extern t_hts_htmlcheck_query hts_htmlcheck_query;
extern t_hts_htmlcheck_query2 hts_htmlcheck_query2;
@@ -273,11 +309,16 @@ extern t_hts_htmlcheck_check hts_htmlcheck_check;
extern t_hts_htmlcheck_pause hts_htmlcheck_pause;
extern t_hts_htmlcheck_filesave hts_htmlcheck_filesave;
extern t_hts_htmlcheck_linkdetected hts_htmlcheck_linkdetected;
+extern t_hts_htmlcheck_linkdetected2 hts_htmlcheck_linkdetected2;
extern t_hts_htmlcheck_xfrstatus hts_htmlcheck_xfrstatus;
extern t_hts_htmlcheck_savename hts_htmlcheck_savename;
extern t_hts_htmlcheck_sendhead hts_htmlcheck_sendhead;
extern t_hts_htmlcheck_receivehead hts_htmlcheck_receivehead;
*/
+
+/* Library internal definitions */
+#ifdef HTS_INTERNAL_BYTECODE
+
//
#ifndef HTTRACK_DEFLIB
HTSEXT_API int hts_is_parsing(int flag);
@@ -307,8 +348,6 @@ extern char** _hts_addurl;
extern int _hts_cancel;
#endif
-
-
//
@@ -342,6 +381,8 @@ int liens_record(char* adr,char* fil,char* save,char* former_adr,char* former_fi
// backing, routines externes
+int back_pluggable_sockets(lien_back* back, int back_max, httrackp* opt);
+int back_pluggable_sockets_strict(lien_back* back, int back_max, httrackp* opt);
int back_fill(lien_back* back,int back_max,httrackp* opt,cache_back* cache,lien_url** liens,int ptr,int numero_passe,int lien_tot);
int backlinks_done(lien_url** liens,int lien_tot,int ptr);
int back_fillmax(lien_back* back,int back_max,httrackp* opt,cache_back* cache,lien_url** liens,int ptr,int numero_passe,int lien_tot);
@@ -395,4 +436,6 @@ void voidf(void);
#endif
+#endif
+
diff --git a/src/htscoremain.c b/src/htscoremain.c
index 1162c18..bd90593 100644
--- a/src/htscoremain.c
+++ b/src/htscoremain.c
@@ -35,6 +35,9 @@ Please visit our Website: http://www.httrack.com
/* Author: Xavier Roche */
/* ------------------------------------------------------------ */
+/* Internal engine bytecode */
+#define HTS_INTERNAL_BYTECODE
+
#include "htscoremain.h"
#include "htsglobal.h"
@@ -43,6 +46,7 @@ Please visit our Website: http://www.httrack.com
#include "htsalias.h"
#include "htswrap.h"
#include "htsmodules.h"
+#include "htszlib.h"
#include <ctype.h>
#if HTS_WIN
@@ -100,7 +104,7 @@ extern int IPV6_resolver;
} \
} while(0)
-static void set_wrappers(void) {
+void set_wrappers(void) {
#if HTS_ANALYSTE
// custom wrappers
hts_htmlcheck_init = (t_hts_htmlcheck_init) htswrap_read("init");
@@ -108,6 +112,8 @@ static void set_wrappers(void) {
hts_htmlcheck_start = (t_hts_htmlcheck_start) htswrap_read("start");
hts_htmlcheck_end = (t_hts_htmlcheck_end) htswrap_read("end");
hts_htmlcheck_chopt = (t_hts_htmlcheck_chopt) htswrap_read("change-options");
+ hts_htmlcheck_preprocess = (t_hts_htmlcheck_process) htswrap_read("preprocess-html");
+ hts_htmlcheck_postprocess = (t_hts_htmlcheck_process) htswrap_read("postprocess-html");
hts_htmlcheck = (t_hts_htmlcheck) htswrap_read("check-html");
hts_htmlcheck_query = (t_hts_htmlcheck_query) htswrap_read("query");
hts_htmlcheck_query2 = (t_hts_htmlcheck_query2) htswrap_read("query2");
@@ -117,6 +123,7 @@ static void set_wrappers(void) {
hts_htmlcheck_pause = (t_hts_htmlcheck_pause) htswrap_read("pause");
hts_htmlcheck_filesave = (t_hts_htmlcheck_filesave) htswrap_read("save-file");
hts_htmlcheck_linkdetected = (t_hts_htmlcheck_linkdetected) htswrap_read("link-detected");
+ hts_htmlcheck_linkdetected2 = (t_hts_htmlcheck_linkdetected2) htswrap_read("link-detected2");
hts_htmlcheck_xfrstatus = (t_hts_htmlcheck_xfrstatus) htswrap_read("transfer-status");
hts_htmlcheck_savename = (t_hts_htmlcheck_savename) htswrap_read("save-name");
hts_htmlcheck_sendhead = (t_hts_htmlcheck_sendhead) htswrap_read("send-header");
@@ -130,13 +137,9 @@ HTSEXT_API int hts_main(int argc, char **argv) {
#else
int main(int argc, char **argv) {
#endif
- char* x_argv[999]; // Patch pour argv et argc: en cas de récupération de ligne de commande
+ char** x_argv=NULL; // Patch pour argv et argc: en cas de récupération de ligne de commande
char* x_argvblk=NULL; // (reprise ou update)
int x_ptr=0; // offset
- /*
- char* x_argv2[999]; // Patch pour config
- char* x_argvblk2=NULL;
- */
//
int argv_url=-1; // ==0 : utiliser cache et doit.log
char* argv_firsturl=NULL; // utilisé pour nommage par défaut
@@ -144,13 +147,13 @@ int main(int argc, char **argv) {
int url_sz = 65535;
//char url[65536]; // URLS séparées par un espace
// the parametres
- httrackp httrack;
+ httrackp BIGSTK httrack;
int httrack_logmode=3; // ONE log file
- int recuperer=0; // récupérer un plantage (n'arrive jamais, à supprimer)
+ int recuperer=0; // récupérer un plantage (n'arrive jamais, à supprimer)
#if HTS_WIN
#if HTS_ANALYSTE!=2
WORD wVersionRequested; /* requested version WinSock API */
- WSADATA wsadata; /* Windows Sockets API data */
+ WSADATA BIGSTK wsadata; /* Windows Sockets API data */
#endif
#else
#ifndef HTS_DO_NOT_USE_UID
@@ -197,6 +200,8 @@ int main(int argc, char **argv) {
strcpybuff(httrack.proxy.bindhost, ""); // bind default host
httrack.user_agent_send=1; // envoyer un user-agent
strcpybuff(httrack.user_agent,"Mozilla/4.5 (compatible; HTTrack 3.0x; Windows 98)");
+ strcpybuff(httrack.referer, "");
+ strcpybuff(httrack.from, "");
httrack.savename_83=0; // noms longs par défaut
httrack.savename_type=0; // avec structure originale
httrack.mimehtml=0; // pas MIME-html
@@ -218,6 +223,7 @@ int main(int argc, char **argv) {
httrack.nocompression=0; // pas de compression
httrack.tolerant=0; // ne pas accepter content-length incorrect
httrack.parseall=1; // tout parser (tags inconnus, par exemple)
+ httrack.parsedebug=0; // pas de mode débuggage
httrack.norecatch=0; // ne pas reprendre les fichiers effacés par l'utilisateur
httrack.verbosedisplay=0; // pas d'animation texte
httrack.sizehack=0; // size hack
@@ -238,19 +244,25 @@ int main(int argc, char **argv) {
strcpybuff(httrack.path_log,"");
strcpybuff(httrack.path_bin,"");
//
+#if HTS_SPARE_MEMORY==0
httrack.maxlink=100000; // 100,000 liens max par défaut (400Kb)
httrack.maxfilter=200; // 200 filtres max par défaut
+#else
+ httrack.maxlink=10000; // 10,000 liens max par défaut (40Kb)
+ httrack.maxfilter=50; // 50 filtres max par défaut
+#endif
httrack.maxcache=1048576*32; // a peu près 32Mo en cache max -- OPTION NON PARAMETRABLE POUR L'INSTANT --
//httrack.maxcache_anticipate=256; // maximum de liens à anticiper
httrack.maxtime=-1; // temps max en secondes
- httrack.maxrate=-1; // pas de taux maxi
- httrack.maxconn=10; // nombre connexions/s
+ httrack.maxrate=25000; // default max rate
+ httrack.maxconn=5.0; // nombre connexions/s
httrack.waittime=-1; // wait until.. hh*3600+mm*60+ss
//
httrack.exec=argv[0];
httrack.is_update=0; // not an update (yet)
httrack.dir_topindex=0; // do not built top index (yet)
//
+ httrack.bypass_limits=0; // enforce limits by default
httrack.state.stop=0; // stopper
httrack.state.exit_xh=0; // abort
//
@@ -337,6 +349,15 @@ int main(int argc, char **argv) {
strcpybuff(httrack.path_bin, HTS_HTTRACKDIR);
#endif
+ /* libhttrack-plugin DLL preload (libhttrack-plugin.so or libhttrack-plugin.dll) */
+ {
+ void* userfunction = getFunctionPtr(&httrack, "libhttrack-plugin", "plugin_init");
+ if (userfunction != NULL) {
+ t_hts_htmlcheck_init initFnc = (t_hts_htmlcheck_init) userfunction;
+ initFnc();
+ set_wrappers(); /* Re-read wrappers internal static functions */
+ }
+ }
/* filter CR, LF, TAB.. */
{
@@ -373,13 +394,16 @@ int main(int argc, char **argv) {
}
x_argvblk[0]='\0';
x_ptr=0;
+
+ /* Create argv */
+ x_argv = (char**) malloct(sizeof(char*) * ( argc + 1024 ));
}
/* Create new argc/argv, replace alias, count URLs, treat -h, -q, -i */
{
- char _tmp_argv[2][HTS_CDLMAXSIZE];
+ char BIGSTK _tmp_argv[2][HTS_CDLMAXSIZE];
+ char BIGSTK tmp_error[HTS_CDLMAXSIZE];
char* tmp_argv[2];
- char tmp_error[HTS_CDLMAXSIZE];
int tmp_argc;
int x_argc=0;
int na;
@@ -461,53 +485,6 @@ int main(int argc, char **argv) {
argc=x_argc;
}
-
-
-
- // Ici on ajoute les arguments de config
-/*
- if (fexist("config")) { // configuration
- x_argvblk2=(char*) calloct(32768,1);
-
- if (x_argvblk2!=NULL) {
- FILE* fp;
- int x_argc2;
-
- //strcpybuff(x_argvblk2,"httrack ");
- fp=fopen("config","rb");
- if (fp) {
- linput(fp,x_argvblk2+strlen(x_argvblk2),32000);
- fclose(fp); fp=NULL;
-
- // calculer arguments selon derniers arguments
- x_argv2[0]=argv[0];
- x_argc2=1;
- {
- char* p=x_argvblk2;
- do {
- x_argv2[x_argc2++]=p;
- p=strchr(p,' ');
- if (p) {
- *p=0; // octet nul (tableau)
- p++;
- }
- } while(p!=NULL);
- }
- // recopier arguments actuels (pointeurs uniquement)
- {
- int na;
- for(na=1;na<argc;na++) {
- x_argv2[x_argc2++]=argv[na];
- }
- }
- argc=x_argc2; // nouvel argc
- argv=x_argv2; // nouvel argv
- }
- }
- }
-*/
-
-
// Option O and includerc
{
int loops=0;
@@ -518,10 +495,10 @@ int main(int argc, char **argv) {
for(na=1;na<argc;na++) {
if (argv[na][0]=='"') {
- char tempo[HTS_CDLMAXSIZE];
+ char BIGSTK tempo[HTS_CDLMAXSIZE];
strcpybuff(tempo,argv[na]+1);
if (tempo[strlen(tempo)-1]!='"') {
- char s[HTS_CDLMAXSIZE];
+ char BIGSTK s[HTS_CDLMAXSIZE];
sprintf(s,"Missing quote in %s",argv[na]);
HTS_PANIC_PRINTF(s);
htsmain_free();
@@ -626,7 +603,7 @@ int main(int argc, char **argv) {
if (fp) {
int insert_after=1; /* insérer après nom au début */
//
- char buff[8192];
+ char BIGSTK buff[8192];
char *p,*lastp;
linput(fp,buff,8000);
fclose(fp); fp=NULL;
@@ -646,21 +623,8 @@ int main(int argc, char **argv) {
/* Insert parameters BUT so that they can be in the same order */
if (lastp) {
if (strnotempty(lastp)) {
- //char* argv0;
- //int len;
insert_after_argc=argc-insert_after;
- //argv0 = (argv+insert_after)[0];
cmdl_ins(lastp,insert_after_argc,(argv+insert_after),x_argvblk,x_ptr);
- /*
- DONE IN 'next_token'
- len = strlen(argv0);
- if (len >= 2 && argv0[0]=='\"' && argv0[len-1]=='\"') { // "foo"
- char tempo[1024];
- tempo[0] = '\0';
- strncatbuff(tempo, argv0+1, len-2);
- strcpybuff(argv0, tempo);
- }
- */
argc=insert_after_argc+insert_after;
insert_after++;
}
@@ -675,7 +639,11 @@ int main(int argc, char **argv) {
#if DEBUG_STEPS
printf("Checking cache\n");
#endif
- if ( (!fexist(fconcat(httrack.path_log,"hts-cache/new.dat"))) || (!fexist(fconcat(httrack.path_log,"hts-cache/new.ndx"))) ) {
+ if (!fexist(fconcat(httrack.path_log,"hts-cache/new.zip"))) {
+ if ( fexist(fconcat(httrack.path_log,"hts-cache/old.zip")) ) {
+ rename(fconcat(httrack.path_log,"hts-cache/old.zip"),fconcat(httrack.path_log,"hts-cache/new.zip"));
+ }
+ } else if ( (!fexist(fconcat(httrack.path_log,"hts-cache/new.dat"))) || (!fexist(fconcat(httrack.path_log,"hts-cache/new.ndx"))) ) {
if ( (fexist(fconcat(httrack.path_log,"hts-cache/old.dat"))) && (fexist(fconcat(httrack.path_log,"hts-cache/old.ndx"))) ) {
remove(fconcat(httrack.path_log,"hts-cache/new.dat"));
remove(fconcat(httrack.path_log,"hts-cache/new.ndx"));
@@ -723,6 +691,11 @@ int main(int argc, char **argv) {
remove(fconcat(httrack.path_log,"hts-err.txt"));
if (fexist(fconcat(httrack.path_html,"index.html")))
remove(fconcat(httrack.path_html,"index.html"));
+ /* */
+ if (fexist(fconcat(httrack.path_log,"hts-cache/new.zip")))
+ remove(fconcat(httrack.path_log,"hts-cache/new.zip"));
+ if (fexist(fconcat(httrack.path_log,"hts-cache/old.zip")))
+ remove(fconcat(httrack.path_log,"hts-cache/old.zip"));
if (fexist(fconcat(httrack.path_log,"hts-cache/new.dat")))
remove(fconcat(httrack.path_log,"hts-cache/new.dat"));
if (fexist(fconcat(httrack.path_log,"hts-cache/new.ndx")))
@@ -894,7 +867,11 @@ int main(int argc, char **argv) {
#endif
if (argv_url==0) {
// Présence d'un cache, que faire?..
- if ((fexist(fconcat(httrack.path_log,"hts-cache/new.dat"))) && (fexist(fconcat(httrack.path_log,"hts-cache/new.ndx")))) { // il existe déja un cache précédent.. renommer
+ if (
+ ( fexist(fconcat(httrack.path_log,"hts-cache/new.zip")) )
+ ||
+ ( fexist(fconcat(httrack.path_log,"hts-cache/new.dat")) && fexist(fconcat(httrack.path_log,"hts-cache/new.ndx")) )
+ ) { // il existe déja un cache précédent.. renommer
if (fexist(fconcat(httrack.path_log,"hts-cache/doit.log"))) { // un cache est présent
if (x_argvblk!=NULL) {
int m;
@@ -967,7 +944,11 @@ int main(int argc, char **argv) {
httrack.cache=1; // cache prioritaire
if (httrack.quiet==0) {
- if ((fexist(fconcat(httrack.path_log,"hts-cache/new.dat"))) && (fexist(fconcat(httrack.path_log,"hts-cache/new.ndx")))) { // il existe déja un cache précédent.. renommer
+ if (
+ ( fexist(fconcat(httrack.path_log,"hts-cache/new.zip")) )
+ ||
+ ( fexist(fconcat(httrack.path_log,"hts-cache/new.dat")) && fexist(fconcat(httrack.path_log,"hts-cache/new.ndx")) )
+ ) {
HT_REQUEST_START;
HT_PRINT("There is a lock-file in the directory ");
HT_PRINT(httrack.path_log);
@@ -985,7 +966,11 @@ int main(int argc, char **argv) {
//char s[32];
httrack.cache=2; // cache vient après test de validité
if (httrack.quiet==0) {
- if ((fexist(fconcat(httrack.path_log,"hts-cache/new.dat"))) && (fexist(fconcat(httrack.path_log,"hts-cache/new.ndx")))) { // il existe déja un cache précédent.. renommer
+ if (
+ ( fexist(fconcat(httrack.path_log,"hts-cache/new.zip")) )
+ ||
+ ( fexist(fconcat(httrack.path_log,"hts-cache/new.dat")) && fexist(fconcat(httrack.path_log,"hts-cache/new.ndx")) )
+ ) {
HT_REQUEST_START;
HT_PRINT("There is an index.html and a hts-cache folder in the directory ");
HT_PRINT(httrack.path_log);
@@ -1027,7 +1012,7 @@ int main(int argc, char **argv) {
for(na=1;na<argc;na++) {
if (argv[na][0]=='"') {
- char tempo[HTS_CDLMAXSIZE];
+ char BIGSTK tempo[HTS_CDLMAXSIZE];
strcpybuff(tempo,argv[na]+1);
if (tempo[strlen(tempo)-1]!='"') {
char s[HTS_CDLMAXSIZE];
@@ -1189,13 +1174,13 @@ int main(int argc, char **argv) {
{
sscanf(com+1,"%d",&httrack.savename_83);
switch(httrack.savename_83) {
- case 0:
+ case 0: // 8-3 (ISO9660 L1)
httrack.savename_83=1;
break;
case 1:
httrack.savename_83=0;
break;
- default:
+ default: // 2 == ISO9660 (ISO9660 L2)
httrack.savename_83=2;
break;
}
@@ -1234,7 +1219,7 @@ int main(int argc, char **argv) {
case 'q': httrack.includequery=1; if (*(com+1)=='0') { httrack.includequery=0; com++; } break; // No passwords in html files
case 'I': httrack.kindex=1; if (isdigit((unsigned char)*(com+1))) { sscanf(com+1,"%d",&httrack.kindex); while(isdigit((unsigned char)*(com+1))) com++; }
break; // Keyword Index
- case 'c': sscanf(com+1,"%d",&httrack.maxconn); while(isdigit((unsigned char)*(com+1))) com++; break;
+ case 'c': sscanf(com+1,"%f",&httrack.maxconn); while(isdigit((unsigned char)*(com+1)) || *(com+1) == '.') com++; break;
case 'e': sscanf(com+1,"%d",&httrack.extdepth); while(isdigit((unsigned char)*(com+1))) com++; break;
case 'B': httrack.tolerant=1; if (*(com+1)=='0') { httrack.tolerant=0; com++; } break; // HTTP/1.0 notamment
case 'h': httrack.http10=1; if (*(com+1)=='0') { httrack.http10=0; com++; } break; // HTTP/1.0
@@ -1246,6 +1231,7 @@ int main(int argc, char **argv) {
case 'u': httrack.urlhack=1; if (*(com+1)=='0') { httrack.urlhack=0; com++; } break; // url hack
case 'v': httrack.verbosedisplay=2; if (isdigit((unsigned char)*(com+1))) { sscanf(com+1,"%d",&httrack.verbosedisplay); while(isdigit((unsigned char)*(com+1))) com++; } break;
case 'i': httrack.dir_topindex = 1; if (*(com+1)=='0') { httrack.dir_topindex=0; com++; } break;
+ case '!': httrack.bypass_limits = 1; if (*(com+1)=='0') { httrack.bypass_limits=0; com++; } break;
// preserve: no footer, original links
case 'p':
@@ -1433,7 +1419,7 @@ int main(int argc, char **argv) {
na++;
if (pos != NULL && (pos - a) > 0 && (pos - a + 2) < sizeof(callbackname)) {
char* posf = strchr(pos + 1, ':');
- char filename[1024];
+ char BIGSTK filename[1024];
callbackname[0] = '\0';
strncatbuff(callbackname, a, pos - a);
pos++;
@@ -1442,38 +1428,38 @@ int main(int argc, char **argv) {
filename[0] = '\0';
strncatbuff(filename, pos, posf - pos);
posf++;
- userfunction = getFunctionPtr(filename, posf);
+ userfunction = getFunctionPtr(&httrack, filename, posf);
if (userfunction != NULL) {
if ((void*)htswrap_read(callbackname) != NULL) {
if (htswrap_add(callbackname, userfunction)) {
- if (!httrack.quiet) {
- set_wrappers();
- if ((void*)htswrap_read(callbackname) == userfunction) {
- printf("successfully plugged [%s -> %s:%s]\n", callbackname, posf, filename);
- } else {
- char tmp[1024 * 2];
- sprintf(tmp, "option %%W : unable to (re)plug the function %s from the file %s for the callback %s", posf, filename, callbackname);
- HTS_PANIC_PRINTF(tmp);
- htsmain_free();
- return -1;
+ set_wrappers(); /* Re-read wrappers internal static functions */
+ if ((void*)htswrap_read(callbackname) == userfunction) {
+ if (!httrack.quiet) {
+ fprintf(stderr, "successfully plugged [%s -> %s:%s]\n", callbackname, posf, filename);
}
+ } else {
+ char BIGSTK tmp[1024 * 2];
+ sprintf(tmp, "option %%W : unable to (re)plug the function %s from the file %s for the callback %s", posf, filename, callbackname);
+ HTS_PANIC_PRINTF(tmp);
+ htsmain_free();
+ return -1;
}
} else {
- char tmp[1024 * 2];
+ char BIGSTK tmp[1024 * 2];
sprintf(tmp, "option %%W : unable to plug the function %s from the file %s for the callback %s", posf, filename, callbackname);
HTS_PANIC_PRINTF(tmp);
htsmain_free();
return -1;
}
} else {
- char tmp[1024 * 2];
+ char BIGSTK tmp[1024 * 2];
sprintf(tmp, "option %%W : unknown or undefined callback %s", callbackname);
HTS_PANIC_PRINTF(tmp);
htsmain_free();
return -1;
}
} else {
- char tmp[1024 * 2];
+ char BIGSTK tmp[1024 * 2];
sprintf(tmp, "option %%W : unable to load the function %s in the file %s for the callback %s", posf, filename, callbackname);
HTS_PANIC_PRINTF(tmp);
htsmain_free();
@@ -1494,6 +1480,39 @@ int main(int argc, char **argv) {
}
break;
+ case 'R': // Referer
+ if ((na+1>=argc) || (argv[na+1][0]=='-')) {
+ HTS_PANIC_PRINTF("Option %R needs to be followed by a blank space, and a referer URL");
+ printf("Example: -%%R \"http://www.example.com/\"\n");
+ htsmain_free();
+ return -1;
+ } else{
+ na++;
+ if (strlen(argv[na])>=254) {
+ HTS_PANIC_PRINTF("Referer URL too long");
+ htsmain_free();
+ return -1;
+ }
+ strcpybuff(httrack.referer, argv[na]);
+ }
+ break;
+ case 'E': // From Email address
+ if ((na+1>=argc) || (argv[na+1][0]=='-')) {
+ HTS_PANIC_PRINTF("Option %E needs to be followed by a blank space, and an email");
+ printf("Example: -%%E \"postmaster@example.com\"\n");
+ htsmain_free();
+ return -1;
+ } else{
+ na++;
+ if (strlen(argv[na])>=254) {
+ HTS_PANIC_PRINTF("From email too long");
+ htsmain_free();
+ return -1;
+ }
+ strcpybuff(httrack.from, argv[na]);
+ }
+ break;
+
default: {
char s[HTS_CDLMAXSIZE];
sprintf(s,"invalid option %%%c\n",*com);
@@ -1587,9 +1606,9 @@ int main(int argc, char **argv) {
cache.hashtable=(void*)cache_hashtable; /* copy backcache hash */
cache.ro = 1; /* read only */
if (cache.hashtable) {
- char adr[HTS_URLMAXSIZE*2];
- char fil[HTS_URLMAXSIZE*2];
- char url[HTS_URLMAXSIZE*2];
+ char BIGSTK adr[HTS_URLMAXSIZE*2];
+ char BIGSTK fil[HTS_URLMAXSIZE*2];
+ char BIGSTK url[HTS_URLMAXSIZE*2];
char linepos[256];
int pos;
char* cacheNdx = readfile(fconcat(httrack.path_log,"hts-cache/new.ndx"));
@@ -1620,7 +1639,7 @@ int main(int argc, char **argv) {
||
(strjoker(url, filter, NULL, NULL) != NULL)
) {
- r = cache_read(&httrack, &cache, adr, fil, "", NULL); // lire entrée cache + data
+ r = cache_read_ro(&httrack, &cache, adr, fil, "", NULL); // lire entrée cache + data
if (r.statuscode != -1) { // No errors
found++;
if (!hasFilter) {
@@ -1629,7 +1648,7 @@ int main(int argc, char **argv) {
adr, fil);
} else {
char msg[256], cdate[256];
- char sav[HTS_URLMAXSIZE*2];
+ char BIGSTK sav[HTS_URLMAXSIZE*2];
infostatuscode(msg, r.statuscode);
time_gmt_rfc822(cdate);
@@ -1713,6 +1732,14 @@ int main(int argc, char **argv) {
return 0;
}
break;
+ case 'E': // extract cache
+ if (!hts_extract_meta(httrack.path_log)) {
+ fprintf(stderr, "* error extracting meta-data\n");
+ return 1;
+ }
+ fprintf(stderr, "* successfully extracted meta-data\n");
+ return 0;
+ break;
case 'X':
#ifndef STRDEBUG
fprintf(stderr, "warning: no string debugging support built, option has no effect\n");
@@ -1720,6 +1747,34 @@ int main(int argc, char **argv) {
htsMemoryFastXfr=1;
if (*(com+1)=='0') { htsMemoryFastXfr=0; com++; }
break;
+ case 'R':
+ {
+ char* name;
+ uLong repaired = 0;
+ uLong repairedBytes = 0;
+ if (fexist(fconcat(httrack.path_log,"hts-cache/new.zip"))) {
+ name = fconcat(httrack.path_log,"hts-cache/new.zip");
+ } else if (fexist(fconcat(httrack.path_log,"hts-cache/old.zip"))) {
+ name = fconcat(httrack.path_log,"hts-cache/old.zip");
+ } else {
+ fprintf(stderr, "* error: no cache found in %s\n", fconcat(httrack.path_log,"hts-cache/new.zip"));
+ return 1;
+ }
+ fprintf(stderr, "Cache: trying to repair %s\n", name);
+ if (unzRepair(name,
+ fconcat(httrack.path_log,"hts-cache/repair.zip"),
+ fconcat(httrack.path_log,"hts-cache/repair.tmp"),
+ &repaired, &repairedBytes
+ ) == Z_OK) {
+ unlink(name);
+ rename(fconcat(httrack.path_log,"hts-cache/repair.zip"), name);
+ fprintf(stderr,"Cache: %d bytes successfully recovered in %d entries\n", (int) repairedBytes, (int) repaired);
+ } else {
+ fprintf(stderr, "Cache: could not repair the cache\n");
+ }
+ }
+ return 0;
+ break;
case '~': /* internal lib test */
{
char thisIsATestYouShouldSeeAnError[12];
@@ -1742,11 +1797,12 @@ int main(int argc, char **argv) {
case 'T': httrack.maketrack=1; break;
case 'u': sscanf(com+1,"%d",&httrack.waittime); while(isdigit((unsigned char)*(com+1))) com++; break;
- case 'R': // ohh ftp, catch->ftpget
+ /*case 'R': // ohh ftp, catch->ftpget
HTS_PANIC_PRINTF("Unexpected internal error with -#R command");
htsmain_free();
return -1;
break;
+ */
case 'P': { // catchurl
help_catchurl(httrack.path_log);
htsmain_free();
@@ -1769,6 +1825,19 @@ int main(int argc, char **argv) {
return 0;
}
break;
+ case '1': /* internal test #1: run fil_simplifie() on the next argument and print the result */
+ if (na+1>=argc) { /* no argument follows the option */
+ HTS_PANIC_PRINTF("Option #1 needs to be followed by an URL");
+ printf("Example: '-#1' ./foo/bar/../foobar\n");
+ htsmain_free();
+ return -1;
+ } else {
+ fil_simplifie(argv[na+1]); /* argv[na+1] is printed right after, so the result is expected in place */
+ printf("simplified=%s\n", argv[na+1]);
+ htsmain_free();
+ return 0;
+ }
+ break;
case '!':
if (na+1>=argc) {
HTS_PANIC_PRINTF("Option #! needs to be followed by a commandline");
@@ -1779,6 +1848,15 @@ int main(int argc, char **argv) {
system(argv[na+1]);
}
break;
+ case 'd':
+ httrack.parsedebug = 1;
+ break;
+
+ /* autotest */
+ case 't': /* not yet implemented */
+ fprintf(stderr, "** AUTOCHECK OK\n");
+ exit(0);
+ break;
default: printf("Internal option %c not recognized\n",*com); break;
}
@@ -1866,7 +1944,7 @@ int main(int argc, char **argv) {
} // while
} else { // URL/filters
- char tempo[1024];
+ char BIGSTK tempo[1024];
if (strnotempty(url)) strcatbuff(url," "); // espace de séparation
strcpybuff(tempo,unescape_http_unharm(argv[na],1));
escape_spc_url(tempo);
@@ -1895,7 +1973,7 @@ int main(int argc, char **argv) {
//if (userdef) {
if (!userid) {
//if (strcmp(userdef->pw_name,"root")==0) {
- char rpath[1024];
+ char BIGSTK rpath[1024];
//printf("html=%s log=%s\n",httrack.path_html,httrack.path_log); // xxc
if ((httrack.path_html[0]) && (httrack.path_log[0])) {
char *a=httrack.path_html,*b=httrack.path_log,*c=NULL,*d=NULL;
@@ -1913,7 +1991,7 @@ int main(int argc, char **argv) {
strncatbuff(rpath,httrack.path_html,(int) (c - httrack.path_html));
}
{
- char tmp[1024];
+ char BIGSTK tmp[1024];
strcpybuff(tmp,c); strcpybuff(httrack.path_html,tmp);
strcpybuff(tmp,d); strcpybuff(httrack.path_log,tmp);
}
@@ -1971,7 +2049,19 @@ int main(int argc, char **argv) {
// cad la version contenant le plus de fichiers
if (httrack.cache) {
if (fexist(fconcat(httrack.path_log,"hts-in_progress.lock"))) { // problemes..
- if (fexist(fconcat(httrack.path_log,"hts-cache/new.dat")) && fexist(fconcat(httrack.path_log,"hts-cache/new.ndx"))) {
+ if ( fexist(fconcat(httrack.path_log,"hts-cache/new.zip")) ) { /* ZIP cache: test new.zip (not new.dat) so the legacy .dat/.ndx branch below stays reachable */
+ if ( fexist(fconcat(httrack.path_log,"hts-cache/old.zip")) ) {
+ if (fsize(fconcat(httrack.path_log,"hts-cache/new.zip"))<32768) { /* new cache is small (< 32KB) */
+ if (fsize(fconcat(httrack.path_log,"hts-cache/old.zip"))>65536) { /* old cache is big (> 64KB) */
+ if (fsize(fconcat(httrack.path_log,"hts-cache/old.zip")) > fsize(fconcat(httrack.path_log,"hts-cache/new.zip"))) {
+ remove(fconcat(httrack.path_log,"hts-cache/new.zip")); /* drop the small new cache ... */
+ rename(fconcat(httrack.path_log,"hts-cache/old.zip"), fconcat(httrack.path_log,"hts-cache/new.zip")); /* ... and promote the old one in its place */
+ }
+ }
+ }
+ }
+ }
+ else if (fexist(fconcat(httrack.path_log,"hts-cache/new.dat")) && fexist(fconcat(httrack.path_log,"hts-cache/new.ndx"))) {
if (fexist(fconcat(httrack.path_log,"hts-cache/old.dat")) && fexist(fconcat(httrack.path_log,"hts-cache/old.ndx"))) {
// switcher si new<32Ko et old>65Ko (tailles arbitraires) ?
// ce cas est peut être une erreur ou un crash d'un miroir ancien, prendre
@@ -2058,7 +2148,7 @@ int main(int argc, char **argv) {
fprintf(fp,"and is used for updating this website."LF);
fprintf(fp,"(The HTML website structure is stored here to allow fast updates)"LF""LF);
fprintf(fp,"DO NOT delete this folder unless you do not want to update the mirror in the future!!"LF);
- fprintf(fp,"(you can safely delete old.dat, old.ndx and old.lst files, however)"LF);
+ fprintf(fp,"(you can safely delete old.zip and old.lst files, however)"LF);
fprintf(fp,""LF);
fprintf(fp,HTS_LOG_SECURITY_WARNING);
fclose(fp);
@@ -2177,18 +2267,47 @@ int main(int argc, char **argv) {
io_flush;
+ /* Enforce limits to avoid bandwidth abuse. The bypass_limits option should only be used by administrators and experts. */
+ if (!httrack.bypass_limits) {
+ if (httrack.maxsoc <= 0 || httrack.maxsoc > 4) {
+ httrack.maxsoc = 4;
+ if (httrack.log != NULL) {
+ fspc(httrack.log,"warning"); fprintf(httrack.log,"* security warning: maximum number of simultaneous connections limited to %d to avoid server overload"LF, (int)httrack.maxsoc);
+ }
+ }
+ if (httrack.maxrate <= 0 || httrack.maxrate > 100000) {
+ httrack.maxrate = 100000;
+ if (httrack.log != NULL) {
+ fspc(httrack.log,"warning"); fprintf(httrack.log,"* security warning: maximum bandwidth limited to %d to avoid server overload"LF, (int)httrack.maxrate);
+ }
+ }
+ if (httrack.maxconn <= 0 || httrack.maxconn > 5.0) {
+ httrack.maxconn = 5.0;
+ if (httrack.log != NULL) {
+ fspc(httrack.log,"warning"); fprintf(httrack.log,"* security warning: maximum number of connections per second limited to %f to avoid server overload"LF, (float)httrack.maxconn);
+ }
+ }
+ } else {
+ if (httrack.log != NULL) {
+ fspc(httrack.log,"warning"); fprintf(httrack.log,"* security warning: !!! BYPASSING SECURITY LIMITS - MONITOR THIS SESSION WITH EXTREME CARE !!!"LF);
+ }
+ }
+
/* Info for wrappers */
if ( (httrack.debug>0) && (httrack.log!=NULL) ) {
fspc(httrack.log,"info"); fprintf(httrack.log,"engine: init"LF);
}
#if HTS_ANALYSTE
hts_htmlcheck_init();
+ set_wrappers(); // init() is allowed to set other wrappers
#endif
// détourner SIGHUP etc.
#if HTS_WIN
+#ifndef _WIN32_WCE
signal( SIGINT , sig_ask ); // ^C
signal( SIGTERM , sig_finish ); // kill <process>
+#endif
#else
signal( SIGHUP , sig_back ); // close window
signal( SIGTSTP , sig_back ); // ^Z
@@ -2226,7 +2345,7 @@ deprecated - see SIGCHLD
//
// Build top index
if (httrack.dir_topindex) {
- char rpath[1024*2];
+ char BIGSTK rpath[1024*2];
char* a;
strcpybuff(rpath,httrack.path_html);
if (rpath[0]) {
@@ -2249,33 +2368,35 @@ deprecated - see SIGCHLD
}
}
- /* Info for wrappers */
- if ( (httrack.debug>0) && (httrack.log!=NULL) ) {
- fspc(httrack.log,"info"); fprintf(httrack.log,"engine: free"LF);
- }
+ /* Info for wrappers */
+ if ( (httrack.debug>0) && (httrack.log!=NULL) ) {
+ fspc(httrack.log,"info"); fprintf(httrack.log,"engine: free"LF);
+ }
#if HTS_ANALYSTE
- hts_htmlcheck_uninit();
+ hts_htmlcheck_uninit();
#endif
-
+
if (httrack_logmode!=1) {
if (httrack.errlog == httrack.log) httrack.errlog=NULL;
if (httrack.log) { fclose(httrack.log); httrack.log=NULL; }
if (httrack.errlog) { fclose(httrack.errlog); httrack.errlog=NULL; }
}
-
+
// Débuggage des en têtes
if (_DEBUG_HEAD) {
if (ioinfo) {
fclose(ioinfo);
}
}
-
+
// supprimer lock
remove(n_lock);
}
-
+
if (x_argvblk)
freet(x_argvblk);
+ if (x_argv)
+ freet(x_argv);
#if HTS_WIN
#if HTS_ANALYSTE!=2
@@ -2315,7 +2436,7 @@ int check_path(char* s,char* defaultname) {
if (strnotempty(s)) {
if (s[(i=strlen(s))-1]=='#') {
if (strnotempty((defaultname?defaultname:""))) {
- char tempo[HTS_URLMAXSIZE*2];
+ char BIGSTK tempo[HTS_URLMAXSIZE*2];
char* a=strchr(defaultname,'#'); // we never know..
if (a) *a='\0';
tempo[0]='\0';
@@ -2339,7 +2460,7 @@ int check_path(char* s,char* defaultname) {
// détermine si l'argument est une option
int cmdl_opt(char* s) {
if (s[0]=='-') { // c'est peut être une option
- if (strchr(s,'.')!=NULL)
+ if (strchr(s,'.')!=NULL && strchr(s,'%')==NULL)
return 0; // sans doute un -www.truc.fr (note: -www n'est pas compris)
else if (strchr(s,'/')!=NULL)
return 0; // idem, -*cgi-bin/
diff --git a/src/htscoremain.h b/src/htscoremain.h
index 3662793..548c7f6 100644
--- a/src/htscoremain.h
+++ b/src/htscoremain.h
@@ -46,6 +46,8 @@ Please visit our Website: http://www.httrack.com
#include "htsglobal.h"
+/* Library internal definitions */
+#ifdef HTS_INTERNAL_BYTECODE
// Main, récupère les paramètres et appelle le robot
#if HTS_ANALYSTE
#ifndef HTTRACK_DEFLIB
@@ -58,7 +60,7 @@ int main(int argc, char **argv);
int cmdl_opt(char* s);
int check_path(char* s,char* defaultname);
-
+#endif
#endif
diff --git a/src/htsdefines.h b/src/htsdefines.h
index 0ab2cfa..e91b5b4 100644
--- a/src/htsdefines.h
+++ b/src/htsdefines.h
@@ -43,6 +43,7 @@ typedef void (* t_hts_htmlcheck_uninit)(void);
typedef int (* t_hts_htmlcheck_start)(httrackp* opt);
typedef int (* t_hts_htmlcheck_end)(void);
typedef int (* t_hts_htmlcheck_chopt)(httrackp* opt);
+typedef int (* t_hts_htmlcheck_process)(char** html,int* len,char* url_adresse,char* url_fichier);
typedef int (* t_hts_htmlcheck)(char* html,int len,char* url_adresse,char* url_fichier);
typedef char* (* t_hts_htmlcheck_query)(char* question);
typedef char* (* t_hts_htmlcheck_query2)(char* question);
@@ -52,11 +53,14 @@ typedef int (* t_hts_htmlcheck_check)(char* adr,char* fil,int status);
typedef void (* t_hts_htmlcheck_pause)(char* lockfile);
typedef void (* t_hts_htmlcheck_filesave)(char* file);
typedef int (* t_hts_htmlcheck_linkdetected)(char* link);
+typedef int (* t_hts_htmlcheck_linkdetected2)(char* link, char* tag_start);
typedef int (* t_hts_htmlcheck_xfrstatus)(lien_back* back);
typedef int (* t_hts_htmlcheck_savename)(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save);
typedef int (* t_hts_htmlcheck_sendhead)(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* outgoing);
typedef int (* t_hts_htmlcheck_receivehead)(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* incoming);
+/* Library internal definitions */
+#ifdef HTS_INTERNAL_BYTECODE
// demande d'interaction avec le shell
#if HTS_ANALYSTE
extern char HTbuff[2048];
@@ -65,6 +69,8 @@ extern t_hts_htmlcheck_uninit hts_htmlcheck_uninit;
extern t_hts_htmlcheck_start hts_htmlcheck_start;
extern t_hts_htmlcheck_end hts_htmlcheck_end;
extern t_hts_htmlcheck_chopt hts_htmlcheck_chopt;
+extern t_hts_htmlcheck_process hts_htmlcheck_preprocess;
+extern t_hts_htmlcheck_process hts_htmlcheck_postprocess;
extern t_hts_htmlcheck hts_htmlcheck;
extern t_hts_htmlcheck_query hts_htmlcheck_query;
extern t_hts_htmlcheck_query2 hts_htmlcheck_query2;
@@ -74,6 +80,7 @@ extern t_hts_htmlcheck_check hts_htmlcheck_check;
extern t_hts_htmlcheck_pause hts_htmlcheck_pause;
extern t_hts_htmlcheck_filesave hts_htmlcheck_filesave;
extern t_hts_htmlcheck_linkdetected hts_htmlcheck_linkdetected;
+extern t_hts_htmlcheck_linkdetected2 hts_htmlcheck_linkdetected2;
extern t_hts_htmlcheck_xfrstatus hts_htmlcheck_xfrstatus;
extern t_hts_htmlcheck_savename hts_htmlcheck_savename;
extern t_hts_htmlcheck_sendhead hts_htmlcheck_sendhead;
@@ -102,3 +109,5 @@ extern t_hts_htmlcheck_receivehead hts_htmlcheck_receivehead;
#endif
+#endif
+
diff --git a/src/htsfilters.c b/src/htsfilters.c
index be8b482..681b506 100644
--- a/src/htsfilters.c
+++ b/src/htsfilters.c
@@ -35,6 +35,9 @@ Please visit our Website: http://www.httrack.com
/* Author: Xavier Roche */
/* ------------------------------------------------------------ */
+/* Internal engine bytecode */
+#define HTS_INTERNAL_BYTECODE
+
// *.gif match all gif files
// *[file]/*[file].exe match all exe files with one folder structure
@@ -49,9 +52,6 @@ Please visit our Website: http://www.httrack.com
/* specific definitions */
#include "htsbase.h"
#include "htslib.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
#include <ctype.h>
/* END specific definitions */
diff --git a/src/htsfilters.h b/src/htsfilters.h
index 168d330..f963322 100644
--- a/src/htsfilters.h
+++ b/src/htsfilters.h
@@ -42,8 +42,11 @@ Please visit our Website: http://www.httrack.com
#include "htsbase.h"
+/* Library internal definitions */
+#ifdef HTS_INTERNAL_BYTECODE
int fa_strjoker(char** filters,int nfil,char* nom,LLint* size,int* size_flag,int* depth);
HTS_INLINE char* strjoker(char* chaine,char* joker,LLint* size,int* size_flag);
char* strjokerfind(char* chaine,char* joker);
+#endif
#endif
diff --git a/src/htsftp.c b/src/htsftp.c
index 68a8af5..7b04052 100644
--- a/src/htsftp.c
+++ b/src/htsftp.c
@@ -34,6 +34,9 @@ Please visit our Website: http://www.httrack.com
/* Author: Xavier Roche */
/* ------------------------------------------------------------ */
+/* Internal engine bytecode */
+#define HTS_INTERNAL_BYTECODE
+
// Gestion protocole ftp
// Version .05 (01/2000)
@@ -43,9 +46,6 @@ Please visit our Website: http://www.httrack.com
#include "htsbase.h"
#include "htsnet.h"
#include "htsthread.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
#if HTS_WIN
#else
//inet_ntoa
@@ -55,9 +55,11 @@ Please visit our Website: http://www.httrack.com
#if HTS_WIN
#ifndef __cplusplus
// DOS
+#ifndef _WIN32_WCE
#include <process.h> /* _beginthread, _endthread */
#endif
#endif
+#endif
// ftp mode passif
// #if HTS_INET6==0
@@ -73,31 +75,10 @@ Please visit our Website: http://www.httrack.com
#define FTP_STATUS_READY 1001
#if USE_BEGINTHREAD
-/*
-#ifdef __cplusplus
-// C++ -> Shell
-UINT back_launch_ftp( LPVOID pP ) {
- lien_back* back=(lien_back*) pP;
- if (back == NULL) {
- //back->status=FTP_STATUS_READY; // fini
- //back->r.statuscode=-1;
- return -1;
- }
-
- // lancer ftp
- run_launch_ftp(back);
- // prêt
- back->status=0;
- return 0; // thread completed successfully
-}
-#else
-*/
-PTHREAD_TYPE back_launch_ftp( void* pP ) {
+PTHREAD_TYPE PTHREAD_TYPE_FNC back_launch_ftp( void* pP ) {
lien_back* back=(lien_back*) pP;
if (back == NULL) {
- //back->status=FTP_STATUS_READY; // fini
- //back->r.statuscode=-1;
#if FTP_DEBUG
printf("[ftp error: no args]\n");
#endif
@@ -113,27 +94,19 @@ PTHREAD_TYPE back_launch_ftp( void* pP ) {
#endif
run_launch_ftp(back);
// prêt
- back->status=0;
+ back->status=FTP_STATUS_READY;
/* Uninitialize */
hts_uninit();
return PTHREAD_RETURN;
}
-/*#endif*/
// lancer en back
void launch_ftp(lien_back* back) {
-/*
-#ifdef __cplusplus
- // C++ -> Shell
- AfxBeginThread(back_launch_ftp,(LPVOID) back);
-#else
-*/
// DOS
#if FTP_DEBUG
printf("[Launching main ftp thread]\n");
#endif
- _beginthread(back_launch_ftp, 0, (void*) back);
-/*#endif*/
+ (void)hts_newthread(back_launch_ftp, 0, (void*) back);
}
#else
@@ -142,7 +115,7 @@ int back_launch_ftp(lien_back* back) {
// lancer ftp
run_launch_ftp(back);
// prêt
- back->status=0;
+ back->status=FTP_STATUS_READY;
return 0;
}
void launch_ftp(lien_back* back,char* path,char* exec) {
@@ -213,7 +186,7 @@ int run_launch_ftp(lien_back* back) {
#if FTP_PASV
int port_pasv=0;
#endif
- char adr_ip[1024];
+ char BIGSTK adr_ip[1024];
char *adr,*real_adr;
char* ftp_filename="";
int timeout = 300; // timeout
@@ -281,7 +254,11 @@ int run_launch_ftp(lien_back* back) {
ftp_filename=a;
if (strnotempty(a)) {
char* ua=unescape_http(a);
- if (
+ int len_a = (int) strlen(ua);
+ if (len_a > 0 && ua[len_a -1] == '/') { /* obviously a directory listing */
+ transfer_list=1;
+ sprintf(line_retr,"LIST -A %s",ua);
+ } else if (
(strchr(ua, ' '))
||
(strchr(ua, '\"'))
@@ -298,7 +275,7 @@ int run_launch_ftp(lien_back* back) {
}
} else {
strcpybuff(back->r.msg,"Unexpected PORT error");
- back->status=FTP_STATUS_READY; // fini
+ // back->status=FTP_STATUS_READY; // fini
back->r.statuscode=-1;
}
}
@@ -332,7 +309,7 @@ int run_launch_ftp(lien_back* back) {
hp = hts_gethostbyname(_adr, &fullhostent_buffer);
if (hp == NULL) {
strcpybuff(back->r.msg,"Unable to get server's address");
- back->status=FTP_STATUS_READY; // fini
+ // back->status=FTP_STATUS_READY; // fini
back->r.statuscode=-5;
_HALT_FTP
return 0;
@@ -349,7 +326,7 @@ int run_launch_ftp(lien_back* back) {
soc_ctl=socket(SOCaddr_sinfamily(server), SOCK_STREAM, 0);
if (soc_ctl==INVALID_SOCKET) {
strcpybuff(back->r.msg,"Unable to create a socket");
- back->status=FTP_STATUS_READY; // fini
+ // back->status=FTP_STATUS_READY; // fini
back->r.statuscode=-1;
_HALT_FTP
return 0;
@@ -367,7 +344,7 @@ int run_launch_ftp(lien_back* back) {
if (connect(soc_ctl, (struct sockaddr *)&server, server_size) == -1) {
#endif
strcpybuff(back->r.msg,"Unable to connect to the server");
- back->status=FTP_STATUS_READY; // fini
+ // back->status=FTP_STATUS_READY; // fini
back->r.statuscode=-1;
_HALT_FTP
return 0;
@@ -379,7 +356,7 @@ int run_launch_ftp(lien_back* back) {
_CHECK_HALT_FTP;
{
- char line[1024];
+ char BIGSTK line[1024];
// envoi du login
// --USER--
@@ -400,13 +377,23 @@ int run_launch_ftp(lien_back* back) {
get_ftp_line(soc_ctl,line,timeout);
_CHECK_HALT_FTP;
if (line[0]=='2') { // ok
+ send_line(soc_ctl,"TYPE I");
+ get_ftp_line(soc_ctl,line,timeout);
+ _CHECK_HALT_FTP;
+ if (line[0]=='2') {
+ // ok
+ } else {
+ strcpybuff(back->r.msg,"TYPE I error");
+ // back->status=FTP_STATUS_READY; // fini
+ back->r.statuscode=-1;
+ }
#if 0
// --CWD--
char* a;
a=back->url_fil + strlen(back->url_fil)-1;
while( (a > back->url_fil) && (*a!='/')) a--;
if (*a == '/') { // ok repéré
- char target[1024];
+ char BIGSTK target[1024];
target[0]='\0';
strncatbuff(target,back->url_fil,(int) (a - back->url_fil));
if (strnotempty(target)==0)
@@ -424,34 +411,34 @@ int run_launch_ftp(lien_back* back) {
// ok..
} else {
strcpybuff(back->r.msg,"TYPE I error");
- back->status=FTP_STATUS_READY; // fini
+ // back->status=FTP_STATUS_READY; // fini
back->r.statuscode=-1;
}
} else {
sprintf(back->r.msg,"CWD error: %s",linejmp(line));
- back->status=FTP_STATUS_READY; // fini
+ // back->status=FTP_STATUS_READY; // fini
back->r.statuscode=-1;
} // sinon on est prêts
} else {
strcpybuff(back->r.msg,"Unexpected ftp error");
- back->status=FTP_STATUS_READY; // fini
+ // back->status=FTP_STATUS_READY; // fini
back->r.statuscode=-1;
}
#endif
} else {
sprintf(back->r.msg,"Bad password: %s",linejmp(line));
- back->status=FTP_STATUS_READY; // fini
+ // back->status=FTP_STATUS_READY; // fini
back->r.statuscode=-1;
}
} else {
sprintf(back->r.msg,"Bad user name: %s",linejmp(line));
- back->status=FTP_STATUS_READY; // fini
+ // back->status=FTP_STATUS_READY; // fini
back->r.statuscode=-1;
}
} else {
sprintf(back->r.msg,"Connection refused: %s",linejmp(line));
- back->status=FTP_STATUS_READY; // fini
+ // back->status=FTP_STATUS_READY; // fini
back->r.statuscode=-1;
}
@@ -510,7 +497,7 @@ int run_launch_ftp(lien_back* back) {
// -- fin analyse de l'adresse IP et du port --
} else {
sprintf(back->r.msg,"PASV incorrect: %s",linejmp(line));
- back->status=FTP_STATUS_READY; // fini
+ // back->status=FTP_STATUS_READY; // fini
back->r.statuscode=-1;
} // sinon on est prêts
} else {
@@ -542,12 +529,12 @@ int run_launch_ftp(lien_back* back) {
}
} else {
sprintf(back->r.msg,"EPSV incorrect: %s",linejmp(line));
- back->status=FTP_STATUS_READY; // fini
+ // back->status=FTP_STATUS_READY; // fini
back->r.statuscode=-1;
}
} else {
sprintf(back->r.msg,"PASV/EPSV error: %s",linejmp(line));
- back->status=FTP_STATUS_READY; // fini
+ // back->status=FTP_STATUS_READY; // fini
back->r.statuscode=-1;
} // sinon on est prêts
}
@@ -663,7 +650,7 @@ int run_launch_ftp(lien_back* back) {
deletesoc(soc_dat); soc_dat=INVALID_SOCKET;
//
sprintf(back->r.msg,"RETR command errror: %s",linejmp(line));
- back->status=FTP_STATUS_READY; // fini
+ // back->status=FTP_STATUS_READY; // fini
back->r.statuscode=-1;
} // sinon on est prêts
} else {
@@ -673,22 +660,22 @@ int run_launch_ftp(lien_back* back) {
deletesoc(soc_dat); soc_dat=INVALID_SOCKET;
//
strcpybuff(back->r.msg,"Unable to connect");
- back->status=FTP_STATUS_READY; // fini
+ // back->status=FTP_STATUS_READY; // fini
back->r.statuscode=-1;
} // sinon on est prêts
} else {
strcpybuff(back->r.msg,"Unable to create a socket");
- back->status=FTP_STATUS_READY; // fini
+ // back->status=FTP_STATUS_READY; // fini
back->r.statuscode=-1;
} // sinon on est prêts
} else {
sprintf(back->r.msg,"Unable to resolve IP %s",adr_ip);
- back->status=FTP_STATUS_READY; // fini
+ // back->status=FTP_STATUS_READY; // fini
back->r.statuscode=-1;
} // sinon on est prêts
} else {
sprintf(back->r.msg,"PASV incorrect: %s",linejmp(line));
- back->status=FTP_STATUS_READY; // fini
+ // back->status=FTP_STATUS_READY; // fini
back->r.statuscode=-1;
} // sinon on est prêts
#else
@@ -711,17 +698,17 @@ int run_launch_ftp(lien_back* back) {
int dummylen = sizeof(struct sockaddr);
if ( (soc_dat=accept(soc_servdat,&dummyaddr,&dummylen)) == INVALID_SOCKET) {
strcpybuff(back->r.msg,"Unable to accept connection");
- back->status=FTP_STATUS_READY; // fini
+ // back->status=FTP_STATUS_READY; // fini
back->r.statuscode=-1;
}
} else {
sprintf(back->r.msg,"RETR command errror: %s",linejmp(line));
- back->status=FTP_STATUS_READY; // fini
+ // back->status=FTP_STATUS_READY; // fini
back->r.statuscode=-1;
}
} else {
sprintf(back->r.msg,"PORT command error: %s",linejmp(line));
- back->status=FTP_STATUS_READY; // fini
+ // back->status=FTP_STATUS_READY; // fini
back->r.statuscode=-1;
}
#if HTS_WIN
@@ -731,7 +718,7 @@ int run_launch_ftp(lien_back* back) {
#endif
} else {
strcpybuff(back->r.msg,"Unable to listen to a port");
- back->status=FTP_STATUS_READY; // fini
+ // back->status=FTP_STATUS_READY; // fini
back->r.statuscode=-1;
}
#endif
@@ -747,7 +734,7 @@ int run_launch_ftp(lien_back* back) {
back->r.fp = filecreate(back->url_sav);
strcpybuff(back->info,"receiving");
if (back->r.fp != NULL) {
- char buff[1024];
+ char BIGSTK buff[1024];
int len=1;
int read_len=1024;
//HTS_TOTAL_RECV_CHECK(read_len); // Diminuer au besoin si trop de données reçues
@@ -758,13 +745,13 @@ int run_launch_ftp(lien_back* back) {
switch(wait_socket_receive(soc_dat,timeout)) {
case -1:
strcpybuff(back->r.msg,"FTP read error");
- back->status=FTP_STATUS_READY; // fini
+ // back->status=FTP_STATUS_READY; // fini
back->r.statuscode=-1;
len=0; // fin
break;
case 0:
sprintf(back->r.msg,"Time out (%d)",timeout);
- back->status=FTP_STATUS_READY; // fini
+ // back->status=FTP_STATUS_READY; // fini
back->r.statuscode=-1;
len=0; // fin
break;
@@ -785,17 +772,17 @@ int run_launch_ftp(lien_back* back) {
}
*/
strcpybuff(back->r.msg,"Write error");
- back->status=FTP_STATUS_READY; // fini
+ // back->status=FTP_STATUS_READY; // fini
back->r.statuscode=-1;
len=0; // error
}
} else {
strcpybuff(back->r.msg,"Unexpected write error");
- back->status=FTP_STATUS_READY; // fini
+ // back->status=FTP_STATUS_READY; // fini
back->r.statuscode=-1;
}
} else { // Erreur ou terminé
- back->status=FTP_STATUS_READY; // fini
+ // back->status=FTP_STATUS_READY; // fini
back->r.statuscode=0;
if (back->r.totalsize > 0 && back->r.size != back->r.totalsize) {
back->r.statuscode=-1;
@@ -812,7 +799,7 @@ int run_launch_ftp(lien_back* back) {
}
} else {
strcpybuff(back->r.msg,"Unable to write file");
- back->status=FTP_STATUS_READY; // fini
+ // back->status=FTP_STATUS_READY; // fini
back->r.statuscode=-1;
}
#if HTS_WIN
@@ -828,16 +815,16 @@ int run_launch_ftp(lien_back* back) {
get_ftp_line(soc_ctl,line,timeout);
if (line[0]=='2') { // OK
strcpybuff(back->r.msg,"OK");
- back->status=FTP_STATUS_READY; // fini
+ // back->status=FTP_STATUS_READY; // fini
back->r.statuscode=200;
} else {
sprintf(back->r.msg,"RETR incorrect: %s",linejmp(line));
- back->status=FTP_STATUS_READY; // fini
+ // back->status=FTP_STATUS_READY; // fini
back->r.statuscode=-1;
}
} else {
strcpybuff(back->r.msg,"FTP read error");
- back->status=FTP_STATUS_READY; // fini
+ // back->status=FTP_STATUS_READY; // fini
back->r.statuscode=-1;
}
}
@@ -866,7 +853,7 @@ int run_launch_ftp(lien_back* back) {
back->r.statuscode=200;
strcpybuff(back->r.msg,"OK");
}
- back->status=FTP_STATUS_READY; // fini
+ // back->status=FTP_STATUS_READY; // fini
return 0;
}
@@ -976,7 +963,7 @@ FILE* dd=NULL;
// routines de réception/émission
// 0 = ERROR
int send_line(T_SOC soc,char* data) {
- char line[1024];
+ char BIGSTK line[1024];
if (_DEBUG_HEAD) {
if (ioinfo) {
fprintf(ioinfo,"---> %s\x0d\x0a",data);
@@ -1007,7 +994,7 @@ int send_line(T_SOC soc,char* data) {
}
int get_ftp_line(T_SOC soc,char* line,int timeout) {
- char data[1024];
+ char BIGSTK data[1024];
int i,ok,multiline;
#if FTP_DEBUG
if (dd == NULL) dd = fopen("toto.txt","w");
@@ -1152,7 +1139,7 @@ int wait_socket_receive(T_SOC soc,int timeout) {
int stop_ftp(lien_back* back) {
if (back->stop_ftp) {
strcpybuff(back->r.msg,"Cancelled by User");
- back->status=FTP_STATUS_READY; // fini
+ // back->status=FTP_STATUS_READY; // fini
back->r.statuscode=-1;
return 1;
}
diff --git a/src/htsftp.h b/src/htsftp.h
index e24f1f3..08ab784 100644
--- a/src/htsftp.h
+++ b/src/htsftp.h
@@ -45,9 +45,11 @@ Please visit our Website: http://www.httrack.com
// lien_back
#include "htscore.h"
+/* Library internal definitions */
+#ifdef HTS_INTERNAL_BYTECODE
#if USE_BEGINTHREAD
void launch_ftp(lien_back* back);
-PTHREAD_TYPE back_launch_ftp( void* pP );
+PTHREAD_TYPE PTHREAD_TYPE_FNC back_launch_ftp( void* pP );
#else
void launch_ftp(lien_back* back,char* path,char* exec);
int back_launch_ftp(lien_back* back);
@@ -62,7 +64,7 @@ char* linejmp(char* line);
int check_socket(T_SOC soc);
int check_socket_connect(T_SOC soc);
int wait_socket_receive(T_SOC soc,int timeout);
-
+#endif
#endif
diff --git a/src/htsglobal.h b/src/htsglobal.h
index 38faebc..d045f14 100644
--- a/src/htsglobal.h
+++ b/src/htsglobal.h
@@ -40,20 +40,45 @@ Please visit our Website: http://www.httrack.com
#define HTTRACK_GLOBAL_DEFH
// Version
-#define HTTRACK_VERSION "3.30"
-#define HTTRACK_VERSIONID "3.30.01"
+#define HTTRACK_VERSION "3.33-2"
+#define HTTRACK_VERSIONID "3.33.16"
#define HTTRACK_AFF_VERSION "3.x"
//#define HTTRACK_AFF_WARNING "This is a BETA release of WinHTTrack Website Copier ("HTTRACK_VERSION")\nPlease report any crashes, bugs or problems"
-
+#ifndef HTS_NOINCLUDES
+#ifndef _WIN32_WCE
+#include <stdio.h>
+#include <stdlib.h>
+#else
+#include <stdio.h>
+#include <stdlib.h>
+#ifdef HTS_CECOMPAT
+#include "cecompat.h"
+#else
+#include "celib.h"
+#endif
+#endif
+#endif
// Définition plate-forme
#include "htssystem.h"
#include "htsconfig.h"
+// WIN32 types
+#ifdef _WIN32
+#ifndef SIZEOF_LONG
+#define SIZEOF_LONG 4
+#define SIZEOF_LONG_LONG 8
+#endif
+#endif
+
+
// config.h
#ifdef _WIN32
+// WIN32
+#ifndef _WIN32_WCE
+
#define HAVE_SYS_STAT_H 1
#define HAVE_SYS_TYPES_H 1
#define HAVE_SYS_STAT_H 1
@@ -69,6 +94,35 @@ Please visit our Website: http://www.httrack.com
#else
+// Win32CE
+//#pragma runtime_checks( "s", restore )
+#define HTS_SPARE_MEMORY 1
+#define HTS_ALIGN 8
+#define BIGSTK static
+#undef DLLIB // LoadLibrary(libssl) crashes
+#define NOSTRDEBUG 1
+#undef HTS_MAKE_KEYWORD_INDEX
+#ifdef HTS_CECOMPAT
+#define HTS_DO_NOT_USE_FTIME 1
+#undef HAVE_SYS_STAT_H
+#undef HAVE_SYS_TYPES_H
+#else
+#undef HTS_DO_NOT_USE_FTIME
+#define HAVE_SYS_STAT_H 1
+#define HAVE_SYS_TYPES_H 1
+#endif
+
+#define HTS_DLOPEN 0
+#undef HTS_INET6
+#ifndef S_ISREG
+#define S_ISREG(m) ((m) & _S_IFREG)
+
+#endif
+
+#endif
+
+#else
+
#include "config.h"
#ifndef FTIME
@@ -110,7 +164,6 @@ Please visit our Website: http://www.httrack.com
#endif
-
// Socket windows ou socket unix
#ifdef _WIN32
#undef HTS_PLATFORM
@@ -126,6 +179,15 @@ Please visit our Website: http://www.httrack.com
#endif
#endif
+// don't spare memory usage by default
+#ifndef HTS_SPARE_MEMORY
+#define HTS_SPARE_MEMORY 0
+#endif
+
+#ifndef BIGSTK
+#define BIGSTK
+#endif
+
// compatibilité DOS
#if HTS_WIN
#define HTS_DOSNAME 1
@@ -208,14 +270,24 @@ Please visit our Website: http://www.httrack.com
#endif
+#if HTS_SPARE_MEMORY==0
/* Gestion des tables de hashage */
#define HTS_HASH_SIZE 20147
/* Taille max d'une URL */
#define HTS_URLMAXSIZE 1024
/* Taille max ligne de commande (>=HTS_URLMAXSIZE*2) */
#define HTS_CDLMAXSIZE 1024
+#else
+/* Gestion des tables de hashage */
+#define HTS_HASH_SIZE 1023
+/* Taille max d'une URL */
+#define HTS_URLMAXSIZE 256
+/* Taille max ligne de commande (>=HTS_URLMAXSIZE*2) */
+#define HTS_CDLMAXSIZE 1024
+#endif
+
/* Copyright (C) Xavier Roche and other contributors */
-#define HTTRACK_AFF_AUTHORS "[XR&CO'2003]"
+#define HTTRACK_AFF_AUTHORS "[XR&CO'2005]"
#define HTS_DEFAULT_FOOTER "<!-- Mirrored from %s%s by HTTrack Website Copier/"HTTRACK_AFF_VERSION" "HTTRACK_AFF_AUTHORS", %s -->"
#define HTTRACK_WEB "http://www.httrack.com"
#define HTS_UPDATE_WEBSITE "http://www.httrack.com/update.php3?Product=HTTrack&Version="HTTRACK_VERSIONID"&VersionStr="HTTRACK_VERSION"&Platform=%d&Language=%s"
@@ -357,7 +429,11 @@ typedef int INTsys;
#define LOCAL_SOCKET_ID -500000
// taille de chaque buffer (10 sockets 650 ko)
-#define TAILLE_BUFFER 65535
+#if HTS_SPARE_MEMORY==0
+#define TAILLE_BUFFER 65536
+#else
+#define TAILLE_BUFFER 8192
+#endif
#if HTS_WIN
#else
@@ -390,11 +466,11 @@ typedef int INTsys;
//#define HTS_TRACE_MALLOC
#ifdef HTS_TRACE_MALLOC
typedef unsigned long int t_htsboundary;
-typedef struct _mlink {
+typedef struct mlink {
char* adr;
int len;
int id;
- struct _mlink* next;
+ struct mlink* next;
} mlink;
static const t_htsboundary htsboundary = 0xDEADBEEF;
#endif
@@ -449,5 +525,22 @@ static const t_htsboundary htsboundary = 0xDEADBEEF;
// htsmain
#define DEBUG_STEPS 0
+
+// Débuggage de contrôle
+#if HTS_DEBUG_CLOSESOCK
+#define _HTS_WIDE 1
+#endif
+#if HTS_WIDE_DEBUG
+#define _HTS_WIDE 1
+#endif
+#if _HTS_WIDE
+extern FILE* DEBUG_fp;
+#define DEBUG_W(A) { if (DEBUG_fp==NULL) DEBUG_fp=fopen("bug.out","wb"); fprintf(DEBUG_fp,":>"A); fflush(DEBUG_fp); }
+#undef _
+#define _ ,
+#endif
+
+
+
#endif
diff --git a/src/htshash.c b/src/htshash.c
index 3cbdb5f..38a2d64 100644
--- a/src/htshash.c
+++ b/src/htshash.c
@@ -35,15 +35,15 @@ Please visit our Website: http://www.httrack.com
/* Author: Xavier Roche */
/* ------------------------------------------------------------ */
+/* Internal engine bytecode */
+#define HTS_INTERNAL_BYTECODE
+
#include "htshash.h"
/* specific definitions */
#include "htsbase.h"
#include "htsglobal.h"
#include "htsmd5.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
/* END specific definitions */
/* Specific macros */
@@ -63,7 +63,7 @@ Please visit our Website: http://www.httrack.com
// recherche dans la table selon nom1,nom2 et le no d'enregistrement
// retour: position ou -1 si non trouvé
int hash_read(hash_struct* hash,char* nom1,char* nom2,int type,int normalized) {
- char normfil_[HTS_URLMAXSIZE*2];
+ char BIGSTK normfil_[HTS_URLMAXSIZE*2];
char* normfil;
char* normadr;
unsigned int cle;
@@ -199,7 +199,7 @@ int hash_read(hash_struct* hash,char* nom1,char* nom2,int type,int normalized) {
// enregistrement lien lpos dans les 3 tables hash1..3
void hash_write(hash_struct* hash,int lpos,int normalized) {
- char normfil_[HTS_URLMAXSIZE*2];
+ char BIGSTK normfil_[HTS_URLMAXSIZE*2];
char* normfil;
unsigned int cle;
int pos;
diff --git a/src/htshash.h b/src/htshash.h
index c4acff1..43b5003 100644
--- a/src/htshash.h
+++ b/src/htshash.h
@@ -42,10 +42,13 @@ Please visit our Website: http://www.httrack.com
#include "htscore.h"
+/* Library internal definitions */
+#ifdef HTS_INTERNAL_BYTECODE
// tables de hashage
int hash_read(hash_struct* hash,char* nom1,char* nom2,int type,int normalized);
void hash_write(hash_struct* hash,int lpos,int normalized);
int* hash_calc_chaine(hash_struct* hash,int type,int pos);
unsigned long int hash_cle(char* nom1,char* nom2);
+#endif
#endif
diff --git a/src/htshelp.c b/src/htshelp.c
index 7046929..af6f742 100644
--- a/src/htshelp.c
+++ b/src/htshelp.c
@@ -35,6 +35,9 @@ Please visit our Website: http://www.httrack.com
/* Author: Xavier Roche */
/* ------------------------------------------------------------ */
+/* Internal engine bytecode */
+#define HTS_INTERNAL_BYTECODE
+
#include "htshelp.h"
/* specific definitions */
@@ -43,9 +46,6 @@ Please visit our Website: http://www.httrack.com
#include "htscatchurl.h"
#include "htslib.h"
#include "htsalias.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
#if HTS_WIN
#else
#ifdef HAVE_UNISTD_H
@@ -334,20 +334,20 @@ int help_query(char* list,int def) {
// Capture d'URL
void help_catchurl(char* dest_path) {
- char adr_prox[HTS_URLMAXSIZE*2];
+ char BIGSTK adr_prox[HTS_URLMAXSIZE*2];
int port_prox;
T_SOC soc=catch_url_init_std(&port_prox,adr_prox);
if (soc!=INVALID_SOCKET) {
- char url[HTS_URLMAXSIZE*2];
+ char BIGSTK url[HTS_URLMAXSIZE*2];
char method[32];
- char data[32768];
+ char BIGSTK data[32768];
url[0]=method[0]=data[0]='\0';
//
printf("Okay, temporary proxy installed.\nSet your browser's preferences to:\n\n");
printf("\tProxy's address: \t%s\n\tProxy's port: \t%d\n",adr_prox,port_prox);
//
if (catch_url(soc,url,method,data)) {
- char dest[HTS_URLMAXSIZE*2];
+ char BIGSTK dest[HTS_URLMAXSIZE*2];
int i=0;
do {
sprintf(dest,"%s%s%d",dest_path,"hts-post",i);
@@ -362,7 +362,7 @@ void help_catchurl(char* dest_path) {
}
// former URL!
{
- char finalurl[HTS_URLMAXSIZE*2];
+ char BIGSTK finalurl[HTS_URLMAXSIZE*2];
escape_check_url(dest);
sprintf(finalurl,"%s"POSTTOK"file:%s",url,dest);
printf("\nThe URL is: \"%s\"\n",finalurl);
@@ -471,7 +471,7 @@ void help(char* app,int more) {
infomsg(" bN accept cookies in cookies.txt (0=do not accept,* 1=accept)");
infomsg(" u check document type if unknown (cgi,asp..) (u0 don't check, * u1 check but /, u2 check always)");
infomsg(" j *parse Java Classes (j0 don't parse)");
- infomsg(" sN follow robots.txt and meta robots tags (0=never,1=sometimes,* 2=always)");
+ infomsg(" sN follow robots.txt and meta robots tags (0=never,1=sometimes,* 2=always, 3=always (even strict rules))");
infomsg(" %h force HTTP/1.0 requests (reduce update features, only for old servers or proxies)");
infomsg(" %k use keep-alive if possible, greately reducing latency for small files and test requests (%k0 don't use)");
infomsg(" %B tolerant requests (accept bogus responses on some servers, but not standard!)");
@@ -479,10 +479,13 @@ void help(char* app,int more) {
infomsg(" %u url hacks: various hacks to limit duplicate URLs (strip //, www.foo.com==foo.com..)");
infomsg(" %A assume that a type (cgi,asp..) is always linked with a mime type (-%A php3,cgi=text/html;dat,bin=application/x-zip)");
infomsg(" shortcut: '--assume standard' is equivalent to -%A "HTS_ASSUME_STANDARD);
+ infomsg(" can also be used to force a specific file type: --assume foo.cgi=text/html");
infomsg(" @iN internet protocol (0=both ipv6+ipv4, 4=ipv4 only, 6=ipv6 only)");
infomsg("");
infomsg("Browser ID:");
- infomsg(" F user-agent field (-F \"user-agent name\")");
+ infomsg(" F user-agent field sent in HTTP headers (-F \"user-agent name\")");
+ infomsg(" %R default referer field sent in HTTP headers");
+ infomsg(" %E from email address sent in HTTP headers");
infomsg(" %F footer string in Html code (-%F \"Mirrored [from host %s [file %s [at %s]]]\"");
infomsg(" %l preffered language (-%l \"fr, en, jp, *\"");
infomsg("");
@@ -490,7 +493,7 @@ void help(char* app,int more) {
infomsg(" C create/use a cache for updates and retries (C0 no cache,C1 cache is prioritary,* C2 test update before)");
infomsg(" k store all files in cache (not useful if files on disk)");
infomsg(" %n do not re-download locally erased files");
- infomsg(" %v display on screen filenames downloaded (in realtime) - * %v1 short version");
+ infomsg(" %v display on screen filenames downloaded (in realtime) - * %v1 short version - %v2 full animation");
infomsg(" Q no log - quiet mode");
infomsg(" q no questions - quiet mode");
infomsg(" z log - extra infos");
@@ -523,6 +526,9 @@ void help(char* app,int more) {
infomsg(" #X *use optimized engine (limited memory boundary checks)");
infomsg(" #0 filter test (-#0 '*.gif' 'www.bar.com/foo.gif')");
infomsg(" #C cache list (-#C '*.com/spider*.gif'");
+ infomsg(" #R cache repair (damaged cache)");
+ infomsg(" #d debug parser");
+ infomsg(" #E extract new.zip cache meta-data in meta.zip");
infomsg(" #f always flush log files");
infomsg(" #FN maximum number of filters");
infomsg(" #h version info");
@@ -536,10 +542,15 @@ void help(char* app,int more) {
infomsg(" #Z generate transfer rate statictics every minutes");
infomsg(" #! execute a shell command (-#! \"echo hello\")");
infomsg("");
+ infomsg("Dangerous options: (do NOT use unless you exactly know what you are doing)");
+ infomsg(" %! bypass built-in security limits aimed to avoid bandwith abuses (bandwidth, simultaneous connections)");
+ infomsg(" IMPORTANT NOTE: DANGEROUS OPTION, ONLY SUITABLE FOR EXPERTS");
+ infomsg(" USE IT WITH EXTREME CARE");
+ infomsg("");
infomsg("Command-line specific options:");
infomsg(" V execute system command after each files ($0 is the filename: -V \"rm \\$0\")");
infomsg(" %U run the engine with another id when called as root (-%U smith)");
- infomsg(" %W use an external library function as a wrapper (-%W link-detected=foo.so:myfunction)");
+ infomsg(" %W use an external library function as a wrapper (-%W link-detected=foo.so:myfunction[,myparameters])");
/* infomsg(" %O do a chroot before setuid"); */
infomsg("");
infomsg("Details: Option N");
@@ -571,6 +582,7 @@ void help(char* app,int more) {
infomsg(" '%h' Host name (ex: www.someweb.com)");
infomsg(" '%M' URL MD5 (128 bits, 32 ascii bytes)");
infomsg(" '%Q' query string MD5 (128 bits, 32 ascii bytes)");
+ infomsg(" '%r' protocol name (ex: http)");
infomsg(" '%q' small query string MD5 (16 bits, 4 ascii bytes)");
infomsg(" '%s?' Short name version (ex: %sN)");
infomsg(" '%[param]' param variable in query string");
@@ -613,6 +625,8 @@ void help(char* app,int more) {
infomsg("'start' : int (* myfunction)(httrackp* opt);");
infomsg("'end' : int (* myfunction)(void);");
infomsg("'change-options' : int (* myfunction)(httrackp* opt);");
+ infomsg("'preprocess-html' : int (* myfunction)(char** html,int* len,char* url_adresse,char* url_fichier);");
+ infomsg("'postprocess-html' : int (* myfunction)(char** html,int* len,char* url_adresse,char* url_fichier);");
infomsg("'check-html' : int (* myfunction)(char* html,int len,char* url_adresse,char* url_fichier);");
infomsg("'query' : char* (* myfunction)(char* question);");
infomsg("'query2' : char* (* myfunction)(char* question);");
@@ -622,8 +636,10 @@ void help(char* app,int more) {
infomsg("'pause' : void (* myfunction)(char* lockfile);");
infomsg("'save-file' : void (* myfunction)(char* file);");
infomsg("'link-detected' : int (* myfunction)(char* link);");
+ infomsg("'link-detected2' : int (* myfunction)(char* link, char* start_tag);");
infomsg("'transfer-status' : int (* myfunction)(lien_back* back);");
infomsg("'save-name' : int (* myfunction)(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save);");
+ infomsg("And <wrappername>_init() functions if defined, called upon plug");
infomsg("");
infomsg("");
infomsg("example: httrack www.someweb.com/bob/");
diff --git a/src/htshelp.h b/src/htshelp.h
index 924a526..67354c7 100644
--- a/src/htshelp.h
+++ b/src/htshelp.h
@@ -43,11 +43,14 @@ Please visit our Website: http://www.httrack.com
#include "htsglobal.h"
#include "htscore.h"
+/* Library internal definitions */
+#ifdef HTS_INTERNAL_BYTECODE
void infomsg(char* msg);
void help(char* app,int more);
void make_empty_index(char* str);
void help_wizard(httrackp* opt);
int help_query(char* list,int def);
void help_catchurl(char* dest_path);
+#endif
#endif
diff --git a/src/htsindex.c b/src/htsindex.c
index 1a75103..af87396 100644
--- a/src/htsindex.c
+++ b/src/htsindex.c
@@ -35,9 +35,10 @@ Please visit our Website: http://www.httrack.com
/* Author: Xavier Roche */
/* ------------------------------------------------------------ */
+/* Internal engine bytecode */
+#define HTS_INTERNAL_BYTECODE
+
-#include <stdio.h>
-#include <stdlib.h>
#include "htsindex.h"
#include "htsglobal.h"
#include "htslib.h"
@@ -124,12 +125,14 @@ int hts_primindex_words=0;
*/
void index_init(const char* indexpath) {
#if HTS_MAKE_KEYWORD_INDEX
+#ifndef _WIN32_WCE
/* remove(concat(indexpath,"index.txt")); */
hts_index_init=1;
hts_primindex_size=0;
hts_primindex_words=0;
fp_tmpproject=tmpfile();
#endif
+#endif
}
@@ -298,7 +301,7 @@ int index_keyword(const char* html_data,LLint size,const char* mime,const char*
unsigned long int e=0;
if (inthash_read(WordIndexHash,line,&e)) {
//if (e) {
- char savelst[HTS_URLMAXSIZE*2];
+ char BIGSTK savelst[HTS_URLMAXSIZE*2];
e++; /* 0 means "once" */
if (strncmp((const char*)fslash((char*)indexpath),filename,strlen(indexpath))==0) // couper
diff --git a/src/htsindex.h b/src/htsindex.h
index 40a189b..b773034 100644
--- a/src/htsindex.h
+++ b/src/htsindex.h
@@ -41,8 +41,11 @@ Please visit our Website: http://www.httrack.com
#include "htsglobal.h"
+/* Library internal definitions */
+#ifdef HTS_INTERNAL_BYTECODE
int index_keyword(const char* html_data,LLint size,const char* mime,const char* filename,const char* indexpath);
void index_init(const char* indexpath);
void index_finish(const char* indexpath,int mode);
+#endif
#endif
diff --git a/src/htsinthash.c b/src/htsinthash.c
index 95b8711..eb155cb 100644
--- a/src/htsinthash.c
+++ b/src/htsinthash.c
@@ -35,15 +35,15 @@ Please visit our Website: http://www.httrack.com
/* Author: Xavier Roche */
/* ------------------------------------------------------------ */
+/* Internal engine bytecode */
+#define HTS_INTERNAL_BYTECODE
+
#include "htsinthash.h"
/* specific definitions */
#include "htsbase.h"
#include "htsglobal.h"
#include "htsmd5.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
/* END specific definitions */
/* Specific macros */
@@ -68,11 +68,12 @@ int inthash_write(inthash hashtable,char* name,long int value) {
if (strcmp(h->name,name)==0) {
/* Delete element */
if (hashtable->flag_valueismalloc) {
- if (h->value.intg) {
+ void* ptr = (void*)h->value.intg;
+ if (ptr != NULL) {
if (hashtable->free_handler)
- hashtable->free_handler((void*)h->value.intg);
+ hashtable->free_handler(ptr);
else
- freet((void*)h->value.intg);
+ freet(ptr);
}
}
/* Insert */
@@ -151,7 +152,8 @@ int inthash_read(inthash hashtable,char* name,long int* value) {
inthash_chain* h=hashtable->hash[pos];
while (h) {
if (strcmp(h->name,name)==0) {
- *value=h->value.intg;
+ if (value != NULL)
+ *value=h->value.intg;
return 1;
}
h=h->next;
@@ -180,12 +182,13 @@ void inthash_delchain(inthash_chain* hash,t_inthash_freehandler free_handler) {
inthash_delchain(hash->next,free_handler);
if (free_handler) { // pos is a malloc() block, delete it!
if (hash->value.intg) {
+ void* ptr = (void*)hash->value.intg;
if (free_handler)
- free_handler((void*)hash->value.intg);
+ free_handler(ptr);
else
- freet((void*)hash->value.intg);
+ freet(ptr);
+ hash->value.intg=0;
}
- hash->value.intg=0;
}
freet(hash);
}
diff --git a/src/htsinthash.h b/src/htsinthash.h
index c667cd4..5d7b992 100644
--- a/src/htsinthash.h
+++ b/src/htsinthash.h
@@ -54,7 +54,7 @@ typedef struct inthash_chain {
// structure behind inthash
typedef void (* t_inthash_freehandler)(void* value);
-typedef struct {
+typedef struct struct_inthash {
inthash_chain** hash;
t_inthash_freehandler free_handler;
unsigned int hash_size;
@@ -64,6 +64,8 @@ typedef struct {
// main inthash type
typedef struct_inthash* inthash;
+/* Library internal definitions */
+#ifdef HTS_INTERNAL_BYTECODE
// subfunctions
unsigned long int inthash_key(char* value);
void inthash_init(inthash hashtable);
@@ -72,7 +74,6 @@ void inthash_default_free_handler(void* value);
// main functions:
-
/* Hash functions: */
inthash inthash_new(int size); /* Create a new hash table */
int inthash_created(inthash hashtable); /* Test if the hash table was successfully created */
@@ -89,6 +90,6 @@ void* inthash_addblk(inthash hashtable,char* name,int blksize); /* Add entr
int inthash_write(inthash hashtable,char* name,long int value); /* Overwrite/add entry in the hash table */
int inthash_inc(inthash hashtable,char* name); /* Increment entry in the hash table */
/* End of hash functions: */
-
+#endif
#endif
diff --git a/src/htsjava.c b/src/htsjava.c
index afb166b..3536b9b 100644
--- a/src/htsjava.c
+++ b/src/htsjava.c
@@ -35,6 +35,9 @@ Please visit our Website: http://www.httrack.com
/* ------------------------------------------------------------ */
+/* Internal engine bytecode */
+#define HTS_INTERNAL_BYTECODE
+
/* Version: Oct/2000 */
/* Fixed: problems with class structure (10/2000) */
@@ -46,10 +49,6 @@ Please visit our Website: http://www.httrack.com
#include "htsjava.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
#include "htsnostatic.h"
//#include <math.h>
@@ -186,7 +185,7 @@ int hts_parse_java(htsmoduleStruct* str)
if((tab[i].index1!=SClass) && (tab[i].index1!=Class) && (tab[tab[i].index1].name[0]!='[')) {
if(!strstr(tab[tab[i].index1].name,"java/")) {
- char tempo[1024];
+ char BIGSTK tempo[1024];
tempo[0]='\0';
sprintf(tempo,"%s.class",tab[tab[i].index1].name);
@@ -289,7 +288,7 @@ RESP_STRUCT readtable(htsmoduleStruct* str,
strcpybuff(trans.name,"HTS_UNICODE");
{
- char buffer[1024];
+ char BIGSTK buffer[1024];
char *p;
p=&buffer[0];
diff --git a/src/htsjava.h b/src/htsjava.h
index b3d17d4..915824b 100644
--- a/src/htsjava.h
+++ b/src/htsjava.h
@@ -57,6 +57,8 @@ typedef struct {
} RESP_STRUCT;
+/* Library internal definitions */
+#ifdef HTS_INTERNAL_BYTECODE
int hts_detect_java(htsmoduleStruct* str);
int hts_parse_java(htsmoduleStruct* str);
RESP_STRUCT affecte(int i1,int i2,RESP_STRUCT *i3,RESP_STRUCT *i4,int i5);
@@ -65,6 +67,6 @@ RESP_STRUCT readtable(htsmoduleStruct* str,FILE *fp,RESP_STRUCT,int*);
unsigned short int readshort(FILE *fp);
int tris(char*);
char * printname(char [1024]);
-
+#endif
#endif
diff --git a/src/htslib.c b/src/htslib.c
index 3954f9c..9c389c8 100644
--- a/src/htslib.c
+++ b/src/htslib.c
@@ -34,11 +34,20 @@ Please visit our Website: http://www.httrack.com
/* Author: Xavier Roche */
/* ------------------------------------------------------------ */
+/* Internal engine bytecode */
+#define HTS_INTERNAL_BYTECODE
+
// Fichier librairie .c
#include "htslib.h"
#include "htsbauth.h"
+#ifdef _WIN32_WCE
+#ifndef HTS_CECOMPAT
+#pragma comment(lib, "celib.lib") //link with celib
+#endif
+#endif
+
/* specific definitions */
#include "htsbase.h"
#include "htsnet.h"
@@ -46,9 +55,11 @@ Please visit our Website: http://www.httrack.com
#include "htsthread.h"
#include "htsnostatic.h"
#include "htswrap.h"
-#include <stdio.h>
+#include "htsmd5.h"
#if HTS_WIN
+#ifndef _WIN32_WCE
#include <direct.h>
+#endif
#else
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
@@ -60,32 +71,39 @@ Please visit our Website: http://www.httrack.com
#include <unistd.h>
#endif
#endif
-#include <stdlib.h>
#include <string.h>
#include <time.h>
+#ifndef _WIN32_WCE
+#include <sys/timeb.h>
+#else
+#ifndef HTS_CECOMPAT
#include <sys/timeb.h>
+#endif
+#endif
+#ifndef _WIN32_WCE
#include <fcntl.h>
+#endif
// pour utimbuf
#if HTS_WIN
+#ifndef _WIN32_WCE
+#include <sys/utime.h>
+#else
+#ifndef HTS_CECOMPAT
#include <sys/utime.h>
+#endif
+#endif
#else
#include <utime.h>
#endif
+#ifndef _WIN32_WCE
+#include <sys/stat.h>
+#endif
/* END specific definitions */
-
-// Débuggage de contrôle
-#if HTS_DEBUG_CLOSESOCK
-#define _HTS_WIDE 1
-#endif
-#if HTS_WIDE_DEBUG
-#define _HTS_WIDE 1
-#endif
+// Debugging
#if _HTS_WIDE
FILE* DEBUG_fp=NULL;
-#define DEBUG_W(A) { if (DEBUG_fp==NULL) DEBUG_fp=fopen("bug.out","wb"); fprintf(DEBUG_fp,":>"A); fflush(DEBUG_fp); }
-#define DEBUG_W2(A) { if (DEBUG_fp==NULL) DEBUG_fp=fopen("bug.out","wb"); fprintf(DEBUG_fp,A); fflush(DEBUG_fp); }
#endif
/* variables globales */
@@ -553,6 +571,7 @@ const char* hts_mime[][2] = {
|| CIS(c,'*') \
|| CIS(c,'\'') \
|| CIS(c,'\"') \
+ || CIS(c,'&') \
|| CIS(c,'!') )
//#define CHAR_XXAVOID(c) ( strchr(" *'\"!",(unsigned char)(c)) != 0 )
#define CHAR_MARK(c) ( CIS(c,'-') \
@@ -581,7 +600,9 @@ char* antislash(char* s) {
}
#endif
-
+#ifdef _WIN32_WCE
+char cwd[MAX_PATH+1] = "";
+#endif
// Récupération d'un fichier http sur le net.
// Renvoie une adresse sur le bloc de mémoire, ou bien
@@ -592,8 +613,8 @@ char* antislash(char* s) {
// en background
htsblk httpget(char* url) {
- char adr[HTS_URLMAXSIZE*2]; // adresse
- char fil[HTS_URLMAXSIZE*2]; // chemin
+ char BIGSTK adr[HTS_URLMAXSIZE*2]; // adresse
+ char BIGSTK fil[HTS_URLMAXSIZE*2]; // chemin
// séparer URL en adresse+chemin
if (ident_url_absolute(url,adr,fil)==-1) {
@@ -692,7 +713,7 @@ int http_xfopen(int mode,int treat,int waitconnect,char* xsend,char* adr,char* f
// Test en cas de file:///C|...
if (!fexist(fconv(unescape_http(fil))))
if (fexist(fconv(unescape_http(fil+1)))) {
- char tempo[HTS_URLMAXSIZE*2];
+ char BIGSTK tempo[HTS_URLMAXSIZE*2];
strcpybuff(tempo,fil+1);
strcpybuff(fil,tempo);
}
@@ -802,7 +823,7 @@ int http_xfopen(int mode,int treat,int waitconnect,char* xsend,char* adr,char* f
// envoi d'une requète
int http_sendhead(t_cookie* cookie,int mode,char* xsend,char* adr,char* fil,char* referer_adr,char* referer_fil,htsblk* retour) {
- char buff[8192];
+ char BIGSTK buff[8192];
//int use_11=0; // HTTP 1.1 utilisé
int direct_url=0; // ne pas analyser l'url (exemple: ftp://)
char* search_tag=NULL;
@@ -826,8 +847,8 @@ int http_sendhead(t_cookie* cookie,int mode,char* xsend,char* adr,char* fil,char
if (mode==0) { // GET!
FILE* fp=fopen(unescape_http(search_tag+strlen(POSTTOK)+5),"rb");
if (fp) {
- char line[1100];
- char protocol[256],url[HTS_URLMAXSIZE*2],method[256];
+ char BIGSTK line[1100];
+ char BIGSTK protocol[256],url[HTS_URLMAXSIZE*2],method[256];
linput(fp,line,1000);
if (sscanf(line,"%s %s %s",method,url,protocol) == 3) {
// selon que l'on a ou pas un proxy
@@ -879,7 +900,7 @@ int http_sendhead(t_cookie* cookie,int mode,char* xsend,char* adr,char* fil,char
// on slash doit être présent en début, sinon attention aux bad request! (400)
if (*fil!='/') strcatbuff(buff,"/");
{
- char tempo[HTS_URLMAXSIZE*2];
+ char BIGSTK tempo[HTS_URLMAXSIZE*2];
tempo[0]='\0';
if (search_tag)
strncatbuff(tempo,fil,(int) (search_tag - fil));
@@ -923,25 +944,31 @@ int http_sendhead(t_cookie* cookie,int mode,char* xsend,char* adr,char* fil,char
}
// Referer?
- if ((referer_adr) && (referer_fil)) { // existe
- if ((strnotempty(referer_adr)) && (strnotempty(referer_fil))) { // non vide
- if (
- (strcmp(referer_adr,"file://") != 0)
- &&
- ( /* no https referer to http urls */
- (strncmp(referer_adr, "https://", 8) != 0) /* referer is not https */
- ||
- (strncmp(adr, "https://", 8) == 0) /* or referer AND addresses are https */
- )
- ) { // PAS file://
- strcatbuff(buff,"Referer: ");
- strcatbuff(buff,"http://");
- strcatbuff(buff,jump_identification(referer_adr));
- strcatbuff(buff,referer_fil);
- strcatbuff(buff,H_CRLF);
- }
+ if (referer_adr != NULL && referer_fil != NULL
+ && strnotempty(referer_adr) && strnotempty(referer_fil)
+ ) { // non vide
+ if (
+ (strcmp(referer_adr,"file://") != 0)
+ &&
+ ( /* no https referer to http urls */
+ (strncmp(referer_adr, "https://", 8) != 0) /* referer is not https */
+ ||
+ (strncmp(adr, "https://", 8) == 0) /* or referer AND addresses are https */
+ )
+ ) { // PAS file://
+ strcatbuff(buff,"Referer: ");
+ strcatbuff(buff,"http://");
+ strcatbuff(buff,jump_identification(referer_adr));
+ strcatbuff(buff,referer_fil);
+ strcatbuff(buff,H_CRLF);
}
}
+ // HTTP field: referer
+ else if (retour->req.referer[0] != '\0') {
+ strcatbuff(buff,"Referer: ");
+ strcatbuff(buff, retour->req.referer);
+ strcatbuff(buff, H_CRLF);
+ }
// POST?
if (mode==0) { // GET!
@@ -1002,6 +1029,13 @@ int http_sendhead(t_cookie* cookie,int mode,char* xsend,char* adr,char* fil,char
}
//}
+ // HTTP field: from
+ if (retour->req.from[0] != '\0') { // HTTP from
+ strcatbuff(buff,"From: ");
+ strcatbuff(buff, retour->req.from);
+ strcatbuff(buff, H_CRLF);
+ }
+
// Présence d'un user-agent?
if (retour->req.user_agent_send) { // ohh un user-agent
char s[256];
@@ -1113,12 +1147,13 @@ int http_sendhead(t_cookie* cookie,int mode,char* xsend,char* adr,char* fil,char
#endif
// Envoi
+ HTS_STAT.last_request = mtime_local();
if (sendc(retour, buff)<0) { // ERREUR, socket rompue?...
//if (sendc(retour->soc,buff) != strlen(buff)) { // ERREUR, socket rompue?...
deletesoc_r(retour); // fermer tout de même
// et tenter de reconnecter
- strcpybuff(retour->msg,"Write error");
+ strcpybuff(retour->msg, "Write error");
retour->soc=INVALID_SOCKET;
}
@@ -1411,7 +1446,7 @@ void treathead(t_cookie* cookie,char* adr,char* fil,htsblk* retour,char* rcvd) {
char domain[256]; // domaine cookie (.netscape.com)
char path[256]; // chemin (/)
char cook_name[256]; // nom cookie (MYCOOK)
- char cook_value[8192]; // valeur (ID=toto,S=1234)
+ char BIGSTK cook_value[8192]; // valeur (ID=toto,S=1234)
#if DEBUG_COOK
printf("set-cookie detected\n");
#endif
@@ -1419,7 +1454,7 @@ void treathead(t_cookie* cookie,char* adr,char* fil,htsblk* retour,char* rcvd) {
char *token_st,*token_end;
char *value_st,*value_end;
char name[256];
- char value[8192];
+ char BIGSTK value[8192];
int next=0;
name[0]=value[0]='\0';
//
@@ -1679,6 +1714,11 @@ HTS_INLINE LLint http_fread1(htsblk* r) {
LLint http_xfread1(htsblk* r,int bufl) {
int nl=-1;
+ // EOF
+ if (r->totalsize > 0 && r->size == r->totalsize) {
+ return READ_EOF;
+ }
+
if (bufl>0) {
if (!r->is_write) { // stocker en mémoire
if (r->totalsize>0) { // totalsize déterminé ET ALLOUE
@@ -1691,9 +1731,11 @@ LLint http_xfread1(htsblk* r,int bufl) {
nl = hts_read(r,r->adr + ((int) r->size),(int) (r->totalsize-r->size) ); /* NO 32 bit overlow possible here (no 4GB html!) */
// nouvelle taille
if (nl >= 0) r->size+=nl;
-
- if ((nl < 0) || (r->size >= r->totalsize))
- nl=-1; // break
+
+ /*
+ if (r->size >= r->totalsize)
+ nl = -1; // break
+ */
r->adr[r->size]='\0'; // caractère NULL en fin au cas où l'on traite des HTML
}
@@ -1717,7 +1759,7 @@ LLint http_xfread1(htsblk* r,int bufl) {
if (r->adr!=NULL) {
// lecture
nl = hts_read(r,r->adr+(int)r->size,bufl);
- if (nl>0) {
+ if (nl > 0) {
// resize
r->adr=(char*) realloct(r->adr,(int)r->size+nl + 1);
// nouvelle taille
@@ -1737,7 +1779,7 @@ LLint http_xfread1(htsblk* r,int bufl) {
}
// pas de adr=erreur
- if (r->adr==NULL) nl=-1;
+ if (r->adr == NULL) nl = READ_ERROR;
} else { // stocker sur disque
char* buff;
@@ -1751,17 +1793,17 @@ LLint http_xfread1(htsblk* r,int bufl) {
if ((INTsys)fwrite(buff,1,nl,r->out)!=nl) {
r->statuscode=-1;
strcpybuff(r->msg,"Write error on disk");
- nl=-1;
+ nl=READ_ERROR;
}
}
- if ((nl < 0) || ((r->totalsize>0) && (r->size >= r->totalsize)))
- nl=-1; // break
+ //if ((nl < 0) || ((r->totalsize>0) && (r->size >= r->totalsize)))
+ // nl=-1; // break
// libérer bloc tempo
freet(buff);
} else
- nl=-1;
+ nl=READ_ERROR;
if ((nl < 0) && (r->out!=NULL)) {
fflush(r->out);
@@ -1783,7 +1825,7 @@ LLint http_xfread1(htsblk* r,int bufl) {
int lf_detected=0;
int at_begining=1;
do {
- nl=-1;
+ nl = READ_INTERNAL_ERROR;
count--;
if (r->adr==NULL) {
r->adr=(char*) malloct(8192);
@@ -1793,7 +1835,7 @@ LLint http_xfread1(htsblk* r,int bufl) {
if (r->size < 8190) {
// lecture
nl = hts_read(r,r->adr+r->size,1);
- if (nl>0) {
+ if (nl > 0) {
// exit if:
// lf detected AND already detected before
// or
@@ -1825,18 +1867,16 @@ LLint http_xfread1(htsblk* r,int bufl) {
count=-1;
}
} while((nl >= 0) && (count>0));
- nl = tot_nl;
+ if (nl >= 0) {
+ nl = tot_nl;
+ }
}
-#if HDEBUG
- //printf("add to %d / %d\n",r->size,r->totalsize);
-#endif
- // nl == 0 may mean "no relevant data", for example is using cache or ssl
-#if HTS_USEOPENSSL
- if (r->ssl)
+ // EOF
+ if (r->totalsize > 0 && r->size == r->totalsize) {
+ return READ_EOF;
+ } else {
return nl;
- else
-#endif
- return ((nl > 0) ? nl : -1); // ==0 is fatal if direct read
+ }
}
@@ -1977,20 +2017,20 @@ int newhttp(char* _iadr,htsblk* retour,int port,int waitconnect) {
char* iadr;
// unsigned short int port;
- // tester un éventuel id:pass et virer id:pass@ si détecté
- iadr = jump_identification(_iadr);
-
// si iadr="#" alors c'est une fausse URL, mais un vrai fichier
// local.
// utile pour les tests!
//## if (iadr[0]!=lOCAL_CHAR) {
- if (strcmp(_iadr,"file://")) { /* non fichier */
+ if (strcmp(_iadr,"file://") != 0) { /* non fichier */
SOCaddr server;
int server_size=sizeof(server);
t_hostent* hp;
// effacer structure
memset(&server, 0, sizeof(server));
+ // tester un éventuel id:pass et virer id:pass@ si détecté
+ iadr = jump_identification(_iadr);
+
#if HDEBUG
printf("gethostbyname\n");
#endif
@@ -2007,7 +2047,7 @@ int newhttp(char* _iadr,htsblk* retour,int port,int waitconnect) {
port=80; // port par défaut
#endif
if (a) {
- char iadr2[HTS_URLMAXSIZE*2];
+ char BIGSTK iadr2[HTS_URLMAXSIZE*2];
int i=-1;
iadr2[0]='\0';
sscanf(a+1,"%d",&i);
@@ -2046,6 +2086,9 @@ int newhttp(char* _iadr,htsblk* retour,int port,int waitconnect) {
}
// copie adresse
SOCaddr_copyaddr(server, server_size, hp->h_addr_list[0], hp->h_length);
+ // make a copy for external clients
+ retour->address_size = sizeof(retour->address);
+ SOCaddr_copyaddr(retour->address, retour->address_size, hp->h_addr_list[0], hp->h_length);
// memcpy(&SOCaddr_sinaddr(server), hp->h_addr_list[0], hp->h_length);
// créer ("attachement") une socket (point d'accès) internet,en flot
@@ -2060,7 +2103,7 @@ int newhttp(char* _iadr,htsblk* retour,int port,int waitconnect) {
retour->debugid = HTS_STAT.stat_sockid++;
}
#if HTS_WIDE_DEBUG
- DEBUG_W("socket done\n");
+ DEBUG_W("socket()=%d\n" _ (int) soc);
#endif
if (soc==INVALID_SOCKET) {
if (retour)
@@ -2103,6 +2146,7 @@ int newhttp(char* _iadr,htsblk* retour,int port,int waitconnect) {
#if HDEBUG
printf("connect\n");
#endif
+ HTS_STAT.last_connect = mtime_local();
#if HTS_WIDE_DEBUG
DEBUG_W("connect\n");
@@ -2113,10 +2157,6 @@ int newhttp(char* _iadr,htsblk* retour,int port,int waitconnect) {
if (connect(soc, (struct sockaddr *)&server, server_size) == -1) {
#endif
- // no - non blocking
- //deletesoc(soc);
- //soc=INVALID_SOCKET;
-
// bloquant
if (waitconnect) {
#if HDEBUG
@@ -2180,7 +2220,7 @@ int ident_url_absolute(char* url,char* adr,char* fil) {
// 1. optional scheme ":"
if ((pos=strfield(url,"file:"))) { // fichier local!! (pour les tests)
- //!! p+=3;
+ //!!p+=3;
strcpybuff(adr,"file://");
} else if ((pos=strfield(url,"http:"))) { // HTTP
//!!p+=3;
@@ -2233,15 +2273,19 @@ int ident_url_absolute(char* url,char* adr,char* fil) {
char *p;
int i;
char* a;
-
+
p=url+pos;
if (*p == '/' || *p == '\\') { /* file:///.. */
strcatbuff(fil,p); // fichier local ; adr="#"
} else {
- strcatbuff(fil,"//"); /* file://server/foo */
- strcatbuff(fil,p);
+ if (p[1] != ':') {
+ strcatbuff(fil,"//"); /* file://server/foo */
+ strcatbuff(fil,p);
+ } else {
+ strcatbuff(fil,p); // file://C:\..
+ }
}
-
+
a=strchr(fil,'?');
if (a)
*a='\0'; /* couper query (inutile pour file:// lors de la requête) */
@@ -2272,66 +2316,52 @@ int ident_url_absolute(char* url,char* adr,char* fil) {
return 0;
}
-// simplification of ../
+/* Simplify ../ and ./ segments of the path f, in place.
+   The string is rewritten through a read cursor (a) and a write cursor (b),
+   so no temporary buffer is used. A "./" that follows a copied '/' (or sits
+   at the very start) is skipped. A "../" in the same position moves the
+   write cursor back to the previously recorded '/' position. Once a '?' has
+   been read, the remainder (query string) is copied without simplification.
+   If 127 slashes get recorded, f is emptied as an error marker and the loop
+   stops. An empty result is finally replaced by "./", so f must have room
+   for at least 3 bytes.
+   NOTE(review): when only one '/' has been recorded (e.g. "/../x"), the
+   fallback branch restarts the output at f itself, so the leading '/' is
+   dropped ("x"); the removed code kept a "/" in that case. Confirm this is
+   intended. */
void fil_simplifie(char* f) {
- int i=0;
- int last=0;
- char* a;
-
- // strip ../
- while (f[i]) {
-
- if (f[i]=='/') {
- if (f[i+1]=='.')
- if (f[i+2]=='.') // cut the last directory
- if (f[i+3]=='/') // avoid matching /tmp/..coolandlamedir/
- { // cut the last directory
- char tempo[HTS_URLMAXSIZE*2];
- tempo[0]='\0';
- //
- if (!last) /* can't go upper.. */
- strcpybuff(tempo,"/");
- else
- strncpy(tempo,f,last+1);
- tempo[last+1]='\0';
- strcatbuff(tempo,f+i+4);
- strcpybuff(f,tempo); // replace
- i=-1; // restart
- last=0;
+ char *a, *b; /* a: read cursor, b: write cursor (b never runs ahead of a) */
+ char *rollback[128]; /* write positions recorded just after each copied '/' */
+ int rollid = 0; /* number of entries currently stored in rollback[] */
+ char lc = '/'; /* last copied char; starts as '/' so a leading ./ or ../ is matched */
+ int query = 0; /* becomes 1 once '?' has been read: no simplification afterwards */
+ for(a = b = f ; *a != '\0' ; ) {
+ if (*a == '?')
+ query = 1;
+ if (query == 0 && lc == '/' && a[0] == '.' && a[1] == '/') { /* foo/./bar or ./foo */
+ a += 2;
+ }
+ else if (query == 0 && lc == '/' && a[0] == '.' && a[1] == '.' && a[2] == '/') { /* foo/../bar or ../foo */
+ a += 3;
+ if (rollid > 1) { /* at least two slashes recorded: drop the last directory */
+ rollid--;
+ b = rollback[rollid - 1];
+ } else { /* nothing to go back to: restart the output at the beginning of f */
+ rollid = 0;
+ b = f;
}
-
- if (i>=0)
- last=i;
- else
- last=0;
+ } else {
+ *b++ = lc = *a; /* plain copy, remembering the copied char */
+ if (*a == '/') {
+ rollback[rollid++] = b;
+ if (rollid >= 127) { /* rollback[] almost full: give up */
+ *f = '\0'; /* ERROR */
+ break;
+ }
+ }
+ a++;
}
-
- i++;
}
-
- // strip ./
- while ( (a=strstr(f,"./")) ) {
- char tempo[HTS_URLMAXSIZE*2];
- tempo[0]='\0';
- strcpybuff(tempo,a+2);
- strcpybuff(a,tempo);
- }
- // delete all remaining ../ (potential threat)
- while ( (a=strstr(f,"../")) ) {
- char tempo[HTS_URLMAXSIZE*2];
- tempo[0]='\0';
- strcpybuff(tempo,a+3);
- strcpybuff(a,tempo);
+ *b = '\0';
+ if (*f == '\0') { /* empty result (or error above): fall back to "./" */
+ f[0] = '.';
+ f[1] = '/';
+ f[2] = '\0';
}
-
}
// fermer liaison fichier ou socket
HTS_INLINE void deletehttp(htsblk* r) {
#if HTS_DEBUG_CLOSESOCK
- char info[256];
- sprintf(info,"deletehttp: (htsblk*) %d\n",r);
- DEBUG_W2(info);
+ DEBUG_W("deletehttp: (htsblk*) 0x%p\n" _ (void*) r);
#endif
#if HTS_USEOPENSSL
/* Free OpenSSL structures */
@@ -2357,27 +2387,22 @@ HTS_INLINE void deletehttp(htsblk* r) {
// free the addr buffer
// always returns 1
HTS_INLINE int deleteaddr(htsblk* r) {
- if (r->adr) {
+ if (r->adr != NULL) {
freet(r->adr);
r->adr = NULL;
}
+ if (r->headers != NULL) {
+ freet(r->headers);
+ r->headers = NULL;
+ }
return 1;
}
// fermer une socket
HTS_INLINE void deletesoc(T_SOC soc) {
- if (soc!=INVALID_SOCKET) {
-// J'ai planté.. pas de shutdown
-//#if HTS_WIDE_DEBUG
-// DEBUG_W("shutdown\n");
-//#endif
-// shutdown(soc,2); // shutdown
-//#if HTS_WIDE_DEBUG
-// DEBUG_W("shutdown done\n");
-//#endif
- // Ne pas oublier de fermer la connexion avant de partir.. (plus propre)
+ if (soc!=INVALID_SOCKET && soc!=LOCAL_SOCKET_ID) {
#if HTS_WIDE_DEBUG
- DEBUG_W("close\n");
+ DEBUG_W("close %d\n" _ (int) soc);
#endif
#if HTS_WIN
closesocket(soc);
@@ -2385,7 +2410,7 @@ HTS_INLINE void deletesoc(T_SOC soc) {
close(soc);
#endif
#if HTS_WIDE_DEBUG
- DEBUG_W("close done\n");
+ DEBUG_W(".. done\n");
#endif
}
}
@@ -2400,8 +2425,10 @@ HTS_INLINE void deletesoc_r(htsblk* r) {
r->ssl_con=NULL;
}
#endif
- deletesoc(r->soc);
- r->soc=INVALID_SOCKET;
+ if (r->soc!=INVALID_SOCKET) {
+ deletesoc(r->soc);
+ r->soc=INVALID_SOCKET;
+ }
}
// renvoi le nombre de secondes depuis 1970
@@ -2597,6 +2624,20 @@ int set_filetime_rfc822(char* file,char* date) {
} else return -1;
}
+/* Read the last-modification time of `file` and write it into `date`
+   using time_rfc822().
+   file: path handed to stat()
+   date: output buffer, always reset to "" first (so it is empty on failure)
+         NOTE(review): time_rfc822() is commented in this file as needing a
+         256-byte buffer -- confirm callers provide at least that much.
+   returns: 1 if the date was written, 0 if stat() failed or the timestamp
+            could not be converted to a broken-down time */
+int get_filetime_rfc822(char* file,char* date) {
+  struct stat buf;
+  date[0] = '\0';
+  if (stat(file, &buf) == 0) {
+    struct tm* A;
+    time_t tt = buf.st_mtime;
+    A=gmtime(&tt);
+    if (A==NULL)
+      A=localtime(&tt);
+    /* both conversions may fail (unrepresentable mtime): never hand a NULL
+       struct tm to time_rfc822(), report failure instead */
+    if (A==NULL)
+      return 0;
+    time_rfc822(date, A);
+    return 1;
+  }
+  return 0;
+}
// heure au format rfc (taille buffer 256o)
HTS_INLINE void time_rfc822(char* s,struct tm * A) {
@@ -2758,33 +2799,21 @@ int finput(int fd,char* s,int max) {
}
// Like linput, but in memory (optimized)
-int binput(char* buff,char* s,int max) {
- char* end;
- int count;
-
- // clear buffer
- s[0]='\0';
- // end of buffer?
- if ( *buff == '\0')
- return 1;
- // find ending \n
- end=strchr(buff,'\n');
- // ..or end of buffer
- if (!end)
- end=buff+strlen(buff);
- // then count number of bytes, maximum=max
- count=min(max,end-buff);
- // and strip annoying ending cr
- while( (count>0) && (buff[count] == '\r'))
- count--;
- // copy
- if (count > 0) {
- strncatbuff(s, buff, count);
+int binput(char* buff, char* s, int max) {
+ int count = 0;
+ int destCount = 0;
+
+ // Note: \0 will return 1
+ while(count < max && buff != NULL && buff[count] != '\0' && buff[count] != '\n') {
+ if (buff[count] != '\r') {
+ s[destCount++] = buff[count];
+ }
+ count++;
}
- // and terminate with a null char
- s[count]='\0';
+ s[destCount] = '\0';
+
// then return the supplemental jump offset
- return (end-buff)+1;
+ return count + 1;
}
// Lecture d'une ligne (peut être unicode à priori)
@@ -2894,18 +2923,6 @@ void rawlinput(FILE* fp,char* s,int max) {
s[j++]='\0';
}
-
-// compare le début de f avec s et retourne la position de la fin
-// 'A=a' (case insensitive)
-int strfield(const char* f,const char* s) {
- int r=0;
- while (streql(*f,*s) && ((*f)!=0) && ((*s)!=0)) { f++; s++; r++; }
- if (*s==0)
- return r;
- else
- return 0;
-}
-
//cherche chaine, case insensitive
char* strstrcase(char *s,char *o) {
while((*s) && (strfield(s,o)==0)) s++;
@@ -3006,8 +3023,8 @@ void map_characters(unsigned char* buffer, unsigned int size, unsigned int* map)
// 1 : oui
// -1 : on sait pas
// -2 : on sait pas, pas d'extension
-int ishtml(char* fil) {
- char *a;
+int ishtml(const char* fil) {
+ const char *a;
// patch pour les truc.html?Choix=toto
if ( (a=strchr(fil,'?')) ) // paramètres?
@@ -3020,19 +3037,20 @@ int ishtml(char* fil) {
while ( (*a!='.') && (*a!='/') && ( a > fil)) a--;
if (*a=='.') { // a une extension
- char fil_noquery[HTS_URLMAXSIZE*2];
+ char BIGSTK fil_noquery[HTS_URLMAXSIZE*2];
+ char* b;
fil_noquery[0]='\0';
a++; // pointer sur extension
strncatbuff(fil_noquery,a,HTS_URLMAXSIZE);
- a=strchr(fil_noquery,'?');
- if (a)
- *a='\0';
+ b=strchr(fil_noquery,'?');
+ if (b)
+ *b='\0';
return ishtml_ext(fil_noquery); // retour
} else return -2; // indéterminé, par exemple /truc
}
// idem, mais pour uniquement l'extension
-int ishtml_ext(char* a) {
+int ishtml_ext(const char* a) {
int html=0;
//
if (strfield2(a,"html")) html = 1;
@@ -3075,6 +3093,8 @@ HTS_INLINE int ishttperror(int err) {
// une identification
HTSEXT_API char* jump_identification(char* source) {
char *a,*trytofind;
+ if (strcmp(source, "file://") == 0)
+ return source;
// rechercher dernier @ (car parfois email transmise dans adresse!)
// mais sauter ftp:// éventuel
a = jump_protocol(source);
@@ -3083,6 +3103,8 @@ HTSEXT_API char* jump_identification(char* source) {
}
HTSEXT_API char* jump_normalized(char* source) {
+ if (strcmp(source, "file://") == 0)
+ return source;
source = jump_identification(source);
if (strfield(source, "www") && source[3] != '\0') {
if (source[3] == '.') { // www.foo.com -> foo.com
@@ -3098,25 +3120,80 @@ HTSEXT_API char* jump_normalized(char* source) {
return source;
}
-HTSEXT_API char* fil_normalized(char* source, char* dest_) {
- char* dest=dest_;
+/* qsort() comparator used by fil_normalized(): every array element is a
+   char* whose first byte (the '?' or '&' separator) is skipped, the rest
+   being compared with strcmp(). */
+static int sortNormFnc(const void * a_, const void * b_) {
+  char* const lhs = *((char**) a_);
+  char* const rhs = *((char**) b_);
+  return strcmp(lhs + 1, rhs + 1);
+}
+
+
+HTSEXT_API char* fil_normalized(char* source, char* dest) {
char lastc = 0;
int gotquery=0;
- while(*source) {
- if (*source == '?')
- gotquery=1;
+ int ampargs=0;
+ int i,j;
+ char* query=NULL;
+ for(i=j=0 ; source[i] != '\0'; i++) {
+ if (!gotquery && source[i] == '?')
+ gotquery=ampargs=1;
if (
- (!gotquery && lastc == '/' && *source == '/') // foo//bar -> foo/bar
+ (!gotquery && lastc == '/' && source[i] == '/') // foo//bar -> foo/bar
) {
}
else {
- *dest++ = *source;
+ if (gotquery && source[i] == '&') {
+ ampargs++;
+ }
+ dest[j++] = source[i];
+ }
+ lastc = source[i];
+ }
+ dest[j++] = '\0';
+
+ /* Sort arguments (&foo=1&bar=2 == &bar=2&foo=1) */
+ if (ampargs > 1) {
+ char** amps = malloct(ampargs * sizeof(char*));
+ char* copyBuff = NULL;
+ int qLen=0;
+ assertf(amps != NULL);
+ gotquery = 0;
+ for(i=j=0 ; dest[i] != '\0'; i++) {
+ if ( (gotquery && dest[i] == '&') || ( !gotquery && dest[i] == '?') ) {
+ if (!gotquery) {
+ gotquery=1;
+ query = &dest[i];
+ qLen = (int)strlen(query);
+ }
+ assertf(j < ampargs);
+ amps[j++] = &dest[i];
+ dest[i] = '\0';
+ }
}
- lastc = *source;
- source++;
+ assertf(j == ampargs);
+
+ /* Sort 'em all */
+ qsort(amps, ampargs, sizeof(char*), sortNormFnc);
+
+ /* Replace query by sorted query */
+ copyBuff = malloct(qLen + 1);
+ assertf(copyBuff != NULL);
+ copyBuff[0] = '\0';
+ for(i = 0 ; i < ampargs ; i++) {
+ if (i == 0)
+ strcatbuff(copyBuff, "?");
+ else
+ strcatbuff(copyBuff, "&");
+ strcatbuff(copyBuff, amps[i] + 1);
+ }
+ assert((int)strlen(copyBuff) <= qLen);
+ strcpybuff(query, copyBuff);
+
+ /* Cleanup */
+ freet(amps);
+ freet(copyBuff);
}
- *dest++ = '\0';
- return dest_;
+
+ return dest;
}
#define endwith(a) ( (len >= (sizeof(a)-1)) ? ( strncmp(dest, a+len-(sizeof(a)-1), sizeof(a)-1) == 0 ) : 0 );
@@ -3154,6 +3231,21 @@ char* strrchr_limit(char* s, char c, char* limit) {
}
}
+// strstr, but not too far
+// returns the first occurrence of sub in s, or NULL if sub is not found
+// or if the string limit occurs in s at or before that position
+// (limit==NULL: behaves exactly like strstr)
+char* strstr_limit(char* s, char* sub, char* limit) {
+  char* const found = strstr(s, sub);
+  if (found == NULL || limit == NULL)
+    return found;
+  else {
+    char* const barrier = strstr(s, limit);
+    return (barrier == NULL || found < barrier) ? found : NULL;
+  }
+}
+
// retourner adr sans ftp://
HTS_INLINE char* jump_protocol(char* source) {
int p;
@@ -3456,7 +3548,7 @@ HTSEXT_API void unescape_amp(char* s) {
c='~';
// remplacer?
if (c) {
- char buff[HTS_URLMAXSIZE*2];
+ char BIGSTK buff[HTS_URLMAXSIZE*2];
buff[0]=(char) c;
strcpybuff(buff+1,end+1);
strcpybuff(s,buff);
@@ -3467,6 +3559,17 @@ HTSEXT_API void unescape_amp(char* s) {
}
}
+/* Value (0..15) of one hexadecimal digit, upper or lower case;
+   any other character yields 0 */
+static int ehexh(char c) {
+  if (c >= '0' && c <= '9')
+    return c - '0';
+  else if (c >= 'A' && c <= 'F')
+    return c - 'A' + 10;
+  else if (c >= 'a' && c <= 'f')
+    return c - 'a' + 10;
+  return 0;
+}
+
+/* Value (0..255) of the two hexadecimal digits s[0] (high nibble) and
+   s[1] (low nibble), each decoded by ehexh() */
+static int ehex(char* s) {
+  const int high = ehexh(s[0]);
+  const int low = ehexh(s[1]);
+  return (high << 4) + low;
+}
+
// remplacer %20 par ' ', | par : etc..
// buffer MAX 1Ko
HTSEXT_API char* unescape_http(char* s) {
@@ -3564,7 +3667,7 @@ HTSEXT_API void escape_remove_control(char* s) {
unsigned char* ss = (unsigned char*) s;
while(*ss) {
if (*ss < 32) { /* CONTROL characters go away! */
- char tmp[HTS_URLMAXSIZE*2];
+ char BIGSTK tmp[HTS_URLMAXSIZE*2];
strcpybuff(tmp, ss+1);
strcpybuff(ss, tmp);
} else {
@@ -3573,6 +3676,25 @@ HTSEXT_API void escape_remove_control(char* s) {
}
}
+/* Replace, in place, every character of s matched by CHAR_HIG or
+   CHAR_XXAVOID by its "&#xHH;" numeric entity (HH = byte value in hex).
+   After a replacement the scan resumes right after the inserted '&', so
+   the entity's own characters are examined too.
+   NOTE(review): s grows by 5 bytes per replaced character and no size is
+   checked here -- presumably the caller supplies a large enough buffer. */
+HTSEXT_API void x_escape_html(char* s) {
+  for( ; *s != '\0' ; s++) {
+    if (CHAR_HIG(*s) || CHAR_XXAVOID(*s)) {
+      char BIGSTK tail[HTS_URLMAXSIZE*3];
+      const int code = (int)(unsigned char) *s;
+      strcpybuff(tail, s+1);          /* save what follows the character */
+      sprintf(s,"&#x%02x;", code);    /* overwrite it with the entity */
+      strcatbuff(s, tail);            /* then put the saved tail back */
+    }
+  }
+}
+
HTSEXT_API void x_escape_http(char* s,int mode) {
while(*s) {
@@ -3588,7 +3710,7 @@ HTSEXT_API void x_escape_http(char* s,int mode) {
|| CHAR_MARK(*s));
}
else if (mode==2)
- test=(strchr(" ",*s)!=0); // n'escaper que espace
+ test=(*s == ' '); // n'escaper que espace
else if (mode==3) { // échapper que ce qui est nécessaire
test = (
CHAR_SPECIAL(*s)
@@ -3601,7 +3723,7 @@ HTSEXT_API void x_escape_http(char* s,int mode) {
}
if (test) {
- char buffer[HTS_URLMAXSIZE*3];
+ char BIGSTK buffer[HTS_URLMAXSIZE*3];
int n;
n=(int)(unsigned char) *s;
strcpybuff(buffer,s+1);
@@ -3612,18 +3734,34 @@ HTSEXT_API void x_escape_http(char* s,int mode) {
}
}
+/* Copy s into d, writing each '&' as "&amp;"; d is always NUL-terminated.
+   NOTE(review): d can need up to 5 bytes per source character plus the
+   terminator; no size is checked here. */
+HTSEXT_API void escape_for_html_print(char* s, char* d) {
+  while(*s != '\0') {
+    const char c = *s++;
+    if (c != '&') {
+      *d++ = c;
+      continue;
+    }
+    strcpybuff(d, "&amp;");
+    d += strlen(d);   /* step over the entity just written */
+  }
+  *d = '\0';
+}
-HTS_INLINE int ehexh(char c) {
- if ((c>='0') && (c<='9')) return c-'0';
- if ((c>='a') && (c<='f')) c-=('a'-'A');
- if ((c>='A') && (c<='F')) return (c-'A'+10);
- return 0;
+HTSEXT_API void escape_for_html_print_full(char* s, char* d) {
+ for( ; *s ; s++) {
+ if (*s == '&') {
+ strcpybuff(d, "&amp;");
+ d += strlen(d);
+ } else if (CHAR_HIG(*s)) {
+ sprintf(d, "&#x%02x;", (unsigned char) *s);
+ d += strlen(d);
+ } else {
+ *d++ = *s;
+ }
+ }
+ *d = '\0';
}
-HTS_INLINE int ehex(char* s) {
- return 16*ehexh(*s)+ehexh(*(s+1));
-}
// concat, concatène deux chaines et renvoi le résultat
// permet d'alléger grandement le code
@@ -3731,18 +3869,18 @@ HTS_INLINE int is_realspace(char c) {
// deviner type d'un fichier local..
// ex: fil="toto.gif" -> s="image/gif"
-void guess_httptype(char *s,char *fil) {
+void guess_httptype(char *s,const char *fil) {
get_httptype(s,fil,1);
}
// idem
// flag: 1 si toujours renvoyer un type
-void get_httptype(char *s,char *fil,int flag) {
+void get_httptype(char *s,const char *fil,int flag) {
if (ishtml(fil)==1)
strcpybuff(s,"text/html");
else {
- char *a=fil+strlen(fil)-1;
+ const char *a=fil+strlen(fil)-1;
while ( (*a!='.') && (*a!='/') && (a>fil)) a--;
- if (*a=='.') {
+ if (*a=='.' && strlen(a) < 32) {
int ok=0;
int j=0;
a++;
@@ -3766,7 +3904,7 @@ void get_httptype(char *s,char *fil,int flag) {
// get type of fil (php)
// s: buffer (text/html) or NULL
// return: 1 if known by user
-int get_userhttptype(int setdefs,char *s,char *ext) {
+int get_userhttptype(int setdefs,char *s,const char *ext) {
char** buffer=NULL;
NOSTATIC_RESERVE(buffer, char*, 1);
if (setdefs) {
@@ -3778,7 +3916,7 @@ int get_userhttptype(int setdefs,char *s,char *ext) {
if (!ext)
return 0;
if (*buffer) {
- char search[1024];
+ char BIGSTK search[1024];
char* detect;
sprintf(search,"\n%s=",ext); // php=text/html
detect=strstr(*buffer,search);
@@ -3844,7 +3982,7 @@ void give_mimext(char *s,char *st) {
// 0 : non
// 1 : oui
// 2 : html
-int is_knowntype(char *fil) {
+int is_knowntype(const char *fil) {
int j=0;
if (!fil)
return 0;
@@ -3862,19 +4000,20 @@ int is_knowntype(char *fil) {
return (is_userknowntype(fil));
}
// extension : html,gif..
-char* get_ext(char *fil) {
+char* get_ext(const char *fil) {
char* fil_noquery;
- char *a=fil+strlen(fil)-1;
+ const char *a=fil+strlen(fil)-1;
NOSTATIC_RESERVE(fil_noquery, char, HTS_URLMAXSIZE*2);
while ( (*a!='.') && (*a!='/') && (a>fil)) a--;
if (*a=='.') {
+ char* b;
fil_noquery[0]='\0';
a++; // pointer sur extension
strncatbuff(fil_noquery,a,HTS_URLMAXSIZE);
- a=strchr(fil_noquery,'?');
- if (a)
- *a='\0';
+ b=strchr(fil_noquery,'?');
+ if (b)
+ *b='\0';
return concat(fil_noquery,"");
}
else
@@ -3886,8 +4025,8 @@ char* get_ext(char *fil) {
// 2 : html
// setdefs : set mime buffer:
// file=(char*) "asp=text/html\nphp=text/html\n"
-int is_userknowntype(char *fil) {
- char mime[1024];
+int is_userknowntype(const char *fil) {
+ char BIGSTK mime[1024];
if (!fil)
return 0;
if (!strnotempty(fil))
@@ -3904,7 +4043,7 @@ int is_userknowntype(char *fil) {
// page dynamique?
// is_dyntype(get_ext("foo.asp"))
-int is_dyntype(char *fil) {
+int is_dyntype(const char *fil) {
int j=0;
if (!fil)
return 0;
@@ -3921,11 +4060,12 @@ int is_dyntype(char *fil) {
// types critiques qui ne doivent pas être changés car renvoyés par des serveurs qui ne
// connaissent pas le type
-int may_unknown(char* st) {
+int may_unknown(const char* st) {
int j=0;
// types média
- if (may_be_hypertext_mime(st))
+ if (may_be_hypertext_mime(st, "")) {
return 1;
+ }
while(strnotempty(hts_mime_keep[j])) {
if (strfield2(hts_mime_keep[j],st)) { // trouvé
return 1;
@@ -3936,7 +4076,6 @@ int may_unknown(char* st) {
}
-
// -- Utils fichiers
// pretty print for i/o
@@ -4106,23 +4245,24 @@ int HTS_TOTAL_RECV_CHECK(int var) {
#endif
// Lecture dans buff de size octets au maximum en utilisant la socket r (structure htsblk)
+// returns:
// >0 : data received
// == 0 : not yet data
-// <0 : no more data or error
+// <0: error or no data: READ_ERROR, READ_EOF or READ_TIMEOUT
HTS_INLINE int hts_read(htsblk* r,char* buff,int size) {
int retour;
// return read(soc,buff,size);
if (r->is_file) {
#if HTS_WIDE_DEBUG
- DEBUG_W("read\n");
+ DEBUG_W("read(%p, %d, %d)\n" _ (void*) buff _ (int) size _ (int) r->fp);
#endif
if (r->fp)
- retour=(int)fread(buff,1,size,r->fp);
+ retour = (int)fread(buff,1,size,r->fp);
else
- retour=-1;
+ retour = READ_ERROR;
} else {
#if HTS_WIDE_DEBUG
- DEBUG_W("recv\n");
+ DEBUG_W("recv(%d, %p, %d)\n" _ (int) r->soc _ (void*) buff _ (int) size);
if (r->soc==INVALID_SOCKET)
printf("!!WIDE_DEBUG ERROR, soc==INVALID hts_read\n");
#endif
@@ -4139,13 +4279,20 @@ HTS_INLINE int hts_read(htsblk* r,char* buff,int size) {
)
{
retour = 0; /* no data yet (ssl cache) */
+ } else if (err_code == SSL_ERROR_ZERO_RETURN) {
+ retour = READ_EOF; /* completed */
} else {
- retour = -1; /* eof or error */
+ retour = READ_ERROR; /* eof or error */
}
}
} else {
#endif
retour=recv(r->soc,buff,size,0);
+ if (retour == 0) {
+ retour = READ_EOF;
+ } else if (retour < 0) {
+ retour = READ_ERROR;
+ }
}
if (retour > 0) // compter flux entrant
HTS_STAT.HTS_TOTAL_RECV+=retour;
@@ -4153,7 +4300,7 @@ HTS_INLINE int hts_read(htsblk* r,char* buff,int size) {
}
#endif
#if HTS_WIDE_DEBUG
- DEBUG_W("recv/read done\n");
+ DEBUG_W("recv/read done (%d bytes)\n" _ (int) retour);
#endif
return retour;
}
@@ -4179,7 +4326,7 @@ static void hts_cache_free_(t_dnscache* cache) {
}
}
void hts_cache_free(t_dnscache* cache) {
- if (cache != NULL) {
+ if (cache != NULL && cache->n != NULL) {
hts_cache_free_(cache->n);
cache->n = NULL;
}
@@ -4218,7 +4365,7 @@ int _hts_lockdns(int i) {
// si h_length==0 alors le nom n'existe pas dans le dns
t_hostent* _hts_ghbn(t_dnscache* cache,char* iadr,t_hostent* retour) {
// attendre que le cache dns soit prêt
- while(_hts_lockdns(-1)); // attendre libération
+ //while(_hts_lockdns(-1)); // attendre libération
_hts_lockdns(1); // locker
while(1) {
@@ -4273,7 +4420,7 @@ int hts_dnstest(char* _iadr) {
#endif
return 1;
- while(_hts_lockdns(-1)); // attendre libération
+ // while(_hts_lockdns(-1)); // attendre libération
_hts_lockdns(1); // locker
while(1) {
if (strcmp(cache->iadr,iadr)==0) { // ok trouvé
@@ -4306,7 +4453,7 @@ HTSEXT_API t_hostent* vxgethostbyname(char* hostname, void* v_buffer) {
The resolver doesn't seem to handle IP6 addresses in brackets
*/
if ((hostname[0] == '[') && (hostname[strlen(hostname)-1] == ']')) {
- char tempo[HTS_URLMAXSIZE*2];
+ char BIGSTK tempo[HTS_URLMAXSIZE*2];
tempo[0]='\0';
strncatbuff(tempo, hostname+1, strlen(hostname)-2);
strcpybuff(hostname, tempo);
@@ -4366,7 +4513,7 @@ HTSEXT_API t_hostent* vxgethostbyname(char* hostname, void* v_buffer) {
// cache dns interne à HTS // ** FREE A FAIRE sur la chaine
t_hostent* hts_gethostbyname(char* _iadr, void* v_buffer) {
- char iadr[HTS_URLMAXSIZE*2];
+ char BIGSTK iadr[HTS_URLMAXSIZE*2];
t_fullhostent* buffer = (t_fullhostent*) v_buffer;
t_dnscache* cache=_hts_cache(); // adresse du cache
t_hostent* hp;
@@ -4499,6 +4646,13 @@ void* hts_calloc(size_t len,size_t len2) {
memset(adr, 0, len * len2);
return adr;
}
+/* Duplicate str into a block obtained from hts_malloc(); a NULL str yields
+   an empty string. A failed allocation is caught by fassert(). */
+void* hts_strdup(char* str) {
+  const char* src = (str != NULL) ? str : "";
+  char* copy = (char*) hts_malloc(strlen(src) + 1);
+  fassert(copy != NULL);
+  strcpy(copy, src);
+  return copy;
+}
void* hts_xmalloc(size_t len,size_t len2) {
mlink* lnk = (mlink*) calloc(1,sizeof(mlink));
fassert(lnk != NULL);
@@ -4665,13 +4819,67 @@ int ftp_available(void) {
#endif
+int hts_dgb_init = 0;
+FILE* hts_dgb_init_fp = NULL;
+static void hts_dgb(char* msg);
+/* Set the debug-trace level; a level > 0 enables hts_dgb() output and
+   immediately logs this call */
+HTSEXT_API void hts_debug(int level) {
+  hts_dgb_init = level;
+  if (level <= 0)
+    return;
+  hts_dgb("hts_debug() called");
+}
+/* Append msg as one CRLF-terminated line to the debug trace file.
+   Does nothing unless the level set through hts_debug() is > 0. The file
+   is opened in "wb" mode on first use and kept open afterwards; each line
+   is flushed as soon as it is written. */
+static void hts_dgb(char* msg) {
+  if (hts_dgb_init <= 0)
+    return;
+  if (hts_dgb_init_fp == NULL) {
+#ifdef _WIN32_WCE
+    hts_dgb_init_fp = fopen("\\Temp\\hts-debug.txt", "wb");
+#else
+    hts_dgb_init_fp = fopen("hts-debug.txt", "wb");
+#endif
+    if (hts_dgb_init_fp == NULL)
+      return;   /* cannot open: drop the message, the next call retries */
+    fprintf(hts_dgb_init_fp, "* Creating file\r\n");
+  }
+  fprintf(hts_dgb_init_fp, "%s\r\n", msg);
+  fflush(hts_dgb_init_fp);
+}
HTSEXT_API int hts_init(void) {
static int hts_init_ok = 0;
+ hts_dgb("entering hts_init()"); /* debug */
+
+#ifdef _WIN32_WCE
+#ifndef HTS_CECOMPAT
+ xceinit(L"");
+#endif
+#endif
+
+ /* Init threads */
+ if (!hts_init_ok) {
+ htsthread_init();
+ }
+
/* Ensure external modules are loaded */
+ hts_dgb("calling htspe_init()"); /* debug */
htspe_init();
+ /* MD5 Auto-test */
+ {
+ char digest[32 + 2];
+ unsigned char* atest = (unsigned char*)"MD5 Checksum Autotest";
+ digest[0] = '\0';
+ domd5mem(atest, strlen(atest), digest, 1); /* a42ec44369da07ace5ec1d660ba4a69a */
+ if (strcmp(digest, "a42ec44369da07ace5ec1d660ba4a69a") != 0) {
+ int fatal_broken_md5 = 0;
+ assertf(fatal_broken_md5);
+ }
+ }
+
+ hts_dgb("initializing default wrappers"); /* debug */
if (!hts_init_ok) {
hts_init_ok = 1;
// default wrappers
@@ -4681,6 +4889,8 @@ HTSEXT_API int hts_init(void) {
htswrap_add("start",htsdefault_start);
htswrap_add("change-options",htsdefault_chopt);
htswrap_add("end",htsdefault_end);
+ htswrap_add("preprocess-html",htsdefault_preprocesshtml);
+ htswrap_add("postprocess-html",htsdefault_postprocesshtml);
htswrap_add("check-html",htsdefault_checkhtml);
htswrap_add("loop",htsdefault_loop);
htswrap_add("query",htsdefault_query);
@@ -4690,10 +4900,14 @@ HTSEXT_API int hts_init(void) {
htswrap_add("pause",htsdefault_pause);
htswrap_add("save-file",htsdefault_filesave);
htswrap_add("link-detected",htsdefault_linkdetected);
+ htswrap_add("link-detected2",htsdefault_linkdetected2);
htswrap_add("transfer-status",htsdefault_xfrstatus);
htswrap_add("save-name",htsdefault_savename);
+ htswrap_add("send-header",htsdefault_sendheader);
+ htswrap_add("receive-header",htsdefault_receiveheader);
}
+ hts_dgb("initializing SSL"); /* debug */
#if HTS_USEOPENSSL
/*
Initialize the OpensSSL library
@@ -4715,14 +4929,17 @@ HTSEXT_API int hts_init(void) {
#endif
/* Init vars and thread-specific values */
+ hts_dgb("initializing variables"); /* debug */
hts_initvar();
/* initialiser structcheck */
// structcheck_init(1);
+ hts_dgb("ending hts_init()"); /* debug */
return 1;
}
HTSEXT_API int hts_uninit(void) {
+ //htsthread_uninit();
hts_cache_free(_hts_cache());
hts_freevar();
/* htswrap_free(); */
@@ -4744,6 +4961,12 @@ int __cdecl htsdefault_chopt(void* opt) {
int __cdecl htsdefault_end(void) {
return 1;
}
+/* Default "preprocess-html" wrapper (registered by hts_init): ignores all its arguments and returns 1 */
+int __cdecl htsdefault_preprocesshtml(char** html,int* len,char* url_adresse,char* url_fichier) {
+ return 1;
+}
+/* Default "postprocess-html" wrapper (registered by hts_init): ignores all its arguments and returns 1 */
+int __cdecl htsdefault_postprocesshtml(char** html,int* len,char* url_adresse,char* url_fichier) {
+ return 1;
+}
int __cdecl htsdefault_checkhtml(char* html,int len,char* url_adresse,char* url_fichier) {
return 1;
}
@@ -4772,12 +4995,21 @@ void __cdecl htsdefault_filesave(char* file) {
int __cdecl htsdefault_linkdetected(char* link) {
return 1;
}
+/* Default "link-detected2" wrapper (registered by hts_init): ignores the link and the tag start, returns 1 */
+int __cdecl htsdefault_linkdetected2(char* link, char* start_tag) {
+ return 1;
+}
int __cdecl htsdefault_xfrstatus(void* back) {
return 1;
}
int __cdecl htsdefault_savename(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save) {
return 1;
}
+/* Default "send-header" wrapper (registered by hts_init): leaves buff untouched, ignores all its arguments and returns 1 */
+int __cdecl htsdefault_sendheader(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* outgoing) {
+ return 1;
+}
+/* Default "receive-header" wrapper (registered by hts_init): leaves buff untouched, ignores all its arguments and returns 1 */
+int __cdecl htsdefault_receiveheader(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* incoming) {
+ return 1;
+}
// end defaut wrappers
diff --git a/src/htslib.h b/src/htslib.h
index d3881d3..23a8400 100644
--- a/src/htslib.h
+++ b/src/htslib.h
@@ -43,7 +43,9 @@ Please visit our Website: http://www.httrack.com
#include "htsglobal.h"
/* basic net definitions */
+#include "htsbase.h"
#include "htsbasenet.h"
+#include "htsnet.h"
/* cookies et auth */
#include "htsbauth.h"
@@ -52,12 +54,15 @@ Please visit our Website: http://www.httrack.com
// (à modifier avec celle-ci)
#define POSTTOK "?>post"
-#include <stdio.h>
-
#include "htsopt.h"
+#define READ_ERROR (-1)
+#define READ_EOF (-2)
+#define READ_TIMEOUT (-3)
+#define READ_INTERNAL_ERROR (-4)
+
// structure pour paramètres supplémentaires lors de la requête
-typedef struct {
+typedef struct htsrequest {
short int user_agent_send; // user agent (ex: httrack/1.0 [sun])
short int http11; // l'en tête peut (doit) être signé HTTP/1.1 et non HTTP/1.0
short int nokeepalive; // pas de keep-alive
@@ -65,13 +70,15 @@ typedef struct {
short int nocompression; // Pas de compression
short int flush_garbage; // recycled
char user_agent[128];
+ char referer[256];
+ char from[256];
char lang_iso[64];
t_proxy proxy; // proxy
} htsrequest;
// structure pour retour d'une connexion/prise d'en tête
-typedef struct {
+typedef struct htsblk {
int statuscode; // status-code, -1=erreur, 200=OK,201=..etc (cf RFC1945)
short int notmodified; // page ou fichier NON modifié (transféré)
short int is_write; // sortie sur disque (out) ou en mémoire (adr)
@@ -83,6 +90,7 @@ typedef struct {
int keep_alive_t; // KA timeout
int keep_alive_max; // KA number of requests
char* adr; // adresse du bloc de mémoire, NULL=vide
+ char* headers; // adresse des en têtes si présents
FILE* out; // écriture directe sur disque (si is_write=1)
LLint size; // taille fichier
char msg[80]; // message éventuel si échec ("\0"=non précisé)
@@ -93,6 +101,8 @@ typedef struct {
LLint totalsize; // taille totale à télécharger (-1=inconnue)
short int is_file; // ce n'est pas une socket mais un descripteur de fichier si 1
T_SOC soc; // ID socket
+ SOCaddr address; // IP address
+ int address_size; // IP address structure length
FILE* fp; // fichier pour file://
#if HTS_USEOPENSSL
short int ssl; // is this connection a SSL one? (https)
@@ -105,8 +115,8 @@ typedef struct {
LLint crange; // Content-Range
int debugid; // debug connection
/* */
- htsrequest req; // paramètres pour la requête
- /*char digest[32+2]; // digest md5 généré par le moteur ("" si non généré)*/
+ htsrequest req; // paramètres pour la requête
+ /*char digest[32+2]; // digest md5 généré par le moteur ("" si non généré)*/
} htsblk;
@@ -144,12 +154,8 @@ typedef struct t_dnscache {
-
-/*
-#ifdef __cplusplus
-extern "C" {
-#endif
-*/
+/* Library internal definitions */
+#ifdef HTS_INTERNAL_BYTECODE
// fonctions unix/winsock
int hts_read(htsblk* r,char* buff,int size);
@@ -215,6 +221,7 @@ void time_local_rfc822(char* s);
struct tm* convert_time_rfc822(char* s);
int set_filetime(char* file,struct tm* tm_time);
int set_filetime_rfc822(char* file,char* date);
+int get_filetime_rfc822(char* file,char* date);
HTS_INLINE void time_rfc822(char* s,struct tm * A);
HTS_INLINE void time_rfc822_local(char* s,struct tm * A);
#ifndef HTTRACK_DEFLIB
@@ -232,25 +239,23 @@ int linputsoc_t(T_SOC soc, char* s, int max, int timeout);
int linput_trim(FILE* fp,char* s,int max);
int linput_cpp(FILE* fp,char* s,int max);
void rawlinput(FILE* fp,char* s,int max);
-int strfield(const char* f,const char* s);
-#define strfield2(f,s) ( (strlen(f)!=strlen(s)) ? 0 : (strfield(f,s)) )
char* strstrcase(char *s,char *o);
int ident_url_absolute(char* url,char* adr,char* fil);
void fil_simplifie(char* f);
int is_unicode_utf8(unsigned char* buffer, unsigned int size);
void map_characters(unsigned char* buffer, unsigned int size, unsigned int* map);
-int ishtml(char* urlfil);
-int ishtml_ext(char* a);
+int ishtml(const char* urlfil);
+int ishtml_ext(const char* a);
int ishttperror(int err);
-void guess_httptype(char *s,char *fil);
-void get_httptype(char *s,char *fil,int flag);
-int get_userhttptype(int setdefs,char *s,char *ext);
+void guess_httptype(char *s,const char *fil);
+void get_httptype(char *s,const char *fil,int flag);
+int get_userhttptype(int setdefs,char *s,const char *ext);
void give_mimext(char *s,char *st);
-int is_knowntype(char *fil);
-int is_userknowntype(char *fil);
-int is_dyntype(char *fil);
-char* get_ext(char *fil);
-int may_unknown(char* st);
+int is_knowntype(const char *fil);
+int is_userknowntype(const char *fil);
+int is_dyntype(const char *fil);
+char* get_ext(const char *fil);
+int may_unknown(const char* st);
#ifndef HTTRACK_DEFLIB
HTSEXT_API char* jump_identification(char*);
HTSEXT_API char* jump_normalized(char*);
@@ -259,6 +264,7 @@ HTSEXT_API char* fil_normalized(char* source, char* dest);
HTSEXT_API char* adr_normalized(char* source, char* dest);
#endif
char* strrchr_limit(char* s, char c, char* limit);
+char* strstr_limit(char* s, char* sub, char* limit);
HTS_INLINE char* jump_protocol(char* source);
void code64(unsigned char* a,int size_a,unsigned char* b,int crlf);
#ifndef HTTRACK_DEFLIB
@@ -270,15 +276,16 @@ HTSEXT_API void escape_uri_utf(char* s);
HTSEXT_API void escape_check_url(char* s);
HTSEXT_API char* escape_check_url_addr(char* s);
HTSEXT_API void x_escape_http(char* s,int mode);
+HTSEXT_API void x_escape_html(char* s);
HTSEXT_API void escape_remove_control(char* s);
+HTSEXT_API void escape_for_html_print(char* s, char* d);
+HTSEXT_API void escape_for_html_print_full(char* s, char* d);
#endif
-int ehexh(char c);
#ifndef HTTRACK_DEFLIB
HTSEXT_API char* unescape_http(char* s);
HTSEXT_API char* unescape_http_unharm(char* s, int no_high);
HTSEXT_API char* antislash_unescaped(char* s);
#endif
-int ehex(char* s);
char* concat(const char* a,const char* b);
#define copychar(a) concat((a),NULL)
#if HTS_DOSNAME
@@ -296,14 +303,6 @@ char* concat(const char* a,const char* b);
void hts_lowcase(char* s);
void hts_replace(char *s,char from,char to);
-/* Spaces: CR,LF,TAB,FF */
-#define is_space(c) ( ((c)==' ') || ((c)=='\"') || ((c)==10) || ((c)==13) || ((c)==9) || ((c)==12) || ((c)==11) || ((c)=='\'') )
-#define is_realspace(c) ( ((c)==' ') || ((c)==10) || ((c)==13) || ((c)==9) || ((c)==12) || ((c)==11) )
-#define is_taborspace(c) ( ((c)==' ') || ((c)==9) )
-#define is_quote(c) ( ((c)=='\"') || ((c)=='\'') )
-#define is_retorsep(c) ( ((c)==10) || ((c)==13) || ((c)==9) )
-//HTS_INLINE int is_space(char);
-//HTS_INLINE int is_realspace(char);
void fprintfio(FILE* fp,char* buff,char* prefix);
@@ -328,11 +327,6 @@ typedef void* ( *beginthread_type )( void * );
unsigned long _beginthread( beginthread_type start_address, unsigned stack_size, void *arglist );
#endif
-/*
-#ifdef __cplusplus
-}
-#endif
-*/
@@ -360,6 +354,8 @@ void __cdecl htsdefault_uninit(void);
int __cdecl htsdefault_start(void* opt);
int __cdecl htsdefault_chopt(void* opt);
int __cdecl htsdefault_end(void);
+int __cdecl htsdefault_preprocesshtml(char** html,int* len,char* url_adresse,char* url_fichier);
+int __cdecl htsdefault_postprocesshtml(char** html,int* len,char* url_adresse,char* url_fichier);
int __cdecl htsdefault_checkhtml(char* html,int len,char* url_adresse,char* url_fichier);
int __cdecl htsdefault_loop(void* back,int back_max,int back_index,int lien_n,int lien_tot,int stat_time,hts_stat_struct* stats);
char* __cdecl htsdefault_query(char* question);
@@ -369,10 +365,150 @@ int __cdecl htsdefault_check(char* adr,char* fil,int status);
void __cdecl htsdefault_pause(char* lockfile);
void __cdecl htsdefault_filesave(char*);
int __cdecl htsdefault_linkdetected(char* link);
+int __cdecl htsdefault_linkdetected2(char* link, char* tag_start);
int __cdecl htsdefault_xfrstatus(void* back);
int __cdecl htsdefault_savename(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save);
+int __cdecl htsdefault_sendheader(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* outgoing);
+int __cdecl htsdefault_receiveheader(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* incoming);
+
// end defaut wrappers
+
+// htsmodules.c definitions
+extern void* getFunctionPtr(httrackp* opt, char* file, char* fncname);
+extern void clearCallbacks(htscallbacks* chain);
+
+
+
+#endif // internals
+
+
+/* Spaces: CR,LF,TAB,FF */
+#define is_space(c) ( ((c)==' ') || ((c)=='\"') || ((c)==10) || ((c)==13) || ((c)==9) || ((c)==12) || ((c)==11) || ((c)=='\'') )
+#define is_realspace(c) ( ((c)==' ') || ((c)==10) || ((c)==13) || ((c)==9) || ((c)==12) || ((c)==11) )
+#define is_taborspace(c) ( ((c)==' ') || ((c)==9) )
+#define is_quote(c) ( ((c)=='\"') || ((c)=='\'') )
+#define is_retorsep(c) ( ((c)==10) || ((c)==13) || ((c)==9) )
+//HTS_INLINE int is_space(char);
+//HTS_INLINE int is_realspace(char);
+
+// Case-insensitive prefix test ('A' matches 'a'): checks whether f begins with s.
+// Returns the number of characters matched (i.e. the length of s) when f
+// starts with s, and 0 otherwise (also 0 when s is empty).
+static int strfield(const char* f,const char* s) {
+  int matched;
+  for(matched = 0 ; streql(*f,*s) && ((*f)!=0) && ((*s)!=0) ; matched++) {
+    f++;
+    s++;
+  }
+  /* s fully consumed => every character of s was found at the start of f */
+  return (*s==0) ? matched : 0;
+}
+// String comparison performed character by character through hichar().
+// Returns 0 when both strings compare equal over their whole length,
+// otherwise a non-zero value: the hichar() difference of the first pair of
+// characters that differ, or -1 when a is a strict prefix of b.
+static int strcmpnocase(char* a,char* b) {
+  while(*a) {
+    int cmp = hichar(*a) - hichar(*b);
+    if (cmp != 0)
+      return cmp;
+    a++;
+    b++;
+  }
+  /* a is exhausted: the strings are equal only if b ends here too
+     (previously "abc" vs "abcd" was wrongly reported as equal) */
+  return (*b != 0) ? -1 : 0;
+}
+
+#ifdef _WIN32
+#define strcasecmp(a,b) stricmp(a,b)
+#define strncasecmp(a,b,n) strnicmp(a,b,n)
+#endif
+
+#define strfield2(f,s) ( (strlen(f)!=strlen(s)) ? 0 : (strfield(f,s)) )
+
+// is this MIME an hypertext MIME (text/html), html/js-style or other script/text type?
+#define HTS_HYPERTEXT_DEFAULT_MIME "text/html"
+#define is_hypertext_mime__(a) \
+ ( (strfield2((a),"text/html")!=0)\
+ || (strfield2((a),"application/x-javascript")!=0) \
+ || (strfield2((a),"text/css")!=0) \
+ /*|| (strfield2((a),"text/vnd.wap.wml")!=0)*/ \
+ || (strfield2((a),"image/svg+xml")!=0) \
+ || (strfield2((a),"image/svg-xml")!=0) \
+ /*|| (strfield2((a),"audio/x-pn-realaudio")!=0) */\
+ || (strfield2((a),"application/x-authorware-map")!=0) \
+ )
+#define may_be_hypertext_mime__(a) \
+ (\
+ (strfield2((a),"audio/x-pn-realaudio")!=0) \
+ || (strfield2((a),"audio/x-mpegurl")!=0) \
+ )
+
+
+/* Library internal definitions */
+#ifdef HTS_INTERNAL_BYTECODE
+
+// check if (mime, file) is hypertext
+// Returns 1 (non-zero) when mime matches one of the is_hypertext_mime__()
+// types; when may_unknown(mime) is true and a file name is available, the
+// type guessed from the file name by guess_httptype() is tested instead.
+// Returns 0 otherwise.
+static int is_hypertext_mime(const char* mime, const char* file) {
+  if (is_hypertext_mime__(mime))
+    return 1;
+  /* same NULL/empty file guard as may_be_hypertext_mime() and compare_mime():
+     never hand a missing file name to guess_httptype() */
+  if (file != NULL && file[0] != '\0' && may_unknown(mime)) {
+    char guessed[256];
+    guessed[0] = '\0';
+    guess_httptype(guessed, file);
+    return is_hypertext_mime__(guessed);
+  }
+  return 0;
+}
+
+// check if (mime, file) might be "false" hypertext
+// Non-zero when mime matches may_be_hypertext_mime__(), or when
+// may_unknown(mime) is true and the type guessed from a non-empty file
+// name matches it; 0 otherwise.
+static int may_be_hypertext_mime(const char* mime, const char* file) {
+  char guessed[256];
+  if (may_be_hypertext_mime__(mime))
+    return 1;
+  /* no usable file name, or MIME type not flagged by may_unknown(): stop here */
+  if (file == NULL || file[0] == '\0' || !may_unknown(mime))
+    return 0;
+  guessed[0] = '\0';
+  guess_httptype(guessed, file);
+  return may_be_hypertext_mime__(guessed);
+}
+
+// compare (mime, file) with reference
+// When mime is one of the (maybe-)hypertext types, it is compared directly
+// with reference; otherwise, if may_unknown(mime) is true and a non-empty
+// file name is given, the type guessed from the file name is compared.
+// Returns the strfield2() result (non-zero on match), 0 otherwise.
+static int compare_mime(const char* mime, const char* file, const char* reference) {
+  char guessed[256];
+  if (is_hypertext_mime__(mime) || may_be_hypertext_mime__(mime))
+    return strfield2(mime, reference);
+  /* no usable file name, or MIME type not flagged by may_unknown(): no match */
+  if (file == NULL || file[0] == '\0' || !may_unknown(mime))
+    return 0;
+  guessed[0] = '\0';
+  guess_httptype(guessed, file);
+  return strfield2(guessed, reference);
+}
+
+#endif
+
+#ifdef _WIN32_WCE_XXC
+extern char cwd[MAX_PATH+1];
+/* getcwd() replacement: on first call, computes the directory holding the
+   running module into the global cwd buffer; later calls reuse that value.
+   If buffer is not NULL and maxlen > 0, a NUL-terminated copy (truncated
+   to maxlen-1 characters if needed) is stored in buffer.
+   Returns the global cwd buffer. */
+static char *getcwd_ce(char *buffer, int maxlen)
+{
+  TCHAR fileUnc[MAX_PATH+1];
+  char* plast;
+
+  if(cwd[0] == 0)
+  {
+    /* keep the conversion below well-defined even if the call fails
+       or truncates without terminating */
+    fileUnc[0] = 0;
+    GetModuleFileName(NULL, fileUnc, MAX_PATH);
+    fileUnc[MAX_PATH] = 0;
+    WideCharToMultiByte(CP_ACP, 0, fileUnc, -1, cwd, MAX_PATH, NULL, NULL);
+    /* strip the module file name, keep the directory */
+    plast = strrchr(cwd, '\\');
+    if(plast)
+      *plast = 0;
+    /* Special trick to keep start menu clean... */
+    if(_stricmp(cwd, "\\windows\\start menu") == 0)
+      strcpy(cwd, "\\Apps");
+  }
+  if(buffer && maxlen > 0) {
+    strncpy(buffer, cwd, maxlen);
+    /* strncpy() does not terminate the destination on truncation */
+    buffer[maxlen - 1] = '\0';
+  }
+  return cwd;
+}
+#undef getcwd
+#define getcwd getcwd_ce
+#endif
+
#endif
diff --git a/src/htsmd5.c b/src/htsmd5.c
index 47242d8..92aec5e 100644
--- a/src/htsmd5.c
+++ b/src/htsmd5.c
@@ -39,6 +39,9 @@ Please visit our Website: http://www.httrack.com
/* Modified 2000 by Xavier Roche for domd5mem */
/* ------------------------------------------------------------ */
+/* Internal engine bytecode */
+#define HTS_INTERNAL_BYTECODE
+
#include "htsmd5.h"
#include "md5.h"
#include <string.h>
@@ -48,12 +51,25 @@ int domd5mem(unsigned char * buf, int len,
unsigned char * digest, int asAscii) {
int endian = 1;
unsigned char bindigest[16];
+#if 1
+//#ifndef _WIN32_WCE
MD5_CTX ctx;
MD5Init(&ctx, * ( (char*) &endian));
MD5Update(&ctx, buf, len);
MD5Final(bindigest, &ctx);
-
+#else
+ /* Broken md5.. temporary hack */
+ int i;
+ memset(bindigest, 0, 16);
+ if (len > 0) {
+ for(i = 0 ; i < len + 16 ; i++) {
+ bindigest[i % 16] ^= ( buf[i % len] + i + len );
+ bindigest[(i - 1) % 16] ^= bindigest[ ( i + buf[i % len]*buf[(i-1) % len] ) % 16];
+ }
+ }
+#endif
+
if (!asAscii) {
memcpy(digest, bindigest, 16);
} else {
@@ -70,7 +86,8 @@ int domd5mem(unsigned char * buf, int len,
}
+/* Hash of a NUL-terminated string: returns the first sizeof(unsigned long)
+   bytes of the binary MD5 digest of buff, in host byte order. */
unsigned long int md5sum32(char* buff) {
- char digest[16];
- domd5mem(buff,strlen(buff),digest,0);
- return *( (long int*)(char*)digest );
+  unsigned char md5digest[16];
+  unsigned long int result = 0;
+  domd5mem((unsigned char*) buff,(int) strlen(buff),md5digest,0);
+  /* copy instead of casting: md5digest has no alignment guarantee for a long */
+  memcpy(&result, md5digest, sizeof(result));
+  return result;
}
diff --git a/src/htsmd5.h b/src/htsmd5.h
index 84148bd..3e3b00c 100644
--- a/src/htsmd5.h
+++ b/src/htsmd5.h
@@ -42,9 +42,12 @@ Please visit our Website: http://www.httrack.com
#ifndef HTSMD5_DEFH
#define HTSMD5_DEFH
+/* Library internal definitions */
+#ifdef HTS_INTERNAL_BYTECODE
int domd5mem(unsigned char * buf, int len,
unsigned char * digest, int asAscii);
unsigned long int md5sum32(char* buff);
+#endif
#endif
diff --git a/src/htsmodules.c b/src/htsmodules.c
index 27ab855..3299c41 100644
--- a/src/htsmodules.c
+++ b/src/htsmodules.c
@@ -35,21 +35,20 @@ Please visit our Website: http://www.httrack.com
/* Author: Xavier Roche */
/* ------------------------------------------------------------ */
-#ifndef _WIN32
-#if HTS_DLOPEN
-#include <dlfcn.h>
-#endif
-#endif
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
+/* Internal engine bytecode */
+#define HTS_INTERNAL_BYTECODE
#include "htsglobal.h"
#include "htsmodules.h"
#include "htsopt.h"
extern int fspc(FILE* fp,char* type);
+#ifndef _WIN32
+#if HTS_DLOPEN
+#include <dlfcn.h>
+#endif
+#endif
+
/* >>> Put all modules definitions here */
#include "htszlib.h"
#include "htsbase.h"
@@ -71,9 +70,11 @@ t_hts_detect_swf hts_detect_swf = NULL;
t_hts_parse_swf hts_parse_swf = NULL;
int gz_is_available = 0;
+#if 0
t_gzopen gzopen = NULL;
t_gzread gzread = NULL;
t_gzclose gzclose = NULL;
+#endif
int SSL_is_available = 0;
t_SSL_shutdown SSL_shutdown = NULL;
@@ -108,6 +109,7 @@ void abortLog__fnc(char* msg, char* file, int line) {
FILE* fp = fopen("CRASH.TXT", "wb");
if (!fp) fp = fopen("/tmp/CRASH.TXT", "wb");
if (!fp) fp = fopen("C:\\CRASH.TXT", "wb");
+ if (!fp) fp = fopen("CRASH.TXT", "wb");
if (fp) {
fprintf(fp, "HTTrack " HTTRACK_VERSIONID " closed at '%s', line %d\r\n", file, line);
fprintf(fp, "Reason:\r\n%s\r\n", msg);
@@ -144,17 +146,60 @@ int hts_parse_externals(htsmoduleStruct* str) {
return -1;
}
-/* NOTE: handled NOT closed */
-void* getFunctionPtr(char* file_, char* fncname) {
- char file[1024];
+/* Append a new entry at the tail of the callback chain.
+   chain:        head of the list (walked through ->next up to the last entry)
+   moduleHandle: handle stored in the new entry
+   exitFnc:      function pointer stored in the new entry
+   Allocation failure is trapped by assertf(). */
+static void addCallback(htscallbacks* chain, void* moduleHandle, htscallbacksfncptr exitFnc) {
+ /* seek the last entry */
+ while(chain->next != NULL) {
+ chain = chain->next;
+ }
+ /* allocate and link the new tail entry */
+ chain->next = calloct(1, sizeof(htscallbacks));
+ assertf(chain->next != NULL);
+ chain = chain->next;
+ memset(chain, 0, sizeof(*chain));
+ chain->exitFnc = exitFnc;
+ chain->moduleHandle = moduleHandle;
+}
+
+void clearCallbacks(htscallbacks* chain_);
+/* Tear down a callback chain in three passes:
+   1. call every registered exit function (result ignored),
+   2. unload every module handle (FreeLibrary / dlclose),
+   3. free every entry except the first one (block #0 is not freed here)
+      and leave the chain empty.
+   A NULL chain_ is accepted and ignored. */
+void clearCallbacks(htscallbacks* chain_) {
+  htscallbacks* chain;
+  if (chain_ == NULL) /* nothing to clear (pass 3 would dereference it) */
+    return;
+  /* pass 1: exit wrappers, while all modules are still loaded */
+  for(chain = chain_ ; chain != NULL ; chain = chain->next) {
+    if (chain->exitFnc != NULL) {
+      (void) chain->exitFnc(); /* result ignored */
+      chain->exitFnc = NULL;
+    }
+  }
+  /* pass 2: unload modules */
+  for(chain = chain_ ; chain != NULL ; chain = chain->next) {
+    if (chain->moduleHandle != NULL) {
+#ifdef _WIN32
+      FreeLibrary(chain->moduleHandle);
+#else
+      dlclose(chain->moduleHandle);
+#endif
+      /* block #0 survives this call: never unload the same handle twice */
+      chain->moduleHandle = NULL;
+    }
+  }
+  /* pass 3: release the entries */
+  chain = chain_->next; // Don't free the block #0
+  while(chain != NULL) {
+    htscallbacks* nextchain = chain->next;
+    freet(chain);
+    chain = nextchain;
+  }
+  chain_->next = NULL; // Empty
+}
+
+void* getFunctionPtr(httrackp* opt, char* file_, char* fncname);
+void* getFunctionPtr(httrackp* opt, char* file_, char* fncname) {
+ char BIGSTK file[1024];
void* handle;
void* userfunction = NULL;
strcpybuff(file, file_);
#ifdef _WIN32
- handle = LoadLibrary(file);
+ handle = LoadLibraryA((char*)file);
if (handle == NULL) {
strcatbuff(file, ".dll");
- handle = LoadLibrary(file);
+ handle = LoadLibraryA((char*)file);
}
#else
handle = dlopen(file, RTLD_LAZY);
@@ -164,13 +209,61 @@ void* getFunctionPtr(char* file_, char* fncname) {
}
#endif
if (handle) {
- userfunction = (void*) DynamicGet(handle, fncname);
+ /* Thanks to Lars Clausen for the "wrapper-init" patch */
+ /* If given arguments, call "<wrappername>_init" */
+ char BIGSTK tmpName[1024];
+ char *comma;
+ if ((comma = strchr(fncname, ',')) != NULL) { /* empty arg */
+ *comma++ = '\0';
+ }
+
+ /* specific plug init */
+ {
+ t_htsWrapperPlugInit initfunction;
+ sprintf(tmpName, "%s_init", fncname);
+ initfunction = (t_htsWrapperPlugInit)DynamicGet(handle, (char*)tmpName);
+ if (initfunction != NULL) {
+ int result = (int) initfunction(comma);
+ if (!result) {
+ if (userfunction == NULL) {
+#ifdef _WIN32
+ FreeLibrary(handle);
+#else
+ dlclose(handle);
+#endif
+ }
+ return NULL;
+ }
+ }
+ }
+ /* wrapper_init() */
+ {
+ t_htsWrapperInit initfunction = (t_htsWrapperInit)DynamicGet(handle, (char*)"wrapper_init");
+ if (initfunction != NULL) {
+ if (! initfunction(fncname, comma)) {
+ if (userfunction == NULL) {
+#ifdef _WIN32
+ FreeLibrary(handle);
+#else
+ dlclose(handle);
+#endif
+ }
+ return NULL;
+ }
+ }
+ }
+ /* the function itself */
+ userfunction = (void*) DynamicGet(handle, (char*)fncname);
if (userfunction == NULL) {
#ifdef _WIN32
FreeLibrary(handle);
#else
dlclose(handle);
#endif
+ } else {
+ /* optional exit wrapper */
+ t_htsWrapperExit exitFnc = (t_htsWrapperExit) DynamicGet(handle, (char*)"wrapper_exit");
+ addCallback(&opt->state.callbacks, handle, exitFnc); // exitFnc can be null
}
}
return userfunction;
@@ -183,7 +276,10 @@ void htspe_init() {
/* >>> Put all module initializations here */
+
/* Zlib */
+ gz_is_available = 1;
+ /*
#if HTS_DLOPEN
{
void* handle;
@@ -202,13 +298,14 @@ void htspe_init() {
}
}
#endif
+ */
/* OpenSSL */
#if HTS_DLOPEN
{
void* handle;
#ifdef _WIN32
- handle = LoadLibrary("ssleay32");
+ handle = LoadLibraryA((char*)"ssleay32");
#else
/* We are compatible with 0.9.6/7 and potentially above */
handle = dlopen("libssl.so.0.9.7", RTLD_LAZY);
@@ -221,27 +318,27 @@ void htspe_init() {
}
#endif
if (handle) {
- SSL_shutdown = (t_SSL_shutdown) DynamicGet(handle, "SSL_shutdown");
- SSL_free = (t_SSL_free) DynamicGet(handle, "SSL_free");
- SSL_new = (t_SSL_new) DynamicGet(handle, "SSL_new");
- SSL_clear = (t_SSL_clear) DynamicGet(handle, "SSL_clear");
- SSL_set_fd = (t_SSL_set_fd) DynamicGet(handle, "SSL_set_fd");
- SSL_set_connect_state = (t_SSL_set_connect_state) DynamicGet(handle, "SSL_set_connect_state");
- SSL_connect = (t_SSL_connect) DynamicGet(handle, "SSL_connect");
- SSL_get_error = (t_SSL_get_error) DynamicGet(handle, "SSL_get_error");
- SSL_write = (t_SSL_write) DynamicGet(handle, "SSL_write");
- SSL_read = (t_SSL_read) DynamicGet(handle, "SSL_read");
- SSL_library_init = (t_SSL_library_init) DynamicGet(handle, "SSL_library_init");
- ERR_load_SSL_strings = (t_ERR_load_SSL_strings) DynamicGet(handle, "ERR_load_SSL_strings");
- SSLv23_client_method = (t_SSLv23_client_method) DynamicGet(handle, "SSLv23_client_method");
- SSL_CTX_new = (t_SSL_CTX_new) DynamicGet(handle, "SSL_CTX_new");
- SSL_load_error_strings = (t_SSL_load_error_strings) DynamicGet(handle, "SSL_load_error_strings");
- SSL_CTX_ctrl = (t_SSL_CTX_ctrl) DynamicGet(handle, "SSL_CTX_ctrl");
+ SSL_shutdown = (t_SSL_shutdown) DynamicGet(handle, (char*)"SSL_shutdown");
+ SSL_free = (t_SSL_free) DynamicGet(handle, (char*)"SSL_free");
+ SSL_new = (t_SSL_new) DynamicGet(handle, (char*)"SSL_new");
+ SSL_clear = (t_SSL_clear) DynamicGet(handle, (char*)"SSL_clear");
+ SSL_set_fd = (t_SSL_set_fd) DynamicGet(handle, (char*)"SSL_set_fd");
+ SSL_set_connect_state = (t_SSL_set_connect_state) DynamicGet(handle, (char*)"SSL_set_connect_state");
+ SSL_connect = (t_SSL_connect) DynamicGet(handle, (char*)"SSL_connect");
+ SSL_get_error = (t_SSL_get_error) DynamicGet(handle, (char*)"SSL_get_error");
+ SSL_write = (t_SSL_write) DynamicGet(handle, (char*)"SSL_write");
+ SSL_read = (t_SSL_read) DynamicGet(handle, (char*)"SSL_read");
+ SSL_library_init = (t_SSL_library_init) DynamicGet(handle, (char*)"SSL_library_init");
+ ERR_load_SSL_strings = (t_ERR_load_SSL_strings) DynamicGet(handle, (char*)"ERR_load_SSL_strings");
+ SSLv23_client_method = (t_SSLv23_client_method) DynamicGet(handle, (char*)"SSLv23_client_method");
+ SSL_CTX_new = (t_SSL_CTX_new) DynamicGet(handle, (char*)"SSL_CTX_new");
+ SSL_load_error_strings = (t_SSL_load_error_strings) DynamicGet(handle, (char*)"SSL_load_error_strings");
+ SSL_CTX_ctrl = (t_SSL_CTX_ctrl) DynamicGet(handle, (char*)"SSL_CTX_ctrl");
#ifdef _WIN32
- handle = LoadLibrary("libeay32");
+ handle = LoadLibraryA((char*)"libeay32");
#endif
- ERR_load_crypto_strings = (t_ERR_load_crypto_strings) DynamicGet(handle, "ERR_load_crypto_strings");
- ERR_error_string = (t_ERR_error_string) DynamicGet(handle, "ERR_error_string");
+ ERR_load_crypto_strings = (t_ERR_load_crypto_strings) DynamicGet(handle, (char*)"ERR_load_crypto_strings");
+ ERR_error_string = (t_ERR_error_string) DynamicGet(handle, (char*)"ERR_error_string");
if (SSL_shutdown && SSL_free && SSL_CTX_ctrl && SSL_new && SSL_clear &&
SSL_set_fd && SSL_set_connect_state && SSL_connect && SSL_get_error && SSL_write
@@ -262,7 +359,7 @@ void htspe_init() {
#if HTS_DLOPEN
{
#ifdef _WIN32
- void* handle = LoadLibrary("htsswf");
+ void* handle = LoadLibraryA((char*)"htsswf");
#else
void* handle = dlopen("libhtsswf.so.1", RTLD_LAZY);
#endif
@@ -300,6 +397,7 @@ static void htspe_log(htsmoduleStruct* str, char* msg) {
}
}
+/* Exported accessor: returns the global WHAT_is_available string buffer.
+   NOTE(review): presumably a summary of the optional features detected at
+   init time - confirm against the code that fills WHAT_is_available. */
+HTSEXT_API const char* hts_is_available(void);
HTSEXT_API const char* hts_is_available(void) {
return WHAT_is_available;
}
diff --git a/src/htsmodules.h b/src/htsmodules.h
index 7d1154b..5d2b989 100644
--- a/src/htsmodules.h
+++ b/src/htsmodules.h
@@ -98,14 +98,22 @@ struct htsmoduleStruct {
};
+/* Used to wrap module initialization */
+/* return 1 if init was ok */
+typedef int (*t_htsWrapperInit)(char *fn, char *args);
+typedef int (*t_htsWrapperExit)(void);
+typedef int (*t_htsWrapperPlugInit)(char *args);
+
+/* Library internal definitions */
+#ifdef HTS_INTERNAL_BYTECODE
extern void htspe_init(void);
extern int hts_parse_externals(htsmoduleStruct* str);
-extern void* getFunctionPtr(char* file, char* fncname);
extern int gz_is_available;
extern int swf_is_available;
extern int SSL_is_available;
extern int V6_is_available;
extern char WHAT_is_available[64];
+#endif
#endif
diff --git a/src/htsname.c b/src/htsname.c
index 56fa6a6..8af2062 100644
--- a/src/htsname.c
+++ b/src/htsname.c
@@ -35,14 +35,15 @@ Please visit our Website: http://www.httrack.com
/* Author: Xavier Roche */
/* ------------------------------------------------------------ */
+/* Internal engine bytecode */
+#define HTS_INTERNAL_BYTECODE
+
#include "htsname.h"
/* specific definitions */
#include "htsbase.h"
#include "htstools.h"
#include "htsmd5.h"
-#include <stdio.h>
-#include <stdlib.h>
#include <ctype.h>
/* END specific definitions */
@@ -51,7 +52,7 @@ Please visit our Website: http://www.httrack.com
#define ADD_STANDARD_PATH \
{ /* ajout nom */\
- char buff[HTS_URLMAXSIZE*2];\
+ char BIGSTK buff[HTS_URLMAXSIZE*2];\
buff[0]='\0';\
strncatbuff(buff,start_pos,(int) (nom_pos - start_pos));\
url_savename_addstr(save,buff);\
@@ -59,7 +60,7 @@ Please visit our Website: http://www.httrack.com
#define ADD_STANDARD_NAME(shortname) \
{ /* ajout nom */\
- char buff[HTS_URLMAXSIZE*2];\
+ char BIGSTK buff[HTS_URLMAXSIZE*2];\
standard_name(buff,dot_pos,nom_pos,fil_complete,(shortname));\
url_savename_addstr(save,buff);\
}
@@ -78,13 +79,38 @@ static const char *hts_tbdev[] =
};
+/* Block until back_pluggable_sockets_strict() reports a free slot.
+   While waiting: _hts_in_html_parsing is set to 6, back_wait() is called,
+   the HTS_STAT counters are refreshed and hts_htmlcheck_loop() is called.
+   WARNING: this macro contains a hidden "return -1" that leaves the
+   ENCLOSING function when hts_htmlcheck_loop() returns 0; in that case
+   _hts_in_html_parsing is not restored to its previous value.
+   Uses the caller's locals: back, back_max, opt, cache, liens, lien_tot, ptr. */
+#define URLSAVENAME_WAIT_FOR_AVAILABLE_SOCKET() do { \
+ int prev = _hts_in_html_parsing; \
+ while(back_pluggable_sockets_strict(back, back_max, opt) <= 0) { \
+ _hts_in_html_parsing = 6; \
+ /* Wait .. */ \
+ back_wait(back,back_max,opt,cache,0); \
+ /* Transfer rate */ \
+ engine_stats(); \
+ /* Refresh various stats */ \
+ HTS_STAT.stat_nsocket=back_nsoc(back,back_max); \
+ HTS_STAT.stat_errors=fspc(NULL,"error"); \
+ HTS_STAT.stat_warnings=fspc(NULL,"warning"); \
+ HTS_STAT.stat_infos=fspc(NULL,"info"); \
+ HTS_STAT.nbk=backlinks_done(liens,lien_tot,ptr); \
+ HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,back,back_max); \
+ /* Check */ \
+ if (!hts_htmlcheck_loop(back,back_max,-1,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) { \
+ return -1; \
+ } \
+ } \
+ _hts_in_html_parsing = prev; \
+} while(0)
+
// forme le nom du fichier à sauver (save) à partir de fil et adr
// système intelligent, qui renomme en cas de besoin (exemple: deux INDEX.HTML et index.html)
int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_adr,char* former_fil,char* referer_adr,char* referer_fil,httrackp* opt,lien_url** liens,int lien_tot,lien_back* back,int back_max,cache_back* cache,hash_struct* hash,int ptr,int numero_passe) {
- char newfil[HTS_URLMAXSIZE*2]; /* ="" */
- /*char normadr_[HTS_URLMAXSIZE*2];*/
- char normfil_[HTS_URLMAXSIZE*2];
+ char BIGSTK newfil[HTS_URLMAXSIZE*2]; /* ="" */
+ /*char BIGSTK normadr_[HTS_URLMAXSIZE*2];*/
+ char BIGSTK normadr_[HTS_URLMAXSIZE*2], normfil_[HTS_URLMAXSIZE*2];
+ int protocol = 0;
+ static const char* protocol_str[] = {"http", "https", "ftp", "file", "unknown"};
char* normadr;
char* normfil;
char* fil;
@@ -100,11 +126,11 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
/* 8-3 ? */
switch(opt->savename_83) {
- case 1:
+ case 1: // 8-3
max_char=8;
break;
- case 2:
- max_char=30;
+ case 2: // Level 2 File names may be up to 31 characters.
+ max_char=31;
break;
default:
max_char=8;
@@ -130,13 +156,33 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
// www-42.foo.com -> foo.com
// foo.com/bar//foobar -> foo.com/bar/foobar
if (opt->urlhack) {
- // copy of adr (withiotu protocol), used for lookups (see urlhack)
- normadr=jump_normalized(adr);
+ // copy of adr (without protocol), used for lookups (see urlhack)
+ normadr=adr_normalized(adr, normadr_);
normfil=fil_normalized(fil,normfil_);
+ } else {
+ if (link_has_authority(adr_complete)) { // https or other protocols : in "http/" subfolder
+ char* pos = strchr(adr_complete, ':');
+ if (pos != NULL) {
+ normadr_[0] = '\0';
+ strncatbuff(normadr_, adr_complete, (int)(pos - adr_complete));
+ strcatbuff(normadr_, "://");
+ strcatbuff(normadr_, normadr);
+ normadr=normadr_;
+ }
+ }
}
// à afficher sans ftp://
print_adr=jump_protocol(adr);
+ if (strfield(adr_complete, "https:")) {
+ protocol = 1;
+ } else if (strfield(adr_complete, "ftp:")) {
+ protocol = 2;
+ } else if (strfield(adr_complete, "file:")) {
+ protocol = 3;
+ } else {
+ protocol = 0;
+ }
// court-circuit pour lien primaire
if (strnotempty(adr)==0) {
@@ -199,7 +245,7 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
// chercher sans / ou avec / dans former
{
- char fil_complete_patche[HTS_URLMAXSIZE*2];
+ char BIGSTK fil_complete_patche[HTS_URLMAXSIZE*2];
strcpybuff(fil_complete_patche,normfil);
// Version avec ou sans /
if (fil_complete_patche[strlen(fil_complete_patche)-1]=='/')
@@ -254,30 +300,13 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
}
// décoder %
strcpybuff(fil,unescape_http(fil));
- /*
- {
- char tempo[HTS_URLMAXSIZE*2];
- int i,j=0;
- for (i=0;i<(int) strlen(fil);i++) {
- if (fil[i]=='%') {
- i++;
- tempo[j++]=(char) ehex(fil+i);
- i++; // sauter 2 caractères finalement
- } else
- tempo[j++]=fil[i];
- }
- tempo[j++]='\0';
- strcpybuff(fil,tempo);
- }
- */
-
-
+
/* replace shtml to html.. */
switch (ishtml(fil)) { /* .html,.shtml,.. */
case 1:
if (
- (strcmp(get_ext(fil),"html") != 0)
- && (strcmp(get_ext(fil),"htm") != 0)
+ (strfield2(get_ext(fil),"html") == 0)
+ && (strfield2(get_ext(fil),"htm") == 0)
) {
strcpybuff(ext,"html");
ext_chg=1;
@@ -286,7 +315,7 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
case 0:
if (!strnotempty(ext)) {
if (is_userknowntype(get_ext(fil))) { // mime known by user
- char mime[1024];
+ char BIGSTK mime[1024];
mime[0]=ext[0]='\0';
get_userhttptype(0,mime,get_ext(fil));
if (strnotempty(mime)) {
@@ -330,13 +359,35 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
}
}
//
+ } else if (is_userknowntype(fil)) { /* PATCH BY BRIAN SCHRÖDER.
+ Lookup mimetype not only by extension,
+ but also by filename */
+ /* Note: "foo.cgi => text/html" means that foo.cgi shall have the text/html MIME file type,
+ that is, ".html" */
+ char BIGSTK mime[1024];
+ mime[0]=ext[0]='\0';
+ get_userhttptype(0, mime, fil);
+ if (strnotempty(mime)) {
+ give_mimext(ext, mime);
+ if (strnotempty(ext)) {
+ ext_chg=1;
+ }
+ }
} else { // test imposible dans le cache, faire une requête
//
#if HTS_ANALYSTE
int hihp=_hts_in_html_parsing;
#endif
int has_been_moved=0;
- char curr_adr[HTS_URLMAXSIZE*2],curr_fil[HTS_URLMAXSIZE*2];
+ char BIGSTK curr_adr[HTS_URLMAXSIZE*2],curr_fil[HTS_URLMAXSIZE*2];
+
+ /* Ensure we don't use too many sockets by using a "testing" one
+ If we have only 1 simultaneous connection authorized, wait for pending download
+ Wait for an available slot
+ */
+ URLSAVENAME_WAIT_FOR_AVAILABLE_SOCKET();
+
+ /* Rock'in */
curr_adr[0]=curr_fil[0]='\0';
#if HTS_ANALYSTE
_hts_in_html_parsing=2; // test
@@ -383,7 +434,7 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
if (!hts_htmlcheck_loop(back,back_max,b,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) {
return -1;
} else if (_hts_cancel || !back_checkmirror(opt)) { // cancel 2 ou 1 (cancel parsing)
- back_delete(opt,back,b); // cancel test
+ back_delete(opt,cache,back,b); // cancel test
stop_looping = 1;
}
}
@@ -399,7 +450,7 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
) { // agh moved.. un tit tour de plus
if ((petits_tours<5) && (former_adr) && (former_fil)) { // on va pas tourner en rond non plus!
if ((int) strnotempty(back[b].r.location)) { // location existe!
- char mov_url[HTS_URLMAXSIZE*2],mov_adr[HTS_URLMAXSIZE*2],mov_fil[HTS_URLMAXSIZE*2];
+ char BIGSTK mov_url[HTS_URLMAXSIZE*2],mov_adr[HTS_URLMAXSIZE*2],mov_fil[HTS_URLMAXSIZE*2];
mov_url[0]=mov_adr[0]=mov_fil[0]='\0';
//
strcpybuff(mov_url,back[b].r.location); // copier URL
@@ -424,11 +475,12 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
robots_wizard* robots = (robots_wizard*) opt->robotsptr;
if (hts_acceptlink(opt,ptr,lien_tot,liens,
mov_adr,mov_fil,
+ NULL, NULL,
&set_prio_to,
NULL) == 1)
{ /* forbidden */
has_been_moved = 1;
- back_maydelete(opt,back,b); // ok
+ back_maydelete(opt,cache,back,b); // ok
strcpybuff(curr_adr,mov_adr);
strcpybuff(curr_fil,mov_fil);
mov_url[0]='\0';
@@ -439,7 +491,7 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
// ftp: stop!
if (strfield(mov_url,"ftp://")) { // ftp, ok on arrête
has_been_moved = 1;
- back_maydelete(opt,back,b); // ok
+ back_maydelete(opt,cache,back,b); // ok
strcpybuff(curr_adr,mov_adr);
strcpybuff(curr_fil,mov_fil);
stop_looping = 1;
@@ -455,6 +507,7 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
}
}
// Ajouter
+ URLSAVENAME_WAIT_FOR_AVAILABLE_SOCKET();
if (back_add(back,back_max,opt,cache,mov_adr,mov_fil,methode,referer_adr,referer_fil,1,NULL)!=-1) { // OK
if ( (opt->debug>1) && (opt->errlog!=NULL) ) {
fspc(opt->errlog,"warning"); fprintf(opt->errlog,"(during prefetch) %s (%d) to link %s at %s%s"LF,back[b].r.msg,back[b].r.statuscode,back[b].r.location,curr_adr,curr_fil);
@@ -462,7 +515,7 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
}
// libérer emplacement backing actuel et attendre le prochain
- back_maydelete(opt,back,b);
+ back_maydelete(opt,cache,back,b);
strcpybuff(curr_adr,mov_adr);
strcpybuff(curr_fil,mov_fil);
b=back_index(back,back_max,curr_adr,curr_fil,methode);
@@ -507,7 +560,7 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
fspc(opt->errlog,0); fprintf(opt->errlog,"Error: (during prefetch) %s (%d) to link %s at %s%s"LF,back[b].r.msg,back[b].r.statuscode,back[b].r.location,curr_adr,curr_fil);
test_flush;
}
- back_delete(opt,back,b);
+ back_delete(opt,cache,back,b);
return -1; // ERREUR (404 par exemple)
*/
}
@@ -531,7 +584,7 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
// FIN Si non déplacé, forcer type?
// libérer emplacement backing
- back_maydelete(opt,back,b);
+ back_maydelete(opt,cache,back,b);
// --- --- ---
// oops, a été déplacé.. on recalcule en récursif (osons!)
@@ -787,7 +840,7 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
if (!short_ver) { // Noms longs
strncatbuff(b,fil,(int) (nom_pos - fil) - 1);
} else {
- char pth[HTS_URLMAXSIZE*2],n83[HTS_URLMAXSIZE*2];
+ char BIGSTK pth[HTS_URLMAXSIZE*2],n83[HTS_URLMAXSIZE*2];
pth[0]=n83[0]='\0';
//
strncatbuff(pth,fil,(int) (nom_pos - fil) - 1);
@@ -816,7 +869,7 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
*b='\0';
{
char digest[32+2];
- char buff[HTS_URLMAXSIZE*2];
+ char BIGSTK buff[HTS_URLMAXSIZE*2];
digest[0]=buff[0]='\0';
strcpybuff(buff,adr);
strcatbuff(buff,fil_complete);
@@ -831,6 +884,11 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
strncatbuff(b,url_md5(fil_complete),(tok == 'Q')?32:4);
b+=strlen(b); // pointer à la fin
break;
+ case 'r': case 'R': // protocol
+ *b='\0';
+ strcatbuff(b, protocol_str[protocol]);
+ b+=strlen(b); // pointer à la fin
+ break;
}
} else
*b++=*a++;
@@ -1044,7 +1102,7 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
{
char* a=jump_identification(save);
if (a!=save) {
- char tempo[HTS_URLMAXSIZE*2];
+ char BIGSTK tempo[HTS_URLMAXSIZE*2];
char *b;
tempo[0]='\0';
strcpybuff(tempo,"[");
@@ -1061,7 +1119,7 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
// éviter les / au début (cause: N100)
if (save[0]=='/') {
- char tempo[HTS_URLMAXSIZE*2];
+ char BIGSTK tempo[HTS_URLMAXSIZE*2];
strcpybuff(tempo,save+1);
strcpybuff(save,tempo);
}
@@ -1110,7 +1168,7 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
case '/':
case '.':
{
- char tempo[HTS_URLMAXSIZE*2]; tempo[0]='\0';
+ char BIGSTK tempo[HTS_URLMAXSIZE*2]; tempo[0]='\0';
strncatbuff(tempo,save,(int) (a - save) + strlen(hts_tbdev[i]));
strcatbuff(tempo,"_");
strcatbuff(tempo,a+strlen(hts_tbdev[i]));
@@ -1123,15 +1181,57 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
i++;
}
}
+ /* Strip ending . or ' ' forbidden on windoz */
+ {
+ int len;
+ char* a=save;
+ while((a=strstr(a,"./"))) {
+ *a = '_';
+ }
+ a=save;
+ while((a=strstr(a," /"))) {
+ *a = '_';
+ }
+ len = (int) strlen(save);
+ if (len > 0 && ( save[len - 1] == '.' || save[len - 1] == ' ') ) {
+ save[len - 1] = '_';
+ }
+ }
#endif
// conversion 8-3 .. y compris pour les répertoires
if (opt->savename_83) {
- char n83[HTS_URLMAXSIZE*2];
+ char BIGSTK n83[HTS_URLMAXSIZE*2];
long_to_83(opt->savename_83,n83,save);
strcpybuff(save,n83);
}
+ // enforce stricter ISO9660 compliance (bug reported by Steffo Carlsson)
+ // Level 1 File names are restricted to 8 characters with a 3 character extension,
+ // upper case letters, numbers and underscore; maximum depth of directories is 8.
+ // This will be our "DOS mode"
+ // L2: 31 characters
+ // A-Z,0-9,_
+ if (opt->savename_83 > 0) {
+ char *a, *last;
+ for(last = save + strlen(save) - 1 ; last != save && *last != '/' && *last != '\\' && *last != '.' ; last--);
+ if (*last != '.') {
+ last = NULL;
+ }
+ for(a = save ; *a != '\0' ; a++) {
+ if (*a >= 'a' && *a <= 'z') {
+ *a -= 'a' - 'A';
+ }
+ else if (*a == '.') {
+ if (a != last) {
+ *a = '_';
+ }
+ }
+ else if ( ! ( (*a >= 'A' && *a <= 'Z') || (*a >= '0' && *a <= '9') || *a == '_' || *a == '/' || *a == '\\') ) {
+ *a = '_';
+ }
+ }
+ }
/* ensure that there is no ../ (potential vulnerability) */
fil_simplifie(save);
@@ -1148,7 +1248,7 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
// chemin primaire éventuel A METTRE AVANT
if (strnotempty(opt->path_html)) {
- char tempo[HTS_URLMAXSIZE*2];
+ char BIGSTK tempo[HTS_URLMAXSIZE*2];
strcpybuff(tempo,opt->path_html);
strcatbuff(tempo,save);
strcpybuff(save,tempo);
@@ -1189,17 +1289,22 @@ printf("%cParse: %d",13,i);
#if HTS_CASSE
if ((strcmp(liens[i]->adr,adr)==0) && (strcmp(liens[i]->fil,fil_complete)==0))
#else
- if ((strfield2(liens[i]->adr,adr)) && (strfield2(liens[i]->fil,fil_complete)))
+ if ((strfield2(liens[i]->adr, normadr)) && (strfield2(liens[i]->fil, normfil)))
+ //if ((strfield2(liens[i]->adr,adr)) && (strfield2(liens[i]->fil,fil_complete)))
#endif
{ // ok c'est le même lien, adresse déja définie
- //printf("Ok, %s\n",save);
- //i=lien_tot; // sortir
+ /* Take the existing name not to screw up with cAsE sEnSiTiViTy of Linux/Unix */
+ if (strcmp(liens[i]->sav, save) != 0) {
+ strcpybuff(save, liens[i]->sav);
+ }
i=0;
#if DEBUG_SAVENAME
printf("\nOK ALREADY DEFINED\n",13,i);
#endif
+#if HTS_CASSE
+#endif
} else { // utilisé par un AUTRE, changer de nom
- char tempo[HTS_URLMAXSIZE*2];
+ char BIGSTK tempo[HTS_URLMAXSIZE*2];
char* a=save+strlen(save)-1;
char* b;
int n=2;
@@ -1310,7 +1415,7 @@ char* url_md5(char* fil_complete) {
a=strchr(fil_complete,'?');
if (a) {
if (strlen(a)) {
- char buff[HTS_URLMAXSIZE*2];
+ char BIGSTK buff[HTS_URLMAXSIZE*2];
a++;
digest[0]=buff[0]='\0';
strcatbuff(buff,a); /* query string MD5 */
diff --git a/src/htsname.h b/src/htsname.h
index aae5f99..61ed1de 100644
--- a/src/htsname.h
+++ b/src/htsname.h
@@ -42,9 +42,12 @@ Please visit our Website: http://www.httrack.com
#include "htscore.h"
+/* Library internal definitions */
+#ifdef HTS_INTERNAL_BYTECODE
int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_adr,char* former_fil,char* referer_adr,char* referer_fil,httrackp* opt,lien_url** liens,int lien_tot,lien_back* back,int back_max,cache_back* cache,hash_struct* hash,int ptr,int numero_passe);
void standard_name(char* b,char* dot_pos,char* nom_pos,char* fil_complete,int short_ver);
void url_savename_addstr(char* d,char* s);
char* url_md5(char* fil_complete);
+#endif
#endif
diff --git a/src/htsnet.h b/src/htsnet.h
index dbdbcc6..7b7cc1a 100644
--- a/src/htsnet.h
+++ b/src/htsnet.h
@@ -45,7 +45,9 @@ Please visit our Website: http://www.httrack.com
#include <ctype.h>
#if HTS_WIN
// pour read
+#ifndef _WIN32_WCE
#include <io.h>
+#endif
// pour FindFirstFile
#include <winbase.h>
#else
@@ -71,12 +73,6 @@ Please visit our Website: http://www.httrack.com
#ifndef HTS_DO_NOT_REDEFINE_in_addr_t
typedef unsigned long in_addr_t;
#endif
-#undef min
-#undef max
-#undef Sleep
-#define min(a,b) ((a)>(b)?(b):(a))
-#define max(a,b) ((a)>(b)?(a):(b))
-#define Sleep(a) { if (((a)*1000)%1000000) usleep(((a)*1000)%1000000); if (((a)*1000)/1000000) sleep(((a)*1000)/1000000); }
#endif
/*
@@ -87,7 +83,7 @@ Please visit our Website: http://www.httrack.com
/* Ipv4 structures */
typedef struct in_addr INaddr;
/* This should handle all cases */
-typedef struct {
+typedef struct SOCaddr {
union {
struct sockaddr_in in;
struct sockaddr sa;
@@ -155,7 +151,7 @@ strcpy(namebuf, dot); \
/* Ipv4 structures */
typedef struct in6_addr INaddr;
/* This should handle all cases */
-typedef struct {
+typedef struct SOCaddr {
union {
struct sockaddr_in6 in6;
struct sockaddr_in in;
@@ -236,7 +232,7 @@ getnameinfo((struct sockaddr *)&(ss), sslen, \
#endif
/* Buffer structure to copy various hostent structures */
-typedef struct {
+typedef struct t_fullhostent {
t_hostent hp;
char* list[2];
char addr[HTS_MAXADDRLEN]; /* various struct sockaddr structures */
diff --git a/src/htsnostatic.c b/src/htsnostatic.c
index eff6184..22e7d7a 100644
--- a/src/htsnostatic.c
+++ b/src/htsnostatic.c
@@ -35,13 +35,16 @@ Please visit our Website: http://www.httrack.com
/* Author: Xavier Roche */
/* ------------------------------------------------------------ */
+/* Internal engine bytecode */
+#define HTS_INTERNAL_BYTECODE
+
#include "htsnostatic.h"
#include "htsbase.h"
#include "htshash.h"
#include "htsinthash.h"
-typedef struct {
+typedef struct hts_varhash {
/*
inthash values;
*/
diff --git a/src/htsnostatic.h b/src/htsnostatic.h
index f24f0ad..3bf4ec9 100644
--- a/src/htsnostatic.h
+++ b/src/htsnostatic.h
@@ -53,21 +53,12 @@ Please visit our Website: http://www.httrack.com
#ifndef HTSNOSTATIC_DEFH
#define HTSNOSTATIC_DEFH
+/* Library internal definitions */
+#ifdef HTS_INTERNAL_BYTECODE
+
#include "htscore.h"
#include "htsthread.h"
-/*
-#if USE_PTHREAD
-#if HTS_WIN
-#undef HTS_REENTRANT
-#else
-#define HTS_REENTRANT
-#endif
-#else
-#undef HTS_REENTRANT
-#endif
-*/
-
#define HTS_VAR_MAIN_HASH 127
/*
@@ -157,7 +148,61 @@ void hts_destroyvar_key(void* adr);
&cKey
*/
-#if HTS_WIN
+#ifdef _WIN32
+
+#ifdef _WIN32_WCE
+
+/* Windows CE: static only */
+#define NOSTATIC_XRESERVE(name, type, nelt) do { \
+ /*__declspec( thread )*/ static type thValue[nelt]; \
+ /* __declspec( thread ) */ int static initValue = 0; \
+ name = thValue; \
+ if (!initValue) { \
+ initValue = 1; \
+ memset(&thValue, 0, sizeof(thValue)); \
+ } \
+} while(0)
+
+#elif 1
+
+/* New Windows version: TLS */
+/* Suggested by daan at zwif.com to be more gentle with LoadLibrary (04/2004)
+See http://msdn.microsoft.com/library/en-us/vccore/html/_core_rules_and_limitations_for_tls.asp
+And especially the "DLL declares any nonlocal data or object as __declspec( thread )" section
+*/
+#define NOSTATIC_XRESERVE(name,type,nelt) do { \
+ static DWORD tlsIndex = 0; \
+ static int initValue = 0; \
+ if (initValue == 0) \
+ { \
+ if (!hts_maylockvar()) { \
+ abortLog("unable to lock mutex (not initialized?!)"); \
+ abort(); \
+ } \
+ hts_lockvar(); \
+ if (initValue == 0) { \
+ tlsIndex = TlsAlloc(); \
+ if (tlsIndex == 0xFFFFFFFF) { \
+ abortLog("unable to allocate thread local storage (TLS) for variable!"); \
+ abort(); \
+ } \
+ initValue = 1; \
+ } \
+ hts_unlockvar(); \
+ } \
+ name = (type*)TlsGetValue(tlsIndex); \
+ if (name == NULL) { \
+ name = (type*)malloc(sizeof(type)*nelt); \
+ if (name == NULL) { \
+ abortLog("unable to allocate memory for variable!"); \
+ abort(); \
+ } \
+ memset(name, 0, sizeof(type)*nelt); \
+ TlsSetValue(tlsIndex, name); \
+ } \
+} while(0)
+
+#else
/* Windows: handled by the compiler */
#define NOSTATIC_XRESERVE(name, type, nelt) do { \
@@ -170,6 +215,8 @@ void hts_destroyvar_key(void* adr);
} \
} while(0)
+#endif
+
#else
/* Un*x : slightly more complex, we have to create a thread-key */
@@ -227,3 +274,5 @@ else { \
#endif
#endif
+
+#endif
diff --git a/src/htsopt.h b/src/htsopt.h
index 77910b6..3328ce0 100644
--- a/src/htsopt.h
+++ b/src/htsopt.h
@@ -44,7 +44,7 @@ Please visit our Website: http://www.httrack.com
#include "htsbauth.h"
// structure proxy
-typedef struct {
+typedef struct t_proxy {
int active;
char name[1024];
int port;
@@ -52,14 +52,24 @@ typedef struct {
} t_proxy;
/* Structure utile pour copier en bloc les paramètres */
-typedef struct {
+typedef struct htsfilters {
char*** filters;
int* filptr;
//int* filter_max;
} htsfilters;
+/* User callbacks chain */
+typedef int (*htscallbacksfncptr)(void);
+typedef struct htscallbacks htscallbacks;
+struct htscallbacks {
+ char callbackName[128];
+ void* moduleHandle;
+ htscallbacksfncptr exitFnc;
+ htscallbacks * next;
+};
+
/* Structure état du miroir */
-typedef struct {
+typedef struct htsoptstate {
int stop;
int exit_xh;
int back_add_stats;
@@ -67,11 +77,13 @@ typedef struct {
int mimehtml_created;
char mimemid[256];
FILE* mimefp;
+ /* */
+ htscallbacks callbacks;
} htsoptstate;
// paramètres httrack (options)
-typedef struct {
+typedef struct httrackp {
int wizard; // wizard aucun/grand/petit
int flush; // fflush sur les fichiers log
int travel; // type de déplacements (same domain etc)
@@ -96,7 +108,7 @@ typedef struct {
int rateout; // nombre d'octets minium pour le transfert
int maxtime; // temps max en secondes
int maxrate; // taux de transfert max
- int maxconn; // nombre max de connexions/s
+ float maxconn; // nombre max de connexions/s
int waittime; // démarrage programmé
int cache; // génération d'un cache
//int aff_progress; // barre de progression
@@ -108,6 +120,8 @@ typedef struct {
int mimehtml; // MIME-html
int user_agent_send; // user agent (ex: httrack/1.0 [sun])
char user_agent[128];
+ char referer[256]; // referer
+ char from[256]; // from
char path_log[1024]; // chemin pour cache et log
char path_html[1024]; // chemin pour miroir
char path_bin[1024]; // chemin pour templates
@@ -135,6 +149,7 @@ typedef struct {
int urlhack; // force "url normalization" to avoid loops
int tolerant; // accepter content-length incorrect
int parseall; // essayer de tout parser (tags inconnus contenant des liens, par exemple)
+ int parsedebug; // débugger parser (debug!)
int norecatch; // ne pas reprendre les fichiers effacés localement par l'utilisateur
int verbosedisplay; // animation textuelle
char footer[256]; // ligne d'infos
@@ -156,6 +171,7 @@ typedef struct {
//
int quiet; // poser des questions autres que wizard?
int keyboard; // vérifier stdin
+ int bypass_limits; // bypass built-in limits
//
int is_update; // c'est une update (afficher "File updated...")
int dir_topindex; // reconstruire top index par la suite
@@ -164,7 +180,7 @@ typedef struct {
} httrackp;
// stats for httrack
-typedef struct {
+typedef struct hts_stat_struct {
LLint HTS_TOTAL_RECV; // flux entrant reçu
LLint stat_bytes; // octets écrits sur disque
// int HTS_TOTAL_RECV_STATE; // status: 0 tout va bien 1: ralentir un peu 2: ralentir 3: beaucoup
@@ -193,6 +209,9 @@ typedef struct {
LLint nb; // données transférées actuellement (estimation)
//
LLint rate;
+ //
+ TStamp last_connect; // last connect() call
+ TStamp last_request; // last request issued
} hts_stat_struct;
diff --git a/src/htsparse.c b/src/htsparse.c
index 3d35252..79cc1cc 100644
--- a/src/htsparse.c
+++ b/src/htsparse.c
@@ -37,12 +37,12 @@ Please visit our Website: http://www.httrack.com
/* ------------------------------------------------------------ */
+/* Internal engine bytecode */
+#define HTS_INTERNAL_BYTECODE
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
+#ifndef _WIN32_WCE
#include <fcntl.h>
+#endif
#include <ctype.h>
/* File defs */
@@ -92,7 +92,7 @@ Please visit our Website: http://www.httrack.com
abortLogFmt("not enough memory for current html document in HT_ADD_CHK : realloct(%d) failed" _ ht_size); \
exit(1); \
} \
- } \
+} \
ht_len+=A;
#define HT_ADD_ADR \
if ((opt->getmode & 1) && (ptr>0)) { \
@@ -103,11 +103,35 @@ Please visit our Website: http://www.httrack.com
}
#define HT_ADD(A) \
if ((opt->getmode & 1) && (ptr>0)) { \
- int i=strlen(A),j=ht_len; \
- if (i) { \
- HT_ADD_CHK(i) \
- memcpy(ht_buff+j, A, i); \
- ht_buff[j+i]='\0'; \
+ int i_=strlen(A),j_=ht_len; \
+ if (i_) { \
+ HT_ADD_CHK(i_) \
+ memcpy(ht_buff+j_, A, i_); \
+ ht_buff[j_+i_]='\0'; \
+ } }
+#define HT_ADD_HTMLESCAPED(A) \
+ if ((opt->getmode & 1) && (ptr>0)) { \
+ int i_, j_; \
+ char BIGSTK tempo_[HTS_URLMAXSIZE*2]; \
+ escape_for_html_print(A, tempo_); \
+ i_=strlen(tempo_); \
+ j_=ht_len; \
+ if (i_) { \
+ HT_ADD_CHK(i_) \
+ memcpy(ht_buff+j_, tempo_, i_); \
+ ht_buff[j_+i_]='\0'; \
+ } }
+#define HT_ADD_HTMLESCAPED_FULL(A) \
+ if ((opt->getmode & 1) && (ptr>0)) { \
+ int i_, j_; \
+ char BIGSTK tempo_[HTS_URLMAXSIZE*2]; \
+ escape_for_html_print_full(A, tempo_); \
+ i_=strlen(tempo_); \
+ j_=ht_len; \
+ if (i_) { \
+ HT_ADD_CHK(i_) \
+ memcpy(ht_buff+j_, tempo_, i_); \
+ ht_buff[j_+i_]='\0'; \
} }
#define HT_ADD_START \
int ht_size=(int)(r->size*5)/4+REALLOC_SIZE; \
@@ -126,12 +150,11 @@ Please visit our Website: http://www.httrack.com
#define HT_ADD_END { \
int ok=0;\
if (ht_buff) { \
- INTsys file_len=(INTsys) strlen(ht_buff);\
char digest[32+2];\
digest[0]='\0';\
- domd5mem(ht_buff,file_len,digest,1);\
- if (fsize(fconv(savename))==file_len) { \
- int mlen;\
+ domd5mem(ht_buff,ht_len,digest,1);\
+ if (fsize(fconv(savename))==ht_len) { \
+ int mlen = 0;\
char* mbuff;\
cache_readdata(cache,"//[HTML-MD5]//",savename,&mbuff,&mlen);\
if (mlen) mbuff[mlen]='\0';\
@@ -148,8 +171,8 @@ Please visit our Website: http://www.httrack.com
if (!ok) { \
fp=filecreate(savename); \
if (fp) { \
- if (file_len>0) {\
- if ((INTsys)fwrite(ht_buff,1,file_len,fp) != file_len) { \
+ if (ht_len>0) {\
+ if ((INTsys)fwrite(ht_buff,1,ht_len,fp) != ht_len) { \
int fcheck;\
if ((fcheck=check_fatal_io_errno())) {\
opt->state.exit_xh=-1;\
@@ -186,32 +209,32 @@ Please visit our Website: http://www.httrack.com
filenote(savename,NULL); \
}\
if (cache->ndx)\
- cache_writedata(cache->ndx,cache->dat,"//[HTML-MD5]//",savename,digest,(int)strlen(digest));\
+ cache_writedata(cache->ndx,cache->dat,"//[HTML-MD5]//",savename,digest,(int)strlen(digest));\
} \
freet(ht_buff); ht_buff=NULL; \
- }
+}
#define HT_ADD_FOP
// COPY IN HTSCORE.C
#define HT_INDEX_END do { \
-if (!makeindex_done) { \
-if (makeindex_fp) { \
- char tempo[1024]; \
+ if (!makeindex_done) { \
+ if (makeindex_fp) { \
+ char BIGSTK tempo[1024]; \
if (makeindex_links == 1) { \
- sprintf(tempo,"<meta HTTP-EQUIV=\"Refresh\" CONTENT=\"0; URL=%s\">"CRLF,makeindex_firstlink); \
+ sprintf(tempo,"<meta HTTP-EQUIV=\"Refresh\" CONTENT=\"0; URL=%s\">"CRLF,makeindex_firstlink); \
} else \
- tempo[0]='\0'; \
+ tempo[0]='\0'; \
fprintf(makeindex_fp,template_footer, \
- "<!-- Mirror and index made by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->", \
- tempo \
- ); \
+ "<!-- Mirror and index made by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->", \
+ tempo \
+ ); \
fflush(makeindex_fp); \
fclose(makeindex_fp); /* à ne pas oublier sinon on passe une nuit blanche */ \
makeindex_fp=NULL; \
usercommand(opt,0,NULL,fconcat(opt->path_html,"index.html"),"primary","primary"); \
-} \
-} \
-makeindex_done=1; /* ok c'est fait */ \
+ } \
+ } \
+ makeindex_done=1; /* ok c'est fait */ \
} while(0)
// Enregistrement d'un lien:
@@ -228,50 +251,50 @@ makeindex_done=1; /* ok c'est fait */ \
// COPIE DE HTSCORE.C
#define liens_record(A,F,S,FA,FF) { \
-int notecode=0; \
-int lienurl_len=((sizeof(lien_url)+HTS_ALIGN-1)/HTS_ALIGN)*HTS_ALIGN,\
+ int notecode=0; \
+ int lienurl_len=((sizeof(lien_url)+HTS_ALIGN-1)/HTS_ALIGN)*HTS_ALIGN,\
adr_len=strlen(A),\
fil_len=strlen(F),\
sav_len=strlen(S),\
cod_len=0,\
former_adr_len=strlen(FA),\
former_fil_len=strlen(FF); \
-if (former_adr_len>0) {\
+ if (former_adr_len>0) {\
former_adr_len=(former_adr_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; \
former_fil_len=(former_fil_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; \
-} else former_adr_len=former_fil_len=0;\
-if (strlen(F)>6) if (strnotempty(codebase)) if (strfield(F+strlen(F)-6,".class")) { notecode=1; \
-cod_len=strlen(codebase); cod_len=(cod_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; } \
-adr_len=(adr_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; fil_len=(fil_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; sav_len=(sav_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; \
-if ((int) lien_size < (int) (adr_len+fil_len+sav_len+cod_len+former_adr_len+former_fil_len+lienurl_len)) { \
-lien_buffer=(char*) ((void*) calloct(add_tab_alloc,1)); \
-lien_size=add_tab_alloc; \
-if (lien_buffer!=NULL) { \
-liens[lien_tot]=(lien_url*) (void*) lien_buffer; lien_buffer+=lienurl_len; lien_size-=lienurl_len; \
-liens[lien_tot]->firstblock=1; \
-} \
-} else { \
-liens[lien_tot]=(lien_url*) (void*) lien_buffer; lien_buffer+=lienurl_len; lien_size-=lienurl_len; \
-liens[lien_tot]->firstblock=0; \
-} \
-if (liens[lien_tot]!=NULL) { \
-liens[lien_tot]->adr=lien_buffer; lien_buffer+=adr_len; lien_size-=adr_len; \
-liens[lien_tot]->fil=lien_buffer; lien_buffer+=fil_len; lien_size-=fil_len; \
-liens[lien_tot]->sav=lien_buffer; lien_buffer+=sav_len; lien_size-=sav_len; \
-liens[lien_tot]->cod=NULL; \
-if (notecode) { liens[lien_tot]->cod=lien_buffer; lien_buffer+=cod_len; lien_size-=cod_len; strcpybuff(liens[lien_tot]->cod,codebase); } \
-if (former_adr_len>0) {\
-liens[lien_tot]->former_adr=lien_buffer; lien_buffer+=former_adr_len; lien_size-=former_adr_len; \
-liens[lien_tot]->former_fil=lien_buffer; lien_buffer+=former_fil_len; lien_size-=former_fil_len; \
-strcpybuff(liens[lien_tot]->former_adr,FA); \
-strcpybuff(liens[lien_tot]->former_fil,FF); \
-}\
-strcpybuff(liens[lien_tot]->adr,A); \
-strcpybuff(liens[lien_tot]->fil,F); \
-strcpybuff(liens[lien_tot]->sav,S); \
-liens_record_sav_len(liens[lien_tot]); \
-hash_write(hashptr,lien_tot,opt->urlhack); \
-} \
+ } else former_adr_len=former_fil_len=0;\
+ if (strlen(F)>6) if (strnotempty(codebase)) if (strfield(F+strlen(F)-6,".class")) { notecode=1; \
+ cod_len=strlen(codebase); cod_len=(cod_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; } \
+ adr_len=(adr_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; fil_len=(fil_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; sav_len=(sav_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; \
+ if ((int) lien_size < (int) (adr_len+fil_len+sav_len+cod_len+former_adr_len+former_fil_len+lienurl_len)) { \
+ lien_buffer=(char*) ((void*) calloct(add_tab_alloc,1)); \
+ lien_size=add_tab_alloc; \
+ if (lien_buffer!=NULL) { \
+ liens[lien_tot]=(lien_url*) (void*) lien_buffer; lien_buffer+=lienurl_len; lien_size-=lienurl_len; \
+ liens[lien_tot]->firstblock=1; \
+ } \
+ } else { \
+ liens[lien_tot]=(lien_url*) (void*) lien_buffer; lien_buffer+=lienurl_len; lien_size-=lienurl_len; \
+ liens[lien_tot]->firstblock=0; \
+ } \
+ if (liens[lien_tot]!=NULL) { \
+ liens[lien_tot]->adr=lien_buffer; lien_buffer+=adr_len; lien_size-=adr_len; \
+ liens[lien_tot]->fil=lien_buffer; lien_buffer+=fil_len; lien_size-=fil_len; \
+ liens[lien_tot]->sav=lien_buffer; lien_buffer+=sav_len; lien_size-=sav_len; \
+ liens[lien_tot]->cod=NULL; \
+ if (notecode) { liens[lien_tot]->cod=lien_buffer; lien_buffer+=cod_len; lien_size-=cod_len; strcpybuff(liens[lien_tot]->cod,codebase); } \
+ if (former_adr_len>0) {\
+ liens[lien_tot]->former_adr=lien_buffer; lien_buffer+=former_adr_len; lien_size-=former_adr_len; \
+ liens[lien_tot]->former_fil=lien_buffer; lien_buffer+=former_fil_len; lien_size-=former_fil_len; \
+ strcpybuff(liens[lien_tot]->former_adr,FA); \
+ strcpybuff(liens[lien_tot]->former_fil,FF); \
+ }\
+ strcpybuff(liens[lien_tot]->adr,A); \
+ strcpybuff(liens[lien_tot]->fil,F); \
+ strcpybuff(liens[lien_tot]->sav,S); \
+ liens_record_sav_len(liens[lien_tot]); \
+ hash_write(hashptr,lien_tot,opt->urlhack); \
+ } \
}
#define ENGINE_LOAD_CONTEXT() \
@@ -314,32 +337,67 @@ hash_write(hashptr,lien_tot,opt->urlhack); \
#define ENGINE_SAVE_CONTEXT() \
/* Apply changes */ \
- * ( (int*) (str->lien_tot_) ) = lien_tot; \
- * ( (int*) (str->ptr_) ) = ptr; \
- * ( (int*) (str->lien_size_) ) = lien_size; \
- * ( (char**) (str->lien_buffer_) ) = lien_buffer; \
- /* */ \
- * stre->error_ = error; \
- * stre->store_errpage_ = store_errpage; \
- * stre->lien_max_ = lien_max; \
- /* */ \
- *stre->makeindex_done_ = makeindex_done; \
- *stre->makeindex_fp_ = makeindex_fp; \
- *stre->makeindex_links_ = makeindex_links; \
- /* */ \
- *stre->stat_fragment_ = stat_fragment
+ * ( (int*) (str->lien_tot_) ) = lien_tot; \
+ * ( (int*) (str->ptr_) ) = ptr; \
+ * ( (int*) (str->lien_size_) ) = lien_size; \
+ * ( (char**) (str->lien_buffer_) ) = lien_buffer; \
+ /* */ \
+ * stre->error_ = error; \
+ * stre->store_errpage_ = store_errpage; \
+ * stre->lien_max_ = lien_max; \
+ /* */ \
+ *stre->makeindex_done_ = makeindex_done; \
+ *stre->makeindex_fp_ = makeindex_fp; \
+ *stre->makeindex_links_ = makeindex_links; \
+ /* */ \
+ *stre->stat_fragment_ = stat_fragment
#define _FILTERS (*opt->filters.filters)
#define _FILTERS_PTR (opt->filters.filptr)
#define _ROBOTS ((robots_wizard*)opt->robotsptr)
+/* Feed the current *adr character to the script automaton (state table lookup; no-op when not in a script) */
+#define AUTOMATE_LOOKUP_CURRENT_ADR() do { \
+ if (inscript) { \
+ int new_state_pos; \
+ new_state_pos=inscript_state[inscript_state_pos][(unsigned char)*adr]; \
+ if (new_state_pos < 0) { \
+ new_state_pos=inscript_state[inscript_state_pos][INSCRIPT_DEFAULT]; \
+ } \
+ assertf(new_state_pos >= 0); \
+ assertf(new_state_pos*sizeof(inscript_state[0]) < sizeof(inscript_state)); \
+ inscript_state_pos=new_state_pos; \
+ } \
+} while(0)
+
+/* Advance the current pointer by 'steps' characters, updating the script automaton after each step */
+#define INCREMENT_CURRENT_ADR(steps) do { \
+ int steps__ = (steps); \
+ while(steps__ > 0) { \
+ adr++; \
+ AUTOMATE_LOOKUP_CURRENT_ADR(); \
+ steps__ --; \
+ } \
+} while(0)
+
/* Main parser */
int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
/* Load engine variables */
ENGINE_LOAD_CONTEXT();
-
+
#if HTS_ANALYSTE
+ {
+ char* cAddr = r->adr;
+ int cSize = (int) r->size;
+ if ( (opt->debug>0) && (opt->log!=NULL) ) {
+ fspc(opt->log,"info"); fprintf(opt->log,"engine: preprocess-html: %s%s"LF, urladr, urlfil);
+ }
+ if (hts_htmlcheck_preprocess(&cAddr, &cSize, urladr, urlfil) == 1) {
+ r->adr = cAddr;
+ r->size = cSize;
+ }
+ }
if (hts_htmlcheck(r->adr,(int)r->size,urladr,urlfil)) {
#endif
FILE* fp=NULL; // fichier écrit localement
@@ -348,8 +406,8 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
if ( (opt->debug>1) && (opt->log!=NULL) ) {
fspc(opt->log,"debug"); fprintf(opt->log,"scan file.."LF); test_flush;
}
-
-
+
+
// Indexing!
#if HTS_MAKE_KEYWORD_INDEX
if (opt->kindex) {
@@ -364,13 +422,13 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
}
}
#endif
-
+
// Now, parsing
if ((opt->getmode & 1) && (ptr>0)) { // récupérer les html sur disque
// créer le fichier html local
HT_ADD_FOP; // écrire peu à peu le fichier
}
-
+
if (!error) {
int detect_title=0; // détection du title
int back_add_stats = opt->state.back_add_stats;
@@ -410,10 +468,11 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
char* intag_start=adr;
char* intag_startattr=NULL;
int intag_start_valid=0;
+ int intag_ctype=0;
//
int parent_relative=0; // the parent is the base path (.js, .css..)
HT_ADD_START; // débuter
-
+
/* Initialize script automate for comments, quotes.. */
memset(inscript_state, 0xff, sizeof(inscript_state));
inscript_state[INSCRIPT_START][INSCRIPT_DEFAULT]=INSCRIPT_START; /* by default, stay in START */
@@ -444,12 +503,12 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
/* statistics */
if ((opt->getmode & 1) && (ptr>0)) {
- /*
- HTS_STAT.stat_files++;
- HTS_STAT.stat_bytes+=r->size;
+ /*
+ HTS_STAT.stat_files++;
+ HTS_STAT.stat_bytes+=r->size;
*/
}
-
+
/* Primary list or URLs */
if (ptr == 0) {
intag=1;
@@ -457,28 +516,46 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
}
/* Check is the file is a .js file */
else if (
- (strfield2(r->contenttype,"application/x-javascript")!=0)
- || (strfield2(r->contenttype,"text/css")!=0)
+ (compare_mime(r->contenttype, str->url_file, "application/x-javascript")!=0)
+ || (compare_mime(r->contenttype, str->url_file, "text/css")!=0)
) { /* JavaScript js file */
- inscript=1;
- inscript_name="script";
- intag=1; // because après <script> on y est .. - pas utile
- intag_start_valid=0; // OUI car nous sommes dans du code, plus dans du "vrai" tag
- if ((opt->debug>1) && (opt->log!=NULL)) {
- fspc(opt->log,"debug"); fprintf(opt->log,"note: this file is a javascript file"LF); test_flush;
- }
- // all links must be checked against parent, not this link
- if (liens[ptr]->precedent != 0) {
- parent_relative=1;
+ inscript=1;
+ if (opt->parsedebug) { HT_ADD("<@@ inscript @@>"); }
+ inscript_name="script";
+ intag=1; // because après <script> on y est .. - pas utile
+ intag_start_valid=0; // OUI car nous sommes dans du code, plus dans du "vrai" tag
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"note: this file is a javascript file"LF); test_flush;
+ }
+ // for javascript only
+ if (compare_mime(r->contenttype, str->url_file, "application/x-javascript") != 0) {
+ // all links must be checked against parent, not this link
+ if (liens[ptr]->precedent != 0) {
+ parent_relative=1;
+ }
+ }
}
- }
- /* Or a real audio */
- else if (strfield2(r->contenttype,"audio/x-pn-realaudio")!=0) { /* realaudio link file */
- inscript=intag=1;
+ /* Or a real audio */
+ else if (compare_mime(r->contenttype, str->url_file, "audio/x-pn-realaudio")!=0) { /* realaudio link file */
+ inscript=intag=0;
inscript_name="media";
intag_start_valid=0;
- in_media="RAM"; // real media!
- }
+ in_media="LNK"; // real media! -> links
+ }
+ /* Or a m3u playlist */
+ else if (compare_mime(r->contenttype, str->url_file, "audio/x-mpegurl")!=0) { /* mp3 link file */
+ inscript=intag=0;
+ inscript_name="media";
+ intag_start_valid=0;
+ in_media="LNK"; // m3u! -> links
+ }
+ else if (compare_mime(r->contenttype, str->url_file, "application/x-authorware-map")!=0) { /* macromedia aam file */
+ inscript=intag=0;
+ inscript_name="media";
+ intag_start_valid=0;
+ in_media="AAM"; // aam
+ }
+
// Detect UTF8 format
if (is_unicode_utf8((unsigned char*) r->adr, (unsigned int) r->size) == 1) {
no_esc_utf=1;
@@ -487,8 +564,8 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
}
// Hack to prevent any problems with ram files of other files
* ( r->adr + r->size ) = '\0';
-
-
+
+
// ------------------------------------------------------------
// analyser ce qu'il y a en mémoire (fichier html)
// on scanne les balises
@@ -505,9 +582,10 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
int valid_p=0; // force to take p even if == 0
int ending_p='\0'; // ending quote?
int archivetag_p=0; // avoid multiple-archives with commas
+ int unquoted_script=0;
INSCRIPT inscript_state_pos_prev=inscript_state_pos;
error=0;
-
+
/* Hack to avoid NULL char problems with C syntax */
/* Yes, some bogus HTML pages can embed null chars
and therefore can not be properly handled if this hack is not done
@@ -516,9 +594,9 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
if ( ((int) (adr - r->adr)) < r->size)
*adr=' ';
}
-
-
-
+
+
+
/*
index.html built here
*/
@@ -546,24 +624,24 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
}
} else
p=0;
-
+
if (p) { // ok center
if (makeindex_fp==NULL) {
verif_backblue(opt,opt->path_html); // générer gif
makeindex_fp=filecreate(fconcat(opt->path_html,"index.html"));
if (makeindex_fp!=NULL) {
-
+
// Header
fprintf(makeindex_fp,template_header,
"<!-- Mirror and index made by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->"
);
-
+
} else makeindex_done=-1; // fait, erreur
}
-
+
if (makeindex_fp!=NULL) {
- char tempo[HTS_URLMAXSIZE*2];
- char s[HTS_URLMAXSIZE*2];
+ char BIGSTK tempo[HTS_URLMAXSIZE*2];
+ char BIGSTK s[HTS_URLMAXSIZE*2];
char* a=NULL;
char* b=NULL;
s[0]='\0';
@@ -594,19 +672,19 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
strncpy(s,a,b-a+1);
*(s+(b-a)+1)='\0';
}
-
+
// Body
fprintf(makeindex_fp,template_body,
tempo,
s
);
-
+
}
}
}
}
}
-
+
} else if (liens[ptr]->depth<opt->depth) { // on a sauté level1+1 et level1
HT_INDEX_END;
}
@@ -616,137 +694,141 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
/*
end -- index.html built here
*/
-
-
-
+
+
+
/* Parse */
if (
(*adr=='<') /* No starting tag */
&& (!inscript) /* Not in (java)script */
&& (!incomment) /* Not in comment (<!--) */
+ && (!in_media) /* Not in media */
) {
- intag=1;
- //parseall_incomment=0;
- //inquote=0; // effacer quote
- intag_start=adr; intag_start_valid=1;
- codebase[0]='\0'; // effacer éventuel codebase
-
- if (opt->getmode & 1) { // sauver html
- p=strfield(adr,"</html");
- if (p==0) p=strfield(adr,"<head>");
- // if (p==0) p=strfield(adr,"<doctype");
- if (p) {
- char* eol="\n";
- if (strchr(r->adr,'\r'))
- eol="\r\n";
- if (strnotempty(opt->footer)) {
- char tempo[1024+HTS_URLMAXSIZE*2];
- char gmttime[256];
- tempo[0]='\0';
- time_gmt_rfc822(gmttime);
- strcatbuff(tempo,eol);
- sprintf(tempo+strlen(tempo),opt->footer,jump_identification(urladr),urlfil,gmttime,HTTRACK_VERSIONID,"","","","","","","");
- strcatbuff(tempo,eol);
- //fwrite(tempo,1,strlen(tempo),fp);
- HT_ADD(tempo);
- }
- if (r->charset[0]) {
- HT_ADD("<meta http-equiv=\"content-type\" content=\"text/html;charset=");
- HT_ADD(r->charset);
- HT_ADD("\">");
- HT_ADD(eol);
- }
- }
- }
-
- // éliminer les <!-- (commentaires) : intag dévalidé
- if (*(adr+1)=='!')
- if (*(adr+2)=='-')
- if (*(adr+3)=='-') {
- intag=0;
- incomment=1;
- intag_start_valid=0;
+ intag=1;
+ intag_ctype=0;
+ //parseall_incomment=0;
+ //inquote=0; // effacer quote
+ intag_start=adr; intag_start_valid=1;
+ codebase[0]='\0'; // effacer éventuel codebase
+
+ if (opt->getmode & 1) { // sauver html
+ p=strfield(adr,"</html");
+ if (p==0) p=strfield(adr,"<head>");
+ // if (p==0) p=strfield(adr,"<doctype");
+ if (p) {
+ char* eol="\n";
+ if (strchr(r->adr,'\r'))
+ eol="\r\n";
+ if (strnotempty(opt->footer)) {
+ char BIGSTK tempo[1024+HTS_URLMAXSIZE*2];
+ char gmttime[256];
+ tempo[0]='\0';
+ time_gmt_rfc822(gmttime);
+ strcatbuff(tempo,eol);
+ sprintf(tempo+strlen(tempo),opt->footer,jump_identification(urladr),urlfil,gmttime,HTTRACK_VERSIONID,"","","","","","","");
+ strcatbuff(tempo,eol);
+ //fwrite(tempo,1,strlen(tempo),fp);
+ HT_ADD(tempo);
+ if (r->charset[0]) {
+ HT_ADD("<!-- Added by HTTrack --><meta http-equiv=\"content-type\" content=\"text/html;charset=");
+ HT_ADD(r->charset);
+ HT_ADD("\"><!-- /Added by HTTrack -->");
+ HT_ADD(eol);
+ }
+ }
}
-
- }
+ }
+
+ // éliminer les <!-- (commentaires) : intag dévalidé
+ if (*(adr+1)=='!')
+ if (*(adr+2)=='-')
+ if (*(adr+3)=='-') {
+ intag=0;
+ incomment=1;
+ intag_start_valid=0;
+ }
+
+ }
else if (
(*adr=='>') /* ending tag */
- && ( (!inscript) || (inscript_tag) ) /* and in tag (or in script) */
+ && ( (!inscript && !in_media) || (inscript_tag) ) /* and in tag (or in script) */
) {
- if (inscript_tag) {
- inscript_tag=inscript=0;
- intag=0;
- incomment=0;
- intag_start_valid=0;
- } else if (!incomment) {
- intag=0; //inquote=0;
-
- // entrée dans du javascript?
- // on parse ICI car il se peut qu'on ait eu a parser les src=.. dedans
- //if (!inscript) { // sinon on est dans un obj.write("..
- if ((intag_start_valid) &&
- (
- check_tag(intag_start,"script")
- ||
- check_tag(intag_start,"style")
- )
- ) {
- char* a=intag_start; // <
- // ** while(is_realspace(*(--a)));
- if (*a=='<') { // sûr que c'est un tag?
- if (check_tag(intag_start,"script"))
- inscript_name="script";
- else
- inscript_name="style";
- inscript=1;
- inscript_state_pos=INSCRIPT_START;
- intag=1; // because après <script> on y est .. - pas utile
- intag_start_valid=0; // OUI car nous sommes dans du code, plus dans du "vrai" tag
- }
- }
- } else { /* end of comment? */
- // vérifier fermeture correcte
- if ( (*(adr-1)=='-') && (*(adr-2)=='-') ) {
+ if (inscript_tag) {
+ inscript_tag=inscript=0;
intag=0;
incomment=0;
intag_start_valid=0;
- }
-#if GT_ENDS_COMMENT
- /* wrong comment ending */
- else {
- /* check if correct ending does not exists
- <!-- foo > example <!-- bar > is sometimes accepted by browsers
- when no --> is used somewhere else.. darn those browsers are dirty
- */
- if (!strstr(adr,"-->")) {
+ if (opt->parsedebug) { HT_ADD("<@@ /inscript @@>"); }
+ } else if (!incomment) {
+ intag=0; //inquote=0;
+
+ // entrée dans du javascript?
+ // on parse ICI car il se peut qu'on ait eu a parser les src=.. dedans
+ //if (!inscript) { // sinon on est dans un obj.write("..
+ if ((intag_start_valid) &&
+ (
+ check_tag(intag_start,"script")
+ ||
+ check_tag(intag_start,"style")
+ )
+ ) {
+ char* a=intag_start; // <
+ // ** while(is_realspace(*(--a)));
+ if (*a=='<') { // sûr que c'est un tag?
+ if (check_tag(intag_start,"script"))
+ inscript_name="script";
+ else
+ inscript_name="style";
+ inscript=1;
+ inscript_state_pos=INSCRIPT_START;
+ intag=1; // because après <script> on y est .. - pas utile
+ intag_start_valid=0; // OUI car nous sommes dans du code, plus dans du "vrai" tag
+ if (opt->parsedebug) { HT_ADD("<@@ inscript @@>"); }
+ }
+ }
+ } else { /* end of comment? */
+ // vérifier fermeture correcte
+ if ( (*(adr-1)=='-') && (*(adr-2)=='-') ) {
intag=0;
incomment=0;
intag_start_valid=0;
}
- }
+#if GT_ENDS_COMMENT
+ /* wrong comment ending */
+ else {
+ /* check if correct ending does not exists
+ <!-- foo > example <!-- bar > is sometimes accepted by browsers
+ when no --> is used somewhere else.. darn those browsers are dirty
+ */
+ if (!strstr(adr,"-->")) {
+ intag=0;
+ incomment=0;
+ intag_start_valid=0;
+ }
+ }
#endif
+ }
+ //}
}
+ //else if (*adr==34) {
+ // inquote=(inquote?0:1);
//}
- }
- //else if (*adr==34) {
- // inquote=(inquote?0:1);
- //}
- else if (intag || inscript) { // nous sommes dans un tag/commentaire, tester si on recoit un tag
+ else if (intag || inscript || in_media) { // nous sommes dans un tag/commentaire, tester si on recoit un tag
int p_type=0;
int p_nocatch=0;
int p_searchMETAURL=0; // chercher ..URL=<url>
int add_class=0; // ajouter .class
int add_class_dots_to_patch=0; // number of '.' in code="x.y.z<realname>"
char* p_flush=NULL;
-
-
+
+
// ------------------------------------------------------------
// parsing évolé
// ------------------------------------------------------------
- if (((isalpha((unsigned char)*adr)) || (*adr=='/') || (inscript) || (inscriptgen))) { // sinon pas la peine de tester..
-
-
- /* caractère de terminaison pour "miniparsing" javascript=.. ?
+ if (((isalpha((unsigned char)*adr)) || (*adr=='/') || (inscript) || (in_media) || (inscriptgen))) { // sinon pas la peine de tester..
+
+
+ /* caractère de terminaison pour "miniparsing" javascript=.. ?
(ex: <a href="javascript:()" action="foo"> ) */
if (inscript_tag) {
if (inscript_tag_lastc) {
@@ -754,39 +836,58 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
/* sortir */
inscript_tag=inscript=0;
incomment=0;
+ if (opt->parsedebug) { HT_ADD("<@@ /inscript @@>"); }
}
}
}
/* automate */
- if (inscript) {
- int new_state_pos;
- new_state_pos=inscript_state[inscript_state_pos][(unsigned char)*adr];
- if (new_state_pos < 0) {
- new_state_pos=inscript_state[inscript_state_pos][INSCRIPT_DEFAULT];
- }
- assertf(new_state_pos >= 0);
- assertf(new_state_pos*sizeof(inscript_state[0]) < sizeof(inscript_state));
- inscript_state_pos=new_state_pos;
- }
-
-
+ AUTOMATE_LOOKUP_CURRENT_ADR();
+
+
// Note:
// Certaines pages ne respectent pas le html
// notamment les guillements ne sont pas fixés
// Nous sommes dans un tag, donc on peut faire un test plus
// large pour pouvoi prendre en compte ces particularités
-
+
// à vérifier: ACTION, CODEBASE, VRML
-
+
if (in_media) {
- if (strcmp(in_media,"RAM")==0) { // real media
+ if (strcmp(in_media,"LNK")==0) { // real media
p=0;
valid_p=1;
}
+ else if (strcmp(in_media,"AAM")==0) { // AAM
+ if (is_space((unsigned char)adr[0]) && ! is_space((unsigned char)adr[1])) {
+ char* a = adr + 1;
+ int n = 0;
+ int ok = 0;
+ int dot = 0;
+ while(n < HTS_URLMAXSIZE/2 && a[n] != '\0' &&
+ ( ! is_space((unsigned char)a[n]) || ! ( ok = 1) )
+ ) {
+ if (a[n] == '.') {
+ dot = n;
+ }
+ n++;
+ }
+ if (ok && dot > 0) {
+ char BIGSTK tmp[HTS_URLMAXSIZE/2 + 2];
+ tmp[0] = '\0';
+ strncat(tmp, a + dot + 1, n - dot - 1);
+ if (is_knowntype(tmp) || ishtml_ext(tmp) != -1) {
+ adr++;
+ p = 0;
+ valid_p = 1;
+ unquoted_script = 1;
+ }
+ }
+ }
+ }
} else if (ptr>0) { /* pas première page 0 (primary) */
p=0; // saut pour le nom de fichier: adresse nom fichier=adr+p
-
+
// ------------------------------
// détection d'écriture JavaScript.
// osons les obj.write et les obj.href=.. ! osons!
@@ -823,31 +924,31 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
must_be_terminated=';'; // et si t'as oublié le ; tu sais pas coder
a++; // sauter =
}
-
- }*/
-
+
+ }*/
+
// on a un truc du genre instruction"code généré" dont on parse le code
if (check_this_fking_line) {
while(is_realspace(*a)) a++;
if ((*a=='\'') || (*a=='"')) { // départ de '' ou ""
char *b;
- int ex=0;
scriptgen_q=*a; // quote
b=a+1; // départ de la chaîne
// vérifier forme ("code") et pas ("code"+var), ingérable
do {
- a++; // caractère suivant
if (*a==scriptgen_q && *(a-1)!='\\') // quote non slash
- ex=1; // sortie
- if (*a==10 && *(a-1) != '\\' /* LF and no continue (\) character */
- && ( *(a-1) != '\r' || *(a-2) != '\\' ) ) /* and not CRLF and no .. */
- ex=1;
- } while(!ex);
+ break; // sortie
+ else if (*a==10 && *(a-1) != '\\' /* LF and no continue (\) character */
+ && ( *(a-1) != '\r' || *(a-2) != '\\' ) ) /* and not CRLF and no .. */
+ break;
+ else
+ a++; // caractère suivant
+ } while((a-b) < HTS_URLMAXSIZE / 2);
if (*a==scriptgen_q) { // fin du quote
a++;
while(is_realspace(*a)) a++;
if (*a==must_be_terminated) { // parenthèse fermante: ("..")
-
+
// bon, on doit parser une ligne javascript
// 1) si check.. ==1 alors c'est un nom de fichier direct, donc
// on fixe p sur le saut nécessaire pour atteindre le nom du fichier
@@ -864,7 +965,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
inscriptgen=1; // SCRIPTGEN actif
adr=b; // jump
}
-
+
if ((opt->debug>1) && (opt->log!=NULL)) {
char str[512];
str[0]='\0';
@@ -872,19 +973,19 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
fspc(opt->log,"debug"); fprintf(opt->log,"active code (%s) detected in javascript: %s"LF,(check_this_fking_line==2)?"parse":"pickup",str); test_flush;
}
}
-
+
}
-
+
}
-
-
+
+
}
}
}
// fin detection code générant javascript vers html
// ------------------------------
-
-
+
+
// analyse proprement dite, A HREF=.. etc..
if (!p) {
// si dans un tag, et pas dans un script - sauf si on analyse un obj.write("..
@@ -898,7 +999,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
p_type=2; // c'est un chemin
}
}
-
+
/* Tags supplémentaires à vérifier (<img src=..> etc) */
if (p==0) {
int i=0;
@@ -913,7 +1014,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
i++;
}
}
-
+
/* Tags supplémentaires en début à vérifier (<object .. hotspot1=..> etc) */
if (p==0) {
int i=0;
@@ -922,7 +1023,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
i++;
}
}
-
+
/* Tags supplémentaires à vérifier : URL=.. */
if (p==0) {
int i=0;
@@ -930,10 +1031,40 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
p=rech_tageq(adr,hts_detectURL[i]);
i++;
}
- if (p)
- p_searchMETAURL=1;
+ if (p) {
+ if (intag_ctype == 1) {
+ p = 0;
+#if 0
+ //if ((pos=rech_tageq(adr, "content"))) {
+ char temp[256];
+ char* token = NULL;
+ int len = rech_endtoken(adr + pos, &token);
+ if (len > 0 && len < sizeof(temp) - 2) {
+ char* chpos;
+ temp[0] = '\0';
+ strncat(temp, token, len);
+ if ((chpos = strstr(temp, "charset"))
+ &&
+ (chpos = strchr(chpos, '='))
+ ) {
+ chpos++;
+ while(is_space(*chpos)) chpod++;
+ chpos
+ }
+ }
+#endif
+ }
+ // <META HTTP-EQUIV="Refresh" CONTENT="3;URL=http://www.example.com">
+ else if (intag_ctype == 2) {
+ p_searchMETAURL=1;
+ } else {
+ p = 0; /* cancel */
+ }
+ }
+
+
}
-
+
/* Tags supplémentaires à vérifier, mais à ne pas capturer */
if (p==0) {
int i=0;
@@ -944,125 +1075,127 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
if (p)
p_nocatch=1; /* ne pas rechercher */
}
-
+
/* Evénements */
if (p==0 &&
! inscript /* we don't want events inside document.write */
) {
- int i=0;
- /* détection onLoad etc */
- while( (p==0) && (strnotempty(hts_detect_js[i])) ) {
- p=rech_tageq(adr,hts_detect_js[i]);
- i++;
- }
- /* non détecté - détecter également les onXxxxx= */
- if (p==0) {
- if ( (*adr=='o') && (*(adr+1)=='n') && isUpperLetter(*(adr+2)) ) {
- p=0;
- while(isalpha((unsigned char)adr[p]) && (p<64) ) p++;
- if (p<64) {
- while(is_space(adr[p])) p++;
- if (adr[p]=='=')
- p++;
- else p=0;
- } else p=0;
+ int i=0;
+ /* détection onLoad etc */
+ while( (p==0) && (strnotempty(hts_detect_js[i])) ) {
+ p=rech_tageq(adr,hts_detect_js[i]);
+ i++;
}
+ /* non détecté - détecter également les onXxxxx= */
+ if (p==0) {
+ if ( (*adr=='o') && (*(adr+1)=='n') && isUpperLetter(*(adr+2)) ) {
+ p=0;
+ while(isalpha((unsigned char)adr[p]) && (p<64) ) p++;
+ if (p<64) {
+ while(is_space(adr[p])) p++;
+ if (adr[p]=='=')
+ p++;
+ else p=0;
+ } else p=0;
+ }
+ }
+ /* OK, événement repéré */
+ if (p) {
+ inscript_tag_lastc=*(adr+p); /* à attendre à la fin */
+ adr+=p+1; /* saut */
+ /*
+ On est désormais dans du code javascript
+ */
+ inscript_name="";
+ inscript=inscript_tag=1;
+ inscript_state_pos=INSCRIPT_START;
+ if (opt->parsedebug) { HT_ADD("<@@ inscript @@>"); }
+ }
+ p=0; /* quoi qu'il arrive, ne rien démarrer ici */
}
- /* OK, événement repéré */
- if (p) {
- inscript_tag_lastc=*(adr+p); /* à attendre à la fin */
- adr+=p+1; /* saut */
- /*
- On est désormais dans du code javascript
- */
- inscript_name="";
- inscript=inscript_tag=1;
- inscript_state_pos=INSCRIPT_START;
- }
- p=0; /* quoi qu'il arrive, ne rien démarrer ici */
- }
-
- // <APPLET CODE=.. pour les applet java.. [CODEBASE (chemin..) à faire]
- if (p==0) {
- p=rech_tageq(adr,"code");
- if (p) {
- if ((intag_start_valid) && check_tag(intag_start,"applet")) { // dans un <applet !
- p_type=-1; // juste le nom de fichier+dossier, écire avant codebase
- add_class=1; // ajouter .class au besoin
-
- // vérifier qu'il n'y a pas de codebase APRES
- // sinon on swappe les deux.
- // pas très propre mais c'est ce qu'il y a de plus simple à faire!!
-
- {
- char *a;
- a=adr;
- while((*a) && (*a!='>') && (!rech_tageq(a,"codebase"))) a++;
- if (rech_tageq(a,"codebase")) { // banzai! codebase=
- char* b;
- b=strchr(a,'>');
- if (b) {
- if (((int) (b - adr)) < 1000) { // au total < 1Ko
- char tempo[HTS_URLMAXSIZE*2];
- tempo[0]='\0';
- strncatbuff(tempo,a,(int) (b - a) );
- strcatbuff( tempo," ");
- strncatbuff(tempo,adr,(int) (a - adr - 1));
- // éventuellement remplire par des espaces pour avoir juste la taille
- while((int) strlen(tempo)<((int) (b - adr)))
- strcatbuff(tempo," ");
- // pas d'erreur?
- if ((int) strlen(tempo) == ((int) (b - adr) )) {
- strncpy(adr,tempo,strlen(tempo)); // PAS d'octet nul à la fin!
- p=0; // DEVALIDER!!
- p_type=0;
- add_class=0;
+
+ // <APPLET CODE=.. pour les applet java.. [CODEBASE (chemin..) à faire]
+ if (p==0) {
+ p=rech_tageq(adr,"code");
+ if (p) {
+ if ((intag_start_valid) && check_tag(intag_start,"applet")) { // dans un <applet !
+ p_type=-1; // juste le nom de fichier+dossier, écire avant codebase
+ add_class=1; // ajouter .class au besoin
+
+ // vérifier qu'il n'y a pas de codebase APRES
+ // sinon on swappe les deux.
+ // pas très propre mais c'est ce qu'il y a de plus simple à faire!!
+
+ {
+ char *a;
+ a=adr;
+ while((*a) && (*a!='>') && (!rech_tageq(a,"codebase"))) a++;
+ if (rech_tageq(a,"codebase")) { // banzai! codebase=
+ char* b;
+ b=strchr(a,'>');
+ if (b) {
+ if (((int) (b - adr)) < 1000) { // au total < 1Ko
+ char BIGSTK tempo[HTS_URLMAXSIZE*2];
+ tempo[0]='\0';
+ strncatbuff(tempo,a,(int) (b - a) );
+ strcatbuff( tempo," ");
+ strncatbuff(tempo,adr,(int) (a - adr - 1));
+ // éventuellement remplire par des espaces pour avoir juste la taille
+ while((int) strlen(tempo)<((int) (b - adr)))
+ strcatbuff(tempo," ");
+ // pas d'erreur?
+ if ((int) strlen(tempo) == ((int) (b - adr) )) {
+ strncpy(adr,tempo,strlen(tempo)); // PAS d'octet nul à la fin!
+ p=0; // DEVALIDER!!
+ p_type=0;
+ add_class=0;
+ }
}
}
}
}
+
}
-
}
}
- }
-
- // liens à patcher mais pas à charger (ex: codebase)
- if (p==0) { // note: si non chargé (ex: ignorer .class) patché tout de même
- p=rech_tageq(adr,"codebase");
- if (p) {
- if ((intag_start_valid) && check_tag(intag_start,"applet")) { // dans un <applet !
- p_type=-2;
- } else p=-1; // ne plus chercher
+
+ // liens à patcher mais pas à charger (ex: codebase)
+ if (p==0) { // note: si non chargé (ex: ignorer .class) patché tout de même
+ p=rech_tageq(adr,"codebase");
+ if (p) {
+ if ((intag_start_valid) && check_tag(intag_start,"applet")) { // dans un <applet !
+ p_type=-2;
+ } else p=-1; // ne plus chercher
+ }
}
- }
-
-
- // Meta tags pour robots
- if (p==0) {
- if (opt->robots) {
- if ((intag_start_valid) && check_tag(intag_start,"meta")) {
- if (rech_tageq(adr,"name")) { // name=robots.txt
- char tempo[1100];
- char* a;
- tempo[0]='\0';
- a=strchr(adr,'>');
+
+
+ // Meta tags pour robots
+ if (p==0) {
+ if (opt->robots) {
+ if ((intag_start_valid) && check_tag(intag_start,"meta")) {
+ if (rech_tageq(adr,"name")) { // name=robots.txt
+ char tempo[1100];
+ char* a;
+ tempo[0]='\0';
+ a=strchr(adr,'>');
#if DEBUG_ROBOTS
- printf("robots.txt meta tag detected\n");
+ printf("robots.txt meta tag detected\n");
#endif
- if (a) {
- if (((int) (a - adr)) < 999 ) {
- strncatbuff(tempo,adr,(int) (a - adr));
- if (strstrcase(tempo,"content")) {
- if (strstrcase(tempo,"robots")) {
- if (strstrcase(tempo,"nofollow")) {
+ if (a) {
+ if (((int) (a - adr)) < 999 ) {
+ strncatbuff(tempo,adr,(int) (a - adr));
+ if (strstrcase(tempo,"content")) {
+ if (strstrcase(tempo,"robots")) {
+ if (strstrcase(tempo,"nofollow")) {
#if DEBUG_ROBOTS
- printf("robots.txt meta tag: nofollow in %s%s\n",urladr,urlfil);
+ printf("robots.txt meta tag: nofollow in %s%s\n",urladr,urlfil);
#endif
- nofollow=1; // NE PLUS suivre liens dans cette page
- if (opt->errlog) {
- fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Link %s%s not scanned (follow robots meta tag)"LF,urladr,urlfil);
- test_flush;
+ nofollow=1; // NE PLUS suivre liens dans cette page
+ if (opt->errlog) {
+ fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Link %s%s not scanned (follow robots meta tag)"LF,urladr,urlfil);
+ test_flush;
+ }
}
}
}
@@ -1072,379 +1205,400 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
}
}
}
- }
-
- // entrée dans une applet javascript
- /*if (!inscript) { // sinon on est dans un obj.write("..
- if (p==0)
- if (rech_sampletag(adr,"script"))
- if (check_tag(intag_start,"script")) {
- inscript=1;
- }
- }*/
-
- // Ici on procède à une analyse du code javascript pour tenter de récupérer
- // certains fichiers évidents.
- // C'est devenu obligatoire vu le nombre de pages qui intègrent
- // des images réactives par exemple
- }
- } else if (inscript) {
+
+ // charset meta tags
+ if (p==0) {
+ if ((intag_start_valid) && check_tag(intag_start,"meta")) {
+ int pos;
+ // <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
+ if ((pos=rech_tageq(adr, "http-equiv"))) {
+ const char* token = NULL;
+ int len = rech_endtoken(adr + pos, &token);
+ if (len > 0) {
+ if (strfield(token, "content-type")) {
+ intag_ctype=1;
+ }
+ else if (strfield(token, "refresh")) {
+ intag_ctype=2;
+ }
+ }
+ }
+ }
+ }
+
+ // entrée dans une applet javascript
+ /*if (!inscript) { // sinon on est dans un obj.write("..
+ if (p==0)
+ if (rech_sampletag(adr,"script"))
+ if (check_tag(intag_start,"script")) {
+ inscript=1;
+ }
+ }*/
+
+ // Ici on procède à une analyse du code javascript pour tenter de récupérer
+ // certains fichiers évidents.
+ // C'est devenu obligatoire vu le nombre de pages qui intègrent
+ // des images réactives par exemple
+ }
+ } else if (inscript) {
#if 0
- /* Check // javascript comments */
- if (*adr == 10 || *adr == 13) {
- inscript_check_comments = 1;
- inscript_in_comments = 0;
- }
- else if (inscript_check_comments) {
- if (!is_realspace(*adr)) {
- inscript_check_comments = 0;
- if (adr[0] == '/' && adr[1] == '/') {
- inscript_in_comments = 1;
+ /* Check // javascript comments */
+ if (*adr == 10 || *adr == 13) {
+ inscript_check_comments = 1;
+ inscript_in_comments = 0;
+ }
+ else if (inscript_check_comments) {
+ if (!is_realspace(*adr)) {
+ inscript_check_comments = 0;
+ if (adr[0] == '/' && adr[1] == '/') {
+ inscript_in_comments = 1;
+ }
}
}
- }
#endif
- /* Parse */
- assertf(inscript_name != NULL);
- if (
- (
- (strfield(adr,"/script") && strfield(inscript_name, "script"))
- ||
- (strfield(adr,"/style") && strfield(inscript_name, "style"))
- )
- ) {
- char* a=adr;
- //while(is_realspace(*(--a)));
- while( is_realspace(*a) ) a--;
- a--;
- if (*a=='<') { // sûr que c'est un tag?
- inscript=0;
- }
- } else if (inscript_state_pos == INSCRIPT_START /*!inscript_in_comments*/) {
- /*
- Script Analyzing - different types supported:
- foo="url"
- foo("url") or foo(url)
- foo "url"
- */
- int nc;
- char expected = '='; // caractère attendu après
- char* expected_end = ";";
- int can_avoid_quotes=0;
- char quotes_replacement='\0';
- int ensure_not_mime=0;
- if (inscript_tag)
- expected_end=";\"\'"; // voir a href="javascript:doc.location='foo'"
- nc = strfield(adr,".src"); // nom.src="image";
- if (!nc) nc = strfield(adr,".location"); // document.location="doc"
- if (!nc) nc = strfield(adr,":location"); // javascript:location="doc"
- if (!nc) nc = strfield(adr,".href"); // document.location="doc"
- if (!nc) if ( (nc = strfield(adr,".open")) ) { // window.open("doc",..
- expected='('; // parenthèse
- expected_end="),"; // fin: virgule ou parenthèse
- ensure_not_mime=1; //* ensure the url is not a mime type */
- }
- if (!nc) if ( (nc = strfield(adr,".replace")) ) { // window.replace("url")
- expected='('; // parenthèse
- expected_end=")"; // fin: parenthèse
- }
- if (!nc) if ( (nc = strfield(adr,".link")) ) { // window.link("url")
- expected='('; // parenthèse
- expected_end=")"; // fin: parenthèse
- }
- if (!nc) if ( (nc = strfield(adr,"url")) && (!isalnum(*(adr - 1)))
- && *(adr - 1) != '_'
- ) { // url(url)
- expected='('; // parenthèse
- expected_end=")"; // fin: parenthèse
- can_avoid_quotes=1;
- quotes_replacement=')';
- }
- if (!nc) if ( (nc = strfield(adr,"import")) ) { // import "url"
- if (is_space(*(adr+nc))) {
- expected=0; // no char expected
- } else
- nc=0;
- }
- if (nc) {
- char *a;
- a=adr+nc;
- while(is_realspace(*a)) a++;
- if ((*a == expected) || (!expected)) {
- if (expected)
- a++;
- while(is_realspace(*a)) a++;
- if ((*a==34) || (*a=='\'') || (can_avoid_quotes)) {
- char *b,*c;
- int ndelim=1;
- if ((*a==34) || (*a=='\''))
- a++;
- else
- ndelim=0;
- b=a;
- if (ndelim) {
- while((*b!=34) && (*b!='\'') && (*b!='\0')) b++;
+ /* Parse */
+ assertf(inscript_name != NULL);
+ if (
+ *adr == '/' &&
+ (
+ (strfield(adr,"/script") && strfield(inscript_name, "script"))
+ ||
+ (strfield(adr,"/style") && strfield(inscript_name, "style"))
+ )
+ ) {
+ char* a=adr;
+ //while(is_realspace(*(--a)));
+ while( is_realspace(*a) ) a--;
+ a--;
+ if (*a=='<') { // sûr que c'est un tag?
+ inscript=0;
+ if (opt->parsedebug) { HT_ADD("<@@ /inscript @@>"); }
+ }
+ } else if (inscript_state_pos == INSCRIPT_START /*!inscript_in_comments*/) {
+ /*
+ Script Analyzing - different types supported:
+ foo="url"
+ foo("url") or foo(url)
+ foo "url"
+ */
+ int nc;
+ char expected = '='; // caractère attendu après
+ char* expected_end = ";";
+ int can_avoid_quotes=0;
+ char quotes_replacement='\0';
+ int ensure_not_mime=0;
+ if (inscript_tag)
+ expected_end=";\"\'"; // voir a href="javascript:doc.location='foo'"
+ nc = strfield(adr,".src"); // nom.src="image";
+ if (!nc) nc = strfield(adr,".location"); // document.location="doc"
+ if (!nc) nc = strfield(adr,":location"); // javascript:location="doc"
+ if (!nc) nc = strfield(adr,".href"); // document.location="doc"
+ if (!nc) if ( (nc = strfield(adr,".open")) ) { // window.open("doc",..
+ expected='('; // parenthèse
+ expected_end="),"; // fin: virgule ou parenthèse
+ ensure_not_mime=1; //* ensure the url is not a mime type */
+ }
+ if (!nc) if ( (nc = strfield(adr,".replace")) ) { // window.replace("url")
+ expected='('; // parenthèse
+ expected_end=")"; // fin: parenthèse
+ }
+ if (!nc) if ( (nc = strfield(adr,".link")) ) { // window.link("url")
+ expected='('; // parenthèse
+ expected_end=")"; // fin: parenthèse
+ }
+ if (!nc) if ( (nc = strfield(adr,"url")) && (!isalnum(*(adr - 1)))
+ && *(adr - 1) != '_'
+ ) { // url(url)
+ expected='('; // parenthèse
+ expected_end=")"; // fin: parenthèse
+ can_avoid_quotes=1;
+ quotes_replacement=')';
}
- else {
- while((*b != quotes_replacement) && (*b!='\0')) b++;
+ if (!nc) if ( (nc = strfield(adr,"import")) ) { // import "url"
+ if (is_space(*(adr+nc))) {
+ expected=0; // no char expected
+ } else
+ nc=0;
}
- c=b--; c+=ndelim;
- while(*c==' ') c++;
- if ((strchr(expected_end,*c)) || (*c=='\n') || (*c=='\r')) {
- c-=(ndelim+1);
- if ((int) (c - a + 1)) {
- if (ensure_not_mime) {
- int i = 0;
- while(a != NULL && hts_main_mime[i] != NULL && hts_main_mime[i][0] != '\0') {
- int p;
- if ((p=strfield(a, hts_main_mime[i])) && a[p] == '/') {
- a=NULL;
- }
- i++;
+ if (nc) {
+ char *a;
+ a=adr+nc;
+ while(is_realspace(*a)) a++;
+ if ((*a == expected) || (!expected)) {
+ if (expected)
+ a++;
+ while(is_realspace(*a)) a++;
+ if ((*a==34) || (*a=='\'') || (can_avoid_quotes)) {
+ char *b,*c;
+ int ndelim=1;
+ if ((*a==34) || (*a=='\''))
+ a++;
+ else
+ ndelim=0;
+ b=a;
+ if (ndelim) {
+ while((*b!=34) && (*b!='\'') && (*b!='\0')) b++;
}
- }
- if (a != NULL) {
- if ((opt->debug>1) && (opt->log!=NULL)) {
- char str[512];
- str[0]='\0';
- strncatbuff(str,a,minimum((int) (c - a + 1),32));
- fspc(opt->log,"debug"); fprintf(opt->log,"link detected in javascript: %s"LF,str); test_flush;
+ else {
+ while((*b != quotes_replacement) && (*b!='\0')) b++;
}
- p=(int) (a - adr); // p non nul: TRAITER CHAINE COMME FICHIER
- if (can_avoid_quotes) {
- ending_p=quotes_replacement;
+ c=b--; c+=ndelim;
+ while(*c==' ') c++;
+ if ((strchr(expected_end,*c)) || (*c=='\n') || (*c=='\r')) {
+ c-=(ndelim+1);
+ if ((int) (c - a + 1)) {
+ if (ensure_not_mime) {
+ int i = 0;
+ while(a != NULL && hts_main_mime[i] != NULL && hts_main_mime[i][0] != '\0') {
+ int p;
+ if ((p=strfield(a, hts_main_mime[i])) && a[p] == '/') {
+ a=NULL;
+ }
+ i++;
+ }
+ }
+ if (a != NULL) {
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ char str[512];
+ str[0]='\0';
+ strncatbuff(str,a,minimum((int) (c - a + 1),32));
+ fspc(opt->log,"debug"); fprintf(opt->log,"link detected in javascript: %s"LF,str); test_flush;
+ }
+ p=(int) (a - adr); // p non nul: TRAITER CHAINE COMME FICHIER
+ if (can_avoid_quotes) {
+ ending_p=quotes_replacement;
+ }
+ }
+ }
}
+
+
}
}
}
-
-
- }
+
}
- }
-
}
}
- }
-
- } else { // ptr == 0
- //p=rech_tageq(adr,"primary"); // lien primaire, yeah
- p=0; // No stupid tag anymore, raw link
- valid_p=1; // Valid even if p==0
- while ((adr[p] == '\r') || (adr[p] == '\n'))
- p++;
- //can_avoid_quotes=1;
- ending_p='\r';
- }
-
- } else if (isspace((unsigned char)*adr)) {
- intag_startattr=adr+1; // attribute in tag (for dirty parsing)
- }
-
-
- // ------------------------------------------------------------
- // dernier recours - parsing "sale" : détection systématique des .gif, etc.
- // risque: générer de faux fichiers parazites
- // fix: ne parse plus dans les commentaires
- // ------------------------------------------------------------
- if ( (opt->parseall) && (ptr>0) && (!in_media) /* && (!inscript_in_comments)*/ ) { // option parsing "brut"
- //int incomment_justquit=0;
- if (!is_realspace(*adr)) {
- int noparse=0;
-
- // Gestion des /* */
+
+ } else { // ptr == 0
+ //p=rech_tageq(adr,"primary"); // lien primaire, yeah
+ p=0; // No stupid tag anymore, raw link
+ valid_p=1; // Valid even if p==0
+ while ((adr[p] == '\r') || (adr[p] == '\n'))
+ p++;
+ //can_avoid_quotes=1;
+ ending_p='\r';
+ }
+
+ } else if (isspace((unsigned char)*adr)) {
+ intag_startattr=adr+1; // attribute in tag (for dirty parsing)
+ }
+
+
+ // ------------------------------------------------------------
+ // dernier recours - parsing "sale" : détection systématique des .gif, etc.
+ // risque: générer de faux fichiers parazites
+ // fix: ne parse plus dans les commentaires
+ // ------------------------------------------------------------
+ if ( (opt->parseall) && (ptr>0) && (!in_media) /* && (!inscript_in_comments)*/ ) { // option parsing "brut"
+ //int incomment_justquit=0;
+ if (!is_realspace(*adr)) {
+ int noparse=0;
+
+ // Gestion des /* */
#if 0
- if (inscript) {
- if (parseall_incomment) {
- if ((*adr=='/') && (*(adr-1)=='*'))
- parseall_incomment=0;
- incomment_justquit=1; // ne pas noter dernier caractère
- } else {
- if ((*adr=='/') && (*(adr+1)=='*'))
- parseall_incomment=1;
- }
- } else
- parseall_incomment=0;
+ if (inscript) {
+ if (parseall_incomment) {
+ if ((*adr=='/') && (*(adr-1)=='*'))
+ parseall_incomment=0;
+ incomment_justquit=1; // ne pas noter dernier caractère
+ } else {
+ if ((*adr=='/') && (*(adr+1)=='*'))
+ parseall_incomment=1;
+ }
+ } else
+ parseall_incomment=0;
#endif
- /* ensure automate state 0 (not in comments, quotes..) */
- if (inscript && (
- inscript_state_pos != INSCRIPT_INQUOTE && inscript_state_pos != INSCRIPT_INQUOTE2
- ) ) {
- noparse=1;
- }
-
- /* vérifier que l'on est pas dans un <!-- --> pur */
- if ( (!intag) && (incomment) && (!inscript))
- noparse=1; /* commentaire */
-
- // recherche d'URLs
- if (!noparse) {
- //if ((!parseall_incomment) && (!noparse)) {
- if (!p) { // non déja trouvé
- if (adr != r->adr) { // >1 caractère
- // scanner les chaines
- if ((*adr == '\"') || (*adr=='\'')) { // "xx.gif" 'xx.gif'
- if (strchr("=(,",parseall_lastc)) { // exemple: a="img.gif.. (handles comments)
- char *a=adr;
- char stop=*adr; // " ou '
- int count=0;
-
- // sauter caractères
- a++;
- // copier
- while((*a) && (*a!='\'') && (*a!='\"') && (count<HTS_URLMAXSIZE)) { count++; a++; }
-
- // ok chaine terminée par " ou '
- if ((*a == stop) && (count<HTS_URLMAXSIZE) && (count>0)) {
- char c;
- char* aend;
- //
- aend=a; // sauver début
- a++;
- while(is_taborspace(*a)) a++;
- c=*a;
- if (strchr("),;>/+\r\n",c)) { // exemple: ..img.gif";
- // le / est pour funct("img.gif" /* URL */);
- char tempo[HTS_URLMAXSIZE*2];
- char type[256];
- int url_ok=0; // url valide?
- tempo[0]='\0'; type[0]='\0';
- //
- strncatbuff(tempo,adr+1,count);
- //
- if ((!strchr(tempo,' ')) || inscript) { // espace dedans: méfiance! (sauf dans code javascript)
- int invalid_url=0;
-
- // escape
- unescape_amp(tempo);
-
- // Couper au # ou ? éventuel
- {
- char* a=strchr(tempo,'#');
- if (a)
- *a='\0';
- a=strchr(tempo,'?');
- if (a)
- *a='\0';
- }
-
- // vérifier qu'il n'y a pas de caractères spéciaux
- if (!strnotempty(tempo))
- invalid_url=1;
- else if (strchr(tempo,'*')
- || strchr(tempo,'<')
- || strchr(tempo,'>')
- || strchr(tempo,',') /* list of files ? */
- || strchr(tempo,'\"') /* potential parsing bug */
- || strchr(tempo,'\'') /* potential parsing bug */
- )
- invalid_url=1;
- else if (tempo[0] == '.' && isalnum(tempo[1])) // ".gif"
- invalid_url=1;
-
- /* non invalide? */
- if (!invalid_url) {
- // Un plus à la fin? Alors ne pas prendre sauf si extension ("/toto.html#"+tag)
- if (c!='+') { // PAS de plus à la fin
+ /* ensure automate state 0 (not in comments, quotes..) */
+ if (inscript && (
+ inscript_state_pos != INSCRIPT_INQUOTE && inscript_state_pos != INSCRIPT_INQUOTE2
+ ) ) {
+ noparse=1;
+ }
+
+ /* vérifier que l'on est pas dans un <!-- --> pur */
+ if ( (!intag) && (incomment) && (!inscript))
+ noparse=1; /* commentaire */
+
+ // recherche d'URLs
+ if (!noparse) {
+ //if ((!parseall_incomment) && (!noparse)) {
+ if (!p) { // non déja trouvé
+ if (adr != r->adr) { // >1 caractère
+ // scanner les chaines
+ if ((*adr == '\"') || (*adr=='\'')) { // "xx.gif" 'xx.gif'
+ if (strchr("=(,",parseall_lastc)) { // exemple: a="img.gif.. (handles comments)
+ char *a=adr;
+ char stop=*adr; // " ou '
+ int count=0;
+
+ // sauter caractères
+ a++;
+ // copier
+ while((*a) && (*a!='\'') && (*a!='\"') && (count<HTS_URLMAXSIZE)) { count++; a++; }
+
+ // ok chaine terminée par " ou '
+ if ((*a == stop) && (count<HTS_URLMAXSIZE) && (count>0)) {
+ char c;
+ char* aend;
+ //
+ aend=a; // sauver début
+ a++;
+ while(is_taborspace(*a)) a++;
+ c=*a;
+ if (strchr("),;>/+\r\n",c)) { // exemple: ..img.gif";
+ // le / est pour funct("img.gif" /* URL */);
+ char BIGSTK tempo[HTS_URLMAXSIZE*2];
+ char type[256];
+ int url_ok=0; // url valide?
+ tempo[0]='\0'; type[0]='\0';
+ //
+ strncatbuff(tempo,adr+1,count);
+ //
+ if ((!strchr(tempo,' ')) || inscript) { // espace dedans: méfiance! (sauf dans code javascript)
+ int invalid_url=0;
+
+ // escape
+ unescape_amp(tempo);
+
+ // Couper au # ou ? éventuel
+ {
+ char* a=strchr(tempo,'#');
+ if (a)
+ *a='\0';
+ a=strchr(tempo,'?');
+ if (a)
+ *a='\0';
+ }
+
+ // vérifier qu'il n'y a pas de caractères spéciaux
+ if (!strnotempty(tempo))
+ invalid_url=1;
+ else if (strchr(tempo,'*')
+ || strchr(tempo,'<')
+ || strchr(tempo,'>')
+ || strchr(tempo,',') /* list of files ? */
+ || strchr(tempo,'\"') /* potential parsing bug */
+ || strchr(tempo,'\'') /* potential parsing bug */
+ )
+ invalid_url=1;
+ else if (tempo[0] == '.' && isalnum(tempo[1])) // ".gif"
+ invalid_url=1;
+
+ /* non invalide? */
+ if (!invalid_url) {
+ // Un plus à la fin? Alors ne pas prendre sauf si extension ("/toto.html#"+tag)
+ if (c!='+') { // PAS de plus à la fin
#if 0
- char* a;
+ char* a;
#endif
- // "Comparisons of scheme names MUST be case-insensitive" (RFC2616)
- //if ((strncmp(tempo,"http://",7)==0) || (strncmp(tempo,"ftp://",6)==0)) // ok pas de problème
- if (
- (strfield(tempo,"http:"))
- || (strfield(tempo,"ftp:"))
+ // "Comparisons of scheme names MUST be case-insensitive" (RFC2616)
+ //if ((strncmp(tempo,"http://",7)==0) || (strncmp(tempo,"ftp://",6)==0)) // ok pas de problème
+ if (
+ (strfield(tempo,"http:"))
+ || (strfield(tempo,"ftp:"))
#if HTS_USEOPENSSL
- || (
- SSL_is_available &&
- (strfield(tempo,"https:"))
- )
+ || (
+ SSL_is_available &&
+ (strfield(tempo,"https:"))
+ )
#endif
- ) // ok pas de problème
- url_ok=1;
- else if (tempo[strlen(tempo)-1]=='/') { // un slash: ok..
- if (inscript) // sinon si pas javascript, méfiance (répertoire style base?)
- url_ok=1;
- }
+ ) // ok pas de problème
+ url_ok=1;
+ else if (tempo[strlen(tempo)-1]=='/') { // un slash: ok..
+ if (inscript) // sinon si pas javascript, méfiance (répertoire style base?)
+ url_ok=1;
+ }
#if 0
- else if ((a=strchr(tempo,'/'))) { // un slash: ok..
- if (inscript) { // sinon si pas javascript, méfiance (style "text/css")
- if (strchr(a+1,'/')) // un seul / : abandon (STYLE type='text/css')
- if (!strchr(tempo,' ')) // avoid spaces (too dangerous for comments)
+ else if ((a=strchr(tempo,'/'))) { // un slash: ok..
+ if (inscript) { // sinon si pas javascript, méfiance (style "text/css")
+ if (strchr(a+1,'/')) // un seul / : abandon (STYLE type='text/css')
+ if (!strchr(tempo,' ')) // avoid spaces (too dangerous for comments)
+ url_ok=1;
+ }
+ }
+#endif
+ }
+ // Prendre si extension reconnue
+ if (!url_ok) {
+ get_httptype(type,tempo,0);
+ if (strnotempty(type)) // type reconnu!
url_ok=1;
+ else if (is_dyntype(get_ext(tempo))) // reconnu php,cgi,asp..
+ url_ok=1;
+ // MAIS pas les foobar@aol.com !!
+ if (strchr(tempo,'@'))
+ url_ok=0;
}
- }
-#endif
- }
- // Prendre si extension reconnue
- if (!url_ok) {
- get_httptype(type,tempo,0);
- if (strnotempty(type)) // type reconnu!
- url_ok=1;
- else if (is_dyntype(get_ext(tempo))) // reconnu php,cgi,asp..
- url_ok=1;
- // MAIS pas les foobar@aol.com !!
- if (strchr(tempo,'@'))
- url_ok=0;
- }
- //
- // Ok, cela pourrait être une URL
- if (url_ok) {
-
- // Check if not fodbidden tag (id,name..)
- if (intag_start_valid) {
- if (intag_start)
- if (intag_startattr)
- if (intag)
- if (!inscript)
- if (!incomment) {
- int i=0,nop=0;
- while( (nop==0) && (strnotempty(hts_nodetect[i])) ) {
- nop=rech_tageq(intag_startattr,hts_nodetect[i]);
- i++;
- }
- // Forbidden tag
- if (nop) {
- url_ok=0;
- if ((opt->debug>1) && (opt->log!=NULL)) {
- fspc(opt->log,"debug"); fprintf(opt->log,"dirty parsing: bad tag avoided: %s"LF,hts_nodetect[i-1]); test_flush;
+ //
+ // Ok, cela pourrait être une URL
+ if (url_ok) {
+
+ // Check if not fodbidden tag (id,name..)
+ if (intag_start_valid) {
+ if (intag_start)
+ if (intag_startattr)
+ if (intag)
+ if (!inscript)
+ if (!incomment) {
+ int i=0,nop=0;
+ while( (nop==0) && (strnotempty(hts_nodetect[i])) ) {
+ nop=rech_tageq(intag_startattr,hts_nodetect[i]);
+ i++;
+ }
+ // Forbidden tag
+ if (nop) {
+ url_ok=0;
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"dirty parsing: bad tag avoided: %s"LF,hts_nodetect[i-1]); test_flush;
+ }
+ }
}
- }
- }
- }
-
-
- // Accepter URL, on la traitera comme une URL normale!!
- if (url_ok) {
- valid_p = 1;
- p = 0;
+ }
+
+
+ // Accepter URL, on la traitera comme une URL normale!!
+ if (url_ok) {
+ valid_p = 1;
+ p = 0;
+ }
+
+ }
}
-
}
}
}
- }
}
}
}
+ } // p == 0
+
+ } // not in comment
+
+ // plus dans un commentaire
+ if ( inscript_state_pos == INSCRIPT_START
+ && inscript_state_pos_prev == INSCRIPT_START) {
+ parseall_lastc=*adr; // caractère avant le prochain
}
- } // p == 0
-
- } // not in comment
-
- // plus dans un commentaire
- if ( inscript_state_pos == INSCRIPT_START
- && inscript_state_pos_prev == INSCRIPT_START) {
- parseall_lastc=*adr; // caractère avant le prochain
- }
} // if realspace
} // if parseall
-
-
+
+
// ------------------------------------------------------------
// p!=0 : on a repéré un éventuel lien
// ------------------------------------------------------------
@@ -1457,11 +1611,11 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
char quote='\0';
int quoteinscript=0;
int noquote=0;
-
+
// si nofollow ou un stop a été déclenché, réécrire tous les liens en externe
if ((nofollow) || (opt->state.stop))
p_nocatch=1;
-
+
// écrire codebase avant, flusher avant code
if ((p_type==-1) || (p_type==-2)) {
if ((opt->getmode & 1) && (ptr>0)) {
@@ -1469,116 +1623,120 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
}
lastsaved=adr; // dernier écrit+1
}
-
+
// sauter espaces
- adr+=p;
+ // adr+=p;
+ INCREMENT_CURRENT_ADR(p);
while( ( is_space(*adr) || (
- inscriptgen
- && adr[0] == '\\'
- && is_space(adr[1])
- )
- )
- && quote == '\0'
- ) {
- if (!quote)
- if ((*adr=='\"') || (*adr=='\'')) {
- quote=*adr; // on doit attendre cela à la fin
- if (inscriptgen && *(adr - 1) == '\\') {
- quoteinscript=1; /* will wait for \" */
+ inscriptgen
+ && adr[0] == '\\'
+ && is_space(adr[1])
+ )
+ )
+ && quote == '\0'
+ ) {
+ if (!quote)
+ if ((*adr=='\"') || (*adr=='\'')) {
+ quote=*adr; // on doit attendre cela à la fin
+ if (inscriptgen && *(adr - 1) == '\\') {
+ quoteinscript=1; /* will wait for \" */
+ }
+ }
+ // puis quitter
+ // adr++; // sauter les espaces, "" et cie
+ INCREMENT_CURRENT_ADR(1);
+ }
+
+ /* Stop at \n (LF) if primary links or link lists */
+ if (ptr == 0 || (in_media && strcmp(in_media,"LNK")==0))
+ quote='\n';
+ /* s'arrêter que ce soit un ' ou un " : pour document.write('<img src="foo'+a); par exemple! */
+ else if (inscript && ! unquoted_script)
+ noquote=1;
+
+ // sauter éventuel \" ou \' javascript
+ if (inscript) { // on est dans un obj.write("..
+ if (*adr=='\\') {
+ if ((*(adr+1)=='\'') || (*(adr+1)=='"')) { // \" ou \'
+ // adr+=2; // sauter
+ INCREMENT_CURRENT_ADR(2);
}
- }
- // puis quitter
- adr++; // sauter les espaces, "" et cie
- }
-
- /* Stop at \n (LF) if primary links*/
- if (ptr == 0)
- quote='\n';
- /* s'arrêter que ce soit un ' ou un " : pour document.write('<img src="foo'+a); par exemple! */
- else if (inscript)
- noquote=1;
-
- // sauter éventuel \" ou \' javascript
- if (inscript) { // on est dans un obj.write("..
- if (*adr=='\\') {
- if ((*(adr+1)=='\'') || (*(adr+1)=='"')) { // \" ou \'
- adr+=2; // sauter
}
}
- }
-
- // sauter content="1;URL=http://..
- if (p_searchMETAURL) {
- int l=0;
- while(
- (adr + l + 4 < r->adr + r->size)
- && (!strfield(adr+l,"URL="))
- && (l<128) ) l++;
- if (!strfield(adr+l,"URL="))
- ok=-1;
- else
- adr+=(l+4);
- }
-
- /* éviter les javascript:document.location=.. : les parser, plutôt */
- if (ok!=-1) {
- if (strfield(adr,"javascript:")
- && ! inscript /* we don't want to parse 'javascript:' inside document.write inside scripts */
- ) {
- ok=-1;
- /*
- On est désormais dans du code javascript
- */
- inscript_name="";
- inscript_tag=inscript=1;
- inscript_state_pos=INSCRIPT_START;
- inscript_tag_lastc=quote; /* à attendre à la fin */
+
+ // sauter content="1;URL=http://..
+ if (p_searchMETAURL) {
+ int l=0;
+ while(
+ (adr + l + 4 < r->adr + r->size)
+ && (!strfield(adr+l,"URL="))
+ && (l<128) ) l++;
+ if (!strfield(adr+l,"URL="))
+ ok=-1;
+ else
+ adr+=(l+4);
}
- }
-
- if (p_type==1) {
- if (*adr=='#') {
- adr++; // sauter # pour usemap etc
+
+ /* éviter les javascript:document.location=.. : les parser, plutôt */
+ if (ok!=-1) {
+ if (strfield(adr,"javascript:")
+ && ! inscript /* we don't want to parse 'javascript:' inside document.write inside scripts */
+ ) {
+ ok=-1;
+ /*
+ On est désormais dans du code javascript
+ */
+ inscript_name="";
+ inscript_tag=inscript=1;
+ inscript_state_pos=INSCRIPT_START;
+ inscript_tag_lastc=quote; /* à attendre à la fin */
+ if (opt->parsedebug) { HT_ADD("<@@ inscript @@>"); }
+ }
}
- }
- eadr=adr;
-
- // ne pas flusher après code si on doit écrire le codebase avant!
- if ((p_type!=-1) && (p_type!=2) && (p_type!=-2)) {
- if ((opt->getmode & 1) && (ptr>0)) {
- HT_ADD_ADR; // refresh
+
+ if (p_type==1) {
+ if (*adr=='#') {
+ adr++; // sauter # pour usemap etc
+ }
}
- lastsaved=adr; // dernier écrit+1
- // après on écrira soit les données initiales,
- // soir une URL/lien modifié!
- } else if (p_type==-1) p_flush=adr; // flusher jusqu'à adr ensuite
-
- if (ok!=-1) { // continuer
- // découper le lien
- do {
- if ((* (unsigned char*) eadr)<32) { // caractère de contrôle (ou \0)
- if (!is_space(*eadr))
- ok=0;
+ eadr=adr;
+
+ // ne pas flusher après code si on doit écrire le codebase avant!
+ if ((p_type!=-1) && (p_type!=2) && (p_type!=-2)) {
+ if ((opt->getmode & 1) && (ptr>0)) {
+ HT_ADD_ADR; // refresh
}
- if ( ( ((int) (eadr - adr)) ) > HTS_URLMAXSIZE) // ** trop long, >HTS_URLMAXSIZE caractères (on prévoit HTS_URLMAXSIZE autres pour path)
- ok=-1; // ne pas traiter ce lien
-
- if (ok > 0) {
- //if (*eadr!=' ') {
- if (is_space(*eadr)) { // guillemets,CR, etc
- if (
- ( *eadr == quote && ( !quoteinscript || *(eadr -1) == '\\') ) // end quote
- || ( noquote && (*eadr == '\"' || *eadr == '\'') ) // end at any quote
- || (!noquote && quote == '\0' && is_realspace(*eadr) ) // unquoted href
- ) // si pas d'attente de quote spéciale ou si quote atteinte
+ lastsaved=adr; // dernier écrit+1
+ // après on écrira soit les données initiales,
+ // soir une URL/lien modifié!
+ } else if (p_type==-1) p_flush=adr; // flusher jusqu'à adr ensuite
+
+ if (ok!=-1) { // continuer
+ // découper le lien
+ do {
+ if ((* (unsigned char*) eadr)<32) { // caractère de contrôle (ou \0)
+ if (!is_space(*eadr))
ok=0;
- } else if (ending_p && (*eadr==ending_p))
- ok=0;
- else {
- switch(*eadr) {
+ }
+ if ( ( ((int) (eadr - adr)) ) > HTS_URLMAXSIZE) // ** trop long, >HTS_URLMAXSIZE caractères (on prévoit HTS_URLMAXSIZE autres pour path)
+ ok=-1; // ne pas traiter ce lien
+
+ if (ok > 0) {
+ //if (*eadr!=' ') {
+ if (is_space(*eadr)) { // guillemets,CR, etc
+ if (
+ ( *eadr == quote && ( !quoteinscript || *(eadr -1) == '\\') ) // end quote
+ || ( noquote && (*eadr == '\"' || *eadr == '\'') ) // end at any quote
+ || (!noquote && quote == '\0' && is_realspace(*eadr) ) // unquoted href
+ ) // si pas d'attente de quote spéciale ou si quote atteinte
+ ok=0;
+ } else if (ending_p && (*eadr==ending_p))
+ ok=0;
+ else {
+ switch(*eadr) {
case '>':
if (!quote) {
- if (!inscript) {
+ if (!inscript && !in_media) {
intag=0; // PLUS dans un tag!
intag_start_valid=0;
}
@@ -1593,404 +1751,385 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
// case '?': non!
case '\\': if (inscript) ok=0; break; // \" ou \' point d'arrêt
case '?': quote_adr=adr; break; // noter position query
+ }
}
- }
- //}
- }
- eadr++;
- } while(ok==1);
-
- // Empty link detected
- if ( (((int) (eadr - adr))) <= 1) { // link empty
- ok=-1; // No
- if (*adr != '#') { // Not empty+unique #
- if ( (((int) (eadr - adr)) == 1)) { // 1=link empty with delim (end_adr-start_adr)
- if (quote) {
- if ((opt->getmode & 1) && (ptr>0)) {
- HT_ADD("#"); // We add this for a <href="">
+ //}
+ }
+ eadr++;
+ } while(ok==1);
+
+ // Empty link detected
+ if ( (((int) (eadr - adr))) <= 1) { // link empty
+ ok=-1; // No
+ if (*adr != '#') { // Not empty+unique #
+ if ( (((int) (eadr - adr)) == 1)) { // 1=link empty with delim (end_adr-start_adr)
+ if (quote) {
+ if ((opt->getmode & 1) && (ptr>0)) {
+ HT_ADD("#"); // We add this for a <href="">
+ }
}
}
}
}
- }
- // This is a dirty and horrible hack to avoid parsing an Adobe GoLive bogus tag
- if (strfield(adr, "(Empty Reference!)")) {
- ok=-1; // No
+ // This is a dirty and horrible hack to avoid parsing an Adobe GoLive bogus tag
+ if (strfield(adr, "(Empty Reference!)")) {
+ ok=-1; // No
+ }
+
}
-
- }
-
- if (ok==0) { // tester un lien
- char lien[HTS_URLMAXSIZE*2];
- int meme_adresse=0; // 0 par défaut pour primary
- //char *copie_de_adr=adr;
- //char* p;
-
- // construire lien (découpage)
- if ( (((int) (eadr - adr))-1) < HTS_URLMAXSIZE ) { // pas trop long?
- strncpy(lien,adr,((int) (eadr - adr))-1);
- *(lien+ (((int) (eadr - adr)))-1 )='\0';
- //printf("link: %s\n",lien);
- // supprimer les espaces
- while((lien[strlen(lien)-1]==' ') && (strnotempty(lien))) lien[strlen(lien)-1]='\0';
-
-
-#if HTS_STRIP_DOUBLE_SLASH
- // supprimer les // en / (sauf pour http://)
- {
- char *a,*p,*q;
- int done=0;
- a=strchr(lien,':'); // http://
- if (a) {
- a++;
- while(*a=='/') a++; // position après http://
- } else {
- a=lien; // début
- while(*a=='/') a++; // position après http://
+
+ if (ok==0) { // tester un lien
+ char BIGSTK lien[HTS_URLMAXSIZE*2];
+ int meme_adresse=0; // 0 par défaut pour primary
+ //char *copie_de_adr=adr;
+ //char* p;
+
+ // construire lien (découpage)
+ if ( (((int) (eadr - adr))-1) < HTS_URLMAXSIZE ) { // pas trop long?
+ strncpy(lien,adr,((int) (eadr - adr))-1);
+ *(lien+ (((int) (eadr - adr)))-1 )='\0';
+ //printf("link: %s\n",lien);
+ // supprimer les espaces
+ while((lien[strlen(lien)-1]==' ') && (strnotempty(lien))) lien[strlen(lien)-1]='\0';
+
+
+ } else
+ lien[0]='\0'; // erreur
+
+
+ // ------------------------------------------------------
+ // Lien repéré et extrait
+ if (strnotempty(lien)>0) { // construction du lien
+ char BIGSTK adr[HTS_URLMAXSIZE*2],fil[HTS_URLMAXSIZE*2]; // ATTENTION adr cache le "vrai" adr
+ int forbidden_url=-1; // lien non interdit (mais non autorisé..)
+ int just_test_it=0; // mode de test des liens
+ int set_prio_to=0; // pour capture de page isolée
+ int import_done=0; // lien importé (ne pas scanner ensuite *à priori*)
+ //
+ adr[0]='\0'; fil[0]='\0';
+ //
+ // 0: autorisé
+ // 1: interdit (patcher tout de même adresse)
+
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"link detected in html: %s"LF,lien); test_flush;
}
- q=strchr(a,'?'); // ne pas traiter après '?'
- if (!q)
- q=a+strlen(a)-1;
- while(( p=strstr(a,"//")) && (!done) ) { // remplacer // par /
- if ((int) p>(int) q) { // après le ? (toto.cgi?param=1//2.3)
- done=1; // stopper
- } else {
- char tempo[HTS_URLMAXSIZE*2];
- tempo[0]='\0';
- strncatbuff(tempo,a,(int) p - (int) a);
- strcatbuff (tempo,p+1);
- strcpybuff(a,tempo); // recopier
+
+ // external check
+#if HTS_ANALYSTE
+ if (!hts_htmlcheck_linkdetected(lien) || !hts_htmlcheck_linkdetected2(lien, intag_start)) {
+ error=1; // erreur
+ if (opt->errlog) {
+ fspc(opt->errlog,"error"); fprintf(opt->errlog,"Link %s refused by external wrapper"LF,lien);
+ test_flush;
}
}
- }
#endif
-
- } else
- lien[0]='\0'; // erreur
-
- // ------------------------------------------------------
- // Lien repéré et extrait
- if (strnotempty(lien)>0) { // construction du lien
- char adr[HTS_URLMAXSIZE*2],fil[HTS_URLMAXSIZE*2]; // ATTENTION adr cache le "vrai" adr
- int forbidden_url=-1; // lien non interdit (mais non autorisé..)
- int just_test_it=0; // mode de test des liens
- int set_prio_to=0; // pour capture de page isolée
- int import_done=0; // lien importé (ne pas scanner ensuite *à priori*)
- //
- adr[0]='\0'; fil[0]='\0';
- //
- // 0: autorisé
- // 1: interdit (patcher tout de même adresse)
-
- if ((opt->debug>1) && (opt->log!=NULL)) {
- fspc(opt->log,"debug"); fprintf(opt->log,"link detected in html: %s"LF,lien); test_flush;
- }
-
- // external check
-#if HTS_ANALYSTE
- if (!hts_htmlcheck_linkdetected(lien)) {
- error=1; // erreur
- if (opt->errlog) {
- fspc(opt->errlog,"error"); fprintf(opt->errlog,"Link %s refused by external wrapper"LF,lien);
- test_flush;
+
+#if HTS_STRIP_DOUBLE_SLASH
+ // supprimer les // en / (sauf pour http://)
+ if (opt->urlhack) {
+ char *a,*p,*q;
+ int done=0;
+ a=strchr(lien,':'); // http://
+ if (a) {
+ a++;
+ while(*a=='/') a++; // position après http://
+ } else {
+ a=lien; // début
+ while(*a=='/') a++; // position après http://
+ }
+ q=strchr(a,'?'); // ne pas traiter après '?'
+ if (!q)
+ q=a+strlen(a)-1;
+ while(( p=strstr(a,"//")) && (!done) ) { // remplacer // par /
+ if ((int) p>(int) q) { // après le ? (toto.cgi?param=1//2.3)
+ done=1; // stopper
+ } else {
+ char BIGSTK tempo[HTS_URLMAXSIZE*2];
+ tempo[0]='\0';
+ strncatbuff(tempo,a,(int) p - (int) a);
+ strcatbuff (tempo,p+1);
+ strcpybuff(a,tempo); // recopier
+ }
+ }
}
- }
#endif
-
- // purger espaces de début et fin, CR,LF résiduels
- // (IMG SRC="foo.<\n><\t>gif<\t>")
- {
- char* a = lien;
- int llen;
-
- // strip ending spaces
- llen = ( *a != '\0' ) ? strlen(a) : 0;
- while(llen > 0 && is_realspace(lien[llen - 1]) ) {
- a[--llen]='\0';
- }
- // skip leading ones
- while(is_realspace(*a)) a++;
- // strip cr, lf, tab inside URL
- llen = 0;
- while(*a) {
- if (*a != '\n' && *a != '\r' && *a != '\t') {
- lien[llen++] = *a;
+
+ // purger espaces de début et fin, CR,LF résiduels
+ // (IMG SRC="foo.<\n><\t>gif<\t>")
+ {
+ char* a = lien;
+ int llen;
+
+ // strip ending spaces
+ llen = ( *a != '\0' ) ? strlen(a) : 0;
+ while(llen > 0 && is_realspace(lien[llen - 1]) ) {
+ a[--llen]='\0';
+ }
+ // skip leading ones
+ while(is_realspace(*a)) a++;
+ // strip cr, lf, tab inside URL
+ llen = 0;
+ while(*a) {
+ if (*a != '\n' && *a != '\r' && *a != '\t') {
+ lien[llen++] = *a;
+ }
+ a++;
}
- a++;
+ lien[llen] = '\0';
}
- lien[llen] = '\0';
- }
- // commas are forbidden
- if (archivetag_p) {
- if (strchr(lien, ',')) {
- error=1; // erreur
- if ((opt->debug>1) && (opt->log!=NULL)) {
- fspc(opt->log,"debug"); fprintf(opt->log,"link rejected (multiple-archive) %s"LF,lien); test_flush;
+ // commas are forbidden
+ if (archivetag_p) {
+ if (strchr(lien, ',')) {
+ error=1; // erreur
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"link rejected (multiple-archive) %s"LF,lien); test_flush;
+ }
}
+ }
+
+ /* Unescape/escape %20 and other &nbsp; */
+ {
+ char BIGSTK query[HTS_URLMAXSIZE*2];
+ char* a=strchr(lien,'?');
+ if (a) {
+ strcpybuff(query,a);
+ *a='\0';
+ } else
+ query[0]='\0';
+ // conversion &amp; -> & et autres joyeusetés
+ unescape_amp(lien);
+ unescape_amp(query);
+ // décoder l'inutile (%2E par exemple) et coder espaces
+ // XXXXXXXXXXXXXXXXX strcpybuff(lien,unescape_http(lien));
+ strcpybuff(lien,unescape_http_unharm(lien, (no_esc_utf)?0:1));
+ escape_remove_control(lien);
+ escape_spc_url(lien);
+ strcatbuff(lien,query); /* restore */
}
- }
-
- /* Unescape/escape %20 and other &nbsp; */
- {
- char query[HTS_URLMAXSIZE*2];
- char* a=strchr(lien,'?');
- if (a) {
- strcpybuff(query,a);
- *a='\0';
- } else
- query[0]='\0';
- // conversion &amp; -> & et autres joyeusetés
- unescape_amp(lien);
- unescape_amp(query);
- // décoder l'inutile (%2E par exemple) et coder espaces
- // XXXXXXXXXXXXXXXXX strcpybuff(lien,unescape_http(lien));
- strcpybuff(lien,unescape_http_unharm(lien, (no_esc_utf)?0:1));
- escape_remove_control(lien);
- escape_spc_url(lien);
- strcatbuff(lien,query); /* restore */
- }
-
- // convertir les éventuels \ en des / pour éviter des problèmes de reconnaissance!
- {
- char* a=jump_identification(lien);
- while( (a=strchr(a,'\\')) ) *a='/';
- }
-
- // supprimer le(s) ./
- while ((lien[0]=='.') && (lien[1]=='/')) {
- char tempo[HTS_URLMAXSIZE*2];
- strcpybuff(tempo,lien+2);
- strcpybuff(lien,tempo);
- }
- if (strnotempty(lien)==0) // sauf si plus de nom de fichier
- strcpybuff(lien,"./");
-
- // vérifie les /~machin -> /~machin/
- // supposition dangereuse?
- // OUI!!
+
+ // convertir les éventuels \ en des / pour éviter des problèmes de reconnaissance!
+ {
+ char* a;
+ for(a = jump_identification(lien) ; *a != '\0' && *a != '?' ; a++) {
+ if (*a == '\\') {
+ *a = '/';
+ }
+ }
+ }
+
+ // supprimer le(s) ./
+ while ((lien[0]=='.') && (lien[1]=='/')) {
+ char BIGSTK tempo[HTS_URLMAXSIZE*2];
+ strcpybuff(tempo,lien+2);
+ strcpybuff(lien,tempo);
+ }
+ if (strnotempty(lien)==0) // sauf si plus de nom de fichier
+ strcpybuff(lien,"./");
+
+ // vérifie les /~machin -> /~machin/
+ // supposition dangereuse?
+ // OUI!!
#if HTS_TILDE_SLASH
- if (lien[strlen(lien)-1]!='/') {
- char *a=lien+strlen(lien)-1;
- // éviter aussi index~1.html
- while (((int) a>(int) lien) && (*a!='~') && (*a!='/') && (*a!='.')) a--;
- if (*a=='~') {
- strcatbuff(lien,"/"); // ajouter slash
+ if (lien[strlen(lien)-1]!='/') {
+ char *a=lien+strlen(lien)-1;
+ // éviter aussi index~1.html
+ while (((int) a>(int) lien) && (*a!='~') && (*a!='/') && (*a!='.')) a--;
+ if (*a=='~') {
+ strcatbuff(lien,"/"); // ajouter slash
+ }
}
- }
#endif
-
- // APPLET CODE="mixer.MixerApplet.class" --> APPLET CODE="mixer/MixerApplet.class"
- // yes, this is dirty
- // but I'm so lazzy..
- // and besides the java "code" convention is really a pain in html code
- if (p_type==-1) {
- char* a=strrchr(lien,'.');
- add_class_dots_to_patch=0;
- if (a) {
- char* b;
- do {
- b=strchr(lien,'.');
- if ((b != a) && (b)) {
- add_class_dots_to_patch++;
- *b='/';
- }
- } while((b != a) && (b));
+
+ // APPLET CODE="mixer.MixerApplet.class" --> APPLET CODE="mixer/MixerApplet.class"
+ // yes, this is dirty
+ // but I'm so lazzy..
+ // and besides the java "code" convention is really a pain in html code
+ if (p_type==-1) {
+ char* a=strrchr(lien,'.');
+ add_class_dots_to_patch=0;
+ if (a) {
+ char* b;
+ do {
+ b=strchr(lien,'.');
+ if ((b != a) && (b)) {
+ add_class_dots_to_patch++;
+ *b='/';
+ }
+ } while((b != a) && (b));
+ }
}
- }
- // éliminer les éventuels :80 (port par défaut!)
- if (link_has_authority(lien)) {
- char * a;
- a=strstr(lien,"//"); // "//" authority
- if (a)
- a+=2;
- else
- a=lien;
- // while((*a) && (*a!='/') && (*a!=':')) a++;
- a=jump_toport(a);
- if (a) { // port
- int port=0;
- int defport=80;
- char* b=a+1;
+ // éliminer les éventuels :80 (port par défaut!)
+ if (link_has_authority(lien)) {
+ char * a;
+ a=strstr(lien,"//"); // "//" authority
+ if (a)
+ a+=2;
+ else
+ a=lien;
+ // while((*a) && (*a!='/') && (*a!=':')) a++;
+ a=jump_toport(a);
+ if (a) { // port
+ int port=0;
+ int defport=80;
+ char* b=a+1;
#if HTS_USEOPENSSL
- // FIXME
- //if (strfield(adr, "https:")) {
- //}
+ // FIXME
+ //if (strfield(adr, "https:")) {
+ //}
#endif
- while(isdigit((unsigned char)*b)) { port*=10; port+=(int) (*b-'0'); b++; }
- if (port==defport) { // port 80, default - c'est débile
- char tempo[HTS_URLMAXSIZE*2];
- tempo[0]='\0';
- strncatbuff(tempo,lien,(int) (a - lien));
- strcatbuff(tempo,a+3); // sauter :80
- strcpybuff(lien,tempo);
+ while(isdigit((unsigned char)*b)) { port*=10; port+=(int) (*b-'0'); b++; }
+ if (port==defport) { // port 80, default - c'est débile
+ char BIGSTK tempo[HTS_URLMAXSIZE*2];
+ tempo[0]='\0';
+ strncatbuff(tempo,lien,(int) (a - lien));
+ strcatbuff(tempo,a+3); // sauter :80
+ strcpybuff(lien,tempo);
+ }
}
}
- }
-
- // filtrer les parazites (mailto & cie)
- /*
- if (strfield(lien,"mailto:")) { // ne pas traiter
- error=1;
- } else if (strfield(lien,"news:")) { // ne pas traiter
- error=1;
- }
- */
-
- // vérifier que l'on ne doit pas ajouter de .class
- if (!error) {
- if (add_class) {
- char *a = lien+strlen(lien)-1;
- while(( a > lien) && (*a!='/') && (*a!='.')) a--;
- if (*a != '.')
- strcatbuff(lien,".class"); // ajouter .class
- else if (!strfield2(a,".class"))
- strcatbuff(lien,".class"); // idem
- }
- }
-
- // si c'est un chemin, alors vérifier (toto/toto.html -> http://www/toto/)
- if (!error) {
- if ((opt->debug>1) && (opt->log!=NULL)) {
- fspc(opt->log,"debug"); fprintf(opt->log,"position link check %s"LF,lien); test_flush;
+
+ // filtrer les parazites (mailto & cie)
+ /*
+ if (strfield(lien,"mailto:")) { // ne pas traiter
+ error=1;
+ } else if (strfield(lien,"news:")) { // ne pas traiter
+ error=1;
}
-
- if ((p_type==2) || (p_type==-2)) { // code ou codebase
- // Vérifier les codebase=applet (au lieu de applet/)
- if (p_type==-2) { // codebase
- if (strnotempty(lien)) {
- if (fil[strlen(lien)-1]!='/') { // pas répertoire
- strcatbuff(lien,"/");
- }
- }
- }
+ */
- /* base has always authority */
- if (p_type==2 && !link_has_authority(lien)) {
- char tmp[HTS_URLMAXSIZE*2];
- strcpybuff(tmp, "http://");
- strcatbuff(tmp, lien);
- strcpybuff(lien, tmp);
+ // vérifier que l'on ne doit pas ajouter de .class
+ if (!error) {
+ if (add_class) {
+ char *a = lien+strlen(lien)-1;
+ while(( a > lien) && (*a!='/') && (*a!='.')) a--;
+ if (*a != '.')
+ strcatbuff(lien,".class"); // ajouter .class
+ else if (!strfield2(a,".class"))
+ strcatbuff(lien,".class"); // idem
}
+ }
- /* only one ending / (bug on some pages) */
- if ((int)strlen(lien)>2) {
- int len = (int) strlen(lien);
- while(len > 1 && lien[len-1] == '/' && lien[len-2] == '/' ) /* double // (bug) */
- lien[--len]='\0';
+ // si c'est un chemin, alors vérifier (toto/toto.html -> http://www/toto/)
+ if (!error) {
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"position link check %s"LF,lien); test_flush;
}
- // copier nom host si besoin est
- if (!link_has_authority(lien)) { // pas de http://
- char adr2[HTS_URLMAXSIZE*2],fil2[HTS_URLMAXSIZE*2]; // ** euh ident_url_relatif??
- if (ident_url_relatif(lien,urladr,urlfil,adr2,fil2)<0) {
- error=1;
- } else {
- strcpybuff(lien,"http://");
- strcatbuff(lien,adr2);
- if (*fil2!='/')
- strcatbuff(lien,"/");
- strcatbuff(lien,fil2);
- {
- char* a;
- a=lien+strlen(lien)-1;
- while((*a) && (*a!='/') && ( a> lien)) a--;
- if (*a=='/') {
- *(a+1)='\0';
+
+ if ((p_type==2) || (p_type==-2)) { // code ou codebase
+ // Vérifier les codebase=applet (au lieu de applet/)
+ if (p_type==-2) { // codebase
+ if (strnotempty(lien)) {
+ if (fil[strlen(lien)-1]!='/') { // pas répertoire
+ strcatbuff(lien,"/");
}
}
- //char tempo[HTS_URLMAXSIZE*2];
- //strcpybuff(tempo,"http://");
- //strcatbuff(tempo,urladr); // host
- //if (*lien!='/')
- // strcatbuff(tempo,"/");
- //strcatbuff(tempo,lien);
- //strcpybuff(lien,tempo);
}
- }
-
- if (!error) { // pas d'erreur?
- if (p_type==2) { // code ET PAS codebase
- char* a=lien+strlen(lien)-1;
- while( (a > lien) && (*a) && (*a!='/')) a--;
- if (*a=='/') // ok on a repéré le dernier /
- *(a+1)='\0'; // couper
- else {
- *lien='\0'; // éliminer
- error=1; // erreur, ne pas poursuivre
- }
+
+ /* base has always authority */
+ if (p_type==2 && !link_has_authority(lien)) {
+ char BIGSTK tmp[HTS_URLMAXSIZE*2];
+ strcpybuff(tmp, "http://");
+ strcatbuff(tmp, lien);
+ strcpybuff(lien, tmp);
}
-
- // stocker base ou codebase?
- switch(p_type) {
+
+ /* only one ending / (bug on some pages) */
+ if ((int)strlen(lien)>2) {
+ int len = (int) strlen(lien);
+ while(len > 1 && lien[len-1] == '/' && lien[len-2] == '/' ) /* double // (bug) */
+ lien[--len]='\0';
+ }
+ // copier nom host si besoin est
+ if (!link_has_authority(lien)) { // pas de http://
+ char BIGSTK adr2[HTS_URLMAXSIZE*2],fil2[HTS_URLMAXSIZE*2]; // ** euh ident_url_relatif??
+ if (ident_url_relatif(lien,urladr,urlfil,adr2,fil2)<0) {
+ error=1;
+ } else {
+ strcpybuff(lien,"http://");
+ strcatbuff(lien,adr2);
+ if (*fil2!='/')
+ strcatbuff(lien,"/");
+ strcatbuff(lien,fil2);
+ {
+ char* a;
+ a=lien+strlen(lien)-1;
+ while((*a) && (*a!='/') && ( a> lien)) a--;
+ if (*a=='/') {
+ *(a+1)='\0';
+ }
+ }
+ //char BIGSTK tempo[HTS_URLMAXSIZE*2];
+ //strcpybuff(tempo,"http://");
+ //strcatbuff(tempo,urladr); // host
+ //if (*lien!='/')
+ // strcatbuff(tempo,"/");
+ //strcatbuff(tempo,lien);
+ //strcpybuff(lien,tempo);
+ }
+ }
+
+ if (!error) { // pas d'erreur?
+ if (p_type==2) { // code ET PAS codebase
+ char* a=lien+strlen(lien)-1;
+ while( (a > lien) && (*a) && (*a!='/')) a--;
+ if (*a=='/') // ok on a repéré le dernier /
+ *(a+1)='\0'; // couper
+ else {
+ *lien='\0'; // éliminer
+ error=1; // erreur, ne pas poursuivre
+ }
+ }
+
+ // stocker base ou codebase?
+ switch(p_type) {
case 2: {
//if (*lien!='/') strcatbuff(base,"/");
strcpybuff(base,lien);
}
- break; // base
+ break; // base
case -2: {
//if (*lien!='/') strcatbuff(codebase,"/");
strcpybuff(codebase,lien);
}
- break; // base
- }
-
- if ((opt->debug>1) && (opt->log!=NULL)) {
- fspc(opt->log,"debug"); fprintf(opt->log,"code/codebase link %s base %s"LF,lien,base); test_flush;
+ break; // base
+ }
+
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"code/codebase link %s base %s"LF,lien,base); test_flush;
+ }
+ //printf("base code: %s - %s\n",lien,base);
}
- //printf("base code: %s - %s\n",lien,base);
- }
-
- } else {
- char* _base;
- if (p_type==-1) // code (applet)
- _base=codebase;
- else
- _base=base;
-
-
- // ajouter chemin de base href..
- if (strnotempty(_base)) { // considérer base
- if (!link_has_authority(lien)) { // non absolue
- if (*lien!='/') { // non absolu sur le site (/)
- if ( ((int) strlen(_base)+(int) strlen(lien))<HTS_URLMAXSIZE) {
- // mailto: and co: do NOT add base
- if (ident_url_relatif(lien,urladr,urlfil,adr,fil)>=0) {
- char tempo[HTS_URLMAXSIZE*2];
- // base est absolue
- strcpybuff(tempo,_base);
- strcatbuff(tempo,lien + ((*lien=='/')?1:0) );
- strcpybuff(lien,tempo); // patcher en considérant base
- // ** vérifier que ../ fonctionne (ne doit pas arriver mais bon..)
-
- if ((opt->debug>1) && (opt->log!=NULL)) {
- fspc(opt->log,"debug"); fprintf(opt->log,"link modified with code/codebase %s"LF,lien); test_flush;
- }
- }
- } else {
- error=1; // erreur
- if (opt->errlog) {
- fspc(opt->errlog,"error"); fprintf(opt->errlog,"Link %s too long with base href"LF,lien);
- test_flush;
- }
- }
- } else {
- char badr[HTS_URLMAXSIZE*2], bfil[HTS_URLMAXSIZE*2];
- if (ident_url_absolute(_base, badr, bfil) >=0 ) {
- if ( ((int) strlen(badr)+(int) strlen(lien)) < HTS_URLMAXSIZE) {
- char tempo[HTS_URLMAXSIZE*2];
- // base est absolue
- tempo[0] = '\0';
- if (!link_has_authority(badr)) {
- strcatbuff(tempo, "http://");
- }
- strcatbuff(tempo,badr);
- strcatbuff(tempo,lien);
- strcpybuff(lien,tempo); // patcher en considérant base
-
- if ((opt->debug>1) && (opt->log!=NULL)) {
- fspc(opt->log,"debug"); fprintf(opt->log,"link modified with code/codebase %s"LF,lien); test_flush;
+
+ } else {
+ char* _base;
+ if (p_type==-1) // code (applet)
+ _base=codebase;
+ else
+ _base=base;
+
+
+ // ajouter chemin de base href..
+ if (strnotempty(_base)) { // considérer base
+ if (!link_has_authority(lien)) { // non absolue
+ if (*lien!='/') { // non absolu sur le site (/)
+ if ( ((int) strlen(_base)+(int) strlen(lien))<HTS_URLMAXSIZE) {
+ // mailto: and co: do NOT add base
+ if (ident_url_relatif(lien,urladr,urlfil,adr,fil)>=0) {
+ char BIGSTK tempo[HTS_URLMAXSIZE*2];
+ // base est absolue
+ strcpybuff(tempo,_base);
+ strcatbuff(tempo,lien + ((*lien=='/')?1:0) );
+ strcpybuff(lien,tempo); // patcher en considérant base
+ // ** vérifier que ../ fonctionne (ne doit pas arriver mais bon..)
+
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"link modified with code/codebase %s"LF,lien); test_flush;
+ }
}
} else {
error=1; // erreur
@@ -1999,74 +2138,98 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
test_flush;
}
}
+ } else {
+ char BIGSTK badr[HTS_URLMAXSIZE*2], bfil[HTS_URLMAXSIZE*2];
+ if (ident_url_absolute(_base, badr, bfil) >=0 ) {
+ if ( ((int) strlen(badr)+(int) strlen(lien)) < HTS_URLMAXSIZE) {
+ char BIGSTK tempo[HTS_URLMAXSIZE*2];
+ // base est absolue
+ tempo[0] = '\0';
+ if (!link_has_authority(badr)) {
+ strcatbuff(tempo, "http://");
+ }
+ strcatbuff(tempo,badr);
+ strcatbuff(tempo,lien);
+ strcpybuff(lien,tempo); // patcher en considérant base
+
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"link modified with code/codebase %s"LF,lien); test_flush;
+ }
+ } else {
+ error=1; // erreur
+ if (opt->errlog) {
+ fspc(opt->errlog,"error"); fprintf(opt->errlog,"Link %s too long with base href"LF,lien);
+ test_flush;
+ }
+ }
+ }
}
}
}
+
+
}
-
-
- }
- }
-
-
- // transformer lien quelconque (http, relatif, etc) en une adresse
- // et un chemin+fichier (adr,fil)
- if (!error) {
- int reponse;
- if ((opt->debug>1) && (opt->log!=NULL)) {
- fspc(opt->log,"debug"); fprintf(opt->log,"build relative link %s with %s%s"LF,lien,relativeurladr,relativeurlfil); test_flush;
}
- if ((reponse=ident_url_relatif(lien,relativeurladr,relativeurlfil,adr,fil))<0) {
- adr[0]='\0'; // erreur
- if (reponse==-2) {
- if (opt->errlog) {
- fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Link %s not caught (unknown protocol)"LF,lien);
- test_flush;
+
+
+ // transformer lien quelconque (http, relatif, etc) en une adresse
+ // et un chemin+fichier (adr,fil)
+ if (!error) {
+ int reponse;
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"build relative link %s with %s%s"LF,lien,relativeurladr,relativeurlfil); test_flush;
+ }
+ if ((reponse=ident_url_relatif(lien,relativeurladr,relativeurlfil,adr,fil))<0) {
+ adr[0]='\0'; // erreur
+ if (reponse==-2) {
+ if (opt->errlog) {
+ fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Link %s not caught (unknown protocol)"LF,lien);
+ test_flush;
+ }
+ } else {
+ if ((opt->debug>1) && (opt->errlog!=NULL)) {
+ fspc(opt->errlog,"debug"); fprintf(opt->errlog,"ident_url_relatif failed for %s with %s%s"LF,lien,relativeurladr,relativeurlfil); test_flush;
+ }
}
} else {
- if ((opt->debug>1) && (opt->errlog!=NULL)) {
- fspc(opt->errlog,"debug"); fprintf(opt->errlog,"ident_url_relatif failed for %s with %s%s"LF,lien,relativeurladr,relativeurlfil); test_flush;
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"built relative link %s with %s%s -> %s%s"LF,lien,relativeurladr,relativeurlfil,adr,fil); test_flush;
}
}
} else {
if ((opt->debug>1) && (opt->log!=NULL)) {
- fspc(opt->log,"debug"); fprintf(opt->log,"built relative link %s with %s%s -> %s%s"LF,lien,relativeurladr,relativeurlfil,adr,fil); test_flush;
+ fspc(opt->log,"debug"); fprintf(opt->log,"link %s not build, error detected before"LF,lien); test_flush;
}
+ adr[0]='\0';
}
- } else {
- if ((opt->debug>1) && (opt->log!=NULL)) {
- fspc(opt->log,"debug"); fprintf(opt->log,"link %s not build, error detected before"LF,lien); test_flush;
- }
- adr[0]='\0';
- }
-
+
#if HTS_CHECK_STRANGEDIR
- // !ATTENTION!
- // Ici on teste les exotiques du genre www.truc.fr/machin (sans slash à la fin)
- // je n'ai pas encore trouvé le moyen de faire la différence entre un répertoire
- // et un fichier en http A PRIORI : je fais donc un test
- // En cas de moved xxx, on recalcule adr et fil, tout simplement
- // DEFAUT: test effectué plusieurs fois! à revoir!!!
- if ((adr[0]!='\0') && (strcmp(adr,"file://") && (p_type!=2) && (p_type!=-2)) {
- //## if ((adr[0]!='\0') && (adr[0]!=lOCAL_CHAR) && (p_type!=2) && (p_type!=-2)) {
- if (fil[strlen(fil)-1]!='/') { // pas répertoire
- if (ishtml(fil)==-2) { // pas d'extension
- char loc[HTS_URLMAXSIZE*2]; // éventuelle nouvelle position
- loc[0]='\0';
- if ((opt->debug>1) && (opt->log!=NULL)) {
- fspc(opt->log,"debug"); fprintf(opt->log,"link-check-directory: %s%s"LF,adr,fil);
- test_flush;
- }
-
- // tester éventuelle nouvelle position
- switch (http_location(adr,fil,loc).statuscode) {
+ // !ATTENTION!
+ // Ici on teste les exotiques du genre www.truc.fr/machin (sans slash à la fin)
+ // je n'ai pas encore trouvé le moyen de faire la différence entre un répertoire
+ // et un fichier en http A PRIORI : je fais donc un test
+ // En cas de moved xxx, on recalcule adr et fil, tout simplement
+ // DEFAUT: test effectué plusieurs fois! à revoir!!!
+ if ((adr[0]!='\0') && (strcmp(adr,"file://") && (p_type!=2) && (p_type!=-2)) {
+ //## if ((adr[0]!='\0') && (adr[0]!=lOCAL_CHAR) && (p_type!=2) && (p_type!=-2)) {
+ if (fil[strlen(fil)-1]!='/') { // pas répertoire
+ if (ishtml(fil)==-2) { // pas d'extension
+ char BIGSTK loc[HTS_URLMAXSIZE*2]; // éventuelle nouvelle position
+ loc[0]='\0';
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"link-check-directory: %s%s"LF,adr,fil);
+ test_flush;
+ }
+
+ // tester éventuelle nouvelle position
+ switch (http_location(adr,fil,loc).statuscode) {
case 200: // ok au final
if (strnotempty(loc)) { // a changé d'adresse
if (opt->errlog) {
fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Link %s%s has moved to %s for %s%s"LF,adr,fil,loc,urladr,urlfil);
test_flush;
}
-
+
// recalculer adr et fil!
if (ident_url_absolute(loc,adr,fil)==-1) {
adr[0]='\0'; // cancel
@@ -2075,7 +2238,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
test_flush;
}
}
-
+
}
break;
case -2: case -3: // timeout ou erreur grave
@@ -2083,214 +2246,216 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Connection too slow for testing link %s%s (from %s%s)"LF,adr,fil,urladr,urlfil);
test_flush;
}
-
+
break;
+ }
+
}
-
- }
- }
- }
+ }
+ }
#endif
-
- // Le lien doit juste être réécrit, mais ne doit pas générer un lien
- // exemple: <FORM ACTION="url_cgi">
- if (p_nocatch) {
- forbidden_url=1; // interdire récupération du lien
- if ((opt->debug>1) && (opt->log!=NULL)) {
- fspc(opt->log,"debug"); fprintf(opt->log,"link forced external at %s%s"LF,adr,fil);
- test_flush;
+
+ // Le lien doit juste être réécrit, mais ne doit pas générer un lien
+ // exemple: <FORM ACTION="url_cgi">
+ if (p_nocatch) {
+ forbidden_url=1; // interdire récupération du lien
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"link forced external at %s%s"LF,adr,fil);
+ test_flush;
+ }
}
- }
-
- // Tester si un lien doit être accepté ou refusé (wizard)
- // forbidden_url=1 : lien refusé
- // forbidden_url=0 : lien accepté
- //if ((ptr>0) && (p_type!=2) && (p_type!=-2)) { // tester autorisations?
- if ((p_type!=2) && (p_type!=-2)) { // tester autorisations?
- if (!p_nocatch) {
- if (adr[0]!='\0') {
- if ((opt->debug>1) && (opt->log!=NULL)) {
- fspc(opt->log,"debug"); fprintf(opt->log,"wizard link test at %s%s.."LF,adr,fil);
- test_flush;
- }
- forbidden_url=hts_acceptlink(opt,ptr,lien_tot,liens,
- adr,fil,
- &set_prio_to,
- &just_test_it);
- if ((opt->debug>1) && (opt->log!=NULL)) {
- fspc(opt->log,"debug"); fprintf(opt->log,"result for wizard link test: %d"LF,forbidden_url);
- test_flush;
+
+ // Tester si un lien doit être accepté ou refusé (wizard)
+ // forbidden_url=1 : lien refusé
+ // forbidden_url=0 : lien accepté
+ //if ((ptr>0) && (p_type!=2) && (p_type!=-2)) { // tester autorisations?
+ if ((p_type!=2) && (p_type!=-2)) { // tester autorisations?
+ if (!p_nocatch) {
+ if (adr[0]!='\0') {
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"wizard link test at %s%s.."LF,adr,fil);
+ test_flush;
+ }
+ forbidden_url=hts_acceptlink(opt,ptr,lien_tot,liens,
+ adr,fil,
+ NULL, NULL,
+ &set_prio_to,
+ &just_test_it);
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"result for wizard link test: %d"LF,forbidden_url);
+ test_flush;
+ }
}
}
}
- }
-
- // calculer meme_adresse
- meme_adresse=strfield2(jump_identification(adr),jump_identification(urladr));
-
-
-
- // Début partie sauvegarde
-
- // ici on forme le nom du fichier à sauver, et on patche l'URL
- if (adr[0]!='\0') {
- // savename: simplifier les ../ et autres joyeusetés
- char save[HTS_URLMAXSIZE*2];
- int r_sv=0;
- // En cas de moved, adresse première
- char former_adr[HTS_URLMAXSIZE*2];
- char former_fil[HTS_URLMAXSIZE*2];
- //
- save[0]='\0'; former_adr[0]='\0'; former_fil[0]='\0';
- //
-
- // nom du chemin à sauver si on doit le calculer
- // note: url_savename peut décider de tester le lien si il le trouve
- // suspect, et modifier alors adr et fil
- // dans ce cas on aura une référence directe au lieu des traditionnels
- // moved en cascade (impossible à reproduire à priori en local, lorsque des fichiers
- // gif sont impliqués par exemple)
- if ((p_type!=2) && (p_type!=-2)) { // pas base href ou codebase
- if (forbidden_url!=1) {
- char last_adr[HTS_URLMAXSIZE*2];
- last_adr[0]='\0';
- //char last_fil[HTS_URLMAXSIZE*2]="";
- strcpybuff(last_adr,adr); // ancienne adresse
- //strcpybuff(last_fil,fil); // ancien chemin
- r_sv=url_savename(adr,fil,save,former_adr,former_fil,liens[ptr]->adr,liens[ptr]->fil,opt,liens,lien_tot,back,back_max,cache,hash,ptr,numero_passe);
- if (strcmp(jump_identification(last_adr),jump_identification(adr)) != 0) { // a changé
-
- // 2e test si moved
-
- // Tester si un lien doit être accepté ou refusé (wizard)
- // forbidden_url=1 : lien refusé
- // forbidden_url=0 : lien accepté
- if ((ptr>0) && (p_type!=2) && (p_type!=-2)) { // tester autorisations?
- if (!p_nocatch) {
- if (adr[0]!='\0') {
- if ((opt->debug>1) && (opt->log!=NULL)) {
- fspc(opt->log,"debug"); fprintf(opt->log,"wizard moved link retest at %s%s.."LF,adr,fil);
- test_flush;
- }
- forbidden_url=hts_acceptlink(opt,ptr,lien_tot,liens,
- adr,fil,
- &set_prio_to,
- &just_test_it);
- if ((opt->debug>1) && (opt->log!=NULL)) {
- fspc(opt->log,"debug"); fprintf(opt->log,"result for wizard moved link retest: %d"LF,forbidden_url);
- test_flush;
+
+ // calculer meme_adresse
+ meme_adresse=strfield2(jump_identification(adr),jump_identification(urladr));
+
+
+
+ // Début partie sauvegarde
+
+ // ici on forme le nom du fichier à sauver, et on patche l'URL
+ if (adr[0]!='\0') {
+ // savename: simplifier les ../ et autres joyeusetés
+ char BIGSTK save[HTS_URLMAXSIZE*2];
+ int r_sv=0;
+ // En cas de moved, adresse première
+ char BIGSTK former_adr[HTS_URLMAXSIZE*2];
+ char BIGSTK former_fil[HTS_URLMAXSIZE*2];
+ //
+ save[0]='\0'; former_adr[0]='\0'; former_fil[0]='\0';
+ //
+
+ // nom du chemin à sauver si on doit le calculer
+ // note: url_savename peut décider de tester le lien si il le trouve
+ // suspect, et modifier alors adr et fil
+ // dans ce cas on aura une référence directe au lieu des traditionnels
+ // moved en cascade (impossible à reproduire à priori en local, lorsque des fichiers
+ // gif sont impliqués par exemple)
+ if ((p_type!=2) && (p_type!=-2)) { // pas base href ou codebase
+ if (forbidden_url!=1) {
+ char BIGSTK last_adr[HTS_URLMAXSIZE*2];
+ last_adr[0]='\0';
+ //char last_fil[HTS_URLMAXSIZE*2]="";
+ strcpybuff(last_adr,adr); // ancienne adresse
+ //strcpybuff(last_fil,fil); // ancien chemin
+ r_sv=url_savename(adr,fil,save,former_adr,former_fil,liens[ptr]->adr,liens[ptr]->fil,opt,liens,lien_tot,back,back_max,cache,hash,ptr,numero_passe);
+ if (strcmp(jump_identification(last_adr),jump_identification(adr)) != 0) { // a changé
+
+ // 2e test si moved
+
+ // Tester si un lien doit être accepté ou refusé (wizard)
+ // forbidden_url=1 : lien refusé
+ // forbidden_url=0 : lien accepté
+ if ((ptr>0) && (p_type!=2) && (p_type!=-2)) { // tester autorisations?
+ if (!p_nocatch) {
+ if (adr[0]!='\0') {
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"wizard moved link retest at %s%s.."LF,adr,fil);
+ test_flush;
+ }
+ forbidden_url=hts_acceptlink(opt,ptr,lien_tot,liens,
+ adr,fil,
+ NULL, NULL,
+ &set_prio_to,
+ &just_test_it);
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"result for wizard moved link retest: %d"LF,forbidden_url);
+ test_flush;
+ }
}
}
}
+
+ //import_done=1; // c'est un import!
+ meme_adresse=0; // on a changé
}
-
- //import_done=1; // c'est un import!
- meme_adresse=0; // on a changé
+ } else {
+ strcpybuff(save,""); // dummy
}
- } else {
- strcpybuff(save,""); // dummy
}
- }
- if (r_sv!=-1) { // pas d'erreur, on continue
- /* log */
- if ((opt->debug>1) && (opt->log!=NULL)) {
- fspc(opt->log,"debug");
- if (forbidden_url!=1) { // le lien va être chargé
- if ((p_type==2) || (p_type==-2)) { // base href ou codebase, pas un lien
- fprintf(opt->log,"Code/Codebase: %s%s"LF,adr,fil);
- } else if ((opt->getmode & 4)==0) {
- fprintf(opt->log,"Record: %s%s -> %s"LF,adr,fil,save);
- } else {
- if (!ishtml(fil))
- fprintf(opt->log,"Record after: %s%s -> %s"LF,adr,fil,save);
- else
+ if (r_sv!=-1) { // pas d'erreur, on continue
+ /* log */
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug");
+ if (forbidden_url!=1) { // le lien va être chargé
+ if ((p_type==2) || (p_type==-2)) { // base href ou codebase, pas un lien
+ fprintf(opt->log,"Code/Codebase: %s%s"LF,adr,fil);
+ } else if ((opt->getmode & 4)==0) {
fprintf(opt->log,"Record: %s%s -> %s"LF,adr,fil,save);
- }
- } else
- fprintf(opt->log,"External: %s%s"LF,adr,fil);
- test_flush;
- }
- /* FIN log */
-
- // écrire lien
- if ((p_type==2) || (p_type==-2)) { // base href ou codebase, sauter
- lastsaved=eadr-1+1; // sauter "
- }
- /* */
- else if (opt->urlmode==0) { // URL absolue dans tous les cas
- if ((opt->getmode & 1) && (ptr>0)) { // ecrire les html
- if (!link_has_authority(adr)) {
- HT_ADD("http://");
- } else {
- char* aut = strstr(adr, "//");
- if (aut) {
- char tmp[256];
- tmp[0]='\0';
- strncatbuff(tmp, adr, (int) (aut - adr)); // scheme
- HT_ADD(tmp); // Protocol
- HT_ADD("//");
- }
- }
-
- if (!opt->passprivacy) {
- HT_ADD(jump_protocol(adr)); // Password
- } else {
- HT_ADD(jump_identification(adr)); // No Password
- }
- if (*fil!='/')
- HT_ADD("/");
- HT_ADD(fil);
+ } else {
+ if (!ishtml(fil))
+ fprintf(opt->log,"Record after: %s%s -> %s"LF,adr,fil,save);
+ else
+ fprintf(opt->log,"Record: %s%s -> %s"LF,adr,fil,save);
+ }
+ } else
+ fprintf(opt->log,"External: %s%s"LF,adr,fil);
+ test_flush;
+ }
+ /* FIN log */
+
+ // écrire lien
+ if ((p_type==2) || (p_type==-2)) { // base href ou codebase, sauter
+ lastsaved=eadr-1+1; // sauter "
}
- lastsaved=eadr-1; // dernier écrit+1 (enfin euh apres on fait un ++ alors hein)
- /* */
- } else if (opt->urlmode >= 4) { // ne rien faire dans tous les cas!
/* */
- /* leave the link 'as is' */
- /* Sinon, dépend de interne/externe */
- } else if (forbidden_url==1) { // le lien ne sera pas chargé, référence externe!
- if ((opt->getmode & 1) && (ptr>0)) {
- if (p_type!=-1) { // pas que le nom de fichier (pas classe java)
- if (!opt->external) {
- if (!link_has_authority(adr)) {
- HT_ADD("http://");
- if (!opt->passprivacy) {
- HT_ADD(adr); // Password
- } else {
- HT_ADD(jump_identification(adr)); // No Password
- }
- if (*fil!='/')
- HT_ADD("/");
- HT_ADD(fil);
- } else {
- char* aut = strstr(adr, "//");
- if (aut) {
- char tmp[256];
- tmp[0]='\0';
- strncatbuff(tmp, adr, (int) (aut - adr)); // scheme
- HT_ADD(tmp); // Protocol
- HT_ADD("//");
+ else if (opt->urlmode==0) { // URL absolue dans tous les cas
+ if ((opt->getmode & 1) && (ptr>0)) { // ecrire les html
+ if (!link_has_authority(adr)) {
+ HT_ADD("http://");
+ } else {
+ char* aut = strstr(adr, "//");
+ if (aut) {
+ char tmp[256];
+ tmp[0]='\0';
+ strncatbuff(tmp, adr, (int) (aut - adr)); // scheme
+ HT_ADD(tmp); // Protocol
+ HT_ADD("//");
+ }
+ }
+
+ if (!opt->passprivacy) {
+ HT_ADD_HTMLESCAPED(jump_protocol(adr)); // Password
+ } else {
+ HT_ADD_HTMLESCAPED(jump_identification(adr)); // No Password
+ }
+ if (*fil!='/')
+ HT_ADD("/");
+ HT_ADD_HTMLESCAPED(fil);
+ }
+ lastsaved=eadr-1; // dernier écrit+1 (enfin euh apres on fait un ++ alors hein)
+ /* */
+ } else if (opt->urlmode >= 4) { // ne rien faire dans tous les cas!
+ /* */
+ /* leave the link 'as is' */
+ /* Sinon, dépend de interne/externe */
+ } else if (forbidden_url==1) { // le lien ne sera pas chargé, référence externe!
+ if ((opt->getmode & 1) && (ptr>0)) {
+ if (p_type!=-1) { // pas que le nom de fichier (pas classe java)
+ if (!opt->external) {
+ if (!link_has_authority(adr)) {
+ HT_ADD("http://");
if (!opt->passprivacy) {
- HT_ADD(jump_protocol(adr)); // Password
+ HT_ADD_HTMLESCAPED(adr); // Password
} else {
- HT_ADD(jump_identification(adr)); // No Password
+ HT_ADD_HTMLESCAPED(jump_identification(adr)); // No Password
}
if (*fil!='/')
HT_ADD("/");
- HT_ADD(fil);
+ HT_ADD_HTMLESCAPED(fil);
+ } else {
+ char* aut = strstr(adr, "//");
+ if (aut) {
+ char tmp[256];
+ tmp[0]='\0';
+ strncatbuff(tmp, adr, (int) (aut - adr)); // scheme
+ HT_ADD(tmp); // Protocol
+ HT_ADD("//");
+ if (!opt->passprivacy) {
+ HT_ADD_HTMLESCAPED(jump_protocol(adr)); // Password
+ } else {
+ HT_ADD_HTMLESCAPED(jump_identification(adr)); // No Password
+ }
+ if (*fil!='/')
+ HT_ADD("/");
+ HT_ADD_HTMLESCAPED(fil);
+ }
}
- }
- //
- } else { // fichier/page externe, mais on veut générer une erreur
- //
- int patch_it=0;
- int add_url=0;
- char* cat_name=NULL;
- char* cat_data=NULL;
- int cat_nb=0;
- int cat_data_len=0;
-
- // ajouter lien external
- switch ( (link_has_authority(adr)) ? 1 : ( (fil[strlen(fil)-1]=='/')?1:(ishtml(fil)) ) ) {
+ //
+ } else { // fichier/page externe, mais on veut générer une erreur
+ //
+ int patch_it=0;
+ int add_url=0;
+ char* cat_name=NULL;
+ char* cat_data=NULL;
+ int cat_nb=0;
+ int cat_data_len=0;
+
+ // ajouter lien external
+ switch ( (link_has_authority(adr)) ? 1 : ( (fil[strlen(fil)-1]=='/')?1:(ishtml(fil)) ) ) {
case 1: case -2: // html ou répertoire
if (opt->getmode & 1) { // sauver html
patch_it=1; // redirect
@@ -2308,108 +2473,108 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
|| (strfield2(fil+max(0,(int)strlen(fil)-4),".xbm"))
/*|| (ishtml(fil)!=0)*/ ) {
patch_it=1; // redirect
- add_url=1; // avec link aussi
- cat_name="external.gif";
- cat_nb=1;
- cat_data=HTS_DATA_UNKNOWN_GIF;
- cat_data_len=HTS_DATA_UNKNOWN_GIF_LEN;
- } else /* if (is_dyntype(get_ext(fil))) */ {
- patch_it=1; // redirect
- add_url=1; // avec link?
- cat_name="external.html";
- cat_nb=0;
- cat_data=HTS_DATA_UNKNOWN_HTML;
- cat_data_len=HTS_DATA_UNKNOWN_HTML_LEN;
- }
- break;
- }// html,gif
-
- if (patch_it) {
- char save[HTS_URLMAXSIZE*2];
- char tempo[HTS_URLMAXSIZE*2];
- strcpybuff(save,opt->path_html);
- strcatbuff(save,cat_name);
- if (lienrelatif(tempo,save, relativesavename)==0) {
- if (!no_esc_utf)
- escape_uri(tempo); // escape with %xx
- else
- escape_uri_utf(tempo); // escape with %xx
- HT_ADD(tempo); // page externe
- if (add_url) {
- HT_ADD("?link="); // page externe
-
- // same as above
- if (!link_has_authority(adr)) {
- HT_ADD("http://");
- if (!opt->passprivacy) {
- HT_ADD(adr); // Password
- } else {
- HT_ADD(jump_identification(adr)); // No Password
- }
- if (*fil!='/')
- HT_ADD("/");
- HT_ADD(fil);
- } else {
- char* aut = strstr(adr, "//");
- if (aut) {
- char tmp[256];
- tmp[0]='\0';
- strncatbuff(tmp, adr, (int) (aut - adr) + 2); // scheme
- HT_ADD(tmp);
+ add_url=1; // avec link aussi
+ cat_name="external.gif";
+ cat_nb=1;
+ cat_data=HTS_DATA_UNKNOWN_GIF;
+ cat_data_len=HTS_DATA_UNKNOWN_GIF_LEN;
+ } else /* if (is_dyntype(get_ext(fil))) */ {
+ patch_it=1; // redirect
+ add_url=1; // avec link?
+ cat_name="external.html";
+ cat_nb=0;
+ cat_data=HTS_DATA_UNKNOWN_HTML;
+ cat_data_len=HTS_DATA_UNKNOWN_HTML_LEN;
+ }
+ break;
+ }// html,gif
+
+ if (patch_it) {
+ char BIGSTK save[HTS_URLMAXSIZE*2];
+ char BIGSTK tempo[HTS_URLMAXSIZE*2];
+ strcpybuff(save,opt->path_html);
+ strcatbuff(save,cat_name);
+ if (lienrelatif(tempo,save, relativesavename)==0) {
+ if (!no_esc_utf)
+ escape_uri(tempo); // escape with %xx
+ else
+ escape_uri_utf(tempo); // escape with %xx
+ HT_ADD_HTMLESCAPED(tempo); // page externe
+ if (add_url) {
+ HT_ADD("?link="); // page externe
+
+ // same as above
+ if (!link_has_authority(adr)) {
+ HT_ADD("http://");
if (!opt->passprivacy) {
- HT_ADD(jump_protocol(adr)); // Password
+ HT_ADD_HTMLESCAPED(adr); // Password
} else {
- HT_ADD(jump_identification(adr)); // No Password
+ HT_ADD_HTMLESCAPED(jump_identification(adr)); // No Password
}
if (*fil!='/')
HT_ADD("/");
- HT_ADD(fil);
+ HT_ADD_HTMLESCAPED(fil);
+ } else {
+ char* aut = strstr(adr, "//");
+ if (aut) {
+ char tmp[256];
+ tmp[0]='\0';
+ strncatbuff(tmp, adr, (int) (aut - adr) + 2); // scheme
+ HT_ADD(tmp);
+ if (!opt->passprivacy) {
+ HT_ADD_HTMLESCAPED(jump_protocol(adr)); // Password
+ } else {
+ HT_ADD_HTMLESCAPED(jump_identification(adr)); // No Password
+ }
+ if (*fil!='/')
+ HT_ADD("/");
+ HT_ADD_HTMLESCAPED(fil);
+ }
}
+ //
+
}
- //
-
}
- }
-
- // écrire fichier?
- if (verif_external(cat_nb,1)) {
- //if (!fexist(fconcat(opt->path_html,cat_name))) {
- FILE* fp = filecreate(fconcat(opt->path_html,cat_name));
- if (fp) {
- if (cat_data_len==0) { // texte
- verif_backblue(opt,opt->path_html);
- fprintf(fp,"%s%s","<!-- Created by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->"LF,cat_data);
- } else { // data
- fwrite(cat_data,cat_data_len,1,fp);
+
+ // écrire fichier?
+ if (verif_external(cat_nb,1)) {
+ //if (!fexist(fconcat(opt->path_html,cat_name))) {
+ FILE* fp = filecreate(fconcat(opt->path_html,cat_name));
+ if (fp) {
+ if (cat_data_len==0) { // texte
+ verif_backblue(opt,opt->path_html);
+ fprintf(fp,"%s%s","<!-- Created by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->"LF,cat_data);
+ } else { // data
+ fwrite(cat_data,cat_data_len,1,fp);
+ }
+ fclose(fp);
+ usercommand(opt,0,NULL,fconcat(opt->path_html,cat_name),"","");
}
- fclose(fp);
- usercommand(opt,0,NULL,fconcat(opt->path_html,cat_name),"","");
}
- }
- } else { // écrire normalement le nom de fichier
- HT_ADD("http://");
- if (!opt->passprivacy) {
- HT_ADD(adr); // Password
- } else {
- HT_ADD(jump_identification(adr)); // No Password
- }
- if (*fil!='/')
- HT_ADD("/");
- HT_ADD(fil);
- }// patcher?
+ } else { // écrire normalement le nom de fichier
+ HT_ADD("http://");
+ if (!opt->passprivacy) {
+ HT_ADD_HTMLESCAPED(adr); // Password
+ } else {
+ HT_ADD_HTMLESCAPED(jump_identification(adr)); // No Password
+ }
+ if (*fil!='/')
+ HT_ADD("/");
+ HT_ADD_HTMLESCAPED(fil);
+ }// patcher?
} // external
} else { // que le nom de fichier (classe java)
// en gros recopie de plus bas: copier codebase et base
if (p_flush) {
- char tempo[HTS_URLMAXSIZE*2]; // <-- ajouté
- char tempo_pat[HTS_URLMAXSIZE*2];
-
+ char BIGSTK tempo[HTS_URLMAXSIZE*2]; // <-- ajouté
+ char BIGSTK tempo_pat[HTS_URLMAXSIZE*2];
+
// Calculer chemin
tempo_pat[0]='\0';
strcpybuff(tempo,fil); // <-- ajouté
{
char* a=strrchr(tempo,'/');
-
+
// Example: we converted code="x.y.z.foo.class" into "x/y/z/foo.class"
// we have to do the contrary now
if (add_class_dots_to_patch>0) {
@@ -2426,33 +2591,33 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
}
}
}
-
+
// Cut path/filename
if (a) {
- char tempo2[HTS_URLMAXSIZE*2];
+ char BIGSTK tempo2[HTS_URLMAXSIZE*2];
strcpybuff(tempo2,a+1); // FICHIER
strncatbuff(tempo_pat,tempo,(int) (a - tempo)+1); // chemin
strcpybuff(tempo,tempo2); // fichier
}
}
-
+
// érire codebase="chemin"
if ((opt->getmode & 1) && (ptr>0)) {
- char tempo4[HTS_URLMAXSIZE*2];
+ char BIGSTK tempo4[HTS_URLMAXSIZE*2];
tempo4[0]='\0';
-
+
if (strnotempty(tempo_pat)) {
HT_ADD("codebase=\"http://");
if (!opt->passprivacy) {
- HT_ADD(adr); // Password
+ HT_ADD_HTMLESCAPED(adr); // Password
} else {
- HT_ADD(jump_identification(adr)); // No Password
+ HT_ADD_HTMLESCAPED(jump_identification(adr)); // No Password
}
if (*tempo_pat!='/') HT_ADD("/");
HT_ADD(tempo_pat);
HT_ADD("\" ");
}
-
+
strncatbuff(tempo4,lastsaved,(int) (p_flush - lastsaved));
HT_ADD(tempo4); // refresh code="
HT_ADD(tempo);
@@ -2476,46 +2641,53 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
lastsaved=eadr-1; // dernier écrit+1 (enfin euh apres on fait un ++ alors hein)
}
*/
- else if (opt->mimehtml) {
- char buff[HTS_URLMAXSIZE*3];
- HT_ADD("cid:");
- strcpybuff(buff, adr);
- strcatbuff(buff, fil);
- escape_in_url(buff);
- { char* a = buff; while((a = strchr(a, '%'))) { *a = 'X'; a++; } }
- HT_ADD(buff);
- lastsaved=eadr-1; // dernier écrit+1 (enfin euh apres on fait un ++ alors hein)
- }
- else if (opt->urlmode==3) { // URI absolue /
- if ((opt->getmode & 1) && (ptr>0)) { // ecrire les html
- HT_ADD(fil);
+ else if (opt->mimehtml) {
+ char BIGSTK buff[HTS_URLMAXSIZE*3];
+ HT_ADD("cid:");
+ strcpybuff(buff, adr);
+ strcatbuff(buff, fil);
+ escape_in_url(buff);
+ { char* a = buff; while((a = strchr(a, '%'))) { *a = 'X'; a++; } }
+ HT_ADD_HTMLESCAPED(buff);
+ lastsaved=eadr-1; // dernier écrit+1 (enfin euh apres on fait un ++ alors hein)
}
- lastsaved=eadr-1; // dernier écrit+1 (enfin euh apres on fait un ++ alors hein)
- }
- else if (opt->urlmode==2) { // RELATIF
- char tempo[HTS_URLMAXSIZE*2];
+ else if (opt->urlmode==3) { // URI absolue /
+ if ((opt->getmode & 1) && (ptr>0)) { // ecrire les html
+ HT_ADD_HTMLESCAPED(fil);
+ }
+ lastsaved=eadr-1; // dernier écrit+1 (enfin euh apres on fait un ++ alors hein)
+ }
+ else if (opt->urlmode==2) { // RELATIF
+ char BIGSTK tempo[HTS_URLMAXSIZE*2];
tempo[0]='\0';
// calculer le lien relatif
-
+
if (lienrelatif(tempo,save,relativesavename)==0) {
- if (!no_esc_utf)
- escape_uri(tempo); // escape with %xx
- else
- escape_uri_utf(tempo); // escape with %xx
+ if (!in_media) { // In media (such as real audio): don't patch
+ if (!no_esc_utf)
+ escape_uri(tempo); // escape with %xx
+ else {
+ /* No escaping at all - remaining upper chars will be escaped below */
+ /* FIXME - Should be done in all local cases */
+ //x_escape_html(tempo);
+ //escape_uri_utf(tempo); // FIXME - escape with %xx
+ //escape_uri(tempo); // escape with %xx
+ }
+ }
if ((opt->debug>1) && (opt->log!=NULL)) {
fspc(opt->log,"debug"); fprintf(opt->log,"relative link at %s build with %s and %s: %s"LF,adr,save,relativesavename,tempo);
test_flush;
}
-
+
// lien applet (code) - il faut placer un codebase avant
if (p_type==-1) { // que le nom de fichier
-
+
if (p_flush) {
- char tempo_pat[HTS_URLMAXSIZE*2];
+ char BIGSTK tempo_pat[HTS_URLMAXSIZE*2];
tempo_pat[0]='\0';
{
char* a=strrchr(tempo,'/');
-
+
// Example: we converted code="x.y.z.foo.class" into "x/y/z/foo.class"
// we have to do the contrary now
if (add_class_dots_to_patch>0) {
@@ -2532,43 +2704,44 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
}
}
}
-
+
if (a) {
- char tempo2[HTS_URLMAXSIZE*2];
+ char BIGSTK tempo2[HTS_URLMAXSIZE*2];
strcpybuff(tempo2,a+1);
strncatbuff(tempo_pat,tempo,(int) (a - tempo)+1); // chemin
strcpybuff(tempo,tempo2); // fichier
}
}
-
+
// érire codebase="chemin"
if ((opt->getmode & 1) && (ptr>0)) {
- char tempo4[HTS_URLMAXSIZE*2];
+ char BIGSTK tempo4[HTS_URLMAXSIZE*2];
tempo4[0]='\0';
-
+
if (strnotempty(tempo_pat)) {
HT_ADD("codebase=\"");
- HT_ADD(tempo_pat);
+ HT_ADD_HTMLESCAPED(tempo_pat);
HT_ADD("\" ");
}
-
+
strncatbuff(tempo4,lastsaved,(int) (p_flush - lastsaved));
HT_ADD(tempo4); // refresh code="
}
}
//lastsaved=adr; // dernier écrit+1
}
-
+
if ((opt->getmode & 1) && (ptr>0)) {
// écrire le lien modifié, relatif
- HT_ADD(tempo);
-
+ // Note: escape all chars, even >127 (no UTF)
+ HT_ADD_HTMLESCAPED_FULL(tempo);
+
// Add query-string, for informational purpose only
// Useless, because all parameters-pages are saved into different targets
if (opt->includequery) {
char* a=strchr(lien,'?');
if (a) {
- HT_ADD(a);
+ HT_ADD_HTMLESCAPED(a);
}
}
}
@@ -2580,8 +2753,8 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
}
}
} // sinon le lien sera écrit normalement
-
-
+
+
#if 0
if (fexist(save)) { // le fichier existe..
adr[0]='\0';
@@ -2592,7 +2765,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
}
}
#endif
-
+
/* Security check */
if (strlen(save) >= HTS_URLMAXSIZE) {
adr[0]='\0';
@@ -2601,7 +2774,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
test_flush;
}
}
-
+
if ((adr[0]!='\0') && (p_type!=2) && (p_type!=-2) && (forbidden_url!=1) ) { // si le fichier n'existe pas, ajouter à la liste
// n'y a-t-il pas trop de liens?
if (lien_tot+1 >= lien_max-4) { // trop de liens!
@@ -2614,10 +2787,10 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
if ((opt->getmode & 1) && (ptr>0)) { if (fp) { fclose(fp); fp=NULL; } }
XH_uninit; // désallocation mémoire & buffers
return -1;
-
+
} else { // noter le lien sur la listes des liens à charger
int pass_fix,dejafait=0;
-
+
// Calculer la priorité de ce lien
if ((opt->getmode & 4)==0) { // traiter html après
pass_fix=0;
@@ -2627,7 +2800,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
else
pass_fix=max(0,numero_passe); // priorité normale
}
-
+
/* If the file seems to be an html file, get depth-1 */
/*
if (strnotempty(save)) {
@@ -2638,7 +2811,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
}
}
*/
-
+
// vérifier que le lien n'a pas déja été noté
// si c'est le cas, alors il faut s'assurer que la priorité associée
// au fichier est la plus grande des deux priorités
@@ -2653,9 +2826,9 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
strcmp(adr, liens[i]->adr) != 0
|| strcmp(fil, liens[i]->fil) != 0
) {
- fspc(opt->log,"debug"); fprintf(opt->log,"merging similar links %s%s and %s%s"LF,adr,fil,liens[i]->adr,liens[i]->fil);
- test_flush;
- }
+ fspc(opt->log,"debug"); fprintf(opt->log,"merging similar links %s%s and %s%s"LF,adr,fil,liens[i]->adr,liens[i]->fil);
+ test_flush;
+ }
}
liens[i]->depth=maximum(liens[i]->depth,liens[ptr]->depth - 1);
dejafait=1;
@@ -2676,7 +2849,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
}
}
#endif
-
+
// le lien n'a jamais été créé.
// cette fois ci, on le crée!
if (!dejafait) {
@@ -2686,57 +2859,57 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
// enregistrer lien à charger
//liens[lien_tot]->adr[0]=liens[lien_tot]->fil[0]=liens[lien_tot]->sav[0]='\0';
// même adresse: l'objet père est l'objet père de l'actuel
-
+
// DEBUT ROBOTS.TXT AJOUT
if (!just_test_it) {
if (
(!strfield(adr,"ftp://")) // non ftp
&& (!strfield(adr,"file://")) ) { // non file
- if (opt->robots) { // récupérer robots
- if (ishtml(fil)!=0) { // pas la peine pour des fichiers isolés
- if (checkrobots(_ROBOTS,adr,"") != -1) { // robots.txt ?
- checkrobots_set(_ROBOTS ,adr,""); // ajouter entrée vide
- if (checkrobots(_ROBOTS,adr,"") == -1) { // robots.txt ?
- // enregistrer robots.txt (MACRO)
- liens_record(adr,"/robots.txt","","","");
- if (liens[lien_tot]==NULL) { // erreur, pas de place réservée
- printf("PANIC! : Not enough memory [%d]\n",__LINE__);
- if (opt->errlog) {
- fprintf(opt->errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url)));
- test_flush;
- }
- if ((opt->getmode & 1) && (ptr>0)) { if (fp) { fclose(fp); fp=NULL; } }
- XH_uninit; // désallocation mémoire & buffers
- return -1;
- }
- liens[lien_tot]->testmode=0; // pas mode test
- liens[lien_tot]->link_import=0; // pas mode import
- liens[lien_tot]->premier=lien_tot;
- liens[lien_tot]->precedent=ptr;
- liens[lien_tot]->depth=0;
- liens[lien_tot]->pass2=max(0,numero_passe);
- liens[lien_tot]->retry=0;
- lien_tot++; // UN LIEN DE PLUS
+ if (opt->robots) { // récupérer robots
+ if (ishtml(fil)!=0) { // pas la peine pour des fichiers isolés
+ if (checkrobots(_ROBOTS,adr,"") != -1) { // robots.txt ?
+ checkrobots_set(_ROBOTS ,adr,""); // ajouter entrée vide
+ if (checkrobots(_ROBOTS,adr,"") == -1) { // robots.txt ?
+ // enregistrer robots.txt (MACRO)
+ liens_record(adr,"/robots.txt","","","");
+ if (liens[lien_tot]==NULL) { // erreur, pas de place réservée
+ printf("PANIC! : Not enough memory [%d]\n",__LINE__);
+ if (opt->errlog) {
+ fprintf(opt->errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url)));
+ test_flush;
+ }
+ if ((opt->getmode & 1) && (ptr>0)) { if (fp) { fclose(fp); fp=NULL; } }
+ XH_uninit; // désallocation mémoire & buffers
+ return -1;
+ }
+ liens[lien_tot]->testmode=0; // pas mode test
+ liens[lien_tot]->link_import=0; // pas mode import
+ liens[lien_tot]->premier=lien_tot;
+ liens[lien_tot]->precedent=ptr;
+ liens[lien_tot]->depth=0;
+ liens[lien_tot]->pass2=max(0,numero_passe);
+ liens[lien_tot]->retry=0;
+ lien_tot++; // UN LIEN DE PLUS
#if DEBUG_ROBOTS
- printf("robots.txt: added file robots.txt for %s\n",adr);
+ printf("robots.txt: added file robots.txt for %s\n",adr);
#endif
- if ((opt->debug>1) && (opt->log!=NULL)) {
- fspc(opt->log,"debug"); fprintf(opt->log,"robots.txt added at %s"LF,adr);
- test_flush;
- }
- } else {
- if (opt->errlog) {
- fprintf(opt->errlog,"Unexpected robots.txt error at %d"LF,__LINE__);
- test_flush;
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"robots.txt added at %s"LF,adr);
+ test_flush;
+ }
+ } else {
+ if (opt->errlog) {
+ fprintf(opt->errlog,"Unexpected robots.txt error at %d"LF,__LINE__);
+ test_flush;
+ }
}
}
}
}
}
- }
}
// FIN ROBOTS.TXT AJOUT
-
+
// enregistrer (MACRO)
liens_record(adr,fil,save,former_adr,former_fil);
if (liens[lien_tot]==NULL) { // erreur, pas de place réservée
@@ -2749,7 +2922,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
XH_uninit; // désallocation mémoire & buffers
return -1;
}
-
+
// mode test?
if (!just_test_it)
liens[lien_tot]->testmode=0; // pas mode test
@@ -2765,7 +2938,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
else // sinon l'objet père est le précédent lui même
liens[lien_tot]->premier=lien_tot;
// liens[lien_tot]->premier=ptr;
-
+
liens[lien_tot]->precedent=ptr;
// noter la priorité
if (!set_prio_to)
@@ -2775,7 +2948,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
// noter pass
liens[lien_tot]->pass2=pass_fix;
liens[lien_tot]->retry=opt->retry;
-
+
//strcpybuff(liens[lien_tot]->adr,adr);
//strcpybuff(liens[lien_tot]->fil,fil);
//strcpybuff(liens[lien_tot]->sav,save);
@@ -2787,185 +2960,203 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
}
test_flush;
}
-
+
lien_tot++; // UN LIEN DE PLUS
} else { // if !dejafait
if ((opt->debug>1) && (opt->log!=NULL)) {
fspc(opt->log,"debug"); fprintf(opt->log,"link has already been recorded, cancelled: %s"LF,save);
test_flush;
}
-
+
}
-
-
+
+
} // si pas trop de liens
} // si adr[0]!='\0'
-
-
+
+
} // if adr[0]!='\0'
-
+
} // if adr[0]!='\0'
-
+
} // if strlen(lien)>0
-
+
} // if ok==0
-
- adr=eadr-1; // ** sauter
- /* We skipped bytes and skip the " : reset state */
- if (inscript) {
- inscript_state_pos = INSCRIPT_START;
+ assertf(eadr - adr >= 0); // Should not go back
+ if (eadr > adr) {
+ INCREMENT_CURRENT_ADR(eadr - 1 - adr);
}
+ // adr=eadr-1; // ** sauter
- } // if (p)
-
- } // si '<' ou '>'
-
- // plus loin
- adr++;
-
-
- /* Otimization: if we are scanning in HTML data (not in tag or script),
- then jump to the next starting tag */
- if (ptr>0) {
- if ( (!intag) /* Not in tag */
- && (!inscript) /* Not in (java)script */
- && (!incomment) /* Not in comment (<!--) */
- && (!inscript_tag) /* Not in tag with script inside */
- )
- {
- /* Not at the end */
- if (( ((int) (adr - r->adr)) ) < r->size) {
- /* Not on a starting tag yet */
- if (*adr != '<') {
- /* strchr does not well behave with null chrs.. */
- /* char* adr_next = strchr(adr,'<'); */
- char* adr_next = adr;
- while(*adr_next != '<' && (adr_next - r->adr) < r->size ) {
- adr_next++;
- }
- /* Jump to near end (index hack) */
- if (!adr_next || *adr_next != '<') {
- if (
- ( (int)(adr - r->adr) < (r->size - 4))
- &&
- (r->size > 4)
- ) {
+ /* We skipped bytes and skip the " : reset state */
+ /*if (inscript) {
+ inscript_state_pos = INSCRIPT_START;
+ }*/
+
+ } // if (p)
+
+ } // si '<' ou '>'
+
+ // plus loin
+ adr++; // automate will be checked next loop
+
+
+ /* Otimization: if we are scanning in HTML data (not in tag or script),
+ then jump to the next starting tag */
+ if (ptr>0) {
+ if ( (!intag) /* Not in tag */
+ && (!inscript) /* Not in (java)script */
+ && (!in_media) /* Not in media */
+ && (!incomment) /* Not in comment (<!--) */
+ && (!inscript_tag) /* Not in tag with script inside */
+ )
+ {
+ /* Not at the end */
+ if (( ((int) (adr - r->adr)) ) < r->size) {
+ /* Not on a starting tag yet */
+ if (*adr != '<') {
+ /* strchr does not well behave with null chrs.. */
+ /* char* adr_next = strchr(adr,'<'); */
+ char* adr_next = adr;
+ while(*adr_next != '<' && (adr_next - r->adr) < r->size ) {
+ adr_next++;
+ }
+ /* Jump to near end (index hack) */
+ if (!adr_next || *adr_next != '<') {
+ if (
+ ( (int)(adr - r->adr) < (r->size - 4))
+ &&
+ (r->size > 4)
+ ) {
adr = r->adr + r->size - 2;
}
- } else {
- adr = adr_next;
- }
+ } else {
+ adr = adr_next;
}
}
}
}
-
- // ----------
- // écrire peu à peu
- if ((opt->getmode & 1) && (ptr>0)) HT_ADD_ADR;
- lastsaved=adr; // dernier écrit+1
- // ----------
-
- // Checks
- if (back_add_stats != opt->state.back_add_stats) {
- back_add_stats = opt->state.back_add_stats;
-
- // Check max time
- if (!back_checkmirror(opt)) {
- adr = r->adr + r->size;
- }
+ }
+
+ // ----------
+ // écrire peu à peu
+ if ((opt->getmode & 1) && (ptr>0)) HT_ADD_ADR;
+ lastsaved=adr; // dernier écrit+1
+ // ----------
+
+ // Checks
+ if (back_add_stats != opt->state.back_add_stats) {
+ back_add_stats = opt->state.back_add_stats;
+
+ // Check max time
+ if (!back_checkmirror(opt)) {
+ adr = r->adr + r->size;
}
+ }
- // pour les stats du shell si parsing trop long
+ // pour les stats du shell si parsing trop long
#if HTS_ANALYSTE
- if (r->size)
- _hts_in_html_done=(100 * ((int) (adr - r->adr)) ) / (int)(r->size);
- if (_hts_in_html_poll) {
- _hts_in_html_poll=0;
- // temps à attendre, et remplir autant que l'on peut le cache (backing)
- back_wait(back,back_max,opt,cache,HTS_STAT.stat_timestart);
- back_fillmax(back,back_max,opt,cache,liens,ptr,numero_passe,lien_tot);
-
- // Transfer rate
- engine_stats();
-
- // Refresh various stats
- HTS_STAT.stat_nsocket=back_nsoc(back,back_max);
- HTS_STAT.stat_errors=fspc(NULL,"error");
- HTS_STAT.stat_warnings=fspc(NULL,"warning");
- HTS_STAT.stat_infos=fspc(NULL,"info");
- HTS_STAT.nbk=backlinks_done(liens,lien_tot,ptr);
- HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,back,back_max);
-
- if (!hts_htmlcheck_loop(back,back_max,0,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) {
- if (opt->errlog) {
- fspc(opt->errlog,"info"); fprintf(opt->errlog,"Exit requested by shell or user"LF);
- test_flush;
- }
- *stre->exit_xh_=1; // exit requested
- XH_uninit;
- return -1;
- //adr = r->adr + r->size; // exit
- } else if (_hts_cancel==1) {
- // adr = r->adr + r->size; // exit
- nofollow=1; // moins violent
- _hts_cancel=0;
- }
- }
-
- // refresh the backing system each 2 seconds
- if (engine_stats()) {
- back_wait(back,back_max,opt,cache,HTS_STAT.stat_timestart);
- back_fillmax(back,back_max,opt,cache,liens,ptr,numero_passe,lien_tot);
+ if (r->size)
+ _hts_in_html_done=(100 * ((int) (adr - r->adr)) ) / (int)(r->size);
+ if (_hts_in_html_poll) {
+ _hts_in_html_poll=0;
+ // temps à attendre, et remplir autant que l'on peut le cache (backing)
+ back_wait(back,back_max,opt,cache,HTS_STAT.stat_timestart);
+ back_fillmax(back,back_max,opt,cache,liens,ptr,numero_passe,lien_tot);
+
+ // Transfer rate
+ engine_stats();
+
+ // Refresh various stats
+ HTS_STAT.stat_nsocket=back_nsoc(back,back_max);
+ HTS_STAT.stat_errors=fspc(NULL,"error");
+ HTS_STAT.stat_warnings=fspc(NULL,"warning");
+ HTS_STAT.stat_infos=fspc(NULL,"info");
+ HTS_STAT.nbk=backlinks_done(liens,lien_tot,ptr);
+ HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,back,back_max);
+
+ if (!hts_htmlcheck_loop(back,back_max,0,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) {
+ if (opt->errlog) {
+ fspc(opt->errlog,"info"); fprintf(opt->errlog,"Exit requested by shell or user"LF);
+ test_flush;
+ }
+ *stre->exit_xh_=1; // exit requested
+ XH_uninit;
+ return -1;
+ //adr = r->adr + r->size; // exit
+ } else if (_hts_cancel==1) {
+ // adr = r->adr + r->size; // exit
+ nofollow=1; // moins violent
+ _hts_cancel=0;
}
+ }
+
+ // refresh the backing system each 2 seconds
+ if (engine_stats()) {
+ back_wait(back,back_max,opt,cache,HTS_STAT.stat_timestart);
+ back_fillmax(back,back_max,opt,cache,liens,ptr,numero_passe,lien_tot);
+ }
#endif
- } while(( ((int) (adr - r->adr)) ) < r->size);
+ } while(( ((int) (adr - r->adr)) ) < r->size);
#if HTS_ANALYSTE
- _hts_in_html_parsing=0; // flag
- _hts_cancel=0; // pas de cancel
+ _hts_in_html_parsing=0; // flag
+ _hts_cancel=0; // pas de cancel
#endif
- if ((opt->getmode & 1) && (ptr>0)) {
- HT_ADD_END; // achever
+ if ((opt->getmode & 1) && (ptr>0)) {
+ {
+ char* cAddr = ht_buff;
+ int cSize = ht_len;
+ if ( (opt->debug>0) && (opt->log!=NULL) ) {
+ fspc(opt->log,"info"); fprintf(opt->log,"engine: postprocess-html: %s%s"LF, urladr, urlfil);
+ }
+ if (hts_htmlcheck_postprocess(&cAddr, &cSize, urladr, urlfil) == 1) {
+ ht_buff = cAddr;
+ ht_len = cSize;
+ }
}
- //
- //
- //
- } // if !error
-
-
- if (opt->getmode & 1) { if (fp) { fclose(fp); fp=NULL; } }
- // sauver fichier
- //structcheck(savename);
- //filesave(opt,r->adr,r->size,savename);
-
+
+ /* Flush and save to disk */
+ HT_ADD_END; // achever
+ }
+ //
+ //
+ //
+ } // if !error
+
+
+ if (opt->getmode & 1) { if (fp) { fclose(fp); fp=NULL; } }
+ // sauver fichier
+ //structcheck(savename);
+ //filesave(opt,r->adr,r->size,savename);
+
#if HTS_ANALYSTE
- } // analyse OK
+ } // analyse OK
#endif
- /* Apply changes */
- ENGINE_SAVE_CONTEXT();
-
- return 0;
+ /* Apply changes */
+ ENGINE_SAVE_CONTEXT();
+
+ return 0;
}
/*
- Check 301, 302, .. statuscodes (moved)
+Check 301, 302, .. statuscodes (moved)
*/
int hts_mirror_check_moved(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
/* Load engine variables */
ENGINE_LOAD_CONTEXT();
-
+
// DEBUT rattrapage des 301,302,307..
// ------------------------------------------------------------
if (!error) {
////////{
// on a chargé un fichier en plus
// if (!error) stat_loaded+=r.size;
-
+
// ------------------------------------------------------------
// Rattrapage des 301,302,307 (moved) et 412,416 - les 304 le sont dans le backing
// ------------------------------------------------------------
@@ -2974,187 +3165,205 @@ int hts_mirror_check_moved(htsmoduleStruct* str, htsmoduleStructExtended* stre)
|| (r->statuscode==303)
|| (r->statuscode==307)
) {
- //if (r->adr!=NULL) { // adr==null si fichier direct. [catch: davename normalement si cgi]
- //int i=0;
- char *rn=NULL;
- // char* p;
-
- if ( (opt->debug>0) && (opt->errlog!=NULL) ) {
- //if (opt->errlog) {
- fspc(opt->errlog,"warning"); fprintf(opt->errlog,"%s for %s%s"LF,r->msg,urladr,urlfil);
- test_flush;
- }
-
-
- {
- char mov_url[HTS_URLMAXSIZE*2],mov_adr[HTS_URLMAXSIZE*2],mov_fil[HTS_URLMAXSIZE*2];
- int get_it=0; // ne pas prendre le fichier à la même adresse par défaut
- int reponse=0;
- mov_url[0]='\0'; mov_adr[0]='\0'; mov_fil[0]='\0';
- //
-
- strcpybuff(mov_url,r->location);
-
- // url qque -> adresse+fichier
- if ((reponse=ident_url_relatif(mov_url,urladr,urlfil,mov_adr,mov_fil))>=0) {
- int set_prio_to=0; // pas de priotité fixéd par wizard
-
- //if (ident_url_absolute(mov_url,mov_adr,mov_fil)!=-1) { // ok URL reconnue
- // c'est (en gros) la même URL..
- // si c'est un problème de casse dans le host c'est que le serveur est buggé
- // ("RFC says.." : host name IS case insensitive)
- if ((strfield2(mov_adr,urladr)!=0) && (strfield2(mov_fil,urlfil)!=0)) { // identique à casse près
- // on tourne en rond
- if (strcmp(mov_fil,urlfil)==0) {
- error=1;
- get_it=-1; // ne rien faire
- if (opt->errlog) {
- fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Can not bear crazy server (%s) for %s%s"LF,r->msg,urladr,urlfil);
- test_flush;
+ //if (r->adr!=NULL) { // adr==null si fichier direct. [catch: davename normalement si cgi]
+ //int i=0;
+ char *rn=NULL;
+ // char* p;
+
+ if ( (opt->debug>0) && (opt->errlog!=NULL) ) {
+ //if (opt->errlog) {
+ fspc(opt->errlog,"warning"); fprintf(opt->errlog,"%s for %s%s"LF,r->msg,urladr,urlfil);
+ test_flush;
+ }
+
+
+ {
+ char BIGSTK mov_url[HTS_URLMAXSIZE*2],mov_adr[HTS_URLMAXSIZE*2],mov_fil[HTS_URLMAXSIZE*2];
+ int get_it=0; // ne pas prendre le fichier à la même adresse par défaut
+ int reponse=0;
+ mov_url[0]='\0'; mov_adr[0]='\0'; mov_fil[0]='\0';
+ //
+
+ strcpybuff(mov_url,r->location);
+
+ // url qque -> adresse+fichier
+ if ((reponse=ident_url_relatif(mov_url,urladr,urlfil,mov_adr,mov_fil))>=0) {
+ int set_prio_to=0; // pas de priotité fixéd par wizard
+
+ // check whether URLHack is harmless or not
+ if (opt->urlhack) {
+ char BIGSTK n_adr[HTS_URLMAXSIZE*2], n_fil[HTS_URLMAXSIZE*2];
+ char BIGSTK pn_adr[HTS_URLMAXSIZE*2], pn_fil[HTS_URLMAXSIZE*2];
+ n_adr[0] = n_fil[0] = '\0';
+ (void) adr_normalized(mov_adr, n_adr);
+ (void) fil_normalized(mov_fil, n_fil);
+ (void) adr_normalized(urladr, pn_adr);
+ (void) fil_normalized(urlfil, pn_fil);
+ if (strcasecmp(n_adr, pn_adr) == 0 && strcasecmp(n_fil, pn_fil) == 0) {
+ if (opt->errlog) {
+ fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Redirected link is identical because of 'URL Hack' option: %s%s and %s%s"LF, urladr, urlfil, mov_adr, mov_fil);
+ test_flush;
+ }
}
- } else { // mauvaise casse, effacer entrée dans la pile et rejouer une fois
- get_it=1;
}
- } else { // adresse différente
- if (ishtml(mov_url)==0) { // pas même adresse MAIS c'est un fichier non html (pas de page moved possible)
- // -> on prend à cette adresse, le lien sera enregistré avec lien_record() (hash)
- if ((opt->debug>1) && (opt->log!=NULL)) {
- fspc(opt->log,"debug"); fprintf(opt->log,"wizard link test for moved file at %s%s.."LF,mov_adr,mov_fil);
- test_flush;
- }
- // accepté?
- if (hts_acceptlink(opt,ptr,lien_tot,liens,
- mov_adr,mov_fil,
- &set_prio_to,
- NULL) != 1) { /* nouvelle adresse non refusée ? */
+
+ //if (ident_url_absolute(mov_url,mov_adr,mov_fil)!=-1) { // ok URL reconnue
+ // c'est (en gros) la même URL..
+ // si c'est un problème de casse dans le host c'est que le serveur est buggé
+ // ("RFC says.." : host name IS case insensitive)
+ if ((strfield2(mov_adr,urladr)!=0) && (strfield2(mov_fil,urlfil)!=0)) { // identique à casse près
+ // on tourne en rond
+ if (strcmp(mov_fil,urlfil)==0) {
+ error=1;
+ get_it=-1; // ne rien faire
+ if (opt->errlog) {
+ fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Can not bear crazy server (%s) for %s%s"LF,r->msg,urladr,urlfil);
+ test_flush;
+ }
+ } else { // mauvaise casse, effacer entrée dans la pile et rejouer une fois
get_it=1;
+ }
+ } else { // adresse différente
+ if (ishtml(mov_url)==0) { // pas même adresse MAIS c'est un fichier non html (pas de page moved possible)
+ // -> on prend à cette adresse, le lien sera enregistré avec lien_record() (hash)
if ((opt->debug>1) && (opt->log!=NULL)) {
- fspc(opt->log,"debug"); fprintf(opt->log,"moved link accepted: %s%s"LF,mov_adr,mov_fil);
+ fspc(opt->log,"debug"); fprintf(opt->log,"wizard link test for moved file at %s%s.."LF,mov_adr,mov_fil);
test_flush;
}
- }
- } /* sinon traité normalement */
- }
-
- //if ((strfield2(mov_adr,urladr)!=0) && (strfield2(mov_fil,urlfil)!=0)) { // identique à casse près
- if (get_it==1) {
- // court-circuiter le reste du traitement
- // et reculer pour mieux sauter
- if (opt->errlog) {
- fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Warning moved treated for %s%s (real one is %s%s)"LF,urladr,urlfil,mov_adr,mov_fil);
- test_flush;
- }
- // canceller lien actuel
- error=1;
- strcpybuff(liens[ptr]->adr,"!"); // caractère bidon (invalide hash)
+ // accepté?
+ if (hts_acceptlink(opt,ptr,lien_tot,liens,
+ mov_adr,mov_fil,
+ NULL, NULL,
+ &set_prio_to,
+ NULL) != 1) { /* nouvelle adresse non refusée ? */
+ get_it=1;
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"moved link accepted: %s%s"LF,mov_adr,mov_fil);
+ test_flush;
+ }
+ }
+ } /* sinon traité normalement */
+ }
+
+ //if ((strfield2(mov_adr,urladr)!=0) && (strfield2(mov_fil,urlfil)!=0)) { // identique à casse près
+ if (get_it==1) {
+ // court-circuiter le reste du traitement
+ // et reculer pour mieux sauter
+ if (opt->errlog) {
+ fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Warning moved treated for %s%s (real one is %s%s)"LF,urladr,urlfil,mov_adr,mov_fil);
+ test_flush;
+ }
+ // canceller lien actuel
+ error=1;
+ strcpybuff(liens[ptr]->adr,"!"); // caractère bidon (invalide hash)
#if HTS_HASH
#else
- liens[ptr]->sav_len=-1; // taille invalide
+ liens[ptr]->sav_len=-1; // taille invalide
#endif
- // noter NOUVEAU lien
- //xxc xxc
- // set_prio_to=0+1; // protection if the moved URL is an html page!!
- //xxc xxc
- {
- char mov_sav[HTS_URLMAXSIZE*2];
- // calculer lien et éventuellement modifier addresse/fichier
- if (url_savename(mov_adr,mov_fil,mov_sav,NULL,NULL,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil,opt,liens,lien_tot,back,back_max,cache,hash,ptr,numero_passe)!=-1) {
- if (hash_read(hash,mov_sav,"",0,0)<0) { // n'existe pas déja
- // enregistrer lien (MACRO) avec SAV IDENTIQUE
- liens_record(mov_adr,mov_fil,liens[ptr]->sav,"","");
- //liens_record(mov_adr,mov_fil,mov_sav,"","");
- if (liens[lien_tot]!=NULL) { // OK, pas d'erreur
- // mode test?
- liens[lien_tot]->testmode=liens[ptr]->testmode;
- liens[lien_tot]->link_import=0; // mode normal
- if (!set_prio_to)
- liens[lien_tot]->depth=liens[ptr]->depth;
- else
- liens[lien_tot]->depth=max(0,min(set_prio_to-1,liens[ptr]->depth)); // PRIORITE NULLE (catch page)
- liens[lien_tot]->pass2=max(liens[ptr]->pass2,numero_passe);
- liens[lien_tot]->retry=liens[ptr]->retry;
- liens[lien_tot]->premier=liens[ptr]->premier;
- liens[lien_tot]->precedent=liens[ptr]->precedent;
- lien_tot++;
- } else { // oups erreur, plus de mémoire!!
- printf("PANIC! : Not enough memory [%d]\n",__LINE__);
- if (opt->errlog) {
- fprintf(opt->errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url)));
+ // noter NOUVEAU lien
+ //xxc xxc
+ // set_prio_to=0+1; // protection if the moved URL is an html page!!
+ //xxc xxc
+ {
+ char BIGSTK mov_sav[HTS_URLMAXSIZE*2];
+ // calculer lien et éventuellement modifier addresse/fichier
+ if (url_savename(mov_adr,mov_fil,mov_sav,NULL,NULL,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil,opt,liens,lien_tot,back,back_max,cache,hash,ptr,numero_passe)!=-1) {
+ if (hash_read(hash,mov_sav,"",0,0)<0) { // n'existe pas déja
+ // enregistrer lien (MACRO) avec SAV IDENTIQUE
+ liens_record(mov_adr,mov_fil,liens[ptr]->sav,"","");
+ //liens_record(mov_adr,mov_fil,mov_sav,"","");
+ if (liens[lien_tot]!=NULL) { // OK, pas d'erreur
+ // mode test?
+ liens[lien_tot]->testmode=liens[ptr]->testmode;
+ liens[lien_tot]->link_import=0; // mode normal
+ if (!set_prio_to)
+ liens[lien_tot]->depth=liens[ptr]->depth;
+ else
+ liens[lien_tot]->depth=max(0,min(set_prio_to-1,liens[ptr]->depth)); // PRIORITE NULLE (catch page)
+ liens[lien_tot]->pass2=max(liens[ptr]->pass2,numero_passe);
+ liens[lien_tot]->retry=liens[ptr]->retry;
+ liens[lien_tot]->premier=liens[ptr]->premier;
+ liens[lien_tot]->precedent=liens[ptr]->precedent;
+ lien_tot++;
+ } else { // oups erreur, plus de mémoire!!
+ printf("PANIC! : Not enough memory [%d]\n",__LINE__);
+ if (opt->errlog) {
+ fprintf(opt->errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url)));
+ test_flush;
+ }
+ //if (opt->getmode & 1) { if (fp) { fclose(fp); fp=NULL; } }
+ XH_uninit; // désallocation mémoire & buffers
+ return 0;
+ }
+ } else {
+ if ( (opt->debug>0) && (opt->errlog!=NULL) ) {
+ fspc(opt->errlog,"warning"); fprintf(opt->errlog,"moving %s to an existing file %s"LF,liens[ptr]->fil,urlfil);
test_flush;
}
- //if (opt->getmode & 1) { if (fp) { fclose(fp); fp=NULL; } }
- XH_uninit; // désallocation mémoire & buffers
- return 0;
- }
- } else {
- if ( (opt->debug>0) && (opt->errlog!=NULL) ) {
- fspc(opt->errlog,"warning"); fprintf(opt->errlog,"moving %s to an existing file %s"LF,liens[ptr]->fil,urlfil);
- test_flush;
}
+
}
-
}
+
+ //printf("-> %s %s %s\n",liens[lien_tot-1]->adr,liens[lien_tot-1]->fil,liens[lien_tot-1]->sav);
+
+ // note métaphysique: il se peut qu'il y ait un index.html et un INDEX.HTML
+ // sous DOS ca marche pas très bien... mais comme je suis génial url_savename()
+ // est à même de régler ce problème
}
-
- //printf("-> %s %s %s\n",liens[lien_tot-1]->adr,liens[lien_tot-1]->fil,liens[lien_tot-1]->sav);
-
- // note métaphysique: il se peut qu'il y ait un index.html et un INDEX.HTML
- // sous DOS ca marche pas très bien... mais comme je suis génial url_savename()
- // est à même de régler ce problème
- }
- } // ident_url_xx
-
- if (get_it==0) { // adresse vraiment différente et potentiellement en html (pas de possibilité de bouger la page tel quel à cause des <img src..> et cie)
- rn=(char*) calloct(8192,1);
- if (rn!=NULL) {
- if (opt->errlog) {
- fspc(opt->errlog,"warning"); fprintf(opt->errlog,"File has moved from %s%s to %s"LF,urladr,urlfil,mov_url);
- test_flush;
- }
- if (!opt->mimehtml) {
- escape_uri(mov_url);
- } else {
- char buff[HTS_URLMAXSIZE*3];
- strcpybuff(buff, mov_adr);
- strcatbuff(buff, mov_fil);
- escape_in_url(buff);
- { char* a = buff; while((a = strchr(a, '%'))) { *a = 'X'; a++; } }
- strcpybuff(mov_url, "cid:");
- strcatbuff(mov_url, buff);
- }
- // On prépare une page qui sautera immédiatement sur la bonne URL
- // Le scanner re-changera, ensuite, cette URL, pour la mirrorer!
- strcpybuff(rn,"<HTML>"CRLF);
- strcatbuff(rn,"<!-- Created by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->"CRLF);
- strcatbuff(rn,"<HEAD>"CRLF"<TITLE>Page has moved</TITLE>"CRLF"</HEAD>"CRLF"<BODY>"CRLF);
- strcatbuff(rn,"<META HTTP-EQUIV=\"Refresh\" CONTENT=\"0; URL=");
- strcatbuff(rn,mov_url); // URL
- strcatbuff(rn,"\">"CRLF);
- strcatbuff(rn,"<A HREF=\"");
- strcatbuff(rn,mov_url);
- strcatbuff(rn,"\">");
- strcatbuff(rn,"<B>Click here...</B></A>"CRLF);
- strcatbuff(rn,"</BODY>"CRLF);
- strcatbuff(rn,"<!-- Created by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->"CRLF);
- strcatbuff(rn,"</HTML>"CRLF);
-
- // changer la page
- if (r->adr) {
- freet(r->adr);
- r->adr=NULL;
- }
- r->adr=rn;
- r->size=strlen(r->adr);
- strcpybuff(r->contenttype,"text/html");
+ } // ident_url_xx
+
+ if (get_it==0) { // adresse vraiment différente et potentiellement en html (pas de possibilité de bouger la page tel quel à cause des <img src..> et cie)
+ rn=(char*) calloct(8192,1);
+ if (rn!=NULL) {
+ if (opt->errlog) {
+ fspc(opt->errlog,"warning"); fprintf(opt->errlog,"File has moved from %s%s to %s"LF,urladr,urlfil,mov_url);
+ test_flush;
+ }
+ if (!opt->mimehtml) {
+ escape_uri(mov_url);
+ } else {
+ char BIGSTK buff[HTS_URLMAXSIZE*3];
+ strcpybuff(buff, mov_adr);
+ strcatbuff(buff, mov_fil);
+ escape_in_url(buff);
+ { char* a = buff; while((a = strchr(a, '%'))) { *a = 'X'; a++; } }
+ strcpybuff(mov_url, "cid:");
+ strcatbuff(mov_url, buff);
+ }
+ // On prépare une page qui sautera immédiatement sur la bonne URL
+ // Le scanner re-changera, ensuite, cette URL, pour la mirrorer!
+ strcpybuff(rn,"<HTML>"CRLF);
+ strcatbuff(rn,"<!-- Created by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->"CRLF);
+ strcatbuff(rn,"<HEAD>"CRLF"<TITLE>Page has moved</TITLE>"CRLF"</HEAD>"CRLF"<BODY>"CRLF);
+ strcatbuff(rn,"<META HTTP-EQUIV=\"Refresh\" CONTENT=\"0; URL=");
+ strcatbuff(rn,mov_url); // URL
+ strcatbuff(rn,"\">"CRLF);
+ strcatbuff(rn,"<A HREF=\"");
+ strcatbuff(rn,mov_url);
+ strcatbuff(rn,"\">");
+ strcatbuff(rn,"<B>Click here...</B></A>"CRLF);
+ strcatbuff(rn,"</BODY>"CRLF);
+ strcatbuff(rn,"<!-- Created by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->"CRLF);
+ strcatbuff(rn,"</HTML>"CRLF);
+
+ // changer la page
+ if (r->adr) {
+ freet(r->adr);
+ r->adr=NULL;
}
- } // get_it==0
-
- } // bloc
- // erreur HTTP (ex: 404, not found)
- } else if (
- (r->statuscode==412)
- || (r->statuscode==416)
- ) { // Precondition Failed, c'est à dire pour nous redemander TOUT le fichier
+ r->adr=rn;
+ r->size=strlen(r->adr);
+ strcpybuff(r->contenttype, "text/html");
+ }
+ } // get_it==0
+
+ } // bloc
+ // erreur HTTP (ex: 404, not found)
+ } else if (
+ (r->statuscode==412)
+ || (r->statuscode==416)
+ ) { // Precondition Failed, c'est à dire pour nous redemander TOUT le fichier
if (fexist(liens[ptr]->sav)) {
remove(liens[ptr]->sav); // Eliminer
if (!fexist(liens[ptr]->sav)) { // Bien éliminé? (sinon on boucle..)
@@ -3210,7 +3419,7 @@ int hts_mirror_check_moved(htsmoduleStruct* str, htsmoduleStructExtended* stre)
}
} else if (r->statuscode!=200) {
int can_retry=0;
-
+
// cas où l'on peut reessayer
// -2=timeout -3=rateout (interne à httrack)
switch(r->statuscode) {
@@ -3251,7 +3460,7 @@ int hts_mirror_check_moved(htsmoduleStruct* str, htsmoduleStructExtended* stre)
case 408: case 409: case 500: case 502: case 504: can_retry=1;
break;
}
-
+
if ( strcmp(liens[ptr]->fil,"/primary") != 0 ) { // no primary (internal page 0)
if ((liens[ptr]->retry<=0) || (!can_retry) ) { // retry épuisés (ou retry impossible)
if (opt->errlog) {
@@ -3278,7 +3487,7 @@ int hts_mirror_check_moved(htsmoduleStruct* str, htsmoduleStructExtended* stre)
}
test_flush;
}
-
+
// NO error in trop level
// due to the "no connection -> previous restored" hack
// This prevent the engine from wiping all data if the website has been deleted (or moved)
@@ -3290,19 +3499,19 @@ int hts_mirror_check_moved(htsmoduleStruct* str, htsmoduleStructExtended* stre)
}
} else {
if (strcmp(urlfil,"/robots.txt") != 0) {
- /*
- This is an error caused by a link entered by the user
- That is, link(s) entered by user are invalid (404, 500, connect error, proxy error->.)
- If all links entered are invalid, the session failed and we will attempt to restore
- the previous one
- Example: Try to update a website which has been deleted remotely: this may delete
- the website locally, which is really not desired (especially if the website disappeared!)
- With this hack, the engine won't wipe local files (how clever)
+ /*
+ This is an error caused by a link entered by the user
+ That is, link(s) entered by user are invalid (404, 500, connect error, proxy error...)
+ If all links entered are invalid, the session failed and we will attempt to restore
+ the previous one
+ Example: Try to update a website which has been deleted remotely: this may delete
+ the website locally, which is really not desired (especially if the website disappeared!)
+ With this hack, the engine won't wipe local files (how clever)
*/
HTS_STAT.stat_errors_front++;
}
}
-
+
} else { // retry!!
if (opt->debug>0 && opt->errlog != NULL) { // on fera un alert si le retry échoue
fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Retry after error %d (%s) at link %s%s (from %s%s)"LF,r->statuscode,r->msg,urladr,urlfil,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil);
@@ -3349,23 +3558,23 @@ int hts_mirror_check_moved(htsmoduleStruct* str, htsmoduleStructExtended* stre)
}
// FIN rattrapage des 301,302,307..
// ------------------------------------------------------------
-
- } // if !error
-
-
- /* Apply changes */
- ENGINE_SAVE_CONTEXT();
-
- return 0;
-
-
+
+ } // if !error
+
+
+ /* Apply changes */
+ ENGINE_SAVE_CONTEXT();
+
+ return 0;
+
+
}
/*
- Wait for next file and
- check 301, 302, .. statuscodes (moved)
+Wait for next file and
+check 301, 302, .. statuscodes (moved)
*/
int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
/* Load engine variables */
@@ -3373,15 +3582,15 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended*
/* */
int b;
int n;
-
+
#if BDEBUG==1
printf("\nBack test..\n");
#endif
-
+
// pause/lock files
{
int do_pause=0;
-
+
// user pause lockfile : create hts-paused.lock --> HTTrack will be paused
if (fexist(fconcat(opt->path_log,"hts-stop.lock"))) {
// remove lockfile
@@ -3390,14 +3599,14 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended*
do_pause=1;
}
}
-
+
// after receving N bytes, pause
if (opt->fragment>0) {
if ((HTS_STAT.stat_bytes-stat_fragment) > opt->fragment) {
do_pause=1;
}
}
-
+
// pause?
if (do_pause) {
if ( (opt->debug>0) && (opt->log!=NULL) ) {
@@ -3409,10 +3618,10 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended*
#if HTS_ANALYSTE
{
back_wait(back,back_max,opt,cache,HTS_STAT.stat_timestart);
-
+
// Transfer rate
engine_stats();
-
+
// Refresh various stats
HTS_STAT.stat_nsocket=back_nsoc(back,back_max);
HTS_STAT.stat_errors=fspc(NULL,"error");
@@ -3420,18 +3629,18 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended*
HTS_STAT.stat_infos=fspc(NULL,"info");
HTS_STAT.nbk=backlinks_done(liens,lien_tot,ptr);
HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,back,back_max);
-
+
b=0;
if (!hts_htmlcheck_loop(back,back_max,b,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)
|| !back_checkmirror(opt)) {
- if (opt->errlog) {
- fspc(opt->errlog,"info"); fprintf(opt->errlog,"Exit requested by shell or user"LF);
- test_flush;
+ if (opt->errlog) {
+ fspc(opt->errlog,"info"); fprintf(opt->errlog,"Exit requested by shell or user"LF);
+ test_flush;
+ }
+ *stre->exit_xh_=1; // exit requested
+ XH_uninit;
+ return 0;
}
- *stre->exit_xh_=1; // exit requested
- XH_uninit;
- return 0;
- }
}
#endif
}
@@ -3463,7 +3672,7 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended*
//
}
// end of pause/lock files
-
+
#if HTS_ANALYSTE
// changement dans les préférences
/*
@@ -3473,10 +3682,10 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended*
}
*/
if (_hts_addurl) {
- char add_adr[HTS_URLMAXSIZE*2];
- char add_fil[HTS_URLMAXSIZE*2];
+ char BIGSTK add_adr[HTS_URLMAXSIZE*2];
+ char BIGSTK add_fil[HTS_URLMAXSIZE*2];
while(*_hts_addurl) {
- char add_url[HTS_URLMAXSIZE*2];
+ char BIGSTK add_url[HTS_URLMAXSIZE*2];
add_adr[0]=add_fil[0]=add_url[0]='\0';
if (!link_has_authority(*_hts_addurl))
strcpybuff(add_url,"http://"); // ajouter http://
@@ -3484,7 +3693,7 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended*
if (ident_url_absolute(add_url,add_adr,add_fil)>=0) {
// ----Ajout----
// noter NOUVEAU lien
- char add_sav[HTS_URLMAXSIZE*2];
+ char BIGSTK add_sav[HTS_URLMAXSIZE*2];
// calculer lien et éventuellement modifier addresse/fichier
if (url_savename(add_adr,add_fil,add_sav,NULL,NULL,NULL,NULL,opt,liens,lien_tot,back,back_max,cache,hash,ptr,numero_passe)!=-1) {
if (hash_read(hash,add_sav,"",0,0)<0) { // n'existe pas déja
@@ -3520,7 +3729,7 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended*
test_flush;
}
}
-
+
}
} else {
if (opt->errlog) {
@@ -3535,16 +3744,18 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended*
_hts_addurl=NULL; // libérer _hts_addurl
}
// si une pause a été demandée
- if (_hts_setpause) {
+ if (_hts_setpause || back_pluggable_sockets_strict(back, back_max, opt) <= 0) {
// index du lien actuel
int b=back_index(back,back_max,urladr,urlfil,savename);
+ int prev = _hts_in_html_parsing;
if (b<0) b=0; // forcer pour les stats
- while(_hts_setpause) { // on fait la pause..
+ while(_hts_setpause || back_pluggable_sockets_strict(back, back_max, opt) <= 0) { // on fait la pause..
+ _hts_in_html_parsing = 6;
back_wait(back,back_max,opt,cache,HTS_STAT.stat_timestart);
-
+
// Transfer rate
engine_stats();
-
+
// Refresh various stats
HTS_STAT.stat_nsocket=back_nsoc(back,back_max);
HTS_STAT.stat_errors=fspc(NULL,"error");
@@ -3552,7 +3763,7 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended*
HTS_STAT.stat_infos=fspc(NULL,"info");
HTS_STAT.nbk=backlinks_done(liens,lien_tot,ptr);
HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,back,back_max);
-
+
if (!hts_htmlcheck_loop(back,back_max,b,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) {
if (opt->errlog) {
fspc(opt->errlog,"info"); fprintf(opt->errlog,"Exit requested by shell or user"LF);
@@ -3562,12 +3773,12 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended*
XH_uninit;
return 0;
}
- if (back_nsoc(back,back_max)==0)
- Sleep(250); // tite pause
+ Sleep(100); // pause
}
+ _hts_in_html_parsing = prev;
}
#endif
-
+
// si le fichier n'est pas en backing, le mettre..
if (!back_exist(back,back_max,urladr,urlfil,savename)) {
#if BDEBUG==1
@@ -3582,382 +3793,385 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended*
fspc(opt->errlog,"error"); fprintf(opt->errlog,"Unexpected backing error for %s%s"LF,urladr,urlfil);
test_flush;
}
-
+
}
}
-
+
#if BDEBUG==1
printf("test number of socks\n");
#endif
-
+
// ajouter autant de socket qu'on peut ajouter
n=opt->maxsoc-back_nsoc(back,back_max);
#if BDEBUG==1
printf("%d sockets available for backing\n",n);
#endif
-
+
#if HTS_ANALYSTE
if ((n>0) && (!_hts_setpause)) { // si sockets libre et pas en pause, ajouter
#else
- if (n>0) { // si sockets libre
+ if (n>0) { // si sockets libre
#endif
- // remplir autant que l'on peut le cache (backing)
- back_fillmax(back,back_max,opt,cache,liens,ptr,numero_passe,lien_tot);
- }
-
- // index du lien actuel
- /*
- b=back_index(back,back_max,urladr,urlfil,savename);
-
- if (b>=0)
- */
- {
- // ------------------------------------------------------------
- // attendre que le fichier actuel soit prêt - BOUCLE D'ATTENTE
- do {
-
- // index du lien actuel
- b=back_index(back,back_max,urladr,urlfil,savename);
+ // remplir autant que l'on peut le cache (backing)
+ back_fillmax(back,back_max,opt,cache,liens,ptr,numero_passe,lien_tot);
+ }
+
+ // index du lien actuel
+ /*
+ b=back_index(back,back_max,urladr,urlfil,savename);
+
+ if (b>=0)
+ */
+ {
+ // ------------------------------------------------------------
+ // attendre que le fichier actuel soit prêt - BOUCLE D'ATTENTE
+ do {
+
+ // index du lien actuel
+ b=back_index(back,back_max,urladr,urlfil,savename);
#if BDEBUG==1
- printf("back index %d, waiting\n",b);
+ printf("back index %d, waiting\n",b);
#endif
- // Continue to the loop if link still present
- if (b<0)
- continue;
-
- // Receive data
- if (back[b].status>0)
- back_wait(back,back_max,opt,cache,HTS_STAT.stat_timestart);
-
- // Continue to the loop if link still present
- b=back_index(back,back_max,urladr,urlfil,savename);
- if (b<0)
- continue;
-
- // Stop the mirror
- if (!back_checkmirror(opt)) {
- *stre->exit_xh_=1; // exit requested
- XH_uninit;
- return 0;
- }
-
- // And fill the backing stack
- if (back[b].status>0)
- back_fillmax(back,back_max,opt,cache,liens,ptr,numero_passe,lien_tot);
-
- // Continue to the loop if link still present
- b=back_index(back,back_max,urladr,urlfil,savename);
- if (b<0)
- continue;
-
- // autres occupations de HTTrack: statistiques, boucle d'attente, etc.
- if ((opt->makestat) || (opt->maketrack)) {
- TStamp l=time_local();
- if ((int) (l-makestat_time) >= 60) {
- if (makestat_fp != NULL) {
- fspc(makestat_fp,"info");
- fprintf(makestat_fp,"Rate= %d (/"LLintP") \11NewLinks= %d (/%d)"LF,(int) ((HTS_STAT.HTS_TOTAL_RECV-*stre->makestat_total_)/(l-makestat_time)), (LLint)HTS_STAT.HTS_TOTAL_RECV,(int) lien_tot-*stre->makestat_lnk_,(int) lien_tot);
- fflush(makestat_fp);
- *stre->makestat_total_=HTS_STAT.HTS_TOTAL_RECV;
- *stre->makestat_lnk_=lien_tot;
- }
- if (stre->maketrack_fp != NULL) {
- int i;
- fspc(stre->maketrack_fp,"info"); fprintf(stre->maketrack_fp,LF);
- for(i=0;i<back_max;i++) {
- back_info(back,i,3,stre->maketrack_fp);
- }
- fprintf(stre->maketrack_fp,LF);
-
+ // Continue to the loop if link still present
+ if (b<0)
+ continue;
+
+ // Receive data
+ if (back[b].status>0)
+ back_wait(back,back_max,opt,cache,HTS_STAT.stat_timestart);
+
+ // Continue to the loop if link still present
+ b=back_index(back,back_max,urladr,urlfil,savename);
+ if (b<0)
+ continue;
+
+ // Stop the mirror
+ if (!back_checkmirror(opt)) {
+ *stre->exit_xh_=1; // exit requested
+ XH_uninit;
+ return 0;
+ }
+
+ // And fill the backing stack
+ if (back[b].status>0)
+ back_fillmax(back,back_max,opt,cache,liens,ptr,numero_passe,lien_tot);
+
+ // Continue to the loop if link still present
+ b=back_index(back,back_max,urladr,urlfil,savename);
+ if (b<0)
+ continue;
+
+ // autres occupations de HTTrack: statistiques, boucle d'attente, etc.
+ if ((opt->makestat) || (opt->maketrack)) {
+ TStamp l=time_local();
+ if ((int) (l-makestat_time) >= 60) {
+ if (makestat_fp != NULL) {
+ fspc(makestat_fp,"info");
+ fprintf(makestat_fp,"Rate= %d (/"LLintP") \11NewLinks= %d (/%d)"LF,(int) ((HTS_STAT.HTS_TOTAL_RECV-*stre->makestat_total_)/(l-makestat_time)), (LLint)HTS_STAT.HTS_TOTAL_RECV,(int) lien_tot-*stre->makestat_lnk_,(int) lien_tot);
+ fflush(makestat_fp);
+ *stre->makestat_total_=HTS_STAT.HTS_TOTAL_RECV;
+ *stre->makestat_lnk_=lien_tot;
+ }
+ if (stre->maketrack_fp != NULL) {
+ int i;
+ fspc(stre->maketrack_fp,"info"); fprintf(stre->maketrack_fp,LF);
+ for(i=0;i<back_max;i++) {
+ back_info(back,i,3,stre->maketrack_fp);
}
- makestat_time=l;
+ fprintf(stre->maketrack_fp,LF);
+ fflush(stre->maketrack_fp);
+
}
+ makestat_time=l;
}
+ }
#if HTS_ANALYSTE
+ {
+ int i;
{
- int i;
- {
- char* s=hts_cancel_file("");
- if (strnotempty(s)) { // fichier à canceller
- for(i=0;i<back_max;i++) {
- if ((back[i].status>0)) {
- if (strcmp(back[i].url_sav,s)==0) { // ok trouvé
- if (back[i].status != 1000) {
+ char* s=hts_cancel_file("");
+ if (strnotempty(s)) { // fichier à canceller
+ for(i=0;i<back_max;i++) {
+ if ((back[i].status>0)) {
+ if (strcmp(back[i].url_sav,s)==0) { // ok trouvé
+ if (back[i].status != 1000) {
#if HTS_DEBUG_CLOSESOCK
- DEBUG_W("user cancel: deletehttp\n");
+ DEBUG_W("user cancel: deletehttp\n");
#endif
- if (back[i].r.soc!=INVALID_SOCKET) deletehttp(&back[i].r);
- back[i].r.soc=INVALID_SOCKET;
- back[i].r.statuscode=-1;
- strcpybuff(back[i].r.msg,"Cancelled by User");
- back[i].status=0; // terminé
- } else // cancel ftp.. flag à 1
- back[i].stop_ftp = 1;
- }
+ if (back[i].r.soc!=INVALID_SOCKET) deletehttp(&back[i].r);
+ back[i].r.soc=INVALID_SOCKET;
+ back[i].r.statuscode=-1;
+ strcpybuff(back[i].r.msg,"Cancelled by User");
+ back[i].status=0; // terminé
+ } else // cancel ftp.. flag à 1
+ back[i].stop_ftp = 1;
}
}
- s[0]='\0';
}
+ s[0]='\0';
}
-
- // Transfer rate
- engine_stats();
-
- // Refresh various stats
- HTS_STAT.stat_nsocket=back_nsoc(back,back_max);
- HTS_STAT.stat_errors=fspc(NULL,"error");
- HTS_STAT.stat_warnings=fspc(NULL,"warning");
- HTS_STAT.stat_infos=fspc(NULL,"info");
- HTS_STAT.nbk=backlinks_done(liens,lien_tot,ptr);
- HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,back,back_max);
-
- if (!hts_htmlcheck_loop(back,back_max,b,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) {
- if (opt->errlog) {
- fspc(opt->errlog,"info"); fprintf(opt->errlog,"Exit requested by shell or user"LF);
- test_flush;
- }
- *stre->exit_xh_=1; // exit requested
- XH_uninit;
- return 0;
- }
}
-
+
+ // Transfer rate
+ engine_stats();
+
+ // Refresh various stats
+ HTS_STAT.stat_nsocket=back_nsoc(back,back_max);
+ HTS_STAT.stat_errors=fspc(NULL,"error");
+ HTS_STAT.stat_warnings=fspc(NULL,"warning");
+ HTS_STAT.stat_infos=fspc(NULL,"info");
+ HTS_STAT.nbk=backlinks_done(liens,lien_tot,ptr);
+ HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,back,back_max);
+
+ if (!hts_htmlcheck_loop(back,back_max,b,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) {
+ if (opt->errlog) {
+ fspc(opt->errlog,"info"); fprintf(opt->errlog,"Exit requested by shell or user"LF);
+ test_flush;
+ }
+ *stre->exit_xh_=1; // exit requested
+ XH_uninit;
+ return 0;
+ }
+ }
+
#endif
#if HTS_POLL
- if ((opt->shell) || (opt->keyboard) || (opt->verbosedisplay) || (!opt->quiet)) {
- TStamp tl;
- *stre->info_shell_=1;
-
- /* Toggle with ENTER */
- if (!opt->quiet) {
- if (check_stdin()) {
- char com[256];
- linput(stdin,com,200);
- if (opt->verbosedisplay==2)
- opt->verbosedisplay=1;
- else
- opt->verbosedisplay=2;
- /* Info for wrappers */
- if ( (opt->debug>0) && (opt->log!=NULL) ) {
- fspc(opt->log,"info"); fprintf(opt->log,"engine: change-options"LF);
- }
+ if ((opt->shell) || (opt->keyboard) || (opt->verbosedisplay) || (!opt->quiet)) {
+ TStamp tl;
+ *stre->info_shell_=1;
+
+ /* Toggle with ENTER */
+ if (!opt->quiet) {
+ if (check_stdin()) {
+ char com[256];
+ linput(stdin,com,200);
+ if (opt->verbosedisplay==2)
+ opt->verbosedisplay=1;
+ else
+ opt->verbosedisplay=2;
+ /* Info for wrappers */
+ if ( (opt->debug>0) && (opt->log!=NULL) ) {
+ fspc(opt->log,"info"); fprintf(opt->log,"engine: change-options"LF);
+ }
#if HTS_ANALYSTE
- hts_htmlcheck_chopt(opt);
+ hts_htmlcheck_chopt(opt);
#endif
- }
}
-
- tl=time_local();
-
- // générer un message d'infos sur l'état actuel
- if (opt->shell) { // si shell
- if ((tl-*stre->last_info_shell_)>0) { // toute les 1 sec
- FILE* fp=stdout;
- int a=0;
- *stre->last_info_shell_=tl;
- if (fexist(fconcat(opt->path_log,"hts-autopsy"))) { // débuggage: teste si le robot est vivant
- // (oui je sais un robot vivant.. mais bon.. il a le droit de vivre lui aussi)
- // (libérons les robots esclaves de l'internet!)
- remove(fconcat(opt->path_log,"hts-autopsy"));
- fp=fopen(fconcat(opt->path_log,"hts-isalive"),"wb");
- a=1;
+ }
+
+ tl=time_local();
+
+ // générer un message d'infos sur l'état actuel
+ if (opt->shell) { // si shell
+ if ((tl-*stre->last_info_shell_)>0) { // toute les 1 sec
+ FILE* fp=stdout;
+ int a=0;
+ *stre->last_info_shell_=tl;
+ if (fexist(fconcat(opt->path_log,"hts-autopsy"))) { // débuggage: teste si le robot est vivant
+ // (oui je sais un robot vivant.. mais bon.. il a le droit de vivre lui aussi)
+ // (libérons les robots esclaves de l'internet!)
+ remove(fconcat(opt->path_log,"hts-autopsy"));
+ fp=fopen(fconcat(opt->path_log,"hts-isalive"),"wb");
+ a=1;
+ }
+ if ((*stre->info_shell_) || a) {
+ int i,j;
+
+ fprintf(fp,"TIME %d"LF,(int) (tl-HTS_STAT.stat_timestart));
+ fprintf(fp,"TOTAL %d"LF,(int) HTS_STAT.stat_bytes);
+ fprintf(fp,"RATE %d"LF,(int) (HTS_STAT.HTS_TOTAL_RECV/(tl-HTS_STAT.stat_timestart)));
+ fprintf(fp,"SOCKET %d"LF,back_nsoc(back,back_max));
+ fprintf(fp,"LINK %d"LF,lien_tot);
+ {
+ LLint mem=0;
+ for(i=0;i<back_max;i++)
+ if (back[i].r.adr!=NULL)
+ mem+=back[i].r.size;
+ fprintf(fp,"INMEM "LLintP""LF,(LLint)mem);
}
- if ((*stre->info_shell_) || a) {
- int i,j;
-
- fprintf(fp,"TIME %d"LF,(int) (tl-HTS_STAT.stat_timestart));
- fprintf(fp,"TOTAL %d"LF,(int) HTS_STAT.stat_bytes);
- fprintf(fp,"RATE %d"LF,(int) (HTS_STAT.HTS_TOTAL_RECV/(tl-HTS_STAT.stat_timestart)));
- fprintf(fp,"SOCKET %d"LF,back_nsoc(back,back_max));
- fprintf(fp,"LINK %d"LF,lien_tot);
- {
- LLint mem=0;
- for(i=0;i<back_max;i++)
- if (back[i].r.adr!=NULL)
- mem+=back[i].r.size;
- fprintf(fp,"INMEM "LLintP""LF,(LLint)mem);
+ for(j=0;j<2;j++) { // passes pour ready et wait
+ for(i=0;i<back_max;i++) {
+ back_info(back,i,j+1,stdout); // maketrack_fp a la place de stdout ?? // **
}
- for(j=0;j<2;j++) { // passes pour ready et wait
- for(i=0;i<back_max;i++) {
- back_info(back,i,j+1,stdout); // maketrack_fp a la place de stdout ?? // **
- }
- }
- fprintf(fp,LF);
- if (a)
- fclose(fp);
- io_flush;
}
+ fprintf(fp,LF);
+ if (a)
+ fclose(fp);
+ io_flush;
}
- } // si shell
-
- } // si shell ou keyboard (option)
- //
+ }
+ } // si shell
+
+ } // si shell ou keyboard (option)
+ //
#endif
- } while((b>=0) && (back[max(b,0)].status>0));
-
-
- // If link not found on the stack, it's because it has already been downloaded
- // in background
- // Then, skip it and go to the next one
- if (b<0) {
- if ((opt->debug>1) && (opt->log!=NULL)) {
- fspc(opt->log,"debug"); fprintf(opt->log,"link #%d is ready, no more on the stack, skipping: %s%s.."LF,ptr,urladr,urlfil);
+ } while((b>=0) && (back[max(b,0)].status>0));
+
+
+ // If link not found on the stack, it's because it has already been downloaded
+ // in background
+ // Then, skip it and go to the next one
+ if (b<0) {
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"link #%d is ready, no more on the stack, skipping: %s%s.."LF,ptr,urladr,urlfil);
+ test_flush;
+ }
+
+ // prochain lien
+ // ptr++;
+
+ return 2; // goto jump_if_done;
+
+ }
+#if 0
+ /* FIXME - "finalized" NO LONGER HAS THIS MEANING */
+ /* link put in cache by the backing system for memory spare - reclaim */
+ else if (back[b].finalized) {
+ assertf(back[b].r.adr == NULL);
+ /* read file in cache */
+ back[b].r = cache_read_ro(opt,cache,back[b].url_adr,back[b].url_fil,back[b].url_sav, back[b].location_buffer);
+ /* ensure correct location buffer set */
+ back[b].r.location=back[b].location_buffer;
+ if (back[b].r.statuscode == -1) {
+ if (opt->errlog) {
+ fspc(opt->errlog,"error"); fprintf(opt->errlog,"Unexpected error: %s%s not found anymore in cache"LF,back[b].url_adr,back[b].url_fil);
test_flush;
}
-
- // prochain lien
- // ptr++;
-
- return 2; // goto jump_if_done;
-
- }
- /* link put in cache by the backing system for memory spare - reclaim */
- else if (back[b].finalized) {
- assertf(back[b].r.adr == NULL);
- /* read file in cache */
- back[b].r = cache_read_ro(opt,cache,back[b].url_adr,back[b].url_fil,back[b].url_sav, back[b].location_buffer);
- /* ensure correct location buffer set */
- back[b].r.location=back[b].location_buffer;
- if (back[b].r.statuscode == -1) {
- if (opt->errlog) {
- fspc(opt->errlog,"error"); fprintf(opt->errlog,"Unexpected error: %s%s not found anymore in cache"LF,back[b].url_adr,back[b].url_fil);
- test_flush;
- }
- } else {
- if ( (opt->debug>1) && (opt->log!=NULL) ) {
- fspc(opt->log,"debug"); fprintf(opt->log,"reclaim file %s%s (%d)"LF,back[b].url_adr,back[b].url_fil,back[b].r.statuscode); test_flush;
- }
+ } else {
+ if ( (opt->debug>1) && (opt->log!=NULL) ) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"reclaim file %s%s (%d)"LF,back[b].url_adr,back[b].url_fil,back[b].r.statuscode); test_flush;
}
}
-
-
+ }
+#endif
+
#if HTS_ANALYSTE==2
#else
- //if (!opt->quiet) { // petite animation
- if (!opt->verbosedisplay) {
- if (!opt->quiet) {
- static int roll=0; /* static: ok */
- roll=(roll+1)%4;
- printf("%c\x0d",("/-\\|")[roll]);
- fflush(stdout);
- }
- } else if (opt->verbosedisplay==1) {
- if (back[b].r.statuscode==200)
- printf("%d/%d: %s%s ("LLintP" bytes) - OK\33[K\r",ptr,lien_tot,back[b].url_adr,back[b].url_fil,(LLint)back[b].r.size);
- else
- printf("%d/%d: %s%s ("LLintP" bytes) - %d\33[K\r",ptr,lien_tot,back[b].url_adr,back[b].url_fil,(LLint)back[b].r.size,back[b].r.statuscode);
+ //if (!opt->quiet) { // petite animation
+ if (!opt->verbosedisplay) {
+ if (!opt->quiet) {
+ static int roll=0; /* static: ok */
+ roll=(roll+1)%4;
+ printf("%c\x0d",("/-\\|")[roll]);
fflush(stdout);
}
- //}
+ } else if (opt->verbosedisplay==1) {
+ if (back[b].r.statuscode==200)
+ printf("%d/%d: %s%s ("LLintP" bytes) - OK\33[K\r",ptr,lien_tot,back[b].url_adr,back[b].url_fil,(LLint)back[b].r.size);
+ else
+ printf("%d/%d: %s%s ("LLintP" bytes) - %d\33[K\r",ptr,lien_tot,back[b].url_adr,back[b].url_fil,(LLint)back[b].r.size,back[b].r.statuscode);
+ fflush(stdout);
+ }
+ //}
#endif
- // ------------------------------------------------------------
- // Vérificateur d'intégrité
+ // ------------------------------------------------------------
+ // Vérificateur d'intégrité
#if DEBUG_CHECKINT
- _CHECKINT(&back[b],"Retour de back_wait, après le while")
- {
- int i;
- for(i=0;i<back_max;i++) {
- char si[256];
- sprintf(si,"Test global après back_wait, index %d",i);
- _CHECKINT(&back[i],si)
- }
+ _CHECKINT(&back[b],"Retour de back_wait, après le while")
+ {
+ int i;
+ for(i=0;i<back_max;i++) {
+ char si[256];
+ sprintf(si,"Test global après back_wait, index %d",i);
+ _CHECKINT(&back[i],si)
}
+ }
#endif
-
- // copier structure réponse htsblk
- memcpy(r, &(back[b].r), sizeof(htsblk));
- r->location=stre->loc_; // ne PAS copier location!! adresse, pas de buffer
- if (back[b].r.location)
- strcpybuff(r->location,back[b].r.location);
- back[b].r.adr=NULL; // ne pas faire de desalloc ensuite
-
- // libérer emplacement backing
- back_maydelete(opt,back,b);
-
- // progression
+
+ // copier structure réponse htsblk
+ memcpy(r, &(back[b].r), sizeof(htsblk));
+ r->location=stre->loc_; // ne PAS copier location!! adresse, pas de buffer
+ if (back[b].r.location)
+ strcpybuff(r->location,back[b].r.location);
+ back[b].r.adr=NULL; // ne pas faire de desalloc ensuite
+
+ // libérer emplacement backing
+ back_maydelete(opt,cache,back,b);
+
+ // progression
#if 0
- if (opt->aff_progress) {
- TStamp tl=time_local();
- if ((tl-HTS_STAT.stat_timestart)>0) {
- char s[32];
- int i=0;
- lastime=tl;
- _CLRSCR; _GOTOXY("1","1");
- printf("Rate=%d B/sec\n",(int) (HTS_STAT.HTS_TOTAL_RECV/(tl-HTS_STAT.stat_timestart)));
- while(i<minimum(back_max,99)) { // **
- if (back[i].status>=0) { // loading..
- s[0]='\0';
- if (strlen(back[i].url_fil)>16)
- strcatbuff(s,back[i].url_fil+strlen(back[i].url_fil)-16);
- else
- strncatbuff(s,back[i].url_fil,16);
- printf("%s : ",s);
-
- printf("[");
- if (back[i].r.totalsize>0) {
- int p;
- int j;
- p=(int)((back[i].r.size*10)/back[i].r.totalsize);
- p=minimum(10,p);
- for(j=0;j<p;j++) printf("*");
- for(j=0;j<(10-p);j++) printf("-");
- } else {
- printf(LLintP,(LLint)back[i].r.size);
- }
- printf("]");
-
- //} else if (back[i].status==0) {
- // strcpybuff(s,"ENDED");
- }
- printf("\n");
- i++;
- }
- io_flush;
+ if (opt->aff_progress) {
+ TStamp tl=time_local();
+ if ((tl-HTS_STAT.stat_timestart)>0) {
+ char s[32];
+ int i=0;
+ lastime=tl;
+ _CLRSCR; _GOTOXY("1","1");
+ printf("Rate=%d B/sec\n",(int) (HTS_STAT.HTS_TOTAL_RECV/(tl-HTS_STAT.stat_timestart)));
+ while(i<minimum(back_max,99)) { // **
+ if (back[i].status>=0) { // loading..
+ s[0]='\0';
+ if (strlen(back[i].url_fil)>16)
+ strcatbuff(s,back[i].url_fil+strlen(back[i].url_fil)-16);
+ else
+ strncatbuff(s,back[i].url_fil,16);
+ printf("%s : ",s);
+
+ printf("[");
+ if (back[i].r.totalsize>0) {
+ int p;
+ int j;
+ p=(int)((back[i].r.size*10)/back[i].r.totalsize);
+ p=minimum(10,p);
+ for(j=0;j<p;j++) printf("*");
+ for(j=0;j<(10-p);j++) printf("-");
+ } else {
+ printf(LLintP,(LLint)back[i].r.size);
+ }
+ printf("]");
+
+ //} else if (back[i].status==0) {
+ // strcpybuff(s,"ENDED");
+ }
+ printf("\n");
+ i++;
}
+ io_flush;
}
+ }
#endif
-
- // débug graphique
+
+ // débug graphique
#if BDEBUG==2
- {
- char s[12];
- int i=0;
- _GOTOXY(1,1);
- printf("Rate=%d B/sec\n",(int) (HTS_STAT.HTS_TOTAL_RECV/(time_local()-HTS_STAT.stat_timestart)));
- while(i<minimum(back_max,160)) {
- if (back[i].status>0) {
- sprintf(s,"%d",back[i].r.size);
- } else if (back[i].status==0) {
- strcpybuff(s,"ENDED");
- } else
- strcpybuff(s," - ");
- while(strlen(s)<8) strcatbuff(s," ");
- printf("%s",s); io_flush;
- i++;
- }
+ {
+ char s[12];
+ int i=0;
+ _GOTOXY(1,1);
+ printf("Rate=%d B/sec\n",(int) (HTS_STAT.HTS_TOTAL_RECV/(time_local()-HTS_STAT.stat_timestart)));
+ while(i<minimum(back_max,160)) {
+ if (back[i].status>0) {
+ sprintf(s,"%d",back[i].r.size);
+ } else if (back[i].status==0) {
+ strcpybuff(s,"ENDED");
+ } else
+ strcpybuff(s," - ");
+ while(strlen(s)<8) strcatbuff(s," ");
+ printf("%s",s); io_flush;
+ i++;
}
+ }
#endif
-
-
+
+
#if BDEBUG==1
- printf("statuscode=%d with %s / msg=%s\n",r->statuscode,r->contenttype,r->msg);
+ printf("statuscode=%d with %s / msg=%s\n",r->statuscode,r->contenttype,r->msg);
#endif
-
- }
- /*else {
- #if BDEBUG==1
- printf("back index error\n");
- #endif
- }
- */
-
-
-
- ENGINE_SAVE_CONTEXT();
-
- return 0;
-
-
+
+ }
+ /*else {
+ #if BDEBUG==1
+ printf("back index error\n");
+ #endif
+ }
+ */
+
+
+
+ ENGINE_SAVE_CONTEXT();
+
+ return 0;
+
+
}
diff --git a/src/htsparse.h b/src/htsparse.h
index 4efc386..d36217c 100644
--- a/src/htsparse.h
+++ b/src/htsparse.h
@@ -37,7 +37,7 @@ Please visit our Website: http://www.httrack.com
/* ------------------------------------------------------------ */
-typedef struct {
+typedef struct htsmoduleStructExtended {
/* Main object */
htsblk* r_;
@@ -84,6 +84,9 @@ typedef struct {
} htsmoduleStructExtended;
+/* Library internal definitions */
+#ifdef HTS_INTERNAL_BYTECODE
+
/*
Main parser, attempt to scan links inside the html/css/js file
Parameters: The public module structure, and the private module variables
@@ -105,4 +108,4 @@ int hts_mirror_check_moved(htsmoduleStruct* str, htsmoduleStructExtended* stre);
*/
int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended* stre);
-
+#endif
diff --git a/src/htsrobots.c b/src/htsrobots.c
index 58e97fb..5ca7640 100644
--- a/src/htsrobots.c
+++ b/src/htsrobots.c
@@ -35,15 +35,14 @@ Please visit our Website: http://www.httrack.com
/* Author: Xavier Roche */
/* ------------------------------------------------------------ */
+/* Internal engine bytecode */
+#define HTS_INTERNAL_BYTECODE
#include "htsrobots.h"
/* specific definitions */
#include "htsbase.h"
#include "htslib.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
/* END specific definitions */
diff --git a/src/htsrobots.h b/src/htsrobots.h
index ef08183..195bbde 100644
--- a/src/htsrobots.h
+++ b/src/htsrobots.h
@@ -47,10 +47,11 @@ typedef struct robots_wizard {
} robots_wizard;
-// robots
+/* Library internal definitions */
+#ifdef HTS_INTERNAL_BYTECODE
int checkrobots(robots_wizard* robots,char* adr,char* fil);
void checkrobots_free(robots_wizard* robots);
int checkrobots_set(robots_wizard* robots,char* adr,char* data);
-
+#endif
#endif
diff --git a/src/htsserver.c b/src/htsserver.c
index 0408976..0906b39 100644
--- a/src/htsserver.c
+++ b/src/htsserver.c
@@ -37,7 +37,12 @@ Please visit our Website: http://www.httrack.com
/* specific definitions */
/* specific definitions */
-#include "htsbase.h"
+
+/* Bypass internal definition protection */
+#define HTS_INTERNAL_BYTECODE
+ #include "htsbase.h"
+#undef HTS_INTERNAL_BYTECODE
+
#include "htsnet.h"
#include "htslib.h"
#include <stdio.h>
@@ -64,7 +69,12 @@ Please visit our Website: http://www.httrack.com
#include "httrack-library.h"
/* Language files */
-#include "htsinthash.h"
+
+/* Bypass internal definition protection */
+#define HTS_INTERNAL_BYTECODE
+ #include "htsinthash.h"
+#undef HTS_INTERNAL_BYTECODE
+
int NewLangStrSz=1024;
inthash NewLangStr=NULL;
int NewLangStrKeysSz=1024;
@@ -73,7 +83,6 @@ int NewLangListSz=1024;
inthash NewLangList=NULL;
/* Language files */
-
#include "htsserver.h"
char* gethomedir(void);
@@ -103,6 +112,15 @@ static void sig_brpipe( int code ) {
/* ignore */
}
+static int check_readinput_t(T_SOC soc, int timeout);
+static int recv_bl(T_SOC soc, void* buffer, size_t len, int timeout);
+static int linputsoc(T_SOC soc, char* s, int max);
+static int check_readinput(htsblk* r);
+static int linputsoc_t(T_SOC soc, char* s, int max, int timeout);
+
+
+static int linput(FILE* fp,char* s,int max);
+
// URL Link catcher
@@ -209,23 +227,8 @@ T_SOC smallserver_init(int* port,char* adr) {
// 2 - Wait for URL
-static int recv_bl(T_SOC soc, void* buffer, size_t len, int timeout) {
- if (check_readinput_t(soc, timeout)) {
- int n = 1;
- size_t size = len;
- size_t offs = 0;
- while(n > 0 && size > 0) {
- n = recv(soc, ((char*)buffer) + offs, (int) size, 0);
- if (n > 0) {
- offs += n;
- size -= n;
- }
- }
- return (int)offs;
- }
- return -1;
-}
+// check if data is available
// smallserver
// returns 0 if error
@@ -241,20 +244,6 @@ typedef struct {
char* value;
} initStrElt;
-int smallserver_setkey(char* key, char* value) {
- return inthash_write(NewLangList, key, (unsigned long int)strdup(value));
-}
-int smallserver_setkeyint(char* key, LLint value) {
- char tmp[256];
- sprintf(tmp, LLintP, value);
- return inthash_write(NewLangList, key, (unsigned long int)strdup(tmp));
-}
-int smallserver_setkeyarr(char* key, int id, char* key2, char* value) {
- char tmp[256];
- sprintf(tmp, "%s%d%s", key, id, key2);
- return inthash_write(NewLangList, tmp, (unsigned long int)strdup(value));
-}
-
#define SET_ERROR(err) do { \
inthash_write(NewLangList, "error", (unsigned long int)strdup(err)); \
error_redirect = "/server/error.html"; \
@@ -269,6 +258,7 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) {
String headers = STRING_EMPTY;
String output = STRING_EMPTY;
String tmpbuff = STRING_EMPTY;
+ String tmpbuff2 = STRING_EMPTY;
String fspath = STRING_EMPTY;
/* Load strings */
@@ -283,7 +273,7 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) {
{
char pth[1024];
char* initOn[] = { "parseall", "Cache", "ka",
- "cookies", "parsejava", "testall", "updhack", "index", NULL };
+ "cookies", "parsejava", "testall", "updhack", "urlhack", "index", NULL };
initIntElt initInt[] = {
{ "filter", 4 },
{ "travel", 2 },
@@ -303,7 +293,7 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) {
};
initStrElt initStr[] = {
{ "user", "Mozilla/4.5 (compatible; HTTrack 3.0x; Windows 98)" },
- { "footer", "<!-- Mirrored from %s%s by HTTrack Website Copier/3.x [XR&CO'2002], %s -->" },
+ { "footer", "<!-- Mirrored from %s%s by HTTrack Website Copier/3.x [XR&CO'2005], %s -->" },
{ "url2", "+*.png +*.gif +*.jpg +*.css +*.js -ad.doubleclick.net/*" },
{ NULL, NULL }
};
@@ -340,14 +330,16 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) {
line[0] = '\0';
buffer[0] = '\0';
- StringClear(&headers);
- StringClear(&output);
- StringClear(&tmpbuff);
- StringClear(&fspath);
- StringStrcat(&headers, "");
- StringStrcat(&output, "");
- StringStrcat(&tmpbuff, "");
- StringStrcat(&fspath, "");
+ StringClear(headers);
+ StringClear(output);
+ StringClear(tmpbuff);
+ StringClear(tmpbuff2);
+ StringClear(fspath);
+ StringStrcat(headers, "");
+ StringStrcat(output, "");
+ StringStrcat(tmpbuff, "");
+ StringStrcat(tmpbuff2, "");
+ StringStrcat(fspath, "");
memset(&dummyaddr, 0, sizeof(dummyaddr));
/* UnLock */
@@ -473,8 +465,10 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) {
unsigned long int adr = 0;
if (inthash_readptr(NewLangList, "lang", (long int *)&adr)) {
int n = 0;
- if (sscanf((char*)adr, "%d", &n) == 1 && n - 1 != LANG_T(path, -1)) {
+ if (sscanf((char*)adr, "%d", &n) == 1 && n > 0 && n - 1 != LANG_T(path, -1)) {
LANG_T(path, n - 1);
+ /* make a backup, because the GUI will override it */
+ inthash_write(NewLangList, "lang_", (unsigned long int)strdup((char*)adr));
}
}
@@ -487,24 +481,48 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) {
inthash_write(NewLangList, "loadprojname", (unsigned long int)NULL);
doLoad=1;
}
-
+ else if (inthash_readptr(NewLangList, "loadprojcateg", (long int *)&adr)) {
+ char* pname = (char*) adr;
+ if (*pname) {
+ inthash_write(NewLangList, "projcateg", (unsigned long int)strdup(pname));
+ }
+ inthash_write(NewLangList, "loadprojcateg", (unsigned long int)NULL);
+ }
+
+ /* initial configuration */
+ {
+ if (!inthash_read(NewLangList, "conf_file_loaded", NULL)) {
+ inthash_write(NewLangList, "conf_file_loaded", (unsigned long int)strdup("true"));
+ doLoad = 2;
+ }
+ }
+
/* path : <path>/<project> */
if (!commandRunning) {
unsigned long int adrw = 0, adrpath = 0, adrprojname = 0;
if (inthash_readptr(NewLangList, "path", (long int *)&adrpath)
&& inthash_readptr(NewLangList, "projname", (long int *)&adrprojname)) {
- StringClear(&fspath);
- StringStrcat(&fspath, (char*)adrpath);
- StringStrcat(&fspath, "/");
- StringStrcat(&fspath, (char*)adrprojname);
+ StringClear(fspath);
+ StringStrcat(fspath, (char*)adrpath);
+ StringStrcat(fspath, "/");
+ StringStrcat(fspath, (char*)adrprojname);
}
}
/* Load existing project settings */
if (doLoad) {
FILE* fp;
- StringStrcat(&fspath, "/hts-cache/winprofile.ini");
- fp = fopen(StringBuff(&fspath), "rb");
+ if (doLoad == 1) {
+ StringStrcat(fspath, "/hts-cache/winprofile.ini");
+ } else if (doLoad == 2) {
+ StringStrcpy(fspath, gethomedir());
+#ifdef _WIN32
+ StringStrcat(fspath, "/httrack.ini");
+#else
+ StringStrcat(fspath, "/.httrack.ini");
+#endif
+ }
+ fp = fopen(StringBuff(fspath), "rb");
if (fp) {
/* Read file */
while(!feof(fp)) {
@@ -579,15 +597,39 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) {
if (inthash_readptr(NewLangList, "command_do", (long int *)&adrcd)) {
unsigned long int adrw = 0, adrpath = 0, adrprojname = 0;
if (inthash_readptr(NewLangList, "winprofile", (long int *)&adrw)) {
- StringClear(&tmpbuff);
- StringStrcat(&tmpbuff, StringBuff(&fspath));
- StringStrcat(&tmpbuff, "/hts-cache/");
+
+ /* User general profile */
+ unsigned long int adruserprofile = 0;
+ if (inthash_readptr(NewLangList, "userprofile", (long int *)&adruserprofile)
+ && adruserprofile != 0) {
+ int count = (int) strlen((char*)adruserprofile);
+ if (count > 0) {
+ FILE* fp;
+ StringClear(tmpbuff);
+ StringStrcpy(tmpbuff, gethomedir());
+#ifdef _WIN32
+ StringStrcat(tmpbuff, "/httrack.ini");
+#else
+ StringStrcat(tmpbuff, "/.httrack.ini");
+#endif
+ fp = fopen(StringBuff(tmpbuff), "wb");
+ if (fp != NULL) {
+ (void)((int)fwrite((void*)adruserprofile, 1, count, fp));
+ fclose(fp);
+ }
+ }
+ }
+
+ /* Profile */
+ StringClear(tmpbuff);
+ StringStrcat(tmpbuff, StringBuff(fspath));
+ StringStrcat(tmpbuff, "/hts-cache/");
/* Create minimal directory structure */
- if (!structcheck(StringBuff(&tmpbuff))) {
+ if (!structcheck(StringBuff(tmpbuff))) {
FILE* fp;
- StringStrcat(&tmpbuff, "winprofile.ini");
- fp = fopen(StringBuff(&tmpbuff), "wb");
+ StringStrcat(tmpbuff, "winprofile.ini");
+ fp = fopen(StringBuff(tmpbuff), "wb");
if (fp != NULL) {
int count = (int) strlen((char*)adrw);
if ((int)fwrite((void*)adrw, 1, count, fp) == count) {
@@ -596,10 +638,10 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) {
even a bit annoying (duplicate/ghost options)
The behaviour is exactly the same as in WinHTTrack
*/
- StringClear(&tmpbuff);
- StringStrcat(&tmpbuff, StringBuff(&fspath));
- StringStrcat(&tmpbuff, "/hts-cache/doit.log");
- remove(StringBuff(&tmpbuff));
+ StringClear(tmpbuff);
+ StringStrcat(tmpbuff, StringBuff(fspath));
+ StringStrcat(tmpbuff, "/hts-cache/doit.log");
+ remove(StringBuff(tmpbuff));
/*
RUN THE SERVER
@@ -612,18 +654,18 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) {
}
} else {
char tmp[1024];
- sprintf(tmp, "Unable to write %d bytes in the the init file %s", count, StringBuff(&fspath));
+ sprintf(tmp, "Unable to write %d bytes in the the init file %s", count, StringBuff(fspath));
SET_ERROR(tmp);
}
fclose(fp);
} else {
char tmp[1024];
- sprintf(tmp, "Unable to create the init file %s", StringBuff(&fspath));
+ sprintf(tmp, "Unable to create the init file %s", StringBuff(fspath));
SET_ERROR(tmp);
}
} else {
char tmp[1024];
- sprintf(tmp, "Unable to create the directory structure in %s", StringBuff(&fspath));
+ sprintf(tmp, "Unable to create the directory structure in %s", StringBuff(fspath));
SET_ERROR(tmp);
}
@@ -732,22 +774,22 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) {
newfile = newadr;
}
}
- StringMemcat(&headers, redir, strlen(redir));
+ StringMemcat(headers, redir, strlen(redir));
{
char tmp[256];
if (strlen(file) < sizeof(tmp) - 32) {
sprintf(tmp, "Location: %s\r\n", newfile);
- StringMemcat(&headers, tmp, strlen(tmp));
+ StringMemcat(headers, tmp, strlen(tmp));
}
}
inthash_write(NewLangList, "redirect", (unsigned long int)NULL);
}
else if (is_html(file)) {
int outputmode = 0;
- StringMemcat(&headers, ok, sizeof(ok) - 1);
+ StringMemcat(headers, ok, sizeof(ok) - 1);
while(!feof(fp)) {
char* str = line;
- int prevlen = StringLength(&output);
+ int prevlen = StringLength(output);
int nocr = 0;
if (!linput(fp, line, sizeof(line) - 2)) {
*str = '\0';
@@ -828,7 +870,7 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) {
}
} else if (strcmp(name, "if-project-file-exists") == 0) {
if (strstr(pos2, "..") == NULL) {
- if (!fexist(fconcat(StringBuff(&fspath), pos2))) {
+ if (!fexist(fconcat(StringBuff(fspath), pos2))) {
outputmode = -1;
}
}
@@ -854,35 +896,17 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) {
unsigned long int adr = 0;
if (inthash_readptr(NewLangList, "path", (long int *)&adr)) {
char* rpath = (char*) adr;
- find_handle h;
+ //find_handle h;
if (rpath[0]) {
if (rpath[strlen(rpath)-1]=='/') {
rpath[strlen(rpath)-1]='\0'; /* note: patching stored (inhash) value */
}
}
- h = hts_findfirst(rpath);
- if (h) {
- struct topindex_chain * chain=NULL;
- struct topindex_chain * startchain=NULL;
- StringClear(&tmpbuff);
- do {
- if (hts_findisdir(h)) {
- char iname[HTS_URLMAXSIZE*2];
- strcpybuff(iname,rpath);
- strcatbuff(iname,"/");
- strcatbuff(iname,hts_findgetname(h));
- strcatbuff(iname,"/hts-cache/winprofile.ini");
- if (fexist(iname)) {
- if (StringLength(&tmpbuff) > 0) {
- StringStrcat(&tmpbuff, "\r\n");
- }
- StringStrcat(&tmpbuff, hts_findgetname(h));
- }
-
- }
- } while(hts_findnext(h));
- hts_findclose(h);
- inthash_write(NewLangList, "winprofile", (unsigned long int)StringAcquire(&tmpbuff));
+ {
+ char* profiles = hts_getcategories(rpath, 0);
+ char* categ = hts_getcategories(rpath,1 );
+ inthash_write(NewLangList, "winprofile", (unsigned long int)profiles);
+ inthash_write(NewLangList, "wincateg", (unsigned long int)categ);
}
}
} else if (strcmp(name, "copy") == 0) {
@@ -1020,27 +1044,27 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) {
char c;
if (sscanf(a+1, "%x", &n) == 1) {
c = (char)n;
- StringMemcat(&output, &c, 1);
+ StringMemcat(output, &c, 1);
}
a += 2;
} else if (outputmode && a[0] == '<') {
- StringStrcat(&output, "&lt;");
+ StringStrcat(output, "&lt;");
} else if (outputmode && a[0] == '>') {
- StringStrcat(&output, "&gt;");
+ StringStrcat(output, "&gt;");
} else if (outputmode && a[0] == '&') {
- StringStrcat(&output, "&amp;");
+ StringStrcat(output, "&amp;");
} else if (outputmode == 3 && a[0] == ' ') {
- StringStrcat(&output, "%20");
+ StringStrcat(output, "%20");
} else if (outputmode >= 2 && ((unsigned char)a[0]) < 32) {
char tmp[32];
sprintf(tmp, "%%%02x", (unsigned char)a[0]);
- StringStrcat(&output, tmp);
+ StringStrcat(output, tmp);
} else if (outputmode == 2 && a[0] == '%') {
- StringStrcat(&output, "%%");
+ StringStrcat(output, "%%");
} else if (outputmode == 3 && a[0] == '%') {
- StringStrcat(&output, "%25");
+ StringStrcat(output, "%25");
} else {
- StringMemcat(&output, a, 1);
+ StringMemcat(output, a, 1);
}
a++;
}
@@ -1048,108 +1072,108 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) {
break;
case 3:
if (*langstr) {
- StringStrcat(&output, "checked");
+ StringStrcat(output, "checked");
}
break;
default:
if (*langstr) {
int id=1;
char* fstr = langstr;
- StringClear(&tmpbuff);
+ StringClear(tmpbuff);
if (format == 2) {
- StringStrcat(&output, "<option value=1>");
+ StringStrcat(output, "<option value=1>");
} else if (format == -2) {
- StringStrcat(&output, "<option value=\"");
+ StringStrcat(output, "<option value=\"");
}
while(*fstr) {
switch(*fstr) {
case 13: break;
case 10:
if (format == 1) {
- StringStrcat(&output, StringBuff(&tmpbuff));
- StringStrcat(&output, "<br>\r\n");
+ StringStrcat(output, StringBuff(tmpbuff));
+ StringStrcat(output, "<br>\r\n");
} else if (format == -2) {
- StringStrcat(&output, StringBuff(&tmpbuff));
- StringStrcat(&output, "\">");
- StringStrcat(&output, StringBuff(&tmpbuff));
- StringStrcat(&output, "</option>\r\n");
- StringStrcat(&output, "<option value=\"");
+ StringStrcat(output, StringBuff(tmpbuff));
+ StringStrcat(output, "\">");
+ StringStrcat(output, StringBuff(tmpbuff));
+ StringStrcat(output, "</option>\r\n");
+ StringStrcat(output, "<option value=\"");
} else {
char tmp[32];
sprintf(tmp, "%d", ++id);
- StringStrcat(&output, StringBuff(&tmpbuff));
- StringStrcat(&output, "</option>\r\n");
- StringStrcat(&output, "<option value=");
- StringStrcat(&output, tmp);
+ StringStrcat(output, StringBuff(tmpbuff));
+ StringStrcat(output, "</option>\r\n");
+ StringStrcat(output, "<option value=");
+ StringStrcat(output, tmp);
if (listDefault == id) {
- StringStrcat(&output, " selected");
+ StringStrcat(output, " selected");
}
- StringStrcat(&output, ">");
+ StringStrcat(output, ">");
}
- StringClear(&tmpbuff);
+ StringClear(tmpbuff);
break;
case '<':
- StringStrcat(&tmpbuff, "&lt;");
+ StringStrcat(tmpbuff, "&lt;");
break;
case '>':
- StringStrcat(&tmpbuff, "&gt;");
+ StringStrcat(tmpbuff, "&gt;");
break;
case '&':
- StringStrcat(&tmpbuff, "&amp;");
+ StringStrcat(tmpbuff, "&amp;");
break;
default:
- StringMemcat(&tmpbuff, fstr, 1);
+ StringMemcat(tmpbuff, fstr, 1);
break;
}
fstr++;
}
if (format == 2) {
- StringStrcat(&output, StringBuff(&tmpbuff));
- StringStrcat(&output, "</option>");
+ StringStrcat(output, StringBuff(tmpbuff));
+ StringStrcat(output, "</option>");
} else if (format == -2) {
- StringStrcat(&output, StringBuff(&tmpbuff));
- StringStrcat(&output, "\">");
- StringStrcat(&output, StringBuff(&tmpbuff));
- StringStrcat(&output, "</option>");
+ StringStrcat(output, StringBuff(tmpbuff));
+ StringStrcat(output, "\">");
+ StringStrcat(output, StringBuff(tmpbuff));
+ StringStrcat(output, "</option>");
} else {
- StringStrcat(&output, StringBuff(&tmpbuff));
+ StringStrcat(output, StringBuff(tmpbuff));
}
- StringClear(&tmpbuff);
+ StringClear(tmpbuff);
}
}
}
str = pos;
} else {
if (outputmode != -1) {
- StringMemcat(&output, str, 1);
+ StringMemcat(output, str, 1);
}
}
str++;
}
- if (!nocr && prevlen != StringLength(&output)) {
- StringStrcat(&output, "\r\n");
+ if (!nocr && prevlen != StringLength(output)) {
+ StringStrcat(output, "\r\n");
}
}
#ifdef _DEBUG
{
- int len = (int)strlen((char*)StringBuff(&output));
- assert(len == (int)StringLength(&output));
+ int len = (int)strlen((char*)StringBuff(output));
+ assert(len == (int)StringLength(output));
}
#endif
} else if (is_text(file)) {
- StringMemcat(&headers, ok_text, sizeof(ok_text) - 1);
+ StringMemcat(headers, ok_text, sizeof(ok_text) - 1);
while(!feof(fp)) {
int n = fread(line, 1, sizeof(line) - 2, fp);
if (n > 0) {
- StringMemcat(&output, line, n);
+ StringMemcat(output, line, n);
}
}
} else {
- StringMemcat(&headers, ok_img, sizeof(ok_img) - 1);
+ StringMemcat(headers, ok_img, sizeof(ok_img) - 1);
while(!feof(fp)) {
int n = fread(line, 1, sizeof(line) - 2, fp);
if (n > 0) {
- StringMemcat(&output, line, n);
+ StringMemcat(output, line, n);
}
}
}
@@ -1160,8 +1184,8 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) {
"Content-type: text/html\r\n";
char error[] =
"Page not found.\r\n";
- StringStrcat(&headers, error_hdr);
- StringStrcat(&output, error);
+ StringStrcat(headers, error_hdr);
+ StringStrcat(output, error);
//assert(file == NULL);
}
}
@@ -1172,20 +1196,20 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) {
"Content-type: text/html\r\n";
char error[] =
"Server error.\r\n";
- StringStrcat(&headers, error_hdr);
- StringStrcat(&output, error);
+ StringStrcat(headers, error_hdr);
+ StringStrcat(output, error);
#endif
}
{
char tmp[256];
- sprintf(tmp, "Content-length: %d\r\n", (int) StringLength(&output));
- StringStrcat(&headers, tmp);
+ sprintf(tmp, "Content-length: %d\r\n", (int) StringLength(output));
+ StringStrcat(headers, tmp);
}
- StringStrcat(&headers, "\r\n");
+ StringStrcat(headers, "\r\n");
if (
- (send(soc_c, StringBuff(&headers), StringLength(&headers), 0) != StringLength(&headers))
+ (send(soc_c, StringBuff(headers), StringLength(headers), 0) != StringLength(headers))
||
- ( (meth == 1) && (send(soc_c, StringBuff(&output), StringLength(&output), 0) != StringLength(&output)) )
+ ( (meth == 1) && (send(soc_c, StringBuff(output), StringLength(output), 0) != StringLength(output)) )
) {
#ifdef _DEBUG
//assert(FALSE);
@@ -1224,10 +1248,11 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) {
#endif
}
- StringFree(&headers);
- StringFree(&output);
- StringFree(&tmpbuff);
- StringFree(&fspath);
+ StringFree(headers);
+ StringFree(output);
+ StringFree(tmpbuff);
+ StringFree(tmpbuff2);
+ StringFree(fspath);
if (buffer)
free(buffer);
@@ -1270,7 +1295,21 @@ int htslang_uninit() {
return 1;
}
-int htslang_load(char* limit_to, char* path) {
+int smallserver_setkey(char* key, char* value) {
+ return inthash_write(NewLangList, key, (unsigned long int)strdup(value));
+}
+int smallserver_setkeyint(char* key, LLint value) {
+ char tmp[256];
+ sprintf(tmp, LLintP, value);
+ return inthash_write(NewLangList, key, (unsigned long int)strdup(tmp));
+}
+int smallserver_setkeyarr(char* key, int id, char* key2, char* value) {
+ char tmp[256];
+ sprintf(tmp, "%s%d%s", key, id, key2);
+ return inthash_write(NewLangList, tmp, (unsigned long int)strdup(value));
+}
+
+static int htslang_load(char* limit_to, char* path) {
char* hashname;
//
int selected_lang=LANG_T(path, -1);
@@ -1433,7 +1472,7 @@ int htslang_load(char* limit_to, char* path) {
}
/* NOTE : also contains the "webhttrack" hack */
-void conv_printf(char* from,char* to) {
+static void conv_printf(char* from,char* to) {
int i=0,j=0,len;
len=strlen(from);
while(i<len) {
@@ -1475,13 +1514,13 @@ void conv_printf(char* from,char* to) {
}
}
-void LANG_DELETE() {
+static void LANG_DELETE() {
inthash_delete(&NewLangStr);
inthash_delete(&NewLangStrKeys);
}
// sélection de la langue
-void LANG_INIT(char* path) {
+static void LANG_INIT(char* path) {
//CWinApp* pApp = AfxGetApp();
//if (pApp) {
int test = 0; /* pApp->GetProfileInt("Language","IntId",0); */
@@ -1489,7 +1528,7 @@ void LANG_INIT(char* path) {
//}
}
-int LANG_T(char* path, int l) {
+static int LANG_T(char* path, int l) {
if (l>=0) {
QLANG_T(l);
htslang_load(NULL, path);
@@ -1497,7 +1536,7 @@ int LANG_T(char* path, int l) {
return QLANG_T(-1); // 0=default (english)
}
-int LANG_SEARCH(char* path, char* iso) {
+static int LANG_SEARCH(char* path, char* iso) {
char lang_str[1024];
int i = 0;
int curr_lng=LANG_T(path, -1);
@@ -1516,7 +1555,7 @@ int LANG_SEARCH(char* path, char* iso) {
return found;
}
-int LANG_LIST(char* path, char* buffer) {
+static int LANG_LIST(char* path, char* buffer) {
char lang_str[1024];
int i = 0;
int curr_lng=LANG_T(path, -1);
@@ -1537,7 +1576,7 @@ int LANG_LIST(char* path, char* buffer) {
return i;
}
-int QLANG_T(int l) {
+static int QLANG_T(int l) {
static int lng=0;
if (l>=0) {
lng=l;
@@ -1545,7 +1584,7 @@ int QLANG_T(int l) {
return lng; // 0=default (english)
}
-char* LANGSEL(char* name) {
+static char* LANGSEL(char* name) {
unsigned long int adr = 0;
if (NewLangStr)
if (!inthash_read(NewLangStr,name,(long int *)&adr))
@@ -1556,7 +1595,7 @@ char* LANGSEL(char* name) {
return "";
}
-char* LANGINTKEY(char* name) {
+static char* LANGINTKEY(char* name) {
unsigned long int adr=0;
if (NewLangStrKeys)
if (!inthash_read(NewLangStrKeys,name,(long int *)&adr))
@@ -1567,123 +1606,48 @@ char* LANGINTKEY(char* name) {
return "";
}
-char* gethomedir(void) {
- char* home = getenv( "HOME" );
- if (home)
- return home;
- else
- return ".";
-}
-int linput_cpp(FILE* fp,char* s,int max) {
- int rlen=0;
- s[0]='\0';
- do {
- int ret;
- if (rlen>0)
- if (s[rlen-1]=='\\')
- s[--rlen]='\0'; // couper \ final
- // lire ligne
- ret=linput_trim(fp,s+rlen,max-rlen);
- if (ret>0)
- rlen+=ret;
- } while((s[max(rlen-1,0)]=='\\') && (rlen<max));
- return rlen;
-}
-// copy of concat
-typedef struct {
- char buff[16][HTS_URLMAXSIZE*2*2];
- int rol;
-} concat_strc;
-char* concat(const char* a,const char* b) {
- static concat_strc* strc = NULL;
- if (strc == NULL) {
- strc = (concat_strc*) calloc(16, sizeof(concat_strc));
- }
- strc->rol=((strc->rol+1)%16); // roving pointer
- strcpybuff(strc->buff[strc->rol],a);
- if (b) strcatbuff(strc->buff[strc->rol],b);
- return strc->buff[strc->rol];
-}
-#ifdef _WIN32
-char* __fconv(char* a) {
- int i;
- for(i=0;i<(int) strlen(a);i++)
- if (a[i]=='/') // convertir
- a[i]='\\';
- return a;
-}
-char* fconcat(char* a,char* b) {
- return __fconv(concat(a,b));
-}
-char* fconv(char* a) {
- return __fconv(concat(a,""));
-}
-#endif
/* *** Various functions *** */
-int fexist(char* s) {
- struct stat st;
- memset(&st, 0, sizeof(st));
- if (stat(s, &st) == 0) {
- if (S_ISREG(st.st_mode)) {
- return 1;
- }
- }
- return 0;
-}
-int linput(FILE* fp,char* s,int max) {
- int c;
- int j=0;
- do {
- c=fgetc(fp);
- if (c!=EOF) {
- switch(c) {
- case 13: break; // sauter CR
- case 10: c=-1; break;
- case 0: case 9: case 12: break; // sauter ces caractères
- default: s[j++]=(char) c; break;
- }
- }
- } while((c!=-1) && (c!=EOF) && (j<(max-1)));
- s[j]='\0';
- return j;
+static int check_readinput_t(T_SOC soc, int timeout) { /* returns 1 if soc has readable data within timeout seconds, 0 otherwise */
+ if (soc != INVALID_SOCKET) {
+ fd_set fds; // poll structures
+ struct timeval tv; // structure for select
+ FD_ZERO(&fds);
+ FD_SET(soc,&fds);
+ tv.tv_sec=timeout;
+ tv.tv_usec=0;
+ /* only trust fds when select() reports at least one ready descriptor (<= 0 means timeout or error) */
+ if (select(soc + 1,&fds,NULL,NULL,&tv) > 0 && FD_ISSET(soc,&fds))
+ return 1;
+ else
+ return 0;
+ } else
+ return 0;
}
-int linput_trim(FILE* fp,char* s,int max) {
- int rlen=0;
- char* ls=(char*) malloct(max+2);
- s[0]='\0';
- if (ls) {
- char* a;
- // lire ligne
- rlen=linput(fp,ls,max);
- if (rlen) {
- // sauter espaces et tabs en fin
- while( (rlen>0) && is_realspace(ls[max(rlen-1,0)]) )
- ls[--rlen]='\0';
- // sauter espaces en début
- a=ls;
- while((rlen>0) && ((*a==' ') || (*a=='\t'))) {
- a++;
- rlen--;
- }
- if (rlen>0) {
- memcpy(s,a,rlen); // can copy \0 chars
- s[rlen]='\0';
+static int recv_bl(T_SOC soc, void* buffer, size_t len, int timeout) {
+ if (check_readinput_t(soc, timeout)) {
+ int n = 1;
+ size_t size = len;
+ size_t offs = 0;
+ while(n > 0 && size > 0) {
+ n = recv(soc, ((char*)buffer) + offs, (int) size, 0);
+ if (n > 0) {
+ offs += n;
+ size -= n;
}
}
- //
- freet(ls);
+ return (int)offs;
}
- return rlen;
+ return -1;
}
-int linputsoc(T_SOC soc, char* s, int max) {
+static int linputsoc(T_SOC soc, char* s, int max) {
int c;
int j=0;
do {
@@ -1706,15 +1670,8 @@ int linputsoc(T_SOC soc, char* s, int max) {
return j;
}
-int linputsoc_t(T_SOC soc, char* s, int max, int timeout) {
- if (check_readinput_t(soc, timeout)) {
- return linputsoc(soc, s, max);
- }
- return -1;
-}
-
// check if data is available
-int check_readinput(htsblk* r) {
+static int check_readinput(htsblk* r) {
if (r->soc != INVALID_SOCKET) {
fd_set fds; // poll structures
struct timeval tv; // structure for select
@@ -1731,84 +1688,21 @@ int check_readinput(htsblk* r) {
return 0;
}
-// check if data is available
-int check_readinput_t(T_SOC soc, int timeout) {
- if (soc != INVALID_SOCKET) {
- fd_set fds; // poll structures
- struct timeval tv; // structure for select
- FD_ZERO(&fds);
- FD_SET(soc,&fds);
- tv.tv_sec=timeout;
- tv.tv_usec=0;
- select(soc + 1,&fds,NULL,NULL,&tv);
- if (FD_ISSET(soc,&fds))
- return 1;
- else
- return 0;
- } else
- return 0;
+static int linputsoc_t(T_SOC soc, char* s, int max, int timeout) {
+ if (check_readinput_t(soc, timeout)) {
+ return linputsoc(soc, s, max);
+ }
+ return -1;
}
-int strfield(const char* f,const char* s) {
+/*int strfield(const char* f,const char* s) {
int r=0;
while (streql(*f,*s) && ((*f)!=0) && ((*s)!=0)) { f++; s++; r++; }
if (*s==0)
return r;
else
return 0;
-}
-
-int ehexh(char c) {
- if ((c>='0') && (c<='9')) return c-'0';
- if ((c>='a') && (c<='f')) c-=('a'-'A');
- if ((c>='A') && (c<='F')) return (c-'A'+10);
- return 0;
-}
-
-int ehex(char* s) {
- return 16*ehexh(*s)+ehexh(*(s+1));
-}
-
-void unescapehttp(char* s, String* tempo) {
- int i;
- for (i=0;i<(int) strlen(s);i++) {
- if (s[i]=='%' && s[i+1]=='%') {
- i++;
- StringAddchar(tempo, '%');
- } else if (s[i]=='%') {
- char hc;
- i++;
- hc = (char) ehex(s+i);
- StringAddchar(tempo, (char) hc);
- i++; // sauter 2 caractères finalement
- }
- else if (s[i]=='+') {
- StringAddchar(tempo, ' ');
- }
- else
- StringAddchar(tempo, s[i]);
- }
-}
+}*/
/* same, except + */
-void unescapeini(char* s, String* tempo) {
- int i;
- char lastc=0;
- for (i=0;i<(int) strlen(s);i++) {
- if (s[i]=='%' && s[i+1]=='%') {
- i++;
- StringAddchar(tempo, lastc = '%');
- } else if (s[i]=='%') {
- char hc;
- i++;
- hc = (char) ehex(s+i);
- if (!is_retorsep(hc) || !is_retorsep(lastc)) {
- StringAddchar(tempo, lastc = (char) hc);
- }
- i++; // sauter 2 caractères finalement
- }
- else
- StringAddchar(tempo, lastc = s[i]);
- }
-}
diff --git a/src/htsserver.h b/src/htsserver.h
index 2818b34..acac908 100644
--- a/src/htsserver.h
+++ b/src/htsserver.h
@@ -41,6 +41,10 @@ Please visit our Website: http://www.httrack.com
#include "htsbasenet.h"
+/* String */
+#include "htsstrings.h"
+
+
// Fonctions
void socinput(T_SOC soc,char* s,int max);
T_SOC smallserver_init_std(int* port_prox,char* adr_prox);
@@ -70,79 +74,186 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path);
"<!-- Generated by HTTrack Website Copier -->\r\n"\
"\r\n"\
+extern int NewLangStrSz;
+extern inthash NewLangStr;
+extern int NewLangStrKeysSz;
+extern inthash NewLangStrKeys;
+extern int NewLangListSz;
+extern inthash NewLangList;
-/* String */
-
-typedef struct {
- char* buff;
- int len;
- int capa;
-} String;
-
-#define STRING_EMPTY {NULL, 0, 0}
-#define BLK_SIZE 8192
-#define StringBuff(blk) ((blk)->buff)
-#define StringLength(blk) ((blk)->len)
-#define StringCapacity(blk) ((blk)->capa)
-#define StringClear(blk) do { \
- if ((blk)->capa > 0) { \
- (blk)->buff[0] = '\0'; \
- }\
- (blk)->len = 0; \
-} while(0)
-#define StringFree(blk) do { if ((blk)->buff != NULL) { freet((blk)->buff); (blk)->buff = NULL; } } while(0)
-#define StringMemcat(blk, str, size) do { \
- if ((blk)->len + (int)(size) + 1 > (blk)->capa) { \
- (blk)->capa = (blk)->len + (size) + BLK_SIZE; \
- (blk)->buff = (char*) realloct((blk)->buff, (blk)->capa); \
- assertf((blk)->buff != NULL); \
- } \
- if ((int)(size) > 0) { \
- memcpy((blk)->buff + (blk)->len, (str), (size)); \
- (blk)->len += (size); \
- } \
- *((blk)->buff + (blk)->len) = '\0'; \
-} while(0)
-#define StringAddchar(blk, c) do { \
- char __c = (c); \
- StringMemcat(blk, &__c, 1); \
-} while(0)
-static void* StringAcquire(String* blk) {
- void* buff = blk->buff;
- blk->buff = NULL;
- blk->capa = 0;
- blk->len = 0;
- return buff;
-}
+/* Spaces: CR,LF,TAB,FF */
+#define is_space(c) ( ((c)==' ') || ((c)=='\"') || ((c)==10) || ((c)==13) || ((c)==9) || ((c)==12) || ((c)==11) || ((c)=='\'') )
+#define is_realspace(c) ( ((c)==' ') || ((c)==10) || ((c)==13) || ((c)==9) || ((c)==12) || ((c)==11) )
+#define is_taborspace(c) ( ((c)==' ') || ((c)==9) )
+#define is_quote(c) ( ((c)=='\"') || ((c)=='\'') )
+#define is_retorsep(c) ( ((c)==10) || ((c)==13) || ((c)==9) )
-static void StringStrcat(String* blk, char* str) {
- StringMemcat(blk, str, strlen(str));
-}
+extern int smallserver_setkey(char* key, char* value);
+extern int smallserver_setkeyint(char* key, LLint value);
+extern int smallserver_setkeyarr(char* key, int id, char* key2, char* value);
/* Language files */
-int htslang_load(char* limit_to, char* apppath);
-void conv_printf(char* from,char* to);
-void LANG_DELETE(void);
-void LANG_INIT(char* path);
-int LANG_T(char* path, int l);
-int QLANG_T(int l);
-char* LANGSEL(char* name);
-char* LANGINTKEY(char* name);
-int LANG_SEARCH(char* path, char* iso);
-int LANG_LIST(char* path, char* buffer);
+static int htslang_load(char* limit_to, char* apppath);
+static void conv_printf(char* from,char* to);
+static void LANG_DELETE(void);
+static void LANG_INIT(char* path);
+static int LANG_T(char* path, int l);
+static int QLANG_T(int l);
+static char* LANGSEL(char* name);
+static char* LANGINTKEY(char* name);
+static int LANG_SEARCH(char* path, char* iso);
+static int LANG_LIST(char* path, char* buffer);
int htslang_init(void);
int htslang_uninit(void);
-int linput_cpp(FILE* fp,char* s,int max);
-void unescapehttp(char* s, String* tempo);
-void unescapeini(char* s, String* tempo);
+/* Static definitions */
+
+static char* gethomedir(void);
+static int linput_cpp(FILE* fp,char* s,int max);
+static int linput_trim(FILE* fp,char* s,int max);
+static char* concat(const char* a,const char* b);
+static int fexist(char* s);
+static int linput(FILE* fp,char* s,int max);
+static int linputsoc_t(T_SOC soc, char* s, int max, int timeout);
+
+static char* gethomedir(void) {
+ char* home = getenv( "HOME" );
+ if (home)
+ return home;
+ else
+ return ".";
+}
+static int linput_cpp(FILE* fp,char* s,int max) {
+ int rlen=0;
+ s[0]='\0';
+ do {
+ int ret;
+ if (rlen>0)
+ if (s[rlen-1]=='\\')
+ s[--rlen]='\0'; // couper \ final
+ // lire ligne
+ ret=linput_trim(fp,s+rlen,max-rlen);
+ if (ret>0)
+ rlen+=ret;
+ } while((s[max(rlen-1,0)]=='\\') && (rlen<max));
+ return rlen;
+}
+// copy of concat
+typedef struct concat_strc {
+ char buff[16][HTS_URLMAXSIZE*2*2];
+ int rol;
+} concat_strc;
+static char* concat(const char* a,const char* b) { /* returns a followed by b (b may be NULL) in one of 16 rotating buffers; the result is reused after 16 further calls */
+ static concat_strc* strc = NULL;
+ if (strc == NULL) {
+ strc = (concat_strc*) calloc(1, sizeof(concat_strc)); /* a single concat_strc already contains all 16 buffers */
+ }
+ strc->rol=((strc->rol+1)%16); // roving pointer
+ strcpybuff(strc->buff[strc->rol],a);
+ if (b) strcatbuff(strc->buff[strc->rol],b);
+ return strc->buff[strc->rol];
+}
+
+static int fexist(char* s) {
+ struct stat st;
+ memset(&st, 0, sizeof(st));
+ if (stat(s, &st) == 0) {
+ if (S_ISREG(st.st_mode)) {
+ return 1;
+ }
+ }
+ return 0;
+}
+static int linput(FILE* fp,char* s,int max) {
+ int c;
+ int j=0;
+ do {
+ c=fgetc(fp);
+ if (c!=EOF) {
+ switch(c) {
+ case 13: break; // sauter CR
+ case 10: c=-1; break;
+ case 0: case 9: case 12: break; // sauter ces caractères
+ default: s[j++]=(char) c; break;
+ }
+ }
+ } while((c!=-1) && (c!=EOF) && (j<(max-1)));
+ s[j]='\0';
+ return j;
+}
+static int linput_trim(FILE* fp,char* s,int max) {
+ int rlen=0;
+ char* ls=(char*) malloct(max+2);
+ s[0]='\0';
+ if (ls) {
+ char* a;
+ // lire ligne
+ rlen=linput(fp,ls,max);
+ if (rlen) {
+ // sauter espaces et tabs en fin
+ while( (rlen>0) && is_realspace(ls[max(rlen-1,0)]) )
+ ls[--rlen]='\0';
+ // sauter espaces en début
+ a=ls;
+ while((rlen>0) && ((*a==' ') || (*a=='\t'))) {
+ a++;
+ rlen--;
+ }
+ if (rlen>0) {
+ memcpy(s,a,rlen); // can copy \0 chars
+ s[rlen]='\0';
+ }
+ }
+ //
+ freet(ls);
+ }
+ return rlen;
+}
+
+static void unescapeini(char* s, String* tempo) { /* appends s to *tempo, decoding %% and %XX ('+' is kept as-is) */
+ int i;
+ char lastc=0; /* last character appended, used to collapse consecutive CR/LF/TAB escapes */
+ for (i=0;i<(int) strlen(s);i++) {
+ if (s[i]=='%' && s[i+1]=='%') {
+ i++;
+ StringAddchar(*tempo, lastc = '%');
+ } else if (s[i]=='%' && s[i+1]!='\0') { /* a lone trailing '%' is copied literally: ehex() must not read past the terminator */
+ char hc;
+ i++;
+ hc = (char) ehex(s+i);
+ if (!is_retorsep(hc) || !is_retorsep(lastc)) {
+ StringAddchar(*tempo, lastc = (char) hc);
+ }
+ i++; // two characters were consumed in total
+ }
+ else
+ StringAddchar(*tempo, lastc = s[i]);
+ }
+}
+
+#ifndef _WIN32
+#define fconv(a) (a)
+#define fconcat(a,b) concat(a,b)
+#endif
+
+#ifdef _WIN32
+static char* __fconv(char* a) {
+ int i;
+ for(i=0;i<(int) strlen(a);i++)
+ if (a[i]=='/') // convertir
+ a[i]='\\';
+ return a;
+}
+static char* fconcat(char* a,char* b) {
+ return __fconv(concat(a,b));
+}
+static char* fconv(char* a) {
+ return __fconv(concat(a,""));
+}
+#endif
-int smallserver_setkey(char* key, char* value);
-int smallserver_setkeyint(char* key, LLint value);
-int smallserver_setkeyarr(char* key, int id, char* key2, char* value);
-
#endif
diff --git a/src/htsstrings.h b/src/htsstrings.h
new file mode 100755
index 0000000..0c4998a
--- /dev/null
+++ b/src/htsstrings.h
@@ -0,0 +1,138 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: Strings */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+// Strings a bit safer than static buffers
+
+#ifndef HTS_STRINGS_DEFSTATIC
+#define HTS_STRINGS_DEFSTATIC
+
+typedef struct String {
+ char* buff;
+ int len;
+ int capa;
+} String;
+
+#define STRING_EMPTY {NULL, 0, 0}
+#define STRING_BLK_SIZE 256
+#define StringBuff(blk) ((blk).buff)
+#define StringLength(blk) ((blk).len)
+#define StringCapacity(blk) ((blk).capa)
+#define StringRoom(blk, size) do { \
+ if ((blk).len + (int)(size) + 1 > (blk).capa) { \
+ (blk).capa = ((blk).len + (size) + 1) * 2; \
+ (blk).buff = (char*) realloct((blk).buff, (blk).capa); \
+ assertf((blk).buff != NULL); \
+ } \
+} while(0)
+#define StringBuffN(blk, size) StringBuffN_(&(blk), size)
+static char* StringBuffN_(String* blk, int size) {
+ StringRoom(*blk, (blk->len) + size);
+ return StringBuff(*blk);
+}
+#define StringClear(blk) do { \
+ StringRoom(blk, 0); \
+ (blk).buff[0] = '\0'; \
+ (blk).len = 0; \
+} while(0)
+#define StringFree(blk) do { \
+ if ((blk).buff != NULL) { \
+ freet((blk).buff); \
+ (blk).buff = NULL; \
+ } \
+ (blk).capa = 0; \
+ (blk).len = 0; \
+} while(0)
+#define StringMemcat(blk, str, size) do { \
+ StringRoom(blk, size); \
+ if ((int)(size) > 0) { \
+ memcpy((blk).buff + (blk).len, (str), (size)); \
+ (blk).len += (size); \
+ } \
+ *((blk).buff + (blk).len) = '\0'; \
+} while(0)
+#define StringAddchar(blk, c) do { \
+ char __c = (c); \
+ StringMemcat(blk, &__c, 1); \
+} while(0)
+static void* StringAcquire(String* blk) {
+ void* buff = blk->buff;
+ blk->buff = NULL;
+ blk->capa = 0;
+ blk->len = 0;
+ return buff;
+}
+#define StringStrcat(blk, str) StringMemcat(blk, str, ((str) != NULL) ? strlen(str) : 0)
+#define StringStrcpy(blk, str) do { \
+ StringClear(blk); \
+ StringStrcat(blk, str); \
+} while(0)
+
+/* Tools */
+
+static int ehexh(char c) { /* value (0..15) of one hexadecimal digit, either case */
+ if ((c>='0') && (c<='9')) return c-'0';
+ if ((c>='a') && (c<='f')) c-=('a'-'A'); /* fold lowercase a-f to uppercase */
+ if ((c>='A') && (c<='F')) return (c-'A'+10);
+ return 0; /* any non-hex character counts as 0 */
+}
+
+static int ehex(char* s) { /* value of the two hex digits s[0] s[1]; reads both characters unconditionally */
+ return 16*ehexh(*s)+ehexh(*(s+1));
+}
+
+static void unescapehttp(char* s, String* tempo) { /* appends s to *tempo, decoding %%, %XX and '+' (as space) */
+ int i;
+ for (i=0;i<(int) strlen(s);i++) {
+ if (s[i]=='%' && s[i+1]=='%') {
+ i++;
+ StringAddchar(*tempo, '%');
+ } else if (s[i]=='%' && s[i+1]!='\0') { /* a lone trailing '%' is copied literally: ehex() must not read past the terminator */
+ char hc;
+ i++;
+ hc = (char) ehex(s+i);
+ StringAddchar(*tempo, (char) hc);
+ i++; // two characters were consumed in total
+ }
+ else if (s[i]=='+') {
+ StringAddchar(*tempo, ' ');
+ }
+ else
+ StringAddchar(*tempo, s[i]);
+ }
+}
+
+
+#endif
diff --git a/src/htsthread.c b/src/htsthread.c
index d403730..a766a40 100644
--- a/src/htsthread.c
+++ b/src/htsthread.c
@@ -34,15 +34,115 @@ Please visit our Website: http://www.httrack.com
/* Author: Xavier Roche */
/* ------------------------------------------------------------ */
+/* Internal engine bytecode */
+#define HTS_INTERNAL_BYTECODE
#include "htsglobal.h"
+#include "htsbase.h"
#include "htsthread.h"
+#if USE_BEGINTHREAD
+#if HTS_WIN
+#include <process.h>
+#endif
+#endif
+
+static int process_chain = 0;
+static PTHREAD_LOCK_TYPE process_chain_mutex;
+
+HTSEXT_API void htsthread_wait(void ) {
+ htsthread_wait_n(0);
+}
+
+HTSEXT_API void htsthread_wait_n(int n_wait) {
+#if USE_BEGINTHREAD
+ int wait = 0;
+ do {
+ htsSetLock(&process_chain_mutex, 1);
+ wait = (process_chain > n_wait );
+ htsSetLock(&process_chain_mutex, 0);
+ if (wait)
+ Sleep(100);
+ } while(wait);
+#endif
+}
+
+HTSEXT_API void htsthread_init(void ) {
+#if USE_BEGINTHREAD
+ assertf(process_chain == 0);
+ htsSetLock(&process_chain_mutex, -999);
+#endif
+}
+
+HTSEXT_API void htsthread_uninit(void ) {
+ htsthread_wait();
+#if USE_BEGINTHREAD
+ htsSetLock(&process_chain_mutex, -998);
+#endif
+}
+
+typedef struct {
+ PTHREAD_TYPE ( PTHREAD_TYPE_FNC *start_address )( void * );
+ void** arglist;
+} execth_args;
+static PTHREAD_TYPE PTHREAD_TYPE_FNC execth( void * arg )
+{
+ execth_args* args = (execth_args*) arg;
+ assertf(args != NULL);
+
+ htsSetLock(&process_chain_mutex, 1);
+ process_chain++;
+ assertf(process_chain > 0);
+ htsSetLock(&process_chain_mutex, 0);
+
+ (void) args->start_address(args->arglist);
+
+ htsSetLock(&process_chain_mutex, 1);
+ process_chain--;
+ assertf(process_chain >= 0);
+ htsSetLock(&process_chain_mutex, 0);
+
+ free(arg);
+ return PTHREAD_RETURN;
+}
+
+
+HTSEXT_API int hts_newthread( PTHREAD_TYPE ( PTHREAD_TYPE_FNC *start_address )( void * ), unsigned stack_size, void *arglist )
+{
+ execth_args* args = (execth_args*) malloc(sizeof(execth_args)); /* handed to execth(), which frees it when start_address returns */
+ assertf(args != NULL);
+ args->start_address = start_address;
+ args->arglist = arglist;
+
+ /* create a thread running execth(args); returns 0 on success, -1 if the thread could not be started */
+#ifdef _WIN32
+ if (_beginthread(execth, stack_size, args) == -1) {
+ free(args); /* thread never started: args is still ours */
+ return -1;
+ }
+#else
+ { /* note: stack_size is not used in this branch */
+ PTHREAD_HANDLE handle = 0;
+ int retcode;
+ retcode = pthread_create(&handle, NULL, execth, args);
+ if (retcode != 0) { /* error */
+ free(args);
+ return -1;
+ } else {
+ /* detach the thread from the main process so that it can be independent */
+ pthread_detach(handle);
+ }
+ }
+#endif
+ return 0;
+}
+
+
// Threads - emulate _beginthread under Linux/Unix using pthread_XX
// Some changes will have to be done, see PTHREAD_RETURN,PTHREAD_TYPE
#if USE_PTHREAD
#include <pthread.h> /* _beginthread, _endthread */
-
+#if 0
unsigned long _beginthread( void* ( *start_address )( void * ), unsigned stack_size, void *arglist )
{
pthread_t th;
@@ -56,6 +156,7 @@ unsigned long _beginthread( void* ( *start_address )( void * ), unsigned stack_s
return 0;
}
#endif
+#endif
#if USE_BEGINTHREAD
/*
@@ -67,28 +168,49 @@ unsigned long _beginthread( void* ( *start_address )( void * ), unsigned stack_s
0 unlock the mutex
[-1 check if locked (always return 0 with mutex)]
-999 initialize
-*/
+ -998 free
+ */
HTSEXT_API int htsSetLock(PTHREAD_LOCK_TYPE* hMutex,int lock) {
#if HTS_WIN
/* lock */
- if (lock==1)
- WaitForSingleObject(*hMutex,INFINITE);
- /* unlock */
- else if (lock==0)
- ReleaseMutex(*hMutex);
- /* create */
- else if (lock==-999)
- *hMutex=CreateMutex(NULL,FALSE,NULL);
+ switch(lock) {
+ case 1: /* lock */
+ assertf(*hMutex != NULL);
+ WaitForSingleObject(*hMutex,INFINITE);
+ break;
+ case 0: /* unlock */
+ assertf(*hMutex != NULL);
+ ReleaseMutex(*hMutex);
+ break;
+ case -999: /* create */
+ *hMutex=CreateMutex(NULL,FALSE,NULL);
+ break;
+ case -998: /* destroy */
+ CloseHandle(*hMutex);
+ *hMutex = NULL;
+ break;
+ default:
+ assert(FALSE);
+ break;
+ }
#else
- /* lock */
- if (lock==1)
- pthread_mutex_lock(hMutex);
- /* unlock */
- else if (lock==0)
- pthread_mutex_unlock(hMutex);
- /* create */
- else if (lock==-999)
- pthread_mutex_init(hMutex,0);
+ switch(lock) {
+ case 1: /* lock */
+ pthread_mutex_lock(hMutex);
+ break;
+ case 0: /* unlock */
+ pthread_mutex_unlock(hMutex);
+ break;
+ case -999: /* create */
+ pthread_mutex_init(hMutex,0);
+ break;
+ case -998: /* destroy */
+ pthread_mutex_destroy(hMutex);
+ break;
+ default:
+ assert(0);
+ break;
+ }
#endif
return 0;
}
diff --git a/src/htsthread.h b/src/htsthread.h
index 326c8cb..cac48de 100644
--- a/src/htsthread.h
+++ b/src/htsthread.h
@@ -43,14 +43,23 @@ Please visit our Website: http://www.httrack.com
#endif
#if HTS_WIN
#include "windows.h"
+#ifdef _WIN32_WCE
+#ifndef HTS_CECOMPAT
+#include "cethread.h"
+#endif
+#endif
#endif
#if USE_BEGINTHREAD
#if HTS_WIN
#define PTHREAD_RETURN
-#define PTHREAD_TYPE void __cdecl
+#define PTHREAD_TYPE void
+#define PTHREAD_TYPE_FNC __cdecl
#define PTHREAD_LOCK_TYPE HANDLE
+#define PTHREAD_HANDLE HANDLE
+#define PTHREAD_WAIT_THREAD(A) do { WaitForSingleObject(A, INFINITE); CloseHandle(A); } while(0)
+
/* Useless - see '__declspec( thread )' */
/*
@@ -65,12 +74,15 @@ Please visit our Website: http://www.httrack.com
#define PTHREAD_RETURN NULL
#define PTHREAD_TYPE void*
+#define PTHREAD_TYPE_FNC
#define PTHREAD_LOCK_TYPE pthread_mutex_t
#define PTHREAD_KEY_TYPE pthread_key_t
#define PTHREAD_KEY_CREATE(ptrkey, uninit) pthread_key_create(ptrkey, uninit)
#define PTHREAD_KEY_DELETE(key) pthread_key_delete(key)
#define PTHREAD_KEY_SET(key, val, ptrtype) pthread_setspecific(key, (void*)val)
#define PTHREAD_KEY_GET(key, ptrval, ptrtype) do { *(ptrval)=(ptrtype)pthread_getspecific(key); } while(0)
+#define PTHREAD_HANDLE pthread_t
+#define PTHREAD_WAIT_THREAD(A) do { pthread_join(A, NULL); /* pthread_join releases the thread; CloseHandle() is Win32-only */ } while(0)
#endif
@@ -82,13 +94,24 @@ Please visit our Website: http://www.httrack.com
#define PTHREAD_KEY_DELETE(key) do { key=(void*)NULL; } while(0)
#define PTHREAD_KEY_SET(key, val, ptrtype) do { key=(void*)(val); } while(0)
#define PTHREAD_KEY_GET(key, ptrval, ptrtype) do { *(ptrval)=(ptrtype)(key); } while(0)
+#define PTHREAD_HANDLE void
#endif
+/* Library internal definitions */
+HTSEXT_API int hts_newthread( PTHREAD_TYPE ( PTHREAD_TYPE_FNC *start_address )( void * ), unsigned stack_size, void *arglist );
+HTSEXT_API void htsthread_wait(void );
+HTSEXT_API void htsthread_wait_n(int n_wait);
+
+#ifdef HTS_INTERNAL_BYTECODE
HTSEXT_API int htsSetLock(PTHREAD_LOCK_TYPE * hMutex,int lock);
+HTSEXT_API void htsthread_init(void );
+HTSEXT_API void htsthread_uninit(void );
#if USE_PTHREAD
-unsigned long _beginthread( void* ( *start_address )( void * ), unsigned stack_size, void *arglist );
+// unsigned long _beginthread( void* ( *start_address )( void * ), unsigned stack_size, void *arglist );
+
+#endif
#endif
#endif
diff --git a/src/htstools.c b/src/htstools.c
index 44e5137..389bd3a 100644
--- a/src/htstools.c
+++ b/src/htstools.c
@@ -35,14 +35,16 @@ Please visit our Website: http://www.httrack.com
/* Author: Xavier Roche */
/* ------------------------------------------------------------ */
+/* Internal engine bytecode */
+#define HTS_INTERNAL_BYTECODE
+
#include "htstools.h"
/* specific definitions */
#include "htsbase.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
#include <ctype.h>
+/* String */
+#include "htsstrings.h"
/* END specific definitions */
@@ -90,10 +92,14 @@ int ident_url_relatif(char *lien,char* origin_adr,char* origin_fil,char* adr,cha
ok=-2; // non supporté
}
#if HTS_USEOPENSSL
- } else if (SSL_is_available && strfield(lien,"https://")) {
- // Note: ftp:foobar.gif is not valid
- if (ident_url_absolute(lien,adr,fil)==-1) {
- ok=-1; // erreur URL
+ } else if (strfield(lien,"https://")) {
+ if (SSL_is_available) {
+ // Note: ftp:foobar.gif is not valid
+ if (ident_url_absolute(lien,adr,fil)==-1) {
+ ok=-1; // erreur URL
+ }
+ } else {
+ ok=-1;
}
#endif
} else if ((scheme) && (
@@ -190,8 +196,8 @@ int ident_url_relatif(char *lien,char* origin_adr,char* origin_fil,char* adr,cha
// créer dans s, à partir du chemin courant curr_fil, le lien vers link (absolu)
// un ident_url_relatif a déja été fait avant, pour que link ne soit pas un chemin relatif
int lienrelatif(char* s,char* link,char* curr_fil) {
- char _curr[HTS_URLMAXSIZE*2];
- char newcurr_fil[HTS_URLMAXSIZE*2],newlink[HTS_URLMAXSIZE*2];
+ char BIGSTK _curr[HTS_URLMAXSIZE*2];
+ char BIGSTK newcurr_fil[HTS_URLMAXSIZE*2],newlink[HTS_URLMAXSIZE*2];
char* curr;
//int n=0;
char* a;
@@ -325,7 +331,7 @@ void longfile_to_83(int mode,char* n83,char* save) {
max=8;
break;
case 2:
- max=30;
+ max=31;
break;
default:
max=8;
@@ -348,21 +354,20 @@ void longfile_to_83(int mode,char* n83,char* save) {
}
}
/*
- Avoid: (ISO9660, but also suitable for 8-3)
- (Thanks to jonat@cellcast.com for te hint)
- /:;?\#*~
- 0x00-0x1f and 0x80-0xff
+ Avoid: (ISO9660, but also suitable for 8-3)
+ (Thanks to jonat@cellcast.com for the hint)
+ /:;?\#*~
+ 0x00-0x1f and 0x80-0xff
*/
- for(i=0 ; i < (int) strlen(save) ; i++) {
- if (
- (strchr("/:;?\\#*~", save[i]))
- ||
- (save[i] < 32)
- ||
- (save[i] >= 127)
- ) {
- save[i]='_';
+ for(i = 0 ; save[i] != 0 ; i++) {
+ char a = save[i];
+ if (a >= 'a' && a <= 'z') {
+ a -= 'a' - 'A';
}
+ else if ( ! ( (a >= 'A' && a <= 'Z') || (a >= '0' && a <= '9') || a == '_' || a == '.') ) {
+ a = '_';
+ }
+ save[i] = a;
}
i=j=0;
@@ -475,6 +480,20 @@ HTS_INLINE int __rech_tageq(const char* adr,const char* s) {
}
return 0;
}
+
+HTS_INLINE int rech_endtoken(const char* adr, const char** start) {
+ char quote = '\0';
+ int length = 0;
+ while(is_space(*adr)) adr++;
+ if (*adr == '"' || *adr == '\'')
+ quote = *adr++;
+ *start = adr;
+ while(*adr != 0 && *adr != quote && (quote != '\0' || !is_space(*adr)) ) {
+ length++;
+ adr++;
+ }
+ return length;
+}
// same, but check begining of adr wirh s (for <object src="bar.mov" .. hotspot123="foo.html">)
HTS_INLINE int __rech_tageqbegdigits(const char* adr,const char* s) {
int p;
@@ -519,7 +538,7 @@ HTS_INLINE int check_tag(char* from,const char* tag) {
int istoobig(LLint size,LLint maxhtml,LLint maxnhtml,char* type) {
int ok=1;
if (size>0) {
- if (is_hypertext_mime(type)) {
+ if (is_hypertext_mime(type, "")) {
if (maxhtml>0) {
if (size>maxhtml)
ok=0;
@@ -535,18 +554,34 @@ int istoobig(LLint size,LLint maxhtml,LLint maxnhtml,char* type) {
}
+static int sortTopIndexFnc(const void * a_, const void * b_) { /* qsort-style comparator; a_ and b_ are cast to topindex_chain** */
+ int cmp;
+ topindex_chain** a = (topindex_chain**) a_;
+ topindex_chain** b = (topindex_chain**) b_;
+ /* Level first, then category, then name (the last two compared with strcmpnocase) */
+ if ((cmp = (*a)->level - (*b)->level) == 0) {
+ if ((cmp = strcmpnocase((*a)->category, (*b)->category)) == 0) {
+ cmp = strcmpnocase((*a)->name, (*b)->name);
+ }
+ }
+ return cmp;
+}
+
+HTSEXT_API char* hts_getcategory(char* filename);
+
HTSEXT_API int hts_buildtopindex(httrackp* opt,char* path,char* binpath) {
FILE* fpo;
int retval=0;
- char rpath[1024*2];
- char *toptemplate_header=NULL,*toptemplate_body=NULL,*toptemplate_footer=NULL;
+ char BIGSTK rpath[1024*2];
+ char *toptemplate_header=NULL,*toptemplate_body=NULL,*toptemplate_footer=NULL,*toptemplate_bodycat=NULL;
// and HTML templates
toptemplate_header=readfile_or(fconcat(binpath,"templates/topindex-header.html"),HTS_INDEX_HEADER);
toptemplate_body=readfile_or(fconcat(binpath,"templates/topindex-body.html"),HTS_INDEX_BODY);
+ toptemplate_bodycat=readfile_or(fconcat(binpath,"templates/topindex-bodycat.html"),HTS_INDEX_BODYCAT);
toptemplate_footer=readfile_or(fconcat(binpath,"templates/topindex-footer.html"),HTS_INDEX_FOOTER);
- if (toptemplate_header && toptemplate_body && toptemplate_footer) {
+ if (toptemplate_header && toptemplate_body && toptemplate_footer && toptemplate_bodycat) {
strcpybuff(rpath,path);
if (rpath[0]) {
@@ -556,6 +591,7 @@ HTSEXT_API int hts_buildtopindex(httrackp* opt,char* path,char* binpath) {
fpo=fopen(fconcat(rpath,"/index.html"),"wb");
if (fpo) {
+ String iname = STRING_EMPTY;
find_handle h;
verif_backblue(opt,concat(rpath,"/")); // générer gif
// Header
@@ -568,16 +604,40 @@ HTSEXT_API int hts_buildtopindex(httrackp* opt,char* path,char* binpath) {
if (h) {
struct topindex_chain * chain=NULL;
struct topindex_chain * startchain=NULL;
+ String iname = STRING_EMPTY;
+ int chainSize = 0;
do {
if (hts_findisdir(h)) {
- char iname[HTS_URLMAXSIZE*2];
- strcpybuff(iname,rpath);
- strcatbuff(iname,"/");
- strcatbuff(iname,hts_findgetname(h));
- strcatbuff(iname,"/index.html");
- if (fexist(iname)) {
+ StringStrcpy(iname,rpath);
+ StringStrcat(iname,"/");
+ StringStrcat(iname,hts_findgetname(h));
+ StringStrcat(iname,"/index.html");
+ if (fexist(StringBuff(iname))) {
+ int level = 0;
+ char* category = NULL;
struct topindex_chain * oldchain=chain;
+
+ /* Check for an existing category */
+ StringStrcpy(iname,rpath);
+ StringStrcat(iname,"/");
+ StringStrcat(iname,hts_findgetname(h));
+ StringStrcat(iname,"/hts-cache/winprofile.ini");
+ if (fexist(StringBuff(iname))) {
+ category = hts_getcategory(StringBuff(iname));
+ if (category != NULL) {
+ if (*category == '\0') {
+ freet(category);
+ category = NULL;
+ }
+ }
+ }
+ if (category == NULL) {
+ category = strdupt("No categories");
+ level = 1;
+ }
+
chain=calloc(sizeof(struct topindex_chain), 1);
+ chainSize++;
if (!startchain) {
startchain=chain;
}
@@ -587,29 +647,63 @@ HTSEXT_API int hts_buildtopindex(httrackp* opt,char* path,char* binpath) {
}
chain->next=NULL;
strcpybuff(chain->name, hts_findgetname(h));
+ chain->category = category;
+ chain->level = level;
}
}
}
} while(hts_findnext(h));
hts_findclose(h);
-
- /* Build sorted index */
- chain=startchain;
- while(chain) {
- char hname[HTS_URLMAXSIZE*2];
- strcpybuff(hname,chain->name);
- escape_check_url(hname);
- fprintf(fpo,toptemplate_body,
- hname,
- chain->name
- );
-
- chain=chain->next;
+ StringFree(iname);
+
+ /* Sort chain */
+ {
+ struct topindex_chain** sortedElts = (struct topindex_chain**) calloct(sizeof(topindex_chain*), chainSize);
+ assertf(sortedElts != NULL);
+ if (sortedElts != NULL) {
+ int i;
+ char* category = "";
+
+ /* Build array */
+ struct topindex_chain * chain = startchain;
+ for(i = 0 ; i < chainSize ; i++) {
+ assertf(chain != NULL);
+ sortedElts[i] = chain;
+ chain = chain->next;
+ }
+ qsort(sortedElts, chainSize, sizeof(topindex_chain*), sortTopIndexFnc);
+
+ /* Build sorted index */
+ for(i = 0 ; i < chainSize ; i++) {
+ char BIGSTK hname[HTS_URLMAXSIZE*2];
+ strcpybuff(hname,sortedElts[i]->name);
+ escape_check_url(hname);
+
+ /* Changed category */
+ if (strcmp(category, sortedElts[i]->category) != 0) {
+ category = sortedElts[i]->category;
+ fprintf(fpo,toptemplate_bodycat, category);
+ }
+ fprintf(fpo,toptemplate_body,
+ hname,
+ sortedElts[i]->name
+ );
+ }
+
+ /* Wipe elements */
+ for(i = 0 ; i < chainSize ; i++) {
+ freet(sortedElts[i]->category);
+ freet(sortedElts[i]);
+ sortedElts[i] = NULL;
+ }
+ freet(sortedElts);
+
+ /* Return value */
+ retval=1;
+ }
}
-
-
- retval=1;
+
}
// Footer
@@ -629,10 +723,110 @@ HTSEXT_API int hts_buildtopindex(httrackp* opt,char* path,char* binpath) {
freet(toptemplate_body);
if (toptemplate_footer)
freet(toptemplate_footer);
+ /* Release the category-row template loaded with the other templates */
+ if (toptemplate_bodycat)
+ freet(toptemplate_bodycat);
return retval;
}
+/*
+ * Read 'filename' line by line and stop at the first non-empty line for
+ * which strfield(line, "category=") matches; the text following that prefix
+ * is passed to unescapehttp(), which stores its result in a String.
+ * Returns the buffer of that String. NOTE(review): presumably NULL when the
+ * file is missing or holds no category line, and to be released by the
+ * caller -- confirm against the String macros.
+ */
+HTSEXT_API char* hts_getcategory(char* filename) {
+  String categ = STRING_EMPTY;
+  FILE* fp;
+
+  if (!fexist(filename))
+    return StringBuff(categ);
+
+  fp = fopen(filename, "rb");
+  if (fp == NULL)
+    return StringBuff(categ);
+
+  while(!feof(fp)) {
+    char BIGSTK line[1024];
+    if (linput(fp, line, sizeof(line) - 2) > 0 && strfield(line, "category=")) {
+      /* 9 == strlen("category=") */
+      unescapehttp(line+9, &categ);
+      break;
+    }
+  }
+  fclose(fp);
+  return StringBuff(categ);
+}
+
+/*
+ * Walk the entries of directory 'path' and, for every sub-directory holding
+ * a "hts-cache/winprofile.ini" file, add one item to a "\r\n"-separated list:
+ *   type == 1 : the value of the first "category=" line of that file, unless
+ *               the same raw value was already recorded in a temporary hash
+ *               table. NOTE(review): the list is always seeded with
+ *               "Test category 1" / "Test category 2" -- looks like a
+ *               leftover, confirm.
+ *   otherwise : the name of the sub-directory.
+ * A trailing '/' in 'path' is removed in place, i.e. the caller's buffer is
+ * modified.
+ * Returns the buffer of the String that was built. NOTE(review): presumably
+ * owned by the caller and possibly NULL when nothing was appended -- confirm
+ * against the String macros.
+ */
+HTSEXT_API char* hts_getcategories(char* path, int type) {
+ String categ = STRING_EMPTY;
+ String profiles = STRING_EMPTY;
+ char* rpath = path;
+ find_handle h;
+ inthash hashCateg = NULL;
+ if (rpath[0]) {
+ if (rpath[strlen(rpath)-1]=='/') {
+ rpath[strlen(rpath)-1]='\0'; /* note: patching stored (inhash) value */
+ }
+ }
+ h = hts_findfirst(rpath);
+ if (h) {
+ /* NOTE(review): chain and startchain are never used in this function */
+ struct topindex_chain * chain=NULL;
+ struct topindex_chain * startchain=NULL;
+ String iname = STRING_EMPTY;
+ if (type == 1) {
+ hashCateg = inthash_new(127);
+ StringStrcat(categ, "Test category 1");
+ StringStrcat(categ, "\r\nTest category 2");
+ }
+ do {
+ if (hts_findisdir(h)) {
+ char BIGSTK line2[1024];
+ /* iname = <rpath>/<entry name>/hts-cache/winprofile.ini */
+ StringStrcpy(iname,rpath);
+ StringStrcat(iname,"/");
+ StringStrcat(iname,hts_findgetname(h));
+ StringStrcat(iname,"/hts-cache/winprofile.ini");
+ if (fexist(StringBuff(iname))) {
+ if (type == 1) {
+ FILE* fp = fopen(StringBuff(iname), "rb");
+ if (fp != NULL) {
+ int done=0;
+ /* Stop at the first "category=" line, whether or not it carries a value */
+ while(!feof(fp) && !done) {
+ int n = linput(fp, line2, sizeof(line2) - 2);
+ if (n > 0) {
+ if (strfield(line2, "category=")) {
+ /* line2+9 is the text following "category=" */
+ if (*(line2+9)) {
+ /* NOTE(review): a NULL value pointer is passed to inthash_read -- confirm it is tolerated */
+ if (!inthash_read(hashCateg, line2+9, NULL)) {
+ /* The hash key is the raw text; the list receives the output of unescapehttp() */
+ inthash_write(hashCateg, line2+9, 0);
+ if (StringLength(categ) > 0) {
+ StringStrcat(categ, "\r\n");
+ }
+ unescapehttp(line2+9, &categ);
+ }
+ }
+ done=1;
+ }
+ }
+ }
+ line2[0] = '\0';
+ fclose(fp);
+ }
+ } else {
+ if (StringLength(profiles) > 0) {
+ StringStrcat(profiles, "\r\n");
+ }
+ StringStrcat(profiles, hts_findgetname(h));
+ }
+ }
+
+ }
+ } while(hts_findnext(h));
+ hts_findclose(h);
+ StringFree(iname);
+ }
+ if (hashCateg) {
+ inthash_delete(&hashCateg);
+ hashCateg = NULL;
+ }
+ /* Only the String matching 'type' has been filled */
+ if (type == 1)
+ return StringBuff(categ);
+ else
+ return StringBuff(profiles);
+}
+
@@ -658,14 +852,14 @@ HTSEXT_API find_handle hts_findfirst(char* path) {
memset(find, 0, sizeof(find_handle_struct));
#if HTS_WIN
{
- char rpath[1024*2];
+ char BIGSTK rpath[1024*2];
strcpybuff(rpath,path);
if (rpath[0]) {
if (rpath[strlen(rpath)-1]!='\\')
strcatbuff(rpath,"\\");
}
strcatbuff(rpath,"*.*");
- find->handle = FindFirstFile(rpath,&find->hdata);
+ find->handle = FindFirstFileA(rpath,&find->hdata);
if (find->handle != INVALID_HANDLE_VALUE)
return find;
}
@@ -693,7 +887,7 @@ HTSEXT_API find_handle hts_findfirst(char* path) {
HTSEXT_API int hts_findnext(find_handle find) {
if (find) {
#if HTS_WIN
- if ( (FindNextFile(find->handle,&find->hdata)))
+ if ( (FindNextFileA(find->handle,&find->hdata)))
return 1;
#else
memset(&(find->filestat), 0, sizeof(find->filestat));
diff --git a/src/htstools.h b/src/htstools.h
index e3f7dd7..c75d74d 100644
--- a/src/htstools.h
+++ b/src/htstools.h
@@ -40,8 +40,6 @@ Please visit our Website: http://www.httrack.com
#define HTSTOOLS_DEFH
/* specific definitions */
-#include <stdio.h>
-#include <stdlib.h>
#include "htsbase.h"
#include "htscore.h"
@@ -54,6 +52,33 @@ Please visit our Website: http://www.httrack.com
#include <sys/stat.h>
#endif
+#ifndef HTTRACK_DEFLIB
+
+// Portable directory find functions
+/* State of a directory enumeration; the layout depends on the platform. */
+#ifdef _WIN32
+typedef struct find_handle_struct {
+ WIN32_FIND_DATAA hdata; /* entry data filled by FindFirstFileA/FindNextFileA */
+ HANDLE handle; /* search handle returned by FindFirstFileA */
+} find_handle_struct;
+#else
+typedef struct find_handle_struct {
+ DIR * hdir; /* presumably the opened directory stream -- confirm in hts_findfirst */
+ struct dirent* dirp; /* presumably the current directory entry -- confirm in hts_findnext */
+ struct stat filestat; /* stat data; cleared by hts_findnext before each step */
+ char path[2048]; /* presumably the directory being enumerated -- confirm in hts_findfirst */
+} find_handle_struct;
+#endif
+typedef find_handle_struct* find_handle;
+/* One project entry of the top index built by hts_buildtopindex (singly linked list). */
+typedef struct topindex_chain {
+ int level; /* sort level (hts_buildtopindex uses 0 for a real category, 1 for "No categories") */
+ char* category; /* category (heap string, released with freet by hts_buildtopindex) */
+ char name[2048]; /* path (filled from hts_findgetname by hts_buildtopindex) */
+ struct topindex_chain* next; /* next element */
+} topindex_chain ;
+#endif
+
+/* Library internal definitions */
+#ifdef HTS_INTERNAL_BYTECODE
int ident_url_relatif(char *lien,char* urladr,char* urlfil,char* adr,char* fil);
int lienrelatif(char* s,char* link,char* curr);
int link_has_authority(char* lien);
@@ -84,38 +109,15 @@ HTS_INLINE int __rech_tageqbegdigits(const char* adr,const char* s);
)
//HTS_INLINE int rech_tageq(const char* adr,const char* s);
HTS_INLINE int rech_sampletag(const char* adr,const char* s);
+HTS_INLINE int rech_endtoken(const char* adr, const char** start);
HTS_INLINE int check_tag(char* from,const char* tag);
int verif_backblue(httrackp* opt,char* base);
int verif_external(int nb,int test);
int istoobig(LLint size,LLint maxhtml,LLint maxnhtml,char* type);
-
-#ifndef HTTRACK_DEFLIB
HTSEXT_API int hts_buildtopindex(httrackp* opt,char* path,char* binpath);
-#endif
-
// Portable directory find functions
-
-#ifndef HTTRACK_DEFLIB
-#ifdef _WIN32
-typedef struct {
- WIN32_FIND_DATA hdata;
- HANDLE handle;
-} find_handle_struct;
-#else
-typedef struct {
- DIR * hdir;
- struct dirent* dirp;
- struct stat filestat;
- char path[2048];
-} find_handle_struct;
-#endif
-typedef find_handle_struct* find_handle;
-typedef struct topindex_chain {
- char name[2048]; /* path */
- struct topindex_chain* next; /* next element */
-} topindex_chain ;
// Directory find functions
HTSEXT_API find_handle hts_findfirst(char* path);
HTSEXT_API int hts_findnext(find_handle find);
@@ -126,6 +128,7 @@ HTSEXT_API int hts_findgetsize(find_handle find);
HTSEXT_API int hts_findisdir(find_handle find);
HTSEXT_API int hts_findisfile(find_handle find);
HTSEXT_API int hts_findissystem(find_handle find);
+
#endif
#endif
diff --git a/src/htsweb.c b/src/htsweb.c
index a5e1902..8e33e34 100644
--- a/src/htsweb.c
+++ b/src/htsweb.c
@@ -120,7 +120,7 @@ int main(int argc, char* argv[])
if (argc < 2 || (argc % 2) != 0) {
fprintf(stderr, "** Warning: use the webhttrack frontend if available\n");
fprintf(stderr, "usage: %s <path-to-html-root-dir> [key value [key value]..]\n", argv[0]);
- fprintf(stderr, "example: %s /usr/share/httrack\n", argv[0]);
+ fprintf(stderr, "example: %s /usr/share/httrack/\n", argv[0]);
return 1;
}
@@ -200,6 +200,7 @@ int main(int argc, char* argv[])
/* launch */
ret = help_server(argv[1]);
+ htsthread_wait();
hts_uninit();
#ifdef _WIN32
@@ -210,7 +211,7 @@ int main(int argc, char* argv[])
}
static int webhttrack_runmain(int argc, char** argv);
-static PTHREAD_TYPE back_launch_cmd( void* pP ) {
+static PTHREAD_TYPE PTHREAD_TYPE_FNC back_launch_cmd( void* pP ) {
char* cmd = (char*) pP;
char** argv = (char**) malloct(1024 * sizeof(char*));
int argc = 0;
@@ -267,9 +268,12 @@ static PTHREAD_TYPE back_launch_cmd( void* pP ) {
void webhttrack_main(char* cmd) {
commandRunning = 1;
- _beginthread(back_launch_cmd, 0, (void*) strdup(cmd));
+ (void)hts_newthread(back_launch_cmd, 0, (void*) strdup(cmd));
}
+/* Internal locking */
+HTSEXT_API int htsSetLock(PTHREAD_LOCK_TYPE * hMutex,int lock);
+
void webhttrack_lock(int lock) {
htsSetLock(&refreshMutex, lock);
}
@@ -281,6 +285,7 @@ static int webhttrack_runmain(int argc, char** argv) {
htswrap_add("start",htsshow_start);
htswrap_add("change-options",htsshow_chopt);
htswrap_add("end",htsshow_end);
+ htswrap_add("preprocess-html",htsshow_preprocesshtml);
htswrap_add("check-html",htsshow_checkhtml);
htswrap_add("loop",htsshow_loop);
htswrap_add("query",htsshow_query);
@@ -290,8 +295,10 @@ static int webhttrack_runmain(int argc, char** argv) {
htswrap_add("pause",htsshow_pause);
htswrap_add("save-file",htsshow_filesave);
htswrap_add("link-detected",htsshow_linkdetected);
+ htswrap_add("link-detected2",htsshow_linkdetected2);
htswrap_add("transfer-status",htsshow_xfrstatus);
htswrap_add("save-name",htsshow_savename);
+ htsthread_wait_n(1);
hts_uninit();
return hts_main(argc,argv);
@@ -358,6 +365,9 @@ int __cdecl htsshow_chopt(httrackp* opt) {
int __cdecl htsshow_end(void) {
return 1;
}
+/* "preprocess-html" callback (registered with htswrap_add in
+   webhttrack_runmain): placeholder that ignores all of its arguments and
+   always returns 1. */
+int __cdecl htsshow_preprocesshtml(char** html,int* len,char* url_adresse,char* url_fichier) {
+ return 1;
+}
int __cdecl htsshow_checkhtml(char* html,int len,char* url_adresse,char* url_fichier) {
return 1;
}
@@ -589,6 +599,15 @@ int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int lien_n,
case 2:
sprintf(tmp, "purging files");
break;
+ case 3:
+ sprintf(tmp, "loading cache");
+ break;
+ case 4:
+ sprintf(tmp, "waiting (scheduler)");
+ break;
+ case 5:
+ sprintf(tmp, "waiting (throttle)");
+ break;
}
smallserver_setkey("info.currentjob", tmp);
}
@@ -643,11 +662,20 @@ void __cdecl htsshow_filesave(char* file) {
int __cdecl htsshow_linkdetected(char* link) {
return 1;
}
+/* "link-detected2" callback (registered with htswrap_add in
+   webhttrack_runmain): placeholder that ignores 'link' and 'start_tag' and
+   always returns 1. */
+int __cdecl htsshow_linkdetected2(char* link, char* start_tag) {
+ return 1;
+}
int __cdecl htsshow_xfrstatus(lien_back* back) {
return 1;
}
int __cdecl htsshow_savename(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save) {
return 1;
}
+/* Placeholder callback: ignores all of its arguments and always returns 1.
+   NOTE(review): no htswrap_add registration of this function is visible in
+   the webhttrack_runmain hunks of this file -- confirm whether it should be
+   registered. */
+int __cdecl htsshow_sendheader(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* outgoing) {
+ return 1;
+}
+/* Placeholder callback: ignores all of its arguments and always returns 1.
+   NOTE(review): no htswrap_add registration of this function is visible in
+   the webhttrack_runmain hunks of this file -- confirm whether it should be
+   registered. */
+int __cdecl htsshow_receiveheader(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* incoming) {
+ return 1;
+}
diff --git a/src/htsweb.h b/src/htsweb.h
index 272b363..82293b4 100644
--- a/src/htsweb.h
+++ b/src/htsweb.h
@@ -43,7 +43,7 @@ Please visit our Website: http://www.httrack.com
#define NStatsBuffer 14
#define MAX_LEN_INPROGRESS 40
-typedef struct {
+typedef struct t_StatsBuffer {
char name[1024];
char file[1024];
char state[256];
@@ -59,7 +59,7 @@ typedef struct {
int actived; // pour disabled
} t_StatsBuffer;
-typedef struct {
+typedef struct t_InpInfo {
int ask_refresh;
int refresh;
LLint stat_bytes;
@@ -85,6 +85,7 @@ void __cdecl htsshow_uninit(void);
int __cdecl htsshow_start(httrackp* opt);
int __cdecl htsshow_chopt(httrackp* opt);
int __cdecl htsshow_end(void);
+int __cdecl htsshow_preprocesshtml(char** html,int* len,char* url_adresse,char* url_fichier);
int __cdecl htsshow_checkhtml(char* html,int len,char* url_adresse,char* url_fichier);
int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int lien_n,int lien_tot,int stat_time,hts_stat_struct* stats);
char* __cdecl htsshow_query(char* question);
@@ -94,17 +95,14 @@ int __cdecl htsshow_check(char* adr,char* fil,int status);
void __cdecl htsshow_pause(char* lockfile);
void __cdecl htsshow_filesave(char* file);
int __cdecl htsshow_linkdetected(char* link);
+int __cdecl htsshow_linkdetected2(char* link, char* start_tag);
int __cdecl htsshow_xfrstatus(lien_back* back);
int __cdecl htsshow_savename(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save);
-
+int __cdecl htsshow_sendheader(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* outgoing);
+int __cdecl htsshow_receiveheader(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* incoming);
int main(int argc, char **argv);
void webhttrack_main(char* cmd);
void webhttrack_lock(int lock);
-#ifndef _WIN32
-#define fconv(a) (a)
-#define fconcat(a,b) concat(a,b)
-#endif
-
#endif
diff --git a/src/htswizard.c b/src/htswizard.c
index e976ffd..366a23a 100644
--- a/src/htswizard.c
+++ b/src/htswizard.c
@@ -35,14 +35,14 @@ Please visit our Website: http://www.httrack.com
/* Author: Xavier Roche */
/* ------------------------------------------------------------ */
+/* Internal engine bytecode */
+#define HTS_INTERNAL_BYTECODE
+
#include "htswizard.h"
#include "htsdefines.h"
/* specific definitions */
#include "htsbase.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
#include <ctype.h>
/* END specific definitions */
@@ -91,6 +91,7 @@ retour:
int hts_acceptlink(httrackp* opt,
int ptr,int lien_tot,lien_url** liens,
char* adr,char* fil,
+ char* tag, char* attribute,
int* set_prio_to,
int* just_test_it) {
@@ -166,8 +167,8 @@ int hts_acceptlink(httrackp* opt,
{ // tester interdiction de descendre
// MODIFIE : en cas de remontée puis de redescente, il se pouvait qu'on ne puisse pas atteindre certains fichiers
// problème: si un fichier est virtuellement accessible via une page mais dont le lien est sur une autre *uniquement*..
- char tempo[HTS_URLMAXSIZE*2];
- char tempo2[HTS_URLMAXSIZE*2];
+ char BIGSTK tempo[HTS_URLMAXSIZE*2];
+ char BIGSTK tempo2[HTS_URLMAXSIZE*2];
tempo[0] = tempo2[0] = '\0';
// note (up/down): on calcule à partir du lien primaire, ET du lien précédent.
@@ -190,15 +191,17 @@ int hts_acceptlink(httrackp* opt,
// (test même niveau (NOUVEAU à cause de certains problèmes de filtres non intégrés))
// NEW
- if (tempo[0] != '\0' && tempo[1] != '\0') {
- if ( (!strchr(tempo+1,'/')) || (!strchr(tempo2+1,'/')) ) {
- if (!liens[ptr]->link_import) { // ne résulte pas d'un 'moved'
- forbidden_url=0;
- if ((opt->debug>1) && (opt->log!=NULL)) {
- fspc(opt->log,"debug"); fprintf(opt->log,"same level link authorized: %s%s"LF,adr,fil);
- test_flush;
- }
- }
+ if (
+ (tempo[0] != '\0' && tempo[1] != '\0' && strchr(tempo+1,'/') == 0)
+ ||
+ (tempo2[0] != '\0' && tempo2[1] != '\0' && strchr(tempo2+1,'/') == 0)
+ ) {
+ if (!liens[ptr]->link_import) { // ne résulte pas d'un 'moved'
+ forbidden_url=0;
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"same level link authorized: %s%s"LF,adr,fil);
+ test_flush;
+ }
}
}
@@ -289,8 +292,8 @@ int hts_acceptlink(httrackp* opt,
} // tester interdiction de descendre?
{ // tester interdiction de monter
- char tempo[HTS_URLMAXSIZE*2];
- char tempo2[HTS_URLMAXSIZE*2];
+ char BIGSTK tempo[HTS_URLMAXSIZE*2];
+ char BIGSTK tempo2[HTS_URLMAXSIZE*2];
if (lienrelatif(tempo,fil,liens[liens[ptr]->premier]->fil)==0) {
if (lienrelatif(tempo2,fil,liens[ptr]->fil)==0) {
} else {
@@ -414,8 +417,8 @@ int hts_acceptlink(httrackp* opt,
int question=1; // poser une question
int force_mirror=0; // pour mirror links
int filters_answer=0; // décision prise par les filtres
- char l[HTS_URLMAXSIZE*2];
- char lfull[HTS_URLMAXSIZE*2];
+ char BIGSTK l[HTS_URLMAXSIZE*2];
+ char BIGSTK lfull[HTS_URLMAXSIZE*2];
if (forbidden_url!=-1) question=0; // pas de question, résolu
@@ -618,7 +621,7 @@ int hts_acceptlink(httrackp* opt,
HTS_REQUEST_END;
#if HTS_ANALYSTE
{
- char tempo[HTS_URLMAXSIZE*2];
+ char BIGSTK tempo[HTS_URLMAXSIZE*2];
tempo[0]='\0';
strcatbuff(tempo,adr);
strcatbuff(tempo,"/");
@@ -851,8 +854,8 @@ int hts_testlinksize(httrackp* opt,
LLint size) {
int jok=0;
if (size>=0) {
- char l[HTS_URLMAXSIZE*2];
- char lfull[HTS_URLMAXSIZE*2];
+ char BIGSTK l[HTS_URLMAXSIZE*2];
+ char BIGSTK lfull[HTS_URLMAXSIZE*2];
if (size>=0) {
LLint sz=size;
int size_flag=0;
diff --git a/src/htswizard.h b/src/htswizard.h
index 147c7b7..a36940d 100644
--- a/src/htswizard.h
+++ b/src/htswizard.h
@@ -40,12 +40,18 @@ Please visit our Website: http://www.httrack.com
#include "htscore.h"
+
+/* Library internal definitions */
+#ifdef HTS_INTERNAL_BYTECODE
int hts_acceptlink(httrackp* opt,
int ptr,int lien_tot,lien_url** liens,
char* adr,char* fil,
+ char* tag, char* attribute,
int* set_prio_to_0,
int* just_test_it);
int hts_testlinksize(httrackp* opt,
char* adr,char* fil,
LLint size);
#endif
+
+#endif
diff --git a/src/htswrap.c b/src/htswrap.c
index 28c4c71..3150f1d 100644
--- a/src/htswrap.c
+++ b/src/htswrap.c
@@ -35,6 +35,9 @@ Please visit our Website: http://www.httrack.com
/* Author: Xavier Roche */
/* ------------------------------------------------------------ */
+/* Internal engine bytecode */
+#define HTS_INTERNAL_BYTECODE
+
#include "htswrap.h"
#include "htshash.h"
#include "htsinthash.h"
diff --git a/src/htswrap.h b/src/htswrap.h
index b87bf11..f97157a 100644
--- a/src/htswrap.h
+++ b/src/htswrap.h
@@ -40,7 +40,8 @@ Please visit our Website: http://www.httrack.com
#include "htsglobal.h"
-#ifndef HTTRACK_DEFLIB
+/* Library internal definitions */
+#ifdef HTS_INTERNAL_BYTECODE
HTSEXT_API int htswrap_init(void);
HTSEXT_API int htswrap_add(char* name,void* fct);
HTSEXT_API int htswrap_free(void);
diff --git a/src/htszlib.c b/src/htszlib.c
index faf4e88..19e3abb 100644
--- a/src/htszlib.c
+++ b/src/htszlib.c
@@ -35,13 +35,12 @@ Please visit our Website: http://www.httrack.com
/* Author: Xavier Roche */
/* ------------------------------------------------------------ */
+/* Internal engine bytecode */
+#define HTS_INTERNAL_BYTECODE
/* specific definitions */
-#include <stdio.h>
-#include <stdlib.h>
#include "htsbase.h"
#include "htscore.h"
-
#include "htszlib.h"
#if HTS_USEZLIB
@@ -65,7 +64,7 @@ int hts_zunpack(char* filename,char* newfile) {
if (fpout) {
int nr;
do {
- char buff[1024];
+ char BIGSTK buff[1024];
nr=gzread (gz, buff, 1024);
if (nr>0) {
size+=nr;
@@ -84,4 +83,65 @@ int hts_zunpack(char* filename,char* newfile) {
return -1;
}
+/*
+ * Build the archive named by fconcat(path,"hts-cache/meta.zip") from the one
+ * named by fconcat(path,"hts-cache/new.zip"): for every entry of the source
+ * archive whose local extra field is not empty, an entry with the same name,
+ * date and attributes is created in the destination archive, its content
+ * being that extra field (written as a NUL-terminated string, without the
+ * terminator).
+ * Failures on individual entries are ignored (best effort).
+ * Returns 1 when both archives could be opened, 0 otherwise; in the latter
+ * case an archive that did open is closed before returning.
+ */
+int hts_extract_meta(char* path) {
+  unzFile zFile = unzOpen(fconcat(path,"hts-cache/new.zip"));
+  zipFile zFileOut = zipOpen(fconcat(path,"hts-cache/meta.zip"), 0);
+  if (zFile == NULL || zFileOut == NULL) {
+    /* Only one of the two may have been opened: do not leak its handle */
+    if (zFileOut != NULL)
+      zipClose(zFileOut, NULL);
+    if (zFile != NULL)
+      unzClose(zFile);
+    return 0;
+  }
+  if (unzGoToFirstFile(zFile) == Z_OK) {
+    zip_fileinfo fi;
+    unz_file_info ufi;
+    char BIGSTK filename[HTS_URLMAXSIZE * 4];
+    char BIGSTK comment[8192];
+    memset(comment, 0, sizeof(comment));    // for truncated reads
+    memset(&fi, 0, sizeof(fi));
+    memset(&ufi, 0, sizeof(ufi));
+    do {
+      int readSizeHeader;
+      filename[0] = '\0';
+      comment[0] = '\0';
+
+      if (unzOpenCurrentFile(zFile) == Z_OK) {
+        /* Two bytes are kept spare in both buffers so that the terminator
+           written below always fits */
+        if (
+          (readSizeHeader = unzGetLocalExtrafield(zFile, comment, sizeof(comment) - 2)) > 0
+          &&
+          unzGetCurrentFileInfo(zFile, &ufi, filename, sizeof(filename) - 2, NULL, 0, NULL, 0) == Z_OK
+          )
+        {
+          comment[readSizeHeader] = '\0';
+          fi.dosDate = ufi.dosDate;
+          fi.internal_fa = ufi.internal_fa;
+          fi.external_fa = ufi.external_fa;
+          if (zipOpenNewFileInZip(zFileOut,
+            filename,
+            &fi,
+            NULL,
+            0,
+            NULL,
+            0,
+            NULL, /* comment */
+            Z_DEFLATED,
+            Z_DEFAULT_COMPRESSION) == Z_OK)
+          {
+            /* Best effort: a failed write or close of one entry is ignored */
+            (void) zipWriteInFileInZip(zFileOut, comment, (int) strlen(comment));
+            (void) zipCloseFileInZip(zFileOut);
+          }
+        }
+        unzCloseCurrentFile(zFile);
+      }
+    } while( unzGoToNextFile(zFile) == Z_OK );
+  }
+  zipClose(zFileOut, "Meta-data extracted by HTTrack/"HTTRACK_VERSION);
+  unzClose(zFile);
+  return 1;
+}
+
+#else
+
+#error HTS_USEZLIB not defined
+
#endif
diff --git a/src/htszlib.h b/src/htszlib.h
index 173d966..8f8b565 100644
--- a/src/htszlib.h
+++ b/src/htszlib.h
@@ -39,32 +39,20 @@ Please visit our Website: http://www.httrack.com
#ifndef HTS_DEFZLIB
#define HTS_DEFZLIB
-#if HTS_USEZLIB
+/* ZLib */
+#include "zlib.h"
+//#include "zutil.h"
-int hts_zunpack(char* filename,char* newfile);
-
-#define gzopen hts_ptrfunc_gzopen
-#define gzread hts_ptrfunc_gzread
-#define gzclose hts_ptrfunc_gzclose
-
-#ifdef _WIN32
-#define ZEXPORT WINAPI
-#else
-#define ZEXPORT
-#endif
-
-typedef void* voidp;
-typedef voidp gzFile;
-typedef gzFile (ZEXPORT *t_gzopen)(const char *path, const char *mode);
-typedef int (ZEXPORT *t_gzread)(gzFile file, voidp buf, unsigned len);
-typedef int (ZEXPORT *t_gzclose)(gzFile file);
+/* MiniZip */
+#include "minizip/zip.h"
+#include "minizip/unzip.h"
+#include "minizip/mztools.h"
+/* Library internal definitions */
+#ifdef HTS_INTERNAL_BYTECODE
extern int gz_is_available;
-extern t_gzopen gzopen;
-extern t_gzread gzread;
-extern t_gzclose gzclose;
-
-#endif
-
+extern int hts_zunpack(char* filename,char* newfile);
+extern int hts_extract_meta(char* path);
#endif
+#endif
diff --git a/src/httrack-library.h b/src/httrack-library.h
index aeea70f..2ee2511 100644
--- a/src/httrack-library.h
+++ b/src/httrack-library.h
@@ -57,6 +57,8 @@ HTSEXT_API const char* hts_is_available(void);
/* Other functions */
HTSEXT_API int hts_resetvar(void);
HTSEXT_API int hts_buildtopindex(httrackp* opt,char* path,char* binpath);
+HTSEXT_API char* hts_getcategories(char* path, int type);
+HTSEXT_API char* hts_getcategory(char* filename);
/* Catch-URL */
HTSEXT_API T_SOC catch_url_init_std(int* port_prox,char* adr_prox);
@@ -111,11 +113,17 @@ HTSEXT_API char* unescape_http_unharm(char* s, int no_high);
HTSEXT_API char* antislash_unescaped(char* s);
HTSEXT_API void escape_remove_control(char* s);
+/* Debugging */
+HTSEXT_API void hts_debug(int level);
+
/* Portable directory API */
typedef struct find_handle_struct find_handle_struct;
typedef find_handle_struct* find_handle;
+
typedef struct topindex_chain {
+ int level; /* sort level */
+ char* category; /* category */
char name[2048]; /* path */
struct topindex_chain* next; /* next element */
} topindex_chain ;
diff --git a/src/httrack.c b/src/httrack.c
index c69a600..b3accb1 100644
--- a/src/httrack.c
+++ b/src/httrack.c
@@ -41,9 +41,18 @@ Please visit our Website: http://www.httrack.com
#endif
#endif
+#include "httrack-library.h"
+
#include "htsglobal.h"
+#include "htsbase.h"
+#include "htsopt.h"
#include "httrack.h"
+/* Static definitions */
+static int fexist(char* s);
+static int linput(FILE* fp,char* s,int max);
+
+
// htswrap_add
#include "htswrap.h"
@@ -64,9 +73,6 @@ Please visit our Website: http://www.httrack.com
#include <unistd.h>
#endif
#include <ctype.h>
-#ifdef _WIN32
-//#include "Winsock.h"
-#endif
/* END specific definitions */
// ISO VT100/220 definitions
@@ -105,10 +111,10 @@ Please visit our Website: http://www.httrack.com
#define VT_CLRSCR "\33[2J"
//
#define csi(X) printf(s_csi( X ));
-void vt_clear(void) {
+static void vt_clear(void) {
printf("%s%s%s",VT_RESET,VT_CLRSCR,VT_GOTOXY("1","0"));
}
-void vt_home(void) {
+static void vt_home(void) {
printf("%s%s",VT_RESET,VT_GOTOXY("1","0"));
}
//
@@ -146,7 +152,13 @@ Log: "engine: end"
hts_htmlcheck_chopt = (t_hts_htmlcheck_chopt) htswrap_read("change-options");
Log: "engine: change-options"
- hts_htmlcheck = (t_hts_htmlcheck) htswrap_read("check-html");
+ hts_htmlcheck_preprocess = (t_hts_htmlcheck_process) htswrap_read("preprocess-html");
+Log: "preprocess-html: <url>"
+
+ hts_htmlcheck_postprocess = (t_hts_htmlcheck_process) htswrap_read("postprocess-html");
+Log: "postprocess-html: <url>"
+
+hts_htmlcheck = (t_hts_htmlcheck) htswrap_read("check-html");
Log: "check-html: <url>"
hts_htmlcheck_query = (t_hts_htmlcheck_query) htswrap_read("query");
@@ -161,6 +173,7 @@ Log: "pause: <lockfile>"
hts_htmlcheck_filesave = (t_hts_htmlcheck_filesave) htswrap_read("save-file");
hts_htmlcheck_linkdetected = (t_hts_htmlcheck_linkdetected) htswrap_read("link-detected");
+ hts_htmlcheck_linkdetected2 = (t_hts_htmlcheck_linkdetected2) htswrap_read("link-detected2");
Log: none
hts_htmlcheck_xfrstatus = (t_hts_htmlcheck_xfrstatus) htswrap_read("transfer-status");
@@ -179,6 +192,8 @@ Log:
htswrap_add("start",htsshow_start);
htswrap_add("change-options",htsshow_chopt);
htswrap_add("end",htsshow_end);
+ htswrap_add("preprocess-html",htsshow_preprocesshtml);
+ htswrap_add("postprocess-html",htsshow_preprocesshtml);
htswrap_add("check-html",htsshow_checkhtml);
htswrap_add("loop",htsshow_loop);
htswrap_add("query",htsshow_query);
@@ -188,8 +203,11 @@ Log:
htswrap_add("pause",htsshow_pause);
htswrap_add("save-file",htsshow_filesave);
htswrap_add("link-detected",htsshow_linkdetected);
+ htswrap_add("link-detected2",htsshow_linkdetected2);
htswrap_add("transfer-status",htsshow_xfrstatus);
htswrap_add("save-name",htsshow_savename);
+ htswrap_add("send-header", htsshow_sendheader);
+ htswrap_add("receive-header", htsshow_receiveheader);
ret = hts_main(argc,argv);
if (ret) {
@@ -202,7 +220,7 @@ Log:
/* CALLBACK FUNCTIONS */
/* Initialize the Winsock */
-void __cdecl htsshow_init(void) {
+static void __cdecl htsshow_init(void) {
#ifdef _WIN32
{
WORD wVersionRequested; // requested version WinSock API
@@ -222,12 +240,12 @@ void __cdecl htsshow_init(void) {
#endif
}
-void __cdecl htsshow_uninit(void) {
+static void __cdecl htsshow_uninit(void) {
#ifdef _WIN32
WSACleanup();
#endif
}
-int __cdecl htsshow_start(httrackp* opt) {
+static int __cdecl htsshow_start(httrackp* opt) {
use_show=0;
if (opt->verbosedisplay==2) {
use_show=1;
@@ -235,16 +253,19 @@ int __cdecl htsshow_start(httrackp* opt) {
}
return 1;
}
-int __cdecl htsshow_chopt(httrackp* opt) {
+static int __cdecl htsshow_chopt(httrackp* opt) {
return htsshow_start(opt);
}
-int __cdecl htsshow_end(void) {
+static int __cdecl htsshow_end(void) {
return 1;
}
-int __cdecl htsshow_checkhtml(char* html,int len,char* url_adresse,char* url_fichier) {
+/* Placeholder callback registered with htswrap_add under both
+   "preprocess-html" and "postprocess-html": ignores all of its arguments and
+   always returns 1.
+   NOTE(review): httrack.h also declares a static htsshow_postprocesshtml for
+   which no definition is visible in this patch -- confirm. */
+static int __cdecl htsshow_preprocesshtml(char** html,int* len,char* url_adresse,char* url_fichier) {
return 1;
}
-int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int lien_n,int lien_tot,int stat_time, hts_stat_struct* stats) { // appelé à chaque boucle de HTTrack
+static int __cdecl htsshow_checkhtml(char* html,int len,char* url_adresse,char* url_fichier) {
+ return 1;
+}
+static int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int lien_n,int lien_tot,int stat_time, hts_stat_struct* stats) { // appelé à chaque boucle de HTTrack
static TStamp prev_mytime=0; /* ok */
static t_InpInfo SInfo; /* ok */
//
@@ -436,7 +457,7 @@ int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int lien_n,
}
if (ok) {
- char s[HTS_URLMAXSIZE*2];
+ char BIGSTK s[HTS_URLMAXSIZE*2];
//
StatsBuffer[index].back=i; // index pour + d'infos
//
@@ -508,6 +529,15 @@ int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int lien_n,
case 2:
printf("purging files");
break;
+ case 3:
+ printf("loading cache");
+ break;
+ case 4:
+ printf("waiting (scheduler)");
+ break;
+ case 5:
+ printf("waiting (throttle)");
+ break;
}
}
printf("%s\n",VT_CLREOL);
@@ -539,19 +569,19 @@ int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int lien_n,
return 1;
}
-char* __cdecl htsshow_query(char* question) {
+static char* __cdecl htsshow_query(char* question) {
static char s[12]=""; /* ok */
printf("%s\nPress <Y><Enter> to confirm, <N><Enter> to abort\n",question);
io_flush; linput(stdin,s,4);
return s;
}
-char* __cdecl htsshow_query2(char* question) {
+static char* __cdecl htsshow_query2(char* question) {
static char s[12]=""; /* ok */
printf("%s\nPress <Y><Enter> to confirm, <N><Enter> to abort\n",question);
io_flush; linput(stdin,s,4);
return s;
}
-char* __cdecl htsshow_query3(char* question) {
+static char* __cdecl htsshow_query3(char* question) {
static char line[256]; /* ok */
do {
io_flush; linput(stdin,line,206);
@@ -559,31 +589,39 @@ char* __cdecl htsshow_query3(char* question) {
printf("ok..\n");
return line;
}
-int __cdecl htsshow_check(char* adr,char* fil,int status) {
+static int __cdecl htsshow_check(char* adr,char* fil,int status) {
return -1;
}
-void __cdecl htsshow_pause(char* lockfile) {
+static void __cdecl htsshow_pause(char* lockfile) {
while (fexist(lockfile)) {
Sleep(1000);
}
}
-void __cdecl htsshow_filesave(char* file) {
+static void __cdecl htsshow_filesave(char* file) {
}
-int __cdecl htsshow_linkdetected(char* link) {
+static int __cdecl htsshow_linkdetected(char* link) {
return 1;
}
-int __cdecl htsshow_xfrstatus(lien_back* back) {
+/* "link-detected2" callback (registered with htswrap_add): placeholder that
+   ignores 'link' and 'start_tag' and always returns 1. */
+static int __cdecl htsshow_linkdetected2(char* link, char* start_tag) {
return 1;
}
-int __cdecl htsshow_savename(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save) {
+static int __cdecl htsshow_xfrstatus(lien_back* back) {
+ return 1;
+}
+static int __cdecl htsshow_savename(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save) {
+ return 1;
+}
+/* "send-header" callback (registered with htswrap_add): placeholder that
+   ignores all of its arguments and always returns 1. */
+static int __cdecl htsshow_sendheader(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* outgoing) {
+ return 1;
+}
+/* "receive-header" callback (registered with htswrap_add): placeholder that
+   ignores all of its arguments and always returns 1. */
+static int __cdecl htsshow_receiveheader(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* incoming) {
return 1;
}
-
/* *** Various functions *** */
-int fexist(char* s) {
+static int fexist(char* s) {
struct stat st;
memset(&st, 0, sizeof(st));
if (stat(s, &st) == 0) {
@@ -594,7 +632,7 @@ int fexist(char* s) {
return 0;
}
-int linput(FILE* fp,char* s,int max) {
+static int linput(FILE* fp,char* s,int max) {
int c;
int j=0;
do {
diff --git a/src/httrack.h b/src/httrack.h
index f297e00..a3c82a4 100644
--- a/src/httrack.h
+++ b/src/httrack.h
@@ -46,9 +46,9 @@ typedef struct {
char name[1024];
char file[1024];
char state[256];
- char url_sav[HTS_URLMAXSIZE*2]; // pour cancel
- char url_adr[HTS_URLMAXSIZE*2];
- char url_fil[HTS_URLMAXSIZE*2];
+ char BIGSTK url_sav[HTS_URLMAXSIZE*2]; // pour cancel
+ char BIGSTK url_adr[HTS_URLMAXSIZE*2];
+ char BIGSTK url_fil[HTS_URLMAXSIZE*2];
LLint size;
LLint sizetot;
int offset;
@@ -79,29 +79,220 @@ typedef struct {
} t_InpInfo;
// wrappers
-void __cdecl htsshow_init(void);
-void __cdecl htsshow_uninit(void);
-int __cdecl htsshow_start(httrackp* opt);
-int __cdecl htsshow_chopt(httrackp* opt);
-int __cdecl htsshow_end(void);
-int __cdecl htsshow_checkhtml(char* html,int len,char* url_adresse,char* url_fichier);
-int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int lien_n,int lien_tot,int stat_time,hts_stat_struct* stats);
-char* __cdecl htsshow_query(char* question);
-char* __cdecl htsshow_query2(char* question);
-char* __cdecl htsshow_query3(char* question);
-int __cdecl htsshow_check(char* adr,char* fil,int status);
-void __cdecl htsshow_pause(char* lockfile);
-void __cdecl htsshow_filesave(char* file);
-int __cdecl htsshow_linkdetected(char* link);
-int __cdecl htsshow_xfrstatus(lien_back* back);
-int __cdecl htsshow_savename(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save);
-
+static void __cdecl htsshow_init(void);
+static void __cdecl htsshow_uninit(void);
+static int __cdecl htsshow_start(httrackp* opt);
+static int __cdecl htsshow_chopt(httrackp* opt);
+static int __cdecl htsshow_end(void);
+static int __cdecl htsshow_preprocesshtml(char** html,int* len,char* url_adresse,char* url_fichier);
+static int __cdecl htsshow_postprocesshtml(char** html,int* len,char* url_adresse,char* url_fichier);
+static int __cdecl htsshow_checkhtml(char* html,int len,char* url_adresse,char* url_fichier);
+static int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int lien_n,int lien_tot,int stat_time,hts_stat_struct* stats);
+static char* __cdecl htsshow_query(char* question);
+static char* __cdecl htsshow_query2(char* question);
+static char* __cdecl htsshow_query3(char* question);
+static int __cdecl htsshow_check(char* adr,char* fil,int status);
+static void __cdecl htsshow_pause(char* lockfile);
+static void __cdecl htsshow_filesave(char* file);
+static int __cdecl htsshow_linkdetected(char* link);
+static int __cdecl htsshow_linkdetected2(char* link, char* start_tag);
+static int __cdecl htsshow_xfrstatus(lien_back* back);
+static int __cdecl htsshow_savename(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save);
+static int __cdecl htsshow_sendheader(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* outgoing);
+static int __cdecl htsshow_receiveheader(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* incoming);
+
int main(int argc, char **argv);
-void vt_color(int text,int back);
-void vt_clear(void);
-void vt_home(void);
+static void vt_color(int text,int back);
+static void vt_clear(void);
+static void vt_home(void);
#endif
+/* */
+
+// Engine internal variables
+typedef void (* htsErrorCallback)(char* msg, char* file, int line);
+extern HTSEXT_API htsErrorCallback htsCallbackErr;
+extern HTSEXT_API int htsMemoryFastXfr;
+/* */
+extern HTSEXT_API hts_stat_struct HTS_STAT;
+extern int _DEBUG_HEAD;
+extern FILE* ioinfo;
+
+// from htsbase.h
+
+/* protected strcat, strncat and strcpy - definitely useful */
+/*
+ * strcatbuff(A, B): append string B to string A with sanity checks.
+ * - Both A and B are checked against NULL through assertf().
+ * - A is treated as a sized array when sizeof(A) != sizeof(char*); an array
+ *   whose size happens to equal the pointer size is handled as a plain pointer.
+ * - If htsMemoryFastXfr is non-zero: plain strcat(); for sized arrays the last
+ *   byte is set to NUL beforehand and asserted to still be NUL afterwards.
+ * - Otherwise: lengths are measured first; for sized arrays the copy is done
+ *   only if sz + szf + 1 < sizeof(A) (asserted), for pointers memcpybuff() is
+ *   used without any size check.
+ * A and B are evaluated several times: do not pass expressions with side effects.
+ */
+#define strcatbuff(A, B) do { \
+ assertf( (A) != NULL ); \
+ if ( ! (B) ) { assertf( 0 ); } \
+ if (htsMemoryFastXfr) { \
+ if (sizeof(A) != sizeof(char*)) { \
+ (A)[sizeof(A) - 1] = '\0'; \
+ } \
+ strcat(A, B); \
+ if (sizeof(A) != sizeof(char*)) { \
+ assertf((A)[sizeof(A) - 1] == '\0'); \
+ } \
+ } else { \
+ unsigned int sz = (unsigned int) strlen(A); \
+ unsigned int szf = (unsigned int) strlen(B); \
+ if (sizeof(A) != sizeof(char*)) { \
+ assertf(sz + szf + 1 < sizeof(A)); \
+ if (szf > 0) { \
+ if (sz + szf + 1 < sizeof(A)) { \
+ memcpy((A) + sz, (B), szf + 1); \
+ } \
+ } \
+ } else if (szf > 0) { \
+ memcpybuff((A) + sz, (B), szf + 1); \
+ } \
+ } \
+} while(0)
+/*
+ * strncatbuff(A, B, N): append at most N characters of B to A, then terminate.
+ * - A and B are checked against NULL through assertf().
+ * - A is treated as a sized array when sizeof(A) != sizeof(char*).
+ * - If htsMemoryFastXfr is non-zero: plain strncat(), with a NUL sentinel placed
+ *   in (and re-checked on) the last byte of sized arrays.
+ * - Otherwise: szf = min(strlen(B), N) bytes are copied and a NUL is written
+ *   after them; for sized arrays this happens only if sz + szf + 1 < sizeof(A)
+ *   (asserted), for pointers no size check is possible.
+ * A, B and N are evaluated several times.
+ */
+#define strncatbuff(A, B, N) do { \
+ assertf( (A) != NULL ); \
+ if ( ! (B) ) { assertf( 0 ); } \
+ if (htsMemoryFastXfr) { \
+ if (sizeof(A) != sizeof(char*)) { \
+ (A)[sizeof(A) - 1] = '\0'; \
+ } \
+ strncat(A, B, N); \
+ if (sizeof(A) != sizeof(char*)) { \
+ assertf((A)[sizeof(A) - 1] == '\0'); \
+ } \
+ } else { \
+ unsigned int sz = (unsigned int) strlen(A); \
+ unsigned int szf = (unsigned int) strlen(B); \
+ if (szf > (unsigned int) (N)) szf = (unsigned int) (N); \
+ if (sizeof(A) != sizeof(char*)) { \
+ assertf(sz + szf + 1 < sizeof(A)); \
+ if (szf > 0) { \
+ if (sz + szf + 1 < sizeof(A)) { \
+ memcpy((A) + sz, (B), szf); \
+ * ( (A) + sz + szf) = '\0'; \
+ } \
+ } \
+ } else if (szf > 0) { \
+ memcpybuff((A) + sz, (B), szf); \
+ * ( (A) + sz + szf) = '\0'; \
+ } \
+ } \
+} while(0)
+/*
+ * strcpybuff(A, B): copy string B (including its terminator) into A.
+ * - A and B are checked against NULL through assertf().
+ * - A is treated as a sized array when sizeof(A) != sizeof(char*).
+ * - If htsMemoryFastXfr is non-zero: plain strcpy(), with a NUL sentinel placed
+ *   in (and re-checked on) the last byte of sized arrays.
+ * - Otherwise: for sized arrays the copy is done only if szf + 1 < sizeof(A)
+ *   (asserted); an empty or oversized source leaves A as the empty string.
+ *   For pointers, szf + 1 bytes are copied through memcpybuff() unchecked.
+ * A and B are evaluated several times.
+ */
+#define strcpybuff(A, B) do { \
+ assertf( (A) != NULL ); \
+ if ( ! (B) ) { assertf( 0 ); } \
+ if (htsMemoryFastXfr) { \
+ if (sizeof(A) != sizeof(char*)) { \
+ (A)[sizeof(A) - 1] = '\0'; \
+ } \
+ strcpy(A, B); \
+ if (sizeof(A) != sizeof(char*)) { \
+ assertf((A)[sizeof(A) - 1] == '\0'); \
+ } \
+ } else { \
+ unsigned int szf = (unsigned int) strlen(B); \
+ if (sizeof(A) != sizeof(char*)) { \
+ assertf(szf + 1 < sizeof(A)); \
+ if (szf > 0) { \
+ if (szf + 1 < sizeof(A)) { \
+ memcpy((A), (B), szf + 1); \
+ } else { \
+ * (A) = '\0'; \
+ } \
+ } else { \
+ * (A) = '\0'; \
+ } \
+ } else { \
+ memcpybuff((A), (B), szf + 1); \
+ } \
+ } \
+} while(0)
+/*
+ * strncpybuff(A, B, N): copy at most N characters of B into A.
+ * - A and B are checked against NULL through assertf().
+ * - A is treated as a sized array when sizeof(A) != sizeof(char*).
+ * - If htsMemoryFastXfr is non-zero: plain strncpy(), with a NUL sentinel placed
+ *   in (and re-checked on) the last byte of sized arrays.
+ * - Otherwise: szf = min(strlen(B), N) bytes are copied; for sized arrays only
+ *   if szf + 1 < sizeof(A) (asserted).
+ * As with strncpy(), the result is NUL-terminated only when B is shorter than
+ * N. The checked path now writes that terminator too, so both paths agree
+ * (it previously left A unterminated in that case).
+ * A, B and N are evaluated several times.
+ */
+#define strncpybuff(A, B, N) do { \
+  assertf( (A) != NULL ); \
+  if ( ! (B) ) { assertf( 0 ); } \
+  if (htsMemoryFastXfr) { \
+    if (sizeof(A) != sizeof(char*)) { \
+      (A)[sizeof(A) - 1] = '\0'; \
+    } \
+    strncpy(A, B, N); \
+    if (sizeof(A) != sizeof(char*)) { \
+      assertf((A)[sizeof(A) - 1] == '\0'); \
+    } \
+  } else { \
+    unsigned int szf = (unsigned int) strlen(B); \
+    if (szf > (unsigned int) (N)) szf = (unsigned int) (N); \
+    if (sizeof(A) != sizeof(char*)) { \
+      assertf(szf + 1 < sizeof(A)); \
+      if (szf + 1 < sizeof(A)) { \
+        if (szf > 0) { \
+          memcpy((A), (B), szf); \
+        } \
+        /* source shorter than N: terminate, as strncpy() would */ \
+        if (szf < (unsigned int) (N)) { \
+          (A)[szf] = '\0'; \
+        } \
+      } \
+    } else { \
+      memcpybuff((A), (B), szf); \
+      /* source shorter than N: terminate, as strncpy() would */ \
+      if (szf < (unsigned int) (N)) { \
+        (A)[szf] = '\0'; \
+      } \
+    } \
+  } \
+} while(0)
+
+// emergency log
+typedef void (*t_abortLog)(char* msg, char* file, int line);
+extern HTSEXT_API t_abortLog abortLog__;
+#define abortLog(a) abortLog__(a, __FILE__, __LINE__)
+/*
+ * abortLogFmt(a): write a crash report to "CRASH.TXT" in the current directory,
+ * falling back to "/tmp/CRASH.TXT" and then "C:\CRASH.TXT" if the file cannot
+ * be opened. The report holds the version id, __FILE__/__LINE__ and a reason.
+ * `a` is pasted as-is into fprintf(fp, a), i.e. it is used as the format
+ * string; presumably the `_` macro defined in this header (expanding to a
+ * comma) is meant to let callers write "fmt" _ arg inside the single macro
+ * argument -- confirm against callers.
+ * NOTE(review): never pass a non-literal, externally controlled string as `a`.
+ */
+#define abortLogFmt(a) do { \
+ FILE* fp = fopen("CRASH.TXT", "wb"); \
+ if (!fp) fp = fopen("/tmp/CRASH.TXT", "wb"); \
+ if (!fp) fp = fopen("C:\\CRASH.TXT", "wb"); \
+ if (fp) { \
+ fprintf(fp, "HTTrack " HTTRACK_VERSIONID " closed at '" __FILE__ "', line %d\r\n", __LINE__); \
+ fprintf(fp, "Reason:\r\n"); \
+ fprintf(fp, a); \
+ fprintf(fp, "\r\n"); \
+ fflush(fp); \
+ fclose(fp); \
+ } \
+} while(0)
+
+#define _ ,
+/*
+ * NOTE(review): this repeats, token for token, the abortLogFmt definition that
+ * already appears a few lines earlier in this header. An identical
+ * redefinition is legal C, but one of the two copies looks redundant.
+ * Behaviour: dump version id, __FILE__/__LINE__ and the reason `a` (used as the
+ * fprintf format) to CRASH.TXT, /tmp/CRASH.TXT or C:\CRASH.TXT, whichever can
+ * be opened first.
+ */
+#define abortLogFmt(a) do { \
+ FILE* fp = fopen("CRASH.TXT", "wb"); \
+ if (!fp) fp = fopen("/tmp/CRASH.TXT", "wb"); \
+ if (!fp) fp = fopen("C:\\CRASH.TXT", "wb"); \
+ if (fp) { \
+ fprintf(fp, "HTTrack " HTTRACK_VERSIONID " closed at '" __FILE__ "', line %d\r\n", __LINE__); \
+ fprintf(fp, "Reason:\r\n"); \
+ fprintf(fp, a); \
+ fprintf(fp, "\r\n"); \
+ fflush(fp); \
+ fclose(fp); \
+ } \
+} while(0)
+/*
+ * assertf(exp): fatal assertion. When exp is false: log the failed expression
+ * through abortLog() (with file and line), notify htsCallbackErr if one is
+ * installed, trigger the standard assert(), and finally abort() in case
+ * assert() returned. exp is evaluated a second time by assert(exp).
+ */
+#define assertf(exp) do { \
+ if (! ( exp ) ) { \
+ abortLog("assert failed: " #exp); \
+ if (htsCallbackErr != NULL) { \
+ htsCallbackErr("assert failed: " #exp, __FILE__ , __LINE__ ); \
+ } \
+ assert(exp); \
+ abort(); \
+ } \
+} while(0)
+/* non-fatal assert */
+/*
+ * assertnf(exp): when exp is false, log the failed expression through
+ * abortLog() and notify htsCallbackErr if one is installed, then continue
+ * execution (no assert(), no abort()).
+ */
+#define assertnf(exp) do { \
+ if (! ( exp ) ) { \
+ abortLog("assert failed: " #exp); \
+ if (htsCallbackErr != NULL) { \
+ htsCallbackErr("assert failed: " #exp, __FILE__ , __LINE__ ); \
+ } \
+ } \
+} while(0)
+
+//
+
+/* Thin wrappers around the C allocation routines. */
+/* malloct(A): plain malloc(A). */
+#define malloct(A) malloc(A)
+/* calloct(A,B): plain calloc(A, B). */
+#define calloct(A,B) calloc((A), (B))
+/* freet(A): non-fatal assert that A is not NULL, then free it and reset A to
+   NULL; A must therefore be a modifiable lvalue. */
+#define freet(A) do { assertnf((A) != NULL); if ((A) != NULL) { free(A); (A) = NULL; } } while(0)
+/* strdupt(A): plain strdup(A). */
+#define strdupt(A) strdup(A)
+/* realloct(A,B): realloc(A, B), or malloc(B) when A is NULL. */
+#define realloct(A,B) ( ((A) != NULL) ? realloc((A), (B)) : malloc(B) )
+/* memcpybuff(A, B, N): plain memcpy(A, B, N), no bounds checking. */
+#define memcpybuff(A, B, N) memcpy((A), (B), (N))
+
#endif
diff --git a/src/md5.h b/src/md5.h
index f5e5428..12613fe 100644
--- a/src/md5.h
+++ b/src/md5.h
@@ -1,16 +1,29 @@
#ifndef MD5_H
#define MD5_H
-#ifdef __alpha
-typedef unsigned int uint32;
+#ifdef _WIN32
+#ifndef SIZEOF_LONG
+#define SIZEOF_LONG 4
+#endif
#else
+#include "config.h"
+#endif
+
+#if SIZEOF_LONG==8
+typedef unsigned int uint32;
+#elif SIZEOF_LONG==4
typedef unsigned long uint32;
+#else
+#error undefined: SIZEOF_LONG
#endif
struct MD5Context {
+ unsigned char in[64];
uint32 buf[4];
uint32 bits[2];
- unsigned char in[64];
+#ifdef _WIN32_WCE
+ uint32 pad[2];
+#endif
int doByteReverse;
};
diff --git a/src/minizip/ChangeLogUnzip b/src/minizip/ChangeLogUnzip
new file mode 100755
index 0000000..3508eb4
--- /dev/null
+++ b/src/minizip/ChangeLogUnzip
@@ -0,0 +1,55 @@
+Change in 1.00: (10 sept 03)
+- rename to 1.00
+- cosmetic code change
+
+Change in 0.22: (19 May 03)
+- crypting support (unless you define NOCRYPT)
+- append file in existing zipfile
+
+Change in 0.21: (10 Mar 03)
+- bug fixes
+
+Change in 0.17: (27 Jan 02)
+- bug fixes
+
+Change in 0.16: (19 Jan 02)
+- ioapi support, to virtualize zip file access
+
+Change in 0.15: (19 Mar 98)
+- fix memory leak in minizip.c
+
+Change in 0.14: (10 Mar 98)
+- fix bugs in minizip.c sample for zipping big file
+- fix problem in month in date handling
+- fix bug in unzlocal_GetCurrentFileInfoInternal in unzip.c for
+ comment handling
+
+Change in 0.13: (6 Mar 98)
+- fix bugs in zip.c
+- add real minizip sample
+
+Change in 0.12: (4 Mar 98)
+- add zip.c and zip.h for creating .zip files
+- fix change_file_date in miniunz.c for Unix (Jean-loup Gailly)
+- fix miniunz.c for file without specific record for directory
+
+Change in 0.11: (3 Mar 98)
+- fix bug in unzGetCurrentFileInfo for get extra field and comment
+- enhance miniunz sample, remove the bad unztst.c sample
+
+Change in 0.10: (2 Mar 98)
+- fix bug in unzReadCurrentFile
+- rename unzip* to unz* function and structure
+- remove Windows-like Hungarian notation variable names
+- modify some structure in unzip.h
+- add some comments in source
+- remove unzipGetcCurrentFile function
+- replace ZUNZEXPORT by ZEXPORT
+- add unzGetLocalExtrafield for get the local extrafield info
+- add a new sample, miniunz.c
+
+Change in 0.4: (25 Feb 98)
+- suppress the type unzipFileInZip.
+ Only one file in the zipfile can be open at the same time
+- fix some typos in code
+- added tm_unz structure in unzip_file_info (date/time in readable format)
diff --git a/src/minizip/crypt.h b/src/minizip/crypt.h
new file mode 100644
index 0000000..9c7a89c
--- /dev/null
+++ b/src/minizip/crypt.h
@@ -0,0 +1,132 @@
+/* crypt.h -- base code for crypt/uncrypt ZIPfile
+
+
+ Version 1.00, September 10th, 2003
+
+ Copyright (C) 1998-2003 Gilles Vollant
+
+ This code is a modified version of crypting code in Infozip distribution
+
+ The encryption/decryption parts of this source code (as opposed to the
+ non-echoing password parts) were originally written in Europe. The
+ whole source package can be freely distributed, including from the USA.
+ (Prior to January 2000, re-export from the US was a violation of US law.)
+
+ This encryption code is a direct transcription of the algorithm from
+ Roger Schlafly, described by Phil Katz in the file appnote.txt. This
+ file (appnote.txt) is distributed with the PKZIP program (even in the
+ version without encryption capabilities).
+
+ If you don't need crypting in your application, just define symbols
+ NOCRYPT and NOUNCRYPT.
+
+ This code supports the "Traditional PKWARE Encryption".
+
+ The new AES encryption added on Zip format by Winzip (see the page
+ http://www.winzip.com/aes_info.htm ) and PKWare PKZip 5.x Strong
+ Encryption is not supported.
+*/
+
+/* CRC32(c, b): one table-driven CRC step. Looks up entry ((int)c ^ b) & 0xff
+   in pcrc_32_tab and XORs it with c >> 8. pcrc_32_tab is not a macro
+   parameter: it must be a visible identifier at the point of use. */
+#define CRC32(c, b) ((*(pcrc_32_tab+(((int)(c) ^ (b)) & 0xff))) ^ ((c) >> 8))
+
+/***********************************************************************
+ * Compute the next byte of the pseudo-random key stream from pkeys[2].
+ * pcrc_32_tab is accepted for signature symmetry and is not used here.
+ */
+static int decrypt_byte(unsigned long* pkeys, const unsigned long* pcrc_32_tab)
+{
+    /* POTENTIAL BUG: seed*(seed^1) may overflow in an unpredictable manner
+     * on 16-bit systems; not a problem with any known compiler so far,
+     * though */
+    const unsigned seed = ((unsigned) pkeys[2] & 0xffff) | 2;
+
+    return (int) (((seed * (seed ^ 1)) >> 8) & 0xff);
+}
+
+/***********************************************************************
+ * Mix one plain-text byte `c` into the three encryption keys.
+ * Returns `c` unchanged so the call can be used inside expressions.
+ */
+static int update_keys(unsigned long* pkeys,const unsigned long* pcrc_32_tab,int c)
+{
+    int shifted;
+
+    pkeys[0] = CRC32(pkeys[0], c);
+    pkeys[1] = (pkeys[1] + (pkeys[0] & 0xff)) * 134775813L + 1;
+    /* the third key is updated from the bits of key 1 above bit 23 */
+    shifted = (int) (pkeys[1] >> 24);
+    pkeys[2] = CRC32(pkeys[2], shifted);
+    return c;
+}
+
+
+/***********************************************************************
+ * Reset the three keys to their fixed start values, then feed every
+ * character of the NUL-terminated password through update_keys().
+ */
+static void init_keys(const char* passwd,unsigned long* pkeys,const unsigned long* pcrc_32_tab)
+{
+    const char* p;
+
+    pkeys[0] = 305419896L;
+    pkeys[1] = 591751049L;
+    pkeys[2] = 878082192L;
+    for (p = passwd; *p != '\0'; p++) {
+        update_keys(pkeys, pcrc_32_tab, (int) *p);
+    }
+}
+
+/* zdecode: XOR `c` in place with the next key-stream byte (c must be a
+   modifiable lvalue), then update the keys with the result; the value of the
+   expression is the updated c, as returned by update_keys(). */
+#define zdecode(pkeys,pcrc_32_tab,c) \
+ (update_keys(pkeys,pcrc_32_tab,c ^= decrypt_byte(pkeys,pcrc_32_tab)))
+
+/* zencode: store the next key-stream byte in `t`, update the keys with the
+   plain byte `c`, and yield t ^ c. `c` is evaluated twice. */
+#define zencode(pkeys,pcrc_32_tab,c,t) \
+ (t=decrypt_byte(pkeys,pcrc_32_tab), update_keys(pkeys,pcrc_32_tab,c), t^(c))
+
+#ifdef INCLUDECRYPTINGCODE_IFCRYPTALLOWED
+
+#define RAND_HEAD_LEN 12
+ /* "last resort" source for second part of crypt seed pattern */
+# ifndef ZCR_SEED2
+# define ZCR_SEED2 3141592654UL /* use PI as default pattern */
+# endif
+
+/*
+ * Build the RAND_HEAD_LEN-byte encryption header for one entry.
+ *   passwd         password string
+ *   buf, bufSize   destination buffer and its size
+ *   pkeys          the three encryption keys (re-initialised here)
+ *   pcrc_32_tab    CRC table used by the key update
+ *   crcForCrypting value whose bits 16..31 form the last two header bytes
+ * Returns the number of bytes written, or 0 when buf is too small.
+ */
+static int crypthead(passwd, buf, bufSize, pkeys, pcrc_32_tab, crcForCrypting)
+    const char *passwd;         /* password string */
+    unsigned char *buf;         /* where to write header */
+    int bufSize;
+    unsigned long* pkeys;
+    const unsigned long* pcrc_32_tab;
+    unsigned long crcForCrypting;
+{
+    static unsigned calls = 0;              /* seed rand() on first use only */
+    unsigned char header[RAND_HEAD_LEN-2];  /* random header */
+    int n;                                  /* index in random header */
+    int t;                                  /* temporary */
+    int c;                                  /* random byte */
+
+    if (bufSize < RAND_HEAD_LEN)
+        return 0;
+
+    /* First generate RAND_HEAD_LEN-2 random bytes. The output of rand() is
+     * encrypted to get less predictability, since rand() is often poorly
+     * implemented.
+     */
+    calls++;
+    if (calls == 1)
+        srand((unsigned)(time(NULL) ^ ZCR_SEED2));
+
+    init_keys(passwd, pkeys, pcrc_32_tab);
+    n = 0;
+    while (n < RAND_HEAD_LEN-2)
+    {
+        c = (rand() >> 7) & 0xff;
+        header[n] = (unsigned char)zencode(pkeys, pcrc_32_tab, c, t);
+        n++;
+    }
+
+    /* Encrypt random header (last two bytes is high word of crc) */
+    init_keys(passwd, pkeys, pcrc_32_tab);
+    n = 0;
+    while (n < RAND_HEAD_LEN-2)
+    {
+        buf[n] = (unsigned char)zencode(pkeys, pcrc_32_tab, header[n], t);
+        n++;
+    }
+    buf[n] = zencode(pkeys, pcrc_32_tab, (int)(crcForCrypting >> 16) & 0xff, t);
+    n++;
+    buf[n] = zencode(pkeys, pcrc_32_tab, (int)(crcForCrypting >> 24) & 0xff, t);
+    n++;
+    return n;
+}
+
+#endif
diff --git a/src/minizip/ioapi.c b/src/minizip/ioapi.c
new file mode 100644
index 0000000..53583ed
--- /dev/null
+++ b/src/minizip/ioapi.c
@@ -0,0 +1,196 @@
+/* ioapi.c -- IO base function header for compress/uncompress .zip
+ files using zlib + zip or unzip API
+
+ Version 1.00, September 10th, 2003
+
+ Copyright (C) 1998-2003 Gilles Vollant
+*/
+
+#ifndef _WIN32_WCE
+#include <stdio.h>
+#include <stdlib.h>
+#else
+#include <stdio.h>
+//#include "celib.h"
+#endif
+#include <string.h>
+
+#include "zlib.h"
+#include "ioapi.h"
+
+
+
+/* I've found an old Unix (a SunOS 4.1.3_U1) without all SEEK_* defined.... */
+
+#ifndef SEEK_CUR
+#define SEEK_CUR 1
+#endif
+
+#ifndef SEEK_END
+#define SEEK_END 2
+#endif
+
+#ifndef SEEK_SET
+#define SEEK_SET 0
+#endif
+
+voidpf ZCALLBACK fopen_file_func OF((
+ voidpf opaque,
+ const char* filename,
+ int mode));
+
+uLong ZCALLBACK fread_file_func OF((
+ voidpf opaque,
+ voidpf stream,
+ void* buf,
+ uLong size));
+
+uLong ZCALLBACK fwrite_file_func OF((
+ voidpf opaque,
+ voidpf stream,
+ const void* buf,
+ uLong size));
+
+long ZCALLBACK ftell_file_func OF((
+ voidpf opaque,
+ voidpf stream));
+
+long ZCALLBACK fseek_file_func OF((
+ voidpf opaque,
+ voidpf stream,
+ uLong offset,
+ int origin));
+
+int ZCALLBACK fflush_file_func OF((
+ voidpf opaque,
+ voidpf stream));
+
+int ZCALLBACK fclose_file_func OF((
+ voidpf opaque,
+ voidpf stream));
+
+int ZCALLBACK ferror_file_func OF((
+ voidpf opaque,
+ voidpf stream));
+
+
+/*
+ * stdio "open" callback: translate the ZLIB_FILEFUNC_MODE_* bits into an
+ * fopen() mode string ("rb", "r+b" or "wb") and open the file.
+ * Returns the FILE* as an opaque stream, or NULL when the mode matches
+ * nothing, filename is NULL, or fopen() fails.
+ */
+voidpf ZCALLBACK fopen_file_func (opaque, filename, mode)
+    voidpf opaque;
+    const char* filename;
+    int mode;
+{
+    const char* mode_fopen;
+
+    if ((mode & ZLIB_FILEFUNC_MODE_READWRITEFILTER) == ZLIB_FILEFUNC_MODE_READ)
+        mode_fopen = "rb";
+    else if (mode & ZLIB_FILEFUNC_MODE_EXISTING)
+        mode_fopen = "r+b";
+    else if (mode & ZLIB_FILEFUNC_MODE_CREATE)
+        mode_fopen = "wb";
+    else
+        return NULL;
+
+    if (filename == NULL)
+        return NULL;
+    return fopen(filename, mode_fopen);
+}
+
+
+/* stdio "read" callback: read up to `size` bytes from the FILE* behind
+   `stream` into buf; returns the number of bytes actually read. */
+uLong ZCALLBACK fread_file_func (opaque, stream, buf, size)
+    voidpf opaque;
+    voidpf stream;
+    void* buf;
+    uLong size;
+{
+    FILE* fp = (FILE *)stream;
+    return (uLong) fread(buf, 1, (size_t)size, fp);
+}
+
+
+/* stdio "write" callback: write `size` bytes from buf to the FILE* behind
+   `stream`; returns the number of bytes actually written. */
+uLong ZCALLBACK fwrite_file_func (opaque, stream, buf, size)
+    voidpf opaque;
+    voidpf stream;
+    const void* buf;
+    uLong size;
+{
+    FILE* fp = (FILE *)stream;
+    return (uLong) fwrite(buf, 1, (size_t)size, fp);
+}
+
+/* stdio "tell" callback: return ftell() of the FILE* behind `stream`. */
+long ZCALLBACK ftell_file_func (opaque, stream)
+    voidpf opaque;
+    voidpf stream;
+{
+    FILE* fp = (FILE *)stream;
+    return ftell(fp);
+}
+
+/*
+ * stdio "seek" callback: move the FILE* behind `stream` to `offset`,
+ * relative to `origin` (one of ZLIB_FILEFUNC_SEEK_CUR/END/SET).
+ * Returns 0 on success, -1 on an unknown origin or when fseek() fails.
+ * The result of fseek() used to be ignored, so a failed seek was reported
+ * as success.
+ */
+long ZCALLBACK fseek_file_func (opaque, stream, offset, origin)
+    voidpf opaque;
+    voidpf stream;
+    uLong offset;
+    int origin;
+{
+    int fseek_origin = 0;
+    long ret = 0;
+
+    switch (origin)
+    {
+    case ZLIB_FILEFUNC_SEEK_CUR :
+        fseek_origin = SEEK_CUR;
+        break;
+    case ZLIB_FILEFUNC_SEEK_END :
+        fseek_origin = SEEK_END;
+        break;
+    case ZLIB_FILEFUNC_SEEK_SET :
+        fseek_origin = SEEK_SET;
+        break;
+    default: return -1;
+    }
+    /* propagate a seek failure to the caller */
+    if (fseek((FILE *)stream, (long)offset, fseek_origin) != 0)
+        ret = -1;
+    return ret;
+}
+
+/* stdio "flush" callback: return fflush() of the FILE* behind `stream`. */
+int ZCALLBACK fflush_file_func (opaque, stream)
+    voidpf opaque;
+    voidpf stream;
+{
+    FILE* fp = (FILE *)stream;
+    return fflush(fp);
+}
+
+/* stdio "close" callback: return fclose() of the FILE* behind `stream`. */
+int ZCALLBACK fclose_file_func (opaque, stream)
+    voidpf opaque;
+    voidpf stream;
+{
+    FILE* fp = (FILE *)stream;
+    return fclose(fp);
+}
+
+/* stdio "error" callback: return ferror() of the FILE* behind `stream`. */
+int ZCALLBACK ferror_file_func (opaque, stream)
+    voidpf opaque;
+    voidpf stream;
+{
+    FILE* fp = (FILE *)stream;
+    return ferror(fp);
+}
+
+/* Fill *pzlib_filefunc_def with the stdio-based callbacks defined in this
+   file; the opaque user pointer is set to NULL. */
+void fill_fopen_filefunc (pzlib_filefunc_def)
+    zlib_filefunc_def* pzlib_filefunc_def;
+{
+    zlib_filefunc_def* const def = pzlib_filefunc_def;
+
+    def->opaque      = NULL;
+    /* open / close */
+    def->zopen_file  = fopen_file_func;
+    def->zclose_file = fclose_file_func;
+    /* data transfer */
+    def->zread_file  = fread_file_func;
+    def->zwrite_file = fwrite_file_func;
+    def->zflush_file = fflush_file_func;
+    /* positioning and status */
+    def->ztell_file  = ftell_file_func;
+    def->zseek_file  = fseek_file_func;
+    def->zerror_file = ferror_file_func;
+}
diff --git a/src/minizip/ioapi.h b/src/minizip/ioapi.h
new file mode 100644
index 0000000..ee92287
--- /dev/null
+++ b/src/minizip/ioapi.h
@@ -0,0 +1,78 @@
+/* ioapi.h -- IO base function header for compress/uncompress .zip
+ files using zlib + zip or unzip API
+
+ Version 1.00, September 10th, 2003
+
+ Copyright (C) 1998-2003 Gilles Vollant
+*/
+
+#ifndef _ZLIBIOAPI_H
+#define _ZLIBIOAPI_H
+
+
+#define ZLIB_FILEFUNC_SEEK_CUR (1)
+#define ZLIB_FILEFUNC_SEEK_END (2)
+#define ZLIB_FILEFUNC_SEEK_SET (0)
+
+#define ZLIB_FILEFUNC_MODE_READ (1)
+#define ZLIB_FILEFUNC_MODE_WRITE (2)
+#define ZLIB_FILEFUNC_MODE_READWRITEFILTER (3)
+
+#define ZLIB_FILEFUNC_MODE_EXISTING (4)
+#define ZLIB_FILEFUNC_MODE_CREATE (8)
+
+
+#ifndef ZCALLBACK
+
+#if (defined(WIN32) || defined (WINDOWS) || defined (_WINDOWS)) && defined(CALLBACK) && defined (USEWINDOWS_CALLBACK)
+#define ZCALLBACK CALLBACK
+#else
+#define ZCALLBACK
+#endif
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef voidpf (ZCALLBACK *open_file_func) OF((voidpf opaque, const char* filename, int mode));
+typedef uLong (ZCALLBACK *read_file_func) OF((voidpf opaque, voidpf stream, void* buf, uLong size));
+typedef uLong (ZCALLBACK *write_file_func) OF((voidpf opaque, voidpf stream, const void* buf, uLong size));
+typedef long (ZCALLBACK *tell_file_func) OF((voidpf opaque, voidpf stream));
+typedef long (ZCALLBACK *seek_file_func) OF((voidpf opaque, voidpf stream, uLong offset, int origin));
+typedef int (ZCALLBACK *flush_file_func) OF((voidpf opaque, voidpf stream));
+typedef int (ZCALLBACK *close_file_func) OF((voidpf opaque, voidpf stream));
+typedef int (ZCALLBACK *testerror_file_func) OF((voidpf opaque, voidpf stream));
+
+/* Table of I/O callbacks plus one user pointer. The ZREAD/ZWRITE/... macros of
+   this header call through these members, always passing `opaque` first. */
+typedef struct zlib_filefunc_def_s
+{
+ open_file_func zopen_file; /* open a file by name; returns the stream handle */
+ read_file_func zread_file; /* read bytes from a stream */
+ write_file_func zwrite_file; /* write bytes to a stream */
+ tell_file_func ztell_file; /* current position of a stream */
+ seek_file_func zseek_file; /* reposition a stream */
+ flush_file_func zflush_file; /* flush a stream */
+ close_file_func zclose_file; /* close a stream */
+ testerror_file_func zerror_file; /* error status of a stream */
+ voidpf opaque; /* user data handed back to every callback */
+} zlib_filefunc_def;
+
+
+
+void fill_fopen_filefunc OF((zlib_filefunc_def* pzlib_filefunc_def));
+
+/* Convenience wrappers: call the matching callback of `filefunc` (a
+   zlib_filefunc_def, not a pointer to one) with filefunc.opaque as first
+   argument. `filefunc` is evaluated twice by each macro. */
+#define ZREAD(filefunc,filestream,buf,size) ((*((filefunc).zread_file))((filefunc).opaque,filestream,buf,size))
+#define ZWRITE(filefunc,filestream,buf,size) ((*((filefunc).zwrite_file))((filefunc).opaque,filestream,buf,size))
+#define ZTELL(filefunc,filestream) ((*((filefunc).ztell_file))((filefunc).opaque,filestream))
+#define ZSEEK(filefunc,filestream,pos,mode) ((*((filefunc).zseek_file))((filefunc).opaque,filestream,pos,mode))
+#define ZFLUSH(filefunc,filestream) ((*((filefunc).zflush_file))((filefunc).opaque,filestream))
+#define ZCLOSE(filefunc,filestream) ((*((filefunc).zclose_file))((filefunc).opaque,filestream))
+#define ZERROR(filefunc,filestream) ((*((filefunc).zerror_file))((filefunc).opaque,filestream))
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
diff --git a/src/minizip/iowin32.c b/src/minizip/iowin32.c
new file mode 100644
index 0000000..1afddaa
--- /dev/null
+++ b/src/minizip/iowin32.c
@@ -0,0 +1,275 @@
+/* iowin32.c -- IO base function header for compress/uncompress .zip
+ files using zlib + zip or unzip API
+ This IO API version uses the Win32 API (for Microsoft Windows)
+
+ Version 1.00, September 10th, 2003
+
+ Copyright (C) 1998-2003 Gilles Vollant
+*/
+
+#include <stdlib.h>
+#ifndef _WIN32_WCE
+#include <stdlib.h>
+#else
+//#include "celib.h"
+#endif
+
+#include "zlib.h"
+#include "ioapi.h"
+#include "iowin32.h"
+
+#ifndef INVALID_HANDLE_VALUE
+#define INVALID_HANDLE_VALUE (0xFFFFFFFF)
+#endif
+
+#ifndef INVALID_SET_FILE_POINTER
+#define INVALID_SET_FILE_POINTER ((DWORD)-1)
+#endif
+
+voidpf ZCALLBACK win32_open_file_func OF((
+ voidpf opaque,
+ const char* filename,
+ int mode));
+
+uLong ZCALLBACK win32_read_file_func OF((
+ voidpf opaque,
+ voidpf stream,
+ void* buf,
+ uLong size));
+
+uLong ZCALLBACK win32_write_file_func OF((
+ voidpf opaque,
+ voidpf stream,
+ const void* buf,
+ uLong size));
+
+long ZCALLBACK win32_tell_file_func OF((
+ voidpf opaque,
+ voidpf stream));
+
+long ZCALLBACK win32_seek_file_func OF((
+ voidpf opaque,
+ voidpf stream,
+ uLong offset,
+ int origin));
+
+int ZCALLBACK win32_close_file_func OF((
+ voidpf opaque,
+ voidpf stream));
+
+int ZCALLBACK win32_error_file_func OF((
+ voidpf opaque,
+ voidpf stream));
+
+/* Per-stream state of the Win32 backend; a heap-allocated instance is what the
+   callbacks in this file receive as `stream`. */
+typedef struct
+{
+ HANDLE hf; /* Win32 file handle */
+ int error; /* last error code recorded by a failed operation, 0 if none */
+} WIN32FILE_IOWIN;
+
+/*
+ * Win32 "open" callback: translate the ZLIB_FILEFUNC_MODE_* bits into
+ * CreateFile() arguments and open the file.
+ *   read only      -> GENERIC_READ, OPEN_EXISTING, FILE_SHARE_READ
+ *   existing file  -> GENERIC_READ|GENERIC_WRITE, OPEN_EXISTING
+ *   create         -> GENERIC_READ|GENERIC_WRITE, CREATE_ALWAYS
+ * Returns a malloc()ed WIN32FILE_IOWIN as the opaque stream, or NULL when the
+ * mode matches nothing, filename is NULL, the open fails or memory runs out
+ * (the handle is closed again in that last case).
+ * NOTE(review): filename is a `const char*` cast to LPCTSTR; this looks wrong
+ * for UNICODE builds -- confirm how this file is compiled.
+ */
+voidpf ZCALLBACK win32_open_file_func (opaque, filename, mode)
+    voidpf opaque;
+    const char* filename;
+    int mode;
+{
+    /* every CreateFile() argument starts from a defined value */
+    DWORD dwDesiredAccess = 0;
+    DWORD dwCreationDisposition = 0;
+    DWORD dwShareMode = 0;
+    DWORD dwFlagsAndAttributes = 0;
+    HANDLE hFile = NULL;
+    voidpf ret = NULL;
+
+    if ((mode & ZLIB_FILEFUNC_MODE_READWRITEFILTER)==ZLIB_FILEFUNC_MODE_READ)
+    {
+        dwDesiredAccess = GENERIC_READ;
+        dwCreationDisposition = OPEN_EXISTING;
+        dwShareMode = FILE_SHARE_READ;
+    }
+    else if (mode & ZLIB_FILEFUNC_MODE_EXISTING)
+    {
+        dwDesiredAccess = GENERIC_WRITE | GENERIC_READ;
+        dwCreationDisposition = OPEN_EXISTING;
+    }
+    else if (mode & ZLIB_FILEFUNC_MODE_CREATE)
+    {
+        dwDesiredAccess = GENERIC_WRITE | GENERIC_READ;
+        dwCreationDisposition = CREATE_ALWAYS;
+    }
+
+    /* dwDesiredAccess stays 0 when no mode flag matched: nothing is opened */
+    if ((filename!=NULL) && (dwDesiredAccess != 0))
+        hFile = CreateFile((LPCTSTR)filename, dwDesiredAccess, dwShareMode, NULL,
+                           dwCreationDisposition, dwFlagsAndAttributes, NULL);
+
+    if (hFile == INVALID_HANDLE_VALUE)
+        hFile = NULL;
+
+    if (hFile != NULL)
+    {
+        ret = malloc(sizeof(WIN32FILE_IOWIN));
+        if (ret==NULL)
+            CloseHandle(hFile);
+        else
+        {
+            ((WIN32FILE_IOWIN*)ret) -> hf = hFile;
+            ((WIN32FILE_IOWIN*)ret) -> error = 0;
+        }
+    }
+    return ret;
+}
+
+
+/*
+ * Win32 "read" callback: ReadFile() up to `size` bytes into buf.
+ * Returns the byte count reported by ReadFile(), or 0 when stream or its
+ * handle is NULL. On failure the error code is stored in the stream state
+ * (ERROR_HANDLE_EOF is stored as 0).
+ */
+uLong ZCALLBACK win32_read_file_func (opaque, stream, buf, size)
+    voidpf opaque;
+    voidpf stream;
+    void* buf;
+    uLong size;
+{
+    WIN32FILE_IOWIN* state = (WIN32FILE_IOWIN*)stream;
+    uLong nread = 0;
+
+    if (state == NULL || state->hf == NULL)
+        return 0;
+
+    if (!ReadFile(state->hf, buf, size, &nread, NULL))
+    {
+        DWORD code = GetLastError();
+        state->error = (code == ERROR_HANDLE_EOF) ? 0 : (int)code;
+    }
+    return nread;
+}
+
+
+/*
+ * Win32 "write" callback: WriteFile() `size` bytes from buf.
+ * Returns the byte count reported by WriteFile(), or 0 when stream or its
+ * handle is NULL. On failure the error code is stored in the stream state
+ * (ERROR_HANDLE_EOF is stored as 0).
+ */
+uLong ZCALLBACK win32_write_file_func (opaque, stream, buf, size)
+    voidpf opaque;
+    voidpf stream;
+    const void* buf;
+    uLong size;
+{
+    WIN32FILE_IOWIN* state = (WIN32FILE_IOWIN*)stream;
+    uLong nwritten = 0;
+
+    if (state == NULL || state->hf == NULL)
+        return 0;
+
+    if (!WriteFile(state->hf, buf, size, &nwritten, NULL))
+    {
+        DWORD code = GetLastError();
+        state->error = (code == ERROR_HANDLE_EOF) ? 0 : (int)code;
+    }
+    return nwritten;
+}
+
+/*
+ * Win32 "tell" callback: query the current position with a zero-distance
+ * SetFilePointer(FILE_CURRENT). Returns the position, or -1 when stream or
+ * its handle is NULL or the call fails (the error code is then recorded).
+ */
+long ZCALLBACK win32_tell_file_func (opaque, stream)
+    voidpf opaque;
+    voidpf stream;
+{
+    WIN32FILE_IOWIN* state = (WIN32FILE_IOWIN*)stream;
+    DWORD pos;
+
+    if (state == NULL || state->hf == NULL)
+        return -1;
+
+    pos = SetFilePointer(state->hf, 0, NULL, FILE_CURRENT);
+    if (pos == INVALID_SET_FILE_POINTER)
+    {
+        state->error = (int)GetLastError();
+        return -1;
+    }
+    return (long)pos;
+}
+
+/*
+ * Win32 "seek" callback: SetFilePointer() to `offset` relative to `origin`
+ * (one of ZLIB_FILEFUNC_SEEK_CUR/END/SET). Returns 0 on success, -1 on an
+ * unknown origin, a NULL stream/handle, or a failed call (the error code is
+ * then recorded in the stream state).
+ */
+long ZCALLBACK win32_seek_file_func (opaque, stream, offset, origin)
+    voidpf opaque;
+    voidpf stream;
+    uLong offset;
+    int origin;
+{
+    WIN32FILE_IOWIN* state = (WIN32FILE_IOWIN*)stream;
+    DWORD dwMoveMethod;
+
+    if (origin == ZLIB_FILEFUNC_SEEK_CUR)
+        dwMoveMethod = FILE_CURRENT;
+    else if (origin == ZLIB_FILEFUNC_SEEK_END)
+        dwMoveMethod = FILE_END;
+    else if (origin == ZLIB_FILEFUNC_SEEK_SET)
+        dwMoveMethod = FILE_BEGIN;
+    else
+        return -1;
+
+    if (state == NULL || state->hf == NULL)
+        return -1;
+
+    if (SetFilePointer(state->hf, offset, NULL, dwMoveMethod) == INVALID_SET_FILE_POINTER)
+    {
+        state->error = (int)GetLastError();
+        return -1;
+    }
+    return 0;
+}
+
+/*
+ * Win32 "close" callback: close the handle (if any) and free the stream
+ * state. Returns 0 when a handle was closed, -1 when stream is NULL or holds
+ * no handle (the state is still freed in the latter case).
+ */
+int ZCALLBACK win32_close_file_func (opaque, stream)
+    voidpf opaque;
+    voidpf stream;
+{
+    WIN32FILE_IOWIN* state = (WIN32FILE_IOWIN*)stream;
+    int ret = -1;
+
+    if (state == NULL)
+        return ret;
+
+    if (state->hf != NULL)
+    {
+        CloseHandle(state->hf);
+        ret = 0;
+    }
+    free(state);
+    return ret;
+}
+
+/* Win32 "error" callback: return the error code recorded in the stream
+   state, or -1 when stream is NULL. */
+int ZCALLBACK win32_error_file_func (opaque, stream)
+    voidpf opaque;
+    voidpf stream;
+{
+    return (stream != NULL) ? ((WIN32FILE_IOWIN*)stream) -> error : -1;
+}
+
+/*
+ * Win32 "flush" callback: FlushFileBuffers() on the stream's handle.
+ * Returns 0 on success, -1 when stream or its handle is NULL or the call
+ * fails (the error code is then recorded in the stream state).
+ */
+static int ZCALLBACK win32_flush_file_func (opaque, stream)
+    voidpf opaque;
+    voidpf stream;
+{
+    int ret = -1;
+    HANDLE hFile = NULL;
+
+    if (stream != NULL)
+        hFile = ((WIN32FILE_IOWIN*)stream) -> hf;
+    if (hFile != NULL)
+    {
+        if (FlushFileBuffers(hFile))
+            ret = 0;
+        else
+            ((WIN32FILE_IOWIN*)stream) -> error = (int)GetLastError();
+    }
+    return ret;
+}
+
+/*
+ * Fill *pzlib_filefunc_def with the Win32 callbacks of this file; the opaque
+ * user pointer is set to NULL.
+ * zflush_file is now set as well: it used to be left uninitialised, so a
+ * ZFLUSH() on a table filled here called through a garbage pointer.
+ */
+void fill_win32_filefunc (pzlib_filefunc_def)
+    zlib_filefunc_def* pzlib_filefunc_def;
+{
+    pzlib_filefunc_def->zopen_file = win32_open_file_func;
+    pzlib_filefunc_def->zread_file = win32_read_file_func;
+    pzlib_filefunc_def->zwrite_file = win32_write_file_func;
+    pzlib_filefunc_def->ztell_file = win32_tell_file_func;
+    pzlib_filefunc_def->zseek_file = win32_seek_file_func;
+    pzlib_filefunc_def->zflush_file = win32_flush_file_func;
+    pzlib_filefunc_def->zclose_file = win32_close_file_func;
+    pzlib_filefunc_def->zerror_file = win32_error_file_func;
+    pzlib_filefunc_def->opaque=NULL;
+}
diff --git a/src/minizip/iowin32.h b/src/minizip/iowin32.h
new file mode 100644
index 0000000..c0ebd50
--- /dev/null
+++ b/src/minizip/iowin32.h
@@ -0,0 +1,21 @@
+/* iowin32.h -- IO base function header for compress/uncompress .zip
+ files using zlib + zip or unzip API
+ This IO API version uses the Win32 API (for Microsoft Windows)
+
+ Version 1.00, September 10th, 2003
+
+ Copyright (C) 1998-2003 Gilles Vollant
+*/
+
+#include <windows.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void fill_win32_filefunc OF((zlib_filefunc_def* pzlib_filefunc_def));
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/minizip/mztools.c b/src/minizip/mztools.c
new file mode 100644
index 0000000..6021c49
--- /dev/null
+++ b/src/minizip/mztools.c
@@ -0,0 +1,287 @@
+/*
+ Additional tools for Minizip
+ Code: Xavier Roche '2004
+ License: Same as ZLIB (www.gzip.org)
+*/
+
+/* Code */
+#include <string.h>
+#ifndef _WIN32_WCE
+#include <stdio.h>
+#include <stdlib.h>
+#else
+#include <stdio.h>
+#include <stdlib.h>
+#include "celib.h"
+#endif
+#include "zlib.h"
+#include "unzip.h"
+
+/*
+   Little-endian field accessors for ZIP records.
+   READ_n(adr) yields the n-bit value stored at adr, least significant byte
+   first. All arithmetic is done on unsigned types so that a byte with its
+   top bit set never left-shifts into the sign bit of an int.
+   WRITE_n(buff, n) stores the low n bits of the value, least significant
+   byte first. The value is converted to unsigned long before shifting so
+   that negative inputs are written as their modular (two's complement)
+   representation instead of relying on a signed right shift.
+*/
+#define READ_8(adr) ((unsigned char)*(adr))
+#define READ_16(adr) ( (unsigned int)READ_8(adr) | ((unsigned int)READ_8((adr)+1) << 8) )
+#define READ_32(adr) ( (unsigned long)READ_16(adr) | ((unsigned long)READ_16((adr)+2) << 16) )
+
+#define WRITE_8(buff, n) do { \
+    *((unsigned char*)(buff)) = (unsigned char) ((n) & 0xff); \
+} while(0)
+#define WRITE_16(buff, n) do { \
+    WRITE_8((unsigned char*)(buff), n); \
+    WRITE_8(((unsigned char*)(buff)) + 1, ((unsigned long)(n)) >> 8); \
+} while(0)
+#define WRITE_32(buff, n) do { \
+    WRITE_16((unsigned char*)(buff), ((unsigned long)(n)) & 0xffff); \
+    WRITE_16((unsigned char*)(buff) + 2, ((unsigned long)(n)) >> 16); \
+} while(0)
+
+/*
+   Repair a ZIP archive whose central directory is missing or damaged.
+
+   The input is read sequentially from its first byte. Every local file
+   header (signature 0x04034b50) is copied to fileOut together with its
+   name, extra field and data, and a matching central directory record is
+   appended to the scratch file fileOutTmp. Scanning stops at the first
+   30-byte record that is not a local file header. An end-of-central-
+   directory record is then written, the scratch file is appended to
+   fileOut, and the scratch file is removed.
+
+   file:           archive to recover
+   fileOut:        recovered archive (created or truncated)
+   fileOutTmp:     scratch file used to build the central directory
+   nRecovered:     if not NULL, receives the number of recovered entries
+   bytesRecovered: if not NULL, receives the number of data bytes copied
+
+   Returns Z_OK on success; Z_STREAM_ERROR if one of the three files cannot
+   be opened or an entry has an empty name or a name/extra field larger
+   than the local buffers; Z_MEM_ERROR if the data buffer cannot be
+   allocated; Z_ERRNO on a short read or write. The two counters are only
+   written when Z_OK is returned.
+*/
+extern int ZEXPORT unzRepair(file, fileOut, fileOutTmp, nRecovered, bytesRecovered)
+const char* file;
+const char* fileOut;
+const char* fileOutTmp;
+uLong* nRecovered;
+uLong* bytesRecovered;
+{
+    int err = Z_OK;
+    FILE* fpZip = fopen(file, "rb");
+    FILE* fpOut = fopen(fileOut, "wb");
+    FILE* fpOutCD = fopen(fileOutTmp, "wb");
+    if (fpZip != NULL && fpOut != NULL && fpOutCD != NULL) {
+        int entries = 0;
+        uLong totalBytes = 0;
+        char header[30];
+        char filename[256];
+        char extra[1024];
+        uLong offset = 0;      /* bytes written to fpOut so far */
+        uLong offsetCD = 0;    /* bytes of central directory records written */
+        while ( fread(header, 1, 30, fpZip) == 30 ) {
+            uLong currentOffset = offset;
+
+            /* File entry */
+            if (READ_32(header) == 0x04034b50) {
+                unsigned int version = READ_16(header + 4);
+                unsigned int gpflag = READ_16(header + 6);
+                unsigned int method = READ_16(header + 8);
+                unsigned int filetime = READ_16(header + 10);
+                unsigned int filedate = READ_16(header + 12);
+                unsigned int crc = READ_32(header + 14); /* crc */
+                unsigned int cpsize = READ_32(header + 18); /* compressed size */
+                unsigned int uncpsize = READ_32(header + 22); /* uncompressed sz */
+                unsigned int fnsize = READ_16(header + 26); /* file name length */
+                unsigned int extsize = READ_16(header + 28); /* extra field length */
+                filename[0] = extra[0] = '\0';
+
+                /* Header */
+                if (fwrite(header, 1, 30, fpOut) == 30) {
+                    offset += 30;
+                } else {
+                    err = Z_ERRNO;
+                    break;
+                }
+
+                /* Filename: mandatory, and the length comes from the archive,
+                   so it must be checked against the local buffer */
+                if (fnsize == 0 || fnsize > sizeof(filename)) {
+                    err = Z_STREAM_ERROR;
+                    break;
+                }
+                if (fread(filename, 1, fnsize, fpZip) != fnsize
+                    || fwrite(filename, 1, fnsize, fpOut) != fnsize) {
+                    err = Z_ERRNO;
+                    break;
+                }
+                offset += fnsize;
+
+                /* Extra field: optional, same bound check as the name */
+                if (extsize > 0) {
+                    if (extsize > sizeof(extra)) {
+                        err = Z_STREAM_ERROR;
+                        break;
+                    }
+                    if (fread(extra, 1, extsize, fpZip) != extsize
+                        || fwrite(extra, 1, extsize, fpOut) != extsize) {
+                        err = Z_ERRNO;
+                        break;
+                    }
+                    offset += extsize;
+                }
+
+                /* Data */
+                {
+                    /* unsigned, so sizes of 2 GiB and more are not mistaken
+                       for "no data" */
+                    uLong dataSize = cpsize;
+                    if (dataSize == 0) {
+                        dataSize = uncpsize;
+                    }
+                    if (dataSize > 0) {
+                        char* data = malloc(dataSize);
+                        if (data == NULL) {
+                            err = Z_MEM_ERROR;
+                            break;
+                        }
+                        if (fread(data, 1, dataSize, fpZip) != dataSize
+                            || fwrite(data, 1, dataSize, fpOut) != dataSize) {
+                            err = Z_ERRNO;
+                        }
+                        free(data);
+                        if (err != Z_OK) {
+                            break;
+                        }
+                        offset += dataSize;
+                        totalBytes += dataSize;
+                    }
+                }
+
+                /* Central directory entry */
+                {
+                    char cdHeader[46];
+                    const char* comment = "";
+                    unsigned int comsize = (unsigned int) strlen(comment);
+                    WRITE_32(cdHeader, 0x02014b50);
+                    WRITE_16(cdHeader + 4, version);
+                    WRITE_16(cdHeader + 6, version);
+                    WRITE_16(cdHeader + 8, gpflag);
+                    WRITE_16(cdHeader + 10, method);
+                    WRITE_16(cdHeader + 12, filetime);
+                    WRITE_16(cdHeader + 14, filedate);
+                    WRITE_32(cdHeader + 16, crc);
+                    WRITE_32(cdHeader + 20, cpsize);
+                    WRITE_32(cdHeader + 24, uncpsize);
+                    WRITE_16(cdHeader + 28, fnsize);
+                    WRITE_16(cdHeader + 30, extsize);
+                    WRITE_16(cdHeader + 32, comsize);
+                    WRITE_16(cdHeader + 34, 0); /* disk # */
+                    WRITE_16(cdHeader + 36, 0); /* int attrb */
+                    WRITE_32(cdHeader + 38, 0); /* ext attrb */
+                    WRITE_32(cdHeader + 42, currentOffset);
+
+                    /* Header */
+                    if (fwrite(cdHeader, 1, 46, fpOutCD) != 46) {
+                        err = Z_ERRNO;
+                        break;
+                    }
+                    offsetCD += 46;
+
+                    /* Filename (fnsize is known to be > 0 here) */
+                    if (fwrite(filename, 1, fnsize, fpOutCD) != fnsize) {
+                        err = Z_ERRNO;
+                        break;
+                    }
+                    offsetCD += fnsize;
+
+                    /* Extra field */
+                    if (extsize > 0) {
+                        if (fwrite(extra, 1, extsize, fpOutCD) != extsize) {
+                            err = Z_ERRNO;
+                            break;
+                        }
+                        offsetCD += extsize;
+                    }
+
+                    /* Comment field */
+                    if (comsize > 0) {
+                        if (fwrite(comment, 1, comsize, fpOutCD) != comsize) {
+                            err = Z_ERRNO;
+                            break;
+                        }
+                        offsetCD += comsize;
+                    }
+                }
+
+                /* Success */
+                entries++;
+
+            } else {
+                /* not a local file header: end of the recoverable part */
+                break;
+            }
+        }
+
+        /* Final central directory */
+        {
+            int entriesZip = entries;
+            char eocd[22];
+            const char* comment = ""; /* "ZIP File recovered by zlib/minizip/mztools"; */
+            unsigned int comsize = (unsigned int) strlen(comment);
+            if (entriesZip > 0xffff) {
+                /* the entry count field is only 16 bits wide */
+                entriesZip = 0xffff;
+            }
+            WRITE_32(eocd, 0x06054b50);
+            WRITE_16(eocd + 4, 0); /* disk # */
+            WRITE_16(eocd + 6, 0); /* disk # */
+            WRITE_16(eocd + 8, entriesZip); /* hack */
+            WRITE_16(eocd + 10, entriesZip); /* hack */
+            WRITE_32(eocd + 12, offsetCD); /* size of CD */
+            WRITE_32(eocd + 16, offset); /* offset to CD */
+            WRITE_16(eocd + 20, comsize); /* comment */
+
+            /* Header */
+            if (fwrite(eocd, 1, 22, fpOutCD) == 22) {
+
+                /* Comment field */
+                if (comsize > 0) {
+                    if (fwrite(comment, 1, comsize, fpOutCD) != comsize) {
+                        err = Z_ERRNO;
+                    }
+                }
+
+            } else {
+                err = Z_ERRNO;
+            }
+        }
+
+        /* Final merge (file + central directory).
+           fclose() flushes, so a failure here means the scratch file is
+           incomplete. */
+        if (fclose(fpOutCD) != 0 && err == Z_OK) {
+            err = Z_ERRNO;
+        }
+        if (err == Z_OK) {
+            fpOutCD = fopen(fileOutTmp, "rb");
+            if (fpOutCD != NULL) {
+                size_t nRead;
+                char buffer[8192];
+                while ( (nRead = fread(buffer, 1, sizeof(buffer), fpOutCD)) > 0) {
+                    if (fwrite(buffer, 1, nRead, fpOut) != nRead) {
+                        err = Z_ERRNO;
+                        break;
+                    }
+                }
+                fclose(fpOutCD);
+            } else {
+                /* without the central directory the output is not usable */
+                err = Z_ERRNO;
+            }
+        }
+
+        /* Close */
+        fclose(fpZip);
+        if (fclose(fpOut) != 0 && err == Z_OK) {
+            err = Z_ERRNO;
+        }
+
+        /* Wipe temporary file */
+        (void)remove(fileOutTmp);
+
+        /* Number of recovered entries */
+        if (err == Z_OK) {
+            if (nRecovered != NULL) {
+                *nRecovered = entries;
+            }
+            if (bytesRecovered != NULL) {
+                *bytesRecovered = totalBytes;
+            }
+        }
+    } else {
+        /* At least one file could not be opened: release the others */
+        if (fpZip != NULL) {
+            fclose(fpZip);
+        }
+        if (fpOut != NULL) {
+            fclose(fpOut);
+        }
+        if (fpOutCD != NULL) {
+            fclose(fpOutCD);
+            (void)remove(fileOutTmp);
+        }
+        err = Z_STREAM_ERROR;
+    }
+    return err;
+}
diff --git a/src/minizip/mztools.h b/src/minizip/mztools.h
new file mode 100644
index 0000000..eee78dc
--- /dev/null
+++ b/src/minizip/mztools.h
@@ -0,0 +1,31 @@
+/*
+ Additional tools for Minizip
+ Code: Xavier Roche '2004
+ License: Same as ZLIB (www.gzip.org)
+*/
+
+#ifndef _zip_tools_H
+#define _zip_tools_H
+
+/* Headers are included before the linkage block is opened so that they
+   are not nested inside extern "C" */
+#ifndef _ZLIB_H
+#include "zlib.h"
+#endif
+
+#include "unzip.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Repair a ZIP file (missing central directory)
+ file: file to recover
+ fileOut: output file after recovery
+ fileOutTmp: temporary file name used for recovery
+ nRecovered: if not NULL, receives the number of recovered entries
+ bytesRecovered: if not NULL, receives the number of recovered data bytes
+ Returns Z_OK on success, another zlib error code otherwise.
+*/
+extern int ZEXPORT unzRepair(const char* file,
+                             const char* fileOut,
+                             const char* fileOutTmp,
+                             uLong* nRecovered,
+                             uLong* bytesRecovered);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/minizip/unzip.c b/src/minizip/unzip.c
new file mode 100644
index 0000000..1452a54
--- /dev/null
+++ b/src/minizip/unzip.c
@@ -0,0 +1,1591 @@
+/* unzip.c -- IO for uncompress .zip files using zlib
+ Version 1.00, September 10th, 2003
+
+ Copyright (C) 1998-2003 Gilles Vollant
+
+ Read unzip.h for more info
+*/
+
+/* Decryption code comes from crypt.c by Info-ZIP but has been greatly reduced in terms of
+compatibility with older software. The following is from the original crypt.c. Code
+woven in by Terry Thorsen 1/2003.
+*/
+/*
+ Copyright (c) 1990-2000 Info-ZIP. All rights reserved.
+
+ See the accompanying file LICENSE, version 2000-Apr-09 or later
+ (the contents of which are also included in zip.h) for terms of use.
+ If, for some reason, all these files are missing, the Info-ZIP license
+ also may be found at: ftp://ftp.info-zip.org/pub/infozip/license.html
+*/
+/*
+ crypt.c (full version) by Info-ZIP. Last revised: [see crypt.h]
+
+ The encryption/decryption parts of this source code (as opposed to the
+ non-echoing password parts) were originally written in Europe. The
+ whole source package can be freely distributed, including from the USA.
+ (Prior to January 2000, re-export from the US was a violation of US law.)
+ */
+
+/*
+ This encryption code is a direct transcription of the algorithm from
+ Roger Schlafly, described by Phil Katz in the file appnote.txt. This
+ file (appnote.txt) is distributed with the PKZIP program (even in the
+ version without encryption capabilities).
+ */
+
+
+#ifndef _WIN32_WCE
+#include <stdio.h>
+#include <stdlib.h>
+#else
+#include <stdio.h>
+#include "celib.h"
+#endif
+#include <string.h>
+
+#include "zlib.h"
+#include "unzip.h"
+
+#ifdef STDC
+# include <stddef.h>
+# include <string.h>
+# include <stdlib.h>
+#endif
+#ifdef NO_ERRNO_H
+ extern int errno;
+#else
+# include <errno.h>
+#endif
+
+
+#ifndef local
+# define local static
+#endif
+/* compile with -Dlocal if your debugger can't find static symbols */
+
+
+#ifndef CASESENSITIVITYDEFAULT_NO
+# if !defined(unix) && !defined(CASESENSITIVITYDEFAULT_YES)
+# define CASESENSITIVITYDEFAULT_NO
+# endif
+#endif
+
+
+#ifndef UNZ_BUFSIZE
+#define UNZ_BUFSIZE (16384)
+#endif
+
+#ifndef UNZ_MAXFILENAMEINZIP
+#define UNZ_MAXFILENAMEINZIP (256)
+#endif
+
+#ifndef ALLOC
+# define ALLOC(size) (malloc(size))
+#endif
+#ifndef TRYFREE
+# define TRYFREE(p) {if (p) free(p);}
+#endif
+
+#define SIZECENTRALDIRITEM (0x2e)
+#define SIZEZIPLOCALHEADER (0x1e)
+
+
+
+
+const char unz_copyright[] =
+ " unzip 1.00 Copyright 1998-2003 Gilles Vollant - http://www.winimage.com/zLibDll";
+
+/* unz_file_info_internal contains the internal (non-public) information
+ kept about one file of the zipfile */
+typedef struct unz_file_info_internal_s
+{
+ uLong offset_curfile;/* relative offset of the local header (4-byte field) */
+} unz_file_info_internal;
+
+
+/* file_in_zip_read_info_s contains the internal state kept about a file of
+ the zipfile while it is being read and decompressed */
+typedef struct
+{
+ char *read_buffer; /* internal buffer for compressed data */
+ z_stream stream; /* zlib stream structure for inflate */
+
+ uLong pos_in_zipfile; /* position in bytes in the zipfile, for seeking */
+ uLong stream_initialised; /* flag set if the stream structure is initialised */
+
+ uLong offset_local_extrafield;/* offset of the local extra field */
+ uInt size_local_extrafield;/* size of the local extra field */
+ uLong pos_local_extrafield; /* read position inside the local extra field */
+
+ uLong crc32; /* crc32 of all uncompressed data */
+ uLong crc32_wait; /* crc32 expected once everything is decompressed */
+ uLong rest_read_compressed; /* number of bytes left to decompress */
+ uLong rest_read_uncompressed;/* number of bytes still to be obtained after decompression */
+ zlib_filefunc_def z_filefunc; /* I/O callback table; presumably the one used to access filestream - TODO confirm */
+ voidpf filestream; /* I/O structure of the zipfile */
+ uLong compression_method; /* compression method (0==store) */
+ uLong byte_before_the_zipfile;/* bytes before the zipfile (>0 for sfx) */
+ int raw; /* NOTE(review): not used in the visible code; presumably a "read without decompressing" flag - confirm */
+} file_in_zip_read_info_s;
+
+
+/* unz_s contains the internal state of an opened zipfile; the unzFile handle
+ returned by unzOpen2 is a pointer to one of these
+*/
+typedef struct
+{
+ zlib_filefunc_def z_filefunc; /* I/O callbacks used for every access to filestream */
+ voidpf filestream; /* I/O structure of the zipfile */
+ unz_global_info gi; /* public global information */
+ uLong byte_before_the_zipfile;/* bytes before the zipfile (>0 for sfx) */
+ uLong num_file; /* number of the current file in the zipfile */
+ uLong pos_in_central_dir; /* position of the current file in the central dir */
+ uLong current_file_ok; /* flag: the current file information was read successfully */
+ uLong central_pos; /* position of the end-of-central-directory record found when opening */
+
+ uLong size_central_dir; /* size of the central directory */
+ uLong offset_central_dir; /* offset of start of central directory with
+ respect to the starting disk number */
+
+ unz_file_info cur_file_info; /* public info about the current file in the zip */
+ unz_file_info_internal cur_file_info_internal; /* private info about it */
+ file_in_zip_read_info_s* pfile_in_zip_read; /* state of the current
+ file if we are decompressing it, NULL otherwise */
+ int encrypted; /* set to 0 by unzOpen2; presumably non-zero while an encrypted entry is open - TODO confirm */
+# ifndef NOUNCRYPT
+ unsigned long keys[3]; /* keys defining the pseudo-random sequence */
+ const unsigned long* pcrc_32_tab;
+# endif
+} unz_s;
+
+
+#ifndef NOUNCRYPT
+#include "crypt.h"
+#endif
+
+/* ===========================================================================
+   Read one byte from filestream through the ZREAD callback.
+   On success the byte is stored in *pi and UNZ_OK is returned.
+   Otherwise *pi is left untouched and the result is UNZ_ERRNO when ZERROR
+   reports an error on the stream, UNZ_EOF when it does not.
+*/
+
+
+local int unzlocal_getByte OF((
+    const zlib_filefunc_def* pzlib_filefunc_def,
+    voidpf filestream,
+    int *pi));
+
+local int unzlocal_getByte(pzlib_filefunc_def,filestream,pi)
+    const zlib_filefunc_def* pzlib_filefunc_def;
+    voidpf filestream;
+    int *pi;
+{
+    unsigned char byte;
+    int nRead = (int)ZREAD(*pzlib_filefunc_def,filestream,&byte,1);
+
+    if (nRead != 1)
+        return ZERROR(*pzlib_filefunc_def,filestream) ? UNZ_ERRNO : UNZ_EOF;
+
+    *pi = (int)byte;
+    return UNZ_OK;
+}
+
+
+/* ===========================================================================
+   Read a 16-bit value stored least significant byte first from filestream.
+   On success the value is stored in *pX and UNZ_OK is returned.
+   On failure *pX is set to 0 and the error code of unzlocal_getByte is
+   returned.
+*/
+local int unzlocal_getShort OF((
+    const zlib_filefunc_def* pzlib_filefunc_def,
+    voidpf filestream,
+    uLong *pX));
+
+local int unzlocal_getShort (pzlib_filefunc_def,filestream,pX)
+    const zlib_filefunc_def* pzlib_filefunc_def;
+    voidpf filestream;
+    uLong *pX;
+{
+    uLong x = 0;
+    int i = 0;   /* unzlocal_getByte leaves it untouched on failure */
+    int err = UNZ_OK;
+    int shift;
+
+    /* stop at the first failed byte; no stale value is ever accumulated */
+    for (shift = 0; (shift < 16) && (err == UNZ_OK); shift += 8)
+    {
+        err = unzlocal_getByte(pzlib_filefunc_def,filestream,&i);
+        if (err == UNZ_OK)
+            x |= ((uLong)i) << shift;
+    }
+
+    *pX = (err == UNZ_OK) ? x : 0;
+    return err;
+}
+
+/*
+   Read a 32-bit value stored least significant byte first from filestream.
+   On success the value is stored in *pX and UNZ_OK is returned.
+   On failure *pX is set to 0 and the error code of unzlocal_getByte is
+   returned.
+*/
+local int unzlocal_getLong OF((
+    const zlib_filefunc_def* pzlib_filefunc_def,
+    voidpf filestream,
+    uLong *pX));
+
+local int unzlocal_getLong (pzlib_filefunc_def,filestream,pX)
+    const zlib_filefunc_def* pzlib_filefunc_def;
+    voidpf filestream;
+    uLong *pX;
+{
+    uLong x = 0;
+    int i = 0;   /* unzlocal_getByte leaves it untouched on failure */
+    int err = UNZ_OK;
+    int shift;
+
+    /* stop at the first failed byte; no stale value is ever accumulated */
+    for (shift = 0; (shift < 32) && (err == UNZ_OK); shift += 8)
+    {
+        err = unzlocal_getByte(pzlib_filefunc_def,filestream,&i);
+        if (err == UNZ_OK)
+            x |= ((uLong)i) << shift;
+    }
+
+    *pX = (err == UNZ_OK) ? x : 0;
+    return err;
+}
+
+
+/*
+   Local replacement for strcmpi / strcasecmp.
+   Only the letters 'a'..'z' are folded (to upper case) before comparing.
+   Returns 0 when both strings are equal, -1 when fileName1 ends first or
+   its first differing character is smaller, 1 otherwise.
+*/
+local int strcmpcasenosensitive_internal (fileName1,fileName2)
+    const char* fileName1;
+    const char* fileName2;
+{
+    for (;;)
+    {
+        char a = *(fileName1++);
+        char b = *(fileName2++);
+
+        if ((a>='a') && (a<='z'))
+            a -= 0x20;
+        if ((b>='a') && (b<='z'))
+            b -= 0x20;
+
+        /* end of either string decides the result before any ordering test */
+        if ((a=='\0') || (b=='\0'))
+        {
+            if (a==b)
+                return 0;
+            return (a=='\0') ? -1 : 1;
+        }
+
+        if (a!=b)
+            return (a<b) ? -1 : 1;
+    }
+}
+
+
+#ifdef CASESENSITIVITYDEFAULT_NO
+#define CASESENSITIVITYDEFAULTVALUE 2
+#else
+#define CASESENSITIVITYDEFAULTVALUE 1
+#endif
+
+#ifndef STRCMPCASENOSENTIVEFUNCTION
+#define STRCMPCASENOSENTIVEFUNCTION strcmpcasenosensitive_internal
+#endif
+
+/*
+   Compare two file names (fileName1,fileName2).
+   If iCaseSensitivity = 1, the comparison is case sensitive (strcmp)
+   If iCaseSensitivity = 2, the comparison is not case sensitive (like
+                            strcmpi or strcasecmp)
+   If iCaseSensitivity = 0, CASESENSITIVITYDEFAULTVALUE is used (the
+                            default of the platform the file was built for)
+   Any value other than 0 and 1 selects the case-insensitive comparison.
+*/
+extern int ZEXPORT unzStringFileNameCompare (fileName1,fileName2,iCaseSensitivity)
+    const char* fileName1;
+    const char* fileName2;
+    int iCaseSensitivity;
+{
+    int mode = (iCaseSensitivity != 0) ? iCaseSensitivity
+                                       : CASESENSITIVITYDEFAULTVALUE;
+
+    if (mode != 1)
+        return STRCMPCASENOSENTIVEFUNCTION(fileName1,fileName2);
+
+    return strcmp(fileName1,fileName2);
+}
+
+#ifndef BUFREADCOMMENT
+#define BUFREADCOMMENT (0x400)
+#endif
+
+/*
+ Locate the end-of-central-directory record of a zipfile (at the end, just
+ before the global comment).
+ The file is scanned backwards from its end, in blocks of BUFREADCOMMENT
+ bytes (plus 4 bytes read past each block so that a signature straddling
+ two blocks is still seen), over at most 0xffff bytes, looking for the byte
+ sequence 0x50 0x4b 0x05 0x06.
+ Returns the absolute position of the signature, or 0 if it was not found
+ or if a seek, a read or the buffer allocation failed.
+*/
+local uLong unzlocal_SearchCentralDir OF((
+ const zlib_filefunc_def* pzlib_filefunc_def,
+ voidpf filestream));
+
+local uLong unzlocal_SearchCentralDir(pzlib_filefunc_def,filestream)
+ const zlib_filefunc_def* pzlib_filefunc_def;
+ voidpf filestream;
+{
+ unsigned char* buf;
+ uLong uSizeFile;
+ uLong uBackRead; /* distance from the end of file already covered */
+ uLong uMaxBack=0xffff; /* maximum size of global comment */
+ uLong uPosFound=0;
+
+ if (ZSEEK(*pzlib_filefunc_def,filestream,0,ZLIB_FILEFUNC_SEEK_END) != 0)
+ return 0;
+
+
+ uSizeFile = ZTELL(*pzlib_filefunc_def,filestream);
+
+ if (uMaxBack>uSizeFile) /* never look before the start of the file */
+ uMaxBack = uSizeFile;
+
+ buf = (unsigned char*)ALLOC(BUFREADCOMMENT+4);
+ if (buf==NULL)
+ return 0;
+
+ uBackRead = 4;
+ while (uBackRead<uMaxBack)
+ {
+ uLong uReadSize,uReadPos ;
+ int i;
+ if (uBackRead+BUFREADCOMMENT>uMaxBack)
+ uBackRead = uMaxBack;
+ else
+ uBackRead+=BUFREADCOMMENT;
+ uReadPos = uSizeFile-uBackRead ;
+
+ uReadSize = ((BUFREADCOMMENT+4) < (uSizeFile-uReadPos)) ?
+ (BUFREADCOMMENT+4) : (uSizeFile-uReadPos);
+ if (ZSEEK(*pzlib_filefunc_def,filestream,uReadPos,ZLIB_FILEFUNC_SEEK_SET)!=0)
+ break;
+
+ if (ZREAD(*pzlib_filefunc_def,filestream,buf,uReadSize)!=uReadSize)
+ break;
+
+ for (i=(int)uReadSize-3; (i--)>0;) /* first i tested is uReadSize-4, so buf+i+3 stays inside the data read */
+ if (((*(buf+i))==0x50) && ((*(buf+i+1))==0x4b) &&
+ ((*(buf+i+2))==0x05) && ((*(buf+i+3))==0x06))
+ {
+ uPosFound = uReadPos+i;
+ break;
+ }
+
+ if (uPosFound!=0)
+ break;
+ }
+ TRYFREE(buf);
+ return uPosFound;
+}
+
+/*
+   Open a Zip file for reading.
+   path is handed unchanged to the zopen_file callback (for example, on a
+   Windows NT computer "c:\\test\\zlib114.zip" or on a Unix computer
+   "zlib/zlib114.zip").
+   pzlib_filefunc_def supplies the I/O callbacks; when it is NULL the table
+   is filled by fill_fopen_filefunc.
+   If the zipfile cannot be opened (the file does not exist or is not a
+   valid single-disk archive) or memory cannot be allocated, the return
+   value is NULL and the stream, if it was opened, is closed again.
+   Otherwise the return value is an unzFile handle, usable with the other
+   functions of this unzip package, positioned on the first file.
+*/
+extern unzFile ZEXPORT unzOpen2 (path, pzlib_filefunc_def)
+    const char *path;
+    zlib_filefunc_def* pzlib_filefunc_def;
+{
+    unz_s us;
+    unz_s *s;
+    uLong central_pos,uL;
+
+    uLong number_disk;          /* number of the current disk, used for
+                                   spanning ZIP, unsupported, always 0*/
+    uLong number_disk_with_CD;  /* number of the disk with central dir, used
+                                   for spanning ZIP, unsupported, always 0*/
+    uLong number_entry_CD;      /* total number of entries in
+                                   the central dir
+                                   (same as number_entry when not spanned) */
+
+    int err=UNZ_OK;
+
+    if (unz_copyright[0]!=' ')
+        return NULL;
+
+    if (pzlib_filefunc_def==NULL)
+        fill_fopen_filefunc(&us.z_filefunc);
+    else
+        us.z_filefunc = *pzlib_filefunc_def;
+
+    us.filestream= (*(us.z_filefunc.zopen_file))(us.z_filefunc.opaque,
+                                                 path,
+                                                 ZLIB_FILEFUNC_MODE_READ |
+                                                 ZLIB_FILEFUNC_MODE_EXISTING);
+    if (us.filestream==NULL)
+        return NULL;
+
+    central_pos = unzlocal_SearchCentralDir(&us.z_filefunc,us.filestream);
+    if (central_pos==0)
+        err=UNZ_ERRNO;
+
+    if (ZSEEK(us.z_filefunc, us.filestream,
+                                      central_pos,ZLIB_FILEFUNC_SEEK_SET)!=0)
+        err=UNZ_ERRNO;
+
+    /* the signature, already checked */
+    if (unzlocal_getLong(&us.z_filefunc, us.filestream,&uL)!=UNZ_OK)
+        err=UNZ_ERRNO;
+
+    /* number of this disk */
+    if (unzlocal_getShort(&us.z_filefunc, us.filestream,&number_disk)!=UNZ_OK)
+        err=UNZ_ERRNO;
+
+    /* number of the disk with the start of the central directory */
+    if (unzlocal_getShort(&us.z_filefunc, us.filestream,&number_disk_with_CD)!=UNZ_OK)
+        err=UNZ_ERRNO;
+
+    /* total number of entries in the central dir on this disk */
+    if (unzlocal_getShort(&us.z_filefunc, us.filestream,&us.gi.number_entry)!=UNZ_OK)
+        err=UNZ_ERRNO;
+
+    /* total number of entries in the central dir */
+    if (unzlocal_getShort(&us.z_filefunc, us.filestream,&number_entry_CD)!=UNZ_OK)
+        err=UNZ_ERRNO;
+
+    /* spanned (multi-disk) archives are not supported */
+    if ((number_entry_CD!=us.gi.number_entry) ||
+        (number_disk_with_CD!=0) ||
+        (number_disk!=0))
+        err=UNZ_BADZIPFILE;
+
+    /* size of the central directory */
+    if (unzlocal_getLong(&us.z_filefunc, us.filestream,&us.size_central_dir)!=UNZ_OK)
+        err=UNZ_ERRNO;
+
+    /* offset of start of central directory with respect to the
+          starting disk number */
+    if (unzlocal_getLong(&us.z_filefunc, us.filestream,&us.offset_central_dir)!=UNZ_OK)
+        err=UNZ_ERRNO;
+
+    /* zipfile comment length */
+    if (unzlocal_getShort(&us.z_filefunc, us.filestream,&us.gi.size_comment)!=UNZ_OK)
+        err=UNZ_ERRNO;
+
+    /* the central directory must end at or before the end record */
+    if ((central_pos<us.offset_central_dir+us.size_central_dir) &&
+        (err==UNZ_OK))
+        err=UNZ_BADZIPFILE;
+
+    if (err!=UNZ_OK)
+    {
+        ZCLOSE(us.z_filefunc, us.filestream);
+        return NULL;
+    }
+
+    /* anything found before the expected start of the archive (e.g. a
+       self-extractor stub) shifts every stored offset by this amount */
+    us.byte_before_the_zipfile = central_pos -
+                            (us.offset_central_dir+us.size_central_dir);
+    us.central_pos = central_pos;
+    us.pfile_in_zip_read = NULL;
+    us.encrypted = 0;
+
+
+    s=(unz_s*)ALLOC(sizeof(unz_s));
+    if (s==NULL)
+    {
+        /* out of memory: do not leak the opened stream */
+        ZCLOSE(us.z_filefunc, us.filestream);
+        return NULL;
+    }
+    *s=us;
+    unzGoToFirstFile((unzFile)s);
+    return (unzFile)s;
+}
+
+
+/*
+   Open a Zip file with the default I/O callbacks.
+   Same as unzOpen2 called with a NULL callback table.
+*/
+extern unzFile ZEXPORT unzOpen (path)
+    const char *path;
+{
+    unzFile handle = unzOpen2(path, NULL);
+
+    return handle;
+}
+
+/*
+   Close a ZipFile opened with unzOpen / unzOpen2 and release its handle.
+   A file of the archive that is still open for reading is closed first
+   through unzCloseCurrentFile.
+   return UNZ_OK if there is no problem, UNZ_PARAMERROR if file is NULL. */
+extern int ZEXPORT unzClose (file)
+    unzFile file;
+{
+    unz_s* zip = (unz_s*)file;
+
+    if (zip==NULL)
+        return UNZ_PARAMERROR;
+
+    if (zip->pfile_in_zip_read!=NULL)
+        unzCloseCurrentFile(file);
+
+    ZCLOSE(zip->z_filefunc, zip->filestream);
+    TRYFREE(zip);
+    return UNZ_OK;
+}
+
+
+/*
+   Copy the global information of the ZipFile into *pglobal_info.
+   No preparation of the structure is needed.
+   return UNZ_OK if there is no problem, UNZ_PARAMERROR if file is NULL. */
+extern int ZEXPORT unzGetGlobalInfo (file,pglobal_info)
+    unzFile file;
+    unz_global_info *pglobal_info;
+{
+    const unz_s* zip = (const unz_s*)file;
+
+    if (zip==NULL)
+        return UNZ_PARAMERROR;
+
+    *pglobal_info = zip->gi;
+    return UNZ_OK;
+}
+
+
+/*
+   Translate a packed DOS date/time into a tm_unz (easier to read).
+   The date is taken from the upper 16 bits of ulDosDate, the time from
+   the lower 16 bits. tm_mon is the stored month minus 1, tm_year is the
+   stored year plus 1980 and tm_sec has a resolution of two seconds.
+*/
+local void unzlocal_DosDateToTmuDate (ulDosDate, ptm)
+    uLong ulDosDate;
+    tm_unz* ptm;
+{
+    uLong datePart = (uLong)(ulDosDate>>16);
+
+    /* time: hhhhhmmm mmmsssss */
+    ptm->tm_sec  = (uInt)(2*(ulDosDate&0x1f));
+    ptm->tm_min  = (uInt)((ulDosDate>>5)&0x3f);
+    ptm->tm_hour = (uInt)((ulDosDate>>11)&0x1f);
+
+    /* date: yyyyyyym mmmddddd */
+    ptm->tm_mday = (uInt)(datePart&0x1f);
+    ptm->tm_mon  = (uInt)(((datePart>>5)&0x0f)-1);
+    ptm->tm_year = (uInt)(((datePart>>9)&0x7f)+1980);
+}
+
+/*
+ Get Info about the current file in the zipfile, with internal only info.
+
+ Seeks to pos_in_central_dir + byte_before_the_zipfile, reads the fixed
+ part of the central directory record (magic 0x02014b50) and then, for each
+ of szFileName, extraField and szComment that is not NULL, copies the
+ corresponding variable-length field into the caller's buffer.
+ At most the given buffer size is copied; the name and the comment are
+ NUL-terminated only when they are strictly shorter than their buffer.
+ *pfile_info and *pfile_info_internal are written only when the result is
+ UNZ_OK and the pointer is not NULL.
+
+ Returns UNZ_OK, UNZ_PARAMERROR (file is NULL), UNZ_BADZIPFILE (wrong
+ magic) or UNZ_ERRNO (a seek or a read failed).
+*/
+local int unzlocal_GetCurrentFileInfoInternal OF((unzFile file,
+ unz_file_info *pfile_info,
+ unz_file_info_internal
+ *pfile_info_internal,
+ char *szFileName,
+ uLong fileNameBufferSize,
+ void *extraField,
+ uLong extraFieldBufferSize,
+ char *szComment,
+ uLong commentBufferSize));
+
+local int unzlocal_GetCurrentFileInfoInternal (file,
+ pfile_info,
+ pfile_info_internal,
+ szFileName, fileNameBufferSize,
+ extraField, extraFieldBufferSize,
+ szComment, commentBufferSize)
+ unzFile file;
+ unz_file_info *pfile_info;
+ unz_file_info_internal *pfile_info_internal;
+ char *szFileName;
+ uLong fileNameBufferSize;
+ void *extraField;
+ uLong extraFieldBufferSize;
+ char *szComment;
+ uLong commentBufferSize;
+{
+ unz_s* s;
+ unz_file_info file_info;
+ unz_file_info_internal file_info_internal;
+ int err=UNZ_OK;
+ uLong uMagic;
+ long lSeek=0; /* bytes of variable-length data still to be skipped before the next field */
+
+ if (file==NULL)
+ return UNZ_PARAMERROR;
+ s=(unz_s*)file;
+ if (ZSEEK(s->z_filefunc, s->filestream,
+ s->pos_in_central_dir+s->byte_before_the_zipfile,
+ ZLIB_FILEFUNC_SEEK_SET)!=0)
+ err=UNZ_ERRNO;
+
+
+ /* we check the magic */
+ if (err==UNZ_OK)
+ if (unzlocal_getLong(&s->z_filefunc, s->filestream,&uMagic) != UNZ_OK)
+ err=UNZ_ERRNO;
+ else if (uMagic!=0x02014b50) /* this else pairs with the inner if */
+ err=UNZ_BADZIPFILE;
+
+ /* the fixed-size fields below are read even if err is already set;
+ the results are only published at the end when err is UNZ_OK */
+ if (unzlocal_getShort(&s->z_filefunc, s->filestream,&file_info.version) != UNZ_OK)
+ err=UNZ_ERRNO;
+
+ if (unzlocal_getShort(&s->z_filefunc, s->filestream,&file_info.version_needed) != UNZ_OK)
+ err=UNZ_ERRNO;
+
+ if (unzlocal_getShort(&s->z_filefunc, s->filestream,&file_info.flag) != UNZ_OK)
+ err=UNZ_ERRNO;
+
+ if (unzlocal_getShort(&s->z_filefunc, s->filestream,&file_info.compression_method) != UNZ_OK)
+ err=UNZ_ERRNO;
+
+ if (unzlocal_getLong(&s->z_filefunc, s->filestream,&file_info.dosDate) != UNZ_OK)
+ err=UNZ_ERRNO;
+
+ unzlocal_DosDateToTmuDate(file_info.dosDate,&file_info.tmu_date);
+
+ if (unzlocal_getLong(&s->z_filefunc, s->filestream,&file_info.crc) != UNZ_OK)
+ err=UNZ_ERRNO;
+
+ if (unzlocal_getLong(&s->z_filefunc, s->filestream,&file_info.compressed_size) != UNZ_OK)
+ err=UNZ_ERRNO;
+
+ if (unzlocal_getLong(&s->z_filefunc, s->filestream,&file_info.uncompressed_size) != UNZ_OK)
+ err=UNZ_ERRNO;
+
+ if (unzlocal_getShort(&s->z_filefunc, s->filestream,&file_info.size_filename) != UNZ_OK)
+ err=UNZ_ERRNO;
+
+ if (unzlocal_getShort(&s->z_filefunc, s->filestream,&file_info.size_file_extra) != UNZ_OK)
+ err=UNZ_ERRNO;
+
+ if (unzlocal_getShort(&s->z_filefunc, s->filestream,&file_info.size_file_comment) != UNZ_OK)
+ err=UNZ_ERRNO;
+
+ if (unzlocal_getShort(&s->z_filefunc, s->filestream,&file_info.disk_num_start) != UNZ_OK)
+ err=UNZ_ERRNO;
+
+ if (unzlocal_getShort(&s->z_filefunc, s->filestream,&file_info.internal_fa) != UNZ_OK)
+ err=UNZ_ERRNO;
+
+ if (unzlocal_getLong(&s->z_filefunc, s->filestream,&file_info.external_fa) != UNZ_OK)
+ err=UNZ_ERRNO;
+
+ if (unzlocal_getLong(&s->z_filefunc, s->filestream,&file_info_internal.offset_curfile) != UNZ_OK)
+ err=UNZ_ERRNO;
+
+ lSeek+=file_info.size_filename; /* the name follows the fixed part */
+ if ((err==UNZ_OK) && (szFileName!=NULL))
+ {
+ uLong uSizeRead ;
+ if (file_info.size_filename<fileNameBufferSize)
+ {
+ *(szFileName+file_info.size_filename)='\0';
+ uSizeRead = file_info.size_filename;
+ }
+ else
+ uSizeRead = fileNameBufferSize; /* truncated copy, no terminator written */
+
+ if ((file_info.size_filename>0) && (fileNameBufferSize>0))
+ if (ZREAD(s->z_filefunc, s->filestream,szFileName,uSizeRead)!=uSizeRead)
+ err=UNZ_ERRNO;
+ lSeek -= uSizeRead; /* what was not copied must still be skipped */
+ }
+
+
+ if ((err==UNZ_OK) && (extraField!=NULL))
+ {
+ uLong uSizeRead ;
+ if (file_info.size_file_extra<extraFieldBufferSize)
+ uSizeRead = file_info.size_file_extra;
+ else
+ uSizeRead = extraFieldBufferSize;
+
+ if (lSeek!=0) /* skip the unread remainder of the previous field */
+ if (ZSEEK(s->z_filefunc, s->filestream,lSeek,ZLIB_FILEFUNC_SEEK_CUR)==0)
+ lSeek=0;
+ else
+ err=UNZ_ERRNO;
+ if ((file_info.size_file_extra>0) && (extraFieldBufferSize>0))
+ if (ZREAD(s->z_filefunc, s->filestream,extraField,uSizeRead)!=uSizeRead)
+ err=UNZ_ERRNO;
+ lSeek += file_info.size_file_extra - uSizeRead;
+ }
+ else
+ lSeek+=file_info.size_file_extra;
+
+
+ if ((err==UNZ_OK) && (szComment!=NULL))
+ {
+ uLong uSizeRead ;
+ if (file_info.size_file_comment<commentBufferSize)
+ {
+ *(szComment+file_info.size_file_comment)='\0';
+ uSizeRead = file_info.size_file_comment;
+ }
+ else
+ uSizeRead = commentBufferSize; /* truncated copy, no terminator written */
+
+ if (lSeek!=0) /* skip the unread remainder of the previous fields */
+ if (ZSEEK(s->z_filefunc, s->filestream,lSeek,ZLIB_FILEFUNC_SEEK_CUR)==0)
+ lSeek=0;
+ else
+ err=UNZ_ERRNO;
+ if ((file_info.size_file_comment>0) && (commentBufferSize>0))
+ if (ZREAD(s->z_filefunc, s->filestream,szComment,uSizeRead)!=uSizeRead)
+ err=UNZ_ERRNO;
+ lSeek+=file_info.size_file_comment - uSizeRead;
+ }
+ else
+ lSeek+=file_info.size_file_comment;
+
+ if ((err==UNZ_OK) && (pfile_info!=NULL))
+ *pfile_info=file_info;
+
+ if ((err==UNZ_OK) && (pfile_info_internal!=NULL))
+ *pfile_info_internal=file_info_internal;
+
+ return err;
+}
+
+
+
+/*
+   Get information about the current file of the zipfile.
+   pfile_info, szFileName, extraField and szComment may each be NULL when
+   the corresponding piece of information is not wanted; the buffer sizes
+   limit how many bytes are copied into the matching buffer.
+   This is unzlocal_GetCurrentFileInfoInternal without the internal-only
+   information.
+   return UNZ_OK if there is no problem.
+*/
+extern int ZEXPORT unzGetCurrentFileInfo (file,
+                                          pfile_info,
+                                          szFileName, fileNameBufferSize,
+                                          extraField, extraFieldBufferSize,
+                                          szComment, commentBufferSize)
+    unzFile file;
+    unz_file_info *pfile_info;
+    char *szFileName;
+    uLong fileNameBufferSize;
+    void *extraField;
+    uLong extraFieldBufferSize;
+    char *szComment;
+    uLong commentBufferSize;
+{
+    int status;
+
+    status = unzlocal_GetCurrentFileInfoInternal(file,
+                                                 pfile_info,
+                                                 NULL,
+                                                 szFileName,fileNameBufferSize,
+                                                 extraField,extraFieldBufferSize,
+                                                 szComment,commentBufferSize);
+    return status;
+}
+
+/*
+   Make the first entry of the central directory the current file.
+   return UNZ_OK if there is no problem, UNZ_PARAMERROR if file is NULL,
+   otherwise the error reported while reading the entry (the current file
+   is then flagged as unusable).
+*/
+extern int ZEXPORT unzGoToFirstFile (file)
+    unzFile file;
+{
+    unz_s* zip = (unz_s*)file;
+    int status;
+
+    if (zip==NULL)
+        return UNZ_PARAMERROR;
+
+    zip->num_file = 0;
+    zip->pos_in_central_dir = zip->offset_central_dir;
+
+    status = unzlocal_GetCurrentFileInfoInternal(file,
+                                                 &zip->cur_file_info,
+                                                 &zip->cur_file_info_internal,
+                                                 NULL,0,NULL,0,NULL,0);
+    zip->current_file_ok = (status == UNZ_OK);
+    return status;
+}
+
+/*
+   Make the entry following the current one the current file.
+   return UNZ_OK if there is no problem
+   return UNZ_END_OF_LIST_OF_FILE if the current file was the last one or
+   is not usable.
+   When the archive announces exactly 0xffff entries the entry count is
+   not trusted and the walk only stops when reading the next entry fails.
+*/
+extern int ZEXPORT unzGoToNextFile (file)
+    unzFile file;
+{
+    unz_s* zip = (unz_s*)file;
+    int status;
+
+    if (zip==NULL)
+        return UNZ_PARAMERROR;
+    if (!zip->current_file_ok)
+        return UNZ_END_OF_LIST_OF_FILE;
+
+    /* 2^16 files overflow hack: 0xffff means "count unknown" */
+    if ((zip->gi.number_entry != 0xffff) &&
+        (zip->num_file+1 == zip->gi.number_entry))
+        return UNZ_END_OF_LIST_OF_FILE;
+
+    /* step over the fixed part and the three variable-length fields */
+    zip->pos_in_central_dir += SIZECENTRALDIRITEM
+                             + zip->cur_file_info.size_filename
+                             + zip->cur_file_info.size_file_extra
+                             + zip->cur_file_info.size_file_comment;
+    zip->num_file++;
+
+    status = unzlocal_GetCurrentFileInfoInternal(file,
+                                                 &zip->cur_file_info,
+                                                 &zip->cur_file_info_internal,
+                                                 NULL,0,NULL,0,NULL,0);
+    zip->current_file_ok = (status == UNZ_OK);
+    return status;
+}
+
+
+/*
+   Try to locate the file szFileName in the zipfile.
+   For the meaning of iCaseSensitivity, see unzStringFileNameCompare.
+
+   return value :
+   UNZ_OK if the file is found. It becomes the current file.
+   UNZ_END_OF_LIST_OF_FILE if the file is not found (the previous current
+   file position is then restored)
+   UNZ_PARAMERROR if file is NULL or szFileName is UNZ_MAXFILENAMEINZIP
+   characters long or more
+*/
+extern int ZEXPORT unzLocateFile (file, szFileName, iCaseSensitivity)
+    unzFile file;
+    const char *szFileName;
+    int iCaseSensitivity;
+{
+    unz_s* zip;
+    int status;
+    char entryName[UNZ_MAXFILENAMEINZIP+1];
+
+    /* Snapshot of the 'current file' so that a failed search leaves the
+     * caller where it was.
+     */
+    unz_file_info savedInfo;
+    unz_file_info_internal savedInfoInternal;
+    uLong savedNumFile;
+    uLong savedPos;
+
+    if (file==NULL)
+        return UNZ_PARAMERROR;
+
+    if (strlen(szFileName)>=UNZ_MAXFILENAMEINZIP)
+        return UNZ_PARAMERROR;
+
+    zip=(unz_s*)file;
+    if (!zip->current_file_ok)
+        return UNZ_END_OF_LIST_OF_FILE;
+
+    /* Save the current state */
+    savedNumFile = zip->num_file;
+    savedPos = zip->pos_in_central_dir;
+    savedInfo = zip->cur_file_info;
+    savedInfoInternal = zip->cur_file_info_internal;
+
+    /* Walk the whole central directory from its first entry */
+    for (status = unzGoToFirstFile(file);
+         status == UNZ_OK;
+         status = unzGoToNextFile(file))
+    {
+        status = unzGetCurrentFileInfo(file,NULL,
+                                       entryName,sizeof(entryName)-1,
+                                       NULL,0,NULL,0);
+        if (status != UNZ_OK)
+            break;
+
+        if (unzStringFileNameCompare(entryName,
+                                     szFileName,iCaseSensitivity)==0)
+            return UNZ_OK;
+    }
+
+    /* Not found: put the 'current file' back where it was */
+    zip->num_file = savedNumFile;
+    zip->pos_in_central_dir = savedPos;
+    zip->cur_file_info = savedInfo;
+    zip->cur_file_info_internal = savedInfoInternal;
+    return status;
+}
+
+
+/*
+///////////////////////////////////////////
+// Contributed by Ryan Haksi (mailto://cryogen@infoserve.net)
+// I need random access
+//
+// Further optimization could be realized by adding an ability
+// to cache the directory in memory. The goal being a single
+// comprehensive file read to put the file I need in a memory.
+*/
+
+/*
+typedef struct unz_file_pos_s
+{
+ uLong pos_in_zip_directory; // offset in file
+ uLong num_of_file; // # of file
+} unz_file_pos;
+*/
+
+/*
+   Store the position of the current file (offset in the central directory
+   and entry number) in *file_pos, for a later unzGoToFilePos.
+   return UNZ_OK if there is no problem, UNZ_PARAMERROR if an argument is
+   NULL, UNZ_END_OF_LIST_OF_FILE if there is no usable current file.
+*/
+extern int ZEXPORT unzGetFilePos(file, file_pos)
+    unzFile file;
+    unz_file_pos* file_pos;
+{
+    const unz_s* zip = (const unz_s*)file;
+
+    if ((zip==NULL) || (file_pos==NULL))
+        return UNZ_PARAMERROR;
+    if (!zip->current_file_ok)
+        return UNZ_END_OF_LIST_OF_FILE;
+
+    file_pos->num_of_file = zip->num_file;
+    file_pos->pos_in_zip_directory = zip->pos_in_central_dir;
+
+    return UNZ_OK;
+}
+
+/*
+   Make the entry described by *file_pos (as filled by unzGetFilePos) the
+   current file and reload its information from the central directory.
+   return UNZ_OK if there is no problem, UNZ_PARAMERROR if an argument is
+   NULL, otherwise the error reported while reading the entry (the current
+   file is then flagged as unusable).
+*/
+extern int ZEXPORT unzGoToFilePos(file, file_pos)
+    unzFile file;
+    unz_file_pos* file_pos;
+{
+    unz_s* zip = (unz_s*)file;
+    int status;
+
+    if ((zip==NULL) || (file_pos==NULL))
+        return UNZ_PARAMERROR;
+
+    /* jump to the right spot */
+    zip->num_file = file_pos->num_of_file;
+    zip->pos_in_central_dir = file_pos->pos_in_zip_directory;
+
+    /* set the current file */
+    status = unzlocal_GetCurrentFileInfoInternal(file,
+                                                 &zip->cur_file_info,
+                                                 &zip->cur_file_info_internal,
+                                                 NULL,0,NULL,0,NULL,0);
+
+    /* return results */
+    zip->current_file_ok = (status == UNZ_OK);
+    return status;
+}
+
+/*
+// Unzip Helper Functions - should be here?
+///////////////////////////////////////////
+*/
+
+/*
+  Read the local header of the current file in the zipfile and check that it
+  is coherent with the information cached from the central directory
+  (s->cur_file_info).
+
+  s                        : the opened zipfile
+  piSizeVar                : receives the size of the variable part of the
+                             local header (filename + extra field data)
+  poffset_local_extrafield : receives the offset of the local extra field
+                             (not including s->byte_before_the_zipfile)
+  psize_local_extrafield   : receives the size of the local extra field
+
+  return UNZ_OK if the header is coherent
+  return UNZ_ERRNO on a seek/read failure
+  return UNZ_BADZIPFILE if the signature, compression method, crc, sizes or
+         filename length disagree with the central directory, or if the
+         compression method is neither 0 (stored) nor Z_DEFLATED
+
+  crc and sizes are only compared when bit 3 of the local flags is clear.
+*/
+local int unzlocal_CheckCurrentFileCoherencyHeader (s,piSizeVar,
+                                                    poffset_local_extrafield,
+                                                    psize_local_extrafield)
+    unz_s* s;
+    uInt* piSizeVar;
+    uLong *poffset_local_extrafield;
+    uInt  *psize_local_extrafield;
+{
+    uLong uMagic,uData,uFlags;
+    /* initialised because they are consumed below even when the read that
+       should fill them reported an error */
+    uLong size_filename = 0;
+    uLong size_extra_field = 0;
+    int err=UNZ_OK;
+
+    *piSizeVar = 0;
+    *poffset_local_extrafield = 0;
+    *psize_local_extrafield = 0;
+
+    if (ZSEEK(s->z_filefunc, s->filestream,s->cur_file_info_internal.offset_curfile +
+                                s->byte_before_the_zipfile,ZLIB_FILEFUNC_SEEK_SET)!=0)
+        return UNZ_ERRNO;
+
+
+    /* local file header signature (braces make the else binding explicit) */
+    if (err==UNZ_OK)
+    {
+        if (unzlocal_getLong(&s->z_filefunc, s->filestream,&uMagic) != UNZ_OK)
+            err=UNZ_ERRNO;
+        else if (uMagic!=0x04034b50)
+            err=UNZ_BADZIPFILE;
+    }
+
+    /* version: read to advance in the header, value not checked */
+    if (unzlocal_getShort(&s->z_filefunc, s->filestream,&uData) != UNZ_OK)
+        err=UNZ_ERRNO;
+/*
+    else if ((err==UNZ_OK) && (uData!=s->cur_file_info.wVersion))
+        err=UNZ_BADZIPFILE;
+*/
+    /* general purpose flags; bit 3 relaxes the crc/size checks below */
+    if (unzlocal_getShort(&s->z_filefunc, s->filestream,&uFlags) != UNZ_OK)
+        err=UNZ_ERRNO;
+
+    /* compression method */
+    if (unzlocal_getShort(&s->z_filefunc, s->filestream,&uData) != UNZ_OK)
+        err=UNZ_ERRNO;
+    else if ((err==UNZ_OK) && (uData!=s->cur_file_info.compression_method))
+        err=UNZ_BADZIPFILE;
+
+    if ((err==UNZ_OK) && (s->cur_file_info.compression_method!=0) &&
+                         (s->cur_file_info.compression_method!=Z_DEFLATED))
+        err=UNZ_BADZIPFILE;
+
+    if (unzlocal_getLong(&s->z_filefunc, s->filestream,&uData) != UNZ_OK) /* date/time */
+        err=UNZ_ERRNO;
+
+    if (unzlocal_getLong(&s->z_filefunc, s->filestream,&uData) != UNZ_OK) /* crc */
+        err=UNZ_ERRNO;
+    else if ((err==UNZ_OK) && (uData!=s->cur_file_info.crc) &&
+                              ((uFlags & 8)==0))
+        err=UNZ_BADZIPFILE;
+
+    if (unzlocal_getLong(&s->z_filefunc, s->filestream,&uData) != UNZ_OK) /* size compr */
+        err=UNZ_ERRNO;
+    else if ((err==UNZ_OK) && (uData!=s->cur_file_info.compressed_size) &&
+                              ((uFlags & 8)==0))
+        err=UNZ_BADZIPFILE;
+
+    if (unzlocal_getLong(&s->z_filefunc, s->filestream,&uData) != UNZ_OK) /* size uncompr */
+        err=UNZ_ERRNO;
+    else if ((err==UNZ_OK) && (uData!=s->cur_file_info.uncompressed_size) &&
+                              ((uFlags & 8)==0))
+        err=UNZ_BADZIPFILE;
+
+
+    /* filename length */
+    if (unzlocal_getShort(&s->z_filefunc, s->filestream,&size_filename) != UNZ_OK)
+        err=UNZ_ERRNO;
+    else if ((err==UNZ_OK) && (size_filename!=s->cur_file_info.size_filename))
+        err=UNZ_BADZIPFILE;
+
+    *piSizeVar += (uInt)size_filename;
+
+    /* extra field length; the local extra field may differ from the central
+       one, so it is reported to the caller instead of being compared */
+    if (unzlocal_getShort(&s->z_filefunc, s->filestream,&size_extra_field) != UNZ_OK)
+        err=UNZ_ERRNO;
+    *poffset_local_extrafield= s->cur_file_info_internal.offset_curfile +
+                                    SIZEZIPLOCALHEADER + size_filename;
+    *psize_local_extrafield = (uInt)size_extra_field;
+
+    *piSizeVar += (uInt)size_extra_field;
+
+    return err;
+}
+
+/*
+  Open for reading data the current file in the zipfile.
+
+  file     : the zipfile handle
+  method   : if not NULL, receives the compression method of the entry
+  level    : if not NULL, receives a compression level (1, 2, 6 or 9) derived
+             from bits 1-2 of the entry's general purpose flags
+  raw      : if non-zero the data is delivered as stored in the archive
+             (no inflate)
+  password : if not NULL, the entry is decrypted with this password
+             (rejected with UNZ_PARAMERROR when built with NOUNCRYPT)
+
+  If there is no error and the file is opened, the return value is UNZ_OK.
+  return UNZ_PARAMERROR if file is NULL or there is no valid current file
+  return UNZ_BADZIPFILE if the local header is not coherent
+  return UNZ_INTERNALERROR on allocation failure or if the encryption header
+         cannot be read
+  return the zlib error code if inflateInit2 fails
+
+  A file in zip that is still open on this handle is closed first.
+*/
+extern int ZEXPORT unzOpenCurrentFile3 (file, method, level, raw, password)
+    unzFile file;
+    int* method;
+    int* level;
+    int raw;
+    const char* password;
+{
+    int err=UNZ_OK;
+    uInt iSizeVar;
+    unz_s* s;
+    file_in_zip_read_info_s* pfile_in_zip_read_info;
+    uLong offset_local_extrafield;  /* offset of the local extra field */
+    uInt  size_local_extrafield;    /* size of the local extra field */
+#    ifndef NOUNCRYPT
+    char source[12];
+#    else
+    if (password != NULL)
+        return UNZ_PARAMERROR;
+#    endif
+
+    if (file==NULL)
+        return UNZ_PARAMERROR;
+    s=(unz_s*)file;
+    if (!s->current_file_ok)
+        return UNZ_PARAMERROR;
+
+    if (s->pfile_in_zip_read != NULL)
+        unzCloseCurrentFile(file);
+
+    if (unzlocal_CheckCurrentFileCoherencyHeader(s,&iSizeVar,
+                &offset_local_extrafield,&size_local_extrafield)!=UNZ_OK)
+        return UNZ_BADZIPFILE;
+
+    pfile_in_zip_read_info = (file_in_zip_read_info_s*)
+                                        ALLOC(sizeof(file_in_zip_read_info_s));
+    if (pfile_in_zip_read_info==NULL)
+        return UNZ_INTERNALERROR;
+
+    pfile_in_zip_read_info->read_buffer=(char*)ALLOC(UNZ_BUFSIZE);
+    pfile_in_zip_read_info->offset_local_extrafield = offset_local_extrafield;
+    pfile_in_zip_read_info->size_local_extrafield = size_local_extrafield;
+    pfile_in_zip_read_info->pos_local_extrafield=0;
+    pfile_in_zip_read_info->raw=raw;
+
+    if (pfile_in_zip_read_info->read_buffer==NULL)
+    {
+        TRYFREE(pfile_in_zip_read_info);
+        return UNZ_INTERNALERROR;
+    }
+
+    pfile_in_zip_read_info->stream_initialised=0;
+
+    if (method!=NULL)
+        *method = (int)s->cur_file_info.compression_method;
+
+    if (level!=NULL)
+    {
+        *level = 6;
+        switch (s->cur_file_info.flag & 0x06)
+        {
+          case 6 : *level = 1; break;
+          case 4 : *level = 2; break;
+          case 2 : *level = 9; break;
+        }
+    }
+
+    /* Defensive: the coherency check above already rejects these methods.
+       Fail cleanly instead of recording an error that is later ignored. */
+    if ((s->cur_file_info.compression_method!=0) &&
+        (s->cur_file_info.compression_method!=Z_DEFLATED))
+    {
+        TRYFREE(pfile_in_zip_read_info->read_buffer);
+        TRYFREE(pfile_in_zip_read_info);
+        return UNZ_BADZIPFILE;
+    }
+
+    pfile_in_zip_read_info->crc32_wait=s->cur_file_info.crc;
+    pfile_in_zip_read_info->crc32=0;
+    pfile_in_zip_read_info->compression_method =
+            s->cur_file_info.compression_method;
+    pfile_in_zip_read_info->filestream=s->filestream;
+    pfile_in_zip_read_info->z_filefunc=s->z_filefunc;
+    pfile_in_zip_read_info->byte_before_the_zipfile=s->byte_before_the_zipfile;
+
+    pfile_in_zip_read_info->stream.total_out = 0;
+
+    if ((s->cur_file_info.compression_method==Z_DEFLATED) &&
+        (!raw))
+    {
+      pfile_in_zip_read_info->stream.zalloc = (alloc_func)0;
+      pfile_in_zip_read_info->stream.zfree = (free_func)0;
+      pfile_in_zip_read_info->stream.opaque = (voidpf)0;
+      pfile_in_zip_read_info->stream.next_in = (voidpf)0;
+      pfile_in_zip_read_info->stream.avail_in = 0;
+
+      err=inflateInit2(&pfile_in_zip_read_info->stream, -MAX_WBITS);
+      if (err == Z_OK)
+        pfile_in_zip_read_info->stream_initialised=1;
+      else
+      {
+        /* nothing owns these allocations yet: release them before failing */
+        TRYFREE(pfile_in_zip_read_info->read_buffer);
+        TRYFREE(pfile_in_zip_read_info);
+        return err;
+      }
+        /* windowBits is passed < 0 to tell that there is no zlib header.
+         * Note that in this case inflate *requires* an extra "dummy" byte
+         * after the compressed stream in order to complete decompression and
+         * return Z_STREAM_END.
+         * In unzip, i don't wait absolutely Z_STREAM_END because I known the
+         * size of both compressed and uncompressed data
+         */
+    }
+    pfile_in_zip_read_info->rest_read_compressed =
+            s->cur_file_info.compressed_size ;
+    pfile_in_zip_read_info->rest_read_uncompressed =
+            s->cur_file_info.uncompressed_size ;
+
+
+    /* first byte of entry data, relative to the start of the zip data */
+    pfile_in_zip_read_info->pos_in_zipfile =
+            s->cur_file_info_internal.offset_curfile + SIZEZIPLOCALHEADER +
+              iSizeVar;
+
+    pfile_in_zip_read_info->stream.avail_in = (uInt)0;
+
+    s->pfile_in_zip_read = pfile_in_zip_read_info;
+
+#    ifndef NOUNCRYPT
+    /* forget the state of a previously opened encrypted entry, otherwise a
+       plain entry opened afterwards would be run through zdecode */
+    s->encrypted=0;
+    if (password != NULL)
+    {
+        int i;
+        s->pcrc_32_tab = get_crc_table();
+        init_keys(password,s->keys,s->pcrc_32_tab);
+        if (ZSEEK(s->z_filefunc, s->filestream,
+                  s->pfile_in_zip_read->pos_in_zipfile +
+                     s->pfile_in_zip_read->byte_before_the_zipfile,
+                  ZLIB_FILEFUNC_SEEK_SET)!=0)
+            return UNZ_INTERNALERROR;
+        if(ZREAD(s->z_filefunc, s->filestream,source, 12)<12)
+            return UNZ_INTERNALERROR;
+
+        /* run the 12-byte encryption header through the key schedule */
+        for (i = 0; i<12; i++)
+            zdecode(s->keys,s->pcrc_32_tab,source[i]);
+
+        s->pfile_in_zip_read->pos_in_zipfile+=12;
+        s->encrypted=1;
+    }
+#    endif
+
+
+    return UNZ_OK;
+}
+
+/*
+  Open the current file for reading uncompressed data, without password.
+  Shorthand for unzOpenCurrentFile3 with no method/level output, raw off
+  and a NULL password.
+*/
+extern int ZEXPORT unzOpenCurrentFile (file)
+    unzFile file;
+{
+    const int raw_off = 0;
+
+    return unzOpenCurrentFile3(file, (int*)NULL, (int*)NULL, raw_off,
+                               (const char*)NULL);
+}
+
+/*
+  Open the current file for reading uncompressed data, decrypting it with
+  password.  Shorthand for unzOpenCurrentFile3 with no method/level output
+  and raw off.
+*/
+extern int ZEXPORT unzOpenCurrentFilePassword (file, password)
+    unzFile file;
+    const char* password;
+{
+    const int raw_off = 0;
+
+    return unzOpenCurrentFile3(file, (int*)NULL, (int*)NULL, raw_off,
+                               password);
+}
+
+/*
+  Open the current file without password, optionally in raw mode, and
+  report its compression method and level through method and level.
+  Shorthand for unzOpenCurrentFile3 with a NULL password.
+*/
+extern int ZEXPORT unzOpenCurrentFile2 (file,method,level,raw)
+    unzFile file;
+    int* method;
+    int* level;
+    int raw;
+{
+    const char* const no_password = NULL;
+
+    return unzOpenCurrentFile3(file, method, level, raw, no_password);
+}
+
+/*
+  Read bytes from the current file.
+  buf contain buffer where data must be copied
+  len the size of buf.
+
+  In normal mode at most the remaining uncompressed size is delivered.
+  In raw mode the bytes are delivered as stored in the archive, so at most
+  the remaining stored (compressed) size is delivered.
+
+  return the number of byte copied if somes bytes are copied
+  return 0 if the end of file was reached
+  return <0 with error code if there is an error
+    (UNZ_ERRNO for IO error, or zLib error for uncompress error)
+  return UNZ_PARAMERROR if file is NULL or no file in zip is opened
+*/
+extern int ZEXPORT unzReadCurrentFile  (file, buf, len)
+    unzFile file;
+    voidp buf;
+    unsigned len;
+{
+    int err=UNZ_OK;
+    uInt iRead = 0;
+    unz_s* s;
+    file_in_zip_read_info_s* pfile_in_zip_read_info;
+    if (file==NULL)
+        return UNZ_PARAMERROR;
+    s=(unz_s*)file;
+    pfile_in_zip_read_info=s->pfile_in_zip_read;
+
+    if (pfile_in_zip_read_info==NULL)
+        return UNZ_PARAMERROR;
+
+
+    if (pfile_in_zip_read_info->read_buffer == NULL)
+        return UNZ_END_OF_LIST_OF_FILE;
+    if (len==0)
+        return 0;
+
+    pfile_in_zip_read_info->stream.next_out = (Bytef*)buf;
+
+    pfile_in_zip_read_info->stream.avail_out = (uInt)len;
+
+    if (pfile_in_zip_read_info->raw)
+    {
+        /* raw output is the stored byte stream: bound the request by what is
+           left on disk plus what is already buffered, not by the
+           uncompressed size (which may be smaller than the stored size) */
+        uLong uRawLeft = pfile_in_zip_read_info->rest_read_compressed +
+                         pfile_in_zip_read_info->stream.avail_in;
+        if (len>uRawLeft)
+            pfile_in_zip_read_info->stream.avail_out = (uInt)uRawLeft;
+    }
+    else if (len>pfile_in_zip_read_info->rest_read_uncompressed)
+        pfile_in_zip_read_info->stream.avail_out =
+          (uInt)pfile_in_zip_read_info->rest_read_uncompressed;
+
+    while (pfile_in_zip_read_info->stream.avail_out>0)
+    {
+        /* refill the input buffer from the archive when it is exhausted */
+        if ((pfile_in_zip_read_info->stream.avail_in==0) &&
+            (pfile_in_zip_read_info->rest_read_compressed>0))
+        {
+            uInt uReadThis = UNZ_BUFSIZE;
+            if (pfile_in_zip_read_info->rest_read_compressed<uReadThis)
+                uReadThis = (uInt)pfile_in_zip_read_info->rest_read_compressed;
+            if (uReadThis == 0)
+                return UNZ_EOF;
+            if (ZSEEK(pfile_in_zip_read_info->z_filefunc,
+                      pfile_in_zip_read_info->filestream,
+                      pfile_in_zip_read_info->pos_in_zipfile +
+                         pfile_in_zip_read_info->byte_before_the_zipfile,
+                         ZLIB_FILEFUNC_SEEK_SET)!=0)
+                return UNZ_ERRNO;
+            if (ZREAD(pfile_in_zip_read_info->z_filefunc,
+                      pfile_in_zip_read_info->filestream,
+                      pfile_in_zip_read_info->read_buffer,
+                      uReadThis)!=uReadThis)
+                return UNZ_ERRNO;
+
+
+#            ifndef NOUNCRYPT
+            if(s->encrypted)
+            {
+                uInt i;
+                for(i=0;i<uReadThis;i++)
+                  pfile_in_zip_read_info->read_buffer[i] =
+                      zdecode(s->keys,s->pcrc_32_tab,
+                              pfile_in_zip_read_info->read_buffer[i]);
+            }
+#            endif
+
+
+            pfile_in_zip_read_info->pos_in_zipfile += uReadThis;
+
+            pfile_in_zip_read_info->rest_read_compressed-=uReadThis;
+
+            pfile_in_zip_read_info->stream.next_in =
+                (Bytef*)pfile_in_zip_read_info->read_buffer;
+            pfile_in_zip_read_info->stream.avail_in = (uInt)uReadThis;
+        }
+
+        if ((pfile_in_zip_read_info->compression_method==0) || (pfile_in_zip_read_info->raw))
+        {
+            /* stored entry or raw mode: plain copy from input to output */
+            uInt uDoCopy,i ;
+
+            if ((pfile_in_zip_read_info->stream.avail_in == 0) &&
+                (pfile_in_zip_read_info->rest_read_compressed == 0))
+                return (iRead==0) ? UNZ_EOF : iRead;
+
+            if (pfile_in_zip_read_info->stream.avail_out <
+                            pfile_in_zip_read_info->stream.avail_in)
+                uDoCopy = pfile_in_zip_read_info->stream.avail_out ;
+            else
+                uDoCopy = pfile_in_zip_read_info->stream.avail_in ;
+
+            for (i=0;i<uDoCopy;i++)
+                *(pfile_in_zip_read_info->stream.next_out+i) =
+                        *(pfile_in_zip_read_info->stream.next_in+i);
+
+            pfile_in_zip_read_info->crc32 = crc32(pfile_in_zip_read_info->crc32,
+                                pfile_in_zip_read_info->stream.next_out,
+                                uDoCopy);
+            /* NOTE: in raw mode this counter does not describe the bytes
+               being delivered; it is kept as in the original bookkeeping */
+            pfile_in_zip_read_info->rest_read_uncompressed-=uDoCopy;
+            pfile_in_zip_read_info->stream.avail_in -= uDoCopy;
+            pfile_in_zip_read_info->stream.avail_out -= uDoCopy;
+            pfile_in_zip_read_info->stream.next_out += uDoCopy;
+            pfile_in_zip_read_info->stream.next_in += uDoCopy;
+            pfile_in_zip_read_info->stream.total_out += uDoCopy;
+            iRead += uDoCopy;
+        }
+        else
+        {
+            /* deflated entry: let zlib produce the output */
+            uLong uTotalOutBefore,uTotalOutAfter;
+            const Bytef *bufBefore;
+            uLong uOutThis;
+            int flush=Z_SYNC_FLUSH;
+
+            uTotalOutBefore = pfile_in_zip_read_info->stream.total_out;
+            bufBefore = pfile_in_zip_read_info->stream.next_out;
+
+            /*
+            if ((pfile_in_zip_read_info->rest_read_uncompressed ==
+                     pfile_in_zip_read_info->stream.avail_out) &&
+                (pfile_in_zip_read_info->rest_read_compressed == 0))
+                flush = Z_FINISH;
+            */
+            err=inflate(&pfile_in_zip_read_info->stream,flush);
+
+            uTotalOutAfter = pfile_in_zip_read_info->stream.total_out;
+            uOutThis = uTotalOutAfter-uTotalOutBefore;
+
+            /* the crc is accumulated over the bytes just produced */
+            pfile_in_zip_read_info->crc32 =
+                crc32(pfile_in_zip_read_info->crc32,bufBefore,
+                        (uInt)(uOutThis));
+
+            pfile_in_zip_read_info->rest_read_uncompressed -=
+                uOutThis;
+
+            iRead += (uInt)(uTotalOutAfter - uTotalOutBefore);
+
+            if (err==Z_STREAM_END)
+                return (iRead==0) ? UNZ_EOF : iRead;
+            if (err!=Z_OK)
+                break;
+        }
+    }
+
+    if (err==Z_OK)
+        return iRead;
+    return err;
+}
+
+
+/*
+  Give the current position in uncompressed data, i.e. the total_out
+  counter of the file in zip currently opened.
+  return UNZ_PARAMERROR if file is NULL or no file in zip is opened
+*/
+extern z_off_t ZEXPORT unztell (file)
+    unzFile file;
+{
+    const unz_s* zip = (const unz_s*)file;
+    const file_in_zip_read_info_s* reader;
+
+    reader = (zip != NULL) ? zip->pfile_in_zip_read : NULL;
+    if (reader == NULL)
+        return UNZ_PARAMERROR;
+
+    return (z_off_t)reader->stream.total_out;
+}
+
+
+/*
+  return 1 if no uncompressed byte remains to be read from the file in zip
+  currently opened, 0 elsewhere
+  return UNZ_PARAMERROR if file is NULL or no file in zip is opened
+*/
+extern int ZEXPORT unzeof (file)
+    unzFile file;
+{
+    const unz_s* zip = (const unz_s*)file;
+    const file_in_zip_read_info_s* reader;
+
+    reader = (zip != NULL) ? zip->pfile_in_zip_read : NULL;
+    if (reader == NULL)
+        return UNZ_PARAMERROR;
+
+    return (reader->rest_read_uncompressed == 0) ? 1 : 0;
+}
+
+
+
+/*
+  Read extra field from the current file (opened by unzOpenCurrentFile)
+  This is the local-header version of the extra field (sometimes, there is
+    more info in the local-header version than in the central-header)
+
+  if buf==NULL, it return the size of the local extra field that can be read
+
+  if buf!=NULL, len is the size of the buffer, the extra header is copied in
+    buf.
+  the return value is the number of bytes copied in buf, or (if <0)
+    the error code (UNZ_PARAMERROR if file is NULL or no file in zip is
+    opened, UNZ_ERRNO on a seek/read failure)
+*/
+extern int ZEXPORT unzGetLocalExtrafield (file,buf,len)
+    unzFile file;
+    voidp buf;
+    unsigned len;
+{
+    unz_s* s;
+    file_in_zip_read_info_s* pfile_in_zip_read_info;
+    uInt read_now;
+    uLong size_to_read;
+
+    if (file==NULL)
+        return UNZ_PARAMERROR;
+    s=(unz_s*)file;
+    pfile_in_zip_read_info=s->pfile_in_zip_read;
+
+    if (pfile_in_zip_read_info==NULL)
+        return UNZ_PARAMERROR;
+
+    size_to_read = (pfile_in_zip_read_info->size_local_extrafield -
+                pfile_in_zip_read_info->pos_local_extrafield);
+
+    if (buf==NULL)
+        return (int)size_to_read;
+
+    if (len>size_to_read)
+        read_now = (uInt)size_to_read;
+    else
+        read_now = (uInt)len ;
+
+    if (read_now==0)
+        return 0;
+
+    /* offset_local_extrafield is relative to the start of the zip data, so
+       the bytes preceding the zipfile must be added, exactly as for every
+       other seek on entry data */
+    if (ZSEEK(pfile_in_zip_read_info->z_filefunc,
+              pfile_in_zip_read_info->filestream,
+              pfile_in_zip_read_info->offset_local_extrafield +
+              pfile_in_zip_read_info->pos_local_extrafield +
+              pfile_in_zip_read_info->byte_before_the_zipfile,
+              ZLIB_FILEFUNC_SEEK_SET)!=0)
+        return UNZ_ERRNO;
+
+    if (ZREAD(pfile_in_zip_read_info->z_filefunc,
+              pfile_in_zip_read_info->filestream,
+              buf,read_now)!=read_now)
+        return UNZ_ERRNO;
+
+    return (int)read_now;
+}
+
+/*
+  Close the file in zip opened with unzOpenCurrentFile and release the
+  buffer and inflate state attached to it.
+  Return UNZ_CRCERROR if all the file was read (not in raw mode) but the CRC
+  is not good
+  Return UNZ_PARAMERROR if file is NULL or no file in zip is opened
+*/
+extern int ZEXPORT unzCloseCurrentFile (file)
+    unzFile file;
+{
+    unz_s* zip = (unz_s*)file;
+    file_in_zip_read_info_s* reader;
+    int status = UNZ_OK;
+
+    if (zip == NULL)
+        return UNZ_PARAMERROR;
+    reader = zip->pfile_in_zip_read;
+    if (reader == NULL)
+        return UNZ_PARAMERROR;
+
+    /* the crc is only meaningful once every uncompressed byte was produced */
+    if ((!reader->raw) &&
+        (reader->rest_read_uncompressed == 0) &&
+        (reader->crc32 != reader->crc32_wait))
+        status = UNZ_CRCERROR;
+
+    TRYFREE(reader->read_buffer);
+    reader->read_buffer = NULL;
+
+    if (reader->stream_initialised)
+    {
+        inflateEnd(&reader->stream);
+        reader->stream_initialised = 0;
+    }
+
+    TRYFREE(reader);
+    zip->pfile_in_zip_read = NULL;
+
+    return status;
+}
+
+
+/*
+  Get the global comment string of the ZipFile, in the szComment buffer.
+  uSizeBuf is the size of the szComment buffer.
+  At most uSizeBuf bytes are copied; the string is null-terminated only when
+  uSizeBuf is larger than the comment.
+  return the number of byte copied or an error code <0
+    (UNZ_PARAMERROR if file is NULL, or if szComment is NULL while there are
+     bytes to copy; UNZ_ERRNO on a seek/read failure)
+*/
+extern int ZEXPORT unzGetGlobalComment (file, szComment, uSizeBuf)
+    unzFile file;
+    char *szComment;
+    uLong uSizeBuf;
+{
+    unz_s* s;
+    uLong uReadThis ;
+    if (file==NULL)
+        return UNZ_PARAMERROR;
+    s=(unz_s*)file;
+
+    uReadThis = uSizeBuf;
+    if (uReadThis>s->gi.size_comment)
+        uReadThis = s->gi.size_comment;
+
+    /* a NULL buffer cannot receive comment bytes: refuse instead of
+       dereferencing it below */
+    if ((szComment==NULL) && (uReadThis>0))
+        return UNZ_PARAMERROR;
+
+    /* the comment follows the 22 fixed bytes of the end of central dir */
+    if (ZSEEK(s->z_filefunc,s->filestream,s->central_pos+22,ZLIB_FILEFUNC_SEEK_SET)!=0)
+        return UNZ_ERRNO;
+
+    if (uReadThis>0)
+    {
+      *szComment='\0';
+      if (ZREAD(s->z_filefunc,s->filestream,szComment,uReadThis)!=uReadThis)
+        return UNZ_ERRNO;
+    }
+
+    if ((szComment != NULL) && (uSizeBuf > s->gi.size_comment))
+        *(szComment+s->gi.size_comment)='\0';
+    return (int)uReadThis;
+}
+
+/* Additions by RX '2004 */
+/*
+  Get the offset of the current file in the central directory, usable later
+  with unzSetOffset.
+  return 0 if file is NULL, if there is no valid current file, or if the
+  current index is past the last entry.
+*/
+extern uLong ZEXPORT unzGetOffset (file)
+    unzFile file;
+{
+    unz_s* s;
+
+    /* the return type is unsigned: a negative UNZ_* code would come out as
+       a huge bogus offset, so report "no offset" like the cases below */
+    if (file==NULL)
+        return 0;
+    s=(unz_s*)file;
+    if (!s->current_file_ok)
+      return 0;
+    /* 0 and 0xffff entry counts are not trusted for the end-of-list test */
+    if (s->gi.number_entry != 0 && s->gi.number_entry != 0xffff)
+      if (s->num_file==s->gi.number_entry)
+         return 0;
+    return s->pos_in_central_dir;
+}
+
+/*
+  Make the entry located at central-directory offset pos (as returned by
+  unzGetOffset) the current file and reload its information.
+  return UNZ_PARAMERROR if file is NULL, otherwise the result of reading the
+  entry information (UNZ_OK on success).  current_file_ok is updated to
+  reflect that result.
+*/
+extern int ZEXPORT unzSetOffset (file, pos)
+    unzFile file;
+    uLong pos;
+{
+    unz_s* zip = (unz_s*)file;
+    int status;
+
+    if (zip == NULL)
+        return UNZ_PARAMERROR;
+
+    zip->pos_in_central_dir = pos;
+    zip->num_file = zip->gi.number_entry;      /* hack */
+
+    status = unzlocal_GetCurrentFileInfoInternal(file,
+                                                 &zip->cur_file_info,
+                                                 &zip->cur_file_info_internal,
+                                                 NULL,0,NULL,0,NULL,0);
+    zip->current_file_ok = (status == UNZ_OK) ? 1 : 0;
+    return status;
+}
+
diff --git a/src/minizip/unzip.h b/src/minizip/unzip.h
new file mode 100644
index 0000000..cb6cb2e
--- /dev/null
+++ b/src/minizip/unzip.h
@@ -0,0 +1,352 @@
+/* unzip.h -- IO for uncompress .zip files using zlib
+ Version 1.00, September 10th, 2003
+
+ Copyright (C) 1998-2003 Gilles Vollant
+
+ This unzip package allow extract file from .ZIP file, compatible with PKZip 2.04g
+ WinZip, InfoZip tools and compatible.
+ Encryption and multi volume ZipFile (span) are not supported.
+ Old compressions used by old PKZip 1.x are not supported
+
+
+ I WAIT FEEDBACK at mail info@winimage.com
+ Visit also http://www.winimage.com/zLibDll/unzip.htm for evolution
+
+ Condition of use and distribution are the same than zlib :
+
+ This software is provided 'as-is', without any express or implied
+ warranty. In no event will the authors be held liable for any damages
+ arising from the use of this software.
+
+ Permission is granted to anyone to use this software for any purpose,
+ including commercial applications, and to alter it and redistribute it
+ freely, subject to the following restrictions:
+
+ 1. The origin of this software must not be misrepresented; you must not
+ claim that you wrote the original software. If you use this software
+ in a product, an acknowledgment in the product documentation would be
+ appreciated but is not required.
+ 2. Altered source versions must be plainly marked as such, and must not be
+ misrepresented as being the original software.
+ 3. This notice may not be removed or altered from any source distribution.
+
+
+*/
+
+/* for more info about .ZIP format, see
+ http://www.info-zip.org/pub/infozip/doc/appnote-981119-iz.zip
+ http://www.info-zip.org/pub/infozip/doc/
+ PkWare has also a specification at :
+ ftp://ftp.pkware.com/probdesc.zip
+*/
+
+#ifndef _unz_H
+#define _unz_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef _ZLIB_H
+#include "zlib.h"
+#endif
+
+#ifndef _ZLIBIOAPI_H
+#include "ioapi.h"
+#endif
+
+#if defined(STRICTUNZIP) || defined(STRICTZIPUNZIP)
+/* like the STRICT of WIN32, we define a pointer that cannot be converted
+ from (void*) without cast */
+typedef struct TagunzFile__ { int unused; } unzFile__;
+typedef unzFile__ *unzFile;
+#else
+typedef voidp unzFile;
+#endif
+
+
+#define UNZ_OK (0)
+#define UNZ_END_OF_LIST_OF_FILE (-100)
+#define UNZ_ERRNO (Z_ERRNO)
+#define UNZ_EOF (0)
+#define UNZ_PARAMERROR (-102)
+#define UNZ_BADZIPFILE (-103)
+#define UNZ_INTERNALERROR (-104)
+#define UNZ_CRCERROR (-105)
+
+/* tm_unz contain date/time info */
+typedef struct tm_unz_s
+{
+ uInt tm_sec; /* seconds after the minute - [0,59] */
+ uInt tm_min; /* minutes after the hour - [0,59] */
+ uInt tm_hour; /* hours since midnight - [0,23] */
+ uInt tm_mday; /* day of the month - [1,31] */
+ uInt tm_mon; /* months since January - [0,11] */
+ uInt tm_year; /* years - [1980..2044] */
+} tm_unz;
+
+/* unz_global_info structure contain global data about the ZIPfile
+ These data comes from the end of central dir */
+typedef struct unz_global_info_s
+{
+ uLong number_entry; /* total number of entries in
+ the central dir on this disk */
+ uLong size_comment; /* size of the global comment of the zipfile */
+} unz_global_info;
+
+
+/* unz_file_info contain information about a file in the zipfile */
+typedef struct unz_file_info_s
+{
+ uLong version; /* version made by 2 bytes */
+ uLong version_needed; /* version needed to extract 2 bytes */
+ uLong flag; /* general purpose bit flag 2 bytes */
+ uLong compression_method; /* compression method 2 bytes */
+ uLong dosDate; /* last mod file date in Dos fmt 4 bytes */
+ uLong crc; /* crc-32 4 bytes */
+ uLong compressed_size; /* compressed size 4 bytes */
+ uLong uncompressed_size; /* uncompressed size 4 bytes */
+ uLong size_filename; /* filename length 2 bytes */
+ uLong size_file_extra; /* extra field length 2 bytes */
+ uLong size_file_comment; /* file comment length 2 bytes */
+
+ uLong disk_num_start; /* disk number start 2 bytes */
+ uLong internal_fa; /* internal file attributes 2 bytes */
+ uLong external_fa; /* external file attributes 4 bytes */
+
+ tm_unz tmu_date;
+} unz_file_info;
+
+extern int ZEXPORT unzStringFileNameCompare OF ((const char* fileName1,
+ const char* fileName2,
+ int iCaseSensitivity));
+/*
+   Compare two filenames (fileName1,fileName2).
+   If iCaseSensitivity = 1, comparison is case sensitive (like strcmp)
+   If iCaseSensitivity = 2, comparison is not case sensitive (like strcmpi
+                                or strcasecmp)
+   If iCaseSensitivity = 0, case sensitivity is the default of your operating
+    system (like 1 on Unix, 2 on Windows)
+*/
+
+
+extern unzFile ZEXPORT unzOpen OF((const char *path));
+/*
+ Open a Zip file. path contain the full pathname (by example,
+ on a Windows XP computer "c:\\zlib\\zlib113.zip" or on an Unix computer
+ "zlib/zlib113.zip".
+ If the zipfile cannot be opened (file don't exist or in not valid), the
+ return value is NULL.
+ Else, the return value is a unzFile Handle, usable with other function
+ of this unzip package.
+*/
+
+extern unzFile ZEXPORT unzOpen2 OF((const char *path,
+ zlib_filefunc_def* pzlib_filefunc_def));
+/*
+ Open a Zip file, like unzOpen, but provide a set of file low level API
+ for read/write the zip file (see ioapi.h)
+*/
+
+extern int ZEXPORT unzClose OF((unzFile file));
+/*
+  Close a ZipFile opened with unzOpen.
+  If there are files inside the .Zip opened with unzOpenCurrentFile (see later),
+    these files MUST be closed with unzCloseCurrentFile before calling unzClose.
+ return UNZ_OK if there is no problem. */
+
+extern int ZEXPORT unzGetGlobalInfo OF((unzFile file,
+ unz_global_info *pglobal_info));
+/*
+ Write info about the ZipFile in the *pglobal_info structure.
+ No preparation of the structure is needed
+ return UNZ_OK if there is no problem. */
+
+
+extern int ZEXPORT unzGetGlobalComment OF((unzFile file,
+ char *szComment,
+ uLong uSizeBuf));
+/*
+ Get the global comment string of the ZipFile, in the szComment buffer.
+ uSizeBuf is the size of the szComment buffer.
+ return the number of byte copied or an error code <0
+*/
+
+
+/***************************************************************************/
+/* Unzip package allow you browse the directory of the zipfile */
+
+extern int ZEXPORT unzGoToFirstFile OF((unzFile file));
+/*
+ Set the current file of the zipfile to the first file.
+ return UNZ_OK if there is no problem
+*/
+
+extern int ZEXPORT unzGoToNextFile OF((unzFile file));
+/*
+ Set the current file of the zipfile to the next file.
+ return UNZ_OK if there is no problem
+ return UNZ_END_OF_LIST_OF_FILE if the actual file was the latest.
+*/
+
+extern int ZEXPORT unzLocateFile OF((unzFile file,
+ const char *szFileName,
+ int iCaseSensitivity));
+/*
+ Try locate the file szFileName in the zipfile.
+ For the iCaseSensitivity signification, see unzStringFileNameCompare
+
+ return value :
+ UNZ_OK if the file is found. It becomes the current file.
+ UNZ_END_OF_LIST_OF_FILE if the file is not found
+*/
+
+
+/* ****************************************** */
+/* Ryan supplied functions */
+/* unz_file_pos identifies an entry by its directory offset and its index */
+typedef struct unz_file_pos_s
+{
+ uLong pos_in_zip_directory; /* offset in zip file directory */
+ uLong num_of_file; /* # of file */
+} unz_file_pos;
+
+extern int ZEXPORT unzGetFilePos(
+ unzFile file,
+ unz_file_pos* file_pos);
+
+extern int ZEXPORT unzGoToFilePos(
+ unzFile file,
+ unz_file_pos* file_pos);
+
+/* ****************************************** */
+
+extern int ZEXPORT unzGetCurrentFileInfo OF((unzFile file,
+ unz_file_info *pfile_info,
+ char *szFileName,
+ uLong fileNameBufferSize,
+ void *extraField,
+ uLong extraFieldBufferSize,
+ char *szComment,
+ uLong commentBufferSize));
+/*
+  Get Info about the current file
+  if pfile_info!=NULL, the *pfile_info structure will contain some info about
+        the current file
+  if szFileName!=NULL, the filename string will be copied in szFileName
+            (fileNameBufferSize is the size of the buffer)
+  if extraField!=NULL, the extra field information will be copied in extraField
+            (extraFieldBufferSize is the size of the buffer).
+            This is the Central-header version of the extra field
+  if szComment!=NULL, the comment string of the file will be copied in szComment
+            (commentBufferSize is the size of the buffer)
+*/
+
+/***************************************************************************/
+/* for reading the content of the current zipfile, you can open it, read data
+ from it, and close it (you can close it before reading all the file)
+ */
+
+extern int ZEXPORT unzOpenCurrentFile OF((unzFile file));
+/*
+ Open for reading data the current file in the zipfile.
+ If there is no error, the return value is UNZ_OK.
+*/
+
+extern int ZEXPORT unzOpenCurrentFilePassword OF((unzFile file,
+ const char* password));
+/*
+ Open for reading data the current file in the zipfile.
+ password is a crypting password
+ If there is no error, the return value is UNZ_OK.
+*/
+
+extern int ZEXPORT unzOpenCurrentFile2 OF((unzFile file,
+ int* method,
+ int* level,
+ int raw));
+/*
+  Same as unzOpenCurrentFile, but opens the file for raw reading (no
+    uncompress) if raw==1
+  *method will receive the method of compression, *level will receive the
+     level of compression
+  note : method and level may each be set to NULL if you do not want to
+         know the corresponding value
+*/
+
+extern int ZEXPORT unzOpenCurrentFile3 OF((unzFile file,
+ int* method,
+ int* level,
+ int raw,
+ const char* password));
+/*
+  Same as unzOpenCurrentFile2, with an additional crypting password
+    (password may be NULL if the file is not encrypted)
+  The file is opened for raw reading (no uncompress) if raw==1
+  *method will receive the method of compression, *level will receive the
+     level of compression
+  note : method and level may each be set to NULL if you do not want to
+         know the corresponding value
+*/
+
+
+extern int ZEXPORT unzCloseCurrentFile OF((unzFile file));
+/*
+ Close the file in zip opened with unzOpenCurrentFile
+ Return UNZ_CRCERROR if all the file was read but the CRC is not good
+*/
+
+extern int ZEXPORT unzReadCurrentFile OF((unzFile file,
+ voidp buf,
+ unsigned len));
+/*
+ Read bytes from the current file (opened by unzOpenCurrentFile)
+ buf contain buffer where data must be copied
+ len the size of buf.
+
+ return the number of byte copied if somes bytes are copied
+ return 0 if the end of file was reached
+ return <0 with error code if there is an error
+ (UNZ_ERRNO for IO error, or zLib error for uncompress error)
+*/
+
+extern z_off_t ZEXPORT unztell OF((unzFile file));
+/*
+ Give the current position in uncompressed data
+*/
+
+extern int ZEXPORT unzeof OF((unzFile file));
+/*
+ return 1 if the end of file was reached, 0 elsewhere
+*/
+
+extern int ZEXPORT unzGetLocalExtrafield OF((unzFile file,
+ voidp buf,
+ unsigned len));
+/*
+ Read extra field from the current file (opened by unzOpenCurrentFile)
+ This is the local-header version of the extra field (sometimes, there is
+ more info in the local-header version than in the central-header)
+
+ if buf==NULL, it return the size of the local extra field
+
+ if buf!=NULL, len is the size of the buffer, the extra header is copied in
+ buf.
+ the return value is the number of bytes copied in buf, or (if <0)
+ the error code
+*/
+
+/***************************************************************************/
+
+/* Get the current file offset */
+extern uLong ZEXPORT unzGetOffset (unzFile file);
+
+/* Set the current file offset */
+extern int ZEXPORT unzSetOffset (unzFile file, uLong pos);
+
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _unz_H */
diff --git a/src/minizip/zip.c b/src/minizip/zip.c
new file mode 100644
index 0000000..7dc767f
--- /dev/null
+++ b/src/minizip/zip.c
@@ -0,0 +1,1199 @@
+/* zip.c -- IO on .zip files using zlib
+ Version 1.00, September 10th, 2003
+
+ Copyright (C) 1998-2003 Gilles Vollant
+
+ Read zip.h for more info
+*/
+
+
+#ifndef _WIN32_WCE
+#include <stdio.h>
+#include <stdlib.h>
+#else
+#include <stdio.h>
+#include "celib.h"
+#endif
+#include <string.h>
+#include <time.h>
+#include "zlib.h"
+#include "zip.h"
+
+#ifdef STDC
+# include <stddef.h>
+# include <string.h>
+# include <stdlib.h>
+#endif
+#ifdef NO_ERRNO_H
+ extern int errno;
+#else
+# include <errno.h>
+#endif
+
+
+#ifndef local
+# define local static
+#endif
+/* compile with -Dlocal if your debugger can't find static symbols */
+
+#ifndef VERSIONMADEBY
+# define VERSIONMADEBY (0x0) /* platform depedent */
+#endif
+
+#ifndef Z_BUFSIZE
+#define Z_BUFSIZE (16384)
+#endif
+
+#ifndef Z_MAXFILENAMEINZIP
+#define Z_MAXFILENAMEINZIP (256)
+#endif
+
+#ifndef ALLOC
+# define ALLOC(size) (malloc(size))
+#endif
+#ifndef TRYFREE
+# define TRYFREE(p) {if (p) free(p);}
+#endif
+
+/*
+#define SIZECENTRALDIRITEM (0x2e)
+#define SIZEZIPLOCALHEADER (0x1e)
+*/
+
+/* I've found an old Unix (a SunOS 4.1.3_U1) without all SEEK_* defined.... */
+
+#ifndef SEEK_CUR
+#define SEEK_CUR 1
+#endif
+
+#ifndef SEEK_END
+#define SEEK_END 2
+#endif
+
+#ifndef SEEK_SET
+#define SEEK_SET 0
+#endif
+
+#ifndef DEF_MEM_LEVEL
+#if MAX_MEM_LEVEL >= 8
+# define DEF_MEM_LEVEL 8
+#else
+# define DEF_MEM_LEVEL MAX_MEM_LEVEL
+#endif
+#endif
+const char zip_copyright[] =
+ " zip 1.00 Copyright 1998-2003 Gilles Vollant - http://www.winimage.com/zLibDll";
+
+
+#define SIZEDATA_INDATABLOCK (4096-(4*4))
+
+#define LOCALHEADERMAGIC (0x04034b50)
+#define CENTRALHEADERMAGIC (0x02014b50)
+#define ENDHEADERMAGIC (0x06054b50)
+
+#define FLAG_LOCALHEADER_OFFSET (0x06)
+#define CRC_LOCALHEADER_OFFSET (0x0e)
+
+#define SIZECENTRALHEADER (0x2e) /* 46 */
+
+/* One fixed-size chunk of a singly linked byte buffer.  In this file the
+   chain is used to accumulate the central directory in memory. */
+typedef struct linkedlist_datablock_internal_s
+{
+ struct linkedlist_datablock_internal_s* next_datablock;
+ /* bytes of data[] still free in this block */
+ uLong avail_in_this_block;
+ /* bytes of data[] already used, counted from data[0] */
+ uLong filled_in_this_block;
+ uLong unused; /* for future use and alignment */
+ unsigned char data[SIZEDATA_INDATABLOCK];
+} linkedlist_datablock_internal;
+
+/* Head/tail handle on a chain of linkedlist_datablock_internal blocks.
+   Both pointers are NULL while the list is empty (see init_linkedlist). */
+typedef struct linkedlist_data_s
+{
+ linkedlist_datablock_internal* first_block;
+ linkedlist_datablock_internal* last_block;
+} linkedlist_data;
+
+
+/* State of the archive entry that is currently being written. */
+typedef struct
+{
+ z_stream stream; /* zlib stream structure used for deflate */
+ int stream_initialised; /* 1 if stream is initialised */
+ uInt pos_in_buffered_data; /* number of bytes pending in buffered_data */
+
+ uLong pos_local_header; /* offset of the local header of the file
+ currently being written */
+ char* central_header; /* central header data for the current file */
+ uLong size_centralheader; /* size of the central header for cur file */
+ uLong flag; /* flag of the file currently being written */
+
+ int method; /* compression method of file currently written */
+ int raw; /* 1 for directly writing raw data */
+ Byte buffered_data[Z_BUFSIZE];/* buffer containing compressed data to be written */
+ uLong dosDate; /* modification date/time stored in the entry headers */
+ uLong crc32; /* running CRC-32 of the data passed to zipWriteInFileInZip */
+ int encrypt; /* non-zero when buffered data is encrypted before writing */
+#ifndef NOCRYPT
+ unsigned long keys[3]; /* keys defining the pseudo-random sequence */
+ const unsigned long* pcrc_32_tab; /* CRC table obtained from get_crc_table() */
+ int crypt_header_size; /* bytes of encryption header written before the data */
+#endif
+} curfile_info;
+
+/* Private state behind the opaque zipFile handle. */
+typedef struct
+{
+ zlib_filefunc_def z_filefunc; /* I/O callbacks used for every file access */
+ voidpf filestream; /* io structure of the zipfile */
+ linkedlist_data central_dir;/* datablock with central dir in construction*/
+ int in_opened_file_inzip; /* 1 if a file in the zip is currently being written */
+ curfile_info ci; /* info on the file currently being written */
+
+ uLong begin_pos; /* position of the beginning of the zipfile */
+ /* subtracted from absolute file positions before they are stored as
+    offsets in the headers (bytes preceding the zip data) */
+ uLong add_position_when_writting_offset;
+ uLong number_entry; /* number of entries recorded so far */
+} zip_internal;
+
+
+
+#ifndef NOCRYPT
+#define INCLUDECRYPTINGCODE_IFCRYPTALLOWED
+#include "crypt.h"
+#endif
+
+/* Allocate one empty data block (no successor, nothing filled, whole data[]
+   area available).  Returns NULL when the allocation fails. */
+local linkedlist_datablock_internal* allocate_new_datablock()
+{
+    linkedlist_datablock_internal* block =
+        (linkedlist_datablock_internal*)ALLOC(sizeof(linkedlist_datablock_internal));
+
+    if (block == NULL)
+        return NULL;
+
+    block->avail_in_this_block = SIZEDATA_INDATABLOCK;
+    block->filled_in_this_block = 0;
+    block->next_datablock = NULL;
+    return block;
+}
+
+/* Release every block of the chain that starts at ldi (NULL is accepted). */
+local void free_datablock(ldi)
+    linkedlist_datablock_internal* ldi;
+{
+    linkedlist_datablock_internal* following;
+
+    for (; ldi != NULL; ldi = following)
+    {
+        /* remember the successor before the current block goes away */
+        following = ldi->next_datablock;
+        TRYFREE(ldi);
+    }
+}
+
+/* Put a list handle into the empty state. */
+local void init_linkedlist(ll)
+    linkedlist_data* ll;
+{
+    ll->last_block = NULL;
+    ll->first_block = NULL;
+}
+
+/* Free all blocks owned by the list and leave it empty. */
+local void free_linkedlist(ll)
+    linkedlist_data* ll;
+{
+    linkedlist_datablock_internal* head = ll->first_block;
+
+    free_datablock(head);
+    ll->last_block = NULL;
+    ll->first_block = NULL;
+}
+
+
+/* Append len bytes from buf to the block list ll, allocating new blocks as
+   the current last block fills up.
+   Returns ZIP_OK, or ZIP_INTERNALERROR when ll is NULL or a block cannot be
+   allocated (bytes appended before the failure stay in the list). */
+local int add_data_in_datablock(ll,buf,len)
+    linkedlist_data* ll;
+    const void* buf;
+    uLong len;
+{
+    linkedlist_datablock_internal* block;
+    const unsigned char* src;
+
+    if (ll==NULL)
+        return ZIP_INTERNALERROR;
+
+    if (ll->last_block == NULL)
+    {
+        /* empty list: create the very first block */
+        block = allocate_new_datablock();
+        ll->first_block = ll->last_block = block;
+        if (block == NULL)
+            return ZIP_INTERNALERROR;
+    }
+
+    src = (unsigned char*)buf;
+
+    for (block = ll->last_block; len>0; )
+    {
+        uInt chunk;
+
+        if (block->avail_in_this_block==0)
+        {
+            /* current block is full: chain a fresh one behind it */
+            linkedlist_datablock_internal* fresh = allocate_new_datablock();
+            block->next_datablock = fresh;
+            if (fresh == NULL)
+                return ZIP_INTERNALERROR;
+            block = fresh;
+            ll->last_block = fresh;
+        }
+
+        /* copy as much as fits in the current block */
+        chunk = (block->avail_in_this_block < len) ?
+                    (uInt)block->avail_in_this_block : (uInt)len;
+
+        memcpy(&(block->data[block->filled_in_this_block]), src, chunk);
+
+        block->avail_in_this_block -= chunk;
+        block->filled_in_this_block += chunk;
+        src += chunk;
+        len -= chunk;
+    }
+    return ZIP_OK;
+}
+
+
+
+/****************************************************************************/
+
+#ifndef NO_ADDFILEINEXISTINGZIP
+/* ===========================================================================
+   Writes the value x to the given file stream, least significant byte first.
+   nbByte == 1, 2 or 4 (byte, short or long)
+   When x does not fit in nbByte bytes, every written byte is 0xff.
+   Returns ZIP_OK, or ZIP_ERRNO when fewer than nbByte bytes were written.
+*/
+
+local int ziplocal_putValue OF((const zlib_filefunc_def* pzlib_filefunc_def,
+                                voidpf filestream, uLong x, int nbByte));
+local int ziplocal_putValue (pzlib_filefunc_def, filestream, x, nbByte)
+    const zlib_filefunc_def* pzlib_filefunc_def;
+    voidpf filestream;
+    uLong x;
+    int nbByte;
+{
+    unsigned char bytes[4];
+    int k = 0;
+
+    while (k < nbByte)
+    {
+        bytes[k++] = (unsigned char)(x & 0xff);
+        x >>= 8;
+    }
+
+    if (x != 0)
+    {
+        /* data overflow - hack for ZIP64 */
+        for (k = 0; k < nbByte; k++)
+            bytes[k] = 0xff;
+    }
+
+    return (ZWRITE(*pzlib_filefunc_def,filestream,bytes,nbByte)==(uLong)nbByte)
+               ? ZIP_OK : ZIP_ERRNO;
+}
+
+/* Same encoding as ziplocal_putValue, but stores the nbByte bytes at dest
+   instead of writing them to a file: least significant byte first, and all
+   bytes set to 0xff when x does not fit in nbByte bytes. */
+local void ziplocal_putValue_inmemory OF((void* dest, uLong x, int nbByte));
+local void ziplocal_putValue_inmemory (dest, x, nbByte)
+    void* dest;
+    uLong x;
+    int nbByte;
+{
+    unsigned char* out = (unsigned char*)dest;
+    int k = 0;
+
+    while (k < nbByte)
+    {
+        out[k++] = (unsigned char)(x & 0xff);
+        x >>= 8;
+    }
+
+    if (x == 0)
+        return;
+
+    /* data overflow - hack for ZIP64 */
+    for (k = 0; k < nbByte; k++)
+        out[k] = 0xff;
+}
+/****************************************************************************/
+
+
+/* Pack the broken-down time *ptm into a 32-bit DOS date/time value:
+   high 16 bits = day + 32*(month+1) + 512*year, low 16 bits =
+   sec/2 + 32*min + 2048*hour.
+   The year is stored relative to 1980.  tm_year may be given as a full
+   year (>= 1980) or as years since 1900 (>= 80); both are reduced to an
+   offset from 1980.  The comparisons are inclusive so that the boundary
+   years 1980 and 80 are converted to offset 0 instead of being packed
+   unconverted.
+   dosDate is unused; it is kept for interface compatibility. */
+local uLong ziplocal_TmzDateToDosDate(ptm,dosDate)
+    const tm_zip* ptm;
+    uLong dosDate;
+{
+    uLong year = (uLong)ptm->tm_year;
+
+    (void)dosDate;
+
+    if (year>=1980)
+        year-=1980;
+    else if (year>=80)
+        year-=80;
+    return
+      (uLong) (((ptm->tm_mday) + (32 * (ptm->tm_mon+1)) + (512 * year)) << 16) |
+        ((ptm->tm_sec/2) + (32* ptm->tm_min) + (2048 * (uLong)ptm->tm_hour));
+}
+
+
+/****************************************************************************/
+
+/* Read a single byte from filestream into *pi.
+   Returns ZIP_OK on success; otherwise ZIP_ERRNO when the stream reports an
+   error, or ZIP_EOF when it does not.  *pi is left untouched on failure. */
+local int ziplocal_getByte OF((
+    const zlib_filefunc_def* pzlib_filefunc_def,
+    voidpf filestream,
+    int *pi));
+
+local int ziplocal_getByte(pzlib_filefunc_def,filestream,pi)
+    const zlib_filefunc_def* pzlib_filefunc_def;
+    voidpf filestream;
+    int *pi;
+{
+    unsigned char byte_read;
+    int got = (int)ZREAD(*pzlib_filefunc_def,filestream,&byte_read,1);
+
+    if (got != 1)
+        return ZERROR(*pzlib_filefunc_def,filestream) ? ZIP_ERRNO : ZIP_EOF;
+
+    *pi = (int)byte_read;
+    return ZIP_OK;
+}
+
+
+/* ===========================================================================
+   Reads a 16-bit value stored least significant byte first from the given
+   file stream.  On success *pX receives the value and ZIP_OK is returned;
+   on failure *pX is set to 0 and the error from ziplocal_getByte is returned.
+*/
+local int ziplocal_getShort OF((
+    const zlib_filefunc_def* pzlib_filefunc_def,
+    voidpf filestream,
+    uLong *pX));
+
+local int ziplocal_getShort (pzlib_filefunc_def,filestream,pX)
+    const zlib_filefunc_def* pzlib_filefunc_def;
+    voidpf filestream;
+    uLong *pX;
+{
+    uLong x = 0;
+    /* initialised: ziplocal_getByte leaves it untouched when a read fails,
+       and it must never be read while indeterminate */
+    int i = 0;
+    int err = ZIP_OK;
+    int shift;
+
+    for (shift = 0; (shift < 16) && (err == ZIP_OK); shift += 8)
+    {
+        err = ziplocal_getByte(pzlib_filefunc_def,filestream,&i);
+        if (err == ZIP_OK)
+            x += ((uLong)i) << shift;
+    }
+
+    *pX = (err == ZIP_OK) ? x : 0;
+    return err;
+}
+
+/* Reads a 32-bit value stored least significant byte first from the given
+   file stream.  On success *pX receives the value and ZIP_OK is returned;
+   on failure *pX is set to 0 and the error from ziplocal_getByte is returned. */
+local int ziplocal_getLong OF((
+    const zlib_filefunc_def* pzlib_filefunc_def,
+    voidpf filestream,
+    uLong *pX));
+
+local int ziplocal_getLong (pzlib_filefunc_def,filestream,pX)
+    const zlib_filefunc_def* pzlib_filefunc_def;
+    voidpf filestream;
+    uLong *pX;
+{
+    uLong x = 0;
+    /* initialised: ziplocal_getByte leaves it untouched when a read fails,
+       and it must never be read while indeterminate */
+    int i = 0;
+    int err = ZIP_OK;
+    int shift;
+
+    for (shift = 0; (shift < 32) && (err == ZIP_OK); shift += 8)
+    {
+        err = ziplocal_getByte(pzlib_filefunc_def,filestream,&i);
+        if (err == ZIP_OK)
+            x += ((uLong)i) << shift;
+    }
+
+    *pX = (err == ZIP_OK) ? x : 0;
+    return err;
+}
+
+#ifndef BUFREADCOMMENT
+#define BUFREADCOMMENT (0x400)
+#endif
+/*
+ Locate the Central directory of a zipfile (at the end, just before
+ the global comment)
+
+ Scans backwards from the end of the file, over at most 0xffff bytes, for
+ the byte sequence 0x50 0x4b 0x05 0x06 and returns the file offset at which
+ it was found.  Returns 0 when the sequence is not found, or when seeking
+ to the end of the file or allocating the scan buffer fails.
+*/
+local uLong ziplocal_SearchCentralDir OF((
+ const zlib_filefunc_def* pzlib_filefunc_def,
+ voidpf filestream));
+
+local uLong ziplocal_SearchCentralDir(pzlib_filefunc_def,filestream)
+ const zlib_filefunc_def* pzlib_filefunc_def;
+ voidpf filestream;
+{
+ unsigned char* buf;
+ uLong uSizeFile;
+ uLong uBackRead;
+ uLong uMaxBack=0xffff; /* maximum size of global comment */
+ uLong uPosFound=0;
+
+ if (ZSEEK(*pzlib_filefunc_def,filestream,0,ZLIB_FILEFUNC_SEEK_END) != 0)
+ return 0;
+
+
+ /* position after seeking to the end == size of the file */
+ uSizeFile = ZTELL(*pzlib_filefunc_def,filestream);
+
+ /* never look back further than the start of the file */
+ if (uMaxBack>uSizeFile)
+ uMaxBack = uSizeFile;
+
+ buf = (unsigned char*)ALLOC(BUFREADCOMMENT+4);
+ if (buf==NULL)
+ return 0;
+
+ uBackRead = 4;
+ while (uBackRead<uMaxBack)
+ {
+ uLong uReadSize,uReadPos ;
+ int i;
+ /* move the window start BUFREADCOMMENT bytes further back, clamped */
+ if (uBackRead+BUFREADCOMMENT>uMaxBack)
+ uBackRead = uMaxBack;
+ else
+ uBackRead+=BUFREADCOMMENT;
+ uReadPos = uSizeFile-uBackRead ;
+
+ /* read up to BUFREADCOMMENT+4 bytes; the 4 extra bytes overlap the
+    previously scanned window */
+ uReadSize = ((BUFREADCOMMENT+4) < (uSizeFile-uReadPos)) ?
+ (BUFREADCOMMENT+4) : (uSizeFile-uReadPos);
+ if (ZSEEK(*pzlib_filefunc_def,filestream,uReadPos,ZLIB_FILEFUNC_SEEK_SET)!=0)
+ break;
+
+ if (ZREAD(*pzlib_filefunc_def,filestream,buf,uReadSize)!=uReadSize)
+ break;
+
+ /* scan the window from its highest index down to 0, so the match
+    closest to the end of the file is taken */
+ for (i=(int)uReadSize-3; (i--)>0;)
+ if (((*(buf+i))==0x50) && ((*(buf+i+1))==0x4b) &&
+ ((*(buf+i+2))==0x05) && ((*(buf+i+3))==0x06))
+ {
+ uPosFound = uReadPos+i;
+ break;
+ }
+
+ if (uPosFound!=0)
+ break;
+ }
+ TRYFREE(buf);
+ return uPosFound;
+}
+#endif /* !NO_ADDFILEINEXISTINGZIP*/
+
+/************************************************************/
+/* Open a zip archive for writing and return its handle, or NULL on failure.
+
+   pathname            passed unchanged to the zopen_file callback
+   append              APPEND_STATUS_CREATE opens with the CREATE mode flag;
+                       any other value opens an existing file.  With
+                       APPEND_STATUS_ADDINZIP (unless NO_ADDFILEINEXISTINGZIP
+                       is defined) the existing central directory is loaded
+                       into memory and the write position is placed at its
+                       start, so new entries overwrite it.
+   globalcomment       not used by this implementation
+   pzlib_filefunc_def  I/O callbacks; NULL selects fill_fopen_filefunc()
+
+   Every failure path releases what was acquired so far: the file stream is
+   closed, the handle memory is freed and any central-directory blocks that
+   were already loaded are released. */
+extern zipFile ZEXPORT zipOpen2 (pathname, append, globalcomment, pzlib_filefunc_def)
+    const char *pathname;
+    int append;
+    zipcharpc* globalcomment;
+    zlib_filefunc_def* pzlib_filefunc_def;
+{
+    zip_internal ziinit;
+    zip_internal* zi;
+    int err=ZIP_OK;
+
+    (void)globalcomment;
+
+    if (pzlib_filefunc_def==NULL)
+        fill_fopen_filefunc(&ziinit.z_filefunc);
+    else
+        ziinit.z_filefunc = *pzlib_filefunc_def;
+
+    ziinit.filestream = (*(ziinit.z_filefunc.zopen_file))
+                 (ziinit.z_filefunc.opaque,
+                  pathname,
+                  (append == APPEND_STATUS_CREATE) ?
+                  (ZLIB_FILEFUNC_MODE_READ | ZLIB_FILEFUNC_MODE_WRITE | ZLIB_FILEFUNC_MODE_CREATE) :
+                  (ZLIB_FILEFUNC_MODE_READ | ZLIB_FILEFUNC_MODE_WRITE | ZLIB_FILEFUNC_MODE_EXISTING));
+
+    if (ziinit.filestream == NULL)
+        return NULL;
+    ziinit.begin_pos = ZTELL(ziinit.z_filefunc,ziinit.filestream);
+    ziinit.in_opened_file_inzip = 0;
+    ziinit.ci.stream_initialised = 0;
+    ziinit.number_entry = 0;
+    ziinit.add_position_when_writting_offset = 0;
+    init_linkedlist(&(ziinit.central_dir));
+
+
+    zi = (zip_internal*)ALLOC(sizeof(zip_internal));
+    if (zi==NULL)
+    {
+        ZCLOSE(ziinit.z_filefunc,ziinit.filestream);
+        return NULL;
+    }
+
+    /* now we add file in a zipfile */
+#    ifndef NO_ADDFILEINEXISTINGZIP
+    if (append == APPEND_STATUS_ADDINZIP)
+    {
+        uLong byte_before_the_zipfile;/* byte before the zipfile, (>0 for sfx)*/
+
+        uLong size_central_dir;     /* size of the central directory  */
+        uLong offset_central_dir;   /* offset of start of central directory */
+        uLong central_pos,uL;
+
+        uLong number_disk;          /* number of the current disk, used for
+                                       spanning ZIP, unsupported, always 0*/
+        uLong number_disk_with_CD;  /* number of the disk with central dir, used
+                                       for spanning ZIP, unsupported, always 0*/
+        uLong number_entry;
+        uLong number_entry_CD;      /* total number of entries in
+                                       the central dir
+                                       (same as number_entry on nospan) */
+        uLong size_comment;
+
+        central_pos = ziplocal_SearchCentralDir(&ziinit.z_filefunc,ziinit.filestream);
+        if (central_pos==0)
+            err=ZIP_ERRNO;
+
+        if (ZSEEK(ziinit.z_filefunc, ziinit.filestream,
+                                        central_pos,ZLIB_FILEFUNC_SEEK_SET)!=0)
+            err=ZIP_ERRNO;
+
+        /* the signature, already checked */
+        if (ziplocal_getLong(&ziinit.z_filefunc, ziinit.filestream,&uL)!=ZIP_OK)
+            err=ZIP_ERRNO;
+
+        /* number of this disk */
+        if (ziplocal_getShort(&ziinit.z_filefunc, ziinit.filestream,&number_disk)!=ZIP_OK)
+            err=ZIP_ERRNO;
+
+        /* number of the disk with the start of the central directory */
+        if (ziplocal_getShort(&ziinit.z_filefunc, ziinit.filestream,&number_disk_with_CD)!=ZIP_OK)
+            err=ZIP_ERRNO;
+
+        /* total number of entries in the central dir on this disk */
+        if (ziplocal_getShort(&ziinit.z_filefunc, ziinit.filestream,&number_entry)!=ZIP_OK)
+            err=ZIP_ERRNO;
+
+        /* total number of entries in the central dir */
+        if (ziplocal_getShort(&ziinit.z_filefunc, ziinit.filestream,&number_entry_CD)!=ZIP_OK)
+            err=ZIP_ERRNO;
+
+        if ((number_entry_CD!=number_entry) ||
+            (number_disk_with_CD!=0) ||
+            (number_disk!=0))
+            err=ZIP_BADZIPFILE;
+
+        /* size of the central directory */
+        if (ziplocal_getLong(&ziinit.z_filefunc, ziinit.filestream,&size_central_dir)!=ZIP_OK)
+            err=ZIP_ERRNO;
+
+        /* offset of start of central directory with respect to the
+            starting disk number */
+        if (ziplocal_getLong(&ziinit.z_filefunc, ziinit.filestream,&offset_central_dir)!=ZIP_OK)
+            err=ZIP_ERRNO;
+
+        /* zipfile comment length */
+        if (ziplocal_getShort(&ziinit.z_filefunc, ziinit.filestream,&size_comment)!=ZIP_OK)
+            err=ZIP_ERRNO;
+
+        if ((central_pos<offset_central_dir+size_central_dir) &&
+            (err==ZIP_OK))
+            err=ZIP_BADZIPFILE;
+
+        if (err!=ZIP_OK)
+        {
+            ZCLOSE(ziinit.z_filefunc, ziinit.filestream);
+            TRYFREE(zi);    /* was leaked on this path */
+            return NULL;
+        }
+
+        byte_before_the_zipfile = central_pos -
+                                (offset_central_dir+size_central_dir);
+        ziinit.add_position_when_writting_offset = byte_before_the_zipfile ;
+
+        {
+            /* copy the existing central directory into the in-memory list */
+            uLong size_central_dir_to_read = size_central_dir;
+            size_t buf_size = SIZEDATA_INDATABLOCK;
+            void* buf_read = (void*)ALLOC(buf_size);
+
+            if (buf_read == NULL)
+                err=ZIP_INTERNALERROR;
+            else if (ZSEEK(ziinit.z_filefunc, ziinit.filestream,
+                  offset_central_dir + byte_before_the_zipfile,
+                  ZLIB_FILEFUNC_SEEK_SET) != 0)
+                err=ZIP_ERRNO;
+
+            while ((size_central_dir_to_read>0) && (err==ZIP_OK))
+            {
+                uLong read_this = SIZEDATA_INDATABLOCK;
+                if (read_this > size_central_dir_to_read)
+                    read_this = size_central_dir_to_read;
+                if (ZREAD(ziinit.z_filefunc, ziinit.filestream,buf_read,read_this) != read_this)
+                    err=ZIP_ERRNO;
+
+                if (err==ZIP_OK)
+                    err = add_data_in_datablock(&ziinit.central_dir,buf_read,
+                                                (uLong)read_this);
+                size_central_dir_to_read-=read_this;
+            }
+            TRYFREE(buf_read);
+        }
+        ziinit.begin_pos = byte_before_the_zipfile;
+        ziinit.number_entry = number_entry_CD;
+
+        /* new entries are written over the old central directory */
+        if ((err==ZIP_OK) &&
+            (ZSEEK(ziinit.z_filefunc, ziinit.filestream,
+                  offset_central_dir+byte_before_the_zipfile,ZLIB_FILEFUNC_SEEK_SET)!=0))
+            err=ZIP_ERRNO;
+    }
+#    endif /* !NO_ADDFILEINEXISTINGZIP*/
+
+    if (err != ZIP_OK)
+    {
+        /* release everything acquired above, not only the handle memory */
+        ZCLOSE(ziinit.z_filefunc, ziinit.filestream);
+        free_linkedlist(&(ziinit.central_dir));
+        TRYFREE(zi);
+        return NULL;
+    }
+
+    *zi = ziinit;
+    return (zipFile)zi;
+}
+
+/* Convenience wrapper: zipOpen2 with a NULL global-comment pointer and a
+   NULL I/O function table. */
+extern zipFile ZEXPORT zipOpen (pathname, append)
+ const char *pathname;
+ int append;
+{
+ return zipOpen2(pathname,append,NULL,NULL);
+}
+
+/* Start a new entry in the archive: build its central-directory header in
+   memory, write its local header to the file and prepare the compressor.
+
+   file                    handle returned by zipOpen/zipOpen2
+   filename                entry name; NULL is replaced by "-"
+   zipfi                   date and attributes; NULL means all zero
+   extrafield_local/size   extra field written after the local header
+   extrafield_global/size  extra field stored in the central header
+   comment                 entry comment, may be NULL
+   method                  0 (store) or Z_DEFLATED
+   level, windowBits, memLevel, strategy
+                           forwarded to deflateInit2; a positive windowBits
+                           is negated first
+   raw                     non-zero: data passed later is written as is
+   password, crcForCrypting
+                           enable encryption (rejected with ZIP_PARAMERROR
+                           when built with NOCRYPT)
+
+   An entry that is still open is closed first.
+   Returns ZIP_OK on success, ZIP_PARAMERROR for bad arguments,
+   ZIP_INTERNALERROR when the central header cannot be allocated, ZIP_ERRNO
+   on a write failure, or the zlib error from deflateInit2.  On any failure
+   the entry is not left open and the resources acquired here are released. */
+extern int ZEXPORT zipOpenNewFileInZip3 (file, filename, zipfi,
+                                         extrafield_local, size_extrafield_local,
+                                         extrafield_global, size_extrafield_global,
+                                         comment, method, level, raw,
+                                         windowBits, memLevel, strategy,
+                                         password, crcForCrypting)
+    zipFile file;
+    const char* filename;
+    const zip_fileinfo* zipfi;
+    const void* extrafield_local;
+    uInt size_extrafield_local;
+    const void* extrafield_global;
+    uInt size_extrafield_global;
+    const char* comment;
+    int method;
+    int level;
+    int raw;
+    int windowBits;
+    int memLevel;
+    int strategy;
+    const char* password;
+    uLong crcForCrypting;
+{
+    zip_internal* zi;
+    uInt size_filename;
+    uInt size_comment;
+    uInt i;
+    int err = ZIP_OK;
+
+#    ifdef NOCRYPT
+    if (password != NULL)
+        return ZIP_PARAMERROR;
+#    endif
+
+    if (file == NULL)
+        return ZIP_PARAMERROR;
+    if ((method!=0) && (method!=Z_DEFLATED))
+        return ZIP_PARAMERROR;
+
+    zi = (zip_internal*)file;
+
+    if (zi->in_opened_file_inzip == 1)
+    {
+        err = zipCloseFileInZip (file);
+        if (err != ZIP_OK)
+            return err;
+    }
+
+
+    if (filename==NULL)
+        filename="-";
+
+    if (comment==NULL)
+        size_comment = 0;
+    else
+        size_comment = strlen(comment);
+
+    size_filename = strlen(filename);
+
+    if (zipfi == NULL)
+        zi->ci.dosDate = 0;
+    else
+    {
+        if (zipfi->dosDate != 0)
+            zi->ci.dosDate = zipfi->dosDate;
+        else zi->ci.dosDate = ziplocal_TmzDateToDosDate(&zipfi->tmz_date,zipfi->dosDate);
+    }
+
+    /* general purpose flag: bits 1-2 record the compression level class,
+       bit 0 marks an encrypted entry */
+    zi->ci.flag = 0;
+    if ((level==8) || (level==9))
+      zi->ci.flag |= 2;
+    if (level==2)
+      zi->ci.flag |= 4;
+    if (level==1)
+      zi->ci.flag |= 6;
+    if (password != NULL)
+      zi->ci.flag |= 1;
+
+    zi->ci.crc32 = 0;
+    zi->ci.method = method;
+    zi->ci.encrypt = 0;
+    zi->ci.stream_initialised = 0;
+    zi->ci.pos_in_buffered_data = 0;
+    zi->ci.raw = raw;
+    zi->ci.pos_local_header = ZTELL(zi->z_filefunc,zi->filestream) ;
+    zi->ci.size_centralheader = SIZECENTRALHEADER + size_filename +
+                                      size_extrafield_global + size_comment;
+    zi->ci.central_header = (char*)ALLOC((uInt)zi->ci.size_centralheader);
+
+    /* must be checked BEFORE the header is filled in below */
+    if (zi->ci.central_header == NULL)
+        return ZIP_INTERNALERROR;
+
+    ziplocal_putValue_inmemory(zi->ci.central_header,(uLong)CENTRALHEADERMAGIC,4);
+    /* version info */
+    ziplocal_putValue_inmemory(zi->ci.central_header+4,(uLong)VERSIONMADEBY,2);
+    ziplocal_putValue_inmemory(zi->ci.central_header+6,(uLong)20,2);
+    ziplocal_putValue_inmemory(zi->ci.central_header+8,(uLong)zi->ci.flag,2);
+    ziplocal_putValue_inmemory(zi->ci.central_header+10,(uLong)zi->ci.method,2);
+    ziplocal_putValue_inmemory(zi->ci.central_header+12,(uLong)zi->ci.dosDate,4);
+    ziplocal_putValue_inmemory(zi->ci.central_header+16,(uLong)0,4); /*crc*/
+    ziplocal_putValue_inmemory(zi->ci.central_header+20,(uLong)0,4); /*compr size*/
+    ziplocal_putValue_inmemory(zi->ci.central_header+24,(uLong)0,4); /*uncompr size*/
+    ziplocal_putValue_inmemory(zi->ci.central_header+28,(uLong)size_filename,2);
+    ziplocal_putValue_inmemory(zi->ci.central_header+30,(uLong)size_extrafield_global,2);
+    ziplocal_putValue_inmemory(zi->ci.central_header+32,(uLong)size_comment,2);
+    ziplocal_putValue_inmemory(zi->ci.central_header+34,(uLong)0,2); /*disk nm start*/
+
+    if (zipfi==NULL)
+        ziplocal_putValue_inmemory(zi->ci.central_header+36,(uLong)0,2);
+    else
+        ziplocal_putValue_inmemory(zi->ci.central_header+36,(uLong)zipfi->internal_fa,2);
+
+    if (zipfi==NULL)
+        ziplocal_putValue_inmemory(zi->ci.central_header+38,(uLong)0,4);
+    else
+        ziplocal_putValue_inmemory(zi->ci.central_header+38,(uLong)zipfi->external_fa,4);
+
+    ziplocal_putValue_inmemory(zi->ci.central_header+42,(uLong)zi->ci.pos_local_header- zi->add_position_when_writting_offset,4);
+
+    /* variable-length tail of the central header: name, extra field, comment */
+    for (i=0;i<size_filename;i++)
+        *(zi->ci.central_header+SIZECENTRALHEADER+i) = *(filename+i);
+
+    for (i=0;i<size_extrafield_global;i++)
+        *(zi->ci.central_header+SIZECENTRALHEADER+size_filename+i) =
+              *(((const char*)extrafield_global)+i);
+
+    for (i=0;i<size_comment;i++)
+        *(zi->ci.central_header+SIZECENTRALHEADER+size_filename+
+              size_extrafield_global+i) = *(comment+i);
+
+    /* write the local header */
+    err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,(uLong)LOCALHEADERMAGIC,4);
+
+    if (err==ZIP_OK)
+        err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,(uLong)20,2);/* version needed to extract */
+    if (err==ZIP_OK)
+        err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,(uLong)zi->ci.flag,2);
+
+    if (err==ZIP_OK)
+        err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,(uLong)zi->ci.method,2);
+
+    if (err==ZIP_OK)
+        err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,(uLong)zi->ci.dosDate,4);
+
+    /* crc and sizes are not known yet; they are patched when the entry
+       is closed */
+    if (err==ZIP_OK)
+        err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,(uLong)0,4); /* crc 32, unknown */
+    if (err==ZIP_OK)
+        err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,(uLong)0,4); /* compressed size, unknown */
+    if (err==ZIP_OK)
+        err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,(uLong)0,4); /* uncompressed size, unknown */
+
+    if (err==ZIP_OK)
+        err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,(uLong)size_filename,2);
+
+    if (err==ZIP_OK)
+        err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,(uLong)size_extrafield_local,2);
+
+    if ((err==ZIP_OK) && (size_filename>0))
+        if (ZWRITE(zi->z_filefunc,zi->filestream,filename,size_filename)!=size_filename)
+                err = ZIP_ERRNO;
+
+    if ((err==ZIP_OK) && (size_extrafield_local>0))
+        if (ZWRITE(zi->z_filefunc,zi->filestream,extrafield_local,size_extrafield_local)
+                                                                           !=size_extrafield_local)
+                err = ZIP_ERRNO;
+
+    zi->ci.stream.avail_in = (uInt)0;
+    zi->ci.stream.avail_out = (uInt)Z_BUFSIZE;
+    zi->ci.stream.next_out = zi->ci.buffered_data;
+    zi->ci.stream.total_in = 0;
+    zi->ci.stream.total_out = 0;
+
+    if ((err==ZIP_OK) && (zi->ci.method == Z_DEFLATED) && (!zi->ci.raw))
+    {
+        zi->ci.stream.zalloc = (alloc_func)0;
+        zi->ci.stream.zfree = (free_func)0;
+        zi->ci.stream.opaque = (voidpf)0;
+
+        /* deflateInit2 is always given a non-positive windowBits here */
+        if (windowBits>0)
+            windowBits = -windowBits;
+
+        err = deflateInit2(&zi->ci.stream, level,
+               Z_DEFLATED, windowBits, memLevel, strategy);
+
+        if (err==Z_OK)
+            zi->ci.stream_initialised = 1;
+    }
+#    ifndef NOCRYPT
+    zi->ci.crypt_header_size = 0;
+    if ((err==Z_OK) && (password != NULL))
+    {
+        unsigned char bufHead[RAND_HEAD_LEN];
+        unsigned int sizeHead;
+        zi->ci.encrypt = 1;
+        zi->ci.pcrc_32_tab = get_crc_table();
+        /*init_keys(password,zi->ci.keys,zi->ci.pcrc_32_tab);*/
+
+        sizeHead=crypthead(password,bufHead,RAND_HEAD_LEN,zi->ci.keys,zi->ci.pcrc_32_tab,crcForCrypting);
+        zi->ci.crypt_header_size = sizeHead;
+
+        if (ZWRITE(zi->z_filefunc,zi->filestream,bufHead,sizeHead) != sizeHead)
+                err = ZIP_ERRNO;
+    }
+#    endif
+
+    if (err==Z_OK)
+        zi->in_opened_file_inzip = 1;
+    else
+    {
+        /* the entry is not left open, so nothing else will ever release
+           these: do it here */
+        if (zi->ci.stream_initialised)
+        {
+            deflateEnd(&zi->ci.stream);
+            zi->ci.stream_initialised = 0;
+        }
+        free(zi->ci.central_header);
+        zi->ci.central_header = NULL;
+    }
+    return err;
+}
+
+/* Convenience wrapper around zipOpenNewFileInZip3: same arguments, with
+   windowBits = -MAX_WBITS, memLevel = DEF_MEM_LEVEL,
+   strategy = Z_DEFAULT_STRATEGY, no password and a crcForCrypting of 0. */
+extern int ZEXPORT zipOpenNewFileInZip2(file, filename, zipfi,
+ extrafield_local, size_extrafield_local,
+ extrafield_global, size_extrafield_global,
+ comment, method, level, raw)
+ zipFile file;
+ const char* filename;
+ const zip_fileinfo* zipfi;
+ const void* extrafield_local;
+ uInt size_extrafield_local;
+ const void* extrafield_global;
+ uInt size_extrafield_global;
+ const char* comment;
+ int method;
+ int level;
+ int raw;
+{
+ return zipOpenNewFileInZip3 (file, filename, zipfi,
+ extrafield_local, size_extrafield_local,
+ extrafield_global, size_extrafield_global,
+ comment, method, level, raw,
+ -MAX_WBITS, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY,
+ NULL, 0);
+}
+
+/* Convenience wrapper around zipOpenNewFileInZip2 with raw = 0. */
+extern int ZEXPORT zipOpenNewFileInZip (file, filename, zipfi,
+ extrafield_local, size_extrafield_local,
+ extrafield_global, size_extrafield_global,
+ comment, method, level)
+ zipFile file;
+ const char* filename;
+ const zip_fileinfo* zipfi;
+ const void* extrafield_local;
+ uInt size_extrafield_local;
+ const void* extrafield_global;
+ uInt size_extrafield_global;
+ const char* comment;
+ int method;
+ int level;
+{
+ return zipOpenNewFileInZip2 (file, filename, zipfi,
+ extrafield_local, size_extrafield_local,
+ extrafield_global, size_extrafield_global,
+ comment, method, level, 0);
+}
+
+/* Write the bytes pending in ci.buffered_data to the file (encrypting them
+   in place first when the entry is encrypted) and mark the buffer empty.
+   Returns ZIP_OK, or ZIP_ERRNO when the write was short; the buffer is
+   marked empty in both cases. */
+local int zipFlushWriteBuffer(zi)
+    zip_internal* zi;
+{
+    int status;
+
+    if (zi->ci.encrypt != 0)
+    {
+#ifndef NOCRYPT
+        uInt pos;
+        int t;
+        for (pos=0; pos<zi->ci.pos_in_buffered_data; pos++)
+            zi->ci.buffered_data[pos] = zencode(zi->ci.keys, zi->ci.pcrc_32_tab,
+                                                zi->ci.buffered_data[pos],t);
+#endif
+    }
+
+    status = (ZWRITE(zi->z_filefunc,zi->filestream,zi->ci.buffered_data,zi->ci.pos_in_buffered_data)
+                  ==zi->ci.pos_in_buffered_data) ? ZIP_OK : ZIP_ERRNO;
+    zi->ci.pos_in_buffered_data = 0;
+    return status;
+}
+
+/* Feed len bytes from buf to the entry currently open in the archive.
+   The running CRC is updated, then the data is either deflated or (stored
+   method, or raw mode) copied unchanged into the output buffer, which is
+   flushed to the file each time it fills up.
+   Returns ZIP_OK, ZIP_PARAMERROR when file is NULL or no entry is open,
+   ZIP_ERRNO on a write failure, or the error returned by deflate(). */
+extern int ZEXPORT zipWriteInFileInZip (file, buf, len)
+    zipFile file;
+    const void* buf;
+    unsigned len;
+{
+    zip_internal* zi = (zip_internal*)file;
+    int err=ZIP_OK;
+
+    if (zi == NULL)
+        return ZIP_PARAMERROR;
+    if (zi->in_opened_file_inzip == 0)
+        return ZIP_PARAMERROR;
+
+    zi->ci.stream.next_in = (void*)buf;
+    zi->ci.stream.avail_in = len;
+    zi->ci.crc32 = crc32(zi->ci.crc32,buf,len);
+
+    while ((err==ZIP_OK) && (zi->ci.stream.avail_in>0))
+    {
+        if (zi->ci.stream.avail_out == 0)
+        {
+            /* output buffer is full: write it out and start over */
+            if (zipFlushWriteBuffer(zi) == ZIP_ERRNO)
+                err = ZIP_ERRNO;
+            zi->ci.stream.next_out = zi->ci.buffered_data;
+            zi->ci.stream.avail_out = (uInt)Z_BUFSIZE;
+        }
+
+        if (err != ZIP_OK)
+            break;
+
+        if ((zi->ci.method != Z_DEFLATED) || (zi->ci.raw))
+        {
+            /* no compression: move as many bytes as both sides allow */
+            uInt chunk = (zi->ci.stream.avail_in < zi->ci.stream.avail_out) ?
+                             zi->ci.stream.avail_in : zi->ci.stream.avail_out;
+
+            memcpy(zi->ci.stream.next_out, zi->ci.stream.next_in, chunk);
+
+            zi->ci.stream.next_in += chunk;
+            zi->ci.stream.avail_in -= chunk;
+            zi->ci.stream.total_in += chunk;
+            zi->ci.stream.next_out += chunk;
+            zi->ci.stream.avail_out -= chunk;
+            zi->ci.stream.total_out += chunk;
+            zi->ci.pos_in_buffered_data += chunk;
+        }
+        else
+        {
+            uLong out_before = zi->ci.stream.total_out;
+            err=deflate(&zi->ci.stream,  Z_NO_FLUSH);
+            /* account for whatever deflate appended to the buffer */
+            zi->ci.pos_in_buffered_data += (uInt)(zi->ci.stream.total_out - out_before) ;
+        }
+    }
+
+    return err;
+}
+
+/* Finish the entry currently open in the archive: terminate the deflate
+   stream, flush the pending output, complete the central header, append it
+   to the in-memory central directory and patch crc / compressed size /
+   uncompressed size into the local header already written to the file.
+
+   uncompressed_size and crc32 are only used for an entry opened in raw
+   mode; otherwise the values tracked while writing replace them.
+
+   Returns ZIP_OK, ZIP_PARAMERROR when file is NULL or no entry is open,
+   ZIP_ERRNO on an I/O failure, or a zlib error code.  The first error
+   encountered is the one reported: later steps never overwrite it with a
+   success code.  The entry is closed and its central header buffer freed
+   in every case. */
+extern int ZEXPORT zipCloseFileInZipRaw (file, uncompressed_size, crc32)
+    zipFile file;
+    uLong uncompressed_size;
+    uLong crc32;
+{
+    zip_internal* zi;
+    uLong compressed_size;
+    int err=ZIP_OK;
+
+    if (file == NULL)
+        return ZIP_PARAMERROR;
+    zi = (zip_internal*)file;
+
+    if (zi->in_opened_file_inzip == 0)
+        return ZIP_PARAMERROR;
+    zi->ci.stream.avail_in = 0;
+
+    if ((zi->ci.method == Z_DEFLATED) && (!zi->ci.raw))
+        while (err==ZIP_OK)
+        {
+            uLong uTotalOutBefore;
+            if (zi->ci.stream.avail_out == 0)
+            {
+                if (zipFlushWriteBuffer(zi) == ZIP_ERRNO)
+                {
+                    /* stop here: calling deflate() again would replace
+                       this error with its own return value */
+                    err = ZIP_ERRNO;
+                    break;
+                }
+                zi->ci.stream.avail_out = (uInt)Z_BUFSIZE;
+                zi->ci.stream.next_out = zi->ci.buffered_data;
+            }
+            uTotalOutBefore = zi->ci.stream.total_out;
+            err=deflate(&zi->ci.stream,  Z_FINISH);
+            zi->ci.pos_in_buffered_data += (uInt)(zi->ci.stream.total_out - uTotalOutBefore) ;
+        }
+
+    if (err==Z_STREAM_END)
+        err=ZIP_OK; /* this is normal */
+
+    if ((zi->ci.pos_in_buffered_data>0) && (err==ZIP_OK))
+        if (zipFlushWriteBuffer(zi)==ZIP_ERRNO)
+            err = ZIP_ERRNO;
+
+    if ((zi->ci.method == Z_DEFLATED) && (!zi->ci.raw))
+    {
+        /* always release the stream, but keep an earlier error */
+        int end_err = deflateEnd(&zi->ci.stream);
+        if (err==ZIP_OK)
+            err = end_err;
+        zi->ci.stream_initialised = 0;
+    }
+
+    if (!zi->ci.raw)
+    {
+        crc32 = (uLong)zi->ci.crc32;
+        uncompressed_size = (uLong)zi->ci.stream.total_in;
+    }
+    compressed_size = (uLong)zi->ci.stream.total_out;
+#    ifndef NOCRYPT
+    compressed_size += zi->ci.crypt_header_size;
+#    endif
+
+    ziplocal_putValue_inmemory(zi->ci.central_header+16,crc32,4); /*crc*/
+    ziplocal_putValue_inmemory(zi->ci.central_header+20,
+                                compressed_size,4); /*compr size*/
+    if (zi->ci.stream.data_type == Z_ASCII)
+        ziplocal_putValue_inmemory(zi->ci.central_header+36,(uLong)Z_ASCII,2);
+    ziplocal_putValue_inmemory(zi->ci.central_header+24,
+                                uncompressed_size,4); /*uncompr size*/
+
+    if (err==ZIP_OK)
+        err = add_data_in_datablock(&zi->central_dir,zi->ci.central_header,
+                                       (uLong)zi->ci.size_centralheader);
+    free(zi->ci.central_header);
+    zi->ci.central_header = NULL;
+
+    if (err==ZIP_OK)
+    {
+        /* go back to the local header (crc field is at offset 14), patch
+           the three fields, then return to the end of the entry data */
+        uLong cur_pos_inzip = ZTELL(zi->z_filefunc,zi->filestream);
+        if (ZSEEK(zi->z_filefunc,zi->filestream,
+                  zi->ci.pos_local_header + 14,ZLIB_FILEFUNC_SEEK_SET)!=0)
+            err = ZIP_ERRNO;
+
+        if (err==ZIP_OK)
+            err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,crc32,4); /* crc 32 */
+
+        if (err==ZIP_OK) /* compressed size */
+            err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,compressed_size,4);
+
+        if (err==ZIP_OK) /* uncompressed size */
+            err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,uncompressed_size,4);
+
+        if (ZSEEK(zi->z_filefunc,zi->filestream,
+                  cur_pos_inzip,ZLIB_FILEFUNC_SEEK_SET)!=0)
+            err = ZIP_ERRNO;
+    }
+
+    zi->number_entry ++;
+    zi->in_opened_file_inzip = 0;
+
+    return err;
+}
+
+/* Close the entry currently open in the archive: zipCloseFileInZipRaw with
+   an uncompressed size and crc of 0. */
+extern int ZEXPORT zipCloseFileInZip (file)
+ zipFile file;
+{
+ return zipCloseFileInZipRaw (file,0,0);
+}
+
+/* Finish the archive: close the entry still open (if any), write the
+   central directory accumulated in memory, the end-of-central-directory
+   record and the optional global comment, then close the file and free
+   the handle.
+   Returns ZIP_PARAMERROR when file is NULL, otherwise the first error met
+   (ZIP_OK when there was none).  The file is closed and the handle freed
+   even when an error is returned. */
+extern int ZEXPORT zipClose (file, global_comment)
+    zipFile file;
+    const char* global_comment;
+{
+    zip_internal* zi = (zip_internal*)file;
+    linkedlist_datablock_internal* blk;
+    int err = 0;
+    uLong size_centraldir = 0;
+    uLong centraldir_pos_inzip ;
+    uInt size_global_comment = 0;
+
+    if (zi == NULL)
+        return ZIP_PARAMERROR;
+
+    if (zi->in_opened_file_inzip == 1)
+        err = zipCloseFileInZip (file);
+
+    if (global_comment != NULL)
+        size_global_comment = strlen(global_comment);
+
+    /* the central directory starts at the current write position */
+    centraldir_pos_inzip = ZTELL(zi->z_filefunc,zi->filestream);
+    if (err==ZIP_OK)
+    {
+        for (blk = zi->central_dir.first_block; blk != NULL; blk = blk->next_datablock)
+        {
+            if ((err==ZIP_OK) && (blk->filled_in_this_block>0) &&
+                (ZWRITE(zi->z_filefunc,zi->filestream,
+                        blk->data,blk->filled_in_this_block)
+                      != blk->filled_in_this_block))
+                err = ZIP_ERRNO;
+
+            size_centraldir += blk->filled_in_this_block;
+        }
+    }
+    free_datablock(zi->central_dir.first_block);
+
+    /* end-of-central-directory record, one field at a time */
+
+    /* Magic End */
+    if (err==ZIP_OK)
+        err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,(uLong)ENDHEADERMAGIC,4);
+
+    /* number of this disk */
+    if (err==ZIP_OK)
+        err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,(uLong)0,2);
+
+    /* number of the disk with the start of the central directory */
+    if (err==ZIP_OK)
+        err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,(uLong)0,2);
+
+    /* total number of entries in the central dir on this disk */
+    if (err==ZIP_OK)
+        err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,(uLong)zi->number_entry,2);
+
+    /* total number of entries in the central dir */
+    if (err==ZIP_OK)
+        err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,(uLong)zi->number_entry,2);
+
+    /* size of the central directory */
+    if (err==ZIP_OK)
+        err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,(uLong)size_centraldir,4);
+
+    /* offset of start of central directory with respect to the
+       starting disk number */
+    if (err==ZIP_OK)
+        err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,
+                                (uLong)(centraldir_pos_inzip - zi->add_position_when_writting_offset),4);
+
+    /* zipfile comment length */
+    if (err==ZIP_OK)
+        err = ziplocal_putValue(&zi->z_filefunc,zi->filestream,(uLong)size_global_comment,2);
+
+    if ((err==ZIP_OK) && (size_global_comment>0) &&
+        (ZWRITE(zi->z_filefunc,zi->filestream,
+                global_comment,size_global_comment) != size_global_comment))
+        err = ZIP_ERRNO;
+
+    /* a close failure is only reported when nothing failed before it */
+    if ((ZCLOSE(zi->z_filefunc,zi->filestream) != 0) && (err == ZIP_OK))
+        err = ZIP_ERRNO;
+
+    TRYFREE(zi);
+
+    return err;
+}
+
+/* Flush the underlying file stream through the zflush_file callback.
+   Returns ZIP_PARAMERROR when file is NULL, EOF when no flush callback or
+   no stream is available, otherwise the value returned by ZFLUSH. */
+extern int ZEXPORT zipFlush (file)
+    zipFile file;
+{
+    zip_internal* zi = (zip_internal*)file;
+
+    if (zi == NULL)
+        return ZIP_PARAMERROR;
+
+    if (zi->z_filefunc.zflush_file == NULL || zi->filestream == NULL)
+        return EOF;
+
+    return ZFLUSH(zi->z_filefunc,zi->filestream);
+}
diff --git a/src/minizip/zip.h b/src/minizip/zip.h
new file mode 100644
index 0000000..d4337b7
--- /dev/null
+++ b/src/minizip/zip.h
@@ -0,0 +1,239 @@
+/* zip.h -- IO for compress .zip files using zlib
+ Version 1.00, September 10th, 2003
+
+ Copyright (C) 1998-2003 Gilles Vollant
+
+ This unzip package allow creates .ZIP file, compatible with PKZip 2.04g
+ WinZip, InfoZip tools and compatible.
+ Encryption and multi volume ZipFile (span) are not supported.
+ Old compressions used by old PKZip 1.x are not supported
+
+ For uncompress .zip file, look at unzip.h
+
+
+ I WAIT FEEDBACK at mail info@winimage.com
+ Visit also http://www.winimage.com/zLibDll/unzip.html for evolution
+
+ Condition of use and distribution are the same than zlib :
+
+ This software is provided 'as-is', without any express or implied
+ warranty. In no event will the authors be held liable for any damages
+ arising from the use of this software.
+
+ Permission is granted to anyone to use this software for any purpose,
+ including commercial applications, and to alter it and redistribute it
+ freely, subject to the following restrictions:
+
+ 1. The origin of this software must not be misrepresented; you must not
+ claim that you wrote the original software. If you use this software
+ in a product, an acknowledgment in the product documentation would be
+ appreciated but is not required.
+ 2. Altered source versions must be plainly marked as such, and must not be
+ misrepresented as being the original software.
+ 3. This notice may not be removed or altered from any source distribution.
+
+
+*/
+
+/* for more info about .ZIP format, see
+ http://www.info-zip.org/pub/infozip/doc/appnote-981119-iz.zip
+ http://www.info-zip.org/pub/infozip/doc/
+ PkWare has also a specification at :
+ ftp://ftp.pkware.com/probdesc.zip
+*/
+
+#ifndef _zip_H
+#define _zip_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef _ZLIB_H
+#include "zlib.h"
+#endif
+
+#ifndef _ZLIBIOAPI_H
+#include "ioapi.h"
+#endif
+
+#if defined(STRICTZIP) || defined(STRICTZIPUNZIP)
+/* like the STRICT of WIN32, we define a pointer that cannot be converted
+ from (void*) without cast */
+typedef struct TagzipFile__ { int unused; } zipFile__;
+typedef zipFile__ *zipFile;
+#else
+typedef voidp zipFile;
+#endif
+
+#define ZIP_OK (0)
+#define ZIP_EOF (0)
+#define ZIP_ERRNO (Z_ERRNO)
+#define ZIP_PARAMERROR (-102)
+#define ZIP_BADZIPFILE (-103)
+#define ZIP_INTERNALERROR (-104)
+
+#ifndef DEF_MEM_LEVEL
+# if MAX_MEM_LEVEL >= 8
+# define DEF_MEM_LEVEL 8
+# else
+# define DEF_MEM_LEVEL MAX_MEM_LEVEL
+# endif
+#endif
+/* default memLevel */
+
+/* tm_zip contains date/time info, split into calendar fields */
+typedef struct tm_zip_s
+{
+ uInt tm_sec; /* seconds after the minute - [0,59] */
+ uInt tm_min; /* minutes after the hour - [0,59] */
+ uInt tm_hour; /* hours since midnight - [0,23] */
+ uInt tm_mday; /* day of the month - [1,31] */
+ uInt tm_mon; /* months since January - [0,11] */
+ uInt tm_year; /* years - [1980..2044] */
+} tm_zip;
+
+typedef struct /* per-entry information passed as zipfi to zipOpenNewFileInZip* */
+{
+ tm_zip tmz_date; /* date in understandable format */
+ uLong dosDate; /* if dosDate == 0, tmz_date is used */
+/* uLong flag; */ /* general purpose bit flag 2 bytes */
+
+ uLong internal_fa; /* internal file attributes 2 bytes */
+ uLong external_fa; /* external file attributes 4 bytes */
+} zip_fileinfo;
+
+typedef const char* zipcharpc;
+
+
+#define APPEND_STATUS_CREATE (0)
+#define APPEND_STATUS_CREATEAFTER (1)
+#define APPEND_STATUS_ADDINZIP (2)
+
+extern zipFile ZEXPORT zipOpen OF((const char *pathname, int append));
+/*
+ Create a zipfile.
+ pathname contains, on Windows XP, a filename like "c:\\zlib\\zlib113.zip" or, on
+ a Unix computer, "zlib/zlib113.zip".
+ If the file pathname exists and append==APPEND_STATUS_CREATEAFTER, the zip
+ will be created at the end of the file.
+ (useful if the file contains self-extractor code)
+ If the file pathname exists and append==APPEND_STATUS_ADDINZIP, we will
+ add files to the existing zip (be sure you don't add a file that doesn't exist)
+ If the zipfile cannot be opened, the return value is NULL.
+ Else, the return value is a zipFile handle, usable with the other functions
+ of this zip package.
+*/
+
+/* Note : there is no delete function for a zipfile.
+ If you want to delete a file from a zipfile, you must open the zipfile and create another one.
+ Of course, you can use RAW reading and writing to copy the files you did not want to delete
+*/
+
+extern zipFile ZEXPORT zipOpen2 OF((const char *pathname,
+ int append,
+ zipcharpc* globalcomment,
+ zlib_filefunc_def* pzlib_filefunc_def));
+
+extern int ZEXPORT zipOpenNewFileInZip OF((zipFile file,
+ const char* filename,
+ const zip_fileinfo* zipfi,
+ const void* extrafield_local,
+ uInt size_extrafield_local,
+ const void* extrafield_global,
+ uInt size_extrafield_global,
+ const char* comment,
+ int method,
+ int level));
+/*
+ Open a file in the ZIP for writing.
+ filename : the filename in zip (if NULL, '-' without quote will be used)
+ *zipfi contains supplemental information
+ if extrafield_local!=NULL and size_extrafield_local>0, extrafield_local
+ contains the extrafield data for the local header
+ if extrafield_global!=NULL and size_extrafield_global>0, extrafield_global
+ contains the extrafield data for the global (central directory) header
+ if comment != NULL, comment contains the comment string
+ method contains the compression method (0 for store, Z_DEFLATED for deflate)
+ level contains the level of compression (can be Z_DEFAULT_COMPRESSION)
+*/
+
+
+extern int ZEXPORT zipOpenNewFileInZip2 OF((zipFile file,
+ const char* filename,
+ const zip_fileinfo* zipfi,
+ const void* extrafield_local,
+ uInt size_extrafield_local,
+ const void* extrafield_global,
+ uInt size_extrafield_global,
+ const char* comment,
+ int method,
+ int level,
+ int raw));
+
+/*
+ Same as zipOpenNewFileInZip, except that if raw=1, we write the file raw
+ */
+
+extern int ZEXPORT zipOpenNewFileInZip3 OF((zipFile file,
+ const char* filename,
+ const zip_fileinfo* zipfi,
+ const void* extrafield_local,
+ uInt size_extrafield_local,
+ const void* extrafield_global,
+ uInt size_extrafield_global,
+ const char* comment,
+ int method,
+ int level,
+ int raw,
+ int windowBits,
+ int memLevel,
+ int strategy,
+ const char* password,
+ uLong crcForCtypting));
+
+/*
+ Same as zipOpenNewFileInZip2, except
+ windowBits,memLevel,strategy : see the corresponding parameters of deflateInit2
+ password : crypting password (NULL for no crypting)
+ crcForCtypting : crc of file to compress (needed for crypting)
+ */
+
+
+extern int ZEXPORT zipWriteInFileInZip OF((zipFile file,
+ const void* buf,
+ unsigned len));
+/*
+ Write data in the zipfile
+*/
+
+extern int ZEXPORT zipCloseFileInZip OF((zipFile file));
+/*
+ Close the current file in the zipfile
+*/
+
+extern int ZEXPORT zipCloseFileInZipRaw OF((zipFile file,
+ uLong uncompressed_size,
+ uLong crc32));
+/*
+ Close the current file in the zipfile, for a file opened with
+ parameter raw=1 in zipOpenNewFileInZip2
+ uncompressed_size and crc32 are the values for the uncompressed data
+*/
+
+extern int ZEXPORT zipClose OF((zipFile file,
+ const char* global_comment));
+/*
+ Close the zipfile
+*/
+
+extern int ZEXPORT zipFlush OF((zipFile file));
+/*
+ Flush the zipfile output
+*/
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _zip_H */
diff --git a/src/webhttrack b/src/webhttrack
index 3b433bb..ca3f512 100755
--- a/src/webhttrack
+++ b/src/webhttrack
@@ -2,14 +2,16 @@
#
# WebHTTrack launcher script
# Initializes the htsserver GUI frontend and launch the default browser
+
BROWSEREXE=
-SRCHBROWSEREXE="x-www-browser www-browser mozilla galeon konqueror opera netscape"
+SRCHBROWSEREXE="x-www-browser www-browser mozilla firefox firebird galeon konqueror opera netscape"
if test -n "${BROWSER}"; then
# sensible-browser will f up if BROWSER is not set
SRCHBROWSEREXE="sensible-browser ${SRCHBROWSEREXE}"
fi
-SRCHPATH="/usr/local/bin /usr/share/bin /usr/bin /usr/lib/httrack /usr/local/lib/httrack /usr/local/share/httrack ${HOME}/usr/bin ${HOME}/bin"
-SRCHDISTPATH="/usr/share /usr/local /usr /local /usr/local/share ${HOME}/usr ${HOME}/usr/share ${HOME}/usr/local ${HOME}/usr/share"
+SRCHPATH="/usr/local/bin /usr/share/bin /usr/bin /usr/lib/httrack /usr/local/lib/httrack /usr/local/share/httrack /sw/bin ${HOME}/usr/bin ${HOME}/bin"
+SRCHPATH="$SRCHPATH "`echo $PATH | tr ":" " "`
+SRCHDISTPATH="/usr/share /usr/local /usr /local /usr/local/share ${HOME}/usr ${HOME}/usr/share /sw ${HOME}/usr/local ${HOME}/usr/share"
###
# And now some famous cuisine
@@ -19,6 +21,74 @@ echo "$0($$): $@" >&2
return 0
}
+function mozillabrowser {
+# mozillabrowser <browser path or name>
+# Succeeds (status 0) when the name denotes a mozilla-family browser, i.e.
+# when it contains mozilla, netscape, firebird or firefox; fails (status 1)
+# for anything else.
+case "$1" in
+  *mozilla*|*netscape*|*firebird*|*firefox*)
+    return 0
+    ;;
+esac
+return 1
+}
+function mozillaloaded {
+# succeeds (status 0) if the current user already runs a mozilla-like browser
+user_name=`logname 2>/dev/null`
+test -n "${user_name}" || user_name=`id -un`
+if test -n "${user_name}"; then
+  # no -e: ps selections are additive, -e would list every user's processes
+  ps --user "$user_name" | grep -qE "(mozilla|netscape|firebird|firefox)"
+else
+  false
+fi
+}
+
+function launch_browser {
+# launch_browser <browser> <url>: open <url> and return once the browser session is over
+log "launching $1"
+start_t=`date +%s`
+browser=$1
+url=$2
+moz=
+mozillaloaded && moz=1   # was a mozilla-like browser already running?
+# launch any browser
+# if it is a mozilla like browser, first try to hand the URL to a running instance
+# with -remote; the remote command must be openURL(<url>), not the bare URL
+# (thanks to Torsten Werner for the patch)
+# see http://www.mozilla.org/unix/remote.html
+if mozillabrowser "${browser}"; then
+  if ! ${browser} -remote "openURL(${url})"; then
+    log "spawning browser.."
+    ${browser} "${url}"
+  fi
+else
+  log "spawning regular browser.."
+  ${browser} "${url}"
+fi
+# this is a real pain in the neck: browser can hiddenly use the -remote feature of
+# mozilla and therefore return immediately
+# this loop is the only reliable solution AFAIK
+end_t=`date +%s`
+if test -n "$start_t" -a -n "$end_t"; then
+  int_t=$(($end_t - $start_t))
+else
+  int_t=0
+fi
+# a launch that returned in under a minute probably delegated to a running instance
+if test -n "${int_t}" -a "${int_t}" -lt 60; then
+  if test -n "$moz"; then
+    log "waiting for browser to terminate.."
+    while mozillaloaded; do
+      sleep 3
+    done
+    log "browser seems to have been closed.."
+  fi
+fi
+log "browser exited"
+}
+
# First ensure that we can launch the server
BINPATH=
for i in ${SRCHPATH}; do
@@ -45,6 +115,7 @@ LANGN=`grep "${HTSLANG}:" ${DISTPATH}/lang.indexes | cut -f2 -d':'`
# Find the browser
# note: not all systems have sensible-browser or www-browser alternative
# thefeore, we have to find a bit more if sensible-browser could not be found
+
for i in ${SRCHBROWSEREXE}; do
for j in ${SRCHPATH}; do
if test -x ${j}/${i}; then
@@ -58,12 +129,12 @@ test -n "$BROWSEREXE" || ! log "cound not find any suitable browser" || exit 1
# "browse" command
if test "$1" = "browse"; then
-${BROWSEREXE} "file://${HOME}/websites/index.html"
+launch_browser "${BROWSEREXE}" "file://${HOME}/websites/index.html"
exit $?
fi
# Create a temporary filename
-TMPSRVFILE="/tmp/.webhttrack.$$.`/usr/bin/head -c16 /dev/random | /usr/bin/md5sum | /usr/bin/cut -f1 -d' '`"
+TMPSRVFILE="/tmp/.webhttrack.$$.`head -c16 /dev/random | md5sum | cut -f1 -d' '`"
>${TMPSRVFILE} || ! log "cound not create the temporary file ${TMPSRVFILE}" || exit 1
# Launch htsserver binary and setup the server
(${BINPATH}/htsserver "${DISTPATH}/" path "${HOME}/websites" lang "${LANGN}" $@; echo SRVURL=error) > ${TMPSRVFILE}&
@@ -74,15 +145,15 @@ while ! test -n "$SRVURL"; do
MAXCOUNT=$[$MAXCOUNT - 1]
test $MAXCOUNT -gt 0 || exit 1
test $MAXCOUNT -lt 50 && echo "waiting for server to reply.."
-SRVURL=`/bin/grep -E URL= ${TMPSRVFILE} | /usr/bin/cut -f2- -d=`
+SRVURL=`grep -E URL= ${TMPSRVFILE} | cut -f2- -d=`
test ! "$SRVURL" = "error" || ! log "could not spawn htsserver" || exit 1
-test -n "$SRVURL" || /bin/sleep 1
+test -n "$SRVURL" || sleep 1
done
# Cleanup function
function cleanup {
test -n "$1" && log "nasty signal caught, cleaning up.."
-test -f ${TMPSRVFILE} && SRVPID=`/bin/grep -E PID= ${TMPSRVFILE} | /usr/bin/cut -f2- -d=`
+test -f ${TMPSRVFILE} && SRVPID=`grep -E PID= ${TMPSRVFILE} | cut -f2- -d=`
test -n "${SRVPID}" && kill -9 ${SRVPID}
test -f ${TMPSRVFILE} && rm ${TMPSRVFILE}
test -n "$1" && log "..done"
@@ -93,7 +164,7 @@ return 0
trap "cleanup now; exit" 1 2 3 4 5 6 7 8 9 11 13 14 15 16 19 24 25
# Got SRVURL, launch browser
-${BROWSEREXE} "${SRVURL}"
+launch_browser "${BROWSEREXE}" "${SRVURL}"
# That's all, folks!
trap "" 1 2 3 4 5 6 7 8 9 11 13 14 15 16 19 24 25
diff --git a/src/webhttrack.dsp b/src/webhttrack.dsp
index a5940e8..60e8ff9 100755
--- a/src/webhttrack.dsp
+++ b/src/webhttrack.dsp
@@ -42,7 +42,7 @@ RSC=rc.exe
# PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /Yu"stdafx.h" /FD /c
-# ADD CPP /nologo /MD /W3 /GX /O2 /I "C:\Dev\\" /I "C:\Dev\IPv6Kit\inc\\" /I "C:\Dev\zlib\\" /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /FD /c
+# ADD CPP /nologo /MD /W3 /GX /O2 /I "C:\Dev\\" /I "C:\Dev\IPv6Kit\inc\\" /I "C:\Dev\zlib\\" /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /FR /FD /c
# SUBTRACT CPP /YX /Yc /Yu
# ADD BASE RSC /l 0x40c /d "NDEBUG"
# ADD RSC /l 0x40c /d "NDEBUG"