diff options
author | Xavier Roche <xroche@users.noreply.github.com> | 2012-03-19 12:51:31 +0000 |
---|---|---|
committer | Xavier Roche <xroche@users.noreply.github.com> | 2012-03-19 12:51:31 +0000 |
commit | 25adbdabb47499fe641c7bd9595024ff82667058 (patch) | |
tree | 4200bb5e746bc1c0606de615ec99f0a247d4d9ba /src | |
parent | ad5b7acc19290ff91e0f42a0de448a26760fcf99 (diff) |
httrack 3.30.1
Diffstat (limited to 'src')
72 files changed, 12975 insertions, 6822 deletions
diff --git a/src/Makefile b/src/Makefile deleted file mode 100644 index be898d9..0000000 --- a/src/Makefile +++ /dev/null @@ -1,10 +0,0 @@ -all : - @echo "please launch configure before! :" - @echo "./configure" - @echo "" - @echo "then, you can launch:" - @echo "make" - @echo "make install" - @echo "" - @echo "(see INSTALL file to know how-to-install)" - diff --git a/src/Makefile.am b/src/Makefile.am new file mode 100644 index 0000000..c391d3f --- /dev/null +++ b/src/Makefile.am @@ -0,0 +1,60 @@ +#SUBDIRS = swf + +DevIncludesdir = $(includedir)/httrack +DevIncludes_DATA = httrack-library.h \ + htsglobal.h \ + htsopt.h \ + htswrap.h \ + htssystem.h \ + htsconfig.h \ + ../config.h \ + htsmodules.h \ + htsbasenet.h \ + htsbauth.h + +INCLUDES = \ + @DEFAULT_CFLAGS@ \ + @THREADS_CFLAGS@ \ + @V6_FLAG@ \ + @LFS_FLAG@ \ + -DPREFIX=\""$(prefix)"\" \ + -DSYSCONFDIR=\""$(sysconfdir)"\" \ + -DDATADIR=\""$(datadir)"\" \ + -DLIBDIR=\""$(libdir)"\" + +bin_PROGRAMS = httrack htsserver + +httrack_LDADD = $(THREADS_LIBS) -lhttrack +htsserver_LDADD = $(THREADS_LIBS) $(SOCKET_LIBS) -lhttrack + +lib_LTLIBRARIES = libhttrack.la + +htsserver_SOURCES = htsserver.c htsserver.h htsweb.c htsweb.h + +whttrackrundir = $(bindir) +whttrackrun_SCRIPTS = webhttrack + +libhttrack_la_SOURCES = htscore.c htsparse.c htsback.c htscache.c \ + htscatchurl.c htsfilters.c htsftp.c htshash.c htsinthash.c \ + htshelp.c htsjava.c htslib.c htscoremain.c \ + htsname.c htsrobots.c htstools.c htswizard.c \ + htsalias.c htsthread.c htsindex.c htsbauth.c \ + htsmd5.c htszlib.c htsnostatic.c htswrap.c \ + htsmodules.c \ + md5.c \ + hts-indextmpl.h htsalias.h htsback.h htsbase.h \ + htsbasenet.h htsbauth.h htscache.h htscatchurl.h \ + htsconfig.h htscore.h htsparse.h htscoremain.h htsdefines.h \ + htsfilters.h htsftp.h htsglobal.h htshash.h htsinthash.h \ + htshelp.h htsindex.h htsjava.h htslib.h htsmd5.h \ + htsmodules.h htsname.h htsnet.h htsnostatic.h \ + htsopt.h htsrobots.h htssystem.h htsthread.h \ + 
htstools.h htswizard.h htswrap.h htszlib.h \ + httrack-library.h md5.h + +libhttrack_la_LIBADD = $(THREADS_LIBS) $(DL_LIBS) $(SOCKET_LIBS) +libhttrack_la_LDFLAGS = -version-info $(VERSION_INFO) + +EXTRA_DIST = httrack.h webhttrack \ + httrack.dsp httrack.dsw \ + webhttrack.dsp webhttrack.dsw diff --git a/src/Makefile.in b/src/Makefile.in index 57bf1d3..22590c6 100644 --- a/src/Makefile.in +++ b/src/Makefile.in @@ -1,417 +1,639 @@ -# HTTrack Website Copier, Offline Browser for Windows and Unix -# Copyright (C) Xavier Roche and other contributors -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License -# as published by the Free Software Foundation; either version 2 -# of the License, or any later version. -# +# Makefile.in generated by automake 1.7 from Makefile.am. +# @configure_input@ + +# Copyright 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002 +# Free Software Foundation, Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + # This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. 
-# -# Version: 1.3 (09/2001) -# Usage: just type 'make' - -# Generated by configure -AUTONAME = -#__AUTONAME__ - - -### Makefile commands -SHELL = /bin/sh -MAIN = -CC = gcc -CFLAGS = __CFLAGS__ -# threads -LFLAGS = __LFLAGS__ __LFLAGS2__ __LFLAGS3__ - - -BINARIES = htscore.o htsback.o htscache.o\ - htscatchurl.o htsfilters.o htsftp.o htshash.o\ - htshelp.o htsjava.o htslib.o htscoremain.o\ - htsname.o htsrobots.o htstools.o htswizard.o\ - htsalias.o htsthread.o htsindex.o\ - htsbauth.o htsmd5.o\ - htswrap.o md5.o htszlib.o\ - htsnostatic.o \ - httrack.o - -SOBINARIES = htscore.o htsback.o htscache.o\ - htscatchurl.o htsfilters.o htsftp.o htshash.o\ - htshelp.o htsjava.o htslib.o htscoremain.o\ - htsname.o htsrobots.o htstools.o htswizard.o\ - htsalias.o htsthread.o htsindex.o\ - htsbauth.o htsmd5.o htszlib.o\ - htsnostatic.o \ - htswrap.o md5.o - -MAINBIN = httrack.c - -BOUTPUT = httrack -BOUTPUTSO = libhttrack.so -DOCS = ../HelpHtml ../templates ../httrack-doc.html ../COPYING ../INSTALL ../README ../*.txt -HTSSYSTEM = htssystem.h -BINPATH = __BINPATH__ -ETCPATH = __ETCPATH__ -LIBPATH = __LIBPATH__ -PREFIX = __PREFIX__ - -## Defines for "library" (program compiled with HTTrack) -BINARIES_LIB = example.o httracklib.o\ - src/htscore.o src/htsback.o src/htscache.o\ - src/htscatchurl.o src/htsfilters.o src/htsftp.o src/htshash.o\ - src/htshelp.o src/htsjava.o src/htslib.o src/htscoremain.o\ - src/htsname.o src/htsrobots.o src/htstools.o src/htswizard.o\ - src/htsalias.o src/htsthread.o src/htsindex.o\ - src/htsbauth.o src/htsmd5.o htszlib.o\ - src/htsshow.o src/htswrap.o\ - src/htsnostatic.o \ - src/md5.o -BOUTPUT_LIB = example -HTSSYSTEM_LIB = src/htssystem.h - -# in_addr_t problems : -# In case of problems during compiling, -# make htss -# edit htssystem.h and add the following line: -# #define HTS_DO_NOT_REDEFINE_in_addr_t -# make manual - - -# Keywords for build types (example: make linux) -MAKE_LINUX = linux -MAKE_NETBSD = netbsd -MAKE_OPENBSD = openbsd 
-MAKE_SOLARIS = solaris -MAKE_AIX = aix -MAKE_STD = standard -MAKE_STD2 = standard2 -MAKE_STD3 = standard3 -MAKE_STD4 = standard4 -### -MAKE_LIB = build_httracklib -### -MAKE_AUTO = auto -### - -# First, detect OS Type -# If your make does not recognize this, change it! -SHORTUNAME = $(shell uname) -FULLUNAME = $(shell uname -a) - -### Targets: - -# Default target, attempt to use uname if necessary -# if uname is empty, display info message -all : - @if test -n "$(AUTONAME)"; then\ - $(MAKE) $(AUTONAME);\ - elif test -n "$(SHORTUNAME)"; then\ - $(MAKE) $(SHORTUNAME);\ - else\ - $(MAKE) help;\ - fi - -# If we can not detect the OS Type, show a message info -help : - @clear - @echo "" - @echo "Welcome to HTTrack Website Copier install!" - @echo "-----------------------------------------" - @echo "" - @echo "1. To make HTTrack, just type in:" - @echo " make $(MAKE_LINUX)" - @echo " or" - @echo " make $(MAKE_NETBSD)" - @echo " or" - @echo " make $(MAKE_OPENBSD)" - @echo " or" - @echo " make $(MAKE_SOLARIS)" - @echo " or" - @echo " make $(MAKE_AIX)" - @echo " or" - @echo " make $(MAKE_STD)" - @echo " or (problems with in_addr_t)" - @echo " make $(MAKE_STD2)" - @echo " or (problems with 64-bit)" - @echo " make $(MAKE_STD3)" - @echo " or (problems with both in_addr_t and 64-bit)" - @echo " make $(MAKE_STD4)" - @echo "" - @echo "According to your OS type" - @echo "(example: type in 'make $(MAKE_LINUX)' if you compile HTTrack with linux)" - @echo - @echo "Or, if it does not work, you can try " - @echo " make htss" - @echo " edit htssystem.h (check OS type), and add the following line:" - @echo " #define HTS_DO_NOT_REDEFINE_in_addr_t" - @echo " make manual" - @echo - @echo "2. Then, type in 'make install' to copy httrack to $(BINPATH)" - @echo " or just use ./httrack to launch the program" - @echo "" - @echo "3. Build problems, type in:" - @echo " make moreinfo" - @echo "" - @echo "Have fun with HTTrack Website Copier!" 
- @echo "" -info : help - -# Troubleshooter -moreinfo : - @echo "Known problems:" - @echo "" - @echo "\`in_addr_t' undeclared (first use this function)" - @echo "see in_addr_t problems in Makefile" - @echo "" - -### - -## Build Targets (this is the name given by 'uname') -Linux : $(MAKE_LINUX) -SunOS : $(MAKE_SOLARIS) -AIX : $(MAKE_AIX) -NetBSD : $(MAKE_NETBSD) -OpenBSD : $(MAKE_OPENBSD) - -### Build Targets (standard types) -default : firstinfo htssystem htssystem_default build_default strip clean lastinfo -$(MAKE_LINUX) : firstinfo htssystem htssystem_linux build_default strip clean lastinfo -$(MAKE_SOLARIS) : firstinfo htssystem htssystem_solaris build_solaris strip clean lastinfo -$(MAKE_AIX) : firstinfo htssystem htssystem_aix build_default strip clean lastinfo -$(MAKE_NETBSD) : firstinfo htssystem htssystem_netbsd build_default strip clean lastinfo -$(MAKE_OPENBSD) : firstinfo htssystem htssystem_openbsd build_nopthread strip clean lastinfo -$(MAKE_STD) : firstinfo htssystem htssystem_default build_default strip clean lastinfo -$(MAKE_STD2) : firstinfo htssystem htssystem_default2 build_default strip clean lastinfo -$(MAKE_STD3) : firstinfo htssystem htssystem_default3 build_default strip clean lastinfo -$(MAKE_STD4) : firstinfo htssystem htssystem_default4 build_default strip clean lastinfo -### Defines for "library" (program compiled with HTTrack) -$(MAKE_LIB) : build_lib strip_lib clean_lib lastinfo -### -$(MAKE_AUTO) : __MAKEAUTO__ -### - -## Defines for OSes -lib_default : htssystem htssystem_default addhtssystem_lib info_lib -lib_linux : htssystem htssystem_linux addhtssystem_lib info_lib -lib_solaris : htssystem htssystem_solaris addhtssystem_lib info_lib -lib_aix : htssystem htssystem_aix addhtssystem_lib info_lib -lib_netbsd : htssystem htssystem_netbsd addhtssystem_lib info_lib -lib_openbsd : htssystem htssystem_openbsd addhtssystem_lib info_lib -lib_std : htssystem htssystem_default addhtssystem_lib info_lib - - -# manual build -htss : htssystem 
htssystem_default -manual : build_default strip clean lastinfo - -# Creates htssystem.h file -htssystem : - @echo "/* HTTrack, Offline Browser for Windows and Unix */" > $(HTSSYSTEM) - @echo "" >> $(HTSSYSTEM) - @echo "/* HTTrack system definition */" >> $(HTSSYSTEM) - @echo "/* This should be the only file you have to change */" >> $(HTSSYSTEM) - @echo "" >> $(HTSSYSTEM) - @echo "/* Solaris: 0 / Windows: 1 / AIX: 2 / Linux: 3 */" >> $(HTSSYSTEM) - @echo "" >> $(HTSSYSTEM) - @echo "" >> $(HTSSYSTEM) - @echo "/* Fix plateform number to 0 (SunOS) */" >> $(HTSSYSTEM) - @echo "/* If it doesn't compile, try another one */" >> $(HTSSYSTEM) - -htssystem_solaris : - @echo "#define HTS_PLATFORM 0" >> $(HTSSYSTEM) - @echo "#define HTS_PLATFORM_NAME \"$(FULLUNAME)\"" >> $(HTSSYSTEM) - @echo "#define HTS_LITTLE_ENDIAN" >> $(HTSSYSTEM) - @echo "" >> $(HTSSYSTEM) - -htssystem_aix : - @echo "#define HTS_PLATFORM 2" >> $(HTSSYSTEM) - @echo "#define HTS_PLATFORM_NAME \"$(FULLUNAME)\"" >> $(HTSSYSTEM) - @echo "#define HTS_LITTLE_ENDIAN" >> $(HTSSYSTEM) - @echo "" >> $(HTSSYSTEM) - -htssystem_linux : - @echo "#define HTS_PLATFORM 3" >> $(HTSSYSTEM) - @echo "#define HTS_PLATFORM_NAME \"$(FULLUNAME)\"" >> $(HTSSYSTEM) - @echo "" >> $(HTSSYSTEM) - -htssystem_netbsd: - @echo "#define HTS_PLATFORM 3" >> $(HTSSYSTEM) - @echo "#define HTS_PLATFORM_NAME \"$(FULLUNAME)\"" >> $(HTSSYSTEM) - @echo "#define HTS_DO_NOT_REDEFINE_in_addr_t" >> $(HTSSYSTEM) - @echo "" >> $(HTSSYSTEM) - -htssystem_openbsd: - @echo "#define HTS_PLATFORM 3" >> $(HTSSYSTEM) - @echo "#define HTS_PLATFORM_NAME \"$(FULLUNAME)\"" >> $(HTSSYSTEM) - @echo "#define HTS_DO_NOT_REDEFINE_in_addr_t" >> $(HTSSYSTEM) - @echo "#define HTS_DO_NOT_USE_PTHREAD" >> $(HTSSYSTEM) - @echo "#define HTS_DO_NOT_USE_UID" >> $(HTSSYSTEM) - @echo "" >> $(HTSSYSTEM) - -htssystem_nopthread: - @echo "#define HTS_PLATFORM 3" >> $(HTSSYSTEM) - @echo "#define HTS_PLATFORM_NAME \"$(FULLUNAME)\"" >> $(HTSSYSTEM) - @echo "#define HTS_DO_NOT_USE_PTHREAD" 
>> $(HTSSYSTEM) - @echo "" >> $(HTSSYSTEM) - -htssystem_default : - @echo "#define HTS_PLATFORM 3" >> $(HTSSYSTEM) - @echo "#define HTS_PLATFORM_NAME \"$(FULLUNAME)\"" >> $(HTSSYSTEM) - @echo "" >> $(HTSSYSTEM) - -htssystem_default2 : - @echo "#define HTS_PLATFORM 3" >> $(HTSSYSTEM) - @echo "#define HTS_PLATFORM_NAME \"$(FULLUNAME)\"" >> $(HTSSYSTEM) - @echo "#define HTS_DO_NOT_REDEFINE_in_addr_t" >> $(HTSSYSTEM) - @echo "" >> $(HTSSYSTEM) - -htssystem_default3 : - @echo "#define HTS_PLATFORM 3" >> $(HTSSYSTEM) - @echo "#define HTS_PLATFORM_NAME \"$(FULLUNAME)\"" >> $(HTSSYSTEM) - @echo "#define HTS_NO_64_BIT" >> $(HTSSYSTEM) - @echo "" >> $(HTSSYSTEM) - -htssystem_default4 : - @echo "#define HTS_PLATFORM 3" >> $(HTSSYSTEM) - @echo "#define HTS_PLATFORM_NAME \"$(FULLUNAME)\"" >> $(HTSSYSTEM) - @echo "#define HTS_DO_NOT_REDEFINE_in_addr_t" >> $(HTSSYSTEM) - @echo "#define HTS_NO_64_BIT" >> $(HTSSYSTEM) - @echo "" >> $(HTSSYSTEM) - -# Generated by configure -htssystem_auto : - @echo "#define HTS_PLATFORM __PLATFORM__" >> $(HTSSYSTEM) - @echo "#define HTS_PLATFORM_NAME \"$(FULLUNAME)\"" >> $(HTSSYSTEM) - @echo "#define __DEFINEUID__" >> $(HTSSYSTEM) - @echo "#define __DEFINEINA__" >> $(HTSSYSTEM) - @echo "#define __DEFINEPTH__" >> $(HTSSYSTEM) - @echo "#define __DEFINE64B__" >> $(HTSSYSTEM) - @echo "#define __DEFINEFTI__" >> $(HTSSYSTEM) - @echo "#define HTS_PREFIX \"__DEFINEPRE__\"" >> $(HTSSYSTEM) - @echo "#define HTS_BINPATH \"__BINPATH__\"" >> $(HTSSYSTEM) - @echo "#define HTS_ETCPATH \"__ETCPATH__\"" >> $(HTSSYSTEM) - @echo "#define HTS_LIBPATH \"__LIBPATH__\"" >> $(HTSSYSTEM) - @echo "#define HTS_USEZLIB __ZLIB__" >> $(HTSSYSTEM) - @echo "#define HTS_ALIGN __PTRALIGN__" >> $(HTSSYSTEM) - @echo "#define HTS_INET6 __INET6__" >> $(HTSSYSTEM) - @echo "#define HTS_USEOPENSSL __SSL__" >> $(HTSSYSTEM) - -## Defines for "library" (program compiled with HTTrack) -addhtssystem_lib : - @echo "/* Extended functions */" >> $(HTSSYSTEM) - @echo "#define HTS_ANALYSTE 2" >> 
$(HTSSYSTEM) - @echo "" >> $(HTSSYSTEM) - -# Info message before build -firstinfo : - @echo "Building all, please wait" - @echo "In case of problems, type in:" - @echo "make help" - @echo "" - @echo "OS TYPE: $(SHORTUNAME)" - @echo "Make mode: $(MAKECMDGOALS)" - @echo - -## -info_lib : - @echo "Please copy htssystem.h to src/htssystem.h by typing:" - @echo "cp htssystem.h src/htssystem.h" - -### Targets for compiling -build_solaris : $(BINARIES) - $(CC) $(CFLAGS) $(BINARIES) -o $(BOUTPUT) $(LFLAGS) -lnsl -lsocket - chmod 755 $(BOUTPUT) - -build_nopthread: $(BINARIES) - $(CC) $(NOPCFLAGS) $(BINARIES) -o $(BOUTPUT) $(NOPLFLAGS) - chmod 755 $(BOUTPUT) - -build_default : $(BINARIES) - $(CC) $(CFLAGS) $(BINARIES) -o $(BOUTPUT) $(LFLAGS) - chmod 755 $(BOUTPUT) - -## Defines for "library" (program compiled with HTTrack) -build_lib : $(BINARIES_LIB) - $(CC) $(CFLAGS) $(BINARIES_LIB) -o $(BOUTPUT_LIB) $(LFLAGS) - chmod 644 $(BOUTPUT_LIB) - -## Auto -build_auto : build_bin__DYNAMIC__ - -build_bin : $(BINARIES) - $(CC) $(CFLAGS) $(BINARIES) -o $(BOUTPUT) $(LFLAGS) - chmod 755 $(BOUTPUT) - -build_binso : $(SOBINARIES) - $(CC) $(CFLAGS) -shared -Wl,-x,-soname,$(BOUTPUTSO) -o $(BOUTPUTSO) $(SOBINARIES) -lc $(LFLAGS) - $(CC) -L. -lhttrack $(MAINBIN) -o $(BOUTPUT) - chmod 755 $(BOUTPUT) - -## -# Strip file so that is can be shorter -strip : - strip --strip-all $(BOUTPUT) || strip $(BOUTPUT) - __STRIPLIB__ - -strip_lib : - strip --strip-unneeded $(BOUTPUT_LIB) - -# Cleaning up.. 
-clean : - rm -f $(BINARIES) - -## Defines for "library" (program compiled with HTTrack) -clean_lib : - rm -f $(BINARIES_LIB) - -# Bye bye -lastinfo : - @echo "Build successful" - -# Installing httrack into the correct folder -install : __INSTALL__ -uninstall : remove -remove : __UNINSTALL__ - -# Install docs -docinstall : - (mkdir -p "$(PREFIX)/doc/httrack" && chmod 755 "$(PREFIX)/doc/httrack") || true - cp -fR $(DOCS) "$(PREFIX)/doc/httrack/" - -# Uninstall docs -docremove : - rm -rf "$(PREFIX)/doc/httrack" - -# Install binaries and conf files -bininstall : - @echo "Copying $(BOUTPUT) to $(BINPATH).." - test ! -d "$(BINPATH)" && (mkdir -p "$(BINPATH)" && chmod 755 "$(BINPATH)") || true - cp -f $(BOUTPUT) $(BINPATH) - chmod 755 $(BINPATH)/$(BOUTPUT) - -libremove : - rm -f "$(LIBPATH)/$(BOUTPUTSO)" - test -L "$(PREFIX)/lib/$(BOUTPUTSO).1" && rm -f "$(PREFIX)/lib/$(BOUTPUTSO).1" || true - -libinstall : - @echo "Copying $(BOUTPUTSO) to $(LIBPATH)/.." - test ! -d "$(LIBPATH)" && (mkdir -p "$(LIBPATH)" && chmod 755 "$(LIBPATH)") || true - cp -f $(BOUTPUTSO) $(LIBPATH)/ - chmod 644 $(LIBPATH)/$(BOUTPUTSO) - ln -sf "$(BOUTPUTSO)" "$(PREFIX)/lib/$(BOUTPUTSO).1" - - -# Uninstall binaries -binremove : - rm -f $(BINPATH)/$(BOUTPUT) - rm -f $(ETCPATH)/httrack.conf - -# Configure program -config : - @./postinst-config - -### +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +#SUBDIRS = swf + +srcdir = @srcdir@ +top_srcdir = @top_srcdir@ +VPATH = @srcdir@ +pkgdatadir = $(datadir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +top_builddir = .. 
+ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +INSTALL = @INSTALL@ +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +host_triplet = @host@ +ACLOCAL = @ACLOCAL@ +AMDEP_FALSE = @AMDEP_FALSE@ +AMDEP_TRUE = @AMDEP_TRUE@ +AMTAR = @AMTAR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CXX = @CXX@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFAULT_CFLAGS = @DEFAULT_CFLAGS@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DL_LIBS = @DL_LIBS@ +ECHO = @ECHO@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LDFLAGS = @LDFLAGS@ +LFS_FLAG = @LFS_FLAG@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAINTAINER_MODE_FALSE = @MAINTAINER_MODE_FALSE@ +MAINTAINER_MODE_TRUE = @MAINTAINER_MODE_TRUE@ +MAKEINFO = @MAKEINFO@ +OBJEXT = @OBJEXT@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +RANLIB = @RANLIB@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SOCKET_LIBS = @SOCKET_LIBS@ +STRIP = @STRIP@ +THREADS_CFLAGS = @THREADS_CFLAGS@ +THREADS_LIBS = @THREADS_LIBS@ +V6_FLAG = @V6_FLAG@ +VERSION = @VERSION@ +VERSION_INFO = @VERSION_INFO@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_RANLIB = @ac_ct_RANLIB@ +ac_ct_STRIP = @ac_ct_STRIP@ 
+am__fastdepCC_FALSE = @am__fastdepCC_FALSE@ +am__fastdepCC_TRUE = @am__fastdepCC_TRUE@ +am__fastdepCXX_FALSE = @am__fastdepCXX_FALSE@ +am__fastdepCXX_TRUE = @am__fastdepCXX_TRUE@ +am__include = @am__include@ +am__quote = @am__quote@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +datadir = @datadir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +oldincludedir = @oldincludedir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ + +DevIncludesdir = $(includedir)/httrack +DevIncludes_DATA = httrack-library.h \ + htsglobal.h \ + htsopt.h \ + htswrap.h \ + htssystem.h \ + htsconfig.h \ + ../config.h \ + htsmodules.h \ + htsbasenet.h \ + htsbauth.h + + +INCLUDES = \ + @DEFAULT_CFLAGS@ \ + @THREADS_CFLAGS@ \ + @V6_FLAG@ \ + @LFS_FLAG@ \ + -DPREFIX=\""$(prefix)"\" \ + -DSYSCONFDIR=\""$(sysconfdir)"\" \ + -DDATADIR=\""$(datadir)"\" \ + -DLIBDIR=\""$(libdir)"\" + + +bin_PROGRAMS = httrack htsserver + +httrack_LDADD = $(THREADS_LIBS) -lhttrack +htsserver_LDADD = $(THREADS_LIBS) $(SOCKET_LIBS) -lhttrack + +lib_LTLIBRARIES = libhttrack.la + +htsserver_SOURCES = htsserver.c htsserver.h htsweb.c htsweb.h + +whttrackrundir = $(bindir) +whttrackrun_SCRIPTS = webhttrack + +libhttrack_la_SOURCES = htscore.c htsparse.c htsback.c htscache.c \ + htscatchurl.c htsfilters.c htsftp.c htshash.c htsinthash.c \ + htshelp.c htsjava.c htslib.c htscoremain.c \ + htsname.c htsrobots.c htstools.c htswizard.c \ + htsalias.c htsthread.c htsindex.c htsbauth.c \ + htsmd5.c htszlib.c htsnostatic.c htswrap.c \ 
+ htsmodules.c \ + md5.c \ + hts-indextmpl.h htsalias.h htsback.h htsbase.h \ + htsbasenet.h htsbauth.h htscache.h htscatchurl.h \ + htsconfig.h htscore.h htsparse.h htscoremain.h htsdefines.h \ + htsfilters.h htsftp.h htsglobal.h htshash.h htsinthash.h \ + htshelp.h htsindex.h htsjava.h htslib.h htsmd5.h \ + htsmodules.h htsname.h htsnet.h htsnostatic.h \ + htsopt.h htsrobots.h htssystem.h htsthread.h \ + htstools.h htswizard.h htswrap.h htszlib.h \ + httrack-library.h md5.h + + +libhttrack_la_LIBADD = $(THREADS_LIBS) $(DL_LIBS) $(SOCKET_LIBS) +libhttrack_la_LDFLAGS = -version-info $(VERSION_INFO) + +EXTRA_DIST = httrack.h webhttrack \ + httrack.dsp httrack.dsw \ + webhttrack.dsp webhttrack.dsw + +subdir = src +mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +LTLIBRARIES = $(lib_LTLIBRARIES) + +libhttrack_la_DEPENDENCIES = +am_libhttrack_la_OBJECTS = htscore.lo htsparse.lo htsback.lo htscache.lo \ + htscatchurl.lo htsfilters.lo htsftp.lo htshash.lo htsinthash.lo \ + htshelp.lo htsjava.lo htslib.lo htscoremain.lo htsname.lo \ + htsrobots.lo htstools.lo htswizard.lo htsalias.lo htsthread.lo \ + htsindex.lo htsbauth.lo htsmd5.lo htszlib.lo htsnostatic.lo \ + htswrap.lo htsmodules.lo md5.lo +libhttrack_la_OBJECTS = $(am_libhttrack_la_OBJECTS) +bin_PROGRAMS = httrack$(EXEEXT) htsserver$(EXEEXT) +PROGRAMS = $(bin_PROGRAMS) + +am_htsserver_OBJECTS = htsserver.$(OBJEXT) htsweb.$(OBJEXT) +htsserver_OBJECTS = $(am_htsserver_OBJECTS) +htsserver_DEPENDENCIES = +htsserver_LDFLAGS = +httrack_SOURCES = httrack.c +httrack_OBJECTS = httrack.$(OBJEXT) +httrack_DEPENDENCIES = +httrack_LDFLAGS = +SCRIPTS = $(whttrackrun_SCRIPTS) + + +DEFAULT_INCLUDES = -I. 
-I$(srcdir) -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__depfiles_maybe = depfiles +@AMDEP_TRUE@DEP_FILES = ./$(DEPDIR)/htsalias.Plo ./$(DEPDIR)/htsback.Plo \ +@AMDEP_TRUE@ ./$(DEPDIR)/htsbauth.Plo ./$(DEPDIR)/htscache.Plo \ +@AMDEP_TRUE@ ./$(DEPDIR)/htscatchurl.Plo ./$(DEPDIR)/htscore.Plo \ +@AMDEP_TRUE@ ./$(DEPDIR)/htscoremain.Plo \ +@AMDEP_TRUE@ ./$(DEPDIR)/htsfilters.Plo ./$(DEPDIR)/htsftp.Plo \ +@AMDEP_TRUE@ ./$(DEPDIR)/htshash.Plo ./$(DEPDIR)/htshelp.Plo \ +@AMDEP_TRUE@ ./$(DEPDIR)/htsindex.Plo ./$(DEPDIR)/htsinthash.Plo \ +@AMDEP_TRUE@ ./$(DEPDIR)/htsjava.Plo ./$(DEPDIR)/htslib.Plo \ +@AMDEP_TRUE@ ./$(DEPDIR)/htsmd5.Plo ./$(DEPDIR)/htsmodules.Plo \ +@AMDEP_TRUE@ ./$(DEPDIR)/htsname.Plo ./$(DEPDIR)/htsnostatic.Plo \ +@AMDEP_TRUE@ ./$(DEPDIR)/htsparse.Plo ./$(DEPDIR)/htsrobots.Plo \ +@AMDEP_TRUE@ ./$(DEPDIR)/htsserver.Po ./$(DEPDIR)/htsthread.Plo \ +@AMDEP_TRUE@ ./$(DEPDIR)/htstools.Plo ./$(DEPDIR)/htsweb.Po \ +@AMDEP_TRUE@ ./$(DEPDIR)/htswizard.Plo ./$(DEPDIR)/htswrap.Plo \ +@AMDEP_TRUE@ ./$(DEPDIR)/htszlib.Plo ./$(DEPDIR)/httrack.Po \ +@AMDEP_TRUE@ ./$(DEPDIR)/md5.Plo +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) \ + $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +CCLD = $(CC) +LINK = $(LIBTOOL) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +DIST_SOURCES = $(libhttrack_la_SOURCES) $(htsserver_SOURCES) httrack.c +DATA = $(DevIncludes_DATA) + +DIST_COMMON = Makefile.am Makefile.in +SOURCES = $(libhttrack_la_SOURCES) $(htsserver_SOURCES) httrack.c + +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ Makefile.am $(top_srcdir)/configure.in $(ACLOCAL_M4) + cd $(top_srcdir) && \ + $(AUTOMAKE) --gnu src/Makefile +Makefile: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.in $(top_builddir)/config.status + cd 
$(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe) +libLTLIBRARIES_INSTALL = $(INSTALL) +install-libLTLIBRARIES: $(lib_LTLIBRARIES) + @$(NORMAL_INSTALL) + $(mkinstalldirs) $(DESTDIR)$(libdir) + @list='$(lib_LTLIBRARIES)'; for p in $$list; do \ + if test -f $$p; then \ + f="`echo $$p | sed -e 's|^.*/||'`"; \ + echo " $(LIBTOOL) --mode=install $(libLTLIBRARIES_INSTALL) $(INSTALL_STRIP_FLAG) $$p $(DESTDIR)$(libdir)/$$f"; \ + $(LIBTOOL) --mode=install $(libLTLIBRARIES_INSTALL) $(INSTALL_STRIP_FLAG) $$p $(DESTDIR)$(libdir)/$$f; \ + else :; fi; \ + done + +uninstall-libLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(lib_LTLIBRARIES)'; for p in $$list; do \ + p="`echo $$p | sed -e 's|^.*/||'`"; \ + echo " $(LIBTOOL) --mode=uninstall rm -f $(DESTDIR)$(libdir)/$$p"; \ + $(LIBTOOL) --mode=uninstall rm -f $(DESTDIR)$(libdir)/$$p; \ + done + +clean-libLTLIBRARIES: + -test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES) + @list='$(lib_LTLIBRARIES)'; for p in $$list; do \ + dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \ + test "$$dir" = "$$p" && dir=.; \ + echo "rm -f \"$${dir}/so_locations\""; \ + rm -f "$${dir}/so_locations"; \ + done +libhttrack.la: $(libhttrack_la_OBJECTS) $(libhttrack_la_DEPENDENCIES) + $(LINK) -rpath $(libdir) $(libhttrack_la_LDFLAGS) $(libhttrack_la_OBJECTS) $(libhttrack_la_LIBADD) $(LIBS) +binPROGRAMS_INSTALL = $(INSTALL_PROGRAM) +install-binPROGRAMS: $(bin_PROGRAMS) + @$(NORMAL_INSTALL) + $(mkinstalldirs) $(DESTDIR)$(bindir) + @list='$(bin_PROGRAMS)'; for p in $$list; do \ + p1=`echo $$p|sed 's/$(EXEEXT)$$//'`; \ + if test -f $$p \ + || test -f $$p1 \ + ; then \ + f=`echo "$$p1" | sed 's,^.*/,,;$(transform);s/$$/$(EXEEXT)/'`; \ + echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) --mode=install $(binPROGRAMS_INSTALL) $$p $(DESTDIR)$(bindir)/$$f"; \ + $(INSTALL_PROGRAM_ENV) $(LIBTOOL) --mode=install $(binPROGRAMS_INSTALL) $$p $(DESTDIR)$(bindir)/$$f || exit 1; \ + else :; fi; \ + done + +uninstall-binPROGRAMS: + @$(NORMAL_UNINSTALL) + 
@list='$(bin_PROGRAMS)'; for p in $$list; do \ + f=`echo "$$p" | sed 's,^.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/'`; \ + echo " rm -f $(DESTDIR)$(bindir)/$$f"; \ + rm -f $(DESTDIR)$(bindir)/$$f; \ + done + +clean-binPROGRAMS: + @list='$(bin_PROGRAMS)'; for p in $$list; do \ + f=`echo $$p|sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f $$p $$f"; \ + rm -f $$p $$f ; \ + done +htsserver$(EXEEXT): $(htsserver_OBJECTS) $(htsserver_DEPENDENCIES) + @rm -f htsserver$(EXEEXT) + $(LINK) $(htsserver_LDFLAGS) $(htsserver_OBJECTS) $(htsserver_LDADD) $(LIBS) +httrack$(EXEEXT): $(httrack_OBJECTS) $(httrack_DEPENDENCIES) + @rm -f httrack$(EXEEXT) + $(LINK) $(httrack_LDFLAGS) $(httrack_OBJECTS) $(httrack_LDADD) $(LIBS) +whttrackrunSCRIPT_INSTALL = $(INSTALL_SCRIPT) +install-whttrackrunSCRIPTS: $(whttrackrun_SCRIPTS) + @$(NORMAL_INSTALL) + $(mkinstalldirs) $(DESTDIR)$(whttrackrundir) + @list='$(whttrackrun_SCRIPTS)'; for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + if test -f $$d$$p; then \ + f=`echo "$$p" | sed 's|^.*/||;$(transform)'`; \ + echo " $(whttrackrunSCRIPT_INSTALL) $$d$$p $(DESTDIR)$(whttrackrundir)/$$f"; \ + $(whttrackrunSCRIPT_INSTALL) $$d$$p $(DESTDIR)$(whttrackrundir)/$$f; \ + else :; fi; \ + done + +uninstall-whttrackrunSCRIPTS: + @$(NORMAL_UNINSTALL) + @list='$(whttrackrun_SCRIPTS)'; for p in $$list; do \ + f=`echo "$$p" | sed 's|^.*/||;$(transform)'`; \ + echo " rm -f $(DESTDIR)$(whttrackrundir)/$$f"; \ + rm -f $(DESTDIR)$(whttrackrundir)/$$f; \ + done + +mostlyclean-compile: + -rm -f *.$(OBJEXT) core *.core + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htsalias.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htsback.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htsbauth.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htscache.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htscatchurl.Plo@am__quote@ 
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htscore.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htscoremain.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htsfilters.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htsftp.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htshash.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htshelp.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htsindex.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htsinthash.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htsjava.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htslib.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htsmd5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htsmodules.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htsname.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htsnostatic.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htsparse.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htsrobots.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htsserver.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htsthread.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htstools.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htsweb.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htswizard.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htswrap.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htszlib.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/httrack.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/md5.Plo@am__quote@ + +distclean-depend: + -rm -rf ./$(DEPDIR) + +.c.o: +@am__fastdepCC_TRUE@ if $(COMPILE) -MT $@ -MD -MP -MF "$(DEPDIR)/$*.Tpo" \ +@am__fastdepCC_TRUE@ -c -o 
$@ `test -f '$<' || echo '$(srcdir)/'`$<; \ +@am__fastdepCC_TRUE@ then mv "$(DEPDIR)/$*.Tpo" "$(DEPDIR)/$*.Po"; \ +@am__fastdepCC_TRUE@ else rm -f "$(DEPDIR)/$*.Tpo"; exit 1; \ +@am__fastdepCC_TRUE@ fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ depfile='$(DEPDIR)/$*.Po' tmpdepfile='$(DEPDIR)/$*.TPo' @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(COMPILE) -c `test -f '$<' || echo '$(srcdir)/'`$< + +.c.obj: +@am__fastdepCC_TRUE@ if $(COMPILE) -MT $@ -MD -MP -MF "$(DEPDIR)/$*.Tpo" \ +@am__fastdepCC_TRUE@ -c -o $@ `if test -f '$<'; then $(CYGPATH_W) '$<'; else $(CYGPATH_W) '$(srcdir)/$<'`; \ +@am__fastdepCC_TRUE@ then mv "$(DEPDIR)/$*.Tpo" "$(DEPDIR)/$*.Po"; \ +@am__fastdepCC_TRUE@ else rm -f "$(DEPDIR)/$*.Tpo"; exit 1; \ +@am__fastdepCC_TRUE@ fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ depfile='$(DEPDIR)/$*.Po' tmpdepfile='$(DEPDIR)/$*.TPo' @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(COMPILE) -c `if test -f '$<'; then $(CYGPATH_W) '$<'; else $(CYGPATH_W) '$(srcdir)/$<'` + +.c.lo: +@am__fastdepCC_TRUE@ if $(LTCOMPILE) -MT $@ -MD -MP -MF "$(DEPDIR)/$*.Tpo" \ +@am__fastdepCC_TRUE@ -c -o $@ `test -f '$<' || echo '$(srcdir)/'`$<; \ +@am__fastdepCC_TRUE@ then mv "$(DEPDIR)/$*.Tpo" "$(DEPDIR)/$*.Plo"; \ +@am__fastdepCC_TRUE@ else rm -f "$(DEPDIR)/$*.Tpo"; exit 1; \ +@am__fastdepCC_TRUE@ fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ depfile='$(DEPDIR)/$*.Plo' tmpdepfile='$(DEPDIR)/$*.TPlo' @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ `test -f '$<' || echo '$(srcdir)/'`$< + +mostlyclean-libtool: + -rm -f 
*.lo + +clean-libtool: + -rm -rf .libs _libs + +distclean-libtool: + -rm -f libtool +uninstall-info-am: +DevIncludesDATA_INSTALL = $(INSTALL_DATA) +install-DevIncludesDATA: $(DevIncludes_DATA) + @$(NORMAL_INSTALL) + $(mkinstalldirs) $(DESTDIR)$(DevIncludesdir) + @list='$(DevIncludes_DATA)'; for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + f="`echo $$p | sed -e 's|^.*/||'`"; \ + echo " $(DevIncludesDATA_INSTALL) $$d$$p $(DESTDIR)$(DevIncludesdir)/$$f"; \ + $(DevIncludesDATA_INSTALL) $$d$$p $(DESTDIR)$(DevIncludesdir)/$$f; \ + done + +uninstall-DevIncludesDATA: + @$(NORMAL_UNINSTALL) + @list='$(DevIncludes_DATA)'; for p in $$list; do \ + f="`echo $$p | sed -e 's|^.*/||'`"; \ + echo " rm -f $(DESTDIR)$(DevIncludesdir)/$$f"; \ + rm -f $(DESTDIR)$(DevIncludesdir)/$$f; \ + done + +ETAGS = etags +ETAGSFLAGS = + +CTAGS = ctags +CTAGSFLAGS = + +tags: TAGS + +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + mkid -fID $$unique + +TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + tags=; \ + here=`pwd`; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + test -z "$(ETAGS_ARGS)$$tags$$unique" \ + || $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$tags $$unique + +ctags: CTAGS +CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + tags=; \ + here=`pwd`; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i 
in files) print i; }'`; \ + test -z "$(CTAGS_ARGS)$$tags$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$tags $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && cd $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) $$here + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) + +top_distdir = .. +distdir = $(top_distdir)/$(PACKAGE)-$(VERSION) + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; \ + list='$(DISTFILES)'; for file in $$list; do \ + case $$file in \ + $(srcdir)/*) file=`echo "$$file" | sed "s|^$$srcdirstrip/||"`;; \ + esac; \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + dir=`echo "$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test "$$dir" != "$$file" && test "$$dir" != "."; then \ + dir="/$$dir"; \ + $(mkinstalldirs) "$(distdir)$$dir"; \ + else \ + dir=''; \ + fi; \ + if test -d $$d/$$file; then \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \ + fi; \ + cp -pR $$d/$$file $(distdir)$$dir || exit 1; \ + else \ + test -f $(distdir)/$$file \ + || cp -p $$d/$$file $(distdir)/$$file \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(LTLIBRARIES) $(PROGRAMS) $(SCRIPTS) $(DATA) +install-binPROGRAMS: install-libLTLIBRARIES + + +installdirs: + $(mkinstalldirs) $(DESTDIR)$(libdir) $(DESTDIR)$(bindir) $(DESTDIR)$(whttrackrundir) $(DESTDIR)$(DevIncludesdir) + +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + INSTALL_STRIP_FLAG=-s \ + `test -z '$(STRIP)' || \ + echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install +mostlyclean-generic: + 
+clean-generic: + +distclean-generic: + -rm -f Makefile $(CONFIG_CLEAN_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-am + +clean-am: clean-binPROGRAMS clean-generic clean-libLTLIBRARIES \ + clean-libtool mostlyclean-am + +distclean: distclean-am + +distclean-am: clean-am distclean-compile distclean-depend \ + distclean-generic distclean-libtool distclean-tags + +dvi: dvi-am + +dvi-am: + +info: info-am + +info-am: + +install-data-am: install-DevIncludesDATA install-whttrackrunSCRIPTS + +install-exec-am: install-binPROGRAMS install-libLTLIBRARIES + +install-info: install-info-am + +install-man: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-DevIncludesDATA uninstall-binPROGRAMS \ + uninstall-info-am uninstall-libLTLIBRARIES \ + uninstall-whttrackrunSCRIPTS + +.PHONY: CTAGS GTAGS all all-am check check-am clean clean-binPROGRAMS \ + clean-generic clean-libLTLIBRARIES clean-libtool ctags \ + distclean distclean-compile distclean-depend distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am info \ + info-am install install-DevIncludesDATA install-am \ + install-binPROGRAMS install-data install-data-am install-exec \ + install-exec-am install-info install-info-am \ + install-libLTLIBRARIES install-man install-strip \ + install-whttrackrunSCRIPTS installcheck installcheck-am \ + installdirs maintainer-clean maintainer-clean-generic \ + mostlyclean mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool pdf pdf-am ps ps-am tags uninstall \ + uninstall-DevIncludesDATA uninstall-am uninstall-binPROGRAMS \ + uninstall-info-am uninstall-libLTLIBRARIES \ + 
uninstall-whttrackrunSCRIPTS +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/src/configure b/src/configure deleted file mode 100755 index 7c2d472..0000000 --- a/src/configure +++ /dev/null @@ -1,603 +0,0 @@ -#!/bin/sh -# No, this isn't generated by autoconf -# Some parts are inspired by autoconf (Free Software Foundation), however -# And the idea is slightly the same - -# Usage: -# './configure' and then 'make' and 'make install', or -# './configure --make --install' - -SHELL=/bin/sh - -ac_prev= -for ac_option -do - # If the previous option needs an argument, assign it. - if test -n "$ac_prev"; then - eval "$ac_prev=\$ac_option" - ac_prev= - continue - fi - - case "$ac_option" in - -*=*) ac_optarg=`echo "$ac_option" | sed 's/[-_a-zA-Z0-9]*=//'` ;; - *) ac_optarg= ;; - esac - - case "$ac_option" in - - --pthread | --thread) THREADS=1 ;; - --nopthread | --nothread) THREADS=0 ;; - - --zlib | --gzip) ZLIB=1 ;; - --nozlib | --nogzip) ZLIB=0 ;; - - --static | --noso) DYNAMIC=0 - ;; - --dynamic | --so) DYNAMIC=1 - ;; - - --longlong) LONGLONG=1 ;; - --nolonglong) LONGLONG=0 ;; - - --inaddrt) NODECLINADDRT=0 ;; - --noinaddrt) NODECLINADDRT=1 ;; - - --ipv6) IPV6=1 ;; - --noipv6) IPV6=0 ;; - - --ssl) SSL=1 ;; - --https) SSL=1 ;; - --nossl) SSL=0 ;; - --nohttps) SSL=0 ;; - - --useuid) NOUID=0 ;; - --nouseuid) NOUID=1 ;; - - --useftime) NOFTIME=0 ;; - --nouseftime) NOFTIME=1 ;; - - --system=*) SYSTEMTYPE="$ac_optarg" ;; - --system) ac_prev=SYSTEMTYPE ;; - - --debug) OTYPE="-O0 -g3" - MKTYPE="firstinfo htssystem htssystem_auto build_auto strip clean lastinfo" ;; - - --make) DOMAKE=1 ;; - --install) DOINSTALL=1 ;; - --bininstall) DOINSTALL=1 - NODOCINSTALL=1 - ;; - --docinstall) DOINSTALL=1 - DOCINSTALL=1 - ;; - --cls) - cd .. 
- chmod 'u=rw,go=r' `find ./ -type f` - chmod 'u=rwx,go=rx' `find ./ -type d` - chmod 'u=rwx,go=rx' ./src/configure - chmod 'u=rwx,go=rx' ./src/strip_cr.in - chmod 'u=rwx,go=rx' ./src/postinst-config.in - cd src - strip_cr *.c *.h - rm -f ./httrack 2>/dev/null - exit - ;; - -prefix | --prefix | --prefi | --pref | --pre | --pr) - ac_prev=PREFIX ;; - -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=*) - PREFIX="$ac_optarg" ;; - -bindir | --bindir | --bindi | --bind | --bin | --bi) - ac_prev=BINPATH ;; - -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) - BINPATH="$ac_optarg" ;; - -etcdir | --etcdir | --etcdi | --etcd | --etc | --et) - ac_prev=ETCPATH ;; - -etcdir=* | --etcdir=* | --etcdi=* | --etcd=* | --etc=* | --et=*) - ETCPATH="$ac_optarg" ;; - -libdir | --libdir | --libdi | --libd | --lib | --li) - ac_prev=LIBPATH ;; - -libdir=* | --libdir=* | --libdi=* | --libd=* | --lib=* | --li=*) - LIBPATH="$ac_optarg" ;; - - --help) - cat <<EOF ---prefix=.. : prefix (/usr) ---bindir=.. : binary dir (/usr/bin) ---etcdir=.. : config dir (/usr/etc or /etc) ---libdir=.. : library dir (/usr/lib) ---dynamic : do use dynamic (.so) mode ---static : do use static mode ---nopthread : do not use threads (pthread.h) ---pthread : do use threads (pthread.h) ---noipv6 : do not use ipv6 extensions ---ipv6 : do use ipv6 extensions ---nohttps : do not use SSL extensions ---https : do use SSL extensions ---nozlib : do not use compression (zlib) ---zlib : do use compression (zlib) ---nolonglong : do not use 64-bit int ---longlong : do use 64-bit int ---noinaddrt : do not redeclare in_addr_t ---inaddrt : do redeclare in_addr_t ---nouseuid : do not use setuid()/setgid() ---useuid : do use setuid()/setgid() ---nouseftime : do not use ftime() ---useftime : do use ftime() ---system=<type> : override system type (uname) - NOT RECOMMENDED! 
(types: 'Default','Linux','SunOS','AIX') ---make : 'make' after configure ---install : 'make install' after configure ---bininstall : 'make bininstall' after configure ---docinstall : 'make docinstall' after configure ---debug : add debug information (for gdb) -EOF - exit - ;; - - *) echo "Unrecognized option: $ac_option" - exit - ;; - - esac - -done - -echo "Welcome to HTTrack Website Copier!" -echo "Type in ./configure --help for more details" -echo "If this script fails, you can enter supplemental options through '--option=value'" -echo "or enter in manual make, through 'make help'" -echo "" - -if cp -f Makefile.in Makefile; then - -SEDEXEC= - -# System (OS) type? -printf "Checking for OS type.. " -if test -z "$SYSTEMTYPE"; then - SYSTEMTYPE="`uname`" -fi -case "$SYSTEMTYPE" in - SunOS) printf "SunOS/Solaris\n"; - PLATFORM=0 - SOLSOCK=1 - ;; - AIX) printf "AIX\n"; PLATFORM=2 ;; - *) printf "Linux type\n"; PLATFORM=3 ;; -esac - -WTYPE="-Wall -Wcast-align -Wstrict-prototypes -Wmissing-prototypes -Wmissing-declarations -Wpointer-arith -Wnested-externs" - -if test -z "$OTYPE"; then - OTYPE="-O3" -fi -if test -z "$MKTYPE"; then - MKTYPE="firstinfo htssystem htssystem_auto build_auto strip clean lastinfo" -fi - -# Root group -if test -z "$ROOTGROUP"; then - printf "Checking for 'root' group.." - if egrep "^root:" /etc/group >/dev/null; then - ROOTGROUP="root" - elif egrep "^wheel:" /etc/group >/dev/null; then - ROOTGROUP="wheel" - fi - printf "$ROOTGROUP\n" -else - echo "Overriding ROOTGROUP=$ROOTGROUP" -fi - - -# Binaries location -if test -z "$BINPATH"; then -printf "Checking for bin directory.. " -if test -n "$PREFIX"; then - BINPATH="$PREFIX/bin" -elif test -d "/usr/bin"; then - BINPATH="/usr/bin" -else - BINPATH="/bin" -fi -printf "$BINPATH\n" -else - echo "Overriding BINPATH=$BINPATH" -fi - -# shlib? -if test -d "/usr/shlib"; then -SYSLIB=/usr/shlib -else -SYSLIB=/usr/lib -fi - -# /etc location -if test -z "$ETCPATH"; then -printf "Checking for etc directory.. 
" -if test -n "$PREFIX"; then - ETCPATH="$PREFIX/etc" -elif test -d "/usr/etc"; then - ETCPATH="/usr/etc" -else - ETCPATH="/etc" -fi -printf "$ETCPATH\n" -else - echo "Overriding ETCPATH=$ETCPATH" -fi - -# /usr/lib location -if test -z "$LIBPATH"; then -printf "Checking for lib directory.. " -if test -n "$PREFIX"; then - LIBPATH="$PREFIX/lib" -elif test -d "/usr/lib"; then - LIBPATH="/usr/lib" -elif test -d "/usr/local/lib"; then - LIBPATH="/usr/local/lib" -else - LIBPATH="/lib" -fi -printf "$LIBPATH\n" -else - echo "Overriding LIBPATH=$LIBPATH" -fi - -# Prefix location -if test -z "$PREFIX"; then - printf "Checking for prefix directory.. " - PREFIX="/usr" - printf "$PREFIX\n" -else - echo "Overriding PREFIX=$PREFIX" -fi - - -# 64-bit (long long) cause some troubles to some processors -# because some alignements aren't properly defined -# we only accept 64-bit on tested processors here -if test -z "$LONGLONG"; then -printf "Checking for long long.. " -LONGLONG= -if grep "long long" /usr/include/sys/types.h >/dev/null; then -if uname -a|egrep ' i[3-9]86 ' >/dev/null; then - LONGLONG=1 -fi -fi -if test -n "$LONGLONG"; then - printf "supported\n" -else - printf "not tested/supported. Use --LONGLONG=1 to override\n" -fi -else - echo "Overriding LONGLONG=$LONGLONG" -fi - -# IPV6? -# NOT TESTED FOR OTHER PLATFORMS.. FIXME! -if test -z "$IPV6"; then -printf "Checking for ipv6 support.. " -if test -f "/usr/include/linux/in6.h" -o -f "/usr/local/include/linux/in6.h"; then -IPV6=1 -else -IPV6=0 -fi -if test "$IPV6" -eq 1; then -printf "supported\n" -else -printf "not supported\n" -fi -else - echo "Overriding IPV6=$IPV6" -fi -if test "$IPV6" -eq 1; then -IPTYPE="-DINET6" -LIPTYPE="" -else -IPTYPE= -LIPTYPE= -fi - -# HTTPS? -# NOT TESTED FOR OTHER PLATFORMS.. FIXME! -if test -z "$SSL"; then -printf "Checking for SSL support.. 
" -if test -f "/usr/include/openssl/ssl.h" -o -f "/usr/local/include/openssl/ssl.h"; then -SSL=1 -else -SSL=0 -fi -if test "$SSL" -eq 1; then -printf "supported\n" -else -printf "not supported\n" -fi -else - echo "Overriding SSL=$SSL" -fi -if test "$SSL" -eq 1; then -SSTYPE="-lssl -lcrypto" -else -SSTYPE= -fi - -# Alignement -if test -z "$PTRALIGN"; then -printf "Checking for pointer alignements.. " -PTRALIGN= -if test `uname -p` = "alpha" -o `uname -p` = "sparc"; then -PTRALIGN=8 -else -PTRALIGN=4 -fi -fi -printf "$PTRALIGN\n" - - -# Dynamic (.so) module? -if test -z "$DYNAMIC"; then - DYNAMIC=1 -fi -printf "Checking for compilation mode: " -if test "$DYNAMIC" -eq "1"; then - echo "dynamic" - SOTYPE=-fPIC -else - echo "static" - SOTYPE= -fi - -# Do we not have to redeclare in_addr_t ? -# Sometimes this type is defined, or not.. -if test -z "$NODECLINADDRT"; then -printf "Checking for in_addr_t declaration in in.h.. " -if grep -E "typedef .* in_addr_t" /usr/include/netinet/in.h >/dev/null || grep -E "typedef .* in_addr_t" /usr/include/sys/types.h; then - printf "found, do not redeclare\n" - NODECLINADDRT=1 -else - printf "not found, declaring\n" - NODECLINADDRT= -fi -else - echo "Overriding NODECLINADDRT=$NODECLINADDRT" -fi - -# Test if we can use zlib (/usr/lib/libz.so) -# This allow to speed up transfers using HTTP compression -if test -z "$ZLIB"; then -printf "Checking for ${SYSLIB}/libz.so.. " -if test -f "${SYSLIB}/libz.so"; then - printf "found\n" - ZLIB=1 -else - printf "library not found (too bad), no http compression will be available\n" - ZLIB=0 -fi -else - echo "Overriding ZLIB=$ZLIB" -fi - -# Sometimes, pthread.h doesn't exists on some systems -# This is sad, because it speeds up some useful things, like DNS or ftp -if test -z "$THREADS"; then -printf "Checking for /usr/include/pthread.h.. 
" -if test -f "/usr/include/pthread.h" -o -f "/usr/local/include/pthread.h"; then -if test -f "${SYSLIB}/libpthread.so"; then - printf "found\n" - THREADS=1 -else - printf "library not found (too bad), no threads will be available\n" - THREADS= -fi -else - printf "not found, no threads will be available\n" - THREADS= -fi -else - echo "Overriding THREADS=$THREADS" -fi - -# Sometimes, setuid and setgid can't be used (missing pwd.h and unistd.h ?!) -if test -z "$NOUID"; then -NOUID=1 -printf "Checking for /usr/include/pwd.h and /usr/include/unistd.h.. " -if test -f "/usr/include/pwd.h" -o "/usr/local/include/pwd.h"; then -if test -f "/usr/include/unistd.h" -o -f "/usr/local/include/unistd.h" ; then - NOUID= -fi -fi -if test -z "$NOUID"; then - printf "found\n" -else - printf "not found, not using setuid() and setgid()\n" -fi -else - echo "Overriding NOUID=$NOUID" -fi - -# Sometimes, ftime can't be used (missing declaration...) -if test -z "$NOFTIME"; then -NOFTIME=1 -printf "Checking for ftime in /usr/include/sys/timeb.h.. " -if grep "int ftime" /usr/include/sys/timeb.h >/dev/null; then - NOFTIME= -fi -if test -z "$NOFTIME"; then - printf "found\n" -else - printf "not found (too bad), not using ftime()\n" -fi -else - echo "Overriding NOFTIME=$NOFTIME" -fi - -# Test variables -if test "$NOUID" = 1; then - SEDEXEC="$SEDEXEC | sed -e 's/__DEFINEUID__/HTS_DO_NOT_USE_UID/'" -fi -if test "$NOFTIME" = 1; then - SEDEXEC="$SEDEXEC | sed -e 's/__DEFINEFTI__/HTS_DO_NOT_USE_FTIME/'" -fi -if test "$NODECLINADDRT" = 1; then - SEDEXEC="$SEDEXEC | sed -e 's/__DEFINEINA__/HTS_DO_NOT_REDEFINE_in_addr_t/'" -fi -if test "$THREADS" = 1; then -THTYPE="-D_REENTRANT" -LPTHTYPE="-lpthread" -else -THTYPE= -LPTHTYPE= -fi -SEDEXEC="$SEDEXEC | sed -e \"s/__CFLAGS__/$SOTYPE $OTYPE $WTYPE $IPTYPE $THTYPE/g\"" -SEDEXEC="$SEDEXEC | sed -e \"s/__LFLAGS__/$LPTHTYPE $SSTYPE $LIPTYPE/g\"" -if test ! 
"$THREADS" = 1; then - SEDEXEC="$SEDEXEC | sed -e 's/__DEFINEPTH__/HTS_DO_NOT_USE_PTHREAD/'" -fi -if test "$ZLIB" = 1; then - SEDEXEC="$SEDEXEC | sed -e 's/__LFLAGS3__/-lz/g'" -else - SEDEXEC="$SEDEXEC | sed -e 's/__LFLAGS3__//g'" -fi -SEDEXEC="$SEDEXEC | sed -e \"s/__ZLIB__/$ZLIB/\"" -if test "$SOLSOCK" = 1; then - SEDEXEC="$SEDEXEC | sed -e 's/__LFLAGS2__/-lnsl -lsocket/g'" -else - SEDEXEC="$SEDEXEC | sed -e 's/__LFLAGS2__//g'" -fi -if test ! "$LONGLONG" = 1; then - SEDEXEC="$SEDEXEC | sed -e 's/__DEFINE64B__/HTS_NO_64_BIT/'" -fi -if test -n "$PTRALIGN"; then - SEDEXEC="$SEDEXEC | sed -e \"s/__PTRALIGN__/$PTRALIGN/g\"" -fi -if test -n "$IPV6"; then - SEDEXEC="$SEDEXEC | sed -e \"s/__INET6__/$IPV6/g\"" -fi -if test -n "$SSL"; then - SEDEXEC="$SEDEXEC | sed -e \"s/__SSL__/$SSL/g\"" -fi -if test "$DYNAMIC" = 1; then - SEDEXEC="$SEDEXEC | sed -e 's/__DYNAMIC__/so/'" - SEDEXEC="$SEDEXEC | sed -e 's/__INSTALL__/bininstall libinstall docinstall/'" - SEDEXEC="$SEDEXEC | sed -e 's/__UNINSTALL__/binremove libremove docremove/'" - SEDEXEC="$SEDEXEC | sed -e 's/__STRIPLIB__/strip --strip-unneeded \\\$(BOUTPUTSO)/'" -else - SEDEXEC="$SEDEXEC | sed -e 's/__DYNAMIC__//'" - SEDEXEC="$SEDEXEC | sed -e 's/__INSTALL__/bininstall docinstall/'" - SEDEXEC="$SEDEXEC | sed -e 's/__UNINSTALL__/binremove docremove/'" - SEDEXEC="$SEDEXEC | sed -e 's/__STRIPLIB__//'" -fi - -SEDEXEC="$SEDEXEC | sed -e \"s/__PLATFORM__/$PLATFORM/g\"" -SEDEXEC="$SEDEXEC | sed -e 's/#__AUTONAME__/AUTONAME = auto/'" -SEDEXEC="$SEDEXEC | sed -e 's/#define __DEFINE.*__//g'" - -# Paths -TMP=`echo $BINPATH | sed -e 's/\\//\\\\\\//g'` -SEDEXEC="$SEDEXEC | sed -e \"s/__BINPATH__/$TMP/g\"" -TMP=`echo $ETCPATH | sed -e 's/\\//\\\\\\//g'` -SEDEXEC="$SEDEXEC | sed -e \"s/__ETCPATH__/$TMP/g\"" -TMP=`echo $LIBPATH | sed -e 's/\\//\\\\\\//g'` -SEDEXEC="$SEDEXEC | sed -e \"s/__LIBPATH__/$TMP/g\"" -TMP=`echo $PREFIX | sed -e 's/\\//\\\\\\//g'` -SEDEXEC="$SEDEXEC | sed -e \"s/__PREFIX__/$TMP/g\"" -TMP=`echo $ROOTGROUP | sed -e 
's/\\//\\\\\\//g'` -SEDEXEC="$SEDEXEC | sed -e \"s/__ROOTGROUP__/$TMP/g\"" - -TMP=`echo $PREFIX | sed -e 's/\\//\\\\\\//g'` -SEDEXEC="$SEDEXEC | sed -e \"s/__DEFINEPRE__/$TMP/g\"" -SEDEXEC="$SEDEXEC | sed -e 's/__MAKEAUTO__/$MKTYPE/'" - -# Search for gmake -printf "Checking for make.. " -MAKEPATH= -if test -f "/usr/bin/gmake"; then -MAKEPATH=/usr/bin/gmake -else -if test -f "/bin/gmake"; then -MAKEPATH=/bin/gmake -else -if test -f "/usr/local/bin/gmake"; then -MAKEPATH=/usr/local/bin/gmake -fi -fi -fi -if test -n "$MAKEPATH"; then -printf "found $MAKEPATH\n" -else -MAKEPATH=make -printf "not found, assume make will work\n" -fi - -# Sed strip_cr -EXCL='#!' -printf "Checking for perl.. " -PERLPATH= -cp -f strip_cr.in strip_cr -if test -f "/usr/bin/perl"; then -PERLPATH=/usr/bin/perl -else -if test -f "/bin/perl"; then -PERLPATH=/bin/perl -else -if test -f "/usr/local/bin/perl"; then -PERLPATH=/usr/local/bin/perl -fi -fi -fi -if test -n "$PERLPATH"; then -printf "found $PERLPATH\nEnsuring that *.c/*.h source files don't contains CR (^M).. " -TMP=`echo $PERLPATH | sed -e 's/\\//\\\\\\//g'` -cat strip_cr | sed -e "s/__PERL__/${EXCL}${TMP}/" > __tmp; mv __tmp strip_cr -chmod 755 strip_cr -./strip_cr *.c *.h -printf "done\n" -fi - -# Sed postinst-config -cp -f postinst-config.in postinst-config -CMD="cat postinst-config $SEDEXEC > __tmp; mv __tmp postinst-config" -if eval $CMD; then -chmod 755 postinst-config -else -echo "Error while seding postinst-config" -exit 1 -fi - -# Sed all __VARS__ -CMD="cat Makefile $SEDEXEC > __tmp; mv __tmp Makefile" -echo "Command: $CMD" -if eval $CMD; then - -echo "" -echo "Makefile created!" -echo "Type in '$MAKEPATH' to build and '$MAKEPATH install' to install" - -if test -n "$DOMAKE"; then - echo "Making.." - eval $MAKEPATH clean - eval $MAKEPATH -fi -if test -n "$DOINSTALL"; then - if test -n "$NODOCINSTALL"; then - echo "Installing binary.." - eval $MAKEPATH bininstall - elif test -n "$DOCINSTALL"; then - echo "Installing docs.." 
- eval $MAKEPATH docinstall - else - echo "Installing.." - eval $MAKEPATH install - fi -fi - -else - echo "Error while seding Makefile.." - exit 1 -fi - -else - echo "Error copying Makefile.in -> Makefile.. giving up" - exit 1 -fi - diff --git a/src/gpl.txt b/src/gpl.txt deleted file mode 100644 index 546a71a..0000000 --- a/src/gpl.txt +++ /dev/null @@ -1,287 +0,0 @@ - GNU GENERAL PUBLIC LICENSE - Version 2, June 1991 - - Copyright (C) 1989, 1991 Free Software Foundation, Inc. - 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The licenses for most software are designed to take away your -freedom to share and change it. By contrast, the GNU General Public -License is intended to guarantee your freedom to share and change free -software--to make sure the software is free for all its users. This -General Public License applies to most of the Free Software -Foundation's software and to any other program whose authors commit to -using it. (Some other Free Software Foundation software is covered by -the GNU Library General Public License instead.) You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -this service if you wish), that you receive source code or can get it -if you want it, that you can change the software or use pieces of it -in new free programs; and that you know you can do these things. - - To protect your rights, we need to make restrictions that forbid -anyone to deny you these rights or to ask you to surrender the rights. -These restrictions translate to certain responsibilities for you if you -distribute copies of the software, or if you modify it. 
- - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must give the recipients all the rights that -you have. You must make sure that they, too, receive or can get the -source code. And you must show them these terms so they know their -rights. - - We protect your rights with two steps: (1) copyright the software, and -(2) offer you this license which gives you legal permission to copy, -distribute and/or modify the software. - - Also, for each author's protection and ours, we want to make certain -that everyone understands that there is no warranty for this free -software. If the software is modified by someone else and passed on, we -want its recipients to know that what they have is not the original, so -that any problems introduced by others will not reflect on the original -authors' reputations. - - Finally, any free program is threatened constantly by software -patents. We wish to avoid the danger that redistributors of a free -program will individually obtain patent licenses, in effect making the -program proprietary. To prevent this, we have made it clear that any -patent must be licensed for everyone's free use or not licensed at all. - - The precise terms and conditions for copying, distribution and -modification follow. - - - GNU GENERAL PUBLIC LICENSE - TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION - - 0. This License applies to any program or other work which contains -a notice placed by the copyright holder saying it may be distributed -under the terms of this General Public License. The "Program", below, -refers to any such program or work, and a "work based on the Program" -means either the Program or any derivative work under copyright law: -that is to say, a work containing the Program or a portion of it, -either verbatim or with modifications and/or translated into another -language. (Hereinafter, translation is included without limitation in -the term "modification".) 
Each licensee is addressed as "you". - -Activities other than copying, distribution and modification are not -covered by this License; they are outside its scope. The act of -running the Program is not restricted, and the output from the Program -is covered only if its contents constitute a work based on the -Program (independent of having been made by running the Program). -Whether that is true depends on what the Program does. - - 1. You may copy and distribute verbatim copies of the Program's -source code as you receive it, in any medium, provided that you -conspicuously and appropriately publish on each copy an appropriate -copyright notice and disclaimer of warranty; keep intact all the -notices that refer to this License and to the absence of any warranty; -and give any other recipients of the Program a copy of this License -along with the Program. - -You may charge a fee for the physical act of transferring a copy, and -you may at your option offer warranty protection in exchange for a fee. - - 2. You may modify your copy or copies of the Program or any portion -of it, thus forming a work based on the Program, and copy and -distribute such modifications or work under the terms of Section 1 -above, provided that you also meet all of these conditions: - - a) You must cause the modified files to carry prominent notices - stating that you changed the files and the date of any change. - - b) You must cause any work that you distribute or publish, that in - whole or in part contains or is derived from the Program or any - part thereof, to be licensed as a whole at no charge to all third - parties under the terms of this License. 
- - c) If the modified program normally reads commands interactively - when run, you must cause it, when started running for such - interactive use in the most ordinary way, to print or display an - announcement including an appropriate copyright notice and a - notice that there is no warranty (or else, saying that you provide - a warranty) and that users may redistribute the program under - these conditions, and telling the user how to view a copy of this - License. (Exception: if the Program itself is interactive but - does not normally print such an announcement, your work based on - the Program is not required to print an announcement.) - - -These requirements apply to the modified work as a whole. If -identifiable sections of that work are not derived from the Program, -and can be reasonably considered independent and separate works in -themselves, then this License, and its terms, do not apply to those -sections when you distribute them as separate works. But when you -distribute the same sections as part of a whole which is a work based -on the Program, the distribution of the whole must be on the terms of -this License, whose permissions for other licensees extend to the -entire whole, and thus to each and every part regardless of who wrote it. - -Thus, it is not the intent of this section to claim rights or contest -your rights to work written entirely by you; rather, the intent is to -exercise the right to control the distribution of derivative or -collective works based on the Program. - -In addition, mere aggregation of another work not based on the Program -with the Program (or with a work based on the Program) on a volume of -a storage or distribution medium does not bring the other work under -the scope of this License. - - 3. 
You may copy and distribute the Program (or a work based on it, -under Section 2) in object code or executable form under the terms of -Sections 1 and 2 above provided that you also do one of the following: - - a) Accompany it with the complete corresponding machine-readable - source code, which must be distributed under the terms of Sections - 1 and 2 above on a medium customarily used for software interchange; or, - - b) Accompany it with a written offer, valid for at least three - years, to give any third party, for a charge no more than your - cost of physically performing source distribution, a complete - machine-readable copy of the corresponding source code, to be - distributed under the terms of Sections 1 and 2 above on a medium - customarily used for software interchange; or, - - c) Accompany it with the information you received as to the offer - to distribute corresponding source code. (This alternative is - allowed only for noncommercial distribution and only if you - received the program in object code or executable form with such - an offer, in accord with Subsection b above.) - -The source code for a work means the preferred form of the work for -making modifications to it. For an executable work, complete source -code means all the source code for all modules it contains, plus any -associated interface definition files, plus the scripts used to -control compilation and installation of the executable. However, as a -special exception, the source code distributed need not include -anything that is normally distributed (in either source or binary -form) with the major components (compiler, kernel, and so on) of the -operating system on which the executable runs, unless that component -itself accompanies the executable. 
- -If distribution of executable or object code is made by offering -access to copy from a designated place, then offering equivalent -access to copy the source code from the same place counts as -distribution of the source code, even though third parties are not -compelled to copy the source along with the object code. - - - 4. You may not copy, modify, sublicense, or distribute the Program -except as expressly provided under this License. Any attempt -otherwise to copy, modify, sublicense or distribute the Program is -void, and will automatically terminate your rights under this License. -However, parties who have received copies, or rights, from you under -this License will not have their licenses terminated so long as such -parties remain in full compliance. - - 5. You are not required to accept this License, since you have not -signed it. However, nothing else grants you permission to modify or -distribute the Program or its derivative works. These actions are -prohibited by law if you do not accept this License. Therefore, by -modifying or distributing the Program (or any work based on the -Program), you indicate your acceptance of this License to do so, and -all its terms and conditions for copying, distributing or modifying -the Program or works based on it. - - 6. Each time you redistribute the Program (or any work based on the -Program), the recipient automatically receives a license from the -original licensor to copy, distribute or modify the Program subject to -these terms and conditions. You may not impose any further -restrictions on the recipients' exercise of the rights granted herein. -You are not responsible for enforcing compliance by third parties to -this License. - - 7. 
If, as a consequence of a court judgment or allegation of patent -infringement or for any other reason (not limited to patent issues), -conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot -distribute so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you -may not distribute the Program at all. For example, if a patent -license would not permit royalty-free redistribution of the Program by -all those who receive copies directly or indirectly through you, then -the only way you could satisfy both it and this License would be to -refrain entirely from distribution of the Program. - -If any portion of this section is held invalid or unenforceable under -any particular circumstance, the balance of the section is intended to -apply and the section as a whole is intended to apply in other -circumstances. - -It is not the purpose of this section to induce you to infringe any -patents or other property right claims or to contest validity of any -such claims; this section has the sole purpose of protecting the -integrity of the free software distribution system, which is -implemented by public license practices. Many people have made -generous contributions to the wide range of software distributed -through that system in reliance on consistent application of that -system; it is up to the author/donor to decide if he or she is willing -to distribute software through any other system and a licensee cannot -impose that choice. - -This section is intended to make thoroughly clear what is believed to -be a consequence of the rest of this License. - - - 8. 
If the distribution and/or use of the Program is restricted in -certain countries either by patents or by copyrighted interfaces, the -original copyright holder who places the Program under this License -may add an explicit geographical distribution limitation excluding -those countries, so that distribution is permitted only in or among -countries not thus excluded. In such case, this License incorporates -the limitation as if written in the body of this License. - - 9. The Free Software Foundation may publish revised and/or new versions -of the General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - -Each version is given a distinguishing version number. If the Program -specifies a version number of this License which applies to it and "any -later version", you have the option of following the terms and conditions -either of that version or of any later version published by the Free -Software Foundation. If the Program does not specify a version number of -this License, you may choose any version ever published by the Free Software -Foundation. - - 10. If you wish to incorporate parts of the Program into other free -programs whose distribution conditions are different, write to the author -to ask for permission. For software which is copyrighted by the Free -Software Foundation, write to the Free Software Foundation; we sometimes -make exceptions for this. Our decision will be guided by the two goals -of preserving the free status of all derivatives of our free software and -of promoting the sharing and reuse of software generally. - - NO WARRANTY - - 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY -FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN -OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES -PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED -OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS -TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE -PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, -REPAIR OR CORRECTION. - - 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR -REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, -INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING -OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED -TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY -YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER -PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE -POSSIBILITY OF SUCH DAMAGES. - - END OF TERMS AND CONDITIONS - - - diff --git a/src/hts-indextmpl.h b/src/hts-indextmpl.h index a82b69d..29dd122 100644 --- a/src/hts-indextmpl.h +++ b/src/hts-indextmpl.h @@ -167,7 +167,7 @@ regen: " <BR>"LF\ " <BR>"LF\ " <H6 ALIGN=\"RIGHT\">"LF\ - " <I>Mirror and index made by HTTrack Website Copier [XR&CO'2002]</I>"LF\ + " <I>Mirror and index made by HTTrack Website Copier [XR&CO'2003]</I>"LF\ " </H6>"LF\ " %s"LF\ " <!-- Thanks for using HTTrack Website Copier! 
-->"LF\ @@ -186,7 +186,7 @@ regen: ""LF\ "<table width=\"76%%\" border=\"0\" align=\"center\" valign=\"bottom\" cellspacing=\"0\" cellpadding=\"0\">"LF\ " <tr>"LF\ - " <td id=\"footer\"><small>© 2002 Xavier Roche & other contributors - Web Design: Kauler Leto.</small></td>"LF\ + " <td id=\"footer\"><small>© 2003 Xavier Roche & other contributors - Web Design: Kauler Leto.</small></td>"LF\ " </tr>"LF\ "</table>"LF\ ""LF\ @@ -317,7 +317,7 @@ regen: " </TABLE>"LF\ " <BR>"LF\ " <H6 ALIGN=\"RIGHT\">"LF\ - " <I>Mirror and index made by HTTrack Website Copier [XR&CO'2002]</I>"LF\ + " <I>Mirror and index made by HTTrack Website Copier [XR&CO'2003]</I>"LF\ " </H6>"LF\ " %s"LF\ " <!-- Thanks for using HTTrack Website Copier! -->"LF\ @@ -335,7 +335,7 @@ regen: ""LF\ "<table width=\"76%%\" border=\"0\" align=\"center\" valign=\"bottom\" cellspacing=\"0\" cellpadding=\"0\">"LF\ " <tr>"LF\ - " <td id=\"footer\"><small>© 2002 Xavier Roche & other contributors - Web Design: Kauler Leto.</small></td>"LF\ + " <td id=\"footer\"><small>© 2003 Xavier Roche & other contributors - Web Design: Kauler Leto.</small></td>"LF\ " </tr>"LF\ "</table>"LF\ ""LF\ @@ -476,7 +476,7 @@ regen: ""LF\ "<table width=\"76%%\" height=\"100%%\" border=\"0\" align=\"center\" valign=\"bottom\" cellspacing=\"0\" cellpadding=\"0\">"LF\ " <tr>"LF\ - " <td id=\"footer\"><small>© 2002 Xavier Roche & other contributors - Web Design: Kauler Leto.</small></td>"LF\ + " <td id=\"footer\"><small>© 2003 Xavier Roche & other contributors - Web Design: Kauler Leto.</small></td>"LF\ " </tr>"LF\ "</table>"LF\ ""LF\ @@ -613,7 +613,7 @@ regen: ""LF\ "<table width=\"76%%\" height=\"100%%\" border=\"0\" align=\"center\" valign=\"bottom\" cellspacing=\"0\" cellpadding=\"0\">"LF\ " <tr>"LF\ - " <td id=\"footer\"><small>© 2002 Xavier Roche & other contributors - Web Design: Kauler Leto.</small></td>"LF\ + " <td id=\"footer\"><small>© 2003 Xavier Roche & other contributors - Web Design: Kauler Leto.</small></td>"LF\ " </tr>"LF\ 
"</table>"LF\ ""LF\ diff --git a/src/htsalias.c b/src/htsalias.c index 65a34e3..1b65945 100644 --- a/src/htsalias.c +++ b/src/htsalias.c @@ -38,13 +38,14 @@ Please visit our Website: http://www.httrack.com #include <stdio.h> #include <stdlib.h> #include <string.h> +#include "htsbase.h" #include "htsalias.h" #include "htsglobal.h" void linput(FILE* fp,char* s,int max); void hts_lowcase(char* s); #define _NOT_NULL(a) ( (a!=NULL) ? (a) : "" ) -#define is_realspace(c) (strchr(" \x0d\x0a\x09\x0c",(c))!=NULL) +#define is_realspace(c) (strchr(" \x0d\x0a\x09\x0b\x0c",(c))!=NULL) // COPY OF cmdl_ins in htsmain.c // Insert a command in the argc/argv @@ -55,7 +56,7 @@ void hts_lowcase(char* s); argv[i]=argv[i-1];\ } \ argv[0]=(buff+ptr); \ - strcpy(argv[0],token); \ + strcpybuff(argv[0],token); \ ptr += (strlen(argv[0])+1); \ argc++ // END OF COPY OF cmdl_ins in htsmain.c @@ -80,7 +81,7 @@ void hts_lowcase(char* s); param1 : this option must be alone, and needs one distinct parameter (-P <path>) param0 : this option must be alone, but the parameter should be put together (+*.gif) */ -const char hts_optalias[][4][64] = { +const char* hts_optalias[][4] = { /* {"","","",""}, */ {"path","-O","param1","output path"}, {"chroot","-%O","param1","default top path"}, @@ -90,6 +91,7 @@ const char hts_optalias[][4][64] = { {"quiet","-q","single",""}, {"mirrorlinks","-Y","single",""}, {"proxy","-P","param1","proxy name:port"}, + {"bind","-%b","param1","hostname to bind"}, {"httpproxy-ftp","-%f","param",""}, {"depth","-r","param",""},{"recurse-levels","-r","param",""}, {"ext-depth","-%e","param",""}, @@ -108,12 +110,14 @@ const char hts_optalias[][4][64] = { {"near","-n","single",""}, {"test","-t","single",""}, {"list","-%L","param1",""}, + {"urllist","-%S","param1",""}, {"language","-%l","param1",""}, {"lang","-%l","param1",""}, {"structure","-N","param",""}, {"user-structure","-N","param1",""}, {"long-names","-L","param",""}, {"keep-links","-K","param",""}, + 
{"mime-html","-%M","param",""}, {"mht","-%M","param",""}, {"replace-external","-x","single",""}, - {"no-passwords","-%x","single",""},{"no-password","-%x","single",""}, + {"disable-passwords","-%x","single",""},{"disable-password","-%x","single",""}, {"include-query-string","-%q","single",""}, {"generate-errors","-o","single",""}, {"purge-old","-X","param",""}, @@ -124,9 +128,12 @@ const char hts_optalias[][4][64] = { {"protocol","-@i","param",""}, {"robots","-s","param",""}, {"http-10","-%h","single",""},{"http-1.0","-%h","single",""}, - {"no-compression","-%z","single",""}, + {"keep-alive","-%k","single",""}, + {"build-top-index","-%i","single",""}, + {"disable-compression","-%z","single",""}, {"tolerant","-%B","single",""}, {"updatehack","-%s","single",""}, {"sizehack","-%s","single",""}, + {"urlhack","-%u","single",""}, {"user-agent","-F","param1","user-agent identity"}, {"footer","-%F","param1",""}, {"cache","-C","param","number of retries for non-fatal errors"}, @@ -143,6 +150,7 @@ const char hts_optalias[][4][64] = { {"priority","-p","param",""}, {"debug-headers","-%H","single",""}, {"userdef-cmd","-V","param1",""}, + {"callback","-%W","param1",""}, {"wrapper","-%W","param1",""}, {"structure","-N","param1","user-defined structure"}, {"usercommand","-V","param1","user-defined command"}, {"display","-%v","single","show files transfered and other funny realtime information"}, @@ -167,13 +175,16 @@ const char hts_optalias[][4][64] = { {"version","-#h","single",""}, {"debug-scanstdin","-#K","single",""}, {"advanced-maxlinks","-#L","single",""}, - {"advanced-progressinfo","-#p","single",""}, + {"advanced-progressinfo","-#p","single","deprecated"}, {"catch-url","-#P","single","catch complex URL through proxy"}, {"debug-oldftp","-#R","single",""}, {"debug-xfrstats","-#T","single",""}, {"advanced-wait","-#u","single",""}, {"debug-ratestats","-#Z","single",""}, {"exec","-#!","param1",""}, + {"fast-engine","-#X","single","Enable fast routines"}, + 
{"debug-overflows","-#X0","single","Attempt to detect buffer overflows"}, + {"debug-cache","-#C","param1","List files in the cache"}, /* STANDARD ALIASES */ {"spider","-p0C0I0t","single",""}, @@ -194,6 +205,7 @@ const char hts_optalias[][4][64] = { {"ultrawide","-c48","single",""}, {"http10","-%h","single",""}, {"filelist","-%L","single",""}, {"list","-%L","single",""}, + {"filterlist","-%S","single",""}, /* END OF ALIASES */ /* Filters */ @@ -249,25 +261,25 @@ int optalias_check(int argc,const char * const * argv,int n_arg, /* --sockets=8 */ if ( (position=strchr(argv[n_arg],'=')) ) { /* Copy command */ - strncat(command,argv[n_arg]+2,(int) (position - (argv[n_arg]+2)) ); + strncatbuff(command,argv[n_arg]+2,(int) (position - (argv[n_arg]+2)) ); /* Copy parameter */ - strcpy(param,position+1); + strcpybuff(param,position+1); } /* --nocache */ else if (strncmp(argv[n_arg]+2,"no",2)==0) { - strcpy(command,argv[n_arg]+4); - strcpy(param,"0"); + strcpybuff(command,argv[n_arg]+4); + strcpybuff(param,"0"); } /* --sockets 8 */ else { if (strncmp(argv[n_arg]+2,"wide-",5)==0) { - strcpy(addcommand,"c32"); - strcpy(command,strchr(argv[n_arg]+2,'-')+1); + strcpybuff(addcommand,"c32"); + strcpybuff(command,strchr(argv[n_arg]+2,'-')+1); } else if (strncmp(argv[n_arg]+2,"tiny-",5)==0) { - strcpy(addcommand,"c1"); - strcpy(command,strchr(argv[n_arg]+2,'-')+1); + strcpybuff(addcommand,"c1"); + strcpybuff(command,strchr(argv[n_arg]+2,'-')+1); } else - strcpy(command,argv[n_arg]+2); + strcpybuff(command,argv[n_arg]+2); need_param=2; } @@ -275,7 +287,7 @@ int optalias_check(int argc,const char * const * argv,int n_arg, pos=optalias_find(command); if (pos>=0) { /* Copy real name */ - strcpy(command,hts_optalias[pos][1]); + strcpybuff(command,hts_optalias[pos][1]); /* With parameters? */ if (strncmp(hts_optalias[pos][2],"param",5)==0) { /* Copy parameters? 
*/ @@ -286,7 +298,7 @@ int optalias_check(int argc,const char * const * argv,int n_arg, command,command,_NOT_NULL(optalias_help(command))); return 0; } - strcpy(param,argv[n_arg+1]); + strcpybuff(param,argv[n_arg+1]); need_param=2; } } else @@ -296,30 +308,30 @@ int optalias_check(int argc,const char * const * argv,int n_arg, /* Must be alone (-P /tmp) */ if (strcmp(hts_optalias[pos][2],"param1")==0) { - strcpy(return_argv[0],command); - strcpy(return_argv[1],param); + strcpybuff(return_argv[0],command); + strcpybuff(return_argv[1],param); *return_argc=2; /* 2 parameters returned */ } /* Alone with parameter (+*.gif) */ else if (strcmp(hts_optalias[pos][2],"param0")==0) { /* Command */ - strcpy(return_argv[0],command); - strcat(return_argv[0],param); + strcpybuff(return_argv[0],command); + strcatbuff(return_argv[0],param); } /* Together (-c8) */ else { /* Command */ - strcpy(return_argv[0],command); + strcpybuff(return_argv[0],command); /* Parameters accepted */ if (strncmp(hts_optalias[pos][2],"param",5)==0) { /* --cache=off or --index=on */ if (strcmp(param,"off")==0) - strcat(return_argv[0],"0"); + strcatbuff(return_argv[0],"0"); else if (strcmp(param,"on")==0) { // on is the default - // strcat(return_argv[0],"1"); + // strcatbuff(return_argv[0],"1"); } else - strcat(return_argv[0],param); + strcatbuff(return_argv[0],param); } *return_argc=1; /* 1 parameter returned */ } @@ -342,8 +354,8 @@ int optalias_check(int argc,const char * const * argv,int n_arg, return 0; } /* Copy parameters */ - strcpy(return_argv[0],argv[n_arg]); - strcpy(return_argv[1],argv[n_arg+1]); + strcpybuff(return_argv[0],argv[n_arg]); + strcpybuff(return_argv[1],argv[n_arg+1]); /* And return */ *return_argc=2; /* 2 parameters returned */ return 2; /* 2 parameters used */ @@ -352,7 +364,7 @@ int optalias_check(int argc,const char * const * argv,int n_arg, } /* Copy and return other unknown option */ - strcpy(return_argv[0],argv[n_arg]); + strcpybuff(return_argv[0],argv[n_arg]); return 1; } 
@@ -461,9 +473,9 @@ int optinclude_file(const char* name, char _tmp_argv[4][HTS_CDLMAXSIZE]; char* tmp_argv[4]; tmp_argv[0]=_tmp_argv[0]; tmp_argv[1]=_tmp_argv[1]; tmp_argv[2]=_tmp_argv[2]; tmp_argv[3]=_tmp_argv[3]; - strcpy(tmp_argv[0],"--"); - strcat(tmp_argv[0],a); - strcpy(tmp_argv[1],b); + strcpybuff(tmp_argv[0],"--"); + strcatbuff(tmp_argv[0],a); + strcpybuff(tmp_argv[1],b); result=optalias_check(2,(const char * const *)tmp_argv,0, &return_argc,(tmp_argv+2), @@ -513,8 +525,8 @@ char* hts_gethome(void) { void expand_home(char* str) { if (str[0] == '~') { char tempo[HTS_URLMAXSIZE*2]; - strcpy(tempo,hts_gethome()); - strcat(tempo,str+1); - strcpy(str,tempo); + strcpybuff(tempo,hts_gethome()); + strcatbuff(tempo,str+1); + strcpybuff(str,tempo); } } diff --git a/src/htsalias.h b/src/htsalias.h index 1c94b19..e5e8f82 100644 --- a/src/htsalias.h +++ b/src/htsalias.h @@ -39,7 +39,7 @@ Please visit our Website: http://www.httrack.com #ifndef HTSALIAS_DEFH #define HTSALIAS_DEFH -extern const char hts_optalias[][4][64]; +extern const char* hts_optalias[][4]; int optalias_check(int argc,const char * const * argv,int n_arg, int* return_argc,char** return_argv, char* return_error); diff --git a/src/htsback.c b/src/htsback.c index d99564f..6d0b119 100644 --- a/src/htsback.c +++ b/src/htsback.c @@ -122,7 +122,16 @@ int back_nsoc(lien_back* back,int back_max) { int n=0; int i; for(i=0;i<back_max;i++) - if (back[i].status>0) // réception uniquement + if (back[i].status > 0) // only receive + n++; + + return n; +} +int back_nsoc_overall(lien_back* back,int back_max) { + int n=0; + int i; + for(i=0;i<back_max;i++) + if (back[i].status > 0 || back[i].status == -103) n++; return n; @@ -144,32 +153,32 @@ int back_finalize(httrackp* opt,cache_back* cache,lien_back* back,int p) { /* décompression */ #if HTS_USEZLIB - if (back[p].r.compressed) { + if (gz_is_available && back[p].r.compressed) { if (back[p].r.size > 0) { //if ( (back[p].r.adr) && (back[p].r.size>0) ) { // stats 
back[p].compressed_size=back[p].r.size; // en mémoire -> passage sur disque if (!back[p].r.is_write) { - back[p].tmpfile[0]='\0'; - strcpy(back[p].tmpfile,tempnam(NULL,"httrz")); - if (back[p].tmpfile[0]) { + back[p].tmpfile_buffer[0]='\0'; + back[p].tmpfile=tmpnam(back[p].tmpfile_buffer); + if (back[p].tmpfile != NULL && back[p].tmpfile[0] != '\0') { back[p].r.out=fopen(back[p].tmpfile,"wb"); if (back[p].r.out) { if ((back[p].r.adr) && (back[p].r.size>0)) { - if ((INTsys)fwrite(back[p].r.adr,1,(INTsys)back[p].r.size,back[p].r.out) != back[p].r.size) { + if (fwrite(back[p].r.adr,1,(INTsys)back[p].r.size,back[p].r.out) != back[p].r.size) { back[p].r.statuscode=-1; - strcpy(back[p].r.msg,"Write error when decompressing"); + strcpybuff(back[p].r.msg,"Write error when decompressing"); } } else { back[p].tmpfile[0]='\0'; back[p].r.statuscode=-1; - strcpy(back[p].r.msg,"Empty compressed file"); + strcpybuff(back[p].r.msg,"Empty compressed file"); } } else { back[p].tmpfile[0]='\0'; back[p].r.statuscode=-1; - strcpy(back[p].r.msg,"Open error when decompressing"); + strcpybuff(back[p].r.msg,"Open error when decompressing"); } } } @@ -179,17 +188,18 @@ int back_finalize(httrackp* opt,cache_back* cache,lien_back* back,int p) { back[p].r.out=NULL; } // décompression - if (back[p].tmpfile[0] && back[p].url_sav[0]) { + if (back[p].tmpfile != NULL && back[p].tmpfile[0] != '\0' && back[p].url_sav[0]) { LLint size; filecreateempty(back[p].url_sav); // filenote & co if ((size = hts_zunpack(back[p].tmpfile,back[p].url_sav))>=0) { back[p].r.size=back[p].r.totalsize=size; // fichier -> mémoire if (!back[p].r.is_write) { + deleteaddr(&back[p].r); back[p].r.adr=readfile(back[p].url_sav); if (!back[p].r.adr) { back[p].r.statuscode=-1; - strcpy(back[p].r.msg,"Read error when decompressing"); + strcpybuff(back[p].r.msg,"Read error when decompressing"); } remove(back[p].url_sav); } @@ -214,39 +224,45 @@ int back_finalize(httrackp* opt,cache_back* cache,lien_back* back,int p) { struct tm* A; 
tt=time(NULL); A=localtime(&tt); + if (A == NULL) { + int localtime_returned_null=0; + assert(localtime_returned_null); + } strftime(s,250,"%H:%M:%S",A); flags[0]='\0'; /* input flags */ if (back[p].is_update) - strcat(flags, "U"); // update request + strcatbuff(flags, "U"); // update request else - strcat(flags, "-"); + strcatbuff(flags, "-"); if (back[p].range_req_size) - strcat(flags, "R"); // range request + strcatbuff(flags, "R"); // range request else - strcat(flags, "-"); + strcatbuff(flags, "-"); /* state flags */ if (back[p].r.is_file) // direct to disk - strcat(flags, "F"); + strcatbuff(flags, "F"); else - strcat(flags, "-"); + strcatbuff(flags, "-"); /* output flags */ if (!back[p].r.notmodified) - strcat(flags, "M"); // modified + strcatbuff(flags, "M"); // modified else - strcat(flags, "-"); + strcatbuff(flags, "-"); if (back[p].r.is_chunk) // chunked - strcat(flags, "C"); + strcatbuff(flags, "C"); else - strcat(flags, "-"); + strcatbuff(flags, "-"); if (back[p].r.compressed) - strcat(flags, "Z"); // gzip + strcatbuff(flags, "Z"); // gzip else - strcat(flags, "-"); - fprintf(cache->txt,"%s\t"LLintP"/"LLintP"\t%s\t", s, - back[p].r.size, back[p].r.totalsize, - flags); + strcatbuff(flags, "-"); + /* Err I had to split these.. 
*/ + fprintf(cache->txt,"%s\t", s); + fprintf(cache->txt,LLintP"/", (LLint)back[p].r.size); + fprintf(cache->txt,LLintP,(LLint)back[p].r.totalsize); + fprintf(cache->txt,"\t%s\t",flags); } if (back[p].r.statuscode==200) { if (back[p].r.size>=0) { @@ -323,7 +339,7 @@ int back_finalize(httrackp* opt,cache_back* cache,lien_back* back,int p) { /* Cache */ cache_mayadd(opt,cache,&back[p].r,back[p].url_adr,back[p].url_fil,back[p].url_sav); - + // status finished callback #if HTS_ANALYSTE hts_htmlcheck_xfrstatus(&back[p]); @@ -333,9 +349,165 @@ int back_finalize(httrackp* opt,cache_back* cache,lien_back* back,int p) { return -1; } +/* try to keep the connection alive */ +int back_letlive(httrackp* opt, lien_back* back, int p) { + htsblk* src = &back[p].r; + if (src && !src->is_file + && src->soc != INVALID_SOCKET + && src->statuscode >= 0 /* no timeout errors & co */ + && src->keep_alive_trailers == 0 /* not yet supported (chunk trailers) */ + && !check_sockerror(src->soc) + /*&& !check_sockdata(src->soc)*/ /* no unexpected data */ + ) { + htsblk tmp; + memset(&tmp, 0, sizeof(tmp)); + /* clear everything but connection: switch, close, and reswitch */ + back_connxfr(src, &tmp); + back_delete(opt, back, p); + //deletehttp(src); + back_connxfr(&tmp, src); + src->req.flush_garbage=1; /* ignore CRLF garbage */ + return 1; + } + return 0; +} + +void back_connxfr(htsblk* src, htsblk* dst) { + dst->soc = src->soc; + src->soc = INVALID_SOCKET; +#if HTS_USEOPENSSL + dst->ssl = src->ssl; + src->ssl = 0; + dst->ssl_con = src->ssl_con; + src->ssl_con = NULL; +#endif + dst->keep_alive = src->keep_alive; + src->keep_alive = 0; + dst->keep_alive_max = src->keep_alive_max; + src->keep_alive_max = 0; + dst->keep_alive_t = src->keep_alive_t; + src->keep_alive_t = 0; + dst->debugid = src->debugid; + src->debugid = 0; +} + +// clear, or leave for keep-alive +int back_maydelete(httrackp* opt,lien_back* back, int p) { + if (p>=0) { // on sait jamais.. 
+ if (!opt->nokeepalive + && back[p].r.keep_alive + && back[p].r.keep_alive_max > 1 + && back[p].ka_time_start + && time_local() < back[p].ka_time_start + back[p].r.keep_alive_t + ) { + lien_back tmp; + strcpybuff(tmp.url_adr, back[p].url_adr); + if (back_letlive(opt, back, p)) { + strcpybuff(back[p].url_adr, tmp.url_adr); + back[p].status = -103; // alive & waiting + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"(Keep-Alive): successfully saved #%d (%s)"LF, + back[p].r.debugid, + back[p].url_adr); test_flush; + } + return 1; + } + } + back_delete(opt,back, p); + } + return 0; +} + +// clear, or leave for keep-alive +void back_maydeletehttp(httrackp* opt, lien_back* back, int back_max, int p) { + if (back[p].r.soc!=INVALID_SOCKET) { + int q; + if (!opt->nokeepalive + && back[p].r.keep_alive + && back[p].r.keep_alive_max > 1 + && back[p].ka_time_start + && time_local() < back[p].ka_time_start + back[p].r.keep_alive_t + && ( q = back_search(opt, back, back_max) ) >= 0 + ) + { + lien_back tmp; + strcpybuff(tmp.url_adr, back[p].url_adr); + deletehttp(&back[q].r); // security check + back_connxfr(&back[p].r, &back[q].r); // transfer live connection settings from p to q + back[q].ka_time_start = back[p].ka_time_start; // refresh + back[p].r.soc = INVALID_SOCKET; + strcpybuff(back[q].url_adr, tmp.url_adr); // address + back[q].status = -103; // alive & waiting + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"(Keep-Alive): successfully preserved #%d (%s)"LF, + back[q].r.debugid, + back[q].url_adr); test_flush; + } + } else { + deletehttp(&back[p].r); + back[p].r.soc = INVALID_SOCKET; + } + } +} + + +/* attempt to attach a live connection to this slot */ +int back_trylive(httrackp* opt,lien_back* back, int back_max, int p) { + if (p>=0 && back[p].status != -103) { // we never know.. 
+ int i = back_searchlive(opt,back, back_max, back[p].url_adr); // search slot + if (i >= 0 && i != p) { + deletehttp(&back[p].r); // security check + back_connxfr(&back[i].r, &back[p].r); // transfer live connection settings from i to p + back_delete(opt,back, i); // delete old slot + back[p].status=100; // ready to connect + return 1; // success: will reuse live connection + } + } + return 0; +} + +/* search for a live position, or, if not possible, try to return a new one */ +int back_searchlive(httrackp* opt, lien_back* back, int back_max, char* search_addr) { + int i; + + /* search for a live socket */ + for(i = 0 ; i < back_max ; i++ ) { + if (back[i].status == -103) { + if (strfield2(back[i].url_adr, search_addr)) { /* same location (xxc: check also virtual hosts?) */ + if (time_local() < back[i].ka_time_start + back[i].r.keep_alive_t) { + return i; + } + } + } + } + return -1; +} + +int back_search(httrackp* opt,lien_back* back, int back_max) { + int i; + + /* try to find an empty place */ + for(i = 0 ; i < back_max ; i++ ) { + if (back[i].status == -1) { + return i; + } + } + + /* couldn't find an empty place, try to requisition a keep-alive place */ + for(i = 0 ; i < back_max ; i++ ) { + if (back[i].status == -103) { + /* close this place */ + back_delete(opt,back, i); + return i; + } + } + + /* oops, can't find a place */ + return -1; +} // effacer entrée -int back_delete(lien_back* back,int p) { +int back_delete(httrackp* opt, lien_back* back, int p) { if (p>=0) { // on sait jamais.. 
// Vérificateur d'intégrité #if DEBUG_CHECKINT @@ -356,21 +528,6 @@ int back_delete(lien_back* back,int p) { back[p].r.soc=INVALID_SOCKET; } -#if HTS_USEOPENSSL - /* Free OpenSSL structures */ - if (back[p].r.ssl_con) { - SSL_shutdown(back[p].r.ssl_con); - SSL_free(back[p].r.ssl_con); - back[p].r.ssl_con=NULL; - } - /* - if (back[p].r.ssl_soc) { - BIO_free_all(back[p].r.ssl_soc); - back[p].r.ssl_soc=NULL; - } - */ -#endif - if (back[p].r.adr!=NULL) { // reste un bloc à désallouer freet(back[p].r.adr); back[p].r.adr=NULL; @@ -379,6 +536,7 @@ int back_delete(lien_back* back,int p) { freet(back[p].chunk_adr); back[p].chunk_adr=NULL; back[p].chunk_size=0; + back[p].chunk_blocksize=0; back[p].is_chunk=0; } // if (back[p].r.is_file) { // fermer fichier entrée @@ -402,7 +560,7 @@ int back_delete(lien_back* back,int p) { set_filetime_rfc822(back[p].url_sav,back[p].r.lastmodified); /* executer commande utilisateur après chargement du fichier */ - usercommand(0,NULL,back[p].url_sav); + //xx usercommand(opt,0,NULL,back[p].url_sav, back[p].url_adr, back[p].url_fil); back[p].r.is_write=0; } @@ -426,7 +584,7 @@ int back_stack_available(lien_back* back,int back_max) { } // ajouter un lien en backing -int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* referer_adr,char* referer_fil,int test,short int* pass2_ptr) { +int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* referer_adr,char* referer_fil,int test,int* pass2_ptr) { int p=0; // vérifier cohérence de adr et fil (non vide!) @@ -444,37 +602,37 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char* } // FIN vérifier cohérence de adr et fil (non vide!) 
+ // stats + opt->state.back_add_stats++; + // rechercher emplacement - while((p<back_max) && back[p].status!=-1) p++; - if (back[p].status==-1) { // ok on a de la place + back_clean(opt, cache, back, back_max); + if ( ( p = back_search(opt, back, back_max) ) >= 0) { back[p].send_too[0]='\0'; // éventuels paramètres supplémentaires à transmettre au serveur - // ne sert à rien normalement - if (back[p].r.soc!=INVALID_SOCKET) { -#if HTS_DEBUG_CLOSESOCK - DEBUG_W("back_add: deletehttp\n"); -#endif + // clear r + if (back[p].r.soc!=INVALID_SOCKET) { /* we never know */ deletehttp(&back[p].r); } - - // effacer r - memset(&(back[p].r), 0, sizeof(htsblk)); back[p].r.soc=INVALID_SOCKET; back[p].r.location=back[p].location_buffer; + memset(&(back[p].r), 0, sizeof(htsblk)); + back[p].r.soc=INVALID_SOCKET; + back[p].r.location=back[p].location_buffer; // créer entrée - strcpy(back[p].url_adr,adr); - strcpy(back[p].url_fil,fil); - strcpy(back[p].url_sav,save); + strcpybuff(back[p].url_adr,adr); + strcpybuff(back[p].url_fil,fil); + strcpybuff(back[p].url_sav,save); back[p].pass2_ptr=pass2_ptr; // copier referer si besoin - strcpy(back[p].referer_adr,""); - strcpy(back[p].referer_fil,""); + strcpybuff(back[p].referer_adr,""); + strcpybuff(back[p].referer_fil,""); if ((referer_adr) && (referer_fil)) { // existe if ((strnotempty(referer_adr)) && (strnotempty(referer_fil))) { // non vide if (referer_adr[0]!='!') { // non détruit if (strcmp(referer_adr,"file://")) { // PAS file:// if (strcmp(referer_adr,"primary")) { // pas referer 1er lien - strcpy(back[p].referer_adr,referer_adr); - strcpy(back[p].referer_fil,referer_fil); + strcpybuff(back[p].referer_adr,referer_adr); + strcpybuff(back[p].referer_fil,referer_fil); } } } @@ -482,7 +640,6 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char* } // sav ne sert à rien pour le moment back[p].r.size=0; // rien n'a encore été chargé - back[p].r.soc=INVALID_SOCKET; // pas de socket back[p].r.adr=NULL; // pas 
de bloc de mémoire back[p].r.is_write=0; // à priori stockage en mémoire back[p].maxfile_html=opt->maxfile_html; @@ -496,11 +653,10 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char* else if (strcmp(back[p].url_sav,BACK_ADD_TEST2)==0) // test en GET back[p].head_request=2; // test en get - /* Stop requested - abort backing */ if (opt->state.stop) { back[p].r.statuscode=-1; // fatal - strcpy(back[p].r.msg,"mirror stopped by user"); + strcpybuff(back[p].r.msg,"mirror stopped by user"); back[p].status=0; // terminé if ((opt->debug>0) && (opt->log!=NULL)) { fspc(opt->log,"warning"); fprintf(opt->log,"File not added due to mirror cancel: %s%s"LF,adr,fil); test_flush; @@ -508,7 +664,6 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char* return 0; } - // tester cache if ((strcmp(adr,"file://")) /* pas fichier */ && ( (!test) || (cache->type==1) ) /* cache prioritaire, laisser passer en test! */ @@ -528,10 +683,10 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char* #endif char buff[HTS_URLMAXSIZE*4]; #if HTS_FAST_CACHE - strcpy(buff,adr); strcat(buff,fil); + strcpybuff(buff,adr); strcatbuff(buff,fil); hash_pos_return=inthash_read((inthash)cache->hashtable,buff,(long int*)&hash_pos); #else - buff[0]='\0'; strcat(buff,"\n"); strcat(buff,adr); strcat(buff,"\n"); strcat(buff,fil); strcat(buff,"\n"); + buff[0]='\0'; strcatbuff(buff,"\n"); strcatbuff(buff,adr); strcatbuff(buff,"\n"); strcatbuff(buff,fil); strcatbuff(buff,"\n"); a=strstr(cache->use,buff); #endif @@ -550,21 +705,45 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char* sscanf(a,"%d",&pos); // lire position #endif if (pos<0) { // pas de mise en cache data, vérifier existence - if (fsize(antislash(save)) <= 0) { // fichier existe pas ou est vide! + if (fsize(fconv(save)) <= 0) { // fichier existe pas ou est vide! 
+ int found=0; + + /* It is possible that the file has been moved due to changes in build structure */ + { + char previous_save[HTS_URLMAXSIZE*2]; + previous_save[0] = '\0'; + back[p].r = cache_readex(opt, cache, adr, fil, NULL, back[p].location_buffer, previous_save, 0); + if (previous_save[0] != '\0' && fexist(fconv(previous_save))) { + rename(fconv(previous_save), fconv(save)); + if (fexist(fconv(save))) { + found = 1; + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"File '%s' has been renamed since last mirror to '%s' ; applying changes"LF, previous_save, save); test_flush; + } + } else { + if ((opt->debug>0) && (opt->log!=NULL)) { + fspc(opt->log,"error"); fprintf(opt->log,"Could not rename '%s' to '%s' ; will have to retransfer it"LF, previous_save, save); test_flush; + } + } + } + } + + if (!found) { #if HTS_FAST_CACHE - hash_pos_return=0; + hash_pos_return=0; #else - a=NULL; -#endif - // dévalider car non présent sur disque dans structure originale!!! - // sinon, le fichier est ok à priori, mais on renverra un if-modified-since pour - // en être sûr - if (opt->norecatch) { // tester norecatch - if (!fexist(antislash(save))) { // fichier existe pas mais déclaré: on l'a effacé - FILE* fp=fopen(antislash(save),"wb"); - if (fp) fclose(fp); - if (opt->log!=NULL) { - fspc(opt->log,"warning"); fprintf(opt->log,"File must have been erased by user, ignoring: %s%s"LF,back[p].url_adr,back[p].url_fil); test_flush; + a=NULL; +#endif + // dévalider car non présent sur disque dans structure originale!!! 
+ // sinon, le fichier est ok à priori, mais on renverra un if-modified-since pour + // en être sûr + if (opt->norecatch) { // tester norecatch + if (!fexist(fconv(save))) { // fichier existe pas mais déclaré: on l'a effacé + FILE* fp=fopen(fconv(save),"wb"); + if (fp) fclose(fp); + if (opt->log!=NULL) { + fspc(opt->log,"warning"); fprintf(opt->log,"File must have been erased by user, ignoring: %s%s"LF,back[p].url_adr,back[p].url_fil); test_flush; + } } } } @@ -587,18 +766,15 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char* if (a!=NULL) { // OK existe en cache (et données aussi)! #endif if (cache->type==1) { // cache prioritaire (pas de test if-modified..) - // dans ce cas on peut également lire des réponses cachées comme 404,302... + // dans ce cas on peut également lire des réponses cachées comme 404,302... // lire dans le cache if (!test) - back[p].r=cache_read(opt,cache,adr,fil,save); + back[p].r = cache_read(opt,cache,adr,fil,save, back[p].location_buffer); else - back[p].r=cache_read(opt,cache,adr,fil,NULL); // charger en tête uniquement du cache - if (!back[p].r.location) - back[p].r.location=back[p].location_buffer; - else { /* recopier */ - strcpy(back[p].location_buffer,back[p].r.location); - back[p].r.location=back[p].location_buffer; - } + back[p].r = cache_read(opt,cache,adr,fil,NULL, back[p].location_buffer); // charger en tête uniquement du cache + + /* ensure correct location buffer set */ + back[p].r.location=back[p].location_buffer; /* Interdiction taille par le wizard? 
--> détruire */ if (back[p].r.statuscode != -1) { // pas d'erreur de lecture @@ -606,9 +782,9 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char* back[p].status=0; // FINI back[p].r.statuscode=-1; if (!back[p].testmode) - strcpy(back[p].r.msg,"Cached file skipped (too big)"); + strcpybuff(back[p].r.msg,"Cached file skipped (too big)"); else - strcpy(back[p].r.msg,"Test: Cached file skipped (too big)"); + strcpybuff(back[p].r.msg,"Test: Cached file skipped (too big)"); return 0; } } @@ -639,42 +815,43 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char* } } else if (cache->type==2) { // si en cache, demander de tester If-Modified-Since - htsblk* r=cache_header(opt,cache,adr,fil); + htsblk r; + cache_header(opt,cache,adr,fil,&r); /* Interdiction taille par le wizard? */ { LLint save_totalsize=back[p].r.totalsize; - back[p].r.totalsize=r->totalsize; + back[p].r.totalsize=r.totalsize; if (!back_checksize(opt,&back[p],1)) { - r=NULL; + r.statuscode = -1; // back[p].status=0; // FINI deletehttp(&back[p].r); back[p].r.soc=INVALID_SOCKET; if (!back[p].testmode) - strcpy(back[p].r.msg,"File too big"); + strcpybuff(back[p].r.msg,"File too big"); else - strcpy(back[p].r.msg,"Test: File too big"); + strcpybuff(back[p].r.msg,"Test: File too big"); return 0; } back[p].r.totalsize=save_totalsize; } - if (r) { - if (r->statuscode==200) { // uniquement des 200 (OK) - if (strnotempty(r->etag)) { // ETag (RFC2616) + if (r.statuscode != -1) { + if (r.statuscode==200) { // uniquement des 200 (OK) + if (strnotempty(r.etag)) { // ETag (RFC2616) /* - If both an entity tag and a Last-Modified value have been provided by the origin server, SHOULD use both validators in cache-conditional requests. This allows both HTTP/1.0 and HTTP/1.1 caches to respond appropriately. 
*/ - if (strnotempty(r->lastmodified)) - sprintf(back[p].send_too,"If-None-Match: %s\r\nIf-Modified-Since: %s\r\n",r->etag,r->lastmodified); + if (strnotempty(r.lastmodified)) + sprintf(back[p].send_too,"If-None-Match: %s\r\nIf-Modified-Since: %s\r\n",r.etag,r.lastmodified); else - sprintf(back[p].send_too,"If-None-Match: %s\r\n",r->etag); + sprintf(back[p].send_too,"If-None-Match: %s\r\n",r.etag); } - else if (strnotempty(r->lastmodified)) - sprintf(back[p].send_too,"If-Modified-Since: %s\r\n",r->lastmodified); + else if (strnotempty(r.lastmodified)) + sprintf(back[p].send_too,"If-Modified-Since: %s\r\n",r.lastmodified); else if (strnotempty(cache->lastmodified)) sprintf(back[p].send_too,"If-Modified-Since: %s\r\n",cache->lastmodified); @@ -754,7 +931,7 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char* filenote(save,NULL); // ne pas purger tout de même back[p].status=0; // OK prêt back[p].r.statuscode=-1; // erreur - strcpy(back[p].r.msg,"Null-size file not recaught"); + strcpybuff(back[p].r.msg,"Null-size file not recaught"); return 0; } } else { @@ -781,12 +958,13 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char* // recopier proxy memcpy(&(back[p].r.req.proxy), &opt->proxy, sizeof(opt->proxy)); // et user-agent - strcpy(back[p].r.req.user_agent,opt->user_agent); - strcpy(back[p].r.req.lang_iso,opt->lang_iso); + strcpybuff(back[p].r.req.user_agent,opt->user_agent); + strcpybuff(back[p].r.req.lang_iso,opt->lang_iso); back[p].r.req.user_agent_send=opt->user_agent_send; // et http11 back[p].r.req.http11=back[p].http11; back[p].r.req.nocompression=opt->nocompression; + back[p].r.req.nokeepalive=opt->nokeepalive; // mode ftp, court-circuit! 
if (strfield(back[p].url_adr,"ftp://")) { @@ -804,7 +982,7 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char* { char nid[32]; sprintf(nid,"htsftp%d-in_progress.lock",p); - strcpy(back[p].location_buffer,fconcat(opt->path_log,nid)); + strcpybuff(back[p].location_buffer,fconcat(opt->path_log,nid)); } launch_ftp(&(back[p]),back[p].location_buffer,opt->exec); #endif @@ -812,49 +990,60 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char* } } #if HTS_USEOPENSSL - else if (strfield(back[p].url_adr,"https://")) { // let's rock + else if (SSL_is_available && strfield(back[p].url_adr,"https://")) { // let's rock back[p].r.ssl = 1; // back[p].r.ssl_soc = NULL; back[p].r.ssl_con = NULL; } #endif + if (!back_trylive(opt,back, back_max, p)) { #if HTS_XGETHOST #if HDEBUG - printf("back_solve..\n"); + printf("back_solve..\n"); #endif - back[p].status=101; // tentative de résolution du nom de host - soc=INVALID_SOCKET; // pas encore ouverte - back_solve(&back[p]); // préparer - if (host_wait(&back[p])) { // prêt, par ex fichier ou dispo dans dns + back[p].status=101; // tentative de résolution du nom de host + soc=INVALID_SOCKET; // pas encore ouverte + back_solve(&back[p]); // préparer + if (host_wait(&back[p])) { // prêt, par ex fichier ou dispo dans dns #if HDEBUG - printf("ok, dns cache ready..\n"); + printf("ok, dns cache ready..\n"); #endif - soc=http_xfopen(0,0,0,back[p].send_too,adr,fil,&(back[p].r)); - if (soc==INVALID_SOCKET) { - back[p].status=0; // fini, erreur + soc=http_xfopen(0,0,0,back[p].send_too,adr,fil,&(back[p].r)); + if (soc==INVALID_SOCKET) { + back[p].status=0; // fini, erreur + } } - } -// + // #else -// + // #if CNXDEBUG - printf("XFopen..\n"); + printf("XFopen..\n"); #endif - - if (strnotempty(back[p].send_too)) // envoyer un if-modified-since + + if (strnotempty(back[p].send_too)) // envoyer un if-modified-since #if HTS_XCONN - soc=http_xfopen(0,0,0,back[p].send_too,adr,fil,&(back[p].r)); + 
soc=http_xfopen(0,0,0,back[p].send_too,adr,fil,&(back[p].r)); #else - soc=http_xfopen(0,0,1,back[p].send_too,adr,fil,&(back[p].r)); + soc=http_xfopen(0,0,1,back[p].send_too,adr,fil,&(back[p].r)); #endif - else + else #if HTS_XCONN - soc=http_xfopen(test,0,0,NULL,adr,fil,&(back[p].r)); + soc=http_xfopen(test,0,0,NULL,adr,fil,&(back[p].r)); #else - soc=http_xfopen(test,0,1,NULL,adr,fil,&(back[p].r)); + soc=http_xfopen(test,0,1,NULL,adr,fil,&(back[p].r)); #endif #endif + } else { + soc = back[p].r.soc; + + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"(Keep-Alive): successfully linked #%d (for %s%s)"LF, + back[p].r.debugid, + back[p].url_adr, back[p].url_fil); test_flush; + } + } + if (opt->timeout>0) { // gestion du opt->timeout back[p].timeout=opt->timeout; back[p].timeout_refresh=time_local(); @@ -938,7 +1127,7 @@ PTHREAD_TYPE Hostlookup(void* iadr_p) { #if DEBUGDNS printf("resolv in background: %s\n",jump_identification(iadr_p)); #endif - strcpy(iadr,jump_identification(iadr_p)); + strcpybuff(iadr,jump_identification(iadr_p)); // couper éventuel : { char *a; @@ -965,7 +1154,7 @@ PTHREAD_TYPE Hostlookup(void* iadr_p) { cache->n=(t_dnscache*) calloct(1,sizeof(t_dnscache)); if (cache->n!=NULL) { t_fullhostent fullhostent_buffer; - strcpy(cache->n->iadr,iadr); + strcpybuff(cache->n->iadr,iadr); cache->n->host_length=0; /* pour le moment rien */ cache->n->n=NULL; _hts_lockdns(0); // délocker @@ -1019,7 +1208,7 @@ void back_solve(lien_back* back) { { char* p = calloct(strlen(a)+2,1); if (p) { - strcpy(p,a); + strcpybuff(p,a); _beginthread( Hostlookup , 0, p ); } } @@ -1031,7 +1220,7 @@ void back_solve(lien_back* back) { #if USE_BEGINTHREAD char* p = calloct(strlen(a)+2,1); if (p) { - strcpy(p,a); + strcpybuff(p,a); _beginthread( Hostlookup , 0, p ); } #else @@ -1061,32 +1250,79 @@ int host_wait(lien_back* back) { // élimine les fichiers non html en backing (anticipation) // cleanup non-html files in backing to save backing space 
// and allow faster "save in cache" operation +// also cleanup keep-alive sockets and ensure that not too many sockets are being opened void back_clean(httrackp* opt,cache_back* cache,lien_back* back,int back_max) { +#if HTS_ANALYSTE + int oneMore = ( (_hts_in_html_parsing == 2 && opt->maxsoc >= 2) || (_hts_in_html_parsing == 1 && opt->maxsoc >= 4) ) ? 1 : 0; // testing links +#endif int i; for(i=0;i<back_max;i++) { if (back[i].status == 0) { // ready + /* Check autoclean */ if (!back[i].testmode) { // not test mode if (strnotempty(back[i].url_sav)) { // filename exists - if (back[i].r.is_write) { // not in memory (on disk, ready) + if (back[i].r.statuscode==200) { // HTTP "OK" if (back[i].r.size>0) { // size>0 - if (back[i].r.statuscode==200) { // HTTP "OK" - if (!is_hypertext_mime(back[i].r.contenttype)) { // not HTML/hypertext - if (!may_be_hypertext_mime(back[i].r.contenttype)) { // may NOT be parseable mime type - if (back[i].pass2_ptr) { - // finalize - // // back_finalize(opt,cache,back,i); - // stats - //HTS_STAT.stat_bytes+=back[i].r.size; - //HTS_STAT.stat_files++; - //if ( (!back[i].r.notmodified) && (opt->is_update) ) { - // HTS_STAT.stat_updated_files++; // page modifiée - //} - //cache_mayadd(opt,cache,&back[i].r,back[i].url_adr,back[i].url_fil,back[i].url_sav); - *back[i].pass2_ptr=-1; // Done! 
- back_delete(back,i); // Delete backing entry - if ((opt->debug>0) && (opt->log!=NULL)) { - fspc(opt->log,"info"); fprintf(opt->log,"File successfully written in background: %s"LF,back[i].url_sav); test_flush; + if (back[i].r.is_write // not in memory (on disk, ready) + && !is_hypertext_mime(back[i].r.contenttype) // not HTML/hypertext + && !may_be_hypertext_mime(back[i].r.contenttype) // may NOT be parseable mime type + ) { + if (back[i].pass2_ptr) { + // finalize + // // back_finalize(opt,cache,back,i); + // stats + //HTS_STAT.stat_bytes+=back[i].r.size; + //HTS_STAT.stat_files++; + //if ( (!back[i].r.notmodified) && (opt->is_update) ) { + // HTS_STAT.stat_updated_files++; // page modifiée + //} + //xxxcache_mayadd(opt,cache,&back[i].r,back[i].url_adr,back[i].url_fil,back[i].url_sav); + usercommand(opt, 0, NULL, back[i].url_sav, back[i].url_adr, back[i].url_fil); + *back[i].pass2_ptr=-1; // Done! + back_maydelete(opt,back,i); // May delete backing entry + if ((opt->debug>0) && (opt->log!=NULL)) { + fspc(opt->log,"info"); fprintf(opt->log,"File successfully written in background: %s"LF,back[i].url_sav); test_flush; + } + } + } else { + if (!back[i].finalized) { + if (1) { + /* Ensure deleted or recycled socket */ + /* BUT DO NOT YET WIPE back[i].r.adr */ + back_maydeletehttp(opt, back, back_max, i); + if ( (opt->debug>1) && (opt->log!=NULL) ) { + fspc(opt->log,"debug"); fprintf(opt->log,"file %s%s validated (cached, left in memory)"LF,back[i].url_adr,back[i].url_fil); test_flush; + } + } else { + /* + NOT YET HANDLED CORRECTLY (READ IN NEW CACHE TO DO) + */ + /* Lock the entry but do not keep the html data in memory (in cache) */ + if (opt->cache) { + htsblk r; + + /* Ensure deleted or recycled socket */ + back_maydeletehttp(opt, back, back_max, i); + assertf(back[i].r.soc == INVALID_SOCKET); + + /* Check header */ + cache_header(opt,cache,back[i].url_adr,back[i].url_fil,&r); + if (r.statuscode == 200) { + if (back[i].r.soc == INVALID_SOCKET) { + /* Delete buffer 
and sockets */ + deleteaddr(&back[i].r); + deletehttp(&back[i].r); + back[i].finalized = 1; + if ( (opt->debug>1) && (opt->log!=NULL) ) { + fspc(opt->log,"debug"); fprintf(opt->log,"file %s%s temporarily left in cache to spare memory"LF,back[i].url_adr,back[i].url_fil); test_flush; + } + } + } else { + if ((opt->debug>0) && (opt->log!=NULL)) { + fspc(opt->log,"warning"); fprintf(opt->log,"Unexpected html cache lookup error during back clean"LF); test_flush; + } } + // xxc xxc } } } @@ -1095,8 +1331,49 @@ void back_clean(httrackp* opt,cache_back* cache,lien_back* back,int back_max) { } } } + } else if (back[i].status == -103) { // waiting (keep-alive) + if ( + ! back[i].r.keep_alive + || back[i].r.soc == INVALID_SOCKET + || back[i].r.keep_alive_max < 1 + || time_local() >= back[i].ka_time_start + back[i].r.keep_alive_t + ) { + if ((opt->debug>0) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"(Keep-Alive): live socket closed #%d (%s)"LF, + back[i].r.debugid, + back[i].url_adr); + test_flush; + } + back_delete(opt,back, i); // delete backing entry + } } - } + } + /* switch connections to live ones */ + for(i=0;i<back_max;i++) { + if (back[i].status == 0) { // ready + if (back[i].r.soc != INVALID_SOCKET) { + back_maydeletehttp(opt,back, back_max, i); + } + + } + } + /* delete sockets if too many keep-alive'd sockets in background */ + if (opt->maxsoc > 0) { + int max = opt->maxsoc + oneMore; + int curr = back_nsoc_overall(back, back_max); + if (curr > max) { + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"(Keep-Alive): deleting #%d sockets"LF, + curr - max); test_flush; + } + } + for(i = 0 ; i < back_max && curr > max ; i++) { + if (back[i].status == -103) { + back_delete(opt,back, i); // delete backing entry + curr--; + } + } + } } @@ -1208,9 +1485,9 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta } else { back[i].r.statuscode=-4; if (back[i].status==100) - 
strcpy(back[i].r.msg,"Connect Error"); + strcpybuff(back[i].r.msg,"Connect Error"); else - strcpy(back[i].r.msg,"Receive Error"); + strcpybuff(back[i].r.msg,"Receive Error"); back[i].status=0; // terminé if ((opt->debug>0) && (opt->log!=NULL)) { fspc(opt->log,"warning"); fprintf(opt->log,"Unexpected socket error during pre-loop"LF); test_flush; @@ -1251,6 +1528,13 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta if (nsockets) { if (opt->maxrate>0) { max_read_bytes = ( check_downloadable_bytes(opt->maxrate) / nsockets ); + if (max_read_bytes > TAILLE_BUFFER) { + /* limit size */ + max_read_bytes = TAILLE_BUFFER; + } else if (max_read_bytes < TAILLE_BUFFER) { + /* a small pause */ + Sleep(10); + } } } if (!max_read_bytes) @@ -1273,10 +1557,14 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta back[i].r.soc=INVALID_SOCKET; back[i].r.statuscode=-4; if (back[i].status==100) - strcpy(back[i].r.msg,"Connect Error"); + strcpybuff(back[i].r.msg,"Connect Error"); else - strcpy(back[i].r.msg,"Receive Error"); - back[i].status=0; // terminé + strcpybuff(back[i].r.msg,"Receive Error"); + if (back[i].status == -103) { /* Keep-alive socket */ + back_delete(opt,back, i); + } else { + back[i].status=0; // terminé + } } } } @@ -1297,7 +1585,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta #if HTS_USEOPENSSL /* SSL mode */ - if (back[i].r.ssl) { + if (SSL_is_available && back[i].r.ssl) { // handshake not yet launched if (!back[i].r.ssl_con) { SSL_CTX_set_options(openssl_ctx, SSL_OP_ALL); @@ -1315,7 +1603,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta } /* Error */ if (back[i].r.statuscode == -6) { - strcpy(back[i].r.msg, "bad SSL/TLS handshake"); + strcpybuff(back[i].r.msg, "bad SSL/TLS handshake"); deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET; back[i].r.statuscode=-5; @@ -1335,14 +1623,16 @@ void back_wait(lien_back* back,int 
back_max,httrackp* opt,cache_back* cache,TSta Sleep(1000/opt->maxconn); } + back[i].ka_time_start=time_local(); if (back[i].timeout>0) { // refresh timeout si besoin est - back[i].timeout_refresh=time_local(); + back[i].timeout_refresh=back[i].ka_time_start; } if (back[i].rateout>0) { // le taux de transfert de base sur le début de la connexion - back[i].rateout_time=time_local(); + back[i].rateout_time=back[i].ka_time_start; } // envoyer header //if (strcmp(back[i].url_sav,BACK_ADD_TEST)!=0) // vrai get + HTS_STAT.stat_nrequests++; if (!back[i].head_request) http_sendhead(opt->cookie,0,back[i].send_too,back[i].url_adr,back[i].url_fil,back[i].referer_adr,back[i].referer_fil,&back[i].r); else if (back[i].head_request==2) // test en GET! @@ -1356,7 +1646,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta // attente gethostbyname } #if HTS_USEOPENSSL - else if (back[i].status==102) { // wait for SSL handshake + else if (SSL_is_available && back[i].status==102) { // wait for SSL handshake /* SSL mode */ if (back[i].r.ssl) { int conn_code; @@ -1372,7 +1662,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta tmp[0]='\0'; ERR_error_string(err_code, tmp); back[i].r.msg[0]='\0'; - strncat(back[i].r.msg, tmp, sizeof(back[i].r.msg) - 2); + strncatbuff(back[i].r.msg, tmp, sizeof(back[i].r.msg) - 2); if (!strnotempty(back[i].r.msg)) { sprintf(back[i].r.msg, "SSL/TLS error %d", err_code); } @@ -1385,7 +1675,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta back[i].status=100; // back to waitconnect } } else { - strcpy(back[i].r.msg, "unexpected SSL/TLS error"); + strcpybuff(back[i].r.msg, "unexpected SSL/TLS error"); deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET; back[i].r.statuscode=-5; @@ -1425,7 +1715,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta back[i].r.soc=INVALID_SOCKET; back[i].r.statuscode=-5; if 
(strnotempty(back[i].r.msg)==0) - strcpy(back[i].r.msg,"Unable to resolve host name"); + strcpybuff(back[i].r.msg,"Unable to resolve host name"); } } @@ -1451,9 +1741,9 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta back[i].r.msg[j++]='\0'; fclose(fp); remove(fconcat(back[i].location_buffer,".ok")); - strcpy(fconcat(back[i].location_buffer,".ok"),""); + strcpybuff(fconcat(back[i].location_buffer,".ok"),""); } else { - strcpy(back[i].r.msg,"Unknown ftp result, check if file is ok"); + strcpybuff(back[i].r.msg,"Unknown ftp result, check if file is ok"); back[i].r.statuscode=-1; } back[i].status=0; @@ -1507,20 +1797,26 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta if (back[i].r.statuscode==200) { // 'OK' if (!is_hypertext_mime(back[i].r.contenttype)) { // pas HTML if (opt->getmode&2) { // on peut ecrire des non html + int fcheck=0; back[i].r.is_write=1; // écrire if (back[i].r.compressed && /* .gz are *NOT* depacked!! 
*/ (strfield(get_ext(back[i].url_sav),"gz") == 0) ) { - back[i].tmpfile[0]='\0'; - strcpy(back[i].tmpfile,tempnam(NULL,"httrZ")); - if (back[i].tmpfile[0]) + back[i].tmpfile_buffer[0]='\0'; + back[i].tmpfile=tmpnam(back[i].tmpfile_buffer); + if (back[i].tmpfile != NULL && back[i].tmpfile[0]) back[i].r.out=fopen(back[i].tmpfile,"wb"); } else { back[i].r.compressed=0; back[i].r.out=filecreate(back[i].url_sav); } + if (back[i].r.out==NULL) { + if ((fcheck=check_fatal_io_errno())) { + opt->state.exit_xh=-1; /* fatal error */ + } + } #if HDEBUG printf("direct-disk: %s\n",back[i].url_sav); #endif @@ -1531,7 +1827,11 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta if (back[i].r.out==NULL) { if (opt->errlog) { fspc(opt->errlog,"error"); - fprintf(opt->errlog,"Unable to save file %s"LF,back[i].url_sav); + fprintf(opt->errlog,"Unable to save file %s : %s"LF,back[i].url_sav, strerror(errno)); + if (fcheck) { + fspc(opt->errlog,"error"); + fprintf(opt->errlog,"* * Fatal write error, giving up"LF); + } test_flush; } back[i].r.is_write=0; // erreur, abandonner @@ -1573,7 +1873,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta if (back[i].status) { if (back[i].status==99) // recevoir par bloc de lignes retour_fread=http_xfread1(&(back[i].r),0); - else if (back[i].status==98) { // recevoir longueur chunk en hexa caractère par caractère + else if (back[i].status==98 || back[i].status==97) { // recevoir longueur chunk en hexa caractère par caractère // backuper pour lire dans le buffer chunk htsblk r; memcpy(&r, &(back[i].r), sizeof(htsblk)); @@ -1593,7 +1893,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta } else if (back[i].is_chunk) { // attention chunk, limiter taille à lire #if CHUNKDEBUG==1 - printf("read %d bytes\n",(int)min(back[i].r.totalsize-back[i].r.size,max_read_bytes)); + printf("[%d] read %d 
bytes\n",(int)back[i].r.soc,(int)min(back[i].r.totalsize-back[i].r.size,max_read_bytes)); #endif retour_fread=(int) http_xfread1(&(back[i].r),(int) min(back[i].r.totalsize-back[i].r.size,max_read_bytes)); } else @@ -1607,38 +1907,47 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta if (back[i].is_chunk) { // attendre prochain chunk if (back[i].r.size==back[i].r.totalsize) { // fin chunk! //printf("chunk end at %d\n",back[i].r.size); - back[i].status=98; // prochain chunk - if (back[i].chunk_adr!=NULL) { freet(back[i].chunk_adr); back[i].chunk_adr=NULL; } back[i].chunk_size=0; + back[i].status=97; /* fetch ending CRLF */ + if (back[i].chunk_adr!=NULL) { + freet(back[i].chunk_adr); + back[i].chunk_adr=NULL; + } + back[i].chunk_size=0; retour_fread=0; // pas d'erreur #if CHUNKDEBUG==1 - printf("waiting for next chunk header (soc %d)..\n",back[i].r.soc); + printf("[%d] waiting for current chunk CRLF..\n",(int)back[i].r.soc); #endif } + } else if (back[i].r.keep_alive) { + if (back[i].r.size==back[i].r.totalsize) { // fin! 
+ retour_fread=-1; // end + } } } - - if (retour_fread < 0) { // erreur réception + + if (retour_fread < 0) { // fin réception back[i].status=0; // terminé if (back[i].r.soc!=INVALID_SOCKET) { #if HTS_DEBUG_CLOSESOCK DEBUG_W("back_wait(4): deletehttp\n"); #endif - deletehttp(&back[i].r); + /*KA deletehttp(&back[i].r);*/ + back_maydeletehttp(opt, back, back_max, i); } - back[i].r.soc=INVALID_SOCKET; + /*KA back[i].r.soc=INVALID_SOCKET; */ #if CHUNKDEBUG==1 if (back[i].is_chunk) - printf("must be the last chunk for %s (connection closed) - %d/%d\n",back[i].url_fil,back[i].r.size,back[i].r.totalsize); + printf("[%d] must be the last chunk for %s (connection closed) - %d/%d\n",(int)back[i].r.soc,back[i].url_fil,back[i].r.size,back[i].r.totalsize); #endif //if ((back[i].r.statuscode==-1) && (strnotempty(back[i].r.msg)==0)) { - if ((back[i].r.statuscode<0) && (strnotempty(back[i].r.msg)==0)) { + if ((back[i].r.statuscode <= 0) && (strnotempty(back[i].r.msg)==0)) { #if HDEBUG printf("error interruped: %s\n",back[i].r.adr); #endif if (back[i].r.size>0) - strcat(back[i].r.msg,"Interrupted transfer"); + strcatbuff(back[i].r.msg,"Interrupted transfer"); else - strcat(back[i].r.msg,"No data (connection closed)"); + strcatbuff(back[i].r.msg,"No data (connection closed)"); back[i].r.statuscode=-4; } @@ -1652,15 +1961,15 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta if (back[i].r.totalsize!=back[i].r.size) { // pas la même! if (!opt->tolerant) { //#if HTS_CL_IS_FATAL - if (back[i].r.adr) freet(back[i].r.adr); back[i].r.adr=NULL; + deleteaddr(&back[i].r); if (back[i].r.size<back[i].r.totalsize) back[i].r.statuscode=-4; // recatch - sprintf(back[i].r.msg,"Incorrect length ("LLintP" Bytes, "LLintP" expected)",back[i].r.size,back[i].r.totalsize); + sprintf(back[i].r.msg,"Incorrect length ("LLintP" Bytes, "LLintP" expected)",(LLint)back[i].r.size,(LLint)back[i].r.totalsize); } else { //#else // Un warning suffira.. 
if (cache->errlog!=NULL) { - fspc(cache->errlog,"warning"); fprintf(cache->errlog,"Incorrect length ("LLintP"!="LLintP" expected) for %s%s"LF,back[i].r.size,back[i].r.totalsize,back[i].url_adr,back[i].url_fil); + fspc(cache->errlog,"warning"); fprintf(cache->errlog,"Incorrect length ("LLintP"!="LLintP" expected) for %s%s"LF,(LLint)back[i].r.size,(LLint)back[i].r.totalsize,back[i].url_adr,back[i].url_fil); } //#endif } @@ -1675,92 +1984,156 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta } // Traitement des en têtes chunks ou en têtes - if (back[i].status==98) { // réception taille chunk en hexa ( après les en têtes, peut ne pas - if (back[i].chunk_size>=2) { + if (back[i].status==98 || back[i].status==97) { // réception taille chunk en hexa ( après les en têtes, peut ne pas + if (back[i].chunk_size > 0 && back[i].chunk_adr[back[i].chunk_size-1]==10) { int chunk_size=-1; - // être présent) - if (back[i].chunk_adr[back[i].chunk_size-1]==10) { // LF, fin ligne chunk - char chunk_data[64]; - if (back[i].chunk_size<32) { // pas trop gros - back[i].chunk_adr[ back[i].chunk_size-1]='\0'; // octet nul - strcpy(chunk_data,""); // hex number - strcat(chunk_data,back[i].chunk_adr); + char chunk_data[64]; + if (back[i].chunk_size<32) { // pas trop gros + char* chstrip=back[i].chunk_adr; + back[i].chunk_adr[ back[i].chunk_size-1]='\0'; // octet nul + // skip leading spaces or cr + while(isspace(*chstrip)) chstrip++; + chunk_data[0] = '\0'; + strncatbuff(chunk_data, chstrip, sizeof(chunk_data) - 2); + // strip chunk-extension + while( (chstrip = strchr(chunk_data, ';'))) *chstrip='\0'; + while( (chstrip = strchr(chunk_data, ' '))) *chstrip='\0'; + while( (chstrip = strchr(chunk_data, '\r'))) *chstrip='\0'; #if CHUNKDEBUG==1 - printf("chunk received and read: %s\n",chunk_data); + printf("[%d] chunk received and read: %s\n",(int)back[i].r.soc,chunk_data); #endif - if (back[i].r.totalsize<0) - back[i].r.totalsize=0; // initialiser à 0 + if 
(back[i].r.totalsize<0) + back[i].r.totalsize=0; // initialiser à 0 + if (back[i].status==98) { // "real" chunk if (sscanf(chunk_data,"%x",&chunk_size) == 1) { + if (chunk_size > 0) + back[i].chunk_blocksize = chunk_size; /* the data block chunk size */ + else + back[i].chunk_blocksize = -1; /* ending */ back[i].r.totalsize+=chunk_size; // noter taille back[i].r.adr=(char*) realloct(back[i].r.adr,(INTsys) back[i].r.totalsize + 1); if (!back[i].r.adr) { if (cache->errlog!=NULL) { - fprintf(cache->errlog,"Error: Not enough memory ("LLintP") for %s%s"LF,back[i].r.totalsize,back[i].url_adr,back[i].url_fil); + fprintf(cache->errlog,"Error: Not enough memory ("LLintP") for %s%s"LF,(LLint)back[i].r.totalsize,back[i].url_adr,back[i].url_fil); } } #if CHUNKDEBUG==1 - printf("chunk length: %d - next total "LLintP":\n",chunk_size,back[i].r.totalsize); + printf("[%d] chunk length: %d - next total "LLintP":\n",(int)back[i].r.soc,(int)chunk_size,(LLint)back[i].r.totalsize); #endif - } else + } else { if (cache->errlog!=NULL) { fprintf(cache->errlog,"Warning: Illegal chunk (%s) for %s%s"LF,back[i].chunk_adr,back[i].url_adr,back[i].url_fil); } - } else { - if (cache->errlog!=NULL) { - fprintf(cache->errlog,"Warning: Chunk too big ("LLintP") for %s%s"LF,back[i].chunk_size,back[i].url_adr,back[i].url_fil); + } + } else { /* back[i].status==97 : just receiving ending CRLF after data */ + if (chunk_data[0] == '\0') { + if (back[i].chunk_blocksize > 0) + chunk_size=(int)back[i].chunk_blocksize; /* recent data chunk size */ + else if (back[i].chunk_blocksize == -1) + chunk_size=0; /* ending chunk */ + else + chunk_size=1; /* fake positive size for 1st chunk history */ +#if CHUNKDEBUG==1 + printf("[%d] chunk CRLF seen\n", (int)back[i].r.soc); +#endif + } else { + if (cache->errlog!=NULL) { + fprintf(cache->errlog,"Warning: Illegal chunk CRLF (%s) for %s%s"LF,back[i].chunk_adr,back[i].url_adr,back[i].url_fil); + } +#if CHUNKDEBUG==1 + printf("[%d] chunk CRLF ERROR!! 
: '%s'\n", (int)back[i].r.soc, chunk_data); +#endif } } + } else { + if (cache->errlog!=NULL) { + fprintf(cache->errlog,"Warning: Chunk too big ("LLintP") for %s%s"LF,(LLint)back[i].chunk_size,back[i].url_adr,back[i].url_fil); + } + } - // ok, continuer sur le body + // ok, continuer sur le body - // si chunk non nul continuer (ou commencer) - if (chunk_size>0) { - back[i].status=1; // continuer body + // si chunk non nul continuer (ou commencer) + if (back[i].status==97 && chunk_size > 0) { + back[i].status = 98; /* waiting for next chunk (NN\r\n<data>\r\nNN\r\n<data>..\r\n0\r\n\r\n) */ +#if CHUNKDEBUG==1 + printf("[%d] waiting for next chunk\n", (int)back[i].r.soc); +#endif + } else if (back[i].status==98 && chunk_size == 0) { /* final chunk */ + back[i].status=97; /* final CRLF */ +#if CHUNKDEBUG==1 + printf("[%d] waiting for final CRLF (chunk)\n", (int)back[i].r.soc); +#endif + } else if (back[i].status==98 && chunk_size >= 0) { /* will fetch data now */ + back[i].status=1; // continuer body #if CHUNKDEBUG==1 - printf("waiting for body (chunk)\n"); + printf("[%d] waiting for body (chunk)\n", (int)back[i].r.soc); #endif - } else { // chunk nul, c'est la fin + } else { /* zero-size-chunk-CRLF (end) or error */ #if CHUNKDEBUG==1 - printf("chunk end, total: %d\n",back[i].r.size); + printf("[%d] chunk end, total: %d\n",(int)back[i].r.soc,back[i].r.size); #endif - back[i].status=0; // fin - // finalize transfer - back_finalize(opt,cache,back,i); - if (back[i].r.soc!=INVALID_SOCKET) { + /* End */ + //if (back[i].status==97) { + back[i].status=0; // fin + //} + + // finalize transfer + back_finalize(opt,cache,back,i); + if (back[i].r.soc!=INVALID_SOCKET) { #if HTS_DEBUG_CLOSESOCK - DEBUG_W("back_wait(5): deletehttp\n"); + DEBUG_W("back_wait(5): deletehttp\n"); #endif + /* Error */ + if (chunk_size < 0) { deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET; - + deleteaddr(&back[i].r); + back[i].r.statuscode=-1; + strcpybuff(back[i].r.msg,"Invalid chunk"); +#if 
CHUNKDEBUG==1 + printf("[%d] chunk error\n", (int)back[i].r.soc); +#endif + } else /* if chunk_size == 0 */ { +#if CHUNKDEBUG==1 + printf("[%d] all chunks now received\n", (int)back[i].r.soc); +#endif + /* Tester totalsize en fin de chunk */ if ((back[i].r.totalsize>0)) { // tester totalsize if (back[i].r.totalsize!=back[i].r.size) { // pas la même! #if HTS_CL_IS_FATAL - if (back[i].r.adr) { freet(back[i].r.adr); back[i].r.adr=NULL; } + deleteaddr(&back[i].r); back[i].r.statuscode=-1; - strcpy(back[i].r.msg,"Incorrect length"); + strcpybuff(back[i].r.msg,"Incorrect length"); #else // Un warning suffira.. if (cache->errlog!=NULL) { - fspc(cache->errlog,"warning"); fprintf(cache->errlog,"Incorrect length ("LLintP"!="LLintP" expected) for %s%s"LF,back[i].r.size,back[i].r.totalsize,back[i].url_adr,back[i].url_fil); + fspc(cache->errlog,"warning"); fprintf(cache->errlog,"Incorrect length ("LLintP"!="LLintP" expected) for %s%s"LF,(LLint)back[i].r.size,(LLint)back[i].r.totalsize,back[i].url_adr,back[i].url_fil); } #endif } } - - + + /* Oops, trailers! */ + if (back[i].r.keep_alive_trailers) { + /* fixme (not yet supported) */ + } + } + + } - - // effacer buffer (chunk en tete) - if (back[i].chunk_adr!=NULL) { - freet(back[i].chunk_adr); - back[i].chunk_adr=NULL; - back[i].chunk_size=0; - } + } + + // effacer buffer (chunk en tete) + if (back[i].chunk_adr!=NULL) { + freet(back[i].chunk_adr); + back[i].chunk_adr=NULL; + back[i].chunk_size=0; + // NO! xxback[i].chunk_blocksize = 0; + } - } // chunk LF? 
- } // taille buffer chunk>2 + } // taille buffer chunk > 1 && LF // } else if (back[i].status==99) { // en têtes (avant le chunk si il est présent) // @@ -1779,15 +2152,34 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta printf("..ok, header received\n"); #endif + + // Callback +#if HTS_ANALYSTE + if (hts_htmlcheck_receivehead != NULL) { + int test_head=hts_htmlcheck_receivehead(back[i].r.adr, back[i].url_adr, back[i].url_fil, back[i].referer_adr, back[i].referer_fil, &back[i].r); + if (test_head!=1) { + if ((opt->debug>0) && (opt->log!=NULL)) { + fspc(opt->log,"warning"); fprintf(opt->log,"External wrapper aborted transfer, breaking connection: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush; + } + back[i].status=0; // FINI + deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET; + strcpybuff(back[i].r.msg,"External wrapper aborted transfer"); + back[i].r.statuscode = -1; + } + } +#endif + /* Hack for zero-length headers */ - if (back[i].r.adr[0] != '<') { + if (back[i].status != 0 && back[i].r.adr[0] != '<') { // ---------------------------------------- // traiter en-tête! 
// status-line à récupérer ptr+=binput(back[i].r.adr+ptr,rcvd,2000); - if (strnotempty(rcvd)==0) - ptr+=binput(back[i].r.adr+ptr,rcvd,2000); // "certains serveurs buggés envoient un \n au début" (RFC) + if (strnotempty(rcvd)==0) { + /* Bogus CRLF, OR recycled connection and trailing chunk CRLF */ + ptr+=binput(back[i].r.adr+ptr,rcvd,2000); + } // traiter status-line treatfirstline(&back[i].r,rcvd); @@ -1797,7 +2189,8 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta #endif if (_DEBUG_HEAD) { if (ioinfo) { - fprintf(ioinfo,"response for %s%s:\r\ncode=%d\r\n",jump_identification(back[i].url_adr),back[i].url_fil,back[i].r.statuscode); + fprintf(ioinfo,"[%d] response for %s%s:\r\ncode=%d\r\n", + back[i].r.debugid, jump_identification(back[i].url_adr),back[i].url_fil,back[i].r.statuscode); fprintfio(ioinfo,back[i].r.adr,">>> "); fprintf(ioinfo,"\r\n"); fflush(ioinfo); @@ -1831,7 +2224,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta // ---------------------------------------- // libérer mémoire -- après! 
-- - if (back[i].r.adr!=NULL) { freet(back[i].r.adr); back[i].r.adr=NULL; } + deleteaddr(&back[i].r); } else { // assume text/html, OK treatfirstline(&back[i].r, back[i].r.adr); @@ -1870,10 +2263,11 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta // -> // Content-Range: bytes */2830 if (back[i].range_req_size == back[i].r.crange) { + filenote(back[i].url_sav,NULL); + //xxusercommand(opt,0,NULL,back[i].url_sav,back[i].url_adr,back[i].url_fil); deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET; back[i].status=0; // READY back[i].r.size=back[i].r.totalsize=back[i].range_req_size; - filenote(back[i].url_sav,NULL); back[i].r.statuscode=304; // NOT MODIFIED if ((opt->debug>1) && (opt->log!=NULL)) { fspc(opt->log,"debug"); fprintf(opt->log,"File seems complete (good 416 message), breaking connection: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush; @@ -1897,8 +2291,8 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta // If the size is the same, and the option has been set, we assume // that the file is identical - and therefore let's break the connection if (back[i].is_update) { // mise à jour - if (back[i].r.statuscode==200) { // 'OK' - htsblk r = cache_read(opt,cache,back[i].url_adr,back[i].url_fil,NULL); // lire entrée cache + if (back[i].r.statuscode==200 && !back[i].testmode) { // 'OK' + htsblk r = cache_read(opt,cache,back[i].url_adr,back[i].url_fil,NULL,NULL); // lire entrée cache if (r.statuscode == 200) { // OK pas d'erreur cache LLint len1,len2; len1=r.totalsize; @@ -1930,7 +2324,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta // Detect already downloaded file (with another browser, for example) if (opt->sizehack) { if (!back[i].is_update) { // mise à jour - if (back[i].r.statuscode==200) { // 'OK' + if (back[i].r.statuscode==200 && !back[i].testmode) { // 'OK' if (!is_hypertext_mime(back[i].r.contenttype)) { // not HTML if 
(strnotempty(back[i].url_sav)) { // target found int size = fsize(back[i].url_sav); // target size @@ -1940,6 +2334,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta back[i].status=0; // READY back[i].r.size=back[i].r.totalsize; filenote(back[i].url_sav,NULL); + //xxusercommand(opt,0,NULL,back[i].url_sav,back[i].url_adr,back[i].url_fil); back[i].r.statuscode=304; // NOT MODIFIED if ((opt->debug>1) && (opt->log!=NULL)) { fspc(opt->log,"debug"); fprintf(opt->log,"File seems complete (same size file discovered), breaking connection: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush; @@ -1976,6 +2371,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta back[i].status=0; // READY back[i].r.size=back[i].r.totalsize; filenote(back[i].url_sav,NULL); + //xxusercommand(opt,0,NULL,back[i].url_sav,back[i].url_adr,back[i].url_fil); back[i].r.statuscode=304; // NOT MODIFIED if ((opt->debug>1) && (opt->log!=NULL)) { fspc(opt->log,"debug"); fprintf(opt->log,"File seems complete (reget failed), breaking connection: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush; @@ -2006,9 +2402,9 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta back[i].status=0; // FINI deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET; if (!back[i].testmode) - strcpy(back[i].r.msg,"File too big"); + strcpybuff(back[i].r.msg,"File too big"); else - strcpy(back[i].r.msg,"Test: File too big"); + strcpybuff(back[i].r.msg,"Test: File too big"); } } @@ -2023,7 +2419,9 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta DEBUG_W("back_wait(head request): deletehttp\n"); #endif // Couper connexion - deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET; + if (!back[i].http11) { /* NO KA */ + deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET; + } back[i].status=0; // terminé } // traiter une éventuelle erreur 304 (cache à jour utilisable) @@ -2033,13 +2431,14 @@ void 
back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta #if HTS_DEBUG_CLOSESOCK DEBUG_W("back_wait(file is not modified): deletehttp\n"); #endif - deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET; - back[i].r=cache_read(opt,cache,back[i].url_adr,back[i].url_fil,back[i].url_sav); - if (!back[i].r.location) - back[i].r.location=back[i].location_buffer; - else { /* recopier */ - strcpy(back[i].location_buffer,back[i].r.location); + /* clear everything but connection: switch, close, and reswitch */ + { + htsblk tmp; + memset(&tmp, 0, sizeof(tmp)); + back_connxfr(&back[i].r, &tmp); + back[i].r=cache_read(opt,cache,back[i].url_adr,back[i].url_fil,back[i].url_sav,back[i].location_buffer); back[i].r.location=back[i].location_buffer; + back_connxfr(&tmp,&back[i].r); } // hack: @@ -2048,6 +2447,10 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta if (back[i].r.statuscode == -1) { if (fexist(back[i].url_sav)) { back[i].r.statuscode=200; // OK + strcpybuff(back[i].r.msg, "OK (cached)"); + back[i].r.is_file=1; + back[i].r.totalsize = back[i].r.size = fsize(back[i].url_sav); + get_httptype(back[i].r.contenttype, back[i].url_sav, 1); if ((opt->debug>0) && (opt->log!=NULL)) { fspc(opt->log,"debug"); fprintf(opt->log,"Not-modified status without cache guessed: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush; } @@ -2078,6 +2481,8 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta } +/********** NO - must complete the body! 
********** */ +#if 0 } else if ((back[i].r.statuscode==301) || (back[i].r.statuscode==302) || (back[i].r.statuscode==303) @@ -2089,20 +2494,21 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta DEBUG_W("back_wait(301,302,303,307,412,416..): deletehttp\n"); #endif // Couper connexion - deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET; + /*KA deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET;*/ + back_maydeletehttp(opt, back, back_max, i); + back[i].status=0; // terminé // finalize if (back[i].r.statuscode>0) { back_finalize(opt,cache,back,i); } +#endif +/********** **************************** ********** */ } else { // il faut aller le chercher // effacer buffer (requète) if (!noFreebuff) { - if (back[i].r.adr!=NULL) { - freet(back[i].r.adr); - back[i].r.adr=NULL; - } + deleteaddr(&back[i].r); back[i].r.size=0; } @@ -2130,7 +2536,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta #endif } else { // On est dans la m** back[i].status=0; // terminé (voir plus loin) - strcpy(back[i].r.msg,"Can not open partial file"); + strcpybuff(back[i].r.msg,"Can not open partial file"); } } } else { // mémoire @@ -2139,13 +2545,13 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta LLint alloc_mem=sz + 1; if (back[i].r.totalsize>0) alloc_mem+=back[i].r.totalsize; // AJOUTER RESTANT! 
- if ( (back[i].r.adr=(char*) malloct((INTsys) alloc_mem)) ) { + if ( deleteaddr(&back[i].r) && (back[i].r.adr=(char*) malloct((INTsys) alloc_mem)) ) { back[i].r.size=sz; if (back[i].r.totalsize>0) back[i].r.totalsize+=sz; // plus en fait - if (((int) fread(back[i].r.adr,1,(INTsys)sz,fp)) != sz) { + if (( fread(back[i].r.adr,1,(INTsys)sz,fp)) != sz) { back[i].status=0; // terminé (voir plus loin) - strcpy(back[i].r.msg,"Can not read partial file"); + strcpybuff(back[i].r.msg,"Can not read partial file"); } else { back[i].r.statuscode=200; // Forcer 'OK' #if HDEBUG @@ -2154,17 +2560,17 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta } } else { back[i].status=0; // terminé (voir plus loin) - strcpy(back[i].r.msg,"No memory for partial file"); + strcpybuff(back[i].r.msg,"No memory for partial file"); } fclose(fp); } else { // Argh.. back[i].status=0; // terminé (voir plus loin) - strcpy(back[i].r.msg,"Can not open partial file"); + strcpybuff(back[i].r.msg,"Can not open partial file"); } } } else { // Non trouvé?? back[i].status=0; // terminé (voir plus loin) - strcpy(back[i].r.msg,"Can not find partial file"); + strcpybuff(back[i].r.msg,"Can not find partial file"); } // Erreur? if (back[i].status==0) { @@ -2178,25 +2584,36 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta //back[i].r.statuscode=206; ???????? 
back[i].r.statuscode=-5; if (strnotempty(back[i].r.msg)) - strcpy(back[i].r.msg,"Error attempting to solve status 206 (partial file)"); + strcpybuff(back[i].r.msg,"Error attempting to solve status 206 (partial file)"); } } if (back[i].status!=0) { // non terminé (erreur) if (!back[i].testmode) { // fichier normal - if (!back[i].r.is_chunk) { // pas de chunk + if (back[i].r.empty && back[i].r.statuscode==200) { // empty response + // Couper connexion + deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET; + back[i].status=0; // terminé + if ( deleteaddr(&back[i].r) && (back[i].r.adr=(char*) malloct((INTsys) 2)) ) { + back[i].r.adr[0] = 0; + } + back_finalize(opt,cache,back,i); + } + else if (!back[i].r.is_chunk) { // pas de chunk //if (back[i].r.http11!=2) { // pas de chunk back[i].is_chunk=0; back[i].status=1; // start body } else { #if CHUNKDEBUG==1 - printf("chunk encoding detected %s..\n",back[i].url_fil); + printf("[%d] chunk encoding detected %s..\n",(int)back[i].r.soc, back[i].url_fil); #endif back[i].is_chunk=1; back[i].chunk_adr=NULL; back[i].chunk_size=0; + back[i].chunk_blocksize=0; back[i].status=98; // start body wait chunk + back[i].r.totalsize=0; /* devalidate size! 
(rfc) */ } if (back[i].rateout>0) { back[i].rateout_time=time_local(); // refresh pour transfer rate @@ -2211,14 +2628,14 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta #endif deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET; if (back[i].r.statuscode==200) { - strcpy(back[i].r.msg,"Test: OK"); + strcpybuff(back[i].r.msg,"Test: OK"); back[i].r.statuscode=-10; // test réussi } else { // test a échoué, on ne change rien sauf que l'erreur est à titre indicatif char tempo[1000]; - strcpy(tempo,back[i].r.msg); - strcpy(back[i].r.msg,"Test: "); - strcat(back[i].r.msg,tempo); + strcpybuff(tempo,back[i].r.msg); + strcpybuff(back[i].r.msg,"Test: "); + strcatbuff(back[i].r.msg,tempo); } } @@ -2254,9 +2671,9 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta if (opt->verbosedisplay==1) { if (back[i].status==0) { if (back[i].r.statuscode==200) - printf("* %s%s ("LLintP" bytes) - OK"VT_CLREOL"\r",back[i].url_adr,back[i].url_fil,back[i].r.size); + printf("* %s%s ("LLintP" bytes) - OK"VT_CLREOL"\r",back[i].url_adr,back[i].url_fil,(LLint)back[i].r.size); else - printf("* %s%s ("LLintP" bytes) - %d"VT_CLREOL"\r",back[i].url_adr,back[i].url_fil,back[i].r.size,back[i].r.statuscode); + printf("* %s%s ("LLintP" bytes) - %d"VT_CLREOL"\r",back[i].url_adr,back[i].url_fil,(LLint)back[i].r.size,back[i].r.statuscode); fflush(stdout); } } @@ -2284,11 +2701,11 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta back[i].r.soc=INVALID_SOCKET; back[i].r.statuscode=-2; if (back[i].status==100) - strcpy(back[i].r.msg,"Connect Time Out"); + strcpybuff(back[i].r.msg,"Connect Time Out"); else if (back[i].status==101) - strcpy(back[i].r.msg,"DNS Time Out"); + strcpybuff(back[i].r.msg,"DNS Time Out"); else - strcpy(back[i].r.msg,"Receive Time Out"); + strcpybuff(back[i].r.msg,"Receive Time Out"); back[i].status=0; // terminé } else if ((back[i].rateout>0) && (back[i].status<99)) { if (((int) 
(act-back[i].rateout_time))>=HTS_WATCHRATE) { // checker au bout de 15s @@ -2302,7 +2719,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta } back[i].r.soc=INVALID_SOCKET; back[i].r.statuscode=-3; - strcpy(back[i].r.msg,"Transfer Rate Too Low"); + strcpybuff(back[i].r.msg,"Transfer Rate Too Low"); } } } @@ -2333,7 +2750,7 @@ int back_checksize(httrackp* opt,lien_back* eback,int check_only_totalsize) { if (size_to_test>=0) { /* Interdiction taille par le wizard? */ - if (hts_testlinksize(opt,eback->url_adr,eback->url_fil,(eback->r.totalsize+1023)/1024)==-1) { + if (hts_testlinksize(opt,eback->url_adr,eback->url_fil,eback->r.totalsize/1024)==-1) { return 0; /* interdit */ } @@ -2345,13 +2762,31 @@ int back_checksize(httrackp* opt,lien_back* eback,int check_only_totalsize) { return 1; } +int back_checkmirror(httrackp* opt) { + // Check max time + if ((opt->maxsite>0) && (HTS_STAT.stat_bytes >= opt->maxsite)) { + if (opt->errlog) { + fprintf(opt->errlog,"More than "LLintP" bytes have been transfered.. giving up"LF,(LLint)opt->maxsite); + test_flush; + } + return 0; + } else if ((opt->maxtime>0) && ((time_local()-HTS_STAT.stat_timestart)>opt->maxtime)) { + if (opt->errlog) { + fprintf(opt->errlog,"More than %d seconds passed.. 
giving up"LF,opt->maxtime); + test_flush; + } + return 0; + } + return 1; /* Ok, go on */ +} + // octets transférés + add LLint back_transfered(LLint nb,lien_back* back,int back_max) { int i; // ajouter octets en instance for(i=0;i<back_max;i++) - if ((back[i].status>0) && (back[i].status<99)) + if ((back[i].status>0) && (back[i].status<99 || back[i].status>=1000)) nb+=back[i].r.size; return nb; } @@ -2360,10 +2795,10 @@ LLint back_transfered(LLint nb,lien_back* back,int back_max) { // j: 1 afficher sockets 2 afficher autres 3 tout afficher void back_info(lien_back* back,int i,int j,FILE* fp) { if (back[i].status>=0) { - char s[256]; + char s[HTS_URLMAXSIZE*2+1024]; s[0]='\0'; back_infostr(back,i,j,s); - strcat(s,LF); + strcatbuff(s,LF); fprintf(fp,"%s",s); } } @@ -2375,19 +2810,19 @@ void back_infostr(lien_back* back,int i,int j,char* s) { int aff=0; if (j & 1) { if (back[i].status==100) { - strcat(s,"CONNECT "); + strcatbuff(s,"CONNECT "); } else if (back[i].status==99) { - strcat(s,"INFOS "); + strcatbuff(s,"INFOS "); aff=1; - } else if (back[i].status==98) { - strcat(s,"INFOSC"); // infos chunk + } else if (back[i].status==98 || back[i].status==97) { + strcatbuff(s,"INFOSC"); // infos chunk aff=1; } else if (back[i].status>0) { #if HTS_ANALYSTE==2 - strcat(s,"WAIT "); + strcatbuff(s,"WAIT "); #else - strcat(s,"RECEIVE "); + strcatbuff(s,"RECEIVE "); #endif aff=1; } @@ -2396,47 +2831,47 @@ void back_infostr(lien_back* back,int i,int j,char* s) { if (back[i].status==0) { switch (back[i].r.statuscode) { case 200: - strcat(s,"READY "); + strcatbuff(s,"READY "); aff=1; break; #if HTS_ANALYSTE==2 default: - strcat(s,"ERROR "); + strcatbuff(s,"ERROR "); break; #else case -1: - strcat(s,"ERROR "); + strcatbuff(s,"ERROR "); aff=1; break; case -2: - strcat(s,"TIMEOUT "); + strcatbuff(s,"TIMEOUT "); aff=1; break; case -3: - strcat(s,"TOOSLOW "); + strcatbuff(s,"TOOSLOW "); aff=1; break; case 400: - strcat(s,"BADREQUEST "); + strcatbuff(s,"BADREQUEST "); aff=1; break; case 
401: case 403: - strcat(s,"FORBIDDEN "); + strcatbuff(s,"FORBIDDEN "); aff=1; break; case 404: - strcat(s,"NOT FOUND "); + strcatbuff(s,"NOT FOUND "); aff=1; break; case 500: - strcat(s,"SERVERROR "); + strcatbuff(s,"SERVERROR "); aff=1; break; default: { char s2[256]; sprintf(s2,"ERROR(%d)",back[i].r.statuscode); - strcat(s,s2); + strcatbuff(s,s2); } aff=1; #endif @@ -2446,12 +2881,12 @@ void back_infostr(lien_back* back,int i,int j,char* s) { if (aff) { { - char s2[1024]; - sprintf(s2,"\"%s",back[i].url_adr); strcat(s,s2); + char s2[HTS_URLMAXSIZE*2+1024]; + sprintf(s2,"\"%s",back[i].url_adr); strcatbuff(s,s2); - if (back[i].url_fil[0]!='/') strcat(s,"/"); - sprintf(s2,"%s\" ",back[i].url_fil); strcat(s,s2); - sprintf(s,LLintP" "LLintP" ",back[i].r.size,back[i].r.totalsize); strcat(s,s2); + if (back[i].url_fil[0]!='/') strcatbuff(s,"/"); + sprintf(s2,"%s\" ",back[i].url_fil); strcatbuff(s,s2); + sprintf(s,LLintP" "LLintP" ",(LLint)back[i].r.size,(LLint)back[i].r.totalsize); strcatbuff(s,s2); } } } diff --git a/src/htsback.h b/src/htsback.h index af5fe6c..74fd540 100644 --- a/src/htsback.h +++ b/src/htsback.h @@ -50,11 +50,19 @@ int back_available(lien_back* back,int back_max); LLint back_incache(lien_back* back,int back_max); HTS_INLINE int back_exist(lien_back* back,int back_max,char* adr,char* fil,char* sav); int back_nsoc(lien_back* back,int back_max); -int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* referer_adr,char* referer_fil,int test,short int* pass2_ptr); +int back_nsoc_overall(lien_back* back,int back_max); +int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* referer_adr,char* referer_fil,int test,int* pass2_ptr); int back_stack_available(lien_back* back,int back_max); +int back_search(httrackp* opt, lien_back* back, int back_max); void back_clean(httrackp* opt,cache_back* cache,lien_back* back,int back_max); void 
back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TStamp stat_timestart); -int back_delete(lien_back* back,int p); +int back_letlive(httrackp* opt, lien_back* back, int p); +int back_searchlive(httrackp* opt, lien_back* back, int back_max, char* search_addr); +void back_connxfr(htsblk* src, htsblk* dst); +int back_delete(httrackp* opt,lien_back* back,int p); +int back_maydelete(httrackp* opt, lien_back* back, int p); +void back_maydeletehttp(httrackp* opt, lien_back* back, int back_max, int p); +int back_trylive(httrackp* opt,lien_back* back, int back_max, int p); int back_finalize(httrackp* opt,cache_back* cache,lien_back* back,int p); void back_info(lien_back* back,int i,int j,FILE* fp); void back_infostr(lien_back* back,int i,int j,char* s); @@ -65,6 +73,7 @@ void back_solve(lien_back* back); int host_wait(lien_back* back); #endif int back_checksize(httrackp* opt,lien_back* eback,int check_only_totalsize); +int back_checkmirror(httrackp* opt); #if HTS_XGETHOST #if USE_BEGINTHREAD diff --git a/src/htsbase.h b/src/htsbase.h index 3e83471..139e3ed 100644 --- a/src/htsbase.h +++ b/src/htsbase.h @@ -38,20 +38,43 @@ Please visit our Website: http://www.httrack.com #ifndef HTS_BASICH #define HTS_BASICH +#ifdef __cplusplus +extern "C" { +#endif + #include "htsglobal.h" // size_t et mode_t #include <stdio.h> -#if HTS_WIN -#else -#include <fcntl.h> +#include <stdlib.h> + +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#ifdef HAVE_SYS_TYPES_H +#include <sys/types.h> +#endif +#ifdef HAVE_SYS_STAT_H +#include <sys/stat.h> +#endif +#ifdef HAVE_DLFCN_H +#include <dlfcn.h> +#endif + +#ifndef _WIN32 +#include <errno.h> #endif #if HTS_WIN #else - #define min(a,b) ((a)>(b)?(b):(a)) - #define max(a,b) ((a)>(b)?(a):(b)) +#include <fcntl.h> #endif +#include <assert.h> + +#undef min +#undef max +#define min(a,b) ((a)>(b)?(b):(a)) +#define max(a,b) ((a)>(b)?(a):(b)) // teste égalité de 2 chars, case insensitive #define hichar(a) ((((a)>='a') && ((a)<='z')) ? 
((a)-('a'-'A')) : (a)) @@ -63,6 +86,7 @@ Please visit our Website: http://www.httrack.com ( (strfield2((a),"text/html")!=0)\ || (strfield2((a),"application/x-javascript")!=0) \ || (strfield2((a),"text/css")!=0) \ + /*|| (strfield2((a),"text/vnd.wap.wml")!=0)*/ \ || (strfield2((a),"image/svg+xml")!=0) \ || (strfield2((a),"image/svg-xml")!=0) \ /*|| (strfield2((a),"audio/x-pn-realaudio")!=0) */\ @@ -77,60 +101,287 @@ Please visit our Website: http://www.httrack.com // caractère maj #define isUpperLetter(a) ( ((a) >= 'A') && ((a) <= 'Z') ) -// conversion éventuelle / vers antislash -#if HTS_WIN -char* antislash(char* s); +// functions +#ifdef _WIN32 +#define DynamicGet(handle, sym) GetProcAddress(handle, sym) #else -#define antislash(A) (A) +#define DynamicGet(handle, sym) dlsym(handle, sym) #endif +// emergency log +typedef void (*t_abortLog)(char* msg, char* file, int line); +extern HTSEXT_API t_abortLog abortLog__; +#define abortLog(a) abortLog__(a, __FILE__, __LINE__) +#define abortLogFmt(a) do { \ + FILE* fp = fopen("CRASH.TXT", "wb"); \ + if (!fp) fp = fopen("/tmp/CRASH.TXT", "wb"); \ + if (!fp) fp = fopen("C:\\CRASH.TXT", "wb"); \ + if (fp) { \ + fprintf(fp, "HTTrack " HTTRACK_VERSIONID " closed at '" __FILE__ "', line %d\r\n", __LINE__); \ + fprintf(fp, "Reason:\r\n"); \ + fprintf(fp, a); \ + fprintf(fp, "\r\n"); \ + fflush(fp); \ + fclose(fp); \ + } \ +} while(0) -// functions -#if HTS_PLATFORM!=3 -#ifdef __cplusplus -extern "C" { -#endif -#if HTS_PLATFORM!=2 -#if HTS_PLATFORM!=1 - int open (const char *, int, ...); -#endif - //int read (int,const char*,int); - //int write (int,char*,int); -#endif -#if HTS_PLATFORM!=1 - int close (int); - void* calloc (size_t,size_t); - void* malloc (size_t); - void* realloc (void*,size_t); - void free (void*); -#endif -#if HTS_WIN -#else - int mkdir (const char*,mode_t); -#endif -#ifdef __cplusplus -} -#endif -#endif +#define _ , +#define abortLogFmt(a) do { \ + FILE* fp = fopen("CRASH.TXT", "wb"); \ + if (!fp) fp = 
fopen("/tmp/CRASH.TXT", "wb"); \ + if (!fp) fp = fopen("C:\\CRASH.TXT", "wb"); \ + if (fp) { \ + fprintf(fp, "HTTrack " HTTRACK_VERSIONID " closed at '" __FILE__ "', line %d\r\n", __LINE__); \ + fprintf(fp, "Reason:\r\n"); \ + fprintf(fp, a); \ + fprintf(fp, "\r\n"); \ + fflush(fp); \ + fclose(fp); \ + } \ +} while(0) +#define assertf(exp) do { \ + if (! ( exp ) ) { \ + abortLog("assert failed: " #exp); \ + if (htsCallbackErr != NULL) { \ + htsCallbackErr("assert failed: " #exp, __FILE__ , __LINE__ ); \ + } \ + assert(exp); \ + abort(); \ + } \ +} while(0) +/* non-fatal assert */ +#define assertnf(exp) do { \ + if (! ( exp ) ) { \ + abortLog("assert failed: " #exp); \ + if (htsCallbackErr != NULL) { \ + htsCallbackErr("assert failed: " #exp, __FILE__ , __LINE__ ); \ + } \ + } \ +} while(0) -// tracer malloc() -#if HTS_TRACE_MALLOC -#define malloct(A) hts_malloc(A,0) -#define calloct(A,B) hts_malloc(A,B) -#define freet(A) hts_free(A) + +/* regular malloc's() */ +#ifndef HTS_TRACE_MALLOC +#define malloct(A) malloc(A) +#define calloct(A,B) calloc((A), (B)) +#define freet(A) do { assertnf((A) != NULL); if ((A) != NULL) { free(A); (A) = NULL; } } while(0) +#define realloct(A,B) ( ((A) != NULL) ? 
realloc((A), (B)) : malloc(B) ) +#define memcpybuff(A, B, N) memcpy((A), (B), (N)) +#else +/* debug version */ +#define malloct(A) hts_malloc(A) +#define calloct(A,B) hts_calloc(A,B) +#define freet(A) do { hts_free(A); (A) = NULL; } while(0) #define realloct(A,B) hts_realloc(A,B) void hts_freeall(); -void* hts_malloc (size_t,size_t); +void* hts_malloc (size_t); +void* hts_calloc(size_t,size_t); +void* hts_xmalloc(size_t,size_t); void hts_free (void*); void* hts_realloc (void*,size_t); +mlink* hts_find(char* adr); +/* protected memcpy */ +#define memcpybuff(A, B, N) do { \ + mlink* lnk = hts_find((void*)(A)); \ + if (lnk != NULL) { \ + assertf(lnk != NULL); \ + assertf( * ( (t_htsboundary*) ( ((char*) lnk->adr) - sizeof(htsboundary) ) ) == htsboundary ); \ + assertf( * ( (t_htsboundary*) ( ((char*) lnk->adr) + lnk->len ) ) == htsboundary ); \ + assertf( ( ((char*)(A)) + (N)) < (char*) (lnk->adr + lnk->len) ); \ + } \ + memcpy(A, B, N); \ +} while(0) + +#endif + +typedef void (* htsErrorCallback)(char* msg, char* file, int line); +extern HTSEXT_API htsErrorCallback htsCallbackErr; +extern HTSEXT_API int htsMemoryFastXfr; + +/* +*/ + + +#ifdef STRDEBUG + +/* protected strcat, strncat and strcpy - definitely useful */ +#define strcatbuff(A, B) do { \ + assertf( (A) != NULL ); \ + if ( ! (B) ) { assertf( 0 ); } \ + if (htsMemoryFastXfr) { \ + if (sizeof(A) != sizeof(char*)) { \ + (A)[sizeof(A) - 1] = '\0'; \ + } \ + strcat(A, B); \ + if (sizeof(A) != sizeof(char*)) { \ + assertf((A)[sizeof(A) - 1] == '\0'); \ + } \ + } else { \ + unsigned int sz = (unsigned int) strlen(A); \ + unsigned int szf = (unsigned int) strlen(B); \ + if (sizeof(A) != sizeof(char*)) { \ + assertf(sz + szf + 1 < sizeof(A)); \ + if (szf > 0) { \ + if (sz + szf + 1 < sizeof(A)) { \ + memcpy((A) + sz, (B), szf + 1); \ + } \ + } \ + } else if (szf > 0) { \ + memcpybuff((A) + sz, (B), szf + 1); \ + } \ + } \ +} while(0) +#define strncatbuff(A, B, N) do { \ + assertf( (A) != NULL ); \ + if ( ! 
(B) ) { assertf( 0 ); } \ + if (htsMemoryFastXfr) { \ + if (sizeof(A) != sizeof(char*)) { \ + (A)[sizeof(A) - 1] = '\0'; \ + } \ + strncat(A, B, N); \ + if (sizeof(A) != sizeof(char*)) { \ + assertf((A)[sizeof(A) - 1] == '\0'); \ + } \ + } else { \ + unsigned int sz = (unsigned int) strlen(A); \ + unsigned int szf = (unsigned int) strlen(B); \ + if (szf > (unsigned int) (N)) szf = (unsigned int) (N); \ + if (sizeof(A) != sizeof(char*)) { \ + assertf(sz + szf + 1 < sizeof(A)); \ + if (szf > 0) { \ + if (sz + szf + 1 < sizeof(A)) { \ + memcpy((A) + sz, (B), szf); \ + * ( (A) + sz + szf) = '\0'; \ + } \ + } \ + } else if (szf > 0) { \ + memcpybuff((A) + sz, (B), szf); \ + * ( (A) + sz + szf) = '\0'; \ + } \ + } \ +} while(0) +#define strcpybuff(A, B) do { \ + assertf( (A) != NULL ); \ + if ( ! (B) ) { assertf( 0 ); } \ + if (htsMemoryFastXfr) { \ + if (sizeof(A) != sizeof(char*)) { \ + (A)[sizeof(A) - 1] = '\0'; \ + } \ + strcpy(A, B); \ + if (sizeof(A) != sizeof(char*)) { \ + assertf((A)[sizeof(A) - 1] == '\0'); \ + } \ + } else { \ + unsigned int szf = (unsigned int) strlen(B); \ + if (sizeof(A) != sizeof(char*)) { \ + assertf(szf + 1 < sizeof(A)); \ + if (szf > 0) { \ + if (szf + 1 < sizeof(A)) { \ + memcpy((A), (B), szf + 1); \ + } else { \ + * (A) = '\0'; \ + } \ + } else { \ + * (A) = '\0'; \ + } \ + } else { \ + memcpybuff((A), (B), szf + 1); \ + } \ + } \ +} while(0) +#define strncpybuff(A, B, N) do { \ + assertf( (A) != NULL ); \ + if ( ! 
(B) ) { assertf( 0 ); } \ + if (htsMemoryFastXfr) { \ + if (sizeof(A) != sizeof(char*)) { \ + (A)[sizeof(A) - 1] = '\0'; \ + } \ + strncpy(A, B, N); \ + if (sizeof(A) != sizeof(char*)) { \ + assertf((A)[sizeof(A) - 1] == '\0'); \ + } \ + } else { \ + unsigned int szf = (unsigned int) strlen(B); \ + if (szf > (unsigned int) (N)) szf = (unsigned int) (N); \ + if (sizeof(A) != sizeof(char*)) { \ + assertf(szf + 1 < sizeof(A)); \ + if (szf > 0) { \ + if (szf + 1 < sizeof(A)) { \ + memcpy((A), (B), szf); \ + } \ + } \ + } else { \ + memcpybuff((A), (B), szf); \ + } \ + } \ +} while(0) + +#else + +#ifdef STRDEBUGFAST + +/* protected strcat, strncat and strcpy - definitely useful */ +#define strcatbuff(A, B) do { \ + assertf( (A) != NULL ); \ + if ( ! (B) ) { assertf( 0 ); } \ + if (sizeof(A) != sizeof(char*)) { \ + (A)[sizeof(A) - 1] = '\0'; \ + } \ + strcat(A, B); \ + if (sizeof(A) != sizeof(char*)) { \ + assertf((A)[sizeof(A) - 1] == '\0'); \ + } \ +} while(0) +#define strncatbuff(A, B, N) do { \ + assertf( (A) != NULL ); \ + if ( ! (B) ) { assertf( 0 ); } \ + if (sizeof(A) != sizeof(char*)) { \ + (A)[sizeof(A) - 1] = '\0'; \ + } \ + strncat(A, B, N); \ + if (sizeof(A) != sizeof(char*)) { \ + assertf((A)[sizeof(A) - 1] == '\0'); \ + } \ +} while(0) +#define strcpybuff(A, B) do { \ + assertf( (A) != NULL ); \ + if ( ! (B) ) { assertf( 0 ); } \ + if (sizeof(A) != sizeof(char*)) { \ + (A)[sizeof(A) - 1] = '\0'; \ + } \ + strcpy(A, B); \ + if (sizeof(A) != sizeof(char*)) { \ + assertf((A)[sizeof(A) - 1] == '\0'); \ + } \ +} while(0) +#define strncpybuff(A, B, N) do { \ + assertf( (A) != NULL ); \ + if ( ! 
(B) ) { assertf( 0 ); } \ + if (sizeof(A) != sizeof(char*)) { \ + (A)[sizeof(A) - 1] = '\0'; \ + } \ + strncpy(A, B, N); \ + if (sizeof(A) != sizeof(char*)) { \ + assertf((A)[sizeof(A) - 1] == '\0'); \ + } \ +} while(0) + #else -#define malloct(A) malloc(A) -#define calloct(A,B) calloc(A,B) -#define freet(A) free(A) -#define realloct(A,B) realloc(A,B) + +#define strcatbuff strcat +#define strncatbuff strncat +#define strcpybuff strcpy +#define strncpybuff strncpy + +#endif + #endif +#ifdef __cplusplus + }; #endif +#endif diff --git a/src/htsbasenet.h b/src/htsbasenet.h index d63a2e7..71ac9c9 100644 --- a/src/htsbasenet.h +++ b/src/htsbasenet.h @@ -70,9 +70,89 @@ Please visit our Website: http://www.httrack.com #ifdef __cplusplus extern "C" { #endif + +/* #include <openssl/ssl.h> #include <openssl/crypto.h> #include <openssl/err.h> +*/ + +/* OpenSSL definitions */ +#define SSL_shutdown hts_ptrfunc_SSL_shutdown +#define SSL_free hts_ptrfunc_SSL_free +#define SSL_new hts_ptrfunc_SSL_new +#define SSL_clear hts_ptrfunc_SSL_clear +#define SSL_set_fd hts_ptrfunc_SSL_set_fd +#define SSL_set_connect_state hts_ptrfunc_SSL_set_connect_state +#define SSL_connect hts_ptrfunc_SSL_connect +#define SSL_get_error hts_ptrfunc_SSL_get_error +#define SSL_write hts_ptrfunc_SSL_write +#define SSL_read hts_ptrfunc_SSL_read +#define SSL_library_init hts_ptrfunc_SSL_library_init +#define ERR_load_crypto_strings hts_ptrfunc_ERR_load_crypto_strings +#define ERR_load_SSL_strings hts_ptrfunc_ERR_load_SSL_strings +#define SSLv23_client_method hts_ptrfunc_SSLv23_client_method +#define SSL_CTX_new hts_ptrfunc_SSL_CTX_new +#define ERR_error_string hts_ptrfunc_ERR_error_string +#define SSL_load_error_strings hts_ptrfunc_SSL_load_error_strings +#define SSL_CTX_ctrl hts_ptrfunc_SSL_CTX_ctrl +/* */ +typedef void SSL_CTX; +typedef void* SSL; +typedef void SSL_METHOD; +typedef int (*t_SSL_shutdown)(SSL *ssl); +typedef void (*t_SSL_free)(SSL *ssl); +typedef SSL (*t_SSL_new)(SSL_CTX *ctx); +typedef int 
(*t_SSL_clear)(SSL *ssl); +typedef int (*t_SSL_set_fd)(SSL *ssl, int fd); +typedef void (*t_SSL_set_connect_state)(SSL *ssl); +typedef int (*t_SSL_connect)(SSL *ssl); +typedef int (*t_SSL_get_error)(SSL *ssl, int ret); +typedef int (*t_SSL_write)(SSL *ssl, const void *buf, int num); +typedef int (*t_SSL_read)(SSL *ssl, void *buf, int num); +typedef int (*t_SSL_library_init)(void); +typedef void (*t_ERR_load_crypto_strings)(void); +typedef void (*t_ERR_load_SSL_strings)(void); +typedef SSL_METHOD * (*t_SSLv23_client_method)(void); +typedef SSL_CTX * (*t_SSL_CTX_new)(SSL_METHOD *method); +typedef char * (*t_ERR_error_string)(unsigned long e, char *buf); +typedef void (*t_SSL_load_error_strings)(void); +typedef long (*t_SSL_CTX_ctrl)(SSL_CTX *ctx, int cmd, long larg, char *parg); +extern int SSL_is_available; +extern t_SSL_shutdown SSL_shutdown; +extern t_SSL_free SSL_free; +extern t_SSL_new SSL_new; +extern t_SSL_clear SSL_clear; +extern t_SSL_set_fd SSL_set_fd; +extern t_SSL_set_connect_state SSL_set_connect_state; +extern t_SSL_connect SSL_connect; +extern t_SSL_get_error SSL_get_error; +extern t_SSL_write SSL_write; +extern t_SSL_read SSL_read; +extern t_SSL_library_init SSL_library_init; +extern t_ERR_load_crypto_strings ERR_load_crypto_strings; +extern t_ERR_load_SSL_strings ERR_load_SSL_strings; +extern t_SSLv23_client_method SSLv23_client_method; +extern t_SSL_CTX_new SSL_CTX_new; +extern t_ERR_error_string ERR_error_string; +extern t_SSL_load_error_strings SSL_load_error_strings; +extern t_SSL_CTX_ctrl SSL_CTX_ctrl; +/* +From /usr/include/openssl/ssl.h +*/ +#define SSL_ERROR_NONE 0 +#define SSL_ERROR_SSL 1 +#define SSL_ERROR_WANT_READ 2 +#define SSL_ERROR_WANT_WRITE 3 +#define SSL_ERROR_WANT_X509_LOOKUP 4 +#define SSL_ERROR_SYSCALL 5 /* look at error stack/return value/errno */ +#define SSL_ERROR_ZERO_RETURN 6 +#define SSL_ERROR_WANT_CONNECT 7 +#define SSL_OP_ALL 0x000FFFFFL +#define SSL_CTRL_OPTIONS 32 +#define SSL_CTX_set_options(ctx,op) \ + 
SSL_CTX_ctrl(ctx,SSL_CTRL_OPTIONS,op,NULL) + //#include <openssl/bio.h> #ifdef __cplusplus }; diff --git a/src/htsbauth.c b/src/htsbauth.c index a1506c1..23a22af 100644 --- a/src/htsbauth.c +++ b/src/htsbauth.c @@ -86,20 +86,20 @@ int cookie_add(t_cookie* cookie,char* cook_name,char* cook_value,char* domain,ch } } // construction du cookie - strcpy(cook,domain); - strcat(cook,"\t"); - strcat(cook,"TRUE"); - strcat(cook,"\t"); - strcat(cook,path); - strcat(cook,"\t"); - strcat(cook,"FALSE"); - strcat(cook,"\t"); - strcat(cook,"1999999999"); - strcat(cook,"\t"); - strcat(cook,cook_name); - strcat(cook,"\t"); - strcat(cook,cook_value); - strcat(cook,"\n"); + strcpybuff(cook,domain); + strcatbuff(cook,"\t"); + strcatbuff(cook,"TRUE"); + strcatbuff(cook,"\t"); + strcatbuff(cook,path); + strcatbuff(cook,"\t"); + strcatbuff(cook,"FALSE"); + strcatbuff(cook,"\t"); + strcatbuff(cook,"1999999999"); + strcatbuff(cook,"\t"); + strcatbuff(cook,cook_name); + strcatbuff(cook,"\t"); + strcatbuff(cook,cook_value); + strcatbuff(cook,"\n"); if (!( ((int) strlen(cookie->data) + (int) strlen(cook)) < cookie->max_len)) return -1; // impossible d'ajouter cookie_insert(insert,cook); #if DEBUG_COOK @@ -179,8 +179,8 @@ int cookie_load(t_cookie* cookie,char* fpath,char* name) { WIN32_FIND_DATA find; HANDLE h; char pth[MAX_PATH + 32]; - strcpy(pth,fpath); - strcat(pth,"*@*.txt"); + strcpybuff(pth,fpath); + strcatbuff(pth,"*@*.txt"); h = FindFirstFile(pth,&find); if (h != INVALID_HANDLE_VALUE) { do { @@ -233,10 +233,10 @@ int cookie_load(t_cookie* cookie,char* fpath,char* name) { char path[256]; // chemin (/) char cook_name[256]; // nom cookie (MYCOOK) char cook_value[8192]; // valeur (ID=toto,S=1234) - strcpy(domain,cookie_get(line,0)); // host - strcpy(path,cookie_get(line,2)); // path - strcpy(cook_name,cookie_get(line,5)); // name - strcpy(cook_value,cookie_get(line,6)); // value + strcpybuff(domain,cookie_get(line,0)); // host + strcpybuff(path,cookie_get(line,2)); // path + 
strcpybuff(cook_name,cookie_get(line,5)); // name + strcpybuff(cook_value,cookie_get(line,6)); // value #if DEBUG_COOK printf("%s\n",line); #endif @@ -277,14 +277,14 @@ int cookie_save(t_cookie* cookie,char* name) { void cookie_insert(char* s,char* ins) { char* buff; if (strnotempty(s)==0) { // rien à faire, juste concat - strcat(s,ins); + strcatbuff(s,ins); } else { - buff=(char*) malloc(strlen(s)+2); + buff=(char*) malloct(strlen(s)+2); if (buff) { - strcpy(buff,s); // copie temporaire - strcpy(s,ins); // insérer - strcat(s,buff); // copier - free(buff); + strcpybuff(buff,s); // copie temporaire + strcpybuff(s,ins); // insérer + strcatbuff(s,buff); // copier + freet(buff); } } } @@ -294,11 +294,11 @@ void cookie_delete(char* s,int pos) { if (strnotempty(s+pos)==0) { // rien à faire, effacer s[0]='\0'; } else { - buff=(char*) malloc(strlen(s+pos)+2); + buff=(char*) malloct(strlen(s+pos)+2); if (buff) { - strcpy(buff,s+pos); // copie temporaire - strcpy(s,buff); // copier - free(buff); + strcpybuff(buff,s+pos); // copie temporaire + strcpybuff(s,buff); // copier + freet(buff); } } } @@ -329,7 +329,7 @@ char* cookie_get(char* cookie_base,int param) { char* a = cookie_base; while( (*a) && (*a!='\t') && (*a!='\n')) a++; buffer[0]='\0'; - strncat(buffer,cookie_base,(int) (a - cookie_base)); + strncatbuff(buffer,cookie_base,(int) (a - cookie_base)); return buffer; } else return ""; @@ -357,8 +357,8 @@ int bauth_add(t_cookie* cookie,char* adr,char* fil,char* auth) { if (chain->next) { chain=chain->next; chain->next=NULL; - strcpy(chain->auth,auth); - strcpy(chain->prefix,prefix); + strcpybuff(chain->auth,auth); + strcpybuff(chain->prefix,prefix); return 1; } } @@ -388,8 +388,8 @@ char* bauth_prefix(char* adr,char* fil) { char* prefix; char* a; NOSTATIC_RESERVE(prefix, char, HTS_URLMAXSIZE*2); - strcpy(prefix,jump_identification(adr)); - strcat(prefix,fil); + strcpybuff(prefix,jump_identification(adr)); + strcatbuff(prefix,fil); a=strchr(prefix,'?'); if (a) *a='\0'; if 
(strchr(prefix,'/')) { diff --git a/src/htscache.c b/src/htscache.c index da8791e..b90fa67 100644 --- a/src/htscache.c +++ b/src/htscache.c @@ -91,8 +91,14 @@ with <int>(size) <string>(msg) <string>(contenttype) + <string>(charset) [version 3] <string>(last-modified) <string>(Etag) + <string>location + <string>Content-disposition [version 2] + <string>hostname [version 4] + <string>URI filename [version 4] + <string>local filename [version 4] [<string>"SD" <string>(supplemental data)] [<string>"SD" <string>(supplemental data)] ... @@ -191,16 +197,22 @@ void cache_add(htsblk r,char* url_adr,char* url_fil,char* url_save,FILE* cache_n // Construction header ok=0; - if (cache_wint(cache_dat,r.statuscode)!=-1) // statuscode - if (cache_wLLint(cache_dat,r.size)!=-1) // size - if (cache_wstr(cache_dat,r.msg)!=-1) // msg - if (cache_wstr(cache_dat,r.contenttype)!=-1) // contenttype - if (cache_wstr(cache_dat,r.lastmodified)!=-1) // last-modified - if (cache_wstr(cache_dat,r.etag)!=-1) // Etag - if (cache_wstr(cache_dat,(r.location!=NULL)?r.location:"")!=-1) // 'location' pour moved - if (cache_wstr(cache_dat,r.cdispo)!=-1) // Content-disposition - if (cache_wstr(cache_dat,"HTS")!=-1) // end of header + if (cache_wint(cache_dat,r.statuscode) != -1 // statuscode + && cache_wLLint(cache_dat,r.size) != -1 // size + && cache_wstr(cache_dat,r.msg) != -1 // msg + && cache_wstr(cache_dat,r.contenttype) != -1 // contenttype + && cache_wstr(cache_dat,r.charset) != -1 // contenttype + && cache_wstr(cache_dat,r.lastmodified) != -1 // last-modified + && cache_wstr(cache_dat,r.etag) != -1 // Etag + && cache_wstr(cache_dat,(r.location!=NULL)?r.location:"") != -1 // 'location' pour moved + && cache_wstr(cache_dat,r.cdispo) != -1 // Content-disposition + && cache_wstr(cache_dat,url_adr) != -1 // Original address + && cache_wstr(cache_dat,url_fil) != -1 // Original URI filename + && cache_wstr(cache_dat,url_save) != -1 // Original save filename + && cache_wstr(cache_dat,"HTS") != -1 // end 
of header + ) { ok=1; /* ok */ + } // Fin construction header /*if ((int) fwrite((char*) &r,1,sizeof(htsblk),cache_dat) == sizeof(htsblk)) {*/ @@ -212,7 +224,7 @@ void cache_add(htsblk r,char* url_adr,char* url_fil,char* url_save,FILE* cache_n } else if (r.is_write==0) { // en mémoire, recopie directe if (cache_wLLint(cache_dat,r.size)!=-1) { if (r.size>0) { // taille>0 - if ((INTsys) fwrite(r.adr,1,(INTsys)r.size,cache_dat)!=r.size) + if (fwrite(r.adr,1,(INTsys)r.size,cache_dat)!=r.size) ok=0; } else // taille=0, ne rien écrire ok=0; @@ -227,11 +239,11 @@ void cache_add(htsblk r,char* url_adr,char* url_fil,char* url_save,FILE* cache_n fp=fopen(fconv(url_save),"rb"); if (fp!=NULL) { char buff[32768]; - int nl; + INTsys nl; do { nl=fread(buff,1,32768,fp); if (nl>0) { - if ((INTsys) fwrite(buff,1,(INTsys)nl,cache_dat)!=nl) { // erreur + if ((INTsys)fwrite(buff,1,(INTsys)nl,cache_dat)!=nl) { // erreur nl=-1; ok=0; } @@ -254,7 +266,7 @@ void cache_add(htsblk r,char* url_adr,char* url_fil,char* url_save,FILE* cache_n // index // adresse+cr+fichier+cr if (ok) { - buff[0]='\0'; strcat(buff,url_adr); strcat(buff,"\n"); strcat(buff,url_fil); strcat(buff,"\n"); + buff[0]='\0'; strcatbuff(buff,url_adr); strcatbuff(buff,"\n"); strcatbuff(buff,url_fil); strcatbuff(buff,"\n"); cache_wstr(cache_ndx,buff); fwrite(s,1,strlen(s),cache_ndx); } // si ok=0 on a peut être écrit des données pour rien mais on s'en tape @@ -264,9 +276,18 @@ void cache_add(htsblk r,char* url_adr,char* url_fil,char* url_save,FILE* cache_n } +htsblk cache_read(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* location) { + return cache_readex(opt,cache,adr,fil,save,location,NULL,0); +} + +htsblk cache_read_ro(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* location) { + return cache_readex(opt,cache,adr,fil,save,location,NULL,1); +} + // lecture d'un fichier dans le cache // si save==null alors test unqiquement -htsblk cache_read(httrackp* opt,cache_back* cache,char* 
adr,char* fil,char* save) { +htsblk cache_readex(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* location, + char* return_save, int readonly) { #if HTS_FAST_CACHE long int hash_pos; int hash_pos_return; @@ -274,30 +295,46 @@ htsblk cache_read(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save char* a; #endif char buff[HTS_URLMAXSIZE*2]; - char location[HTS_URLMAXSIZE*2]; + char location_default[HTS_URLMAXSIZE*2]; + char previous_save[HTS_URLMAXSIZE*2]; htsblk r; int ok=0; int header_only=0; - memset(&r, 0, sizeof(htsblk)); r.soc=INVALID_SOCKET; strcpy(location,""); r.location=location; + memset(&r, 0, sizeof(htsblk)); r.soc=INVALID_SOCKET; + if (location) { + r.location = location; + } else { + r.location = location_default; + } + strcpybuff(r.location, ""); #if HTS_FAST_CACHE - strcpy(buff,adr); strcat(buff,fil); + strcpybuff(buff,adr); strcatbuff(buff,fil); hash_pos_return=inthash_read((inthash)cache->hashtable,buff,(long int*)&hash_pos); #else - buff[0]='\0'; strcat(buff,"\n"); strcat(buff,adr); strcat(buff,"\n"); strcat(buff,fil); strcat(buff,"\n"); + buff[0]='\0'; strcatbuff(buff,"\n"); strcatbuff(buff,adr); strcatbuff(buff,"\n"); strcatbuff(buff,fil); strcatbuff(buff,"\n"); if (cache->use) a=strstr(cache->use,buff); else a=NULL; // forcer erreur #endif + /* avoid errors on data entries */ + if (adr[0] == '/' && adr[1] == '/' && adr[2] == '[') { +#if HTS_FAST_CACHE + hash_pos_return = 0; +#else + a = NULL; +#endif + } + // en cas de succès #if HTS_FAST_CACHE if (hash_pos_return) { #else if (a!=NULL) { // OK existe en cache! 
#endif - int pos; + INTsys pos; #if DEBUGCA fprintf(stdout,"..cache: %s%s at ",adr,fil); #endif @@ -320,8 +357,8 @@ htsblk cache_read(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save if (fread((char*) &old_r,1,sizeof(old_r),cache->olddat)==sizeof(old_r)) { // lire tout (y compris statuscode etc) r.statuscode=old_r.statuscode; r.size=old_r.size; // taille fichier - strcpy(r.msg,old_r.msg); - strcpy(r.contenttype,old_r.contenttype); + strcpybuff(r.msg,old_r.msg); + strcpybuff(r.contenttype,old_r.contenttype); ok=1; /* import ok */ } /* */ @@ -335,11 +372,22 @@ htsblk cache_read(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save cache_rLLint(cache->olddat,&r.size); cache_rstr(cache->olddat,r.msg); cache_rstr(cache->olddat,r.contenttype); + if (cache->version >= 3) + cache_rstr(cache->olddat,r.charset); cache_rstr(cache->olddat,r.lastmodified); cache_rstr(cache->olddat,r.etag); cache_rstr(cache->olddat,r.location); if (cache->version >= 2) cache_rstr(cache->olddat,r.cdispo); + if (cache->version >= 4) { + cache_rstr(cache->olddat, previous_save); // adr + cache_rstr(cache->olddat, previous_save); // fil + previous_save[0] = '\0'; + cache_rstr(cache->olddat, previous_save); // save + if (return_save != NULL) { + strcpybuff(return_save, previous_save); + } + } // cache_rstr(cache->olddat,check); if (strcmp(check,"HTS")==0) { /* intégrité OK */ @@ -377,14 +425,15 @@ htsblk cache_read(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save #if HTS_DIRECTDISK // Court-circuit: // Peut-on stocker le fichier directement sur disque? 
- if ((r.statuscode==200) && (!is_hypertext_mime(r.contenttype)) && (strnotempty(save))) { // pas HTML, écrire sur disk directement + if (!readonly && r.statuscode==200 && !is_hypertext_mime(r.contenttype) && strnotempty(save)) { // pas HTML, écrire sur disk directement int ok=0; r.is_write=1; // écrire - if (fexist(antislash(save))) { // un fichier existe déja - //if (fsize(antislash(save))==r.size) { // même taille -- NON tant pis (taille mal declaree) + if (fexist(fconv(save))) { // un fichier existe déja + //if (fsize(fconv(save))==r.size) { // même taille -- NON tant pis (taille mal declaree) ok=1; // plus rien à faire filenote(save,NULL); // noter comme connu + //xxusercommand(opt,0,NULL,save,adr,fil); //} } @@ -393,11 +442,11 @@ htsblk cache_read(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save filecreateempty(save); // r.statuscode=-1; - strcpy(r.msg,"File deleted by user not recaught"); + strcpybuff(r.msg,"File deleted by user not recaught"); ok=1; // ne pas récupérer (et pas d'erreur) } else { r.statuscode=-1; - strcpy(r.msg,"Previous cache file not found"); + strcpybuff(r.msg,"Previous cache file not found"); ok=1; // ne pas récupérer } } @@ -409,29 +458,30 @@ htsblk cache_read(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save #endif if (r.out!=NULL) { char buff[32768+4]; - LLint nl; - LLint size; - size=r.size; - do { - nl=fread(buff,1,(INTsys) minimum(size,32768),cache->olddat); - if (nl>0) { - size-=nl; - if ((INTsys) fwrite(buff,1,(INTsys)nl,r.out)!=nl) { // erreur - r.statuscode=-1; - strcpy(r.msg,"Cache Read Error : Read To Disk"); + LLint size = r.size; + if (size > 0) { + INTsys nl; + do { + nl=fread(buff,1,(INTsys) minimum(size,32768),cache->olddat); + if (nl>0) { + size-=nl; + if ((INTsys)fwrite(buff,1,(INTsys)nl,r.out)!=nl) { // erreur + r.statuscode=-1; + strcpybuff(r.msg,"Cache Read Error : Read To Disk"); + } } - } - } while((nl>0) && (size>0) && (r.statuscode!=-1)); + } while((nl>0) && (size>0) && 
(r.statuscode!=-1)); + } fclose(r.out); r.out=NULL; #if HTS_WIN==0 chmod(save,HTS_ACCESS_FILE); #endif - usercommand(0,NULL,antislash(save)); + //xxusercommand(opt,0,NULL,fconv(save), adr, fil); } else { r.statuscode=-1; - strcpy(r.msg,"Cache Write Error : Unable to Create File"); + strcpybuff(r.msg,"Cache Write Error : Unable to Create File"); //printf("%s\n",save); } } @@ -440,24 +490,46 @@ htsblk cache_read(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save #endif { // lire en mémoire - if (pos<0) { // Pas de donnée en cache, bizarre car html!!! - r.statuscode=-1; - strcpy(r.msg,"Previous cache file not found (2)"); + if (pos<0) { + if (strnotempty(save)) { // Pas de donnée en cache, bizarre car html!!! + r.statuscode=-1; + strcpybuff(r.msg,"Previous cache file not found (2)"); + } else { /* Read in memory from cache */ + if (strnotempty(return_save) && fexist(return_save)) { + FILE* fp = fopen(fconv(return_save), "rb"); + if (fp != NULL) { + r.adr=(char*) malloct((INTsys)r.size + 4); + if (adr != NULL) { + if (r.size > 0 && fread(r.adr, 1, (INTsys) r.size, fp) != r.size) { + r.statuscode=-1; + strcpybuff(r.msg,"Read error in cache disk data"); + } + } else { + r.statuscode=-1; + strcpybuff(r.msg,"Read error (memory exhausted) from cache"); + } + fclose(fp); + } + } else { + r.statuscode=-1; + strcpybuff(r.msg,"Cache file not found on disk"); + } + } } else { // lire fichier (d'un coup) r.adr=(char*) malloct((INTsys)r.size+4); if (r.adr!=NULL) { - if ((INTsys) fread(r.adr,1,(INTsys)r.size,cache->olddat)!=r.size) { // erreur + if (fread(r.adr,1,(INTsys)r.size,cache->olddat)!=r.size) { // erreur freet(r.adr); r.adr=NULL; r.statuscode=-1; - strcpy(r.msg,"Cache Read Error : Read Data"); + strcpybuff(r.msg,"Cache Read Error : Read Data"); } else *(r.adr+r.size)='\0'; //printf(">%s status %d\n",back[p].r.contenttype,back[p].r.statuscode); } else { // erreur r.statuscode=-1; - strcpy(r.msg,"Cache Memory Error"); + strcpybuff(r.msg,"Cache Memory Error"); } } 
} @@ -467,28 +539,31 @@ htsblk cache_read(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save printf("Cache Read Error : Bad Data"); #endif r.statuscode=-1; - strcpy(r.msg,"Cache Read Error : Bad Data"); + strcpybuff(r.msg,"Cache Read Error : Bad Data"); } } else { // erreur #if DEBUGCA printf("Cache Read Error : Read Header"); #endif r.statuscode=-1; - strcpy(r.msg,"Cache Read Error : Read Header"); + strcpybuff(r.msg,"Cache Read Error : Read Header"); } } else { #if DEBUGCA printf("Cache Read Error : Seek Failed"); #endif r.statuscode=-1; - strcpy(r.msg,"Cache Read Error : Seek Failed"); + strcpybuff(r.msg,"Cache Read Error : Seek Failed"); } } else { #if DEBUGCA printf("File Cache Not Found"); #endif r.statuscode=-1; - strcpy(r.msg,"File Cache Not Found"); + strcpybuff(r.msg,"File Cache Entry Not Found"); + } + if (!location) { /* don't export internal buffer */ + r.location = NULL; } return r; } @@ -504,12 +579,12 @@ int cache_writedata(FILE* cache_ndx,FILE* cache_dat,char* str1,char* str2,char* pos=ftell(cache_dat); /* first write data */ if (cache_wint(cache_dat,len)!=-1) { // length - if ((INTsys) fwrite(outbuff,1,(INTsys)len,cache_dat) == (INTsys) len) { // data + if ((INTsys)fwrite(outbuff,1,(INTsys)len,cache_dat) == (INTsys) len) { // data /* then write index */ sprintf(s,"%d\n",pos); - buff[0]='\0'; strcat(buff,str1); strcat(buff,"\n"); strcat(buff,str2); strcat(buff,"\n"); + buff[0]='\0'; strcatbuff(buff,str1); strcatbuff(buff,"\n"); strcatbuff(buff,str2); strcatbuff(buff,"\n"); cache_wstr(cache_ndx,buff); - if (fwrite(s,1,strlen(s),cache_ndx) == strlen(s)) { + if (fwrite(s,1,(INTsys)strlen(s),cache_ndx) == strlen(s)) { fflush(cache_dat); fflush(cache_ndx); return 1; } @@ -526,15 +601,15 @@ int cache_readdata(cache_back* cache,char* str1,char* str2,char** inbuff,int* in if (cache->hashtable) { char buff[HTS_URLMAXSIZE*4]; long int pos; - strcpy(buff,str1); strcat(buff,str2); + strcpybuff(buff,str1); strcatbuff(buff,str2); if 
(inthash_read((inthash)cache->hashtable,buff,(long int*)&pos)) { if (fseek(cache->olddat,((pos>0)?pos:(-pos)),SEEK_SET) == 0) { - int len; + INTsys len; cache_rint(cache->olddat,&len); if (len>0) { char* mem_buff=(char*)malloct(len+4); /* Plus byte 0 */ if (mem_buff) { - if ((int)fread(mem_buff,1,len,cache->olddat)==len) { // lire tout (y compris statuscode etc)*/ + if ((INTsys)fread(mem_buff,1,len,cache->olddat)==len) { // lire tout (y compris statuscode etc)*/ *inbuff=mem_buff; *inlen=len; return 1; @@ -552,17 +627,16 @@ int cache_readdata(cache_back* cache,char* str1,char* str2,char** inbuff,int* in } // renvoyer uniquement en tête, ou NULL si erreur -htsblk* cache_header(httrackp* opt,cache_back* cache,char* adr,char* fil) { - htsblk* r; - NOSTATIC_RESERVE(r, htsblk, 1); - *r=cache_read(opt,cache,adr,fil,NULL); // test uniquement +// return NULL upon error, and set -1 to r.statuscode +htsblk* cache_header(httrackp* opt,cache_back* cache,char* adr,char* fil,htsblk* r) { + *r=cache_read(opt,cache,adr,fil,NULL,NULL); // test uniquement if (r->statuscode != -1) return r; else return NULL; } - + // Initialisation du cache: créer nouveau, renomer ancien, charger.. void cache_init(cache_back* cache,httrackp* opt) { // --- @@ -571,80 +645,91 @@ void cache_init(cache_back* cache,httrackp* opt) { #if DEBUGCA printf("cache init: "); #endif + if (!cache->ro) { #if HTS_WIN - mkdir(fconcat(opt->path_log,"hts-cache")); + mkdir(fconcat(opt->path_log,"hts-cache")); #else - mkdir(fconcat(opt->path_log,"hts-cache"),HTS_PROTECT_FOLDER); + mkdir(fconcat(opt->path_log,"hts-cache"),HTS_PROTECT_FOLDER); #endif - if ((fexist(fconcat(opt->path_log,"hts-cache/new.dat"))) && (fexist(fconcat(opt->path_log,"hts-cache/new.ndx")))) { // il existe déja un cache précédent.. renommer + if ((fexist(fconcat(opt->path_log,"hts-cache/new.dat"))) && (fexist(fconcat(opt->path_log,"hts-cache/new.ndx")))) { // il existe déja un cache précédent.. 
renommer #if DEBUGCA - printf("work with former cache\n"); + printf("work with former cache\n"); #endif - if (fexist(fconcat(opt->path_log,"hts-cache/old.dat"))) - remove(fconcat(opt->path_log,"hts-cache/old.dat")); - if (fexist(fconcat(opt->path_log,"hts-cache/old.ndx"))) - remove(fconcat(opt->path_log,"hts-cache/old.ndx")); - - rename(fconcat(opt->path_log,"hts-cache/new.dat"),fconcat(opt->path_log,"hts-cache/old.dat")); - rename(fconcat(opt->path_log,"hts-cache/new.ndx"),fconcat(opt->path_log,"hts-cache/old.ndx")); - } else { // un des deux (ou les deux) fichiers cache absents: effacer l'autre éventuel + if (fexist(fconcat(opt->path_log,"hts-cache/old.dat"))) + remove(fconcat(opt->path_log,"hts-cache/old.dat")); + if (fexist(fconcat(opt->path_log,"hts-cache/old.ndx"))) + remove(fconcat(opt->path_log,"hts-cache/old.ndx")); + + rename(fconcat(opt->path_log,"hts-cache/new.dat"),fconcat(opt->path_log,"hts-cache/old.dat")); + rename(fconcat(opt->path_log,"hts-cache/new.ndx"),fconcat(opt->path_log,"hts-cache/old.ndx")); + } else { // un des deux (ou les deux) fichiers cache absents: effacer l'autre éventuel #if DEBUGCA - printf("new cache\n"); + printf("new cache\n"); #endif - if (fexist(fconcat(opt->path_log,"hts-cache/new.dat"))) - remove(fconcat(opt->path_log,"hts-cache/new.dat")); - if (fexist(fconcat(opt->path_log,"hts-cache/new.ndx"))) - remove(fconcat(opt->path_log,"hts-cache/new.ndx")); + if (fexist(fconcat(opt->path_log,"hts-cache/new.dat"))) + remove(fconcat(opt->path_log,"hts-cache/new.dat")); + if (fexist(fconcat(opt->path_log,"hts-cache/new.ndx"))) + remove(fconcat(opt->path_log,"hts-cache/new.ndx")); + } } // charger index cache précédent - if ((fexist(fconcat(opt->path_log,"hts-cache/old.dat"))) && (fexist(fconcat(opt->path_log,"hts-cache/old.ndx")))) { // cache précédent - if ((fsize(fconcat(opt->path_log,"hts-cache/old.dat"))>=0) && (fsize(fconcat(opt->path_log,"hts-cache/old.ndx"))>0)) { - FILE* oldndx=NULL; + if ( + ( + !cache->ro && + 
fsize(fconcat(opt->path_log,"hts-cache/old.dat")) >=0 && fsize(fconcat(opt->path_log,"hts-cache/old.ndx")) >0 + ) + || + ( + cache->ro && + fsize(fconcat(opt->path_log,"hts-cache/new.dat")) >=0 && fsize(fconcat(opt->path_log,"hts-cache/new.ndx")) > 0 + ) + ) { + FILE* oldndx=NULL; #if DEBUGCA - printf("..load cache\n"); + printf("..load cache\n"); #endif + if (!cache->ro) { cache->olddat=fopen(fconcat(opt->path_log,"hts-cache/old.dat"),"rb"); oldndx=fopen(fconcat(opt->path_log,"hts-cache/old.ndx"),"rb"); - // les deux doivent être ouvrables - if ((cache->olddat==NULL) && (oldndx!=NULL)) { - fclose(oldndx); - oldndx=NULL; - } - if ((cache->olddat!=NULL) && (oldndx==NULL)) { - fclose(cache->olddat); - cache->olddat=NULL; - } - // lire index - if (oldndx!=NULL) { - int buffl; - fclose(oldndx); oldndx=NULL; - // lire ndx, et lastmodified + } else { + cache->olddat=fopen(fconcat(opt->path_log,"hts-cache/new.dat"),"rb"); + oldndx=fopen(fconcat(opt->path_log,"hts-cache/new.ndx"),"rb"); + } + // les deux doivent être ouvrables + if ((cache->olddat==NULL) && (oldndx!=NULL)) { + fclose(oldndx); + oldndx=NULL; + } + if ((cache->olddat!=NULL) && (oldndx==NULL)) { + fclose(cache->olddat); + cache->olddat=NULL; + } + // lire index + if (oldndx!=NULL) { + int buffl; + fclose(oldndx); oldndx=NULL; + // lire ndx, et lastmodified + if (!cache->ro) { buffl=fsize(fconcat(opt->path_log,"hts-cache/old.ndx")); cache->use=readfile(fconcat(opt->path_log,"hts-cache/old.ndx")); - if (cache->use!=NULL) { - char firstline[256]; - char* a=cache->use; - a+=cache_brstr(a,firstline); - if (strncmp(firstline,"CACHE-",6)==0) { // Nouvelle version du cache - if (strncmp(firstline,"CACHE-1.",8)==0) { // Version 1.1x - cache->version=(int)(firstline[8]-'0'); // cache 1.x - if (cache->version <= 2) { - a+=cache_brstr(a,firstline); - strcpy(cache->lastmodified,firstline); - } else { - if (opt->errlog) { - fspc(opt->errlog,"error"); fprintf(opt->errlog,"Cache: version 1.%d not supported, ignoring current 
cache"LF,cache->version); - fflush(opt->errlog); - } - fclose(cache->olddat); - cache->olddat=NULL; - freet(cache->use); - cache->use=NULL; - } - } else { // non supporté + } else { + buffl=fsize(fconcat(opt->path_log,"hts-cache/new.ndx")); + cache->use=readfile(fconcat(opt->path_log,"hts-cache/new.ndx")); + } + if (cache->use!=NULL) { + char firstline[256]; + char* a=cache->use; + a+=cache_brstr(a,firstline); + if (strncmp(firstline,"CACHE-",6)==0) { // Nouvelle version du cache + if (strncmp(firstline,"CACHE-1.",8)==0) { // Version 1.1x + cache->version=(int)(firstline[8]-'0'); // cache 1.x + if (cache->version <= 4) { + a+=cache_brstr(a,firstline); + strcpybuff(cache->lastmodified,firstline); + } else { if (opt->errlog) { - fspc(opt->errlog,"error"); fprintf(opt->errlog,"Cache: %s not supported, ignoring current cache"LF,firstline); + fspc(opt->errlog,"error"); fprintf(opt->errlog,"Cache: version 1.%d not supported, ignoring current cache"LF,cache->version); fflush(opt->errlog); } fclose(cache->olddat); @@ -652,126 +737,141 @@ void cache_init(cache_back* cache,httrackp* opt) { freet(cache->use); cache->use=NULL; } - /* */ - } else { // Vieille version du cache - /* */ - if (opt->log) { - fspc(opt->log,"warning"); fprintf(opt->log,"Cache: importing old cache format"LF); - fflush(opt->log); + } else { // non supporté + if (opt->errlog) { + fspc(opt->errlog,"error"); fprintf(opt->errlog,"Cache: %s not supported, ignoring current cache"LF,firstline); + fflush(opt->errlog); } - cache->version=0; // cache 1.0 - strcpy(cache->lastmodified,firstline); + fclose(cache->olddat); + cache->olddat=NULL; + freet(cache->use); + cache->use=NULL; } - opt->is_update=1; // signaler comme update - - /* Create hash table for the cache (MUCH FASTER!) 
*/ + /* */ + } else { // Vieille version du cache + /* */ + if (opt->log) { + fspc(opt->log,"warning"); fprintf(opt->log,"Cache: importing old cache format"LF); + fflush(opt->log); + } + cache->version=0; // cache 1.0 + strcpybuff(cache->lastmodified,firstline); + } + opt->is_update=1; // signaler comme update + + /* Create hash table for the cache (MUCH FASTER!) */ #if HTS_FAST_CACHE - if (cache->use) { - char line[HTS_URLMAXSIZE*2]; - char linepos[256]; - int pos; - while ( (a!=NULL) && (a < (cache->use+buffl) ) ) { - a=strchr(a+1,'\n'); /* start of line */ - if (a) { - a++; - /* read "host/file" */ - a+=binput(a,line,HTS_URLMAXSIZE); - a+=binput(a,line+strlen(line),HTS_URLMAXSIZE); - /* read position */ - a+=binput(a,linepos,200); - sscanf(linepos,"%d",&pos); - inthash_add((inthash)cache->hashtable,line,pos); - } + if (cache->use) { + char line[HTS_URLMAXSIZE*2]; + char linepos[256]; + int pos; + while ( (a!=NULL) && (a < (cache->use+buffl) ) ) { + a=strchr(a+1,'\n'); /* start of line */ + if (a) { + a++; + /* read "host/file" */ + a+=binput(a,line,HTS_URLMAXSIZE); + a+=binput(a,line+strlen(line),HTS_URLMAXSIZE); + /* read position */ + a+=binput(a,linepos,200); + sscanf(linepos,"%d",&pos); + inthash_add((inthash)cache->hashtable,line,pos); } - /* Not needed anymore! */ - freet(cache->use); - cache->use=NULL; } -#endif + /* Not needed anymore! 
*/ + freet(cache->use); + cache->use=NULL; } +#endif } + } } // taille cache>0 - } // cache precedent existe - + #if DEBUGCA - printf("..create cache\n"); + printf("..create cache\n"); #endif - // ouvrir caches actuels - cache->dat=fopen(fconcat(opt->path_log,"hts-cache/new.dat"),"wb"); - cache->ndx=fopen(fconcat(opt->path_log,"hts-cache/new.ndx"),"wb"); - // les deux doivent être ouvrables - if ((cache->dat==NULL) && (cache->ndx!=NULL)) { - fclose(cache->ndx); - cache->ndx=NULL; - } - if ((cache->dat!=NULL) && (cache->ndx==NULL)) { - fclose(cache->dat); - cache->dat=NULL; - } - - if (cache->ndx!=NULL) { - char s[256]; - - cache_wstr(cache->dat,"CACHE-1.2"); - fflush(cache->dat); - cache_wstr(cache->ndx,"CACHE-1.2"); - fflush(cache->ndx); - // - time_gmt_rfc822(s); // date et heure actuelle GMT pour If-Modified-Since.. - cache_wstr(cache->ndx,s); - fflush(cache->ndx); // un petit fflush au cas où - - // supprimer old.lst - if (fexist(fconcat(opt->path_log,"hts-cache/old.lst"))) - remove(fconcat(opt->path_log,"hts-cache/old.lst")); - // renommer - if (fexist(fconcat(opt->path_log,"hts-cache/new.lst"))) - rename(fconcat(opt->path_log,"hts-cache/new.lst"),fconcat(opt->path_log,"hts-cache/old.lst")); - // ouvrir - cache->lst=fopen(fconcat(opt->path_log,"hts-cache/new.lst"),"wb"); - { - filecreate_params tmp; - strcpy(tmp.path,opt->path_html); // chemin - tmp.lst=cache->lst; // fichier lst - filenote("",&tmp); // initialiser filecreate - } - - // supprimer old.txt - if (fexist(fconcat(opt->path_log,"hts-cache/old.txt"))) - remove(fconcat(opt->path_log,"hts-cache/old.txt")); - // renommer - if (fexist(fconcat(opt->path_log,"hts-cache/new.txt"))) - rename(fconcat(opt->path_log,"hts-cache/new.txt"),fconcat(opt->path_log,"hts-cache/old.txt")); - // ouvrir - cache->txt=fopen(fconcat(opt->path_log,"hts-cache/new.txt"),"wb"); - if (cache->txt) { - fprintf(cache->txt,"date\tsize'/'remotesize\tflags(request:Update,Range state:File response:Modified,Chunked,gZipped)\t"); - 
fprintf(cache->txt,"statuscode\tstatus ('servermsg')\tMIME\tEtag|Date\tURL\tlocalfile\t(from URL)"LF); + if (!cache->ro) { + // ouvrir caches actuels + structcheck(fconcat(opt->path_log, "hts-cache/")); + cache->dat=fopen(fconcat(opt->path_log,"hts-cache/new.dat"),"wb"); + cache->ndx=fopen(fconcat(opt->path_log,"hts-cache/new.ndx"),"wb"); + // les deux doivent être ouvrables + if ((cache->dat==NULL) && (cache->ndx!=NULL)) { + fclose(cache->ndx); + cache->ndx=NULL; + } + if ((cache->dat!=NULL) && (cache->ndx==NULL)) { + fclose(cache->dat); + cache->dat=NULL; + } + + if (cache->ndx!=NULL) { + char s[256]; + + cache_wstr(cache->dat,"CACHE-1.4"); + fflush(cache->dat); + cache_wstr(cache->ndx,"CACHE-1.4"); + fflush(cache->ndx); + // + time_gmt_rfc822(s); // date et heure actuelle GMT pour If-Modified-Since.. + cache_wstr(cache->ndx,s); + fflush(cache->ndx); // un petit fflush au cas où + + // supprimer old.lst + if (fexist(fconcat(opt->path_log,"hts-cache/old.lst"))) + remove(fconcat(opt->path_log,"hts-cache/old.lst")); + // renommer + if (fexist(fconcat(opt->path_log,"hts-cache/new.lst"))) + rename(fconcat(opt->path_log,"hts-cache/new.lst"),fconcat(opt->path_log,"hts-cache/old.lst")); + // ouvrir + cache->lst=fopen(fconcat(opt->path_log,"hts-cache/new.lst"),"wb"); + { + filecreate_params tmp; + strcpybuff(tmp.path,opt->path_html); // chemin + tmp.lst=cache->lst; // fichier lst + filenote("",&tmp); // initialiser filecreate + } + + // supprimer old.txt + if (fexist(fconcat(opt->path_log,"hts-cache/old.txt"))) + remove(fconcat(opt->path_log,"hts-cache/old.txt")); + // renommer + if (fexist(fconcat(opt->path_log,"hts-cache/new.txt"))) + rename(fconcat(opt->path_log,"hts-cache/new.txt"),fconcat(opt->path_log,"hts-cache/old.txt")); + // ouvrir + cache->txt=fopen(fconcat(opt->path_log,"hts-cache/new.txt"),"wb"); + if (cache->txt) { + fprintf(cache->txt,"date\tsize'/'remotesize\tflags(request:Update,Range state:File response:Modified,Chunked,gZipped)\t"); + 
fprintf(cache->txt,"statuscode\tstatus ('servermsg')\tMIME\tEtag|Date\tURL\tlocalfile\t(from URL)"LF); + } + + // test + // cache_writedata(cache->ndx,cache->dat,"//[TEST]//","test1","TEST PIPO",9); + } + + } else { + cache->lst = cache->dat = cache->ndx = NULL; } - - // test - // cache_writedata(cache->ndx,cache->dat,"//[TEST]//","test1","TEST PIPO",9); - } - + } } - - - + + + // lire un fichier.. (compatible \0) char* readfile(char* fil) { char* adr=NULL; - int len=0; + INTsys len=0; len=fsize(fil); - if (len>0) { // existe + if (len >= 0) { // exists FILE* fp; fp=fopen(fconv(fil),"rb"); if (fp!=NULL) { // n'existe pas (!) adr=(char*) malloct(len+1); if (adr!=NULL) { - if ((int) fread(adr,1,len,fp)!=len) { // fichier endommagé ? + if (len > 0 && (INTsys)fread(adr,1,len,fp) != len) { // fichier endommagé ? freet(adr); adr=NULL; } else @@ -794,7 +894,7 @@ char* readfile_or(char* fil,char* defaultdata) { else { char *adr=malloct(strlen(defaultdata)+2); if (adr) { - strcpy(adr,defaultdata); + strcpybuff(adr,defaultdata); return adr; } } @@ -804,22 +904,24 @@ char* readfile_or(char* fil,char* defaultdata) { // écriture/lecture d'une chaîne sur un fichier // -1 : erreur, sinon 0 int cache_wstr(FILE* fp,char* s) { - int i; + INTsys i; char buff[256+4]; i=strlen(s); - sprintf(buff,"%d\n",i); - if (fwrite(buff,1,strlen(buff),fp) != strlen(buff)) + sprintf(buff,INTsysP "\n",i); + if (fwrite(buff,1,(INTsys)strlen(buff),fp) != strlen(buff)) return -1; if (i>0) - if ((int) fwrite(s,1,i,fp) != i) + if ((INTsys)fwrite(s,1,i,fp) != i) return -1; return 0; } void cache_rstr(FILE* fp,char* s) { - int i; + INTsys i; char buff[256+4]; linput(fp,buff,256); - sscanf(buff,"%d",&i); + sscanf(buff,INTsysP,&i); + if (i < 0 || i > 32768) /* error, something nasty happened */ + i=0; if (i>0) fread(s,1,i,fp); *(s+i)='\0'; diff --git a/src/htscache.h b/src/htscache.h index 08069d1..ef897f1 100644 --- a/src/htscache.h +++ b/src/htscache.h @@ -45,8 +45,10 @@ Please visit our Website: 
http://www.httrack.com // cache void cache_mayadd(httrackp* opt,cache_back* cache,htsblk* r,char* url_adr,char* url_fil,char* url_save); void cache_add(htsblk r,char* url_adr,char* url_fil,char* url_save,FILE* cache_ndx,FILE* cache_dat,int all_in_cache); -htsblk cache_read(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save); -htsblk* cache_header(httrackp* opt,cache_back* cache,char* adr,char* fil); +htsblk cache_read(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* location); +htsblk cache_read_ro(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* location); +htsblk cache_readex(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* location,char* return_save,int readonly); +htsblk* cache_header(httrackp* opt,cache_back* cache,char* adr,char* fil,htsblk* r); void cache_init(cache_back* cache,httrackp* opt); int cache_writedata(FILE* cache_ndx,FILE* cache_dat,char* str1,char* str2,char* outbuff,int len); diff --git a/src/htscatchurl.c b/src/htscatchurl.c index c119677..8455ea0 100644 --- a/src/htscatchurl.c +++ b/src/htscatchurl.c @@ -67,7 +67,7 @@ Please visit our Website: http://www.httrack.com // 0- Init the URL catcher with standard port // catch_url_init(&port,&return_host); -T_SOC catch_url_init_std(int* port_prox,char* adr_prox) { +HTSEXT_API T_SOC catch_url_init_std(int* port_prox,char* adr_prox) { T_SOC soc; int try_to_listen_to[]={8080,3128,80,81,82,8081,3129,31337,0,-1}; int i=0; @@ -83,28 +83,10 @@ T_SOC catch_url_init_std(int* port_prox,char* adr_prox) { // 1- Init the URL catcher // catch_url_init(&port,&return_host); -T_SOC catch_url_init(int* port,char* adr) { +HTSEXT_API T_SOC catch_url_init(int* port,char* adr) { T_SOC soc = INVALID_SOCKET; char h_loc[256+2]; - /* -#ifdef _WIN32 - { - WORD wVersionRequested; - WSADATA wsadata; - int stat; - wVersionRequested = 0x0101; - stat = WSAStartup( wVersionRequested, &wsadata ); - if (stat != 0) { - return INVALID_SOCKET; - } else if 
(LOBYTE(wsadata.wVersion) != 1 && HIBYTE(wsadata.wVersion) != 1) { - WSACleanup(); - return INVALID_SOCKET; - } - } -#endif - */ - if (gethostname(h_loc,256)==0) { // host name SOCaddr server; int server_size=sizeof(server); @@ -132,7 +114,7 @@ T_SOC catch_url_init(int* port,char* adr) { if (listen(soc,10)>=0) { // au pif le 10 SOCaddr_inetntoa(adr, 128, server2, len); } else { -#if _WIN32 +#ifdef _WIN32 closesocket(soc); #else close(soc); @@ -142,7 +124,7 @@ T_SOC catch_url_init(int* port,char* adr) { } else { -#if _WIN32 +#ifdef _WIN32 closesocket(soc); #else close(soc); @@ -152,7 +134,7 @@ T_SOC catch_url_init(int* port,char* adr) { } else { -#if _WIN32 +#ifdef _WIN32 closesocket(soc); #else close(soc); @@ -171,7 +153,7 @@ T_SOC catch_url_init(int* port,char* adr) { // returns 0 if error // url: buffer where URL must be stored - or ip:port in case of failure // data: 32Kb -int catch_url(T_SOC soc,char* url,char* method,char* data) { +HTSEXT_API int catch_url(T_SOC soc,char* url,char* method,char* data) { int retour=0; // connexion (accept) @@ -234,11 +216,11 @@ int catch_url(T_SOC soc,char* url,char* method,char* data) { while(strnotempty(line)) { socinput(soc,line,1000); treathead(NULL,NULL,NULL,&blkretour,line); // traiter - strcat(data,line); - strcat(data,"\r\n"); + strcatbuff(data,line); + strcatbuff(data,"\r\n"); } // CR/LF final de l'en tête inutile car déja placé via la ligne vide juste au dessus - //strcat(data,"\r\n"); + //strcatbuff(data,"\r\n"); if (blkretour.totalsize>0) { int len=(int)min(blkretour.totalsize,32000); int pos=strlen(data); diff --git a/src/htscatchurl.h b/src/htscatchurl.h index 77036fd..a2514ef 100644 --- a/src/htscatchurl.h +++ b/src/htscatchurl.h @@ -43,9 +43,11 @@ Please visit our Website: http://www.httrack.com // Fonctions void socinput(T_SOC soc,char* s,int max); -T_SOC catch_url_init_std(int* port_prox,char* adr_prox); -T_SOC catch_url_init(int* port,char* adr); -int catch_url(T_SOC soc,char* url,char* method,char* data); 
+#ifndef HTTRACK_DEFLIB +HTSEXT_API T_SOC catch_url_init_std(int* port_prox,char* adr_prox); +HTSEXT_API T_SOC catch_url_init(int* port,char* adr); +HTSEXT_API int catch_url(T_SOC soc,char* url,char* method,char* data); +#endif #define CATCH_RESPONSE \ "HTTP/1.0 200 OK\r\n"\ diff --git a/src/htscore.c b/src/htscore.c index 1b9db7a..ba1e226 100644 --- a/src/htscore.c +++ b/src/htscore.c @@ -51,30 +51,40 @@ Please visit our Website: http://www.httrack.com #include "htsmd5.h" #include "htsindex.h" +/* external modules */ +#include "htsmodules.h" + // htswrap_add #include "htswrap.h" + +// parser +#include "htsparse.h" + /* END specific definitions */ /* HTML parsing */ #if HTS_ANALYSTE -t_hts_htmlcheck_init hts_htmlcheck_init; -t_hts_htmlcheck_uninit hts_htmlcheck_uninit; -t_hts_htmlcheck_start hts_htmlcheck_start; -t_hts_htmlcheck_end hts_htmlcheck_end; -t_hts_htmlcheck_chopt hts_htmlcheck_chopt; -t_hts_htmlcheck hts_htmlcheck; -t_hts_htmlcheck_query hts_htmlcheck_query; -t_hts_htmlcheck_query2 hts_htmlcheck_query2; -t_hts_htmlcheck_query3 hts_htmlcheck_query3; -t_hts_htmlcheck_loop hts_htmlcheck_loop; -t_hts_htmlcheck_check hts_htmlcheck_check; -t_hts_htmlcheck_pause hts_htmlcheck_pause; -t_hts_htmlcheck_filesave hts_htmlcheck_filesave; -t_hts_htmlcheck_linkdetected hts_htmlcheck_linkdetected; -t_hts_htmlcheck_xfrstatus hts_htmlcheck_xfrstatus; -t_hts_htmlcheck_savename hts_htmlcheck_savename; +t_hts_htmlcheck_init hts_htmlcheck_init = NULL; +t_hts_htmlcheck_uninit hts_htmlcheck_uninit = NULL; +t_hts_htmlcheck_start hts_htmlcheck_start = NULL; +t_hts_htmlcheck_end hts_htmlcheck_end = NULL; +t_hts_htmlcheck_chopt hts_htmlcheck_chopt = NULL; +t_hts_htmlcheck hts_htmlcheck = NULL; +t_hts_htmlcheck_query hts_htmlcheck_query = NULL; +t_hts_htmlcheck_query2 hts_htmlcheck_query2 = NULL; +t_hts_htmlcheck_query3 hts_htmlcheck_query3 = NULL; +t_hts_htmlcheck_loop hts_htmlcheck_loop = NULL; +t_hts_htmlcheck_check hts_htmlcheck_check = NULL; +t_hts_htmlcheck_pause 
hts_htmlcheck_pause = NULL; +t_hts_htmlcheck_filesave hts_htmlcheck_filesave = NULL; +t_hts_htmlcheck_linkdetected hts_htmlcheck_linkdetected = NULL; +t_hts_htmlcheck_xfrstatus hts_htmlcheck_xfrstatus = NULL; +t_hts_htmlcheck_savename hts_htmlcheck_savename = NULL; +t_hts_htmlcheck_sendhead hts_htmlcheck_sendhead = NULL; +t_hts_htmlcheck_receivehead hts_htmlcheck_receivehead = NULL; + char _hts_errmsg[1100]=""; int _hts_in_html_parsing=0; @@ -84,6 +94,10 @@ int _hts_setpause=0; //httrackp* _hts_setopt=NULL; char** _hts_addurl=NULL; +/* external modules */ +extern int hts_parse_externals(htsmoduleStruct* str); +extern void htspe_init(void); + // int _hts_cancel=0; #endif @@ -163,7 +177,7 @@ hts_htmlcheck_end(); \ #define HTMLCHECK_UNINIT #endif -#define XH_extuninit { \ +#define XH_extuninit do { \ int i; \ HTMLCHECK_UNINIT \ if (liens!=NULL) { \ @@ -187,7 +201,7 @@ hts_htmlcheck_end(); \ if (back) { \ int i; \ for(i=0;i<back_max;i++) { \ - back_delete(back,i); \ + back_delete(&opt,back,i); \ } \ freet(back); back=NULL; \ } \ @@ -208,9 +222,9 @@ hts_htmlcheck_end(); \ if (template_header) { freet(template_header); template_header=NULL; } \ if (template_body) { freet(template_body); template_body=NULL; } \ if (template_footer) { freet(template_footer); template_footer=NULL; } \ - structcheck_init(-1); \ -} -#define XH_uninit XH_extuninit if (r.adr) { freet(r.adr); r.adr=NULL; } + /*structcheck_init(-1);*/ \ +} while(0) +#define XH_uninit do { XH_extuninit; if (r.adr) { freet(r.adr); r.adr=NULL; } } while(0) // Enregistrement d'un lien: // on calcule la taille nécessaire: taille des 3 chaînes à stocker (taille forcée paire, plus 2 octets de sécurité) @@ -218,14 +232,13 @@ hts_htmlcheck_end(); \ // enfin on écrit à l'adresse courante du buffer, qu'on incrémente. 
on décrémente la taille dispo d'autant ensuite // codebase: si non nul et si .class stockee on le note pour chemin primaire pour classes // FA,FS: former_adr et former_fil, lien original -#define REALLOC_SIZE 8192 #if HTS_HASH #define liens_record_sav_len(A) #else #define liens_record_sav_len(A) (A)->sav_len=strlen((A)->sav) #endif -#define liens_record(A,F,S,FA,FF) { \ +#define liens_record(A,F,S,FA,FF,NORM) { \ int notecode=0; \ int lienurl_len=((sizeof(lien_url)+HTS_ALIGN-1)/HTS_ALIGN)*HTS_ALIGN,\ adr_len=strlen(A),\ @@ -257,179 +270,21 @@ liens[lien_tot]->adr=lien_buffer; lien_buffer+=adr_len; lien_size-=adr_len; \ liens[lien_tot]->fil=lien_buffer; lien_buffer+=fil_len; lien_size-=fil_len; \ liens[lien_tot]->sav=lien_buffer; lien_buffer+=sav_len; lien_size-=sav_len; \ liens[lien_tot]->cod=NULL; \ -if (notecode) { liens[lien_tot]->cod=lien_buffer; lien_buffer+=cod_len; lien_size-=cod_len; strcpy(liens[lien_tot]->cod,codebase); } \ +if (notecode) { liens[lien_tot]->cod=lien_buffer; lien_buffer+=cod_len; lien_size-=cod_len; strcpybuff(liens[lien_tot]->cod,codebase); } \ if (former_adr_len>0) {\ liens[lien_tot]->former_adr=lien_buffer; lien_buffer+=former_adr_len; lien_size-=former_adr_len; \ liens[lien_tot]->former_fil=lien_buffer; lien_buffer+=former_fil_len; lien_size-=former_fil_len; \ -strcpy(liens[lien_tot]->former_adr,FA); \ -strcpy(liens[lien_tot]->former_fil,FF); \ +strcpybuff(liens[lien_tot]->former_adr,FA); \ +strcpybuff(liens[lien_tot]->former_fil,FF); \ }\ -strcpy(liens[lien_tot]->adr,A); \ -strcpy(liens[lien_tot]->fil,F); \ -strcpy(liens[lien_tot]->sav,S); \ +strcpybuff(liens[lien_tot]->adr,A); \ +strcpybuff(liens[lien_tot]->fil,F); \ +strcpybuff(liens[lien_tot]->sav,S); \ liens_record_sav_len(liens[lien_tot]); \ -hash_write(&hash,lien_tot); \ +hash_write(hashptr,lien_tot,NORM); \ } \ } -/* - abandonné (simplifie) - -// Ajouter à un lien EXISTANT deux champs former_adr et former_fil pour indiquer le nom d'un fichier avant un "move" -// NOTE: si un 
alloc est fait ici il n'y aura pas de freet() à la fin, tant pis (firstbloc) -#define liens_add_former(index,A,F) { \ -int adr_len=strlen(A),fil_len=strlen(F); \ -adr_len=(adr_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN+4; fil_len=(fil_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN+4; \ -if ((int) lien_size < (int) (adr_len+fil_len)) { \ -lien_buffer=(char*) calloct(add_tab_alloc,1); \ -lien_size=add_tab_alloc; \ -} \ -if (lien_buffer!=NULL) { \ -if (liens[lien_tot]!=NULL) { \ -liens[lien_tot]->former_adr=lien_buffer; lien_buffer+=adr_len; lien_size-=adr_len; \ -liens[lien_tot]->former_fil=lien_buffer; lien_buffer+=fil_len; lien_size-=fil_len; \ -strcpy(liens[lien_tot]->former_adr,A); \ -strcpy(liens[lien_tot]->former_fil,F); \ -} \ -} \ -} -*/ - -#if 0 -#define HT_ADD_ADR { \ - fwrite(lastsaved,1,((int) (adr - lastsaved)),fp); \ - lastsaved=adr; } -#define HT_ADD(A) fwrite(A,1,(int) strlen(A),fp); -#define HT_ADD_START -#define HT_ADD_END if (fp) { fclose(fp); fp=NULL; } -#define HT_ADD_FOP { \ - fp=filecreate(savename); \ - if (fp==NULL) { \ - if (opt.errlog) { \ - fspc(opt.errlog,"error"); fprintf(opt.errlog,"Unable to create %s for %s%s"LF,savename,urladr,urlfil); \ - test_flush; \ - } \ - freet(r.adr); r.adr=NULL; \ - error=1; \ - } \ - } -#else -// version optimisée, qui permet de ne pas toucher aux html non modifiés (update) -#define HT_ADD_CHK(A) if (((int) (A)+ht_len+1) >= ht_size) { \ - ht_size=(A)+ht_len+REALLOC_SIZE; \ - ht_buff=(char*) realloct(ht_buff,ht_size); \ - if (ht_buff==NULL) { \ - printf("PANIC! 
: Not enough memory [%d]\n",__LINE__); \ - XH_uninit; \ - exit(1); \ - } \ - } \ - ht_len+=A; -/* -(Optimized) -#define HT_ADD_ADR { int i,j=ht_len; HT_ADD_CHK(((int) adr)- ((int) lastsaved)) \ - for(i=0;i<((int) adr)- ((int) lastsaved);i++) \ - ht_buff[j+i]=lastsaved[i]; \ - ht_buff[j+((int) adr)- ((int) lastsaved)]='\0'; \ - lastsaved=adr; } -*/ -#define HT_ADD_ADR \ - if ((opt.getmode & 1) && (ptr>0)) { \ - int i=((int) (adr - lastsaved)),j=ht_len; HT_ADD_CHK(i) \ - memcpy(ht_buff+j, lastsaved, i); \ - ht_buff[j+i]='\0'; \ - lastsaved=adr; \ - } -/* -(Optimized) -#define HT_ADD(A) { HT_ADD_CHK(strlen(A)) strcat(ht_buff,A); } -*/ -#define HT_ADD(A) \ - if ((opt.getmode & 1) && (ptr>0)) { \ - int i=strlen(A),j=ht_len; \ - if (i) { \ - HT_ADD_CHK(i) \ - memcpy(ht_buff+j, A, i); \ - ht_buff[j+i]='\0'; \ - } } -#define HT_ADD_START \ - int ht_size=(int)(r.size*5)/4+REALLOC_SIZE; \ - int ht_len=0; \ - char* ht_buff=NULL; \ - if ((opt.getmode & 1) && (ptr>0)) { \ - ht_buff=(char*) malloct(ht_size); \ - if (ht_buff==NULL) { \ - printf("PANIC! 
: Not enough memory [%d]\n",__LINE__); \ - XH_uninit; \ - exit(1); \ - } \ - ht_buff[0]='\0'; \ - } -#define HT_ADD_END { \ - int ok=0;\ - if (ht_buff) { \ - int file_len=(int) strlen(ht_buff);\ - char digest[32+2];\ - digest[0]='\0';\ - domd5mem(ht_buff,file_len,digest,1);\ - if (fsize(antislash(savename))==file_len) { \ - int mlen;\ - char* mbuff;\ - cache_readdata(&cache,"//[HTML-MD5]//",savename,&mbuff,&mlen);\ - if (mlen) mbuff[mlen]='\0';\ - if ((mlen == 32) && (strcmp(((mbuff!=NULL)?mbuff:""),digest)==0)) {\ - ok=1;\ - if ( (opt.debug>1) && (opt.log!=NULL) ) {\ - fspc(opt.log,"debug"); fprintf(opt.log,"File not re-written (md5): %s"LF,savename);\ - test_flush;\ - }\ - } else {\ - ok=0;\ - } \ - }\ - if (!ok) { \ - fp=filecreate(savename); \ - if (fp) { \ - if (file_len>0) {\ - if ((int)fwrite(ht_buff,1,file_len,fp) != file_len) { \ - if (opt.errlog) { \ - fspc(opt.errlog,"error"); fprintf(opt.errlog,"Unable to write HTML file %s"LF,savename);\ - test_flush;\ - }\ - }\ - }\ - fclose(fp); fp=NULL; \ - if (strnotempty(r.lastmodified)) \ - set_filetime_rfc822(savename,r.lastmodified); \ - usercommand(0,NULL,antislash(savename)); \ - } else {\ - if (opt.errlog) { \ - fspc(opt.errlog,"error");\ - fprintf(opt.errlog,"Unable to save file %s"LF,savename);\ - test_flush;\ - }\ - }\ - } else {\ - filenote(savename,NULL); \ - }\ - if (cache.ndx)\ - cache_writedata(cache.ndx,cache.dat,"//[HTML-MD5]//",savename,digest,(int)strlen(digest));\ - } \ - freet(ht_buff); ht_buff=NULL; \ - } -#define HT_ADD_FOP -#endif - -// libérer filters[0] pour insérer un élément dans filters[0] -#define HT_INSERT_FILTERS0 {\ - int i;\ - if (filptr>0) {\ - for(i=filptr-1;i>=0;i--) {\ - strcpy(filters[i+1],filters[i]);\ - }\ - }\ - strcpy(filters[0],"");\ - filptr++;\ - filptr=minimum(filptr,filter_max);\ -} #define HT_INDEX_END do { \ if (!makeindex_done) { \ @@ -446,7 +301,7 @@ if (makeindex_fp) { \ fflush(makeindex_fp); \ fclose(makeindex_fp); /* à ne pas oublier sinon on passe une nuit 
blanche */ \ makeindex_fp=NULL; \ - usercommand(0,NULL,fconcat(opt.path_html,"index.html")); \ + usercommand(&opt,0,NULL,fconcat(opt.path_html,"index.html"),"",""); \ } \ } \ makeindex_done=1; /* ok c'est fait */ \ @@ -463,6 +318,7 @@ int httpmirror(char* url1,httrackp* ptropt) { int lien_tot=0; // nombre de liens pour le moment lien_url** liens=NULL; // les pointeurs sur les liens hash_struct hash; // système de hachage, accélère la recherche dans les liens + hash_struct* hashptr = &hash; t_cookie cookie; // gestion des cookies int lien_max=0; int lien_size=0; // octets restants dans buffer liens dispo @@ -522,8 +378,10 @@ int httpmirror(char* url1,httrackp* ptropt) { /* reset stats */ HTS_STAT.HTS_TOTAL_RECV=0; HTS_STAT.istat_bytes[0]=HTS_STAT.istat_bytes[1]=0; + /* if (opt.aff_progress) lastime=HTS_STAT.stat_timestart; + */ if (opt.shell) { last_info_shell=HTS_STAT.stat_timestart; } @@ -533,16 +391,17 @@ int httpmirror(char* url1,httrackp* ptropt) { // initialiser compteur erreurs fspc(NULL,NULL); + // init external modules + htspe_init(); + // initialiser cookie if (opt.accept_cookie) { opt.cookie=&cookie; cookie.max_len=30000; // max len - strcpy(cookie.data,""); + strcpybuff(cookie.data,""); // Charger cookies.txt par défaut ou cookies.txt du miroir - if (fexist(fconcat(opt.path_log,"cookies.txt"))) - cookie_load(opt.cookie,opt.path_log,"cookies.txt"); - else if (fexist("cookies.txt")) - cookie_load(opt.cookie,"","cookies.txt"); + cookie_load(opt.cookie,opt.path_log,"cookies.txt"); + cookie_load(opt.cookie,"","cookies.txt"); } else opt.cookie=NULL; @@ -550,16 +409,16 @@ int httpmirror(char* url1,httrackp* ptropt) { exit_xh=0; // sortir prématurément (var globale) // initialiser usercommand - usercommand(opt.sys_com_exec,opt.sys_com,""); + usercommand(&opt,opt.sys_com_exec,opt.sys_com,"","",""); // initialiser structcheck - structcheck_init(1); + // structcheck_init(1); // initialiser tableau options accessible par d'autres fonctions (signal) 
hts_declareoptbuffer(&opt); // initialiser verif_backblue - verif_backblue(NULL); + verif_backblue(&opt,NULL); verif_external(0,0); verif_external(1,0); @@ -597,7 +456,7 @@ int httpmirror(char* url1,httrackp* ptropt) { _hts_lockdns(-999); // robots.txt - strcpy(robots.adr,"!"); // dummy + strcpybuff(robots.adr,"!"); // dummy robots.token[0]='\0'; robots.next=NULL; // suivant opt.robotsptr = &robots; @@ -615,6 +474,9 @@ int httpmirror(char* url1,httrackp* ptropt) { opt.filters.filptr=&filptr; //opt.filters.filter_max=&filter_max; + // hash table + opt.hash = &hash; + // tableau de pointeurs sur les liens lien_max=maximum(opt.maxlink,32); liens=(lien_url**) malloct(lien_max*sizeof(lien_url*)); // tableau de pointeurs sur les liens @@ -674,12 +536,6 @@ int httpmirror(char* url1,httrackp* ptropt) { joker=1; else if (*a=='-') joker=1; - /* NON, certaines URL ont des * (!) - else { - int i=0; - while((a[i]!=0) && (a[i]!=' ')) if (a[i++]=='*') joker=1; - } - */ if (joker) { // joker ou filters //char* p; @@ -697,29 +553,29 @@ int httpmirror(char* url1,httrackp* ptropt) { // recopier prochaine chaine (+ ou -) i=0; - while((*a!=0) && (*a!=' ')) { tempo[i++]=*a; a++; } + while((*a!=0) && (!isspace((unsigned char)*a))) { tempo[i++]=*a; a++; } tempo[i++]='\0'; - while(*a==' ') { a++; } + while(isspace((unsigned char)*a)) { a++; } // sauter les + sans rien après.. 
if (strnotempty(tempo)) { if ((plus==0) && (type==1)) { // implicite: *www.edf.fr par exemple if (tempo[strlen(tempo)-1]!='*') { - strcat(tempo,"*"); // ajouter un * + strcatbuff(tempo,"*"); // ajouter un * } } if (type) - strcpy(filters[filptr],"+"); + strcpybuff(filters[filptr],"+"); else - strcpy(filters[filptr],"-"); + strcpybuff(filters[filptr],"-"); /* if (strfield(tempo,"http://")) - strcat(filters[filptr],tempo+7); // ignorer http:// + strcatbuff(filters[filptr],tempo+7); // ignorer http:// else if (strfield(tempo,"ftp://")) - strcat(filters[filptr],tempo+6); // ignorer ftp:// + strcatbuff(filters[filptr],tempo+6); // ignorer ftp:// else */ - strcat(filters[filptr],tempo); + strcatbuff(filters[filptr],tempo); filptr++; /* sanity check */ @@ -745,16 +601,16 @@ int httpmirror(char* url1,httrackp* ptropt) { char url[HTS_URLMAXSIZE*2]; // prochaine adresse i=0; - while((*a!=0) && (*a!=' ')) { url[i++]=*a; a++; } - while(*a==' ') { a++; } + while((*a!=0) && (!isspace((unsigned char)*a))) { url[i++]=*a; a++; } + while(isspace((unsigned char)*a)) { a++; } url[i++]='\0'; - //strcat(primary,"<PRIMARY=\""); + //strcatbuff(primary,"<PRIMARY=\""); if (strstr(url,":/")==NULL) - strcat(primary,"http://"); - strcat(primary,url); - //strcat(primary,"\">"); - strcat(primary,"\n"); + strcatbuff(primary,"http://"); + strcatbuff(primary,url); + //strcatbuff(primary,"\">"); + strcatbuff(primary,"\n"); } } // while @@ -762,13 +618,13 @@ int httpmirror(char* url1,httrackp* ptropt) { /* OPTIMIZED for fast load */ if (strnotempty(opt.filelist)) { char* filelist_buff=NULL; - int filelist_sz=fsize(opt.filelist); + INTsys filelist_sz=fsize(opt.filelist); if (filelist_sz>0) { FILE* fp=fopen(opt.filelist,"rb"); if (fp) { filelist_buff=malloct(filelist_sz + 2); if (filelist_buff) { - if ((int)fread(filelist_buff,1,filelist_sz,fp) != filelist_sz) { + if ((INTsys)fread(filelist_buff,1,filelist_sz,fp) != filelist_sz) { freet(filelist_buff); filelist_buff=NULL; } else { @@ -790,12 +646,12 @@ 
int httpmirror(char* url1,httrackp* ptropt) { if (count && line[0]) { n++; if (strstr(line,":/")==NULL) { - strcpy(primary_ptr, "http://"); + strcpybuff(primary_ptr, "http://"); primary_ptr += strlen(primary_ptr); } - strcpy(primary_ptr, line); + strcpybuff(primary_ptr, line); primary_ptr += strlen(primary_ptr); - strcpy(primary_ptr, "\n"); + strcpybuff(primary_ptr, "\n"); primary_ptr += 1; } } @@ -815,7 +671,7 @@ int httpmirror(char* url1,httrackp* ptropt) { // lien primaire - liens_record("primary","/primary","primary.html","",""); + liens_record("primary","/primary",fslash(fconcat(opt.path_html,"index.html")),"","",opt.urlhack); if (liens[lien_tot]==NULL) { // erreur, pas de place réservée printf("PANIC! : Not enough memory [%d]\n",__LINE__); if (opt.errlog) { @@ -836,7 +692,22 @@ int httpmirror(char* url1,httrackp* ptropt) { lien_tot++; // Initialiser cache - cache_init(&cache,&opt); + { + int backupXFR = htsMemoryFastXfr; +#if HTS_ANALYSTE + _hts_in_html_parsing=4; +#endif + if (!hts_htmlcheck_loop(NULL,0,0,0,lien_tot,0,NULL)) { + exit_xh=1; // exit requested + } + htsMemoryFastXfr = 1; /* fast load */ + cache_init(&cache,&opt); + htsMemoryFastXfr = backupXFR; +#if HTS_ANALYSTE + _hts_in_html_parsing=0; +#endif + } + } #if BDEBUG==3 @@ -961,8 +832,10 @@ int httpmirror(char* url1,httrackp* ptropt) { // note: recopie de plus haut // noter heure actuelle de départ en secondes HTS_STAT.stat_timestart=time_local(); + /* if (opt.aff_progress) lastime=HTS_STAT.stat_timestart; + */ if (opt.shell) { last_info_shell=HTS_STAT.stat_timestart; } @@ -1003,7 +876,7 @@ int httpmirror(char* url1,httrackp* ptropt) { // recopier proxy memcpy(&(r.req.proxy), &opt.proxy, sizeof(opt.proxy)); // et user-agent - strcpy(r.req.user_agent,opt.user_agent); + strcpybuff(r.req.user_agent,opt.user_agent); r.req.user_agent_send=opt.user_agent_send; if (!error) { @@ -1041,502 +914,93 @@ int httpmirror(char* url1,httrackp* ptropt) { r.statuscode=200; r.size=strlen(r.adr); r.soc=INVALID_SOCKET; 
- strcpy(r.contenttype,"text/html"); + strcpybuff(r.contenttype,"text/html"); /*} else if (opt.maxsoc<=0) { // fichiers 1 à 1 en attente (pas de backing) // charger le fichier en mémoire tout bêtement r=xhttpget(urladr,urlfil); // */ } else { // backing, multiples sockets - // - int b; - int n; -#if BDEBUG==1 - printf("\nBack test..\n"); -#endif - - // pause/lock files - { - int do_pause=0; - - // user pause lockfile : create hts-paused.lock --> HTTrack will be paused - if (fexist(fconcat(opt.path_log,"hts-stop.lock"))) { - // remove lockfile - remove(fconcat(opt.path_log,"hts-stop.lock")); - if (!fexist(fconcat(opt.path_log,"hts-stop.lock"))) { - do_pause=1; - } - } - - // after receving N bytes, pause - if (opt.fragment>0) { - if ((HTS_STAT.stat_bytes-stat_fragment) > opt.fragment) { - do_pause=1; - } - } - - // pause? - if (do_pause) { - if ( (opt.debug>0) && (opt.log!=NULL) ) { - fspc(opt.log,"info"); fprintf(opt.log,"engine: pause requested.."LF); - } - while (back_nsoc(back,back_max)>0) { // attendre fin des transferts - back_wait(back,back_max,&opt,&cache,HTS_STAT.stat_timestart); - Sleep(200); -#if HTS_ANALYSTE - { - back_wait(back,back_max,&opt,&cache,HTS_STAT.stat_timestart); - - // Transfer rate - engine_stats(); - - // Refresh various stats - HTS_STAT.stat_nsocket=back_nsoc(back,back_max); - HTS_STAT.stat_errors=fspc(NULL,"error"); - HTS_STAT.stat_warnings=fspc(NULL,"warning"); - HTS_STAT.stat_infos=fspc(NULL,"info"); - HTS_STAT.nbk=backlinks_done(liens,lien_tot,ptr); - HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,back,back_max); - - b=0; - if (!hts_htmlcheck_loop(back,back_max,b,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) { - if (opt.errlog) { - fspc(opt.errlog,"info"); fprintf(opt.errlog,"Exit requested by shell or user"LF); - test_flush; - } - exit_xh=1; // exit requested - XH_uninit; - return 0; - } - } -#endif - } - // On désalloue le buffer d'enregistrement des chemins créée, au cas où pendant la pause - // 
l'utilisateur ferait un rm -r après avoir effectué un tar - structcheck_init(1); - { - FILE* fp = fopen(fconcat(opt.path_log,"hts-paused.lock"),"wb"); - if (fp) { - fspc(fp,"info"); // dater - fprintf(fp,"Pause"LF"HTTrack is paused after retreiving "LLintP" bytes"LF"Delete this file to continue the mirror..."LF""LF"",HTS_STAT.stat_bytes); - fclose(fp); - } - } - stat_fragment=HTS_STAT.stat_bytes; - /* Info for wrappers */ - if ( (opt.debug>0) && (opt.log!=NULL) ) { - fspc(opt.log,"info"); fprintf(opt.log,"engine: pause: %s"LF,fconcat(opt.path_log,"hts-paused.lock")); - } -#if HTS_ANALYSTE - hts_htmlcheck_pause(fconcat(opt.path_log,"hts-paused.lock")); -#else - while (fexist(fconcat(opt.path_log,"hts-paused.lock"))) { - //back_wait(back,back_max,&opt,&cache,HTS_STAT.stat_timestart); inutile!! (plus de sockets actives) - Sleep(1000); - } -#endif - } - // - } - // end of pause/lock files -#if HTS_ANALYSTE - // changement dans les préférences -/* - if (_hts_setopt) { - copy_htsopt(_hts_setopt,&opt); // copier au besoin - _hts_setopt=NULL; // effacer callback - } -*/ - if (_hts_addurl) { - char add_adr[HTS_URLMAXSIZE*2]; - char add_fil[HTS_URLMAXSIZE*2]; - while(*_hts_addurl) { - char add_url[HTS_URLMAXSIZE*2]; - add_adr[0]=add_fil[0]=add_url[0]='\0'; - if (!link_has_authority(*_hts_addurl)) - strcpy(add_url,"http://"); // ajouter http:// - strcat(add_url,*_hts_addurl); - if (ident_url_absolute(add_url,add_adr,add_fil)>=0) { - // ----Ajout---- - // noter NOUVEAU lien - char add_sav[HTS_URLMAXSIZE*2]; - // calculer lien et éventuellement modifier addresse/fichier - if (url_savename(add_adr,add_fil,add_sav,NULL,NULL,NULL,NULL,&opt,liens,lien_tot,back,back_max,&cache,&hash,ptr,numero_passe)!=-1) { - if (hash_read(&hash,add_sav,"",0)<0) { // n'existe pas déja - // enregistrer lien (MACRO) - liens_record(add_adr,add_fil,add_sav,"",""); - if (liens[lien_tot]!=NULL) { // OK, pas d'erreur - liens[lien_tot]->testmode=0; // mode test? 
- liens[lien_tot]->link_import=0; // mode normal - liens[lien_tot]->depth=opt.depth; - liens[lien_tot]->pass2=max(0,numero_passe); - liens[lien_tot]->retry=opt.retry; - liens[lien_tot]->premier=lien_tot; - liens[lien_tot]->precedent=lien_tot; - lien_tot++; - // - if ((opt.debug>0) && (opt.log!=NULL)) { - fspc(opt.log,"info"); fprintf(opt.log,"Link added by user: %s%s"LF,add_adr,add_fil); test_flush; - } - // - } else { // oups erreur, plus de mémoire!! - printf("PANIC! : Not enough memory [%d]\n",__LINE__); - if (opt.errlog) { - fprintf(opt.errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url))); - test_flush; - } - //if (opt.getmode & 1) { if (fp) { fclose(fp); fp=NULL; } } - XH_uninit; // désallocation mémoire & buffers - return 0; - } - } else { - if ( (opt.debug>0) && (opt.errlog!=NULL) ) { - fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Existing link %s%s not added after user request"LF,add_adr,add_fil); - test_flush; - } - } - - } - } else { - if (opt.errlog) { - fspc(opt.errlog,"error"); - fprintf(opt.errlog,"Error during URL decoding for %s"LF,add_url); - test_flush; - } - } - // ----Fin Ajout---- - _hts_addurl++; // suivante - } - _hts_addurl=NULL; // libérer _hts_addurl - } - // si une pause a été demandée - if (_hts_setpause) { - // index du lien actuel - int b=back_index(back,back_max,urladr,urlfil,savename); - if (b<0) b=0; // forcer pour les stats - while(_hts_setpause) { // on fait la pause.. 
- back_wait(back,back_max,&opt,&cache,HTS_STAT.stat_timestart); - - // Transfer rate - engine_stats(); - - // Refresh various stats - HTS_STAT.stat_nsocket=back_nsoc(back,back_max); - HTS_STAT.stat_errors=fspc(NULL,"error"); - HTS_STAT.stat_warnings=fspc(NULL,"warning"); - HTS_STAT.stat_infos=fspc(NULL,"info"); - HTS_STAT.nbk=backlinks_done(liens,lien_tot,ptr); - HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,back,back_max); - - if (!hts_htmlcheck_loop(back,back_max,b,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) { - if (opt.errlog) { - fspc(opt.errlog,"info"); fprintf(opt.errlog,"Exit requested by shell or user"LF); - test_flush; - } - exit_xh=1; // exit requested - XH_uninit; - return 0; - } - if (back_nsoc(back,back_max)==0) - Sleep(250); // tite pause - } - } -#endif - - // si le fichier n'est pas en backing, le mettre.. - if (!back_exist(back,back_max,urladr,urlfil,savename)) { -#if BDEBUG==1 - printf("crash backing: %s%s\n",liens[ptr]->adr,liens[ptr]->fil); -#endif - if (back_add(back,back_max,&opt,&cache,urladr,urlfil,savename,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil,liens[ptr]->testmode,&liens[ptr]->pass2)==-1) { - printf("PANIC! : Crash adding error, unexpected error found.. 
[%d]\n",__LINE__); -#if BDEBUG==1 - printf("error while crash adding\n"); -#endif - if (opt.errlog) { - fspc(opt.errlog,"error"); fprintf(opt.errlog,"Unexpected backing error for %s%s"LF,urladr,urlfil); - test_flush; - } - - } - } - -#if BDEBUG==1 - printf("test number of socks\n"); -#endif - - // ajouter autant de socket qu'on peut ajouter - n=opt.maxsoc-back_nsoc(back,back_max); -#if BDEBUG==1 - printf("%d sockets available for backing\n",n); -#endif - -#if HTS_ANALYSTE - if ((n>0) && (!_hts_setpause)) { // si sockets libre et pas en pause, ajouter -#else - if (n>0) { // si sockets libre -#endif - // remplir autant que l'on peut le cache (backing) - back_fillmax(back,back_max,&opt,&cache,liens,ptr,numero_passe,lien_tot); - } - - // index du lien actuel -/* - b=back_index(back,back_max,urladr,urlfil,savename); - - if (b>=0) -*/ + /* + ************************************** + Get the next link, waiting for other files, handling external callbacks + */ { - // ------------------------------------------------------------ - // attendre que le fichier actuel soit prêt - BOUCLE D'ATTENTE - do { - - // index du lien actuel - b=back_index(back,back_max,urladr,urlfil,savename); -#if BDEBUG==1 - printf("back index %d, waiting\n",b); -#endif - // Continue to the loop if link still present - if (b<0) - continue; - - // Receive data - if (back[b].status>0) - back_wait(back,back_max,&opt,&cache,HTS_STAT.stat_timestart); - - // Continue to the loop if link still present - b=back_index(back,back_max,urladr,urlfil,savename); - if (b<0) - continue; - - // And fill the backing stack - if (back[b].status>0) - back_fillmax(back,back_max,&opt,&cache,liens,ptr,numero_passe,lien_tot); - - // Continue to the loop if link still present - b=back_index(back,back_max,urladr,urlfil,savename); - if (b<0) - continue; - - // autres occupations de HTTrack: statistiques, boucle d'attente, etc. 
- if ((opt.makestat) || (opt.maketrack)) { - TStamp l=time_local(); - if ((int) (l-makestat_time) >= 60) { - if (makestat_fp != NULL) { - fspc(makestat_fp,"info"); - fprintf(makestat_fp,"Rate= %d (/"LLintP") \11NewLinks= %d (/%d)"LF,(int) ((HTS_STAT.HTS_TOTAL_RECV-makestat_total)/(l-makestat_time)), HTS_STAT.HTS_TOTAL_RECV,(int) lien_tot-makestat_lnk,(int) lien_tot); - fflush(makestat_fp); - makestat_total=HTS_STAT.HTS_TOTAL_RECV; - makestat_lnk=lien_tot; - } - if (maketrack_fp!=NULL) { - int i; - fspc(maketrack_fp,"info"); fprintf(maketrack_fp,LF); - for(i=0;i<back_max;i++) { - back_info(back,i,3,maketrack_fp); - } - fprintf(maketrack_fp,LF); - - } - makestat_time=l; - } - } -#if HTS_ANALYSTE - { - int i; - { - char* s=hts_cancel_file(""); - if (strnotempty(s)) { // fichier à canceller - for(i=0;i<back_max;i++) { - if ((back[i].status>0)) { - if (strcmp(back[i].url_sav,s)==0) { // ok trouvé - if (back[i].status != 1000) { -#if HTS_DEBUG_CLOSESOCK - DEBUG_W("user cancel: deletehttp\n"); -#endif - if (back[i].r.soc!=INVALID_SOCKET) deletehttp(&back[i].r); - back[i].r.soc=INVALID_SOCKET; - back[i].r.statuscode=-1; - strcpy(back[i].r.msg,"Cancelled by User"); - back[i].status=0; // terminé - } else // cancel ftp.. 
flag à 1 - back[i].stop_ftp = 1; - } - } - } - s[0]='\0'; - } - } - - // Transfer rate - engine_stats(); - - // Refresh various stats - HTS_STAT.stat_nsocket=back_nsoc(back,back_max); - HTS_STAT.stat_errors=fspc(NULL,"error"); - HTS_STAT.stat_warnings=fspc(NULL,"warning"); - HTS_STAT.stat_infos=fspc(NULL,"info"); - HTS_STAT.nbk=backlinks_done(liens,lien_tot,ptr); - HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,back,back_max); - - if (!hts_htmlcheck_loop(back,back_max,b,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) { - if (opt.errlog) { - fspc(opt.errlog,"info"); fprintf(opt.errlog,"Exit requested by shell or user"LF); - test_flush; - } - exit_xh=1; // exit requested - XH_uninit; - return 0; - } - } - -#endif -#if HTS_POLL - if ((opt.shell) || (opt.keyboard) || (opt.verbosedisplay) || (!opt.quiet)) { - TStamp tl; - info_shell=1; - - /* Toggle with ENTER */ - if (!opt.quiet) { - if (check_stdin()) { - char com[256]; - linput(stdin,com,200); - if (opt.verbosedisplay==2) - opt.verbosedisplay=1; - else - opt.verbosedisplay=2; - /* Info for wrappers */ - if ( (opt.debug>0) && (opt.log!=NULL) ) { - fspc(opt.log,"info"); fprintf(opt.log,"engine: change-options"LF); - } -#if HTS_ANALYSTE - hts_htmlcheck_chopt(&opt); -#endif - } - } - - /* - ..useless.. - while (check_stdin()) { // données disponibles - char com[256]; - com[0]='\0'; - - if (!rcvd) rcvd=1; - linput(stdin,com,256); - - if (strnotempty(com)) { - if (strlen(com)<=2) { - switch(*com) { - case '?': { // Status? - if (back[b].status>0) printf("WAIT\n"); - else printf("READY\n"); - } - break; - case 'f': { // Fichier en attente? 
- if (back[b].status>0) printf("WAIT %s\n",back[b].url_fil); - else printf("READY %s\n",back[b].url_fil); - } - break; - case 'A': case 'F': { // filters - int i; - for(i=0;i<filptr;i++) { - printf("%s ",filters[i]); - } - printf("\n"); - } - break; - case '#': { // Afficher statistique sur le nombre de liens, etc - switch(*(com+1)) { - case 'l': printf("%d\n",lien_tot); break; // nombre de liens enregistrés - case 's': printf("%d\n",back_nsoc(back,back_max)); break; // nombre de sockets - case 'r': printf("%d\n",(int) (HTS_STAT.HTS_TOTAL_RECV/(time_local()-HTS_STAT.stat_timestart))); break; // taux de transfert - } - } - break; - case 'K': if (*(com+1)=='!') { // Kill - XH_uninit; - return -1; - } - break; - case 'X': if (*(com+1)=='!') { // exit - exit_xh=1; - } - break; - case 'I': if (*(com+1)=='+') info_shell=1; else info_shell=0; - break; - } - io_flush; - } else if (*com=='@') { - printf("%s\n",com+1); - io_flush; - } - } - - } // while - */ - tl=time_local(); - - // générer un message d'infos sur l'état actuel - if (opt.shell) { // si shell - if ((tl-last_info_shell)>0) { // toute les 1 sec - FILE* fp=stdout; - int a=0; - last_info_shell=tl; - if (fexist(fconcat(opt.path_log,"hts-autopsy"))) { // débuggage: teste si le robot est vivant - // (oui je sais un robot vivant.. mais bon.. il a le droit de vivre lui aussi) - // (libérons les robots esclaves de l'internet!) 
- remove(fconcat(opt.path_log,"hts-autopsy")); - fp=fopen(fconcat(opt.path_log,"hts-isalive"),"wb"); - a=1; - } - if ((info_shell) || a) { - int i,j; - - fprintf(fp,"TIME %d"LF,(int) (tl-HTS_STAT.stat_timestart)); - fprintf(fp,"TOTAL %d"LF,(int) HTS_STAT.stat_bytes); - fprintf(fp,"RATE %d"LF,(int) (HTS_STAT.HTS_TOTAL_RECV/(tl-HTS_STAT.stat_timestart))); - fprintf(fp,"SOCKET %d"LF,back_nsoc(back,back_max)); - fprintf(fp,"LINK %d"LF,lien_tot); - { - LLint mem=0; - for(i=0;i<back_max;i++) - if (back[i].r.adr!=NULL) - mem+=back[i].r.size; - fprintf(fp,"INMEM "LLintP""LF,mem); - } - for(j=0;j<2;j++) { // passes pour ready et wait - for(i=0;i<back_max;i++) { - back_info(back,i,j+1,stdout); // maketrack_fp a la place de stdout ?? // ** - } - } - fprintf(fp,LF); - if (a) - fclose(fp); - io_flush; - } - } - } // si shell - - } // si shell ou keyboard (option) - // -#endif - } while((b>=0) && (back[max(b,0)].status>0)); - - - // If link not found on the stack, it's because it has already been downloaded - // in background - // Then, skip it and go to the next one - if (b<0) { - if ((opt.debug>1) && (opt.log!=NULL)) { - fspc(opt.log,"debug"); fprintf(opt.log,"link #%d is ready, no more on the stack, skipping: %s%s.."LF,ptr,urladr,urlfil); - test_flush; - } - - // prochain lien - // ptr++; - + char buff_err_msg[1024]; + htsmoduleStruct str; + htsmoduleStructExtended stre; + buff_err_msg[0] = '\0'; + memset(&str, 0, sizeof(str)); + memset(&stre, 0, sizeof(stre)); + /* */ + str.err_msg = buff_err_msg; + str.filename = savename; + str.mime = r.contenttype; + str.url_host = urladr; + str.url_file = urlfil; + str.size = (int) r.size; + /* */ + str.addLink = htsAddLink; + /* */ + str.liens = liens; + str.opt = &opt; + str.back = back; + str.back_max = back_max; + str.cache = &cache; + str.hashptr = hashptr; + str.numero_passe = numero_passe; + str.add_tab_alloc = add_tab_alloc; + /* */ + str.lien_tot_ = &lien_tot; + str.ptr_ = &ptr; + str.lien_size_ = &lien_size; + str.lien_buffer_ 
= &lien_buffer; + /* */ + /* */ + stre.r_ = &r; + /* */ + stre.error_ = &error; + stre.exit_xh_ = &exit_xh; + stre.store_errpage_ = &store_errpage; + /* */ + stre.base = base; + stre.codebase = codebase; + /* */ + stre.filters_ = &filters; + stre.filptr_ = &filptr; + stre.robots_ = &robots; + stre.hash_ = &hash; + stre.lien_max_ = &lien_max; + /* */ + stre.makeindex_done_ = &makeindex_done; + stre.makeindex_fp_ = &makeindex_fp; + stre.makeindex_links_ = &makeindex_links; + stre.makeindex_firstlink_ = makeindex_firstlink; + /* */ + stre.template_header_ = template_header; + stre.template_body_ = template_body; + stre.template_footer_ = template_footer; + /* */ + stre.stat_fragment_ = &stat_fragment; + stre.makestat_time = makestat_time; + stre.makestat_fp = makestat_fp; + stre.makestat_total_ = &makestat_total; + stre.makestat_lnk_ = &makestat_lnk; + stre.maketrack_fp = maketrack_fp; + /* FUNCTION DEPENDANT */ + stre.loc_ = loc; + stre.last_info_shell_ = &last_info_shell; + stre.info_shell_ = &info_shell; + + /* Parse */ + switch(hts_mirror_wait_for_next_file(&str, &stre)) { + case -1: + XH_uninit; + return -1; + break; + case 2: // Jump to 'continue' // This is one of the very very rare cases where goto // is acceptable @@ -1544,136 +1008,21 @@ int httpmirror(char* url1,httrackp* ptropt) { goto jump_if_done; } - -#if HTS_ANALYSTE==2 -#else - //if (!opt.quiet) { // petite animation - if (!opt.verbosedisplay) { - if (!opt.quiet) { - static int roll=0; /* static: ok */ - roll=(roll+1)%4; - printf("%c\x0d",("/-\\|")[roll]); - fflush(stdout); - } - } else if (opt.verbosedisplay==1) { - if (back[b].r.statuscode==200) - printf("%d/%d: %s%s ("LLintP" bytes) - OK\33[K\r",ptr,lien_tot,back[b].url_adr,back[b].url_fil,back[b].r.size); - else - printf("%d/%d: %s%s ("LLintP" bytes) - %d\33[K\r",ptr,lien_tot,back[b].url_adr,back[b].url_fil,back[b].r.size,back[b].r.statuscode); - fflush(stdout); - } - //} -#endif - // ------------------------------------------------------------ - 
// Vérificateur d'intégrité -#if DEBUG_CHECKINT - _CHECKINT(&back[b],"Retour de back_wait, après le while") - { - int i; - for(i=0;i<back_max;i++) { - char si[256]; - sprintf(si,"Test global après back_wait, index %d",i); - _CHECKINT(&back[i],si) - } - } -#endif - - // copier structure réponse htsblk - memcpy(&r, &(back[b].r), sizeof(htsblk)); - r.location=loc; // ne PAS copier location!! adresse, pas de buffer - if (back[b].r.location) - strcpy(r.location,back[b].r.location); - back[b].r.adr=NULL; // ne pas faire de desalloc ensuite - - // libérer emplacement backing - back_delete(back,b); - - // progression - if (opt.aff_progress) { - TStamp tl=time_local(); - if ((tl-HTS_STAT.stat_timestart)>0) { - char s[32]; - int i=0; - lastime=tl; - _CLRSCR; _GOTOXY("1","1"); - printf("Rate=%d B/sec\n",(int) (HTS_STAT.HTS_TOTAL_RECV/(tl-HTS_STAT.stat_timestart))); - while(i<minimum(back_max,99)) { // ** - if (back[i].status>=0) { // loading.. - s[0]='\0'; - if (strlen(back[i].url_fil)>16) - strcat(s,back[i].url_fil+strlen(back[i].url_fil)-16); - else - strncat(s,back[i].url_fil,16); - printf("%s : ",s); - - printf("["); - if (back[i].r.totalsize>0) { - int p; - int j; - p=(int)((back[i].r.size*10)/back[i].r.totalsize); - p=minimum(10,p); - for(j=0;j<p;j++) printf("*"); - for(j=0;j<(10-p);j++) printf("-"); - } else { - printf(LLintP,back[i].r.size); - } - printf("]"); - - //} else if (back[i].status==0) { - // strcpy(s,"ENDED"); - } - printf("\n"); - i++; - } - io_flush; - } - } - - // débug graphique -#if BDEBUG==2 - { - char s[12]; - int i=0; - _GOTOXY(1,1); - printf("Rate=%d B/sec\n",(int) (HTS_STAT.HTS_TOTAL_RECV/(time_local()-HTS_STAT.stat_timestart))); - while(i<minimum(back_max,160)) { - if (back[i].status>0) { - sprintf(s,"%d",back[i].r.size); - } else if (back[i].status==0) { - strcpy(s,"ENDED"); - } else - strcpy(s," - "); - while(strlen(s)<8) strcat(s," "); - printf("%s",s); io_flush; - i++; - } - } -#endif - - -#if BDEBUG==1 - printf("statuscode=%d with %s / 
msg=%s\n",r.statuscode,r.contenttype,r.msg); -#endif - - } - /*else { -#if BDEBUG==1 - printf("back index error\n"); -#endif } - */ + } // FIN --RECUPERATION LIEN--- // ------------------------------------------------------------ - - - + + + } else { // lien vide.. if (opt.errlog) { fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Warning, link #%d empty"LF,ptr); test_flush; - error=1; } + error=1; + goto jump_if_done; } // test si url existe (non vide!) @@ -1706,7 +1055,7 @@ int httpmirror(char* url1,httrackp* ptropt) { } // ---fin tester taille a posteriori--- - + // -------------------- // BOGUS MIME TYPE HACK // Check if we have a bogus MIME type @@ -1724,39 +1073,82 @@ int httpmirror(char* url1,httrackp* ptropt) { if (strnotempty(r.cdispo)) { // Content-disposition set! if (ishtml(savename) == 0) { // Non HTML!! // patch it! - strcpy(r.contenttype,"application/octet-stream"); + strcpybuff(r.contenttype,"application/octet-stream"); } } } } } - } - - // ------------------------------------ - // BOGUS MIME TYPE HACK II (the revenge) - // Check if we have a bogus MIME type - if ( (is_hypertext_mime(r.contenttype)) /* Is HTML or Js, .. */ - || (may_be_hypertext_mime(r.contenttype)) /* Is real media, .. */ - ) { - if ((r.adr) && (r.size)) { - unsigned int map[256]; - int i; - unsigned int nspec = 0; - map_characters((unsigned char*)r.adr, (unsigned int)r.size, (unsigned int*)map); - for(i = 1 ; i < 32 ; i++) { // null chars ignored.. - if (!is_realspace(i) - && i != 27 /* Damn you ISO2022-xx! */ + + // ------------------------------------ + // BOGUS MIME TYPE HACK II (the revenge) + // Check if we have a bogus MIME type + if ( (is_hypertext_mime(r.contenttype)) /* Is HTML or Js, .. */ + || (may_be_hypertext_mime(r.contenttype)) /* Is real media, .. 
*/ + ) { + if ((r.adr) && (r.size)) { + unsigned int map[256]; + int i; + unsigned int nspec = 0; + map_characters((unsigned char*)r.adr, (unsigned int)r.size, (unsigned int*)map); + for(i = 1 ; i < 32 ; i++) { // null chars ignored.. + if (!is_realspace(i) + && i != 27 /* Damn you ISO2022-xx! */ + ) { + nspec += map[i]; + } + } + /* On-the-fly UCS2 to ISO-8859-1 conversion (note: UCS2 should never be used on the net) */ + if ( + map[0] > r.size/10 + && + r.size % 2 == 0 + && + ( + ( ((unsigned char) r.adr[0]) == 0xff && ((unsigned char) r.adr[1]) == 0xfe) + || + ( ((unsigned char) r.adr[0]) == 0xfe && ((unsigned char) r.adr[1]) == 0xff) + ) ) { - nspec += map[i]; + int lost=0; + int i; + int swap = (r.adr[0] == 0xff); + for(i = 0 ; i < r.size / 2 - 1 ; i++) { + unsigned int unic = 0; + if (swap) + unic = (r.adr[i*2 + 2] << 8) + r.adr[i*2 + 2 + 1]; + else + unic = r.adr[i*2 + 2] + (r.adr[i*2 + 2 + 1] << 8); + if (unic <= 255) + r.adr[i] = (char) unic; + else { + r.adr[i] = '?'; + lost++; + } + } + r.size = r.size / 2 - 1; + r.adr[r.size] = '\0'; + + if (opt.errlog) { + fspc(opt.errlog,"warning"); fprintf(opt.errlog,"File %s%s converted from UCS2 to 8-bit, %d characters lost during conversion (better to use UTF-8)"LF, urladr, urlfil, lost); + test_flush; + } + } else if ((nspec > r.size / 100) && (nspec > 10)) { // too many special characters + strcpybuff(r.contenttype,"application/octet-stream"); + if (opt.errlog) { + fspc(opt.errlog,"warning"); fprintf(opt.errlog,"File not parsed, looks like binary: %s%s"LF,urladr,urlfil); + test_flush; + } } - } - if ((nspec > r.size / 100) && (nspec > 10)) { // too many special characters - strcpy(r.contenttype,"application/octet-stream"); - if (opt.errlog) { - fspc(opt.errlog,"warning"); fprintf(opt.errlog,"File not parsed, looks like binary: %s%s"LF,urladr,urlfil); - test_flush; + + /* This hack allows to avoid problems with parsing '\0' characters */ + for(i = 0 ; i < r.size ; i++) { + if (r.adr[i] == '\0') r.adr[i] = ' '; } 
+ } + + } } @@ -1776,8 +1168,15 @@ int httpmirror(char* url1,httrackp* ptropt) { if (fp) { r.adr=malloct((int)sz + 2); if (r.adr) { - fread(r.adr,(int)sz,1,fp); - r.size=sz; + if (fread(r.adr,1,(INTsys)sz,fp) == sz) { + r.size=sz; + } else { + freet(r.adr); + r.size=0; + r.adr = NULL; + r.statuscode=-1; + strcpybuff(r.msg, ".RAM read error"); + } fclose(fp); fp=NULL; // remove (temporary) file! @@ -1801,7 +1200,7 @@ int httpmirror(char* url1,httrackp* ptropt) { if (!error) { if (ptr>0) { if (liens[ptr]) { - cache_mayadd(&opt,&cache,&r,urladr,urlfil,savename); + xxcache_mayadd(&opt,&cache,&r,urladr,urlfil,savename); } else error=1; } @@ -1809,392 +1208,91 @@ int httpmirror(char* url1,httrackp* ptropt) { */ // ---fin stockage en cache--- - - - // DEBUT rattrapage des 301,302,307.. - // ------------------------------------------------------------ + + + /* + ************************************** + Check "Moved permanently" and other similar errors, retrying URLs if necessary and handling + redirect pages. 
+ */ if (!error) { - ////////{ - // on a chargé un fichier en plus - // if (!error) stat_loaded+=r.size; + char buff_err_msg[1024]; + htsmoduleStruct str; + htsmoduleStructExtended stre; + buff_err_msg[0] = '\0'; + memset(&str, 0, sizeof(str)); + memset(&stre, 0, sizeof(stre)); + /* */ + str.err_msg = buff_err_msg; + str.filename = savename; + str.mime = r.contenttype; + str.url_host = urladr; + str.url_file = urlfil; + str.size = (int) r.size; + /* */ + str.addLink = htsAddLink; + /* */ + str.liens = liens; + str.opt = &opt; + str.back = back; + str.back_max = back_max; + str.cache = &cache; + str.hashptr = hashptr; + str.numero_passe = numero_passe; + str.add_tab_alloc = add_tab_alloc; + /* */ + str.lien_tot_ = &lien_tot; + str.ptr_ = &ptr; + str.lien_size_ = &lien_size; + str.lien_buffer_ = &lien_buffer; + /* */ + /* */ + stre.r_ = &r; + /* */ + stre.error_ = &error; + stre.exit_xh_ = &exit_xh; + stre.store_errpage_ = &store_errpage; + /* */ + stre.base = base; + stre.codebase = codebase; + /* */ + stre.filters_ = &filters; + stre.filptr_ = &filptr; + stre.robots_ = &robots; + stre.hash_ = &hash; + stre.lien_max_ = &lien_max; + /* */ + stre.makeindex_done_ = &makeindex_done; + stre.makeindex_fp_ = &makeindex_fp; + stre.makeindex_links_ = &makeindex_links; + stre.makeindex_firstlink_ = makeindex_firstlink; + /* */ + stre.template_header_ = template_header; + stre.template_body_ = template_body; + stre.template_footer_ = template_footer; + /* */ + stre.stat_fragment_ = &stat_fragment; + stre.makestat_time = makestat_time; + stre.makestat_fp = makestat_fp; + stre.makestat_total_ = &makestat_total; + stre.makestat_lnk_ = &makestat_lnk; + stre.maketrack_fp = maketrack_fp; - // ------------------------------------------------------------ - // Rattrapage des 301,302,307 (moved) et 412,416 - les 304 le sont dans le backing - // ------------------------------------------------------------ - if ( (r.statuscode==301) - || (r.statuscode==302) - || (r.statuscode==303) - || 
(r.statuscode==307) - ) { - //if (r.adr!=NULL) { // adr==null si fichier direct. [catch: davename normalement si cgi] - //int i=0; - char *rn=NULL; - // char* p; - - if ( (opt.debug>0) && (opt.errlog!=NULL) ) { - //if (opt.errlog) { - fspc(opt.errlog,"warning"); fprintf(opt.errlog,"%s for %s%s"LF,r.msg,urladr,urlfil); - test_flush; - } - - - { - char mov_url[HTS_URLMAXSIZE*2],mov_adr[HTS_URLMAXSIZE*2],mov_fil[HTS_URLMAXSIZE*2]; - int get_it=0; // ne pas prendre le fichier à la même adresse par défaut - int reponse=0; - mov_url[0]='\0'; mov_adr[0]='\0'; mov_fil[0]='\0'; - // - - strcpy(mov_url,r.location); - - // url qque -> adresse+fichier - if ((reponse=ident_url_relatif(mov_url,urladr,urlfil,mov_adr,mov_fil))>=0) { - int set_prio_to=0; // pas de priotité fixéd par wizard - - //if (ident_url_absolute(mov_url,mov_adr,mov_fil)!=-1) { // ok URL reconnue - // c'est (en gros) la même URL.. - // si c'est un problème de casse dans le host c'est que le serveur est buggé - // ("RFC says.." : host name IS case insensitive) - if ((strfield2(mov_adr,urladr)!=0) && (strfield2(mov_fil,urlfil)!=0)) { // identique à casse près - // on tourne en rond - if (strcmp(mov_fil,urlfil)==0) { - error=1; - get_it=-1; // ne rien faire - if (opt.errlog) { - fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Can not bear crazy server (%s) for %s%s"LF,r.msg,urladr,urlfil); - test_flush; - } - } else { // mauvaise casse, effacer entrée dans la pile et rejouer une fois - get_it=1; - } - } else { // adresse différente - if (ishtml(mov_url)==0) { // pas même adresse MAIS c'est un fichier non html (pas de page moved possible) - // -> on prend à cette adresse, le lien sera enregistré avec lien_record() (hash) - if ((opt.debug>1) && (opt.log!=NULL)) { - fspc(opt.log,"debug"); fprintf(opt.log,"wizard link test for moved file at %s%s.."LF,mov_adr,mov_fil); - test_flush; - } - // accepté? 
- if (hts_acceptlink(&opt,ptr,lien_tot,liens, - mov_adr,mov_fil, - &filters,&filptr,opt.maxfilter, - &robots, - &set_prio_to, - NULL) != 1) { /* nouvelle adresse non refusée ? */ - get_it=1; - if ((opt.debug>1) && (opt.log!=NULL)) { - fspc(opt.log,"debug"); fprintf(opt.log,"moved link accepted: %s%s"LF,mov_adr,mov_fil); - test_flush; - } - } - } /* sinon traité normalement */ - } - - //if ((strfield2(mov_adr,urladr)!=0) && (strfield2(mov_fil,urlfil)!=0)) { // identique à casse près - if (get_it==1) { - // court-circuiter le reste du traitement - // et reculer pour mieux sauter - if (opt.errlog) { - fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Warning moved treated for %s%s (real one is %s%s)"LF,urladr,urlfil,mov_adr,mov_fil); - test_flush; - } - // canceller lien actuel - error=1; - strcpy(liens[ptr]->adr,"!"); // caractère bidon (invalide hash) -#if HTS_HASH -#else - liens[ptr]->sav_len=-1; // taille invalide -#endif - // noter NOUVEAU lien - { - char mov_sav[HTS_URLMAXSIZE*2]; - // calculer lien et éventuellement modifier addresse/fichier - if (url_savename(mov_adr,mov_fil,mov_sav,NULL,NULL,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil,&opt,liens,lien_tot,back,back_max,&cache,&hash,ptr,numero_passe)!=-1) { - if (hash_read(&hash,mov_sav,"",0)<0) { // n'existe pas déja - // enregistrer lien (MACRO) avec SAV IDENTIQUE - liens_record(mov_adr,mov_fil,liens[ptr]->sav,"",""); - //liens_record(mov_adr,mov_fil,mov_sav,"",""); - if (liens[lien_tot]!=NULL) { // OK, pas d'erreur - // mode test? 
- liens[lien_tot]->testmode=liens[ptr]->testmode; - liens[lien_tot]->link_import=0; // mode normal - if (!set_prio_to) - liens[lien_tot]->depth=liens[ptr]->depth; - else - liens[lien_tot]->depth=max(0,min(set_prio_to-1,liens[ptr]->depth)); // PRIORITE NULLE (catch page) - liens[lien_tot]->pass2=max(liens[ptr]->pass2,numero_passe); - liens[lien_tot]->retry=liens[ptr]->retry; - liens[lien_tot]->premier=liens[ptr]->premier; - liens[lien_tot]->precedent=liens[ptr]->precedent; - lien_tot++; - } else { // oups erreur, plus de mémoire!! - printf("PANIC! : Not enough memory [%d]\n",__LINE__); - if (opt.errlog) { - fprintf(opt.errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url))); - test_flush; - } - //if (opt.getmode & 1) { if (fp) { fclose(fp); fp=NULL; } } - XH_uninit; // désallocation mémoire & buffers - return 0; - } - } else { - if ( (opt.debug>0) && (opt.errlog!=NULL) ) { - fspc(opt.errlog,"warning"); fprintf(opt.errlog,"moving %s to an existing file %s"LF,liens[ptr]->fil,urlfil); - test_flush; - } - } - - } - } - - //printf("-> %s %s %s\n",liens[lien_tot-1]->adr,liens[lien_tot-1]->fil,liens[lien_tot-1]->sav); - - // note métaphysique: il se peut qu'il y ait un index.html et un INDEX.HTML - // sous DOS ca marche pas très bien... mais comme je suis génial url_savename() - // est à même de régler ce problème - } - } // ident_url_xx - - if (get_it==0) { // adresse vraiment différente et potentiellement en html (pas de possibilité de bouger la page tel quel à cause des <img src..> et cie) - rn=(char*) calloct(8192,1); - if (rn!=NULL) { - if (opt.errlog) { - fspc(opt.errlog,"warning"); fprintf(opt.errlog,"File has moved from %s%s to %s"LF,urladr,urlfil,mov_url); - test_flush; - } - escape_uri(mov_url); - // On prépare une page qui sautera immédiatement sur la bonne URL - // Le scanner re-changera, ensuite, cette URL, pour la mirrorer! 
- strcpy(rn,"<HTML>"CRLF); - strcat(rn,"<!-- Created by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->"CRLF); - strcat(rn,"<HEAD>"CRLF"<TITLE>Page has moved</TITLE>"CRLF"</HEAD>"CRLF"<BODY>"CRLF); - strcat(rn,"<META HTTP-EQUIV=\"Refresh\" CONTENT=\"0; URL="); - strcat(rn,mov_url); // URL - strcat(rn,"\">"CRLF); - strcat(rn,"<A HREF=\""); - strcat(rn,mov_url); - strcat(rn,"\">"); - strcat(rn,"<B>Click here...</B></A>"CRLF); - strcat(rn,"</BODY>"CRLF); - strcat(rn,"<!-- Created by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->"CRLF); - strcat(rn,"</HTML>"CRLF); - - // changer la page - if (r.adr) { freet(r.adr); r.adr=NULL; } - r.adr=rn; - r.size=strlen(r.adr); - strcpy(r.contenttype,"text/html"); - } - } // get_it==0 - - } // bloc - // erreur HTTP (ex: 404, not found) - } else if ( - (r.statuscode==412) - || (r.statuscode==416) - ) { // Precondition Failed, c'est à dire pour nous redemander TOUT le fichier - if (fexist(liens[ptr]->sav)) { - remove(liens[ptr]->sav); // Eliminer - if (!fexist(liens[ptr]->sav)) { // Bien éliminé? (sinon on boucle..) -#if HDEBUG - printf("Partial content NOT up-to-date, reget all file for %s\n",liens[ptr]->sav); -#endif - if ( (opt.debug>1) && (opt.errlog!=NULL) ) { - //if (opt.errlog) { - fspc(opt.errlog,"debug"); fprintf(opt.errlog,"Partial file reget (%s) for %s%s"LF,r.msg,urladr,urlfil); - test_flush; - } - // enregistrer le MEME lien (MACRO) - liens_record(liens[ptr]->adr,liens[ptr]->fil,liens[ptr]->sav,"",""); - if (liens[lien_tot]!=NULL) { // OK, pas d'erreur - liens[lien_tot]->testmode=liens[ptr]->testmode; // mode test? 
- liens[lien_tot]->link_import=0; // pas mode import - liens[lien_tot]->depth=liens[ptr]->depth; - liens[lien_tot]->pass2=max(liens[ptr]->pass2,numero_passe); - liens[lien_tot]->retry=liens[ptr]->retry; - liens[lien_tot]->premier=liens[ptr]->premier; - liens[lien_tot]->precedent=ptr; - lien_tot++; - // - // canceller lien actuel - error=1; - strcpy(liens[ptr]->adr,"!"); // caractère bidon (invalide hash) -#if HTS_HASH -#else - liens[ptr]->sav_len=-1; // taille invalide -#endif - // - } else { // oups erreur, plus de mémoire!! - printf("PANIC! : Not enough memory [%d]\n",__LINE__); - if (opt.errlog) { - fprintf(opt.errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url))); - test_flush; - } - //if (opt.getmode & 1) { if (fp) { fclose(fp); fp=NULL; } } - XH_uninit; // désallocation mémoire & buffers - return 0; - } - } else { - if (opt.errlog!=NULL) { - fspc(opt.errlog,"error"); fprintf(opt.errlog,"Can not remove old file %s"LF,urlfil); - test_flush; - } - } - } else { - if (opt.errlog!=NULL) { - fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Unexpected 412/416 error (%s) for %s%s"LF,r.msg,urladr,urlfil); - test_flush; - } - } - } else if (r.statuscode!=200) { - int can_retry=0; - - // cas où l'on peut reessayer - // -2=timeout -3=rateout (interne à httrack) - switch(r.statuscode) { - //case -1: can_retry=1; break; - case -2: if (opt.hostcontrol) { // timeout et retry épuisés - if ((opt.hostcontrol & 1) && (liens[ptr]->retry<=0)) { - if ((opt.debug>1) && (opt.log!=NULL)) { - fspc(opt.log,"debug"); fprintf(opt.log,"Link banned: %s%s"LF,urladr,urlfil); test_flush; - } - host_ban(&opt,liens,ptr,lien_tot,back,back_max,filters,opt.maxfilter,&filptr,jump_identification(urladr)); - if ((opt.debug>1) && (opt.log!=NULL)) { - fspc(opt.log,"debug"); fprintf(opt.log,"Info: previous log - link banned: %s%s"LF,urladr,urlfil); test_flush; - } - } else can_retry=1; - } else can_retry=1; - break; - case -3: if ((opt.hostcontrol) && 
(liens[ptr]->retry<=0)) { // too slow - if (opt.hostcontrol & 2) { - if ((opt.debug>1) && (opt.log!=NULL)) { - fspc(opt.log,"debug"); fprintf(opt.log,"Link banned: %s%s"LF,urladr,urlfil); test_flush; - } - host_ban(&opt,liens,ptr,lien_tot,back,back_max,filters,opt.maxfilter,&filptr,jump_identification(urladr)); - if ((opt.debug>1) && (opt.log!=NULL)) { - fspc(opt.log,"debug"); fprintf(opt.log,"Info: previous log - link banned: %s%s"LF,urladr,urlfil); test_flush; - } - } else can_retry=1; - } else can_retry=1; - break; - case -4: // connect closed - can_retry=1; - break; - case -5: // other (non fatal) error - can_retry=1; - break; - case -6: // bad SSL handskake - can_retry=1; - break; - case 408: case 409: case 500: case 502: case 504: can_retry=1; - break; - } - - if ( strcmp(liens[ptr]->fil,"/primary") != 0 ) { // no primary (internal page 0) - if ((liens[ptr]->retry<=0) || (!can_retry) ) { // retry épuisés (ou retry impossible) - if (opt.errlog) { - if ((opt.retry>0) && (can_retry)){ - fspc(opt.errlog,"error"); - fprintf(opt.errlog,"\"%s\" (%d) after %d retries at link %s%s (from %s%s)"LF,r.msg,r.statuscode,opt.retry,urladr,urlfil,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil); - } else { - if (r.statuscode==-10) { // test OK - if ((opt.debug>0) && (opt.errlog!=NULL)) { - fspc(opt.errlog,"info"); - fprintf(opt.errlog,"Test OK at link %s%s (from %s%s)"LF,urladr,urlfil,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil); - } - } else { - if (strcmp(urlfil,"/robots.txt")) { // ne pas afficher d'infos sur robots.txt par défaut - fspc(opt.errlog,"error"); - fprintf(opt.errlog,"\"%s\" (%d) at link %s%s (from %s%s)"LF,r.msg,r.statuscode,urladr,urlfil,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil); - } else { - if (opt.debug>1) { - fspc(opt.errlog,"info"); fprintf(opt.errlog,"No robots.txt rules at %s"LF,urladr); - test_flush; - } - } - } - } - test_flush; - } - - // NO error in trop level - // due to the "no 
connection -> previous restored" hack - // This prevent the engine from wiping all data if the website has been deleted (or moved) - // since last time (which is quite annoying) - if (liens[ptr]->precedent != 0) { - // ici on teste si on doit enregistrer la page tout de même - if (opt.errpage) { - store_errpage=1; - } - } else { - if (strcmp(urlfil,"/robots.txt") != 0) { - /* - This is an error caused by a link entered by the user - That is, link(s) entered by user are invalid (404, 500, connect error, proxy error..) - If all links entered are invalid, the session failed and we will attempt to restore - the previous one - Example: Try to update a website which has been deleted remotely: this may delete - the website locally, which is really not desired (especially if the website disappeared!) - With this hack, the engine won't wipe local files (how clever) - */ - HTS_STAT.stat_errors_front++; - } - } - - } else { // retry!! - if (opt.debug>0 && opt.errlog != NULL) { // on fera un alert si le retry échoue - fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Retry after error %d (%s) at link %s%s (from %s%s)"LF,r.statuscode,r.msg,urladr,urlfil,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil); - test_flush; - } - // redemander fichier - liens_record(urladr,urlfil,savename,"",""); - if (liens[lien_tot]!=NULL) { // OK, pas d'erreur - liens[lien_tot]->testmode=liens[ptr]->testmode; // mode test? - liens[lien_tot]->link_import=0; // pas mode import - liens[lien_tot]->depth=liens[ptr]->depth; - liens[lien_tot]->pass2=max(liens[ptr]->pass2,numero_passe); - liens[lien_tot]->retry=liens[ptr]->retry-1; // moins 1 retry! - liens[lien_tot]->premier=liens[ptr]->premier; - liens[lien_tot]->precedent=liens[ptr]->precedent; - lien_tot++; - } else { // oups erreur, plus de mémoire!! - printf("PANIC! 
: Not enough memory [%d]\n",__LINE__); - if (opt.errlog) { - fspc(opt.errlog,"panic"); - fprintf(opt.errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url))); - test_flush; - } - //if (opt.getmode & 1) { if (fp) { fclose(fp); fp=NULL; } } - XH_uninit; // désallocation mémoire & buffers - return 0; - } - } - } else { - if (opt.errlog) { - if (opt.debug>1) { - fspc(opt.errlog,"info"); - fprintf(opt.errlog,"Info: no robots.txt at %s%s"LF,urladr,urlfil); - } - } - } - if (!store_errpage) { - if (r.adr) { freet(r.adr); r.adr=NULL; } // désalloc - error=1; // erreur! - } + /* Parse */ + if (hts_mirror_check_moved(&str, &stre) != 0) { + XH_uninit; + return -1; } - // FIN rattrapage des 301,302,307.. - // ------------------------------------------------------------ - + } - } // if !error } // if !error if (!error) { #if DEBUG_SHOWTYPES if (strstr(REG,r.contenttype)==NULL) { - strcat(REG,r.contenttype); - strcat(REG,"\n"); + strcatbuff(REG,r.contenttype); + strcatbuff(REG,"\n"); printf("%s\n",r.contenttype); io_flush; } @@ -2265,8 +1363,79 @@ int httpmirror(char* url1,httrackp* ptropt) { fspc(opt.log,"info"); fprintf(opt.log,"engine: check-html: %s%s"LF,urladr,urlfil); } { + char buff_err_msg[1024]; + htsmoduleStruct str; + htsmoduleStructExtended stre; + buff_err_msg[0] = '\0'; + memset(&str, 0, sizeof(str)); + memset(&stre, 0, sizeof(stre)); + /* */ + str.err_msg = buff_err_msg; + str.filename = savename; + str.mime = r.contenttype; + str.url_host = urladr; + str.url_file = urlfil; + str.size = (int) r.size; + /* */ + str.addLink = htsAddLink; + /* */ + str.liens = liens; + str.opt = &opt; + str.back = back; + str.back_max = back_max; + str.cache = &cache; + str.hashptr = hashptr; + str.numero_passe = numero_passe; + str.add_tab_alloc = add_tab_alloc; + /* */ + str.lien_tot_ = &lien_tot; + str.ptr_ = &ptr; + str.lien_size_ = &lien_size; + str.lien_buffer_ = &lien_buffer; + /* */ + /* */ + stre.r_ = &r; + /* */ + stre.error_ = 
&error; + stre.exit_xh_ = &exit_xh; + stre.store_errpage_ = &store_errpage; + /* */ + stre.base = base; + stre.codebase = codebase; + /* */ + stre.filters_ = &filters; + stre.filptr_ = &filptr; + stre.robots_ = &robots; + stre.hash_ = &hash; + stre.lien_max_ = &lien_max; + /* */ + stre.makeindex_done_ = &makeindex_done; + stre.makeindex_fp_ = &makeindex_fp; + stre.makeindex_links_ = &makeindex_links; + stre.makeindex_firstlink_ = makeindex_firstlink; + /* */ + stre.template_header_ = template_header; + stre.template_body_ = template_body; + stre.template_footer_ = template_footer; + /* */ + stre.stat_fragment_ = &stat_fragment; + stre.makestat_time = makestat_time; + stre.makestat_fp = makestat_fp; + stre.makestat_total_ = &makestat_total; + stre.makestat_lnk_ = &makestat_lnk; + stre.maketrack_fp = maketrack_fp; + + /* Parse */ + if (htsparse(&str, &stre) != 0) { + XH_uninit; + return -1; + } + + // I'll have to segment this part -#include "htsparse.c" +// #include "htsparse.c" + + } } // Fin parsing HTML @@ -2348,15 +1517,28 @@ int httpmirror(char* url1,httrackp* ptropt) { printf("robots.txt dump:\n%s\n",r.adr); #endif do { + char* comm; + int llen; bptr+=binput(r.adr+bptr, line, sizeof(line) - 2); + /* strip comment */ + comm=strchr(line, '#'); + if (comm != NULL) { + *comm = '\0'; + } + /* strip spaces */ + llen=strlen(line); + while(llen > 0 && is_realspace(line[llen - 1])) { + line[llen - 1] = '\0'; + llen--; + } if (strfield(line,"user-agent:")) { char* a; a=line+11; - while(*a==' ') a++; // sauter espace(s) - if (*a == '*') { + while(is_realspace(*a)) a++; // sauter espace(s) + if ( *a == '*') { if (record != 2) record=1; // c pour nous - } else if (strfield(a,"httrack")) { + } else if (strfield(a,"httrack") || strfield(a,"winhttrack") || strfield(a,"webhttrack")) { buff[0]='\0'; // re-enregistrer infobuff[0]='\0'; record=2; // locked @@ -2367,23 +1549,18 @@ int httpmirror(char* url1,httrackp* ptropt) { else record=0; } else if (record) { if 
(strfield(line,"disallow:")) { - char* a; - a=strchr(line,'#'); - if (a) *a='\0'; - while((line[strlen(line)-1]==' ') - || (line[strlen(line)-1]==10) - || (line[strlen(line)-1]==13)) - line[strlen(line)-1]='\0'; // supprimer espaces - a=line+9; - while((*a==' ') || (*a==10) || (*a==13)) + char* a=line+9; + while(is_realspace(*a)) a++; // sauter espace(s) if (strnotempty(a)) { if (strcmp(a,"/") != 0) { /* ignoring disallow: / */ if ( (strlen(buff) + strlen(a) + 8) < sizeof(buff)) { - strcat(buff,a); - strcat(buff,"\n"); - if (strnotempty(infobuff)) strcat(infobuff,", "); - strcat(infobuff,a); + strcatbuff(buff,a); + strcatbuff(buff,"\n"); + if ( (strlen(infobuff) + strlen(a) + 8) < sizeof(infobuff)) { + if (strnotempty(infobuff)) strcatbuff(infobuff,", "); + strcatbuff(infobuff,a); + } } } else { if (opt.errlog!=NULL) { @@ -2428,8 +1605,8 @@ int httpmirror(char* url1,httrackp* ptropt) { char tempo[HTS_URLMAXSIZE*2]; FILE* fp; tempo[0]='\0'; - strcpy(tempo,savename); - strcat(tempo,".readme"); + strcpybuff(tempo,savename); + strcatbuff(tempo,".readme"); #if HTS_DOSNAME // remplacer / par des slash arrière @@ -2445,7 +1622,7 @@ int httpmirror(char* url1,httrackp* ptropt) { #endif if ((fp=fopen(tempo,"wb"))!=NULL) { - fprintf(fp,"Info-file generated by HTTrack Website Copier "HTTRACK_VERSION""CRLF""CRLF); + fprintf(fp,"Info-file generated by HTTrack Website Copier "HTTRACK_VERSION"%s"CRLF""CRLF, WHAT_is_available); fprintf(fp,"The file %s has not been scanned by HTS"CRLF,savename); fprintf(fp,"Some links contained in it may be unreachable locally."CRLF); fprintf(fp,"If you want to get these files, you have to set an upper recurse level, "); @@ -2454,7 +1631,7 @@ int httpmirror(char* url1,httrackp* ptropt) { #if HTS_WIN==0 chmod(tempo,HTS_ACCESS_FILE); #endif - usercommand(0,NULL,antislash(tempo)); + usercommand(&opt,0,NULL,fconv(tempo),"",""); } @@ -2473,7 +1650,9 @@ int httpmirror(char* url1,httrackp* ptropt) { fspc(opt.log,"debug"); fprintf(opt.log,"non-html file 
ignored after upload at %s : %s"LF,urladr,urlfil); test_flush; } - freet(r.adr); r.adr=NULL; + if (r.adr) { + freet(r.adr); r.adr=NULL; + } } } @@ -2481,9 +1660,17 @@ int httpmirror(char* url1,httrackp* ptropt) { // ATTENTION C'EST ICI QU'ON SAUVE LE FICHIER!! if (r.adr) { - if (filesave(r.adr,(int)r.size,savename)!=0) { + if (filesave(&opt,r.adr,(int)r.size,savename,urladr,urlfil)!=0) { + int fcheck; + if ((fcheck=check_fatal_io_errno())) { + exit_xh=-1; /* fatal error */ + } if (opt.errlog) { - fspc(opt.errlog,"error"); fprintf(opt.errlog,"Unable to save file %s"LF,savename); + fspc(opt.errlog,"error"); fprintf(opt.errlog,"Unable to save file %s : %s"LF, savename, strerror(errno)); + if (fcheck) { + fspc(opt.errlog,"error"); + fprintf(opt.errlog,"* * Fatal write error, giving up"LF); + } test_flush; } } else { @@ -2520,284 +1707,71 @@ int httpmirror(char* url1,httrackp* ptropt) { } } } else */ - if (opt.parsejava) { - if (strlen(savename)>6) { // fichier.class - if (strfield(savename+strlen(savename)-6,".class")) { // ok c'est une classe - if (fexist(savename)) { // ok, existe bien! - char err_msg[1100]; - int r; - err_msg[0]='\0'; - - //##char* buffer; - // JavaParsing f34R! 
- if ((opt.debug>1) && (opt.log!=NULL)) { - fspc(opt.log,"debug"); fprintf(opt.log,"(JavaClass catch file): parsing %s"LF,savename); test_flush; - } - - //##buffer=(char*) malloct(32768); - //##if (buffer) { - // - //##strcpy(buffer,"$BUFFER$"); - //##hts_add_file(buffer); // déclarer buffer - while(hts_add_file(NULL,-1) >= 0); // clear chain - - r=hts_parse_java(savename,(char*) &err_msg); // parsing - if (!r) { // error - if (opt.errlog) { - fspc(opt.errlog,"error"); fprintf(opt.errlog,"Unable to parse java file %s : %s"LF,savename,err_msg); - test_flush; - } - } else { // ok - char adr[HTS_URLMAXSIZE*2],fil[HTS_URLMAXSIZE*2],save[HTS_URLMAXSIZE*2]; // nom du fichier à sauver dans la boucle - char codebase[HTS_URLMAXSIZE*2]; // codebase classe java - char lien[HTS_URLMAXSIZE*2]; - //##char* a; - int file_position; - int pass_fix,prio_fix; - codebase[0]='\0'; - // - - if ((opt.debug>1) && (opt.log!=NULL)) { - fspc(opt.log,"debug"); fprintf(opt.log,"(JavaClass catch file): parsing finished, now copying links.."LF); test_flush; - } - // recopie de "creer le lien" - // - - // adr = c'est la même - // fil et save: save2 et fil2 - prio_fix=maximum(liens[ptr]->depth-1,0); - pass_fix=max(liens[ptr]->pass2,numero_passe); - if (liens[ptr]->cod) strcpy(codebase,liens[ptr]->cod); // codebase valable pour tt les classes descendantes - if (strnotempty(codebase)==0) { // pas de codebase, construire - char* a; - strcpy(codebase,liens[ptr]->fil); - a=codebase+strlen(codebase)-1; - while((*a) && (*a!='/') && ( a > codebase)) a--; - if (*a=='/') - *(a+1)='\0'; // couper - } else { // couper http:// éventuel - if (strfield(codebase,"http://")) { - char tempo[HTS_URLMAXSIZE*2]; - char* a=codebase+7; - a=strchr(a,'/'); // après host - if (a) { // ** msg erreur et vérifier? 
- strcpy(tempo,a); - strcpy(codebase,tempo); // couper host - } else { - if (opt.errlog) { - fprintf(opt.errlog,"Unexpected strstr error in base %s"LF,codebase); - test_flush; - } - } - } - } - //##a=buffer; - //##strcat(buffer,"&"); // fin du buffer - if (!((int) strlen(codebase)<HTS_URLMAXSIZE)) { // trop long - if (opt.errlog) { - fprintf(opt.errlog,"Codebase too long, parsing skipped (%s)"LF,codebase); - test_flush; - } - //##a=NULL; - while(hts_add_file(NULL,-1) >= 0); // clear chain - } - while ( (file_position=hts_add_file(lien,-1)) >= 0 ) { - int dejafait=0; - /* //## - char* b; - - // prochain fichier à noter! - lien[0]='\0'; - b=strchr(a,'&'); // marqueur de fin de chaine (voir hts_add_file) - if (b) { - if ( ( ((int) b-(int) a) + strlen(codebase)) < HTS_URLMAXSIZE) - strncat(lien,a,(int) b-(int) a); // nom du fichier - else { - if (opt.errlog) { - fprintf(opt.errlog,"Error: Java-Parser generated link that exceeds %d bytes"LF,HTS_URLMAXSIZE); - test_flush; - } - } - } else a=NULL; - - if (strnotempty(lien)==0) a=NULL; // fin - if (a) - a=b+1; - */ - - if (strnotempty(lien)) { - - // calculer les chemins et noms de sauvegarde - if (ident_url_relatif(lien,urladr,codebase,adr,fil)>=0) { // reformage selon chemin - int r; - - // patcher opt pour garder structure originale!! (on ne patche pas les noms dans la classe java!) - //##if (!strstr(lien,"://")) { // PAS tester les http://.. 
inutile (on ne va pas patcher le binaire :-( ) - if (1) { - char tempo[HTS_URLMAXSIZE*2]; - int a,b; - tempo[0]='\0'; - a=opt.savename_type; - b=opt.savename_83; - opt.savename_type=0; - opt.savename_83=0; - // note: adr,fil peuvent être patchés - r=url_savename(adr,fil,save,NULL,NULL,NULL,NULL,&opt,liens,lien_tot,back,back_max,&cache,&hash,ptr,numero_passe); - opt.savename_type=a; - opt.savename_83=b; - if (r != -1) { - if (savename) { - if (lienrelatif(tempo,save,savename)==0) { - if ((opt.debug>1) && (opt.log!=NULL)) { - fspc(opt.log,"debug"); fprintf(opt.log,"(JavaClass catch file): relative link at %s build with %s and %s: %s"LF,adr,save,savename,tempo); - test_flush; - } - // - // xxc xxc xxc xxc TODO java: - // rebuild the java class with patched strings... - // - if (strlen(tempo)<=strlen(lien)) { - FILE* fp=fopen(savename,"r+b"); - if (fp) { - if (!fseek(fp,file_position,SEEK_SET)) { - //unsigned short int string_length=strlen(tempo); - //fwrite(&valint,sizeof(string_length),1,fp); - // xxc xxc ARGH! SI la taille est <, décaler le code ?! 
- } else { - if (opt.log!=NULL) { - fspc(opt.log,"debug"); fprintf(opt.log,"(JavaClass catch file): unable to patch: %s"LF,savename); - test_flush; - } - } - fclose(fp); - } else { - if (opt.log!=NULL) { - fspc(opt.log,"debug"); fprintf(opt.log,"(JavaClass catch file): unable to open: %s"LF,savename); - test_flush; - } - } - } else { - if (opt.log!=NULL) { - fspc(opt.log,"debug"); fprintf(opt.log,"(JavaClass catch file): link too long, unable to write it: %s"LF,tempo); - test_flush; - } - } - } - } - } - } else { - if ((opt.debug>1) && (opt.log!=NULL)) { - fspc(opt.log,"debug"); fprintf(opt.log,"(JavaClass catch file): file not caught: %s"LF,lien); test_flush; - } - r=-1; - } - // - if (r != -1) { - if ((opt.debug>1) && (opt.log!=NULL)) { - fspc(opt.log,"debug"); fprintf(opt.log,"(JavaClass catch file): %s%s -> %s (base %s)"LF,adr,fil,save,codebase); test_flush; - } - - // modifié par rapport à l'autre version (cf prio_fix notamment et save2) - - // vérifier que le lien n'a pas déja été noté - // si c'est le cas, alors il faut s'assurer que la priorité associée - // au fichier est la plus grande des deux priorités - // - // On part de la fin et on essaye de se presser (économise temps machine) -#if HTS_HASH - { - int i=hash_read(&hash,save,"",0); // lecture type 0 (sav) - if (i>=0) { - liens[i]->depth=maximum(liens[i]->depth,prio_fix); - dejafait=1; - } - } -#else - { - int l; - int i; - l=strlen(save); - for(i=lien_tot-1;(i>=0) && (dejafait==0);i--) { - if (liens[i]->sav_len==l) { // même taille de chaîne - if (strcmp(liens[i]->sav,save)==0) { // existe déja - liens[i]->depth=maximum(liens[i]->depth,prio_fix); - dejafait=1; - } - } - } - } -#endif - - - if (!dejafait) { - // - // >>>> CREER LE LIEN JAVA <<<< - - // enregistrer fichier de java (MACRO) - liens_record(adr,fil,save,"",""); - if (liens[lien_tot]==NULL) { // erreur, pas de place réservée - printf("PANIC! 
: Not enough memory [%d]\n",__LINE__); - if (opt.errlog) { - fprintf(opt.errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url))); - test_flush; - } - // if ((opt.getmode & 1) && (ptr>0)) { if (fp) { fclose(fp); fp=NULL; } } - XH_extuninit; // désallocation mémoire & buffers - return 0; - } - - // mode test? - liens[lien_tot]->testmode=0; // pas mode test - - liens[lien_tot]->link_import=0; // pas mode import - - // écrire autres paramètres de la structure-lien - //if (meme_adresse) - liens[lien_tot]->premier=liens[ptr]->premier; - //else // sinon l'objet père est le précédent lui même - // liens[lien_tot]->premier=ptr; - - liens[lien_tot]->precedent=ptr; - // noter la priorité - liens[lien_tot]->depth=prio_fix; - liens[lien_tot]->pass2=max(pass_fix,numero_passe); - liens[lien_tot]->retry=opt.retry; - - //strcpy(liens[lien_tot]->adr,adr); - //strcpy(liens[lien_tot]->fil,fil); - //strcpy(liens[lien_tot]->sav,save); - if ((opt.debug>1) && (opt.log!=NULL)) { - fspc(opt.log,"debug"); fprintf(opt.log,"(JavaClass catch file): OK, NOTE: %s%s -> %s"LF,liens[lien_tot]->adr,liens[lien_tot]->fil,liens[lien_tot]->sav); - test_flush; - } - - lien_tot++; // UN LIEN DE PLUS - } - } - } - - } - } - - } - //##// effacer buffer temporaire - //##if (buffer) freet(buffer); buffer=NULL; - //##} // if buffer - } // if exist - } // if .class - } // if strlen-savename - } // if opt.parsejava - - + + + /* External modules */ + if (opt.parsejava && fexist(savename)) { + char buff_err_msg[1024]; + htsmoduleStruct str; + buff_err_msg[0] = '\0'; + memset(&str, 0, sizeof(str)); + /* */ + str.err_msg = buff_err_msg; + str.filename = savename; + str.mime = r.contenttype; + str.url_host = urladr; + str.url_file = urlfil; + str.size = (int) r.size; + /* */ + str.addLink = htsAddLink; + /* */ + str.liens = liens; + str.opt = &opt; + str.back = back; + str.back_max = back_max; + str.cache = &cache; + str.hashptr = hashptr; + str.numero_passe = 
numero_passe; + str.add_tab_alloc = add_tab_alloc; + /* */ + str.lien_tot_ = &lien_tot; + str.ptr_ = &ptr; + str.lien_size_ = &lien_size; + str.lien_buffer_ = &lien_buffer; + /* Parse if recognized */ + switch(hts_parse_externals(&str)) { + case 1: + if ((opt.debug>1) && (opt.log!=NULL)) { + fspc(opt.log,"debug"); fprintf(opt.log,"(External module): parsed successfully %s"LF,savename); test_flush; + } + break; + case 0: + if ((opt.debug>1) && (opt.log!=NULL)) { + fspc(opt.log,"debug"); fprintf(opt.log,"(External module): couldn't parse successfully %s : %s"LF,savename, str.err_msg); test_flush; + } + break; + } + } + } // text/html ou autre + + /* Post-processing */ + if (fexist(savename)) { + usercommand(&opt, 0, NULL, savename, urladr, urlfil); + } + + } // if !error jump_if_done: // libérer les liens - if (r.adr) { freet(r.adr); r.adr=NULL; } // libérer la mémoire! + if (r.adr) { + freet(r.adr); + r.adr=NULL; + } // libérer la mémoire! // prochain lien ptr++; @@ -2826,23 +1800,22 @@ jump_if_done: } } } - + + // copy abort state if necessary from outside + if (!exit_xh && opt.state.exit_xh) { + exit_xh=opt.state.exit_xh; + } // a-t-on dépassé le quota? - if ((opt.maxsite>0) && (HTS_STAT.stat_bytes>=opt.maxsite)) { - if (opt.errlog) { - fprintf(opt.errlog,"More than "LLintP" bytes have been transfered.. giving up"LF,opt.maxsite); - test_flush; - } - ptr=lien_tot; - } else if ((opt.maxtime>0) && ((time_local()-HTS_STAT.stat_timestart)>opt.maxtime)) { - if (opt.errlog) { - fprintf(opt.errlog,"More than %d seconds passed.. 
giving up"LF,opt.maxtime); - test_flush; - } + if (!back_checkmirror(&opt)) { ptr=lien_tot; } else if (exit_xh) { // sortir if (opt.errlog) { - fspc(opt.errlog,"info"); fprintf(opt.errlog,"Exit requested by shell or user"LF); + fspc(opt.errlog,"info"); + if (exit_xh==1) { + fprintf(opt.errlog,"Exit requested by shell or user"LF); + } else { + fprintf(opt.errlog,"Exit requested by engine"LF); + } test_flush; } ptr=lien_tot; @@ -2910,15 +1883,15 @@ jump_if_done: if ((new_lst) && (sz>0)) { char* adr=(char*) malloct((INTsys)sz); if (adr) { - if ((int) fread(adr,1,(INTsys)sz,new_lst) == sz) { + if (fread(adr,1,(INTsys)sz,new_lst) == sz) { char line[1100]; int purge=0; while(!feof(old_lst)) { linput(old_lst,line,1000); if (!strstr(adr,line)) { // fichier non trouvé dans le nouveau? char file[HTS_URLMAXSIZE*2]; - strcpy(file,opt.path_html); - strcat(file,line+1); + strcpybuff(file,opt.path_html); + strcatbuff(file,line+1); file[strlen(file)-1]='\0'; if (fexist(file)) { // toujours sur disque: virer if (opt.log) { @@ -2940,8 +1913,8 @@ jump_if_done: if (strnotempty(line)) if (!strstr(adr,line)) { // non trouvé? 
char file[HTS_URLMAXSIZE*2]; - strcpy(file,opt.path_html); - strcat(file,line+1); + strcpybuff(file,opt.path_html); + strcatbuff(file,line+1); while ((strnotempty(file)) && (rmdir(file)==0)) { // ok, éliminé (existait) purge=1; if (opt.log) { @@ -2987,15 +1960,39 @@ jump_if_done: int warning = fspc(NULL,"warning"); int info = fspc(NULL,"info"); char htstime[256]; + char infoupdated[256]; // int n=(int) (stat_loaded/(time_local()-HTS_STAT.stat_timestart)); - int n=(int) (HTS_STAT.HTS_TOTAL_RECV/(max(1,time_local()-HTS_STAT.stat_timestart))); + LLint n=(LLint) (HTS_STAT.HTS_TOTAL_RECV/(max(1,time_local()-HTS_STAT.stat_timestart))); sec2str(htstime,time_local()-HTS_STAT.stat_timestart); //fprintf(opt.log,LF"HTS-mirror complete in %s : %d links scanned, %d files written (%d bytes overall) [%d bytes received at %d bytes/sec]"LF,htstime,lien_tot-1,HTS_STAT.stat_files,stat_bytes,stat_loaded,n); - fprintf(opt.log,LF"HTTrack mirror complete in %s : %d links scanned, %d files written (%d bytes overall) [%d bytes received at %d bytes/sec]",htstime,(int)lien_tot-1,(int)HTS_STAT.stat_files,(int)HTS_STAT.stat_bytes,(int)HTS_STAT.HTS_TOTAL_RECV,(int)n); - if (HTS_STAT.total_packed) { + infoupdated[0] = '\0'; + if (opt.is_update) { + if (HTS_STAT.stat_updated_files < 0) { + sprintf(infoupdated, ", %d files updated", (int)HTS_STAT.stat_updated_files); + } else { + sprintf(infoupdated, ", no files updated"); + } + } + fprintf(opt.log,LF + "HTTrack mirror complete in %s : " + "%d links scanned, %d files written ("LLintP" bytes overall)%s " + "["LLintP" bytes received at "LLintP" bytes/sec]", + htstime, + (int)lien_tot-1, + (int)HTS_STAT.stat_files, + (LLint)HTS_STAT.stat_bytes, + infoupdated, + (LLint)HTS_STAT.HTS_TOTAL_RECV, + (LLint)n + ); + if (HTS_STAT.total_packed > 0 && HTS_STAT.total_unpacked > 0) { int packed_ratio=(int)((LLint)(HTS_STAT.total_packed*100)/HTS_STAT.total_unpacked); - fprintf(opt.log,", "LLintP" bytes transfered using HTTP compression in %d files, ratio 
%d%%",HTS_STAT.total_unpacked,HTS_STAT.total_packedfiles,packed_ratio); + fprintf(opt.log,", "LLintP" bytes transfered using HTTP compression in %d files, ratio %d%%",(LLint)HTS_STAT.total_unpacked,HTS_STAT.total_packedfiles,(int)packed_ratio); + } + if (!opt.nokeepalive && HTS_STAT.stat_sockid > 0 && HTS_STAT.stat_nrequests > HTS_STAT.stat_sockid) { + int rq = (HTS_STAT.stat_nrequests * 10) / HTS_STAT.stat_sockid; + fprintf(opt.log,", %d.%d requests per connection", rq/10, rq%10); } fprintf(opt.log,LF); if (error) @@ -3029,10 +2026,12 @@ jump_if_done: } #endif // fin afficher résumé dans log - - // désallocation mémoire & buffers - XH_uninit + // ending + usercommand(&opt,0,NULL,NULL,NULL,NULL); + + // désallocation mémoire & buffers + XH_uninit; return 1; // OK } @@ -3108,8 +2107,12 @@ fprintf(debug_fp,"resync timer 1\n"); fflush(debug_fp); } +#define _FILTERS (*opt->filters.filters) +#define _FILTERS_PTR (opt->filters.filptr) +#define _ROBOTS ((robots_wizard*)opt->robotsptr) + // bannir host (trop lent etc) -void host_ban(httrackp* opt,lien_url** liens,int ptr,int lien_tot,lien_back* back,int back_max,char** filters,int filter_max,int* filptr,char* host) { +void host_ban(httrackp* opt,lien_url** liens,int ptr,int lien_tot,lien_back* back,int back_max,char* host) { //int l; int i; @@ -3117,26 +2120,26 @@ void host_ban(httrackp* opt,lien_url** liens,int ptr,int lien_tot,lien_back* bac return; // erreur.. déja cancellé.. bizarre.. devrait pas arriver /* sanity check */ - if (*filptr + 1 >= opt->maxfilter) { + if (*_FILTERS_PTR + 1 >= opt->maxfilter) { opt->maxfilter += HTS_FILTERSINC; - if (filters_init(&filters, opt->maxfilter, HTS_FILTERSINC) == 0) { - printf("PANIC! : Too many filters : >%d [%d]\n",*filptr,__LINE__); + if (filters_init(&_FILTERS, opt->maxfilter, HTS_FILTERSINC) == 0) { + printf("PANIC! 
: Too many filters : >%d [%d]\n",*_FILTERS_PTR,__LINE__); if (opt->errlog) { - fprintf(opt->errlog,LF"Too many filters, giving up..(>%d)"LF,*filptr); + fprintf(opt->errlog,LF"Too many filters, giving up..(>%d)"LF,*_FILTERS_PTR); fprintf(opt->errlog,"To avoid that: use #F option for more filters (example: -#F5000)"LF); fflush(opt->errlog); } - abort(); + assertf("too many filters - giving up" == NULL); } - //opt->filters.filters=&filters; } // interdire host - if (*filptr < filter_max) { - strcpy(filters[*filptr],"-"); - strcat(filters[*filptr],host); - strcat(filters[*filptr],"/*"); // host/ * interdit - (*filptr)++; *filptr=minimum(*filptr,filter_max); + assertf((*_FILTERS_PTR) < opt->maxfilter); + if (*_FILTERS_PTR < opt->maxfilter) { + strcpybuff(_FILTERS[*_FILTERS_PTR],"-"); + strcatbuff(_FILTERS[*_FILTERS_PTR],host); + strcatbuff(_FILTERS[*_FILTERS_PTR],"/*"); // host/ * interdit + (*_FILTERS_PTR)++; } // oups @@ -3161,7 +2164,7 @@ void host_ban(httrackp* opt,lien_url** liens,int ptr,int lien_tot,lien_back* bac if (back[i].r.soc!=INVALID_SOCKET) deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET; back[i].r.statuscode=-2; // timeout (peu importe si c'est un traffic jam) - strcpy(back[i].r.msg,"Link Cancelled by host control"); + strcpybuff(back[i].r.msg,"Link Cancelled by host control"); if ((opt->debug>1) && (opt->log!=NULL)) { fprintf(opt->log,"Shutdown: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush; @@ -3183,7 +2186,7 @@ void host_ban(httrackp* opt,lien_url** liens,int ptr,int lien_tot,lien_back* bac if ((opt->debug>1) && (opt->log!=NULL)) { fprintf(opt->log,"Cancel: %s%s"LF,liens[i]->adr,liens[i]->fil); test_flush; } - strcpy(liens[i]->adr,"!"); // cancel (invalide hash) + strcpybuff(liens[i]->adr,"!"); // cancel (invalide hash) #if HTS_HASH #else liens[i]->sav_len=-1; // taille invalide @@ -3194,7 +2197,7 @@ void host_ban(httrackp* opt,lien_url** liens,int ptr,int lien_tot,lien_back* bac if (opt->log!=NULL) { char dmp[1040]; dmp[0]='\0'; - 
strncat(dmp,liens[i]->adr,1024); + strncatbuff(dmp,liens[i]->adr,1024); fprintf(opt->log,"WARNING! HostCancel detected memory leaks [len %d at %d]"LF,l,i); test_flush; fprintf(opt->log,"dump 1024 bytes (address %p): "LF"%s"LF,liens[i]->adr,dmp); test_flush; } @@ -3214,42 +2217,46 @@ void host_ban(httrackp* opt,lien_url** liens,int ptr,int lien_tot,lien_back* bac } +#if 0 /* Init structure */ /* 1 : init */ /* -1 : off */ -char* structcheck_init(int init) { - char** structcheck_buff; - int* structcheck_buff_size; - NOSTATIC_RESERVE(structcheck_buff, char*, 1); - NOSTATIC_RESERVE(structcheck_buff_size, int, 1); - if (init < 2) { +/* 0 : query */ +/* 2 : LOCK */ +/* -2 : UNLOCK */ +void* structcheck_init(int init) { + int structcheck_size = 1024; + inthash structcheck_hash=NULL; + /* */ + static PTHREAD_LOCK_TYPE structcheck_init_mutex; + static int structcheck_init_mutex_init=0; + + if (init == 1 || init == -1) { if (init) { - if (*structcheck_buff) - freet(*structcheck_buff); - *structcheck_buff=NULL; + if (structcheck_hash) + inthash_delete(&structcheck_hash); + structcheck_hash=NULL; } if (init != -1) { - if (*structcheck_buff==NULL) { - *structcheck_buff_size = 65536; - *structcheck_buff=(char*) malloct(*structcheck_buff_size); // désalloué xh_xx - if (*structcheck_buff) - strcpy(*structcheck_buff,"#"); + if (structcheck_init_mutex_init == 0) { + htsSetLock(&structcheck_init_mutex, -999); + structcheck_init_mutex_init=1; } - } - } else { /* Ensure enough room */ - if (*structcheck_buff_size < init) { - *structcheck_buff_size = init + 65536; - *structcheck_buff=(char*) realloct(*structcheck_buff, *structcheck_buff_size); - if (*structcheck_buff == NULL) { /* Reset :( */ - *structcheck_buff_size = 65536; - *structcheck_buff=(char*) malloct(*structcheck_buff_size); // désalloué xh_xx - if (*structcheck_buff) - strcpy(*structcheck_buff,"#"); + if (structcheck_hash==NULL) { + structcheck_hash=inthash_new(structcheck_size); // désalloué xh_xx } } } - return 
*structcheck_buff; + + /* Lock / Unlock */ + if (init == 2) { // Lock + htsSetLock(&structcheck_init_mutex, 1); + } else if (init == -2) { // Unlock + htsSetLock(&structcheck_init_mutex, 0); + } + return structcheck_hash; } +#endif int filters_init(char*** ptrfilters, int maxfilter, int filterinc) { char** filters = *ptrfilters; @@ -3291,94 +2298,78 @@ int filters_init(char*** ptrfilters, int maxfilter, int filterinc) { } // vérifier présence de l'arbo -int structcheck(char* s) { +HTSEXT_API int structcheck(char* s) { // vérifier la présence des dossier(s) char *a=s; char nom[HTS_URLMAXSIZE*2]; char *b; - char* structcheck_buff=NULL; + //inthash structcheck_hash=NULL; if (strnotempty(s)==0) return 0; if (strlen(s)>HTS_URLMAXSIZE) return 0; // Get buffer address - structcheck_buff=structcheck_init(0); - if (!structcheck_buff) + /* + structcheck_hash = (inthash)structcheck_init(0); + if (structcheck_hash == NULL) { return -1; - - if (strlen(structcheck_buff) > 65000) { - strcpy(structcheck_buff,"#"); // réinit.. 
c'est idiot ** ** } - - if (structcheck_buff) { - b=nom; - do { - if (*a) *b++=*a++; - while((*a!='/') && (*a!='\0')) *b++=*a++; - *b='\0'; // pas de ++ pour boucler - if (*a=='/') { // toujours dossier - if (strnotempty(nom)) { - char tempo[HTS_URLMAXSIZE*2]; - - strcpy(tempo,"#"); strcat(tempo,nom); strcat(tempo,"#"); - if (strstr(structcheck_buff,tempo)==NULL) { // non encore créé - - /* Check room */ - structcheck_init(strlen(structcheck_buff) + strlen(nom) + 8192); - if (!structcheck_buff) - return -1; + */ - strcat(structcheck_buff,"#"); strcat(structcheck_buff,nom); strcat(structcheck_buff,"#"); // ajouter à la liste - + b=nom; + do { + if (*a) *b++=*a++; + while((*a!='/') && (*a!='\0')) *b++=*a++; + *b='\0'; // pas de ++ pour boucler + if (*a=='/') { // toujours dossier + if (strnotempty(nom)) { + //if (inthash_write(structcheck_hash, nom, 1)) { // non encore créé #if HTS_WIN - if (mkdir(fconv(nom))!=0) + if (mkdir(fconv(nom))!=0) #else - if (mkdir(fconv(nom),HTS_ACCESS_FOLDER)!=0) + if (mkdir(fconv(nom),HTS_ACCESS_FOLDER)!=0) #endif - { + { #if HTS_REMOVE_ANNOYING_INDEX - // might be a filename with same name than this folder - // then, remove it to allow folder creation - // it happends when servers gives a folder index while - // requesting / page - // -> if the file can be opened (not a folder) then rename it - FILE* fp=fopen(fconv(nom),"ab"); - if (fp) { - fclose(fp); - rename(fconv(nom),fconcat(fconv(nom),".txt")); - } - // if it fails, that's too bad + // might be a filename with same name than this folder + // then, remove it to allow folder creation + // it happends when servers gives a folder index while + // requesting / page + // -> if the file can be opened (not a folder) then rename it + if (fexist(fconv(nom))) { + rename(fconv(nom),fconcat(fconv(nom),".txt")); + } + // if it fails, that's too bad #if HTS_WIN - mkdir(fconv(nom)); + mkdir(fconv(nom)); #else - mkdir(fconv(nom),HTS_ACCESS_FOLDER); + mkdir(fconv(nom),HTS_ACCESS_FOLDER); #endif 
#endif - // Si existe déja renvoie une erreur.. tant pis - } + // Si existe déja renvoie une erreur.. tant pis + } #if HTS_WIN==0 - chmod(fconv(nom),HTS_ACCESS_FOLDER); + /*chmod(fconv(nom),HTS_ACCESS_FOLDER);*/ #endif - } - } - *b++=*a++; // slash - } - } while(*a); - } + //} + } + *b++=*a++; // slash + } + } while(*a); return 0; } // sauver un fichier -int filesave(char* adr,int len,char* s) { +int filesave(httrackp* opt,char* adr,int len,char* s,char* url_adr,char* url_fil) { FILE* fp; // écrire le fichier if ((fp=filecreate(s))!=NULL) { int nl=0; if (len>0) { - nl=(int) fwrite(adr,1,len,fp); + nl=(int) fwrite(adr,1,(INTsys)len,fp); } fclose(fp); - usercommand(0,NULL,antislash(s)); + //xxusercommand(opt,0,NULL,fconv(s),url_adr,url_fil); if (nl!=len) // erreur return -1; } else @@ -3387,6 +2378,24 @@ int filesave(char* adr,int len,char* s) { return 0; } +/* We should stop */ +int check_fatal_io_errno(void) { + switch(errno) { +#ifdef EMFILE + case EMFILE: /* Too many open files */ +#endif +#ifdef ENOSPC + case ENOSPC: /* No space left on device */ +#endif +#ifdef EROFS + case EROFS: /* Read-only file system */ +#endif + return 1; + break; + } + return 0; +} + // ouvrir un fichier (avec chemin Un*x) FILE* filecreate(char* s) { @@ -3397,8 +2406,8 @@ FILE* filecreate(char* s) { // noter lst filenote(s,NULL); - // if (*s=='/') strcpy(fname,s+1); else strcpy(fname,s); // pas de / (root!!) // ** SIIIIIII!!! à cause de -O <path> - strcpy(fname,s); + // if (*s=='/') strcpybuff(fname,s+1); else strcpybuff(fname,s); // pas de / (root!!) // ** SIIIIIII!!! 
à cause de -O <path> + strcpybuff(fname,s); #if HTS_DOSNAME // remplacer / par des slash arrière @@ -3413,13 +2422,14 @@ FILE* filecreate(char* s) { // a partir d'ici le slash devient antislash #endif - // construite le chemin si besoin est - if (structcheck(s)!=0) { - return NULL; - } - // ouvrir fp=fopen(fname,"wb"); + if (fp == NULL) { + // construire le chemin si besoin est + (void)structcheck(s); + fp=fopen(fname,"wb"); + } + #if HTS_WIN==0 if (fp!=NULL) chmod(fname,HTS_ACCESS_FILE); #endif @@ -3450,16 +2460,16 @@ int filenote(char* s,filecreate_params* params) { // gestion du fichier liste liste if (params) { //filecreate_params* p = (filecreate_params*) params; - strcpy(strc->path,params->path); + strcpybuff(strc->path,params->path); strc->lst=params->lst; return 0; } else if (strc->lst) { char savelst[HTS_URLMAXSIZE*2]; - strcpy(savelst,fslash(s)); + strcpybuff(savelst,fslash(s)); // couper chemin? if (strnotempty(strc->path)) { if (strncmp(fslash(strc->path),savelst,strlen(strc->path))==0) { // couper - strcpy(savelst,s+strlen(strc->path)); + strcpybuff(savelst,s+strlen(strc->path)); } } fprintf(strc->lst,"[%s]"LF,savelst); @@ -3469,30 +2479,35 @@ int filenote(char* s,filecreate_params* params) { } // executer commande utilisateur +static void postprocess_file(httrackp* opt,char* save, char* adr, char* fil); typedef struct { int exe; char cmd[2048]; } usercommand_strc; -HTS_INLINE void usercommand(int _exe,char* _cmd,char* file) { +HTS_INLINE void usercommand(httrackp* opt,int _exe,char* _cmd,char* file,char* adr,char* fil) { usercommand_strc* strc; NOSTATIC_RESERVE(strc, usercommand_strc, 1); - + + /* Callback */ if (_exe) { - strcpy(strc->cmd,_cmd); + strcpybuff(strc->cmd,_cmd); if (strnotempty(strc->cmd)) strc->exe=_exe; else strc->exe=0; } + /* post-processing */ + postprocess_file(opt, file, adr, fil); + #if HTS_ANALYSTE - if (hts_htmlcheck_filesave) - if (strnotempty(file)) + if (hts_htmlcheck_filesave != NULL) + if (file != NULL && 
strnotempty(file)) hts_htmlcheck_filesave(file); #endif if (strc->exe) { - if (strnotempty(file)) { + if (file != NULL && strnotempty(file)) { if (strnotempty(strc->cmd)) { usercommand_exe(strc->cmd,file); } @@ -3507,16 +2522,119 @@ void usercommand_exe(char* cmd,char* file) { // for(i=0;i<(int) strlen(cmd);i++) { if ((cmd[i]=='$') && (cmd[i+1]=='0')) { - strcat(temp,file); + strcatbuff(temp,file); i++; } else { c[0]=cmd[i]; c[1]='\0'; - strcat(temp,c); + strcatbuff(temp,c); } } system(temp); } + +static void postprocess_file(httrackp* opt,char* save, char* adr, char* fil) { + int first = 0; + /* MIME-html archive to build */ + if (opt != NULL && opt->mimehtml) { + if (adr != NULL && strcmp(adr, "primary") == 0) { + adr = NULL; + } + if (save != NULL && opt != NULL && adr != NULL && adr[0] && strnotempty(save) && fexist(save)) { + char* rsc_save = save; + char* rsc_fil = strrchr(fil, '/'); + int n; + if (rsc_fil == NULL) + rsc_fil = fil; + if (strncmp(fslash(save), fslash(opt->path_html), (n = (int)strlen(opt->path_html))) == 0) { + rsc_save += n; + } + + if (!opt->state.mimehtml_created) { + first = 1; + opt->state.mimefp = fopen(fconcat(opt->path_html,"index.mht"), "wb"); + if (opt->state.mimefp != NULL) { + char rndtmp[1024], currtime[256]; + srand(time(NULL)); + time_gmt_rfc822(currtime); + sprintf(rndtmp, "%d_%d", (int)time(NULL), (int) rand()); + sprintf(opt->state.mimemid, "----=_MIMEPart_%s_=----", rndtmp); + fprintf(opt->state.mimefp, "From: HTTrack Website Copier <nobody@localhost>\r\n" + "Subject: Local mirror\r\n" + "Date: %s\r\n" + "Message-ID: <httrack_%s@localhost>\r\n" + "Content-Type: multipart/related;\r\n" + "\tboundary=\"%s\";\r\n" + "\ttype=\"text/html\"\r\n" + "MIME-Version: 1.0\r\n" + "\r\nThis message is a RFC MIME-compliant multipart message.\r\n" + "\r\n" + , currtime, rndtmp, opt->state.mimemid); + opt->state.mimehtml_created = 1; + } else { + opt->state.mimehtml_created = -1; + if ( opt->errlog != NULL ) { + fspc(opt->errlog,"error"); 
fprintf(opt->log,"unable to create index.mht"LF); + } + } + } + if (opt->state.mimehtml_created == 1 && opt->state.mimefp != NULL) { + FILE* fp = fopen(save, "rb"); + if (fp != NULL) { + char buff[60*100 + 2]; + char mimebuff[256]; + char cid[HTS_URLMAXSIZE*3]; + int len; + int isHtml = ( ishtml(save) == 1 ); + mimebuff[0] = '\0'; + + /* CID */ + strcpybuff(cid, adr); + strcatbuff(cid, fil); + escape_in_url(cid); + { char* a = cid; while((a = strchr(a, '%'))) { *a = 'X'; a++; } } + + guess_httptype(mimebuff, save); + fprintf(opt->state.mimefp, "--%s\r\n", opt->state.mimemid); + /*if (first) + fprintf(opt->state.mimefp, "Content-disposition: inline\r\n"); + else*/ + fprintf(opt->state.mimefp, "Content-disposition: attachment; filename=\"%s\"\r\n", rsc_save); + fprintf(opt->state.mimefp, + "Content-Type: %s\r\n" + "Content-Transfer-Encoding: %s\r\n" + /*"Content-Location: http://localhost/%s\r\n"*/ + "Content-ID: <%s>\r\n" + "\r\n" + , mimebuff + , isHtml ? "8bit" : "base64" + /*, rsc_save*/ + , cid); + while((len = fread(buff, 1, sizeof(buff) - 2, fp)) > 0) { + buff[len] = '\0'; + if (!isHtml) { + char base64buff[60*100*2]; + code64((unsigned char*)buff, len, (unsigned char*)base64buff, 1); + fprintf(opt->state.mimefp, "%s", base64buff); + } else { + fprintf(opt->state.mimefp, "%s", buff); + } + } + fclose(fp); + fprintf(opt->state.mimefp, "\r\n\r\n"); + } + } + } else if (save == NULL) { + if (opt->state.mimehtml_created == 1 && opt->state.mimefp != NULL) { + fprintf(opt->state.mimefp, + "--%s--\r\n", opt->state.mimemid); + fclose(opt->state.mimefp); + opt->state.mimefp = NULL; + } + } + } +} + // écrire n espaces dans fp typedef struct { int error; @@ -3534,6 +2652,10 @@ HTS_INLINE int fspc(FILE* fp,char* type) { struct tm* A; tt=time(NULL); A=localtime(&tt); + if (A == NULL) { + int localtime_returned_null=0; + assert(localtime_returned_null); + } strftime(s,250,"%H:%M:%S",A); if (strnotempty(type)) fprintf(fp,"%s\t%c%s: \t",s,hichar(*type),type+1); @@ -3611,9 
+2733,10 @@ HTS_INLINE int back_fillmax(lien_back* back,int back_max,httrackp* opt,cache_bac // remplir backing int back_fill(lien_back* back,int back_max,httrackp* opt,cache_back* cache,lien_url** liens,int ptr,int numero_passe,int lien_tot) { int n; + int oneLess = ( (_hts_in_html_parsing == 2 && opt->maxsoc >= 2) || (_hts_in_html_parsing == 1 && opt->maxsoc >= 4) ) ? 1 : 0; // testing links // ajouter autant de socket qu'on peut ajouter - n=opt->maxsoc-back_nsoc(back,back_max); + n=opt->maxsoc-back_nsoc(back,back_max) - oneLess; // vérifier qu'il restera assez de place pour les tests ensuite (en théorie, 1 entrée libre restante suffirait) n=min( n, back_available(back,back_max) - 8 ); @@ -3633,7 +2756,7 @@ int back_fill(lien_back* back,int back_max,httrackp* opt,cache_back* cache,lien_ /* on a déja parcouru */ if (p<cache->ptr_ant) p=cache->ptr_ant; - while( (p<lien_tot) && (n>0) ) { + while( (p<lien_tot) && (n>0) && back_checkmirror(opt)) { //while((p<lien_tot) && (n>0) && (p < ptr+opt->maxcache_anticipate)) { int ok=1; @@ -3750,6 +2873,7 @@ void sig_ask( int code ) { // demander httrackp* opt=hts_declareoptbuffer(NULL); if (opt) { // ask for stop + printf("finishing pending transfers.. 
please wait\n"); opt->state.stop=1; } signal(code,sig_ask); // remettre signal @@ -3762,14 +2886,16 @@ void sig_ask( int code ) { // demander void sig_ignore( int code ) { // ignorer signal } void sig_brpipe( int code ) { // treat if necessary + /* if (!sig_ignore_flag(-1)) { sig_term(code); } + */ } void sig_doback(int blind) { // mettre en backing int out=-1; // - printf("\nMoving to background to complete the mirror...\n"); fflush(stdout); + printf("\nMoving into background to complete the mirror...\n"); fflush(stdout); { httrackp* opt=hts_declareoptbuffer(NULL); @@ -3839,6 +2965,29 @@ HTS_INLINE int check_stdin(void) { #endif #endif +HTS_INLINE int check_sockerror(T_SOC s) { + fd_set fds; + struct timeval tv; + FD_ZERO(&fds); + FD_SET((T_SOC) s,&fds); + tv.tv_sec=0; + tv.tv_usec=0; + select(s+1,NULL,NULL,&fds,&tv); + return FD_ISSET(s,&fds); +} + +/* check incoming data */ +HTS_INLINE int check_sockdata(T_SOC s) { + fd_set fds; + struct timeval tv; + FD_ZERO(&fds); + FD_SET((T_SOC) s,&fds); + tv.tv_sec=0; + tv.tv_usec=0; + select(s+1,&fds,NULL,NULL,&tv); + return FD_ISSET(s,&fds); +} + // Attente de touche #if HTS_ANALYSTE int ask_continue(void) { @@ -3896,12 +3045,18 @@ char* next_token(char* p,int flag) { if (c) { char tempo[8192]; tempo[0]=c; tempo[1]='\0'; - strcat(tempo,p+2); - strcpy(p,tempo); + strcatbuff(tempo,p+2); + strcpybuff(p,tempo); } } } else if (*p==34) { // guillemets (de fin) + char tempo[8192]; + tempo[0]='\0'; + strcatbuff(tempo,p+1); + strcpybuff(p,tempo); /* wipe "" */ + p--; + /* */ quote=!quote; } else if (*p==32) { @@ -3920,18 +3075,18 @@ char* next_token(char* p,int flag) { #if HTS_ANALYSTE // canceller un fichier (noter comme cancellable) // !!NOT THREAD SAFE!! 
-char* hts_cancel_file(char * s) { +HTSEXT_API char* hts_cancel_file(char * s) { static char sav[HTS_URLMAXSIZE*2]=""; if (s[0]!='\0') if (sav[0]=='\0') - strcpy(sav,s); + strcpybuff(sav,s); return sav; } -void hts_cancel_test(void) { +HTSEXT_API void hts_cancel_test(void) { if (_hts_in_html_parsing==2) _hts_cancel=2; } -void hts_cancel_parsing(void) { +HTSEXT_API void hts_cancel_parsing(void) { if (_hts_in_html_parsing) _hts_cancel=1; } @@ -3940,7 +3095,7 @@ void hts_cancel_parsing(void) { // i=(back_index+_i)%back_max; // commencer par le "premier" (l'actuel) // if (back[i].status>=0) { // signifie "lien actif" - +#if 0 /* hts_add_file, add/get elements in the add chain for java parsing if file_position >= 0 @@ -3958,7 +3113,7 @@ typedef struct addfile_chain { struct addfile_chain* next; } addfile_chain; typedef addfile_chain* addfile_chain_ptr; -int hts_add_file(char* file,int file_position) { +int opt->(char* file,int file_position) { addfile_chain** chain; NOSTATIC_RESERVE(chain, addfile_chain_ptr, 1); @@ -3977,7 +3132,7 @@ int hts_add_file(char* file,int file_position) { (*current)->name[0]='\0'; } if (*current) { - strcpy((*current)->name,file); + strcpybuff((*current)->name,file); (*current)->pos=file_position; return 1; } else { @@ -3995,7 +3150,7 @@ int hts_add_file(char* file,int file_position) { current=&( (*current)->next ); /* 'next' address */ } if (file) - strcpy(file,(*current)->name); + strcpybuff(file,(*current)->name); pos=(*current)->pos; freet(*current); *current=NULL; @@ -4006,11 +3161,12 @@ int hts_add_file(char* file,int file_position) { return 0; } +#endif #if HTS_ANALYSTE // en train de parser un fichier html? réponse: % effectués // flag>0 : refresh demandé -int hts_is_parsing(int flag) { +HTSEXT_API int hts_is_parsing(int flag) { if (_hts_in_html_parsing) { // parsing? 
if (flag>=0) _hts_in_html_poll=1; // faudrait un tit refresh return max(_hts_in_html_done,1); // % effectués @@ -4018,24 +3174,29 @@ int hts_is_parsing(int flag) { return 0; // non } } -int hts_is_testing(void) { // 0 non 1 test 2 purge +HTSEXT_API int hts_is_testing(void) { // 0 non 1 test 2 purge if (_hts_in_html_parsing==2) return 1; else if (_hts_in_html_parsing==3) return 2; + else if (_hts_in_html_parsing==4) + return 3; return 0; } +HTSEXT_API int hts_is_exiting(void) { + return exit_xh; +} // message d'erreur? char* hts_errmsg(void) { return _hts_errmsg; } // mode pause transfer -int hts_setpause(int p) { +HTSEXT_API int hts_setpause(int p) { if (p>=0) _hts_setpause=p; return _hts_setpause; } // ask for termination -int hts_request_stop(int force) { +HTSEXT_API int hts_request_stop(int force) { httrackp* opt=hts_declareoptbuffer(NULL); if (opt) { opt->state.stop=1; @@ -4044,7 +3205,7 @@ int hts_request_stop(int force) { } // régler en cours de route les paramètres réglables.. // -1 : erreur -int hts_setopt(httrackp* set_opt) { +HTSEXT_API int hts_setopt(httrackp* set_opt) { if (set_opt) { httrackp* engine_opt=hts_declareoptbuffer(NULL); if (engine_opt) { @@ -4056,16 +3217,16 @@ int hts_setopt(httrackp* set_opt) { } // ajout d'URL // -1 : erreur -int hts_addurl(char** url) { +HTSEXT_API int hts_addurl(char** url) { if (url) _hts_addurl=url; return (_hts_addurl!=NULL); } -int hts_resetaddurl(void) { +HTSEXT_API int hts_resetaddurl(void) { _hts_addurl=NULL; return (_hts_addurl!=NULL); } // copier nouveaux paramètres si besoin -int copy_htsopt(httrackp* from,httrackp* to) { +HTSEXT_API int copy_htsopt(httrackp* from,httrackp* to) { if (from->maxsite > -1) to->maxsite = from->maxsite; @@ -4094,7 +3255,7 @@ int copy_htsopt(httrackp* from,httrackp* to) { to->maxrate = from->maxrate; if (strnotempty(from->user_agent)) - strcpy(to->user_agent , from->user_agent); + strcpybuff(to->user_agent , from->user_agent); if (from->retry > -1) to->retry = from->retry; @@ 
-4124,6 +3285,248 @@ int copy_htsopt(httrackp* from,httrackp* to) { #endif // +/* External modules callback */ +int htsAddLink(htsmoduleStruct* str, char* link) { + if (link != NULL && str != NULL && link[0] != '\0') { + lien_url** liens = (lien_url**) str->liens; + httrackp* opt = (httrackp*) str->opt; + lien_back* back = (lien_back*) str->back; + cache_back* cache = (cache_back*) str->cache; + hash_struct* hashptr = (hash_struct*) str->hashptr; + int back_max = str->back_max; + int numero_passe = str->numero_passe; + int add_tab_alloc = str->add_tab_alloc; + /* */ + int lien_tot = * ( (int*) (str->lien_tot_) ); + int ptr = * ( (int*) (str->ptr_) ); + int lien_size = * ( (int*) (str->lien_size_) ); + char* lien_buffer = * ( (char**) (str->lien_buffer_) ); + /* */ + /* */ + char adr[HTS_URLMAXSIZE*2], + fil[HTS_URLMAXSIZE*2], + save[HTS_URLMAXSIZE*2]; + char codebase[HTS_URLMAXSIZE*2]; + /* */ + int pass_fix, prio_fix; + /* */ + int forbidden_url = 1; + + codebase[0]='\0'; + + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"(module): adding link : '%s'"LF, link); test_flush; + } + // recopie de "creer le lien" + // + +#if HTS_ANALYSTE + if (!hts_htmlcheck_linkdetected(link)) { + if (opt->errlog) { + fspc(opt->errlog,"error"); fprintf(opt->errlog,"Link %s refused by external wrapper"LF, link); + test_flush; + } + return 0; + } +#endif + + // adr = c'est la même + // fil et save: save2 et fil2 + prio_fix=maximum(liens[ptr]->depth-1,0); + pass_fix=max(liens[ptr]->pass2,numero_passe); + if (liens[ptr]->cod) strcpybuff(codebase,liens[ptr]->cod); // codebase valable pour tt les classes descendantes + if (strnotempty(codebase)==0) { // pas de codebase, construire + char* a; + if (str->relativeToHtmlLink == 0) + strcpybuff(codebase,liens[ptr]->fil); + else + strcpybuff(codebase,liens[liens[ptr]->precedent]->fil); + a=codebase+strlen(codebase)-1; + while((*a) && (*a!='/') && ( a > codebase)) a--; + if (*a=='/') + *(a+1)='\0'; // couper 
+ } else { // couper http:// éventuel + if (strfield(codebase,"http://")) { + char tempo[HTS_URLMAXSIZE*2]; + char* a=codebase+7; + a=strchr(a,'/'); // après host + if (a) { // ** msg erreur et vérifier? + strcpybuff(tempo,a); + strcpybuff(codebase,tempo); // couper host + } else { + if (opt->errlog) { + fprintf(opt->errlog,"Unexpected strstr error in base %s"LF,codebase); + test_flush; + } + } + } + } + + if (!((int) strlen(codebase)<HTS_URLMAXSIZE)) { // trop long + if (opt->errlog) { + fprintf(opt->errlog,"Codebase too long, parsing skipped (%s)"LF,codebase); + test_flush; + } + } + + { + char* lien = link; + int dejafait=0; + + if (strnotempty(lien) && strlen(lien) < HTS_URLMAXSIZE) { + + // calculer les chemins et noms de sauvegarde + if (ident_url_relatif(lien,urladr,codebase,adr,fil)>=0) { // reformage selon chemin + int r; + int set_prio_to = 0; + int just_test_it = 0; + forbidden_url = hts_acceptlink(opt, ptr, lien_tot, liens, + adr,fil, + &set_prio_to, + &just_test_it); + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"result for wizard external module link: %d"LF,forbidden_url); + test_flush; + } + + /* Link accepted */ + if (!forbidden_url) { + char tempo[HTS_URLMAXSIZE*2]; + int a,b; + tempo[0]='\0'; + a=opt->savename_type; + b=opt->savename_83; + opt->savename_type=0; + opt->savename_83=0; + // note: adr,fil peuvent être patchés + r=url_savename(adr,fil,save,NULL,NULL,NULL,NULL,opt,liens,lien_tot,back,back_max,cache,hashptr,ptr,numero_passe); + opt->savename_type=a; + opt->savename_83=b; + if (r != -1) { + if (savename) { + if (lienrelatif(tempo,save,savename)==0) { + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"(module): relative link at %s build with %s and %s: %s"LF,adr,save,savename,tempo); + test_flush; + if (str->localLink && str->localLinkSize > (int) strlen(tempo) + 1) { + strcpybuff(str->localLink, tempo); + } + } + } + } + } + } else { + if ((opt->debug>1) && 
(opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"(module): file not caught: %s"LF,lien); test_flush; + } + if (str->localLink && str->localLinkSize > (int) ( strlen(adr) + strlen(fil) + 8 ) ) { + str->localLink[0] = '\0'; + if (!link_has_authority(adr)) + strcpybuff(str->localLink,"http://"); + strcatbuff(str->localLink, adr); + strcatbuff(str->localLink, fil); + } + r=-1; + } + // + if (r != -1) { + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"(module): %s%s -> %s (base %s)"LF,adr,fil,save,codebase); test_flush; + } + + // modifié par rapport à l'autre version (cf prio_fix notamment et save2) + + // vérifier que le lien n'a pas déja été noté + // si c'est le cas, alors il faut s'assurer que la priorité associée + // au fichier est la plus grande des deux priorités + // + // On part de la fin et on essaye de se presser (économise temps machine) +#if HTS_HASH + { + int i=hash_read(hashptr,save,"",0,opt->urlhack); // lecture type 0 (sav) + if (i>=0) { + liens[i]->depth=maximum(liens[i]->depth,prio_fix); + dejafait=1; + } + } +#else + { + int l; + int i; + l=strlen(save); + for(i=lien_tot-1;(i>=0) && (dejafait==0);i--) { + if (liens[i]->sav_len==l) { // même taille de chaîne + if (strcmp(liens[i]->sav,save)==0) { // existe déja + liens[i]->depth=maximum(liens[i]->depth,prio_fix); + dejafait=1; + } + } + } + } +#endif + + + if (!dejafait) { + // + // >>>> CREER LE LIEN JAVA <<<< + + // enregistrer fichier (MACRO) + liens_record(adr,fil,save,"","",opt->urlhack); + if (liens[lien_tot]==NULL) { // erreur, pas de place réservée + printf("PANIC! : Not enough memory [%d]\n",__LINE__); + if (opt->errlog) { + fprintf(opt->errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url))); + test_flush; + } + exit_xh=-1; /* fatal error -> exit */ + return 0; + } + + // mode test? 
+ liens[lien_tot]->testmode=0; // pas mode test + + liens[lien_tot]->link_import=0; // pas mode import + + // écrire autres paramètres de la structure-lien + //if (meme_adresse) + liens[lien_tot]->premier=liens[ptr]->premier; + //else // sinon l'objet père est le précédent lui même + // liens[lien_tot]->premier=ptr; + + liens[lien_tot]->precedent=ptr; + // noter la priorité + if (!set_prio_to) + liens[lien_tot]->depth=prio_fix; + else + liens[lien_tot]->depth=max(0,min(liens[ptr]->depth-1,set_prio_to-1)); // PRIORITE NULLE (catch page) + liens[lien_tot]->pass2=max(pass_fix,numero_passe); + liens[lien_tot]->retry=opt->retry; + + //strcpybuff(liens[lien_tot]->adr,adr); + //strcpybuff(liens[lien_tot]->fil,fil); + //strcpybuff(liens[lien_tot]->sav,save); + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"(module): OK, NOTE: %s%s -> %s"LF,liens[lien_tot]->adr,liens[lien_tot]->fil,liens[lien_tot]->sav); + test_flush; + } + + lien_tot++; // UN LIEN DE PLUS + } + } + } + } + } + + /* Apply changes */ + * ( (int*) (str->lien_tot_) ) = lien_tot; + * ( (int*) (str->ptr_) ) = ptr; + * ( (int*) (str->lien_size_) ) = lien_size; + * ( (char**) (str->lien_buffer_) ) = lien_buffer; + return (forbidden_url == 0); + } + return 0; +} + diff --git a/src/htscore.h b/src/htscore.h index a50aac8..d9e5d0a 100644 --- a/src/htscore.h +++ b/src/htscore.h @@ -55,8 +55,10 @@ Please visit our Website: http://www.httrack.com #include <direct.h> #else #include <signal.h> +#ifdef HAVE_UNISTD_H #include <unistd.h> #endif +#endif /* END specific definitions */ @@ -69,13 +71,13 @@ Please visit our Website: http://www.httrack.com typedef struct { char firstblock; // flag 1=premier malloc char link_import; // lien importé à la suite d'un moved - ne pas appliquer les règles classiques up/down - short int depth; // profondeur autorisée lien ; >0 forte 0=faible - short int pass2; // traiter après les autres, seconde passe. 
si == -1, lien traité en background + int depth; // profondeur autorisée lien ; >0 forte 0=faible + int pass2; // traiter après les autres, seconde passe. si == -1, lien traité en background int premier; // pointeur sur le premier lien qui a donné lieu aux autres liens du domaine int precedent; // pointeur sur le lien qui a donné lieu à ce lien précis - //int moved; // pointeur sur moved - short int retry; // nombre de retry restants - short int testmode; // mode test uniquement, envoyer juste un head! + //int moved; // pointeur sur moved + int retry; // nombre de retry restants + int testmode; // mode test uniquement, envoyer juste un head! char* adr; // adresse char* fil; // nom du fichier distant char* sav; // nom à sauver sur disque (avec chemin éventuel) @@ -101,10 +103,11 @@ typedef struct { char referer_adr[HTS_URLMAXSIZE*2]; // adresse host page referer char referer_fil[HTS_URLMAXSIZE*2]; // fichier page referer char location_buffer[HTS_URLMAXSIZE*2]; // "location" en cas de "moved" (302,..) - char tmpfile[HTS_URLMAXSIZE*2]; // nom à sauver temporairement (compressé) + char* tmpfile; // nom à sauver temporairement (compressé) + char tmpfile_buffer[HTS_URLMAXSIZE*2]; // buffer pour le nom à sauver temporairement char send_too[1024]; // données à envoyer en même temps que le header - int status; // status (-1=non utilisé, 0: prêt, >0: opération en cours) - int testmode; // mode de test + int status; // status (-1=non utilisé, 0: prêt, >0: opération en cours) + int testmode; // mode de test int timeout; // gérer des timeouts? (!=0 : nombre de secondes) TStamp timeout_refresh; // si oui, time refresh int rateout; // timeout refresh? 
(!=0 : taux minimum toléré en octets/s) @@ -112,20 +115,23 @@ typedef struct { LLint maxfile_nonhtml; // taille max d'un fichier non html LLint maxfile_html; // idem pour un ficheir html htsblk r; // structure htsblk de chaque objet en background - short int is_update; // mode update + int is_update; // mode update int head_request; // requète HEAD? LLint range_req_size; // range utilisé + TStamp ka_time_start; // refresh time for KA // int http11; // L'en tête doit être signé HTTP/1.1 et non HTTP/1.0 int is_chunk; // chunk? char* chunk_adr; // adresse chunk en cours de chargement LLint chunk_size; // taille chunk en cours de chargement + LLint chunk_blocksize; // taille data declaree par le chunk LLint compressed_size; // taille compressés (stats uniquement) // - short int* pass2_ptr; // pointeur sur liens[ptr]->pass2 + int* pass2_ptr; // pointeur sur liens[ptr]->pass2 // - char info[256]; // éventuel status pour le ftp - int stop_ftp; // flag stop pour ftp + char info[256]; // éventuel status pour le ftp + int stop_ftp; // flag stop pour ftp + int finalized; // finalized (optim memory) #if DEBUG_CHECKINT char magic2; #endif @@ -136,6 +142,7 @@ typedef struct { int version; // 0 ou 1 /* */ int type; + int ro; FILE *dat,*ndx,*olddat; char *use; // liste des adr+fil FILE *lst; // liste des fichiers pour la "purge" @@ -209,6 +216,7 @@ typedef struct { // gestion hashage #include "htshash.h" +#include "htsinthash.h" // gestion réentrance #include "htsnostatic.h" @@ -236,42 +244,57 @@ typedef int (* t_hts_htmlcheck)(char* html,int len,char* url_adresse,char* url typedef char* (* t_hts_htmlcheck_query)(char* question); typedef char* (* t_hts_htmlcheck_query2)(char* question); typedef char* (* t_hts_htmlcheck_query3)(char* question); -typedef int (* t_hts_htmlcheck_loop)(lien_back* back,int back_max,int back_index,int lien_tot,int lien_ntot,LLint stat_bytes,LLint stat_bytes_recv,int stat_time,int stat_nsocket, LLint stat_written, int stat_updated, int stat_errors, int 
irate, int nbk ); +typedef int (* t_hts_htmlcheck_loop)(lien_back* back,int back_max,int back_index,int lien_tot,int lien_ntot,int stat_time,hts_stat_struct* stats); typedef int (* t_hts_htmlcheck_check)(char* adr,char* fil,int status); typedef void (* t_hts_htmlcheck_pause)(char* lockfile); +typedef void (* t_hts_htmlcheck_filesave)(char* file); +typedef int (* t_hts_htmlcheck_linkdetected)(char* link); +typedef int (* t_hts_htmlcheck_xfrstatus)(lien_back* back); +typedef int (* t_hts_htmlcheck_savename)(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save); +typedef int (* t_hts_htmlcheck_sendhead)(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* outgoing); +typedef int (* t_hts_htmlcheck_receivehead)(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* incoming); */ // demande d'interaction avec le shell #if HTS_ANALYSTE //char HTbuff[1024]; /* -extern t_hts_htmlcheck_init hts_htmlcheck_init; -extern t_hts_htmlcheck_uninit hts_htmlcheck_uninit; -extern t_hts_htmlcheck_start hts_htmlcheck_start; -extern t_hts_htmlcheck_end hts_htmlcheck_end; -extern t_hts_htmlcheck_chopt hts_htmlcheck_chopt; -extern t_hts_htmlcheck hts_htmlcheck; -extern t_hts_htmlcheck_query hts_htmlcheck_query; -extern t_hts_htmlcheck_query2 hts_htmlcheck_query2; -extern t_hts_htmlcheck_query3 hts_htmlcheck_query3; -extern t_hts_htmlcheck_loop hts_htmlcheck_loop; -extern t_hts_htmlcheck_check hts_htmlcheck_check; -extern t_hts_htmlcheck_pause hts_htmlcheck_pause; +extern t_hts_htmlcheck_init hts_htmlcheck_init; +extern t_hts_htmlcheck_uninit hts_htmlcheck_uninit; +extern t_hts_htmlcheck_start hts_htmlcheck_start; +extern t_hts_htmlcheck_end hts_htmlcheck_end; +extern t_hts_htmlcheck_chopt hts_htmlcheck_chopt; +extern t_hts_htmlcheck hts_htmlcheck; +extern t_hts_htmlcheck_query hts_htmlcheck_query; +extern t_hts_htmlcheck_query2 hts_htmlcheck_query2; +extern t_hts_htmlcheck_query3 hts_htmlcheck_query3; 
+extern t_hts_htmlcheck_loop hts_htmlcheck_loop; +extern t_hts_htmlcheck_check hts_htmlcheck_check; +extern t_hts_htmlcheck_pause hts_htmlcheck_pause; +extern t_hts_htmlcheck_filesave hts_htmlcheck_filesave; +extern t_hts_htmlcheck_linkdetected hts_htmlcheck_linkdetected; +extern t_hts_htmlcheck_xfrstatus hts_htmlcheck_xfrstatus; +extern t_hts_htmlcheck_savename hts_htmlcheck_savename; +extern t_hts_htmlcheck_sendhead hts_htmlcheck_sendhead; +extern t_hts_htmlcheck_receivehead hts_htmlcheck_receivehead; */ // -int hts_is_parsing(int flag); -int hts_is_testing(void); -int hts_setopt(httrackp* opt); -int hts_addurl(char** url); -int hts_resetaddurl(void); -int copy_htsopt(httrackp* from,httrackp* to); -char* hts_errmsg(void); -int hts_setpause(int); // pause transfer -int hts_request_stop(int force); +#ifndef HTTRACK_DEFLIB +HTSEXT_API int hts_is_parsing(int flag); +HTSEXT_API int hts_is_testing(void); +HTSEXT_API int hts_is_exiting(void); +HTSEXT_API int hts_setopt(httrackp* opt); +HTSEXT_API int hts_addurl(char** url); +HTSEXT_API int hts_resetaddurl(void); +HTSEXT_API int copy_htsopt(httrackp* from,httrackp* to); +HTSEXT_API char* hts_errmsg(void); +HTSEXT_API int hts_setpause(int); // pause transfer +HTSEXT_API int hts_request_stop(int force); // -char* hts_cancel_file(char * s); -void hts_cancel_test(void); -void hts_cancel_parsing(void); +HTSEXT_API char* hts_cancel_file(char * s); +HTSEXT_API void hts_cancel_test(void); +HTSEXT_API void hts_cancel_parsing(void); +#endif // // Variables globales extern int _hts_in_html_parsing; @@ -291,17 +314,20 @@ extern int _hts_cancel; //int httpmirror(char* url,int level,httrackp opt); int httpmirror(char* url1,httrackp* opt); -int filesave(char* adr,int len,char* s); +int filesave(httrackp* opt,char* adr,int len,char* s,char* url_adr /* = NULL */,char* url_fil /* = NULL */); +int check_fatal_io_errno(void); int engine_stats(void); -void host_ban(httrackp* opt,lien_url** liens,int ptr,int lien_tot,lien_back* back,int 
back_max,char** filters,int filter_max,int* filptr,char* host); +void host_ban(httrackp* opt,lien_url** liens,int ptr,int lien_tot,lien_back* back,int back_max,char* host); FILE* filecreate(char* s); int filecreateempty(char* filename); int filenote(char* s,filecreate_params* params); -HTS_INLINE void usercommand(int exe,char* cmd,char* file); +HTS_INLINE void usercommand(httrackp* opt,int exe,char* cmd,char* file,char* adr,char* fil); void usercommand_exe(char* cmd,char* file); -char* structcheck_init(int init); +//void* structcheck_init(int init); int filters_init(char*** ptrfilters, int maxfilter, int filterinc); -int structcheck(char* s); +#ifndef HTTRACK_DEFLIB +HTSEXT_API int structcheck(char* s); +#endif HTS_INLINE int fspc(FILE* fp,char* type); char* next_token(char* p,int flag); // @@ -322,16 +348,20 @@ int back_fillmax(lien_back* back,int back_max,httrackp* opt,cache_back* cache,li // cancel file #if HTS_ANALYSTE -char* hts_cancel_file(char * s); -void hts_cancel_test(void); -void hts_cancel_parsing(void); +#ifndef HTTRACK_DEFLIB +HTSEXT_API char* hts_cancel_file(char * s); +HTSEXT_API void hts_cancel_test(void); +HTSEXT_API void hts_cancel_parsing(void); +#endif #endif int ask_continue(void); int nombre_digit(int n); // Java +#if 0 int hts_add_file(char* file,int file_position); +#endif // Polling #if HTS_POLL @@ -339,6 +369,8 @@ HTS_INLINE int check_flot(T_SOC s); HTS_INLINE int check_stdin(void); int read_stdin(char* s,int max); #endif +HTS_INLINE int check_sockerror(T_SOC s); +HTS_INLINE int check_sockdata(T_SOC s); httrackp* hts_declareoptbuffer(httrackp* optdecl); void sig_finish( int code ); // finir et quitter @@ -353,6 +385,9 @@ void sig_brpipe( int code ); // treat if necessary void sig_doback(int); // mettre en arrière plan #endif +/* external modules */ +int htsAddLink(htsmoduleStruct* str, char* link); + // Void void voidf(void); diff --git a/src/htscoremain.c b/src/htscoremain.c index a03635f..1162c18 100644 --- a/src/htscoremain.c +++ 
b/src/htscoremain.c @@ -42,15 +42,19 @@ Please visit our Website: http://www.httrack.com #include "htsdefines.h" #include "htsalias.h" #include "htswrap.h" +#include "htsmodules.h" + #include <ctype.h> #if HTS_WIN #else #ifndef HTS_DO_NOT_USE_UID /* setuid */ #include <pwd.h> +#ifdef HAVE_UNISTD_H #include <unistd.h> #endif #endif +#endif extern int exit_xh; // sortir prématurément @@ -61,7 +65,7 @@ extern int IPV6_resolver; // Add a command in the argc/argv #define cmdl_add(token,argc,argv,buff,ptr) \ argv[argc]=(buff+ptr); \ - strcpy(argv[argc],token); \ + strcpybuff(argv[argc],token); \ ptr += (strlen(argv[argc])+2); \ argc++ @@ -73,15 +77,56 @@ extern int IPV6_resolver; argv[i]=argv[i-1];\ } \ argv[0]=(buff+ptr); \ - strcpy(argv[0],token); \ + strcpybuff(argv[0],token); \ ptr += (strlen(argv[0])+2); \ argc++ #define htsmain_free() do { if (url != NULL) { free(url); } } while(0) +#define ensureUrlCapacity(url, urlsize, size) do { \ + if (urlsize < size || url == NULL) { \ + urlsize = size; \ + if (url == NULL) { \ + url = (char*) malloct(urlsize); \ + if (url != NULL) url[0]='\0'; \ + } else { \ + url = (char*) realloct(url, urlsize); \ + } \ + if (url == NULL) { \ + HTS_PANIC_PRINTF("* memory exhausted"); \ + htsmain_free(); \ + return -1; \ + } \ + } \ +} while(0) + +static void set_wrappers(void) { +#if HTS_ANALYSTE + // custom wrappers + hts_htmlcheck_init = (t_hts_htmlcheck_init) htswrap_read("init"); + hts_htmlcheck_uninit = (t_hts_htmlcheck_uninit) htswrap_read("free"); + hts_htmlcheck_start = (t_hts_htmlcheck_start) htswrap_read("start"); + hts_htmlcheck_end = (t_hts_htmlcheck_end) htswrap_read("end"); + hts_htmlcheck_chopt = (t_hts_htmlcheck_chopt) htswrap_read("change-options"); + hts_htmlcheck = (t_hts_htmlcheck) htswrap_read("check-html"); + hts_htmlcheck_query = (t_hts_htmlcheck_query) htswrap_read("query"); + hts_htmlcheck_query2 = (t_hts_htmlcheck_query2) htswrap_read("query2"); + hts_htmlcheck_query3 = (t_hts_htmlcheck_query3) 
htswrap_read("query3"); + hts_htmlcheck_loop = (t_hts_htmlcheck_loop) htswrap_read("loop"); + hts_htmlcheck_check = (t_hts_htmlcheck_check) htswrap_read("check-link"); + hts_htmlcheck_pause = (t_hts_htmlcheck_pause) htswrap_read("pause"); + hts_htmlcheck_filesave = (t_hts_htmlcheck_filesave) htswrap_read("save-file"); + hts_htmlcheck_linkdetected = (t_hts_htmlcheck_linkdetected) htswrap_read("link-detected"); + hts_htmlcheck_xfrstatus = (t_hts_htmlcheck_xfrstatus) htswrap_read("transfer-status"); + hts_htmlcheck_savename = (t_hts_htmlcheck_savename) htswrap_read("save-name"); + hts_htmlcheck_sendhead = (t_hts_htmlcheck_sendhead) htswrap_read("send-header"); + hts_htmlcheck_receivehead = (t_hts_htmlcheck_receivehead) htswrap_read("receive-header"); +#endif +} + // Main, récupère les paramètres et appelle le robot #if HTS_ANALYSTE -int hts_main(int argc, char **argv) { +HTSEXT_API int hts_main(int argc, char **argv) { #else int main(int argc, char **argv) { #endif @@ -96,6 +141,7 @@ int main(int argc, char **argv) { int argv_url=-1; // ==0 : utiliser cache et doit.log char* argv_firsturl=NULL; // utilisé pour nommage par défaut char* url = NULL; // URLS séparées par un espace + int url_sz = 65535; //char url[65536]; // URLS séparées par un espace // the parametres httrackp httrack; @@ -113,33 +159,12 @@ int main(int argc, char **argv) { int switch_chroot=0; /* chroot ? 
*/ #endif // - url = malloc(65536); - if (url == NULL) { - HTS_PANIC_PRINTF("* memory exhausted"); - htsmain_free(); - return -1; - } - url[0]='\0'; + ensureUrlCapacity(url, url_sz, 65536); // #if HTS_ANALYSTE // custom wrappers - hts_htmlcheck_init = (t_hts_htmlcheck_init) htswrap_read("init"); - hts_htmlcheck_uninit = (t_hts_htmlcheck_uninit) htswrap_read("free"); - hts_htmlcheck_start = (t_hts_htmlcheck_start) htswrap_read("start"); - hts_htmlcheck_end = (t_hts_htmlcheck_end) htswrap_read("end"); - hts_htmlcheck_chopt = (t_hts_htmlcheck_chopt) htswrap_read("change-options"); - hts_htmlcheck = (t_hts_htmlcheck) htswrap_read("check-html"); - hts_htmlcheck_query = (t_hts_htmlcheck_query) htswrap_read("query"); - hts_htmlcheck_query2 = (t_hts_htmlcheck_query2) htswrap_read("query2"); - hts_htmlcheck_query3 = (t_hts_htmlcheck_query3) htswrap_read("query3"); - hts_htmlcheck_loop = (t_hts_htmlcheck_loop) htswrap_read("loop"); - hts_htmlcheck_check = (t_hts_htmlcheck_check) htswrap_read("check-link"); - hts_htmlcheck_pause = (t_hts_htmlcheck_pause) htswrap_read("pause"); - hts_htmlcheck_filesave = (t_hts_htmlcheck_filesave) htswrap_read("save-file"); - hts_htmlcheck_linkdetected = (t_hts_htmlcheck_linkdetected) htswrap_read("link-detected"); - hts_htmlcheck_xfrstatus = (t_hts_htmlcheck_xfrstatus) htswrap_read("transfer-status"); - hts_htmlcheck_savename = (t_hts_htmlcheck_savename) htswrap_read("save-name"); + set_wrappers(); #endif // options par défaut @@ -157,7 +182,7 @@ int main(int argc, char **argv) { httrack.maxsite=-1; // taille max site (aucune) httrack.maxfile_nonhtml=-1; // taille max fichier non html httrack.maxfile_html=-1; // idem pour html - httrack.maxsoc=8; // nbre socket max + httrack.maxsoc=4; // nbre socket max httrack.fragment=-1; // pas de fragmentation httrack.nearlink=0; // ne pas prendre les liens non-html "adjacents" httrack.makeindex=1; // faire un index @@ -169,10 +194,12 @@ int main(int argc, char **argv) { httrack.cache=1; // cache 
prioritaire httrack.shell=0; // pas de shell par defaut httrack.proxy.active=0; // pas de proxy + strcpybuff(httrack.proxy.bindhost, ""); // bind default host httrack.user_agent_send=1; // envoyer un user-agent - strcpy(httrack.user_agent,"Mozilla/4.5 (compatible; HTTrack 3.0x; Windows 98)"); + strcpybuff(httrack.user_agent,"Mozilla/4.5 (compatible; HTTrack 3.0x; Windows 98)"); httrack.savename_83=0; // noms longs par défaut httrack.savename_type=0; // avec structure originale + httrack.mimehtml=0; // pas MIME-html httrack.parsejava=1; // parser classes httrack.hostcontrol=0; // PAS de control host pour timeout et traffic jammer httrack.retry=2; // 2 retry par défaut @@ -187,26 +214,29 @@ int main(int argc, char **argv) { httrack.accept_cookie=1; // gérer les cookies httrack.cookie=NULL; httrack.http10=0; // laisser http/1.1 + httrack.nokeepalive = 0; // pas keep-alive httrack.nocompression=0; // pas de compression httrack.tolerant=0; // ne pas accepter content-length incorrect httrack.parseall=1; // tout parser (tags inconnus, par exemple) httrack.norecatch=0; // ne pas reprendre les fichiers effacés par l'utilisateur httrack.verbosedisplay=0; // pas d'animation texte - strcpy(httrack.footer,HTS_DEFAULT_FOOTER); + httrack.sizehack=0; // size hack + httrack.urlhack=1; // url hack (normalizer) + strcpybuff(httrack.footer,HTS_DEFAULT_FOOTER); httrack.ftp_proxy=1; // proxy http pour ftp - strcpy(httrack.filelist,""); - strcpy(httrack.lang_iso,"en, *"); - strcpy(httrack.mimedefs,"\n"); // aucun filtre mime (\n IMPORTANT) + strcpybuff(httrack.filelist,""); + strcpybuff(httrack.lang_iso,"en, *"); + strcpybuff(httrack.mimedefs,"\n"); // aucun filtre mime (\n IMPORTANT) // httrack.log=stdout; httrack.errlog=stderr; httrack.flush=1; // flush sur les fichiers log - httrack.aff_progress=0; + //httrack.aff_progress=0; httrack.keyboard=0; // - strcpy(httrack.path_html,""); - strcpy(httrack.path_log,""); - strcpy(httrack.path_bin,""); + strcpybuff(httrack.path_html,""); + 
strcpybuff(httrack.path_log,""); + strcpybuff(httrack.path_bin,""); // httrack.maxlink=100000; // 100,000 liens max par défaut (400Kb) httrack.maxfilter=200; // 200 filtres max par défaut @@ -222,8 +252,10 @@ int main(int argc, char **argv) { httrack.dir_topindex=0; // do not built top index (yet) // httrack.state.stop=0; // stopper + httrack.state.exit_xh=0; // abort // _DEBUG_HEAD=0; // pas de debuggage en têtes + #if HTS_WIN #if HTS_ANALYSTE!=2 @@ -269,9 +301,9 @@ int main(int argc, char **argv) { lien_back r; char* path; FILE* fp; - strcpy(r.url_adr,argv[2]); - strcpy(r.url_fil,argv[3]); - strcpy(r.url_sav,argv[4]); + strcpybuff(r.url_adr,argv[2]); + strcpybuff(r.url_fil,argv[3]); + strcpybuff(r.url_sav,argv[4]); path=argv[5]; r.status=1000; run_launch_ftp(&r); @@ -298,11 +330,11 @@ int main(int argc, char **argv) { char* a; if ((a=strrchr(path,'/'))) { httrack.path_bin[0]='\0'; - strncat(httrack.path_bin,argv[0],(int) a - (int) path); + strncatbuff(httrack.path_bin,argv[0],(int) a - (int) path); } } #else - strcpy(httrack.path_bin,HTS_HTTRACKDIR); + strcpybuff(httrack.path_bin, HTS_HTTRACKDIR); #endif @@ -316,7 +348,7 @@ int main(int argc, char **argv) { while( (a=strchr(argv[na],9)) ) *a=' '; /* equivalent to "empty parameter" */ if ((strcmp(argv[na],HTS_NOPARAM)==0) || (strcmp(argv[na],HTS_NOPARAM2)==0)) // (none) - strcpy(argv[na],"\"\""); + strcpybuff(argv[na],"\"\""); if (strncmp(argv[na],"-&",2)==0) argv[na][1]='%'; } @@ -402,6 +434,11 @@ int main(int argc, char **argv) { argv_url=-1; /* forcer */ httrack.quiet=1; } + } else if (strcmp(tmp_argv[0] + 2,"quiet") == 0) { + httrack.quiet=1; // ne pas poser de questions! 
(nohup par exemple) + } else if (strcmp(tmp_argv[0] + 2,"continue") == 0) { + argv_url=-1; /* forcer */ + httrack.quiet=1; } } } @@ -436,7 +473,7 @@ int main(int argc, char **argv) { FILE* fp; int x_argc2; - //strcpy(x_argvblk2,"httrack "); + //strcpybuff(x_argvblk2,"httrack "); fp=fopen("config","rb"); if (fp) { linput(fp,x_argvblk2+strlen(x_argvblk2),32000); @@ -482,7 +519,7 @@ int main(int argc, char **argv) { if (argv[na][0]=='"') { char tempo[HTS_CDLMAXSIZE]; - strcpy(tempo,argv[na]+1); + strcpybuff(tempo,argv[na]+1); if (tempo[strlen(tempo)-1]!='"') { char s[HTS_CDLMAXSIZE]; sprintf(s,"Missing quote in %s",argv[na]); @@ -491,7 +528,7 @@ int main(int argc, char **argv) { return -1; } tempo[strlen(tempo)-1]='\0'; - strcpy(argv[na],tempo); + strcpybuff(argv[na],tempo); } if (cmdl_opt(argv[na])) { // option @@ -509,34 +546,34 @@ int main(int argc, char **argv) { } else { char* a; na++; - strcpy(httrack.path_html,""); - strcpy(httrack.path_log,""); + strcpybuff(httrack.path_html,""); + strcpybuff(httrack.path_log,""); a=strstr(argv[na],"\",\""); // rechercher en premier, au cas ou -O "c:\pipo,test","c:\test" if (!a) a=strchr(argv[na],','); // 2 path else a++; // position , if (a) { - strncat(httrack.path_html,argv[na],(int) (a-argv[na])); - strcat(httrack.path_log,a+1); + strncatbuff(httrack.path_html,argv[na],(int) (a-argv[na])); + strcatbuff(httrack.path_log,a+1); } else { - strcpy(httrack.path_log,argv[na]); - strcpy(httrack.path_html,argv[na]); + strcpybuff(httrack.path_log,argv[na]); + strcpybuff(httrack.path_html,argv[na]); } // Eliminer les cas comme -O "C:\mirror\" if (httrack.path_log[0]=='"') { // Guillemets char tmp[256]; - strcpy(tmp,httrack.path_log+1); + strcpybuff(tmp,httrack.path_log+1); if (tmp[strlen(tmp)-1]=='"') tmp[strlen(tmp)-1]='\0'; - strcpy(httrack.path_log,tmp); + strcpybuff(httrack.path_log,tmp); } if (httrack.path_html[0]=='"') { char tmp[256]; - strcpy(tmp,httrack.path_html+1); + strcpybuff(tmp,httrack.path_html+1); if 
(tmp[strlen(tmp)-1]=='"') tmp[strlen(tmp)-1]='\0'; - strcpy(httrack.path_html,tmp); + strcpybuff(httrack.path_html,tmp); } check_path(httrack.path_log,argv_firsturl); if (check_path(httrack.path_html,argv_firsturl)) { @@ -583,8 +620,6 @@ int main(int argc, char **argv) { } // traiter -O - - /* load doit.log and insert in current command line */ if ( fexist(fconcat(httrack.path_log,"hts-cache/doit.log")) && (argv_url<=0) ) { FILE* fp=fopen(fconcat(httrack.path_log,"hts-cache/doit.log"),"rb"); @@ -611,8 +646,21 @@ int main(int argc, char **argv) { /* Insert parameters BUT so that they can be in the same order */ if (lastp) { if (strnotempty(lastp)) { + //char* argv0; + //int len; insert_after_argc=argc-insert_after; + //argv0 = (argv+insert_after)[0]; cmdl_ins(lastp,insert_after_argc,(argv+insert_after),x_argvblk,x_ptr); + /* + DONE IN 'next_token' + len = strlen(argv0); + if (len >= 2 && argv0[0]=='\"' && argv0[len-1]=='\"') { // "foo" + char tempo[1024]; + tempo[0] = '\0'; + strncatbuff(tempo, argv0+1, len-2); + strcpybuff(argv0, tempo); + } + */ argc=insert_after_argc+insert_after; insert_after++; } @@ -668,7 +716,7 @@ int main(int argc, char **argv) { if (argv[i][0]=='-') { if (argv[i][1]=='-') { // --xxx if ((strfield2(argv[i]+2,"clean")) || (strfield2(argv[i]+2,"tide"))) { // nettoyer - strcpy(argv[i]+1,""); + strcpybuff(argv[i]+1,""); if (fexist(fconcat(httrack.path_log,"hts-log.txt"))) remove(fconcat(httrack.path_log,"hts-log.txt")); if (fexist(fconcat(httrack.path_log,"hts-err.txt"))) @@ -699,7 +747,7 @@ int main(int argc, char **argv) { // } else if (strfield2(argv[i]+2,"catchurl")) { // capture d'URL via proxy temporaire! 
argv_url=1; // forcer a passer les parametres - strcpy(argv[i]+1,"#P"); + strcpybuff(argv[i]+1,"#P"); // } else if (strfield2(argv[i]+2,"updatehttrack")) { #ifdef _WIN32 @@ -714,10 +762,10 @@ int main(int argc, char **argv) { char *args[8]; printf("Cheking for updates...\n"); - strcpy(_args[0],argv[0]); - strcpy(_args[1],"--get"); + strcpybuff(_args[0],argv[0]); + strcpybuff(_args[1],"--get"); sprintf(_args[2],HTS_UPDATE_WEBSITE,HTS_PLATFORM,""); - strcpy(_args[3],"--quickinfo"); + strcpybuff(_args[3],"--quickinfo"); args[0]=_args[0]; args[1]=_args[1]; args[2]=_args[2]; @@ -781,7 +829,7 @@ int main(int argc, char **argv) { FILE* fp; int x_argc; - //strcpy(x_argvblk,"httrack "); + //strcpybuff(x_argvblk,"httrack "); fp=fopen(fconcat(httrack.path_log,"hts-cache/doit.log"),"rb"); if (fp) { linput(fp,x_argvblk+strlen(x_argvblk),8192); @@ -892,6 +940,10 @@ int main(int argc, char **argv) { } } else { // aucune URL définie et pas de cache + if (argc > 1 && strcmp(argv[0], "-#h") == 0) { + printf("HTTrack version "HTTRACK_VERSION"%s\n", WHAT_is_available); + exit(0); + } #if HTS_ANALYSTE!=2 if (httrack.quiet) { #endif @@ -976,7 +1028,7 @@ int main(int argc, char **argv) { if (argv[na][0]=='"') { char tempo[HTS_CDLMAXSIZE]; - strcpy(tempo,argv[na]+1); + strcpybuff(tempo,argv[na]+1); if (tempo[strlen(tempo)-1]!='"') { char s[HTS_CDLMAXSIZE]; sprintf(s,"Missing quote in %s",argv[na]); @@ -985,7 +1037,7 @@ int main(int argc, char **argv) { return -1; } tempo[strlen(tempo)-1]='\0'; - strcpy(argv[na],tempo); + strcpybuff(argv[na],tempo); } if (cmdl_opt(argv[na])) { // option @@ -1009,6 +1061,7 @@ int main(int argc, char **argv) { httrack.savename_type=1003; // mettre dans le répertoire courant httrack.depth=0; // ne pas explorer la page httrack.accept_cookie=0; // pas de cookies + httrack.robots=0; // pas de robots break; case 'w': httrack.wizard=2; // wizard 'soft' (ne pose pas de questions) httrack.travel=0; @@ -1078,7 +1131,7 @@ int main(int argc, char **argv) { 
sscanf(com+1,"%d",&httrack.maxsoc); while(isdigit((unsigned char)*(com+1))) com++; httrack.maxsoc=max(httrack.maxsoc,1); // FORCER A 1 - } else httrack.maxsoc=8; + } else httrack.maxsoc=4; break; // @@ -1122,7 +1175,7 @@ int main(int argc, char **argv) { htsmain_free(); return -1; } - strcpy(httrack.savename_userdef,argv[na]); + strcpybuff(httrack.savename_userdef,argv[na]); if (strnotempty(httrack.savename_userdef)) httrack.savename_type = -1; // userdef! else @@ -1175,6 +1228,8 @@ int main(int argc, char **argv) { case '&': case '%': { // deuxième jeu d'options com++; switch(*com) { + case 'M': httrack.mimehtml = 1; if (*(com+1)=='0') { httrack.mimehtml=0; com++; } break; + case 'k': httrack.nokeepalive = 0; if (*(com+1)=='0') { httrack.nokeepalive = 1; com++; } break; case 'x': httrack.passprivacy=1; if (*(com+1)=='0') { httrack.passprivacy=0; com++; } break; // No passwords in html files case 'q': httrack.includequery=1; if (*(com+1)=='0') { httrack.includequery=0; com++; } break; // No passwords in html files case 'I': httrack.kindex=1; if (isdigit((unsigned char)*(com+1))) { sscanf(com+1,"%d",&httrack.kindex); while(isdigit((unsigned char)*(com+1))) com++; } @@ -1188,7 +1243,9 @@ int main(int argc, char **argv) { case 'P': httrack.parseall=1; if (*(com+1)=='0') { httrack.parseall=0; com++; } break; // tout parser case 'n': httrack.norecatch=1; if (*(com+1)=='0') { httrack.norecatch=0; com++; } break; // ne pas reprendre fichiers effacés localement case 's': httrack.sizehack=1; if (*(com+1)=='0') { httrack.sizehack=0; com++; } break; // hack sur content-length + case 'u': httrack.urlhack=1; if (*(com+1)=='0') { httrack.urlhack=0; com++; } break; // url hack case 'v': httrack.verbosedisplay=2; if (isdigit((unsigned char)*(com+1))) { sscanf(com+1,"%d",&httrack.verbosedisplay); while(isdigit((unsigned char)*(com+1))) com++; } break; + case 'i': httrack.dir_topindex = 1; if (*(com+1)=='0') { httrack.dir_topindex=0; com++; } break; // preserve: no footer, original 
links case 'p': @@ -1208,7 +1265,53 @@ int main(int argc, char **argv) { htsmain_free(); return -1; } - strcpy(httrack.filelist,argv[na]); + strcpybuff(httrack.filelist,argv[na]); + } + break; + case 'b': // bind + if ((na+1>=argc) || (argv[na+1][0]=='-')) { + HTS_PANIC_PRINTF("Option %b needs to be followed by a blank space, and a local hostname"); + printf("Example: -%%b \"ip4.localhost\"\n"); + htsmain_free(); + return -1; + } else{ + na++; + if (strlen(argv[na])>=254) { + HTS_PANIC_PRINTF("Hostname string too long"); + htsmain_free(); + return -1; + } + strcpybuff(httrack.proxy.bindhost,argv[na]); + } + break; + case 'S': // Scan Rules list + if ((na+1>=argc) || (argv[na+1][0]=='-')) { + HTS_PANIC_PRINTF("Option %S needs to be followed by a blank space, and a text filename"); + printf("Example: -%%S \"myfilterlist.txt\"\n"); + htsmain_free(); + return -1; + } else{ + INTsys fz; + na++; + fz = fsize(argv[na]); + if (fz < 0) { + HTS_PANIC_PRINTF("File url list could not be opened"); + htsmain_free(); + return -1; + } else { + FILE* fp = fopen(argv[na], "rb"); + if (fp != NULL) { + int cl = (int) strlen(url); + ensureUrlCapacity(url, url_sz, cl + fz + 8192); + if ((INTsys)fread(url + cl, 1, fz, fp) != fz) { + HTS_PANIC_PRINTF("File url list could not be read"); + htsmain_free(); + return -1; + } + fclose(fp); + *(url + cl + fz) = '\0'; + } + } } break; case 'A': // assume @@ -1227,12 +1330,12 @@ int main(int argc, char **argv) { } // --assume standard if (strcmp(argv[na],"standard") == 0) { - strcpy(httrack.mimedefs,"\n"); - strcat(httrack.mimedefs,HTS_ASSUME_STANDARD); - strcat(httrack.mimedefs,"\n"); + strcpybuff(httrack.mimedefs,"\n"); + strcatbuff(httrack.mimedefs,HTS_ASSUME_STANDARD); + strcatbuff(httrack.mimedefs,"\n"); } else { - strcat(httrack.mimedefs,argv[na]); - strcat(httrack.mimedefs,"\n"); + strcatbuff(httrack.mimedefs,argv[na]); + strcatbuff(httrack.mimedefs,"\n"); } a=httrack.mimedefs; while(*a) { @@ -1259,7 +1362,7 @@ int main(int argc, char 
**argv) { htsmain_free(); return -1; } - strcpy(httrack.lang_iso,argv[na]); + strcpybuff(httrack.lang_iso,argv[na]); } break; // @@ -1276,7 +1379,7 @@ int main(int argc, char **argv) { htsmain_free(); return -1; } - strcpy(httrack.footer,argv[na]); + strcpybuff(httrack.footer,argv[na]); } break; case 'H': // debug headers @@ -1316,6 +1419,81 @@ int main(int argc, char **argv) { } break; + case 'W': // Wrapper callback + // --wrapper check-link=obj.so:check_link + if ((na+1>=argc) || (argv[na+1][0]=='-')) { + HTS_PANIC_PRINTF("Option %W needs to be followed by a blank space, and a <callback-name>=<myfile.so>:<function-name> field"); + printf("Example: -%%W check-link=checklink.so:check\n"); + htsmain_free(); + return -1; + } else { + char callbackname[128]; + char* a = argv[na + 1]; + char* pos = strchr(a, '='); + na++; + if (pos != NULL && (pos - a) > 0 && (pos - a + 2) < sizeof(callbackname)) { + char* posf = strchr(pos + 1, ':'); + char filename[1024]; + callbackname[0] = '\0'; + strncatbuff(callbackname, a, pos - a); + pos++; + if (posf != NULL && (posf - pos) > 0 && (posf - pos + 2) < sizeof(filename)) { + void* userfunction; + filename[0] = '\0'; + strncatbuff(filename, pos, posf - pos); + posf++; + userfunction = getFunctionPtr(filename, posf); + if (userfunction != NULL) { + if ((void*)htswrap_read(callbackname) != NULL) { + if (htswrap_add(callbackname, userfunction)) { + if (!httrack.quiet) { + set_wrappers(); + if ((void*)htswrap_read(callbackname) == userfunction) { + printf("successfully plugged [%s -> %s:%s]\n", callbackname, posf, filename); + } else { + char tmp[1024 * 2]; + sprintf(tmp, "option %%W : unable to (re)plug the function %s from the file %s for the callback %s", posf, filename, callbackname); + HTS_PANIC_PRINTF(tmp); + htsmain_free(); + return -1; + } + } + } else { + char tmp[1024 * 2]; + sprintf(tmp, "option %%W : unable to plug the function %s from the file %s for the callback %s", posf, filename, callbackname); + 
HTS_PANIC_PRINTF(tmp); + htsmain_free(); + return -1; + } + } else { + char tmp[1024 * 2]; + sprintf(tmp, "option %%W : unknown or undefined callback %s", callbackname); + HTS_PANIC_PRINTF(tmp); + htsmain_free(); + return -1; + } + } else { + char tmp[1024 * 2]; + sprintf(tmp, "option %%W : unable to load the function %s in the file %s for the callback %s", posf, filename, callbackname); + HTS_PANIC_PRINTF(tmp); + htsmain_free(); + return -1; + } + } else { + HTS_PANIC_PRINTF("Syntax error in option %W : filename error : this function needs to be followed by a blank space, and a <callback-name>=<myfile.so>:<function-name> field"); + printf("Example: -%%W check-link=checklink.so:check\n"); + htsmain_free(); + return -1; + } + } else { + HTS_PANIC_PRINTF("Syntax error in option %W : this function needs to be followed by a blank space, and a <callback-name>=<myfile.so>:<function-name> field"); + printf("Example: -%%W check-link=checklink.so:check\n"); + htsmain_free(); + return -1; + } + } + break; + default: { char s[HTS_CDLMAXSIZE]; sprintf(s,"invalid option %%%c\n",*com); @@ -1376,17 +1554,185 @@ int main(int argc, char **argv) { } } break; - + // - case '#': { // non documenté (appel de l'interface) + case '#': { // non documenté com++; switch(*com) { + case 'C': // list cache files : httrack -#C '*spid*.gif' will attempt to find the matching file + { + int hasFilter = 0; + int found = 0; + char* filter=NULL; + cache_back cache; + inthash cache_hashtable=inthash_new(HTS_HASH_SIZE); + int backupXFR = htsMemoryFastXfr; + int sendb = 0; + if (isdigit((unsigned char)*(com+1))) { + sscanf(com+1,"%d",&sendb); + while(isdigit((unsigned char)*(com+1))) com++; + } else sendb=0; + if (!((na+1>=argc) || (argv[na+1][0]=='-'))) { + na++; + hasFilter = 1; + filter=argv[na]; + } + htsMemoryFastXfr = 1; /* fast load */ + + memset(&cache, 0, sizeof(cache_back)); + cache.type=1; // cache? + cache.log=stdout; // log? + cache.errlog=stderr; // err log? 
+ cache.ptr_ant=cache.ptr_last=0; // pointeur pour anticiper + cache.hashtable=(void*)cache_hashtable; /* copy backcache hash */ + cache.ro = 1; /* read only */ + if (cache.hashtable) { + char adr[HTS_URLMAXSIZE*2]; + char fil[HTS_URLMAXSIZE*2]; + char url[HTS_URLMAXSIZE*2]; + char linepos[256]; + int pos; + char* cacheNdx = readfile(fconcat(httrack.path_log,"hts-cache/new.ndx")); + cache_init(&cache,&httrack); /* load cache */ + if (cacheNdx != NULL) { + char firstline[256]; + char* a = cacheNdx; + a+=cache_brstr(a, firstline); + a+=cache_brstr(a, firstline); + while ( a != NULL ) { + a=strchr(a+1,'\n'); /* start of line */ + if (a) { + htsblk r; + /* */ + a++; + /* read "host/file" */ + a+=binput(a,adr,HTS_URLMAXSIZE); + a+=binput(a,fil,HTS_URLMAXSIZE); + url[0]='\0'; + if (!link_has_authority(adr)) + strcatbuff(url, "http://"); + strcatbuff(url, adr); + strcatbuff(url, fil); + /* read position */ + a+=binput(a,linepos,200); + sscanf(linepos,"%d",&pos); + if (!hasFilter + || + (strjoker(url, filter, NULL, NULL) != NULL) + ) { + r = cache_read(&httrack, &cache, adr, fil, "", NULL); // lire entrée cache + data + if (r.statuscode != -1) { // No errors + found++; + if (!hasFilter) { + fprintf(stdout, "%s%s%s\r\n", + (link_has_authority(adr)) ? "" : "http://", + adr, fil); + } else { + char msg[256], cdate[256]; + char sav[HTS_URLMAXSIZE*2]; + infostatuscode(msg, r.statuscode); + time_gmt_rfc822(cdate); + + fprintf(stdout, "HTTP/1.1 %d %s\r\n", + r.statuscode, + r.msg[0] ? r.msg : msg + ); + fprintf(stdout, "X-Host: %s\r\n", adr); + fprintf(stdout, "X-File: %s\r\n", fil); + fprintf(stdout, "X-URL: %s%s%s\r\n", + (link_has_authority(adr)) ? 
"" : "http://", + adr, fil); + if (url_savename(adr, fil, sav, NULL, NULL, NULL, NULL, + &httrack, NULL, 0, NULL, 0, &cache, NULL, 0, 0)!=-1) { + if (fexist(sav)) { + fprintf(stdout, "Content-location: %s\r\n", sav); + } + } + fprintf(stdout, "Date: %s\r\n", cdate); + fprintf(stdout, "Server: HTTrack Website Copier/"HTTRACK_VERSION"\r\n"); + if (r.lastmodified[0]) { + fprintf(stdout, "Last-Modified: %s\r\n", r.lastmodified); + } + if (r.etag[0]) { + fprintf(stdout, "Etag: %s\r\n", r.etag); + } + if (r.totalsize >= 0) { + fprintf(stdout, "Content-Length: "LLintP"\r\n", r.totalsize); + } + fprintf(stdout, "X-Content-Length: "LLintP"\r\n", (r.size >= 0) ? r.size : (-r.size) ); + if (r.contenttype >= 0) { + fprintf(stdout, "Content-Type: %s\r\n", r.contenttype); + } + if (r.cdispo[0]) { + fprintf(stdout, "Content-Disposition: %s\r\n", r.cdispo); + } + if (r.contentencoding[0]) { + fprintf(stdout, "Content-Encoding: %s\r\n", r.contentencoding); + } + if (r.is_chunk) { + fprintf(stdout, "Transfer-Encoding: chunked\r\n"); + } +#if HTS_USEOPENSSL + if (r.ssl) { + fprintf(stdout, "X-SSL: yes\r\n"); + } +#endif + if (r.is_write) { + fprintf(stdout, "X-Direct-To-Disk: yes\r\n"); + } + if (r.compressed) { + fprintf(stdout, "X-Compressed: yes\r\n"); + } + if (r.notmodified) { + fprintf(stdout, "X-Not-Modified: yes\r\n"); + } + if (r.is_chunk) { + fprintf(stdout, "X-Chunked: yes\r\n"); + } + fprintf(stdout, "\r\n"); + /* Send the body */ + if (sendb && r.adr) { + fprintf(stdout, "%s\r\n", r.adr); + } + } + } + } + } + } + freet(cacheNdx); + } + } + if (!found) { + fprintf(stderr, "No cache entry found%s%s%s\r\n", + (hasFilter)?" 
for '":"", + (hasFilter)?filter:"", + (hasFilter)?"'":"" + ); + } + htsMemoryFastXfr = backupXFR; + return 0; + } + break; + case 'X': +#ifndef STRDEBUG + fprintf(stderr, "warning: no string debugging support built, option has no effect\n"); +#endif + htsMemoryFastXfr=1; + if (*(com+1)=='0') { htsMemoryFastXfr=0; com++; } + break; + case '~': /* internal lib test */ + { + char thisIsATestYouShouldSeeAnError[12]; + strcpybuff(thisIsATestYouShouldSeeAnError, "0123456789012345678901234567890123456789"); + return 0; + } + break; case 'f': httrack.flush=1; break; case 'h': - printf("HTTrack version "HTTRACK_VERSION"\n"); - exit(1); + printf("HTTrack version "HTTRACK_VERSION"%s\n", WHAT_is_available); + return 0; break; - case 'p': httrack.aff_progress=1; break; + case 'p': /* httrack.aff_progress=1; deprecated */ break; case 'S': httrack.shell=1; break; // stdin sur un shell case 'K': httrack.keyboard=1; break; // vérifier stdin // @@ -1458,10 +1804,10 @@ int main(int argc, char **argv) { if (*a == ':') { // un port est présent, <proxy>:port sscanf(a+1,"%d",&httrack.proxy.port); httrack.proxy.name[0]='\0'; - strncat(httrack.proxy.name,argv[na],(int) (a - argv[na])); + strncatbuff(httrack.proxy.name,argv[na],(int) (a - argv[na])); } else { // <proxy> httrack.proxy.port=8080; - strcpy(httrack.proxy.name,argv[na]); + strcpybuff(httrack.proxy.name,argv[na]); } } break; @@ -1478,7 +1824,7 @@ int main(int argc, char **argv) { htsmain_free(); return -1; } - strcpy(httrack.user_agent,argv[na]); + strcpybuff(httrack.user_agent,argv[na]); if (strnotempty(httrack.user_agent)) httrack.user_agent_send=1; else @@ -1499,7 +1845,7 @@ int main(int argc, char **argv) { htsmain_free(); return -1; } - strcpy(httrack.sys_com,argv[na]); + strcpybuff(httrack.sys_com,argv[na]); if (strnotempty(httrack.sys_com)) httrack.sys_com_exec=1; else @@ -1521,10 +1867,10 @@ int main(int argc, char **argv) { } else { // URL/filters char tempo[1024]; - if (strnotempty(url)) strcat(url," "); // espace de 
séparation - strcpy(tempo,unescape_http_unharm(argv[na],1)); + if (strnotempty(url)) strcatbuff(url," "); // espace de séparation + strcpybuff(tempo,unescape_http_unharm(argv[na],1)); escape_spc_url(tempo); - strcat(url,tempo); + strcatbuff(url,tempo); } // if argv=- etc. } // for @@ -1563,28 +1909,28 @@ int main(int argc, char **argv) { rpath[0]='\0'; if (c != httrack.path_html) { if (httrack.path_html[0]!='/') - strcat(rpath,"./"); - strncat(rpath,httrack.path_html,(int) (c - httrack.path_html)); + strcatbuff(rpath,"./"); + strncatbuff(rpath,httrack.path_html,(int) (c - httrack.path_html)); } { char tmp[1024]; - strcpy(tmp,c); strcpy(httrack.path_html,tmp); - strcpy(tmp,d); strcpy(httrack.path_log,tmp); + strcpybuff(tmp,c); strcpybuff(httrack.path_html,tmp); + strcpybuff(tmp,d); strcpybuff(httrack.path_log,tmp); } } else { - strcpy(rpath,"./"); - strcpy(httrack.path_html,"/"); - strcpy(httrack.path_log,"/"); + strcpybuff(rpath,"./"); + strcpybuff(httrack.path_html,"/"); + strcpybuff(httrack.path_log,"/"); } if (rpath[0]) { printf("[changing root path to %s (path_data=%s,path_log=%s)]\n",rpath,httrack.path_html,httrack.path_log); if (chroot(rpath)) { printf("ERROR! Can not chroot to %s!\n",rpath); - exit(0); + return -1; } if (chdir("/")) { /* new root */ printf("ERROR! 
Can not chdir to %s!\n",rpath); - exit(0); + return -1; } } else printf("WARNING: chroot not possible with these paths\n"); @@ -1668,6 +2014,9 @@ int main(int argc, char **argv) { if (fexist(fconcat(httrack.path_log,"hts-err.txt"))) remove(fconcat(httrack.path_log,"hts-err.txt")); + /* Check FS directory structure created */ + structcheck(httrack.path_log); + httrack.log=fopen(fconcat(httrack.path_log,"hts-log.txt"),"w"); if (httrack_logmode==2) httrack.errlog=fopen(fconcat(httrack.path_log,"hts-err.txt"),"w"); @@ -1705,7 +2054,7 @@ int main(int argc, char **argv) { if (fp) { fprintf(fp,"What's in this folder?"LF); fprintf(fp,""LF); - fprintf(fp,"This folder (hts-cache) has been generated by WinHTTrack "HTTRACK_VERSION""LF); + fprintf(fp,"This folder (hts-cache) has been generated by WinHTTrack "HTTRACK_VERSION"%s"LF, WHAT_is_available); fprintf(fp,"and is used for updating this website."LF); fprintf(fp,"(The HTML website structure is stored here to allow fast updates)"LF""LF); fprintf(fp,"DO NOT delete this folder unless you do not want to update the mirror in the future!!"LF); @@ -1732,8 +2081,8 @@ int main(int argc, char **argv) { }*/ // vérifier existence de la structure - structcheck(httrack.path_html); - structcheck(httrack.path_log); + structcheck(fconcat(httrack.path_html, "/")); + structcheck(fconcat(httrack.path_log, "/")); // reprise/update if (httrack.cache) { @@ -1799,7 +2148,9 @@ int main(int argc, char **argv) { // fichier log if (httrack.log) { int i; - fprintf(httrack.log,"HTTrack"HTTRACK_VERSION" launched on %s at %s"LF,t,url); + fprintf(httrack.log,"HTTrack"HTTRACK_VERSION"%s launched on %s at %s"LF, + WHAT_is_available, + t, url); fprintf(httrack.log,"("); for(i=0;i<argc;i++) { if ((strchr(argv[i],' ')==NULL) || (strchr(argv[i],'\"'))) @@ -1814,8 +2165,8 @@ int main(int argc, char **argv) { fprintf(httrack.log,LF); } - if (httrack_logmode) { - printf("Mirror launched on %s by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS""LF,t); 
+ if (httrack_logmode) { + printf("Mirror launched on %s by HTTrack Website Copier/"HTTRACK_VERSION"%s "HTTRACK_AFF_AUTHORS""LF,t,WHAT_is_available); if (httrack.wizard==0) { printf("mirroring %s with %d levels, %d sockets,t=%d,s=%d,logm=%d,lnk=%d,mdg=%d\n",url,httrack.depth,httrack.maxsoc,httrack.travel,httrack.seeker,httrack_logmode,httrack.urlmode,httrack.getmode); } else { // the magic wizard @@ -1877,7 +2228,7 @@ deprecated - see SIGCHLD if (httrack.dir_topindex) { char rpath[1024*2]; char* a; - strcpy(rpath,httrack.path_html); + strcpybuff(rpath,httrack.path_html); if (rpath[0]) { if (rpath[strlen(rpath)-1]=='/') rpath[strlen(rpath)-1]='\0'; @@ -1885,7 +2236,7 @@ deprecated - see SIGCHLD a=strrchr(rpath,'/'); if (a) { *a='\0'; - hts_buildtopindex(rpath,httrack.path_bin); + hts_buildtopindex(&httrack,rpath,httrack.path_bin); if (httrack.log) { fspc(httrack.log,"info"); fprintf(httrack.log,"Top index rebuilt (done)"LF); } @@ -1931,7 +2282,7 @@ deprecated - see SIGCHLD // WSACleanup(); // ** non en cas de thread tjs présent!.. #endif #endif -#if HTS_TRACE_MALLOC +#ifdef HTS_TRACE_MALLOC hts_freeall(); #endif @@ -1968,9 +2319,9 @@ int check_path(char* s,char* defaultname) { char* a=strchr(defaultname,'#'); // we never know.. 
if (a) *a='\0'; tempo[0]='\0'; - strncat(tempo,s,i-1); - strcat(tempo,defaultname); - strcpy(s,tempo); + strncatbuff(tempo,s,i-1); + strcatbuff(tempo,defaultname); + strcpybuff(s,tempo); } else s[0]='\0'; // Clear path (no name/default url given) return_value=1; // expanded @@ -1980,7 +2331,7 @@ int check_path(char* s,char* defaultname) { // ending / if (strnotempty(s)) if (s[strlen(s)-1]!='/') // ajouter slash à la fin - strcat(s,"/"); + strcatbuff(s,"/"); return return_value; } diff --git a/src/htscoremain.h b/src/htscoremain.h index 0775492..3662793 100644 --- a/src/htscoremain.h +++ b/src/htscoremain.h @@ -42,13 +42,15 @@ Please visit our Website: http://www.httrack.com // --assume standard #define HTS_ASSUME_STANDARD \ - "php2,php3,php4,php,cgi,asp,jsp,pl,cfm=text/html" + "php2,php3,php4,php,cgi,asp,jsp,pl,cfm,nsf=text/html" #include "htsglobal.h" // Main, récupère les paramètres et appelle le robot #if HTS_ANALYSTE -int hts_main(int argc, char **argv); +#ifndef HTTRACK_DEFLIB +HTSEXT_API int hts_main(int argc, char **argv); +#endif #else int main(int argc, char **argv); #endif diff --git a/src/htsdefines.h b/src/htsdefines.h index 223fae1..0ab2cfa 100644 --- a/src/htsdefines.h +++ b/src/htsdefines.h @@ -54,6 +54,8 @@ typedef void (* t_hts_htmlcheck_filesave)(char* file); typedef int (* t_hts_htmlcheck_linkdetected)(char* link); typedef int (* t_hts_htmlcheck_xfrstatus)(lien_back* back); typedef int (* t_hts_htmlcheck_savename)(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save); +typedef int (* t_hts_htmlcheck_sendhead)(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* outgoing); +typedef int (* t_hts_htmlcheck_receivehead)(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* incoming); // demande d'interaction avec le shell #if HTS_ANALYSTE @@ -74,17 +76,19 @@ extern t_hts_htmlcheck_filesave hts_htmlcheck_filesave; extern t_hts_htmlcheck_linkdetected 
hts_htmlcheck_linkdetected; extern t_hts_htmlcheck_xfrstatus hts_htmlcheck_xfrstatus; extern t_hts_htmlcheck_savename hts_htmlcheck_savename; +extern t_hts_htmlcheck_sendhead hts_htmlcheck_sendhead; +extern t_hts_htmlcheck_receivehead hts_htmlcheck_receivehead; #endif -#if HTS_ANALYSTE==2 -#define HT_PRINT(A) strcat(HTbuff,A); +#if HTS_ANALYSTE +#define HT_PRINT(A) strcatbuff(HTbuff,A); #define HT_REQUEST_START HTbuff[0]='\0'; #define HT_REQUEST_END #define HTT_REQUEST_START HTbuff[0]='\0'; #define HTT_REQUEST_END #define HTS_REQUEST_START HTbuff[0]='\0'; #define HTS_REQUEST_END -#define HTS_PANIC_PRINTF(S) strcpy(_hts_errmsg,S); +#define HTS_PANIC_PRINTF(S) strcpybuff(_hts_errmsg,S); #else #define HT_PRINT(A) printf("%s",A); #define HT_REQUEST_START /*printf("§\n");*/ diff --git a/src/htsfilters.c b/src/htsfilters.c index ed0dee4..be8b482 100644 --- a/src/htsfilters.c +++ b/src/htsfilters.c @@ -301,7 +301,7 @@ HTS_INLINE char* strjoker(char* chaine,char* joker,LLint* size,int* size_flag) { } // recherche multiple -// exemple: find dans un texte de strcpy(*[A-Z,a-z],"*[0-9]"); va rechercher la première occurence +// exemple: find dans un texte de strcpybuff(*[A-Z,a-z],"*[0-9]"); va rechercher la première occurence // d'un strcpy sur une variable ayant un nom en lettres et copiant une chaine de chiffres // ATTENTION!! Eviter les jokers en début, où gare au temps machine! 
char* strjokerfind(char* chaine,char* joker) { diff --git a/src/htsftp.c b/src/htsftp.c index 5fbe895..68a8af5 100644 --- a/src/htsftp.c +++ b/src/htsftp.c @@ -152,13 +152,13 @@ void launch_ftp(lien_back* back,char* path,char* exec) { char *args[8]; fclose(fp); fp=NULL; - strcpy(_args[0],exec); - strcpy(_args[1],"-#R"); - strcpy(_args[2],back->url_adr); - strcpy(_args[3],back->url_fil); - strcpy(_args[4],back->url_sav); - strcpy(_args[5],path); - //strcpy(_args[6],""); + strcpybuff(_args[0],exec); + strcpybuff(_args[1],"-#R"); + strcpybuff(_args[2],back->url_adr); + strcpybuff(_args[3],back->url_fil); + strcpybuff(_args[4],back->url_sav); + strcpybuff(_args[5],path); + //strcpybuff(_args[6],""); args[0]=_args[0]; args[1]=_args[1]; args[2]=_args[2]; @@ -234,7 +234,7 @@ int run_launch_ftp(lien_back* back) { timeout=300; // effacer - strcpy(back->r.msg,""); + strcpybuff(back->r.msg,""); back->r.statuscode=0; back->r.size=0; @@ -265,10 +265,19 @@ int run_launch_ftp(lien_back* back) { // Calculer RETR <nom> { char* a; +#if 0 a=back->url_fil + strlen(back->url_fil)-1; while( (a > back->url_fil) && (*a!='/')) a--; - if (*a == '/') { // ok repéré + if (*a != '/') { + a = NULL; + } +#else + a = back->url_fil; +#endif + if (a != NULL && *a != '\0') { +#if 0 a++; // sauter / +#endif ftp_filename=a; if (strnotempty(a)) { char* ua=unescape_http(a); @@ -288,7 +297,7 @@ int run_launch_ftp(lien_back* back) { sprintf(line_retr,"LIST -A"); } } else { - strcpy(back->r.msg,"Unexpected PORT error"); + strcpybuff(back->r.msg,"Unexpected PORT error"); back->status=FTP_STATUS_READY; // fini back->r.statuscode=-1; } @@ -314,15 +323,15 @@ int run_launch_ftp(lien_back* back) { a=strchr(adr,':'); // port if (a) { sscanf(a+1,"%d",&port); - strncat(_adr,adr,(int) (a - adr)); + strncatbuff(_adr,adr,(int) (a - adr)); } else - strcpy(_adr,adr); + strcpybuff(_adr,adr); // récupérer adresse résolue - strcpy(back->info,"host name"); + strcpybuff(back->info,"host name"); hp = hts_gethostbyname(_adr, 
&fullhostent_buffer); if (hp == NULL) { - strcpy(back->r.msg,"Unable to get server's address"); + strcpybuff(back->r.msg,"Unable to get server's address"); back->status=FTP_STATUS_READY; // fini back->r.statuscode=-5; _HALT_FTP @@ -339,7 +348,7 @@ int run_launch_ftp(lien_back* back) { // créer ("attachement") une socket (point d'accès) internet,en flot soc_ctl=socket(SOCaddr_sinfamily(server), SOCK_STREAM, 0); if (soc_ctl==INVALID_SOCKET) { - strcpy(back->r.msg,"Unable to create a socket"); + strcpybuff(back->r.msg,"Unable to create a socket"); back->status=FTP_STATUS_READY; // fini back->r.statuscode=-1; _HALT_FTP @@ -350,14 +359,14 @@ int run_launch_ftp(lien_back* back) { // server.sin_port = htons((unsigned short int) port); // connexion (bloquante, on est en thread) - strcpy(back->info,"connect"); + strcpybuff(back->info,"connect"); #if HTS_WIN if (connect(soc_ctl, (const struct sockaddr FAR *)&server, server_size) != 0) { #else if (connect(soc_ctl, (struct sockaddr *)&server, server_size) == -1) { #endif - strcpy(back->r.msg,"Unable to connect to the server"); + strcpybuff(back->r.msg,"Unable to connect to the server"); back->status=FTP_STATUS_READY; // fini back->r.statuscode=-1; _HALT_FTP @@ -378,19 +387,20 @@ int run_launch_ftp(lien_back* back) { _CHECK_HALT_FTP; if (line[0]=='2') { // ok, connecté - strcpy(back->info,"login: user"); + strcpybuff(back->info,"login: user"); sprintf(line,"USER %s",user); send_line(soc_ctl,line); get_ftp_line(soc_ctl,line,timeout); _CHECK_HALT_FTP; if ((line[0]=='3') || (line[0]=='2')) { // --PASS-- - strcpy(back->info,"login: pass"); + strcpybuff(back->info,"login: pass"); sprintf(line,"PASS %s",pass); send_line(soc_ctl,line); get_ftp_line(soc_ctl,line,timeout); _CHECK_HALT_FTP; if (line[0]=='2') { // ok +#if 0 // --CWD-- char* a; a=back->url_fil + strlen(back->url_fil)-1; @@ -398,10 +408,10 @@ int run_launch_ftp(lien_back* back) { if (*a == '/') { // ok repéré char target[1024]; target[0]='\0'; - 
strncat(target,back->url_fil,(int) (a - back->url_fil)); + strncatbuff(target,back->url_fil,(int) (a - back->url_fil)); if (strnotempty(target)==0) - strcat(target,"/"); - strcpy(back->info,"cwd"); + strcatbuff(target,"/"); + strcpybuff(back->info,"cwd"); sprintf(line,"CWD %s",target); send_line(soc_ctl,line); get_ftp_line(soc_ctl,line,timeout); @@ -413,7 +423,7 @@ int run_launch_ftp(lien_back* back) { if (line[0]=='2') { // ok.. } else { - strcpy(back->r.msg,"TYPE I error"); + strcpybuff(back->r.msg,"TYPE I error"); back->status=FTP_STATUS_READY; // fini back->r.statuscode=-1; } @@ -423,10 +433,11 @@ int run_launch_ftp(lien_back* back) { back->r.statuscode=-1; } // sinon on est prêts } else { - strcpy(back->r.msg,"Unexpected ftp error"); + strcpybuff(back->r.msg,"Unexpected ftp error"); back->status=FTP_STATUS_READY; // fini back->r.statuscode=-1; } +#endif } else { sprintf(back->r.msg,"Bad password: %s",linejmp(line)); @@ -453,7 +464,7 @@ int run_launch_ftp(lien_back* back) { // #if FTP_PASV if (SOCaddr_getproto(server, server_size) == '1') { - strcpy(back->info,"pasv"); + strcpybuff(back->info,"pasv"); sprintf(line,"PASV"); send_line(soc_ctl,line); get_ftp_line(soc_ctl,line,timeout); @@ -475,7 +486,7 @@ int run_launch_ftp(lien_back* back) { c=a; while( (c=strchr(c,',')) ) *c='.'; // remplacer , par . if (b) *b='\0'; // - strcpy(adr_ip,a); // copier adresse ip + strcpybuff(adr_ip,a); // copier adresse ip // if (b) { a=b+1; // début du port @@ -506,7 +517,7 @@ int run_launch_ftp(lien_back* back) { /* * try epsv (ipv6) * */ - strcpy(back->info,"pasv"); + strcpybuff(back->info,"pasv"); sprintf(line,"EPSV"); send_line(soc_ctl,line); get_ftp_line(soc_ctl,line,timeout); @@ -564,14 +575,23 @@ int run_launch_ftp(lien_back* back) { } // SIZE? 
- strcpy(back->info,"size"); + strcpybuff(back->info,"size"); send_line(soc_ctl,line); get_ftp_line(soc_ctl,line,timeout); _CHECK_HALT_FTP; if (line[0]=='2') { // SIZE compris, ALORS tester REST (sinon pas tester: cf probleme des txt.gz decompresses a la volee) + char* szstr = strchr(line, ' '); + if (szstr) { + LLint size = 0; + szstr++; + if (sscanf(szstr, LLintP, &size) == 1) { + back->r.totalsize = size; + } + } + // REST? if (fexist(back->url_sav) && (transfer_list==0)) { - strcpy(back->info,"rest"); + strcpybuff(back->info,"rest"); sprintf(line,"REST "LLintP,(LLint)fsize(back->url_sav)); send_line(soc_ctl,line); get_ftp_line(soc_ctl,line,timeout); @@ -600,7 +620,7 @@ int run_launch_ftp(lien_back* back) { memset(&server, 0, sizeof(server)); // infos - strcpy(back->info,"resolv"); + strcpybuff(back->info,"resolv"); // résoudre if (adr_ip[0]) { @@ -616,7 +636,7 @@ int run_launch_ftp(lien_back* back) { } // infos - strcpy(back->info,"cnxdata"); + strcpybuff(back->info,"cnxdata"); #if FTP_DEBUG printf("Data: Connecting to %s:%d...\n", adr_ip, port_pasv); #endif @@ -632,8 +652,8 @@ int run_launch_ftp(lien_back* back) { #else if (connect(soc_dat, (struct sockaddr *)&server, server_size) != -1) { #endif - strcpy(back->info,"retr"); - strcpy(line,line_retr); + strcpybuff(back->info,"retr"); + strcpybuff(line,line_retr); send_line(soc_ctl,line); get_ftp_line(soc_ctl,line,timeout); _CHECK_HALT_FTP; @@ -652,12 +672,12 @@ int run_launch_ftp(lien_back* back) { #endif deletesoc(soc_dat); soc_dat=INVALID_SOCKET; // - strcpy(back->r.msg,"Unable to connect"); + strcpybuff(back->r.msg,"Unable to connect"); back->status=FTP_STATUS_READY; // fini back->r.statuscode=-1; } // sinon on est prêts } else { - strcpy(back->r.msg,"Unable to create a socket"); + strcpybuff(back->r.msg,"Unable to create a socket"); back->status=FTP_STATUS_READY; // fini back->r.statuscode=-1; } // sinon on est prêts @@ -673,15 +693,15 @@ int run_launch_ftp(lien_back* back) { } // sinon on est prêts #else 
//T_SOC soc_servdat; - strcpy(back->info,"listening"); + strcpybuff(back->info,"listening"); if ( (soc_servdat = get_datasocket(line)) != INVALID_SOCKET) { _CHECK_HALT_FTP; send_line(soc_ctl,line); // envoi du RETR get_ftp_line(soc_ctl,line,timeout); _CHECK_HALT_FTP; if (line[0]=='2') { // ok - strcpy(back->info,"retr"); - strcpy(line,line_retr); + strcpybuff(back->info,"retr"); + strcpybuff(line,line_retr); send_line(soc_ctl,line); get_ftp_line(soc_ctl,line,timeout); _CHECK_HALT_FTP; @@ -690,7 +710,7 @@ int run_launch_ftp(lien_back* back) { struct sockaddr dummyaddr; int dummylen = sizeof(struct sockaddr); if ( (soc_dat=accept(soc_servdat,&dummyaddr,&dummylen)) == INVALID_SOCKET) { - strcpy(back->r.msg,"Unable to accept connection"); + strcpybuff(back->r.msg,"Unable to accept connection"); back->status=FTP_STATUS_READY; // fini back->r.statuscode=-1; } @@ -710,7 +730,7 @@ int run_launch_ftp(lien_back* back) { close(soc_servdat); #endif } else { - strcpy(back->r.msg,"Unable to listen to a port"); + strcpybuff(back->r.msg,"Unable to listen to a port"); back->status=FTP_STATUS_READY; // fini back->r.statuscode=-1; } @@ -725,7 +745,7 @@ int run_launch_ftp(lien_back* back) { back->r.fp = fopen(fconv(back->url_sav),"ab"); } else back->r.fp = filecreate(back->url_sav); - strcpy(back->info,"receiving"); + strcpybuff(back->info,"receiving"); if (back->r.fp != NULL) { char buff[1024]; int len=1; @@ -737,7 +757,7 @@ int run_launch_ftp(lien_back* back) { len=1; // pas d'erreur pour le moment switch(wait_socket_receive(soc_dat,timeout)) { case -1: - strcpy(back->r.msg,"Read error"); + strcpybuff(back->r.msg,"FTP read error"); back->status=FTP_STATUS_READY; // fini back->r.statuscode=-1; len=0; // fin @@ -757,21 +777,30 @@ int run_launch_ftp(lien_back* back) { back->r.size+=len; HTS_STAT.HTS_TOTAL_RECV+=len; if (back->r.fp) { - if ((int) fwrite(buff,1,len,back->r.fp) != len) { - strcpy(back->r.msg,"Write error"); + if ((INTsys)fwrite(buff,1,(INTsys)len,back->r.fp) != len) { + 
/* + int fcheck; + if ((fcheck=check_fatal_io_errno())) { + opt->state.exit_xh=-1; + } + */ + strcpybuff(back->r.msg,"Write error"); back->status=FTP_STATUS_READY; // fini back->r.statuscode=-1; len=0; // error } } else { - strcpy(back->r.msg,"Unexpected write error"); + strcpybuff(back->r.msg,"Unexpected write error"); back->status=FTP_STATUS_READY; // fini back->r.statuscode=-1; } } else { // Erreur ou terminé - //strcpy(back->r.msg,"Read error"); back->status=FTP_STATUS_READY; // fini back->r.statuscode=0; + if (back->r.totalsize > 0 && back->r.size != back->r.totalsize) { + back->r.statuscode=-1; + strcpybuff(back->r.msg,"FTP file incomplete"); + } } read_len=1024; //HTS_TOTAL_RECV_CHECK(read_len); // Diminuer au besoin si trop de données reçues @@ -782,7 +811,7 @@ int run_launch_ftp(lien_back* back) { back->r.fp=NULL; } } else { - strcpy(back->r.msg,"Unable to write file"); + strcpybuff(back->r.msg,"Unable to write file"); back->status=FTP_STATUS_READY; // fini back->r.statuscode=-1; } @@ -798,7 +827,7 @@ int run_launch_ftp(lien_back* back) { // récupérer 226 transfer complete get_ftp_line(soc_ctl,line,timeout); if (line[0]=='2') { // OK - strcpy(back->r.msg,"OK"); + strcpybuff(back->r.msg,"OK"); back->status=FTP_STATUS_READY; // fini back->r.statuscode=200; } else { @@ -807,7 +836,7 @@ int run_launch_ftp(lien_back* back) { back->r.statuscode=-1; } } else { - strcpy(back->r.msg,"Read error"); + strcpybuff(back->r.msg,"FTP read error"); back->status=FTP_STATUS_READY; // fini back->r.statuscode=-1; } @@ -823,7 +852,7 @@ int run_launch_ftp(lien_back* back) { } _CHECK_HALT_FTP; - strcpy(back->info,"quit"); + strcpybuff(back->info,"quit"); send_line(soc_ctl,"QUIT"); // bye bye get_ftp_line(soc_ctl,NULL,timeout); #if HTS_WIN @@ -835,7 +864,7 @@ int run_launch_ftp(lien_back* back) { if (back->r.statuscode!=-1) { back->r.statuscode=200; - strcpy(back->r.msg,"OK"); + strcpybuff(back->r.msg,"OK"); } back->status=FTP_STATUS_READY; // fini return 0; @@ -887,7 +916,7 @@ 
T_SOC get_datasocket(char* to_send) { SOCaddr_inetntoa(dot, 256, server2, sizeof(server2)); // dots[0]='\0'; - strncat(dots, dot, 128); + strncatbuff(dots, dot, 128); while( (a=strchr(dots,'.')) ) *a=','; // virgules! while( (a=strchr(dots,':')) ) *a=','; // virgules! sprintf(to_send,"PORT %s,%d,%d",dots,n1,n2); @@ -992,7 +1021,7 @@ int get_ftp_line(T_SOC soc,char* line,int timeout) { // vérifier données switch(wait_socket_receive(soc,timeout)) { case -1: // erreur de lecture - if (line) strcpy(line,"500 *read error"); + if (line) strcpybuff(line,"500 *read error"); return 0; break; case 0: @@ -1010,7 +1039,7 @@ int get_ftp_line(T_SOC soc,char* line,int timeout) { data[i++]=b; break; default: - if (line) strcpy(line,"500 *read error"); + if (line) strcpybuff(line,"500 *read error"); return 0; // error break; } @@ -1041,7 +1070,7 @@ int get_ftp_line(T_SOC soc,char* line,int timeout) { fprintf(dd,"<--- %s\n",data); fflush(dd); printf("<--- %s\n",data); #endif - if (line) strcpy(line,data); + if (line) strcpybuff(line,data); return (strnotempty(data)); } @@ -1122,7 +1151,7 @@ int wait_socket_receive(T_SOC soc,int timeout) { // cancel reçu? 
int stop_ftp(lien_back* back) { if (back->stop_ftp) { - strcpy(back->r.msg,"Cancelled by User"); + strcpybuff(back->r.msg,"Cancelled by User"); back->status=FTP_STATUS_READY; // fini back->r.statuscode=-1; return 1; diff --git a/src/htsglobal.h b/src/htsglobal.h index ce54d3d..38faebc 100644 --- a/src/htsglobal.h +++ b/src/htsglobal.h @@ -40,10 +40,10 @@ Please visit our Website: http://www.httrack.com #define HTTRACK_GLOBAL_DEFH // Version -#define HTTRACK_VERSION "3.20-2" -#define HTTRACK_VERSIONID "3.20.02" +#define HTTRACK_VERSION "3.30" +#define HTTRACK_VERSIONID "3.30.01" #define HTTRACK_AFF_VERSION "3.x" -//#define HTTRACK_AFF_WARNING "This is a RELEASE CANDIDATE version of WinHTTrack Website Copier 3.0\nPlease report us any bug or problem" +//#define HTTRACK_AFF_WARNING "This is a BETA release of WinHTTrack Website Copier ("HTTRACK_VERSION")\nPlease report any crashes, bugs or problems" @@ -51,11 +51,79 @@ Please visit our Website: http://www.httrack.com #include "htssystem.h" #include "htsconfig.h" +// config.h +#ifdef _WIN32 + +#define HAVE_SYS_STAT_H 1 +#define HAVE_SYS_TYPES_H 1 +#define HAVE_SYS_STAT_H 1 +#ifndef DLLIB +#define DLLIB 1 +#endif +#ifndef HTS_INET6 +#define HTS_INET6 1 +#endif +#ifndef S_ISREG +#define S_ISREG(m) ((m) & _S_IFREG) +#endif + +#else + +#include "config.h" + +#ifndef FTIME +#define HTS_DO_NOT_USE_FTIME +#endif + +#ifndef SETUID +#define HTS_DO_NOT_USE_UID +#endif + +#ifndef HTS_LONGLONG +#ifdef SIZEOF_LONG_LONG +#if SIZEOF_LONG_LONG==8 +#define HTS_LONGLONG 1 +#endif +#endif + +#ifndef HTS_LONGLONG +#ifdef __sun +#define HTS_LONGLONG 0 +#endif +#ifdef __osf__ +#define HTS_LONGLONG 0 +#endif +#ifdef __linux +#define HTS_LONGLONG 1 +#endif +#ifdef _WIN32 +#define HTS_LONGLONG 1 +#endif +#endif +#endif + +#ifdef DLLIB +#define HTS_DLOPEN 1 +#else +#define HTS_DLOPEN 0 +#endif + +#endif + + // Socket windows ou socket unix -#if HTS_PLATFORM==1 +#ifdef _WIN32 +#undef HTS_PLATFORM +#define HTS_PLATFORM 1 #define HTS_WIN 1 + #else + 
#define HTS_WIN 0 +#ifdef __linux +#undef HTS_PLATFORM +#define HTS_PLATFORM 3 +#endif #endif // compatibilité DOS @@ -66,12 +134,10 @@ Please visit our Website: http://www.httrack.com #endif // utiliser zlib? -#if HTS_USEZLIB -#else -#ifdef _WINDOWS +#ifndef HTS_USEZLIB +// autoload #define HTS_USEZLIB 1 #endif -#endif #ifndef HTS_INET6 #define HTS_INET6 0 @@ -79,28 +145,27 @@ Please visit our Website: http://www.httrack.com // utiliser openssl? #ifndef HTS_USEOPENSSL +// autoload #define HTS_USEOPENSSL 1 #endif -#if HTS_WIN -#else -#define __cdecl +#ifndef HTS_DLOPEN +#define HTS_DLOPEN 1 #endif -/* -#if HTS_XGETHOST -#if HTS_PLATFORM==1 -#ifndef __cplusplus -#undef HTS_XGMETHOD -#undef HTS_XGETHOST -#endif +#ifndef HTS_USESWF +#define HTS_USESWF 1 #endif + +#if HTS_WIN #else -#undef HTS_XGMETHOD -#undef HTS_XGETHOST +#define __cdecl #endif -*/ +#ifdef HTS_ANALYSTE_CONSOLE +#undef HTS_ANALYSTE_CONSOLE +#define HTS_ANALYSTE_CONSOLE 1 +#endif #if HTS_ANALYSTE #else @@ -134,19 +199,25 @@ Please visit our Website: http://www.httrack.com #define HTS_HTTRACKRC ".httrackrc" #define HTS_HTTRACKCNF HTS_ETCPATH"/httrack.conf" -#define HTS_HTTRACKDIR HTS_PREFIX"/doc/httrack/" + +#ifdef DATADIR +#define HTS_HTTRACKDIR DATADIR"/httrack/" +#else +#define HTS_HTTRACKDIR HTS_PREFIX"/share/httrack/" +#endif #endif /* Gestion des tables de hashage */ #define HTS_HASH_SIZE 20147 /* Taille max d'une URL */ -#define HTS_URLMAXSIZE 512 +#define HTS_URLMAXSIZE 1024 /* Taille max ligne de commande (>=HTS_URLMAXSIZE*2) */ #define HTS_CDLMAXSIZE 1024 /* Copyright (C) Xavier Roche and other contributors */ -#define HTTRACK_AFF_AUTHORS "[XR&CO'2002]" +#define HTTRACK_AFF_AUTHORS "[XR&CO'2003]" #define HTS_DEFAULT_FOOTER "<!-- Mirrored from %s%s by HTTrack Website Copier/"HTTRACK_AFF_VERSION" "HTTRACK_AFF_AUTHORS", %s -->" +#define HTTRACK_WEB "http://www.httrack.com" #define HTS_UPDATE_WEBSITE 
"http://www.httrack.com/update.php3?Product=HTTrack&Version="HTTRACK_VERSIONID"&VersionStr="HTTRACK_VERSION"&Platform=%d&Language=%s" #define H_CRLF "\x0d\x0a" @@ -175,43 +246,69 @@ Please visit our Website: http://www.httrack.com #define HTS_INLINE #endif +#ifdef _WIN32 +#ifdef LIBHTTRACK_EXPORTS +#define HTSEXT_API __declspec(dllexport) +#else +#define HTSEXT_API __declspec(dllimport) +#endif +#else +#define HTSEXT_API +#endif + +#ifndef HTS_LONGLONG #ifdef HTS_NO_64_BIT #define HTS_LONGLONG 0 #else #define HTS_LONGLONG 1 #endif +#endif // long long int? (or int) // (and int cast for system functions like malloc() ) + #if HTS_LONGLONG +#ifdef LLINT_FORMAT + typedef LLINT_TYPE LLint; + typedef LLINT_TYPE TStamp; + #define LLintP LLINT_FORMAT +#else #if HTS_WIN typedef __int64 LLint; typedef __int64 TStamp; - typedef int INTsys; #define LLintP "%I64d" #else #if HTS_PLATFORM==0 typedef long long int LLint; typedef long long int TStamp; - typedef int INTsys; #define LLintP "%lld" #else typedef long long int LLint; typedef long long int TStamp; - typedef int INTsys; #define LLintP "%Ld" #endif #endif +#endif #else typedef int LLint; - typedef int INTsys; - typedef double TStamp; #define LLintP "%d" + typedef double TStamp; +#endif + +#ifdef LFS_FLAG +typedef LLint INTsys; +#define INTsysP LLintP +#ifdef __linux +#define HTS_FSEEKO +#endif +#else +typedef int INTsys; +#define INTsysP "%d" #endif -/* Alignement */ +/* Default alignement */ #ifndef HTS_ALIGN -#define HTS_ALIGN 4 +#define HTS_ALIGN (sizeof(void*)) #endif /* IPV4, IPV6 and various unified structures */ @@ -265,6 +362,11 @@ Please visit our Website: http://www.httrack.com #if HTS_WIN #else // use pthreads.h + +#ifndef THREADS +#define HTS_DO_NOT_USE_PTHREAD +#endif + #ifdef HTS_DO_NOT_USE_PTHREAD #define USE_PTHREAD 0 #else @@ -283,6 +385,27 @@ Please visit our Website: http://www.httrack.com #endif #endif +#ifdef _DEBUG +// trace mallocs +//#define HTS_TRACE_MALLOC +#ifdef HTS_TRACE_MALLOC +typedef 
unsigned long int t_htsboundary; +typedef struct _mlink { + char* adr; + int len; + int id; + struct _mlink* next; +} mlink; +static const t_htsboundary htsboundary = 0xDEADBEEF; +#endif +#endif + +/* strxxx debugging */ +#ifndef NOSTRDEBUG +#define STRDEBUG 1 +#endif + + /* ------------------------------------------------------------ */ /* Debugging */ /* ------------------------------------------------------------ */ @@ -309,8 +432,6 @@ Please visit our Website: http://www.httrack.com #define DEBUG_CHECKINT 0 // nbr sockets debug #define NSDEBUG 0 -// tracer mallocs -#define HTS_TRACE_MALLOC 0 // débuggage HTSLib #define HDEBUG 0 diff --git a/src/htshash.c b/src/htshash.c index b02f2ba..3cbdb5f 100644 --- a/src/htshash.c +++ b/src/htshash.c @@ -39,12 +39,21 @@ Please visit our Website: http://www.httrack.com /* specific definitions */ #include "htsbase.h" +#include "htsglobal.h" #include "htsmd5.h" #include <stdio.h> #include <stdlib.h> #include <string.h> /* END specific definitions */ +/* Specific macros */ +#ifndef malloct +#define malloct malloc +#define freet free +#define calloct calloc +#define strcpybuff strcpy +#endif + // GESTION DES TABLES DE HACHAGE // Méthode à 2 clés (adr+fil), 2e cle facultative // hash[no_enregistrement][pos]->hash est un index dans le tableau général liens @@ -53,7 +62,10 @@ Please visit our Website: http://www.httrack.com #if HTS_HASH // recherche dans la table selon nom1,nom2 et le no d'enregistrement // retour: position ou -1 si non trouvé -int hash_read(hash_struct* hash,char* nom1,char* nom2,int type) { +int hash_read(hash_struct* hash,char* nom1,char* nom2,int type,int normalized) { + char normfil_[HTS_URLMAXSIZE*2]; + char* normfil; + char* normadr; unsigned int cle; int pos; // calculer la clé de recherche, non modulée @@ -64,7 +76,7 @@ int hash_read(hash_struct* hash,char* nom1,char* nom2,int type) { // la position se calcule en modulant pos = (int) (cle%HTS_HASH_SIZE); // entrée trouvée? 
- if (hash->hash[type][pos] >= 0) { // un enregistrement avec une telle clé existe.. + if (hash->hash[type][pos] >= 0) { // un ou plusieurs enregistrement(s) avec une telle clé existe.. // tester table de raccourcis (hash) // pos est maintenant la position recherchée dans liens pos = hash->hash[type][pos]; @@ -79,20 +91,42 @@ int hash_read(hash_struct* hash,char* nom1,char* nom2,int type) { } break; case 1: // adr+fil - if ((strcmp(nom1,jump_identification(hash->liens[pos]->adr))==0) && (strcmp(nom2,hash->liens[pos]->fil)==0)) { + { + if (!normalized) + normfil=hash->liens[pos]->fil; + else + normfil=fil_normalized(hash->liens[pos]->fil,normfil_); + if (!normalized) + normadr = jump_identification(hash->liens[pos]->adr); + else + normadr = jump_normalized(hash->liens[pos]->adr); + if ((strfield2(nom1,normadr)!=0) && (strcmp(nom2,normfil)==0)) { #if DEBUG_HASH==2 - printf("hash: found shortcut at %d\n",pos); + printf("hash: found shortcut at %d\n",pos); #endif - return pos; + return pos; + } } break; case 2: // former_adr+former_fil - if (hash->liens[pos]->former_adr) - if ((strcmp(nom1,jump_identification(hash->liens[pos]->former_adr))==0) && (strcmp(nom2,hash->liens[pos]->former_fil)==0)) { + { + if (hash->liens[pos]->former_adr) { + if (!normalized) + normfil=hash->liens[pos]->former_fil; + else + normfil=fil_normalized(hash->liens[pos]->former_fil,normfil_); + if (!normalized) + normadr = jump_identification(hash->liens[pos]->former_adr); + else + normadr = jump_normalized(hash->liens[pos]->former_adr); + + if ((strfield2(nom1,normadr)!=0) && (strcmp(nom2,normfil)==0)) { #if DEBUG_HASH==2 - printf("hash: found shortcut at %d\n",pos); + printf("hash: found shortcut at %d\n",pos); #endif - return pos; + return pos; + } + } } break; } @@ -164,7 +198,9 @@ int hash_read(hash_struct* hash,char* nom1,char* nom2,int type) { } // enregistrement lien lpos dans les 3 tables hash1..3 -void hash_write(hash_struct* hash,int lpos) { +void hash_write(hash_struct* hash,int 
lpos,int normalized) { + char normfil_[HTS_URLMAXSIZE*2]; + char* normfil; unsigned int cle; int pos; int* ptr; @@ -185,7 +221,14 @@ void hash_write(hash_struct* hash,int lpos) { printf("[%d",pos); #endif // - cle = hash_cle(jump_identification(hash->liens[lpos]->adr),hash->liens[lpos]->fil); + if (!normalized) + normfil=hash->liens[lpos]->fil; + else + normfil=fil_normalized(hash->liens[lpos]->fil,normfil_); + if (!normalized) + cle = hash_cle(jump_identification(hash->liens[lpos]->adr),normfil); + else + cle = hash_cle(jump_normalized(hash->liens[lpos]->adr),normfil); pos = (int) (cle%HTS_HASH_SIZE); ptr = hash_calc_chaine(hash,1,pos); // calculer adresse chaine *ptr = lpos; // noter dernier enregistré @@ -194,7 +237,14 @@ void hash_write(hash_struct* hash,int lpos) { #endif // if (hash->liens[lpos]->former_adr) { // former_adr existe? - cle = hash_cle(jump_identification(hash->liens[lpos]->former_adr),hash->liens[lpos]->former_fil); + if (!normalized) + normfil=hash->liens[lpos]->former_fil; + else + normfil=fil_normalized(hash->liens[lpos]->former_fil,normfil_); + if (!normalized) + cle = hash_cle(jump_identification(hash->liens[lpos]->former_adr),normfil); + else + cle = hash_cle(jump_normalized(hash->liens[lpos]->former_adr),normfil); pos = (int) (cle%HTS_HASH_SIZE); ptr = hash_calc_chaine(hash,2,pos); // calculer adresse chaine *ptr = lpos; // noter dernier enregistré @@ -209,6 +259,7 @@ void hash_write(hash_struct* hash,int lpos) { #if DEBUT_HASH else { printf("* hash_write=0!!\n"); + abortLogFmt("unexpected error in hash_write (pos=%d)" _ pos); exit(1); } #endif @@ -263,191 +314,3 @@ int* hash_calc_chaine(hash_struct* hash,int type,int pos) { #endif // FIN GESTION DES TABLES DE HACHAGE - - - - - - - - - - - -// inthash -- simple hash table, using a key (char[]) and a value (ulong int) - -unsigned long int inthash_key(char* value) { - return md5sum32(value); -} - -// Check for duplicate entry (==1 : added) -int inthash_write(inthash hashtable,char* 
name,long int value) { - int pos = (inthash_key(name) % hashtable->hash_size); - inthash_chain* h=hashtable->hash[pos]; - while (h) { - if (strcmp(h->name,name)==0) { - h->value.intg=value; - return 0; - } - h=h->next; - } - // Not found, add it! - inthash_add(hashtable,name,value); - return 1; -} - -// Increment pos value, create one if necessary (=0) -// (==1 : created) -int inthash_inc(inthash hashtable,char* name) { - long int value=0; - int r=0; - if (inthash_read(hashtable,name,&value)) { - value++; - } - else { /* create new value */ - value=0; - r=1; - } - inthash_write(hashtable,name,value); - return (r); -} - - -// Does not check for duplicate entry -void inthash_add(inthash hashtable,char* name,long int value) { - int pos = (inthash_key(name) % hashtable->hash_size); - inthash_chain** h=&hashtable->hash[pos]; - - while (*h) - h=&((*h)->next); - *h=(inthash_chain*)calloc(1, - sizeof(inthash_chain) - + - strlen(name)+2 - ); - if (*h) { - (*h)->name=((char*)(*h)) + sizeof(inthash_chain); - (*h)->next=NULL; - strcpy((*h)->name,name); - (*h)->value.intg=value; - } -} - -void* inthash_addblk(inthash hashtable,char* name,int blksize) { - int pos = (inthash_key(name) % hashtable->hash_size); - inthash_chain** h=&hashtable->hash[pos]; - - while (*h) - h=&((*h)->next); - *h=(inthash_chain*)calloc(1, - sizeof(inthash_chain) - + - strlen(name)+2 - + - blksize - ); - if (*h) { - (*h)->name = ((char*)(*h)) + sizeof(inthash_chain); - (*h)->next=NULL; - strcpy((*h)->name,name); - (*h)->value.intg = (unsigned long) (char*) ((char*)(*h)) + sizeof(inthash_chain) + strlen(name) + 2; - return (void*)(*h)->value.intg; - } - return NULL; -} - -int inthash_read(inthash hashtable,char* name,long int* value) { - int pos = (inthash_key(name) % hashtable->hash_size); - inthash_chain* h=hashtable->hash[pos]; - while (h) { - if (strcmp(h->name,name)==0) { - *value=h->value.intg; - return 1; - } - h=h->next; - } - return 0; -} - -void inthash_init(inthash hashtable) { - unsigned int 
i; - for(i=0;i<hashtable->hash_size;i++) { - hashtable->hash[i]=NULL; - } -} - -void inthash_delchain(inthash_chain* hash,t_inthash_freehandler free_handler) { - if (hash) { - inthash_delchain(hash->next,free_handler); - if (free_handler) { // pos is a malloc() block, delete it! - if (hash->value.intg) { - if (free_handler) - free_handler((void*)hash->value.intg); - else - free((void*)hash->value.intg); - } - hash->value.intg=0; - } - free(hash); - } -} - -void inthash_default_free_handler(void* value) { - if (value) - free(value); -} - -// -- - -inthash inthash_new(int size) { - inthash hashtable=(inthash)calloc(1,sizeof(struct_inthash)); - if (hashtable) { - hashtable->hash_size=0; - hashtable->flag_valueismalloc=0; - if ((hashtable->hash=(inthash_chain**)calloc(size,sizeof(inthash_chain*)))) { - hashtable->hash_size=size; - inthash_init(hashtable); - } - } - return hashtable; -} - -int inthash_created(inthash hashtable) { - if (hashtable) - if (hashtable->hash) - return 1; - return 0; -} - -void inthash_value_is_malloc(inthash hashtable,int flag) { - hashtable->flag_valueismalloc=flag; -} - -void inthash_value_set_free_handler(inthash hashtable, t_inthash_freehandler free_handler) { - hashtable->free_handler = free_handler; -} - -void inthash_delete(inthash* hashtable) { - if (hashtable) { - if (*hashtable) { - if ((*hashtable)->hash) { - unsigned int i; - t_inthash_freehandler free_handler=NULL; - if ( (*hashtable)->flag_valueismalloc ) { - if ( (*hashtable)->free_handler ) - free_handler=(*hashtable)->free_handler; - else - free_handler=inthash_default_free_handler; - } - for(i=0;i<(*hashtable)->hash_size;i++) { - inthash_delchain((*hashtable)->hash[i],(*hashtable)->free_handler); - (*hashtable)->hash[i]=NULL; - } - } - free(*hashtable); - *hashtable=NULL; - } - } -} - - diff --git a/src/htshash.h b/src/htshash.h index 9a54710..c4acff1 100644 --- a/src/htshash.h +++ b/src/htshash.h @@ -43,62 +43,9 @@ Please visit our Website: http://www.httrack.com #include 
"htscore.h" // tables de hashage -int hash_read(hash_struct* hash,char* nom1,char* nom2,int type); -void hash_write(hash_struct* hash,int lpos); +int hash_read(hash_struct* hash,char* nom1,char* nom2,int type,int normalized); +void hash_write(hash_struct* hash,int lpos,int normalized); int* hash_calc_chaine(hash_struct* hash,int type,int pos); unsigned long int hash_cle(char* nom1,char* nom2); - - - -// inthash -- simple hash table, using a key (char[]) and a value (ulong int) - -// simple hash table for other routines -typedef struct inthash_chain { - char* name; /* key (name) */ - union { - unsigned long int intg; /* integer value */ - void* ptr; /* ptr value */ - } value; - struct inthash_chain* next; /* next element */ -} inthash_chain; - -// structure behind inthash -typedef void (* t_inthash_freehandler)(void* value); -typedef struct { - inthash_chain** hash; - t_inthash_freehandler free_handler; - unsigned int hash_size; - unsigned short flag_valueismalloc; -} struct_inthash; - -// main inthash type -typedef struct_inthash* inthash; - -// subfunctions -unsigned long int inthash_key(char* value); -void inthash_init(inthash hashtable); -void inthash_delchain(inthash_chain* hash,t_inthash_freehandler free_handler); -void inthash_default_free_handler(void* value); - -// main functions: - - -/* Hash functions: */ -inthash inthash_new(int size); /* Create a new hash table */ -int inthash_created(inthash hashtable); /* Test if the hash table was successfully created */ -void inthash_delete(inthash* hashtable); /* Delete an hash table */ -void inthash_value_is_malloc(inthash hashtable,int flag); /* Is the 'value' member a value that needs to be free()'ed ? 
*/ -void inthash_value_set_free_handler(inthash hashtable, /* value free() handler (default one is 'free') */ - t_inthash_freehandler free_handler); -/* */ -int inthash_read(inthash hashtable,char* name,long int* value); /* Read entry from the hash table */ -/* */ -void inthash_add(inthash hashtable,char* name,long int value); /* Add entry in the hash table */ -void* inthash_addblk(inthash hashtable,char* name,int blksize); /* Add entry in the hash table and set value to a new memory block */ -int inthash_write(inthash hashtable,char* name,long int value); /* Overwrite/add entry in the hash table */ -int inthash_inc(inthash hashtable,char* name); /* Increment entry in the hash table */ -/* End of hash functions: */ - - #endif diff --git a/src/htshelp.c b/src/htshelp.c index 3d743fe..7046929 100644 --- a/src/htshelp.c +++ b/src/htshelp.c @@ -48,8 +48,10 @@ Please visit our Website: http://www.httrack.com #include <string.h> #if HTS_WIN #else +#ifdef HAVE_UNISTD_H #include <unistd.h> #endif +#endif /* END specific definitions */ #define waitkey if (more) { char s[4]; printf("\nMORE.. 
q to quit\n"); linput(stdin,s,4); if (strcmp(s,"q")==0) quit=1; else printf("Page %d\n\n",++m); } @@ -115,15 +117,15 @@ void infomsg(char* msg) { } } void help_wizard(httrackp* opt) { - char* urls = (char*) malloc(HTS_URLMAXSIZE*2); - char* mainpath = (char*) malloc(256); - char* projname = (char*) malloc(256); - char* stropt = (char*) malloc(2048); // options - char* stropt2 = (char*) malloc(2048); // options longues - char* strwild = (char*) malloc(2048); // wildcards - char* cmd = (char*) malloc(4096); - char* str = (char*) malloc(256); - char** argv = (char**) malloc(256 * sizeof(char*)); + char* urls = (char*) malloct(HTS_URLMAXSIZE*2); + char* mainpath = (char*) malloct(256); + char* projname = (char*) malloct(256); + char* stropt = (char*) malloct(2048); // options + char* stropt2 = (char*) malloct(2048); // options longues + char* strwild = (char*) malloct(2048); // wildcards + char* cmd = (char*) malloct(4096); + char* str = (char*) malloct(256); + char** argv = (char**) malloct(256 * sizeof(char*)); // char* a; // @@ -134,12 +136,12 @@ void help_wizard(httrackp* opt) { } urls[0] = mainpath[0] = projname[0] = stropt[0] = stropt2[0] = strwild[0] = cmd[0] = str[0] = '\0'; // - strcpy(stropt,"-"); + strcpybuff(stropt,"-"); mainpath[0]=projname[0]=stropt2[0]=strwild[0]='\0'; // printf("\n"); - printf("Welcome to HTTrack Website Copier (Offline Browser) "HTTRACK_VERSION"\n"); + printf("Welcome to HTTrack Website Copier (Offline Browser) "HTTRACK_VERSION"%s\n", WHAT_is_available); printf("Copyright (C) Xavier Roche and other contributors\n"); #ifdef _WIN32 printf("Note: You are running the commandline version,\n"); @@ -174,13 +176,13 @@ void help_wizard(httrackp* opt) { printf("\nBase path (return=current directory) :"); linput(stdin,str,250); if (!strnotempty(str)) { - strcat(str,hts_gethome()); - strcat(str,"/websites/"); + strcatbuff(str,hts_gethome()); + strcatbuff(str,"/websites/"); } if (strnotempty(str)) if ((str[strlen(str)-1]!='/') && 
(str[strlen(str)-1]!='\\')) - strcat(str,"/"); - strcat(stropt2,"-O \""); strcat(stropt2,str); strcat(stropt2,projname); strcat(stropt2,"\" "); + strcatbuff(str,"/"); + strcatbuff(stropt2,"-O \""); strcatbuff(stropt2,str); strcatbuff(stropt2,projname); strcatbuff(stropt2,"\" "); // Créer si ce n'est fait un index.html 1er niveau make_empty_index(str); // @@ -196,11 +198,11 @@ void help_wizard(httrackp* opt) { printf("\nAction:\n"); switch(help_query("Mirror Web Site(s)|Mirror Web Site(s) with Wizard|Just Get Files Indicated|Mirror ALL links in URLs (Multiple Mirror)|Test Links In URLs (Bookmark Test)|Update/Continue a Mirror",1)) { case 1: break; - case 2: strcat(stropt,"W"); break; - case 3: strcat(stropt2,"--get "); break; - case 4: strcat(stropt2,"--mirrorlinks "); break; - case 5: strcat(stropt2,"--testlinks "); break; - case 6: strcat(stropt2,"--update "); break; + case 2: strcatbuff(stropt,"W"); break; + case 3: strcatbuff(stropt2,"--get "); break; + case 4: strcatbuff(stropt2,"--mirrorlinks "); break; + case 5: strcatbuff(stropt2,"--testlinks "); break; + case 6: strcatbuff(stropt2,"--update "); break; case 0: return; break; } @@ -213,17 +215,17 @@ void help_wizard(httrackp* opt) { char str2[256]; printf("\nProxy port (return=8080) :"); linput(stdin,str2,250); - strcat(str,":"); + strcatbuff(str,":"); if (strnotempty(str2)==0) - strcat(str,"8080"); + strcatbuff(str,"8080"); else - strcat(str,str2); + strcatbuff(str,str2); } - strcat(stropt2,"-P "); strcat(stropt2,str); strcat(stropt2," "); + strcatbuff(stropt2,"-P "); strcatbuff(stropt2,str); strcatbuff(stropt2," "); } // Display - strcat(stropt2," -%v "); + strcatbuff(stropt2," -%v "); // Wildcards printf("\nYou can define wildcards, like: -*.gif +www.*.com/*.zip -*img_*.zip\n"); @@ -239,8 +241,8 @@ void help_wizard(httrackp* opt) { if (strfield2(str,"help")) { help("httrack",2); } else if (strnotempty(str)) { - strcat(stropt2,str); - strcat(stropt2," "); + strcatbuff(stropt2,str); + strcatbuff(stropt2," 
"); } } while(strfield2(str,"help")); @@ -288,14 +290,14 @@ void help_wizard(httrackp* opt) { } /* Free buffers */ - free(urls); - free(mainpath); - free(projname); - free(stropt); - free(stropt2); - free(strwild); - free(cmd); - free(str); + freet(urls); + freet(mainpath); + freet(projname); + freet(stropt); + freet(stropt2); + freet(strwild); + freet(cmd); + freet(str); } int help_query(char* list,int def) { char s[256]; @@ -309,7 +311,7 @@ int help_query(char* list,int def) { char str[256]; str[0]='\0'; // - strncat(str,a,(int) (b - a)); + strncatbuff(str,a,(int) (b - a)); if (n==def) printf("(enter)\t%d\t%s\n",n++,str); else @@ -394,12 +396,13 @@ void make_empty_index(char* str) { // mini-aide (h: help) // y void help(char* app,int more) { + char info[2048]; infomsg(""); if (more) infomsg("1"); if (more != 2) { - char info[2048]; - infomsg("HTTrack version "HTTRACK_VERSION" (compiled "__DATE__")"); + sprintf(info, "HTTrack version "HTTRACK_VERSION"%s (compiled "__DATE__")", WHAT_is_available); + infomsg(info); #ifdef HTTRACK_AFF_WARNING infomsg("NOTE: "HTTRACK_AFF_WARNING); #endif @@ -424,6 +427,7 @@ void help(char* app,int more) { infomsg("Proxy options:"); infomsg(" P proxy use (-P proxy:port or -P user:pass@proxy:port)"); infomsg(" %f *use proxy for ftp (f0 don't use)"); + infomsg(" %b use this local hostname to make/send requests (-%b hostname)"); infomsg(""); infomsg("Limits options:"); infomsg(" rN set the mirror depth to N (* r9999)"); @@ -448,10 +452,12 @@ void help(char* app,int more) { infomsg(" n get non-html files 'near' an html file (ex: an image located outside)"); infomsg(" t test all URLs (even forbidden ones)"); infomsg(" %L <file> add all URL located in this text file (one URL per line)"); + infomsg(" %S <file> add all scan rules located in this text file (one scan rule per line)"); infomsg(""); infomsg("Build options:"); infomsg(" NN structure type (0 *original structure, 1+: see below)"); infomsg(" or user defined structure (-N 
\"%h%p/%n%q.%t\")"); + infomsg(" %M generate a RFC MIME-encapsulated full-archive (.mht)"); infomsg(" LN long names (L1 *long names / L0 8-3 conversion / L2 ISO9660 compatible)"); infomsg(" KN keep original links (e.g. http://www.adr/link) (K0 *relative link, K absolute links, K4 original links, K3 absolute URI links)"); infomsg(" x replace external html links by error pages"); @@ -467,8 +473,10 @@ void help(char* app,int more) { infomsg(" j *parse Java Classes (j0 don't parse)"); infomsg(" sN follow robots.txt and meta robots tags (0=never,1=sometimes,* 2=always)"); infomsg(" %h force HTTP/1.0 requests (reduce update features, only for old servers or proxies)"); + infomsg(" %k use keep-alive if possible, greately reducing latency for small files and test requests (%k0 don't use)"); infomsg(" %B tolerant requests (accept bogus responses on some servers, but not standard!)"); infomsg(" %s update hacks: various hacks to limit re-transfers when updating (identical size, bogus response..)"); + infomsg(" %u url hacks: various hacks to limit duplicate URLs (strip //, www.foo.com==foo.com..)"); infomsg(" %A assume that a type (cgi,asp..) 
is always linked with a mime type (-%A php3,cgi=text/html;dat,bin=application/x-zip)"); infomsg(" shortcut: '--assume standard' is equivalent to -%A "HTS_ASSUME_STANDARD); infomsg(" @iN internet protocol (0=both ipv6+ipv4, 4=ipv4 only, 6=ipv6 only)"); @@ -491,6 +499,7 @@ void help(char* app,int more) { infomsg(" f *log in files"); infomsg(" f2 one single log file"); infomsg(" I *make an index (I0 don't make)"); + infomsg(" %i make a top index for a project folder (* %i0 don't make)"); infomsg(" %I make an searchable index for this mirror (* %I0 don't make)"); infomsg(""); infomsg("Expert options:"); @@ -511,23 +520,26 @@ void help(char* app,int more) { infomsg(" %H debug HTTP headers in logfile"); infomsg(""); infomsg("Guru options: (do NOT use if possible)"); - infomsg(" #0 Filter test (-#0 '*.gif' 'www.bar.com/foo.gif')"); - infomsg(" #f Always flush log files"); - infomsg(" #FN Maximum number of filters"); - infomsg(" #h Version info"); - infomsg(" #K Scan stdin (debug)"); - infomsg(" #L Maximum number of links (-#L1000000)"); - infomsg(" #p Display ugly progress information"); - infomsg(" #P Catch URL"); - infomsg(" #R Old FTP routines (debug)"); - infomsg(" #T Generate transfer ops. log every minutes"); - infomsg(" #u Wait time"); - infomsg(" #Z Generate transfer rate statictics every minutes"); - infomsg(" #! Execute a shell command (-#! \"echo hello\")"); + infomsg(" #X *use optimized engine (limited memory boundary checks)"); + infomsg(" #0 filter test (-#0 '*.gif' 'www.bar.com/foo.gif')"); + infomsg(" #C cache list (-#C '*.com/spider*.gif'"); + infomsg(" #f always flush log files"); + infomsg(" #FN maximum number of filters"); + infomsg(" #h version info"); + infomsg(" #K scan stdin (debug)"); + infomsg(" #L maximum number of links (-#L1000000)"); + infomsg(" #p display ugly progress information"); + infomsg(" #P catch URL"); + infomsg(" #R old FTP routines (debug)"); + infomsg(" #T generate transfer ops. 
log every minutes"); + infomsg(" #u wait time"); + infomsg(" #Z generate transfer rate statictics every minutes"); + infomsg(" #! execute a shell command (-#! \"echo hello\")"); infomsg(""); infomsg("Command-line specific options:"); infomsg(" V execute system command after each files ($0 is the filename: -V \"rm \\$0\")"); infomsg(" %U run the engine with another id when called as root (-%U smith)"); + infomsg(" %W use an external library function as a wrapper (-%W link-detected=foo.so:myfunction)"); /* infomsg(" %O do a chroot before setuid"); */ infomsg(""); infomsg("Details: Option N"); @@ -562,6 +574,15 @@ void help(char* app,int more) { infomsg(" '%q' small query string MD5 (16 bits, 4 ascii bytes)"); infomsg(" '%s?' Short name version (ex: %sN)"); infomsg(" '%[param]' param variable in query string"); + infomsg(" '%[param:before:after:notfound:empty]' advanced variable extraction"); + infomsg("Details: User-defined option N and advanced variable extraction"); + infomsg(" %[param:before:after:notfound:empty]"); + infomsg(" param : parameter name"); + infomsg(" before : string to prepend if the parameter was found"); + infomsg(" after : string to append if the parameter was found"); + infomsg(" notfound : string replacement if the parameter could not be found"); + infomsg(" empty : string replacement if the parameter was empty"); + infomsg(" all fields, except the first one (the parameter name), can be empty"); infomsg(""); infomsg("Details: Option K"); infomsg(" K0 foo.cgi?q=45 -> foo4B54.html?q=45 (relative URI, default)"); @@ -586,6 +607,24 @@ void help(char* app,int more) { infomsg(""); infomsg("--http10 force http/1.0 requests (-%h)"); infomsg(""); + infomsg("Details: Option %W: External callbacks prototypes"); + infomsg("'init' : void (* myfunction)(void);"); + infomsg("'free' : void (* myfunction)(void);"); + infomsg("'start' : int (* myfunction)(httrackp* opt);"); + infomsg("'end' : int (* myfunction)(void);"); + infomsg("'change-options' : int (* 
myfunction)(httrackp* opt);"); + infomsg("'check-html' : int (* myfunction)(char* html,int len,char* url_adresse,char* url_fichier);"); + infomsg("'query' : char* (* myfunction)(char* question);"); + infomsg("'query2' : char* (* myfunction)(char* question);"); + infomsg("'query3' : char* (* myfunction)(char* question);"); + infomsg("'loop' : int (* myfunction)(lien_back* back,int back_max,int back_index,int lien_tot,int lien_ntot,int stat_time,hts_stat_struct* stats);"); + infomsg("'check-link' : int (* myfunction)(char* adr,char* fil,int status);"); + infomsg("'pause' : void (* myfunction)(char* lockfile);"); + infomsg("'save-file' : void (* myfunction)(char* file);"); + infomsg("'link-detected' : int (* myfunction)(char* link);"); + infomsg("'transfer-status' : int (* myfunction)(lien_back* back);"); + infomsg("'save-name' : int (* myfunction)(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save);"); + infomsg(""); infomsg(""); infomsg("example: httrack www.someweb.com/bob/"); infomsg("means: mirror site www.someweb.com/bob/ and only this site"); @@ -608,7 +647,8 @@ void help(char* app,int more) { infomsg("example: httrack --continue"); infomsg("continues a mirror in the current folder"); infomsg(""); - infomsg("HTTrack version "HTTRACK_VERSION" (compiled "__DATE__")"); + sprintf(info, "HTTrack version "HTTRACK_VERSION"%s (compiled "__DATE__")", WHAT_is_available); + infomsg(info); infomsg("Copyright (C) Xavier Roche and other contributors"); #ifdef HTS_PLATFORM_NAME infomsg("[compiled: "HTS_PLATFORM_NAME"]"); diff --git a/src/htsindex.c b/src/htsindex.c index 5a66724..1a75103 100644 --- a/src/htsindex.c +++ b/src/htsindex.c @@ -44,6 +44,7 @@ Please visit our Website: http://www.httrack.com #if HTS_MAKE_KEYWORD_INDEX #include "htshash.h" +#include "htsinthash.h" /* Keyword Indexer Parameters */ @@ -92,7 +93,7 @@ Please visit our Website: http://www.httrack.com // Words begining with these (accepted) characters will be ignored 
#define KEYW_NOT_BEG "0123456789" // Treat these characters as space characters - MUST NOT BE EMPTY!!! -#define KEYW_SPACE " ',;:!?\"\x0d\x0a\x09\x0c" +#define KEYW_SPACE " ',;:!?\"\x0d\x0a\x09\x0b\x0c" // Common words (the,for..) detector // If a word represents more than KEYW_USELESS1K (%1000) of total words, then ignore it // 5 (0.5%) @@ -184,6 +185,8 @@ int index_keyword(const char* html_data,LLint size,const char* mime,const char* || (strfield2(mime,"text/css")) ) { inscript=1; + //} else if (strfield2(mime, "text/vnd.wap.wml")) { // humm won't work in many cases + // inscript=0; } else return 0; @@ -299,9 +302,9 @@ int index_keyword(const char* html_data,LLint size,const char* mime,const char* e++; /* 0 means "once" */ if (strncmp((const char*)fslash((char*)indexpath),filename,strlen(indexpath))==0) // couper - strcpy(savelst,filename+strlen(indexpath)); + strcpybuff(savelst,filename+strlen(indexpath)); else - strcpy(savelst,filename); + strcpybuff(savelst,filename); // Add entry for this file and word fprintf(fp_tmpproject,"%s %d %s\n",line,(int) (KEYW_SORT_MAXCOUNT - e),savelst); @@ -331,7 +334,7 @@ void index_finish(const char* indexpath,int mode) { #if HTS_MAKE_KEYWORD_INDEX char** tab; char* blk; - int size; + INTsys size; size=fpsize(fp_tmpproject); if (size>0) { @@ -342,7 +345,7 @@ void index_finish(const char* indexpath,int mode) { blk = malloct(size+4); if (blk) { fseek(fp_tmpproject,0,SEEK_SET); - if ((int)fread(blk,1,size,fp_tmpproject) == size) { + if ((INTsys)fread(blk,1,size,fp_tmpproject) == size) { char *a=blk,*b; int index=0; int i; @@ -430,7 +433,7 @@ void index_finish(const char* indexpath,int mode) { fprintf(fp,"<tr>\r\n<td>%s</td>\r\n<td>\r\n",word); } fflush(fp); last_pos=ftell(fp); - strcpy(current_word,word); + strcpybuff(current_word,word); total_hit=total_line=0; } total_hit+=hit; diff --git a/src/htsinthash.c b/src/htsinthash.c new file mode 100644 index 0000000..95b8711 --- /dev/null +++ b/src/htsinthash.c @@ -0,0 +1,252 @@ +/* 
------------------------------------------------------------ */ +/* +HTTrack Website Copier, Offline Browser for Windows and Unix +Copyright (C) Xavier Roche and other contributors + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + + +Important notes: + +- We hereby ask people using this source NOT to use it in purpose of grabbing +emails addresses, or collecting any other private information on persons. +This would disgrace our work, and spoil the many hours we spent on it. 
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: httrack.c subroutines:                                 */
+/*       hash table system (fast index)                         */
+/* Author: Xavier Roche                                         */
+/* ------------------------------------------------------------ */
+
+#include "htsinthash.h"
+
+/* specific definitions */
+#include "htsbase.h"
+#include "htsglobal.h"
+#include "htsmd5.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+/* END specific definitions */
+
+/* Specific macros */
+/* Fall back to the plain libc routines when the project wrappers are not defined */
+#ifndef malloct
+#define malloct malloc
+#define freet free
+#define calloct calloc
+#define strcpybuff strcpy
+#endif
+
+// inthash -- simple hash table, using a key (char[]) and a value (ulong int)
+
+/* Hash a key string. Thin wrapper over md5sum32(), which is defined outside
+   this file (presumably declared by htsmd5.h -- confirm).
+   Callers reduce the result modulo hash_size to pick a bucket. */
+unsigned long int inthash_key(char* value) {
+  return md5sum32(value);
+}
+
+/* Overwrite the value stored under 'name', or add the pair if the key is new.
+   When the table is flagged as owning malloc'ed values, the previous non-null
+   value is released first (custom free handler if set, freet() otherwise).
+   Returns 0 if an existing entry was replaced, 1 if a new entry was added. */
+int inthash_write(inthash hashtable,char* name,long int value) {
+  int pos = (inthash_key(name) % hashtable->hash_size);
+  inthash_chain* h=hashtable->hash[pos];
+  while (h) {
+    if (strcmp(h->name,name)==0) {
+      /* Delete element */
+      if (hashtable->flag_valueismalloc) {
+        if (h->value.intg) {
+          if (hashtable->free_handler)
+            hashtable->free_handler((void*)h->value.intg);
+          else
+            freet((void*)h->value.intg);
+        }
+      }
+      /* Insert */
+      h->value.intg=value;
+      return 0;
+    }
+    h=h->next;
+  }
+  // Not found, add it!
+  inthash_add(hashtable,name,value);
+  return 1;
+}
+
+/* Increment the counter stored under 'name'; a missing key is created with
+   the value 0 (it is NOT incremented on creation).
+   Returns 1 if the entry was created, 0 if an existing one was incremented. */
+int inthash_inc(inthash hashtable,char* name) {
+  long int value=0;
+  int r=0;
+  if (inthash_read(hashtable,name,&value)) {
+    value++;
+  }
+  else {    /* create new value */
+    value=0;
+    r=1;
+  }
+  inthash_write(hashtable,name,value);
+  return (r);
+}
+
+
+/* Append a (name,value) pair at the end of its bucket chain.
+   Does not check for duplicate entries. The node and a private copy of the
+   key live in one single allocation (the key is stored right after the
+   node). If the allocation fails the pair is silently not added. */
+void inthash_add(inthash hashtable,char* name,long int value) {
+  int pos = (inthash_key(name) % hashtable->hash_size);
+  inthash_chain** h=&hashtable->hash[pos];
+
+  /* walk to the tail link of the chain */
+  while (*h)
+    h=&((*h)->next);
+  *h=(inthash_chain*)calloct(1, sizeof(inthash_chain) + strlen(name)+2);
+  if (*h) {
+    (*h)->name=((char*)(*h)) + sizeof(inthash_chain);
+    (*h)->next=NULL;
+    strcpybuff((*h)->name,name);
+    (*h)->value.intg=value;
+  }
+}
+
+/* Append an entry whose value is a zero-filled block of 'blksize' bytes.
+   The block is carved out of the node's own allocation (node, then key, then
+   block), so it must never be freed on its own.
+   Returns the block address, or NULL if the allocation failed.
+   NOTE(review): the address is stored through 'unsigned long'; this assumes
+   a pointer fits in an unsigned long on the target platform -- confirm. */
+void* inthash_addblk(inthash hashtable,char* name,int blksize) {
+  int pos = (inthash_key(name) % hashtable->hash_size);
+  inthash_chain** h=&hashtable->hash[pos];
+
+  /* walk to the tail link of the chain */
+  while (*h)
+    h=&((*h)->next);
+  *h=(inthash_chain*)calloct(1, sizeof(inthash_chain) + strlen(name)+2 + blksize);
+  if (*h) {
+    (*h)->name = ((char*)(*h)) + sizeof(inthash_chain);
+    (*h)->next=NULL;
+    strcpybuff((*h)->name,name);
+    (*h)->value.intg = (unsigned long) (char*) ((char*)(*h)) + sizeof(inthash_chain) + strlen(name) + 2;
+    return (void*)(*h)->value.intg;
+  }
+  return NULL;
+}
+
+/* Look up 'name'. On success store the value in *value and return 1;
+   return 0 (leaving *value untouched) if the key is not present. */
+int inthash_read(inthash hashtable,char* name,long int* value) {
+  int pos = (inthash_key(name) % hashtable->hash_size);
+  inthash_chain* h=hashtable->hash[pos];
+  while (h) {
+    if (strcmp(h->name,name)==0) {
+      *value=h->value.intg;
+      return 1;
+    }
+    h=h->next;
+  }
+  return 0;
+}
+
+/* Same as inthash_read(), but *value is reset to 0 first and a stored
+   zero (null pointer) value is reported as "not found" (returns 0). */
+int inthash_readptr(inthash hashtable,char* name,long int* value) {
+  int ret;
+  *value = 0;
+  ret = inthash_read(hashtable, name, value);
+  if (*value == 0)
+    ret = 0;
+  return ret;
+}
+
+/* Reset every bucket head to NULL. Existing chains are NOT freed. */
+void inthash_init(inthash hashtable) {
+  unsigned int i;
+  for(i=0;i<hashtable->hash_size;i++) {
+    hashtable->hash[i]=NULL;
+  }
+}
+
+/* Free a whole bucket chain, last node first (recursive: the recursion depth
+   equals the chain length).
+   free_handler: when non-NULL, each non-null value is passed to it before
+   the node itself is released; when NULL, values are left alone. */
+void inthash_delchain(inthash_chain* hash,t_inthash_freehandler free_handler) {
+  if (hash) {
+    inthash_delchain(hash->next,free_handler);
+    if (free_handler) {     // pos is a malloc() block, delete it!
+      if (hash->value.intg)
+        free_handler((void*)hash->value.intg);
+      hash->value.intg=0;
+    }
+    freet(hash);
+  }
+}
+
+/* Default value destructor: plain freet() on non-null values. */
+void inthash_default_free_handler(void* value) {
+  if (value)
+    freet(value);
+}
+
+// --
+
+/* Create a hash table with 'size' buckets.
+   Returns NULL if the table structure itself could not be allocated.
+   If only the bucket array is unavailable (allocation failure, or
+   size <= 0) the structure is returned with hash == NULL and
+   hash_size == 0: always check the result with inthash_created()
+   before using the table. */
+inthash inthash_new(int size) {
+  inthash hashtable=(inthash)calloct(1,sizeof(struct_inthash));
+  if (hashtable) {
+    hashtable->hash_size=0;
+    hashtable->flag_valueismalloc=0;
+    /* size <= 0 is refused: a zero-bucket table would make every
+       'key % hash_size' computation a division by zero */
+    if (size > 0
+        && (hashtable->hash=(inthash_chain**)calloct(size,sizeof(inthash_chain*)))) {
+      hashtable->hash_size=size;
+      inthash_init(hashtable);
+    }
+  }
+  return hashtable;
+}
+
+/* Returns 1 if the table and its bucket array both exist, 0 otherwise. */
+int inthash_created(inthash hashtable) {
+  if (hashtable)
+    if (hashtable->hash)
+      return 1;
+  return 0;
+}
+
+/* Declare whether stored values are malloc'ed blocks owned by the table
+   (non-zero flag: they are released on overwrite and on deletion). */
+void inthash_value_is_malloc(inthash hashtable,int flag) {
+  hashtable->flag_valueismalloc=flag;
+}
+
+/* Set the destructor used for owned values (NULL selects the default one). */
+void inthash_value_set_free_handler(inthash hashtable, t_inthash_freehandler free_handler) {
+  hashtable->free_handler = free_handler;
+}
+
+/* Destroy a hash table and set *hashtable to NULL.
+   Values are released only when the table was flagged with
+   inthash_value_is_malloc(); the custom free handler is used if one was
+   set, the default handler otherwise. Safe to call with a NULL pointer or
+   with an already deleted (NULL) table. */
+void inthash_delete(inthash* hashtable) {
+  if (hashtable) {
+    if (*hashtable) {
+      if ((*hashtable)->hash) {
+        unsigned int i;
+        t_inthash_freehandler free_handler=NULL;
+        if ( (*hashtable)->flag_valueismalloc ) {
+          if ( (*hashtable)->free_handler )
+            free_handler=(*hashtable)->free_handler;
+          else
+            free_handler=inthash_default_free_handler;
+        }
+        for(i=0;i<(*hashtable)->hash_size;i++) {
+          /* pass the handler selected above, NOT the raw struct member:
+             it is NULL for non-owned values and falls back to the default
+             destructor when no custom handler was registered */
+          inthash_delchain((*hashtable)->hash[i],free_handler);
+          (*hashtable)->hash[i]=NULL;
+        }
+        freet((*hashtable)->hash);
+        (*hashtable)->hash = NULL;
+      }
+      freet(*hashtable);
+      *hashtable=NULL;
+    }
+  }
+}
diff --git a/src/htsinthash.h b/src/htsinthash.h
new file mode 100644
index 0000000..c667cd4
--- /dev/null
+++ b/src/htsinthash.h
@@ -0,0 +1,94 @@
+/* 
------------------------------------------------------------ */ +/* +HTTrack Website Copier, Offline Browser for Windows and Unix +Copyright (C) Xavier Roche and other contributors + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + + +Important notes: + +- We hereby ask people using this source NOT to use it in purpose of grabbing +emails addresses, or collecting any other private information on persons. +This would disgrace our work, and spoil the many hours we spent on it. 
+ + +Please visit our Website: http://www.httrack.com +*/ + + +/* ------------------------------------------------------------ */ +/* File: httrack.c subroutines: */ +/* hash table system (fast index) */ +/* Author: Xavier Roche */ +/* ------------------------------------------------------------ */ + + + +#ifndef HTSINTHASH_DEFH +#define HTSINTHASH_DEFH + +// inthash -- simple hash table, using a key (char[]) and a value (ulong int) + +// simple hash table for other routines +typedef struct inthash_chain { + char* name; /* key (name) */ + union { + unsigned long int intg; /* integer value */ + void* ptr; /* ptr value */ + } value; + struct inthash_chain* next; /* next element */ +} inthash_chain; + +// structure behind inthash +typedef void (* t_inthash_freehandler)(void* value); +typedef struct { + inthash_chain** hash; + t_inthash_freehandler free_handler; + unsigned int hash_size; + unsigned short flag_valueismalloc; +} struct_inthash; + +// main inthash type +typedef struct_inthash* inthash; + +// subfunctions +unsigned long int inthash_key(char* value); +void inthash_init(inthash hashtable); +void inthash_delchain(inthash_chain* hash,t_inthash_freehandler free_handler); +void inthash_default_free_handler(void* value); + +// main functions: + + +/* Hash functions: */ +inthash inthash_new(int size); /* Create a new hash table */ +int inthash_created(inthash hashtable); /* Test if the hash table was successfully created */ +void inthash_delete(inthash* hashtable); /* Delete an hash table */ +void inthash_value_is_malloc(inthash hashtable,int flag); /* Is the 'value' member a value that needs to be free()'ed ? 
*/ +void inthash_value_set_free_handler(inthash hashtable, /* value free() handler (default one is 'free') */ + t_inthash_freehandler free_handler); +/* */ +int inthash_read(inthash hashtable,char* name,long int* value); /* Read entry from the hash table */ +int inthash_readptr(inthash hashtable,char* name,long int* value); /* Same function, but returns 0 upon null ptr */ +/* */ +void inthash_add(inthash hashtable,char* name,long int value); /* Add entry in the hash table */ +void* inthash_addblk(inthash hashtable,char* name,int blksize); /* Add entry in the hash table and set value to a new memory block */ +int inthash_write(inthash hashtable,char* name,long int value); /* Overwrite/add entry in the hash table */ +int inthash_inc(inthash hashtable,char* name); /* Increment entry in the hash table */ +/* End of hash functions: */ + + +#endif diff --git a/src/htsjava.c b/src/htsjava.c index bb29692..afb166b 100644 --- a/src/htsjava.c +++ b/src/htsjava.c @@ -41,8 +41,9 @@ Please visit our Website: http://www.httrack.com // htsjava.c - Parseur de classes java #include "stdio.h" -#include "htssystem.h" +#include "htsglobal.h" #include "htscore.h" + #include "htsjava.h" #include <stdio.h> @@ -53,11 +54,10 @@ Please visit our Website: http://www.httrack.com //#include <math.h> -#ifndef HTS_LITTLE_ENDIAN -#define REVERSE_ENDIAN 1 -#else -#define REVERSE_ENDIAN 0 -#endif +static int reverse_endian(void) { + int endian = 1; + return ( * ( (char*) &endian) == 1); +} /* big/little endian swap */ #define hts_swap16(A) ( (((A) & 0xFF)<<8) | (((A) & 0xFF00)>>8) ) @@ -80,19 +80,33 @@ Please visit our Website: http://www.httrack.com #define JAVADEBUG 0 -int hts_parse_java(char *file,char* err_msg) +int hts_detect_java(htsmoduleStruct* str) { + char* savename = str->filename; + if (savename) { + int len = (int) strlen(savename); + if (len > 6 && strfield(savename + len - 6,".class")) { + return 1; + } + } + return 0; +} + +int hts_parse_java(htsmoduleStruct* str) { FILE *fpout; 
JAVA_HEADER header; RESP_STRUCT *tab; + char* file = str->filename; + str->relativeToHtmlLink = 1; + #if JAVADEBUG printf("fopen\n"); #endif if ((fpout = fopen(fconv(file), "r+b")) == NULL) { //fprintf(stderr, "Cannot open input file.\n"); - sprintf(err_msg,"Unable to open file %s",file); + sprintf(str->err_msg,"Unable to open file %s",file); return 0; // une erreur.. } @@ -102,7 +116,7 @@ int hts_parse_java(char *file,char* err_msg) //if (fread(&header,1,sizeof(JAVA_HEADER),fpout) != sizeof(JAVA_HEADER)) { // pas complet.. if (fread(&header,1,10,fpout) != 10) { // pas complet.. fclose(fpout); - sprintf(err_msg,"File header too small (file len = "LLintP")",(LLint)fsize(file)); + sprintf(str->err_msg,"File header too small (file len = "LLintP")",(LLint)fsize(file)); return 0; } @@ -110,19 +124,19 @@ int hts_parse_java(char *file,char* err_msg) printf("header\n"); #endif // tester en tête -#if REVERSE_ENDIAN - header.magic = hts_swap32(header.magic); - header.count = hts_swap16(header.count); -#endif + if (reverse_endian()) { + header.magic = hts_swap32(header.magic); + header.count = hts_swap16(header.count); + } if(header.magic!=0xCAFEBABE) { - sprintf(err_msg,"non java file"); + sprintf(str->err_msg,"non java file"); if (fpout) { fclose(fpout); fpout=NULL; } return 0; } tab =(RESP_STRUCT*)calloct(header.count,sizeof(RESP_STRUCT)); if (!tab) { - sprintf(err_msg,"Unable to alloc %d bytes",(int)sizeof(RESP_STRUCT)); + sprintf(str->err_msg,"Unable to alloc %d bytes",(int)sizeof(RESP_STRUCT)); if (fpout) { fclose(fpout); fpout=NULL; } return 0; // erreur.. } @@ -135,12 +149,12 @@ int hts_parse_java(char *file,char* err_msg) for (i = 1; i < header.count; i++) { int err=0; // ++ - tab[i]=readtable(fpout,tab[i],&err,err_msg); + tab[i]=readtable(str,fpout,tab[i],&err); if (!err) { if ((tab[i].type == HTS_LONG) ||(tab[i].type == HTS_DOUBLE)) i++; //2 element si double ou float } else { // ++ une erreur est survenue! 
- if (strnotempty(err_msg)==0) - strcpy(err_msg,"Internal readtable error"); + if (strnotempty(str->err_msg)==0) + strcpybuff(str->err_msg,"Internal readtable error"); freet(tab); if (fpout) { fclose(fpout); fpout=NULL; } return 0; @@ -180,7 +194,7 @@ int hts_parse_java(char *file,char* err_msg) printf("add %s\n",tempo); #endif if (tab[tab[i].index1].file_position >= 0) - hts_add_file(tempo,tab[tab[i].index1].file_position); + str->addLink(str,tempo); /* tab[tab[i].index1].file_position */ } } @@ -205,7 +219,8 @@ int hts_parse_java(char *file,char* err_msg) // error: !=0 si erreur fatale -RESP_STRUCT readtable(FILE *fp,RESP_STRUCT trans,int* error,char* err_msg) +RESP_STRUCT readtable(htsmoduleStruct* str, + FILE *fp, RESP_STRUCT trans, int* error) { unsigned short int length; int j; @@ -214,54 +229,54 @@ RESP_STRUCT readtable(FILE *fp,RESP_STRUCT trans,int* error,char* err_msg) trans.type = (int)(unsigned char)fgetc(fp); switch (trans.type) { case HTS_CLASS: - strcpy(trans.name,"Class"); + strcpybuff(trans.name,"Class"); trans.index1 = readshort(fp); break; case HTS_FIELDREF: - strcpy(trans.name,"Field Reference"); + strcpybuff(trans.name,"Field Reference"); trans.index1 = readshort(fp); readshort(fp); break; case HTS_METHODREF: - strcpy(trans.name,"Method Reference"); + strcpybuff(trans.name,"Method Reference"); trans.index1 = readshort(fp); readshort(fp); break; case HTS_INTERFACE: - strcpy(trans.name,"Interface Method Reference"); + strcpybuff(trans.name,"Interface Method Reference"); trans.index1 =readshort(fp); readshort(fp); break; case HTS_NAMEANDTYPE: - strcpy(trans.name,"Name and Type"); + strcpybuff(trans.name,"Name and Type"); trans.index1 = readshort(fp); readshort(fp); break; case HTS_STRING: // CONSTANT_String - strcpy(trans.name,"String"); + strcpybuff(trans.name,"String"); trans.index1 = readshort(fp); break; case HTS_INTEGER: - strcpy(trans.name,"Integer"); + strcpybuff(trans.name,"Integer"); for(j=0;j<4;j++) fgetc(fp); break; case HTS_FLOAT: - 
strcpy(trans.name,"Float"); + strcpybuff(trans.name,"Float"); for(j=0;j<4;j++) fgetc(fp); break; case HTS_LONG: - strcpy(trans.name,"Long"); + strcpybuff(trans.name,"Long"); for(j=0;j<8;j++) fgetc(fp); break; case HTS_DOUBLE: - strcpy(trans.name,"Double"); + strcpybuff(trans.name,"Double"); for(j=0;j<8;j++) fgetc(fp); break; @@ -269,9 +284,9 @@ RESP_STRUCT readtable(FILE *fp,RESP_STRUCT trans,int* error,char* err_msg) case HTS_UNICODE: if (trans.type == HTS_ASCIZ) - strcpy(trans.name,"HTS_ASCIZ"); + strcpybuff(trans.name,"HTS_ASCIZ"); else - strcpy(trans.name,"HTS_UNICODE"); + strcpybuff(trans.name,"HTS_UNICODE"); { char buffer[1024]; @@ -295,10 +310,10 @@ RESP_STRUCT readtable(FILE *fp,RESP_STRUCT trans,int* error,char* err_msg) // if(tris(buffer)==1) printf("%s\n ",buffer); // if(tris(buffer)==2) printf("%s\n ",printname(buffer)); //#endif - if(tris(buffer)==1) hts_add_file(buffer,trans.file_position); - else if(tris(buffer)==2) hts_add_file(printname(buffer),trans.file_position); + if(tris(buffer)==1) str->addLink(str, buffer); /* trans.file_position */ + else if(tris(buffer)==2) str->addLink(str, printname(buffer)); - strcpy(trans.name,buffer); + strcpybuff(trans.name,buffer); } else { // gros pb while ( (length > 0) && (!feof(fp))) { fgetc(fp); @@ -307,7 +322,7 @@ RESP_STRUCT readtable(FILE *fp,RESP_STRUCT trans,int* error,char* err_msg) if (!feof(fp)) { trans.type=-1; } else { - sprintf(err_msg,"Internal stucture error (ASCII)"); + sprintf(str->err_msg,"Internal stucture error (ASCII)"); *error = 1; } return(trans); @@ -317,7 +332,7 @@ RESP_STRUCT readtable(FILE *fp,RESP_STRUCT trans,int* error,char* err_msg) default: // printf("Type inconnue\n"); // on arrête tout - sprintf(err_msg,"Internal structure unknown (type %d)",trans.type); + sprintf(str->err_msg,"Internal structure unknown (type %d)",trans.type); *error = 1; return(trans); break; @@ -331,11 +346,10 @@ unsigned short int readshort(FILE *fp) unsigned short int valint; 
fread(&valint,sizeof(valint),1,fp); -#if REVERSE_ENDIAN - return hts_swap16(valint); -#else - return valint; -#endif + if (reverse_endian()) + return hts_swap16(valint); + else + return valint; } @@ -383,7 +397,7 @@ char * printname(char name[1024]) for (j = 0; j < (int) strlen(name); j++,p++) { if (*p == '/') *p1='.'; if (*p==';'){*p1='\0'; - strcat(rname,".class"); + strcatbuff(rname,".class"); return (rname);} else *p1=*p; p1++; diff --git a/src/htsjava.h b/src/htsjava.h index 66a75a5..b3d17d4 100644 --- a/src/htsjava.h +++ b/src/htsjava.h @@ -38,8 +38,8 @@ Please visit our Website: http://www.httrack.com #ifndef HTSJAVA_DEFH #define HTSJAVA_DEFH -/* LLint fsize(char* s); */ -int fsize(char* s); +#include <stdio.h> +#include "htsmodules.h" typedef struct { unsigned long int magic; @@ -57,10 +57,11 @@ typedef struct { } RESP_STRUCT; -int hts_parse_java(char *file,char* err_msg); +int hts_detect_java(htsmoduleStruct* str); +int hts_parse_java(htsmoduleStruct* str); RESP_STRUCT affecte(int i1,int i2,RESP_STRUCT *i3,RESP_STRUCT *i4,int i5); //unsigned int swap(long int nomber,int digit); -RESP_STRUCT readtable(FILE *fp,RESP_STRUCT,int*,char*); +RESP_STRUCT readtable(htsmoduleStruct* str,FILE *fp,RESP_STRUCT,int*); unsigned short int readshort(FILE *fp); int tris(char*); char * printname(char [1024]); diff --git a/src/htslib.c b/src/htslib.c index e4e6006..3954f9c 100644 --- a/src/htslib.c +++ b/src/htslib.c @@ -50,8 +50,16 @@ Please visit our Website: http://www.httrack.com #if HTS_WIN #include <direct.h> #else +#ifdef HAVE_SYS_TYPES_H +#include <sys/types.h> +#endif +#ifdef HAVE_SYS_STAT_H +#include <sys/stat.h> +#endif +#ifdef HAVE_UNISTD_H #include <unistd.h> #endif +#endif #include <stdlib.h> #include <string.h> #include <time.h> @@ -61,11 +69,7 @@ Please visit our Website: http://www.httrack.com #if HTS_WIN #include <sys/utime.h> #else -#if HTS_PLATFORM!=3 #include <utime.h> -#else -#include <utime.h> -#endif #endif /* END specific definitions */ @@ -95,7 +99,7 
@@ int IPV6_resolver = 0; /* détection complémentaire */ -const char hts_detect[][32] = { +const char* hts_detect[] = { "archive", "background", "data", // OBJECT @@ -112,13 +116,13 @@ const char hts_detect[][32] = { }; /* détecter début */ -const char hts_detectbeg[][32] = { +const char* hts_detectbeg[] = { "hotspot", /* hotspot1=..,hotspot2=.. */ "" }; /* ne pas détcter de liens dedans */ -const char hts_nodetect[][32] = { +const char* hts_nodetect[] = { "accept-charset", "accesskey", "action", @@ -160,7 +164,7 @@ const char hts_nodetect[][32] = { /* détection de mini-code javascript */ /* ALSO USED: detection based on the name: onXXX="<tag>" where XXX starts with upper case letter */ -const char hts_detect_js[][32] = { +const char* hts_detect_js[] = { "onAbort", "onBlur", "onChange", @@ -184,30 +188,42 @@ const char hts_detect_js[][32] = { "onSelect", "onSubmit", "onUnload", + "style", /* hack for CSS code data */ + "" +}; + +const char* hts_main_mime[] = { + "application", + "audio", + "image", + "message", + "multipart", + "text", + "video", "" }; /* détection "...URL=<url>" */ -const char hts_detectURL[][32] = { +const char* hts_detectURL[] = { "content", "" }; /* tags où l'URL doit être réécrite mais non capturée */ -const char hts_detectandleave[][32] = { +const char* hts_detectandleave[] = { "action", "" }; -/* ne pas renommer les types renvoyés (couvent types inconnus) */ -const char hts_mime_keep[][32] = { +/* ne pas renommer les types renvoyés (souvent types inconnus) */ +const char* hts_mime_keep[] = { "application/octet-stream", "text/plain", "" }; /* pas de type mime connu, mais extension connue */ -const char hts_ext_dynamic[][32] = { +const char* hts_ext_dynamic[] = { "php3", "php", "php4", @@ -218,11 +234,14 @@ const char hts_ext_dynamic[][32] = { "pl", /*"exe",*/ "cfm", + "nsf", /* lotus */ "" }; -/* types MIME */ -const char hts_mime[][2][32] = { +/* types MIME + note: application/octet-stream should not be used here +*/ +const char* 
hts_mime[][2] = { {"application/acad","dwg"}, {"application/arj","arj"}, {"application/clariscad","ccad"}, @@ -386,24 +405,167 @@ const char hts_mime[][2][32] = { {"x-conference/x-cooltalk","ice"}, /*{"application/x-httpd-cgi","cgi"},*/ {"x-world/x-vrml","wrl"}, - + + /* More from w3schools.com */ + { "application/envoy", "evy" }, + { "application/fractals", "fif" }, + { "application/futuresplash", "spl" }, + { "application/hta", "hta" }, + { "application/internet-property-stream", "acx" }, + { "application/msword", "dot" }, + { "application/olescript", "axs" }, + { "application/pics-rules", "prf" }, + { "application/pkcs10", "p10" }, + { "application/pkix-crl", "crl" }, + { "application/set-payment-initiation", "setpay" }, + { "application/set-registration-initiation", "setreg" }, + { "application/vnd.ms-excel", "xla" }, + { "application/vnd.ms-excel", "xlc" }, + { "application/vnd.ms-excel", "xlm" }, + { "application/vnd.ms-excel", "xls" }, + { "application/vnd.ms-excel", "xlt" }, + { "application/vnd.ms-excel", "xlw" }, + { "application/vnd.ms-pkicertstore", "sst" }, + { "application/vnd.ms-pkiseccat", "cat" }, + { "application/vnd.ms-powerpoint", "pot" }, + { "application/vnd.ms-powerpoint", "pps" }, + { "application/vnd.ms-powerpoint", "ppt" }, + { "application/vnd.ms-project", "mpp" }, + { "application/vnd.ms-works", "wcm" }, + { "application/vnd.ms-works", "wdb" }, + { "application/vnd.ms-works", "wks" }, + { "application/vnd.ms-works", "wps" }, + { "application/x-compress", "z" }, + { "application/x-compressed", "tgz" }, + { "application/x-internet-signup", "ins" }, + { "application/x-internet-signup", "isp" }, + { "application/x-iphone", "iii" }, + { "application/x-javascript", "js" }, + { "application/x-msaccess", "mdb" }, + { "application/x-mscardfile", "crd" }, + { "application/x-msclip", "clp" }, + { "application/x-msmediaview", "m13" }, + { "application/x-msmediaview", "m14" }, + { "application/x-msmediaview", "mvb" }, + { "application/x-msmetafile", 
"wmf" }, + { "application/x-msmoney", "mny" }, + { "application/x-mspublisher", "pub" }, + { "application/x-msschedule", "scd" }, + { "application/x-msterminal", "trm" }, + { "application/x-perfmon", "pma" }, + { "application/x-perfmon", "pmc" }, + { "application/x-perfmon", "pml" }, + { "application/x-perfmon", "pmr" }, + { "application/x-perfmon", "pmw" }, + { "application/x-pkcs12", "p12" }, + { "application/x-pkcs12", "pfx" }, + { "application/x-pkcs7-certificates", "p7b" }, + { "application/x-pkcs7-certificates", "spc" }, + { "application/x-pkcs7-certreqresp", "p7r" }, + { "application/x-pkcs7-mime", "p7c" }, + { "application/x-pkcs7-mime", "p7m" }, + { "application/x-pkcs7-signature", "p7s" }, + { "application/x-troff-me", "me" }, + { "application/x-x509-ca-cert", "cer" }, + { "application/x-x509-ca-cert", "crt" }, + { "application/x-x509-ca-cert", "der" }, + { "application/ynd.ms-pkipko", "pko" }, + { "audio/mid", "mid" }, + { "audio/mid", "rmi" }, + { "audio/mpeg", "mp3" }, + { "audio/x-mpegurl", "m3u" }, + { "image/bmp", "bmp" }, + { "image/cis-cod", "cod" }, + { "image/pipeg", "jfif" }, + { "image/x-cmx", "cmx" }, + { "image/x-icon", "ico" }, + { "image/x-portable-bitmap", "pbm" }, + { "message/rfc822", "mht" }, + { "message/rfc822", "mhtml" }, + { "message/rfc822", "nws" }, + { "text/css", "css" }, + { "text/h323", "323" }, + { "text/html", "stm" }, + { "text/iuls", "uls" }, + { "text/plain", "bas" }, + { "text/scriptlet", "sct" }, + { "text/webviewhtml", "htt" }, + { "text/x-component", "htc" }, + { "text/x-vcard", "vcf" }, + { "video/mpeg", "mp2" }, + { "video/mpeg", "mpa" }, + { "video/mpeg", "mpv2" }, + { "video/x-la-asf", "lsf" }, + { "video/x-la-asf", "lsx" }, + { "video/x-ms-asf", "asf" }, + { "video/x-ms-asf", "asr" }, + { "video/x-ms-asf", "asx" }, + { "x-world/x-vrml", "flr" }, + { "x-world/x-vrml", "vrml" }, + { "x-world/x-vrml", "wrz" }, + { "x-world/x-vrml", "xaf" }, + { "x-world/x-vrml", "xof" }, + + /* Various */ + { "application/ogg", 
"ogg" }, + {"*","class"}, {"",""}}; // Reserved (RFC2396) -#define CHAR_RESERVED(c) ( strchr(";/?:@&=+$,",(unsigned char)(c)) != 0 ) +#define CIS(c,ch) ( ((unsigned char)(c)) == (ch) ) +#define CHAR_RESERVED(c) ( CIS(c,';') \ + || CIS(c,'/') \ + || CIS(c,'?') \ + || CIS(c,':') \ + || CIS(c,'@') \ + || CIS(c,'&') \ + || CIS(c,'=') \ + || CIS(c,'+') \ + || CIS(c,'$') \ + || CIS(c,',') ) +//#define CHAR_RESERVED(c) ( strchr(";/?:@&=+$,",(unsigned char)(c)) != 0 ) // Delimiters (RFC2396) -#define CHAR_DELIM(c) ( strchr("<>#%\"",(unsigned char)(c)) != 0 ) +#define CHAR_DELIM(c) ( CIS(c,'<') \ + || CIS(c,'>') \ + || CIS(c,'#') \ + || CIS(c,'%') \ + || CIS(c,'\"') ) +//#define CHAR_DELIM(c) ( strchr("<>#%\"",(unsigned char)(c)) != 0 ) // Unwise (RFC2396) -#define CHAR_UNWISE(c) ( strchr("{}|\\^[]`",(unsigned char)(c)) != 0 ) +#define CHAR_UNWISE(c) ( CIS(c,'{') \ + || CIS(c,'}') \ + || CIS(c,'|') \ + || CIS(c,'\\') \ + || CIS(c,'^') \ + || CIS(c,'[') \ + || CIS(c,']') \ + || CIS(c,'`') ) +//#define CHAR_UNWISE(c) ( strchr("{}|\\^[]`",(unsigned char)(c)) != 0 ) // Special (escape chars) (RFC2396 + >127 ) #define CHAR_LOW(c) ( ((unsigned char)(c) <= 31) ) #define CHAR_HIG(c) ( ((unsigned char)(c) >= 127) ) #define CHAR_SPECIAL(c) ( CHAR_LOW(c) || CHAR_HIG(c) ) // We try to avoid them and encode them instead -#define CHAR_XXAVOID(c) ( strchr(" *'\"!",(unsigned char)(c)) != 0 ) +#define CHAR_XXAVOID(c) ( CIS(c,' ') \ + || CIS(c,'*') \ + || CIS(c,'\'') \ + || CIS(c,'\"') \ + || CIS(c,'!') ) +//#define CHAR_XXAVOID(c) ( strchr(" *'\"!",(unsigned char)(c)) != 0 ) +#define CHAR_MARK(c) ( CIS(c,'-') \ + || CIS(c,'_') \ + || CIS(c,'.') \ + || CIS(c,'!') \ + || CIS(c,'~') \ + || CIS(c,'*') \ + || CIS(c,'\'') \ + || CIS(c,'(') \ + || CIS(c,')') ) +//#define CHAR_MARK(c) ( strchr("-_.!~*'()",(unsigned char)(c)) != 0 ) + // conversion éventuelle / vers antislash @@ -413,7 +575,7 @@ char* antislash(char* s) { char* a; NOSTATIC_RESERVE(buff, char, HTS_URLMAXSIZE*2); - strcpy(buff,s); + 
strcpybuff(buff,s); while(a=strchr(buff,'/')) *a='\\'; return buff; } @@ -442,7 +604,7 @@ htsblk httpget(char* url) { retour.size=0; retour.msg[0]='\0'; retour.statuscode=-1; - strcpy(retour.msg,"Error invalid URL"); + strcpybuff(retour.msg,"Error invalid URL"); return retour; } @@ -514,7 +676,7 @@ int http_xfopen(int mode,int treat,int waitconnect,char* xsend,char* adr,char* f if (retour) { if (retour->msg) { if (!strnotempty(retour->msg)) { - strcpy(retour->msg,"Connect error"); + strcpybuff(retour->msg,"Connect error"); } } } @@ -531,8 +693,8 @@ int http_xfopen(int mode,int treat,int waitconnect,char* xsend,char* adr,char* f if (!fexist(fconv(unescape_http(fil)))) if (fexist(fconv(unescape_http(fil+1)))) { char tempo[HTS_URLMAXSIZE*2]; - strcpy(tempo,fil+1); - strcpy(fil,tempo); + strcpybuff(tempo,fil+1); + strcpybuff(fil,tempo); } // Ouvrir @@ -540,9 +702,9 @@ int http_xfopen(int mode,int treat,int waitconnect,char* xsend,char* adr,char* f retour->msg[0]='\0'; soc=INVALID_SOCKET; if (retour->totalsize<0) - strcpy(retour->msg,"Unable to open file"); + strcpybuff(retour->msg,"Unable to open local file"); else if (retour->totalsize==0) - strcpy(retour->msg,"File empty"); + strcpybuff(retour->msg,"File empty"); else { // Note: On passe par un FILE* (plus propre) //soc=open(fil,O_RDONLY,0); // en lecture seule! @@ -555,13 +717,13 @@ int http_xfopen(int mode,int treat,int waitconnect,char* xsend,char* adr,char* f retour->soc=soc; if (soc!=INVALID_SOCKET) { retour->statuscode=200; // OK - strcpy(retour->msg,"OK"); + strcpybuff(retour->msg,"OK"); guess_httptype(retour->contenttype,fil); } else if (strnotempty(retour->msg)==0) - strcpy(retour->msg,"Unable to open file"); + strcpybuff(retour->msg,"Unable to open local file"); return soc; // renvoyer } else { // HEAD ou POST : interdit sur un local!!!! (c'est idiot!) 
- strcpy(retour->msg,"Unexpected Head/Post local request"); + strcpybuff(retour->msg,"Unexpected Head/Post local request"); soc=INVALID_SOCKET; // erreur retour->soc=soc; return soc; @@ -647,10 +809,10 @@ int http_sendhead(t_cookie* cookie,int mode,char* xsend,char* adr,char* fil,char buff[0]='\0'; // header Date - //strcat(buff,"Date: "); + //strcatbuff(buff,"Date: "); //time_gmt_rfc822(buff); // obtenir l'heure au format rfc822 //sendc("\n"); - //strcat(buff,buff); + //strcatbuff(buff,buff); // possibilité non documentée: >post: et >postfile: // si présence d'un tag >post: alors executer un POST @@ -686,58 +848,58 @@ int http_sendhead(t_cookie* cookie,int mode,char* xsend,char* adr,char* fil,char if (strnotempty(buff)==0) { // PAS POSTFILE // Type de requète? if ((search_tag) && (mode==0)) { - strcat(buff,"POST "); + strcatbuff(buff,"POST "); } else if (mode==0) { // GET - strcat(buff,"GET "); + strcatbuff(buff,"GET "); } else { // if (mode==1) { if (!retour->req.http11) // forcer HTTP/1.0 - strcat(buff,"GET "); // certains serveurs (cgi) buggent avec HEAD + strcatbuff(buff,"GET "); // certains serveurs (cgi) buggent avec HEAD else - strcat(buff,"HEAD "); + strcatbuff(buff,"HEAD "); } // si on gère un proxy, il faut une Absolute URI: on ajoute avant http://www.adr.dom - if (retour->req.proxy.active) { + if ( retour->req.proxy.active && (strncmp(adr,"https://", 8) != 0) ) { if (!link_has_authority(adr)) { // default http #if HDEBUG printf("Proxy Use: for %s%s proxy %d port %d\n",adr,fil,retour->req.proxy.name,retour->req.proxy.port); #endif - strcat(buff,"http://"); - strcat(buff,jump_identification(adr)); + strcatbuff(buff,"http://"); + strcatbuff(buff,jump_identification(adr)); } else { // ftp:// en proxy http #if HDEBUG printf("Proxy Use for ftp: for %s%s proxy %d port %d\n",adr,fil,retour->req.proxy.name,retour->req.proxy.port); #endif direct_url=1; // ne pas analyser user/pass - strcat(buff,adr); + strcatbuff(buff,adr); } } // NOM DU FICHIER // on slash doit 
être présent en début, sinon attention aux bad request! (400) - if (*fil!='/') strcat(buff,"/"); + if (*fil!='/') strcatbuff(buff,"/"); { char tempo[HTS_URLMAXSIZE*2]; tempo[0]='\0'; if (search_tag) - strncat(tempo,fil,(int) (search_tag - fil)); + strncatbuff(tempo,fil,(int) (search_tag - fil)); else - strcpy(tempo,fil); + strcpybuff(tempo,fil); escape_check_url(tempo); - strcat(buff,tempo); // avec échappement + strcatbuff(buff,tempo); // avec échappement } // protocole if (!retour->req.http11) { // forcer HTTP/1.0 //use_11=0; - strcat(buff," HTTP/1.0\x0d\x0a"); + strcatbuff(buff," HTTP/1.0\x0d\x0a"); } else { // Requète 1.1 //use_11=1; - strcat(buff," HTTP/1.1\x0d\x0a"); + strcatbuff(buff," HTTP/1.1\x0d\x0a"); } /* supplemental data */ - if (xsend) strcat(buff,xsend); // éventuelles autres lignes + if (xsend) strcatbuff(buff,xsend); // éventuelles autres lignes // tester proxy authentication if (retour->req.proxy.active) { @@ -748,12 +910,12 @@ int http_sendhead(t_cookie* cookie,int mode,char* xsend,char* adr,char* fil,char char user_pass[256]; autorisation[0]=user_pass[0]='\0'; // - strncat(user_pass,astart,(int) (a - astart) - 1); - strcpy(user_pass,unescape_http(user_pass)); - code64(user_pass,autorisation); - strcat(buff,"Proxy-Authorization: Basic "); - strcat(buff,autorisation); - strcat(buff,H_CRLF); + strncatbuff(user_pass,astart,(int) (a - astart) - 1); + strcpybuff(user_pass,unescape_http(user_pass)); + code64((unsigned char*)user_pass,(int)strlen(user_pass),(unsigned char*)autorisation,0); + strcatbuff(buff,"Proxy-Authorization: Basic "); + strcatbuff(buff,autorisation); + strcatbuff(buff,H_CRLF); #if HDEBUG printf("Proxy-Authenticate, %s (code: %s)\n",user_pass,autorisation); #endif @@ -772,11 +934,11 @@ int http_sendhead(t_cookie* cookie,int mode,char* xsend,char* adr,char* fil,char (strncmp(adr, "https://", 8) == 0) /* or referer AND addresses are https */ ) ) { // PAS file:// - strcat(buff,"Referer: "); - strcat(buff,"http://"); - 
strcat(buff,jump_identification(referer_adr)); - strcat(buff,referer_fil); - strcat(buff,H_CRLF); + strcatbuff(buff,"Referer: "); + strcatbuff(buff,"http://"); + strcatbuff(buff,jump_identification(referer_adr)); + strcatbuff(buff,referer_fil); + strcatbuff(buff,H_CRLF); } } } @@ -786,7 +948,7 @@ int http_sendhead(t_cookie* cookie,int mode,char* xsend,char* adr,char* fil,char if (search_tag) { char clen[256]; sprintf(clen,"Content-length: %d"H_CRLF,(int)(strlen(unescape_http(search_tag+strlen(POSTTOK)+1)))); - strcat(buff,clen); + strcatbuff(buff,clen); } } @@ -802,40 +964,40 @@ int http_sendhead(t_cookie* cookie,int mode,char* xsend,char* adr,char* fil,char if (b) { max_cookies--; if (!cook) { - strcat(buff,"Cookie: "); - strcat(buff,"$Version=1; "); + strcatbuff(buff,"Cookie: "); + strcatbuff(buff,"$Version=1; "); cook=1; } else - strcat(buff,"; "); - strcat(buff,cookie_get(b,5)); - strcat(buff,"="); - strcat(buff,cookie_get(b,6)); - strcat(buff,"; $Path="); - strcat(buff,cookie_get(b,2)); + strcatbuff(buff,"; "); + strcatbuff(buff,cookie_get(b,5)); + strcatbuff(buff,"="); + strcatbuff(buff,cookie_get(b,6)); + strcatbuff(buff,"; $Path="); + strcatbuff(buff,cookie_get(b,2)); b=cookie_nextfield(b); } } while( (b) && (max_cookies>0) && ((int)strlen(buff)<max_size)); if (cook) { // on a envoyé un (ou plusieurs) cookie? - strcat(buff,H_CRLF); + strcatbuff(buff,H_CRLF); #if DEBUG_COOK printf("Header:\n%s\n",buff); #endif } } - // connection close? - //if (use_11) // Si on envoie une requète 1.1, préciser qu'on ne veut pas de keep-alive!! 
- strcat(buff,"Connection: close"H_CRLF); - // gérer le keep-alive (garder socket) - //strcat(buff,"Connection: Keep-Alive\n"); + if (retour->req.http11 && !retour->req.nokeepalive) { + strcatbuff(buff,"Connection: Keep-Alive"H_CRLF); + } else { + strcatbuff(buff,"Connection: close"H_CRLF); + } { char* real_adr=jump_identification(adr); //if ((use_11) || (retour->user_agent_send)) { // Pour le 1.1 on utilise un Host: if (!direct_url) { // pas ftp:// par exemple //if (!retour->req.proxy.active) { - strcat(buff,"Host: "); strcat(buff,real_adr); strcat(buff,H_CRLF); + strcatbuff(buff,"Host: "); strcatbuff(buff,real_adr); strcatbuff(buff,H_CRLF); //} } //} @@ -845,26 +1007,45 @@ int http_sendhead(t_cookie* cookie,int mode,char* xsend,char* adr,char* fil,char char s[256]; // HyperTextSeeker/"HTSVERSION sprintf(s,"User-Agent: %s"H_CRLF,retour->req.user_agent); - strcat(buff,s); + strcatbuff(buff,s); // pour les serveurs difficiles - strcat(buff,"Accept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, image/svg+xml, */*"H_CRLF); + strcatbuff(buff,"Accept: " + "image/png, image/jpeg, image/pjpeg, image/x-xbitmap, image/svg+xml" /* Accepted */ + ", " + "image/gif;q=0.9" /* also accepted but with lower preference */ + ", " + "*/*;q=0.1" /* also accepted but with even lower preference */ + H_CRLF); if (strnotempty(retour->req.lang_iso)) { - strcat(buff,"Accept-Language: "); strcat(buff,retour->req.lang_iso); strcat(buff,H_CRLF); + strcatbuff(buff,"Accept-Language: "); strcatbuff(buff,retour->req.lang_iso); strcatbuff(buff,H_CRLF); } - strcat(buff,"Accept-Charset: iso-8859-1, *"H_CRLF); + strcatbuff(buff,"Accept-Charset: " + "iso-8859-1" /* we prefer ISO-8859-1 */ + ", " + "iso-8859-*;q=0.9" /* or ISO-8859-* */ + ", " + "utf-8;q=0.66" /* UTF8 is also accepted */ + ", " + "*;q=0.33" /* and any other charset */ + H_CRLF); if (retour->req.http11) { #if HTS_USEZLIB - if ((!retour->req.range_used) && (!retour->req.nocompression)) - strcat(buff,"Accept-Encoding: gzip, deflate, 
compress, identity"H_CRLF); + //strcatbuff(buff,"Accept-Encoding: gzip, deflate, compress, identity"H_CRLF); + if (gz_is_available && (!retour->req.range_used) && (!retour->req.nocompression)) + strcatbuff(buff,"Accept-Encoding: " + "gzip" /* gzip if the preffered encoding */ + ", " + "identity;q=0.9" + H_CRLF); else - strcat(buff,"Accept-Encoding: identity"H_CRLF); /* no compression */ + strcatbuff(buff,"Accept-Encoding: identity"H_CRLF); /* no compression */ #else - strcat(buff,"Accept-Encoding: identity"H_CRLF); /* no compression */ + strcatbuff(buff,"Accept-Encoding: identity"H_CRLF); /* no compression */ #endif } } else { - strcat(buff,"Accept: */*"H_CRLF); // le minimum + strcatbuff(buff,"Accept: */*"H_CRLF); // le minimum } /* Authentification */ @@ -878,40 +1059,40 @@ int http_sendhead(t_cookie* cookie,int mode,char* xsend,char* adr,char* fil,char if (!direct_url) { // pas ftp:// par exemple char user_pass[256]; user_pass[0]='\0'; - strncat(user_pass,astart,(int) (a - astart) - 1); - strcpy(user_pass,unescape_http(user_pass)); - code64(user_pass,autorisation); + strncatbuff(user_pass,astart,(int) (a - astart) - 1); + strcpybuff(user_pass,unescape_http(user_pass)); + code64((unsigned char*)user_pass,(int)strlen(user_pass),(unsigned char*)autorisation,0); if (strcmp(fil,"/robots.txt")) /* pas robots.txt */ bauth_add(cookie,astart,fil,autorisation); } } else if ( (a=bauth_check(cookie,real_adr,fil)) ) - strcpy(autorisation,a); + strcpybuff(autorisation,a); /* On a une autorisation a donner? 
*/ if (strnotempty(autorisation)) { - strcat(buff,"Authorization: Basic "); - strcat(buff,autorisation); - strcat(buff,H_CRLF); + strcatbuff(buff,"Authorization: Basic "); + strcatbuff(buff,autorisation); + strcatbuff(buff,H_CRLF); } } } - //strcat(buff,"Accept-Language: en\n"); - //strcat(buff,"Accept-Charset: iso-8859-1,*,utf-8\n"); + //strcatbuff(buff,"Accept-Language: en\n"); + //strcatbuff(buff,"Accept-Charset: iso-8859-1,*,utf-8\n"); // CRLF de fin d'en tête - strcat(buff,H_CRLF); + strcatbuff(buff,H_CRLF); // données complémentaires? if (search_tag) if (mode==0) // GET! - strcat(buff,unescape_http(search_tag+strlen(POSTTOK)+1)); + strcatbuff(buff,unescape_http(search_tag+strlen(POSTTOK)+1)); } #if HDEBUG #endif if (_DEBUG_HEAD) { if (ioinfo) { - fprintf(ioinfo,"request for %s%s:\r\n",jump_identification(adr),fil); + fprintf(ioinfo,"[%d] request for %s%s:\r\n",retour->debugid,jump_identification(adr),fil); fprintfio(ioinfo,buff,"<<< "); fprintf(ioinfo,"\r\n"); fflush(ioinfo); @@ -919,13 +1100,25 @@ int http_sendhead(t_cookie* cookie,int mode,char* xsend,char* adr,char* fil,char } // Fin test pas postfile // + // Callback +#if HTS_ANALYSTE + if (hts_htmlcheck_sendhead != NULL) { + int test_head=hts_htmlcheck_sendhead(buff, adr, fil, referer_adr, referer_fil, retour); + if (test_head!=1) { + deletesoc_r(retour); + strcpybuff(retour->msg,"Header refused by external wrapper"); + retour->soc=INVALID_SOCKET; + } + } +#endif + // Envoi if (sendc(retour, buff)<0) { // ERREUR, socket rompue?... //if (sendc(retour->soc,buff) != strlen(buff)) { // ERREUR, socket rompue?... 
deletesoc_r(retour); // fermer tout de même // et tenter de reconnecter - strcpy(retour->msg,"Broken pipe"); + strcpybuff(retour->msg,"Write error"); retour->soc=INVALID_SOCKET; } @@ -955,44 +1148,46 @@ void treatfirstline(htsblk* retour,char* rcvd) { while ((*a!=' ') && (*a!='\0') && (*a!=10) && (*a!=13) && (*a!=9)) a++; while ((*a==' ') || (*a==10) || (*a==13) || (*a==9)) a++; // épurer espaces if ((strlen(a) > 1) && (strlen(a) < 64) ) // message retour - strcpy(retour->msg,a); + strcpybuff(retour->msg,a); else infostatuscode(retour->msg,retour->statuscode); // type MIME par défaut2 - strcpy(retour->contenttype,HTS_HYPERTEXT_DEFAULT_MIME); + strcpybuff(retour->contenttype,HTS_HYPERTEXT_DEFAULT_MIME); } else { // pas de code! retour->statuscode=-1; - strcpy(retour->msg,"Unknown response structure"); + strcpybuff(retour->msg,"Unknown response structure"); } } else { // euhh?? retour->statuscode=-1; - strcpy(retour->msg,"Unknown response structure"); + strcpybuff(retour->msg,"Unknown response structure"); } } else { if (*a == '<') { /* This is dirty .. */ retour->statuscode=200; - strcpy(retour->msg, "Unknown, assuming junky server"); - strcpy(retour->contenttype,HTS_HYPERTEXT_DEFAULT_MIME); + retour->keep_alive=0; + strcpybuff(retour->msg, "Unknown, assuming junky server"); + strcpybuff(retour->contenttype,HTS_HYPERTEXT_DEFAULT_MIME); } else if (strnotempty(a)) { retour->statuscode=-1; - strcpy(retour->msg,"Unknown response structure, no HTTP/ response given"); + strcpybuff(retour->msg,"Unknown (not HTTP/xx) response structure"); } else { /* This is dirty .. */ retour->statuscode=200; - strcpy(retour->msg, "Unknown, assuming junky server"); - strcpy(retour->contenttype,HTS_HYPERTEXT_DEFAULT_MIME); + retour->keep_alive=0; + strcpybuff(retour->msg, "Unknown, assuming junky server"); + strcpybuff(retour->contenttype,HTS_HYPERTEXT_DEFAULT_MIME); } } } else { // vide! 
/* retour->statuscode=-1; - strcpy(retour->msg,"Empty reponse or internal error"); + strcpybuff(retour->msg,"Empty reponse or internal error"); */ /* This is dirty .. */ retour->statuscode=200; - strcpy(retour->msg, "Unknown, assuming junky server"); - strcpy(retour->contenttype,HTS_HYPERTEXT_DEFAULT_MIME); + strcpybuff(retour->msg, "Unknown, assuming junky server"); + strcpybuff(retour->contenttype,HTS_HYPERTEXT_DEFAULT_MIME); } } @@ -1005,13 +1200,16 @@ void treathead(t_cookie* cookie,char* adr,char* fil,htsblk* retour,char* rcvd) { printf("ok, Content-length: détecté\n"); #endif sscanf(rcvd+p,LLintP,&(retour->totalsize)); + if (retour->totalsize == 0) { + retour->empty = 1; + } } else if ((p=strfield(rcvd,"Content-Disposition:"))!=0) { - while(*(rcvd+p)==' ') p++; // sauter espaces + while(is_realspace(*(rcvd+p))) p++; // sauter espaces if ((int) strlen(rcvd+p)<250) { // pas trop long? char tmp[256]; char *a=NULL,*b=NULL; - strcpy(tmp,rcvd+p); + strcpybuff(tmp,rcvd+p); a=strstr(tmp,"filename="); if (a) { a+=strlen("filename="); @@ -1029,7 +1227,7 @@ void treathead(t_cookie* cookie,char* adr,char* fil,htsblk* retour,char* rcvd) { if (b) { *b='\0'; if ((int) strlen(a) < 200) { // pas trop long? - strcpy(retour->cdispo,a); + strcpybuff(retour->cdispo,a); } } } @@ -1037,36 +1235,40 @@ void treathead(t_cookie* cookie,char* adr,char* fil,htsblk* retour,char* rcvd) { } } else if ((p=strfield(rcvd,"Last-Modified:"))!=0) { - while(*(rcvd+p)==' ') p++; // sauter espaces + while(is_realspace(*(rcvd+p))) p++; // sauter espaces if ((int) strlen(rcvd+p)<64) { // pas trop long? 
//struct tm* tm_time=convert_time_rfc822(rcvd+p); - strcpy(retour->lastmodified,rcvd+p); + strcpybuff(retour->lastmodified,rcvd+p); } } else if ((p=strfield(rcvd,"Date:"))!=0) { if (strnotempty(retour->lastmodified)==0) { /* pas encore de last-modified */ - while(*(rcvd+p)==' ') p++; // sauter espaces + while(is_realspace(*(rcvd+p))) p++; // sauter espaces if ((int) strlen(rcvd+p)<64) { // pas trop long? //struct tm* tm_time=convert_time_rfc822(rcvd+p); - strcpy(retour->lastmodified,rcvd+p); + strcpybuff(retour->lastmodified,rcvd+p); } } } else if ((p=strfield(rcvd,"Etag:"))!=0) { /* Etag */ if (retour) { - while(*(rcvd+p)==' ') p++; // sauter espaces + while(is_realspace(*(rcvd+p))) p++; // sauter espaces if ((int) strlen(rcvd+p)<64) // pas trop long? - strcpy(retour->etag,rcvd+p); + strcpybuff(retour->etag,rcvd+p); else // erreur.. ignorer retour->etag[0]='\0'; } } - else if ((p=strfield(rcvd,"Transfer-Encoding: chunked"))!=0) { // chunk! - retour->is_chunk=1; // chunked - //retour->http11=2; // chunked + // else if ((p=strfield(rcvd,"Transfer-Encoding: chunked"))!=0) { // chunk! + else if ((p=strfield(rcvd,"Transfer-Encoding:"))!=0) { // chunk! 
+ while(is_realspace(*(rcvd+p))) p++; // sauter espaces + if (strfield(rcvd+p,"chunked")) { + retour->is_chunk=1; // chunked + //retour->http11=2; // chunked #if HDEBUG - printf("ok, Transfer-Encoding: détecté\n"); + printf("ok, Transfer-Encoding: détecté\n"); #endif + } } else if ((p=strfield(rcvd,"Content-type:"))!=0) { if (retour) { @@ -1074,13 +1276,37 @@ void treathead(t_cookie* cookie,char* adr,char* fil,htsblk* retour,char* rcvd) { // éviter les text/html; charset=foo { char* a=strchr(rcvd+p,';'); - if (a) *a='\0'; + if (a) { // extended information + *a='\0'; + a++; + while(is_space(*a)) a++; + if (strfield(a, "charset")) { + a += 7; + while(is_space(*a)) a++; + if (*a == '=') { + a++; + while(is_space(*a)) a++; + if (*a == '\"') a++; + while(is_space(*a)) a++; + if (*a) { + char* chs = a; + while(*a && !is_space(*a) && *a != '\"' && *a != ';') a++; + *a = '\0'; + if (*chs) { + if (strlen(chs) < sizeof(retour->charset) - 2) { + strcpybuff(retour->charset, chs); + } + } + } + } + } + } } sscanf(rcvd+p,"%s",tempo); - if (strlen(tempo)<64) // pas trop long!! - strcpy(retour->contenttype,tempo); + if (strlen(tempo) < sizeof(retour->contenttype) - 2) // pas trop long!! 
+ strcpybuff(retour->contenttype,tempo); else - strcpy(retour->contenttype,"application/octet-stream-unknown"); // erreur + strcpybuff(retour->contenttype,"application/octet-stream-unknown"); // erreur } } else if ((p=strfield(rcvd,"Content-Range:"))!=0) { @@ -1091,16 +1317,63 @@ void treathead(t_cookie* cookie,char* adr,char* fil,htsblk* retour,char* rcvd) { } } } - else if ((p=strfield(rcvd,"Content-Encoding:"))!=0) { - if (retour) { - char tempo[1100]; - { - char* a=strchr(rcvd+p,';'); - if (a) *a='\0'; + else if ((p=strfield(rcvd,"Connection:"))!=0) { + char* a = rcvd + p; + while(is_space(*a)) a++; + if (*a) { + if (strfield(a, "Keep-Alive")) { + if (!retour->keep_alive) { + retour->keep_alive_max = 10; + retour->keep_alive_t = 15; + } + retour->keep_alive = 1; + } else { + retour->keep_alive = 0; } - sscanf(rcvd+p,"%s",tempo); + } + } + else if ((p=strfield(rcvd,"Keep-Alive:"))!=0) { + char* a = rcvd + p; + while(is_space(*a)) a++; + if (*a) { + char* p; + retour->keep_alive = 1; + retour->keep_alive_max = 10; + retour->keep_alive_t = 15; + if ((p=strstr(a, "timeout="))) { + p+=strlen("timeout="); + sscanf(p, "%d", &retour->keep_alive_t); + } + if ((p=strstr(a, "max="))) { + p+=strlen("max="); + sscanf(p, "%d", &retour->keep_alive_max); + } + if (retour->keep_alive_max <= 1 || retour->keep_alive_t < 3) { + retour->keep_alive = 0; + } + } + } + else if ((p=strfield(rcvd,"TE:"))!=0) { + char* a = rcvd + p; + while(is_space(*a)) a++; + if (*a) { + if (strfield(a, "trailers")) { + retour->keep_alive_trailers=1; + } + } + } + else if ((p=strfield(rcvd,"Content-Encoding:"))!=0) { + if (retour) { + char tempo[1100]; + char* a = rcvd + p; + while(is_space(*a)) a++; + { + char* a=strchr(rcvd+p,';'); + if (a) *a='\0'; + } + sscanf(a,"%s",tempo); if (strlen(tempo)<64) // pas trop long!! 
- strcpy(retour->contentencoding,tempo); + strcpybuff(retour->contentencoding,tempo); else retour->contentencoding[0]='\0'; // erreur #if HTS_USEZLIB @@ -1125,20 +1398,14 @@ void treathead(t_cookie* cookie,char* adr,char* fil,htsblk* retour,char* rcvd) { else if ((p=strfield(rcvd,"Location:"))!=0) { if (retour) { if (retour->location) { - while(*(rcvd+p)==' ') p++; // sauter espaces + while(is_realspace(*(rcvd+p))) p++; // sauter espaces if ((int) strlen(rcvd+p)<HTS_URLMAXSIZE) // pas trop long? - strcpy(retour->location,rcvd+p); + strcpybuff(retour->location,rcvd+p); else // erreur.. ignorer retour->location[0]='\0'; } } } - else if ((p=strfield(rcvd,"Connection: Keep-Alive"))!=0) { - // non, pas de keep-alive! on déconnectera.. - } - else if ((p=strfield(rcvd,"Keep-Alive:"))!=0) { // params keep-alive - // rien à faire - } else if ( ((p=strfield(rcvd,"Set-Cookie:"))!=0) && (cookie) ) { // ohh un cookie char* a = rcvd+p; // pointeur char domain[256]; // domaine cookie (.netscape.com) @@ -1159,10 +1426,10 @@ void treathead(t_cookie* cookie,char* adr,char* fil,htsblk* retour,char* rcvd) { // initialiser cookie lu actuellement if (adr) - strcpy(domain,jump_identification(adr)); // domaine - strcpy(path,"/"); // chemin (/) - strcpy(cook_name,""); // nom cookie (MYCOOK) - strcpy(cook_value,""); // valeur (ID=toto,S=1234) + strcpybuff(domain,jump_identification(adr)); // domaine + strcpybuff(path,"/"); // chemin (/) + strcpybuff(cook_name,""); // nom cookie (MYCOOK) + strcpybuff(cook_value,""); // valeur (ID=toto,S=1234) // boucler jusqu'au prochain cookie ou la fin do { char* start_loop=a; @@ -1184,16 +1451,16 @@ void treathead(t_cookie* cookie,char* adr,char* fil,htsblk* retour,char* rcvd) { && (((int) (token_end - token_st))>0) && (((int) (value_end - value_st))>0) ) { name[0]='\0'; value[0]='\0'; - strncat(name,token_st,(int) (token_end - token_st)); - strncat(value,value_st,(int) (value_end - value_st)); + strncatbuff(name,token_st,(int) (token_end - token_st)); + 
strncatbuff(value,value_st,(int) (value_end - value_st)); #if DEBUG_COOK printf("detected cookie-av: name=\"%s\" value=\"%s\"\n",name,value); #endif if (strfield2(name,"domain")) { - strcpy(domain,value); + strcpybuff(domain,value); } else if (strfield2(name,"path")) { - strcpy(path,value); + strcpybuff(path,value); } else if (strfield2(name,"max-age")) { // ignoré.. @@ -1212,8 +1479,8 @@ void treathead(t_cookie* cookie,char* adr,char* fil,htsblk* retour,char* rcvd) { } else { if (strnotempty(cook_name)==0) { // noter premier: nom et valeur cookie - strcpy(cook_name,name); - strcpy(cook_value,value); + strcpybuff(cook_name,name); + strcpybuff(cook_value,value); } else { // prochain cookie a=start_loop; // on devra recommencer à cette position next=1; // enregistrer @@ -1238,52 +1505,52 @@ void treathead(t_cookie* cookie,char* adr,char* fil,htsblk* retour,char* rcvd) { // transforme le message statuscode en chaîne -void infostatuscode(char* msg,int statuscode) { +HTSEXT_API void infostatuscode(char* msg,int statuscode) { switch( statuscode) { // Erreurs HTTP, selon RFC - case 100: strcpy( msg,"Continue"); break; - case 101: strcpy( msg,"Switching Protocols"); break; - case 200: strcpy( msg,"OK"); break; - case 201: strcpy( msg,"Created"); break; - case 202: strcpy( msg,"Accepted"); break; - case 203: strcpy( msg,"Non-Authoritative Information"); break; - case 204: strcpy( msg,"No Content"); break; - case 205: strcpy( msg,"Reset Content"); break; - case 206: strcpy( msg,"Partial Content"); break; - case 300: strcpy( msg,"Multiple Choices"); break; - case 301: strcpy( msg,"Moved Permanently"); break; - case 302: strcpy( msg,"Moved Temporarily"); break; - case 303: strcpy( msg,"See Other"); break; - case 304: strcpy( msg,"Not Modified"); break; - case 305: strcpy( msg,"Use Proxy"); break; - case 306: strcpy( msg,"Undefined 306 error"); break; - case 307: strcpy( msg,"Temporary Redirect"); break; - case 400: strcpy( msg,"Bad Request"); break; - case 401: strcpy( 
msg,"Unauthorized"); break; - case 402: strcpy( msg,"Payment Required"); break; - case 403: strcpy( msg,"Forbidden"); break; - case 404: strcpy( msg,"Not Found"); break; - case 405: strcpy( msg,"Method Not Allowed"); break; - case 406: strcpy( msg,"Not Acceptable"); break; - case 407: strcpy( msg,"Proxy Authentication Required"); break; - case 408: strcpy( msg,"Request Time-out"); break; - case 409: strcpy( msg,"Conflict"); break; - case 410: strcpy( msg,"Gone"); break; - case 411: strcpy( msg,"Length Required"); break; - case 412: strcpy( msg,"Precondition Failed"); break; - case 413: strcpy( msg,"Request Entity Too Large"); break; - case 414: strcpy( msg,"Request-URI Too Large"); break; - case 415: strcpy( msg,"Unsupported Media Type"); break; - case 416: strcpy( msg,"Requested Range Not Satisfiable"); break; - case 417: strcpy( msg,"Expectation Failed"); break; - case 500: strcpy( msg,"Internal Server Error"); break; - case 501: strcpy( msg,"Not Implemented"); break; - case 502: strcpy( msg,"Bad Gateway"); break; - case 503: strcpy( msg,"Service Unavailable"); break; - case 504: strcpy( msg,"Gateway Time-out"); break; - case 505: strcpy( msg,"HTTP Version Not Supported"); break; + case 100: strcpybuff( msg,"Continue"); break; + case 101: strcpybuff( msg,"Switching Protocols"); break; + case 200: strcpybuff( msg,"OK"); break; + case 201: strcpybuff( msg,"Created"); break; + case 202: strcpybuff( msg,"Accepted"); break; + case 203: strcpybuff( msg,"Non-Authoritative Information"); break; + case 204: strcpybuff( msg,"No Content"); break; + case 205: strcpybuff( msg,"Reset Content"); break; + case 206: strcpybuff( msg,"Partial Content"); break; + case 300: strcpybuff( msg,"Multiple Choices"); break; + case 301: strcpybuff( msg,"Moved Permanently"); break; + case 302: strcpybuff( msg,"Moved Temporarily"); break; + case 303: strcpybuff( msg,"See Other"); break; + case 304: strcpybuff( msg,"Not Modified"); break; + case 305: strcpybuff( msg,"Use Proxy"); break; + case 
306: strcpybuff( msg,"Undefined 306 error"); break; + case 307: strcpybuff( msg,"Temporary Redirect"); break; + case 400: strcpybuff( msg,"Bad Request"); break; + case 401: strcpybuff( msg,"Unauthorized"); break; + case 402: strcpybuff( msg,"Payment Required"); break; + case 403: strcpybuff( msg,"Forbidden"); break; + case 404: strcpybuff( msg,"Not Found"); break; + case 405: strcpybuff( msg,"Method Not Allowed"); break; + case 406: strcpybuff( msg,"Not Acceptable"); break; + case 407: strcpybuff( msg,"Proxy Authentication Required"); break; + case 408: strcpybuff( msg,"Request Time-out"); break; + case 409: strcpybuff( msg,"Conflict"); break; + case 410: strcpybuff( msg,"Gone"); break; + case 411: strcpybuff( msg,"Length Required"); break; + case 412: strcpybuff( msg,"Precondition Failed"); break; + case 413: strcpybuff( msg,"Request Entity Too Large"); break; + case 414: strcpybuff( msg,"Request-URI Too Large"); break; + case 415: strcpybuff( msg,"Unsupported Media Type"); break; + case 416: strcpybuff( msg,"Requested Range Not Satisfiable"); break; + case 417: strcpybuff( msg,"Expectation Failed"); break; + case 500: strcpybuff( msg,"Internal Server Error"); break; + case 501: strcpybuff( msg,"Not Implemented"); break; + case 502: strcpybuff( msg,"Bad Gateway"); break; + case 503: strcpybuff( msg,"Service Unavailable"); break; + case 504: strcpybuff( msg,"Gateway Time-out"); break; + case 505: strcpybuff( msg,"HTTP Version Not Supported"); break; // - default: if (strnotempty(msg)==0) strcpy( msg,"Unknown error"); break; + default: if (strnotempty(msg)==0) strcpybuff( msg,"Unknown error"); break; } } @@ -1376,6 +1643,25 @@ int check_readinput(htsblk* r) { return 0; } +// check if data is available +int check_readinput_t(T_SOC soc, int timeout) { + if (soc != INVALID_SOCKET) { + fd_set fds; // poll structures + struct timeval tv; // structure for select + FD_ZERO(&fds); + FD_SET(soc,&fds); + tv.tv_sec=timeout; + tv.tv_usec=0; + select(soc + 1,&fds,NULL,NULL,&tv); 
+ if (FD_ISSET(soc,&fds)) + return 1; + else + return 0; + } else + return 0; +} + + // lecture d'un bloc sur une socket (ou un fichier!) // >=0 : nombre d'octets lus // <0 : fin ou erreur @@ -1462,9 +1748,9 @@ LLint http_xfread1(htsblk* r,int bufl) { // nouvelle taille if (nl > 0) { r->size+=nl; - if ((int) fwrite(buff,1,nl,r->out)!=nl) { + if ((INTsys)fwrite(buff,1,nl,r->out)!=nl) { r->statuscode=-1; - strcpy(r->msg,"Write error on disk"); + strcpybuff(r->msg,"Write error on disk"); nl=-1; } } @@ -1669,7 +1955,7 @@ htsblk http_test(char* adr,char* fil,char* loc) { } } else { retour.statuscode=-2; - strcpy(retour.msg,"Timeout While Testing"); + strcpybuff(retour.msg,"Timeout While Testing"); } @@ -1730,7 +2016,7 @@ int newhttp(char* _iadr,htsblk* retour,int port,int waitconnect) { } // adresse véritable (sans :xx) - strncat(iadr2,iadr,(int) (a - iadr)); + strncatbuff(iadr2,iadr,(int) (a - iadr)); // adresse sans le :xx hp = hts_gethostbyname(iadr2, &fullhostent_buffer); @@ -1755,7 +2041,7 @@ int newhttp(char* _iadr,htsblk* retour,int port,int waitconnect) { #endif if (retour) if (retour->msg) - strcpy(retour->msg,"Unable to get server's address"); + strcpybuff(retour->msg,"Unable to get server's address"); return INVALID_SOCKET; } // copie adresse @@ -1770,15 +2056,33 @@ int newhttp(char* _iadr,htsblk* retour,int port,int waitconnect) { DEBUG_W("socket\n"); #endif soc=socket(SOCaddr_sinfamily(server), SOCK_STREAM, 0); + if (retour != NULL) { + retour->debugid = HTS_STAT.stat_sockid++; + } #if HTS_WIDE_DEBUG DEBUG_W("socket done\n"); #endif if (soc==INVALID_SOCKET) { if (retour) if (retour->msg) - strcpy(retour->msg,"Unable to create a socket"); + strcpybuff(retour->msg,"Unable to create a socket"); return INVALID_SOCKET; // erreur création socket impossible } + + // bind this address + if (retour != NULL && retour->req.proxy.bindhost[0] != '\0') { + t_fullhostent bind_buffer; + hp = hts_gethostbyname(retour->req.proxy.bindhost, &bind_buffer); + if (hp == NULL || + 
bind(soc, (struct sockaddr *)hp->h_addr_list[0], hp->h_length) != 0) { + if (retour) + if (retour->msg) + strcpybuff(retour->msg,"Unable to bind the specificied server address"); + deletesoc(soc); + return INVALID_SOCKET; + } + } + // structure: connexion au domaine internet, port 80 (ou autre) SOCaddr_initport(server, port); #if HDEBUG @@ -1820,7 +2124,7 @@ int newhttp(char* _iadr,htsblk* retour,int port,int waitconnect) { #endif if (retour) if (retour->msg) - strcpy(retour->msg,"Unable to connect to the server"); + strcpybuff(retour->msg,"Unable to connect to the server"); /* Close the socket and notify the error!!! */ deletesoc(soc); return INVALID_SOCKET; @@ -1877,15 +2181,15 @@ int ident_url_absolute(char* url,char* adr,char* fil) { // 1. optional scheme ":" if ((pos=strfield(url,"file:"))) { // fichier local!! (pour les tests) //!! p+=3; - strcpy(adr,"file://"); + strcpybuff(adr,"file://"); } else if ((pos=strfield(url,"http:"))) { // HTTP //!!p+=3; } else if ((pos=strfield(url,"ftp:"))) { // FTP - strcpy(adr,"ftp://"); // FTP!! + strcpybuff(adr,"ftp://"); // FTP!! //!!p+=3; #if HTS_USEOPENSSL - } else if ((pos=strfield(url,"https:"))) { // HTTPS - strcpy(adr,"https://"); + } else if (SSL_is_available && (pos=strfield(url,"https:"))) { // HTTPS + strcpybuff(adr,"https://"); #endif } else if (scheme) { return -1; // erreur non reconnu @@ -1911,17 +2215,17 @@ int ident_url_absolute(char* url,char* adr,char* fil) { // chemin www... trop long!! if ( ( ((int) (q - p)) ) > HTS_URLMAXSIZE) { - //strcpy(retour.msg,"Path too long"); + //strcpybuff(retour.msg,"Path too long"); return -1; // erreur } // recopier adresse www.. - strncat(adr,p, ((int) (q - p)) ); + strncatbuff(adr,p, ((int) (q - p)) ); // *( adr+( ((int) q) - ((int) p) ) )=0; // faut arrêter la fumette! // recopier chemin /pub/.. 
if (q[0] != '/') // page par défaut (/) - strcat(fil,"/"); - strcat(fil,q); + strcatbuff(fil,"/"); + strcatbuff(fil,q); // SECURITE: // simplifier url pour les ../ fil_simplifie(fil); @@ -1931,8 +2235,13 @@ int ident_url_absolute(char* url,char* adr,char* fil) { char* a; p=url+pos; - - strcat(fil,p); // fichier local ; adr="#" + if (*p == '/' || *p == '\\') { /* file:///.. */ + strcatbuff(fil,p); // fichier local ; adr="#" + } else { + strcatbuff(fil,"//"); /* file://server/foo */ + strcatbuff(fil,p); + } + a=strchr(fil,'?'); if (a) *a='\0'; /* couper query (inutile pour file:// lors de la requête) */ @@ -1948,7 +2257,7 @@ int ident_url_absolute(char* url,char* adr,char* fil) { // nommer au besoin.. (non utilisé normalement) if (!strnotempty(fil)) - strcpy(fil,"default-index.html"); + strcpybuff(fil,"default-index.html"); // case insensitive pour adresse { @@ -1981,12 +2290,12 @@ void fil_simplifie(char* f) { tempo[0]='\0'; // if (!last) /* can't go upper.. */ - strcpy(tempo,"/"); + strcpybuff(tempo,"/"); else strncpy(tempo,f,last+1); tempo[last+1]='\0'; - strcat(tempo,f+i+4); - strcpy(f,tempo); // remplacer + strcatbuff(tempo,f+i+4); + strcpybuff(f,tempo); // remplacer i=-1; // recommencer last=0; } @@ -2004,20 +2313,19 @@ void fil_simplifie(char* f) { while ( (a=strstr(f,"./")) ) { char tempo[HTS_URLMAXSIZE*2]; tempo[0]='\0'; - strcpy(tempo,a+2); - strcpy(a,tempo); + strcpybuff(tempo,a+2); + strcpybuff(a,tempo); } // delete all remaining ../ (potential threat) while ( (a=strstr(f,"../")) ) { char tempo[HTS_URLMAXSIZE*2]; tempo[0]='\0'; - strcpy(tempo,a+3); - strcpy(a,tempo); + strcpybuff(tempo,a+3); + strcpybuff(a,tempo); } } - // fermer liaison fichier ou socket HTS_INLINE void deletehttp(htsblk* r) { #if HTS_DEBUG_CLOSESOCK @@ -2025,6 +2333,14 @@ HTS_INLINE void deletehttp(htsblk* r) { sprintf(info,"deletehttp: (htsblk*) %d\n",r); DEBUG_W2(info); #endif +#if HTS_USEOPENSSL + /* Free OpenSSL structures */ + if (SSL_is_available && r->ssl_con) { + 
SSL_shutdown(r->ssl_con); + SSL_free(r->ssl_con); + r->ssl_con=NULL; + } +#endif if (r->soc!=INVALID_SOCKET) { if (r->is_file) { if (r->fp) @@ -2038,6 +2354,16 @@ HTS_INLINE void deletehttp(htsblk* r) { } } +// free the addr buffer +// always returns 1 +HTS_INLINE int deleteaddr(htsblk* r) { + if (r->adr) { + freet(r->adr); + r->adr = NULL; + } + return 1; +} + // fermer une socket HTS_INLINE void deletesoc(T_SOC soc) { if (soc!=INVALID_SOCKET) { @@ -2067,7 +2393,7 @@ HTS_INLINE void deletesoc(T_SOC soc) { /* Will also clean other things */ HTS_INLINE void deletesoc_r(htsblk* r) { #if HTS_USEOPENSSL - if (r->ssl_con) { + if (SSL_is_available && r->ssl_con) { SSL_shutdown(r->ssl_con); // SSL_CTX_set_quiet_shutdown(r->ssl_con->ctx, 1); SSL_free(r->ssl_con); @@ -2084,7 +2410,7 @@ HTS_INLINE TStamp time_local(void) { } // number of millisec since 1970 -HTS_INLINE TStamp mtime_local(void) { +HTSEXT_API HTS_INLINE TStamp mtime_local(void) { #ifndef HTS_DO_NOT_USE_FTIME struct timeb B; ftime( &B ); @@ -2120,7 +2446,7 @@ void sec2str(char *st,TStamp t) { } // idem, plus court (chaine) -void qsec2str(char *st,TStamp t) { +HTSEXT_API void qsec2str(char *st,TStamp t) { int j,h,m,s; j=(int) (t/(3600*24)); @@ -2181,7 +2507,7 @@ struct tm* convert_time_rfc822(char* s) { if ((int) strlen(s) > 200) return NULL; - strcpy(str,s); + strcpybuff(str,s); hts_lowcase(str); /* éliminer :,- */ while( (a=strchr(str,'-')) ) *a=' '; @@ -2200,7 +2526,7 @@ struct tm* convert_time_rfc822(char* s) { tok[0]='\0'; if (first!=last) { char* pos; - strncat(tok,first,(int) (last - first)); + strncatbuff(tok,first,(int) (last - first)); /* analyser */ if ( (pos=strstr(months,tok)) ) { /* month always in letters */ result_mm=((int) (pos - months))/4; @@ -2274,36 +2600,44 @@ int set_filetime_rfc822(char* file,char* date) { // heure au format rfc (taille buffer 256o) HTS_INLINE void time_rfc822(char* s,struct tm * A) { + if (A == NULL) { + int localtime_returned_null=0; + assert(localtime_returned_null); + 
} strftime(s,256,"%a, %d %b %Y %H:%M:%S GMT",A); } // heure locale au format rfc (taille buffer 256o) HTS_INLINE void time_rfc822_local(char* s,struct tm * A) { + if (A == NULL) { + int localtime_returned_null=0; + assert(localtime_returned_null); + } strftime(s,256,"%a, %d %b %Y %H:%M:%S",A); } // conversion en b,Kb,Mb -char* int2bytes(LLint n) { +HTSEXT_API char* int2bytes(LLint n) { char** a=int2bytes2(n); char* buff; NOSTATIC_RESERVE(buff, char, 256); - strcpy(buff,a[0]); - strcat(buff,a[1]); + strcpybuff(buff,a[0]); + strcatbuff(buff,a[1]); return concat(buff,""); } // conversion en b/s,Kb/s,Mb/s -char* int2bytessec(long int n) { +HTSEXT_API char* int2bytessec(long int n) { char* buff; char** a=int2bytes2(n); NOSTATIC_RESERVE(buff, char, 256); - strcpy(buff,a[0]); - strcat(buff,a[1]); + strcpybuff(buff,a[0]); + strcatbuff(buff,a[1]); return concat(buff,"/s"); } -char* int2char(int n) { +HTSEXT_API char* int2char(int n) { char* buffer; NOSTATIC_RESERVE(buffer, char, 32); sprintf(buffer,"%d",n); @@ -2327,35 +2661,35 @@ typedef struct { char buff2[32]; char* buffadr[2]; } strc_int2bytes2; -char** int2bytes2(LLint n) { +HTSEXT_API char** int2bytes2(LLint n) { strc_int2bytes2* strc; NOSTATIC_RESERVE(strc, strc_int2bytes2, 1); if (n < ToLLintKiB) { sprintf(strc->buff1,"%d",(int)(LLint)n); - strcpy(strc->buff2,"B"); + strcpybuff(strc->buff2,"B"); } else if (n < ToLLintMiB) { sprintf(strc->buff1,"%d,%02d",(int)((LLint)(n/ToLLintKiB)),(int)((LLint)((n%ToLLintKiB)*100)/ToLLintKiB)); - strcpy(strc->buff2,"KiB"); + strcpybuff(strc->buff2,"KiB"); } #ifdef HTS_LONGLONG else if (n < ToLLintGiB) { sprintf(strc->buff1,"%d,%02d",(int)((LLint)(n/(ToLLintMiB))),(int)((LLint)(((n%(ToLLintMiB))*100)/(ToLLintMiB)))); - strcpy(strc->buff2,"MiB"); + strcpybuff(strc->buff2,"MiB"); } else if (n < ToLLintTiB) { sprintf(strc->buff1,"%d,%02d",(int)((LLint)(n/(ToLLintGiB))),(int)((LLint)(((n%(ToLLintGiB))*100)/(ToLLintGiB)))); - strcpy(strc->buff2,"GiB"); + strcpybuff(strc->buff2,"GiB"); } 
else if (n < ToLLintPiB) { sprintf(strc->buff1,"%d,%02d",(int)((LLint)(n/(ToLLintTiB))),(int)((LLint)(((n%(ToLLintTiB))*100)/(ToLLintTiB)))); - strcpy(strc->buff2,"TiB"); + strcpybuff(strc->buff2,"TiB"); } else { sprintf(strc->buff1,"%d,%02d",(int)((LLint)(n/(ToLLintPiB))),(int)((LLint)(((n%(ToLLintPiB))*100)/(ToLLintPiB)))); - strcpy(strc->buff2,"PiB"); + strcpybuff(strc->buff2,"PiB"); } #else else { sprintf(strc->buff1,"%d,%02d",(int)((LLint)(n/(ToLLintMiB))),(int)((LLint)(((n%(ToLLintMiB))*100)/(ToLLintMiB)))); - strcpy(strc->buff2,"MiB"); + strcpybuff(strc->buff2,"MiB"); } #endif strc->buffadr[0]=strc->buff1; @@ -2376,34 +2710,34 @@ int sig_ignore_flag( int setflag ) { // flag ignore // envoi de texte (en têtes généralement) sur la socket soc HTS_INLINE int sendc(htsblk* r, char* s) { - int n; + int n, ssz = (int)strlen(s); #if HTS_WIN #else sig_ignore_flag(1); #endif #if HDEBUG - write(0,s,strlen(s)); + write(0,s,ssz); #endif #if HTS_USEOPENSSL - if (r->ssl) { - n = SSL_write(r->ssl_con, s, strlen(s)); + if (SSL_is_available && r->ssl) { + n = SSL_write(r->ssl_con, s, ssz); } else #endif - n = send(r->soc,s,strlen(s),0); + n = send(r->soc,s,ssz,0); #if HTS_WIN #else sig_ignore_flag(0); #endif - return n; + return ( n == ssz ) ? 
n : -1; } // Remplace read -void finput(int fd,char* s,int max) { +int finput(int fd,char* s,int max) { char c; int j=0; do { @@ -2419,7 +2753,8 @@ void finput(int fd,char* s,int max) { } } } while((c!=0) && (j<max-1)); - s[j++]='\0'; + s[j]='\0'; + return j; } // Like linput, but in memory (optimized) @@ -2444,7 +2779,7 @@ int binput(char* buff,char* s,int max) { count--; // copy if (count > 0) { - strncat(s, buff, count); + strncatbuff(s, buff, count); } // and terminate with a null char s[count]='\0'; @@ -2470,6 +2805,34 @@ int linput(FILE* fp,char* s,int max) { s[j]='\0'; return j; } +int linputsoc(T_SOC soc, char* s, int max) { + int c; + int j=0; + do { + unsigned char ch; + if (recv(soc, &ch, 1, 0) == 1) { + c = ch; + } else { + c = EOF; + } + if (c!=EOF) { + switch(c) { + case 13: break; // sauter CR + case 10: c=-1; break; + case 9: case 12: break; // sauter ces caractères + default: s[j++]=(char) c; break; + } + } + } while((c!=-1) && (c!=EOF) && (j<(max-1))); + s[j]='\0'; + return j; +} +int linputsoc_t(T_SOC soc, char* s, int max, int timeout) { + if (check_readinput_t(soc, timeout)) { + return linputsoc(soc, s, max); + } + return -1; +} int linput_trim(FILE* fp,char* s,int max) { int rlen=0; char* ls=(char*) malloct(max+2); @@ -2660,7 +3023,7 @@ int ishtml(char* fil) { char fil_noquery[HTS_URLMAXSIZE*2]; fil_noquery[0]='\0'; a++; // pointer sur extension - strncat(fil_noquery,a,HTS_URLMAXSIZE); + strncatbuff(fil_noquery,a,HTS_URLMAXSIZE); a=strchr(fil_noquery,'?'); if (a) *a='\0'; @@ -2710,7 +3073,7 @@ HTS_INLINE int ishttperror(int err) { // retourne le pointeur ou le pointeur + offset si il existe dans la chaine un @ signifiant // une identification -char* jump_identification(char* source) { +HTSEXT_API char* jump_identification(char* source) { char *a,*trytofind; // rechercher dernier @ (car parfois email transmise dans adresse!) 
// mais sauter ftp:// éventuel @@ -2719,9 +3082,55 @@ char* jump_identification(char* source) { return (trytofind != NULL)?trytofind:a; } +HTSEXT_API char* jump_normalized(char* source) { + source = jump_identification(source); + if (strfield(source, "www") && source[3] != '\0') { + if (source[3] == '.') { // www.foo.com -> foo.com + source += 4; + } else { // www-4.foo.com -> foo.com + char* a = source + 3; + while(*a && ( isdigit(*a) || *a == '-') ) a++; + if (*a == '.') { + source = a + 1; + } + } + } + return source; +} + +HTSEXT_API char* fil_normalized(char* source, char* dest_) { + char* dest=dest_; + char lastc = 0; + int gotquery=0; + while(*source) { + if (*source == '?') + gotquery=1; + if ( + (!gotquery && lastc == '/' && *source == '/') // foo//bar -> foo/bar + ) { + } + else { + *dest++ = *source; + } + lastc = *source; + source++; + } + *dest++ = '\0'; + return dest_; +} + +#define endwith(a) ( (len >= (sizeof(a)-1)) ? ( strncmp(dest, a+len-(sizeof(a)-1), sizeof(a)-1) == 0 ) : 0 ); +HTSEXT_API char* adr_normalized(char* source, char* dest) { + /* not yet too aggressive (no com<->net<->org checkings) */ + strcpybuff(dest, jump_normalized(source)); + return dest; +} +#undef endwith + + // find port (:80) or NULL if not found // can handle IPV6 addresses -char* jump_toport(char* source) { +HTSEXT_API char* jump_toport(char* source) { char *a,*trytofind; a = jump_identification(source); trytofind = strrchr_limit(a, ']', strchr(source, '/')); // find last ] (http://[3ffe:b80:1234::1]:80/foo.html) @@ -2732,7 +3141,7 @@ char* jump_toport(char* source) { // strrchr, but not too far char* strrchr_limit(char* s, char c, char* limit) { if (limit == NULL) { - char* p = strchr(s, c); + char* p = strrchr(s, c); return p?(p+1):NULL; } else { char *a=NULL, *p; @@ -2765,17 +3174,18 @@ HTS_INLINE char* jump_protocol(char* source) { } // codage base 64 a vers b -void code64(char* a,char* b) { +void code64(unsigned char* a,int size_a,unsigned char* b,int crlf) { int 
i1=0,i2=0,i3=0,i4=0; - unsigned long store; + int loop=0; + unsigned long int store; int n; const char _hts_base64[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; - b[0]='\0'; - while(*a) { + while(size_a-- > 0) { // 24 bits - n=1; store=0; store |= ((*a++) & 0xff); - if (*a) { n=2; store <<= 8; store |= ((*a++) & 0xff); } - if (*a) { n=3; store <<= 8; store |= ((*a++) & 0xff); } + n=1; + store = *a++; + if (size_a-- > 0) { n=2; store <<= 8; store |= *a++; } + if (size_a-- > 0) { n=3; store <<= 8; store |= *a++; } if (n==3) { i4=store & 63; i3=(store>>6) & 63; @@ -2802,6 +3212,11 @@ void code64(char* a,char* b) { *b++ = _hts_base64[i4]; else *b++ = '='; + + if (crlf && ( ( loop += 3 ) % 60) == 0 ) { + *b++ = '\r'; + *b++ = '\n'; + } } *b++='\0'; } @@ -2809,7 +3224,7 @@ void code64(char* a,char* b) { // remplacer " par " etc.. // buffer MAX 1Ko #define strcmpbeg(a, b) strncmp(a, b, strlen(b)) -void unescape_amp(char* s) { +HTSEXT_API void unescape_amp(char* s) { while(*s) { if (*s=='&') { char* end=strchr(s,';'); @@ -3043,8 +3458,8 @@ void unescape_amp(char* s) { if (c) { char buff[HTS_URLMAXSIZE*2]; buff[0]=(char) c; - strcpy(buff+1,end+1); - strcpy(s,buff); + strcpybuff(buff+1,end+1); + strcpybuff(s,buff); } } } @@ -3054,7 +3469,7 @@ void unescape_amp(char* s) { // remplacer %20 par ' ', | par : etc.. // buffer MAX 1Ko -char* unescape_http(char* s) { +HTSEXT_API char* unescape_http(char* s) { char* tempo; int i,j=0; NOSTATIC_RESERVE(tempo, char, HTS_URLMAXSIZE*2); @@ -3078,7 +3493,7 @@ char* unescape_http(char* s) { } // unescape in URL/URI ONLY what has to be escaped, to form a standard URL/URI -char* unescape_http_unharm(char* s, int no_high) { +HTSEXT_API char* unescape_http_unharm(char* s, int no_high) { char* tempo; int i,j=0; NOSTATIC_RESERVE(tempo, char, HTS_URLMAXSIZE*2); @@ -3120,32 +3535,46 @@ char* unescape_http_unharm(char* s, int no_high) { // remplacer " par %xx etc.. 
// buffer MAX 1Ko -void escape_spc_url(char* s) { +HTSEXT_API void escape_spc_url(char* s) { x_escape_http(s,2); } // smith / john -> smith%20%2f%20john -void escape_in_url(char* s) { +HTSEXT_API void escape_in_url(char* s) { x_escape_http(s,1); } // smith / john -> smith%20/%20john -void escape_uri(char* s) { +HTSEXT_API void escape_uri(char* s) { x_escape_http(s,3); } -void escape_uri_utf(char* s) { +HTSEXT_API void escape_uri_utf(char* s) { x_escape_http(s,30); } -void escape_check_url(char* s) { +HTSEXT_API void escape_check_url(char* s) { x_escape_http(s,0); } // same as escape_check_url, but returns char* -char* escape_check_url_addr(char* s) { +HTSEXT_API char* escape_check_url_addr(char* s) { char* adr; escape_check_url(adr = concat(s,"")); return adr; } +// strip all control characters +HTSEXT_API void escape_remove_control(char* s) { + unsigned char* ss = (unsigned char*) s; + while(*ss) { + if (*ss < 32) { /* CONTROL characters go away! */ + char tmp[HTS_URLMAXSIZE*2]; + strcpybuff(tmp, ss+1); + strcpybuff(ss, tmp); + } else { + ss++; + } + } +} + -void x_escape_http(char* s,int mode) { +HTSEXT_API void x_escape_http(char* s,int mode) { while(*s) { int test=0; if (mode == 0) @@ -3155,7 +3584,8 @@ void x_escape_http(char* s,int mode) { || CHAR_DELIM(*s) || CHAR_UNWISE(*s) || CHAR_SPECIAL(*s) - || CHAR_XXAVOID(*s) ); + || CHAR_XXAVOID(*s) + || CHAR_MARK(*s)); } else if (mode==2) test=(strchr(" ",*s)!=0); // n'escaper que espace @@ -3171,12 +3601,12 @@ void x_escape_http(char* s,int mode) { } if (test) { - char buffer[HTS_URLMAXSIZE*2]; + char buffer[HTS_URLMAXSIZE*3]; int n; n=(int)(unsigned char) *s; - strcpy(buffer,s+1); + strcpybuff(buffer,s+1); sprintf(s,"%%%02x",n); - strcat(s,buffer); + strcatbuff(s,buffer); } s++; } @@ -3206,8 +3636,8 @@ char* concat(const char* a,const char* b) { concat_strc* strc; NOSTATIC_RESERVE(strc, concat_strc, 1); strc->rol=((strc->rol+1)%16); // roving pointer - strcpy(strc->buff[strc->rol],a); - if (b) 
strcat(strc->buff[strc->rol],b); + strcpybuff(strc->buff[strc->rol],a); + if (b) strcatbuff(strc->buff[strc->rol],b); return strc->buff[strc->rol]; } // conversion fichier / -> antislash @@ -3244,7 +3674,7 @@ char* convtolower(char* a) { concat_strc* strc; NOSTATIC_RESERVE(strc, concat_strc, 1); strc->rol=((strc->rol+1)%16); // roving pointer - strcpy(strc->buff[strc->rol],a); + strcpybuff(strc->buff[strc->rol],a); hts_lowcase(strc->buff[strc->rol]); // lower case return strc->buff[strc->rol]; } @@ -3308,7 +3738,7 @@ void guess_httptype(char *s,char *fil) { // flag: 1 si toujours renvoyer un type void get_httptype(char *s,char *fil,int flag) { if (ishtml(fil)==1) - strcpy(s,"text/html"); + strcpybuff(s,"text/html"); else { char *a=fil+strlen(fil)-1; while ( (*a!='.') && (*a!='/') && (a>fil)) a--; @@ -3319,7 +3749,7 @@ void get_httptype(char *s,char *fil,int flag) { while( (!ok) && (strnotempty(hts_mime[j][1])) ) { if (strfield2(hts_mime[j][1],a)) { if (hts_mime[j][0][0]!='*') { // Une correspondance existe - strcpy(s,hts_mime[j][0]); + strcpybuff(s,hts_mime[j][0]); ok=1; } } @@ -3328,7 +3758,7 @@ void get_httptype(char *s,char *fil,int flag) { if (!ok) if (flag) sprintf(s,"application/%s",a); } else { - if (flag) strcpy(s,"application/octet-stream"); + if (flag) strcpybuff(s,"application/octet-stream"); } } } @@ -3364,7 +3794,7 @@ int get_userhttptype(int setdefs,char *s,char *ext) { char* a; a=strchr(detect,'\n'); if (a) { - strncat(s,detect,(int) (a - detect)); + strncatbuff(s,detect,(int) (a - detect)); } } return 1; @@ -3383,7 +3813,7 @@ void give_mimext(char *s,char *st) { while( (!ok) && (strnotempty(hts_mime[j][1])) ) { if (strfield2(hts_mime[j][0],st)) { if (hts_mime[j][1][0]!='*') { // Une correspondance existe - strcpy(s,hts_mime[j][1]); + strcpybuff(s,hts_mime[j][1]); ok=1; } } @@ -3403,7 +3833,7 @@ void give_mimext(char *s,char *st) { if (a) { if ((int)strlen(a) >= 1) { if ((int)strlen(a) <= 4) { - strcpy(s,a); + strcpybuff(s,a); ok=1; } } @@ -3441,7 
+3871,7 @@ char* get_ext(char *fil) { if (*a=='.') { fil_noquery[0]='\0'; a++; // pointer sur extension - strncat(fil_noquery,a,HTS_URLMAXSIZE); + strncatbuff(fil_noquery,a,HTS_URLMAXSIZE); a=strchr(fil_noquery,'?'); if (a) *a='\0'; @@ -3531,60 +3961,54 @@ void fprintfio(FILE* fp,char* buff,char* prefix) { /* Le fichier existe-t-il? (ou est-il accessible?) */ int fexist(char* s) { - FILE* fp; - if (strnotempty(s)==0) // nom vide: non trouvé - return 0; - fp=fopen(fconv(s),"rb"); - if (fp!=NULL) fclose(fp); - return (fp!=NULL); + struct stat st; + memset(&st, 0, sizeof(st)); + if (stat(s, &st) == 0) { + if (S_ISREG(st.st_mode)) { + return 1; + } + } + return 0; } /* Taille d'un fichier, -1 si n'existe pas */ /* fp->_cnt ne fonctionne pas sur toute les plate-formes :-(( */ /* Note: NOT YET READY FOR 64-bit */ -//LLint fsize(char* s) { -int fsize(char* s) { - /* -#if HTS_WIN - HANDLE hFile; - DWORD dwSizeHigh = 0; - DWORD dwSizeLow = 0; - hFile = CreateFile(s,0,0,NULL,OPEN_EXISTING,0,NULL); - if (hFile) { - dwSizeLow = GetFileSize (hFile, & dwSizeHigh) ; - CloseHandle(hFile); - if (dwSizeLow != 0xFFFFFFFF) - return (dwSizeLow & (dwSizeHigh<<32)); - else - return -1; - } else - return -1; -#else - */ +INTsys fsize(char* s) { FILE* fp; if (strnotempty(s)==0) // nom vide: erreur return -1; fp=fopen(fconv(s),"rb"); if (fp!=NULL) { - int i; + INTsys i; fseek(fp,0,SEEK_END); +#ifdef HTS_FSEEKO + i=ftello(fp); +#else i=ftell(fp); +#endif fclose(fp); return i; } else return -1; - /* -#endif - */ } -int fpsize(FILE* fp) { - int oldpos,size; +INTsys fpsize(FILE* fp) { + INTsys oldpos,size; if (!fp) return -1; +#ifdef HTS_FSEEKO + oldpos=ftello(fp); +#else oldpos=ftell(fp); +#endif fseek(fp,0,SEEK_END); +#ifdef HTS_FSEEKO + size=ftello(fp); + fseeko(fp,oldpos,SEEK_SET); +#else size=ftell(fp); fseek(fp,oldpos,SEEK_SET); +#endif return size; } @@ -3593,7 +4017,7 @@ typedef struct { char path[1024+4]; int init; } hts_rootdir_strc; -char* hts_rootdir(char* file) { +HTSEXT_API char* 
hts_rootdir(char* file) { static hts_rootdir_strc strc = {"", 0}; //NOSTATIC_RESERVE(strc, hts_rootdir_strc, 1); if (file) { @@ -3602,7 +4026,7 @@ char* hts_rootdir(char* file) { strc.init=1; if (strnotempty(file)) { char* a; - strcpy(strc.path,file); + strcpybuff(strc.path,file); while((a=strrchr(strc.path,'\\'))) *a='/'; if ((a=strrchr(strc.path,'/'))) { *(a+1)='\0'; @@ -3613,7 +4037,7 @@ char* hts_rootdir(char* file) { if( getcwd( strc.path, 1024 ) == NULL ) strc.path[0]='\0'; else - strcat(strc.path,"/"); + strcatbuff(strc.path,"/"); } } return NULL; @@ -3625,7 +4049,7 @@ char* hts_rootdir(char* file) { -hts_stat_struct HTS_STAT; +HTSEXT_API hts_stat_struct HTS_STAT; // // return number of downloadable bytes, depending on rate limiter // see engine_stats() routine, too @@ -3693,7 +4117,7 @@ HTS_INLINE int hts_read(htsblk* r,char* buff,int size) { DEBUG_W("read\n"); #endif if (r->fp) - retour=fread(buff,1,size,r->fp); + retour=(int)fread(buff,1,size,r->fp); else retour=-1; } else { @@ -3704,7 +4128,7 @@ HTS_INLINE int hts_read(htsblk* r,char* buff,int size) { #endif //HTS_TOTAL_RECV_CHECK(size); // Diminuer au besoin si trop de données reçues #if HTS_USEOPENSSL - if (r->ssl) { + if (SSL_is_available && r->ssl) { retour = SSL_read(r->ssl_con, buff, size); if (retour <= 0) { int err_code = SSL_get_error(r->ssl_con, retour); @@ -3745,6 +4169,21 @@ t_dnscache* _hts_cache(void) { NOSTATIC_RESERVE(cache, t_dnscache, 1); return cache; } +// free the cache +static void hts_cache_free_(t_dnscache* cache) { + if (cache != NULL) { + if (cache->n != NULL) { + hts_cache_free_(cache->n); + } + freet(cache); + } +} +void hts_cache_free(t_dnscache* cache) { + if (cache != NULL) { + hts_cache_free_(cache->n); + cache->n = NULL; + } +} // lock le cache dns pour tout opération d'ajout // plus prudent quand plusieurs threads peuvent écrire dedans.. 
@@ -3819,7 +4258,7 @@ int hts_dnstest(char* _iadr) { NOSTATIC_RESERVE(iadr, char, HTS_URLMAXSIZE*2); // sauter user:pass@ éventuel - strcpy(iadr,jump_identification(_iadr)); + strcpybuff(iadr,jump_identification(_iadr)); // couper éventuel : { char *a; @@ -3852,7 +4291,7 @@ int hts_dnstest(char* _iadr) { } -t_hostent* vxgethostbyname(char* hostname, void* v_buffer) { +HTSEXT_API t_hostent* vxgethostbyname(char* hostname, void* v_buffer) { t_fullhostent* buffer = (t_fullhostent*) v_buffer; /* Clear */ fullhostent_init(buffer); @@ -3869,8 +4308,8 @@ t_hostent* vxgethostbyname(char* hostname, void* v_buffer) { if ((hostname[0] == '[') && (hostname[strlen(hostname)-1] == ']')) { char tempo[HTS_URLMAXSIZE*2]; tempo[0]='\0'; - strncat(tempo, hostname+1, strlen(hostname)-2); - strcpy(hostname, tempo); + strncatbuff(tempo, hostname+1, strlen(hostname)-2); + strcpybuff(hostname, tempo); } { @@ -3935,7 +4374,7 @@ t_hostent* hts_gethostbyname(char* _iadr, void* v_buffer) { /* Clear */ fullhostent_init(buffer); - strcpy(iadr,jump_identification(_iadr)); + strcpybuff(iadr,jump_identification(_iadr)); // couper éventuel : { char *a; @@ -3993,7 +4432,7 @@ t_hostent* hts_gethostbyname(char* _iadr, void* v_buffer) { #endif cache->n=(t_dnscache*) calloct(1,sizeof(t_dnscache)); if (cache->n!=NULL) { - strcpy(cache->n->iadr,iadr); + strcpybuff(cache->n->iadr,iadr); if (hp!=NULL) { memcpy(cache->n->host_addr, hp->h_addr_list[0], hp->h_length); cache->n->host_length=hp->h_length; @@ -4028,102 +4467,161 @@ HTS_INLINE t_hostent* hts_gethostbyname(char* iadr, t_fullhostent* buffer) { // --- Tracage des mallocs() --- -#if HTS_TRACE_MALLOC -typedef struct _mlink { +#ifdef HTS_TRACE_MALLOC +//#define htsLocker(A, N) htsLocker(A, N) +#define htsLocker(A, N) do {} while(0) +static mlink trmalloc = {NULL,0,0,NULL}; +static int trmalloc_id=0; +static PTHREAD_LOCK_TYPE* mallocMutex = NULL; +static void hts_meminit(void) { + //if (mallocMutex == NULL) { + // mallocMutex = 
calloc(sizeof(*mallocMutex), 1); + // htsLocker(mallocMutex, -999); + //} +} +void* hts_malloc(size_t len) { void* adr; - int len; - int id; - struct _mlink* next; -} mlink; -mlink trmalloc = {NULL,0,0,NULL}; -int trmalloc_id=0; - -HTS_INLINE void* hts_malloc(size_t len,size_t len2) { + hts_meminit(); + htsLocker(mallocMutex, 1); + fassert(len > 0); + adr = hts_xmalloc(len, 0); + htsLocker(mallocMutex, 0); + return adr; +} +void* hts_calloc(size_t len,size_t len2) { + void* adr; + hts_meminit(); + fassert(len > 0); + fassert(len2 > 0); + htsLocker(mallocMutex, 1); + adr = hts_xmalloc(len, len2); + htsLocker(mallocMutex, 0); + memset(adr, 0, len * len2); + return adr; +} +void* hts_xmalloc(size_t len,size_t len2) { mlink* lnk = (mlink*) calloc(1,sizeof(mlink)); - void* r = NULL; + fassert(lnk != NULL); + fassert(len > 0); + fassert(len2 >= 0); if (lnk) { + void* r = NULL; + int size, bsize = sizeof(t_htsboundary); if (len2) - r = calloc(len,len2); + size = len * len2; else - r = malloc(len); + size = len; + size += ((bsize - (size % bsize)) % bsize); /* check alignement */ + r = malloc(size + bsize*2); + fassert(r != NULL); if (r) { - lnk->adr=r; - if (len2) - lnk->len=len*len2; - else - lnk->len=len; - lnk->id=trmalloc_id++; - lnk->next=trmalloc.next; - trmalloc.next=lnk; -#if MEMDEBUG - //printf("malloc: %d\n",r); -#endif - } else free(lnk); + * ( (t_htsboundary*) ((char*) r ) ) + = * ( (t_htsboundary*) ( (char*) r + size + bsize ) ) + = htsboundary; + ((char*) r) += bsize; /* boundary */ + lnk->adr = r; + lnk->len = size; + lnk->id = trmalloc_id++; + lnk->next = trmalloc.next; + trmalloc.next = lnk; + return r; + } else { + free(lnk); + } } - return r; + return NULL; } -HTS_INLINE void hts_free(void* adr) { +void hts_free(void* adr) { mlink* lnk = &trmalloc; + int bsize = sizeof(t_htsboundary); + fassert(adr != NULL); if (!adr) { -#if MEMDEBUG - printf("* unexpected free() error at %d\n",adr); -#endif return; } - do { - if (lnk->next->adr==adr) { + 
htsLocker(mallocMutex, 1); + while(lnk->next != NULL) { + if (lnk->next->adr == adr) { mlink* blk_free=lnk->next; -#if 1 + fassert(blk_free->id != -1); + fassert( * ( (t_htsboundary*) ( (char*) adr - bsize ) ) == htsboundary ); + fassert( * ( (t_htsboundary*) ( (char*) adr + blk_free->len ) ) == htsboundary ); lnk->next=lnk->next->next; free((void*) blk_free); -#else -#if MEMDEBUG - if (blk_free->id==-1) { - printf("* memory has already been freed: %d (id=%d)\n",blk_free->adr,blk_free->id); - } -#endif - blk_free->id=-1; -#endif - free(adr); -#if MEMDEBUG - //printf("free: %d (id=%d)\n",blk_free->adr,blk_free->id); -#endif + //blk_free->id=-1; + free((char*) adr - bsize); + htsLocker(mallocMutex, 0); return; } - lnk=lnk->next; - } while(lnk->next != NULL); -#if MEMDEBUG - printf("* unexpected free() error at %d\n",adr); -#endif + lnk = lnk->next; + fassert(lnk->next != NULL); + } free(adr); + htsLocker(mallocMutex, 0); } -HTS_INLINE void* hts_realloc(void* adr,size_t len) { +void* hts_realloc(void* adr,size_t len) { + int bsize = sizeof(t_htsboundary); + len += ((bsize - (len % bsize)) % bsize); /* check alignement */ + if (adr != NULL) { + mlink* lnk = &trmalloc; + htsLocker(mallocMutex, 1); + while(lnk->next != NULL) { + if (lnk->next->adr==adr) { + { + mlink* blk_free=lnk->next; + fassert(blk_free->id != -1); + fassert( * ( (t_htsboundary*) ( (char*) adr - bsize ) ) == htsboundary ); + fassert( * ( (t_htsboundary*) ( (char*) adr + blk_free->len ) ) == htsboundary ); + } + adr = realloc((char*) adr - bsize, len + bsize * 2); + fassert(adr != NULL); + lnk->next->adr = (char*) adr + bsize; + lnk->next->len = len; + * ( (t_htsboundary*) ( (char*) adr ) ) + = * ( (t_htsboundary*) ( (char*) adr + len + bsize) ) + = htsboundary; + htsLocker(mallocMutex, 0); + return (char*) adr + bsize; + } + lnk = lnk->next; + fassert(lnk->next != NULL); + } + htsLocker(mallocMutex, 0); + } + return hts_malloc(len); +} +mlink* hts_find(char* adr) { + char* stkframe = (char*) 
&stkframe; mlink* lnk = &trmalloc; - do { - if (lnk->next->adr==adr) { - adr = realloc(adr,len); - lnk->next->adr = adr; - lnk->next->len = len; -#if MEMDEBUG - //printf("realloc: %d (id=%d)\n",lnk->next->adr,lnk->next->id); -#endif - return adr; + int bsize = sizeof(t_htsboundary); + fassert(adr != NULL); + if (!adr) { + return NULL; + } + htsLocker(mallocMutex, 1); + while(lnk->next != NULL) { + if (adr >= lnk->next->adr && adr <= lnk->next->adr + lnk->next->len) { /* found */ + htsLocker(mallocMutex, 0); + return lnk->next; } - lnk=lnk->next; - } while(lnk->next != NULL); -#if MEMDEBUG - printf("* unexpected realloc() error at %d\n",adr); -#endif - return realloc(adr,len); + lnk = lnk->next; + } + htsLocker(mallocMutex, 0); + { + int depl = (int) (adr - stkframe); + if (depl < 0) depl = -depl; + //fassert(depl < 512000); /* near the stack frame.. doesn't look like malloc but stack variable */ + return NULL; + } } // check the malloct() and calloct() trace stack void hts_freeall(void) { + int bsize = sizeof(t_htsboundary); while(trmalloc.next) { #if MEMDEBUG printf("* block %d\t not released: at %d\t (%d\t bytes)\n",trmalloc.next->id,trmalloc.next->adr,trmalloc.next->len); #endif if (trmalloc.next->id != -1) { - freet(trmalloc.next->adr); + free((char*) trmalloc.next->adr - bsize); } } } @@ -4145,8 +4643,8 @@ void cut_path(char* fullpath,char* path,char* pname) { a=fullpath+strlen(fullpath)-2; while( (*a!='/') && ( a > fullpath)) a--; if (*a=='/') a++; - strcpy(pname,a); - strncat(path,fullpath,(int) (a - fullpath)); + strcpybuff(pname,a); + strncatbuff(path,fullpath,(int) (a - fullpath)); } } } @@ -4168,8 +4666,12 @@ int ftp_available(void) { -int hts_init(void) { +HTSEXT_API int hts_init(void) { static int hts_init_ok = 0; + + /* Ensure external modules are loaded */ + htspe_init(); + if (!hts_init_ok) { hts_init_ok = 1; // default wrappers @@ -4196,17 +4698,18 @@ int hts_init(void) { /* Initialize the OpensSSL library */ - if (!openssl_ctx) { + if 
(!openssl_ctx && SSL_is_available) { + if (SSL_load_error_strings) SSL_load_error_strings(); SSL_library_init(); - SSL_load_error_strings(); - ERR_load_crypto_strings(); - ERR_load_SSL_strings(); - SSLeay_add_ssl_algorithms(); + ///if (SSL_load_error_strings) SSL_load_error_strings(); + //if (ERR_load_crypto_strings) ERR_load_crypto_strings(); + // if (ERR_load_SSL_strings) ERR_load_SSL_strings(); ???!!! // OpenSSL_add_all_algorithms(); openssl_ctx = SSL_CTX_new(SSLv23_client_method()); if (!openssl_ctx) { fprintf(stderr, "fatal: unable to initialize TLS: SSL_CTX_new(SSLv23_client_method)\n"); - abort(); + abortLog("unable to initialize TLS: SSL_CTX_new(SSLv23_client_method)"); + assertf("unable to initialize TLS" == NULL); } } #endif @@ -4214,9 +4717,13 @@ int hts_init(void) { /* Init vars and thread-specific values */ hts_initvar(); + /* initialiser structcheck */ + // structcheck_init(1); + return 1; } -int hts_uninit(void) { +HTSEXT_API int hts_uninit(void) { + hts_cache_free(_hts_cache()); hts_freevar(); /* htswrap_free(); */ return 1; diff --git a/src/htslib.h b/src/htslib.h index 9b2aca3..d3881d3 100644 --- a/src/htslib.h +++ b/src/htslib.h @@ -60,9 +60,11 @@ Please visit our Website: http://www.httrack.com typedef struct { short int user_agent_send; // user agent (ex: httrack/1.0 [sun]) short int http11; // l'en tête peut (doit) être signé HTTP/1.1 et non HTTP/1.0 + short int nokeepalive; // pas de keep-alive short int range_used; // Range utilisé short int nocompression; // Pas de compression - char user_agent[64]; + short int flush_garbage; // recycled + char user_agent[128]; char lang_iso[64]; t_proxy proxy; // proxy } htsrequest; @@ -75,11 +77,17 @@ typedef struct { short int is_write; // sortie sur disque (out) ou en mémoire (adr) short int is_chunk; // mode chunk short int compressed; // compressé? + short int empty; // vide? + short int keep_alive; // Keep-Alive? 
+ short int keep_alive_trailers; // ..with trailers extension + int keep_alive_t; // KA timeout + int keep_alive_max; // KA number of requests char* adr; // adresse du bloc de mémoire, NULL=vide FILE* out; // écriture directe sur disque (si is_write=1) LLint size; // taille fichier char msg[80]; // message éventuel si échec ("\0"=non précisé) char contenttype[64]; // content-type ("text/html" par exemple) + char charset[64]; // charset ("iso-8859-1" par exemple) char contentencoding[64]; // content-encoding ("gzip" par exemple) char* location; // on copie dedans éventuellement la véritable 'location' LLint totalsize; // taille totale à télécharger (-1=inconnue) @@ -95,6 +103,7 @@ typedef struct { char etag[64]; // Etag char cdispo[256]; // Content-Disposition coupé LLint crange; // Content-Range + int debugid; // debug connection /* */ htsrequest req; // paramètres pour la requête /*char digest[32+2]; // digest md5 généré par le moteur ("" si non généré)*/ @@ -147,9 +156,10 @@ int hts_read(htsblk* r,char* buff,int size); //int HTS_TOTAL_RECV_CHECK(int var); LLint check_downloadable_bytes(int rate); -int hts_init(void); -int hts_uninit(void); - +#ifndef HTTRACK_DEFLIB +HTSEXT_API int hts_init(void); +HTSEXT_API int hts_uninit(void); +#endif // fonctions principales int http_fopen(char* adr,char* fil,htsblk* retour); @@ -159,26 +169,33 @@ htsblk httpget(char* url); //int newhttp(char* iadr,char* err=NULL); int newhttp(char* iadr,htsblk* retour,int port,int waitconnect); HTS_INLINE void deletehttp(htsblk* r); +HTS_INLINE int deleteaddr(htsblk* r); HTS_INLINE void deletesoc(T_SOC soc); HTS_INLINE void deletesoc_r(htsblk* r); htsblk http_location(char* adr,char* fil,char* loc); htsblk http_test(char* adr,char* fil,char* loc); int check_readinput(htsblk* r); +int check_readinput_t(T_SOC soc, int timeout); void http_fread(T_SOC soc,htsblk* retour); LLint http_fread1(htsblk* r); void treathead(t_cookie* cookie,char* adr,char* fil,htsblk* retour,char* rcvd); void 
treatfirstline(htsblk* retour,char* rcvd); -void infostatuscode(char* msg,int statuscode); +#ifndef HTTRACK_DEFLIB +HTSEXT_API void infostatuscode(char* msg,int statuscode); +#endif // sous-fonctions htsblk xhttpget(char* adr,char* fil); htsblk http_gethead(char* adr,char* fil); LLint http_xfread1(htsblk* r,int bufl); HTS_INLINE t_hostent* hts_gethostbyname(char* iadr, void* v_buffer); -t_hostent* vxgethostbyname(char* hostname, void* v_buffer); +#ifndef HTTRACK_DEFLIB +HTSEXT_API t_hostent* vxgethostbyname(char* hostname, void* v_buffer); +#endif t_hostent* _hts_ghbn(t_dnscache* cache,char* iadr,t_hostent* retour); int ftp_available(void); #if HTS_DNSCACHE +void hts_cache_free(t_dnscache* cache); int hts_dnstest(char* _iadr); t_dnscache* _hts_cache(void); int _hts_lockdns(int i); @@ -186,9 +203,13 @@ int _hts_lockdns(int i); // outils divers HTS_INLINE TStamp time_local(void); -HTS_INLINE TStamp mtime_local(void); +#ifndef HTTRACK_DEFLIB +HTSEXT_API HTS_INLINE TStamp mtime_local(void); +#endif void sec2str(char *s,TStamp t); -void qsec2str(char *st,TStamp t); +#ifndef HTTRACK_DEFLIB +HTSEXT_API void qsec2str(char *st,TStamp t); +#endif void time_gmt_rfc822(char* s); void time_local_rfc822(char* s); struct tm* convert_time_rfc822(char* s); @@ -196,14 +217,18 @@ int set_filetime(char* file,struct tm* tm_time); int set_filetime_rfc822(char* file,char* date); HTS_INLINE void time_rfc822(char* s,struct tm * A); HTS_INLINE void time_rfc822_local(char* s,struct tm * A); -char* int2char(int n); -char* int2bytes(LLint n); -char* int2bytessec(long int n); -char** int2bytes2(LLint n); +#ifndef HTTRACK_DEFLIB +HTSEXT_API char* int2char(int n); +HTSEXT_API char* int2bytes(LLint n); +HTSEXT_API char* int2bytessec(long int n); +HTSEXT_API char** int2bytes2(LLint n); +#endif HTS_INLINE int sendc(htsblk* r, char* s); -void finput(int fd,char* s,int max); +int finput(int fd,char* s,int max); int binput(char* buff,char* s,int max); int linput(FILE* fp,char* s,int max); +int 
linputsoc(T_SOC soc, char* s, int max); +int linputsoc_t(T_SOC soc, char* s, int max, int timeout); int linput_trim(FILE* fp,char* s,int max); int linput_cpp(FILE* fp,char* s,int max); void rawlinput(FILE* fp,char* s,int max); @@ -226,23 +251,33 @@ int is_userknowntype(char *fil); int is_dyntype(char *fil); char* get_ext(char *fil); int may_unknown(char* st); -char* jump_identification(char*); -char* jump_toport(char*); +#ifndef HTTRACK_DEFLIB +HTSEXT_API char* jump_identification(char*); +HTSEXT_API char* jump_normalized(char*); +HTSEXT_API char* jump_toport(char*); +HTSEXT_API char* fil_normalized(char* source, char* dest); +HTSEXT_API char* adr_normalized(char* source, char* dest); +#endif char* strrchr_limit(char* s, char c, char* limit); HTS_INLINE char* jump_protocol(char* source); -void code64(char* a,char* b); -void unescape_amp(char* s); -void escape_spc_url(char* s); -void escape_in_url(char* s); -void escape_uri(char* s); -void escape_uri_utf(char* s); -void escape_check_url(char* s); -char* escape_check_url_addr(char* s); -void x_escape_http(char* s,int mode); -HTS_INLINE int ehexh(char c); -char* unescape_http(char* s); -char* unescape_http_unharm(char* s, int no_high); -char* antislash_unescaped(char* s); +void code64(unsigned char* a,int size_a,unsigned char* b,int crlf); +#ifndef HTTRACK_DEFLIB +HTSEXT_API void unescape_amp(char* s); +HTSEXT_API void escape_spc_url(char* s); +HTSEXT_API void escape_in_url(char* s); +HTSEXT_API void escape_uri(char* s); +HTSEXT_API void escape_uri_utf(char* s); +HTSEXT_API void escape_check_url(char* s); +HTSEXT_API char* escape_check_url_addr(char* s); +HTSEXT_API void x_escape_http(char* s,int mode); +HTSEXT_API void escape_remove_control(char* s); +#endif +int ehexh(char c); +#ifndef HTTRACK_DEFLIB +HTSEXT_API char* unescape_http(char* s); +HTSEXT_API char* unescape_http_unharm(char* s, int no_high); +HTSEXT_API char* antislash_unescaped(char* s); +#endif int ehex(char* s); char* concat(const char* a,const char* 
b); #define copychar(a) concat((a),NULL) @@ -262,10 +297,11 @@ void hts_lowcase(char* s); void hts_replace(char *s,char from,char to); /* Spaces: CR,LF,TAB,FF */ -#define is_space(c) ( ((c)==' ') || ((c)=='\"') || ((c)==10) || ((c)==13) || ((c)==9) || ((c)==12) || ((c)=='\'') ) -#define is_realspace(c) ( ((c)==' ') || ((c)==10) || ((c)==13) || ((c)==9) || ((c)==12) ) +#define is_space(c) ( ((c)==' ') || ((c)=='\"') || ((c)==10) || ((c)==13) || ((c)==9) || ((c)==12) || ((c)==11) || ((c)=='\'') ) +#define is_realspace(c) ( ((c)==' ') || ((c)==10) || ((c)==13) || ((c)==9) || ((c)==12) || ((c)==11) ) #define is_taborspace(c) ( ((c)==' ') || ((c)==9) ) #define is_quote(c) ( ((c)=='\"') || ((c)=='\'') ) +#define is_retorsep(c) ( ((c)==10) || ((c)==13) || ((c)==9) ) //HTS_INLINE int is_space(char); //HTS_INLINE int is_realspace(char); @@ -279,10 +315,12 @@ int sig_ignore_flag( int setflag ); // flag ignore void cut_path(char* fullpath,char* path,char* pname); int fexist(char* s); /*LLint fsize(char* s); */ -int fpsize(FILE* fp); -int fsize(char* s); +INTsys fpsize(FILE* fp); +INTsys fsize(char* s); /* root dir */ -char* hts_rootdir(char* file); +#ifndef HTTRACK_DEFLIB +HTSEXT_API char* hts_rootdir(char* file); +#endif // Threads #if USE_PTHREAD @@ -301,19 +339,20 @@ unsigned long _beginthread( beginthread_type start_address, unsigned stack_size, /* variables globales */ //extern LLint HTS_TOTAL_RECV; // flux entrant reçu //extern int HTS_TOTAL_RECV_STATE; // status: 0 tout va bien 1: ralentir un peu 2: ralentir 3: beaucoup -extern hts_stat_struct HTS_STAT; +extern HTSEXT_API hts_stat_struct HTS_STAT; extern int _DEBUG_HEAD; extern FILE* ioinfo; /* constantes */ -extern const char hts_mime_keep[][32]; -extern const char hts_mime[][2][32]; -extern const char hts_detect[][32]; -extern const char hts_detectbeg[][32]; -extern const char hts_nodetect[][32]; -extern const char hts_detectURL[][32]; -extern const char hts_detectandleave[][32]; -extern const char 
hts_detect_js[][32]; +extern const char* hts_mime_keep[]; +extern const char* hts_mime[][2]; +extern const char* hts_main_mime[]; +extern const char* hts_detect[]; +extern const char* hts_detectbeg[]; +extern const char* hts_nodetect[]; +extern const char* hts_detectURL[]; +extern const char* hts_detectandleave[]; +extern const char* hts_detect_js[]; // defaut wrappers void __cdecl htsdefault_init(void); diff --git a/src/htsmodules.c b/src/htsmodules.c new file mode 100644 index 0000000..27ab855 --- /dev/null +++ b/src/htsmodules.c @@ -0,0 +1,305 @@ +/* ------------------------------------------------------------ */ +/* +HTTrack Website Copier, Offline Browser for Windows and Unix +Copyright (C) Xavier Roche and other contributors + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + + +Important notes: + +- We hereby ask people using this source NOT to use it in purpose of grabbing +emails addresses, or collecting any other private information on persons. +This would disgrace our work, and spoil the many hours we spent on it. 
+ + +Please visit our Website: http://www.httrack.com +*/ + + +/* ------------------------------------------------------------ */ +/* File: htsmodules.c subroutines: */ +/* external modules (parsers) */ +/* Author: Xavier Roche */ +/* ------------------------------------------------------------ */ + +#ifndef _WIN32 +#if HTS_DLOPEN +#include <dlfcn.h> +#endif +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "htsglobal.h" +#include "htsmodules.h" +#include "htsopt.h" +extern int fspc(FILE* fp,char* type); + +/* >>> Put all modules definitions here */ +#include "htszlib.h" +#include "htsbase.h" + +typedef int (*t_hts_detect_swf)(htsmoduleStruct* str); +typedef int (*t_hts_parse_swf)(htsmoduleStruct* str); +/* <<< */ + +/* >>> Put all modules includes here */ +#include "htsjava.h" +#if HTS_USESWF +#endif +/* <<< */ + +/* >>> Put all modules variables here */ + +int swf_is_available = 0; +t_hts_detect_swf hts_detect_swf = NULL; +t_hts_parse_swf hts_parse_swf = NULL; + +int gz_is_available = 0; +t_gzopen gzopen = NULL; +t_gzread gzread = NULL; +t_gzclose gzclose = NULL; + +int SSL_is_available = 0; +t_SSL_shutdown SSL_shutdown = NULL; +t_SSL_free SSL_free = NULL; +t_SSL_CTX_ctrl SSL_CTX_ctrl = NULL; +t_SSL_new SSL_new = NULL; +t_SSL_clear SSL_clear = NULL; +t_SSL_set_fd SSL_set_fd = NULL; +t_SSL_set_connect_state SSL_set_connect_state = NULL; +t_SSL_connect SSL_connect = NULL; +t_SSL_get_error SSL_get_error = NULL; +t_SSL_write SSL_write = NULL; +t_SSL_read SSL_read = NULL; +t_SSL_library_init SSL_library_init = NULL; +t_ERR_load_crypto_strings ERR_load_crypto_strings = NULL; +t_ERR_load_SSL_strings ERR_load_SSL_strings = NULL; +t_SSLv23_client_method SSLv23_client_method = NULL; +t_SSL_CTX_new SSL_CTX_new = NULL; +t_ERR_error_string ERR_error_string = NULL; +t_SSL_load_error_strings SSL_load_error_strings = NULL; + +int V6_is_available = HTS_INET6; + +char WHAT_is_available[64]=""; +/* <<< */ + +/* memory checks */ +HTSEXT_API 
htsErrorCallback htsCallbackErr = NULL; +HTSEXT_API int htsMemoryFastXfr = 1; /* fast xfr by default */ +void abortLog__fnc(char* msg, char* file, int line); +void abortLog__fnc(char* msg, char* file, int line) { + FILE* fp = fopen("CRASH.TXT", "wb"); + if (!fp) fp = fopen("/tmp/CRASH.TXT", "wb"); + if (!fp) fp = fopen("C:\\CRASH.TXT", "wb"); + if (fp) { + fprintf(fp, "HTTrack " HTTRACK_VERSIONID " closed at '%s', line %d\r\n", file, line); + fprintf(fp, "Reason:\r\n%s\r\n", msg); + fflush(fp); + fclose(fp); + } +} +HTSEXT_API t_abortLog abortLog__ = abortLog__fnc; /* avoid VC++ inlining */ + +static void htspe_log(htsmoduleStruct* str, char* msg); + +int hts_parse_externals(htsmoduleStruct* str) { + /* >>> Put all module calls here */ + + /* JAVA */ + if (hts_detect_java(str)) { + htspe_log(str, "java-lib"); + return hts_parse_java(str); + } + +#if HTS_USESWF + /* FLASH + (external module derivated from Macromedia(tm)'s classes) + */ + else if (swf_is_available && hts_detect_swf(str)) { + htspe_log(str, "swf-lib"); + return hts_parse_swf(str); + } +#endif + + /* <<< */ + + /* Not detected */ + return -1; +} + +/* NOTE: handled NOT closed */ +void* getFunctionPtr(char* file_, char* fncname) { + char file[1024]; + void* handle; + void* userfunction = NULL; + strcpybuff(file, file_); +#ifdef _WIN32 + handle = LoadLibrary(file); + if (handle == NULL) { + strcatbuff(file, ".dll"); + handle = LoadLibrary(file); + } +#else + handle = dlopen(file, RTLD_LAZY); + if (handle == NULL) { + strcatbuff(file, ".so"); + handle = dlopen(file, RTLD_LAZY); + } +#endif + if (handle) { + userfunction = (void*) DynamicGet(handle, fncname); + if (userfunction == NULL) { +#ifdef _WIN32 + FreeLibrary(handle); +#else + dlclose(handle); +#endif + } + } + return userfunction; +} + +void htspe_init() { + static int initOk = 0; + if (!initOk) { + initOk = 1; + + /* >>> Put all module initializations here */ + + /* Zlib */ +#if HTS_DLOPEN + { + void* handle; +#ifdef _WIN32 + handle = 
LoadLibrary("zlib"); +#else + handle = dlopen("libz.so.1", RTLD_LAZY); +#endif + if (handle) { + gzopen = (t_gzopen) DynamicGet(handle, "gzopen"); + gzread = (t_gzread) DynamicGet(handle, "gzread"); + gzclose = (t_gzclose) DynamicGet(handle, "gzclose"); + if (gzopen && gzread && gzclose) { + gz_is_available = 1; + } + } + } +#endif + + /* OpenSSL */ +#if HTS_DLOPEN + { + void* handle; +#ifdef _WIN32 + handle = LoadLibrary("ssleay32"); +#else + /* We are compatible with 0.9.6/7 and potentially above */ + handle = dlopen("libssl.so.0.9.7", RTLD_LAZY); + if (handle == NULL) { + handle = dlopen("libssl.so.0.9.6", RTLD_LAZY); + } + if (handle == NULL) { + /* Try harder */ + handle = dlopen("libssl.so.0", RTLD_LAZY); + } +#endif + if (handle) { + SSL_shutdown = (t_SSL_shutdown) DynamicGet(handle, "SSL_shutdown"); + SSL_free = (t_SSL_free) DynamicGet(handle, "SSL_free"); + SSL_new = (t_SSL_new) DynamicGet(handle, "SSL_new"); + SSL_clear = (t_SSL_clear) DynamicGet(handle, "SSL_clear"); + SSL_set_fd = (t_SSL_set_fd) DynamicGet(handle, "SSL_set_fd"); + SSL_set_connect_state = (t_SSL_set_connect_state) DynamicGet(handle, "SSL_set_connect_state"); + SSL_connect = (t_SSL_connect) DynamicGet(handle, "SSL_connect"); + SSL_get_error = (t_SSL_get_error) DynamicGet(handle, "SSL_get_error"); + SSL_write = (t_SSL_write) DynamicGet(handle, "SSL_write"); + SSL_read = (t_SSL_read) DynamicGet(handle, "SSL_read"); + SSL_library_init = (t_SSL_library_init) DynamicGet(handle, "SSL_library_init"); + ERR_load_SSL_strings = (t_ERR_load_SSL_strings) DynamicGet(handle, "ERR_load_SSL_strings"); + SSLv23_client_method = (t_SSLv23_client_method) DynamicGet(handle, "SSLv23_client_method"); + SSL_CTX_new = (t_SSL_CTX_new) DynamicGet(handle, "SSL_CTX_new"); + SSL_load_error_strings = (t_SSL_load_error_strings) DynamicGet(handle, "SSL_load_error_strings"); + SSL_CTX_ctrl = (t_SSL_CTX_ctrl) DynamicGet(handle, "SSL_CTX_ctrl"); +#ifdef _WIN32 + handle = LoadLibrary("libeay32"); +#endif + 
ERR_load_crypto_strings = (t_ERR_load_crypto_strings) DynamicGet(handle, "ERR_load_crypto_strings"); + ERR_error_string = (t_ERR_error_string) DynamicGet(handle, "ERR_error_string"); + + if (SSL_shutdown && SSL_free && SSL_CTX_ctrl && SSL_new && SSL_clear && + SSL_set_fd && SSL_set_connect_state && SSL_connect && SSL_get_error && SSL_write + && SSL_read && SSL_library_init && SSLv23_client_method && SSL_CTX_new + && SSL_load_error_strings && ERR_error_string) { + SSL_is_available = 1; + } + } + } +#endif + /* */ + + /* + FLASH + Load the library on-the-fly, if available + If not, that's not a problem + */ +#if HTS_DLOPEN + { +#ifdef _WIN32 + void* handle = LoadLibrary("htsswf"); +#else + void* handle = dlopen("libhtsswf.so.1", RTLD_LAZY); +#endif + if (handle) { + hts_detect_swf = (t_hts_detect_swf) DynamicGet(handle, "hts_detect_swf"); + hts_parse_swf = (t_hts_parse_swf) DynamicGet(handle, "hts_parse_swf"); + if (hts_detect_swf && hts_parse_swf) { + swf_is_available = 1; + } + } + // FreeLibrary(handle); + // dlclose(handle); + } +#endif + + /* <<< */ + + /* Options availability */ + sprintf(WHAT_is_available, "%s%s%s%s", + V6_is_available ? "" : "-noV6", + gz_is_available ? "" : "-nozip", + SSL_is_available ? "" : "-nossl", + swf_is_available ? 
"+swf" : ""); + + + } +} + +static void htspe_log(htsmoduleStruct* str, char* msg) { + char* savename = str->filename; + httrackp* opt = (httrackp*) str->opt; + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"(External module): parsing %s using module %s"LF, + savename, msg); + } +} + +HTSEXT_API const char* hts_is_available(void) { + return WHAT_is_available; +} diff --git a/src/htsmodules.h b/src/htsmodules.h new file mode 100644 index 0000000..7d1154b --- /dev/null +++ b/src/htsmodules.h @@ -0,0 +1,111 @@ +/* ------------------------------------------------------------ */ +/* +HTTrack Website Copier, Offline Browser for Windows and Unix +Copyright (C) Xavier Roche and other contributors + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + + +Important notes: + +- We hereby ask people using this source NOT to use it in purpose of grabbing +emails addresses, or collecting any other private information on persons. +This would disgrace our work, and spoil the many hours we spent on it. 
+ + +Please visit our Website: http://www.httrack.com +*/ + + +/* ------------------------------------------------------------ */ +/* File: htsmodules.h subroutines: */ +/* external modules (parsers) */ +/* Author: Xavier Roche */ +/* ------------------------------------------------------------ */ + +#ifndef HTS_MODULES +#define HTS_MODULES + +/* Function type to add links inside the module + link : link to add (absolute or relative) + str : structure defined below + Returns 1 if the link was added, 0 if not +*/ +typedef struct htsmoduleStruct htsmoduleStruct; +typedef int (* t_htsAddLink)(htsmoduleStruct* str, char* link); + +/* Structure passed to the module */ +struct htsmoduleStruct { + /* Read-only elements */ + char* filename; /* filename (C:\My Web Sites\...) */ + int size; /* size of filename (should be > 0) */ + char* mime; /* MIME type of the object */ + char* url_host; /* incoming hostname (www.foo.com) */ + char* url_file; /* incoming filename (/bar/bar.gny) */ + + /* Write-only */ + char* err_msg; /* if an error occurred, the error message (max. 1KB) */ + + /* Read/Write */ + int relativeToHtmlLink; /* set this to 1 if all urls you pass to addLink + are in fact relative to the html file where your + module was originally */ + + /* Callbacks */ + t_htsAddLink addLink; /* call this function when links are + being detected. it is not your responsibility to decide + if the engine will keep them, or not. 
*/ + + /* Optional */ + char* localLink; /* if non null, the engine will write there the local + relative filename of the link added by addLink(), or + the absolute path if the link was refused by the wizard */ + int localLinkSize; /* size of the optional buffer */ + + /* User-defined */ + void* userdef; /* can be used by callback routines + */ + + /* ---- ---- ---- */ + + /* Internal use - please don't touch */ + void* liens; + void* opt; + void* back; + int back_max; + void* cache; + void* hashptr; + int numero_passe; + int add_tab_alloc; + /* */ + int* lien_tot_; + int* ptr_; + int* lien_size_; + char** lien_buffer_; + /* Internal use - please don't touch */ + +}; + +extern void htspe_init(void); +extern int hts_parse_externals(htsmoduleStruct* str); +extern void* getFunctionPtr(char* file, char* fncname); + +extern int gz_is_available; +extern int swf_is_available; +extern int SSL_is_available; +extern int V6_is_available; +extern char WHAT_is_available[64]; + +#endif diff --git a/src/htsname.c b/src/htsname.c index 2df0c98..56fa6a6 100644 --- a/src/htsname.c +++ b/src/htsname.c @@ -53,7 +53,7 @@ Please visit our Website: http://www.httrack.com { /* ajout nom */\ char buff[HTS_URLMAXSIZE*2];\ buff[0]='\0';\ - strncat(buff,start_pos,(int) (nom_pos - start_pos));\ + strncatbuff(buff,start_pos,(int) (nom_pos - start_pos));\ url_savename_addstr(save,buff);\ } @@ -83,6 +83,10 @@ static const char *hts_tbdev[] = // système intelligent, qui renomme en cas de besoin (exemple: deux INDEX.HTML et index.html) int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_adr,char* former_fil,char* referer_adr,char* referer_fil,httrackp* opt,lien_url** liens,int lien_tot,lien_back* back,int back_max,cache_back* cache,hash_struct* hash,int ptr,int numero_passe) { char newfil[HTS_URLMAXSIZE*2]; /* ="" */ + /*char normadr_[HTS_URLMAXSIZE*2];*/ + char normfil_[HTS_URLMAXSIZE*2]; + char* normadr; + char* normfil; char* fil; char* adr; char* print_adr; @@ 
-111,11 +115,25 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a save[0]='\0'; // fil fil = fil_complete; + // copy of fil, used for lookups (see urlhack) + normfil = fil; // et adr (sauter user/pass) // on prend le parti de mettre les fichiers avec login/pass au même endroit que si ils // étaient capturés sans ces paramètres // c'est pour cette raison qu'on ignore totalement adr_complete (même pour la recherche en table de hachage) - adr=jump_identification(adr_complete); + adr = jump_identification(adr_complete); + // copy of adr, used for lookups (see urlhack) + normadr = adr; + + // normalize the URL: + // www.foo.com -> foo.com + // www-42.foo.com -> foo.com + // foo.com/bar//foobar -> foo.com/bar/foobar + if (opt->urlhack) { + // copy of adr (without protocol), used for lookups (see urlhack) + normadr=jump_normalized(adr); + normfil=fil_normalized(fil,normfil_); + } // à afficher sans ftp:// print_adr=jump_protocol(adr); @@ -123,7 +141,7 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a // court-circuit pour lien primaire if (strnotempty(adr)==0) { if (strcmp(fil,"primary")==0) { - strcat(save,"primary.html"); + strcatbuff(save,"primary.html"); return 0; } } @@ -136,43 +154,43 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a int i; #if HTS_HASH - i=hash_read(hash,adr,fil_complete,1); // recherche table 1 (adr+fil) + i=hash_read(hash,normadr,normfil,1,opt->urlhack); // recherche table 1 (adr+fil) if (i>=0) { // ok, trouvé - strcpy(save,liens[i]->sav); + strcpybuff(save,liens[i]->sav); return 0; } - i=hash_read(hash,adr,fil_complete,2); // recherche table 2 (former_adr+former_fil) + i=hash_read(hash,normadr,normfil,2,opt->urlhack); // recherche table 2 (former_adr+former_fil) if (i>=0) { // ok, trouvé // copier location moved! 
- strcpy(adr_complete,liens[i]->adr); - strcpy(fil_complete,liens[i]->fil); + strcpybuff(adr_complete,liens[i]->adr); + strcpybuff(fil_complete,liens[i]->fil); // et save - strcpy(save,liens[i]->sav); // copier (formé à partir du nouveau lien!) + strcpybuff(save,liens[i]->sav); // copier (formé à partir du nouveau lien!) return 0; } #else for(i=lien_tot-1;i>=0;i--) { #if HTS_CASSE - if ((strcmp(liens[i]->adr,adr)==0) && (strcmp(liens[i]->fil,fil_complete)==0)) + if ((strcmp(liens[i]->adr,normadr)==0) && (strcmp(liens[i]->fil,normfil)==0)) #else - if ((strfield2(liens[i]->adr,adr)) && (strfield2(liens[i]->fil,fil_complete))) + if ((strfield2(liens[i]->adr,normadr)) && (strfield2(liens[i]->fil,normfil))) #endif { // ok c'est le même lien, adresse déja définie - strcpy(save,liens[i]->sav); + strcpybuff(save,liens[i]->sav); return 0; } if (liens[i]->former_adr) { // tester ancienne loc? #if HTS_CASSE - if ((strcmp(liens[i]->former_adr,adr)==0) && (strcmp(liens[i]->former_fil,fil_complete)==0)) + if ((strcmp(liens[i]->former_adr,normadr)==0) && (strcmp(liens[i]->former_fil,normfil)==0)) #else - if ((strfield2(liens[i]->former_adr,adr)) && (strfield2(liens[i]->former_fil,fil_complete))) + if ((strfield2(liens[i]->former_adr,normadr)) && (strfield2(liens[i]->former_fil,normfil))) #endif { // copier location moved! - strcpy(adr_complete,liens[i]->adr); - strcpy(fil_complete,liens[i]->fil); + strcpybuff(adr_complete,liens[i]->adr); + strcpybuff(fil_complete,liens[i]->fil); // et save - strcpy(save,liens[i]->sav); // copier (formé à partir du nouveau lien!) + strcpybuff(save,liens[i]->sav); // copier (formé à partir du nouveau lien!) 
return 0; } } @@ -182,20 +200,20 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a // chercher sans / ou avec / dans former { char fil_complete_patche[HTS_URLMAXSIZE*2]; - strcpy(fil_complete_patche,fil_complete); + strcpybuff(fil_complete_patche,normfil); // Version avec ou sans / if (fil_complete_patche[strlen(fil_complete_patche)-1]=='/') fil_complete_patche[strlen(fil_complete_patche)-1]='\0'; else - strcat(fil_complete_patche,"/"); + strcatbuff(fil_complete_patche,"/"); #if HTS_HASH - i=hash_read(hash,adr,fil_complete_patche,2); // recherche table 2 (former_adr+former_fil) + i=hash_read(hash,normadr,fil_complete_patche,2,opt->urlhack); // recherche table 2 (former_adr+former_fil) if (i>=0) { // écraser fil et adr (pas former_fil?????) - strcpy(adr_complete,liens[i]->adr); - strcpy(fil_complete,liens[i]->fil); + strcpybuff(adr_complete,liens[i]->adr); + strcpybuff(fil_complete,liens[i]->fil); // écrire save - strcpy(save,liens[i]->sav); + strcpybuff(save,liens[i]->sav); return 0; } #else @@ -203,16 +221,16 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a for(i=lien_tot-1;i>=0;i--) { if (liens[i]->former_adr) { // former-adr? #if HTS_CASSE - if ((strcmp(liens[i]->former_adr,adr)==0) && (strcmp(liens[i]->former_fil,fil_complete_patche)==0)) + if ((strcmp(liens[i]->former_adr,normadr)==0) && (strcmp(liens[i]->former_fil,fil_complete_patche)==0)) #else - if ((strfield2(liens[i]->former_adr,adr)) && (strfield2(liens[i]->former_fil,fil_complete_patche))) + if ((strfield2(liens[i]->former_adr,normadr)) && (strfield2(liens[i]->former_fil,fil_complete_patche))) #endif { // ok c'est le même lien, adresse déja définie // écraser fil et adr (pas former_fil?????) 
- strcpy(adr_complete,liens[i]->adr); - strcpy(fil_complete,liens[i]->fil); + strcpybuff(adr_complete,liens[i]->adr); + strcpybuff(fil_complete,liens[i]->fil); // écrire save - strcpy(save,liens[i]->sav); + strcpybuff(save,liens[i]->sav); return 0; } } @@ -228,14 +246,14 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a char* a; a=strchr(fil,'?'); if (a!=NULL) { - strncat(newfil,fil,(int) (a - fil)); + strncatbuff(newfil,fil,(int) (a - fil)); } else { - strcpy(newfil,fil); + strcpybuff(newfil,fil); } fil=newfil; } // décoder % - strcpy(fil,unescape_http(fil)); + strcpybuff(fil,unescape_http(fil)); /* { char tempo[HTS_URLMAXSIZE*2]; @@ -249,7 +267,7 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a tempo[j++]=fil[i]; } tempo[j++]='\0'; - strcpy(fil,tempo); + strcpybuff(fil,tempo); } */ @@ -261,7 +279,7 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a (strcmp(get_ext(fil),"html") != 0) && (strcmp(get_ext(fil),"htm") != 0) ) { - strcpy(ext,"html"); + strcpybuff(ext,"html"); ext_chg=1; } break; @@ -285,14 +303,15 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a // si option check_type activée if ((opt->check_type) && (!ext_chg)) { + int ishtest; if ( (!strfield(adr_complete,"file://")) && (!strfield(adr_complete,"ftp://")) ) { // tester type avec requète HEAD si on ne connait pas le type du fichier if (!( (opt->check_type==1) && (fil[strlen(fil)-1]=='/') )) // slash doit être html? - if (ishtml(fil)<0) { // on ne sait pas si c'est un html ou un fichier.. + if ((ishtest=ishtml(fil)) < 0) { // on ne sait pas si c'est un html ou un fichier.. 
// lire dans le cache - htsblk r = cache_read(opt,cache,adr,fil,NULL); // test uniquement + htsblk r = cache_read(opt,cache,adr,fil,NULL,NULL); // test uniquement if (r.statuscode != -1) { // pas d'erreur de lecture cache char s[16]; s[0]='\0'; if ( (opt->debug>1) && (opt->log!=NULL) ) { @@ -301,13 +320,13 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a } if (strnotempty(r.cdispo)) { /* filename given */ ext_chg=2; /* change filename */ - strcpy(ext,r.cdispo); + strcpybuff(ext,r.cdispo); } - else if (!may_unknown(r.contenttype)) { // on peut patcher à priori? + else if (!may_unknown(r.contenttype) || ishtest == -2) { // on peut patcher à priori? give_mimext(s,r.contenttype); // obtenir extension if (strnotempty(s)>0) { // on a reconnu l'extension ext_chg=1; - strcpy(ext,s); + strcpybuff(ext,s); } } // @@ -326,21 +345,25 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a fspc(opt->log,"debug"); fprintf(opt->log,"Testing link type %s%s"LF,adr_complete,fil_complete); test_flush; } - strcpy(curr_adr,adr_complete); - strcpy(curr_fil,fil_complete); + strcpybuff(curr_adr,adr_complete); + strcpybuff(curr_fil,fil_complete); // ajouter dans le backing le fichier en mode test // savename: rien car en mode test if (back_add(back,back_max,opt,cache,curr_adr,curr_fil,BACK_ADD_TEST,referer_adr,referer_fil,1,NULL)!=-1) { int b; b=back_index(back,back_max,curr_adr,curr_fil,BACK_ADD_TEST); if (b>=0) { + int stop_looping=0; int petits_tours=0; int get_test_request=0; // en cas de bouclage sur soi même avec HEAD, tester avec GET.. 
parfois c'est la cause des problèmes do { // temps à attendre, et remplir autant que l'on peut le cache (backing) - if (back[b].status>0) back_wait(back,back_max,opt,cache,0); - if (ptr>=0) + if (back[b].status>0) { + back_wait(back,back_max,opt,cache,0); + } + if (ptr>=0) { back_fillmax(back,back_max,opt,cache,liens,ptr,numero_passe,lien_tot); + } // on est obligé d'appeler le shell pour le refresh.. #if HTS_ANALYSTE @@ -359,8 +382,9 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a if (!hts_htmlcheck_loop(back,back_max,b,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) { return -1; - } else if (_hts_cancel) { // cancel 2 ou 1 (cancel parsing) - back_delete(back,b); // cancel test + } else if (_hts_cancel || !back_checkmirror(opt)) { // cancel 2 ou 1 (cancel parsing) + back_delete(opt,back,b); // cancel test + stop_looping = 1; } } #endif @@ -378,7 +402,7 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a char mov_url[HTS_URLMAXSIZE*2],mov_adr[HTS_URLMAXSIZE*2],mov_fil[HTS_URLMAXSIZE*2]; mov_url[0]=mov_adr[0]=mov_fil[0]='\0'; // - strcpy(mov_url,back[b].r.location); // copier URL + strcpybuff(mov_url,back[b].r.location); // copier URL if (ident_url_relatif(mov_url,curr_adr,curr_fil,mov_adr,mov_fil)>=0) { // si non bouclage sur soi même, ou si test avec GET non testé if ((strcmp(mov_adr,curr_adr)) || (strcmp(mov_fil,curr_fil)) || (get_test_request==0)) { @@ -389,8 +413,8 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a // recopier former_adr/fil? 
if ((former_adr) && (former_fil)) { if (strnotempty(former_adr)==0) { // Pas déja noté - strcpy(former_adr,curr_adr); - strcpy(former_fil,curr_fil); + strcpybuff(former_adr,curr_adr); + strcpybuff(former_fil,curr_fil); } } @@ -400,25 +424,25 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a robots_wizard* robots = (robots_wizard*) opt->robotsptr; if (hts_acceptlink(opt,ptr,lien_tot,liens, mov_adr,mov_fil, - opt->filters.filters,opt->filters.filptr,opt->maxfilter, - robots, &set_prio_to, NULL) == 1) { /* forbidden */ has_been_moved = 1; - back_delete(back,b); // ok - strcpy(curr_adr,mov_adr); - strcpy(curr_fil,mov_fil); + back_maydelete(opt,back,b); // ok + strcpybuff(curr_adr,mov_adr); + strcpybuff(curr_fil,mov_fil); mov_url[0]='\0'; + stop_looping = 1; } } // ftp: stop! if (strfield(mov_url,"ftp://")) { // ftp, ok on arrête has_been_moved = 1; - back_delete(back,b); // ok - strcpy(curr_adr,mov_adr); - strcpy(curr_fil,mov_fil); + back_maydelete(opt,back,b); // ok + strcpybuff(curr_adr,mov_adr); + strcpybuff(curr_fil,mov_fil); + stop_looping = 1; } else if (*mov_url) { char* methode; if (!get_test_request) @@ -438,9 +462,9 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a } // libérer emplacement backing actuel et attendre le prochain - back_delete(back,b); - strcpy(curr_adr,mov_adr); - strcpy(curr_fil,mov_fil); + back_maydelete(opt,back,b); + strcpybuff(curr_adr,mov_adr); + strcpybuff(curr_fil,mov_fil); b=back_index(back,back_max,curr_adr,curr_fil,methode); if (!get_test_request) has_been_moved = 1; // sinon ne pas forcer has_been_moved car non déplacé @@ -470,21 +494,20 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a } } // ok, leaving } - - } while(back[b].status>0); + } while(!stop_looping && back[b].status > 0 && back[b].status < 1000); // Si non déplacé, forcer type? 
if (!has_been_moved) { if (back[b].r.statuscode!=-10) { // erreur if (strnotempty(back[b].r.contenttype)==0) - strcpy(back[b].r.contenttype,"text/html"); // message d'erreur en html + strcpybuff(back[b].r.contenttype,"text/html"); // message d'erreur en html // Finalement on, renvoie un erreur, pour ne toucher à rien dans le code // libérer emplacement backing /*if (opt->errlog!=NULL) { fspc(opt->errlog,0); fprintf(opt->errlog,"Error: (during prefetch) %s (%d) to link %s at %s%s"LF,back[b].r.msg,back[b].r.statuscode,back[b].r.location,curr_adr,curr_fil); test_flush; } - back_delete(back,b); + back_delete(opt,back,b); return -1; // ERREUR (404 par exemple) */ } @@ -494,13 +517,13 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a s[0]='\0'; if (strnotempty(back[b].r.cdispo)) { /* filename given */ ext_chg=2; /* change filename */ - strcpy(ext,back[b].r.cdispo); + strcpybuff(ext,back[b].r.cdispo); } - else if ((!may_unknown(back[b].r.contenttype)) || (!get_ext(back[b].url_fil)) ) { // on peut patcher à priori? (pas interdit ou pas de type) + else if (!may_unknown(back[b].r.contenttype) || ishtest == -2 ) { // on peut patcher à priori? (pas interdit ou pas de type) give_mimext(s,back[b].r.contenttype); // obtenir extension if (strnotempty(s)>0) { // on a reconnu l'extension ext_chg=1; - strcpy(ext,s); + strcpybuff(ext,s); } } } @@ -508,14 +531,14 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a // FIN Si non déplacé, forcer type? // libérer emplacement backing - back_delete(back,b); + back_maydelete(opt,back,b); // --- --- --- // oops, a été déplacé.. on recalcule en récursif (osons!) 
if (has_been_moved) { // copier adr, fil (optionnel, mais sinon marche pas pour le rip) - strcpy(adr_complete,curr_adr); - strcpy(fil_complete,curr_fil); + strcpybuff(adr_complete,curr_adr); + strcpybuff(fil_complete,curr_fil); // copier adr, fil return url_savename(curr_adr,curr_fil,save,NULL,NULL,referer_adr,referer_fil,opt,liens,lien_tot,back,back_max,cache,hash,ptr,numero_passe); @@ -551,12 +574,12 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a // Donner nom par défaut? if (fil[strlen(fil)-1]=='/') { if (!strfield(adr_complete,"ftp://")) - strcat(fil,DEFAULT_HTML); // nommer page par défaut!! + strcatbuff(fil,DEFAULT_HTML); // nommer page par défaut!! else { if (!opt->proxy.active) - strcat(fil,DEFAULT_FTP); // nommer page par défaut (texte) + strcatbuff(fil,DEFAULT_FTP); // nommer page par défaut (texte) else - strcat(fil,DEFAULT_HTML); // nommer page par défaut (à priori ici html depuis un proxy http) + strcatbuff(fil,DEFAULT_HTML); // nommer page par défaut (à priori ici html depuis un proxy http) } } // Changer extension? @@ -574,13 +597,13 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a if (ext_chg==1) { while((a > fil) && (*a!='.') && (*a!='/')) a--; if (*a=='.') *a='\0'; // couper - strcat(fil,"."); // recopier point + strcatbuff(fil,"."); // recopier point } else { while(( a > fil) && (*a!='/')) a--; if (*a=='/') a++; *a='\0'; } - strcat(fil,ext); // copier ext/nom + strcatbuff(fil,ext); // copier ext/nom } // Rechercher premier / et dernier . 
@@ -636,24 +659,63 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a } *b='\0'; switch(tok=*a++) { - case '[': // %[param] + case '[': // %[param:prefix_if_not_empty:suffix_if_not_empty:empty_replacement:notfound_replacement] if (strchr(a,']')) { - char name[256]; - char* c=name; + int pos=0; + char name[5][256]; + char* c=name[0]; + for(pos = 0 ; pos < 5 ; pos++) { + name[pos][0]='\0'; + } + pos=0; while(*a!=']') { - *c++=*a++; + if (pos < 5) { + if (*a == ':') { // next token + c=name[++pos]; + a++; + } else { + *c++=*a++; + *c='\0'; + } + } } a++; - *c++='\0'; - strcat(name,"="); /* param=.. */ + strcatbuff(name[0],"="); /* param=.. */ c=strchr(fil_complete,'?'); /* parameters exists */ if (c) { - c=strstr(c,name); /* finds param= */ - if (c) { - c+=strlen(name); /* jumps "param=" */ - while( (*c) && (*c!='&')) - *b++=*c++; + char* cp; + while((cp = strstr(c+1, name[0])) && *(cp-1) != '?' && *(cp-1) != '&') { /* finds [?&]param= */ + c = cp; + } + if (cp) { + c = cp + strlen(name[0]); /* jumps "param=" */ + strcpybuff(b, name[1]); /* prefix */ + b += strlen(b); + if (*c != '\0' && *c != '&') { + char* d = name[0]; + /* */ + while(*c != '\0' && *c != '&') { + *d++ = *c++; + } + *d = '\0'; + d = unescape_http(name[0]); + if (d && *d) { + strcpybuff(b, d); /* value */ + b += strlen(b); + } else { + strcpybuff(b, name[3]); /* empty replacement if any */ + b += strlen(b); + } + } else { + strcpybuff(b, name[3]); /* empty replacement if any */ + b += strlen(b); + } + strcpybuff(b, name[2]); /* suffix */ + b += strlen(b); + } else { + strcpybuff(b, name[4]); /* not found replacement if any */ + b += strlen(b); } } } @@ -662,14 +724,14 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a case 'n': // nom sans ext if (dot_pos) { if (!short_ver) // Noms longs - strncat(b,nom_pos,(int) (dot_pos - nom_pos)); + strncatbuff(b,nom_pos,(int) (dot_pos - nom_pos)); else - strncat(b,nom_pos,min((int) (dot_pos - 
nom_pos),8)); + strncatbuff(b,nom_pos,min((int) (dot_pos - nom_pos),8)); } else { if (!short_ver) // Noms longs - strcpy(b,nom_pos); + strcpybuff(b,nom_pos); else - strncat(b,nom_pos,8); + strncatbuff(b,nom_pos,8); } b+=strlen(b); // pointer à la fin break; @@ -678,28 +740,28 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a *b='\0'; if (dot_pos) { if (!short_ver) // Noms longs - strncat(b,nom_pos,(int) (dot_pos - nom_pos)); + strncatbuff(b,nom_pos,(int) (dot_pos - nom_pos)); else - strncat(b,nom_pos,min((int) (dot_pos - nom_pos),8)); + strncatbuff(b,nom_pos,min((int) (dot_pos - nom_pos),8)); } else { if (!short_ver) // Noms longs - strcpy(b,nom_pos); + strcpybuff(b,nom_pos); else - strncat(b,nom_pos,8); + strncatbuff(b,nom_pos,8); } b+=strlen(b); // pointer à la fin // RECOPIE NOM + EXT *b='\0'; if (dot_pos) { if (!short_ver) // Noms longs - strcpy(b,dot_pos+1); + strcpybuff(b,dot_pos+1); else - strncat(b,dot_pos+1,3); + strncatbuff(b,dot_pos+1,3); } else { if (!short_ver) // Noms longs - strcpy(b,DEFAULT_EXT); // pas de.. + strcpybuff(b,DEFAULT_EXT); // pas de.. else - strcpy(b,DEFAULT_EXT_SHORT); // pas de.. + strcpybuff(b,DEFAULT_EXT_SHORT); // pas de.. } b+=strlen(b); // pointer à la fin // @@ -708,14 +770,14 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a *b='\0'; if (dot_pos) { if (!short_ver) // Noms longs - strcpy(b,dot_pos+1); + strcpybuff(b,dot_pos+1); else - strncat(b,dot_pos+1,3); + strncatbuff(b,dot_pos+1,3); } else { if (!short_ver) // Noms longs - strcpy(b,DEFAULT_EXT); // pas de.. + strcpybuff(b,DEFAULT_EXT); // pas de.. else - strcpy(b,DEFAULT_EXT_SHORT); // pas de.. + strcpybuff(b,DEFAULT_EXT_SHORT); // pas de.. 
} b+=strlen(b); // pointer à la fin break; @@ -723,14 +785,14 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a *b='\0'; if (nom_pos != fil + 1) { // pas: /index.html (chemin nul) if (!short_ver) { // Noms longs - strncat(b,fil,(int) (nom_pos - fil) - 1); + strncatbuff(b,fil,(int) (nom_pos - fil) - 1); } else { char pth[HTS_URLMAXSIZE*2],n83[HTS_URLMAXSIZE*2]; pth[0]=n83[0]='\0'; // - strncat(pth,fil,(int) (nom_pos - fil) - 1); + strncatbuff(pth,fil,(int) (nom_pos - fil) - 1); long_to_83(opt->savename_83,n83,pth); - strcpy(b,n83); + strcpybuff(b,n83); } } b+=strlen(b); // pointer à la fin @@ -739,14 +801,14 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a *b='\0'; if (strcmp(adr_complete,"file://")==0) { if (!short_ver) // Noms longs - strcpy(b,"localhost"); + strcpybuff(b,"localhost"); else - strcpy(b,"local"); + strcpybuff(b,"local"); } else { if (!short_ver) // Noms longs - strcpy(b,print_adr); + strcpybuff(b,print_adr); else - strncat(b,print_adr,8); + strncatbuff(b,print_adr,8); } b+=strlen(b); // pointer à la fin break; @@ -756,17 +818,17 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a char digest[32+2]; char buff[HTS_URLMAXSIZE*2]; digest[0]=buff[0]='\0'; - strcpy(buff,adr); - strcat(buff,fil_complete); + strcpybuff(buff,adr); + strcatbuff(buff,fil_complete); domd5mem(buff,strlen(buff),digest,1); - strcpy(b,digest); + strcpybuff(b,digest); } b+=strlen(b); // pointer à la fin break; case 'Q': case 'q': /* query MD5 (128-bits/16-bits) GENERATED ONLY IF query string exists! 
*/ *b='\0'; - strncat(b,url_md5(fil_complete),(tok == 'Q')?32:4); + strncatbuff(b,url_md5(fil_complete),(tok == 'Q')?32:4); b+=strlen(b); // pointer à la fin break; } @@ -788,23 +850,23 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a if (strcmp(adr_complete,"file://")==0) { //## if (*adr==lOCAL_CHAR) { if (opt->savename_83 != 1) // noms longs - strcat(save,"localhost"); + strcatbuff(save,"localhost"); else - strcat(save,"local"); + strcatbuff(save,"local"); } else { // adresse url if (!opt->savename_83) { // noms longs (et pas de .) - strcat(save,print_adr); + strcatbuff(save,print_adr); } else { // noms 8-3 if (strlen(print_adr)>4) { if (strfield(print_adr,"www.")) - strncat(save,print_adr+4,max_char); + strncatbuff(save,print_adr+4,max_char); else - strncat(save,print_adr,8); - } else strncat(save,print_adr,max_char); + strncatbuff(save,print_adr,8); + } else strncatbuff(save,print_adr,max_char); } } - if (*fil!='/') strcat(save,"/"); + if (*fil!='/') strcatbuff(save,"/"); } } @@ -819,7 +881,7 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a else // index.html ou /index.html url_savename_addstr(save,fil); if (save[strlen(save)-1]=='/') - strcat(save,DEFAULT_HTML); // nommer page par défaut!! + strcatbuff(save,DEFAULT_HTML); // nommer page par défaut!! 
*/ /* add name */ @@ -836,38 +898,38 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a if (strcmp(adr_complete,"file://")==0) { //## if (*adr==lOCAL_CHAR) { if (opt->savename_83 != 1) // noms longs - strcat(save,"localhost/"); + strcatbuff(save,"localhost/"); else - strcat(save,"local/"); + strcatbuff(save,"local/"); } else { // adresse url if (!opt->savename_83) { // noms longs - strcat(save,print_adr); strcat(save,"/"); + strcatbuff(save,print_adr); strcatbuff(save,"/"); } else { // noms 8-3 if (strlen(print_adr)>4) { if (strfield(print_adr,"www.")) - strncat(save,print_adr+4,max_char); + strncatbuff(save,print_adr+4,max_char); else - strncat(save,print_adr,max_char); - strcat(save,"/"); + strncatbuff(save,print_adr,max_char); + strcatbuff(save,"/"); } else { - strncat(save,print_adr,max_char); strcat(save,"/"); + strncatbuff(save,print_adr,max_char); strcatbuff(save,"/"); } } } } else { - strcat(save,"web/"); // répertoire général + strcatbuff(save,"web/"); // répertoire général } } // si un html à coup sûr if ( (ext_chg!=0) ? (ishtml_ext(ext)==1) : (ishtml(fil)==1) ) { if (opt->savename_type%100==2) { // html/ - strcat(save,"html/"); + strcatbuff(save,"html/"); } } else { if ((opt->savename_type%100==1) || (opt->savename_type%100==2)) { // html & images - strcat(save,"images/"); + strcatbuff(save,"images/"); } } @@ -881,17 +943,17 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a // html? if ( (ext_chg!=0) ? 
(ishtml_ext(ext)==1) : (ishtml(fil)==1) ) { if (opt->savename_type%100==5) - strcat(save,"html/"); + strcatbuff(save,"html/"); } else { char* a=fil+strlen(fil)-1; while(( a> fil) && (*a != '/') && (*a != '.')) a--; if (*a!='.') - strcat(save,"other"); + strcatbuff(save,"other"); else - strcat(save,a+1); - strcat(save,"/"); + strcatbuff(save,a+1); + strcatbuff(save,"/"); } - /*strcat(save,a);*/ + /*strcatbuff(save,a);*/ /* add name */ ADD_STANDARD_NAME(0); } @@ -923,7 +985,7 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a a=fil+strlen(fil)-1; while(( a > fil) && (*a != '/') && (*a != '.')) a--; if (*a=='.') { - strcat(save,a); // ajouter + strcatbuff(save,a); // ajouter } } break; @@ -933,7 +995,7 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a char* a=fil+strlen(fil)-1; while(((int) a>(int) fil) && (*a != '/') && (*a != '\\')) a--; if ((*a=='/') || (*a=='\\')) a++; - strcat(save,a); + strcatbuff(save,a); */ /* add name */ @@ -947,7 +1009,7 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a #endif if (save[strlen(save)-1]=='/') - strcat(save,DEFAULT_HTML); // nommer page par défaut!! + strcatbuff(save,DEFAULT_HTML); // nommer page par défaut!! } @@ -958,8 +1020,8 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a while(((int) a>(int) save) && (*a!='.') && (*a!='/')) a--; if (*a=='.') *a='\0'; // couper // recopier extension - strcat(save,"."); - strcat(save,ext); // copier ext + strcatbuff(save,"."); + strcatbuff(save,ext); // copier ext }*/ // de même en cas de manque d'extension on en place une de manière forcée.. 
// cela évite les /chez/toto et les /chez/toto/index.html incompatibles @@ -967,8 +1029,8 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a char* a=save+strlen(save)-1; while(( a > save) && (*a!='.') && (*a!='/')) a--; if (*a!='.') { // agh pas de point - //strcat(save,".none"); // a éviter - strcat(save,".html"); // préférable! + //strcatbuff(save,".none"); // a éviter + strcatbuff(save,".html"); // préférable! if ( (opt->debug>1) && (opt->errlog!=NULL) ) { fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Default HTML type set for %s%s"LF,adr_complete,fil_complete); test_flush; @@ -985,14 +1047,14 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a char tempo[HTS_URLMAXSIZE*2]; char *b; tempo[0]='\0'; - strcpy(tempo,"["); + strcpybuff(tempo,"["); b=strchr(save,':'); if (!b) b=strchr(save,'@'); if (b) - strncat(tempo,save,(int) b-(int) a); - strcat(tempo,"]"); - strcat(tempo,a); - strcpy(save,a); + strncatbuff(tempo,save,(int) b-(int) a); + strcatbuff(tempo,"]"); + strcatbuff(tempo,a); + strcpybuff(save,a); } } */ @@ -1000,8 +1062,8 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a // éviter les / au début (cause: N100) if (save[0]=='/') { char tempo[HTS_URLMAXSIZE*2]; - strcpy(tempo,save+1); - strcpy(save,tempo); + strcpybuff(tempo,save+1); + strcpybuff(save,tempo); } // changer les ~,:,",*,? en _ pour sauver sur disque @@ -1017,6 +1079,12 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a hts_replace(save,'|','_'); // interdit sous windows // hts_replace(save,'@','_'); + if (opt->savename_83 == 2) { // CDROM + // maybe other ones? 
+ hts_replace(save,'-','_'); + hts_replace(save,'=','_'); + hts_replace(save,'+','_'); + } // { // éliminer les // (comme ftp://) char* a; @@ -1031,20 +1099,22 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a #if HTS_OVERRIDE_DOS_FOLDERS - /* Replace /foo/nul/bar by /foo/nul-/bar */ + /* Replace /foo/nul/bar by /foo/nul_/bar */ { int i=0; while(hts_tbdev[i][0]) { char* a=save; - while((a=strstr(a,hts_tbdev[i]))) { + while((a=strstrcase(a,(char*)hts_tbdev[i]))) { switch ( (int) a[strlen(hts_tbdev[i])] ) { case '\0': - case '/': { + case '/': + case '.': + { char tempo[HTS_URLMAXSIZE*2]; tempo[0]='\0'; - strncat(tempo,save,(int) (a - save) + strlen(hts_tbdev[i])); - strcat(tempo,"-"); - strcat(tempo,a+strlen(hts_tbdev[i])); - strcpy(save,tempo); + strncatbuff(tempo,save,(int) (a - save) + strlen(hts_tbdev[i])); + strcatbuff(tempo,"_"); + strcatbuff(tempo,a+strlen(hts_tbdev[i])); + strcpybuff(save,tempo); } break; } @@ -1059,7 +1129,7 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a if (opt->savename_83) { char n83[HTS_URLMAXSIZE*2]; long_to_83(opt->savename_83,n83,save); - strcpy(save,n83); + strcpybuff(save,n83); } @@ -1079,9 +1149,9 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a // chemin primaire éventuel A METTRE AVANT if (strnotempty(opt->path_html)) { char tempo[HTS_URLMAXSIZE*2]; - strcpy(tempo,opt->path_html); - strcat(tempo,save); - strcpy(save,tempo); + strcpybuff(tempo,opt->path_html); + strcatbuff(tempo,save); + strcpybuff(save,tempo); } @@ -1100,7 +1170,7 @@ printf("\nStart search\n"); #endif #if HTS_HASH - i=hash_read(hash,save,"",0); // lecture type 0 (sav) + i=hash_read(hash,save,"",0,0); // lecture type 0 (sav) if (i>=0) #else for(i=lien_tot-1;i>=0;i--) { @@ -1143,9 +1213,9 @@ printf("\nWRONG CASE UNMATCH : \n%s\n%s, REDEFINE\n",liens[i]->fil,fil_complete) while(( a > save) && (*a!='.') && (*a!='\\') && (*a!='/')) a--; if (*a=='.') - 
strncat(tempo,save,(int) (a - save)); + strncatbuff(tempo,save,(int) (a - save)); else - strcat(tempo,save); + strcatbuff(tempo,save); // tester la présence d'un -xx (ex: index-2.html -> index-3.html) b=tempo+strlen(tempo)-1; @@ -1172,9 +1242,9 @@ printf("\nWRONG CASE UNMATCH : \n%s\n%s, REDEFINE\n",liens[i]->fil,fil_complete) // ajouter extension if (*a=='.') - strcat(tempo,a); + strcatbuff(tempo,a); - strcpy(save,tempo); + strcpybuff(save,tempo); //printf("switched: %s\n",save); @@ -1204,29 +1274,29 @@ void standard_name(char* b,char* dot_pos,char* nom_pos,char* fil_complete,int sh /* Nom */ if (dot_pos) { if (!short_ver) // Noms longs - strncat(b,nom_pos,(int) (dot_pos - nom_pos)); + strncatbuff(b,nom_pos,(int) (dot_pos - nom_pos)); else - strncat(b,nom_pos,min((int) (dot_pos - nom_pos),8)); + strncatbuff(b,nom_pos,min((int) (dot_pos - nom_pos),8)); } else { if (!short_ver) // Noms longs - strcat(b,nom_pos); + strcatbuff(b,nom_pos); else - strncat(b,nom_pos,8); + strncatbuff(b,nom_pos,8); } /* MD5 - 16 bits */ - strncat(b,url_md5(fil_complete),4); + strncatbuff(b,url_md5(fil_complete),4); /* Ext */ if (dot_pos) { - strcat(b,"."); + strcatbuff(b,"."); if (!short_ver) // Noms longs - strcat(b,dot_pos+1); + strcatbuff(b,dot_pos+1); else - strncat(b,dot_pos+1,3); + strncatbuff(b,dot_pos+1,3); } else { if (!short_ver) // Noms longs - strcat(b,DEFAULT_EXT); // pas de.. + strcatbuff(b,DEFAULT_EXT); // pas de.. else - strcat(b,DEFAULT_EXT_SHORT); // pas de.. + strcatbuff(b,DEFAULT_EXT_SHORT); // pas de.. 
} } @@ -1243,7 +1313,7 @@ char* url_md5(char* fil_complete) { char buff[HTS_URLMAXSIZE*2]; a++; digest[0]=buff[0]='\0'; - strcat(buff,a); /* query string MD5 */ + strcatbuff(buff,a); /* query string MD5 */ domd5mem(buff,strlen(buff),digest,1); } } diff --git a/src/htsnet.h b/src/htsnet.h index d12b1e4..dbdbcc6 100644 --- a/src/htsnet.h +++ b/src/htsnet.h @@ -56,23 +56,26 @@ Please visit our Website: http://www.httrack.com #include <sys/socket.h> #include <netinet/in.h> #include <sys/time.h> + /* Force for sun env. */ + #ifndef BSD_COMP + #define BSD_COMP + #endif #include <sys/ioctl.h> /* gethostname & co */ +#ifdef HAVE_UNISTD_H #include <unistd.h> +#endif /* inet_addr */ #include <arpa/inet.h> // pas la peine normalement.. -#if HTS_PLATFORM!=3 - #include <sys/filio.h> -#else #ifndef HTS_DO_NOT_REDEFINE_in_addr_t typedef unsigned long in_addr_t; #endif -#endif -#ifndef min - #define min(a,b) ((a)>(b)?(b):(a)) - #define max(a,b) ((a)>(b)?(a):(b)) -#endif +#undef min +#undef max +#undef Sleep +#define min(a,b) ((a)>(b)?(b):(a)) +#define max(a,b) ((a)>(b)?(a):(b)) #define Sleep(a) { if (((a)*1000)%1000000) usleep(((a)*1000)%1000000); if (((a)*1000)/1000000) sleep(((a)*1000)/1000000); } #endif @@ -104,9 +107,16 @@ typedef struct { /* Set port to sockaddr structure */ #define SOCaddr_initport(server, port) do { \ - SOCaddr_sinport(server) = htons((unsigned short int) (port)); \ + SOCaddr_sinport(server) = htons((unsigned short int) (port)); \ +} while(0) + +#define SOCaddr_initany(server, server_len) do { \ + SOCaddr_sinfamily(server) = AF_INET; \ + memset(&SOCaddr_sinaddr(server), 0, sizeof(struct sockaddr_in)); \ + server_len=sizeof(struct sockaddr_in); \ } while(0) + /* Copy sockaddr to another one */ #define SOCaddr_copyaddr(server, server_len, hpaddr, hpsize) do { \ if (hpsize == sizeof(struct sockaddr_in)) { \ @@ -171,6 +181,12 @@ typedef struct { SOCaddr_sinport(server) = htons((unsigned short int) (port)); \ } while(0) +#define SOCaddr_initany(server, 
server_len) do { \ + SOCaddr_sinfamily(server) = AF_INET; \ + memset(&SOCaddr_sinaddr(server), 0, sizeof(struct sockaddr_in)); \ + server_len=sizeof(struct sockaddr_in); \ +} while(0) + /* Copy sockaddr to SOCaddr diff --git a/src/htsnostatic.c b/src/htsnostatic.c index 5971d5d..eff6184 100644 --- a/src/htsnostatic.c +++ b/src/htsnostatic.c @@ -39,6 +39,7 @@ Please visit our Website: http://www.httrack.com #include "htsbase.h" #include "htshash.h" +#include "htsinthash.h" typedef struct { /* @@ -156,7 +157,7 @@ int hts_freevar() { return 1; } -int hts_resetvar() { +HTSEXT_API int hts_resetvar() { int r; hts_lockvar(); { diff --git a/src/htsnostatic.h b/src/htsnostatic.h index 6dbb072..f24f0ad 100644 --- a/src/htsnostatic.h +++ b/src/htsnostatic.h @@ -88,7 +88,9 @@ Please visit our Website: http://www.httrack.com */ int hts_initvar(void); int hts_freevar(void); -int hts_resetvar(void); +#ifndef HTTRACK_DEFLIB +HTSEXT_API int hts_resetvar(void); +#endif int hts_maylockvar(void); int hts_lockvar(void); int hts_unlockvar(void); @@ -183,6 +185,7 @@ if ( cKey.localInit ) { \ } \ if ( ( ! cKey.localInit ) || ( name == NULL ) ) { \ if (!hts_maylockvar()) { \ + abortLog("unable to lock mutex (not initialized?!)"); \ abort(); \ } \ hts_lockvar(); \ @@ -190,6 +193,7 @@ if ( ( ! cKey.localInit ) || ( name == NULL ) ) { \ { \ name = (type *) calloc((nelt), sizeof(type)); \ if (name == NULL) { \ + abortLog("unable to allocate memory for variable!"); \ abort(); \ } \ { \ @@ -202,6 +206,7 @@ if ( ( ! cKey.localInit ) || ( name == NULL ) ) { \ name = NULL; \ PTHREAD_KEY_GET(cKey.localKey, &name, type*); \ if (name == NULL) { \ + abortLog("unable to load thread key!"); \ abort(); \ } \ if ( ! cKey.localInit ) { \ @@ -214,6 +219,7 @@ if ( ( ! cKey.localInit ) || ( name == NULL ) ) { \ else { \ PTHREAD_KEY_GET(cKey.localKey, &name, type*); \ if (name == NULL) { \ + abortLog("unable to load thread key! 
(2)"); \ abort(); \ } \ } \ diff --git a/src/htsopt.h b/src/htsopt.h index 13bc962..77910b6 100644 --- a/src/htsopt.h +++ b/src/htsopt.h @@ -48,6 +48,7 @@ typedef struct { int active; char name[1024]; int port; + char bindhost[256]; // bind this host } t_proxy; /* Structure utile pour copier en bloc les paramètres */ @@ -60,6 +61,12 @@ typedef struct { /* Structure état du miroir */ typedef struct { int stop; + int exit_xh; + int back_add_stats; + /* */ + int mimehtml_created; + char mimemid[256]; + FILE* mimefp; } htsoptstate; @@ -92,12 +99,13 @@ typedef struct { int maxconn; // nombre max de connexions/s int waittime; // démarrage programmé int cache; // génération d'un cache - int aff_progress; // barre de progression + //int aff_progress; // barre de progression int shell; // gestion d'un shell par pipe stdin/stdout t_proxy proxy; // configuration du proxy int savename_83; // conversion 8-3 pour les noms de fichiers int savename_type; // type de noms: structure originale/html-images en un seul niveau char savename_userdef[256]; // structure userdef (ex: %h%p/%n%q.%t) + int mimehtml; // MIME-html int user_agent_send; // user agent (ex: httrack/1.0 [sun]) char user_agent[128]; char path_log[1024]; // chemin pour cache et log @@ -121,8 +129,10 @@ typedef struct { int accept_cookie; // gestion des cookies t_cookie* cookie; int http10; // forcer http 1.0 + int nokeepalive; // pas de keep-alive int nocompression; // pas de compression int sizehack; // forcer réponse "mis à jour" si taille identique + int urlhack; // force "url normalization" to avoid loops int tolerant; // accepter content-length incorrect int parseall; // essayer de tout parser (tags inconnus contenant des liens, par exemple) int norecatch; // ne pas reprendre les fichiers effacés localement par l'utilisateur @@ -132,7 +142,9 @@ typedef struct { //int maxcache_anticipate; // maximum de liens à anticiper (majorant) int ftp_proxy; // proxy http pour ftp char filelist[1024]; // fichier liste URL à 
inclure + char urllist[1024]; // fichier liste de filtres à inclure htsfilters filters; // contient les pointeurs pour les filtres + void* hash; // hash structure void* robotsptr; // robots ptr char lang_iso[64]; // en, fr .. char mimedefs[2048]; // ext1=mimetype1\next2=mimetype2.. @@ -170,6 +182,8 @@ typedef struct { int stat_files; // nombre de fichiers écrits int stat_updated_files; // nombre de fichiers mis à jour // + int stat_nrequests; // nombre de requêtes sur socket + int stat_sockid; // nombre de sockets allouées au total int stat_nsocket; // nombre de sockets int stat_errors; // nombre d'erreurs int stat_errors_front; // idem, mais au tout premier niveau diff --git a/src/htsparse.c b/src/htsparse.c index b012a8d..3d35252 100644 --- a/src/htsparse.c +++ b/src/htsparse.c @@ -30,641 +30,1040 @@ Please visit our Website: http://www.httrack.com /* ------------------------------------------------------------ */ -/* File: Main source */ -/* DIRECT INCLUDE TO httrack.c */ +/* File: htsparse.c parser */ +/* html/javascript/css parser */ +/* and other parser routines */ /* Author: Xavier Roche */ /* ------------------------------------------------------------ */ -#if HTS_ANALYSTE -if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) { -#endif - FILE* fp=NULL; // fichier écrit localement - char* adr=r.adr; // pointeur (on parcourt) - char* lastsaved; // adresse du dernier octet sauvé + 1 - if ( (opt.debug>1) && (opt.log!=NULL) ) { - fspc(opt.log,"debug"); fprintf(opt.log,"scan file.."LF); test_flush; - } +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <fcntl.h> +#include <ctype.h> - // Indexing! 
-#if HTS_MAKE_KEYWORD_INDEX - if (opt.kindex) { - if (index_keyword(r.adr,r.size,r.contenttype,savename,opt.path_html)) { - if ( (opt.debug>1) && (opt.log!=NULL) ) { - fspc(opt.log,"debug"); fprintf(opt.log,"indexing file..done"LF); test_flush; - } - } else { - if ( (opt.debug>1) && (opt.log!=NULL) ) { - fspc(opt.log,"debug"); fprintf(opt.log,"indexing file..error!"LF); test_flush; - } - } +/* File defs */ +#include "htscore.h" + +/* specific definitions */ +#include "htsbase.h" +#include "htsnet.h" +#include "htsbauth.h" +#include "htsmd5.h" +#include "htsindex.h" + +/* external modules */ +#include "htsmodules.h" + +// htswrap_add +#include "htswrap.h" + +// parser +#include "htsparse.h" + + +// specific defines +#define urladr (liens[ptr]->adr) +#define urlfil (liens[ptr]->fil) +#define savename (liens[ptr]->sav) +#define parenturladr (liens[liens[ptr]->precedent]->adr) +#define parenturlfil (liens[liens[ptr]->precedent]->fil) +#define parentsavename (liens[liens[ptr]->precedent]->sav) +#define relativeurladr ((!parent_relative)?urladr:parenturladr) +#define relativeurlfil ((!parent_relative)?urlfil:parenturlfil) +#define relativesavename ((!parent_relative)?savename:parentsavename) + +#define test_flush if (opt->flush) { if (opt->log) { fflush(opt->log); } if (opt->errlog) { fflush(opt->errlog); } } + +// does nothing +#define XH_uninit do {} while(0) + +// version optimisée, qui permet de ne pas toucher aux html non modifiés (update) +#define REALLOC_SIZE 8192 +#define HT_ADD_CHK(A) if (((int) (A)+ht_len+1) >= ht_size) { \ + ht_size=(A)+ht_len+REALLOC_SIZE; \ + ht_buff=(char*) realloct(ht_buff,ht_size); \ + if (ht_buff==NULL) { \ + printf("PANIC! 
: Not enough memory [%d]\n",__LINE__); \ + XH_uninit; \ + abortLogFmt("not enough memory for current html document in HT_ADD_CHK : realloct(%d) failed" _ ht_size); \ + exit(1); \ + } \ + } \ + ht_len+=A; +#define HT_ADD_ADR \ + if ((opt->getmode & 1) && (ptr>0)) { \ + int i=((int) (adr - lastsaved)),j=ht_len; HT_ADD_CHK(i) \ + memcpy(ht_buff+j, lastsaved, i); \ + ht_buff[j+i]='\0'; \ + lastsaved=adr; \ + } +#define HT_ADD(A) \ + if ((opt->getmode & 1) && (ptr>0)) { \ + int i=strlen(A),j=ht_len; \ + if (i) { \ + HT_ADD_CHK(i) \ + memcpy(ht_buff+j, A, i); \ + ht_buff[j+i]='\0'; \ + } } +#define HT_ADD_START \ + int ht_size=(int)(r->size*5)/4+REALLOC_SIZE; \ + int ht_len=0; \ + char* ht_buff=NULL; \ + if ((opt->getmode & 1) && (ptr>0)) { \ + ht_buff=(char*) malloct(ht_size); \ + if (ht_buff==NULL) { \ + printf("PANIC! : Not enough memory [%d]\n",__LINE__); \ + XH_uninit; \ + abortLogFmt("not enough memory for current html document in HT_ADD_START : malloct(%d) failed" _ ht_size); \ + exit(1); \ + } \ + ht_buff[0]='\0'; \ + } +#define HT_ADD_END { \ + int ok=0;\ + if (ht_buff) { \ + INTsys file_len=(INTsys) strlen(ht_buff);\ + char digest[32+2];\ + digest[0]='\0';\ + domd5mem(ht_buff,file_len,digest,1);\ + if (fsize(fconv(savename))==file_len) { \ + int mlen;\ + char* mbuff;\ + cache_readdata(cache,"//[HTML-MD5]//",savename,&mbuff,&mlen);\ + if (mlen) mbuff[mlen]='\0';\ + if ((mlen == 32) && (strcmp(((mbuff!=NULL)?mbuff:""),digest)==0)) {\ + ok=1;\ + if ( (opt->debug>1) && (opt->log!=NULL) ) {\ + fspc(opt->log,"debug"); fprintf(opt->log,"File not re-written (md5): %s"LF,savename);\ + test_flush;\ + }\ + } else {\ + ok=0;\ + } \ + }\ + if (!ok) { \ + fp=filecreate(savename); \ + if (fp) { \ + if (file_len>0) {\ + if ((INTsys)fwrite(ht_buff,1,file_len,fp) != file_len) { \ + int fcheck;\ + if ((fcheck=check_fatal_io_errno())) {\ + opt->state.exit_xh=-1;\ + }\ + if (opt->errlog) { \ + fspc(opt->errlog,"error"); fprintf(opt->errlog,"Unable to write HTML file %s: %s"LF, 
savename, strerror(errno));\ + if (fcheck) {\ + fspc(opt->errlog,"error");\ + fprintf(opt->errlog,"* * Fatal write error, giving up"LF);\ + }\ + test_flush;\ + }\ + }\ + }\ + fclose(fp); fp=NULL; \ + if (strnotempty(r->lastmodified)) \ + set_filetime_rfc822(savename,r->lastmodified); \ + } else {\ + int fcheck;\ + if ((fcheck=check_fatal_io_errno())) {\ + opt->state.exit_xh=-1;\ + }\ + if (opt->errlog) { \ + fspc(opt->errlog,"error");\ + fprintf(opt->errlog,"Unable to save file %s : %s"LF, savename, strerror(errno));\ + if (fcheck) {\ + fspc(opt->errlog,"error");\ + fprintf(opt->errlog,"* * Fatal write error, giving up"LF);\ + }\ + test_flush;\ + }\ + }\ + } else {\ + filenote(savename,NULL); \ + }\ + if (cache->ndx)\ + cache_writedata(cache->ndx,cache->dat,"//[HTML-MD5]//",savename,digest,(int)strlen(digest));\ + } \ + freet(ht_buff); ht_buff=NULL; \ } +#define HT_ADD_FOP + +// COPY IN HTSCORE.C +#define HT_INDEX_END do { \ +if (!makeindex_done) { \ +if (makeindex_fp) { \ + char tempo[1024]; \ + if (makeindex_links == 1) { \ + sprintf(tempo,"<meta HTTP-EQUIV=\"Refresh\" CONTENT=\"0; URL=%s\">"CRLF,makeindex_firstlink); \ + } else \ + tempo[0]='\0'; \ + fprintf(makeindex_fp,template_footer, \ + "<!-- Mirror and index made by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->", \ + tempo \ + ); \ + fflush(makeindex_fp); \ + fclose(makeindex_fp); /* à ne pas oublier sinon on passe une nuit blanche */ \ + makeindex_fp=NULL; \ + usercommand(opt,0,NULL,fconcat(opt->path_html,"index.html"),"primary","primary"); \ +} \ +} \ +makeindex_done=1; /* ok c'est fait */ \ +} while(0) + +// Enregistrement d'un lien: +// on calcule la taille nécessaire: taille des 3 chaînes à stocker (taille forcée paire, plus 2 octets de sécurité) +// puis on vérifie qu'on a assez de marge dans le buffer - sinon on en réalloue un autre +// enfin on écrit à l'adresse courante du buffer, qu'on incrémente. 
on décrémente la taille dispo d'autant ensuite +// codebase: si non nul et si .class stockee on le note pour chemin primaire pour classes +// FA,FS: former_adr et former_fil, lien original +#if HTS_HASH +#define liens_record_sav_len(A) +#else +#define liens_record_sav_len(A) (A)->sav_len=strlen((A)->sav) #endif - // Now, parsing - if ((opt.getmode & 1) && (ptr>0)) { // récupérer les html sur disque - // créer le fichier html local - HT_ADD_FOP; // écrire peu à peu le fichier - } - - if (!error) { - int detect_title=0; // détection du title - // - char* in_media=NULL; // in other media type (real media and so..) - int intag=0; // on est dans un tag - int incomment=0; // dans un <!-- - int inscript=0; // dans un scipt pour applets javascript) - int inscript_tag=0; // on est dans un <body onLoad="... terminé par > - char inscript_tag_lastc='\0'; - // terminaison (" ou ') du "<body onLoad=.." - int inscriptgen=0; // on est dans un code générant, ex après obj.write(".. - char scriptgen_q='\0'; // caractère faisant office de guillemet (' ou ") - int no_esc_utf=0; // ne pas echapper chars > 127 - int nofollow=0; // ne pas scanner - // - int parseall_lastc='\0'; // dernier caractère parsé pour parseall - int parseall_incomment=0; // dans un /* */ (exemple: a = /* URL */ "img.gif";) - // - char* intag_start=adr; - char* intag_startattr=NULL; - int intag_start_valid=0; - HT_ADD_START; // débuter +// COPIE DE HTSCORE.C +#define liens_record(A,F,S,FA,FF) { \ +int notecode=0; \ +int lienurl_len=((sizeof(lien_url)+HTS_ALIGN-1)/HTS_ALIGN)*HTS_ALIGN,\ + adr_len=strlen(A),\ + fil_len=strlen(F),\ + sav_len=strlen(S),\ + cod_len=0,\ + former_adr_len=strlen(FA),\ + former_fil_len=strlen(FF); \ +if (former_adr_len>0) {\ + former_adr_len=(former_adr_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; \ + former_fil_len=(former_fil_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; \ +} else former_adr_len=former_fil_len=0;\ +if (strlen(F)>6) if (strnotempty(codebase)) if (strfield(F+strlen(F)-6,".class")) { 
notecode=1; \ +cod_len=strlen(codebase); cod_len=(cod_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; } \ +adr_len=(adr_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; fil_len=(fil_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; sav_len=(sav_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; \ +if ((int) lien_size < (int) (adr_len+fil_len+sav_len+cod_len+former_adr_len+former_fil_len+lienurl_len)) { \ +lien_buffer=(char*) ((void*) calloct(add_tab_alloc,1)); \ +lien_size=add_tab_alloc; \ +if (lien_buffer!=NULL) { \ +liens[lien_tot]=(lien_url*) (void*) lien_buffer; lien_buffer+=lienurl_len; lien_size-=lienurl_len; \ +liens[lien_tot]->firstblock=1; \ +} \ +} else { \ +liens[lien_tot]=(lien_url*) (void*) lien_buffer; lien_buffer+=lienurl_len; lien_size-=lienurl_len; \ +liens[lien_tot]->firstblock=0; \ +} \ +if (liens[lien_tot]!=NULL) { \ +liens[lien_tot]->adr=lien_buffer; lien_buffer+=adr_len; lien_size-=adr_len; \ +liens[lien_tot]->fil=lien_buffer; lien_buffer+=fil_len; lien_size-=fil_len; \ +liens[lien_tot]->sav=lien_buffer; lien_buffer+=sav_len; lien_size-=sav_len; \ +liens[lien_tot]->cod=NULL; \ +if (notecode) { liens[lien_tot]->cod=lien_buffer; lien_buffer+=cod_len; lien_size-=cod_len; strcpybuff(liens[lien_tot]->cod,codebase); } \ +if (former_adr_len>0) {\ +liens[lien_tot]->former_adr=lien_buffer; lien_buffer+=former_adr_len; lien_size-=former_adr_len; \ +liens[lien_tot]->former_fil=lien_buffer; lien_buffer+=former_fil_len; lien_size-=former_fil_len; \ +strcpybuff(liens[lien_tot]->former_adr,FA); \ +strcpybuff(liens[lien_tot]->former_fil,FF); \ +}\ +strcpybuff(liens[lien_tot]->adr,A); \ +strcpybuff(liens[lien_tot]->fil,F); \ +strcpybuff(liens[lien_tot]->sav,S); \ +liens_record_sav_len(liens[lien_tot]); \ +hash_write(hashptr,lien_tot,opt->urlhack); \ +} \ +} +#define ENGINE_LOAD_CONTEXT() \ + lien_url** liens = (lien_url**) str->liens; \ + httrackp* opt = (httrackp*) str->opt; \ + lien_back* back = (lien_back*) str->back; \ + cache_back* cache = (cache_back*) str->cache; \ + hash_struct* hashptr = 
(hash_struct*) str->hashptr; \ + int back_max = str->back_max; \ + int numero_passe = str->numero_passe; \ + int add_tab_alloc = str->add_tab_alloc; \ + /* */ \ + int lien_tot = * ( (int*) (str->lien_tot_) ); \ + int ptr = * ( (int*) (str->ptr_) ); \ + int lien_size = * ( (int*) (str->lien_size_) ); \ + char* lien_buffer = * ( (char**) (str->lien_buffer_) ); \ + /* */ \ + /* */ \ + htsblk* r = stre->r_; \ + hash_struct* hash = stre->hash_; \ + int lien_max = *stre->lien_max_; \ + /* */ \ + int error = * stre->error_; \ + int store_errpage = * stre->store_errpage_; \ + char* codebase = stre->codebase; \ + char* base = stre->base; \ + /* */ \ + int makeindex_done = *stre->makeindex_done_; \ + FILE* makeindex_fp = *stre->makeindex_fp_; \ + int makeindex_links = *stre->makeindex_links_; \ + char* makeindex_firstlink = stre->makeindex_firstlink_; \ + /* */ \ + char *template_header = stre->template_header_; \ + char *template_body = stre->template_body_; \ + char *template_footer = stre->template_footer_; \ + /* */ \ + LLint stat_fragment = *stre->stat_fragment_; \ + TStamp makestat_time = stre->makestat_time; \ + FILE* makestat_fp = stre->makestat_fp - /* statistics */ - if ((opt.getmode & 1) && (ptr>0)) { - /* - HTS_STAT.stat_files++; - HTS_STAT.stat_bytes+=r.size; - */ - } +#define ENGINE_SAVE_CONTEXT() \ + /* Apply changes */ \ + * ( (int*) (str->lien_tot_) ) = lien_tot; \ + * ( (int*) (str->ptr_) ) = ptr; \ + * ( (int*) (str->lien_size_) ) = lien_size; \ + * ( (char**) (str->lien_buffer_) ) = lien_buffer; \ + /* */ \ + * stre->error_ = error; \ + * stre->store_errpage_ = store_errpage; \ + * stre->lien_max_ = lien_max; \ + /* */ \ + *stre->makeindex_done_ = makeindex_done; \ + *stre->makeindex_fp_ = makeindex_fp; \ + *stre->makeindex_links_ = makeindex_links; \ + /* */ \ + *stre->stat_fragment_ = stat_fragment + +#define _FILTERS (*opt->filters.filters) +#define _FILTERS_PTR (opt->filters.filptr) +#define _ROBOTS ((robots_wizard*)opt->robotsptr) - /* Primary list 
or URLs */ - if (ptr == 0) { - intag=1; - intag_start_valid=0; + +/* Main parser */ +int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { + /* Load engine variables */ + ENGINE_LOAD_CONTEXT(); + +#if HTS_ANALYSTE + if (hts_htmlcheck(r->adr,(int)r->size,urladr,urlfil)) { +#endif + FILE* fp=NULL; // fichier écrit localement + char* adr=r->adr; // pointeur (on parcourt) + char* lastsaved; // adresse du dernier octet sauvé + 1 + if ( (opt->debug>1) && (opt->log!=NULL) ) { + fspc(opt->log,"debug"); fprintf(opt->log,"scan file.."LF); test_flush; } - /* Check is the file is a .js file */ - else if ( - (strfield2(r.contenttype,"application/x-javascript")!=0) - || (strfield2(r.contenttype,"text/css")!=0) - ) { /* JavaScript js file */ - inscript=1; - intag=1; // because après <script> on y est .. - pas utile - intag_start_valid=0; // OUI car nous sommes dans du code, plus dans du "vrai" tag - if ((opt.debug>1) && (opt.log!=NULL)) { - fspc(opt.log,"debug"); fprintf(opt.log,"note: this file is a javascript file"LF); test_flush; + + + // Indexing! +#if HTS_MAKE_KEYWORD_INDEX + if (opt->kindex) { + if (index_keyword(r->adr,r->size,r->contenttype,savename,opt->path_html)) { + if ( (opt->debug>1) && (opt->log!=NULL) ) { + fspc(opt->log,"debug"); fprintf(opt->log,"indexing file..done"LF); test_flush; + } + } else { + if ( (opt->debug>1) && (opt->log!=NULL) ) { + fspc(opt->log,"debug"); fprintf(opt->log,"indexing file..error!"LF); test_flush; + } } } - /* Or a real audio */ - else if (strfield2(r.contenttype,"audio/x-pn-realaudio")!=0) { /* realaudio link file */ - inscript=intag=1; - intag_start_valid=0; - in_media="RAM"; // real media! 
- } - // Detect UTF8 format - if (is_unicode_utf8((unsigned char*) r.adr, (unsigned int) r.size) == 1) { - no_esc_utf=1; - } else { - no_esc_utf=0; +#endif + + // Now, parsing + if ((opt->getmode & 1) && (ptr>0)) { // récupérer les html sur disque + // créer le fichier html local + HT_ADD_FOP; // écrire peu à peu le fichier } - // Hack to prevent any problems with ram files of other files - * ( r.adr + r.size ) = '\0'; + + if (!error) { + int detect_title=0; // détection du title + int back_add_stats = opt->state.back_add_stats; + // + char* in_media=NULL; // in other media type (real media and so..) + int intag=0; // on est dans un tag + int incomment=0; // dans un <!-- + int inscript=0; // dans un scipt pour applets javascript) + signed char inscript_state[10][257]; + typedef enum { + INSCRIPT_START=0, + INSCRIPT_ANTISLASH, + INSCRIPT_INQUOTE, + INSCRIPT_INQUOTE2, + INSCRIPT_SLASH, + INSCRIPT_SLASHSLASH, + INSCRIPT_COMMENT, + INSCRIPT_COMMENT2, + INSCRIPT_ANTISLASH_IN_QUOTE, + INSCRIPT_ANTISLASH_IN_QUOTE2, + INSCRIPT_DEFAULT=256 + } INSCRIPT; + INSCRIPT inscript_state_pos=INSCRIPT_START; + char* inscript_name=NULL; // script tag name + int inscript_tag=0; // on est dans un <body onLoad="... terminé par > + char inscript_tag_lastc='\0'; + // terminaison (" ou ') du "<body onLoad=.." + int inscriptgen=0; // on est dans un code générant, ex après obj.write(".. + //int inscript_check_comments=0, inscript_in_comments=0; // javascript comments + char scriptgen_q='\0'; // caractère faisant office de guillemet (' ou ") + int no_esc_utf=0; // ne pas echapper chars > 127 + int nofollow=0; // ne pas scanner + // + int parseall_lastc='\0'; // dernier caractère parsé pour parseall + //int parseall_incomment=0; // dans un /* */ (exemple: a = /* URL */ "img.gif";) + // + char* intag_start=adr; + char* intag_startattr=NULL; + int intag_start_valid=0; + // + int parent_relative=0; // the parent is the base path (.js, .css..) 
+ HT_ADD_START; // débuter + + /* Initialize script automate for comments, quotes.. */ + memset(inscript_state, 0xff, sizeof(inscript_state)); + inscript_state[INSCRIPT_START][INSCRIPT_DEFAULT]=INSCRIPT_START; /* by default, stay in START */ + inscript_state[INSCRIPT_START]['\\']=INSCRIPT_ANTISLASH; /* #1: \ escapes the next character whatever it is */ + inscript_state[INSCRIPT_ANTISLASH][INSCRIPT_DEFAULT]=INSCRIPT_START; + inscript_state[INSCRIPT_START]['\'']=INSCRIPT_INQUOTE; /* #2: ' opens quote and only ' returns to 0 */ + inscript_state[INSCRIPT_INQUOTE][INSCRIPT_DEFAULT]=INSCRIPT_INQUOTE; + inscript_state[INSCRIPT_INQUOTE]['\'']=INSCRIPT_START; + inscript_state[INSCRIPT_INQUOTE]['\\']=INSCRIPT_ANTISLASH_IN_QUOTE; + inscript_state[INSCRIPT_START]['\"']=INSCRIPT_INQUOTE2; /* #3: " opens double-quote and only " returns to 0 */ + inscript_state[INSCRIPT_INQUOTE2][INSCRIPT_DEFAULT]=INSCRIPT_INQUOTE2; + inscript_state[INSCRIPT_INQUOTE2]['\"']=INSCRIPT_START; + inscript_state[INSCRIPT_INQUOTE2]['\\']=INSCRIPT_ANTISLASH_IN_QUOTE2; + inscript_state[INSCRIPT_START]['/']=INSCRIPT_SLASH; /* #4: / state, default to #0 */ + inscript_state[INSCRIPT_SLASH][INSCRIPT_DEFAULT]=INSCRIPT_START; + inscript_state[INSCRIPT_SLASH]['/']=INSCRIPT_SLASHSLASH; /* #5: // with only LF to escape */ + inscript_state[INSCRIPT_SLASHSLASH][INSCRIPT_DEFAULT]=INSCRIPT_SLASHSLASH; + inscript_state[INSCRIPT_SLASHSLASH]['\n']=INSCRIPT_START; + inscript_state[INSCRIPT_SLASH]['*']=INSCRIPT_COMMENT; /* #6: / * with only * / to escape */ + inscript_state[INSCRIPT_COMMENT][INSCRIPT_DEFAULT]=INSCRIPT_COMMENT; + inscript_state[INSCRIPT_COMMENT]['*']=INSCRIPT_COMMENT2; /* #7: closing comments */ + inscript_state[INSCRIPT_COMMENT2][INSCRIPT_DEFAULT]=INSCRIPT_COMMENT; + inscript_state[INSCRIPT_COMMENT2]['/']=INSCRIPT_START; + inscript_state[INSCRIPT_COMMENT2]['*']=INSCRIPT_COMMENT2; + inscript_state[INSCRIPT_ANTISLASH_IN_QUOTE][INSCRIPT_DEFAULT]=INSCRIPT_INQUOTE; /* #8: escape in "" */ + 
inscript_state[INSCRIPT_ANTISLASH_IN_QUOTE2][INSCRIPT_DEFAULT]=INSCRIPT_INQUOTE2; /* #9: escape in '' */ - // ------------------------------------------------------------ - // analyser ce qu'il y a en mémoire (fichier html) - // on scanne les balises - // ------------------------------------------------------------ + /* statistics */ + if ((opt->getmode & 1) && (ptr>0)) { + /* + HTS_STAT.stat_files++; + HTS_STAT.stat_bytes+=r->size; + */ + } + + /* Primary list or URLs */ + if (ptr == 0) { + intag=1; + intag_start_valid=0; + } + /* Check is the file is a .js file */ + else if ( + (strfield2(r->contenttype,"application/x-javascript")!=0) + || (strfield2(r->contenttype,"text/css")!=0) + ) { /* JavaScript js file */ + inscript=1; + inscript_name="script"; + intag=1; // because après <script> on y est .. - pas utile + intag_start_valid=0; // OUI car nous sommes dans du code, plus dans du "vrai" tag + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"note: this file is a javascript file"LF); test_flush; + } + // all links must be checked against parent, not this link + if (liens[ptr]->precedent != 0) { + parent_relative=1; + } + } + /* Or a real audio */ + else if (strfield2(r->contenttype,"audio/x-pn-realaudio")!=0) { /* realaudio link file */ + inscript=intag=1; + inscript_name="media"; + intag_start_valid=0; + in_media="RAM"; // real media! 
+ } + // Detect UTF8 format + if (is_unicode_utf8((unsigned char*) r->adr, (unsigned int) r->size) == 1) { + no_esc_utf=1; + } else { + no_esc_utf=0; + } + // Hack to prevent any problems with ram files of other files + * ( r->adr + r->size ) = '\0'; + + + // ------------------------------------------------------------ + // analyser ce qu'il y a en mémoire (fichier html) + // on scanne les balises + // ------------------------------------------------------------ #if HTS_ANALYSTE - _hts_in_html_done=0; // 0% scannés - _hts_cancel=0; // pas de cancel - _hts_in_html_parsing=1; // flag pour indiquer un parsing + _hts_in_html_done=0; // 0% scannés + _hts_cancel=0; // pas de cancel + _hts_in_html_parsing=1; // flag pour indiquer un parsing #endif - base[0]='\0'; // effacer base-href - lastsaved=adr; - do { - int p=0; - int valid_p=0; // force to take p even if == 0 - int ending_p='\0'; // ending quote? - error=0; - - /* Hack to avoid NULL char problems with C syntax */ - /* Yes, some bogus HTML pages can embed null chars - and therefore can not be properly handled if this hack is not done - */ - if ( ! 
(*adr) ) { - if ( ((int) (adr - r.adr)) < r.size) - *adr=' '; - } - - - - /* - index.html built here - */ - // Construction index.html (sommaire) - // Avant de tester les a href, - // Ici on teste si l'on doit construire l'index vers le(s) site(s) miroir(s) - if (!makeindex_done) { // autoriation d'écrire un index - if (!detect_title) { - if (opt.depth == liens[ptr]->depth) { // on note toujours les premiers liens - if (!in_media) { - if (opt.makeindex && (ptr>0)) { - if (opt.getmode & 1) { // autorisation d'écrire - p=strfield(adr,"title"); - if (p) { - if (*(adr-1)=='/') p=0; // /title - } else { - if (strfield(adr,"/html")) - p=-1; // noter, mais sans titre - else if (strfield(adr,"body")) - p=-1; // noter, mais sans titre - else if ( ((int) (adr - r.adr) ) >= (r.size-1) ) - p=-1; // noter, mais sans titre - else if ( (int) (adr - r.adr) >= r.size - 2) // we got to hurry - p=-1; // xxc xxc xxc - } - } else - p=0; - - if (p) { // ok center - if (makeindex_fp==NULL) { - verif_backblue(opt.path_html); // générer gif - makeindex_fp=filecreate(fconcat(opt.path_html,"index.html")); - if (makeindex_fp!=NULL) { - - // Header - fprintf(makeindex_fp,template_header, - "<!-- Mirror and index made by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->" - ); - - } else makeindex_done=-1; // fait, erreur - } + base[0]='\0'; // effacer base-href + lastsaved=adr; + do { + int p=0; + int valid_p=0; // force to take p even if == 0 + int ending_p='\0'; // ending quote? + int archivetag_p=0; // avoid multiple-archives with commas + INSCRIPT inscript_state_pos_prev=inscript_state_pos; + error=0; + + /* Hack to avoid NULL char problems with C syntax */ + /* Yes, some bogus HTML pages can embed null chars + and therefore can not be properly handled if this hack is not done + */ + if ( ! 
(*adr) ) { + if ( ((int) (adr - r->adr)) < r->size) + *adr=' '; + } + + + + /* + index.html built here + */ + // Construction index.html (sommaire) + // Avant de tester les a href, + // Ici on teste si l'on doit construire l'index vers le(s) site(s) miroir(s) + if (!makeindex_done) { // autoriation d'écrire un index + if (!detect_title) { + if (opt->depth == liens[ptr]->depth) { // on note toujours les premiers liens + if (!in_media) { + if (opt->makeindex && (ptr>0)) { + if (opt->getmode & 1) { // autorisation d'écrire + p=strfield(adr,"title"); + if (p) { + if (*(adr-1)=='/') p=0; // /title + } else { + if (strfield(adr,"/html")) + p=-1; // noter, mais sans titre + else if (strfield(adr,"body")) + p=-1; // noter, mais sans titre + else if ( ((int) (adr - r->adr) ) >= (r->size-1) ) + p=-1; // noter, mais sans titre + else if ( (int) (adr - r->adr) >= r->size - 2) // we got to hurry + p=-1; // xxc xxc xxc + } + } else + p=0; - if (makeindex_fp!=NULL) { - char tempo[HTS_URLMAXSIZE*2]; - char s[HTS_URLMAXSIZE*2]; - char* a=NULL; - char* b=NULL; - s[0]='\0'; - if (p>0) { - a=strchr(adr,'>'); - if (a!=NULL) { - a++; - while(is_space(*a)) a++; // sauter espaces & co - b=strchr(a,'<'); // prochain tag - } + if (p) { // ok center + if (makeindex_fp==NULL) { + verif_backblue(opt,opt->path_html); // générer gif + makeindex_fp=filecreate(fconcat(opt->path_html,"index.html")); + if (makeindex_fp!=NULL) { + + // Header + fprintf(makeindex_fp,template_header, + "<!-- Mirror and index made by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->" + ); + + } else makeindex_done=-1; // fait, erreur } - if (lienrelatif(tempo,liens[ptr]->sav,concat(opt.path_html,"index.html"))==0) { - detect_title=1; // ok détecté pour cette page! 
- makeindex_links++; // un de plus - strcpy(makeindex_firstlink,tempo); - // - if ((b==a) || (a==NULL) || (b==NULL)) { // pas de titre - strcpy(s,tempo); - } else if ((b-a)<256) { - b--; - while(is_space(*b)) b--; - strncpy(s,a,b-a+1); - *(s+(b-a)+1)='\0'; + + if (makeindex_fp!=NULL) { + char tempo[HTS_URLMAXSIZE*2]; + char s[HTS_URLMAXSIZE*2]; + char* a=NULL; + char* b=NULL; + s[0]='\0'; + if (p>0) { + a=strchr(adr,'>'); + if (a!=NULL) { + a++; + while(is_space(*a)) a++; // sauter espaces & co + b=strchr(a,'<'); // prochain tag + } } + if (lienrelatif(tempo,liens[ptr]->sav,concat(opt->path_html,"index.html"))==0) { + detect_title=1; // ok détecté pour cette page! + makeindex_links++; // un de plus + strcpybuff(makeindex_firstlink,tempo); + // - // Body - fprintf(makeindex_fp,template_body, - tempo, - s - ); + /* Hack */ + if (opt->mimehtml) { + strcpybuff(makeindex_firstlink, "cid:primary/primary"); + } + if ((b==a) || (a==NULL) || (b==NULL)) { // pas de titre + strcpybuff(s,tempo); + } else if ((b-a)<256) { + b--; + while(is_space(*b)) b--; + strncpy(s,a,b-a+1); + *(s+(b-a)+1)='\0'; + } + + // Body + fprintf(makeindex_fp,template_body, + tempo, + s + ); + + } } } } } + + } else if (liens[ptr]->depth<opt->depth) { // on a sauté level1+1 et level1 + HT_INDEX_END; } - - } else if (liens[ptr]->depth<opt.depth) { // on a sauté level1+1 et level1 - HT_INDEX_END; - } - } // if (opt.makeindex) - } - // FIN Construction index.html (sommaire) - /* - end -- index.html built here - */ - - - - /* Parse */ - if ( - (*adr=='<') /* No starting tag */ - && (!inscript) /* Not in (java)script */ - && (!incomment) /* Not in comment (<!--) */ - ) { - intag=1; - parseall_incomment=0; - //inquote=0; // effacer quote - intag_start=adr; intag_start_valid=1; - codebase[0]='\0'; // effacer éventuel codebase + } // if (opt->makeindex) + } + // FIN Construction index.html (sommaire) + /* + end -- index.html built here + */ - if (opt.getmode & 1) { // sauver html - p=strfield(adr,"</html"); - 
if (p==0) p=strfield(adr,"<head>"); - // if (p==0) p=strfield(adr,"<doctype"); - if (p) { - if (strnotempty(opt.footer)) { - char tempo[1024+HTS_URLMAXSIZE*2]; - char gmttime[256]; + + + /* Parse */ + if ( + (*adr=='<') /* No starting tag */ + && (!inscript) /* Not in (java)script */ + && (!incomment) /* Not in comment (<!--) */ + ) { + intag=1; + //parseall_incomment=0; + //inquote=0; // effacer quote + intag_start=adr; intag_start_valid=1; + codebase[0]='\0'; // effacer éventuel codebase + + if (opt->getmode & 1) { // sauver html + p=strfield(adr,"</html"); + if (p==0) p=strfield(adr,"<head>"); + // if (p==0) p=strfield(adr,"<doctype"); + if (p) { char* eol="\n"; - tempo[0]='\0'; - if (strchr(r.adr,'\r')) + if (strchr(r->adr,'\r')) eol="\r\n"; - time_gmt_rfc822(gmttime); - strcat(tempo,eol); - sprintf(tempo+strlen(tempo),opt.footer,jump_identification(urladr),urlfil,gmttime,"","","","","","","",""); - strcat(tempo,eol); - //fwrite(tempo,1,strlen(tempo),fp); - HT_ADD(tempo); - } - } - } - - // éliminer les <!-- (commentaires) : intag dévalidé - if (*(adr+1)=='!') - if (*(adr+2)=='-') - if (*(adr+3)=='-') { - intag=0; - incomment=1; - intag_start_valid=0; + if (strnotempty(opt->footer)) { + char tempo[1024+HTS_URLMAXSIZE*2]; + char gmttime[256]; + tempo[0]='\0'; + time_gmt_rfc822(gmttime); + strcatbuff(tempo,eol); + sprintf(tempo+strlen(tempo),opt->footer,jump_identification(urladr),urlfil,gmttime,HTTRACK_VERSIONID,"","","","","","",""); + strcatbuff(tempo,eol); + //fwrite(tempo,1,strlen(tempo),fp); + HT_ADD(tempo); + } + if (r->charset[0]) { + HT_ADD("<meta http-equiv=\"content-type\" content=\"text/html;charset="); + HT_ADD(r->charset); + HT_ADD("\">"); + HT_ADD(eol); + } } - - } - else if ( - (*adr=='>') /* ending tag */ - && ( (!inscript) || (inscript_tag) ) /* and in tag (or in script) */ - ) { - if (inscript_tag) { - inscript_tag=inscript=0; - intag=0; - incomment=0; - intag_start_valid=0; - } else if (!incomment) { - intag=0; //inquote=0; + } - // entrée 
dans du javascript? - // on parse ICI car il se peut qu'on ait eu a parser les src=.. dedans - //if (!inscript) { // sinon on est dans un obj.write(".. - if ((intag_start_valid) && - ( - check_tag(intag_start,"script") - || - check_tag(intag_start,"style") - ) - ) { - char* a=intag_start; // < - // ** while(is_realspace(*(--a))); - if (*a=='<') { // sûr que c'est un tag? - inscript=1; - intag=1; // because après <script> on y est .. - pas utile - intag_start_valid=0; // OUI car nous sommes dans du code, plus dans du "vrai" tag - } - } - } else { /* end of comment? */ - // vérifier fermeture correcte - if ( (*(adr-1)=='-') && (*(adr-2)=='-') ) { + // éliminer les <!-- (commentaires) : intag dévalidé + if (*(adr+1)=='!') + if (*(adr+2)=='-') + if (*(adr+3)=='-') { + intag=0; + incomment=1; + intag_start_valid=0; + } + + } + else if ( + (*adr=='>') /* ending tag */ + && ( (!inscript) || (inscript_tag) ) /* and in tag (or in script) */ + ) { + if (inscript_tag) { + inscript_tag=inscript=0; intag=0; incomment=0; intag_start_valid=0; - } -#if GT_ENDS_COMMENT - /* wrong comment ending */ - else { - /* check if correct ending does not exists - <!-- foo > example <!-- bar > is sometimes accepted by browsers - when no --> is used somewhere else.. darn those browsers are dirty - */ - if (!strstr(adr,"-->")) { + } else if (!incomment) { + intag=0; //inquote=0; + + // entrée dans du javascript? + // on parse ICI car il se peut qu'on ait eu a parser les src=.. dedans + //if (!inscript) { // sinon on est dans un obj.write(".. + if ((intag_start_valid) && + ( + check_tag(intag_start,"script") + || + check_tag(intag_start,"style") + ) + ) { + char* a=intag_start; // < + // ** while(is_realspace(*(--a))); + if (*a=='<') { // sûr que c'est un tag? + if (check_tag(intag_start,"script")) + inscript_name="script"; + else + inscript_name="style"; + inscript=1; + inscript_state_pos=INSCRIPT_START; + intag=1; // because après <script> on y est .. 
- pas utile + intag_start_valid=0; // OUI car nous sommes dans du code, plus dans du "vrai" tag + } + } + } else { /* end of comment? */ + // vérifier fermeture correcte + if ( (*(adr-1)=='-') && (*(adr-2)=='-') ) { intag=0; incomment=0; intag_start_valid=0; } - } -#endif - } - //} - } - //else if (*adr==34) { - // inquote=(inquote?0:1); - //} - else if (intag || inscript) { // nous sommes dans un tag/commentaire, tester si on recoit un tag - int p_type=0; - int p_nocatch=0; - int p_searchMETAURL=0; // chercher ..URL=<url> - int add_class=0; // ajouter .class - int add_class_dots_to_patch=0; // number of '.' in code="x.y.z<realname>" - char* p_flush=NULL; - - - // ------------------------------------------------------------ - // parsing évolé - // ------------------------------------------------------------ - if (((isalpha((unsigned char)*adr)) || (*adr=='/') || (inscript) || (inscriptgen))) { // sinon pas la peine de tester.. - - - /* caractère de terminaison pour "miniparsing" javascript=.. ? - (ex: <a href="javascript:()" action="foo"> ) */ - if (inscript_tag) { - if (inscript_tag_lastc) { - if (*adr == inscript_tag_lastc) { - /* sortir */ - inscript_tag=inscript=0; +#if GT_ENDS_COMMENT + /* wrong comment ending */ + else { + /* check if correct ending does not exists + <!-- foo > example <!-- bar > is sometimes accepted by browsers + when no --> is used somewhere else.. darn those browsers are dirty + */ + if (!strstr(adr,"-->")) { + intag=0; incomment=0; + intag_start_valid=0; } } +#endif } + //} + } + //else if (*adr==34) { + // inquote=(inquote?0:1); + //} + else if (intag || inscript) { // nous sommes dans un tag/commentaire, tester si on recoit un tag + int p_type=0; + int p_nocatch=0; + int p_searchMETAURL=0; // chercher ..URL=<url> + int add_class=0; // ajouter .class + int add_class_dots_to_patch=0; // number of '.' 
in code="x.y.z<realname>" + char* p_flush=NULL; - // Note: - // Certaines pages ne respectent pas le html - // notamment les guillements ne sont pas fixés - // Nous sommes dans un tag, donc on peut faire un test plus - // large pour pouvoi prendre en compte ces particularités - - // à vérifier: ACTION, CODEBASE, VRML - - if (in_media) { - if (strcmp(in_media,"RAM")==0) { // real media - p=0; - valid_p=1; - } - } else if (ptr>0) { /* pas première page 0 (primary) */ - p=0; // saut pour le nom de fichier: adresse nom fichier=adr+p + // ------------------------------------------------------------ + // parsing évolé + // ------------------------------------------------------------ + if (((isalpha((unsigned char)*adr)) || (*adr=='/') || (inscript) || (inscriptgen))) { // sinon pas la peine de tester.. - // ------------------------------ - // détection d'écriture JavaScript. - // osons les obj.write et les obj.href=.. ! osons! - // note: inscript==1 donc on sautera après les \" + + /* caractère de terminaison pour "miniparsing" javascript=.. ? + (ex: <a href="javascript:()" action="foo"> ) */ + if (inscript_tag) { + if (inscript_tag_lastc) { + if (*adr == inscript_tag_lastc) { + /* sortir */ + inscript_tag=inscript=0; + incomment=0; + } + } + } + + /* automate */ if (inscript) { - if (inscriptgen) { // on est déja dans un objet générant.. 
- if (*adr==scriptgen_q) { // fermeture des " ou ' - if (*(adr-1)!='\\') { // non - inscriptgen=0; // ok parsing terminé + int new_state_pos; + new_state_pos=inscript_state[inscript_state_pos][(unsigned char)*adr]; + if (new_state_pos < 0) { + new_state_pos=inscript_state[inscript_state_pos][INSCRIPT_DEFAULT]; + } + assertf(new_state_pos >= 0); + assertf(new_state_pos*sizeof(inscript_state[0]) < sizeof(inscript_state)); + inscript_state_pos=new_state_pos; + } + + + // Note: + // Certaines pages ne respectent pas le html + // notamment les guillements ne sont pas fixés + // Nous sommes dans un tag, donc on peut faire un test plus + // large pour pouvoi prendre en compte ces particularités + + // à vérifier: ACTION, CODEBASE, VRML + + if (in_media) { + if (strcmp(in_media,"RAM")==0) { // real media + p=0; + valid_p=1; + } + } else if (ptr>0) { /* pas première page 0 (primary) */ + p=0; // saut pour le nom de fichier: adresse nom fichier=adr+p + + // ------------------------------ + // détection d'écriture JavaScript. + // osons les obj.write et les obj.href=.. ! osons! + // note: inscript==1 donc on sautera après les \" + if (inscript) { + if (inscriptgen) { // on est déja dans un objet générant.. + if (*adr==scriptgen_q) { // fermeture des " ou ' + if (*(adr-1)!='\\') { // non + inscriptgen=0; // ok parsing terminé + } } - } - } else { - char* a=NULL; - char check_this_fking_line=0; // parsing code javascript.. - char must_be_terminated=0; // caractère obligatoire de terminaison! - int token_size; - if (!(token_size=strfield(adr,".writeln"))) // détection ...objet.write[ln]("code html")... - token_size=strfield(adr,".write"); - if (token_size) { - a=adr+token_size; + } else { + char* a=NULL; + char check_this_fking_line=0; // parsing code javascript.. + char must_be_terminated=0; // caractère obligatoire de terminaison! + int token_size; + if (!(token_size=strfield(adr,".writeln"))) // détection ...objet.write[ln]("code html")... 
+ token_size=strfield(adr,".write"); + if (token_size) { + a=adr+token_size; + while(is_realspace(*a)) a++; // sauter espaces + if (*a=='(') { // début parenthèse + check_this_fking_line=2; // à parser! + must_be_terminated=')'; + a++; // sauter ( + } + } + // euhh ??? ??? + /* else if (strfield(adr,".href")) { // détection ...objet.href="... + a=adr+5; while(is_realspace(*a)) a++; // sauter espaces - if (*a=='(') { // début parenthèse - check_this_fking_line=2; // à parser! - must_be_terminated=')'; - a++; // sauter ( + if (*a=='=') { // ohh un égal + check_this_fking_line=1; // à noter! + must_be_terminated=';'; // et si t'as oublié le ; tu sais pas coder + a++; // sauter = } - } - // euhh ??? ??? - /* else if (strfield(adr,".href")) { // détection ...objet.href="... - a=adr+5; - while(is_realspace(*a)) a++; // sauter espaces - if (*a=='=') { // ohh un égal - check_this_fking_line=1; // à noter! - must_be_terminated=';'; // et si t'as oublié le ; tu sais pas coder - a++; // sauter = - } - + }*/ - - // on a un truc du genre instruction"code généré" dont on parse le code - if (check_this_fking_line) { - while(is_realspace(*a)) a++; - if ((*a=='\'') || (*a=='"')) { // départ de '' ou "" - char *b; - int ex=0; - scriptgen_q=*a; // quote - b=a+1; // départ de la chaîne - // vérifier forme ("code") et pas ("code"+var), ingérable - do { - a++; // caractère suivant - if (*a==scriptgen_q) if (*(a-1)!='\\') // quote non slash - ex=1; // sortie - if ((*a==10) || (*a==13)) - ex=1; - } while(!ex); - if (*a==scriptgen_q) { // fin du quote - a++; - while(is_realspace(*a)) a++; - if (*a==must_be_terminated) { // parenthèse fermante: ("..") - - // bon, on doit parser une ligne javascript - // 1) si check.. 
==1 alors c'est un nom de fichier direct, donc - // on fixe p sur le saut nécessaire pour atteindre le nom du fichier - // et le moteur se débrouillera ensuite tout seul comme un grand - // 2) si check==2 c'est un peu plus tordu car là on génére du - // code html au sein de code javascript au sein de code html - // dans ce cas on doit fixer un flag à un puis ensuite dans la boucle - // on devra parser les instructions standard comme <a href etc - // NOTE: le code javascript autogénéré n'est pas pris en compte!! - // (et ne marche pas dans 50% des cas de toute facon!) - if (check_this_fking_line==1) { - p=(int) (b - adr); // calculer saut! - } else { - inscriptgen=1; // SCRIPTGEN actif - adr=b; // jump + + // on a un truc du genre instruction"code généré" dont on parse le code + if (check_this_fking_line) { + while(is_realspace(*a)) a++; + if ((*a=='\'') || (*a=='"')) { // départ de '' ou "" + char *b; + int ex=0; + scriptgen_q=*a; // quote + b=a+1; // départ de la chaîne + // vérifier forme ("code") et pas ("code"+var), ingérable + do { + a++; // caractère suivant + if (*a==scriptgen_q && *(a-1)!='\\') // quote non slash + ex=1; // sortie + if (*a==10 && *(a-1) != '\\' /* LF and no continue (\) character */ + && ( *(a-1) != '\r' || *(a-2) != '\\' ) ) /* and not CRLF and no .. */ + ex=1; + } while(!ex); + if (*a==scriptgen_q) { // fin du quote + a++; + while(is_realspace(*a)) a++; + if (*a==must_be_terminated) { // parenthèse fermante: ("..") + + // bon, on doit parser une ligne javascript + // 1) si check.. 
==1 alors c'est un nom de fichier direct, donc + // on fixe p sur le saut nécessaire pour atteindre le nom du fichier + // et le moteur se débrouillera ensuite tout seul comme un grand + // 2) si check==2 c'est un peu plus tordu car là on génére du + // code html au sein de code javascript au sein de code html + // dans ce cas on doit fixer un flag à un puis ensuite dans la boucle + // on devra parser les instructions standard comme <a href etc + // NOTE: le code javascript autogénéré n'est pas pris en compte!! + // (et ne marche pas dans 50% des cas de toute facon!) + if (check_this_fking_line==1) { + p=(int) (b - adr); // calculer saut! + } else { + inscriptgen=1; // SCRIPTGEN actif + adr=b; // jump + } + + if ((opt->debug>1) && (opt->log!=NULL)) { + char str[512]; + str[0]='\0'; + strncatbuff(str,b,minimum((int) (a - b + 1), 32)); + fspc(opt->log,"debug"); fprintf(opt->log,"active code (%s) detected in javascript: %s"LF,(check_this_fking_line==2)?"parse":"pickup",str); test_flush; + } } - if ((opt.debug>1) && (opt.log!=NULL)) { - char str[512]; - str[0]='\0'; - strncat(str,b,minimum((int) (a - b + 1), 32)); - fspc(opt.log,"debug"); fprintf(opt.log,"active code (%s) detected in javascript: %s"LF,(check_this_fking_line==2)?"parse":"pickup",str); test_flush; - } } } + } - - } } - } - // fin detection code générant javascript vers html - // ------------------------------ - - - // analyse proprement dite, A HREF=.. etc.. - if (!p) { - // si dans un tag, et pas dans un script - sauf si on analyse un obj.write(".. - if ((intag && (!inscript)) || inscriptgen) { - if ( (*(adr-1)=='<') || (is_space(*(adr-1))) ) { // <tag < tag etc - // <A HREF=.. pour les liens HTML - p=rech_tageq(adr,"href"); - if (p) { // href.. tester si c'est une bas href! - if ((intag_start_valid) && check_tag(intag_start,"base")) { // oui! - // ** note: base href et codebase ne font pas bon ménage.. 
- p_type=2; // c'est un chemin - } - } - - /* Tags supplémentaires à vérifier (<img src=..> etc) */ - if (p==0) { - int i=0; - while( (p==0) && (strnotempty(hts_detect[i])) ) { - p=rech_tageq(adr,hts_detect[i]); - i++; - } - } - - /* Tags supplémentaires en début à vérifier (<object .. hotspot1=..> etc) */ - if (p==0) { - int i=0; - while( (p==0) && (strnotempty(hts_detectbeg[i])) ) { - p=rech_tageqbegdigits(adr,hts_detectbeg[i]); - i++; + // fin detection code générant javascript vers html + // ------------------------------ + + + // analyse proprement dite, A HREF=.. etc.. + if (!p) { + // si dans un tag, et pas dans un script - sauf si on analyse un obj.write(".. + if ((intag && (!inscript)) || inscriptgen) { + if ( (*(adr-1)=='<') || (is_space(*(adr-1))) ) { // <tag < tag etc + // <A HREF=.. pour les liens HTML + p=rech_tageq(adr,"href"); + if (p) { // href.. tester si c'est une bas href! + if ((intag_start_valid) && check_tag(intag_start,"base")) { // oui! + // ** note: base href et codebase ne font pas bon ménage.. + p_type=2; // c'est un chemin + } } - } - - /* Tags supplémentaires à vérifier : URL=.. */ - if (p==0) { - int i=0; - while( (p==0) && (strnotempty(hts_detectURL[i])) ) { - p=rech_tageq(adr,hts_detectURL[i]); - i++; + + /* Tags supplémentaires à vérifier (<img src=..> etc) */ + if (p==0) { + int i=0; + while( (p==0) && (strnotempty(hts_detect[i])) ) { + p=rech_tageq(adr,hts_detect[i]); + if (p) { + /* This is a temporary hack to avoid archive=foo.jar,bar.jar .. */ + if (strcmp(hts_detect[i], "archive") == 0) { + archivetag_p = 1; + } + } + i++; + } } - if (p) - p_searchMETAURL=1; - } - - /* Tags supplémentaires à vérifier, mais à ne pas capturer */ - if (p==0) { - int i=0; - while( (p==0) && (strnotempty(hts_detectandleave[i])) ) { - p=rech_tageq(adr,hts_detectandleave[i]); - i++; + + /* Tags supplémentaires en début à vérifier (<object .. 
hotspot1=..> etc) */ + if (p==0) { + int i=0; + while( (p==0) && (strnotempty(hts_detectbeg[i])) ) { + p=rech_tageqbegdigits(adr,hts_detectbeg[i]); + i++; + } } - if (p) - p_nocatch=1; /* ne pas rechercher */ - } - - /* Evénements */ - if (p==0) { - int i=0; - /* détection onLoad etc */ - while( (p==0) && (strnotempty(hts_detect_js[i])) ) { - p=rech_tageq(adr,hts_detect_js[i]); - i++; + + /* Tags supplémentaires à vérifier : URL=.. */ + if (p==0) { + int i=0; + while( (p==0) && (strnotempty(hts_detectURL[i])) ) { + p=rech_tageq(adr,hts_detectURL[i]); + i++; + } + if (p) + p_searchMETAURL=1; } - /* non détecté - détecter également les onXxxxx= */ + + /* Tags supplémentaires à vérifier, mais à ne pas capturer */ if (p==0) { - if ( (*adr=='o') && (*(adr+1)=='n') && isUpperLetter(*(adr+2)) ) { - p=0; - while(isalpha((unsigned char)adr[p]) && (p<64) ) p++; - if (p<64) { - while(is_space(adr[p])) p++; - if (adr[p]=='=') - p++; - else p=0; - } else p=0; + int i=0; + while( (p==0) && (strnotempty(hts_detectandleave[i])) ) { + p=rech_tageq(adr,hts_detectandleave[i]); + i++; } + if (p) + p_nocatch=1; /* ne pas rechercher */ } - /* OK, événement repéré */ - if (p) { - inscript_tag_lastc=*(adr+p); /* à attendre à la fin */ - adr+=p; /* saut */ - /* - On est désormais dans du code javascript - */ - inscript_tag=inscript=1; + + /* Evénements */ + if (p==0 && + ! 
inscript /* we don't want events inside document.write */ + ) { + int i=0; + /* détection onLoad etc */ + while( (p==0) && (strnotempty(hts_detect_js[i])) ) { + p=rech_tageq(adr,hts_detect_js[i]); + i++; + } + /* non détecté - détecter également les onXxxxx= */ + if (p==0) { + if ( (*adr=='o') && (*(adr+1)=='n') && isUpperLetter(*(adr+2)) ) { + p=0; + while(isalpha((unsigned char)adr[p]) && (p<64) ) p++; + if (p<64) { + while(is_space(adr[p])) p++; + if (adr[p]=='=') + p++; + else p=0; + } else p=0; + } + } + /* OK, événement repéré */ + if (p) { + inscript_tag_lastc=*(adr+p); /* à attendre à la fin */ + adr+=p+1; /* saut */ + /* + On est désormais dans du code javascript + */ + inscript_name=""; + inscript=inscript_tag=1; + inscript_state_pos=INSCRIPT_START; + } + p=0; /* quoi qu'il arrive, ne rien démarrer ici */ } - p=0; /* quoi qu'il arrive, ne rien démarrer ici */ - } - - // <APPLET CODE=.. pour les applet java.. [CODEBASE (chemin..) à faire] - if (p==0) { - p=rech_tageq(adr,"code"); - if (p) { - if ((intag_start_valid) && check_tag(intag_start,"applet")) { // dans un <applet ! - p_type=-1; // juste le nom de fichier+dossier, écire avant codebase - add_class=1; // ajouter .class au besoin - - // vérifier qu'il n'y a pas de codebase APRES - // sinon on swappe les deux. - // pas très propre mais c'est ce qu'il y a de plus simple à faire!! - - { - char *a; - a=adr; - while((*a) && (*a!='>') && (!rech_tageq(a,"codebase"))) a++; - if (rech_tageq(a,"codebase")) { // banzai! codebase= - char* b; - b=strchr(a,'>'); - if (b) { - if (((int) (b - adr)) < 1000) { // au total < 1Ko - char tempo[HTS_URLMAXSIZE*2]; - tempo[0]='\0'; - strncat(tempo,a,(int) (b - a) ); - strcat( tempo," "); - strncat(tempo,adr,(int) (a - adr - 1)); - // éventuellement remplire par des espaces pour avoir juste la taille - while((int) strlen(tempo)<((int) (b - adr))) - strcat(tempo," "); - // pas d'erreur? 
- if ((int) strlen(tempo) == ((int) (b - adr) )) { - strncpy(adr,tempo,strlen(tempo)); // PAS d'octet nul à la fin! - p=0; // DEVALIDER!! - p_type=0; - add_class=0; + + // <APPLET CODE=.. pour les applet java.. [CODEBASE (chemin..) à faire] + if (p==0) { + p=rech_tageq(adr,"code"); + if (p) { + if ((intag_start_valid) && check_tag(intag_start,"applet")) { // dans un <applet ! + p_type=-1; // juste le nom de fichier+dossier, écire avant codebase + add_class=1; // ajouter .class au besoin + + // vérifier qu'il n'y a pas de codebase APRES + // sinon on swappe les deux. + // pas très propre mais c'est ce qu'il y a de plus simple à faire!! + + { + char *a; + a=adr; + while((*a) && (*a!='>') && (!rech_tageq(a,"codebase"))) a++; + if (rech_tageq(a,"codebase")) { // banzai! codebase= + char* b; + b=strchr(a,'>'); + if (b) { + if (((int) (b - adr)) < 1000) { // au total < 1Ko + char tempo[HTS_URLMAXSIZE*2]; + tempo[0]='\0'; + strncatbuff(tempo,a,(int) (b - a) ); + strcatbuff( tempo," "); + strncatbuff(tempo,adr,(int) (a - adr - 1)); + // éventuellement remplire par des espaces pour avoir juste la taille + while((int) strlen(tempo)<((int) (b - adr))) + strcatbuff(tempo," "); + // pas d'erreur? + if ((int) strlen(tempo) == ((int) (b - adr) )) { + strncpy(adr,tempo,strlen(tempo)); // PAS d'octet nul à la fin! + p=0; // DEVALIDER!! + p_type=0; + add_class=0; + } } } } } + } - } } - } - - // liens à patcher mais pas à charger (ex: codebase) - if (p==0) { // note: si non chargé (ex: ignorer .class) patché tout de même - p=rech_tageq(adr,"codebase"); - if (p) { - if ((intag_start_valid) && check_tag(intag_start,"applet")) { // dans un <applet ! - p_type=-2; - } else p=-1; // ne plus chercher + + // liens à patcher mais pas à charger (ex: codebase) + if (p==0) { // note: si non chargé (ex: ignorer .class) patché tout de même + p=rech_tageq(adr,"codebase"); + if (p) { + if ((intag_start_valid) && check_tag(intag_start,"applet")) { // dans un <applet ! 
+ p_type=-2; + } else p=-1; // ne plus chercher + } } - } - - - // Meta tags pour robots - if (p==0) { - if (opt.robots) { - if ((intag_start_valid) && check_tag(intag_start,"meta")) { - if (rech_tageq(adr,"name")) { // name=robots.txt - char tempo[1100]; - char* a; - tempo[0]='\0'; - a=strchr(adr,'>'); + + + // Meta tags pour robots + if (p==0) { + if (opt->robots) { + if ((intag_start_valid) && check_tag(intag_start,"meta")) { + if (rech_tageq(adr,"name")) { // name=robots.txt + char tempo[1100]; + char* a; + tempo[0]='\0'; + a=strchr(adr,'>'); #if DEBUG_ROBOTS - printf("robots.txt meta tag detected\n"); + printf("robots.txt meta tag detected\n"); #endif - if (a) { - if (((int) (a - adr)) < 999 ) { - strncat(tempo,adr,(int) (a - adr)); - if (strstrcase(tempo,"content")) { - if (strstrcase(tempo,"robots")) { - if (strstrcase(tempo,"nofollow")) { + if (a) { + if (((int) (a - adr)) < 999 ) { + strncatbuff(tempo,adr,(int) (a - adr)); + if (strstrcase(tempo,"content")) { + if (strstrcase(tempo,"robots")) { + if (strstrcase(tempo,"nofollow")) { #if DEBUG_ROBOTS - printf("robots.txt meta tag: nofollow in %s%s\n",urladr,urlfil); + printf("robots.txt meta tag: nofollow in %s%s\n",urladr,urlfil); #endif - nofollow=1; // NE PLUS suivre liens dans cette page - if (opt.errlog) { - fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Link %s%s not scanned (follow robots meta tag)"LF,urladr,urlfil); - test_flush; + nofollow=1; // NE PLUS suivre liens dans cette page + if (opt->errlog) { + fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Link %s%s not scanned (follow robots meta tag)"LF,urladr,urlfil); + test_flush; + } } } } @@ -674,28 +1073,46 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) { } } } - } - - // entrée dans une applet javascript - /*if (!inscript) { // sinon on est dans un obj.write(".. 
- if (p==0) - if (rech_sampletag(adr,"script")) - if (check_tag(intag_start,"script")) { - inscript=1; - } + + // entrée dans une applet javascript + /*if (!inscript) { // sinon on est dans un obj.write(".. + if (p==0) + if (rech_sampletag(adr,"script")) + if (check_tag(intag_start,"script")) { + inscript=1; + } }*/ - - // Ici on procède à une analyse du code javascript pour tenter de récupérer - // certains fichiers évidents. - // C'est devenu obligatoire vu le nombre de pages qui intègrent - // des images réactives par exemple + + // Ici on procède à une analyse du code javascript pour tenter de récupérer + // certains fichiers évidents. + // C'est devenu obligatoire vu le nombre de pages qui intègrent + // des images réactives par exemple } } else if (inscript) { + +#if 0 + /* Check // javascript comments */ + if (*adr == 10 || *adr == 13) { + inscript_check_comments = 1; + inscript_in_comments = 0; + } + else if (inscript_check_comments) { + if (!is_realspace(*adr)) { + inscript_check_comments = 0; + if (adr[0] == '/' && adr[1] == '/') { + inscript_in_comments = 1; + } + } + } +#endif + + /* Parse */ + assertf(inscript_name != NULL); if ( ( - (strfield(adr,"/script")) + (strfield(adr,"/script") && strfield(inscript_name, "script")) || - (strfield(adr,"/style")) + (strfield(adr,"/style") && strfield(inscript_name, "style")) ) ) { char* a=adr; @@ -705,26 +1122,29 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) { if (*a=='<') { // sûr que c'est un tag? 
inscript=0; } - } else { - /* - Script Analyzing - different types supported: - foo="url" - foo("url") or foo(url) - foo "url" + } else if (inscript_state_pos == INSCRIPT_START /*!inscript_in_comments*/) { + /* + Script Analyzing - different types supported: + foo="url" + foo("url") or foo(url) + foo "url" */ int nc; char expected = '='; // caractère attendu après char* expected_end = ";"; int can_avoid_quotes=0; char quotes_replacement='\0'; + int ensure_not_mime=0; if (inscript_tag) expected_end=";\"\'"; // voir a href="javascript:doc.location='foo'" nc = strfield(adr,".src"); // nom.src="image"; if (!nc) nc = strfield(adr,".location"); // document.location="doc" + if (!nc) nc = strfield(adr,":location"); // javascript:location="doc" if (!nc) nc = strfield(adr,".href"); // document.location="doc" if (!nc) if ( (nc = strfield(adr,".open")) ) { // window.open("doc",.. expected='('; // parenthèse expected_end="),"; // fin: virgule ou parenthèse + ensure_not_mime=1; //* ensure the url is not a mime type */ } if (!nc) if ( (nc = strfield(adr,".replace")) ) { // window.replace("url") expected='('; // parenthèse @@ -734,7 +1154,9 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) { expected='('; // parenthèse expected_end=")"; // fin: parenthèse } - if (!nc) if ( (nc = strfield(adr,"url")) ) { // url(url) + if (!nc) if ( (nc = strfield(adr,"url")) && (!isalnum(*(adr - 1))) + && *(adr - 1) != '_' + ) { // url(url) expected='('; // parenthèse expected_end=")"; // fin: parenthèse can_avoid_quotes=1; @@ -773,15 +1195,27 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) { if ((strchr(expected_end,*c)) || (*c=='\n') || (*c=='\r')) { c-=(ndelim+1); if ((int) (c - a + 1)) { - if ((opt.debug>1) && (opt.log!=NULL)) { - char str[512]; - str[0]='\0'; - strncat(str,a,minimum((int) (c - a + 1),32)); - fspc(opt.log,"debug"); fprintf(opt.log,"link detected in javascript: %s"LF,str); test_flush; + if (ensure_not_mime) { + int i = 0; + while(a != NULL && hts_main_mime[i] != NULL 
&& hts_main_mime[i][0] != '\0') { + int p; + if ((p=strfield(a, hts_main_mime[i])) && a[p] == '/') { + a=NULL; + } + i++; + } } - p=(int) (a - adr); // p non nul: TRAITER CHAINE COMME FICHIER - if (can_avoid_quotes) { - ending_p=quotes_replacement; + if (a != NULL) { + if ((opt->debug>1) && (opt->log!=NULL)) { + char str[512]; + str[0]='\0'; + strncatbuff(str,a,minimum((int) (c - a + 1),32)); + fspc(opt->log,"debug"); fprintf(opt->log,"link detected in javascript: %s"LF,str); test_flush; + } + p=(int) (a - adr); // p non nul: TRAITER CHAINE COMME FICHIER + if (can_avoid_quotes) { + ending_p=quotes_replacement; + } } } } @@ -808,176 +1242,205 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) { } else if (isspace((unsigned char)*adr)) { intag_startattr=adr+1; // attribute in tag (for dirty parsing) } - - - // ------------------------------------------------------------ - // dernier recours - parsing "sale" : détection systématique des .gif, etc. - // risque: générer de faux fichiers parazites - // fix: ne parse plus dans les commentaires - // ------------------------------------------------------------ - if ( (opt.parseall) && (ptr>0) && (!in_media) ) { // option parsing "brut" - int incomment_justquit=0; - if (!is_realspace(*adr)) { - int noparse=0; - - // Gestion des /* */ - if (inscript) { - if (parseall_incomment) { - if ((*adr=='/') && (*(adr-1)=='*')) - parseall_incomment=0; - incomment_justquit=1; // ne pas noter dernier caractère - } else { - if ((*adr=='/') && (*(adr+1)=='*')) - parseall_incomment=1; - } - } else - parseall_incomment=0; - - /* vérifier que l'on est pas dans un <!-- --> pur */ - if ( (!intag) && (incomment) && (!inscript)) - noparse=1; /* commentaire */ - - // recherche d'URLs - if ((!parseall_incomment) && (!noparse)) { - if (!p) { // non déja trouvé - if (adr != r.adr) { // >1 caractère - // scanner les chaines - if ((*adr == '\"') || (*adr=='\'')) { // "xx.gif" 'xx.gif' - if (strchr("=(,",parseall_lastc)) { // exemple: a="img.gif.. 
- char *a=adr; - char stop=*adr; // " ou ' - int count=0; - - // sauter caractères + + + // ------------------------------------------------------------ + // dernier recours - parsing "sale" : détection systématique des .gif, etc. + // risque: générer de faux fichiers parazites + // fix: ne parse plus dans les commentaires + // ------------------------------------------------------------ + if ( (opt->parseall) && (ptr>0) && (!in_media) /* && (!inscript_in_comments)*/ ) { // option parsing "brut" + //int incomment_justquit=0; + if (!is_realspace(*adr)) { + int noparse=0; + + // Gestion des /* */ +#if 0 + if (inscript) { + if (parseall_incomment) { + if ((*adr=='/') && (*(adr-1)=='*')) + parseall_incomment=0; + incomment_justquit=1; // ne pas noter dernier caractère + } else { + if ((*adr=='/') && (*(adr+1)=='*')) + parseall_incomment=1; + } + } else + parseall_incomment=0; +#endif + /* ensure automate state 0 (not in comments, quotes..) */ + if (inscript && ( + inscript_state_pos != INSCRIPT_INQUOTE && inscript_state_pos != INSCRIPT_INQUOTE2 + ) ) { + noparse=1; + } + + /* vérifier que l'on est pas dans un <!-- --> pur */ + if ( (!intag) && (incomment) && (!inscript)) + noparse=1; /* commentaire */ + + // recherche d'URLs + if (!noparse) { + //if ((!parseall_incomment) && (!noparse)) { + if (!p) { // non déja trouvé + if (adr != r->adr) { // >1 caractère + // scanner les chaines + if ((*adr == '\"') || (*adr=='\'')) { // "xx.gif" 'xx.gif' + if (strchr("=(,",parseall_lastc)) { // exemple: a="img.gif.. 
(handles comments) + char *a=adr; + char stop=*adr; // " ou ' + int count=0; + + // sauter caractères + a++; + // copier + while((*a) && (*a!='\'') && (*a!='\"') && (count<HTS_URLMAXSIZE)) { count++; a++; } + + // ok chaine terminée par " ou ' + if ((*a == stop) && (count<HTS_URLMAXSIZE) && (count>0)) { + char c; + char* aend; + // + aend=a; // sauver début a++; - // copier - while((*a) && (*a!='\'') && (*a!='\"') && (count<HTS_URLMAXSIZE)) { count++; a++; } - - // ok chaine terminée par " ou ' - if ((*a == stop) && (count<HTS_URLMAXSIZE) && (count>0)) { - char c; - char* aend; + while(is_taborspace(*a)) a++; + c=*a; + if (strchr("),;>/+\r\n",c)) { // exemple: ..img.gif"; + // le / est pour funct("img.gif" /* URL */); + char tempo[HTS_URLMAXSIZE*2]; + char type[256]; + int url_ok=0; // url valide? + tempo[0]='\0'; type[0]='\0'; // - aend=a; // sauver début - a++; - while(is_taborspace(*a)) a++; - c=*a; - if (strchr("),;>/+\r\n",c)) { // exemple: ..img.gif"; - // le / est pour funct("img.gif" /* URL */); - char tempo[HTS_URLMAXSIZE*2]; - char type[256]; - int url_ok=0; // url valide? - tempo[0]='\0'; type[0]='\0'; - // - strncat(tempo,adr+1,count); - // - if ((!strchr(tempo,' ')) || inscript) { // espace dedans: méfiance! (sauf dans code javascript) - int invalid_url=0; - - // escape - unescape_amp(tempo); - - // Couper au # ou ? éventuel - { - char* a=strchr(tempo,'#'); - if (a) - *a='\0'; - a=strchr(tempo,'?'); - if (a) - *a='\0'; - } - - // vérifier qu'il n'y a pas de caractères spéciaux - if (!strnotempty(tempo)) - invalid_url=1; - else if (strchr(tempo,'*') - || strchr(tempo,'<') - || strchr(tempo,'>')) - invalid_url=1; - - /* non invalide? */ - if (!invalid_url) { - // Un plus à la fin? 
Alors ne pas prendre sauf si extension ("/toto.html#"+tag) - if (c!='+') { // PAS de plus à la fin - char* a; - // "Comparisons of scheme names MUST be case-insensitive" (RFC2616) - //if ((strncmp(tempo,"http://",7)==0) || (strncmp(tempo,"ftp://",6)==0)) // ok pas de problème - if ( - (strfield(tempo,"http:")) - || (strfield(tempo,"ftp:")) + strncatbuff(tempo,adr+1,count); + // + if ((!strchr(tempo,' ')) || inscript) { // espace dedans: méfiance! (sauf dans code javascript) + int invalid_url=0; + + // escape + unescape_amp(tempo); + + // Couper au # ou ? éventuel + { + char* a=strchr(tempo,'#'); + if (a) + *a='\0'; + a=strchr(tempo,'?'); + if (a) + *a='\0'; + } + + // vérifier qu'il n'y a pas de caractères spéciaux + if (!strnotempty(tempo)) + invalid_url=1; + else if (strchr(tempo,'*') + || strchr(tempo,'<') + || strchr(tempo,'>') + || strchr(tempo,',') /* list of files ? */ + || strchr(tempo,'\"') /* potential parsing bug */ + || strchr(tempo,'\'') /* potential parsing bug */ + ) + invalid_url=1; + else if (tempo[0] == '.' && isalnum(tempo[1])) // ".gif" + invalid_url=1; + + /* non invalide? */ + if (!invalid_url) { + // Un plus à la fin? Alors ne pas prendre sauf si extension ("/toto.html#"+tag) + if (c!='+') { // PAS de plus à la fin +#if 0 + char* a; +#endif + // "Comparisons of scheme names MUST be case-insensitive" (RFC2616) + //if ((strncmp(tempo,"http://",7)==0) || (strncmp(tempo,"ftp://",6)==0)) // ok pas de problème + if ( + (strfield(tempo,"http:")) + || (strfield(tempo,"ftp:")) #if HTS_USEOPENSSL - || (strfield(tempo,"https:")) + || ( + SSL_is_available && + (strfield(tempo,"https:")) + ) #endif - ) // ok pas de problème + ) // ok pas de problème + url_ok=1; + else if (tempo[strlen(tempo)-1]=='/') { // un slash: ok.. + if (inscript) // sinon si pas javascript, méfiance (répertoire style base?) url_ok=1; - else if (tempo[strlen(tempo)-1]=='/') { // un slash: ok.. - if (inscript) // sinon si pas javascript, méfiance (répertoire style base?) 
+ } +#if 0 + else if ((a=strchr(tempo,'/'))) { // un slash: ok.. + if (inscript) { // sinon si pas javascript, méfiance (style "text/css") + if (strchr(a+1,'/')) // un seul / : abandon (STYLE type='text/css') + if (!strchr(tempo,' ')) // avoid spaces (too dangerous for comments) url_ok=1; - } else if ((a=strchr(tempo,'/'))) { // un slash: ok.. - if (inscript) { // sinon si pas javascript, méfiance (style "text/css") - if (strchr(a+1,'/')) // un seul / : abandon (STYLE type='text/css') - url_ok=1; - } } } - // Prendre si extension reconnue - if (!url_ok) { - get_httptype(type,tempo,0); - if (strnotempty(type)) // type reconnu! - url_ok=1; - else if (is_dyntype(get_ext(tempo))) // reconnu php,cgi,asp.. - url_ok=1; - // MAIS pas les foobar@aol.com !! - if (strchr(tempo,'@')) - url_ok=0; - } - // - // Ok, cela pourrait être une URL - if (url_ok) { - - // Check if not fodbidden tag (id,name..) - if (intag_start_valid) { - if (intag_start) - if (intag_startattr) - if (intag) - if (!inscript) - if (!incomment) { - int i=0,nop=0; - while( (nop==0) && (strnotempty(hts_nodetect[i])) ) { - nop=rech_tageq(intag_startattr,hts_nodetect[i]); - i++; - } - // Forbidden tag - if (nop) { - url_ok=0; - if ((opt.debug>1) && (opt.log!=NULL)) { - fspc(opt.log,"debug"); fprintf(opt.log,"dirty parsing: bad tag avoided: %s"LF,hts_nodetect[i-1]); test_flush; - } +#endif + } + // Prendre si extension reconnue + if (!url_ok) { + get_httptype(type,tempo,0); + if (strnotempty(type)) // type reconnu! + url_ok=1; + else if (is_dyntype(get_ext(tempo))) // reconnu php,cgi,asp.. + url_ok=1; + // MAIS pas les foobar@aol.com !! + if (strchr(tempo,'@')) + url_ok=0; + } + // + // Ok, cela pourrait être une URL + if (url_ok) { + + // Check if not fodbidden tag (id,name..) 
+ if (intag_start_valid) { + if (intag_start) + if (intag_startattr) + if (intag) + if (!inscript) + if (!incomment) { + int i=0,nop=0; + while( (nop==0) && (strnotempty(hts_nodetect[i])) ) { + nop=rech_tageq(intag_startattr,hts_nodetect[i]); + i++; + } + // Forbidden tag + if (nop) { + url_ok=0; + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"dirty parsing: bad tag avoided: %s"LF,hts_nodetect[i-1]); test_flush; } } - } - - - // Accepter URL, on la traitera comme une URL normale!! - if (url_ok) - p=1; - + } + } + + + // Accepter URL, on la traitera comme une URL normale!! + if (url_ok) { + valid_p = 1; + p = 0; } + } } } + } } } } } - } // p == 0 - - // plus dans un commentaire - if (!incomment_justquit) - parseall_lastc=*adr; // caractère avant le prochain + } // p == 0 } // not in comment + // plus dans un commentaire + if ( inscript_state_pos == INSCRIPT_START + && inscript_state_pos_prev == INSCRIPT_START) { + parseall_lastc=*adr; // caractère avant le prochain + } + + } // if realspace } // if parseall @@ -992,14 +1455,16 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) { char* quote_adr=NULL; /* adresse du ? 
dans l'adresse */ int ok=1; char quote='\0'; + int quoteinscript=0; + int noquote=0; // si nofollow ou un stop a été déclenché, réécrire tous les liens en externe - if ((nofollow) || (opt.state.stop)) + if ((nofollow) || (opt->state.stop)) p_nocatch=1; - + // écrire codebase avant, flusher avant code if ((p_type==-1) || (p_type==-2)) { - if ((opt.getmode & 1) && (ptr>0)) { + if ((opt->getmode & 1) && (ptr>0)) { HT_ADD_ADR; // refresh } lastsaved=adr; // dernier écrit+1 @@ -1007,20 +1472,31 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) { // sauter espaces adr+=p; - while((is_space(*adr)) && (quote=='\0')) { + while( ( is_space(*adr) || ( + inscriptgen + && adr[0] == '\\' + && is_space(adr[1]) + ) + ) + && quote == '\0' + ) { if (!quote) - if ((*adr=='\"') || (*adr=='\'')) + if ((*adr=='\"') || (*adr=='\'')) { quote=*adr; // on doit attendre cela à la fin - // puis quitter + if (inscriptgen && *(adr - 1) == '\\') { + quoteinscript=1; /* will wait for \" */ + } + } + // puis quitter adr++; // sauter les espaces, "" et cie } - + /* Stop at \n (LF) if primary links*/ if (ptr == 0) quote='\n'; /* s'arrêter que ce soit un ' ou un " : pour document.write('<img src="foo'+a); par exemple! */ else if (inscript) - quote='\0'; + noquote=1; // sauter éventuel \" ou \' javascript if (inscript) { // on est dans un obj.write(".. @@ -1035,7 +1511,7 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) { if (p_searchMETAURL) { int l=0; while( - (adr + l + 4 < r.adr + r.size) + (adr + l + 4 < r->adr + r->size) && (!strfield(adr+l,"URL=")) && (l<128) ) l++; if (!strfield(adr+l,"URL=")) @@ -1043,15 +1519,19 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) { else adr+=(l+4); } - + /* éviter les javascript:document.location=.. : les parser, plutôt */ if (ok!=-1) { - if (strfield(adr,"javascript:")) { + if (strfield(adr,"javascript:") + && ! 
inscript /* we don't want to parse 'javascript:' inside document.write inside scripts */ + ) { ok=-1; /* On est désormais dans du code javascript */ + inscript_name=""; inscript_tag=inscript=1; + inscript_state_pos=INSCRIPT_START; inscript_tag_lastc=quote; /* à attendre à la fin */ } } @@ -1065,7 +1545,7 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) { // ne pas flusher après code si on doit écrire le codebase avant! if ((p_type!=-1) && (p_type!=2) && (p_type!=-2)) { - if ((opt.getmode & 1) && (ptr>0)) { + if ((opt->getmode & 1) && (ptr>0)) { HT_ADD_ADR; // refresh } lastsaved=adr; // dernier écrit+1 @@ -1086,7 +1566,11 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) { if (ok > 0) { //if (*eadr!=' ') { if (is_space(*eadr)) { // guillemets,CR, etc - if ((!quote) || (*eadr==quote)) // si pas d'attente de quote spéciale ou si quote atteinte + if ( + ( *eadr == quote && ( !quoteinscript || *(eadr -1) == '\\') ) // end quote + || ( noquote && (*eadr == '\"' || *eadr == '\'') ) // end at any quote + || (!noquote && quote == '\0' && is_realspace(*eadr) ) // unquoted href + ) // si pas d'attente de quote spéciale ou si quote atteinte ok=0; } else if (ending_p && (*eadr==ending_p)) ok=0; @@ -1114,7 +1598,7 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) { //} } eadr++; - } while(ok==1); + } while(ok==1); // Empty link detected if ( (((int) (eadr - adr))) <= 1) { // link empty @@ -1122,13 +1606,18 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) { if (*adr != '#') { // Not empty+unique # if ( (((int) (eadr - adr)) == 1)) { // 1=link empty with delim (end_adr-start_adr) if (quote) { - if ((opt.getmode & 1) && (ptr>0)) { + if ((opt->getmode & 1) && (ptr>0)) { HT_ADD("#"); // We add this for a <href=""> } } } } } + + // This is a dirty and horrible hack to avoid parsing an Adobe GoLive bogus tag + if (strfield(adr, "(Empty Reference!)")) { + ok=-1; // No + } } @@ -1145,7 +1634,7 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) { //printf("link: 
%s\n",lien); // supprimer les espaces while((lien[strlen(lien)-1]==' ') && (strnotempty(lien))) lien[strlen(lien)-1]='\0'; - + #if HTS_STRIP_DOUBLE_SLASH // supprimer les // en / (sauf pour http://) @@ -1169,14 +1658,14 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) { } else { char tempo[HTS_URLMAXSIZE*2]; tempo[0]='\0'; - strncat(tempo,a,(int) p - (int) a); - strcat (tempo,p+1); - strcpy(a,tempo); // recopier + strncatbuff(tempo,a,(int) p - (int) a); + strcatbuff (tempo,p+1); + strcpybuff(a,tempo); // recopier } } } #endif - + } else lien[0]='\0'; // erreur @@ -1194,57 +1683,61 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) { // 0: autorisé // 1: interdit (patcher tout de même adresse) - if ((opt.debug>1) && (opt.log!=NULL)) { - fspc(opt.log,"debug"); fprintf(opt.log,"link detected in html: %s"LF,lien); test_flush; + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"link detected in html: %s"LF,lien); test_flush; } - + // external check #if HTS_ANALYSTE if (!hts_htmlcheck_linkdetected(lien)) { error=1; // erreur - if (opt.errlog) { - fspc(opt.errlog,"error"); fprintf(opt.errlog,"Link %s refused by external wrapper"LF,lien); + if (opt->errlog) { + fspc(opt->errlog,"error"); fprintf(opt->errlog,"Link %s refused by external wrapper"LF,lien); test_flush; } } #endif // purger espaces de début et fin, CR,LF résiduels - // (IMG SRC="foo.<\n>gif") + // (IMG SRC="foo.<\n><\t>gif<\t>") { - char* a; - while (is_realspace(lien[0])) { - char tempo[HTS_URLMAXSIZE*2]; - tempo[0]='\0'; - strcpy(tempo,lien+1); - strcpy(lien,tempo); - } - while(strnotempty(lien) - && (is_realspace(lien[max(0,(int)(strlen(lien))-1)])) ) { - lien[strlen(lien)-1]='\0'; + char* a = lien; + int llen; + + // strip ending spaces + llen = ( *a != '\0' ) ? 
strlen(a) : 0; + while(llen > 0 && is_realspace(lien[llen - 1]) ) { + a[--llen]='\0'; } - while ((a=strchr(lien,'\n'))) { - char tempo[HTS_URLMAXSIZE*2]; - tempo[0]='\0'; - strncat(tempo,lien,(int) (a - lien)); - strcat(tempo,a+1); - strcpy(lien,tempo); - } - while ((a=strchr(lien,'\r'))) { - char tempo[HTS_URLMAXSIZE*2]; - tempo[0]='\0'; - strncat(tempo,lien,(int) (a - lien)); - strcat(tempo,a+1); - strcpy(lien,tempo); + // skip leading ones + while(is_realspace(*a)) a++; + // strip cr, lf, tab inside URL + llen = 0; + while(*a) { + if (*a != '\n' && *a != '\r' && *a != '\t') { + lien[llen++] = *a; + } + a++; } + lien[llen] = '\0'; } + + // commas are forbidden + if (archivetag_p) { + if (strchr(lien, ',')) { + error=1; // erreur + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"link rejected (multiple-archive) %s"LF,lien); test_flush; + } + } + } /* Unescape/escape %20 and other */ { char query[HTS_URLMAXSIZE*2]; char* a=strchr(lien,'?'); if (a) { - strcpy(query,a); + strcpybuff(query,a); *a='\0'; } else query[0]='\0'; @@ -1252,10 +1745,11 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) { unescape_amp(lien); unescape_amp(query); // décoder l'inutile (%2E par exemple) et coder espaces - // XXXXXXXXXXXXXXXXX strcpy(lien,unescape_http(lien)); - strcpy(lien,unescape_http_unharm(lien, (no_esc_utf)?0:1)); + // XXXXXXXXXXXXXXXXX strcpybuff(lien,unescape_http(lien)); + strcpybuff(lien,unescape_http_unharm(lien, (no_esc_utf)?0:1)); + escape_remove_control(lien); escape_spc_url(lien); - strcat(lien,query); /* restore */ + strcatbuff(lien,query); /* restore */ } // convertir les éventuels \ en des / pour éviter des problèmes de reconnaissance! 
@@ -1267,11 +1761,11 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) { // supprimer le(s) ./ while ((lien[0]=='.') && (lien[1]=='/')) { char tempo[HTS_URLMAXSIZE*2]; - strcpy(tempo,lien+2); - strcpy(lien,tempo); + strcpybuff(tempo,lien+2); + strcpybuff(lien,tempo); } if (strnotempty(lien)==0) // sauf si plus de nom de fichier - strcpy(lien,"./"); + strcpybuff(lien,"./"); // vérifie les /~machin -> /~machin/ // supposition dangereuse? @@ -1282,7 +1776,7 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) { // éviter aussi index~1.html while (((int) a>(int) lien) && (*a!='~') && (*a!='/') && (*a!='.')) a--; if (*a=='~') { - strcat(lien,"/"); // ajouter slash + strcatbuff(lien,"/"); // ajouter slash } } #endif @@ -1305,7 +1799,7 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) { } while((b != a) && (b)); } } - + // éliminer les éventuels :80 (port par défaut!) if (link_has_authority(lien)) { char * a; @@ -1329,9 +1823,9 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) { if (port==defport) { // port 80, default - c'est débile char tempo[HTS_URLMAXSIZE*2]; tempo[0]='\0'; - strncat(tempo,lien,(int) (a - lien)); - strcat(tempo,a+3); // sauter :80 - strcpy(lien,tempo); + strncatbuff(tempo,lien,(int) (a - lien)); + strcatbuff(tempo,a+3); // sauter :80 + strcpybuff(lien,tempo); } } } @@ -1339,9 +1833,9 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) { // filtrer les parazites (mailto & cie) /* if (strfield(lien,"mailto:")) { // ne pas traiter - error=1; + error=1; } else if (strfield(lien,"news:")) { // ne pas traiter - error=1; + error=1; } */ @@ -1351,16 +1845,16 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) { char *a = lien+strlen(lien)-1; while(( a > lien) && (*a!='/') && (*a!='.')) a--; if (*a != '.') - strcat(lien,".class"); // ajouter .class + strcatbuff(lien,".class"); // ajouter .class else if (!strfield2(a,".class")) - strcat(lien,".class"); // idem + strcatbuff(lien,".class"); // idem } } // si c'est un chemin, alors vérifier 
(toto/toto.html -> http://www/toto/) if (!error) { - if ((opt.debug>1) && (opt.log!=NULL)) { - fspc(opt.log,"debug"); fprintf(opt.log,"position link check %s"LF,lien); test_flush; + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"position link check %s"LF,lien); test_flush; } if ((p_type==2) || (p_type==-2)) { // code ou codebase @@ -1368,14 +1862,24 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) { if (p_type==-2) { // codebase if (strnotempty(lien)) { if (fil[strlen(lien)-1]!='/') { // pas répertoire - strcat(lien,"/"); + strcatbuff(lien,"/"); } } } + + /* base has always authority */ + if (p_type==2 && !link_has_authority(lien)) { + char tmp[HTS_URLMAXSIZE*2]; + strcpybuff(tmp, "http://"); + strcatbuff(tmp, lien); + strcpybuff(lien, tmp); + } + /* only one ending / (bug on some pages) */ if ((int)strlen(lien)>2) { - while( (lien[strlen(lien)-2]=='/') && ((int)strlen(lien)>2) ) /* double // (bug) */ - lien[strlen(lien)-1]='\0'; + int len = (int) strlen(lien); + while(len > 1 && lien[len-1] == '/' && lien[len-2] == '/' ) /* double // (bug) */ + lien[--len]='\0'; } // copier nom host si besoin est if (!link_has_authority(lien)) { // pas de http:// @@ -1383,11 +1887,11 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) { if (ident_url_relatif(lien,urladr,urlfil,adr2,fil2)<0) { error=1; } else { - strcpy(lien,"http://"); - strcat(lien,adr2); + strcpybuff(lien,"http://"); + strcatbuff(lien,adr2); if (*fil2!='/') - strcat(lien,"/"); - strcat(lien,fil2); + strcatbuff(lien,"/"); + strcatbuff(lien,fil2); { char* a; a=lien+strlen(lien)-1; @@ -1397,12 +1901,12 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) { } } //char tempo[HTS_URLMAXSIZE*2]; - //strcpy(tempo,"http://"); - //strcat(tempo,urladr); // host + //strcpybuff(tempo,"http://"); + //strcatbuff(tempo,urladr); // host //if (*lien!='/') - // strcat(tempo,"/"); - //strcat(tempo,lien); - //strcpy(lien,tempo); + // strcatbuff(tempo,"/"); + //strcatbuff(tempo,lien); 
+ //strcpybuff(lien,tempo); } } @@ -1421,19 +1925,19 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) { // stocker base ou codebase? switch(p_type) { case 2: { - //if (*lien!='/') strcat(base,"/"); - strcpy(base,lien); + //if (*lien!='/') strcatbuff(base,"/"); + strcpybuff(base,lien); } break; // base case -2: { - //if (*lien!='/') strcat(codebase,"/"); - strcpy(codebase,lien); + //if (*lien!='/') strcatbuff(codebase,"/"); + strcpybuff(codebase,lien); } break; // base } - if ((opt.debug>1) && (opt.log!=NULL)) { - fspc(opt.log,"debug"); fprintf(opt.log,"code/codebase link %s base %s"LF,lien,base); test_flush; + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"code/codebase link %s base %s"LF,lien,base); test_flush; } //printf("base code: %s - %s\n",lien,base); } @@ -1449,438 +1953,463 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) { // ajouter chemin de base href.. if (strnotempty(_base)) { // considérer base if (!link_has_authority(lien)) { // non absolue - //if (*lien!='/') { // non absolu sur le site (/) - if ( ((int) strlen(_base)+(int) strlen(lien))<HTS_URLMAXSIZE) { - // mailto: and co: do NOT add base - if (ident_url_relatif(lien,urladr,urlfil,adr,fil)>=0) { - char tempo[HTS_URLMAXSIZE*2]; - // base est absolue - strcpy(tempo,_base); - strcat(tempo,lien + ((*lien=='/')?1:0) ); - strcpy(lien,tempo); // patcher en considérant base - // ** vérifier que ../ fonctionne (ne doit pas arriver mais bon..) 
- - if ((opt.debug>1) && (opt.log!=NULL)) { - fspc(opt.log,"debug"); fprintf(opt.log,"link modified with code/codebase %s"LF,lien); test_flush; + if (*lien!='/') { // non absolu sur le site (/) + if ( ((int) strlen(_base)+(int) strlen(lien))<HTS_URLMAXSIZE) { + // mailto: and co: do NOT add base + if (ident_url_relatif(lien,urladr,urlfil,adr,fil)>=0) { + char tempo[HTS_URLMAXSIZE*2]; + // base est absolue + strcpybuff(tempo,_base); + strcatbuff(tempo,lien + ((*lien=='/')?1:0) ); + strcpybuff(lien,tempo); // patcher en considérant base + // ** vérifier que ../ fonctionne (ne doit pas arriver mais bon..) + + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"link modified with code/codebase %s"LF,lien); test_flush; + } + } + } else { + error=1; // erreur + if (opt->errlog) { + fspc(opt->errlog,"error"); fprintf(opt->errlog,"Link %s too long with base href"LF,lien); + test_flush; } } } else { - error=1; // erreur - if (opt.errlog) { - fspc(opt.errlog,"error"); fprintf(opt.errlog,"Link %s too long with base href"LF,lien); - test_flush; + char badr[HTS_URLMAXSIZE*2], bfil[HTS_URLMAXSIZE*2]; + if (ident_url_absolute(_base, badr, bfil) >=0 ) { + if ( ((int) strlen(badr)+(int) strlen(lien)) < HTS_URLMAXSIZE) { + char tempo[HTS_URLMAXSIZE*2]; + // base est absolue + tempo[0] = '\0'; + if (!link_has_authority(badr)) { + strcatbuff(tempo, "http://"); + } + strcatbuff(tempo,badr); + strcatbuff(tempo,lien); + strcpybuff(lien,tempo); // patcher en considérant base + + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"link modified with code/codebase %s"LF,lien); test_flush; + } + } else { + error=1; // erreur + if (opt->errlog) { + fspc(opt->errlog,"error"); fprintf(opt->errlog,"Link %s too long with base href"LF,lien); + test_flush; + } + } } } - //} } } } + } + + + // transformer lien quelconque (http, relatif, etc) en une adresse + // et un chemin+fichier (adr,fil) + if (!error) { + int reponse; + if 
((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"build relative link %s with %s%s"LF,lien,relativeurladr,relativeurlfil); test_flush; } - - - // transformer lien quelconque (http, relatif, etc) en une adresse - // et un chemin+fichier (adr,fil) - if (!error) { - int reponse; - if ((opt.debug>1) && (opt.log!=NULL)) { - fspc(opt.log,"debug"); fprintf(opt.log,"build relative link %s with %s%s"LF,lien,urladr,urlfil); test_flush; - } - if ((reponse=ident_url_relatif(lien,urladr,urlfil,adr,fil))<0) { - adr[0]='\0'; // erreur - if (reponse==-2) { - if (opt.errlog) { - fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Link %s not caught (unknown ftp:// protocol)"LF,lien); - test_flush; - } - } else { - if ((opt.debug>1) && (opt.errlog!=NULL)) { - fspc(opt.errlog,"debug"); fprintf(opt.errlog,"ident_url_relatif failed for %s with %s%s"LF,lien,urladr,urlfil); test_flush; - } + if ((reponse=ident_url_relatif(lien,relativeurladr,relativeurlfil,adr,fil))<0) { + adr[0]='\0'; // erreur + if (reponse==-2) { + if (opt->errlog) { + fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Link %s not caught (unknown protocol)"LF,lien); + test_flush; + } + } else { + if ((opt->debug>1) && (opt->errlog!=NULL)) { + fspc(opt->errlog,"debug"); fprintf(opt->errlog,"ident_url_relatif failed for %s with %s%s"LF,lien,relativeurladr,relativeurlfil); test_flush; } } } else { - if ((opt.debug>1) && (opt.log!=NULL)) { - fspc(opt.log,"debug"); fprintf(opt.log,"link %s not build, error detected before"LF,lien); test_flush; + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"built relative link %s with %s%s -> %s%s"LF,lien,relativeurladr,relativeurlfil,adr,fil); test_flush; } - adr[0]='\0'; } - + } else { + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"link %s not build, error detected before"LF,lien); test_flush; + } + adr[0]='\0'; + } + #if HTS_CHECK_STRANGEDIR - // !ATTENTION! 
- // Ici on teste les exotiques du genre www.truc.fr/machin (sans slash à la fin) - // je n'ai pas encore trouvé le moyen de faire la différence entre un répertoire - // et un fichier en http A PRIORI : je fais donc un test - // En cas de moved xxx, on recalcule adr et fil, tout simplement - // DEFAUT: test effectué plusieurs fois! à revoir!!! - if ((adr[0]!='\0') && (strcmp(adr,"file://") && (p_type!=2) && (p_type!=-2)) { - //## if ((adr[0]!='\0') && (adr[0]!=lOCAL_CHAR) && (p_type!=2) && (p_type!=-2)) { - if (fil[strlen(fil)-1]!='/') { // pas répertoire - if (ishtml(fil)==-2) { // pas d'extension - char loc[HTS_URLMAXSIZE*2]; // éventuelle nouvelle position - loc[0]='\0'; - if ((opt.debug>1) && (opt.log!=NULL)) { - fspc(opt.log,"debug"); fprintf(opt.log,"link-check-directory: %s%s"LF,adr,fil); - test_flush; - } - - // tester éventuelle nouvelle position - switch (http_location(adr,fil,loc).statuscode) { - case 200: // ok au final - if (strnotempty(loc)) { // a changé d'adresse - if (opt.errlog) { - fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Link %s%s has moved to %s for %s%s"LF,adr,fil,loc,urladr,urlfil); + // !ATTENTION! + // Ici on teste les exotiques du genre www.truc.fr/machin (sans slash à la fin) + // je n'ai pas encore trouvé le moyen de faire la différence entre un répertoire + // et un fichier en http A PRIORI : je fais donc un test + // En cas de moved xxx, on recalcule adr et fil, tout simplement + // DEFAUT: test effectué plusieurs fois! à revoir!!! 
+ if ((adr[0]!='\0') && (strcmp(adr,"file://") && (p_type!=2) && (p_type!=-2)) { + //## if ((adr[0]!='\0') && (adr[0]!=lOCAL_CHAR) && (p_type!=2) && (p_type!=-2)) { + if (fil[strlen(fil)-1]!='/') { // pas répertoire + if (ishtml(fil)==-2) { // pas d'extension + char loc[HTS_URLMAXSIZE*2]; // éventuelle nouvelle position + loc[0]='\0'; + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"link-check-directory: %s%s"LF,adr,fil); + test_flush; + } + + // tester éventuelle nouvelle position + switch (http_location(adr,fil,loc).statuscode) { + case 200: // ok au final + if (strnotempty(loc)) { // a changé d'adresse + if (opt->errlog) { + fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Link %s%s has moved to %s for %s%s"LF,adr,fil,loc,urladr,urlfil); + test_flush; + } + + // recalculer adr et fil! + if (ident_url_absolute(loc,adr,fil)==-1) { + adr[0]='\0'; // cancel + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"link-check-dir: %s%s"LF,adr,fil); test_flush; } - - // recalculer adr et fil! 
- if (ident_url_absolute(loc,adr,fil)==-1) { - adr[0]='\0'; // cancel - if ((opt.debug>1) && (opt.log!=NULL)) { - fspc(opt.log,"debug"); fprintf(opt.log,"link-check-dir: %s%s"LF,adr,fil); - test_flush; - } - } - - } - break; - case -2: case -3: // timeout ou erreur grave - if (opt.errlog) { - fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Connection too slow for testing link %s%s (from %s%s)"LF,adr,fil,urladr,urlfil); - test_flush; } - break; + } + break; + case -2: case -3: // timeout ou erreur grave + if (opt->errlog) { + fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Connection too slow for testing link %s%s (from %s%s)"LF,adr,fil,urladr,urlfil); + test_flush; } + break; } - } - } -#endif - - // Le lien doit juste être réécrit, mais ne doit pas générer un lien - // exemple: <FORM ACTION="url_cgi"> - if (p_nocatch) { - forbidden_url=1; // interdire récupération du lien - if ((opt.debug>1) && (opt.log!=NULL)) { - fspc(opt.log,"debug"); fprintf(opt.log,"link forced external at %s%s"LF,adr,fil); - test_flush; + } + } + } +#endif + + // Le lien doit juste être réécrit, mais ne doit pas générer un lien + // exemple: <FORM ACTION="url_cgi"> + if (p_nocatch) { + forbidden_url=1; // interdire récupération du lien + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"link forced external at %s%s"LF,adr,fil); + test_flush; } - - // Tester si un lien doit être accepté ou refusé (wizard) - // forbidden_url=1 : lien refusé - // forbidden_url=0 : lien accepté - //if ((ptr>0) && (p_type!=2) && (p_type!=-2)) { // tester autorisations? - if ((p_type!=2) && (p_type!=-2)) { // tester autorisations? 
- if (!p_nocatch) { - if (adr[0]!='\0') { - if ((opt.debug>1) && (opt.log!=NULL)) { - fspc(opt.log,"debug"); fprintf(opt.log,"wizard link test at %s%s.."LF,adr,fil); - test_flush; - } - forbidden_url=hts_acceptlink(&opt,ptr,lien_tot,liens, - adr,fil, - &filters,&filptr,opt.maxfilter, - &robots, - &set_prio_to, - &just_test_it); - if ((opt.debug>1) && (opt.log!=NULL)) { - fspc(opt.log,"debug"); fprintf(opt.log,"result for wizard link test: %d"LF,forbidden_url); - test_flush; - } + } + + // Tester si un lien doit être accepté ou refusé (wizard) + // forbidden_url=1 : lien refusé + // forbidden_url=0 : lien accepté + //if ((ptr>0) && (p_type!=2) && (p_type!=-2)) { // tester autorisations? + if ((p_type!=2) && (p_type!=-2)) { // tester autorisations? + if (!p_nocatch) { + if (adr[0]!='\0') { + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"wizard link test at %s%s.."LF,adr,fil); + test_flush; + } + forbidden_url=hts_acceptlink(opt,ptr,lien_tot,liens, + adr,fil, + &set_prio_to, + &just_test_it); + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"result for wizard link test: %d"LF,forbidden_url); + test_flush; } } } + } + + // calculer meme_adresse + meme_adresse=strfield2(jump_identification(adr),jump_identification(urladr)); + + + + // Début partie sauvegarde + + // ici on forme le nom du fichier à sauver, et on patche l'URL + if (adr[0]!='\0') { + // savename: simplifier les ../ et autres joyeusetés + char save[HTS_URLMAXSIZE*2]; + int r_sv=0; + // En cas de moved, adresse première + char former_adr[HTS_URLMAXSIZE*2]; + char former_fil[HTS_URLMAXSIZE*2]; + // + save[0]='\0'; former_adr[0]='\0'; former_fil[0]='\0'; + // - // calculer meme_adresse - meme_adresse=strfield2(jump_identification(adr),jump_identification(urladr)); - - - - // Début partie sauvegarde - - // ici on forme le nom du fichier à sauver, et on patche l'URL - if (adr[0]!='\0') { - // savename: simplifier les ../ et autres 
joyeusetés - char save[HTS_URLMAXSIZE*2]; - int r_sv=0; - // En cas de moved, adresse première - char former_adr[HTS_URLMAXSIZE*2]; - char former_fil[HTS_URLMAXSIZE*2]; - // - save[0]='\0'; former_adr[0]='\0'; former_fil[0]='\0'; - // - - // nom du chemin à sauver si on doit le calculer - // note: url_savename peut décider de tester le lien si il le trouve - // suspect, et modifier alors adr et fil - // dans ce cas on aura une référence directe au lieu des traditionnels - // moved en cascade (impossible à reproduire à priori en local, lorsque des fichiers - // gif sont impliqués par exemple) - if ((p_type!=2) && (p_type!=-2)) { // pas base href ou codebase - if (forbidden_url!=1) { - char last_adr[HTS_URLMAXSIZE*2]; - last_adr[0]='\0'; - //char last_fil[HTS_URLMAXSIZE*2]=""; - strcpy(last_adr,adr); // ancienne adresse - //strcpy(last_fil,fil); // ancien chemin - r_sv=url_savename(adr,fil,save,former_adr,former_fil,liens[ptr]->adr,liens[ptr]->fil,&opt,liens,lien_tot,back,back_max,&cache,&hash,ptr,numero_passe); - if (strcmp(jump_identification(last_adr),jump_identification(adr)) != 0) { // a changé - - // 2e test si moved - - // Tester si un lien doit être accepté ou refusé (wizard) - // forbidden_url=1 : lien refusé - // forbidden_url=0 : lien accepté - if ((ptr>0) && (p_type!=2) && (p_type!=-2)) { // tester autorisations? 
- if (!p_nocatch) { - if (adr[0]!='\0') { - if ((opt.debug>1) && (opt.log!=NULL)) { - fspc(opt.log,"debug"); fprintf(opt.log,"wizard moved link retest at %s%s.."LF,adr,fil); - test_flush; - } - forbidden_url=hts_acceptlink(&opt,ptr,lien_tot,liens, - adr,fil, - &filters,&filptr,opt.maxfilter, - &robots, - &set_prio_to, - &just_test_it); - if ((opt.debug>1) && (opt.log!=NULL)) { - fspc(opt.log,"debug"); fprintf(opt.log,"result for wizard moved link retest: %d"LF,forbidden_url); - test_flush; - } + // nom du chemin à sauver si on doit le calculer + // note: url_savename peut décider de tester le lien si il le trouve + // suspect, et modifier alors adr et fil + // dans ce cas on aura une référence directe au lieu des traditionnels + // moved en cascade (impossible à reproduire à priori en local, lorsque des fichiers + // gif sont impliqués par exemple) + if ((p_type!=2) && (p_type!=-2)) { // pas base href ou codebase + if (forbidden_url!=1) { + char last_adr[HTS_URLMAXSIZE*2]; + last_adr[0]='\0'; + //char last_fil[HTS_URLMAXSIZE*2]=""; + strcpybuff(last_adr,adr); // ancienne adresse + //strcpybuff(last_fil,fil); // ancien chemin + r_sv=url_savename(adr,fil,save,former_adr,former_fil,liens[ptr]->adr,liens[ptr]->fil,opt,liens,lien_tot,back,back_max,cache,hash,ptr,numero_passe); + if (strcmp(jump_identification(last_adr),jump_identification(adr)) != 0) { // a changé + + // 2e test si moved + + // Tester si un lien doit être accepté ou refusé (wizard) + // forbidden_url=1 : lien refusé + // forbidden_url=0 : lien accepté + if ((ptr>0) && (p_type!=2) && (p_type!=-2)) { // tester autorisations? 
+ if (!p_nocatch) { + if (adr[0]!='\0') { + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"wizard moved link retest at %s%s.."LF,adr,fil); + test_flush; + } + forbidden_url=hts_acceptlink(opt,ptr,lien_tot,liens, + adr,fil, + &set_prio_to, + &just_test_it); + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"result for wizard moved link retest: %d"LF,forbidden_url); + test_flush; } } } - - //import_done=1; // c'est un import! - meme_adresse=0; // on a changé } - } else { - strcpy(save,""); // dummy + + //import_done=1; // c'est un import! + meme_adresse=0; // on a changé } + } else { + strcpybuff(save,""); // dummy } - if (r_sv!=-1) { // pas d'erreur, on continue - /* log */ - if ((opt.debug>1) && (opt.log!=NULL)) { - fspc(opt.log,"debug"); - if (forbidden_url!=1) { // le lien va être chargé - if ((p_type==2) || (p_type==-2)) { // base href ou codebase, pas un lien - fprintf(opt.log,"Code/Codebase: %s%s"LF,adr,fil); - } else if ((opt.getmode & 4)==0) { - fprintf(opt.log,"Record: %s%s -> %s"LF,adr,fil,save); - } else { - if (!ishtml(fil)) - fprintf(opt.log,"Record after: %s%s -> %s"LF,adr,fil,save); - else - fprintf(opt.log,"Record: %s%s -> %s"LF,adr,fil,save); - } - } else - fprintf(opt.log,"External: %s%s"LF,adr,fil); - test_flush; - } - /* FIN log */ - - // écrire lien - if ((p_type==2) || (p_type==-2)) { // base href ou codebase, sauter - lastsaved=eadr-1+1; // sauter " - } - /* */ - else if (opt.urlmode==0) { // URL absolue dans tous les cas - if ((opt.getmode & 1) && (ptr>0)) { // ecrire les html - if (!link_has_authority(adr)) { - HT_ADD("http://"); - } else { - char* aut = strstr(adr, "//"); - if (aut) { - char tmp[256]; - tmp[0]='\0'; - strncat(tmp, adr, (int) (aut - adr)); // scheme - HT_ADD(tmp); // Protocol - HT_ADD("//"); - } - } - - if (!opt.passprivacy) { - HT_ADD(jump_protocol(adr)); // Password - } else { - HT_ADD(jump_identification(adr)); // No Password + } + if (r_sv!=-1) { 
// pas d'erreur, on continue + /* log */ + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); + if (forbidden_url!=1) { // le lien va être chargé + if ((p_type==2) || (p_type==-2)) { // base href ou codebase, pas un lien + fprintf(opt->log,"Code/Codebase: %s%s"LF,adr,fil); + } else if ((opt->getmode & 4)==0) { + fprintf(opt->log,"Record: %s%s -> %s"LF,adr,fil,save); + } else { + if (!ishtml(fil)) + fprintf(opt->log,"Record after: %s%s -> %s"LF,adr,fil,save); + else + fprintf(opt->log,"Record: %s%s -> %s"LF,adr,fil,save); + } + } else + fprintf(opt->log,"External: %s%s"LF,adr,fil); + test_flush; + } + /* FIN log */ + + // écrire lien + if ((p_type==2) || (p_type==-2)) { // base href ou codebase, sauter + lastsaved=eadr-1+1; // sauter " + } + /* */ + else if (opt->urlmode==0) { // URL absolue dans tous les cas + if ((opt->getmode & 1) && (ptr>0)) { // ecrire les html + if (!link_has_authority(adr)) { + HT_ADD("http://"); + } else { + char* aut = strstr(adr, "//"); + if (aut) { + char tmp[256]; + tmp[0]='\0'; + strncatbuff(tmp, adr, (int) (aut - adr)); // scheme + HT_ADD(tmp); // Protocol + HT_ADD("//"); } - if (*fil!='/') - HT_ADD("/"); - HT_ADD(fil); } - lastsaved=eadr-1; // dernier écrit+1 (enfin euh apres on fait un ++ alors hein) + + if (!opt->passprivacy) { + HT_ADD(jump_protocol(adr)); // Password + } else { + HT_ADD(jump_identification(adr)); // No Password + } + if (*fil!='/') + HT_ADD("/"); + HT_ADD(fil); + } + lastsaved=eadr-1; // dernier écrit+1 (enfin euh apres on fait un ++ alors hein) /* */ - } else if (opt.urlmode >= 4) { // ne rien faire dans tous les cas! + } else if (opt->urlmode >= 4) { // ne rien faire dans tous les cas! /* */ /* leave the link 'as is' */ /* Sinon, dépend de interne/externe */ - } else if (forbidden_url==1) { // le lien ne sera pas chargé, référence externe! 
- if ((opt.getmode & 1) && (ptr>0)) { - if (p_type!=-1) { // pas que le nom de fichier (pas classe java) - if (!opt.external) { - if (!link_has_authority(adr)) { - HT_ADD("http://"); - if (!opt.passprivacy) { - HT_ADD(adr); // Password + } else if (forbidden_url==1) { // le lien ne sera pas chargé, référence externe! + if ((opt->getmode & 1) && (ptr>0)) { + if (p_type!=-1) { // pas que le nom de fichier (pas classe java) + if (!opt->external) { + if (!link_has_authority(adr)) { + HT_ADD("http://"); + if (!opt->passprivacy) { + HT_ADD(adr); // Password + } else { + HT_ADD(jump_identification(adr)); // No Password + } + if (*fil!='/') + HT_ADD("/"); + HT_ADD(fil); + } else { + char* aut = strstr(adr, "//"); + if (aut) { + char tmp[256]; + tmp[0]='\0'; + strncatbuff(tmp, adr, (int) (aut - adr)); // scheme + HT_ADD(tmp); // Protocol + HT_ADD("//"); + if (!opt->passprivacy) { + HT_ADD(jump_protocol(adr)); // Password } else { HT_ADD(jump_identification(adr)); // No Password } if (*fil!='/') HT_ADD("/"); HT_ADD(fil); - } else { - char* aut = strstr(adr, "//"); - if (aut) { - char tmp[256]; - tmp[0]='\0'; - strncat(tmp, adr, (int) (aut - adr)); // scheme - HT_ADD(tmp); // Protocol - HT_ADD("//"); - if (!opt.passprivacy) { - HT_ADD(jump_protocol(adr)); // Password - } else { - HT_ADD(jump_identification(adr)); // No Password - } - if (*fil!='/') - HT_ADD("/"); - HT_ADD(fil); - } } - // - } else { // fichier/page externe, mais on veut générer une erreur - // - int patch_it=0; - int add_url=0; - char* cat_name=NULL; - char* cat_data=NULL; - int cat_nb=0; - int cat_data_len=0; - - // ajouter lien external - switch ( (link_has_authority(adr)) ? 1 : ( (fil[strlen(fil)-1]=='/')?1:(ishtml(fil)) ) ) { - case 1: case -2: // html ou répertoire - if (opt.getmode & 1) { // sauver html - patch_it=1; // redirect - add_url=1; // avec link? 
- cat_name="external.html"; - cat_nb=0; - cat_data=HTS_DATA_UNKNOWN_HTML; - cat_data_len=HTS_DATA_UNKNOWN_HTML_LEN; - } - break; - default: // inconnu - // asp, cgi.. - if (is_dyntype(get_ext(fil))) { - patch_it=1; // redirect - add_url=1; // avec link? - cat_name="external.html"; - cat_nb=0; - cat_data=HTS_DATA_UNKNOWN_HTML; - cat_data_len=HTS_DATA_UNKNOWN_HTML_LEN; - } else if ( (strfield2(fil+max(0,(int)strlen(fil)-4),".gif")) - || (strfield2(fil+max(0,(int)strlen(fil)-4),".jpg")) - || (strfield2(fil+max(0,(int)strlen(fil)-4),".xbm")) - || (ishtml(fil)!=0) ) { - patch_it=1; // redirect - add_url=1; // avec link aussi - cat_name="external.gif"; - cat_nb=1; - cat_data=HTS_DATA_UNKNOWN_GIF; - cat_data_len=HTS_DATA_UNKNOWN_GIF_LEN; - } - break; - }// html,gif - - if (patch_it) { - char save[HTS_URLMAXSIZE*2]; - char tempo[HTS_URLMAXSIZE*2]; - strcpy(save,opt.path_html); - strcat(save,cat_name); - if (lienrelatif(tempo,save,savename)==0) { - if (!no_esc_utf) - escape_uri(tempo); // escape with %xx - else - escape_uri_utf(tempo); // escape with %xx - HT_ADD(tempo); // page externe - if (add_url) { - HT_ADD("?link="); // page externe - - // same as above - if (!link_has_authority(adr)) { - HT_ADD("http://"); - if (!opt.passprivacy) { - HT_ADD(adr); // Password + } + // + } else { // fichier/page externe, mais on veut générer une erreur + // + int patch_it=0; + int add_url=0; + char* cat_name=NULL; + char* cat_data=NULL; + int cat_nb=0; + int cat_data_len=0; + + // ajouter lien external + switch ( (link_has_authority(adr)) ? 1 : ( (fil[strlen(fil)-1]=='/')?1:(ishtml(fil)) ) ) { + case 1: case -2: // html ou répertoire + if (opt->getmode & 1) { // sauver html + patch_it=1; // redirect + add_url=1; // avec link? + cat_name="external.html"; + cat_nb=0; + cat_data=HTS_DATA_UNKNOWN_HTML; + cat_data_len=HTS_DATA_UNKNOWN_HTML_LEN; + } + break; + default: // inconnu + // asp, cgi.. 
+ if ( (strfield2(fil+max(0,(int)strlen(fil)-4),".gif")) + || (strfield2(fil+max(0,(int)strlen(fil)-4),".jpg")) + || (strfield2(fil+max(0,(int)strlen(fil)-4),".xbm")) + /*|| (ishtml(fil)!=0)*/ ) { + patch_it=1; // redirect + add_url=1; // avec link aussi + cat_name="external.gif"; + cat_nb=1; + cat_data=HTS_DATA_UNKNOWN_GIF; + cat_data_len=HTS_DATA_UNKNOWN_GIF_LEN; + } else /* if (is_dyntype(get_ext(fil))) */ { + patch_it=1; // redirect + add_url=1; // avec link? + cat_name="external.html"; + cat_nb=0; + cat_data=HTS_DATA_UNKNOWN_HTML; + cat_data_len=HTS_DATA_UNKNOWN_HTML_LEN; + } + break; + }// html,gif + + if (patch_it) { + char save[HTS_URLMAXSIZE*2]; + char tempo[HTS_URLMAXSIZE*2]; + strcpybuff(save,opt->path_html); + strcatbuff(save,cat_name); + if (lienrelatif(tempo,save, relativesavename)==0) { + if (!no_esc_utf) + escape_uri(tempo); // escape with %xx + else + escape_uri_utf(tempo); // escape with %xx + HT_ADD(tempo); // page externe + if (add_url) { + HT_ADD("?link="); // page externe + + // same as above + if (!link_has_authority(adr)) { + HT_ADD("http://"); + if (!opt->passprivacy) { + HT_ADD(adr); // Password + } else { + HT_ADD(jump_identification(adr)); // No Password + } + if (*fil!='/') + HT_ADD("/"); + HT_ADD(fil); + } else { + char* aut = strstr(adr, "//"); + if (aut) { + char tmp[256]; + tmp[0]='\0'; + strncatbuff(tmp, adr, (int) (aut - adr) + 2); // scheme + HT_ADD(tmp); + if (!opt->passprivacy) { + HT_ADD(jump_protocol(adr)); // Password } else { HT_ADD(jump_identification(adr)); // No Password } if (*fil!='/') HT_ADD("/"); HT_ADD(fil); - } else { - char* aut = strstr(adr, "//"); - if (aut) { - char tmp[256]; - tmp[0]='\0'; - strncat(tmp, adr, (int) (aut - adr) + 2); // scheme - HT_ADD(tmp); - if (!opt.passprivacy) { - HT_ADD(jump_protocol(adr)); // Password - } else { - HT_ADD(jump_identification(adr)); // No Password - } - if (*fil!='/') - HT_ADD("/"); - HT_ADD(fil); - } } - // - } + // + } - - // écrire fichier? 
- if (verif_external(cat_nb,1)) { - //if (!fexist(fconcat(opt.path_html,cat_name))) { - FILE* fp = filecreate(fconcat(opt.path_html,cat_name)); - if (fp) { - if (cat_data_len==0) { // texte - verif_backblue(opt.path_html); - fprintf(fp,"%s%s","<!-- Created by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->"LF,cat_data); - } else { // data - fwrite(cat_data,cat_data_len,1,fp); - } - fclose(fp); - usercommand(0,NULL,fconcat(opt.path_html,cat_name)); + } + + // écrire fichier? + if (verif_external(cat_nb,1)) { + //if (!fexist(fconcat(opt->path_html,cat_name))) { + FILE* fp = filecreate(fconcat(opt->path_html,cat_name)); + if (fp) { + if (cat_data_len==0) { // texte + verif_backblue(opt,opt->path_html); + fprintf(fp,"%s%s","<!-- Created by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->"LF,cat_data); + } else { // data + fwrite(cat_data,cat_data_len,1,fp); } + fclose(fp); + usercommand(opt,0,NULL,fconcat(opt->path_html,cat_name),"",""); } - } else { // écrire normalement le nom de fichier - HT_ADD("http://"); - if (!opt.passprivacy) { - HT_ADD(adr); // Password - } else { - HT_ADD(jump_identification(adr)); // No Password - } - if (*fil!='/') - HT_ADD("/"); - HT_ADD(fil); - }// patcher? + } + } else { // écrire normalement le nom de fichier + HT_ADD("http://"); + if (!opt->passprivacy) { + HT_ADD(adr); // Password + } else { + HT_ADD(jump_identification(adr)); // No Password + } + if (*fil!='/') + HT_ADD("/"); + HT_ADD(fil); + }// patcher? 
} // external } else { // que le nom de fichier (classe java) // en gros recopie de plus bas: copier codebase et base if (p_flush) { char tempo[HTS_URLMAXSIZE*2]; // <-- ajouté char tempo_pat[HTS_URLMAXSIZE*2]; - + // Calculer chemin tempo_pat[0]='\0'; - strcpy(tempo,fil); // <-- ajouté + strcpybuff(tempo,fil); // <-- ajouté { char* a=strrchr(tempo,'/'); - + // Example: we converted code="x.y.z.foo.class" into "x/y/z/foo.class" // we have to do the contrary now if (add_class_dots_to_patch>0) { @@ -1891,30 +2420,30 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) { } // if add_class_dots_to_patch, this is because there is a problem!! if (add_class_dots_to_patch) { - if (opt.errlog) { - fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Error: can not rewind java path %s, check html code"LF,tempo); + if (opt->errlog) { + fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Error: can not rewind java path %s, check html code"LF,tempo); test_flush; } } } - + // Cut path/filename if (a) { char tempo2[HTS_URLMAXSIZE*2]; - strcpy(tempo2,a+1); // FICHIER - strncat(tempo_pat,tempo,(int) (a - tempo)+1); // chemin - strcpy(tempo,tempo2); // fichier + strcpybuff(tempo2,a+1); // FICHIER + strncatbuff(tempo_pat,tempo,(int) (a - tempo)+1); // chemin + strcpybuff(tempo,tempo2); // fichier } } // érire codebase="chemin" - if ((opt.getmode & 1) && (ptr>0)) { + if ((opt->getmode & 1) && (ptr>0)) { char tempo4[HTS_URLMAXSIZE*2]; tempo4[0]='\0'; if (strnotempty(tempo_pat)) { HT_ADD("codebase=\"http://"); - if (!opt.passprivacy) { + if (!opt->passprivacy) { HT_ADD(adr); // Password } else { HT_ADD(jump_identification(adr)); // No Password @@ -1924,7 +2453,7 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) { HT_ADD("\" "); } - strncat(tempo4,lastsaved,(int) (p_flush - lastsaved)); + strncatbuff(tempo4,lastsaved,(int) (p_flush - lastsaved)); HT_ADD(tempo4); // refresh code=" HT_ADD(tempo); } @@ -1934,9 +2463,9 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) { 
lastsaved=eadr-1; } /* - else if (opt.urlmode==1) { // ABSOLU, c'est le cas le moins courant + else if (opt->urlmode==1) { // ABSOLU, c'est le cas le moins courant // NE FONCTIONNE PAS!! (et est inutile) - if ((opt.getmode & 1) && (ptr>0)) { // ecrire les html + if ((opt->getmode & 1) && (ptr>0)) { // ecrire les html // écrire le lien modifié, absolu HT_ADD("file:"); if (*save=='/') @@ -1947,24 +2476,34 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) { lastsaved=eadr-1; // dernier écrit+1 (enfin euh apres on fait un ++ alors hein) } */ - else if (opt.urlmode==3) { // URI absolue / - if ((opt.getmode & 1) && (ptr>0)) { // ecrire les html - HT_ADD(fil); - } - lastsaved=eadr-1; // dernier écrit+1 (enfin euh apres on fait un ++ alors hein) + else if (opt->mimehtml) { + char buff[HTS_URLMAXSIZE*3]; + HT_ADD("cid:"); + strcpybuff(buff, adr); + strcatbuff(buff, fil); + escape_in_url(buff); + { char* a = buff; while((a = strchr(a, '%'))) { *a = 'X'; a++; } } + HT_ADD(buff); + lastsaved=eadr-1; // dernier écrit+1 (enfin euh apres on fait un ++ alors hein) + } + else if (opt->urlmode==3) { // URI absolue / + if ((opt->getmode & 1) && (ptr>0)) { // ecrire les html + HT_ADD(fil); } - else if (opt.urlmode==2) { // RELATIF + lastsaved=eadr-1; // dernier écrit+1 (enfin euh apres on fait un ++ alors hein) + } + else if (opt->urlmode==2) { // RELATIF char tempo[HTS_URLMAXSIZE*2]; tempo[0]='\0'; // calculer le lien relatif - if (lienrelatif(tempo,save,savename)==0) { + if (lienrelatif(tempo,save,relativesavename)==0) { if (!no_esc_utf) escape_uri(tempo); // escape with %xx else escape_uri_utf(tempo); // escape with %xx - if ((opt.debug>1) && (opt.log!=NULL)) { - fspc(opt.log,"debug"); fprintf(opt.log,"relative link at %s build with %s and %s: %s"LF,adr,save,savename,tempo); + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"relative link at %s build with %s and %s: %s"LF,adr,save,relativesavename,tempo); test_flush; } @@ -1976,7 +2515,7 @@ 
if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) { tempo_pat[0]='\0'; { char* a=strrchr(tempo,'/'); - + // Example: we converted code="x.y.z.foo.class" into "x/y/z/foo.class" // we have to do the contrary now if (add_class_dots_to_patch>0) { @@ -1987,23 +2526,23 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) { } // if add_class_dots_to_patch, this is because there is a problem!! if (add_class_dots_to_patch) { - if (opt.errlog) { - fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Error: can not rewind java path %s, check html code"LF,tempo); + if (opt->errlog) { + fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Error: can not rewind java path %s, check html code"LF,tempo); test_flush; } } } - + if (a) { char tempo2[HTS_URLMAXSIZE*2]; - strcpy(tempo2,a+1); - strncat(tempo_pat,tempo,(int) (a - tempo)+1); // chemin - strcpy(tempo,tempo2); // fichier + strcpybuff(tempo2,a+1); + strncatbuff(tempo_pat,tempo,(int) (a - tempo)+1); // chemin + strcpybuff(tempo,tempo2); // fichier } } // érire codebase="chemin" - if ((opt.getmode & 1) && (ptr>0)) { + if ((opt->getmode & 1) && (ptr>0)) { char tempo4[HTS_URLMAXSIZE*2]; tempo4[0]='\0'; @@ -2013,20 +2552,20 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) { HT_ADD("\" "); } - strncat(tempo4,lastsaved,(int) (p_flush - lastsaved)); + strncatbuff(tempo4,lastsaved,(int) (p_flush - lastsaved)); HT_ADD(tempo4); // refresh code=" } } //lastsaved=adr; // dernier écrit+1 } - if ((opt.getmode & 1) && (ptr>0)) { + if ((opt->getmode & 1) && (ptr>0)) { // écrire le lien modifié, relatif HT_ADD(tempo); - + // Add query-string, for informational purpose only // Useless, because all parameters-pages are saved into different targets - if (opt.includequery) { + if (opt->includequery) { char* a=strchr(lien,'?'); if (a) { HT_ADD(a); @@ -2035,8 +2574,8 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) { } lastsaved=eadr-1; // dernier écrit+1 (enfin euh apres on fait un ++ alors hein) } else { - if (opt.errlog) { - 
fprintf(opt.errlog,"Error building relative link %s and %s"LF,save,savename); + if (opt->errlog) { + fprintf(opt->errlog,"Error building relative link %s and %s"LF,save,relativesavename); test_flush; } } @@ -2046,9 +2585,9 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) { #if 0 if (fexist(save)) { // le fichier existe.. adr[0]='\0'; - //if ((opt.debug>0) && (opt.log!=NULL)) { - if (opt.errlog) { - fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Link has already been written on disk, cancelled: %s"LF,save); + //if ((opt->debug>0) && (opt->log!=NULL)) { + if (opt->errlog) { + fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Link has already been written on disk, cancelled: %s"LF,save); test_flush; } } @@ -2057,30 +2596,30 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) { /* Security check */ if (strlen(save) >= HTS_URLMAXSIZE) { adr[0]='\0'; - if (opt.errlog) { - fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Link is too long: %s"LF,save); + if (opt->errlog) { + fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Link is too long: %s"LF,save); test_flush; } } - - if ((adr[0]!='\0') && (p_type!=2) && (p_type!=-2) && ( (forbidden_url!=1) || (just_test_it))) { // si le fichier n'existe pas, ajouter à la liste + + if ((adr[0]!='\0') && (p_type!=2) && (p_type!=-2) && (forbidden_url!=1) ) { // si le fichier n'existe pas, ajouter à la liste // n'y a-t-il pas trop de liens? if (lien_tot+1 >= lien_max-4) { // trop de liens! printf("PANIC! 
: Too many URLs : >%d [%d]\n",lien_tot,__LINE__); - if (opt.errlog) { - fprintf(opt.errlog,LF"Too many URLs, giving up..(>%d)"LF,lien_max); - fprintf(opt.errlog,"To avoid that: use #L option for more links (example: -#L1000000)"LF); + if (opt->errlog) { + fprintf(opt->errlog,LF"Too many URLs, giving up..(>%d)"LF,lien_max); + fprintf(opt->errlog,"To avoid that: use #L option for more links (example: -#L1000000)"LF); test_flush; } - if ((opt.getmode & 1) && (ptr>0)) { if (fp) { fclose(fp); fp=NULL; } } + if ((opt->getmode & 1) && (ptr>0)) { if (fp) { fclose(fp); fp=NULL; } } XH_uninit; // désallocation mémoire & buffers - return 0; + return -1; } else { // noter le lien sur la listes des liens à charger int pass_fix,dejafait=0; // Calculer la priorité de ce lien - if ((opt.getmode & 4)==0) { // traiter html après + if ((opt->getmode & 4)==0) { // traiter html après pass_fix=0; } else { // vérifier que ce n'est pas un !html if (!ishtml(fil)) @@ -2092,11 +2631,11 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) { /* If the file seems to be an html file, get depth-1 */ /* if (strnotempty(save)) { - if (ishtml(save) == 1) { - // descore_prio = 2; - } else { - // descore_prio = 1; - } + if (ishtml(save) == 1) { + // descore_prio = 2; + } else { + // descore_prio = 1; + } } */ @@ -2107,8 +2646,17 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) { // On part de la fin et on essaye de se presser (économise temps machine) #if HTS_HASH { - int i=hash_read(&hash,save,"",0); // lecture type 0 (sav) + int i=hash_read(hash,save,"",0,opt->urlhack); // lecture type 0 (sav) if (i>=0) { + if ((opt->debug>1) && (opt->log!=NULL)) { + if ( + strcmp(adr, liens[i]->adr) != 0 + || strcmp(fil, liens[i]->fil) != 0 + ) { + fspc(opt->log,"debug"); fprintf(opt->log,"merging similar links %s%s and %s%s"LF,adr,fil,liens[i]->adr,liens[i]->fil); + test_flush; + } + } liens[i]->depth=maximum(liens[i]->depth,liens[ptr]->depth - 1); dejafait=1; } @@ -2143,23 +2691,23 @@ if 
(hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) { if (!just_test_it) { if ( (!strfield(adr,"ftp://")) // non ftp - && (!strfield(adr,"file://")) ) { // non file - if (opt.robots) { // récupérer robots + && (!strfield(adr,"file://")) ) { // non file + if (opt->robots) { // récupérer robots if (ishtml(fil)!=0) { // pas la peine pour des fichiers isolés - if (checkrobots(&robots,adr,"") != -1) { // robots.txt ? - checkrobots_set(&robots,adr,""); // ajouter entrée vide - if (checkrobots(&robots,adr,"") == -1) { // robots.txt ? + if (checkrobots(_ROBOTS,adr,"") != -1) { // robots.txt ? + checkrobots_set(_ROBOTS ,adr,""); // ajouter entrée vide + if (checkrobots(_ROBOTS,adr,"") == -1) { // robots.txt ? // enregistrer robots.txt (MACRO) liens_record(adr,"/robots.txt","","",""); if (liens[lien_tot]==NULL) { // erreur, pas de place réservée printf("PANIC! : Not enough memory [%d]\n",__LINE__); - if (opt.errlog) { - fprintf(opt.errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url))); + if (opt->errlog) { + fprintf(opt->errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url))); test_flush; } - if ((opt.getmode & 1) && (ptr>0)) { if (fp) { fclose(fp); fp=NULL; } } + if ((opt->getmode & 1) && (ptr>0)) { if (fp) { fclose(fp); fp=NULL; } } XH_uninit; // désallocation mémoire & buffers - return 0; + return -1; } liens[lien_tot]->testmode=0; // pas mode test liens[lien_tot]->link_import=0; // pas mode import @@ -2172,13 +2720,13 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) { #if DEBUG_ROBOTS printf("robots.txt: added file robots.txt for %s\n",adr); #endif - if ((opt.debug>1) && (opt.log!=NULL)) { - fspc(opt.log,"debug"); fprintf(opt.log,"robots.txt added at %s"LF,adr); + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"robots.txt added at %s"LF,adr); test_flush; } } else { - if (opt.errlog) { - fprintf(opt.errlog,"Unexpected robots.txt error at 
%d"LF,__LINE__); + if (opt->errlog) { + fprintf(opt->errlog,"Unexpected robots.txt error at %d"LF,__LINE__); test_flush; } } @@ -2193,13 +2741,13 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) { liens_record(adr,fil,save,former_adr,former_fil); if (liens[lien_tot]==NULL) { // erreur, pas de place réservée printf("PANIC! : Not enough memory [%d]\n",__LINE__); - if (opt.errlog) { - fprintf(opt.errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url))); + if (opt->errlog) { + fprintf(opt->errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url))); test_flush; } - if ((opt.getmode & 1) && (ptr>0)) { if (fp) { fclose(fp); fp=NULL; } } + if ((opt->getmode & 1) && (ptr>0)) { if (fp) { fclose(fp); fp=NULL; } } XH_uninit; // désallocation mémoire & buffers - return 0; + return -1; } // mode test? @@ -2226,24 +2774,24 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) { liens[lien_tot]->depth=max(0,min(liens[ptr]->depth-1,set_prio_to-1)); // PRIORITE NULLE (catch page) // noter pass liens[lien_tot]->pass2=pass_fix; - liens[lien_tot]->retry=opt.retry; + liens[lien_tot]->retry=opt->retry; - //strcpy(liens[lien_tot]->adr,adr); - //strcpy(liens[lien_tot]->fil,fil); - //strcpy(liens[lien_tot]->sav,save); - if ((opt.debug>1) && (opt.log!=NULL)) { + //strcpybuff(liens[lien_tot]->adr,adr); + //strcpybuff(liens[lien_tot]->fil,fil); + //strcpybuff(liens[lien_tot]->sav,save); + if ((opt->debug>1) && (opt->log!=NULL)) { if (!just_test_it) { - fspc(opt.log,"debug"); fprintf(opt.log,"OK, NOTE: %s%s -> %s"LF,liens[lien_tot]->adr,liens[lien_tot]->fil,liens[lien_tot]->sav); + fspc(opt->log,"debug"); fprintf(opt->log,"OK, NOTE: %s%s -> %s"LF,liens[lien_tot]->adr,liens[lien_tot]->fil,liens[lien_tot]->sav); } else { - fspc(opt.log,"debug"); fprintf(opt.log,"OK, TEST: %s%s"LF,liens[lien_tot]->adr,liens[lien_tot]->fil); + fspc(opt->log,"debug"); fprintf(opt->log,"OK, TEST: 
%s%s"LF,liens[lien_tot]->adr,liens[lien_tot]->fil); } test_flush; } lien_tot++; // UN LIEN DE PLUS } else { // if !dejafait - if ((opt.debug>1) && (opt.log!=NULL)) { - fspc(opt.log,"debug"); fprintf(opt.log,"link has already been recorded, cancelled: %s"LF,save); + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"link has already been recorded, cancelled: %s"LF,save); test_flush; } @@ -2263,15 +2811,20 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) { } // if ok==0 adr=eadr-1; // ** sauter - + + /* We skipped bytes and skip the " : reset state */ + if (inscript) { + inscript_state_pos = INSCRIPT_START; + } + } // if (p) } // si '<' ou '>' // plus loin adr++; - - + + /* Otimization: if we are scanning in HTML data (not in tag or script), then jump to the next starting tag */ if (ptr>0) { @@ -2282,18 +2835,23 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) { ) { /* Not at the end */ - if (( ((int) (adr - r.adr)) ) < r.size) { + if (( ((int) (adr - r->adr)) ) < r->size) { /* Not on a starting tag yet */ if (*adr != '<') { - char* adr_next = strchr(adr,'<'); + /* strchr does not well behave with null chrs.. 
*/ + /* char* adr_next = strchr(adr,'<'); */ + char* adr_next = adr; + while(*adr_next != '<' && (adr_next - r->adr) < r->size ) { + adr_next++; + } /* Jump to near end (index hack) */ - if (!adr_next) { + if (!adr_next || *adr_next != '<') { if ( - ( (int)(adr - r.adr) < (r.size - 4)) + ( (int)(adr - r->adr) < (r->size - 4)) && - (r.size > 4) + (r->size > 4) ) { - adr = r.adr + r.size - 2; + adr = r->adr + r->size - 2; } } else { adr = adr_next; @@ -2305,20 +2863,30 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) { // ---------- // écrire peu à peu - if ((opt.getmode & 1) && (ptr>0)) HT_ADD_ADR; + if ((opt->getmode & 1) && (ptr>0)) HT_ADD_ADR; lastsaved=adr; // dernier écrit+1 // ---------- - + + // Checks + if (back_add_stats != opt->state.back_add_stats) { + back_add_stats = opt->state.back_add_stats; + + // Check max time + if (!back_checkmirror(opt)) { + adr = r->adr + r->size; + } + } + // pour les stats du shell si parsing trop long #if HTS_ANALYSTE - if (r.size) - _hts_in_html_done=(100 * ((int) (adr - r.adr)) ) / (int)(r.size); + if (r->size) + _hts_in_html_done=(100 * ((int) (adr - r->adr)) ) / (int)(r->size); if (_hts_in_html_poll) { _hts_in_html_poll=0; // temps à attendre, et remplir autant que l'on peut le cache (backing) - back_wait(back,back_max,&opt,&cache,HTS_STAT.stat_timestart); - back_fillmax(back,back_max,&opt,&cache,liens,ptr,numero_passe,lien_tot); - + back_wait(back,back_max,opt,cache,HTS_STAT.stat_timestart); + back_fillmax(back,back_max,opt,cache,liens,ptr,numero_passe,lien_tot); + // Transfer rate engine_stats(); @@ -2329,35 +2897,35 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) { HTS_STAT.stat_infos=fspc(NULL,"info"); HTS_STAT.nbk=backlinks_done(liens,lien_tot,ptr); HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,back,back_max); - + if (!hts_htmlcheck_loop(back,back_max,0,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) { - if (opt.errlog) { - fspc(opt.errlog,"info"); fprintf(opt.errlog,"Exit 
requested by shell or user"LF); + if (opt->errlog) { + fspc(opt->errlog,"info"); fprintf(opt->errlog,"Exit requested by shell or user"LF); test_flush; } - exit_xh=1; // exit requested + *stre->exit_xh_=1; // exit requested XH_uninit; - return 0; - //adr = r.adr + r.size; // exit + return -1; + //adr = r->adr + r->size; // exit } else if (_hts_cancel==1) { - // adr = r.adr + r.size; // exit + // adr = r->adr + r->size; // exit nofollow=1; // moins violent _hts_cancel=0; } } - + // refresh the backing system each 2 seconds if (engine_stats()) { - back_wait(back,back_max,&opt,&cache,HTS_STAT.stat_timestart); - back_fillmax(back,back_max,&opt,&cache,liens,ptr,numero_passe,lien_tot); + back_wait(back,back_max,opt,cache,HTS_STAT.stat_timestart); + back_fillmax(back,back_max,opt,cache,liens,ptr,numero_passe,lien_tot); } #endif - } while(( ((int) (adr - r.adr)) ) < r.size); + } while(( ((int) (adr - r->adr)) ) < r->size); #if HTS_ANALYSTE _hts_in_html_parsing=0; // flag _hts_cancel=0; // pas de cancel #endif - if ((opt.getmode & 1) && (ptr>0)) { + if ((opt->getmode & 1) && (ptr>0)) { HT_ADD_END; // achever } // @@ -2366,12 +2934,1030 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) { } // if !error - if (opt.getmode & 1) { if (fp) { fclose(fp); fp=NULL; } } + if (opt->getmode & 1) { if (fp) { fclose(fp); fp=NULL; } } // sauver fichier //structcheck(savename); - //filesave(r.adr,r.size,savename); + //filesave(opt,r->adr,r->size,savename); #if HTS_ANALYSTE } // analyse OK #endif + + /* Apply changes */ + ENGINE_SAVE_CONTEXT(); + + return 0; +} + + + + +/* + Check 301, 302, .. statuscodes (moved) +*/ +int hts_mirror_check_moved(htsmoduleStruct* str, htsmoduleStructExtended* stre) { + /* Load engine variables */ + ENGINE_LOAD_CONTEXT(); + + // DEBUT rattrapage des 301,302,307.. 
+ // ------------------------------------------------------------ + if (!error) { + ////////{ + // on a chargé un fichier en plus + // if (!error) stat_loaded+=r.size; + + // ------------------------------------------------------------ + // Rattrapage des 301,302,307 (moved) et 412,416 - les 304 le sont dans le backing + // ------------------------------------------------------------ + if ( (r->statuscode==301) + || (r->statuscode==302) + || (r->statuscode==303) + || (r->statuscode==307) + ) { + //if (r->adr!=NULL) { // adr==null si fichier direct. [catch: davename normalement si cgi] + //int i=0; + char *rn=NULL; + // char* p; + + if ( (opt->debug>0) && (opt->errlog!=NULL) ) { + //if (opt->errlog) { + fspc(opt->errlog,"warning"); fprintf(opt->errlog,"%s for %s%s"LF,r->msg,urladr,urlfil); + test_flush; + } + + + { + char mov_url[HTS_URLMAXSIZE*2],mov_adr[HTS_URLMAXSIZE*2],mov_fil[HTS_URLMAXSIZE*2]; + int get_it=0; // ne pas prendre le fichier à la même adresse par défaut + int reponse=0; + mov_url[0]='\0'; mov_adr[0]='\0'; mov_fil[0]='\0'; + // + + strcpybuff(mov_url,r->location); + // url qque -> adresse+fichier + if ((reponse=ident_url_relatif(mov_url,urladr,urlfil,mov_adr,mov_fil))>=0) { + int set_prio_to=0; // pas de priotité fixéd par wizard + + //if (ident_url_absolute(mov_url,mov_adr,mov_fil)!=-1) { // ok URL reconnue + // c'est (en gros) la même URL.. + // si c'est un problème de casse dans le host c'est que le serveur est buggé + // ("RFC says.." 
: host name IS case insensitive) + if ((strfield2(mov_adr,urladr)!=0) && (strfield2(mov_fil,urlfil)!=0)) { // identique à casse près + // on tourne en rond + if (strcmp(mov_fil,urlfil)==0) { + error=1; + get_it=-1; // ne rien faire + if (opt->errlog) { + fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Can not bear crazy server (%s) for %s%s"LF,r->msg,urladr,urlfil); + test_flush; + } + } else { // mauvaise casse, effacer entrée dans la pile et rejouer une fois + get_it=1; + } + } else { // adresse différente + if (ishtml(mov_url)==0) { // pas même adresse MAIS c'est un fichier non html (pas de page moved possible) + // -> on prend à cette adresse, le lien sera enregistré avec lien_record() (hash) + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"wizard link test for moved file at %s%s.."LF,mov_adr,mov_fil); + test_flush; + } + // accepté? + if (hts_acceptlink(opt,ptr,lien_tot,liens, + mov_adr,mov_fil, + &set_prio_to, + NULL) != 1) { /* nouvelle adresse non refusée ? */ + get_it=1; + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"moved link accepted: %s%s"LF,mov_adr,mov_fil); + test_flush; + } + } + } /* sinon traité normalement */ + } + + //if ((strfield2(mov_adr,urladr)!=0) && (strfield2(mov_fil,urlfil)!=0)) { // identique à casse près + if (get_it==1) { + // court-circuiter le reste du traitement + // et reculer pour mieux sauter + if (opt->errlog) { + fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Warning moved treated for %s%s (real one is %s%s)"LF,urladr,urlfil,mov_adr,mov_fil); + test_flush; + } + // canceller lien actuel + error=1; + strcpybuff(liens[ptr]->adr,"!"); // caractère bidon (invalide hash) +#if HTS_HASH +#else + liens[ptr]->sav_len=-1; // taille invalide +#endif + // noter NOUVEAU lien + //xxc xxc + // set_prio_to=0+1; // protection if the moved URL is an html page!! 
+ //xxc xxc + { + char mov_sav[HTS_URLMAXSIZE*2]; + // calculer lien et éventuellement modifier addresse/fichier + if (url_savename(mov_adr,mov_fil,mov_sav,NULL,NULL,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil,opt,liens,lien_tot,back,back_max,cache,hash,ptr,numero_passe)!=-1) { + if (hash_read(hash,mov_sav,"",0,0)<0) { // n'existe pas déja + // enregistrer lien (MACRO) avec SAV IDENTIQUE + liens_record(mov_adr,mov_fil,liens[ptr]->sav,"",""); + //liens_record(mov_adr,mov_fil,mov_sav,"",""); + if (liens[lien_tot]!=NULL) { // OK, pas d'erreur + // mode test? + liens[lien_tot]->testmode=liens[ptr]->testmode; + liens[lien_tot]->link_import=0; // mode normal + if (!set_prio_to) + liens[lien_tot]->depth=liens[ptr]->depth; + else + liens[lien_tot]->depth=max(0,min(set_prio_to-1,liens[ptr]->depth)); // PRIORITE NULLE (catch page) + liens[lien_tot]->pass2=max(liens[ptr]->pass2,numero_passe); + liens[lien_tot]->retry=liens[ptr]->retry; + liens[lien_tot]->premier=liens[ptr]->premier; + liens[lien_tot]->precedent=liens[ptr]->precedent; + lien_tot++; + } else { // oups erreur, plus de mémoire!! + printf("PANIC! : Not enough memory [%d]\n",__LINE__); + if (opt->errlog) { + fprintf(opt->errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url))); + test_flush; + } + //if (opt->getmode & 1) { if (fp) { fclose(fp); fp=NULL; } } + XH_uninit; // désallocation mémoire & buffers + return 0; + } + } else { + if ( (opt->debug>0) && (opt->errlog!=NULL) ) { + fspc(opt->errlog,"warning"); fprintf(opt->errlog,"moving %s to an existing file %s"LF,liens[ptr]->fil,urlfil); + test_flush; + } + } + + } + } + + //printf("-> %s %s %s\n",liens[lien_tot-1]->adr,liens[lien_tot-1]->fil,liens[lien_tot-1]->sav); + + // note métaphysique: il se peut qu'il y ait un index.html et un INDEX.HTML + // sous DOS ca marche pas très bien... 
mais comme je suis génial url_savename() + // est à même de régler ce problème + } + } // ident_url_xx + + if (get_it==0) { // adresse vraiment différente et potentiellement en html (pas de possibilité de bouger la page tel quel à cause des <img src..> et cie) + rn=(char*) calloct(8192,1); + if (rn!=NULL) { + if (opt->errlog) { + fspc(opt->errlog,"warning"); fprintf(opt->errlog,"File has moved from %s%s to %s"LF,urladr,urlfil,mov_url); + test_flush; + } + if (!opt->mimehtml) { + escape_uri(mov_url); + } else { + char buff[HTS_URLMAXSIZE*3]; + strcpybuff(buff, mov_adr); + strcatbuff(buff, mov_fil); + escape_in_url(buff); + { char* a = buff; while((a = strchr(a, '%'))) { *a = 'X'; a++; } } + strcpybuff(mov_url, "cid:"); + strcatbuff(mov_url, buff); + } + // On prépare une page qui sautera immédiatement sur la bonne URL + // Le scanner re-changera, ensuite, cette URL, pour la mirrorer! + strcpybuff(rn,"<HTML>"CRLF); + strcatbuff(rn,"<!-- Created by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->"CRLF); + strcatbuff(rn,"<HEAD>"CRLF"<TITLE>Page has moved</TITLE>"CRLF"</HEAD>"CRLF"<BODY>"CRLF); + strcatbuff(rn,"<META HTTP-EQUIV=\"Refresh\" CONTENT=\"0; URL="); + strcatbuff(rn,mov_url); // URL + strcatbuff(rn,"\">"CRLF); + strcatbuff(rn,"<A HREF=\""); + strcatbuff(rn,mov_url); + strcatbuff(rn,"\">"); + strcatbuff(rn,"<B>Click here...</B></A>"CRLF); + strcatbuff(rn,"</BODY>"CRLF); + strcatbuff(rn,"<!-- Created by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->"CRLF); + strcatbuff(rn,"</HTML>"CRLF); + + // changer la page + if (r->adr) { + freet(r->adr); + r->adr=NULL; + } + r->adr=rn; + r->size=strlen(r->adr); + strcpybuff(r->contenttype,"text/html"); + } + } // get_it==0 + + } // bloc + // erreur HTTP (ex: 404, not found) + } else if ( + (r->statuscode==412) + || (r->statuscode==416) + ) { // Precondition Failed, c'est à dire pour nous redemander TOUT le fichier + if (fexist(liens[ptr]->sav)) { + remove(liens[ptr]->sav); // Eliminer + 
if (!fexist(liens[ptr]->sav)) { // Bien éliminé? (sinon on boucle..) +#if HDEBUG + printf("Partial content NOT up-to-date, reget all file for %s\n",liens[ptr]->sav); +#endif + if ( (opt->debug>1) && (opt->errlog!=NULL) ) { + //if (opt->errlog) { + fspc(opt->errlog,"debug"); fprintf(opt->errlog,"Partial file reget (%s) for %s%s"LF,r->msg,urladr,urlfil); + test_flush; + } + // enregistrer le MEME lien (MACRO) + liens_record(liens[ptr]->adr,liens[ptr]->fil,liens[ptr]->sav,"",""); + if (liens[lien_tot]!=NULL) { // OK, pas d'erreur + liens[lien_tot]->testmode=liens[ptr]->testmode; // mode test? + liens[lien_tot]->link_import=0; // pas mode import + liens[lien_tot]->depth=liens[ptr]->depth; + liens[lien_tot]->pass2=max(liens[ptr]->pass2,numero_passe); + liens[lien_tot]->retry=liens[ptr]->retry; + liens[lien_tot]->premier=liens[ptr]->premier; + liens[lien_tot]->precedent=ptr; + lien_tot++; + // + // canceller lien actuel + error=1; + strcpybuff(liens[ptr]->adr,"!"); // caractère bidon (invalide hash) +#if HTS_HASH +#else + liens[ptr]->sav_len=-1; // taille invalide +#endif + // + } else { // oups erreur, plus de mémoire!! + printf("PANIC! 
: Not enough memory [%d]\n",__LINE__); + if (opt->errlog) { + fprintf(opt->errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url))); + test_flush; + } + //if (opt->getmode & 1) { if (fp) { fclose(fp); fp=NULL; } } + XH_uninit; // désallocation mémoire & buffers + return 0; + } + } else { + if (opt->errlog!=NULL) { + fspc(opt->errlog,"error"); fprintf(opt->errlog,"Can not remove old file %s"LF,urlfil); + test_flush; + } + } + } else { + if (opt->errlog!=NULL) { + fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Unexpected 412/416 error (%s) for %s%s"LF,r->msg,urladr,urlfil); + test_flush; + } + } + } else if (r->statuscode!=200) { + int can_retry=0; + + // cas où l'on peut reessayer + // -2=timeout -3=rateout (interne à httrack) + switch(r->statuscode) { + //case -1: can_retry=1; break; + case -2: if (opt->hostcontrol) { // timeout et retry épuisés + if ((opt->hostcontrol & 1) && (liens[ptr]->retry<=0)) { + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"Link banned: %s%s"LF,urladr,urlfil); test_flush; + } + host_ban(opt,liens,ptr,lien_tot,back,back_max,jump_identification(urladr)); + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"Info: previous log - link banned: %s%s"LF,urladr,urlfil); test_flush; + } + } else can_retry=1; + } else can_retry=1; + break; + case -3: if ((opt->hostcontrol) && (liens[ptr]->retry<=0)) { // too slow + if (opt->hostcontrol & 2) { + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"Link banned: %s%s"LF,urladr,urlfil); test_flush; + } + host_ban(opt,liens,ptr,lien_tot,back,back_max,jump_identification(urladr)); + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"Info: previous log - link banned: %s%s"LF,urladr,urlfil); test_flush; + } + } else can_retry=1; + } else can_retry=1; + break; + case -4: // connect closed + can_retry=1; + break; + case 
-5: // other (non fatal) error + can_retry=1; + break; + case -6: // bad SSL handskake + can_retry=1; + break; + case 408: case 409: case 500: case 502: case 504: can_retry=1; + break; + } + + if ( strcmp(liens[ptr]->fil,"/primary") != 0 ) { // no primary (internal page 0) + if ((liens[ptr]->retry<=0) || (!can_retry) ) { // retry épuisés (ou retry impossible) + if (opt->errlog) { + if ((opt->retry>0) && (can_retry)){ + fspc(opt->errlog,"error"); + fprintf(opt->errlog,"\"%s\" (%d) after %d retries at link %s%s (from %s%s)"LF,r->msg,r->statuscode,opt->retry,urladr,urlfil,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil); + } else { + if (r->statuscode==-10) { // test OK + if ((opt->debug>0) && (opt->errlog!=NULL)) { + fspc(opt->errlog,"info"); + fprintf(opt->errlog,"Test OK at link %s%s (from %s%s)"LF,urladr,urlfil,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil); + } + } else { + if (strcmp(urlfil,"/robots.txt")) { // ne pas afficher d'infos sur robots.txt par défaut + fspc(opt->errlog,"error"); + fprintf(opt->errlog,"\"%s\" (%d) at link %s%s (from %s%s)"LF,r->msg,r->statuscode,urladr,urlfil,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil); + } else { + if (opt->debug>1) { + fspc(opt->errlog,"info"); fprintf(opt->errlog,"No robots.txt rules at %s"LF,urladr); + test_flush; + } + } + } + } + test_flush; + } + + // NO error in trop level + // due to the "no connection -> previous restored" hack + // This prevent the engine from wiping all data if the website has been deleted (or moved) + // since last time (which is quite annoying) + if (liens[ptr]->precedent != 0) { + // ici on teste si on doit enregistrer la page tout de même + if (opt->errpage) { + store_errpage=1; + } + } else { + if (strcmp(urlfil,"/robots.txt") != 0) { + /* + This is an error caused by a link entered by the user + That is, link(s) entered by user are invalid (404, 500, connect error, proxy error->.) 
+ If all links entered are invalid, the session failed and we will attempt to restore + the previous one + Example: Try to update a website which has been deleted remotely: this may delete + the website locally, which is really not desired (especially if the website disappeared!) + With this hack, the engine won't wipe local files (how clever) + */ + HTS_STAT.stat_errors_front++; + } + } + + } else { // retry!! + if (opt->debug>0 && opt->errlog != NULL) { // on fera un alert si le retry échoue + fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Retry after error %d (%s) at link %s%s (from %s%s)"LF,r->statuscode,r->msg,urladr,urlfil,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil); + test_flush; + } + // redemander fichier + liens_record(urladr,urlfil,savename,"",""); + if (liens[lien_tot]!=NULL) { // OK, pas d'erreur + liens[lien_tot]->testmode=liens[ptr]->testmode; // mode test? + liens[lien_tot]->link_import=0; // pas mode import + liens[lien_tot]->depth=liens[ptr]->depth; + liens[lien_tot]->pass2=max(liens[ptr]->pass2,numero_passe); + liens[lien_tot]->retry=liens[ptr]->retry-1; // moins 1 retry! + liens[lien_tot]->premier=liens[ptr]->premier; + liens[lien_tot]->precedent=liens[ptr]->precedent; + lien_tot++; + } else { // oups erreur, plus de mémoire!! + printf("PANIC! : Not enough memory [%d]\n",__LINE__); + if (opt->errlog) { + fspc(opt->errlog,"panic"); + fprintf(opt->errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url))); + test_flush; + } + //if (opt->getmode & 1) { if (fp) { fclose(fp); fp=NULL; } } + XH_uninit; // désallocation mémoire & buffers + return 0; + } + } + } else { + if (opt->errlog) { + if (opt->debug>1) { + fspc(opt->errlog,"info"); + fprintf(opt->errlog,"Info: no robots.txt at %s%s"LF,urladr,urlfil); + } + } + } + if (!store_errpage) { + if (r->adr) { // désalloc + freet(r->adr); + r->adr=NULL; + } + error=1; // erreur! + } + } + // FIN rattrapage des 301,302,307.. 
+ // ------------------------------------------------------------ + + } // if !error + + + /* Apply changes */ + ENGINE_SAVE_CONTEXT(); + + return 0; + + +} + + + +/* + Wait for next file and + check 301, 302, .. statuscodes (moved) +*/ +int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended* stre) { + /* Load engine variables */ + ENGINE_LOAD_CONTEXT(); + /* */ + int b; + int n; + +#if BDEBUG==1 + printf("\nBack test..\n"); +#endif + + // pause/lock files + { + int do_pause=0; + + // user pause lockfile : create hts-paused.lock --> HTTrack will be paused + if (fexist(fconcat(opt->path_log,"hts-stop.lock"))) { + // remove lockfile + remove(fconcat(opt->path_log,"hts-stop.lock")); + if (!fexist(fconcat(opt->path_log,"hts-stop.lock"))) { + do_pause=1; + } + } + + // after receving N bytes, pause + if (opt->fragment>0) { + if ((HTS_STAT.stat_bytes-stat_fragment) > opt->fragment) { + do_pause=1; + } + } + + // pause? + if (do_pause) { + if ( (opt->debug>0) && (opt->log!=NULL) ) { + fspc(opt->log,"info"); fprintf(opt->log,"engine: pause requested.."LF); + } + while (back_nsoc(back,back_max)>0) { // attendre fin des transferts + back_wait(back,back_max,opt,cache,HTS_STAT.stat_timestart); + Sleep(200); +#if HTS_ANALYSTE + { + back_wait(back,back_max,opt,cache,HTS_STAT.stat_timestart); + + // Transfer rate + engine_stats(); + + // Refresh various stats + HTS_STAT.stat_nsocket=back_nsoc(back,back_max); + HTS_STAT.stat_errors=fspc(NULL,"error"); + HTS_STAT.stat_warnings=fspc(NULL,"warning"); + HTS_STAT.stat_infos=fspc(NULL,"info"); + HTS_STAT.nbk=backlinks_done(liens,lien_tot,ptr); + HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,back,back_max); + + b=0; + if (!hts_htmlcheck_loop(back,back_max,b,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT) + || !back_checkmirror(opt)) { + if (opt->errlog) { + fspc(opt->errlog,"info"); fprintf(opt->errlog,"Exit requested by shell or user"LF); + test_flush; + } + *stre->exit_xh_=1; // exit 
requested + XH_uninit; + return 0; + } + } +#endif + } + // On désalloue le buffer d'enregistrement des chemins créée, au cas où pendant la pause + // l'utilisateur ferait un rm -r après avoir effectué un tar + // structcheck_init(1); + { + FILE* fp = fopen(fconcat(opt->path_log,"hts-paused.lock"),"wb"); + if (fp) { + fspc(fp,"info"); // dater + fprintf(fp,"Pause"LF"HTTrack is paused after retreiving "LLintP" bytes"LF"Delete this file to continue the mirror->.."LF""LF"",(LLint)HTS_STAT.stat_bytes); + fclose(fp); + } + } + stat_fragment=HTS_STAT.stat_bytes; + /* Info for wrappers */ + if ( (opt->debug>0) && (opt->log!=NULL) ) { + fspc(opt->log,"info"); fprintf(opt->log,"engine: pause: %s"LF,fconcat(opt->path_log,"hts-paused.lock")); + } +#if HTS_ANALYSTE + hts_htmlcheck_pause(fconcat(opt->path_log,"hts-paused.lock")); +#else + while (fexist(fconcat(opt->path_log,"hts-paused.lock"))) { + //back_wait(back,back_max,opt,cache,HTS_STAT.stat_timestart); inutile!! (plus de sockets actives) + Sleep(1000); + } +#endif + } + // + } + // end of pause/lock files + +#if HTS_ANALYSTE + // changement dans les préférences + /* + if (_hts_setopt) { + copy_htsopt(_hts_setopt,opt); // copier au besoin + _hts_setopt=NULL; // effacer callback + } + */ + if (_hts_addurl) { + char add_adr[HTS_URLMAXSIZE*2]; + char add_fil[HTS_URLMAXSIZE*2]; + while(*_hts_addurl) { + char add_url[HTS_URLMAXSIZE*2]; + add_adr[0]=add_fil[0]=add_url[0]='\0'; + if (!link_has_authority(*_hts_addurl)) + strcpybuff(add_url,"http://"); // ajouter http:// + strcatbuff(add_url,*_hts_addurl); + if (ident_url_absolute(add_url,add_adr,add_fil)>=0) { + // ----Ajout---- + // noter NOUVEAU lien + char add_sav[HTS_URLMAXSIZE*2]; + // calculer lien et éventuellement modifier addresse/fichier + if (url_savename(add_adr,add_fil,add_sav,NULL,NULL,NULL,NULL,opt,liens,lien_tot,back,back_max,cache,hash,ptr,numero_passe)!=-1) { + if (hash_read(hash,add_sav,"",0,0)<0) { // n'existe pas déja + // enregistrer lien (MACRO) + 
liens_record(add_adr,add_fil,add_sav,"",""); + if (liens[lien_tot]!=NULL) { // OK, pas d'erreur + liens[lien_tot]->testmode=0; // mode test? + liens[lien_tot]->link_import=0; // mode normal + liens[lien_tot]->depth=opt->depth; + liens[lien_tot]->pass2=max(0,numero_passe); + liens[lien_tot]->retry=opt->retry; + liens[lien_tot]->premier=lien_tot; + liens[lien_tot]->precedent=lien_tot; + lien_tot++; + // + if ((opt->debug>0) && (opt->log!=NULL)) { + fspc(opt->log,"info"); fprintf(opt->log,"Link added by user: %s%s"LF,add_adr,add_fil); test_flush; + } + // + } else { // oups erreur, plus de mémoire!! + printf("PANIC! : Not enough memory [%d]\n",__LINE__); + if (opt->errlog) { + fprintf(opt->errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url))); + test_flush; + } + //if (opt->getmode & 1) { if (fp) { fclose(fp); fp=NULL; } } + XH_uninit; // désallocation mémoire & buffers + return 0; + } + } else { + if ( (opt->debug>0) && (opt->errlog!=NULL) ) { + fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Existing link %s%s not added after user request"LF,add_adr,add_fil); + test_flush; + } + } + + } + } else { + if (opt->errlog) { + fspc(opt->errlog,"error"); + fprintf(opt->errlog,"Error during URL decoding for %s"LF,add_url); + test_flush; + } + } + // ----Fin Ajout---- + _hts_addurl++; // suivante + } + _hts_addurl=NULL; // libérer _hts_addurl + } + // si une pause a été demandée + if (_hts_setpause) { + // index du lien actuel + int b=back_index(back,back_max,urladr,urlfil,savename); + if (b<0) b=0; // forcer pour les stats + while(_hts_setpause) { // on fait la pause.. 
+ back_wait(back,back_max,opt,cache,HTS_STAT.stat_timestart); + + // Transfer rate + engine_stats(); + + // Refresh various stats + HTS_STAT.stat_nsocket=back_nsoc(back,back_max); + HTS_STAT.stat_errors=fspc(NULL,"error"); + HTS_STAT.stat_warnings=fspc(NULL,"warning"); + HTS_STAT.stat_infos=fspc(NULL,"info"); + HTS_STAT.nbk=backlinks_done(liens,lien_tot,ptr); + HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,back,back_max); + + if (!hts_htmlcheck_loop(back,back_max,b,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) { + if (opt->errlog) { + fspc(opt->errlog,"info"); fprintf(opt->errlog,"Exit requested by shell or user"LF); + test_flush; + } + *stre->exit_xh_=1; // exit requested + XH_uninit; + return 0; + } + if (back_nsoc(back,back_max)==0) + Sleep(250); // tite pause + } + } +#endif + + // si le fichier n'est pas en backing, le mettre.. + if (!back_exist(back,back_max,urladr,urlfil,savename)) { +#if BDEBUG==1 + printf("crash backing: %s%s\n",liens[ptr]->adr,liens[ptr]->fil); +#endif + if (back_add(back,back_max,opt,cache,urladr,urlfil,savename,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil,liens[ptr]->testmode,&liens[ptr]->pass2)==-1) { + printf("PANIC! : Crash adding error, unexpected error found.. 
[%d]\n",__LINE__); +#if BDEBUG==1 + printf("error while crash adding\n"); +#endif + if (opt->errlog) { + fspc(opt->errlog,"error"); fprintf(opt->errlog,"Unexpected backing error for %s%s"LF,urladr,urlfil); + test_flush; + } + + } + } + +#if BDEBUG==1 + printf("test number of socks\n"); +#endif + + // ajouter autant de socket qu'on peut ajouter + n=opt->maxsoc-back_nsoc(back,back_max); +#if BDEBUG==1 + printf("%d sockets available for backing\n",n); +#endif + +#if HTS_ANALYSTE + if ((n>0) && (!_hts_setpause)) { // si sockets libre et pas en pause, ajouter +#else + if (n>0) { // si sockets libre +#endif + // remplir autant que l'on peut le cache (backing) + back_fillmax(back,back_max,opt,cache,liens,ptr,numero_passe,lien_tot); + } + + // index du lien actuel + /* + b=back_index(back,back_max,urladr,urlfil,savename); + + if (b>=0) + */ + { + // ------------------------------------------------------------ + // attendre que le fichier actuel soit prêt - BOUCLE D'ATTENTE + do { + + // index du lien actuel + b=back_index(back,back_max,urladr,urlfil,savename); +#if BDEBUG==1 + printf("back index %d, waiting\n",b); +#endif + // Continue to the loop if link still present + if (b<0) + continue; + + // Receive data + if (back[b].status>0) + back_wait(back,back_max,opt,cache,HTS_STAT.stat_timestart); + + // Continue to the loop if link still present + b=back_index(back,back_max,urladr,urlfil,savename); + if (b<0) + continue; + + // Stop the mirror + if (!back_checkmirror(opt)) { + *stre->exit_xh_=1; // exit requested + XH_uninit; + return 0; + } + + // And fill the backing stack + if (back[b].status>0) + back_fillmax(back,back_max,opt,cache,liens,ptr,numero_passe,lien_tot); + + // Continue to the loop if link still present + b=back_index(back,back_max,urladr,urlfil,savename); + if (b<0) + continue; + + // autres occupations de HTTrack: statistiques, boucle d'attente, etc. 
+ if ((opt->makestat) || (opt->maketrack)) { + TStamp l=time_local(); + if ((int) (l-makestat_time) >= 60) { + if (makestat_fp != NULL) { + fspc(makestat_fp,"info"); + fprintf(makestat_fp,"Rate= %d (/"LLintP") \11NewLinks= %d (/%d)"LF,(int) ((HTS_STAT.HTS_TOTAL_RECV-*stre->makestat_total_)/(l-makestat_time)), (LLint)HTS_STAT.HTS_TOTAL_RECV,(int) lien_tot-*stre->makestat_lnk_,(int) lien_tot); + fflush(makestat_fp); + *stre->makestat_total_=HTS_STAT.HTS_TOTAL_RECV; + *stre->makestat_lnk_=lien_tot; + } + if (stre->maketrack_fp != NULL) { + int i; + fspc(stre->maketrack_fp,"info"); fprintf(stre->maketrack_fp,LF); + for(i=0;i<back_max;i++) { + back_info(back,i,3,stre->maketrack_fp); + } + fprintf(stre->maketrack_fp,LF); + + } + makestat_time=l; + } + } +#if HTS_ANALYSTE + { + int i; + { + char* s=hts_cancel_file(""); + if (strnotempty(s)) { // fichier à canceller + for(i=0;i<back_max;i++) { + if ((back[i].status>0)) { + if (strcmp(back[i].url_sav,s)==0) { // ok trouvé + if (back[i].status != 1000) { +#if HTS_DEBUG_CLOSESOCK + DEBUG_W("user cancel: deletehttp\n"); +#endif + if (back[i].r.soc!=INVALID_SOCKET) deletehttp(&back[i].r); + back[i].r.soc=INVALID_SOCKET; + back[i].r.statuscode=-1; + strcpybuff(back[i].r.msg,"Cancelled by User"); + back[i].status=0; // terminé + } else // cancel ftp.. 
flag à 1 + back[i].stop_ftp = 1; + } + } + } + s[0]='\0'; + } + } + + // Transfer rate + engine_stats(); + + // Refresh various stats + HTS_STAT.stat_nsocket=back_nsoc(back,back_max); + HTS_STAT.stat_errors=fspc(NULL,"error"); + HTS_STAT.stat_warnings=fspc(NULL,"warning"); + HTS_STAT.stat_infos=fspc(NULL,"info"); + HTS_STAT.nbk=backlinks_done(liens,lien_tot,ptr); + HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,back,back_max); + + if (!hts_htmlcheck_loop(back,back_max,b,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) { + if (opt->errlog) { + fspc(opt->errlog,"info"); fprintf(opt->errlog,"Exit requested by shell or user"LF); + test_flush; + } + *stre->exit_xh_=1; // exit requested + XH_uninit; + return 0; + } + } + +#endif +#if HTS_POLL + if ((opt->shell) || (opt->keyboard) || (opt->verbosedisplay) || (!opt->quiet)) { + TStamp tl; + *stre->info_shell_=1; + + /* Toggle with ENTER */ + if (!opt->quiet) { + if (check_stdin()) { + char com[256]; + linput(stdin,com,200); + if (opt->verbosedisplay==2) + opt->verbosedisplay=1; + else + opt->verbosedisplay=2; + /* Info for wrappers */ + if ( (opt->debug>0) && (opt->log!=NULL) ) { + fspc(opt->log,"info"); fprintf(opt->log,"engine: change-options"LF); + } +#if HTS_ANALYSTE + hts_htmlcheck_chopt(opt); +#endif + } + } + + tl=time_local(); + + // générer un message d'infos sur l'état actuel + if (opt->shell) { // si shell + if ((tl-*stre->last_info_shell_)>0) { // toute les 1 sec + FILE* fp=stdout; + int a=0; + *stre->last_info_shell_=tl; + if (fexist(fconcat(opt->path_log,"hts-autopsy"))) { // débuggage: teste si le robot est vivant + // (oui je sais un robot vivant.. mais bon.. il a le droit de vivre lui aussi) + // (libérons les robots esclaves de l'internet!) 
+ remove(fconcat(opt->path_log,"hts-autopsy")); + fp=fopen(fconcat(opt->path_log,"hts-isalive"),"wb"); + a=1; + } + if ((*stre->info_shell_) || a) { + int i,j; + + fprintf(fp,"TIME %d"LF,(int) (tl-HTS_STAT.stat_timestart)); + fprintf(fp,"TOTAL %d"LF,(int) HTS_STAT.stat_bytes); + fprintf(fp,"RATE %d"LF,(int) (HTS_STAT.HTS_TOTAL_RECV/(tl-HTS_STAT.stat_timestart))); + fprintf(fp,"SOCKET %d"LF,back_nsoc(back,back_max)); + fprintf(fp,"LINK %d"LF,lien_tot); + { + LLint mem=0; + for(i=0;i<back_max;i++) + if (back[i].r.adr!=NULL) + mem+=back[i].r.size; + fprintf(fp,"INMEM "LLintP""LF,(LLint)mem); + } + for(j=0;j<2;j++) { // passes pour ready et wait + for(i=0;i<back_max;i++) { + back_info(back,i,j+1,stdout); // maketrack_fp a la place de stdout ?? // ** + } + } + fprintf(fp,LF); + if (a) + fclose(fp); + io_flush; + } + } + } // si shell + + } // si shell ou keyboard (option) + // +#endif + } while((b>=0) && (back[max(b,0)].status>0)); + + + // If link not found on the stack, it's because it has already been downloaded + // in background + // Then, skip it and go to the next one + if (b<0) { + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"link #%d is ready, no more on the stack, skipping: %s%s.."LF,ptr,urladr,urlfil); + test_flush; + } + + // prochain lien + // ptr++; + + return 2; // goto jump_if_done; + + } + /* link put in cache by the backing system for memory spare - reclaim */ + else if (back[b].finalized) { + assertf(back[b].r.adr == NULL); + /* read file in cache */ + back[b].r = cache_read_ro(opt,cache,back[b].url_adr,back[b].url_fil,back[b].url_sav, back[b].location_buffer); + /* ensure correct location buffer set */ + back[b].r.location=back[b].location_buffer; + if (back[b].r.statuscode == -1) { + if (opt->errlog) { + fspc(opt->errlog,"error"); fprintf(opt->errlog,"Unexpected error: %s%s not found anymore in cache"LF,back[b].url_adr,back[b].url_fil); + test_flush; + } + } else { + if ( (opt->debug>1) && (opt->log!=NULL) ) 
{ + fspc(opt->log,"debug"); fprintf(opt->log,"reclaim file %s%s (%d)"LF,back[b].url_adr,back[b].url_fil,back[b].r.statuscode); test_flush; + } + } + } + + +#if HTS_ANALYSTE==2 +#else + //if (!opt->quiet) { // petite animation + if (!opt->verbosedisplay) { + if (!opt->quiet) { + static int roll=0; /* static: ok */ + roll=(roll+1)%4; + printf("%c\x0d",("/-\\|")[roll]); + fflush(stdout); + } + } else if (opt->verbosedisplay==1) { + if (back[b].r.statuscode==200) + printf("%d/%d: %s%s ("LLintP" bytes) - OK\33[K\r",ptr,lien_tot,back[b].url_adr,back[b].url_fil,(LLint)back[b].r.size); + else + printf("%d/%d: %s%s ("LLintP" bytes) - %d\33[K\r",ptr,lien_tot,back[b].url_adr,back[b].url_fil,(LLint)back[b].r.size,back[b].r.statuscode); + fflush(stdout); + } + //} +#endif + // ------------------------------------------------------------ + // Vérificateur d'intégrité +#if DEBUG_CHECKINT + _CHECKINT(&back[b],"Retour de back_wait, après le while") + { + int i; + for(i=0;i<back_max;i++) { + char si[256]; + sprintf(si,"Test global après back_wait, index %d",i); + _CHECKINT(&back[i],si) + } + } +#endif + + // copier structure réponse htsblk + memcpy(r, &(back[b].r), sizeof(htsblk)); + r->location=stre->loc_; // ne PAS copier location!! adresse, pas de buffer + if (back[b].r.location) + strcpybuff(r->location,back[b].r.location); + back[b].r.adr=NULL; // ne pas faire de desalloc ensuite + + // libérer emplacement backing + back_maydelete(opt,back,b); + + // progression +#if 0 + if (opt->aff_progress) { + TStamp tl=time_local(); + if ((tl-HTS_STAT.stat_timestart)>0) { + char s[32]; + int i=0; + lastime=tl; + _CLRSCR; _GOTOXY("1","1"); + printf("Rate=%d B/sec\n",(int) (HTS_STAT.HTS_TOTAL_RECV/(tl-HTS_STAT.stat_timestart))); + while(i<minimum(back_max,99)) { // ** + if (back[i].status>=0) { // loading.. 
+ s[0]='\0'; + if (strlen(back[i].url_fil)>16) + strcatbuff(s,back[i].url_fil+strlen(back[i].url_fil)-16); + else + strncatbuff(s,back[i].url_fil,16); + printf("%s : ",s); + + printf("["); + if (back[i].r.totalsize>0) { + int p; + int j; + p=(int)((back[i].r.size*10)/back[i].r.totalsize); + p=minimum(10,p); + for(j=0;j<p;j++) printf("*"); + for(j=0;j<(10-p);j++) printf("-"); + } else { + printf(LLintP,(LLint)back[i].r.size); + } + printf("]"); + + //} else if (back[i].status==0) { + // strcpybuff(s,"ENDED"); + } + printf("\n"); + i++; + } + io_flush; + } + } +#endif + + // débug graphique +#if BDEBUG==2 + { + char s[12]; + int i=0; + _GOTOXY(1,1); + printf("Rate=%d B/sec\n",(int) (HTS_STAT.HTS_TOTAL_RECV/(time_local()-HTS_STAT.stat_timestart))); + while(i<minimum(back_max,160)) { + if (back[i].status>0) { + sprintf(s,"%d",back[i].r.size); + } else if (back[i].status==0) { + strcpybuff(s,"ENDED"); + } else + strcpybuff(s," - "); + while(strlen(s)<8) strcatbuff(s," "); + printf("%s",s); io_flush; + i++; + } + } +#endif + + +#if BDEBUG==1 + printf("statuscode=%d with %s / msg=%s\n",r->statuscode,r->contenttype,r->msg); +#endif + + } + /*else { + #if BDEBUG==1 + printf("back index error\n"); + #endif + } + */ + + + + ENGINE_SAVE_CONTEXT(); + + return 0; + + +} + + diff --git a/src/htsparse.h b/src/htsparse.h new file mode 100644 index 0000000..4efc386 --- /dev/null +++ b/src/htsparse.h @@ -0,0 +1,108 @@ +/* ------------------------------------------------------------ */ +/* +HTTrack Website Copier, Offline Browser for Windows and Unix +Copyright (C) Xavier Roche and other contributors + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or any later version. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + + +Important notes: + +- We hereby ask people using this source NOT to use it in purpose of grabbing +emails addresses, or collecting any other private information on persons. +This would disgrace our work, and spoil the many hours we spent on it. + + +Please visit our Website: http://www.httrack.com +*/ + + +/* ------------------------------------------------------------ */ +/* File: htsparse.h parser */ +/* html/javascript/css parser */ +/* and other parser routines */ +/* Author: Xavier Roche */ +/* ------------------------------------------------------------ */ + + +typedef struct { + /* Main object */ + htsblk* r_; + + /* Error handling */ + int* error_; + int* exit_xh_; + int* store_errpage_; + + /* Structural */ + int* filptr_; + char*** filters_; + robots_wizard* robots_; + hash_struct* hash_; + int* lien_max_; + + /* Base & codebase */ + char* base; + char* codebase; + + /* Index */ + int* makeindex_done_; + FILE** makeindex_fp_; + int* makeindex_links_; + char* makeindex_firstlink_; + + /* Html templates */ + char *template_header_; + char *template_body_; + char *template_footer_; + + /* Specific to downloads */ + LLint* stat_fragment_; + TStamp makestat_time; + FILE* makestat_fp; + LLint* makestat_total_; + int* makestat_lnk_; + FILE* maketrack_fp; + + /* Function-dependant */ + char* loc_; + TStamp* last_info_shell_; + int* info_shell_; + +} htsmoduleStructExtended; + + +/* + Main parser, attempt to scan links inside the html/css/js file + Parameters: The public module structure, and the private module variables 
+*/ +int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre); + +/* + Check for 301,302.. errors ("moved") and handle them; re-issue requests, make + redirect file, handle filters considerations.. + Parameters: The public module structure, and the private module variables + Returns 0 upon success +*/ +int hts_mirror_check_moved(htsmoduleStruct* str, htsmoduleStructExtended* stre); + +/* + Get the next file on the queue, waiting for it, handling other files in background.. + Parameters: The public module structure, and the private module variables + Returns 0 upon success +*/ +int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended* stre); + + diff --git a/src/htsrobots.c b/src/htsrobots.c index 8aabdd4..58e97fb 100644 --- a/src/htsrobots.c +++ b/src/htsrobots.c @@ -79,10 +79,11 @@ int checkrobots(robots_wizard* robots,char* adr,char* fil) { return 0; } int checkrobots_set(robots_wizard* robots,char* adr,char* data) { - if (((int) strlen(data)) > 999) return 0; + if (((int) strlen(adr)) >= sizeof(robots->adr) - 2) return 0; + if (((int) strlen(data)) >= sizeof(robots->token) - 2) return 0; while(robots) { if (strfield2(robots->adr,adr)) { // entrée existe - strcpy(robots->token,data); + strcpybuff(robots->token,data); #if DEBUG_ROBOTS printf("robots.txt: set %s to %s\n",adr,data); #endif @@ -92,8 +93,8 @@ int checkrobots_set(robots_wizard* robots,char* adr,char* data) { robots->next=(robots_wizard*) calloct(1,sizeof(robots_wizard)); if (robots->next) { robots->next->next=NULL; - strcpy(robots->next->adr,adr); - strcpy(robots->next->token,data); + strcpybuff(robots->next->adr,adr); + strcpybuff(robots->next->token,data); #if DEBUG_ROBOTS printf("robots.txt: new set %s to %s\n",adr,data); #endif diff --git a/src/htsrobots.h b/src/htsrobots.h index 62b9689..ef08183 100644 --- a/src/htsrobots.h +++ b/src/htsrobots.h @@ -41,8 +41,8 @@ Please visit our Website: http://www.httrack.com // robots wizard typedef struct robots_wizard { - char 
adr[1024]; - char token[1024]; + char adr[128]; + char token[4096]; struct robots_wizard* next; } robots_wizard; diff --git a/src/htsserver.c b/src/htsserver.c new file mode 100644 index 0000000..0408976 --- /dev/null +++ b/src/htsserver.c @@ -0,0 +1,1814 @@ +/* ------------------------------------------------------------ */ +/* +HTTrack Website Copier, Offline Browser for Windows and Unix +Copyright (C) Xavier Roche and other contributors + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + + +Important notes: + +- We hereby ask people using this source NOT to use it in purpose of grabbing +emails addresses, or collecting any other private information on persons. +This would disgrace our work, and spoil the many hours we spent on it. 
+ + +Please visit our Website: http://www.httrack.com +*/ + + +/* ------------------------------------------------------------ */ +/* File: Mini-server */ +/* Author: Xavier Roche */ +/* ------------------------------------------------------------ */ + + +/* specific definitions */ +/* specific definitions */ +#include "htsbase.h" +#include "htsnet.h" +#include "htslib.h" +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <fcntl.h> +#if HTS_WIN +#else +#include <arpa/inet.h> +#endif +#ifndef _WIN32 +#include <signal.h> +#endif +/* END specific definitions */ + +/* définitions globales */ +#include "htsglobal.h" + +/* htslib */ +/*#include "htslib.h"*/ + +/* HTTrack Website Copier Library */ +#include "httrack-library.h" + +/* Language files */ +#include "htsinthash.h" +int NewLangStrSz=1024; +inthash NewLangStr=NULL; +int NewLangStrKeysSz=1024; +inthash NewLangStrKeys=NULL; +int NewLangListSz=1024; +inthash NewLangList=NULL; +/* Language files */ + + +#include "htsserver.h" + +char* gethomedir(void); +int commandRunning = 0; +int commandEndRequested = 0; +int commandEnd = 0; +int commandReturn = 0; +char* commandReturnMsg = NULL; +char* commandReturnCmdl = NULL; +int commandReturnSet = 0; + +/* Extern */ +extern void webhttrack_main(char* cmd); +extern void webhttrack_lock(int lock); + +static int is_image(char* file) { + return ( (strstr(file, ".gif") != NULL) ); +} +static int is_text(char* file) { + return ( (strstr(file, ".txt") != NULL) ); +} +static int is_html(char* file) { + return ( (strstr(file, ".htm") != NULL) ); +} + +static void sig_brpipe( int code ) { + /* ignore */ +} + + +// URL Link catcher + +// 0- Init the URL catcher with standard port + +// smallserver_init(&port,&return_host); +T_SOC smallserver_init_std(int* port_prox,char* adr_prox) { + T_SOC soc; + int try_to_listen_to[]={8080,8081,8082,8083,8084,8085,8086,8087,8088,8089, + 32000,32001,32002,32003,32004,32005,32006,32007,32008,32009, + 
42000,42001,42002,42003,42004,42005,42006,42007,42008,42009, + 0,-1}; + int i=0; + do { + soc=smallserver_init(&try_to_listen_to[i],adr_prox); + *port_prox=try_to_listen_to[i]; + i++; + } while( (soc == INVALID_SOCKET) && (try_to_listen_to[i]>=0)); + return soc; +} + + +// 1- Init the URL catcher + +// smallserver_init(&port,&return_host); +T_SOC smallserver_init(int* port,char* adr) { + T_SOC soc = INVALID_SOCKET; + char h_loc[256+2]; + + commandRunning = + commandEnd = + commandReturn = + commandReturnSet = + commandEndRequested = 0; + if (commandReturnMsg) + free(commandReturnMsg); + commandReturnMsg = NULL; + if (commandReturnCmdl) + free(commandReturnCmdl); + commandReturnCmdl = NULL; + + if (gethostname(h_loc,256)==0) { // host name + SOCaddr server; + int server_size=sizeof(server); + /*t_hostent* hp_loc; + t_fullhostent buffer;*/ + + // effacer structure + memset(&server, 0, sizeof(server)); + + /*if ( (hp_loc=vxgethostbyname(h_loc, &buffer)) )*/ { // notre host + + // copie adresse + // NO (bind all) + // SOCaddr_copyaddr(server, server_size, hp_loc->h_addr_list[0], hp_loc->h_length); + + SOCaddr_initany(server, server_size); + if ( (soc=socket(SOCaddr_sinfamily(server), SOCK_STREAM, 0)) != INVALID_SOCKET) { + SOCaddr_initport(server, *port); + if ( bind(soc,(struct sockaddr*) &server, server_size) == 0 ) { + /*int len; + SOCaddr server2; + len=sizeof(server2);*/ + // effacer structure + /*memset(&server2, 0, sizeof(server2)); + if (getsockname(soc,(struct sockaddr*) &server2,&len) == 0) { + *port=ntohs(SOCaddr_sinport(server)); // récupérer port*/ + if (listen(soc,10)>=0) { // au pif le 10 + // SOCaddr_inetntoa(adr, 128, server2, len); + strcpy(adr, h_loc); + } else { +#ifdef _WIN32 + closesocket(soc); +#else + close(soc); +#endif + soc=INVALID_SOCKET; + } + + + /*} else { +#ifdef _WIN32 + closesocket(soc); +#else + close(soc); +#endif + soc=INVALID_SOCKET; + }*/ + + + } else { +#ifdef _WIN32 + closesocket(soc); +#else + close(soc); +#endif + 
soc=INVALID_SOCKET; + } + } + } + } + return soc; +} + +// 2 - Wait for URL + +static int recv_bl(T_SOC soc, void* buffer, size_t len, int timeout) { + if (check_readinput_t(soc, timeout)) { + int n = 1; + size_t size = len; + size_t offs = 0; + while(n > 0 && size > 0) { + n = recv(soc, ((char*)buffer) + offs, (int) size, 0); + if (n > 0) { + offs += n; + size -= n; + } + } + return (int)offs; + } + return -1; +} + + +// smallserver +// returns 0 if error +// url: buffer where URL must be stored - or ip:port in case of failure +// data: 32Kb + +typedef struct { + char* name; + int value; +} initIntElt; +typedef struct { + char* name; + char* value; +} initStrElt; + +int smallserver_setkey(char* key, char* value) { + return inthash_write(NewLangList, key, (unsigned long int)strdup(value)); +} +int smallserver_setkeyint(char* key, LLint value) { + char tmp[256]; + sprintf(tmp, LLintP, value); + return inthash_write(NewLangList, key, (unsigned long int)strdup(tmp)); +} +int smallserver_setkeyarr(char* key, int id, char* key2, char* value) { + char tmp[256]; + sprintf(tmp, "%s%d%s", key, id, key2); + return inthash_write(NewLangList, tmp, (unsigned long int)strdup(value)); +} + +#define SET_ERROR(err) do { \ + inthash_write(NewLangList, "error", (unsigned long int)strdup(err)); \ + error_redirect = "/server/error.html"; \ +} while(0) + +int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { + int timeout=30; + int retour=0; + int willexit=0; + int buffer_size = 32768; + char* buffer = (char*)malloc(buffer_size); + String headers = STRING_EMPTY; + String output = STRING_EMPTY; + String tmpbuff = STRING_EMPTY; + String fspath = STRING_EMPTY; + + /* Load strings */ + htslang_init(); + if (!htslang_load(NULL, path)) { + fprintf(stderr, "unable to find lang.def and/or lang/ strings in %s\n", path); + return 0; + } + LANG_T(path, 0); + + /* Init various values */ + { + char pth[1024]; + char* initOn[] = { "parseall", "Cache", "ka", + "cookies", 
"parsejava", "testall", "updhack", "index", NULL }; + initIntElt initInt[] = { + { "filter", 4 }, + { "travel", 2 }, + { "travel2", 1 }, + { "travel3", 1 }, + /* */ + { "connexion", 4 }, + /* */ + { "maxrate", 25000 }, + /* */ + { "build", 1 }, + /* */ + { "checktype", 2}, + { "robots", 3 }, + + { NULL, 0 } + }; + initStrElt initStr[] = { + { "user", "Mozilla/4.5 (compatible; HTTrack 3.0x; Windows 98)" }, + { "footer", "<!-- Mirrored from %s%s by HTTrack Website Copier/3.x [XR&CO'2002], %s -->" }, + { "url2", "+*.png +*.gif +*.jpg +*.css +*.js -ad.doubleclick.net/*" }, + { NULL, NULL } + }; + int i = 0; + for(i = 0 ; initInt[i].name ; i++) { + char tmp[32]; + sprintf(tmp, "%d", initInt[i].value); + inthash_write(NewLangList, initInt[i].name, (unsigned long int)strdup(tmp)); + } + for(i = 0 ; initOn[i] ; i++) { + inthash_write(NewLangList, initOn[i], (unsigned long int)strdup("1")); /* "on" */ + } + for(i = 0 ; initStr[i].name ; i++) { + inthash_write(NewLangList, initStr[i].name, (unsigned long int)strdup(initStr[i].value)); + } + strcpybuff(pth, gethomedir()); + strcatbuff(pth, "/websites"); + inthash_write(NewLangList, "path", (unsigned long int)strdup(pth)); + } + + /* Lock */ + webhttrack_lock(1); + + // connexion (accept) + while(!willexit && buffer != NULL && soc != INVALID_SOCKET) { + char line1[1024]; + char line[8192]; + char line2[1024]; + T_SOC soc_c; + struct sockaddr dummyaddr; + int dummylen = sizeof(struct sockaddr); + LLint length = 0; + char* error_redirect = NULL; + + line[0] = '\0'; + buffer[0] = '\0'; + StringClear(&headers); + StringClear(&output); + StringClear(&tmpbuff); + StringClear(&fspath); + StringStrcat(&headers, ""); + StringStrcat(&output, ""); + StringStrcat(&tmpbuff, ""); + StringStrcat(&fspath, ""); + memset(&dummyaddr, 0, sizeof(dummyaddr)); + + /* UnLock */ + webhttrack_lock(0); + + /* sigpipe */ +#ifndef _WIN32 + signal( SIGPIPE , sig_brpipe ); +#endif + + /* Accept */ + while ( (soc_c = accept(soc, &dummyaddr, &dummylen)) == 
INVALID_SOCKET); + + /* Lock */ + webhttrack_lock(1); + + if(linputsoc_t(soc_c, line1, sizeof(line1) - 2, timeout) > 0) { + int meth = 0; + if (strfield(line1, "get ")) { + meth = 1; + } else if (strfield(line1, "post ")) { + meth = 2; + } else if (strfield(line1, "head ")) { /* yes, we can do that */ + meth = 10; + } else { +#ifdef _DEBUG + // assert(FALSE); +#endif + } + if (meth) { + /* Flush headers */ + length = buffer_size - 2; + while(linputsoc_t(soc_c, line, sizeof(line) - 2, timeout) > 0) { + int p; + if ((p=strfield(line,"Content-length:"))!=0) { + sscanf(line+p, LLintP, &(length)); + } + else if ((p=strfield(line,"Accept-language:"))!=0) { + char tmp[32]; + char* s = line + p; + /*int l;*/ + while(*s == ' ') s++; + tmp[0] = '\0'; + strncatbuff(tmp, s, 2); + /*l = LANG_SEARCH(path, tmp);*/ + } + } + if (meth == 2) { + int sz = 0; + if (length > buffer_size - 2) { + length = buffer_size - 2; + } + if (length > 0 && (sz=recv_bl(soc_c, buffer, (int)length, timeout)) < 0) { + meth = 0; + } else { + buffer[sz] = '\0'; + } + } + } + + /* Generated variables */ + if (commandEnd && !commandReturnSet) { + commandReturnSet = 1; + if (commandReturn) { + char tmp[32]; + sprintf(tmp, "%d", commandReturn); + inthash_write(NewLangList, "commandReturn", (unsigned long int)strdup(tmp)); + inthash_write(NewLangList, "commandReturnMsg", (unsigned long int)commandReturnMsg); + inthash_write(NewLangList, "commandReturnCmdl", (unsigned long int)commandReturnCmdl); + } else { + inthash_write(NewLangList, "commandReturn", (unsigned long int)NULL); + inthash_write(NewLangList, "commandReturnMsg", (unsigned long int)NULL); + inthash_write(NewLangList, "commandReturnCmdl", (unsigned long int)NULL); + } + } + + /* SID check */ + { + unsigned long int adr = 0; + if (inthash_readptr(NewLangList, "_sid", (long int *)&adr)) { + if (inthash_write(NewLangList, "sid", (unsigned long int)strdup((char*)adr))) { + } + } + } + + /* check variables */ + if (meth && buffer[0]) { + char* s = 
buffer; + char *e, *f; + strcatbuff(buffer, "&"); + while( s && (e = strchr(s, '=')) && (f = strchr(s, '&')) ) { + char* ua; + int len; + String sua = STRING_EMPTY; + *e = *f = '\0'; + ua = e + 1; + if (strfield2(ua, "on")) /* hack : "on" == 1 */ + ua = "1"; + len = strlen(ua); + unescapehttp(ua, &sua); + inthash_write(NewLangList, s, (unsigned long int)StringAcquire(&sua)); + s = f + 1; + } + } + + + /* Error check */ + { + unsigned long int adr = 0; + unsigned long int adr2 = 0; + if (inthash_readptr(NewLangList, "sid", (long int *)&adr)) { + if (inthash_readptr(NewLangList, "_sid", (long int *)&adr2)) { + if (strcmp((char*)adr, (char*)adr2) != 0) { + meth = 0; + } + } + } + } + + /* Check variables (internal) */ + if (meth) { + int doLoad=0; + unsigned long int adr = 0; + if (inthash_readptr(NewLangList, "lang", (long int *)&adr)) { + int n = 0; + if (sscanf((char*)adr, "%d", &n) == 1 && n - 1 != LANG_T(path, -1)) { + LANG_T(path, n - 1); + } + } + + /* Load existing project settings */ + if (inthash_readptr(NewLangList, "loadprojname", (long int *)&adr)) { + char* pname = (char*) adr; + if (*pname) { + inthash_write(NewLangList, "projname", (unsigned long int)strdup(pname)); + } + inthash_write(NewLangList, "loadprojname", (unsigned long int)NULL); + doLoad=1; + } + + /* path : <path>/<project> */ + if (!commandRunning) { + unsigned long int adrw = 0, adrpath = 0, adrprojname = 0; + if (inthash_readptr(NewLangList, "path", (long int *)&adrpath) + && inthash_readptr(NewLangList, "projname", (long int *)&adrprojname)) { + StringClear(&fspath); + StringStrcat(&fspath, (char*)adrpath); + StringStrcat(&fspath, "/"); + StringStrcat(&fspath, (char*)adrprojname); + } + } + + /* Load existing project settings */ + if (doLoad) { + FILE* fp; + StringStrcat(&fspath, "/hts-cache/winprofile.ini"); + fp = fopen(StringBuff(&fspath), "rb"); + if (fp) { + /* Read file */ + while(!feof(fp)) { + char* str = line; + char* pos; + if (!linput(fp, line, sizeof(line) - 2)) { + *str = 
'\0'; + } + pos=strchr(line, '='); + if (pos) { + String escline = STRING_EMPTY; + *pos++='\0'; + if (pos[0] == '0' && pos[1] == '\0') + *pos = '\0'; /* 0 => empty */ + unescapeini(pos, &escline); + inthash_write(NewLangList, line, (unsigned long int)StringAcquire(&escline)); + } + } + + fclose(fp); + } + } + + } + + /* Execute command */ + { + unsigned long int adr = 0; + int p = 0; + if (inthash_readptr(NewLangList, "command", (long int *)&adr)) { + if (strcmp((char*)adr, "cancel") == 0) { + if (commandRunning) { + if (!commandEndRequested) { + commandEndRequested=1; + hts_request_stop(0); + } else { + hts_request_stop(1); /* note: the force flag does not have anyeffect yet */ + commandEndRequested=2; /* will break the loop() callback */ + } + } + } else if ((p=strfield((char*)adr, "cancel-file="))) { + if (commandRunning) { + hts_cancel_file((char*)adr + p); + } + } else if (strcmp((char*)adr, "cancel-parsing") == 0) { + if (commandRunning) { + hts_cancel_parsing(); + } + } else if ((p=strfield((char*)adr, "pause="))) { + if (commandRunning) { + hts_setpause(1); + } + } else if ((p=strfield((char*)adr, "unpause"))) { + if (commandRunning) { + hts_setpause(0); + } + } else if (strcmp((char*)adr, "abort") == 0) { + if (commandRunning) { + hts_request_stop(1); + commandEndRequested=2; /* will break the loop() callback */ + } + } else if ((p=strfield((char*)adr, "add-url="))) { + if (commandRunning) { + char* ptraddr[2]; + ptraddr[0] = (char*)adr + p; + ptraddr[1] = NULL; + hts_addurl(ptraddr); + } + } else if ((p=strfield((char*)adr, "httrack"))) { + if (!commandRunning) { + unsigned long int adrcd = 0; + if (inthash_readptr(NewLangList, "command_do", (long int *)&adrcd)) { + unsigned long int adrw = 0, adrpath = 0, adrprojname = 0; + if (inthash_readptr(NewLangList, "winprofile", (long int *)&adrw)) { + StringClear(&tmpbuff); + StringStrcat(&tmpbuff, StringBuff(&fspath)); + StringStrcat(&tmpbuff, "/hts-cache/"); + + /* Create minimal directory structure */ + if 
(!structcheck(StringBuff(&tmpbuff))) { + FILE* fp; + StringStrcat(&tmpbuff, "winprofile.ini"); + fp = fopen(StringBuff(&tmpbuff), "wb"); + if (fp != NULL) { + int count = (int) strlen((char*)adrw); + if ((int)fwrite((void*)adrw, 1, count, fp) == count) { + + /* Wipe the doit.log file, useless here (all options are replicated) and + even a bit annoying (duplicate/ghost options) + The behaviour is exactly the same as in WinHTTrack + */ + StringClear(&tmpbuff); + StringStrcat(&tmpbuff, StringBuff(&fspath)); + StringStrcat(&tmpbuff, "/hts-cache/doit.log"); + remove(StringBuff(&tmpbuff)); + + /* + RUN THE SERVER + */ + if (strcmp((char*)adrcd, "start") == 0) { + webhttrack_main((char*)adr + p); + } else { + commandRunning = 0; + commandEnd = 1; + } + } else { + char tmp[1024]; + sprintf(tmp, "Unable to write %d bytes in the the init file %s", count, StringBuff(&fspath)); + SET_ERROR(tmp); + } + fclose(fp); + } else { + char tmp[1024]; + sprintf(tmp, "Unable to create the init file %s", StringBuff(&fspath)); + SET_ERROR(tmp); + } + } else { + char tmp[1024]; + sprintf(tmp, "Unable to create the directory structure in %s", StringBuff(&fspath)); + SET_ERROR(tmp); + } + + } else { + SET_ERROR("Internal server error: unable to fetch project name or path"); + } + } + } + } else if (strcmp((char*)adr, "quit") == 0) { + willexit=1; + } + inthash_write(NewLangList, "command", (unsigned long int)NULL); + } + } + + /* Response */ + if (meth) { + int virtualpath = 0; + char* pos; + char* url = strchr(line1, ' '); + if (url && *++url == '/' && (pos = strchr(url, ' ')) && !(*pos = '\0') ) { + char fsfile[1024]; + char* file; + FILE* fp; + char* qpos; + + /* get the URL */ + if (error_redirect == NULL) { + if ( (qpos = strchr(url, '?')) ) { + *qpos = '\0'; + } + fsfile[0] = '\0'; + if (strcmp(url, "/") == 0) { + file = "/server/index.html"; + meth = 2; + } else { + file = url; + } + } else { + file = error_redirect; + meth = 2; + } + + if (strncmp(file, "/website/", 9) == 0) { + 
virtualpath = 1; + } + + if (commandRunning) { + if (!is_image(file)) { + file = "/server/refresh.html"; + } + } else if (commandEnd && !virtualpath && !willexit) { + if (!is_image(file)) { + file = "/server/finished.html"; + } + } + + /* Map the requested URL to a filesystem path; fsfile stays empty (=> 404) when the result would not fit */ if (strlen(path) + strlen(file) + 32 < sizeof(fsfile)) { + if (strncmp(file, "/website/", 9) != 0) { + sprintf(fsfile, "%shtml%s", path, file); + } else { + unsigned long int adr = 0; + /* the prefix formatted here is "projpath", not path: bound it against fsfile as well */ if (inthash_readptr(NewLangList, "projpath", (long int *)&adr) && adr != 0 && strlen((char*)adr) + strlen(file) < sizeof(fsfile)) { + sprintf(fsfile, "%s%s", (char*)adr, file + 9); + } + } + } + + if (fsfile[0] && strstr(file, "..") == NULL && (fp = fopen(fsfile, "rb"))) { + char ok[] = "HTTP/1.0 200 OK\r\n" + "Connection: close\r\n" + "Server: httrack-small-server\r\n" + "Content-type: text/html\r\n" + "Cache-Control: no-cache, must-revalidate, private\r\n" + "Pragma: no-cache\r\n" + ; + char ok_img[] = "HTTP/1.0 200 OK\r\n" + "Connection: close\r\n" + "Server: httrack small server\r\n" + "Content-type: image/gif\r\n" + ; + char ok_text[] = "HTTP/1.0 200 OK\r\n" + "Connection: close\r\n" + "Server: httrack small server\r\n" + "Content-type: text/plain\r\n" + ; + + /* register current page */ + inthash_write(NewLangList, "thisfile", (unsigned long int)strdup(file)); + + /* Force GET for the last request */ + if (meth == 2 && willexit) { + meth = 1; + } + + /* posted data are redirected to get protocol */ + if (meth == 2) { + char redir[] = "HTTP/1.0 302 Redirect\r\n" + "Connection: close\r\n" + "Server: httrack-small-server\r\n"; + unsigned long int adr = 0; + char* newfile = file; + /* a non-empty "redirect" variable overrides the redirect target */ if (inthash_readptr(NewLangList, "redirect", (long int *)&adr) && adr != 0) { + char* newadr = (char*)adr; + if (*newadr) { + newfile = newadr; + } + } + StringMemcat(&headers, redir, strlen(redir)); + { + char tmp[256]; + /* check the string that is actually formatted (newfile), so an oversized "redirect" value cannot overflow tmp */ if (strlen(newfile) < sizeof(tmp) - 32) { + sprintf(tmp, "Location: %s\r\n", newfile); + StringMemcat(&headers, tmp, strlen(tmp)); + } + } + inthash_write(NewLangList, "redirect", (unsigned long int)NULL); + } + else if 
(is_html(file)) { + int outputmode = 0; + StringMemcat(&headers, ok, sizeof(ok) - 1); + while(!feof(fp)) { + char* str = line; + int prevlen = StringLength(&output); + int nocr = 0; + if (!linput(fp, line, sizeof(line) - 2)) { + *str = '\0'; + } + if (*str && str[strlen(str) - 1] == '\\') { + nocr = 1; + str[strlen(str) - 1] = '\0'; + } + while(*str) { + char* pos; + int n; + if (*str == '$' && *++str == '{' && (pos = strchr(++str, '}')) && (n = (pos - str) ) && n < 1024 ) { + char name_[1024 + 2]; + char* name = name_; + char* langstr = NULL; + int p; + int format = 0; + int listDefault = 0; + name[0] = '\0'; + strncatbuff(name, str, n); + if (strncmp(name, "/*", 2) == 0) { + /* comments */ + } + else if (( p = strfield(name, "html:"))) { + name += p; + format = 1; + } + else if (( p = strfield(name, "list:"))) { + name += p; + format = 2; + } + else if (( p = strfield(name, "liststr:"))) { + name += p; + format = -2; + } + else if (( p = strfield(name, "file-exists:"))) { + char* pos2; + name += p; + format = 0; + pos2 = strchr(name, ':'); + langstr = ""; + if (pos2 != NULL) { + *pos2 = '\0'; + if (strstr(name, "..") == NULL) { + if (fexist(fconcat(path, name))) { + langstr = pos2 + 1; + } + } + } + } + else if (( p = strfield(name, "do:"))) { + char* pos2; + name += p; + format = 1; + pos2 = strchr(name, ':'); + langstr = ""; + if (pos2 != NULL) { + *pos2 = '\0'; + pos2++; + } else { + pos2=""; + } + if (strcmp(name, "output-mode") == 0) { + if (strcmp(pos2, "html") == 0) { + outputmode = 1; + } else if (strcmp(pos2, "inifile") == 0) { + outputmode = 2; + } else if (strcmp(pos2, "html-urlescaped") == 0) { + outputmode = 3; + } else { + outputmode = 0; + } + } else if (strcmp(name, "if-file-exists") == 0) { + if (strstr(pos2, "..") == NULL) { + if (!fexist(fconcat(path, pos2))) { + outputmode = -1; + } + } + } else if (strcmp(name, "if-project-file-exists") == 0) { + if (strstr(pos2, "..") == NULL) { + if (!fexist(fconcat(StringBuff(&fspath), pos2))) { + 
outputmode = -1; + } + } + } else if (strcmp(name, "if-file-do-not-exists") == 0) { + if (strstr(pos2, "..") == NULL) { + if (fexist(fconcat(path, pos2))) { + outputmode = -1; + } + } + } else if (strcmp(name, "if-not-empty") == 0) { + unsigned long int adr = 0; + if (!inthash_readptr(NewLangList, pos2, (long int *)&adr) || *((char*)adr) == 0 ) { + outputmode = -1; + } + } else if (strcmp(name, "if-empty") == 0) { + unsigned long int adr = 0; + if (inthash_readptr(NewLangList, pos2, (long int *)&adr) && *((char*)adr) != 0 ) { + outputmode = -1; + } + } else if (strcmp(name, "end-if") == 0) { + outputmode = 0; + } else if (strcmp(name, "loadhash") == 0) { + unsigned long int adr = 0; + if (inthash_readptr(NewLangList, "path", (long int *)&adr)) { + char* rpath = (char*) adr; + find_handle h; + if (rpath[0]) { + if (rpath[strlen(rpath)-1]=='/') { + rpath[strlen(rpath)-1]='\0'; /* note: patching stored (inhash) value */ + } + } + h = hts_findfirst(rpath); + if (h) { + struct topindex_chain * chain=NULL; + struct topindex_chain * startchain=NULL; + StringClear(&tmpbuff); + do { + if (hts_findisdir(h)) { + char iname[HTS_URLMAXSIZE*2]; + strcpybuff(iname,rpath); + strcatbuff(iname,"/"); + strcatbuff(iname,hts_findgetname(h)); + strcatbuff(iname,"/hts-cache/winprofile.ini"); + if (fexist(iname)) { + if (StringLength(&tmpbuff) > 0) { + StringStrcat(&tmpbuff, "\r\n"); + } + StringStrcat(&tmpbuff, hts_findgetname(h)); + } + + } + } while(hts_findnext(h)); + hts_findclose(h); + inthash_write(NewLangList, "winprofile", (unsigned long int)StringAcquire(&tmpbuff)); + } + } + } else if (strcmp(name, "copy") == 0) { + if (*pos2) { + char* pos3 = strchr(pos2, ':'); + if ( pos3 && *(pos3 + 1) ) { + unsigned long int adr = 0; + *pos3++ = '\0'; + if (inthash_readptr(NewLangList, pos2, (long int *)&adr)) { + inthash_write(NewLangList, pos3, (unsigned long int)strdup((char*)adr)); + inthash_write(NewLangList, pos2, (unsigned long int)NULL); + } + } + } + } else if (strcmp(name, "set") 
== 0) { + if (*pos2) { + char* pos3 = strchr(pos2, ':'); + if ( pos3 ) { + *pos3++ = '\0'; + inthash_write(NewLangList, pos2, (unsigned long int)strdup(pos3)); + } else { + inthash_write(NewLangList, pos2, (unsigned long int)NULL); + } + } + } + } + /* + test:<if exist> + test:<if ==0>:<if ==1>:<if == 2>.. + ztest:<if == 0 || !exist>:<if == 1>:<if == 2>.. + */ + else if ( ( p = strfield(name, "test:")) || ( p = strfield(name, "ztest:")) ) { + unsigned long int adr = 0; + char* pos2; + int ztest = (name[0] == 'z'); + langstr = ""; + name += p; + pos2 = strchr(name, ':'); + if (pos2 != NULL) { + *pos2 = '\0'; + if (inthash_readptr(NewLangList, name, (long int *)&adr) || ztest) { + char* newadr = (char*)adr; + if (!newadr) + newadr = ""; + if (*newadr || ztest) { + int npos = 0; + name = pos2 + 1; + format = 4; + if (strchr(name, ':') == NULL) { + npos = 0; /* first is good if only one : */ + format = 0; + } else { + if (sscanf(newadr, "%d", &npos) != 1) { + if (strfield(newadr, "on")) { + npos = 1; + } else { + npos = 0; /* first one will be ok */ + format = 0; + } + } + } + while( *name && *name != '}' && npos >= 0) { + int end=0; + char* fpos = strchr(name, ':'); + int n2; + if (fpos == NULL) { + fpos = name + strlen(name); + end=1; + } + n2 = (int) (fpos - name); + if (npos == 0) { + langstr = name; + *fpos='\0'; + } else if (end) { + npos=0; + } + name += n2 + 1; + npos--; + } + } + } + } + } + else if (( p = strfield(name, "listid:"))) { + char* pos2; + name += p; + format = 2; + pos2 = strchr(name, ':'); + if (pos2) { + char dname[32]; + int n2 = (int) (pos2 - name); + if (n2 > 0 && n2 < sizeof(dname) - 2) { + unsigned long int adr = 0; + dname[0] = '\0'; + strncatbuff(dname, name, n2); + if (inthash_readptr(NewLangList, dname, (long int *)&adr)) { + int n = 0; + if (sscanf((char*)adr, "%d", &n) == 1) { + listDefault = n; + } + } + name += n2 + 1; + } + } + } + else if (( p = strfield(name, "checked:"))) { + name += p; + format = 3; + } + if (langstr == NULL) { 
+ if (strfield2(name, "#iso")) { + langstr = line2; + langstr[0] = '\0'; + LANG_LIST(path, langstr); + assertf(strlen(langstr) < sizeof(line2) - 2); + } else { + langstr = LANGSEL(name); + if (langstr == NULL || *langstr == '\0') { + unsigned long int adr = 0; + if (inthash_readptr(NewLangList, name, (long int *)&adr)) { + char* newadr = (char*)adr; + langstr = newadr; + } + } + } + } + if (langstr && outputmode != -1) { + switch(format) { + case 0: + { + char* a = langstr; + while(*a) { + if (a[0] == '\\' && isxdigit(a[1]) && isxdigit(a[2])) { + int n; + char c; + if (sscanf(a+1, "%x", &n) == 1) { + c = (char)n; + StringMemcat(&output, &c, 1); + } + a += 2; + } else if (outputmode && a[0] == '<') { + StringStrcat(&output, "<"); + } else if (outputmode && a[0] == '>') { + StringStrcat(&output, ">"); + } else if (outputmode && a[0] == '&') { + StringStrcat(&output, "&"); + } else if (outputmode == 3 && a[0] == ' ') { + StringStrcat(&output, "%20"); + } else if (outputmode >= 2 && ((unsigned char)a[0]) < 32) { + char tmp[32]; + sprintf(tmp, "%%%02x", (unsigned char)a[0]); + StringStrcat(&output, tmp); + } else if (outputmode == 2 && a[0] == '%') { + StringStrcat(&output, "%%"); + } else if (outputmode == 3 && a[0] == '%') { + StringStrcat(&output, "%25"); + } else { + StringMemcat(&output, a, 1); + } + a++; + } + } + break; + case 3: + if (*langstr) { + StringStrcat(&output, "checked"); + } + break; + default: + if (*langstr) { + int id=1; + char* fstr = langstr; + StringClear(&tmpbuff); + if (format == 2) { + StringStrcat(&output, "<option value=1>"); + } else if (format == -2) { + StringStrcat(&output, "<option value=\""); + } + while(*fstr) { + switch(*fstr) { + case 13: break; + case 10: + if (format == 1) { + StringStrcat(&output, StringBuff(&tmpbuff)); + StringStrcat(&output, "<br>\r\n"); + } else if (format == -2) { + StringStrcat(&output, StringBuff(&tmpbuff)); + StringStrcat(&output, "\">"); + StringStrcat(&output, StringBuff(&tmpbuff)); + 
StringStrcat(&output, "</option>\r\n"); + StringStrcat(&output, "<option value=\""); + } else { + char tmp[32]; + sprintf(tmp, "%d", ++id); + StringStrcat(&output, StringBuff(&tmpbuff)); + StringStrcat(&output, "</option>\r\n"); + StringStrcat(&output, "<option value="); + StringStrcat(&output, tmp); + if (listDefault == id) { + StringStrcat(&output, " selected"); + } + StringStrcat(&output, ">"); + } + StringClear(&tmpbuff); + break; + case '<': + StringStrcat(&tmpbuff, "<"); + break; + case '>': + StringStrcat(&tmpbuff, ">"); + break; + case '&': + StringStrcat(&tmpbuff, "&"); + break; + default: + StringMemcat(&tmpbuff, fstr, 1); + break; + } + fstr++; + } + if (format == 2) { + StringStrcat(&output, StringBuff(&tmpbuff)); + StringStrcat(&output, "</option>"); + } else if (format == -2) { + StringStrcat(&output, StringBuff(&tmpbuff)); + StringStrcat(&output, "\">"); + StringStrcat(&output, StringBuff(&tmpbuff)); + StringStrcat(&output, "</option>"); + } else { + StringStrcat(&output, StringBuff(&tmpbuff)); + } + StringClear(&tmpbuff); + } + } + } + str = pos; + } else { + if (outputmode != -1) { + StringMemcat(&output, str, 1); + } + } + str++; + } + if (!nocr && prevlen != StringLength(&output)) { + StringStrcat(&output, "\r\n"); + } + } +#ifdef _DEBUG + { + int len = (int)strlen((char*)StringBuff(&output)); + assert(len == (int)StringLength(&output)); + } +#endif + } else if (is_text(file)) { + StringMemcat(&headers, ok_text, sizeof(ok_text) - 1); + while(!feof(fp)) { + int n = fread(line, 1, sizeof(line) - 2, fp); + if (n > 0) { + StringMemcat(&output, line, n); + } + } + } else { + StringMemcat(&headers, ok_img, sizeof(ok_img) - 1); + while(!feof(fp)) { + int n = fread(line, 1, sizeof(line) - 2, fp); + if (n > 0) { + StringMemcat(&output, line, n); + } + } + } + fclose(fp); + } else { + char error_hdr[] = "HTTP/1.0 404 Not Found\r\n" + "Server: httrack small server\r\n" + "Content-type: text/html\r\n"; + char error[] = + "Page not found.\r\n"; + 
StringStrcat(&headers, error_hdr); + StringStrcat(&output, error); + //assert(file == NULL); + } + } + } else { +#ifdef _DEBUG + char error_hdr[] = "HTTP/1.0 500 Server Error\r\n" + "Server: httrack small server\r\n" + "Content-type: text/html\r\n"; + char error[] = + "Server error.\r\n"; + StringStrcat(&headers, error_hdr); + StringStrcat(&output, error); +#endif + } + { + char tmp[256]; + sprintf(tmp, "Content-length: %d\r\n", (int) StringLength(&output)); + StringStrcat(&headers, tmp); + } + StringStrcat(&headers, "\r\n"); + if ( + (send(soc_c, StringBuff(&headers), StringLength(&headers), 0) != StringLength(&headers)) + || + ( (meth == 1) && (send(soc_c, StringBuff(&output), StringLength(&output), 0) != StringLength(&output)) ) + ) { +#ifdef _DEBUG + //assert(FALSE); +#endif + } + } else { +#ifdef _DEBUG + // assert(FALSE); +#endif + } + + /* Shutdown (FIN) and wait until confirmed */ + { + char c; +#ifdef _WIN32 + shutdown(soc_c, SD_SEND); +#else + shutdown(soc_c, 1); +#endif + /* This is necessary as IE sometimes (!) 
sends an additional CRLF after POST data */ + while(recv(soc_c, ((char*)&c), 1, 0) > 0); + } + +#if HTS_WIN + closesocket(soc_c); +#else + close(soc_c); +#endif + } + + if (soc != INVALID_SOCKET) { +#ifdef _WIN32 + closesocket(soc); +#else + close(soc); +#endif + } + + StringFree(&headers); + StringFree(&output); + StringFree(&tmpbuff); + StringFree(&fspath); + + if (buffer) + free(buffer); + + if (commandReturnMsg) + free(commandReturnMsg); + commandReturnMsg = NULL; + if (commandReturnCmdl) + free(commandReturnCmdl); + commandReturnCmdl = NULL; + + /* Unlock */ + webhttrack_lock(0); + + return retour; +} + + + +/* Language files */ + + +int htslang_init() { + if (NewLangList==NULL) { + int i = 0; + NewLangList=inthash_new(NewLangListSz); + if (NewLangList==NULL) { + abortLog("Error in lang.h: not enough memory"); + } else { + inthash_value_is_malloc(NewLangList,1); + } + } + return 1; +} + +int htslang_uninit() { + if (NewLangList!=NULL) { + inthash_delete(&NewLangList); + } + return 1; +} + +int htslang_load(char* limit_to, char* path) { + char* hashname; + // + int selected_lang=LANG_T(path, -1); + // + if (!limit_to) { + LANG_DELETE(); + NewLangStr=inthash_new(NewLangStrSz); + NewLangStrKeys=inthash_new(NewLangStrKeysSz); + if ((NewLangStr==NULL) || (NewLangStrKeys==NULL)) { + abortLog("Error in lang.h: not enough memory"); + } else { + inthash_value_is_malloc(NewLangStr,1); + inthash_value_is_malloc(NewLangStrKeys,1); + } + } + + /* Load master file (list of keys and internal keys) */ + if (!limit_to) { + char* mname = "lang.def"; + FILE* fp=fopen(fconcat(path, mname),"rb"); + if (fp) { + char intkey[8192]; + char key[8192]; + while(!feof(fp)) { + linput_cpp(fp,intkey,8000); + linput_cpp(fp,key,8000); + if (strnotempty(intkey) && strnotempty(key)) { + char* test=LANGINTKEY(key); + + /* Increment for multiple definitions */ + if (strnotempty(test)) { + int increment=0; + int pos=strlen(key); + do { + increment++; + sprintf(key+pos,"%d",increment); + 
test=LANGINTKEY(key); + } while (strnotempty(test)); + } + + if (!strnotempty(test)) { // éviter doublons + // conv_printf(key,key); + int len; + char* buff; + len=strlen(intkey); + buff=(char*)malloc(len+2); + if (buff) { + strcpybuff(buff,intkey); + inthash_add(NewLangStrKeys,key,(long int)(char*)buff); + } + } + } // if + } // while + fclose(fp); + } else { + return 0; + } + } + + /* Language Name? */ + { + char name[256]; + sprintf(name,"LANGUAGE_%d",selected_lang+1); + hashname=LANGINTKEY(name); + } + + /* Get only language name */ + if (limit_to) { + if (hashname) + strcpybuff(limit_to, hashname); + else + strcpybuff(limit_to, "???"); + return 0; + } + + /* Error */ + if (!hashname) + return 0; + + /* Load specific language file */ + { + int loops; + // 2nd loop: load undefined strings + for(loops=0;loops<2;loops++) { + FILE* fp; + char lbasename[1024]; + { + char name[256]; + sprintf(name,"LANGUAGE_%d",(loops==0)?(selected_lang+1):1); + hashname=LANGINTKEY(name); + } + sprintf(lbasename, "lang/%s.txt",hashname); + fp=fopen(fconcat(path, lbasename), "rb"); + if (fp) { + char extkey[8192]; + char value[8192]; + while(!feof(fp)) { + linput_cpp(fp,extkey,8000); + linput_cpp(fp,value,8000); + if (strnotempty(extkey) && strnotempty(value)) { + int len; + char* buff; + char* intkey; + + intkey=LANGINTKEY(extkey); + + if (strnotempty(intkey)) { + + /* Increment for multiple definitions */ + { + char* test=LANGSEL(intkey); + if (strnotempty(test)) { + if (loops == 0) { + int increment=0; + int pos=strlen(extkey); + do { + increment++; + sprintf(extkey+pos,"%d",increment); + intkey=LANGINTKEY(extkey); + if (strnotempty(intkey)) + test=LANGSEL(intkey); + else + test=""; + } while (strnotempty(test)); + } else + intkey=""; + } else { + if (loops > 0) { + //err_msg += intkey; + //err_msg += " "; + } + } + } + + /* Add key */ + if (strnotempty(intkey)) { + len=strlen(value); + buff=(char*)malloc(len+2); + if (buff) { + conv_printf(value,buff); + 
inthash_add(NewLangStr,intkey,(long int)(char*)buff); + } + } + + } + } // if + } // while + fclose(fp); + } else { + return 0; + } + } + } + + // Control limit_to + if (limit_to) + limit_to[0]='\0'; + + return 1; +} + +/* NOTE : also contains the "webhttrack" hack */ +void conv_printf(char* from,char* to) { + int i=0,j=0,len; + len=strlen(from); + while(i<len) { + switch(from[i]) { + case '\\': + i++; + switch(from[i]) { + case 'a': to[j]='\a'; break; + case 'b': to[j]='\b'; break; + case 'f': to[j]='\f'; break; + case 'n': to[j]='\n'; break; + case 'r': to[j]='\r'; break; + case 't': to[j]='\t'; break; + case 'v': to[j]='\v'; break; + case '\'': to[j]='\''; break; + case '\"': to[j]='\"'; break; + case '\\': to[j]='\\'; break; + case '?': to[j]='\?'; break; + default: to[j]=from[i]; break; + } + break; + default: + to[j]=from[i]; + break; + } + i++; + j++; + } + to[j++]='\0'; + /* Dirty hack */ + { + char * a = to; + while((a = strstr(a, "WinHTTrack"))) { + a[0] = 'W'; + a[1] = 'e'; + a[2] = 'b'; + a++; + } + } +} + +void LANG_DELETE() { + inthash_delete(&NewLangStr); + inthash_delete(&NewLangStrKeys); +} + +// sélection de la langue +void LANG_INIT(char* path) { + //CWinApp* pApp = AfxGetApp(); + //if (pApp) { + int test = 0; /* pApp->GetProfileInt("Language","IntId",0); */ + LANG_T(path, 0 /*pApp->GetProfileInt("Language","IntId",0)*/ ); + //} +} + +int LANG_T(char* path, int l) { + if (l>=0) { + QLANG_T(l); + htslang_load(NULL, path); + } + return QLANG_T(-1); // 0=default (english) +} + +int LANG_SEARCH(char* path, char* iso) { + char lang_str[1024]; + int i = 0; + int curr_lng=LANG_T(path, -1); + int found = 0; + unsigned long int adr = 0; + do { + QLANG_T(i); + strcpybuff(lang_str,"LANGUAGE_ISO"); + htslang_load(lang_str, path); + if (strfield(iso, lang_str)) { + found = i; + } + i++; + } while(strlen(lang_str) > 0); + QLANG_T(curr_lng); + return found; +} + +int LANG_LIST(char* path, char* buffer) { + char lang_str[1024]; + int i = 0; + int 
curr_lng=LANG_T(path, -1); + int found = 0; + buffer[0] = '\0'; + do { + QLANG_T(i); + strcpybuff(lang_str, "LANGUAGE_NAME"); + htslang_load(lang_str, path); + if (strlen(lang_str) > 0) { + if (buffer[0]) + strcatbuff(buffer, "\n"); + strcatbuff(buffer, lang_str); + } + i++; + } while(strlen(lang_str) > 0); + QLANG_T(curr_lng); + return i; +} + +int QLANG_T(int l) { + static int lng=0; + if (l>=0) { + lng=l; + } + return lng; // 0=default (english) +} + +char* LANGSEL(char* name) { + unsigned long int adr = 0; + if (NewLangStr) + if (!inthash_read(NewLangStr,name,(long int *)&adr)) + adr=0; + if (adr) { + return (char*)adr; + } + return ""; +} + +char* LANGINTKEY(char* name) { + unsigned long int adr=0; + if (NewLangStrKeys) + if (!inthash_read(NewLangStrKeys,name,(long int *)&adr)) + adr=0; + if (adr) { + return (char*)adr; + } + return ""; +} + +char* gethomedir(void) { + char* home = getenv( "HOME" ); + if (home) + return home; + else + return "."; +} + +int linput_cpp(FILE* fp,char* s,int max) { + int rlen=0; + s[0]='\0'; + do { + int ret; + if (rlen>0) + if (s[rlen-1]=='\\') + s[--rlen]='\0'; // couper \ final + // lire ligne + ret=linput_trim(fp,s+rlen,max-rlen); + if (ret>0) + rlen+=ret; + } while((s[max(rlen-1,0)]=='\\') && (rlen<max)); + return rlen; +} + +// copy of concat +typedef struct { + char buff[16][HTS_URLMAXSIZE*2*2]; + int rol; +} concat_strc; +char* concat(const char* a,const char* b) { + static concat_strc* strc = NULL; + if (strc == NULL) { + strc = (concat_strc*) calloc(16, sizeof(concat_strc)); + } + strc->rol=((strc->rol+1)%16); // roving pointer + strcpybuff(strc->buff[strc->rol],a); + if (b) strcatbuff(strc->buff[strc->rol],b); + return strc->buff[strc->rol]; +} +#ifdef _WIN32 +char* __fconv(char* a) { + int i; + for(i=0;i<(int) strlen(a);i++) + if (a[i]=='/') // convertir + a[i]='\\'; + return a; +} +char* fconcat(char* a,char* b) { + return __fconv(concat(a,b)); +} +char* fconv(char* a) { + return __fconv(concat(a,"")); +} +#endif + 
+/* *** Various functions *** */ + + +int fexist(char* s) { + struct stat st; + memset(&st, 0, sizeof(st)); + if (stat(s, &st) == 0) { + if (S_ISREG(st.st_mode)) { + return 1; + } + } + return 0; +} + +int linput(FILE* fp,char* s,int max) { + int c; + int j=0; + do { + c=fgetc(fp); + if (c!=EOF) { + switch(c) { + case 13: break; // sauter CR + case 10: c=-1; break; + case 0: case 9: case 12: break; // sauter ces caractères + default: s[j++]=(char) c; break; + } + } + } while((c!=-1) && (c!=EOF) && (j<(max-1))); + s[j]='\0'; + return j; +} + +int linput_trim(FILE* fp,char* s,int max) { + int rlen=0; + char* ls=(char*) malloct(max+2); + s[0]='\0'; + if (ls) { + char* a; + // lire ligne + rlen=linput(fp,ls,max); + if (rlen) { + // sauter espaces et tabs en fin + while( (rlen>0) && is_realspace(ls[max(rlen-1,0)]) ) + ls[--rlen]='\0'; + // sauter espaces en début + a=ls; + while((rlen>0) && ((*a==' ') || (*a=='\t'))) { + a++; + rlen--; + } + if (rlen>0) { + memcpy(s,a,rlen); // can copy \0 chars + s[rlen]='\0'; + } + } + // + freet(ls); + } + return rlen; +} + +int linputsoc(T_SOC soc, char* s, int max) { + int c; + int j=0; + do { + unsigned char ch; + if (recv(soc, &ch, 1, 0) == 1) { + c = ch; + } else { + c = EOF; + } + if (c!=EOF) { + switch(c) { + case 13: break; // sauter CR + case 10: c=-1; break; + case 9: case 12: break; // sauter ces caractères + default: s[j++]=(char) c; break; + } + } + } while((c!=-1) && (c!=EOF) && (j<(max-1))); + s[j]='\0'; + return j; +} + +int linputsoc_t(T_SOC soc, char* s, int max, int timeout) { + if (check_readinput_t(soc, timeout)) { + return linputsoc(soc, s, max); + } + return -1; +} + +// check if data is available +int check_readinput(htsblk* r) { + if (r->soc != INVALID_SOCKET) { + fd_set fds; // poll structures + struct timeval tv; // structure for select + FD_ZERO(&fds); + FD_SET(r->soc,&fds); + tv.tv_sec=0; + tv.tv_usec=0; + select(r->soc + 1,&fds,NULL,NULL,&tv); + if (FD_ISSET(r->soc,&fds)) + return 1; + else + return 0; + 
} else + return 0; +} + +// check if data is available +int check_readinput_t(T_SOC soc, int timeout) { + if (soc != INVALID_SOCKET) { + fd_set fds; // poll structures + struct timeval tv; // structure for select + FD_ZERO(&fds); + FD_SET(soc,&fds); + tv.tv_sec=timeout; + tv.tv_usec=0; + select(soc + 1,&fds,NULL,NULL,&tv); + if (FD_ISSET(soc,&fds)) + return 1; + else + return 0; + } else + return 0; +} + +int strfield(const char* f,const char* s) { + int r=0; + while (streql(*f,*s) && ((*f)!=0) && ((*s)!=0)) { f++; s++; r++; } + if (*s==0) + return r; + else + return 0; +} + +int ehexh(char c) { + if ((c>='0') && (c<='9')) return c-'0'; + if ((c>='a') && (c<='f')) c-=('a'-'A'); + if ((c>='A') && (c<='F')) return (c-'A'+10); + return 0; +} + +int ehex(char* s) { + return 16*ehexh(*s)+ehexh(*(s+1)); +} + +void unescapehttp(char* s, String* tempo) { + int i; + for (i=0;i<(int) strlen(s);i++) { + if (s[i]=='%' && s[i+1]=='%') { + i++; + StringAddchar(tempo, '%'); + } else if (s[i]=='%') { + char hc; + i++; + hc = (char) ehex(s+i); + StringAddchar(tempo, (char) hc); + i++; // sauter 2 caractères finalement + } + else if (s[i]=='+') { + StringAddchar(tempo, ' '); + } + else + StringAddchar(tempo, s[i]); + } +} + +/* same, except + */ +void unescapeini(char* s, String* tempo) { + int i; + char lastc=0; + for (i=0;i<(int) strlen(s);i++) { + if (s[i]=='%' && s[i+1]=='%') { + i++; + StringAddchar(tempo, lastc = '%'); + } else if (s[i]=='%') { + char hc; + i++; + hc = (char) ehex(s+i); + if (!is_retorsep(hc) || !is_retorsep(lastc)) { + StringAddchar(tempo, lastc = (char) hc); + } + i++; // sauter 2 caractères finalement + } + else + StringAddchar(tempo, lastc = s[i]); + } +} + diff --git a/src/htsserver.h b/src/htsserver.h new file mode 100644 index 0000000..2818b34 --- /dev/null +++ b/src/htsserver.h @@ -0,0 +1,149 @@ +/* ------------------------------------------------------------ */ +/* +HTTrack Website Copier, Offline Browser for Windows and Unix +Copyright (C) Xavier 
Roche and other contributors + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + + +Important notes: + +- We hereby ask people using this source NOT to use it in purpose of grabbing +emails addresses, or collecting any other private information on persons. +This would disgrace our work, and spoil the many hours we spent on it. + + +Please visit our Website: http://www.httrack.com +*/ + + +/* ------------------------------------------------------------ */ +/* File: Mini-server */ +/* Author: Xavier Roche */ +/* ------------------------------------------------------------ */ + +// Fichier intercepteur d'URL .h + +#ifndef HTS_SERVER_DEFH +#define HTS_SERVER_DEFH + +#include "htsbasenet.h" + +// Fonctions +void socinput(T_SOC soc,char* s,int max); +T_SOC smallserver_init_std(int* port_prox,char* adr_prox); +T_SOC smallserver_init(int* port,char* adr); +int smallserver(T_SOC soc,char* url,char* method,char* data, char* path); + +#define CATCH_RESPONSE \ + "HTTP/1.0 200 OK\r\n"\ + "Content-type: text/html\r\n"\ + "\r\n"\ + "<!-- Generated by HTTrack Website Copier -->\r\n"\ + "<HTML><HEAD>\r\n"\ + "<TITLE>Link caught!</TITLE>\r\n"\ + "<SCRIPT LANGUAGE=\"Javascript\">\r\n"\ + "<!--\r\n"\ + "function back() {\r\n"\ + " history.go(-1);\r\n"\ + "}\r\n"\ + "// -->\r\n"\ + "</SCRIPT>\r\n"\ + "</HEAD>\r\n"\ + "<BODY>\r\n"\ + "<H2>Link captured into HTTrack Website 
Copier, you can now restore your proxy preferences!</H2>\r\n"\ + "<BR><BR>\r\n"\ + "<H3><A HREF=\"javascript:back();\">Clic here to go back</A></H3>\r\n"\ + "</BODY></HTML>"\ + "<!-- Generated by HTTrack Website Copier -->\r\n"\ + "\r\n"\ + + +/* String */ + +typedef struct { + char* buff; + int len; + int capa; +} String; + +#define STRING_EMPTY {NULL, 0, 0} +#define BLK_SIZE 8192 +#define StringBuff(blk) ((blk)->buff) +#define StringLength(blk) ((blk)->len) +#define StringCapacity(blk) ((blk)->capa) +#define StringClear(blk) do { \ + if ((blk)->capa > 0) { \ + (blk)->buff[0] = '\0'; \ + }\ + (blk)->len = 0; \ +} while(0) +#define StringFree(blk) do { if ((blk)->buff != NULL) { freet((blk)->buff); (blk)->buff = NULL; } } while(0) +#define StringMemcat(blk, str, size) do { \ + if ((blk)->len + (int)(size) + 1 > (blk)->capa) { \ + (blk)->capa = (blk)->len + (size) + BLK_SIZE; \ + (blk)->buff = (char*) realloct((blk)->buff, (blk)->capa); \ + assertf((blk)->buff != NULL); \ + } \ + if ((int)(size) > 0) { \ + memcpy((blk)->buff + (blk)->len, (str), (size)); \ + (blk)->len += (size); \ + } \ + *((blk)->buff + (blk)->len) = '\0'; \ +} while(0) +#define StringAddchar(blk, c) do { \ + char __c = (c); \ + StringMemcat(blk, &__c, 1); \ +} while(0) +static void* StringAcquire(String* blk) { + void* buff = blk->buff; + blk->buff = NULL; + blk->capa = 0; + blk->len = 0; + return buff; +} + +static void StringStrcat(String* blk, char* str) { + StringMemcat(blk, str, strlen(str)); +} + + +/* Language files */ +int htslang_load(char* limit_to, char* apppath); +void conv_printf(char* from,char* to); +void LANG_DELETE(void); +void LANG_INIT(char* path); +int LANG_T(char* path, int l); +int QLANG_T(int l); +char* LANGSEL(char* name); +char* LANGINTKEY(char* name); +int LANG_SEARCH(char* path, char* iso); +int LANG_LIST(char* path, char* buffer); + +int htslang_init(void); +int htslang_uninit(void); + +int linput_cpp(FILE* fp,char* s,int max); +void unescapehttp(char* s, String* 
tempo); +void unescapeini(char* s, String* tempo); + +int smallserver_setkey(char* key, char* value); +int smallserver_setkeyint(char* key, LLint value); +int smallserver_setkeyarr(char* key, int id, char* key2, char* value); + +#endif + + + diff --git a/src/htssystem.h b/src/htssystem.h index 989607d..6c4d216 100644 --- a/src/htssystem.h +++ b/src/htssystem.h @@ -1,15 +1 @@ -// Définition de la plate-forme utilisée - -// Sun Solaris .......... 0 -// Windows/95 ........... 1 -// Ibm 580 .............. 2 - -#define HTS_PLATFORM 1 - -// SHELL -#define HTS_ANALYSTE 2 - - -// Fin de la définition - - +/* (empty file) */ diff --git a/src/htssystem.h.windows9x b/src/htssystem.h.windows9x deleted file mode 100644 index 0689e0c..0000000 --- a/src/htssystem.h.windows9x +++ /dev/null @@ -1,11 +0,0 @@ -/* HTTrack, Offline Browser for Windows and Unix */
-
-/* HTTrack system definition for Windows */
-/* This should be the only file you have to change */
-
-/* Solaris: 0 / Windows: 1 / AIX: 2 / Linux: 3 */
-
-
-/* Fix plateform number to 1 (Windows) */
-/* If it doesn't compile, try another one */
-#define HTS_PLATEFORM 1
diff --git a/src/htsthread.c b/src/htsthread.c index 0a3bee6..d403730 100644 --- a/src/htsthread.c +++ b/src/htsthread.c @@ -68,7 +68,7 @@ unsigned long _beginthread( void* ( *start_address )( void * ), unsigned stack_s [-1 check if locked (always return 0 with mutex)] -999 initialize */ -int htsSetLock(PTHREAD_LOCK_TYPE* hMutex,int lock) { +HTSEXT_API int htsSetLock(PTHREAD_LOCK_TYPE* hMutex,int lock) { #if HTS_WIN /* lock */ if (lock==1) diff --git a/src/htsthread.h b/src/htsthread.h index cb3a139..326c8cb 100644 --- a/src/htsthread.h +++ b/src/htsthread.h @@ -85,7 +85,7 @@ Please visit our Website: http://www.httrack.com #endif -int htsSetLock(PTHREAD_LOCK_TYPE * hMutex,int lock); +HTSEXT_API int htsSetLock(PTHREAD_LOCK_TYPE * hMutex,int lock); #if USE_PTHREAD unsigned long _beginthread( void* ( *start_address )( void * ), unsigned stack_size, void *arglist ); diff --git a/src/htstools.c b/src/htstools.c index 1eeafbf..44e5137 100644 --- a/src/htstools.c +++ b/src/htstools.c @@ -90,7 +90,7 @@ int ident_url_relatif(char *lien,char* origin_adr,char* origin_fil,char* adr,cha ok=-2; // non supporté } #if HTS_USEOPENSSL - } else if (strfield(lien,"https://")) { + } else if (SSL_is_available && strfield(lien,"https://")) { // Note: ftp:foobar.gif is not valid if (ident_url_absolute(lien,adr,fil)==-1) { ok=-1; // erreur URL @@ -114,45 +114,56 @@ int ident_url_relatif(char *lien,char* origin_adr,char* origin_fil,char* adr,cha /* patch scheme if necessary */ if (strfield(lien,"http:")) { lien+=5; - strcpy(adr, jump_protocol(origin_adr)); // même adresse ; protocole vide (http) + strcpybuff(adr, jump_protocol(origin_adr)); // même adresse ; protocole vide (http) } else if (strfield(lien,"https:")) { lien+=6; - strcpy(adr, "https://"); // même adresse forcée en https - strcat(adr, jump_protocol(origin_adr)); + strcpybuff(adr, "https://"); // même adresse forcée en https + strcatbuff(adr, jump_protocol(origin_adr)); } else if (strfield(lien,"ftp:")) { lien+=4; - strcpy(adr, 
"ftp://"); // même adresse forcée en ftp - strcat(adr, jump_protocol(origin_adr)); + strcpybuff(adr, "ftp://"); // même adresse forcée en ftp + strcatbuff(adr, jump_protocol(origin_adr)); } else { - strcpy(adr,origin_adr); // même adresse ; et même éventuel protocole + strcpybuff(adr,origin_adr); // même adresse ; et même éventuel protocole } - + if (*lien!='/') { // sinon c'est un lien absolu - a=strchr(origin_fil,'?'); - if (!a) a=origin_fil+strlen(origin_fil); - while((*a!='/') && ( a > origin_fil) ) a--; - if (*a=='/') { // ok on a un '/' - if ( (((int) (a - origin_fil))+1+strlen(lien)) < HTS_URLMAXSIZE) { - // copier chemin - strncpy(fil,origin_fil,((int) (a - origin_fil))+1); - *(fil + ((int) (a - origin_fil))+1)='\0'; - - // copier chemin relatif - if (((int) strlen(fil)+(int) strlen(lien)) < HTS_URLMAXSIZE) { - strcat(fil,lien + ((*lien=='/')?1:0) ); - // simplifier url pour les ../ - fil_simplifie(fil); - } else - ok=-1; // erreur + if (*lien == '\0') { + strcpybuff(fil,origin_fil); + } else if (*lien == '?') { // example: a href="?page=2" + char* a; + strcpybuff(fil,origin_fil); + a=strchr(fil,'?'); + if (a) *a='\0'; + strcatbuff(fil,lien); + } else { + a=strchr(origin_fil,'?'); + if (a == NULL) a=origin_fil+strlen(origin_fil); + while((*a!='/') && ( a > origin_fil) ) a--; + if (*a=='/') { // ok on a un '/' + if ( (((int) (a - origin_fil))+1+strlen(lien)) < HTS_URLMAXSIZE) { + // copier chemin + strncpy(fil,origin_fil,((int) (a - origin_fil))+1); + *(fil + ((int) (a - origin_fil))+1)='\0'; + + // copier chemin relatif + if (((int) strlen(fil)+(int) strlen(lien)) < HTS_URLMAXSIZE) { + strcatbuff(fil,lien + ((*lien=='/')?1:0) ); + // simplifier url pour les ../ + fil_simplifie(fil); + } else + ok=-1; // erreur + } else { // erreur + ok=-1; // erreur URL + } } else { // erreur ok=-1; // erreur URL } - } else { // erreur - ok=-1; // erreur URL } } else { // chemin absolu // copier chemin directement - strcat(fil,lien); + strcatbuff(fil,lien); + 
fil_simplifie(fil); } // *lien!='/' } else ok=-1; @@ -191,17 +202,17 @@ int lienrelatif(char* s,char* link,char* curr_fil) { // patch: éliminer les ? (paramètres) sinon bug if ( (a=strchr(curr_fil,'?')) ) { - strncat(newcurr_fil,curr_fil,(int) (a - curr_fil)); + strncatbuff(newcurr_fil,curr_fil,(int) (a - curr_fil)); curr_fil = newcurr_fil; } if ( (a=strchr(link,'?')) ) { - strncat(newlink,link,(int) (a - link)); + strncatbuff(newlink,link,(int) (a - link)); link = newlink; } // recopier uniquement le chemin courant curr=_curr; - strcpy(curr,curr_fil); + strcpybuff(curr,curr_fil); if ((a=strchr(curr,'?'))==NULL) // couper au ? (params) a=curr+strlen(curr)-1; // pas de params: aller à la fin while((*a!='/') && ( a> curr)) a--; // chercher dernier / du chemin courant @@ -234,13 +245,13 @@ int lienrelatif(char* s,char* link,char* curr_fil) { // LES ../ ONT ETE SIMPLIFIES a=curr; if (*a=='/') a++; - while(*a) if (*(a++)=='/') strcat(s,"../"); - //if (strlen(s)==0) strcat(s,"/"); + while(*a) if (*(a++)=='/') strcatbuff(s,"../"); + //if (strlen(s)==0) strcatbuff(s,"/"); - if (slash) strcat(s,"/"); // garder absolu!! + if (slash) strcatbuff(s,"/"); // garder absolu!! 
// on est dans le répertoire de départ, copier - strcat(s,link + ((*link=='/')?1:0) ); + strcatbuff(s,link + ((*link=='/')?1:0) ); /* Security check */ if (strlen(s) >= HTS_URLMAXSIZE) @@ -294,10 +305,10 @@ void long_to_83(int mode,char* n83,char* save) { fnl[i]='\0'; // conversion longfile_to_83(mode,fn83,fnl); - strcat(n83,fn83); + strcatbuff(n83,fn83); save+=i; - if (*save=='/') { strcat(n83,"/"); save++; } + if (*save=='/') { strcatbuff(n83,"/"); save++; } } } @@ -375,15 +386,15 @@ void longfile_to_83(int mode,char* n83,char* save) { } // corriger vers 8-3 n83[0]='\0'; - strncat(n83,nom,8); + strncatbuff(n83,nom,8); if (strnotempty(ext)) { - strcat(n83,"."); - strncat(n83,ext,3); + strcatbuff(n83,"."); + strncatbuff(n83,ext,3); } } // écrire backblue.gif -int verif_backblue(char* base) { +int verif_backblue(httrackp* opt,char* base) { int* done; int ret=0; NOSTATIC_RESERVE(done, int, 1); @@ -400,7 +411,7 @@ int verif_backblue(char* base) { if (fwrite(HTS_DATA_BACK_GIF,HTS_DATA_BACK_GIF_LEN,1,fp) != HTS_DATA_BACK_GIF_LEN) ret=1; fclose(fp); - usercommand(0,NULL,fconcat(base,"backblue.gif")); + usercommand(opt,0,NULL,fconcat(base,"backblue.gif"),"",""); } else ret=1; // @@ -409,7 +420,7 @@ int verif_backblue(char* base) { if (fwrite(HTS_DATA_FADE_GIF,HTS_DATA_FADE_GIF_LEN,1,fp) != HTS_DATA_FADE_GIF_LEN) ret=1; fclose(fp); - usercommand(0,NULL,fconcat(base,"fade.gif")); + usercommand(opt,0,NULL,fconcat(base,"fade.gif"),"",""); } else ret=1; } @@ -524,7 +535,7 @@ int istoobig(LLint size,LLint maxhtml,LLint maxnhtml,char* type) { } -int hts_buildtopindex(char* path,char* binpath) { +HTSEXT_API int hts_buildtopindex(httrackp* opt,char* path,char* binpath) { FILE* fpo; int retval=0; char rpath[1024*2]; @@ -537,7 +548,7 @@ int hts_buildtopindex(char* path,char* binpath) { if (toptemplate_header && toptemplate_body && toptemplate_footer) { - strcpy(rpath,path); + strcpybuff(rpath,path); if (rpath[0]) { if (rpath[strlen(rpath)-1]=='/') rpath[strlen(rpath)-1]='\0'; @@ 
-546,7 +557,7 @@ int hts_buildtopindex(char* path,char* binpath) { fpo=fopen(fconcat(rpath,"/index.html"),"wb"); if (fpo) { find_handle h; - verif_backblue(concat(rpath,"/")); // générer gif + verif_backblue(opt,concat(rpath,"/")); // générer gif // Header fprintf(fpo,toptemplate_header, "<!-- Mirror and index made by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->" @@ -560,10 +571,10 @@ int hts_buildtopindex(char* path,char* binpath) { do { if (hts_findisdir(h)) { char iname[HTS_URLMAXSIZE*2]; - strcpy(iname,rpath); - strcat(iname,"/"); - strcat(iname,hts_findgetname(h)); - strcat(iname,"/index.html"); + strcpybuff(iname,rpath); + strcatbuff(iname,"/"); + strcatbuff(iname,hts_findgetname(h)); + strcatbuff(iname,"/index.html"); if (fexist(iname)) { struct topindex_chain * oldchain=chain; chain=calloc(sizeof(struct topindex_chain), 1); @@ -575,7 +586,7 @@ int hts_buildtopindex(char* path,char* binpath) { oldchain->next=chain; } chain->next=NULL; - strcpy(chain->name, hts_findgetname(h)); + strcpybuff(chain->name, hts_findgetname(h)); } } @@ -587,7 +598,7 @@ int hts_buildtopindex(char* path,char* binpath) { chain=startchain; while(chain) { char hname[HTS_URLMAXSIZE*2]; - strcpy(hname,chain->name); + strcpybuff(hname,chain->name); escape_check_url(hname); fprintf(fpo,toptemplate_body, hname, @@ -639,7 +650,7 @@ if (h) { hts_findclose(h); } */ -find_handle hts_findfirst(char* path) { +HTSEXT_API find_handle hts_findfirst(char* path) { if (path) { if (strnotempty(path)) { find_handle_struct* find = (find_handle_struct*) calloc(1,sizeof(find_handle_struct)); @@ -648,22 +659,22 @@ find_handle hts_findfirst(char* path) { #if HTS_WIN { char rpath[1024*2]; - strcpy(rpath,path); + strcpybuff(rpath,path); if (rpath[0]) { if (rpath[strlen(rpath)-1]!='\\') - strcat(rpath,"\\"); + strcatbuff(rpath,"\\"); } - strcat(rpath,"*.*"); + strcatbuff(rpath,"*.*"); find->handle = FindFirstFile(rpath,&find->hdata); if (find->handle != INVALID_HANDLE_VALUE) return find; } 
#else - strcpy(find->path,path); + strcpybuff(find->path,path); { if (find->path[0]) { if (find->path[strlen(find->path)-1]!='/') - strcat(find->path,"/"); + strcatbuff(find->path,"/"); } } find->hdir=opendir(path); @@ -678,7 +689,8 @@ find_handle hts_findfirst(char* path) { } return NULL; } -int hts_findnext(find_handle find) { + +HTSEXT_API int hts_findnext(find_handle find) { if (find) { #if HTS_WIN if ( (FindNextFile(find->handle,&find->hdata))) @@ -693,7 +705,8 @@ int hts_findnext(find_handle find) { } return 0; } -int hts_findclose(find_handle find) { + +HTSEXT_API int hts_findclose(find_handle find) { if (find) { #if HTS_WIN if (find->handle) { @@ -710,7 +723,8 @@ int hts_findclose(find_handle find) { } return 0; } -char* hts_findgetname(find_handle find) { + +HTSEXT_API char* hts_findgetname(find_handle find) { if (find) { #if HTS_WIN return find->hdata.cFileName; @@ -721,7 +735,8 @@ char* hts_findgetname(find_handle find) { } return NULL; } -int hts_findgetsize(find_handle find) { + +HTSEXT_API int hts_findgetsize(find_handle find) { if (find) { #if HTS_WIN return find->hdata.nFileSizeLow; @@ -731,7 +746,8 @@ int hts_findgetsize(find_handle find) { } return -1; } -int hts_findisdir(find_handle find) { + +HTSEXT_API int hts_findisdir(find_handle find) { if (find) { if (!hts_findissystem(find)) { #if HTS_WIN @@ -745,7 +761,7 @@ int hts_findisdir(find_handle find) { } return 0; } -int hts_findisfile(find_handle find) { +HTSEXT_API int hts_findisfile(find_handle find) { if (find) { if (!hts_findissystem(find)) { #if HTS_WIN @@ -759,7 +775,7 @@ int hts_findisfile(find_handle find) { } return 0; } -int hts_findissystem(find_handle find) { +HTSEXT_API int hts_findissystem(find_handle find) { if (find) { #if HTS_WIN if (find->hdata.dwFileAttributes & (FILE_ATTRIBUTE_SYSTEM|FILE_ATTRIBUTE_HIDDEN|FILE_ATTRIBUTE_TEMPORARY)) diff --git a/src/htstools.h b/src/htstools.h index b3e2c7e..e3f7dd7 100644 --- a/src/htstools.h +++ b/src/htstools.h @@ -45,10 +45,12 @@ Please 
visit our Website: http://www.httrack.com #include "htsbase.h" #include "htscore.h" -#if HTS_WIN +#ifdef _WIN32 #else #include <dirent.h> +#ifdef HAVE_UNISTD_H #include <unistd.h> +#endif #include <sys/stat.h> #endif @@ -83,56 +85,47 @@ HTS_INLINE int __rech_tageqbegdigits(const char* adr,const char* s); //HTS_INLINE int rech_tageq(const char* adr,const char* s); HTS_INLINE int rech_sampletag(const char* adr,const char* s); HTS_INLINE int check_tag(char* from,const char* tag); -int verif_backblue(char* base); +int verif_backblue(httrackp* opt,char* base); int verif_external(int nb,int test); int istoobig(LLint size,LLint maxhtml,LLint maxnhtml,char* type); -int hts_buildtopindex(char* path,char* binpath); - +#ifndef HTTRACK_DEFLIB +HTSEXT_API int hts_buildtopindex(httrackp* opt,char* path,char* binpath); +#endif // Portable directory find functions -#if HTS_WIN - +#ifndef HTTRACK_DEFLIB +#ifdef _WIN32 typedef struct { WIN32_FIND_DATA hdata; HANDLE handle; } find_handle_struct; - - #else - typedef struct { DIR * hdir; struct dirent* dirp; struct stat filestat; char path[2048]; } find_handle_struct; - #endif - typedef find_handle_struct* find_handle; - typedef struct topindex_chain { char name[2048]; /* path */ struct topindex_chain* next; /* next element */ } topindex_chain ; - - // Directory find functions -find_handle hts_findfirst(char* path); -int hts_findnext(find_handle find); -int hts_findclose(find_handle find); +HTSEXT_API find_handle hts_findfirst(char* path); +HTSEXT_API int hts_findnext(find_handle find); +HTSEXT_API int hts_findclose(find_handle find); // -char* hts_findgetname(find_handle find); -int hts_findgetsize(find_handle find); -int hts_findisdir(find_handle find); -int hts_findisfile(find_handle find); -int hts_findissystem(find_handle find); - - - +HTSEXT_API char* hts_findgetname(find_handle find); +HTSEXT_API int hts_findgetsize(find_handle find); +HTSEXT_API int hts_findisdir(find_handle find); +HTSEXT_API int hts_findisfile(find_handle 
find); +HTSEXT_API int hts_findissystem(find_handle find); +#endif #endif diff --git a/src/htsweb.c b/src/htsweb.c new file mode 100644 index 0000000..a5e1902 --- /dev/null +++ b/src/htsweb.c @@ -0,0 +1,653 @@ +/* ------------------------------------------------------------ */ +/* +HTTrack Website Copier, Offline Browser for Windows and Unix +Copyright (C) Xavier Roche and other contributors + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + + +Important notes: + +- We hereby ask people using this source NOT to use it in purpose of grabbing +emails addresses, or collecting any other private information on persons. +This would disgrace our work, and spoil the many hours we spent on it. 
+ + +Please visit our Website: http://www.httrack.com +*/ + + +/* ------------------------------------------------------------ */ +/* File: webhttrack.c routines */ +/* Author: Xavier Roche */ +/* ------------------------------------------------------------ */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#ifdef HAVE_SYS_TYPES_H +#include <sys/types.h> +#endif +#ifdef HAVE_SYS_STAT_H +#include <sys/stat.h> +#endif +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#include <ctype.h> +#ifndef _WIN32 +#include <signal.h> +#endif +// htswrap_add +#include "htsglobal.h" +#include "htswrap.h" +#include "httrack-library.h" + +/* Threads */ +#include "htsthread.h" + +/* External modules */ +#include "htsinthash.c" +#include "htsmd5.c" +#include "md5.c" + +#include "htsserver.h" +#include "htsweb.h" + +#if USE_BEGINTHREAD==0 +#error fatal: no threads support +#endif + +#if HTS_WIN +#ifndef __cplusplus +// DOS +#include <process.h> /* _beginthread, _endthread */ +#endif +#else +#endif + +static PTHREAD_LOCK_TYPE refreshMutex; + +static int help_server(char* dest_path); +extern int commandRunning; +extern int commandEnd; +extern int commandReturn; +extern int commandEndRequested; +extern char* commandReturnMsg; +extern char* commandReturnCmdl; + +static void htsweb_sig_brpipe( int code ) { + /* ignore */ +} + +int main(int argc, char* argv[]) +{ + int i; + int ret = 0; + printf("Initialzing the server..\n"); + +#ifdef _WIN32 + { + WORD wVersionRequested; // requested version WinSock API + WSADATA wsadata; // Windows Sockets API data + int stat; + wVersionRequested = 0x0101; + stat = WSAStartup( wVersionRequested, &wsadata ); + if (stat != 0) { + fprintf(stderr, "Winsock not found!\n"); + return -1; + } else if (LOBYTE(wsadata.wVersion) != 1 && HIBYTE(wsadata.wVersion) != 1) { + fprintf(stderr, "WINSOCK.DLL does not support version 1.1\n"); + WSACleanup(); + return -1; + } + } +#endif + + if (argc < 2 || (argc % 2) != 0) { + 
fprintf(stderr, "** Warning: use the webhttrack frontend if available\n"); + fprintf(stderr, "usage: %s <path-to-html-root-dir> [key value [key value]..]\n", argv[0]); + fprintf(stderr, "example: %s /usr/share/httrack\n", argv[0]); + return 1; + } + + /* init and launch */ + hts_init(); + htslang_init(); + webhttrack_lock(-999); + + /* set general keys */ +#ifdef HTS_ETCPATH + smallserver_setkey("ETCPATH", HTS_ETCPATH); +#endif +#ifdef HTS_BINPATH + smallserver_setkey("BINPATH", HTS_BINPATH); +#endif +#ifdef HTS_LIBPATH + smallserver_setkey("LIBPATH", HTS_LIBPATH); +#endif +#ifdef HTS_PREFIX + smallserver_setkey("PREFIX", HTS_PREFIX); +#endif +#ifdef HTS_HTTRACKCNF + smallserver_setkey("HTTRACKCNF", HTS_HTTRACKCNF); +#endif +#ifdef HTS_HTTRACKDIR + smallserver_setkey("HTTRACKDIR", HTS_HTTRACKDIR); +#endif +#ifdef HTS_INET6 + smallserver_setkey("INET6", "1"); +#endif +#ifdef HTS_USEOPENSSL + smallserver_setkey("USEOPENSSL", "1"); +#endif +#ifdef HTS_DLOPEN + smallserver_setkey("DLOPEN", "1"); +#endif +#ifdef HTS_USESWF + smallserver_setkey("USESWF", "1"); +#endif +#ifdef HTS_USEZLIB + smallserver_setkey("USEZLIB", "1"); +#endif +#ifdef _WIN32 + smallserver_setkey("WIN32", "1"); +#endif + smallserver_setkey("HTTRACK_VERSION", HTTRACK_VERSION); + smallserver_setkey("HTTRACK_VERSIONID", HTTRACK_VERSIONID); + smallserver_setkey("HTTRACK_AFF_VERSION", HTTRACK_AFF_VERSION); + { + char tmp[32]; + sprintf(tmp, "%d", HTS_PLATFORM); + smallserver_setkey("HTS_PLATFORM", tmp); + } + smallserver_setkey("HTTRACK_WEB", HTTRACK_WEB); + + /* protected session-id */ + { + char buff[1024]; + char digest[32 + 2]; + srand(time(NULL)); + sprintf(buff, "%d-%d", (int)time(NULL), (int)rand()); + domd5mem(buff,strlen(buff),digest,1); + smallserver_setkey("sid", digest); + smallserver_setkey("_sid", digest); + } + + /* set commandline keys */ + for(i = 2 ; i < argc ; i += 2) { + smallserver_setkey(argv[i], argv[i + 1]); + } + + /* sigpipe */ +#ifndef _WIN32 + signal( SIGPIPE , 
htsweb_sig_brpipe ); // broken pipe (write into non-opened socket) +#endif + + /* launch */ + ret = help_server(argv[1]); + + hts_uninit(); + +#ifdef _WIN32 + WSACleanup(); +#endif + + return ret; +} + +static int webhttrack_runmain(int argc, char** argv); +static PTHREAD_TYPE back_launch_cmd( void* pP ) { + char* cmd = (char*) pP; + char** argv = (char**) malloct(1024 * sizeof(char*)); + int argc = 0; + int i = 0; + int g = 0; + + /* copy commandline */ + if (commandReturnCmdl) + free(commandReturnCmdl); + commandReturnCmdl = strdup(cmd); + + /* split */ + argv[0]="webhttrack"; + argv[1]=cmd; + argc++; + i = 0; + while(cmd[i]) { + if (cmd[i] == '\t' || cmd[i] == '\r' || cmd[i] == '\n') { + cmd[i] = ' '; + } + i++; + } + i = 0; + while(cmd[i]) { + if(cmd[i]=='\"') g=!g; + if(cmd[i]==' ') { + if(!g){ + cmd[i]='\0'; + argv[argc++]=cmd+i+1; + } + } + i++; + } + + /* run */ + commandReturn = webhttrack_runmain(argc, argv); + if (commandReturn) { + if (commandReturnMsg) + free(commandReturnMsg); + commandReturnMsg = strdup(hts_errmsg()); + } + + /* okay */ + commandRunning = 0; + + /* finished */ + commandEnd = 1; + + /* free */ + free(cmd); + freet(argv); + return PTHREAD_RETURN; +} + +void webhttrack_main(char* cmd) { + commandRunning = 1; + _beginthread(back_launch_cmd, 0, (void*) strdup(cmd)); +} + +void webhttrack_lock(int lock) { + htsSetLock(&refreshMutex, lock); +} + +static int webhttrack_runmain(int argc, char** argv) { + hts_init(); + htswrap_add("init",htsshow_init); + htswrap_add("free",htsshow_uninit); + htswrap_add("start",htsshow_start); + htswrap_add("change-options",htsshow_chopt); + htswrap_add("end",htsshow_end); + htswrap_add("check-html",htsshow_checkhtml); + htswrap_add("loop",htsshow_loop); + htswrap_add("query",htsshow_query); + htswrap_add("query2",htsshow_query2); + htswrap_add("query3",htsshow_query3); + htswrap_add("check-link",htsshow_check); + htswrap_add("pause",htsshow_pause); + htswrap_add("save-file",htsshow_filesave); + 
htswrap_add("link-detected",htsshow_linkdetected); + htswrap_add("transfer-status",htsshow_xfrstatus); + htswrap_add("save-name",htsshow_savename); + hts_uninit(); + return hts_main(argc,argv); + +} + +static int help_server(char* dest_path) { + int returncode = 0; + char adr_prox[HTS_URLMAXSIZE*2]; + int port_prox; + T_SOC soc=smallserver_init_std(&port_prox,adr_prox); + if (soc!=INVALID_SOCKET) { + char url[HTS_URLMAXSIZE*2]; + char method[32]; + char data[32768]; + url[0]=method[0]=data[0]='\0'; + // + printf("Okay, temporary server installed.\nThe URL is:\n"); + printf("URL=http://%s:%d/\n", adr_prox, port_prox); +#ifndef _WIN32 + { + pid_t pid = getpid(); + printf("PID=%d\n", (int)pid); + } +#endif + fflush(stdout); + fflush(stderr); + // + if (!smallserver(soc,url,method,data,dest_path)) { + fprintf(stderr, "Unable to create the server\n"); +#ifdef _WIN32 + closesocket(soc); +#else + close(soc); +#endif + printf("Done\n"); + returncode = 1; + } else { + returncode = 0; + } + } else { + fprintf(stderr, "Unable to initialize a temporary server (no remaining port)\n"); + returncode = 1; + } + printf("EXITED\n"); + fflush(stdout); + fflush(stderr); + return returncode; +} + + +/* CALLBACK FUNCTIONS */ + +/* Initialize the Winsock */ +void __cdecl htsshow_init(void) { +} +void __cdecl htsshow_uninit(void) { +} +int __cdecl htsshow_start(httrackp* opt) { + return 1; +} +int __cdecl htsshow_chopt(httrackp* opt) { + return htsshow_start(opt); +} +int __cdecl htsshow_end(void) { + return 1; +} +int __cdecl htsshow_checkhtml(char* html,int len,char* url_adresse,char* url_fichier) { + return 1; +} +int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int lien_n,int lien_tot,int stat_time, hts_stat_struct* stats) { // appelé à chaque boucle de HTTrack + static TStamp prev_mytime=0; /* ok */ + static t_InpInfo SInfo; /* ok */ + // + TStamp mytime; + long int rate=0; + // + int stat_written=-1; + int stat_updated=-1; + int stat_errors=-1; + int 
stat_warnings=-1; + int stat_infos=-1; + int nbk=-1; + LLint nb=-1; + int stat_nsocket=-1; + LLint stat_bytes=-1; + LLint stat_bytes_recv=-1; + int irate=-1; + // + char st[256]; + + /* Exit now */ + if (commandEndRequested == 2) + return 0; + + /* Lock */ + webhttrack_lock(1); + + if (stats) { + stat_written=stats->stat_files; + stat_updated=stats->stat_updated_files; + stat_errors=stats->stat_errors; + stat_warnings=stats->stat_warnings; + stat_infos=stats->stat_infos; + nbk=stats->nbk; + stat_nsocket=stats->stat_nsocket; + irate=(int)stats->rate; + nb=stats->nb; + stat_bytes=stats->nb; + stat_bytes_recv=stats->HTS_TOTAL_RECV; + } + + mytime=mtime_local(); + if ((stat_time>0) && (stat_bytes_recv>0)) + rate=(int)(stat_bytes_recv/stat_time); + else + rate=0; // pas d'infos + + /* Infos */ + if (stat_bytes>=0) SInfo.stat_bytes=stat_bytes; // bytes + if (stat_time>=0) SInfo.stat_time=stat_time; // time + if (lien_tot>=0) SInfo.lien_tot=lien_tot; // nb liens + if (lien_n>=0) SInfo.lien_n=lien_n; // scanned + SInfo.stat_nsocket=stat_nsocket; // socks + if (rate>0) SInfo.rate=rate; // rate + if (irate>=0) SInfo.irate=irate; // irate + if (SInfo.irate<0) SInfo.irate=SInfo.rate; + if (nbk>=0) SInfo.stat_back=nbk; + if (stat_written>=0) SInfo.stat_written=stat_written; + if (stat_updated>=0) SInfo.stat_updated=stat_updated; + if (stat_errors>=0) SInfo.stat_errors=stat_errors; + if (stat_warnings>=0) SInfo.stat_warnings=stat_warnings; + if (stat_infos>=0) SInfo.stat_infos=stat_infos; + + + st[0]='\0'; + qsec2str(st,stat_time); + + /* Set keys */ + smallserver_setkeyint("info.stat_bytes", SInfo.stat_bytes); + smallserver_setkeyint("info.stat_time", SInfo.stat_time); + smallserver_setkeyint("info.lien_tot", SInfo.lien_tot); + smallserver_setkeyint("info.lien_n", SInfo.lien_n); + smallserver_setkeyint("info.stat_nsocket", SInfo.stat_nsocket); + smallserver_setkeyint("info.rate", SInfo.rate); + smallserver_setkeyint("info.irate", SInfo.irate); + 
smallserver_setkeyint("info.stat_back", SInfo.stat_back); + smallserver_setkeyint("info.stat_written", SInfo.stat_written); + smallserver_setkeyint("info.stat_updated", SInfo.stat_updated); + smallserver_setkeyint("info.stat_errors", SInfo.stat_errors); + smallserver_setkeyint("info.stat_warnings", SInfo.stat_warnings); + smallserver_setkeyint("info.stat_infos", SInfo.stat_infos); + /* */ + smallserver_setkey("info.stat_time_str", st); + + if ( ((mytime - prev_mytime)>100) || ((mytime - prev_mytime)<0) ) { + prev_mytime=mytime; + + + // parcourir registre des liens + if (back_index>=0 && back_max > 0) { // seulement si index passé + int j,k; + int index=0; + int ok=0; // idem + int l; // idem + // + t_StatsBuffer StatsBuffer[NStatsBuffer]; + + { + int i; + for(i=0;i<NStatsBuffer;i++) { + strcpybuff(StatsBuffer[i].state,""); + strcpybuff(StatsBuffer[i].name,""); + strcpybuff(StatsBuffer[i].file,""); + strcpybuff(StatsBuffer[i].url_sav,""); + StatsBuffer[i].back=0; + StatsBuffer[i].size=0; + StatsBuffer[i].sizetot=0; + } + } + for(k=0;k<2;k++) { // 0: lien en cours 1: autres liens + for(j=0;(j<3) && (index<NStatsBuffer);j++) { // passe de priorité + int _i; + for(_i=0+k;(_i< max(back_max*k,1) ) && (index<NStatsBuffer);_i++) { // no lien + int i=(back_index+_i)%back_max; // commencer par le "premier" (l'actuel) + if (back[i].status>=0) { // signifie "lien actif" + // int ok=0; // OPTI + ok=0; + switch(j) { + case 0: // prioritaire + if ((back[i].status>0) && (back[i].status<99)) { + strcpybuff(StatsBuffer[index].state,"receive"); ok=1; + } + break; + case 1: + if (back[i].status==99) { + strcpybuff(StatsBuffer[index].state,"request"); ok=1; + } + else if (back[i].status==100) { + strcpybuff(StatsBuffer[index].state,"connect"); ok=1; + } + else if (back[i].status==101) { + strcpybuff(StatsBuffer[index].state,"search"); ok=1; + } + else if (back[i].status==1000) { // ohh le beau ftp + sprintf(StatsBuffer[index].state,"ftp: %s",back[i].info); ok=1; + } + break; + 
default: + if (back[i].status==0) { // prêt + if ((back[i].r.statuscode==200)) { + strcpybuff(StatsBuffer[index].state,"ready"); ok=1; + } + else if ((back[i].r.statuscode>=100) && (back[i].r.statuscode<=599)) { + char tempo[256]; tempo[0]='\0'; + infostatuscode(tempo,back[i].r.statuscode); + strcpybuff(StatsBuffer[index].state,tempo); ok=1; + } + else { + strcpybuff(StatsBuffer[index].state,"error"); ok=1; + } + } + break; + } + + if (ok) { + char s[HTS_URLMAXSIZE*2]; + // + StatsBuffer[index].back=i; // index pour + d'infos + // + s[0]='\0'; + strcpybuff(StatsBuffer[index].url_sav,back[i].url_sav); // pour cancel + if (strcmp(back[i].url_adr,"file://")) + strcatbuff(s,back[i].url_adr); + else + strcatbuff(s,"localhost"); + if (back[i].url_fil[0]!='/') + strcatbuff(s,"/"); + strcatbuff(s,back[i].url_fil); + + StatsBuffer[index].file[0]='\0'; + { + char* a=strrchr(s,'/'); + if (a) { + strncatbuff(StatsBuffer[index].file,a,200); + *a='\0'; + } + } + + if ((l=strlen(s))<MAX_LEN_INPROGRESS) + strcpybuff(StatsBuffer[index].name,s); + else { + // couper + StatsBuffer[index].name[0]='\0'; + strncatbuff(StatsBuffer[index].name,s,MAX_LEN_INPROGRESS/2-2); + strcatbuff(StatsBuffer[index].name,"..."); + strcatbuff(StatsBuffer[index].name,s+l-MAX_LEN_INPROGRESS/2+2); + } + + if (back[i].r.totalsize>0) { // taille prédéfinie + StatsBuffer[index].sizetot=back[i].r.totalsize; + StatsBuffer[index].size=back[i].r.size; + } else { // pas de taille prédéfinie + if (back[i].status==0) { // prêt + StatsBuffer[index].sizetot=back[i].r.size; + StatsBuffer[index].size=back[i].r.size; + } else { + StatsBuffer[index].sizetot=8192; + StatsBuffer[index].size=(back[i].r.size % 8192); + } + } + index++; + } + } + } + } + } + + /* Display current job */ + { + int parsing=0; + if (commandEndRequested) + smallserver_setkey("info.currentjob", "finishing pending transfers - Select [Cancel] to stop now!"); + else if (!(parsing=hts_is_parsing(-1))) + smallserver_setkey("info.currentjob", "receiving 
files"); + else { + char tmp[1024]; + tmp[0] = '\0'; + switch(hts_is_testing()) { + case 0: + sprintf(tmp, "parsing HTML file (%d%%)",parsing); + break; + case 1: + sprintf(tmp, "parsing HTML file: testing links (%d%%)",parsing); + break; + case 2: + sprintf(tmp, "purging files"); + break; + } + smallserver_setkey("info.currentjob", tmp); + } + } + + /* Display background jobs */ + { + int i; + for(i=0;i<NStatsBuffer;i++) { + if (strnotempty(StatsBuffer[i].state)) { + smallserver_setkeyarr("info.state[", i, "]", StatsBuffer[i].state); + smallserver_setkeyarr("info.name[", i, "]", StatsBuffer[i].name); + smallserver_setkeyarr("info.file[", i, "]", StatsBuffer[i].file); + smallserver_setkeyarr("info.size[", i, "]", int2bytes(StatsBuffer[i].size)); + smallserver_setkeyarr("info.sizetot[", i, "]", int2bytes(StatsBuffer[i].sizetot)); + smallserver_setkeyarr("info.url_adr[", i, "]", StatsBuffer[i].url_adr); + smallserver_setkeyarr("info.url_fil[", i, "]", StatsBuffer[i].url_fil); + smallserver_setkeyarr("info.url_sav[", i, "]", StatsBuffer[i].url_sav); + } + } + } + + + } + + } + + /* UnLock */ + webhttrack_lock(0); + + return 1; +} +char* __cdecl htsshow_query(char* question) { + static char s[]=""; /* ok */ + return s; +} +char* __cdecl htsshow_query2(char* question) { + static char s[]=""; /* ok */ + return s; +} +char* __cdecl htsshow_query3(char* question) { + static char s[]=""; /* ok */ + return s; +} +int __cdecl htsshow_check(char* adr,char* fil,int status) { + return -1; +} +void __cdecl htsshow_pause(char* lockfile) { +} +void __cdecl htsshow_filesave(char* file) { +} +int __cdecl htsshow_linkdetected(char* link) { + return 1; +} +int __cdecl htsshow_xfrstatus(lien_back* back) { + return 1; +} +int __cdecl htsshow_savename(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save) { + return 1; +} + + diff --git a/src/htsweb.h b/src/htsweb.h new file mode 100644 index 0000000..272b363 --- /dev/null +++ b/src/htsweb.h @@ -0,0 +1,110 
@@ +/* ------------------------------------------------------------ */ +/* +HTTrack Website Copier, Offline Browser for Windows and Unix +Copyright (C) Xavier Roche and other contributors + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + + +Important notes: + +- We hereby ask people using this source NOT to use it in purpose of grabbing +emails addresses, or collecting any other private information on persons. +This would disgrace our work, and spoil the many hours we spent on it. 
+ + +Please visit our Website: http://www.httrack.com +*/ + + +/* ------------------------------------------------------------ */ +/* File: webhttrack.c routines */ +/* Author: Xavier Roche */ +/* ------------------------------------------------------------ */ + +#ifndef WEBHTTRACK_WBC +#define WEBHTTRACK_WBC + +#include "htsglobal.h" +#include "htscore.h" + +#define NStatsBuffer 14 +#define MAX_LEN_INPROGRESS 40 + +typedef struct { + char name[1024]; + char file[1024]; + char state[256]; + char url_sav[HTS_URLMAXSIZE*2]; // pour cancel + char url_adr[HTS_URLMAXSIZE*2]; + char url_fil[HTS_URLMAXSIZE*2]; + LLint size; + LLint sizetot; + int offset; + // + int back; + // + int actived; // pour disabled +} t_StatsBuffer; + +typedef struct { + int ask_refresh; + int refresh; + LLint stat_bytes; + int stat_time; + int lien_n; + int lien_tot; + int stat_nsocket; + int rate; + int irate; + int ft; + LLint stat_written; + int stat_updated; + int stat_errors; + int stat_warnings; + int stat_infos; + TStamp stat_timestart; + int stat_back; +} t_InpInfo; + +// wrappers +void __cdecl htsshow_init(void); +void __cdecl htsshow_uninit(void); +int __cdecl htsshow_start(httrackp* opt); +int __cdecl htsshow_chopt(httrackp* opt); +int __cdecl htsshow_end(void); +int __cdecl htsshow_checkhtml(char* html,int len,char* url_adresse,char* url_fichier); +int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int lien_n,int lien_tot,int stat_time,hts_stat_struct* stats); +char* __cdecl htsshow_query(char* question); +char* __cdecl htsshow_query2(char* question); +char* __cdecl htsshow_query3(char* question); +int __cdecl htsshow_check(char* adr,char* fil,int status); +void __cdecl htsshow_pause(char* lockfile); +void __cdecl htsshow_filesave(char* file); +int __cdecl htsshow_linkdetected(char* link); +int __cdecl htsshow_xfrstatus(lien_back* back); +int __cdecl htsshow_savename(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save); + + +int 
main(int argc, char **argv); +void webhttrack_main(char* cmd); +void webhttrack_lock(int lock); + +#ifndef _WIN32 +#define fconv(a) (a) +#define fconcat(a,b) concat(a,b) +#endif + +#endif diff --git a/src/htswizard.c b/src/htswizard.c index b23f5fb..e976ffd 100644 --- a/src/htswizard.c +++ b/src/htswizard.c @@ -55,17 +55,17 @@ Please visit our Website: http://www.httrack.com #define urlfil (liens[ptr]->fil) // libérer filters[0] pour insérer un élément dans filters[0] -#define HT_INSERT_FILTERS0 {\ +#define HT_INSERT_FILTERS0 do {\ int i;\ - if (*filptr > 0) {\ - for(i = (*filptr)-1 ; i>=0 ; i--) {\ - strcpy(filters[i+1],filters[i]);\ + if (*opt->filters.filptr > 0) {\ + for(i = (*opt->filters.filptr)-1 ; i>=0 ; i--) {\ + strcpybuff((*opt->filters.filters)[i+1],(*opt->filters.filters)[i]);\ }\ }\ - strcpy(filters[0],"");\ - (*filptr)++;\ - (*filptr)=minimum((*filptr),filter_max);\ -} + (*opt->filters.filters)[0][0]='\0';\ + (*opt->filters.filptr)++;\ + assertf((*opt->filters.filptr) < opt->maxfilter); \ +} while(0) @@ -91,22 +91,34 @@ retour: int hts_acceptlink(httrackp* opt, int ptr,int lien_tot,lien_url** liens, char* adr,char* fil, - char*** ptrfilters,int* filptr,int filter_max, - robots_wizard* robots, int* set_prio_to, int* just_test_it) { int forbidden_url=-1; int meme_adresse; - char** filters = *ptrfilters; +#define _FILTERS (*opt->filters.filters) +#define _FILTERS_PTR (opt->filters.filptr) +#define _ROBOTS ((robots_wizard*)opt->robotsptr) + int may_set_prio_to=0; - // -------------------- PHASE 1 -------------------- + // -------------------- PHASE 0 -------------------- /* Infos */ if ((opt->debug>1) && (opt->log!=NULL)) { fspc(opt->log,"debug"); fprintf(opt->log,"wizard test begins: %s%s"LF,adr,fil); test_flush; } + + /* Already exists? 
Then, we know that we knew that this link had to be known */ + if (adr[0] != '\0' + && fil[0] != '\0' + && opt->hash != NULL + && hash_read((hash_struct*)opt->hash, adr, fil, 1, opt->urlhack) >= 0 + ) { + return 0; /* Yokai */ + } + + // -------------------- PHASE 1 -------------------- /* Doit-on traiter les non html? */ if ((opt->getmode & 2)==0) { // non on ne doit pas @@ -156,6 +168,7 @@ int hts_acceptlink(httrackp* opt, // problème: si un fichier est virtuellement accessible via une page mais dont le lien est sur une autre *uniquement*.. char tempo[HTS_URLMAXSIZE*2]; char tempo2[HTS_URLMAXSIZE*2]; + tempo[0] = tempo2[0] = '\0'; // note (up/down): on calcule à partir du lien primaire, ET du lien précédent. // ex: si on descend 2 fois on peut remonter 1 fois @@ -177,12 +190,14 @@ int hts_acceptlink(httrackp* opt, // (test même niveau (NOUVEAU à cause de certains problèmes de filtres non intégrés)) // NEW - if ( (!strchr(tempo+1,'/')) || (!strchr(tempo2+1,'/')) ) { - if (!liens[ptr]->link_import) { // ne résulte pas d'un 'moved' - forbidden_url=0; - if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"same level link authorized: %s%s"LF,adr,fil); - test_flush; + if (tempo[0] != '\0' && tempo[1] != '\0') { + if ( (!strchr(tempo+1,'/')) || (!strchr(tempo2+1,'/')) ) { + if (!liens[ptr]->link_import) { // ne résulte pas d'un 'moved' + forbidden_url=0; + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"same level link authorized: %s%s"LF,adr,fil); + test_flush; + } } } } @@ -380,6 +395,7 @@ int hts_acceptlink(httrackp* opt, if (!ishtml(fil)) { // non html //printf("ok %s%s\n",ad,fil); forbidden_url=0; // autoriser + may_set_prio_to=1+1; // set prio to 1 (parse but skip urls) if near is the winner if ((opt->debug>1) && (opt->log!=NULL)) { fspc(opt->log,"debug"); fprintf(opt->log,"near link authorized: %s%s"LF,adr,fil); test_flush; @@ -404,17 +420,17 @@ int hts_acceptlink(httrackp* opt, if 
(forbidden_url!=-1) question=0; // pas de question, résolu // former URL complète du lien actuel - strcpy(l,jump_identification(adr)); - if (*fil!='/') strcat(l,"/"); - strcat(l,fil); + strcpybuff(l,jump_identification(adr)); + if (*fil!='/') strcatbuff(l,"/"); + strcatbuff(l,fil); // full version (http://foo:bar@www.foo.com/bar.html) if (!link_has_authority(adr)) - strcpy(lfull,"http://"); + strcpybuff(lfull,"http://"); else lfull[0]='\0'; - strcat(lfull,adr); - if (*fil!='/') strcat(lfull,"/"); - strcat(lfull,fil); + strcatbuff(lfull,adr); + if (*fil!='/') strcatbuff(lfull,"/"); + strcatbuff(lfull,fil); // tester filters (URLs autorisées ou interdites explicitement) @@ -422,41 +438,80 @@ int hts_acceptlink(httrackp* opt, if (ptr==0) { // lien primaire, autoriser question=1; // la question sera résolue automatiquement forbidden_url=0; + may_set_prio_to=0; // clear may-set flag } else { - int jok; - // filters, 0=sait pas 1=ok -1=interdit - { - int jokDepth1=0,jokDepth2=0; - int jok1=0,jok2=0; - jok1 = fa_strjoker(filters,*filptr,lfull,NULL,NULL,&jokDepth1); - jok2 = fa_strjoker(filters,*filptr,l, NULL,NULL,&jokDepth2); - if (jok2 == 0) // #2 doesn't know - jok = jok1; // then, use #1 - else if (jok1 == 0) // #1 doesn't know - jok = jok2; // then, use #2 - else if (jokDepth1 >= jokDepth2) // #1 matching rule is "after" #2, then it is prioritary - jok = jok1; - else // #2 matching rule is "after" #1, then it is prioritary - jok = jok2; - } - - if (jok == 1) { // autorisé - filters_answer=1; // décision prise par les filtres - question=0; // ne pas poser de question, autorisé - forbidden_url=0; // URL autorisée - if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"(wizard) explicit authorized link: link %s at %s%s"LF,l,urladr,urlfil); - test_flush; + // eternal depth first + // vérifier récursivité extérieure + if (opt->extdepth>0) { + if ( /*question && */ (ptr>0) && (!force_mirror)) { + // well, this is kinda a hak + // we don't 
want to mirror EVERYTHING, and we have to decide where to stop + // there is no way yet to tag "external" links, and therefore links that are + // "weak" (authorized depth < external depth) are just not considered for external + // hack + if (liens[ptr]->depth > opt->extdepth) { + // *set_prio_to = opt->extdepth + 1; + *set_prio_to = 1 + (opt->extdepth); + may_set_prio_to=0; // clear may-set flag + forbidden_url=0; // autorisé + question=0; // résolution auto + if ((opt->debug>1) && (opt->log!=NULL)) { + if (question) { + fspc(opt->log,"debug"); fprintf(opt->log,"(wizard) ambiguous link accepted (external depth): link %s at %s%s"LF,l,urladr,urlfil); + } else { + fspc(opt->log,"debug"); fprintf(opt->log,"(wizard) forced to accept link (external depth): link %s at %s%s"LF,l,urladr,urlfil); + } + test_flush; + } + + } } - } else if (jok == -1) { - filters_answer=1; // décision prise par les filtres - question=0; // ne pas poser de question: - forbidden_url=1; // URL interdite - if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"(wizard) explicit forbidden link: link %s at %s%s"LF,l,urladr,urlfil); - test_flush; + } + + // filters + { + int jok; + char* mdepth=""; + // filters, 0=sait pas 1=ok -1=interdit + { + int jokDepth1=0,jokDepth2=0; + int jok1=0,jok2=0; + jok1 = fa_strjoker(_FILTERS,*_FILTERS_PTR,lfull,NULL,NULL,&jokDepth1); + jok2 = fa_strjoker(_FILTERS,*_FILTERS_PTR,l, NULL,NULL,&jokDepth2); + if (jok2 == 0) { // #2 doesn't know + jok = jok1; // then, use #1 + mdepth = _FILTERS[jokDepth1]; + } else if (jok1 == 0) { // #1 doesn't know + jok = jok2; // then, use #2 + mdepth = _FILTERS[jokDepth2]; + } else if (jokDepth1 >= jokDepth2) { // #1 matching rule is "after" #2, then it is prioritary + jok = jok1; + mdepth = _FILTERS[jokDepth1]; + } else { // #2 matching rule is "after" #1, then it is prioritary + jok = jok2; + mdepth = _FILTERS[jokDepth2]; + } } - } // sinon on touche à rien + + if (jok == 1) { // autorisé + 
filters_answer=1; // décision prise par les filtres + question=0; // ne pas poser de question, autorisé + forbidden_url=0; // URL autorisée + may_set_prio_to=0; // clear may-set flag + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"(wizard) explicit authorized (%s) link: link %s at %s%s"LF,mdepth,l,urladr,urlfil); + test_flush; + } + } else if (jok == -1) { // forbidden + filters_answer=1; // décision prise par les filtres + question=0; // ne pas poser de question: + forbidden_url=1; // URL interdite + if ((opt->debug>1) && (opt->log!=NULL)) { + fspc(opt->log,"debug"); fprintf(opt->log,"(wizard) explicit forbidden (%s) link: link %s at %s%s"LF,mdepth,l,urladr,urlfil); + test_flush; + } + } // sinon on touche à rien + } } // vérifier mode mirror links @@ -464,6 +519,7 @@ int hts_acceptlink(httrackp* opt, if (opt->mirror_first_page) { // mode mirror links if (liens[ptr]->precedent==0) { // parent=primary! forbidden_url=0; // autorisé + may_set_prio_to=0; // clear may-set flag question=1; // résolution auto force_mirror=5; // mirror (5) if ((opt->debug>1) && (opt->log!=NULL)) { @@ -473,20 +529,6 @@ int hts_acceptlink(httrackp* opt, } } } - - // vérifier récursivité extérieure - if ((question) && (ptr>0) && (!force_mirror)) { - if (opt->extdepth>0) { - // *set_prio_to = opt->extdepth + 1; - *set_prio_to = opt->extdepth + 1; - forbidden_url=0; // autorisé - question=0; // résolution auto - if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"(wizard) ambiguous link accepted (external depth): link %s at %s%s"LF,l,urladr,urlfil); - test_flush; - } - } - } // on doit poser la question.. peut on la poser? 
// (oui je sais quel preuve de délicatesse, merci merci) @@ -503,7 +545,7 @@ int hts_acceptlink(httrackp* opt, // vérifier robots.txt if (opt->robots) { - int r = checkrobots(robots,adr,fil); + int r = checkrobots(_ROBOTS,adr,fil); if (r == -1) { // interdiction #if DEBUG_ROBOTS printf("robots.txt forbidden: %s%s\n",adr,fil); @@ -578,9 +620,9 @@ int hts_acceptlink(httrackp* opt, { char tempo[HTS_URLMAXSIZE*2]; tempo[0]='\0'; - strcat(tempo,adr); - strcat(tempo,"/"); - strcat(tempo,fil); + strcatbuff(tempo,adr); + strcatbuff(tempo,"/"); + strcatbuff(tempo,fil); s=hts_htmlcheck_query3(tempo); } #else @@ -616,25 +658,7 @@ int hts_acceptlink(httrackp* opt, } while(n==-999); #endif io_flush; - } else { // lien primaire: autoriser répertoire entier - - /* sanity check */ - if ((*filptr) + 1 >= opt->maxfilter) { - opt->maxfilter += HTS_FILTERSINC; - if (filters_init(&filters, opt->maxfilter, HTS_FILTERSINC) == 0) { - printf("PANIC! : Too many filters : >%d [%d]\n", (*filptr),__LINE__); - fflush(stdout); - if (opt->errlog) { - fprintf(opt->errlog,LF"Too many filters, giving up..(>%d)"LF, (*filptr) ); - fprintf(opt->errlog,"To avoid that: use #F option for more filters (example: -#F5000)"LF); - test_flush; - } - abort(); // wild.. - } - //opt->filters.filters=filters; - //*ptrfilters = filters; - } - + } else { // lien primaire: autoriser répertoire entier if (!force_mirror) { if ((opt->seeker & 1)==0) { // interdiction de descendre n=7; @@ -645,6 +669,22 @@ int hts_acceptlink(httrackp* opt, n=force_mirror; } + /* sanity check - reallocate filters HERE */ + if ((*_FILTERS_PTR) + 1 >= opt->maxfilter) { + opt->maxfilter += HTS_FILTERSINC; + if (filters_init(&_FILTERS, opt->maxfilter, HTS_FILTERSINC) == 0) { + printf("PANIC! 
: Too many filters : >%d [%d]\n", (*_FILTERS_PTR),__LINE__); + fflush(stdout); + if (opt->errlog) { + fprintf(opt->errlog,LF"Too many filters, giving up..(>%d)"LF, (*_FILTERS_PTR) ); + fprintf(opt->errlog,"To avoid that: use #F option for more filters (example: -#F5000)"LF); + test_flush; + } + assertf("too many filters - giving up" == NULL); // wild.. + } + } + + // here we have enough room for a new filter if necessary switch(n) { case -1: // sauter tout le reste forbidden_url=1; @@ -653,10 +693,10 @@ int hts_acceptlink(httrackp* opt, case 0: // interdire les mêmes liens: adr/fil forbidden_url=1; HT_INSERT_FILTERS0; // insérer en 0 - strcpy(filters[0],"-"); - strcat(filters[0],jump_identification(adr)); - if (*fil!='/') strcat(filters[0],"/"); - strcat(filters[0],fil); + strcpybuff(_FILTERS[0],"-"); + strcatbuff(_FILTERS[0],jump_identification(adr)); + if (*fil!='/') strcatbuff(_FILTERS[0],"/"); + strcatbuff(_FILTERS[0],fil); break; case 1: // éliminer répertoire entier et sous rép: adr/path/ * @@ -666,12 +706,13 @@ int hts_acceptlink(httrackp* opt, while((fil[i]!='/') && (i>0)) i--; if (fil[i]=='/') { HT_INSERT_FILTERS0; // insérer en 0 - strcpy(filters[0],"-"); - strcat(filters[0],jump_identification(adr)); - if (*fil!='/') strcat(filters[0],"/"); - strncat(filters[0],fil,i); - if (filters[0][strlen(filters[0])-1]!='/') strcat(filters[0],"/"); - strcat(filters[0],"*"); + strcpybuff(_FILTERS[0],"-"); + strcatbuff(_FILTERS[0],jump_identification(adr)); + if (*fil!='/') strcatbuff(_FILTERS[0],"/"); + strncatbuff(_FILTERS[0] ,fil,i); + if (_FILTERS[0][strlen(_FILTERS[0])-1]!='/') + strcatbuff(_FILTERS[0],"/"); + strcatbuff(_FILTERS[0],"*"); } } @@ -681,9 +722,9 @@ int hts_acceptlink(httrackp* opt, case 2: // adresse adr* forbidden_url=1; HT_INSERT_FILTERS0; // insérer en 0 - strcpy(filters[0],"-"); - strcat(filters[0],jump_identification(adr)); - strcat(filters[0],"*"); + strcpybuff(_FILTERS[0],"-"); + strcatbuff(_FILTERS[0],jump_identification(adr)); + 
strcatbuff(_FILTERS[0],"*"); break; case 3: // ** A FAIRE @@ -703,10 +744,10 @@ int hts_acceptlink(httrackp* opt, case 4: // same link // PAS BESOIN!! /*HT_INSERT_FILTERS0; // insérer en 0 - strcpy(filters[0],"+"); - strcat(filters[0],adr); - if (*fil!='/') strcat(filters[0],"/"); - strcat(filters[0],fil);*/ + strcpybuff(_FILTERS[0],"+"); + strcatbuff(_FILTERS[0],adr); + if (*fil!='/') strcatbuff(_FILTERS[0],"/"); + strcatbuff(_FILTERS[0],fil);*/ // étant donné le renversement wizard/primary filter (les primary autorisent up/down ET interdisent) @@ -722,25 +763,25 @@ int hts_acceptlink(httrackp* opt, while((fil[i]!='/') && (i>0)) i--; if (fil[i]=='/') { HT_INSERT_FILTERS0; // insérer en 0 - strcpy(filters[0],"+"); - strcat(filters[0],jump_identification(adr)); - if (*fil!='/') strcat(filters[0],"/"); - strncat(filters[0],fil,i+1); - strcat(filters[0],"*"); + strcpybuff(_FILTERS[0],"+"); + strcatbuff(_FILTERS[0],jump_identification(adr)); + if (*fil!='/') strcatbuff(_FILTERS[0],"/"); + strncatbuff(_FILTERS[0],fil,i+1); + strcatbuff(_FILTERS[0],"*"); } } else { // autoriser domaine alors!! 
- HT_INSERT_FILTERS0; // insérer en 0 strcpy(filters[filptr],"+"); - strcpy(filters[0],"+"); - strcat(filters[0],jump_identification(adr)); - strcat(filters[0],"*"); + HT_INSERT_FILTERS0; // insérer en 0 strcpybuff(filters[filptr],"+"); + strcpybuff(_FILTERS[0],"+"); + strcatbuff(_FILTERS[0],jump_identification(adr)); + strcatbuff(_FILTERS[0],"*"); } break; case 6: // same domain - HT_INSERT_FILTERS0; // insérer en 0 strcpy(filters[filptr],"+"); - strcpy(filters[0],"+"); - strcat(filters[0],jump_identification(adr)); - strcat(filters[0],"*"); + HT_INSERT_FILTERS0; // insérer en 0 strcpybuff(filters[filptr],"+"); + strcpybuff(_FILTERS[0],"+"); + strcatbuff(_FILTERS[0],jump_identification(adr)); + strcatbuff(_FILTERS[0],"*"); break; // case 7: // autoriser ce répertoire @@ -749,11 +790,11 @@ int hts_acceptlink(httrackp* opt, while((fil[i]!='/') && (i>0)) i--; if (fil[i]=='/') { HT_INSERT_FILTERS0; // insérer en 0 - strcpy(filters[0],"+"); - strcat(filters[0],jump_identification(adr)); - if (*fil!='/') strcat(filters[0],"/"); - strncat(filters[0],fil,i+1); - strcat(filters[0],"*[file]"); + strcpybuff(_FILTERS[0],"+"); + strcatbuff(_FILTERS[0],jump_identification(adr)); + if (*fil!='/') strcatbuff(_FILTERS[0],"/"); + strncatbuff(_FILTERS[0],fil,i+1); + strcatbuff(_FILTERS[0],"*[file]"); } } @@ -788,10 +829,19 @@ int hts_acceptlink(httrackp* opt, #if HTS_ANALYSTE { int test_url=hts_htmlcheck_check(adr,fil,forbidden_url); - if (test_url!=-1) + if (test_url!=-1) { forbidden_url=test_url; + may_set_prio_to=0; // clear may-set flag + } } -#endif +#endif + + // -------------------- FINAL PHASE -------------------- + // Test if the "Near" test won + if (may_set_prio_to && forbidden_url == 0) { + *set_prio_to = may_set_prio_to; + } + return forbidden_url; } @@ -808,17 +858,17 @@ int hts_testlinksize(httrackp* opt, int size_flag=0; // former URL complète du lien actuel - strcpy(l,jump_identification(adr)); - if (*fil!='/') strcat(l,"/"); - strcat(l,fil); + 
strcpybuff(l,jump_identification(adr)); + if (*fil!='/') strcatbuff(l,"/"); + strcatbuff(l,fil); // if (!link_has_authority(adr)) - strcpy(lfull,"http://"); + strcpybuff(lfull,"http://"); else lfull[0]='\0'; - strcat(lfull,adr); - if (*fil!='/') strcat(l,"/"); - strcat(lfull,fil); + strcatbuff(lfull,adr); + if (*fil!='/') strcatbuff(l,"/"); + strcatbuff(lfull,fil); // tester filtres (taille) // jok = fa_strjoker(opt->filters.filters,*opt->filters.filptr,l,&sz,&size_flag,NULL); diff --git a/src/htswizard.h b/src/htswizard.h index 28c5d2f..147c7b7 100644 --- a/src/htswizard.h +++ b/src/htswizard.h @@ -43,8 +43,6 @@ Please visit our Website: http://www.httrack.com int hts_acceptlink(httrackp* opt, int ptr,int lien_tot,lien_url** liens, char* adr,char* fil, - char*** filters,int* filptr,int filter_max, - robots_wizard* robots, int* set_prio_to_0, int* just_test_it); int hts_testlinksize(httrackp* opt, diff --git a/src/htswrap.c b/src/htswrap.c index 824af7e..28c4c71 100644 --- a/src/htswrap.c +++ b/src/htswrap.c @@ -37,30 +37,31 @@ Please visit our Website: http://www.httrack.com #include "htswrap.h" #include "htshash.h" +#include "htsinthash.h" // typedef long (__stdcall * XSHBFF_WndProc_type)(HWND ,UINT ,WPARAM ,LPARAM); inthash wrappers=NULL; -int htswrap_init(void) { +HTSEXT_API int htswrap_init(void) { if (!wrappers) wrappers=inthash_new(42); return inthash_created(wrappers); } -int htswrap_free(void) { +HTSEXT_API int htswrap_free(void) { inthash_delete(&wrappers); return 1; } -int htswrap_add(char* name,void* fct) { +HTSEXT_API int htswrap_add(char* name,void* fct) { if (!wrappers) htswrap_init(); inthash_write(wrappers,name,(unsigned long int)fct); return 1; } -unsigned long int htswrap_read(char* name) { +HTSEXT_API unsigned long int htswrap_read(char* name) { unsigned long int fct=0; if (!wrappers) htswrap_init(); diff --git a/src/htswrap.h b/src/htswrap.h index 03bf73f..b87bf11 100644 --- a/src/htswrap.h +++ b/src/htswrap.h @@ -35,14 +35,16 @@ Please visit 
our Website: http://www.httrack.com /* Author: Xavier Roche */ /* ------------------------------------------------------------ */ - - #ifndef HTSWRAP_DEFH #define HTSWRAP_DEFH -int htswrap_init(void); -int htswrap_add(char* name,void* fct); -int htswrap_free(void); -unsigned long int htswrap_read(char* name); +#include "htsglobal.h" + +#ifndef HTTRACK_DEFLIB +HTSEXT_API int htswrap_init(void); +HTSEXT_API int htswrap_add(char* name,void* fct); +HTSEXT_API int htswrap_free(void); +HTSEXT_API unsigned long int htswrap_read(char* name); +#endif #endif diff --git a/src/htszlib.c b/src/htszlib.c index d138a1c..faf4e88 100644 --- a/src/htszlib.c +++ b/src/htszlib.c @@ -42,23 +42,26 @@ Please visit our Website: http://www.httrack.com #include "htsbase.h" #include "htscore.h" -#if HTS_USEZLIB +#include "htszlib.h" +#if HTS_USEZLIB /* zlib */ +/* #include <zlib.h> #include "htszlib.h" +*/ /* Unpack file into a new file Return value: size of the new file, or -1 if an error occured */ int hts_zunpack(char* filename,char* newfile) { - if (filename && newfile) { + if (gz_is_available && filename && newfile) { if (filename[0] && newfile[0]) { gzFile gz = gzopen (filename, "rb"); if (gz) { FILE* fpout=fopen(fconv(newfile),"wb"); - int size=0; + INTsys size=0; if (fpout) { int nr; do { @@ -66,7 +69,7 @@ int hts_zunpack(char* filename,char* newfile) { nr=gzread (gz, buff, 1024); if (nr>0) { size+=nr; - if ((int)fwrite(buff,1,nr,fpout) != nr) + if ((INTsys)fwrite(buff,1,nr,fpout) != nr) nr=size=-1; } } while(nr>0); diff --git a/src/htszlib.h b/src/htszlib.h index 63310b8..173d966 100644 --- a/src/htszlib.h +++ b/src/htszlib.h @@ -43,6 +43,27 @@ Please visit our Website: http://www.httrack.com int hts_zunpack(char* filename,char* newfile); +#define gzopen hts_ptrfunc_gzopen +#define gzread hts_ptrfunc_gzread +#define gzclose hts_ptrfunc_gzclose + +#ifdef _WIN32 +#define ZEXPORT WINAPI +#else +#define ZEXPORT +#endif + +typedef void* voidp; +typedef voidp gzFile; +typedef gzFile 
(ZEXPORT *t_gzopen)(const char *path, const char *mode); +typedef int (ZEXPORT *t_gzread)(gzFile file, voidp buf, unsigned len); +typedef int (ZEXPORT *t_gzclose)(gzFile file); + +extern int gz_is_available; +extern t_gzopen gzopen; +extern t_gzread gzread; +extern t_gzclose gzclose; + #endif #endif diff --git a/src/httrack-library.h b/src/httrack-library.h index 13ecb46..aeea70f 100644 --- a/src/httrack-library.h +++ b/src/httrack-library.h @@ -42,9 +42,157 @@ Please visit our Website: http://www.httrack.com #include "htsopt.h" #include "htswrap.h" -int hts_init(void); -int hts_main(int argc, char **argv); +/* Main functions */ +HTSEXT_API int hts_init(void); +HTSEXT_API int hts_uninit(void); +HTSEXT_API int hts_main(int argc, char **argv); + +/* Wrapper functions */ +HTSEXT_API int htswrap_init(void); +HTSEXT_API int htswrap_add(char* name,void* fct); +HTSEXT_API int htswrap_free(void); +HTSEXT_API unsigned long int htswrap_read(char* name); +HTSEXT_API const char* hts_is_available(void); + +/* Other functions */ +HTSEXT_API int hts_resetvar(void); +HTSEXT_API int hts_buildtopindex(httrackp* opt,char* path,char* binpath); + +/* Catch-URL */ +HTSEXT_API T_SOC catch_url_init_std(int* port_prox,char* adr_prox); +HTSEXT_API T_SOC catch_url_init(int* port,char* adr); +HTSEXT_API int catch_url(T_SOC soc,char* url,char* method,char* data); + +/* State */ +HTSEXT_API int hts_is_parsing(int flag); +HTSEXT_API int hts_is_testing(void); +HTSEXT_API int hts_is_exiting(void); +HTSEXT_API int hts_setopt(httrackp* opt); +HTSEXT_API int hts_addurl(char** url); +HTSEXT_API int hts_resetaddurl(void); +HTSEXT_API int copy_htsopt(httrackp* from, httrackp* to); +HTSEXT_API char* hts_errmsg(void); +HTSEXT_API int hts_setpause(int); // pause transfer +HTSEXT_API int hts_request_stop(int force); +HTSEXT_API char* hts_cancel_file(char * s); +HTSEXT_API void hts_cancel_test(void); +HTSEXT_API void hts_cancel_parsing(void); +HTSEXT_API char* hts_cancel_file(char * s); +HTSEXT_API void 
hts_cancel_test(void); +HTSEXT_API void hts_cancel_parsing(void); + +/* Tools */ +HTSEXT_API int structcheck(char* s); +HTSEXT_API void infostatuscode(char* msg,int statuscode); +HTSEXT_API HTS_INLINE TStamp mtime_local(void); +HTSEXT_API void qsec2str(char *st,TStamp t); +HTSEXT_API char* int2char(int n); +HTSEXT_API char* int2bytes(LLint n); +HTSEXT_API char* int2bytessec(long int n); +HTSEXT_API char** int2bytes2(LLint n); +HTSEXT_API char* jump_identification(char*); +HTSEXT_API char* jump_normalized(char*); +HTSEXT_API char* jump_toport(char*); +HTSEXT_API char* fil_normalized(char* source, char* dest); +HTSEXT_API char* adr_normalized(char* source, char* dest); +HTSEXT_API char* hts_rootdir(char* file); + +/* Escaping URLs */ +HTSEXT_API void unescape_amp(char* s); +HTSEXT_API void escape_spc_url(char* s); +HTSEXT_API void escape_in_url(char* s); +HTSEXT_API void escape_uri(char* s); +HTSEXT_API void escape_uri_utf(char* s); +HTSEXT_API void escape_check_url(char* s); +HTSEXT_API char* escape_check_url_addr(char* s); +HTSEXT_API void x_escape_http(char* s,int mode); +HTSEXT_API char* unescape_http(char* s); +HTSEXT_API char* unescape_http_unharm(char* s, int no_high); +HTSEXT_API char* antislash_unescaped(char* s); +HTSEXT_API void escape_remove_control(char* s); + +/* Portable directory API */ + +typedef struct find_handle_struct find_handle_struct; +typedef find_handle_struct* find_handle; +typedef struct topindex_chain { + char name[2048]; /* path */ + struct topindex_chain* next; /* next element */ +} topindex_chain ; +HTSEXT_API find_handle hts_findfirst(char* path); +HTSEXT_API int hts_findnext(find_handle find); +HTSEXT_API int hts_findclose(find_handle find); +HTSEXT_API char* hts_findgetname(find_handle find); +HTSEXT_API int hts_findgetsize(find_handle find); +HTSEXT_API int hts_findisdir(find_handle find); +HTSEXT_API int hts_findisfile(find_handle find); +HTSEXT_API int hts_findissystem(find_handle find); + +/* Wrapper functions types (commented) 
: */ +/* +typedef void (* t_hts_htmlcheck_init)(void); +typedef void (* t_hts_htmlcheck_uninit)(void); +typedef int (* t_hts_htmlcheck_start)(httrackp* opt); +typedef int (* t_hts_htmlcheck_end)(void); +typedef int (* t_hts_htmlcheck_chopt)(httrackp* opt); +typedef int (* t_hts_htmlcheck)(char* html,int len,char* url_adresse,char* url_fichier); +typedef char* (* t_hts_htmlcheck_query)(char* question); +typedef char* (* t_hts_htmlcheck_query2)(char* question); +typedef char* (* t_hts_htmlcheck_query3)(char* question); +typedef int (* t_hts_htmlcheck_loop)(lien_back* back,int back_max,int back_index,int lien_tot,int lien_ntot,int stat_time,hts_stat_struct* stats); +typedef int (* t_hts_htmlcheck_check)(char* adr,char* fil,int status); +typedef void (* t_hts_htmlcheck_pause)(char* lockfile); +typedef void (* t_hts_htmlcheck_filesave)(char* file); +typedef int (* t_hts_htmlcheck_linkdetected)(char* link); +typedef int (* t_hts_htmlcheck_xfrstatus)(lien_back* back); +typedef int (* t_hts_htmlcheck_savename)(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save); +typedef int (* t_hts_htmlcheck_sendhead)(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* outgoing); +typedef int (* t_hts_htmlcheck_receivehead)(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* incoming); +*/ +/* Wrapper functions names : */ +/* + hts_htmlcheck_init = (t_hts_htmlcheck_init) htswrap_read("init"); +Log: "engine: init" + + hts_htmlcheck_uninit = (t_hts_htmlcheck_uninit) htswrap_read("free"); +Log: "engine: free" + + hts_htmlcheck_start = (t_hts_htmlcheck_start) htswrap_read("start"); +Log: "engine: start" + + hts_htmlcheck_end = (t_hts_htmlcheck_end) htswrap_read("end"); +Log: "engine: end" + + hts_htmlcheck_chopt = (t_hts_htmlcheck_chopt) htswrap_read("change-options"); +Log: "engine: change-options" + + hts_htmlcheck = (t_hts_htmlcheck) htswrap_read("check-html"); +Log: "check-html: <url>" + + 
hts_htmlcheck_query = (t_hts_htmlcheck_query) htswrap_read("query"); + hts_htmlcheck_query2 = (t_hts_htmlcheck_query2) htswrap_read("query2"); + hts_htmlcheck_query3 = (t_hts_htmlcheck_query3) htswrap_read("query3"); + hts_htmlcheck_loop = (t_hts_htmlcheck_loop) htswrap_read("loop"); + hts_htmlcheck_check = (t_hts_htmlcheck_check) htswrap_read("check-link"); +Log: none + + hts_htmlcheck_pause = (t_hts_htmlcheck_pause) htswrap_read("pause"); +Log: "pause: <lockfile>" + + hts_htmlcheck_filesave = (t_hts_htmlcheck_filesave) htswrap_read("save-file"); + hts_htmlcheck_linkdetected = (t_hts_htmlcheck_linkdetected) htswrap_read("link-detected"); +Log: none + + hts_htmlcheck_xfrstatus = (t_hts_htmlcheck_xfrstatus) htswrap_read("transfer-status"); +Log: + "engine: transfer-status: link updated: <url> -> <file>" + | "engine: transfer-status: link added: <url> -> <file>" + | "engine: transfer-status: link recorded: <url> -> <file>" + | "engine: transfer-status: link link error (<errno>, '<err_msg>'): <url>" + hts_htmlcheck_savename = (t_hts_htmlcheck_savename ) htswrap_read("save-name"); +Log: + "engine: save-name: local name: <url> -> <file>" +*/ #endif - diff --git a/src/httrack.c b/src/httrack.c index 0289fca..c69a600 100644 --- a/src/httrack.c +++ b/src/httrack.c @@ -35,8 +35,7 @@ Please visit our Website: http://www.httrack.com /* Author: Xavier Roche */ /* ------------------------------------------------------------ */ -#if HTS_WIN -#else +#ifndef _WIN32 #ifndef Sleep #define Sleep(a) { if (((a)*1000)%1000000) usleep(((a)*1000)%1000000); if (((a)*1000)/1000000) sleep(((a)*1000)/1000000); } #endif @@ -51,13 +50,22 @@ Please visit our Website: http://www.httrack.com #if HTS_ANALYSTE_CONSOLE /* specific definitions */ -#include "htsbase.h" +//#include "htsbase.h" #include <stdio.h> #include <stdlib.h> #include <string.h> +#ifdef HAVE_SYS_TYPES_H +#include <sys/types.h> +#endif +#ifdef HAVE_SYS_STAT_H +#include <sys/stat.h> +#endif +#ifdef HAVE_UNISTD_H +#include <unistd.h> 
+#endif #include <ctype.h> #ifdef _WIN32 -#include "Winsock.h" +//#include "Winsock.h" #endif /* END specific definitions */ @@ -118,8 +126,8 @@ void vt_home(void) { static int use_show; - int main(int argc, char **argv) { + int ret = 0; hts_init(); /* @@ -183,7 +191,11 @@ Log: htswrap_add("transfer-status",htsshow_xfrstatus); htswrap_add("save-name",htsshow_savename); - return hts_main(argc,argv); + ret = hts_main(argc,argv); + if (ret) { + fprintf(stderr, "* %s\n", hts_errmsg()); + } + return ret; } @@ -224,7 +236,7 @@ int __cdecl htsshow_start(httrackp* opt) { return 1; } int __cdecl htsshow_chopt(httrackp* opt) { - return __cdecl htsshow_start(opt); + return htsshow_start(opt); } int __cdecl htsshow_end(void) { return 1; @@ -283,7 +295,7 @@ int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int lien_n, if (rate>0) SInfo.rate=rate; // rate if (irate>=0) SInfo.irate=irate; // irate if (SInfo.irate<0) SInfo.irate=SInfo.rate; - if (SInfo.stat_back>=0) SInfo.stat_back=nbk; + if (nbk>=0) SInfo.stat_back=nbk; if (stat_written>=0) SInfo.stat_written=stat_written; if (stat_updated>=0) SInfo.stat_updated=stat_updated; if (stat_errors>=0) SInfo.stat_errors=stat_errors; @@ -369,10 +381,10 @@ int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int lien_n, { int i; for(i=0;i<NStatsBuffer;i++) { - strcpy(StatsBuffer[i].state,""); - strcpy(StatsBuffer[i].name,""); - strcpy(StatsBuffer[i].file,""); - strcpy(StatsBuffer[i].url_sav,""); + strcpybuff(StatsBuffer[i].state,""); + strcpybuff(StatsBuffer[i].name,""); + strcpybuff(StatsBuffer[i].file,""); + strcpybuff(StatsBuffer[i].url_sav,""); StatsBuffer[i].back=0; StatsBuffer[i].size=0; StatsBuffer[i].sizetot=0; @@ -389,18 +401,18 @@ int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int lien_n, switch(j) { case 0: // prioritaire if ((back[i].status>0) && (back[i].status<99)) { - strcpy(StatsBuffer[index].state,"receive"); ok=1; + strcpybuff(StatsBuffer[index].state,"receive"); 
ok=1; } break; case 1: if (back[i].status==99) { - strcpy(StatsBuffer[index].state,"request"); ok=1; + strcpybuff(StatsBuffer[index].state,"request"); ok=1; } else if (back[i].status==100) { - strcpy(StatsBuffer[index].state,"connect"); ok=1; + strcpybuff(StatsBuffer[index].state,"connect"); ok=1; } else if (back[i].status==101) { - strcpy(StatsBuffer[index].state,"search"); ok=1; + strcpybuff(StatsBuffer[index].state,"search"); ok=1; } else if (back[i].status==1000) { // ohh le beau ftp sprintf(StatsBuffer[index].state,"ftp: %s",back[i].info); ok=1; @@ -409,15 +421,15 @@ int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int lien_n, default: if (back[i].status==0) { // prêt if ((back[i].r.statuscode==200)) { - strcpy(StatsBuffer[index].state,"ready"); ok=1; + strcpybuff(StatsBuffer[index].state,"ready"); ok=1; } else if ((back[i].r.statuscode>=100) && (back[i].r.statuscode<=599)) { char tempo[256]; tempo[0]='\0'; infostatuscode(tempo,back[i].r.statuscode); - strcpy(StatsBuffer[index].state,tempo); ok=1; + strcpybuff(StatsBuffer[index].state,tempo); ok=1; } else { - strcpy(StatsBuffer[index].state,"error"); ok=1; + strcpybuff(StatsBuffer[index].state,"error"); ok=1; } } break; @@ -429,32 +441,32 @@ int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int lien_n, StatsBuffer[index].back=i; // index pour + d'infos // s[0]='\0'; - strcpy(StatsBuffer[index].url_sav,back[i].url_sav); // pour cancel + strcpybuff(StatsBuffer[index].url_sav,back[i].url_sav); // pour cancel if (strcmp(back[i].url_adr,"file://")) - strcat(s,back[i].url_adr); + strcatbuff(s,back[i].url_adr); else - strcat(s,"localhost"); + strcatbuff(s,"localhost"); if (back[i].url_fil[0]!='/') - strcat(s,"/"); - strcat(s,back[i].url_fil); + strcatbuff(s,"/"); + strcatbuff(s,back[i].url_fil); StatsBuffer[index].file[0]='\0'; { char* a=strrchr(s,'/'); if (a) { - strncat(StatsBuffer[index].file,a,200); + strncatbuff(StatsBuffer[index].file,a,200); *a='\0'; } } if 
((l=strlen(s))<MAX_LEN_INPROGRESS) - strcpy(StatsBuffer[index].name,s); + strcpybuff(StatsBuffer[index].name,s); else { // couper StatsBuffer[index].name[0]='\0'; - strncat(StatsBuffer[index].name,s,MAX_LEN_INPROGRESS/2-2); - strcat(StatsBuffer[index].name,"..."); - strcat(StatsBuffer[index].name,s+l-MAX_LEN_INPROGRESS/2+2); + strncatbuff(StatsBuffer[index].name,s,MAX_LEN_INPROGRESS/2-2); + strcatbuff(StatsBuffer[index].name,"..."); + strcatbuff(StatsBuffer[index].name,s+l-MAX_LEN_INPROGRESS/2+2); } if (back[i].r.totalsize>0) { // taille prédéfinie @@ -568,4 +580,36 @@ int __cdecl htsshow_savename(char* adr_complete,char* fil_complete,char* referer } +/* *** Various functions *** */ + + +int fexist(char* s) { + struct stat st; + memset(&st, 0, sizeof(st)); + if (stat(s, &st) == 0) { + if (S_ISREG(st.st_mode)) { + return 1; + } + } + return 0; +} + +int linput(FILE* fp,char* s,int max) { + int c; + int j=0; + do { + c=fgetc(fp); + if (c!=EOF) { + switch(c) { + case 13: break; // sauter CR + case 10: c=-1; break; + case 9: case 12: break; // sauter ces caractères + default: s[j++]=(char) c; break; + } + } + } while((c!=-1) && (c!=EOF) && (j<(max-1))); + s[j]='\0'; + return j; +} + #endif diff --git a/src/httrack.dsp b/src/httrack.dsp index 7fc08da..590f025 100644 --- a/src/httrack.dsp +++ b/src/httrack.dsp @@ -43,8 +43,8 @@ RSC=rc.exe # PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
-# ADD CPP /nologo /MT /W3 /GX /Ot /Oi /Oy /Ob2 /I "C:\Dev\IPv6Kit\inc\\" /I "C:\Dev\zlib\\" /I "C:\Dev\openssl\include" /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /FR /YX /FD /c
-# SUBTRACT CPP /Ox /Oa /Ow /Og /Os
+# ADD CPP /nologo /MD /W3 /Gi /O2 /Op /Ob2 /I "C:\Dev\IPv6Kit\inc\\" /I "C:\Dev\zlib\\" /I "C:\Dev\openssl\include" /I "C:\Dev\Winhttrack" /D "NDEBUG" /D "WIN32" /D "_CONSOLE" /D "_MBCS" /D "HTS_ANALYSTE_CONSOLE" /YX /FD /Zm200 /c
+# SUBTRACT CPP /Fr
# ADD BASE RSC /l 0x40c /d "NDEBUG"
# ADD RSC /l 0x409 /d "NDEBUG"
BSC32=bscmake.exe
@@ -52,7 +52,7 @@ BSC32=bscmake.exe # ADD BSC32 /nologo
LINK32=link.exe
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
-# ADD LINK32 wsock32.lib zlib.lib ssleay32.lib libeay32.lib /nologo /subsystem:console /machine:I386 /out:"c:\temp\httrack.exe" /libpath:"C:\Dev\openssl\lib" /libpath:"C:\Dev\zlib\dll32" /libpath:"C:\Dev\openssl\lib\out32dll"
+# ADD LINK32 wsock32.lib libhttrack.lib /nologo /subsystem:console /machine:I386 /out:"L:\HTTrack\httrack\httrack.exe" /libpath:"C:\Dev\openssl\lib" /libpath:"C:\Dev\zlib\dll32" /libpath:"C:\Dev\openssl\lib\out32dll" /libpath:"C:\temp\Releaselib"
# SUBTRACT LINK32 /verbose
!ELSEIF "$(CFG)" == "httrack - Win32 Debug"
@@ -69,7 +69,7 @@ LINK32=link.exe # PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
# ADD BASE CPP /nologo /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
-# ADD CPP /nologo /MT /W3 /Gm /GR /GX /ZI /Od /I "C:\Dev\IPv6Kit\inc\\" /I "C:\Dev\zlib\\" /I "C:\Dev\openssl\include" /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /FAcs /Fr /YX /FD /c
+# ADD CPP /nologo /MDd /W3 /Gm /GR /ZI /Od /I "C:\Dev\IPv6Kit\inc\\" /I "C:\Dev\zlib\\" /I "C:\Dev\openssl\include" /I "C:\Dev\Winhttrack" /D "_DEBUG" /D "WIN32" /D "_CONSOLE" /D "_MBCS" /D "HTS_ANALYSTE_CONSOLE" /FAcs /Fr /FD /Zm200 /c
# ADD BASE RSC /l 0x40c /d "_DEBUG"
# ADD RSC /l 0x409 /d "_DEBUG"
BSC32=bscmake.exe
@@ -77,8 +77,8 @@ BSC32=bscmake.exe # ADD BSC32 /nologo
LINK32=link.exe
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
-# ADD LINK32 wsock32.lib zlib.lib ssleay32.lib libeay32.lib /nologo /subsystem:console /map /debug /debugtype:both /machine:I386 /out:"c:\temp\test\httrack.exe" /pdbtype:sept /libpath:"C:\Dev\openssl\lib" /libpath:"C:\Dev\zlib\dll32" /libpath:"C:\Dev\openssl\lib\out32dll"
-# SUBTRACT LINK32 /profile
+# ADD LINK32 wsock32.lib libhttrack.lib /nologo /subsystem:console /debug /debugtype:both /machine:I386 /out:"C:\temp\httrack.exe" /pdbtype:sept /libpath:"C:\Dev\openssl\lib" /libpath:"C:\Dev\zlib\dll32" /libpath:"C:\Dev\openssl\lib\out32dll" /libpath:"C:\temp\Debuglib"
+# SUBTRACT LINK32 /profile /map
!ELSEIF "$(CFG)" == "httrack - Win32 Release avec debug"
@@ -96,8 +96,7 @@ LINK32=link.exe # PROP Target_Dir ""
# ADD BASE CPP /nologo /MT /W3 /GX /Ot /Oi /Oy /Ob2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
# SUBTRACT BASE CPP /Ox /Oa /Ow /Og /Os
-# ADD CPP /nologo /MT /W3 /GX /Zi /Ot /Oi /Oy /Ob2 /I "C:\Dev\IPv6Kit\inc\\" /I "C:\Dev\zlib\\" /I "C:\Dev\openssl\include" /D "WIN32" /D "_CONSOLE" /D "_MBCS" /FAcs /FR /YX /FD /c
-# SUBTRACT CPP /Ox /Oa /Ow /Og /Os
+# ADD CPP /nologo /MD /W3 /GX /Zi /Ot /Oi /Oy /Ob2 /I "C:\Dev\IPv6Kit\inc\\" /I "C:\Dev\zlib\\" /I "C:\Dev\openssl\include" /I "C:\Dev\Winhttrack" /D "WIN32" /D "_CONSOLE" /D "_MBCS" /D "HTS_ANALYSTE_CONSOLE" /FAcs /FR /YX /FD /Zm200 /c
# ADD BASE RSC /l 0x409 /d "NDEBUG"
# ADD RSC /l 0x409 /d "NDEBUG"
BSC32=bscmake.exe
@@ -106,7 +105,7 @@ BSC32=bscmake.exe LINK32=link.exe
# ADD BASE LINK32 wsock32.lib /nologo /subsystem:console /machine:I386 /out:"c:\temp\httrack.exe"
# SUBTRACT BASE LINK32 /verbose
-# ADD LINK32 wsock32.lib zlib.lib ssleay32.lib libeay32.lib /nologo /subsystem:console /debug /machine:I386 /out:"c:\temp\httrack.exe" /libpath:"C:\Dev\openssl\lib" /libpath:"C:\Dev\zlib\dll32" /libpath:"C:\Dev\openssl\lib\out32dll"
+# ADD LINK32 wsock32.lib libhttrack.lib /nologo /subsystem:console /debug /machine:I386 /out:"L:\HTTrack\httrack\httrack.exe" /libpath:"C:\Dev\openssl\lib" /libpath:"C:\Dev\zlib\dll32" /libpath:"C:\Dev\openssl\lib\out32dll"
# SUBTRACT LINK32 /verbose
!ENDIF
@@ -118,207 +117,11 @@ LINK32=link.exe # Name "httrack - Win32 Release avec debug"
# Begin Source File
-SOURCE=.\htsalias.c
-# End Source File
-# Begin Source File
-
-SOURCE=.\htsalias.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\htsback.c
-# End Source File
-# Begin Source File
-
-SOURCE=.\htsback.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\htsbauth.c
-# End Source File
-# Begin Source File
-
-SOURCE=.\htsbauth.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\htscache.c
-# End Source File
-# Begin Source File
-
-SOURCE=.\htscache.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\htscatchurl.c
-# End Source File
-# Begin Source File
-
-SOURCE=.\htscatchurl.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\htsconfig.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\htscore.c
-# End Source File
-# Begin Source File
-
-SOURCE=.\htscore.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\htscoremain.c
-# End Source File
-# Begin Source File
-
-SOURCE=.\htscoremain.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\htsdefines.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\htsfilters.c
-# End Source File
-# Begin Source File
-
-SOURCE=.\htsfilters.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\htsftp.c
-# End Source File
-# Begin Source File
-
-SOURCE=.\htsftp.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\htsglobal.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\htshash.c
-# End Source File
-# Begin Source File
-
-SOURCE=.\htshash.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\htshelp.c
-# End Source File
-# Begin Source File
-
-SOURCE=.\htshelp.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\htsindex.c
-# End Source File
-# Begin Source File
-
-SOURCE=.\htsindex.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\htsjava.c
-# End Source File
-# Begin Source File
-
-SOURCE=.\htsjava.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\htslib.c
-# End Source File
-# Begin Source File
-
-SOURCE=.\htslib.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\htsmd5.c
-# End Source File
-# Begin Source File
-
-SOURCE=.\htsmd5.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\htsname.c
-# End Source File
-# Begin Source File
-
-SOURCE=.\htsname.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\htsnostatic.c
-# End Source File
-# Begin Source File
-
-SOURCE=.\htsnostatic.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\htsrobots.c
-# End Source File
-# Begin Source File
-
-SOURCE=.\htsrobots.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\htsthread.c
-# End Source File
-# Begin Source File
-
-SOURCE=.\htsthread.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\htstools.c
-# End Source File
-# Begin Source File
-
-SOURCE=.\htstools.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\htswizard.c
-# End Source File
-# Begin Source File
-
-SOURCE=.\htswizard.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\htswrap.c
-# End Source File
-# Begin Source File
-
-SOURCE=.\htswrap.h
-# End Source File
-# Begin Source File
-
SOURCE=.\httrack.c
# End Source File
# Begin Source File
SOURCE=.\httrack.h
# End Source File
-# Begin Source File
-
-SOURCE=.\md5.c
-# End Source File
-# Begin Source File
-
-SOURCE=.\md5.h
-# End Source File
# End Target
# End Project
diff --git a/src/httrack.dsw b/src/httrack.dsw index 9aa199f..3cbb30f 100644 --- a/src/httrack.dsw +++ b/src/httrack.dsw @@ -11,6 +11,21 @@ Package=<5> Package=<4>
{{{
+ Begin Project Dependency
+ Project_Dep_Name libhttrack
+ End Project Dependency
+}}}
+
+###############################################################################
+
+Project: "libhttrack"=.\libhttrack\libhttrack.dsp - Package Owner=<4>
+
+Package=<5>
+{{{
+}}}
+
+Package=<4>
+{{{
}}}
###############################################################################
diff --git a/src/httrack.h b/src/httrack.h index a8633de..f297e00 100644 --- a/src/httrack.h +++ b/src/httrack.h @@ -43,9 +43,9 @@ Please visit our Website: http://www.httrack.com #include "htscore.h" typedef struct { - char name[1000]; - char file[256]; - char state[20]; + char name[1024]; + char file[1024]; + char state[256]; char url_sav[HTS_URLMAXSIZE*2]; // pour cancel char url_adr[HTS_URLMAXSIZE*2]; char url_fil[HTS_URLMAXSIZE*2]; diff --git a/src/postinst-config.in b/src/postinst-config.in deleted file mode 100755 index f0edc72..0000000 --- a/src/postinst-config.in +++ /dev/null @@ -1,55 +0,0 @@ -#!/bin/sh - -# Config file location -cnfdir="__ETCPATH__" -cnf="__ETCPATH__/httrack.conf" - -if test "`id -u`" -eq 0; then - mkdir -p "$cnfdir" - if ! test -f "$cnf"; then - echo "creating $cnf (please modify it) .." - cat>"$cnf" << EOF -# HTTrack Website Copier Settings -# See httrack --help for more information - -# Examples: (to uncomment) - -# set proxy proxy.myisp.com:8080 -# retries=2 -# set max-size 10000000 -# set max-time 36000 -# set user-agent Mouzilla/17.0 (compatible; HTTrack; I) -# -# There are MUCH more options.. try 'httrack --quiet --help | more' - -# Deny and allow for links -# this will be used by default for all mirrors -allow *.gif -allow *.png -deny ad.doubleclick.net/* - -# Path and other options -# '~' in the *begining* means 'home dir' -# '#' at the *end* means "projectname" (that is, the first URL given) -# Example: '~/websites/#' will create /home/smith/websites/www.foo.com -# folder when launching 'httrack www.foo.com' -set path ~/websites/# - -EOF - fi - - if ! 
grep "set path" "$cnf" >/dev/null; then - echo "default path set to <home dir>/websites/<first_site_name>" - fi - - chown root:__ROOTGROUP__ "$cnf" - chmod 744 "$cnf" -else - cat << EOF - -You are not root, therefore $cnf configuration file hasn't been created -Re-run this sript ($0) as root if you want to do that - -EOF -fi - diff --git a/src/strip_cr.in b/src/strip_cr.in deleted file mode 100755 index 03af084..0000000 --- a/src/strip_cr.in +++ /dev/null @@ -1,32 +0,0 @@ -__PERL__ -# A simple script to convert DOS text files to -# Unix one. Useful to strip all CR on .c and .h -# sourcefiles. -# Usage: strip_cr <files> -foreach $fname (@ARGV) { - $ad=1; - if (open(FL,$fname)) { - if (open(FO,">".$fname.".tmp")) { - while(<FL>) { - s/\r\n$/\n/g; - print FO "$_"; - } - close(FL); - close(FO); - if ((-s $fname) != (-s $fname.".tmp")) { - print("Stripping ".$fname."..\n"); - rename($fname.".tmp",$fname); - } else { - unlink($fname.".tmp"); - } - } else { - print "Unable to open ".$fname.".tmp\n"; - } - } else { - print "Unable to open $fname\n"; - } -} -if (!$ad) { - print "Ensure that a text file has no lines ended with CR (DOS)\n"; - print "Usage: strip_cr <file>\n"; -} diff --git a/src/webhttrack b/src/webhttrack new file mode 100755 index 0000000..3b433bb --- /dev/null +++ b/src/webhttrack @@ -0,0 +1,101 @@ +#!/bin/bash +# +# WebHTTrack launcher script +# Initializes the htsserver GUI frontend and launch the default browser +BROWSEREXE= +SRCHBROWSEREXE="x-www-browser www-browser mozilla galeon konqueror opera netscape" +if test -n "${BROWSER}"; then +# sensible-browser will f up if BROWSER is not set +SRCHBROWSEREXE="sensible-browser ${SRCHBROWSEREXE}" +fi +SRCHPATH="/usr/local/bin /usr/share/bin /usr/bin /usr/lib/httrack /usr/local/lib/httrack /usr/local/share/httrack ${HOME}/usr/bin ${HOME}/bin" +SRCHDISTPATH="/usr/share /usr/local /usr /local /usr/local/share ${HOME}/usr ${HOME}/usr/share ${HOME}/usr/local ${HOME}/usr/share" + +### +# And now some famous cuisine 
+ +function log { +echo "$0($$): $@" >&2 +return 0 +} + +# First ensure that we can launch the server +BINPATH= +for i in ${SRCHPATH}; do + ! test -n "${BINPATH}" && test -x ${i}/htsserver && BINPATH=${i} +done +for i in ${SRCHDISTPATH}; do + ! test -n "${DISTPATH}" && test -f "${i}/httrack/lang.def" && DISTPATH="${i}/httrack" +done +test -n "${BINPATH}" || ! log "could not find htsserver" || exit 1 +test -n "${DISTPATH}" || ! log "could not find httrack directory" || exit 1 +test -f ${DISTPATH}/lang.def || ! log "could not find ${DISTPATH}/lang.def" || exit 1 +test -f ${DISTPATH}/lang.indexes || ! log "could not find ${DISTPATH}/lang.indexes" || exit 1 +test -d ${DISTPATH}/lang || ! log "could not find ${DISTPATH}/lang" || exit 1 +test -d ${DISTPATH}/html || ! log "could not find ${DISTPATH}/html" || exit 1 + +# Locale +HTSLANG="${LC_MESSAGES}" +! test -n "${HTSLANG}" && HTSLANG="${LC_ALL}" +! test -n "${HTSLANG}" && HTSLANG="${LANG}" +test -n "${HTSLANG}" && HTSLANG="`echo ${HTSLANG} | cut -c1-2` | tr 'A-Z' 'a-z'" +LANGN=`grep "${HTSLANG}:" ${DISTPATH}/lang.indexes | cut -f2 -d':'` +! test -n "${LANGN}" && LANGN=1 + +# Find the browser +# note: not all systems have sensible-browser or www-browser alternative +# thefeore, we have to find a bit more if sensible-browser could not be found +for i in ${SRCHBROWSEREXE}; do +for j in ${SRCHPATH}; do +if test -x ${j}/${i}; then +BROWSEREXE=${j}/${i} +fi +test -n "$BROWSEREXE" && break +done +test -n "$BROWSEREXE" && break +done +test -n "$BROWSEREXE" || ! log "cound not find any suitable browser" || exit 1 + +# "browse" command +if test "$1" = "browse"; then +${BROWSEREXE} "file://${HOME}/websites/index.html" +exit $? +fi + +# Create a temporary filename +TMPSRVFILE="/tmp/.webhttrack.$$.`/usr/bin/head -c16 /dev/random | /usr/bin/md5sum | /usr/bin/cut -f1 -d' '`" +>${TMPSRVFILE} || ! 
log "cound not create the temporary file ${TMPSRVFILE}" || exit 1 +# Launch htsserver binary and setup the server +(${BINPATH}/htsserver "${DISTPATH}/" path "${HOME}/websites" lang "${LANGN}" $@; echo SRVURL=error) > ${TMPSRVFILE}& +# Find the generated SRVURL +SRVURL= +MAXCOUNT=60 +while ! test -n "$SRVURL"; do +MAXCOUNT=$[$MAXCOUNT - 1] +test $MAXCOUNT -gt 0 || exit 1 +test $MAXCOUNT -lt 50 && echo "waiting for server to reply.." +SRVURL=`/bin/grep -E URL= ${TMPSRVFILE} | /usr/bin/cut -f2- -d=` +test ! "$SRVURL" = "error" || ! log "could not spawn htsserver" || exit 1 +test -n "$SRVURL" || /bin/sleep 1 +done + +# Cleanup function +function cleanup { +test -n "$1" && log "nasty signal caught, cleaning up.." +test -f ${TMPSRVFILE} && SRVPID=`/bin/grep -E PID= ${TMPSRVFILE} | /usr/bin/cut -f2- -d=` +test -n "${SRVPID}" && kill -9 ${SRVPID} +test -f ${TMPSRVFILE} && rm ${TMPSRVFILE} +test -n "$1" && log "..done" +return 0 +} + +# Cleanup in case of emergency +trap "cleanup now; exit" 1 2 3 4 5 6 7 8 9 11 13 14 15 16 19 24 25 + +# Got SRVURL, launch browser +${BROWSEREXE} "${SRVURL}" + +# That's all, folks! +trap "" 1 2 3 4 5 6 7 8 9 11 13 14 15 16 19 24 25 +cleanup +exit 0 diff --git a/src/webhttrack.dsp b/src/webhttrack.dsp new file mode 100755 index 0000000..a5940e8 --- /dev/null +++ b/src/webhttrack.dsp @@ -0,0 +1,120 @@ +# Microsoft Developer Studio Project File - Name="webhttrack" - Package Owner=<4>
+# Microsoft Developer Studio Generated Build File, Format Version 6.00
+# ** DO NOT EDIT **
+
+# TARGTYPE "Win32 (x86) Console Application" 0x0103
+
+CFG=webhttrack - Win32 Debug
+!MESSAGE This is not a valid makefile. To build this project using NMAKE,
+!MESSAGE use the Export Makefile command and run
+!MESSAGE
+!MESSAGE NMAKE /f "webhttrack.mak".
+!MESSAGE
+!MESSAGE You can specify a configuration when running NMAKE
+!MESSAGE by defining the macro CFG on the command line. For example:
+!MESSAGE
+!MESSAGE NMAKE /f "webhttrack.mak" CFG="webhttrack - Win32 Debug"
+!MESSAGE
+!MESSAGE Possible choices for configuration are:
+!MESSAGE
+!MESSAGE "webhttrack - Win32 Release" (based on "Win32 (x86) Console Application")
+!MESSAGE "webhttrack - Win32 Debug" (based on "Win32 (x86) Console Application")
+!MESSAGE
+
+# Begin Project
+# PROP AllowPerConfigDependencies 0
+# PROP Scc_ProjName ""
+# PROP Scc_LocalPath ""
+CPP=cl.exe
+RSC=rc.exe
+
+!IF "$(CFG)" == "webhttrack - Win32 Release"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 0
+# PROP BASE Output_Dir "Release"
+# PROP BASE Intermediate_Dir "Release"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 0
+# PROP Output_Dir "Release"
+# PROP Intermediate_Dir "Release"
+# PROP Ignore_Export_Lib 0
+# PROP Target_Dir ""
+# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /Yu"stdafx.h" /FD /c
+# ADD CPP /nologo /MD /W3 /GX /O2 /I "C:\Dev\\" /I "C:\Dev\IPv6Kit\inc\\" /I "C:\Dev\zlib\\" /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /FD /c
+# SUBTRACT CPP /YX /Yc /Yu
+# ADD BASE RSC /l 0x40c /d "NDEBUG"
+# ADD RSC /l 0x40c /d "NDEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LINK32=link.exe
+# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
+# ADD LINK32 wsock32.lib libhttrack.lib /nologo /subsystem:console /pdb:none /machine:I386 /force /out:"L:\HTTrack\httrack\webhttrack.exe" /libpath:"C:\Dev\openssl\lib" /libpath:"C:\Dev\zlib\dll32" /libpath:"C:\Dev\openssl\lib\out32dll" /libpath:"C:\temp\Releaselib"
+
+!ELSEIF "$(CFG)" == "webhttrack - Win32 Debug"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 1
+# PROP BASE Output_Dir "Debug"
+# PROP BASE Intermediate_Dir "Debug"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 1
+# PROP Output_Dir "Debug"
+# PROP Intermediate_Dir "Debug"
+# PROP Ignore_Export_Lib 0
+# PROP Target_Dir ""
+# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /Yu"stdafx.h" /FD /GZ /c
+# ADD CPP /nologo /MDd /W3 /Gm /GR /GX /ZI /Od /I "C:\Dev\\" /I "C:\Dev\IPv6Kit\inc\\" /I "C:\Dev\zlib\\" /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /FAcs /FR /FD /GZ /c
+# SUBTRACT CPP /YX /Yc /Yu
+# ADD BASE RSC /l 0x40c /d "_DEBUG"
+# ADD RSC /l 0x40c /d "_DEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LINK32=link.exe
+# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
+# ADD LINK32 wsock32.lib libhttrack.lib /nologo /subsystem:console /debug /machine:I386 /out:"C:\temp\webhttrack.exe" /pdbtype:sept /libpath:"C:\Dev\openssl\lib" /libpath:"C:\Dev\zlib\dll32" /libpath:"C:\Dev\openssl\lib\out32dll" /libpath:"C:\temp\Releaselib"
+
+!ENDIF
+
+# Begin Target
+
+# Name "webhttrack - Win32 Release"
+# Name "webhttrack - Win32 Debug"
+# Begin Group "Source Files"
+
+# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"
+# Begin Source File
+
+SOURCE=.\htsserver.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsweb.c
+# End Source File
+# End Group
+# Begin Group "Header Files"
+
+# PROP Default_Filter "h;hpp;hxx;hm;inl"
+# Begin Source File
+
+SOURCE=.\htsserver.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsweb.h
+# End Source File
+# End Group
+# Begin Group "Resource Files"
+
+# PROP Default_Filter "ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe"
+# End Group
+# Begin Source File
+
+SOURCE=.\ReadMe.txt
+# End Source File
+# End Target
+# End Project
diff --git a/src/webhttrack.dsw b/src/webhttrack.dsw new file mode 100755 index 0000000..ea9ea0b --- /dev/null +++ b/src/webhttrack.dsw @@ -0,0 +1,44 @@ +Microsoft Developer Studio Workspace File, Format Version 6.00
+# WARNING: DO NOT EDIT OR DELETE THIS WORKSPACE FILE!
+
+###############################################################################
+
+Project: "libhttrack"=..\libhttrack\libhttrack.dsp - Package Owner=<4>
+
+Package=<5>
+{{{
+}}}
+
+Package=<4>
+{{{
+}}}
+
+###############################################################################
+
+Project: "webhttrack"=.\webhttrack.dsp - Package Owner=<4>
+
+Package=<5>
+{{{
+}}}
+
+Package=<4>
+{{{
+ Begin Project Dependency
+ Project_Dep_Name libhttrack
+ End Project Dependency
+}}}
+
+###############################################################################
+
+Global:
+
+Package=<5>
+{{{
+}}}
+
+Package=<3>
+{{{
+}}}
+
+###############################################################################
+
|