summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/Makefile10
-rw-r--r--src/Makefile.in417
-rwxr-xr-xsrc/configure603
-rw-r--r--src/gpl.txt287
-rw-r--r--src/hts-indextmpl.h924
-rw-r--r--src/htsalias.c520
-rw-r--r--src/htsalias.h58
-rw-r--r--src/htsback.c2462
-rw-r--r--src/htsback.h75
-rw-r--r--src/htsbase.h136
-rw-r--r--src/htsbasenet.h86
-rw-r--r--src/htsbauth.c401
-rw-r--r--src/htsbauth.h74
-rw-r--r--src/htscache.c881
-rw-r--r--src/htscache.h64
-rw-r--r--src/htscatchurl.c296
-rw-r--r--src/htscatchurl.h76
-rw-r--r--src/htsconfig.h133
-rw-r--r--src/htscore.c4158
-rw-r--r--src/htscore.h363
-rw-r--r--src/htscoremain.c2001
-rw-r--r--src/htscoremain.h62
-rw-r--r--src/htsdefines.h100
-rw-r--r--src/htsfilters.c316
-rw-r--r--src/htsfilters.h49
-rw-r--r--src/htsftp.c1135
-rw-r--r--src/htsftp.h68
-rw-r--r--src/htsglobal.h332
-rw-r--r--src/htshash.c453
-rw-r--r--src/htshash.h104
-rw-r--r--src/htshelp.c622
-rw-r--r--src/htshelp.h53
-rw-r--r--src/htsindex.c483
-rw-r--r--src/htsindex.h48
-rw-r--r--src/htsjava.c395
-rw-r--r--src/htsjava.h69
-rw-r--r--src/htslib.c4279
-rw-r--r--src/htslib.h339
-rw-r--r--src/htsmd5.c76
-rw-r--r--src/htsmd5.h52
-rw-r--r--src/htsname.c1266
-rw-r--r--src/htsname.h50
-rw-r--r--src/htsnet.h242
-rw-r--r--src/htsnostatic.c260
-rw-r--r--src/htsnostatic.h223
-rw-r--r--src/htsopt.h186
-rw-r--r--src/htsparse.c2377
-rw-r--r--src/htsrobots.c118
-rw-r--r--src/htsrobots.h56
-rw-r--r--src/htssystem.h15
-rw-r--r--src/htssystem.h.windows9x11
-rw-r--r--src/htsthread.c97
-rw-r--r--src/htsthread.h95
-rw-r--r--src/htstools.c785
-rw-r--r--src/htstools.h138
-rw-r--r--src/htswizard.c880
-rw-r--r--src/htswizard.h53
-rw-r--r--src/htswrap.c69
-rw-r--r--src/htswrap.h48
-rw-r--r--src/htszlib.c84
-rw-r--r--src/htszlib.h49
-rw-r--r--src/httrack-library.h50
-rw-r--r--src/httrack.c571
-rw-r--r--src/httrack.dsp324
-rw-r--r--src/httrack.dsw29
-rw-r--r--src/httrack.h107
-rw-r--r--src/md5.c271
-rw-r--r--src/md5.h36
-rwxr-xr-xsrc/postinst-config.in55
-rwxr-xr-xsrc/strip_cr.in32
70 files changed, 31637 insertions, 0 deletions
diff --git a/src/Makefile b/src/Makefile
new file mode 100644
index 0000000..be898d9
--- /dev/null
+++ b/src/Makefile
@@ -0,0 +1,10 @@
+all :
+ @echo "please launch configure before! :"
+ @echo "./configure"
+ @echo ""
+ @echo "then, you can launch:"
+ @echo "make"
+ @echo "make install"
+ @echo ""
+ @echo "(see INSTALL file to know how-to-install)"
+
diff --git a/src/Makefile.in b/src/Makefile.in
new file mode 100644
index 0000000..57bf1d3
--- /dev/null
+++ b/src/Makefile.in
@@ -0,0 +1,417 @@
+# HTTrack Website Copier, Offline Browser for Windows and Unix
+# Copyright (C) Xavier Roche and other contributors
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# Version: 1.3 (09/2001)
+# Usage: just type 'make'
+
+# Generated by configure
+AUTONAME =
+#__AUTONAME__
+
+
+### Makefile commands
+SHELL = /bin/sh
+MAIN =
+CC = gcc
+CFLAGS = __CFLAGS__
+# threads
+LFLAGS = __LFLAGS__ __LFLAGS2__ __LFLAGS3__
+
+
+BINARIES = htscore.o htsback.o htscache.o\
+ htscatchurl.o htsfilters.o htsftp.o htshash.o\
+ htshelp.o htsjava.o htslib.o htscoremain.o\
+ htsname.o htsrobots.o htstools.o htswizard.o\
+ htsalias.o htsthread.o htsindex.o\
+ htsbauth.o htsmd5.o\
+ htswrap.o md5.o htszlib.o\
+ htsnostatic.o \
+ httrack.o
+
+SOBINARIES = htscore.o htsback.o htscache.o\
+ htscatchurl.o htsfilters.o htsftp.o htshash.o\
+ htshelp.o htsjava.o htslib.o htscoremain.o\
+ htsname.o htsrobots.o htstools.o htswizard.o\
+ htsalias.o htsthread.o htsindex.o\
+ htsbauth.o htsmd5.o htszlib.o\
+ htsnostatic.o \
+ htswrap.o md5.o
+
+MAINBIN = httrack.c
+
+BOUTPUT = httrack
+BOUTPUTSO = libhttrack.so
+DOCS = ../HelpHtml ../templates ../httrack-doc.html ../COPYING ../INSTALL ../README ../*.txt
+HTSSYSTEM = htssystem.h
+BINPATH = __BINPATH__
+ETCPATH = __ETCPATH__
+LIBPATH = __LIBPATH__
+PREFIX = __PREFIX__
+
+## Defines for "library" (program compiled with HTTrack)
+# Object files linked into the example program by build_lib.
+# example.o and httracklib.o carry no directory prefix; every HTTrack
+# object is referenced under src/ (htszlib.o included, like its siblings).
+BINARIES_LIB = example.o httracklib.o\
+ src/htscore.o src/htsback.o src/htscache.o\
+ src/htscatchurl.o src/htsfilters.o src/htsftp.o src/htshash.o\
+ src/htshelp.o src/htsjava.o src/htslib.o src/htscoremain.o\
+ src/htsname.o src/htsrobots.o src/htstools.o src/htswizard.o\
+ src/htsalias.o src/htsthread.o src/htsindex.o\
+ src/htsbauth.o src/htsmd5.o src/htszlib.o\
+ src/htsshow.o src/htswrap.o\
+ src/htsnostatic.o \
+ src/md5.o
+BOUTPUT_LIB = example
+HTSSYSTEM_LIB = src/htssystem.h
+
+# in_addr_t problems :
+# In case of problems during compiling,
+# make htss
+# edit htssystem.h and add the following line:
+# #define HTS_DO_NOT_REDEFINE_in_addr_t
+# make manual
+
+
+# Keywords for build types (example: make linux)
+MAKE_LINUX = linux
+MAKE_NETBSD = netbsd
+MAKE_OPENBSD = openbsd
+MAKE_SOLARIS = solaris
+MAKE_AIX = aix
+MAKE_STD = standard
+MAKE_STD2 = standard2
+MAKE_STD3 = standard3
+MAKE_STD4 = standard4
+###
+MAKE_LIB = build_httracklib
+###
+MAKE_AUTO = auto
+###
+
+# First, detect OS Type
+# If your make does not recognize this, change it!
+# Simply-expanded (:=) so that uname runs once, when the Makefile is read,
+# instead of once per reference to these variables.
+SHORTUNAME := $(shell uname)
+FULLUNAME := $(shell uname -a)
+
+### Targets:
+
+# Default target, attempt to use uname if necessary
+# if uname is empty, display info message
+all :
+ @if test -n "$(AUTONAME)"; then\
+ $(MAKE) $(AUTONAME);\
+ elif test -n "$(SHORTUNAME)"; then\
+ $(MAKE) $(SHORTUNAME);\
+ else\
+ $(MAKE) help;\
+ fi
+
+# If we cannot detect the OS type, show an informational message
+help :
+ @clear
+ @echo ""
+ @echo "Welcome to HTTrack Website Copier install!"
+ @echo "-----------------------------------------"
+ @echo ""
+ @echo "1. To make HTTrack, just type in:"
+ @echo " make $(MAKE_LINUX)"
+ @echo " or"
+ @echo " make $(MAKE_NETBSD)"
+ @echo " or"
+ @echo " make $(MAKE_OPENBSD)"
+ @echo " or"
+ @echo " make $(MAKE_SOLARIS)"
+ @echo " or"
+ @echo " make $(MAKE_AIX)"
+ @echo " or"
+ @echo " make $(MAKE_STD)"
+ @echo " or (problems with in_addr_t)"
+ @echo " make $(MAKE_STD2)"
+ @echo " or (problems with 64-bit)"
+ @echo " make $(MAKE_STD3)"
+ @echo " or (problems with both in_addr_t and 64-bit)"
+ @echo " make $(MAKE_STD4)"
+ @echo ""
+ @echo "According to your OS type"
+ @echo "(example: type in 'make $(MAKE_LINUX)' if you compile HTTrack with linux)"
+ @echo
+ @echo "Or, if it does not work, you can try "
+ @echo " make htss"
+ @echo " edit htssystem.h (check OS type), and add the following line:"
+ @echo " #define HTS_DO_NOT_REDEFINE_in_addr_t"
+ @echo " make manual"
+ @echo
+ @echo "2. Then, type in 'make install' to copy httrack to $(BINPATH)"
+ @echo " or just use ./httrack to launch the program"
+ @echo ""
+ @echo "3. Build problems, type in:"
+ @echo " make moreinfo"
+ @echo ""
+ @echo "Have fun with HTTrack Website Copier!"
+ @echo ""
+info : help
+
+# Troubleshooter
+moreinfo :
+ @echo "Known problems:"
+ @echo ""
+ @echo "\`in_addr_t' undeclared (first use this function)"
+ @echo "see in_addr_t problems in Makefile"
+ @echo ""
+
+###
+
+## Build Targets (this is the name given by 'uname')
+Linux : $(MAKE_LINUX)
+SunOS : $(MAKE_SOLARIS)
+AIX : $(MAKE_AIX)
+NetBSD : $(MAKE_NETBSD)
+OpenBSD : $(MAKE_OPENBSD)
+
+### Build Targets (standard types)
+# Every build type below is a fixed sequence of steps written as an ordered
+# prerequisite list: write htssystem.h, compile and link, strip, then remove
+# the object files. The steps must run left to right, one at a time, so
+# parallel execution (make -j) is switched off for this Makefile.
+.NOTPARALLEL:
+default : firstinfo htssystem htssystem_default build_default strip clean lastinfo
+$(MAKE_LINUX) : firstinfo htssystem htssystem_linux build_default strip clean lastinfo
+$(MAKE_SOLARIS) : firstinfo htssystem htssystem_solaris build_solaris strip clean lastinfo
+$(MAKE_AIX) : firstinfo htssystem htssystem_aix build_default strip clean lastinfo
+$(MAKE_NETBSD) : firstinfo htssystem htssystem_netbsd build_default strip clean lastinfo
+$(MAKE_OPENBSD) : firstinfo htssystem htssystem_openbsd build_nopthread strip clean lastinfo
+$(MAKE_STD) : firstinfo htssystem htssystem_default build_default strip clean lastinfo
+$(MAKE_STD2) : firstinfo htssystem htssystem_default2 build_default strip clean lastinfo
+$(MAKE_STD3) : firstinfo htssystem htssystem_default3 build_default strip clean lastinfo
+$(MAKE_STD4) : firstinfo htssystem htssystem_default4 build_default strip clean lastinfo
+### Defines for "library" (program compiled with HTTrack)
+$(MAKE_LIB) : build_lib strip_lib clean_lib lastinfo
+###
+$(MAKE_AUTO) : __MAKEAUTO__
+###
+
+## Defines for OSes
+lib_default : htssystem htssystem_default addhtssystem_lib info_lib
+lib_linux : htssystem htssystem_linux addhtssystem_lib info_lib
+lib_solaris : htssystem htssystem_solaris addhtssystem_lib info_lib
+lib_aix : htssystem htssystem_aix addhtssystem_lib info_lib
+lib_netbsd : htssystem htssystem_netbsd addhtssystem_lib info_lib
+lib_openbsd : htssystem htssystem_openbsd addhtssystem_lib info_lib
+lib_std : htssystem htssystem_default addhtssystem_lib info_lib
+
+
+# manual build
+htss : htssystem htssystem_default
+manual : build_default strip clean lastinfo
+
+# Creates htssystem.h file
+htssystem :
+ @echo "/* HTTrack, Offline Browser for Windows and Unix */" > $(HTSSYSTEM)
+ @echo "" >> $(HTSSYSTEM)
+ @echo "/* HTTrack system definition */" >> $(HTSSYSTEM)
+ @echo "/* This should be the only file you have to change */" >> $(HTSSYSTEM)
+ @echo "" >> $(HTSSYSTEM)
+ @echo "/* Solaris: 0 / Windows: 1 / AIX: 2 / Linux: 3 */" >> $(HTSSYSTEM)
+ @echo "" >> $(HTSSYSTEM)
+ @echo "" >> $(HTSSYSTEM)
+ @echo "/* Fix plateform number to 0 (SunOS) */" >> $(HTSSYSTEM)
+ @echo "/* If it doesn't compile, try another one */" >> $(HTSSYSTEM)
+
+htssystem_solaris :
+ @echo "#define HTS_PLATFORM 0" >> $(HTSSYSTEM)
+ @echo "#define HTS_PLATFORM_NAME \"$(FULLUNAME)\"" >> $(HTSSYSTEM)
+ @echo "#define HTS_LITTLE_ENDIAN" >> $(HTSSYSTEM)
+ @echo "" >> $(HTSSYSTEM)
+
+htssystem_aix :
+ @echo "#define HTS_PLATFORM 2" >> $(HTSSYSTEM)
+ @echo "#define HTS_PLATFORM_NAME \"$(FULLUNAME)\"" >> $(HTSSYSTEM)
+ @echo "#define HTS_LITTLE_ENDIAN" >> $(HTSSYSTEM)
+ @echo "" >> $(HTSSYSTEM)
+
+htssystem_linux :
+ @echo "#define HTS_PLATFORM 3" >> $(HTSSYSTEM)
+ @echo "#define HTS_PLATFORM_NAME \"$(FULLUNAME)\"" >> $(HTSSYSTEM)
+ @echo "" >> $(HTSSYSTEM)
+
+htssystem_netbsd:
+ @echo "#define HTS_PLATFORM 3" >> $(HTSSYSTEM)
+ @echo "#define HTS_PLATFORM_NAME \"$(FULLUNAME)\"" >> $(HTSSYSTEM)
+ @echo "#define HTS_DO_NOT_REDEFINE_in_addr_t" >> $(HTSSYSTEM)
+ @echo "" >> $(HTSSYSTEM)
+
+htssystem_openbsd:
+ @echo "#define HTS_PLATFORM 3" >> $(HTSSYSTEM)
+ @echo "#define HTS_PLATFORM_NAME \"$(FULLUNAME)\"" >> $(HTSSYSTEM)
+ @echo "#define HTS_DO_NOT_REDEFINE_in_addr_t" >> $(HTSSYSTEM)
+ @echo "#define HTS_DO_NOT_USE_PTHREAD" >> $(HTSSYSTEM)
+ @echo "#define HTS_DO_NOT_USE_UID" >> $(HTSSYSTEM)
+ @echo "" >> $(HTSSYSTEM)
+
+htssystem_nopthread:
+ @echo "#define HTS_PLATFORM 3" >> $(HTSSYSTEM)
+ @echo "#define HTS_PLATFORM_NAME \"$(FULLUNAME)\"" >> $(HTSSYSTEM)
+ @echo "#define HTS_DO_NOT_USE_PTHREAD" >> $(HTSSYSTEM)
+ @echo "" >> $(HTSSYSTEM)
+
+htssystem_default :
+ @echo "#define HTS_PLATFORM 3" >> $(HTSSYSTEM)
+ @echo "#define HTS_PLATFORM_NAME \"$(FULLUNAME)\"" >> $(HTSSYSTEM)
+ @echo "" >> $(HTSSYSTEM)
+
+htssystem_default2 :
+ @echo "#define HTS_PLATFORM 3" >> $(HTSSYSTEM)
+ @echo "#define HTS_PLATFORM_NAME \"$(FULLUNAME)\"" >> $(HTSSYSTEM)
+ @echo "#define HTS_DO_NOT_REDEFINE_in_addr_t" >> $(HTSSYSTEM)
+ @echo "" >> $(HTSSYSTEM)
+
+htssystem_default3 :
+ @echo "#define HTS_PLATFORM 3" >> $(HTSSYSTEM)
+ @echo "#define HTS_PLATFORM_NAME \"$(FULLUNAME)\"" >> $(HTSSYSTEM)
+ @echo "#define HTS_NO_64_BIT" >> $(HTSSYSTEM)
+ @echo "" >> $(HTSSYSTEM)
+
+htssystem_default4 :
+ @echo "#define HTS_PLATFORM 3" >> $(HTSSYSTEM)
+ @echo "#define HTS_PLATFORM_NAME \"$(FULLUNAME)\"" >> $(HTSSYSTEM)
+ @echo "#define HTS_DO_NOT_REDEFINE_in_addr_t" >> $(HTSSYSTEM)
+ @echo "#define HTS_NO_64_BIT" >> $(HTSSYSTEM)
+ @echo "" >> $(HTSSYSTEM)
+
+# Generated by configure
+htssystem_auto :
+ @echo "#define HTS_PLATFORM __PLATFORM__" >> $(HTSSYSTEM)
+ @echo "#define HTS_PLATFORM_NAME \"$(FULLUNAME)\"" >> $(HTSSYSTEM)
+ @echo "#define __DEFINEUID__" >> $(HTSSYSTEM)
+ @echo "#define __DEFINEINA__" >> $(HTSSYSTEM)
+ @echo "#define __DEFINEPTH__" >> $(HTSSYSTEM)
+ @echo "#define __DEFINE64B__" >> $(HTSSYSTEM)
+ @echo "#define __DEFINEFTI__" >> $(HTSSYSTEM)
+ @echo "#define HTS_PREFIX \"__DEFINEPRE__\"" >> $(HTSSYSTEM)
+ @echo "#define HTS_BINPATH \"__BINPATH__\"" >> $(HTSSYSTEM)
+ @echo "#define HTS_ETCPATH \"__ETCPATH__\"" >> $(HTSSYSTEM)
+ @echo "#define HTS_LIBPATH \"__LIBPATH__\"" >> $(HTSSYSTEM)
+ @echo "#define HTS_USEZLIB __ZLIB__" >> $(HTSSYSTEM)
+ @echo "#define HTS_ALIGN __PTRALIGN__" >> $(HTSSYSTEM)
+ @echo "#define HTS_INET6 __INET6__" >> $(HTSSYSTEM)
+ @echo "#define HTS_USEOPENSSL __SSL__" >> $(HTSSYSTEM)
+
+## Defines for "library" (program compiled with HTTrack)
+addhtssystem_lib :
+ @echo "/* Extended functions */" >> $(HTSSYSTEM)
+ @echo "#define HTS_ANALYSTE 2" >> $(HTSSYSTEM)
+ @echo "" >> $(HTSSYSTEM)
+
+# Info message before build
+firstinfo :
+ @echo "Building all, please wait"
+ @echo "In case of problems, type in:"
+ @echo "make help"
+ @echo ""
+ @echo "OS TYPE: $(SHORTUNAME)"
+ @echo "Make mode: $(MAKECMDGOALS)"
+ @echo
+
+##
+info_lib :
+ @echo "Please copy htssystem.h to src/htssystem.h by typing:"
+ @echo "cp htssystem.h src/htssystem.h"
+
+### Targets for compiling
+build_solaris : $(BINARIES)
+ $(CC) $(CFLAGS) $(BINARIES) -o $(BOUTPUT) $(LFLAGS) -lnsl -lsocket
+ chmod 755 $(BOUTPUT)
+
+# Link the program without thread support (used by the OpenBSD build type).
+# NOPCFLAGS / NOPLFLAGS are not defined anywhere in this Makefile, so the
+# regular flags are reused with the pthread-specific options filtered out;
+# the other configured libraries are then still passed to the linker.
+# NOPCFLAGS / NOPLFLAGS given on the command line are still honoured.
+build_nopthread: $(BINARIES)
+ $(CC) $(filter-out -D_REENTRANT,$(CFLAGS)) $(NOPCFLAGS) $(BINARIES) -o $(BOUTPUT) $(filter-out -lpthread,$(LFLAGS)) $(NOPLFLAGS)
+ chmod 755 $(BOUTPUT)
+
+build_default : $(BINARIES)
+ $(CC) $(CFLAGS) $(BINARIES) -o $(BOUTPUT) $(LFLAGS)
+ chmod 755 $(BOUTPUT)
+
+## Defines for "library" (program compiled with HTTrack)
+build_lib : $(BINARIES_LIB)
+ $(CC) $(CFLAGS) $(BINARIES_LIB) -o $(BOUTPUT_LIB) $(LFLAGS)
+ chmod 644 $(BOUTPUT_LIB)
+
+## Auto
+build_auto : build_bin__DYNAMIC__
+
+build_bin : $(BINARIES)
+ $(CC) $(CFLAGS) $(BINARIES) -o $(BOUTPUT) $(LFLAGS)
+ chmod 755 $(BOUTPUT)
+
+build_binso : $(SOBINARIES)
+ $(CC) $(CFLAGS) -shared -Wl,-x,-soname,$(BOUTPUTSO) -o $(BOUTPUTSO) $(SOBINARIES) -lc $(LFLAGS)
+ $(CC) -L. -lhttrack $(MAINBIN) -o $(BOUTPUT)
+ chmod 755 $(BOUTPUT)
+
+##
+# Strip the binary so that it is smaller
+strip :
+ strip --strip-all $(BOUTPUT) || strip $(BOUTPUT)
+ __STRIPLIB__
+
+strip_lib :
+ strip --strip-unneeded $(BOUTPUT_LIB)
+
+# Cleaning up..
+clean :
+ rm -f $(BINARIES)
+
+## Defines for "library" (program compiled with HTTrack)
+clean_lib :
+ rm -f $(BINARIES_LIB)
+
+# Bye bye
+lastinfo :
+ @echo "Build successful"
+
+# Installing httrack into the correct folder
+install : __INSTALL__
+uninstall : remove
+remove : __UNINSTALL__
+
+# Install docs
+docinstall :
+ (mkdir -p "$(PREFIX)/doc/httrack" && chmod 755 "$(PREFIX)/doc/httrack") || true
+ cp -fR $(DOCS) "$(PREFIX)/doc/httrack/"
+
+# Uninstall docs
+docremove :
+ rm -rf "$(PREFIX)/doc/httrack"
+
+# Install binaries and conf files
+bininstall :
+ @echo "Copying $(BOUTPUT) to $(BINPATH).."
+ test ! -d "$(BINPATH)" && (mkdir -p "$(BINPATH)" && chmod 755 "$(BINPATH)") || true
+ cp -f $(BOUTPUT) $(BINPATH)
+ chmod 755 $(BINPATH)/$(BOUTPUT)
+
+# Uninstall the shared library and its ".1" symlink.
+# The symlink lives next to the library in $(LIBPATH); a link left in
+# $(PREFIX)/lib by an earlier install is removed as well.
+libremove :
+ rm -f "$(LIBPATH)/$(BOUTPUTSO)"
+ test -L "$(LIBPATH)/$(BOUTPUTSO).1" && rm -f "$(LIBPATH)/$(BOUTPUTSO).1" || true
+ test -L "$(PREFIX)/lib/$(BOUTPUTSO).1" && rm -f "$(PREFIX)/lib/$(BOUTPUTSO).1" || true
+
+# Install the shared library into $(LIBPATH) (created if missing) and add a
+# relative ".1" symlink in the same directory, so that the link resolves
+# even when $(LIBPATH) is not $(PREFIX)/lib.
+libinstall :
+ @echo "Copying $(BOUTPUTSO) to $(LIBPATH)/.."
+ test ! -d "$(LIBPATH)" && (mkdir -p "$(LIBPATH)" && chmod 755 "$(LIBPATH)") || true
+ cp -f $(BOUTPUTSO) "$(LIBPATH)/"
+ chmod 644 "$(LIBPATH)/$(BOUTPUTSO)"
+ ln -sf "$(BOUTPUTSO)" "$(LIBPATH)/$(BOUTPUTSO).1"
+
+
+# Uninstall binaries
+binremove :
+ rm -f $(BINPATH)/$(BOUTPUT)
+ rm -f $(ETCPATH)/httrack.conf
+
+# Configure program
+config :
+ @./postinst-config
+
+###
+
diff --git a/src/configure b/src/configure
new file mode 100755
index 0000000..7c2d472
--- /dev/null
+++ b/src/configure
@@ -0,0 +1,603 @@
+#!/bin/sh
+# No, this isn't generated by autoconf
+# Some parts are inspired by autoconf (Free Software Foundation), however
+# And the idea is slightly the same
+
+# Usage:
+# './configure' and then 'make' and 'make install', or
+# './configure --make --install'
+
+SHELL=/bin/sh
+
+ac_prev=
+for ac_option
+do
+ # If the previous option needs an argument, assign it.
+ if test -n "$ac_prev"; then
+ eval "$ac_prev=\$ac_option"
+ ac_prev=
+ continue
+ fi
+
+ case "$ac_option" in
+ -*=*) ac_optarg=`echo "$ac_option" | sed 's/[-_a-zA-Z0-9]*=//'` ;;
+ *) ac_optarg= ;;
+ esac
+
+ case "$ac_option" in
+
+ --pthread | --thread) THREADS=1 ;;
+ --nopthread | --nothread) THREADS=0 ;;
+
+ --zlib | --gzip) ZLIB=1 ;;
+ --nozlib | --nogzip) ZLIB=0 ;;
+
+ --static | --noso) DYNAMIC=0
+ ;;
+ --dynamic | --so) DYNAMIC=1
+ ;;
+
+ --longlong) LONGLONG=1 ;;
+ --nolonglong) LONGLONG=0 ;;
+
+ --inaddrt) NODECLINADDRT=0 ;;
+ --noinaddrt) NODECLINADDRT=1 ;;
+
+ --ipv6) IPV6=1 ;;
+ --noipv6) IPV6=0 ;;
+
+ --ssl) SSL=1 ;;
+ --https) SSL=1 ;;
+ --nossl) SSL=0 ;;
+ --nohttps) SSL=0 ;;
+
+ --useuid) NOUID=0 ;;
+ --nouseuid) NOUID=1 ;;
+
+ --useftime) NOFTIME=0 ;;
+ --nouseftime) NOFTIME=1 ;;
+
+ --system=*) SYSTEMTYPE="$ac_optarg" ;;
+ --system) ac_prev=SYSTEMTYPE ;;
+
+ --debug) OTYPE="-O0 -g3"
+ MKTYPE="firstinfo htssystem htssystem_auto build_auto strip clean lastinfo" ;;
+
+ --make) DOMAKE=1 ;;
+ --install) DOINSTALL=1 ;;
+ --bininstall) DOINSTALL=1
+ NODOCINSTALL=1
+ ;;
+ --docinstall) DOINSTALL=1
+ DOCINSTALL=1
+ ;;
+ --cls)
+ cd ..
+ chmod 'u=rw,go=r' `find ./ -type f`
+ chmod 'u=rwx,go=rx' `find ./ -type d`
+ chmod 'u=rwx,go=rx' ./src/configure
+ chmod 'u=rwx,go=rx' ./src/strip_cr.in
+ chmod 'u=rwx,go=rx' ./src/postinst-config.in
+ cd src
+ strip_cr *.c *.h
+ rm -f ./httrack 2>/dev/null
+ exit
+ ;;
+ -prefix | --prefix | --prefi | --pref | --pre | --pr)
+ ac_prev=PREFIX ;;
+ -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=*)
+ PREFIX="$ac_optarg" ;;
+ -bindir | --bindir | --bindi | --bind | --bin | --bi)
+ ac_prev=BINPATH ;;
+ -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*)
+ BINPATH="$ac_optarg" ;;
+ -etcdir | --etcdir | --etcdi | --etcd | --etc | --et)
+ ac_prev=ETCPATH ;;
+ -etcdir=* | --etcdir=* | --etcdi=* | --etcd=* | --etc=* | --et=*)
+ ETCPATH="$ac_optarg" ;;
+ -libdir | --libdir | --libdi | --libd | --lib | --li)
+ ac_prev=LIBPATH ;;
+ -libdir=* | --libdir=* | --libdi=* | --libd=* | --lib=* | --li=*)
+ LIBPATH="$ac_optarg" ;;
+
+ --help)
+ cat <<EOF
+--prefix=.. : prefix (/usr)
+--bindir=.. : binary dir (/usr/bin)
+--etcdir=.. : config dir (/usr/etc or /etc)
+--libdir=.. : library dir (/usr/lib)
+--dynamic : do use dynamic (.so) mode
+--static : do use static mode
+--nopthread : do not use threads (pthread.h)
+--pthread : do use threads (pthread.h)
+--noipv6 : do not use ipv6 extensions
+--ipv6 : do use ipv6 extensions
+--nohttps : do not use SSL extensions
+--https : do use SSL extensions
+--nozlib : do not use compression (zlib)
+--zlib : do use compression (zlib)
+--nolonglong : do not use 64-bit int
+--longlong : do use 64-bit int
+--noinaddrt : do not redeclare in_addr_t
+--inaddrt : do redeclare in_addr_t
+--nouseuid : do not use setuid()/setgid()
+--useuid : do use setuid()/setgid()
+--nouseftime : do not use ftime()
+--useftime : do use ftime()
+--system=<type> : override system type (uname) - NOT RECOMMENDED! (types: 'Default','Linux','SunOS','AIX')
+--make : 'make' after configure
+--install : 'make install' after configure
+--bininstall : 'make bininstall' after configure
+--docinstall : 'make docinstall' after configure
+--debug : add debug information (for gdb)
+EOF
+ exit
+ ;;
+
+ # Anything else is an error: report it on stderr and exit with a
+ # non-zero status so that calling scripts notice the bad option
+ *) echo "Unrecognized option: $ac_option" >&2
+ exit 1
+ ;;
+
+ esac
+
+done
+
+echo "Welcome to HTTrack Website Copier!"
+echo "Type in ./configure --help for more details"
+echo "If this script fails, you can enter supplemental options through '--option=value'"
+echo "or enter in manual make, through 'make help'"
+echo ""
+
+if cp -f Makefile.in Makefile; then
+
+SEDEXEC=
+
+# System (OS) type?
+printf "Checking for OS type.. "
+if test -z "$SYSTEMTYPE"; then
+ SYSTEMTYPE="`uname`"
+fi
+case "$SYSTEMTYPE" in
+ SunOS) printf "SunOS/Solaris\n";
+ PLATFORM=0
+ SOLSOCK=1
+ ;;
+ AIX) printf "AIX\n"; PLATFORM=2 ;;
+ *) printf "Linux type\n"; PLATFORM=3 ;;
+esac
+
+WTYPE="-Wall -Wcast-align -Wstrict-prototypes -Wmissing-prototypes -Wmissing-declarations -Wpointer-arith -Wnested-externs"
+
+if test -z "$OTYPE"; then
+ OTYPE="-O3"
+fi
+if test -z "$MKTYPE"; then
+ MKTYPE="firstinfo htssystem htssystem_auto build_auto strip clean lastinfo"
+fi
+
+# Root group
+if test -z "$ROOTGROUP"; then
+ printf "Checking for 'root' group.."
+ if egrep "^root:" /etc/group >/dev/null; then
+ ROOTGROUP="root"
+ elif egrep "^wheel:" /etc/group >/dev/null; then
+ ROOTGROUP="wheel"
+ fi
+ printf "$ROOTGROUP\n"
+else
+ echo "Overriding ROOTGROUP=$ROOTGROUP"
+fi
+
+
+# Binaries location
+if test -z "$BINPATH"; then
+printf "Checking for bin directory.. "
+if test -n "$PREFIX"; then
+ BINPATH="$PREFIX/bin"
+elif test -d "/usr/bin"; then
+ BINPATH="/usr/bin"
+else
+ BINPATH="/bin"
+fi
+printf "$BINPATH\n"
+else
+ echo "Overriding BINPATH=$BINPATH"
+fi
+
+# shlib?
+if test -d "/usr/shlib"; then
+SYSLIB=/usr/shlib
+else
+SYSLIB=/usr/lib
+fi
+
+# /etc location
+if test -z "$ETCPATH"; then
+printf "Checking for etc directory.. "
+if test -n "$PREFIX"; then
+ ETCPATH="$PREFIX/etc"
+elif test -d "/usr/etc"; then
+ ETCPATH="/usr/etc"
+else
+ ETCPATH="/etc"
+fi
+printf "$ETCPATH\n"
+else
+ echo "Overriding ETCPATH=$ETCPATH"
+fi
+
+# /usr/lib location
+if test -z "$LIBPATH"; then
+printf "Checking for lib directory.. "
+if test -n "$PREFIX"; then
+ LIBPATH="$PREFIX/lib"
+elif test -d "/usr/lib"; then
+ LIBPATH="/usr/lib"
+elif test -d "/usr/local/lib"; then
+ LIBPATH="/usr/local/lib"
+else
+ LIBPATH="/lib"
+fi
+printf "$LIBPATH\n"
+else
+ echo "Overriding LIBPATH=$LIBPATH"
+fi
+
+# Prefix location
+if test -z "$PREFIX"; then
+ printf "Checking for prefix directory.. "
+ PREFIX="/usr"
+ printf "$PREFIX\n"
+else
+ echo "Overriding PREFIX=$PREFIX"
+fi
+
+
+# 64-bit (long long) causes trouble on some processors
+# because some alignments aren't properly defined;
+# we only accept 64-bit on tested processors here
+if test -z "$LONGLONG"; then
+printf "Checking for long long.. "
+LONGLONG=
+# Enabled only when sys/types.h mentions "long long" AND uname reports
+# an x86 machine (i386 .. i986)
+if grep "long long" /usr/include/sys/types.h >/dev/null; then
+if uname -a|egrep ' i[3-9]86 ' >/dev/null; then
+ LONGLONG=1
+fi
+fi
+if test -n "$LONGLONG"; then
+ printf "supported\n"
+else
+ # --longlong is the spelling accepted by the option parser of this script
+ printf "not tested/supported. Use --longlong to override\n"
+fi
+else
+ echo "Overriding LONGLONG=$LONGLONG"
+fi
+
+# IPV6?
+# NOT TESTED FOR OTHER PLATFORMS.. FIXME!
+if test -z "$IPV6"; then
+printf "Checking for ipv6 support.. "
+if test -f "/usr/include/linux/in6.h" -o -f "/usr/local/include/linux/in6.h"; then
+IPV6=1
+else
+IPV6=0
+fi
+if test "$IPV6" -eq 1; then
+printf "supported\n"
+else
+printf "not supported\n"
+fi
+else
+ echo "Overriding IPV6=$IPV6"
+fi
+if test "$IPV6" -eq 1; then
+IPTYPE="-DINET6"
+LIPTYPE=""
+else
+IPTYPE=
+LIPTYPE=
+fi
+
+# HTTPS?
+# NOT TESTED FOR OTHER PLATFORMS.. FIXME!
+if test -z "$SSL"; then
+printf "Checking for SSL support.. "
+if test -f "/usr/include/openssl/ssl.h" -o -f "/usr/local/include/openssl/ssl.h"; then
+SSL=1
+else
+SSL=0
+fi
+if test "$SSL" -eq 1; then
+printf "supported\n"
+else
+printf "not supported\n"
+fi
+else
+ echo "Overriding SSL=$SSL"
+fi
+if test "$SSL" -eq 1; then
+SSTYPE="-lssl -lcrypto"
+else
+SSTYPE=
+fi
+
+# Alignment
+if test -z "$PTRALIGN"; then
+printf "Checking for pointer alignements.. "
+# uname -p is run once and its output kept quoted: it may be empty (option
+# not supported) or contain several words, which would break a bare 'test'.
+# alpha and sparc get 8-byte alignment, everything else 4.
+case "`uname -p 2>/dev/null`" in
+ alpha | sparc) PTRALIGN=8 ;;
+ *) PTRALIGN=4 ;;
+esac
+fi
+printf "$PTRALIGN\n"
+
+
+# Dynamic (.so) module?
+if test -z "$DYNAMIC"; then
+ DYNAMIC=1
+fi
+printf "Checking for compilation mode: "
+if test "$DYNAMIC" -eq "1"; then
+ echo "dynamic"
+ SOTYPE=-fPIC
+else
+ echo "static"
+ SOTYPE=
+fi
+
+# Do we not have to redeclare in_addr_t ?
+# Sometimes this type is defined, or not..
+if test -z "$NODECLINADDRT"; then
+printf "Checking for in_addr_t declaration in in.h.. "
+# Search netinet/in.h, then sys/types.h. Only the exit status of grep is
+# used, so the output of both searches is discarded.
+if grep -E "typedef .* in_addr_t" /usr/include/netinet/in.h >/dev/null || grep -E "typedef .* in_addr_t" /usr/include/sys/types.h >/dev/null; then
+ printf "found, do not redeclare\n"
+ NODECLINADDRT=1
+else
+ printf "not found, declaring\n"
+ NODECLINADDRT=
+fi
+else
+ echo "Overriding NODECLINADDRT=$NODECLINADDRT"
+fi
+
+# Test if we can use zlib (/usr/lib/libz.so)
+# This allows transfers to be sped up using HTTP compression
+if test -z "$ZLIB"; then
+printf "Checking for ${SYSLIB}/libz.so.. "
+if test -f "${SYSLIB}/libz.so"; then
+ printf "found\n"
+ ZLIB=1
+else
+ printf "library not found (too bad), no http compression will be available\n"
+ ZLIB=0
+fi
+else
+ echo "Overriding ZLIB=$ZLIB"
+fi
+
+# Sometimes, pthread.h doesn't exist on some systems
+# This is sad, because it speeds up some useful things, like DNS or ftp
+if test -z "$THREADS"; then
+printf "Checking for /usr/include/pthread.h.. "
+if test -f "/usr/include/pthread.h" -o -f "/usr/local/include/pthread.h"; then
+if test -f "${SYSLIB}/libpthread.so"; then
+ printf "found\n"
+ THREADS=1
+else
+ printf "library not found (too bad), no threads will be available\n"
+ THREADS=
+fi
+else
+ printf "not found, no threads will be available\n"
+ THREADS=
+fi
+else
+ echo "Overriding THREADS=$THREADS"
+fi
+
+# Sometimes, setuid and setgid can't be used (missing pwd.h and unistd.h ?!)
+if test -z "$NOUID"; then
+NOUID=1
+printf "Checking for /usr/include/pwd.h and /usr/include/unistd.h.. "
+# Both headers must exist, each in /usr/include or /usr/local/include.
+# Every path needs its own -f: a bare non-empty string is always true.
+if test -f "/usr/include/pwd.h" -o -f "/usr/local/include/pwd.h"; then
+if test -f "/usr/include/unistd.h" -o -f "/usr/local/include/unistd.h" ; then
+ NOUID=
+fi
+fi
+if test -z "$NOUID"; then
+ printf "found\n"
+else
+ printf "not found, not using setuid() and setgid()\n"
+fi
+else
+ echo "Overriding NOUID=$NOUID"
+fi
+
+# Sometimes, ftime can't be used (missing declaration...)
+if test -z "$NOFTIME"; then
+NOFTIME=1
+printf "Checking for ftime in /usr/include/sys/timeb.h.. "
+if grep "int ftime" /usr/include/sys/timeb.h >/dev/null; then
+ NOFTIME=
+fi
+if test -z "$NOFTIME"; then
+ printf "found\n"
+else
+ printf "not found (too bad), not using ftime()\n"
+fi
+else
+ echo "Overriding NOFTIME=$NOFTIME"
+fi
+
+# Test variables
+if test "$NOUID" = 1; then
+ SEDEXEC="$SEDEXEC | sed -e 's/__DEFINEUID__/HTS_DO_NOT_USE_UID/'"
+fi
+if test "$NOFTIME" = 1; then
+ SEDEXEC="$SEDEXEC | sed -e 's/__DEFINEFTI__/HTS_DO_NOT_USE_FTIME/'"
+fi
+if test "$NODECLINADDRT" = 1; then
+ SEDEXEC="$SEDEXEC | sed -e 's/__DEFINEINA__/HTS_DO_NOT_REDEFINE_in_addr_t/'"
+fi
+if test "$THREADS" = 1; then
+THTYPE="-D_REENTRANT"
+LPTHTYPE="-lpthread"
+else
+THTYPE=
+LPTHTYPE=
+fi
+SEDEXEC="$SEDEXEC | sed -e \"s/__CFLAGS__/$SOTYPE $OTYPE $WTYPE $IPTYPE $THTYPE/g\""
+SEDEXEC="$SEDEXEC | sed -e \"s/__LFLAGS__/$LPTHTYPE $SSTYPE $LIPTYPE/g\""
+if test ! "$THREADS" = 1; then
+ SEDEXEC="$SEDEXEC | sed -e 's/__DEFINEPTH__/HTS_DO_NOT_USE_PTHREAD/'"
+fi
+if test "$ZLIB" = 1; then
+ SEDEXEC="$SEDEXEC | sed -e 's/__LFLAGS3__/-lz/g'"
+else
+ SEDEXEC="$SEDEXEC | sed -e 's/__LFLAGS3__//g'"
+fi
+SEDEXEC="$SEDEXEC | sed -e \"s/__ZLIB__/$ZLIB/\""
+if test "$SOLSOCK" = 1; then
+ SEDEXEC="$SEDEXEC | sed -e 's/__LFLAGS2__/-lnsl -lsocket/g'"
+else
+ SEDEXEC="$SEDEXEC | sed -e 's/__LFLAGS2__//g'"
+fi
+if test ! "$LONGLONG" = 1; then
+ SEDEXEC="$SEDEXEC | sed -e 's/__DEFINE64B__/HTS_NO_64_BIT/'"
+fi
+if test -n "$PTRALIGN"; then
+ SEDEXEC="$SEDEXEC | sed -e \"s/__PTRALIGN__/$PTRALIGN/g\""
+fi
+if test -n "$IPV6"; then
+ SEDEXEC="$SEDEXEC | sed -e \"s/__INET6__/$IPV6/g\""
+fi
+if test -n "$SSL"; then
+ SEDEXEC="$SEDEXEC | sed -e \"s/__SSL__/$SSL/g\""
+fi
+if test "$DYNAMIC" = 1; then
+ SEDEXEC="$SEDEXEC | sed -e 's/__DYNAMIC__/so/'"
+ SEDEXEC="$SEDEXEC | sed -e 's/__INSTALL__/bininstall libinstall docinstall/'"
+ SEDEXEC="$SEDEXEC | sed -e 's/__UNINSTALL__/binremove libremove docremove/'"
+ SEDEXEC="$SEDEXEC | sed -e 's/__STRIPLIB__/strip --strip-unneeded \\\$(BOUTPUTSO)/'"
+else
+ SEDEXEC="$SEDEXEC | sed -e 's/__DYNAMIC__//'"
+ SEDEXEC="$SEDEXEC | sed -e 's/__INSTALL__/bininstall docinstall/'"
+ SEDEXEC="$SEDEXEC | sed -e 's/__UNINSTALL__/binremove docremove/'"
+ SEDEXEC="$SEDEXEC | sed -e 's/__STRIPLIB__//'"
+fi
+
+SEDEXEC="$SEDEXEC | sed -e \"s/__PLATFORM__/$PLATFORM/g\""
+SEDEXEC="$SEDEXEC | sed -e 's/#__AUTONAME__/AUTONAME = auto/'"
+SEDEXEC="$SEDEXEC | sed -e 's/#define __DEFINE.*__//g'"
+
+# Paths
+TMP=`echo $BINPATH | sed -e 's/\\//\\\\\\//g'`
+SEDEXEC="$SEDEXEC | sed -e \"s/__BINPATH__/$TMP/g\""
+TMP=`echo $ETCPATH | sed -e 's/\\//\\\\\\//g'`
+SEDEXEC="$SEDEXEC | sed -e \"s/__ETCPATH__/$TMP/g\""
+TMP=`echo $LIBPATH | sed -e 's/\\//\\\\\\//g'`
+SEDEXEC="$SEDEXEC | sed -e \"s/__LIBPATH__/$TMP/g\""
+TMP=`echo $PREFIX | sed -e 's/\\//\\\\\\//g'`
+SEDEXEC="$SEDEXEC | sed -e \"s/__PREFIX__/$TMP/g\""
+TMP=`echo $ROOTGROUP | sed -e 's/\\//\\\\\\//g'`
+SEDEXEC="$SEDEXEC | sed -e \"s/__ROOTGROUP__/$TMP/g\""
+
+TMP=`echo $PREFIX | sed -e 's/\\//\\\\\\//g'`
+SEDEXEC="$SEDEXEC | sed -e \"s/__DEFINEPRE__/$TMP/g\""
+SEDEXEC="$SEDEXEC | sed -e 's/__MAKEAUTO__/$MKTYPE/'"
+
+# Search for gmake
+printf "Checking for make.. "
+MAKEPATH=
+if test -f "/usr/bin/gmake"; then
+MAKEPATH=/usr/bin/gmake
+else
+if test -f "/bin/gmake"; then
+MAKEPATH=/bin/gmake
+else
+if test -f "/usr/local/bin/gmake"; then
+MAKEPATH=/usr/local/bin/gmake
+fi
+fi
+fi
+if test -n "$MAKEPATH"; then
+printf "found $MAKEPATH\n"
+else
+MAKEPATH=make
+printf "not found, assume make will work\n"
+fi
+
+# Sed strip_cr
+EXCL='#!'
+printf "Checking for perl.. "
+PERLPATH=
+cp -f strip_cr.in strip_cr
+if test -f "/usr/bin/perl"; then
+PERLPATH=/usr/bin/perl
+else
+if test -f "/bin/perl"; then
+PERLPATH=/bin/perl
+else
+if test -f "/usr/local/bin/perl"; then
+PERLPATH=/usr/local/bin/perl
+fi
+fi
+fi
+if test -n "$PERLPATH"; then
+printf "found $PERLPATH\nEnsuring that *.c/*.h source files don't contains CR (^M).. "
+TMP=`echo $PERLPATH | sed -e 's/\\//\\\\\\//g'`
+cat strip_cr | sed -e "s/__PERL__/${EXCL}${TMP}/" > __tmp; mv __tmp strip_cr
+chmod 755 strip_cr
+./strip_cr *.c *.h
+printf "done\n"
+fi
+
+# Sed postinst-config
+cp -f postinst-config.in postinst-config
+CMD="cat postinst-config $SEDEXEC > __tmp; mv __tmp postinst-config"
+if eval $CMD; then
+chmod 755 postinst-config
+else
+echo "Error while seding postinst-config"
+exit 1
+fi
+
+# Sed all __VARS__
+CMD="cat Makefile $SEDEXEC > __tmp; mv __tmp Makefile"
+echo "Command: $CMD"
+if eval $CMD; then
+
+echo ""
+echo "Makefile created!"
+echo "Type in '$MAKEPATH' to build and '$MAKEPATH install' to install"
+
+# Optional steps requested with --make / --install / --bininstall / --docinstall.
+# The script stops with a failure status as soon as one step fails, so that
+# a broken build is never installed and the caller sees the error.
+if test -n "$DOMAKE"; then
+ echo "Making.."
+ eval $MAKEPATH clean || exit 1
+ eval $MAKEPATH || exit 1
+fi
+if test -n "$DOINSTALL"; then
+ if test -n "$NODOCINSTALL"; then
+ echo "Installing binary.."
+ eval $MAKEPATH bininstall || exit 1
+ elif test -n "$DOCINSTALL"; then
+ echo "Installing docs.."
+ eval $MAKEPATH docinstall || exit 1
+ else
+ echo "Installing.."
+ eval $MAKEPATH install || exit 1
+ fi
+fi
+
+else
+ echo "Error while seding Makefile.."
+ exit 1
+fi
+
+else
+ echo "Error copying Makefile.in -> Makefile.. giving up"
+ exit 1
+fi
+
diff --git a/src/gpl.txt b/src/gpl.txt
new file mode 100644
index 0000000..546a71a
--- /dev/null
+++ b/src/gpl.txt
@@ -0,0 +1,287 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+
+
diff --git a/src/hts-indextmpl.h b/src/hts-indextmpl.h
new file mode 100644
index 0000000..a82b69d
--- /dev/null
+++ b/src/hts-indextmpl.h
@@ -0,0 +1,924 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: Index.html templates file */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+#ifndef HTTRACK_DEFTMPL
+#define HTTRACK_DEFTMPL
+
+
+/* Index for each project */
+/*
+regen:
+(for i in *; do echo $i; cat $i | sed -e 's/"/\\"/g' | sed -e 's/^\(.*\)$/ "\1"LF\\/'; done) > /tmp/1.txt
+*/
+/*
+HTS_INDEX_HEADER: built-in top part of a project's index page (the HTML
+itself states "Template file not found, using internal one").
+It contains the <head> section with the embedded style sheet, the page
+banner and the opening of the "Index of locally available sites" table.
+Placeholders (the text is presumably handed to a printf-like function,
+hence the doubled %% wherever a literal percent sign is wanted):
+ %s = INFO (single placeholder, right after <title>)
+Every line is terminated with the LF macro, which is not defined in this file.
+NOTE(review): no <body> start tag is emitted after </head>, although
+HTS_INDEX_FOOTER ends with </body> - confirm this is intended.
+*/
+#define HTS_INDEX_HEADER \
+ "<!-- Note: Template file not found, using internal one -->"LF\
+ "<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"en\">"LF\
+ ""LF\
+ "<head>"LF\
+ " <meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-1\" />"LF\
+ " <meta name=\"description\" content=\"HTTrack is an easy-to-use website mirror utility. It allows you to download a World Wide website from the Internet to a local directory,building recursively all structures, getting html, images, and other files from the server to your computer. Links are rebuiltrelatively so that you can freely browse to the local site (works with any browser). You can mirror several sites together so that you can jump from one toanother. You can, also, update an existing mirror site, or resume an interrupted download. The robot is fully configurable, with an integrated help\" />"LF\
+ " <meta name=\"keywords\" content=\"httrack, HTTRACK, HTTrack, winhttrack, WINHTTRACK, WinHTTrack, offline browser, web mirror utility, aspirateur web, surf offline, web capture, www mirror utility, browse offline, local site builder, website mirroring, aspirateur www, internet grabber, capture de site web, internet tool, hors connexion, unix, dos, windows 95, windows 98, solaris, ibm580, AIX 4.0, HTS, HTGet, web aspirator, web aspirateur, libre, GPL, GNU, free software\" />"LF\
+ " <title>Local index - HTTrack Website Copier</title>"LF\
+ " %s"LF\
+ " <style type=\"text/css\">"LF\
+ " <!--"LF\
+ ""LF\
+ "body {"LF\
+ " margin: 0; padding: 0; margin-bottom: 15px; margin-top: 8px;"LF\
+ " background: #77b;"LF\
+ "}"LF\
+ "body, td {"LF\
+ " font: 14px \"Trebuchet MS\", Verdana, Arial, Helvetica, sans-serif;"LF\
+ " }"LF\
+ ""LF\
+ "#subTitle {"LF\
+ " background: #000; color: #fff; padding: 4px; font-weight: bold; "LF\
+ " }"LF\
+ ""LF\
+ "#siteNavigation a, #siteNavigation .current {"LF\
+ " font-weight: bold; color: #448;"LF\
+ " }"LF\
+ "#siteNavigation a:link { text-decoration: none; }"LF\
+ "#siteNavigation a:visited { text-decoration: none; }"LF\
+ ""LF\
+ "#siteNavigation .current { background-color: #ccd; }"LF\
+ ""LF\
+ "#siteNavigation a:hover { text-decoration: none; background-color: #fff; color: #000; }"LF\
+ "#siteNavigation a:active { text-decoration: none; background-color: #ccc; }"LF\
+ ""LF\
+ ""LF\
+ "a:link { text-decoration: underline; color: #00f; }"LF\
+ "a:visited { text-decoration: underline; color: #000; }"LF\
+ "a:hover { text-decoration: underline; color: #c00; }"LF\
+ "a:active { text-decoration: underline; }"LF\
+ ""LF\
+ "#pageContent {"LF\
+ " clear: both;"LF\
+ " border-bottom: 6px solid #000;"LF\
+ " padding: 10px; padding-top: 20px;"LF\
+ " line-height: 1.65em;"LF\
+ " background-image: url(backblue.gif);"LF\
+ " background-repeat: no-repeat;"LF\
+ " background-position: top right;"LF\
+ " }"LF\
+ ""LF\
+ "#pageContent, #siteNavigation {"LF\
+ " background-color: #ccd;"LF\
+ " }"LF\
+ ""LF\
+ ""LF\
+ ".imgLeft { float: left; margin-right: 10px; margin-bottom: 10px; }"LF\
+ ".imgRight { float: right; margin-left: 10px; margin-bottom: 10px; }"LF\
+ ""LF\
+ "hr { height: 1px; color: #000; background-color: #000; margin-bottom: 15px; }"LF\
+ ""LF\
+ "h1 { margin: 0; font-weight: bold; font-size: 2em; }"LF\
+ "h2 { margin: 0; font-weight: bold; font-size: 1.6em; }"LF\
+ "h3 { margin: 0; font-weight: bold; font-size: 1.3em; }"LF\
+ "h4 { margin: 0; font-weight: bold; font-size: 1.18em; }"LF\
+ ""LF\
+ ".blak { background-color: #000; }"LF\
+ ".hide { display: none; }"LF\
+ ".tableWidth { min-width: 400px; }"LF\
+ ""LF\
+ ".tblRegular { border-collapse: collapse; }"LF\
+ ".tblRegular td { padding: 6px; background-image: url(fade.gif); border: 2px solid #99c; }"LF\
+ ".tblHeaderColor, .tblHeaderColor td { background: #99c; }"LF\
+ ".tblNoBorder td { border: 0; }"LF\
+ ""LF\
+ ""LF\
+ "// -->"LF\
+ "</style>"LF\
+ ""LF\
+ "</head>"LF\
+ ""LF\
+ "<table width=\"76%%\" border=\"0\" align=\"center\" cellspacing=\"0\" cellpadding=\"3\" class=\"tableWidth\">"LF\
+ " <tr>"LF\
+ " <td id=\"subTitle\">HTTrack Website Copier - Open Source offline browser</td>"LF\
+ " </tr>"LF\
+ "</table>"LF\
+ "<table width=\"76%%\" border=\"0\" align=\"center\" cellspacing=\"0\" cellpadding=\"0\" class=\"tableWidth\">"LF\
+ "<tr class=\"blak\">"LF\
+ "<td>"LF\
+ " <table width=\"100%%\" border=\"0\" align=\"center\" cellspacing=\"1\" cellpadding=\"0\">"LF\
+ " <tr>"LF\
+ " <td colspan=\"6\"> "LF\
+ " <table width=\"100%%\" border=\"0\" align=\"center\" cellspacing=\"0\" cellpadding=\"10\">"LF\
+ " <tr> "LF\
+ " <td id=\"pageContent\"> "LF\
+ "<!-- ==================== End prologue ==================== -->"LF\
+ ""LF\
+ "<H1 ALIGN=Center>Index of locally available sites:</H1>"LF\
+ " <TABLE BORDER=\"0\" WIDTH=\"100%%\" CELLSPACING=\"1\" CELLPADDING=\"0\">"LF
+
+/*
+HTS_INDEX_BODY: built-in template for one entry (one table row) of a project
+index page: a bullet followed by a link.
+Placeholders, in order of appearance:
+ %s = URL (becomes the HREF of the link)
+ %s = TITLE (becomes the link text)
+Note that the "Template file not found" HTML comment is part of this text,
+so it appears wherever this template is written out.
+*/
+#define HTS_INDEX_BODY \
+ "<!-- Note: Template file not found, using internal one -->"LF\
+ " <TR>"LF\
+ " <TD BACKGROUND=\"fade.gif\">"LF\
+ " &middot;"LF\
+ " <A HREF=\"%s\">"LF\
+ " %s"LF\
+ " </A> "LF\
+ " </TD>"LF\
+ " </TR>"LF
+
+/*
+HTS_INDEX_FOOTER: built-in closing part of a project index page. It ends the
+table of sites, prints the credit line, closes the layout tables opened by the
+header and finishes the document.
+Placeholders, in order of appearance (the text is presumably handed to a
+printf-like function, hence the doubled %% for a literal percent sign):
+ %s = INFO
+ %s = META REFRESH IF ANY
+Literal ampersands in the visible text are written as &amp; (the credit line
+already did so; the copyright line now does too), since the page declares the
+XHTML namespace where a bare '&' is not well-formed.
+*/
+#define HTS_INDEX_FOOTER \
+ "<!-- Note: Template file not found, using internal one -->"LF\
+ " </TABLE>"LF\
+ " <BR>"LF\
+ " <BR>"LF\
+ " <BR>"LF\
+ " <H6 ALIGN=\"RIGHT\">"LF\
+ " <I>Mirror and index made by HTTrack Website Copier [XR&amp;CO'2002]</I>"LF\
+ " </H6>"LF\
+ " %s"LF\
+ " <!-- Thanks for using HTTrack Website Copier! -->"LF\
+ " %s"LF\
+ ""LF\
+ "<!-- ==================== Start epilogue ==================== -->"LF\
+ " </td>"LF\
+ " </tr>"LF\
+ " </table>"LF\
+ " </td>"LF\
+ " </tr>"LF\
+ " </table>"LF\
+ "</td>"LF\
+ "</tr>"LF\
+ "</table>"LF\
+ ""LF\
+ "<table width=\"76%%\" border=\"0\" align=\"center\" valign=\"bottom\" cellspacing=\"0\" cellpadding=\"0\">"LF\
+ " <tr>"LF\
+ " <td id=\"footer\"><small>&copy; 2002 Xavier Roche &amp; other contributors - Web Design: Kauler Leto.</small></td>"LF\
+ " </tr>"LF\
+ "</table>"LF\
+ ""LF\
+ "</body>"LF\
+ ""LF\
+ "</html>"LF\
+ ""LF\
+ ""LF
+
+/* Index for all projects (top index) */
+/*
+HTS_TOPINDEX_HEADER: built-in top part of the top index page (the list of all
+projects). It contains the <head> section with the embedded style sheet, the
+page banner and the opening of the "Index of locally available projects" table.
+Placeholders (the text is presumably handed to a printf-like function):
+ %s = INFO (single placeholder, right after <title>)
+ %% = literal percent sign
+Every percent sign that must reach the HTML has to be written as exactly
+"%%"; an odd run such as "%%%" leaves a stray '%' that starts an invalid
+conversion, so the width of the last table is written as 100%% like all the
+other width attributes.
+Every line is terminated with the LF macro, which is not defined in this file.
+*/
+#define HTS_TOPINDEX_HEADER \
+ "<!-- Note: Template file not found, using internal one -->"LF\
+ "<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"en\">"LF\
+ ""LF\
+ "<head>"LF\
+ " <meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-1\" />"LF\
+ " <meta name=\"description\" content=\"HTTrack is an easy-to-use website mirror utility. It allows you to download a World Wide website from the Internet to a local directory,building recursively all structures, getting html, images, and other files from the server to your computer. Links are rebuiltrelatively so that you can freely browse to the local site (works with any browser). You can mirror several sites together so that you can jump from one toanother. You can, also, update an existing mirror site, or resume an interrupted download. The robot is fully configurable, with an integrated help\" />"LF\
+ " <meta name=\"keywords\" content=\"httrack, HTTRACK, HTTrack, winhttrack, WINHTTRACK, WinHTTrack, offline browser, web mirror utility, aspirateur web, surf offline, web capture, www mirror utility, browse offline, local site builder, website mirroring, aspirateur www, internet grabber, capture de site web, internet tool, hors connexion, unix, dos, windows 95, windows 98, solaris, ibm580, AIX 4.0, HTS, HTGet, web aspirator, web aspirateur, libre, GPL, GNU, free software\" />"LF\
+ " <title>List of available projects - HTTrack Website Copier</title>"LF\
+ " %s"LF\
+ ""LF\
+ " <style type=\"text/css\">"LF\
+ " <!--"LF\
+ ""LF\
+ "body {"LF\
+ " margin: 0; padding: 0; margin-bottom: 15px; margin-top: 8px;"LF\
+ " background: #77b;"LF\
+ "}"LF\
+ "body, td {"LF\
+ " font: 14px \"Trebuchet MS\", Verdana, Arial, Helvetica, sans-serif;"LF\
+ " }"LF\
+ ""LF\
+ "#subTitle {"LF\
+ " background: #000; color: #fff; padding: 4px; font-weight: bold; "LF\
+ " }"LF\
+ ""LF\
+ "#siteNavigation a, #siteNavigation .current {"LF\
+ " font-weight: bold; color: #448;"LF\
+ " }"LF\
+ "#siteNavigation a:link { text-decoration: none; }"LF\
+ "#siteNavigation a:visited { text-decoration: none; }"LF\
+ ""LF\
+ "#siteNavigation .current { background-color: #ccd; }"LF\
+ ""LF\
+ "#siteNavigation a:hover { text-decoration: none; background-color: #fff; color: #000; }"LF\
+ "#siteNavigation a:active { text-decoration: none; background-color: #ccc; }"LF\
+ ""LF\
+ ""LF\
+ "a:link { text-decoration: underline; color: #00f; }"LF\
+ "a:visited { text-decoration: underline; color: #000; }"LF\
+ "a:hover { text-decoration: underline; color: #c00; }"LF\
+ "a:active { text-decoration: underline; }"LF\
+ ""LF\
+ "#pageContent {"LF\
+ " clear: both;"LF\
+ " border-bottom: 6px solid #000;"LF\
+ " padding: 10px; padding-top: 20px;"LF\
+ " line-height: 1.65em;"LF\
+ " background-image: url(backblue.gif);"LF\
+ " background-repeat: no-repeat;"LF\
+ " background-position: top right;"LF\
+ " }"LF\
+ ""LF\
+ "#pageContent, #siteNavigation {"LF\
+ " background-color: #ccd;"LF\
+ " }"LF\
+ ""LF\
+ ""LF\
+ ".imgLeft { float: left; margin-right: 10px; margin-bottom: 10px; }"LF\
+ ".imgRight { float: right; margin-left: 10px; margin-bottom: 10px; }"LF\
+ ""LF\
+ "hr { height: 1px; color: #000; background-color: #000; margin-bottom: 15px; }"LF\
+ ""LF\
+ "h1 { margin: 0; font-weight: bold; font-size: 2em; }"LF\
+ "h2 { margin: 0; font-weight: bold; font-size: 1.6em; }"LF\
+ "h3 { margin: 0; font-weight: bold; font-size: 1.3em; }"LF\
+ "h4 { margin: 0; font-weight: bold; font-size: 1.18em; }"LF\
+ ""LF\
+ ".blak { background-color: #000; }"LF\
+ ".hide { display: none; }"LF\
+ ".tableWidth { min-width: 400px; }"LF\
+ ""LF\
+ ".tblRegular { border-collapse: collapse; }"LF\
+ ".tblRegular td { padding: 6px; background-image: url(fade.gif); border: 2px solid #99c; }"LF\
+ ".tblHeaderColor, .tblHeaderColor td { background: #99c; }"LF\
+ ".tblNoBorder td { border: 0; }"LF\
+ ""LF\
+ ""LF\
+ "// -->"LF\
+ "</style>"LF\
+ ""LF\
+ "</head>"LF\
+ ""LF\
+ "<table width=\"76%%\" border=\"0\" align=\"center\" cellspacing=\"0\" cellpadding=\"3\" class=\"tableWidth\">"LF\
+ " <tr>"LF\
+ " <td id=\"subTitle\">HTTrack Website Copier - Open Source offline browser</td>"LF\
+ " </tr>"LF\
+ "</table>"LF\
+ "<table width=\"76%%\" border=\"0\" align=\"center\" cellspacing=\"0\" cellpadding=\"0\" class=\"tableWidth\">"LF\
+ "<tr class=\"blak\">"LF\
+ "<td>"LF\
+ " <table width=\"100%%\" border=\"0\" align=\"center\" cellspacing=\"1\" cellpadding=\"0\">"LF\
+ " <tr>"LF\
+ " <td colspan=\"6\"> "LF\
+ " <table width=\"100%%\" border=\"0\" align=\"center\" cellspacing=\"0\" cellpadding=\"10\">"LF\
+ " <tr> "LF\
+ " <td id=\"pageContent\"> "LF\
+ "<!-- ==================== End prologue ==================== -->"LF\
+ ""LF\
+ ""LF\
+ "<h1 ALIGN=Center>Index of locally available projects:</H1>"LF\
+ " <table border=\"0\" width=\"100%%\" cellspacing=\"1\" cellpadding=\"0\">"LF
+
+/*
+HTS_TOPINDEX_BODY: built-in template for one entry (one table row) of the top
+index page: a bullet followed by a link to a project.
+Placeholders, in order of appearance:
+ %s = URL (the link target is this value followed by "/index.html")
+ %s = TITLE (becomes the link text)
+Note that the "Template file not found" HTML comment is part of this text,
+so it appears wherever this template is written out.
+*/
+#define HTS_TOPINDEX_BODY \
+ "<!-- Note: Template file not found, using internal one -->"LF\
+ " <TR>"LF\
+ " <TD BACKGROUND=\"fade.gif\">"LF\
+ " &middot; <A HREF=\"%s/index.html\">%s</A>"LF\
+ " </TD>"LF\
+ " </TR>"LF
+
+/*
+HTS_TOPINDEX_FOOTER: built-in closing part of the top index page. It ends the
+table of projects, prints the credit line, closes the layout tables opened by
+the header and finishes the document.
+Placeholders (the text is presumably handed to a printf-like function, hence
+the doubled %% for a literal percent sign):
+ %s = INFO (single placeholder)
+Literal ampersands in the visible text are written as &amp;, matching the
+credit line of HTS_INDEX_FOOTER; the page declares the XHTML namespace, where
+a bare '&' is not well-formed.
+*/
+#define HTS_TOPINDEX_FOOTER \
+ "<!-- Note: Template file not found, using internal one -->"LF\
+ " </TABLE>"LF\
+ " <BR>"LF\
+ " <H6 ALIGN=\"RIGHT\">"LF\
+ " <I>Mirror and index made by HTTrack Website Copier [XR&amp;CO'2002]</I>"LF\
+ " </H6>"LF\
+ " %s"LF\
+ " <!-- Thanks for using HTTrack Website Copier! -->"LF\
+ ""LF\
+ "<!-- ==================== Start epilogue ==================== -->"LF\
+ " </td>"LF\
+ " </tr>"LF\
+ " </table>"LF\
+ " </td>"LF\
+ " </tr>"LF\
+ " </table>"LF\
+ "</td>"LF\
+ "</tr>"LF\
+ "</table>"LF\
+ ""LF\
+ "<table width=\"76%%\" border=\"0\" align=\"center\" valign=\"bottom\" cellspacing=\"0\" cellpadding=\"0\">"LF\
+ " <tr>"LF\
+ " <td id=\"footer\"><small>&copy; 2002 Xavier Roche &amp; other contributors - Web Design: Kauler Leto.</small></td>"LF\
+ " </tr>"LF\
+ "</table>"LF\
+ ""LF\
+ "</body>"LF\
+ ""LF\
+ "</html>"LF\
+ ""LF\
+ ""LF
+
+
+/* Other files (fade and backblue images) */
+
+/* Three-line notice (lines separated by the LF macro, continuation lines
+ starting with a tab) warning that hts-log.txt and the hts-cache folder may
+ contain sensitive data such as website usernames/passwords and should not
+ be shared. Contains no format placeholders. */
+#define HTS_LOG_SECURITY_WARNING "note:\tthe hts-log.txt file, and hts-cache folder, may contain sensitive information,"LF\
+ "\tsuch as username/password authentication for websites mirrored in this project"LF\
+ "\tdo not share these files/folders if you want these information to remain private"LF
+
+#define HTS_DATA_UNKNOWN_HTML "<html>"LF\
+ "<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"en\">"LF\
+ ""LF\
+ "<head>"LF\
+ " <meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-1\" />"LF\
+ " <meta name=\"description\" content=\"HTTrack is an easy-to-use website mirror utility. It allows you to download a World Wide website from the Internet to a local directory,building recursively all structures, getting html, images, and other files from the server to your computer. Links are rebuiltrelatively so that you can freely browse to the local site (works with any browser). You can mirror several sites together so that you can jump from one toanother. You can, also, update an existing mirror site, or resume an interrupted download. The robot is fully configurable, with an integrated help\" />"LF\
+ " <meta name=\"keywords\" content=\"httrack, HTTRACK, HTTrack, winhttrack, WINHTTRACK, WinHTTrack, offline browser, web mirror utility, aspirateur web, surf offline, web capture, www mirror utility, browse offline, local site builder, website mirroring, aspirateur www, internet grabber, capture de site web, internet tool, hors connexion, unix, dos, windows 95, windows 98, solaris, ibm580, AIX 4.0, HTS, HTGet, web aspirator, web aspirateur, libre, GPL, GNU, free software\" />"LF\
+ " <title>Page not retrieved! - HTTrack Website Copier</title>"LF\
+ " %s"LF\
+ " <style type=\"text/css\">"LF\
+ " <!--"LF\
+ ""LF\
+ "body {"LF\
+ " margin: 0; padding: 0; margin-bottom: 15px; margin-top: 8px;"LF\
+ " background: #77b;"LF\
+ "}"LF\
+ "body, td {"LF\
+ " font: 14px \"Trebuchet MS\", Verdana, Arial, Helvetica, sans-serif;"LF\
+ " }"LF\
+ ""LF\
+ "#subTitle {"LF\
+ " background: #000; color: #fff; padding: 4px; font-weight: bold; "LF\
+ " }"LF\
+ ""LF\
+ "#siteNavigation a, #siteNavigation .current {"LF\
+ " font-weight: bold; color: #448;"LF\
+ " }"LF\
+ "#siteNavigation a:link { text-decoration: none; }"LF\
+ "#siteNavigation a:visited { text-decoration: none; }"LF\
+ ""LF\
+ "#siteNavigation .current { background-color: #ccd; }"LF\
+ ""LF\
+ "#siteNavigation a:hover { text-decoration: none; background-color: #fff; color: #000; }"LF\
+ "#siteNavigation a:active { text-decoration: none; background-color: #ccc; }"LF\
+ ""LF\
+ ""LF\
+ "a:link { text-decoration: underline; color: #00f; }"LF\
+ "a:visited { text-decoration: underline; color: #000; }"LF\
+ "a:hover { text-decoration: underline; color: #c00; }"LF\
+ "a:active { text-decoration: underline; }"LF\
+ ""LF\
+ "#pageContent {"LF\
+ " clear: both;"LF\
+ " border-bottom: 6px solid #000;"LF\
+ " padding: 10px; padding-top: 20px;"LF\
+ " line-height: 1.65em;"LF\
+ " background-image: url(backblue.gif);"LF\
+ " background-repeat: no-repeat;"LF\
+ " background-position: top right;"LF\
+ " }"LF\
+ ""LF\
+ "#pageContent, #siteNavigation {"LF\
+ " background-color: #ccd;"LF\
+ " }"LF\
+ ""LF\
+ ""LF\
+ ".imgLeft { float: left; margin-right: 10px; margin-bottom: 10px; }"LF\
+ ".imgRight { float: right; margin-left: 10px; margin-bottom: 10px; }"LF\
+ ""LF\
+ "hr { height: 1px; color: #000; background-color: #000; margin-bottom: 15px; }"LF\
+ ""LF\
+ "h1 { margin: 0; font-weight: bold; font-size: 2em; }"LF\
+ "h2 { margin: 0; font-weight: bold; font-size: 1.6em; }"LF\
+ "h3 { margin: 0; font-weight: bold; font-size: 1.3em; }"LF\
+ "h4 { margin: 0; font-weight: bold; font-size: 1.18em; }"LF\
+ ""LF\
+ ".blak { background-color: #000; }"LF\
+ ".hide { display: none; }"LF\
+ ".tableWidth { min-width: 400px; }"LF\
+ ""LF\
+ ".tblRegular { border-collapse: collapse; }"LF\
+ ".tblRegular td { padding: 6px; background-image: url(fade.gif); border: 2px solid #99c; }"LF\
+ ".tblHeaderColor, .tblHeaderColor td { background: #99c; }"LF\
+ ".tblNoBorder td { border: 0; }"LF\
+ ""LF\
+ ""LF\
+ "// -->"LF\
+ "</style>"LF\
+ ""LF\
+ "</head>"LF\
+ ""LF\
+ "<table width=\"76%%\" border=\"0\" align=\"center\" cellspacing=\"0\" cellpadding=\"3\" class=\"tableWidth\">"LF\
+ " <tr>"LF\
+ " <td id=\"subTitle\">HTTrack Website Copier - Open Source offline browser</td>"LF\
+ " </tr>"LF\
+ "</table>"LF\
+ "<table width=\"76%%\" border=\"0\" align=\"center\" cellspacing=\"0\" cellpadding=\"0\" class=\"tableWidth\">"LF\
+ "<tr class=\"blak\">"LF\
+ "<td>"LF\
+ " <table width=\"100%%\" border=\"0\" align=\"center\" cellspacing=\"1\" cellpadding=\"0\">"LF\
+ " <tr>"LF\
+ " <td colspan=\"6\"> "LF\
+ " <table width=\"100%%\" border=\"0\" align=\"center\" cellspacing=\"0\" cellpadding=\"10\">"LF\
+ " <tr> "LF\
+ " <td id=\"pageContent\"> "LF\
+ "<!-- ==================== End prologue ==================== -->"LF\
+ "<h1><strong><u>Oops!...</u></strong></h1>"LF\
+ "<h3>This page has <font color=\"red\"><em>not</em></font> been retrieved by HTTrack Website Copier. </h3>"LF\
+ "<script language=\"Javascript\">"LF\
+ "<!--"LF\
+ " var loc=document.location.toString();"LF\
+ " if (loc) {"LF\
+ " var pos=loc.indexOf('link=');"LF\
+ " if (pos>0) {"LF\
+ " document.write('Clic to the link <b>below</b> to go to the online location!<br><a href=\"'+loc.substring(pos+5)+'\">'+loc.substring(pos+5)+'</a><br>');"LF\
+ " } else"LF\
+ " document.write('(no location defined)');"LF\
+ " }"LF\
+ "// -->"LF\
+ "</script>"LF\
+ "<h6 align=\"right\">Mirror by HTTrack Website Copier</h6>"LF\
+ "<!-- ==================== Start epilogue ==================== -->"LF\
+ " </td>"LF\
+ " </tr>"LF\
+ " </table>"LF\
+ " </td>"LF\
+ " </tr>"LF\
+ " </table>"LF\
+ "</td>"LF\
+ "</tr>"LF\
+ "</table>"LF\
+ ""LF\
+ "<table width=\"76%%\" height=\"100%%\" border=\"0\" align=\"center\" valign=\"bottom\" cellspacing=\"0\" cellpadding=\"0\">"LF\
+ " <tr>"LF\
+ " <td id=\"footer\"><small>&copy; 2002 Xavier Roche & other contributors - Web Design: Kauler Leto.</small></td>"LF\
+ " </tr>"LF\
+ "</table>"LF\
+ ""LF\
+ "</body>"LF\
+ ""LF\
+ "</html>"LF\
+ ""LF\
+ ""LF
+
+/* Length companion of the HTML template defined just above.
+   NOTE(review): unlike the *_GIF_LEN constants, this is 0; presumably that
+   tells the consumer to measure the NUL-terminated text itself instead of
+   using a fixed byte count - confirm against the code that reads it. */
+#define HTS_DATA_UNKNOWN_HTML_LEN 0
+
+/* HTML page template for a document that HTTrack did not retrieve.
+   The text is written as a printf-style format string: every literal
+   percent sign is doubled (width=\"76%%\") and there is exactly one %s
+   conversion, inside the <h3> message (presumably the error description
+   supplied by the caller - confirm at the call site).
+   The embedded script looks for "link=" in the page URL and, when present,
+   writes a hyperlink to the online location.
+   The markup is kept well-formed: a single <html> start tag, an explicit
+   <body>, and the closing </body></html> only once, after the epilogue. */
+#define HTS_DATA_ERROR_HTML "<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"en\">"LF\
+ ""LF\
+ "<head>"LF\
+ " <meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-1\" />"LF\
+ " <meta name=\"description\" content=\"HTTrack is an easy-to-use website mirror utility. It allows you to download a World Wide website from the Internet to a local directory,building recursively all structures, getting html, images, and other files from the server to your computer. Links are rebuiltrelatively so that you can freely browse to the local site (works with any browser). You can mirror several sites together so that you can jump from one toanother. You can, also, update an existing mirror site, or resume an interrupted download. The robot is fully configurable, with an integrated help\" />"LF\
+ " <meta name=\"keywords\" content=\"httrack, HTTRACK, HTTrack, winhttrack, WINHTTRACK, WinHTTrack, offline browser, web mirror utility, aspirateur web, surf offline, web capture, www mirror utility, browse offline, local site builder, website mirroring, aspirateur www, internet grabber, capture de site web, internet tool, hors connexion, unix, dos, windows 95, windows 98, solaris, ibm580, AIX 4.0, HTS, HTGet, web aspirator, web aspirateur, libre, GPL, GNU, free software\" />"LF\
+ " <title>Page not retrieved! - HTTrack Website Copier</title>"LF\
+ " <style type=\"text/css\">"LF\
+ " <!--"LF\
+ ""LF\
+ "body {"LF\
+ " margin: 0; padding: 0; margin-bottom: 15px; margin-top: 8px;"LF\
+ " background: #77b;"LF\
+ "}"LF\
+ "body, td {"LF\
+ " font: 14px \"Trebuchet MS\", Verdana, Arial, Helvetica, sans-serif;"LF\
+ " }"LF\
+ ""LF\
+ "#subTitle {"LF\
+ " background: #000; color: #fff; padding: 4px; font-weight: bold; "LF\
+ " }"LF\
+ ""LF\
+ "#siteNavigation a, #siteNavigation .current {"LF\
+ " font-weight: bold; color: #448;"LF\
+ " }"LF\
+ "#siteNavigation a:link { text-decoration: none; }"LF\
+ "#siteNavigation a:visited { text-decoration: none; }"LF\
+ ""LF\
+ "#siteNavigation .current { background-color: #ccd; }"LF\
+ ""LF\
+ "#siteNavigation a:hover { text-decoration: none; background-color: #fff; color: #000; }"LF\
+ "#siteNavigation a:active { text-decoration: none; background-color: #ccc; }"LF\
+ ""LF\
+ ""LF\
+ "a:link { text-decoration: underline; color: #00f; }"LF\
+ "a:visited { text-decoration: underline; color: #000; }"LF\
+ "a:hover { text-decoration: underline; color: #c00; }"LF\
+ "a:active { text-decoration: underline; }"LF\
+ ""LF\
+ "#pageContent {"LF\
+ " clear: both;"LF\
+ " border-bottom: 6px solid #000;"LF\
+ " padding: 10px; padding-top: 20px;"LF\
+ " line-height: 1.65em;"LF\
+ " background-image: url(backblue.gif);"LF\
+ " background-repeat: no-repeat;"LF\
+ " background-position: top right;"LF\
+ " }"LF\
+ ""LF\
+ "#pageContent, #siteNavigation {"LF\
+ " background-color: #ccd;"LF\
+ " }"LF\
+ ""LF\
+ ""LF\
+ ".imgLeft { float: left; margin-right: 10px; margin-bottom: 10px; }"LF\
+ ".imgRight { float: right; margin-left: 10px; margin-bottom: 10px; }"LF\
+ ""LF\
+ "hr { height: 1px; color: #000; background-color: #000; margin-bottom: 15px; }"LF\
+ ""LF\
+ "h1 { margin: 0; font-weight: bold; font-size: 2em; }"LF\
+ "h2 { margin: 0; font-weight: bold; font-size: 1.6em; }"LF\
+ "h3 { margin: 0; font-weight: bold; font-size: 1.3em; }"LF\
+ "h4 { margin: 0; font-weight: bold; font-size: 1.18em; }"LF\
+ ""LF\
+ ".blak { background-color: #000; }"LF\
+ ".hide { display: none; }"LF\
+ ".tableWidth { min-width: 400px; }"LF\
+ ""LF\
+ ".tblRegular { border-collapse: collapse; }"LF\
+ ".tblRegular td { padding: 6px; background-image: url(fade.gif); border: 2px solid #99c; }"LF\
+ ".tblHeaderColor, .tblHeaderColor td { background: #99c; }"LF\
+ ".tblNoBorder td { border: 0; }"LF\
+ ""LF\
+ ""LF\
+ "// -->"LF\
+ "</style>"LF\
+ ""LF\
+ "</head>"LF\
+ ""LF\
+ "<body>"LF\
+ "<table width=\"76%%\" border=\"0\" align=\"center\" cellspacing=\"0\" cellpadding=\"3\" class=\"tableWidth\">"LF\
+ " <tr>"LF\
+ " <td id=\"subTitle\">HTTrack Website Copier - Open Source offline browser</td>"LF\
+ " </tr>"LF\
+ "</table>"LF\
+ "<table width=\"76%%\" border=\"0\" align=\"center\" cellspacing=\"0\" cellpadding=\"0\" class=\"tableWidth\">"LF\
+ "<tr class=\"blak\">"LF\
+ "<td>"LF\
+ " <table width=\"100%%\" border=\"0\" align=\"center\" cellspacing=\"1\" cellpadding=\"0\">"LF\
+ " <tr>"LF\
+ " <td colspan=\"6\"> "LF\
+ " <table width=\"100%%\" border=\"0\" align=\"center\" cellspacing=\"0\" cellpadding=\"10\">"LF\
+ " <tr> "LF\
+ " <td id=\"pageContent\"> "LF\
+ "<!-- ==================== End prologue ==================== -->"LF\
+ "<h1><strong><u>Oops!...</u></strong></h1>"LF\
+ "<h3>This page has <font color=\"red\"><em>not</em></font> been retrieved by HTTrack Website Copier (%s). </h3>"LF\
+ "<script language=\"Javascript\">"LF\
+ "<!--"LF\
+ " var loc=document.location.toString();"LF\
+ " if (loc) {"LF\
+ " var pos=loc.indexOf('link=');"LF\
+ " if (pos>0) {"LF\
+ " document.write('Click on the link <b>below</b> to go to the online location!<br><a href=\"'+loc.substring(pos+5)+'\">'+loc.substring(pos+5)+'</a><br>');"LF\
+ " } else"LF\
+ " document.write('(no location defined)');"LF\
+ " }"LF\
+ "// -->"LF\
+ "</script>"LF\
+ "<h6 align=\"right\">Mirror by HTTrack Website Copier</h6>"LF\
+ "<!-- ==================== Start epilogue ==================== -->"LF\
+ " </td>"LF\
+ " </tr>"LF\
+ " </table>"LF\
+ " </td>"LF\
+ " </tr>"LF\
+ " </table>"LF\
+ "</td>"LF\
+ "</tr>"LF\
+ "</table>"LF\
+ ""LF\
+ "<table width=\"76%%\" height=\"100%%\" border=\"0\" align=\"center\" valign=\"bottom\" cellspacing=\"0\" cellpadding=\"0\">"LF\
+ " <tr>"LF\
+ " <td id=\"footer\"><small>&copy; 2002 Xavier Roche &amp; other contributors - Web Design: Kauler Leto.</small></td>"LF\
+ " </tr>"LF\
+ "</table>"LF\
+ ""LF\
+ "</body>"LF\
+ ""LF\
+ "</html>"LF\
+ ""LF\
+ ""LF
+
+// "unknown" GIF image (32x32 according to its header), embedded as a C string of hex-escaped bytes
+#define HTS_DATA_UNKNOWN_GIF \
+ "\x47\x49\x46\x38\x39\x61\x20\x0\x20\x0\xf7\xff\x0\xc0\xc0\xc0\xff\x0\x0\xfc\x3\x0\xf8\x6\x0\xf6\x9\x0\xf2\xc\x0\xf0\xf\x0\xf0\xe\x0\xed\x11\x0\xec\x13\x0\xeb\x14\x0\xe9\x15\x0\xe8\x18\x0\xe6\x18\x0\xe5\x1a\x0\xe3\x1c\x0\xe2\x1d\x0\xe1\x1e\x0\xdf\x20\x0\xdd\x23\x0\xdd\x22\x0\xdb\x23\x0\xda\x25\x0\xd9\x25\x0\xd8\x27\x0\xd6\x29\x0\xd5\x2a\x0\xd3\x2c\x0\xd2\x2d\x0"\
+ "\xd1\x2d\x0\xd0\x2f\x0\xcf\x30\x0\xce\x31\x0\xcb\x34\x0\xcb\x33\x0\xc8\x36\x0\xc5\x3b\x0\xc2\x3c\x0\xc0\x3f\x0\xbc\x43\x0\xba\x45\x0\xb7\x48\x0\xb4\x4c\x0\xb1\x4e\x0\xad\x51\x0\xaa\x55\x0\xa8\x58\x0\xa4\x5a\x0\xa1\x5e\x0\x9f\x60\x0\x99\x66\x0\x96\x68\x0\x93\x6c\x0\x90\x6e\x0\x8d\x72\x0\x8b\x74\x0\x8a\x75\x0\x88\x78\x0\x85\x79\x0\x82\x7d\x0\x7e\x80\x0\x7d\x82\x0\x79"\
+ "\x86\x0\x77\x88\x0\x73\x8b\x0\x72\x8d\x0\x70\x8e\x0\x6e\x91\x0\x6a\x95\x0\x68\x97\x0\x65\x9a\x0\x63\x9d\x0\x62\x9e\x0\x60\xa0\x0\x5d\xa2\x0\x5c\xa3\x0\x5a\xa5\x0\x57\xa9\x0\x57\xa7\x0\x54\xab\x0\x53\xac\x0\x52\xad\x0\x51\xae\x0\x4f\xb0\x0\x4e\xb1\x0\x4d\xb2\x0\x4c\xb4\x0\x49\xb6\x0\x48\xb8\x0\x46\xba\x0\x45\xbb\x0\x43\xbd\x0\x43\xbc\x0\x40\xbf\x0\x3f\xc0\x0\x3e\xc1"\
+ "\x0\x3d\xc2\x0\x3a\xc5\x0\x39\xc5\x0\x38\xc7\x0\x37\xc8\x0\x35\xca\x0\x34\xcb\x0\x32\xcc\x0\x31\xce\x0\x30\xd0\x0\x30\xce\x0\x2f\xd1\x0\x2e\xd1\x0\x2c\xd2\x0\x2b\xd4\x0\x2a\xd5\x0\x29\xd6\x0\x27\xd8\x0\x26\xda\x0\x26\xd8\x0\x25\xdb\x0\x24\xdc\x0\x21\xde\x0\x20\xdf\x0\x1f\xe1\x0\x1e\xe1\x0\x1c\xe3\x0\x1b\xe5\x0\x19\xe6\x0\x18\xe7\x0\x15\xeb\x0\x15\xea\x0\x14\xec\x0"\
+ "\x12\xed\x0\x10\xef\x0\xf\xf0\x0\xd\xf2\x0\xa\xf5\x0\x9\xf6\x0\x7\xf8\x0\x5\xfa\x0\x3\xfb\x0\x1\xfd\x0\x0\xfe\x2\x0\xfb\x4\x0\xf8\x7\x0\xf6\xa\x0\xf3\xd\x0\xee\x12\x0\xaa\x54\x0\xa5\x5a\x0\xa2\x5d\x0\xa0\x60\x0\x9c\x62\x0\x99\x66\x0\x98\x67\x0\x94\x6b\x0\x92\x6d\x0\x91\x6e\x0\x8f\x70\x0\x8c\x74\x0\x8a\x75\x0\x86\x79\x0\x83\x7c\x0\x81\x7e\x0\x7e\x82\x0"\
+ "\x7b\x83\x0\x79\x87\x0\x76\x8a\x0\x73\x8c\x0\x70\x8f\x0\x6a\x95\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0"\
+ "\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0"\
+ "\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x21\xf9\x4\x1\x0\x0\x0\x0\x2c\x0\x0\x0\x0\x20\x0\x20\x0\x40\x8"\
+ "\xff\x0\x1\x8\x1c\x48\xb0\x60\x82\x7\x16\x3a\x8c\x30\x91\x82\xc5\x8b\x82\x10\x23\xa\xa4\x81\x83\xa0\x92\x27\x56\xb6\x88\x51\x23\xb1\xa3\xc0\x38\x78\xfe\x10\x4a\xe4\xb1\xa4\xc9\x93\x1e\xf\x30\x90\x90\x41\xa2\x8e\x1e\x40\x88\x20\x41\x29\xf1\x4b\x99\x36\x75\xf6\xd0\x8c\xe8\x8\xd2\xce\x92\x94\x2e\x6d\xf2\x14\x8a\xd4\xcf\x92\x1\x4\x14\x58\x10\x1\xc3\x87\x82\x32\x6a\xe4\xe0\x81\x72\xc2\x86\x10"\
+ "\x1d\x83\x14\x49\xd2\x84\x8a\x96\xa3\x5\xa3\x5c\xe9\x42\x66\x8d\x1c\xb0\x4\xcf\xbc\xb1\xd3\x67\x10\x5a\x82\x80\xa\x29\x6a\xf4\xb6\xee\xce\x48\x93\x2c\x69\xb2\x4b\x50\x54\xa9\x53\x7c\x25\x6\x18\x60\xa0\x1\x5\xd\x22\x4a\xa0\x58\xe1\x22\xc6\xc\x1b\x34\x3\x10\x40\xe0\xa0\x2\x87\x88\x37\x76\xf8\x10\x82\x52\x1\x84\xb\x1e\x3a\xfe\x18\x62\x64\x9\x14\x94\x20\x48\x9c\x50\xd1\x2\x6\xc4\x23\x4c"\
+ "\xa4\x60\xf1\x12\xd8\xc9\x94\x2c\x60\xcc\xb8\xe1\x5b\x85\x4b\x18\x34\x70\xee\x4\x1e\x93\x66\x4e\x1e\x3f\x81\xd9\xd0\xd1\x13\xc8\x50\x60\x0\x7c\x4\x1d\x5a\xf4\x1c\x0\x22\x46\x8f\xaa\x6b\xdf\xce\xbd\xa3\xa4\x4a\x98\x38\x7d\xb\x7a\x9e\xa9\x13\xa8\x51\xa6\xba\xbf\xd\x8\x0\x3b\xff"
+/* Byte count to use with HTS_DATA_UNKNOWN_GIF. An explicit length is needed
+   because the image data contains \x0 bytes, so strlen() would stop early.
+   NOTE(review): 1070 has not been re-counted against the literal above
+   (which ends with an extra \xff after the \x3b GIF trailer) - confirm. */
+#define HTS_DATA_UNKNOWN_GIF_LEN 1070
+
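+/* Background GIF. The byte rows below were generated from bg_rings.gif with
+   the pipeline that follows: cut strips hexdump's offset column, the first
+   sed swaps the two bytes of every 4-hex-digit word and prefixes each with
+   \x, the second removes the spaces, and the third wraps each row in quotes
+   followed by a line continuation.
+   hexdump bg_rings.gif | cut -c9- - | sed -e 's/\([0-9a-f][0-9a-f]\)\([0-9a-f][0-9a-f]\)/\\x\2 \\x\1/g' | sed -e 's/ //g' | sed -e 's/^\(.*\)$/ \"\1\" \\/' */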
+#define HTS_DATA_BACK_GIF \
+ "\x47\x49\x46\x38\x39\x61\xf5\x01\xc8\x01\xa2\x00\x00\xcc\xcc\xdd" \
+ "\xc7\xc7\xda\xc4\xc4\xd7\xbe\xbe\xd3\xbd\xbd\xd2\xb9\xb9\xd0\xfe" \
+ "\x01\x02\x00\x00\x00\x21\xf9\x04\xfd\x14\x00\x06\x00\x2c\x00\x00" \
+ "\x00\x00\xf5\x01\xc8\x01\x40\x03\xff\x08\xba\xdc\xfe\x30\xca\x49" \
+ "\xab\xbd\x38\xeb\xcd\xbb\xff\x60\x28\x8e\x64\x69\x9e\x68\xaa\xae" \
+ "\x6c\xeb\xbe\x70\x2c\xcf\x74\x6d\xdf\x62\x20\x08\x43\xe1\xff\xc0" \
+ "\xa0\x70\x48\x2c\x1a\x8f\xc8\xa4\xd2\x38\x68\x06\x02\xb8\xa8\x74" \
+ "\x4a\xad\xc2\x74\x84\xa5\x76\xcb\xed\x7a\xbf\xe0\x30\xd8\x69\x2d" \
+ "\x9b\xcf\xe8\x40\x4f\xcc\x6e\xbb\xdf\xf0\xb8\x9c\x4d\x20\x40\xd1" \
+ "\xf8\xbc\x79\x3d\xef\xfb\xff\x42\x4d\x3b\x77\x7a\x12\x4f\x3b\x4d" \
+ "\x80\x6d\x03\x02\x85\x8e\x8f\x0f\x02\x8a\x93\x94\x43\x02\x84\x90" \
+ "\x99\x1f\x58\x95\x05\x8d\x9a\xa0\x24\x92\x9d\xa4\x6e\x03\xa1\xa8" \
+ "\xa1\x3c\x73\x9f\xa9\x8e\x59\xa5\xb1\x5b\xad\xae\xb5\xb6\x20\x02" \
+ "\xb0\x62\xb4\xb7\x28\xba\xb2\xc0\x43\xa7\xbd\xc4\xc5\x36\x7c\x5d" \
+ "\xc3\xc6\x10\xc8\xc1\xce\x98\xcb\xd1\xd2\x67\xa3\x5c\xbc\x8e\xd5" \
+ "\xce\xc0\xd7\xd3\xdd\xde\xa1\x01\xbf\x49\xdc\x33\x01\xda\xc1\xdf" \
+ "\xe9\xea\xe9\xe1\x4b\x76\x29\xd9\xe7\x9d\xeb\xf4\x14\x3a\xcd\xf2" \
+ "\x71\x82\xd0\xc4\xf1\x46\xe4\x13\xc4\xe5\x03\xa4\xac\x5e\x3f\x81" \
+ "\x03\x13\xca\x61\x84\xcd\x88\x04\x7c\x0a\xe7\xf0\x33\xa8\xc7\x5c" \
+ "\xc4\x71\x00\x7b\x71\x1a\x33\x91\xff\xca\x45\x82\x14\xf1\x20\x4c" \
+ "\xd8\x31\xe4\x89\x5c\x4b\x32\x96\x80\xf8\xd1\x94\xc9\x29\x23\xb5" \
+ "\x95\x7c\x89\x47\xcd\x91\x82\x1b\x5a\xfa\x51\x49\x33\x45\x4c\x60" \
+ "\x33\x7b\x2e\x6b\x47\x84\x1c\x4b\x9d\x62\x70\x0a\x85\x47\x72\xa9" \
+ "\xd3\x07\x3d\x18\x1c\x45\x0a\x86\xe7\x53\x0e\x3f\x4b\x05\xbd\xca" \
+ "\x75\x2a\xd5\x2f\x5c\x49\x0c\x54\x1a\xb6\x6c\x83\xaf\x72\xcc\x7a" \
+ "\xf0\x87\x4e\xad\xdb\x07\x59\xd1\x5a\x7b\x9b\x61\x2c\xdd\xbb\x0b" \
+ "\xd8\xca\xad\x8a\x97\x82\x57\x52\x5b\xfb\x3a\xd5\xbb\x17\xac\x60" \
+ "\x66\x76\x0f\xdf\x2d\x0c\x47\xb1\x83\xb8\xa4\x1c\xd3\xb5\xc8\xb8" \
+ "\x8d\x55\xb5\x94\xf3\x49\xa6\x5b\xf9\x4d\x60\xb5\x11\x37\xbb\x25" \
+ "\xdc\x99\x0b\xd9\xbe\x11\x09\x88\x36\x4b\xba\xf4\xac\xcd\xad\xb7" \
+ "\xad\x0e\xeb\xfa\xcd\xe6\xcc\x0a\x3f\xcf\x66\x57\xdb\x8d\x6a\xc9" \
+ "\xb8\x15\x9e\xde\x4d\x2f\x78\x6f\x30\xba\xc3\xc6\x3e\x97\x9c\x78" \
+ "\xb1\xe5\xc7\x97\x34\xef\xfa\xf5\xb7\x73\x75\xd0\xa3\x2b\x19\x8e" \
+ "\xd7\x78\xcb\x77\xd7\xa7\x41\xd6\xbe\x84\x7b\xdf\xbf\x17\xcd\x87" \
+ "\xcf\xe4\x9d\x3c\xdf\xdd\x44\x4b\xab\x5f\x7f\x06\xbd\xfb\x2d\xf3" \
+ "\x05\x67\x97\x3b\x60\x3a\xfd\x17\xff\xe3\xdd\x87\xdf\x7f\xf1\x09" \
+ "\x18\x04\x43\xff\xc1\xb0\x9f\x81\x5a\xe4\x27\x9a\x4d\x0c\xce\x21" \
+ "\xc8\x0e\x97\x3c\x61\xa1\x21\x16\x1e\xc2\x43\x13\x53\xd1\xb4\x60" \
+ "\x84\x5a\x5c\x76\x1d\x4a\x20\x86\x16\x56\x81\x25\xc6\xe1\x1f\x7d" \
+ "\x10\xa6\x38\xc7\x61\x28\xba\xa8\xcf\x8a\x09\x52\xb0\x43\x80\xd1" \
+ "\x5d\xd7\x9e\x8c\x72\x88\x58\xe3\x8f\x4b\xd9\xc7\x63\x1c\x3e\x02" \
+ "\x69\x24\x3d\x1f\x0e\xc9\xca\x91\x4c\xbe\x94\xa4\x92\x94\xf4\xd7" \
+ "\xe4\x94\xd3\x3c\x09\xe5\x39\x64\x50\xa9\x25\x24\x56\x5e\xe9\x1a" \
+ "\x87\x4f\x14\x77\xa3\x61\x5b\x3e\xd2\xa2\x97\x68\xa2\x59\xe6\x34" \
+ "\x67\xa6\xe9\x66\x6d\x6b\x5e\x45\xe2\x9b\x74\x9e\x13\x27\x71\x73" \
+ "\xd6\xa9\x27\x18\x77\xf6\x99\x57\x9e\x7b\x42\xe9\xe7\xa0\x29\x1c" \
+ "\x22\x64\xa0\x7d\x30\x42\x23\xa1\x8c\xaa\x93\xe1\xa3\x90\x46\xfa" \
+ "\x68\xa3\x94\x56\x6a\xe9\x94\x19\x52\xb8\x21\x87\x89\x84\xc1\xe9" \
+ "\x84\x14\x66\x78\xe9\xa8\x43\x01\x8a\xa8\x36\xfb\x90\xea\xa7\x0e" \
+ "\xa7\xb6\x5a\x44\x96\xaa\x52\xd4\xa6\xab\xb4\x02\x43\x40\x91\xb1" \
+ "\xb6\x70\x68\xad\x3e\x28\x3a\xdb\x3d\xbb\x2e\x84\x6b\xac\x3b\x22" \
+ "\x8a\x60\xae\x1c\x6c\x94\xe8\xa2\xff\x23\xba\x3a\x2c\xb2\x36\xac" \
+ "\x62\x0a\xb3\x4b\xe1\x98\x22\xb5\xd0\xda\x22\xed\x7b\x1e\xd2\xe9" \
+ "\x60\xb6\x93\x05\xfb\x03\xb6\x55\x74\x19\x1d\xb9\xe0\xee\xf6\x21" \
+ "\x78\x67\x58\xeb\x1e\xba\xe9\x6e\x39\x2b\x13\x32\x98\xeb\x1a\xbc" \
+ "\xf1\x66\x60\x21\x85\x9f\x0a\xf9\xa9\xa6\x17\x72\x95\xdd\xb7\x86" \
+ "\x5c\x49\x70\xae\xf3\xf2\xda\xeb\xb3\x53\x64\x17\x94\xbd\x9d\x65" \
+ "\x0b\xb1\x70\x8c\x5c\x12\x83\x86\x9d\xca\x71\xab\x19\x05\x46\xe0" \
+ "\xee\x71\x95\xc6\xf8\x11\xc3\xf5\x28\x9b\x52\x14\x1f\xd7\x76\x30" \
+ "\x8b\x2d\x91\x1c\x9e\xa9\x96\x98\x30\x71\x61\xf2\xa6\x96\x6f\x04" \
+ "\x53\x39\x58\xac\x81\x4d\xce\x6c\xdb\xcd\x24\x88\x2c\x95\x92\x2e" \
+ "\xdf\xb5\x73\x64\x40\xd7\x20\xae\xca\x35\x0e\x64\x5d\xd2\x35\xf8" \
+ "\x2c\x57\xd1\x65\xa5\x2c\x21\xd4\x52\x48\x8d\xd6\xca\x6e\x2d\x7d" \
+ "\x35\xd6\x52\x1c\x2d\x20\xd5\x4e\x25\x06\x76\xd6\x06\xc3\x67\xf6" \
+ "\xd9\x51\x88\x3d\xf6\x6c\x56\x7f\xcd\xb6\x14\x5e\xf7\x46\xf6\x4b" \
+ "\x75\x2f\x39\x37\xdd\x68\xe2\x6b\x50\xdc\x7d\xec\x8d\x36\x9a\x77" \
+ "\xcb\x1a\x91\xdf\x82\x6b\x90\x37\xc8\x9b\xa5\x97\x78\xb4\x6f\x22" \
+ "\xbe\xce\x47\x5c\x3f\x0e\x02\xe0\xff\xd1\x3d\x0d\xe3\x77\x96\xc7" \
+ "\xb0\xb8\x76\xa2\x61\x0e\x52\xe7\x2c\x68\xfd\xa5\x68\x9f\x2b\xa2" \
+ "\x39\xe9\xa2\xec\xb9\xfa\x61\xa6\xfb\x21\x79\xe2\x6e\xf3\x58\xf9" \
+ "\x53\xb1\x03\x32\x3b\xd8\xa9\x0b\xb8\x3b\x92\xf2\xfd\x0e\x6e\xee" \
+ "\xe7\x3a\x47\x7c\x29\x85\x67\x2b\x3a\x88\xb7\xab\xd5\xfb\x45\x16" \
+ "\xef\xfd\x3c\x88\xc2\xd3\x34\x7d\x65\xb0\x5a\x2a\xb2\xb3\x4c\x1e" \
+ "\xaf\x70\x10\x4f\x2d\x4f\xf4\x9a\xdb\x7e\xaf\xc5\x64\xe2\xa7\x1d" \
+ "\x32\xcc\x74\xae\x76\x3d\xe1\x73\xeb\xb0\x21\x52\xa9\x02\xe9\xbd" \
+ "\x97\xc9\xb3\x9e\xed\xfb\xa7\xe6\xaf\xff\xa5\xb5\x33\x5f\x79\xfc" \
+ "\xf7\xbf\x3e\xf1\x4f\x80\xaf\xaa\x5e\x01\x59\x94\x3e\x04\xbe\x21" \
+ "\x7b\xeb\xb9\xc7\x0f\x16\x38\x01\xf6\x39\xd0\x5b\xfd\x3a\xe0\x8b" \
+ "\x28\x98\x03\x0d\x5e\xf0\x83\x0e\xe1\x20\x0d\x12\x06\xc2\x12\x26" \
+ "\x41\x84\x99\xb0\xa0\x09\x69\x85\xc2\x75\x48\x70\x85\x43\x6a\x21" \
+ "\x70\x54\x08\x43\x3b\xc9\x90\x54\x18\xf3\x20\xf6\x7c\x75\xc3\x1e" \
+ "\xfa\xf0\x87\x40\x0c\xa2\x10\x87\x48\xc4\x22\x76\x2e\x87\x0c\x02" \
+ "\x55\x98\x8c\x88\xb5\x17\xd6\x10\x09\xf5\x63\x62\x78\x4c\xf6\xc4" \
+ "\x60\x40\x50\x8a\x25\xd3\x61\x15\xff\x65\x51\x07\x05\x62\xb1\x04" \
+ "\x24\xdc\xa2\x9b\x76\xf0\xc5\x7a\x89\x51\x8c\xc7\x2a\x23\x06\xc2" \
+ "\x78\xc6\x06\x81\x6a\x10\x93\x5a\xe3\xa3\x34\xb5\x29\x2d\x52\x62" \
+ "\x63\x5f\xbc\x5f\x9a\xa2\xd8\x13\x43\xd9\x31\x44\x5e\xa4\xd4\xf6" \
+ "\x04\x78\xab\x40\x3a\xc5\x89\x9d\xc0\x23\xd8\x02\x58\xa7\xe8\xe5" \
+ "\x8b\x8a\x7d\x20\x20\x7d\xfe\x58\x1b\x45\x8a\x10\x92\x2e\x89\xd3" \
+ "\x20\xe9\x24\x49\xd2\xb1\xb1\x0b\x9d\x0c\xd2\xa9\x9a\xa7\x46\x05" \
+ "\xd0\x30\x44\xa1\x0b\x94\x21\xd5\x58\xbe\x2f\x84\xd2\x16\x0d\xac" \
+ "\xcd\x2b\x4b\xc9\x81\x53\x1a\x81\x5d\x21\xa1\x24\x52\x56\x49\xcb" \
+ "\x11\xd8\xb2\x28\xe9\xe0\x64\x2f\xa5\xf1\x24\x52\x52\x81\x91\xbe" \
+ "\x1b\x26\x45\x92\x64\x4c\x19\xb8\x89\x97\xca\xd4\x43\xb0\x66\xc9" \
+ "\x01\x5d\x0a\x27\x9a\x7d\x59\x50\x33\x3f\x00\x3f\x6c\xae\x66\x3f" \
+ "\xb8\x5c\x81\x1e\x1d\xe7\xcd\xff\x08\x29\x9c\x22\x18\xa7\x89\x7a" \
+ "\x28\x3f\x6b\x7a\x81\x43\x8e\xc4\x4e\x79\x70\xa1\xbe\xc7\x21\x12" \
+ "\x83\xd0\x44\x41\x76\x8a\x54\xcf\xa4\xfd\xf2\x82\x57\xcc\xc3\x72" \
+ "\xfc\x02\xa5\x6d\x52\x49\x9d\x84\xa4\xa6\x07\x20\x32\x81\x2b\x21" \
+ "\xcc\x9d\xfa\x88\xa7\x23\x80\x35\xff\xad\x72\xf9\x20\x02\xc8\xec" \
+ "\x8d\x41\x23\x58\x18\x89\x86\x04\x93\x49\x90\x92\x45\x95\x94\x4f" \
+ "\xb5\x20\xf4\x0b\x22\x15\xcd\x3f\x0b\x90\xd2\x18\x9c\x54\x1e\x83" \
+ "\xca\x68\x25\x36\x8a\x97\x56\x16\x21\x94\x32\xbd\x97\x26\x29\x37" \
+ "\xaa\x53\xde\x0d\x4a\x25\xed\x09\x44\x91\xf0\xba\xfd\x1d\x61\x58" \
+ "\x43\xb5\xa2\x96\x5e\x4a\xd4\xce\x4d\x45\x25\x39\x2d\x4d\x50\x0d" \
+ "\x12\x55\xbd\x71\x90\x30\xbc\x60\xaa\x33\x8a\x4a\x1f\xad\xde\x74" \
+ "\x88\xc8\x18\xda\x90\x68\x5a\x16\xaf\x12\xa1\x97\x49\x95\xcd\x8f" \
+ "\x12\xa2\xcc\x82\xd6\xc8\xac\x42\x50\xa8\xe0\xd2\x0a\x8c\x04\xd1" \
+ "\x35\x08\x53\xf5\x27\x50\xbb\x3a\x90\x72\x56\x15\x7b\x1c\xcd\x87" \
+ "\x5c\x2d\x77\xd7\x58\xe4\xd5\x18\x85\x9d\x60\x39\x01\xf0\xd7\xca" \
+ "\x84\x67\x6d\xde\x84\x6b\x5d\x9d\x03\x59\x6f\x26\xb6\x14\xce\xb9" \
+ "\x6c\x01\x16\xcb\x00\x2f\xe1\x69\x20\x83\x65\x9d\x97\x0e\x6b\x8b" \
+ "\xa6\x70\x56\x01\xb1\xdc\xda\xaf\xd8\x7a\x5a\x05\x68\x16\x69\x2a" \
+ "\x35\xed\x69\x25\x0b\x0c\xb2\x96\x2c\x21\xb6\x95\x61\x37\x1b\x27" \
+ "\x5b\xce\xd2\x16\x28\xa8\x53\x48\x6b\x15\x90\x26\xd2\xa2\x62\x9d" \
+ "\x9c\x6d\x6c\x69\x42\x6b\x0c\xe4\xff\x2e\xf6\xb7\x6a\x75\x8c\x72" \
+ "\xe1\xc0\x5c\xc2\xba\x49\x34\x17\x31\x2e\xd4\x5e\x6b\xd8\xcd\x70" \
+ "\xf7\xa2\xbe\x7d\xd3\x6d\x3e\xa2\xdd\x9b\x4d\x77\xb9\xb0\x19\xd9" \
+ "\x62\x85\x29\x99\xef\xfa\x80\xab\xac\xa4\x53\x79\x33\xe1\xde\x5e" \
+ "\x79\xb3\x91\xe9\x6d\x49\x6e\x59\x07\x5d\x54\x8d\x57\x27\xf0\x65" \
+ "\x62\x7d\x63\x11\x5b\xa4\x54\x37\x69\xa9\x45\xaf\x77\xbf\x72\xe0" \
+ "\x7c\x05\xca\x7d\x68\x69\x30\xb8\x02\xb5\x5f\x79\xca\x65\xbe\x94" \
+ "\x4a\xf0\xe9\x56\xa3\xe1\x3e\xa0\xf3\x87\x03\xae\xed\x6a\x19\x23" \
+ "\x61\xed\x9d\x2a\xc0\xdd\x51\x70\x0f\x3b\x5c\x1b\x0c\x87\x22\xc4" \
+ "\x50\x94\xe1\x79\xdd\x53\xe1\x7a\xf4\x97\x23\x14\x84\xb1\x33\x4a" \
+ "\x6c\x8c\x1b\x27\xc5\xc5\xeb\xd1\xf1\x33\xa6\xc8\xe2\xda\x02\x99" \
+ "\xc3\xbc\x3a\x72\x2a\x7c\xcc\x0a\x25\xdf\x45\xc8\xda\xe0\x71\x95" \
+ "\xa0\xd4\xc5\x5c\x41\x59\x1b\x28\x5e\xcd\x27\x95\x54\x87\x41\x58" \
+ "\xc1\xc9\x60\x14\xa0\x94\xd3\x71\xe5\x18\xe6\x12\x81\x63\x76\x54" \
+ "\x99\x05\x24\x94\x19\xbb\x28\xcd\x36\x2e\x72\x9d\x04\xf6\xc1\x1a" \
+ "\x4f\x66\xa5\xb5\x1a\x4d\x09\xe1\xdc\x66\x3c\xa7\x29\x9b\x2b\xe4" \
+ "\xf3\x89\x6c\xea\x50\xc7\x30\x19\xff\xb0\x8f\x44\x04\x9b\x7f\x25" \
+ "\x67\x1e\x09\x1a\x48\xfb\xe2\xd7\xa7\x92\x40\x80\x7f\x85\x6a\x89" \
+ "\x71\x3a\xb4\x2c\x87\x6b\xc0\x36\xb2\x14\xcc\x9c\x96\xc6\x9a\x19" \
+ "\xd3\xd2\x50\x1b\x69\xd4\x2a\x36\xf5\x5b\x3d\x7d\x13\x50\xab\x5a" \
+ "\x13\x5b\x66\xf5\xa7\x5f\x4d\x9c\x4d\xca\x9a\x08\x69\xa4\xb5\x60" \
+ "\x6c\x7d\xeb\x90\xba\x5a\xd7\x38\x40\xb5\x9b\x02\x0a\x6c\x93\xf8" \
+ "\xb9\xd7\x49\xe1\xe1\x64\x40\xaa\xd8\x16\x6a\x1a\xd9\x3f\x50\xa2" \
+ "\x8b\x73\xa8\x41\x22\x1e\x1b\xda\xc8\x2e\xe5\xb3\xb1\xed\xa2\xe7" \
+ "\x36\x9a\xdb\xd7\x55\x75\xac\xc1\xad\xb0\x62\x3b\x60\xdc\xe4\x6e" \
+ "\x9f\xb9\x2f\x80\xee\x74\xa7\x68\xdd\xbe\xfc\xb6\xbb\x61\x0a\xef" \
+ "\x16\xec\x60\xde\xa0\xab\x77\xdb\x08\x8d\x6f\xe1\xea\x7b\xa2\xfc" \
+ "\xee\xf7\x24\xfe\x1d\x8d\x7b\x0a\x1c\x3f\x1e\x25\x38\x4d\xda\x79" \
+ "\xc6\x8a\x29\x7c\x55\xf2\xdb\x14\xa9\x2b\x56\xa1\x87\x5b\xfc\xe2" \
+ "\x18\xcf\xb8\xc6\x37\xce\xf1\x8e\x7b\xfc\xe3\x20\x0f\xb9\xc8\x47" \
+ "\x4e\x72\xfd\x69\x48\xe2\xdf\x81\x27\x1c\x4b\x7e\x27\x3f\x62\xd0" \
+ "\x09\xbf\x66\xf9\x14\x5c\xfe\x44\x3e\xca\xdc\x85\x01\x07\xb7\xcd" \
+ "\x6f\x0e\xeb\x6b\x0b\xbc\xcb\x31\xff\xef\x78\xce\x0f\x6e\x8a\x84" \
+ "\xf3\xfc\x04\x6a\x10\xf6\x19\xab\x7c\x74\x0f\xb4\x9b\xe8\x5b\xfd" \
+ "\x70\xd3\x4d\x29\x6f\xa8\x2b\x22\xd7\x23\xf7\xb9\xd5\x0b\x83\x75" \
+ "\x8b\x6b\x7d\xeb\xda\x29\x75\xb1\x95\x0e\xf6\x49\x74\x7d\xb1\x4f" \
+ "\x2f\x3b\x9d\x2c\xd9\xcb\xb4\x6f\xb1\x0e\xb7\xaa\x90\x71\x23\x3d" \
+ "\xbf\xc2\xb0\xbd\x88\xdb\x26\x4f\xc5\x82\xde\x01\x8c\x51\x8e\xef" \
+ "\x35\xe2\xf5\x05\x81\x6e\x34\x83\x47\x77\x81\x79\xe7\x8f\xd1\xe1" \
+ "\x53\xf7\x58\x88\x7d\x6e\x64\xc7\x12\xe0\x87\x92\x74\x52\x3c\x3e" \
+ "\x5f\x91\xdf\xc6\xe4\x3f\xfa\xf5\x06\x6d\xde\x7a\x08\xbc\xfc\xcd" \
+ "\x86\x6e\x99\x58\x25\x5e\xb0\x14\xbc\xf7\x4e\x1a\xe5\x66\x01\x89" \
+ "\x5e\x84\xa4\x7f\xe7\xe7\x6d\x91\x79\x40\x3c\xfa\x52\xb1\x9f\x0b" \
+ "\x93\x5a\xaf\x9d\xdb\x0f\x6f\x7a\xaf\xff\x66\xab\xec\x2c\xc3\xce" \
+ "\x47\x7b\xf6\x55\xa8\xfa\x85\xf5\xed\xf6\x98\x49\x46\xf9\x54\x21" \
+ "\x7e\x7c\xd9\xe0\xfb\x32\xf0\x3e\xd5\x1f\x87\x58\xf5\x51\xa6\x27" \
+ "\xe9\x0f\xb7\xf9\x3f\xd8\xbe\x0c\xa0\xff\x91\x2c\x97\x1c\xfc\xc1" \
+ "\xf7\x46\xed\x4b\x3f\x75\x1b\x59\xcd\xfb\x0d\x5b\x7b\xfb\x37\xd0" \
+ "\x7c\xf1\x83\xe0\xfa\x7b\xb1\x3f\xff\xad\x97\x96\x7e\x50\x90\x3f" \
+ "\x37\xf3\x77\x12\x5e\xa0\x7f\x36\x12\x39\x01\x58\x3a\xc9\x30\x79" \
+ "\xff\x37\x10\xc8\x07\x6f\x49\x42\x80\x8c\xf5\x4c\x07\x78\x03\xc1" \
+ "\x02\x7f\xd5\x54\x5c\x13\x38\x73\xbb\xd2\x7f\x33\x90\x26\x10\xc8" \
+ "\x73\x78\x46\x5a\xf8\x87\x14\xe6\x97\x81\xc1\xf6\x1a\x90\x33\x5a" \
+ "\x26\xc8\x25\x38\xc2\x5c\xeb\xd7\x05\x25\xb8\x82\x65\x70\x28\x0a" \
+ "\xb5\x80\x32\x21\x83\xb5\xb0\x1f\xa1\x64\x83\xce\x80\x83\xc4\x20" \
+ "\x78\x3e\xe0\x3f\x5e\x62\x81\xc8\x82\x44\x8e\x47\x71\x98\x26\x2b" \
+ "\x01\x72\x37\x3c\x78\x78\xa9\x97\x7b\xf4\xa3\x6c\xdd\x20\x24\xcc" \
+ "\xd2\x84\xb2\xd0\x80\x46\x63\x7c\x9d\xb1\x73\xb5\x60\x1f\x52\x17" \
+ "\x10\x57\x82\x85\x83\x66\x85\x24\x78\x77\xa0\x60\x1f\xdf\xf2\x82" \
+ "\x5b\xc0\x36\xac\x52\x43\x4c\xa7\x09\xf6\x51\x01\xa7\x97\x16\x49" \
+ "\x03\x7e\x0e\xc4\x81\x61\x93\x15\xf6\x10\x86\xf9\xa2\x85\x2b\x64" \
+ "\x86\x5f\x56\x04\x14\xc0\x87\x12\x53\x22\x6f\x24\x2a\x15\x90\x29" \
+ "\x92\xf6\x47\x80\x38\x38\x40\x30\x01\x6a\x78\x32\xb9\xe2\x87\xa8" \
+ "\xb2\x78\x37\xc0\x70\x70\x70\x76\x14\xd8\x6c\x91\xe0\x56\xaa\xa2" \
+ "\x74\x52\xe8\x0a\x14\xb5\x0b\xc7\xff\x04\x5f\x85\x76\x29\xa3\xf6" \
+ "\x81\x23\xc0\x6c\xe3\x80\x06\x91\xa8\x04\x62\x88\x58\xfc\x11\x8b" \
+ "\xa2\x20\x2e\x8d\x58\x0e\x50\xa2\x8a\xe9\x30\x87\xba\xb8\x6f\xd6" \
+ "\x82\x87\x28\xf0\x8a\x27\xc4\x28\x30\x06\x8c\x77\xb1\x52\xa1\x35" \
+ "\x82\xd0\x13\x53\x06\x76\x24\xe3\x46\x4d\xc2\x88\x04\xab\xa2\x13" \
+ "\xb4\xa8\x09\xd0\xd1\x49\xb9\xd8\x72\x2d\x51\x8d\xb5\xb0\x65\xc9" \
+ "\x33\x87\x8d\xb1\x26\xca\xe8\x4a\xa6\x97\x15\x5f\x88\x01\x64\xd8" \
+ "\x09\xbd\xd8\x0b\xee\xc5\x8d\x64\x26\x8d\xc9\xb2\x57\x4b\x65\x33" \
+ "\x58\xe3\x15\xcd\x01\x8e\x3f\x43\x25\xe9\xf8\x03\x31\x68\x54\x5f" \
+ "\x55\x01\xd1\xf8\x0f\x54\x32\x8e\x5b\xd0\x8f\x89\xf6\x2a\x0d\x45" \
+ "\x52\x53\xe2\x63\xee\xa8\x67\x44\x80\x4e\x04\xc9\x5a\x4c\xf2\x5a" \
+ "\x44\x48\x29\x47\xc1\x00\xf8\xe8\x06\x4d\xb2\x8f\x05\xd0\x90\x86" \
+ "\x26\x04\x18\xa9\x24\x15\x19\x12\xfe\x36\x44\xd5\x20\x56\xb6\x73" \
+ "\x24\xb8\x85\x45\x98\x10\x90\x08\x09\x24\xfb\x38\x92\x40\xe3\x92" \
+ "\xc0\xf4\x23\x9a\x25\x93\x33\x39\x3e\xab\x96\x0f\x38\x09\x34\x1c" \
+ "\x39\x09\xeb\xf8\x08\x12\xd9\x4b\x3f\x79\x75\x35\xb2\x8f\xd8\x44" \
+ "\x93\xc2\x90\x20\x11\xd9\x8a\xd1\xff\xa4\x94\x43\x90\x20\x48\x99" \
+ "\x94\x82\xc2\x32\xa8\x87\x4d\x19\xd9\x06\x1e\x79\x0c\x7d\xe5\x4d" \
+ "\x4d\x79\x95\x8f\x05\x96\xd8\x44\x65\x81\x25\x0f\x5b\xc9\x24\x45" \
+ "\xa9\x08\x67\x69\x46\x9a\xb1\x58\x50\x19\x57\xe1\x71\x59\x3d\x79" \
+ "\x36\x59\xd9\x06\x61\x29\x0f\x73\x79\x36\x84\x48\x1c\x95\xf5\x94" \
+ "\x9e\xc8\x97\x3c\x39\x5b\x7b\x39\x1b\x7d\x19\x4d\xfd\xb4\x1a\x43" \
+ "\x59\x4e\x75\xc9\x06\x6b\xe9\x02\x9a\xd5\x5a\x5f\x19\x98\xea\xc2" \
+ "\x80\xad\xf5\x96\x42\xd0\x98\x08\x28\x99\xc9\xe5\x59\xbb\x91\x98" \
+ "\xe5\x64\x99\x07\xb2\x1b\x9a\x15\x94\x96\xa2\x82\xc2\xd7\x95\x82" \
+ "\x79\x8a\xd8\x05\x5a\xc3\x35\x84\xb3\x71\x63\xc3\xb5\x98\xd4\x47" \
+ "\x98\xac\x59\x99\xa6\x99\x5f\xf9\x80\x99\x4c\xa2\x26\x88\xe9\x99" \
+ "\x91\xc5\x9b\xb8\x89\x97\xc3\x05\x9a\x20\x19\x5c\x85\x19\x4d\x69" \
+ "\x39\x09\x06\xc9\x15\x0a\x41\x9a\x95\x82\x26\x79\x99\x0a\x91\x89" \
+ "\x04\xce\xd9\x28\xb2\xc9\x06\xd1\x89\x0a\x3e\x56\x9d\x8c\x72\x9d" \
+ "\x49\x51\x60\xb5\xc9\x59\xc4\x19\x9a\x0b\xb6\x92\xa7\x95\x9c\x94" \
+ "\x60\x9c\xc7\x39\x4c\xe8\x39\x70\xe5\xe9\x9b\x86\xf9\x67\xef\x49" \
+ "\x99\x9c\x25\x81\xed\x15\x11\xdc\xff\x39\x28\x63\x14\x9c\xf9\xb0" \
+ "\x9c\x45\x34\x6c\xfc\x99\x9b\xf5\x19\x6e\xd2\x45\x4e\xe5\x64\x80" \
+ "\xcf\x97\x5d\x8b\xd5\x9e\x40\x39\x9f\x4e\xb3\xa0\xe2\x05\x1c\xea" \
+ "\x65\x59\x08\xea\x18\xc9\xa9\x9b\xff\xe1\x9d\x8b\x10\xa0\xa8\xa9" \
+ "\x4c\x1a\xaa\x95\xbc\x75\x11\xfe\xf9\x43\xd3\x89\x9f\x1c\xaa\x99" \
+ "\xc3\x34\x67\x9b\x81\x9e\xd9\x99\x2e\xe3\x89\x6b\x27\x8a\xa2\xda" \
+ "\x56\x27\x18\xba\x12\x3a\xd1\xa2\xd0\x52\xa2\xf4\x98\xa0\x9c\x43" \
+ "\x94\xf8\x25\xa1\xbb\x44\x4b\x1f\xea\x06\x35\xda\x3a\xcd\x58\x4a" \
+ "\x7a\x32\xa2\xa2\x44\x3f\xa5\xf4\xa2\xff\x78\x9f\x54\x51\xa4\x40" \
+ "\xfa\xa3\x50\xca\xa4\x5f\xc4\xa0\x80\xa1\x9e\x41\x2a\x45\x3a\x2a" \
+ "\xa2\x10\x56\x1d\x58\xe4\xa4\x45\xa0\xa4\x4b\x01\x8e\x38\x2a\x48" \
+ "\x14\xa6\x65\xf9\x67\x44\x62\xfa\x92\xb7\x81\xa5\x40\x90\x9f\xf4" \
+ "\x61\x2c\xb3\xb1\x66\x72\xda\x2c\x69\x7a\x9a\x7b\x71\xa6\x7d\x02" \
+ "\xa7\x96\xb7\x1b\x7e\x0a\x04\xe7\xb8\x40\x5d\xda\xa3\x7a\xda\x51" \
+ "\x3e\x34\xa4\x71\x40\xa6\x4f\x21\x6c\xc6\x38\x57\xad\x22\xa5\x26" \
+ "\x50\xa8\xdf\xd9\x42\x81\x4a\x0a\x77\x7a\x03\xde\xc9\xa7\xce\xe8" \
+ "\x2a\x9c\xfa\x62\x1a\x95\x7a\xae\xff\xc2\xa8\x57\x51\x7b\x8f\x3a" \
+ "\x7a\xb4\x22\xa9\xc1\xd8\x7b\xac\xd3\xa6\x4e\x99\x59\xe4\x71\x8b" \
+ "\xdb\x45\x2b\x83\xaa\x18\xaf\x28\xab\xf9\x72\xa9\xb1\x90\xa9\x8e" \
+ "\x78\x1f\x96\x08\x2e\xba\x5a\x0a\xb5\xaa\x18\x94\x08\x3d\xaa\xda" \
+ "\x0b\xc1\x8a\x3c\xe6\xe4\x22\xa7\xea\x27\xae\x2a\x1d\x19\x2a\x92" \
+ "\xbc\x3a\x0d\xcf\xba\x1d\x35\x22\xa6\x5d\x86\x8a\xdf\x33\xad\x55" \
+ "\x50\xad\xbd\xd2\x1f\xc7\x1a\x7f\xdf\x13\xae\x2c\x60\x87\x86\xb8" \
+ "\x0f\x5e\xe4\x77\xe0\xb5\x8b\x02\x44\xae\xe5\xea\xad\x1f\x71\x5b" \
+ "\xe6\x33\xac\xa2\x09\x76\x21\x91\xac\x22\xd6\x3d\xf8\xea\x1e\x78" \
+ "\x13\x7a\x07\x35\x6f\x3d\xa1\xa8\x33\xe5\xae\x17\x03\xaf\xe9\xd9" \
+ "\x66\xfb\x6a\x2b\x04\x2b\x03\xe6\xca\x2b\xa5\xfa\x41\x0b\x4b\x03" \
+ "\xc5\xaa\x6e\x57\x41\xa9\xd8\x27\x2f\x02\x3b\x59\x65\x61\xb0\xa6" \
+ "\x41\x28\xac\xd8\x2a\x6f\x91\xb1\x5a\x41\x29\x50\x08\x9c\x6f\xc1" \
+ "\xb1\xc9\xd0\x53\x22\x2b\x06\xbb\x06\x43\x11\x9b\x06\x63\xa2\x9a" \
+ "\xe7\x01\x43\x9f\xea\x16\xf2\x93\xb0\x4a\xf0\xa6\x35\xc4\xad\x25" \
+ "\x33\x8a\x1b\x06\x9e\x30\xc4\xb3\x4b\x61\x84\xa5\x30\x21\x49\x18" \
+ "\xad\x55\x24\xb4\x3e\x08\x89\x62\xff\xa4\xb4\x4b\xfb\x00\x40\x88" \
+ "\x66\x4f\x5b\x1c\x9e\x56\xb3\x53\x1b\x66\x55\xfb\xb2\x37\xb7\xb2" \
+ "\x09\xe1\xb4\x4f\xcb\xb5\x09\x81\xab\x57\xab\x9d\xd0\xd6\xac\x63" \
+ "\x5b\x13\xdc\x66\xb6\x67\x6b\x7d\x38\x4b\x6a\x5e\x3b\x81\x28\x5b" \
+ "\x09\x6f\x3b\x7f\x60\xfb\x15\x9a\xb8\xb6\x02\x75\x70\x6a\x8b\xb7" \
+ "\x34\x10\xb7\x46\xc6\xb7\x85\x50\xb7\x9d\x51\x48\x80\x7b\x06\x0d" \
+ "\x5b\xb5\x73\x9b\x7d\x6a\x17\x04\x84\x5b\xb8\x51\xe0\xb7\xfa\xb5" \
+ "\xb7\x8e\xdb\x77\x90\x6b\xb7\xbf\xca\x22\x89\xeb\x21\x6d\x5b\x27" \
+ "\x30\xf7\x2b\x84\x26\xaa\x8b\x8b\x9d\xa1\xb8\x70\x86\x27\x88\x28" \
+ "\x24\xb8\xc3\x87\xae\xb7\xd0\x4e\xbd\x93\xa8\xa1\x8b\x2a\x87\x88" \
+ "\x88\xf7\xa7\x88\x75\x44\x60\x41\x34\xb1\xaf\x0b\xb2\x45\x74\xb8" \
+ "\xb9\x7b\x2a\x65\x54\xb9\xbd\xdb\x05\xbd\x84\xbb\xc1\xcb\x23\x14" \
+ "\x5a\xbc\x02\x34\x5b\xc0\x8b\xbc\x9b\x15\x6a\xc4\xcb\xbc\xae\x01" \
+ "\x6c\xcf\x0b\xbd\x72\x51\x6f\xd3\x4b\xbd\xf1\xaa\x70\x25\x8b\xbd" \
+ "\x11\x93\x71\xbc\xcb\xbd\x1d\xca\x71\xdb\x0b\xbe\xce\x15\x72\xd7" \
+ "\x4b\xbe\x74\xc8\x73\x1f\x8b\xbe\x3d\x38\x81\xe3\xcb\xbe\x7e\xb0" \
+ "\xb4\xeb\x0b\xbf\x1b\x74\xb6\x95\x58\x47\xbf\x7f\x30\xb9\x0a\x30" \
+ "\xbf\xf8\xbb\x04\xfa\x1b\x01\xef\xdb\xbf\xcd\xfb\xbf\x89\x78\xbf" \
+ "\x02\x7c\x56\x04\xec\x74\x8d\x47\xbf\x56\xdb\x74\x86\xd2\xbb\xa3" \
+ "\x9b\xc0\x0c\xab\x68\x69\x4b\x46\x12\xdc\x73\x0b\xfc\x3d\x5c\x78" \
+ "\xc1\xdd\x40\xb4\xcc\xb3\x77\x5a\xcb\xc1\xfa\x72\x72\x75\xf4\x47" \
+ "\x9c\x02\x30\x47\x2b\xc2\x2a\xbc\xc2\x2c\xdc\xc2\x2e\x0c\x6f\x09" \
+ "\x00\x00\x3b\x00"
+
+/* Byte count to use with HTS_DATA_BACK_GIF (the data contains \x00 bytes, so
+   strlen() cannot be used). As laid out above, the literal spells out
+   265 rows of 16 bytes plus a final row of 4, i.e. 4244 bytes; this length
+   stops one byte earlier, at the \x3b GIF trailer. The last \x00 is
+   presumably padding produced when the odd-sized file was dumped as 16-bit
+   words - TODO confirm. */
+#define HTS_DATA_BACK_GIF_LEN 4243
+
+#define HTS_DATA_FADE_GIF \
+ "\x47\x49\x46\x38\x39\x61\x8\x0\x8\x0\xf7\x0\x0\x0\x0\x0\x0\x0\x33\x0\x0\x66\x0\x0\x99\x0\x0\xcc\x0\x0\xff\x0\x33\x0\x0\x33\x33\x0\x33\x66\x0\x33\x99\x0\x33\xcc\x0\x33\xff\x0\x66\x0\x0\x66\x33\x0\x66\x66\x0\x66\x99\x0\x66\xcc\x0\x66\xff\x0\x99\x0\x0\x99\x33\x0\x99\x66\x0\x99\x99\x0\x99\xcc\x0\x99\xff\x0\xcc\x0\x0\xcc\x33\x0\xcc\x66\x0\xcc\x99\x0\xcc\xcc"\
+ "\x0\xcc\xff\x0\xff\x0\x0\xff\x33\x0\xff\x66\x0\xff\x99\x0\xff\xcc\x0\xff\xff\x33\x0\x0\x33\x0\x33\x33\x0\x66\x33\x0\x99\x33\x0\xcc\x33\x0\xff\x33\x33\x0\x33\x33\x33\x33\x33\x66\x33\x33\x99\x33\x33\xcc\x33\x33\xff\x33\x66\x0\x33\x66\x33\x33\x66\x66\x33\x66\x99\x33\x66\xcc\x33\x66\xff\x33\x99\x0\x33\x99\x33\x33\x99\x66\x33\x99\x99\x33\x99\xcc\x33\x99\xff\x33\xcc\x0\x33\xcc\x33\x33"\
+ "\xcc\x66\x33\xcc\x99\x33\xcc\xcc\x33\xcc\xff\x33\xff\x0\x33\xff\x33\x33\xff\x66\x33\xff\x99\x33\xff\xcc\x33\xff\xff\x66\x0\x0\x66\x0\x33\x66\x0\x66\x66\x0\x99\x66\x0\xcc\x66\x0\xff\x66\x33\x0\x66\x33\x33\x66\x33\x66\x66\x33\x99\x66\x33\xcc\x66\x33\xff\x66\x66\x0\x66\x66\x33\x66\x66\x66\x66\x66\x99\x66\x66\xcc\x66\x66\xff\x66\x99\x0\x66\x99\x33\x66\x99\x66\x66\x99\x99\x66\x99\xcc\x66\x99"\
+ "\xff\x66\xcc\x0\x66\xcc\x33\x66\xcc\x66\x66\xcc\x99\x66\xcc\xcc\x66\xcc\xff\x66\xff\x0\x66\xff\x33\x66\xff\x66\x66\xff\x99\x66\xff\xcc\x66\xff\xff\x99\x0\x0\x99\x0\x33\x99\x0\x66\x99\x0\x99\x99\x0\xcc\x99\x0\xff\x99\x33\x0\x99\x33\x33\x99\x33\x66\x99\x33\x99\x99\x33\xcc\x99\x33\xff\x99\x66\x0\x99\x66\x33\x99\x66\x66\x99\x66\x99\x99\x66\xcc\x99\x66\xff\x99\x99\x0\x99\x99\x33\x99\x99\x66"\
+ "\x99\x99\x99\x99\x99\xcc\x99\x99\xff\x99\xcc\x0\x99\xcc\x33\x99\xcc\x66\x99\xcc\x99\x99\xcc\xcc\x99\xcc\xff\x99\xff\x0\x99\xff\x33\x99\xff\x66\x99\xff\x99\x99\xff\xcc\x99\xff\xff\xcc\x0\x0\xcc\x0\x33\xcc\x0\x66\xcc\x0\x99\xcc\x0\xcc\xcc\x0\xff\xcc\x33\x0\xcc\x33\x33\xcc\x33\x66\xcc\x33\x99\xcc\x33\xcc\xcc\x33\xff\xcc\x66\x0\xcc\x66\x33\xcc\x66\x66\xcc\x66\x99\xcc\x66\xcc\xcc\x66\xff\xcc"\
+ "\x99\x0\xcc\x99\x33\xcc\x99\x66\xcc\x99\x99\xcc\x99\xcc\xcc\x99\xff\xcc\xcc\x0\xcc\xcc\x33\xcc\xcc\x66\xcc\xcc\x99\xcc\xcc\xcc\xcc\xcc\xff\xcc\xff\x0\xcc\xff\x33\xcc\xff\x66\xcc\xff\x99\xcc\xff\xcc\xcc\xff\xff\xff\x0\x0\xff\x0\x33\xff\x0\x66\xff\x0\x99\xff\x0\xcc\xff\x0\xff\xff\x33\x0\xff\x33\x33\xff\x33\x66\xff\x33\x99\xff\x33\xcc\xff\x33\xff\xff\x66\x0\xff\x66\x33\xff\x66\x66\xff\x66"\
+ "\x99\xff\x66\xcc\xff\x66\xff\xff\x99\x0\xff\x99\x33\xff\x99\x66\xff\x99\x99\xff\x99\xcc\xff\x99\xff\xff\xcc\x0\xff\xcc\x33\xff\xcc\x66\xff\xcc\x99\xff\xcc\xcc\xff\xcc\xff\xff\xff\x0\xff\xff\x33\xff\xff\x66\xff\xff\x99\xff\xff\xcc\xff\xff\xff\x21\xe\x9\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0"\
+ "\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x21\xf9\x4\x1\x0\x0\xd8\x0\x2c\x0\x0\x0\x0\x8\x0\x8\x0\x0\x8"\
+ "\x19\x0\xaf\x61\x13\x48\x10\xdb\xc0\x83\x4\xb\x16\x44\x88\x50\xe1\x41\x86\x9\x21\x1a\x74\x78\x2d\x20\x0\x3b\xff"
+/* Byte count to use with HTS_DATA_FADE_GIF. An explicit length is needed
+   because the image data contains \x0 bytes, so strlen() would stop early.
+   NOTE(review): 828 has not been re-counted against the literal above
+   (which ends with an extra \xff after the \x3b GIF trailer) - confirm. */
+#define HTS_DATA_FADE_GIF_LEN 828
+
+#endif
diff --git a/src/htsalias.c b/src/htsalias.c
new file mode 100644
index 0000000..65a34e3
--- /dev/null
+++ b/src/htsalias.c
@@ -0,0 +1,520 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: htsalias.c subroutines: */
+/* alias for command-line options and config files */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "htsalias.h"
+#include "htsglobal.h"
+void linput(FILE* fp,char* s,int max);
+void hts_lowcase(char* s);
+
+/* Maps a NULL string pointer to the empty string.
+   The argument is expanded twice, so it must be free of side effects. */
+#define _NOT_NULL(a) ( ((a)!=NULL) ? (a) : "" )
+/* True if c is a blank, CR, LF, TAB or FF.
+   The explicit NUL test is required: strchr() also matches the string
+   terminator, which would make the end of a string look like whitespace
+   and let scanning loops run past it.
+   The argument is expanded twice, so it must be free of side effects. */
+#define is_realspace(c) ( ((c)!='\0') && (strchr(" \x0d\x0a\x09\x0c",(c))!=NULL) )
+
+/* COPY OF cmdl_ins in htsmain.c
+   Insert a command in the argc/argv:
+   - shifts argv[0..argc-1] up by one slot,
+   - copies token into the buff block at offset ptr and makes argv[0] point to it,
+   - advances ptr past the copied string (terminator included) and increments argc.
+   argc and ptr must be modifiable lvalues. The expansion is several statements
+   that are not wrapped in a single block, so it must not be used as the
+   unbraced body of an if/else/loop. No bounds checking is done on argv or buff. */
+#define cmdl_ins(token,argc,argv,buff,ptr) \
+ { \
+ int i; \
+ for(i=argc;i>0;i--)\
+ argv[i]=argv[i-1];\
+ } \
+ argv[0]=(buff+ptr); \
+ strcpy(argv[0],token); \
+ ptr += (strlen(argv[0])+1); \
+ argc++
+// END OF COPY OF cmdl_ins in htsmain.c
+
+
+/*
+ Aliases for command-line and config file definitions
+ These definitions can be used:
+ in command line:
+ --sockets=8 --cache=0
+ --sockets 8 --cache off
+ --nocache
+ -c8 -C0
+ in config file:
+ sockets=8 cache=0
+ set sockets 8 cache off
+
+*/
+/*
+  Option alias table. Each row is { alias, real option, type, help }:
+    alias       : long name, used as --alias on the command line or as a keyword in a config file
+    real option : the short option the alias expands to
+    type        : one of
+      single : no options
+      param  : this option allows a number parameter (1, for example) and can be mixed with other options (R1C1c8)
+      param1 : this option must be alone, and needs one distinct parameter (-P <path>)
+      param0 : this option must be alone, but the parameter should be put together (+*.gif)
+    help        : short description, may be empty
+  Each field holds at most 63 characters. The table ends with a row whose alias is empty.
+  Some aliases and some real options appear more than once: optalias_find() and
+  optreal_find() return the first matching row.
+*/
+const char hts_optalias[][4][64] = {
+  /* {"","","",""}, */
+  {"path","-O","param1","output path"},
+  {"chroot","-%O","param1","default top path"},
+  {"mirror","-w","single",""},
+  {"mirror-wizard","-W","single",""},
+  {"get-files","-g","single",""},
+  {"quiet","-q","single",""},
+  {"mirrorlinks","-Y","single",""},
+  {"proxy","-P","param1","proxy name:port"},
+  {"httpproxy-ftp","-%f","param",""},
+  {"depth","-r","param",""},{"recurse-levels","-r","param",""},
+  {"ext-depth","-%e","param",""},
+  {"max-files","-m","param",""},
+  {"max-size","-M","param",""},
+  {"max-time","-E","param",""},
+  {"max-rate","-A","param",""},
+  {"max-pause","-G","param",""},
+  {"sockets","-c","param","number of simultaneous connections allowed"},{"socket","-c","param","number of simultaneous connections allowed"},{"connection","-c","param","number of simultaneous connections allowed"},
+  {"connection-per-second","-%c","param","number of connection per second allowed"},
+  /* type must be "param": with an empty type the value of --timeout=N was dropped */
+  {"timeout","-T","param",""},
+  {"retries","-R","param","number of retries for non-fatal errors"},
+  {"min-rate","-J","param",""},
+  {"host-control","-H","param",""},
+  {"extended-parsing","-%P","param",""},
+  {"near","-n","single",""},
+  {"test","-t","single",""},
+  {"list","-%L","param1",""},
+  {"language","-%l","param1",""}, {"lang","-%l","param1",""},
+  {"structure","-N","param",""}, {"user-structure","-N","param1",""},
+  {"long-names","-L","param",""},
+  {"keep-links","-K","param",""},
+  {"replace-external","-x","single",""},
+  {"no-passwords","-%x","single",""},{"no-password","-%x","single",""},
+  {"include-query-string","-%q","single",""},
+  {"generate-errors","-o","single",""},
+  {"purge-old","-X","param",""},
+  {"cookies","-b","param",""},
+  {"check-type","-u","param",""},
+  {"assume","-%A","param1",""}, {"mimetype","-%A","param1",""},
+  {"parse-java","-j","param",""},
+  {"protocol","-@i","param",""},
+  {"robots","-s","param",""},
+  {"http-10","-%h","single",""},{"http-1.0","-%h","single",""},
+  {"no-compression","-%z","single",""},
+  {"tolerant","-%B","single",""},
+  {"updatehack","-%s","single",""}, {"sizehack","-%s","single",""},
+  {"user-agent","-F","param1","user-agent identity"},
+  {"footer","-%F","param1",""},
+  /* help text used to be a copy of the one for "retries" */
+  {"cache","-C","param","create/use a cache for updates and retries"},
+  {"store-all-in-cache","-k","single",""},
+  {"do-not-recatch","-%n","single",""},
+  {"do-not-log","-Q","single",""},
+  {"extra-log","-z","single",""},
+  {"debug-log","-Z","single",""},
+  {"verbose","-v","single",""},
+  {"file-log","-f","single",""},
+  {"single-log","-f2","single",""},
+  {"index","-I","single",""},
+  {"search-index","-%I","single",""},
+  {"priority","-p","param",""},
+  {"debug-headers","-%H","single",""},
+  {"userdef-cmd","-V","param1",""},
+  {"structure","-N","param1","user-defined structure"},
+  {"usercommand","-V","param1","user-defined command"},
+  {"display","-%v","single","show files transfered and other funny realtime information"},
+  {"dos83","-L0","single",""},
+  {"iso9660","-L2","single",""},
+  /* */
+
+  /* DEPRECATED */
+  {"stay-on-same-dir","-S","single","stay on the same directory - DEPRECATED"},
+  {"can-go-down","-D","single","can only go down into subdirs - DEPRECATED"},
+  {"can-go-up","-U","single","can only go to upper directories- DEPRECATED"},
+  {"can-go-up-and-down","-B","single","can both go up&down into the directory structure - DEPRECATED"},
+  {"stay-on-same-address","-a","single","stay on the same address - DEPRECATED"},
+  {"stay-on-same-domain","-d","single","stay on the same principal domain - DEPRECATED"},
+  {"stay-on-same-tld","-l","single","stay on the same TLD (eg: .com) - DEPRECATED"},
+  {"go-everywhere","-e","single","go everywhere on the web - DEPRECATED"},
+
+  /* Badly documented */
+  {"debug-testfilters","-#0","param1","debug: test filters"},
+  {"advanced-flushlogs","-#f","single",""},
+  {"advanced-maxfilters","-#F","param",""},
+  {"version","-#h","single",""},
+  {"debug-scanstdin","-#K","single",""},
+  {"advanced-maxlinks","-#L","single",""},
+  {"advanced-progressinfo","-#p","single",""},
+  {"catch-url","-#P","single","catch complex URL through proxy"},
+  {"debug-oldftp","-#R","single",""},
+  {"debug-xfrstats","-#T","single",""},
+  {"advanced-wait","-#u","single",""},
+  {"debug-ratestats","-#Z","single",""},
+  {"exec","-#!","param1",""},
+
+  /* STANDARD ALIASES */
+  {"spider","-p0C0I0t","single",""},
+  {"testsite","-p0C0I0t","single",""},
+  {"testlinks","-r1p0C0I0t","single",""}, {"test","-r1p0C0I0t","single",""}, {"bookmark","-r1p0C0I0t","single",""},
+  {"mirror","-w","single",""},
+  {"testscan","-p0C0I0Q","single",""}, {"scan","-p0C0I0Q","single",""}, {"check","-p0C0I0Q","single",""},
+  {"skeleton","-p1","single",""},
+  {"preserve","-%p","single",""},
+  {"get","-qg","single",""},
+  {"update","-iC2","single",""},
+  {"continue","-iC1","single",""}, {"restart","-iC1","single",""},
+  {"continue","-i","single",""}, /* for help alias */
+  {"sucker","-r999","single",""},
+  {"help","-h","single",""}, {"documentation","-h","single",""}, {"doc","-h","single",""},
+  {"wide","-c32","single",""},
+  {"tiny","-c1","single",""},
+  {"ultrawide","-c48","single",""},
+  {"http10","-%h","single",""},
+  {"filelist","-%L","single",""}, {"list","-%L","single",""},
+  /* END OF ALIASES */
+
+  /* Filters */
+  {"allow","+","param0","allow filter"},
+  {"deny","-","param0","deny filter"},
+  /* */
+
+  /* URLs */
+  {"add","","param0","add URLs"},
+  /* */
+
+  /* Specific */
+  {"user","-%U","param1","output path"},
+  /* */
+
+  /* Internal */
+  {"catchurl","--catchurl","single","catch complex URL through proxy"},
+  {"updatehttrack","--updatehttrack","single","update HTTrack Website Copier"},
+  {"clean","--clean","single","clean up log files and cache"},
+  {"tide","--clean","single","clean up log files and cache"},
+  /* */
+
+  /* end-of-table marker */
+  {"","","",""}
+};
+
+
+/*
+  Check for alias in command-line
+  argc,argv     as in main()
+  n_arg         argument position
+  return_argc   receives the number of strings written to return_argv (1 or 2)
+  return_argv   a char[2][] where to put result; the buffers must be large
+                enough for the expanded option, no size is passed in
+  return_error  buffer in case of syntax error (cleared on entry)
+
+  Accepted forms for argv[n_arg]:
+    --alias=value    value taken from the same argument
+    --noalias        same as --alias=0, unless "noalias" is itself an alias name
+    --alias [value]  value taken from argv[n_arg+1] if the alias takes one
+    -X value         real option of type param1/param0, copied with its value
+    anything else    copied unchanged
+
+  return value: number of arguments treated (0 if error)
+*/
+int optalias_check(int argc,const char * const * argv,int n_arg,
+                   int* return_argc,char** return_argv,
+                   char* return_error) {
+  return_error[0]='\0';
+  *return_argc=1;
+
+  /* Long form: --alias... */
+  if ( (argv[n_arg][0]=='-') && (argv[n_arg][1]=='-') ) {
+    const char* name=argv[n_arg]+2;      /* text after the leading "--" */
+    const char* position;
+    const char* type;
+    char command[1000];
+    char param[1000];
+    int takes_param;
+    int need_param=1;
+    int pos;
+    command[0]=param[0]='\0';
+
+    /* All copies below go through strncat() on an empty buffer so that an
+       over-long argument is truncated instead of overflowing the stack */
+
+    /* --sockets=8 */
+    if ( (position=strchr(name,'=')) != NULL ) {
+      size_t len=(size_t) (position - name);
+      if (len > sizeof(command)-1)
+        len=sizeof(command)-1;
+      /* Copy command */
+      strncat(command,name,len);
+      /* Copy parameter */
+      strncat(param,position+1,sizeof(param)-1);
+    }
+    /* --nocache
+       Only when the full name is not an alias by itself: otherwise aliases
+       such as no-compression or no-passwords could never be reached */
+    else if ( (strncmp(name,"no",2)==0) && (optalias_find(name)<0) ) {
+      strncat(command,name+2,sizeof(command)-1);
+      strcpy(param,"0");
+    }
+    /* --sockets 8 */
+    else {
+      /* a leading "wide-" or "tiny-" is accepted and skipped */
+      if ( (strncmp(name,"wide-",5)==0) || (strncmp(name,"tiny-",5)==0) )
+        name+=5;
+      strncat(command,name,sizeof(command)-1);
+      need_param=2;
+    }
+
+    /* Now solve the alias */
+    pos=optalias_find(command);
+    if (pos<0) {
+      /* name length is capped so that a 256-byte error buffer cannot overflow */
+      sprintf(return_error,"Unknown option: %.200s\n",command);
+      return 0;
+    }
+    type=hts_optalias[pos][2];
+    takes_param=(strncmp(type,"param",5)==0);
+
+    /* Copy real name */
+    strcpy(command,hts_optalias[pos][1]);
+
+    /* With parameters? */
+    if (takes_param) {
+      /* Copy parameters? */
+      if (need_param == 2) {
+        if ((n_arg+1>=argc) || (argv[n_arg+1][0]=='-')) {      /* no supplemental parameter */
+          /* the help text comes from the row that was found: command now
+             holds the real option, which is not a key of the alias column */
+          sprintf(return_error,
+            "Syntax error:\n\tOption %s needs to be followed by a parameter: %s <param>\n\t%s\n",
+            command,command,hts_optalias[pos][3]);
+          return 0;
+        }
+        param[0]='\0';
+        strncat(param,argv[n_arg+1],sizeof(param)-1);
+      }
+    } else
+      need_param=1;
+
+    /* Final result */
+
+    /* Must be alone (-P /tmp) */
+    if (strcmp(type,"param1")==0) {
+      strcpy(return_argv[0],command);
+      strcpy(return_argv[1],param);
+      *return_argc=2;       /* 2 parameters returned */
+    }
+    /* Alone with parameter (+*.gif) */
+    else if (strcmp(type,"param0")==0) {
+      /* Command */
+      strcpy(return_argv[0],command);
+      strcat(return_argv[0],param);
+    }
+    /* Together (-c8) */
+    else {
+      /* Command */
+      strcpy(return_argv[0],command);
+      /* Parameters accepted */
+      if (takes_param) {
+        /* --cache=off or --index=on */
+        if (strcmp(param,"off")==0)
+          strcat(return_argv[0],"0");
+        else if (strcmp(param,"on")!=0)
+          strcat(return_argv[0],param);
+        /* "on" is the default: nothing is appended */
+      }
+      *return_argc=1;       /* 1 parameter returned */
+    }
+    return need_param;
+  }
+
+  /* Check -P <path> */
+  {
+    int pos=optreal_find(argv[n_arg]);
+    if (pos>=0) {
+      const char* type=hts_optalias[pos][2];
+      if ( (strcmp(type,"param1")==0) || (strcmp(type,"param0")==0) ) {
+        if ((n_arg+1>=argc) || (argv[n_arg+1][0]=='-')) {      /* no supplemental parameter */
+          /* argv[n_arg] is a real option here, so the help text is read
+             from the matching row rather than searched by alias name */
+          sprintf(return_error,
+            "Syntax error:\n\tOption %s needs to be followed by a parameter: %s <param>\n\t%s\n",
+            argv[n_arg],argv[n_arg],hts_optalias[pos][3]);
+          return 0;
+        }
+        /* Copy parameters */
+        strcpy(return_argv[0],argv[n_arg]);
+        strcpy(return_argv[1],argv[n_arg+1]);
+        /* And return */
+        *return_argc=2;     /* 2 parameters returned */
+        return 2;           /* 2 parameters used */
+      }
+    }
+  }
+
+  /* Copy and return other unknown option */
+  strcpy(return_argv[0],argv[n_arg]);
+  return 1;
+}
+
+/* Looks <token> up among the alias names (first column of hts_optalias).
+   Returns the index of the first matching row, or -1 if failed
+   (an empty token never matches) */
+int optalias_find(const char* token) {
+  int row;
+  if (token[0] == '\0')
+    return -1;
+  /* the table ends with a row whose alias is empty */
+  for(row=0 ; hts_optalias[row][0][0] != '\0' ; row++) {
+    if (strcmp(token,hts_optalias[row][0]) == 0)
+      return row;
+  }
+  return -1;
+}
+
+/* Looks <token> up among the real options (second column of hts_optalias).
+   Returns the index of the first matching row, or -1 if failed
+   (an empty token never matches) */
+int optreal_find(const char* token) {
+  int row;
+  if (token[0] == '\0')
+    return -1;
+  /* the end of the table is detected on the alias column */
+  for(row=0 ; hts_optalias[row][0][0] != '\0' ; row++) {
+    if (strcmp(token,hts_optalias[row][1]) == 0)
+      return row;
+  }
+  return -1;
+}
+
+/* Real option (second column) of row p; p is not range-checked */
+const char* optreal_value(int p) {
+  const char* real_option=hts_optalias[p][1];
+  return real_option;
+}
+/* Alias name (first column) of row p; p is not range-checked */
+const char* optalias_value(int p) {
+  const char* alias_name=hts_optalias[p][0];
+  return alias_name;
+}
+/* Type string (third column) of row p; p is not range-checked */
+const char* opttype_value(int p) {
+  const char* type_name=hts_optalias[p][2];
+  return type_name;
+}
+/* Help text (fourth column) of row p; p is not range-checked */
+const char* opthelp_value(int p) {
+  const char* help_text=hts_optalias[p][3];
+  return help_text;
+}
+
+/* Help text for the alias <token>: the (possibly empty) fourth column of its
+   row, or NULL when <token> is not a known alias */
+const char* optalias_help(const char* token) {
+  const int row=optalias_find(token);
+  return (row<0) ? NULL : hts_optalias[row][3];
+}
+
+/* Include a file to the current command line */
+/* example:
+  set sockets 8
+  index on
+  allow *.gif
+  deny ad.*
+*/
+/*
+  Each non-empty line that does not start with # / or ; is lowercased, split
+  into an option and a parameter, resolved with optalias_check() and inserted
+  into argv after the program name, in file order.
+  name              file to read
+  argc,argv         command line to extend; argv must have room for the new entries
+  x_argvblk,x_ptr   storage block for the inserted strings and its current offset
+  Lines that cannot be resolved are reported on stdout and skipped.
+  return value: 1 if the file could be opened, 0 otherwise
+*/
+int optinclude_file(const char* name,
+                    int* argc,char** argv,char* x_argvblk,int* x_ptr) {
+  FILE* fp;
+  fp=fopen(name,"rb");
+  if (fp) {
+    char line[256];
+    int insert_after=1;     /* first, insert after program filename */
+    while(!feof(fp)) {
+      char *a,*b;
+      int result;
+      size_t len;
+
+      /* read line */
+      linput(fp,line,250);
+      hts_lowcase(line);
+      if (!strnotempty(line))
+        continue;
+      /* no comment line: # // ; */
+      if (strchr("#/;",line[0])!=NULL)
+        continue;
+
+      /* right trim; bounded by the start of the buffer so that a line made
+         only of blanks cannot make the scan walk before line[0] */
+      len=strlen(line);
+      while( (len>0) && is_realspace(line[len-1]) )
+        line[--len]='\0';
+
+      /* jump "set " and spaces; every scan stops on the terminator */
+      a=line;
+      while( (*a) && is_realspace(*a) ) a++;
+      if (strncmp(a,"set",3)==0) {
+        if ( (a[3]) && is_realspace(a[3]) ) {
+          a+=4;
+        }
+      }
+      while( (*a) && is_realspace(*a) ) a++;
+
+      /* nothing but blanks on this line */
+      if (*a == '\0')
+        continue;
+
+      /* delete = ("sockets=8") */
+      if ( (b=strchr(a,'=')) )
+        *b=' ';
+
+      /* isolate option and parameter */
+      b=a;
+      while( (*b) && (!is_realspace(*b)) ) b++;
+      if (*b) {
+        *b='\0';
+        b++;
+      }
+      /* blanks between option and parameter ("sockets = 8") */
+      while( (*b) && is_realspace(*b) ) b++;
+      /* a is now the option, b the parameter */
+
+      {
+        int return_argc;
+        char return_error[256];
+        char _tmp_argv[4][HTS_CDLMAXSIZE];
+        char* tmp_argv[4];
+        /* slots 0-1: fake command line given to optalias_check,
+           slots 2-3: expanded option(s) it returns */
+        tmp_argv[0]=_tmp_argv[0]; tmp_argv[1]=_tmp_argv[1]; tmp_argv[2]=_tmp_argv[2]; tmp_argv[3]=_tmp_argv[3];
+        strcpy(tmp_argv[0],"--");
+        strcat(tmp_argv[0],a);
+        strcpy(tmp_argv[1],b);
+
+        result=optalias_check(2,(const char * const *)tmp_argv,0,
+          &return_argc,(tmp_argv+2),
+          return_error);
+        if (!result) {
+          printf("%s\n",return_error);
+        } else {
+          int insert_after_argc;
+          /* Insert parameters BUT so that they can be in the same order */
+          /* temporary argc: Number of parameters after minus insert_after_argc */
+          insert_after_argc=(*argc)-insert_after;
+          cmdl_ins((tmp_argv[2]),insert_after_argc,(argv+insert_after),x_argvblk,(*x_ptr));
+          *argc=insert_after_argc+insert_after;
+          insert_after++;
+          /* Second one */
+          if (return_argc>1) {
+            insert_after_argc=(*argc)-insert_after;
+            cmdl_ins((tmp_argv[3]),insert_after_argc,(argv+insert_after),x_argvblk,(*x_ptr));
+            *argc=insert_after_argc+insert_after;
+            insert_after++;
+          }
+        }
+      }
+    }
+    fclose(fp);
+    return 1;
+  }
+  return 0;
+}
+
+/* Home directory as given by the HOME environment variable,
+   or "." when the variable is not set */
+/* example: /home/smith */
+char* hts_gethome(void) {
+  char* env_home = getenv( "HOME" );
+  return (env_home != NULL) ? env_home : ".";
+}
+
+/* Convert ~/foo into /home/smith/foo (in place).
+   Only a lone "~" or a "~" followed by a path separator is expanded; other
+   strings starting with "~" (such as "~user/foo") are left untouched, since
+   prefixing them with the current home directory would build a wrong path.
+   The expanded path is truncated to the size of the temporary buffer.
+   str must be large enough to receive the result: its size is not known here. */
+void expand_home(char* str) {
+  if ( (str[0] == '~')
+    && ( (str[1] == '\0') || (str[1] == '/') || (str[1] == '\\') ) ) {
+    char tempo[HTS_URLMAXSIZE*2];
+    tempo[0]='\0';
+    /* bounded copies: HOME comes from the environment and may be of any length */
+    strncat(tempo,hts_gethome(),sizeof(tempo)-1);
+    strncat(tempo,str+1,sizeof(tempo)-strlen(tempo)-1);
+    strcpy(str,tempo);
+  }
+}
diff --git a/src/htsalias.h b/src/htsalias.h
new file mode 100644
index 0000000..1c94b19
--- /dev/null
+++ b/src/htsalias.h
@@ -0,0 +1,58 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: htsalias.h subroutines: */
+/* alias for command-line options and config files */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+
+#ifndef HTSALIAS_DEFH
+#define HTSALIAS_DEFH
+
+extern const char hts_optalias[][4][64]; /* rows of { alias, real option, type, help }, ended by a row with an empty alias */
+int optalias_check(int argc,const char * const * argv,int n_arg,
+ int* return_argc,char** return_argv,
+ char* return_error); /* expands argv[n_arg] into return_argv; returns the number of arguments treated, 0 on error */
+int optalias_find(const char* token); /* index of the alias <token>, or -1 */
+const char* optalias_help(const char* token); /* help text of the alias <token>, or NULL if unknown */
+int optreal_find(const char* token); /* index of the real option <token>, or -1 */
+int optinclude_file(const char* name,
+ int* argc,char** argv,char* x_argvblk,int* x_ptr); /* inserts the options read from file <name> into argc/argv; 1 if the file could be opened, 0 otherwise */
+const char* optreal_value(int p); /* real option of row p */
+const char* optalias_value(int p); /* alias name of row p */
+const char* opttype_value(int p); /* type string of row p */
+const char* opthelp_value(int p); /* help text of row p */
+char* hts_gethome(void); /* HOME environment variable, or "." if it is not set */
+void expand_home(char* str); /* replaces a leading ~ of str by the home directory, in place */
+
+#endif
diff --git a/src/htsback.c b/src/htsback.c
new file mode 100644
index 0000000..d99564f
--- /dev/null
+++ b/src/htsback.c
@@ -0,0 +1,2462 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: htsback.c subroutines: */
+/* backing system (multiple socket download) */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+#include "htsback.h"
+
+/* specific definitions */
+#include "htsbase.h"
+#include "htsnet.h"
+#include "htsthread.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+/* END specific definitions */
+
+//#if HTS_WIN
+#include "htsftp.h"
+#if HTS_USEZLIB
+#include "htszlib.h"
+#endif
+//#endif
+
+#if HTS_WIN
+#ifndef __cplusplus
+// DOS
+#include <process.h> /* _beginthread, _endthread */
+#endif
+#else
+#endif
+
+#undef test_flush
+#define test_flush if (opt->flush) { if (opt->log) { fflush(opt->log); } if (opt->errlog) { fflush(opt->errlog); } } /* flushes the open log streams when opt->flush is set; needs a variable named opt in scope and expands to a bare if statement */
+
+#define VT_CLREOL "\33[K" /* ESC [ K: ANSI erase-to-end-of-line sequence */
+
+
+// ---
+// backing routines
+// returns the index of a link in a backing array, -1 if it is not there
+// only entries with status>=0 are considered; among those matching adr and fil,
+// an entry other than the first one that also matches sav is returned at once,
+// otherwise the first match is returned
+int back_index(lien_back* back,int back_max,char* adr,char* fil,char* sav) {
+  int first_match=-1;
+  int slot;
+  for(slot=0 ; slot<back_max ; slot++) {
+    if (back[slot].status<0)          // neither receiving nor ready
+      continue;
+    if (!strfield2(back[slot].url_adr,adr))
+      continue;
+    if (strcmp(back[slot].url_fil,fil)!=0)
+      continue;
+    if (first_match==-1)              /* first time we meet, store it */
+      first_match=slot;
+    else if (strcmp(back[slot].url_sav,sav)==0)   /* oops, check sav too */
+      return slot;
+  }
+  return first_match;
+}
+
+// number of free entries (status -1) in the backing array
+int back_available(lien_back* back,int back_max) {
+  int free_count=0;
+  int slot=0;
+  while(slot<back_max) {
+    if (back[slot].status==-1) {     /* free */
+      free_count++;
+    }
+    slot++;
+  }
+  return free_count;
+}
+
+// returns an estimate of the size of the html and files held in memory
+LLint back_incache(lien_back* back,int back_max) {
+  LLint total=0;
+  int slot;
+  for(slot=0 ; slot<back_max ; slot++) {
+    // only count used entries that own a memory block
+    if ( (back[slot].status==-1) || (!back[slot].r.adr) )
+      continue;
+    total+=max(back[slot].r.size,back[slot].r.totalsize);
+  }
+  return total;
+}
+
+// has the link been put in the backing array? (1 if back_index finds it, else 0)
+HTS_INLINE int back_exist(lien_back* back,int back_max,char* adr,char* fil,char* sav) {
+  const int found_at=back_index(back,back_max,adr,fil,sav);
+  return (found_at>=0);
+}
+
+// number of sockets working in the background (entries with status>0)
+int back_nsoc(lien_back* back,int back_max) {
+  int receiving=0;
+  int slot=0;
+  while(slot<back_max) {
+    if (back[slot].status>0) {       // receiving only
+      receiving++;
+    }
+    slot++;
+  }
+  return receiving;
+}
+
+// object (link) downloaded, or transferred from the cache
+//
+// closes the transfer of entry p: unpacks a compressed body (HTS_USEZLIB
+// builds), updates the statistics, writes the log and cache->txt report lines,
+// hands the result to cache_mayadd() and fires the transfer-status callback
+// (HTS_ANALYSTE builds)
+//
+// the entry is only processed when it is ready (status 0), not in test mode,
+// and its status code is positive; returns 0 in that case, -1 otherwise
+int back_finalize(httrackp* opt,cache_back* cache,lien_back* back,int p) {
+  if (
+    (back[p].status == 0)         // ready
+    &&
+    (!back[p].testmode)           // not test mode
+    &&
+    (back[p].r.statuscode>0)      // not internal error
+    ) {
+    char* state="unknown";
+
+    /* decompression */
+#if HTS_USEZLIB
+    if (back[p].r.compressed) {
+      if (back[p].r.size > 0) {
+        // stats
+        back[p].compressed_size=back[p].r.size;
+        // in memory -> move to disk
+        if (!back[p].r.is_write) {
+          char* tmpname;
+          back[p].tmpfile[0]='\0';
+          /* tempnam() returns a malloc'ed string, or NULL on failure:
+             check it before copying, and release it afterwards */
+          tmpname=tempnam(NULL,"httrz");
+          if (tmpname!=NULL) {
+            strcpy(back[p].tmpfile,tmpname);
+            free(tmpname);
+          } else {
+            back[p].r.statuscode=-1;
+            strcpy(back[p].r.msg,"Open error when decompressing");
+          }
+          if (back[p].tmpfile[0]) {
+            back[p].r.out=fopen(back[p].tmpfile,"wb");
+            if (back[p].r.out) {
+              if ((back[p].r.adr) && (back[p].r.size>0)) {
+                if ((INTsys)fwrite(back[p].r.adr,1,(INTsys)back[p].r.size,back[p].r.out) != back[p].r.size) {
+                  back[p].r.statuscode=-1;
+                  strcpy(back[p].r.msg,"Write error when decompressing");
+                }
+              } else {
+                back[p].tmpfile[0]='\0';
+                back[p].r.statuscode=-1;
+                strcpy(back[p].r.msg,"Empty compressed file");
+              }
+            } else {
+              back[p].tmpfile[0]='\0';
+              back[p].r.statuscode=-1;
+              strcpy(back[p].r.msg,"Open error when decompressing");
+            }
+          }
+        }
+        // close output file
+        if (back[p].r.out!=NULL) {
+          fclose(back[p].r.out);
+          back[p].r.out=NULL;
+        }
+        // decompression
+        if (back[p].tmpfile[0] && back[p].url_sav[0]) {
+          LLint size;
+          filecreateempty(back[p].url_sav);      // filenote & co
+          if ((size = hts_zunpack(back[p].tmpfile,back[p].url_sav))>=0) {
+            back[p].r.size=back[p].r.totalsize=size;
+            // file -> memory
+            if (!back[p].r.is_write) {
+              /* release the compressed block before replacing it,
+                 otherwise it is lost for good */
+              if (back[p].r.adr!=NULL) {
+                freet(back[p].r.adr);
+                back[p].r.adr=NULL;
+              }
+              back[p].r.adr=readfile(back[p].url_sav);
+              if (!back[p].r.adr) {
+                back[p].r.statuscode=-1;
+                strcpy(back[p].r.msg,"Read error when decompressing");
+              }
+              remove(back[p].url_sav);
+            }
+          }
+          remove(back[p].tmpfile);
+        }
+        // stats
+        HTS_STAT.total_packed+=back[p].compressed_size;
+        HTS_STAT.total_unpacked+=back[p].r.size;
+        HTS_STAT.total_packedfiles++;
+        // unflag
+      }
+    }
+    back[p].r.compressed=0;
+#endif
+
+    /* Stats: first half of the cache->txt report line (time, sizes, flags) */
+    if (cache->txt) {
+      char flags[32];
+      char s[256];
+      time_t tt;
+      struct tm* A;
+      tt=time(NULL);
+      A=localtime(&tt);
+      strftime(s,250,"%H:%M:%S",A);
+
+      flags[0]='\0';
+      /* input flags */
+      if (back[p].is_update)
+        strcat(flags, "U");       // update request
+      else
+        strcat(flags, "-");
+      if (back[p].range_req_size)
+        strcat(flags, "R");       // range request
+      else
+        strcat(flags, "-");
+      /* state flags */
+      if (back[p].r.is_file)      // direct to disk
+        strcat(flags, "F");
+      else
+        strcat(flags, "-");
+      /* output flags */
+      if (!back[p].r.notmodified)
+        strcat(flags, "M");       // modified
+      else
+        strcat(flags, "-");
+      if (back[p].r.is_chunk)     // chunked
+        strcat(flags, "C");
+      else
+        strcat(flags, "-");
+      // NOTE(review): in HTS_USEZLIB builds r.compressed has been reset above,
+      // so "Z" cannot be reported from here - confirm this is intended
+      if (back[p].r.compressed)
+        strcat(flags, "Z");       // gzip
+      else
+        strcat(flags, "-");
+      fprintf(cache->txt,"%s\t"LLintP"/"LLintP"\t%s\t", s,
+        back[p].r.size, back[p].r.totalsize,
+        flags);
+    }
+    if (back[p].r.statuscode==200) {
+      if (back[p].r.size>=0) {
+        /* robots.txt is not counted in the transfer statistics */
+        if (strcmp(back[p].url_fil,"/robots.txt") !=0 ) {
+          HTS_STAT.stat_bytes+=back[p].r.size;
+          HTS_STAT.stat_files++;
+        }
+        if ( (!back[p].r.notmodified) && (opt->is_update) ) {
+          HTS_STAT.stat_updated_files++;       // modified page
+          if (opt->log!=NULL) {
+            fspc(opt->log,"info");
+            if (back[p].is_update) {
+              fprintf(opt->log,"engine: transfer-status: link updated: %s%s -> %s"LF,back[p].url_adr,back[p].url_fil,back[p].url_sav);
+            } else {
+              fprintf(opt->log,"engine: transfer-status: link added: %s%s -> %s"LF,back[p].url_adr,back[p].url_fil,back[p].url_sav);
+            }
+            test_flush;
+          }
+          if (cache->txt) {
+            if (back[p].is_update) {
+              state="updated";
+            } else {
+              state="added";
+            }
+          }
+        } else {
+          if ( (opt->debug>0) && (opt->log!=NULL) ) {
+            fspc(opt->log,"info"); fprintf(opt->log,"engine: transfer-status: link recorded: %s%s -> %s"LF,back[p].url_adr,back[p].url_fil,back[p].url_sav);
+            test_flush;
+          }
+          if (cache->txt) {
+            if (opt->is_update)
+              state="untouched";
+            else
+              state="added";
+          }
+        }
+      } else {
+        if ( (opt->debug>0) && (opt->log!=NULL) ) {
+          fspc(opt->log,"info"); fprintf(opt->log,"engine: transfer-status: empty file? (%d, '%s'): %s%s"LF,back[p].r.statuscode,back[p].r.msg,back[p].url_adr,back[p].url_fil);
+          test_flush;
+        }
+        if (cache->txt) {
+          state="empty";
+        }
+      }
+    } else {
+      if ( (opt->debug>0) && (opt->log!=NULL) ) {
+        fspc(opt->log,"info"); fprintf(opt->log,"engine: transfer-status: link error (%d, '%s'): %s%s"LF,back[p].r.statuscode,back[p].r.msg,back[p].url_adr,back[p].url_fil);
+      }
+      if (cache->txt) {
+        state="error";
+      }
+    }
+    /* second half of the cache->txt report line (status, type, urls, referer) */
+    if (cache->txt) {
+      fprintf(cache->txt,
+        "%d\t"
+        "%s ('%s')\t"
+        "%s\t"
+        "%s%s\t"
+        "%s%s\t%s\t"
+        "(from %s%s)"
+        LF,
+        back[p].r.statuscode,
+        state, escape_check_url_addr(back[p].r.msg),
+        escape_check_url_addr(back[p].r.contenttype),
+        ((back[p].r.etag[0])?"etag:":((back[p].r.lastmodified[0])?"date:":"")), escape_check_url_addr((back[p].r.etag[0])?back[p].r.etag:(back[p].r.lastmodified)),
+        escape_check_url_addr(back[p].url_adr),escape_check_url_addr(back[p].url_fil),escape_check_url_addr(back[p].url_sav),
+        escape_check_url_addr(back[p].referer_adr),escape_check_url_addr(back[p].referer_fil)
+        );
+      if (opt->flush)
+        fflush(cache->txt);
+    }
+
+    /* Cache */
+    cache_mayadd(opt,cache,&back[p].r,back[p].url_adr,back[p].url_fil,back[p].url_sav);
+
+    // status finished callback
+#if HTS_ANALYSTE
+    hts_htmlcheck_xfrstatus(&back[p]);
+#endif
+    return 0;
+  }
+  return -1;
+}
+
+
+/* delete entry
+   Releases everything held by backing entry p (socket, OpenSSL connection when
+   HTS_USEOPENSSL is set, memory blocks, input and output files), then clears
+   the whole structure and marks the slot as free (status -1).
+   When the entry was written directly to disk, the file date is first set from
+   the last-modified field and the user command is run on the saved file.
+   A negative p is ignored. Always returns 0. */
+int back_delete(lien_back* back,int p) {
+ if (p>=0) { // you never know..
+ // Integrity checker
+ #if DEBUG_CHECKINT
+ _CHECKINT(&back[p],"Appel back_delete")
+ #endif
+#if HTS_DEBUG_CLOSESOCK
+ char info[256];
+ sprintf(info,"back_delete: #%d\n",p);
+ DEBUG_W2(info);
+#endif
+
+ // Free all sockets, handles, buffers..
+ if (back[p].r.soc!=INVALID_SOCKET) {
+#if HTS_DEBUG_CLOSESOCK
+ DEBUG_W("back_delete: deletehttp\n");
+#endif
+ deletehttp(&back[p].r);
+ back[p].r.soc=INVALID_SOCKET;
+ }
+
+#if HTS_USEOPENSSL
+ /* Free OpenSSL structures */
+ if (back[p].r.ssl_con) {
+ SSL_shutdown(back[p].r.ssl_con);
+ SSL_free(back[p].r.ssl_con);
+ back[p].r.ssl_con=NULL;
+ }
+ /*
+ if (back[p].r.ssl_soc) {
+ BIO_free_all(back[p].r.ssl_soc);
+ back[p].r.ssl_soc=NULL;
+ }
+ */
+#endif
+
+ if (back[p].r.adr!=NULL) { // a block remains to be freed
+ freet(back[p].r.adr);
+ back[p].r.adr=NULL;
+ }
+ if (back[p].chunk_adr!=NULL) { // a block remains to be freed
+ freet(back[p].chunk_adr);
+ back[p].chunk_adr=NULL;
+ back[p].chunk_size=0;
+ back[p].is_chunk=0;
+ }
+ // if (back[p].r.is_file) { // close input file
+ if (back[p].r.fp!=NULL) {
+ fclose(back[p].r.fp);
+ back[p].r.fp=NULL;
+ }
+ // }
+
+ /* output file */
+ if (back[p].r.out!=NULL) { // close output file
+ fclose(back[p].r.out);
+ back[p].r.out=NULL;
+ }
+
+ if (back[p].r.is_write) { // direct write
+ /* write the "remote" date */
+ if (strnotempty(back[p].url_sav)) // normally exists if we have an output file
+ if (strnotempty(back[p].r.lastmodified)) // last-modified exists
+ if (fexist(back[p].url_sav)) // and so does the file
+ set_filetime_rfc822(back[p].url_sav,back[p].r.lastmodified);
+
+ /* run the user command after the file has been loaded */
+ usercommand(0,NULL,back[p].url_sav);
+ back[p].r.is_write=0;
+ }
+
+ // Clean up everything
+ memset(&back[p], 0, sizeof(lien_back));
+ back[p].r.soc=INVALID_SOCKET; back[p].r.location=back[p].location_buffer;
+
+ // Most important: free the slot
+ back[p].status=-1;
+ }
+ return 0;
+}
+
+/* Space left on backing stack: number of entries whose status is -1 */
+int back_stack_available(lien_back* back,int back_max) {
+  int free_slots=0;
+  int slot=0;
+  while(slot < back_max) {
+    if ( back[slot].status == -1 ) {
+      free_slots++;
+    }
+    slot++;
+  }
+  return free_slots;
+}
+
+// ajouter un lien en backing
+int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* referer_adr,char* referer_fil,int test,short int* pass2_ptr) {
+ int p=0;
+
+ // vérifier cohérence de adr et fil (non vide!)
+ if (strnotempty(adr)==0) {
+ if ((opt->debug>1) && (opt->errlog!=NULL)) {
+ fspc(opt->errlog,"debug"); fprintf(opt->errlog,"error: adr is empty for back_add"LF);
+ }
+ return -1; // erreur!
+ }
+ if (strnotempty(fil)==0) {
+ if ((opt->debug>1) && (opt->errlog!=NULL)) {
+ fspc(opt->errlog,"debug"); fprintf(opt->errlog,"error: fil is empty for back_add"LF);
+ }
+ return -1; // erreur!
+ }
+ // FIN vérifier cohérence de adr et fil (non vide!)
+
+ // rechercher emplacement
+ while((p<back_max) && back[p].status!=-1) p++;
+ if (back[p].status==-1) { // ok on a de la place
+ back[p].send_too[0]='\0'; // éventuels paramètres supplémentaires à transmettre au serveur
+
+ // ne sert à rien normalement
+ if (back[p].r.soc!=INVALID_SOCKET) {
+#if HTS_DEBUG_CLOSESOCK
+ DEBUG_W("back_add: deletehttp\n");
+#endif
+ deletehttp(&back[p].r);
+ }
+
+ // effacer r
+ memset(&(back[p].r), 0, sizeof(htsblk)); back[p].r.soc=INVALID_SOCKET; back[p].r.location=back[p].location_buffer;
+
+ // créer entrée
+ strcpy(back[p].url_adr,adr);
+ strcpy(back[p].url_fil,fil);
+ strcpy(back[p].url_sav,save);
+ back[p].pass2_ptr=pass2_ptr;
+ // copier referer si besoin
+ strcpy(back[p].referer_adr,"");
+ strcpy(back[p].referer_fil,"");
+ if ((referer_adr) && (referer_fil)) { // existe
+ if ((strnotempty(referer_adr)) && (strnotempty(referer_fil))) { // non vide
+ if (referer_adr[0]!='!') { // non détruit
+ if (strcmp(referer_adr,"file://")) { // PAS file://
+ if (strcmp(referer_adr,"primary")) { // pas referer 1er lien
+ strcpy(back[p].referer_adr,referer_adr);
+ strcpy(back[p].referer_fil,referer_fil);
+ }
+ }
+ }
+ }
+ }
+ // sav ne sert à rien pour le moment
+ back[p].r.size=0; // rien n'a encore été chargé
+ back[p].r.soc=INVALID_SOCKET; // pas de socket
+ back[p].r.adr=NULL; // pas de bloc de mémoire
+ back[p].r.is_write=0; // à priori stockage en mémoire
+ back[p].maxfile_html=opt->maxfile_html;
+ back[p].maxfile_nonhtml=opt->maxfile_nonhtml;
+ back[p].testmode=test; // mode test?
+ if (!opt->http10) // option "forcer 1.0" désactivée
+ back[p].http11=1; // autoriser http/1.1
+ back[p].head_request=0;
+ if (strcmp(back[p].url_sav,BACK_ADD_TEST)==0) // HEAD
+ back[p].head_request=1;
+ else if (strcmp(back[p].url_sav,BACK_ADD_TEST2)==0) // test en GET
+ back[p].head_request=2; // test en get
+
+
+ /* Stop requested - abort backing */
+ if (opt->state.stop) {
+ back[p].r.statuscode=-1; // fatal
+ strcpy(back[p].r.msg,"mirror stopped by user");
+ back[p].status=0; // terminé
+ if ((opt->debug>0) && (opt->log!=NULL)) {
+ fspc(opt->log,"warning"); fprintf(opt->log,"File not added due to mirror cancel: %s%s"LF,adr,fil); test_flush;
+ }
+ return 0;
+ }
+
+
+ // tester cache
+ if ((strcmp(adr,"file://")) /* pas fichier */
+ && ( (!test) || (cache->type==1) ) /* cache prioritaire, laisser passer en test! */
+ && ( (strnotempty(save)) || (strcmp(fil,"/robots.txt")==0) ) ) { // si en test on ne doit pas utiliser le cache sinon telescopage avec le 302..
+ //if ((!test) && (strcmp(adr,"file://"))
+ //if ((!test) && (strncmp(adr,"ftp://",6)) && (strcmp(adr,"file://"))
+#if HTS_FAST_CACHE
+ long int hash_pos;
+ int hash_pos_return=0;
+#else
+ char* a=NULL;
+#endif
+#if HTS_FAST_CACHE
+ if (cache->hashtable) {
+#else
+ if (cache->use) {
+#endif
+ char buff[HTS_URLMAXSIZE*4];
+#if HTS_FAST_CACHE
+ strcpy(buff,adr); strcat(buff,fil);
+ hash_pos_return=inthash_read((inthash)cache->hashtable,buff,(long int*)&hash_pos);
+#else
+ buff[0]='\0'; strcat(buff,"\n"); strcat(buff,adr); strcat(buff,"\n"); strcat(buff,fil); strcat(buff,"\n");
+ a=strstr(cache->use,buff);
+#endif
+
+ // Ok, noté en cache->. mais bien présent dans le cache ou sur disque?
+#if HTS_FAST_CACHE
+ if (hash_pos_return) {
+#else
+ if (a) {
+#endif
+ if (!test) { // non mode test
+#if HTS_FAST_CACHE
+ int pos=hash_pos;
+#else
+ int pos=-1;
+ a+=strlen(buff);
+ sscanf(a,"%d",&pos); // lire position
+#endif
+ if (pos<0) { // pas de mise en cache data, vérifier existence
+ if (fsize(antislash(save)) <= 0) { // fichier existe pas ou est vide!
+#if HTS_FAST_CACHE
+ hash_pos_return=0;
+#else
+ a=NULL;
+#endif
+ // dévalider car non présent sur disque dans structure originale!!!
+ // sinon, le fichier est ok à priori, mais on renverra un if-modified-since pour
+ // en être sûr
+ if (opt->norecatch) { // tester norecatch
+ if (!fexist(antislash(save))) { // fichier existe pas mais déclaré: on l'a effacé
+ FILE* fp=fopen(antislash(save),"wb");
+ if (fp) fclose(fp);
+ if (opt->log!=NULL) {
+ fspc(opt->log,"warning"); fprintf(opt->log,"File must have been erased by user, ignoring: %s%s"LF,back[p].url_adr,back[p].url_fil); test_flush;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ //
+ } else
+#if HTS_FAST_CACHE
+ hash_pos_return=0;
+#else
+ a=NULL;
+#endif
+
+ // Existe pas en cache, ou bien pas de cache présent
+#if HTS_FAST_CACHE
+ if (hash_pos_return) { // OK existe en cache (et données aussi)!
+#else
+ if (a!=NULL) { // OK existe en cache (et données aussi)!
+#endif
+ if (cache->type==1) { // cache prioritaire (pas de test if-modified..)
+ // dans ce cas on peut également lire des réponses cachées comme 404,302...
+ // lire dans le cache
+ if (!test)
+ back[p].r=cache_read(opt,cache,adr,fil,save);
+ else
+ back[p].r=cache_read(opt,cache,adr,fil,NULL); // charger en tête uniquement du cache
+ if (!back[p].r.location)
+ back[p].r.location=back[p].location_buffer;
+ else { /* recopier */
+ strcpy(back[p].location_buffer,back[p].r.location);
+ back[p].r.location=back[p].location_buffer;
+ }
+
+ /* Interdiction taille par le wizard? --> détruire */
+ if (back[p].r.statuscode != -1) { // pas d'erreur de lecture
+ if (!back_checksize(opt,&back[p],0)) {
+ back[p].status=0; // FINI
+ back[p].r.statuscode=-1;
+ if (!back[p].testmode)
+ strcpy(back[p].r.msg,"Cached file skipped (too big)");
+ else
+ strcpy(back[p].r.msg,"Test: Cached file skipped (too big)");
+ return 0;
+ }
+ }
+
+ if (back[p].r.statuscode != -1) { // pas d'erreur de lecture
+ if ((opt->debug>0) && (opt->log!=NULL)) {
+ if (!test) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"File immediately loaded from cache: %s%s"LF,back[p].url_adr,back[p].url_fil); test_flush;
+ } else {
+ fspc(opt->log,"debug"); fprintf(opt->log,"File immediately tested from cache: %s%s"LF,back[p].url_adr,back[p].url_fil); test_flush;
+ }
+ }
+ back[p].r.notmodified=1; // fichier non modifié
+ back[p].status=0; // OK prêt
+
+ // finalize transfer
+ if (!test) {
+ if (back[p].r.statuscode>0) {
+ back_finalize(opt,cache,back,p);
+ }
+ }
+
+ return 0;
+ } else { // erreur
+ // effacer r
+ memset(&(back[p].r), 0, sizeof(htsblk)); back[p].r.soc=INVALID_SOCKET; back[p].r.location=back[p].location_buffer;
+ // et continuer (chercher le fichier)
+ }
+
+ } else if (cache->type==2) { // si en cache, demander de tester If-Modified-Since
+ htsblk* r=cache_header(opt,cache,adr,fil);
+
+ /* Interdiction taille par le wizard? */
+ {
+ LLint save_totalsize=back[p].r.totalsize;
+ back[p].r.totalsize=r->totalsize;
+ if (!back_checksize(opt,&back[p],1)) {
+ r=NULL;
+ //
+ back[p].status=0; // FINI
+ deletehttp(&back[p].r); back[p].r.soc=INVALID_SOCKET;
+ if (!back[p].testmode)
+ strcpy(back[p].r.msg,"File too big");
+ else
+ strcpy(back[p].r.msg,"Test: File too big");
+ return 0;
+ }
+ back[p].r.totalsize=save_totalsize;
+ }
+
+ if (r) {
+ if (r->statuscode==200) { // uniquement des 200 (OK)
+ if (strnotempty(r->etag)) { // ETag (RFC2616)
+ /*
+ - If both an entity tag and a Last-Modified value have been
+ provided by the origin server, SHOULD use both validators in
+ cache-conditional requests. This allows both HTTP/1.0 and
+ HTTP/1.1 caches to respond appropriately.
+ */
+ if (strnotempty(r->lastmodified))
+ sprintf(back[p].send_too,"If-None-Match: %s\r\nIf-Modified-Since: %s\r\n",r->etag,r->lastmodified);
+ else
+ sprintf(back[p].send_too,"If-None-Match: %s\r\n",r->etag);
+ }
+ else if (strnotempty(r->lastmodified))
+ sprintf(back[p].send_too,"If-Modified-Since: %s\r\n",r->lastmodified);
+ else if (strnotempty(cache->lastmodified))
+ sprintf(back[p].send_too,"If-Modified-Since: %s\r\n",cache->lastmodified);
+
+ /* this is an update of a file */
+ if (strnotempty(back[p].send_too))
+ back[p].is_update=1;
+ back[p].r.req.nocompression=1; /* Do not compress when updating! */
+
+ }
+ /* else if (strnotempty(cache->lastmodified))
+ sprintf(back[p].send_too,"If-Modified-Since: %s\r\n",cache->lastmodified);
+ */
+ }
+#if DEBUGCA
+ printf("..is modified test %s\n",back[p].send_too);
+#endif
+ }
+ // Okay, pas trouvé dans le cache
+ // Et si le fichier existe sur disque?
+ // Pas dans le cache: fichier n'a pas été transféré du tout, donc pas sur disque?
+ } else {
+ if (fexist(save)) { // fichier existe? aghl!
+ LLint sz=fsize(save);
+ // Bon, là il est possible que le fichier ait été partiellement transféré
+ // (s'il l'avait été en totalité il aurait été inscrit dans le cache ET existerait sur disque)
+ // PAS de If-Modified-Since, on a pas connaissance des données à la date du cache
+ // On demande juste les données restantes si le date est valide (206), tout sinon (200)
+ if ((ishtml(save) != 1) && (ishtml(back[p].url_fil)!=1)) { // NON HTML (liens changés!!)
+ if (sz>0) { // Fichier non vide? (question bête, sinon on transfert tout!)
+ if (strnotempty(cache->lastmodified)) { /* pas de If-.. possible */
+ /*if ( (!opt->http10) && (strnotempty(cache->lastmodified)) ) { */ /* ne pas forcer 1.0 */
+#if DEBUGCA
+ printf("..if unmodified since %s size "LLintP"\n",cache->lastmodified,(LLint)sz);
+#endif
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"File partially present ("LLintP" bytes): %s%s"LF,(LLint)sz,back[p].url_adr,back[p].url_fil); test_flush;
+ }
+
+ /* impossible - don't have etag or date
+ if (strnotempty(back[p].r.etag)) { // ETag (RFC2616)
+ sprintf(back[p].send_too,"If-None-Match: %s\r\n",back[p].r.etag);
+ back[p].http11=1; // En tête 1.1
+ } else if (strnotempty(back[p].r.lastmodified)) {
+ sprintf(back[p].send_too,"If-Unmodified-Since: %s\r\n",back[p].r.lastmodified);
+ back[p].http11=1; // En tête 1.1
+ } else
+ */
+ if (strlen(cache->lastmodified)) {
+ sprintf(back[p].send_too,
+ "If-Unmodified-Since: %s\r\nRange: bytes="LLintP"-\r\n"
+ ,cache->lastmodified,(LLint)sz);
+ back[p].http11=1; // En tête 1.1
+ back[p].range_req_size=sz;
+ back[p].r.req.range_used=1;
+ back[p].r.req.nocompression=1;
+ } else {
+ if ((opt->debug>0) && (opt->log!=NULL)) {
+ fspc(opt->log,"warning"); fprintf(opt->log,"Could not find timestamp for partially present file, restarting (lost "LLintP" bytes): %s%s"LF,(LLint)sz,back[p].url_adr,back[p].url_fil); test_flush;
+ }
+ }
+
+ } else {
+ if ((opt->debug>0) && (opt->errlog!=NULL)) {
+ fspc(opt->errlog,"warning");
+ /*
+ if (opt->http10)
+ fprintf(opt->errlog,"File partially present (%d bytes) retransfered due to HTTP/1.0 settings: %s%s"LF,sz,back[p].url_adr,back[p].url_fil);
+ else
+ */
+ fprintf(opt->errlog,"File partially present ("LLintP" bytes) retransfered due to lack of cache: %s%s"LF,(LLint)sz,back[p].url_adr,back[p].url_fil);
+ test_flush;
+ }
+ /* Sinon requête normale... */
+ back[p].http11=0;
+ }
+ } else if (opt->norecatch) { // tester norecatch
+ filenote(save,NULL); // ne pas purger tout de même
+ back[p].status=0; // OK prêt
+ back[p].r.statuscode=-1; // erreur
+ strcpy(back[p].r.msg,"Null-size file not recaught");
+ return 0;
+ }
+ } else {
+ if ((opt->debug>0) && (opt->errlog!=NULL)) {
+ fspc(opt->errlog,"warning");
+ fprintf(opt->errlog,"HTML file ("LLintP" bytes) retransfered due to lack of cache: %s%s"LF,(LLint)sz,back[p].url_adr,back[p].url_fil);
+ test_flush;
+ }
+ /* Sinon requête normale... */
+ back[p].http11=0;
+ }
+ }
+ }
+ }
+
+
+ {
+ ///htsblk r; non directement dans la structure-réponse!
+ T_SOC soc;
+
+ // ouvrir liaison, envoyer requète
+ // ne pas traiter ou recevoir l'en tête immédiatement
+ memset(&(back[p].r), 0, sizeof(htsblk)); back[p].r.soc=INVALID_SOCKET; back[p].r.location=back[p].location_buffer;
+ // recopier proxy
+ memcpy(&(back[p].r.req.proxy), &opt->proxy, sizeof(opt->proxy));
+ // et user-agent
+ strcpy(back[p].r.req.user_agent,opt->user_agent);
+ strcpy(back[p].r.req.lang_iso,opt->lang_iso);
+ back[p].r.req.user_agent_send=opt->user_agent_send;
+ // et http11
+ back[p].r.req.http11=back[p].http11;
+ back[p].r.req.nocompression=opt->nocompression;
+
+ // mode ftp, court-circuit!
+ if (strfield(back[p].url_adr,"ftp://")) {
+ if (back[p].testmode) {
+ if ((opt->debug>1) && (opt->errlog!=NULL)) {
+ fspc(opt->errlog,"debug"); fprintf(opt->errlog,"error: forbidden test with ftp link for back_add"LF);
+ }
+ return -1; // erreur pas de test permis
+ }
+ if (!(back[p].r.req.proxy.active && opt->ftp_proxy)) { // connexion directe, gérée en thread
+ back[p].status=1000; // connexion ftp
+#if USE_BEGINTHREAD
+ launch_ftp(&(back[p]));
+#else
+ {
+ char nid[32];
+ sprintf(nid,"htsftp%d-in_progress.lock",p);
+ strcpy(back[p].location_buffer,fconcat(opt->path_log,nid));
+ }
+ launch_ftp(&(back[p]),back[p].location_buffer,opt->exec);
+#endif
+ return 0;
+ }
+ }
+#if HTS_USEOPENSSL
+ else if (strfield(back[p].url_adr,"https://")) { // let's rock
+ back[p].r.ssl = 1;
+ // back[p].r.ssl_soc = NULL;
+ back[p].r.ssl_con = NULL;
+ }
+#endif
+
+#if HTS_XGETHOST
+#if HDEBUG
+ printf("back_solve..\n");
+#endif
+ back[p].status=101; // tentative de résolution du nom de host
+ soc=INVALID_SOCKET; // pas encore ouverte
+ back_solve(&back[p]); // préparer
+ if (host_wait(&back[p])) { // prêt, par ex fichier ou dispo dans dns
+#if HDEBUG
+ printf("ok, dns cache ready..\n");
+#endif
+ soc=http_xfopen(0,0,0,back[p].send_too,adr,fil,&(back[p].r));
+ if (soc==INVALID_SOCKET) {
+ back[p].status=0; // fini, erreur
+ }
+ }
+//
+#else
+//
+#if CNXDEBUG
+ printf("XFopen..\n");
+#endif
+
+ if (strnotempty(back[p].send_too)) // envoyer un if-modified-since
+#if HTS_XCONN
+ soc=http_xfopen(0,0,0,back[p].send_too,adr,fil,&(back[p].r));
+#else
+ soc=http_xfopen(0,0,1,back[p].send_too,adr,fil,&(back[p].r));
+#endif
+ else
+#if HTS_XCONN
+ soc=http_xfopen(test,0,0,NULL,adr,fil,&(back[p].r));
+#else
+ soc=http_xfopen(test,0,1,NULL,adr,fil,&(back[p].r));
+#endif
+#endif
+ if (opt->timeout>0) { // gestion du opt->timeout
+ back[p].timeout=opt->timeout;
+ back[p].timeout_refresh=time_local();
+ } else {
+ back[p].timeout=-1; // pas de gestion (default)
+ }
+
+ if (opt->rateout>0) { // gestion d'un taux minimum de transfert toléré
+ back[p].rateout=opt->rateout;
+ back[p].rateout_time=time_local();
+ } else {
+ back[p].rateout=-1; // pas de gestion (default)
+ }
+
+ // Note: on charge les code-page erreurs (erreur 404, etc) dans le cas où cela est
+ // rattrapable (exemple: 301,302 moved xxx -> refresh sur la page!)
+ //if ((back[p].statuscode!=200) || (soc<0)) { // ERREUR HTTP/autre
+
+#if CNXDEBUG
+printf("Xfopen ok, poll..\n");
+#endif
+
+#if HTS_XGETHOST
+ if (soc!=INVALID_SOCKET)
+ if (back[p].status==101) { // pas d'erreur
+ if (!back[p].r.is_file)
+ back[p].status=100; // connexion en cours
+ else
+ back[p].status=1; // fichier
+ }
+
+#else
+ if (soc==INVALID_SOCKET) { // erreur socket
+ back[p].status=0; // FINI
+ //if (back[p].soc!=INVALID_SOCKET) deletehttp(back[p].soc);
+ back[p].r.soc=INVALID_SOCKET;
+ } else {
+ if (!back[p].r.is_file)
+#if HTS_XCONN
+ back[p].status=100; // connexion en cours
+#else
+ back[p].status=99; // chargement en tête en cours
+#endif
+ else
+ back[p].status=1; // chargement fichier
+#if BDEBUG==1
+ printf("..loading header\n");
+#endif
+ }
+#endif
+
+ }
+
+
+ // note: si il y a erreur (404,etc) status=2 (terminé/échec) mais
+ // le lien est considéré comme traité
+ //if (back[p].soc<0) // erreur
+ // return -1;
+
+ return 0;
+ } else {
+ if ((opt->debug>1) && (opt->errlog!=NULL)) {
+ fspc(opt->errlog,"debug"); fprintf(opt->errlog,"error: no space left in stack for back_add"LF);
+ }
+ return -1; // plus de place
+ }
+}
+
+
+
+#if HTS_XGETHOST
+#if USE_BEGINTHREAD
+// Background DNS resolver: entry point of the thread started by back_solve().
+//
+// iadr_p: heap-allocated host string; this function releases it with freet()
+//         once the name has been copied locally.
+//
+// The host name (credentials skipped via jump_identification(), port part cut
+// at jump_toport()) is looked up in the DNS cache list. If it is not there yet,
+// a new entry is appended at the tail, marked "in progress" (host_length=-1),
+// then filled with the resolved address on success. On failure the entry keeps
+// host_length=-1.
+//
+// Returns PTHREAD_RETURN.
+PTHREAD_TYPE Hostlookup(void* iadr_p) {
+  char iadr[256];
+  t_dnscache* cache=_hts_cache();    // head of the dns cache list
+  t_hostent* hp;
+  int error_found=0;
+
+  // copy the host name (part located after id:pass)
+#if DEBUGDNS
+  printf("resolv in background: %s\n",jump_identification(iadr_p));
+#endif
+  // Bounded copy: the name comes from an URL/proxy string whose length is not
+  // limited to sizeof(iadr); an overlong name is truncated instead of
+  // overflowing the stack buffer.
+  iadr[0]='\0';
+  strncat(iadr,jump_identification(iadr_p),sizeof(iadr)-1);
+  // cut the optional ":port" part
+  {
+    char *port_sep;
+    if ( (port_sep=jump_toport(iadr)) )
+      *port_sep='\0';      // get rid of it
+  }
+  freet(iadr_p);
+
+  // wait until the dns cache is available, then lock it
+  // NOTE(review): test-then-lock is done in two separate calls; whether this
+  // is atomic enough depends on _hts_lockdns(), not visible here.
+  while(_hts_lockdns(-1));    // wait for release
+  _hts_lockdns(1);            // lock
+
+  // walk to the tail of the list, checking for an already existing entry
+  while(cache->n) {
+    if (strcmp(cache->iadr,iadr)==0) {
+      error_found=1;
+    }
+    cache=cache->n;
+  }
+  if (strcmp(cache->iadr,iadr)==0) {
+    error_found=1;
+  }
+
+  if (!error_found) {
+    // roughly a copy of hts_gethostbyname, without the return
+    cache->n=(t_dnscache*) calloct(1,sizeof(t_dnscache));
+    if (cache->n!=NULL) {
+      t_fullhostent fullhostent_buffer;
+      strcpy(cache->n->iadr,iadr);
+      cache->n->host_length=0;        /* nothing yet */
+      cache->n->n=NULL;
+      _hts_lockdns(0);                // unlock
+
+      /* resolve */
+#if DEBUGDNS
+      printf("gethostbyname() in progress for %s\n",iadr);
+#endif
+      cache->n->host_length=-1;
+      memset(cache->n->host_addr, 0, sizeof(cache->n->host_addr));
+      hp=vxgethostbyname(iadr, &fullhostent_buffer);
+      // Only accept an address that fits in the cache entry; a negative or
+      // oversized h_length leaves the entry marked as failed (-1).
+      // (assumes host_addr is an array, as the memset above already does)
+      if ( (hp!=NULL) && ((size_t) hp->h_length <= sizeof(cache->n->host_addr)) ) {
+        memcpy(cache->n->host_addr, hp->h_addr, hp->h_length);
+        cache->n->host_length = hp->h_length;
+      }
+    } else
+      _hts_lockdns(0);                // unlock
+  } else {
+#if DEBUGDNS
+    printf("aborting resolv for %s (found)\n",iadr);
+#endif
+    _hts_lockdns(0);                  // unlock
+  }
+  // end of the hts_gethostbyname copy
+
+#if DEBUGDNS
+  printf("quitting resolv for %s (result: %d)\n",iadr,(cache->n!=NULL)?cache->n->host_length:(-999));
+#endif
+
+  return PTHREAD_RETURN;   /* _endthread implied */
+}
+#endif
+
+// Prepare host name resolution for a backing entry.
+// Nothing is done for file:// and ftp:// links. Otherwise the name to resolve
+// is the proxy name when a proxy is active, else the link's own address; if
+// hts_dnstest() reports it as not yet tested and threads are available
+// (USE_BEGINTHREAD), a Hostlookup thread is started with a private copy of
+// the name (Hostlookup releases that copy itself).
+void back_solve(lien_back* back) {
+  char* host;
+
+  // local file or ftp link: nothing to prepare
+  if (strfield(back->url_adr,"file://") || strfield(back->url_adr,"ftp://"))
+    return;
+
+  // name to resolve: the proxy if one is active, the target host otherwise
+  if (back->r.req.proxy.active)
+    host=back->r.req.proxy.name;
+  else
+    host=back->url_adr;
+  host = jump_protocol(host);
+
+  // already tested: nothing more to do
+  if (hts_dnstest(host))
+    return;
+
+  // not tested yet: register the lookup in a thread
+  // (same code for Windows and other platforms)
+#if USE_BEGINTHREAD
+  {
+    char* name_copy = calloct(strlen(host)+2,1);
+    if (name_copy) {
+      strcpy(name_copy,host);
+      _beginthread( Hostlookup , 0, name_copy );
+    }
+  }
+#else
+  // Without threads nothing is started here: gethostbyname() is blocking
+  // (under Unix), and the synchronous call is disabled.
+  /*t_hostent* h=*/
+  /*hts_gethostbyname(a);*/
+#endif
+}
+
+// Tell whether the host of a backing entry is ready to be used.
+// Returns 1 immediately for file:// and ftp:// links; otherwise returns the
+// result of hts_dnstest() on the proxy name (proxy active) or on the link
+// address (no proxy).
+// NOTE(review): back_solve() applies jump_protocol() before hts_dnstest(),
+// this function does not - confirm hts_dnstest() copes with both forms.
+int host_wait(lien_back* back) {
+  // local file or ftp link: ready
+  if (strfield(back->url_adr,"file://") || strfield(back->url_adr,"ftp://"))
+    return 1;
+
+  if (back->r.req.proxy.active)
+    return (hts_dnstest(back->r.req.proxy.name));
+
+  return (hts_dnstest(back->url_adr));
+}
+#endif
+
+
+// Remove finished non-html files from the backing stack (anticipation).
+// Cleans up non-html files in backing to save backing space
+// and allow faster "save in cache" operation.
+//
+// A slot is released when all of the following hold: it is ready (status 0),
+// not in test mode, has a save name, was written directly to disk, has a
+// positive size, got HTTP 200, its MIME type neither is nor may be hypertext,
+// and it carries a pass2_ptr. For such a slot *pass2_ptr is set to -1 and the
+// slot is handed to back_delete().
+//
+// opt:      options (debug level and log stream used for the info message)
+// cache:    not used by the active code (only by the disabled code below)
+// back:     backing stack
+// back_max: number of slots in the stack
+void back_clean(httrackp* opt,cache_back* cache,lien_back* back,int back_max) {
+  int i;
+  for(i=0;i<back_max;i++) {
+    // guard clauses, evaluated in the same order as the original nested tests
+    if (back[i].status != 0) continue;                              // not ready
+    if (back[i].testmode) continue;                                 // test mode
+    if (!strnotempty(back[i].url_sav)) continue;                    // no filename
+    if (!back[i].r.is_write) continue;                              // in memory, not on disk
+    if (!(back[i].r.size>0)) continue;                              // empty
+    if (back[i].r.statuscode!=200) continue;                        // not HTTP "OK"
+    if (is_hypertext_mime(back[i].r.contenttype)) continue;         // HTML/hypertext
+    if (may_be_hypertext_mime(back[i].r.contenttype)) continue;     // may be parseable
+    if (!back[i].pass2_ptr) continue;
+
+    // Disabled in the original code, kept for reference:
+    // // back_finalize(opt,cache,back,i);
+    //HTS_STAT.stat_bytes+=back[i].r.size;
+    //HTS_STAT.stat_files++;
+    //if ( (!back[i].r.notmodified) && (opt->is_update) ) {
+    //  HTS_STAT.stat_updated_files++;       // modified page
+    //}
+    //cache_mayadd(opt,cache,&back[i].r,back[i].url_adr,back[i].url_fil,back[i].url_sav);
+
+    // Log BEFORE releasing the slot: the message reads back[i].url_sav, and
+    // what back_delete() leaves in the entry is not guaranteed from here.
+    if ((opt->debug>0) && (opt->log!=NULL)) {
+      fspc(opt->log,"info"); fprintf(opt->log,"File successfully written in background: %s"LF,back[i].url_sav); test_flush;
+    }
+
+    *back[i].pass2_ptr=-1;    // Done!
+    back_delete(back,i);      // Delete backing entry
+  }
+}
+
+
+// attente (gestion des buffers des sockets)
+void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TStamp stat_timestart) {
+ int i;
+ T_SOC nfds=INVALID_SOCKET;
+ fd_set fds,fds_c,fds_e; // fds pour lecture, connect (write), et erreur
+ int nsockets; // nbre sockets
+ LLint max_read_bytes; // max bytes read per sockets
+ struct timeval tv;
+ int do_wait=0;
+ int gestion_timeout=0;
+ int busy_recv=0; // pas de données pour le moment
+ int busy_state=0; // pas de connexions
+ int max_loop; // nombre de boucles max à parcourir..
+#if HTS_ANALYSTE
+ int max_loop_chk=0;
+#endif
+
+
+ // max. number of loops
+ max_loop=8;
+
+#if 1
+ // Cleanup the stack to save space!
+ back_clean(opt,cache,back,back_max);
+#endif
+
+ // recevoir tant qu'il y a des données (avec un maximum de max_loop boucles)
+ do_wait=0;
+ gestion_timeout=0;
+ do {
+ int max_c;
+ busy_state=busy_recv=0;
+
+#if 0
+ check_rate(stat_timestart,opt->maxrate); // vérifier taux de transfert
+#endif
+ // inscrire les sockets actuelles, et rechercher l'ID la plus élevée
+ FD_ZERO(&fds);
+ FD_ZERO(&fds_c);
+ FD_ZERO(&fds_e);
+ nsockets=0;
+ max_read_bytes=TAILLE_BUFFER; // maximum bytes that can be read
+ nfds=INVALID_SOCKET;
+
+ max_c=1;
+ for(i=0;i<back_max;i++) {
+
+ // en cas de gestion du connect préemptif
+#if HTS_XCONN
+ if (back[i].status==100) { // connexion
+ do_wait=1;
+
+ // noter socket write
+ FD_SET(back[i].r.soc,&fds_c);
+
+ // noter socket erreur
+ FD_SET(back[i].r.soc,&fds_e);
+
+ // calculer max
+ if (max_c) {
+ max_c=0;
+ nfds=back[i].r.soc;
+ } else if (back[i].r.soc>nfds) {
+ // ID socket la plus élevée
+ nfds=back[i].r.soc;
+ }
+
+ } else
+#endif
+#if HTS_XGETHOST
+ if (back[i].status==101) { // attente
+ // rien à faire..
+ } else
+#endif
+ // poll pour la lecture sur les sockets
+ if ((back[i].status>0) && (back[i].status<100)) { // en réception http
+
+#if BDEBUG==1
+ //printf("....socket in progress: %d\n",back[i].r.soc);
+#endif
+ // non local et non ftp
+ if (!back[i].r.is_file) {
+ //## if (back[i].url_adr[0]!=lOCAL_CHAR) {
+
+ // vérification de sécurité
+ if (back[i].r.soc!=INVALID_SOCKET) { // hey, you never know..
+ do_wait=1;
+
+ // noter socket read
+ FD_SET(back[i].r.soc,&fds);
+
+ // noter socket error
+ FD_SET(back[i].r.soc,&fds_e);
+
+ // incrémenter nombre de sockets
+ nsockets++;
+
+ // calculer max
+ if (max_c) {
+ max_c=0;
+ nfds=back[i].r.soc;
+ } else if (back[i].r.soc>nfds) {
+ // ID socket la plus élevée
+ nfds=back[i].r.soc;
+ }
+ } else {
+ back[i].r.statuscode=-4;
+ if (back[i].status==100)
+ strcpy(back[i].r.msg,"Connect Error");
+ else
+ strcpy(back[i].r.msg,"Receive Error");
+ back[i].status=0; // terminé
+ if ((opt->debug>0) && (opt->log!=NULL)) {
+ fspc(opt->log,"warning"); fprintf(opt->log,"Unexpected socket error during pre-loop"LF); test_flush;
+ }
+ }
+#if WIDE_DEBUG
+ else {
+ DEBUG_W("PANIC!!! Socket is invalid in a poll test!\n");
+ }
+#endif
+
+ }
+
+ }
+ }
+ nfds++;
+
+ if (do_wait) { // attendre
+ // temps d'attente max: 2.5 seconde
+ tv.tv_sec=HTS_SOCK_SEC;
+ tv.tv_usec=HTS_SOCK_MS;
+
+#if BDEBUG==1
+ printf("..select\n");
+#endif
+
+ // poller les sockets-attention au noyau sous Unix..
+#if HTS_WIDE_DEBUG
+ DEBUG_W("select\n");
+#endif
+ select(nfds,&fds,&fds_c,&fds_e,&tv);
+#if HTS_WIDE_DEBUG
+ DEBUG_W("select done\n");
+#endif
+ }
+
+ // maximum data which can be received for a socket, if limited
+ if (nsockets) {
+ if (opt->maxrate>0) {
+ max_read_bytes = ( check_downloadable_bytes(opt->maxrate) / nsockets );
+ }
+ }
+ if (!max_read_bytes)
+ busy_recv=0;
+
+ // recevoir les données arrivées
+ for(i=0;i<back_max;i++) {
+
+ if (back[i].status>0) {
+ if (!back[i].r.is_file) { // not file..
+ if (back[i].r.soc!=INVALID_SOCKET) { // hey, you never know..
+ int err=FD_ISSET(back[i].r.soc,&fds_e);
+ if (err) {
+ if (back[i].r.soc!=INVALID_SOCKET) {
+#if HTS_DEBUG_CLOSESOCK
+ DEBUG_W("back_wait: deletehttp\n");
+#endif
+ deletehttp(&back[i].r);
+ }
+ back[i].r.soc=INVALID_SOCKET;
+ back[i].r.statuscode=-4;
+ if (back[i].status==100)
+ strcpy(back[i].r.msg,"Connect Error");
+ else
+ strcpy(back[i].r.msg,"Receive Error");
+ back[i].status=0; // terminé
+ }
+ }
+ }
+ }
+
+ // ---- FLAG WRITE MIS A UN?: POUR LE CONNECT
+ if (back[i].status==100) { // attendre connect
+ int dispo=0;
+ // vérifier l'existance de timeout-check
+ if (!gestion_timeout)
+ if (back[i].timeout>0)
+ gestion_timeout=1;
+
+ // connecté?
+ dispo=FD_ISSET(back[i].r.soc,&fds_c);
+ if (dispo) { // ok connected!!
+ busy_state=1;
+
+#if HTS_USEOPENSSL
+ /* SSL mode */
+ if (back[i].r.ssl) {
+ // handshake not yet launched
+ if (!back[i].r.ssl_con) {
+ SSL_CTX_set_options(openssl_ctx, SSL_OP_ALL);
+ // new session
+ back[i].r.ssl_con = SSL_new(openssl_ctx);
+ if (back[i].r.ssl_con) {
+ SSL_clear(back[i].r.ssl_con);
+ if (SSL_set_fd(back[i].r.ssl_con, back[i].r.soc) == 1) {
+ SSL_set_connect_state(back[i].r.ssl_con);
+ back[i].status = 102; /* handshake wait */
+ } else
+ back[i].r.statuscode=-6;
+ } else
+ back[i].r.statuscode=-6;
+ }
+ /* Error */
+ if (back[i].r.statuscode == -6) {
+ strcpy(back[i].r.msg, "bad SSL/TLS handshake");
+ deletehttp(&back[i].r);
+ back[i].r.soc=INVALID_SOCKET;
+ back[i].r.statuscode=-5;
+ back[i].status=0;
+ }
+ }
+
+#endif
+
+#if BDEBUG==1
+ printf("..connect ok on socket %d\n",back[i].r.soc);
+#endif
+
+ if ((back[i].r.soc != INVALID_SOCKET) && (back[i].status==100)) {
+ /* limit nb. connections/seconds to avoid server overload */
+ if (opt->maxconn>0) {
+ Sleep(1000/opt->maxconn);
+ }
+
+ if (back[i].timeout>0) { // refresh timeout si besoin est
+ back[i].timeout_refresh=time_local();
+ }
+ if (back[i].rateout>0) { // le taux de transfert de base sur le début de la connexion
+ back[i].rateout_time=time_local();
+ }
+ // envoyer header
+ //if (strcmp(back[i].url_sav,BACK_ADD_TEST)!=0) // vrai get
+ if (!back[i].head_request)
+ http_sendhead(opt->cookie,0,back[i].send_too,back[i].url_adr,back[i].url_fil,back[i].referer_adr,back[i].referer_fil,&back[i].r);
+ else if (back[i].head_request==2) // test en GET!
+ http_sendhead(opt->cookie,0,back[i].send_too,back[i].url_adr,back[i].url_fil,back[i].referer_adr,back[i].referer_fil,&back[i].r);
+ else // test!
+ http_sendhead(opt->cookie,1,back[i].send_too,back[i].url_adr,back[i].url_fil,back[i].referer_adr,back[i].referer_fil,&back[i].r);
+ back[i].status=99; // attendre en tête maintenant
+ }
+ }
+
+ // attente gethostbyname
+ }
+#if HTS_USEOPENSSL
+ else if (back[i].status==102) { // wait for SSL handshake
+ /* SSL mode */
+ if (back[i].r.ssl) {
+ int conn_code;
+ if ((conn_code = SSL_connect(back[i].r.ssl_con)) <= 0) {
+ /* non blocking I/O, will retry */
+ int err_code = SSL_get_error(back[i].r.ssl_con, conn_code);
+ if (
+ (err_code != SSL_ERROR_WANT_READ)
+ &&
+ (err_code != SSL_ERROR_WANT_WRITE)
+ ) {
+ char tmp[256];
+ tmp[0]='\0';
+ ERR_error_string(err_code, tmp);
+ back[i].r.msg[0]='\0';
+ strncat(back[i].r.msg, tmp, sizeof(back[i].r.msg) - 2);
+ if (!strnotempty(back[i].r.msg)) {
+ sprintf(back[i].r.msg, "SSL/TLS error %d", err_code);
+ }
+ deletehttp(&back[i].r);
+ back[i].r.soc=INVALID_SOCKET;
+ back[i].r.statuscode=-5;
+ back[i].status=0;
+ }
+ } else { /* got it! */
+ back[i].status=100; // back to waitconnect
+ }
+ } else {
+ strcpy(back[i].r.msg, "unexpected SSL/TLS error");
+ deletehttp(&back[i].r);
+ back[i].r.soc=INVALID_SOCKET;
+ back[i].r.statuscode=-5;
+ back[i].status=0;
+ }
+
+ }
+#endif
+#if HTS_XGETHOST
+ else if (back[i].status==101) { // attendre gethostbyname
+#if DEBUGDNS
+ //printf("status 101 for %s\n",back[i].url_adr);
+#endif
+
+ if (!gestion_timeout)
+ if (back[i].timeout>0)
+ gestion_timeout=1;
+
+ if (host_wait(&back[i])) { // prêt
+ back[i].status=100; // attente connexion
+ if (back[i].timeout>0) { // refresh timeout si besoin est
+ back[i].timeout_refresh=time_local();
+ }
+ if (back[i].rateout>0) { // le taux de transfert de base sur le début de la connexion
+ back[i].rateout_time=time_local();
+ }
+
+ back[i].r.soc=http_xfopen(0,0,0,back[i].send_too,back[i].url_adr,back[i].url_fil,&(back[i].r));
+ if (back[i].r.soc==INVALID_SOCKET) {
+ back[i].status=0; // fini, erreur
+ if (back[i].r.soc!=INVALID_SOCKET) {
+#if HTS_DEBUG_CLOSESOCK
+ DEBUG_W("back_wait(2): deletehttp\n");
+#endif
+ deletehttp(&back[i].r);
+ }
+ back[i].r.soc=INVALID_SOCKET;
+ back[i].r.statuscode=-5;
+ if (strnotempty(back[i].r.msg)==0)
+ strcpy(back[i].r.msg,"Unable to resolve host name");
+ }
+ }
+
+
+ // ---- FLAG READ MIS A UN?: POUR LA RECEPTION
+ }
+#endif
+#if USE_BEGINTHREAD
+ // ..rien à faire, c'est magic les threads
+#else
+ else if (back[i].status==1000) { // en réception ftp
+ if (!fexist(back[i].location_buffer)) { // terminé
+ FILE* fp;
+ fp=fopen(fconcat(back[i].location_buffer,".ok"),"rb");
+ if (fp) {
+ int j=0;
+ fscanf(fp,"%d ",&(back[i].r.statuscode));
+ while(!feof(fp)) {
+ int c = fgetc(fp);
+ if (c!=EOF)
+ back[i].r.msg[j++]=c;
+ }
+ back[i].r.msg[j++]='\0';
+ fclose(fp);
+ remove(fconcat(back[i].location_buffer,".ok"));
+ strcpy(fconcat(back[i].location_buffer,".ok"),"");
+ } else {
+ strcpy(back[i].r.msg,"Unknown ftp result, check if file is ok");
+ back[i].r.statuscode=-1;
+ }
+ back[i].status=0;
+ // finalize transfer
+ if (back[i].r.statuscode>0) {
+ back_finalize(opt,cache,back,i);
+ }
+ }
+ }
+#endif
+ else if ((back[i].status>0) && (back[i].status<1000)) { // en réception http
+ int dispo=0;
+
+ // vérifier l'existance de timeout-check
+ if (!gestion_timeout)
+ if (back[i].timeout>0)
+ gestion_timeout=1;
+
+ // données dispo?
+ //## if (back[i].url_adr[0]!=lOCAL_CHAR)
+ if (!back[i].r.is_file) {
+ dispo=FD_ISSET(back[i].r.soc,&fds);
+ }
+ else
+ dispo=1;
+
+ // Check transfer rate!
+ if (!max_read_bytes)
+ dispo=0; // limit transfer rate
+
+ if (dispo) { // données dispo
+ LLint retour_fread;
+ busy_recv=1; // on récupère encore
+#if BDEBUG==1
+ printf("..data available on socket %d\n",back[i].r.soc);
+#endif
+
+
+ // range size hack old location
+
+#if HTS_DIRECTDISK
+ // Court-circuit:
+ // Peut-on stocker le fichier directement sur disque?
+ // Ahh que ca serait vachement mieux et que ahh que la mémoire vous dit merci!
+ if (back[i].status) {
+ if (back[i].r.is_write==0) { // mode mémoire
+ if (back[i].r.adr==NULL) { // rien n'a été écrit
+ if (!back[i].testmode) { // pas mode test
+ if (strnotempty(back[i].url_sav)) {
+ if (strcmp(back[i].url_fil,"/robots.txt")) {
+ if (back[i].r.statuscode==200) { // 'OK'
+ if (!is_hypertext_mime(back[i].r.contenttype)) { // pas HTML
+ if (opt->getmode&2) { // on peut ecrire des non html
+ back[i].r.is_write=1; // écrire
+ if (back[i].r.compressed
+ &&
+ /* .gz are *NOT* depacked!! */
+ (strfield(get_ext(back[i].url_sav),"gz") == 0)
+ ) {
+ back[i].tmpfile[0]='\0';
+ strcpy(back[i].tmpfile,tempnam(NULL,"httrZ"));
+ if (back[i].tmpfile[0])
+ back[i].r.out=fopen(back[i].tmpfile,"wb");
+ } else {
+ back[i].r.compressed=0;
+ back[i].r.out=filecreate(back[i].url_sav);
+ }
+#if HDEBUG
+ printf("direct-disk: %s\n",back[i].url_sav);
+#endif
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"File received from net to disk: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush;
+ }
+
+ if (back[i].r.out==NULL) {
+ if (opt->errlog) {
+ fspc(opt->errlog,"error");
+ fprintf(opt->errlog,"Unable to save file %s"LF,back[i].url_sav);
+ test_flush;
+ }
+ back[i].r.is_write=0; // erreur, abandonner
+#if HDEBUG
+ printf("..error!\n");
+#endif
+ }
+#if HTS_WIN==0
+ else chmod(back[i].url_sav,HTS_ACCESS_FILE);
+#endif
+ } else { // on coupe tout!
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"File cancelled (non HTML): %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush;
+ }
+ back[i].status=0; // terminé
+ if (!back[i].testmode)
+ back[i].r.statuscode=-10; // EUHH CANCEL
+ else
+ back[i].r.statuscode=-10; // "TEST OK"
+ if (back[i].r.soc!=INVALID_SOCKET) {
+#if HTS_DEBUG_CLOSESOCK
+ DEBUG_W("back_wait(3): deletehttp\n");
+#endif
+ deletehttp(&back[i].r);
+ }
+ back[i].r.soc=INVALID_SOCKET;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+#endif
+
+ // réception de données depuis socket ou fichier
+ if (back[i].status) {
+ if (back[i].status==99) // recevoir par bloc de lignes
+ retour_fread=http_xfread1(&(back[i].r),0);
+ else if (back[i].status==98) { // recevoir longueur chunk en hexa caractère par caractère
+ // backuper pour lire dans le buffer chunk
+ htsblk r;
+ memcpy(&r, &(back[i].r), sizeof(htsblk));
+ back[i].r.is_write=0; // mémoire
+ back[i].r.adr=back[i].chunk_adr; // adresse
+ back[i].r.size=back[i].chunk_size; // taille taille chunk
+ back[i].r.totalsize=-1; // total inconnu
+ back[i].r.out=NULL;
+ back[i].r.is_file=0;
+ //
+ // ligne par ligne
+ retour_fread=http_xfread1(&(back[i].r),-1);
+ // modifier et restaurer
+ back[i].chunk_adr=back[i].r.adr; // adresse
+ back[i].chunk_size=back[i].r.size; // taille taille chunk
+ memcpy(&(back[i].r), &r, sizeof(htsblk)); // restaurer véritable r
+ }
+ else if (back[i].is_chunk) { // attention chunk, limiter taille à lire
+#if CHUNKDEBUG==1
+ printf("read %d bytes\n",(int)min(back[i].r.totalsize-back[i].r.size,max_read_bytes));
+#endif
+ retour_fread=(int) http_xfread1(&(back[i].r),(int) min(back[i].r.totalsize-back[i].r.size,max_read_bytes));
+ } else
+ retour_fread=(int) http_xfread1(&(back[i].r),(int) max_read_bytes);
+ // retour_fread=http_fread1(&(back[i].r));
+ } else
+ retour_fread=-1; // interruption ou annulation interne (peut ne pas être une erreur)
+
+ // Si réception chunk, tester si on est pas à la fin!
+ if (back[i].status==1) {
+ if (back[i].is_chunk) { // attendre prochain chunk
+ if (back[i].r.size==back[i].r.totalsize) { // fin chunk!
+ //printf("chunk end at %d\n",back[i].r.size);
+ back[i].status=98; // prochain chunk
+ if (back[i].chunk_adr!=NULL) { freet(back[i].chunk_adr); back[i].chunk_adr=NULL; } back[i].chunk_size=0;
+ retour_fread=0; // pas d'erreur
+#if CHUNKDEBUG==1
+ printf("waiting for next chunk header (soc %d)..\n",back[i].r.soc);
+#endif
+ }
+ }
+ }
+
+ if (retour_fread < 0) { // erreur réception
+ back[i].status=0; // terminé
+ if (back[i].r.soc!=INVALID_SOCKET) {
+#if HTS_DEBUG_CLOSESOCK
+ DEBUG_W("back_wait(4): deletehttp\n");
+#endif
+ deletehttp(&back[i].r);
+ }
+ back[i].r.soc=INVALID_SOCKET;
+#if CHUNKDEBUG==1
+ if (back[i].is_chunk)
+ printf("must be the last chunk for %s (connection closed) - %d/%d\n",back[i].url_fil,back[i].r.size,back[i].r.totalsize);
+#endif
+ //if ((back[i].r.statuscode==-1) && (strnotempty(back[i].r.msg)==0)) {
+ if ((back[i].r.statuscode<0) && (strnotempty(back[i].r.msg)==0)) {
+#if HDEBUG
+ printf("error interruped: %s\n",back[i].r.adr);
+#endif
+ if (back[i].r.size>0)
+ strcat(back[i].r.msg,"Interrupted transfer");
+ else
+ strcat(back[i].r.msg,"No data (connection closed)");
+ back[i].r.statuscode=-4;
+ }
+
+ // finalize transfer
+ if (back[i].r.statuscode>0) {
+ back_finalize(opt,cache,back,i);
+ }
+
+ if (back[i].r.totalsize>0) { // tester totalsize
+ //if ((back[i].r.totalsize>0) && (back[i].status==99)) { // tester totalsize
+ if (back[i].r.totalsize!=back[i].r.size) { // pas la même!
+ if (!opt->tolerant) {
+ //#if HTS_CL_IS_FATAL
+ if (back[i].r.adr) freet(back[i].r.adr); back[i].r.adr=NULL;
+ if (back[i].r.size<back[i].r.totalsize)
+ back[i].r.statuscode=-4; // recatch
+ sprintf(back[i].r.msg,"Incorrect length ("LLintP" Bytes, "LLintP" expected)",back[i].r.size,back[i].r.totalsize);
+ } else {
+ //#else
+ // Un warning suffira..
+ if (cache->errlog!=NULL) {
+ fspc(cache->errlog,"warning"); fprintf(cache->errlog,"Incorrect length ("LLintP"!="LLintP" expected) for %s%s"LF,back[i].r.size,back[i].r.totalsize,back[i].url_adr,back[i].url_fil);
+ }
+ //#endif
+ }
+ }
+ }
+#if BDEBUG==1
+ printf("transfer ok\n");
+#endif
+ } else if (retour_fread > 0) { // pas d'erreur de réception et data
+ if (back[i].timeout>0) { // refresh timeout si besoin est
+ back[i].timeout_refresh=time_local();
+ }
+
+ // Traitement des en têtes chunks ou en têtes
+ if (back[i].status==98) { // réception taille chunk en hexa ( après les en têtes, peut ne pas
+ if (back[i].chunk_size>=2) {
+ int chunk_size=-1;
+ // être présent)
+ if (back[i].chunk_adr[back[i].chunk_size-1]==10) { // LF, fin ligne chunk
+ char chunk_data[64];
+ if (back[i].chunk_size<32) { // pas trop gros
+ back[i].chunk_adr[ back[i].chunk_size-1]='\0'; // octet nul
+ strcpy(chunk_data,""); // hex number
+ strcat(chunk_data,back[i].chunk_adr);
+#if CHUNKDEBUG==1
+ printf("chunk received and read: %s\n",chunk_data);
+#endif
+ if (back[i].r.totalsize<0)
+ back[i].r.totalsize=0; // initialiser à 0
+ if (sscanf(chunk_data,"%x",&chunk_size) == 1) {
+ back[i].r.totalsize+=chunk_size; // noter taille
+ back[i].r.adr=(char*) realloct(back[i].r.adr,(INTsys) back[i].r.totalsize + 1);
+ if (!back[i].r.adr) {
+ if (cache->errlog!=NULL) {
+ fprintf(cache->errlog,"Error: Not enough memory ("LLintP") for %s%s"LF,back[i].r.totalsize,back[i].url_adr,back[i].url_fil);
+ }
+ }
+#if CHUNKDEBUG==1
+ printf("chunk length: %d - next total "LLintP":\n",chunk_size,back[i].r.totalsize);
+#endif
+ } else
+ if (cache->errlog!=NULL) {
+ fprintf(cache->errlog,"Warning: Illegal chunk (%s) for %s%s"LF,back[i].chunk_adr,back[i].url_adr,back[i].url_fil);
+ }
+ } else {
+ if (cache->errlog!=NULL) {
+ fprintf(cache->errlog,"Warning: Chunk too big ("LLintP") for %s%s"LF,back[i].chunk_size,back[i].url_adr,back[i].url_fil);
+ }
+ }
+
+ // ok, continuer sur le body
+
+ // si chunk non nul continuer (ou commencer)
+ if (chunk_size>0) {
+ back[i].status=1; // continuer body
+#if CHUNKDEBUG==1
+ printf("waiting for body (chunk)\n");
+#endif
+ } else { // chunk nul, c'est la fin
+#if CHUNKDEBUG==1
+ printf("chunk end, total: %d\n",back[i].r.size);
+#endif
+ back[i].status=0; // fin
+ // finalize transfer
+ back_finalize(opt,cache,back,i);
+ if (back[i].r.soc!=INVALID_SOCKET) {
+#if HTS_DEBUG_CLOSESOCK
+ DEBUG_W("back_wait(5): deletehttp\n");
+#endif
+ deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET;
+
+ /* Tester totalsize en fin de chunk */
+ if ((back[i].r.totalsize>0)) { // tester totalsize
+ if (back[i].r.totalsize!=back[i].r.size) { // pas la même!
+#if HTS_CL_IS_FATAL
+ if (back[i].r.adr) { freet(back[i].r.adr); back[i].r.adr=NULL; }
+ back[i].r.statuscode=-1;
+ strcpy(back[i].r.msg,"Incorrect length");
+#else
+ // Un warning suffira..
+ if (cache->errlog!=NULL) {
+ fspc(cache->errlog,"warning"); fprintf(cache->errlog,"Incorrect length ("LLintP"!="LLintP" expected) for %s%s"LF,back[i].r.size,back[i].r.totalsize,back[i].url_adr,back[i].url_fil);
+ }
+#endif
+ }
+ }
+
+
+ }
+ }
+
+ // effacer buffer (chunk en tete)
+ if (back[i].chunk_adr!=NULL) {
+ freet(back[i].chunk_adr);
+ back[i].chunk_adr=NULL;
+ back[i].chunk_size=0;
+ }
+
+ } // chunk LF?
+ } // taille buffer chunk>2
+ //
+ } else if (back[i].status==99) { // en têtes (avant le chunk si il est présent)
+ //
+ if (back[i].r.size>=2) {
+ // double LF
+ if (
+ ((back[i].r.adr[back[i].r.size-1]==10) && (back[i].r.adr[back[i].r.size-2]==10))
+ ||
+ (back[i].r.adr[0] == '<') /* bogus server */
+ ) {
+ char rcvd[2048];
+ int ptr=0;
+ int noFreebuff=0;
+
+#if BDEBUG==1
+ printf("..ok, header received\n");
+#endif
+
+ /* Hack for zero-length headers */
+ if (back[i].r.adr[0] != '<') {
+
+ // ----------------------------------------
+ // traiter en-tête!
+ // status-line à récupérer
+ ptr+=binput(back[i].r.adr+ptr,rcvd,2000);
+ if (strnotempty(rcvd)==0)
+ ptr+=binput(back[i].r.adr+ptr,rcvd,2000); // "certains serveurs buggés envoient un \n au début" (RFC)
+
+ // traiter status-line
+ treatfirstline(&back[i].r,rcvd);
+
+#if HDEBUG
+ printf("(Buffer) Status-Code=%d\n",back[i].r.statuscode);
+#endif
+ if (_DEBUG_HEAD) {
+ if (ioinfo) {
+ fprintf(ioinfo,"response for %s%s:\r\ncode=%d\r\n",jump_identification(back[i].url_adr),back[i].url_fil,back[i].r.statuscode);
+ fprintfio(ioinfo,back[i].r.adr,">>> ");
+ fprintf(ioinfo,"\r\n");
+ fflush(ioinfo);
+ } // en-tête
+ }
+
+ // header // ** !attention! HTTP/0.9 non supporté
+ do {
+ ptr+=binput(back[i].r.adr+ptr,rcvd,2000);
+#if HDEBUG
+ printf("(buffer)>%s\n",rcvd);
+#endif
+ /*
+ if (_DEBUG_HEAD) {
+ if (ioinfo) {
+ fprintf(ioinfo,"(buffer)>%s\r\n",rcvd);
+ fflush(ioinfo);
+ }
+ }
+ */
+
+ if (strnotempty(rcvd))
+ treathead(opt->cookie,back[i].url_adr,back[i].url_fil,&back[i].r,rcvd); // traiter
+
+ // parfois les serveurs buggés renvoient un content-range avec un 200
+ if (back[i].r.statuscode==200) // 'OK'
+ if (strfield(rcvd,"content-range:")) // Avec un content-range: relisez les RFC..
+ back[i].r.statuscode=206; // FORCER A 206 !!!!!
+
+ } while(strnotempty(rcvd));
+ // ----------------------------------------
+
+ // libérer mémoire -- après! --
+ if (back[i].r.adr!=NULL) { freet(back[i].r.adr); back[i].r.adr=NULL; }
+ } else {
+ // assume text/html, OK
+ treatfirstline(&back[i].r, back[i].r.adr);
+ noFreebuff=1;
+ }
+
+
+
+ /*
+ Status code and header-response hacks
+ */
+
+
+ // Check response : 203 == 200
+ if (back[i].r.statuscode==203) { // 'Non-Authoritative Information'
+ back[i].r.statuscode=200; // forcer "OK"
+ } else if (back[i].r.statuscode == 100) {
+ back[i].status=99;
+ back[i].r.size=0;
+ back[i].r.totalsize=0;
+ back[i].chunk_size=0;
+ back[i].r.statuscode=-1;
+ back[i].r.msg[0]='\0';
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"Status 100 detected for %s%s, continuing headers"LF,back[i].url_adr,back[i].url_fil); test_flush;
+ }
+ continue;
+ }
+
+ /*
+ Solve "false" 416 problems
+ */
+ if (back[i].r.statuscode==416) { // 'Requested Range Not Satisfiable'
+ // Example:
+ // Range: bytes=2830-
+ // ->
+ // Content-Range: bytes */2830
+ if (back[i].range_req_size == back[i].r.crange) {
+ deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET;
+ back[i].status=0; // READY
+ back[i].r.size=back[i].r.totalsize=back[i].range_req_size;
+ filenote(back[i].url_sav,NULL);
+ back[i].r.statuscode=304; // NOT MODIFIED
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"File seems complete (good 416 message), breaking connection: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush;
+ }
+ }
+ }
+
+ // transform 406 into 200 ; we'll catch embedded links inside the choice page
+ if (back[i].r.statuscode==406) { // 'Not Acceptable'
+ back[i].r.statuscode=200;
+ }
+
+ // Various hacks to limit re-transfers when updating a mirror
+ // Force update if same size detected
+ if (opt->sizehack) {
+ // We already have the file
+ // and ask the remote server for an update
+ // Some servers, especially dynamic pages severs, always
+ // answer that the page has been modified since last visit
+ // And answer with a 200 (OK) response, and the same page
+ // If the size is the same, and the option has been set, we assume
+ // that the file is identical - and therefore let's break the connection
+ if (back[i].is_update) { // mise à jour
+ if (back[i].r.statuscode==200) { // 'OK'
+ htsblk r = cache_read(opt,cache,back[i].url_adr,back[i].url_fil,NULL); // lire entrée cache
+ if (r.statuscode == 200) { // OK pas d'erreur cache
+ LLint len1,len2;
+ len1=r.totalsize;
+ len2=back[i].r.totalsize;
+ if (r.size>0)
+ len1=r.size;
+ if (len1>0) {
+ if (len1 == len2) { // tailles identiques
+ back[i].r.statuscode=304; // forcer NOT MODIFIED
+ deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET;
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"File seems complete (same size), breaking connection: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush;
+ }
+ }
+ }
+ } else {
+ if (opt->errlog!=NULL) {
+ fspc(opt->errlog,"warning"); fprintf(opt->errlog,"File seems complete (same size), but there was a cache read error: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush;
+ }
+ }
+ if (r.adr) {
+ freet(r.adr);
+ }
+ }
+ }
+ }
+
+ // Various hacks to limit re-transfers when updating a mirror
+ // Detect already downloaded file (with another browser, for example)
+ if (opt->sizehack) {
+ if (!back[i].is_update) { // mise à jour
+ if (back[i].r.statuscode==200) { // 'OK'
+ if (!is_hypertext_mime(back[i].r.contenttype)) { // not HTML
+ if (strnotempty(back[i].url_sav)) { // target found
+ int size = fsize(back[i].url_sav); // target size
+ if (size >= 0) {
+ if (back[i].r.totalsize == size) { // same size!
+ deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET;
+ back[i].status=0; // READY
+ back[i].r.size=back[i].r.totalsize;
+ filenote(back[i].url_sav,NULL);
+ back[i].r.statuscode=304; // NOT MODIFIED
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"File seems complete (same size file discovered), breaking connection: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ // Various hacks to limit re-transfers when updating a mirror
+ // Detect bad range: header
+ if (opt->sizehack) {
+ // We have request for a partial file (with a 'Range: NNN-' header)
+ // and received a complete file notification (200), with 'Content-length: NNN'
+ // it might be possible that we had the complete file
+ // this is the case in *most* cases, so break the connection
+ if (back[i].r.is_write==0) { // mode mémoire
+ if (back[i].r.adr==NULL) { // rien n'a été écrit
+ if (!back[i].testmode) { // pas mode test
+ if (strnotempty(back[i].url_sav)) {
+ if (strcmp(back[i].url_fil,"/robots.txt")) {
+ if (back[i].r.statuscode==200) { // 'OK'
+ if (!is_hypertext_mime(back[i].r.contenttype)) { // pas HTML
+ if (back[i].r.statuscode==200) { // "OK"
+ if (back[i].range_req_size>0) { // but Range: requested
+ if (back[i].range_req_size == back[i].r.totalsize) { // And same size
+#if HTS_DEBUG_CLOSESOCK
+ DEBUG_W("back_wait(skip_range): deletehttp\n");
+#endif
+ deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET;
+ back[i].status=0; // READY
+ back[i].r.size=back[i].r.totalsize;
+ filenote(back[i].url_sav,NULL);
+ back[i].r.statuscode=304; // NOT MODIFIED
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"File seems complete (reget failed), breaking connection: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush;
+ }
+ }
+ }
+ }
+
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ // END - Various hacks to limit re-transfers when updating a mirror
+
+ /*
+ End of status code and header-response hacks
+ */
+
+
+
+ /* Interdiction taille par le wizard? */
+ if (back[i].r.soc!=INVALID_SOCKET) {
+ if (!back_checksize(opt,&back[i],1)) {
+ back[i].status=0; // FINI
+ deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET;
+ if (!back[i].testmode)
+ strcpy(back[i].r.msg,"File too big");
+ else
+ strcpy(back[i].r.msg,"Test: File too big");
+ }
+ }
+
+ /* sinon, continuer */
+ /* if (back[i].r.soc!=INVALID_SOCKET) { // ok récupérer body? */
+ // head: terminé
+ if (back[i].head_request) {
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"Tested file: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush;
+ }
+#if HTS_DEBUG_CLOSESOCK
+ DEBUG_W("back_wait(head request): deletehttp\n");
+#endif
+ // Couper connexion
+ deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET;
+ back[i].status=0; // terminé
+ }
+ // traiter une éventuelle erreur 304 (cache à jour utilisable)
+ else if (back[i].r.statuscode==304) { // document à jour dans le cache
+ // lire dans le cache
+ // ** NOTE: pas de vérif de la taille ici!!
+#if HTS_DEBUG_CLOSESOCK
+ DEBUG_W("back_wait(file is not modified): deletehttp\n");
+#endif
+ deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET;
+ back[i].r=cache_read(opt,cache,back[i].url_adr,back[i].url_fil,back[i].url_sav);
+ if (!back[i].r.location)
+ back[i].r.location=back[i].location_buffer;
+ else { /* recopier */
+ strcpy(back[i].location_buffer,back[i].r.location);
+ back[i].r.location=back[i].location_buffer;
+ }
+
+ // hack:
+ // In case of 'if-unmodified-since' hack, a 304 status can be sent
+ // then, force 'ok' status
+ if (back[i].r.statuscode == -1) {
+ if (fexist(back[i].url_sav)) {
+ back[i].r.statuscode=200; // OK
+ if ((opt->debug>0) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"Not-modified status without cache guessed: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush;
+ }
+ }
+ }
+
+ // Status is okay?
+ if (back[i].r.statuscode!=-1) { // pas d'erreur de lecture
+ back[i].status=0; // OK prêt
+ back[i].r.notmodified=1; // NON modifié!
+ if ((opt->debug>0) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"File loaded after test from cache: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush;
+ }
+
+ // finalize
+ if (back[i].r.statuscode>0) {
+ back_finalize(opt,cache,back,i);
+ }
+
+#if DEBUGCA
+ printf("..document à jour après requète: %s%s\n",back[i].url_adr,back[i].url_fil);
+#endif
+
+ //printf(">%s status %d\n",back[p].r.contenttype,back[p].r.statuscode);
+ } else { // erreur
+ back[i].status=0; // terminé
+ //printf("erreur cache\n");
+
+ }
+
+ } else if ((back[i].r.statuscode==301)
+ || (back[i].r.statuscode==302)
+ || (back[i].r.statuscode==303)
+ || (back[i].r.statuscode==307)
+ || (back[i].r.statuscode==412)
+ || (back[i].r.statuscode==416)
+ ) { // Ne pas prendre le html, erreurs connues et gérées
+#if HTS_DEBUG_CLOSESOCK
+ DEBUG_W("back_wait(301,302,303,307,412,416..): deletehttp\n");
+#endif
+ // Couper connexion
+ deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET;
+ back[i].status=0; // terminé
+ // finalize
+ if (back[i].r.statuscode>0) {
+ back_finalize(opt,cache,back,i);
+ }
+ } else { // il faut aller le chercher
+
+ // effacer buffer (requète)
+ if (!noFreebuff) {
+ if (back[i].r.adr!=NULL) {
+ freet(back[i].r.adr);
+ back[i].r.adr=NULL;
+ }
+ back[i].r.size=0;
+ }
+
+ // traiter 206 (partial content)
+ // xxc SI CHUNK VERIFIER QUE CA MARCHE??
+ if (back[i].r.statuscode==206) { // on nous envoie un morceau (la fin) coz une partie sur disque!
+ LLint sz=fsize(back[i].url_sav);
+#if HDEBUG
+ printf("partial content: "LLintP" on disk..\n",(LLint)sz);
+#endif
+ if (sz>=0) {
+ if (!is_hypertext_mime(back[i].r.contenttype)) { // pas HTML
+ if (opt->getmode&2) { // on peut ecrire des non html **sinon ben euhh sera intercepté plus loin, donc rap sur ce qui va sortir**
+ filenote(back[i].url_sav,NULL); // noter fichier comme connu
+ back[i].r.out=fopen(fconv(back[i].url_sav),"ab"); // append
+ if (back[i].r.out) {
+ back[i].r.is_write=1; // écrire
+ back[i].r.size=sz; // déja écrit
+ back[i].r.statuscode=200; // Forcer 'OK'
+ if (back[i].r.totalsize>0)
+ back[i].r.totalsize+=sz; // plus en fait
+ fseek(back[i].r.out,0,SEEK_END); // à la fin
+#if HDEBUG
+ printf("continue interrupted file\n");
+#endif
+ } else { // On est dans la m**
+ back[i].status=0; // terminé (voir plus loin)
+ strcpy(back[i].r.msg,"Can not open partial file");
+ }
+ }
+ } else { // mémoire
+ FILE* fp=fopen(fconv(back[i].url_sav),"rb");
+ if (fp) {
+ LLint alloc_mem=sz + 1;
+ if (back[i].r.totalsize>0)
+ alloc_mem+=back[i].r.totalsize; // AJOUTER RESTANT!
+ if ( (back[i].r.adr=(char*) malloct((INTsys) alloc_mem)) ) {
+ back[i].r.size=sz;
+ if (back[i].r.totalsize>0)
+ back[i].r.totalsize+=sz; // plus en fait
+ if (((int) fread(back[i].r.adr,1,(INTsys)sz,fp)) != sz) {
+ back[i].status=0; // terminé (voir plus loin)
+ strcpy(back[i].r.msg,"Can not read partial file");
+ } else {
+ back[i].r.statuscode=200; // Forcer 'OK'
+#if HDEBUG
+ printf("continue in mem interrupted file\n");
+#endif
+ }
+ } else {
+ back[i].status=0; // terminé (voir plus loin)
+ strcpy(back[i].r.msg,"No memory for partial file");
+ }
+ fclose(fp);
+ } else { // Argh..
+ back[i].status=0; // terminé (voir plus loin)
+ strcpy(back[i].r.msg,"Can not open partial file");
+ }
+ }
+ } else { // Non trouvé??
+ back[i].status=0; // terminé (voir plus loin)
+ strcpy(back[i].r.msg,"Can not find partial file");
+ }
+ // Erreur?
+ if (back[i].status==0) {
+ if (back[i].r.soc!=INVALID_SOCKET) {
+#if HTS_DEBUG_CLOSESOCK
+ DEBUG_W("back_wait(206 solve problems): deletehttp\n");
+#endif
+ deletehttp(&back[i].r);
+ }
+ back[i].r.soc=INVALID_SOCKET;
+ //back[i].r.statuscode=206; ????????
+ back[i].r.statuscode=-5;
+ if (strnotempty(back[i].r.msg))
+ strcpy(back[i].r.msg,"Error attempting to solve status 206 (partial file)");
+ }
+ }
+
+ if (back[i].status!=0) { // non terminé (erreur)
+ if (!back[i].testmode) { // fichier normal
+
+ if (!back[i].r.is_chunk) { // pas de chunk
+ //if (back[i].r.http11!=2) { // pas de chunk
+ back[i].is_chunk=0;
+ back[i].status=1; // start body
+ } else {
+#if CHUNKDEBUG==1
+ printf("chunk encoding detected %s..\n",back[i].url_fil);
+#endif
+ back[i].is_chunk=1;
+ back[i].chunk_adr=NULL;
+ back[i].chunk_size=0;
+ back[i].status=98; // start body wait chunk
+ }
+ if (back[i].rateout>0) {
+ back[i].rateout_time=time_local(); // refresh pour transfer rate
+ }
+#if HDEBUG
+ printf("(buffer) start body!\n");
+#endif
+ } else { // mode test, ne pas passer en 1!!
+ back[i].status=0; // READY
+#if HTS_DEBUG_CLOSESOCK
+ DEBUG_W("back_wait(test ok): deletehttp\n");
+#endif
+ deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET;
+ if (back[i].r.statuscode==200) {
+ strcpy(back[i].r.msg,"Test: OK");
+ back[i].r.statuscode=-10; // test réussi
+ }
+ else { // test a échoué, on ne change rien sauf que l'erreur est à titre indicatif
+ char tempo[1000];
+ strcpy(tempo,back[i].r.msg);
+ strcpy(back[i].r.msg,"Test: ");
+ strcat(back[i].r.msg,tempo);
+ }
+
+ }
+ }
+
+ }
+
+ /*}*/
+
+ } // si LF
+ } // r.size>2
+ } // si == 99
+
+ } // si pas d'erreurs
+#if BDEBUG==1
+ printf("bytes overall: %d\n",back[i].r.size);
+#endif
+ } // données dispo
+
+ // en cas d'erreur cl, supprimer éventuel fichier sur disque
+#if HTS_REMOVE_BAD_FILES
+ if (back[i].status<0) {
+ if (!back[i].testmode) { // pas en test
+ remove(back[i].url_sav); // éliminer fichier (endommagé)
+ //printf("&& %s\n",back[i].url_sav);
+ }
+ }
+#endif
+
+ /* funny log for commandline users */
+ //if (!opt->quiet) {
+ // petite animation
+ if (opt->verbosedisplay==1) {
+ if (back[i].status==0) {
+ if (back[i].r.statuscode==200)
+ printf("* %s%s ("LLintP" bytes) - OK"VT_CLREOL"\r",back[i].url_adr,back[i].url_fil,back[i].r.size);
+ else
+ printf("* %s%s ("LLintP" bytes) - %d"VT_CLREOL"\r",back[i].url_adr,back[i].url_fil,back[i].r.size,back[i].r.statuscode);
+ fflush(stdout);
+ }
+ }
+ //}
+
+
+ } // status>0
+ } // for
+
+ // vérifier timeouts
+ if (gestion_timeout) {
+ TStamp act;
+ act=time_local(); // temps en secondes
+ for(i=0;i<back_max;i++) {
+ if (back[i].status>0) { // réception/connexion/..
+ if (back[i].timeout>0) {
+ //printf("time check %d\n",((int) (act-back[i].timeout_refresh))-back[i].timeout);
+ if (((int) (act-back[i].timeout_refresh))>=back[i].timeout) {
+ if (back[i].r.soc!=INVALID_SOCKET) {
+#if HTS_DEBUG_CLOSESOCK
+ DEBUG_W("back_wait(timeout): deletehttp\n");
+#endif
+ deletehttp(&back[i].r);
+ }
+ back[i].r.soc=INVALID_SOCKET;
+ back[i].r.statuscode=-2;
+ if (back[i].status==100)
+ strcpy(back[i].r.msg,"Connect Time Out");
+ else if (back[i].status==101)
+ strcpy(back[i].r.msg,"DNS Time Out");
+ else
+ strcpy(back[i].r.msg,"Receive Time Out");
+ back[i].status=0; // terminé
+ } else if ((back[i].rateout>0) && (back[i].status<99)) {
+ if (((int) (act-back[i].rateout_time))>=HTS_WATCHRATE) { // checker au bout de 15s
+ if ( (int) ((back[i].r.size)/(act-back[i].rateout_time)) < back[i].rateout ) { // trop lent
+ back[i].status=0; // terminé
+ if (back[i].r.soc!=INVALID_SOCKET) {
+#if HTS_DEBUG_CLOSESOCK
+ DEBUG_W("back_wait(rateout): deletehttp\n");
+#endif
+ deletehttp(&back[i].r);
+ }
+ back[i].r.soc=INVALID_SOCKET;
+ back[i].r.statuscode=-3;
+ strcpy(back[i].r.msg,"Transfer Rate Too Low");
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ max_loop--;
+#if HTS_ANALYSTE
+ max_loop_chk++;
+#endif
+ } while((busy_state) && (busy_recv) && (max_loop>0));
+#if HTS_ANALYSTE
+ if ((!busy_recv) && (!busy_state)) {
+ if (max_loop_chk>=1) {
+ Sleep(10); // un tite pause pour éviter les lag..
+ }
+ }
+#endif
+}
+
+/*
+  Tell whether the size of the transfer described by eback is still acceptable.
+  opt                  : engine options, handed over to hts_testlinksize()
+  eback                : the transfer slot to check
+  check_only_totalsize : non-zero to test only the announced total size
+                         (r.totalsize); zero to test the larger of the announced
+                         size and the number of bytes already received (r.size)
+  Returns 0 when the file is refused (too big), 1 otherwise. A negative size
+  to test (unknown size) is always accepted.
+*/
+int back_checksize(httrackp* opt,lien_back* eback,int check_only_totalsize) {
+  LLint size_to_test;
+  if (check_only_totalsize)
+    size_to_test=eback->r.totalsize;
+  else
+    size_to_test=max(eback->r.totalsize,eback->r.size);
+  if (size_to_test>=0) {
+
+    /* Size refused by the wizard? The size is given in units of 1024 bytes, rounded up.
+       Use size_to_test here too: r.totalsize alone may still be unknown (-1) while
+       r.size already holds the number of bytes received */
+    if (hts_testlinksize(opt,eback->url_adr,eback->url_fil,(size_to_test+1023)/1024)==-1) {
+      return 0;     /* refused */
+    }
+
+    /* regular size limits (html and non html) */
+    if ((istoobig(size_to_test,eback->maxfile_html,eback->maxfile_nonhtml,eback->r.contenttype))) {
+      return 0;     /* refused */
+    }
+  }
+  return 1;
+}
+
+
+// transferred bytes: returns nb plus the bytes received so far (r.size)
+// on every slot whose status lies strictly between 0 and 99
+LLint back_transfered(LLint nb,lien_back* back,int back_max) {
+  LLint total=nb;
+  int slot=0;
+  // add the bytes of the transfers still in progress
+  while(slot<back_max) {
+    if (!((back[slot].status<=0) || (back[slot].status>=99)))
+      total+=back[slot].r.size;
+    slot++;
+  }
+  return total;
+}
+
+// backing information: print the description of slot i, followed by LF, to fp
+// j: 1 show sockets, 2 show others, 3 show everything (passed on to back_infostr())
+// Nothing is printed when the slot status is negative, or when the
+// temporary buffer can not be allocated.
+void back_info(lien_back* back,int i,int j,FILE* fp) {
+  if (back[i].status>=0) {
+    // back_infostr() appends a state keyword, the quoted host and file names and
+    // two sizes: size the buffer from the real strings instead of a fixed 256 bytes,
+    // which a long URL would overflow (128 covers keyword, quotes, sizes, LF and NUL)
+    size_t capa=strlen(back[i].url_adr)+strlen(back[i].url_fil)+128;
+    char* s=(char*) malloct(capa);
+    if (s!=NULL) {
+      s[0]='\0';
+      back_infostr(back,i,j,s);
+      strcat(s,LF);
+      fprintf(fp,"%s",s);
+      freet(s);
+    }
+  }
+}
+
+// backing information: append a one-line description of slot i to the string s
+// s must already hold a NUL-terminated string; the caller provides the room
+// (state keyword + quoted host/file names + two sizes)
+// j: 1 show sockets (slots connecting/receiving), 2 show others (finished slots), 3 show everything
+// Appended text: STATE "url_adr/url_fil" <bytes received> <total bytes>
+// Nothing is appended when the slot status is negative.
+void back_infostr(lien_back* back,int i,int j,char* s) {
+  if (back[i].status>=0) {
+    int aff=0;      // non-zero when the URL and the sizes must follow the state keyword
+    if (j & 1) {
+      if (back[i].status==100) {          // connecting: keyword only
+        strcat(s,"CONNECT ");
+      } else if (back[i].status==99) {    // receiving headers
+        strcat(s,"INFOS ");
+        aff=1;
+      } else if (back[i].status==98) {    // receiving a chunk size
+        strcat(s,"INFOSC");                // infos chunk
+        aff=1;
+      }
+      else if (back[i].status>0) {        // any other active state
+#if HTS_ANALYSTE==2
+        strcat(s,"WAIT ");
+#else
+        strcat(s,"RECEIVE ");
+#endif
+        aff=1;
+      }
+    }
+    if (j & 2) {
+      if (back[i].status==0) {            // finished slot: describe its status code
+        switch (back[i].r.statuscode) {
+        case 200:
+          strcat(s,"READY ");
+          aff=1;
+          break;
+#if HTS_ANALYSTE==2
+        default:
+          // NOTE(review): aff is left untouched here, so no URL follows "ERROR " -- confirm this is intended
+          strcat(s,"ERROR ");
+          break;
+#else
+        case -1:
+          strcat(s,"ERROR ");
+          aff=1;
+          break;
+        case -2:
+          strcat(s,"TIMEOUT ");
+          aff=1;
+          break;
+        case -3:
+          strcat(s,"TOOSLOW ");
+          aff=1;
+          break;
+        case 400:
+          strcat(s,"BADREQUEST ");
+          aff=1;
+          break;
+        case 401: case 403:
+          strcat(s,"FORBIDDEN ");
+          aff=1;
+          break;
+        case 404:
+          strcat(s,"NOT FOUND ");
+          aff=1;
+          break;
+        case 500:
+          strcat(s,"SERVERROR ");
+          aff=1;
+          break;
+        default:
+          {
+            char s2[256];
+            sprintf(s2,"ERROR(%d)",back[i].r.statuscode);
+            strcat(s,s2);
+          }
+          aff=1;
+          break;
+#endif
+        }
+      }
+    }
+
+    if (aff) {
+      char s2[256];
+
+      // quoted URL, appended straight into s so that long names never go
+      // through a fixed-size temporary buffer
+      strcat(s,"\"");
+      strcat(s,back[i].url_adr);
+      if (back[i].url_fil[0]!='/') strcat(s,"/");
+      strcat(s,back[i].url_fil);
+      strcat(s,"\" ");
+
+      // sizes: format into the temporary buffer, then append; formatting into s
+      // itself would wipe the keyword and the URL built so far
+      sprintf(s2,LLintP" "LLintP" ",back[i].r.size,back[i].r.totalsize);
+      strcat(s,s2);
+    }
+  }
+}
+
+// -- backing --
+
+#undef test_flush
diff --git a/src/htsback.h b/src/htsback.h
new file mode 100644
index 0000000..af5fe6c
--- /dev/null
+++ b/src/htsback.h
@@ -0,0 +1,75 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+/* ------------------------------------------------------------ */
+/* File: httrack.c subroutines: */
+/* backing system (multiple socket download) */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+
+#ifndef HTSBACK_DEFH
+#define HTSBACK_DEFH
+
+#include "htsglobal.h"
+#include "htsbasenet.h"
+#include "htscore.h"
+
+// backing system (multiple socket download): transfer slot functions. NOTE(review): BACK_ADD_TEST* look like placeholder names for test requests -- TODO confirm
+#define BACK_ADD_TEST "(dummy)"
+#define BACK_ADD_TEST2 "(dummy2)"
+int back_index(lien_back* back,int back_max,char* adr,char* fil,char* sav);
+int back_available(lien_back* back,int back_max);
+LLint back_incache(lien_back* back,int back_max);
+HTS_INLINE int back_exist(lien_back* back,int back_max,char* adr,char* fil,char* sav);
+int back_nsoc(lien_back* back,int back_max);
+int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* referer_adr,char* referer_fil,int test,short int* pass2_ptr);
+int back_stack_available(lien_back* back,int back_max);
+void back_clean(httrackp* opt,cache_back* cache,lien_back* back,int back_max);
+void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TStamp stat_timestart); // works on the slots: receives pending data, handles headers and chunks, checks timeouts
+int back_delete(lien_back* back,int p);
+int back_finalize(httrackp* opt,cache_back* cache,lien_back* back,int p); // called by back_wait() when a transfer ends with a positive status code
+void back_info(lien_back* back,int i,int j,FILE* fp); // prints the back_infostr() text for slot i, followed by LF, to fp
+void back_infostr(lien_back* back,int i,int j,char* s); // appends a description of slot i to s; j: 1 show sockets, 2 show others, 3 show everything
+LLint back_transfered(LLint add,lien_back* back,int back_max); // returns add plus r.size of every slot whose status is between 1 and 98
+// host lookup helpers (NOTE(review): presumably background host name resolution, only built when HTS_XGETHOST is set -- TODO confirm)
+#if HTS_XGETHOST
+void back_solve(lien_back* back);
+int host_wait(lien_back* back);
+#endif
+int back_checksize(httrackp* opt,lien_back* eback,int check_only_totalsize); // returns 0 when the size is refused (hts_testlinksize() or istoobig()), 1 otherwise
+
+#if HTS_XGETHOST
+#if USE_BEGINTHREAD
+PTHREAD_TYPE Hostlookup(void* iadr_p);
+#endif
+#endif
+
+#endif
diff --git a/src/htsbase.h b/src/htsbase.h
new file mode 100644
index 0000000..3e83471
--- /dev/null
+++ b/src/htsbase.h
@@ -0,0 +1,136 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: Basic definitions */
+/* Used in .c files for basic (malloc() ..) definitions */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+#ifndef HTS_BASICH
+#define HTS_BASICH
+
+#include "htsglobal.h"
+
+// size_t et mode_t
+#include <stdio.h>
+#if HTS_WIN
+#else
+#include <fcntl.h>
+#endif
+
+#if HTS_WIN // nothing is defined here for Windows builds (NOTE(review): min/max presumably come from the platform headers there -- TODO confirm)
+#else // other platforms: generic min()/max() macros; beware, an argument may be evaluated twice
+ #define min(a,b) ((a)>(b)?(b):(a))
+ #define max(a,b) ((a)>(b)?(a):(b))
+#endif
+
+// case-insensitive equality test for 2 chars: hichar() maps 'a'..'z' to 'A'..'Z' and leaves any other value unchanged, streql() compares the mapped values (arguments are evaluated several times)
+#define hichar(a) ((((a)>='a') && ((a)<='z')) ? ((a)-('a'-'A')) : (a))
+#define streql(a,b) (hichar(a)==hichar(b))
+
+// is this MIME type a hypertext one (text/html), an html/js-style one or another script/text type? Each candidate is tested with strfield2(), which is declared elsewhere
+#define HTS_HYPERTEXT_DEFAULT_MIME "text/html"
+#define is_hypertext_mime(a) \
+ ( (strfield2((a),"text/html")!=0)\
+ || (strfield2((a),"application/x-javascript")!=0) \
+ || (strfield2((a),"text/css")!=0) \
+ || (strfield2((a),"image/svg+xml")!=0) \
+ || (strfield2((a),"image/svg-xml")!=0) \
+ /*|| (strfield2((a),"audio/x-pn-realaudio")!=0) */\
+ )
+// MIME types that only *may* be hypertext: currently audio/x-pn-realaudio alone, which is commented out of is_hypertext_mime() above
+#define may_be_hypertext_mime(a) \
+ (\
+ (strfield2((a),"audio/x-pn-realaudio")!=0) \
+ )
+
+
+// upper-case letter test ('A'..'Z' only)
+#define isUpperLetter(a) ( ((a) >= 'A') && ((a) <= 'Z') )
+
+// optional conversion of '/' to backslash: a function (defined elsewhere) on Windows, a macro that yields its argument unchanged on other platforms
+#if HTS_WIN
+char* antislash(char* s);
+#else
+#define antislash(A) (A)
+#endif
+
+
+// hand-written prototypes for a few C library functions, given C linkage when compiled as C++ (the whole section is skipped when HTS_PLATFORM is 3)
+#if HTS_PLATFORM!=3
+#ifdef __cplusplus
+extern "C" {
+#endif
+#if HTS_PLATFORM!=2
+#if HTS_PLATFORM!=1 // open() is only declared when HTS_PLATFORM is neither 1 nor 2
+ int open (const char *, int, ...);
+#endif
+ //int read (int,const char*,int);
+ //int write (int,char*,int);
+#endif
+#if HTS_PLATFORM!=1 // close() and the allocation functions are declared unless HTS_PLATFORM is 1
+ int close (int);
+ void* calloc (size_t,size_t);
+ void* malloc (size_t);
+ void* realloc (void*,size_t);
+ void free (void*);
+#endif
+#if HTS_WIN
+#else // mkdir() with a mode argument is only declared for non-Windows builds
+ int mkdir (const char*,mode_t);
+#endif
+#ifdef __cplusplus
+}
+#endif
+#endif
+
+
+// allocation wrappers used by the .c files: with HTS_TRACE_MALLOC set they go through the hts_* functions declared below (malloc() tracing), otherwise they are the plain C library calls
+#if HTS_TRACE_MALLOC
+#define malloct(A) hts_malloc(A,0)
+#define calloct(A,B) hts_malloc(A,B)
+#define freet(A) hts_free(A)
+#define realloct(A,B) hts_realloc(A,B)
+void hts_freeall();
+void* hts_malloc (size_t,size_t);
+void hts_free (void*);
+void* hts_realloc (void*,size_t);
+#else // no tracing: direct mapping to malloc(), calloc(), free() and realloc()
+#define malloct(A) malloc(A)
+#define calloct(A,B) calloc(A,B)
+#define freet(A) free(A)
+#define realloct(A,B) realloc(A,B)
+#endif
+
+
+#endif
+
diff --git a/src/htsbasenet.h b/src/htsbasenet.h
new file mode 100644
index 0000000..d63a2e7
--- /dev/null
+++ b/src/htsbasenet.h
@@ -0,0 +1,86 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: Basic net definitions */
+/* Used in .c and .h files that need T_SOC and related types */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+#ifndef HTS_DEFBASENETH
+#define HTS_DEFBASENETH
+
+/* Platform socket types: T_SOC is the socket handle type, t_hostent the host entry type */
+#if HTS_WIN
+
+/* Windows: winsock 1 without IPv6, winsock 2 + IPv6 headers when HTS_INET6 is non-zero */
+#if HTS_INET6==0
+ #include <winsock.h>
+#else
+#undef HTS_USESCOPEID
+#define WIN32_LEAN_AND_MEAN
+#include <winsock2.h>
+#include <ws2tcpip.h>
+#include <tpipv6.h>
+#endif
+ typedef SOCKET T_SOC;
+ typedef struct hostent FAR t_hostent;
+
+#else
+#define HTS_USESCOPEID
+ /* Plain int descriptors; the replacement list is parenthesized so that the negative */
+ /* constant always expands as a single operand */
+ #define INVALID_SOCKET (-1)
+ typedef int T_SOC;
+ typedef struct hostent t_hostent;
+#endif
+
+#if HTS_USEOPENSSL
+/*
+ OpenSSL crypto routines by Eric Young (eay@cryptsoft.com)
+ Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
+ All rights reserved
+*/
+#ifndef HTS_OPENSSL_H_INCLUDED
+#define HTS_OPENSSL_H_INCLUDED
+#ifdef __cplusplus
+extern "C" {
+#endif
+#include <openssl/ssl.h>
+#include <openssl/crypto.h>
+#include <openssl/err.h>
+//#include <openssl/bio.h>
+#ifdef __cplusplus
+ }
+#endif
+/* OpenSSL structure */
+extern SSL_CTX *openssl_ctx;
+
+#endif
+#endif
+
+#endif
diff --git a/src/htsbauth.c b/src/htsbauth.c
new file mode 100644
index 0000000..a1506c1
--- /dev/null
+++ b/src/htsbauth.c
@@ -0,0 +1,401 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: httrack.c subroutines: */
+/* basic authentication: password storage */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+
+#include "htsbauth.h"
+
+/* specific definitions */
+#include "htsglobal.h"
+#include "htslib.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "htsnostatic.h"
+
+/* END specific definitions */
+
+// Cookie store management
+// Adds a cookie to cookie->data (one tab-separated line per cookie); the new line is
+// inserted before the first stored entry whose path is shorter (longest path has priority)
+// The stored cookie found by cookie_del() for this name/domain/path is removed first
+// Returns 0 on success, -1 when a field is too long or the store would exceed max_len
+int cookie_add(t_cookie* cookie,char* cook_name,char* cook_value,char* domain,char* path) {
+  char entry[16384];
+  char* scan;
+  char* insert_at;
+
+  // remove a possible duplicate
+  cookie_del(cookie,cook_name,domain,path);
+
+  // per-field limits
+  if ( ((int) strlen(cook_value) > 1024)
+    || ((int) strlen(cook_name) > 256)
+    || ((int) strlen(domain) > 256)
+    || ((int) strlen(path) > 256) )
+    return -1;
+
+  // rough room check, with some slack for separators and fixed fields
+  if ((int) (strlen(cookie->data) + strlen(cook_value) + strlen(cook_name)
+           + strlen(domain) + strlen(path) + 256) > cookie->max_len)
+    return -1;
+
+  // walk the stored lines until one with a shorter path is met
+  scan=cookie->data;
+  insert_at=scan;
+  while (*scan != '\0') {
+    if (strlen(cookie_get(scan,2)) < strlen(path))
+      break;                       // insert before this entry
+    scan=cookie_nextfield(scan);   // next line, blank lines skipped
+    insert_at=scan;
+  }
+
+  // build the cookie line
+  strcpy(entry,domain);
+  strcat(entry,"\t");
+  strcat(entry,"TRUE");
+  strcat(entry,"\t");
+  strcat(entry,path);
+  strcat(entry,"\t");
+  strcat(entry,"FALSE");
+  strcat(entry,"\t");
+  strcat(entry,"1999999999");
+  strcat(entry,"\t");
+  strcat(entry,cook_name);
+  strcat(entry,"\t");
+  strcat(entry,cook_value);
+  strcat(entry,"\n");
+
+  // exact room check now that the line is known
+  if ( ((int) strlen(cookie->data) + (int) strlen(entry)) >= cookie->max_len )
+    return -1;
+  cookie_insert(insert_at,entry);
+#if DEBUG_COOK
+  printf("add_new cookie: name=\"%s\" value=\"%s\" domain=\"%s\" path=\"%s\"\n",cook_name,cook_value,domain,path);
+  //printf(">>>cook: %s<<<\n",cookie->data);
+#endif
+  return 0;
+}
+
+// Removes the stored cookie line located by cookie_find(), if there is one
+// Always returns 0
+int cookie_del(t_cookie* cookie,char* cook_name,char* domain,char* path) {
+  char* entry=cookie_find(cookie->data,cook_name,domain,path);
+  if (entry != NULL) {
+    char* following=cookie_nextfield(entry);
+    cookie_delete(entry,(int) (following - entry));
+#if DEBUG_COOK
+    printf("deleted old cookie: %s %s %s\n",cook_name,domain,path);
+#endif
+  }
+  return 0;
+}
+
+// Searches for a cookie line starting at position s (for example s=cookie.data)
+// Returns a pointer to the matching line, or NULL when nothing matches
+// A line matches when:
+//  - cook_name is empty, or equals the name field (field 5)
+//  - the line's domain (field 0) is a suffix of domain
+//  - the line's path (field 2) is a prefix of path
+// Sample line: .doubleclick.net TRUE / FALSE 1999999999 id A
+char* cookie_find(char* s,char* cook_name,char* domain,char* path) {
+  char* entry;
+  for (entry=s; *entry != '\0'; entry=cookie_nextfield(entry)) {
+    char* field;
+
+    // name filter (an empty name accepts any cookie)
+    if (strnotempty(cook_name)!=0) {
+      if (strcmp(cookie_get(entry,5),cook_name)!=0)
+        continue;
+    }
+
+    // domain of the stored cookie must end the requested domain
+    field=cookie_get(entry,0);
+    if ((int) strlen(field) > (int) strlen(domain))
+      continue;
+    if (strcmp(field,domain+strlen(domain)-strlen(field))!=0)
+      continue;
+
+    // path of the stored cookie must start the requested path
+    field=cookie_get(entry,2);
+    if ((int) strlen(field) > (int) strlen(path))
+      continue;
+    if (strncmp(path,field,strlen(field))==0)
+      return entry;
+  }
+  return NULL;
+}
+
+// Returns the start of the next line: the position after the first '\n' (any further
+// consecutive '\n' are skipped), or the terminating NUL when there is no '\n' left
+char* cookie_nextfield(char* a) {
+  char* eol=strchr(a,'\n');
+  char* next=(eol != NULL) ? (eol + 1) : (a + strlen(a));
+  while (*next == '\n')
+    next++;
+  return next;
+}
+
+// Copies field number param of line into dest (dest_size bytes, NUL included)
+// Returns 1 when the field fits, 0 (dest untouched) when it does not
+static int cookie_load_field(char* dest,size_t dest_size,char* line,int param) {
+  char* field=cookie_get(line,param);
+  if (strlen(field) >= dest_size)
+    return 0;
+  strcpy(dest,field);
+  return 1;
+}
+
+// Reads cookies.txt (file "name" in directory "fpath") into cookie->data
+// On Windows, the *@*.txt files found in fpath (copied IE cookies) are merged first,
+// and each merged file is removed afterwards
+// Lines of cookies.txt starting with '#', empty lines, and lines holding a field that
+// does not fit its buffer are ignored
+// Returns 0 when cookies.txt could be opened, -1 otherwise
+int cookie_load(t_cookie* cookie,char* fpath,char* name) {
+  cookie->data[0]='\0';
+
+  // First merge the IE cookies, if any
+#if HTS_WIN
+  {
+    WIN32_FIND_DATA find;
+    HANDLE h=INVALID_HANDLE_VALUE;
+    char pth[MAX_PATH + 32];
+    // build the search pattern only when it fits in pth
+    if (strlen(fpath) + strlen("*@*.txt") < sizeof(pth)) {
+      strcpy(pth,fpath);
+      strcat(pth,"*@*.txt");
+      h = FindFirstFile(pth,&find);
+    }
+    if (h != INVALID_HANDLE_VALUE) {
+      do {
+        if ( (!(find.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY ))
+          && (!(find.dwFileAttributes & FILE_ATTRIBUTE_SYSTEM )) ) {
+          FILE* fp=fopen(fconcat(fpath,find.cFileName),"rb");
+          if (fp) {
+            char cook_name[256];
+            char cook_value[1000];
+            char domainpathpath[512];
+            //
+            char domain[256];            // cookie domain (.netscape.com)
+            char path[256];              // path (/)
+            int cookie_merged=0;
+            // file layout: name, value, then domain+path, one per line
+            linput(fp,cook_name,250);
+            if (!feof(fp)) {
+              linput(fp,cook_value,250);
+              if ( (!feof(fp)) && (strnotempty(cook_value)) ) {
+                linput(fp,domainpathpath,500);
+                if (strnotempty(domainpathpath)) {
+                  // NOTE(review): ident_url_absolute() writes into the 256-byte domain/path
+                  // buffers; confirm it bounds its output
+                  if (ident_url_absolute(domainpathpath,domain,path)>=0) {
+                    cookie_add(cookie,cook_name,cook_value,domain,path);
+                    cookie_merged=1;
+                  }
+                }
+              }
+            }
+            fclose(fp);
+            if (cookie_merged)
+              remove(fconcat(fpath,find.cFileName));
+          }     // if fp
+        }
+      } while(FindNextFile(h,&find));
+      FindClose(h);
+    }
+  }
+#endif
+
+  // Then, cookies.txt
+  {
+    FILE* fp = fopen(fconcat(fpath,name),"rb");
+    if (fp) {
+      char line[8192];
+      while( (!feof(fp)) && (((int) strlen(cookie->data)) < cookie->max_len)) {
+        rawlinput(fp,line,8100);
+        if (strnotempty(line)) {
+          if (strlen(line)<8000) {
+            if (line[0]!='#') {
+              char domain[256];            // cookie domain (.netscape.com)
+              char path[256];              // path (/)
+              char cook_name[256];         // cookie name (MYCOOK)
+              char cook_value[8192];       // value (ID=toto,S=1234)
+#if DEBUG_COOK
+              printf("%s\n",line);
+#endif
+              // a field may be as long as the whole line: copy each one with a bound
+              // and drop the line when one of them does not fit
+              if ( cookie_load_field(domain,sizeof(domain),line,0)              // host
+                && cookie_load_field(path,sizeof(path),line,2)                  // path
+                && cookie_load_field(cook_name,sizeof(cook_name),line,5)        // name
+                && cookie_load_field(cook_value,sizeof(cook_value),line,6) ) {  // value
+                cookie_add(cookie,cook_name,cook_value,domain,path);
+              }
+            }
+          }
+        }
+      }
+      fclose(fp);
+      return 0;
+    }
+  }
+  return -1;
+}
+
+// Writes the cookie store to the file "name"
+// Returns 0 on success (an empty store writes nothing and is a success),
+// -1 when the file cannot be opened
+int cookie_save(t_cookie* cookie,char* name) {
+  char line[8192];
+  char* cursor;
+  FILE* out;
+
+  if (strnotempty(cookie->data)==0)
+    return 0;              // nothing to save
+
+  out=fopen(fconv(name),"wb");
+  if (out == NULL)
+    return -1;
+
+  fprintf(out,"# HTTrack Website Copier Cookie File"LF"# This file format is compatible with Netscape cookies"LF);
+  // copy line by line; the first empty line read ends the copy (it is still written)
+  cursor=cookie->data;
+  for(;;) {
+    cursor+=binput(cursor,line,8000);
+    fprintf(out,"%s"LF,line);
+    if (strnotempty(line)==0)
+      break;
+  }
+  fclose(out);
+  return 0;
+}
+
+// Inserts the string ins in front of the string s, in place
+// The caller must guarantee that the buffer holding s has room for strlen(ins) more bytes
+// Done with memmove(): no temporary heap copy, so the insertion can no longer be
+// silently skipped when an allocation fails
+void cookie_insert(char* s,char* ins) {
+  size_t ins_len=strlen(ins);
+  // shift the current text, terminating NUL included, to make room (regions overlap)
+  memmove(s+ins_len,s,strlen(s)+1);
+  // drop the new text into the gap
+  memcpy(s,ins,ins_len);
+}
+// Removes the first pos characters of the string s, in place
+// pos must not exceed strlen(s)
+// Done with memmove(): no temporary heap copy, so the deletion can no longer be
+// silently skipped when an allocation fails
+void cookie_delete(char* s,int pos) {
+  // move the tail, terminating NUL included, to the front (regions overlap)
+  memmove(s,s+pos,strlen(s+pos)+1);
+}
+
+// Returns field number param (0-based, tab-separated) of the first line of cookie_base
+// ex: cookie_get("this is<tab>an<tab>example",1) returns "an"
+// Leading '\n' characters are skipped; "" is returned when the line has no such field
+// A non-empty result lives in the buffer reserved by NOSTATIC_RESERVE below and is
+// overwritten by the next call that returns a field
+char* cookie_get(char* cookie_base,int param) {
+  char* buffer;
+  char* limit;
+  char* field_end;
+  int skipped;
+  NOSTATIC_RESERVE(buffer, char, 8192);
+
+  // end of the current line
+  while (*cookie_base == '\n')
+    cookie_base++;
+  limit=strchr(cookie_base,'\n');
+  if (limit == NULL)
+    limit=cookie_base+strlen(cookie_base);
+
+  // skip param tab separators
+  for (skipped=0; (skipped < param) && (cookie_base != NULL); skipped++) {
+    char* tab=strchr(cookie_base,'\t');
+    cookie_base=(tab != NULL) ? (tab + 1) : NULL;
+  }
+
+  // no such field, or the field found belongs to a later line
+  if (cookie_base == NULL)
+    return "";
+  if (cookie_base >= limit)
+    return "";
+
+  // copy up to the next tab, newline or end of string
+  field_end=cookie_base;
+  while ( (*field_end != '\0') && (*field_end != '\t') && (*field_end != '\n') )
+    field_end++;
+  buffer[0]='\0';
+  strncat(buffer,cookie_base,(int) (field_end - cookie_base));
+  return buffer;
+}
+// fin cookies
+
+
+
+// -- basic auth --
+
+/* Declares a directory (adr+fil, reduced by bauth_prefix) as having its own authentication */
+/* Returns 1 when a new entry was appended to the chain, 0 otherwise: */
+/* cookie is NULL, an entry already covers this location, the credentials or the prefix */
+/* do not fit the fixed-size fields of bauth_chain, or the allocation failed */
+int bauth_add(t_cookie* cookie,char* adr,char* fil,char* auth) {
+  if (cookie) {
+    if (!bauth_check(cookie,adr,fil)) {       // not declared yet
+      bauth_chain* chain=&cookie->auth;
+      char* prefix=bauth_prefix(adr,fil);
+      /* prefix comes from a buffer larger than chain->prefix, and auth is caller data: */
+      /* refuse what cannot be stored instead of overflowing the entry */
+      if ( (strlen(auth) >= sizeof(chain->auth))
+        || (strlen(prefix) >= sizeof(chain->prefix)) )
+        return 0;
+      /* go to the end of the chain */
+      while(chain->next)
+        chain=chain->next;
+      chain->next=(bauth_chain*) calloc(1,sizeof(bauth_chain));
+      if (chain->next) {
+        chain=chain->next;
+        chain->next=NULL;
+        strcpy(chain->auth,auth);
+        strcpy(chain->prefix,prefix);
+        return 1;
+      }
+    }
+  }
+  return 0;
+}
+
+/* Looks up adr+fil in the authentication chain */
+/* Returns the stored authentication string of the first entry whose non-empty prefix */
+/* starts the location computed by bauth_prefix(), or NULL when there is none */
+char* bauth_check(t_cookie* cookie,char* adr,char* fil) {
+  bauth_chain* node;
+  char* prefix;
+
+  if (cookie == NULL)
+    return NULL;
+
+  prefix=bauth_prefix(adr,fil);
+  for (node=&cookie->auth; node != NULL; node=node->next) {
+    if (strnotempty(node->prefix)==0)
+      continue;                 /* unused entry */
+    if (strncmp(prefix,node->prefix,strlen(node->prefix))==0)
+      return node->auth;
+  }
+  return NULL;
+}
+
+/* Builds the location prefix used as key in the authentication chain: */
+/* jump_identification(adr) followed by fil, cut at the first '?', then cut just after */
+/* the last '/' (left as is when it holds no '/') */
+/* The result lives in the buffer reserved by NOSTATIC_RESERVE below */
+char* bauth_prefix(char* adr,char* fil) {
+  char* prefix;
+  char* cut;
+  NOSTATIC_RESERVE(prefix, char, HTS_URLMAXSIZE*2);
+
+  strcpy(prefix,jump_identification(adr));
+  strcat(prefix,fil);
+
+  /* drop the query string */
+  cut=strchr(prefix,'?');
+  if (cut != NULL)
+    *cut='\0';
+
+  /* keep everything up to and including the last slash */
+  cut=strrchr(prefix,'/');
+  if (cut != NULL)
+    cut[1]='\0';
+
+  return prefix;
+}
diff --git a/src/htsbauth.h b/src/htsbauth.h
new file mode 100644
index 0000000..d361d83
--- /dev/null
+++ b/src/htsbauth.h
@@ -0,0 +1,74 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+/* ------------------------------------------------------------ */
+/* File: httrack.c subroutines: */
+/* basic authentication: password storage */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+
+
+#ifndef HTSBAUTH_DEFH
+#define HTSBAUTH_DEFH
+
+// basic authentication chain: singly linked list of (location prefix, credentials) entries
+typedef struct bauth_chain {
+ char prefix[1024]; /* www.foo.com/secure/ */
+ char auth[1024]; /* base-64 encoded user:pass */
+ struct bauth_chain* next; /* next element */
+} bauth_chain;
+
+
+// buffer for cookies and authentication
+// max_len: limit the cookie functions compare the length of data against
+// data: cookie lines (tab-separated fields, one cookie per line)
+// auth: first element of the basic authentication chain
+typedef struct {
+ int max_len;
+ char data[32768];
+ bauth_chain auth;
+} t_cookie;
+
+// cookies
+int cookie_add(t_cookie* cookie,char* cook_name,char* cook_value,char* domain,char* path);
+int cookie_del(t_cookie* cookie,char* cook_name,char* domain,char* path);
+int cookie_load(t_cookie* cookie,char* path,char* name);
+int cookie_save(t_cookie* cookie,char* name);
+void cookie_insert(char* s,char* ins);
+void cookie_delete(char* s,int pos);
+char* cookie_get(char* cookie_base,int param);
+char* cookie_find(char* s,char* cook_name,char* domain,char* path);
+char* cookie_nextfield(char* a);
+
+// basic auth
+int bauth_add(t_cookie* cookie,char* adr,char* fil,char* auth);
+char* bauth_check(t_cookie* cookie,char* adr,char* fil);
+char* bauth_prefix(char* adr,char* fil);
+
+
+#endif
diff --git a/src/htscache.c b/src/htscache.c
new file mode 100644
index 0000000..da8791e
--- /dev/null
+++ b/src/htscache.c
@@ -0,0 +1,881 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: httrack.c subroutines: */
+/* cache system (index and stores files in cache) */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+#include "htscache.h"
+
+/* specific definitions */
+#include "htsbase.h"
+#include "htsbasenet.h"
+#include "htsmd5.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "htsnostatic.h"
+/* END specific definitions */
+
+// test_flush: flushes both log streams (opt->log, opt->errlog) when opt->flush is set
+// expands to an if statement and needs a variable named opt in scope where it is used
+#undef test_flush
+#define test_flush if (opt->flush) { fflush(opt->log); fflush(opt->errlog); }
+
+// routines de mise en cache
+
+/*
+ VERSION 1.0 :
+ -----------
+
+.ndx file
+ file with data
+ <string>(date/time) [ <string>(hostname+filename) (datfile_position_ascii) ] * number_of_links
+ file without data
+ <string>(date/time) [ <string>(hostname+filename) (-datfile_position_ascii) ] * number_of_links
+
+.dat file
+ [ file ] *
+with
+ file= (with data)
+ [ bytes ] * sizeof(htsblk header) [ bytes ] * n(length of file given in htsblk header)
+ file= (without data)
+ [ bytes ] * sizeof(htsblk header)
+with
+ <string>(name) = <length in ascii>+<lf>+<data>
+
+
+ VERSION 1.1/1.2 :
+ ---------------
+
+.ndx file
+ file with data
+ <string>("CACHE-1.1") <string>(date/time) [ <string>(hostname+filename) (datfile_position_ascii) ] * number_of_links
+ file without data
+ <string>("CACHE-1.1") <string>(date/time) [ <string>(hostname+filename) (-datfile_position_ascii) ] * number_of_links
+
+.dat file
+ <string>("CACHE-1.1") [ [Header_1.1] [bytes] * n(length of file given in header) ] *
+with
+ Header_1.1=
+ <int>(statuscode)
+ <int>(size)
+ <string>(msg)
+ <string>(contenttype)
+ <string>(last-modified)
+ <string>(Etag)
+ [<string>"SD" <string>(supplemental data)]
+ [<string>"SD" <string>(supplemental data)]
+ ...
+ <string>"HTS" (end of header)
+ <int>(number of bytes of data) (0 if no data written)
+*/
+
+// Nouveau: si != text/html ne stocke que la taille
+
+
+/* Stores the response r in the cache through cache_add() when it qualifies */
+/* Conditions: caching enabled (opt->cache), cache data file open (cache->dat), status code */
+/* in the accepted list, r->is_file not set, and either a non-empty url_save or the */
+/* file being /robots.txt */
+/* A debug line is logged first when opt->debug>0 and a log stream is open */
+void cache_mayadd(httrackp* opt,cache_back* cache,htsblk* r,char* url_adr,char* url_fil,char* url_save) {
+  if ((opt->debug>0) && (opt->log!=NULL)) {
+    fspc(opt->log,"debug"); fprintf(opt->log,"File checked by cache: %s"LF,url_adr);
+  }
+
+  // --- cache storage ---
+  if (!opt->cache)
+    return;                 // cache disabled
+  if (cache->dat==NULL)
+    return;                 // no data file
+
+  // this is the only place where elements are added to the cache (whole file or header)
+  // every "ok" file is stored, and also the 404,301,302.. responses
+  switch(r->statuscode) {
+    case 200:               /* standard response */
+    case 204:               /* no content */
+    case 301:               /* moved perm */
+    case 302:               /* moved temp */
+    case 303:               /* moved temp */
+    case 307:               /* moved temp */
+    case 401:               /* authorization */
+    case 403:               /* unauthorized */
+    case 404:               /* not found */
+    case 410:               /* gone */
+      break;
+    default:
+      return;               /* any other status is not cached */
+  }
+
+  if (r->is_file)
+    return;
+
+  // store files (and robots.txt)
+  if ( (strnotempty(url_save)) || (strcmp(url_fil,"/robots.txt")==0)) {
+    cache_add(*r,url_adr,url_fil,url_save,cache->ndx,cache->dat,opt->all_in_cache);
+  }
+  // --- end of cache storage ---
+}
+
+
+/* Add a file to the cache */
+/* Appends one record for url_adr/url_fil to cache_dat (header fields, the "HTS" end marker, */
+/* a data length, then the data itself when it is stored), and, when all of this succeeded, */
+/* an index entry to cache_ndx: "url_adr\nurl_fil\n" followed by the record offset in cache_dat. */
+/* The offset is written negated when only the header was stored. */
+/* Data is stored for hypertext MIME types, for /robots.txt with an empty url_save, and for */
+/* everything when all_in_cache is set. */
+/* Nothing is written when url_save is empty (robots.txt excepted) or when r.size <= 0. */
+void cache_add(htsblk r,char* url_adr,char* url_fil,char* url_save,FILE* cache_ndx,FILE* cache_dat,int all_in_cache) {
+ int pos;
+ char s[256];
+ char buff[HTS_URLMAXSIZE*4];
+ int ok=1;
+ int dataincache=0; // data stored in the cache?
+ /*char digest[32+2];*/
+ /*digest[0]='\0';*/
+
+ // Empty url_save?
+ if ( (strnotempty(url_save)==0) ) {
+ if (strcmp(url_fil,"/robots.txt")==0) // robots.txt
+ dataincache=1;
+ else
+ return; // error (except for robots.txt)
+ }
+
+ if (r.size <= 0) // size <= 0
+ return; // refused..
+
+ // Put the *data* in the cache?
+ if (is_hypertext_mime(r.contenttype)) // html: cache the data and
+ dataincache=1; // not only the header
+ else if (all_in_cache)
+ dataincache=1; // force everything into the cache
+
+ /* compute md5 ? */
+ /*
+ if (is_hypertext_mime(r.contenttype)) { // html, calcul MD5
+ if (r.adr) {
+ domd5mem(r.adr,r.size,digest,1);
+ }
+ }*/
+
+ // Position
+ fflush(cache_dat); fflush(cache_ndx);
+ pos=ftell(cache_dat);
+ // write seek pointer, address, file
+ if (dataincache) // patch
+ sprintf(s,"%d\n",pos); // written as text (avoids \0 bytes)
+ else
+ sprintf(s,"%d\n",-pos); // written as text (avoids \0 bytes); negative: header only
+
+ // data
+ // write header data, file data
+ /*if (!dataincache) { // patcher
+ r.size=-r.size; // négatif
+ }*/
+
+ // Header construction: ok becomes 1 only if every field was written
+ ok=0;
+ if (cache_wint(cache_dat,r.statuscode)!=-1) // statuscode
+ if (cache_wLLint(cache_dat,r.size)!=-1) // size
+ if (cache_wstr(cache_dat,r.msg)!=-1) // msg
+ if (cache_wstr(cache_dat,r.contenttype)!=-1) // contenttype
+ if (cache_wstr(cache_dat,r.lastmodified)!=-1) // last-modified
+ if (cache_wstr(cache_dat,r.etag)!=-1) // Etag
+ if (cache_wstr(cache_dat,(r.location!=NULL)?r.location:"")!=-1) // 'location' for moved responses
+ if (cache_wstr(cache_dat,r.cdispo)!=-1) // Content-disposition
+ if (cache_wstr(cache_dat,"HTS")!=-1) // end of header
+ ok=1; /* ok */
+ // End of header construction
+
+ /*if ((int) fwrite((char*) &r,1,sizeof(htsblk),cache_dat) == sizeof(htsblk)) {*/
+ if (ok) {
+ if (dataincache) { // store the data?
+ if (!r.adr) { /* no data in memory (sometimes with a 301) */
+ if (cache_wLLint(cache_dat,0)==-1) /* 0 bytes */
+ ok=0;
+ } else if (r.is_write==0) { // in memory, direct copy
+ if (cache_wLLint(cache_dat,r.size)!=-1) {
+ if (r.size>0) { // size>0
+ if ((INTsys) fwrite(r.adr,1,(INTsys)r.size,cache_dat)!=r.size)
+ ok=0;
+ } else // size=0, write nothing
+ ok=0;
+ } else
+ ok=0;
+ } else { // copy the file into the cache
+ FILE* fp;
+ // Copy the saved file..
+ LLint file_size=fsize(fconv(url_save));
+ if (file_size>=0) {
+ if (cache_wLLint(cache_dat,file_size)!=-1) {
+ fp=fopen(fconv(url_save),"rb");
+ if (fp!=NULL) {
+ char buff[32768]; // copy buffer (shadows the outer buff inside this block)
+ int nl;
+ do {
+ nl=fread(buff,1,32768,fp);
+ if (nl>0) {
+ if ((INTsys) fwrite(buff,1,(INTsys)nl,cache_dat)!=nl) { // error
+ nl=-1;
+ ok=0;
+ }
+ }
+ } while(nl>0);
+ fclose(fp);
+ } else ok=0;
+ } else ok=0;
+ } else ok=0;
+ }
+ } else {
+ if (cache_wLLint(cache_dat,0)==-1) /* 0 bytes */
+ ok=0;
+ }
+ } else ok=0;
+ /*if (!dataincache) { // dépatcher
+ r.size=-r.size;
+ }*/
+
+ // index
+ // address+lf+file+lf
+ if (ok) {
+ buff[0]='\0'; strcat(buff,url_adr); strcat(buff,"\n"); strcat(buff,url_fil); strcat(buff,"\n");
+ cache_wstr(cache_ndx,buff);
+ fwrite(s,1,strlen(s),cache_ndx);
+ } // if ok==0 some data may have been written for nothing, which does not matter
+
+ // in case of a crash, at least the cache is on disk!
+ fflush(cache_dat); fflush(cache_ndx);
+}
+
+
+// read a file from the cache
+// if save==NULL, this is a test only: the header is read, no data is loaded
+// Looks up adr+fil (hash table with HTS_FAST_CACHE, text search in cache->use otherwise),
+// seeks to the record in cache->olddat and decodes its header (cache->version selects the format).
+// With a non-NULL save, and unless the record is header-only, the data is then either written
+// straight to the file save (HTS_DIRECTDISK, status 200, non-hypertext type) or loaded
+// into r.adr (allocated with malloct).
+// Returns the filled htsblk; on any failure statuscode is -1 and msg holds the reason.
+// NOTE(review): r.location is pointed at the local array location, so the pointer inside
+// the returned structure refers to this function's own storage - confirm how callers use it.
+htsblk cache_read(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save) {
+#if HTS_FAST_CACHE
+ long int hash_pos;
+ int hash_pos_return;
+#else
+ char* a;
+#endif
+ char buff[HTS_URLMAXSIZE*2];
+ char location[HTS_URLMAXSIZE*2];
+ htsblk r;
+ int ok=0;
+ int header_only=0;
+
+ memset(&r, 0, sizeof(htsblk)); r.soc=INVALID_SOCKET; strcpy(location,""); r.location=location;
+#if HTS_FAST_CACHE
+ strcpy(buff,adr); strcat(buff,fil);
+ hash_pos_return=inthash_read((inthash)cache->hashtable,buff,(long int*)&hash_pos);
+#else
+ buff[0]='\0'; strcat(buff,"\n"); strcat(buff,adr); strcat(buff,"\n"); strcat(buff,fil); strcat(buff,"\n");
+ if (cache->use)
+ a=strstr(cache->use,buff);
+ else
+ a=NULL; // force an error
+#endif
+
+ // on success
+#if HTS_FAST_CACHE
+ if (hash_pos_return) {
+#else
+ if (a!=NULL) { // OK, exists in the cache!
+#endif
+ int pos;
+#if DEBUGCA
+ fprintf(stdout,"..cache: %s%s at ",adr,fil);
+#endif
+
+#if HTS_FAST_CACHE
+ pos=hash_pos; /* simply */
+#else
+ a+=strlen(buff);
+ sscanf(a,"%d",&pos); // read position
+#endif
+#if DEBUGCA
+ printf("%d\n",pos);
+#endif
+
+ fflush(cache->olddat);
+ if (fseek(cache->olddat,((pos>0)?pos:(-pos)),SEEK_SET) == 0) {
+ /* Import cache 1.0 */
+ if (cache->version==0) {
+ OLD_htsblk old_r;
+ if (fread((char*) &old_r,1,sizeof(old_r),cache->olddat)==sizeof(old_r)) { // read everything (including statuscode etc)
+ r.statuscode=old_r.statuscode;
+ r.size=old_r.size; // file size
+ strcpy(r.msg,old_r.msg);
+ strcpy(r.contenttype,old_r.contenttype);
+ ok=1; /* import ok */
+ }
+ /* */
+ /* Cache 1.1 */
+ } else {
+ char check[256];
+ LLint size_read;
+ check[0]='\0';
+ //
+ cache_rint(cache->olddat,&r.statuscode);
+ cache_rLLint(cache->olddat,&r.size);
+ cache_rstr(cache->olddat,r.msg);
+ cache_rstr(cache->olddat,r.contenttype);
+ cache_rstr(cache->olddat,r.lastmodified);
+ cache_rstr(cache->olddat,r.etag);
+ cache_rstr(cache->olddat,r.location);
+ if (cache->version >= 2)
+ cache_rstr(cache->olddat,r.cdispo);
+ //
+ cache_rstr(cache->olddat,check);
+ if (strcmp(check,"HTS")==0) { /* integrity OK */
+ ok=1;
+ }
+ cache_rLLint(cache->olddat,&size_read); /* read the size to be sure of the declared size (rewrite) */
+ if (size_read>0) { /* if recorded here */
+ r.size=size_read;
+ } else { /* no data directly in the cache, is the file present? */
+ if (r.statuscode!=200)
+ header_only=1; /* header only here! */
+ }
+ }
+
+ /* Fill in some fields */
+ r.totalsize=r.size;
+
+ // header read (including the statuscode)
+ /*if (fread((char*) &r,1,sizeof(htsblk),cache->olddat)==sizeof(htsblk)) { // lire tout (y compris statuscode etc)*/
+ if (ok) {
+ // safety
+ r.adr=NULL;
+ r.out=NULL;
+ ////r.location=NULL; no, set for 301 or 302 responses
+ r.fp=NULL;
+
+ if ( (r.statuscode>=0) && (r.statuscode<=999)
+ && (r.notmodified>=0) && (r.notmodified<=9) ) { // small integrity check
+ if ((save) && (!header_only) ) { /* do not read the header only */
+ //int to_file=0;
+
+ r.adr=NULL; r.soc=INVALID_SOCKET;
+ // // r.location=NULL;
+
+#if HTS_DIRECTDISK
+ // Shortcut:
+ // Can the file be stored directly on disk?
+ if ((r.statuscode==200) && (!is_hypertext_mime(r.contenttype)) && (strnotempty(save))) { // not HTML, write to disk directly
+ int ok=0;
+
+ r.is_write=1; // write
+ if (fexist(antislash(save))) { // a file already exists
+ //if (fsize(antislash(save))==r.size) { // même taille -- NON tant pis (taille mal declaree)
+ ok=1; // nothing more to do
+ filenote(save,NULL); // note as known
+ //}
+ }
+
+ if ((pos<0) && (!ok)) { // No data in the cache and file not found: error!
+ if (opt->norecatch) {
+ filecreateempty(save);
+ //
+ r.statuscode=-1;
+ strcpy(r.msg,"File deleted by user not recaught");
+ ok=1; // do not fetch again (and no error)
+ } else {
+ r.statuscode=-1;
+ strcpy(r.msg,"Previous cache file not found");
+ ok=1; // do not fetch
+ }
+ }
+
+ if (!ok) {
+ r.out=filecreate(save);
+#if HDEBUG
+ printf("direct-disk: %s\n",save);
+#endif
+ if (r.out!=NULL) {
+ char buff[32768+4];
+ LLint nl;
+ LLint size;
+ size=r.size;
+ do {
+ nl=fread(buff,1,(INTsys) minimum(size,32768),cache->olddat);
+ if (nl>0) {
+ size-=nl;
+ if ((INTsys) fwrite(buff,1,(INTsys)nl,r.out)!=nl) { // error
+ r.statuscode=-1;
+ strcpy(r.msg,"Cache Read Error : Read To Disk");
+ }
+ }
+ } while((nl>0) && (size>0) && (r.statuscode!=-1));
+
+ fclose(r.out);
+ r.out=NULL;
+#if HTS_WIN==0
+ chmod(save,HTS_ACCESS_FILE);
+#endif
+ usercommand(0,NULL,antislash(save));
+ } else {
+ r.statuscode=-1;
+ strcpy(r.msg,"Cache Write Error : Unable to Create File");
+ //printf("%s\n",save);
+ }
+ }
+
+ } else
+#endif
+ { // read into memory
+
+ if (pos<0) { // No data in the cache, odd since this is html!!!
+ r.statuscode=-1;
+ strcpy(r.msg,"Previous cache file not found (2)");
+ } else {
+ // read the file (in one go)
+ r.adr=(char*) malloct((INTsys)r.size+4);
+ if (r.adr!=NULL) {
+ if ((INTsys) fread(r.adr,1,(INTsys)r.size,cache->olddat)!=r.size) { // error
+ freet(r.adr);
+ r.adr=NULL;
+ r.statuscode=-1;
+ strcpy(r.msg,"Cache Read Error : Read Data");
+ } else
+ *(r.adr+r.size)='\0';
+ //printf(">%s status %d\n",back[p].r.contenttype,back[p].r.statuscode);
+ } else { // error
+ r.statuscode=-1;
+ strcpy(r.msg,"Cache Memory Error");
+ }
+ }
+ }
+ } // if save==NULL, load nothing (header only)
+ } else {
+#if DEBUGCA
+ printf("Cache Read Error : Bad Data");
+#endif
+ r.statuscode=-1;
+ strcpy(r.msg,"Cache Read Error : Bad Data");
+ }
+ } else { // error
+#if DEBUGCA
+ printf("Cache Read Error : Read Header");
+#endif
+ r.statuscode=-1;
+ strcpy(r.msg,"Cache Read Error : Read Header");
+ }
+ } else {
+#if DEBUGCA
+ printf("Cache Read Error : Seek Failed");
+#endif
+ r.statuscode=-1;
+ strcpy(r.msg,"Cache Read Error : Seek Failed");
+ }
+ } else {
+#if DEBUGCA
+ printf("File Cache Not Found");
+#endif
+ r.statuscode=-1;
+ strcpy(r.msg,"File Cache Not Found");
+ }
+ return r;
+}
+
+/* Stores a raw data block under the key (str1,str2) in the cache */
+/* cache_dat receives the length then the len bytes of outbuff; cache_ndx receives */
+/* "str1\nstr2\n" followed by the offset of the block in cache_dat */
+/* Returns 1 on success, 0 on failure (including a NULL cache_dat) */
+int cache_writedata(FILE* cache_ndx,FILE* cache_dat,char* str1,char* str2,char* outbuff,int len) {
+  char key[HTS_URLMAXSIZE*4];
+  char offset_txt[256];
+  int offset;
+
+  if (cache_dat == NULL)
+    return 0;
+
+  fflush(cache_dat); fflush(cache_ndx);
+  offset=ftell(cache_dat);
+
+  /* data first: length, then bytes */
+  if (cache_wint(cache_dat,len) == -1)
+    return 0;
+  if ((INTsys) fwrite(outbuff,1,(INTsys)len,cache_dat) != (INTsys) len)
+    return 0;
+
+  /* then the index entry */
+  sprintf(offset_txt,"%d\n",offset);
+  strcpy(key,str1);
+  strcat(key,"\n");
+  strcat(key,str2);
+  strcat(key,"\n");
+  cache_wstr(cache_ndx,key);
+  if (fwrite(offset_txt,1,strlen(offset_txt),cache_ndx) != strlen(offset_txt))
+    return 0;
+
+  fflush(cache_dat); fflush(cache_ndx);
+  return 1;
+}
+
+/* Reads the data block stored under the key (str1,str2) in the cache */
+/* On success returns 1, with *inbuff pointing to a buffer allocated with malloct() */
+/* (to be released by the caller with freet()) and *inlen set to its length; the buffer */
+/* is followed by a terminating NUL byte */
+/* On failure returns 0 with *inbuff==NULL and *inlen==0 */
+/* Only available with HTS_FAST_CACHE; otherwise always fails */
+int cache_readdata(cache_back* cache,char* str1,char* str2,char** inbuff,int* inlen) {
+#if HTS_FAST_CACHE
+  if (cache->hashtable) {
+    char buff[HTS_URLMAXSIZE*4];
+    long int pos;
+    strcpy(buff,str1); strcat(buff,str2);
+    if (inthash_read((inthash)cache->hashtable,buff,(long int*)&pos)) {
+      /* the stored offset may be negative: seek to its absolute value */
+      if (fseek(cache->olddat,((pos>0)?pos:(-pos)),SEEK_SET) == 0) {
+        int len=0;       /* stays 0 (= failure) should cache_rint() store nothing */
+        cache_rint(cache->olddat,&len);
+        if (len>0) {
+          char* mem_buff=(char*)malloct(len+4);    /* Plus byte 0 */
+          if (mem_buff) {
+            if ((int)fread(mem_buff,1,len,cache->olddat)==len) {  /* read the whole block */
+              mem_buff[len]='\0';   /* terminate, as done for data loaded by cache_read() */
+              *inbuff=mem_buff;
+              *inlen=len;
+              return 1;
+            } else
+              freet(mem_buff);
+          }
+        }
+      }
+    }
+  }
+#endif
+  *inbuff=NULL;
+  *inlen=0;
+  return 0;
+}
+
+// renvoyer uniquement en tête, ou NULL si erreur
+htsblk* cache_header(httrackp* opt,cache_back* cache,char* adr,char* fil) {
+ htsblk* r;
+ NOSTATIC_RESERVE(r, htsblk, 1);
+ *r=cache_read(opt,cache,adr,fil,NULL); // test uniquement
+ if (r->statuscode != -1)
+ return r;
+ else
+ return NULL;
+}
+
+
+/* Cache initialisation: create the new cache, rename the former one, load its index.
+ *
+ * Does nothing unless opt->cache is set. Otherwise, in order:
+ *  1. creates the "hts-cache" folder under opt->path_log;
+ *  2. if both new.dat and new.ndx exist, renames them to old.dat/old.ndx (removing any
+ *     existing old.* first); otherwise removes whichever of the two is present;
+ *  3. if old.dat and old.ndx both exist and old.ndx is not empty, opens old.dat into
+ *     cache->olddat, loads old.ndx in memory, checks its "CACHE-1.x" header (x <= 2, or
+ *     the headerless old format), copies the stored date string into
+ *     cache->lastmodified and sets opt->is_update;
+ *  4. when HTS_FAST_CACHE is set, records every index entry (key -> position) into
+ *     cache->hashtable and releases the in-memory index;
+ *  5. creates new.dat/new.ndx (cache->dat, cache->ndx) and, if both could be opened,
+ *     writes the "CACHE-1.2" header and the current GMT date, then rotates and
+ *     reopens new.lst and new.txt.
+ *
+ * NOTE(review): opt->is_update is set even when the old cache was just rejected as
+ * unsupported (the assignment follows both branches) -- confirm this is intended.
+ */
+void cache_init(cache_back* cache,httrackp* opt) {
+ // ---
+ // cache in use: rename the former cache files, if any, and load the index
+ if (opt->cache) {
+#if DEBUGCA
+ printf("cache init: ");
+#endif
+#if HTS_WIN
+ mkdir(fconcat(opt->path_log,"hts-cache"));
+#else
+ mkdir(fconcat(opt->path_log,"hts-cache"),HTS_PROTECT_FOLDER);
+#endif
+ if ((fexist(fconcat(opt->path_log,"hts-cache/new.dat"))) && (fexist(fconcat(opt->path_log,"hts-cache/new.ndx")))) { // a previous cache already exists.. rename it
+#if DEBUGCA
+ printf("work with former cache\n");
+#endif
+ if (fexist(fconcat(opt->path_log,"hts-cache/old.dat")))
+ remove(fconcat(opt->path_log,"hts-cache/old.dat"));
+ if (fexist(fconcat(opt->path_log,"hts-cache/old.ndx")))
+ remove(fconcat(opt->path_log,"hts-cache/old.ndx"));
+
+ rename(fconcat(opt->path_log,"hts-cache/new.dat"),fconcat(opt->path_log,"hts-cache/old.dat"));
+ rename(fconcat(opt->path_log,"hts-cache/new.ndx"),fconcat(opt->path_log,"hts-cache/old.ndx"));
+ } else { // one (or both) of the two cache files is missing: remove the other one, if any
+#if DEBUGCA
+ printf("new cache\n");
+#endif
+ if (fexist(fconcat(opt->path_log,"hts-cache/new.dat")))
+ remove(fconcat(opt->path_log,"hts-cache/new.dat"));
+ if (fexist(fconcat(opt->path_log,"hts-cache/new.ndx")))
+ remove(fconcat(opt->path_log,"hts-cache/new.ndx"));
+ }
+
+ // load the index of the previous cache
+ if ((fexist(fconcat(opt->path_log,"hts-cache/old.dat"))) && (fexist(fconcat(opt->path_log,"hts-cache/old.ndx")))) { // previous cache
+ if ((fsize(fconcat(opt->path_log,"hts-cache/old.dat"))>=0) && (fsize(fconcat(opt->path_log,"hts-cache/old.ndx"))>0)) {
+ FILE* oldndx=NULL;
+#if DEBUGCA
+ printf("..load cache\n");
+#endif
+ cache->olddat=fopen(fconcat(opt->path_log,"hts-cache/old.dat"),"rb");
+ oldndx=fopen(fconcat(opt->path_log,"hts-cache/old.ndx"),"rb");
+ // both files must be openable
+ if ((cache->olddat==NULL) && (oldndx!=NULL)) {
+ fclose(oldndx);
+ oldndx=NULL;
+ }
+ if ((cache->olddat!=NULL) && (oldndx==NULL)) {
+ fclose(cache->olddat);
+ cache->olddat=NULL;
+ }
+ // read the index
+ if (oldndx!=NULL) {
+ int buffl;
+ fclose(oldndx); oldndx=NULL; /* the handle only proved the file can be opened; readfile() reopens it */
+ // read ndx, and lastmodified
+ buffl=fsize(fconcat(opt->path_log,"hts-cache/old.ndx"));
+ cache->use=readfile(fconcat(opt->path_log,"hts-cache/old.ndx"));
+ if (cache->use!=NULL) {
+ char firstline[256];
+ char* a=cache->use;
+ a+=cache_brstr(a,firstline);
+ if (strncmp(firstline,"CACHE-",6)==0) { // new cache format
+ if (strncmp(firstline,"CACHE-1.",8)==0) { // Version 1.1x
+ cache->version=(int)(firstline[8]-'0'); // cache 1.x
+ if (cache->version <= 2) {
+ a+=cache_brstr(a,firstline);
+ strcpy(cache->lastmodified,firstline);
+ } else {
+ if (opt->errlog) {
+ fspc(opt->errlog,"error"); fprintf(opt->errlog,"Cache: version 1.%d not supported, ignoring current cache"LF,cache->version);
+ fflush(opt->errlog);
+ }
+ fclose(cache->olddat);
+ cache->olddat=NULL;
+ freet(cache->use);
+ cache->use=NULL;
+ }
+ } else { // not supported
+ if (opt->errlog) {
+ fspc(opt->errlog,"error"); fprintf(opt->errlog,"Cache: %s not supported, ignoring current cache"LF,firstline);
+ fflush(opt->errlog);
+ }
+ fclose(cache->olddat);
+ cache->olddat=NULL;
+ freet(cache->use);
+ cache->use=NULL;
+ }
+ /* */
+ } else { // old cache format
+ /* */
+ if (opt->log) {
+ fspc(opt->log,"warning"); fprintf(opt->log,"Cache: importing old cache format"LF);
+ fflush(opt->log);
+ }
+ cache->version=0; // cache 1.0
+ strcpy(cache->lastmodified,firstline);
+ }
+ opt->is_update=1; // flag this run as an update
+
+ /* Create hash table for the cache (MUCH FASTER!) */
+#if HTS_FAST_CACHE
+ if (cache->use) {
+ char line[HTS_URLMAXSIZE*2];
+ char linepos[256];
+ int pos;
+ while ( (a!=NULL) && (a < (cache->use+buffl) ) ) {
+ a=strchr(a+1,'\n'); /* start of line */
+ if (a) {
+ a++;
+ /* read "host/file" */
+ a+=binput(a,line,HTS_URLMAXSIZE);
+ a+=binput(a,line+strlen(line),HTS_URLMAXSIZE);
+ /* read position */
+ a+=binput(a,linepos,200);
+ sscanf(linepos,"%d",&pos); /* NOTE(review): pos keeps its previous (or undefined) value if linepos is not a number */
+ inthash_add((inthash)cache->hashtable,line,pos);
+ }
+ }
+ /* Not needed anymore! */
+ freet(cache->use);
+ cache->use=NULL;
+ }
+#endif
+ }
+ }
+ } // cache size > 0
+ } // previous cache exists
+
+#if DEBUGCA
+ printf("..create cache\n");
+#endif
+ // open the current cache files
+ cache->dat=fopen(fconcat(opt->path_log,"hts-cache/new.dat"),"wb");
+ cache->ndx=fopen(fconcat(opt->path_log,"hts-cache/new.ndx"),"wb");
+ // both files must be openable
+ if ((cache->dat==NULL) && (cache->ndx!=NULL)) {
+ fclose(cache->ndx);
+ cache->ndx=NULL;
+ }
+ if ((cache->dat!=NULL) && (cache->ndx==NULL)) {
+ fclose(cache->dat);
+ cache->dat=NULL;
+ }
+
+ if (cache->ndx!=NULL) {
+ char s[256];
+
+ cache_wstr(cache->dat,"CACHE-1.2");
+ fflush(cache->dat);
+ cache_wstr(cache->ndx,"CACHE-1.2");
+ fflush(cache->ndx);
+ //
+ time_gmt_rfc822(s); // current GMT date and time, for If-Modified-Since..
+ cache_wstr(cache->ndx,s);
+ fflush(cache->ndx); // flush, just in case
+
+ // remove old.lst
+ if (fexist(fconcat(opt->path_log,"hts-cache/old.lst")))
+ remove(fconcat(opt->path_log,"hts-cache/old.lst"));
+ // rename
+ if (fexist(fconcat(opt->path_log,"hts-cache/new.lst")))
+ rename(fconcat(opt->path_log,"hts-cache/new.lst"),fconcat(opt->path_log,"hts-cache/old.lst"));
+ // open
+ cache->lst=fopen(fconcat(opt->path_log,"hts-cache/new.lst"),"wb");
+ {
+ filecreate_params tmp;
+ strcpy(tmp.path,opt->path_html); // path
+ tmp.lst=cache->lst; // lst file (NOTE(review): NULL if the fopen above failed)
+ filenote("",&tmp); // initialise filecreate
+ }
+
+ // remove old.txt
+ if (fexist(fconcat(opt->path_log,"hts-cache/old.txt")))
+ remove(fconcat(opt->path_log,"hts-cache/old.txt"));
+ // rename
+ if (fexist(fconcat(opt->path_log,"hts-cache/new.txt")))
+ rename(fconcat(opt->path_log,"hts-cache/new.txt"),fconcat(opt->path_log,"hts-cache/old.txt"));
+ // open
+ cache->txt=fopen(fconcat(opt->path_log,"hts-cache/new.txt"),"wb");
+ if (cache->txt) {
+ fprintf(cache->txt,"date\tsize'/'remotesize\tflags(request:Update,Range state:File response:Modified,Chunked,gZipped)\t");
+ fprintf(cache->txt,"statuscode\tstatus ('servermsg')\tMIME\tEtag|Date\tURL\tlocalfile\t(from URL)"LF);
+ }
+
+ // test
+ // cache_writedata(cache->ndx,cache->dat,"//[TEST]//","test1","TEST PIPO",9);
+ }
+
+ }
+
+}
+
+
+
+
+// Read a whole file into memory (embedded \0 bytes are preserved).
+// Returns a buffer allocated with malloct() holding the file content followed by a
+// terminating '\0', or NULL if the file is empty, cannot be sized or opened,
+// cannot be read entirely, or if the allocation fails.
+char* readfile(char* fil) {
+  FILE* fp;
+  char* data;
+  int size=fsize(fil);
+
+  if (size<=0)               // missing or empty file
+    return NULL;
+  fp=fopen(fconv(fil),"rb");
+  if (fp==NULL)
+    return NULL;
+
+  data=(char*) malloct(size+1);
+  if (data!=NULL) {
+    if ((int) fread(data,1,size,fp)==size) {
+      data[size]='\0';
+    } else {                 // short read: damaged file?
+      freet(data);
+      data=NULL;
+    }
+  }
+  fclose(fp);
+  return data;
+}
+
+// Read the file fil, looked up under hts_rootdir(NULL) when it does not exist as given.
+// If it cannot be read, return a freshly allocated copy of defaultdata instead.
+// Returns NULL only when that fallback allocation fails.
+char* readfile_or(char* fil,char* defaultdata) {
+  char* path=fil;
+  char* content;
+
+  if (!fexist(fil))
+    path=fconcat(hts_rootdir(NULL),fil);
+  content=readfile(path);
+  if (content == NULL) {
+    content=malloct(strlen(defaultdata)+2);
+    if (content != NULL)
+      strcpy(content,defaultdata);
+  }
+  return content;
+}
+
+// Write a string to a file as a length-prefixed record:
+// the decimal length and a newline, then the raw bytes of the string.
+// Returns -1 on write error, 0 otherwise.
+int cache_wstr(FILE* fp,char* s) {
+  char header[256+4];
+  const int n=strlen(s);
+  size_t hlen;
+
+  sprintf(header,"%d\n",n);
+  hlen=strlen(header);
+  if (fwrite(header,1,hlen,fp) != hlen)
+    return -1;
+  if ((n > 0) && ((int) fwrite(s,1,n,fp) != n))
+    return -1;
+  return 0;
+}
+/* Read from fp one length-prefixed string (a decimal length line followed by that many
+   bytes, as written by cache_wstr) into s, and NUL-terminate it.
+   s is left empty when the length line is missing, malformed or negative, and is
+   terminated after the bytes actually read when the file is truncated.
+   NOTE: s must be able to hold the stored length plus one byte; the size of s is
+   not known here, so it cannot be enforced by this function. */
+void cache_rstr(FILE* fp,char* s) {
+  int i=0;
+  size_t got=0;
+  char buff[256+4];
+  linput(fp,buff,256);
+  /* without this check i would be undefined (no number) or index before s (negative) */
+  if ((sscanf(buff,"%d",&i) != 1) || (i < 0))
+    i=0;
+  if (i>0)
+    got=fread(s,1,i,fp);
+  *(s+got)='\0';
+}
+/* Memory-buffer counterpart of cache_rstr: read one length-prefixed string located at
+   adr into s (NUL-terminated) and return the number of bytes consumed, i.e. the value
+   returned by binput() for the length line plus the announced string length.
+   A missing, malformed or negative length is treated as an empty string.
+   NOTE: s must be able to hold the announced length plus one byte. */
+int cache_brstr(char* adr,char* s) {
+  int i=0;
+  int off;
+  char buff[256+4];
+  off=binput(adr,buff,256);
+  adr+=off;
+  /* without this check i would be undefined (no number) or move backwards (negative) */
+  if ((sscanf(buff,"%d",&i) != 1) || (i < 0))
+    i=0;
+  if (i>0)
+    strncpy(s,adr,i);
+  *(s+i)='\0';
+  off+=i;
+  return off;
+}
+/* Same contract as cache_brstr: read the length-prefixed string located at adr into s
+   (NUL-terminated) and return the number of bytes consumed.
+   A missing, malformed or negative length is treated as an empty string.
+   NOTE: s must be able to hold the announced length plus one byte. */
+int cache_quickbrstr(char* adr,char* s) {
+  int i=0;
+  int off;
+  char buff[256+4];
+  off=binput(adr,buff,256);
+  adr+=off;
+  /* never use an unparsed (undefined) or negative length as a size or an index */
+  if (sscanf(buff,"%d",&i) != 1)
+    i=0;
+  else if (i < 0)
+    i=0;
+  if (i>0)
+    strncpy(s,adr,i);
+  *(s+i)='\0';
+  off+=i;
+  return off;
+}
+/* Same as cache_brstr, but the stored string is parsed as an int into *i. */
+/* Returns the number of bytes consumed, as reported by cache_brstr; *i is left */
+/* untouched when cache_brstr returns -1 or when the string is not a number. */
+int cache_brint(char* adr,int* i) {
+  char digits[256];
+  const int consumed=cache_brstr(adr,digits);
+  if (consumed == -1)
+    return consumed;
+  sscanf(digits,"%d",i);
+  return consumed;
+}
+/* Read one length-prefixed string from fp (cache_rstr) and parse it as an int into *i. */
+/* *i is left untouched when the string read is not a number. */
+void cache_rint(FILE* fp,int* i) {
+  char digits[256];
+
+  cache_rstr(fp,digits);
+  sscanf(digits,"%d",i);
+}
+/* Write the int i to fp as a length-prefixed decimal string (see cache_wstr). */
+/* Returns the result of cache_wstr: -1 on error, 0 otherwise. */
+int cache_wint(FILE* fp,int i) {
+  char digits[256];
+
+  sprintf(digits,"%d",(int) i);
+  return cache_wstr(fp,digits);
+}
+/* Read one length-prefixed string from fp (cache_rstr) and parse it as an LLint, */
+/* using the LLintP format, into *i. *i is left untouched if the parse fails. */
+void cache_rLLint(FILE* fp,LLint* i) {
+  char digits[256];
+
+  cache_rstr(fp,digits);
+  sscanf(digits,LLintP,i);
+}
+/* Write the LLint i to fp as a length-prefixed string formatted with LLintP. */
+/* Returns the result of cache_wstr: -1 on error, 0 otherwise. */
+int cache_wLLint(FILE* fp,LLint i) {
+  char digits[256];
+
+  sprintf(digits,LLintP,(LLint) i);
+  return cache_wstr(fp,digits);
+}
+// -- cache --
diff --git a/src/htscache.h b/src/htscache.h
new file mode 100644
index 0000000..08069d1
--- /dev/null
+++ b/src/htscache.h
@@ -0,0 +1,64 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: httrack.c subroutines: */
+/* cache system (index and stores files in cache) */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+
+
+#ifndef HTSCACHE_DEFH
+#define HTSCACHE_DEFH
+
+#include "htscore.h"
+
+/* Cache entries: add/read entries, header-only lookup, and cache (re)initialisation.
+ * cache_header returns NULL when the entry read has a statuscode of -1.
+ * cache_init rotates the previous cache files and opens the new ones.
+ */
+void cache_mayadd(httrackp* opt,cache_back* cache,htsblk* r,char* url_adr,char* url_fil,char* url_save);
+void cache_add(htsblk r,char* url_adr,char* url_fil,char* url_save,FILE* cache_ndx,FILE* cache_dat,int all_in_cache);
+htsblk cache_read(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save);
+htsblk* cache_header(httrackp* opt,cache_back* cache,char* adr,char* fil);
+void cache_init(cache_back* cache,httrackp* opt);
+/* Raw data records keyed by (str1,str2): both functions return 1 on success, 0 on failure. */
+int cache_writedata(FILE* cache_ndx,FILE* cache_dat,char* str1,char* str2,char* outbuff,int len);
+int cache_readdata(cache_back* cache,char* str1,char* str2,char** inbuff,int* len);
+/* Length-prefixed string/integer helpers.
+ * cache_w*  write to a FILE* and return -1 on error, 0 otherwise;
+ * cache_r*  read from a FILE*;
+ * cache_br* and cache_quickbrstr read from a memory buffer and return the number of
+ *           bytes consumed.
+ */
+int cache_wstr(FILE* fp,char* s);
+void cache_rstr(FILE* fp,char* s);
+int cache_brstr(char* adr,char* s);
+int cache_quickbrstr(char* adr,char* s);
+int cache_brint(char* adr,int* i);
+void cache_rint(FILE* fp,int* i);
+int cache_wint(FILE* fp,int i);
+void cache_rLLint(FILE* fp,LLint* i);
+int cache_wLLint(FILE* fp,LLint i);
+#endif
diff --git a/src/htscatchurl.c b/src/htscatchurl.c
new file mode 100644
index 0000000..c119677
--- /dev/null
+++ b/src/htscatchurl.c
@@ -0,0 +1,296 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: URL catch .c */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+// URL catcher source file (.c)
+
+/* specific definitions */
+/* specific definitions */
+#include "htsbase.h"
+#include "htsnet.h"
+#include "htslib.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <fcntl.h>
+#if HTS_WIN
+#else
+#include <arpa/inet.h>
+#endif
+/* END specific definitions */
+
+/* global definitions */
+#include "htsglobal.h"
+
+/* htslib */
+/*#include "htslib.h"*/
+
+/* catch url */
+#include "htscatchurl.h"
+
+
+// URL Link catcher
+
+// 0- Init the URL catcher with standard port
+
+// catch_url_init_std(&port,&return_host);
+// Tries catch_url_init() on each port of a fixed candidate list, in order, until one
+// succeeds or the list (terminated by a negative value) is exhausted.
+// *port_prox receives the port value of the last attempt, as left by catch_url_init().
+// Returns the listening socket, or INVALID_SOCKET if every attempt failed.
+T_SOC catch_url_init_std(int* port_prox,char* adr_prox) {
+  int candidates[]={8080,3128,80,81,82,8081,3129,31337,0,-1};
+  int* current=candidates;
+  T_SOC soc;
+
+  for(;;) {
+    soc=catch_url_init(current,adr_prox);
+    *port_prox=*current;
+    current++;
+    if ((soc != INVALID_SOCKET) || (*current < 0))
+      break;
+  }
+  return soc;
+}
+
+
+// 1- Init the URL catcher
+
+// catch_url_init(&port,&return_host);
+// Creates a TCP socket bound to the address of the local host name and to *port,
+// and puts it in listening state.
+// On success returns the listening socket, stores in *port the port actually bound
+// (as reported by getsockname(), which matters when *port was 0) and writes the
+// textual bound address into adr (at most 128 bytes are passed to SOCaddr_inetntoa).
+// On failure returns INVALID_SOCKET; no socket is left open.
+// NOTE(review): Winsock start-up is not done here (an older WSAStartup sequence was
+// commented out in this function) -- presumably handled by the caller; confirm.
+T_SOC catch_url_init(int* port,char* adr) {
+  T_SOC soc = INVALID_SOCKET;
+  char h_loc[256+2];
+
+  // gethostname() may leave the name unterminated when it is truncated
+  memset(h_loc, 0, sizeof(h_loc));
+
+  if (gethostname(h_loc,256)==0) {    // host name
+    SOCaddr server;
+    int server_size=sizeof(server);
+    t_hostent* hp_loc;
+    t_fullhostent buffer;
+
+    // clear structure
+    memset(&server, 0, sizeof(server));
+
+    if ( (hp_loc=vxgethostbyname(h_loc, &buffer)) ) {  // our host
+
+      // copy address
+      SOCaddr_copyaddr(server, server_size, hp_loc->h_addr_list[0], hp_loc->h_length);
+
+      if ( (soc=socket(SOCaddr_sinfamily(server), SOCK_STREAM, 0)) != INVALID_SOCKET) {
+        int listening=0;      // set once the socket is bound, identified and listening
+        SOCaddr_initport(server, *port);
+        if ( bind(soc,(struct sockaddr*) &server,server_size) == 0 ) {
+          SOCaddr server2;
+          int len;
+          len=sizeof(server2);
+          // clear structure
+          memset(&server2, 0, sizeof(server2));
+          if (getsockname(soc,(struct sockaddr*) &server2,&len) == 0) {
+            // report the port really bound: read it from the getsockname() result,
+            // not from the request structure (which still holds 0 for "any port")
+            *port=ntohs(SOCaddr_sinport(server2));
+            if (listen(soc,10)>=0) {    // backlog of 10: arbitrary
+              SOCaddr_inetntoa(adr, 128, server2, len);
+              listening=1;
+            }
+          }
+        }
+        // single clean-up path for bind/getsockname/listen failures
+        if (!listening) {
+#ifdef _WIN32
+          closesocket(soc);
+#else
+          close(soc);
+#endif
+          soc=INVALID_SOCKET;
+        }
+      }
+    }
+  }
+  return soc;
+}
+
+// 2 - Wait for URL
+
+// catch_url
+// Waits (blocking) for one client on the listening socket soc, reads its HTTP request,
+// answers with CATCH_RESPONSE and closes the client connection. The listening socket
+// itself is left open.
+// returns 0 if error, 1 if a request was captured
+// url: buffer where URL must be stored - or ip:port in case of failure
+// method: receives the request method, converted to upper case
+// data: 32Kb; receives the request line (with the path only), the headers and the
+//       beginning of the body, if any
+// NOTE: the request line is at most 999 characters long, so method and url must each
+// be able to hold that much; their real sizes are not known here.
+int catch_url(T_SOC soc,char* url,char* method,char* data) {
+  // capacity assumed for data, taken from the "32Kb" stated above -- TODO confirm with callers
+  const int data_max=32768;
+  int retour=0;
+
+  // connection (accept)
+  if (soc != INVALID_SOCKET) {
+    T_SOC soc2;
+    struct sockaddr dummyaddr;
+    int dummylen;
+    // NOTE(review): retries forever, including on permanent accept() errors
+    do {
+      dummylen = sizeof(struct sockaddr);   // in/out parameter: reset before each call
+      soc2=accept(soc,&dummyaddr,&dummylen);
+    } while (soc2 == INVALID_SOCKET);
+    soc = soc2;     // from now on soc is the client connection
+    /* INFOS */
+    {
+      SOCaddr server2;
+      int len;
+      len=sizeof(server2);
+      // clear structure
+      memset(&server2, 0, sizeof(server2));
+      if (getpeername(soc,(struct sockaddr*) &server2,&len) == 0) {
+        char dot[256+2];
+        SOCaddr_inetntoa(dot, 256, server2, sizeof(server2));
+        sprintf(url,"%s:%d", dot, htons(SOCaddr_sinport(server2)));
+      }
+    }
+    /* INFOS */
+
+    // reception
+    if (soc != INVALID_SOCKET) {
+      char line[1000];
+      char protocol[256];
+      line[0]=protocol[0]='\0';
+      //
+      socinput(soc,line,1000);
+      if (strnotempty(line)) {
+        // protocol is a local 256-byte buffer: bound the conversion accordingly
+        if (sscanf(line,"%s %s %255s",method,url,protocol) == 3) {
+          char url_adr[HTS_URLMAXSIZE*2];
+          char url_fil[HTS_URLMAXSIZE*2];
+          int i,r=0;
+          url_adr[0]=url_fil[0]='\0';
+          // method in upper case
+          for(i=0;method[i]!='\0';i++) {
+            if ((method[i]>='a') && (method[i]<='z'))
+              method[i]-=('a'-'A');
+          }
+          // link address
+          if (ident_url_absolute(url,url_adr,url_fil)>=0) {
+            // header processing
+            char loc[HTS_URLMAXSIZE*2];
+            htsblk blkretour;
+            int pos;
+            int overflow=0;
+            memset(&blkretour, 0, sizeof(htsblk));    // clear
+            loc[0]='\0';
+            blkretour.location=loc;    // if not empty, will hold the real address in case of moved xx
+            // read the remaining headers, up to and including the empty line
+            sprintf(data,"%s %s %s\r\n",method,url_fil,protocol);
+            pos=(int) strlen(data);
+            while(strnotempty(line)) {
+              int l;
+              socinput(soc,line,1000);
+              treathead(NULL,NULL,NULL,&blkretour,line);  // process
+              l=(int) strlen(line);
+              // the client controls the number of headers: never write past data
+              if (pos + l + 2 >= data_max) {
+                overflow=1;
+                break;
+              }
+              strcpy(data+pos,line);
+              strcpy(data+pos+l,"\r\n");
+              pos+=l+2;
+            }
+            // no extra final CR/LF: the empty line read above already added it
+            if (!overflow) {
+              if (blkretour.totalsize>0) {
+                int len=(blkretour.totalsize < 32000) ? (int) blkretour.totalsize : 32000;
+                // keep room for the terminating NUL
+                if (len > data_max-1-pos)
+                  len=data_max-1-pos;
+                // copy the remaining part (possible post data)
+                while((len>0) && ((r=recv(soc,(char*) data+pos,len,0))>0) ) {
+                  pos+=r;
+                  len-=r;
+                  data[pos]='\0';      // NUL-terminate
+                }
+              }
+              // send the page (sent as is: the text is not a printf format)
+              send(soc,CATCH_RESPONSE,strlen(CATCH_RESPONSE),0);
+              // OK!
+              retour=1;
+            }
+          }
+        }
+      } // else error
+    }
+  }
+  if (soc != INVALID_SOCKET) {
+#ifdef _WIN32
+    closesocket(soc);
+#else
+    close(soc);
+#endif
+  }
+  return retour;
+}
+
+
+
+// Read one line from a socket, one byte at a time.
+// Stops at LF, at end of stream/error, or once max-1 characters have been stored.
+// CR, TAB (9) and FF (12) are skipped; the LF is not stored.
+// s is always NUL-terminated when max >= 1; nothing is read or written when max <= 0.
+void socinput(T_SOC soc,char* s,int max) {
+  int c=0;
+  int j=0;
+  if (max <= 0)
+    return;
+  // the room test comes before the read, so that a 1-byte buffer only gets the NUL
+  while((c!=-1) && (c!=EOF) && (j<(max-1))) {
+    unsigned char b;
+    if (recv(soc,(char*) &b,1,0)==1) {
+      c=b;
+      switch(c) {
+        case 13: break;                 // skip CR
+        case 10: c=-1; break;           // end of line
+        case 9: case 12: break;         // skip these characters
+        default: s[j++]=(char) c; break;
+      }
+    } else
+      c=EOF;
+  }
+  s[j]='\0';
+}
+
diff --git a/src/htscatchurl.h b/src/htscatchurl.h
new file mode 100644
index 0000000..77036fd
--- /dev/null
+++ b/src/htscatchurl.h
@@ -0,0 +1,76 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: URL catch .h */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+// URL catcher header file (.h)
+
+#ifndef HTS_CATCHURL_DEFH
+#define HTS_CATCHURL_DEFH
+
+#include "htsbasenet.h"
+
+// Fonctions
+void socinput(T_SOC soc,char* s,int max);
+T_SOC catch_url_init_std(int* port_prox,char* adr_prox);
+T_SOC catch_url_init(int* port,char* adr);
+int catch_url(T_SOC soc,char* url,char* method,char* data);
+
+#define CATCH_RESPONSE \
+ "HTTP/1.0 200 OK\r\n"\
+ "Content-type: text/html\r\n"\
+ "\r\n"\
+ "<!-- Generated by HTTrack Website Copier -->\r\n"\
+ "<HTML><HEAD>\r\n"\
+ "<TITLE>Link caught!</TITLE>\r\n"\
+ "<SCRIPT LANGUAGE=\"Javascript\">\r\n"\
+ "<!--\r\n"\
+ "function back() {\r\n"\
+ " history.go(-1);\r\n"\
+ "}\r\n"\
+ "// -->\r\n"\
+ "</SCRIPT>\r\n"\
+ "</HEAD>\r\n"\
+ "<BODY>\r\n"\
+ "<H2>Link captured into HTTrack Website Copier, you can now restore your proxy preferences!</H2>\r\n"\
+ "<BR><BR>\r\n"\
+ "<H3><A HREF=\"javascript:back();\">Clic here to go back</A></H3>\r\n"\
+ "</BODY></HTML>"\
+ "<!-- Generated by HTTrack Website Copier -->\r\n"\
+ "\r\n"\
+
+#endif
+
+
+
diff --git a/src/htsconfig.h b/src/htsconfig.h
new file mode 100644
index 0000000..665c9df
--- /dev/null
+++ b/src/htsconfig.h
@@ -0,0 +1,133 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: Global engine definition file */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+// Set of parameters of the robot (compile-time settings; 0/1 values are switches)
+
+#ifndef HTTRACK_GLOBAL_ENGINE_DEFH
+#define HTTRACK_GLOBAL_ENGINE_DEFH
+
+// ------------------------------------------------------------
+// ROBOT definitions
+
+// access to the mirrors for other users (0/1)
+#define HTS_ACCESS 1
+
+// socket poll time: 1/10s
+// NOTE(review): 100000 matches 1/10s only if HTS_SOCK_MS is in microseconds, despite its name -- confirm at the use site
+#define HTS_SOCK_SEC 0
+#define HTS_SOCK_MS 100000
+
+// default name
+#define DEFAULT_HTML "index.html"
+
+// default name for / in ftp
+#define DEFAULT_FTP "index.txt"
+
+// default extension for files that do not have one
+#define DEFAULT_EXT ".html"
+#define DEFAULT_EXT_SHORT ".htm"
+//#define DEFAULT_EXT ".txt"
+//#define DEFAULT_EXT_SHORT ".txt"
+
+// avoid /nul, /con..
+#define HTS_OVERRIDE_DOS_FOLDERS 1
+
+// indexing (keyword)
+#define HTS_MAKE_KEYWORD_INDEX 1
+
+// stdin polling allowed? (0/1)
+#define HTS_POLL 1
+
+// check links without extension (0/1) [to be avoided, very slow]
+#define HTS_CHECK_STRANGEDIR 0
+
+// a trailing slash means html by default (example/ is always an html)
+#define HTS_SLASH_ISHTML 1
+
+// remove the index if an identical directory exists
+#define HTS_REMOVE_ANNOYING_INDEX 1
+
+// direct writing to disk allowed (0/1)
+#define HTS_DIRECTDISK 1
+
+// use a hash table?
+#define HTS_HASH 1
+
+// fast cache (build hash table)
+#define HTS_FAST_CACHE 1
+
+// '>' may be considered as a comment closing tag (<!-- > is valid)
+#define GT_ENDS_COMMENT 1
+
+// always adds a '/' at the end if a '~' is encountered (/~smith -> /~smith/)
+#define HTS_TILDE_SLASH 0
+
+// always transform a '//' into a single '/'
+#define HTS_STRIP_DOUBLE_SLASH 0
+
+// case-sensitive for folders and files (0/1)
+// [normally 1, but causes problems (malformed url for example) and is not very useful..
+// ..and not much respected]
+#define HTS_CASSE 0
+
+// Should a file whose size differs from the content-length be discarded?
+// SEE opt.tolerant and opt.http10
+// #define HTS_CL_IS_FATAL 0
+
+// an error removes the file on disk
+// (not set, because of retries)
+#define HTS_REMOVE_BAD_FILES 0
+
+// in case of a Range: xx- giving a Content-length: xx
+// then skip the file, considered as already transferred
+// #define HTS_SKIP_FULL_RANGE 1
+
+// maximum number of filters the user can set
+// #define HTS_FILTERSMAX 10000
+#define HTS_FILTERSINC 1000
+
+// non-blocking connect? (poll on write)
+#define HTS_XCONN 1
+
+// non-blocking gethostbyname? (multithread handling)
+#define HTS_XGETHOST 1
+
+// after how many seconds should the transfer rate be examined?
+#define HTS_WATCHRATE 15
+
+// ------------------------------------------------------------
+//
+
+#endif
diff --git a/src/htscore.c b/src/htscore.c
new file mode 100644
index 0000000..1b9db7a
--- /dev/null
+++ b/src/htscore.c
@@ -0,0 +1,4158 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: Main source */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <fcntl.h>
+#include <ctype.h>
+
+/* File defs */
+#include "htscore.h"
+
+/* specific definitions */
+#include "htsbase.h"
+#include "htsnet.h"
+#include "htsbauth.h"
+#include "htsmd5.h"
+#include "htsindex.h"
+
+// htswrap_add
+#include "htswrap.h"
+/* END specific definitions */
+
+
+/* HTML parsing: engine hook variables and shared state, compiled only when HTS_ANALYSTE is non-zero */
+#if HTS_ANALYSTE
+
+t_hts_htmlcheck_init hts_htmlcheck_init;
+t_hts_htmlcheck_uninit hts_htmlcheck_uninit;
+t_hts_htmlcheck_start hts_htmlcheck_start; // called as hts_htmlcheck_start(&opt) before the main link loop; a zero result aborts the mirror
+t_hts_htmlcheck_end hts_htmlcheck_end; // called with no argument by HTMLCHECK_UNINIT during cleanup
+t_hts_htmlcheck_chopt hts_htmlcheck_chopt;
+t_hts_htmlcheck hts_htmlcheck;
+t_hts_htmlcheck_query hts_htmlcheck_query;
+t_hts_htmlcheck_query2 hts_htmlcheck_query2;
+t_hts_htmlcheck_query3 hts_htmlcheck_query3;
+t_hts_htmlcheck_loop hts_htmlcheck_loop; // called while waiting/pausing; a zero result is treated as "exit requested"
+t_hts_htmlcheck_check hts_htmlcheck_check;
+t_hts_htmlcheck_pause hts_htmlcheck_pause; // called with the path of the hts-paused.lock file
+t_hts_htmlcheck_filesave hts_htmlcheck_filesave;
+t_hts_htmlcheck_linkdetected hts_htmlcheck_linkdetected;
+t_hts_htmlcheck_xfrstatus hts_htmlcheck_xfrstatus;
+t_hts_htmlcheck_savename hts_htmlcheck_savename;
+
+char _hts_errmsg[1100]="";
+int _hts_in_html_parsing=0;
+int _hts_in_html_done=0; // % done
+int _hts_in_html_poll=0; // parsing
+int _hts_setpause=0;
+//httrackp* _hts_setopt=NULL;
+char** _hts_addurl=NULL;
+
+// NOTE(review): presumably a cancellation flag polled by the engine - it is not read in this part of the file, confirm
+int _hts_cancel=0;
+#endif
+
+
+
+int exit_xh; /* quick exit (fatal error or interrupt); httpmirror resets it to 0 on entry and sets it to 1 when an exit is requested */
+
+/* debug-only globals, each compiled in by its own DEBUG switch */
+#if DEBUG_SHOWTYPES
+char REG[32768]="\n";
+#endif
+#if NSDEBUG
+int nsocDEBUG=0;
+#endif
+
+// terminal escape-sequence helpers; both already end with ';'. X and Y of _GOTOXY must be string literals (they are pasted into the format string)
+#define _CLRSCR printf("\33[m\33[2J");
+#define _GOTOXY(X,Y) printf("\33[" X ";" Y "f");
+
+#if DEBUG_CHECKINT /* structure integrity checks, only compiled when DEBUG_CHECKINT is non-zero */
+ #define _CHECKINT_FAIL(a) printf("\n%s\n",a); fflush(stdout); exit(1); /* prints the message and exits; expands to three bare statements, so only use it inside braces */
+ #define _CHECKINT(obj_ptr,message) /* checks that the first and the last byte of *obj_ptr are both zero; exits with a PANIC message otherwise */ \
+ if (obj_ptr) {\
+ if (( * ((char*) (obj_ptr)) != 0) || ( * ((char*) (((char*) (obj_ptr)) + sizeof(*(obj_ptr))-1)) != 0)) {\
+ char msg[1100];\
+ if (( * ((char*) (obj_ptr)) != 0) && ( * ((char*) (((char*) (obj_ptr)) + sizeof(*(obj_ptr))-1)) != 0)) /* both guard bytes damaged */ \
+ sprintf(msg,"* PANIC: Integrity error (structure crushed) in: %s",message);\
+ else if ( * ((char*) (obj_ptr)) != 0) /* only the first byte damaged */ \
+ sprintf(msg,"* PANIC: Integrity error (start of structure) in: %s",message);\
+ else /* only the last byte damaged */ \
+ sprintf(msg,"* PANIC: Integrity error (end of structure) in: %s",message);\
+ _CHECKINT_FAIL(msg);\
+ }\
+ } else { /* a NULL object pointer is fatal as well */ \
+ char msg[1100];\
+ sprintf(msg,"* PANIC: NULL pointer in: %s",message);\
+ _CHECKINT_FAIL(msg);\
+ }
+#endif
+
+#if DEBUG_HASH
+ // longest hash chain? (debug counters, only compiled when DEBUG_HASH is non-zero)
+ int longest_hash[3]={0,0,0},hashnumber=0;
+#endif
+
+// buffer for interaction requests with the shell (only compiled when HTS_ANALYSTE is non-zero)
+#if HTS_ANALYSTE
+char HTbuff[2048];
+#endif
+
+
+
+// Start of httpmirror: helper routines
+
+// version 1 for httpmirror
+// flush both logs when opt.flush is set (the log file may be read progressively); expands to a bare `if` and needs `opt` in scope, so never follow it with `else`
+#define test_flush if (opt.flush) { fflush(opt.log); fflush(opt.errlog); }
+
+// shortcuts to lighten the syntax: fields of the current link; `liens` and `ptr` must be in scope
+#define urladr (liens[ptr]->adr)
+#define urlfil (liens[ptr]->fil)
+#define savename (liens[ptr]->sav)
+//#define level (liens[ptr]->depth)
+
+// XH_extuninit: cleanup for when we must leave xhttpmirror quickly (out of memory, etc.); it uses httpmirror's locals (liens, filters, back, cache, opt, ...) directly
+// note: walk from lien_max-1 down to 0 and only freet() entries with firstblock==1.. otherwise memory access violation: the following links
+// are no longer ours once the block holding them has been released.. argh! [that one was hard]
+#if HTS_ANALYSTE
+#define HTMLCHECK_UNINIT { /* log "engine: end" when opt.debug>0, then invoke the hts_htmlcheck_end hook */ \
+if ( (opt.debug>0) && (opt.log!=NULL) ) { \
+fspc(opt.log,"info"); fprintf(opt.log,"engine: end"LF); \
+} \
+hts_htmlcheck_end(); \
+}
+#else
+ #define HTMLCHECK_UNINIT
+#endif
+
+#define XH_extuninit { \
+ int i; \
+ HTMLCHECK_UNINIT \
+ if (liens!=NULL) { \
+ for(i=lien_max-1;i>=0;i--) { /* top-down: entries with firstblock==0 are read before the block that holds them is freed */ \
+ if (liens[i]) { \
+ if (liens[i]->firstblock==1) { /* only the entry that starts an allocated block is passed to freet() */ \
+ freet(liens[i]); \
+ liens[i]=NULL; \
+ } \
+ } \
+ } \
+ freet(liens); \
+ liens=NULL; \
+ } \
+ if (filters && filters[0]) { /* NOTE(review): presumably filters_init() stores all filter strings in one buffer starting at filters[0] - confirm */ \
+ freet(filters[0]); filters[0]=NULL; \
+ } \
+ if (filters) { \
+ freet(filters); filters=NULL; \
+ } \
+ if (back) { \
+ int i; /* shadows the outer i on purpose or by accident; harmless here */ \
+ for(i=0;i<back_max;i++) { \
+ back_delete(back,i); \
+ } \
+ freet(back); back=NULL; \
+ } \
+ checkrobots_free(&robots);\
+ if (cache.use) { freet(cache.use); cache.use=NULL; } \
+ if (cache.dat) { fclose(cache.dat); cache.dat=NULL; } \
+ if (cache.ndx) { fclose(cache.ndx); cache.ndx=NULL; } \
+ if (cache.olddat) { fclose(cache.olddat); cache.olddat=NULL; } \
+ if (cache.lst) { fclose(cache.lst); cache.lst=NULL; } \
+ if (cache.txt) { fclose(cache.txt); cache.txt=NULL; } \
+ if (opt.log) fflush(opt.log); /* the logs are only flushed here, not closed */ \
+ if (opt.errlog) fflush(opt.errlog);\
+ if (makestat_fp) { fclose(makestat_fp); makestat_fp=NULL; } \
+ if (maketrack_fp){ fclose(maketrack_fp); maketrack_fp=NULL; } \
+ if (opt.accept_cookie) cookie_save(opt.cookie,fconcat(opt.path_log,"cookies.txt")); \
+ if (makeindex_fp) { fclose(makeindex_fp); makeindex_fp=NULL; } \
+ if (cache_hashtable) { inthash_delete(&cache_hashtable); } \
+ if (template_header) { freet(template_header); template_header=NULL; } \
+ if (template_body) { freet(template_body); template_body=NULL; } \
+ if (template_footer) { freet(template_footer); template_footer=NULL; } \
+ structcheck_init(-1); \
+}
+#define XH_uninit XH_extuninit if (r.adr) { freet(r.adr); r.adr=NULL; }
+
+// Recording a link (liens_record): stores a lien_url header and its strings in the shared link buffer, at index lien_tot
+// the needed size is computed first: the header plus each string to store (lengths rounded to HTS_ALIGN, plus spare bytes)
+// then the room left in the buffer is checked - if it is too small a new buffer of add_tab_alloc bytes is allocated
+// finally everything is written at the current buffer address, which is advanced; the available size is decreased accordingly
+// codebase: if not empty and F is a .class file (per strfield), it is stored as well (cod), as primary path for classes
+// FA,FF: former_adr and former_fil, the original link. On allocation failure liens[lien_tot] is NULL and lien_size is 0
+#define REALLOC_SIZE 8192
+#if HTS_HASH
+#define liens_record_sav_len(A)
+#else
+#define liens_record_sav_len(A) (A)->sav_len=strlen((A)->sav)
+#endif
+
+#define liens_record(A,F,S,FA,FF) { \
+int notecode=0; \
+int lienurl_len=((sizeof(lien_url)+HTS_ALIGN-1)/HTS_ALIGN)*HTS_ALIGN,\
+ adr_len=strlen(A),\
+ fil_len=strlen(F),\
+ sav_len=strlen(S),\
+ cod_len=0,\
+ former_adr_len=strlen(FA),\
+ former_fil_len=strlen(FF); \
+if (former_adr_len>0) { /* former_adr/former_fil are only stored when FA is not empty */ \
+ former_adr_len=(former_adr_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; \
+ former_fil_len=(former_fil_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; \
+} else former_adr_len=former_fil_len=0;\
+if (strlen(F)>6) if (strnotempty(codebase)) if (strfield(F+strlen(F)-6,".class")) { notecode=1; \
+cod_len=strlen(codebase); cod_len=(cod_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; } \
+adr_len=(adr_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; fil_len=(fil_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; sav_len=(sav_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; \
+if ((int) lien_size < (int) (adr_len+fil_len+sav_len+cod_len+former_adr_len+former_fil_len+lienurl_len)) { \
+lien_buffer=(char*) ((void*) calloct(add_tab_alloc,1)); \
+lien_size=(lien_buffer!=NULL)?add_tab_alloc:0; /* never advertise room in a buffer that could not be allocated */ \
+if (lien_buffer!=NULL) { \
+liens[lien_tot]=(lien_url*) (void*) lien_buffer; lien_buffer+=lienurl_len; lien_size-=lienurl_len; \
+liens[lien_tot]->firstblock=1; /* this entry starts an allocated block: it is the one to free later */ \
+} else liens[lien_tot]=NULL; /* failure marker: callers test liens[lien_tot]==NULL after the macro */ \
+} else { \
+liens[lien_tot]=(lien_url*) (void*) lien_buffer; lien_buffer+=lienurl_len; lien_size-=lienurl_len; \
+liens[lien_tot]->firstblock=0; /* lives inside a block owned by an earlier entry */ \
+} \
+if (liens[lien_tot]!=NULL) { \
+liens[lien_tot]->adr=lien_buffer; lien_buffer+=adr_len; lien_size-=adr_len; \
+liens[lien_tot]->fil=lien_buffer; lien_buffer+=fil_len; lien_size-=fil_len; \
+liens[lien_tot]->sav=lien_buffer; lien_buffer+=sav_len; lien_size-=sav_len; \
+liens[lien_tot]->cod=NULL; \
+if (notecode) { liens[lien_tot]->cod=lien_buffer; lien_buffer+=cod_len; lien_size-=cod_len; strcpy(liens[lien_tot]->cod,codebase); } \
+if (former_adr_len>0) {\
+liens[lien_tot]->former_adr=lien_buffer; lien_buffer+=former_adr_len; lien_size-=former_adr_len; \
+liens[lien_tot]->former_fil=lien_buffer; lien_buffer+=former_fil_len; lien_size-=former_fil_len; \
+strcpy(liens[lien_tot]->former_adr,FA); \
+strcpy(liens[lien_tot]->former_fil,FF); \
+}\
+strcpy(liens[lien_tot]->adr,A); \
+strcpy(liens[lien_tot]->fil,F); \
+strcpy(liens[lien_tot]->sav,S); \
+liens_record_sav_len(liens[lien_tot]); \
+hash_write(&hash,lien_tot); /* register the new link in the hash; lien_tot itself is incremented by the caller */ \
+} \
+}
+
+/* - abandonné (simplifie) -
+// Ajouter à un lien EXISTANT deux champs former_adr et former_fil pour indiquer le nom d'un fichier avant un "move"
+// NOTE: si un alloc est fait ici il n'y aura pas de freet() à la fin, tant pis (firstbloc)
+#define liens_add_former(index,A,F) { \
+int adr_len=strlen(A),fil_len=strlen(F); \
+adr_len=(adr_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN+4; fil_len=(fil_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN+4; \
+if ((int) lien_size < (int) (adr_len+fil_len)) { \
+lien_buffer=(char*) calloct(add_tab_alloc,1); \
+lien_size=add_tab_alloc; \
+} \
+if (lien_buffer!=NULL) { \
+if (liens[lien_tot]!=NULL) { \
+liens[lien_tot]->former_adr=lien_buffer; lien_buffer+=adr_len; lien_size-=adr_len; \
+liens[lien_tot]->former_fil=lien_buffer; lien_buffer+=fil_len; lien_size-=fil_len; \
+strcpy(liens[lien_tot]->former_adr,A); \
+strcpy(liens[lien_tot]->former_fil,F); \
+} \
+} \
+}
+*/
+
+#if 0 /* disabled variant: writes the rewritten HTML straight to fp with fwrite() instead of buffering it in memory */
+#define HT_ADD_ADR { /* write the source bytes between lastsaved and adr, then move lastsaved up to adr */ \
+ fwrite(lastsaved,1,((int) (adr - lastsaved)),fp); \
+ lastsaved=adr; }
+#define HT_ADD(A) fwrite(A,1,(int) strlen(A),fp);
+#define HT_ADD_START
+#define HT_ADD_END if (fp) { fclose(fp); fp=NULL; }
+#define HT_ADD_FOP { /* create the output file; on failure log it, release r.adr and set error */ \
+ fp=filecreate(savename); \
+ if (fp==NULL) { \
+ if (opt.errlog) { \
+ fspc(opt.errlog,"error"); fprintf(opt.errlog,"Unable to create %s for %s%s"LF,savename,urladr,urlfil); \
+ test_flush; \
+ } \
+ freet(r.adr); r.adr=NULL; \
+ error=1; \
+ } \
+ }
+#else
+// optimized version: the page is built in memory (ht_buff/ht_len/ht_size) so that unmodified html files need not be touched (update)
+#define HT_ADD_CHK(A) if (((int) (A)+ht_len+1) >= ht_size) { /* grow ht_buff so that A more bytes plus a terminator fit, then add A to ht_len */ \
+ ht_size=(A)+ht_len+REALLOC_SIZE; \
+ ht_buff=(char*) realloct(ht_buff,ht_size); \
+ if (ht_buff==NULL) { /* out of memory is fatal: clean up and exit */ \
+ printf("PANIC! : Not enough memory [%d]\n",__LINE__); \
+ XH_uninit; \
+ exit(1); \
+ } \
+ } \
+ ht_len+=A;
+/*
+(Optimized)
+#define HT_ADD_ADR { int i,j=ht_len; HT_ADD_CHK(((int) adr)- ((int) lastsaved)) \
+ for(i=0;i<((int) adr)- ((int) lastsaved);i++) \
+ ht_buff[j+i]=lastsaved[i]; \
+ ht_buff[j+((int) adr)- ((int) lastsaved)]='\0'; \
+ lastsaved=adr; }
+*/
+#define HT_ADD_ADR /* append the source bytes between lastsaved and adr, then move lastsaved up to adr */ \
+ if ((opt.getmode & 1) && (ptr>0)) { /* nothing is buffered for link 0 or when bit 0 of opt.getmode is clear */ \
+ int i=((int) (adr - lastsaved)),j=ht_len; HT_ADD_CHK(i) \
+ memcpy(ht_buff+j, lastsaved, i); \
+ ht_buff[j+i]='\0'; \
+ lastsaved=adr; \
+ }
+/*
+(Optimized)
+#define HT_ADD(A) { HT_ADD_CHK(strlen(A)) strcat(ht_buff,A); }
+*/
+#define HT_ADD(A) /* append the C string A to ht_buff; A is evaluated twice, so it must have no side effects */ \
+ if ((opt.getmode & 1) && (ptr>0)) { \
+ int i=strlen(A),j=ht_len; \
+ if (i) { \
+ HT_ADD_CHK(i) \
+ memcpy(ht_buff+j, A, i); \
+ ht_buff[j+i]='\0'; \
+ } }
+#define HT_ADD_START /* declares ht_size, ht_len and ht_buff in the current scope; initial size is 5/4 of r.size plus REALLOC_SIZE */ \
+ int ht_size=(int)(r.size*5)/4+REALLOC_SIZE; \
+ int ht_len=0; \
+ char* ht_buff=NULL; \
+ if ((opt.getmode & 1) && (ptr>0)) { \
+ ht_buff=(char*) malloct(ht_size); \
+ if (ht_buff==NULL) { \
+ printf("PANIC! : Not enough memory [%d]\n",__LINE__); \
+ XH_uninit; \
+ exit(1); \
+ } \
+ ht_buff[0]='\0'; \
+ }
+#define HT_ADD_END { /* flush ht_buff to savename unless the file on disk has the same size and the cached MD5 matches; then record the MD5 and free ht_buff */ \
+ int ok=0; /* 1 when the existing file can be kept as it is */ \
+ if (ht_buff) { \
+ int file_len=(int) strlen(ht_buff); /* length up to the first NUL, not ht_len */ \
+ char digest[32+2];\
+ digest[0]='\0';\
+ domd5mem(ht_buff,file_len,digest,1);\
+ if (fsize(antislash(savename))==file_len) { /* same size on disk: compare with the digest stored in the cache */ \
+ int mlen;\
+ char* mbuff;\
+ cache_readdata(&cache,"//[HTML-MD5]//",savename,&mbuff,&mlen); /* NOTE(review): mbuff is never released here - confirm who owns the buffer returned by cache_readdata */ \
+ if (mlen) mbuff[mlen]='\0';\
+ if ((mlen == 32) && (strcmp(((mbuff!=NULL)?mbuff:""),digest)==0)) {\
+ ok=1;\
+ if ( (opt.debug>1) && (opt.log!=NULL) ) {\
+ fspc(opt.log,"debug"); fprintf(opt.log,"File not re-written (md5): %s"LF,savename);\
+ test_flush;\
+ }\
+ } else {\
+ ok=0;\
+ } \
+ }\
+ if (!ok) { /* new or changed content: (re)write the file */ \
+ fp=filecreate(savename); \
+ if (fp) { \
+ if (file_len>0) {\
+ if ((int)fwrite(ht_buff,1,file_len,fp) != file_len) { \
+ if (opt.errlog) { \
+ fspc(opt.errlog,"error"); fprintf(opt.errlog,"Unable to write HTML file %s"LF,savename);\
+ test_flush;\
+ }\
+ }\
+ }\
+ fclose(fp); fp=NULL; \
+ if (strnotempty(r.lastmodified)) /* carry the server's Last-Modified over to the saved file */ \
+ set_filetime_rfc822(savename,r.lastmodified); \
+ usercommand(0,NULL,antislash(savename)); \
+ } else {\
+ if (opt.errlog) { \
+ fspc(opt.errlog,"error");\
+ fprintf(opt.errlog,"Unable to save file %s"LF,savename);\
+ test_flush;\
+ }\
+ }\
+ } else { /* unchanged: the file is only noted through filenote(), not rewritten */ \
+ filenote(savename,NULL); \
+ }\
+ if (cache.ndx) /* store the digest for the next update run */ \
+ cache_writedata(cache.ndx,cache.dat,"//[HTML-MD5]//",savename,digest,(int)strlen(digest));\
+ } \
+ freet(ht_buff); ht_buff=NULL; \
+ }
+#define HT_ADD_FOP
+#endif
+
+// free up filters[0] so that a new element can be inserted at filters[0]; filters, filptr and filter_max must be in scope
+#define HT_INSERT_FILTERS0 {\
+ int i;\
+ if (filptr>0) {\
+ for(i=filptr-1;i>=0;i--) { /* shift every filter one slot up, from the top down so no string is overwritten before it is copied */ \
+ strcpy(filters[i+1],filters[i]); /* NOTE(review): the first copy targets filters[filptr] - confirm that slot always exists */ \
+ }\
+ }\
+ strcpy(filters[0],"");\
+ filptr++;\
+ filptr=minimum(filptr,filter_max); /* NOTE(review): the filter_max local in httpmirror is commented out - confirm this macro is still used */ \
+}
+
+#define HT_INDEX_END do { /* finish the generated index page at most once: write the footer template, close the file, run the user command */ \
+if (!makeindex_done) { \
+if (makeindex_fp) { \
+ char tempo[HTS_URLMAXSIZE*2+64]; /* holds makeindex_firstlink (HTS_URLMAXSIZE*2 bytes) plus the fixed <meta> wrapper; the former 1024 bytes could overflow */ \
+ if (makeindex_links == 1) { /* exactly one link: emit an immediate refresh towards it */ \
+ sprintf(tempo,"<meta HTTP-EQUIV=\"Refresh\" CONTENT=\"0; URL=%s\">"CRLF,makeindex_firstlink); \
+ } else \
+ tempo[0]='\0'; \
+ fprintf(makeindex_fp,template_footer, /* template_footer is used as the format string and is passed two string arguments */ \
+ "<!-- Mirror and index made by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->", \
+ tempo \
+ ); \
+ fflush(makeindex_fp); \
+ fclose(makeindex_fp); /* do not forget this, or you will spend a sleepless night */ \
+ makeindex_fp=NULL; \
+ usercommand(0,NULL,fconcat(opt.path_html,"index.html")); \
+} \
+} \
+makeindex_done=1; /* ok, done */ \
+} while(0)
+
+
+
+
+// Début de httpmirror, robot
+// url1 peut être multiple
+int httpmirror(char* url1,httrackp* ptropt) {
+ httrackp opt = *ptropt; // structure d'options
+ char* primary=NULL; // première page, contenant les liens à scanner
+ int lien_tot=0; // nombre de liens pour le moment
+ lien_url** liens=NULL; // les pointeurs sur les liens
+ hash_struct hash; // système de hachage, accélère la recherche dans les liens
+ t_cookie cookie; // gestion des cookies
+ int lien_max=0;
+ int lien_size=0; // octets restants dans buffer liens dispo
+ char* lien_buffer=NULL; // buffer liens actuel
+ int add_tab_alloc=256000; // +256K de liens à chaque fois
+ //char* tab_alloc=NULL;
+ int ptr; // pointeur actuel sur les liens
+ //
+ int numero_passe=0; // deux passes pour html puis images
+ int back_max=0; // fichiers qui peuvent être en local
+ lien_back* back=NULL; // backing en local
+ htsblk r; // retour de certaines fonctions
+ TStamp lastime=0; // pour affichage infos de tmp en tmp
+ // pour les stats, nombre de fichiers & octets écrits
+ LLint stat_fragment=0; // pour la fragmentation
+ //TStamp istat_timestart; // départ pour calcul instantanné
+ //
+ TStamp last_info_shell=0;
+ int info_shell=0;
+ // filtres
+ char** filters = NULL;
+ //int filter_max=0;
+ int filptr=0;
+ //
+ int makeindex_done=0; // lorsque l'index sera fait
+ FILE* makeindex_fp=NULL;
+ int makeindex_links=0;
+ char makeindex_firstlink[HTS_URLMAXSIZE*2];
+ // statistiques (mode #Z)
+ FILE* makestat_fp=NULL; // fichier de stats taux transfert
+ FILE* maketrack_fp=NULL; // idem pour le tracking
+ TStamp makestat_time=0; // attente (secondes)
+ LLint makestat_total=0; // repère du nombre d'octets transférés depuis denrière stat
+ int makestat_lnk=0; // idem, pour le nombre de liens
+ //
+ char codebase[HTS_URLMAXSIZE*2]; // base pour applet java
+ char base[HTS_URLMAXSIZE*2]; // base pour les autres fichiers
+ //
+ cache_back cache;
+ robots_wizard robots; // gestion robots.txt
+ inthash cache_hashtable=NULL;
+ int cache_hash_size=0;
+ //
+ char *template_header=NULL,*template_body=NULL,*template_footer=NULL;
+ //
+ codebase[0]='\0'; base[0]='\0';
+ //
+ cookie.auth.next=NULL;
+ cookie.auth.auth[0]=cookie.auth.prefix[0]='\0';
+ //
+
+ // noter heure actuelle de départ en secondes
+ memset(&HTS_STAT, 0, sizeof(HTS_STAT));
+ HTS_STAT.stat_timestart=time_local();
+ //istat_timestart=stat_timestart;
+ HTS_STAT.istat_timestart[0]=HTS_STAT.istat_timestart[1]=mtime_local();
+ /* reset stats */
+ HTS_STAT.HTS_TOTAL_RECV=0;
+ HTS_STAT.istat_bytes[0]=HTS_STAT.istat_bytes[1]=0;
+ if (opt.aff_progress)
+ lastime=HTS_STAT.stat_timestart;
+ if (opt.shell) {
+ last_info_shell=HTS_STAT.stat_timestart;
+ }
+ if ((opt.makestat) || (opt.maketrack)){
+ makestat_time=HTS_STAT.stat_timestart;
+ }
+ // initialiser compteur erreurs
+ fspc(NULL,NULL);
+
+ // initialiser cookie
+ if (opt.accept_cookie) {
+ opt.cookie=&cookie;
+ cookie.max_len=30000; // max len
+ strcpy(cookie.data,"");
+ // Charger cookies.txt par défaut ou cookies.txt du miroir
+ if (fexist(fconcat(opt.path_log,"cookies.txt")))
+ cookie_load(opt.cookie,opt.path_log,"cookies.txt");
+ else if (fexist("cookies.txt"))
+ cookie_load(opt.cookie,"","cookies.txt");
+ } else
+ opt.cookie=NULL;
+
+ // initialiser exit_xh
+ exit_xh=0; // sortir prématurément (var globale)
+
+ // initialiser usercommand
+ usercommand(opt.sys_com_exec,opt.sys_com,"");
+
+ // initialiser structcheck
+ structcheck_init(1);
+
+ // initialiser tableau options accessible par d'autres fonctions (signal)
+ hts_declareoptbuffer(&opt);
+
+ // initialiser verif_backblue
+ verif_backblue(NULL);
+ verif_external(0,0);
+ verif_external(1,0);
+
+ // et templates html
+ template_header=readfile_or(fconcat(opt.path_bin,"templates/index-header.html"),HTS_INDEX_HEADER);
+ template_body=readfile_or(fconcat(opt.path_bin,"templates/index-body.html"),HTS_INDEX_BODY);
+ template_footer=readfile_or(fconcat(opt.path_bin,"templates/index-footer.html"),HTS_INDEX_FOOTER);
+
+ // initialiser mimedefs
+ get_userhttptype(1,opt.mimedefs,NULL);
+
+ // Initialiser indexation
+ if (opt.kindex)
+ index_init(opt.path_html);
+
+ // effacer bloc cache
+ memset(&cache, 0, sizeof(cache_back));
+ cache.type=opt.cache; // cache?
+ cache.errlog=opt.errlog; // err log?
+ cache.ptr_ant=cache.ptr_last=0; // pointeur pour anticiper
+
+ // initialiser hash cache
+ if (!cache_hash_size)
+ cache_hash_size=HTS_HASH_SIZE;
+ cache_hashtable=inthash_new(cache_hash_size);
+ if (cache_hashtable==NULL) {
+ printf("PANIC! : Not enough memory [%d]\n",__LINE__);
+ filters[0]=NULL; back_max=0; // uniquement a cause du warning de XH_extuninit
+ XH_extuninit;
+ return 0;
+ }
+ cache.hashtable=(void*)cache_hashtable; /* copy backcache hash */
+
+ // initialiser cache DNS
+ _hts_lockdns(-999);
+
+ // robots.txt
+ strcpy(robots.adr,"!"); // dummy
+ robots.token[0]='\0';
+ robots.next=NULL; // suivant
+ opt.robotsptr = &robots;
+
+ // effacer filters
+ opt.maxfilter = maximum(opt.maxfilter, 128);
+ if (filters_init(&filters, opt.maxfilter, 0) == 0) {
+ printf("PANIC! : Not enough memory [%d]\n",__LINE__);
+ back_max=0; // uniquement a cause du warning de XH_extuninit
+ XH_extuninit;
+ return 0;
+ }
+ opt.filters.filters=&filters;
+ //
+ opt.filters.filptr=&filptr;
+ //opt.filters.filter_max=&filter_max;
+
+ // tableau de pointeurs sur les liens
+ lien_max=maximum(opt.maxlink,32);
+ liens=(lien_url**) malloct(lien_max*sizeof(lien_url*)); // tableau de pointeurs sur les liens
+ if (liens==NULL) {
+ printf("PANIC! : Not enough memory [%d]\n",__LINE__);
+ //XH_uninit;
+ return 0;
+ } else {
+ int i;
+ for(i=0;i<lien_max;i++) {
+ liens[i]=NULL;
+ }
+ }
+ // initialiser ptr et lien_tot
+ ptr=0;
+ lien_tot=0;
+#if HTS_HASH
+ // initialiser hachage
+ {
+ int i;
+ for(i=0;i<HTS_HASH_SIZE;i++)
+ hash.hash[0][i]=hash.hash[1][i]=hash.hash[2][i] = -1; // pas d'entrées
+ hash.liens = liens;
+ hash.max_lien=0;
+ }
+#endif
+
+
+ // copier adresse(s) dans liste des adresses
+ {
+ char *a=url1;
+ int primary_len=8192;
+ if (strnotempty(opt.filelist)) {
+ primary_len+=max(0,fsize(opt.filelist)*2);
+ }
+ primary_len+=strlen(url1)*2;
+
+ // création de la première page, qui contient les liens de base à scanner
+ // c'est plus propre et plus logique que d'entrer à la main les liens dans la pile
+ // on bénéficie ainsi des vérifications et des tests du robot pour les liens "primaires"
+ primary=(char*) malloct(primary_len);
+ if (primary) {
+ primary[0]='\0';
+ } else {
+ printf("PANIC! : Not enough memory [%d]\n",__LINE__);
+ back_max=0; // uniquement a cause du warning de XH_extuninit
+ XH_extuninit;
+ return 0;
+ }
+
+ while(*a) {
+ int i;
+ int joker=0;
+
+ // vérifier qu'il n'y a pas de * dans l'url
+ if (*a=='+')
+ joker=1;
+ else if (*a=='-')
+ joker=1;
+ /* NON, certaines URL ont des * (!)
+ else {
+ int i=0;
+ while((a[i]!=0) && (a[i]!=' ')) if (a[i++]=='*') joker=1;
+ }
+ */
+
+ if (joker) { // joker ou filters
+ //char* p;
+ char tempo[HTS_URLMAXSIZE*2];
+ int type; int plus=0;
+
+ // noter joker (dans b)
+ if (*a=='+') { // champ +
+ type=1; plus=1; a++;
+ } else if (*a=='-') { // champ forbidden[]
+ type=0; a++;
+ } else { // champ + avec joker sans doute
+ type=1;
+ }
+
+ // recopier prochaine chaine (+ ou -)
+ i=0;
+ while((*a!=0) && (*a!=' ')) { tempo[i++]=*a; a++; }
+ tempo[i++]='\0';
+ while(*a==' ') { a++; }
+
+ // sauter les + sans rien après..
+ if (strnotempty(tempo)) {
+ if ((plus==0) && (type==1)) { // implicite: *www.edf.fr par exemple
+ if (tempo[strlen(tempo)-1]!='*') {
+ strcat(tempo,"*"); // ajouter un *
+ }
+ }
+ if (type)
+ strcpy(filters[filptr],"+");
+ else
+ strcpy(filters[filptr],"-");
+ /*
+ if (strfield(tempo,"http://"))
+ strcat(filters[filptr],tempo+7); // ignorer http://
+ else if (strfield(tempo,"ftp://"))
+ strcat(filters[filptr],tempo+6); // ignorer ftp://
+ else
+ */
+ strcat(filters[filptr],tempo);
+ filptr++;
+
+ /* sanity check */
+ if (filptr + 1 >= opt.maxfilter) {
+ opt.maxfilter += HTS_FILTERSINC;
+ if (filters_init(&filters, opt.maxfilter, HTS_FILTERSINC) == 0) {
+ printf("PANIC! : Too many filters : >%d [%d]\n",filptr,__LINE__);
+ if (opt.errlog) {
+ fprintf(opt.errlog,LF"Too many filters, giving up..(>%d)"LF,filptr);
+ fprintf(opt.errlog,"To avoid that: use #F option for more filters (example: -#F5000)"LF);
+ test_flush;
+ }
+ back_max=0; // uniquement a cause du warning de XH_extuninit
+ XH_extuninit;
+ return 0;
+ }
+ //opt.filters.filters=filters;
+ }
+
+ }
+
+ } else { // adresse normale
+ char url[HTS_URLMAXSIZE*2];
+ // prochaine adresse
+ i=0;
+ while((*a!=0) && (*a!=' ')) { url[i++]=*a; a++; }
+ while(*a==' ') { a++; }
+ url[i++]='\0';
+
+ //strcat(primary,"<PRIMARY=\"");
+ if (strstr(url,":/")==NULL)
+ strcat(primary,"http://");
+ strcat(primary,url);
+ //strcat(primary,"\">");
+ strcat(primary,"\n");
+ }
+ } // while
+
+ /* load URL file list */
+ /* OPTIMIZED for fast load */
+ if (strnotempty(opt.filelist)) {
+ char* filelist_buff=NULL;
+ int filelist_sz=fsize(opt.filelist);
+ if (filelist_sz>0) {
+ FILE* fp=fopen(opt.filelist,"rb");
+ if (fp) {
+ filelist_buff=malloct(filelist_sz + 2);
+ if (filelist_buff) {
+ if ((int)fread(filelist_buff,1,filelist_sz,fp) != filelist_sz) {
+ freet(filelist_buff);
+ filelist_buff=NULL;
+ } else {
+ *(filelist_buff + filelist_sz) = '\0';
+ }
+ }
+ fclose(fp);
+ }
+ }
+
+ if (filelist_buff) {
+ int filelist_ptr=0;
+ int n=0;
+ char line[HTS_URLMAXSIZE*2];
+ char* primary_ptr = primary + strlen(primary);
+ while( filelist_ptr < filelist_sz ) {
+ int count=binput(filelist_buff+filelist_ptr,line,HTS_URLMAXSIZE);
+ filelist_ptr+=count;
+ if (count && line[0]) {
+ n++;
+ if (strstr(line,":/")==NULL) {
+ strcpy(primary_ptr, "http://");
+ primary_ptr += strlen(primary_ptr);
+ }
+ strcpy(primary_ptr, line);
+ primary_ptr += strlen(primary_ptr);
+ strcpy(primary_ptr, "\n");
+ primary_ptr += 1;
+ }
+ }
+ // fclose(fp);
+ if (opt.log!=NULL) {
+ fspc(opt.log,"info"); fprintf(opt.log,"%d links added from %s"LF,n,opt.filelist); test_flush;
+ }
+
+ // Free buffer
+ freet(filelist_buff);
+ } else {
+ if (opt.errlog!=NULL) {
+ fspc(opt.errlog,"error"); fprintf(opt.errlog,"Could not include URL list: %s"LF,opt.filelist); test_flush;
+ }
+ }
+ }
+
+
+ // lien primaire
+ liens_record("primary","/primary","primary.html","","");
+ if (liens[lien_tot]==NULL) { // erreur, pas de place réservée
+ printf("PANIC! : Not enough memory [%d]\n",__LINE__);
+ if (opt.errlog) {
+ fprintf(opt.errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url)));
+ test_flush;
+ }
+ back_max=0; // uniquement a cause du warning de XH_extuninit
+ XH_extuninit; // désallocation mémoire & buffers
+ return 0;
+ }
+ liens[lien_tot]->testmode=0; // pas mode test
+ liens[lien_tot]->link_import=0; // pas mode import
+ liens[lien_tot]->depth=opt.depth+1; // lien de priorité maximale
+ liens[lien_tot]->pass2=0; // 1ère passe
+ liens[lien_tot]->retry=opt.retry; // lien de priorité maximale
+ liens[lien_tot]->premier=lien_tot; // premier lien, objet-père=objet
+ liens[lien_tot]->precedent=lien_tot; // lien précédent
+ lien_tot++;
+
+ // Initialiser cache
+ cache_init(&cache,&opt);
+ }
+
+#if BDEBUG==3
+ {
+ int i;
+ for(i=0;i<lien_tot;i++) {
+ printf("%d>%s%s as %s\n",i,liens[i]->adr,liens[i]->fil,liens[i]->sav);
+ }
+ for(i=0;i<filptr;i++) {
+ printf("%d>filters=%s\n",i,filters[i]);
+ }
+ }
+#endif
+
+ // backing
+ //soc_max=opt.maxsoc;
+ if (opt.maxsoc>0) {
+#if BDEBUG==2
+ _CLRSCR;
+#endif
+ // Nombre de fichiers HTML pouvant être présents en mémoire de manière simultannée
+ // On prévoit large: les fichiers HTML ne prennent que peu de place en mémoire, et les
+ // fichiers non html sont sauvés en direct sur disque.
+ // --> 1024 entrées + 32 entrées par socket en supplément
+ back_max=opt.maxsoc*32+1024;
+ //back_max=opt.maxsoc*8+32;
+ back=(lien_back*) calloct((back_max+1),sizeof(lien_back));
+ if (back==NULL) {
+ if (opt.errlog)
+ fprintf(opt.errlog,"Not enough memory, can not allocate %d bytes"LF,(int)((opt.maxsoc+1)*sizeof(lien_back)));
+ return 0;
+ } else { // copier buffer-location & effacer
+ int i;
+ for(i=0;i<back_max;i++){
+ back[i].r.location=back[i].location_buffer;
+ back[i].status=-1;
+ back[i].r.soc=INVALID_SOCKET;
+ }
+ }
+ }
+
+
+ // flush
+ test_flush;
+
+ // statistiques
+ if (opt.makestat) {
+ makestat_fp=fopen(fconcat(opt.path_log,"hts-stats.txt"),"wb");
+ if (makestat_fp != NULL) {
+ fprintf(makestat_fp,"HTTrack statistics report, every minutes"LF LF);
+ }
+ }
+
+ // tracking -- débuggage
+ if (opt.maketrack) {
+ maketrack_fp=fopen(fconcat(opt.path_log,"hts-track.txt"),"wb");
+ if (maketrack_fp != NULL) {
+ fprintf(maketrack_fp,"HTTrack tracking report, every minutes"LF LF);
+ }
+ }
+
+ // on n'a pas de liens!! (exemple: httrack www.* impossible sans départ..)
+ if (lien_tot<=0) {
+ if (opt.errlog) {
+ fprintf(opt.errlog,"Error! You MUST specify at least one complete URL, and not only wildcards!"LF);
+ }
+ }
+
+
+ // attendre une certaine heure..
+ if (opt.waittime>0) {
+ int rollover=0;
+ int ok=0;
+ {
+ TStamp tl=0;
+ time_t tt;
+ struct tm* A;
+ tt=time(NULL);
+ A=localtime(&tt);
+ tl+=A->tm_sec;
+ tl+=A->tm_min*60;
+ tl+=A->tm_hour*60*60;
+ if (tl>opt.waittime) // attendre minuit
+ rollover=1;
+ }
+
+ // attendre..
+ do {
+ TStamp tl=0;
+ time_t tt;
+ struct tm* A;
+ tt=time(NULL);
+ A=localtime(&tt);
+ tl+=A->tm_sec;
+ tl+=A->tm_min*60;
+ tl+=A->tm_hour*60*60;
+
+ if (rollover) {
+ if (tl<=opt.waittime)
+ rollover=0; // attendre heure
+ } else {
+ if (tl>opt.waittime)
+ ok=1; // ok!
+ }
+
+#if HTS_ANALYSTE
+ {
+ int r;
+ if (rollover)
+ r=hts_htmlcheck_loop(back,back_max,0,0,lien_tot,(int) (opt.waittime-tl+24*3600),NULL);
+ else
+ r=hts_htmlcheck_loop(back,back_max,0,0,lien_tot,(int) (opt.waittime-tl),NULL);
+ if (!r) {
+ exit_xh=1; // exit requested
+ ok=1;
+ } else
+ Sleep(100);
+ }
+#endif
+ } while(!ok);
+
+ // note: recopie de plus haut
+ // noter heure actuelle de départ en secondes
+ HTS_STAT.stat_timestart=time_local();
+ if (opt.aff_progress)
+ lastime=HTS_STAT.stat_timestart;
+ if (opt.shell) {
+ last_info_shell=HTS_STAT.stat_timestart;
+ }
+ if ((opt.makestat) || (opt.maketrack)){
+ makestat_time=HTS_STAT.stat_timestart;
+ }
+
+
+ }
+ /* Info for wrappers */
+ if ( (opt.debug>0) && (opt.log!=NULL) ) {
+ fspc(opt.log,"info"); fprintf(opt.log,"engine: start"LF);
+ }
+#if HTS_ANALYSTE
+ if (!hts_htmlcheck_start(&opt)) {
+ XH_extuninit;
+ return 1;
+ }
+#endif
+
+
+ // ------------------------------------------------------------
+
+ // ------------------------------------------------------------
+ // Boucle générale de parcours des liens
+ // ------------------------------------------------------------
+ do {
+ int error=0; // si error alors sauter
+ int store_errpage=0; // c'est une erreur mais on enregistre le html
+ char loc[HTS_URLMAXSIZE*2]; // adresse de relocation
+
+ // Ici on charge le fichier (html, gif..) en mémoire
+ // Les HTMLs sont traités (si leur priorité est suffisante)
+
+ // effacer r
+ memset(&r, 0, sizeof(htsblk)); r.soc=INVALID_SOCKET;
+ r.location=loc; // en cas d'erreur 3xx (moved)
+ // recopier proxy
+ memcpy(&(r.req.proxy), &opt.proxy, sizeof(opt.proxy));
+ // et user-agent
+ strcpy(r.req.user_agent,opt.user_agent);
+ r.req.user_agent_send=opt.user_agent_send;
+
+ if (!error) {
+
+ // Skip empty/invalid/done in background
+ if (liens[ptr]) {
+ while ( (liens[ptr]) && (
+ ( ((urladr != NULL)?(urladr):(" "))[0]=='!') ||
+ ( ((urlfil != NULL)?(urlfil):(" "))[0]=='\0') ||
+ ( (liens[ptr]->pass2 == -1) )
+ )
+ ) { // sauter si lien annulé (ou fil vide)
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"link #%d seems ready, skipping: %s%s.."LF,ptr,((urladr != NULL)?(urladr):(" ")),((urlfil != NULL)?(urlfil):(" ")));
+ test_flush;
+ }
+ ptr++;
+ }
+ }
+ if (liens[ptr]) { // on a qq chose à récupérer?
+
+ if ( (opt.debug>1) && (opt.log!=NULL) ) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"Wait get: %s%s"LF,urladr,urlfil);
+ test_flush;
+#if DEBUG_ROBOTS
+ if (strcmp(urlfil,"/robots.txt") == 0) {
+ printf("robots.txt detected\n");
+ }
+#endif
+ }
+ // ------------------------------------------------------------
+ // DEBUT --RECUPERATION LIEN---
+ if (ptr==0) { // premier lien à parcourir: lien primaire construit avant
+ r.adr=primary; primary=NULL;
+ r.statuscode=200;
+ r.size=strlen(r.adr);
+ r.soc=INVALID_SOCKET;
+ strcpy(r.contenttype,"text/html");
+ /*} else if (opt.maxsoc<=0) { // fichiers 1 à 1 en attente (pas de backing)
+ // charger le fichier en mémoire tout bêtement
+ r=xhttpget(urladr,urlfil);
+ //
+ */
+ } else { // backing, multiples sockets
+ //
+ int b;
+ int n;
+
+#if BDEBUG==1
+ printf("\nBack test..\n");
+#endif
+
+ // pause/lock files
+ {
+ int do_pause=0;
+
+ // user pause lockfile : create hts-paused.lock --> HTTrack will be paused
+ if (fexist(fconcat(opt.path_log,"hts-stop.lock"))) {
+ // remove lockfile
+ remove(fconcat(opt.path_log,"hts-stop.lock"));
+ if (!fexist(fconcat(opt.path_log,"hts-stop.lock"))) {
+ do_pause=1;
+ }
+ }
+
+ // after receving N bytes, pause
+ if (opt.fragment>0) {
+ if ((HTS_STAT.stat_bytes-stat_fragment) > opt.fragment) {
+ do_pause=1;
+ }
+ }
+
+ // pause?
+ if (do_pause) {
+ if ( (opt.debug>0) && (opt.log!=NULL) ) {
+ fspc(opt.log,"info"); fprintf(opt.log,"engine: pause requested.."LF);
+ }
+ while (back_nsoc(back,back_max)>0) { // attendre fin des transferts
+ back_wait(back,back_max,&opt,&cache,HTS_STAT.stat_timestart);
+ Sleep(200);
+#if HTS_ANALYSTE
+ {
+ back_wait(back,back_max,&opt,&cache,HTS_STAT.stat_timestart);
+
+ // Transfer rate
+ engine_stats();
+
+ // Refresh various stats
+ HTS_STAT.stat_nsocket=back_nsoc(back,back_max);
+ HTS_STAT.stat_errors=fspc(NULL,"error");
+ HTS_STAT.stat_warnings=fspc(NULL,"warning");
+ HTS_STAT.stat_infos=fspc(NULL,"info");
+ HTS_STAT.nbk=backlinks_done(liens,lien_tot,ptr);
+ HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,back,back_max);
+
+ b=0;
+ if (!hts_htmlcheck_loop(back,back_max,b,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) {
+ if (opt.errlog) {
+ fspc(opt.errlog,"info"); fprintf(opt.errlog,"Exit requested by shell or user"LF);
+ test_flush;
+ }
+ exit_xh=1; // exit requested
+ XH_uninit;
+ return 0;
+ }
+ }
+#endif
+ }
+ // On désalloue le buffer d'enregistrement des chemins créée, au cas où pendant la pause
+ // l'utilisateur ferait un rm -r après avoir effectué un tar
+ structcheck_init(1);
+ {
+ FILE* fp = fopen(fconcat(opt.path_log,"hts-paused.lock"),"wb");
+ if (fp) {
+ fspc(fp,"info"); // dater
+ fprintf(fp,"Pause"LF"HTTrack is paused after retreiving "LLintP" bytes"LF"Delete this file to continue the mirror..."LF""LF"",HTS_STAT.stat_bytes);
+ fclose(fp);
+ }
+ }
+ stat_fragment=HTS_STAT.stat_bytes;
+ /* Info for wrappers */
+ if ( (opt.debug>0) && (opt.log!=NULL) ) {
+ fspc(opt.log,"info"); fprintf(opt.log,"engine: pause: %s"LF,fconcat(opt.path_log,"hts-paused.lock"));
+ }
+#if HTS_ANALYSTE
+ hts_htmlcheck_pause(fconcat(opt.path_log,"hts-paused.lock"));
+#else
+ while (fexist(fconcat(opt.path_log,"hts-paused.lock"))) {
+ //back_wait(back,back_max,&opt,&cache,HTS_STAT.stat_timestart); inutile!! (plus de sockets actives)
+ Sleep(1000);
+ }
+#endif
+ }
+ //
+ }
+ // end of pause/lock files
+
+#if HTS_ANALYSTE
+ // changement dans les préférences
+/*
+ if (_hts_setopt) {
+ copy_htsopt(_hts_setopt,&opt); // copier au besoin
+ _hts_setopt=NULL; // effacer callback
+ }
+*/
+ if (_hts_addurl) {
+ char add_adr[HTS_URLMAXSIZE*2];
+ char add_fil[HTS_URLMAXSIZE*2];
+ while(*_hts_addurl) {
+ char add_url[HTS_URLMAXSIZE*2];
+ add_adr[0]=add_fil[0]=add_url[0]='\0';
+ if (!link_has_authority(*_hts_addurl))
+ strcpy(add_url,"http://"); // ajouter http://
+ strcat(add_url,*_hts_addurl);
+ if (ident_url_absolute(add_url,add_adr,add_fil)>=0) {
+ // ----Ajout----
+ // noter NOUVEAU lien
+ char add_sav[HTS_URLMAXSIZE*2];
+ // calculer lien et éventuellement modifier addresse/fichier
+ if (url_savename(add_adr,add_fil,add_sav,NULL,NULL,NULL,NULL,&opt,liens,lien_tot,back,back_max,&cache,&hash,ptr,numero_passe)!=-1) {
+ if (hash_read(&hash,add_sav,"",0)<0) { // n'existe pas déja
+ // enregistrer lien (MACRO)
+ liens_record(add_adr,add_fil,add_sav,"","");
+ if (liens[lien_tot]!=NULL) { // OK, pas d'erreur
+ liens[lien_tot]->testmode=0; // mode test?
+ liens[lien_tot]->link_import=0; // mode normal
+ liens[lien_tot]->depth=opt.depth;
+ liens[lien_tot]->pass2=max(0,numero_passe);
+ liens[lien_tot]->retry=opt.retry;
+ liens[lien_tot]->premier=lien_tot;
+ liens[lien_tot]->precedent=lien_tot;
+ lien_tot++;
+ //
+ if ((opt.debug>0) && (opt.log!=NULL)) {
+ fspc(opt.log,"info"); fprintf(opt.log,"Link added by user: %s%s"LF,add_adr,add_fil); test_flush;
+ }
+ //
+ } else { // oups erreur, plus de mémoire!!
+ printf("PANIC! : Not enough memory [%d]\n",__LINE__);
+ if (opt.errlog) {
+ fprintf(opt.errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url)));
+ test_flush;
+ }
+ //if (opt.getmode & 1) { if (fp) { fclose(fp); fp=NULL; } }
+ XH_uninit; // désallocation mémoire & buffers
+ return 0;
+ }
+ } else {
+ if ( (opt.debug>0) && (opt.errlog!=NULL) ) {
+ fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Existing link %s%s not added after user request"LF,add_adr,add_fil);
+ test_flush;
+ }
+ }
+
+ }
+ } else {
+ if (opt.errlog) {
+ fspc(opt.errlog,"error");
+ fprintf(opt.errlog,"Error during URL decoding for %s"LF,add_url);
+ test_flush;
+ }
+ }
+ // ----Fin Ajout----
+ _hts_addurl++; // suivante
+ }
+ _hts_addurl=NULL; // libérer _hts_addurl
+ }
+ // si une pause a été demandée
+ if (_hts_setpause) {
+ // index du lien actuel
+ int b=back_index(back,back_max,urladr,urlfil,savename);
+ if (b<0) b=0; // forcer pour les stats
+ while(_hts_setpause) { // on fait la pause..
+ back_wait(back,back_max,&opt,&cache,HTS_STAT.stat_timestart);
+
+ // Transfer rate
+ engine_stats();
+
+ // Refresh various stats
+ HTS_STAT.stat_nsocket=back_nsoc(back,back_max);
+ HTS_STAT.stat_errors=fspc(NULL,"error");
+ HTS_STAT.stat_warnings=fspc(NULL,"warning");
+ HTS_STAT.stat_infos=fspc(NULL,"info");
+ HTS_STAT.nbk=backlinks_done(liens,lien_tot,ptr);
+ HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,back,back_max);
+
+ if (!hts_htmlcheck_loop(back,back_max,b,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) {
+ if (opt.errlog) {
+ fspc(opt.errlog,"info"); fprintf(opt.errlog,"Exit requested by shell or user"LF);
+ test_flush;
+ }
+ exit_xh=1; // exit requested
+ XH_uninit;
+ return 0;
+ }
+ if (back_nsoc(back,back_max)==0)
+ Sleep(250); // tite pause
+ }
+ }
+#endif
+
+ // si le fichier n'est pas en backing, le mettre..
+ if (!back_exist(back,back_max,urladr,urlfil,savename)) {
+#if BDEBUG==1
+ printf("crash backing: %s%s\n",liens[ptr]->adr,liens[ptr]->fil);
+#endif
+ if (back_add(back,back_max,&opt,&cache,urladr,urlfil,savename,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil,liens[ptr]->testmode,&liens[ptr]->pass2)==-1) {
+ printf("PANIC! : Crash adding error, unexpected error found.. [%d]\n",__LINE__);
+#if BDEBUG==1
+ printf("error while crash adding\n");
+#endif
+ if (opt.errlog) {
+ fspc(opt.errlog,"error"); fprintf(opt.errlog,"Unexpected backing error for %s%s"LF,urladr,urlfil);
+ test_flush;
+ }
+
+ }
+ }
+
+#if BDEBUG==1
+ printf("test number of socks\n");
+#endif
+
+ // ajouter autant de socket qu'on peut ajouter
+ n=opt.maxsoc-back_nsoc(back,back_max);
+#if BDEBUG==1
+ printf("%d sockets available for backing\n",n);
+#endif
+
+#if HTS_ANALYSTE
+ if ((n>0) && (!_hts_setpause)) { // si sockets libre et pas en pause, ajouter
+#else
+ if (n>0) { // si sockets libre
+#endif
+ // remplir autant que l'on peut le cache (backing)
+ back_fillmax(back,back_max,&opt,&cache,liens,ptr,numero_passe,lien_tot);
+ }
+
+ // index du lien actuel
+/*
+ b=back_index(back,back_max,urladr,urlfil,savename);
+
+ if (b>=0)
+*/
+ {
+ // ------------------------------------------------------------
+ // attendre que le fichier actuel soit prêt - BOUCLE D'ATTENTE
+ do {
+
+ // index du lien actuel
+ b=back_index(back,back_max,urladr,urlfil,savename);
+#if BDEBUG==1
+ printf("back index %d, waiting\n",b);
+#endif
+ // Leave the wait loop if the link is no longer on the backing stack (b<0 ends the do/while)
+ if (b<0)
+ continue;
+
+ // Receive data
+ if (back[b].status>0)
+ back_wait(back,back_max,&opt,&cache,HTS_STAT.stat_timestart);
+
+ // Re-check the index: leave the wait loop if the link is no longer on the backing stack
+ b=back_index(back,back_max,urladr,urlfil,savename);
+ if (b<0)
+ continue;
+
+ // And fill the backing stack
+ if (back[b].status>0)
+ back_fillmax(back,back_max,&opt,&cache,liens,ptr,numero_passe,lien_tot);
+
+ // Re-check the index: leave the wait loop if the link is no longer on the backing stack
+ b=back_index(back,back_max,urladr,urlfil,savename);
+ if (b<0)
+ continue;
+
+ // autres occupations de HTTrack: statistiques, boucle d'attente, etc.
+ if ((opt.makestat) || (opt.maketrack)) {
+ TStamp l=time_local();
+ if ((int) (l-makestat_time) >= 60) {
+ if (makestat_fp != NULL) {
+ fspc(makestat_fp,"info");
+ fprintf(makestat_fp,"Rate= %d (/"LLintP") \11NewLinks= %d (/%d)"LF,(int) ((HTS_STAT.HTS_TOTAL_RECV-makestat_total)/(l-makestat_time)), HTS_STAT.HTS_TOTAL_RECV,(int) lien_tot-makestat_lnk,(int) lien_tot);
+ fflush(makestat_fp);
+ makestat_total=HTS_STAT.HTS_TOTAL_RECV;
+ makestat_lnk=lien_tot;
+ }
+ if (maketrack_fp!=NULL) {
+ int i;
+ fspc(maketrack_fp,"info"); fprintf(maketrack_fp,LF);
+ for(i=0;i<back_max;i++) {
+ back_info(back,i,3,maketrack_fp);
+ }
+ fprintf(maketrack_fp,LF);
+
+ }
+ makestat_time=l;
+ }
+ }
+#if HTS_ANALYSTE
+ {
+ int i;
+ {
+ char* s=hts_cancel_file("");
+ if (strnotempty(s)) { // fichier à canceller
+ for(i=0;i<back_max;i++) {
+ if ((back[i].status>0)) {
+ if (strcmp(back[i].url_sav,s)==0) { // ok trouvé
+ if (back[i].status != 1000) {
+#if HTS_DEBUG_CLOSESOCK
+ DEBUG_W("user cancel: deletehttp\n");
+#endif
+ if (back[i].r.soc!=INVALID_SOCKET) deletehttp(&back[i].r);
+ back[i].r.soc=INVALID_SOCKET;
+ back[i].r.statuscode=-1;
+ strcpy(back[i].r.msg,"Cancelled by User");
+ back[i].status=0; // terminé
+ } else // cancel ftp.. flag à 1
+ back[i].stop_ftp = 1;
+ }
+ }
+ }
+ s[0]='\0';
+ }
+ }
+
+ // Transfer rate
+ engine_stats();
+
+ // Refresh various stats
+ HTS_STAT.stat_nsocket=back_nsoc(back,back_max);
+ HTS_STAT.stat_errors=fspc(NULL,"error");
+ HTS_STAT.stat_warnings=fspc(NULL,"warning");
+ HTS_STAT.stat_infos=fspc(NULL,"info");
+ HTS_STAT.nbk=backlinks_done(liens,lien_tot,ptr);
+ HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,back,back_max);
+
+ if (!hts_htmlcheck_loop(back,back_max,b,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) {
+ if (opt.errlog) {
+ fspc(opt.errlog,"info"); fprintf(opt.errlog,"Exit requested by shell or user"LF);
+ test_flush;
+ }
+ exit_xh=1; // exit requested
+ XH_uninit;
+ return 0;
+ }
+ }
+
+#endif
+#if HTS_POLL
+ if ((opt.shell) || (opt.keyboard) || (opt.verbosedisplay) || (!opt.quiet)) {
+ TStamp tl;
+ info_shell=1;
+
+ /* Toggle with ENTER */
+ if (!opt.quiet) {
+ if (check_stdin()) {
+ char com[256];
+ linput(stdin,com,200);
+ if (opt.verbosedisplay==2)
+ opt.verbosedisplay=1;
+ else
+ opt.verbosedisplay=2;
+ /* Info for wrappers */
+ if ( (opt.debug>0) && (opt.log!=NULL) ) {
+ fspc(opt.log,"info"); fprintf(opt.log,"engine: change-options"LF);
+ }
+#if HTS_ANALYSTE
+ hts_htmlcheck_chopt(&opt);
+#endif
+ }
+ }
+
+ /*
+ ..useless..
+ while (check_stdin()) { // données disponibles
+ char com[256];
+ com[0]='\0';
+
+ if (!rcvd) rcvd=1;
+ linput(stdin,com,256);
+
+ if (strnotempty(com)) {
+ if (strlen(com)<=2) {
+ switch(*com) {
+ case '?': { // Status?
+ if (back[b].status>0) printf("WAIT\n");
+ else printf("READY\n");
+ }
+ break;
+ case 'f': { // Fichier en attente?
+ if (back[b].status>0) printf("WAIT %s\n",back[b].url_fil);
+ else printf("READY %s\n",back[b].url_fil);
+ }
+ break;
+ case 'A': case 'F': { // filters
+ int i;
+ for(i=0;i<filptr;i++) {
+ printf("%s ",filters[i]);
+ }
+ printf("\n");
+ }
+ break;
+ case '#': { // Afficher statistique sur le nombre de liens, etc
+ switch(*(com+1)) {
+ case 'l': printf("%d\n",lien_tot); break; // nombre de liens enregistrés
+ case 's': printf("%d\n",back_nsoc(back,back_max)); break; // nombre de sockets
+ case 'r': printf("%d\n",(int) (HTS_STAT.HTS_TOTAL_RECV/(time_local()-HTS_STAT.stat_timestart))); break; // taux de transfert
+ }
+ }
+ break;
+ case 'K': if (*(com+1)=='!') { // Kill
+ XH_uninit;
+ return -1;
+ }
+ break;
+ case 'X': if (*(com+1)=='!') { // exit
+ exit_xh=1;
+ }
+ break;
+ case 'I': if (*(com+1)=='+') info_shell=1; else info_shell=0;
+ break;
+ }
+ io_flush;
+ } else if (*com=='@') {
+ printf("%s\n",com+1);
+ io_flush;
+ }
+ }
+
+ } // while
+ */
+ tl=time_local();
+
+ // générer un message d'infos sur l'état actuel
+ if (opt.shell) { // si shell
+ if ((tl-last_info_shell)>0) { // toute les 1 sec
+ FILE* fp=stdout;
+ int a=0;
+ last_info_shell=tl;
+ if (fexist(fconcat(opt.path_log,"hts-autopsy"))) { // débuggage: teste si le robot est vivant
+ // (oui je sais un robot vivant.. mais bon.. il a le droit de vivre lui aussi)
+ // (libérons les robots esclaves de l'internet!)
+ remove(fconcat(opt.path_log,"hts-autopsy"));
+ fp=fopen(fconcat(opt.path_log,"hts-isalive"),"wb");
+ a=1;
+ }
+ if ((info_shell) || a) {
+ int i,j;
+
+ fprintf(fp,"TIME %d"LF,(int) (tl-HTS_STAT.stat_timestart));
+ fprintf(fp,"TOTAL %d"LF,(int) HTS_STAT.stat_bytes);
+ fprintf(fp,"RATE %d"LF,(int) (HTS_STAT.HTS_TOTAL_RECV/(tl-HTS_STAT.stat_timestart)));
+ fprintf(fp,"SOCKET %d"LF,back_nsoc(back,back_max));
+ fprintf(fp,"LINK %d"LF,lien_tot);
+ {
+ LLint mem=0;
+ for(i=0;i<back_max;i++)
+ if (back[i].r.adr!=NULL)
+ mem+=back[i].r.size;
+ fprintf(fp,"INMEM "LLintP""LF,mem);
+ }
+ for(j=0;j<2;j++) { // passes pour ready et wait
+ for(i=0;i<back_max;i++) {
+ back_info(back,i,j+1,stdout); // maketrack_fp a la place de stdout ?? // **
+ }
+ }
+ fprintf(fp,LF);
+ if (a)
+ fclose(fp);
+ io_flush;
+ }
+ }
+ } // si shell
+
+ } // si shell ou keyboard (option)
+ //
+#endif
+ } while((b>=0) && (back[max(b,0)].status>0));
+
+
+ // If link not found on the stack, it's because it has already been downloaded
+ // in background
+ // Then, skip it and go to the next one
+ if (b<0) {
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"link #%d is ready, no more on the stack, skipping: %s%s.."LF,ptr,urladr,urlfil);
+ test_flush;
+ }
+
+ // prochain lien
+ // ptr++;
+
+ // Jump to 'continue'
+ // This is one of the very very rare cases where goto
+ // is acceptable
+ // A supplemental flag and if( ) { } would be really messy
+ goto jump_if_done;
+ }
+
+
+#if HTS_ANALYSTE==2
+#else
+ //if (!opt.quiet) { // petite animation
+ if (!opt.verbosedisplay) {
+ if (!opt.quiet) {
+ static int roll=0; /* static: ok */
+ roll=(roll+1)%4;
+ printf("%c\x0d",("/-\\|")[roll]);
+ fflush(stdout);
+ }
+ } else if (opt.verbosedisplay==1) {
+ if (back[b].r.statuscode==200)
+ printf("%d/%d: %s%s ("LLintP" bytes) - OK\33[K\r",ptr,lien_tot,back[b].url_adr,back[b].url_fil,back[b].r.size);
+ else
+ printf("%d/%d: %s%s ("LLintP" bytes) - %d\33[K\r",ptr,lien_tot,back[b].url_adr,back[b].url_fil,back[b].r.size,back[b].r.statuscode);
+ fflush(stdout);
+ }
+ //}
+#endif
+ // ------------------------------------------------------------
+ // Vérificateur d'intégrité
+#if DEBUG_CHECKINT
+ _CHECKINT(&back[b],"Retour de back_wait, après le while")
+ {
+ int i;
+ for(i=0;i<back_max;i++) {
+ char si[256];
+ sprintf(si,"Test global après back_wait, index %d",i);
+ _CHECKINT(&back[i],si)
+ }
+ }
+#endif
+
+ // copier structure réponse htsblk
+ memcpy(&r, &(back[b].r), sizeof(htsblk));
+ r.location=loc; // ne PAS copier location!! adresse, pas de buffer
+ if (back[b].r.location)
+ strcpy(r.location,back[b].r.location);
+ back[b].r.adr=NULL; // ne pas faire de desalloc ensuite
+
+ // libérer emplacement backing
+ back_delete(back,b);
+
+ // progression
+ if (opt.aff_progress) {
+ TStamp tl=time_local();
+ if ((tl-HTS_STAT.stat_timestart)>0) {
+ char s[32];
+ int i=0;
+ lastime=tl;
+ _CLRSCR; _GOTOXY("1","1");
+ printf("Rate=%d B/sec\n",(int) (HTS_STAT.HTS_TOTAL_RECV/(tl-HTS_STAT.stat_timestart)));
+ while(i<minimum(back_max,99)) { // **
+ if (back[i].status>=0) { // loading..
+ s[0]='\0';
+ if (strlen(back[i].url_fil)>16)
+ strcat(s,back[i].url_fil+strlen(back[i].url_fil)-16);
+ else
+ strncat(s,back[i].url_fil,16);
+ printf("%s : ",s);
+
+ printf("[");
+ if (back[i].r.totalsize>0) {
+ int p;
+ int j;
+ p=(int)((back[i].r.size*10)/back[i].r.totalsize);
+ p=minimum(10,p);
+ for(j=0;j<p;j++) printf("*");
+ for(j=0;j<(10-p);j++) printf("-");
+ } else {
+ printf(LLintP,back[i].r.size);
+ }
+ printf("]");
+
+ //} else if (back[i].status==0) {
+ // strcpy(s,"ENDED");
+ }
+ printf("\n");
+ i++;
+ }
+ io_flush;
+ }
+ }
+
+ // débug graphique
+#if BDEBUG==2
+ {
+ char s[12];
+ int i=0;
+ _GOTOXY(1,1);
+ printf("Rate=%d B/sec\n",(int) (HTS_STAT.HTS_TOTAL_RECV/(time_local()-HTS_STAT.stat_timestart)));
+ while(i<minimum(back_max,160)) {
+ if (back[i].status>0) {
+ sprintf(s,"%d",back[i].r.size);
+ } else if (back[i].status==0) {
+ strcpy(s,"ENDED");
+ } else
+ strcpy(s," - ");
+ while(strlen(s)<8) strcat(s," ");
+ printf("%s",s); io_flush;
+ i++;
+ }
+ }
+#endif
+
+
+#if BDEBUG==1
+ printf("statuscode=%d with %s / msg=%s\n",r.statuscode,r.contenttype,r.msg);
+#endif
+
+ }
+ /*else {
+#if BDEBUG==1
+ printf("back index error\n");
+#endif
+ }
+ */
+
+ }
+ // FIN --RECUPERATION LIEN---
+ // ------------------------------------------------------------
+
+
+
+ } else { // lien vide..
+ if (opt.errlog) {
+ fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Warning, link #%d empty"LF,ptr); test_flush;
+ error=1;
+ }
+ } // test si url existe (non vide!)
+
+
+
+ // ---tester taille a posteriori---
+ // tester r.adr
+ if (!error) {
+ // erreur, pas de fichier chargé:
+ if ((!r.adr) && (r.is_write==0)
+ && (r.statuscode!=301)
+ && (r.statuscode!=302)
+ && (r.statuscode!=303)
+ && (r.statuscode!=307)
+ && (r.statuscode!=412)
+ && (r.statuscode!=416)
+ ) {
+ // error=1;
+
+ // peut être que le fichier était trop gros?
+ if ((istoobig(r.totalsize,opt.maxfile_html,opt.maxfile_nonhtml,r.contenttype))
+ || (istoobig(r.totalsize,opt.maxfile_html,opt.maxfile_nonhtml,r.contenttype))) {
+ error=0;
+ if (opt.errlog) {
+ fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Big file cancelled according to user's preferences: %s%s"LF,urladr,urlfil);
+ test_flush;
+ }
+ }
+ // // // error=1; // ne pas traiter la suite -- euhh si finalement..
+ }
+ }
+ // ---fin tester taille a posteriori---
+
+
+ // --------------------
+ // BOGUS MIME TYPE HACK
+ // Check if we have a bogus MIME type
+ // example:
+ // Content-type="text/html"
+ // and
+ // Content-disposition="foo.jpg"
+ // --------------------
+ if (!error) {
+ if (r.statuscode == 200) { // OK (ou 304 en backing)
+ if (r.adr) { // Written file
+ if ( (is_hypertext_mime(r.contenttype)) /* Is HTML or Js, .. */
+ || (may_be_hypertext_mime(r.contenttype) && (r.adr) ) /* Is real media, .. */
+ ) {
+ if (strnotempty(r.cdispo)) { // Content-disposition set!
+ if (ishtml(savename) == 0) { // Non HTML!!
+ // patch it!
+ strcpy(r.contenttype,"application/octet-stream");
+ }
+ }
+ }
+ }
+ }
+ }
+
+ // ------------------------------------
+ // BOGUS MIME TYPE HACK II (the revenge)
+ // Check if we have a bogus MIME type
+ if ( (is_hypertext_mime(r.contenttype)) /* Is HTML or Js, .. */
+ || (may_be_hypertext_mime(r.contenttype)) /* Is real media, .. */
+ ) {
+ if ((r.adr) && (r.size)) {
+ unsigned int map[256];
+ int i;
+ unsigned int nspec = 0;
+ map_characters((unsigned char*)r.adr, (unsigned int)r.size, (unsigned int*)map);
+ for(i = 1 ; i < 32 ; i++) { // null chars ignored..
+ if (!is_realspace(i)
+ && i != 27 /* Damn you ISO2022-xx! */
+ ) {
+ nspec += map[i];
+ }
+ }
+ if ((nspec > r.size / 100) && (nspec > 10)) { // too many special characters
+ strcpy(r.contenttype,"application/octet-stream");
+ if (opt.errlog) {
+ fspc(opt.errlog,"warning"); fprintf(opt.errlog,"File not parsed, looks like binary: %s%s"LF,urladr,urlfil);
+ test_flush;
+ }
+ }
+ }
+ }
+
+ // --------------------
+ // REAL MEDIA HACK
+ // Check if we have to load locally the file
+ // --------------------
+ if (!error) {
+ if (r.statuscode == 200) { // OK (ou 304 en backing)
+ if (r.adr==NULL) { // Written file
+ if (may_be_hypertext_mime(r.contenttype)) { // to parse!
+ LLint sz;
+ sz=fsize(savename);
+ if (sz>0) { // ok, exists!
+ if (sz < 1024) { // ok, small file --> to parse!
+ FILE* fp=fopen(savename,"rb");
+ if (fp) {
+ r.adr=malloct((int)sz + 2);
+ if (r.adr) {
+ fread(r.adr,(int)sz,1,fp);
+ r.size=sz;
+ fclose(fp);
+ fp=NULL;
+ // remove (temporary) file!
+ remove(savename);
+ }
+ if (fp)
+ fclose(fp);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ // END OF REAL MEDIA HACK
+
+
+ // ---stockage en cache---
+ // stocker dans le cache?
+ /*
+ if (!error) {
+ if (ptr>0) {
+ if (liens[ptr]) {
+ cache_mayadd(&opt,&cache,&r,urladr,urlfil,savename);
+ } else
+ error=1;
+ }
+ }
+ */
+ // ---fin stockage en cache---
+
+
+
+ // DEBUT rattrapage des 301,302,307..
+ // ------------------------------------------------------------
+ if (!error) {
+ ////////{
+ // on a chargé un fichier en plus
+ // if (!error) stat_loaded+=r.size;
+
+ // ------------------------------------------------------------
+ // Rattrapage des 301,302,307 (moved) et 412,416 - les 304 le sont dans le backing
+ // ------------------------------------------------------------
+ if ( (r.statuscode==301)
+ || (r.statuscode==302)
+ || (r.statuscode==303)
+ || (r.statuscode==307)
+ ) {
+ //if (r.adr!=NULL) { // adr==null si fichier direct. [catch: davename normalement si cgi]
+ //int i=0;
+ char *rn=NULL;
+ // char* p;
+
+ if ( (opt.debug>0) && (opt.errlog!=NULL) ) {
+ //if (opt.errlog) {
+ fspc(opt.errlog,"warning"); fprintf(opt.errlog,"%s for %s%s"LF,r.msg,urladr,urlfil);
+ test_flush;
+ }
+
+
+ {
+ char mov_url[HTS_URLMAXSIZE*2],mov_adr[HTS_URLMAXSIZE*2],mov_fil[HTS_URLMAXSIZE*2];
+ int get_it=0; // ne pas prendre le fichier à la même adresse par défaut
+ int reponse=0;
+ mov_url[0]='\0'; mov_adr[0]='\0'; mov_fil[0]='\0';
+ //
+
+ strcpy(mov_url,r.location);
+
+ // url qque -> adresse+fichier
+ if ((reponse=ident_url_relatif(mov_url,urladr,urlfil,mov_adr,mov_fil))>=0) {
+ int set_prio_to=0; // pas de priotité fixéd par wizard
+
+ //if (ident_url_absolute(mov_url,mov_adr,mov_fil)!=-1) { // ok URL reconnue
+ // c'est (en gros) la même URL..
+ // si c'est un problème de casse dans le host c'est que le serveur est buggé
+ // ("RFC says.." : host name IS case insensitive)
+ if ((strfield2(mov_adr,urladr)!=0) && (strfield2(mov_fil,urlfil)!=0)) { // identique à casse près
+ // on tourne en rond
+ if (strcmp(mov_fil,urlfil)==0) {
+ error=1;
+ get_it=-1; // ne rien faire
+ if (opt.errlog) {
+ fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Can not bear crazy server (%s) for %s%s"LF,r.msg,urladr,urlfil);
+ test_flush;
+ }
+ } else { // mauvaise casse, effacer entrée dans la pile et rejouer une fois
+ get_it=1;
+ }
+ } else { // adresse différente
+ if (ishtml(mov_url)==0) { // pas même adresse MAIS c'est un fichier non html (pas de page moved possible)
+ // -> on prend à cette adresse, le lien sera enregistré avec lien_record() (hash)
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"wizard link test for moved file at %s%s.."LF,mov_adr,mov_fil);
+ test_flush;
+ }
+ // accepté?
+ if (hts_acceptlink(&opt,ptr,lien_tot,liens,
+ mov_adr,mov_fil,
+ &filters,&filptr,opt.maxfilter,
+ &robots,
+ &set_prio_to,
+ NULL) != 1) { /* nouvelle adresse non refusée ? */
+ get_it=1;
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"moved link accepted: %s%s"LF,mov_adr,mov_fil);
+ test_flush;
+ }
+ }
+ } /* sinon traité normalement */
+ }
+
+ //if ((strfield2(mov_adr,urladr)!=0) && (strfield2(mov_fil,urlfil)!=0)) { // identique à casse près
+ if (get_it==1) {
+ // court-circuiter le reste du traitement
+ // et reculer pour mieux sauter
+ if (opt.errlog) {
+ fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Warning moved treated for %s%s (real one is %s%s)"LF,urladr,urlfil,mov_adr,mov_fil);
+ test_flush;
+ }
+ // canceller lien actuel
+ error=1;
+ strcpy(liens[ptr]->adr,"!"); // caractère bidon (invalide hash)
+#if HTS_HASH
+#else
+ liens[ptr]->sav_len=-1; // taille invalide
+#endif
+ // noter NOUVEAU lien
+ {
+ char mov_sav[HTS_URLMAXSIZE*2];
+ // calculer lien et éventuellement modifier addresse/fichier
+ if (url_savename(mov_adr,mov_fil,mov_sav,NULL,NULL,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil,&opt,liens,lien_tot,back,back_max,&cache,&hash,ptr,numero_passe)!=-1) {
+ if (hash_read(&hash,mov_sav,"",0)<0) { // n'existe pas déja
+ // enregistrer lien (MACRO) avec SAV IDENTIQUE
+ liens_record(mov_adr,mov_fil,liens[ptr]->sav,"","");
+ //liens_record(mov_adr,mov_fil,mov_sav,"","");
+ if (liens[lien_tot]!=NULL) { // OK, pas d'erreur
+ // mode test?
+ liens[lien_tot]->testmode=liens[ptr]->testmode;
+ liens[lien_tot]->link_import=0; // mode normal
+ if (!set_prio_to)
+ liens[lien_tot]->depth=liens[ptr]->depth;
+ else
+ liens[lien_tot]->depth=max(0,min(set_prio_to-1,liens[ptr]->depth)); // PRIORITE NULLE (catch page)
+ liens[lien_tot]->pass2=max(liens[ptr]->pass2,numero_passe);
+ liens[lien_tot]->retry=liens[ptr]->retry;
+ liens[lien_tot]->premier=liens[ptr]->premier;
+ liens[lien_tot]->precedent=liens[ptr]->precedent;
+ lien_tot++;
+ } else { // oups erreur, plus de mémoire!!
+ printf("PANIC! : Not enough memory [%d]\n",__LINE__);
+ if (opt.errlog) {
+ fprintf(opt.errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url)));
+ test_flush;
+ }
+ //if (opt.getmode & 1) { if (fp) { fclose(fp); fp=NULL; } }
+ XH_uninit; // désallocation mémoire & buffers
+ return 0;
+ }
+ } else {
+ if ( (opt.debug>0) && (opt.errlog!=NULL) ) {
+ fspc(opt.errlog,"warning"); fprintf(opt.errlog,"moving %s to an existing file %s"LF,liens[ptr]->fil,urlfil);
+ test_flush;
+ }
+ }
+
+ }
+ }
+
+ //printf("-> %s %s %s\n",liens[lien_tot-1]->adr,liens[lien_tot-1]->fil,liens[lien_tot-1]->sav);
+
+ // note métaphysique: il se peut qu'il y ait un index.html et un INDEX.HTML
+ // sous DOS ca marche pas très bien... mais comme je suis génial url_savename()
+ // est à même de régler ce problème
+ }
+ } // ident_url_xx
+
+ if (get_it==0) { // adresse vraiment différente et potentiellement en html (pas de possibilité de bouger la page tel quel à cause des <img src..> et cie)
+ rn=(char*) calloct(8192,1);
+ if (rn!=NULL) {
+ if (opt.errlog) {
+ fspc(opt.errlog,"warning"); fprintf(opt.errlog,"File has moved from %s%s to %s"LF,urladr,urlfil,mov_url);
+ test_flush;
+ }
+ escape_uri(mov_url);
+ // On prépare une page qui sautera immédiatement sur la bonne URL
+ // Le scanner re-changera, ensuite, cette URL, pour la mirrorer!
+ strcpy(rn,"<HTML>"CRLF);
+ strcat(rn,"<!-- Created by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->"CRLF);
+ strcat(rn,"<HEAD>"CRLF"<TITLE>Page has moved</TITLE>"CRLF"</HEAD>"CRLF"<BODY>"CRLF);
+ strcat(rn,"<META HTTP-EQUIV=\"Refresh\" CONTENT=\"0; URL=");
+ strcat(rn,mov_url); // URL
+ strcat(rn,"\">"CRLF);
+ strcat(rn,"<A HREF=\"");
+ strcat(rn,mov_url);
+ strcat(rn,"\">");
+ strcat(rn,"<B>Click here...</B></A>"CRLF);
+ strcat(rn,"</BODY>"CRLF);
+ strcat(rn,"<!-- Created by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->"CRLF);
+ strcat(rn,"</HTML>"CRLF);
+
+ // changer la page
+ if (r.adr) { freet(r.adr); r.adr=NULL; }
+ r.adr=rn;
+ r.size=strlen(r.adr);
+ strcpy(r.contenttype,"text/html");
+ }
+ } // get_it==0
+
+ } // bloc
+ // erreur HTTP (ex: 404, not found)
+ } else if (
+ (r.statuscode==412)
+ || (r.statuscode==416)
+ ) { // Precondition Failed, c'est à dire pour nous redemander TOUT le fichier
+ if (fexist(liens[ptr]->sav)) {
+ remove(liens[ptr]->sav); // Eliminer
+ if (!fexist(liens[ptr]->sav)) { // Bien éliminé? (sinon on boucle..)
+#if HDEBUG
+ printf("Partial content NOT up-to-date, reget all file for %s\n",liens[ptr]->sav);
+#endif
+ if ( (opt.debug>1) && (opt.errlog!=NULL) ) {
+ //if (opt.errlog) {
+ fspc(opt.errlog,"debug"); fprintf(opt.errlog,"Partial file reget (%s) for %s%s"LF,r.msg,urladr,urlfil);
+ test_flush;
+ }
+ // enregistrer le MEME lien (MACRO)
+ liens_record(liens[ptr]->adr,liens[ptr]->fil,liens[ptr]->sav,"","");
+ if (liens[lien_tot]!=NULL) { // OK, pas d'erreur
+ liens[lien_tot]->testmode=liens[ptr]->testmode; // mode test?
+ liens[lien_tot]->link_import=0; // pas mode import
+ liens[lien_tot]->depth=liens[ptr]->depth;
+ liens[lien_tot]->pass2=max(liens[ptr]->pass2,numero_passe);
+ liens[lien_tot]->retry=liens[ptr]->retry;
+ liens[lien_tot]->premier=liens[ptr]->premier;
+ liens[lien_tot]->precedent=ptr;
+ lien_tot++;
+ //
+ // canceller lien actuel
+ error=1;
+ strcpy(liens[ptr]->adr,"!"); // caractère bidon (invalide hash)
+#if HTS_HASH
+#else
+ liens[ptr]->sav_len=-1; // taille invalide
+#endif
+ //
+ } else { // oups erreur, plus de mémoire!!
+ printf("PANIC! : Not enough memory [%d]\n",__LINE__);
+ if (opt.errlog) {
+ fprintf(opt.errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url)));
+ test_flush;
+ }
+ //if (opt.getmode & 1) { if (fp) { fclose(fp); fp=NULL; } }
+ XH_uninit; // désallocation mémoire & buffers
+ return 0;
+ }
+ } else {
+ if (opt.errlog!=NULL) {
+ fspc(opt.errlog,"error"); fprintf(opt.errlog,"Can not remove old file %s"LF,urlfil);
+ test_flush;
+ }
+ }
+ } else {
+ if (opt.errlog!=NULL) {
+ fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Unexpected 412/416 error (%s) for %s%s"LF,r.msg,urladr,urlfil);
+ test_flush;
+ }
+ }
+ } else if (r.statuscode!=200) {
+ int can_retry=0;
+
+ // cas où l'on peut reessayer
+ // -2=timeout -3=rateout (interne à httrack)
+ switch(r.statuscode) {
+ //case -1: can_retry=1; break;
+ case -2: if (opt.hostcontrol) { // timeout et retry épuisés
+ if ((opt.hostcontrol & 1) && (liens[ptr]->retry<=0)) {
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"Link banned: %s%s"LF,urladr,urlfil); test_flush;
+ }
+ host_ban(&opt,liens,ptr,lien_tot,back,back_max,filters,opt.maxfilter,&filptr,jump_identification(urladr));
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"Info: previous log - link banned: %s%s"LF,urladr,urlfil); test_flush;
+ }
+ } else can_retry=1;
+ } else can_retry=1;
+ break;
+ case -3: if ((opt.hostcontrol) && (liens[ptr]->retry<=0)) { // too slow
+ if (opt.hostcontrol & 2) {
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"Link banned: %s%s"LF,urladr,urlfil); test_flush;
+ }
+ host_ban(&opt,liens,ptr,lien_tot,back,back_max,filters,opt.maxfilter,&filptr,jump_identification(urladr));
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"Info: previous log - link banned: %s%s"LF,urladr,urlfil); test_flush;
+ }
+ } else can_retry=1;
+ } else can_retry=1;
+ break;
+ case -4: // connect closed
+ can_retry=1;
+ break;
+ case -5: // other (non fatal) error
+ can_retry=1;
+ break;
+ case -6: // bad SSL handskake
+ can_retry=1;
+ break;
+ case 408: case 409: case 500: case 502: case 504: can_retry=1;
+ break;
+ }
+
+ if ( strcmp(liens[ptr]->fil,"/primary") != 0 ) { // no primary (internal page 0)
+ if ((liens[ptr]->retry<=0) || (!can_retry) ) { // retry épuisés (ou retry impossible)
+ if (opt.errlog) {
+ if ((opt.retry>0) && (can_retry)){
+ fspc(opt.errlog,"error");
+ fprintf(opt.errlog,"\"%s\" (%d) after %d retries at link %s%s (from %s%s)"LF,r.msg,r.statuscode,opt.retry,urladr,urlfil,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil);
+ } else {
+ if (r.statuscode==-10) { // test OK
+ if ((opt.debug>0) && (opt.errlog!=NULL)) {
+ fspc(opt.errlog,"info");
+ fprintf(opt.errlog,"Test OK at link %s%s (from %s%s)"LF,urladr,urlfil,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil);
+ }
+ } else {
+ if (strcmp(urlfil,"/robots.txt")) { // ne pas afficher d'infos sur robots.txt par défaut
+ fspc(opt.errlog,"error");
+ fprintf(opt.errlog,"\"%s\" (%d) at link %s%s (from %s%s)"LF,r.msg,r.statuscode,urladr,urlfil,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil);
+ } else {
+ if (opt.debug>1) {
+ fspc(opt.errlog,"info"); fprintf(opt.errlog,"No robots.txt rules at %s"LF,urladr);
+ test_flush;
+ }
+ }
+ }
+ }
+ test_flush;
+ }
+
+ // NO error at top level
+ // due to the "no connection -> previous restored" hack
+ // This prevents the engine from wiping all data if the website has been deleted (or moved)
+ // since last time (which is quite annoying)
+ if (liens[ptr]->precedent != 0) {
+ // ici on teste si on doit enregistrer la page tout de même
+ if (opt.errpage) {
+ store_errpage=1;
+ }
+ } else {
+ if (strcmp(urlfil,"/robots.txt") != 0) {
+ /*
+ This is an error caused by a link entered by the user
+ That is, link(s) entered by user are invalid (404, 500, connect error, proxy error..)
+ If all links entered are invalid, the session failed and we will attempt to restore
+ the previous one
+ Example: Try to update a website which has been deleted remotely: this may delete
+ the website locally, which is really not desired (especially if the website disappeared!)
+ With this hack, the engine won't wipe local files (how clever)
+ */
+ HTS_STAT.stat_errors_front++;
+ }
+ }
+
+ } else { // retry!!
+ if (opt.debug>0 && opt.errlog != NULL) { // on fera un alert si le retry échoue
+ fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Retry after error %d (%s) at link %s%s (from %s%s)"LF,r.statuscode,r.msg,urladr,urlfil,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil);
+ test_flush;
+ }
+ // redemander fichier
+ liens_record(urladr,urlfil,savename,"","");
+ if (liens[lien_tot]!=NULL) { // OK, pas d'erreur
+ liens[lien_tot]->testmode=liens[ptr]->testmode; // mode test?
+ liens[lien_tot]->link_import=0; // pas mode import
+ liens[lien_tot]->depth=liens[ptr]->depth;
+ liens[lien_tot]->pass2=max(liens[ptr]->pass2,numero_passe);
+ liens[lien_tot]->retry=liens[ptr]->retry-1; // moins 1 retry!
+ liens[lien_tot]->premier=liens[ptr]->premier;
+ liens[lien_tot]->precedent=liens[ptr]->precedent;
+ lien_tot++;
+ } else { // oups erreur, plus de mémoire!!
+ printf("PANIC! : Not enough memory [%d]\n",__LINE__);
+ if (opt.errlog) {
+ fspc(opt.errlog,"panic");
+ fprintf(opt.errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url)));
+ test_flush;
+ }
+ //if (opt.getmode & 1) { if (fp) { fclose(fp); fp=NULL; } }
+ XH_uninit; // désallocation mémoire & buffers
+ return 0;
+ }
+ }
+ } else {
+ if (opt.errlog) {
+ if (opt.debug>1) {
+ fspc(opt.errlog,"info");
+ fprintf(opt.errlog,"Info: no robots.txt at %s%s"LF,urladr,urlfil);
+ }
+ }
+ }
+ if (!store_errpage) {
+ if (r.adr) { freet(r.adr); r.adr=NULL; } // désalloc
+ error=1; // erreur!
+ }
+ }
+ // FIN rattrapage des 301,302,307..
+ // ------------------------------------------------------------
+
+
+
+ } // if !error
+ } // if !error
+
+ if (!error) {
+#if DEBUG_SHOWTYPES
+ if (strstr(REG,r.contenttype)==NULL) {
+ strcat(REG,r.contenttype);
+ strcat(REG,"\n");
+ printf("%s\n",r.contenttype);
+ io_flush;
+ }
+#endif
+
+
+ // ------------------------------------------------------
+ // ok, fichier chargé localement
+ // ------------------------------------------------------
+
+ // Vérificateur d'intégrité
+ #if DEBUG_CHECKINT
+ {
+ int i;
+ for(i=0;i<back_max;i++) {
+ char si[256];
+ sprintf(si,"Test global après back_wait, index %d",i);
+ _CHECKINT(&back[i],si)
+ }
+ }
+ #endif
+
+
+ /* info: updated */
+ /*
+ if (ptr>0) {
+ // "mis à jour"
+ if ((!r.notmodified) && (opt.is_update) && (!store_errpage)) { // page modifiée
+ if (strnotempty(savename)) {
+ HTS_STAT.stat_updated_files++;
+ if (opt.log!=NULL) {
+ //if ((opt.debug>0) && (opt.log!=NULL)) {
+ fspc(opt.log,"info"); fprintf(opt.log,"File updated: %s%s"LF,urladr,urlfil);
+ test_flush;
+ }
+ }
+ } else {
+ if (!store_errpage) {
+ if ( (opt.debug>0) && (opt.log!=NULL) ) {
+ fspc(opt.log,"info"); fprintf(opt.log,"File recorded: %s%s"LF,urladr,urlfil);
+ test_flush;
+ }
+ }
+ }
+ }
+ */
+
+ // ------------------------------------------------------
+ // traitement (parsing)
+ // ------------------------------------------------------
+
+ // traiter
+ if (
+ ( (is_hypertext_mime(r.contenttype)) /* Is HTML or Js, .. */
+ || (may_be_hypertext_mime(r.contenttype) && (r.adr) ) /* Is real media, .. */
+ )
+ && (liens[ptr]->depth>0) /* Depth > 0 (recurse depth) */
+ && (r.adr!=NULL) /* HTML Data exists */
+ && (r.size>0) /* And not empty */
+ && (!store_errpage) /* Not an html error page */
+ && (savename[0]!='\0') /* Output filename exists */
+ ) { // ne traiter que le html si autorisé
+ // -- -- -- --
+ // Parsing HTML
+ if (!error) {
+ /* Info for wrappers */
+ if ( (opt.debug>0) && (opt.log!=NULL) ) {
+ fspc(opt.log,"info"); fprintf(opt.log,"engine: check-html: %s%s"LF,urladr,urlfil);
+ }
+ {
+ // I'll have to segment this part
+#include "htsparse.c"
+ }
+ }
+ // Fin parsing HTML
+ // -- -- -- --
+
+
+ } // si text/html
+ // -- -- --
+ else { // sauver fichier quelconque
+ // -- -- --
+ // sauver fichier
+
+
+ /* En cas d'erreur, vérifier que fichier d'erreur existe */
+ if (strnotempty(savename) == 0) { // chemin de sauvegarde existant
+ if (strcmp(urlfil,"/robots.txt")==0) { // pas robots.txt
+ if (store_errpage) { // c'est une page d'erreur
+ int create_html_warning=0;
+ int create_gif_warning=0;
+ switch (ishtml(urlfil)) { /* pas fichier html */
+ case 0: /* non html */
+ {
+ char buff[256];
+ guess_httptype(buff,urlfil);
+ if (strcmp(buff,"image/gif")==0)
+ create_gif_warning=1;
+ }
+ break;
+ case 1: /* html */
+ if (!r.adr) {
+ }
+ break;
+ default: /* don't know.. */
+ break;
+ }
+ /* Créer message d'erreur ? */
+ if (create_html_warning) {
+ char* adr=(char*)malloct(strlen(HTS_DATA_ERROR_HTML)+1100);
+ if ( (opt.debug>0) && (opt.log!=NULL) ) {
+ fspc(opt.log,"info"); fprintf(opt.log,"Creating HTML warning file (%s)"LF,r.msg);
+ test_flush;
+ }
+ if (adr) {
+ if (r.adr) {
+ freet(r.adr);
+ r.adr=NULL;
+ }
+ sprintf(adr,HTS_DATA_ERROR_HTML,r.msg);
+ r.adr=adr;
+ }
+ } else if (create_gif_warning) {
+ char* adr=(char*)malloct(HTS_DATA_UNKNOWN_GIF_LEN);
+ if ( (opt.debug>0) && (opt.log!=NULL) ) {
+ fspc(opt.log,"info"); fprintf(opt.log,"Creating GIF dummy file (%s)"LF,r.msg);
+ test_flush;
+ }
+ if (r.adr) {
+ freet(r.adr);
+ r.adr=NULL;
+ }
+ memcpy(adr, HTS_DATA_UNKNOWN_GIF, HTS_DATA_UNKNOWN_GIF_LEN);
+ r.adr=adr;
+ }
+ }
+ }
+ }
+
+ if (strnotempty(savename) == 0) { // pas de chemin de sauvegarde
+ if (strcmp(urlfil,"/robots.txt")==0) { // robots.txt
+ if (r.adr) {
+ int bptr=0;
+ char line[1024];
+ char buff[8192];
+ char infobuff[8192];
+ int record=0;
+ line[0]='\0'; buff[0]='\0'; infobuff[0]='\0';
+ //
+#if DEBUG_ROBOTS
+ printf("robots.txt dump:\n%s\n",r.adr);
+#endif
+ do {
+ bptr+=binput(r.adr+bptr, line, sizeof(line) - 2);
+ if (strfield(line,"user-agent:")) {
+ char* a;
+ a=line+11;
+ while(*a==' ') a++; // sauter espace(s)
+ if (*a == '*') {
+ if (record != 2)
+ record=1; // c pour nous
+ } else if (strfield(a,"httrack")) {
+ buff[0]='\0'; // re-enregistrer
+ infobuff[0]='\0';
+ record=2; // locked
+#if DEBUG_ROBOTS
+ printf("explicit disallow for httrack\n");
+#endif
+ }
+ else record=0;
+ } else if (record) {
+ if (strfield(line,"disallow:")) {
+ char* a;
+ a=strchr(line,'#');
+ if (a) *a='\0';
+ while((line[strlen(line)-1]==' ')
+ || (line[strlen(line)-1]==10)
+ || (line[strlen(line)-1]==13))
+ line[strlen(line)-1]='\0'; // supprimer espaces
+ a=line+9;
+ while((*a==' ') || (*a==10) || (*a==13))
+ a++; // sauter espace(s)
+ if (strnotempty(a)) {
+ if (strcmp(a,"/") != 0) { /* ignoring disallow: / */
+ if ( (strlen(buff) + strlen(a) + 8) < sizeof(buff)) {
+ strcat(buff,a);
+ strcat(buff,"\n");
+ if (strnotempty(infobuff)) strcat(infobuff,", ");
+ strcat(infobuff,a);
+ }
+ } else {
+ if (opt.errlog!=NULL) {
+ fspc(opt.errlog,"info"); fprintf(opt.errlog,"Note: %s robots.txt rules are too restrictive, ignoring /"LF,urladr);
+ test_flush;
+ }
+ }
+ }
+ }
+ }
+ } while( (bptr<r.size) && (strlen(buff) < (sizeof(buff) - 32) ) );
+ if (strnotempty(buff)) {
+ checkrobots_set(&robots,urladr,buff);
+ if (opt.log!=NULL) {
+ if (opt.log != opt.errlog) {
+ fspc(opt.log,"info"); fprintf(opt.log,"Note: robots.txt forbidden links for %s are: %s"LF,urladr,infobuff);
+ test_flush;
+ }
+ }
+ if (opt.errlog!=NULL) {
+ fspc(opt.errlog,"info"); fprintf(opt.errlog,"Note: due to %s remote robots.txt rules, links begining with these path will be forbidden: %s (see in the options to disable this)"LF,urladr,infobuff);
+ test_flush;
+ }
+ }
+ }
+ }
+ } else if (r.is_write) { // déja sauvé sur disque
+ /*
+ if (!ishttperror(r.statuscode))
+ HTS_STAT.stat_files++;
+ HTS_STAT.stat_bytes+=r.size;
+ */
+ //printf("ok......\n");
+ } else {
+ // Si on doit sauver une page HTML sans la scanner, cela signifie que le niveau de
+ // récursion nous en empêche
+ // Dans ce cas on met un fichier indiquant ce fait
+ // Si par la suite on doit retraiter ce fichier avec un niveau de récursion plus
+ // fort, on supprimera le readme, et on scannera le fichier html!
+ // note: sauté si store_errpage (càd si page d'erreur, non à scanner!)
+ if ( (is_hypertext_mime(r.contenttype)) && (!store_errpage) && (r.size>0)) { // c'est du html!!
+ char tempo[HTS_URLMAXSIZE*2];
+ FILE* fp;
+ tempo[0]='\0';
+ strcpy(tempo,savename);
+ strcat(tempo,".readme");
+
+#if HTS_DOSNAME
+ // remplacer / par des slash arrière
+ {
+ int i=0;
+ while(tempo[i]) {
+ if (tempo[i]=='/')
+ tempo[i]='\\';
+ i++;
+ }
+ }
+ // a partir d'ici le slash devient antislash
+#endif
+
+ if ((fp=fopen(tempo,"wb"))!=NULL) {
+ fprintf(fp,"Info-file generated by HTTrack Website Copier "HTTRACK_VERSION""CRLF""CRLF);
+ fprintf(fp,"The file %s has not been scanned by HTS"CRLF,savename);
+ fprintf(fp,"Some links contained in it may be unreachable locally."CRLF);
+ fprintf(fp,"If you want to get these files, you have to set an upper recurse level, ");
+ fprintf(fp,"and to rescan the URL."CRLF);
+ fclose(fp);
+#if HTS_WIN==0
+ chmod(tempo,HTS_ACCESS_FILE);
+#endif
+ usercommand(0,NULL,antislash(tempo));
+ }
+
+
+ if ( (opt.debug>0) && (opt.errlog!=NULL) ) {
+ fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Warning: store %s without scan: %s"LF,r.contenttype,savename);
+ test_flush;
+ }
+ } else {
+ if ((opt.getmode & 2)!=0) { // ok autorisé
+ if ( (opt.debug>1) && (opt.log!=NULL) ) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"Store %s: %s"LF,r.contenttype,savename);
+ test_flush;
+ }
+ } else { // lien non autorisé! (ex: cgi-bin en html)
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"non-html file ignored after upload at %s : %s"LF,urladr,urlfil);
+ test_flush;
+ }
+ freet(r.adr); r.adr=NULL;
+ }
+ }
+
+ //printf("extern=%s\n",r.contenttype);
+
+ // ATTENTION C'EST ICI QU'ON SAUVE LE FICHIER!!
+ if (r.adr) {
+ if (filesave(r.adr,(int)r.size,savename)!=0) {
+ if (opt.errlog) {
+ fspc(opt.errlog,"error"); fprintf(opt.errlog,"Unable to save file %s"LF,savename);
+ test_flush;
+ }
+ } else {
+ /*
+ if (!ishttperror(r.statuscode))
+ HTS_STAT.stat_files++;
+ HTS_STAT.stat_bytes+=r.size;
+ */
+ }
+ }
+
+ }
+
+
+ /* Parsing of other media types (java, ram..) */
+ /*
+ if (strfield2(r.contenttype,"audio/x-pn-realaudio")) {
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"(Real Media): parsing %s"LF,savename); test_flush;
+ }
+ if (fexist(savename)) { // ok, existe bien!
+ FILE* fp=fopen(savename,"r+b");
+ if (fp) {
+ if (!fseek(fp,0,SEEK_SET)) {
+ char line[HTS_URLMAXSIZE*2];
+ linput(fp,line,HTS_URLMAXSIZE);
+ if (strnotempty(line)) {
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"(Real Media): detected %s"LF,line); test_flush;
+ }
+ }
+ }
+ fclose(fp);
+ }
+ }
+ } else */
+ if (opt.parsejava) {
+ if (strlen(savename)>6) { // fichier.class
+ if (strfield(savename+strlen(savename)-6,".class")) { // ok c'est une classe
+ if (fexist(savename)) { // ok, existe bien!
+ char err_msg[1100];
+ int r;
+ err_msg[0]='\0';
+
+ //##char* buffer;
+ // JavaParsing f34R!
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"(JavaClass catch file): parsing %s"LF,savename); test_flush;
+ }
+
+ //##buffer=(char*) malloct(32768);
+ //##if (buffer) {
+ //
+ //##strcpy(buffer,"$BUFFER$");
+ //##hts_add_file(buffer); // déclarer buffer
+ while(hts_add_file(NULL,-1) >= 0); // clear chain
+
+ r=hts_parse_java(savename,(char*) &err_msg); // parsing
+ if (!r) { // error
+ if (opt.errlog) {
+ fspc(opt.errlog,"error"); fprintf(opt.errlog,"Unable to parse java file %s : %s"LF,savename,err_msg);
+ test_flush;
+ }
+ } else { // ok
+ char adr[HTS_URLMAXSIZE*2],fil[HTS_URLMAXSIZE*2],save[HTS_URLMAXSIZE*2]; // nom du fichier à sauver dans la boucle
+ char codebase[HTS_URLMAXSIZE*2]; // codebase classe java
+ char lien[HTS_URLMAXSIZE*2];
+ //##char* a;
+ int file_position;
+ int pass_fix,prio_fix;
+ codebase[0]='\0';
+ //
+
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"(JavaClass catch file): parsing finished, now copying links.."LF); test_flush;
+ }
+ // recopie de "creer le lien"
+ //
+
+ // adr = c'est la même
+ // fil et save: save2 et fil2
+ prio_fix=maximum(liens[ptr]->depth-1,0);
+ pass_fix=max(liens[ptr]->pass2,numero_passe);
+ if (liens[ptr]->cod) strcpy(codebase,liens[ptr]->cod); // codebase valable pour tt les classes descendantes
+ if (strnotempty(codebase)==0) { // pas de codebase, construire
+ char* a;
+ strcpy(codebase,liens[ptr]->fil);
+ a=codebase+strlen(codebase)-1;
+ while((*a) && (*a!='/') && ( a > codebase)) a--;
+ if (*a=='/')
+ *(a+1)='\0'; // couper
+ } else { // couper http:// éventuel
+ if (strfield(codebase,"http://")) {
+ char tempo[HTS_URLMAXSIZE*2];
+ char* a=codebase+7;
+ a=strchr(a,'/'); // après host
+ if (a) { // ** msg erreur et vérifier?
+ strcpy(tempo,a);
+ strcpy(codebase,tempo); // couper host
+ } else {
+ if (opt.errlog) {
+ fprintf(opt.errlog,"Unexpected strstr error in base %s"LF,codebase);
+ test_flush;
+ }
+ }
+ }
+ }
+ //##a=buffer;
+ //##strcat(buffer,"&"); // fin du buffer
+ if (!((int) strlen(codebase)<HTS_URLMAXSIZE)) { // trop long
+ if (opt.errlog) {
+ fprintf(opt.errlog,"Codebase too long, parsing skipped (%s)"LF,codebase);
+ test_flush;
+ }
+ //##a=NULL;
+ while(hts_add_file(NULL,-1) >= 0); // clear chain
+ }
+ while ( (file_position=hts_add_file(lien,-1)) >= 0 ) {
+ int dejafait=0;
+ /* //##
+ char* b;
+
+ // prochain fichier à noter!
+ lien[0]='\0';
+ b=strchr(a,'&'); // marqueur de fin de chaine (voir hts_add_file)
+ if (b) {
+ if ( ( ((int) b-(int) a) + strlen(codebase)) < HTS_URLMAXSIZE)
+ strncat(lien,a,(int) b-(int) a); // nom du fichier
+ else {
+ if (opt.errlog) {
+ fprintf(opt.errlog,"Error: Java-Parser generated link that exceeds %d bytes"LF,HTS_URLMAXSIZE);
+ test_flush;
+ }
+ }
+ } else a=NULL;
+
+ if (strnotempty(lien)==0) a=NULL; // fin
+ if (a)
+ a=b+1;
+ */
+
+ if (strnotempty(lien)) {
+
+ // calculer les chemins et noms de sauvegarde
+ if (ident_url_relatif(lien,urladr,codebase,adr,fil)>=0) { // reformage selon chemin
+ int r;
+
+ // patcher opt pour garder structure originale!! (on ne patche pas les noms dans la classe java!)
+ //##if (!strstr(lien,"://")) { // PAS tester les http://.. inutile (on ne va pas patcher le binaire :-( )
+ if (1) {
+ char tempo[HTS_URLMAXSIZE*2];
+ int a,b;
+ tempo[0]='\0';
+ a=opt.savename_type;
+ b=opt.savename_83;
+ opt.savename_type=0;
+ opt.savename_83=0;
+ // note: adr,fil peuvent être patchés
+ r=url_savename(adr,fil,save,NULL,NULL,NULL,NULL,&opt,liens,lien_tot,back,back_max,&cache,&hash,ptr,numero_passe);
+ opt.savename_type=a;
+ opt.savename_83=b;
+ if (r != -1) {
+ if (savename) {
+ if (lienrelatif(tempo,save,savename)==0) {
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"(JavaClass catch file): relative link at %s build with %s and %s: %s"LF,adr,save,savename,tempo);
+ test_flush;
+ }
+ //
+ // xxc xxc xxc xxc TODO java:
+ // rebuild the java class with patched strings...
+ //
+ if (strlen(tempo)<=strlen(lien)) {
+ FILE* fp=fopen(savename,"r+b");
+ if (fp) {
+ if (!fseek(fp,file_position,SEEK_SET)) {
+ //unsigned short int string_length=strlen(tempo);
+ //fwrite(&valint,sizeof(string_length),1,fp);
+ // xxc xxc ARGH! SI la taille est <, décaler le code ?!
+ } else {
+ if (opt.log!=NULL) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"(JavaClass catch file): unable to patch: %s"LF,savename);
+ test_flush;
+ }
+ }
+ fclose(fp);
+ } else {
+ if (opt.log!=NULL) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"(JavaClass catch file): unable to open: %s"LF,savename);
+ test_flush;
+ }
+ }
+ } else {
+ if (opt.log!=NULL) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"(JavaClass catch file): link too long, unable to write it: %s"LF,tempo);
+ test_flush;
+ }
+ }
+ }
+ }
+ }
+ } else {
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"(JavaClass catch file): file not caught: %s"LF,lien); test_flush;
+ }
+ r=-1;
+ }
+ //
+ if (r != -1) {
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"(JavaClass catch file): %s%s -> %s (base %s)"LF,adr,fil,save,codebase); test_flush;
+ }
+
+ // modifié par rapport à l'autre version (cf prio_fix notamment et save2)
+
+ // vérifier que le lien n'a pas déja été noté
+ // si c'est le cas, alors il faut s'assurer que la priorité associée
+ // au fichier est la plus grande des deux priorités
+ //
+ // On part de la fin et on essaye de se presser (économise temps machine)
+#if HTS_HASH
+ {
+ int i=hash_read(&hash,save,"",0); // lecture type 0 (sav)
+ if (i>=0) {
+ liens[i]->depth=maximum(liens[i]->depth,prio_fix);
+ dejafait=1;
+ }
+ }
+#else
+ {
+ int l;
+ int i;
+ l=strlen(save);
+ for(i=lien_tot-1;(i>=0) && (dejafait==0);i--) {
+ if (liens[i]->sav_len==l) { // même taille de chaîne
+ if (strcmp(liens[i]->sav,save)==0) { // existe déja
+ liens[i]->depth=maximum(liens[i]->depth,prio_fix);
+ dejafait=1;
+ }
+ }
+ }
+ }
+#endif
+
+
+ if (!dejafait) {
+ //
+ // >>>> CREER LE LIEN JAVA <<<<
+
+ // enregistrer fichier de java (MACRO)
+ liens_record(adr,fil,save,"","");
+ if (liens[lien_tot]==NULL) { // erreur, pas de place réservée
+ printf("PANIC! : Not enough memory [%d]\n",__LINE__);
+ if (opt.errlog) {
+ fprintf(opt.errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url)));
+ test_flush;
+ }
+ // if ((opt.getmode & 1) && (ptr>0)) { if (fp) { fclose(fp); fp=NULL; } }
+ XH_extuninit; // désallocation mémoire & buffers
+ return 0;
+ }
+
+ // mode test?
+ liens[lien_tot]->testmode=0; // pas mode test
+
+ liens[lien_tot]->link_import=0; // pas mode import
+
+ // écrire autres paramètres de la structure-lien
+ //if (meme_adresse)
+ liens[lien_tot]->premier=liens[ptr]->premier;
+ //else // sinon l'objet père est le précédent lui même
+ // liens[lien_tot]->premier=ptr;
+
+ liens[lien_tot]->precedent=ptr;
+ // noter la priorité
+ liens[lien_tot]->depth=prio_fix;
+ liens[lien_tot]->pass2=max(pass_fix,numero_passe);
+ liens[lien_tot]->retry=opt.retry;
+
+ //strcpy(liens[lien_tot]->adr,adr);
+ //strcpy(liens[lien_tot]->fil,fil);
+ //strcpy(liens[lien_tot]->sav,save);
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"(JavaClass catch file): OK, NOTE: %s%s -> %s"LF,liens[lien_tot]->adr,liens[lien_tot]->fil,liens[lien_tot]->sav);
+ test_flush;
+ }
+
+ lien_tot++; // UN LIEN DE PLUS
+ }
+ }
+ }
+
+ }
+ }
+
+ }
+ //##// effacer buffer temporaire
+ //##if (buffer) freet(buffer); buffer=NULL;
+ //##} // if buffer
+ } // if exist
+ } // if .class
+ } // if strlen-savename
+ } // if opt.parsejava
+
+
+
+ } // text/html ou autre
+
+ } // if !error
+
+
+jump_if_done:
+ // libérer les liens
+ if (r.adr) { freet(r.adr); r.adr=NULL; } // libérer la mémoire!
+
+ // prochain lien
+ ptr++;
+
+ // faut-il sauter le(s) lien(s) suivant(s)? (fichiers images à passer après les html)
+ if (opt.getmode & 4) { // sauver les non html après
+ // sauter les fichiers selon la passe
+ if (!numero_passe) {
+ while((ptr<lien_tot)?( liens[ptr]->pass2):0) ptr++;
+ } else {
+ while((ptr<lien_tot)?( ! liens[ptr]->pass2):0) ptr++;
+ }
+ if (ptr>=lien_tot) { // fin de boucle
+ if (!numero_passe) { // première boucle
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fprintf(opt.log,LF"Now getting non-html files..."LF);
+ test_flush;
+ }
+ numero_passe=1; // seconde boucle
+ ptr=0;
+ // prochain pass2
+ while((ptr<lien_tot)?(!liens[ptr]->pass2):0) ptr++;
+
+ //printf("first link==%d\n");
+
+ }
+ }
+ }
+
+ // a-t-on dépassé le quota?
+ if ((opt.maxsite>0) && (HTS_STAT.stat_bytes>=opt.maxsite)) {
+ if (opt.errlog) {
+ fprintf(opt.errlog,"More than "LLintP" bytes have been transfered.. giving up"LF,opt.maxsite);
+ test_flush;
+ }
+ ptr=lien_tot;
+ } else if ((opt.maxtime>0) && ((time_local()-HTS_STAT.stat_timestart)>opt.maxtime)) {
+ if (opt.errlog) {
+ fprintf(opt.errlog,"More than %d seconds passed.. giving up"LF,opt.maxtime);
+ test_flush;
+ }
+ ptr=lien_tot;
+ } else if (exit_xh) { // sortir
+ if (opt.errlog) {
+ fspc(opt.errlog,"info"); fprintf(opt.errlog,"Exit requested by shell or user"LF);
+ test_flush;
+ }
+ ptr=lien_tot;
+ }
+ } while(ptr<lien_tot);
+ //
+ //
+ //
+
+ /*
+ Ensure the index is being closed
+ */
+ HT_INDEX_END;
+
+ /*
+ updating-a-remotely-deteted-website hack
+ no much data transfered, no data saved
+ <no files successfulyl saved>
+ we assume that something was bad (no connection)
+ just backup old cache and restore everything
+ */
+ if (
+ (HTS_STAT.stat_files <= 0)
+ &&
+ (HTS_STAT.HTS_TOTAL_RECV < 32768) /* should be fine */
+ ) {
+ if (opt.errlog) {
+ fspc(opt.errlog,"info"); fprintf(opt.errlog,"No data seems to have been transfered during this session! : restoring previous one!"LF);
+ test_flush;
+ }
+ XH_uninit;
+ if ( (fexist(fconcat(opt.path_log,"hts-cache/old.dat"))) && (fexist(fconcat(opt.path_log,"hts-cache/old.ndx"))) ) {
+ remove(fconcat(opt.path_log,"hts-cache/new.dat"));
+ remove(fconcat(opt.path_log,"hts-cache/new.ndx"));
+ remove(fconcat(opt.path_log,"hts-cache/new.lst"));
+ remove(fconcat(opt.path_log,"hts-cache/new.txt"));
+ rename(fconcat(opt.path_log,"hts-cache/old.dat"),fconcat(opt.path_log,"hts-cache/new.dat"));
+ rename(fconcat(opt.path_log,"hts-cache/old.ndx"),fconcat(opt.path_log,"hts-cache/new.ndx"));
+ rename(fconcat(opt.path_log,"hts-cache/old.lst"),fconcat(opt.path_log,"hts-cache/new.lst"));
+ rename(fconcat(opt.path_log,"hts-cache/old.txt"),fconcat(opt.path_log,"hts-cache/new.txt"));
+ }
+ exit_xh=2; /* interrupted (no connection detected) */
+ return 1;
+ }
+
+ // info text
+ if (cache.txt) {
+ fclose(cache.txt); cache.txt=NULL;
+ }
+
+ // purger!
+ if (cache.lst) {
+ fclose(cache.lst); cache.lst=NULL;
+ if (opt.delete_old) {
+ FILE *old_lst,*new_lst;
+ //
+#if HTS_ANALYSTE
+ _hts_in_html_parsing=3;
+#endif
+ //
+ old_lst=fopen(fconcat(opt.path_log,"hts-cache/old.lst"),"rb");
+ if (old_lst) {
+ LLint sz=fsize(fconcat(opt.path_log,"hts-cache/new.lst"));
+ new_lst=fopen(fconcat(opt.path_log,"hts-cache/new.lst"),"rb");
+ if ((new_lst) && (sz>0)) {
+ char* adr=(char*) malloct((INTsys)sz);
+ if (adr) {
+ if ((int) fread(adr,1,(INTsys)sz,new_lst) == sz) {
+ char line[1100];
+ int purge=0;
+ while(!feof(old_lst)) {
+ linput(old_lst,line,1000);
+ if (!strstr(adr,line)) { // fichier non trouvé dans le nouveau?
+ char file[HTS_URLMAXSIZE*2];
+ strcpy(file,opt.path_html);
+ strcat(file,line+1);
+ file[strlen(file)-1]='\0';
+ if (fexist(file)) { // toujours sur disque: virer
+ if (opt.log) {
+ fspc(opt.log,"info"); fprintf(opt.log,"Purging %s"LF,file);
+ }
+ remove(file); purge=1;
+ }
+ }
+ }
+ {
+ fseek(old_lst,0,SEEK_SET);
+ while(!feof(old_lst)) {
+ linput(old_lst,line,1000);
+ while(strnotempty(line) && (line[strlen(line)-1]!='/') && (line[strlen(line)-1]!='\\')) {
+ line[strlen(line)-1]='\0';
+ }
+ if (strnotempty(line))
+ line[strlen(line)-1]='\0';
+ if (strnotempty(line))
+ if (!strstr(adr,line)) { // non trouvé?
+ char file[HTS_URLMAXSIZE*2];
+ strcpy(file,opt.path_html);
+ strcat(file,line+1);
+ while ((strnotempty(file)) && (rmdir(file)==0)) { // ok, éliminé (existait)
+ purge=1;
+ if (opt.log) {
+ fspc(opt.log,"info"); fprintf(opt.log,"Purging directory %s/"LF,file);
+ while(strnotempty(file) && (file[strlen(file)-1]!='/') && (file[strlen(file)-1]!='\\')) {
+ file[strlen(file)-1]='\0';
+ }
+ if (strnotempty(file))
+ file[strlen(file)-1]='\0';
+ }
+ }
+ }
+ }
+ }
+ //
+ if (!purge) {
+ if (opt.log) {
+ fprintf(opt.log,"No files purged"LF);
+ }
+ }
+ }
+ freet(adr);
+ }
+ fclose(new_lst);
+ }
+ fclose(old_lst);
+ }
+ //
+#if HTS_ANALYSTE
+ _hts_in_html_parsing=0;
+#endif
+ }
+ }
+ // fin purge!
+
+ // Indexation
+ if (opt.kindex)
+ index_finish(opt.path_html,opt.kindex);
+
+ // afficher résumé dans log
+ if (opt.log!=NULL) {
+ int error = fspc(NULL,"error");
+ int warning = fspc(NULL,"warning");
+ int info = fspc(NULL,"info");
+ char htstime[256];
+ // int n=(int) (stat_loaded/(time_local()-HTS_STAT.stat_timestart));
+ int n=(int) (HTS_STAT.HTS_TOTAL_RECV/(max(1,time_local()-HTS_STAT.stat_timestart)));
+
+ sec2str(htstime,time_local()-HTS_STAT.stat_timestart);
+ //fprintf(opt.log,LF"HTS-mirror complete in %s : %d links scanned, %d files written (%d bytes overall) [%d bytes received at %d bytes/sec]"LF,htstime,lien_tot-1,HTS_STAT.stat_files,stat_bytes,stat_loaded,n);
+ fprintf(opt.log,LF"HTTrack mirror complete in %s : %d links scanned, %d files written (%d bytes overall) [%d bytes received at %d bytes/sec]",htstime,(int)lien_tot-1,(int)HTS_STAT.stat_files,(int)HTS_STAT.stat_bytes,(int)HTS_STAT.HTS_TOTAL_RECV,(int)n);
+ if (HTS_STAT.total_packed) {
+ int packed_ratio=(int)((LLint)(HTS_STAT.total_packed*100)/HTS_STAT.total_unpacked);
+ fprintf(opt.log,", "LLintP" bytes transfered using HTTP compression in %d files, ratio %d%%",HTS_STAT.total_unpacked,HTS_STAT.total_packedfiles,packed_ratio);
+ }
+ fprintf(opt.log,LF);
+ if (error)
+ fprintf(opt.log,"(%d errors, %d warnings, %d messages)"LF,error,warning,info);
+ else
+ fprintf(opt.log,"(No errors, %d warnings, %d messages)"LF,warning,info);
+ test_flush;
+ }
+#if DEBUG_HASH
+ // noter les collisions
+ {
+ int i;
+ int empty1=0,empty2=0,empty3=0;
+ for(i=0;i<HTS_HASH_SIZE;i++) {
+ if (hash.hash[0][i] == -1)
+ empty1++;
+ if (hash.hash[1][i] == -1)
+ empty2++;
+ if (hash.hash[2][i] == -1)
+ empty3++;
+ }
+ printf("\n");
+ printf("Debug info: Hash-table report\n");
+ printf("Number of files entered: %d\n",hashnumber);
+ printf("Table size: %d\n",HTS_HASH_SIZE);
+ printf("\n");
+ printf("Longest chain sav: %d, empty: %d\n",longest_hash[0],empty1);
+ printf("Longest chain adr,fil: %d, empty: %d\n",longest_hash[1],empty2);
+ printf("Longest chain former_adr/fil: %d, empty: %d\n",longest_hash[2],empty3);
+ printf("\n");
+ }
+#endif
+ // fin afficher résumé dans log
+
+ // désallocation mémoire & buffers
+
+ XH_uninit
+
+ return 1; // OK
+}
+// Version 2 of test_flush, for the rest of the file: here 'opt' is accessed as a pointer (opt->)
+// Flushes both log streams when opt->flush is set (needed when the log file is read progressively)
+#undef test_flush
+#define test_flush if (opt->flush) { fflush(opt->log); fflush(opt->errlog); }
+
+
+// Estimate transfer rate
+// a little bit complex, but not too much
+/*
+ .. : idle
+ ^ : event
+
+ ----|----|----|----|----|----|----|----|---->
+ 1 2 3 4 5 6 7 8 9 time (seconds)
+ ----|----|----|----|----|----|----|----|---->
+ ^........^.........^.........^.........^.... timer 0
+ ----^.........^.........^.........^......... timer 1
+ 0 1 0 1 0 1 0 timer N sets its statistics
+ * * * * timer 0 resync timer 1
+
+ Therefore, every second, the transfer rate is refreshed from a 2-second measurement window
+
+*/
+/*
+  Refresh the transfer rate estimate stored in HTS_STAT.rate.
+
+  On every call the stat_nsocket, stat_errors, nbk and nb counters of HTS_STAT are reset to 0.
+  Once more than 2048 bytes have been received, the two timers (index 0, then 1) are examined:
+  the first one that is at least 2000 units of mtime_local() old (presumably milliseconds)
+  sets the rate to (bytes received since that timer started) / (its age / 1000), records
+  itself in istat_idlasttimer and is restarted.
+  If no timer expired, timer 1 is restarted once timer 0 is at least 1000 units old, at most
+  once per restart of timer 0 (tracked through istat_reference01).
+
+  Returns 1 if a timer expired during this call (statistics refreshed), 0 otherwise.
+*/
+int engine_stats(void) {
+#if 0
+  static FILE* debug_fp=NULL; /* ok */
+  if (!debug_fp)
+    debug_fp=fopen("esstat.txt","wb");
+#endif
+  /* Reset the counters with plain assignments. The former chained statement ended with
+     "nbk==0", which stored the result of a comparison (0 or 1) into stat_nsocket and
+     stat_errors and never reset nbk. */
+  HTS_STAT.stat_nsocket=0;
+  HTS_STAT.stat_errors=0;
+  HTS_STAT.nbk=0;
+  HTS_STAT.nb=0;
+  if (HTS_STAT.HTS_TOTAL_RECV>2048) {
+    TStamp cdif=mtime_local();
+    int i;
+
+    for(i=0;i<2;i++) {
+      if ( (cdif - HTS_STAT.istat_timestart[i]) >= 2000) {
+        TStamp dif;
+#if 0
+fprintf(debug_fp,"set timer %d\n",i); fflush(debug_fp);
+#endif
+        dif=cdif - HTS_STAT.istat_timestart[i];
+        if ((TStamp)(dif/1000)>0) {   /* guards the division below */
+          LLint byt=(HTS_STAT.HTS_TOTAL_RECV - HTS_STAT.istat_bytes[i]);
+          HTS_STAT.rate=(LLint)((TStamp) ((TStamp)byt/(dif/1000)));
+          HTS_STAT.istat_idlasttimer=i;      // this timer recently sets the stats
+          //
+          /* restart this timer from the current byte count and time */
+          HTS_STAT.istat_bytes[i]=HTS_STAT.HTS_TOTAL_RECV;
+          HTS_STAT.istat_timestart[i]=cdif;
+        }
+        return 1;     /* refreshed */
+      }
+    }
+
+    // resynchronization between timer 0 (master) and 1 (slave)
+    // timer #0 resync timer #1 when reaching 1 second limit
+    if (HTS_STAT.istat_reference01 != HTS_STAT.istat_timestart[0]) {
+      if ( (cdif - HTS_STAT.istat_timestart[0]) >= 1000) {
+#if 0
+fprintf(debug_fp,"resync timer 1\n"); fflush(debug_fp);
+#endif
+        HTS_STAT.istat_bytes[1]=HTS_STAT.HTS_TOTAL_RECV;
+        HTS_STAT.istat_timestart[1]=cdif;
+        /* remember which start of timer 0 this resync belongs to, so it happens only once */
+        HTS_STAT.istat_reference01=HTS_STAT.istat_timestart[0];
+      }
+    }
+
+  }
+  return 0;
+}
+
+
+// Ban a host (too slow, etc.)
+// - appends a "-<host>/*" entry to the filter table (if *filptr < filter_max)
+// - for every back[] slot with status>=0 for which strfield2(url_adr,host) is true: closes the
+//   socket if open and marks the slot as finished with statuscode -2
+// - for every liens[] entry whose address (after jump_identification) matches host: overwrites
+//   the address with "!" (cancelled)
+// opt: options (log streams, debug level, maxfilter); liens/lien_tot: link table and its size;
+// ptr: not used in this function; back/back_max: back[] array and its size;
+// filters/filter_max/filptr: filter table, its capacity, and the index of the next free entry;
+// host: host name to ban
+void host_ban(httrackp* opt,lien_url** liens,int ptr,int lien_tot,lien_back* back,int back_max,char** filters,int filter_max,int* filptr,char* host) {
+ //int l;
+ int i;
+
+ if (host[0]=='!')
+ return; // error.. already cancelled.. weird.. should not happen
+
+ /* sanity check: enlarge the filter table when the next entry would reach opt->maxfilter */
+ // NOTE(review): &filters is the address of this function's own parameter, so if filters_init()
+ // moves the table the caller's pointer is presumably not refreshed - confirm against callers
+ if (*filptr + 1 >= opt->maxfilter) {
+ opt->maxfilter += HTS_FILTERSINC;
+ if (filters_init(&filters, opt->maxfilter, HTS_FILTERSINC) == 0) {
+ printf("PANIC! : Too many filters : >%d [%d]\n",*filptr,__LINE__);
+ if (opt->errlog) {
+ fprintf(opt->errlog,LF"Too many filters, giving up..(>%d)"LF,*filptr);
+ fprintf(opt->errlog,"To avoid that: use #F option for more filters (example: -#F5000)"LF);
+ fflush(opt->errlog);
+ }
+ abort();
+ }
+ //opt->filters.filters=&filters;
+ }
+
+ // forbid host
+ if (*filptr < filter_max) {
+ strcpy(filters[*filptr],"-");
+ strcat(filters[*filptr],host);
+ strcat(filters[*filptr],"/*"); // host/ * forbidden
+ (*filptr)++; *filptr=minimum(*filptr,filter_max);
+ }
+
+ // oops
+ // NOTE(review): this check runs after the host has already been added to the filters above;
+ // also, a string of length <= 1 can never equal "file://", so the strcmp() below is always non-zero here
+ if (strlen(host)<=1) { // uh?? length <= 1
+ if (strcmp(host,"file://")) {
+ //## if (host[0]!=lOCAL_CHAR) { // not local
+ if (opt->log!=NULL) {
+ fprintf(opt->log,"PANIC! HostCancel detected memory leaks [char %d]"LF,host[0]); test_flush;
+ }
+ return; // damn
+ }
+ }
+
+ // cut connections
+ for(i=0;i<back_max;i++) {
+ if (back[i].status>=0) // receiving OR ready
+ if (strfield2(back[i].url_adr,host)) {
+#if HTS_DEBUG_CLOSESOCK
+ DEBUG_W("host control: deletehttp\n");
+#endif
+ back[i].status=0; // finished
+ if (back[i].r.soc!=INVALID_SOCKET) deletehttp(&back[i].r);
+ back[i].r.soc=INVALID_SOCKET;
+ back[i].r.statuscode=-2; // timeout (no matter whether it is a traffic jam)
+ strcpy(back[i].r.msg,"Link Cancelled by host control");
+
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fprintf(opt->log,"Shutdown: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush;
+ }
+ }
+ }
+
+ // cancel links
+ //l=strlen(host);
+ for(i=0;i<lien_tot;i++) {
+ //if (liens[i]->adr_len==l) { // same string length
+ // Bounded (safe) length computation: stop scanning after 1020 characters
+ if (liens[i]) {
+ if (liens[i]->adr) {
+ int l = 0;
+ while((liens[i]->adr[l]) && (l<1020)) l++;
+ if ((l > 0) && (l<1020)) { // safety check: non-empty and terminated before the limit
+ if (strfield2(jump_identification(liens[i]->adr),host)) { // host
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fprintf(opt->log,"Cancel: %s%s"LF,liens[i]->adr,liens[i]->fil); test_flush;
+ }
+ strcpy(liens[i]->adr,"!"); // cancel (invalidates hash)
+#if HTS_HASH
+#else
+ liens[i]->sav_len=-1; // invalid length
+#endif
+ // the hash entry is not erased, because if the link is met again, sav is re-checked..
+ }
+ } else {
+ // empty or over-long address: log it with a dump of at most 1024 bytes
+ if (opt->log!=NULL) {
+ char dmp[1040];
+ dmp[0]='\0';
+ strncat(dmp,liens[i]->adr,1024);
+ fprintf(opt->log,"WARNING! HostCancel detected memory leaks [len %d at %d]"LF,l,i); test_flush;
+ fprintf(opt->log,"dump 1024 bytes (address %p): "LF"%s"LF,liens[i]->adr,dmp); test_flush;
+ }
+ }
+ } else {
+ // link entry without an address
+ if (opt->log!=NULL) {
+ fprintf(opt->log,"WARNING! HostCancel detected memory leaks [adr at %d]"LF,i); test_flush;
+ }
+ }
+ } else {
+ // NULL link entry
+ if (opt->log!=NULL) {
+ fprintf(opt->log,"WARNING! HostCancel detected memory leaks [null at %d]"LF,i); test_flush;
+ }
+ }
+ //}
+ }
+}
+
+
+/*
+  Manage the directory-structure check buffer and return its current address.
+  init ==  1 : release any existing buffer, then allocate a fresh 65536-byte one holding "#"
+  init ==  0 : allocate the buffer only if there is none yet
+  init == -1 : release the buffer (off); NULL is returned
+  init >=  2 : make sure the buffer holds at least 'init' bytes, growing it if necessary
+  Returns the buffer address, or NULL if there is no buffer (released, or allocation failed).
+*/
+char* structcheck_init(int init) {
+  char** structcheck_buff;
+  int* structcheck_buff_size;
+  NOSTATIC_RESERVE(structcheck_buff, char*, 1);
+  NOSTATIC_RESERVE(structcheck_buff_size, int, 1);
+
+  /* Resize request: ensure enough room */
+  if (init >= 2) {
+    if (init > *structcheck_buff_size) {
+      *structcheck_buff_size = init + 65536;
+      *structcheck_buff=(char*) realloct(*structcheck_buff, *structcheck_buff_size);
+      if (!*structcheck_buff) {   /* could not grow: start again with a default-sized buffer */
+        *structcheck_buff_size = 65536;
+        *structcheck_buff=(char*) malloct(*structcheck_buff_size);
+        if (*structcheck_buff != NULL) {
+          strcpy(*structcheck_buff,"#");
+        }
+      }
+    }
+    return *structcheck_buff;
+  }
+
+  /* Any non-zero value below 2 (1 or -1 in practice) drops the current buffer first */
+  if (init != 0) {
+    if (*structcheck_buff != NULL) {
+      freet(*structcheck_buff);
+    }
+    *structcheck_buff=NULL;
+  }
+
+  /* Unless switching off, make sure a buffer exists (original note: it is released by the xh_xx code) */
+  if ( (init != -1) && (*structcheck_buff == NULL) ) {
+    *structcheck_buff_size = 65536;
+    *structcheck_buff=(char*) malloct(*structcheck_buff_size);
+    if (*structcheck_buff != NULL) {
+      strcpy(*structcheck_buff,"#");
+    }
+  }
+
+  return *structcheck_buff;
+}
+
+/*
+  Allocate (when *ptrfilters is NULL) or resize the filter table.
+  The table is an array of filter_max+2 string pointers, all pointing into one contiguous
+  block stored at filters[0], with HTS_URLMAXSIZE*2 bytes reserved per entry.
+  ptrfilters : in/out address of the table; set to NULL when an allocation fails
+  maxfilter  : requested capacity (at least 128 entries are always allocated)
+  filterinc  : if zero, every entry is emptied; otherwise only the entries starting at
+               (capacity - filterinc) are emptied and the earlier ones are kept
+  Returns the capacity, or 0 on failure.
+  NOTE(review): when a realloct() call fails the previous block is presumably not released - confirm
+*/
+int filters_init(char*** ptrfilters, int maxfilter, int filterinc) {
+  char** filters = *ptrfilters;
+  int filter_max=maximum(maxfilter, 128);
+  if (filters == NULL) {
+    filters=(char**) malloct( sizeof(char*) * (filter_max+2) );
+    if (filters != NULL) {    /* never memset() a failed allocation */
+      memset(filters, 0, sizeof(char*) * (filter_max+2));  // filters[0] == 0
+    }
+  } else {
+    filters=(char**) realloct(filters, sizeof(char*) * (filter_max+2) );
+  }
+  if (filters) {
+    if (filters[0] == NULL) {
+      filters[0]=(char*) malloct( sizeof(char) * (filter_max+2) * (HTS_URLMAXSIZE*2) );
+      if (filters[0] != NULL) {    /* never memset() a failed allocation */
+        memset(filters[0], 0, sizeof(char) * (filter_max+2) * (HTS_URLMAXSIZE*2) );
+      }
+    } else {
+      filters[0]=(char*) realloct(filters[0], sizeof(char) * (filter_max+2) * (HTS_URLMAXSIZE*2) );
+    }
+    if (filters[0] == NULL) {      /* no data block: give up the pointer array as well */
+      freet(filters);
+      filters = NULL;
+    }
+  }
+  if (filters != NULL) {
+    int i;
+    int from;
+    if (filterinc == 0)
+      from = 0;
+    else
+      from = filter_max - filterinc;
+    if (from < 0)     /* filterinc larger than the capacity: do not index before the table */
+      from = 0;
+    /* (re)build every entry pointer, as the data block may have moved */
+    for(i=0 ; i<=filter_max ; i++) {    // one extra entry (safety)
+      filters[i]=filters[0]+i*(HTS_URLMAXSIZE*2);
+    }
+    for(i=from ; i<=filter_max ; i++) {    // one extra entry (safety)
+      filters[i][0]='\0';    // clear
+    }
+  }
+  *ptrfilters = filters;
+  return (filters != NULL) ? filter_max : 0;
+}
+
+// Make sure every folder named in the path 's' exists, creating the
+// missing ones. Components are separated by '/'; whatever follows the last
+// '/' is not created.
+// Folders already handled are remembered as "#folder#" in the buffer
+// managed by structcheck_init(), so mkdir is attempted only once per folder.
+// Returns 0 when done (also for an empty or over-long 's'), -1 when the
+// folder list buffer cannot be obtained.
+int structcheck(char* s) {
+  char *a=s;
+  char nom[HTS_URLMAXSIZE*2];
+  char *b;
+  char* structcheck_buff=NULL;
+  if (strnotempty(s)==0) return 0;
+  if (strlen(s)>HTS_URLMAXSIZE) return 0;
+
+  // Get buffer address
+  structcheck_buff=structcheck_init(0);
+  if (!structcheck_buff)
+    return -1;
+
+  if (strlen(structcheck_buff) > 65000) {
+    strcpy(structcheck_buff,"#");    // list too long: start again from scratch
+  }
+
+  b=nom;
+  do {
+    if (*a) *b++=*a++;
+    while((*a!='/') && (*a!='\0')) *b++=*a++;
+    *b='\0';                         // no ++ here: the next pass appends to nom
+    if (*a=='/') {                   // what we have so far is a folder
+      if (strnotempty(nom)) {
+        char tempo[HTS_URLMAXSIZE*2];
+
+        strcpy(tempo,"#"); strcat(tempo,nom); strcat(tempo,"#");
+        if (strstr(structcheck_buff,tempo)==NULL) {    // not created yet
+
+          /* Check room. The buffer may be moved by the reallocation, so   */
+          /* the returned address MUST replace the one we were holding.   */
+          structcheck_buff=structcheck_init(strlen(structcheck_buff) + strlen(nom) + 8192);
+          if (!structcheck_buff)
+            return -1;
+
+          strcat(structcheck_buff,"#"); strcat(structcheck_buff,nom); strcat(structcheck_buff,"#");  // add to the list
+
+#if HTS_WIN
+          if (mkdir(fconv(nom))!=0)
+#else
+          if (mkdir(fconv(nom),HTS_ACCESS_FOLDER)!=0)
+#endif
+          {
+#if HTS_REMOVE_ANNOYING_INDEX
+            // might be a file with the same name as this folder:
+            // rename it to allow the folder to be created.
+            // It happens when a server returns a folder index while
+            // the / page is requested
+            // -> if the file can be opened (not a folder) then rename it
+            FILE* fp=fopen(fconv(nom),"ab");
+            if (fp) {
+              fclose(fp);
+              rename(fconv(nom),fconcat(fconv(nom),".txt"));
+            }
+            // if it fails, that's too bad
+#if HTS_WIN
+            mkdir(fconv(nom));
+#else
+            mkdir(fconv(nom),HTS_ACCESS_FOLDER);
+#endif
+#endif
+            // mkdir also fails when the folder already exists: ignored
+          }
+#if HTS_WIN==0
+          chmod(fconv(nom),HTS_ACCESS_FOLDER);
+#endif
+        }
+      }
+      *b++=*a++;                     // slash
+    }
+  } while(*a);
+  return 0;
+}
+
+
+// Save 'len' bytes from 'adr' into the file 's' (created by filecreate()),
+// then hand the file name to usercommand().
+// Returns 0 on success, -1 if the file could not be created or if the
+// number of bytes written differs from 'len'.
+int filesave(char* adr,int len,char* s) {
+  int written=0;
+  FILE* out=filecreate(s);
+  if (out==NULL)
+    return -1;
+
+  if (len>0)
+    written=(int) fwrite(adr,1,len,out);
+  fclose(out);
+  usercommand(0,NULL,antislash(s));
+
+  return (written!=len) ? -1 : 0;
+}
+
+
+// Create (truncate) the file 's', given with a Un*x-style path, and return
+// it opened in "wb" mode.
+// The name is first reported to filenote(), then missing folders are
+// created through structcheck().
+// Returns NULL if 's' is NULL or too long for the local name buffer, if
+// structcheck() fails, or if fopen() fails.
+FILE* filecreate(char* s) {
+  char fname[HTS_URLMAXSIZE*2];
+  FILE* fp;
+
+  // refuse names that would not fit in fname (was an unchecked strcpy)
+  if (s == NULL || strlen(s) >= sizeof(fname))
+    return NULL;
+
+  // record the name in the list file
+  filenote(s,NULL);
+
+  // the leading '/' (if any) is kept on purpose, because of -O <path>
+  strcpy(fname,s);
+
+#if HTS_DOSNAME
+  // replace / by backslashes
+  {
+    int i=0;
+    while(fname[i]) {
+      if (fname[i]=='/')
+        fname[i]='\\';
+      i++;
+    }
+  }
+  // from here on fname uses backslashes
+#endif
+
+  // build the folder structure if needed
+  if (structcheck(s)!=0) {
+    return NULL;
+  }
+
+  // open
+  fp=fopen(fname,"wb");
+#if HTS_WIN==0
+  if (fp!=NULL) chmod(fname,HTS_ACCESS_FILE);
+#endif
+
+  return fp;
+}
+
+// Create an empty file through filecreate() (so it is noted in the list
+// file like any other). Returns 1 on success, 0 on failure.
+int filecreateempty(char* filename) {
+  FILE* created=filecreate(filename);   // filenote & co
+  if (created==NULL)
+    return 0;
+  fclose(created);
+  return 1;
+}
+
+// Record created files in a listing file.
+// State kept between calls: the listing stream and the base path to strip.
+typedef struct {
+  FILE* lst;
+  char path[HTS_URLMAXSIZE*2];
+} filenote_strc;
+// params != NULL : store params->lst / params->path for later calls, return 0
+// params == NULL : if a listing stream is set, write "[name]" to it, where
+//                  name is 's' without the stored path prefix when 's'
+//                  starts with it; return 1
+int filenote(char* s,filecreate_params* params) {
+  filenote_strc* strc;
+  NOSTATIC_RESERVE(strc, filenote_strc, 1);
+
+  // configuration call
+  if (params != NULL) {
+    strcpy(strc->path,params->path);
+    strc->lst=params->lst;
+    return 0;
+  }
+
+  // logging call
+  if (strc->lst != NULL) {
+    char savelst[HTS_URLMAXSIZE*2];
+    strcpy(savelst,fslash(s));
+    // strip the base path?
+    if (strnotempty(strc->path)
+        && strncmp(fslash(strc->path),savelst,strlen(strc->path))==0) {
+      strcpy(savelst,s+strlen(strc->path));
+    }
+    fprintf(strc->lst,"[%s]"LF,savelst);
+    fflush(strc->lst);
+  }
+  return 1;
+}
+
+// User command run after each saved file.
+// State kept between calls: whether a command is armed, and its text.
+typedef struct {
+  int exe;
+  char cmd[2048];
+} usercommand_strc;
+// _exe != 0 : store _cmd as the command to run (disarmed if _cmd is empty)
+// file      : name of the file that has just been saved, may be NULL/empty
+// When built with HTS_ANALYSTE, the hts_htmlcheck_filesave callback is
+// invoked first for a non-empty file name.
+HTS_INLINE void usercommand(int _exe,char* _cmd,char* file) {
+  usercommand_strc* strc;
+  NOSTATIC_RESERVE(strc, usercommand_strc, 1);
+
+  if (_exe) {
+    // bounded copy: an over-long command is truncated instead of
+    // overflowing cmd[]; a NULL command is treated as empty
+    strc->cmd[0]='\0';
+    if (_cmd != NULL)
+      strncat(strc->cmd,_cmd,sizeof(strc->cmd)-1);
+    if (strnotempty(strc->cmd))
+      strc->exe=_exe;
+    else
+      strc->exe=0;
+  }
+
+#if HTS_ANALYSTE
+  if (hts_htmlcheck_filesave)
+    if (strnotempty(file))
+      hts_htmlcheck_filesave(file);
+#endif
+
+  if (strc->exe) {
+    if (strnotempty(file)) {
+      if (strnotempty(strc->cmd)) {
+        usercommand_exe(strc->cmd,file);
+      }
+    }
+  }
+}
+// Run 'cmd' through system(), every "$0" in it being replaced by 'file'.
+// Nothing is run if an argument is NULL or if the expanded command would
+// not fit in the local buffer (a truncated command is never executed).
+// SECURITY NOTE(review): 'file' is pasted unquoted into a shell command; if
+// file names can be influenced by remote content this allows command
+// injection. Quoting is left to the user's command string for now.
+void usercommand_exe(char* cmd,char* file) {
+  char temp[8192];
+  size_t used=0;
+  size_t file_len;
+  size_t i;
+  if (cmd == NULL || file == NULL)
+    return;
+  file_len=strlen(file);
+  for(i=0; cmd[i]!='\0'; i++) {
+    if ((cmd[i]=='$') && (cmd[i+1]=='0')) {
+      if (used + file_len >= sizeof(temp))
+        return;                         // would overflow temp[]
+      memcpy(temp+used,file,file_len);
+      used+=file_len;
+      i++;                              // skip the '0'
+    } else {
+      if (used + 1 >= sizeof(temp))
+        return;                         // would overflow temp[]
+      temp[used++]=cmd[i];
+    }
+  }
+  temp[used]='\0';
+  system(temp);
+}
+
+// Log line prefix and message counters.
+// Counters kept between calls, one per message type.
+typedef struct {
+  int error;
+  int warning;
+  int info;
+} fspc_strc;
+// fp != NULL             : write "HH:MM:SS<tab>Type: <tab>" to fp (type with
+//                          its first letter passed through hichar()) and
+//                          count the message if type is "warning", "error"
+//                          or "info"; returns 0
+// fp == NULL, type==NULL : reset the three counters; returns 0
+// fp == NULL, type given : return the counter of that type (0 if unknown)
+HTS_INLINE int fspc(FILE* fp,char* type) {
+  fspc_strc* strc;
+  NOSTATIC_RESERVE(strc, fspc_strc, 1);    // log..
+
+  if (fp == NULL) {
+    if (!type) {
+      strc->error=strc->warning=strc->info=0;    // reset
+      return 0;
+    }
+    if (strcmp(type,"warning")==0)
+      return strc->warning;
+    if (strcmp(type,"error")==0)
+      return strc->error;
+    if (strcmp(type,"info")==0)
+      return strc->info;
+    return 0;
+  }
+
+  {
+    char stamp[256];
+    time_t now=time(NULL);
+    struct tm* local=localtime(&now);
+    strftime(stamp,250,"%H:%M:%S",local);
+    if (strnotempty(type))
+      fprintf(fp,"%s\t%c%s: \t",stamp,hichar(*type),type+1);
+    else
+      fprintf(fp,"%s\t \t",stamp);
+    if (strcmp(type,"warning")==0)
+      strc->warning++;
+    else if (strcmp(type,"error")==0)
+      strc->error++;
+    else if (strcmp(type,"info")==0)
+      strc->info++;
+  }
+  return 0;
+}
+
+
+// check the transfer rate
+// (disabled: compiled out by #if 0, and the body itself is commented out)
+#if 0
+void check_rate(TStamp stat_timestart,int maxrate) {
+ // check the transfer rate (not too high?)
+ /*
+ if (maxrate>0) {
+ int r = (int) (HTS_STAT.HTS_TOTAL_RECV/(time_local()-stat_timestart)); // current transfer rate
+ HTS_STAT.HTS_TOTAL_RECV_STATE=0;
+ if (r>maxrate) { // rate > allowed rate
+ int taux = (int) (((TStamp) (r - maxrate) * 100) / (TStamp) maxrate);
+ if (taux<15)
+ HTS_STAT.HTS_TOTAL_RECV_STATE=1; // slow down a little (<15% over)
+ else if (taux<50)
+ HTS_STAT.HTS_TOTAL_RECV_STATE=2; // a lot (<50% over)
+ else
+ HTS_STAT.HTS_TOTAL_RECV_STATE=3; // massively (>50% over)
+ }
+ }
+ */
+}
+#endif
+
+// ---
+// sous routines liées au moteur et au backing
+
+// Count the links located after index 'ptr' (up to lien_tot-1) whose pass2
+// field is -1; NULL entries are skipped.
+int backlinks_done(lien_url** liens,int lien_tot,int ptr) {
+  int count=0;
+  int idx=ptr+1;
+  // links done and stored in cache
+  while(idx<lien_tot) {
+    if (liens[idx]!=NULL && liens[idx]->pass2 == -1)
+      count++;
+    idx++;
+  }
+  return count;
+}
+
+// Call back_fill() unless a stop has been requested or back_incache() has
+// reached opt->maxcache. Returns back_fill()'s result, or -1 when the
+// backing stack was not filled.
+HTS_INLINE int back_fillmax(lien_back* back,int back_max,httrackp* opt,cache_back* cache,lien_url** liens,int ptr,int numero_passe,int lien_tot) {
+  if (opt->state.stop)
+    return -1;                          /* stop requested */
+  if (!(back_incache(back,back_max)<opt->maxcache))
+    return -1;                          /* no more room */
+  return back_fill(back,back_max,opt,cache,liens,ptr,numero_passe,lien_tot);
+}
+
+// Fill the backing stack: queue links located after 'ptr' through back_add().
+// The number of links to add is limited by the free sockets
+// (opt->maxsoc - back_nsoc), by back_available() minus a margin of 8, and is
+// forced to 0 when back_stack_available() <= 2.
+// Only links belonging to the current pass are queued (pass2 != 0 needs
+// numero_passe == 1, pass2 == 0 needs numero_passe == 0).
+// cache->ptr_ant / cache->ptr_last remember where the previous scan stopped
+// so that the same links are not scanned again.
+// Always returns 0.
+// NOTE(review): liens[p] is dereferenced without a NULL test, whereas
+// backlinks_done() tests each entry first - confirm entries in
+// ]ptr, lien_tot[ can never be NULL here.
+int back_fill(lien_back* back,int back_max,httrackp* opt,cache_back* cache,lien_url** liens,int ptr,int numero_passe,int lien_tot) {
+ int n;
+
+ // add as many sockets as we are allowed to
+ n=opt->maxsoc-back_nsoc(back,back_max);
+
+ // make sure enough room is left for the tests done afterwards (in theory, 1 remaining free entry would be enough)
+ n=min( n, back_available(back,back_max) - 8 );
+
+ // no space left on backing stack - do not back anymore
+ if (back_stack_available(back,back_max) <= 2)
+ n=0;
+
+ if (n>0) {
+ int p;
+
+ if (ptr<cache->ptr_last) { /* restart (2 scans: first html, then non html) */
+ cache->ptr_ant=0;
+ }
+
+ p=ptr+1;
+ /* already scanned */
+ if (p<cache->ptr_ant)
+ p=cache->ptr_ant;
+ while( (p<lien_tot) && (n>0) ) {
+ //while((p<lien_tot) && (n>0) && (p < ptr+opt->maxcache_anticipate)) {
+ int ok=1;
+
+ // do not put the file in backing if it has to be processed later
+ if (liens[p]->pass2) { // second pass
+ if (numero_passe!=1)
+ ok=0;
+ } else {
+ if (numero_passe!=0)
+ ok=0;
+ }
+
+ // note: once a backing entry is finished, it stays in memory until
+ // ptr reaches it
+ if (ok) {
+ if (!back_exist(back,back_max,liens[p]->adr,liens[p]->fil,liens[p]->sav)) {
+ if (back_add(back,back_max,opt,cache,liens[p]->adr,liens[p]->fil,liens[p]->sav,liens[liens[p]->precedent]->adr,liens[liens[p]->precedent]->fil,liens[p]->testmode,&liens[p]->pass2)==-1) {
+ if ( (opt->debug>1) && (opt->errlog!=NULL) ) {
+ fspc(opt->errlog,"debug"); fprintf(opt->errlog,"error: unable to add more links through back_add for back_fill"LF);
+ test_flush;
+ }
+#if BDEBUG==1
+ printf("error while adding\n");
+#endif
+ n=0; // leave the loop
+ } else {
+ n--;
+#if BDEBUG==1
+ printf("backing: %s%s\n",liens[p]->adr,liens[p]->fil);
+#endif
+ }
+ }
+ }
+ p++;
+ } // while
+ /* save the position reached by this look-ahead scan */
+ cache->ptr_ant=p;
+ cache->ptr_last=ptr;
+ }
+ return 0;
+}
+// ---
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+// routines de détournement de SIGHUP & co (Unix)
+//
+// Register (optdecl != NULL) and/or return the option structure remembered
+// in a function-level static; returns NULL while nothing was registered.
+httrackp* hts_declareoptbuffer(httrackp* optdecl) {
+  static httrackp* registered=NULL;    /* OK */
+  if (optdecl!=NULL)
+    registered=optdecl;
+  return registered;
+}
+//
+// Request a clean stop: sets exit_xh and re-arms the signal with sig_term,
+// so that receiving the same signal again terminates the program.
+// NOTE(review): presumably installed as a signal handler; fprintf() is not
+// async-signal-safe - confirm this is acceptable.
+void sig_finish( int code ) { // finish and quit
+ signal(code,sig_term); // quit if the signal is received again
+ exit_xh=1;
+ fprintf(stderr,"\nExit requested to engine (signal %d)\n",code);
+}
+// Print a message on stderr and terminate the process with exit(0).
+// NOTE(review): used as a signal handler (see the signal(code,sig_term)
+// calls in this file); fprintf()/exit() are not async-signal-safe.
+void sig_term( int code ) { // quit immediately
+ fprintf(stderr,"\nProgram terminated (signal %d)\n",code);
+ exit(0);
+}
+#if HTS_WIN
+// Ask the user what to do (Windows flavour): Quit / Interrupt / Cancel.
+// While the question is pending the signal is bound to sig_term, so a
+// second signal quits; it is bound to sig_ask again before returning.
+// An unreadable or unknown answer is treated as "Cancel".
+void sig_ask( int code ) {
+  char s[256];
+  s[0]='\0';
+  signal(code,sig_term);               // quit if received again
+  printf("\nQuit program/Interrupt/Cancel? (Q/I/C) ");
+  fflush(stdout);
+  // bounded read: "%s" alone could overflow s[]; on failure s stays empty
+  if (scanf("%255s",s) != 1)
+    s[0]='\0';
+  if ( (s[0]=='y') || (s[0]=='Y') || (s[0]=='o') || (s[0]=='O') || (s[0]=='q') || (s[0]=='Q'))
+    exit(0);                           // quit
+  else if ( (s[0]=='i') || (s[0]=='I') ) {
+    httrackp* opt=hts_declareoptbuffer(NULL);
+    if (opt) {
+      // ask for stop
+      opt->state.stop=1;
+    }
+  }
+  signal(code,sig_ask);                // restore the handler
+}
+#else
+// Bind the signal to sig_ignore, then move the program to the background
+// through sig_doback(0).
+void sig_back( int code ) { // ignore and move to background
+ signal(code,sig_ignore);
+ sig_doback(0);
+}
+// Ask the user what to do (Unix flavour):
+// Quit / Interrupt / Background / bLind background / Cancel.
+// While the question is pending the signal is bound to sig_term, so a
+// second signal quits. The handler is bound to sig_ask again after
+// "Interrupt" and "Cancel" only.
+// An unreadable or unknown answer is treated as "Cancel".
+void sig_ask( int code ) {
+  char s[256];
+  s[0]='\0';
+  signal(code,sig_term);               // quit if received again
+  printf("\nQuit program/Interrupt/Background/bLind background/Cancel? (Q/I/B/L/C) ");
+  fflush(stdout);
+  // bounded read: "%s" alone could overflow s[]; on failure s stays empty
+  if (scanf("%255s",s) != 1)
+    s[0]='\0';
+  if ( (s[0]=='y') || (s[0]=='Y') || (s[0]=='o') || (s[0]=='O') || (s[0]=='q') || (s[0]=='Q'))
+    exit(0);                           // quit
+  else if ( (s[0]=='b') || (s[0]=='B') || (s[0]=='a') || (s[0]=='A') )
+    sig_doback(0);                     // background, output kept in a file
+  else if ( (s[0]=='l') || (s[0]=='L') )
+    sig_doback(1);                     // background, output discarded
+  else if ( (s[0]=='i') || (s[0]=='I') ) {
+    httrackp* opt=hts_declareoptbuffer(NULL);
+    if (opt) {
+      // ask for stop
+      opt->state.stop=1;
+    }
+    signal(code,sig_ask);              // restore the handler
+  }
+  else {
+    printf("cancel..\n");
+    signal(code,sig_ask);              // restore the handler
+  }
+}
+// Handler that does nothing: the signal is received and dropped.
+void sig_ignore( int code ) { // ignore the signal
+}
+// Terminate through sig_term() unless sig_ignore_flag(-1) returns non-zero.
+// NOTE(review): presumably sig_ignore_flag(-1) only reads the current
+// "ignore" flag - confirm against its definition.
+void sig_brpipe( int code ) { // treat if necessary
+ if (!sig_ignore_flag(-1)) {
+ sig_term(code);
+ }
+}
+// Move the program to the background.
+// blind == 0 : stdout/stderr are sent to "hts-nohup.out"
+// blind != 0 (or if that file cannot be opened) : they go to /dev/null
+// stdin is detached from the terminal, then the process forks: the parent
+// exits after a short pause and the child carries on with the mirror.
+void sig_doback(int blind) {
+  int out=-1;
+  int in;
+  //
+  printf("\nMoving to background to complete the mirror...\n"); fflush(stdout);
+
+  {
+    httrackp* opt=hts_declareoptbuffer(NULL);
+    if (opt) {
+      // suppress logging and asking lousy questions
+      opt->quiet=1;
+      opt->verbosedisplay=0;
+    }
+  }
+
+  if (!blind)
+    out = open("hts-nohup.out",O_CREAT|O_WRONLY,S_IRUSR|S_IWUSR);
+  if (out == -1)
+    out = open("/dev/null",O_WRONLY,S_IRUSR|S_IWUSR);
+
+  // stdin: read from /dev/null rather than leaving descriptor 0 free to be
+  // reused by the next file or socket opened
+  in = open("/dev/null",O_RDONLY);
+  if (in != -1) {
+    dup2(in,0);
+    if (in > 0)
+      close(in);
+  } else {
+    close(0);
+  }
+
+  // stdout and stderr: dup2() names the target descriptor explicitly. The
+  // former close()/dup() sequence relied on "lowest free descriptor" and,
+  // with stdin closed first, ended up leaving stderr closed.
+  if (out != -1) {
+    dup2(out,1);
+    dup2(out,2);
+    if (out > 2)
+      close(out);                      // the duplicates are enough
+  } else {
+    close(1);
+    close(2);
+  }
+  //
+  switch (fork()) {
+  case 0:
+    break;
+  case -1:
+    fprintf(stderr,"Error: can not fork process\n");
+    break;
+  default:            // parent
+    usleep(100000);   // pause 1/10s "A microsecond is .000001s"
+    _exit(0);
+    break;
+  }
+}
+#endif
+// fin routines de détournement de SIGHUP & co
+
+// Poll stdin.. si besoin
+#if HTS_POLL
+// Read the characters currently available on stdin into 's' (at most
+// max-1 of them) and terminate the string.
+// Stops at end of input instead of storing EOF as a character.
+// Returns the number of characters stored (0 if s is NULL or max <= 0).
+int read_stdin(char* s,int max) {
+  int i=0;
+  if (s == NULL || max <= 0)
+    return 0;
+  while( (i<(max-1)) && (check_stdin()) ) {
+    int c=fgetc(stdin);
+    if (c == EOF)                      // end of input or read error
+      break;
+    s[i++]=(char) c;
+  }
+  s[i]='\0';
+  return i;
+}
+#ifdef _WIN32
+// Windows flavour: returns the result of _kbhit(), i.e. non-zero when a
+// key press is waiting in the console input buffer.
+HTS_INLINE int check_stdin(void) {
+ return (_kbhit());
+}
+#else
+// Non-blocking test: returns non-zero if descriptor 's' is readable right
+// now, 0 otherwise (including when select() fails).
+HTS_INLINE int check_flot(T_SOC s) {
+  fd_set fds;
+  struct timeval tv;
+  FD_ZERO(&fds);
+  FD_SET((T_SOC) s,&fds);
+  tv.tv_sec=0;                         // zero timeout: poll only
+  tv.tv_usec=0;
+  // after an error the content of fds is unspecified: do not look at it
+  if (select(s+1,&fds,NULL,NULL,&tv) <= 0)
+    return 0;
+  return FD_ISSET(s,&fds);
+}
+// Unix flavour: flush stdout and stdin, then return 1 if descriptor 0 is
+// readable according to check_flot(), 0 otherwise.
+HTS_INLINE int check_stdin(void) {
+  fflush(stdout); fflush(stdin);
+  return check_flot(0) ? 1 : 0;
+}
+#endif
+#endif
+
+// Attente de touche
+#if HTS_ANALYSTE
+// Ask for confirmation through the hts_htmlcheck_query2 callback.
+// Returns 0 only if the answer is "N", "NO" or "NON" (as matched by
+// strfield2); any other answer, an empty one or no answer returns 1.
+int ask_continue(void) {
+  char* answer=hts_htmlcheck_query2(HTbuff);
+  if (answer==NULL)
+    return 1;
+  if (!strnotempty(answer))
+    return 1;
+  if ((strfield2(answer,"N")) || (strfield2(answer,"NO")) || (strfield2(answer,"NON")))
+    return 0;
+  return 1;
+}
+#else
+// Ask for confirmation on the console.
+// Returns 0 only if the line read is "N", "NO" or "NON" (as matched by
+// strfield2); anything else, including an empty line, returns 1.
+int ask_continue(void) {
+  char answer[12];
+  answer[0]='\0';
+  printf("Press <Y><Enter> to confirm, <N><Enter> to abort\n");
+  io_flush; linput(stdin,answer,4);
+  if (!strnotempty(answer))
+    return 1;
+  if ((strfield2(answer,"N")) || (strfield2(answer,"NO")) || (strfield2(answer,"NON")))
+    return 0;
+  return 1;
+}
+
+// Number of decimal digits of n; any n below 10 (negative values included)
+// gives 1.
+int nombre_digit(int n) {
+  int digits;
+  for(digits=1; n >= 10; n/=10)
+    digits++;
+  return digits;
+}
+
+
+// Return the address of the end of the first token in p, i.e. of the first
+// space found outside double quotes.
+// Returns NULL if the string holds a single token (no such space).
+// flag==1 : the string may contain escapes; inside quotes \\ and \" are
+//           replaced IN PLACE by \ and " (the string is modified)
+// ex: "test" "test2" returns the address of the space
+char* next_token(char* p,int flag) {
+  int quote=0;
+  for( ; ; p++) {
+    if (flag && (*p=='\\')) {          // skip \x or \"
+      if (quote && ((*(p+1)=='\\') || (*(p+1)=='"'))) {
+        // drop the backslash by shifting the tail (terminator included) one
+        // place left; the unescaped character now sits at p and is skipped
+        // by the p++ of the loop. No temporary buffer, so no size limit.
+        memmove(p,p+1,strlen(p+1)+1);
+      }
+    }
+    else if (*p==34) {                 // double quote (opening or closing)
+      quote=!quote;
+    }
+    else if (*p==32) {                 // space
+      if (!quote)
+        return p;
+    }
+    else if (*p=='\0') {
+      return NULL;
+    }
+  }
+}
+
+// routines annexes
+#if HTS_ANALYSTE
+// Mark a file as "to be cancelled".
+// A non-empty 's' is stored only if nothing is stored yet (truncated to the
+// buffer size if needed); a NULL or empty 's' just queries.
+// Returns the internal static buffer holding the stored name.
+// !!NOT THREAD SAFE!!
+char* hts_cancel_file(char * s) {
+  static char sav[HTS_URLMAXSIZE*2]="";
+  if ( (s != NULL) && (s[0]!='\0') && (sav[0]=='\0') )
+    strncat(sav,s,sizeof(sav)-1);      // sav is empty here: bounded copy
+  return sav;
+}
+// Set _hts_cancel to 2, but only while _hts_in_html_parsing is 2
+// (the value hts_is_testing() reports as "test").
+void hts_cancel_test(void) {
+ if (_hts_in_html_parsing==2)
+ _hts_cancel=2;
+}
+// Set _hts_cancel to 1 whenever _hts_in_html_parsing is non-zero.
+void hts_cancel_parsing(void) {
+ if (_hts_in_html_parsing)
+ _hts_cancel=1;
+}
+#endif
+// for(_i=0;(_i<back_max) && (index<NStatsBuffer);_i++) {
+// i=(back_index+_i)%back_max; // commencer par le "premier" (l'actuel)
+// if (back[i].status>=0) { // signifie "lien actif"
+
+
+/*
+hts_add_file, add/get elements in the add chain for java parsing
+if file_position >= 0
+  push 'file/file_position' at the end of the chain
+  return 1 (return 0 if 'file' is already in the chain,
+            -1 if 'file' is NULL, too long to be stored, or out of memory)
+else
+  pop the last element: its name is copied to 'file' (if not NULL)
+  return its 'file_position'
+  return -1 if the chain is empty ('file' is then set to "")
+*/
+typedef struct addfile_chain {
+  char name[1024];
+  int pos;
+  struct addfile_chain* next;
+} addfile_chain;
+typedef addfile_chain* addfile_chain_ptr;
+int hts_add_file(char* file,int file_position) {
+  addfile_chain** chain;
+  NOSTATIC_RESERVE(chain, addfile_chain_ptr, 1);
+
+  if (file_position>=0) {        /* copy file to the chain */
+    struct addfile_chain** current;
+    /* a name that does not fit in name[] is refused rather than */
+    /* overflowing the element                                    */
+    if ( (file == NULL) || (strlen(file) >= sizeof((*chain)->name)) )
+      return -1;
+    current=chain;               /* start from */
+    while(*current) {
+      if (strcmp((*current)->name,file)==0)
+        return 0;                /* already exists */
+      current=&( (*current)->next );   /* 'next' address */
+    }
+    *current=calloct(1,sizeof(addfile_chain));
+    if (*current == NULL) {
+      printf("PANIC! Too many Java files during parsing [1]\n");
+      return -1;
+    }
+    (*current)->next=NULL;
+    strcpy((*current)->name,file);     /* length checked above */
+    (*current)->pos=file_position;
+    return 1;
+  }
+
+  /* copy last element in file and delete it */
+  if (file)
+    file[0]='\0';
+  if (*chain) {
+    struct addfile_chain** current;
+    int pos=-1;
+    current=chain;               /* start from */
+    while( (*current)->next ) {
+      current=&( (*current)->next );   /* 'next' address */
+    }
+    if (file)
+      strcpy(file,(*current)->name);
+    pos=(*current)->pos;
+    freet(*current);
+    *current=NULL;
+    return pos;
+  }
+  return -1;                     /* no more elements */
+}
+
+#if HTS_ANALYSTE
+// Is an html file being parsed? Returns 0 if not, otherwise the percentage
+// done (_hts_in_html_done, at least 1).
+// flag>=0 : also request a refresh (sets _hts_in_html_poll)
+int hts_is_parsing(int flag) {
+  if (!_hts_in_html_parsing)
+    return 0;                          // no
+  if (flag>=0)
+    _hts_in_html_poll=1;               // a refresh would be welcome
+  return max(_hts_in_html_done,1);     // % done
+}
+// Map _hts_in_html_parsing to: 0 = no, 1 = test (value 2), 2 = purge (value 3)
+int hts_is_testing(void) {
+  switch(_hts_in_html_parsing) {
+  case 2:
+    return 1;
+  case 3:
+    return 2;
+  default:
+    return 0;
+  }
+}
+// error message? returns the global _hts_errmsg buffer
+char* hts_errmsg(void) {
+ return _hts_errmsg;
+}
+// transfer pause mode: p>=0 stores p in _hts_setpause, a negative p only
+// queries; returns the current value
+int hts_setpause(int p) {
+ if (p>=0) _hts_setpause=p;
+ return _hts_setpause;
+}
+// ask for termination: sets state.stop in the registered option structure,
+// if any. Always returns 0.
+// NOTE(review): the 'force' argument is not used by this implementation.
+int hts_request_stop(int force) {
+ httrackp* opt=hts_declareoptbuffer(NULL);
+ if (opt) {
+ opt->state.stop=1;
+ }
+ return 0;
+}
+// Change the tunable parameters while the engine is running: the fields
+// handled by copy_htsopt() are copied from set_opt into the registered
+// option structure. Nothing happens if either of them is missing.
+// Always returns 0.
+int hts_setopt(httrackp* set_opt) {
+  httrackp* engine_opt;
+  if (set_opt==NULL)
+    return 0;
+  engine_opt=hts_declareoptbuffer(NULL);
+  if (engine_opt!=NULL)
+    copy_htsopt(set_opt,engine_opt);
+  return 0;
+}
+// URL addition: a non-NULL 'url' is stored in _hts_addurl.
+// Returns 1 if _hts_addurl is set after the call, 0 otherwise.
+int hts_addurl(char** url) {
+  if (url!=NULL)
+    _hts_addurl=url;
+  return (_hts_addurl==NULL) ? 0 : 1;
+}
+// Clear _hts_addurl. The value returned is the result of testing the
+// pointer that has just been set to NULL.
+int hts_resetaddurl(void) {
+ _hts_addurl=NULL;
+ return (_hts_addurl!=NULL);
+}
+// copier nouveaux paramètres si besoin
+int copy_htsopt(httrackp* from,httrackp* to) {
+ if (from->maxsite > -1)
+ to->maxsite = from->maxsite;
+
+ if (from->maxfile_nonhtml > -1)
+ to->maxfile_nonhtml = from->maxfile_nonhtml;
+
+ if (from->maxfile_html > -1)
+ to->maxfile_html = from->maxfile_html;
+
+ if (from->maxsoc > 0)
+ to->maxsoc = from->maxsoc;
+
+ if (from->nearlink > -1)
+ to->nearlink = from->nearlink;
+
+ if (from->timeout > -1)
+ to->timeout = from->timeout;
+
+ if (from->rateout > -1)
+ to->rateout = from->rateout;
+
+ if (from->maxtime > -1)
+ to->maxtime = from->maxtime;
+
+ if (from->maxrate > -1)
+ to->maxrate = from->maxrate;
+
+ if (strnotempty(from->user_agent))
+ strcpy(to->user_agent , from->user_agent);
+
+ if (from->retry > -1)
+ to->retry = from->retry;
+
+ if (from->hostcontrol > -1)
+ to->hostcontrol = from->hostcontrol;
+
+ if (from->errpage > -1)
+ to->errpage = from->errpage;
+
+ if (from->parseall > -1)
+ to->parseall = from->parseall;
+
+
+ // test all: bit 8 de travel
+ if (from->travel > -1) {
+ if (from->travel & 256)
+ to->travel|=256;
+ else
+ to->travel&=255;
+ }
+
+
+ return 0;
+}
+
+#endif
+//
+
+
+
+
+
+// internal copyright message
+// The function only assigns string literals to a local pointer that is
+// never read; NOTE(review): presumably its purpose is to keep this text in
+// the compiled binary - confirm.
+void voidf(void) {
+ char* a;
+ a=""CRLF""CRLF;
+ a="+-----------------------------------------------+"CRLF;
+ a="|HyperTextTRACKer, Offline Browser Utility |"CRLF;
+ a="| HTTrack Website Copier |"CRLF;
+ a="|Code: Windows Interface Xavier Roche |"CRLF;
+ a="| HTS/HTTrack Xavier Roche |"CRLF;
+ a="| .class Parser Yann Philippot |"CRLF;
+ a="| |"CRLF;
+ a="|Tested on: Windows95,98,NT,2K |"CRLF;
+ a="| Linux PC |"CRLF;
+ a="| Sun-Solaris 5.6 |"CRLF;
+ a="| AIX 4 |"CRLF;
+ a="| |"CRLF;
+ a="|Copyright (C) Xavier Roche and other |"CRLF;
+ a="|contributors |"CRLF;
+ a="| |"CRLF;
+ a="|Use this program at your own risks! |"CRLF;
+ a="+-----------------------------------------------+"CRLF;
+ a=""CRLF;
+}
+
+
+// HTTrack Website Copier Copyright (C) Xavier Roche and other contributors
+//
+
diff --git a/src/htscore.h b/src/htscore.h
new file mode 100644
index 0000000..a50aac8
--- /dev/null
+++ b/src/htscore.h
@@ -0,0 +1,363 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: Main file .h */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+// Fichier librairie .h
+#ifndef HTTRACK_DEFH
+#define HTTRACK_DEFH
+
+
+#include "htsglobal.h"
+
+/* specific definitions */
+#include "htsbase.h"
+// Includes & définitions
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#ifdef _WIN32
+#include <conio.h>
+#include <signal.h>
+#include <direct.h>
+#else
+#include <signal.h>
+#include <unistd.h>
+#endif
+/* END specific definitions */
+
+
+// Include htslib.h for all types
+#include "htslib.h"
+
+#include "htsopt.h"
+
+// structure describing one link
+typedef struct {
+ char firstblock; // flag, 1=first malloc
+ char link_import; // link imported after a "moved" - do not apply the usual up/down rules
+ short int depth; // allowed depth for this link; >0 strong, 0=weak
+ short int pass2; // process after the others (second pass). if == -1, link processed in background
+ int premier; // index of the first link that led to the other links of the domain
+ int precedent; // index of the link that led to this very link
+ //int moved; // index of the moved link
+ short int retry; // number of retries left
+ short int testmode; // test mode only, just send a HEAD!
+ char* adr; // address
+ char* fil; // remote file name
+ char* sav; // name to save on disk (with path, if any)
+ char* cod; // codebase path, if any, for a java class
+ char* former_adr; // initial address (before a possible "moved"), may be NULL
+ char* former_fil; // initial remote file name (before a possible "moved"), may be NULL
+ // for optimization:
+#if HTS_HASH
+ int hash_next[3]; // next link with the same hash value
+#else
+ int sav_len; // length of sav
+#endif
+} lien_url;
+
+// files being loaded in the 'background'
+typedef struct {
+#if DEBUG_CHECKINT
+ char magic;
+#endif
+ char url_adr[HTS_URLMAXSIZE*2]; // address
+ char url_fil[HTS_URLMAXSIZE*2]; // remote file name
+ char url_sav[HTS_URLMAXSIZE*2]; // name to save on disk (with path, if any)
+ char referer_adr[HTS_URLMAXSIZE*2]; // host address of the referer page
+ char referer_fil[HTS_URLMAXSIZE*2]; // file of the referer page
+ char location_buffer[HTS_URLMAXSIZE*2]; // "location" in case of "moved" (302,..)
+ char tmpfile[HTS_URLMAXSIZE*2]; // temporary name to save to (compressed)
+ char send_too[1024]; // data to send together with the header
+ int status; // status (-1=unused, 0: ready, >0: operation in progress)
+ int testmode; // test mode
+ int timeout; // handle timeouts? (!=0 : number of seconds)
+ TStamp timeout_refresh; // if so, refresh time
+ int rateout; // timeout refresh? (!=0 : minimum tolerated rate in bytes/s)
+ TStamp rateout_time; // if so, start date
+ LLint maxfile_nonhtml; // maximum size of a non-html file
+ LLint maxfile_html; // same for an html file
+ htsblk r; // htsblk structure of each background object
+ short int is_update; // update mode
+ int head_request; // HEAD request?
+ LLint range_req_size; // range used
+ //
+ int http11; // the header must be signed HTTP/1.1 and not HTTP/1.0
+ int is_chunk; // chunk?
+ char* chunk_adr; // address of the chunk being loaded
+ LLint chunk_size; // size of the chunk being loaded
+ LLint compressed_size; // compressed size (stats only)
+ //
+ short int* pass2_ptr; // pointer to liens[ptr]->pass2
+ //
+ char info[256]; // optional status for ftp
+ int stop_ftp; // stop flag for ftp
+#if DEBUG_CHECKINT
+ char magic2;
+#endif
+} lien_back;
+
+// cache
+typedef struct {
+ int version; // 0 or 1
+ /* */
+ int type;
+ FILE *dat,*ndx,*olddat;
+ char *use; // list of adr+fil
+ FILE *lst; // list of files for the "purge"
+ FILE *txt; // list of files (info)
+ char lastmodified[256];
+ // HASH
+ void* hashtable;
+ // optional log files
+ FILE* log;
+ FILE* errlog;
+ // variables
+ int ptr_ant; // look-ahead: position where back_fill() stopped its last scan
+ int ptr_last; // look-ahead: value of 'ptr' at the last back_fill() scan
+} cache_back;
+
+typedef struct {
+ lien_url** liens; // pointer to the links
+ int max_lien; // highest index encountered
+ int hash[3][HTS_HASH_SIZE]; // tables for sav/adr-fil/former_adr-former_fil
+} hash_struct;
+
+#if HTS_HASH
+#else
+#define hash_write(A,B)
+#endif
+
+typedef struct { // settings handed to filenote() to set up the file listing
+ FILE* lst; // stream the "[name]" lines are written to
+ char path[HTS_URLMAXSIZE*2]; // leading path removed from the names written
+} filecreate_params;
+
+// Fonctions
+
+// INCLUDES .H PARTIES DE CODE HTTRACK
+
+// routine main
+#include "htscoremain.h"
+
+// divers outils pour httrack.c
+#include "htstools.h"
+
+// aide pour la version en ligne de commande
+#include "htshelp.h"
+
+// génération du nom de fichier à sauver
+#include "htsname.h"
+
+// gestion ftp
+#include "htsftp.h"
+
+// routine parser java
+#include "htsjava.h"
+
+// gestion interception d'URL
+#include "htscatchurl.h"
+
+// gestion robots.txt
+#include "htsrobots.h"
+
+// routines d'acceptation de liens
+#include "htswizard.h"
+
+// routines de regexp
+#include "htsfilters.h"
+
+// gestion backing
+#include "htsback.h"
+
+// gestion cache
+#include "htscache.h"
+
+// gestion hashage
+#include "htshash.h"
+
+// gestion réentrance
+#include "htsnostatic.h"
+
+// infos console
+#if HTS_ANALYSTE_CONSOLE
+#include "httrack.h"
+#endif
+
+#include "htsdefines.h"
+
+#include "hts-indextmpl.h"
+
+// INCLUDES .H PARTIES DE CODE HTTRACK
+
+//
+
+/*
+typedef void (* t_hts_htmlcheck_init)(void);
+typedef void (* t_hts_htmlcheck_uninit)(void);
+typedef int (* t_hts_htmlcheck_start)(httrackp* opt);
+typedef int (* t_hts_htmlcheck_end)(void);
+typedef int (* t_hts_htmlcheck_chopt)(httrackp* opt);
+typedef int (* t_hts_htmlcheck)(char* html,int len,char* url_adresse,char* url_fichier);
+typedef char* (* t_hts_htmlcheck_query)(char* question);
+typedef char* (* t_hts_htmlcheck_query2)(char* question);
+typedef char* (* t_hts_htmlcheck_query3)(char* question);
+typedef int (* t_hts_htmlcheck_loop)(lien_back* back,int back_max,int back_index,int lien_tot,int lien_ntot,LLint stat_bytes,LLint stat_bytes_recv,int stat_time,int stat_nsocket, LLint stat_written, int stat_updated, int stat_errors, int irate, int nbk );
+typedef int (* t_hts_htmlcheck_check)(char* adr,char* fil,int status);
+typedef void (* t_hts_htmlcheck_pause)(char* lockfile);
+*/
+
+// demande d'interaction avec le shell
+#if HTS_ANALYSTE
+//char HTbuff[1024];
+/*
+extern t_hts_htmlcheck_init hts_htmlcheck_init;
+extern t_hts_htmlcheck_uninit hts_htmlcheck_uninit;
+extern t_hts_htmlcheck_start hts_htmlcheck_start;
+extern t_hts_htmlcheck_end hts_htmlcheck_end;
+extern t_hts_htmlcheck_chopt hts_htmlcheck_chopt;
+extern t_hts_htmlcheck hts_htmlcheck;
+extern t_hts_htmlcheck_query hts_htmlcheck_query;
+extern t_hts_htmlcheck_query2 hts_htmlcheck_query2;
+extern t_hts_htmlcheck_query3 hts_htmlcheck_query3;
+extern t_hts_htmlcheck_loop hts_htmlcheck_loop;
+extern t_hts_htmlcheck_check hts_htmlcheck_check;
+extern t_hts_htmlcheck_pause hts_htmlcheck_pause;
+*/
+//
+int hts_is_parsing(int flag);
+int hts_is_testing(void);
+int hts_setopt(httrackp* opt);
+int hts_addurl(char** url);
+int hts_resetaddurl(void);
+int copy_htsopt(httrackp* from,httrackp* to);
+char* hts_errmsg(void);
+int hts_setpause(int); // pause transfer
+int hts_request_stop(int force);
+//
+char* hts_cancel_file(char * s);
+void hts_cancel_test(void);
+void hts_cancel_parsing(void);
+//
+// Variables globales
+extern int _hts_in_html_parsing;
+extern int _hts_in_html_done; // % réalisés
+extern int _hts_in_html_poll; // parsing
+extern char _hts_errmsg[1100];
+extern int _hts_setpause;
+//extern httrackp* _hts_setopt;
+extern char** _hts_addurl;
+extern int _hts_cancel;
+#endif
+
+
+
+//
+
+
+//int httpmirror(char* url,int level,httrackp opt);
+int httpmirror(char* url1,httrackp* opt);
+int filesave(char* adr,int len,char* s);
+int engine_stats(void);
+void host_ban(httrackp* opt,lien_url** liens,int ptr,int lien_tot,lien_back* back,int back_max,char** filters,int filter_max,int* filptr,char* host);
+FILE* filecreate(char* s);
+int filecreateempty(char* filename);
+int filenote(char* s,filecreate_params* params);
+HTS_INLINE void usercommand(int exe,char* cmd,char* file);
+void usercommand_exe(char* cmd,char* file);
+char* structcheck_init(int init);
+int filters_init(char*** ptrfilters, int maxfilter, int filterinc);
+int structcheck(char* s);
+HTS_INLINE int fspc(FILE* fp,char* type);
+char* next_token(char* p,int flag);
+//
+char* readfile(char* fil);
+char* readfile_or(char* fil,char* defaultdata);
+#if 0
+void check_rate(TStamp stat_timestart,int maxrate);
+#endif
+
+// liens
+int liens_record(char* adr,char* fil,char* save,char* former_adr,char* former_fil,char* codebase);
+
+
+// backing, routines externes
+int back_fill(lien_back* back,int back_max,httrackp* opt,cache_back* cache,lien_url** liens,int ptr,int numero_passe,int lien_tot);
+int backlinks_done(lien_url** liens,int lien_tot,int ptr);
+int back_fillmax(lien_back* back,int back_max,httrackp* opt,cache_back* cache,lien_url** liens,int ptr,int numero_passe,int lien_tot);
+
+// cancel file
+#if HTS_ANALYSTE
+char* hts_cancel_file(char * s);
+void hts_cancel_test(void);
+void hts_cancel_parsing(void);
+#endif
+
+int ask_continue(void);
+int nombre_digit(int n);
+
+// Java
+int hts_add_file(char* file,int file_position);
+
+// Polling
+#if HTS_POLL
+HTS_INLINE int check_flot(T_SOC s);
+HTS_INLINE int check_stdin(void);
+int read_stdin(char* s,int max);
+#endif
+
+httrackp* hts_declareoptbuffer(httrackp* optdecl);
+void sig_finish( int code ); // finir et quitter
+void sig_term( int code ); // quitter
+#if HTS_WIN
+void sig_ask( int code ); // demander
+#else
+void sig_back( int code ); // ignorer et mettre en backing
+void sig_ask( int code ); // demander
+void sig_ignore( int code ); // ignorer signal
+void sig_brpipe( int code ); // treat if necessary
+void sig_doback(int); // mettre en arrière plan
+#endif
+
+// Void
+void voidf(void);
+
+#define HTS_TOPINDEX "TOP_INDEX_HTTRACK"
+
+#endif
+
+
diff --git a/src/htscoremain.c b/src/htscoremain.c
new file mode 100644
index 0000000..a03635f
--- /dev/null
+++ b/src/htscoremain.c
@@ -0,0 +1,2001 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: httrack.c subroutines: */
+/* main routine (first called) */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+#include "htscoremain.h"
+
+#include "htsglobal.h"
+#include "htscore.h"
+#include "htsdefines.h"
+#include "htsalias.h"
+#include "htswrap.h"
+#include <ctype.h>
+#if HTS_WIN
+#else
+#ifndef HTS_DO_NOT_USE_UID
+/* setuid */
+#include <pwd.h>
+#include <unistd.h>
+#endif
+#endif
+
+extern int exit_xh; // exit prematurely
+
+/* Resolver */
+extern int IPV6_resolver;
+
+
+// Add a command in the argc/argv (appended at the end)
+//   token : NUL-terminated string to append (it is copied into buff)
+//   argc  : int lvalue, current argument count; incremented by one
+//   argv  : array of char* that receives the pointer to the copy
+//   buff  : storage block holding the copied strings
+//   ptr   : int lvalue, offset of the first free byte of buff; advanced
+// Each copy consumes strlen(token)+2 bytes of buff (string, NUL, one spare byte).
+// Neither argv nor buff is bounds-checked: the caller must size both.
+// The body is wrapped in do { } while(0) so that the macro expands to ONE
+// statement and stays correct under an unbraced if/else; every argument is
+// parenthesized so that expressions can be passed safely.
+#define cmdl_add(token,argc,argv,buff,ptr) \
+  do { \
+    (argv)[(argc)]=((buff)+(ptr)); \
+    strcpy((argv)[(argc)],(token)); \
+    (ptr) += (strlen((argv)[(argc)])+2); \
+    (argc)++; \
+  } while(0)
+
+// Insert a command in the argc/argv (at position 0, shifting the others up)
+//   token : NUL-terminated string to insert (it is copied into buff)
+//   argc  : int lvalue, current argument count; incremented by one
+//   argv  : array of char*; may be an offset expression such as (argv+n)
+//   buff  : storage block holding the copied strings
+//   ptr   : int lvalue, offset of the first free byte of buff; advanced
+// Each copy consumes strlen(token)+2 bytes of buff (string, NUL, one spare byte).
+// Neither argv nor buff is bounds-checked: the caller must size both.
+// The body is wrapped in do { } while(0) so that the macro expands to ONE
+// statement and stays correct under an unbraced if/else. The loop index has
+// a reserved-looking name so that it cannot hide a caller variable used in
+// one of the arguments.
+#define cmdl_ins(token,argc,argv,buff,ptr) \
+  do { \
+    int cmdl_ins_i_; \
+    for(cmdl_ins_i_=(argc);cmdl_ins_i_>0;cmdl_ins_i_--) \
+      (argv)[cmdl_ins_i_]=(argv)[cmdl_ins_i_-1]; \
+    (argv)[0]=((buff)+(ptr)); \
+    strcpy((argv)[0],(token)); \
+    (ptr) += (strlen((argv)[0])+2); \
+    (argc)++; \
+  } while(0)
+
+// Release the 'url' buffer owned by hts_main() before leaving the function.
+// The pointer is reset to NULL after the free so that invoking the macro a
+// second time (or testing url afterwards) is harmless.
+// NOTE(review): x_argvblk, allocated with malloct() in hts_main(), is not
+// released by this macro.
+#define htsmain_free() do { if (url != NULL) { free(url); url = NULL; } } while(0)
+
+// Main, récupère les paramètres et appelle le robot
+#if HTS_ANALYSTE
+int hts_main(int argc, char **argv) {
+#else
+int main(int argc, char **argv) {
+#endif
+ char* x_argv[999]; // Patch pour argv et argc: en cas de récupération de ligne de commande
+ char* x_argvblk=NULL; // (reprise ou update)
+ int x_ptr=0; // offset
+ /*
+ char* x_argv2[999]; // Patch pour config
+ char* x_argvblk2=NULL;
+ */
+ //
+ int argv_url=-1; // ==0 : utiliser cache et doit.log
+ char* argv_firsturl=NULL; // utilisé pour nommage par défaut
+ char* url = NULL; // URLS séparées par un espace
+ //char url[65536]; // URLS séparées par un espace
+ // the parametres
+ httrackp httrack;
+ int httrack_logmode=3; // ONE log file
+ int recuperer=0; // récupérer un plantage (n'arrive jamais, à supprimer)
+#if HTS_WIN
+#if HTS_ANALYSTE!=2
+ WORD wVersionRequested; /* requested version WinSock API */
+ WSADATA wsadata; /* Windows Sockets API data */
+#endif
+#else
+#ifndef HTS_DO_NOT_USE_UID
+ int switch_uid=-1,switch_gid=-1; /* setuid/setgid */
+#endif
+ int switch_chroot=0; /* chroot ? */
+#endif
+ //
+ url = malloc(65536);
+ if (url == NULL) {
+ HTS_PANIC_PRINTF("* memory exhausted");
+ htsmain_free();
+ return -1;
+ }
+ url[0]='\0';
+ //
+
+#if HTS_ANALYSTE
+ // custom wrappers
+ hts_htmlcheck_init = (t_hts_htmlcheck_init) htswrap_read("init");
+ hts_htmlcheck_uninit = (t_hts_htmlcheck_uninit) htswrap_read("free");
+ hts_htmlcheck_start = (t_hts_htmlcheck_start) htswrap_read("start");
+ hts_htmlcheck_end = (t_hts_htmlcheck_end) htswrap_read("end");
+ hts_htmlcheck_chopt = (t_hts_htmlcheck_chopt) htswrap_read("change-options");
+ hts_htmlcheck = (t_hts_htmlcheck) htswrap_read("check-html");
+ hts_htmlcheck_query = (t_hts_htmlcheck_query) htswrap_read("query");
+ hts_htmlcheck_query2 = (t_hts_htmlcheck_query2) htswrap_read("query2");
+ hts_htmlcheck_query3 = (t_hts_htmlcheck_query3) htswrap_read("query3");
+ hts_htmlcheck_loop = (t_hts_htmlcheck_loop) htswrap_read("loop");
+ hts_htmlcheck_check = (t_hts_htmlcheck_check) htswrap_read("check-link");
+ hts_htmlcheck_pause = (t_hts_htmlcheck_pause) htswrap_read("pause");
+ hts_htmlcheck_filesave = (t_hts_htmlcheck_filesave) htswrap_read("save-file");
+ hts_htmlcheck_linkdetected = (t_hts_htmlcheck_linkdetected) htswrap_read("link-detected");
+ hts_htmlcheck_xfrstatus = (t_hts_htmlcheck_xfrstatus) htswrap_read("transfer-status");
+ hts_htmlcheck_savename = (t_hts_htmlcheck_savename) htswrap_read("save-name");
+#endif
+
+ // options par défaut
+ memset(&httrack, 0, sizeof(httrackp));
+ httrack.wizard=2; // wizard automatique
+ httrack.quiet=0; // questions
+ //
+ httrack.travel=0; // même adresse
+ httrack.depth=9999; // mirror total par défaut
+ httrack.extdepth=0; // mais pas à l'extérieur
+ httrack.seeker=1; // down
+ httrack.urlmode=2; // relatif par défaut
+ httrack.debug=0; // pas de débug en plus
+ httrack.getmode=3; // linear scan
+ httrack.maxsite=-1; // taille max site (aucune)
+ httrack.maxfile_nonhtml=-1; // taille max fichier non html
+ httrack.maxfile_html=-1; // idem pour html
+ httrack.maxsoc=8; // nbre socket max
+ httrack.fragment=-1; // pas de fragmentation
+ httrack.nearlink=0; // ne pas prendre les liens non-html "adjacents"
+ httrack.makeindex=1; // faire un index
+ httrack.kindex=0; // index 'keyword'
+ httrack.delete_old=1; // effacer anciens fichiers
+ httrack.makestat=0; // pas de fichier de stats
+ httrack.maketrack=0; // ni de tracking
+ httrack.timeout=120; // timeout par défaut (2 minutes)
+ httrack.cache=1; // cache prioritaire
+ httrack.shell=0; // pas de shell par defaut
+ httrack.proxy.active=0; // pas de proxy
+ httrack.user_agent_send=1; // envoyer un user-agent
+ strcpy(httrack.user_agent,"Mozilla/4.5 (compatible; HTTrack 3.0x; Windows 98)");
+ httrack.savename_83=0; // noms longs par défaut
+ httrack.savename_type=0; // avec structure originale
+ httrack.parsejava=1; // parser classes
+ httrack.hostcontrol=0; // PAS de control host pour timeout et traffic jammer
+ httrack.retry=2; // 2 retry par défaut
+ httrack.errpage=1; // copier ou générer une page d'erreur en cas d'erreur (404 etc.)
+ httrack.check_type=1; // vérifier type si inconnu (cgi,asp..) SAUF / considéré comme html
+ httrack.all_in_cache=0; // ne pas tout stocker en cache
+ httrack.robots=2; // traiter les robots.txt
+ httrack.external=0; // liens externes normaux
+ httrack.passprivacy=0; // mots de passe dans les fichiers
+ httrack.includequery=1; // include query-string par défaut
+ httrack.mirror_first_page=0; // pas mode mirror links
+ httrack.accept_cookie=1; // gérer les cookies
+ httrack.cookie=NULL;
+ httrack.http10=0; // laisser http/1.1
+ httrack.nocompression=0; // pas de compression
+ httrack.tolerant=0; // ne pas accepter content-length incorrect
+ httrack.parseall=1; // tout parser (tags inconnus, par exemple)
+ httrack.norecatch=0; // ne pas reprendre les fichiers effacés par l'utilisateur
+ httrack.verbosedisplay=0; // pas d'animation texte
+ strcpy(httrack.footer,HTS_DEFAULT_FOOTER);
+ httrack.ftp_proxy=1; // proxy http pour ftp
+ strcpy(httrack.filelist,"");
+ strcpy(httrack.lang_iso,"en, *");
+ strcpy(httrack.mimedefs,"\n"); // aucun filtre mime (\n IMPORTANT)
+ //
+ httrack.log=stdout;
+ httrack.errlog=stderr;
+ httrack.flush=1; // flush sur les fichiers log
+ httrack.aff_progress=0;
+ httrack.keyboard=0;
+ //
+ strcpy(httrack.path_html,"");
+ strcpy(httrack.path_log,"");
+ strcpy(httrack.path_bin,"");
+ //
+ httrack.maxlink=100000; // 100,000 liens max par défaut (400Kb)
+ httrack.maxfilter=200; // 200 filtres max par défaut
+ httrack.maxcache=1048576*32; // a peu près 32Mo en cache max -- OPTION NON PARAMETRABLE POUR L'INSTANT --
+ //httrack.maxcache_anticipate=256; // maximum de liens à anticiper
+ httrack.maxtime=-1; // temps max en secondes
+ httrack.maxrate=-1; // pas de taux maxi
+ httrack.maxconn=10; // nombre connexions/s
+ httrack.waittime=-1; // wait until.. hh*3600+mm*60+ss
+ //
+ httrack.exec=argv[0];
+ httrack.is_update=0; // not an update (yet)
+ httrack.dir_topindex=0; // do not built top index (yet)
+ //
+ httrack.state.stop=0; // stopper
+ //
+ _DEBUG_HEAD=0; // pas de debuggage en têtes
+
+#if HTS_WIN
+#if HTS_ANALYSTE!=2
+ {
+   /* Start Winsock and make sure that version 1.1 was negotiated */
+   int stat;
+   wVersionRequested = 0x0101;
+   stat = WSAStartup( wVersionRequested, &wsadata );
+   if (stat != 0) {
+     HTS_PANIC_PRINTF("Winsock not found!\n");
+     htsmain_free();
+     return -1;
+   } else if (LOBYTE(wsadata.wVersion) != 1 || HIBYTE(wsadata.wVersion) != 1) {
+     /* Both bytes must be 1: with '&&' the test only failed when neither
+        byte was 1, so a version such as 1.0 was wrongly accepted */
+     HTS_PANIC_PRINTF("WINSOCK.DLL does not support version 1.1\n");
+     WSACleanup();
+     htsmain_free();
+     return -1;
+   }
+ }
+#endif
+#endif
+
+ /* Init root dir */
+ hts_rootdir(argv[0]);
+
+#if HTS_WIN
+#else
+ /* Terminal is a tty, may ask questions and display funny information */
+ if (isatty(1)) {
+ httrack.quiet=0;
+ httrack.verbosedisplay=1;
+ }
+ /* Not a tty, no stdin input or funny output! */
+ else {
+ httrack.quiet=1;
+ httrack.verbosedisplay=0;
+ }
+#endif
+
+ /* First test: if -#R then only launch ftp */
+ if (argc > 2) {
+ if (strcmp(argv[1],"-#R")==0) {
+ if (argc==6) {
+ lien_back r;
+ char* path;
+ FILE* fp;
+ strcpy(r.url_adr,argv[2]);
+ strcpy(r.url_fil,argv[3]);
+ strcpy(r.url_sav,argv[4]);
+ path=argv[5];
+ r.status=1000;
+ run_launch_ftp(&r);
+ fp=fopen(fconv(path),"wb");
+ if (fp) {
+ fprintf(fp,"%d %s",r.r.statuscode,r.r.msg);
+ fclose(fp); fp=NULL;
+ rename(fconv(path),fconcat(path,".ok"));
+ } else remove(fconv(path));
+ } else {
+ printf("htsftp error, wrong parameter number (%d)\n",argc);
+ }
+ exit(0); // pas _exit()
+ }
+ }
+
+ // ok, non ftp, continuer
+
+
+ // Binary program path?
+#ifndef HTS_HTTRACKDIR
+ {
+   /* Directory of the executable: everything located before the last '/'
+      of the fslash()-converted argv[0] */
+   char* path=fslash(argv[0]);
+   char* a;
+   if ((a=strrchr(path,'/'))) {
+     httrack.path_bin[0]='\0';
+     /* Length taken as a pointer difference: casting each pointer to int
+        first loses address bits where pointers are wider than int.
+        NOTE(review): the prefix is copied from argv[0], which assumes that
+        fslash() does not change the length of the string -- confirm */
+     strncat(httrack.path_bin,argv[0],(size_t) (a - path));
+   }
+ }
+#else
+ strcpy(httrack.path_bin,HTS_HTTRACKDIR);
+#endif
+
+
+ /* filter CR, LF, TAB.. */
+ {
+ int na;
+ for(na=1;na<argc;na++) {
+ char* a;
+ while( (a=strchr(argv[na],'\x0d')) ) *a=' ';
+ while( (a=strchr(argv[na],'\x0a')) ) *a=' ';
+ while( (a=strchr(argv[na],9)) ) *a=' ';
+ /* equivalent to "empty parameter" */
+ if ((strcmp(argv[na],HTS_NOPARAM)==0) || (strcmp(argv[na],HTS_NOPARAM2)==0)) // (none)
+ strcpy(argv[na],"\"\"");
+ if (strncmp(argv[na],"-&",2)==0)
+ argv[na][1]='%';
+ }
+ }
+
+
+
+ /* create x_argvblk buffer for transformed command line */
+ {
+ int current_size=0;
+ int size;
+ int na;
+ for(na=0;na<argc;na++)
+ current_size += (strlen(argv[na]) + 1);
+ if ((size=fsize("config"))>0)
+ current_size += size;
+ x_argvblk=(char*) malloct(current_size+32768);
+ if (x_argvblk == NULL) {
+ HTS_PANIC_PRINTF("Error, not enough memory");
+ htsmain_free();
+ return -1;
+ }
+ x_argvblk[0]='\0';
+ x_ptr=0;
+ }
+
+ /* Create new argc/argv, replace alias, count URLs, treat -h, -q, -i */
+ {
+ char _tmp_argv[2][HTS_CDLMAXSIZE];
+ char* tmp_argv[2];
+ char tmp_error[HTS_CDLMAXSIZE];
+ int tmp_argc;
+ int x_argc=0;
+ int na;
+ tmp_argv[0]=_tmp_argv[0];
+ tmp_argv[1]=_tmp_argv[1];
+ //
+ argv_url=0; /* pour comptage */
+ //
+ cmdl_add(argv[0],x_argc,x_argv,x_argvblk,x_ptr);
+ na=1; /* commencer après nom_prg */
+ while(na<argc) {
+ int result=1;
+ tmp_argv[0][0]=tmp_argv[1][0]='\0';
+
+ /* Vérifier argv[] non vide */
+ if (strnotempty(argv[na])) {
+
+ /* Vérifier Commande (alias) */
+ result=optalias_check(argc,(const char * const *)argv,na,
+ &tmp_argc,(char**)tmp_argv,tmp_error);
+ if (!result) {
+ HTS_PANIC_PRINTF(tmp_error);
+ htsmain_free();
+ return -1;
+ }
+
+ /* Copier */
+ cmdl_add(tmp_argv[0],x_argc,x_argv,x_argvblk,x_ptr);
+ if (tmp_argc > 1) {
+ cmdl_add(tmp_argv[1],x_argc,x_argv,x_argvblk,x_ptr);
+ }
+
+ /* Compter URLs et détecter -i,-q.. */
+ if (tmp_argc == 1) { /* pas -P & co */
+ if (!cmdl_opt(tmp_argv[0])) { /* pas -c0 & co */
+ if (argv_url<0) argv_url=0; // -1==force -> 1=one url already detected, wipe all previous options
+ //if (argv_url>=0) {
+ argv_url++;
+ if (!argv_firsturl)
+ argv_firsturl=x_argv[x_argc-1];
+ //}
+ } else {
+ if (strcmp(tmp_argv[0],"-h")==0) {
+ help(argv[0],!httrack.quiet);
+ htsmain_free();
+ return 0;
+ } else {
+ if (strncmp(tmp_argv[0],"--",2)) { /* pas */
+ if ((strchr(tmp_argv[0],'q')!=NULL))
+ httrack.quiet=1; // ne pas poser de questions! (nohup par exemple)
+ if ((strchr(tmp_argv[0],'i')!=NULL)) { // doit.log!
+ argv_url=-1; /* forcer */
+ httrack.quiet=1;
+ }
+ }
+ }
+ }
+ } else if (tmp_argc == 2) {
+ if ((strcmp(tmp_argv[0],"-%L")==0)) { // liste d'URLs
+ if (argv_url<0) argv_url=0; // -1==force -> 1=one url already detected, wipe all previous options
+ //if (argv_url>=0)
+ argv_url++; /* forcer */
+ }
+ }
+ }
+
+ na+=result;
+ }
+ if (argv_url<0)
+ argv_url=0;
+
+ /* Nouveaux argc et argv */
+ argv=x_argv;
+ argc=x_argc;
+ }
+
+
+
+
+ // Ici on ajoute les arguments de config
+/*
+ if (fexist("config")) { // configuration
+ x_argvblk2=(char*) calloct(32768,1);
+
+ if (x_argvblk2!=NULL) {
+ FILE* fp;
+ int x_argc2;
+
+ //strcpy(x_argvblk2,"httrack ");
+ fp=fopen("config","rb");
+ if (fp) {
+ linput(fp,x_argvblk2+strlen(x_argvblk2),32000);
+ fclose(fp); fp=NULL;
+
+ // calculer arguments selon derniers arguments
+ x_argv2[0]=argv[0];
+ x_argc2=1;
+ {
+ char* p=x_argvblk2;
+ do {
+ x_argv2[x_argc2++]=p;
+ p=strchr(p,' ');
+ if (p) {
+ *p=0; // octet nul (tableau)
+ p++;
+ }
+ } while(p!=NULL);
+ }
+ // recopier arguments actuels (pointeurs uniquement)
+ {
+ int na;
+ for(na=1;na<argc;na++) {
+ x_argv2[x_argc2++]=argv[na];
+ }
+ }
+ argc=x_argc2; // nouvel argc
+ argv=x_argv2; // nouvel argv
+ }
+ }
+ }
+*/
+
+
+ // Option O and includerc
+ {
+ int loops=0;
+ while (loops<2) {
+ char* com;
+ int na;
+
+ for(na=1;na<argc;na++) {
+
+ if (argv[na][0]=='"') {
+   /* Quoted argument: strip the surrounding quotes in place */
+   size_t len=strlen(argv[na]);     /* length with both quotes included */
+   /* A lone quote character (len==1) has no closing quote either: reject
+      it instead of looking at the byte located before the string */
+   if (len < 2 || argv[na][len-1]!='"') {
+     char s[HTS_CDLMAXSIZE];
+     /* 18 = strlen("Missing quote in ") + terminating NUL: keeps s in bounds */
+     sprintf(s,"Missing quote in %.*s",(int) (sizeof(s)-18),argv[na]);
+     HTS_PANIC_PRINTF(s);
+     htsmain_free();
+     return -1;
+   }
+   memmove(argv[na],argv[na]+1,len-2);    /* drop the opening quote */
+   argv[na][len-2]='\0';                  /* drop the closing quote */
+ }
+
+ if (cmdl_opt(argv[na])) { // option
+ com=argv[na]+1;
+
+ while(*com) {
+ switch(*com) {
+ case 'O': // output path
+ if ((na+1>=argc) || (argv[na+1][0]=='-')) {
+ HTS_PANIC_PRINTF("Option O needs to be followed by a blank space, and a path (or path,path)");
+ printf("Example: -O /binary/\n");
+ printf("Example: -O /binary/,/log/\n");
+ htsmain_free();
+ return -1;
+ } else {
+ char* a;
+ na++;
+ strcpy(httrack.path_html,"");
+ strcpy(httrack.path_log,"");
+ a=strstr(argv[na],"\",\""); // rechercher en premier, au cas ou -O "c:\pipo,test","c:\test"
+ if (!a)
+ a=strchr(argv[na],','); // 2 path
+ else
+ a++; // position ,
+ if (a) {
+ strncat(httrack.path_html,argv[na],(int) (a-argv[na]));
+ strcat(httrack.path_log,a+1);
+ } else {
+ strcpy(httrack.path_log,argv[na]);
+ strcpy(httrack.path_html,argv[na]);
+ }
+ // Handle cases such as -O "C:\mirror\" : remove a leading quote, and the
+ // trailing quote when there is one, from both paths. The work is done in
+ // place so that no fixed-size temporary can overflow on a long path, and
+ // an empty remainder is never indexed at [-1].
+ if (httrack.path_log[0]=='"') { // quotes
+   size_t len=strlen(httrack.path_log+1);
+   memmove(httrack.path_log,httrack.path_log+1,len+1);   // shift left, NUL included
+   if (len>0 && httrack.path_log[len-1]=='"')
+     httrack.path_log[len-1]='\0';
+ }
+ if (httrack.path_html[0]=='"') {
+   size_t len=strlen(httrack.path_html+1);
+   memmove(httrack.path_html,httrack.path_html+1,len+1); // shift left, NUL included
+   if (len>0 && httrack.path_html[len-1]=='"')
+     httrack.path_html[len-1]='\0';
+ }
+ check_path(httrack.path_log,argv_firsturl);
+ if (check_path(httrack.path_html,argv_firsturl)) {
+ httrack.dir_topindex=1; // rebuilt top index
+ }
+
+ //printf("-->%s\n%s\n",httrack.path_html,httrack.path_log);
+
+ }
+ break;
+ } // switch
+ com++;
+ } // while
+
+ } // arg
+
+ } // for
+
+ /* if doit.log exists, or if new URL(s) defined,
+ then DO NOT load standard config files */
+ /* (config files are added in doit.log) */
+#if DEBUG_STEPS
+ printf("Loading httrackrc/doit.log\n");
+#endif
+ /* recreate a doit.log (no old doit.log or new URLs (and parameters)) */
+ if ((strnotempty(httrack.path_log)) || (strnotempty(httrack.path_html)))
+ loops++; // do not loop once again and do not include rc file (O option exists)
+ else {
+ if ( (!fexist(fconcat(httrack.path_log,"hts-cache/doit.log"))) || (argv_url>0) ) {
+ if (!optinclude_file(fconcat(httrack.path_log,HTS_HTTRACKRC),&argc,argv,x_argvblk,&x_ptr))
+ if (!optinclude_file(HTS_HTTRACKRC,&argc,argv,x_argvblk,&x_ptr)) {
+ if (!optinclude_file(fconcat(hts_gethome(),"/"HTS_HTTRACKRC),&argc,argv,x_argvblk,&x_ptr)) {
+#ifdef HTS_HTTRACKCNF
+ optinclude_file(HTS_HTTRACKCNF,&argc,argv,x_argvblk,&x_ptr);
+#endif
+ }
+ }
+ } else
+ loops++; // do not loop once again
+ }
+
+ loops++;
+ } // while
+
+ } // traiter -O
+
+
+
+ /* load doit.log and insert in current command line */
+ if ( fexist(fconcat(httrack.path_log,"hts-cache/doit.log")) && (argv_url<=0) ) {
+ FILE* fp=fopen(fconcat(httrack.path_log,"hts-cache/doit.log"),"rb");
+ if (fp) {
+ int insert_after=1; /* insérer après nom au début */
+ //
+ char buff[8192];
+ char *p,*lastp;
+ linput(fp,buff,8000);
+ fclose(fp); fp=NULL;
+ p=buff;
+ do {
+ int insert_after_argc;
+ // read next
+ lastp=p;
+ if (p) {
+ p=next_token(p,1);
+ if (p) {
+ *p=0; // null
+ p++;
+ }
+ }
+
+ /* Insert parameters BUT so that they can be in the same order */
+ if (lastp) {
+ if (strnotempty(lastp)) {
+ insert_after_argc=argc-insert_after;
+ cmdl_ins(lastp,insert_after_argc,(argv+insert_after),x_argvblk,x_ptr);
+ argc=insert_after_argc+insert_after;
+ insert_after++;
+ }
+ }
+ } while(lastp!=NULL);
+ //fclose(fp);
+ }
+ }
+
+
+ // Existence d'un cache - pas de new mais un old.. renommer
+#if DEBUG_STEPS
+ printf("Checking cache\n");
+#endif
+ if ( (!fexist(fconcat(httrack.path_log,"hts-cache/new.dat"))) || (!fexist(fconcat(httrack.path_log,"hts-cache/new.ndx"))) ) {
+ if ( (fexist(fconcat(httrack.path_log,"hts-cache/old.dat"))) && (fexist(fconcat(httrack.path_log,"hts-cache/old.ndx"))) ) {
+ remove(fconcat(httrack.path_log,"hts-cache/new.dat"));
+ remove(fconcat(httrack.path_log,"hts-cache/new.ndx"));
+ //remove(fconcat(httrack.path_log,"hts-cache/new.lst"));
+ rename(fconcat(httrack.path_log,"hts-cache/old.dat"),fconcat(httrack.path_log,"hts-cache/new.dat"));
+ rename(fconcat(httrack.path_log,"hts-cache/old.ndx"),fconcat(httrack.path_log,"hts-cache/new.ndx"));
+ //rename(fconcat(httrack.path_log,"hts-cache/old.lst"),fconcat(httrack.path_log,"hts-cache/new.lst"));
+ }
+ }
+
+ /* Interrupted mirror detected */
+ if (!httrack.quiet) {
+ if (fexist(fconcat(httrack.path_log,"hts-in_progress.lock"))) {
+ /* Old cache */
+ if ( (fexist(fconcat(httrack.path_log,"hts-cache/old.dat"))) && (fexist(fconcat(httrack.path_log,"hts-cache/old.ndx"))) ) {
+ if (httrack.log != NULL) {
+ fprintf(httrack.log,"Warning!\n");
+ fprintf(httrack.log,"An aborted mirror has been detected!\nThe current temporary cache is required for any update operation and only contains data downloaded during the last aborted session.\nThe former cache might contain more complete information; if you do not want to lose that information, you have to restore it and delete the current cache.\nThis can easily be done here by erasing the hts-cache/new.* files\n");
+ fprintf(httrack.log,"Please restart HTTrack with --continue (-iC1) option to override this message!\n");
+ }
+ exit(0);
+ }
+ }
+ }
+
+ // remplacer "macros" comme --spider
+ // permet de lancer httrack sans a avoir à se rappeler de syntaxes comme p0C0I0Qc32 ..
+#if DEBUG_STEPS
+ printf("Checking last macros\n");
+#endif
+ {
+ int i;
+ for(i=0;i<argc;i++) {
+#if DEBUG_STEPS
+ // Trace the index, then the text, of the argument being examined
+ // (%d must receive the index i, not the argv[i] pointer)
+ printf("Checking #%d:\n",i);
+ printf("%s\n",argv[i]);
+#endif
+ if (argv[i][0]=='-') {
+ if (argv[i][1]=='-') { // --xxx
+ if ((strfield2(argv[i]+2,"clean")) || (strfield2(argv[i]+2,"tide"))) { // nettoyer
+ strcpy(argv[i]+1,"");
+ if (fexist(fconcat(httrack.path_log,"hts-log.txt")))
+ remove(fconcat(httrack.path_log,"hts-log.txt"));
+ if (fexist(fconcat(httrack.path_log,"hts-err.txt")))
+ remove(fconcat(httrack.path_log,"hts-err.txt"));
+ if (fexist(fconcat(httrack.path_html,"index.html")))
+ remove(fconcat(httrack.path_html,"index.html"));
+ if (fexist(fconcat(httrack.path_log,"hts-cache/new.dat")))
+ remove(fconcat(httrack.path_log,"hts-cache/new.dat"));
+ if (fexist(fconcat(httrack.path_log,"hts-cache/new.ndx")))
+ remove(fconcat(httrack.path_log,"hts-cache/new.ndx"));
+ if (fexist(fconcat(httrack.path_log,"hts-cache/old.dat")))
+ remove(fconcat(httrack.path_log,"hts-cache/old.dat"));
+ if (fexist(fconcat(httrack.path_log,"hts-cache/old.ndx")))
+ remove(fconcat(httrack.path_log,"hts-cache/old.ndx"));
+ if (fexist(fconcat(httrack.path_log,"hts-cache/new.lst")))
+ remove(fconcat(httrack.path_log,"hts-cache/new.lst"));
+ if (fexist(fconcat(httrack.path_log,"hts-cache/old.lst")))
+ remove(fconcat(httrack.path_log,"hts-cache/old.lst"));
+ if (fexist(fconcat(httrack.path_log,"hts-cache/new.txt")))
+ remove(fconcat(httrack.path_log,"hts-cache/new.txt"));
+ if (fexist(fconcat(httrack.path_log,"hts-cache/old.txt")))
+ remove(fconcat(httrack.path_log,"hts-cache/old.txt"));
+ if (fexist(fconcat(httrack.path_log,"hts-cache/doit.log")))
+ remove(fconcat(httrack.path_log,"hts-cache/doit.log"));
+ if (fexist(fconcat(httrack.path_log,"hts-in_progress.lock")))
+ remove(fconcat(httrack.path_log,"hts-in_progress.lock"));
+ rmdir(fconcat(httrack.path_log,"hts-cache"));
+ //
+ } else if (strfield2(argv[i]+2,"catchurl")) { // capture d'URL via proxy temporaire!
+ argv_url=1; // forcer a passer les parametres
+ strcpy(argv[i]+1,"#P");
+ //
+ } else if (strfield2(argv[i]+2,"updatehttrack")) {
+#ifdef _WIN32
+ char s[HTS_CDLMAXSIZE];
+ sprintf(s,"%s not available in this version",argv[i]);
+ HTS_PANIC_PRINTF(s);
+ htsmain_free();
+ return -1;
+#else
+#if 0
+ char _args[8][256];
+ char *args[8];
+
+ printf("Cheking for updates...\n");
+ strcpy(_args[0],argv[0]);
+ strcpy(_args[1],"--get");
+ sprintf(_args[2],HTS_UPDATE_WEBSITE,HTS_PLATFORM,"");
+ strcpy(_args[3],"--quickinfo");
+ args[0]=_args[0];
+ args[1]=_args[1];
+ args[2]=_args[2];
+ args[3]=_args[3];
+ args[4]=NULL;
+ if (execvp(args[0],args)==-1) {
+ }
+#endif
+#endif
+ }
+ //
+ else {
+ char s[HTS_CDLMAXSIZE];
+ sprintf(s,"%s not recognized",argv[i]);
+ HTS_PANIC_PRINTF(s);
+ htsmain_free();
+ return -1;
+ }
+
+ }
+ }
+ }
+ }
+
+ // Compter urls/jokers
+ /*
+ if (argv_url<=0) {
+ int na;
+ argv_url=0;
+ for(na=1;na<argc;na++) {
+ if ( (strcmp(argv[na],"-P")==0) || (strcmp(argv[na],"-N")==0) || (strcmp(argv[na],"-F")==0) || (strcmp(argv[na],"-O")==0) || (strcmp(argv[na],"-V")==0) ) {
+ na++; // sauter nom de proxy
+ } else if (!cmdl_opt(argv[na])) {
+ argv_url++; // un de plus
+ } else if (strcmp(argv[na],"-h")==0) {
+ help(argv[0],!httrack.quiet);
+ htsmain_free();
+ return 0;
+ } else {
+ if ((strchr(argv[na],'q')!=NULL))
+ httrack.quiet=1; // ne pas poser de questions! (nohup par exemple)
+ if ((strchr(argv[na],'i')!=NULL)) { // doit.log!
+ argv_url=0;
+ na=argc;
+ }
+ }
+ }
+ }
+ */
+
+ // Ici on ajoute les arguments qui ont été appelés avant au cas où on récupère une session
+ // Exemple: httrack www.truc.fr -L0 puis ^C puis httrack sans URL : ajouter URL précédente
+ /*
+ if (argv_url==0) {
+ //if ((fexist(fconcat(httrack.path_log,"hts-cache/new.dat"))) && (fexist(fconcat(httrack.path_log,"hts-cache/new.ndx")))) { // il existe déja un cache précédent.. renommer
+ if (fexist(fconcat(httrack.path_log,"hts-cache/doit.log"))) { // un cache est présent
+
+ x_argvblk=(char*) calloct(32768,1);
+
+ if (x_argvblk!=NULL) {
+ FILE* fp;
+ int x_argc;
+
+ //strcpy(x_argvblk,"httrack ");
+ fp=fopen(fconcat(httrack.path_log,"hts-cache/doit.log"),"rb");
+ if (fp) {
+ linput(fp,x_argvblk+strlen(x_argvblk),8192);
+ fclose(fp); fp=NULL;
+ }
+
+ // calculer arguments selon derniers arguments
+ x_argv[0]=argv[0];
+ x_argc=1;
+ {
+ char* p=x_argvblk;
+ do {
+ x_argv[x_argc++]=p;
+ //p=strstr(p," ");
+ // exemple de chaine: "echo \"test\"" c:\a "\$0"
+ p=next_token(p,1); // prochain token
+ if (p) {
+ *p=0; // octet nul (tableau)
+ p++;
+ }
+ } while(p!=NULL);
+ }
+ // recopier arguments actuels (pointeurs uniquement)
+ {
+ int na;
+ for(na=1;na<argc;na++) {
+ if (strcmp(argv[na],"-O") != 0) // SAUF le path!
+ x_argv[x_argc++]=argv[na];
+ else
+ na++;
+ }
+ }
+ argc=x_argc; // nouvel argc
+ argv=x_argv; // nouvel argv
+ }
+
+
+ }
+ //}
+ }
+ */
+
+ // Vérifier quiet
+ /*
+ {
+ int na;
+ for(na=1;na<argc;na++) {
+ if (!cmdl_opt(argv[na])) {
+ if ((strcmp(argv[na],"-P")==0) || (strcmp(argv[na],"-N")==0) || (strcmp(argv[na],"-F")==0) || (strcmp(argv[na],"-O")==0) || (strcmp(argv[na],"-V")==0))
+ na++; // sauter nom de proxy
+ } else {
+ if ((strchr(argv[na],'q')!=NULL) || (strchr(argv[na],'i')!=NULL))
+ httrack.quiet=1; // ne pas poser de questions! (nohup par exemple)
+ }
+ }
+ }
+ */
+
+ // Pas d'URL
+#if DEBUG_STEPS
+ printf("Checking URLs\n");
+#endif
+ if (argv_url==0) {
+ // Présence d'un cache, que faire?..
+ if ((fexist(fconcat(httrack.path_log,"hts-cache/new.dat"))) && (fexist(fconcat(httrack.path_log,"hts-cache/new.ndx")))) { // il existe déja un cache précédent.. renommer
+ if (fexist(fconcat(httrack.path_log,"hts-cache/doit.log"))) { // un cache est présent
+ if (x_argvblk!=NULL) {
+ int m;
+ // établir mode - mode cache: 1 (cache valide) 2 (cache à vérifier)
+ if (fexist(fconcat(httrack.path_log,"hts-in_progress.lock"))) { // cache prioritaire
+ m=1;
+ recuperer=1;
+ } else {
+ m=2;
+ }
+ httrack.cache=m;
+
+ if (httrack.quiet==0) { // sinon on continue automatiquement
+ HT_REQUEST_START;
+ HT_PRINT("A cache (hts-cache/) has been found in the directory ");
+ HT_PRINT(httrack.path_log);
+ HT_PRINT(LF);
+ if (m==1) {
+ HT_PRINT("That means that a transfer has been aborted"LF);
+ HT_PRINT("OK to Continue ");
+ } else {
+ HT_PRINT("That means you can update faster the remote site(s)"LF);
+ HT_PRINT("OK to Update ");
+ }
+ HT_PRINT("httrack "); HT_PRINT(x_argvblk); HT_PRINT("?"LF);
+ HT_REQUEST_END;
+ if (!ask_continue()) {
+ htsmain_free();
+ return 0;
+ }
+ }
+
+ } else {
+ HTS_PANIC_PRINTF("Error, not enough memory");
+ htsmain_free();
+ return -1;
+ }
+ } else { // log existe pas
+ HTS_PANIC_PRINTF("A cache has been found, but no command line");
+ printf("Please launch httrack with proper parameters to reuse the cache\n");
+ htsmain_free();
+ return -1;
+ }
+
+ } else { // aucune URL définie et pas de cache
+#if HTS_ANALYSTE!=2
+ if (httrack.quiet) {
+#endif
+ help(argv[0],!httrack.quiet);
+ htsmain_free();
+ return -1;
+#if HTS_ANALYSTE!=2
+ } else {
+ help_wizard(&httrack);
+ htsmain_free();
+ return -1;
+ }
+#endif
+ htsmain_free();
+ return 0;
+ }
+ } else { // plus de 2 paramètres
+ // un fichier log existe?
+ if (fexist(fconcat(httrack.path_log,"hts-in_progress.lock"))) { // fichier lock?
+ //char s[32];
+
+ httrack.cache=1; // cache prioritaire
+ if (httrack.quiet==0) {
+ if ((fexist(fconcat(httrack.path_log,"hts-cache/new.dat"))) && (fexist(fconcat(httrack.path_log,"hts-cache/new.ndx")))) { // il existe déja un cache précédent.. renommer
+ HT_REQUEST_START;
+ HT_PRINT("There is a lock-file in the directory ");
+ HT_PRINT(httrack.path_log);
+ HT_PRINT(LF"That means that a mirror has not been terminated"LF);
+ HT_PRINT("Be sure you call httrack with proper parameters"LF);
+ HT_PRINT("(The cache allows you to restart faster the transfer)"LF);
+ HT_REQUEST_END;
+ if (!ask_continue()) {
+ htsmain_free();
+ return 0;
+ }
+ }
+ }
+ } else if (fexist(fconcat(httrack.path_html,"index.html"))) {
+ //char s[32];
+ httrack.cache=2; // cache vient après test de validité
+ if (httrack.quiet==0) {
+ if ((fexist(fconcat(httrack.path_log,"hts-cache/new.dat"))) && (fexist(fconcat(httrack.path_log,"hts-cache/new.ndx")))) { // il existe déja un cache précédent.. renommer
+ HT_REQUEST_START;
+ HT_PRINT("There is an index.html and a hts-cache folder in the directory ");
+ HT_PRINT(httrack.path_log);
+ HT_PRINT(LF"A site may have been mirrored here, that could mean that you want to update it"LF);
+ HT_PRINT("Be sure parameters are ok"LF);
+ HT_REQUEST_END;
+ if (!ask_continue()) {
+ htsmain_free();
+ return 0;
+ }
+ } else {
+ HT_REQUEST_START;
+ HT_PRINT("There is an index.html in the directory ");
+ HT_PRINT(httrack.path_log);
+ HT_PRINT(" but no cache"LF);
+ HT_PRINT("There is an index.html in the directory, but no cache"LF);
+ HT_PRINT("A site may have been mirrored here, and erased.."LF);
+ HT_PRINT("Be sure parameters are ok"LF);
+ HT_REQUEST_END;
+ if (!ask_continue()) {
+ htsmain_free();
+ return 0;
+ }
+ }
+ }
+ }
+ }
+
+
+ // Treat parameters
+ // Traiter les paramètres
+#if DEBUG_STEPS
+ printf("Analyze parameters\n");
+#endif
+ {
+ char* com;
+ int na;
+
+ for(na=1;na<argc;na++) {
+
+ if (argv[na][0]=='"') {
+   /* Quoted argument: strip the surrounding quotes in place */
+   size_t len=strlen(argv[na]);     /* length with both quotes included */
+   /* A lone quote character (len==1) has no closing quote either: reject
+      it instead of looking at the byte located before the string */
+   if (len < 2 || argv[na][len-1]!='"') {
+     char s[HTS_CDLMAXSIZE];
+     /* 18 = strlen("Missing quote in ") + terminating NUL: keeps s in bounds */
+     sprintf(s,"Missing quote in %.*s",(int) (sizeof(s)-18),argv[na]);
+     HTS_PANIC_PRINTF(s);
+     htsmain_free();
+     return -1;
+   }
+   memmove(argv[na],argv[na]+1,len-2);    /* drop the opening quote */
+   argv[na][len-2]='\0';                  /* drop the closing quote */
+ }
+
+ if (cmdl_opt(argv[na])) { // option
+ com=argv[na]+1;
+
+ while(*com) {
+ switch(*com) {
+ case ' ': case 9: case '-': case '\0': break;
+ //
+ case 'h':
+ help(argv[0],0);
+ htsmain_free();
+ return 0; // déja fait normalement
+ //
+ case 'g': // récupérer un (ou plusieurs) fichiers isolés
+ httrack.wizard=2; // le wizard on peut plus s'en passer..
+ //httrack.wizard=0; // pas de wizard
+ httrack.cache=0; // ni de cache
+ httrack.makeindex=0; // ni d'index
+ httrack_logmode=1; // erreurs à l'écran
+ httrack.savename_type=1003; // mettre dans le répertoire courant
+ httrack.depth=0; // ne pas explorer la page
+ httrack.accept_cookie=0; // pas de cookies
+ break;
+ case 'w': httrack.wizard=2; // wizard 'soft' (ne pose pas de questions)
+ httrack.travel=0;
+ httrack.seeker=1;
+ break;
+ case 'W': httrack.wizard=1; // Wizard-Help (pose des questions)
+ httrack.travel=0;
+ httrack.seeker=1;
+ break;
+ case 'r': // n'est plus le recurse get bestial mais wizard itou!
+ if (isdigit((unsigned char)*(com+1))) {
+ sscanf(com+1,"%d",&httrack.depth);
+ while(isdigit((unsigned char)*(com+1))) com++;
+ } else httrack.depth=3;
+ break;
+/*
+ case 'r': httrack.wizard=0;
+ if (isdigit((unsigned char)*(com+1))) {
+ sscanf(com+1,"%d",&httrack.depth);
+ while(isdigit((unsigned char)*(com+1))) com++;
+ } else httrack.depth=3;
+ break;
+*/
+ //
+ // note: les tests httrack.depth sont pour éviter de faire
+ // un miroir du web (:-O) accidentelement ;-)
+ case 'a': /*if (httrack.depth==9999) httrack.depth=3;*/
+ httrack.travel=0+(httrack.travel&256); break;
+ case 'd': /*if (httrack.depth==9999) httrack.depth=3;*/
+ httrack.travel=1+(httrack.travel&256); break;
+ case 'l': /*if (httrack.depth==9999) httrack.depth=3;*/
+ httrack.travel=2+(httrack.travel&256); break;
+ case 'e': /*if (httrack.depth==9999) httrack.depth=3;*/
+ httrack.travel=7+(httrack.travel&256); break;
+ case 't': httrack.travel|=256; break;
+ case 'n': httrack.nearlink=1; break;
+ case 'x': httrack.external=1; break;
+ //
+ case 'U': httrack.seeker=2; break;
+ case 'D': httrack.seeker=1; break;
+ case 'S': httrack.seeker=0; break;
+ case 'B': httrack.seeker=3; break;
+ //
+ case 'Y': httrack.mirror_first_page=1; break;
+ //
+ case 'q': case 'i': httrack.quiet=1; break;
+ //
+ case 'Q': httrack_logmode=0; break;
+ case 'v': httrack_logmode=1; break;
+ case 'f': httrack_logmode=2; if (*(com+1)=='2') httrack_logmode=3; while(isdigit((unsigned char)*(com+1))) com++; break;
+ //
+ //case 'A': httrack.urlmode=1; break;
+ //case 'R': httrack.urlmode=2; break;
+ case 'K': httrack.urlmode=0;          // K alone ==> mode 0
+   if (isdigit((unsigned char)*(com+1))) {
+     sscanf(com+1,"%d",&httrack.urlmode);
+     if (httrack.urlmode == 0) { // in fact K0 ==> K2
+                                 // and K ==> K0
+       httrack.urlmode=2;
+     }
+     while(isdigit((unsigned char)*(com+1))) com++;   // skip the digits just read
+   }
+   // Leave the switch here: without this break the K option fell through
+   // into case 'c' and overwrote httrack.maxsoc
+   break;
+ //
+ case 'c':
+ if (isdigit((unsigned char)*(com+1))) {
+ sscanf(com+1,"%d",&httrack.maxsoc);
+ while(isdigit((unsigned char)*(com+1))) com++;
+ httrack.maxsoc=max(httrack.maxsoc,1); // FORCER A 1
+ } else httrack.maxsoc=8;
+
+ break;
+ //
+ case 'p': sscanf(com+1,"%d",&httrack.getmode); while(isdigit((unsigned char)*(com+1))) com++; break;
+ //
+ case 'G': sscanf(com+1,LLintP,&httrack.fragment); while(isdigit((unsigned char)*(com+1))) com++; break;
+ case 'M': sscanf(com+1,LLintP,&httrack.maxsite); while(isdigit((unsigned char)*(com+1))) com++; break;
+ case 'm': sscanf(com+1,LLintP,&httrack.maxfile_nonhtml); while(isdigit((unsigned char)*(com+1))) com++;
+ if (*(com+1)==',') {
+ com++;
+ sscanf(com+1,LLintP,&httrack.maxfile_html); while(isdigit((unsigned char)*(com+1))) com++;
+ } else httrack.maxfile_html=-1;
+ break;
+ //
+ case 'T': sscanf(com+1,"%d",&httrack.timeout); while(isdigit((unsigned char)*(com+1))) com++; break;
+ case 'J': sscanf(com+1,"%d",&httrack.rateout); while(isdigit((unsigned char)*(com+1))) com++; break;
+ case 'R': sscanf(com+1,"%d",&httrack.retry); while(isdigit((unsigned char)*(com+1))) com++; break;
+ case 'E': sscanf(com+1,"%d",&httrack.maxtime); while(isdigit((unsigned char)*(com+1))) com++; break;
+ case 'H': sscanf(com+1,"%d",&httrack.hostcontrol); while(isdigit((unsigned char)*(com+1))) com++; break;
+ case 'A': sscanf(com+1,"%d",&httrack.maxrate); while(isdigit((unsigned char)*(com+1))) com++; break;
+
+ case 'j': httrack.parsejava=1; if (*(com+1)=='0') { httrack.parsejava=0; com++; } break;
+ //
+ case 'I': httrack.makeindex=1; if (*(com+1)=='0') { httrack.makeindex=0; com++; } break;
+ //
+ case 'X': httrack.delete_old=1; if (*(com+1)=='0') { httrack.delete_old=0; com++; } break;
+ //
+ case 'b': sscanf(com+1,"%d",&httrack.accept_cookie); while(isdigit((unsigned char)*(com+1))) com++; break;
+ //
+ case 'N':
+ if (strcmp(argv[na],"-N")==0) { // Tout seul
+ if ((na+1>=argc) || (argv[na+1][0]=='-')) { // erreur
+ HTS_PANIC_PRINTF("Option N needs a number, or needs to be followed by a blank space, and a string");
+ printf("Example: -N4\n");
+ htsmain_free();
+ return -1;
+ } else {
+ na++;
+ if (strlen(argv[na])>=127) {
+ HTS_PANIC_PRINTF("Userdef structure string too long");
+ htsmain_free();
+ return -1;
+ }
+ strcpy(httrack.savename_userdef,argv[na]);
+ if (strnotempty(httrack.savename_userdef))
+ httrack.savename_type = -1; // userdef!
+ else
+ httrack.savename_type = 0; // -N "" : par défaut
+ }
+ } else {
+ sscanf(com+1,"%d",&httrack.savename_type); while(isdigit((unsigned char)*(com+1))) com++;
+ }
+ break;
+ case 'L':
+ {
+ sscanf(com+1,"%d",&httrack.savename_83);
+ switch(httrack.savename_83) {
+ case 0:
+ httrack.savename_83=1;
+ break;
+ case 1:
+ httrack.savename_83=0;
+ break;
+ default:
+ httrack.savename_83=2;
+ break;
+ }
+ while(isdigit((unsigned char)*(com+1))) com++;
+ }
+ break;
+ case 's':
+ if (isdigit((unsigned char)*(com+1))) {
+ sscanf(com+1,"%d",&httrack.robots);
+ while(isdigit((unsigned char)*(com+1))) com++;
+ } else httrack.robots=1;
+#if DEBUG_ROBOTS
+ printf("robots.txt mode set to %d\n",httrack.robots);
+#endif
+ break;
+ case 'o': sscanf(com+1,"%d",&httrack.errpage); while(isdigit((unsigned char)*(com+1))) com++; break;
+ case 'u': sscanf(com+1,"%d",&httrack.check_type); while(isdigit((unsigned char)*(com+1))) com++; break;
+ //
+ case 'C':
+ if (isdigit((unsigned char)*(com+1))) {
+ sscanf(com+1,"%d",&httrack.cache);
+ while(isdigit((unsigned char)*(com+1))) com++;
+ } else httrack.cache=1;
+ break;
+ case 'k': httrack.all_in_cache=1; break;
+ //
+ case 'z': httrack.debug=1; break; // petit debug
+ case 'Z': httrack.debug=2; break; // GROS debug
+ //
+ case '&': case '%': { // deuxième jeu d'options
+ com++;
+ switch(*com) {
+ case 'x': httrack.passprivacy=1; if (*(com+1)=='0') { httrack.passprivacy=0; com++; } break; // No passwords in html files
+ case 'q': httrack.includequery=1; if (*(com+1)=='0') { httrack.includequery=0; com++; } break; // No passwords in html files
+ case 'I': httrack.kindex=1; if (isdigit((unsigned char)*(com+1))) { sscanf(com+1,"%d",&httrack.kindex); while(isdigit((unsigned char)*(com+1))) com++; }
+ break; // Keyword Index
+ case 'c': sscanf(com+1,"%d",&httrack.maxconn); while(isdigit((unsigned char)*(com+1))) com++; break;
+ case 'e': sscanf(com+1,"%d",&httrack.extdepth); while(isdigit((unsigned char)*(com+1))) com++; break;
+ case 'B': httrack.tolerant=1; if (*(com+1)=='0') { httrack.tolerant=0; com++; } break; // HTTP/1.0 notamment
+ case 'h': httrack.http10=1; if (*(com+1)=='0') { httrack.http10=0; com++; } break; // HTTP/1.0
+ case 'z': httrack.nocompression=1; if (*(com+1)=='0') { httrack.nocompression=0; com++; } break; // pas de compression
+ case 'f': httrack.ftp_proxy=1; if (*(com+1)=='0') { httrack.ftp_proxy=0; com++; } break; // proxy http pour ftp
+ case 'P': httrack.parseall=1; if (*(com+1)=='0') { httrack.parseall=0; com++; } break; // tout parser
+ case 'n': httrack.norecatch=1; if (*(com+1)=='0') { httrack.norecatch=0; com++; } break; // ne pas reprendre fichiers effacés localement
+ case 's': httrack.sizehack=1; if (*(com+1)=='0') { httrack.sizehack=0; com++; } break; // hack sur content-length
+ case 'v': httrack.verbosedisplay=2; if (isdigit((unsigned char)*(com+1))) { sscanf(com+1,"%d",&httrack.verbosedisplay); while(isdigit((unsigned char)*(com+1))) com++; } break;
+
+ // preserve: no footer, original links
+ case 'p':
+ httrack.footer[0]='\0';
+ httrack.urlmode=4;
+ break;
+ case 'L': // URL list
+ if ((na+1>=argc) || (argv[na+1][0]=='-')) {
+ HTS_PANIC_PRINTF("Option %L needs to be followed by a blank space, and a text filename");
+ printf("Example: -%%L \"mylist.txt\"\n");
+ htsmain_free();
+ return -1;
+ } else{
+ na++;
+ if (strlen(argv[na])>=254) {
+ HTS_PANIC_PRINTF("File list string too long");
+ htsmain_free();
+ return -1;
+ }
+ strcpy(httrack.filelist,argv[na]);
+ }
+ break;
+ case 'A': // assume
+ if ((na+1>=argc) || (argv[na+1][0]=='-')) {
+ HTS_PANIC_PRINTF("Option %A needs to be followed by a blank space, and a filesystemtype=mimetype/mimesubtype parameters");
+ printf("Example: -%%A php3=text/html,asp=text/html\n");
+ htsmain_free();
+ return -1;
+ } else{
+ char* a;
+ na++;
+ if ( (strlen(argv[na]) + strlen(httrack.mimedefs) + 4) >= sizeof(httrack.mimedefs)) {
+ HTS_PANIC_PRINTF("Mime definition string too long");
+ htsmain_free();
+ return -1;
+ }
+ // --assume standard
+ if (strcmp(argv[na],"standard") == 0) {
+ strcpy(httrack.mimedefs,"\n");
+ strcat(httrack.mimedefs,HTS_ASSUME_STANDARD);
+ strcat(httrack.mimedefs,"\n");
+ } else {
+ strcat(httrack.mimedefs,argv[na]);
+ strcat(httrack.mimedefs,"\n");
+ }
+ a=httrack.mimedefs;
+ while(*a) {
+ switch(*a) {
+ case ',': case ' ': case '\r': case ';': case '\t':
+ *a='\n';
+ break;
+ }
+ a++;
+ }
+ }
+ break;
+ //
+ case 'l':
+ if ((na+1>=argc) || (argv[na+1][0]=='-')) {
+ HTS_PANIC_PRINTF("Option %l needs to be followed by a blank space, and an ISO language code");
+ printf("Example: -%%l \"en\"\n");
+ htsmain_free();
+ return -1;
+ } else{
+ na++;
+ if (strlen(argv[na])>=62) {
+ HTS_PANIC_PRINTF("Lang list string too long");
+ htsmain_free();
+ return -1;
+ }
+ strcpy(httrack.lang_iso,argv[na]);
+ }
+ break;
+ //
+ case 'F': // footer id
+ if ((na+1>=argc) || (argv[na+1][0]=='-')) {
+ HTS_PANIC_PRINTF("Option %F needs to be followed by a blank space, and a footer string");
+ printf("Example: -%%F \"<!-- Mirrored from %%s by HTTrack Website Copier/"HTTRACK_AFF_VERSION" "HTTRACK_AFF_AUTHORS", %%s -->\"\n");
+ htsmain_free();
+ return -1;
+ } else{
+ na++;
+ if (strlen(argv[na])>=254) {
+ HTS_PANIC_PRINTF("Footer string too long");
+ htsmain_free();
+ return -1;
+ }
+ strcpy(httrack.footer,argv[na]);
+ }
+ break;
+ case 'H': // debug headers
+ _DEBUG_HEAD=1;
+ break;
+ case 'O':
+#if HTS_WIN
+ printf("Warning option -%%O has no effect in this system (chroot)\n");
+#else
+ switch_chroot=1;
+#endif
+ break;
+ case 'U': // setuid
+ if ((na+1>=argc) || (argv[na+1][0]=='-')) {
+ HTS_PANIC_PRINTF("Option %U needs to be followed by a blank space, and a username");
+ printf("Example: -%%U smith\n");
+ htsmain_free();
+ return -1;
+ } else {
+ na++;
+#if HTS_WIN
+ printf("Warning option -%%U has no effect on this system (setuid)\n");
+#else
+#ifndef HTS_DO_NOT_USE_UID
+ /* Change the user id and gid */
+ {
+ struct passwd* userdef=getpwnam((const char*)argv[na]);
+ if (userdef) { /* we'll have to switch the user id */
+ switch_gid=userdef->pw_gid;
+ switch_uid=userdef->pw_uid;
+ }
+ }
+#else
+ printf("Warning option -%%U has no effect with this compiled version (setuid)\n");
+#endif
+#endif
+ }
+ break;
+
+ default: {
+ char s[HTS_CDLMAXSIZE];
+ sprintf(s,"invalid option %%%c\n",*com);
+ HTS_PANIC_PRINTF(s);
+ htsmain_free();
+ return -1;
+ }
+ break;
+
+ }
+ }
+ break;
+ //
+ case '@': { // troisième jeu d'options
+ com++;
+ switch(*com) {
+ case 'i':
+#if HTS_INET6==0
+ printf("Warning, option @i has no effect (v6 routines not compiled)\n");
+#else
+ {
+ int res=0;
+ if (isdigit((unsigned char)*(com+1))) {
+ sscanf(com+1,"%d",&res); while(isdigit((unsigned char)*(com+1))) com++;
+ }
+ switch(res) {
+ case 1:
+ case 4:
+ IPV6_resolver=1;
+ break;
+ case 2:
+ case 6:
+ IPV6_resolver=2;
+ break;
+ case 0:
+ IPV6_resolver=0;
+ break;
+ default:
+ printf("Unknown flag @i%d\n", res);
+ htsmain_free();
+ return -1;
+ break;
+ }
+ }
+#endif
+ break;
+
+ default: {
+ char s[HTS_CDLMAXSIZE];
+ sprintf(s,"invalid option %%%c\n",*com);
+ HTS_PANIC_PRINTF(s);
+ htsmain_free();
+ return -1;
+ }
+ break;
+
+ //case 's': httrack.sslengine=1; if (isdigit((unsigned char)*(com+1))) { sscanf(com+1,"%d",&httrack.sslengine); while(isdigit((unsigned char)*(com+1))) com++; } break;
+ }
+ }
+ break;
+
+ //
+ case '#': { // non documenté (appel de l'interface)
+ com++;
+ switch(*com) {
+ case 'f': httrack.flush=1; break;
+ case 'h':
+ printf("HTTrack version "HTTRACK_VERSION"\n");
+ exit(1);
+ break;
+ case 'p': httrack.aff_progress=1; break;
+ case 'S': httrack.shell=1; break; // stdin sur un shell
+ case 'K': httrack.keyboard=1; break; // vérifier stdin
+ //
+ case 'L': sscanf(com+1,"%d",&httrack.maxlink); while(isdigit((unsigned char)*(com+1))) com++; break;
+ case 'F': sscanf(com+1,"%d",&httrack.maxfilter); while(isdigit((unsigned char)*(com+1))) com++; break;
+ case 'Z': httrack.makestat=1; break;
+ case 'T': httrack.maketrack=1; break;
+ case 'u': sscanf(com+1,"%d",&httrack.waittime); while(isdigit((unsigned char)*(com+1))) com++; break;
+
+ case 'R': // ohh ftp, catch->ftpget
+ HTS_PANIC_PRINTF("Unexpected internal error with -#R command");
+ htsmain_free();
+ return -1;
+ break;
+ case 'P': { // catchurl
+ help_catchurl(httrack.path_log);
+ htsmain_free();
+ return 0;
+ }
+ break;
+
+ case '0': /* test #0 : filters */
+ if (na+2>=argc) {
+ HTS_PANIC_PRINTF("Option #0 needs to be followed by a filter string and a string");
+ printf("Example: '-#0' '*.gif' 'foo.gif'\n");
+ htsmain_free();
+ return -1;
+ } else {
+ if (strjoker(argv[na+2],argv[na+1],NULL,NULL))
+ printf("%s does match %s\n",argv[na+2],argv[na+1]);
+ else
+ printf("%s does NOT match %s\n",argv[na+2],argv[na+1]);
+ htsmain_free();
+ return 0;
+ }
+ break;
+ case '!':
+ if (na+1>=argc) {
+ HTS_PANIC_PRINTF("Option #! needs to be followed by a commandline");
+ printf("Example: '-#!' 'echo hello'\n");
+ htsmain_free();
+ return -1;
+ } else {
+ system(argv[na+1]);
+ }
+ break;
+
+ default: printf("Internal option %c not recognized\n",*com); break;
+ }
+ }
+ break;
+ case 'O': // output path
+ na++; // sauter, déja traité
+ break;
+ case 'P': // proxy
+ if ((na+1>=argc) || (argv[na+1][0]=='-')) {
+ HTS_PANIC_PRINTF("Option P needs to be followed by a blank space, and a proxy proxy:port or user:id@proxy:port");
+ printf("Example: -P proxy.myhost.com:8080\n");
+ htsmain_free();
+ return -1;
+ } else {
+ char* a;
+ na++;
+ httrack.proxy.active=1;
+ // Rechercher MAIS en partant de la fin à cause de user:pass@proxy:port
+ a = argv[na] + strlen(argv[na]) -1;
+ // a=strstr(argv[na],":"); // port
+ while( (a > argv[na]) && (*a != ':') && (*a != '@') ) a--;
+ if (*a == ':') { // un port est présent, <proxy>:port
+ sscanf(a+1,"%d",&httrack.proxy.port);
+ httrack.proxy.name[0]='\0';
+ strncat(httrack.proxy.name,argv[na],(int) (a - argv[na]));
+ } else { // <proxy>
+ httrack.proxy.port=8080;
+ strcpy(httrack.proxy.name,argv[na]);
+ }
+ }
+ break;
+ case 'F': // user-agent field
+ if ((na+1>=argc) || (argv[na+1][0]=='-')) {
+ HTS_PANIC_PRINTF("Option F needs to be followed by a blank space, and a user-agent name");
+ printf("Example: -F \"my_user_agent/1.0\"\n");
+ htsmain_free();
+ return -1;
+ } else{
+ na++;
+ if (strlen(argv[na])>=126) {
+ HTS_PANIC_PRINTF("User-agent length too long");
+ htsmain_free();
+ return -1;
+ }
+ strcpy(httrack.user_agent,argv[na]);
+ if (strnotempty(httrack.user_agent))
+ httrack.user_agent_send=1;
+ else
+ httrack.user_agent_send=0; // -F "" désactive l'option
+ }
+ break;
+ //
+ case 'V': // execute command
+ if ((na+1>=argc) || (argv[na+1][0]=='-')) {
+ HTS_PANIC_PRINTF("Option V needs to be followed by a system-command string");
+ printf("Example: -V \"tar uvf some.tar \\$0\"\n");
+ htsmain_free();
+ return -1;
+ } else{
+ na++;
+ if (strlen(argv[na])>=2048) {
+ HTS_PANIC_PRINTF("System-command length too long");
+ htsmain_free();
+ return -1;
+ }
+ strcpy(httrack.sys_com,argv[na]);
+ if (strnotempty(httrack.sys_com))
+ httrack.sys_com_exec=1;
+ else
+ httrack.sys_com_exec=0; // -V "" désactive l'option
+ }
+ break;
+ //
+ default: {
+ char s[HTS_CDLMAXSIZE];
+ sprintf(s,"invalid option %c\n",*com);
+ HTS_PANIC_PRINTF(s);
+ htsmain_free();
+ return -1;
+ }
+ break;
+ } // switch
+ com++;
+ } // while
+
+ } else { // URL/filters
+ char tempo[1024];
+ if (strnotempty(url)) strcat(url," "); // espace de séparation
+ strcpy(tempo,unescape_http_unharm(argv[na],1));
+ escape_spc_url(tempo);
+ strcat(url,tempo);
+ } // if argv=- etc.
+
+ } // for
+ }
+
+#if BDEBUG==3
+ printf("URLs/filters=%s\n",url);
+#endif
+
+#if DEBUG_STEPS
+ printf("Analyzing parameters done\n");
+#endif
+
+
+#if HTS_WIN
+#else
+#ifndef HTS_DO_NOT_USE_UID
+ /* Chroot - xxc */
+ if (switch_chroot) {
+ uid_t userid=getuid();
+ //struct passwd* userdef=getpwuid(userid);
+ //if (userdef) {
+ if (!userid) {
+ //if (strcmp(userdef->pw_name,"root")==0) {
+ char rpath[1024];
+ //printf("html=%s log=%s\n",httrack.path_html,httrack.path_log); // xxc
+ if ((httrack.path_html[0]) && (httrack.path_log[0])) {
+ char *a=httrack.path_html,*b=httrack.path_log,*c=NULL,*d=NULL;
+ c=a; d=b;
+ while ((*a) && (*a == *b)) {
+ if (*a=='/') { c=a; d=b; }
+ a++;
+ b++;
+ }
+
+ rpath[0]='\0';
+ if (c != httrack.path_html) {
+ if (httrack.path_html[0]!='/')
+ strcat(rpath,"./");
+ strncat(rpath,httrack.path_html,(int) (c - httrack.path_html));
+ }
+ {
+ char tmp[1024];
+ strcpy(tmp,c); strcpy(httrack.path_html,tmp);
+ strcpy(tmp,d); strcpy(httrack.path_log,tmp);
+ }
+ } else {
+ strcpy(rpath,"./");
+ strcpy(httrack.path_html,"/");
+ strcpy(httrack.path_log,"/");
+ }
+ if (rpath[0]) {
+ printf("[changing root path to %s (path_data=%s,path_log=%s)]\n",rpath,httrack.path_html,httrack.path_log);
+ if (chroot(rpath)) {
+ printf("ERROR! Can not chroot to %s!\n",rpath);
+ exit(0);
+ }
+ if (chdir("/")) { /* new root */
+ printf("ERROR! Can not chdir to %s!\n",rpath);
+ exit(0);
+ }
+ } else
+ printf("WARNING: chroot not possible with these paths\n");
+ }
+ //}
+ }
+
+ /* Setuid */
+ if (switch_uid>=0) {
+ printf("[setting user/group to %d/%d]\n",switch_uid,switch_gid);
+ if (setgid(switch_gid))
+ printf("WARNING! Can not setgid to %d!\n",switch_gid);
+ if (setuid(switch_uid))
+ printf("WARNING! Can not setuid to %d!\n",switch_uid);
+ }
+
+ /* Final check */
+ {
+ uid_t userid=getuid();
+ if (!userid) { /* running as r00t */
+ printf("WARNING! You are running this program as root!\n");
+ printf("It might be a good idea to use the -%%U option to change the userid:\n");
+ printf("Example: -%%U smith\n\n");
+ }
+ }
+#endif
+#endif
+
+ //printf("WARNING! This is *only* a beta-release of HTTrack\n");
+ io_flush;
+
+#if DEBUG_STEPS
+ printf("Cache & log settings\n");
+#endif
+
+ // on utilise le cache..
+ // en cas de présence des deux versions, garder la version la plus avancée,
+ // cad la version contenant le plus de fichiers
+ if (httrack.cache) {
+ if (fexist(fconcat(httrack.path_log,"hts-in_progress.lock"))) { // problemes..
+ if (fexist(fconcat(httrack.path_log,"hts-cache/new.dat")) && fexist(fconcat(httrack.path_log,"hts-cache/new.ndx"))) {
+ if (fexist(fconcat(httrack.path_log,"hts-cache/old.dat")) && fexist(fconcat(httrack.path_log,"hts-cache/old.ndx"))) {
+ // switcher si new<32Ko et old>65Ko (tailles arbitraires) ?
+ // ce cas est peut être une erreur ou un crash d'un miroir ancien, prendre
+ // alors l'ancien cache
+ if (fsize(fconcat(httrack.path_log,"hts-cache/new.dat"))<32768) {
+ if (fsize(fconcat(httrack.path_log,"hts-cache/old.dat"))>65536) {
+ if (fsize(fconcat(httrack.path_log,"hts-cache/old.dat")) > fsize(fconcat(httrack.path_log,"hts-cache/new.dat"))) {
+ remove(fconcat(httrack.path_log,"hts-cache/new.dat"));
+ remove(fconcat(httrack.path_log,"hts-cache/new.ndx"));
+ rename(fconcat(httrack.path_log,"hts-cache/old.dat"),fconcat(httrack.path_log,"hts-cache/new.dat"));
+ rename(fconcat(httrack.path_log,"hts-cache/old.ndx"),fconcat(httrack.path_log,"hts-cache/new.ndx"));
+ //} else { // ne rien faire
+ // remove("hts-cache/old.dat");
+ // remove("hts-cache/old.ndx");
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ // Débuggage des en têtes
+ if (_DEBUG_HEAD) {
+ ioinfo=fopen(fconcat(httrack.path_log,"hts-ioinfo.txt"),"wb");
+ }
+
+ {
+ char n_lock[256];
+ // on peut pas avoir un affichage ET un fichier log
+ // ca sera pour la version 2
+ if (httrack_logmode==1) {
+ httrack.log=stdout;
+ httrack.errlog=stderr;
+ } else if (httrack_logmode>=2) {
+ // deux fichiers log
+ structcheck(httrack.path_log);
+ if (fexist(fconcat(httrack.path_log,"hts-log.txt")))
+ remove(fconcat(httrack.path_log,"hts-log.txt"));
+ if (fexist(fconcat(httrack.path_log,"hts-err.txt")))
+ remove(fconcat(httrack.path_log,"hts-err.txt"));
+
+ httrack.log=fopen(fconcat(httrack.path_log,"hts-log.txt"),"w");
+ if (httrack_logmode==2)
+ httrack.errlog=fopen(fconcat(httrack.path_log,"hts-err.txt"),"w");
+ else
+ httrack.errlog=httrack.log;
+ if (httrack.log==NULL) {
+ char s[HTS_CDLMAXSIZE];
+ sprintf(s,"Unable to create log file %s",fconcat(httrack.path_log,"hts-log.txt"));
+ HTS_PANIC_PRINTF(s);
+ htsmain_free();
+ return -1;
+ } else if (httrack.errlog==NULL) {
+ char s[HTS_CDLMAXSIZE];
+ sprintf(s,"Unable to create log file %s",fconcat(httrack.path_log,"hts-err.txt"));
+ HTS_PANIC_PRINTF(s);
+ htsmain_free();
+ return -1;
+ }
+
+ } else {
+ httrack.log=NULL;
+ httrack.errlog=NULL;
+ }
+
+ // un petit lock-file pour indiquer un miroir en cours, ainsi qu'un éventuel fichier log
+ {
+ FILE* fp=NULL;
+ //int n=0;
+ char t[256];
+ time_local_rfc822(t); // faut bien que ca serve quelque part l'heure RFC1945 arf'
+
+ /* readme for information purpose */
+ {
+ FILE* fp=fopen(fconcat(httrack.path_log,"hts-cache/readme.txt"),"wb");
+ if (fp) {
+ fprintf(fp,"What's in this folder?"LF);
+ fprintf(fp,""LF);
+ fprintf(fp,"This folder (hts-cache) has been generated by WinHTTrack "HTTRACK_VERSION""LF);
+ fprintf(fp,"and is used for updating this website."LF);
+ fprintf(fp,"(The HTML website structure is stored here to allow fast updates)"LF""LF);
+ fprintf(fp,"DO NOT delete this folder unless you do not want to update the mirror in the future!!"LF);
+ fprintf(fp,"(you can safely delete old.dat, old.ndx and old.lst files, however)"LF);
+ fprintf(fp,""LF);
+ fprintf(fp,HTS_LOG_SECURITY_WARNING);
+ fclose(fp);
+ }
+ }
+
+ sprintf(n_lock,fconcat(httrack.path_log,"hts-in_progress.lock"));
+ //sprintf(n_lock,fconcat(httrack.path_log,"hts-in_progress.lock"),n);
+ /*do {
+ if (!n)
+ sprintf(n_lock,fconcat(httrack.path_log,"hts-in_progress.lock"),n);
+ else
+ sprintf(n_lock,fconcat(httrack.path_log,"hts-in_progress%d.lock"),n);
+ n++;
+ } while((fexist(n_lock)) && httrack.quiet);
+ if (fexist(n_lock)) {
+ if (!recuperer) {
+ remove(n_lock);
+ }
+ }*/
+
+ // vérifier existence de la structure
+ structcheck(httrack.path_html);
+ structcheck(httrack.path_log);
+
+ // reprise/update
+ if (httrack.cache) {
+ FILE* fp;
+ int i;
+#if HTS_WIN
+ mkdir(fconcat(httrack.path_log,"hts-cache"));
+#else
+ mkdir(fconcat(httrack.path_log,"hts-cache"),HTS_PROTECT_FOLDER);
+#endif
+ fp=fopen(fconcat(httrack.path_log,"hts-cache/doit.log"),"wb");
+ if (fp) {
+ for(i=0+1;i<argc;i++) {
+ if ( ((strchr(argv[i],' ')!=NULL) || (strchr(argv[i],'"')!=NULL) || (strchr(argv[i],'\\')!=NULL)) && (argv[i][0]!='"') ) {
+ int j;
+ fprintf(fp,"\"");
+ for(j=0;j<(int) strlen(argv[i]);j++) {
+ if (argv[i][j]==34)
+ fprintf(fp,"\\\"");
+ else if (argv[i][j]=='\\')
+ fprintf(fp,"\\\\");
+ else
+ fprintf(fp,"%c",argv[i][j]);
+ }
+ fprintf(fp,"\"");
+ } else if (strnotempty(argv[i])==0) { // ""
+ fprintf(fp,"\"\"");
+ } else { // non critique
+ fprintf(fp,"%s",argv[i]);
+ }
+ if (i<argc-1)
+ fprintf(fp," ");
+ }
+ fprintf(fp,LF);
+ fprintf(fp,"File generated automatically on %s, do NOT edit"LF,t);
+ fprintf(fp,LF);
+ fprintf(fp,"To update a mirror, just launch httrack without any parameters"LF);
+ fprintf(fp,"The existing cache will be used (and modified)"LF);
+ fprintf(fp,"To have other options, retype all parameters and launch HTTrack"LF);
+ fprintf(fp,"To continue an interrupted mirror, just launch httrack without any parameters"LF);
+ fprintf(fp,LF);
+ fclose(fp); fp=NULL;
+ //} else if (httrack.debug>1) {
+ // printf("! FileOpen error, \"%s\"\n",strerror(errno));
+ }
+ }
+
+ // petit message dans le lock
+ if ( (fp=fopen(n_lock,"wb"))!=NULL) {
+ int i;
+ fprintf(fp,"Mirror in progress since %s .. please wait!"LF,t);
+ for(i=0;i<argc;i++) {
+ if (strchr(argv[i],' ')==NULL)
+ fprintf(fp,"%s ",argv[i]);
+ else // entre ""
+ fprintf(fp,"\"%s\" ",argv[i]);
+ }
+ fprintf(fp,LF);
+ fprintf(fp, "To pause the engine: create an empty file named 'hts-stop.lock'"LF);
+ fclose(fp); fp=NULL;
+ }
+
+ // fichier log
+ if (httrack.log) {
+ int i;
+ fprintf(httrack.log,"HTTrack"HTTRACK_VERSION" launched on %s at %s"LF,t,url);
+ fprintf(httrack.log,"(");
+ for(i=0;i<argc;i++) {
+ if ((strchr(argv[i],' ')==NULL) || (strchr(argv[i],'\"')))
+ fprintf(httrack.log,"%s ",argv[i]);
+ else // entre "" (si espace(s) et pas déja de ")
+ fprintf(httrack.log,"\"%s\" ",argv[i]);
+ }
+ fprintf(httrack.log,")"LF);
+ fprintf(httrack.log,LF);
+ fprintf(httrack.log,"Information, Warnings and Errors reported for this mirror:"LF);
+ fprintf(httrack.log,HTS_LOG_SECURITY_WARNING );
+ fprintf(httrack.log,LF);
+ }
+
+ if (httrack_logmode) {
+ printf("Mirror launched on %s by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS""LF,t);
+ if (httrack.wizard==0) {
+ printf("mirroring %s with %d levels, %d sockets,t=%d,s=%d,logm=%d,lnk=%d,mdg=%d\n",url,httrack.depth,httrack.maxsoc,httrack.travel,httrack.seeker,httrack_logmode,httrack.urlmode,httrack.getmode);
+ } else { // the magic wizard
+ printf("mirroring %s with the wizard help..\n",url);
+ }
+ }
+ }
+
+ io_flush;
+
+ /* Info for wrappers */
+ if ( (httrack.debug>0) && (httrack.log!=NULL) ) {
+ fspc(httrack.log,"info"); fprintf(httrack.log,"engine: init"LF);
+ }
+#if HTS_ANALYSTE
+ hts_htmlcheck_init();
+#endif
+
+ // détourner SIGHUP etc.
+#if HTS_WIN
+ signal( SIGINT , sig_ask ); // ^C
+ signal( SIGTERM , sig_finish ); // kill <process>
+#else
+ signal( SIGHUP , sig_back ); // close window
+ signal( SIGTSTP , sig_back ); // ^Z
+ signal( SIGTERM , sig_finish ); // kill <process>
+ signal( SIGINT , sig_ask ); // ^C
+ signal( SIGPIPE , sig_brpipe ); // broken pipe (write into non-opened socket)
+/*
+deprecated - see SIGCHLD
+#ifndef HTS_DO_NOT_SIGCLD
+ signal( SIGCLD , sig_ignore ); // child change status
+#endif
+*/
+ signal( SIGCHLD , sig_ignore ); // child change status
+#endif
+#if DEBUG_STEPS
+ printf("Launching the mirror\n");
+#endif
+
+
+ // Lancement du miroir
+ // ------------------------------------------------------------
+ if (httpmirror(url, &httrack)==0) {
+ printf("Error during operation (see log file), site has not been successfully mirrored\n");
+ } else {
+ if (httrack.shell) {
+ HTT_REQUEST_START;
+ HT_PRINT("TRANSFER DONE"LF);
+ HTT_REQUEST_END
+ } else {
+ printf("Done.\n");
+ }
+ }
+ // ------------------------------------------------------------
+
+ //
+ // Build top index
+ if (httrack.dir_topindex) {
+ char rpath[1024*2];
+ char* a;
+ strcpy(rpath,httrack.path_html);
+ if (rpath[0]) {
+ if (rpath[strlen(rpath)-1]=='/')
+ rpath[strlen(rpath)-1]='\0';
+ }
+ a=strrchr(rpath,'/');
+ if (a) {
+ *a='\0';
+ hts_buildtopindex(rpath,httrack.path_bin);
+ if (httrack.log) {
+ fspc(httrack.log,"info"); fprintf(httrack.log,"Top index rebuilt (done)"LF);
+ }
+ }
+ }
+
+ if (exit_xh ==1) {
+ if (httrack.log) {
+ fprintf(httrack.log,"* * MIRROR ABORTED! * *\nThe current temporary cache is required for any update operation and only contains data downloaded during the present aborted session.\nThe former cache might contain more complete information; if you do not want to lose that information, you have to restore it and delete the current cache.\nThis can easily be done here by erasing the hts-cache/new.* files]\n");
+ }
+ }
+
+ /* Info for wrappers */
+ if ( (httrack.debug>0) && (httrack.log!=NULL) ) {
+ fspc(httrack.log,"info"); fprintf(httrack.log,"engine: free"LF);
+ }
+#if HTS_ANALYSTE
+ hts_htmlcheck_uninit();
+#endif
+
+ if (httrack_logmode!=1) {
+ if (httrack.errlog == httrack.log) httrack.errlog=NULL;
+ if (httrack.log) { fclose(httrack.log); httrack.log=NULL; }
+ if (httrack.errlog) { fclose(httrack.errlog); httrack.errlog=NULL; }
+ }
+
+ // Débuggage des en têtes
+ if (_DEBUG_HEAD) {
+ if (ioinfo) {
+ fclose(ioinfo);
+ }
+ }
+
+ // supprimer lock
+ remove(n_lock);
+ }
+
+ if (x_argvblk)
+ freet(x_argvblk);
+
+#if HTS_WIN
+#if HTS_ANALYSTE!=2
+// WSACleanup(); // ** non en cas de thread tjs présent!..
+#endif
+#endif
+#if HTS_TRACE_MALLOC
+ hts_freeall();
+#endif
+
+ printf("Thanks for using HTTrack!\n");
+ io_flush;
+ htsmain_free();
+ return 0; // OK
+}
+
+
+// main() subroutines
+
+// Check and normalize the directory path s in place (s must be writable; its capacity is not checked here).
+// Steps: expand_home(s), '\' -> '/', drop one trailing '/', expand a trailing '#', re-append a final '/'.
+// Returns 1 when a trailing '#' was expanded (or s was cleared for lack of a name), else 0.
+int check_path(char* s,char* defaultname) {
+  char* p;
+  size_t len;
+  int return_value=0;
+
+  // Replace name: ~/mywebsites/# -> /home/foo/mywebsites/#
+  expand_home(s);
+  for(p=s;*p;p++)    // convert \ -> / in a single pass (no strlen() per iteration)
+    if (*p=='\\')
+      *p='/';
+
+  // remove ending /
+  if (strnotempty(s) && (len=strlen(s))>0 && s[len-1]=='/')
+    s[len-1]='\0';
+
+  // Replace name: /home/foo/mywebsites/# -> /home/foo/mywebsites/wonderfulsite
+  if (strnotempty(s) && (len=strlen(s))>0 && s[len-1]=='#') {
+    if (strnotempty((defaultname?defaultname:""))) {
+      char tempo[HTS_URLMAXSIZE*2];
+      size_t room=sizeof(tempo)-1;          // never write past the end of tempo
+      char* a=strchr(defaultname,'#');      // we never know..
+      if (a) *a='\0';                       // note: this truncates the caller's defaultname
+      tempo[0]='\0';
+      strncat(tempo,s,(len-1<room)?(len-1):room);       // path without its trailing '#'
+      strncat(tempo,defaultname,room-strlen(tempo));    // default name, clipped to the space left
+      strcpy(s,tempo);
+    } else
+      s[0]='\0';           // Clear path (no name/default url given)
+    return_value=1;     // expanded
+  }
+
+  // ending /: add a slash at the end if there is none
+  if (strnotempty(s) && (len=strlen(s))>0 && s[len-1]!='/')
+    strcat(s,"/");
+
+  return return_value;
+}
+
+// Tells whether the command-line argument s is an option: returns 1 when s starts
+// with '-' and contains no '.', '/' or '*', else 0. Arguments such as
+// "-www.truc.fr" or "-*cgi-bin/" are therefore not options (note: a bare "-www"
+// is still taken for an option).
+int cmdl_opt(char* s) {
+  // not starting with '-': cannot be an option
+  if (s[0]!='-')
+    return 0;
+  // a dot, a slash or a joker anywhere in s: probably a -host or -path pattern
+  if (strpbrk(s,"./*")!=NULL)
+    return 0;
+  return 1;
+}
+
diff --git a/src/htscoremain.h b/src/htscoremain.h
new file mode 100644
index 0000000..0775492
--- /dev/null
+++ b/src/htscoremain.h
@@ -0,0 +1,62 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: httrack.c subroutines: */
+/* main routine (first called) */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+
+
+#ifndef HTSMAINHSR_DEFH
+#define HTSMAINHSR_DEFH
+
+// Extension=MIME list substituted when "standard" is given to the assume option (-%A standard)
+#define HTS_ASSUME_STANDARD \
+ "php2,php3,php4,php,cgi,asp,jsp,pl,cfm=text/html"
+
+#include "htsglobal.h"
+
+// Main: reads the parameters and calls the robot (declared as hts_main when HTS_ANALYSTE is non-zero)
+#if HTS_ANALYSTE
+int hts_main(int argc, char **argv);
+#else
+int main(int argc, char **argv);
+#endif
+
+int cmdl_opt(char* s); // 1 if s looks like an option (starts with '-', has no '.', '/' or '*'), else 0
+int check_path(char* s,char* defaultname); // normalizes path s in place; returns 1 if a trailing '#' was expanded
+
+
+
+
+#endif
diff --git a/src/htsdefines.h b/src/htsdefines.h
new file mode 100644
index 0000000..223fae1
--- /dev/null
+++ b/src/htsdefines.h
@@ -0,0 +1,100 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: Some defines for httrack.c and others */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+// Library header file
+#ifndef HTS_DEFINES_DEFH
+#define HTS_DEFINES_DEFH
+// Function-pointer types, one per hts_htmlcheck_* hook variable declared extern further down
+typedef void (* t_hts_htmlcheck_init)(void);
+typedef void (* t_hts_htmlcheck_uninit)(void);
+typedef int (* t_hts_htmlcheck_start)(httrackp* opt);
+typedef int (* t_hts_htmlcheck_end)(void);
+typedef int (* t_hts_htmlcheck_chopt)(httrackp* opt);
+typedef int (* t_hts_htmlcheck)(char* html,int len,char* url_adresse,char* url_fichier);
+typedef char* (* t_hts_htmlcheck_query)(char* question);
+typedef char* (* t_hts_htmlcheck_query2)(char* question);
+typedef char* (* t_hts_htmlcheck_query3)(char* question);
+typedef int (* t_hts_htmlcheck_loop)(lien_back* back,int back_max,int back_index,int lien_tot,int lien_ntot,int stat_time,hts_stat_struct* stats);
+typedef int (* t_hts_htmlcheck_check)(char* adr,char* fil,int status);
+typedef void (* t_hts_htmlcheck_pause)(char* lockfile);
+typedef void (* t_hts_htmlcheck_filesave)(char* file);
+typedef int (* t_hts_htmlcheck_linkdetected)(char* link);
+typedef int (* t_hts_htmlcheck_xfrstatus)(lien_back* back);
+typedef int (* t_hts_htmlcheck_savename)(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save);
+
+// shell interaction requests: HTbuff and the hook pointers are declared only when HTS_ANALYSTE is non-zero
+#if HTS_ANALYSTE
+extern char HTbuff[2048];
+extern t_hts_htmlcheck_init hts_htmlcheck_init;
+extern t_hts_htmlcheck_uninit hts_htmlcheck_uninit;
+extern t_hts_htmlcheck_start hts_htmlcheck_start;
+extern t_hts_htmlcheck_end hts_htmlcheck_end;
+extern t_hts_htmlcheck_chopt hts_htmlcheck_chopt;
+extern t_hts_htmlcheck hts_htmlcheck;
+extern t_hts_htmlcheck_query hts_htmlcheck_query;
+extern t_hts_htmlcheck_query2 hts_htmlcheck_query2;
+extern t_hts_htmlcheck_query3 hts_htmlcheck_query3;
+extern t_hts_htmlcheck_loop hts_htmlcheck_loop;
+extern t_hts_htmlcheck_check hts_htmlcheck_check;
+extern t_hts_htmlcheck_pause hts_htmlcheck_pause;
+extern t_hts_htmlcheck_filesave hts_htmlcheck_filesave;
+extern t_hts_htmlcheck_linkdetected hts_htmlcheck_linkdetected;
+extern t_hts_htmlcheck_xfrstatus hts_htmlcheck_xfrstatus;
+extern t_hts_htmlcheck_savename hts_htmlcheck_savename;
+#endif
+// Output macros: with HTS_ANALYSTE==2 text goes into HTbuff / _hts_errmsg, otherwise it is printed with printf
+#if HTS_ANALYSTE==2
+#define HT_PRINT(A) strcat(HTbuff,A);
+#define HT_REQUEST_START HTbuff[0]='\0';
+#define HT_REQUEST_END
+#define HTT_REQUEST_START HTbuff[0]='\0';
+#define HTT_REQUEST_END
+#define HTS_REQUEST_START HTbuff[0]='\0';
+#define HTS_REQUEST_END
+#define HTS_PANIC_PRINTF(S) strcpy(_hts_errmsg,S);
+#else // NOTE: HTS_REQUEST_START/END below expand to code reading a variable named opt at the point of use
+#define HT_PRINT(A) printf("%s",A);
+#define HT_REQUEST_START /*printf("§\n");*/
+#define HT_REQUEST_END /*printf("§\n");*/
+#define HTT_REQUEST_START /*if (httrack.shell) printf("§\n");*/
+#define HTT_REQUEST_END /*if (httrack.shell) printf("§\n");*/
+#define HTS_REQUEST_START if (opt->shell) { HT_REQUEST_START }
+#define HTS_REQUEST_END if (opt->shell) { HT_REQUEST_END }
+#define HTS_PANIC_PRINTF(S) printf("%s\n",S);
+#endif
+
+#endif
+
diff --git a/src/htsfilters.c b/src/htsfilters.c
new file mode 100644
index 0000000..ed0dee4
--- /dev/null
+++ b/src/htsfilters.c
@@ -0,0 +1,316 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: httrack.c subroutines: */
+/* filters ("regexp") */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+
+// *.gif match all gif files
+// *[file]/*[file].exe match all exe files with one folder structure
+// *[A-Z,a-z,0-9,/,?] match letters, nums, / and ?
+// *[A-Z,a-z,0-9,/,?]
+
+// *[>10,<100].gif match all gif files larger than 10KB and smaller than 100KB
+// *[file,>10,<100].gif FORBIDDEN: you must not mix size test and pattern test
+
+#include "htsfilters.h"
+
+/* specific definitions */
+#include "htsbase.h"
+#include "htslib.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+/* END specific definitions */
+
+// Decide whether a name is allowed by an ordered filter list such as
+// {"+*.toto","-*.zip","+*.tata"}.
+//
+// filters/nfil : array of filter strings; the first character of each entry
+//                is its sign, the rest is the pattern given to strjoker()
+// nom          : name to test
+// size         : optional (may be NULL) value checked by size tests such as
+//                *[<5]; when a matching filter changed it, *size receives
+//                that limit on return
+// size_flag    : optional, forwarded to strjoker() (set to 1 when a size
+//                test was evaluated)
+// depth        : optional, receives the index of the last matching filter
+//
+// Every filter is evaluated and the last match wins.
+// Returns 1 if that filter starts with '+', -1 for any other sign, and 0 if
+// no filter matched.
+// Example: +-*.gif*[<5] == drop GIF files if <5KB
+int fa_strjoker(char** filters,int nfil,char* nom,LLint* size,int* size_flag,int* depth) {
+  int verdict = 0;    // undecided
+  int i;
+  LLint sizelimit=0;
+  if (size)
+    sizelimit=*size;
+  for(i=0;i<nfil;i++) {
+    LLint sz=0;
+    // a missing or empty entry has no sign and no pattern: filters[i]+1
+    // would point past its terminator
+    if ((filters[i]==NULL) || (filters[i][0]=='\0'))
+      continue;
+    if (size)
+      sz=*size;
+    // without a caller-supplied size, hand NULL down so that size tests fail
+    // deterministically instead of reading an uninitialized value
+    if (strjoker(nom,filters[i] + 1,size ? &sz : NULL,size_flag)) {   // matched
+      if (size)
+        if (sz != *size)
+          sizelimit=sz;
+      if (filters[i][0]=='+')
+        verdict = 1;    // allowed
+      else
+        verdict = -1;   // forbidden
+      if (depth)
+        *depth=i;
+    }
+  }
+  if (size)
+    *size=sizelimit;
+  return verdict;
+}
+
+
+// Wildcard ("joker") matcher.
+// Matches chaine against the pattern joker and returns a non-NULL pointer
+// into chaine on success, NULL otherwise. Literal parts of the pattern are
+// compared character by character with streql() (described as case
+// insensitive by the original notes). The function is recursive: each '*'
+// group tries every possible length for the run it absorbs.
+//
+// Pattern elements handled here:
+//   *            any run of characters
+//   *[a,b,A-Z]   any run (possibly empty) of the listed characters/ranges,
+//                separated by ',' or ';'
+//   *(...)       same classes, but exactly one character
+//   *[file] / *[name]  any character except '?' and '/'
+//   *[path]      any character except '?'
+//   *[param]     any character, but only when chaine starts with '?'
+//   *[]          nothing may follow
+//   *[<N] *[>N]  size test against *size; on success *size is replaced by N
+//                and *size_flag (if given) is set to 1; fails when size is
+//                NULL or *size is negative
+// An exhausted pattern matches an empty chaine, or one starting with '?'
+// (HTTrack specific, for names like index.html?Choix=2).
+HTS_INLINE char* strjoker(char* chaine,char* joker,LLint* size,int* size_flag) {
+  if (strnotempty(joker)==0) {    // end of pattern
+    if (strnotempty(chaine)==0)   // end of subject too: ok
+      return chaine;
+    else if (chaine[0]=='?')
+      return chaine;              // --?-- query string left over
+    else
+      return NULL;                // not found
+  }
+
+  // The pattern is consumed 'word' by 'word'; a word is either a '*' group
+  // or a sequence of literal characters
+
+  if (strcmp(joker,"*")==0) {     // lone '*': everything matches
+    return chaine;
+  }
+
+  // case 1: '*' or '*[..]' / '*(..)' groups
+  if (joker[0]=='*') {
+    int jmp;          // number of pattern characters taken by this group
+    int cut = 0;      // set by *[]: no extra character allowed
+    char pass[256];   // pass[c] != 0 when character c may be absorbed
+    char LEFT='[',RIGHT=']';
+    int unique=0;     // set by '(': absorb exactly one character
+
+    switch(joker[1]) {
+    case '[':
+      LEFT='[';
+      RIGHT=']';
+      unique=0;
+      break;
+    case '(':
+      LEFT='(';
+      RIGHT=')';
+      unique=1;
+      break;
+    }
+
+    if ((joker[1]==LEFT) && (joker[2]!=LEFT)) {   // character-class group
+      int i;
+      int len=(int) strlen(joker);
+      for(i=0;i<256;i++) pass[i]=0;
+
+      // reserved names
+      if ((strfield(joker+2,"file")) || (strfield(joker+2,"name"))) {
+        for(i=0;i<256;i++) pass[i]=1;
+        pass[(int) '?'] = 0;
+        pass[(int) '/'] = 0;
+        i=2;
+        while ((i<len) && (joker[i]!=RIGHT) && (joker[i])) i++;
+      } else if (strfield(joker+2,"path")) {
+        for(i=0;i<256;i++) pass[i]=1;
+        pass[(int) '?'] = 0;
+        i=2;
+        while ((i<len) && (joker[i]!=RIGHT) && (joker[i])) i++;
+      } else if (strfield(joker+2,"param")) {
+        if (chaine[0]=='?') {   // a parameter starts right here
+          for(i=0;i<256;i++) pass[i]=1;
+        }                       // otherwise: matches nothing
+        i=2;
+        while ((i<len) && (joker[i]!=RIGHT) && (joker[i])) i++;
+      } else {
+        // decode directives such as *[A-Z,a-z,0-9]
+        i=2;
+        if (joker[i] == RIGHT) {    // *[] means "nothing after"
+          cut = 1;
+        } else {
+          // the index is checked first: a truncated range can push it
+          // beyond the end of the pattern
+          while ((i<len) && (joker[i]!=RIGHT) && (joker[i])) {
+            if ( (joker[i]=='<') || (joker[i]=='>') ) {   // size test, e.g. *[<10]
+              int lsize=0;
+              int lverdict;
+              i++;
+              if (sscanf(joker+i,"%d",&lsize) == 1) {
+                if (size == NULL)
+                  return NULL;      // no size available: test impossible
+                if (*size < 0)
+                  return NULL;      // does not match
+                if (size_flag)
+                  *size_flag=1;     /* the size test was evaluated */
+                if (joker[i-1]=='<')
+                  lverdict=(*size<lsize);
+                else
+                  lverdict=(*size>lsize);
+                if (!lverdict)
+                  return NULL;      // does not match
+                *size=lsize;
+                return chaine;      // ok
+              }
+            }
+            else if (joker[i+1]=='-') {   // range, e.g. *[A-Z]
+              if ((int) (unsigned char) joker[i+2]>(int) (unsigned char) joker[i]) {
+                int j;
+                for(j=(int) (unsigned char) joker[i];j<=(int) (unsigned char) joker[i+2];j++)
+                  pass[j]=1;
+              }                     // an empty or reversed range adds nothing
+              i+=3;
+            } else {                // single character, e.g. *[ ]
+              pass[(int) (unsigned char) joker[i]]=1;
+              i++;
+            }
+            if ((i<len) && ((joker[i]==',') || (joker[i]==';'))) i++;
+          }
+          // a pattern ending inside a range (e.g. "*[a-") leaves the index
+          // past the terminator: bring it back onto it
+          if (i>len) i=len;
+        }
+      }
+      // amount to skip in the pattern (including the closing bracket, if any)
+      jmp=i;
+      if (joker[i]) jmp++;
+
+    } else {    // plain '*': every character is allowed
+      int i;
+      for(i=0;i<256;i++) pass[i]=1;
+      jmp=1;
+      if (joker[2]==LEFT) jmp=3;    // doubled opening bracket after the '*'
+    }
+
+    {
+      int i,max;
+      char* adr;
+
+      // the subject must end exactly here
+      if (cut) {
+        if (strnotempty(chaine))
+          return NULL;
+        else
+          return chaine;
+      }
+
+      // first try with the group absorbing nothing (not for '(..)' groups,
+      // which take exactly one character)
+      if (!unique) {
+        if ( (adr=strjoker(chaine,joker+jmp,size,size_flag)) ) {
+          return adr;
+        }
+      }
+
+      // then let the group absorb 1, 2, ... allowed characters
+      i=0;
+      if (!unique)
+        max=strlen(chaine);
+      else {    /* *(a) only match a (not aaaaa) */
+        // one character is required: an empty subject cannot match, and must
+        // not be scanned past its terminator
+        if (strnotempty(chaine)==0)
+          return NULL;
+        max=1;
+      }
+      while(i<(int) max) {
+        if (pass[(int) (unsigned char) chaine[i]]) {    // allowed character
+          if ( (adr=strjoker(chaine+i+1,joker+jmp,size,size_flag)) ) {
+            return adr;
+          }
+          i++;
+        } else i=max+2;   // forbidden character: leave the loop
+      }
+
+      // every character was allowed: try with all of them absorbed
+      if (i!=max+2)
+        if ( (adr=strjoker(chaine+max,joker+jmp,size,size_flag)) )
+          return adr;
+
+      return NULL;
+    }
+
+  } else {    // case 2: literal word followed by the rest (toto*..)
+    if (strnotempty(chaine)) {
+      int jmp=0,ok=1;
+
+      // compare the literal prefix of the pattern with the subject
+      while((joker[jmp]!='*') && (joker[jmp]) && (ok)) {
+        if (!streql(chaine[jmp],joker[jmp])) {
+          ok=0;
+        }
+        jmp++;
+      }
+
+      if (ok) {
+        // prefix matched: go on with the remainder
+        if (strjoker(chaine+jmp,joker+jmp,size,size_flag))
+          return chaine;    // address of the first letter
+      }
+
+    }
+    return NULL;
+  }
+
+  return NULL;
+}
+
+// Multiple search: applies strjoker() at every successive position of chaine
+// and returns the first non-NULL result, or NULL if no position matches.
+// Example: searching a text for strcpy(*[A-Z,a-z],"*[0-9]"); finds the first
+// strcpy on a variable named with letters that copies a string of digits.
+// WARNING: avoid wildcards at the start of the pattern, the scan gets costly.
+char* strjokerfind(char* chaine,char* joker) {
+  char* cursor;
+  for(cursor=chaine; *cursor; cursor++) {
+    char* hit=strjoker(cursor,joker,NULL,NULL);
+    if (hit != NULL)
+      return hit;
+  }
+  return NULL;
+}
diff --git a/src/htsfilters.h b/src/htsfilters.h
new file mode 100644
index 0000000..168d330
--- /dev/null
+++ b/src/htsfilters.h
@@ -0,0 +1,49 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: httrack.c subroutines: */
+/* filters ("regexp") */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+
+
+#ifndef HTSFILT_DEFH
+#define HTSFILT_DEFH
+
+#include "htsbase.h"
+
+// Tests nom against the nfil entries of filters; returns 1 when the last
+// matching entry starts with '+', -1 when it starts with anything else, and
+// 0 when no entry matches. size, size_flag and depth are optional outputs.
+int fa_strjoker(char** filters,int nfil,char* nom,LLint* size,int* size_flag,int* depth);
+// Matches chaine against the wildcard pattern joker; returns a pointer into
+// chaine on success, NULL otherwise.
+HTS_INLINE char* strjoker(char* chaine,char* joker,LLint* size,int* size_flag);
+// Tries strjoker() at each successive position of chaine; returns the first
+// non-NULL result, or NULL.
+char* strjokerfind(char* chaine,char* joker);
+
+#endif
diff --git a/src/htsftp.c b/src/htsftp.c
new file mode 100644
index 0000000..5fbe895
--- /dev/null
+++ b/src/htsftp.c
@@ -0,0 +1,1135 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: basic FTP protocol manager */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+// Gestion protocole ftp
+// Version .05 (01/2000)
+
+#include "htsftp.h"
+
+#include "htsglobal.h"
+#include "htsbase.h"
+#include "htsnet.h"
+#include "htsthread.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#if HTS_WIN
+#else
+//inet_ntoa
+#include <arpa/inet.h>
+#endif
+
+#if HTS_WIN
+#ifndef __cplusplus
+// DOS
+#include <process.h> /* _beginthread, _endthread */
+#endif
+#endif
+
+// ftp mode passif
+// #if HTS_INET6==0
+#define FTP_PASV 1
+// #else
+// no passive mode for v6
+// #define FTP_PASV 0
+// #endif
+
+#define FTP_DEBUG 0
+//#define FORK_DEBUG 0
+
+#define FTP_STATUS_READY 1001
+
+#if USE_BEGINTHREAD
+/*
+#ifdef __cplusplus
+// C++ -> Shell
+UINT back_launch_ftp( LPVOID pP ) {
+ lien_back* back=(lien_back*) pP;
+ if (back == NULL) {
+ //back->status=FTP_STATUS_READY; // fini
+ //back->r.statuscode=-1;
+ return -1;
+ }
+
+ // lancer ftp
+ run_launch_ftp(back);
+ // prêt
+ back->status=0;
+
+ return 0; // thread completed successfully
+}
+#else
+*/
+// Thread entry point: pP is the lien_back describing the transfer.
+// Runs the FTP routine between hts_init()/hts_uninit(), then sets the entry's
+// status to 0. A NULL argument is ignored.
+PTHREAD_TYPE back_launch_ftp( void* pP ) {
+  lien_back* job=(lien_back*) pP;
+
+  if (job != NULL) {
+    /* Initialize */
+    hts_init();
+
+#if FTP_DEBUG
+    printf("[Launching main ftp routine]\n");
+#endif
+    // do the transfer, then flag the entry as ready
+    run_launch_ftp(job);
+    job->status=0;
+
+    /* Uninitialize */
+    hts_uninit();
+  } else {
+#if FTP_DEBUG
+    printf("[ftp error: no args]\n");
+#endif
+  }
+  return PTHREAD_RETURN;
+}
+/*#endif*/
+// Start the transfer described by back in the background: a thread is
+// created with back_launch_ftp as its entry point and back as its argument.
+void launch_ftp(lien_back* back) {
+#if FTP_DEBUG
+  printf("[Launching main ftp thread]\n");
+#endif
+  _beginthread(back_launch_ftp, 0, (void*) back);
+}
+
+#else
+// Unix sans pthread
+// Variant used when USE_BEGINTHREAD is off: runs the FTP routine in the
+// calling process, then sets the entry's status to 0. Always returns 0.
+int back_launch_ftp(lien_back* back) {
+  run_launch_ftp(back);   // do the transfer
+  back->status=0;         // ready
+  return 0;
+}
+// Variant used when USE_BEGINTHREAD is off: start the transfer in a child
+// process.
+// path is first created as an empty file; nothing is launched if that fails.
+// The child then executes
+//   exec -#R <url_adr> <url_fil> <url_sav> <path>
+// If the exec fails, the child writes "-1 unable to launch ..." into path and
+// renames it to path + ".ok" (or removes path if it cannot be reopened).
+// The parent returns immediately.
+void launch_ftp(lien_back* back,char* path,char* exec) {
+  FILE* fp = fopen(fconv(path),"wb");
+  if (fp) {
+    char flag[]="-#R";
+    char *args[7];
+    fclose(fp); fp=NULL;
+
+    // The argument vector points straight at the caller's strings: copying
+    // them into fixed 256-byte buffers overflowed on long URLs or paths
+    args[0]=exec;
+    args[1]=flag;
+    args[2]=back->url_adr;
+    args[3]=back->url_fil;
+    args[4]=back->url_sav;
+    args[5]=path;
+    args[6]=NULL;
+    switch (fork()) {   // note: vfork misbehaved badly here
+    case -1: printf("Can not vfork() process\n"); break;
+    case 0:             // child
+      if (execvp(args[0],args)==-1) {
+        fp=fopen(fconv(path),"wb");
+        if (fp) {
+          fprintf(fp,"-1 unable to launch %s",args[0]);
+          fclose(fp); fp=NULL;
+          rename(path,concat(path,".ok"));
+        } else remove(path);
+      }
+      _exit(0);         // leave without running the parent's exit handlers
+      break;
+    default:            // parent: nothing to do
+      break;
+    }
+  }
+}
+#endif
+
+// Helpers used to abort an FTP transfer.
+// _T_SOC_close closes a socket and resets the variable to INVALID_SOCKET; it
+// is braced so that both statements stay together when it is the body of an
+// 'if'.
+#ifdef _WIN32
+#define _T_SOC_close(soc) { closesocket(soc); soc=INVALID_SOCKET; }
+#else
+#define _T_SOC_close(soc) { close(soc); soc=INVALID_SOCKET; }
+#endif
+// _HALT_FTP closes whichever of soc_ctl, soc_servdat and soc_dat are open;
+// it expects these three variables to be in scope.
+#define _HALT_FTP { \
+  if ( soc_ctl != INVALID_SOCKET ) _T_SOC_close(soc_ctl); \
+  if ( soc_servdat != INVALID_SOCKET ) _T_SOC_close(soc_servdat); \
+  if ( soc_dat != INVALID_SOCKET ) _T_SOC_close(soc_dat); \
+}
+// _CHECK_HALT_FTP: if stop_ftp(back) reports a stop request, close everything
+// and return 0 from the enclosing function.
+#define _CHECK_HALT_FTP \
+  if (stop_ftp(back)) { \
+    _HALT_FTP \
+    return 0; \
+  }
+
+// The actual FTP transfer, called once the thread/fork wrappers are running.
+//
+// Fetches back->url_fil from the host in back->url_adr (optional
+// user:pass@ prefix and :port suffix) into the file back->url_sav; a path
+// ending in '/' requests a directory listing (LIST -A) instead of a file.
+// Sequence: connect, USER, PASS, CWD, TYPE I, PASV (or EPSV), SIZE and
+// optionally REST when the target file already exists, RETR/LIST, data
+// reception, 226 check, QUIT.
+//
+// Progress is published in back->info; the outcome in back->r.statuscode
+// (200 on success, -1 on error, -5 when the host cannot be resolved),
+// back->r.msg and back->r.size. back->status is set to FTP_STATUS_READY when
+// the routine is done. Always returns 0.
+// When stop_ftp(back) reports a stop request, all sockets are closed and the
+// function returns at the next _CHECK_HALT_FTP.
+int run_launch_ftp(lien_back* back) {
+  char user[256]="anonymous";
+  char pass[256]="user@";
+  char line_retr[2048];
+  int port=21;
+#if FTP_PASV
+  int port_pasv=0;
+#endif
+  char adr_ip[1024];
+  char *adr,*real_adr;
+  char* ftp_filename="";
+  int timeout = 300;        // timeout
+  int timeout_onfly=8;      // extra wait for the final reply
+  int transfer_list=0;      // directory listing requested
+  int rest_understood=0;    // REST command accepted by the server
+  t_fullhostent fullhostent_buffer;   // resolver buffer
+  //
+  T_SOC soc_ctl=INVALID_SOCKET;
+  T_SOC soc_servdat=INVALID_SOCKET;
+  T_SOC soc_dat=INVALID_SOCKET;
+  //
+  SOCaddr server_data;
+  int server_data_size=sizeof(server_data);
+  //
+  line_retr[0]=adr_ip[0]='\0';
+
+  timeout=300;
+
+  // reset the result
+  strcpy(back->r.msg,"");
+  back->r.statuscode=0;
+  back->r.size=0;
+
+  // Commands are built in, and sent through, 1024-byte line buffers (see
+  // send_line): refuse paths that cannot fit instead of overflowing them
+  if (strlen(back->url_fil) > 1000) {
+    strcpy(back->r.msg,"File name too long");
+    back->status=FTP_STATUS_READY;
+    back->r.statuscode=-1;
+    return 0;
+  }
+
+  // fetch user and pass if present, and skip user:id@ in the address
+  real_adr = strchr(back->url_adr,':');
+  if (real_adr) real_adr++;
+  else real_adr=back->url_adr;
+  while(*real_adr=='/') real_adr++;   // skip /
+  if ( (adr = jump_identification(real_adr)) != real_adr) {   // user
+    int i=0;
+    int n=0;
+    pass[0]='\0';
+    // the user name ends at ':' or at the separator just before adr;
+    // characters that do not fit in user[] are dropped
+    while( (real_adr[i]!=':') && (real_adr[i]) && ((&real_adr[i+1]) < adr) ) {
+      if (n < (int) sizeof(user) - 1)
+        user[n++]=real_adr[i];
+      i++;
+    }
+    user[n]='\0';
+    if (real_adr[i]==':') {   // pass
+      int j=0;
+      n=0;
+      i++;    // skip the ':' too
+      // the password runs up to the separator just before adr
+      while( ((&real_adr[i+j+1]) < adr) && (real_adr[i+j]) ) {
+        if (n < (int) sizeof(pass) - 1)
+          pass[n++]=real_adr[i+j];
+        j++;
+      }
+      pass[n]='\0';
+    }
+  }
+
+  // Build RETR <name>
+  {
+    char* a;
+    size_t fil_len=strlen(back->url_fil);
+    // start on the last character (on the terminator for an empty path, so
+    // that nothing is read before the buffer)
+    a=back->url_fil + (fil_len > 0 ? fil_len-1 : 0);
+    while( (a > back->url_fil) && (*a!='/')) a--;
+    if (*a == '/') {    // found
+      a++;    // skip /
+      ftp_filename=a;
+      if (strnotempty(a)) {
+        char* ua=unescape_http(a);
+        if (
+          (strchr(ua, ' '))
+          ||
+          (strchr(ua, '\"'))
+          ||
+          (strchr(ua, '\''))
+          ) {
+          sprintf(line_retr,"RETR \"%s\"",ua);
+        } else {    /* Regular one */
+          sprintf(line_retr,"RETR %s",ua);
+        }
+      } else {
+        transfer_list=1;
+        sprintf(line_retr,"LIST -A");
+      }
+    } else {
+      strcpy(back->r.msg,"Unexpected PORT error");
+      back->status=FTP_STATUS_READY;
+      back->r.statuscode=-1;
+    }
+  }
+
+#if FTP_DEBUG
+  printf("Connecting to %s...\n",adr);
+#endif
+
+  // connection
+  {
+    SOCaddr server;
+    int server_size=sizeof(server);
+    t_hostent* hp;
+    char * a;
+    char _adr[256];
+    _adr[0]='\0';
+    // clear the structure
+    memset(&server, 0, sizeof(server));
+
+    // split host and port
+    a=strchr(adr,':');
+    if (a)
+      sscanf(a+1,"%d",&port);
+    {
+      size_t hostlen = a ? (size_t) (a - adr) : strlen(adr);
+      if (hostlen >= sizeof(_adr)) {    // would not fit: cannot be resolved
+        strcpy(back->r.msg,"Unable to get server's address");
+        back->status=FTP_STATUS_READY;
+        back->r.statuscode=-5;
+        _HALT_FTP
+        return 0;
+      }
+      strncat(_adr,adr,hostlen);
+    }
+
+    // resolve the host name
+    strcpy(back->info,"host name");
+    hp = hts_gethostbyname(_adr, &fullhostent_buffer);
+    if (hp == NULL) {
+      strcpy(back->r.msg,"Unable to get server's address");
+      back->status=FTP_STATUS_READY;
+      back->r.statuscode=-5;
+      _HALT_FTP
+      return 0;
+    }
+    _CHECK_HALT_FTP;
+
+    // copy the address
+    SOCaddr_copyaddr(server, server_size, hp->h_addr_list[0], hp->h_length);
+    // copy the address for the data connection
+    SOCaddr_copyaddr(server_data, server_data_size, hp->h_addr_list[0], hp->h_length);
+
+    // create the control socket (stream)
+    soc_ctl=socket(SOCaddr_sinfamily(server), SOCK_STREAM, 0);
+    if (soc_ctl==INVALID_SOCKET) {
+      strcpy(back->r.msg,"Unable to create a socket");
+      back->status=FTP_STATUS_READY;
+      back->r.statuscode=-1;
+      _HALT_FTP
+      return 0;
+    }
+
+    SOCaddr_initport(server, port);
+
+    // blocking connect (we are running in a thread/child)
+    strcpy(back->info,"connect");
+
+#if HTS_WIN
+    if (connect(soc_ctl, (const struct sockaddr FAR *)&server, server_size) != 0) {
+#else
+    if (connect(soc_ctl, (struct sockaddr *)&server, server_size) == -1) {
+#endif
+      strcpy(back->r.msg,"Unable to connect to the server");
+      back->status=FTP_STATUS_READY;
+      back->r.statuscode=-1;
+      _HALT_FTP
+      return 0;
+    }
+    _CHECK_HALT_FTP;
+
+    {
+      char line[1024];
+      // send the login
+
+      // --USER--
+      get_ftp_line(soc_ctl,line,timeout);   // greeting
+      _CHECK_HALT_FTP;
+
+      if (line[0]=='2') {   // ok, connected
+        strcpy(back->info,"login: user");
+        sprintf(line,"USER %s",user);
+        send_line(soc_ctl,line);
+        get_ftp_line(soc_ctl,line,timeout);
+        _CHECK_HALT_FTP;
+        if ((line[0]=='3') || (line[0]=='2')) {
+          // --PASS--
+          strcpy(back->info,"login: pass");
+          sprintf(line,"PASS %s",pass);
+          send_line(soc_ctl,line);
+          get_ftp_line(soc_ctl,line,timeout);
+          _CHECK_HALT_FTP;
+          if (line[0]=='2') {   // ok
+            // --CWD--
+            char* a;
+            size_t fil_len=strlen(back->url_fil);
+            a=back->url_fil + (fil_len > 0 ? fil_len-1 : 0);
+            while( (a > back->url_fil) && (*a!='/')) a--;
+            if (*a == '/') {    // found
+              char target[1024];
+              target[0]='\0';
+              // bounded by the path length check at the top of the function
+              strncat(target,back->url_fil,(int) (a - back->url_fil));
+              if (strnotempty(target)==0)
+                strcat(target,"/");
+              strcpy(back->info,"cwd");
+              sprintf(line,"CWD %s",target);
+              send_line(soc_ctl,line);
+              get_ftp_line(soc_ctl,line,timeout);
+              _CHECK_HALT_FTP;
+              if (line[0]=='2') {
+                send_line(soc_ctl,"TYPE I");
+                get_ftp_line(soc_ctl,line,timeout);
+                _CHECK_HALT_FTP;
+                if (line[0]=='2') {
+                  // ok..
+                } else {
+                  strcpy(back->r.msg,"TYPE I error");
+                  back->status=FTP_STATUS_READY;
+                  back->r.statuscode=-1;
+                }
+              } else {
+                sprintf(back->r.msg,"CWD error: %s",linejmp(line));
+                back->status=FTP_STATUS_READY;
+                back->r.statuscode=-1;
+              }   // otherwise we are ready
+            } else {
+              strcpy(back->r.msg,"Unexpected ftp error");
+              back->status=FTP_STATUS_READY;
+              back->r.statuscode=-1;
+            }
+
+          } else {
+            sprintf(back->r.msg,"Bad password: %s",linejmp(line));
+            back->status=FTP_STATUS_READY;
+            back->r.statuscode=-1;
+          }
+        } else {
+          sprintf(back->r.msg,"Bad user name: %s",linejmp(line));
+          back->status=FTP_STATUS_READY;
+          back->r.statuscode=-1;
+        }
+      } else {
+        sprintf(back->r.msg,"Connection refused: %s",linejmp(line));
+        back->status=FTP_STATUS_READY;
+        back->r.statuscode=-1;
+      }
+
+      // logged in: set up the data connection and request the transfer
+      if (back->r.statuscode != -1) {
+
+        //
+        // Pre-REST
+        //
+#if FTP_PASV
+        if (SOCaddr_getproto(server, server_size) == '1') {
+          strcpy(back->info,"pasv");
+          sprintf(line,"PASV");
+          send_line(soc_ctl,line);
+          get_ftp_line(soc_ctl,line,timeout);
+        } else {    /* ipv6 */
+          line[0]='\0';
+        }
+        _CHECK_HALT_FTP;
+        if (line[0]=='2') {
+          char *a,*b,*c;
+          a=strchr(line,'(');   // example: 227 Entering Passive Mode (123,45,67,89,177,27)
+          if (a) {
+
+            // -- parse the IP address and the port --
+            a++;
+            b=strchr(a,',');
+            if (b) b=strchr(b+1,',');
+            if (b) b=strchr(b+1,',');
+            if (b) b=strchr(b+1,',');
+            c=a; while( (c=strchr(c,',')) ) *c='.';   // replace , by .
+            if (b) *b='\0';
+            //
+            strcpy(adr_ip,a);   // copy the ip address
+            //
+            if (b) {
+              a=b+1;    // start of the port
+              b=strchr(a,'.');
+              if (b) {
+                int n1,n2;
+                //
+                *b='\0';
+                b++;
+                c=strchr(b,')');
+                if (c) {
+                  *c='\0';
+                  if ( (sscanf(a,"%d",&n1)==1) && (sscanf(b,"%d",&n2)==1) && (strlen(adr_ip)<=16)) {
+                    port_pasv=n2+(n1<<8);
+                  }
+                } else {
+                  deletesoc(soc_dat); soc_dat=INVALID_SOCKET;
+                }
+              }
+            }
+            // -- end of the address/port parsing --
+          } else {
+            sprintf(back->r.msg,"PASV incorrect: %s",linejmp(line));
+            back->status=FTP_STATUS_READY;
+            back->r.statuscode=-1;
+          }
+        } else {
+          /*
+           * try epsv (ipv6) *
+           */
+          strcpy(back->info,"pasv");
+          sprintf(line,"EPSV");
+          send_line(soc_ctl,line);
+          get_ftp_line(soc_ctl,line,timeout);
+          _CHECK_HALT_FTP;
+          if (line[0]=='2') {   /* got it */
+            char *a;
+            a=strchr(line,'(');   // example: 229 Entering Extended Passive Mode (|||6446|)
+            if (
+              (a != NULL)
+              &&
+              (*a == '(')
+              && (*(a+1))
+              && (*(a+1) == *(a+2)) && (*(a+1) == *(a+3))
+              && (isdigit((unsigned char) *(a+4)))
+              && (*(a+5))
+              ) {
+              unsigned int n1 = 0;
+              if (sscanf(a+4,"%u",&n1)==1) {
+                if ((n1 <= 65535) && (n1 > 0)) {
+                  port_pasv=n1;
+                }
+              }
+            } else {
+              sprintf(back->r.msg,"EPSV incorrect: %s",linejmp(line));
+              back->status=FTP_STATUS_READY;
+              back->r.statuscode=-1;
+            }
+          } else {
+            sprintf(back->r.msg,"PASV/EPSV error: %s",linejmp(line));
+            back->status=FTP_STATUS_READY;
+            back->r.statuscode=-1;
+          }
+        }
+#else
+        // nothing to do beforehand
+#endif
+
+#if FTP_PASV
+        if (port_pasv) {
+#endif
+          // SIZE
+          if (back->r.statuscode != -1) {
+            if (!transfer_list) {
+              char* ua=unescape_http(ftp_filename);
+              if (
+                (strchr(ua, ' '))
+                ||
+                (strchr(ua, '\"'))
+                ||
+                (strchr(ua, '\''))
+                ) {
+                sprintf(line,"SIZE \"%s\"", ua);
+              } else {
+                sprintf(line,"SIZE %s", ua);
+              }
+
+              // SIZE?
+              strcpy(back->info,"size");
+              send_line(soc_ctl,line);
+              get_ftp_line(soc_ctl,line,timeout);
+              _CHECK_HALT_FTP;
+              // REST is only tried when SIZE is understood (otherwise skipped:
+              // cf. the problem of txt.gz files decompressed on the fly)
+              if (line[0]=='2') {
+                // REST?
+                if (fexist(back->url_sav) && (transfer_list==0)) {
+                  strcpy(back->info,"rest");
+                  sprintf(line,"REST "LLintP,(LLint)fsize(back->url_sav));
+                  send_line(soc_ctl,line);
+                  get_ftp_line(soc_ctl,line,timeout);
+                  _CHECK_HALT_FTP;
+                  if ((line[0]=='3') || (line[0]=='2')) {   // ok
+                    rest_understood=1;
+                  }   // otherwise: too bad
+                }
+              }   // otherwise: too bad
+            }
+          }
+#if FTP_PASV
+        }
+#endif
+
+        //
+        // Post-REST
+        //
+#if FTP_PASV
+        // Ok, connect to the data port
+        if (port_pasv) {
+          SOCaddr server;
+          int server_size=sizeof(server);
+          t_hostent* hp;
+          // clear the structure
+          memset(&server, 0, sizeof(server));
+
+          // infos
+          strcpy(back->info,"resolv");
+
+          // resolve: PASV gave an address, EPSV reuses the control one
+          if (adr_ip[0]) {
+            hp = hts_gethostbyname(adr_ip, &fullhostent_buffer);
+            if (hp) {
+              SOCaddr_copyaddr(server, server_size, hp->h_addr_list[0], hp->h_length);
+            } else {
+              server_size=0;
+            }
+          } else {
+            memcpy(&server, &server_data, sizeof(server_data));
+            server_size=server_data_size;
+          }
+
+          // infos
+          strcpy(back->info,"cnxdata");
+#if FTP_DEBUG
+          printf("Data: Connecting to %s:%d...\n", adr_ip, port_pasv);
+#endif
+          if (server_size > 0) {
+            // socket
+            soc_dat=socket(SOCaddr_sinfamily(server), SOCK_STREAM, 0);
+            if (soc_dat != INVALID_SOCKET) {
+              SOCaddr_initport(server, port_pasv);
+#if HTS_WIN
+              if (connect(soc_dat, (const struct sockaddr FAR *)&server, server_size) == 0) {
+#else
+              if (connect(soc_dat, (struct sockaddr *)&server, server_size) != -1) {
+#endif
+                strcpy(back->info,"retr");
+                strcpy(line,line_retr);
+                send_line(soc_ctl,line);
+                get_ftp_line(soc_ctl,line,timeout);
+                _CHECK_HALT_FTP;
+                if (line[0]=='1') {
+                  // OK
+                } else {
+                  deletesoc(soc_dat); soc_dat=INVALID_SOCKET;
+                  //
+                  sprintf(back->r.msg,"RETR command errror: %s",linejmp(line));
+                  back->status=FTP_STATUS_READY;
+                  back->r.statuscode=-1;
+                }
+              } else {
+#if FTP_DEBUG
+                printf("Data: unable to connect\n");
+#endif
+                deletesoc(soc_dat); soc_dat=INVALID_SOCKET;
+                //
+                strcpy(back->r.msg,"Unable to connect");
+                back->status=FTP_STATUS_READY;
+                back->r.statuscode=-1;
+              }
+            } else {
+              strcpy(back->r.msg,"Unable to create a socket");
+              back->status=FTP_STATUS_READY;
+              back->r.statuscode=-1;
+            }
+          } else {
+            sprintf(back->r.msg,"Unable to resolve IP %s",adr_ip);
+            back->status=FTP_STATUS_READY;
+            back->r.statuscode=-1;
+          }
+        } else {
+          sprintf(back->r.msg,"PASV incorrect: %s",linejmp(line));
+          back->status=FTP_STATUS_READY;
+          back->r.statuscode=-1;
+        }
+#else
+        // active mode: listen on a local port and announce it
+        strcpy(back->info,"listening");
+        if ( (soc_servdat = get_datasocket(line)) != INVALID_SOCKET) {
+          _CHECK_HALT_FTP;
+          send_line(soc_ctl,line);    // command prepared by get_datasocket
+          get_ftp_line(soc_ctl,line,timeout);
+          _CHECK_HALT_FTP;
+          if (line[0]=='2') {   // ok
+            strcpy(back->info,"retr");
+            strcpy(line,line_retr);
+            send_line(soc_ctl,line);
+            get_ftp_line(soc_ctl,line,timeout);
+            _CHECK_HALT_FTP;
+            if (line[0]=='1') {
+              struct sockaddr dummyaddr;
+              int dummylen = sizeof(struct sockaddr);
+              if ( (soc_dat=accept(soc_servdat,&dummyaddr,&dummylen)) == INVALID_SOCKET) {
+                strcpy(back->r.msg,"Unable to accept connection");
+                back->status=FTP_STATUS_READY;
+                back->r.statuscode=-1;
+              }
+            } else {
+              sprintf(back->r.msg,"RETR command errror: %s",linejmp(line));
+              back->status=FTP_STATUS_READY;
+              back->r.statuscode=-1;
+            }
+          } else {
+            sprintf(back->r.msg,"PORT command error: %s",linejmp(line));
+            back->status=FTP_STATUS_READY;
+            back->r.statuscode=-1;
+          }
+          // close and reset, so that a later halt does not close it again
+          _T_SOC_close(soc_servdat);
+        } else {
+          strcpy(back->r.msg,"Unable to listen to a port");
+          back->status=FTP_STATUS_READY;
+          back->r.statuscode=-1;
+        }
+#endif
+
+        //
+        // Ok, data connection established
+        //
+        if (soc_dat != INVALID_SOCKET) {
+          if (rest_understood) {    // REST sent and accepted: append
+            filenote(back->url_sav,NULL);
+            back->r.fp = fopen(fconv(back->url_sav),"ab");
+          } else
+            back->r.fp = filecreate(back->url_sav);
+          strcpy(back->info,"receiving");
+          if (back->r.fp != NULL) {
+            char buff[1024];
+            int len=1;
+            int read_len=1024;
+
+            while( (len>0) && (!stop_ftp(back)) ) {
+              // wait for data
+              len=1;    // no error so far
+              switch(wait_socket_receive(soc_dat,timeout)) {
+              case -1:
+                strcpy(back->r.msg,"Read error");
+                back->status=FTP_STATUS_READY;
+                back->r.statuscode=-1;
+                len=0;    // end
+                break;
+              case 0:
+                sprintf(back->r.msg,"Time out (%d)",timeout);
+                back->status=FTP_STATUS_READY;
+                back->r.statuscode=-1;
+                len=0;    // end
+                break;
+              }
+
+              // receive
+              if (len) {
+                len=recv(soc_dat,buff,read_len,0);
+                if (len>0) {
+                  back->r.size+=len;
+                  HTS_STAT.HTS_TOTAL_RECV+=len;
+                  if (back->r.fp) {
+                    if ((int) fwrite(buff,1,len,back->r.fp) != len) {
+                      strcpy(back->r.msg,"Write error");
+                      back->status=FTP_STATUS_READY;
+                      back->r.statuscode=-1;
+                      len=0;    // error
+                    }
+                  } else {
+                    strcpy(back->r.msg,"Unexpected write error");
+                    back->status=FTP_STATUS_READY;
+                    back->r.statuscode=-1;
+                  }
+                } else {    // error or end of data
+                  back->status=FTP_STATUS_READY;
+                  back->r.statuscode=0;
+                }
+                read_len=1024;
+              }
+            }
+            if (back->r.fp) {
+              fclose(back->r.fp);
+              back->r.fp=NULL;
+            }
+          } else {
+            strcpy(back->r.msg,"Unable to write file");
+            back->status=FTP_STATUS_READY;
+            back->r.statuscode=-1;
+          }
+          // close and reset, so that a later halt does not close it again
+          _T_SOC_close(soc_dat);
+
+          // 226 Transfer complete?
+          if (back->r.statuscode != -1) {
+            if (wait_socket_receive(soc_ctl,timeout_onfly)>0) {
+              // fetch the 226 transfer complete reply
+              get_ftp_line(soc_ctl,line,timeout);
+              if (line[0]=='2') {   // OK
+                strcpy(back->r.msg,"OK");
+                back->status=FTP_STATUS_READY;
+                back->r.statuscode=200;
+              } else {
+                sprintf(back->r.msg,"RETR incorrect: %s",linejmp(line));
+                back->status=FTP_STATUS_READY;
+                back->r.statuscode=-1;
+              }
+            } else {
+              strcpy(back->r.msg,"Read error");
+              back->status=FTP_STATUS_READY;
+              back->r.statuscode=-1;
+            }
+          }
+
+        }
+
+      }
+
+    }
+
+    _CHECK_HALT_FTP;
+    strcpy(back->info,"quit");
+    send_line(soc_ctl,"QUIT");    // bye bye
+    get_ftp_line(soc_ctl,NULL,timeout);
+    _T_SOC_close(soc_ctl);
+  }
+
+  if (back->r.statuscode!=-1) {
+    back->r.statuscode=200;
+    strcpy(back->r.msg,"OK");
+  }
+  back->status=FTP_STATUS_READY;    // done
+  return 0;
+}
+
+
+
+// Open a listening socket for an active-mode data connection.
+// The socket is bound to the address found for the local host name, put in
+// listen state, and the matching command is written into to_send:
+// "PORT ..." when HTS_INET6 is 0, "EPRT |proto|address|port|" otherwise.
+// Returns the socket, or INVALID_SOCKET on failure (to_send is then empty).
+T_SOC get_datasocket(char* to_send) {
+  T_SOC soc = INVALID_SOCKET;
+  char h_loc[256+2];
+  SOCaddr server;
+  int server_size=sizeof(server);
+  SOCaddr server2;
+  int len=sizeof(server2);
+  t_hostent* hp_loc;
+  t_fullhostent buffer;
+
+  to_send[0]='\0';
+
+  // local host name and its address
+  if (gethostname(h_loc,256)!=0)
+    return soc;
+  memset(&server, 0, sizeof(server));
+  hp_loc=vxgethostbyname(h_loc, &buffer);
+  if (hp_loc == NULL)
+    return soc;
+  SOCaddr_copyaddr(server, server_size, hp_loc->h_addr_list[0], hp_loc->h_length);
+
+  soc=socket(SOCaddr_sinfamily(server), SOCK_STREAM, 0);
+  if (soc == INVALID_SOCKET)
+    return soc;
+
+  // bind, read back the bound address, listen (backlog of 10, arbitrary);
+  // any failure releases the socket
+  memset(&server2, 0, sizeof(server2));
+  if ( (bind(soc,(struct sockaddr*) &server, server_size) != 0)
+    || (getsockname(soc,(struct sockaddr*) &server2, &len) != 0)
+    || (listen(soc,10) < 0) ) {
+#if HTS_WIN
+    closesocket(soc);
+#else
+    close(soc);
+#endif
+    return INVALID_SOCKET;
+  }
+
+#if HTS_INET6==0
+  {
+    unsigned short int portval,n1,n2;
+    char dots[256+2];
+    char dot[256+2];
+    char* p;
+    // port bytes
+    portval = SOCaddr_sinport(server2);
+    n1 = (portval & 0xff);
+    n2 = ((portval>>8) & 0xff);
+    SOCaddr_inetntoa(dot, 256, server2, sizeof(server2));
+    // the address is sent with commas instead of dots/colons
+    dots[0]='\0';
+    strncat(dots, dot, 128);
+    while( (p=strchr(dots,'.')) ) *p=',';
+    while( (p=strchr(dots,':')) ) *p=',';
+    sprintf(to_send,"PORT %s,%d,%d",dots,n1,n2);
+  }
+#else
+  /*
+  EPRT |1|132.235.1.2|6275|
+  EPRT |2|1080::8:800:200C:417A|5282|
+  */
+  {
+    char dot[256+2];
+    SOCaddr_inetntoa(dot, 256, server2, len);
+    sprintf(to_send,"EPRT |%c|%s|%d|", SOCaddr_getproto(server2, len), dot, SOCaddr_sinport(server2));
+  }
+#endif
+
+  return soc;
+}
+
+#if FTP_DEBUG
+FILE* dd=NULL; // debug trace file, opened on first use as "toto.txt" by send_line()/get_ftp_line()
+#endif
+
+// Send/receive routines for the FTP control connection.
+//
+// send_line: send one command line, terminated by CR LF, on socket soc.
+//   soc  : control socket
+//   data : command text, without the line terminator
+// Returns non-zero when the whole line was accepted by send(), 0 on error
+// (socket not reported writable, failed/short send, or command too long
+// for the local 1024-byte buffer).
+int send_line(T_SOC soc,char* data) {
+  char line[1024];
+  size_t len;
+  if (_DEBUG_HEAD) {
+    if (ioinfo) {
+      fprintf(ioinfo,"---> %s\x0d\x0a",data);
+      fflush(ioinfo);
+    }
+  }
+#if FTP_DEBUG
+  if (dd == NULL) dd = fopen("toto.txt","w");
+  if (dd != NULL) {      // fopen() may have failed
+    fprintf(dd,"---> %s\x0d\x0a",data); fflush(dd);
+  }
+  printf("---> %s",data); fflush(stdout);
+#endif
+  // the buffer must hold the command, CR LF and the final NUL
+  len = strlen(data);
+  if (len > sizeof(line) - 3) {
+#if FTP_DEBUG
+    printf("!COMMAND TOO LONG\n");
+#endif
+    return 0;
+  }
+  sprintf(line,"%s\x0d\x0a",data);
+  len += 2;              // CR LF appended
+  if (check_socket_connect(soc) != 1) {
+#if FTP_DEBUG
+    printf("!SOC WRITE ERROR\n");
+#endif
+    return 0;            // error, not connected any more
+  }
+#if FTP_DEBUG
+  {
+    int r = (send(soc,line,len,0) == (int) len);
+    printf("%s\x0d\x0a",data); fflush(stdout);
+    return r;
+  }
+#else
+  return (send(soc,line,len,0) == (int) len);
+#endif
+}
+
+// Read one complete FTP reply from the control socket, byte by byte.
+// Multi-line replies (a line whose 4th character is '-', followed by
+// continuation lines) are consumed; only the closing line is kept.
+//   soc     : control socket
+//   line    : receives the reply, NUL-terminated and without CR/LF
+//             (up to 1024 bytes are copied); may be NULL to discard it
+//   timeout : handed to wait_socket_receive() before every byte
+// Returns 1 when a non-empty line was read, 0 on read error or timeout
+// (line then holds a synthetic "500 *..." message).
+int get_ftp_line(T_SOC soc,char* line,int timeout) {
+  char data[1024];
+  int i,ok,multiline;
+#if FTP_DEBUG
+  if (dd == NULL) dd = fopen("toto.txt","w");
+#endif
+
+  data[0]='\0';
+  i=ok=multiline=0; data[3]='\0';
+  do {
+    char b;
+
+    // wait for incoming data
+    switch(wait_socket_receive(soc,timeout)) {
+    case -1:    // read error
+      if (line) strcpy(line,"500 *read error");
+      return 0;
+    case 0:     // nothing received in time
+      if (line) sprintf(line,"500 *read timeout (%d)",timeout);
+      return 0;
+    }
+
+    switch(recv(soc,&b,1,0)) {
+    case 1:
+      HTS_STAT.HTS_TOTAL_RECV+=1;    // account for incoming traffic
+      // store the byte unless it is CR/LF; bytes that would overrun the
+      // buffer (room is kept for the final NUL) are dropped
+      if ((b!=10) && (b!=13) && (i < (int) sizeof(data) - 1))
+        data[i++]=b;
+      break;
+    default:    // closed connection or error
+      if (line) strcpy(line,"500 *read error");
+      return 0;
+    }
+    if ( ((b==13) || (b==10)) && (i>0) ) {    // end of a non-empty line
+      if ( (data[3] == '-')
+        || ((multiline) && (!isdigit((unsigned char)data[0]))) ) {
+        // first or intermediate line of a multi-line reply: start over
+        data[3]='\0';
+        i=0;
+        multiline=1;
+      }
+      else
+        ok=1;    // final line, leave the loop
+    }
+  } while(!ok);
+  data[i]='\0';
+
+  if (_DEBUG_HEAD) {
+    if (ioinfo) {
+      fprintf(ioinfo,"<--- %s\x0d\x0a",data);
+      fflush(ioinfo);
+    }
+  }
+#if FTP_DEBUG
+  if (dd != NULL) {      // fopen() may have failed
+    fprintf(dd,"<--- %s\n",data); fflush(dd);
+  }
+  printf("<--- %s\n",data);
+#endif
+  if (line) strcpy(line,data);
+  return (strnotempty(data));
+}
+
+// Skip the leading "NNN " reply code of an FTP reply line.
+// Returns line+4 when the line is longer than 4 characters, otherwise
+// the line itself.
+char* linejmp(char* line) {
+  return (strlen(line) > 4) ? (line + 4) : line;
+}
+
+// Poll a socket for incoming data without waiting (zero select() timeout).
+// Returns:
+//   0 : no data
+//   1 : data detected
+//  -1 : the socket is flagged in the exception set
+int check_socket(T_SOC soc) {
+  struct timeval no_wait;
+  fd_set rd_set,err_set;
+  no_wait.tv_sec=0;
+  no_wait.tv_usec=0;
+  // watch the socket for reading...
+  FD_ZERO(&rd_set);
+  FD_SET(soc,&rd_set);
+  // ...and for exceptional conditions
+  FD_ZERO(&err_set);
+  FD_SET(soc,&err_set);
+  select(soc + 1,&rd_set,NULL,&err_set,&no_wait);
+  if (FD_ISSET(soc,&err_set))
+    return -1;
+  return FD_ISSET(soc,&rd_set) ? 1 : 0;
+}
+// Poll a socket for writability without waiting (zero select() timeout).
+// Returns:
+//   0 : not writable yet
+//   1 : writable
+//  -1 : the socket is flagged in the exception set
+int check_socket_connect(T_SOC soc) {
+  struct timeval no_wait;
+  fd_set wr_set,err_set;
+  no_wait.tv_sec=0;
+  no_wait.tv_usec=0;
+  // watch the socket for writing...
+  FD_ZERO(&wr_set);
+  FD_SET(soc,&wr_set);
+  // ...and for exceptional conditions
+  FD_ZERO(&err_set);
+  FD_SET(soc,&err_set);
+  select(soc + 1,NULL,&wr_set,&err_set,&no_wait);
+  if (FD_ISSET(soc,&err_set))
+    return -1;
+  return FD_ISSET(soc,&wr_set) ? 1 : 0;
+}
+// Wait for data on a socket: check_socket() is polled, with Sleep(100)
+// between polls, until it reports something or until time_local() has
+// advanced by 'timeout' since entry.
+// Returns the last check_socket() result (1 data, -1 error, 0 timed out).
+int wait_socket_receive(T_SOC soc,int timeout) {
+  TStamp started=time_local();
+  int r;
+#if FTP_DEBUG
+  printf("\x0dWaiting for data "); fflush(stdout);
+#endif
+  for(;;) {
+    r = check_socket(soc);
+    if (r)
+      break;          // data or error
+    if ( !( ((int) ((TStamp) (time_local()-started))) < timeout ) )
+      break;          // waited long enough
+    Sleep(100);
+#if FTP_DEBUG
+    printf("."); fflush(stdout);
+#endif
+  }
+#if FTP_DEBUG
+  printf("\x0dreturn: %d\x0d",r); fflush(stdout);
+#endif
+  return r;
+}
+
+
+// Has a cancel request been received for this transfer?
+// When back->stop_ftp is set, the slot is marked finished with an error
+// status and the message "Cancelled by User", and 1 is returned;
+// otherwise nothing is touched and 0 is returned.
+int stop_ftp(lien_back* back) {
+  if (!back->stop_ftp)
+    return 0;
+  back->r.statuscode=-1;
+  back->status=FTP_STATUS_READY;    // finished
+  strcpy(back->r.msg,"Cancelled by User");
+  return 1;
+}
+
+
+
+
diff --git a/src/htsftp.h b/src/htsftp.h
new file mode 100644
index 0000000..e24f1f3
--- /dev/null
+++ b/src/htsftp.h
@@ -0,0 +1,68 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: basic FTP protocol manager .h */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+
+#ifndef HTSFTP_DEFH
+#define HTSFTP_DEFH
+
+#include "htsbase.h"
+#include "htsbasenet.h"
+#include "htsthread.h"
+
+// lien_back
+#include "htscore.h"
+
+#if USE_BEGINTHREAD // two alternative signatures for the launch functions
+void launch_ftp(lien_back* back);
+PTHREAD_TYPE back_launch_ftp( void* pP ); // presumably the thread entry point — TODO confirm
+#else
+void launch_ftp(lien_back* back,char* path,char* exec);
+int back_launch_ftp(lien_back* back);
+#endif
+
+int run_launch_ftp(lien_back* back); // runs the FTP transfer for 'back'; status and message end up in back->status / back->r
+int send_line(T_SOC soc,char* data); // send data + CR LF on the control socket; 0 = error
+int get_ftp_line(T_SOC soc,char* line,int timeout); // read one reply (last line of a multi-line reply) into line (may be NULL); 0 = error/timeout
+T_SOC get_datasocket(char* to_send); // open a listening data socket, write the PORT/EPRT command into to_send; INVALID_SOCKET on failure
+int stop_ftp(lien_back* back); // 1 if back->stop_ftp is set (slot is then marked as cancelled), else 0
+char* linejmp(char* line); // skip the first 4 characters ("NNN ") of a reply when it is longer than that
+int check_socket(T_SOC soc); // non-blocking read poll: 0 no data, 1 data, -1 error
+int check_socket_connect(T_SOC soc); // non-blocking write poll: 0 not writable, 1 writable, -1 error
+int wait_socket_receive(T_SOC soc,int timeout); // poll check_socket() until data, error or timeout; returns its last result
+
+
+#endif
+
diff --git a/src/htsglobal.h b/src/htsglobal.h
new file mode 100644
index 0000000..ce54d3d
--- /dev/null
+++ b/src/htsglobal.h
@@ -0,0 +1,332 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: Global #define file */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+// Central header gathering all the global #defines
+
+#ifndef HTTRACK_GLOBAL_DEFH
+#define HTTRACK_GLOBAL_DEFH
+
+// Version
+#define HTTRACK_VERSION "3.20-2"
+#define HTTRACK_VERSIONID "3.20.02"
+#define HTTRACK_AFF_VERSION "3.x"
+//#define HTTRACK_AFF_WARNING "This is a RELEASE CANDIDATE version of WinHTTrack Website Copier 3.0\nPlease report us any bug or problem"
+
+
+
+// Platform definition
+#include "htssystem.h"
+#include "htsconfig.h"
+
+// Windows sockets or Unix sockets (HTS_PLATFORM==1 selects Windows)
+#if HTS_PLATFORM==1
+#define HTS_WIN 1
+#else
+#define HTS_WIN 0
+#endif
+
+// DOS compatibility (follows HTS_WIN)
+#if HTS_WIN
+#define HTS_DOSNAME 1
+#else
+#define HTS_DOSNAME 0
+#endif
+
+// use zlib? (forced on when _WINDOWS is defined and HTS_USEZLIB is unset or 0)
+#if HTS_USEZLIB
+#else
+#ifdef _WINDOWS
+#define HTS_USEZLIB 1
+#endif
+#endif
+
+#ifndef HTS_INET6
+#define HTS_INET6 0
+#endif
+
+// use openssl? (on by default)
+#ifndef HTS_USEOPENSSL
+#define HTS_USEOPENSSL 1
+#endif
+
+#if HTS_WIN
+#else
+#define __cdecl
+#endif
+
+/*
+#if HTS_XGETHOST
+#if HTS_PLATFORM==1
+#ifndef __cplusplus
+#undef HTS_XGMETHOD
+#undef HTS_XGETHOST
+#endif
+#endif
+#else
+#undef HTS_XGMETHOD
+#undef HTS_XGETHOST
+#endif
+*/
+
+
+#if HTS_ANALYSTE // on non-Windows builds HTS_ANALYSTE is forced to 1 when unset or 0
+#else
+#if HTS_WIN
+#else
+#undef HTS_ANALYSTE
+// Analyste
+#define HTS_ANALYSTE 1
+#define HTS_ANALYSTE_CONSOLE 1
+#endif
+#endif
+
+
+/* rc file name and, outside Windows, default installation paths */
+#if HTS_WIN
+#define HTS_HTTRACKRC "httrackrc"
+#else
+
+#ifndef HTS_ETCPATH
+#define HTS_ETCPATH "/etc"
+#endif
+#ifndef HTS_BINPATH
+#define HTS_BINPATH "/usr/bin"
+#endif
+#ifndef HTS_LIBPATH
+#define HTS_LIBPATH "/usr/lib"
+#endif
+#ifndef HTS_PREFIX
+#define HTS_PREFIX "/usr"
+#endif
+
+#define HTS_HTTRACKRC ".httrackrc"
+#define HTS_HTTRACKCNF HTS_ETCPATH"/httrack.conf"
+#define HTS_HTTRACKDIR HTS_PREFIX"/doc/httrack/"
+
+#endif
+
+/* Hash table handling: number of slots */
+#define HTS_HASH_SIZE 20147
+/* Maximum size of a URL */
+#define HTS_URLMAXSIZE 512
+/* Maximum size of the command line (>=HTS_URLMAXSIZE*2) */
+#define HTS_CDLMAXSIZE 1024
+/* Copyright (C) Xavier Roche and other contributors */
+#define HTTRACK_AFF_AUTHORS "[XR&CO'2002]"
+#define HTS_DEFAULT_FOOTER "<!-- Mirrored from %s%s by HTTrack Website Copier/"HTTRACK_AFF_VERSION" "HTTRACK_AFF_AUTHORS", %s -->"
+#define HTS_UPDATE_WEBSITE "http://www.httrack.com/update.php3?Product=HTTrack&Version="HTTRACK_VERSIONID"&VersionStr="HTTRACK_VERSION"&Platform=%d&Language=%s"
+
+#define H_CRLF "\x0d\x0a"
+#define CRLF "\x0d\x0a"
+#if HTS_WIN
+#define LF "\x0d\x0a"
+#else
+#define LF "\x0a"
+#endif
+
+/* stands for "empty parameter", for example -F (none) */
+#define HTS_NOPARAM "(none)"
+#define HTS_NOPARAM2 "\"(none)\""
+
+/* maximum and minimum (arguments are evaluated more than once) */
+#define maximum(A,B) ( (A) > (B) ? (A) : (B) )
+#define minimum(A,B) ( (A) < (B) ? (A) : (B) )
+
+/* non-empty string? yields 1 or 0 */
+#define strnotempty(A) (((A)[0]!='\0') ? 1 : 0)
+
+/* inline optimisation when available (C++ only) */
+#ifdef __cplusplus
+#define HTS_INLINE inline
+#else
+#define HTS_INLINE
+#endif
+
+#ifdef HTS_NO_64_BIT
+#define HTS_LONGLONG 0
+#else
+#define HTS_LONGLONG 1
+#endif
+
+// 64-bit integer types when HTS_LONGLONG is set, plain int otherwise
+// (INTsys is the int type used when casting for system functions like malloc() )
+#if HTS_LONGLONG
+ #if HTS_WIN
+ typedef __int64 LLint;
+ typedef __int64 TStamp;
+ typedef int INTsys;
+ #define LLintP "%I64d"
+ #else
+ #if HTS_PLATFORM==0
+ typedef long long int LLint;
+ typedef long long int TStamp;
+ typedef int INTsys;
+ #define LLintP "%lld"
+ #else
+ typedef long long int LLint;
+ typedef long long int TStamp;
+ typedef int INTsys;
+ #define LLintP "%Ld"
+ #endif
+ #endif
+#else
+ typedef int LLint;
+ typedef int INTsys;
+ typedef double TStamp;
+ #define LLintP "%d"
+#endif
+
+/* Alignment */
+#ifndef HTS_ALIGN
+#define HTS_ALIGN 4
+#endif
+
+/* IPV4, IPV6 and various unified structures */
+#define HTS_MAXADDRLEN 64
+
+#if HTS_WIN
+#else
+#define __cdecl
+#endif
+
+/* mode for mkdir AND chmod (file access rights) */
+#define HTS_PROTECT_FOLDER (S_IRUSR|S_IWUSR|S_IXUSR)
+#if HTS_ACCESS // NOTE(review): the default for HTS_ACCESS is only set further down; unless htssystem.h/htsconfig.h define it, this test sees 0 — confirm
+#define HTS_ACCESS_FILE (S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH)
+#define HTS_ACCESS_FOLDER (S_IRUSR|S_IWUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH)
+#else
+#define HTS_ACCESS_FILE (S_IRUSR|S_IWUSR)
+#define HTS_ACCESS_FOLDER (S_IRUSR|S_IWUSR|S_IXUSR)
+#endif
+
+/* check that the required preprocessor variables are declared */
+#ifndef HTS_DOSNAME
+#error | HTS_DOSNAME Has not been defined.
+#error | Set it to 1 if you are under DOS, 0 under Unix.
+#error | Example: place this line in you source, before includes:
+#error | #define HTS_DOSNAME 0
+#error
+#error
+#endif
+#ifndef HTS_ACCESS
+/* By default, access is granted to all users */
+#define HTS_ACCESS 1
+#endif
+
+/* flush stdout and stdin -- NOTE(review): fflush() on an input stream is undefined in ISO C */
+#define io_flush { fflush(stdout); fflush(stdin); }
+
+
+
+/* HTSLib */
+
+// DNS cache, speeds up address resolution
+#define HTS_DNSCACHE 1
+
+// ID of a local pseudo-socket used for file:// links
+#define LOCAL_SOCKET_ID -500000
+
+// size of each buffer (10 sockets: about 650 KB)
+#define TAILLE_BUFFER 65535
+
+#if HTS_WIN
+#else
+// use pthreads.h (unless HTS_DO_NOT_USE_PTHREAD is defined)
+#ifdef HTS_DO_NOT_USE_PTHREAD
+#define USE_PTHREAD 0
+#else
+#define USE_PTHREAD 1
+#endif
+#endif
+
+#if HTS_WIN
+#define USE_BEGINTHREAD 1
+#else
+#if USE_PTHREAD
+#define USE_BEGINTHREAD 1
+#else
+/* no thread support available */
+#define USE_BEGINTHREAD 0
+#endif
+#endif
+
+/* ------------------------------------------------------------ */
+/* Debugging (all switches are off by default) */
+/* ------------------------------------------------------------ */
+
+// type debugging
+#define DEBUG_SHOWTYPES 0
+// backing debug
+#define BDEBUG 0
+// chunk receive
+#define CHUNKDEBUG 0
+// realloc links debug
+#define MDEBUG 0
+// cache debug
+#define DEBUGCA 0
+// DNS debug
+#define DEBUGDNS 0
+// savename debug
+#define DEBUG_SAVENAME 0
+// debug robots
+#define DEBUG_ROBOTS 0
+// debug hash
+#define DEBUG_HASH 0
+// integrity checking
+#define DEBUG_CHECKINT 0
+// socket count debug
+#define NSDEBUG 0
+// trace mallocs
+#define HTS_TRACE_MALLOC 0
+
+// HTSLib debugging
+#define HDEBUG 0
+// connection monitoring
+#define CNXDEBUG 0
+// cookie debugging
+#define DEBUG_COOK 0
+// heavy debugging
+#define HTS_WIDE_DEBUG 0
+// debugging of deletehttp and related routines
+#define HTS_DEBUG_CLOSESOCK 0
+// memory tracing debug
+#define MEMDEBUG 0
+
+// htsmain
+#define DEBUG_STEPS 0
+
+#endif
+
diff --git a/src/htshash.c b/src/htshash.c
new file mode 100644
index 0000000..b02f2ba
--- /dev/null
+++ b/src/htshash.c
@@ -0,0 +1,453 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: httrack.c subroutines: */
+/* hash table system (fast index) */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+#include "htshash.h"
+
+/* specific definitions */
+#include "htsbase.h"
+#include "htsmd5.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+/* END specific definitions */
+
+// HASH TABLE HANDLING
+// Two-key method (adr+fil), the second key being optional
+// hash[record_number][pos]->hash is an index into the general 'liens' array
+// #define HTS_HASH_SIZE 8191 (a prime number if possible!)
+// type: record number - 0 is case insensitive (sav) 1 (adr+fil) 2 (former_adr+former_fil)
+#if HTS_HASH
+// Look up a link in hash table number 'type' from nom1/nom2.
+//   type 0: nom1 is matched against the saved name 'sav' with strfield2();
+//           the key is computed on convtolower(nom1)
+//   type 1: nom1/nom2 are compared to adr (after jump_identification) and fil
+//   type 2: same as 1 with former_adr/former_fil; links without former_adr
+//           are skipped
+// Returns the index of the link in hash->liens, or -1 when not found.
+// (A disabled linear-search fallback that used to sit here as commented-out
+// code has been dropped.)
+int hash_read(hash_struct* hash,char* nom1,char* nom2,int type) {
+  unsigned int cle;
+  int cur;
+  // raw search key, not yet reduced to a slot number
+  if (type)
+    cle = hash_cle(nom1,nom2);
+  else
+    cle = hash_cle(convtolower(nom1),nom2);    // case insensitive
+  // head of the chain stored in the slot
+  cur = hash->hash[type][(int) (cle%HTS_HASH_SIZE)];
+  if (cur < 0) {        // slot never used
+#if DEBUG_HASH==2
+    printf("hash: not found %s%s\n",nom1,nom2);
+#endif
+    return -1;
+  }
+  // walk the chain of links sharing this slot
+  while (cur >= 0) {
+    int match = 0;
+    int next;
+    if (type == 0) {           // sav, case insensitive
+      if (strfield2(nom1,hash->liens[cur]->sav))
+        match = 1;
+    } else if (type == 1) {    // adr+fil
+      if ((strcmp(nom1,jump_identification(hash->liens[cur]->adr))==0) && (strcmp(nom2,hash->liens[cur]->fil)==0))
+        match = 1;
+    } else if (type == 2) {    // former_adr+former_fil
+      if (hash->liens[cur]->former_adr)
+        if ((strcmp(nom1,jump_identification(hash->liens[cur]->former_adr))==0) && (strcmp(nom2,hash->liens[cur]->former_fil)==0))
+          match = 1;
+    }
+    if (match) {
+#if DEBUG_HASH==2
+      printf("hash: found shortcut at %d\n",cur);
+#endif
+      return cur;
+    }
+    // next element; an element pointing to itself ends the walk
+    // (loop error, should not happen)
+    next = hash->liens[cur]->hash_next[type];
+    cur = (next == cur) ? -1 : next;
+  }
+#if DEBUG_HASH==1
+  printf("hash: not found after test %s%s\n",nom1,nom2);
+#endif
+  return -1;
+}
+
+// Register link number lpos in the three hash tables:
+//   0: saved name (sav), case insensitive
+//   1: adr+fil
+//   2: former_adr+former_fil (only when former_adr is set)
+// In each table the link is appended at the end of the chain of its slot.
+// Nothing is done when hash->liens[lpos] is NULL (debug builds abort).
+void hash_write(hash_struct* hash,int lpos) {
+  unsigned int cle;
+  int pos;
+  int* ptr;
+  //
+  if (hash->liens[lpos]) {    // you never know..
+    hash->max_lien = max(hash->max_lien,lpos);
+#if DEBUG_HASH
+    hashnumber=hash->max_lien;
+#endif
+    // the new element ends every chain it joins (-1)
+    hash->liens[lpos]->hash_next[0]=hash->liens[lpos]->hash_next[1]=hash->liens[lpos]->hash_next[2]=-1;
+    //
+    cle = hash_cle(convtolower(hash->liens[lpos]->sav),"");    // CASE INSENSITIVE
+    pos = (int) (cle%HTS_HASH_SIZE);
+    ptr = hash_calc_chaine(hash,0,pos);    // address of the end of the chain
+    *ptr = lpos;                           // record as last element
+#if DEBUG_HASH==3
+    printf("[%d",pos);
+#endif
+    //
+    cle = hash_cle(jump_identification(hash->liens[lpos]->adr),hash->liens[lpos]->fil);
+    pos = (int) (cle%HTS_HASH_SIZE);
+    ptr = hash_calc_chaine(hash,1,pos);    // address of the end of the chain
+    *ptr = lpos;                           // record as last element
+#if DEBUG_HASH==3
+    printf(",%d",pos);
+#endif
+    //
+    if (hash->liens[lpos]->former_adr) {   // is there a former_adr?
+      cle = hash_cle(jump_identification(hash->liens[lpos]->former_adr),hash->liens[lpos]->former_fil);
+      pos = (int) (cle%HTS_HASH_SIZE);
+      ptr = hash_calc_chaine(hash,2,pos);  // address of the end of the chain
+      *ptr = lpos;                         // record as last element
+#if DEBUG_HASH==3
+      printf(",%d",pos);
+#endif
+    }
+#if DEBUG_HASH==3
+    printf("] "); fflush(stdout);
+#endif
+  }
+  // was guarded by the misspelt DEBUT_HASH, hence never compiled
+#if DEBUG_HASH
+  else {
+    printf("* hash_write=0!!\n");
+    exit(1);
+  }
+#endif
+  //
+}
+
+// Key computation: sum of md5sum32() of both names
+// there is no universal hashing formula, this one seems acceptable..
+unsigned long int hash_cle(char* nom1,char* nom2) {
+ /* former hand-written formula, disabled:
+ unsigned int sum=0;
+ int i=0;
+ while(*nom1) {
+ sum += 1;
+ sum += (unsigned int) *(nom1);
+ sum *= (unsigned int) *(nom1++);
+ sum += (unsigned int) i;
+ i++;
+ }
+ while(*nom2) {
+ sum += 1;
+ sum += (unsigned int) *(nom2);
+ sum *= (unsigned int) *(nom2++);
+ sum += (unsigned int) i;
+ i++;
+ }
+ */
+ return md5sum32(nom1)
+ +md5sum32(nom2);
+}
+
+// Find where a new element must be hooked in the chain of slot 'pos' of
+// table 'type': returns the address of the first cell holding -1, which is
+// either the slot itself (empty chain) or the hash_next[type] field of the
+// last link of the chain.
+int* hash_calc_chaine(hash_struct* hash,int type,int pos) {
+  int* cell = &(hash->hash[type][pos]);
+#if DEBUG_HASH
+  int count=0;
+#endif
+  // follow the chain until its terminating -1
+  while (*cell != -1) {
+    cell = &(hash->liens[*cell]->hash_next[type]);
+#if DEBUG_HASH
+    count++;
+#endif
+  }
+#if DEBUG_HASH
+  if (count)    // statistics are only updated for non-empty chains
+    longest_hash[type]=max(longest_hash[type],count);
+#endif
+  return cell;
+}
+#endif
+// END OF HASH TABLE HANDLING
+
+
+
+
+
+
+
+
+
+
+
+
+// inthash -- simple hash table, using a key (char[]) and a value (ulong int)
+
+/* Key of an inthash entry: the value md5sum32() yields for the name. */
+unsigned long int inthash_key(char* value) {
+  unsigned long int key = md5sum32(value);
+  return key;
+}
+
+// Set the value stored under 'name', adding the entry when it is missing.
+// Returns 1 when a new entry was added, 0 when an existing one was
+// overwritten.
+int inthash_write(inthash hashtable,char* name,long int value) {
+  int slot = (inthash_key(name) % hashtable->hash_size);
+  inthash_chain* node;
+  for (node = hashtable->hash[slot]; node; node = node->next) {
+    if (strcmp(node->name,name) != 0)
+      continue;
+    node->value.intg = value;    // already there: overwrite
+    return 0;
+  }
+  // not found, add it
+  inthash_add(hashtable,name,value);
+  return 1;
+}
+
+// Increment the value stored under 'name'; a missing entry is created
+// with the value 0.
+// Returns 1 when the entry had to be created, 0 otherwise.
+int inthash_inc(inthash hashtable,char* name) {
+  long int counter = 0;
+  int created = !inthash_read(hashtable,name,&counter);
+  if (created)
+    counter = 0;     /* new entry starts at 0 */
+  else
+    counter++;
+  inthash_write(hashtable,name,counter);
+  return created;
+}
+
+
+// Append a (name, value) entry at the end of the chain of its slot.
+// No check for an existing entry with the same name is made.
+// The node and the copy of the name share a single calloc() block; when
+// the allocation fails nothing is added.
+void inthash_add(inthash hashtable,char* name,long int value) {
+  int pos = (inthash_key(name) % hashtable->hash_size);
+  inthash_chain** link = &hashtable->hash[pos];
+  inthash_chain* node;
+
+  // reach the terminating NULL link of the chain
+  while (*link)
+    link = &((*link)->next);
+  node = (inthash_chain*) calloc(1, sizeof(inthash_chain) + strlen(name) + 2);
+  *link = node;
+  if (!node)
+    return;
+  node->name = ((char*) node) + sizeof(inthash_chain);    // name lives right after the node
+  node->next = NULL;
+  strcpy(node->name,name);
+  node->value.intg = value;
+}
+
+// Append an entry for 'name' whose value is a fresh zero-filled memory
+// block of blksize bytes, and return the address of that block (NULL when
+// the allocation fails). No check for an existing entry is made.
+// Node, copy of the name and block share a single calloc() allocation:
+// the block starts strlen(name)+2 bytes after the name.
+// NOTE: because of that layout the block has no particular alignment, and
+// it must not be released separately from its node.
+void* inthash_addblk(inthash hashtable,char* name,int blksize) {
+  int pos = (inthash_key(name) % hashtable->hash_size);
+  inthash_chain** h=&hashtable->hash[pos];
+
+  // reach the terminating NULL link of the chain
+  while (*h)
+    h=&((*h)->next);
+  *h=(inthash_chain*)calloc(1,
+    sizeof(inthash_chain)
+    +
+    strlen(name)+2
+    +
+    blksize
+    );
+  if (*h) {
+    (*h)->name = ((char*)(*h)) + sizeof(inthash_chain);
+    (*h)->next=NULL;
+    strcpy((*h)->name,name);
+    // keep the address as a pointer: going through unsigned long would
+    // truncate it where long is narrower than a pointer
+    (*h)->value.ptr = (void*) ((*h)->name + strlen(name) + 2);
+    return (*h)->value.ptr;
+  }
+  return NULL;
+}
+
+// Read the value stored under 'name'.
+// Returns 1 and stores the value in *value when the entry exists,
+// returns 0 and leaves *value untouched otherwise.
+int inthash_read(inthash hashtable,char* name,long int* value) {
+  int slot = (inthash_key(name) % hashtable->hash_size);
+  inthash_chain* node;
+  for (node = hashtable->hash[slot]; node; node = node->next) {
+    if (strcmp(node->name,name) != 0)
+      continue;
+    *value = node->value.intg;
+    return 1;
+  }
+  return 0;
+}
+
+// Mark every slot of the table as empty (NULL chain head).
+void inthash_init(inthash hashtable) {
+  unsigned int slot = 0;
+  while (slot < hashtable->hash_size) {
+    hashtable->hash[slot] = NULL;
+    slot++;
+  }
+}
+
+// Release every node of a chain.
+//   hash         : first node of the chain (may be NULL)
+//   free_handler : when not NULL, called on each non-zero value before the
+//                  node is freed (values are then owned memory blocks)
+// The chain is walked iteratively, from head to tail, so that long chains
+// cannot exhaust the stack.
+void inthash_delchain(inthash_chain* hash,t_inthash_freehandler free_handler) {
+  while (hash) {
+    inthash_chain* next = hash->next;    // read before the node is freed
+    if (free_handler) {
+      if (hash->value.intg)
+        free_handler((void*)hash->value.intg);
+      hash->value.intg=0;
+    }
+    free(hash);
+    hash = next;
+  }
+}
+
+// Default value destructor: free() the block unless it is NULL.
+void inthash_default_free_handler(void* value) {
+  if (!value)
+    return;
+  free(value);
+}
+
+// --
+
+// Create a hash table with 'size' slots.
+// Returns NULL when the table header cannot be allocated. When only the
+// slot array fails to allocate, a table with hash==NULL and hash_size==0
+// is returned: use inthash_created() to tell the difference.
+inthash inthash_new(int size) {
+  inthash table = (inthash) calloc(1,sizeof(struct_inthash));
+  if (!table)
+    return table;
+  table->hash_size = 0;
+  table->flag_valueismalloc = 0;
+  table->hash = (inthash_chain**) calloc(size,sizeof(inthash_chain*));
+  if (table->hash) {
+    table->hash_size = size;
+    inthash_init(table);
+  }
+  return table;
+}
+
+// Was the table fully created? 1 when both the table and its slot array
+// exist, 0 otherwise.
+int inthash_created(inthash hashtable) {
+  return (hashtable && hashtable->hash) ? 1 : 0;
+}
+
+// Record whether the values of this table are blocks to be released
+// when the table is deleted (non-zero flag) or plain integers (0).
+void inthash_value_is_malloc(inthash hashtable,int flag) {
+  hashtable->flag_valueismalloc = flag;
+}
+
+// Install the function used to release values when the table is deleted.
+void inthash_value_set_free_handler(inthash hashtable,
+                                    t_inthash_freehandler free_handler) {
+  hashtable->free_handler = free_handler;
+}
+
+// Delete a hash table: every chain, the slot array and the table itself
+// are released, and *hashtable is reset to NULL.
+// Values are released with the table's free handler when one is installed
+// (as before); when flag_valueismalloc is set and no handler is installed,
+// inthash_default_free_handler() is used -- that default was computed but
+// never handed to inthash_delchain(), so such values used to leak.
+// A NULL hashtable or *hashtable is accepted and ignored.
+void inthash_delete(inthash* hashtable) {
+  if (hashtable == NULL || *hashtable == NULL)
+    return;
+  if ((*hashtable)->hash) {
+    unsigned int i;
+    t_inthash_freehandler free_handler=(*hashtable)->free_handler;
+    if ( (*hashtable)->flag_valueismalloc && !free_handler )
+      free_handler=inthash_default_free_handler;
+    for(i=0;i<(*hashtable)->hash_size;i++) {
+      inthash_delchain((*hashtable)->hash[i],free_handler);
+      (*hashtable)->hash[i]=NULL;
+    }
+    // the slot array was allocated by inthash_new() and was never freed
+    free((*hashtable)->hash);
+    (*hashtable)->hash=NULL;
+  }
+  free(*hashtable);
+  *hashtable=NULL;
+}
+
+
diff --git a/src/htshash.h b/src/htshash.h
new file mode 100644
index 0000000..9a54710
--- /dev/null
+++ b/src/htshash.h
@@ -0,0 +1,104 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: httrack.c subroutines: */
+/* hash table system (fast index) */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+
+
+#ifndef HTSHASH_DEFH
+#define HTSHASH_DEFH
+
+#include "htscore.h"
+
+// hash tables over the links array
+int hash_read(hash_struct* hash,char* nom1,char* nom2,int type); // index of the matching link in table 'type', or -1
+void hash_write(hash_struct* hash,int lpos); // register link lpos in the hash tables
+int* hash_calc_chaine(hash_struct* hash,int type,int pos); // address of the cell ending the chain of slot pos
+unsigned long int hash_cle(char* nom1,char* nom2); // key built from both names (sum of md5sum32 values)
+
+
+
+
+// inthash -- simple hash table, using a key (char[]) and a value (ulong int, or a pointer stored in the same union)
+
+// one entry of a chain (entries falling into the same slot are linked through 'next')
+typedef struct inthash_chain {
+ char* name; /* key (name) */
+ union {
+ unsigned long int intg; /* integer value */
+ void* ptr; /* ptr value */
+ } value;
+ struct inthash_chain* next; /* next element */
+} inthash_chain;
+
+// structure behind inthash
+typedef void (* t_inthash_freehandler)(void* value); // function called to release a value
+typedef struct {
+ inthash_chain** hash; // array of hash_size chain heads
+ t_inthash_freehandler free_handler; // value destructor, may be NULL
+ unsigned int hash_size; // number of slots; 0 when the slot array could not be allocated
+ unsigned short flag_valueismalloc; // non-zero: values are blocks to release on deletion
+} struct_inthash;
+
+// main inthash type
+typedef struct_inthash* inthash;
+
+// subfunctions
+unsigned long int inthash_key(char* value); // hash key of a name
+void inthash_init(inthash hashtable); // set every slot to NULL
+void inthash_delchain(inthash_chain* hash,t_inthash_freehandler free_handler); // free a whole chain, values too when free_handler is not NULL
+void inthash_default_free_handler(void* value); // free() a non-NULL value
+
+// main functions:
+
+
+/* Hash functions: */
+inthash inthash_new(int size); /* Create a new hash table */
+int inthash_created(inthash hashtable); /* Test if the hash table was successfully created */
+void inthash_delete(inthash* hashtable); /* Delete a hash table and reset the pointer to NULL */
+void inthash_value_is_malloc(inthash hashtable,int flag); /* Is the 'value' member a value that needs to be free()'ed ? */
+void inthash_value_set_free_handler(inthash hashtable, /* value free() handler (default one is 'free') */
+ t_inthash_freehandler free_handler);
+/* */
+int inthash_read(inthash hashtable,char* name,long int* value); /* Read entry from the hash table (1 if found, 0 otherwise) */
+/* */
+void inthash_add(inthash hashtable,char* name,long int value); /* Add entry in the hash table (no duplicate check) */
+void* inthash_addblk(inthash hashtable,char* name,int blksize); /* Add entry in the hash table and set value to a new memory block */
+int inthash_write(inthash hashtable,char* name,long int value); /* Overwrite/add entry in the hash table (1 if added) */
+int inthash_inc(inthash hashtable,char* name); /* Increment entry in the hash table (1 if created) */
+/* End of hash functions: */
+
+
+#endif
diff --git a/src/htshelp.c b/src/htshelp.c
new file mode 100644
index 0000000..3d743fe
--- /dev/null
+++ b/src/htshelp.c
@@ -0,0 +1,622 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: httrack.c subroutines: */
+/* command-line help system */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+#include "htshelp.h"
+
+/* specific definitions */
+#include "htsbase.h"
+#include "htscoremain.h"
+#include "htscatchurl.h"
+#include "htslib.h"
+#include "htsalias.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#if HTS_WIN
+#else
+#include <unistd.h>
+#endif
+/* END specific definitions */
+
+/*
+  Pager prompt used by infomsg(): when 'more' is set, asks whether to go on;
+  answering "q" sets 'quit', anything else prints the next page number.
+  Expands in place and relies on the locals 'more', 'quit' and 'm' of infomsg().
+*/
+#define waitkey if (more) { char s[4]; printf("\nMORE.. q to quit\n"); linput(stdin,s,4); if (strcmp(s,"q")==0) quit=1; else printf("Page %d\n\n",++m); }
+
+/*
+  Print one line of help text on stdout.
+
+  msg: the line to print. NULL prints nothing; the one-character message "1"
+       is a control message (pager request) and is not printed either.
+
+  Lines that look like an option description (leading space, then a one or two
+  character option name followed by a space) are looked up with optreal_find();
+  when a long alias exists, it is appended to the line, formatted according to
+  the option type returned by opttype_value().
+
+  NOTE(review): l, m, more and quit are plain locals that are re-initialised on
+  every call. As written, the "1" message and the 20-line page counter have no
+  lasting effect and the waitkey prompt never fires. This is left untouched so
+  that the current (non-paging) output is preserved.
+*/
+void infomsg(char* msg) {
+  int l=0;
+  int m=0;
+  int more=0;
+  int quit=0;
+  int done=0;
+  //
+  if (msg==NULL)
+    quit=0;
+  if (msg) {
+    if (!quit) {
+      if (strlen(msg)==1) {
+        if (msg[0]=='1') {
+          more=1;
+          return;
+        }
+      }
+
+      /* show the long alias? */
+      if (((int)strlen(msg)) > 4) {
+        if (msg[0]==' ') {
+          if (msg[2]!=' ') {
+            if ((msg[3]==' ') || (msg[4]==' ')) {
+              char cmd[32]="-";
+              int p;
+              /* sscanf() skips the leading blanks by itself; the field width
+                 keeps the token inside cmd[] ("-" + 30 characters + NUL) even
+                 when the first token of msg is unexpectedly long */
+              sscanf(msg,"%30s",cmd+strlen(cmd));
+              /* clears cN -> c */
+              if ((p=strlen(cmd))>2)
+                if (cmd[p-1]=='N')
+                  cmd[p-1]='\0';
+              /* finds alias (if any) */
+              p=optreal_find(cmd);
+              if (p>=0) {
+                /* finds type of parameter: number,param,param concatenated,single cmd */
+                if (strcmp(opttype_value(p),"param") == 0)
+                  printf("%s (--%s[=N])\n",msg,optalias_value(p));
+                else if (strcmp(opttype_value(p),"param1") == 0)
+                  printf("%s (--%s <param>)\n",msg,optalias_value(p));
+                else if (strcmp(opttype_value(p),"param0") == 0)
+                  printf("%s (--%s<param>)\n",msg,optalias_value(p));
+                else
+                  printf("%s (--%s)\n",msg,optalias_value(p));
+                done=1;
+              }
+            }
+          }
+        }
+      }
+
+      /* otherwise: plain line */
+      if (!done)
+        printf("%s\n",msg);
+      l++;
+      if (l>20) {
+        l=0;
+        waitkey;
+      }
+    }
+  }
+}
+/* Append src to the NUL-terminated string held in dest (a buffer of destsize
+   bytes), truncating the addition rather than writing past the buffer. */
+static void wizard_cat(char* dest,size_t destsize,const char* src) {
+  size_t used=strlen(dest);
+  if (used+1 < destsize)
+    strncat(dest,src,destsize-used-1);
+}
+
+/*
+  Interactive wizard: asks for a project name, a base path, URLs, an action,
+  a proxy, wildcards and additional options on stdin, builds the equivalent
+  command line, shows it, and (after confirmation) splits it into an argv[]
+  array that is handed to hts_main()/main().
+
+  opt: not used by this function.
+
+  Returns silently if no URL is entered or if "Quit" (0) is chosen as action.
+  Calls exit(0) if the user answers anything but y/Y/empty to the final
+  confirmation. All work buffers are released on every return path.
+*/
+void help_wizard(httrackp* opt) {
+  enum { WIZ_STR_SIZE=256, WIZ_OPT_SIZE=2048, WIZ_CMD_SIZE=4096, WIZ_MAX_ARGS=256 };
+  char* urls = (char*) malloc(HTS_URLMAXSIZE*2);
+  char* projname = (char*) malloc(WIZ_STR_SIZE);
+  char* stropt = (char*) malloc(WIZ_OPT_SIZE);       // short options
+  char* stropt2 = (char*) malloc(WIZ_OPT_SIZE);      // long options
+  char* strwild = (char*) malloc(WIZ_OPT_SIZE);      // wildcards
+  char* cmd = (char*) malloc(WIZ_CMD_SIZE);
+  char* str = (char*) malloc(WIZ_STR_SIZE);
+  char** argv = (char**) malloc(WIZ_MAX_ARGS * sizeof(char*));
+  //
+  char* a;
+  //
+  if (urls == NULL || projname == NULL || stropt == NULL
+    || stropt2 == NULL || strwild == NULL || cmd == NULL || str == NULL || argv == NULL) {
+    fprintf(stderr, "* memory exhausted in %s, line %d\n", __FILE__, __LINE__);
+    goto cleanup;   /* free(NULL) is harmless: release whatever was obtained */
+  }
+  urls[0] = projname[0] = stropt2[0] = strwild[0] = cmd[0] = str[0] = '\0';
+  strcpy(stropt,"-");
+  //
+
+  printf("\n");
+  printf("Welcome to HTTrack Website Copier (Offline Browser) "HTTRACK_VERSION"\n");
+  printf("Copyright (C) Xavier Roche and other contributors\n");
+#ifdef _WIN32
+  printf("Note: You are running the commandline version,\n");
+  printf("run 'WinHTTrack.exe' to get the GUI version.\n");
+#endif
+#ifdef HTTRACK_AFF_WARNING
+  printf("NOTE: "HTTRACK_AFF_WARNING"\n");
+#endif
+#ifdef HTS_PLATFORM_NAME
+#if USE_BEGINTHREAD
+  printf("[compiled: "HTS_PLATFORM_NAME" - MT]\n");
+#else
+  printf("[compiled: "HTS_PLATFORM_NAME"]\n");
+#endif
+#endif
+  printf("To see the option list, enter a blank line or try httrack --help\n");
+  //
+  // Project name (a blank answer shows the help and asks again)
+  while(strnotempty(projname)==0) {
+    printf("\n");
+    printf("Enter project name :");
+    fflush(stdout);
+    linput(stdin,projname,250);
+    if (strnotempty(projname)==0)
+      help("httrack",1);
+  }
+  //
+  // Path
+  if (strnotempty(hts_gethome()))
+    printf("\nBase path (return=%s/websites/) :",hts_gethome());
+  else
+    printf("\nBase path (return=current directory) :");
+  linput(stdin,str,250);
+  if (!strnotempty(str)) {
+    /* bounded: the home directory may be longer than the work buffer */
+    wizard_cat(str,WIZ_STR_SIZE,hts_gethome());
+    wizard_cat(str,WIZ_STR_SIZE,"/websites/");
+  }
+  if (strnotempty(str))
+    if ((str[strlen(str)-1]!='/') && (str[strlen(str)-1]!='\\'))
+      wizard_cat(str,WIZ_STR_SIZE,"/");
+  wizard_cat(stropt2,WIZ_OPT_SIZE,"-O \"");
+  wizard_cat(stropt2,WIZ_OPT_SIZE,str);
+  wizard_cat(stropt2,WIZ_OPT_SIZE,projname);
+  wizard_cat(stropt2,WIZ_OPT_SIZE,"\" ");
+  // Create a first-level index.html if it does not exist yet
+  make_empty_index(str);
+  //
+  printf("\n");
+  printf("Enter URLs (separated by commas or blank spaces) :");
+  fflush(stdout);
+  linput(stdin,urls,250);
+  if (strnotempty(urls)) {
+    while( (a=strchr(urls,',')) ) *a=' ';
+    while( (a=strchr(urls,'\t')) ) *a=' ';
+
+    // Action
+    printf("\nAction:\n");
+    switch(help_query("Mirror Web Site(s)|Mirror Web Site(s) with Wizard|Just Get Files Indicated|Mirror ALL links in URLs (Multiple Mirror)|Test Links In URLs (Bookmark Test)|Update/Continue a Mirror",1)) {
+      case 1: break;
+      case 2: strcat(stropt,"W"); break;
+      case 3: wizard_cat(stropt2,WIZ_OPT_SIZE,"--get "); break;
+      case 4: wizard_cat(stropt2,WIZ_OPT_SIZE,"--mirrorlinks "); break;
+      case 5: wizard_cat(stropt2,WIZ_OPT_SIZE,"--testlinks "); break;
+      case 6: wizard_cat(stropt2,WIZ_OPT_SIZE,"--update "); break;
+      case 0: goto cleanup;   /* quit: release the buffers before leaving */
+    }
+
+    // Proxy
+    printf("\nProxy (return=none) :");
+    linput(stdin,str,250);
+    if (strnotempty(str)) {
+      while( (a=strchr(str,' ')) ) *a=':'; // port
+      if (!strchr(jump_identification(str),':')) {
+        char str2[256];
+        printf("\nProxy port (return=8080) :");
+        linput(stdin,str2,250);
+        /* host and port may each be up to 250 characters: append bounded */
+        wizard_cat(str,WIZ_STR_SIZE,":");
+        if (strnotempty(str2)==0)
+          wizard_cat(str,WIZ_STR_SIZE,"8080");
+        else
+          wizard_cat(str,WIZ_STR_SIZE,str2);
+      }
+      wizard_cat(stropt2,WIZ_OPT_SIZE,"-P ");
+      wizard_cat(stropt2,WIZ_OPT_SIZE,str);
+      wizard_cat(stropt2,WIZ_OPT_SIZE," ");
+    }
+
+    // Display
+    wizard_cat(stropt2,WIZ_OPT_SIZE," -%v ");
+
+    // Wildcards
+    printf("\nYou can define wildcards, like: -*.gif +www.*.com/*.zip -*img_*.zip\n");
+    printf("Wildcards (return=none) :");
+    linput(stdin,strwild,250);
+
+    // Options (typing "help" shows the option list and asks again)
+    do {
+      printf("\nYou can define additional options, such as recurse level (-r<number>), separed by blank spaces\n");
+      printf("To see the option list, type help\n");
+      printf("Additional options (return=none) :");
+      linput(stdin,str,250);
+      if (strfield2(str,"help")) {
+        help("httrack",2);
+      } else if (strnotempty(str)) {
+        wizard_cat(stropt2,WIZ_OPT_SIZE,str);
+        wizard_cat(stropt2,WIZ_OPT_SIZE," ");
+      }
+    } while(strfield2(str,"help"));
+
+    {
+      int argc=1;
+      int g=0;
+      int i=0;
+      //
+      printf("\n");
+      if (strlen(stropt)==1)
+        stropt[0]='\0'; // no short option at all
+      /* urls and strwild are at most 250 characters each (linput limit) and
+         stropt2 is below WIZ_OPT_SIZE, so the result fits in WIZ_CMD_SIZE */
+      sprintf(cmd,"%s %s %s %s",urls,stropt,stropt2,strwild);
+      printf("---> Wizard command line: httrack %s\n\n",cmd);
+      printf("Ready to launch the mirror? (Y/n) :");
+      fflush(stdout);
+      linput(stdin,str,250);
+      if (strnotempty(str)) {
+        if (!((str[0]=='y') || (str[0]=='Y')))
+          exit(0);
+      }
+      printf("\n");
+
+      // split the command line in place: blanks outside double quotes
+      // become terminators, each following piece becomes one argument
+      argv[0]="winhttrack";
+      argv[1]=cmd;
+      argc++;
+      while(cmd[i]) {
+        if(cmd[i]=='\"') g=!g;
+        if(cmd[i]==' '){
+          /* stop splitting once argv[] is full: the remainder of the line
+             then stays attached to the last argument */
+          if(!g && argc < WIZ_MAX_ARGS){
+            cmd[i]='\0';
+            argv[argc++]=cmd+i+1;
+          }
+        }
+        i++;
+      }
+#if HTS_ANALYSTE
+      hts_main(argc,argv);
+#else
+      main(argc,argv);
+#endif
+    }
+  }
+
+cleanup:
+  /* Free buffers */
+  free(urls);
+  free(projname);
+  free(stropt);
+  free(stropt2);
+  free(strwild);
+  free(cmd);
+  free(str);
+  free(argv);
+}
+/*
+  Display a numbered menu and read the user's choice from stdin.
+
+  list: menu entries separated by '|' (numbered from 1 in order of appearance);
+        a trailing '|' after the last entry is optional
+  def:  number of the default entry, flagged "(enter)" in the menu
+
+  An extra entry "0 Quit" is always displayed. The question is repeated until
+  the answer is either empty or starts with a number.
+  Returns the number typed, or 'def' when the answer is empty.
+*/
+int help_query(char* list,int def) {
+  char s[256];
+  char* a=list;
+  int opt;
+  int n=1;
+  while(strnotempty(a)) {
+    char str[256];
+    char* b = strchr(a,'|');
+    /* the last entry has no '|' after it: it runs up to the end of the list */
+    size_t len = b ? (size_t) (b - a) : strlen(a);
+    if (len > sizeof(str) - 1)
+      len = sizeof(str) - 1;        /* never copy more than str[] can hold */
+    str[0]='\0';
+    strncat(str,a,len);
+    if (n==def)
+      printf("(enter)\t%d\t%s\n",n++,str);
+    else
+      printf("\t%d\t%s\n",n++,str);
+    a = b ? b+1 : a+strlen(a);
+  }
+  printf("\t0\tQuit");
+  do {
+    printf("\n: ");
+    fflush(stdout);
+    linput(stdin,s,250);
+  } while ((strnotempty(s)!=0) && (sscanf(s,"%d",&opt)!=1));
+  /* a non-empty answer left the loop only because sscanf() filled 'opt' */
+  if (strnotempty(s))
+    return opt;
+  else
+    return def;
+}
+
+// URL capture
+/*
+  Install a temporary proxy (catch_url_init_std), print its address and port,
+  and wait for one request to be captured (catch_url). The captured data is
+  saved into the first non-existing file named <dest_path>hts-post<N>, and the
+  URL to give to httrack (captured URL + POSTTOK + "file:" + saved file) is
+  printed.
+
+  dest_path: prefix (directory, including its trailing separator) of the file
+             that receives the captured data
+*/
+void help_catchurl(char* dest_path) {
+  char adr_prox[HTS_URLMAXSIZE*2];
+  int port_prox;
+  T_SOC soc=catch_url_init_std(&port_prox,adr_prox);
+  if (soc!=INVALID_SOCKET) {
+    char url[HTS_URLMAXSIZE*2];
+    char method[32];
+    char data[32768];
+    url[0]=method[0]=data[0]='\0';
+    //
+    printf("Okay, temporary proxy installed.\nSet your browser's preferences to:\n\n");
+    printf("\tProxy's address: \t%s\n\tProxy's port: \t%d\n",adr_prox,port_prox);
+    //
+    if (catch_url(soc,url,method,data)) {
+      char dest[HTS_URLMAXSIZE*2];
+      int i=0;
+      do {
+        /* the precision keeps an over-long dest_path from overflowing dest[];
+           32 bytes are kept for "hts-post", the counter and the NUL */
+        sprintf(dest,"%.*s%s%d",(int) (sizeof(dest) - 32),dest_path,"hts-post",i);
+        i++;
+      } while(fexist(dest));
+      {
+        FILE* fp=fopen(dest,"wb");
+        if (fp) {
+          /* data is written as a C string: up to its first NUL byte */
+          fwrite(data,strlen(data),1,fp);
+          fclose(fp);
+        }
+      }
+      // build the final URL
+      {
+        /* large enough for a full url[] plus a full dest[] plus the separators
+           (the original buffer was only as large as one of the two inputs) */
+        char finalurl[HTS_URLMAXSIZE*4 + sizeof(POSTTOK) + 16];
+        /* NOTE(review): escape_check_url() rewrites dest in place -- confirm
+           that it cannot grow it beyond sizeof(dest) */
+        escape_check_url(dest);
+        sprintf(finalurl,"%s"POSTTOK"file:%s",url,dest);
+        printf("\nThe URL is: \"%s\"\n",finalurl);
+        printf("You can capture it through: httrack \"%s\"\n",finalurl);
+      }
+    } else
+      printf("Unable to analyse the URL\n");
+#ifdef _WIN32
+    closesocket(soc);
+#else
+    close(soc);
+#endif
+  } else
+    printf("Unable to create a temporary proxy (no remaining port)\n");
+}
+
+// Create an empty index.html
+// The whole body is compiled out (#if 0): the function currently does nothing.
+// The disabled code would create <str>index.html, unless it already exists,
+// with a HTS_TOPINDEX marker comment and a placeholder page.
+void make_empty_index(char* str) {
+#if 0
+ if (!fexist(fconcat(str,"index.html"))) {
+ FILE* fp=fopen(fconcat(str,"index.html"),"wb");
+ if (fp) {
+ fprintf(fp,"<!-- "HTS_TOPINDEX" -->"CRLF);
+ fprintf(fp,"<HTML><BODY>Index is empty!<BR>(File used to index all HTTrack projects)</BODY></HTML>"CRLF);
+ fclose(fp);
+ }
+ }
+#endif
+}
+
+// mini-help (h: help)
+/*
+  Print the command-line help through infomsg(), one line per call.
+
+  app:  program name displayed in the "usage:" line
+  more: 0 = plain output
+        non-zero = the pager request "1" is sent to infomsg() first
+        2 = same, and the version/usage header is skipped (only the option
+            list and what follows is displayed)
+
+  The sequence starts with an empty line and ends with infomsg(NULL).
+*/
+void help(char* app,int more) {
+  infomsg("");
+  if (more)
+    infomsg("1");
+  if (more != 2) {
+    char info[2048];
+    infomsg("HTTrack version "HTTRACK_VERSION" (compiled "__DATE__")");
+#ifdef HTTRACK_AFF_WARNING
+    infomsg("NOTE: "HTTRACK_AFF_WARNING);
+#endif
+    /* app may come from the command line: the precision keeps an over-long
+       program name from overflowing info[] */
+    sprintf(info,"\tusage: %.1900s <URLs> [-option] [+<FILTERs>] [-<FILTERs>]",app);
+    infomsg(info);
+    infomsg("\twith options listed below: (* is the default value)");
+    infomsg("");
+  }
+  infomsg("General options:");
+  infomsg(" O path for mirror/logfiles+cache (-O path_mirror[,path_cache_and_logfiles])");
+#ifndef HTS_WIN
+  infomsg(" %O chroot path to, must be r00t (-%O root_path)");
+#endif
+  infomsg("");
+  infomsg("Action options:");
+  infomsg(" w *mirror web sites");
+  infomsg(" W mirror web sites, semi-automatic (asks questions)");
+  infomsg(" g just get files (saved in the current directory)");
+  infomsg(" i continue an interrupted mirror using the cache");
+  infomsg(" Y mirror ALL links located in the first level pages (mirror links)");
+  infomsg("");
+  infomsg("Proxy options:");
+  infomsg(" P proxy use (-P proxy:port or -P user:pass@proxy:port)");
+  infomsg(" %f *use proxy for ftp (f0 don't use)");
+  infomsg("");
+  infomsg("Limits options:");
+  infomsg(" rN set the mirror depth to N (* r9999)");
+  infomsg(" %eN set the external links depth to N (* %e0)");
+  infomsg(" mN maximum file length for a non-html file");
+  infomsg(" mN,N2 maximum file length for non html (N) and html (N2)");
+  infomsg(" MN maximum overall size that can be uploaded/scanned");
+  infomsg(" EN maximum mirror time in seconds (60=1 minute, 3600=1 hour)");
+  infomsg(" AN maximum transfer rate in bytes/seconds (1000=1KB/s max)");
+  infomsg(" %cN maximum number of connections/seconds (*%c10)");
+  infomsg(" GN pause transfer if N bytes reached, and wait until lock file is deleted");
+  infomsg("");
+  infomsg("Flow control:");
+  infomsg(" cN number of multiple connections (*c8)");
+  infomsg(" TN timeout, number of seconds after a non-responding link is shutdown");
+  infomsg(" RN number of retries, in case of timeout or non-fatal errors (*R1)");
+  infomsg(" JN traffic jam control, minimum transfert rate (bytes/seconds) tolerated for a link");
+  infomsg(" HN host is abandonned if: 0=never, 1=timeout, 2=slow, 3=timeout or slow");
+  infomsg("");
+  infomsg("Links options:");
+  infomsg(" %P *extended parsing, attempt to parse all links, even in unknown tags or Javascript (%P0 don't use)");
+  infomsg(" n get non-html files 'near' an html file (ex: an image located outside)");
+  infomsg(" t test all URLs (even forbidden ones)");
+  infomsg(" %L <file> add all URL located in this text file (one URL per line)");
+  infomsg("");
+  infomsg("Build options:");
+  infomsg(" NN structure type (0 *original structure, 1+: see below)");
+  infomsg(" or user defined structure (-N \"%h%p/%n%q.%t\")");
+  infomsg(" LN long names (L1 *long names / L0 8-3 conversion / L2 ISO9660 compatible)");
+  infomsg(" KN keep original links (e.g. http://www.adr/link) (K0 *relative link, K absolute links, K4 original links, K3 absolute URI links)");
+  infomsg(" x replace external html links by error pages");
+  infomsg(" %x do not include any password for external password protected websites (%x0 include)");
+  infomsg(" %q *include query string for local files (useless, for information purpose only) (%q0 don't include)");
+  infomsg(" o *generate output html file in case of error (404..) (o0 don't generate)");
+  infomsg(" X *purge old files after update (X0 keep delete)");
+  infomsg(" %p preserve html files 'as is' (identical to '-K4 -%F \"\"')");
+  infomsg("");
+  infomsg("Spider options:");
+  infomsg(" bN accept cookies in cookies.txt (0=do not accept,* 1=accept)");
+  infomsg(" u check document type if unknown (cgi,asp..) (u0 don't check, * u1 check but /, u2 check always)");
+  infomsg(" j *parse Java Classes (j0 don't parse)");
+  infomsg(" sN follow robots.txt and meta robots tags (0=never,1=sometimes,* 2=always)");
+  infomsg(" %h force HTTP/1.0 requests (reduce update features, only for old servers or proxies)");
+  infomsg(" %B tolerant requests (accept bogus responses on some servers, but not standard!)");
+  infomsg(" %s update hacks: various hacks to limit re-transfers when updating (identical size, bogus response..)");
+  infomsg(" %A assume that a type (cgi,asp..) is always linked with a mime type (-%A php3,cgi=text/html;dat,bin=application/x-zip)");
+  infomsg(" shortcut: '--assume standard' is equivalent to -%A "HTS_ASSUME_STANDARD);
+  infomsg(" @iN internet protocol (0=both ipv6+ipv4, 4=ipv4 only, 6=ipv6 only)");
+  infomsg("");
+  infomsg("Browser ID:");
+  infomsg(" F user-agent field (-F \"user-agent name\")");
+  infomsg(" %F footer string in Html code (-%F \"Mirrored [from host %s [file %s [at %s]]]\"");
+  infomsg(" %l preffered language (-%l \"fr, en, jp, *\"");
+  infomsg("");
+  infomsg("Log, index, cache");
+  infomsg(" C create/use a cache for updates and retries (C0 no cache,C1 cache is prioritary,* C2 test update before)");
+  infomsg(" k store all files in cache (not useful if files on disk)");
+  infomsg(" %n do not re-download locally erased files");
+  infomsg(" %v display on screen filenames downloaded (in realtime) - * %v1 short version");
+  infomsg(" Q no log - quiet mode");
+  infomsg(" q no questions - quiet mode");
+  infomsg(" z log - extra infos");
+  infomsg(" Z log - debug");
+  infomsg(" v log on screen");
+  infomsg(" f *log in files");
+  infomsg(" f2 one single log file");
+  infomsg(" I *make an index (I0 don't make)");
+  infomsg(" %I make an searchable index for this mirror (* %I0 don't make)");
+  infomsg("");
+  infomsg("Expert options:");
+  infomsg(" pN priority mode: (* p3)");
+  infomsg(" p0 just scan, don't save anything (for checking links)");
+  infomsg(" p1 save only html files");
+  infomsg(" p2 save only non html files");
+  infomsg(" *p3 save all files");
+  infomsg(" p7 get html files before, then treat other files");
+  infomsg(" S stay on the same directory");
+  infomsg(" D *can only go down into subdirs");
+  infomsg(" U can only go to upper directories");
+  infomsg(" B can both go up&down into the directory structure");
+  infomsg(" a *stay on the same address");
+  infomsg(" d stay on the same principal domain");
+  infomsg(" l stay on the same TLD (eg: .com)");
+  infomsg(" e go everywhere on the web");
+  infomsg(" %H debug HTTP headers in logfile");
+  infomsg("");
+  infomsg("Guru options: (do NOT use if possible)");
+  infomsg(" #0 Filter test (-#0 '*.gif' 'www.bar.com/foo.gif')");
+  infomsg(" #f Always flush log files");
+  infomsg(" #FN Maximum number of filters");
+  infomsg(" #h Version info");
+  infomsg(" #K Scan stdin (debug)");
+  infomsg(" #L Maximum number of links (-#L1000000)");
+  infomsg(" #p Display ugly progress information");
+  infomsg(" #P Catch URL");
+  infomsg(" #R Old FTP routines (debug)");
+  infomsg(" #T Generate transfer ops. log every minutes");
+  infomsg(" #u Wait time");
+  infomsg(" #Z Generate transfer rate statictics every minutes");
+  infomsg(" #! Execute a shell command (-#! \"echo hello\")");
+  infomsg("");
+  infomsg("Command-line specific options:");
+  infomsg(" V execute system command after each files ($0 is the filename: -V \"rm \\$0\")");
+  infomsg(" %U run the engine with another id when called as root (-%U smith)");
+  /* infomsg(" %O do a chroot before setuid"); */
+  infomsg("");
+  infomsg("Details: Option N");
+  infomsg(" N0 Site-structure (default)");
+  infomsg(" N1 HTML in web/, images/other files in web/images/");
+  infomsg(" N2 HTML in web/HTML, images/other in web/images");
+  infomsg(" N3 HTML in web/, images/other in web/");
+  infomsg(" N4 HTML in web/, images/other in web/xxx, where xxx is the file extension (all gif will be placed onto web/gif, for example)");
+  infomsg(" N5 Images/other in web/xxx and HTML in web/HTML");
+  infomsg(" N99 All files in web/, with random names (gadget !)");
+  infomsg(" N100 Site-structure, without www.domain.xxx/");
+  infomsg(" N101 Identical to N1 exept that \"web\" is replaced by the site's name");
+  infomsg(" N102 Identical to N2 exept that \"web\" is replaced by the site's name");
+  infomsg(" N103 Identical to N3 exept that \"web\" is replaced by the site's name");
+  infomsg(" N104 Identical to N4 exept that \"web\" is replaced by the site's name");
+  infomsg(" N105 Identical to N5 exept that \"web\" is replaced by the site's name");
+  infomsg(" N199 Identical to N99 exept that \"web\" is replaced by the site's name");
+  infomsg(" N1001 Identical to N1 exept that there is no \"web\" directory");
+  infomsg(" N1002 Identical to N2 exept that there is no \"web\" directory");
+  infomsg(" N1003 Identical to N3 exept that there is no \"web\" directory (option set for g option)");
+  infomsg(" N1004 Identical to N4 exept that there is no \"web\" directory");
+  infomsg(" N1005 Identical to N5 exept that there is no \"web\" directory");
+  infomsg(" N1099 Identical to N99 exept that there is no \"web\" directory");
+  infomsg("Details: User-defined option N");
+  infomsg(" '%n' Name of file without file type (ex: image)");
+  infomsg(" '%N' Name of file, including file type (ex: image.gif)");
+  infomsg(" '%t' File type (ex: gif)");
+  infomsg(" '%p' Path [without ending /] (ex: /someimages)");
+  infomsg(" '%h' Host name (ex: www.someweb.com)");
+  infomsg(" '%M' URL MD5 (128 bits, 32 ascii bytes)");
+  infomsg(" '%Q' query string MD5 (128 bits, 32 ascii bytes)");
+  infomsg(" '%q' small query string MD5 (16 bits, 4 ascii bytes)");
+  infomsg(" '%s?' Short name version (ex: %sN)");
+  infomsg(" '%[param]' param variable in query string");
+  infomsg("");
+  infomsg("Details: Option K");
+  infomsg(" K0 foo.cgi?q=45 -> foo4B54.html?q=45 (relative URI, default)");
+  infomsg(" K -> http://www.foobar.com/folder/foo.cgi?q=45 (absolute URL)");
+  infomsg(" K4 -> foo.cgi?q=45 (original URL)");
+  infomsg(" K3 -> /folder/foo.cgi?q=45 (absolute URI)");
+  infomsg("");
+  infomsg("Shortcuts:");
+  infomsg("--mirror <URLs> *make a mirror of site(s) (default)");
+  infomsg("--get <URLs> get the files indicated, do not seek other URLs (-qg)");
+  infomsg("--list <text file> add all URL located in this text file (-%L)");
+  infomsg("--mirrorlinks <URLs> mirror all links in 1st level pages (-Y)");
+  infomsg("--testlinks <URLs> test links in pages (-r1p0C0I0t)");
+  infomsg("--spider <URLs> spider site(s), to test links: reports Errors & Warnings (-p0C0I0t)");
+  infomsg("--testsite <URLs> identical to --spider");
+  infomsg("--skeleton <URLs> make a mirror, but gets only html files (-p1)");
+  infomsg("--update update a mirror, without confirmation (-iC2)");
+  infomsg("--continue continue a mirror, without confirmation (-iC1)");
+  infomsg("");
+  infomsg("--catchurl create a temporary proxy to capture an URL or a form post URL");
+  infomsg("--clean erase cache & log files");
+  infomsg("");
+  infomsg("--http10 force http/1.0 requests (-%h)");
+  infomsg("");
+  infomsg("");
+  infomsg("example: httrack www.someweb.com/bob/");
+  infomsg("means: mirror site www.someweb.com/bob/ and only this site");
+  infomsg("");
+  infomsg("example: httrack www.someweb.com/bob/ www.anothertest.com/mike/ +*.com/*.jpg");
+  infomsg("means: mirror the two sites together (with shared links) and accept any .jpg files on .com sites");
+  infomsg("");
+  infomsg("example: httrack www.someweb.com/bob/bobby.html +* -r6");
+  infomsg("means get all files starting from bobby.html, with 6 link-depth, and possibility of going everywhere on the web");
+  infomsg("");
+  infomsg("example: httrack www.someweb.com/bob/bobby.html --spider -P proxy.myhost.com:8080");
+  infomsg("runs the spider on www.someweb.com/bob/bobby.html using a proxy");
+  infomsg("");
+  infomsg("example: httrack --update");
+  infomsg("updates a mirror in the current folder");
+  infomsg("");
+  infomsg("example: httrack");
+  infomsg("will bring you to the interactive mode");
+  infomsg("");
+  infomsg("example: httrack --continue");
+  infomsg("continues a mirror in the current folder");
+  infomsg("");
+  infomsg("HTTrack version "HTTRACK_VERSION" (compiled "__DATE__")");
+  infomsg("Copyright (C) Xavier Roche and other contributors");
+#ifdef HTS_PLATFORM_NAME
+  infomsg("[compiled: "HTS_PLATFORM_NAME"]");
+#endif
+  /* end of the help sequence */
+  infomsg(NULL);
+
+// infomsg(" R *relative links (e.g ../link)\n");
+// infomsg(" A absolute links (e.g /www.adr/link)\n");
+}
+
+
diff --git a/src/htshelp.h b/src/htshelp.h
new file mode 100644
index 0000000..924a526
--- /dev/null
+++ b/src/htshelp.h
@@ -0,0 +1,53 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: httrack.c subroutines: */
+/* command-line help system */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+
+
+#ifndef HTSHELP_DEFH
+#define HTSHELP_DEFH
+
+#include "htsglobal.h"
+#include "htscore.h"
+
+/* Print one help line (NULL and the control message "1" print nothing) */
+void infomsg(char* msg);
+/* Print the command-line help; 'more' = 2 skips the version/usage header */
+void help(char* app,int more);
+/* Currently a no-op (its body is compiled out with #if 0) */
+void make_empty_index(char* str);
+/* Interactive wizard: builds a command line from answers read on stdin and launches it */
+void help_wizard(httrackp* opt);
+/* Display a '|'-separated menu, return the number chosen ('def' if the answer is empty) */
+int help_query(char* list,int def);
+/* Install a temporary proxy, capture one URL, save its data under dest_path */
+void help_catchurl(char* dest_path);
+
+#endif
diff --git a/src/htsindex.c b/src/htsindex.c
new file mode 100644
index 0000000..5a66724
--- /dev/null
+++ b/src/htsindex.c
@@ -0,0 +1,483 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: htsindex.c */
+/* keyword indexing system (search index) */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "htsindex.h"
+#include "htsglobal.h"
+#include "htslib.h"
+
+#if HTS_MAKE_KEYWORD_INDEX
+#include "htshash.h"
+
+
+/* Keyword Indexer Parameters */
+
+// Maximum length for a keyword
+#define KEYW_LEN 50
+// Minimum length for a keyword - MUST NOT BE ZERO!!!
+#define KEYW_MIN_LEN 3
+// What characters to accept? - MUST NOT BE EMPTY AND MUST NOT CONTAIN THE SPACE (32) CHARACTER!!!
+#define KEYW_ACCEPT "abcdefghijklmnopqrstuvwxyz0123456789-_."
+// Convert A to a, and so on.. to avoid case problems in indexing
+// This can be a generic table, containing characters that are in fact not accepted by KEYW_ACCEPT
+// The character at position N of KEYW_TRANSCODE_FROM is replaced by the
+// character at position N of KEYW_TRANSCODE_TO: BOTH STRINGS MUST HAVE THE SAME SIZE!!
+// (each row below is the upper/lower-case counterpart of its neighbour; the
+// upper-case 'e' row used to list the grave E twice, so that the acute E was
+// never converted)
+#define KEYW_TRANSCODE_FROM (\
+  "ABCDEFGHIJKLMNOPQRSTUVWXYZ" \
+  "àâä" \
+  "ÀÂÄ" \
+  "éèêë" \
+  "ÉÈÊË" \
+  "ìîï" \
+  "ÌÎÏ" \
+  "òôö" \
+  "ÒÔÖ" \
+  "ùûü" \
+  "ÙÛÜ" \
+  "ÿ" \
+  )
+#define KEYW_TRANSCODE_TO ( \
+  "abcdefghijklmnopqrstuvwxyz" \
+  "aaa" \
+  "aaa" \
+  "eeee" \
+  "eeee" \
+  "iii" \
+  "iii" \
+  "ooo" \
+  "ooo" \
+  "uuu" \
+  "uuu" \
+  "y" \
+  )
+// These (accepted) characters will be ignored at the beginning of a keyword
+#define KEYW_IGNORE_BEG "-_."
+// These (accepted) characters will be stripped if at the end of a keyword
+#define KEYW_STRIP_END "-_."
+// Words beginning with these (accepted) characters will be ignored
+#define KEYW_NOT_BEG "0123456789"
+// Treat these characters as space characters - MUST NOT BE EMPTY!!!
+#define KEYW_SPACE " ',;:!?\"\x0d\x0a\x09\x0c"
+// Common words (the,for..) detector
+// If a word represents more than KEYW_USELESS1K (per thousand) of total words, then ignore it
+// 5 (0.5%)
+#define KEYW_USELESS1K 5
+// If a word is present in more than KEYW_USELESS1KPG (per thousand) pages, then ignore it
+// 800 (80%)
+#define KEYW_USELESS1KPG 800
+// This number will be reduced by index hit for sorting purpose
+// leave it as it is here if you don't REALLY know what you are doing
+// Yes, I may be the only person, maybe
+#define KEYW_SORT_MAXCOUNT 999999999
+
+/* End of Keyword Indexer Parameters */
+
+/* Position of character c in string adr; callers treat a result >= 0 as "found"
+   (defined outside this excerpt) */
+int strcpos(char* adr,char c);
+/* qsort() comparator used to sort the index lines (defined outside this excerpt) */
+int mystrcmp(const void* _e1,const void* _e2);
+
+// Global variables
+/* 1 until the first page is indexed: index_keyword() then removes the old
+   index.txt / sindex.html files and clears the flag */
+int hts_index_init=1;
+/* number of "word count file" lines written to fp_tmpproject so far */
+int hts_primindex_size=0;
+/* temporary file collecting the index lines of the whole project */
+FILE* fp_tmpproject=NULL;
+/* number of keyword occurrences stored so far (all pages) */
+int hts_primindex_words=0;
+
+#endif
+
+/*
+  Init index
+  Resets the indexer counters and opens a fresh temporary file that will
+  collect the index lines of the project.
+  indexpath: not used here (the removal of the old index files is done by
+             index_keyword() on its first call)
+  Does nothing when HTS_MAKE_KEYWORD_INDEX is disabled.
+*/
+void index_init(const char* indexpath) {
+#if HTS_MAKE_KEYWORD_INDEX
+  /* remove(concat(indexpath,"index.txt")); */
+  /* a temporary file left open by a previous run would be leaked by the
+     assignment below: close it first (its content is discarded anyway) */
+  if (fp_tmpproject) {
+    fclose(fp_tmpproject);
+    fp_tmpproject=NULL;
+  }
+  hts_index_init=1;
+  hts_primindex_size=0;
+  hts_primindex_words=0;
+  fp_tmpproject=tmpfile();    /* may be NULL: users of fp_tmpproject must check it */
+#endif
+}
+
+
+/*
+  Indexing system
+  A little bit dirty, (quick'n dirty, in fact)
+  But should be okay on most cases
+  Tags and javascript handled (ignored)
+
+  Extracts the keywords of one page and appends one line per distinct keyword
+  ("word  KEYW_SORT_MAXCOUNT-count  file") to the project temporary file.
+
+  html_data: page content (size bytes)
+  mime:      MIME type; only text/html, image/svg+xml, image/svg-xml,
+             application/x-javascript and text/css are processed
+  filename:  name of the saved file; the indexpath prefix is removed from it
+             when present
+  indexpath: directory of the index files (old ones are removed on first call)
+
+  Returns 0 if a parameter is missing, the MIME type is not handled, or a
+  temporary resource cannot be created; 1 otherwise (always 1 when
+  HTS_MAKE_KEYWORD_INDEX is disabled).
+*/
+int index_keyword(const char* html_data,LLint size,const char* mime,const char* filename,const char* indexpath) {
+#if HTS_MAKE_KEYWORD_INDEX
+  int intag=0,inscript=0,incomment=0;
+  char keyword[KEYW_LEN+32];
+  int i=0;
+  //
+  int WordIndexSize=1024;
+  inthash WordIndexHash=NULL;
+  FILE *tmpfp=NULL;
+  //
+
+  // Check parameters
+  if (!html_data)
+    return 0;
+  if (!size)
+    return 0;
+  if (!mime)
+    return 0;
+  if (!filename)
+    return 0;
+
+  // Init ?
+  if (hts_index_init) {
+    remove(concat(indexpath,"index.txt"));
+    remove(concat(indexpath,"sindex.html"));
+    hts_index_init=0;
+  }
+
+  // Check MIME type
+  if (strfield2(mime,"text/html")) {
+    inscript=0;
+  }
+  // FIXME - temporary fix for image/svg+xml (svg)
+  // "IN XML" (html like, in fact :) )
+  else if (
+    (strfield2(mime,"image/svg+xml"))
+    ||
+    (strfield2(mime,"image/svg-xml"))
+    ) {
+    inscript=0;
+  }
+  else if (
+    (strfield2(mime,"application/x-javascript"))
+    || (strfield2(mime,"text/css"))
+    ) {
+    inscript=1;
+  } else
+    return 0;
+
+  // Temporary file (lists each distinct keyword of this page once)
+  tmpfp = tmpfile();
+  if (!tmpfp)
+    return 0;
+
+  // Create hash structure
+  // Hash tables rulez da world!
+  WordIndexHash=inthash_new(WordIndexSize);
+  if (!WordIndexHash) {
+    fclose(tmpfp);     /* do not leak the temporary file */
+    return 0;
+  }
+
+  // Start indexing this page
+  keyword[0]='\0';
+  while(i<size) {
+    if (strfield(html_data + i , "<script")) {
+      inscript=1;
+    }
+    else if (strfield(html_data + i , "<!--")) {
+      incomment=1;
+    }
+    else if (strfield(html_data + i , "</script")) {
+      if (!incomment)
+        inscript=0;
+    }
+    else if (strfield(html_data + i , "-->")) {
+      incomment=0;
+    }
+    else if (html_data[i]=='<') {
+      if (!inscript)
+        intag=1;
+    }
+    else if (html_data[i]=='>') {
+      intag=0;
+    }
+    else {
+      // Okay, parse keywords
+      if ( (!inscript) && (!incomment) && (!intag) ) {
+        char cchar=html_data[i];
+        int pos;
+        int len=strlen(keyword);
+
+        // Replace (ignore case, and so on..)
+        if ((pos=strcpos(KEYW_TRANSCODE_FROM,cchar))>=0)
+          cchar=KEYW_TRANSCODE_TO[pos];
+
+        /* strchr() also matches the terminating NUL of its first argument:
+           a NUL byte must be excluded here, or it would count as an accepted
+           character and never reach the separator branch below */
+        if (cchar && strchr(KEYW_ACCEPT,cchar)) {
+          /* Ignore some characters at beginning */
+          if ((len>0) || (!strchr(KEYW_IGNORE_BEG,cchar))) {
+            keyword[len++]=cchar;
+            keyword[len]='\0';
+          }
+        } else if ( (strchr(KEYW_SPACE,cchar)) || (!cchar) ) {
+
+
+          /* Avoid these words */
+          if (len>0) {
+            if (strchr(KEYW_NOT_BEG,keyword[0])) {
+              keyword[(len=0)]='\0';
+            }
+          }
+
+          /* Strip ending . and so */
+          {
+            int ok=0;
+            while((len=strlen(keyword)) && (!ok)) {
+              if (strchr(KEYW_STRIP_END,keyword[len-1])) { /* strip it */
+                keyword[len-1]='\0';
+              } else
+                ok=1;
+            }
+          }
+
+          /* Store it ? */
+          if (len >= KEYW_MIN_LEN ) {
+            hts_primindex_words++;
+            if (inthash_inc(WordIndexHash,keyword)) { /* added new */
+              fprintf(tmpfp,"%s\n",keyword);
+            }
+          }
+          keyword[(len=0)]='\0';
+        } else /* Invalid */
+          keyword[(len=0)]='\0';
+
+        /* Too long to be a keyword: drop what was collected so far */
+        if (len>KEYW_LEN) {
+          keyword[(len=0)]='\0';
+        }
+      }
+
+    }
+
+    i++;
+  }
+
+  // Reset temp file
+  fseek(tmpfp,0,SEEK_SET);
+
+  // Process indexing for this page
+  {
+    //FILE* fp=NULL;
+    //fp=fopen(concat(indexpath,"index.txt"),"ab");
+    if (fp_tmpproject) {
+      while(!feof(tmpfp)) {
+        char line[KEYW_LEN + 32];
+        linput(tmpfp,line,KEYW_LEN + 2);
+        if (strnotempty(line)) {
+          long int e=0;     /* type expected by inthash_read() */
+          if (inthash_read(WordIndexHash,line,&e)) {
+            //if (e) {
+            char savelst[HTS_URLMAXSIZE*2];
+            e++; /* 0 means "once" */
+
+            if (strncmp((const char*)fslash((char*)indexpath),filename,strlen(indexpath))==0) // strip the index path prefix
+              strcpy(savelst,filename+strlen(indexpath));
+            else
+              strcpy(savelst,filename);
+
+            // Add entry for this file and word
+            // (the count is stored as MAXCOUNT-count so that a plain
+            // ascending sort of the lines puts the best hits first)
+            fprintf(fp_tmpproject,"%s %d %s\n",line,(int) (KEYW_SORT_MAXCOUNT - e),savelst);
+            hts_primindex_size++;
+            //}
+          }
+        }
+      }
+      //fclose(fp);
+    }
+  }
+
+  // Delete temp file
+  fclose(tmpfp);
+  tmpfp=NULL;
+
+  // Clear hash table
+  inthash_delete(&WordIndexHash);
+#endif
+  return 1;
+}
+
+/*
+ Sort index!
+*/
+void index_finish(const char* indexpath,int mode) {
+#if HTS_MAKE_KEYWORD_INDEX
+ char** tab;
+ char* blk;
+ int size;
+
+ size=fpsize(fp_tmpproject);
+ if (size>0) {
+ //FILE* fp=fopen(concat(indexpath,"index.txt"),"rb");
+ if (fp_tmpproject) {
+ tab=(char**)malloct(sizeof(char*) * (hts_primindex_size+2) );
+ if (tab) {
+ blk = malloct(size+4);
+ if (blk) {
+ fseek(fp_tmpproject,0,SEEK_SET);
+ if ((int)fread(blk,1,size,fp_tmpproject) == size) {
+ char *a=blk,*b;
+ int index=0;
+ int i;
+ FILE* fp;
+
+ while( (b=strchr(a,'\n')) && (index < hts_primindex_size) ) {
+ tab[index++]=a;
+ *b='\0';
+ a=b+1;
+ }
+
+ // Sort it!
+ qsort(tab,index,sizeof(char*),mystrcmp);
+
+ // Delete fp_tmpproject
+ fclose(fp_tmpproject);
+ fp_tmpproject=NULL;
+
+ // Write new file
+ if (mode == 1) // TEXT
+ fp=fopen(concat(indexpath,"index.txt"),"wb");
+ else // HTML
+ fp=fopen(concat(indexpath,"sindex.html"),"wb");
+ if (fp) {
+ char current_word[KEYW_LEN + 32];
+ char word[KEYW_LEN + 32];
+ int hit;
+ int total_hit=0;
+ int total_line=0;
+ int last_pos=0;
+ char word0='\0';
+ current_word[0]='\0';
+
+ if (mode == 2) { // HTML
+ for(i=0;i<index;i++) {
+ if (word0 != tab[i][0]) {
+ word0 = tab[i][0];
+ fprintf(fp," <a href=\"#%c\">%c</a>\r\n",word0,word0);
+ }
+ }
+ word0='\0';
+ fprintf(fp,"<br><br>\r\n");
+ fprintf(fp,"<table width=\"100%%\" border=\"0\">\r\n<tr>\r\n<td>word</td>\r\n<td>location\r\n");
+ }
+
+ for(i=0;i<index;i++) {
+ if (sscanf(tab[i],"%s %d",word,&hit) == 2) {
+ char* a=strchr(tab[i],' ');
+ if (a) a=strchr(a+1,' ');
+ if (a++) { /* Yes, a++, not ++a :) */
+ hit=KEYW_SORT_MAXCOUNT-hit;
+ if (strcmp(word,current_word)) { /* New word */
+ if (total_hit) {
+ if (mode == 1) // TEXT
+ fprintf(fp,"\t=%d\r\n",total_hit);
+ //else // HTML
+ // fprintf(fp,"<br>(%d total hits)\r\n",total_hit);
+ if (
+ ( ((total_hit*1000 ) / hts_primindex_words) >= KEYW_USELESS1K )
+ ||
+ ( ((total_line*1000) / index ) >= KEYW_USELESS1KPG )
+ ) {
+ fseek(fp,last_pos,SEEK_SET);
+ if (mode == 1) // TEXT
+ fprintf(fp,"\tignored (%d)\r\n",((total_hit*1000)/hts_primindex_words));
+ else
+ fprintf(fp,"(ignored) [%d hits]<br>\r\n",total_hit);
+ }
+ else {
+ if (mode == 1) // TEXT
+ fprintf(fp,"\t(%d)\r\n",((total_hit*1000)/hts_primindex_words));
+ //else // HTML
+ // fprintf(fp,"(%d)\r\n",((total_hit*1000)/hts_primindex_words));
+ }
+ }
+ if (mode == 1) // TEXT
+ fprintf(fp,"%s\r\n",word);
+ else { // HTML
+ fprintf(fp,"</td></tr>\r\n");
+ if (word0 != word[0]) {
+ word0 = word[0];
+ fprintf(fp,"<th>%c</th>\r\n",word0);
+ fprintf(fp,"<a name=\"%c\"></a>\r\n",word0);
+ }
+ fprintf(fp,"<tr>\r\n<td>%s</td>\r\n<td>\r\n",word);
+ }
+ fflush(fp); last_pos=ftell(fp);
+ strcpy(current_word,word);
+ total_hit=total_line=0;
+ }
+ total_hit+=hit;
+ total_line++;
+ if (mode == 1) // TEXT
+ fprintf(fp,"\t%d %s\r\n",hit,a);
+ else // HTML
+ fprintf(fp,"<a href=\"%s\">%s</a> [%d hits]<br>\r\n",a,a,hit);
+ }
+ }
+ }
+ if (mode == 2) // HTML
+ fprintf(fp,"</td></tr>\r\n</table>\r\n");
+ fclose(fp);
+ }
+
+ }
+ freet(blk);
+ }
+ freet(tab);
+ }
+
+ }
+ //qsort
+ }
+ if (fp_tmpproject)
+ fclose(fp_tmpproject);
+ fp_tmpproject=NULL;
+#endif
+}
+
+
+/* Subroutines */
+
+#if HTS_MAKE_KEYWORD_INDEX
+ /* Offset of the first occurrence of c in adr (0-based), or -1 when c does not occur */
+ int strcpos(char* adr,char c) {
+   const char* hit=strchr(adr,c);
+   return (hit != NULL) ? (int)(hit-adr) : -1;
+ }
+
+ /* qsort() comparator: each argument points to a char*; the strings are ordered with strcmp() */
+ int mystrcmp(const void* _e1,const void* _e2) {
+   const char* left=*(char**)_e1;
+   const char* right=*(char**)_e2;
+   return strcmp(left,right);
+ }
+#endif
+
diff --git a/src/htsindex.h b/src/htsindex.h
new file mode 100644
index 0000000..40a189b
--- /dev/null
+++ b/src/htsindex.h
@@ -0,0 +1,48 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: htsindex.h */
+/* keyword indexing system (search index) */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+
+ #ifndef HTSKINDEX_DEFH
+ #define HTSKINDEX_DEFH
+ /* Keyword (search) index: public entry points */
+ #include "htsglobal.h"
+ /* index_keyword: scans html_data for keywords and records them, for the page
+    "filename", in the temporary index.
+    NOTE(review): return value semantics are not confirmed from this header. */
+ int index_keyword(const char* html_data,LLint size,const char* mime,const char* filename,const char* indexpath);
+ void index_init(const char* indexpath); /* NOTE(review): presumably prepares the temporary index - confirm in htsindex.c */
+ void index_finish(const char* indexpath,int mode); /* sorts the temporary index and writes "index.txt" (mode 1) or "sindex.html" (other modes) below indexpath */
+
+ #endif
diff --git a/src/htsjava.c b/src/htsjava.c
new file mode 100644
index 0000000..bb29692
--- /dev/null
+++ b/src/htsjava.c
@@ -0,0 +1,395 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: Java classes parser */
+/* Author: Yann Philippot */
+/* ------------------------------------------------------------ */
+
+
+/* Version: Oct/2000 */
+/* Fixed: problems with class structure (10/2000) */
+
+ // htsjava.c - Java class file parser
+
+#include "stdio.h"
+#include "htssystem.h"
+#include "htscore.h"
+#include "htsjava.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "htsnostatic.h"
+
+//#include <math.h>
+
+ /* REVERSE_ENDIAN: when 1, values read from the class file are byte-swapped
+    (see the "#if REVERSE_ENDIAN" sections of this file) */
+ #ifndef HTS_LITTLE_ENDIAN
+ #define REVERSE_ENDIAN 1
+ #else
+ #define REVERSE_ENDIAN 0
+ #endif
+
+ /* big/little endian swap
+    Every use of the argument is parenthesized, so that an expression such
+    as (x|y) can be passed. The argument is evaluated more than once: do not
+    pass expressions with side effects. */
+ #define hts_swap16(A) ( (((A) & 0xFF)<<8) | (((A) & 0xFF00)>>8) )
+ #define hts_swap32(A) ( (( (hts_swap16(A)) & 0xFFFF)<<16) | (( (hts_swap16((A)>>16)) & 0xFFFF)) )
+
+
+ // ** HTS_xx prefix, otherwise the names are not accepted by VC++
+ /* Entry type tags, as switched on by readtable() */
+ #define HTS_CLASS 7
+ #define HTS_FIELDREF 9
+ #define HTS_METHODREF 10
+ #define HTS_STRING 8
+ #define HTS_INTEGER 3
+ #define HTS_FLOAT 4
+ #define HTS_LONG 5
+ #define HTS_DOUBLE 6
+ #define HTS_INTERFACE 11
+ #define HTS_NAMEANDTYPE 12
+ #define HTS_ASCIZ 1
+ #define HTS_UNICODE 2
+
+ /* Set to 1 to trace the parsing steps on stdout */
+ #define JAVADEBUG 0
+
+ /*
+   Parse a Java class file and register the files it refers to.
+
+   The 10-byte file header is checked (magic 0xCAFEBABE), every entry of the
+   constant table is read with readtable(), then each class entry that is
+   neither this class, nor its superclass, nor an array type, nor a name
+   containing "java/", is passed to hts_add_file() as "<name>.class".
+
+   file:    name of the class file (converted with fconv() before opening)
+   err_msg: receives an error message when 0 is returned
+   returns: 1 on success, 0 on error
+ */
+ int hts_parse_java(char *file,char* err_msg)
+ {
+   FILE *fpout;
+   JAVA_HEADER header;
+   RESP_STRUCT *tab;
+   /* Raw header: magic(4) minor(2) major(2) count(2), stored big-endian.
+      It is decoded byte by byte so that neither the in-memory layout of
+      JAVA_HEADER nor the host byte order matters. */
+   unsigned char raw[10];
+
+ #if JAVADEBUG
+   printf("fopen\n");
+ #endif
+   if ((fpout = fopen(fconv(file), "r+b")) == NULL)
+   {
+     sprintf(err_msg,"Unable to open file %s",file);
+     return 0;    // error
+   }
+
+ #if JAVADEBUG
+   printf("fread\n");
+ #endif
+   if (fread(raw,1,sizeof(raw),fpout) != sizeof(raw)) {   // incomplete header
+     fclose(fpout);
+     sprintf(err_msg,"File header too small (file len = "LLintP")",(LLint)fsize(file));
+     return 0;
+   }
+
+ #if JAVADEBUG
+   printf("header\n");
+ #endif
+   // check the header
+   header.magic = ((unsigned long)raw[0]<<24) | ((unsigned long)raw[1]<<16)
+                | ((unsigned long)raw[2]<<8)  |  (unsigned long)raw[3];
+   header.minor = (unsigned short)((raw[4]<<8) | raw[5]);
+   header.major = (unsigned short)((raw[6]<<8) | raw[7]);
+   header.count = (unsigned short)((raw[8]<<8) | raw[9]);
+   if(header.magic!=0xCAFEBABE) {
+     sprintf(err_msg,"non java file");
+     fclose(fpout);
+     return 0;
+   }
+
+   tab =(RESP_STRUCT*)calloct(header.count,sizeof(RESP_STRUCT));
+   if (!tab) {
+     /* report the size of the whole table, not of a single entry */
+     sprintf(err_msg,"Unable to alloc %d bytes",(int)(header.count*sizeof(RESP_STRUCT)));
+     fclose(fpout);
+     return 0;    // error
+   }
+
+ #if JAVADEBUG
+   printf("calchead\n");
+ #endif
+   {
+     int i;
+
+     /* Entries are numbered from 1 to count-1 */
+     for (i = 1; i < header.count; i++) {
+       int err=0;
+       tab[i]=readtable(fpout,tab[i],&err,err_msg);
+       if (err) {    // a fatal error occurred
+         if (strnotempty(err_msg)==0)
+           strcpy(err_msg,"Internal readtable error");
+         freet(tab);
+         fclose(fpout);
+         return 0;
+       }
+       /* long and double constants take two table slots */
+       if ((tab[i].type == HTS_LONG) ||(tab[i].type == HTS_DOUBLE)) i++;
+     }
+
+   }
+
+
+ #if JAVADEBUG
+   printf("addfiles\n");
+ #endif
+   {
+     unsigned int Class;     /* table index of this class */
+     unsigned int SClass;    /* table index of the superclass */
+     int i;
+     (void) readshort(fpout);   /* access flags: read only to advance in the file */
+     Class = readshort(fpout);
+     SClass = readshort(fpout);
+
+     for (i = 1; i <header.count; i++) {
+       unsigned int ref;
+
+       if (tab[i].type != HTS_CLASS)
+         continue;
+
+       ref = tab[i].index1;      /* entry holding the class name */
+       if (ref >= header.count)
+         break;                  /* broken reference: stop scanning */
+
+       if((ref!=SClass) && (ref!=Class) && (tab[ref].name[0]!='[')) {
+
+         if(!strstr(tab[ref].name,"java/")) {
+           /* room for the longest possible name plus ".class" */
+           char tempo[sizeof(tab[0].name) + 8];
+
+           sprintf(tempo,"%s.class",tab[ref].name);
+ #if JAVADEBUG
+           printf("add %s\n",tempo);
+ #endif
+           if (tab[ref].file_position >= 0)
+             hts_add_file(tempo,tab[ref].file_position);
+         }
+
+       }
+     }
+   }
+
+
+ #if JAVADEBUG
+   printf("end\n");
+ #endif
+   freet(tab);
+   fclose(fpout);
+   return 1;
+ }
+
+
+
+
+// error: !=0 si erreur fatale
+RESP_STRUCT readtable(FILE *fp,RESP_STRUCT trans,int* error,char* err_msg)
+{
+ unsigned short int length;
+ int j;
+ *error = 0; // pas d'erreur
+ trans.file_position=-1;
+ trans.type = (int)(unsigned char)fgetc(fp);
+ switch (trans.type) {
+ case HTS_CLASS:
+ strcpy(trans.name,"Class");
+ trans.index1 = readshort(fp);
+ break;
+
+ case HTS_FIELDREF:
+ strcpy(trans.name,"Field Reference");
+ trans.index1 = readshort(fp);
+ readshort(fp);
+ break;
+
+ case HTS_METHODREF:
+ strcpy(trans.name,"Method Reference");
+ trans.index1 = readshort(fp);
+ readshort(fp);
+ break;
+
+ case HTS_INTERFACE:
+ strcpy(trans.name,"Interface Method Reference");
+ trans.index1 =readshort(fp);
+ readshort(fp);
+ break;
+ case HTS_NAMEANDTYPE:
+ strcpy(trans.name,"Name and Type");
+ trans.index1 = readshort(fp);
+ readshort(fp);
+ break;
+
+ case HTS_STRING: // CONSTANT_String
+ strcpy(trans.name,"String");
+ trans.index1 = readshort(fp);
+ break;
+
+ case HTS_INTEGER:
+ strcpy(trans.name,"Integer");
+ for(j=0;j<4;j++) fgetc(fp);
+ break;
+
+ case HTS_FLOAT:
+ strcpy(trans.name,"Float");
+ for(j=0;j<4;j++) fgetc(fp);
+ break;
+
+ case HTS_LONG:
+ strcpy(trans.name,"Long");
+ for(j=0;j<8;j++) fgetc(fp);
+ break;
+ case HTS_DOUBLE:
+ strcpy(trans.name,"Double");
+ for(j=0;j<8;j++) fgetc(fp);
+ break;
+
+ case HTS_ASCIZ:
+ case HTS_UNICODE:
+
+ if (trans.type == HTS_ASCIZ)
+ strcpy(trans.name,"HTS_ASCIZ");
+ else
+ strcpy(trans.name,"HTS_UNICODE");
+
+ {
+ char buffer[1024];
+ char *p;
+
+ p=&buffer[0];
+
+ //fflush(fp);
+ trans.file_position=ftell(fp);
+ length = readshort(fp);
+ if (length<HTS_URLMAXSIZE) {
+ // while ((length > 0) && (length<500)) {
+ while (length > 0) {
+ *p++ =fgetc(fp);
+
+ length--;
+ }
+ *p='\0';
+
+ //#if JDEBUG
+ // if(tris(buffer)==1) printf("%s\n ",buffer);
+ // if(tris(buffer)==2) printf("%s\n ",printname(buffer));
+ //#endif
+ if(tris(buffer)==1) hts_add_file(buffer,trans.file_position);
+ else if(tris(buffer)==2) hts_add_file(printname(buffer),trans.file_position);
+
+ strcpy(trans.name,buffer);
+ } else { // gros pb
+ while ( (length > 0) && (!feof(fp))) {
+ fgetc(fp);
+ length--;
+ }
+ if (!feof(fp)) {
+ trans.type=-1;
+ } else {
+ sprintf(err_msg,"Internal stucture error (ASCII)");
+ *error = 1;
+ }
+ return(trans);
+ }
+ }
+ break;
+ default:
+ // printf("Type inconnue\n");
+ // on arrête tout
+ sprintf(err_msg,"Internal structure unknown (type %d)",trans.type);
+ *error = 1;
+ return(trans);
+ break;
+ }
+ return(trans);
+}
+
+
+ /*
+   Read a 16-bit unsigned value from fp.
+
+   The two bytes are combined most significant byte first (the byte order
+   of Java class files), whatever the host byte order.
+   Returns 0 when two bytes cannot be read, instead of an indeterminate
+   value.
+ */
+ unsigned short int readshort(FILE *fp)
+ {
+   unsigned char raw[2];
+
+   if (fread(raw,1,sizeof(raw),fp) != sizeof(raw))
+     return 0;
+
+   return (unsigned short int)((raw[0]<<8) | raw[1]);
+ }
+
+ /*
+   Classify a string found in the constant table.
+   returns 2: the string starts with "[L" and does not contain "java/"
+           1: the string looks like a file name (a few known suffixes, a type
+              recognized by get_httptype(), or an extension accepted by is_dyntype())
+           0: anything else
+ */
+ int tris(char * buffer)
+ {
+   char type[256];
+
+   // Java
+   if (buffer[0]=='[' && buffer[1]=='L' && strstr(buffer,"java/")==NULL)
+     return 2;
+
+   if (strstr(buffer,".gif")!=NULL
+       || strstr(buffer,".jpg")!=NULL
+       || strstr(buffer,".jpeg")!=NULL
+       || strstr(buffer,".au")!=NULL)
+     return 1;
+
+   // Other files: test the type
+   type[0]='\0';
+   get_httptype(type,buffer,0);
+   if (strnotempty(type))    // known type
+     return 1;
+
+   // added 05/2001: asp,cgi...
+   if (is_dyntype(get_ext(buffer)))
+     return 1;
+
+   return 0;
+ }
+
+
+ /*
+   Turn a "[L<name>;" string into "<name>.class".
+
+   Returns "" when name does not start with '['. Otherwise the two leading
+   characters are skipped and the text up to the first ';' is copied,
+   followed by ".class". When there is no ';', the copied text is returned
+   without its last character, as before.
+   '/' characters are copied unchanged: the previous code assigned '.' for
+   them but always overwrote it with the source character, and that
+   observable behaviour is kept.
+   Unlike the previous code, the source is never read past its terminator
+   and nothing is written before the start of the result buffer when name
+   is shorter than three characters.
+   The result is stored in the buffer obtained through NOSTATIC_RESERVE.
+ */
+ char * printname(char name[1024])
+ {
+   char* rname;
+   char *src;
+   char *dst;
+   NOSTATIC_RESERVE(rname, char, 1024);
+   rname[0]='\0';
+
+   if (name[0]!='[') return "";
+   if (name[1]=='\0') return (rname);    /* nothing after the '[' */
+
+   src=name+2;
+   dst=rname;
+   /* keep room for ".class" and the terminator */
+   while ( (*src != '\0') && ((dst-rname) < 1024-8) ) {
+     if (*src == ';') {
+       *dst='\0';
+       strcat(rname,".class");
+       return (rname);
+     }
+     *dst++ = *src++;
+   }
+
+   /* no ';' found: drop the last copied character */
+   if (dst != rname)
+     dst--;
+   *dst='\0';
+   return (rname);
+
+ }
diff --git a/src/htsjava.h b/src/htsjava.h
new file mode 100644
index 0000000..66a75a5
--- /dev/null
+++ b/src/htsjava.h
@@ -0,0 +1,69 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: Java classes parser .h */
+/* Author: Yann Philippot */
+/* ------------------------------------------------------------ */
+
+
+ #ifndef HTSJAVA_DEFH
+ #define HTSJAVA_DEFH
+
+ /* Former prototype: LLint fsize(char* s);
+    NOTE(review): hts_parse_java() casts the result of fsize() to LLint - confirm
+    that this int declaration agrees with the definition. */
+ int fsize(char* s);
+ /* Leading fields of a class file: hts_parse_java() expects magic to be 0xCAFEBABE
+    and uses count as the number of constant table slots (entries 1..count-1).
+    NOTE(review): unsigned long is wider than 32 bits on some ABIs; code reading
+    this structure directly from a file would then misplace the fields - verify
+    any such reader. */
+ typedef struct {
+ unsigned long int magic;
+ unsigned short int minor;
+ unsigned short int major;
+ unsigned short int count;
+ } JAVA_HEADER;
+ /* One constant table entry, as filled by readtable() */
+ typedef struct {
+ int file_position; /* offset of the string's length field in the file, or -1 for other entries */
+ /* index1: 16-bit index read for reference-like entries; type: entry type tag;
+    name: entry label, or the text itself for string entries */
+ unsigned int index1;
+ unsigned int type;
+ char name[1024];
+ } RESP_STRUCT;
+
+ /* Parser entry point (returns 1 on success, 0 on error with a message in err_msg) and its helpers */
+ int hts_parse_java(char *file,char* err_msg);
+ RESP_STRUCT affecte(int i1,int i2,RESP_STRUCT *i3,RESP_STRUCT *i4,int i5); /* NOTE(review): no definition in htsjava.c as far as visible - possibly obsolete */
+ /* Disabled prototype: unsigned int swap(long int nomber,int digit); */
+ RESP_STRUCT readtable(FILE *fp,RESP_STRUCT,int*,char*);
+ unsigned short int readshort(FILE *fp);
+ int tris(char*);
+ char * printname(char [1024]);
+
+
+ #endif
diff --git a/src/htslib.c b/src/htslib.c
new file mode 100644
index 0000000..e4e6006
--- /dev/null
+++ b/src/htslib.c
@@ -0,0 +1,4279 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: Subroutines */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+ // Library source file (.c)
+
+#include "htslib.h"
+#include "htsbauth.h"
+
+/* specific definitions */
+#include "htsbase.h"
+#include "htsnet.h"
+#include "htsbauth.h"
+#include "htsthread.h"
+#include "htsnostatic.h"
+#include "htswrap.h"
+#include <stdio.h>
+#if HTS_WIN
+#include <direct.h>
+#else
+#include <unistd.h>
+#endif
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <sys/timeb.h>
+#include <fcntl.h>
+// pour utimbuf
+#if HTS_WIN
+#include <sys/utime.h>
+#else
+#if HTS_PLATFORM!=3
+#include <utime.h>
+#else
+#include <utime.h>
+#endif
+#endif
+/* END specific definitions */
+
+
+
+ /* Debug tracing.
+    Either HTS_DEBUG_CLOSESOCK or HTS_WIDE_DEBUG enables _HTS_WIDE, which
+    defines DEBUG_W/DEBUG_W2: both open "bug.out" on first use, print the
+    literal A (DEBUG_W prefixes it with ":>") and flush the file.
+    NOTE(review): the result of fopen() is not checked before fprintf(). */
+ #if HTS_DEBUG_CLOSESOCK
+ #define _HTS_WIDE 1
+ #endif
+ #if HTS_WIDE_DEBUG
+ #define _HTS_WIDE 1
+ #endif
+ #if _HTS_WIDE
+ FILE* DEBUG_fp=NULL;
+ #define DEBUG_W(A) { if (DEBUG_fp==NULL) DEBUG_fp=fopen("bug.out","wb"); fprintf(DEBUG_fp,":>"A); fflush(DEBUG_fp); }
+ #define DEBUG_W2(A) { if (DEBUG_fp==NULL) DEBUG_fp=fopen("bug.out","wb"); fprintf(DEBUG_fp,A); fflush(DEBUG_fp); }
+ #endif
+
+ /* global variables */
+ int _DEBUG_HEAD;
+ FILE* ioinfo;
+ /* openssl_ctx only exists in builds with HTS_USEOPENSSL */
+ #if HTS_USEOPENSSL
+ SSL_CTX *openssl_ctx = NULL;
+ #endif
+ int IPV6_resolver = 0;
+
+
+ /* Additional detection: names (presumably HTML attribute names - confirm
+    with the parser) in which links are looked for.
+    The list ends with an empty string. */
+ const char hts_detect[][32] = {
+ "archive",
+ "background",
+ "data", // OBJECT
+ "dynsrc",
+ "lowsrc",
+ "profile", // META element
+ "src",
+ "swurl",
+ "url",
+ "usemap",
+ "longdesc", // accessibility
+ "xlink:href", // xml/svg tag
+ ""
+ };
+
+ /* Detection by prefix: names that only have to start with one of these
+    strings. The list ends with an empty string. */
+ const char hts_detectbeg[][32] = {
+ "hotspot", /* hotspot1=..,hotspot2=.. */
+ ""
+ };
+
+ /* Do not detect links inside these. The list ends with an empty string. */
+ const char hts_nodetect[][32] = {
+ "accept-charset",
+ "accesskey",
+ "action",
+ "align",
+ "alt",
+ "axes",
+ "axis",
+ "char",
+ "charset",
+ "cite",
+ "class",
+ "classid",
+ "code",
+ "color",
+ "datetime",
+ "dir",
+ "enctype",
+ "face",
+ "height",
+ "id",
+ "lang",
+ "language",
+ "media",
+ "method",
+ "name",
+ "prompt",
+ "scheme",
+ "size",
+ "style",
+ "target",
+ "title",
+ "type",
+ "valign",
+ "version",
+ "width",
+ ""
+ };
+
+
+ /* Detection of inline javascript snippets: event handler names.
+    The list ends with an empty string. */
+ /* ALSO USED: detection based on the name: onXXX="<tag>" where XXX starts with upper case letter */
+ const char hts_detect_js[][32] = {
+ "onAbort",
+ "onBlur",
+ "onChange",
+ "onClick",
+ "onDblClick",
+ "onDragDrop",
+ "onError",
+ "onFocus",
+ "onKeyDown",
+ "onKeyPress",
+ "onKeyUp",
+ "onLoad",
+ "onMouseDown",
+ "onMouseMove",
+ "onMouseOut",
+ "onMouseOver",
+ "onMouseUp",
+ "onMove",
+ "onReset",
+ "onResize",
+ "onSelect",
+ "onSubmit",
+ "onUnload",
+ ""
+ };
+
+ /* Detection of "...URL=<url>" inside the value. The list ends with an empty string. */
+ const char hts_detectURL[][32] = {
+ "content",
+ ""
+ };
+
+ /* Tags whose URL must be rewritten but not captured.
+    The list ends with an empty string. */
+ const char hts_detectandleave[][32] = {
+ "action",
+ ""
+ };
+
+ /* Do not rename files according to these returned types (they often stand
+    for unknown types). The list ends with an empty string. */
+ const char hts_mime_keep[][32] = {
+ "application/octet-stream",
+ "text/plain",
+ ""
+ };
+
+ /* No known MIME type, but a known extension (dynamic pages).
+    The list ends with an empty string. */
+ const char hts_ext_dynamic[][32] = {
+ "php3",
+ "php",
+ "php4",
+ "php2",
+ "cgi",
+ "asp",
+ "jsp",
+ "pl",
+ /*"exe",*/
+ "cfm",
+ ""
+ };
+
+ /* MIME types: {MIME type, file extension} pairs, ended by the {"",""} pair.
+    Several extensions may be listed for one type, and one extension for
+    several types.
+    NOTE(review): a few pairs look doubtful and should be checked against a
+    MIME registry: "application/iges" with "isg" (possibly "igs"),
+    "image/x-portable-bitmap" with "pgm", "application/x-troff-me" with "ms";
+    the "application/x-shar"/"shar" pair appears twice. */
+ const char hts_mime[][2][32] = {
+ {"application/acad","dwg"},
+ {"application/arj","arj"},
+ {"application/clariscad","ccad"},
+ {"application/drafting","drw"},
+ {"application/dxf","dxf"},
+ {"application/excel","xls"},
+ {"application/i-deas","unv"},
+ {"application/iges","isg"},
+ {"application/iges","iges"},
+ {"application/mac-binhex40","hqx"},
+ {"application/mac-compactpro","cpt"},
+ {"application/msword","doc"},
+ {"application/msword","w6w"},
+ {"application/msword","word"},
+ {"application/mswrite","wri"},
+ /*{"application/octet-stream","dms"},*/
+ /*{"application/octet-stream","lzh"},*/
+ /*{"application/octet-stream","lha"},*/
+ /*{"application/octet-stream","bin"},*/
+ {"application/oda","oda"},
+ {"application/pdf","pdf"},
+ {"application/postscript","ps"},
+ {"application/postscript","ai"},
+ {"application/postscript","eps"},
+ {"application/powerpoint","ppt"},
+ {"application/pro_eng","prt"},
+ {"application/pro_eng","part"},
+ {"application/rtf","rtf"},
+ {"application/set","set"},
+ {"application/sla","stl"},
+ {"application/smil","smi"},
+ {"application/smil","smil"},
+ {"application/smil","sml"},
+ {"application/solids","sol"},
+ {"application/STEP","stp"},
+ {"application/STEP","step"},
+ {"application/vda","vda"},
+ {"application/x-authorware-map","aam"},
+ {"application/x-authorware-seg","aas"},
+ {"application/x-authorware-bin","aab"},
+ {"application/x-cocoa","cco"},
+ {"application/x-csh","csh"},
+ {"application/x-director","dir"},
+ {"application/x-director","dcr"},
+ {"application/x-director","dxr"},
+ {"application/x-mif","mif"},
+ {"application/x-dvi","dvi"},
+ {"application/x-gzip","gz"},
+ {"application/x-gzip","gzip"},
+ {"application/x-hdf","hdf"},
+ {"application/x-javascript","js"},
+ {"application/x-koan","skp"},
+ {"application/x-koan","skd"},
+ {"application/x-koan","skt"},
+ {"application/x-koan","skm"},
+ {"application/x-latex","latex"},
+ {"application/x-netcdf","nc"},
+ {"application/x-netcdf","cdf"},
+ /* {"application/x-sh","sh"}, */
+ /* {"application/x-csh","csh"}, */
+ /* {"application/x-ksh","ksh"}, */
+ {"application/x-shar","shar"},
+ {"application/x-stuffit","sit"},
+ {"application/x-tcl","tcl"},
+ {"application/x-tex","tex"},
+ {"application/x-texinfo","texinfo"},
+ {"application/x-texinfo","texi"},
+ {"application/x-troff","t"},
+ {"application/x-troff","tr"},
+ {"application/x-troff","roff"},
+ {"application/x-troff-man","man"},
+ {"application/x-troff-me","ms"},
+ {"application/x-wais-source","src"},
+ {"application/zip","zip"},
+ {"application/x-zip-compressed","zip"},
+ {"application/x-bcpio","bcpio"},
+ {"application/x-cdlink","vcd"},
+ {"application/x-cpio","cpio"},
+ {"application/x-gtar","tgz"},
+ {"application/x-gtar","gtar"},
+ {"application/x-shar","shar"},
+ {"application/x-shockwave-flash","swf"},
+ {"application/x-sv4cpio","sv4cpio"},
+ {"application/x-sv4crc","sv4crc"},
+ {"application/x-tar","tar"},
+ {"application/x-ustar","ustar"},
+ {"application/x-winhelp","hlp"},
+ {"audio/midi","mid"},
+ {"audio/midi","midi"},
+ {"audio/midi","kar"},
+ {"audio/mpeg","mp3"},
+ {"audio/mpeg","mpga"},
+ {"audio/mpeg","mp2"},
+ {"audio/basic","au"},
+ {"audio/basic","snd"},
+ {"audio/x-aiff","aif"},
+ {"audio/x-aiff","aiff"},
+ {"audio/x-aiff","aifc"},
+ {"audio/x-pn-realaudio","rm"},
+ {"audio/x-pn-realaudio","ram"},
+ {"audio/x-pn-realaudio","ra"},
+ {"audio/x-pn-realaudio-plugin","rpm"},
+ {"audio/x-wav","wav"},
+ {"chemical/x-pdb","pdb"},
+ {"chemical/x-pdb","xyz"},
+ {"drawing/x-dwf","dwf"},
+ {"image/gif","gif"},
+ {"image/ief","ief"},
+ {"image/jpeg","jpg"},
+ {"image/jpeg","jpe"},
+ {"image/jpeg","jpeg"},
+ {"image/pict","pict"},
+ {"image/png","png"},
+ {"image/tiff","tiff"},
+ {"image/tiff","tif"},
+ {"image/svg+xml","svg"},
+ {"image/svg-xml","svg"},
+ {"image/x-cmu-raster","ras"},
+ {"image/x-freehand","fh4"},
+ {"image/x-freehand","fh7"},
+ {"image/x-freehand","fh5"},
+ {"image/x-freehand","fhc"},
+ {"image/x-freehand","fh"},
+ {"image/x-portable-anymap","pnm"},
+ {"image/x-portable-bitmap","pgm"},
+ {"image/x-portable-pixmap","ppm"},
+ {"image/x-rgb","rgb"},
+ {"image/x-xbitmap","xbm"},
+ {"image/x-xpixmap","xpm"},
+ {"image/x-xwindowdump","xwd"},
+ {"model/mesh","msh"},
+ {"model/mesh","mesh"},
+ {"model/mesh","silo"},
+ {"multipart/x-zip","zip"},
+ {"multipart/x-gzip","gzip"},
+ {"text/css","css"},
+ {"text/html","html"},
+ {"text/html","htm"},
+ {"text/plain","txt"},
+ {"text/plain","g"},
+ {"text/plain","h"},
+ {"text/plain","c"},
+ {"text/plain","cc"},
+ {"text/plain","hh"},
+ {"text/plain","m"},
+ {"text/plain","f90"},
+ {"text/richtext","rtx"},
+ {"text/tab-separated-values","tsv"},
+ {"text/x-setext","etx"},
+ {"text/x-sgml","sgml"},
+ {"text/x-sgml","sgm"},
+ {"text/xml","xml"},
+ {"text/xml","dtd"},
+ {"video/mpeg","mpeg"},
+ {"video/mpeg","mpg"},
+ {"video/mpeg","mpe"},
+ {"video/quicktime","qt"},
+ {"video/quicktime","mov"},
+ {"video/x-msvideo","avi"},
+ {"video/x-sgi-movie","movie"},
+ {"x-conference/x-cooltalk","ice"},
+ /*{"application/x-httpd-cgi","cgi"},*/
+ {"x-world/x-vrml","wrl"},
+
+ {"*","class"},
+
+ {"",""}};
+
+
+ /* Character classes used when escaping URLs. Each macro is true when c
+    belongs to the class.
+    NOTE(review): strchr() also matches the terminating NUL of its first
+    argument, so the strchr()-based macros are true for c == 0 - confirm
+    that callers never pass the terminator.
+    Reserved (RFC2396) */
+ #define CHAR_RESERVED(c) ( strchr(";/?:@&=+$,",(unsigned char)(c)) != 0 )
+ // Delimiters (RFC2396)
+ #define CHAR_DELIM(c) ( strchr("<>#%\"",(unsigned char)(c)) != 0 )
+ // Unwise (RFC2396)
+ #define CHAR_UNWISE(c) ( strchr("{}|\\^[]`",(unsigned char)(c)) != 0 )
+ // Special (escape chars) (RFC2396 + >127 ): codes 0..31, and 127 and above (CHAR_SPECIAL evaluates c twice)
+ #define CHAR_LOW(c) ( ((unsigned char)(c) <= 31) )
+ #define CHAR_HIG(c) ( ((unsigned char)(c) >= 127) )
+ #define CHAR_SPECIAL(c) ( CHAR_LOW(c) || CHAR_HIG(c) )
+ // We try to avoid them and encode them instead
+ #define CHAR_XXAVOID(c) ( strchr(" *'\"!",(unsigned char)(c)) != 0 )
+
+
+ // Convert every '/' of s to a backslash (Windows builds only).
+ // The result is stored in the buffer obtained through NOSTATIC_RESERVE;
+ // s is left unchanged. Input longer than the buffer is truncated instead
+ // of overflowing it.
+ #if HTS_WIN
+ char* antislash(char* s) {
+   char* buff;
+   char* a;
+   NOSTATIC_RESERVE(buff, char, HTS_URLMAXSIZE*2);
+
+   /* bounded copy: strncat() always terminates the destination */
+   buff[0]='\0';
+   strncat(buff,s,HTS_URLMAXSIZE*2 - 1);
+
+   /* single pass over the copy */
+   for(a=buff ; *a != '\0' ; a++) {
+     if (*a == '/')
+       *a='\\';
+   }
+   return buff;
+ }
+ #endif
+
+
+
+ // Fetch a file through http.
+ // url is split into address and path with ident_url_absolute(); the
+ // request itself is done by xhttpget(), whose result block is returned.
+ // When the URL cannot be split, a zeroed block is returned with
+ // statuscode -1 and the message "Error invalid URL".
+
+ htsblk httpget(char* url) {
+   char host[HTS_URLMAXSIZE*2];    // address
+   char path[HTS_URLMAXSIZE*2];    // path
+   htsblk failed;
+
+   // split the URL into address+path; on success, do the request
+   if (ident_url_absolute(url,host,path)!=-1)
+     return xhttpget(host,path);
+
+   // predefined result: error
+   memset(&failed, 0, sizeof(htsblk));
+   failed.adr=NULL;
+   failed.size=0;
+   failed.statuscode=-1;
+   strcpy(failed.msg,"Error invalid URL");
+   return failed;
+ }
+
+ // Open an http connection, send a GET request and receive the header.
+ // Returns the socket, as returned by http_xfopen().
+ int http_fopen(char* adr,char* fil,htsblk* retour) {
+   int soc;
+
+   // mode 0 (GET), process the header, wait for the connection, nothing extra to send
+   soc=http_xfopen(0,1,1,NULL,adr,fil,retour);
+   return soc;
+ }
+
+ // Open an http connection and send a request.
+ // mode: 0 GET 1 HEAD [2 POST]
+ // treat: process the response header?
+ // waitconnect: wait for connect()
+ // xsend: passed to http_sendhead() together with the request
+ // retour: result block (may be NULL for a network connection); the proxy
+ //         parameters are read from it
+ // Returns the socket, LOCAL_SOCKET_ID for a local file opened in
+ // retour->fp, or INVALID_SOCKET on error.
+ int http_xfopen(int mode,int treat,int waitconnect,char* xsend,char* adr,char* fil,htsblk* retour) {
+   T_SOC soc=INVALID_SOCKET;
+
+   // predefined result: error
+   if (retour) {
+     retour->adr=NULL;
+     retour->size=0;
+     retour->msg[0]='\0';
+     retour->statuscode=-5;    // a priori a non fatal error
+   }
+
+ #if HDEBUG
+   printf("adr=%s\nfichier=%s\n",adr,fil);
+ #endif
+
+   // open the connection
+ #if HDEBUG
+   printf("Création d'une socket sur %s\n",adr);
+ #endif
+
+ #if CNXDEBUG
+   printf("..newhttp\n");
+ #endif
+
+   /* connection */
+   if (retour) {
+     if ( (!(retour->req.proxy.active))
+       ||
+       (
+       (strcmp(adr,"file://")==0)
+       ||
+       (strncmp(adr,"https://", 8)==0)
+       )
+       ) {   /* no proxy, or proxy not usable here */
+       soc=newhttp(adr,retour,-1,waitconnect);
+     } else {
+       soc=newhttp(retour->req.proxy.name,retour,retour->req.proxy.port,waitconnect);   // open on the proxy instead
+     }
+   } else {
+     soc=newhttp(adr,NULL,-1,waitconnect);
+   }
+
+   // copy the socket into the result block
+   if (retour) retour->soc=soc;
+
+   /* Check for errors */
+   if (soc == INVALID_SOCKET) {
+     if (retour) {
+       if (!strnotempty(retour->msg)) {
+         strcpy(retour->msg,"Connect error");
+       }
+     }
+   }
+
+   // --------------------
+   // shortcut (which also bypasses the proxy)
+   // LOCAL_SOCKET_ID is a local pseudo-socket
+   if (soc==LOCAL_SOCKET_ID) {
+     /* The opened file is handed back through retour->fp: without a
+        result block there is nothing that could be returned */
+     if (!retour)
+       return INVALID_SOCKET;
+
+     retour->is_file=1;    // local file
+     if (mode==0) {    // GET
+
+       // Case of file:///C|... : retry without the first character
+       if (!fexist(fconv(unescape_http(fil))))
+         if (fexist(fconv(unescape_http(fil+1)))) {
+           char tempo[HTS_URLMAXSIZE*2];
+           strcpy(tempo,fil+1);
+           strcpy(fil,tempo);
+         }
+
+       // Open
+       retour->totalsize=fsize(fconv(unescape_http(fil)));    // file size
+       retour->msg[0]='\0';
+       soc=INVALID_SOCKET;
+       if (retour->totalsize<0)
+         strcpy(retour->msg,"Unable to open file");
+       else if (retour->totalsize==0)
+         strcpy(retour->msg,"File empty");
+       else {
+         // Note: a FILE* is used (cleaner than a raw descriptor)
+         retour->fp=fopen(fconv(unescape_http(fil)),"rb");    // open
+         if (retour->fp==NULL)
+           soc=INVALID_SOCKET;
+         else
+           soc=LOCAL_SOCKET_ID;
+       }
+       retour->soc=soc;
+       if (soc!=INVALID_SOCKET) {
+         retour->statuscode=200;    // OK
+         strcpy(retour->msg,"OK");
+         guess_httptype(retour->contenttype,fil);
+       } else if (strnotempty(retour->msg)==0)
+         strcpy(retour->msg,"Unable to open file");
+       return soc;
+     } else {    // HEAD or POST: not allowed on a local file
+       strcpy(retour->msg,"Unexpected Head/Post local request");
+       soc=INVALID_SOCKET;    // error
+       retour->soc=soc;
+       return soc;
+     }
+   }
+   // --------------------
+
+   if (soc!=INVALID_SOCKET) {
+     char rcvd[1100];
+     rcvd[0]='\0';
+ #if HDEBUG
+     printf("Ok, connexion réussie, id=%d\n",soc);
+ #endif
+
+     // connected: send the request
+     if (waitconnect) {
+       http_sendhead(NULL,mode,xsend,adr,fil,NULL,NULL,retour);
+     }
+
+ #if HDEBUG
+     printf("Attente de la réponse:\n");
+ #endif
+
+     // GET (reception of a file): receive the header first, the body
+     // comes afterwards
+     // POST: nothing is received here, the header is fetched later by a
+     // standard reception
+     if ((treat) && (waitconnect)) {    // process (wait for!) the header
+       // Reception of the status line and of the header (RFC1945)
+
+       // status line
+       finput(soc,rcvd,1024);
+       if (strnotempty(rcvd)==0)
+         finput(soc,rcvd,1024);    // "some buggy servers send a \n first" (RFC)
+
+       // process the status line
+       treatfirstline(retour,rcvd);
+
+ #if HDEBUG
+       printf("Status-Code=%d\n",retour->statuscode);
+ #endif
+
+       // header lines, up to the first empty line
+       // ** warning: HTTP/0.9 is not supported
+       do {
+         finput(soc,rcvd,1024);
+ #if HDEBUG
+         printf(">%s\n",rcvd);
+ #endif
+         if (strnotempty(rcvd))
+           treathead(NULL,NULL,NULL,retour,rcvd);    // process
+
+       } while(strnotempty(rcvd));
+
+     } else {    // the header will be received LATER
+       // the size is not known yet
+       if (retour)
+         retour->totalsize=-1;
+     }
+
+   }
+
+   return soc;
+ }
+
+
+/*
+ * Build an HTTP request for adr/fil and send it with sendc() on retour.
+ *
+ * cookie       cookie store used for the Cookie: header; may be NULL
+ *              (it is also passed to bauth_add()/bauth_check())
+ * mode         0: GET, or POST when fil carries a POSTTOK":" tag;
+ *              other values: HEAD (sent as GET when HTTP/1.0 is forced)
+ * xsend        optional extra header lines added after the request line; may be NULL
+ * adr, fil     address part and path part of the URL
+ * referer_adr, referer_fil   optional referring URL; may be NULL
+ * retour       supplies the request options (retour->req); on send failure
+ *              receives msg="Broken pipe" and soc=INVALID_SOCKET
+ *
+ * Undocumented URL tags (POSTTOK is presumably ">post", as in the examples):
+ *   http://www.someweb.com/test.cgi?foo>post:posteddata=10&foo=5
+ *     -> POST request whose body is the text following the tag
+ *   http://www.someweb.com/test.cgi?foo>postfile:post0.txt
+ *     -> the raw request (request line + headers + body) is read from the file
+ *
+ * Always returns 0; errors are only reported through retour.
+ */
+int http_sendhead(t_cookie* cookie,int mode,char* xsend,char* adr,char* fil,char* referer_adr,char* referer_fil,htsblk* retour) {
+  char buff[8192];
+  int direct_url=0;        // 1: do not analyse the url for user/pass (example: ftp://)
+  char* search_tag=NULL;   // position of the post/postfile tag inside fil, if any
+  buff[0]='\0';
+
+  // look for a post tag first, then for a postfile tag
+  search_tag=strstr(fil,POSTTOK":");
+  if (!search_tag) {
+    search_tag=strstr(fil,POSTTOK"file:");
+    if (search_tag) {     // postfile
+      if (mode==0) {      // GET!
+        FILE* fp=fopen(unescape_http(search_tag+strlen(POSTTOK)+5),"rb");
+        if (fp) {
+          char line[1100];
+          char protocol[256],url[HTS_URLMAXSIZE*2],method[256];
+          linput(fp,line,1000);
+          // field widths: an over-long token must not overflow method[] or protocol[]
+          if (sscanf(line,"%255s %s %255s",method,url,protocol) == 3) {
+            size_t len;
+            // a proxy needs an absolute URI in the request line
+            if (retour->req.proxy.active)
+              sprintf(buff,"%s http://%s%s %s\r\n",method,adr,url,protocol);
+            else
+              sprintf(buff,"%s %s %s\r\n",method,url,protocol);
+            // append the rest of the file verbatim, and terminate it:
+            // buff is handled as a C string from here on
+            len=strlen(buff);
+            if (len < 8000) {
+              size_t got=fread(buff+len,1,8000-len,fp);
+              buff[len+got]='\0';
+            }
+          }
+          fclose(fp);
+        }
+      }
+    }
+  }
+  // end of postfile handling
+
+  if (strnotempty(buff)==0) {    // no postfile request was built: build a regular one
+    // request method
+    if ((search_tag) && (mode==0)) {
+      strcat(buff,"POST ");
+    } else if (mode==0) {    // GET
+      strcat(buff,"GET ");
+    } else {
+      if (!retour->req.http11)        // HTTP/1.0 forced
+        strcat(buff,"GET ");          // some servers (cgi) misbehave with HEAD
+      else
+        strcat(buff,"HEAD ");
+    }
+
+    // through a proxy an absolute URI is required: prepend http://www.adr.dom
+    if (retour->req.proxy.active) {
+      if (!link_has_authority(adr)) {  // default http
+#if HDEBUG
+        printf("Proxy Use: for %s%s proxy %s port %d\n",adr,fil,retour->req.proxy.name,retour->req.proxy.port);
+#endif
+        strcat(buff,"http://");
+        strcat(buff,jump_identification(adr));
+      } else {                         // ftp:// through an http proxy
+#if HDEBUG
+        printf("Proxy Use for ftp: for %s%s proxy %s port %d\n",adr,fil,retour->req.proxy.name,retour->req.proxy.port);
+#endif
+        direct_url=1;                  // do not analyse user/pass
+        strcat(buff,adr);
+      }
+    }
+
+    // file name
+    // a leading slash must be present, otherwise the server answers bad request (400)
+    if (*fil!='/') strcat(buff,"/");
+    {
+      char tempo[HTS_URLMAXSIZE*2];
+      tempo[0]='\0';
+      if (search_tag)
+        strncat(tempo,fil,(int) (search_tag - fil));   // path without the post tag
+      else
+        strcpy(tempo,fil);
+      escape_check_url(tempo);
+      strcat(buff,tempo);       // escaped
+    }
+
+    // protocol
+    if (!retour->req.http11) {   // HTTP/1.0 forced
+      strcat(buff," HTTP/1.0\x0d\x0a");
+    } else {                     // 1.1 request
+      strcat(buff," HTTP/1.1\x0d\x0a");
+    }
+
+    /* supplemental data */
+    if (xsend) strcat(buff,xsend);    // optional extra lines
+
+    // proxy authentication
+    if (retour->req.proxy.active) {
+      if (link_has_authorization(retour->req.proxy.name)) {
+        char* a=jump_identification(retour->req.proxy.name);
+        char* astart=jump_protocol(retour->req.proxy.name);
+        char autorisation[1100];
+        char user_pass[256];
+        // length of the credentials between astart and a, minus one separator
+        // (presumably the '@') -- must fit in user_pass[]
+        int cred_len=(int) (a - astart) - 1;
+        autorisation[0]=user_pass[0]='\0';
+        if ((cred_len >= 0) && (cred_len < (int) sizeof(user_pass))) {
+          strncat(user_pass,astart,cred_len);
+          strcpy(user_pass,unescape_http(user_pass));
+          code64(user_pass,autorisation);
+          strcat(buff,"Proxy-Authorization: Basic ");
+          strcat(buff,autorisation);
+          strcat(buff,H_CRLF);
+#if HDEBUG
+          printf("Proxy-Authenticate, %s (code: %s)\n",user_pass,autorisation);
+#endif
+        }
+      }
+    }
+
+    // Referer?
+    if ((referer_adr) && (referer_fil)) {       // given
+      if ((strnotempty(referer_adr)) && (strnotempty(referer_fil))) {     // not empty
+        if (
+          (strcmp(referer_adr,"file://") != 0)
+          &&
+          (  /* no https referer to http urls */
+            (strncmp(referer_adr, "https://", 8) != 0)  /* referer is not https */
+            ||
+            (strncmp(adr, "https://", 8) == 0)          /* or referer AND addresses are https */
+          )
+        ) {      // not file://
+          strcat(buff,"Referer: ");
+          strcat(buff,"http://");
+          strcat(buff,jump_identification(referer_adr));
+          strcat(buff,referer_fil);
+          strcat(buff,H_CRLF);
+        }
+      }
+    }
+
+    // POST: announce the size of the body appended after the header
+    if (mode==0) {      // GET!
+      if (search_tag) {
+        char clen[256];
+        sprintf(clen,"Content-length: %d"H_CRLF,(int)(strlen(unescape_http(search_tag+strlen(POSTTOK)+1))));
+        strcat(buff,clen);
+      }
+    }
+
+    // cookies
+    if (cookie) {
+      char* b=cookie->data;
+      int cook=0;              // 1 once the Cookie: header has been started
+      int max_cookies=8;       // at most 8 cookies ..
+      int max_size=2048;       // .. and about 2048 bytes of cookie data
+      max_size+=strlen(buff);
+      do {
+        b=cookie_find(b,"",jump_identification(adr),fil);       // next cookie matching host and path
+        if (b) {
+          max_cookies--;
+          if (!cook) {
+            strcat(buff,"Cookie: ");
+            strcat(buff,"$Version=1; ");
+            cook=1;
+          } else
+            strcat(buff,"; ");
+          strcat(buff,cookie_get(b,5));
+          strcat(buff,"=");
+          strcat(buff,cookie_get(b,6));
+          strcat(buff,"; $Path=");
+          strcat(buff,cookie_get(b,2));
+          b=cookie_nextfield(b);
+        }
+      } while( (b) && (max_cookies>0) && ((int)strlen(buff)<max_size));
+      if (cook) {                // at least one cookie was written
+        strcat(buff,H_CRLF);
+#if DEBUG_COOK
+        printf("Header:\n%s\n",buff);
+#endif
+      }
+    }
+
+    // no keep-alive: always ask the server to close the connection
+    strcat(buff,"Connection: close"H_CRLF);
+
+    {
+      char* real_adr=jump_identification(adr);
+      if (!direct_url) {     // not ftp:// for example
+        strcat(buff,"Host: "); strcat(buff,real_adr); strcat(buff,H_CRLF);
+      }
+
+      // user-agent
+      if (retour->req.user_agent_send) {
+        // written straight into buff: no fixed-size intermediate buffer to overflow
+        strcat(buff,"User-Agent: ");
+        strcat(buff,retour->req.user_agent);
+        strcat(buff,H_CRLF);
+
+        // for picky servers
+        strcat(buff,"Accept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, image/svg+xml, */*"H_CRLF);
+        if (strnotempty(retour->req.lang_iso)) {
+          strcat(buff,"Accept-Language: "); strcat(buff,retour->req.lang_iso); strcat(buff,H_CRLF);
+        }
+        strcat(buff,"Accept-Charset: iso-8859-1, *"H_CRLF);
+        if (retour->req.http11) {
+#if HTS_USEZLIB
+          if ((!retour->req.range_used) && (!retour->req.nocompression))
+            strcat(buff,"Accept-Encoding: gzip, deflate, compress, identity"H_CRLF);
+          else
+            strcat(buff,"Accept-Encoding: identity"H_CRLF);     /* no compression */
+#else
+          strcat(buff,"Accept-Encoding: identity"H_CRLF);       /* no compression */
+#endif
+        }
+      } else {
+        strcat(buff,"Accept: */*"H_CRLF);         // the bare minimum
+      }
+
+      /* Authentication */
+      {
+        char autorisation[1100];
+        char* stored;          // authorization remembered by bauth_check(), if any
+        autorisation[0]='\0';
+        if (link_has_authorization(adr)) {      // credentials given in the URL
+          char* a=jump_identification(adr);
+          char* astart=jump_protocol(adr);
+          if (!direct_url) {      // not ftp:// for example
+            char user_pass[256];
+            // same bound as for the proxy credentials: must fit in user_pass[]
+            int cred_len=(int) (a - astart) - 1;
+            user_pass[0]='\0';
+            if ((cred_len >= 0) && (cred_len < (int) sizeof(user_pass))) {
+              strncat(user_pass,astart,cred_len);
+              strcpy(user_pass,unescape_http(user_pass));
+              code64(user_pass,autorisation);
+              if (strcmp(fil,"/robots.txt"))      /* not for robots.txt */
+                bauth_add(cookie,astart,fil,autorisation);
+            }
+          }
+        } else if ( (stored=bauth_check(cookie,real_adr,fil)) )
+          strcpy(autorisation,stored);
+        /* Is there an authorization to send? */
+        if (strnotempty(autorisation)) {
+          strcat(buff,"Authorization: Basic ");
+          strcat(buff,autorisation);
+          strcat(buff,H_CRLF);
+        }
+      }
+
+    }
+
+    // CRLF ending the header
+    strcat(buff,H_CRLF);
+
+    // POST body
+    if (search_tag)
+      if (mode==0)      // GET!
+        strcat(buff,unescape_http(search_tag+strlen(POSTTOK)+1));
+  }
+  // end of the regular (non-postfile) request
+
+  // optional trace of the request
+  if (_DEBUG_HEAD) {
+    if (ioinfo) {
+      fprintf(ioinfo,"request for %s%s:\r\n",jump_identification(adr),fil);
+      fprintfio(ioinfo,buff,"<<< ");
+      fprintf(ioinfo,"\r\n");
+      fflush(ioinfo);
+    }
+  }
+
+  // send
+  if (sendc(retour, buff)<0) {  // error, broken socket?
+    deletesoc_r(retour);        // close it anyway
+
+    strcpy(retour->msg,"Broken pipe");
+    retour->soc=INVALID_SOCKET;
+  }
+
+  return 0;
+}
+
+
+
+
+// Parse the first line of a response (status line), for example:
+//   HTTP/1.0 200 OK
+// Fills retour->statuscode and retour->msg, and sets retour->contenttype to
+// HTS_HYPERTEXT_DEFAULT_MIME when a status code (real or assumed) is accepted.
+// An empty line, or a line starting with '<', is accepted as status 200
+// ("junky server"); any other non HTTP/ line gives statuscode -1.
+void treatfirstline(htsblk* retour,char* rcvd) {
+  char* p=rcvd;
+  int junky=0;      // 1: no usable status line, assume the server meant 200
+
+  if (*p == '\0') {
+    junky=1;        // empty line
+  } else {
+    // some broken servers send "HTTP/1.0\n200 OK" or " HTTP/1.0 200 OK"
+    while ((*p==' ') || (*p==10) || (*p==13) || (*p==9)) p++;
+
+    if (!strfield(p, "HTTP/")) {
+      if ((*p == '<') || (strnotempty(p)==0)) {
+        junky=1;    // looks like a body, or only blanks
+      } else {
+        retour->statuscode=-1;
+        strcpy(retour->msg,"Unknown response structure, no HTTP/ response given");
+      }
+    } else {
+      // skip the HTTP/1.x token, then the blanks following it
+      while ((*p!=' ') && (*p!='\0') && (*p!=10) && (*p!=13) && (*p!=9)) p++;
+      if (*p != '\0')
+        while ((*p==' ') || (*p==10) || (*p==13) || (*p==9)) p++;
+
+      if ((*p>='0') && (*p<='9')) {
+        sscanf(p,"%d",&(retour->statuscode));
+        // skip the status code, then the blanks following it
+        while ((*p!=' ') && (*p!='\0') && (*p!=10) && (*p!=13) && (*p!=9)) p++;
+        while ((*p==' ') || (*p==10) || (*p==13) || (*p==9)) p++;
+        // reason phrase: the server's one if reasonable, else a standard one
+        if ((strlen(p) > 1) && (strlen(p) < 64) )
+          strcpy(retour->msg,p);
+        else
+          infostatuscode(retour->msg,retour->statuscode);
+        // default MIME type
+        strcpy(retour->contenttype,HTS_HYPERTEXT_DEFAULT_MIME);
+      } else {      // no status code after HTTP/x.y
+        retour->statuscode=-1;
+        strcpy(retour->msg,"Unknown response structure");
+      }
+    }
+  }
+
+  if (junky) {
+    /* This is dirty .. */
+    retour->statuscode=200;
+    strcpy(retour->msg, "Unknown, assuming junky server");
+    strcpy(retour->contenttype,HTS_HYPERTEXT_DEFAULT_MIME);
+  }
+}
+
+/*
+ * Process one response header line (rcvd) and store what it carries in retour.
+ *
+ * cookie   cookie store; Set-Cookie: lines are only processed when non-NULL
+ * adr      address of the request, used as default cookie domain; may be NULL
+ * fil      not used by this function
+ * retour   response block to fill; must not be NULL (several branches
+ *          dereference it without a check)
+ * rcvd     the header line; it may be modified (cut at the first ';' for
+ *          Content-type: and Content-Encoding:)
+ *
+ * Recognised fields: Content-length, Content-Disposition, Last-Modified,
+ * Date (as fallback for Last-Modified), Etag, Transfer-Encoding: chunked,
+ * Content-type, Content-Range, Content-Encoding, Location, Set-Cookie.
+ * Keep-alive related fields are recognised and ignored.
+ */
+void treathead(t_cookie* cookie,char* adr,char* fil,htsblk* retour,char* rcvd) {
+  int p;
+  if ((p=strfield(rcvd,"Content-length:"))!=0) {
+#if HDEBUG
+    printf("ok, Content-length: détecté\n");
+#endif
+    sscanf(rcvd+p,LLintP,&(retour->totalsize));
+  }
+  else if ((p=strfield(rcvd,"Content-Disposition:"))!=0) {
+    while(*(rcvd+p)==' ') p++;    // skip spaces
+    if ((int) strlen(rcvd+p)<250) { // not too long?
+      char tmp[256];
+      char *a=NULL,*b=NULL;
+      strcpy(tmp,rcvd+p);
+      a=strstr(tmp,"filename=");
+      if (a) {
+        char *c=NULL;
+        a+=strlen("filename=");
+        while(is_space(*a)) a++;
+        while((c=strchr(a,'/')))    /* skip all / (see RFC2616) */
+          a=c+1;
+        // cut trailing spaces
+        b=a+strlen(a)-1;
+        while(is_space(*b)) b--;
+        b++;
+        *b='\0';
+        if ((int) strlen(a) < 200) { // not too long?
+          strcpy(retour->cdispo,a);
+        }
+      }
+    }
+  }
+  else if ((p=strfield(rcvd,"Last-Modified:"))!=0) {
+    while(*(rcvd+p)==' ') p++;    // skip spaces
+    if ((int) strlen(rcvd+p)<64) { // not too long?
+      strcpy(retour->lastmodified,rcvd+p);
+    }
+  }
+  else if ((p=strfield(rcvd,"Date:"))!=0) {
+    if (strnotempty(retour->lastmodified)==0) {          /* no last-modified yet */
+      while(*(rcvd+p)==' ') p++;    // skip spaces
+      if ((int) strlen(rcvd+p)<64) { // not too long?
+        strcpy(retour->lastmodified,rcvd+p);
+      }
+    }
+  }
+  else if ((p=strfield(rcvd,"Etag:"))!=0) {   /* Etag */
+    if (retour) {
+      while(*(rcvd+p)==' ') p++;    // skip spaces
+      if ((int) strlen(rcvd+p)<64)  // not too long?
+        strcpy(retour->etag,rcvd+p);
+      else    // error.. ignore
+        retour->etag[0]='\0';
+    }
+  }
+  else if ((p=strfield(rcvd,"Transfer-Encoding: chunked"))!=0) {  // chunk!
+    retour->is_chunk=1;     // chunked
+#if HDEBUG
+    printf("ok, Transfer-Encoding: détecté\n");
+#endif
+  }
+  else if ((p=strfield(rcvd,"Content-type:"))!=0) {
+    if (retour) {
+      char tempo[1100];
+      // drop parameters such as "text/html; charset=foo"
+      char* a=strchr(rcvd+p,';');
+      if (a) *a='\0';
+      // bounded read; an empty value leaves the current content type untouched
+      // instead of copying an uninitialised buffer
+      tempo[0]='\0';
+      if (sscanf(rcvd+p,"%1099s",tempo) == 1) {
+        if (strlen(tempo)<64)    // not too long
+          strcpy(retour->contenttype,tempo);
+        else
+          strcpy(retour->contenttype,"application/octet-stream-unknown");    // error
+      }
+    }
+  }
+  else if ((p=strfield(rcvd,"Content-Range:"))!=0) {
+    char* a=strstr(rcvd+p,"*/");
+    if (a) {
+      if (sscanf(a+2,LLintP,&retour->crange) != 1) {
+        retour->crange=0;
+      }
+    }
+  }
+  else if ((p=strfield(rcvd,"Content-Encoding:"))!=0) {
+    if (retour) {
+      char tempo[1100];
+      char* a=strchr(rcvd+p,';');
+      if (a) *a='\0';
+      // bounded read; an empty or over-long value means "no known encoding"
+      tempo[0]='\0';
+      if ((sscanf(rcvd+p,"%1099s",tempo) == 1) && (strlen(tempo)<64))
+        strcpy(retour->contentencoding,tempo);
+      else
+        retour->contentencoding[0]='\0';    // error
+#if HTS_USEZLIB
+      /* Check known encodings */
+      if (retour->contentencoding[0]) {
+        if (
+          (strfield2(retour->contentencoding, "gzip"))
+          || (strfield2(retour->contentencoding, "x-gzip"))
+          /*
+          || (strfield2(retour->contentencoding, "compress"))
+          || (strfield2(retour->contentencoding, "x-compress"))
+          */
+          || (strfield2(retour->contentencoding, "deflate"))
+          || (strfield2(retour->contentencoding, "x-deflate"))
+          ) {
+          retour->compressed=1;
+        }
+      }
+#endif
+    }
+  }
+  else if ((p=strfield(rcvd,"Location:"))!=0) {
+    if (retour) {
+      if (retour->location) {
+        while(*(rcvd+p)==' ') p++;    // skip spaces
+        if ((int) strlen(rcvd+p)<HTS_URLMAXSIZE)  // not too long?
+          strcpy(retour->location,rcvd+p);
+        else    // error.. ignore
+          retour->location[0]='\0';
+      }
+    }
+  }
+  else if ((p=strfield(rcvd,"Connection: Keep-Alive"))!=0) {
+    // no keep-alive: the connection will be closed anyway
+  }
+  else if ((p=strfield(rcvd,"Keep-Alive:"))!=0) {    // keep-alive parameters
+    // nothing to do
+  }
+  else if ( ((p=strfield(rcvd,"Set-Cookie:"))!=0) && (cookie) ) {    // a cookie
+    char* a = rcvd+p;           // read position
+    char domain[256];           // cookie domain (.netscape.com)
+    char path[256];             // path (/)
+    char cook_name[256];        // cookie name (MYCOOK)
+    char cook_value[8192];      // value (ID=toto,S=1234)
+#if DEBUG_COOK
+    printf("set-cookie detected\n");
+#endif
+    while(*a) {
+      char *token_st,*token_end;
+      char *value_st,*value_end;
+      char name[256];
+      char value[8192];
+      int next=0;               // 1: a second cookie starts, store the current one first
+      name[0]=value[0]='\0';
+
+      // defaults for the cookie being read; domain is always initialised and
+      // the copy from adr is bounded to the buffer size
+      domain[0]='\0';
+      if (adr)
+        strncat(domain,jump_identification(adr),sizeof(domain)-1);
+      strcpy(path,"/");         // path (/)
+      strcpy(cook_name,"");     // cookie name (MYCOOK)
+      strcpy(cook_value,"");    // value (ID=toto,S=1234)
+      // loop until the next cookie, or the end of the line
+      do {
+        char* start_loop=a;
+        while(is_space(*a)) a++;    // skip spaces
+        token_st=a;                 // token start
+        while((!is_space(*a)) && (*a) && (*a!=';') && (*a!='=')) a++;    // stop at space, ';' or '='
+        token_end=a;
+        while(is_space(*a)) a++;    // skip spaces
+        if (*a=='=') {    // name=value
+          a++;
+          while(is_space(*a)) a++;    // skip spaces
+          value_st=a;
+          while( (*a!=';') && (*a)) a++;    // up to the next ;
+          value_end=a;
+          // reject empty or over-long name/value (name[] and value[] bounds)
+          if ( (((int) (token_end - token_st))<200) && (((int) (value_end - value_st))<8000)
+            && (((int) (token_end - token_st))>0)   && (((int) (value_end - value_st))>0) ) {
+            name[0]='\0';
+            value[0]='\0';
+            strncat(name,token_st,(int) (token_end - token_st));
+            strncat(value,value_st,(int) (value_end - value_st));
+#if DEBUG_COOK
+            printf("detected cookie-av: name=\"%s\" value=\"%s\"\n",name,value);
+#endif
+            if (strfield2(name,"domain")) {
+              // value may be up to 8000 bytes: only accept what fits in domain[]
+              if (strlen(value) < sizeof(domain))
+                strcpy(domain,value);
+            }
+            else if (strfield2(name,"path")) {
+              // same bound for path[]
+              if (strlen(value) < sizeof(path))
+                strcpy(path,value);
+            }
+            else if (strfield2(name,"max-age")) {
+              // ignored
+            }
+            else if (strfield2(name,"expires")) {
+              // ignored
+            }
+            else if (strfield2(name,"version")) {
+              // ignored
+            }
+            else if (strfield2(name,"comment")) {
+              // ignored
+            }
+            else if (strfield2(name,"secure")) {    // should not happen here
+              // ignored
+            }
+            else {
+              if (strnotempty(cook_name)==0) {    // first pair: cookie name and value
+                strcpy(cook_name,name);
+                strcpy(cook_value,value);
+              } else {                             // a new cookie starts here
+                a=start_loop;      // it will be re-read from this position
+                next=1;            // store the current cookie
+              }
+            }
+          }
+        }
+        if (!next) {
+          while((*a!=';') && (*a)) a++;    // next attribute
+          while(*a==';') a++;              // skip ;
+        }
+      } while((*a) && (!next));
+      if (strnotempty(cook_name)) {          // got a cookie?
+#if DEBUG_COOK
+        printf("new cookie: name=\"%s\" value=\"%s\" domain=\"%s\" path=\"%s\"\n",cook_name,cook_value,domain,path);
+#endif
+        cookie_add(cookie,cook_name,cook_value,domain,path);
+      }
+    }
+  }
+}
+
+
+// Write into msg the standard reason phrase of an HTTP status code.
+// For a code that is not in the table, msg is left as it is, unless it is
+// empty, in which case it becomes "Unknown error".
+void infostatuscode(char* msg,int statuscode) {
+  // HTTP status codes and their reason phrases, according to the RFC
+  static const struct {
+    int code;
+    const char* text;
+  } reasons[] = {
+    { 100, "Continue" },
+    { 101, "Switching Protocols" },
+    { 200, "OK" },
+    { 201, "Created" },
+    { 202, "Accepted" },
+    { 203, "Non-Authoritative Information" },
+    { 204, "No Content" },
+    { 205, "Reset Content" },
+    { 206, "Partial Content" },
+    { 300, "Multiple Choices" },
+    { 301, "Moved Permanently" },
+    { 302, "Moved Temporarily" },
+    { 303, "See Other" },
+    { 304, "Not Modified" },
+    { 305, "Use Proxy" },
+    { 306, "Undefined 306 error" },
+    { 307, "Temporary Redirect" },
+    { 400, "Bad Request" },
+    { 401, "Unauthorized" },
+    { 402, "Payment Required" },
+    { 403, "Forbidden" },
+    { 404, "Not Found" },
+    { 405, "Method Not Allowed" },
+    { 406, "Not Acceptable" },
+    { 407, "Proxy Authentication Required" },
+    { 408, "Request Time-out" },
+    { 409, "Conflict" },
+    { 410, "Gone" },
+    { 411, "Length Required" },
+    { 412, "Precondition Failed" },
+    { 413, "Request Entity Too Large" },
+    { 414, "Request-URI Too Large" },
+    { 415, "Unsupported Media Type" },
+    { 416, "Requested Range Not Satisfiable" },
+    { 417, "Expectation Failed" },
+    { 500, "Internal Server Error" },
+    { 501, "Not Implemented" },
+    { 502, "Bad Gateway" },
+    { 503, "Service Unavailable" },
+    { 504, "Gateway Time-out" },
+    { 505, "HTTP Version Not Supported" }
+  };
+  int i;
+
+  for(i=0 ; i < (int) (sizeof(reasons)/sizeof(reasons[0])) ; i++) {
+    if (reasons[i].code == statuscode) {
+      strcpy(msg,reasons[i].text);
+      return;
+    }
+  }
+
+  // unknown code: keep a message that is already there
+  if (strnotempty(msg)==0)
+    strcpy(msg,"Unknown error");
+}
+
+
+// Fetch a document given as address (adr) + path (fil) rather than as a
+// complete URL: open with http_fopen(), read everything with http_fread(),
+// then close. The returned block always has soc==INVALID_SOCKET.
+htsblk xhttpget(char* adr,char* fil) {
+  htsblk retour;
+  T_SOC soc;
+
+  memset(&retour, 0, sizeof(htsblk));
+
+  soc=http_fopen(adr,fil,&retour);
+  if (soc==INVALID_SOCKET)
+    return retour;        // could not open: retour is returned as http_fopen() left it
+
+  http_fread(soc,&retour);
+#if HTS_DEBUG_CLOSESOCK
+  DEBUG_W("xhttpget: deletehttp\n");
+#endif
+  // close, unless the socket was already invalidated while reading
+  if (retour.soc!=INVALID_SOCKET)
+    deletehttp(&retour);
+  retour.soc=INVALID_SOCKET;
+
+  return retour;
+}
+
+// Variation of xhttpget(): the connection is opened with
+// http_xfopen(1,0,1,...) (HEAD request, header not parsed while opening)
+// instead of http_fopen(), so that http_fread() receives the header itself.
+// The received data is left in the returned block (adr field); the returned
+// block always has soc==INVALID_SOCKET.
+htsblk http_gethead(char* adr,char* fil) {
+  htsblk retour;
+  T_SOC soc;
+
+  memset(&retour, 0, sizeof(htsblk));
+
+  soc=http_xfopen(1,0,1,NULL,adr,fil,&retour);   // HEAD, header not parsed
+  if (soc==INVALID_SOCKET)
+    return retour;        // could not open
+
+  http_fread(soc,&retour);                       // receive the header
+#if HTS_DEBUG_CLOSESOCK
+  DEBUG_W("http_gethead: deletehttp\n");
+#endif
+  // close, unless the socket was already invalidated while reading
+  if (retour.soc!=INVALID_SOCKET)
+    deletehttp(&retour);
+  retour.soc=INVALID_SOCKET;
+
+  return retour;
+}
+
+
+// Read everything available on an open socket into retour.
+// For a GET the header has already been handled, only the data remains;
+// for a HEAD the header itself is read here.
+// soc is stored in retour->soc; reading goes on until http_fread1() returns -1.
+// If nothing could be stored (retour->adr still NULL), retour->msg is set to
+// "Unable to read" unless a message is already present.
+// Does nothing when retour is NULL or soc is INVALID_SOCKET.
+void http_fread(T_SOC soc,htsblk* retour) {
+  // retour is needed by everything below
+  if (retour == NULL)
+    return;
+
+  retour->soc=soc;
+  if (soc == INVALID_SOCKET)
+    return;
+
+  // read block after block until the end (or an error)
+  while(http_fread1(retour)!=-1);
+
+  if (retour->adr==NULL) {
+    if (strnotempty(retour->msg)==0)
+      sprintf(retour->msg,"Unable to read");
+    return;    // error
+  }
+
+#if HDEBUG
+  printf("Ok, données reçues\n");
+#endif
+}
+
+// Tell whether data can be read on r->soc right now, without waiting
+// (select() with a zero timeout).
+// Returns 1 if the socket is flagged readable, 0 otherwise or when the
+// socket is INVALID_SOCKET.
+int check_readinput(htsblk* r) {
+  fd_set readable;           // set holding only r->soc
+  struct timeval no_wait;    // zero timeout: poll only
+
+  if (r->soc == INVALID_SOCKET)
+    return 0;
+
+  FD_ZERO(&readable);
+  FD_SET(r->soc,&readable);
+  no_wait.tv_sec=0;
+  no_wait.tv_usec=0;
+  select(r->soc + 1,&readable,NULL,NULL,&no_wait);
+
+  return FD_ISSET(r->soc,&readable) ? 1 : 0;
+}
+
+// Read one block from a socket (or a file), at most TAILLE_BUFFER bytes.
+// Returns what http_xfread1() returns for that size:
+//   >=0 : number of bytes read
+//   <0  : end or error
+HTS_INLINE LLint http_fread1(htsblk* r) {
+  LLint got=http_xfread1(r,TAILLE_BUFFER);
+  return got;
+}
+
+/*
+ * Read one block from r, choosing the maximum amount of data to receive.
+ *
+ * bufl > 0   : read at most bufl bytes
+ *              - r->is_write not set: data is stored in memory at r->adr
+ *                (allocated/grown here, always NUL-terminated, hence the +1
+ *                in the allocations); if r->totalsize>0 the whole buffer is
+ *                allocated once
+ *              - r->is_write set: data is written to the file r->out
+ * bufl == 0  : header mode, the buffer is 8192 bytes; bytes are read one by
+ *              one, CR are dropped; stops on an empty line (two LF, or LF as
+ *              first character), when no more input is pending, or after
+ *              256 bytes
+ * bufl == -1 : same as 0 but stops after each line (first LF)
+ * bufl == -2 : only reserve the 8192 byte buffer: returns 0 if it was
+ *              allocated by this call, -1 if it already existed
+ *
+ * Returns the number of bytes read (>0), or -1 on end of data or error.
+ * With HTS_USEOPENSSL and r->ssl set the raw count is returned, so 0 is
+ * possible and means "no relevant data".
+ */
+LLint http_xfread1(htsblk* r,int bufl) {
+  int nl=-1;
+
+  if (bufl>0) {
+    if (!r->is_write) {     // store in memory
+      if (r->totalsize>0) {    // total size known: allocate everything at once
+        if (r->adr==NULL) {
+          r->adr=(char*) malloct((INTsys) r->totalsize + 1);
+          r->size=0;
+        }
+        if (r->adr!=NULL) {
+          // read
+          nl = hts_read(r,r->adr + ((int) r->size),(int) (r->totalsize-r->size) );     /* NO 32 bit overlow possible here (no 4GB html!) */
+          // new size
+          if (nl >= 0) r->size+=nl;
+
+          if ((nl < 0) || (r->size >= r->totalsize))
+            nl=-1;  // break
+
+          r->adr[r->size]='\0';    // NUL at the end, in case HTML is processed as a string
+        }
+
+      } else {                 // total size unknown: grow the buffer block by block
+        if (r->adr==NULL) {
+#if HDEBUG
+          printf("..alloc xfread\n");
+#endif
+          r->adr=(char*) malloct(bufl + 1);
+          r->size=0;
+        }
+        else {
+          char* grown;
+#if HDEBUG
+          printf("..realloc xfread1\n");
+#endif
+          // go through a temporary pointer: on failure the old block is still
+          // allocated (assuming realloct() behaves like realloc()) and must be
+          // released, not lost
+          grown=(char*) realloct(r->adr,(int)r->size+bufl + 1);
+          if (grown==NULL) {
+            freet(r->adr);
+          }
+          r->adr=grown;
+        }
+
+        if (r->adr!=NULL) {
+          // read
+          nl = hts_read(r,r->adr+(int)r->size,bufl);
+          if (nl>0) {
+            // shrink to what was really read (same failure handling as above)
+            char* fitted=(char*) realloct(r->adr,(int)r->size+nl + 1);
+            if (fitted==NULL) {
+              freet(r->adr);
+            }
+            r->adr=fitted;
+            // new size
+            r->size+=nl;
+            // NUL terminator
+            if (r->adr) r->adr[r->size]='\0';
+
+          } // otherwise: finished
+#if HDEBUG
+          else if (nl < 0)
+            printf("..end read (%d)\n", nl);
+#endif
+        }
+#if HDEBUG
+        else printf("..-> error\n");
+#endif
+      }
+
+      // no buffer = error
+      if (r->adr==NULL) nl=-1;
+
+    } else {    // store on disk
+      char* buff;
+      buff=(char*) malloct(bufl);
+      if (buff!=NULL) {
+        // read
+        nl = hts_read(r,buff,bufl);
+        // new size
+        if (nl > 0) {
+          r->size+=nl;
+          if ((int) fwrite(buff,1,nl,r->out)!=nl) {
+            r->statuscode=-1;
+            strcpy(r->msg,"Write error on disk");
+            nl=-1;
+          }
+        }
+
+        if ((nl < 0) || ((r->totalsize>0) && (r->size >= r->totalsize)))
+          nl=-1;  // break
+
+        // release the temporary block
+        freet(buff);
+      } else
+        nl=-1;
+
+      if ((nl < 0) && (r->out!=NULL)) {
+        fflush(r->out);
+      }
+
+    } // disk or memory storage
+
+  } else if (bufl == -2) {  // force reserve
+    if (r->adr==NULL) {
+      r->adr=(char*) malloct(8192);
+      r->size=0;
+      return 0;
+    }
+    return -1;
+  } else {    // header, received byte by byte
+    int count=256;          // bytes still allowed for this call; -1 forces the exit
+    int tot_nl=0;           // total number of bytes read by this call
+    int lf_detected=0;      // 1: the previous kept character was a LF
+    int at_begining=1;      // 1: nothing has been stored yet during this call
+    do {
+      nl=-1;
+      count--;
+      if (r->adr==NULL) {
+        r->adr=(char*) malloct(8192);
+        r->size=0;
+      }
+      if (r->adr!=NULL) {
+        if (r->size < 8190) {
+          // read
+          nl = hts_read(r,r->adr+r->size,1);
+          if (nl>0) {
+            // exit if:
+            // lf detected AND already detected before
+            // or
+            // lf detected AND first character read
+            if (*(r->adr+r->size) == 10) {
+              if (lf_detected || (at_begining) || (bufl<0))
+                count=-1;
+              lf_detected=1;
+            }
+            if (*(r->adr+r->size) != 13) {    // skip CR: it will be overwritten
+              if (
+                (*(r->adr+r->size) != 10)
+                &&
+                (*(r->adr+r->size) != 13)
+                ) {
+                // restart for new line
+                lf_detected=0;
+              }
+              (r->size)++;
+              at_begining=0;
+            }
+            *(r->adr+r->size)='\0';    // NUL terminator
+          }
+        }
+      }
+      if (nl >= 0) {
+        tot_nl+=nl;
+        if (!check_readinput(r))   // nothing more pending: stop here
+          count=-1;
+      }
+    } while((nl >= 0) && (count>0));
+    nl = tot_nl;
+  }
+  // nl == 0 may mean "no relevant data", for example is using cache or ssl
+#if HTS_USEOPENSSL
+  if (r->ssl)
+    return nl;
+  else
+#endif
+  return ((nl > 0) ? nl : -1);        // ==0 is fatal if direct read
+}
+
+
+// Test an address with http_test() and follow "moved" answers
+// (301, 302, 303, 307).
+// After each redirect, ident_url_absolute(loc,adr,fil) is called to
+// recompute adr and fil from the new location, then the test is repeated.
+// Gives up after 5+3 redirects ("the RFC says" 5 loops at most; up to 8 are
+// tried here).
+// Returns the block of the last test (statuscode 200, or the error code such
+// as 404); loc holds the location reported by that last test, if any.
+htsblk http_location(char* adr,char* fil,char* loc) {
+  htsblk retour;
+  int hops=0;       // redirects followed so far
+
+  for(;;) {
+    int code;
+    retour=http_test(adr,fil,loc);
+    code=retour.statuscode;
+
+    // anything but "moved" ends the search (200 = ok, other = error)
+    if ((code!=301) && (code!=302) && (code!=303) && (code!=307))
+      break;
+    // moved: recompute adr and fil; stop if the location is unusable
+    if (ident_url_absolute(loc,adr,fil)==-1)
+      break;
+    hops++;
+    if (hops >= 5+3)
+      break;
+  }
+  return retour;
+}
+
+
+/*
+ * Test an URL (validity, header, size) with a HEAD request.
+ *
+ * adr, fil   address part and path part of the URL
+ * loc        caller's buffer; emptied first, then receives the Location:
+ *            value when the answer carries one ("moved")
+ *
+ * Returns a block whose statuscode is 200 or the error code (404, ...),
+ *   -2 : timeout (gives up about 30 seconds after the start, so that very
+ *        slow sites do not block the test forever)
+ *   -1 : nothing could be read, or the answer could not be understood
+ * The received header is parsed with treatfirstline()/treathead() and the
+ * receive buffer is released before returning; soc is INVALID_SOCKET.
+ */
+htsblk http_test(char* adr,char* fil,char* loc) {
+  T_SOC soc;
+  htsblk retour;
+  TStamp tl;
+  int timeout=30;      // timeout for a check (arbitrary)
+
+  // to give up on a site that is too slow
+  tl=time_local();
+
+  loc[0]='\0';
+  memset(&retour, 0, sizeof(htsblk));    // clear
+  retour.location=loc;    // will receive the real address in case of moved xx
+
+  // open in HEAD mode and send the request; the header is read below
+  soc=http_xfopen(1,0,1,NULL,adr,fil,&retour);
+
+  if (soc!=INVALID_SOCKET) {
+    int e=0;    // 0: keep reading, 1: header complete or no more data, -1: timeout
+    // read while data comes in, until the empty line (two LF) ending the
+    // header is received, or until the timeout
+    do {
+      if (http_xfread1(&retour,0) < 0)
+        e=1;
+      else if ((retour.adr!=NULL) && (retour.size>=2)
+        && (retour.adr[retour.size-1]==10) && (retour.adr[retour.size-2]==10))
+        e=1;    // the header is complete
+
+      if (!e) {
+        if ((time_local()-tl)>=timeout) {
+          e=-1;
+        }
+      }
+
+    } while (!e);
+
+    if (e==1) {
+      if ((retour.adr!=NULL) && (retour.size>0)) {
+        // binput() is assumed to return the number of bytes it consumed;
+        // ptr is kept inside the received data so that nothing is read
+        // beyond it
+        int ptr=0;
+        char rcvd[1100];
+
+        // note: roughly a copy of the processing done in back_wait()
+
+        // ----------------------------------------
+        // status line
+        ptr+=binput(retour.adr+ptr,rcvd,1024);
+        if ((strnotempty(rcvd)==0) && (ptr < (int) retour.size))
+          ptr+=binput(retour.adr+ptr,rcvd,1024);    // "some broken servers send a \n first" (RFC)
+
+        // process the status line
+        treatfirstline(&retour,rcvd);
+
+#if HDEBUG
+        printf("(Buffer) Status-Code=%d\n",retour.statuscode);
+#endif
+
+        // header lines, up to the first empty one (HTTP/0.9 is not supported)
+        do {
+          rcvd[0]='\0';
+          if (ptr < (int) retour.size)
+            ptr+=binput(retour.adr+ptr,rcvd,1024);
+#if HDEBUG
+          printf("(buffer)>%s\n",rcvd);
+#endif
+          if (strnotempty(rcvd))
+            treathead(NULL,NULL,NULL,&retour,rcvd);    // process
+
+        } while(strnotempty(rcvd));
+        // ----------------------------------------
+      } else {
+        // nothing was received: there is no header to parse
+        retour.statuscode=-1;
+        if (strnotempty(retour.msg)==0)
+          strcpy(retour.msg,"Unable to read");
+      }
+    } else {
+      retour.statuscode=-2;
+      strcpy(retour.msg,"Timeout While Testing");
+    }
+
+    // release the receive buffer in every case (also after a timeout)
+    if (retour.adr!=NULL) { freet(retour.adr); retour.adr=NULL; }
+
+#if HTS_DEBUG_CLOSESOCK
+    DEBUG_W("http_test: deletehttp\n");
+#endif
+    deletehttp(&retour);
+    retour.soc=INVALID_SOCKET;
+  }
+  return retour;
+}
+
+// Crée un lien (http) vers une adresse internet iadr
+// retour: structure (adresse, taille, message si erreur (si !adr))
+// peut ouvrir avec des connect() non bloquants: waitconnect=0/1
+/*
+ * Open a connection to the host named by _iadr and return its socket.
+ *
+ *   _iadr        host name, optionally preceded by "id:pass@" and optionally
+ *                followed by ":port"; the exact string "file://" selects the
+ *                local pseudo-socket instead of a network connection
+ *   retour       may be NULL; when given, its msg field receives an error
+ *                text on failure and (OpenSSL builds) its ssl flag selects
+ *                443 as default port
+ *   port         port to use, or -1 to take it from _iadr (default 80/443)
+ *   waitconnect  non-zero: blocking connect(), a failure closes the socket
+ *                and is reported; zero: the socket is switched to
+ *                non-blocking mode and returned whatever connect() said
+ *
+ * Returns the socket, LOCAL_SOCKET_ID for "file://", or INVALID_SOCKET.
+ */
+int newhttp(char* _iadr,htsblk* retour,int port,int waitconnect) {
+  t_fullhostent fullhostent_buffer;    // scratch buffer for the resolver
+  T_SOC soc;                           // socket descriptor
+  char* iadr;
+
+  // skip a leading "id:pass@" if there is one
+  iadr = jump_identification(_iadr);
+
+  // "file://" is a fake URL standing for a real local file (useful for tests)
+  if (strcmp(_iadr,"file://")) {       /* not a local file */
+    SOCaddr server;
+    int server_size=sizeof(server);
+    t_hostent* hp;
+    // clear the address structure
+    memset(&server, 0, sizeof(server));
+
+#if HDEBUG
+    printf("gethostbyname\n");
+#endif
+
+    // look for a port inside the address when none was given
+    if (port==-1) {
+      char *a=jump_toport(iadr);
+      port=80;                         // default port
+#if HTS_USEOPENSSL
+      // retour is allowed to be NULL (see the error paths below): test it first
+      if (retour && retour->ssl)
+        port=443;
+#endif
+      if (a) {
+        char iadr2[HTS_URLMAXSIZE*2];
+        int i=-1;
+        size_t hostlen=(size_t) (a - iadr);
+        iadr2[0]='\0';
+        sscanf(a+1,"%d",&i);
+        if (i!=-1) {
+          port=(unsigned short int) i;
+        }
+
+        if (hostlen < sizeof(iadr2)) {
+          // real address (without the :xx)
+          strncat(iadr2,iadr,hostlen);
+          hp = hts_gethostbyname(iadr2, &fullhostent_buffer);
+        } else {
+          // host part does not fit in iadr2: report it as unresolvable
+          hp = NULL;
+        }
+
+      } else {
+
+        // plain address, default port
+        hp = hts_gethostbyname(iadr, &fullhostent_buffer);
+
+      }
+
+    } else    // port given by the caller
+      hp = hts_gethostbyname(iadr, &fullhostent_buffer);
+
+
+    // name -> address conversion failed
+    if (hp == NULL) {
+#if DEBUG
+      printf("erreur gethostbyname\n");
+#endif
+      if (retour)
+        if (retour->msg)
+          strcpy(retour->msg,"Unable to get server's address");
+      return INVALID_SOCKET;
+    }
+    // copy the first resolved address
+    SOCaddr_copyaddr(server, server_size, hp->h_addr_list[0], hp->h_length);
+
+    // create a stream socket of the matching address family
+#if HDEBUG
+    printf("socket\n");
+#endif
+#if HTS_WIDE_DEBUG
+    DEBUG_W("socket\n");
+#endif
+    soc=socket(SOCaddr_sinfamily(server), SOCK_STREAM, 0);
+#if HTS_WIDE_DEBUG
+    DEBUG_W("socket done\n");
+#endif
+    if (soc==INVALID_SOCKET) {
+      if (retour)
+        if (retour->msg)
+          strcpy(retour->msg,"Unable to create a socket");
+      return INVALID_SOCKET;           // socket creation failed
+    }
+    // set the destination port (80 or other)
+    SOCaddr_initport(server, port);
+#if HDEBUG
+    printf("==%d\n",soc);
+#endif
+
+    // non-blocking connection requested?
+    if (!waitconnect ) {
+      unsigned long p=1;               // 1 = non-blocking
+#if HTS_WIN
+      ioctlsocket(soc,FIONBIO,&p);
+#else
+      ioctl(soc,FIONBIO,&p);
+#endif
+    }
+
+    // connect to the server itself
+#if HDEBUG
+    printf("connect\n");
+#endif
+
+#if HTS_WIDE_DEBUG
+    DEBUG_W("connect\n");
+#endif
+#if HTS_WIN
+    if (connect(soc, (const struct sockaddr FAR *)&server, server_size) != 0) {
+#else
+    if (connect(soc, (struct sockaddr *)&server, server_size) == -1) {
+#endif
+
+      // only a blocking connect treats this as an error; in non-blocking
+      // mode the socket is returned as is
+      if (waitconnect) {
+#if HDEBUG
+        printf("unable to connect!\n");
+#endif
+        if (retour)
+          if (retour->msg)
+            strcpy(retour->msg,"Unable to connect to the server");
+        /* Close the socket and notify the error!!! */
+        deletesoc(soc);
+        return INVALID_SOCKET;
+      }
+    }
+#if HTS_WIDE_DEBUG
+    DEBUG_W("connect done\n");
+#endif
+
+#if HDEBUG
+    printf("connexion établie\n");
+#endif
+
+    // from now on data can be exchanged through soc
+
+  } else {    // local file requested
+    // it is handled like a socket; the pseudo-socket is replaced by a
+    // real handle by a later http_fopen()
+    soc=LOCAL_SOCKET_ID;
+
+  }
+
+  return soc;
+}
+
+
+
+// Split an absolute URL into host (adr) and path (fil), e.g.
+//   http://www.truc.fr/pub/index.html -> adr="www.truc.fr" fil="/pub/index.html"
+// Returns 0 on success, -1 on error.
+// For file: URLs adr becomes "file://" and the ?query part is cut from fil.
+int ident_url_absolute(char* url,char* adr,char* fil) {
+  int pos=0;
+  int scheme=0;
+
+  // start from empty results
+  adr[0]=fil[0]='\0';
+
+#if HDEBUG
+  printf("protocol: %s\n",url);
+#endif
+
+  // does the URL begin with "<letters>:" ?
+  {
+    char* c;
+    for (c=url; isalpha((unsigned char)*c); c++)
+      ;
+    if (*c == ':')
+      scheme=1;
+  }
+
+  // 1. optional scheme ":"
+  if ((pos=strfield(url,"file:"))) {            // local file (for tests)
+    strcpy(adr,"file://");
+  } else if ((pos=strfield(url,"http:"))) {     // HTTP: adr holds the bare host
+  } else if ((pos=strfield(url,"ftp:"))) {      // FTP: prefix kept in adr
+    strcpy(adr,"ftp://");
+#if HTS_USEOPENSSL
+  } else if ((pos=strfield(url,"https:"))) {    // HTTPS: prefix kept in adr
+    strcpy(adr,"https://");
+#endif
+  } else if (scheme) {
+    return -1;                                  // unknown scheme
+  } else
+    pos=0;
+
+  // 2. optional "//" authority
+  if (strncmp(url+pos,"//",2)==0)
+    pos+=2;
+
+  // (url+pos) now points to the path (not net path)
+
+  if (strfield(adr,"file:")) {                  // local file://
+    char* query;
+    char* c;
+
+    strcat(fil,url+pos);
+    // the query string is useless for a local file
+    query=strchr(fil,'?');
+    if (query)
+      *query='\0';
+    // DOS-style separators: \ -> /
+    for (c=fil; *c; c++)
+      if (*c=='\\')
+        *c='/';
+  } else {                                      // regular network address
+    char* host=url+pos;
+    char* path;
+    int hostlen;
+
+    // the host ends at the first '/', else at the first '?', else at the end
+    path=strchr(jump_identification(host),'/');
+    if (path==0) path=strchr(jump_identification(host),'?');   // http://www.foo.com?bar=1
+    if (path==0) path=host+strlen(host);
+
+    // host part too long
+    hostlen=(int) (path - host);
+    if (hostlen > HTS_URLMAXSIZE)
+      return -1;
+
+    // copy the host, then the path (default page "/" when there is none)
+    strncat(adr,host,hostlen);
+    if (path[0] != '/')
+      strcat(fil,"/");
+    strcat(fil,path);
+    // SECURITY: resolve the ../ components
+    fil_simplifie(fil);
+  }
+
+  // no hostname
+  if (!strnotempty(adr))
+    return -1;
+
+  // give the file a name if needed (normally unused)
+  if (!strnotempty(fil))
+    strcpy(fil,"default-index.html");
+
+  // the address is case insensitive: lower-case it (identification excluded)
+  {
+    char* c;
+    for (c=jump_identification(adr); *c; c++)
+      if ((*c>='A') && (*c<='Z'))
+        *c+='a'-'A';
+  }
+
+  return 0;
+}
+
+// Simplify a path in place:
+//  - every "/../" drops the directory that precedes it ("/a/b/../c" -> "/a/c");
+//    at the top level there is nothing to drop and the result starts with "/"
+//  - every remaining "./" is then removed, and any "../" left over after that
+//    is removed as well (potential threat)
+// All edits shrink the string, so they are done with memmove() directly in f;
+// no fixed-size temporary buffer is involved.
+void fil_simplifie(char* f) {
+  int i=0;
+  int last=0;      // index of the previous '/' (0 when none was seen yet)
+  char* a;
+
+  // resolve "/../"
+  while (f[i]) {
+
+    if (f[i]=='/') {
+      // the trailing '/' is required so that "/tmp/..coolandlamedir/" is left alone
+      if (f[i+1]=='.' && f[i+2]=='.' && f[i+3]=='/') {
+        if (!last)               /* can't go upper.. */
+          f[0]='/';
+        // keep f[0..last], then append what follows the "/../"
+        memmove(f+last+1,f+i+4,strlen(f+i+4)+1);
+        i=-1;                    // start over from the beginning
+        last=0;
+      }
+
+      if (i>=0)
+        last=i;
+      else
+        last=0;
+    }
+
+    i++;
+  }
+
+  // remove "./"
+  while ( (a=strstr(f,"./")) )
+    memmove(a,a+2,strlen(a+2)+1);
+
+  // delete all remaining ../ (potential threat)
+  while ( (a=strstr(f,"../")) )
+    memmove(a,a+3,strlen(a+3)+1);
+
+}
+
+
+// Close the file or socket attached to r and mark r as closed.
+// Nothing happens when r->soc is already INVALID_SOCKET. For a file the
+// FILE* is closed; for a network socket deletesoc_r() is used; the local
+// pseudo-socket (LOCAL_SOCKET_ID) has nothing to close.
+HTS_INLINE void deletehttp(htsblk* r) {
+#if HTS_DEBUG_CLOSESOCK
+  char info[256];
+  // %p is the conversion for a pointer (%d was used on a pointer before)
+  sprintf(info,"deletehttp: (htsblk*) %p\n",(void*) r);
+  DEBUG_W2(info);
+#endif
+  if (r->soc!=INVALID_SOCKET) {
+    if (r->is_file) {
+      if (r->fp)
+        fclose(r->fp);
+      r->fp=NULL;
+    } else {
+      if (r->soc!=LOCAL_SOCKET_ID)
+        deletesoc_r(r);
+    }
+    r->soc=INVALID_SOCKET;
+  }
+}
+
+// Close a socket; INVALID_SOCKET is silently ignored.
+HTS_INLINE void deletesoc(T_SOC soc) {
+  if (soc==INVALID_SOCKET)
+    return;
+
+  // No shutdown(soc,2) before closing: it used to crash.
+  // Closing the connection before leaving is cleaner.
+#if HTS_WIDE_DEBUG
+  DEBUG_W("close\n");
+#endif
+#if HTS_WIN
+  closesocket(soc);
+#else
+  close(soc);
+#endif
+#if HTS_WIDE_DEBUG
+  DEBUG_W("close done\n");
+#endif
+}
+
+/* Close the socket of r and reset r->soc; in OpenSSL builds the SSL
+   connection attached to r is shut down and freed first */
+HTS_INLINE void deletesoc_r(htsblk* r) {
+#if HTS_USEOPENSSL
+  if (r->ssl_con != NULL) {
+    SSL_shutdown(r->ssl_con);
+    SSL_free(r->ssl_con);
+    r->ssl_con=NULL;
+  }
+#endif
+
+  deletesoc(r->soc);
+  r->soc=INVALID_SOCKET;
+}
+
+// Number of seconds elapsed since 1970.
+HTS_INLINE TStamp time_local(void) {
+  time_t now=time(NULL);
+  return (TStamp) now;
+}
+
+// Number of milliseconds elapsed since 1970.
+// Without ftime() the value only has a one-second resolution.
+HTS_INLINE TStamp mtime_local(void) {
+#ifndef HTS_DO_NOT_USE_FTIME
+  struct timeb now;
+  TStamp seconds_ms;
+  TStamp millis;
+  ftime( &now );
+  seconds_ms=(TStamp) now.time * (TStamp) 1000;
+  millis=(TStamp) now.millitm;
+  return (TStamp) (seconds_ms + millis);
+#else
+  // not precise..
+  TStamp seconds_ms=(TStamp) time_local() * (TStamp) 1000;
+  return (TStamp) (seconds_ms + ((TStamp) 0));
+#endif
+}
+
+// Convert a number of seconds into a readable duration written into st,
+// e.g. "1 days, 2 hours 3 minutes 4 seconds"; leading zero units are omitted.
+void sec2str(char *st,TStamp t) {
+  int days,hours,minutes,seconds;
+
+  days=(int) (t/(3600*24));
+  t-=((TStamp) days)*(3600*24);
+  hours=(int) (t/(3600));
+  t-=((TStamp) hours)*3600;
+  minutes=(int) (t/60);
+  t-=((TStamp) minutes)*60;
+  seconds=(int) t;
+
+  if (days>0) {
+    sprintf(st,"%d days, %d hours %d minutes %d seconds",days,hours,minutes,seconds);
+    return;
+  }
+  if (hours>0) {
+    sprintf(st,"%d hours %d minutes %d seconds",hours,minutes,seconds);
+    return;
+  }
+  if (minutes>0) {
+    sprintf(st,"%d minutes %d seconds",minutes,seconds);
+    return;
+  }
+  sprintf(st,"%d seconds",seconds);
+}
+
+// Same as sec2str, in a shorter form, e.g. "1d,02h,03min04s".
+void qsec2str(char *st,TStamp t) {
+  int days,hours,minutes,seconds;
+
+  days=(int) (t/(3600*24));
+  t-=((TStamp) days)*(3600*24);
+  hours=(int) (t/(3600));
+  t-=((TStamp) hours)*3600;
+  minutes=(int) (t/60);
+  t-=((TStamp) minutes)*60;
+  seconds=(int) t;
+
+  if (days>0) {
+    sprintf(st,"%dd,%02dh,%02dmin%02ds",days,hours,minutes,seconds);
+    return;
+  }
+  if (hours>0) {
+    sprintf(st,"%dh,%02dmin%02ds",hours,minutes,seconds);
+    return;
+  }
+  if (minutes>0) {
+    sprintf(st,"%dmin%02ds",minutes,seconds);
+    return;
+  }
+  sprintf(st,"%ds",seconds);
+}
+
+
+// Current time, GMT, in RFC 822 format (s must hold 256 bytes).
+// Falls back to the local time when gmtime() gives no result.
+void time_gmt_rfc822(char* s) {
+  time_t now=time(NULL);
+  struct tm* parts=gmtime(&now);
+  if (!parts)
+    parts=localtime(&now);
+  time_rfc822(s,parts);
+}
+
+// Current local time in RFC 822 format (s must hold 256 bytes).
+void time_local_rfc822(char* s) {
+  time_t now=time(NULL);
+  struct tm* parts=localtime(&now);
+  time_rfc822_local(s,parts);
+}
+
+/* Convert a date string into a struct tm.
+   Recognised layouts (case insensitive; '-', ':' and ',' count as spaces):
+     Sun, 06 Nov 1994 08:49:37 GMT
+     Sunday, 06-Nov-94 08:49:37 GMT
+     Sun Nov  6 08:49:37 1994
+   The month must be a three-letter English abbreviation; the first number
+   is the day, the next four are year and time (order depends on the layout).
+   Returns NULL when s is longer than 200 characters or when the month, the
+   day or one of the four other numbers is missing. The returned structure
+   is the storage reserved by NOSTATIC_RESERVE. */
+struct tm* convert_time_rfc822(char* s) {
+  struct tm* result;
+  /* month abbreviations, one every 4 characters */
+  char months[]="jan feb mar apr may jun jul aug sep oct nov dec";
+  char str[256];
+  char* a;
+  /* */
+  int result_mm=-1;
+  int result_dd=-1;
+  int result_n1=-1;
+  int result_n2=-1;
+  int result_n3=-1;
+  int result_n4=-1;
+  /* */
+  NOSTATIC_RESERVE(result, struct tm, 1);
+
+  if ((int) strlen(s) > 200)
+    return NULL;
+  strcpy(str,s);
+  hts_lowcase(str);
+  /* turn the separators - : , into spaces */
+  while( (a=strchr(str,'-')) ) *a=' ';
+  while( (a=strchr(str,':')) ) *a=' ';
+  while( (a=strchr(str,',')) ) *a=' ';
+  /* tokenize */
+  a=str;
+  while(*a) {
+    char *first,*last;
+    char tok[256];
+    /* isolate the next word */
+    while(*a==' ') a++;
+    first=a;
+    while((*a) && (*a!=' ')) a++;
+    last=a;
+    tok[0]='\0';
+    if (first!=last) {
+      char* pos;
+      strncat(tok,first,(int) (last - first));
+      /* month always in letters: accept only a whole three-letter
+         abbreviation, i.e. a match aligned on a table entry; a bare
+         substring such as "a" or "ec" is not a month */
+      pos=strstr(months,tok);
+      if ( pos && (strlen(tok) == 3) && ((((int) (pos - months)) % 4) == 0) ) {
+        result_mm=((int) (pos - months))/4;
+      } else {
+        int number;
+        if (sscanf(tok,"%d",&number) == 1) {      /* number token */
+          if (result_dd<0)                        /* day always first number */
+            result_dd=number;
+          else if (result_n1<0)
+            result_n1=number;
+          else if (result_n2<0)
+            result_n2=number;
+          else if (result_n3<0)
+            result_n3=number;
+          else if (result_n4<0)
+            result_n4=number;
+        }    /* anything else is noise (+1GMT for example) */
+      }
+    }
+  }
+  if ((result_n1>=0) && (result_mm>=0) && (result_dd>=0) && (result_n2>=0) && (result_n3>=0) && (result_n4>=0)) {
+    if (result_n4>=1000) {               /* Sun Nov  6 08:49:37 1994 */
+      result->tm_year=result_n4-1900;
+      result->tm_hour=result_n1;
+      result->tm_min=result_n2;
+      result->tm_sec=max(result_n3,0);
+    } else {                             /* Sun, 06 Nov 1994 08:49:37 GMT or Sunday, 06-Nov-94 08:49:37 GMT */
+      result->tm_hour=result_n2;
+      result->tm_min=result_n3;
+      result->tm_sec=max(result_n4,0);
+      if (result_n1<=50)                 /* 00 means 2000 */
+        result->tm_year=result_n1+100;
+      else if (result_n1<1000)           /* 99 means 1999 */
+        result->tm_year=result_n1;
+      else                               /* 2000 */
+        result->tm_year=result_n1-1900;
+    }
+    result->tm_isdst=0;                  /* assume GMT */
+    result->tm_yday=-1;                  /* don't know */
+    result->tm_wday=-1;                  /* don't know */
+    result->tm_mon=result_mm;
+    result->tm_mday=result_dd;
+    return result;
+  }
+  return NULL;
+}
+
+/* Set both the access and the modification time of file from tm_time.
+   Returns the result of utime() (-1 if error). */
+int set_filetime(char* file,struct tm* tm_time) {
+  struct utimbuf stamp;
+#ifndef HTS_DO_NOT_USE_FTIME
+  struct timeb zone;
+  zone.timezone=0;
+  ftime( &zone );
+  /* mktime() result shifted by the timezone (minutes) reported by ftime() */
+  stamp.modtime=mktime(tm_time) - zone.timezone*60;
+#else
+  // bogus time (GMT/local)..
+  stamp.modtime=mktime(tm_time);
+#endif
+  stamp.actime=stamp.modtime;
+  return utime(file,&stamp);
+}
+
+/* Set the time of file from an RFC822 date+time string.
+   Returns -1 if the date cannot be parsed or the time cannot be set. */
+int set_filetime_rfc822(char* file,char* date) {
+  struct tm* parsed=convert_time_rfc822(date);
+  if (parsed == NULL)
+    return -1;
+  return set_filetime(file,parsed);
+}
+
+
+// Format A as an RFC 822 date followed by "GMT" (s must hold 256 bytes).
+HTS_INLINE void time_rfc822(char* s,struct tm * A) {
+  const char* layout="%a, %d %b %Y %H:%M:%S GMT";
+  strftime(s,256,layout,A);
+}
+
+// Format A as an RFC 822 date without zone name (s must hold 256 bytes).
+HTS_INLINE void time_rfc822_local(char* s,struct tm * A) {
+  const char* layout="%a, %d %b %Y %H:%M:%S";
+  strftime(s,256,layout,A);
+}
+
+// Convert a byte count into text: the number from int2bytes2() immediately
+// followed by its unit (B, KiB, MiB, ..). The result is a concat() buffer.
+char* int2bytes(LLint n) {
+  char** parts=int2bytes2(n);
+  char* text;
+  NOSTATIC_RESERVE(text, char, 256);
+
+  // parts[0] is the number, parts[1] the unit
+  return concat(strcat(strcpy(text,parts[0]),parts[1]),"");
+}
+
+// Convert a byte rate into text: number, unit and "/s" (B/s, KiB/s, MiB/s, ..).
+// The result is a concat() buffer.
+char* int2bytessec(long int n) {
+  char* text;
+  char** parts=int2bytes2(n);
+  NOSTATIC_RESERVE(text, char, 256);
+
+  // parts[0] is the number, parts[1] the unit
+  return concat(strcat(strcpy(text,parts[0]),parts[1]),"/s");
+}
+// Decimal text of n, returned in a concat() buffer.
+char* int2char(int n) {
+  char* digits;
+  NOSTATIC_RESERVE(digits, char, 32);
+
+  sprintf(digits,"%d",n);
+  return concat(digits,"");
+}
+
+// Conversion into B, KiB, MiB.., with number and unit kept separate
+// limit: 2.10^9.10^6B
+
+/* See http://physics.nist.gov/cuu/Units/binary.html */
+#define ToLLint(a) ((LLint)(a))
+#define ToLLintKiB (ToLLint(1024))
+#define ToLLintMiB (ToLLintKiB*ToLLintKiB)
+#ifdef HTS_LONGLONG
+#define ToLLintGiB (ToLLintKiB*ToLLintKiB*ToLLintKiB)
+#define ToLLintTiB (ToLLintKiB*ToLLintKiB*ToLLintKiB*ToLLintKiB)
+#define ToLLintPiB (ToLLintKiB*ToLLintKiB*ToLLintKiB*ToLLintKiB*ToLLintKiB)
+#endif
+typedef struct {
+  char buff1[256];      /* the number, as text */
+  char buff2[32];       /* the unit name */
+  char* buffadr[2];     /* { buff1, buff2 }, the value handed to the caller */
+} strc_int2bytes2;
+
+/* Write n as "<whole>,<hundredths>" of unit into buff1 and the unit name into buff2 */
+static void int2bytes2_scaled(strc_int2bytes2* strc,LLint n,LLint unit,const char* name) {
+  int whole=(int)((LLint)(n/unit));
+  int hundredths=(int)((LLint)(((n%unit)*100)/unit));
+  sprintf(strc->buff1,"%d,%02d",whole,hundredths);
+  strcpy(strc->buff2,name);
+}
+
+/* Returns a two-entry array: [0] the number as text, [1] the unit.
+   Values below 1 KiB are printed as plain bytes; larger ones with two
+   decimals after a comma, in the largest unit not exceeding the value
+   (units above MiB require HTS_LONGLONG). */
+char** int2bytes2(LLint n) {
+  strc_int2bytes2* strc;
+  NOSTATIC_RESERVE(strc, strc_int2bytes2, 1);
+
+  if (n < ToLLintKiB) {
+    sprintf(strc->buff1,"%d",(int)(LLint)n);
+    strcpy(strc->buff2,"B");
+  } else if (n < ToLLintMiB) {
+    int2bytes2_scaled(strc,n,ToLLintKiB,"KiB");
+  }
+#ifdef HTS_LONGLONG
+  else if (n < ToLLintGiB) {
+    int2bytes2_scaled(strc,n,ToLLintMiB,"MiB");
+  } else if (n < ToLLintTiB) {
+    int2bytes2_scaled(strc,n,ToLLintGiB,"GiB");
+  } else if (n < ToLLintPiB) {
+    int2bytes2_scaled(strc,n,ToLLintTiB,"TiB");
+  } else {
+    int2bytes2_scaled(strc,n,ToLLintPiB,"PiB");
+  }
+#else
+  else {
+    int2bytes2_scaled(strc,n,ToLLintMiB,"MiB");
+  }
+#endif
+  strc->buffadr[0]=strc->buff1;
+  strc->buffadr[1]=strc->buff2;
+  return strc->buffadr;
+}
+
+#if HTS_WIN
+#else
+// "Ignore SIGPIPE" flag accessor: a value >= 0 is stored as the new flag,
+// a negative value only reads it. Returns the (possibly updated) flag.
+int sig_ignore_flag( int setflag ) {
+  static int current=0;       /* YES, this one is true static */
+  if (setflag < 0)
+    return current;
+  current=setflag;
+  return current;
+}
+#endif
+
+// Send the text s (usually headers) on the connection of r, through SSL when
+// r->ssl is set (OpenSSL builds). Returns what SSL_write()/send() returned.
+// Outside Windows the sig_ignore_flag is raised for the duration of the call.
+HTS_INLINE int sendc(htsblk* r, char* s) {
+  int sent;
+
+#if HTS_WIN
+#else
+  sig_ignore_flag(1);
+#endif
+#if HDEBUG
+  write(0,s,strlen(s));
+#endif
+
+#if HTS_USEOPENSSL
+  if (r->ssl)
+    sent = SSL_write(r->ssl_con, s, strlen(s));
+  else
+    sent = send(r->soc,s,strlen(s),0);
+#else
+  sent = send(r->soc,s,strlen(s),0);
+#endif
+
+#if HTS_WIN
+#else
+  sig_ignore_flag(0);
+#endif
+
+  return sent;
+}
+
+
+// Read one line from the descriptor fd into s (at most max-1 characters,
+// always NUL terminated when max > 0).
+// The line ends at LF, at a NUL byte, or when read() returns nothing more;
+// CR characters are dropped. Nothing is read or written when max <= 0, and
+// with max == 1 only the terminator is stored (one byte used to be stored
+// before the size was checked).
+void finput(int fd,char* s,int max) {
+  char c;
+  int j=0;
+
+  if (max <= 0)
+    return;
+  while (j < max-1) {
+    if (read(fd,&c,1) <= 0)     // end of input or error
+      break;
+    if (c == 0 || c == 10)      // NUL byte or LF: end of line
+      break;
+    if (c == 13)                // skip CR
+      continue;
+    s[j++]=c;
+  }
+  s[j]='\0';
+}
+
+// Like linput, but reading from the in-memory text buff.
+// Copies the current line (up to '\n' or the end of buff) into s, without
+// its trailing CR characters, keeping at most max-1 characters so that the
+// terminator always fits in a buffer of max bytes.
+// Returns the offset to add to buff to reach the next line, i.e. the line
+// length plus one; 1 is returned when buff is already at its end.
+// s must have room for at least one byte.
+int binput(char* buff,char* s,int max) {
+  char* end;
+  int count;
+
+  // clear the output
+  s[0]='\0';
+  // end of buffer?
+  if ( *buff == '\0')
+    return 1;
+  // find the ending \n ..or the end of the buffer
+  end=strchr(buff,'\n');
+  if (!end)
+    end=buff+strlen(buff);
+  // number of characters to keep, terminator excluded
+  count=(int) (end-buff);
+  if (count > max-1)
+    count=max-1;
+  if (count < 0)
+    count=0;
+  // strip the ending CR: the last kept character is buff[count-1]
+  while( (count>0) && (buff[count-1] == '\r'))
+    count--;
+  // copy and terminate
+  if (count > 0)
+    memcpy(s, buff, count);
+  s[count]='\0';
+  // then return the supplemental jump offset
+  return ((int) (end-buff))+1;
+}
+
+// Read one line from fp into s (the text may be unicode).
+// At most max-1 characters are stored and s is always NUL terminated when
+// max > 0. The line ends at LF or EOF; CR, TAB and form-feed characters
+// are dropped. Returns the number of characters stored.
+// Nothing is read or written when max <= 0, and with max == 1 only the
+// terminator is stored (one byte used to be stored before the size check).
+int linput(FILE* fp,char* s,int max) {
+  int c;
+  int j=0;
+
+  if (max <= 0)
+    return 0;
+  while (j < max-1) {
+    c=fgetc(fp);
+    if (c == EOF || c == 10)              // end of input or LF
+      break;
+    if (c == 13 || c == 9 || c == 12)     // skip CR, TAB, FF
+      continue;
+    s[j++]=(char) c;
+  }
+  s[j]='\0';
+  return j;
+}
+// Read one line with linput() and copy it into s without its leading and
+// trailing spaces and tabs. Returns the length of the trimmed line; s is
+// left empty when the line is empty or the work buffer cannot be allocated.
+int linput_trim(FILE* fp,char* s,int max) {
+  int rlen=0;
+  char* line=(char*) malloct(max+2);
+
+  s[0]='\0';
+  if (!line)
+    return rlen;
+
+  // read the line
+  rlen=linput(fp,line,max);
+  if (rlen) {
+    char* start=line;
+    // drop the spaces and tabs at the end
+    while( (rlen>0) && ((line[rlen-1]==' ') || (line[rlen-1]=='\t')) )
+      line[--rlen]='\0';
+    // skip the spaces and tabs at the beginning
+    while( (rlen>0) && ((*start==' ') || (*start=='\t')) ) {
+      start++;
+      rlen--;
+    }
+    if (rlen>0) {
+      memcpy(s,start,rlen);
+      s[rlen]='\0';
+    }
+  }
+
+  freet(line);
+  return rlen;
+}
+// Read a logical line in the C preprocessor fashion: as long as the text
+// read so far ends with a backslash, that backslash is removed and the next
+// (trimmed) line is appended. Returns the total length stored in s.
+int linput_cpp(FILE* fp,char* s,int max) {
+  int total=0;
+
+  s[0]='\0';
+  for (;;) {
+    int got;
+    // cut the final backslash of the previous piece
+    if ( (total>0) && (s[total-1]=='\\') )
+      s[--total]='\0';
+    // read the next piece right after what we already have
+    got=linput_trim(fp,s+total,max-total);
+    if (got>0)
+      total+=got;
+    // stop unless the text still ends with a backslash and room is left
+    if ( (s[(total>0)?(total-1):0]!='\\') || (total>=max) )
+      break;
+  }
+  return total;
+}
+
+// Same as linput, but keeping the special characters: only CR is dropped.
+// At most max-1 characters are stored and s is always NUL terminated when
+// max > 0; the line ends at LF or EOF.
+// Nothing is read or written when max <= 0, and with max == 1 only the
+// terminator is stored (one byte used to be stored before the size check).
+void rawlinput(FILE* fp,char* s,int max) {
+  int c;
+  int j=0;
+
+  if (max <= 0)
+    return;
+  while (j < max-1) {
+    c=fgetc(fp);
+    if (c == EOF || c == 10)    // end of input or LF
+      break;
+    if (c == 13)                // skip CR
+      continue;
+    s[j++]=(char) c;
+  }
+  s[j]='\0';
+}
+
+
+// Compare the beginning of f with s, character by character through streql
+// ('A=a', case insensitive). Returns the length of s when f starts with s,
+// 0 otherwise (and 0 for an empty s).
+int strfield(const char* f,const char* s) {
+  int r;
+  for (r=0; streql(*f,*s) && ((*f)!=0) && ((*s)!=0); f++, s++)
+    r++;
+  // s fully consumed: f starts with s
+  return (*s==0) ? r : 0;
+}
+
+// Case insensitive string search: returns the first position of s where
+// strfield() reports a match for o, or NULL when there is none.
+char* strstrcase(char *s,char *o) {
+  for ( ; *s; s++) {
+    if (strfield(s,o)!=0)
+      return s;
+  }
+  return NULL;
+}
+
+
+// Unicode (UTF-8) detector
+// See http://www.unicode.org/unicode/reports/tr28/
+// (sect Table 3.1B. Legal UTF-8 Byte Sequences)
+typedef struct {
+ unsigned int pos; /* index in data[] where the current input byte is stored */
+ unsigned char data[4]; /* bytes of the sequence being examined */
+} t_auto_seq;
+
+// true when c lies in the inclusive hexadecimal range [0xa, 0xb]
+#define CHAR_BETWEEN(c, a, b) ( (c) >= 0x##a ) && ( (c) <= 0x##b )
+// no table row has accepted the lead byte yet (for the current input byte)
+#define SEQBEG ( inseq == 0 )
+// byte n has been stored and lies in [0xa, 0xb]; err receives the range test result (1 = in range)
+#define BLK(n,a, b) ( (seq.pos >= n) && ((err = CHAR_BETWEEN(seq.data[n], a, b))) )
+#define ELT(n,a) BLK(n,a,a)
+// the sequence is complete: sets ok
+#define SEQEND ((ok = 1))
+// the lead byte was accepted by this row: sets inseq, so the following rows are skipped
+#define IN_SEQ ( (inseq = 1) )
+// a row accepted the lead byte, but the last byte tested was out of range
+#define BAD_SEQ ( (ok == 0) && (inseq != 0) && (!err) )
+// no row accepted the lead byte
+#define NO_SEQ ( inseq == 0 )
+
+/* Does this block look like UTF-8 encoded text?
+ * Each input byte is appended to the pending sequence, which is then
+ * matched against the rows of the legal byte sequences table below.
+ * Return values:
+ */
+// 0 : no (a byte fits no row of the table, or a sequence grows beyond 4 bytes)
+// 1 : yes (at least one byte was handled as part of a multi-byte sequence)
+// -1: don't know (nothing contradicted UTF-8, but no multi-byte sequence was seen)
+int is_unicode_utf8(unsigned char* buffer, unsigned int size) {
+ t_auto_seq seq;
+ unsigned int i;
+ int is_utf=-1;
+
+ seq.pos=0;
+ for(i=0 ; i < size ; i++) {
+ unsigned int ok=0; /* set by SEQEND when the pending sequence is complete */
+ unsigned int inseq=0; /* set by IN_SEQ when a row accepted the lead byte */
+ unsigned int err=0; /* result of the last range test done by BLK */
+
+ seq.data[seq.pos]=buffer[i];
+ /**/ if ( SEQBEG && BLK(0,00,7F) && IN_SEQ && SEQEND ) { } /* 1 byte */
+ else if ( SEQBEG && BLK(0,C2,DF) && IN_SEQ && BLK(1,80,BF) && SEQEND ) { } /* 2 bytes */
+ else if ( SEQBEG && ELT(0,E0 ) && IN_SEQ && BLK(1,A0,BF) && BLK(2,80,BF) && SEQEND ) { } /* 3 bytes */
+ else if ( SEQBEG && BLK(0,E1,EC) && IN_SEQ && BLK(1,80,BF) && BLK(2,80,BF) && SEQEND ) { }
+ else if ( SEQBEG && ELT(0,ED ) && IN_SEQ && BLK(1,80,9F) && BLK(2,80,BF) && SEQEND ) { }
+ else if ( SEQBEG && BLK(0,EE,EF) && IN_SEQ && BLK(1,80,BF) && BLK(2,80,BF) && SEQEND ) { }
+ else if ( SEQBEG && ELT(0,F0 ) && IN_SEQ && BLK(1,90,BF) && BLK(2,80,BF) && BLK(3,80,BF) && SEQEND ) { } /* 4 bytes */
+ else if ( SEQBEG && BLK(0,F1,F3) && IN_SEQ && BLK(1,80,BF) && BLK(2,80,BF) && BLK(3,80,BF) && SEQEND ) { }
+ else if ( SEQBEG && ELT(0,F4 ) && IN_SEQ && BLK(1,80,8F) && BLK(2,80,BF) && BLK(3,80,BF) && SEQEND ) { }
+ else if ( NO_SEQ ) { // bad, unknown
+ return 0;
+ }
+ /* */
+
+ /* Error: the lead byte was accepted but a following byte is out of range */
+ if ( BAD_SEQ ) {
+ return 0;
+ }
+
+ /* unicode character: this byte was stored after the first position */
+ if (seq.pos > 0)
+ is_utf=1;
+
+ /* Next: start a new sequence when this one is complete, else wait for more bytes */
+ if (ok)
+ seq.pos=0;
+ else
+ seq.pos++;
+
+ /* Internal error: data[] only holds 4 bytes */
+ if (seq.pos >= 4)
+ return 0;
+
+ }
+
+ return is_utf;
+}
+
+// Build a histogram of the byte values found in buffer: map (256 entries)
+// is cleared, then map[v] receives the number of bytes equal to v.
+void map_characters(unsigned char* buffer, unsigned int size, unsigned int* map) {
+  unsigned int remaining;
+  unsigned char* cursor=buffer;
+
+  memset(map, 0, 256 * sizeof(unsigned int));
+  for(remaining = size ; remaining > 0 ; remaining--) {
+    map[*cursor]++;
+    cursor++;
+  }
+}
+
+
+// Is this file an html file? The decision is taken from the extension of
+// fil, the ?query part being ignored.
+//  0 : no
+//  1 : yes
+// -1 : don't know (directory, or extension of unknown type)
+// -2 : don't know, no extension (also returned for an empty name)
+int ishtml(char* fil) {
+  char *a;
+
+  // nothing before the end of the string, or before the '?': there is no
+  // name to look at, and stepping back would leave the string
+  if ( (fil[0]=='\0') || (fil[0]=='?') )
+    return -2;
+
+  // handle names such as truc.html?Choix=toto
+  if ( (a=strchr(fil,'?')) )     // parameters?
+    a--;                         // point just before the ?
+  else
+    a=fil+strlen(fil)-1;         // point to the last character
+
+  if (*a=='/') return -1;        // directory, we don't know
+
+  // go back to the '.' of the extension, without crossing a '/'
+  while ( (*a!='.') && (*a!='/') && ( a > fil)) a--;
+  if (*a=='.') {                 // there is an extension
+    char fil_noquery[HTS_URLMAXSIZE*2];
+    fil_noquery[0]='\0';
+    a++;                         // point to the extension
+    strncat(fil_noquery,a,HTS_URLMAXSIZE);
+    a=strchr(fil_noquery,'?');
+    if (a)
+      *a='\0';
+    return ishtml_ext(fil_noquery);
+  } else return -2;              // undetermined, for example /truc
+}
+
+// Same as ishtml, but for the extension alone:
+//  1 for the html extensions listed below or a type known as html,
+//  0 for a type known as not html, -1 when the type is unknown.
+int ishtml_ext(char* a) {
+  const char* html_exts[8]={ "html", "htm", "shtml", "phtml",
+                             "htmlx", "shtm", "phtm", "htmx" };
+  int k;
+
+  for(k = 0 ; k < 8 ; k++) {
+    if (strfield2(a,html_exts[k]))
+      return 1;
+  }
+
+  // not one of ours: ask the known types table
+  switch(is_knowntype(a)) {
+  case 1:
+    return 0;      // known, not html
+  case 2:
+    return 1;      // known, html
+  default:
+    return -1;     // unknown..
+  }
+}
+
+// error (404,500..)
+HTS_INLINE int ishttperror(int err) {
+ switch (err/100) {
+ case 4: case 5: return 1;
+ break;
+ }
+ return 0;
+}
+
+
+// Return the position of the host inside source: the scheme (ftp:// etc.)
+// is skipped, and so is an identification ending with '@' when one is found
+// before the first '/'. The @ is searched through strrchr_limit because an
+// email address is sometimes passed inside the address.
+char* jump_identification(char* source) {
+  char* host=jump_protocol(source);
+  char* after_at=strrchr_limit(host, '@', strchr(host,'/'));
+  if (after_at != NULL)
+    return after_at;
+  return host;
+}
+
+// Find the port part of an address: returns a pointer to the ':' preceding
+// the port (":80"), or NULL if not found.
+// Can handle IPV6 addresses: when the host holds a ']' before its first '/',
+// the ':' is searched after the last such ']'
+// (http://[3ffe:b80:1234::1]:80/foo.html).
+char* jump_toport(char* source) {
+  char *a,*trytofind;
+  a = jump_identification(source);
+  // The limit is the first '/' of the host part. Taking it from source would
+  // give the '/' of "http://", which lies before the host, so the ']' would
+  // never be found for an address that still carries its scheme.
+  trytofind = strrchr_limit(a, ']', strchr(a, '/'));
+  a = strchr( (trytofind)?trytofind:a, ':');
+  return a;
+}
+
+// strrchr, but not too far: find the last occurrence of c in s located
+// before limit, and return a pointer to the character that FOLLOWS it.
+// With a NULL limit the whole string is searched.
+// Returns NULL when c does not occur (before limit).
+char* strrchr_limit(char* s, char c, char* limit) {
+  if (limit == NULL) {
+    // no limit: the last occurrence in the whole string, as with a limit
+    // (the first occurrence used to be returned here)
+    char* p = strrchr(s, c);
+    return p?(p+1):NULL;
+  } else {
+    char *a=NULL, *p;
+    for(;;) {
+      // continue after the previous match, if any
+      p=strchr((a)?a:s, c);
+      // test for NULL first: a null pointer must not be compared with limit
+      if ((p == NULL) || (p >= limit))
+        return a;
+      a=p+1;
+    }
+  }
+}
+
+// Return the address without its scheme: a leading "http:", "ftp:",
+// "https:" or "file:" is skipped, then a leading "//".
+HTS_INLINE char* jump_protocol(char* source) {
+  // "Comparisons of scheme names MUST be case-insensitive" (RFC2616)
+  const char* schemes[4]={ "http:", "ftp:", "https:", "file:" };
+  int k;
+
+  // scheme: skip the first one that matches
+  for(k = 0 ; k < 4 ; k++) {
+    int p=strfield(source,schemes[k]);
+    if (p) {
+      source+=p;
+      break;
+    }
+  }
+  // net_path
+  if (strncmp(source,"//",2)==0)
+    source+=2;
+  return source;
+}
+
+// Base 64 encoding of the string a into b.
+// Input is read up to its NUL terminator, three bytes at a time; each group
+// gives four output characters, '=' padding the last group when it holds
+// only one or two bytes. b is always NUL terminated.
+void code64(char* a,char* b) {
+  static const char alphabet[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+  while(*a) {
+    unsigned long bits;
+    int n=1;
+
+    // gather up to 24 bits
+    bits = (unsigned long) ((*a++) & 0xff);
+    if (*a) { bits = (bits << 8) | (unsigned long) ((*a++) & 0xff); n=2; }
+    if (*a) { bits = (bits << 8) | (unsigned long) ((*a++) & 0xff); n=3; }
+
+    switch(n) {
+    case 3:                 // 24 bits -> 4 characters
+      b[0] = alphabet[(bits>>18) & 63];
+      b[1] = alphabet[(bits>>12) & 63];
+      b[2] = alphabet[(bits>>6) & 63];
+      b[3] = alphabet[bits & 63];
+      break;
+    case 2:                 // 16 bits, padded to 18 -> 3 characters and '='
+      bits <<= 2;
+      b[0] = alphabet[(bits>>12) & 63];
+      b[1] = alphabet[(bits>>6) & 63];
+      b[2] = alphabet[bits & 63];
+      b[3] = '=';
+      break;
+    default:                // 8 bits, padded to 12 -> 2 characters and "=="
+      bits <<= 4;
+      b[0] = alphabet[(bits>>6) & 63];
+      b[1] = alphabet[bits & 63];
+      b[2] = '=';
+      b[3] = '=';
+      break;
+    }
+    b += 4;
+  }
+  *b='\0';
+}
+
+// Replace, in place, the character entities of s by the characters they
+// stand for: &quot; becomes " etc.
+//  - &#NNN; and &#xHH; give the character of that code (low 8 bits)
+//  - the named entities of the table below give an ISO-8859-1 character,
+//    or an ASCII stand-in (&nbsp; -> space, &laquo; and &raquo; -> ", &shy; -> -)
+// An entity is only considered when its ';' is at most 8 characters after
+// the '&'; unknown entities and entities of value 0 are left untouched.
+// A replaced character is not scanned again ("&amp;lt;" gives "&lt;").
+// The text only shrinks, so the tail is moved with memmove(): there is no
+// intermediate buffer and therefore no limit on the length of s.
+#define strcmpbeg(a, b) strncmp(a, b, strlen(b))
+void unescape_amp(char* s) {
+  // http://www.w3.org/TR/xhtml-modularization/dtd_module_defs.html
+  static const struct {
+    const char* name;
+    unsigned char code;
+  } entities[] = {
+    { "&nbsp;", 32 },      /* hack - the real value is 160 */
+    { "&iexcl;", 161 },  { "&cent;", 162 },   { "&pound;", 163 },  { "&curren;", 164 },
+    { "&yen;", 165 },    { "&brvbar;", 166 }, { "&sect;", 167 },   { "&uml;", 168 },
+    { "&copy;", 169 },   { "&ordf;", 170 },   { "&not;", 172 },    { "&reg;", 174 },
+    { "&macr;", 175 },   { "&deg;", 176 },    { "&plusmn;", 177 }, { "&sup2;", 178 },
+    { "&sup3;", 179 },   { "&acute;", 180 },  { "&micro;", 181 },  { "&para;", 182 },
+    { "&middot;", 183 }, { "&cedil;", 184 },  { "&sup1;", 185 },   { "&ordm;", 186 },
+    { "&frac14;", 188 }, { "&frac12;", 189 }, { "&frac34;", 190 }, { "&iquest;", 191 },
+    { "&Agrave;", 192 }, { "&Aacute;", 193 }, { "&Acirc;", 194 },  { "&Atilde;", 195 },
+    { "&Auml;", 196 },   { "&Aring;", 197 },  { "&AElig;", 198 },  { "&Ccedil;", 199 },
+    { "&Egrave;", 200 }, { "&Eacute;", 201 }, { "&Ecirc;", 202 },  { "&Euml;", 203 },
+    { "&Igrave;", 204 }, { "&Iacute;", 205 }, { "&Icirc;", 206 },  { "&Iuml;", 207 },
+    { "&ETH;", 208 },    { "&Ntilde;", 209 }, { "&Ograve;", 210 }, { "&Oacute;", 211 },
+    { "&Ocirc;", 212 },  { "&Otilde;", 213 }, { "&Ouml;", 214 },   { "&times;", 215 },
+    { "&Oslash;", 216 }, { "&Ugrave;", 217 }, { "&Uacute;", 218 }, { "&Ucirc;", 219 },
+    { "&Uuml;", 220 },   { "&Yacute;", 221 }, { "&THORN;", 222 },  { "&szlig;", 223 },
+    { "&agrave;", 224 }, { "&aacute;", 225 }, { "&acirc;", 226 },  { "&atilde;", 227 },
+    { "&auml;", 228 },   { "&aring;", 229 },  { "&aelig;", 230 },  { "&ccedil;", 231 },
+    { "&egrave;", 232 }, { "&eacute;", 233 }, { "&ecirc;", 234 },  { "&euml;", 235 },
+    { "&igrave;", 236 }, { "&iacute;", 237 }, { "&icirc;", 238 },  { "&iuml;", 239 },
+    { "&eth;", 240 },    { "&ntilde;", 241 }, { "&ograve;", 242 }, { "&oacute;", 243 },
+    { "&ocirc;", 244 },  { "&otilde;", 245 }, { "&ouml;", 246 },   { "&divide;", 247 },
+    { "&oslash;", 248 }, { "&ugrave;", 249 }, { "&uacute;", 250 }, { "&ucirc;", 251 },
+    { "&uuml;", 252 },   { "&yacute;", 253 }, { "&thorn;", 254 },  { "&yuml;", 255 },
+    //
+    { "&amp;", '&' },    { "&gt;", '>' },     { "&laquo;", '\"' }, { "&lt;", '<' },
+    { "&quot;", '\"' },  { "&raquo;", '\"' }, { "&shy;", '-' },    { "&tilde;", '~' }
+  };
+  const int nentities=(int) (sizeof(entities)/sizeof(entities[0]));
+
+  for( ; *s ; s++) {
+    char* end;
+    unsigned char c=0;
+
+    if (*s!='&')
+      continue;
+    end=strchr(s,';');
+    if ( !end || (((int) (end - s)) > 8) )
+      continue;
+
+    if (strcmpbeg(s, "&#") == 0) {            // numeric entity
+      if ( (s[2] == 'x') || (s[2] == 'X')) {
+        unsigned int num=0;                   // %x expects an unsigned int
+        if (sscanf(s+3, "%x", &num) == 1)
+          c=(unsigned char)num;
+      } else {
+        int num=0;
+        if (sscanf(s+2, "%d", &num) == 1)
+          c=(unsigned char)num;
+      }
+    } else {                                  // named entity
+      int k;
+      for(k = 0 ; k < nentities ; k++) {
+        if (strcmpbeg(s, entities[k].name) == 0) {
+          c=entities[k].code;
+          break;
+        }
+      }
+    }
+
+    // replace?
+    if (c) {
+      *s=(char) c;
+      // pull what follows the ';' right after the new character
+      memmove(s+1,end+1,strlen(end+1)+1);
+    }
+  }
+}
+
+// Decode the %xx escapes of s (%20 becomes ' ' etc.) and return the result
+// in the storage reserved by NOSTATIC_RESERVE (HTS_URLMAXSIZE*2 bytes);
+// longer results are truncated to that size.
+// A '%' that is the very last character is copied as is: there is nothing
+// after it to decode, and reading two digits there would leave the string.
+// '|' is deliberately NOT turned into ':' (file:///C|Program%20Files...),
+// because of URLs such as /home/0,1837,1|7|1173|Content,00.html
+char* unescape_http(char* s) {
+  char* tempo;
+  int i,j=0;
+  int len;
+  NOSTATIC_RESERVE(tempo, char, HTS_URLMAXSIZE*2);
+  len=(int) strlen(s);
+  for (i=0;(i<len) && (j<HTS_URLMAXSIZE*2-1);i++) {
+    if ((s[i]=='%') && (s[i+1]!='\0')) {
+      tempo[j++]=(char) ehex(s+i+1);
+      i+=2;       // two characters consumed after the %
+    }
+    else
+      tempo[j++]=s[i];
+  }
+  tempo[j++]='\0';
+  return tempo;
+}
+
+// Unescape in a URL/URI ONLY what does not have to stay escaped, to form a
+// standard URL/URI: a %xx is decoded unless its character is reserved, a
+// delimiter, unwise, low, to be avoided, or (when no_high is set) high; in
+// those cases the escape is kept unchanged.
+// The result is returned in the storage reserved by NOSTATIC_RESERVE
+// (HTS_URLMAXSIZE*2 bytes); longer results are truncated to that size.
+// A '%' that is the very last character is copied as is: reading two digits
+// there would leave the string.
+// '|' is deliberately NOT turned into ':' (file:///C|Program%20Files...),
+// because of URLs such as /home/0,1837,1|7|1173|Content,00.html
+char* unescape_http_unharm(char* s, int no_high) {
+  char* tempo;
+  int i,j=0;
+  int len;
+  NOSTATIC_RESERVE(tempo, char, HTS_URLMAXSIZE*2);
+  len=(int) strlen(s);
+  for (i=0;(i<len) && (j<HTS_URLMAXSIZE*2-1);i++) {
+    if ((s[i]=='%') && (s[i+1]!='\0')) {
+      int nchar=(char) ehex(s+i+1);
+
+      int test = (  CHAR_RESERVED(nchar)
+                 || CHAR_DELIM(nchar)
+                 || CHAR_UNWISE(nchar)
+                 || CHAR_LOW(nchar)            /* CHAR_SPECIAL */
+                 || CHAR_XXAVOID(nchar)
+                 || (
+                      (no_high)
+                      &&
+                      CHAR_HIG(nchar)
+                    )
+                 );
+
+      if (!test) {
+        tempo[j++]=(char) ehex(s+i+1);
+        i+=2;
+      } else {
+        // keep the escape: the two digits are copied by the next iterations
+        tempo[j++]='%';
+      }
+    }
+    else
+      tempo[j++]=s[i];
+  }
+  tempo[j++]='\0';
+  return tempo;
+}
+
+// Escape, in place, only the space characters of s as %xx (x_escape_http mode 2).
+// Each escaped character makes the string two bytes longer (buffer MAX 1Ko).
+void escape_spc_url(char* s) {
+ x_escape_http(s,2);
+}
+// Escape s in place with x_escape_http mode 1 (reserved, delimiter, unwise, special and to-avoid characters): smith / john -> smith%20%2f%20john
+void escape_in_url(char* s) {
+ x_escape_http(s,1);
+}
+// Escape s in place with x_escape_http mode 3 (special and to-avoid characters only): smith / john -> smith%20/%20john
+void escape_uri(char* s) {
+ x_escape_http(s,3);
+}
+void escape_uri_utf(char* s) { /* in-place escaping with x_escape_http mode 30:
+ * only CHAR_LOW and CHAR_XXAVOID characters are rewritten as %xx */
+ x_escape_http(s,30);
+}
+void escape_check_url(char* s) { /* in-place escaping with x_escape_http mode 0:
+ * only the double quote and the space are rewritten as %xx */
+ x_escape_http(s,0);
+}
+// Same as escape_check_url, but works on a copy of s held in a concat()
+// buffer and returns that copy.
+char* escape_check_url_addr(char* s) {
+  char* copy=concat(s,"");
+  escape_check_url(copy);
+  return copy;
+}
+
+
+// Escape, in place, the characters of s selected by mode, as %xx (lowercase hex)
+// mode 0:  double quote and space
+// mode 1:  CHAR_RESERVED, CHAR_DELIM, CHAR_UNWISE, CHAR_SPECIAL, CHAR_XXAVOID
+// mode 2:  space only
+// mode 3:  CHAR_SPECIAL, CHAR_XXAVOID (only what is necessary)
+// mode 30: CHAR_LOW, CHAR_XXAVOID (only what is necessary)
+// any other mode: nothing is escaped
+// The string grows by two bytes per escaped character: the caller must provide
+// a buffer that is large enough.
+// The tail is shifted with memmove() instead of being copied to a fixed-size
+// stack buffer, which overflowed when the tail exceeded HTS_URLMAXSIZE*2 bytes.
+void x_escape_http(char* s,int mode) {
+  static const char hexdigits[] = "0123456789abcdef";
+  while(*s) {
+    int test=0;
+    switch(mode) {
+    case 0:
+      test=(strchr("\" ",*s)!=0);
+      break;
+    case 1:
+      test = (  CHAR_RESERVED(*s)
+             || CHAR_DELIM(*s)
+             || CHAR_UNWISE(*s)
+             || CHAR_SPECIAL(*s)
+             || CHAR_XXAVOID(*s) );
+      break;
+    case 2:
+      test=(strchr(" ",*s)!=0);
+      break;
+    case 3:
+      test = (  CHAR_SPECIAL(*s)
+             || CHAR_XXAVOID(*s) );
+      break;
+    case 30:
+      test = (  CHAR_LOW(*s)
+             || CHAR_XXAVOID(*s) );
+      break;
+    default:
+      break;
+    }
+
+    if (test) {
+      const unsigned int n=(unsigned char) *s;
+      // open a two-byte gap after s[0] (the terminating NUL moves along)
+      memmove(s+3,s+1,strlen(s+1)+1);
+      s[0]='%';
+      s[1]=hexdigits[(n>>4) & 15];
+      s[2]=hexdigits[n & 15];
+    }
+    // as before, only the '%' is skipped: the two hex digits are examined next
+    s++;
+  }
+}
+
+
+// Value of one hexadecimal digit ('0'-'9', 'a'-'f' or 'A'-'F')
+// return: 0..15, or 0 when c is not a hexadecimal digit
+HTS_INLINE int ehexh(char c) {
+  if (c>='0' && c<='9')
+    return c-'0';
+  if (c>='a' && c<='f')
+    return c-'a'+10;
+  if (c>='A' && c<='F')
+    return c-'A'+10;
+  return 0;
+}
+
+// Value of the two hexadecimal digits s[0] s[1] (both characters are read)
+// return: 0..255; a non-hexadecimal character counts as 0
+HTS_INLINE int ehex(char* s) {
+  const int high = ehexh(s[0]);
+  const int low = ehexh(s[1]);
+  return (high*16)+low;
+}
+
+// concat: concatenate two strings and return the result
+// This lightens the calling code a lot, but the result lives in a ring of 16
+// buffers: no more than 16 concat()/convtolower() results can be alive at the
+// same time (e.g. inside a single expression).
+typedef struct {
+  char buff[16][HTS_URLMAXSIZE*2*2];
+  int rol;              // index of the slot handed out last
+} concat_strc;
+// a, b: strings to join; either may be NULL and is then treated as empty
+// return: the joined string, truncated if needed to fit one slot
+//         (the unbounded strcpy/strcat used previously overflowed the slot)
+char* concat(const char* a,const char* b) {
+  concat_strc* strc;
+  char* dest;
+  size_t room;
+  NOSTATIC_RESERVE(strc, concat_strc, 1);
+  strc->rol=((strc->rol+1)%16);    // roving pointer
+  dest=strc->buff[strc->rol];
+  room=sizeof(strc->buff[0])-1;    // one byte kept for the terminating NUL
+  if (a != dest) {                 // a already sitting in this slot is left in place
+    dest[0]='\0';
+    if (a) strncat(dest,a,room);
+  }
+  if (b) strncat(dest,b,room-strlen(dest));
+  return dest;
+}
+// conversion fichier / -> antislash
+#if HTS_DOSNAME
+// Replace, in place, every '/' of a with a backslash; returns a
+char* __fconv(char* a) {
+  char* p;
+  for (p=a; *p!='\0'; p++) {
+    if (*p=='/')
+      *p='\\';
+  }
+  return a;
+}
+// Join a and b with concat(), then convert every '/' of the result to a backslash
+char* fconcat(char* a,char* b) {
+ return __fconv(concat(a,b));
+}
+// Copy a with concat(), then convert every '/' of the copy to a backslash
+// (a itself is not modified)
+char* fconv(char* a) {
+ return __fconv(concat(a,""));
+}
+#endif
+
+/* Replace, in place, every backslash of a with '/'; returns a */
+char* __fslash(char* a) {
+  char* p;
+  for (p=a; *p!='\0'; p++) {
+    if (*p=='\\')
+      *p='/';
+  }
+  return a;
+}
+// Copy a with concat(), then convert every backslash of the copy to '/'
+// (a itself is not modified)
+char* fslash(char* a) {
+ return __fslash(concat(a,""));
+}
+
+// Lower-case copy of a, stored in the next slot of a 16-entry concat_strc ring
+// (a itself is not modified)
+char* convtolower(char* a) {
+  concat_strc* strc;
+  char* slot;
+  NOSTATIC_RESERVE(strc, concat_strc, 1);
+  strc->rol=(strc->rol+1)%16;     // roving pointer
+  slot=strc->buff[strc->rol];
+  strcpy(slot,a);
+  hts_lowcase(slot);              // lower case
+  return slot;
+}
+
+// Convert, in place, the ASCII letters 'A'-'Z' of s to lower case
+void hts_lowcase(char* s) {
+  char* p;
+  for (p=s; *p!='\0'; p++) {
+    if (*p>='A' && *p<='Z')
+      *p+=('a'-'A');
+  }
+}
+
+// Replace, in place, every occurrence of the character from by to in s
+// - from == '\0' or from == to: nothing to do. The previous code wrote over
+//   the terminating NUL in the first case and looped forever in the second.
+// - to == '\0': the string is cut at the first occurrence
+// The search resumes after each replaced character instead of restarting
+// from the beginning of s.
+HTS_INLINE void hts_replace(char *s,char from,char to) {
+  char* a=s;
+  if ((from=='\0') || (from==to))
+    return;
+  while ((a=strchr(a,from))!=NULL) {
+    *a=to;
+    if (to=='\0')       // string truncated here: what follows is no longer part of s
+      return;
+    a++;
+  }
+}
+
+
+// caractère espace, guillemets, CR, LF etc..
+/* SECTION OPTIMISEE:
+ #define is_space(c) (strchr(" \"\x0d\x0a\x09'",c)!=NULL)
+ #define is_realspace(c) (strchr(" \x0d\x0a\x09\x0c",c)!=NULL)
+*/
+/*
+HTS_INLINE int is_space(char c) {
+ if (c==' ') return 1; // spc
+ if (c=='"') return 1; // quote
+ if (c==10) return 1; // lf
+ if (c==13) return 1; // cr
+ if (c=='\'') return 1; // quote
+ //if (c=='`') return 1; // backquote << non
+ if (c==9) return 1; // tab
+ return 0;
+}
+*/
+
+// caractère espace, CR, LF, TAB
+/*
+HTS_INLINE int is_realspace(char c) {
+ if (c==' ') return 1; // spc
+ if (c==10) return 1; // lf
+ if (c==13) return 1; // cr
+ if (c==9) return 1; // tab
+ return 0;
+}
+*/
+
+
+
+
+
+// Guess the mime type of a local file
+// e.g. fil="toto.gif" -> s="image/gif"
+// Same as get_httptype() with flag=1, i.e. a type is always written to s
+void guess_httptype(char *s,char *fil) {
+ get_httptype(s,fil,1);
+}
+// Write to s the mime type matching the file name fil
+// s:    output buffer; the caller must size it for "application/<extension>"
+// fil:  file name or URL path
+// flag: 1 to always write a type: "application/<extension>" when the extension
+//       is not in hts_mime, "application/octet-stream" when fil has no
+//       extension. With flag==0, s is left untouched in those two cases.
+// An empty fil is handled: the backward scan used to start one byte before
+// the beginning of the string.
+void get_httptype(char *s,char *fil,int flag) {
+  if (ishtml(fil)==1)
+    strcpy(s,"text/html");
+  else {
+    char *a=fil+strlen(fil);
+    if (a>fil) a--;                 // last character, or fil itself when empty
+    // walk back to the last '.' of the last path component
+    while ( (*a!='.') && (*a!='/') && (a>fil)) a--;
+    if (*a=='.') {
+      int ok=0;
+      int j=0;
+      a++;
+      while( (!ok) && (strnotempty(hts_mime[j][1])) ) {
+        if (strfield2(hts_mime[j][1],a)) {
+          if (hts_mime[j][0][0]!='*') {    // a match exists
+            strcpy(s,hts_mime[j][0]);
+            ok=1;
+          }
+        }
+        j++;
+      }
+
+      if (!ok) if (flag) sprintf(s,"application/%s",a);
+    } else {
+      if (flag) strcpy(s,"application/octet-stream");
+    }
+  }
+}
+
+// Register, or search, the user-defined "extension=mime type" table
+// setdefs != 0: remember s as the table (the pointer is stored, the text is
+//               not copied) and return 1
+// setdefs == 0: search the table for ext
+//   s:   output buffer receiving the type (e.g. text/html), or NULL;
+//        the caller must size it for the longest type of the table
+//   ext: extension (e.g. php)
+// Entries are matched as "\next=" or "\next\n", e.g. "\nphp\ncgi=text/html\n"
+// (extensions listed on consecutive lines share the next "=type").
+// return: 1 if known by user, 0 otherwise
+int get_userhttptype(int setdefs,char *s,char *ext) {
+  char** buffer=NULL;
+  NOSTATIC_RESERVE(buffer, char*, 1);
+  if (setdefs) {
+    *buffer=s;
+    return 1;
+  }
+  if (s)
+    s[0]='\0';
+  if (!ext)
+    return 0;
+  if (*buffer) {
+    char search[1024];
+    char* detect;
+    // "\n" + ext + one delimiter + NUL must fit in search[]: a longer
+    // extension used to overflow the buffer
+    if (strlen(ext) > sizeof(search)-3)
+      return 0;
+    sprintf(search,"\n%s=",ext);          // php=text/html
+    detect=strstr(*buffer,search);
+    if (!detect) {
+      sprintf(search,"\n%s\n",ext);       // php\ncgi=text/html
+      detect=strstr(*buffer,search);
+    }
+    if (detect) {
+      detect=strchr(detect,'=');
+      if (detect) {
+        detect++;
+        if (s) {
+          char* a=strchr(detect,'\n');
+          // a last entry without a trailing newline runs to the end of the
+          // table (it used to return 1 with s left empty)
+          size_t len=(a!=NULL) ? (size_t) (a-detect) : strlen(detect);
+          strncat(s,detect,len);
+        }
+        return 1;
+      }
+    }
+  }
+  return 0;
+}
+// Give the file extension matching a mime type
+// e.g. "image/gif" -> gif
+// s:  output buffer, set to "" when nothing matches
+// st: mime type
+void give_mimext(char *s,char *st) {
+  int found=0;
+  int idx;
+  s[0]='\0';
+  // first entry of hts_mime whose type equals st and whose extension is not a '*' wildcard
+  for (idx=0; (!found) && (strnotempty(hts_mime[idx][1])); idx++) {
+    if (strfield2(hts_mime[idx][0],st) && (hts_mime[idx][1][0]!='*')) {
+      strcpy(s,hts_mime[idx][1]);
+      found=1;
+    }
+  }
+  // wrap "x" mimetypes, such as:
+  // application/x-mp3
+  // or
+  // application/mp3
+  if (!found) {
+    int skip;
+    char* sub=NULL;
+    if ((skip=strfield(st,"application/x-")) != 0)
+      sub=st+skip;
+    else if ((skip=strfield(st,"application/")) != 0)
+      sub=st+skip;
+    if (sub != NULL) {
+      const int sublen=(int) strlen(sub);
+      // only a 1 to 4 character subtype is used as an extension
+      if ((sublen >= 1) && (sublen <= 4))
+        strcpy(s,sub);
+    }
+  }
+}
+// Is this extension known?
+// 0 : no
+// 1 : yes
+// 2 : yes, and it is html
+// The built-in hts_mime table is searched first, then the user-defined types
+int is_knowntype(char *fil) {
+  int idx;
+  if (fil == NULL)
+    return 0;
+  for (idx=0; strnotempty(hts_mime[idx][1]); idx++) {
+    if (strfield2(hts_mime[idx][1],fil))
+      return (strfield2(hts_mime[idx][0],"text/html")) ? 2 : 1;
+  }
+
+  // Known by user?
+  return is_userknowntype(fil);
+}
+// Extension of a file name: html, gif..
+// fil:    file name or URL path
+// return: the text following the last '.' of the last path component, cut at
+//         the first '?' found in it; "" when there is no extension. The
+//         result is a concat() buffer or a string literal: do not free it.
+// An empty fil is handled: the backward scan used to start one byte before
+// the beginning of the string.
+char* get_ext(char *fil) {
+  char* fil_noquery;
+  char *a=fil+strlen(fil);
+  NOSTATIC_RESERVE(fil_noquery, char, HTS_URLMAXSIZE*2);
+
+  if (a>fil) a--;                   // last character, or fil itself when empty
+  while ( (*a!='.') && (*a!='/') && (a>fil)) a--;
+  if (*a=='.') {
+    fil_noquery[0]='\0';
+    a++;                            // point to the extension
+    strncat(fil_noquery,a,HTS_URLMAXSIZE);
+    a=strchr(fil_noquery,'?');      // drop a query string, if any
+    if (a)
+      *a='\0';
+    return concat(fil_noquery,"");
+  }
+  else
+    return "";
+}
+// Is this extension known through the user-defined types?
+// 0 : no
+// 1 : yes
+// 2 : yes, and it is html
+// The table is the one registered with get_userhttptype(1,...), e.g.
+// file=(char*) "asp=text/html\nphp=text/html\n"
+int is_userknowntype(char *fil) {
+  char mime[1024];
+  if ((fil == NULL) || (!strnotempty(fil)))
+    return 0;
+  mime[0]='\0';
+  get_userhttptype(0,mime,fil);
+  if (!strnotempty(mime))
+    return 0;
+  return (strfield2(mime,"text/html")) ? 2 : 1;
+}
+
+// Dynamic page?
+// usage: is_dyntype(get_ext("foo.asp"))
+// return: 1 if fil is listed in hts_ext_dynamic, 0 otherwise (or if fil is NULL or empty)
+int is_dyntype(char *fil) {
+  int idx;
+  if ((fil == NULL) || (!strnotempty(fil)))
+    return 0;
+  for (idx=0; strnotempty(hts_ext_dynamic[idx]); idx++) {
+    if (strfield2(hts_ext_dynamic[idx],fil))
+      return 1;
+  }
+  return 0;
+}
+
+// Critical types that must not be changed, because they are sent by servers
+// that do not know the real type
+// return: 1 if st is such a type (hypertext mime type, or listed in hts_mime_keep), 0 otherwise
+int may_unknown(char* st) {
+  int idx;
+  // hypertext types
+  if (may_be_hypertext_mime(st))
+    return 1;
+  for (idx=0; strnotempty(hts_mime_keep[idx]); idx++) {
+    if (strfield2(hts_mime_keep[idx],st))     // found
+      return 1;
+  }
+  return 0;
+}
+
+
+
+// -- Utils fichiers
+
+// pretty print for i/o
+// Write buff to fp with prefix in front of every non-empty line:
+// CR characters are dropped and each LF is written as CR LF.
+// prefix is written verbatim with fputs(): it used to be passed to fprintf()
+// as the format string, so any '%' in it was interpreted as a conversion.
+void fprintfio(FILE* fp,char* buff,char* prefix) {
+  int at_line_start=1;       // the next printable character starts a line
+  for ( ; *buff; buff++) {
+    switch(*buff) {
+    case 13:
+      break;
+    case 10:
+      fputs("\r\n",fp);
+      at_line_start=1;
+      break;
+    default:
+      if (at_line_start)
+        fputs(prefix,fp);
+      at_line_start=0;
+      fputc(*buff,fp);
+      break;
+    }
+  }
+}
+
+/* Does the file exist? (more precisely: can it be opened for reading?) */
+/* return: 1 if yes, 0 if not or if the name is empty */
+int fexist(char* s) {
+  FILE* fp;
+  if (!strnotempty(s))        // empty name: not found
+    return 0;
+  fp=fopen(fconv(s),"rb");
+  if (fp==NULL)
+    return 0;
+  fclose(fp);
+  return 1;
+}
+
+/* Size of a file in bytes, -1 if it does not exist (cannot be opened) or if the name is empty */
+/* fp->_cnt does not work on all platforms :-(( */
+/* Note: NOT YET READY FOR 64-bit (the ftell() result is stored in an int) */
+/* The Win32 implementation below is commented out; the portable */
+/* fopen/fseek/ftell version is the one that is compiled */
+//LLint fsize(char* s) {
+int fsize(char* s) {
+ /*
+#if HTS_WIN
+ HANDLE hFile;
+ DWORD dwSizeHigh = 0;
+ DWORD dwSizeLow = 0;
+ hFile = CreateFile(s,0,0,NULL,OPEN_EXISTING,0,NULL);
+ if (hFile) {
+ dwSizeLow = GetFileSize (hFile, & dwSizeHigh) ;
+ CloseHandle(hFile);
+ if (dwSizeLow != 0xFFFFFFFF)
+ return (dwSizeLow & (dwSizeHigh<<32));
+ else
+ return -1;
+ } else
+ return -1;
+#else
+ */
+ FILE* fp;
+ if (strnotempty(s)==0) // empty name: error
+ return -1;
+ fp=fopen(fconv(s),"rb");
+ if (fp!=NULL) {
+ int i;
+ // the size is the offset of the end of the file
+ fseek(fp,0,SEEK_END);
+ i=ftell(fp);
+ fclose(fp);
+ return i;
+ } else return -1;
+ /*
+#endif
+ */
+}
+
+/* Size in bytes of an already opened file, -1 if fp is NULL */
+/* The current position of fp is saved, then restored before returning */
+int fpsize(FILE* fp) {
+  int saved,total;
+  if (fp==NULL)
+    return -1;
+  saved=ftell(fp);
+  fseek(fp,0,SEEK_END);
+  total=ftell(fp);
+  fseek(fp,saved,SEEK_SET);
+  return total;
+}
+
+/* root dir, with ending / */
+typedef struct {
+  char path[1024+4];      // 1024 characters + room for a trailing '/' and the NUL
+  int init;               // set once the path has been computed
+} hts_rootdir_strc;
+// hts_rootdir(file), file != NULL: initialize the root directory on the first
+//   call (later calls are ignored) and return NULL. The directory part of
+//   file is used, with backslashes converted to '/'; if file is empty or
+//   contains no directory, the current working directory is used instead.
+// hts_rootdir(NULL): return the root directory, or "" if not initialized yet
+// file is copied with a length limit: the unbounded strcpy() used previously
+// overflowed path for names longer than the buffer.
+char* hts_rootdir(char* file) {
+  static hts_rootdir_strc strc = {"", 0};
+  //NOSTATIC_RESERVE(strc, hts_rootdir_strc, 1);
+  if (file) {
+    if (!strc.init) {
+      strc.path[0]='\0';
+      strc.init=1;
+      if (strnotempty(file)) {
+        char* a;
+        strncat(strc.path,file,1024);      // bounded copy (path is empty here)
+        while((a=strrchr(strc.path,'\\'))) *a='/';
+        if ((a=strrchr(strc.path,'/'))) {
+          *(a+1)='\0';                     // keep the directory and its ending '/'
+        } else
+          strc.path[0]='\0';
+      }
+      if (!strnotempty(strc.path)) {
+        if( getcwd( strc.path, 1024 ) == NULL )
+          strc.path[0]='\0';
+        else
+          strcat(strc.path,"/");
+      }
+    }
+    return NULL;
+  } else if (strc.init)
+    return strc.path;
+  else
+    return "";
+}
+
+
+
+hts_stat_struct HTS_STAT;
+//
+// return the number of bytes that may still be downloaded, depending on the rate limiter
+// see engine_stats() routine, too
+// rate: maximum transfer rate; <= 0 means no limit, TAILLE_BUFFER is then returned
+// NOTE(review): the division by 1000 suggests mtime_local() counts milliseconds
+// and rate is in bytes per second, despite the "useconds" name - confirm
+// this routine works quite well for big files and regular ones, but apparently the rate limiter has
+// some problems with very small files (rate too high)
+LLint check_downloadable_bytes(int rate) {
+  TStamp now;
+  TStamp elapsed;
+  LLint received;
+  LLint left;
+  int id_timer;
+
+  if (rate <= 0)
+    return TAILLE_BUFFER;
+
+  // get the older timer
+  id_timer = (HTS_STAT.istat_idlasttimer + 1) % 2;
+
+  now = mtime_local();
+  elapsed = now - HTS_STAT.istat_timestart[id_timer];
+  received = (HTS_STAT.HTS_TOTAL_RECV - HTS_STAT.istat_bytes[id_timer]);
+
+  // allowed bytes for the elapsed period, minus what was actually received
+  left = ((rate * elapsed)/1000) - received;
+  if (left <= 0)
+    left = 0;
+
+  return left;
+}
+
+//
+// Disabled code (#if 0), kept for reference
+// return: 1 (slow down) when HTS_STAT.HTS_TOTAL_RECV_STATE is set, 0 (OK) otherwise
+// var is only used by the throttling sketch that is commented out below
+// 0 : OK
+// 1 : slow down
+#if 0
+int HTS_TOTAL_RECV_CHECK(int var) {
+ if (HTS_STAT.HTS_TOTAL_RECV_STATE)
+ return 1;
+ /*
+ {
+ if (HTS_STAT.HTS_TOTAL_RECV_STATE==3) {
+ var = min(var,32);
+ Sleep(250);
+ } else if (HTS_STAT.HTS_TOTAL_RECV_STATE==2) {
+ var = min(var,256);
+ Sleep(100);
+ } else {
+ var/=2;
+ if (var<=0) var=1;
+ Sleep(50);
+ }
+ }
+ */
+ return 0;
+}
+#endif
+
+// Read at most size bytes into buff from the connection described by r (htsblk structure)
+// - r->is_file set: fread() on r->fp, or -1 when r->fp is NULL
+// - otherwise: SSL_read() when built with HTS_USEOPENSSL and r->ssl is set, recv() on r->soc if not
+// Received bytes are added to HTS_STAT.HTS_TOTAL_RECV
+// >0 : data received
+// == 0 : not yet data
+// <0 : no more data or error
+// NOTE(review): the closing braces depend on HTS_USEOPENSSL. Without it the byte
+// accounting is placed after the if/else and also counts file reads; with it the
+// accounting is inside the socket branch only - confirm which one is intended
+HTS_INLINE int hts_read(htsblk* r,char* buff,int size) {
+ int retour;
+ // return read(soc,buff,size);
+ if (r->is_file) {
+#if HTS_WIDE_DEBUG
+ DEBUG_W("read\n");
+#endif
+ if (r->fp)
+ retour=fread(buff,1,size,r->fp);
+ else
+ retour=-1;
+ } else {
+#if HTS_WIDE_DEBUG
+ DEBUG_W("recv\n");
+ if (r->soc==INVALID_SOCKET)
+ printf("!!WIDE_DEBUG ERROR, soc==INVALID hts_read\n");
+#endif
+ //HTS_TOTAL_RECV_CHECK(size); // Reduce if needed when too much data has been received
+#if HTS_USEOPENSSL
+ if (r->ssl) {
+ retour = SSL_read(r->ssl_con, buff, size);
+ if (retour <= 0) {
+ // SSL_read() failed: tell "retry later" apart from a real end of stream/error
+ int err_code = SSL_get_error(r->ssl_con, retour);
+ if (
+ (err_code == SSL_ERROR_WANT_READ)
+ ||
+ (err_code == SSL_ERROR_WANT_WRITE)
+ )
+ {
+ retour = 0; /* no data yet (ssl cache) */
+ } else {
+ retour = -1; /* eof or error */
+ }
+ }
+ } else {
+#endif
+ retour=recv(r->soc,buff,size,0);
+ }
+ if (retour > 0) // count incoming traffic
+ HTS_STAT.HTS_TOTAL_RECV+=retour;
+#if HTS_USEOPENSSL
+ }
+#endif
+#if HTS_WIDE_DEBUG
+ DEBUG_W("recv/read done\n");
+#endif
+ return retour;
+}
+
+
+// -- Gestion cache DNS --
+// 'RX98
+#if HTS_DNSCACHE
+
+// 'capsule' holding only the DNS cache
+// return: the t_dnscache reserved through NOSTATIC_RESERVE; hts_gethostbyname()
+// and hts_dnstest() use it as the head of the cache list
+t_dnscache* _hts_cache(void) {
+ t_dnscache* cache;
+ NOSTATIC_RESERVE(cache, t_dnscache, 1);
+ return cache;
+}
+
+// lock the dns cache for any add operation
+// safer when several threads may write into it..
+// -1: status? 0: release 1: lock
+
+/*
+ Simple lock function for cache
+
+ Return value: always 0
+ Parameter:
+ 1 wait for lock (mutex) available and lock it
+ 0 unlock the mutex
+ [-1 check if locked (always return 0 with mutex)]
+ -999 initialize
+*/
+#if USE_BEGINTHREAD
+// Threaded build: the request is forwarded to htsSetLock() on a static mutex
+int _hts_lockdns(int i) {
+ static PTHREAD_LOCK_TYPE hMutex;
+ return htsSetLock(&hMutex,i);
+}
+#else
+// Non-threaded build: nothing is locked; the function returns i when i>=0
+// and 0 for negative values (so the status query -1 always reports "free")
+int _hts_lockdns(int i) {
+ int l=0;
+ if (i>=0)
+ l=i;
+ return l;
+}
+#endif
+
+// Search the dns cache list for iadr, filling retour when an entry is found
+// cache:  head of the cache list
+// iadr:   host name to search
+// retour: structure to fill (its h_addr_list[0] is written only if not NULL)
+// return: NULL if the name is not in the cache, or if its entry has
+//         host_length==0; retour otherwise. retour->h_length==0 then means
+//         that the name does not exist in the dns.
+t_hostent* _hts_ghbn(t_dnscache* cache,char* iadr,t_hostent* retour) {
+  // wait until the dns cache is available, then lock it
+  while(_hts_lockdns(-1));
+  _hts_lockdns(1);
+
+  // walk the list up to the entry named iadr
+  for ( ; strcmp(cache->iadr,iadr)!=0; cache=cache->n) {
+    if (cache->n==NULL) {         // end of list: not present
+      _hts_lockdns(0);
+      return NULL;
+    }
+  }
+
+  if (cache->host_length==0) {    // resolution in progress
+    _hts_lockdns(0);
+    return NULL;
+  }
+
+  if (cache->host_length>0) {     // valid entry
+    if (retour->h_addr_list[0])
+      memcpy(retour->h_addr_list[0], cache->host_addr, cache->host_length);
+    retour->h_length=cache->host_length;
+  } else {                        // dns error, already checked
+    if (retour->h_addr_list[0])
+      retour->h_addr_list[0][0]='\0';
+    retour->h_length=0;           // error, does not exist
+  }
+  _hts_lockdns(0);                // unlock
+  return retour;
+}
+
+// test whether iadr has already been resolved (or is being resolved)
+// _iadr: host, possibly with a user:pass@ prefix and a :port suffix (both are ignored)
+// 0 not yet
+// 1 ok (numeric address, or name present in the cache)
+// 2 not present in the cache
+// NOTE(review): the code below never returns 0
+int hts_dnstest(char* _iadr) {
+ char* iadr;
+ t_dnscache* cache=_hts_cache(); // address of the cache
+ NOSTATIC_RESERVE(iadr, char, HTS_URLMAXSIZE*2);
+
+ // skip a possible user:pass@
+ strcpy(iadr,jump_identification(_iadr));
+ // cut a possible :port
+ {
+ char *a;
+ if ( (a=jump_toport(iadr)) )
+ *a='\0';
+ }
+
+#if HTS_WIN
+ if (inet_addr(iadr)!=INADDR_NONE) // numeric
+#else
+ if (inet_addr(iadr)!=(in_addr_t) -1 ) // numeric
+#endif
+ return 1;
+
+ while(_hts_lockdns(-1)); // wait for the lock to be released
+ _hts_lockdns(1); // lock
+ while(1) {
+ if (strcmp(cache->iadr,iadr)==0) { // found
+ _hts_lockdns(0); // unlock
+ return 1; // present!
+ } else { // not found yet
+ if (cache->n!=NULL) { // keep searching
+ cache=cache->n; // next one!
+ } else {
+ _hts_lockdns(0); // unlock
+ return 2; // not present
+ }
+ }
+ }
+}
+
+
+// Resolve hostname, without using the internal dns cache
+// hostname: name to resolve; enclosing [] are stripped IN PLACE (hostname is modified)
+// v_buffer: t_fullhostent receiving the result
+// return:   &buffer->hp on success, NULL on failure or if hostname is empty
+// With HTS_INET6==0, gethostbyname() is used and the address is copied;
+// otherwise getaddrinfo() is used and the whole sockaddr (ai_addr) is copied,
+// h_length being its length
+// NOTE(review): the bracket stripping copies through tempo[HTS_URLMAXSIZE*2]
+// without checking the length of hostname
+t_hostent* vxgethostbyname(char* hostname, void* v_buffer) {
+ t_fullhostent* buffer = (t_fullhostent*) v_buffer;
+ /* Clear */
+ fullhostent_init(buffer);
+
+ /* Protection */
+ if (!strnotempty(hostname)) {
+ return NULL;
+ }
+
+ /*
+ Strip [] if any : [3ffe:b80:1234:1::1]
+ The resolver doesn't seem to handle IP6 addresses in brackets
+ */
+ if ((hostname[0] == '[') && (hostname[strlen(hostname)-1] == ']')) {
+ char tempo[HTS_URLMAXSIZE*2];
+ tempo[0]='\0';
+ strncat(tempo, hostname+1, strlen(hostname)-2);
+ strcpy(hostname, tempo);
+ }
+
+ {
+#if HTS_INET6==0
+ /*
+ ipV4 resolver
+ */
+ t_hostent* hp=gethostbyname(hostname);
+ if (hp!=NULL) {
+ // the address is copied only if it fits the caller's buffer
+ if ( (hp->h_length) && ( ((unsigned int) hp->h_length) <= buffer->addr_maxlen) ) {
+ memcpy(buffer->hp.h_addr_list[0], hp->h_addr_list[0], hp->h_length);
+ buffer->hp.h_length = hp->h_length;
+ return &(buffer->hp);
+ }
+ }
+#else
+ /*
+ ipV6 resolver
+ */
+ /*
+ int error_num=0;
+ t_hostent* hp=getipnodebyname(hostname, AF_INET6, AI_DEFAULT, &error_num);
+ oops, deprecated :(
+ */
+ struct addrinfo* res = NULL;
+ struct addrinfo hints;
+ memset(&hints, 0, sizeof(hints));
+ if (IPV6_resolver == 1) // V4 only (for bogus V6 entries)
+ hints.ai_family = PF_INET;
+ else if (IPV6_resolver == 2) // V6 only (for testing V6 only)
+ hints.ai_family = PF_INET6;
+ else // V4 + V6
+ hints.ai_family = PF_UNSPEC;
+ hints.ai_socktype = SOCK_STREAM;
+ hints.ai_protocol = IPPROTO_TCP;
+ if (getaddrinfo(hostname, NULL, &hints, &res) == 0) {
+ if (res) {
+ // only the first result is used, and only if it fits the caller's buffer
+ if ( (res->ai_addr) && (res->ai_addrlen) && (res->ai_addrlen <= buffer->addr_maxlen) ) {
+ memcpy(buffer->hp.h_addr_list[0], res->ai_addr, res->ai_addrlen);
+ buffer->hp.h_length = res->ai_addrlen;
+ freeaddrinfo(res);
+ return &(buffer->hp);
+ }
+ }
+ }
+ // failure path: release the result list if one was allocated
+ if (res) {
+ freeaddrinfo(res);
+ }
+
+#endif
+ }
+ return NULL;
+}
+
+// Resolve a host name through the dns cache internal to HTS
+// (the cache entries are never freed: TODO)
+// _iadr:    host, possibly with a user:pass@ prefix and a :port suffix (both are ignored)
+// v_buffer: t_fullhostent receiving the result
+// return:   &buffer->hp on success, NULL if the host cannot be resolved
+// Changes from the previous version:
+// - a numeric address is copied into the caller's buffer; h_addr_list[0] used
+//   to point to a local variable, dangling as soon as the function returned
+// - a new entry is appended at the tail of the list; it used to be stored in
+//   cache->n, dropping (and leaking) every entry recorded before
+// - the host name is copied with a length limit, and a result that does not
+//   fit a cache entry is returned without being cached
+t_hostent* hts_gethostbyname(char* _iadr, void* v_buffer) {
+  char iadr[HTS_URLMAXSIZE*2];
+  t_fullhostent* buffer = (t_fullhostent*) v_buffer;
+  t_dnscache* cache=_hts_cache();     // address of the cache
+  t_hostent* hp;
+
+  /* Clear */
+  fullhostent_init(buffer);
+
+  // skip a possible user:pass@ (bounded copy)
+  iadr[0]='\0';
+  strncat(iadr,jump_identification(_iadr),sizeof(iadr)-1);
+  // cut a possible :port
+  {
+    char *a;
+    if ( (a=jump_toport(iadr)) )
+      *a='\0';
+  }
+
+  // the head of the list is a placeholder entry named "*"
+  cache->iadr[0]='*';
+  cache->iadr[1]='\0';
+
+  /* get IP from the dns cache */
+  hp = _hts_ghbn(cache, iadr, &buffer->hp);
+  if (hp) {
+    if (hp->h_length>0)
+      return hp;
+    else
+      return NULL;    // erroneous entry (DNS error) in the cache
+  } else {            // not present in the dns cache: resolve
+#if HTS_WIDE_DEBUG
+    DEBUG_W("gethostbyname\n");
+#endif
+#if HDEBUG
+    printf("gethostbyname (not in cache)\n");
+#endif
+    {
+      struct in_addr inetaddr;
+      inetaddr.s_addr=inet_addr(iadr);
+#if HTS_WIN
+      if (inetaddr.s_addr==INADDR_NONE) {
+#else
+      if (inetaddr.s_addr==(in_addr_t) -1 ) {
+#endif
+#if DEBUGDNS
+        printf("resolving (not cached) %s\n",iadr);
+#endif
+        hp=vxgethostbyname(iadr, buffer);    // compute the host IP
+      } else {        // numeric: convert without using the dns
+        // copied into the storage prepared by fullhostent_init(), as
+        // vxgethostbyname() does
+        memcpy(buffer->hp.h_addr_list[0], &inetaddr.s_addr, sizeof(inetaddr.s_addr));
+        buffer->hp.h_length=(int) sizeof(inetaddr.s_addr);    // 4
+        hp=&buffer->hp;
+      }
+    }
+#if HTS_WIDE_DEBUG
+    DEBUG_W("gethostbyname done\n");
+#endif
+    // record the result (or the failure), provided it fits a cache entry
+    if ( (strlen(iadr) < sizeof(cache->iadr))
+      && ( (hp==NULL)
+        || ( (hp->h_length>0) && ((size_t) hp->h_length <= sizeof(cache->host_addr)) ) ) ) {
+      t_dnscache* c=cache;
+      while(c->n) c=c->n;       // find the tail
+      c->n=(t_dnscache*) calloct(1,sizeof(t_dnscache));
+      if (c->n!=NULL) {
+        strcpy(c->n->iadr,iadr);
+        if (hp!=NULL) {
+          memcpy(c->n->host_addr, hp->h_addr_list[0], hp->h_length);
+          c->n->host_length=hp->h_length;
+        } else {
+          c->n->host_addr[0]='\0';
+          c->n->host_length=0;  // not existing in the dns
+        }
+        c->n->n=NULL;
+      }
+    }
+    // if the result could not be recorded it is still returned
+    return hp;
+  }
+}
+
+#else
+// Resolve a host name without dns cache (HTS_DNSCACHE disabled): blocking call
+// iadr:     host, possibly with a user:pass@ prefix (ignored)
+// v_buffer: t_fullhostent receiving the result
+// return:   see vxgethostbyname()
+// The call to vxgethostbyname() had no second argument and did not compile;
+// the buffer is now passed on, and the signature matches the prototype
+// declared in htslib.h.
+HTS_INLINE t_hostent* hts_gethostbyname(char* iadr, void* v_buffer) {
+  t_hostent* retour;
+#if HTS_WIDE_DEBUG
+  DEBUG_W("gethostbyname (2)\n");
+#endif
+#if DEBUGDNS
+  printf("blocking method gethostbyname() in progress for %s\n",iadr);
+#endif
+  retour=vxgethostbyname(jump_identification(iadr), v_buffer);
+#if HTS_WIDE_DEBUG
+  DEBUG_W("gethostbyname (2) done\n");
+#endif
+  return retour;
+}
+#endif
+
+
+// --- Tracage des mallocs() ---
+#if HTS_TRACE_MALLOC
+// One tracked allocation (only compiled when HTS_TRACE_MALLOC is set)
+typedef struct _mlink {
+ void* adr; // address of the allocated block
+ int len; // size of the block, in bytes
+ int id; // allocation number, taken from trmalloc_id
+ struct _mlink* next; // next tracked allocation
+} mlink;
+// List head: only its next field is used, new blocks are inserted right after it
+mlink trmalloc = {NULL,0,0,NULL};
+// Number given to the next allocation
+int trmalloc_id=0;
+
+// Allocate a block and record it in the trmalloc list
+// len2 != 0: calloc(len,len2) (zeroed array of len elements of len2 bytes)
+// len2 == 0: malloc(len)
+// return: the block, or NULL if either the block or its tracking node could not be allocated
+HTS_INLINE void* hts_malloc(size_t len,size_t len2) {
+  void* block;
+  mlink* node = (mlink*) calloc(1,sizeof(mlink));
+  if (node == NULL)
+    return NULL;
+  block = (len2) ? calloc(len,len2) : malloc(len);
+  if (block == NULL) {
+    free(node);
+    return NULL;
+  }
+  node->adr=block;
+  node->len=(len2) ? (len*len2) : len;
+  node->id=trmalloc_id++;
+  // insert right after the list head
+  node->next=trmalloc.next;
+  trmalloc.next=node;
+  return block;
+}
+// Free a block and remove it from the trmalloc list
+// adr: block to free; NULL is ignored (reported when MEMDEBUG is set)
+// A block that is not in the list is reported (MEMDEBUG) and freed anyway.
+// The list is now tested before being dereferenced: with an empty list the
+// previous do/while loop read trmalloc.next->adr through a NULL pointer.
+// Pointers are printed with %p instead of %d.
+HTS_INLINE void hts_free(void* adr) {
+  mlink* lnk = &trmalloc;
+  if (!adr) {
+#if MEMDEBUG
+    printf("* unexpected free() error at %p\n",adr);
+#endif
+    return;
+  }
+  while(lnk->next != NULL) {
+    if (lnk->next->adr==adr) {
+      mlink* blk_free=lnk->next;
+#if 1
+      // unlink and release the tracking node
+      lnk->next=lnk->next->next;
+      free((void*) blk_free);
+#else
+      // alternative mode: keep the node and mark it as freed (id=-1)
+#if MEMDEBUG
+      if (blk_free->id==-1) {
+        printf("* memory has already been freed: %p (id=%d)\n",blk_free->adr,blk_free->id);
+      }
+#endif
+      blk_free->id=-1;
+#endif
+      free(adr);
+      return;
+    }
+    lnk=lnk->next;
+  }
+#if MEMDEBUG
+  printf("* unexpected free() error at %p\n",adr);
+#endif
+  free(adr);
+}
+// Resize a block and update its entry in the trmalloc list
+// adr: block to resize; len: new size in bytes
+// return: the resized block, or NULL on failure
+// A block that is not in the list is reported (MEMDEBUG) and resized anyway,
+// without being tracked.
+// - the list is tested before being dereferenced (with an empty list the
+//   previous do/while loop read through a NULL pointer)
+// - when realloc() fails the original block is still valid, so its entry is
+//   left untouched instead of being overwritten with NULL
+HTS_INLINE void* hts_realloc(void* adr,size_t len) {
+  mlink* lnk;
+  for (lnk = &trmalloc; lnk->next != NULL; lnk = lnk->next) {
+    if (lnk->next->adr==adr) {
+      void* newadr = realloc(adr,len);
+      // len==0 keeps the previous behaviour (the result is recorded as is)
+      if ((newadr != NULL) || (len == 0)) {
+        lnk->next->adr = newadr;
+        lnk->next->len = len;
+      }
+      return newadr;
+    }
+  }
+#if MEMDEBUG
+  printf("* unexpected realloc() error at %p\n",adr);
+#endif
+  return realloc(adr,len);
+}
+// check the malloct() and calloct() trace stack
+// Every block still recorded in the trmalloc list is reported (MEMDEBUG) and
+// released with freet()
+// NOTE(review): the loop only progresses if freet() removes the first node of
+// the list (presumably through hts_free()); a node with id==-1 is skipped
+// without being removed and would make the loop spin forever - confirm
+void hts_freeall(void) {
+ while(trmalloc.next) {
+#if MEMDEBUG
+ printf("* block %d\t not released: at %d\t (%d\t bytes)\n",trmalloc.next->id,trmalloc.next->adr,trmalloc.next->len);
+#endif
+ if (trmalloc.next->id != -1) {
+ freet(trmalloc.next->adr);
+ }
+ }
+}
+#endif
+
+
+// -- divers //
+
+// Split a full path into its parent path and its last component (project name)
+// fullpath: modified in place: one trailing '/' or '\' is removed and, when
+//           more than one character remains, backslashes become '/'
+// path:     receives the part before the name, ending '/' included ("" if none)
+// pname:    receives the last component
+// Both outputs are set to "" when fullpath is empty or too short to be split
+void cut_path(char* fullpath,char* path,char* pname) {
+  size_t len;
+  char* cursor;
+  path[0]=pname[0]='\0';
+  if (!strnotempty(fullpath))
+    return;
+  len=strlen(fullpath);
+  if ((fullpath[len-1]=='/') || (fullpath[len-1]=='\\'))
+    fullpath[--len]='\0';
+  if (len<=1)
+    return;
+  // replace every backslash by /
+  for (cursor=fullpath; *cursor!='\0'; cursor++) {
+    if (*cursor=='\\')
+      *cursor='/';
+  }
+  // search the last '/', starting from the character before the last one
+  cursor=fullpath+len-2;
+  while ((*cursor!='/') && (cursor>fullpath))
+    cursor--;
+  if (*cursor=='/')
+    cursor++;
+  strcpy(pname,cursor);
+  strncat(path,fullpath,(int) (cursor-fullpath));
+}
+
+
+
+// -- Gestion protocole ftp --
+
+// Is the ftp protocol available? Both variants currently return 1 (yes)
+#if HTS_WIN
+int ftp_available(void) {
+ return 1;
+}
+#else
+int ftp_available(void) {
+ return 1; // ok!
+ //return 0; // UNDER UNIX, PROBLEMS
+}
+#endif
+
+
+
+// Library initialization; always returns 1
+// - first call only: set up the wrapper table and register the default
+//   (htsdefault_*) callbacks
+// - with HTS_USEOPENSSL: create the OpenSSL context if there is none yet;
+//   the process is aborted if it cannot be created
+// - every call: hts_initvar()
+int hts_init(void) {
+ static int hts_init_ok = 0;
+ if (!hts_init_ok) {
+ hts_init_ok = 1;
+ // default wrappers
+ htswrap_init();
+ htswrap_add("init",htsdefault_init);
+ htswrap_add("free",htsdefault_uninit);
+ htswrap_add("start",htsdefault_start);
+ htswrap_add("change-options",htsdefault_chopt);
+ htswrap_add("end",htsdefault_end);
+ htswrap_add("check-html",htsdefault_checkhtml);
+ htswrap_add("loop",htsdefault_loop);
+ htswrap_add("query",htsdefault_query);
+ htswrap_add("query2",htsdefault_query2);
+ htswrap_add("query3",htsdefault_query3);
+ htswrap_add("check-link",htsdefault_check);
+ htswrap_add("pause",htsdefault_pause);
+ htswrap_add("save-file",htsdefault_filesave);
+ htswrap_add("link-detected",htsdefault_linkdetected);
+ htswrap_add("transfer-status",htsdefault_xfrstatus);
+ htswrap_add("save-name",htsdefault_savename);
+ }
+
+#if HTS_USEOPENSSL
+ /*
+ Initialize the OpensSSL library
+ */
+ if (!openssl_ctx) {
+ SSL_library_init();
+ SSL_load_error_strings();
+ ERR_load_crypto_strings();
+ ERR_load_SSL_strings();
+ SSLeay_add_ssl_algorithms();
+ // OpenSSL_add_all_algorithms();
+ openssl_ctx = SSL_CTX_new(SSLv23_client_method());
+ if (!openssl_ctx) {
+ fprintf(stderr, "fatal: unable to initialize TLS: SSL_CTX_new(SSLv23_client_method)\n");
+ abort();
+ }
+ }
+#endif
+
+ /* Init vars and thread-specific values */
+ hts_initvar();
+
+ return 1;
+}
+// Library cleanup: calls hts_freevar(); always returns 1
+// The wrapper table is not released (htswrap_free() is commented out)
+int hts_uninit(void) {
+ hts_freevar();
+ /* htswrap_free(); */
+ return 1;
+}
+
+// default wrappers
+// Callbacks registered by hts_init(). Except for "free" and "pause" they do
+// nothing but return a constant.
+// "init"
+void __cdecl htsdefault_init(void) {
+}
+// "free": calls hts_freevar()
+void __cdecl htsdefault_uninit(void) {
+ hts_freevar();
+}
+// "start": returns 1
+int __cdecl htsdefault_start(void* opt) {
+ return 1;
+}
+// "change-options": returns 1
+int __cdecl htsdefault_chopt(void* opt) {
+ return 1;
+}
+// "end": returns 1
+int __cdecl htsdefault_end(void) {
+ return 1;
+}
+// "check-html": returns 1
+int __cdecl htsdefault_checkhtml(char* html,int len,char* url_adresse,char* url_fichier) {
+ return 1;
+}
+// "loop": returns 1
+int __cdecl htsdefault_loop(void* back,int back_max,int back_index,int lien_n,int lien_tot,int stat_time,hts_stat_struct* stats) { // called on each HTTrack loop
+ return 1;
+}
+// "query": returns an empty string
+char* __cdecl htsdefault_query(char* question) {
+ return "";
+}
+// "query2": returns an empty string
+char* __cdecl htsdefault_query2(char* question) {
+ return "";
+}
+// "query3": returns an empty string
+char* __cdecl htsdefault_query3(char* question) {
+ return "";
+}
+// "check-link": returns -1
+int __cdecl htsdefault_check(char* adr,char* fil,int status) {
+ return -1;
+}
+// "pause": polls once per Sleep(1000) until lockfile can no longer be opened
+void __cdecl htsdefault_pause(char* lockfile) {
+ while (fexist(lockfile)) {
+ Sleep(1000);
+ }
+}
+// "save-file"
+void __cdecl htsdefault_filesave(char* file) {
+}
+// "link-detected": returns 1
+int __cdecl htsdefault_linkdetected(char* link) {
+ return 1;
+}
+// "transfer-status": returns 1
+int __cdecl htsdefault_xfrstatus(void* back) {
+ return 1;
+}
+// "save-name": returns 1
+int __cdecl htsdefault_savename(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save) {
+ return 1;
+}
+// end default wrappers
+
+
+
+// Fin
+
diff --git a/src/htslib.h b/src/htslib.h
new file mode 100644
index 0000000..9b2aca3
--- /dev/null
+++ b/src/htslib.h
@@ -0,0 +1,339 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: Subroutines .h */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+// Fichier librairie .h
+
+#ifndef HTS_DEFH
+#define HTS_DEFH
+
+/* définitions globales */
+#include "htsglobal.h"
+
+/* basic net definitions */
+#include "htsbasenet.h"
+
+/* cookies et auth */
+#include "htsbauth.h"
+
+// Attention, définition existante également dans le shell
+// (à modifier avec celle-ci)
+#define POSTTOK "?>post"
+
+#include <stdio.h>
+
+#include "htsopt.h"
+
+// structure for the additional parameters of a request
+typedef struct {
+ short int user_agent_send; // user agent (e.g. httrack/1.0 [sun]) - presumably a flag telling whether user_agent is sent, TODO confirm
+ short int http11; // the header may (must) be signed HTTP/1.1 and not HTTP/1.0
+ short int range_used; // Range used
+ short int nocompression; // No compression
+ char user_agent[64];
+ char lang_iso[64];
+ t_proxy proxy; // proxy
+} htsrequest;
+
+
+// structure returned by a connection / a header fetch
+typedef struct {
+ int statuscode; // status-code, -1=error, 200=OK,201=..etc (cf RFC1945)
+ short int notmodified; // page or file NOT modified (transferred)
+ short int is_write; // output to disk (out) or to memory (adr)
+ short int is_chunk; // chunk mode
+ short int compressed; // compressed?
+ char* adr; // address of the memory block, NULL=empty
+ FILE* out; // direct write to disk (if is_write=1)
+ LLint size; // file size
+ char msg[80]; // optional message on failure ("\0"=not specified)
+ char contenttype[64]; // content-type ("text/html" for example)
+ char contentencoding[64]; // content-encoding ("gzip" for example)
+ char* location; // the real 'location' is copied here when needed
+ LLint totalsize; // total size to download (-1=unknown)
+ short int is_file; // if 1, this is not a socket but a file descriptor
+ T_SOC soc; // socket ID
+ FILE* fp; // file for file://
+#if HTS_USEOPENSSL
+ short int ssl; // is this connection a SSL one? (https)
+ // BIO* ssl_soc; // SSL structure
+ SSL * ssl_con; // connection structure
+#endif
+ char lastmodified[64]; // Last-Modified
+ char etag[64]; // Etag
+ char cdispo[256]; // Content-Disposition, truncated
+ LLint crange; // Content-Range
+ /* */
+ htsrequest req; // parameters for the request
+ /*char digest[32+2]; // md5 digest generated by the engine ("" if not generated)*/
+} htsblk;
+
+
+/* OLD STRUCTURE for cache 1.0 */
+typedef struct {
+ int statuscode; // OLD STRUCTURE - status-code, -1=error, 200=OK,201=..etc (cf RFC1945)
+ int notmodified; // OLD STRUCTURE - page or file NOT modified (transferred)
+ int is_write; // OLD STRUCTURE - output to disk (out) or to memory (adr)
+ char* adr; // OLD STRUCTURE - address of the memory block, NULL=empty
+ FILE* out; // OLD STRUCTURE - direct write to disk (if is_write=1)
+ int size; // OLD STRUCTURE - file size
+ char msg[80]; // OLD STRUCTURE - optional message on failure ("\0"=not specified)
+ char contenttype[64]; // OLD STRUCTURE - content-type ("text/html" for example)
+ char* location; // OLD STRUCTURE - the real 'location' is copied here when needed
+ int totalsize; // OLD STRUCTURE - total size to download (-1=unknown)
+ int is_file; // OLD STRUCTURE - if 1, this is not a socket but a file descriptor
+ T_SOC soc; // OLD STRUCTURE - socket ID
+ FILE* fp; // OLD STRUCTURE - file for file://
+ t_proxy proxy; // OLD STRUCTURE - proxy
+ int user_agent_send; // OLD STRUCTURE - user agent (e.g. httrack/1.0 [sun])
+ char user_agent[64];
+ int http11; // OLD STRUCTURE - the header must be signed HTTP/1.1 and not HTTP/1.0
+} OLD_htsblk;
+/* end of OLD STRUCTURE for cache 1.0 */
+
+// dns cache entry (linked list), to avoid calling gethostbyname over and over
+typedef struct t_dnscache {
+ char iadr[1024]; // host name, used as the search key
+ struct t_dnscache* n; // next entry, NULL at the end of the list
+ char host_addr[HTS_MAXADDRLEN]; // 4 bytes (v4), or 16 bytes (v6)
+ int host_length; // normally 4 - ==0 means resolution in progress
+ // or >16 if sockaddr
+ // ==-1 means error (the host does not exist)
+} t_dnscache;
+
+
+
+
+/*
+#ifdef __cplusplus
+extern "C" {
+#endif
+*/
+
+// fonctions unix/winsock
+int hts_read(htsblk* r,char* buff,int size);
+//int HTS_TOTAL_RECV_CHECK(int var);
+LLint check_downloadable_bytes(int rate);
+
+int hts_init(void);
+int hts_uninit(void);
+
+
+// fonctions principales
+int http_fopen(char* adr,char* fil,htsblk* retour);
+int http_xfopen(int mode,int treat,int waitconnect,char* xsend,char* adr,char* fil,htsblk* retour);
+int http_sendhead(t_cookie* cookie,int mode,char* xsend,char* adr,char* fil,char* referer_adr,char* referer_fil,htsblk* retour);
+htsblk httpget(char* url);
+//int newhttp(char* iadr,char* err=NULL);
+int newhttp(char* iadr,htsblk* retour,int port,int waitconnect);
+HTS_INLINE void deletehttp(htsblk* r);
+HTS_INLINE void deletesoc(T_SOC soc);
+HTS_INLINE void deletesoc_r(htsblk* r);
+htsblk http_location(char* adr,char* fil,char* loc);
+htsblk http_test(char* adr,char* fil,char* loc);
+int check_readinput(htsblk* r);
+void http_fread(T_SOC soc,htsblk* retour);
+LLint http_fread1(htsblk* r);
+void treathead(t_cookie* cookie,char* adr,char* fil,htsblk* retour,char* rcvd);
+void treatfirstline(htsblk* retour,char* rcvd);
+void infostatuscode(char* msg,int statuscode);
+
+// sub-functions
+htsblk xhttpget(char* adr,char* fil);
+htsblk http_gethead(char* adr,char* fil);
+LLint http_xfread1(htsblk* r,int bufl);
+HTS_INLINE t_hostent* hts_gethostbyname(char* iadr, void* v_buffer);
+t_hostent* vxgethostbyname(char* hostname, void* v_buffer);
+t_hostent* _hts_ghbn(t_dnscache* cache,char* iadr,t_hostent* retour);
+int ftp_available(void);
+// the three DNS cache helpers below are only declared when HTS_DNSCACHE is non-zero
+#if HTS_DNSCACHE
+int hts_dnstest(char* _iadr);
+t_dnscache* _hts_cache(void);
+int _hts_lockdns(int i);
+#endif
+
+// miscellaneous tools
+// time stamps and date formatting
+HTS_INLINE TStamp time_local(void);
+HTS_INLINE TStamp mtime_local(void);
+void sec2str(char *s,TStamp t);
+void qsec2str(char *st,TStamp t);
+void time_gmt_rfc822(char* s);
+void time_local_rfc822(char* s);
+struct tm* convert_time_rfc822(char* s);
+int set_filetime(char* file,struct tm* tm_time);
+int set_filetime_rfc822(char* file,char* date);
+HTS_INLINE void time_rfc822(char* s,struct tm * A);
+HTS_INLINE void time_rfc822_local(char* s,struct tm * A);
+// number formatting
+char* int2char(int n);
+char* int2bytes(LLint n);
+char* int2bytessec(long int n);
+char** int2bytes2(LLint n);
+// sending and line-oriented input helpers
+HTS_INLINE int sendc(htsblk* r, char* s);
+void finput(int fd,char* s,int max);
+int binput(char* buff,char* s,int max);
+int linput(FILE* fp,char* s,int max);
+int linput_trim(FILE* fp,char* s,int max);
+int linput_cpp(FILE* fp,char* s,int max);
+void rawlinput(FILE* fp,char* s,int max);
+int strfield(const char* f,const char* s);
+// strfield2(f,s): 0 when f and s differ in length, otherwise the result of strfield(f,s).
+// NOTE: f and s are each evaluated more than once, so do not pass expressions with side effects.
+#define strfield2(f,s) ( (strlen(f)!=strlen(s)) ? 0 : (strfield(f,s)) )
+char* strstrcase(char *s,char *o);
+int ident_url_absolute(char* url,char* adr,char* fil);
+void fil_simplifie(char* f);
+int is_unicode_utf8(unsigned char* buffer, unsigned int size);
+void map_characters(unsigned char* buffer, unsigned int size, unsigned int* map);
+int ishtml(char* urlfil);
+int ishtml_ext(char* a);
+int ishttperror(int err);
+void guess_httptype(char *s,char *fil);
+void get_httptype(char *s,char *fil,int flag);
+int get_userhttptype(int setdefs,char *s,char *ext);
+void give_mimext(char *s,char *st);
+int is_knowntype(char *fil);
+int is_userknowntype(char *fil);
+int is_dyntype(char *fil);
+char* get_ext(char *fil);
+int may_unknown(char* st);
+char* jump_identification(char*);
+char* jump_toport(char*);
+char* strrchr_limit(char* s, char c, char* limit);
+HTS_INLINE char* jump_protocol(char* source);
+void code64(char* a,char* b);
+void unescape_amp(char* s);
+void escape_spc_url(char* s);
+void escape_in_url(char* s);
+void escape_uri(char* s);
+void escape_uri_utf(char* s);
+void escape_check_url(char* s);
+char* escape_check_url_addr(char* s);
+void x_escape_http(char* s,int mode);
+HTS_INLINE int ehexh(char c);
+char* unescape_http(char* s);
+char* unescape_http_unharm(char* s, int no_high);
+char* antislash_unescaped(char* s);
+int ehex(char* s);
+char* concat(const char* a,const char* b);
+// copychar(a): shorthand for concat((a),NULL)
+#define copychar(a) concat((a),NULL)
+// With HTS_DOSNAME set, fconcat()/fconv() are real functions;
+// otherwise fconv(a) is just (a) and fconcat(a,b) is concat(a,b).
+#if HTS_DOSNAME
+char* fconcat(char* a,char* b);
+char* fconv(char* a);
+#else
+#define fconv(a) (a)
+#define fconcat(a,b) concat(a,b)
+#endif
+char* fslash(char* a);
+char* __fslash(char* a);
+
+char* convtolower(char* a);
+// NOTE(review): concat() is already declared with this same prototype earlier in this header; this repeat is redundant
+char* concat(const char* a,const char* b);
+void hts_lowcase(char* s);
+void hts_replace(char *s,char from,char to);
+
+/* Character-class tests. Each macro may evaluate c more than once. */
+/* is_space:      space, LF (10), CR (13), TAB (9), FF (12) -- and also the quote characters " and ' */
+/* is_realspace:  space, LF, CR, TAB, FF only */
+/* is_taborspace: space or TAB */
+/* is_quote:      " or ' */
+#define is_space(c) ( ((c)==' ') || ((c)=='\"') || ((c)==10) || ((c)==13) || ((c)==9) || ((c)==12) || ((c)=='\'') )
+#define is_realspace(c) ( ((c)==' ') || ((c)==10) || ((c)==13) || ((c)==9) || ((c)==12) )
+#define is_taborspace(c) ( ((c)==' ') || ((c)==9) )
+#define is_quote(c) ( ((c)=='\"') || ((c)=='\'') )
+//HTS_INLINE int is_space(char);
+//HTS_INLINE int is_realspace(char);
+
+void fprintfio(FILE* fp,char* buff,char* prefix);
+
+// sig_ignore_flag() is only declared when HTS_WIN is zero (the HTS_WIN branch is empty)
+#if HTS_WIN
+#else
+int sig_ignore_flag( int setflag ); // ignore flag
+#endif
+
+void cut_path(char* fullpath,char* path,char* pname);
+int fexist(char* s);
+/*LLint fsize(char* s); */
+int fpsize(FILE* fp);
+int fsize(char* s);
+/* root dir */
+char* hts_rootdir(char* file);
+
+// Threads
+// When USE_PTHREAD is set, this header declares its own _beginthread()
+// taking a void* (*)(void*) start routine.
+#if USE_PTHREAD
+typedef void* ( *beginthread_type )( void * );
+unsigned long _beginthread( beginthread_type start_address, unsigned stack_size, void *arglist );
+#endif
+
+/*
+#ifdef __cplusplus
+}
+#endif
+*/
+
+
+
+/* global variables */
+//extern LLint HTS_TOTAL_RECV; // incoming data received
+//extern int HTS_TOTAL_RECV_STATE; // status: 0 all is well 1: slow down a little 2: slow down 3: a lot
+extern hts_stat_struct HTS_STAT;
+extern int _DEBUG_HEAD;
+extern FILE* ioinfo;
+
+/* constants */
+/* tables of strings; each string occupies a fixed 32-byte slot (hts_mime holds pairs of them) */
+extern const char hts_mime_keep[][32];
+extern const char hts_mime[][2][32];
+extern const char hts_detect[][32];
+extern const char hts_detectbeg[][32];
+extern const char hts_nodetect[][32];
+extern const char hts_detectURL[][32];
+extern const char hts_detectandleave[][32];
+extern const char hts_detect_js[][32];
+
+// default wrappers (all declared with the __cdecl calling convention)
+void __cdecl htsdefault_init(void);
+void __cdecl htsdefault_uninit(void);
+int __cdecl htsdefault_start(void* opt);
+int __cdecl htsdefault_chopt(void* opt);
+int __cdecl htsdefault_end(void);
+int __cdecl htsdefault_checkhtml(char* html,int len,char* url_adresse,char* url_fichier);
+int __cdecl htsdefault_loop(void* back,int back_max,int back_index,int lien_n,int lien_tot,int stat_time,hts_stat_struct* stats);
+char* __cdecl htsdefault_query(char* question);
+char* __cdecl htsdefault_query2(char* question);
+char* __cdecl htsdefault_query3(char* question);
+int __cdecl htsdefault_check(char* adr,char* fil,int status);
+void __cdecl htsdefault_pause(char* lockfile);
+void __cdecl htsdefault_filesave(char*);
+int __cdecl htsdefault_linkdetected(char* link);
+int __cdecl htsdefault_xfrstatus(void* back);
+int __cdecl htsdefault_savename(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save);
+// end default wrappers
+
+#endif
+
+
diff --git a/src/htsmd5.c b/src/htsmd5.c
new file mode 100644
index 0000000..47242d8
--- /dev/null
+++ b/src/htsmd5.c
@@ -0,0 +1,76 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: htsmd5.c subroutines: */
+/* generate a md5 hash */
+/* */
+/* Written March 1993 by Branko Lankester */
+/* Modified June 1993 by Colin Plumb for altered md5.c. */
+/* Modified October 1995 by Erik Troan for RPM */
+/* Modified 2000 by Xavier Roche for domd5mem */
+/* ------------------------------------------------------------ */
+
+#include "htsmd5.h"
+#include "md5.h"
+#include <string.h>
+#include <stdio.h>
+
+/*
+ * Compute the MD5 digest of the len bytes starting at buf.
+ *
+ *   buf      data to hash
+ *   len      number of bytes of buf to hash
+ *   digest   output buffer; receives the 16 raw digest bytes when asAscii
+ *            is 0, otherwise 32 lowercase hexadecimal digits followed by a
+ *            terminating '\0' (so it must then hold at least 33 bytes)
+ *   asAscii  0 for the binary digest, non-zero for the hexadecimal string
+ *
+ * Always returns 0.
+ */
+int domd5mem(unsigned char * buf, int len,
+             unsigned char * digest, int asAscii) {
+  static const char hexdigits[] = "0123456789abcdef";
+  int endian = 1;
+  unsigned char bindigest[16];
+  MD5_CTX ctx;
+  int i;
+
+  /* the first byte of the int 1 is 1 on a little-endian host, 0 on a
+     big-endian one; it is handed to MD5Init as its second argument */
+  MD5Init(&ctx, * ( (char*) &endian));
+  MD5Update(&ctx, buf, len);
+  MD5Final(bindigest, &ctx);
+
+  if (!asAscii) {
+    memcpy(digest, bindigest, 16);
+  } else {
+    /* Hex-encode by hand: sprintf() wants a char* destination, and digest
+       is an unsigned char*. Output is identical to sixteen "%02x". */
+    for (i = 0; i < 16; i++) {
+      digest[2*i]     = (unsigned char) hexdigits[bindigest[i] >> 4];
+      digest[2*i + 1] = (unsigned char) hexdigits[bindigest[i] & 0x0f];
+    }
+    digest[32] = '\0';
+  }
+
+  return 0;
+}
+
+/*
+ * Hash the NUL-terminated string buff with MD5 and return the leading
+ * sizeof(unsigned long) bytes of the binary digest as an integer, in host
+ * byte order. Despite the name, the result is as wide as a long on the host.
+ */
+unsigned long int md5sum32(char* buff) {
+  unsigned char digest[16];
+  unsigned long int sum = 0;
+
+  domd5mem((unsigned char*) buff, (int) strlen(buff), digest, 0);
+  /* Copy the bytes out instead of dereferencing a (long int*) cast of the
+     buffer: a char array is not guaranteed to be aligned for a long, and
+     the cast read breaks the aliasing rules. The bytes read are the same. */
+  memcpy(&sum, digest, sizeof(sum));
+  return sum;
+}
diff --git a/src/htsmd5.h b/src/htsmd5.h
new file mode 100644
index 0000000..84148bd
--- /dev/null
+++ b/src/htsmd5.h
@@ -0,0 +1,52 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: htsmd5.h subroutines: */
+/* generate a md5 hash */
+/* */
+/* Written March 1993 by Branko Lankester */
+/* Modified June 1993 by Colin Plumb for altered md5.c. */
+/* Modified October 1995 by Erik Troan for RPM */
+/* Modified 2000 by Xavier Roche for domd5mem */
+/* ------------------------------------------------------------ */
+
+#ifndef HTSMD5_DEFH
+#define HTSMD5_DEFH
+
+/* MD5 of the len bytes at buf. digest receives 16 raw bytes when asAscii is 0,
+   otherwise 32 hexadecimal digits plus a terminating '\0'. Returns 0. */
+int domd5mem(unsigned char * buf, int len,
+ unsigned char * digest, int asAscii);
+/* Leading bytes of the binary MD5 digest of the NUL-terminated string buff,
+   returned as an unsigned long (implemented in htsmd5.c). */
+unsigned long int md5sum32(char* buff);
+
+#endif
+
+
+
diff --git a/src/htsname.c b/src/htsname.c
new file mode 100644
index 0000000..2df0c98
--- /dev/null
+++ b/src/htsname.c
@@ -0,0 +1,1266 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: htsname.c subroutines: */
+/* savename routine (compute output filename) */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+#include "htsname.h"
+
+/* specific definitions */
+#include "htsbase.h"
+#include "htstools.h"
+#include "htsmd5.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+/* END specific definitions */
+
+/* test_flush: when opt->flush is set, flush both opt->log and opt->errlog.
+   Requires a variable named opt in scope at the point of use.
+   NOTE(review): expands to a bare `if` statement, so take care when using it
+   directly before an `else`. */
+#undef test_flush
+#define test_flush if (opt->flush) { fflush(opt->log); fflush(opt->errlog); }
+
+/* ADD_STANDARD_PATH: copy the (nom_pos - start_pos) characters starting at
+   start_pos into a local buffer and append them to save through
+   url_savename_addstr(). Uses the start_pos, nom_pos and save variables of
+   the code that expands it. */
+#define ADD_STANDARD_PATH \
+ { /* append the part that precedes the name */\
+ char buff[HTS_URLMAXSIZE*2];\
+ buff[0]='\0';\
+ strncat(buff,start_pos,(int) (nom_pos - start_pos));\
+ url_savename_addstr(save,buff);\
+ }
+
+/* ADD_STANDARD_NAME(shortname): let standard_name() fill a local buffer from
+   dot_pos, nom_pos, fil_complete and shortname, then append that buffer to
+   save through url_savename_addstr(). Uses the dot_pos, nom_pos, fil_complete
+   and save variables of the code that expands it.
+   (standard_name() is defined elsewhere; presumably shortname selects a
+   shortened form of the name -- TODO confirm.) */
+#define ADD_STANDARD_NAME(shortname) \
+ { /* append name */\
+ char buff[HTS_URLMAXSIZE*2];\
+ standard_name(buff,dot_pos,nom_pos,fil_complete,(shortname));\
+ url_savename_addstr(save,buff);\
+ }
+
+
+/* Avoid stupid DOS system folders/file such as 'nul' */
+/* Based on linux/fs/umsdos/mangle.c */
+/* Every name is stored with a leading '/'. The last entry is an empty
+   string, presumably the end-of-table marker for code that scans the
+   array -- TODO confirm at the point of use. */
+static const char *hts_tbdev[] =
+{
+ "/prn", "/con", "/aux", "/nul",
+ "/lpt1", "/lpt2", "/lpt3", "/lpt4",
+ "/com1", "/com2", "/com3", "/com4",
+ "/clock$",
+ "/emmxxxx0", "/xmsxxxx0", "/setverxx",
+ ""
+};
+
+
+
+// forme le nom du fichier à sauver (save) à partir de fil et adr
+// système intelligent, qui renomme en cas de besoin (exemple: deux INDEX.HTML et index.html)
+int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_adr,char* former_fil,char* referer_adr,char* referer_fil,httrackp* opt,lien_url** liens,int lien_tot,lien_back* back,int back_max,cache_back* cache,hash_struct* hash,int ptr,int numero_passe) {
+ char newfil[HTS_URLMAXSIZE*2]; /* ="" */
+ char* fil;
+ char* adr;
+ char* print_adr;
+ char *start_pos=NULL,*nom_pos=NULL,*dot_pos=NULL; // Position nom et point
+ // pour changement d'extension ou de nom (content-disposition)
+ int ext_chg=0;
+ char ext[256];
+ int max_char=0;
+ //CLEAR
+ newfil[0]=ext[0]='\0';
+
+ /* 8-3 ? */
+ switch(opt->savename_83) {
+ case 1:
+ max_char=8;
+ break;
+ case 2:
+ max_char=30;
+ break;
+ default:
+ max_char=8;
+ break;
+ }
+
+ // effacer save
+ save[0]='\0';
+ // fil
+ fil = fil_complete;
+ // et adr (sauter user/pass)
+ // on prend le parti de mettre les fichiers avec login/pass au même endroit que si ils
+ // étaient capturés sans ces paramètres
+ // c'est pour cette raison qu'on ignore totalement adr_complete (même pour la recherche en table de hachage)
+ adr=jump_identification(adr_complete);
+
+ // à afficher sans ftp://
+ print_adr=jump_protocol(adr);
+
+ // court-circuit pour lien primaire
+ if (strnotempty(adr)==0) {
+ if (strcmp(fil,"primary")==0) {
+ strcat(save,"primary.html");
+ return 0;
+ }
+ }
+
+
+ // vérifier que le nom n'a pas déja été calculé (si oui le renvoyer tel que)
+ // vérifier que le nom n'est pas déja pris...
+ // NOTE: si on cherche /toto/ et que /toto est trouvé on le prend (et réciproquqment) ** // **
+ if (liens!=NULL) {
+ int i;
+
+#if HTS_HASH
+ i=hash_read(hash,adr,fil_complete,1); // recherche table 1 (adr+fil)
+ if (i>=0) { // ok, trouvé
+ strcpy(save,liens[i]->sav);
+ return 0;
+ }
+ i=hash_read(hash,adr,fil_complete,2); // recherche table 2 (former_adr+former_fil)
+ if (i>=0) { // ok, trouvé
+ // copier location moved!
+ strcpy(adr_complete,liens[i]->adr);
+ strcpy(fil_complete,liens[i]->fil);
+ // et save
+ strcpy(save,liens[i]->sav); // copier (formé à partir du nouveau lien!)
+ return 0;
+ }
+#else
+ for(i=lien_tot-1;i>=0;i--) {
+#if HTS_CASSE
+ if ((strcmp(liens[i]->adr,adr)==0) && (strcmp(liens[i]->fil,fil_complete)==0))
+#else
+ if ((strfield2(liens[i]->adr,adr)) && (strfield2(liens[i]->fil,fil_complete)))
+#endif
+ { // ok c'est le même lien, adresse déja définie
+ strcpy(save,liens[i]->sav);
+ return 0;
+ }
+ if (liens[i]->former_adr) { // tester ancienne loc?
+#if HTS_CASSE
+ if ((strcmp(liens[i]->former_adr,adr)==0) && (strcmp(liens[i]->former_fil,fil_complete)==0))
+#else
+ if ((strfield2(liens[i]->former_adr,adr)) && (strfield2(liens[i]->former_fil,fil_complete)))
+#endif
+ {
+ // copier location moved!
+ strcpy(adr_complete,liens[i]->adr);
+ strcpy(fil_complete,liens[i]->fil);
+ // et save
+ strcpy(save,liens[i]->sav); // copier (formé à partir du nouveau lien!)
+ return 0;
+ }
+ }
+ }
+#endif
+
+ // chercher sans / ou avec / dans former
+ {
+ char fil_complete_patche[HTS_URLMAXSIZE*2];
+ strcpy(fil_complete_patche,fil_complete);
+ // Version avec ou sans /
+ if (fil_complete_patche[strlen(fil_complete_patche)-1]=='/')
+ fil_complete_patche[strlen(fil_complete_patche)-1]='\0';
+ else
+ strcat(fil_complete_patche,"/");
+#if HTS_HASH
+ i=hash_read(hash,adr,fil_complete_patche,2); // recherche table 2 (former_adr+former_fil)
+ if (i>=0) {
+ // écraser fil et adr (pas former_fil?????)
+ strcpy(adr_complete,liens[i]->adr);
+ strcpy(fil_complete,liens[i]->fil);
+ // écrire save
+ strcpy(save,liens[i]->sav);
+ return 0;
+ }
+#else
+ // même boucle en gros
+ for(i=lien_tot-1;i>=0;i--) {
+ if (liens[i]->former_adr) { // former-adr?
+#if HTS_CASSE
+ if ((strcmp(liens[i]->former_adr,adr)==0) && (strcmp(liens[i]->former_fil,fil_complete_patche)==0))
+#else
+ if ((strfield2(liens[i]->former_adr,adr)) && (strfield2(liens[i]->former_fil,fil_complete_patche)))
+#endif
+ { // ok c'est le même lien, adresse déja définie
+ // écraser fil et adr (pas former_fil?????)
+ strcpy(adr_complete,liens[i]->adr);
+ strcpy(fil_complete,liens[i]->fil);
+ // écrire save
+ strcpy(save,liens[i]->sav);
+ return 0;
+ }
+ }
+ }
+#endif
+ }
+ }
+
+ // vérifier la non présence de paramètres dans le nom de fichier
+ // si il y en a, les supprimer (ex: truc.cgi?subj=aspirateur)
+ // néanmoins, gardé pour vérifier la non duplication (voir après)
+ {
+ char* a;
+ a=strchr(fil,'?');
+ if (a!=NULL) {
+ strncat(newfil,fil,(int) (a - fil));
+ } else {
+ strcpy(newfil,fil);
+ }
+ fil=newfil;
+ }
+ // décoder %
+ strcpy(fil,unescape_http(fil));
+ /*
+ {
+ char tempo[HTS_URLMAXSIZE*2];
+ int i,j=0;
+ for (i=0;i<(int) strlen(fil);i++) {
+ if (fil[i]=='%') {
+ i++;
+ tempo[j++]=(char) ehex(fil+i);
+ i++; // sauter 2 caractères finalement
+ } else
+ tempo[j++]=fil[i];
+ }
+ tempo[j++]='\0';
+ strcpy(fil,tempo);
+ }
+ */
+
+
+ /* replace shtml to html.. */
+ switch (ishtml(fil)) { /* .html,.shtml,.. */
+ case 1:
+ if (
+ (strcmp(get_ext(fil),"html") != 0)
+ && (strcmp(get_ext(fil),"htm") != 0)
+ ) {
+ strcpy(ext,"html");
+ ext_chg=1;
+ }
+ break;
+ case 0:
+ if (!strnotempty(ext)) {
+ if (is_userknowntype(get_ext(fil))) { // mime known by user
+ char mime[1024];
+ mime[0]=ext[0]='\0';
+ get_userhttptype(0,mime,get_ext(fil));
+ if (strnotempty(mime)) {
+ give_mimext(ext,mime);
+ if (strnotempty(ext)) {
+ ext_chg=1;
+ }
+ }
+ }
+ }
+ break;
+ }
+
+
+ // si option check_type activée
+ if ((opt->check_type) && (!ext_chg)) {
+ if ( (!strfield(adr_complete,"file://"))
+ && (!strfield(adr_complete,"ftp://"))
+ ) {
+ // tester type avec requète HEAD si on ne connait pas le type du fichier
+ if (!( (opt->check_type==1) && (fil[strlen(fil)-1]=='/') )) // slash doit être html?
+ if (ishtml(fil)<0) { // on ne sait pas si c'est un html ou un fichier..
+ // lire dans le cache
+ htsblk r = cache_read(opt,cache,adr,fil,NULL); // test uniquement
+ if (r.statuscode != -1) { // pas d'erreur de lecture cache
+ char s[16]; s[0]='\0';
+ if ( (opt->debug>1) && (opt->log!=NULL) ) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"Testing link type (from cache) %s%s"LF,adr_complete,fil_complete);
+ test_flush;
+ }
+ if (strnotempty(r.cdispo)) { /* filename given */
+ ext_chg=2; /* change filename */
+ strcpy(ext,r.cdispo);
+ }
+ else if (!may_unknown(r.contenttype)) { // on peut patcher à priori?
+ give_mimext(s,r.contenttype); // obtenir extension
+ if (strnotempty(s)>0) { // on a reconnu l'extension
+ ext_chg=1;
+ strcpy(ext,s);
+ }
+ }
+ //
+ } else { // test imposible dans le cache, faire une requête
+ //
+#if HTS_ANALYSTE
+ int hihp=_hts_in_html_parsing;
+#endif
+ int has_been_moved=0;
+ char curr_adr[HTS_URLMAXSIZE*2],curr_fil[HTS_URLMAXSIZE*2];
+ curr_adr[0]=curr_fil[0]='\0';
+#if HTS_ANALYSTE
+ _hts_in_html_parsing=2; // test
+#endif
+ if ( (opt->debug>1) && (opt->log!=NULL) ) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"Testing link type %s%s"LF,adr_complete,fil_complete);
+ test_flush;
+ }
+ strcpy(curr_adr,adr_complete);
+ strcpy(curr_fil,fil_complete);
+ // ajouter dans le backing le fichier en mode test
+ // savename: rien car en mode test
+ if (back_add(back,back_max,opt,cache,curr_adr,curr_fil,BACK_ADD_TEST,referer_adr,referer_fil,1,NULL)!=-1) {
+ int b;
+ b=back_index(back,back_max,curr_adr,curr_fil,BACK_ADD_TEST);
+ if (b>=0) {
+ int petits_tours=0;
+ int get_test_request=0; // en cas de bouclage sur soi même avec HEAD, tester avec GET.. parfois c'est la cause des problèmes
+ do {
+ // temps à attendre, et remplir autant que l'on peut le cache (backing)
+ if (back[b].status>0) back_wait(back,back_max,opt,cache,0);
+ if (ptr>=0)
+ back_fillmax(back,back_max,opt,cache,liens,ptr,numero_passe,lien_tot);
+
+ // on est obligé d'appeler le shell pour le refresh..
+#if HTS_ANALYSTE
+ {
+
+ // Transfer rate
+ engine_stats();
+
+ // Refresh various stats
+ HTS_STAT.stat_nsocket=back_nsoc(back,back_max);
+ HTS_STAT.stat_errors=fspc(NULL,"error");
+ HTS_STAT.stat_warnings=fspc(NULL,"warning");
+ HTS_STAT.stat_infos=fspc(NULL,"info");
+ HTS_STAT.nbk=backlinks_done(liens,lien_tot,ptr);
+ HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,back,back_max);
+
+ if (!hts_htmlcheck_loop(back,back_max,b,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) {
+ return -1;
+ } else if (_hts_cancel) { // cancel 2 ou 1 (cancel parsing)
+ back_delete(back,b); // cancel test
+ }
+ }
+#endif
+
+
+ // traitement des 304,303..
+ if (back[b].status<=0) {
+ if ( (back[b].r.statuscode==301)
+ || (back[b].r.statuscode==302)
+ || (back[b].r.statuscode==303)
+ || (back[b].r.statuscode==307)
+ ) { // agh moved.. un tit tour de plus
+ if ((petits_tours<5) && (former_adr) && (former_fil)) { // on va pas tourner en rond non plus!
+ if ((int) strnotempty(back[b].r.location)) { // location existe!
+ char mov_url[HTS_URLMAXSIZE*2],mov_adr[HTS_URLMAXSIZE*2],mov_fil[HTS_URLMAXSIZE*2];
+ mov_url[0]=mov_adr[0]=mov_fil[0]='\0';
+ //
+ strcpy(mov_url,back[b].r.location); // copier URL
+ if (ident_url_relatif(mov_url,curr_adr,curr_fil,mov_adr,mov_fil)>=0) {
+ // si non bouclage sur soi même, ou si test avec GET non testé
+ if ((strcmp(mov_adr,curr_adr)) || (strcmp(mov_fil,curr_fil)) || (get_test_request==0)) {
+ // bouclage?
+ if ((!strcmp(mov_adr,curr_adr)) && (!strcmp(mov_fil,curr_fil)))
+ get_test_request=1; // faire requète avec GET
+
+ // recopier former_adr/fil?
+ if ((former_adr) && (former_fil)) {
+ if (strnotempty(former_adr)==0) { // Pas déja noté
+ strcpy(former_adr,curr_adr);
+ strcpy(former_fil,curr_fil);
+ }
+ }
+
+ // check explicit forbidden - don't follow 3xx in this case
+ {
+ int set_prio_to=0;
+ robots_wizard* robots = (robots_wizard*) opt->robotsptr;
+ if (hts_acceptlink(opt,ptr,lien_tot,liens,
+ mov_adr,mov_fil,
+ opt->filters.filters,opt->filters.filptr,opt->maxfilter,
+ robots,
+ &set_prio_to,
+ NULL) == 1)
+ { /* forbidden */
+ has_been_moved = 1;
+ back_delete(back,b); // ok
+ strcpy(curr_adr,mov_adr);
+ strcpy(curr_fil,mov_fil);
+ mov_url[0]='\0';
+ }
+ }
+
+ // ftp: stop!
+ if (strfield(mov_url,"ftp://")) { // ftp, ok on arrête
+ has_been_moved = 1;
+ back_delete(back,b); // ok
+ strcpy(curr_adr,mov_adr);
+ strcpy(curr_fil,mov_fil);
+ } else if (*mov_url) {
+ char* methode;
+ if (!get_test_request)
+ methode=BACK_ADD_TEST; // tester avec HEAD
+ else {
+ methode=BACK_ADD_TEST2; // tester avec GET
+ if ( opt->errlog!=NULL ) {
+ fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Loop with HEAD request (during prefetch) at %s%s"LF,curr_adr,curr_fil);
+ test_flush;
+ }
+ }
+ // Ajouter
+ if (back_add(back,back_max,opt,cache,mov_adr,mov_fil,methode,referer_adr,referer_fil,1,NULL)!=-1) { // OK
+ if ( (opt->debug>1) && (opt->errlog!=NULL) ) {
+ fspc(opt->errlog,"warning"); fprintf(opt->errlog,"(during prefetch) %s (%d) to link %s at %s%s"LF,back[b].r.msg,back[b].r.statuscode,back[b].r.location,curr_adr,curr_fil);
+ test_flush;
+ }
+
+ // libérer emplacement backing actuel et attendre le prochain
+ back_delete(back,b);
+ strcpy(curr_adr,mov_adr);
+ strcpy(curr_fil,mov_fil);
+ b=back_index(back,back_max,curr_adr,curr_fil,methode);
+ if (!get_test_request)
+ has_been_moved = 1; // sinon ne pas forcer has_been_moved car non déplacé
+ petits_tours++;
+ //
+ } else {// sinon on fait rien et on s'en va.. (ftp etc)
+ if ( (opt->debug>1) && (opt->errlog)) {
+ fspc(opt->errlog,"debug"); fprintf(opt->errlog,"Warning: Savename redirect backing error at %s%s"LF,mov_adr,mov_fil);
+ test_flush;
+ }
+ }
+ }
+ } else {
+ if ( opt->errlog!=NULL ) {
+ fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Unable to test %s%s (loop to same filename)"LF,adr_complete,fil_complete);
+ test_flush;
+ }
+ }
+
+ }
+ }
+ } else{ // arrêter les frais
+ if ( opt->errlog!=NULL ) {
+ fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Unable to test %s%s (loop)"LF,adr_complete,fil_complete);
+ test_flush;
+ }
+ }
+ } // ok, leaving
+ }
+
+ } while(back[b].status>0);
+
+ // Si non déplacé, forcer type?
+ if (!has_been_moved) {
+ if (back[b].r.statuscode!=-10) { // erreur
+ if (strnotempty(back[b].r.contenttype)==0)
+ strcpy(back[b].r.contenttype,"text/html"); // message d'erreur en html
+ // Finalement on, renvoie un erreur, pour ne toucher à rien dans le code
+ // libérer emplacement backing
+ /*if (opt->errlog!=NULL) {
+ fspc(opt->errlog,0); fprintf(opt->errlog,"Error: (during prefetch) %s (%d) to link %s at %s%s"LF,back[b].r.msg,back[b].r.statuscode,back[b].r.location,curr_adr,curr_fil);
+ test_flush;
+ }
+ back_delete(back,b);
+ return -1; // ERREUR (404 par exemple)
+ */
+ }
+
+ { // pas d'erreur, changer type?
+ char s[16];
+ s[0]='\0';
+ if (strnotempty(back[b].r.cdispo)) { /* filename given */
+ ext_chg=2; /* change filename */
+ strcpy(ext,back[b].r.cdispo);
+ }
+ else if ((!may_unknown(back[b].r.contenttype)) || (!get_ext(back[b].url_fil)) ) { // on peut patcher à priori? (pas interdit ou pas de type)
+ give_mimext(s,back[b].r.contenttype); // obtenir extension
+ if (strnotempty(s)>0) { // on a reconnu l'extension
+ ext_chg=1;
+ strcpy(ext,s);
+ }
+ }
+ }
+ }
+ // FIN Si non déplacé, forcer type?
+
+ // libérer emplacement backing
+ back_delete(back,b);
+
+ // --- --- ---
+ // oops, a été déplacé.. on recalcule en récursif (osons!)
+ if (has_been_moved) {
+ // copier adr, fil (optionnel, mais sinon marche pas pour le rip)
+ strcpy(adr_complete,curr_adr);
+ strcpy(fil_complete,curr_fil);
+ // copier adr, fil
+
+ return url_savename(curr_adr,curr_fil,save,NULL,NULL,referer_adr,referer_fil,opt,liens,lien_tot,back,back_max,cache,hash,ptr,numero_passe);
+ }
+ // --- --- ---
+
+ }
+
+ } else {
+ printf("PANIC! : Savename Crash adding error, unexpected error found.. [%d]\n",__LINE__);
+#if BDEBUG==1
+ printf("error while savename crash adding\n");
+#endif
+ if (opt->errlog) {
+ fspc(opt->errlog,"error"); fprintf(opt->errlog,"Unexpected savename backing error at %s%s"LF,adr,fil_complete);
+ test_flush;
+ }
+
+ }
+ // restaurer
+#if HTS_ANALYSTE
+ _hts_in_html_parsing=hihp;
+#endif
+ } // caché?
+ }
+ }
+ }
+
+
+
+ // - - - DEBUT NOMMAGE - - -
+
+ // Donner nom par défaut?
+ if (fil[strlen(fil)-1]=='/') {
+ if (!strfield(adr_complete,"ftp://"))
+ strcat(fil,DEFAULT_HTML); // nommer page par défaut!!
+ else {
+ if (!opt->proxy.active)
+ strcat(fil,DEFAULT_FTP); // nommer page par défaut (texte)
+ else
+ strcat(fil,DEFAULT_HTML); // nommer page par défaut (à priori ici html depuis un proxy http)
+ }
+ }
+ // Changer extension?
+ // par exemple, php3 sera sauvé en html, cgi en html ou gif, xbm etc.. selon les cas
+ if (ext_chg) { // changer ext
+ char* a=fil+strlen(fil)-1;
+ if ( (opt->debug>1) && (opt->log!=NULL) ) {
+ fspc(opt->log,"debug");
+ if (ext_chg==1)
+ fprintf(opt->log,"Changing link extension %s%s to .%s"LF,adr_complete,fil_complete,ext);
+ else
+ fprintf(opt->log,"Changing link name %s%s to %s"LF,adr_complete,fil_complete,ext);
+ test_flush;
+ }
+ if (ext_chg==1) {
+ while((a > fil) && (*a!='.') && (*a!='/')) a--;
+ if (*a=='.') *a='\0'; // couper
+ strcat(fil,"."); // recopier point
+ } else {
+ while(( a > fil) && (*a!='/')) a--;
+ if (*a=='/') a++;
+ *a='\0';
+ }
+ strcat(fil,ext); // copier ext/nom
+ }
+
+ // Rechercher premier / et dernier .
+ {
+ char* a=fil+strlen(fil)-1;
+
+ // passer structures
+ start_pos=fil;
+ while(( a > fil) && (*a != '/') && (*a != '\\')) {
+ if (*a == '.') // point? noter position
+ if (!dot_pos)
+ dot_pos=a;
+ a--;
+ }
+ if ((*a=='/') || (*a=='\\')) a++;
+ nom_pos = a;
+ }
+
+
+ // un nom de fichier est généré
+ // s'il existe déja, alors on le mofifie légèrement
+
+ // ajouter nom du site éventuellement en premier
+ if (opt->savename_type == -1) { // utiliser savename_userdef! (%h%p/%n%q.%t)
+ char* a = opt->savename_userdef;
+ char* b = save;
+ /*char *nom_pos=NULL,*dot_pos=NULL; // Position nom et point */
+ char tok;
+
+ /*
+ { // Rechercher premier /
+ char* a=fil+strlen(fil)-1;
+ // passer structures
+ while(((int) a>(int) fil) && (*a != '/') && (*a != '\\')) {
+ if (*a == '.') // point? noter position
+ if (!dot_pos)
+ dot_pos=a;
+ a--;
+ }
+ if ((*a=='/') || (*a=='\\')) a++;
+ nom_pos = a;
+ }
+ */
+
+ // Construire nom
+ while ((*a) && (((int) (b - save)) < HTS_URLMAXSIZE ) ) { // parser, et pas trop long..
+ if (*a == '%') {
+ int short_ver=0;
+ a++;
+ if (*a == 's') {
+ short_ver=1;
+ a++;
+ }
+ *b='\0';
+ switch(tok=*a++) {
+ case '[': // %[param]
+ if (strchr(a,']')) {
+ char name[256];
+ char* c=name;
+ while(*a!=']') {
+ *c++=*a++;
+ }
+ a++;
+ *c++='\0';
+ strcat(name,"="); /* param=.. */
+ c=strchr(fil_complete,'?');
+ /* parameters exists */
+ if (c) {
+ c=strstr(c,name); /* finds param= */
+ if (c) {
+ c+=strlen(name); /* jumps "param=" */
+ while( (*c) && (*c!='&'))
+ *b++=*c++;
+ }
+ }
+ }
+ break;
+ case '%': *b++='%'; break;
+ case 'n': // nom sans ext
+ if (dot_pos) {
+ if (!short_ver) // Noms longs
+ strncat(b,nom_pos,(int) (dot_pos - nom_pos));
+ else
+ strncat(b,nom_pos,min((int) (dot_pos - nom_pos),8));
+ } else {
+ if (!short_ver) // Noms longs
+ strcpy(b,nom_pos);
+ else
+ strncat(b,nom_pos,8);
+ }
+ b+=strlen(b); // pointer à la fin
+ break;
+ case 'N': // nom avec ext
+ // RECOPIE NOM + EXT
+ *b='\0';
+ if (dot_pos) {
+ if (!short_ver) // Noms longs
+ strncat(b,nom_pos,(int) (dot_pos - nom_pos));
+ else
+ strncat(b,nom_pos,min((int) (dot_pos - nom_pos),8));
+ } else {
+ if (!short_ver) // Noms longs
+ strcpy(b,nom_pos);
+ else
+ strncat(b,nom_pos,8);
+ }
+ b+=strlen(b); // pointer à la fin
+ // RECOPIE NOM + EXT
+ *b='\0';
+ if (dot_pos) {
+ if (!short_ver) // Noms longs
+ strcpy(b,dot_pos+1);
+ else
+ strncat(b,dot_pos+1,3);
+ } else {
+ if (!short_ver) // Noms longs
+ strcpy(b,DEFAULT_EXT); // pas de..
+ else
+ strcpy(b,DEFAULT_EXT_SHORT); // pas de..
+ }
+ b+=strlen(b); // pointer à la fin
+ //
+ break;
+ case 't': // ext
+ *b='\0';
+ if (dot_pos) {
+ if (!short_ver) // Noms longs
+ strcpy(b,dot_pos+1);
+ else
+ strncat(b,dot_pos+1,3);
+ } else {
+ if (!short_ver) // Noms longs
+ strcpy(b,DEFAULT_EXT); // pas de..
+ else
+ strcpy(b,DEFAULT_EXT_SHORT); // pas de..
+ }
+ b+=strlen(b); // pointer à la fin
+ break;
+ case 'p': // path sans dernier /
+ *b='\0';
+ if (nom_pos != fil + 1) { // pas: /index.html (chemin nul)
+ if (!short_ver) { // Noms longs
+ strncat(b,fil,(int) (nom_pos - fil) - 1);
+ } else {
+ char pth[HTS_URLMAXSIZE*2],n83[HTS_URLMAXSIZE*2];
+ pth[0]=n83[0]='\0';
+ //
+ strncat(pth,fil,(int) (nom_pos - fil) - 1);
+ long_to_83(opt->savename_83,n83,pth);
+ strcpy(b,n83);
+ }
+ }
+ b+=strlen(b); // pointer à la fin
+ break;
+ case 'h': // host
+ *b='\0';
+ if (strcmp(adr_complete,"file://")==0) {
+ if (!short_ver) // Noms longs
+ strcpy(b,"localhost");
+ else
+ strcpy(b,"local");
+ } else {
+ if (!short_ver) // Noms longs
+ strcpy(b,print_adr);
+ else
+ strncat(b,print_adr,8);
+ }
+ b+=strlen(b); // pointer à la fin
+ break;
+ case 'M': /* host/address?query MD5 (128-bits) */
+ *b='\0';
+ {
+ char digest[32+2];
+ char buff[HTS_URLMAXSIZE*2];
+ digest[0]=buff[0]='\0';
+ strcpy(buff,adr);
+ strcat(buff,fil_complete);
+ domd5mem(buff,strlen(buff),digest,1);
+ strcpy(b,digest);
+ }
+ b+=strlen(b); // pointer à la fin
+ break;
+ case 'Q': case 'q': /* query MD5 (128-bits/16-bits)
+ GENERATED ONLY IF query string exists! */
+ *b='\0';
+ strncat(b,url_md5(fil_complete),(tok == 'Q')?32:4);
+ b+=strlen(b); // pointer à la fin
+ break;
+ }
+ } else
+ *b++=*a++;
+ }
+ *b++='\0';
+ //
+ // Types prédéfinis
+ //
+
+ }
+ //
+ // Structure originale
+ else if (opt->savename_type%100==0) {
+ /* recopier www.. */
+ if (opt->savename_type!=100) {
+ if (((opt->savename_type/1000)%2)==0) { // >1000 signifie "pas de www/"
+ if (strcmp(adr_complete,"file://")==0) {
+ //## if (*adr==lOCAL_CHAR) {
+ if (opt->savename_83 != 1) // noms longs
+ strcat(save,"localhost");
+ else
+ strcat(save,"local");
+ } else {
+ // adresse url
+ if (!opt->savename_83) { // noms longs (et pas de .)
+ strcat(save,print_adr);
+ } else { // noms 8-3
+ if (strlen(print_adr)>4) {
+ if (strfield(print_adr,"www."))
+ strncat(save,print_adr+4,max_char);
+ else
+ strncat(save,print_adr,8);
+ } else strncat(save,print_adr,max_char);
+ }
+ }
+ if (*fil!='/') strcat(save,"/");
+ }
+ }
+
+#if HTS_CASSE==0
+ hts_lowcase(save);
+#endif
+
+ /*
+ // ne sert à rien car a déja été filtré normalement
+ if ((*fil=='.') && (*(fil+1)=='/')) // ./index.html ** //
+ url_savename_addstr(save,fil+2);
+ else // index.html ou /index.html
+ url_savename_addstr(save,fil);
+ if (save[strlen(save)-1]=='/')
+ strcat(save,DEFAULT_HTML); // nommer page par défaut!!
+*/
+
+ /* add name */
+ ADD_STANDARD_PATH;
+ ADD_STANDARD_NAME(0);
+
+ }
+ //
+ // Structure html/image
+ else {
+ // dossier "web" ou "www.xxx" ?
+ if (((opt->savename_type/1000)%2)==0) { // >1000 signifie "pas de www/"
+ if ((opt->savename_type/100)%2) {
+ if (strcmp(adr_complete,"file://")==0) {
+ //## if (*adr==lOCAL_CHAR) {
+ if (opt->savename_83 != 1) // noms longs
+ strcat(save,"localhost/");
+ else
+ strcat(save,"local/");
+ } else {
+ // adresse url
+ if (!opt->savename_83) { // noms longs
+ strcat(save,print_adr); strcat(save,"/");
+ } else { // noms 8-3
+ if (strlen(print_adr)>4) {
+ if (strfield(print_adr,"www."))
+ strncat(save,print_adr+4,max_char);
+ else
+ strncat(save,print_adr,max_char);
+ strcat(save,"/");
+ } else {
+ strncat(save,print_adr,max_char); strcat(save,"/");
+ }
+ }
+ }
+ } else {
+ strcat(save,"web/"); // répertoire général
+ }
+ }
+
+ // si un html à coup sûr
+ if ( (ext_chg!=0) ? (ishtml_ext(ext)==1) : (ishtml(fil)==1) ) {
+ if (opt->savename_type%100==2) { // html/
+ strcat(save,"html/");
+ }
+ } else {
+ if ((opt->savename_type%100==1) || (opt->savename_type%100==2)) { // html & images
+ strcat(save,"images/");
+ }
+ }
+
+ switch (opt->savename_type%100) {
+ case 4: case 5: { // séparer par types
+ char* a=fil+strlen(fil)-1;
+ // passer structures
+ while(( a > fil) && (*a != '/') && (*a != '\\')) a--;
+ if ((*a=='/') || (*a=='\\')) a++;
+
+ // html?
+ if ( (ext_chg!=0) ? (ishtml_ext(ext)==1) : (ishtml(fil)==1) ) {
+ if (opt->savename_type%100==5)
+ strcat(save,"html/");
+ } else {
+ char* a=fil+strlen(fil)-1;
+ while(( a> fil) && (*a != '/') && (*a != '.')) a--;
+ if (*a!='.')
+ strcat(save,"other");
+ else
+ strcat(save,a+1);
+ strcat(save,"/");
+ }
+ /*strcat(save,a);*/
+ /* add name */
+ ADD_STANDARD_NAME(0);
+ }
+ break;
+ case 99: { // 'codé' .. c'est un gadget
+ int i;
+ int j;
+ char* a;
+ char C[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-";
+ int L;
+ // pseudo-CRC sur fil et adr pour initialiser générateur aléatoire..
+ unsigned int s=0;
+ L=strlen(C);
+ for(i=0;i<(int) strlen(fil_complete);i++) {
+ s+=(unsigned int) fil_complete[i];
+ }
+ for(i=0;i<(int) strlen(adr_complete);i++) {
+ s+=(unsigned int) adr_complete[i];
+ }
+ srand(s);
+
+ j=strlen(save);
+ for(i=0;i<8;i++) {
+ char c=C[(rand()%L)];
+ save[i+j]=c;
+ }
+ save[i+j]='\0';
+ // ajouter extension
+ a=fil+strlen(fil)-1;
+ while(( a > fil) && (*a != '/') && (*a != '.')) a--;
+ if (*a=='.') {
+ strcat(save,a); // ajouter
+ }
+ }
+ break;
+ default: { // noms sans les noms des répertoires
+ // ne garder que le nom, pas la structure
+ /*
+ char* a=fil+strlen(fil)-1;
+ while(((int) a>(int) fil) && (*a != '/') && (*a != '\\')) a--;
+ if ((*a=='/') || (*a=='\\')) a++;
+ strcat(save,a);
+ */
+
+ /* add name */
+ ADD_STANDARD_NAME(0);
+ }
+ break;
+ }
+
+#if HTS_CASSE==0
+ hts_lowcase(save);
+#endif
+
+ if (save[strlen(save)-1]=='/')
+ strcat(save,DEFAULT_HTML); // nommer page par défaut!!
+ }
+
+
+ // vérifier qu'on ne doit pas forcer l'extension
+ // par exemple, asp sera sauvé en html, cgi en html ou gif, xbm etc.. selon les cas
+ /*if (ext_chg) {
+ char* a=save+strlen(save)-1;
+ while(((int) a>(int) save) && (*a!='.') && (*a!='/')) a--;
+ if (*a=='.') *a='\0'; // couper
+ // recopier extension
+ strcat(save,".");
+ strcat(save,ext); // copier ext
+ }*/
+ // de même en cas de manque d'extension on en place une de manière forcée..
+ // cela évite les /chez/toto et les /chez/toto/index.html incompatibles
+ if (opt->savename_type != -1) {
+ char* a=save+strlen(save)-1;
+ while(( a > save) && (*a!='.') && (*a!='/')) a--;
+ if (*a!='.') { // agh pas de point
+ //strcat(save,".none"); // a éviter
+ strcat(save,".html"); // préférable!
+ if ( (opt->debug>1) && (opt->errlog!=NULL) ) {
+ fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Default HTML type set for %s%s"LF,adr_complete,fil_complete);
+ test_flush;
+ }
+ }
+ }
+
+ // effacer pass au besoin pour les autentifications
+ // (plus la peine : masqué au début)
+/*
+ {
+ char* a=jump_identification(save);
+ if (a!=save) {
+ char tempo[HTS_URLMAXSIZE*2];
+ char *b;
+ tempo[0]='\0';
+ strcpy(tempo,"[");
+ b=strchr(save,':');
+ if (!b) b=strchr(save,'@');
+ if (b)
+ strncat(tempo,save,(int) b-(int) a);
+ strcat(tempo,"]");
+ strcat(tempo,a);
+ strcpy(save,a);
+ }
+ }
+*/
+
+ // éviter les / au début (cause: N100)
+ if (save[0]=='/') {
+ char tempo[HTS_URLMAXSIZE*2];
+ strcpy(tempo,save+1);
+ strcpy(save,tempo);
+ }
+
+ // changer les ~,:,",*,? en _ pour sauver sur disque
+ hts_replace(save,'~','_'); // interdit sous unix (~foo)
+ //
+ hts_replace(save,'\\','_');
+ hts_replace(save,':','_'); // interdit sous windows
+ hts_replace(save,'*','_'); // interdit sous windows
+ hts_replace(save,'?','_'); // doit pas arriver!!
+ hts_replace(save,'\"','_'); // interdit sous windows
+ hts_replace(save,'<','_'); // interdit sous windows
+ hts_replace(save,'>','_'); // interdit sous windows
+ hts_replace(save,'|','_'); // interdit sous windows
+ //
+ hts_replace(save,'@','_');
+ //
+ { // éliminer les // (comme ftp://)
+ char* a;
+ while( (a=strstr(save,"//")) ) *a='_';
+ // Eliminer chars spéciaux
+ a=save -1 ;
+ while(*(++a))
+ if ( ((unsigned char)(*a) <= 31)
+ || ((unsigned char)(*a) == 127) )
+ *a='_';
+ }
+
+
+#if HTS_OVERRIDE_DOS_FOLDERS
+ /* Replace /foo/nul/bar by /foo/nul-/bar */
+ {
+ int i=0;
+ while(hts_tbdev[i][0]) {
+ char* a=save;
+ while((a=strstr(a,hts_tbdev[i]))) {
+ switch ( (int) a[strlen(hts_tbdev[i])] ) {
+ case '\0':
+ case '/': {
+ char tempo[HTS_URLMAXSIZE*2]; tempo[0]='\0';
+ strncat(tempo,save,(int) (a - save) + strlen(hts_tbdev[i]));
+ strcat(tempo,"-");
+ strcat(tempo,a+strlen(hts_tbdev[i]));
+ strcpy(save,tempo);
+ }
+ break;
+ }
+ a+=strlen(hts_tbdev[i]);
+ }
+ i++;
+ }
+ }
+#endif
+
+ // conversion 8-3 .. y compris pour les répertoires
+ if (opt->savename_83) {
+ char n83[HTS_URLMAXSIZE*2];
+ long_to_83(opt->savename_83,n83,save);
+ strcpy(save,n83);
+ }
+
+
+ /* ensure that there is no ../ (potential vulnerability) */
+ fil_simplifie(save);
+
+#if HTS_ANALYSTE
+ {
+ hts_htmlcheck_savename(adr_complete,fil_complete,referer_adr,referer_fil,save);
+ if ( (opt->debug>0) && (opt->log!=NULL) ) {
+ fspc(opt->log,"info"); fprintf(opt->log,"engine: save-name: local name: %s%s -> %s"LF,adr,fil,save);
+ test_flush;
+ }
+ }
+#endif
+
+ // chemin primaire éventuel A METTRE AVANT
+ if (strnotempty(opt->path_html)) {
+ char tempo[HTS_URLMAXSIZE*2];
+ strcpy(tempo,opt->path_html);
+ strcat(tempo,save);
+ strcpy(save,tempo);
+ }
+
+
+ // vérifier que le nom n'est pas déja pris...
+ if (liens!=NULL) {
+ int nom_ok;
+ do {
+ int i;
+ int len;
+ len=strlen(save); // taille
+ //
+ nom_ok=1; // à priori bon
+ // on part de la fin pour optimiser, plus les opti de taille pour aller encore plus vite..
+#if DEBUG_SAVENAME
+printf("\nStart search\n");
+#endif
+
+#if HTS_HASH
+ i=hash_read(hash,save,"",0); // lecture type 0 (sav)
+ if (i>=0)
+#else
+ for(i=lien_tot-1;i>=0;i--) {
+#if DEBUG_SAVENAME
+printf("%cParse: %d",13,i);
+#endif
+
+ if (liens[i]->sav_len==len) { // même taille de chaîne
+#if HTS_CASSE
+ if (strcmp(liens[i]->sav,save)==0) // existe déja
+#else
+ if (strfield2(liens[i]->sav,save)) // un tel nom existe déja
+#endif
+#endif
+ {
+#if HTS_CASSE
+ if ((strcmp(liens[i]->adr,adr)==0) && (strcmp(liens[i]->fil,fil_complete)==0))
+#else
+ if ((strfield2(liens[i]->adr,adr)) && (strfield2(liens[i]->fil,fil_complete)))
+#endif
+ { // ok c'est le même lien, adresse déja définie
+ //printf("Ok, %s\n",save);
+ //i=lien_tot; // sortir
+ i=0;
+#if DEBUG_SAVENAME
+printf("\nOK ALREADY DEFINED\n",13,i);
+#endif
+ } else { // utilisé par un AUTRE, changer de nom
+ char tempo[HTS_URLMAXSIZE*2];
+ char* a=save+strlen(save)-1;
+ char* b;
+ int n=2;
+ tempo[0]='\0';
+
+#if DEBUG_SAVENAME
+printf("\nWRONG CASE UNMATCH : \n%s\n%s, REDEFINE\n",liens[i]->fil,fil_complete);
+#endif
+ nom_ok=0;
+ i=0;
+
+ while(( a > save) && (*a!='.') && (*a!='\\') && (*a!='/')) a--;
+ if (*a=='.')
+ strncat(tempo,save,(int) (a - save));
+ else
+ strcat(tempo,save);
+
+ // tester la présence d'un -xx (ex: index-2.html -> index-3.html)
+ b=tempo+strlen(tempo)-1;
+ while (isdigit((unsigned char)*b)) b--;
+ if (*b=='-') {
+ sscanf(b+1,"%d",&n);
+ *b='\0'; // couper
+ n++; // plus un
+ }
+
+ // en plus il faut gérer le 8-3 .. pas facile le client
+ if (opt->savename_83) {
+ int max;
+ char* a=tempo+strlen(tempo)-1;
+ while(( a > tempo) && (*a!='/')) a--;
+ if (*a=='/') a++;
+ max=max_char-1-nombre_digit(n);
+ if ((int) strlen(a)>max)
+ *(a+max)='\0'; // couper sinon il n'y aura pas la place!
+ }
+
+ // ajouter -xx (ex: index.html -> index-2.html)
+ sprintf(tempo+strlen(tempo),"-%d",n);
+
+ // ajouter extension
+ if (*a=='.')
+ strcat(tempo,a);
+
+ strcpy(save,tempo);
+
+ //printf("switched: %s\n",save);
+
+ } // if
+#if HTS_HASH
+ }
+#else
+ } // if
+ } // if sav_len
+ } // for
+#endif
+#if DEBUG_SAVENAME
+printf("\nEnd search, %s\n",fil_complete);
+#endif
+ } while(!nom_ok);
+
+ }
+
+ //printf("'%s' %s %s\n",save,adr,fil);
+
+ return 0;
+}
+
+/*
+  Standard name with MD5, used everywhere:
+    <base name><first 4 chars of url_md5(fil_complete)><extension>
+  b             output buffer, overwritten (sized by the caller)
+  dot_pos       position of the extension dot, or NULL when there is none
+  nom_pos       start of the file name
+  fil_complete  complete file part, only used for the query-string MD5
+  short_ver     non-zero: short form (base cut to 8 chars, extension to 3)
+*/
+void standard_name(char* b,char* dot_pos,char* nom_pos,char* fil_complete,int short_ver) {
+  b[0]='\0';
+
+  /* Base name */
+  if (dot_pos) {
+    int stem_len = (int) (dot_pos - nom_pos);
+    if (short_ver)
+      stem_len = min(stem_len,8);
+    strncat(b,nom_pos,stem_len);
+  } else if (short_ver) {
+    strncat(b,nom_pos,8);
+  } else {
+    strcat(b,nom_pos);           /* long names: whole name */
+  }
+
+  /* MD5 - 16 bits (nothing is appended when url_md5() returns "") */
+  strncat(b,url_md5(fil_complete),4);
+
+  /* Extension */
+  if (!dot_pos) {
+    /* no extension at all: fall back to the default one */
+    if (short_ver)
+      strcat(b,DEFAULT_EXT_SHORT);
+    else
+      strcat(b,DEFAULT_EXT);
+    return;
+  }
+  strcat(b,".");
+  if (short_ver)
+    strncat(b,dot_pos+1,3);
+  else
+    strcat(b,dot_pos+1);
+}
+
+
+/*
+  Small MD5: digest of the query string of 'fil_complete'.
+  Returns a buffer reserved through NOSTATIC_RESERVE (34 chars) that holds
+  what domd5mem() produced for the text following the first '?', or an
+  empty string when 'fil_complete' contains no '?'.
+*/
+char* url_md5(char* fil_complete) {
+  char* digest;
+  char* query;
+  NOSTATIC_RESERVE(digest, char, 32+2);
+  digest[0]='\0';
+  query=strchr(fil_complete,'?');
+  /* strchr() stops on the '?' itself, so the remainder is never empty here */
+  if (query != NULL) {
+    char buff[HTS_URLMAXSIZE*2];
+    strcpy(buff,query+1);       /* query string MD5 (leading '?' skipped) */
+    domd5mem(buff,strlen(buff),digest,1);
+  }
+  return digest;
+}
+
+/*
+  Internal to url_savename: append 's' to 'd', writing '/' in place of
+  every backslash. 'd' must already be NUL-terminated and large enough.
+*/
+void url_savename_addstr(char* d,char* s) {
+  char* out = d + strlen(d);
+  for( ; *s != '\0'; s++) {
+    *out++ = (*s == '\\') ? '/' : *s;
+  }
+  *out = '\0';
+}
+
+#undef test_flush
diff --git a/src/htsname.h b/src/htsname.h
new file mode 100644
index 0000000..aae5f99
--- /dev/null
+++ b/src/htsname.h
@@ -0,0 +1,50 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: httrack.c subroutines: */
+/* savename routine (compute output filename) */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+
+
+#ifndef HTSNAME_DEFH
+#define HTSNAME_DEFH
+
+#include "htscore.h"
+
+/* Compute into 'save' the local file name under which the link
+   (adr_complete, fil_complete) is stored */
+int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_adr,char* former_fil,char* referer_adr,char* referer_fil,httrackp* opt,lien_url** liens,int lien_tot,lien_back* back,int back_max,cache_back* cache,hash_struct* hash,int ptr,int numero_passe);
+/* Build "name + short query MD5 + extension" into 'b'; a non-zero short_ver
+   selects the short form (name cut to 8 chars, extension to 3) */
+void standard_name(char* b,char* dot_pos,char* nom_pos,char* fil_complete,int short_ver);
+/* Append 's' to 'd', writing '/' in place of every backslash */
+void url_savename_addstr(char* d,char* s);
+/* Digest of the query string (text after the first '?') of 'fil_complete';
+   empty string when there is no '?' */
+char* url_md5(char* fil_complete);
+
+#endif
diff --git a/src/htsnet.h b/src/htsnet.h
new file mode 100644
index 0000000..d12b1e4
--- /dev/null
+++ b/src/htsnet.h
@@ -0,0 +1,242 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: Net definitions */
+/* Used in .c files that needs connect() functions and so */
+/* Note: includes htsbasenet.h */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+#ifndef HTS_DEFNETH
+#define HTS_DEFNETH
+
+/* basic net definitions */
+#include "htsbasenet.h"
+
+/* Platform-specific system headers and compatibility macros */
+#include <ctype.h>
+#if HTS_WIN
+ // for read
+ #include <io.h>
+ // for FindFirstFile
+ #include <winbase.h>
+#else
+ //typedef int T_SOC;
+ /* NOTE(review): expands to a bare -1 (no parentheses) */
+ #define INVALID_SOCKET -1
+ #include <netdb.h>
+ #include <sys/types.h>
+ #include <sys/socket.h>
+ #include <netinet/in.h>
+ #include <sys/time.h>
+ #include <sys/ioctl.h>
+ /* gethostname & co */
+ #include <unistd.h>
+ /* inet_addr */
+ #include <arpa/inet.h>
+ // normally not needed..
+#if HTS_PLATFORM!=3
+ #include <sys/filio.h>
+#else
+#ifndef HTS_DO_NOT_REDEFINE_in_addr_t
+ typedef unsigned long in_addr_t;
+#endif
+#endif
+#ifndef min
+ /* classic min/max: each argument may be evaluated twice */
+ #define min(a,b) ((a)>(b)?(b):(a))
+ #define max(a,b) ((a)>(b)?(a):(b))
+#endif
+/* Sleep(a) for non-Windows builds: 'a' is multiplied by 1000 and split into
+   a usleep() part (remainder below 1000000) and a sleep() part (whole
+   multiples of 1000000).
+   NOTE(review): the expansion is a bare { } block, so "if (x) Sleep(n); else"
+   does not compile, and 'a' is evaluated several times */
+#define Sleep(a) { if (((a)*1000)%1000000) usleep(((a)*1000)%1000000); if (((a)*1000)/1000000) sleep(((a)*1000)/1000000); }
+#endif
+
+/*
+ ** ipV4 **
+*/
+#if HTS_INET6==0
+
+/* Ipv4 structures */
+typedef struct in_addr INaddr;
+/* This should handle all cases */
+/* Generic socket address container: every member of the union overlays the
+   same storage, 'pad' fixing its minimum size to 128 bytes */
+typedef struct {
+ union {
+ struct sockaddr_in in;
+ struct sockaddr sa;
+ unsigned char v4data[4];
+ unsigned char v6data[16];
+ unsigned char pad[128];
+ } m_addr;
+} SOCaddr;
+
+/* Ipv4 structure members */
+/* (lvalue accessors on the sockaddr_in view of a SOCaddr) */
+#define SOCaddr_sinaddr(server) ((server).m_addr.in.sin_addr)
+#define SOCaddr_sinfamily(server) ((server).m_addr.in.sin_family)
+#define SOCaddr_sinport(server) ((server).m_addr.in.sin_port)
+
+/* AF_xx */
+#define AFinet AF_INET
+
+/* Set port to sockaddr structure */
+/* ('port' is given in host byte order and converted with htons()) */
+#define SOCaddr_initport(server, port) do { \
+ SOCaddr_sinport(server) = htons((unsigned short int) (port)); \
+} while(0)
+
+/*
+  Copy sockaddr to another one
+
+  server      destination SOCaddr (lvalue)
+  server_len  lvalue receiving the resulting address length, 0 on failure
+  hpaddr      source address bytes
+  hpsize      size of the source, which selects how it is interpreted:
+                sizeof(struct sockaddr_in)  family and address are copied
+                                            (the port is left untouched)
+                4                           raw IPv4 address, family AF_INET
+                1..sizeof(server)           copied verbatim
+                anything else               rejected (server_len = 0)
+  Every macro parameter is parenthesized so that expression arguments
+  expand safely; arguments are still evaluated more than once.
+*/
+#define SOCaddr_copyaddr(server, server_len, hpaddr, hpsize) do { \
+if ((hpsize) == sizeof(struct sockaddr_in)) { \
+  (server_len)=sizeof(struct sockaddr_in); \
+  SOCaddr_sinfamily(server) = (*(struct sockaddr_in*)(hpaddr)).sin_family; \
+  memcpy(&SOCaddr_sinaddr(server), &(*(struct sockaddr_in*)(hpaddr)).sin_addr, sizeof(SOCaddr_sinaddr(server))); \
+} else if ((hpsize) == 4) {\
+  (server_len)=sizeof(struct sockaddr_in); \
+  SOCaddr_sinfamily(server) = AF_INET; \
+  memcpy(&SOCaddr_sinaddr(server), (hpaddr), sizeof(SOCaddr_sinaddr(server))); \
+} else if (((hpsize) > 0) && ((hpsize) <= sizeof(server))) { \
+  (server_len)=(hpsize); \
+  memcpy(&(server), (hpaddr), (hpsize)); \
+} else { \
+  (server_len)=0; \
+} \
+} while(0)
+
+/*
+  Get dotted address
+
+  Writes the inet_ntoa() text of the address held in 'ss' into 'namebuf'.
+  At most namebuflen-1 characters are copied, so the result always fits
+  and stays NUL-terminated; 'namebuf' is left empty when inet_ntoa()
+  returns NULL. 'sslen' is unused in the IPv4 build.
+*/
+#define SOCaddr_inetntoa(namebuf, namebuflen, ss, sslen) do { \
+char* dotted_ = (char*) inet_ntoa(SOCaddr_sinaddr(ss)); \
+(namebuf)[0]='\0'; \
+if (dotted_ && ((namebuflen) > 0)) { \
+  strncat((namebuf), dotted_, (size_t) (namebuflen) - 1); \
+} \
+} while(0)
+
+/* Get protocol ID (always '1' in the IPv4 build) */
+#define SOCaddr_getproto(ss, sslen) ('1')
+
+/*
+ ** ipV6 **
+*/
+#else
+
+/* IPv6 structures */
+typedef struct in6_addr INaddr;
+/* This should handle all cases */
+/* Generic socket address container: every member of the union overlays the
+   same storage, 'pad' fixing its minimum size to 128 bytes */
+typedef struct {
+ union {
+ struct sockaddr_in6 in6;
+ struct sockaddr_in in;
+ struct sockaddr sa;
+ unsigned char v4data[4];
+ unsigned char v6data[16];
+ unsigned char pad[128];
+ } m_addr;
+} SOCaddr;
+
+/* IPv6 structure members */
+/* (lvalue accessors on the sockaddr_in6 view of a SOCaddr) */
+#define SOCaddr_sinaddr(server) ((server).m_addr.in6.sin6_addr)
+#define SOCaddr_sinfamily(server) ((server).m_addr.in6.sin6_family)
+#define SOCaddr_sinport(server) ((server).m_addr.in6.sin6_port)
+#define SOCaddr_sinflowinfo(server) ((server).m_addr.in6.sin6_flowinfo)
+/* #define SOCaddr_sinscopeid(a) ((a).m_addr.in6.sin6_scope_id) */
+
+/* AF_xx */
+#define AFinet AF_INET6
+
+/* Set port to sockaddr structure */
+/* ('port' is given in host byte order and converted with htons()) */
+#define SOCaddr_initport(server, port) do { \
+ SOCaddr_sinport(server) = htons((unsigned short int) (port)); \
+} while(0)
+
+/*
+  Copy sockaddr to SOCaddr
+
+  server      destination SOCaddr (lvalue)
+  server_len  lvalue receiving the resulting address length, 0 on failure
+  hpaddr      source address bytes
+  hpsize      size of the source, which selects how it is interpreted
+
+  Note:
+  The '> sizeof(struct sockaddr_in6)' hack is for the VC6 structure which
+  lacks the scope id. That branch is now also bounded by sizeof(server),
+  so an oversized source can no longer overflow the destination: it falls
+  through to the final branch and yields server_len = 0.
+  Every macro parameter is parenthesized so that expression arguments
+  expand safely; arguments are still evaluated more than once.
+*/
+#define SOCaddr_copyaddr(server, server_len, hpaddr, hpsize) do { \
+if ((hpsize) == sizeof(struct sockaddr_in6)) { \
+  (server_len)=sizeof(struct sockaddr_in6); \
+  SOCaddr_sinfamily(server) = (*(struct sockaddr_in6*)(hpaddr)).sin6_family; \
+  SOCaddr_sinflowinfo(server) = (*(struct sockaddr_in6*)(hpaddr)).sin6_flowinfo; \
+  memcpy(&SOCaddr_sinaddr(server), &(*(struct sockaddr_in6*)(hpaddr)).sin6_addr, sizeof(SOCaddr_sinaddr(server))); \
+} else if (((hpsize) > sizeof(struct sockaddr_in6)) && ((hpsize) <= sizeof(server))) { \
+  (server_len)=(hpsize); \
+  memcpy(&(server), (hpaddr), (hpsize)); \
+} else if ((hpsize) == sizeof(struct sockaddr_in)) { \
+  (server_len)=sizeof(struct sockaddr_in); \
+  (*(struct sockaddr_in*)(&(server))).sin_family = AF_INET; \
+  memcpy(&(*(struct sockaddr_in*)&(server)).sin_addr, &(*(struct sockaddr_in*)(hpaddr)).sin_addr, sizeof((*(struct sockaddr_in*)(hpaddr)).sin_addr)); \
+} else if ((hpsize) == 4) {\
+  (server_len)=sizeof(struct sockaddr_in); \
+  (*(struct sockaddr_in*)(&(server))).sin_family = AF_INET; \
+  memcpy(&(*(struct sockaddr_in*)&(server)).sin_addr, (hpaddr), 4); \
+} else if ((hpsize) == 16) {\
+  (server_len)=sizeof(struct sockaddr_in6); \
+  SOCaddr_sinfamily(server) = AF_INET6; \
+  memcpy(&SOCaddr_sinaddr(server), (hpaddr), 16); \
+} else if (((hpsize) > 0) && ((hpsize) <= sizeof(server))) { \
+  (server_len)=(hpsize); \
+  memcpy(&(server), (hpaddr), (hpsize)); \
+} else { \
+  (server_len)=0; \
+} \
+} while(0)
+
+/*
+  Get dotted address
+
+  Asks getnameinfo() (NI_NUMERICHOST) for the numeric text form of the
+  address 'ss' (length 'sslen') and stores it in 'namebuf' (capacity
+  'namebuflen'). 'namebuf' is guaranteed to be an empty string when the
+  conversion fails.
+*/
+#define SOCaddr_inetntoa(namebuf, namebuflen, ss, sslen) do { \
+(namebuf)[0]='\0'; \
+if (getnameinfo((struct sockaddr *)&(ss), (sslen), \
+                (namebuf), (namebuflen), NULL, 0, NI_NUMERICHOST) != 0) { \
+  (namebuf)[0]='\0'; \
+} \
+} while(0)
+
+/* Get protocol ID: '2' when the length is that of a sockaddr_in6, else '1' */
+#define SOCaddr_getproto(ss, sslen) (((sslen) == sizeof(struct sockaddr_in6))?('2'):('1'))
+
+#endif
+
+/* Self-contained hostent: the t_hostent header plus the storage its
+   address list points to, so that a resolver result can be copied by value */
+typedef struct {
+  t_hostent hp;
+  char* list[2];
+  char addr[HTS_MAXADDRLEN];    /* various struct sockaddr structures */
+  unsigned int addr_maxlen;
+} t_fullhostent;
+
+/* Reset a t_fullhostent and wire its internal pointers:
+   hp.h_addr_list -> list, list[0] -> addr, list[1] = NULL terminator */
+#define fullhostent_init(h) do { \
+memset((h), 0, sizeof(t_fullhostent)); \
+(h)->addr_maxlen = HTS_MAXADDRLEN; \
+(h)->list[0] = (h)->addr; \
+(h)->list[1] = NULL; \
+(h)->hp.h_addr_list = (h)->list; \
+} while(0)
+
+
+#endif
+
+
diff --git a/src/htsnostatic.c b/src/htsnostatic.c
new file mode 100644
index 0000000..5971d5d
--- /dev/null
+++ b/src/htsnostatic.c
@@ -0,0 +1,260 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: htsnostatic.c subroutines: */
+/* thread-safe routines for reentrancy */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+#include "htsnostatic.h"
+
+#include "htsbase.h"
+#include "htshash.h"
+
+/* Per-thread container: the hash table where the keys of the reserved
+   blocks are registered (see hts_setextvar / hts_destroyvar) */
+typedef struct {
+ /*
+ inthash values;
+ */
+ inthash blocks;
+} hts_varhash;
+
+#if USE_BEGINTHREAD
+/* Lock driven through htsSetLock() by hts_initvar/hts_lockvar/hts_unlockvar */
+static PTHREAD_LOCK_TYPE hts_static_Mutex;
+#endif
+/* Set to 1 by the first hts_initvar() call; read by hts_maylockvar() */
+static int hts_static_Mutex_init=0;
+#if HTS_WIN
+#else
+/* Thread-specific key under which each thread stores its hts_varhash */
+static PTHREAD_KEY_TYPE hts_static_key;
+#endif
+
+/*
+  Initialize the per-thread variable storage; to be called once per thread.
+  The first call also performs the one-time setup (lock initialisation and,
+  outside Windows, creation of the thread key with hts_destroyvar as its
+  cleanup callback). Outside Windows a fresh hts_varhash is then allocated
+  and stored under the thread key.
+  Returns 1 on success, 0 when an allocation failed.
+*/
+int hts_initvar() {
+  if (!hts_static_Mutex_init) {
+    /* Init done */
+    hts_static_Mutex_init=1;
+#if USE_BEGINTHREAD
+    /* Init mutex */
+    htsSetLock(&hts_static_Mutex, -999);
+
+#if HTS_WIN
+#else
+    /* Init hash */
+    PTHREAD_KEY_CREATE(&hts_static_key, hts_destroyvar);
+#endif
+#endif
+  }
+
+  /* Set specific thread value */
+#if USE_BEGINTHREAD
+#if HTS_WIN
+#else
+  {
+    void* thread_val;
+    /* sizeof(*ptr): allocate the structure itself, not just a pointer to it */
+    hts_varhash* hts_static_hash = (hts_varhash*) malloc(sizeof(*hts_static_hash));
+    if (!hts_static_hash)
+      return 0;
+    hts_static_hash->blocks = inthash_new(HTS_VAR_MAIN_HASH);
+    if (!hts_static_hash->blocks) {
+      free(hts_static_hash);      /* do not leak the container on failure */
+      return 0;
+    }
+    inthash_value_is_malloc(hts_static_hash->blocks, 1);     /* We'll have to free them upon term! */
+    inthash_value_set_free_handler(hts_static_hash->blocks, hts_destroyvar_key);    /* free handler */
+    thread_val = (void*) hts_static_hash;
+
+    PTHREAD_KEY_SET(hts_static_key, thread_val, inthash);
+  }
+#endif
+#endif
+
+  return 1;
+}
+
+/*
+  Free handler installed on the per-thread hash table (see hts_initvar):
+  'adr' is the hts_NostaticComplexKey registered for one reserved block.
+  The block stored under that key is released and the key is flagged as
+  no longer initialized. No-op on Windows builds.
+*/
+void hts_destroyvar_key(void* adr) {
+#if HTS_WIN
+#else
+  hts_NostaticComplexKey* const key = (hts_NostaticComplexKey*) adr;
+  void* block = NULL;
+  if (key == NULL)
+    return;
+  PTHREAD_KEY_GET(key->localKey, &block, void*);
+  if (block != NULL)
+    free(block);                /* Free block */
+  key->localInit = 0;
+#endif
+}
+
+/*
+  Release a thread's hts_varhash ('ptrkey', as stored under hts_static_key):
+  the thread value is unregistered, the hash table is deleted (its free
+  handler, hts_destroyvar_key, runs on the entries) and the container is
+  freed. NULL is ignored. No-op on Windows builds.
+*/
+void hts_destroyvar(void* ptrkey) {
+#if HTS_WIN
+#else
+  hts_varhash* const tables = (hts_varhash*) ptrkey;
+  if (tables == NULL)
+    return;
+  PTHREAD_KEY_SET(hts_static_key, NULL, inthash);    /* unregister */
+  inthash_delete(&(tables->blocks));                 /* destroy hash table */
+  free(tables);
+#endif
+}
+
+/*
+ destroy all key values (for the current thread)
+ NOTE(review): as written this function does nothing: the only code sits
+ inside "#if HTS_WIN" + "#if 0", so every build just returns 1
+*/
+int hts_freevar() {
+#if HTS_WIN
+#if 0
+ void* thread_val = NULL;
+ PTHREAD_KEY_GET(hts_static_key, &thread_val, inthash);
+ hts_destroyvar(thread_val);
+ PTHREAD_KEY_SET(hts_static_key, NULL, inthash); /* unregister */
+ /*
+ PTHREAD_KEY_DELETE(hts_static_key); NO
+ */
+#endif
+#endif
+ return 1;
+}
+
+/*
+  Free then re-initialize the variable storage while holding the global
+  lock. Returns the result of hts_initvar().
+*/
+int hts_resetvar() {
+  int status;
+  hts_lockvar();
+  hts_freevar();
+  status = hts_initvar();
+  hts_unlockvar();
+  return status;
+}
+
+/* Non-zero once hts_initvar() has run at least once, i.e. once the lock
+   used by hts_lockvar()/hts_unlockvar() has been set up */
+int hts_maylockvar() {
+ return hts_static_Mutex_init;
+}
+
+/* Take the global variable lock (htsSetLock with 1) when USE_BEGINTHREAD
+   is set; does nothing otherwise. Always returns 1 */
+int hts_lockvar() {
+#if USE_BEGINTHREAD
+ htsSetLock(&hts_static_Mutex, 1);
+#endif
+ return 1;
+}
+
+/* Release the global variable lock (htsSetLock with 0) when USE_BEGINTHREAD
+   is set; does nothing otherwise. Always returns 1 */
+int hts_unlockvar() {
+#if USE_BEGINTHREAD
+ htsSetLock(&hts_static_Mutex, 0);
+#endif
+ return 1;
+}
+
+/* Store an integer value under 'name' for the current thread
+   (hts_setextvar with flag 0) */
+int hts_setvar(char* name, long int value) {
+ return hts_setextvar(name, (long int)value, 0);
+}
+
+/* Store a pointer under 'name' for the current thread (hts_setextvar with
+   flag 1). NOTE(review): the pointer travels through a long int, which
+   assumes that a long int is wide enough to hold it */
+int hts_setblkvar(char* name, void* value) {
+ return hts_setextvar(name, (long int)value, 1);
+}
+
+/*
+  Write 'value' under 'name' in the current thread's hash table.
+  Nothing is written when the thread has no table yet. 'flag' is not used.
+  The global lock is NOT taken here: the caller must hold it.
+  No-op on Windows builds. Always returns 1.
+*/
+int hts_setextvar(char* name, long int value, int flag) {
+#if HTS_WIN
+#else
+  void* slot = NULL;
+
+  /* no hts_lockvar() here - MUST be protected by caller */
+  PTHREAD_KEY_GET(hts_static_key, &slot, inthash);
+  if (slot != NULL) {           /* XXc XXC hack for win version */
+    inthash_write(((hts_varhash*) slot)->blocks, name, value);
+  }
+#endif
+
+  return 1;
+}
+
+
+/* Read into *ptrvalue the integer stored under 'name' for the current
+   thread (hts_getextvar with flag 0) */
+int hts_getvar(char* name, long int* ptrvalue) {
+ return hts_getextvar(name, (long int*)ptrvalue, 0);
+}
+
+/* Read into *ptrvalue the pointer stored under 'name' for the current
+   thread (hts_getextvar with flag 1). NOTE(review): the void** is handed
+   over as a long int*, which assumes both types have the same size */
+int hts_getblkvar(char* name, void** ptrvalue) {
+ return hts_getextvar(name, (long int*)ptrvalue, 1);
+}
+
+/*
+  Read into *ptrvalue the value stored under 'name' in the current thread's
+  hash table, under the global lock. *ptrvalue is left untouched when the
+  thread has no table yet (hts_initvar() not called for it), instead of
+  dereferencing a NULL table. 'flag' is not used.
+  No-op on Windows builds. Always returns 1.
+*/
+int hts_getextvar(char* name, long int* ptrvalue, int flag) {
+#if HTS_WIN
+#else
+  void* thread_val = NULL;
+  hts_varhash* hashtables;
+
+  hts_lockvar();
+  {
+    PTHREAD_KEY_GET(hts_static_key, &thread_val, inthash);
+    hashtables = (hts_varhash*) thread_val;
+    if (hashtables) {           /* same guard as in hts_setextvar() */
+      inthash_read(hashtables->blocks, name, ptrvalue);
+    }
+  }
+  hts_unlockvar();
+#endif
+
+  return 1;
+}
+
+/* Convenience form of hts_getvar(): returns the value directly. The local
+   starts at 0, which is what comes back whenever the lookup stores nothing */
+long int hts_directgetvar(char* name) {
+ long int value=0;
+ hts_getvar(name, &value);
+ return value;
+}
+
+/* Convenience form of hts_getblkvar(): returns the pointer directly. The
+   local starts at NULL, which is what comes back whenever the lookup stores
+   nothing */
+void* hts_directgetblkvar(char* name) {
+ void* value=NULL;
+ hts_getblkvar(name, &value);
+ return value;
+}
diff --git a/src/htsnostatic.h b/src/htsnostatic.h
new file mode 100644
index 0000000..6dbb072
--- /dev/null
+++ b/src/htsnostatic.h
@@ -0,0 +1,223 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: htsnostatic.c subroutines: */
+/* thread-safe routines for reentrancy */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+/*
+ Okay, with these routines, the engine should be fully reentrant (thread-safe)
+ All static references have been changed:
+
+ from
+ function foo() {
+ static bartype bar;
+ }
+ to:
+ function foo() {
+ bartype* bar;
+ NOSTATIC_RESERVE(bar, bartype, 1);
+ }
+*/
+
+#ifndef HTSNOSTATIC_DEFH
+#define HTSNOSTATIC_DEFH
+
+#include "htscore.h"
+#include "htsthread.h"
+
+/*
+#if USE_PTHREAD
+#if HTS_WIN
+#undef HTS_REENTRANT
+#else
+#define HTS_REENTRANT
+#endif
+#else
+#undef HTS_REENTRANT
+#endif
+*/
+
+#define HTS_VAR_MAIN_HASH 127
+
+/*
+ MutEx
+*/
+
+
+/* Magic per-thread variables functions
+
+ Example:
+ hts_lockvar();
+ hts_setvar("MyFoo", (long int)(void*)&foo);
+ hts_unlockvar();
+ ..
+ foo=(void*)(long int)hts_directgetvar("MyFoo");
+
+ Do not forget to initialize (hts_initvar()) the library once per thread
+*/
+/* Per-thread initialisation / (currently empty) release / both under lock */
+int hts_initvar(void);
+int hts_freevar(void);
+int hts_resetvar(void);
+/* Global lock: availability test, acquire, release */
+int hts_maylockvar(void);
+int hts_lockvar(void);
+int hts_unlockvar(void);
+
+/* Integer values */
+int hts_setvar(char* name, long int value);
+int hts_getvar(char* name, long int* ptrvalue);
+long int hts_directgetvar(char* name);
+
+/* Pointer values */
+int hts_setblkvar(char* name, void* value);
+int hts_getblkvar(char* name, void** ptrvalue);
+void* hts_directgetblkvar(char* name);
+
+/* Internal */
+int hts_setextvar(char* name, long int value, int flag);
+int hts_getextvar(char* name, long int* ptrvalue, int flag);
+void hts_destroyvar(void* ptrkey);
+void hts_destroyvar_key(void* adr);
+
+/*
+ Ensure that the variable 'name' has 'nelt' elements of type 'type' reserved
+ (plain alias of NOSTATIC_XRESERVE; 'name' must be a 'type*' lvalue)
+*/
+#define NOSTATIC_RESERVE(name, type, nelt) NOSTATIC_XRESERVE(name, type, nelt)
+
+/*
+ Note:
+ Yes, we first read the localInit flag variable without MutEx protection,
+ for optimization purpose, because the flag is set once initialization DONE.
+ If the first read fails, we *securely* re-check and initialize *if* necessary.
+ The abort() things should NEVER be called, and are here for safety reasons
+*/
+/*
+ function-specific static cKey:
+ cKey = { localKey, localInit }
+ || \
+ \/ \ ==1 upon initialization
+ thread variable
+ ||
+ \/
+ void*
+ ||
+ \/
+ 'thread-static' value
+
+ the function-specific static cKey is also referenced in the global
+ hashtable for free() purpose: (see hts_destroyvar())
+
+ global static key variable
+ 'hts_static_key'
+ ||
+ \/
+ thread variable
+ ||
+ \/
+ void*
+ ||
+ \/
+ hashtable
+ ||
+ \/
+ function-specific hash key
+ ||
+ \/
+ &cKey
+
+*/
+#if HTS_WIN
+
+/* Windows: handled by the compiler */
+/* 'name' is pointed at a __declspec(thread) static array of 'nelt' elements
+   of 'type'; the array is zeroed by memset the first time the statement
+   runs with the thread-local 'initValue' flag still at 0 */
+#define NOSTATIC_XRESERVE(name, type, nelt) do { \
+ __declspec( thread ) static type thValue[nelt]; \
+ __declspec( thread ) int static initValue = 0; \
+ name = thValue; \
+ if (!initValue) { \
+ initValue = 1; \
+ memset(&thValue, 0, sizeof(thValue)); \
+ } \
+} while(0)
+
+#else
+
+/* Un*x : slightly more complex, we have to create a thread-key */
+typedef struct {
+  PTHREAD_KEY_TYPE localKey;    /* thread key holding this call site's block */
+  unsigned char localInit;      /* non-zero once localKey has been created */
+} hts_NostaticComplexKey;
+/*
+  Point 'name' at a zero-filled block of 'nelt' elements of 'type' that is
+  private to the calling thread and to this call site.
+
+  Fast path: the key exists and already holds a block for this thread.
+  Slow path (under the global lock): a block is allocated with calloc, the
+  call-site key is registered in the thread's hash table (for later free)
+  and the block is stored under the key.
+
+  The thread key itself is created only while cKey.localInit is still 0.
+  Creating it again each time a new thread came through the slow path would
+  replace the key the other threads stored their blocks under.
+
+  abort() is called when the library was not initialized (hts_initvar), on
+  allocation failure, or when the stored block cannot be read back.
+*/
+#define NOSTATIC_XRESERVE(name, type, nelt) do { \
+static hts_NostaticComplexKey cKey={0,0}; \
+name = NULL; \
+if ( cKey.localInit ) { \
+  PTHREAD_KEY_GET(cKey.localKey, &name, type*); \
+} \
+if ( ( ! cKey.localInit ) || ( name == NULL ) ) { \
+  if (!hts_maylockvar()) { \
+    abort(); \
+  } \
+  hts_lockvar(); \
+  { \
+    { \
+      name = (type *) calloc((nelt), sizeof(type)); \
+      if (name == NULL) { \
+        abort(); \
+      } \
+      { \
+        char elt_name[64+8]; \
+        sprintf(elt_name, #name "_%d", (int) __LINE__); \
+        if ( ! cKey.localInit ) { \
+          PTHREAD_KEY_CREATE(&(cKey.localKey), NULL); \
+        } \
+        hts_setblkvar(elt_name, &cKey); \
+      } \
+      PTHREAD_KEY_SET(cKey.localKey, name, type*); \
+      name = NULL; \
+      PTHREAD_KEY_GET(cKey.localKey, &name, type*); \
+      if (name == NULL) { \
+        abort(); \
+      } \
+      if ( ! cKey.localInit ) { \
+        cKey.localInit = 1; \
+      } \
+    } \
+  } \
+  hts_unlockvar(); \
+} \
+else { \
+  PTHREAD_KEY_GET(cKey.localKey, &name, type*); \
+  if (name == NULL) { \
+    abort(); \
+  } \
+} \
+} while(0)
+#endif
+
+#endif
diff --git a/src/htsopt.h b/src/htsopt.h
new file mode 100644
index 0000000..13bc962
--- /dev/null
+++ b/src/htsopt.h
@@ -0,0 +1,186 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: HTTrack parameters block */
+/* Called by httrack.h and some other files */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+
+#ifndef HTTRACK_DEFOPT
+#define HTTRACK_DEFOPT
+
+#include <stdio.h>
+#include "htsbasenet.h"
+#include "htsbauth.h"
+
+// proxy structure
+typedef struct {
+ int active;
+ char name[1024];
+ int port;
+} t_proxy;
+
+/* Structure used to copy the parameters as a block */
+typedef struct {
+ char*** filters;
+ int* filptr;
+ //int* filter_max;
+} htsfilters;
+
+/* Mirror state structure */
+typedef struct {
+ int stop;
+} htsoptstate;
+
+
+// httrack parameters (options)
+typedef struct {
+ int wizard; // wizard: none/big/small
+ int flush; // fflush on log files
+ int travel; // travel mode (same domain etc)
+ int seeker; // up & down
+ int depth; // number of recursion levels
+ int extdepth; // number of recursion levels outside
+ int urlmode; // relative links etc
+ int debug; // debug log mode
+ int getmode; // save html, images..
+ FILE* log; // log file
+ FILE* errlog; // and error log
+ LLint maxsite; // max site size
+ LLint maxfile_nonhtml; // max non-html size
+ LLint maxfile_html; // max html size
+ int maxsoc; // number of sockets
+ LLint fragment; // site fragmentation
+ int nearlink; // take images/data near a page but located outside
+ int makeindex; // build an index
+ int kindex; // and a 'keyword' index
+ int delete_old; // delete old files
+ int timeout; // timeout in seconds
+ int rateout; // minimum number of bytes for the transfer
+ int maxtime; // max time in seconds
+ int maxrate; // max transfer rate
+ int maxconn; // max number of connections/s
+ int waittime; // scheduled start
+ int cache; // cache generation
+ int aff_progress; // progress bar
+ int shell; // shell handling through a stdin/stdout pipe
+ t_proxy proxy; // proxy configuration
+ int savename_83; // 8-3 conversion for file names
+ int savename_type; // name type: original structure/html-images in a single level
+ char savename_userdef[256]; // userdef structure (ex: %h%p/%n%q.%t)
+ int user_agent_send; // user agent (ex: httrack/1.0 [sun])
+ char user_agent[128];
+ char path_log[1024]; // path for cache and log
+ char path_html[1024]; // path for mirror
+ char path_bin[1024]; // path for templates
+ int retry; // number of additional tries upon failure
+ int makestat; // update a log file of transfer statistics
+ int maketrack; // update a log file of operation statistics
+ int parsejava; // parse java classes to retrieve class, gif & co
+ int hostcontrol; // give up on a host that is too slow etc.
+ int errpage; // generate an error page upon 404 etc.
+ int check_type; // if unknown type (cgi,asp,/) then test the link (and handle moved if needed)
+ int all_in_cache; // put everything in cache!
+ int robots; // robots handling
+ int external; // external pages->error pages
+ int passprivacy; // no password in external links?
+ int includequery; // include the query-string
+ int mirror_first_page; // mirror of links
+ char sys_com[2048]; // system command
+ int sys_com_exec; // execute command
+ int accept_cookie; // cookie handling
+ t_cookie* cookie;
+ int http10; // force http 1.0
+ int nocompression; // no compression
+ int sizehack; // force "updated" response if size is identical
+ int tolerant; // accept incorrect content-length
+ int parseall; // try to parse everything (unknown tags containing links, for instance)
+ int norecatch; // do not re-fetch files deleted locally by the user
+ int verbosedisplay; // text animation
+ char footer[256]; // info line
+ int maxcache; // maximum in memory for the cache (backing)
+ //int maxcache_anticipate; // maximum number of links to anticipate (upper bound)
+ int ftp_proxy; // http proxy for ftp
+ char filelist[1024]; // URL list file to include
+ htsfilters filters; // holds the pointers for the filters
+ void* robotsptr; // robots ptr
+ char lang_iso[64]; // en, fr ..
+ char mimedefs[2048]; // ext1=mimetype1\next2=mimetype2..
+ //
+ int maxlink; // max number of links
+ int maxfilter; // max number of filters
+ //
+ char* exec; // address of the executable name
+ //
+ int quiet; // ask questions other than wizard?
+ int keyboard; // check stdin
+ //
+ int is_update; // this is an update (display "File updated...")
+ int dir_topindex; // rebuild top index afterwards
+ //
+ htsoptstate state; // state
+} httrackp;
+
+// stats for httrack
+typedef struct {
+ LLint HTS_TOTAL_RECV; // incoming stream received
+ LLint stat_bytes; // bytes written to disk
+ // int HTS_TOTAL_RECV_STATE; // status: 0 all is well 1: slow down a bit 2: slow down 3: a lot
+ TStamp stat_timestart; // start
+ //
+ LLint total_packed; // compressed incoming stream received
+ LLint total_unpacked; // compressed incoming stream received (NOTE(review): same comment as total_packed; presumably the size after decompression -- confirm)
+ int total_packedfiles; // compressed files
+ //
+ TStamp istat_timestart[2]; // start for instantaneous computation
+ LLint istat_bytes[2]; // computation for instantaneous
+ TStamp istat_reference01; // start signal given by #0 to #1
+ int istat_idlasttimer; // id of the timer that recently gave a stat
+ //
+ int stat_files; // number of files written
+ int stat_updated_files; // number of files updated
+ //
+ int stat_nsocket; // number of sockets
+ int stat_errors; // number of errors
+ int stat_errors_front; // same, but at the very first level
+ int stat_warnings; // '' warnings
+ int stat_infos; // '' infos
+ int nbk; // files anticipated in background and finished
+ LLint nb; // data currently transferred (estimate)
+ //
+ LLint rate;
+} hts_stat_struct;
+
+
+#endif
+
diff --git a/src/htsparse.c b/src/htsparse.c
new file mode 100644
index 0000000..b012a8d
--- /dev/null
+++ b/src/htsparse.c
@@ -0,0 +1,2377 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: Main source */
+/* DIRECT INCLUDE TO httrack.c */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+
+#if HTS_ANALYSTE
+if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) {
+#endif
+ FILE* fp=NULL; // fichier écrit localement
+ char* adr=r.adr; // pointeur (on parcourt)
+ char* lastsaved; // adresse du dernier octet sauvé + 1
+ if ( (opt.debug>1) && (opt.log!=NULL) ) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"scan file.."LF); test_flush;
+ }
+
+
+ // Indexing!
+#if HTS_MAKE_KEYWORD_INDEX
+ if (opt.kindex) {
+ if (index_keyword(r.adr,r.size,r.contenttype,savename,opt.path_html)) {
+ if ( (opt.debug>1) && (opt.log!=NULL) ) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"indexing file..done"LF); test_flush;
+ }
+ } else {
+ if ( (opt.debug>1) && (opt.log!=NULL) ) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"indexing file..error!"LF); test_flush;
+ }
+ }
+ }
+#endif
+
+ // Now, parsing
+ if ((opt.getmode & 1) && (ptr>0)) { // récupérer les html sur disque
+ // créer le fichier html local
+ HT_ADD_FOP; // écrire peu à peu le fichier
+ }
+
+ if (!error) {
+ int detect_title=0; // détection du title
+ //
+ char* in_media=NULL; // in other media type (real media and so..)
+ int intag=0; // on est dans un tag
+ int incomment=0; // dans un <!--
+ int inscript=0; // dans un scipt pour applets javascript)
+ int inscript_tag=0; // on est dans un <body onLoad="... terminé par >
+ char inscript_tag_lastc='\0';
+ // terminaison (" ou ') du "<body onLoad=.."
+ int inscriptgen=0; // on est dans un code générant, ex après obj.write("..
+ char scriptgen_q='\0'; // caractère faisant office de guillemet (' ou ")
+ int no_esc_utf=0; // ne pas echapper chars > 127
+ int nofollow=0; // ne pas scanner
+ //
+ int parseall_lastc='\0'; // dernier caractère parsé pour parseall
+ int parseall_incomment=0; // dans un /* */ (exemple: a = /* URL */ "img.gif";)
+ //
+ char* intag_start=adr;
+ char* intag_startattr=NULL;
+ int intag_start_valid=0;
+ HT_ADD_START; // débuter
+
+
+ /* statistics */
+ if ((opt.getmode & 1) && (ptr>0)) {
+ /*
+ HTS_STAT.stat_files++;
+ HTS_STAT.stat_bytes+=r.size;
+ */
+ }
+
+ /* Primary list or URLs */
+ if (ptr == 0) {
+ intag=1;
+ intag_start_valid=0;
+ }
+ /* Check is the file is a .js file */
+ else if (
+ (strfield2(r.contenttype,"application/x-javascript")!=0)
+ || (strfield2(r.contenttype,"text/css")!=0)
+ ) { /* JavaScript js file */
+ inscript=1;
+ intag=1; // because après <script> on y est .. - pas utile
+ intag_start_valid=0; // OUI car nous sommes dans du code, plus dans du "vrai" tag
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"note: this file is a javascript file"LF); test_flush;
+ }
+ }
+ /* Or a real audio */
+ else if (strfield2(r.contenttype,"audio/x-pn-realaudio")!=0) { /* realaudio link file */
+ inscript=intag=1;
+ intag_start_valid=0;
+ in_media="RAM"; // real media!
+ }
+ // Detect UTF8 format
+ if (is_unicode_utf8((unsigned char*) r.adr, (unsigned int) r.size) == 1) {
+ no_esc_utf=1;
+ } else {
+ no_esc_utf=0;
+ }
+ // Hack to prevent any problems with ram files of other files
+ * ( r.adr + r.size ) = '\0';
+
+
+ // ------------------------------------------------------------
+ // analyser ce qu'il y a en mémoire (fichier html)
+ // on scanne les balises
+ // ------------------------------------------------------------
+#if HTS_ANALYSTE
+ _hts_in_html_done=0; // 0% scannés
+ _hts_cancel=0; // pas de cancel
+ _hts_in_html_parsing=1; // flag pour indiquer un parsing
+#endif
+ base[0]='\0'; // effacer base-href
+ lastsaved=adr;
+ do {
+ int p=0;
+ int valid_p=0; // force to take p even if == 0
+ int ending_p='\0'; // ending quote?
+ error=0;
+
+ /* Hack to avoid NULL char problems with C syntax */
+ /* Yes, some bogus HTML pages can embed null chars
+ and therefore can not be properly handled if this hack is not done
+ */
+ if ( ! (*adr) ) {
+ if ( ((int) (adr - r.adr)) < r.size)
+ *adr=' ';
+ }
+
+
+
+ /*
+ index.html built here
+ */
+ // Construction index.html (sommaire)
+ // Avant de tester les a href,
+ // Ici on teste si l'on doit construire l'index vers le(s) site(s) miroir(s)
+ if (!makeindex_done) { // autoriation d'écrire un index
+ if (!detect_title) {
+ if (opt.depth == liens[ptr]->depth) { // on note toujours les premiers liens
+ if (!in_media) {
+ if (opt.makeindex && (ptr>0)) {
+ if (opt.getmode & 1) { // autorisation d'écrire
+ p=strfield(adr,"title");
+ if (p) {
+ if (*(adr-1)=='/') p=0; // /title
+ } else {
+ if (strfield(adr,"/html"))
+ p=-1; // noter, mais sans titre
+ else if (strfield(adr,"body"))
+ p=-1; // noter, mais sans titre
+ else if ( ((int) (adr - r.adr) ) >= (r.size-1) )
+ p=-1; // noter, mais sans titre
+ else if ( (int) (adr - r.adr) >= r.size - 2) // we got to hurry
+ p=-1; // xxc xxc xxc
+ }
+ } else
+ p=0;
+
+ if (p) { // ok center
+ if (makeindex_fp==NULL) {
+ verif_backblue(opt.path_html); // générer gif
+ makeindex_fp=filecreate(fconcat(opt.path_html,"index.html"));
+ if (makeindex_fp!=NULL) {
+
+ // Header
+ fprintf(makeindex_fp,template_header,
+ "<!-- Mirror and index made by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->"
+ );
+
+ } else makeindex_done=-1; // fait, erreur
+ }
+
+ if (makeindex_fp!=NULL) {
+ char tempo[HTS_URLMAXSIZE*2];
+ char s[HTS_URLMAXSIZE*2];
+ char* a=NULL;
+ char* b=NULL;
+ s[0]='\0';
+ if (p>0) {
+ a=strchr(adr,'>');
+ if (a!=NULL) {
+ a++;
+ while(is_space(*a)) a++; // sauter espaces & co
+ b=strchr(a,'<'); // prochain tag
+ }
+ }
+ if (lienrelatif(tempo,liens[ptr]->sav,concat(opt.path_html,"index.html"))==0) {
+ detect_title=1; // ok détecté pour cette page!
+ makeindex_links++; // un de plus
+ strcpy(makeindex_firstlink,tempo);
+ //
+ if ((b==a) || (a==NULL) || (b==NULL)) { // pas de titre
+ strcpy(s,tempo);
+ } else if ((b-a)<256) {
+ b--;
+ while(is_space(*b)) b--;
+ strncpy(s,a,b-a+1);
+ *(s+(b-a)+1)='\0';
+ }
+
+ // Body
+ fprintf(makeindex_fp,template_body,
+ tempo,
+ s
+ );
+
+ }
+ }
+ }
+ }
+ }
+
+ } else if (liens[ptr]->depth<opt.depth) { // on a sauté level1+1 et level1
+ HT_INDEX_END;
+ }
+ } // if (opt.makeindex)
+ }
+ // FIN Construction index.html (sommaire)
+ /*
+ end -- index.html built here
+ */
+
+
+
+ /* Parse */
+ if (
+ (*adr=='<') /* No starting tag */
+ && (!inscript) /* Not in (java)script */
+ && (!incomment) /* Not in comment (<!--) */
+ ) {
+ intag=1;
+ parseall_incomment=0;
+ //inquote=0; // effacer quote
+ intag_start=adr; intag_start_valid=1;
+ codebase[0]='\0'; // effacer éventuel codebase
+
+ if (opt.getmode & 1) { // sauver html
+ p=strfield(adr,"</html");
+ if (p==0) p=strfield(adr,"<head>");
+ // if (p==0) p=strfield(adr,"<doctype");
+ if (p) {
+ if (strnotempty(opt.footer)) {
+ char tempo[1024+HTS_URLMAXSIZE*2];
+ char gmttime[256];
+ char* eol="\n";
+ tempo[0]='\0';
+ if (strchr(r.adr,'\r'))
+ eol="\r\n";
+ time_gmt_rfc822(gmttime);
+ strcat(tempo,eol);
+ sprintf(tempo+strlen(tempo),opt.footer,jump_identification(urladr),urlfil,gmttime,"","","","","","","","");
+ strcat(tempo,eol);
+ //fwrite(tempo,1,strlen(tempo),fp);
+ HT_ADD(tempo);
+ }
+ }
+ }
+
+ // éliminer les <!-- (commentaires) : intag dévalidé
+ if (*(adr+1)=='!')
+ if (*(adr+2)=='-')
+ if (*(adr+3)=='-') {
+ intag=0;
+ incomment=1;
+ intag_start_valid=0;
+ }
+
+ }
+ else if (
+ (*adr=='>') /* ending tag */
+ && ( (!inscript) || (inscript_tag) ) /* and in tag (or in script) */
+ ) {
+ if (inscript_tag) {
+ inscript_tag=inscript=0;
+ intag=0;
+ incomment=0;
+ intag_start_valid=0;
+ } else if (!incomment) {
+ intag=0; //inquote=0;
+
+ // entrée dans du javascript?
+ // on parse ICI car il se peut qu'on ait eu a parser les src=.. dedans
+ //if (!inscript) { // sinon on est dans un obj.write("..
+ if ((intag_start_valid) &&
+ (
+ check_tag(intag_start,"script")
+ ||
+ check_tag(intag_start,"style")
+ )
+ ) {
+ char* a=intag_start; // <
+ // ** while(is_realspace(*(--a)));
+ if (*a=='<') { // sûr que c'est un tag?
+ inscript=1;
+ intag=1; // because après <script> on y est .. - pas utile
+ intag_start_valid=0; // OUI car nous sommes dans du code, plus dans du "vrai" tag
+ }
+ }
+ } else { /* end of comment? */
+ // vérifier fermeture correcte
+ if ( (*(adr-1)=='-') && (*(adr-2)=='-') ) {
+ intag=0;
+ incomment=0;
+ intag_start_valid=0;
+ }
+#if GT_ENDS_COMMENT
+ /* wrong comment ending */
+ else {
+ /* check if correct ending does not exists
+ <!-- foo > example <!-- bar > is sometimes accepted by browsers
+ when no --> is used somewhere else.. darn those browsers are dirty
+ */
+ if (!strstr(adr,"-->")) {
+ intag=0;
+ incomment=0;
+ intag_start_valid=0;
+ }
+ }
+#endif
+ }
+ //}
+ }
+ //else if (*adr==34) {
+ // inquote=(inquote?0:1);
+ //}
+ else if (intag || inscript) { // nous sommes dans un tag/commentaire, tester si on recoit un tag
+ int p_type=0;
+ int p_nocatch=0;
+ int p_searchMETAURL=0; // chercher ..URL=<url>
+ int add_class=0; // ajouter .class
+ int add_class_dots_to_patch=0; // number of '.' in code="x.y.z<realname>"
+ char* p_flush=NULL;
+
+
+ // ------------------------------------------------------------
+ // parsing évolé
+ // ------------------------------------------------------------
+ if (((isalpha((unsigned char)*adr)) || (*adr=='/') || (inscript) || (inscriptgen))) { // sinon pas la peine de tester..
+
+
+ /* caractère de terminaison pour "miniparsing" javascript=.. ?
+ (ex: <a href="javascript:()" action="foo"> ) */
+ if (inscript_tag) {
+ if (inscript_tag_lastc) {
+ if (*adr == inscript_tag_lastc) {
+ /* sortir */
+ inscript_tag=inscript=0;
+ incomment=0;
+ }
+ }
+ }
+
+
+ // Note:
+ // Certaines pages ne respectent pas le html
+ // notamment les guillements ne sont pas fixés
+ // Nous sommes dans un tag, donc on peut faire un test plus
+ // large pour pouvoi prendre en compte ces particularités
+
+ // à vérifier: ACTION, CODEBASE, VRML
+
+ if (in_media) {
+ if (strcmp(in_media,"RAM")==0) { // real media
+ p=0;
+ valid_p=1;
+ }
+ } else if (ptr>0) { /* pas première page 0 (primary) */
+ p=0; // saut pour le nom de fichier: adresse nom fichier=adr+p
+
+ // ------------------------------
+ // détection d'écriture JavaScript.
+ // osons les obj.write et les obj.href=.. ! osons!
+ // note: inscript==1 donc on sautera après les \"
+ if (inscript) {
+ if (inscriptgen) { // on est déja dans un objet générant..
+ if (*adr==scriptgen_q) { // fermeture des " ou '
+ if (*(adr-1)!='\\') { // non
+ inscriptgen=0; // ok parsing terminé
+ }
+ }
+ } else {
+ char* a=NULL;
+ char check_this_fking_line=0; // parsing code javascript..
+ char must_be_terminated=0; // caractère obligatoire de terminaison!
+ int token_size;
+ if (!(token_size=strfield(adr,".writeln"))) // détection ...objet.write[ln]("code html")...
+ token_size=strfield(adr,".write");
+ if (token_size) {
+ a=adr+token_size;
+ while(is_realspace(*a)) a++; // sauter espaces
+ if (*a=='(') { // début parenthèse
+ check_this_fking_line=2; // à parser!
+ must_be_terminated=')';
+ a++; // sauter (
+ }
+ }
+ // euhh ??? ???
+ /* else if (strfield(adr,".href")) { // détection ...objet.href="...
+ a=adr+5;
+ while(is_realspace(*a)) a++; // sauter espaces
+ if (*a=='=') { // ohh un égal
+ check_this_fking_line=1; // à noter!
+ must_be_terminated=';'; // et si t'as oublié le ; tu sais pas coder
+ a++; // sauter =
+ }
+
+ }*/
+
+ // on a un truc du genre instruction"code généré" dont on parse le code
+ if (check_this_fking_line) {
+ while(is_realspace(*a)) a++;
+ if ((*a=='\'') || (*a=='"')) { // départ de '' ou ""
+ char *b;
+ int ex=0;
+ scriptgen_q=*a; // quote
+ b=a+1; // départ de la chaîne
+ // vérifier forme ("code") et pas ("code"+var), ingérable
+ do {
+ a++; // caractère suivant
+ if (*a==scriptgen_q) if (*(a-1)!='\\') // quote non slash
+ ex=1; // sortie
+ if ((*a==10) || (*a==13))
+ ex=1;
+ } while(!ex);
+ if (*a==scriptgen_q) { // fin du quote
+ a++;
+ while(is_realspace(*a)) a++;
+ if (*a==must_be_terminated) { // parenthèse fermante: ("..")
+
+ // bon, on doit parser une ligne javascript
+ // 1) si check.. ==1 alors c'est un nom de fichier direct, donc
+ // on fixe p sur le saut nécessaire pour atteindre le nom du fichier
+ // et le moteur se débrouillera ensuite tout seul comme un grand
+ // 2) si check==2 c'est un peu plus tordu car là on génére du
+ // code html au sein de code javascript au sein de code html
+ // dans ce cas on doit fixer un flag à un puis ensuite dans la boucle
+ // on devra parser les instructions standard comme <a href etc
+ // NOTE: le code javascript autogénéré n'est pas pris en compte!!
+ // (et ne marche pas dans 50% des cas de toute facon!)
+ if (check_this_fking_line==1) {
+ p=(int) (b - adr); // calculer saut!
+ } else {
+ inscriptgen=1; // SCRIPTGEN actif
+ adr=b; // jump
+ }
+
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ char str[512];
+ str[0]='\0';
+ strncat(str,b,minimum((int) (a - b + 1), 32));
+ fspc(opt.log,"debug"); fprintf(opt.log,"active code (%s) detected in javascript: %s"LF,(check_this_fking_line==2)?"parse":"pickup",str); test_flush;
+ }
+ }
+
+ }
+
+ }
+
+
+ }
+ }
+ }
+ // fin detection code générant javascript vers html
+ // ------------------------------
+
+
+ // analyse proprement dite, A HREF=.. etc..
+ if (!p) {
+ // si dans un tag, et pas dans un script - sauf si on analyse un obj.write("..
+ if ((intag && (!inscript)) || inscriptgen) {
+ if ( (*(adr-1)=='<') || (is_space(*(adr-1))) ) { // <tag < tag etc
+ // <A HREF=.. pour les liens HTML
+ p=rech_tageq(adr,"href");
+ if (p) { // href.. tester si c'est une bas href!
+ if ((intag_start_valid) && check_tag(intag_start,"base")) { // oui!
+ // ** note: base href et codebase ne font pas bon ménage..
+ p_type=2; // c'est un chemin
+ }
+ }
+
+ /* Tags supplémentaires à vérifier (<img src=..> etc) */
+ if (p==0) {
+ int i=0;
+ while( (p==0) && (strnotempty(hts_detect[i])) ) {
+ p=rech_tageq(adr,hts_detect[i]);
+ i++;
+ }
+ }
+
+ /* Tags supplémentaires en début à vérifier (<object .. hotspot1=..> etc) */
+ if (p==0) {
+ int i=0;
+ while( (p==0) && (strnotempty(hts_detectbeg[i])) ) {
+ p=rech_tageqbegdigits(adr,hts_detectbeg[i]);
+ i++;
+ }
+ }
+
+ /* Tags supplémentaires à vérifier : URL=.. */
+ if (p==0) {
+ int i=0;
+ while( (p==0) && (strnotempty(hts_detectURL[i])) ) {
+ p=rech_tageq(adr,hts_detectURL[i]);
+ i++;
+ }
+ if (p)
+ p_searchMETAURL=1;
+ }
+
+ /* Tags supplémentaires à vérifier, mais à ne pas capturer */
+ if (p==0) {
+ int i=0;
+ while( (p==0) && (strnotempty(hts_detectandleave[i])) ) {
+ p=rech_tageq(adr,hts_detectandleave[i]);
+ i++;
+ }
+ if (p)
+ p_nocatch=1; /* ne pas rechercher */
+ }
+
+ /* Evénements */
+ if (p==0) {
+ int i=0;
+ /* détection onLoad etc */
+ while( (p==0) && (strnotempty(hts_detect_js[i])) ) {
+ p=rech_tageq(adr,hts_detect_js[i]);
+ i++;
+ }
+ /* non détecté - détecter également les onXxxxx= */
+ if (p==0) {
+ if ( (*adr=='o') && (*(adr+1)=='n') && isUpperLetter(*(adr+2)) ) {
+ p=0;
+ while(isalpha((unsigned char)adr[p]) && (p<64) ) p++;
+ if (p<64) {
+ while(is_space(adr[p])) p++;
+ if (adr[p]=='=')
+ p++;
+ else p=0;
+ } else p=0;
+ }
+ }
+ /* OK, événement repéré */
+ if (p) {
+ inscript_tag_lastc=*(adr+p); /* à attendre à la fin */
+ adr+=p; /* saut */
+ /*
+ On est désormais dans du code javascript
+ */
+ inscript_tag=inscript=1;
+ }
+ p=0; /* quoi qu'il arrive, ne rien démarrer ici */
+ }
+
+ // <APPLET CODE=.. pour les applet java.. [CODEBASE (chemin..) à faire]
+ if (p==0) {
+ p=rech_tageq(adr,"code");
+ if (p) {
+ if ((intag_start_valid) && check_tag(intag_start,"applet")) { // dans un <applet !
+ p_type=-1; // juste le nom de fichier+dossier, écire avant codebase
+ add_class=1; // ajouter .class au besoin
+
+ // vérifier qu'il n'y a pas de codebase APRES
+ // sinon on swappe les deux.
+ // pas très propre mais c'est ce qu'il y a de plus simple à faire!!
+
+ {
+ char *a;
+ a=adr;
+ while((*a) && (*a!='>') && (!rech_tageq(a,"codebase"))) a++;
+ if (rech_tageq(a,"codebase")) { // banzai! codebase=
+ char* b;
+ b=strchr(a,'>');
+ if (b) {
+ if (((int) (b - adr)) < 1000) { // au total < 1Ko
+ char tempo[HTS_URLMAXSIZE*2];
+ tempo[0]='\0';
+ strncat(tempo,a,(int) (b - a) );
+ strcat( tempo," ");
+ strncat(tempo,adr,(int) (a - adr - 1));
+ // éventuellement remplire par des espaces pour avoir juste la taille
+ while((int) strlen(tempo)<((int) (b - adr)))
+ strcat(tempo," ");
+ // pas d'erreur?
+ if ((int) strlen(tempo) == ((int) (b - adr) )) {
+ strncpy(adr,tempo,strlen(tempo)); // PAS d'octet nul à la fin!
+ p=0; // DEVALIDER!!
+ p_type=0;
+ add_class=0;
+ }
+ }
+ }
+ }
+ }
+
+ }
+ }
+ }
+
+ // liens à patcher mais pas à charger (ex: codebase)
+ if (p==0) { // note: si non chargé (ex: ignorer .class) patché tout de même
+ p=rech_tageq(adr,"codebase");
+ if (p) {
+ if ((intag_start_valid) && check_tag(intag_start,"applet")) { // dans un <applet !
+ p_type=-2;
+ } else p=-1; // ne plus chercher
+ }
+ }
+
+
+ // Meta tags pour robots
+ if (p==0) {
+ if (opt.robots) {
+ if ((intag_start_valid) && check_tag(intag_start,"meta")) {
+ if (rech_tageq(adr,"name")) { // name=robots.txt
+ char tempo[1100];
+ char* a;
+ tempo[0]='\0';
+ a=strchr(adr,'>');
+#if DEBUG_ROBOTS
+ printf("robots.txt meta tag detected\n");
+#endif
+ if (a) {
+ if (((int) (a - adr)) < 999 ) {
+ strncat(tempo,adr,(int) (a - adr));
+ if (strstrcase(tempo,"content")) {
+ if (strstrcase(tempo,"robots")) {
+ if (strstrcase(tempo,"nofollow")) {
+#if DEBUG_ROBOTS
+ printf("robots.txt meta tag: nofollow in %s%s\n",urladr,urlfil);
+#endif
+ nofollow=1; // NE PLUS suivre liens dans cette page
+ if (opt.errlog) {
+ fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Link %s%s not scanned (follow robots meta tag)"LF,urladr,urlfil);
+ test_flush;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ // entrée dans une applet javascript
+ /*if (!inscript) { // sinon on est dans un obj.write("..
+ if (p==0)
+ if (rech_sampletag(adr,"script"))
+ if (check_tag(intag_start,"script")) {
+ inscript=1;
+ }
+ }*/
+
+ // Ici on procède à une analyse du code javascript pour tenter de récupérer
+ // certains fichiers évidents.
+ // C'est devenu obligatoire vu le nombre de pages qui intègrent
+ // des images réactives par exemple
+ }
+ } else if (inscript) {
+ if (
+ (
+ (strfield(adr,"/script"))
+ ||
+ (strfield(adr,"/style"))
+ )
+ ) {
+ char* a=adr;
+ //while(is_realspace(*(--a)));
+ while( is_realspace(*a) ) a--;
+ a--;
+ if (*a=='<') { // sûr que c'est un tag?
+ inscript=0;
+ }
+ } else {
+ /*
+ Script Analyzing - different types supported:
+ foo="url"
+ foo("url") or foo(url)
+ foo "url"
+ */
+ int nc;
+ char expected = '='; // caractère attendu après
+ char* expected_end = ";";
+ int can_avoid_quotes=0;
+ char quotes_replacement='\0';
+ if (inscript_tag)
+ expected_end=";\"\'"; // voir a href="javascript:doc.location='foo'"
+ nc = strfield(adr,".src"); // nom.src="image";
+ if (!nc) nc = strfield(adr,".location"); // document.location="doc"
+ if (!nc) nc = strfield(adr,".href"); // document.location="doc"
+ if (!nc) if ( (nc = strfield(adr,".open")) ) { // window.open("doc",..
+ expected='('; // parenthèse
+ expected_end="),"; // fin: virgule ou parenthèse
+ }
+ if (!nc) if ( (nc = strfield(adr,".replace")) ) { // window.replace("url")
+ expected='('; // parenthèse
+ expected_end=")"; // fin: parenthèse
+ }
+ if (!nc) if ( (nc = strfield(adr,".link")) ) { // window.link("url")
+ expected='('; // parenthèse
+ expected_end=")"; // fin: parenthèse
+ }
+ if (!nc) if ( (nc = strfield(adr,"url")) ) { // url(url)
+ expected='('; // parenthèse
+ expected_end=")"; // fin: parenthèse
+ can_avoid_quotes=1;
+ quotes_replacement=')';
+ }
+ if (!nc) if ( (nc = strfield(adr,"import")) ) { // import "url"
+ if (is_space(*(adr+nc))) {
+ expected=0; // no char expected
+ } else
+ nc=0;
+ }
+ if (nc) {
+ char *a;
+ a=adr+nc;
+ while(is_realspace(*a)) a++;
+ if ((*a == expected) || (!expected)) {
+ if (expected)
+ a++;
+ while(is_realspace(*a)) a++;
+ if ((*a==34) || (*a=='\'') || (can_avoid_quotes)) {
+ char *b,*c;
+ int ndelim=1;
+ if ((*a==34) || (*a=='\''))
+ a++;
+ else
+ ndelim=0;
+ b=a;
+ if (ndelim) {
+ while((*b!=34) && (*b!='\'') && (*b!='\0')) b++;
+ }
+ else {
+ while((*b != quotes_replacement) && (*b!='\0')) b++;
+ }
+ c=b--; c+=ndelim;
+ while(*c==' ') c++;
+ if ((strchr(expected_end,*c)) || (*c=='\n') || (*c=='\r')) {
+ c-=(ndelim+1);
+ if ((int) (c - a + 1)) {
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ char str[512];
+ str[0]='\0';
+ strncat(str,a,minimum((int) (c - a + 1),32));
+ fspc(opt.log,"debug"); fprintf(opt.log,"link detected in javascript: %s"LF,str); test_flush;
+ }
+ p=(int) (a - adr); // p non nul: TRAITER CHAINE COMME FICHIER
+ if (can_avoid_quotes) {
+ ending_p=quotes_replacement;
+ }
+ }
+ }
+
+
+ }
+ }
+ }
+
+ }
+ }
+ }
+
+ } else { // ptr == 0
+ //p=rech_tageq(adr,"primary"); // lien primaire, yeah
+ p=0; // No stupid tag anymore, raw link
+ valid_p=1; // Valid even if p==0
+ while ((adr[p] == '\r') || (adr[p] == '\n'))
+ p++;
+ //can_avoid_quotes=1;
+ ending_p='\r';
+ }
+
+ } else if (isspace((unsigned char)*adr)) {
+ intag_startattr=adr+1; // attribute in tag (for dirty parsing)
+ }
+
+
+ // ------------------------------------------------------------
+ // dernier recours - parsing "sale" : détection systématique des .gif, etc.
+ // risque: générer de faux fichiers parazites
+ // fix: ne parse plus dans les commentaires
+ // ------------------------------------------------------------
+ if ( (opt.parseall) && (ptr>0) && (!in_media) ) { // option parsing "brut"
+ int incomment_justquit=0;
+ if (!is_realspace(*adr)) {
+ int noparse=0;
+
+ // Gestion des /* */
+ if (inscript) {
+ if (parseall_incomment) {
+ if ((*adr=='/') && (*(adr-1)=='*'))
+ parseall_incomment=0;
+ incomment_justquit=1; // ne pas noter dernier caractère
+ } else {
+ if ((*adr=='/') && (*(adr+1)=='*'))
+ parseall_incomment=1;
+ }
+ } else
+ parseall_incomment=0;
+
+ /* vérifier que l'on est pas dans un <!-- --> pur */
+ if ( (!intag) && (incomment) && (!inscript))
+ noparse=1; /* commentaire */
+
+ // recherche d'URLs
+ if ((!parseall_incomment) && (!noparse)) {
+ if (!p) { // non déja trouvé
+ if (adr != r.adr) { // >1 caractère
+ // scanner les chaines
+ if ((*adr == '\"') || (*adr=='\'')) { // "xx.gif" 'xx.gif'
+ if (strchr("=(,",parseall_lastc)) { // exemple: a="img.gif..
+ char *a=adr;
+ char stop=*adr; // " ou '
+ int count=0;
+
+ // sauter caractères
+ a++;
+ // copier
+ while((*a) && (*a!='\'') && (*a!='\"') && (count<HTS_URLMAXSIZE)) { count++; a++; }
+
+ // ok chaine terminée par " ou '
+ if ((*a == stop) && (count<HTS_URLMAXSIZE) && (count>0)) {
+ char c;
+ char* aend;
+ //
+ aend=a; // sauver début
+ a++;
+ while(is_taborspace(*a)) a++;
+ c=*a;
+ if (strchr("),;>/+\r\n",c)) { // exemple: ..img.gif";
+ // le / est pour funct("img.gif" /* URL */);
+ char tempo[HTS_URLMAXSIZE*2];
+ char type[256];
+ int url_ok=0; // url valide?
+ tempo[0]='\0'; type[0]='\0';
+ //
+ strncat(tempo,adr+1,count);
+ //
+ if ((!strchr(tempo,' ')) || inscript) { // espace dedans: méfiance! (sauf dans code javascript)
+ int invalid_url=0;
+
+ // escape
+ unescape_amp(tempo);
+
+ // Couper au # ou ? éventuel
+ {
+ char* a=strchr(tempo,'#');
+ if (a)
+ *a='\0';
+ a=strchr(tempo,'?');
+ if (a)
+ *a='\0';
+ }
+
+ // vérifier qu'il n'y a pas de caractères spéciaux
+ if (!strnotempty(tempo))
+ invalid_url=1;
+ else if (strchr(tempo,'*')
+ || strchr(tempo,'<')
+ || strchr(tempo,'>'))
+ invalid_url=1;
+
+ /* non invalide? */
+ if (!invalid_url) {
+ // Un plus à la fin? Alors ne pas prendre sauf si extension ("/toto.html#"+tag)
+ if (c!='+') { // PAS de plus à la fin
+ char* a;
+ // "Comparisons of scheme names MUST be case-insensitive" (RFC2616)
+ //if ((strncmp(tempo,"http://",7)==0) || (strncmp(tempo,"ftp://",6)==0)) // ok pas de problème
+ if (
+ (strfield(tempo,"http:"))
+ || (strfield(tempo,"ftp:"))
+#if HTS_USEOPENSSL
+ || (strfield(tempo,"https:"))
+#endif
+ ) // ok pas de problème
+ url_ok=1;
+ else if (tempo[strlen(tempo)-1]=='/') { // un slash: ok..
+ if (inscript) // sinon si pas javascript, méfiance (répertoire style base?)
+ url_ok=1;
+ } else if ((a=strchr(tempo,'/'))) { // un slash: ok..
+ if (inscript) { // sinon si pas javascript, méfiance (style "text/css")
+ if (strchr(a+1,'/')) // un seul / : abandon (STYLE type='text/css')
+ url_ok=1;
+ }
+ }
+ }
+ // Prendre si extension reconnue
+ if (!url_ok) {
+ get_httptype(type,tempo,0);
+ if (strnotempty(type)) // type reconnu!
+ url_ok=1;
+ else if (is_dyntype(get_ext(tempo))) // reconnu php,cgi,asp..
+ url_ok=1;
+ // MAIS pas les foobar@aol.com !!
+ if (strchr(tempo,'@'))
+ url_ok=0;
+ }
+ //
+ // Ok, cela pourrait être une URL
+ if (url_ok) {
+
+ // Check if not forbidden tag (id,name..)
+ if (intag_start_valid) {
+ if (intag_start)
+ if (intag_startattr)
+ if (intag)
+ if (!inscript)
+ if (!incomment) {
+ int i=0,nop=0;
+ while( (nop==0) && (strnotempty(hts_nodetect[i])) ) {
+ nop=rech_tageq(intag_startattr,hts_nodetect[i]);
+ i++;
+ }
+ // Forbidden tag
+ if (nop) {
+ url_ok=0;
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"dirty parsing: bad tag avoided: %s"LF,hts_nodetect[i-1]); test_flush;
+ }
+ }
+ }
+ }
+
+
+ // Accepter URL, on la traitera comme une URL normale!!
+ if (url_ok)
+ p=1;
+
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ } // p == 0
+
+ // plus dans un commentaire
+ if (!incomment_justquit)
+ parseall_lastc=*adr; // caractère avant le prochain
+
+ } // not in comment
+
+ } // if realspace
+ } // if parseall
+
+
+ // ------------------------------------------------------------
+ // p!=0 : on a repéré un éventuel lien
+ // ------------------------------------------------------------
+ //
+ if ((p>0) || (valid_p)) { // on a repéré un lien
+ //int lien_valide=0;
+ char* eadr=NULL; /* fin de l'URL */
+ char* quote_adr=NULL; /* adresse du ? dans l'adresse */
+ int ok=1;
+ char quote='\0';
+
+ // si nofollow ou un stop a été déclenché, réécrire tous les liens en externe
+ if ((nofollow) || (opt.state.stop))
+ p_nocatch=1;
+
+ // écrire codebase avant, flusher avant code
+ if ((p_type==-1) || (p_type==-2)) {
+ if ((opt.getmode & 1) && (ptr>0)) {
+ HT_ADD_ADR; // refresh
+ }
+ lastsaved=adr; // dernier écrit+1
+ }
+
+ // sauter espaces
+ adr+=p;
+ while((is_space(*adr)) && (quote=='\0')) {
+ if (!quote)
+ if ((*adr=='\"') || (*adr=='\''))
+ quote=*adr; // on doit attendre cela à la fin
+ // puis quitter
+ adr++; // sauter les espaces, "" et cie
+ }
+
+ /* Stop at \n (LF) if primary links*/
+ if (ptr == 0)
+ quote='\n';
+ /* s'arrêter que ce soit un ' ou un " : pour document.write('<img src="foo'+a); par exemple! */
+ else if (inscript)
+ quote='\0';
+
+ // sauter éventuel \" ou \' javascript
+ if (inscript) { // on est dans un obj.write("..
+ if (*adr=='\\') {
+ if ((*(adr+1)=='\'') || (*(adr+1)=='"')) { // \" ou \'
+ adr+=2; // sauter
+ }
+ }
+ }
+
+ // sauter content="1;URL=http://..
+ if (p_searchMETAURL) {
+ int l=0;
+ while(
+ (adr + l + 4 < r.adr + r.size)
+ && (!strfield(adr+l,"URL="))
+ && (l<128) ) l++;
+ if (!strfield(adr+l,"URL="))
+ ok=-1;
+ else
+ adr+=(l+4);
+ }
+
+ /* éviter les javascript:document.location=.. : les parser, plutôt */
+ if (ok!=-1) {
+ if (strfield(adr,"javascript:")) {
+ ok=-1;
+ /*
+ On est désormais dans du code javascript
+ */
+ inscript_tag=inscript=1;
+ inscript_tag_lastc=quote; /* à attendre à la fin */
+ }
+ }
+
+ if (p_type==1) {
+ if (*adr=='#') {
+ adr++; // sauter # pour usemap etc
+ }
+ }
+ eadr=adr;
+
+ // ne pas flusher après code si on doit écrire le codebase avant!
+ if ((p_type!=-1) && (p_type!=2) && (p_type!=-2)) {
+ if ((opt.getmode & 1) && (ptr>0)) {
+ HT_ADD_ADR; // refresh
+ }
+ lastsaved=adr; // dernier écrit+1
+ // après on écrira soit les données initiales,
+ // soit une URL/lien modifié!
+ } else if (p_type==-1) p_flush=adr; // flusher jusqu'à adr ensuite
+
+ if (ok!=-1) { // continuer
+ // découper le lien
+ do {
+ if ((* (unsigned char*) eadr)<32) { // caractère de contrôle (ou \0)
+ if (!is_space(*eadr))
+ ok=0;
+ }
+ if ( ( ((int) (eadr - adr)) ) > HTS_URLMAXSIZE) // ** trop long, >HTS_URLMAXSIZE caractères (on prévoit HTS_URLMAXSIZE autres pour path)
+ ok=-1; // ne pas traiter ce lien
+
+ if (ok > 0) {
+ //if (*eadr!=' ') {
+ if (is_space(*eadr)) { // guillemets,CR, etc
+ if ((!quote) || (*eadr==quote)) // si pas d'attente de quote spéciale ou si quote atteinte
+ ok=0;
+ } else if (ending_p && (*eadr==ending_p))
+ ok=0;
+ else {
+ switch(*eadr) {
+ case '>':
+ if (!quote) {
+ if (!inscript) {
+ intag=0; // PLUS dans un tag!
+ intag_start_valid=0;
+ }
+ ok=0;
+ }
+ break;
+ /*case '<':*/
+ case '#':
+ if (*(eadr-1) != '&') // &#40;
+ ok=0;
+ break;
+ // case '?': non!
+ case '\\': if (inscript) ok=0; break; // \" ou \' point d'arrêt
+ case '?': quote_adr=adr; break; // noter position query
+ }
+ }
+ //}
+ }
+ eadr++;
+ } while(ok==1);
+
+ // Empty link detected
+ if ( (((int) (eadr - adr))) <= 1) { // link empty
+ ok=-1; // No
+ if (*adr != '#') { // Not empty+unique #
+ if ( (((int) (eadr - adr)) == 1)) { // 1=link empty with delim (end_adr-start_adr)
+ if (quote) {
+ if ((opt.getmode & 1) && (ptr>0)) {
+ HT_ADD("#"); // We add this for a <href="">
+ }
+ }
+ }
+ }
+ }
+
+ }
+
+ if (ok==0) { // tester un lien
+ char lien[HTS_URLMAXSIZE*2];
+ int meme_adresse=0; // 0 par défaut pour primary
+ //char *copie_de_adr=adr;
+ //char* p;
+
+ // construire lien (découpage)
+ if ( (((int) (eadr - adr))-1) < HTS_URLMAXSIZE ) { // pas trop long?
+ strncpy(lien,adr,((int) (eadr - adr))-1);
+ *(lien+ (((int) (eadr - adr)))-1 )='\0';
+ //printf("link: %s\n",lien);
+ // supprimer les espaces
+ while((lien[strlen(lien)-1]==' ') && (strnotempty(lien))) lien[strlen(lien)-1]='\0';
+
+
+#if HTS_STRIP_DOUBLE_SLASH
+ // supprimer les // en / (sauf pour http://)
+ {
+ char *a,*p,*q;
+ int done=0;
+ a=strchr(lien,':'); // http://
+ if (a) {
+ a++;
+ while(*a=='/') a++; // position après http://
+ } else {
+ a=lien; // début
+ while(*a=='/') a++; // position après http://
+ }
+ q=strchr(a,'?'); // ne pas traiter après '?'
+ if (!q)
+ q=a+strlen(a)-1;
+ while(( p=strstr(a,"//")) && (!done) ) { // remplacer // par /
+ if ((int) p>(int) q) { // après le ? (toto.cgi?param=1//2.3)
+ done=1; // stopper
+ } else {
+ char tempo[HTS_URLMAXSIZE*2];
+ tempo[0]='\0';
+ strncat(tempo,a,(int) p - (int) a);
+ strcat (tempo,p+1);
+ strcpy(a,tempo); // recopier
+ }
+ }
+ }
+#endif
+
+ } else
+ lien[0]='\0'; // erreur
+
+ // ------------------------------------------------------
+ // Lien repéré et extrait
+ if (strnotempty(lien)>0) { // construction du lien
+ char adr[HTS_URLMAXSIZE*2],fil[HTS_URLMAXSIZE*2]; // ATTENTION adr cache le "vrai" adr
+ int forbidden_url=-1; // lien non interdit (mais non autorisé..)
+ int just_test_it=0; // mode de test des liens
+ int set_prio_to=0; // pour capture de page isolée
+ int import_done=0; // lien importé (ne pas scanner ensuite *à priori*)
+ //
+ adr[0]='\0'; fil[0]='\0';
+ //
+ // 0: autorisé
+ // 1: interdit (patcher tout de même adresse)
+
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"link detected in html: %s"LF,lien); test_flush;
+ }
+
+ // external check
+#if HTS_ANALYSTE
+ if (!hts_htmlcheck_linkdetected(lien)) {
+ error=1; // erreur
+ if (opt.errlog) {
+ fspc(opt.errlog,"error"); fprintf(opt.errlog,"Link %s refused by external wrapper"LF,lien);
+ test_flush;
+ }
+ }
+#endif
+
+ // purger espaces de début et fin, CR,LF résiduels
+ // (IMG SRC="foo.<\n>gif")
+ {
+ char* a;
+ while (is_realspace(lien[0])) {
+ char tempo[HTS_URLMAXSIZE*2];
+ tempo[0]='\0';
+ strcpy(tempo,lien+1);
+ strcpy(lien,tempo);
+ }
+ while(strnotempty(lien)
+ && (is_realspace(lien[max(0,(int)(strlen(lien))-1)])) ) {
+ lien[strlen(lien)-1]='\0';
+ }
+ while ((a=strchr(lien,'\n'))) {
+ char tempo[HTS_URLMAXSIZE*2];
+ tempo[0]='\0';
+ strncat(tempo,lien,(int) (a - lien));
+ strcat(tempo,a+1);
+ strcpy(lien,tempo);
+ }
+ while ((a=strchr(lien,'\r'))) {
+ char tempo[HTS_URLMAXSIZE*2];
+ tempo[0]='\0';
+ strncat(tempo,lien,(int) (a - lien));
+ strcat(tempo,a+1);
+ strcpy(lien,tempo);
+ }
+ }
+
+ /* Unescape/escape %20 and other &nbsp; */
+ {
+ char query[HTS_URLMAXSIZE*2];
+ char* a=strchr(lien,'?');
+ if (a) {
+ strcpy(query,a);
+ *a='\0';
+ } else
+ query[0]='\0';
+ // conversion &amp; -> & et autres joyeusetés
+ unescape_amp(lien);
+ unescape_amp(query);
+ // décoder l'inutile (%2E par exemple) et coder espaces
+ // XXXXXXXXXXXXXXXXX strcpy(lien,unescape_http(lien));
+ strcpy(lien,unescape_http_unharm(lien, (no_esc_utf)?0:1));
+ escape_spc_url(lien);
+ strcat(lien,query); /* restore */
+ }
+
+ // convertir les éventuels \ en des / pour éviter des problèmes de reconnaissance!
+ {
+ char* a=jump_identification(lien);
+ while( (a=strchr(a,'\\')) ) *a='/';
+ }
+
+ // supprimer le(s) ./
+ while ((lien[0]=='.') && (lien[1]=='/')) {
+ char tempo[HTS_URLMAXSIZE*2];
+ strcpy(tempo,lien+2);
+ strcpy(lien,tempo);
+ }
+ if (strnotempty(lien)==0) // sauf si plus de nom de fichier
+ strcpy(lien,"./");
+
+ // vérifie les /~machin -> /~machin/
+ // supposition dangereuse?
+ // OUI!!
+#if HTS_TILDE_SLASH
+ if (lien[strlen(lien)-1]!='/') {
+ char *a=lien+strlen(lien)-1;
+ // éviter aussi index~1.html
+ while (((int) a>(int) lien) && (*a!='~') && (*a!='/') && (*a!='.')) a--;
+ if (*a=='~') {
+ strcat(lien,"/"); // ajouter slash
+ }
+ }
+#endif
+
+ // APPLET CODE="mixer.MixerApplet.class" --> APPLET CODE="mixer/MixerApplet.class"
+ // yes, this is dirty
+ // but I'm so lazy..
+ // and besides the java "code" convention is really a pain in html code
+ if (p_type==-1) {
+ char* a=strrchr(lien,'.');
+ add_class_dots_to_patch=0;
+ if (a) {
+ char* b;
+ do {
+ b=strchr(lien,'.');
+ if ((b != a) && (b)) {
+ add_class_dots_to_patch++;
+ *b='/';
+ }
+ } while((b != a) && (b));
+ }
+ }
+
+ // éliminer les éventuels :80 (port par défaut!)
+ if (link_has_authority(lien)) {
+ char * a;
+ a=strstr(lien,"//"); // "//" authority
+ if (a)
+ a+=2;
+ else
+ a=lien;
+ // while((*a) && (*a!='/') && (*a!=':')) a++;
+ a=jump_toport(a);
+ if (a) { // port
+ int port=0;
+ int defport=80;
+ char* b=a+1;
+#if HTS_USEOPENSSL
+ // FIXME
+ //if (strfield(adr, "https:")) {
+ //}
+#endif
+ while(isdigit((unsigned char)*b)) { port*=10; port+=(int) (*b-'0'); b++; }
+ if (port==defport) { // port 80, default - c'est débile
+ char tempo[HTS_URLMAXSIZE*2];
+ tempo[0]='\0';
+ strncat(tempo,lien,(int) (a - lien));
+ strcat(tempo,a+3); // sauter :80
+ strcpy(lien,tempo);
+ }
+ }
+ }
+
+ // filtrer les parazites (mailto & cie)
+ /*
+ if (strfield(lien,"mailto:")) { // ne pas traiter
+ error=1;
+ } else if (strfield(lien,"news:")) { // ne pas traiter
+ error=1;
+ }
+ */
+
+ // vérifier que l'on ne doit pas ajouter de .class
+ if (!error) {
+ if (add_class) {
+ char *a = lien+strlen(lien)-1;
+ while(( a > lien) && (*a!='/') && (*a!='.')) a--;
+ if (*a != '.')
+ strcat(lien,".class"); // ajouter .class
+ else if (!strfield2(a,".class"))
+ strcat(lien,".class"); // idem
+ }
+ }
+
+ // si c'est un chemin, alors vérifier (toto/toto.html -> http://www/toto/)
+ if (!error) {
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"position link check %s"LF,lien); test_flush;
+ }
+
+ if ((p_type==2) || (p_type==-2)) { // code ou codebase
+ // Vérifier les codebase=applet (au lieu de applet/)
+ if (p_type==-2) { // codebase
+ if (strnotempty(lien)) {
+ if (fil[strlen(lien)-1]!='/') { // pas répertoire
+ strcat(lien,"/");
+ }
+ }
+ }
+ /* only one ending / (bug on some pages) */
+ if ((int)strlen(lien)>2) {
+ while( (lien[strlen(lien)-2]=='/') && ((int)strlen(lien)>2) ) /* double // (bug) */
+ lien[strlen(lien)-1]='\0';
+ }
+ // copier nom host si besoin est
+ if (!link_has_authority(lien)) { // pas de http://
+ char adr2[HTS_URLMAXSIZE*2],fil2[HTS_URLMAXSIZE*2]; // ** euh ident_url_relatif??
+ if (ident_url_relatif(lien,urladr,urlfil,adr2,fil2)<0) {
+ error=1;
+ } else {
+ strcpy(lien,"http://");
+ strcat(lien,adr2);
+ if (*fil2!='/')
+ strcat(lien,"/");
+ strcat(lien,fil2);
+ {
+ char* a;
+ a=lien+strlen(lien)-1;
+ while((*a) && (*a!='/') && ( a> lien)) a--;
+ if (*a=='/') {
+ *(a+1)='\0';
+ }
+ }
+ //char tempo[HTS_URLMAXSIZE*2];
+ //strcpy(tempo,"http://");
+ //strcat(tempo,urladr); // host
+ //if (*lien!='/')
+ // strcat(tempo,"/");
+ //strcat(tempo,lien);
+ //strcpy(lien,tempo);
+ }
+ }
+
+ if (!error) { // pas d'erreur?
+ if (p_type==2) { // code ET PAS codebase
+ char* a=lien+strlen(lien)-1;
+ while( (a > lien) && (*a) && (*a!='/')) a--;
+ if (*a=='/') // ok on a repéré le dernier /
+ *(a+1)='\0'; // couper
+ else {
+ *lien='\0'; // éliminer
+ error=1; // erreur, ne pas poursuivre
+ }
+ }
+
+ // stocker base ou codebase?
+ switch(p_type) {
+ case 2: {
+ //if (*lien!='/') strcat(base,"/");
+ strcpy(base,lien);
+ }
+ break; // base
+ case -2: {
+ //if (*lien!='/') strcat(codebase,"/");
+ strcpy(codebase,lien);
+ }
+ break; // base
+ }
+
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"code/codebase link %s base %s"LF,lien,base); test_flush;
+ }
+ //printf("base code: %s - %s\n",lien,base);
+ }
+
+ } else {
+ char* _base;
+ if (p_type==-1) // code (applet)
+ _base=codebase;
+ else
+ _base=base;
+
+
+ // ajouter chemin de base href..
+ if (strnotempty(_base)) { // considérer base
+ if (!link_has_authority(lien)) { // non absolue
+ //if (*lien!='/') { // non absolu sur le site (/)
+ if ( ((int) strlen(_base)+(int) strlen(lien))<HTS_URLMAXSIZE) {
+ // mailto: and co: do NOT add base
+ if (ident_url_relatif(lien,urladr,urlfil,adr,fil)>=0) {
+ char tempo[HTS_URLMAXSIZE*2];
+ // base est absolue
+ strcpy(tempo,_base);
+ strcat(tempo,lien + ((*lien=='/')?1:0) );
+ strcpy(lien,tempo); // patcher en considérant base
+ // ** vérifier que ../ fonctionne (ne doit pas arriver mais bon..)
+
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"link modified with code/codebase %s"LF,lien); test_flush;
+ }
+ }
+ } else {
+ error=1; // erreur
+ if (opt.errlog) {
+ fspc(opt.errlog,"error"); fprintf(opt.errlog,"Link %s too long with base href"LF,lien);
+ test_flush;
+ }
+ }
+ //}
+ }
+ }
+
+
+ }
+ }
+
+
+ // transformer lien quelconque (http, relatif, etc) en une adresse
+ // et un chemin+fichier (adr,fil)
+ if (!error) {
+ int reponse;
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"build relative link %s with %s%s"LF,lien,urladr,urlfil); test_flush;
+ }
+ if ((reponse=ident_url_relatif(lien,urladr,urlfil,adr,fil))<0) {
+ adr[0]='\0'; // erreur
+ if (reponse==-2) {
+ if (opt.errlog) {
+ fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Link %s not caught (unknown ftp:// protocol)"LF,lien);
+ test_flush;
+ }
+ } else {
+ if ((opt.debug>1) && (opt.errlog!=NULL)) {
+ fspc(opt.errlog,"debug"); fprintf(opt.errlog,"ident_url_relatif failed for %s with %s%s"LF,lien,urladr,urlfil); test_flush;
+ }
+ }
+ }
+ } else {
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"link %s not build, error detected before"LF,lien); test_flush;
+ }
+ adr[0]='\0';
+ }
+
+#if HTS_CHECK_STRANGEDIR
+ // !ATTENTION!
+ // Ici on teste les exotiques du genre www.truc.fr/machin (sans slash à la fin)
+ // je n'ai pas encore trouvé le moyen de faire la différence entre un répertoire
+ // et un fichier en http A PRIORI : je fais donc un test
+ // En cas de moved xxx, on recalcule adr et fil, tout simplement
+ // DEFAUT: test effectué plusieurs fois! à revoir!!!
+ if ((adr[0]!='\0') && (strcmp(adr,"file://") && (p_type!=2) && (p_type!=-2)) {
+ //## if ((adr[0]!='\0') && (adr[0]!=lOCAL_CHAR) && (p_type!=2) && (p_type!=-2)) {
+ if (fil[strlen(fil)-1]!='/') { // pas répertoire
+ if (ishtml(fil)==-2) { // pas d'extension
+ char loc[HTS_URLMAXSIZE*2]; // éventuelle nouvelle position
+ loc[0]='\0';
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"link-check-directory: %s%s"LF,adr,fil);
+ test_flush;
+ }
+
+ // tester éventuelle nouvelle position
+ switch (http_location(adr,fil,loc).statuscode) {
+ case 200: // ok au final
+ if (strnotempty(loc)) { // a changé d'adresse
+ if (opt.errlog) {
+ fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Link %s%s has moved to %s for %s%s"LF,adr,fil,loc,urladr,urlfil);
+ test_flush;
+ }
+
+ // recalculer adr et fil!
+ if (ident_url_absolute(loc,adr,fil)==-1) {
+ adr[0]='\0'; // cancel
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"link-check-dir: %s%s"LF,adr,fil);
+ test_flush;
+ }
+ }
+
+ }
+ break;
+ case -2: case -3: // timeout ou erreur grave
+ if (opt.errlog) {
+ fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Connection too slow for testing link %s%s (from %s%s)"LF,adr,fil,urladr,urlfil);
+ test_flush;
+ }
+
+ break;
+ }
+
+ }
+ }
+ }
+#endif
+
+ // Le lien doit juste être réécrit, mais ne doit pas générer un lien
+ // exemple: <FORM ACTION="url_cgi">
+ if (p_nocatch) {
+ forbidden_url=1; // interdire récupération du lien
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"link forced external at %s%s"LF,adr,fil);
+ test_flush;
+ }
+ }
+
+ // Tester si un lien doit être accepté ou refusé (wizard)
+ // forbidden_url=1 : lien refusé
+ // forbidden_url=0 : lien accepté
+ //if ((ptr>0) && (p_type!=2) && (p_type!=-2)) { // tester autorisations?
+ if ((p_type!=2) && (p_type!=-2)) { // tester autorisations?
+ if (!p_nocatch) {
+ if (adr[0]!='\0') {
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"wizard link test at %s%s.."LF,adr,fil);
+ test_flush;
+ }
+ forbidden_url=hts_acceptlink(&opt,ptr,lien_tot,liens,
+ adr,fil,
+ &filters,&filptr,opt.maxfilter,
+ &robots,
+ &set_prio_to,
+ &just_test_it);
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"result for wizard link test: %d"LF,forbidden_url);
+ test_flush;
+ }
+ }
+ }
+ }
+
+ // calculer meme_adresse
+ meme_adresse=strfield2(jump_identification(adr),jump_identification(urladr));
+
+
+
+ // Début partie sauvegarde
+
+ // ici on forme le nom du fichier à sauver, et on patche l'URL
+ if (adr[0]!='\0') {
+ // savename: simplifier les ../ et autres joyeusetés
+ char save[HTS_URLMAXSIZE*2];
+ int r_sv=0;
+ // En cas de moved, adresse première
+ char former_adr[HTS_URLMAXSIZE*2];
+ char former_fil[HTS_URLMAXSIZE*2];
+ //
+ save[0]='\0'; former_adr[0]='\0'; former_fil[0]='\0';
+ //
+
+ // nom du chemin à sauver si on doit le calculer
+ // note: url_savename peut décider de tester le lien si il le trouve
+ // suspect, et modifier alors adr et fil
+ // dans ce cas on aura une référence directe au lieu des traditionnels
+ // moved en cascade (impossible à reproduire à priori en local, lorsque des fichiers
+ // gif sont impliqués par exemple)
+ if ((p_type!=2) && (p_type!=-2)) { // pas base href ou codebase
+ if (forbidden_url!=1) {
+ char last_adr[HTS_URLMAXSIZE*2];
+ last_adr[0]='\0';
+ //char last_fil[HTS_URLMAXSIZE*2]="";
+ strcpy(last_adr,adr); // ancienne adresse
+ //strcpy(last_fil,fil); // ancien chemin
+ r_sv=url_savename(adr,fil,save,former_adr,former_fil,liens[ptr]->adr,liens[ptr]->fil,&opt,liens,lien_tot,back,back_max,&cache,&hash,ptr,numero_passe);
+ if (strcmp(jump_identification(last_adr),jump_identification(adr)) != 0) { // a changé
+
+ // 2e test si moved
+
+ // Tester si un lien doit être accepté ou refusé (wizard)
+ // forbidden_url=1 : lien refusé
+ // forbidden_url=0 : lien accepté
+ if ((ptr>0) && (p_type!=2) && (p_type!=-2)) { // tester autorisations?
+ if (!p_nocatch) {
+ if (adr[0]!='\0') {
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"wizard moved link retest at %s%s.."LF,adr,fil);
+ test_flush;
+ }
+ forbidden_url=hts_acceptlink(&opt,ptr,lien_tot,liens,
+ adr,fil,
+ &filters,&filptr,opt.maxfilter,
+ &robots,
+ &set_prio_to,
+ &just_test_it);
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"result for wizard moved link retest: %d"LF,forbidden_url);
+ test_flush;
+ }
+ }
+ }
+ }
+
+ //import_done=1; // c'est un import!
+ meme_adresse=0; // on a changé
+ }
+ } else {
+ strcpy(save,""); // dummy
+ }
+ }
+ if (r_sv!=-1) { // pas d'erreur, on continue
+ /* log */
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug");
+ if (forbidden_url!=1) { // le lien va être chargé
+ if ((p_type==2) || (p_type==-2)) { // base href ou codebase, pas un lien
+ fprintf(opt.log,"Code/Codebase: %s%s"LF,adr,fil);
+ } else if ((opt.getmode & 4)==0) {
+ fprintf(opt.log,"Record: %s%s -> %s"LF,adr,fil,save);
+ } else {
+ if (!ishtml(fil))
+ fprintf(opt.log,"Record after: %s%s -> %s"LF,adr,fil,save);
+ else
+ fprintf(opt.log,"Record: %s%s -> %s"LF,adr,fil,save);
+ }
+ } else
+ fprintf(opt.log,"External: %s%s"LF,adr,fil);
+ test_flush;
+ }
+ /* FIN log */
+
+ // écrire lien
+ if ((p_type==2) || (p_type==-2)) { // base href ou codebase, sauter
+ lastsaved=eadr-1+1; // sauter "
+ }
+ /* */
+ else if (opt.urlmode==0) { // URL absolue dans tous les cas
+ if ((opt.getmode & 1) && (ptr>0)) { // ecrire les html
+ if (!link_has_authority(adr)) {
+ HT_ADD("http://");
+ } else {
+ char* aut = strstr(adr, "//");
+ if (aut) {
+ char tmp[256];
+ tmp[0]='\0';
+ strncat(tmp, adr, (int) (aut - adr)); // scheme
+ HT_ADD(tmp); // Protocol
+ HT_ADD("//");
+ }
+ }
+
+ if (!opt.passprivacy) {
+ HT_ADD(jump_protocol(adr)); // Password
+ } else {
+ HT_ADD(jump_identification(adr)); // No Password
+ }
+ if (*fil!='/')
+ HT_ADD("/");
+ HT_ADD(fil);
+ }
+ lastsaved=eadr-1; // dernier écrit+1 (enfin euh apres on fait un ++ alors hein)
+ /* */
+ } else if (opt.urlmode >= 4) { // ne rien faire dans tous les cas!
+ /* */
+ /* leave the link 'as is' */
+ /* Sinon, dépend de interne/externe */
+ } else if (forbidden_url==1) { // le lien ne sera pas chargé, référence externe!
+ if ((opt.getmode & 1) && (ptr>0)) {
+ if (p_type!=-1) { // pas que le nom de fichier (pas classe java)
+ if (!opt.external) {
+ if (!link_has_authority(adr)) {
+ HT_ADD("http://");
+ if (!opt.passprivacy) {
+ HT_ADD(adr); // Password
+ } else {
+ HT_ADD(jump_identification(adr)); // No Password
+ }
+ if (*fil!='/')
+ HT_ADD("/");
+ HT_ADD(fil);
+ } else {
+ char* aut = strstr(adr, "//");
+ if (aut) {
+ char tmp[256];
+ tmp[0]='\0';
+ strncat(tmp, adr, (int) (aut - adr)); // scheme
+ HT_ADD(tmp); // Protocol
+ HT_ADD("//");
+ if (!opt.passprivacy) {
+ HT_ADD(jump_protocol(adr)); // Password
+ } else {
+ HT_ADD(jump_identification(adr)); // No Password
+ }
+ if (*fil!='/')
+ HT_ADD("/");
+ HT_ADD(fil);
+ }
+ }
+ //
+ } else { // fichier/page externe, mais on veut générer une erreur
+ //
+ int patch_it=0;
+ int add_url=0;
+ char* cat_name=NULL;
+ char* cat_data=NULL;
+ int cat_nb=0;
+ int cat_data_len=0;
+
+ // ajouter lien external
+ switch ( (link_has_authority(adr)) ? 1 : ( (fil[strlen(fil)-1]=='/')?1:(ishtml(fil)) ) ) {
+ case 1: case -2: // html ou répertoire
+ if (opt.getmode & 1) { // sauver html
+ patch_it=1; // redirect
+ add_url=1; // avec link?
+ cat_name="external.html";
+ cat_nb=0;
+ cat_data=HTS_DATA_UNKNOWN_HTML;
+ cat_data_len=HTS_DATA_UNKNOWN_HTML_LEN;
+ }
+ break;
+ default: // inconnu
+ // asp, cgi..
+ if (is_dyntype(get_ext(fil))) {
+ patch_it=1; // redirect
+ add_url=1; // avec link?
+ cat_name="external.html";
+ cat_nb=0;
+ cat_data=HTS_DATA_UNKNOWN_HTML;
+ cat_data_len=HTS_DATA_UNKNOWN_HTML_LEN;
+ } else if ( (strfield2(fil+max(0,(int)strlen(fil)-4),".gif"))
+ || (strfield2(fil+max(0,(int)strlen(fil)-4),".jpg"))
+ || (strfield2(fil+max(0,(int)strlen(fil)-4),".xbm"))
+ || (ishtml(fil)!=0) ) {
+ patch_it=1; // redirect
+ add_url=1; // avec link aussi
+ cat_name="external.gif";
+ cat_nb=1;
+ cat_data=HTS_DATA_UNKNOWN_GIF;
+ cat_data_len=HTS_DATA_UNKNOWN_GIF_LEN;
+ }
+ break;
+ }// html,gif
+
+ if (patch_it) {
+ char save[HTS_URLMAXSIZE*2];
+ char tempo[HTS_URLMAXSIZE*2];
+ strcpy(save,opt.path_html);
+ strcat(save,cat_name);
+ if (lienrelatif(tempo,save,savename)==0) {
+ if (!no_esc_utf)
+ escape_uri(tempo); // escape with %xx
+ else
+ escape_uri_utf(tempo); // escape with %xx
+ HT_ADD(tempo); // page externe
+ if (add_url) {
+ HT_ADD("?link="); // page externe
+
+ // same as above
+ if (!link_has_authority(adr)) {
+ HT_ADD("http://");
+ if (!opt.passprivacy) {
+ HT_ADD(adr); // Password
+ } else {
+ HT_ADD(jump_identification(adr)); // No Password
+ }
+ if (*fil!='/')
+ HT_ADD("/");
+ HT_ADD(fil);
+ } else {
+ char* aut = strstr(adr, "//");
+ if (aut) {
+ char tmp[256];
+ tmp[0]='\0';
+ strncat(tmp, adr, (int) (aut - adr) + 2); // scheme
+ HT_ADD(tmp);
+ if (!opt.passprivacy) {
+ HT_ADD(jump_protocol(adr)); // Password
+ } else {
+ HT_ADD(jump_identification(adr)); // No Password
+ }
+ if (*fil!='/')
+ HT_ADD("/");
+ HT_ADD(fil);
+ }
+ }
+ //
+
+ }
+ }
+
+ // écrire fichier?
+ if (verif_external(cat_nb,1)) {
+ //if (!fexist(fconcat(opt.path_html,cat_name))) {
+ FILE* fp = filecreate(fconcat(opt.path_html,cat_name));
+ if (fp) {
+ if (cat_data_len==0) { // texte
+ verif_backblue(opt.path_html);
+ fprintf(fp,"%s%s","<!-- Created by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->"LF,cat_data);
+ } else { // data
+ fwrite(cat_data,cat_data_len,1,fp);
+ }
+ fclose(fp);
+ usercommand(0,NULL,fconcat(opt.path_html,cat_name));
+ }
+ }
+ } else { // écrire normalement le nom de fichier
+ HT_ADD("http://");
+ if (!opt.passprivacy) {
+ HT_ADD(adr); // Password
+ } else {
+ HT_ADD(jump_identification(adr)); // No Password
+ }
+ if (*fil!='/')
+ HT_ADD("/");
+ HT_ADD(fil);
+ }// patcher?
+ } // external
+ } else { // que le nom de fichier (classe java)
+ // en gros recopie de plus bas: copier codebase et base
+ if (p_flush) {
+ char tempo[HTS_URLMAXSIZE*2]; // <-- ajouté
+ char tempo_pat[HTS_URLMAXSIZE*2];
+
+ // Calculer chemin
+ tempo_pat[0]='\0';
+ strcpy(tempo,fil); // <-- ajouté
+ {
+ char* a=strrchr(tempo,'/');
+
+ // Example: we converted code="x.y.z.foo.class" into "x/y/z/foo.class"
+ // we have to do the contrary now
+ if (add_class_dots_to_patch>0) {
+ while( (add_class_dots_to_patch>0) && (a) ) {
+ *a='.'; // convert "false" java / into .
+ add_class_dots_to_patch--;
+ a=strrchr(tempo,'/');
+ }
+ // if add_class_dots_to_patch, this is because there is a problem!!
+ if (add_class_dots_to_patch) {
+ if (opt.errlog) {
+ fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Error: can not rewind java path %s, check html code"LF,tempo);
+ test_flush;
+ }
+ }
+ }
+
+ // Cut path/filename
+ if (a) {
+ char tempo2[HTS_URLMAXSIZE*2];
+ strcpy(tempo2,a+1); // FICHIER
+ strncat(tempo_pat,tempo,(int) (a - tempo)+1); // chemin
+ strcpy(tempo,tempo2); // fichier
+ }
+ }
+
+ // écrire codebase="chemin"
+ if ((opt.getmode & 1) && (ptr>0)) {
+ char tempo4[HTS_URLMAXSIZE*2];
+ tempo4[0]='\0';
+
+ if (strnotempty(tempo_pat)) {
+ HT_ADD("codebase=\"http://");
+ if (!opt.passprivacy) {
+ HT_ADD(adr); // Password
+ } else {
+ HT_ADD(jump_identification(adr)); // No Password
+ }
+ if (*tempo_pat!='/') HT_ADD("/");
+ HT_ADD(tempo_pat);
+ HT_ADD("\" ");
+ }
+
+ strncat(tempo4,lastsaved,(int) (p_flush - lastsaved));
+ HT_ADD(tempo4); // refresh code="
+ HT_ADD(tempo);
+ }
+ }
+ }
+ }
+ lastsaved=eadr-1;
+ }
+ /*
+ else if (opt.urlmode==1) { // ABSOLU, c'est le cas le moins courant
+ // NE FONCTIONNE PAS!! (et est inutile)
+ if ((opt.getmode & 1) && (ptr>0)) { // ecrire les html
+ // écrire le lien modifié, absolu
+ HT_ADD("file:");
+ if (*save=='/')
+ HT_ADD(save+1)
+ else
+ HT_ADD(save)
+ }
+ lastsaved=eadr-1; // dernier écrit+1 (enfin euh apres on fait un ++ alors hein)
+ }
+ */
+ else if (opt.urlmode==3) { // URI absolue /
+ if ((opt.getmode & 1) && (ptr>0)) { // ecrire les html
+ HT_ADD(fil);
+ }
+ lastsaved=eadr-1; // dernier écrit+1 (enfin euh apres on fait un ++ alors hein)
+ }
+ else if (opt.urlmode==2) { // RELATIF
+ char tempo[HTS_URLMAXSIZE*2];
+ tempo[0]='\0';
+ // calculer le lien relatif
+
+ if (lienrelatif(tempo,save,savename)==0) {
+ if (!no_esc_utf)
+ escape_uri(tempo); // escape with %xx
+ else
+ escape_uri_utf(tempo); // escape with %xx
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"relative link at %s build with %s and %s: %s"LF,adr,save,savename,tempo);
+ test_flush;
+ }
+
+ // lien applet (code) - il faut placer un codebase avant
+ if (p_type==-1) { // que le nom de fichier
+
+ if (p_flush) {
+ char tempo_pat[HTS_URLMAXSIZE*2];
+ tempo_pat[0]='\0';
+ {
+ char* a=strrchr(tempo,'/');
+
+ // Example: we converted code="x.y.z.foo.class" into "x/y/z/foo.class"
+ // we have to do the contrary now
+ if (add_class_dots_to_patch>0) {
+ while( (add_class_dots_to_patch>0) && (a) ) {
+ *a='.'; // convert "false" java / into .
+ add_class_dots_to_patch--;
+ a=strrchr(tempo,'/');
+ }
+ // if add_class_dots_to_patch, this is because there is a problem!!
+ if (add_class_dots_to_patch) {
+ if (opt.errlog) {
+ fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Error: can not rewind java path %s, check html code"LF,tempo);
+ test_flush;
+ }
+ }
+ }
+
+ if (a) {
+ char tempo2[HTS_URLMAXSIZE*2];
+ strcpy(tempo2,a+1);
+ strncat(tempo_pat,tempo,(int) (a - tempo)+1); // chemin
+ strcpy(tempo,tempo2); // fichier
+ }
+ }
+
+ // écrire codebase="chemin"
+ if ((opt.getmode & 1) && (ptr>0)) {
+ char tempo4[HTS_URLMAXSIZE*2];
+ tempo4[0]='\0';
+
+ if (strnotempty(tempo_pat)) {
+ HT_ADD("codebase=\"");
+ HT_ADD(tempo_pat);
+ HT_ADD("\" ");
+ }
+
+ strncat(tempo4,lastsaved,(int) (p_flush - lastsaved));
+ HT_ADD(tempo4); // refresh code="
+ }
+ }
+ //lastsaved=adr; // dernier écrit+1
+ }
+
+ if ((opt.getmode & 1) && (ptr>0)) {
+ // écrire le lien modifié, relatif
+ HT_ADD(tempo);
+
+ // Add query-string, for informational purpose only
+ // Useless, because all parameters-pages are saved into different targets
+ if (opt.includequery) {
+ char* a=strchr(lien,'?');
+ if (a) {
+ HT_ADD(a);
+ }
+ }
+ }
+ lastsaved=eadr-1; // dernier écrit+1 (enfin euh apres on fait un ++ alors hein)
+ } else {
+ if (opt.errlog) {
+ fprintf(opt.errlog,"Error building relative link %s and %s"LF,save,savename);
+ test_flush;
+ }
+ }
+ } // sinon le lien sera écrit normalement
+
+
+#if 0
+ if (fexist(save)) { // le fichier existe..
+ adr[0]='\0';
+ //if ((opt.debug>0) && (opt.log!=NULL)) {
+ if (opt.errlog) {
+ fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Link has already been written on disk, cancelled: %s"LF,save);
+ test_flush;
+ }
+ }
+#endif
+
+ /* Security check */
+ if (strlen(save) >= HTS_URLMAXSIZE) {
+ adr[0]='\0';
+ if (opt.errlog) {
+ fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Link is too long: %s"LF,save);
+ test_flush;
+ }
+ }
+
+ if ((adr[0]!='\0') && (p_type!=2) && (p_type!=-2) && ( (forbidden_url!=1) || (just_test_it))) { // si le fichier n'existe pas, ajouter à la liste
+ // n'y a-t-il pas trop de liens?
+ if (lien_tot+1 >= lien_max-4) { // trop de liens!
+ printf("PANIC! : Too many URLs : >%d [%d]\n",lien_tot,__LINE__);
+ if (opt.errlog) {
+ fprintf(opt.errlog,LF"Too many URLs, giving up..(>%d)"LF,lien_max);
+ fprintf(opt.errlog,"To avoid that: use #L option for more links (example: -#L1000000)"LF);
+ test_flush;
+ }
+ if ((opt.getmode & 1) && (ptr>0)) { if (fp) { fclose(fp); fp=NULL; } }
+ XH_uninit; // désallocation mémoire & buffers
+ return 0;
+
+ } else { // noter le lien sur la listes des liens à charger
+ int pass_fix,dejafait=0;
+
+ // Calculer la priorité de ce lien
+ if ((opt.getmode & 4)==0) { // traiter html après
+ pass_fix=0;
+ } else { // vérifier que ce n'est pas un !html
+ if (!ishtml(fil))
+ pass_fix=1; // priorité inférieure (traiter après)
+ else
+ pass_fix=max(0,numero_passe); // priorité normale
+ }
+
+ /* If the file seems to be an html file, get depth-1 */
+ /*
+ if (strnotempty(save)) {
+ if (ishtml(save) == 1) {
+ // descore_prio = 2;
+ } else {
+ // descore_prio = 1;
+ }
+ }
+ */
+
+ // vérifier que le lien n'a pas déja été noté
+ // si c'est le cas, alors il faut s'assurer que la priorité associée
+ // au fichier est la plus grande des deux priorités
+ //
+ // On part de la fin et on essaye de se presser (économise temps machine)
+#if HTS_HASH
+ {
+ int i=hash_read(&hash,save,"",0); // lecture type 0 (sav)
+ if (i>=0) {
+ liens[i]->depth=maximum(liens[i]->depth,liens[ptr]->depth - 1);
+ dejafait=1;
+ }
+ }
+#else
+ {
+ int l;
+ int i;
+ l=strlen(save); // opti
+ for(i=lien_tot-1;(i>=0) && (dejafait==0);i--) {
+ if (liens[i]->sav_len==l) { // même taille de chaîne
+ if (strcmp(liens[i]->sav,save)==0) { // existe déja
+ liens[i]->depth=maximum(liens[i]->depth,liens[ptr]->depth - 1);
+ dejafait=1;
+ }
+ }
+ }
+ }
+#endif
+
+ // le lien n'a jamais été créé.
+ // cette fois ci, on le crée!
+ if (!dejafait) {
+ //
+ // >>>> CREER LE LIEN <<<<
+ //
+ // enregistrer lien à charger
+ //liens[lien_tot]->adr[0]=liens[lien_tot]->fil[0]=liens[lien_tot]->sav[0]='\0';
+ // même adresse: l'objet père est l'objet père de l'actuel
+
+ // DEBUT ROBOTS.TXT AJOUT
+ if (!just_test_it) {
+ if (
+ (!strfield(adr,"ftp://")) // non ftp
+ && (!strfield(adr,"file://")) ) { // non file
+ if (opt.robots) { // récupérer robots
+ if (ishtml(fil)!=0) { // pas la peine pour des fichiers isolés
+ if (checkrobots(&robots,adr,"") != -1) { // robots.txt ?
+ checkrobots_set(&robots,adr,""); // ajouter entrée vide
+ if (checkrobots(&robots,adr,"") == -1) { // robots.txt ?
+ // enregistrer robots.txt (MACRO)
+ liens_record(adr,"/robots.txt","","","");
+ if (liens[lien_tot]==NULL) { // erreur, pas de place réservée
+ printf("PANIC! : Not enough memory [%d]\n",__LINE__);
+ if (opt.errlog) {
+ fprintf(opt.errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url)));
+ test_flush;
+ }
+ if ((opt.getmode & 1) && (ptr>0)) { if (fp) { fclose(fp); fp=NULL; } }
+ XH_uninit; // désallocation mémoire & buffers
+ return 0;
+ }
+ liens[lien_tot]->testmode=0; // pas mode test
+ liens[lien_tot]->link_import=0; // pas mode import
+ liens[lien_tot]->premier=lien_tot;
+ liens[lien_tot]->precedent=ptr;
+ liens[lien_tot]->depth=0;
+ liens[lien_tot]->pass2=max(0,numero_passe);
+ liens[lien_tot]->retry=0;
+ lien_tot++; // UN LIEN DE PLUS
+#if DEBUG_ROBOTS
+ printf("robots.txt: added file robots.txt for %s\n",adr);
+#endif
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"robots.txt added at %s"LF,adr);
+ test_flush;
+ }
+ } else {
+ if (opt.errlog) {
+ fprintf(opt.errlog,"Unexpected robots.txt error at %d"LF,__LINE__);
+ test_flush;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ // FIN ROBOTS.TXT AJOUT
+
+ // enregistrer (MACRO)
+ liens_record(adr,fil,save,former_adr,former_fil);
+ if (liens[lien_tot]==NULL) { // erreur, pas de place réservée
+ printf("PANIC! : Not enough memory [%d]\n",__LINE__);
+ if (opt.errlog) {
+ fprintf(opt.errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url)));
+ test_flush;
+ }
+ if ((opt.getmode & 1) && (ptr>0)) { if (fp) { fclose(fp); fp=NULL; } }
+ XH_uninit; // désallocation mémoire & buffers
+ return 0;
+ }
+
+ // mode test?
+ if (!just_test_it)
+ liens[lien_tot]->testmode=0; // pas mode test
+ else
+ liens[lien_tot]->testmode=1; // mode test
+ if (!import_done)
+ liens[lien_tot]->link_import=0; // pas mode import
+ else
+ liens[lien_tot]->link_import=1; // mode import
+ // écrire autres paramètres de la structure-lien
+ if ((meme_adresse) && (!import_done) && (liens[ptr]->premier != 0))
+ liens[lien_tot]->premier=liens[ptr]->premier;
+ else // sinon l'objet père est le précédent lui même
+ liens[lien_tot]->premier=lien_tot;
+ // liens[lien_tot]->premier=ptr;
+
+ liens[lien_tot]->precedent=ptr;
+ // noter la priorité
+ if (!set_prio_to)
+ liens[lien_tot]->depth=liens[ptr]->depth - 1;
+ else
+ liens[lien_tot]->depth=max(0,min(liens[ptr]->depth-1,set_prio_to-1)); // PRIORITE NULLE (catch page)
+ // noter pass
+ liens[lien_tot]->pass2=pass_fix;
+ liens[lien_tot]->retry=opt.retry;
+
+ //strcpy(liens[lien_tot]->adr,adr);
+ //strcpy(liens[lien_tot]->fil,fil);
+ //strcpy(liens[lien_tot]->sav,save);
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ if (!just_test_it) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"OK, NOTE: %s%s -> %s"LF,liens[lien_tot]->adr,liens[lien_tot]->fil,liens[lien_tot]->sav);
+ } else {
+ fspc(opt.log,"debug"); fprintf(opt.log,"OK, TEST: %s%s"LF,liens[lien_tot]->adr,liens[lien_tot]->fil);
+ }
+ test_flush;
+ }
+
+ lien_tot++; // UN LIEN DE PLUS
+ } else { // if !dejafait
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"link has already been recorded, cancelled: %s"LF,save);
+ test_flush;
+ }
+
+ }
+
+
+ } // si pas trop de liens
+ } // si adr[0]!='\0'
+
+
+ } // if adr[0]!='\0'
+
+ } // if adr[0]!='\0'
+
+ } // if strlen(lien)>0
+
+ } // if ok==0
+
+ adr=eadr-1; // ** sauter
+
+ } // if (p)
+
+ } // si '<' ou '>'
+
+ // plus loin
+ adr++;
+
+
+ /* Otimization: if we are scanning in HTML data (not in tag or script),
+ then jump to the next starting tag */
+ if (ptr>0) {
+ if ( (!intag) /* Not in tag */
+ && (!inscript) /* Not in (java)script */
+ && (!incomment) /* Not in comment (<!--) */
+ && (!inscript_tag) /* Not in tag with script inside */
+ )
+ {
+ /* Not at the end */
+ if (( ((int) (adr - r.adr)) ) < r.size) {
+ /* Not on a starting tag yet */
+ if (*adr != '<') {
+ char* adr_next = strchr(adr,'<');
+ /* Jump to near end (index hack) */
+ if (!adr_next) {
+ if (
+ ( (int)(adr - r.adr) < (r.size - 4))
+ &&
+ (r.size > 4)
+ ) {
+ adr = r.adr + r.size - 2;
+ }
+ } else {
+ adr = adr_next;
+ }
+ }
+ }
+ }
+ }
+
+ // ----------
+ // écrire peu à peu
+ if ((opt.getmode & 1) && (ptr>0)) HT_ADD_ADR;
+ lastsaved=adr; // dernier écrit+1
+ // ----------
+
+ // pour les stats du shell si parsing trop long
+#if HTS_ANALYSTE
+ if (r.size)
+ _hts_in_html_done=(100 * ((int) (adr - r.adr)) ) / (int)(r.size);
+ if (_hts_in_html_poll) {
+ _hts_in_html_poll=0;
+ // temps à attendre, et remplir autant que l'on peut le cache (backing)
+ back_wait(back,back_max,&opt,&cache,HTS_STAT.stat_timestart);
+ back_fillmax(back,back_max,&opt,&cache,liens,ptr,numero_passe,lien_tot);
+
+ // Transfer rate
+ engine_stats();
+
+ // Refresh various stats
+ HTS_STAT.stat_nsocket=back_nsoc(back,back_max);
+ HTS_STAT.stat_errors=fspc(NULL,"error");
+ HTS_STAT.stat_warnings=fspc(NULL,"warning");
+ HTS_STAT.stat_infos=fspc(NULL,"info");
+ HTS_STAT.nbk=backlinks_done(liens,lien_tot,ptr);
+ HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,back,back_max);
+
+ if (!hts_htmlcheck_loop(back,back_max,0,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) {
+ if (opt.errlog) {
+ fspc(opt.errlog,"info"); fprintf(opt.errlog,"Exit requested by shell or user"LF);
+ test_flush;
+ }
+ exit_xh=1; // exit requested
+ XH_uninit;
+ return 0;
+ //adr = r.adr + r.size; // exit
+ } else if (_hts_cancel==1) {
+ // adr = r.adr + r.size; // exit
+ nofollow=1; // moins violent
+ _hts_cancel=0;
+ }
+ }
+
+ // refresh the backing system each 2 seconds
+ if (engine_stats()) {
+ back_wait(back,back_max,&opt,&cache,HTS_STAT.stat_timestart);
+ back_fillmax(back,back_max,&opt,&cache,liens,ptr,numero_passe,lien_tot);
+ }
+#endif
+ } while(( ((int) (adr - r.adr)) ) < r.size);
+#if HTS_ANALYSTE
+ _hts_in_html_parsing=0; // flag
+ _hts_cancel=0; // pas de cancel
+#endif
+ if ((opt.getmode & 1) && (ptr>0)) {
+ HT_ADD_END; // achever
+ }
+ //
+ //
+ //
+ } // if !error
+
+
+ if (opt.getmode & 1) { if (fp) { fclose(fp); fp=NULL; } }
+ // sauver fichier
+ //structcheck(savename);
+ //filesave(r.adr,r.size,savename);
+
+#if HTS_ANALYSTE
+ } // analyse OK
+#endif
+
diff --git a/src/htsrobots.c b/src/htsrobots.c
new file mode 100644
index 0000000..8aabdd4
--- /dev/null
+++ b/src/htsrobots.c
@@ -0,0 +1,118 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: httrack.c subroutines: */
+/* robots.txt (website robot file) */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+
+#include "htsrobots.h"
+
+/* specific definitions */
+#include "htsbase.h"
+#include "htslib.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+/* END specific definitions */
+
+
+// -- robots --
+
+/*
+  Look up the robots rules recorded for host 'adr'.
+  - fil == "" : only tell whether an entry already exists for this host
+                (-1 if it does, 0 otherwise)
+  - fil != "" : test 'fil' against every rule line stored in the entry's token;
+                a line starting with '/' is compared with strfield() against
+                the start of fil, any other line is searched with strstrcase().
+  Returns -1 when the file is forbidden (or the entry exists), 0 otherwise.
+*/
+int checkrobots(robots_wizard* robots,char* adr,char* fil) {
+  robots_wizard* entry;
+  for (entry = robots; entry != NULL; entry = entry->next) {
+    int pos;
+    char rule[250];
+
+    /* not the entry for this host: try the next one */
+    if (!strfield2(entry->adr, adr))
+      continue;
+
+    /* existence query only */
+    if (!fil[0])
+      return -1;
+
+    /* entry exists but carries no rule */
+    if (!strnotempty(entry->token))
+      continue;
+
+    /* read the rules one line at a time */
+    pos = 0;
+    do {
+      pos += binput(entry->token + pos, rule, 200);
+      if (rule[0] == '/') {            /* absolute rule */
+        if (strfield(fil, rule))
+          return -1;                   /* forbidden */
+      } else if (strstrcase(fil, rule)) {   /* relative rule */
+        return -1;
+      }
+    } while ( (strnotempty(rule)) && (pos < (int) strlen(entry->token)) );
+  }
+  return 0;
+}
+/*
+  Record (or replace) the rule text 'data' for host 'adr'.
+  If an entry already matches 'adr' its token is overwritten; otherwise a new
+  entry is appended at the end of the list (and then filled on the next loop
+  iteration, which also makes the function return -1).
+  Returns -1 when the rules were stored, 0 when nothing was stored
+  (data or adr too long for the fixed-size fields, empty list head,
+  or allocation failure).
+*/
+int checkrobots_set(robots_wizard* robots,char* adr,char* data) {
+  if (((int) strlen(data)) > 999) return 0;
+  /* adr is copied with strcpy() into a fixed-size field: refuse anything
+     that would not fit instead of overflowing the entry */
+  if (strlen(adr) >= sizeof(robots->adr)) return 0;
+  while(robots) {
+    if (strfield2(robots->adr,adr)) {    // entry already exists
+      strcpy(robots->token,data);
+#if DEBUG_ROBOTS
+      printf("robots.txt: set %s to %s\n",adr,data);
+#endif
+      return -1;
+    }
+    else if (!robots->next) {            // end of list: append a new entry
+      robots->next=(robots_wizard*) calloct(1,sizeof(robots_wizard));
+      if (robots->next) {
+        robots->next->next=NULL;
+        strcpy(robots->next->adr,adr);
+        strcpy(robots->next->token,data);
+#if DEBUG_ROBOTS
+        printf("robots.txt: new set %s to %s\n",adr,data);
+#endif
+      }
+#if DEBUG_ROBOTS
+      else
+        printf("malloc error!!\n");
+#endif
+    }
+    robots=robots->next;
+  }
+  return 0;
+}
+/*
+  Free every entry chained after 'robots'. The head element itself is NOT
+  freed (only robots->next and its successors are), and its 'next' pointer
+  is reset to NULL.
+  The list is walked iteratively so that a long list cannot exhaust the
+  stack, and a NULL head is accepted.
+*/
+void checkrobots_free(robots_wizard* robots) {
+  robots_wizard* cur;
+  if (robots == NULL)
+    return;
+  cur = robots->next;
+  robots->next = NULL;
+  while (cur != NULL) {
+    robots_wizard* following = cur->next;
+    freet(cur);
+    cur = following;
+  }
+}
+
+// -- robots --
diff --git a/src/htsrobots.h b/src/htsrobots.h
new file mode 100644
index 0000000..62b9689
--- /dev/null
+++ b/src/htsrobots.h
@@ -0,0 +1,56 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+/* ------------------------------------------------------------ */
+/* File: httrack.c subroutines: */
+/* robots.txt (website robot file) */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+
+
+#ifndef HTSROBOTS_DEFH
+#define HTSROBOTS_DEFH
+
+// robots wizard: one node of a singly linked list of per-host robots rules
+typedef struct robots_wizard {
+ // host address this entry applies to (matched with strfield2() in checkrobots())
+ char adr[1024];
+ // rule text for that host, read line by line by checkrobots();
+ // checkrobots_set() refuses data longer than 999 characters
+ char token[1024];
+ // next entry, or NULL at the end of the list
+ struct robots_wizard* next;
+} robots_wizard;
+
+
+// robots
+// checkrobots: -1 if fil is forbidden for adr (or, with fil=="", if an entry exists for adr), else 0
+int checkrobots(robots_wizard* robots,char* adr,char* fil);
+// checkrobots_free: free all entries chained after the given head (the head itself is kept)
+void checkrobots_free(robots_wizard* robots);
+// checkrobots_set: store rule text 'data' for adr; -1 if stored, 0 otherwise
+int checkrobots_set(robots_wizard* robots,char* adr,char* data);
+
+
+#endif
diff --git a/src/htssystem.h b/src/htssystem.h
new file mode 100644
index 0000000..989607d
--- /dev/null
+++ b/src/htssystem.h
@@ -0,0 +1,15 @@
+// Definition of the target platform
+
+// Sun Solaris .......... 0
+// Windows/95 ........... 1
+// Ibm 580 .............. 2
+
+#define HTS_PLATFORM 1
+
+// SHELL
+// Code guarded by "#if HTS_ANALYSTE" (e.g. the parser's shell statistics and
+// polling hooks) is compiled in when this is non-zero.
+// NOTE(review): the specific meaning of the value 2 is not visible here - confirm.
+#define HTS_ANALYSTE 2
+
+
+// End of definition
+
+
diff --git a/src/htssystem.h.windows9x b/src/htssystem.h.windows9x
new file mode 100644
index 0000000..0689e0c
--- /dev/null
+++ b/src/htssystem.h.windows9x
@@ -0,0 +1,11 @@
+/* HTTrack, Offline Browser for Windows and Unix */
+
+/* HTTrack system definition for Windows */
+/* This should be the only file you have to change */
+
+/* Solaris: 0 / Windows: 1 / AIX: 2 / Linux: 3 */
+
+
+/* Fix platform number to 1 (Windows) */
+/* If it doesn't compile, try another one */
+/* NOTE(review): this template defines HTS_PLATEFORM while htssystem.h defines
+   HTS_PLATFORM - looks like a spelling mismatch; confirm which name the
+   sources actually test before relying on this file. */
+#define HTS_PLATEFORM 1
diff --git a/src/htsthread.c b/src/htsthread.c
new file mode 100644
index 0000000..0a3bee6
--- /dev/null
+++ b/src/htsthread.c
@@ -0,0 +1,97 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: Threads */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+
+#include "htsglobal.h"
+#include "htsthread.h"
+
+// Threads - emulate _beginthread under Linux/Unix using pthread_XX
+// Some changes will have to be done, see PTHREAD_RETURN,PTHREAD_TYPE
+#if USE_PTHREAD
+#include <pthread.h> /* _beginthread, _endthread */
+
+/*
+  pthread-based stand-in for _beginthread(): start 'start_address(arglist)'
+  in a new, detached thread. 'stack_size' is accepted for signature
+  compatibility but is not used.
+  Returns 0 on success, (unsigned long) -1 if the thread could not be created.
+*/
+unsigned long _beginthread( void* ( *start_address )( void * ), unsigned stack_size, void *arglist )
+{
+  pthread_t tid;
+
+  /* create the thread; any non-zero code is a failure */
+  if (pthread_create(&tid, NULL, start_address, arglist) != 0)
+    return -1;
+
+  /* detach it so that it runs independently and never needs to be joined */
+  pthread_detach(tid);
+  return 0;
+}
+#endif
+
+#if USE_BEGINTHREAD
+/*
+  Minimal mutex wrapper.
+
+  Return value: always 0
+  'lock' selects the operation:
+     1    block until the mutex is available, then take it
+     0    release the mutex
+   -999   create/initialize the mutex
+  Any other value (including the documented-but-unimplemented -1
+  "check if locked") does nothing.
+*/
+int htsSetLock(PTHREAD_LOCK_TYPE* hMutex,int lock) {
+  switch (lock) {
+  case 1:       /* lock */
+#if HTS_WIN
+    WaitForSingleObject(*hMutex,INFINITE);
+#else
+    pthread_mutex_lock(hMutex);
+#endif
+    break;
+  case 0:       /* unlock */
+#if HTS_WIN
+    ReleaseMutex(*hMutex);
+#else
+    pthread_mutex_unlock(hMutex);
+#endif
+    break;
+  case -999:    /* create */
+#if HTS_WIN
+    *hMutex=CreateMutex(NULL,FALSE,NULL);
+#else
+    pthread_mutex_init(hMutex,0);
+#endif
+    break;
+  default:      /* nothing to do */
+    break;
+  }
+  return 0;
+}
+
+#endif
+
diff --git a/src/htsthread.h b/src/htsthread.h
new file mode 100644
index 0000000..cb3a139
--- /dev/null
+++ b/src/htsthread.h
@@ -0,0 +1,95 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: Threads */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+#ifndef HTS_DEFTHREAD
+#define HTS_DEFTHREAD
+
+#include "htsglobal.h"
+#if USE_PTHREAD
+#include <pthread.h> /* _beginthread, _endthread */
+#endif
+#if HTS_WIN
+#include "windows.h"
+#endif
+
+#if USE_BEGINTHREAD
+#if HTS_WIN
+
+/* Windows flavour: thread entry points are 'void __cdecl' functions, so
+   PTHREAD_RETURN expands to nothing; locks are HANDLEs (see htsSetLock) */
+#define PTHREAD_RETURN
+#define PTHREAD_TYPE void __cdecl
+#define PTHREAD_LOCK_TYPE HANDLE
+
+/* Useless - see '__declspec( thread )' */
+/*
+#define PTHREAD_KEY_TYPE void*
+#define PTHREAD_KEY_CREATE(ptrkey, uninit) do { *(ptrkey)=(void*)NULL; } while(0)
+#define PTHREAD_KEY_DELETE(key) do { key=(void*)NULL; } while(0)
+#define PTHREAD_KEY_SET(key, val, ptrtype) do { key=(void*)(val); } while(0)
+#define PTHREAD_KEY_GET(key, ptrval, ptrtype) do { *(ptrval)=(ptrtype)(key); } while(0)
+*/
+
+#else
+
+/* POSIX threads flavour: thread entry points return void* (PTHREAD_RETURN is
+   the value they return), locks are pthread mutexes and the KEY macros map
+   directly onto the pthread thread-specific-data API.
+   PTHREAD_KEY_GET stores the value, cast to 'ptrtype', into *ptrval. */
+#define PTHREAD_RETURN NULL
+#define PTHREAD_TYPE void*
+#define PTHREAD_LOCK_TYPE pthread_mutex_t
+#define PTHREAD_KEY_TYPE pthread_key_t
+#define PTHREAD_KEY_CREATE(ptrkey, uninit) pthread_key_create(ptrkey, uninit)
+#define PTHREAD_KEY_DELETE(key) pthread_key_delete(key)
+/* 'val' is parenthesized so that the cast applies to the whole argument
+   expression (e.g. "p + 1"), not just to its first operand */
+#define PTHREAD_KEY_SET(key, val, ptrtype) pthread_setspecific(key, (void*)(val))
+#define PTHREAD_KEY_GET(key, ptrval, ptrtype) do { *(ptrval)=(ptrtype)pthread_getspecific(key); } while(0)
+
+#endif
+
+#else
+
+/* Fallback when USE_BEGINTHREAD is off: a "key" is just a plain void*
+   variable, and the KEY macros read/write it directly */
+#define PTHREAD_LOCK_TYPE void*
+#define PTHREAD_KEY_TYPE void*
+#define PTHREAD_KEY_CREATE(ptrkey, uninit) do { *(ptrkey)=(void*)NULL; } while(0)
+#define PTHREAD_KEY_DELETE(key) do { key=(void*)NULL; } while(0)
+#define PTHREAD_KEY_SET(key, val, ptrtype) do { key=(void*)(val); } while(0)
+#define PTHREAD_KEY_GET(key, ptrval, ptrtype) do { *(ptrval)=(ptrtype)(key); } while(0)
+
+#endif
+
+/* Lock helper: lock==1 takes the mutex, 0 releases it, -999 initializes it.
+   NOTE(review): declared unconditionally here, but htsthread.c only defines
+   it inside "#if USE_BEGINTHREAD" - confirm callers are guarded the same way. */
+int htsSetLock(PTHREAD_LOCK_TYPE * hMutex,int lock);
+
+#if USE_PTHREAD
+/* pthread-based emulation of _beginthread (see htsthread.c) */
+unsigned long _beginthread( void* ( *start_address )( void * ), unsigned stack_size, void *arglist );
+#endif
+
+#endif
+
diff --git a/src/htstools.c b/src/htstools.c
new file mode 100644
index 0000000..1eeafbf
--- /dev/null
+++ b/src/htstools.c
@@ -0,0 +1,785 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: httrack.c subroutines: */
+/* various tools (filename analyzing ..) */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+#include "htstools.h"
+
+/* specific definitions */
+#include "htsbase.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+/* END specific definitions */
+
+
+// Build adr (address) and fil (file path) from a link and from its context
+// (origin_adr / origin_fil: the address and path of the page the link was found in)
+// [adr and fil are 1KB buffers]
+// Return value:
+// 0 : ok
+// -1 : error (empty link, malformed URL, unknown scheme, or too long)
+// -2 : unsupported protocol (ftp)
+int ident_url_relatif(char *lien,char* origin_adr,char* origin_fil,char* adr,char* fil) {
+ int ok=0;
+ int scheme=0;
+
+ adr[0]='\0'; fil[0]='\0'; // clear output buffers
+
+ // the link must not be empty
+ if (strnotempty(lien)==0) return -1; // error!
+
+ // Scheme? (a run of letters immediately followed by ':')
+ {
+ char* a=lien;
+ while (isalpha((unsigned char)*a))
+ a++;
+ if (*a == ':')
+ scheme=1;
+ }
+
+ // filter out unwanted links (mailto and the like)
+ // scheme+authority (//)
+ if (
+ (strfield(lien,"http://")) // scheme+//
+ || (strfield(lien,"file://")) // scheme+//
+ || (strncmp(lien,"//",2)==0) // "//" without scheme (-> default)
+ ) {
+ if (ident_url_absolute(lien,adr,fil)==-1) {
+ ok=-1; // URL error
+ }
+ }
+ else if (strfield(lien,"ftp://")) {
+ // Note: ftp:foobar.gif is not valid
+ if (ftp_available()) { // ftp is supported
+ if (ident_url_absolute(lien,adr,fil)==-1) {
+ ok=-1; // URL error
+ }
+ } else {
+ ok=-2; // not supported
+ }
+#if HTS_USEOPENSSL
+ } else if (strfield(lien,"https://")) {
+ // Note: ftp:foobar.gif is not valid
+ if (ident_url_absolute(lien,adr,fil)==-1) {
+ ok=-1; // URL error
+ }
+#endif
+ } else if ((scheme) && (
+ (!strfield(lien,"http:"))
+ && (!strfield(lien,"https:"))
+ && (!strfield(lien,"ftp:"))
+ )) {
+ ok=-1; // unknown scheme
+ } else { // this is a relative link
+ char* a;
+
+ // Build the complete URL from the current address,
+ // and from the current path when needed.
+
+ // copy the address (only if every input fits in HTS_URLMAXSIZE)
+ if (((int) strlen(origin_adr)<HTS_URLMAXSIZE) && ((int) strlen(origin_fil)<HTS_URLMAXSIZE) && ((int) strlen(lien)<HTS_URLMAXSIZE)) {
+
+ /* patch scheme if necessary */
+ if (strfield(lien,"http:")) {
+ lien+=5;
+ strcpy(adr, jump_protocol(origin_adr)); // same address; empty protocol (http)
+ } else if (strfield(lien,"https:")) {
+ lien+=6;
+ strcpy(adr, "https://"); // same address, forced to https
+ strcat(adr, jump_protocol(origin_adr));
+ } else if (strfield(lien,"ftp:")) {
+ lien+=4;
+ strcpy(adr, "ftp://"); // same address, forced to ftp
+ strcat(adr, jump_protocol(origin_adr));
+ } else {
+ strcpy(adr,origin_adr); // same address, and same protocol if any
+ }
+
+ if (*lien!='/') { // relative path (otherwise it is an absolute path, handled below)
+ // find the last '/' of the origin path, ignoring anything after a '?'
+ a=strchr(origin_fil,'?');
+ if (!a) a=origin_fil+strlen(origin_fil);
+ while((*a!='/') && ( a > origin_fil) ) a--;
+ if (*a=='/') { // ok, we have a '/'
+ if ( (((int) (a - origin_fil))+1+strlen(lien)) < HTS_URLMAXSIZE) {
+ // copy the directory part of the origin path (including the '/')
+ strncpy(fil,origin_fil,((int) (a - origin_fil))+1);
+ *(fil + ((int) (a - origin_fil))+1)='\0';
+
+ // append the relative path
+ if (((int) strlen(fil)+(int) strlen(lien)) < HTS_URLMAXSIZE) {
+ strcat(fil,lien + ((*lien=='/')?1:0) );
+ // simplify the resulting path (../ sequences)
+ fil_simplifie(fil);
+ } else
+ ok=-1; // error
+ } else { // error
+ ok=-1; // URL error
+ }
+ } else { // error
+ ok=-1; // URL error
+ }
+ } else { // absolute path
+ // copy the path as is
+ strcat(fil,lien);
+ } // *lien!='/'
+ } else
+ ok=-1;
+
+ } // test news: etc.
+
+ // the address is case insensitive: lower-case it (from jump_identification(adr) onwards)
+ {
+ char *a=jump_identification(adr);
+ while(*a) {
+ if ((*a>='A') && (*a<='Z'))
+ *a+='a'-'A';
+ a++;
+ }
+ }
+
+ return ok;
+}
+
+
+
+
+
+// Build in s the relative link leading from the current file curr_fil to
+// link (an absolute path).
+// ident_url_relatif() is expected to have been called before, so that link
+// is not itself a relative path.
+// Query strings ('?' and what follows) are ignored in both paths.
+// Returns 0 on success, -1 if an input is too long for the work buffers or
+// if the resulting link reaches HTS_URLMAXSIZE characters.
+// Note: s is written before its length is checked; the caller's buffer must
+// be large enough for the worst case.
+int lienrelatif(char* s,char* link,char* curr_fil) {
+  char _curr[HTS_URLMAXSIZE*2];
+  char newcurr_fil[HTS_URLMAXSIZE*2],newlink[HTS_URLMAXSIZE*2];
+  char* curr;
+  char* a;
+  //
+  newcurr_fil[0]='\0'; newlink[0]='\0';
+
+  // "erase" s
+  s[0]='\0';
+
+  /* Security check: the inputs are copied into fixed-size work buffers */
+  if ( (strlen(curr_fil) >= sizeof(_curr)) || (strlen(link) >= sizeof(newlink)) )
+    return -1;
+
+  // patch: strip the ? (parameters) parts, otherwise the comparison is wrong
+  if ( (a=strchr(curr_fil,'?')) ) {
+    strncat(newcurr_fil,curr_fil,(int) (a - curr_fil));
+    curr_fil = newcurr_fil;
+  }
+  if ( (a=strchr(link,'?')) ) {
+    strncat(newlink,link,(int) (a - link));
+    link = newlink;
+  }
+
+  // keep only the directory part of the current path
+  curr=_curr;
+  strcpy(curr,curr_fil);
+  // start from the last character (curr holds no '?' any more); for an empty
+  // path stay on the terminator instead of stepping before the buffer
+  a=curr+strlen(curr);
+  if (a > curr) a--;
+  while((*a!='/') && ( a> curr)) a--;     // look for the last / of the current path
+  if (*a=='/') *(a+1)='\0';               // cut right after it
+
+  // skip what both paths have in common
+  {
+    char *l;
+    if (*link=='/') link++;               // skip leading slash
+    if (*curr=='/') curr++;
+    l=link;
+    // drop the common prefix
+#if HTS_CASSE
+    while ((*link==*curr) && (*link!=0)) {link++; curr++; }
+#else
+    while ((streql(*link,*curr)) && (*link!=0)) {link++; curr++; }
+#endif
+    // but only whole directories may be skipped:
+    // with /toto/.. and /toto2/.. we must not skip "/toto" !
+    while(((*link!='/') || (*curr!='/')) && ( link > l)) { link--; curr--; }
+  }
+
+  // compute the depth of what remains of the current directory and go up
+  // (the ../ sequences HAVE ALREADY BEEN SIMPLIFIED)
+  a=curr;
+  if (*a=='/') a++;
+  while(*a) if (*(a++)=='/') strcat(s,"../");
+
+  // we are now in the common directory: append the remaining part of link
+  strcat(s,link + ((*link=='/')?1:0) );
+
+  /* Security check */
+  if (strlen(s) >= HTS_URLMAXSIZE)
+    return -1;
+
+  // s now looks like ../../test/truc.html
+  return 0;
+}
+
+/* Tell whether a link carries an authority part, i.e. starts with "//"
+   either directly ("//host/..") or right after an alphabetic scheme and
+   its colon ("http://host/..").
+   Returns 1 if so, 0 otherwise (relative links such as "/bar/foo.html",
+   or links whose leading letters are not followed by ':'). */
+int link_has_authority(char* lien) {
+  const char* p = lien;
+
+  if (isalpha((unsigned char)*p)) {
+    /* skip the run of letters that may form a scheme */
+    do {
+      p++;
+    } while (isalpha((unsigned char)*p));
+    /* letters not followed by ':' are not a scheme */
+    if (*p != ':')
+      return 0;
+    p++;
+  }
+
+  /* authority marker? */
+  return ((p[0] == '/') && (p[1] == '/')) ? 1 : 0;
+}
+
+/* Tell whether the address part of a link contains an '@' (as in
+   user:pass@host): returns 1 when an '@' appears after the protocol and
+   before the first '/' (or anywhere if there is no '/'), 0 otherwise. */
+int link_has_authorization(char* lien) {
+  char* host = jump_protocol(lien);
+  char* at = strchr(host, '@');
+  char* slash;
+
+  /* no '@' at all */
+  if (at == NULL)
+    return 0;
+
+  /* an '@' located after the first '/' belongs to the path, not to the address */
+  slash = strchr(host, '/');
+  return ((slash == NULL) || (at < slash));
+}
+
+
+// Convert a whole file/directory path to 8-3 or ISO9660 names.
+// The path 'save' is split on '/', each component is converted with
+// longfile_to_83() and the results are concatenated into n83, keeping the
+// '/' separators. 'mode' is passed through to longfile_to_83().
+// A component longer than the 255 characters of the work buffer is
+// truncated before conversion instead of overflowing it.
+void long_to_83(int mode,char* n83,char* save) {
+  n83[0]='\0';
+
+  while(*save) {
+    char fn83[256],fnl[256];
+    int len=0;        // length of the component in 'save'
+    int copied=0;     // number of characters actually stored in fnl
+    fn83[0]=fnl[0]='\0';
+    while((save[len]) && (save[len]!='/')) {
+      if (copied < (int) sizeof(fnl) - 1)
+        fnl[copied++]=save[len];
+      len++;
+    }
+    fnl[copied]='\0';
+    // conversion
+    longfile_to_83(mode,fn83,fnl);
+    strcat(n83,fn83);
+
+    // always skip the whole component, even if it was truncated
+    save+=len;
+    if (*save=='/') { strcat(n83,"/"); save++; }
+  }
+}
+
+
+// Convert a single file or directory name to an 8-3 or ISO9660 name.
+//  mode : 2 selects a 30-character base name (ISO9660); 1 and any other
+//         value select an 8-character base name (8-3)
+//  n83  : output buffer, receives "name" or "name.ext" (ext: 3 chars at most)
+//  save : input name; it is MODIFIED in place (forbidden characters and
+//         extra dots are replaced by '_')
+// Spaces are dropped from both the base name and the extension.
+void longfile_to_83(int mode,char* n83,char* save) {
+  int i,j,maxlen;
+  char nom[256];
+  char ext[256];
+  nom[0]=ext[0]='\0';
+
+  // maximum length of the base name
+  switch(mode) {
+  case 2:
+    maxlen=30;
+    break;
+  case 1:
+  default:
+    maxlen=8;
+    break;
+  }
+
+  /* No starting . */
+  if (save[0] == '.') {
+    save[0]='_';
+  }
+  /* No multiple dots: keep only the last one */
+  {
+    char* last_dot=strrchr(save, '.');
+    char* dot;
+    while((dot=strchr(save, '.'))) {
+      *dot = '_';
+    }
+    if (last_dot) {
+      *last_dot='.';
+    }
+  }
+  /*
+    Avoid: (ISO9660, but also suitable for 8-3)
+    (Thanks to jonat@cellcast.com for the hint)
+    /:;?\#*~
+    0x00-0x1f and 0x80-0xff
+  */
+  for(i=0 ; save[i] ; i++) {
+    if (
+      (strchr("/:;?\\#*~", save[i]))
+      ||
+      (save[i] < 32)
+      ||
+      (save[i] >= 127)
+      ) {
+      save[i]='_';
+    }
+  }
+
+  // copy the base name: up to maxlen non-space characters before the dot
+  i=j=0;
+  while((i<maxlen) && (save[j]) && (save[j]!='.')) {
+    if (save[j]!=' ') {
+      nom[i]=save[j];
+      i++;
+    }
+    j++;
+  }
+  nom[i]='\0';
+  // something is left (a dot, or the part of the name that was cut off):
+  // look for an extension after the last dot
+  if (save[j]) {
+    i=strlen(save)-1;
+    while((i>0) && (save[i]!='.') && (save[i]!='/')) i--;    // find last .
+    if (save[i]=='.') {
+      int k=0;
+      i++;
+      while((k<3) && (save[i]) ) { if (save[i]!=' ') { ext[k]=save[i]; k++; } i++; }
+      ext[k]='\0';
+    }
+  }
+  // build the result; the base name honours the mode's limit (it used to be
+  // cut to 8 characters even in ISO9660 mode)
+  n83[0]='\0';
+  strncat(n83,nom,maxlen);
+  if (ext[0] != '\0') {
+    strcat(n83,".");
+    strncat(n83,ext,3);
+  }
+}
+
+// Write backblue.gif and fade.gif into the directory 'base'.
+// Called with base==NULL, only resets the "already done" flag and returns 0.
+// Otherwise both files are (re)written when nothing has been written yet
+// since the last reset, or when the existing backblue.gif does not have the
+// expected size.
+// Returns 0 on success (or when there was nothing to do), 1 if a file could
+// not be created or written.
+int verif_backblue(char* base) {
+  int* done;
+  int ret=0;
+  NOSTATIC_RESERVE(done, int, 1);
+  //
+  if (!base) {   // init
+    *done=0;
+    return 0;
+  }
+  if ( (!*done)
+    || (fsize(fconcat(base,"backblue.gif")) != HTS_DATA_BACK_GIF_LEN)) {
+    FILE* fp = filecreate(fconcat(base,"backblue.gif"));
+    *done=1;
+    if (fp) {
+      // fwrite() returns the number of items written: one item of
+      // HTS_DATA_BACK_GIF_LEN bytes is requested, so success is 1
+      if (fwrite(HTS_DATA_BACK_GIF,HTS_DATA_BACK_GIF_LEN,1,fp) != 1)
+        ret=1;
+      fclose(fp);
+      usercommand(0,NULL,fconcat(base,"backblue.gif"));
+    } else
+      ret=1;
+    //
+    fp = filecreate(fconcat(base,"fade.gif"));
+    if (fp) {
+      if (fwrite(HTS_DATA_FADE_GIF,HTS_DATA_FADE_GIF_LEN,1,fp) != 1)
+        ret=1;
+      fclose(fp);
+      usercommand(0,NULL,fconcat(base,"fade.gif"));
+    } else
+      ret=1;
+  }
+  return ret;
+}
+
+// One-shot flags (two slots, indexed by nb).
+//  test==0 : reset flag nb, return 0
+//  test!=0 : return 1 the first time it is called for flag nb (and set the
+//            flag), 0 on subsequent calls until the flag is reset
+int verif_external(int nb,int test) {
+  int* flags;
+  NOSTATIC_RESERVE(flags, int, 2);
+  if (test) {
+    if (flags[nb])
+      return 0;        // already raised
+    flags[nb]=1;
+    return 1;
+  }
+  flags[nb]=0;         // reset
+  return 0;
+}
+
+
+// look for a string of the form name<spaces>=
+// returns the offset to skip, or 0 if not found
+/* SECTION OPTIMISEE:
+#define rech_tageq(adr,s) ( \
+ ( (*(adr-1)=='<') || (is_space(*(adr-1))) ) ? \
+ ( (streql(*adr,*s)) ? \
+ (__rech_tageq(adr,s)) \
+ : 0 \
+ ) \
+ : 0\
+ )
+*/
+/*
+HTS_INLINE int rech_tageq(const char* adr,const char* s) {
+ if ( (*(adr-1)=='<') || (is_space(*(adr-1))) ) { // <tag < tag etc
+ if (streql(*adr,*s)) { // tester premier octet (optimisation)
+ return __rech_tageq(adr,s);
+ }
+ }
+ return 0;
+}
+*/
+// Second part of the tag test: adr must start with the name s (strfield
+// match), optionally followed by spaces, then by '='.
+// Returns the offset just past the '=', or 0 if the pattern is not found.
+HTS_INLINE int __rech_tageq(const char* adr,const char* s) {
+  int pos = strfield(adr,s);
+  if (!pos)
+    return 0;
+  while(is_space(adr[pos]))
+    pos++;
+  return (adr[pos]=='=') ? (pos+1) : 0;
+}
+// Same as __rech_tageq, but digits are allowed between the name and the '='
+// (for <object src="bar.mov" .. hotspot123="foo.html">).
+// Returns the offset just past the '=', or 0 if the pattern is not found.
+HTS_INLINE int __rech_tageqbegdigits(const char* adr,const char* s) {
+  int pos = strfield(adr,s);
+  if (!pos)
+    return 0;
+  while(isdigit((unsigned char)adr[pos]))   // jump digits
+    pos++;
+  while(is_space(adr[pos]))
+    pos++;
+  return (adr[pos]=='=') ? (pos+1) : 0;
+}
+
+// Tag without '=': returns 1 if adr starts with the name s, is preceded by
+// '<' or a space (the character at adr-1 is read) and the name is not
+// followed by an alphanumeric character (<srcbis is not <src); 0 otherwise.
+HTS_INLINE int rech_sampletag(const char* adr,const char* s) {
+  int p;
+  // must follow '<' or a space:  <tag   < tag   etc.
+  if ( (*(adr-1)!='<') && (!is_space(*(adr-1))) )
+    return 0;
+  p=strfield(adr,s);
+  if (!p)
+    return 0;
+  return isalnum((unsigned char)adr[p]) ? 0 : 1;
+}
+
+/*
+  Tests whether the tag found at "from" is "tag".
+  The first character of from is skipped (the code starts at from+1), then
+  leading blanks; the following run of alphanumeric characters and '/'
+  (at most 250 of them) is extracted and compared with strfield2().
+  Returns the result of strfield2() (defined elsewhere).
+*/
+HTS_INLINE int check_tag(char* from,const char* tag) {
+  char name[256];
+  const char* cur = from + 1;
+  int len;
+  while(is_space(*cur))
+    cur++;
+  for(len = 0 ; len < 250 ; len++, cur++) {
+    if ( (!isalnum((unsigned char)*cur)) && (*cur != '/') )
+      break;
+    name[len] = *cur;
+  }
+  name[len] = '\0';
+  return strfield2(name,tag);    // compare
+}
+
+/*
+  Tests whether a file exceeds its size quota.
+  size      size to check; values <= 0 are never "too big"
+  maxhtml   limit applied when is_hypertext_mime(type) is true
+  maxnhtml  limit applied to every other type
+  A limit <= 0 disables the check. Returns 1 when size is above the
+  applicable limit, 0 otherwise.
+*/
+int istoobig(LLint size,LLint maxhtml,LLint maxnhtml,char* type) {
+  LLint limit;
+  if (size <= 0)
+    return 0;
+  limit = is_hypertext_mime(type) ? maxhtml : maxnhtml;
+  return ( (limit > 0) && (size > limit) ) ? 1 : 0;
+}
+
+
+/*
+  Builds the top-level "index.html" of a mirror base directory.
+  path     base directory; every sub-directory holding an "index.html"
+           gets one entry in the generated page
+  binpath  directory holding "templates/topindex-{header,body,footer}.html";
+           readfile_or() is given the built-in HTS_INDEX_* templates as
+           second argument (presumably the fallback - defined elsewhere)
+  Returns 1 when the index file was opened and the directory could be
+  enumerated, 0 otherwise (including a path too long for the work buffer).
+  The templates are used as printf formats: the header and footer receive
+  one string, the body two (escaped name, raw name).
+  Entries are written in directory enumeration order (no sorting is done).
+*/
+int hts_buildtopindex(char* path,char* binpath) {
+  FILE* fpo;
+  int retval=0;
+  char rpath[1024*2];
+  char *toptemplate_header=NULL,*toptemplate_body=NULL,*toptemplate_footer=NULL;
+
+  // the path is copied into rpath below: refuse what cannot fit
+  if ( (path == NULL) || (strlen(path) >= sizeof(rpath)) )
+    return 0;
+
+  // html templates
+  toptemplate_header=readfile_or(fconcat(binpath,"templates/topindex-header.html"),HTS_INDEX_HEADER);
+  toptemplate_body=readfile_or(fconcat(binpath,"templates/topindex-body.html"),HTS_INDEX_BODY);
+  toptemplate_footer=readfile_or(fconcat(binpath,"templates/topindex-footer.html"),HTS_INDEX_FOOTER);
+
+  if (toptemplate_header && toptemplate_body && toptemplate_footer) {
+
+    // working copy of the path, without its trailing '/'
+    strcpy(rpath,path);
+    if (rpath[0]) {
+      if (rpath[strlen(rpath)-1]=='/')
+        rpath[strlen(rpath)-1]='\0';
+    }
+
+    fpo=fopen(fconcat(rpath,"/index.html"),"wb");
+    if (fpo) {
+      find_handle h;
+      verif_backblue(concat(rpath,"/"));    // generate the gif files
+      // Header
+      fprintf(fpo,toptemplate_header,
+        "<!-- Mirror and index made by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->"
+        );
+
+      /* Find valid project names */
+      h = hts_findfirst(rpath);
+      if (h) {
+        struct topindex_chain * startchain=NULL;   /* head of the list */
+        struct topindex_chain * lastchain=NULL;    /* tail, kept apart so that a failed
+                                                      allocation cannot unlink later nodes */
+        struct topindex_chain * chain;
+        do {
+          if (hts_findisdir(h)) {
+            char iname[HTS_URLMAXSIZE*2];
+            char* dname=hts_findgetname(h);
+            /* skip names that would overflow iname or chain->name;
+               sizeof("//index.html") counts '/' + "/index.html" + NUL */
+            if ( (dname != NULL)
+              && (strlen(dname) < sizeof(startchain->name))
+              && (strlen(rpath) + strlen(dname) + sizeof("//index.html") <= sizeof(iname)) ) {
+              strcpy(iname,rpath);
+              strcat(iname,"/");
+              strcat(iname,dname);
+              strcat(iname,"/index.html");
+              if (fexist(iname)) {
+                chain=calloc(sizeof(struct topindex_chain), 1);
+                if (chain) {
+                  strcpy(chain->name, dname);
+                  chain->next=NULL;
+                  if (lastchain)
+                    lastchain->next=chain;
+                  else
+                    startchain=chain;
+                  lastchain=chain;
+                }
+              }
+            }
+          }
+        } while(hts_findnext(h));
+        hts_findclose(h);
+
+        /* Write the index entries, releasing each node once written */
+        chain=startchain;
+        while(chain) {
+          struct topindex_chain * next=chain->next;
+          char hname[HTS_URLMAXSIZE*2];
+          /* escape_check_url() rewrites hname in place; the 3x margin assumes
+             at most one %XX escape per byte - TODO confirm against its definition */
+          if (strlen(chain->name)*3 < sizeof(hname)) {
+            strcpy(hname,chain->name);
+            escape_check_url(hname);
+            fprintf(fpo,toptemplate_body,
+              hname,
+              chain->name
+              );
+          }
+          free(chain);
+          chain=next;
+        }
+
+        retval=1;
+      }
+
+      // Footer
+      fprintf(fpo,toptemplate_footer,
+        "<!-- Mirror and index made by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->"
+        );
+
+      fclose(fpo);
+
+    }
+
+  }
+
+  if (toptemplate_header)
+    freet(toptemplate_header);
+  if (toptemplate_body)
+    freet(toptemplate_body);
+  if (toptemplate_footer)
+    freet(toptemplate_footer);
+
+  return retval;
+}
+
+
+
+
+// Portable directory find functions
+/*
+// Example:
+find_handle h = hts_findfirst("/tmp");
+if (h) {
+ do {
+ if (hts_findisfile(h))
+ printf("File: %s (%d octets)\n",hts_findgetname(h),hts_findgetsize(h));
+ else if (hts_findisdir(h))
+ printf("Dir: %s\n",hts_findgetname(h));
+ } while(hts_findnext(h));
+ hts_findclose(h);
+}
+*/
+/*
+  Opens a directory enumeration on path and positions it on the first entry.
+  Returns a handle to be released with hts_findclose(), or NULL when path
+  is NULL or empty, does not fit in the internal buffers, cannot be opened,
+  or no first entry could be read.
+*/
+find_handle hts_findfirst(char* path) {
+  find_handle_struct* find;
+  if (path == NULL)
+    return NULL;
+  if (!strnotempty(path))
+    return NULL;
+  /* calloc() already zeroes the structure */
+  find = (find_handle_struct*) calloc(1,sizeof(find_handle_struct));
+  if (find == NULL)
+    return NULL;
+#if HTS_WIN
+  {
+    char rpath[1024*2];
+    /* room needed: path + "\\" + "*.*" + NUL */
+    if (strlen(path) + 5 <= sizeof(rpath)) {
+      strcpy(rpath,path);
+      if (rpath[strlen(rpath)-1]!='\\')
+        strcat(rpath,"\\");
+      strcat(rpath,"*.*");
+      find->handle = FindFirstFile(rpath,&find->hdata);
+      if (find->handle != INVALID_HANDLE_VALUE)
+        return find;
+    }
+  }
+#else
+  /* room needed: path + "/" + NUL */
+  if (strlen(path) + 2 <= sizeof(find->path)) {
+    strcpy(find->path,path);
+    if (find->path[strlen(find->path)-1]!='/')
+      strcat(find->path,"/");
+    find->hdir=opendir(path);
+    if (find->hdir != NULL) {
+      if (hts_findnext(find) == 1)
+        return find;
+      /* no usable first entry: do not leak the directory stream */
+      closedir(find->hdir);
+      find->hdir=NULL;
+    }
+  }
+#endif
+  free((void*)find);
+  return NULL;
+}
+/*
+  Moves the enumeration to the next directory entry.
+  Returns 1 when an entry is available, 0 at the end of the directory
+  (or when find is NULL).
+  Non-Windows: entries that cannot be stat()ed (a dangling symbolic link,
+  for instance) are skipped instead of ending the enumeration, so every
+  entry reported to the caller has a valid filestat.
+*/
+int hts_findnext(find_handle find) {
+  if (find) {
+#if HTS_WIN
+    if ( (FindNextFile(find->handle,&find->hdata)))
+      return 1;
+#else
+    memset(&(find->filestat), 0, sizeof(find->filestat));
+    if (find->hdir != NULL) {
+      while ((find->dirp=readdir(find->hdir)) != NULL) {
+        if (!stat(concat(find->path,find->dirp->d_name),&find->filestat))
+          return 1;
+        /* stat() failed: forget whatever it left behind and try the next entry */
+        memset(&(find->filestat), 0, sizeof(find->filestat));
+      }
+    }
+#endif
+  }
+  return 0;
+}
+/*
+  Ends an enumeration: closes the underlying search handle / directory
+  stream when it is set, then frees the handle structure itself.
+  A NULL handle is accepted. Always returns 0.
+*/
+int hts_findclose(find_handle find) {
+  if (!find)
+    return 0;
+#if HTS_WIN
+  if (find->handle) {
+    FindClose(find->handle);
+    find->handle=NULL;
+  }
+#else
+  if (find->hdir) {
+    closedir (find->hdir);
+    find->hdir=NULL;
+  }
+#endif
+  free((void*)find);
+  return 0;
+}
+/*
+  Name of the current entry. The pointer refers to storage owned by the
+  handle (hdata.cFileName or dirp->d_name). Returns NULL when find is NULL
+  or, on non-Windows builds, when there is no current entry.
+*/
+char* hts_findgetname(find_handle find) {
+  if (!find)
+    return NULL;
+#if HTS_WIN
+  return find->hdata.cFileName;
+#else
+  return (find->dirp) ? find->dirp->d_name : NULL;
+#endif
+}
+/*
+  Size of the current entry, converted to int: nFileSizeLow on Windows,
+  st_size elsewhere. Returns -1 when find is NULL.
+*/
+int hts_findgetsize(find_handle find) {
+  if (!find)
+    return -1;
+#if HTS_WIN
+  return find->hdata.nFileSizeLow;
+#else
+  return find->filestat.st_size;
+#endif
+}
+/*
+  Returns 1 when the current entry is a directory that is not rejected by
+  hts_findissystem() (which covers "." and ".."), 0 otherwise.
+*/
+int hts_findisdir(find_handle find) {
+  if (!find)
+    return 0;
+  if (hts_findissystem(find))
+    return 0;
+#if HTS_WIN
+  return (find->hdata.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) ? 1 : 0;
+#else
+  return (S_ISDIR(find->filestat.st_mode)) ? 1 : 0;
+#endif
+}
+/*
+  Returns 1 when the current entry is a file that is not rejected by
+  hts_findissystem(): anything but a directory on Windows, a regular
+  file (S_ISREG) elsewhere. Returns 0 otherwise.
+*/
+int hts_findisfile(find_handle find) {
+  if (!find)
+    return 0;
+  if (hts_findissystem(find))
+    return 0;
+#if HTS_WIN
+  return (find->hdata.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) ? 0 : 1;
+#else
+  return (S_ISREG(find->filestat.st_mode)) ? 1 : 0;
+#endif
+}
+/*
+  Returns 1 when the current entry must be ignored by callers:
+  - Windows: system, hidden or temporary attribute set
+  - elsewhere: character/block device, fifo or socket
+  - both: the "." and ".." entries
+  Returns 0 otherwise, including when find is NULL or (non-Windows) when
+  there is no current entry.
+*/
+int hts_findissystem(find_handle find) {
+  if (find) {
+#if HTS_WIN
+    if (find->hdata.dwFileAttributes & (FILE_ATTRIBUTE_SYSTEM|FILE_ATTRIBUTE_HIDDEN|FILE_ATTRIBUTE_TEMPORARY))
+      return 1;
+    else if ( (!strcmp(find->hdata.cFileName,"..")) || (!strcmp(find->hdata.cFileName,".")) )
+      return 1;
+#else
+    if (
+      (S_ISCHR(find->filestat.st_mode))
+      ||
+      (S_ISBLK(find->filestat.st_mode))
+      ||
+      (S_ISFIFO(find->filestat.st_mode))
+      ||
+      (S_ISSOCK(find->filestat.st_mode))
+      )
+      return 1;
+    /* dirp is NULL when no entry is current (same guard as hts_findgetname) */
+    else if ( (find->dirp != NULL)
+           && ( (!strcmp(find->dirp->d_name,"..")) || (!strcmp(find->dirp->d_name,".")) ) )
+      return 1;
+#endif
+  }
+  return 0;
+}
diff --git a/src/htstools.h b/src/htstools.h
new file mode 100644
index 0000000..b3e2c7e
--- /dev/null
+++ b/src/htstools.h
@@ -0,0 +1,138 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: httrack.c subroutines: */
+/* various tools (filename analyzing ..) */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+
+#ifndef HTSTOOLS_DEFH
+#define HTSTOOLS_DEFH
+
+/* specific definitions */
+#include <stdio.h>
+#include <stdlib.h>
+#include "htsbase.h"
+#include "htscore.h"
+
+#if HTS_WIN
+#else
+#include <dirent.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#endif
+
+int ident_url_relatif(char *lien,char* urladr,char* urlfil,char* adr,char* fil);
+int lienrelatif(char* s,char* link,char* curr);
+int link_has_authority(char* lien);
+int link_has_authorization(char* lien);
+void long_to_83(int mode,char* n83,char* save);
+void longfile_to_83(int mode,char* n83,char* save);
+HTS_INLINE int __rech_tageq(const char* adr,const char* s);
+HTS_INLINE int __rech_tageqbegdigits(const char* adr,const char* s);
+#define rech_tageq(adr,s) /* attribute search "s<spaces>=": evaluates to __rech_tageq(adr,s)
+   when the character before adr is '<' or a blank and the first characters
+   of adr and s pass streql(); evaluates to 0 otherwise.
+   adr and s are expanded several times: pass expressions without side effects;
+   adr-1 must be readable. */ \
+ ( \
+ ( (*((adr)-1)=='<') || (is_space(*((adr)-1))) ) ? \
+ ( \
+ (streql(*(adr),*(s))) ? \
+ (__rech_tageq((adr),(s))) \
+ : 0 \
+ ) \
+ : 0\
+ )
+#define rech_tageqbegdigits(adr,s) /* same guards as rech_tageq(), but delegates to
+   __rech_tageqbegdigits(adr,s); evaluates to 0 when the guards fail.
+   adr and s are expanded several times: pass expressions without side effects;
+   adr-1 must be readable. */ \
+ ( \
+ ( (*((adr)-1)=='<') || (is_space(*((adr)-1))) ) ? \
+ ( \
+ (streql(*(adr),*(s))) ? \
+ (__rech_tageqbegdigits((adr),(s))) \
+ : 0 \
+ ) \
+ : 0\
+ )
+//HTS_INLINE int rech_tageq(const char* adr,const char* s);
+HTS_INLINE int rech_sampletag(const char* adr,const char* s);
+HTS_INLINE int check_tag(char* from,const char* tag);
+int verif_backblue(char* base);
+int verif_external(int nb,int test);
+
+int istoobig(LLint size,LLint maxhtml,LLint maxnhtml,char* type);
+
+int hts_buildtopindex(char* path,char* binpath);
+
+
+
+/* Portable directory find functions:
+   state of one directory enumeration, in two platform flavours.
+   find_handle (a pointer to this state) is what the hts_find* functions
+   take and return. */
+
+#if HTS_WIN
+
+typedef struct {
+ WIN32_FIND_DATA hdata; /* current entry, filled by FindFirstFile/FindNextFile */
+ HANDLE handle; /* search handle returned by FindFirstFile */
+} find_handle_struct;
+
+
+#else
+
+typedef struct {
+ DIR * hdir; /* directory stream returned by opendir() */
+ struct dirent* dirp; /* current entry returned by readdir() */
+ struct stat filestat; /* stat() result for the current entry */
+ char path[2048]; /* enumerated directory, with a trailing '/' */
+} find_handle_struct;
+
+#endif
+
+typedef find_handle_struct* find_handle;
+
+typedef struct topindex_chain { /* singly-linked list node used by hts_buildtopindex() */
+ char name[2048]; /* directory name, copied from hts_findgetname() */
+ struct topindex_chain* next; /* next element, NULL at the end of the list */
+} topindex_chain ;
+
+
+// Directory find functions
+find_handle hts_findfirst(char* path);
+int hts_findnext(find_handle find);
+int hts_findclose(find_handle find);
+//
+char* hts_findgetname(find_handle find);
+int hts_findgetsize(find_handle find);
+int hts_findisdir(find_handle find);
+int hts_findisfile(find_handle find);
+int hts_findissystem(find_handle find);
+
+
+
+
+#endif
diff --git a/src/htswizard.c b/src/htswizard.c
new file mode 100644
index 0000000..b23f5fb
--- /dev/null
+++ b/src/htswizard.c
@@ -0,0 +1,880 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: httrack.c subroutines: */
+/* wizard system (accept/refuse links) */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+#include "htswizard.h"
+#include "htsdefines.h"
+
+/* specific definitions */
+#include "htsbase.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+/* END specific definitions */
+
+// version 1 for httpmirror
+// flush both logs when opt->flush is set (the log files are read progressively); expands to a bare 'if' statement and needs 'opt' in scope
+#define test_flush if (opt->flush) { fflush(opt->log); fflush(opt->errlog); }
+
+// shortcuts to lighten the syntax: address and file of the current link liens[ptr] ('liens' and 'ptr' must be in scope)
+#define urladr (liens[ptr]->adr)
+#define urlfil (liens[ptr]->fil)
+
+/* Free filters[0] so that a new element can be inserted in filters[0]:
+   every entry filters[0..*filptr-1] is copied one slot further, filters[0]
+   is emptied, then *filptr is incremented and clamped to filter_max.
+   Needs 'filters', 'filptr' and 'filter_max' in scope.
+   NOTE(review): when *filptr already equals filter_max, filters[filter_max]
+   is written - confirm the array has room for that extra slot. */
+#define HT_INSERT_FILTERS0 {\
+ int i;\
+ if (*filptr > 0) {\
+ for(i = (*filptr)-1 ; i>=0 ; i--) {\
+ strcpy(filters[i+1],filters[i]);\
+ }\
+ }\
+ strcpy(filters[0],"");\
+ (*filptr)++;\
+ (*filptr)=minimum((*filptr),filter_max);\
+}
+
+
+
+/*
+Decides whether the link adr/fil, found while processing liens[ptr], may be fetched.
+
+httrackp* opt             option block
+int ptr,int lien_tot,lien_url** liens
+                          link table; liens[ptr] is the link currently being processed
+char* adr,char* fil
+                          address/file to test
+char*** ptrfilters,int* filptr,int filter_max
+                          filter table, number of filters in use (updated when a filter is inserted), maximum
+robots_wizard* robots
+                          robots.txt data, passed to checkrobots() when opt->robots is set
+int* set_prio_to
+                          mandatory output "capture this link with prio=N-1"
+                          (written without a NULL check)
+int* just_test_it
+                          optional output (may be NULL) "only test this link"
+
+The test runs in phases: (1) immediate refusals, (2) up/down and
+same/foreign address rules, (3) near links, (4) wizard: filters, robots.txt
+and optional user question, (5) "just test" mode, (6) external check
+callback (HTS_ANALYSTE builds).
+
+return value:
+  0  accepted
+  1  refused
+ -1  no opinion
+*/
+int hts_acceptlink(httrackp* opt,
+ int ptr,int lien_tot,lien_url** liens,
+ char* adr,char* fil,
+ char*** ptrfilters,int* filptr,int filter_max,
+ robots_wizard* robots,
+ int* set_prio_to,
+ int* just_test_it) {
+
+ int forbidden_url=-1; // verdict so far: -1 no opinion, 0 accepted, 1 refused
+ int meme_adresse; // result of strfield2(adr,urladr): non-zero means "same address"
+ char** filters = *ptrfilters;
+
+ // -------------------- PHASE 1 --------------------
+
+ /* Infos */
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"wizard test begins: %s%s"LF,adr,fil);
+ test_flush;
+ }
+
+ /* Should non-HTML files be processed? */
+ if ((opt->getmode & 2)==0) { // no, they must not
+ if (!ishtml(fil)) { // not html: do not take it
+ //adr[0]='\0'; // do not process this link
+ forbidden_url=1; // forbid fetching this link
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"non-html file ignored at %s : %s"LF,adr,fil);
+ test_flush;
+ }
+
+ }
+ }
+
+ /* Level 1: do not parse any further! */
+ if (ptr>0) {
+ if (liens[ptr]->depth <= 1) {
+ forbidden_url=1; // forbid fetching this link
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"file from too far level ignored at %s : %s"LF,adr,fil);
+ test_flush;
+ }
+ }
+ }
+
+ /* phase 1 refused the link: return immediately! */
+ if (forbidden_url==1) {
+ return forbidden_url;
+ }
+
+ // -------------------- PHASE 2 --------------------
+
+ // ------------------------------------------------------
+ // should this link be processed?.. check travel permissions
+ meme_adresse=strfield2(adr,urladr);
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug");
+ if (meme_adresse)
+ fprintf(opt->log,"Compare addresses: %s=%s"LF,adr,urladr);
+ else
+ fprintf(opt->log,"Compare addresses: %s!=%s"LF,adr,urladr);
+ test_flush;
+ }
+ if (meme_adresse) { // same address
+ { // check the "going down" restriction
+ // MODIFIED: after going up and then down again, some files could become unreachable
+ // problem: a file may be virtually reachable through one page while its link is located on another page *only*..
+ char tempo[HTS_URLMAXSIZE*2];
+ char tempo2[HTS_URLMAXSIZE*2];
+
+ // note (up/down): computed from the primary link, AND from the previous link.
+ // e.g.: after going down twice, going up once is allowed
+
+ if (lienrelatif(tempo,fil,liens[liens[ptr]->premier]->fil)==0) {
+ if (lienrelatif(tempo2,fil,liens[ptr]->fil)==0) {
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"build relative links to test: %s %s (with %s and %s)"LF,tempo,tempo2,liens[liens[ptr]->premier]->fil,liens[ptr]->fil);
+ test_flush;
+ }
+
+ // if coming from primary, do not test lienrelatif against it (since the host is "different")
+ /*if (liens[liens[ptr]->premier] == 0) { // comes from primary
+ }
+ */
+
+ // NEW: finally OK, except for the 'moved' links flagged by link_import
+ // PROBLEM: cancelled because an isolated accepted link could trigger a whole mirror
+
+ // (same level test (NEW, because of some problems with non-integrated filters))
+ // NEW
+ if ( (!strchr(tempo+1,'/')) || (!strchr(tempo2+1,'/')) ) {
+ if (!liens[ptr]->link_import) { // does not result from a 'moved'
+ forbidden_url=0;
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"same level link authorized: %s%s"LF,adr,fil);
+ test_flush;
+ }
+ }
+ }
+
+ // down
+ if ( (strncmp(tempo,"../",3)) || (strncmp(tempo2,"../",3))) { // not going up, otherwise this does not concern us
+ int test1,test2;
+ if (!strncmp(tempo,"../",3))
+ test1=0;
+ else
+ test1 = (strchr(tempo +((*tempo =='/')?1:0),'/')!=NULL);
+ if (!strncmp(tempo2,"../",3))
+ test2=0;
+ else
+ test2 = (strchr(tempo2+((*tempo2=='/')?1:0),'/')!=NULL);
+ if ( (test1) && (test2) ) { // can only be reached by going down
+ if ((opt->seeker & 1)==0) { // going down is forbidden
+ forbidden_url=1;
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"lower link canceled: %s%s"LF,adr,fil);
+ test_flush;
+ }
+ } else { // allowed a priori - NEW
+ if (!liens[ptr]->link_import) { // does not result from a 'moved'
+ forbidden_url=0;
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"lower link authorized: %s%s"LF,adr,fil);
+ test_flush;
+ }
+ }
+ }
+ } else if ( (test1) || (test2) ) { // the link can be reached by going down
+ if ((opt->seeker & 1)!=0) { // going down is allowed - NEW
+ if (!liens[ptr]->link_import) { // does not result from a 'moved'
+ forbidden_url=0;
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"lower link authorized: %s%s"LF,adr,fil);
+ test_flush;
+ }
+ }
+ }
+ }
+ }
+
+
+ // up
+ if ( (!strncmp(tempo,"../",3)) && (!strncmp(tempo2,"../",3)) ) { // cannot be reached without going up
+ if ((opt->seeker & 2)==0) { // going up is forbidden
+ forbidden_url=1;
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"upper link canceled: %s%s"LF,adr,fil);
+ test_flush;
+ }
+ } else { // going up is allowed - NEW
+ if (!liens[ptr]->link_import) { // does not result from a 'moved'
+ forbidden_url=0;
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"upper link authorized: %s%s"LF,adr,fil);
+ test_flush;
+ }
+ }
+ }
+ } else if ( (!strncmp(tempo,"../",3)) || (!strncmp(tempo2,"../",3)) ) { // reachable by going up
+ if ((opt->seeker & 2)!=0) { // going up is allowed - NEW
+ if (!liens[ptr]->link_import) { // does not result from a 'moved'
+ forbidden_url=0;
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"upper link authorized: %s%s"LF,adr,fil);
+ test_flush;
+ }
+ }
+ } // otherwise allowed when going down
+ }
+
+
+ } else {
+ if (opt->errlog) {
+ fprintf(opt->errlog,"Error building relative link %s and %s"LF,fil,liens[ptr]->fil);
+ test_flush;
+ }
+ }
+ } else {
+ if (opt->errlog) {
+ fprintf(opt->errlog,"Error building relative link %s and %s"LF,fil,liens[liens[ptr]->premier]->fil);
+ test_flush;
+ }
+ }
+
+ } // end of the "going down" check
+
+ { // check the "going up" restriction -- NOTE(review): this block only rebuilds the relative links and logs failures, it never changes forbidden_url
+ char tempo[HTS_URLMAXSIZE*2];
+ char tempo2[HTS_URLMAXSIZE*2];
+ if (lienrelatif(tempo,fil,liens[liens[ptr]->premier]->fil)==0) {
+ if (lienrelatif(tempo2,fil,liens[ptr]->fil)==0) {
+ } else {
+ if (opt->errlog) {
+ fprintf(opt->errlog,"Error building relative link %s and %s"LF,fil,liens[ptr]->fil);
+ test_flush;
+ }
+
+ }
+ } else {
+ if (opt->errlog) {
+ fprintf(opt->errlog,"Error building relative link %s and %s"LF,fil,liens[liens[ptr]->premier]->fil);
+ test_flush;
+ }
+
+ }
+ } // end of the "going up" check
+
+ } else { // different address: may we leave?
+
+ //if (!opt->wizard) { // non-wizard mode
+ // should this link be processed?.. check exit permissions
+ switch((opt->travel & 255)) {
+ case 0:
+ if (!opt->wizard) // non-wizard mode
+ forbidden_url=1; break; // not allowed to go beyond the address (only the assignment depends on the if; the break is unconditional)
+ case 1: { // may go to the same dom.xxx
+ int i=strlen(adr)-1;
+ int j=strlen(urladr)-1;
+ while( (i>0) && (adr[i]!='.')) i--;
+ while( (j>0) && (urladr[j]!='.')) j--;
+ i--; j--;
+ while( (i>0) && (adr[i]!='.')) i--;
+ while( (j>0) && (urladr[j]!='.')) j--;
+ if ((i>0) && (j>0)) {
+ if (!strfield2(adr+i,urladr+j)) { // !=
+ if (!opt->wizard) { // non-wizard mode
+ //printf("refused: %s\n",adr);
+ forbidden_url=1; // not the same domain
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"foreign domain link canceled: %s%s"LF,adr,fil);
+ test_flush;
+ }
+ }
+
+ } else {
+ if (opt->wizard) { // wizard mode
+ forbidden_url=0; // same domain
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"same domain link authorized: %s%s"LF,adr,fil);
+ test_flush;
+ }
+ }
+ }
+
+ } else
+ forbidden_url=1;
+ }
+ break;
+ case 2: { // may go to the same .xxx
+ int i=strlen(adr)-1;
+ int j=strlen(urladr)-1;
+ while( (i>0) && (adr[i]!='.')) i--;
+ while( (j>0) && (urladr[j]!='.')) j--;
+ if ((i>0) && (j>0)) {
+ if (!strfield2(adr+i,urladr+j)) { // !-
+ if (!opt->wizard) { // non-wizard mode
+ //printf("refused: %s\n",adr);
+ forbidden_url=1; // not the same .xx
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"foreign location link canceled: %s%s"LF,adr,fil);
+ test_flush;
+ }
+ }
+ } else {
+ if (opt->wizard) { // wizard mode
+ forbidden_url=0; // same domain
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"same location link authorized: %s%s"LF,adr,fil);
+ test_flush;
+ }
+ }
+ }
+ } else forbidden_url=1;
+ }
+ break;
+ case 7: // everywhere!!
+ if (opt->wizard) { // wizard mode
+ forbidden_url=0;
+ break;
+ }
+ } // switch
+
+ // OLD POSITION -- fetch the links located near a link (nearlink)
+
+ } // end of same/different address test
+
+ // -------------------- PHASE 3 --------------------
+
+ // fetch the links located near a link (nearlink) (new position)
+ if (opt->nearlink) {
+ if (!ishtml(fil)) { // not html
+ //printf("ok %s%s\n",ad,fil);
+ forbidden_url=0; // allow
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"near link authorized: %s%s"LF,adr,fil);
+ test_flush;
+ }
+ }
+ }
+
+ // -------------------- PHASE 4 --------------------
+
+ // ------------------------------------------------------
+ // In wizard mode, a special link may be allowed or forbidden
+ // even before its position, hierarchy, etc. are tested.
+ // this may override the previous forbidden_url
+ if (opt->wizard) { // the wizard steps in..
+ //
+ int question=1; // ask a question
+ int force_mirror=0; // for mirror links
+ int filters_answer=0; // decision taken by the filters
+ char l[HTS_URLMAXSIZE*2];
+ char lfull[HTS_URLMAXSIZE*2];
+
+ if (forbidden_url!=-1) question=0; // no question: already resolved
+
+ // build the complete URL of the current link
+ strcpy(l,jump_identification(adr));
+ if (*fil!='/') strcat(l,"/");
+ strcat(l,fil);
+ // full version (http://foo:bar@www.foo.com/bar.html)
+ if (!link_has_authority(adr))
+ strcpy(lfull,"http://");
+ else
+ lfull[0]='\0';
+ strcat(lfull,adr);
+ if (*fil!='/') strcat(lfull,"/");
+ strcat(lfull,fil);
+
+ // test filters (explicitly allowed or forbidden URLs)
+
+ // primary link: the filter (joker) test is skipped
+ if (ptr==0) { // primary link: allow
+ question=1; // the question will be resolved automatically
+ forbidden_url=0;
+ } else {
+ int jok;
+ // filters, 0=don't know 1=ok -1=forbidden
+ {
+ int jokDepth1=0,jokDepth2=0;
+ int jok1=0,jok2=0;
+ jok1 = fa_strjoker(filters,*filptr,lfull,NULL,NULL,&jokDepth1);
+ jok2 = fa_strjoker(filters,*filptr,l, NULL,NULL,&jokDepth2);
+ if (jok2 == 0) // #2 doesn't know
+ jok = jok1; // then, use #1
+ else if (jok1 == 0) // #1 doesn't know
+ jok = jok2; // then, use #2
+ else if (jokDepth1 >= jokDepth2) // #1 matching rule is "after" #2, then it is prioritary
+ jok = jok1;
+ else // #2 matching rule is "after" #1, then it is prioritary
+ jok = jok2;
+ }
+
+ if (jok == 1) { // allowed
+ filters_answer=1; // decision taken by the filters
+ question=0; // do not ask any question: allowed
+ forbidden_url=0; // URL allowed
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"(wizard) explicit authorized link: link %s at %s%s"LF,l,urladr,urlfil);
+ test_flush;
+ }
+ } else if (jok == -1) {
+ filters_answer=1; // decision taken by the filters
+ question=0; // do not ask any question:
+ forbidden_url=1; // URL forbidden
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"(wizard) explicit forbidden link: link %s at %s%s"LF,l,urladr,urlfil);
+ test_flush;
+ }
+ } // otherwise leave everything untouched
+ }
+
+ // check the "mirror links" mode
+ if (question) {
+ if (opt->mirror_first_page) { // mirror links mode
+ if (liens[ptr]->precedent==0) { // parent=primary!
+ forbidden_url=0; // allowed
+ question=1; // automatic resolution
+ force_mirror=5; // mirror (5)
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"(wizard) explicit mirror link: link %s at %s%s"LF,l,urladr,urlfil);
+ test_flush;
+ }
+ }
+ }
+ }
+
+ // check the external recursion depth
+ if ((question) && (ptr>0) && (!force_mirror)) {
+ if (opt->extdepth>0) {
+ // *set_prio_to = opt->extdepth + 1;
+ *set_prio_to = opt->extdepth + 1;
+ forbidden_url=0; // allowed
+ question=0; // automatic resolution
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"(wizard) ambiguous link accepted (external depth): link %s at %s%s"LF,l,urladr,urlfil);
+ test_flush;
+ }
+ }
+ }
+
+ // the question has to be asked.. may we ask it?
+ // (yes I know, how thoughtful, thank you thank you)
+ if ((question) && (ptr>0) && (!force_mirror)) {
+ if (opt->wizard==2) { // discard every link not listed as allowed (or unknown)
+ question=0;
+ forbidden_url=1;
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"(wizard) ambiguous forbidden link: link %s at %s%s"LF,l,urladr,urlfil);
+ test_flush;
+ }
+ }
+ }
+
+ // check robots.txt
+ if (opt->robots) {
+ int r = checkrobots(robots,adr,fil);
+ if (r == -1) { // forbidden
+#if DEBUG_ROBOTS
+ printf("robots.txt forbidden: %s%s\n",adr,fil);
+#endif
+ // question resolved, by the filters, and non-strict robots mode
+ if ((!question) && (filters_answer) && (opt->robots == 1) && (forbidden_url!=1)) {
+ r=0; // cancel the robots.txt prohibition
+ if (!forbidden_url) {
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"Warning link followed against robots.txt: link %s at %s%s"LF,l,adr,fil);
+ test_flush;
+ }
+ }
+ }
+ if (r == -1) { // forbid
+ forbidden_url=1;
+ question=0;
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"(robots.txt) forbidden link: link %s at %s%s"LF,l,adr,fil);
+ test_flush;
+ }
+ }
+ }
+ }
+
+ if (!question) {
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ if (!forbidden_url) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"(wizard) shared foreign domain link: link %s at %s%s"LF,l,urladr,urlfil);
+ } else {
+ fspc(opt->log,"debug"); fprintf(opt->log,"(wizard) cancelled foreign domain link: link %s at %s%s"LF,l,urladr,urlfil);
+ }
+ test_flush;
+ }
+#if BDEBUG==3
+ printf("at %s in %s, wizard says: url %s ",urladr,urlfil,l);
+ if (forbidden_url) printf("cancelled"); else printf(">SHARED<");
+ printf("\n");
+#endif
+ }
+
+ /* a question is pending, or primary link (record authorizations) */
+ if (question || (ptr==0)) {
+#if HTS_ANALYSTE
+ char* s;
+#else
+ char s[4];
+#endif
+ int n=0;
+
+ // if primary (see further down) then ...
+ if ((ptr!=0) && (force_mirror==0)) {
+ HTS_REQUEST_START;
+ HT_PRINT("\n");
+ HT_PRINT("At "); HT_PRINT(urladr); HT_PRINT(", there is a link ("); HT_PRINT(adr); HT_PRINT("/"); HT_PRINT(fil); HT_PRINT(") which goes outside the address."LF);
+ HT_PRINT("What should I do? (press a key + enter)"LF LF);
+ HT_PRINT("* Ignore all further links" LF);
+ HT_PRINT("0 Ignore this link (default if empty entry)"LF);
+ HT_PRINT("1 Ignore directory and lower structures"LF);
+ HT_PRINT("2 Ignore all domain"LF);
+ //HT_PRINT("3 (Ignore location, not implemented)\n");
+ HT_PRINT(LF);
+ HT_PRINT("4 Get only this page/link"LF);
+ HT_PRINT("5 Mirror this link (useful)"LF);
+ HT_PRINT("6 Mirror links located in the same domain"LF);
+ HT_PRINT(LF);
+//#if HTS_ANALYSTE!=2
+//HT_PRINT("! View extract of html code where the link is located"LF);
+//#endif
+ HTS_REQUEST_END;
+#if HTS_ANALYSTE
+ {
+ char tempo[HTS_URLMAXSIZE*2];
+ tempo[0]='\0';
+ strcat(tempo,adr);
+ strcat(tempo,"/");
+ strcat(tempo,fil);
+ s=hts_htmlcheck_query3(tempo);
+ }
+#else
+ do {
+ io_flush; linput(stdin,s,2);
+#endif
+ if (strnotempty(s)==0) // empty entry (just enter)
+ n=0;
+ else if (isdigit((unsigned char)*s))
+ sscanf(s,"%d",&n);
+ else {
+ switch(*s) {
+ case '*': n=-1; break;
+ case '!': n=-999; {
+ /*char *a;
+ int i;
+ a=copie_de_adr-128;
+ if (a<r.adr) a=r.adr;
+ for(i=0;i<256;i++) {
+ if (a==copie_de_adr) printf("\nHERE:\n");
+ printf("%c",*a++);
+ }
+ printf("\n\n");
+ */
+ }
+ break;
+ default: n=-999; printf("What did you say?\n"); break;
+
+ }
+ }
+#if HTS_ANALYSTE
+#else
+ } while(n==-999);
+#endif
+ io_flush;
+ } else { // primary link: allow the whole directory
+
+ /* sanity check */
+ if ((*filptr) + 1 >= opt->maxfilter) {
+ opt->maxfilter += HTS_FILTERSINC;
+ if (filters_init(&filters, opt->maxfilter, HTS_FILTERSINC) == 0) {
+ printf("PANIC! : Too many filters : >%d [%d]\n", (*filptr),__LINE__);
+ fflush(stdout);
+ if (opt->errlog) {
+ fprintf(opt->errlog,LF"Too many filters, giving up..(>%d)"LF, (*filptr) );
+ fprintf(opt->errlog,"To avoid that: use #F option for more filters (example: -#F5000)"LF);
+ test_flush;
+ }
+ abort(); // wild..
+ }
+ //opt->filters.filters=filters;
+ //*ptrfilters = filters;
+ } // NOTE(review): *ptrfilters is not updated after filters_init() - confirm the caller's pointer remains valid
+
+ if (!force_mirror) {
+ if ((opt->seeker & 1)==0) { // going down is forbidden
+ n=7;
+ } else {
+ n=5; // allow mirroring of descendant directories (primary link)
+ }
+ } else // force the value (sub-wizard)
+ n=force_mirror;
+ }
+
+ switch(n) {
+ case -1: // skip everything else
+ forbidden_url=1;
+ opt->wizard=2; // skip everything else
+ break;
+ case 0: // forbid this very link: adr/fil
+ forbidden_url=1;
+ HT_INSERT_FILTERS0; // insert at index 0
+ strcpy(filters[0],"-");
+ strcat(filters[0],jump_identification(adr));
+ if (*fil!='/') strcat(filters[0],"/");
+ strcat(filters[0],fil);
+ break;
+
+ case 1: // discard the whole directory and sub-directories: adr/path/ *
+ forbidden_url=1;
+ {
+ int i=strlen(fil)-1;
+ while((fil[i]!='/') && (i>0)) i--;
+ if (fil[i]=='/') {
+ HT_INSERT_FILTERS0; // insert at index 0
+ strcpy(filters[0],"-");
+ strcat(filters[0],jump_identification(adr));
+ if (*fil!='/') strcat(filters[0],"/");
+ strncat(filters[0],fil,i);
+ if (filters[0][strlen(filters[0])-1]!='/') strcat(filters[0],"/");
+ strcat(filters[0],"*");
+ }
+ }
+
+ // ** ...
+ break;
+
+ case 2: // address adr*
+ forbidden_url=1;
+ HT_INSERT_FILTERS0; // insert at index 0
+ strcpy(filters[0],"-");
+ strcat(filters[0],jump_identification(adr));
+ strcat(filters[0],"*");
+ break;
+
+ case 3: // ** TODO
+ forbidden_url=1;
+ /*
+ {
+ int i=strlen(adr)-1;
+ while((adr[i]!='/') && (i>0)) i--;
+ if (i>0) {
+
+ }
+
+ }*/
+
+ break;
+ //
+ case 4: // same link
+ // NOT NEEDED!!
+ /*HT_INSERT_FILTERS0; // insert at index 0
+ strcpy(filters[0],"+");
+ strcat(filters[0],adr);
+ if (*fil!='/') strcat(filters[0],"/");
+ strcat(filters[0],fil);*/
+
+
+ // given the wizard/primary filter reversal (primary links allow up/down AND forbid)
+ // an isolated link must be prevented from triggering a full mirror..
+
+ *set_prio_to = 0+1; // recursion level=0 (no mirror)
+
+ break;
+
+ case 5: // allow the whole directory and its children
+ if ((opt->seeker & 2)==0) { // going up is forbidden
+ int i=strlen(fil)-1;
+ while((fil[i]!='/') && (i>0)) i--;
+ if (fil[i]=='/') {
+ HT_INSERT_FILTERS0; // insert at index 0
+ strcpy(filters[0],"+");
+ strcat(filters[0],jump_identification(adr));
+ if (*fil!='/') strcat(filters[0],"/");
+ strncat(filters[0],fil,i+1);
+ strcat(filters[0],"*");
+ }
+ } else { // then allow the whole domain!!
+ HT_INSERT_FILTERS0; // insert at index 0 strcpy(filters[filptr],"+");
+ strcpy(filters[0],"+");
+ strcat(filters[0],jump_identification(adr));
+ strcat(filters[0],"*");
+ }
+ break;
+
+ case 6: // same domain
+ HT_INSERT_FILTERS0; // insert at index 0 strcpy(filters[filptr],"+");
+ strcpy(filters[0],"+");
+ strcat(filters[0],jump_identification(adr));
+ strcat(filters[0],"*");
+ break;
+ //
+ case 7: // allow this directory
+ {
+ int i=strlen(fil)-1;
+ while((fil[i]!='/') && (i>0)) i--;
+ if (fil[i]=='/') {
+ HT_INSERT_FILTERS0; // insert at index 0
+ strcpy(filters[0],"+");
+ strcat(filters[0],jump_identification(adr));
+ if (*fil!='/') strcat(filters[0],"/");
+ strncat(filters[0],fil,i+1);
+ strcat(filters[0],"*[file]");
+ }
+ }
+
+ break;
+
+ case 50: // do nothing
+ break;
+ } // switch
+
+ } // wizard test on the url
+ } // end of the wizard test..
+
+ // -------------------- PHASE 5 --------------------
+
+ // link not allowed: may we at least test it?
+ if (just_test_it) {
+ if (forbidden_url==1) {
+ if (opt->travel&256) { // test it anyway
+ if (strfield(adr,"ftp://")==0) { // NOT ftp!
+ forbidden_url=1; // yes, still forbidden (note: no-op since it is already 1, kept for clarity)
+ *just_test_it=1; // but we test it
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"Testing link %s%s"LF,adr,fil);
+ }
+ }
+ }
+ }
+ //adr[0]='\0'; // cancel
+ }
+
+ // -------------------- PHASE 6 --------------------
+#if HTS_ANALYSTE
+ {
+ int test_url=hts_htmlcheck_check(adr,fil,forbidden_url);
+ if (test_url!=-1)
+ forbidden_url=test_url;
+ }
+#endif
+ return forbidden_url;
+}
+
+// Test a link against the size-related filter rules.
+//
+// opt   engine options; the filter list (opt->filters), opt->debug and
+//       opt->log are read here
+// adr   address part of the link
+// fil   path part of the link
+// size  size of the file; a negative value means "unknown" and skips the test
+//
+// Returns:
+//    0  no filter matched (or size<0)
+//    1  a filter accepts the link, or a filter rejects it for a reason that
+//       is not related to its size
+//   -1  a filter rejects the link because of its size
+int hts_testlinksize(httrackp* opt,
+                     char* adr,char* fil,
+                     LLint size) {
+  int jok=0;
+  if (size>=0) {
+    char l[HTS_URLMAXSIZE*2];
+    char lfull[HTS_URLMAXSIZE*2];
+    LLint sz=size;
+    int size_flag=0;
+
+    // build the short form of the current link (address without the
+    // identification part, then the path)
+    strcpy(l,jump_identification(adr));
+    if (*fil!='/') strcat(l,"/");
+    strcat(l,fil);
+
+    // build the full form (with "http://" when adr carries no authority)
+    if (!link_has_authority(adr))
+      strcpy(lfull,"http://");
+    else
+      lfull[0]='\0';
+    strcat(lfull,adr);
+    // the separator belongs to lfull here (it used to be appended to l,
+    // which corrupted both strings when fil did not start with '/')
+    if (*fil!='/') strcat(lfull,"/");
+    strcat(lfull,fil);
+
+    // test the filters on both forms: 0=don't know, 1=ok, -1=forbidden
+    {
+      int jokDepth1=0,jokDepth2=0;
+      int jok1=0,jok2=0;
+      LLint sz1=size,sz2=size;
+      int size_flag1=0,size_flag2=0;
+      jok1 = fa_strjoker(*opt->filters.filters,*opt->filters.filptr,lfull,&sz1,&size_flag1,&jokDepth1);
+      jok2 = fa_strjoker(*opt->filters.filters,*opt->filters.filptr,l,    &sz2,&size_flag2,&jokDepth2);
+      if (jok2 == 0) {                      // #2 doesn't know
+        jok = jok1;                         // then, use #1
+        sz = sz1;
+        size_flag = size_flag1;
+      } else if (jok1 == 0) {               // #1 doesn't know
+        jok = jok2;                         // then, use #2
+        sz = sz2;
+        size_flag = size_flag2;
+      } else if (jokDepth1 >= jokDepth2) {  // #1 matching rule is "after" #2, then it has priority
+        jok = jok1;
+        sz = sz1;
+        size_flag = size_flag1;
+      } else {                              // #2 matching rule is "after" #1, then it has priority
+        jok = jok2;
+        sz = sz2;
+        size_flag = size_flag2;
+      }
+    }
+
+    // log
+    if (jok==1) {
+      if ((opt->debug>1) && (opt->log!=NULL)) {
+        fspc(opt->log,"debug"); fprintf(opt->log,"File confirmed (size test): %s%s ("LLintP")"LF,adr,fil,(LLint)(size));
+      }
+    } else if (jok==-1) {
+      if (size_flag) {        /* forbidden because of the size */
+        if ((opt->debug>1) && (opt->log!=NULL)) {
+          fspc(opt->log,"debug"); fprintf(opt->log,"File cancelled due to its size: %s%s ("LLintP", limit: "LLintP")"LF,adr,fil,(LLint)(size),(LLint)(sz));
+        }
+      } else {
+        // forbidden, but not because of the size: not this test's business
+        jok=1;
+      }
+    }
+  }
+  return jok;
+}
+
+
+
+#undef test_flush
+#undef urladr
+#undef urlfil
+
+#undef HT_INSERT_FILTERS0
+
diff --git a/src/htswizard.h b/src/htswizard.h
new file mode 100644
index 0000000..28c5d2f
--- /dev/null
+++ b/src/htswizard.h
@@ -0,0 +1,53 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+/* ------------------------------------------------------------ */
+/* File: httrack.c subroutines: */
+/* wizard system (accept/refuse links) */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+
+#ifndef HTSWIZARD_DEFH
+#define HTSWIZARD_DEFH
+
+#include "htscore.h"
+
+/* Wizard test on a link (adr + fil); the implementation returns its
+   "forbidden_url" status.
+   NOTE(review): the exact meaning of each return value and of the
+   set_prio_to_0 / just_test_it output parameters should be confirmed against
+   htswizard.c */
+int hts_acceptlink(httrackp* opt,
+ int ptr,int lien_tot,lien_url** liens,
+ char* adr,char* fil,
+ char*** filters,int* filptr,int filter_max,
+ robots_wizard* robots,
+ int* set_prio_to_0,
+ int* just_test_it);
+/* Size test of a link (adr + fil) against the filters held in opt */
+int hts_testlinksize(httrackp* opt,
+ char* adr,char* fil,
+ LLint size);
+#endif
diff --git a/src/htswrap.c b/src/htswrap.c
new file mode 100644
index 0000000..824af7e
--- /dev/null
+++ b/src/htswrap.c
@@ -0,0 +1,69 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: httrack.c subroutines: */
+/* wrapper system (for shell) */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+#include "htswrap.h"
+#include "htshash.h"
+
+// typedef long (__stdcall * XSHBFF_WndProc_type)(HWND ,UINT ,WPARAM ,LPARAM);
+
+/* Table mapping a callback name to the address registered for it */
+inthash wrappers=NULL;
+
+/* Create the wrapper table unless it already exists.
+   Returns the result of inthash_created() on the table. */
+int htswrap_init(void) {
+  if (wrappers == NULL) {
+    wrappers = inthash_new(42);
+  }
+  return inthash_created(wrappers);
+}
+
+/* Release the wrapper table through inthash_delete(); always returns 1 */
+int htswrap_free(void) {
+ inthash_delete(&wrappers);
+ return 1;
+}
+
+/* Register a callback under a name.
+   name  key of the callback (for example "init", "loop")
+   fct   address of the callback, stored as an unsigned long int
+   The table is created on first use.
+   Returns 1 on success, 0 if the table could not be created. */
+int htswrap_add(char* name,void* fct) {
+  if (!wrappers)
+    htswrap_init();
+  if (!wrappers)          /* creation failed: do not touch a NULL table */
+    return 0;
+  inthash_write(wrappers,name,(unsigned long int)fct);
+  return 1;
+}
+
+/* Look up the callback registered under a name.
+   name  key of the callback
+   The table is created on first use.
+   Returns the stored address as an unsigned long int; the result is 0 if
+   the table could not be created, and stays 0 if inthash_read() does not
+   fill in a value. */
+unsigned long int htswrap_read(char* name) {
+  unsigned long int fct=0;
+  if (!wrappers)
+    htswrap_init();
+  if (!wrappers)          /* creation failed: nothing can be registered */
+    return 0;
+  inthash_read(wrappers,name,(void*)&fct);
+  return fct;
+}
diff --git a/src/htswrap.h b/src/htswrap.h
new file mode 100644
index 0000000..03bf73f
--- /dev/null
+++ b/src/htswrap.h
@@ -0,0 +1,48 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: httrack.c subroutines: */
+/* wrapper system (for shell) */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+
+
+#ifndef HTSWRAP_DEFH
+#define HTSWRAP_DEFH
+
+/* Create the name -> callback table (if not already created) */
+int htswrap_init(void);
+/* Register the callback address fct under name */
+int htswrap_add(char* name,void* fct);
+/* Release the table */
+int htswrap_free(void);
+/* Return the address registered under name (0 is the initial value used
+   when nothing is read) */
+unsigned long int htswrap_read(char* name);
+
+#endif
diff --git a/src/htszlib.c b/src/htszlib.c
new file mode 100644
index 0000000..d138a1c
--- /dev/null
+++ b/src/htszlib.c
@@ -0,0 +1,84 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: Unpacking subroutines using Jean-loup Gailly's Zlib */
+/* for http compressed data */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+
+/* specific definitions */
+#include <stdio.h>
+#include <stdlib.h>
+#include "htsbase.h"
+#include "htscore.h"
+
+#if HTS_USEZLIB
+
+/* zlib */
+#include <zlib.h>
+#include "htszlib.h"
+
+/*
+  Unpack file into a new file
+  filename  name of the compressed source file (opened with gzopen)
+  newfile   name of the file to create (passed through fconv)
+  Return value: size of the new file, or -1 if an error occurred
+  (bad arguments, a file cannot be opened, read/decompression error,
+  write error)
+*/
+int hts_zunpack(char* filename,char* newfile) {
+  if (filename && newfile) {
+    if (filename[0] && newfile[0]) {
+      gzFile gz = gzopen (filename, "rb");
+      if (gz) {
+        FILE* fpout=fopen(fconv(newfile),"wb");
+        int size=0;
+        if (fpout) {
+          int nr;
+          do {
+            char buff[1024];
+            nr=gzread (gz, buff, 1024);
+            if (nr>0) {
+              size+=nr;
+              if ((int)fwrite(buff,1,nr,fpout) != nr)
+                nr=size=-1;
+            } else if (nr<0) {
+              /* gzread reports an error with a negative value: do not
+                 return the size of a truncated output as a success */
+              size=-1;
+            }
+          } while(nr>0);
+          /* buffered data may only be written at close time */
+          if (fclose(fpout) != 0)
+            size=-1;
+        } else
+          size=-1;
+        gzclose(gz);
+        return size;
+      }
+    }
+  }
+  return -1;
+}
+
+#endif
diff --git a/src/htszlib.h b/src/htszlib.h
new file mode 100644
index 0000000..63310b8
--- /dev/null
+++ b/src/htszlib.h
@@ -0,0 +1,49 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: Unpacking subroutines using Jean-loup Gailly's Zlib */
+/* for http compressed data */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+
+#ifndef HTS_DEFZLIB
+#define HTS_DEFZLIB
+
+#if HTS_USEZLIB
+
+/* Unpack the compressed file "filename" into "newfile";
+   returns the size of the new file, or -1 on error */
+int hts_zunpack(char* filename,char* newfile);
+
+#endif
+
+#endif
+
diff --git a/src/httrack-library.h b/src/httrack-library.h
new file mode 100644
index 0000000..13ecb46
--- /dev/null
+++ b/src/httrack-library.h
@@ -0,0 +1,50 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: HTTrack definition file for library usage */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+
+#ifndef HTTRACK_DEFLIB
+#define HTTRACK_DEFLIB
+
+#include "htsglobal.h"
+#include "htsopt.h"
+#include "htswrap.h"
+
+/* Library initialization; the console front-end calls it first in main() */
+int hts_init(void);
+/* Engine entry point taking the command-line arguments; the console
+   front-end returns its result as the process exit status */
+int hts_main(int argc, char **argv);
+
+
+#endif
+
diff --git a/src/httrack.c b/src/httrack.c
new file mode 100644
index 0000000..0289fca
--- /dev/null
+++ b/src/httrack.c
@@ -0,0 +1,571 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: htsshow.c console progress info */
+/* Only used on Linux version */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+/* Fallback for the Win32-style Sleep(a) call used in this file:
+   (a)*1000 is split into a sub-second part passed to usleep() and a number
+   of whole seconds passed to sleep(); a zero part is not called at all.
+   NOTE(review): HTS_WIN is tested before any header is included in this
+   file, so unless it comes from the compiler command line it evaluates to 0
+   here - confirm this is intended for Windows builds. */
+#if HTS_WIN
+#else
+#ifndef Sleep
+#define Sleep(a) { if (((a)*1000)%1000000) usleep(((a)*1000)%1000000); if (((a)*1000)/1000000) sleep(((a)*1000)/1000000); }
+#endif
+#endif
+
+#include "htsglobal.h"
+#include "httrack.h"
+
+// htswrap_add
+#include "htswrap.h"
+
+#if HTS_ANALYSTE_CONSOLE
+
+/* specific definitions */
+#include "htsbase.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#ifdef _WIN32
+#include "Winsock.h"
+#endif
+/* END specific definitions */
+
+// ISO VT100/220 definitions
+#define VT_COL_TEXT_BLACK "30"
+#define VT_COL_TEXT_RED "31"
+#define VT_COL_TEXT_GREEN "32"
+#define VT_COL_TEXT_YELLOW "33"
+#define VT_COL_TEXT_BLUE "34"
+#define VT_COL_TEXT_MAGENTA "35"
+#define VT_COL_TEXT_CYAN "36"
+#define VT_COL_TEXT_WHITE "37"
+#define VT_COL_BACK_BLACK "40"
+#define VT_COL_BACK_RED "41"
+#define VT_COL_BACK_GREEN "42"
+#define VT_COL_BACK_YELLOW "43"
+#define VT_COL_BACK_BLUE "44"
+#define VT_COL_BACK_MAGENTA "45"
+#define VT_COL_BACK_CYAN "46"
+#define VT_COL_BACK_WHITE "47"
+//
+#define VT_GOTOXY(X,Y) "\33["Y";"X"f"
+#define VT_COLOR(C) "\33["C"m"
+#define VT_RESET "\33[m"
+#define VT_REVERSE "\33[7m"
+#define VT_UNREVERSE "\33[27m"
+#define VT_BOLD "\33[1m"
+#define VT_UNBOLD "\33[22m"
+#define VT_BLINK "\33[5m"
+#define VT_UNBLINK "\33[25m"
+//
+#define VT_CLREOL "\33[K"
+#define VT_CLRSOL "\33[1K"
+#define VT_CLRLIN "\33[2K"
+#define VT_CLREOS "\33[J"
+#define VT_CLRSOS "\33[1J"
+#define VT_CLRSCR "\33[2J"
+//
+// Print the sequence built by s_csi(X).
+// NOTE(review): s_csi is not defined in the part of the file reviewed here,
+// and the macro body already ends with ';' - confirm it is still used.
+#define csi(X) printf(s_csi( X ));
+/* Reset the attributes, clear the screen and move the cursor to the
+   top-left position */
+void vt_clear(void) {
+  fputs(VT_RESET VT_CLRSCR VT_GOTOXY("1","0"), stdout);
+}
+/* Reset the attributes and move the cursor to the top-left position,
+   without clearing the screen */
+void vt_home(void) {
+  fputs(VT_RESET VT_GOTOXY("1","0"), stdout);
+}
+//
+
+
+/*
+#define STYLE_STATVALUES VT_COLOR(VT_COL_TEXT_BLACK)
+#define STYLE_STATTEXT VT_COLOR(VT_COL_TEXT_BLUE)
+*/
+#define STYLE_STATVALUES VT_BOLD
+#define STYLE_STATTEXT VT_UNBOLD
+#define STYLE_STATRESET VT_UNBOLD
+#define NStatsBuffer 14
+#define MAX_LEN_INPROGRESS 40
+
+static int use_show;
+
+
+/*
+  Console front-end entry point: initializes the library, registers the
+  console callbacks under their wrapper names, then hands over to the engine.
+
+  Wrapper names, the engine hooks that read them and the log lines
+  associated with each of them:
+
+    "init"            hts_htmlcheck_init          Log: "engine: init"
+    "free"            hts_htmlcheck_uninit        Log: "engine: free"
+    "start"           hts_htmlcheck_start         Log: "engine: start"
+    "end"             hts_htmlcheck_end           Log: "engine: end"
+    "change-options"  hts_htmlcheck_chopt         Log: "engine: change-options"
+    "check-html"      hts_htmlcheck               Log: "check-html: <url>"
+    "query"           hts_htmlcheck_query         Log: none
+    "query2"          hts_htmlcheck_query2        Log: none
+    "query3"          hts_htmlcheck_query3        Log: none
+    "loop"            hts_htmlcheck_loop          Log: none
+    "check-link"      hts_htmlcheck_check         Log: none
+    "pause"           hts_htmlcheck_pause         Log: "pause: <lockfile>"
+    "save-file"       hts_htmlcheck_filesave      Log: none
+    "link-detected"   hts_htmlcheck_linkdetected  Log: none
+    "transfer-status" hts_htmlcheck_xfrstatus     Log:
+        "engine: transfer-status: link updated: <url> -> <file>"
+      | "engine: transfer-status: link added: <url> -> <file>"
+      | "engine: transfer-status: link recorded: <url> -> <file>"
+      | "engine: transfer-status: link link error (<errno>, '<err_msg>'): <url>"
+    "save-name"       hts_htmlcheck_savename      Log:
+        "engine: save-name: local name: <url> -> <file>"
+*/
+int main(int argc, char **argv) {
+  /* registration table, in registration order */
+  static const struct {
+    char* name;
+    void* fct;
+  } callbacks[] = {
+    { "init",            (void*) htsshow_init },
+    { "free",            (void*) htsshow_uninit },
+    { "start",           (void*) htsshow_start },
+    { "change-options",  (void*) htsshow_chopt },
+    { "end",             (void*) htsshow_end },
+    { "check-html",      (void*) htsshow_checkhtml },
+    { "loop",            (void*) htsshow_loop },
+    { "query",           (void*) htsshow_query },
+    { "query2",          (void*) htsshow_query2 },
+    { "query3",          (void*) htsshow_query3 },
+    { "check-link",      (void*) htsshow_check },
+    { "pause",           (void*) htsshow_pause },
+    { "save-file",       (void*) htsshow_filesave },
+    { "link-detected",   (void*) htsshow_linkdetected },
+    { "transfer-status", (void*) htsshow_xfrstatus },
+    { "save-name",       (void*) htsshow_savename }
+  };
+  int n;
+
+  hts_init();
+
+  for(n=0;n<(int)(sizeof(callbacks)/sizeof(callbacks[0]));n++)
+    htswrap_add(callbacks[n].name,callbacks[n].fct);
+
+  return hts_main(argc,argv);
+}
+
+
+/* CALLBACK FUNCTIONS */
+
+/* "init" callback: initialize the Winsock library (Windows builds only;
+   does nothing elsewhere).
+   On failure a message is printed and the function simply returns. */
+void __cdecl htsshow_init(void) {
+#ifdef _WIN32
+  {
+    WORD wVersionRequested;   // requested version WinSock API
+    WSADATA wsadata;          // Windows Sockets API data
+    int stat;
+    wVersionRequested = 0x0101;
+    stat = WSAStartup( wVersionRequested, &wsadata );
+    if (stat != 0) {
+      printf("Winsock not found!\n");
+      return;
+    } else if (LOBYTE(wsadata.wVersion) != 1 || HIBYTE(wsadata.wVersion) != 1) {
+      // version 1.1 requires BOTH bytes to be 1: reject the DLL as soon as
+      // one of them differs
+      printf("WINSOCK.DLL does not support version 1.1\n");
+      WSACleanup();
+      return;
+    }
+  }
+#endif
+
+}
+/* "free" callback: release Winsock (Windows builds only; does nothing
+   elsewhere) */
+void __cdecl htsshow_uninit(void) {
+#ifdef _WIN32
+ WSACleanup();
+#endif
+}
+/* "start" callback: enable the full-screen display when
+   opt->verbosedisplay is 2 (and clear the screen in that case), disable it
+   otherwise. Always returns 1. */
+int __cdecl htsshow_start(httrackp* opt) {
+  use_show = (opt->verbosedisplay==2) ? 1 : 0;
+  if (use_show)
+    vt_clear();
+  return 1;
+}
+/* "change-options" callback: the options changed, so the display mode is
+   re-evaluated exactly as at start. Returns the result of htsshow_start(). */
+int __cdecl htsshow_chopt(httrackp* opt) {
+  /* a calling-convention keyword belongs to declarations, not to a call
+     expression */
+  return htsshow_start(opt);
+}
+/* "end" callback: nothing to do in the console version; always returns 1 */
+int __cdecl htsshow_end(void) {
+ return 1;
+}
+/* "check-html" callback: the arguments are ignored; always returns 1 */
+int __cdecl htsshow_checkhtml(char* html,int len,char* url_adresse,char* url_fichier) {
+ return 1;
+}
+/*
+  "loop" callback, called at each loop of the HTTrack engine: refreshes the
+  full-screen statistics display.
+
+  back        array of background transfer slots
+  back_max    number of slots in back
+  back_index  index of the current slot (negative: do not list the slots)
+  lien_n      number of links scanned so far
+  lien_tot    total number of links
+  stat_time   elapsed time, used for the average rate and the "Time:" field
+  stats       engine statistics, may be NULL (then every counter is
+              considered unknown)
+
+  Nothing is displayed unless the display was enabled by htsshow_start().
+  The screen is redrawn only when mtime_local() moved by more than 100 since
+  the previous redraw (or went backwards).
+  Always returns 1.
+*/
+int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int lien_n,int lien_tot,int stat_time, hts_stat_struct* stats) {
+  static TStamp prev_mytime=0; /* ok */
+  static t_InpInfo SInfo;      /* ok */
+  //
+  TStamp mytime;
+  long int rate=0;
+  char st[256];
+  // -1 means "unknown": the previously recorded value is kept
+  int stat_written=-1;
+  int stat_updated=-1;
+  int stat_errors=-1;
+  int stat_warnings=-1;
+  int stat_infos=-1;
+  int nbk=-1;
+  int stat_nsocket=-1;
+  LLint stat_bytes=-1;
+  LLint stat_bytes_recv=-1;
+  int irate=-1;
+  if (stats) {
+    stat_written=stats->stat_files;
+    stat_updated=stats->stat_updated_files;
+    stat_errors=stats->stat_errors;
+    stat_warnings=stats->stat_warnings;
+    stat_infos=stats->stat_infos;
+    nbk=stats->nbk;
+    stat_nsocket=stats->stat_nsocket;
+    irate=(int)stats->rate;
+    stat_bytes=stats->nb;
+    stat_bytes_recv=stats->HTS_TOTAL_RECV;
+  }
+
+  if (!use_show)
+    return 1;
+
+  mytime=mtime_local();
+  if ((stat_time>0) && (stat_bytes_recv>0))
+    rate=(int)(stat_bytes_recv/stat_time);
+  else
+    rate=0;    // no information
+
+  /* Infos */
+  if (stat_bytes>=0) SInfo.stat_bytes=stat_bytes;      // bytes
+  if (stat_time>=0) SInfo.stat_time=stat_time;         // time
+  if (lien_tot>=0) SInfo.lien_tot=lien_tot;            // number of links
+  if (lien_n>=0) SInfo.lien_n=lien_n;                  // scanned
+  SInfo.stat_nsocket=stat_nsocket;                     // socks
+  if (rate>0) SInfo.rate=rate;                         // rate
+  if (irate>=0) SInfo.irate=irate;                     // irate
+  if (SInfo.irate<0) SInfo.irate=SInfo.rate;
+  // guard on the NEW value, like every other field (the test used to look
+  // at the value already stored)
+  if (nbk>=0) SInfo.stat_back=nbk;
+  if (stat_written>=0) SInfo.stat_written=stat_written;
+  if (stat_updated>=0) SInfo.stat_updated=stat_updated;
+  if (stat_errors>=0) SInfo.stat_errors=stat_errors;
+  if (stat_warnings>=0) SInfo.stat_warnings=stat_warnings;
+  if (stat_infos>=0) SInfo.stat_infos=stat_infos;
+
+
+  if ( ((mytime - prev_mytime)>100) || ((mytime - prev_mytime)<0) ) {
+    prev_mytime=mytime;
+
+
+    st[0]='\0';
+    qsec2str(st,stat_time);
+    vt_home();
+    printf(
+      VT_GOTOXY("1","1")
+      VT_CLREOL
+      STYLE_STATTEXT "Bytes saved:"
+      STYLE_STATVALUES " \t%s"
+      "\t"
+      VT_CLREOL
+      VT_GOTOXY("40","1")
+      STYLE_STATTEXT "Links scanned:"
+      STYLE_STATVALUES " \t%d/%d (+%d)"
+      VT_CLREOL"\n"VT_CLREOL
+      VT_GOTOXY("1","2")
+      STYLE_STATTEXT "Time:"
+      " \t"
+      STYLE_STATVALUES "%s"
+      "\t"
+      VT_CLREOL
+      VT_GOTOXY("40","2")
+      STYLE_STATTEXT "Files written:"
+      " \t"
+      STYLE_STATVALUES "%d"
+      VT_CLREOL"\n"VT_CLREOL
+      VT_GOTOXY("1","3")
+      STYLE_STATTEXT "Transfer rate:"
+      " \t"
+      STYLE_STATVALUES "%s (%s)"
+      "\t"
+      VT_CLREOL
+      VT_GOTOXY("40","3")
+      STYLE_STATTEXT "Files updated:"
+      " \t"
+      STYLE_STATVALUES "%d"
+      VT_CLREOL"\n"VT_CLREOL
+      VT_GOTOXY("1","4")
+      STYLE_STATTEXT "Active connections:"
+      " \t"
+      STYLE_STATVALUES "%d"
+      "\t"
+      VT_CLREOL
+      VT_GOTOXY("40","4")
+      STYLE_STATTEXT "Errors:"
+      STYLE_STATVALUES " \t"
+      STYLE_STATVALUES "%d"
+      VT_CLREOL"\n"
+      STYLE_STATRESET
+      ,
+      /* */
+      (char*)int2bytes(SInfo.stat_bytes),
+      (int)lien_n,(int)SInfo.lien_tot,(int)nbk,
+      (char*)st,
+      (int)SInfo.stat_written,
+      (char*)int2bytessec(SInfo.irate),(char*)int2bytessec(SInfo.rate),
+      (int)SInfo.stat_updated,
+      (int)SInfo.stat_nsocket,
+      (int)SInfo.stat_errors
+      /* */
+      );
+
+
+    // walk through the background slots
+    if (back_index>=0) {   // only if an index was given
+      int j,k;
+      int index=0;
+      int ok=0;
+      int l;
+      //
+      t_StatsBuffer StatsBuffer[NStatsBuffer];
+
+      {
+        int i;
+        for(i=0;i<NStatsBuffer;i++) {
+          strcpy(StatsBuffer[i].state,"");
+          strcpy(StatsBuffer[i].name,"");
+          strcpy(StatsBuffer[i].file,"");
+          strcpy(StatsBuffer[i].url_sav,"");
+          StatsBuffer[i].back=0;
+          StatsBuffer[i].size=0;
+          StatsBuffer[i].sizetot=0;
+        }
+      }
+      for(k=0;k<2;k++) {    // 0: current link 1: other links
+        for(j=0;(j<3) && (index<NStatsBuffer);j++) {  // priority pass
+          int _i;
+          for(_i=0+k;(_i< max(back_max*k,1) ) && (index<NStatsBuffer);_i++) {  // slot number
+            int i=(back_index+_i)%back_max;    // start with the "first" one (the current one)
+            if (back[i].status>=0) {     // means "active slot"
+              ok=0;
+              switch(j) {
+              case 0:     // highest priority: data being received
+                if ((back[i].status>0) && (back[i].status<99)) {
+                  strcpy(StatsBuffer[index].state,"receive"); ok=1;
+                }
+                break;
+              case 1:     // connection being set up
+                if (back[i].status==99) {
+                  strcpy(StatsBuffer[index].state,"request"); ok=1;
+                }
+                else if (back[i].status==100) {
+                  strcpy(StatsBuffer[index].state,"connect"); ok=1;
+                }
+                else if (back[i].status==101) {
+                  strcpy(StatsBuffer[index].state,"search"); ok=1;
+                }
+                else if (back[i].status==1000) {    // ftp transfer
+                  sprintf(StatsBuffer[index].state,"ftp: %s",back[i].info); ok=1;
+                }
+                break;
+              default:    // finished slots
+                if (back[i].status==0) {  // ready
+                  if ((back[i].r.statuscode==200)) {
+                    strcpy(StatsBuffer[index].state,"ready"); ok=1;
+                  }
+                  else if ((back[i].r.statuscode>=100) && (back[i].r.statuscode<=599)) {
+                    char tempo[256]; tempo[0]='\0';
+                    infostatuscode(tempo,back[i].r.statuscode);
+                    strcpy(StatsBuffer[index].state,tempo); ok=1;
+                  }
+                  else {
+                    strcpy(StatsBuffer[index].state,"error"); ok=1;
+                  }
+                }
+                break;
+              }
+
+              if (ok) {
+                char s[HTS_URLMAXSIZE*2];
+                //
+                StatsBuffer[index].back=i;        // slot index, for more information
+                //
+                s[0]='\0';
+                strcpy(StatsBuffer[index].url_sav,back[i].url_sav);   // for cancel
+                if (strcmp(back[i].url_adr,"file://"))
+                  strcat(s,back[i].url_adr);
+                else
+                  strcat(s,"localhost");
+                if (back[i].url_fil[0]!='/')
+                  strcat(s,"/");
+                strcat(s,back[i].url_fil);
+
+                // split s: last path component goes to .file, the rest stays in s
+                StatsBuffer[index].file[0]='\0';
+                {
+                  char* a=strrchr(s,'/');
+                  if (a) {
+                    strncat(StatsBuffer[index].file,a,200);
+                    *a='\0';
+                  }
+                }
+
+                if ((l=strlen(s))<MAX_LEN_INPROGRESS)
+                  strcpy(StatsBuffer[index].name,s);
+                else {
+                  // too long: keep the beginning and the end, "..." in between
+                  StatsBuffer[index].name[0]='\0';
+                  strncat(StatsBuffer[index].name,s,MAX_LEN_INPROGRESS/2-2);
+                  strcat(StatsBuffer[index].name,"...");
+                  strcat(StatsBuffer[index].name,s+l-MAX_LEN_INPROGRESS/2+2);
+                }
+
+                if (back[i].r.totalsize>0) {  // total size known
+                  StatsBuffer[index].sizetot=back[i].r.totalsize;
+                  StatsBuffer[index].size=back[i].r.size;
+                } else {  // total size unknown
+                  if (back[i].status==0) {  // ready
+                    StatsBuffer[index].sizetot=back[i].r.size;
+                    StatsBuffer[index].size=back[i].r.size;
+                  } else {
+                    StatsBuffer[index].sizetot=8192;
+                    StatsBuffer[index].size=(back[i].r.size % 8192);
+                  }
+                }
+                index++;
+              }
+            }
+          }
+        }
+      }
+
+      /* LF */
+      printf("%s\n",VT_CLREOL);
+
+      /* Display current job */
+      {
+        int parsing=0;
+        printf("Current job: ");
+        if (!(parsing=hts_is_parsing(-1)))
+          printf("receiving files");
+        else {
+          switch(hts_is_testing()) {
+          case 0:
+            printf("parsing HTML file (%d%%)",parsing);
+            break;
+          case 1:
+            printf("parsing HTML file: testing links (%d%%)",parsing);
+            break;
+          case 2:
+            printf("purging files");
+            break;
+          }
+        }
+        printf("%s\n",VT_CLREOL);
+      }
+
+      /* Display background jobs */
+      {
+        int i;
+        for(i=0;i<NStatsBuffer;i++) {
+          if (strnotempty(StatsBuffer[i].state)) {
+            printf(VT_CLREOL" %s - \t%s%s \t%s / \t%s",
+              StatsBuffer[i].state,
+              StatsBuffer[i].name,
+              StatsBuffer[i].file,
+              int2bytes(StatsBuffer[i].size),
+              int2bytes(StatsBuffer[i].sizetot)
+              );
+          }
+          printf("%s\n",VT_CLREOL);
+        }
+      }
+
+
+    }
+
+  }
+
+
+
+  return 1;
+}
+/* "query" callback: print the question with a Y/N prompt, then read the
+   answer from stdin through linput().
+   Returns a pointer to a static buffer, overwritten by the next call. */
+char* __cdecl htsshow_query(char* question) {
+ static char s[12]=""; /* ok */
+ printf("%s\nPress <Y><Enter> to confirm, <N><Enter> to abort\n",question);
+ io_flush; linput(stdin,s,4);
+ return s;
+}
+/* "query2" callback: same behaviour as htsshow_query (Y/N prompt, answer
+   read from stdin), with its own static answer buffer. */
+char* __cdecl htsshow_query2(char* question) {
+ static char s[12]=""; /* ok */
+ printf("%s\nPress <Y><Enter> to confirm, <N><Enter> to abort\n",question);
+ io_flush; linput(stdin,s,4);
+ return s;
+}
+/* "query3" callback: read lines from stdin until a non-empty one is
+   entered, print "ok.." and return it (static buffer, overwritten by the
+   next call).
+   NOTE(review): the question argument is never printed here - confirm the
+   caller displays it. */
+char* __cdecl htsshow_query3(char* question) {
+ static char line[256]; /* ok */
+ do {
+ io_flush; linput(stdin,line,206);
+ } while(!strnotempty(line));
+ printf("ok..\n");
+ return line;
+}
+/* "check-link" callback: the arguments are ignored; always returns -1
+   (the wizard keeps its own decision when this callback returns -1) */
+int __cdecl htsshow_check(char* adr,char* fil,int status) {
+ return -1;
+}
+/* "pause" callback: block as long as the lock file exists, testing it
+   again after each Sleep(1000) */
+void __cdecl htsshow_pause(char* lockfile) {
+  for(;;) {
+    if (!fexist(lockfile))
+      break;
+    Sleep(1000);
+  }
+}
+/* "save-file" callback: nothing to do in the console version */
+void __cdecl htsshow_filesave(char* file) {
+}
+/* "link-detected" callback: the link is ignored; always returns 1 */
+int __cdecl htsshow_linkdetected(char* link) {
+ return 1;
+}
+/* "transfer-status" callback: the slot is ignored; always returns 1 */
+int __cdecl htsshow_xfrstatus(lien_back* back) {
+ return 1;
+}
+/* "save-name" callback: the arguments are ignored (save is left
+   untouched); always returns 1 */
+int __cdecl htsshow_savename(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save) {
+ return 1;
+}
+
+
+#endif
diff --git a/src/httrack.dsp b/src/httrack.dsp
new file mode 100644
index 0000000..7fc08da
--- /dev/null
+++ b/src/httrack.dsp
@@ -0,0 +1,324 @@
+# Microsoft Developer Studio Project File - Name="httrack" - Package Owner=<4>
+# Microsoft Developer Studio Generated Build File, Format Version 6.00
+# ** DO NOT EDIT **
+
+# TARGTYPE "Win32 (x86) Console Application" 0x0103
+
+CFG=httrack - Win32 Debug
+!MESSAGE This is not a valid makefile. To build this project using NMAKE,
+!MESSAGE use the Export Makefile command and run
+!MESSAGE
+!MESSAGE NMAKE /f "httrack.mak".
+!MESSAGE
+!MESSAGE You can specify a configuration when running NMAKE
+!MESSAGE by defining the macro CFG on the command line. For example:
+!MESSAGE
+!MESSAGE NMAKE /f "httrack.mak" CFG="httrack - Win32 Debug"
+!MESSAGE
+!MESSAGE Possible choices for configuration are:
+!MESSAGE
+!MESSAGE "httrack - Win32 Release" (based on "Win32 (x86) Console Application")
+!MESSAGE "httrack - Win32 Debug" (based on "Win32 (x86) Console Application")
+!MESSAGE "httrack - Win32 Release avec debug" (based on "Win32 (x86) Console Application")
+!MESSAGE
+
+# Begin Project
+# PROP AllowPerConfigDependencies 0
+# PROP Scc_ProjName ""
+# PROP Scc_LocalPath ""
+CPP=cl.exe
+RSC=rc.exe
+
+!IF "$(CFG)" == "httrack - Win32 Release"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 0
+# PROP BASE Output_Dir "Release"
+# PROP BASE Intermediate_Dir "Release"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 0
+# PROP Output_Dir "Release"
+# PROP Intermediate_Dir "c:\temp\vcpp"
+# PROP Ignore_Export_Lib 0
+# PROP Target_Dir ""
+# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
+# ADD CPP /nologo /MT /W3 /GX /Ot /Oi /Oy /Ob2 /I "C:\Dev\IPv6Kit\inc\\" /I "C:\Dev\zlib\\" /I "C:\Dev\openssl\include" /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /FR /YX /FD /c
+# SUBTRACT CPP /Ox /Oa /Ow /Og /Os
+# ADD BASE RSC /l 0x40c /d "NDEBUG"
+# ADD RSC /l 0x409 /d "NDEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LINK32=link.exe
+# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
+# ADD LINK32 wsock32.lib zlib.lib ssleay32.lib libeay32.lib /nologo /subsystem:console /machine:I386 /out:"c:\temp\httrack.exe" /libpath:"C:\Dev\openssl\lib" /libpath:"C:\Dev\zlib\dll32" /libpath:"C:\Dev\openssl\lib\out32dll"
+# SUBTRACT LINK32 /verbose
+
+!ELSEIF "$(CFG)" == "httrack - Win32 Debug"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 1
+# PROP BASE Output_Dir "Debug"
+# PROP BASE Intermediate_Dir "Debug"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 1
+# PROP Output_Dir "Debug"
+# PROP Intermediate_Dir "c:\temp\vcpp"
+# PROP Ignore_Export_Lib 0
+# PROP Target_Dir ""
+# ADD BASE CPP /nologo /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
+# ADD CPP /nologo /MT /W3 /Gm /GR /GX /ZI /Od /I "C:\Dev\IPv6Kit\inc\\" /I "C:\Dev\zlib\\" /I "C:\Dev\openssl\include" /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /FAcs /Fr /YX /FD /c
+# ADD BASE RSC /l 0x40c /d "_DEBUG"
+# ADD RSC /l 0x409 /d "_DEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LINK32=link.exe
+# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
+# ADD LINK32 wsock32.lib zlib.lib ssleay32.lib libeay32.lib /nologo /subsystem:console /map /debug /debugtype:both /machine:I386 /out:"c:\temp\test\httrack.exe" /pdbtype:sept /libpath:"C:\Dev\openssl\lib" /libpath:"C:\Dev\zlib\dll32" /libpath:"C:\Dev\openssl\lib\out32dll"
+# SUBTRACT LINK32 /profile
+
+!ELSEIF "$(CFG)" == "httrack - Win32 Release avec debug"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 0
+# PROP BASE Output_Dir "httrack___Win32_Release_avec_debug"
+# PROP BASE Intermediate_Dir "httrack___Win32_Release_avec_debug"
+# PROP BASE Ignore_Export_Lib 0
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 0
+# PROP Output_Dir "Release_avec_debug"
+# PROP Intermediate_Dir "c:\temp\vcpp"
+# PROP Ignore_Export_Lib 0
+# PROP Target_Dir ""
+# ADD BASE CPP /nologo /MT /W3 /GX /Ot /Oi /Oy /Ob2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
+# SUBTRACT BASE CPP /Ox /Oa /Ow /Og /Os
+# ADD CPP /nologo /MT /W3 /GX /Zi /Ot /Oi /Oy /Ob2 /I "C:\Dev\IPv6Kit\inc\\" /I "C:\Dev\zlib\\" /I "C:\Dev\openssl\include" /D "WIN32" /D "_CONSOLE" /D "_MBCS" /FAcs /FR /YX /FD /c
+# SUBTRACT CPP /Ox /Oa /Ow /Og /Os
+# ADD BASE RSC /l 0x409 /d "NDEBUG"
+# ADD RSC /l 0x409 /d "NDEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LINK32=link.exe
+# ADD BASE LINK32 wsock32.lib /nologo /subsystem:console /machine:I386 /out:"c:\temp\httrack.exe"
+# SUBTRACT BASE LINK32 /verbose
+# ADD LINK32 wsock32.lib zlib.lib ssleay32.lib libeay32.lib /nologo /subsystem:console /debug /machine:I386 /out:"c:\temp\httrack.exe" /libpath:"C:\Dev\openssl\lib" /libpath:"C:\Dev\zlib\dll32" /libpath:"C:\Dev\openssl\lib\out32dll"
+# SUBTRACT LINK32 /verbose
+
+!ENDIF
+
+# Begin Target
+
+# Name "httrack - Win32 Release"
+# Name "httrack - Win32 Debug"
+# Name "httrack - Win32 Release avec debug"
+# Begin Source File
+
+SOURCE=.\htsalias.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsalias.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsback.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsback.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsbauth.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsbauth.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\htscache.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\htscache.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\htscatchurl.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\htscatchurl.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsconfig.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\htscore.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\htscore.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\htscoremain.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\htscoremain.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsdefines.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsfilters.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsfilters.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsftp.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsftp.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsglobal.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\htshash.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\htshash.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\htshelp.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\htshelp.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsindex.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsindex.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsjava.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsjava.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\htslib.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\htslib.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsmd5.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsmd5.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsname.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsname.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsnostatic.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsnostatic.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsrobots.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsrobots.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsthread.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsthread.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\htstools.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\htstools.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\htswizard.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\htswizard.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\htswrap.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\htswrap.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\httrack.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\httrack.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\md5.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\md5.h
+# End Source File
+# End Target
+# End Project
diff --git a/src/httrack.dsw b/src/httrack.dsw
new file mode 100644
index 0000000..9aa199f
--- /dev/null
+++ b/src/httrack.dsw
@@ -0,0 +1,29 @@
+Microsoft Developer Studio Workspace File, Format Version 6.00
+# WARNING: DO NOT EDIT OR DELETE THIS WORKSPACE FILE!
+
+###############################################################################
+
+Project: "httrack"=.\httrack.dsp - Package Owner=<4>
+
+Package=<5>
+{{{
+}}}
+
+Package=<4>
+{{{
+}}}
+
+###############################################################################
+
+Global:
+
+Package=<5>
+{{{
+}}}
+
+Package=<3>
+{{{
+}}}
+
+###############################################################################
+
diff --git a/src/httrack.h b/src/httrack.h
new file mode 100644
index 0000000..a8633de
--- /dev/null
+++ b/src/httrack.h
@@ -0,0 +1,107 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: httrack.h console progress info */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+/* Include guard. The macro is named after this header (httrack.h) so it
+   cannot collide with the guard of another header such as htstools.h,
+   which would make the two headers mutually exclusive. */
+#ifndef HTTRACK_DEFH
+#define HTTRACK_DEFH
+
+#if HTS_ANALYSTE_CONSOLE
+
+#include "htsglobal.h"
+#include "htscore.h"
+
+/* One display record of the console status view.
+   NOTE(review): presumably the element type of the StatsBuffer[] array
+   whose state/name/file/size/sizetot fields are printed by the console
+   "background jobs" display -- confirm in httrack.c. */
+typedef struct {
+ char name[1000];
+ char file[256];
+ char state[20];
+ char url_sav[HTS_URLMAXSIZE*2]; // for cancel
+ char url_adr[HTS_URLMAXSIZE*2];
+ char url_fil[HTS_URLMAXSIZE*2];
+ LLint size;
+ LLint sizetot;
+ int offset;
+ //
+ int back;
+ //
+ int actived; // for disabled
+} t_StatsBuffer;
+
+/* Aggregate of refresh flags and transfer counters for the console
+   display.
+   NOTE(review): field meanings are only suggested by their names (bytes
+   transferred, elapsed time, link counts, socket count, rates, written
+   bytes, updated/error/warning/info counters, start timestamp) -- confirm
+   against the code that fills this structure. */
+typedef struct {
+ int ask_refresh;
+ int refresh;
+ LLint stat_bytes;
+ int stat_time;
+ int lien_n;
+ int lien_tot;
+ int stat_nsocket;
+ int rate;
+ int irate;
+ int ft;
+ LLint stat_written;
+ int stat_updated;
+ int stat_errors;
+ int stat_warnings;
+ int stat_infos;
+ TStamp stat_timestart;
+ int stat_back;
+} t_InpInfo;
+
+// wrappers
+void __cdecl htsshow_init(void);
+void __cdecl htsshow_uninit(void);
+int __cdecl htsshow_start(httrackp* opt);
+int __cdecl htsshow_chopt(httrackp* opt);
+int __cdecl htsshow_end(void);
+int __cdecl htsshow_checkhtml(char* html,int len,char* url_adresse,char* url_fichier);
+int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int lien_n,int lien_tot,int stat_time,hts_stat_struct* stats);
+char* __cdecl htsshow_query(char* question);
+char* __cdecl htsshow_query2(char* question);
+char* __cdecl htsshow_query3(char* question);
+int __cdecl htsshow_check(char* adr,char* fil,int status);
+void __cdecl htsshow_pause(char* lockfile);
+void __cdecl htsshow_filesave(char* file);
+int __cdecl htsshow_linkdetected(char* link);
+int __cdecl htsshow_xfrstatus(lien_back* back);
+int __cdecl htsshow_savename(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save);
+
+int main(int argc, char **argv);
+void vt_color(int text,int back);
+void vt_clear(void);
+void vt_home(void);
+
+#endif
+
+#endif
+
diff --git a/src/md5.c b/src/md5.c
new file mode 100644
index 0000000..f5dadf2
--- /dev/null
+++ b/src/md5.c
@@ -0,0 +1,271 @@
+/*
+* This code implements the MD5 message-digest algorithm.
+* The algorithm is due to Ron Rivest. This code was
+* written by Colin Plumb in 1993, no copyright is claimed.
+* This code is in the public domain; do with it what you wish.
+*
+* Equivalent code is available from RSA Data Security, Inc.
+* This code has been tested against that, and is equivalent,
+* except that you don't need to include two pages of legalese
+* with every copy.
+*
+* To compute the message digest of a chunk of bytes, declare an
+* MD5Context structure, pass it to MD5Init, call MD5Update as
+* needed on buffers full of bytes, and then call MD5Final, which
+* will fill a supplied 16-byte array with the digest.
+*/
+
+/* #include "config.h" */
+
+#include <string.h> /* for memcpy() */
+#include "md5.h"
+
+static void byteReverse(unsigned char *buf, unsigned longs);
+
+/*
+* Swap two byte lvalues in place (XOR swap; a and b must be distinct
+* objects).
+*/
+#define byteSwap(a, b) do { \
+ a ^= b; \
+ b ^= a; \
+ a ^= b; \
+} while(0)
+/*
+* Reverse the byte order of consecutive 4-byte groups, in place.
+* Note: the swap is unconditional - bytes are reversed on every host,
+* whatever its endianness.
+*
+* buf:   start of the buffer, holding at least 4*longs bytes
+* longs: number of 4-byte groups to reverse; must be at least 1, because
+*        the first group is processed before the count is tested
+*/
+static void byteReverse(unsigned char *buf, unsigned longs)
+{
+  unsigned char *group = buf;
+  unsigned remaining = longs;
+
+  do {
+    byteSwap(group[0], group[3]);   /* outer pair */
+    byteSwap(group[1], group[2]);   /* inner pair */
+    group += 4;
+    remaining--;
+  } while (remaining != 0);
+}
+
+/*
+* Begin a new MD5 computation: clear the bit counter and load the four
+* chaining words with the MD5 initialization constants.
+*
+* ctx:          context to initialise
+* brokenEndian: non-zero turns the byteReverse() passes off, zero turns
+*               them on. The choice comes from the caller only; host
+*               endianness is not tested here.
+*/
+void MD5Init(struct MD5Context *ctx, int brokenEndian)
+{
+  /* no bits processed yet */
+  ctx->bits[1] = 0;
+  ctx->bits[0] = 0;
+
+  /* chaining variables */
+  ctx->buf[0] = 0x67452301;
+  ctx->buf[1] = 0xefcdab89;
+  ctx->buf[2] = 0x98badcfe;
+  ctx->buf[3] = 0x10325476;
+
+  ctx->doByteReverse = brokenEndian ? 0 : 1;
+}
+
+/*
+* Update context to reflect the concatenation of another buffer full
+* of bytes.
+*
+* ctx: context initialised by MD5Init()
+* buf: bytes to add
+* len: number of bytes in buf
+*
+* Complete 64-byte blocks are passed to MD5Transform(); a trailing
+* partial block is kept in ctx->in for the next call or for MD5Final().
+*/
+void MD5Update(struct MD5Context *ctx, unsigned char const *buf, unsigned len)
+{
+ uint32 t;
+
+ /* Update bitcount */
+
+ t = ctx->bits[0];
+ if ((ctx->bits[0] = t + ((uint32) len << 3)) < t)
+ ctx->bits[1]++; /* Carry from low to high */
+ ctx->bits[1] += len >> 29; /* bits of len*8 that do not fit in the low word */
+
+ t = (t >> 3) & 0x3f; /* Bytes already buffered in ctx->in */
+
+ /* Handle any leading odd-sized chunks */
+
+ if (t) {
+ unsigned char *p = (unsigned char *) ctx->in + t;
+
+ t = 64 - t; /* free space left in the buffered block */
+ if (len < t) {
+ /* Still not a full block: just append and wait for more data */
+ memcpy(p, buf, len);
+ return;
+ }
+ memcpy(p, buf, t);
+ if (ctx->doByteReverse)
+ byteReverse(ctx->in, 16);
+ MD5Transform(ctx->buf, (uint32 *) ctx->in);
+ buf += t;
+ len -= t;
+ }
+ /* Process data in 64-byte chunks */
+
+ while (len >= 64) {
+ memcpy(ctx->in, buf, 64);
+ if (ctx->doByteReverse)
+ byteReverse(ctx->in, 16);
+ MD5Transform(ctx->buf, (uint32 *) ctx->in);
+ buf += 64;
+ len -= 64;
+ }
+
+ /* Handle any remaining bytes of data. */
+
+ memcpy(ctx->in, buf, len);
+}
+
+/*
+* Final wrapup - pad to 64-byte boundary with the bit pattern
+* 1 0* followed by the 64-bit count of bits processed (stored as two
+* 32-bit words, low word first), run the last transform(s) and write
+* out the digest.
+*
+* digest: receives the 16 bytes of the final chaining state
+* ctx:    context fed through MD5Update(). The whole structure is wiped
+*         before returning, so it must be re-initialised with MD5Init()
+*         before being used again.
+*/
+void MD5Final(unsigned char digest[16], struct MD5Context *ctx)
+{
+  unsigned count;
+  unsigned char *p;
+
+  /* Compute number of bytes mod 64 */
+  count = (ctx->bits[0] >> 3) & 0x3F;
+
+  /* Set the first char of padding to 0x80. This is safe since there is
+     always at least one byte free */
+  p = ctx->in + count;
+  *p++ = 0x80;
+
+  /* Bytes of padding needed to make 64 bytes */
+  count = 64 - 1 - count;
+
+  /* Pad out to 56 mod 64 */
+  if (count < 8) {
+    /* Two lots of padding: Pad the first block to 64 bytes */
+    memset(p, 0, count);
+    if (ctx->doByteReverse)
+      byteReverse(ctx->in, 16);
+    MD5Transform(ctx->buf, (uint32 *) ctx->in);
+
+    /* Now fill the next block with 56 bytes */
+    memset(ctx->in, 0, 56);
+  } else {
+    /* Pad block to 56 bytes */
+    memset(p, 0, count - 8);
+  }
+  /* Only the 14 data words are reversed; the length words are stored
+     below, after the reversal */
+  if (ctx->doByteReverse)
+    byteReverse(ctx->in, 14);
+
+  /* Append length in bits and transform */
+  ((uint32 *) ctx->in)[14] = ctx->bits[0];
+  ((uint32 *) ctx->in)[15] = ctx->bits[1];
+
+  MD5Transform(ctx->buf, (uint32 *) ctx->in);
+  if (ctx->doByteReverse)
+    byteReverse((unsigned char *) ctx->buf, 4);
+  memcpy(digest, ctx->buf, 16);
+  /* Wipe the whole context, not just sizeof(pointer) bytes of it */
+  memset(ctx, 0, sizeof(*ctx)); /* In case it's sensitive */
+}
+
+/* The four core functions - F1 is optimized somewhat */
+/* Each one combines three words bitwise; F2 is F1 with its arguments
+   rotated. Arguments are evaluated more than once, so they must be free
+   of side effects. */
+
+/* #define F1(x, y, z) (x & y | ~x & z) */
+#define F1(x, y, z) (z ^ (x & (y ^ z)))
+#define F2(x, y, z) F1(z, x, y)
+#define F3(x, y, z) (x ^ y ^ z)
+#define F4(x, y, z) (y ^ (x | ~z))
+
+/* This is the central step in the MD5 algorithm. */
+/* w += f(x,y,z) + data, then w is rotated left by s bits and x is added.
+   The rotate is written as w<<s | w>>(32-s), so it is only a true rotate
+   when w is exactly 32 bits wide. */
+#define MD5STEP(f, w, x, y, z, data, s) \
+( w += f(x, y, z) + data, w = w<<s | w>>(32-s), w += x )
+
+/*
+* The core of the MD5 algorithm, this alters an existing MD5 hash to
+* reflect the addition of 16 longwords of new data. MD5Update blocks
+* the data and converts bytes into longwords for this routine.
+*
+* buf: the four chaining words, updated in place
+* in:  one 64-byte block seen as 16 words (read only)
+*/
+void MD5Transform(uint32 buf[4], uint32 const in[16])
+{
+ register uint32 a, b, c, d;
+
+ /* Work on local copies; they are added back into buf at the end */
+ a = buf[0];
+ b = buf[1];
+ c = buf[2];
+ d = buf[3];
+
+ /* Round 1: F1, words taken in order 0..15 */
+ MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7);
+ MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 12);
+ MD5STEP(F1, c, d, a, b, in[2] + 0x242070db, 17);
+ MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22);
+ MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7);
+ MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12);
+ MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17);
+ MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22);
+ MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7);
+ MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12);
+ MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17);
+ MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22);
+ MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7);
+ MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12);
+ MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17);
+ MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22);
+
+ /* Round 2: F2 */
+ MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5);
+ MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9);
+ MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14);
+ MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20);
+ MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5);
+ MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9);
+ MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14);
+ MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20);
+ MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5);
+ MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9);
+ MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14);
+ MD5STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20);
+ MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5);
+ MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9);
+ MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14);
+ MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20);
+
+ /* Round 3: F3 */
+ MD5STEP(F3, a, b, c, d, in[5] + 0xfffa3942, 4);
+ MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11);
+ MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16);
+ MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23);
+ MD5STEP(F3, a, b, c, d, in[1] + 0xa4beea44, 4);
+ MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11);
+ MD5STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60, 16);
+ MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23);
+ MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4);
+ MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11);
+ MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16);
+ MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23);
+ MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4);
+ MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11);
+ MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16);
+ MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23);
+
+ /* Round 4: F4 */
+ MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6);
+ MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10);
+ MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15);
+ MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21);
+ MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6);
+ MD5STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92, 10);
+ MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15);
+ MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21);
+ MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6);
+ MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10);
+ MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15);
+ MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21);
+ MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6);
+ MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10);
+ MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15);
+ MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21);
+
+ /* Fold this block's result into the running hash */
+ buf[0] += a;
+ buf[1] += b;
+ buf[2] += c;
+ buf[3] += d;
+}
+
diff --git a/src/md5.h b/src/md5.h
new file mode 100644
index 0000000..f5e5428
--- /dev/null
+++ b/src/md5.h
@@ -0,0 +1,36 @@
+#ifndef MD5_H
+#define MD5_H
+
+/* uint32 must be exactly 32 bits wide: the MD5 step rotates with
+   w>>(32-s), and md5.c overlays uint32 words on the 64-byte input block.
+   `unsigned long` is kept wherever it is 32 bits wide; where it is wider
+   (e.g. LP64 Unix systems), and on alpha as before, `unsigned int` is
+   used instead. */
+#include <limits.h>
+#if defined(__alpha) || (ULONG_MAX > 0xffffffffUL)
+typedef unsigned int uint32;
+#else
+typedef unsigned long uint32;
+#endif
+
+/* State of one MD5 computation (set up by MD5Init, fed by MD5Update,
+   consumed by MD5Final). */
+struct MD5Context {
+ uint32 buf[4]; /* chaining words; copied out as the digest by MD5Final */
+ uint32 bits[2]; /* count of bits processed: [0] low word, [1] high word */
+ unsigned char in[64]; /* input block being assembled */
+ int doByteReverse; /* non-zero: byteReverse() is applied around transforms */
+};
+
+void MD5Init(struct MD5Context *context, int brokenEndian);
+void MD5Update(struct MD5Context *context, unsigned char const *buf,
+ unsigned len);
+void MD5Final(unsigned char digest[16], struct MD5Context *context);
+void MD5Transform(uint32 buf[4], uint32 const in[16]);
+
+int mdfile(char *fn, unsigned char *digest);
+int mdbinfile(char *fn, unsigned char *bindigest);
+
+/* These assume a little endian machine and return incorrect results!
+They are here for compatibility with old (broken) versions of RPM */
+int mdfileBroken(char *fn, unsigned char *digest);
+int mdbinfileBroken(char *fn, unsigned char *bindigest);
+
+/*
+* This is needed to make RSAREF happy on some MS-DOS compilers.
+*/
+typedef struct MD5Context MD5_CTX;
+
+#endif /* !MD5_H */
diff --git a/src/postinst-config.in b/src/postinst-config.in
new file mode 100755
index 0000000..f0edc72
--- /dev/null
+++ b/src/postinst-config.in
@@ -0,0 +1,55 @@
+#!/bin/sh
+#
+# Post-install helper: creates a default httrack.conf when none exists.
+# Must be run as root; otherwise it only prints a notice.
+# __ETCPATH__ and __ROOTGROUP__ are placeholders replaced at configure time.
+
+# Config file location
+cnfdir="__ETCPATH__"
+cnf="__ETCPATH__/httrack.conf"
+
+if test "`id -u`" -eq 0; then
+  mkdir -p "$cnfdir"
+
+  # Never overwrite an existing configuration
+  if ! test -f "$cnf"; then
+    echo "creating $cnf (please modify it) .."
+    cat>"$cnf" << EOF
+# HTTrack Website Copier Settings
+# See httrack --help for more information
+
+# Examples: (to uncomment)
+
+# set proxy proxy.myisp.com:8080
+# retries=2
+# set max-size 10000000
+# set max-time 36000
+# set user-agent Mouzilla/17.0 (compatible; HTTrack; I)
+#
+# There are MUCH more options.. try 'httrack --quiet --help | more'
+
+# Deny and allow for links
+# this will be used by default for all mirrors
+allow *.gif
+allow *.png
+deny ad.doubleclick.net/*
+
+# Path and other options
+# '~' in the *beginning* means 'home dir'
+# '#' at the *end* means "projectname" (that is, the first URL given)
+# Example: '~/websites/#' will create /home/smith/websites/www.foo.com
+# folder when launching 'httrack www.foo.com'
+set path ~/websites/#
+
+EOF
+  fi
+
+  # NOTE(review): this only prints a message when the file has no
+  # "set path" line; nothing is written to the file here -- confirm that
+  # this is the intended behaviour.
+  if ! grep "set path" "$cnf" >/dev/null; then
+    echo "default path set to <home dir>/websites/<first_site_name>"
+  fi
+
+  # Plain configuration file: owner read/write, world readable, and no
+  # execute bit.
+  chown root:__ROOTGROUP__ "$cnf"
+  chmod 644 "$cnf"
+else
+  cat << EOF
+
+You are not root, therefore $cnf configuration file hasn't been created
+Re-run this script ($0) as root if you want to do that
+
+EOF
+fi
+
diff --git a/src/strip_cr.in b/src/strip_cr.in
new file mode 100755
index 0000000..03af084
--- /dev/null
+++ b/src/strip_cr.in
@@ -0,0 +1,32 @@
+__PERL__
+# strip_cr - turn DOS text files into Unix ones by replacing the CR LF
+# that ends a line with a single LF. Handy for .c and .h source files.
+# Each file is rewritten through "<file>.tmp"; the original is replaced
+# only when the size changed, otherwise the temporary copy is removed.
+# Usage: strip_cr <files>
+for $fname (@ARGV) {
+  $ad = 1;                      # at least one argument was given
+  $tmpname = $fname . ".tmp";
+
+  unless (open(FL, $fname)) {
+    print "Unable to open $fname\n";
+    next;
+  }
+  unless (open(FO, ">" . $tmpname)) {
+    print "Unable to open " . $fname . ".tmp\n";
+    next;
+  }
+
+  # Copy line by line, dropping the CR of a trailing CR LF
+  while (defined($line = <FL>)) {
+    $line =~ s/\r\n$/\n/g;
+    print FO $line;
+  }
+  close(FL);
+  close(FO);
+
+  # Same size means nothing was stripped: discard the copy
+  if ((-s $fname) == (-s $tmpname)) {
+    unlink($tmpname);
+  } else {
+    print("Stripping " . $fname . "..\n");
+    rename($tmpname, $fname);
+  }
+}
+
+# No argument at all: show the help text
+unless ($ad) {
+  print "Ensure that a text file has no lines ended with CR (DOS)\n";
+  print "Usage: strip_cr <file>\n";
+}