summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorXavier Roche <xroche@users.noreply.github.com>2012-03-19 12:51:31 +0000
committerXavier Roche <xroche@users.noreply.github.com>2012-03-19 12:51:31 +0000
commit25adbdabb47499fe641c7bd9595024ff82667058 (patch)
tree4200bb5e746bc1c0606de615ec99f0a247d4d9ba /src
parentad5b7acc19290ff91e0f42a0de448a26760fcf99 (diff)
httrack 3.30.1
Diffstat (limited to 'src')
-rw-r--r--src/Makefile10
-rw-r--r--src/Makefile.am60
-rw-r--r--src/Makefile.in1052
-rwxr-xr-xsrc/configure603
-rw-r--r--src/gpl.txt287
-rw-r--r--src/hts-indextmpl.h12
-rw-r--r--src/htsalias.c80
-rw-r--r--src/htsalias.h2
-rw-r--r--src/htsback.c1037
-rw-r--r--src/htsback.h13
-rw-r--r--src/htsbase.h343
-rw-r--r--src/htsbasenet.h80
-rw-r--r--src/htsbauth.c70
-rw-r--r--src/htscache.c544
-rw-r--r--src/htscache.h6
-rw-r--r--src/htscatchurl.c36
-rw-r--r--src/htscatchurl.h8
-rw-r--r--src/htscore.c2789
-rw-r--r--src/htscore.h125
-rw-r--r--src/htscoremain.c569
-rw-r--r--src/htscoremain.h6
-rw-r--r--src/htsdefines.h10
-rw-r--r--src/htsfilters.c2
-rw-r--r--src/htsftp.c141
-rw-r--r--src/htsglobal.h187
-rw-r--r--src/htshash.c263
-rw-r--r--src/htshash.h57
-rw-r--r--src/htshelp.c144
-rw-r--r--src/htsindex.c15
-rw-r--r--src/htsinthash.c252
-rw-r--r--src/htsinthash.h94
-rw-r--r--src/htsjava.c100
-rw-r--r--src/htsjava.h9
-rw-r--r--src/htslib.c1319
-rw-r--r--src/htslib.h123
-rw-r--r--src/htsmodules.c305
-rw-r--r--src/htsmodules.h111
-rw-r--r--src/htsname.c420
-rw-r--r--src/htsnet.h34
-rw-r--r--src/htsnostatic.c3
-rw-r--r--src/htsnostatic.h8
-rw-r--r--src/htsopt.h16
-rw-r--r--src/htsparse.c4162
-rw-r--r--src/htsparse.h108
-rw-r--r--src/htsrobots.c9
-rw-r--r--src/htsrobots.h4
-rw-r--r--src/htsserver.c1814
-rw-r--r--src/htsserver.h149
-rw-r--r--src/htssystem.h16
-rw-r--r--src/htssystem.h.windows9x11
-rw-r--r--src/htsthread.c2
-rw-r--r--src/htsthread.h2
-rw-r--r--src/htstools.c144
-rw-r--r--src/htstools.h43
-rw-r--r--src/htsweb.c653
-rw-r--r--src/htsweb.h110
-rw-r--r--src/htswizard.c326
-rw-r--r--src/htswizard.h2
-rw-r--r--src/htswrap.c9
-rw-r--r--src/htswrap.h14
-rw-r--r--src/htszlib.c11
-rw-r--r--src/htszlib.h21
-rw-r--r--src/httrack-library.h154
-rw-r--r--src/httrack.c102
-rw-r--r--src/httrack.dsp213
-rw-r--r--src/httrack.dsw15
-rw-r--r--src/httrack.h6
-rwxr-xr-xsrc/postinst-config.in55
-rwxr-xr-xsrc/strip_cr.in32
-rwxr-xr-xsrc/webhttrack101
-rwxr-xr-xsrc/webhttrack.dsp120
-rwxr-xr-xsrc/webhttrack.dsw44
72 files changed, 12975 insertions, 6822 deletions
diff --git a/src/Makefile b/src/Makefile
deleted file mode 100644
index be898d9..0000000
--- a/src/Makefile
+++ /dev/null
@@ -1,10 +0,0 @@
-all :
- @echo "please launch configure before! :"
- @echo "./configure"
- @echo ""
- @echo "then, you can launch:"
- @echo "make"
- @echo "make install"
- @echo ""
- @echo "(see INSTALL file to know how-to-install)"
-
diff --git a/src/Makefile.am b/src/Makefile.am
new file mode 100644
index 0000000..c391d3f
--- /dev/null
+++ b/src/Makefile.am
@@ -0,0 +1,60 @@
+#SUBDIRS = swf
+
+DevIncludesdir = $(includedir)/httrack
+DevIncludes_DATA = httrack-library.h \
+ htsglobal.h \
+ htsopt.h \
+ htswrap.h \
+ htssystem.h \
+ htsconfig.h \
+ ../config.h \
+ htsmodules.h \
+ htsbasenet.h \
+ htsbauth.h
+
+INCLUDES = \
+ @DEFAULT_CFLAGS@ \
+ @THREADS_CFLAGS@ \
+ @V6_FLAG@ \
+ @LFS_FLAG@ \
+ -DPREFIX=\""$(prefix)"\" \
+ -DSYSCONFDIR=\""$(sysconfdir)"\" \
+ -DDATADIR=\""$(datadir)"\" \
+ -DLIBDIR=\""$(libdir)"\"
+
+bin_PROGRAMS = httrack htsserver
+# Link against the in-tree libtool archive so automake records the dependency.
+httrack_LDADD = $(THREADS_LIBS) libhttrack.la
+htsserver_LDADD = $(THREADS_LIBS) $(SOCKET_LIBS) libhttrack.la
+
+lib_LTLIBRARIES = libhttrack.la
+
+htsserver_SOURCES = htsserver.c htsserver.h htsweb.c htsweb.h
+
+whttrackrundir = $(bindir)
+whttrackrun_SCRIPTS = webhttrack
+
+libhttrack_la_SOURCES = htscore.c htsparse.c htsback.c htscache.c \
+ htscatchurl.c htsfilters.c htsftp.c htshash.c htsinthash.c \
+ htshelp.c htsjava.c htslib.c htscoremain.c \
+ htsname.c htsrobots.c htstools.c htswizard.c \
+ htsalias.c htsthread.c htsindex.c htsbauth.c \
+ htsmd5.c htszlib.c htsnostatic.c htswrap.c \
+ htsmodules.c \
+ md5.c \
+ hts-indextmpl.h htsalias.h htsback.h htsbase.h \
+ htsbasenet.h htsbauth.h htscache.h htscatchurl.h \
+ htsconfig.h htscore.h htsparse.h htscoremain.h htsdefines.h \
+ htsfilters.h htsftp.h htsglobal.h htshash.h htsinthash.h \
+ htshelp.h htsindex.h htsjava.h htslib.h htsmd5.h \
+ htsmodules.h htsname.h htsnet.h htsnostatic.h \
+ htsopt.h htsrobots.h htssystem.h htsthread.h \
+ htstools.h htswizard.h htswrap.h htszlib.h \
+ httrack-library.h md5.h
+
+libhttrack_la_LIBADD = $(THREADS_LIBS) $(DL_LIBS) $(SOCKET_LIBS)
+libhttrack_la_LDFLAGS = -version-info $(VERSION_INFO)
+
+EXTRA_DIST = httrack.h webhttrack \
+ httrack.dsp httrack.dsw \
+ webhttrack.dsp webhttrack.dsw
diff --git a/src/Makefile.in b/src/Makefile.in
index 57bf1d3..22590c6 100644
--- a/src/Makefile.in
+++ b/src/Makefile.in
@@ -1,417 +1,639 @@
-# HTTrack Website Copier, Offline Browser for Windows and Unix
-# Copyright (C) Xavier Roche and other contributors
-#
-# This program is free software; you can redistribute it and/or
-# modify it under the terms of the GNU General Public License
-# as published by the Free Software Foundation; either version 2
-# of the License, or any later version.
-#
+# Makefile.in generated by automake 1.7 from Makefile.am.
+# @configure_input@
+
+# Copyright 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002
+# Free Software Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# Version: 1.3 (09/2001)
-# Usage: just type 'make'
-
-# Generated by configure
-AUTONAME =
-#__AUTONAME__
-
-
-### Makefile commands
-SHELL = /bin/sh
-MAIN =
-CC = gcc
-CFLAGS = __CFLAGS__
-# threads
-LFLAGS = __LFLAGS__ __LFLAGS2__ __LFLAGS3__
-
-
-BINARIES = htscore.o htsback.o htscache.o\
- htscatchurl.o htsfilters.o htsftp.o htshash.o\
- htshelp.o htsjava.o htslib.o htscoremain.o\
- htsname.o htsrobots.o htstools.o htswizard.o\
- htsalias.o htsthread.o htsindex.o\
- htsbauth.o htsmd5.o\
- htswrap.o md5.o htszlib.o\
- htsnostatic.o \
- httrack.o
-
-SOBINARIES = htscore.o htsback.o htscache.o\
- htscatchurl.o htsfilters.o htsftp.o htshash.o\
- htshelp.o htsjava.o htslib.o htscoremain.o\
- htsname.o htsrobots.o htstools.o htswizard.o\
- htsalias.o htsthread.o htsindex.o\
- htsbauth.o htsmd5.o htszlib.o\
- htsnostatic.o \
- htswrap.o md5.o
-
-MAINBIN = httrack.c
-
-BOUTPUT = httrack
-BOUTPUTSO = libhttrack.so
-DOCS = ../HelpHtml ../templates ../httrack-doc.html ../COPYING ../INSTALL ../README ../*.txt
-HTSSYSTEM = htssystem.h
-BINPATH = __BINPATH__
-ETCPATH = __ETCPATH__
-LIBPATH = __LIBPATH__
-PREFIX = __PREFIX__
-
-## Defines for "library" (program compiled with HTTrack)
-BINARIES_LIB = example.o httracklib.o\
- src/htscore.o src/htsback.o src/htscache.o\
- src/htscatchurl.o src/htsfilters.o src/htsftp.o src/htshash.o\
- src/htshelp.o src/htsjava.o src/htslib.o src/htscoremain.o\
- src/htsname.o src/htsrobots.o src/htstools.o src/htswizard.o\
- src/htsalias.o src/htsthread.o src/htsindex.o\
- src/htsbauth.o src/htsmd5.o htszlib.o\
- src/htsshow.o src/htswrap.o\
- src/htsnostatic.o \
- src/md5.o
-BOUTPUT_LIB = example
-HTSSYSTEM_LIB = src/htssystem.h
-
-# in_addr_t problems :
-# In case of problems during compiling,
-# make htss
-# edit htssystem.h and add the following line:
-# #define HTS_DO_NOT_REDEFINE_in_addr_t
-# make manual
-
-
-# Keywords for build types (example: make linux)
-MAKE_LINUX = linux
-MAKE_NETBSD = netbsd
-MAKE_OPENBSD = openbsd
-MAKE_SOLARIS = solaris
-MAKE_AIX = aix
-MAKE_STD = standard
-MAKE_STD2 = standard2
-MAKE_STD3 = standard3
-MAKE_STD4 = standard4
-###
-MAKE_LIB = build_httracklib
-###
-MAKE_AUTO = auto
-###
-
-# First, detect OS Type
-# If your make does not recognize this, change it!
-SHORTUNAME = $(shell uname)
-FULLUNAME = $(shell uname -a)
-
-### Targets:
-
-# Default target, attempt to use uname if necessary
-# if uname is empty, display info message
-all :
- @if test -n "$(AUTONAME)"; then\
- $(MAKE) $(AUTONAME);\
- elif test -n "$(SHORTUNAME)"; then\
- $(MAKE) $(SHORTUNAME);\
- else\
- $(MAKE) help;\
- fi
-
-# If we can not detect the OS Type, show a message info
-help :
- @clear
- @echo ""
- @echo "Welcome to HTTrack Website Copier install!"
- @echo "-----------------------------------------"
- @echo ""
- @echo "1. To make HTTrack, just type in:"
- @echo " make $(MAKE_LINUX)"
- @echo " or"
- @echo " make $(MAKE_NETBSD)"
- @echo " or"
- @echo " make $(MAKE_OPENBSD)"
- @echo " or"
- @echo " make $(MAKE_SOLARIS)"
- @echo " or"
- @echo " make $(MAKE_AIX)"
- @echo " or"
- @echo " make $(MAKE_STD)"
- @echo " or (problems with in_addr_t)"
- @echo " make $(MAKE_STD2)"
- @echo " or (problems with 64-bit)"
- @echo " make $(MAKE_STD3)"
- @echo " or (problems with both in_addr_t and 64-bit)"
- @echo " make $(MAKE_STD4)"
- @echo ""
- @echo "According to your OS type"
- @echo "(example: type in 'make $(MAKE_LINUX)' if you compile HTTrack with linux)"
- @echo
- @echo "Or, if it does not work, you can try "
- @echo " make htss"
- @echo " edit htssystem.h (check OS type), and add the following line:"
- @echo " #define HTS_DO_NOT_REDEFINE_in_addr_t"
- @echo " make manual"
- @echo
- @echo "2. Then, type in 'make install' to copy httrack to $(BINPATH)"
- @echo " or just use ./httrack to launch the program"
- @echo ""
- @echo "3. Build problems, type in:"
- @echo " make moreinfo"
- @echo ""
- @echo "Have fun with HTTrack Website Copier!"
- @echo ""
-info : help
-
-# Troubleshooter
-moreinfo :
- @echo "Known problems:"
- @echo ""
- @echo "\`in_addr_t' undeclared (first use this function)"
- @echo "see in_addr_t problems in Makefile"
- @echo ""
-
-###
-
-## Build Targets (this is the name given by 'uname')
-Linux : $(MAKE_LINUX)
-SunOS : $(MAKE_SOLARIS)
-AIX : $(MAKE_AIX)
-NetBSD : $(MAKE_NETBSD)
-OpenBSD : $(MAKE_OPENBSD)
-
-### Build Targets (standard types)
-default : firstinfo htssystem htssystem_default build_default strip clean lastinfo
-$(MAKE_LINUX) : firstinfo htssystem htssystem_linux build_default strip clean lastinfo
-$(MAKE_SOLARIS) : firstinfo htssystem htssystem_solaris build_solaris strip clean lastinfo
-$(MAKE_AIX) : firstinfo htssystem htssystem_aix build_default strip clean lastinfo
-$(MAKE_NETBSD) : firstinfo htssystem htssystem_netbsd build_default strip clean lastinfo
-$(MAKE_OPENBSD) : firstinfo htssystem htssystem_openbsd build_nopthread strip clean lastinfo
-$(MAKE_STD) : firstinfo htssystem htssystem_default build_default strip clean lastinfo
-$(MAKE_STD2) : firstinfo htssystem htssystem_default2 build_default strip clean lastinfo
-$(MAKE_STD3) : firstinfo htssystem htssystem_default3 build_default strip clean lastinfo
-$(MAKE_STD4) : firstinfo htssystem htssystem_default4 build_default strip clean lastinfo
-### Defines for "library" (program compiled with HTTrack)
-$(MAKE_LIB) : build_lib strip_lib clean_lib lastinfo
-###
-$(MAKE_AUTO) : __MAKEAUTO__
-###
-
-## Defines for OSes
-lib_default : htssystem htssystem_default addhtssystem_lib info_lib
-lib_linux : htssystem htssystem_linux addhtssystem_lib info_lib
-lib_solaris : htssystem htssystem_solaris addhtssystem_lib info_lib
-lib_aix : htssystem htssystem_aix addhtssystem_lib info_lib
-lib_netbsd : htssystem htssystem_netbsd addhtssystem_lib info_lib
-lib_openbsd : htssystem htssystem_openbsd addhtssystem_lib info_lib
-lib_std : htssystem htssystem_default addhtssystem_lib info_lib
-
-
-# manual build
-htss : htssystem htssystem_default
-manual : build_default strip clean lastinfo
-
-# Creates htssystem.h file
-htssystem :
- @echo "/* HTTrack, Offline Browser for Windows and Unix */" > $(HTSSYSTEM)
- @echo "" >> $(HTSSYSTEM)
- @echo "/* HTTrack system definition */" >> $(HTSSYSTEM)
- @echo "/* This should be the only file you have to change */" >> $(HTSSYSTEM)
- @echo "" >> $(HTSSYSTEM)
- @echo "/* Solaris: 0 / Windows: 1 / AIX: 2 / Linux: 3 */" >> $(HTSSYSTEM)
- @echo "" >> $(HTSSYSTEM)
- @echo "" >> $(HTSSYSTEM)
- @echo "/* Fix plateform number to 0 (SunOS) */" >> $(HTSSYSTEM)
- @echo "/* If it doesn't compile, try another one */" >> $(HTSSYSTEM)
-
-htssystem_solaris :
- @echo "#define HTS_PLATFORM 0" >> $(HTSSYSTEM)
- @echo "#define HTS_PLATFORM_NAME \"$(FULLUNAME)\"" >> $(HTSSYSTEM)
- @echo "#define HTS_LITTLE_ENDIAN" >> $(HTSSYSTEM)
- @echo "" >> $(HTSSYSTEM)
-
-htssystem_aix :
- @echo "#define HTS_PLATFORM 2" >> $(HTSSYSTEM)
- @echo "#define HTS_PLATFORM_NAME \"$(FULLUNAME)\"" >> $(HTSSYSTEM)
- @echo "#define HTS_LITTLE_ENDIAN" >> $(HTSSYSTEM)
- @echo "" >> $(HTSSYSTEM)
-
-htssystem_linux :
- @echo "#define HTS_PLATFORM 3" >> $(HTSSYSTEM)
- @echo "#define HTS_PLATFORM_NAME \"$(FULLUNAME)\"" >> $(HTSSYSTEM)
- @echo "" >> $(HTSSYSTEM)
-
-htssystem_netbsd:
- @echo "#define HTS_PLATFORM 3" >> $(HTSSYSTEM)
- @echo "#define HTS_PLATFORM_NAME \"$(FULLUNAME)\"" >> $(HTSSYSTEM)
- @echo "#define HTS_DO_NOT_REDEFINE_in_addr_t" >> $(HTSSYSTEM)
- @echo "" >> $(HTSSYSTEM)
-
-htssystem_openbsd:
- @echo "#define HTS_PLATFORM 3" >> $(HTSSYSTEM)
- @echo "#define HTS_PLATFORM_NAME \"$(FULLUNAME)\"" >> $(HTSSYSTEM)
- @echo "#define HTS_DO_NOT_REDEFINE_in_addr_t" >> $(HTSSYSTEM)
- @echo "#define HTS_DO_NOT_USE_PTHREAD" >> $(HTSSYSTEM)
- @echo "#define HTS_DO_NOT_USE_UID" >> $(HTSSYSTEM)
- @echo "" >> $(HTSSYSTEM)
-
-htssystem_nopthread:
- @echo "#define HTS_PLATFORM 3" >> $(HTSSYSTEM)
- @echo "#define HTS_PLATFORM_NAME \"$(FULLUNAME)\"" >> $(HTSSYSTEM)
- @echo "#define HTS_DO_NOT_USE_PTHREAD" >> $(HTSSYSTEM)
- @echo "" >> $(HTSSYSTEM)
-
-htssystem_default :
- @echo "#define HTS_PLATFORM 3" >> $(HTSSYSTEM)
- @echo "#define HTS_PLATFORM_NAME \"$(FULLUNAME)\"" >> $(HTSSYSTEM)
- @echo "" >> $(HTSSYSTEM)
-
-htssystem_default2 :
- @echo "#define HTS_PLATFORM 3" >> $(HTSSYSTEM)
- @echo "#define HTS_PLATFORM_NAME \"$(FULLUNAME)\"" >> $(HTSSYSTEM)
- @echo "#define HTS_DO_NOT_REDEFINE_in_addr_t" >> $(HTSSYSTEM)
- @echo "" >> $(HTSSYSTEM)
-
-htssystem_default3 :
- @echo "#define HTS_PLATFORM 3" >> $(HTSSYSTEM)
- @echo "#define HTS_PLATFORM_NAME \"$(FULLUNAME)\"" >> $(HTSSYSTEM)
- @echo "#define HTS_NO_64_BIT" >> $(HTSSYSTEM)
- @echo "" >> $(HTSSYSTEM)
-
-htssystem_default4 :
- @echo "#define HTS_PLATFORM 3" >> $(HTSSYSTEM)
- @echo "#define HTS_PLATFORM_NAME \"$(FULLUNAME)\"" >> $(HTSSYSTEM)
- @echo "#define HTS_DO_NOT_REDEFINE_in_addr_t" >> $(HTSSYSTEM)
- @echo "#define HTS_NO_64_BIT" >> $(HTSSYSTEM)
- @echo "" >> $(HTSSYSTEM)
-
-# Generated by configure
-htssystem_auto :
- @echo "#define HTS_PLATFORM __PLATFORM__" >> $(HTSSYSTEM)
- @echo "#define HTS_PLATFORM_NAME \"$(FULLUNAME)\"" >> $(HTSSYSTEM)
- @echo "#define __DEFINEUID__" >> $(HTSSYSTEM)
- @echo "#define __DEFINEINA__" >> $(HTSSYSTEM)
- @echo "#define __DEFINEPTH__" >> $(HTSSYSTEM)
- @echo "#define __DEFINE64B__" >> $(HTSSYSTEM)
- @echo "#define __DEFINEFTI__" >> $(HTSSYSTEM)
- @echo "#define HTS_PREFIX \"__DEFINEPRE__\"" >> $(HTSSYSTEM)
- @echo "#define HTS_BINPATH \"__BINPATH__\"" >> $(HTSSYSTEM)
- @echo "#define HTS_ETCPATH \"__ETCPATH__\"" >> $(HTSSYSTEM)
- @echo "#define HTS_LIBPATH \"__LIBPATH__\"" >> $(HTSSYSTEM)
- @echo "#define HTS_USEZLIB __ZLIB__" >> $(HTSSYSTEM)
- @echo "#define HTS_ALIGN __PTRALIGN__" >> $(HTSSYSTEM)
- @echo "#define HTS_INET6 __INET6__" >> $(HTSSYSTEM)
- @echo "#define HTS_USEOPENSSL __SSL__" >> $(HTSSYSTEM)
-
-## Defines for "library" (program compiled with HTTrack)
-addhtssystem_lib :
- @echo "/* Extended functions */" >> $(HTSSYSTEM)
- @echo "#define HTS_ANALYSTE 2" >> $(HTSSYSTEM)
- @echo "" >> $(HTSSYSTEM)
-
-# Info message before build
-firstinfo :
- @echo "Building all, please wait"
- @echo "In case of problems, type in:"
- @echo "make help"
- @echo ""
- @echo "OS TYPE: $(SHORTUNAME)"
- @echo "Make mode: $(MAKECMDGOALS)"
- @echo
-
-##
-info_lib :
- @echo "Please copy htssystem.h to src/htssystem.h by typing:"
- @echo "cp htssystem.h src/htssystem.h"
-
-### Targets for compiling
-build_solaris : $(BINARIES)
- $(CC) $(CFLAGS) $(BINARIES) -o $(BOUTPUT) $(LFLAGS) -lnsl -lsocket
- chmod 755 $(BOUTPUT)
-
-build_nopthread: $(BINARIES)
- $(CC) $(NOPCFLAGS) $(BINARIES) -o $(BOUTPUT) $(NOPLFLAGS)
- chmod 755 $(BOUTPUT)
-
-build_default : $(BINARIES)
- $(CC) $(CFLAGS) $(BINARIES) -o $(BOUTPUT) $(LFLAGS)
- chmod 755 $(BOUTPUT)
-
-## Defines for "library" (program compiled with HTTrack)
-build_lib : $(BINARIES_LIB)
- $(CC) $(CFLAGS) $(BINARIES_LIB) -o $(BOUTPUT_LIB) $(LFLAGS)
- chmod 644 $(BOUTPUT_LIB)
-
-## Auto
-build_auto : build_bin__DYNAMIC__
-
-build_bin : $(BINARIES)
- $(CC) $(CFLAGS) $(BINARIES) -o $(BOUTPUT) $(LFLAGS)
- chmod 755 $(BOUTPUT)
-
-build_binso : $(SOBINARIES)
- $(CC) $(CFLAGS) -shared -Wl,-x,-soname,$(BOUTPUTSO) -o $(BOUTPUTSO) $(SOBINARIES) -lc $(LFLAGS)
- $(CC) -L. -lhttrack $(MAINBIN) -o $(BOUTPUT)
- chmod 755 $(BOUTPUT)
-
-##
-# Strip file so that is can be shorter
-strip :
- strip --strip-all $(BOUTPUT) || strip $(BOUTPUT)
- __STRIPLIB__
-
-strip_lib :
- strip --strip-unneeded $(BOUTPUT_LIB)
-
-# Cleaning up..
-clean :
- rm -f $(BINARIES)
-
-## Defines for "library" (program compiled with HTTrack)
-clean_lib :
- rm -f $(BINARIES_LIB)
-
-# Bye bye
-lastinfo :
- @echo "Build successful"
-
-# Installing httrack into the correct folder
-install : __INSTALL__
-uninstall : remove
-remove : __UNINSTALL__
-
-# Install docs
-docinstall :
- (mkdir -p "$(PREFIX)/doc/httrack" && chmod 755 "$(PREFIX)/doc/httrack") || true
- cp -fR $(DOCS) "$(PREFIX)/doc/httrack/"
-
-# Uninstall docs
-docremove :
- rm -rf "$(PREFIX)/doc/httrack"
-
-# Install binaries and conf files
-bininstall :
- @echo "Copying $(BOUTPUT) to $(BINPATH).."
- test ! -d "$(BINPATH)" && (mkdir -p "$(BINPATH)" && chmod 755 "$(BINPATH)") || true
- cp -f $(BOUTPUT) $(BINPATH)
- chmod 755 $(BINPATH)/$(BOUTPUT)
-
-libremove :
- rm -f "$(LIBPATH)/$(BOUTPUTSO)"
- test -L "$(PREFIX)/lib/$(BOUTPUTSO).1" && rm -f "$(PREFIX)/lib/$(BOUTPUTSO).1" || true
-
-libinstall :
- @echo "Copying $(BOUTPUTSO) to $(LIBPATH)/.."
- test ! -d "$(LIBPATH)" && (mkdir -p "$(LIBPATH)" && chmod 755 "$(LIBPATH)") || true
- cp -f $(BOUTPUTSO) $(LIBPATH)/
- chmod 644 $(LIBPATH)/$(BOUTPUTSO)
- ln -sf "$(BOUTPUTSO)" "$(PREFIX)/lib/$(BOUTPUTSO).1"
-
-
-# Uninstall binaries
-binremove :
- rm -f $(BINPATH)/$(BOUTPUT)
- rm -f $(ETCPATH)/httrack.conf
-
-# Configure program
-config :
- @./postinst-config
-
-###
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+#SUBDIRS = swf
+
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+top_builddir = ..
+
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+INSTALL = @INSTALL@
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+host_triplet = @host@
+ACLOCAL = @ACLOCAL@
+AMDEP_FALSE = @AMDEP_FALSE@
+AMDEP_TRUE = @AMDEP_TRUE@
+AMTAR = @AMTAR@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFAULT_CFLAGS = @DEFAULT_CFLAGS@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DL_LIBS = @DL_LIBS@
+ECHO = @ECHO@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LDFLAGS = @LDFLAGS@
+LFS_FLAG = @LFS_FLAG@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAINT = @MAINT@
+MAINTAINER_MODE_FALSE = @MAINTAINER_MODE_FALSE@
+MAINTAINER_MODE_TRUE = @MAINTAINER_MODE_TRUE@
+MAKEINFO = @MAKEINFO@
+OBJEXT = @OBJEXT@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SOCKET_LIBS = @SOCKET_LIBS@
+STRIP = @STRIP@
+THREADS_CFLAGS = @THREADS_CFLAGS@
+THREADS_LIBS = @THREADS_LIBS@
+V6_FLAG = @V6_FLAG@
+VERSION = @VERSION@
+VERSION_INFO = @VERSION_INFO@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_RANLIB = @ac_ct_RANLIB@
+ac_ct_STRIP = @ac_ct_STRIP@
+am__fastdepCC_FALSE = @am__fastdepCC_FALSE@
+am__fastdepCC_TRUE = @am__fastdepCC_TRUE@
+am__fastdepCXX_FALSE = @am__fastdepCXX_FALSE@
+am__fastdepCXX_TRUE = @am__fastdepCXX_TRUE@
+am__include = @am__include@
+am__quote = @am__quote@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+datadir = @datadir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+oldincludedir = @oldincludedir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+
+DevIncludesdir = $(includedir)/httrack
+DevIncludes_DATA = httrack-library.h \
+ htsglobal.h \
+ htsopt.h \
+ htswrap.h \
+ htssystem.h \
+ htsconfig.h \
+ ../config.h \
+ htsmodules.h \
+ htsbasenet.h \
+ htsbauth.h
+
+
+INCLUDES = \
+ @DEFAULT_CFLAGS@ \
+ @THREADS_CFLAGS@ \
+ @V6_FLAG@ \
+ @LFS_FLAG@ \
+ -DPREFIX=\""$(prefix)"\" \
+ -DSYSCONFDIR=\""$(sysconfdir)"\" \
+ -DDATADIR=\""$(datadir)"\" \
+ -DLIBDIR=\""$(libdir)"\"
+
+
+bin_PROGRAMS = httrack htsserver
+# Link against the in-tree libtool archive, not whatever -lhttrack resolves to.
+httrack_LDADD = $(THREADS_LIBS) libhttrack.la
+htsserver_LDADD = $(THREADS_LIBS) $(SOCKET_LIBS) libhttrack.la
+
+lib_LTLIBRARIES = libhttrack.la
+
+htsserver_SOURCES = htsserver.c htsserver.h htsweb.c htsweb.h
+
+whttrackrundir = $(bindir)
+whttrackrun_SCRIPTS = webhttrack
+
+libhttrack_la_SOURCES = htscore.c htsparse.c htsback.c htscache.c \
+ htscatchurl.c htsfilters.c htsftp.c htshash.c htsinthash.c \
+ htshelp.c htsjava.c htslib.c htscoremain.c \
+ htsname.c htsrobots.c htstools.c htswizard.c \
+ htsalias.c htsthread.c htsindex.c htsbauth.c \
+ htsmd5.c htszlib.c htsnostatic.c htswrap.c \
+ htsmodules.c \
+ md5.c \
+ hts-indextmpl.h htsalias.h htsback.h htsbase.h \
+ htsbasenet.h htsbauth.h htscache.h htscatchurl.h \
+ htsconfig.h htscore.h htsparse.h htscoremain.h htsdefines.h \
+ htsfilters.h htsftp.h htsglobal.h htshash.h htsinthash.h \
+ htshelp.h htsindex.h htsjava.h htslib.h htsmd5.h \
+ htsmodules.h htsname.h htsnet.h htsnostatic.h \
+ htsopt.h htsrobots.h htssystem.h htsthread.h \
+ htstools.h htswizard.h htswrap.h htszlib.h \
+ httrack-library.h md5.h
+
+
+libhttrack_la_LIBADD = $(THREADS_LIBS) $(DL_LIBS) $(SOCKET_LIBS)
+libhttrack_la_LDFLAGS = -version-info $(VERSION_INFO)
+
+EXTRA_DIST = httrack.h webhttrack \
+ httrack.dsp httrack.dsw \
+ webhttrack.dsp webhttrack.dsw
+
+subdir = src
+mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+LTLIBRARIES = $(lib_LTLIBRARIES)
+
+libhttrack_la_DEPENDENCIES =
+am_libhttrack_la_OBJECTS = htscore.lo htsparse.lo htsback.lo htscache.lo \
+ htscatchurl.lo htsfilters.lo htsftp.lo htshash.lo htsinthash.lo \
+ htshelp.lo htsjava.lo htslib.lo htscoremain.lo htsname.lo \
+ htsrobots.lo htstools.lo htswizard.lo htsalias.lo htsthread.lo \
+ htsindex.lo htsbauth.lo htsmd5.lo htszlib.lo htsnostatic.lo \
+ htswrap.lo htsmodules.lo md5.lo
+libhttrack_la_OBJECTS = $(am_libhttrack_la_OBJECTS)
+bin_PROGRAMS = httrack$(EXEEXT) htsserver$(EXEEXT)
+PROGRAMS = $(bin_PROGRAMS)
+# Both programs link against libhttrack, so it must be built before them.
+am_htsserver_OBJECTS = htsserver.$(OBJEXT) htsweb.$(OBJEXT)
+htsserver_OBJECTS = $(am_htsserver_OBJECTS)
+htsserver_DEPENDENCIES = libhttrack.la
+htsserver_LDFLAGS =
+httrack_SOURCES = httrack.c
+httrack_OBJECTS = httrack.$(OBJEXT)
+httrack_DEPENDENCIES = libhttrack.la
+httrack_LDFLAGS =
+SCRIPTS = $(whttrackrun_SCRIPTS)
+
+
+DEFAULT_INCLUDES = -I. -I$(srcdir) -I$(top_builddir)
+depcomp = $(SHELL) $(top_srcdir)/depcomp
+am__depfiles_maybe = depfiles
+@AMDEP_TRUE@DEP_FILES = ./$(DEPDIR)/htsalias.Plo ./$(DEPDIR)/htsback.Plo \
+@AMDEP_TRUE@ ./$(DEPDIR)/htsbauth.Plo ./$(DEPDIR)/htscache.Plo \
+@AMDEP_TRUE@ ./$(DEPDIR)/htscatchurl.Plo ./$(DEPDIR)/htscore.Plo \
+@AMDEP_TRUE@ ./$(DEPDIR)/htscoremain.Plo \
+@AMDEP_TRUE@ ./$(DEPDIR)/htsfilters.Plo ./$(DEPDIR)/htsftp.Plo \
+@AMDEP_TRUE@ ./$(DEPDIR)/htshash.Plo ./$(DEPDIR)/htshelp.Plo \
+@AMDEP_TRUE@ ./$(DEPDIR)/htsindex.Plo ./$(DEPDIR)/htsinthash.Plo \
+@AMDEP_TRUE@ ./$(DEPDIR)/htsjava.Plo ./$(DEPDIR)/htslib.Plo \
+@AMDEP_TRUE@ ./$(DEPDIR)/htsmd5.Plo ./$(DEPDIR)/htsmodules.Plo \
+@AMDEP_TRUE@ ./$(DEPDIR)/htsname.Plo ./$(DEPDIR)/htsnostatic.Plo \
+@AMDEP_TRUE@ ./$(DEPDIR)/htsparse.Plo ./$(DEPDIR)/htsrobots.Plo \
+@AMDEP_TRUE@ ./$(DEPDIR)/htsserver.Po ./$(DEPDIR)/htsthread.Plo \
+@AMDEP_TRUE@ ./$(DEPDIR)/htstools.Plo ./$(DEPDIR)/htsweb.Po \
+@AMDEP_TRUE@ ./$(DEPDIR)/htswizard.Plo ./$(DEPDIR)/htswrap.Plo \
+@AMDEP_TRUE@ ./$(DEPDIR)/htszlib.Plo ./$(DEPDIR)/httrack.Po \
+@AMDEP_TRUE@ ./$(DEPDIR)/md5.Plo
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) \
+ $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(LIBTOOL) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+ $(AM_LDFLAGS) $(LDFLAGS) -o $@
+DIST_SOURCES = $(libhttrack_la_SOURCES) $(htsserver_SOURCES) httrack.c
+DATA = $(DevIncludes_DATA)
+
+DIST_COMMON = Makefile.am Makefile.in
+SOURCES = $(libhttrack_la_SOURCES) $(htsserver_SOURCES) httrack.c
+
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ Makefile.am $(top_srcdir)/configure.in $(ACLOCAL_M4)
+ cd $(top_srcdir) && \
+ $(AUTOMAKE) --gnu src/Makefile
+Makefile: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.in $(top_builddir)/config.status
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)
+libLTLIBRARIES_INSTALL = $(INSTALL)
+install-libLTLIBRARIES: $(lib_LTLIBRARIES)
+ @$(NORMAL_INSTALL)
+ $(mkinstalldirs) $(DESTDIR)$(libdir)
+ @list='$(lib_LTLIBRARIES)'; for p in $$list; do \
+ if test -f $$p; then \
+ f="`echo $$p | sed -e 's|^.*/||'`"; \
+ echo " $(LIBTOOL) --mode=install $(libLTLIBRARIES_INSTALL) $(INSTALL_STRIP_FLAG) $$p $(DESTDIR)$(libdir)/$$f"; \
+ $(LIBTOOL) --mode=install $(libLTLIBRARIES_INSTALL) $(INSTALL_STRIP_FLAG) $$p $(DESTDIR)$(libdir)/$$f; \
+ else :; fi; \
+ done
+
+uninstall-libLTLIBRARIES:
+ @$(NORMAL_UNINSTALL)
+ @list='$(lib_LTLIBRARIES)'; for p in $$list; do \
+ p="`echo $$p | sed -e 's|^.*/||'`"; \
+ echo " $(LIBTOOL) --mode=uninstall rm -f $(DESTDIR)$(libdir)/$$p"; \
+ $(LIBTOOL) --mode=uninstall rm -f $(DESTDIR)$(libdir)/$$p; \
+ done
+
+clean-libLTLIBRARIES:
+ -test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES)
+ @list='$(lib_LTLIBRARIES)'; for p in $$list; do \
+ dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \
+ test "$$dir" = "$$p" && dir=.; \
+ echo "rm -f \"$${dir}/so_locations\""; \
+ rm -f "$${dir}/so_locations"; \
+ done
+libhttrack.la: $(libhttrack_la_OBJECTS) $(libhttrack_la_DEPENDENCIES)
+ $(LINK) -rpath $(libdir) $(libhttrack_la_LDFLAGS) $(libhttrack_la_OBJECTS) $(libhttrack_la_LIBADD) $(LIBS)
+binPROGRAMS_INSTALL = $(INSTALL_PROGRAM)
+install-binPROGRAMS: $(bin_PROGRAMS)
+ @$(NORMAL_INSTALL)
+ $(mkinstalldirs) $(DESTDIR)$(bindir)
+ @list='$(bin_PROGRAMS)'; for p in $$list; do \
+ p1=`echo $$p|sed 's/$(EXEEXT)$$//'`; \
+ if test -f $$p \
+ || test -f $$p1 \
+ ; then \
+ f=`echo "$$p1" | sed 's,^.*/,,;$(transform);s/$$/$(EXEEXT)/'`; \
+ echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) --mode=install $(binPROGRAMS_INSTALL) $$p $(DESTDIR)$(bindir)/$$f"; \
+ $(INSTALL_PROGRAM_ENV) $(LIBTOOL) --mode=install $(binPROGRAMS_INSTALL) $$p $(DESTDIR)$(bindir)/$$f || exit 1; \
+ else :; fi; \
+ done
+
+uninstall-binPROGRAMS:
+ @$(NORMAL_UNINSTALL)
+ @list='$(bin_PROGRAMS)'; for p in $$list; do \
+ f=`echo "$$p" | sed 's,^.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/'`; \
+ echo " rm -f $(DESTDIR)$(bindir)/$$f"; \
+ rm -f $(DESTDIR)$(bindir)/$$f; \
+ done
+
+clean-binPROGRAMS:
+ @list='$(bin_PROGRAMS)'; for p in $$list; do \
+ f=`echo $$p|sed 's/$(EXEEXT)$$//'`; \
+ echo " rm -f $$p $$f"; \
+ rm -f $$p $$f ; \
+ done
+htsserver$(EXEEXT): $(htsserver_OBJECTS) $(htsserver_DEPENDENCIES)
+ @rm -f htsserver$(EXEEXT)
+ $(LINK) $(htsserver_LDFLAGS) $(htsserver_OBJECTS) $(htsserver_LDADD) $(LIBS)
+httrack$(EXEEXT): $(httrack_OBJECTS) $(httrack_DEPENDENCIES)
+ @rm -f httrack$(EXEEXT)
+ $(LINK) $(httrack_LDFLAGS) $(httrack_OBJECTS) $(httrack_LDADD) $(LIBS)
+whttrackrunSCRIPT_INSTALL = $(INSTALL_SCRIPT)
+install-whttrackrunSCRIPTS: $(whttrackrun_SCRIPTS)
+ @$(NORMAL_INSTALL)
+ $(mkinstalldirs) $(DESTDIR)$(whttrackrundir)
+ @list='$(whttrackrun_SCRIPTS)'; for p in $$list; do \
+ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
+ if test -f $$d$$p; then \
+ f=`echo "$$p" | sed 's|^.*/||;$(transform)'`; \
+ echo " $(whttrackrunSCRIPT_INSTALL) $$d$$p $(DESTDIR)$(whttrackrundir)/$$f"; \
+ $(whttrackrunSCRIPT_INSTALL) $$d$$p $(DESTDIR)$(whttrackrundir)/$$f; \
+ else :; fi; \
+ done
+
+uninstall-whttrackrunSCRIPTS:
+ @$(NORMAL_UNINSTALL)
+ @list='$(whttrackrun_SCRIPTS)'; for p in $$list; do \
+ f=`echo "$$p" | sed 's|^.*/||;$(transform)'`; \
+ echo " rm -f $(DESTDIR)$(whttrackrundir)/$$f"; \
+ rm -f $(DESTDIR)$(whttrackrundir)/$$f; \
+ done
+
+mostlyclean-compile:
+ -rm -f *.$(OBJEXT) core *.core
+
+distclean-compile:
+ -rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htsalias.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htsback.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htsbauth.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htscache.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htscatchurl.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htscore.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htscoremain.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htsfilters.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htsftp.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htshash.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htshelp.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htsindex.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htsinthash.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htsjava.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htslib.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htsmd5.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htsmodules.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htsname.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htsnostatic.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htsparse.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htsrobots.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htsserver.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htsthread.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htstools.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htsweb.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htswizard.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htswrap.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htszlib.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/httrack.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/md5.Plo@am__quote@
+
+distclean-depend:
+ -rm -rf ./$(DEPDIR)
+
+.c.o:
+@am__fastdepCC_TRUE@ if $(COMPILE) -MT $@ -MD -MP -MF "$(DEPDIR)/$*.Tpo" \
+@am__fastdepCC_TRUE@ -c -o $@ `test -f '$<' || echo '$(srcdir)/'`$<; \
+@am__fastdepCC_TRUE@ then mv "$(DEPDIR)/$*.Tpo" "$(DEPDIR)/$*.Po"; \
+@am__fastdepCC_TRUE@ else rm -f "$(DEPDIR)/$*.Tpo"; exit 1; \
+@am__fastdepCC_TRUE@ fi
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ depfile='$(DEPDIR)/$*.Po' tmpdepfile='$(DEPDIR)/$*.TPo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(COMPILE) -c `test -f '$<' || echo '$(srcdir)/'`$<
+
+.c.obj: # compile .c to .obj; the source path (local, else under $(srcdir)) is passed through $(CYGPATH_W)
+@am__fastdepCC_TRUE@ if $(COMPILE) -MT $@ -MD -MP -MF "$(DEPDIR)/$*.Tpo" \
+@am__fastdepCC_TRUE@ -c -o $@ `if test -f '$<'; then $(CYGPATH_W) '$<'; else $(CYGPATH_W) '$(srcdir)/$<'; fi`; \
+@am__fastdepCC_TRUE@ then mv "$(DEPDIR)/$*.Tpo" "$(DEPDIR)/$*.Po"; \
+@am__fastdepCC_TRUE@ else rm -f "$(DEPDIR)/$*.Tpo"; exit 1; \
+@am__fastdepCC_TRUE@ fi
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ depfile='$(DEPDIR)/$*.Po' tmpdepfile='$(DEPDIR)/$*.TPo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(COMPILE) -c `if test -f '$<'; then $(CYGPATH_W) '$<'; else $(CYGPATH_W) '$(srcdir)/$<'; fi`
+
+.c.lo:
+@am__fastdepCC_TRUE@ if $(LTCOMPILE) -MT $@ -MD -MP -MF "$(DEPDIR)/$*.Tpo" \
+@am__fastdepCC_TRUE@ -c -o $@ `test -f '$<' || echo '$(srcdir)/'`$<; \
+@am__fastdepCC_TRUE@ then mv "$(DEPDIR)/$*.Tpo" "$(DEPDIR)/$*.Plo"; \
+@am__fastdepCC_TRUE@ else rm -f "$(DEPDIR)/$*.Tpo"; exit 1; \
+@am__fastdepCC_TRUE@ fi
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ depfile='$(DEPDIR)/$*.Plo' tmpdepfile='$(DEPDIR)/$*.TPlo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ `test -f '$<' || echo '$(srcdir)/'`$<
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+
+distclean-libtool:
+ -rm -f libtool
+uninstall-info-am:
+DevIncludesDATA_INSTALL = $(INSTALL_DATA)
+install-DevIncludesDATA: $(DevIncludes_DATA)
+ @$(NORMAL_INSTALL)
+ $(mkinstalldirs) $(DESTDIR)$(DevIncludesdir)
+ @list='$(DevIncludes_DATA)'; for p in $$list; do \
+ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
+ f="`echo $$p | sed -e 's|^.*/||'`"; \
+ echo " $(DevIncludesDATA_INSTALL) $$d$$p $(DESTDIR)$(DevIncludesdir)/$$f"; \
+ $(DevIncludesDATA_INSTALL) $$d$$p $(DESTDIR)$(DevIncludesdir)/$$f; \
+ done
+
+uninstall-DevIncludesDATA:
+ @$(NORMAL_UNINSTALL)
+ @list='$(DevIncludes_DATA)'; for p in $$list; do \
+ f="`echo $$p | sed -e 's|^.*/||'`"; \
+ echo " rm -f $(DESTDIR)$(DevIncludesdir)/$$f"; \
+ rm -f $(DESTDIR)$(DevIncludesdir)/$$f; \
+ done
+
+ETAGS = etags
+ETAGSFLAGS =
+
+CTAGS = ctags
+CTAGSFLAGS =
+
+tags: TAGS
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) ' { files[$$0] = 1; } \
+ END { for (i in files) print i; }'`; \
+ mkid -fID $$unique
+
+TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ tags=; \
+ here=`pwd`; \
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) ' { files[$$0] = 1; } \
+ END { for (i in files) print i; }'`; \
+ test -z "$(ETAGS_ARGS)$$tags$$unique" \
+ || $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$tags $$unique
+
+ctags: CTAGS
+CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ tags=; \
+ here=`pwd`; \
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) ' { files[$$0] = 1; } \
+ END { for (i in files) print i; }'`; \
+ test -z "$(CTAGS_ARGS)$$tags$$unique" \
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+ $$tags $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && cd $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) $$here
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+
+top_distdir = ..
+distdir = $(top_distdir)/$(PACKAGE)-$(VERSION)
+
+distdir: $(DISTFILES)
+ @srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; \
+ list='$(DISTFILES)'; for file in $$list; do \
+ case $$file in \
+ $(srcdir)/*) file=`echo "$$file" | sed "s|^$$srcdirstrip/||"`;; \
+ esac; \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ dir=`echo "$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test "$$dir" != "$$file" && test "$$dir" != "."; then \
+ dir="/$$dir"; \
+ $(mkinstalldirs) "$(distdir)$$dir"; \
+ else \
+ dir=''; \
+ fi; \
+ if test -d $$d/$$file; then \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
+ fi; \
+ cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
+ else \
+ test -f $(distdir)/$$file \
+ || cp -p $$d/$$file $(distdir)/$$file \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile $(LTLIBRARIES) $(PROGRAMS) $(SCRIPTS) $(DATA)
+install-binPROGRAMS: install-libLTLIBRARIES
+
+
+installdirs:
+ $(mkinstalldirs) $(DESTDIR)$(libdir) $(DESTDIR)$(bindir) $(DESTDIR)$(whttrackrundir) $(DESTDIR)$(DevIncludesdir)
+
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ INSTALL_STRIP_FLAG=-s \
+ `test -z '$(STRIP)' || \
+ echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -rm -f Makefile $(CONFIG_CLEAN_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-binPROGRAMS clean-generic clean-libLTLIBRARIES \
+ clean-libtool mostlyclean-am
+
+distclean: distclean-am
+
+distclean-am: clean-am distclean-compile distclean-depend \
+ distclean-generic distclean-libtool distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+info: info-am
+
+info-am:
+
+install-data-am: install-DevIncludesDATA install-whttrackrunSCRIPTS
+
+install-exec-am: install-binPROGRAMS install-libLTLIBRARIES
+
+install-info: install-info-am
+
+install-man:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+ mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-DevIncludesDATA uninstall-binPROGRAMS \
+ uninstall-info-am uninstall-libLTLIBRARIES \
+ uninstall-whttrackrunSCRIPTS
+
+.PHONY: CTAGS GTAGS all all-am check check-am clean clean-binPROGRAMS \
+ clean-generic clean-libLTLIBRARIES clean-libtool ctags \
+ distclean distclean-compile distclean-depend distclean-generic \
+ distclean-libtool distclean-tags distdir dvi dvi-am info \
+ info-am install install-DevIncludesDATA install-am \
+ install-binPROGRAMS install-data install-data-am install-exec \
+ install-exec-am install-info install-info-am \
+ install-libLTLIBRARIES install-man install-strip \
+ install-whttrackrunSCRIPTS installcheck installcheck-am \
+ installdirs maintainer-clean maintainer-clean-generic \
+ mostlyclean mostlyclean-compile mostlyclean-generic \
+ mostlyclean-libtool pdf pdf-am ps ps-am tags uninstall \
+ uninstall-DevIncludesDATA uninstall-am uninstall-binPROGRAMS \
+ uninstall-info-am uninstall-libLTLIBRARIES \
+ uninstall-whttrackrunSCRIPTS
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/src/configure b/src/configure
deleted file mode 100755
index 7c2d472..0000000
--- a/src/configure
+++ /dev/null
@@ -1,603 +0,0 @@
-#!/bin/sh
-# No, this isn't generated by autoconf
-# Some parts are inspired by autoconf (Free Software Foundation), however
-# And the idea is slightly the same
-
-# Usage:
-# './configure' and then 'make' and 'make install', or
-# './configure --make --install'
-
-SHELL=/bin/sh
-
-ac_prev=
-for ac_option
-do
- # If the previous option needs an argument, assign it.
- if test -n "$ac_prev"; then
- eval "$ac_prev=\$ac_option"
- ac_prev=
- continue
- fi
-
- case "$ac_option" in
- -*=*) ac_optarg=`echo "$ac_option" | sed 's/[-_a-zA-Z0-9]*=//'` ;;
- *) ac_optarg= ;;
- esac
-
- case "$ac_option" in
-
- --pthread | --thread) THREADS=1 ;;
- --nopthread | --nothread) THREADS=0 ;;
-
- --zlib | --gzip) ZLIB=1 ;;
- --nozlib | --nogzip) ZLIB=0 ;;
-
- --static | --noso) DYNAMIC=0
- ;;
- --dynamic | --so) DYNAMIC=1
- ;;
-
- --longlong) LONGLONG=1 ;;
- --nolonglong) LONGLONG=0 ;;
-
- --inaddrt) NODECLINADDRT=0 ;;
- --noinaddrt) NODECLINADDRT=1 ;;
-
- --ipv6) IPV6=1 ;;
- --noipv6) IPV6=0 ;;
-
- --ssl) SSL=1 ;;
- --https) SSL=1 ;;
- --nossl) SSL=0 ;;
- --nohttps) SSL=0 ;;
-
- --useuid) NOUID=0 ;;
- --nouseuid) NOUID=1 ;;
-
- --useftime) NOFTIME=0 ;;
- --nouseftime) NOFTIME=1 ;;
-
- --system=*) SYSTEMTYPE="$ac_optarg" ;;
- --system) ac_prev=SYSTEMTYPE ;;
-
- --debug) OTYPE="-O0 -g3"
- MKTYPE="firstinfo htssystem htssystem_auto build_auto strip clean lastinfo" ;;
-
- --make) DOMAKE=1 ;;
- --install) DOINSTALL=1 ;;
- --bininstall) DOINSTALL=1
- NODOCINSTALL=1
- ;;
- --docinstall) DOINSTALL=1
- DOCINSTALL=1
- ;;
- --cls)
- cd ..
- chmod 'u=rw,go=r' `find ./ -type f`
- chmod 'u=rwx,go=rx' `find ./ -type d`
- chmod 'u=rwx,go=rx' ./src/configure
- chmod 'u=rwx,go=rx' ./src/strip_cr.in
- chmod 'u=rwx,go=rx' ./src/postinst-config.in
- cd src
- strip_cr *.c *.h
- rm -f ./httrack 2>/dev/null
- exit
- ;;
- -prefix | --prefix | --prefi | --pref | --pre | --pr)
- ac_prev=PREFIX ;;
- -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=*)
- PREFIX="$ac_optarg" ;;
- -bindir | --bindir | --bindi | --bind | --bin | --bi)
- ac_prev=BINPATH ;;
- -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*)
- BINPATH="$ac_optarg" ;;
- -etcdir | --etcdir | --etcdi | --etcd | --etc | --et)
- ac_prev=ETCPATH ;;
- -etcdir=* | --etcdir=* | --etcdi=* | --etcd=* | --etc=* | --et=*)
- ETCPATH="$ac_optarg" ;;
- -libdir | --libdir | --libdi | --libd | --lib | --li)
- ac_prev=LIBPATH ;;
- -libdir=* | --libdir=* | --libdi=* | --libd=* | --lib=* | --li=*)
- LIBPATH="$ac_optarg" ;;
-
- --help)
- cat <<EOF
---prefix=.. : prefix (/usr)
---bindir=.. : binary dir (/usr/bin)
---etcdir=.. : config dir (/usr/etc or /etc)
---libdir=.. : library dir (/usr/lib)
---dynamic : do use dynamic (.so) mode
---static : do use static mode
---nopthread : do not use threads (pthread.h)
---pthread : do use threads (pthread.h)
---noipv6 : do not use ipv6 extensions
---ipv6 : do use ipv6 extensions
---nohttps : do not use SSL extensions
---https : do use SSL extensions
---nozlib : do not use compression (zlib)
---zlib : do use compression (zlib)
---nolonglong : do not use 64-bit int
---longlong : do use 64-bit int
---noinaddrt : do not redeclare in_addr_t
---inaddrt : do redeclare in_addr_t
---nouseuid : do not use setuid()/setgid()
---useuid : do use setuid()/setgid()
---nouseftime : do not use ftime()
---useftime : do use ftime()
---system=<type> : override system type (uname) - NOT RECOMMENDED! (types: 'Default','Linux','SunOS','AIX')
---make : 'make' after configure
---install : 'make install' after configure
---bininstall : 'make bininstall' after configure
---docinstall : 'make docinstall' after configure
---debug : add debug information (for gdb)
-EOF
- exit
- ;;
-
- *) echo "Unrecognized option: $ac_option"
- exit
- ;;
-
- esac
-
-done
-
-echo "Welcome to HTTrack Website Copier!"
-echo "Type in ./configure --help for more details"
-echo "If this script fails, you can enter supplemental options through '--option=value'"
-echo "or enter in manual make, through 'make help'"
-echo ""
-
-if cp -f Makefile.in Makefile; then
-
-SEDEXEC=
-
-# System (OS) type?
-printf "Checking for OS type.. "
-if test -z "$SYSTEMTYPE"; then
- SYSTEMTYPE="`uname`"
-fi
-case "$SYSTEMTYPE" in
- SunOS) printf "SunOS/Solaris\n";
- PLATFORM=0
- SOLSOCK=1
- ;;
- AIX) printf "AIX\n"; PLATFORM=2 ;;
- *) printf "Linux type\n"; PLATFORM=3 ;;
-esac
-
-WTYPE="-Wall -Wcast-align -Wstrict-prototypes -Wmissing-prototypes -Wmissing-declarations -Wpointer-arith -Wnested-externs"
-
-if test -z "$OTYPE"; then
- OTYPE="-O3"
-fi
-if test -z "$MKTYPE"; then
- MKTYPE="firstinfo htssystem htssystem_auto build_auto strip clean lastinfo"
-fi
-
-# Root group
-if test -z "$ROOTGROUP"; then
- printf "Checking for 'root' group.."
- if egrep "^root:" /etc/group >/dev/null; then
- ROOTGROUP="root"
- elif egrep "^wheel:" /etc/group >/dev/null; then
- ROOTGROUP="wheel"
- fi
- printf "$ROOTGROUP\n"
-else
- echo "Overriding ROOTGROUP=$ROOTGROUP"
-fi
-
-
-# Binaries location
-if test -z "$BINPATH"; then
-printf "Checking for bin directory.. "
-if test -n "$PREFIX"; then
- BINPATH="$PREFIX/bin"
-elif test -d "/usr/bin"; then
- BINPATH="/usr/bin"
-else
- BINPATH="/bin"
-fi
-printf "$BINPATH\n"
-else
- echo "Overriding BINPATH=$BINPATH"
-fi
-
-# shlib?
-if test -d "/usr/shlib"; then
-SYSLIB=/usr/shlib
-else
-SYSLIB=/usr/lib
-fi
-
-# /etc location
-if test -z "$ETCPATH"; then
-printf "Checking for etc directory.. "
-if test -n "$PREFIX"; then
- ETCPATH="$PREFIX/etc"
-elif test -d "/usr/etc"; then
- ETCPATH="/usr/etc"
-else
- ETCPATH="/etc"
-fi
-printf "$ETCPATH\n"
-else
- echo "Overriding ETCPATH=$ETCPATH"
-fi
-
-# /usr/lib location
-if test -z "$LIBPATH"; then
-printf "Checking for lib directory.. "
-if test -n "$PREFIX"; then
- LIBPATH="$PREFIX/lib"
-elif test -d "/usr/lib"; then
- LIBPATH="/usr/lib"
-elif test -d "/usr/local/lib"; then
- LIBPATH="/usr/local/lib"
-else
- LIBPATH="/lib"
-fi
-printf "$LIBPATH\n"
-else
- echo "Overriding LIBPATH=$LIBPATH"
-fi
-
-# Prefix location
-if test -z "$PREFIX"; then
- printf "Checking for prefix directory.. "
- PREFIX="/usr"
- printf "$PREFIX\n"
-else
- echo "Overriding PREFIX=$PREFIX"
-fi
-
-
-# 64-bit (long long) cause some troubles to some processors
-# because some alignements aren't properly defined
-# we only accept 64-bit on tested processors here
-if test -z "$LONGLONG"; then
-printf "Checking for long long.. "
-LONGLONG=
-if grep "long long" /usr/include/sys/types.h >/dev/null; then
-if uname -a|egrep ' i[3-9]86 ' >/dev/null; then
- LONGLONG=1
-fi
-fi
-if test -n "$LONGLONG"; then
- printf "supported\n"
-else
- printf "not tested/supported. Use --LONGLONG=1 to override\n"
-fi
-else
- echo "Overriding LONGLONG=$LONGLONG"
-fi
-
-# IPV6?
-# NOT TESTED FOR OTHER PLATFORMS.. FIXME!
-if test -z "$IPV6"; then
-printf "Checking for ipv6 support.. "
-if test -f "/usr/include/linux/in6.h" -o -f "/usr/local/include/linux/in6.h"; then
-IPV6=1
-else
-IPV6=0
-fi
-if test "$IPV6" -eq 1; then
-printf "supported\n"
-else
-printf "not supported\n"
-fi
-else
- echo "Overriding IPV6=$IPV6"
-fi
-if test "$IPV6" -eq 1; then
-IPTYPE="-DINET6"
-LIPTYPE=""
-else
-IPTYPE=
-LIPTYPE=
-fi
-
-# HTTPS?
-# NOT TESTED FOR OTHER PLATFORMS.. FIXME!
-if test -z "$SSL"; then
-printf "Checking for SSL support.. "
-if test -f "/usr/include/openssl/ssl.h" -o -f "/usr/local/include/openssl/ssl.h"; then
-SSL=1
-else
-SSL=0
-fi
-if test "$SSL" -eq 1; then
-printf "supported\n"
-else
-printf "not supported\n"
-fi
-else
- echo "Overriding SSL=$SSL"
-fi
-if test "$SSL" -eq 1; then
-SSTYPE="-lssl -lcrypto"
-else
-SSTYPE=
-fi
-
-# Alignement
-if test -z "$PTRALIGN"; then
-printf "Checking for pointer alignements.. "
-PTRALIGN=
-if test `uname -p` = "alpha" -o `uname -p` = "sparc"; then
-PTRALIGN=8
-else
-PTRALIGN=4
-fi
-fi
-printf "$PTRALIGN\n"
-
-
-# Dynamic (.so) module?
-if test -z "$DYNAMIC"; then
- DYNAMIC=1
-fi
-printf "Checking for compilation mode: "
-if test "$DYNAMIC" -eq "1"; then
- echo "dynamic"
- SOTYPE=-fPIC
-else
- echo "static"
- SOTYPE=
-fi
-
-# Do we not have to redeclare in_addr_t ?
-# Sometimes this type is defined, or not..
-if test -z "$NODECLINADDRT"; then
-printf "Checking for in_addr_t declaration in in.h.. "
-if grep -E "typedef .* in_addr_t" /usr/include/netinet/in.h >/dev/null || grep -E "typedef .* in_addr_t" /usr/include/sys/types.h; then
- printf "found, do not redeclare\n"
- NODECLINADDRT=1
-else
- printf "not found, declaring\n"
- NODECLINADDRT=
-fi
-else
- echo "Overriding NODECLINADDRT=$NODECLINADDRT"
-fi
-
-# Test if we can use zlib (/usr/lib/libz.so)
-# This allow to speed up transfers using HTTP compression
-if test -z "$ZLIB"; then
-printf "Checking for ${SYSLIB}/libz.so.. "
-if test -f "${SYSLIB}/libz.so"; then
- printf "found\n"
- ZLIB=1
-else
- printf "library not found (too bad), no http compression will be available\n"
- ZLIB=0
-fi
-else
- echo "Overriding ZLIB=$ZLIB"
-fi
-
-# Sometimes, pthread.h doesn't exists on some systems
-# This is sad, because it speeds up some useful things, like DNS or ftp
-if test -z "$THREADS"; then
-printf "Checking for /usr/include/pthread.h.. "
-if test -f "/usr/include/pthread.h" -o -f "/usr/local/include/pthread.h"; then
-if test -f "${SYSLIB}/libpthread.so"; then
- printf "found\n"
- THREADS=1
-else
- printf "library not found (too bad), no threads will be available\n"
- THREADS=
-fi
-else
- printf "not found, no threads will be available\n"
- THREADS=
-fi
-else
- echo "Overriding THREADS=$THREADS"
-fi
-
-# Sometimes, setuid and setgid can't be used (missing pwd.h and unistd.h ?!)
-if test -z "$NOUID"; then
-NOUID=1
-printf "Checking for /usr/include/pwd.h and /usr/include/unistd.h.. "
-if test -f "/usr/include/pwd.h" -o "/usr/local/include/pwd.h"; then
-if test -f "/usr/include/unistd.h" -o -f "/usr/local/include/unistd.h" ; then
- NOUID=
-fi
-fi
-if test -z "$NOUID"; then
- printf "found\n"
-else
- printf "not found, not using setuid() and setgid()\n"
-fi
-else
- echo "Overriding NOUID=$NOUID"
-fi
-
-# Sometimes, ftime can't be used (missing declaration...)
-if test -z "$NOFTIME"; then
-NOFTIME=1
-printf "Checking for ftime in /usr/include/sys/timeb.h.. "
-if grep "int ftime" /usr/include/sys/timeb.h >/dev/null; then
- NOFTIME=
-fi
-if test -z "$NOFTIME"; then
- printf "found\n"
-else
- printf "not found (too bad), not using ftime()\n"
-fi
-else
- echo "Overriding NOFTIME=$NOFTIME"
-fi
-
-# Test variables
-if test "$NOUID" = 1; then
- SEDEXEC="$SEDEXEC | sed -e 's/__DEFINEUID__/HTS_DO_NOT_USE_UID/'"
-fi
-if test "$NOFTIME" = 1; then
- SEDEXEC="$SEDEXEC | sed -e 's/__DEFINEFTI__/HTS_DO_NOT_USE_FTIME/'"
-fi
-if test "$NODECLINADDRT" = 1; then
- SEDEXEC="$SEDEXEC | sed -e 's/__DEFINEINA__/HTS_DO_NOT_REDEFINE_in_addr_t/'"
-fi
-if test "$THREADS" = 1; then
-THTYPE="-D_REENTRANT"
-LPTHTYPE="-lpthread"
-else
-THTYPE=
-LPTHTYPE=
-fi
-SEDEXEC="$SEDEXEC | sed -e \"s/__CFLAGS__/$SOTYPE $OTYPE $WTYPE $IPTYPE $THTYPE/g\""
-SEDEXEC="$SEDEXEC | sed -e \"s/__LFLAGS__/$LPTHTYPE $SSTYPE $LIPTYPE/g\""
-if test ! "$THREADS" = 1; then
- SEDEXEC="$SEDEXEC | sed -e 's/__DEFINEPTH__/HTS_DO_NOT_USE_PTHREAD/'"
-fi
-if test "$ZLIB" = 1; then
- SEDEXEC="$SEDEXEC | sed -e 's/__LFLAGS3__/-lz/g'"
-else
- SEDEXEC="$SEDEXEC | sed -e 's/__LFLAGS3__//g'"
-fi
-SEDEXEC="$SEDEXEC | sed -e \"s/__ZLIB__/$ZLIB/\""
-if test "$SOLSOCK" = 1; then
- SEDEXEC="$SEDEXEC | sed -e 's/__LFLAGS2__/-lnsl -lsocket/g'"
-else
- SEDEXEC="$SEDEXEC | sed -e 's/__LFLAGS2__//g'"
-fi
-if test ! "$LONGLONG" = 1; then
- SEDEXEC="$SEDEXEC | sed -e 's/__DEFINE64B__/HTS_NO_64_BIT/'"
-fi
-if test -n "$PTRALIGN"; then
- SEDEXEC="$SEDEXEC | sed -e \"s/__PTRALIGN__/$PTRALIGN/g\""
-fi
-if test -n "$IPV6"; then
- SEDEXEC="$SEDEXEC | sed -e \"s/__INET6__/$IPV6/g\""
-fi
-if test -n "$SSL"; then
- SEDEXEC="$SEDEXEC | sed -e \"s/__SSL__/$SSL/g\""
-fi
-if test "$DYNAMIC" = 1; then
- SEDEXEC="$SEDEXEC | sed -e 's/__DYNAMIC__/so/'"
- SEDEXEC="$SEDEXEC | sed -e 's/__INSTALL__/bininstall libinstall docinstall/'"
- SEDEXEC="$SEDEXEC | sed -e 's/__UNINSTALL__/binremove libremove docremove/'"
- SEDEXEC="$SEDEXEC | sed -e 's/__STRIPLIB__/strip --strip-unneeded \\\$(BOUTPUTSO)/'"
-else
- SEDEXEC="$SEDEXEC | sed -e 's/__DYNAMIC__//'"
- SEDEXEC="$SEDEXEC | sed -e 's/__INSTALL__/bininstall docinstall/'"
- SEDEXEC="$SEDEXEC | sed -e 's/__UNINSTALL__/binremove docremove/'"
- SEDEXEC="$SEDEXEC | sed -e 's/__STRIPLIB__//'"
-fi
-
-SEDEXEC="$SEDEXEC | sed -e \"s/__PLATFORM__/$PLATFORM/g\""
-SEDEXEC="$SEDEXEC | sed -e 's/#__AUTONAME__/AUTONAME = auto/'"
-SEDEXEC="$SEDEXEC | sed -e 's/#define __DEFINE.*__//g'"
-
-# Paths
-TMP=`echo $BINPATH | sed -e 's/\\//\\\\\\//g'`
-SEDEXEC="$SEDEXEC | sed -e \"s/__BINPATH__/$TMP/g\""
-TMP=`echo $ETCPATH | sed -e 's/\\//\\\\\\//g'`
-SEDEXEC="$SEDEXEC | sed -e \"s/__ETCPATH__/$TMP/g\""
-TMP=`echo $LIBPATH | sed -e 's/\\//\\\\\\//g'`
-SEDEXEC="$SEDEXEC | sed -e \"s/__LIBPATH__/$TMP/g\""
-TMP=`echo $PREFIX | sed -e 's/\\//\\\\\\//g'`
-SEDEXEC="$SEDEXEC | sed -e \"s/__PREFIX__/$TMP/g\""
-TMP=`echo $ROOTGROUP | sed -e 's/\\//\\\\\\//g'`
-SEDEXEC="$SEDEXEC | sed -e \"s/__ROOTGROUP__/$TMP/g\""
-
-TMP=`echo $PREFIX | sed -e 's/\\//\\\\\\//g'`
-SEDEXEC="$SEDEXEC | sed -e \"s/__DEFINEPRE__/$TMP/g\""
-SEDEXEC="$SEDEXEC | sed -e 's/__MAKEAUTO__/$MKTYPE/'"
-
-# Search for gmake
-printf "Checking for make.. "
-MAKEPATH=
-if test -f "/usr/bin/gmake"; then
-MAKEPATH=/usr/bin/gmake
-else
-if test -f "/bin/gmake"; then
-MAKEPATH=/bin/gmake
-else
-if test -f "/usr/local/bin/gmake"; then
-MAKEPATH=/usr/local/bin/gmake
-fi
-fi
-fi
-if test -n "$MAKEPATH"; then
-printf "found $MAKEPATH\n"
-else
-MAKEPATH=make
-printf "not found, assume make will work\n"
-fi
-
-# Sed strip_cr
-EXCL='#!'
-printf "Checking for perl.. "
-PERLPATH=
-cp -f strip_cr.in strip_cr
-if test -f "/usr/bin/perl"; then
-PERLPATH=/usr/bin/perl
-else
-if test -f "/bin/perl"; then
-PERLPATH=/bin/perl
-else
-if test -f "/usr/local/bin/perl"; then
-PERLPATH=/usr/local/bin/perl
-fi
-fi
-fi
-if test -n "$PERLPATH"; then
-printf "found $PERLPATH\nEnsuring that *.c/*.h source files don't contains CR (^M).. "
-TMP=`echo $PERLPATH | sed -e 's/\\//\\\\\\//g'`
-cat strip_cr | sed -e "s/__PERL__/${EXCL}${TMP}/" > __tmp; mv __tmp strip_cr
-chmod 755 strip_cr
-./strip_cr *.c *.h
-printf "done\n"
-fi
-
-# Sed postinst-config
-cp -f postinst-config.in postinst-config
-CMD="cat postinst-config $SEDEXEC > __tmp; mv __tmp postinst-config"
-if eval $CMD; then
-chmod 755 postinst-config
-else
-echo "Error while seding postinst-config"
-exit 1
-fi
-
-# Sed all __VARS__
-CMD="cat Makefile $SEDEXEC > __tmp; mv __tmp Makefile"
-echo "Command: $CMD"
-if eval $CMD; then
-
-echo ""
-echo "Makefile created!"
-echo "Type in '$MAKEPATH' to build and '$MAKEPATH install' to install"
-
-if test -n "$DOMAKE"; then
- echo "Making.."
- eval $MAKEPATH clean
- eval $MAKEPATH
-fi
-if test -n "$DOINSTALL"; then
- if test -n "$NODOCINSTALL"; then
- echo "Installing binary.."
- eval $MAKEPATH bininstall
- elif test -n "$DOCINSTALL"; then
- echo "Installing docs.."
- eval $MAKEPATH docinstall
- else
- echo "Installing.."
- eval $MAKEPATH install
- fi
-fi
-
-else
- echo "Error while seding Makefile.."
- exit 1
-fi
-
-else
- echo "Error copying Makefile.in -> Makefile.. giving up"
- exit 1
-fi
-
diff --git a/src/gpl.txt b/src/gpl.txt
deleted file mode 100644
index 546a71a..0000000
--- a/src/gpl.txt
+++ /dev/null
@@ -1,287 +0,0 @@
- GNU GENERAL PUBLIC LICENSE
- Version 2, June 1991
-
- Copyright (C) 1989, 1991 Free Software Foundation, Inc.
- 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- Everyone is permitted to copy and distribute verbatim copies
- of this license document, but changing it is not allowed.
-
- Preamble
-
- The licenses for most software are designed to take away your
-freedom to share and change it. By contrast, the GNU General Public
-License is intended to guarantee your freedom to share and change free
-software--to make sure the software is free for all its users. This
-General Public License applies to most of the Free Software
-Foundation's software and to any other program whose authors commit to
-using it. (Some other Free Software Foundation software is covered by
-the GNU Library General Public License instead.) You can apply it to
-your programs, too.
-
- When we speak of free software, we are referring to freedom, not
-price. Our General Public Licenses are designed to make sure that you
-have the freedom to distribute copies of free software (and charge for
-this service if you wish), that you receive source code or can get it
-if you want it, that you can change the software or use pieces of it
-in new free programs; and that you know you can do these things.
-
- To protect your rights, we need to make restrictions that forbid
-anyone to deny you these rights or to ask you to surrender the rights.
-These restrictions translate to certain responsibilities for you if you
-distribute copies of the software, or if you modify it.
-
- For example, if you distribute copies of such a program, whether
-gratis or for a fee, you must give the recipients all the rights that
-you have. You must make sure that they, too, receive or can get the
-source code. And you must show them these terms so they know their
-rights.
-
- We protect your rights with two steps: (1) copyright the software, and
-(2) offer you this license which gives you legal permission to copy,
-distribute and/or modify the software.
-
- Also, for each author's protection and ours, we want to make certain
-that everyone understands that there is no warranty for this free
-software. If the software is modified by someone else and passed on, we
-want its recipients to know that what they have is not the original, so
-that any problems introduced by others will not reflect on the original
-authors' reputations.
-
- Finally, any free program is threatened constantly by software
-patents. We wish to avoid the danger that redistributors of a free
-program will individually obtain patent licenses, in effect making the
-program proprietary. To prevent this, we have made it clear that any
-patent must be licensed for everyone's free use or not licensed at all.
-
- The precise terms and conditions for copying, distribution and
-modification follow.
-
-
- GNU GENERAL PUBLIC LICENSE
- TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
-
- 0. This License applies to any program or other work which contains
-a notice placed by the copyright holder saying it may be distributed
-under the terms of this General Public License. The "Program", below,
-refers to any such program or work, and a "work based on the Program"
-means either the Program or any derivative work under copyright law:
-that is to say, a work containing the Program or a portion of it,
-either verbatim or with modifications and/or translated into another
-language. (Hereinafter, translation is included without limitation in
-the term "modification".) Each licensee is addressed as "you".
-
-Activities other than copying, distribution and modification are not
-covered by this License; they are outside its scope. The act of
-running the Program is not restricted, and the output from the Program
-is covered only if its contents constitute a work based on the
-Program (independent of having been made by running the Program).
-Whether that is true depends on what the Program does.
-
- 1. You may copy and distribute verbatim copies of the Program's
-source code as you receive it, in any medium, provided that you
-conspicuously and appropriately publish on each copy an appropriate
-copyright notice and disclaimer of warranty; keep intact all the
-notices that refer to this License and to the absence of any warranty;
-and give any other recipients of the Program a copy of this License
-along with the Program.
-
-You may charge a fee for the physical act of transferring a copy, and
-you may at your option offer warranty protection in exchange for a fee.
-
- 2. You may modify your copy or copies of the Program or any portion
-of it, thus forming a work based on the Program, and copy and
-distribute such modifications or work under the terms of Section 1
-above, provided that you also meet all of these conditions:
-
- a) You must cause the modified files to carry prominent notices
- stating that you changed the files and the date of any change.
-
- b) You must cause any work that you distribute or publish, that in
- whole or in part contains or is derived from the Program or any
- part thereof, to be licensed as a whole at no charge to all third
- parties under the terms of this License.
-
- c) If the modified program normally reads commands interactively
- when run, you must cause it, when started running for such
- interactive use in the most ordinary way, to print or display an
- announcement including an appropriate copyright notice and a
- notice that there is no warranty (or else, saying that you provide
- a warranty) and that users may redistribute the program under
- these conditions, and telling the user how to view a copy of this
- License. (Exception: if the Program itself is interactive but
- does not normally print such an announcement, your work based on
- the Program is not required to print an announcement.)
-
-
-These requirements apply to the modified work as a whole. If
-identifiable sections of that work are not derived from the Program,
-and can be reasonably considered independent and separate works in
-themselves, then this License, and its terms, do not apply to those
-sections when you distribute them as separate works. But when you
-distribute the same sections as part of a whole which is a work based
-on the Program, the distribution of the whole must be on the terms of
-this License, whose permissions for other licensees extend to the
-entire whole, and thus to each and every part regardless of who wrote it.
-
-Thus, it is not the intent of this section to claim rights or contest
-your rights to work written entirely by you; rather, the intent is to
-exercise the right to control the distribution of derivative or
-collective works based on the Program.
-
-In addition, mere aggregation of another work not based on the Program
-with the Program (or with a work based on the Program) on a volume of
-a storage or distribution medium does not bring the other work under
-the scope of this License.
-
- 3. You may copy and distribute the Program (or a work based on it,
-under Section 2) in object code or executable form under the terms of
-Sections 1 and 2 above provided that you also do one of the following:
-
- a) Accompany it with the complete corresponding machine-readable
- source code, which must be distributed under the terms of Sections
- 1 and 2 above on a medium customarily used for software interchange; or,
-
- b) Accompany it with a written offer, valid for at least three
- years, to give any third party, for a charge no more than your
- cost of physically performing source distribution, a complete
- machine-readable copy of the corresponding source code, to be
- distributed under the terms of Sections 1 and 2 above on a medium
- customarily used for software interchange; or,
-
- c) Accompany it with the information you received as to the offer
- to distribute corresponding source code. (This alternative is
- allowed only for noncommercial distribution and only if you
- received the program in object code or executable form with such
- an offer, in accord with Subsection b above.)
-
-The source code for a work means the preferred form of the work for
-making modifications to it. For an executable work, complete source
-code means all the source code for all modules it contains, plus any
-associated interface definition files, plus the scripts used to
-control compilation and installation of the executable. However, as a
-special exception, the source code distributed need not include
-anything that is normally distributed (in either source or binary
-form) with the major components (compiler, kernel, and so on) of the
-operating system on which the executable runs, unless that component
-itself accompanies the executable.
-
-If distribution of executable or object code is made by offering
-access to copy from a designated place, then offering equivalent
-access to copy the source code from the same place counts as
-distribution of the source code, even though third parties are not
-compelled to copy the source along with the object code.
-
-
- 4. You may not copy, modify, sublicense, or distribute the Program
-except as expressly provided under this License. Any attempt
-otherwise to copy, modify, sublicense or distribute the Program is
-void, and will automatically terminate your rights under this License.
-However, parties who have received copies, or rights, from you under
-this License will not have their licenses terminated so long as such
-parties remain in full compliance.
-
- 5. You are not required to accept this License, since you have not
-signed it. However, nothing else grants you permission to modify or
-distribute the Program or its derivative works. These actions are
-prohibited by law if you do not accept this License. Therefore, by
-modifying or distributing the Program (or any work based on the
-Program), you indicate your acceptance of this License to do so, and
-all its terms and conditions for copying, distributing or modifying
-the Program or works based on it.
-
- 6. Each time you redistribute the Program (or any work based on the
-Program), the recipient automatically receives a license from the
-original licensor to copy, distribute or modify the Program subject to
-these terms and conditions. You may not impose any further
-restrictions on the recipients' exercise of the rights granted herein.
-You are not responsible for enforcing compliance by third parties to
-this License.
-
- 7. If, as a consequence of a court judgment or allegation of patent
-infringement or for any other reason (not limited to patent issues),
-conditions are imposed on you (whether by court order, agreement or
-otherwise) that contradict the conditions of this License, they do not
-excuse you from the conditions of this License. If you cannot
-distribute so as to satisfy simultaneously your obligations under this
-License and any other pertinent obligations, then as a consequence you
-may not distribute the Program at all. For example, if a patent
-license would not permit royalty-free redistribution of the Program by
-all those who receive copies directly or indirectly through you, then
-the only way you could satisfy both it and this License would be to
-refrain entirely from distribution of the Program.
-
-If any portion of this section is held invalid or unenforceable under
-any particular circumstance, the balance of the section is intended to
-apply and the section as a whole is intended to apply in other
-circumstances.
-
-It is not the purpose of this section to induce you to infringe any
-patents or other property right claims or to contest validity of any
-such claims; this section has the sole purpose of protecting the
-integrity of the free software distribution system, which is
-implemented by public license practices. Many people have made
-generous contributions to the wide range of software distributed
-through that system in reliance on consistent application of that
-system; it is up to the author/donor to decide if he or she is willing
-to distribute software through any other system and a licensee cannot
-impose that choice.
-
-This section is intended to make thoroughly clear what is believed to
-be a consequence of the rest of this License.
-
-
- 8. If the distribution and/or use of the Program is restricted in
-certain countries either by patents or by copyrighted interfaces, the
-original copyright holder who places the Program under this License
-may add an explicit geographical distribution limitation excluding
-those countries, so that distribution is permitted only in or among
-countries not thus excluded. In such case, this License incorporates
-the limitation as if written in the body of this License.
-
- 9. The Free Software Foundation may publish revised and/or new versions
-of the General Public License from time to time. Such new versions will
-be similar in spirit to the present version, but may differ in detail to
-address new problems or concerns.
-
-Each version is given a distinguishing version number. If the Program
-specifies a version number of this License which applies to it and "any
-later version", you have the option of following the terms and conditions
-either of that version or of any later version published by the Free
-Software Foundation. If the Program does not specify a version number of
-this License, you may choose any version ever published by the Free Software
-Foundation.
-
- 10. If you wish to incorporate parts of the Program into other free
-programs whose distribution conditions are different, write to the author
-to ask for permission. For software which is copyrighted by the Free
-Software Foundation, write to the Free Software Foundation; we sometimes
-make exceptions for this. Our decision will be guided by the two goals
-of preserving the free status of all derivatives of our free software and
-of promoting the sharing and reuse of software generally.
-
- NO WARRANTY
-
- 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
-FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
-OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
-PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
-OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
-MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
-TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
-PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
-REPAIR OR CORRECTION.
-
- 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
-WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
-REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
-INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
-OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
-TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
-YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
-PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGES.
-
- END OF TERMS AND CONDITIONS
-
-
-
diff --git a/src/hts-indextmpl.h b/src/hts-indextmpl.h
index a82b69d..29dd122 100644
--- a/src/hts-indextmpl.h
+++ b/src/hts-indextmpl.h
@@ -167,7 +167,7 @@ regen:
" <BR>"LF\
" <BR>"LF\
" <H6 ALIGN=\"RIGHT\">"LF\
- " <I>Mirror and index made by HTTrack Website Copier [XR&amp;CO'2002]</I>"LF\
+ " <I>Mirror and index made by HTTrack Website Copier [XR&amp;CO'2003]</I>"LF\
" </H6>"LF\
" %s"LF\
" <!-- Thanks for using HTTrack Website Copier! -->"LF\
@@ -186,7 +186,7 @@ regen:
""LF\
"<table width=\"76%%\" border=\"0\" align=\"center\" valign=\"bottom\" cellspacing=\"0\" cellpadding=\"0\">"LF\
" <tr>"LF\
- " <td id=\"footer\"><small>&copy; 2002 Xavier Roche & other contributors - Web Design: Kauler Leto.</small></td>"LF\
+ " <td id=\"footer\"><small>&copy; 2003 Xavier Roche & other contributors - Web Design: Kauler Leto.</small></td>"LF\
" </tr>"LF\
"</table>"LF\
""LF\
@@ -317,7 +317,7 @@ regen:
" </TABLE>"LF\
" <BR>"LF\
" <H6 ALIGN=\"RIGHT\">"LF\
- " <I>Mirror and index made by HTTrack Website Copier [XR&CO'2002]</I>"LF\
+ " <I>Mirror and index made by HTTrack Website Copier [XR&CO'2003]</I>"LF\
" </H6>"LF\
" %s"LF\
" <!-- Thanks for using HTTrack Website Copier! -->"LF\
@@ -335,7 +335,7 @@ regen:
""LF\
"<table width=\"76%%\" border=\"0\" align=\"center\" valign=\"bottom\" cellspacing=\"0\" cellpadding=\"0\">"LF\
" <tr>"LF\
- " <td id=\"footer\"><small>&copy; 2002 Xavier Roche & other contributors - Web Design: Kauler Leto.</small></td>"LF\
+ " <td id=\"footer\"><small>&copy; 2003 Xavier Roche & other contributors - Web Design: Kauler Leto.</small></td>"LF\
" </tr>"LF\
"</table>"LF\
""LF\
@@ -476,7 +476,7 @@ regen:
""LF\
"<table width=\"76%%\" height=\"100%%\" border=\"0\" align=\"center\" valign=\"bottom\" cellspacing=\"0\" cellpadding=\"0\">"LF\
" <tr>"LF\
- " <td id=\"footer\"><small>&copy; 2002 Xavier Roche & other contributors - Web Design: Kauler Leto.</small></td>"LF\
+ " <td id=\"footer\"><small>&copy; 2003 Xavier Roche & other contributors - Web Design: Kauler Leto.</small></td>"LF\
" </tr>"LF\
"</table>"LF\
""LF\
@@ -613,7 +613,7 @@ regen:
""LF\
"<table width=\"76%%\" height=\"100%%\" border=\"0\" align=\"center\" valign=\"bottom\" cellspacing=\"0\" cellpadding=\"0\">"LF\
" <tr>"LF\
- " <td id=\"footer\"><small>&copy; 2002 Xavier Roche & other contributors - Web Design: Kauler Leto.</small></td>"LF\
+ " <td id=\"footer\"><small>&copy; 2003 Xavier Roche & other contributors - Web Design: Kauler Leto.</small></td>"LF\
" </tr>"LF\
"</table>"LF\
""LF\
diff --git a/src/htsalias.c b/src/htsalias.c
index 65a34e3..1b65945 100644
--- a/src/htsalias.c
+++ b/src/htsalias.c
@@ -38,13 +38,14 @@ Please visit our Website: http://www.httrack.com
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include "htsbase.h"
#include "htsalias.h"
#include "htsglobal.h"
void linput(FILE* fp,char* s,int max);
void hts_lowcase(char* s);
#define _NOT_NULL(a) ( (a!=NULL) ? (a) : "" )
-#define is_realspace(c) (strchr(" \x0d\x0a\x09\x0c",(c))!=NULL)
+#define is_realspace(c) (strchr(" \x0d\x0a\x09\x0b\x0c",(c))!=NULL)
// COPY OF cmdl_ins in htsmain.c
// Insert a command in the argc/argv
@@ -55,7 +56,7 @@ void hts_lowcase(char* s);
argv[i]=argv[i-1];\
} \
argv[0]=(buff+ptr); \
- strcpy(argv[0],token); \
+ strcpybuff(argv[0],token); \
ptr += (strlen(argv[0])+1); \
argc++
// END OF COPY OF cmdl_ins in htsmain.c
@@ -80,7 +81,7 @@ void hts_lowcase(char* s);
param1 : this option must be alone, and needs one distinct parameter (-P <path>)
param0 : this option must be alone, but the parameter should be put together (+*.gif)
*/
-const char hts_optalias[][4][64] = {
+const char* hts_optalias[][4] = {
/* {"","","",""}, */
{"path","-O","param1","output path"},
{"chroot","-%O","param1","default top path"},
@@ -90,6 +91,7 @@ const char hts_optalias[][4][64] = {
{"quiet","-q","single",""},
{"mirrorlinks","-Y","single",""},
{"proxy","-P","param1","proxy name:port"},
+ {"bind","-%b","param1","hostname to bind"},
{"httpproxy-ftp","-%f","param",""},
{"depth","-r","param",""},{"recurse-levels","-r","param",""},
{"ext-depth","-%e","param",""},
@@ -108,12 +110,14 @@ const char hts_optalias[][4][64] = {
{"near","-n","single",""},
{"test","-t","single",""},
{"list","-%L","param1",""},
+ {"urllist","-%S","param1",""},
{"language","-%l","param1",""}, {"lang","-%l","param1",""},
{"structure","-N","param",""}, {"user-structure","-N","param1",""},
{"long-names","-L","param",""},
{"keep-links","-K","param",""},
+ {"mime-html","-%M","param",""}, {"mht","-%M","param",""},
{"replace-external","-x","single",""},
- {"no-passwords","-%x","single",""},{"no-password","-%x","single",""},
+ {"disable-passwords","-%x","single",""},{"disable-password","-%x","single",""},
{"include-query-string","-%q","single",""},
{"generate-errors","-o","single",""},
{"purge-old","-X","param",""},
@@ -124,9 +128,12 @@ const char hts_optalias[][4][64] = {
{"protocol","-@i","param",""},
{"robots","-s","param",""},
{"http-10","-%h","single",""},{"http-1.0","-%h","single",""},
- {"no-compression","-%z","single",""},
+ {"keep-alive","-%k","single",""},
+ {"build-top-index","-%i","single",""},
+ {"disable-compression","-%z","single",""},
{"tolerant","-%B","single",""},
{"updatehack","-%s","single",""}, {"sizehack","-%s","single",""},
+ {"urlhack","-%u","single",""},
{"user-agent","-F","param1","user-agent identity"},
{"footer","-%F","param1",""},
{"cache","-C","param","number of retries for non-fatal errors"},
@@ -143,6 +150,7 @@ const char hts_optalias[][4][64] = {
{"priority","-p","param",""},
{"debug-headers","-%H","single",""},
{"userdef-cmd","-V","param1",""},
+ {"callback","-%W","param1",""}, {"wrapper","-%W","param1",""},
{"structure","-N","param1","user-defined structure"},
{"usercommand","-V","param1","user-defined command"},
{"display","-%v","single","show files transfered and other funny realtime information"},
@@ -167,13 +175,16 @@ const char hts_optalias[][4][64] = {
{"version","-#h","single",""},
{"debug-scanstdin","-#K","single",""},
{"advanced-maxlinks","-#L","single",""},
- {"advanced-progressinfo","-#p","single",""},
+ {"advanced-progressinfo","-#p","single","deprecated"},
{"catch-url","-#P","single","catch complex URL through proxy"},
{"debug-oldftp","-#R","single",""},
{"debug-xfrstats","-#T","single",""},
{"advanced-wait","-#u","single",""},
{"debug-ratestats","-#Z","single",""},
{"exec","-#!","param1",""},
+ {"fast-engine","-#X","single","Enable fast routines"},
+ {"debug-overflows","-#X0","single","Attempt to detect buffer overflows"},
+ {"debug-cache","-#C","param1","List files in the cache"},
/* STANDARD ALIASES */
{"spider","-p0C0I0t","single",""},
@@ -194,6 +205,7 @@ const char hts_optalias[][4][64] = {
{"ultrawide","-c48","single",""},
{"http10","-%h","single",""},
{"filelist","-%L","single",""}, {"list","-%L","single",""},
+ {"filterlist","-%S","single",""},
/* END OF ALIASES */
/* Filters */
@@ -249,25 +261,25 @@ int optalias_check(int argc,const char * const * argv,int n_arg,
/* --sockets=8 */
if ( (position=strchr(argv[n_arg],'=')) ) {
/* Copy command */
- strncat(command,argv[n_arg]+2,(int) (position - (argv[n_arg]+2)) );
+ strncatbuff(command,argv[n_arg]+2,(int) (position - (argv[n_arg]+2)) );
/* Copy parameter */
- strcpy(param,position+1);
+ strcpybuff(param,position+1);
}
/* --nocache */
else if (strncmp(argv[n_arg]+2,"no",2)==0) {
- strcpy(command,argv[n_arg]+4);
- strcpy(param,"0");
+ strcpybuff(command,argv[n_arg]+4);
+ strcpybuff(param,"0");
}
/* --sockets 8 */
else {
if (strncmp(argv[n_arg]+2,"wide-",5)==0) {
- strcpy(addcommand,"c32");
- strcpy(command,strchr(argv[n_arg]+2,'-')+1);
+ strcpybuff(addcommand,"c32");
+ strcpybuff(command,strchr(argv[n_arg]+2,'-')+1);
} else if (strncmp(argv[n_arg]+2,"tiny-",5)==0) {
- strcpy(addcommand,"c1");
- strcpy(command,strchr(argv[n_arg]+2,'-')+1);
+ strcpybuff(addcommand,"c1");
+ strcpybuff(command,strchr(argv[n_arg]+2,'-')+1);
} else
- strcpy(command,argv[n_arg]+2);
+ strcpybuff(command,argv[n_arg]+2);
need_param=2;
}
@@ -275,7 +287,7 @@ int optalias_check(int argc,const char * const * argv,int n_arg,
pos=optalias_find(command);
if (pos>=0) {
/* Copy real name */
- strcpy(command,hts_optalias[pos][1]);
+ strcpybuff(command,hts_optalias[pos][1]);
/* With parameters? */
if (strncmp(hts_optalias[pos][2],"param",5)==0) {
/* Copy parameters? */
@@ -286,7 +298,7 @@ int optalias_check(int argc,const char * const * argv,int n_arg,
command,command,_NOT_NULL(optalias_help(command)));
return 0;
}
- strcpy(param,argv[n_arg+1]);
+ strcpybuff(param,argv[n_arg+1]);
need_param=2;
}
} else
@@ -296,30 +308,30 @@ int optalias_check(int argc,const char * const * argv,int n_arg,
/* Must be alone (-P /tmp) */
if (strcmp(hts_optalias[pos][2],"param1")==0) {
- strcpy(return_argv[0],command);
- strcpy(return_argv[1],param);
+ strcpybuff(return_argv[0],command);
+ strcpybuff(return_argv[1],param);
*return_argc=2; /* 2 parameters returned */
}
/* Alone with parameter (+*.gif) */
else if (strcmp(hts_optalias[pos][2],"param0")==0) {
/* Command */
- strcpy(return_argv[0],command);
- strcat(return_argv[0],param);
+ strcpybuff(return_argv[0],command);
+ strcatbuff(return_argv[0],param);
}
/* Together (-c8) */
else {
/* Command */
- strcpy(return_argv[0],command);
+ strcpybuff(return_argv[0],command);
/* Parameters accepted */
if (strncmp(hts_optalias[pos][2],"param",5)==0) {
/* --cache=off or --index=on */
if (strcmp(param,"off")==0)
- strcat(return_argv[0],"0");
+ strcatbuff(return_argv[0],"0");
else if (strcmp(param,"on")==0) {
// on is the default
- // strcat(return_argv[0],"1");
+ // strcatbuff(return_argv[0],"1");
} else
- strcat(return_argv[0],param);
+ strcatbuff(return_argv[0],param);
}
*return_argc=1; /* 1 parameter returned */
}
@@ -342,8 +354,8 @@ int optalias_check(int argc,const char * const * argv,int n_arg,
return 0;
}
/* Copy parameters */
- strcpy(return_argv[0],argv[n_arg]);
- strcpy(return_argv[1],argv[n_arg+1]);
+ strcpybuff(return_argv[0],argv[n_arg]);
+ strcpybuff(return_argv[1],argv[n_arg+1]);
/* And return */
*return_argc=2; /* 2 parameters returned */
return 2; /* 2 parameters used */
@@ -352,7 +364,7 @@ int optalias_check(int argc,const char * const * argv,int n_arg,
}
/* Copy and return other unknown option */
- strcpy(return_argv[0],argv[n_arg]);
+ strcpybuff(return_argv[0],argv[n_arg]);
return 1;
}
@@ -461,9 +473,9 @@ int optinclude_file(const char* name,
char _tmp_argv[4][HTS_CDLMAXSIZE];
char* tmp_argv[4];
tmp_argv[0]=_tmp_argv[0]; tmp_argv[1]=_tmp_argv[1]; tmp_argv[2]=_tmp_argv[2]; tmp_argv[3]=_tmp_argv[3];
- strcpy(tmp_argv[0],"--");
- strcat(tmp_argv[0],a);
- strcpy(tmp_argv[1],b);
+ strcpybuff(tmp_argv[0],"--");
+ strcatbuff(tmp_argv[0],a);
+ strcpybuff(tmp_argv[1],b);
result=optalias_check(2,(const char * const *)tmp_argv,0,
&return_argc,(tmp_argv+2),
@@ -513,8 +525,8 @@ char* hts_gethome(void) {
void expand_home(char* str) {
if (str[0] == '~') {
char tempo[HTS_URLMAXSIZE*2];
- strcpy(tempo,hts_gethome());
- strcat(tempo,str+1);
- strcpy(str,tempo);
+ strcpybuff(tempo,hts_gethome());
+ strcatbuff(tempo,str+1);
+ strcpybuff(str,tempo);
}
}
diff --git a/src/htsalias.h b/src/htsalias.h
index 1c94b19..e5e8f82 100644
--- a/src/htsalias.h
+++ b/src/htsalias.h
@@ -39,7 +39,7 @@ Please visit our Website: http://www.httrack.com
#ifndef HTSALIAS_DEFH
#define HTSALIAS_DEFH
-extern const char hts_optalias[][4][64];
+extern const char* hts_optalias[][4];
int optalias_check(int argc,const char * const * argv,int n_arg,
int* return_argc,char** return_argv,
char* return_error);
diff --git a/src/htsback.c b/src/htsback.c
index d99564f..6d0b119 100644
--- a/src/htsback.c
+++ b/src/htsback.c
@@ -122,7 +122,16 @@ int back_nsoc(lien_back* back,int back_max) {
int n=0;
int i;
for(i=0;i<back_max;i++)
- if (back[i].status>0) // réception uniquement
+ if (back[i].status > 0) // only receive
+ n++;
+
+ return n;
+}
+int back_nsoc_overall(lien_back* back,int back_max) {
+ int n=0;
+ int i;
+ for(i=0;i<back_max;i++)
+ if (back[i].status > 0 || back[i].status == -103)
n++;
return n;
@@ -144,32 +153,32 @@ int back_finalize(httrackp* opt,cache_back* cache,lien_back* back,int p) {
/* décompression */
#if HTS_USEZLIB
- if (back[p].r.compressed) {
+ if (gz_is_available && back[p].r.compressed) {
if (back[p].r.size > 0) {
//if ( (back[p].r.adr) && (back[p].r.size>0) ) {
// stats
back[p].compressed_size=back[p].r.size;
// en mémoire -> passage sur disque
if (!back[p].r.is_write) {
- back[p].tmpfile[0]='\0';
- strcpy(back[p].tmpfile,tempnam(NULL,"httrz"));
- if (back[p].tmpfile[0]) {
+ back[p].tmpfile_buffer[0]='\0';
+ back[p].tmpfile=tmpnam(back[p].tmpfile_buffer);
+ if (back[p].tmpfile != NULL && back[p].tmpfile[0] != '\0') {
back[p].r.out=fopen(back[p].tmpfile,"wb");
if (back[p].r.out) {
if ((back[p].r.adr) && (back[p].r.size>0)) {
- if ((INTsys)fwrite(back[p].r.adr,1,(INTsys)back[p].r.size,back[p].r.out) != back[p].r.size) {
+ if (fwrite(back[p].r.adr,1,(INTsys)back[p].r.size,back[p].r.out) != back[p].r.size) {
back[p].r.statuscode=-1;
- strcpy(back[p].r.msg,"Write error when decompressing");
+ strcpybuff(back[p].r.msg,"Write error when decompressing");
}
} else {
back[p].tmpfile[0]='\0';
back[p].r.statuscode=-1;
- strcpy(back[p].r.msg,"Empty compressed file");
+ strcpybuff(back[p].r.msg,"Empty compressed file");
}
} else {
back[p].tmpfile[0]='\0';
back[p].r.statuscode=-1;
- strcpy(back[p].r.msg,"Open error when decompressing");
+ strcpybuff(back[p].r.msg,"Open error when decompressing");
}
}
}
@@ -179,17 +188,18 @@ int back_finalize(httrackp* opt,cache_back* cache,lien_back* back,int p) {
back[p].r.out=NULL;
}
// décompression
- if (back[p].tmpfile[0] && back[p].url_sav[0]) {
+ if (back[p].tmpfile != NULL && back[p].tmpfile[0] != '\0' && back[p].url_sav[0]) {
LLint size;
filecreateempty(back[p].url_sav); // filenote & co
if ((size = hts_zunpack(back[p].tmpfile,back[p].url_sav))>=0) {
back[p].r.size=back[p].r.totalsize=size;
// fichier -> mémoire
if (!back[p].r.is_write) {
+ deleteaddr(&back[p].r);
back[p].r.adr=readfile(back[p].url_sav);
if (!back[p].r.adr) {
back[p].r.statuscode=-1;
- strcpy(back[p].r.msg,"Read error when decompressing");
+ strcpybuff(back[p].r.msg,"Read error when decompressing");
}
remove(back[p].url_sav);
}
@@ -214,39 +224,45 @@ int back_finalize(httrackp* opt,cache_back* cache,lien_back* back,int p) {
struct tm* A;
tt=time(NULL);
A=localtime(&tt);
+ if (A == NULL) {
+ int localtime_returned_null=0;
+ assert(localtime_returned_null);
+ }
strftime(s,250,"%H:%M:%S",A);
flags[0]='\0';
/* input flags */
if (back[p].is_update)
- strcat(flags, "U"); // update request
+ strcatbuff(flags, "U"); // update request
else
- strcat(flags, "-");
+ strcatbuff(flags, "-");
if (back[p].range_req_size)
- strcat(flags, "R"); // range request
+ strcatbuff(flags, "R"); // range request
else
- strcat(flags, "-");
+ strcatbuff(flags, "-");
/* state flags */
if (back[p].r.is_file) // direct to disk
- strcat(flags, "F");
+ strcatbuff(flags, "F");
else
- strcat(flags, "-");
+ strcatbuff(flags, "-");
/* output flags */
if (!back[p].r.notmodified)
- strcat(flags, "M"); // modified
+ strcatbuff(flags, "M"); // modified
else
- strcat(flags, "-");
+ strcatbuff(flags, "-");
if (back[p].r.is_chunk) // chunked
- strcat(flags, "C");
+ strcatbuff(flags, "C");
else
- strcat(flags, "-");
+ strcatbuff(flags, "-");
if (back[p].r.compressed)
- strcat(flags, "Z"); // gzip
+ strcatbuff(flags, "Z"); // gzip
else
- strcat(flags, "-");
- fprintf(cache->txt,"%s\t"LLintP"/"LLintP"\t%s\t", s,
- back[p].r.size, back[p].r.totalsize,
- flags);
+ strcatbuff(flags, "-");
+ /* Err I had to split these.. */
+ fprintf(cache->txt,"%s\t", s);
+ fprintf(cache->txt,LLintP"/", (LLint)back[p].r.size);
+ fprintf(cache->txt,LLintP,(LLint)back[p].r.totalsize);
+ fprintf(cache->txt,"\t%s\t",flags);
}
if (back[p].r.statuscode==200) {
if (back[p].r.size>=0) {
@@ -323,7 +339,7 @@ int back_finalize(httrackp* opt,cache_back* cache,lien_back* back,int p) {
/* Cache */
cache_mayadd(opt,cache,&back[p].r,back[p].url_adr,back[p].url_fil,back[p].url_sav);
-
+
// status finished callback
#if HTS_ANALYSTE
hts_htmlcheck_xfrstatus(&back[p]);
@@ -333,9 +349,165 @@ int back_finalize(httrackp* opt,cache_back* cache,lien_back* back,int p) {
return -1;
}
+/* try to keep the connection alive */
+int back_letlive(httrackp* opt, lien_back* back, int p) {
+ htsblk* src = &back[p].r;
+ if (src && !src->is_file
+ && src->soc != INVALID_SOCKET
+ && src->statuscode >= 0 /* no timeout errors & co */
+ && src->keep_alive_trailers == 0 /* not yet supported (chunk trailers) */
+ && !check_sockerror(src->soc)
+ /*&& !check_sockdata(src->soc)*/ /* no unexpected data */
+ ) {
+ htsblk tmp;
+ memset(&tmp, 0, sizeof(tmp));
+ /* clear everything but connection: switch, close, and reswitch */
+ back_connxfr(src, &tmp);
+ back_delete(opt, back, p);
+ //deletehttp(src);
+ back_connxfr(&tmp, src);
+ src->req.flush_garbage=1; /* ignore CRLF garbage */
+ return 1;
+ }
+ return 0;
+}
+
+void back_connxfr(htsblk* src, htsblk* dst) {
+ dst->soc = src->soc;
+ src->soc = INVALID_SOCKET;
+#if HTS_USEOPENSSL
+ dst->ssl = src->ssl;
+ src->ssl = 0;
+ dst->ssl_con = src->ssl_con;
+ src->ssl_con = NULL;
+#endif
+ dst->keep_alive = src->keep_alive;
+ src->keep_alive = 0;
+ dst->keep_alive_max = src->keep_alive_max;
+ src->keep_alive_max = 0;
+ dst->keep_alive_t = src->keep_alive_t;
+ src->keep_alive_t = 0;
+ dst->debugid = src->debugid;
+ src->debugid = 0;
+}
+
+// clear, or leave for keep-alive
+int back_maydelete(httrackp* opt,lien_back* back, int p) {
+ if (p>=0) { // on sait jamais..
+ if (!opt->nokeepalive
+ && back[p].r.keep_alive
+ && back[p].r.keep_alive_max > 1
+ && back[p].ka_time_start
+ && time_local() < back[p].ka_time_start + back[p].r.keep_alive_t
+ ) {
+ lien_back tmp;
+ strcpybuff(tmp.url_adr, back[p].url_adr);
+ if (back_letlive(opt, back, p)) {
+ strcpybuff(back[p].url_adr, tmp.url_adr);
+ back[p].status = -103; // alive & waiting
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"(Keep-Alive): successfully saved #%d (%s)"LF,
+ back[p].r.debugid,
+ back[p].url_adr); test_flush;
+ }
+ return 1;
+ }
+ }
+ back_delete(opt,back, p);
+ }
+ return 0;
+}
+
+// clear, or leave for keep-alive
+void back_maydeletehttp(httrackp* opt, lien_back* back, int back_max, int p) {
+ if (back[p].r.soc!=INVALID_SOCKET) {
+ int q;
+ if (!opt->nokeepalive
+ && back[p].r.keep_alive
+ && back[p].r.keep_alive_max > 1
+ && back[p].ka_time_start
+ && time_local() < back[p].ka_time_start + back[p].r.keep_alive_t
+ && ( q = back_search(opt, back, back_max) ) >= 0
+ )
+ {
+ lien_back tmp;
+ strcpybuff(tmp.url_adr, back[p].url_adr);
+ deletehttp(&back[q].r); // security check
+ back_connxfr(&back[p].r, &back[q].r); // transfer live connection settings from p to q
+ back[q].ka_time_start = back[p].ka_time_start; // refresh
+ back[p].r.soc = INVALID_SOCKET;
+ strcpybuff(back[q].url_adr, tmp.url_adr); // address
+ back[q].status = -103; // alive & waiting
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"(Keep-Alive): successfully preserved #%d (%s)"LF,
+ back[q].r.debugid,
+ back[q].url_adr); test_flush;
+ }
+ } else {
+ deletehttp(&back[p].r);
+ back[p].r.soc = INVALID_SOCKET;
+ }
+ }
+}
+
+
+/* attempt to attach a live connection to this slot */
+int back_trylive(httrackp* opt,lien_back* back, int back_max, int p) {
+ if (p>=0 && back[p].status != -103) { // we never know..
+ int i = back_searchlive(opt,back, back_max, back[p].url_adr); // search slot
+ if (i >= 0 && i != p) {
+ deletehttp(&back[p].r); // security check
+ back_connxfr(&back[i].r, &back[p].r); // transfer live connection settings from i to p
+ back_delete(opt,back, i); // delete old slot
+ back[p].status=100; // ready to connect
+ return 1; // success: will reuse live connection
+ }
+ }
+ return 0;
+}
+
+/* search for a live position, or, if not possible, try to return a new one */
+int back_searchlive(httrackp* opt, lien_back* back, int back_max, char* search_addr) {
+ int i;
+
+ /* search for a live socket */
+ for(i = 0 ; i < back_max ; i++ ) {
+ if (back[i].status == -103) {
+ if (strfield2(back[i].url_adr, search_addr)) { /* same location (xxc: check also virtual hosts?) */
+ if (time_local() < back[i].ka_time_start + back[i].r.keep_alive_t) {
+ return i;
+ }
+ }
+ }
+ }
+ return -1;
+}
+
+int back_search(httrackp* opt,lien_back* back, int back_max) {
+ int i;
+
+ /* try to find an empty place */
+ for(i = 0 ; i < back_max ; i++ ) {
+ if (back[i].status == -1) {
+ return i;
+ }
+ }
+
+ /* couldn't find an empty place, try to requisition a keep-alive place */
+ for(i = 0 ; i < back_max ; i++ ) {
+ if (back[i].status == -103) {
+ /* close this place */
+ back_delete(opt,back, i);
+ return i;
+ }
+ }
+
+ /* oops, can't find a place */
+ return -1;
+}
// effacer entrée
-int back_delete(lien_back* back,int p) {
+int back_delete(httrackp* opt, lien_back* back, int p) {
if (p>=0) { // on sait jamais..
// Vérificateur d'intégrité
#if DEBUG_CHECKINT
@@ -356,21 +528,6 @@ int back_delete(lien_back* back,int p) {
back[p].r.soc=INVALID_SOCKET;
}
-#if HTS_USEOPENSSL
- /* Free OpenSSL structures */
- if (back[p].r.ssl_con) {
- SSL_shutdown(back[p].r.ssl_con);
- SSL_free(back[p].r.ssl_con);
- back[p].r.ssl_con=NULL;
- }
- /*
- if (back[p].r.ssl_soc) {
- BIO_free_all(back[p].r.ssl_soc);
- back[p].r.ssl_soc=NULL;
- }
- */
-#endif
-
if (back[p].r.adr!=NULL) { // reste un bloc à désallouer
freet(back[p].r.adr);
back[p].r.adr=NULL;
@@ -379,6 +536,7 @@ int back_delete(lien_back* back,int p) {
freet(back[p].chunk_adr);
back[p].chunk_adr=NULL;
back[p].chunk_size=0;
+ back[p].chunk_blocksize=0;
back[p].is_chunk=0;
}
// if (back[p].r.is_file) { // fermer fichier entrée
@@ -402,7 +560,7 @@ int back_delete(lien_back* back,int p) {
set_filetime_rfc822(back[p].url_sav,back[p].r.lastmodified);
/* executer commande utilisateur après chargement du fichier */
- usercommand(0,NULL,back[p].url_sav);
+ //xx usercommand(opt,0,NULL,back[p].url_sav, back[p].url_adr, back[p].url_fil);
back[p].r.is_write=0;
}
@@ -426,7 +584,7 @@ int back_stack_available(lien_back* back,int back_max) {
}
// ajouter un lien en backing
-int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* referer_adr,char* referer_fil,int test,short int* pass2_ptr) {
+int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* referer_adr,char* referer_fil,int test,int* pass2_ptr) {
int p=0;
// vérifier cohérence de adr et fil (non vide!)
@@ -444,37 +602,37 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char*
}
// FIN vérifier cohérence de adr et fil (non vide!)
+ // stats
+ opt->state.back_add_stats++;
+
// rechercher emplacement
- while((p<back_max) && back[p].status!=-1) p++;
- if (back[p].status==-1) { // ok on a de la place
+ back_clean(opt, cache, back, back_max);
+ if ( ( p = back_search(opt, back, back_max) ) >= 0) {
back[p].send_too[0]='\0'; // éventuels paramètres supplémentaires à transmettre au serveur
- // ne sert à rien normalement
- if (back[p].r.soc!=INVALID_SOCKET) {
-#if HTS_DEBUG_CLOSESOCK
- DEBUG_W("back_add: deletehttp\n");
-#endif
+ // clear r
+ if (back[p].r.soc!=INVALID_SOCKET) { /* we never know */
deletehttp(&back[p].r);
}
-
- // effacer r
- memset(&(back[p].r), 0, sizeof(htsblk)); back[p].r.soc=INVALID_SOCKET; back[p].r.location=back[p].location_buffer;
+ memset(&(back[p].r), 0, sizeof(htsblk));
+ back[p].r.soc=INVALID_SOCKET;
+ back[p].r.location=back[p].location_buffer;
// créer entrée
- strcpy(back[p].url_adr,adr);
- strcpy(back[p].url_fil,fil);
- strcpy(back[p].url_sav,save);
+ strcpybuff(back[p].url_adr,adr);
+ strcpybuff(back[p].url_fil,fil);
+ strcpybuff(back[p].url_sav,save);
back[p].pass2_ptr=pass2_ptr;
// copier referer si besoin
- strcpy(back[p].referer_adr,"");
- strcpy(back[p].referer_fil,"");
+ strcpybuff(back[p].referer_adr,"");
+ strcpybuff(back[p].referer_fil,"");
if ((referer_adr) && (referer_fil)) { // existe
if ((strnotempty(referer_adr)) && (strnotempty(referer_fil))) { // non vide
if (referer_adr[0]!='!') { // non détruit
if (strcmp(referer_adr,"file://")) { // PAS file://
if (strcmp(referer_adr,"primary")) { // pas referer 1er lien
- strcpy(back[p].referer_adr,referer_adr);
- strcpy(back[p].referer_fil,referer_fil);
+ strcpybuff(back[p].referer_adr,referer_adr);
+ strcpybuff(back[p].referer_fil,referer_fil);
}
}
}
@@ -482,7 +640,6 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char*
}
// sav ne sert à rien pour le moment
back[p].r.size=0; // rien n'a encore été chargé
- back[p].r.soc=INVALID_SOCKET; // pas de socket
back[p].r.adr=NULL; // pas de bloc de mémoire
back[p].r.is_write=0; // à priori stockage en mémoire
back[p].maxfile_html=opt->maxfile_html;
@@ -496,11 +653,10 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char*
else if (strcmp(back[p].url_sav,BACK_ADD_TEST2)==0) // test en GET
back[p].head_request=2; // test en get
-
/* Stop requested - abort backing */
if (opt->state.stop) {
back[p].r.statuscode=-1; // fatal
- strcpy(back[p].r.msg,"mirror stopped by user");
+ strcpybuff(back[p].r.msg,"mirror stopped by user");
back[p].status=0; // terminé
if ((opt->debug>0) && (opt->log!=NULL)) {
fspc(opt->log,"warning"); fprintf(opt->log,"File not added due to mirror cancel: %s%s"LF,adr,fil); test_flush;
@@ -508,7 +664,6 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char*
return 0;
}
-
// tester cache
if ((strcmp(adr,"file://")) /* pas fichier */
&& ( (!test) || (cache->type==1) ) /* cache prioritaire, laisser passer en test! */
@@ -528,10 +683,10 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char*
#endif
char buff[HTS_URLMAXSIZE*4];
#if HTS_FAST_CACHE
- strcpy(buff,adr); strcat(buff,fil);
+ strcpybuff(buff,adr); strcatbuff(buff,fil);
hash_pos_return=inthash_read((inthash)cache->hashtable,buff,(long int*)&hash_pos);
#else
- buff[0]='\0'; strcat(buff,"\n"); strcat(buff,adr); strcat(buff,"\n"); strcat(buff,fil); strcat(buff,"\n");
+ buff[0]='\0'; strcatbuff(buff,"\n"); strcatbuff(buff,adr); strcatbuff(buff,"\n"); strcatbuff(buff,fil); strcatbuff(buff,"\n");
a=strstr(cache->use,buff);
#endif
@@ -550,21 +705,45 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char*
sscanf(a,"%d",&pos); // lire position
#endif
if (pos<0) { // pas de mise en cache data, vérifier existence
- if (fsize(antislash(save)) <= 0) { // fichier existe pas ou est vide!
+ if (fsize(fconv(save)) <= 0) { // fichier existe pas ou est vide!
+ int found=0;
+
+ /* It is possible that the file has been moved due to changes in build structure */
+ {
+ char previous_save[HTS_URLMAXSIZE*2];
+ previous_save[0] = '\0';
+ back[p].r = cache_readex(opt, cache, adr, fil, NULL, back[p].location_buffer, previous_save, 0);
+ if (previous_save[0] != '\0' && fexist(fconv(previous_save))) {
+ rename(fconv(previous_save), fconv(save));
+ if (fexist(fconv(save))) {
+ found = 1;
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"File '%s' has been renamed since last mirror to '%s' ; applying changes"LF, previous_save, save); test_flush;
+ }
+ } else {
+ if ((opt->debug>0) && (opt->log!=NULL)) {
+ fspc(opt->log,"error"); fprintf(opt->log,"Could not rename '%s' to '%s' ; will have to retransfer it"LF, previous_save, save); test_flush;
+ }
+ }
+ }
+ }
+
+ if (!found) {
#if HTS_FAST_CACHE
- hash_pos_return=0;
+ hash_pos_return=0;
#else
- a=NULL;
-#endif
- // dévalider car non présent sur disque dans structure originale!!!
- // sinon, le fichier est ok à priori, mais on renverra un if-modified-since pour
- // en être sûr
- if (opt->norecatch) { // tester norecatch
- if (!fexist(antislash(save))) { // fichier existe pas mais déclaré: on l'a effacé
- FILE* fp=fopen(antislash(save),"wb");
- if (fp) fclose(fp);
- if (opt->log!=NULL) {
- fspc(opt->log,"warning"); fprintf(opt->log,"File must have been erased by user, ignoring: %s%s"LF,back[p].url_adr,back[p].url_fil); test_flush;
+ a=NULL;
+#endif
+ // dévalider car non présent sur disque dans structure originale!!!
+ // sinon, le fichier est ok à priori, mais on renverra un if-modified-since pour
+ // en être sûr
+ if (opt->norecatch) { // tester norecatch
+ if (!fexist(fconv(save))) { // fichier existe pas mais déclaré: on l'a effacé
+ FILE* fp=fopen(fconv(save),"wb");
+ if (fp) fclose(fp);
+ if (opt->log!=NULL) {
+ fspc(opt->log,"warning"); fprintf(opt->log,"File must have been erased by user, ignoring: %s%s"LF,back[p].url_adr,back[p].url_fil); test_flush;
+ }
}
}
}
@@ -587,18 +766,15 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char*
if (a!=NULL) { // OK existe en cache (et données aussi)!
#endif
if (cache->type==1) { // cache prioritaire (pas de test if-modified..)
- // dans ce cas on peut également lire des réponses cachées comme 404,302...
+ // dans ce cas on peut également lire des réponses cachées comme 404,302...
// lire dans le cache
if (!test)
- back[p].r=cache_read(opt,cache,adr,fil,save);
+ back[p].r = cache_read(opt,cache,adr,fil,save, back[p].location_buffer);
else
- back[p].r=cache_read(opt,cache,adr,fil,NULL); // charger en tête uniquement du cache
- if (!back[p].r.location)
- back[p].r.location=back[p].location_buffer;
- else { /* recopier */
- strcpy(back[p].location_buffer,back[p].r.location);
- back[p].r.location=back[p].location_buffer;
- }
+ back[p].r = cache_read(opt,cache,adr,fil,NULL, back[p].location_buffer); // charger en tête uniquement du cache
+
+ /* ensure correct location buffer set */
+ back[p].r.location=back[p].location_buffer;
/* Interdiction taille par le wizard? --> détruire */
if (back[p].r.statuscode != -1) { // pas d'erreur de lecture
@@ -606,9 +782,9 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char*
back[p].status=0; // FINI
back[p].r.statuscode=-1;
if (!back[p].testmode)
- strcpy(back[p].r.msg,"Cached file skipped (too big)");
+ strcpybuff(back[p].r.msg,"Cached file skipped (too big)");
else
- strcpy(back[p].r.msg,"Test: Cached file skipped (too big)");
+ strcpybuff(back[p].r.msg,"Test: Cached file skipped (too big)");
return 0;
}
}
@@ -639,42 +815,43 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char*
}
} else if (cache->type==2) { // si en cache, demander de tester If-Modified-Since
- htsblk* r=cache_header(opt,cache,adr,fil);
+ htsblk r;
+ cache_header(opt,cache,adr,fil,&r);
/* Interdiction taille par le wizard? */
{
LLint save_totalsize=back[p].r.totalsize;
- back[p].r.totalsize=r->totalsize;
+ back[p].r.totalsize=r.totalsize;
if (!back_checksize(opt,&back[p],1)) {
- r=NULL;
+ r.statuscode = -1;
//
back[p].status=0; // FINI
deletehttp(&back[p].r); back[p].r.soc=INVALID_SOCKET;
if (!back[p].testmode)
- strcpy(back[p].r.msg,"File too big");
+ strcpybuff(back[p].r.msg,"File too big");
else
- strcpy(back[p].r.msg,"Test: File too big");
+ strcpybuff(back[p].r.msg,"Test: File too big");
return 0;
}
back[p].r.totalsize=save_totalsize;
}
- if (r) {
- if (r->statuscode==200) { // uniquement des 200 (OK)
- if (strnotempty(r->etag)) { // ETag (RFC2616)
+ if (r.statuscode != -1) {
+ if (r.statuscode==200) { // uniquement des 200 (OK)
+ if (strnotempty(r.etag)) { // ETag (RFC2616)
/*
- If both an entity tag and a Last-Modified value have been
provided by the origin server, SHOULD use both validators in
cache-conditional requests. This allows both HTTP/1.0 and
HTTP/1.1 caches to respond appropriately.
*/
- if (strnotempty(r->lastmodified))
- sprintf(back[p].send_too,"If-None-Match: %s\r\nIf-Modified-Since: %s\r\n",r->etag,r->lastmodified);
+ if (strnotempty(r.lastmodified))
+ sprintf(back[p].send_too,"If-None-Match: %s\r\nIf-Modified-Since: %s\r\n",r.etag,r.lastmodified);
else
- sprintf(back[p].send_too,"If-None-Match: %s\r\n",r->etag);
+ sprintf(back[p].send_too,"If-None-Match: %s\r\n",r.etag);
}
- else if (strnotempty(r->lastmodified))
- sprintf(back[p].send_too,"If-Modified-Since: %s\r\n",r->lastmodified);
+ else if (strnotempty(r.lastmodified))
+ sprintf(back[p].send_too,"If-Modified-Since: %s\r\n",r.lastmodified);
else if (strnotempty(cache->lastmodified))
sprintf(back[p].send_too,"If-Modified-Since: %s\r\n",cache->lastmodified);
@@ -754,7 +931,7 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char*
filenote(save,NULL); // ne pas purger tout de même
back[p].status=0; // OK prêt
back[p].r.statuscode=-1; // erreur
- strcpy(back[p].r.msg,"Null-size file not recaught");
+ strcpybuff(back[p].r.msg,"Null-size file not recaught");
return 0;
}
} else {
@@ -781,12 +958,13 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char*
// recopier proxy
memcpy(&(back[p].r.req.proxy), &opt->proxy, sizeof(opt->proxy));
// et user-agent
- strcpy(back[p].r.req.user_agent,opt->user_agent);
- strcpy(back[p].r.req.lang_iso,opt->lang_iso);
+ strcpybuff(back[p].r.req.user_agent,opt->user_agent);
+ strcpybuff(back[p].r.req.lang_iso,opt->lang_iso);
back[p].r.req.user_agent_send=opt->user_agent_send;
// et http11
back[p].r.req.http11=back[p].http11;
back[p].r.req.nocompression=opt->nocompression;
+ back[p].r.req.nokeepalive=opt->nokeepalive;
// mode ftp, court-circuit!
if (strfield(back[p].url_adr,"ftp://")) {
@@ -804,7 +982,7 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char*
{
char nid[32];
sprintf(nid,"htsftp%d-in_progress.lock",p);
- strcpy(back[p].location_buffer,fconcat(opt->path_log,nid));
+ strcpybuff(back[p].location_buffer,fconcat(opt->path_log,nid));
}
launch_ftp(&(back[p]),back[p].location_buffer,opt->exec);
#endif
@@ -812,49 +990,60 @@ int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char*
}
}
#if HTS_USEOPENSSL
- else if (strfield(back[p].url_adr,"https://")) { // let's rock
+ else if (SSL_is_available && strfield(back[p].url_adr,"https://")) { // let's rock
back[p].r.ssl = 1;
// back[p].r.ssl_soc = NULL;
back[p].r.ssl_con = NULL;
}
#endif
+ if (!back_trylive(opt,back, back_max, p)) {
#if HTS_XGETHOST
#if HDEBUG
- printf("back_solve..\n");
+ printf("back_solve..\n");
#endif
- back[p].status=101; // tentative de résolution du nom de host
- soc=INVALID_SOCKET; // pas encore ouverte
- back_solve(&back[p]); // préparer
- if (host_wait(&back[p])) { // prêt, par ex fichier ou dispo dans dns
+ back[p].status=101; // tentative de résolution du nom de host
+ soc=INVALID_SOCKET; // pas encore ouverte
+ back_solve(&back[p]); // préparer
+ if (host_wait(&back[p])) { // prêt, par ex fichier ou dispo dans dns
#if HDEBUG
- printf("ok, dns cache ready..\n");
+ printf("ok, dns cache ready..\n");
#endif
- soc=http_xfopen(0,0,0,back[p].send_too,adr,fil,&(back[p].r));
- if (soc==INVALID_SOCKET) {
- back[p].status=0; // fini, erreur
+ soc=http_xfopen(0,0,0,back[p].send_too,adr,fil,&(back[p].r));
+ if (soc==INVALID_SOCKET) {
+ back[p].status=0; // fini, erreur
+ }
}
- }
-//
+ //
#else
-//
+ //
#if CNXDEBUG
- printf("XFopen..\n");
+ printf("XFopen..\n");
#endif
-
- if (strnotempty(back[p].send_too)) // envoyer un if-modified-since
+
+ if (strnotempty(back[p].send_too)) // envoyer un if-modified-since
#if HTS_XCONN
- soc=http_xfopen(0,0,0,back[p].send_too,adr,fil,&(back[p].r));
+ soc=http_xfopen(0,0,0,back[p].send_too,adr,fil,&(back[p].r));
#else
- soc=http_xfopen(0,0,1,back[p].send_too,adr,fil,&(back[p].r));
+ soc=http_xfopen(0,0,1,back[p].send_too,adr,fil,&(back[p].r));
#endif
- else
+ else
#if HTS_XCONN
- soc=http_xfopen(test,0,0,NULL,adr,fil,&(back[p].r));
+ soc=http_xfopen(test,0,0,NULL,adr,fil,&(back[p].r));
#else
- soc=http_xfopen(test,0,1,NULL,adr,fil,&(back[p].r));
+ soc=http_xfopen(test,0,1,NULL,adr,fil,&(back[p].r));
#endif
#endif
+ } else {
+ soc = back[p].r.soc;
+
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"(Keep-Alive): successfully linked #%d (for %s%s)"LF,
+ back[p].r.debugid,
+ back[p].url_adr, back[p].url_fil); test_flush;
+ }
+ }
+
if (opt->timeout>0) { // gestion du opt->timeout
back[p].timeout=opt->timeout;
back[p].timeout_refresh=time_local();
@@ -938,7 +1127,7 @@ PTHREAD_TYPE Hostlookup(void* iadr_p) {
#if DEBUGDNS
printf("resolv in background: %s\n",jump_identification(iadr_p));
#endif
- strcpy(iadr,jump_identification(iadr_p));
+ strcpybuff(iadr,jump_identification(iadr_p));
// couper éventuel :
{
char *a;
@@ -965,7 +1154,7 @@ PTHREAD_TYPE Hostlookup(void* iadr_p) {
cache->n=(t_dnscache*) calloct(1,sizeof(t_dnscache));
if (cache->n!=NULL) {
t_fullhostent fullhostent_buffer;
- strcpy(cache->n->iadr,iadr);
+ strcpybuff(cache->n->iadr,iadr);
cache->n->host_length=0; /* pour le moment rien */
cache->n->n=NULL;
_hts_lockdns(0); // délocker
@@ -1019,7 +1208,7 @@ void back_solve(lien_back* back) {
{
char* p = calloct(strlen(a)+2,1);
if (p) {
- strcpy(p,a);
+ strcpybuff(p,a);
_beginthread( Hostlookup , 0, p );
}
}
@@ -1031,7 +1220,7 @@ void back_solve(lien_back* back) {
#if USE_BEGINTHREAD
char* p = calloct(strlen(a)+2,1);
if (p) {
- strcpy(p,a);
+ strcpybuff(p,a);
_beginthread( Hostlookup , 0, p );
}
#else
@@ -1061,32 +1250,79 @@ int host_wait(lien_back* back) {
// élimine les fichiers non html en backing (anticipation)
// cleanup non-html files in backing to save backing space
// and allow faster "save in cache" operation
+// also cleanup keep-alive sockets and ensure that not too many sockets are being opened
void back_clean(httrackp* opt,cache_back* cache,lien_back* back,int back_max) {
+#if HTS_ANALYSTE
+ int oneMore = ( (_hts_in_html_parsing == 2 && opt->maxsoc >= 2) || (_hts_in_html_parsing == 1 && opt->maxsoc >= 4) ) ? 1 : 0; // testing links
+#endif
int i;
for(i=0;i<back_max;i++) {
if (back[i].status == 0) { // ready
+ /* Check autoclean */
if (!back[i].testmode) { // not test mode
if (strnotempty(back[i].url_sav)) { // filename exists
- if (back[i].r.is_write) { // not in memory (on disk, ready)
+ if (back[i].r.statuscode==200) { // HTTP "OK"
if (back[i].r.size>0) { // size>0
- if (back[i].r.statuscode==200) { // HTTP "OK"
- if (!is_hypertext_mime(back[i].r.contenttype)) { // not HTML/hypertext
- if (!may_be_hypertext_mime(back[i].r.contenttype)) { // may NOT be parseable mime type
- if (back[i].pass2_ptr) {
- // finalize
- // // back_finalize(opt,cache,back,i);
- // stats
- //HTS_STAT.stat_bytes+=back[i].r.size;
- //HTS_STAT.stat_files++;
- //if ( (!back[i].r.notmodified) && (opt->is_update) ) {
- // HTS_STAT.stat_updated_files++; // page modifiée
- //}
- //cache_mayadd(opt,cache,&back[i].r,back[i].url_adr,back[i].url_fil,back[i].url_sav);
- *back[i].pass2_ptr=-1; // Done!
- back_delete(back,i); // Delete backing entry
- if ((opt->debug>0) && (opt->log!=NULL)) {
- fspc(opt->log,"info"); fprintf(opt->log,"File successfully written in background: %s"LF,back[i].url_sav); test_flush;
+ if (back[i].r.is_write // not in memory (on disk, ready)
+ && !is_hypertext_mime(back[i].r.contenttype) // not HTML/hypertext
+ && !may_be_hypertext_mime(back[i].r.contenttype) // may NOT be parseable mime type
+ ) {
+ if (back[i].pass2_ptr) {
+ // finalize
+ // // back_finalize(opt,cache,back,i);
+ // stats
+ //HTS_STAT.stat_bytes+=back[i].r.size;
+ //HTS_STAT.stat_files++;
+ //if ( (!back[i].r.notmodified) && (opt->is_update) ) {
+ // HTS_STAT.stat_updated_files++; // page modifiée
+ //}
+ //xxxcache_mayadd(opt,cache,&back[i].r,back[i].url_adr,back[i].url_fil,back[i].url_sav);
+ usercommand(opt, 0, NULL, back[i].url_sav, back[i].url_adr, back[i].url_fil);
+ *back[i].pass2_ptr=-1; // Done!
+ back_maydelete(opt,back,i); // May delete backing entry
+ if ((opt->debug>0) && (opt->log!=NULL)) {
+ fspc(opt->log,"info"); fprintf(opt->log,"File successfully written in background: %s"LF,back[i].url_sav); test_flush;
+ }
+ }
+ } else {
+ if (!back[i].finalized) {
+ if (1) {
+ /* Ensure deleted or recycled socket */
+ /* BUT DO NOT YET WIPE back[i].r.adr */
+ back_maydeletehttp(opt, back, back_max, i);
+ if ( (opt->debug>1) && (opt->log!=NULL) ) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"file %s%s validated (cached, left in memory)"LF,back[i].url_adr,back[i].url_fil); test_flush;
+ }
+ } else {
+ /*
+ NOT YET HANDLED CORRECTLY (READ IN NEW CACHE TO DO)
+ */
+ /* Lock the entry but do not keep the html data in memory (in cache) */
+ if (opt->cache) {
+ htsblk r;
+
+ /* Ensure deleted or recycled socket */
+ back_maydeletehttp(opt, back, back_max, i);
+ assertf(back[i].r.soc == INVALID_SOCKET);
+
+ /* Check header */
+ cache_header(opt,cache,back[i].url_adr,back[i].url_fil,&r);
+ if (r.statuscode == 200) {
+ if (back[i].r.soc == INVALID_SOCKET) {
+ /* Delete buffer and sockets */
+ deleteaddr(&back[i].r);
+ deletehttp(&back[i].r);
+ back[i].finalized = 1;
+ if ( (opt->debug>1) && (opt->log!=NULL) ) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"file %s%s temporarily left in cache to spare memory"LF,back[i].url_adr,back[i].url_fil); test_flush;
+ }
+ }
+ } else {
+ if ((opt->debug>0) && (opt->log!=NULL)) {
+ fspc(opt->log,"warning"); fprintf(opt->log,"Unexpected html cache lookup error during back clean"LF); test_flush;
+ }
}
+ // xxc xxc
}
}
}
@@ -1095,8 +1331,49 @@ void back_clean(httrackp* opt,cache_back* cache,lien_back* back,int back_max) {
}
}
}
+ } else if (back[i].status == -103) { // waiting (keep-alive)
+ if (
+ ! back[i].r.keep_alive
+ || back[i].r.soc == INVALID_SOCKET
+ || back[i].r.keep_alive_max < 1
+ || time_local() >= back[i].ka_time_start + back[i].r.keep_alive_t
+ ) {
+ if ((opt->debug>0) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"(Keep-Alive): live socket closed #%d (%s)"LF,
+ back[i].r.debugid,
+ back[i].url_adr);
+ test_flush;
+ }
+ back_delete(opt,back, i); // delete backing entry
+ }
}
- }
+ }
+ /* switch connections to live ones */
+ for(i=0;i<back_max;i++) {
+ if (back[i].status == 0) { // ready
+ if (back[i].r.soc != INVALID_SOCKET) {
+ back_maydeletehttp(opt,back, back_max, i);
+ }
+
+ }
+ }
+ /* delete sockets if too many keep-alive'd sockets in background */
+ if (opt->maxsoc > 0) {
+ int max = opt->maxsoc + oneMore;
+ int curr = back_nsoc_overall(back, back_max);
+ if (curr > max) {
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"(Keep-Alive): deleting #%d sockets"LF,
+ curr - max); test_flush;
+ }
+ }
+ for(i = 0 ; i < back_max && curr > max ; i++) {
+ if (back[i].status == -103) {
+ back_delete(opt,back, i); // delete backing entry
+ curr--;
+ }
+ }
+ }
}
@@ -1208,9 +1485,9 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
} else {
back[i].r.statuscode=-4;
if (back[i].status==100)
- strcpy(back[i].r.msg,"Connect Error");
+ strcpybuff(back[i].r.msg,"Connect Error");
else
- strcpy(back[i].r.msg,"Receive Error");
+ strcpybuff(back[i].r.msg,"Receive Error");
back[i].status=0; // terminé
if ((opt->debug>0) && (opt->log!=NULL)) {
fspc(opt->log,"warning"); fprintf(opt->log,"Unexpected socket error during pre-loop"LF); test_flush;
@@ -1251,6 +1528,13 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
if (nsockets) {
if (opt->maxrate>0) {
max_read_bytes = ( check_downloadable_bytes(opt->maxrate) / nsockets );
+ if (max_read_bytes > TAILLE_BUFFER) {
+ /* limit size */
+ max_read_bytes = TAILLE_BUFFER;
+ } else if (max_read_bytes < TAILLE_BUFFER) {
+ /* a small pause */
+ Sleep(10);
+ }
}
}
if (!max_read_bytes)
@@ -1273,10 +1557,14 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
back[i].r.soc=INVALID_SOCKET;
back[i].r.statuscode=-4;
if (back[i].status==100)
- strcpy(back[i].r.msg,"Connect Error");
+ strcpybuff(back[i].r.msg,"Connect Error");
else
- strcpy(back[i].r.msg,"Receive Error");
- back[i].status=0; // terminé
+ strcpybuff(back[i].r.msg,"Receive Error");
+ if (back[i].status == -103) { /* Keep-alive socket */
+ back_delete(opt,back, i);
+ } else {
+ back[i].status=0; // terminé
+ }
}
}
}
@@ -1297,7 +1585,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
#if HTS_USEOPENSSL
/* SSL mode */
- if (back[i].r.ssl) {
+ if (SSL_is_available && back[i].r.ssl) {
// handshake not yet launched
if (!back[i].r.ssl_con) {
SSL_CTX_set_options(openssl_ctx, SSL_OP_ALL);
@@ -1315,7 +1603,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
}
/* Error */
if (back[i].r.statuscode == -6) {
- strcpy(back[i].r.msg, "bad SSL/TLS handshake");
+ strcpybuff(back[i].r.msg, "bad SSL/TLS handshake");
deletehttp(&back[i].r);
back[i].r.soc=INVALID_SOCKET;
back[i].r.statuscode=-5;
@@ -1335,14 +1623,16 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
Sleep(1000/opt->maxconn);
}
+ back[i].ka_time_start=time_local();
if (back[i].timeout>0) { // refresh timeout si besoin est
- back[i].timeout_refresh=time_local();
+ back[i].timeout_refresh=back[i].ka_time_start;
}
if (back[i].rateout>0) { // le taux de transfert de base sur le début de la connexion
- back[i].rateout_time=time_local();
+ back[i].rateout_time=back[i].ka_time_start;
}
// envoyer header
//if (strcmp(back[i].url_sav,BACK_ADD_TEST)!=0) // vrai get
+ HTS_STAT.stat_nrequests++;
if (!back[i].head_request)
http_sendhead(opt->cookie,0,back[i].send_too,back[i].url_adr,back[i].url_fil,back[i].referer_adr,back[i].referer_fil,&back[i].r);
else if (back[i].head_request==2) // test en GET!
@@ -1356,7 +1646,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
// attente gethostbyname
}
#if HTS_USEOPENSSL
- else if (back[i].status==102) { // wait for SSL handshake
+ else if (SSL_is_available && back[i].status==102) { // wait for SSL handshake
/* SSL mode */
if (back[i].r.ssl) {
int conn_code;
@@ -1372,7 +1662,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
tmp[0]='\0';
ERR_error_string(err_code, tmp);
back[i].r.msg[0]='\0';
- strncat(back[i].r.msg, tmp, sizeof(back[i].r.msg) - 2);
+ strncatbuff(back[i].r.msg, tmp, sizeof(back[i].r.msg) - 2);
if (!strnotempty(back[i].r.msg)) {
sprintf(back[i].r.msg, "SSL/TLS error %d", err_code);
}
@@ -1385,7 +1675,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
back[i].status=100; // back to waitconnect
}
} else {
- strcpy(back[i].r.msg, "unexpected SSL/TLS error");
+ strcpybuff(back[i].r.msg, "unexpected SSL/TLS error");
deletehttp(&back[i].r);
back[i].r.soc=INVALID_SOCKET;
back[i].r.statuscode=-5;
@@ -1425,7 +1715,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
back[i].r.soc=INVALID_SOCKET;
back[i].r.statuscode=-5;
if (strnotempty(back[i].r.msg)==0)
- strcpy(back[i].r.msg,"Unable to resolve host name");
+ strcpybuff(back[i].r.msg,"Unable to resolve host name");
}
}
@@ -1451,9 +1741,9 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
back[i].r.msg[j++]='\0';
fclose(fp);
remove(fconcat(back[i].location_buffer,".ok"));
- strcpy(fconcat(back[i].location_buffer,".ok"),"");
+ strcpybuff(fconcat(back[i].location_buffer,".ok"),"");
} else {
- strcpy(back[i].r.msg,"Unknown ftp result, check if file is ok");
+ strcpybuff(back[i].r.msg,"Unknown ftp result, check if file is ok");
back[i].r.statuscode=-1;
}
back[i].status=0;
@@ -1507,20 +1797,26 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
if (back[i].r.statuscode==200) { // 'OK'
if (!is_hypertext_mime(back[i].r.contenttype)) { // pas HTML
if (opt->getmode&2) { // on peut ecrire des non html
+ int fcheck=0;
back[i].r.is_write=1; // écrire
if (back[i].r.compressed
&&
/* .gz are *NOT* depacked!! */
(strfield(get_ext(back[i].url_sav),"gz") == 0)
) {
- back[i].tmpfile[0]='\0';
- strcpy(back[i].tmpfile,tempnam(NULL,"httrZ"));
- if (back[i].tmpfile[0])
+ back[i].tmpfile_buffer[0]='\0';
+ back[i].tmpfile=tmpnam(back[i].tmpfile_buffer);
+ if (back[i].tmpfile != NULL && back[i].tmpfile[0])
back[i].r.out=fopen(back[i].tmpfile,"wb");
} else {
back[i].r.compressed=0;
back[i].r.out=filecreate(back[i].url_sav);
}
+ if (back[i].r.out==NULL) {
+ if ((fcheck=check_fatal_io_errno())) {
+ opt->state.exit_xh=-1; /* fatal error */
+ }
+ }
#if HDEBUG
printf("direct-disk: %s\n",back[i].url_sav);
#endif
@@ -1531,7 +1827,11 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
if (back[i].r.out==NULL) {
if (opt->errlog) {
fspc(opt->errlog,"error");
- fprintf(opt->errlog,"Unable to save file %s"LF,back[i].url_sav);
+ fprintf(opt->errlog,"Unable to save file %s : %s"LF,back[i].url_sav, strerror(errno));
+ if (fcheck) {
+ fspc(opt->errlog,"error");
+ fprintf(opt->errlog,"* * Fatal write error, giving up"LF);
+ }
test_flush;
}
back[i].r.is_write=0; // erreur, abandonner
@@ -1573,7 +1873,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
if (back[i].status) {
if (back[i].status==99) // recevoir par bloc de lignes
retour_fread=http_xfread1(&(back[i].r),0);
- else if (back[i].status==98) { // recevoir longueur chunk en hexa caractère par caractère
+ else if (back[i].status==98 || back[i].status==97) { // recevoir longueur chunk en hexa caractère par caractère
// backuper pour lire dans le buffer chunk
htsblk r;
memcpy(&r, &(back[i].r), sizeof(htsblk));
@@ -1593,7 +1893,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
}
else if (back[i].is_chunk) { // attention chunk, limiter taille à lire
#if CHUNKDEBUG==1
- printf("read %d bytes\n",(int)min(back[i].r.totalsize-back[i].r.size,max_read_bytes));
+ printf("[%d] read %d bytes\n",(int)back[i].r.soc,(int)min(back[i].r.totalsize-back[i].r.size,max_read_bytes));
#endif
retour_fread=(int) http_xfread1(&(back[i].r),(int) min(back[i].r.totalsize-back[i].r.size,max_read_bytes));
} else
@@ -1607,38 +1907,47 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
if (back[i].is_chunk) { // attendre prochain chunk
if (back[i].r.size==back[i].r.totalsize) { // fin chunk!
//printf("chunk end at %d\n",back[i].r.size);
- back[i].status=98; // prochain chunk
- if (back[i].chunk_adr!=NULL) { freet(back[i].chunk_adr); back[i].chunk_adr=NULL; } back[i].chunk_size=0;
+ back[i].status=97; /* fetch ending CRLF */
+ if (back[i].chunk_adr!=NULL) {
+ freet(back[i].chunk_adr);
+ back[i].chunk_adr=NULL;
+ }
+ back[i].chunk_size=0;
retour_fread=0; // pas d'erreur
#if CHUNKDEBUG==1
- printf("waiting for next chunk header (soc %d)..\n",back[i].r.soc);
+ printf("[%d] waiting for current chunk CRLF..\n",(int)back[i].r.soc);
#endif
}
+ } else if (back[i].r.keep_alive) {
+ if (back[i].r.size==back[i].r.totalsize) { // fin!
+ retour_fread=-1; // end
+ }
}
}
-
- if (retour_fread < 0) { // erreur réception
+
+ if (retour_fread < 0) { // fin réception
back[i].status=0; // terminé
if (back[i].r.soc!=INVALID_SOCKET) {
#if HTS_DEBUG_CLOSESOCK
DEBUG_W("back_wait(4): deletehttp\n");
#endif
- deletehttp(&back[i].r);
+ /*KA deletehttp(&back[i].r);*/
+ back_maydeletehttp(opt, back, back_max, i);
}
- back[i].r.soc=INVALID_SOCKET;
+ /*KA back[i].r.soc=INVALID_SOCKET; */
#if CHUNKDEBUG==1
if (back[i].is_chunk)
- printf("must be the last chunk for %s (connection closed) - %d/%d\n",back[i].url_fil,back[i].r.size,back[i].r.totalsize);
+ printf("[%d] must be the last chunk for %s (connection closed) - %d/%d\n",(int)back[i].r.soc,back[i].url_fil,back[i].r.size,back[i].r.totalsize);
#endif
//if ((back[i].r.statuscode==-1) && (strnotempty(back[i].r.msg)==0)) {
- if ((back[i].r.statuscode<0) && (strnotempty(back[i].r.msg)==0)) {
+ if ((back[i].r.statuscode <= 0) && (strnotempty(back[i].r.msg)==0)) {
#if HDEBUG
printf("error interruped: %s\n",back[i].r.adr);
#endif
if (back[i].r.size>0)
- strcat(back[i].r.msg,"Interrupted transfer");
+ strcatbuff(back[i].r.msg,"Interrupted transfer");
else
- strcat(back[i].r.msg,"No data (connection closed)");
+ strcatbuff(back[i].r.msg,"No data (connection closed)");
back[i].r.statuscode=-4;
}
@@ -1652,15 +1961,15 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
if (back[i].r.totalsize!=back[i].r.size) { // pas la même!
if (!opt->tolerant) {
//#if HTS_CL_IS_FATAL
- if (back[i].r.adr) freet(back[i].r.adr); back[i].r.adr=NULL;
+ deleteaddr(&back[i].r);
if (back[i].r.size<back[i].r.totalsize)
back[i].r.statuscode=-4; // recatch
- sprintf(back[i].r.msg,"Incorrect length ("LLintP" Bytes, "LLintP" expected)",back[i].r.size,back[i].r.totalsize);
+ sprintf(back[i].r.msg,"Incorrect length ("LLintP" Bytes, "LLintP" expected)",(LLint)back[i].r.size,(LLint)back[i].r.totalsize);
} else {
//#else
// Un warning suffira..
if (cache->errlog!=NULL) {
- fspc(cache->errlog,"warning"); fprintf(cache->errlog,"Incorrect length ("LLintP"!="LLintP" expected) for %s%s"LF,back[i].r.size,back[i].r.totalsize,back[i].url_adr,back[i].url_fil);
+ fspc(cache->errlog,"warning"); fprintf(cache->errlog,"Incorrect length ("LLintP"!="LLintP" expected) for %s%s"LF,(LLint)back[i].r.size,(LLint)back[i].r.totalsize,back[i].url_adr,back[i].url_fil);
}
//#endif
}
@@ -1675,92 +1984,156 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
}
// Traitement des en têtes chunks ou en têtes
- if (back[i].status==98) { // réception taille chunk en hexa ( après les en têtes, peut ne pas
- if (back[i].chunk_size>=2) {
+ if (back[i].status==98 || back[i].status==97) { // réception taille chunk en hexa ( après les en têtes, peut ne pas
+ if (back[i].chunk_size > 0 && back[i].chunk_adr[back[i].chunk_size-1]==10) {
int chunk_size=-1;
- // être présent)
- if (back[i].chunk_adr[back[i].chunk_size-1]==10) { // LF, fin ligne chunk
- char chunk_data[64];
- if (back[i].chunk_size<32) { // pas trop gros
- back[i].chunk_adr[ back[i].chunk_size-1]='\0'; // octet nul
- strcpy(chunk_data,""); // hex number
- strcat(chunk_data,back[i].chunk_adr);
+ char chunk_data[64];
+ if (back[i].chunk_size<32) { // pas trop gros
+ char* chstrip=back[i].chunk_adr;
+ back[i].chunk_adr[ back[i].chunk_size-1]='\0'; // octet nul
+ // skip leading spaces or cr
+ while(isspace(*chstrip)) chstrip++;
+ chunk_data[0] = '\0';
+ strncatbuff(chunk_data, chstrip, sizeof(chunk_data) - 2);
+ // strip chunk-extension
+ while( (chstrip = strchr(chunk_data, ';'))) *chstrip='\0';
+ while( (chstrip = strchr(chunk_data, ' '))) *chstrip='\0';
+ while( (chstrip = strchr(chunk_data, '\r'))) *chstrip='\0';
#if CHUNKDEBUG==1
- printf("chunk received and read: %s\n",chunk_data);
+ printf("[%d] chunk received and read: %s\n",(int)back[i].r.soc,chunk_data);
#endif
- if (back[i].r.totalsize<0)
- back[i].r.totalsize=0; // initialiser à 0
+ if (back[i].r.totalsize<0)
+ back[i].r.totalsize=0; // initialiser à 0
+ if (back[i].status==98) { // "real" chunk
if (sscanf(chunk_data,"%x",&chunk_size) == 1) {
+ if (chunk_size > 0)
+ back[i].chunk_blocksize = chunk_size; /* the data block chunk size */
+ else
+ back[i].chunk_blocksize = -1; /* ending */
back[i].r.totalsize+=chunk_size; // noter taille
back[i].r.adr=(char*) realloct(back[i].r.adr,(INTsys) back[i].r.totalsize + 1);
if (!back[i].r.adr) {
if (cache->errlog!=NULL) {
- fprintf(cache->errlog,"Error: Not enough memory ("LLintP") for %s%s"LF,back[i].r.totalsize,back[i].url_adr,back[i].url_fil);
+ fprintf(cache->errlog,"Error: Not enough memory ("LLintP") for %s%s"LF,(LLint)back[i].r.totalsize,back[i].url_adr,back[i].url_fil);
}
}
#if CHUNKDEBUG==1
- printf("chunk length: %d - next total "LLintP":\n",chunk_size,back[i].r.totalsize);
+ printf("[%d] chunk length: %d - next total "LLintP":\n",(int)back[i].r.soc,(int)chunk_size,(LLint)back[i].r.totalsize);
#endif
- } else
+ } else {
if (cache->errlog!=NULL) {
fprintf(cache->errlog,"Warning: Illegal chunk (%s) for %s%s"LF,back[i].chunk_adr,back[i].url_adr,back[i].url_fil);
}
- } else {
- if (cache->errlog!=NULL) {
- fprintf(cache->errlog,"Warning: Chunk too big ("LLintP") for %s%s"LF,back[i].chunk_size,back[i].url_adr,back[i].url_fil);
+ }
+ } else { /* back[i].status==97 : just receiving ending CRLF after data */
+ if (chunk_data[0] == '\0') {
+ if (back[i].chunk_blocksize > 0)
+ chunk_size=(int)back[i].chunk_blocksize; /* recent data chunk size */
+ else if (back[i].chunk_blocksize == -1)
+ chunk_size=0; /* ending chunk */
+ else
+ chunk_size=1; /* fake positive size for 1st chunk history */
+#if CHUNKDEBUG==1
+ printf("[%d] chunk CRLF seen\n", (int)back[i].r.soc);
+#endif
+ } else {
+ if (cache->errlog!=NULL) {
+ fprintf(cache->errlog,"Warning: Illegal chunk CRLF (%s) for %s%s"LF,back[i].chunk_adr,back[i].url_adr,back[i].url_fil);
+ }
+#if CHUNKDEBUG==1
+ printf("[%d] chunk CRLF ERROR!! : '%s'\n", (int)back[i].r.soc, chunk_data);
+#endif
}
}
+ } else {
+ if (cache->errlog!=NULL) {
+ fprintf(cache->errlog,"Warning: Chunk too big ("LLintP") for %s%s"LF,(LLint)back[i].chunk_size,back[i].url_adr,back[i].url_fil);
+ }
+ }
- // ok, continuer sur le body
+ // ok, continuer sur le body
- // si chunk non nul continuer (ou commencer)
- if (chunk_size>0) {
- back[i].status=1; // continuer body
+ // si chunk non nul continuer (ou commencer)
+ if (back[i].status==97 && chunk_size > 0) {
+ back[i].status = 98; /* waiting for next chunk (NN\r\n<data>\r\nNN\r\n<data>..\r\n0\r\n\r\n) */
+#if CHUNKDEBUG==1
+ printf("[%d] waiting for next chunk\n", (int)back[i].r.soc);
+#endif
+ } else if (back[i].status==98 && chunk_size == 0) { /* final chunk */
+ back[i].status=97; /* final CRLF */
+#if CHUNKDEBUG==1
+ printf("[%d] waiting for final CRLF (chunk)\n", (int)back[i].r.soc);
+#endif
+ } else if (back[i].status==98 && chunk_size >= 0) { /* will fetch data now */
+ back[i].status=1; // continuer body
#if CHUNKDEBUG==1
- printf("waiting for body (chunk)\n");
+ printf("[%d] waiting for body (chunk)\n", (int)back[i].r.soc);
#endif
- } else { // chunk nul, c'est la fin
+ } else { /* zero-size-chunk-CRLF (end) or error */
#if CHUNKDEBUG==1
- printf("chunk end, total: %d\n",back[i].r.size);
+ printf("[%d] chunk end, total: %d\n",(int)back[i].r.soc,back[i].r.size);
#endif
- back[i].status=0; // fin
- // finalize transfer
- back_finalize(opt,cache,back,i);
- if (back[i].r.soc!=INVALID_SOCKET) {
+ /* End */
+ //if (back[i].status==97) {
+ back[i].status=0; // fin
+ //}
+
+ // finalize transfer
+ back_finalize(opt,cache,back,i);
+ if (back[i].r.soc!=INVALID_SOCKET) {
#if HTS_DEBUG_CLOSESOCK
- DEBUG_W("back_wait(5): deletehttp\n");
+ DEBUG_W("back_wait(5): deletehttp\n");
#endif
+ /* Error */
+ if (chunk_size < 0) {
deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET;
-
+ deleteaddr(&back[i].r);
+ back[i].r.statuscode=-1;
+ strcpybuff(back[i].r.msg,"Invalid chunk");
+#if CHUNKDEBUG==1
+ printf("[%d] chunk error\n", (int)back[i].r.soc);
+#endif
+ } else /* if chunk_size == 0 */ {
+#if CHUNKDEBUG==1
+ printf("[%d] all chunks now received\n", (int)back[i].r.soc);
+#endif
+
/* Tester totalsize en fin de chunk */
if ((back[i].r.totalsize>0)) { // tester totalsize
if (back[i].r.totalsize!=back[i].r.size) { // pas la même!
#if HTS_CL_IS_FATAL
- if (back[i].r.adr) { freet(back[i].r.adr); back[i].r.adr=NULL; }
+ deleteaddr(&back[i].r);
back[i].r.statuscode=-1;
- strcpy(back[i].r.msg,"Incorrect length");
+ strcpybuff(back[i].r.msg,"Incorrect length");
#else
// Un warning suffira..
if (cache->errlog!=NULL) {
- fspc(cache->errlog,"warning"); fprintf(cache->errlog,"Incorrect length ("LLintP"!="LLintP" expected) for %s%s"LF,back[i].r.size,back[i].r.totalsize,back[i].url_adr,back[i].url_fil);
+ fspc(cache->errlog,"warning"); fprintf(cache->errlog,"Incorrect length ("LLintP"!="LLintP" expected) for %s%s"LF,(LLint)back[i].r.size,(LLint)back[i].r.totalsize,back[i].url_adr,back[i].url_fil);
}
#endif
}
}
-
-
+
+ /* Oops, trailers! */
+ if (back[i].r.keep_alive_trailers) {
+ /* fixme (not yet supported) */
+ }
+
}
+
+
}
-
- // effacer buffer (chunk en tete)
- if (back[i].chunk_adr!=NULL) {
- freet(back[i].chunk_adr);
- back[i].chunk_adr=NULL;
- back[i].chunk_size=0;
- }
+ }
+
+ // effacer buffer (chunk en tete)
+ if (back[i].chunk_adr!=NULL) {
+ freet(back[i].chunk_adr);
+ back[i].chunk_adr=NULL;
+ back[i].chunk_size=0;
+ // NO! xxback[i].chunk_blocksize = 0;
+ }
- } // chunk LF?
- } // taille buffer chunk>2
+ } // taille buffer chunk > 1 && LF
//
} else if (back[i].status==99) { // en têtes (avant le chunk si il est présent)
//
@@ -1779,15 +2152,34 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
printf("..ok, header received\n");
#endif
+
+ // Callback
+#if HTS_ANALYSTE
+ if (hts_htmlcheck_receivehead != NULL) {
+ int test_head=hts_htmlcheck_receivehead(back[i].r.adr, back[i].url_adr, back[i].url_fil, back[i].referer_adr, back[i].referer_fil, &back[i].r);
+ if (test_head!=1) {
+ if ((opt->debug>0) && (opt->log!=NULL)) {
+ fspc(opt->log,"warning"); fprintf(opt->log,"External wrapper aborted transfer, breaking connection: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush;
+ }
+ back[i].status=0; // FINI
+ deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET;
+ strcpybuff(back[i].r.msg,"External wrapper aborted transfer");
+ back[i].r.statuscode = -1;
+ }
+ }
+#endif
+
/* Hack for zero-length headers */
- if (back[i].r.adr[0] != '<') {
+ if (back[i].status != 0 && back[i].r.adr[0] != '<') {
// ----------------------------------------
// traiter en-tête!
// status-line à récupérer
ptr+=binput(back[i].r.adr+ptr,rcvd,2000);
- if (strnotempty(rcvd)==0)
- ptr+=binput(back[i].r.adr+ptr,rcvd,2000); // "certains serveurs buggés envoient un \n au début" (RFC)
+ if (strnotempty(rcvd)==0) {
+ /* Bogus CRLF, OR recycled connection and trailing chunk CRLF */
+ ptr+=binput(back[i].r.adr+ptr,rcvd,2000);
+ }
// traiter status-line
treatfirstline(&back[i].r,rcvd);
@@ -1797,7 +2189,8 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
#endif
if (_DEBUG_HEAD) {
if (ioinfo) {
- fprintf(ioinfo,"response for %s%s:\r\ncode=%d\r\n",jump_identification(back[i].url_adr),back[i].url_fil,back[i].r.statuscode);
+ fprintf(ioinfo,"[%d] response for %s%s:\r\ncode=%d\r\n",
+ back[i].r.debugid, jump_identification(back[i].url_adr),back[i].url_fil,back[i].r.statuscode);
fprintfio(ioinfo,back[i].r.adr,">>> ");
fprintf(ioinfo,"\r\n");
fflush(ioinfo);
@@ -1831,7 +2224,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
// ----------------------------------------
// libérer mémoire -- après! --
- if (back[i].r.adr!=NULL) { freet(back[i].r.adr); back[i].r.adr=NULL; }
+ deleteaddr(&back[i].r);
} else {
// assume text/html, OK
treatfirstline(&back[i].r, back[i].r.adr);
@@ -1870,10 +2263,11 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
// ->
// Content-Range: bytes */2830
if (back[i].range_req_size == back[i].r.crange) {
+ filenote(back[i].url_sav,NULL);
+ //xxusercommand(opt,0,NULL,back[i].url_sav,back[i].url_adr,back[i].url_fil);
deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET;
back[i].status=0; // READY
back[i].r.size=back[i].r.totalsize=back[i].range_req_size;
- filenote(back[i].url_sav,NULL);
back[i].r.statuscode=304; // NOT MODIFIED
if ((opt->debug>1) && (opt->log!=NULL)) {
fspc(opt->log,"debug"); fprintf(opt->log,"File seems complete (good 416 message), breaking connection: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush;
@@ -1897,8 +2291,8 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
// If the size is the same, and the option has been set, we assume
// that the file is identical - and therefore let's break the connection
if (back[i].is_update) { // mise à jour
- if (back[i].r.statuscode==200) { // 'OK'
- htsblk r = cache_read(opt,cache,back[i].url_adr,back[i].url_fil,NULL); // lire entrée cache
+ if (back[i].r.statuscode==200 && !back[i].testmode) { // 'OK'
+ htsblk r = cache_read(opt,cache,back[i].url_adr,back[i].url_fil,NULL,NULL); // lire entrée cache
if (r.statuscode == 200) { // OK pas d'erreur cache
LLint len1,len2;
len1=r.totalsize;
@@ -1930,7 +2324,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
// Detect already downloaded file (with another browser, for example)
if (opt->sizehack) {
if (!back[i].is_update) { // mise à jour
- if (back[i].r.statuscode==200) { // 'OK'
+ if (back[i].r.statuscode==200 && !back[i].testmode) { // 'OK'
if (!is_hypertext_mime(back[i].r.contenttype)) { // not HTML
if (strnotempty(back[i].url_sav)) { // target found
int size = fsize(back[i].url_sav); // target size
@@ -1940,6 +2334,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
back[i].status=0; // READY
back[i].r.size=back[i].r.totalsize;
filenote(back[i].url_sav,NULL);
+ //xxusercommand(opt,0,NULL,back[i].url_sav,back[i].url_adr,back[i].url_fil);
back[i].r.statuscode=304; // NOT MODIFIED
if ((opt->debug>1) && (opt->log!=NULL)) {
fspc(opt->log,"debug"); fprintf(opt->log,"File seems complete (same size file discovered), breaking connection: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush;
@@ -1976,6 +2371,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
back[i].status=0; // READY
back[i].r.size=back[i].r.totalsize;
filenote(back[i].url_sav,NULL);
+ //xxusercommand(opt,0,NULL,back[i].url_sav,back[i].url_adr,back[i].url_fil);
back[i].r.statuscode=304; // NOT MODIFIED
if ((opt->debug>1) && (opt->log!=NULL)) {
fspc(opt->log,"debug"); fprintf(opt->log,"File seems complete (reget failed), breaking connection: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush;
@@ -2006,9 +2402,9 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
back[i].status=0; // FINI
deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET;
if (!back[i].testmode)
- strcpy(back[i].r.msg,"File too big");
+ strcpybuff(back[i].r.msg,"File too big");
else
- strcpy(back[i].r.msg,"Test: File too big");
+ strcpybuff(back[i].r.msg,"Test: File too big");
}
}
@@ -2023,7 +2419,9 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
DEBUG_W("back_wait(head request): deletehttp\n");
#endif
// Couper connexion
- deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET;
+ if (!back[i].http11) { /* NO KA */
+ deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET;
+ }
back[i].status=0; // terminé
}
// traiter une éventuelle erreur 304 (cache à jour utilisable)
@@ -2033,13 +2431,14 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
#if HTS_DEBUG_CLOSESOCK
DEBUG_W("back_wait(file is not modified): deletehttp\n");
#endif
- deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET;
- back[i].r=cache_read(opt,cache,back[i].url_adr,back[i].url_fil,back[i].url_sav);
- if (!back[i].r.location)
- back[i].r.location=back[i].location_buffer;
- else { /* recopier */
- strcpy(back[i].location_buffer,back[i].r.location);
+ /* clear everything but connection: switch, close, and reswitch */
+ {
+ htsblk tmp;
+ memset(&tmp, 0, sizeof(tmp));
+ back_connxfr(&back[i].r, &tmp);
+ back[i].r=cache_read(opt,cache,back[i].url_adr,back[i].url_fil,back[i].url_sav,back[i].location_buffer);
back[i].r.location=back[i].location_buffer;
+ back_connxfr(&tmp,&back[i].r);
}
// hack:
@@ -2048,6 +2447,10 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
if (back[i].r.statuscode == -1) {
if (fexist(back[i].url_sav)) {
back[i].r.statuscode=200; // OK
+ strcpybuff(back[i].r.msg, "OK (cached)");
+ back[i].r.is_file=1;
+ back[i].r.totalsize = back[i].r.size = fsize(back[i].url_sav);
+ get_httptype(back[i].r.contenttype, back[i].url_sav, 1);
if ((opt->debug>0) && (opt->log!=NULL)) {
fspc(opt->log,"debug"); fprintf(opt->log,"Not-modified status without cache guessed: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush;
}
@@ -2078,6 +2481,8 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
}
+/********** NO - must complete the body! ********** */
+#if 0
} else if ((back[i].r.statuscode==301)
|| (back[i].r.statuscode==302)
|| (back[i].r.statuscode==303)
@@ -2089,20 +2494,21 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
DEBUG_W("back_wait(301,302,303,307,412,416..): deletehttp\n");
#endif
// Couper connexion
- deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET;
+ /*KA deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET;*/
+ back_maydeletehttp(opt, back, back_max, i);
+
back[i].status=0; // terminé
// finalize
if (back[i].r.statuscode>0) {
back_finalize(opt,cache,back,i);
}
+#endif
+/********** **************************** ********** */
} else { // il faut aller le chercher
// effacer buffer (requète)
if (!noFreebuff) {
- if (back[i].r.adr!=NULL) {
- freet(back[i].r.adr);
- back[i].r.adr=NULL;
- }
+ deleteaddr(&back[i].r);
back[i].r.size=0;
}
@@ -2130,7 +2536,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
#endif
} else { // On est dans la m**
back[i].status=0; // terminé (voir plus loin)
- strcpy(back[i].r.msg,"Can not open partial file");
+ strcpybuff(back[i].r.msg,"Can not open partial file");
}
}
} else { // mémoire
@@ -2139,13 +2545,13 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
LLint alloc_mem=sz + 1;
if (back[i].r.totalsize>0)
alloc_mem+=back[i].r.totalsize; // AJOUTER RESTANT!
- if ( (back[i].r.adr=(char*) malloct((INTsys) alloc_mem)) ) {
+ if ( deleteaddr(&back[i].r) && (back[i].r.adr=(char*) malloct((INTsys) alloc_mem)) ) {
back[i].r.size=sz;
if (back[i].r.totalsize>0)
back[i].r.totalsize+=sz; // plus en fait
- if (((int) fread(back[i].r.adr,1,(INTsys)sz,fp)) != sz) {
+ if (( fread(back[i].r.adr,1,(INTsys)sz,fp)) != sz) {
back[i].status=0; // terminé (voir plus loin)
- strcpy(back[i].r.msg,"Can not read partial file");
+ strcpybuff(back[i].r.msg,"Can not read partial file");
} else {
back[i].r.statuscode=200; // Forcer 'OK'
#if HDEBUG
@@ -2154,17 +2560,17 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
}
} else {
back[i].status=0; // terminé (voir plus loin)
- strcpy(back[i].r.msg,"No memory for partial file");
+ strcpybuff(back[i].r.msg,"No memory for partial file");
}
fclose(fp);
} else { // Argh..
back[i].status=0; // terminé (voir plus loin)
- strcpy(back[i].r.msg,"Can not open partial file");
+ strcpybuff(back[i].r.msg,"Can not open partial file");
}
}
} else { // Non trouvé??
back[i].status=0; // terminé (voir plus loin)
- strcpy(back[i].r.msg,"Can not find partial file");
+ strcpybuff(back[i].r.msg,"Can not find partial file");
}
// Erreur?
if (back[i].status==0) {
@@ -2178,25 +2584,36 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
//back[i].r.statuscode=206; ????????
back[i].r.statuscode=-5;
if (strnotempty(back[i].r.msg))
- strcpy(back[i].r.msg,"Error attempting to solve status 206 (partial file)");
+ strcpybuff(back[i].r.msg,"Error attempting to solve status 206 (partial file)");
}
}
if (back[i].status!=0) { // non terminé (erreur)
if (!back[i].testmode) { // fichier normal
- if (!back[i].r.is_chunk) { // pas de chunk
+ if (back[i].r.empty && back[i].r.statuscode==200) { // empty response
+ // Couper connexion
+ deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET;
+ back[i].status=0; // terminé
+ if ( deleteaddr(&back[i].r) && (back[i].r.adr=(char*) malloct((INTsys) 2)) ) {
+ back[i].r.adr[0] = 0;
+ }
+ back_finalize(opt,cache,back,i);
+ }
+ else if (!back[i].r.is_chunk) { // pas de chunk
//if (back[i].r.http11!=2) { // pas de chunk
back[i].is_chunk=0;
back[i].status=1; // start body
} else {
#if CHUNKDEBUG==1
- printf("chunk encoding detected %s..\n",back[i].url_fil);
+ printf("[%d] chunk encoding detected %s..\n",(int)back[i].r.soc, back[i].url_fil);
#endif
back[i].is_chunk=1;
back[i].chunk_adr=NULL;
back[i].chunk_size=0;
+ back[i].chunk_blocksize=0;
back[i].status=98; // start body wait chunk
+ back[i].r.totalsize=0; /* devalidate size! (rfc) */
}
if (back[i].rateout>0) {
back[i].rateout_time=time_local(); // refresh pour transfer rate
@@ -2211,14 +2628,14 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
#endif
deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET;
if (back[i].r.statuscode==200) {
- strcpy(back[i].r.msg,"Test: OK");
+ strcpybuff(back[i].r.msg,"Test: OK");
back[i].r.statuscode=-10; // test réussi
}
else { // test a échoué, on ne change rien sauf que l'erreur est à titre indicatif
char tempo[1000];
- strcpy(tempo,back[i].r.msg);
- strcpy(back[i].r.msg,"Test: ");
- strcat(back[i].r.msg,tempo);
+ strcpybuff(tempo,back[i].r.msg);
+ strcpybuff(back[i].r.msg,"Test: ");
+ strcatbuff(back[i].r.msg,tempo);
}
}
@@ -2254,9 +2671,9 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
if (opt->verbosedisplay==1) {
if (back[i].status==0) {
if (back[i].r.statuscode==200)
- printf("* %s%s ("LLintP" bytes) - OK"VT_CLREOL"\r",back[i].url_adr,back[i].url_fil,back[i].r.size);
+ printf("* %s%s ("LLintP" bytes) - OK"VT_CLREOL"\r",back[i].url_adr,back[i].url_fil,(LLint)back[i].r.size);
else
- printf("* %s%s ("LLintP" bytes) - %d"VT_CLREOL"\r",back[i].url_adr,back[i].url_fil,back[i].r.size,back[i].r.statuscode);
+ printf("* %s%s ("LLintP" bytes) - %d"VT_CLREOL"\r",back[i].url_adr,back[i].url_fil,(LLint)back[i].r.size,back[i].r.statuscode);
fflush(stdout);
}
}
@@ -2284,11 +2701,11 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
back[i].r.soc=INVALID_SOCKET;
back[i].r.statuscode=-2;
if (back[i].status==100)
- strcpy(back[i].r.msg,"Connect Time Out");
+ strcpybuff(back[i].r.msg,"Connect Time Out");
else if (back[i].status==101)
- strcpy(back[i].r.msg,"DNS Time Out");
+ strcpybuff(back[i].r.msg,"DNS Time Out");
else
- strcpy(back[i].r.msg,"Receive Time Out");
+ strcpybuff(back[i].r.msg,"Receive Time Out");
back[i].status=0; // terminé
} else if ((back[i].rateout>0) && (back[i].status<99)) {
if (((int) (act-back[i].rateout_time))>=HTS_WATCHRATE) { // checker au bout de 15s
@@ -2302,7 +2719,7 @@ void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TSta
}
back[i].r.soc=INVALID_SOCKET;
back[i].r.statuscode=-3;
- strcpy(back[i].r.msg,"Transfer Rate Too Low");
+ strcpybuff(back[i].r.msg,"Transfer Rate Too Low");
}
}
}
@@ -2333,7 +2750,7 @@ int back_checksize(httrackp* opt,lien_back* eback,int check_only_totalsize) {
if (size_to_test>=0) {
/* Interdiction taille par le wizard? */
- if (hts_testlinksize(opt,eback->url_adr,eback->url_fil,(eback->r.totalsize+1023)/1024)==-1) {
+ if (hts_testlinksize(opt,eback->url_adr,eback->url_fil,eback->r.totalsize/1024)==-1) {
return 0; /* interdit */
}
@@ -2345,13 +2762,31 @@ int back_checksize(httrackp* opt,lien_back* eback,int check_only_totalsize) {
return 1;
}
+int back_checkmirror(httrackp* opt) {
+ // Check the global mirror limits: total bytes first (opt->maxsite), then elapsed time (opt->maxtime); returns 0 to give up, 1 to go on
+ if ((opt->maxsite>0) && (HTS_STAT.stat_bytes >= opt->maxsite)) { // byte limit enabled (>0) and reached
+ if (opt->errlog) {
+ fprintf(opt->errlog,"More than "LLintP" bytes have been transfered.. giving up"LF,(LLint)opt->maxsite);
+ test_flush;
+ }
+ return 0;
+ } else if ((opt->maxtime>0) && ((time_local()-HTS_STAT.stat_timestart)>opt->maxtime)) { // time limit enabled (>0) and exceeded
+ if (opt->errlog) {
+ fprintf(opt->errlog,"More than %d seconds passed.. giving up"LF,opt->maxtime);
+ test_flush;
+ }
+ return 0;
+ }
+ return 1; /* Ok, go on */
+}
+
// bytes transferred: returns nb plus the current r.size of every slot selected by the status test below
LLint back_transfered(LLint nb,lien_back* back,int back_max) {
int i;
// add the bytes held by in-progress slots
for(i=0;i<back_max;i++)
- if ((back[i].status>0) && (back[i].status<99))
+ if ((back[i].status>0) && (back[i].status<99 || back[i].status>=1000)) // statuses >=1000 are now counted too -- NOTE(review): their meaning is not visible here, confirm
nb+=back[i].r.size;
return nb;
}
@@ -2360,10 +2795,10 @@ LLint back_transfered(LLint nb,lien_back* back,int back_max) {
// j: 1 show sockets, 2 show others, 3 show everything; writes the back_infostr() text for slot i, followed by LF, to fp
void back_info(lien_back* back,int i,int j,FILE* fp) {
if (back[i].status>=0) { // slots with a negative status print nothing
- char s[256];
+ char s[HTS_URLMAXSIZE*2+1024];
s[0]='\0'; // back_infostr() appends to s, so start from an empty string
back_infostr(back,i,j,s);
- strcat(s,LF);
+ strcatbuff(s,LF);
fprintf(fp,"%s",s);
}
}
@@ -2375,19 +2810,19 @@ void back_infostr(lien_back* back,int i,int j,char* s) {
int aff=0;
if (j & 1) {
if (back[i].status==100) {
- strcat(s,"CONNECT ");
+ strcatbuff(s,"CONNECT ");
} else if (back[i].status==99) {
- strcat(s,"INFOS ");
+ strcatbuff(s,"INFOS ");
aff=1;
- } else if (back[i].status==98) {
- strcat(s,"INFOSC"); // infos chunk
+ } else if (back[i].status==98 || back[i].status==97) {
+ strcatbuff(s,"INFOSC"); // infos chunk
aff=1;
}
else if (back[i].status>0) {
#if HTS_ANALYSTE==2
- strcat(s,"WAIT ");
+ strcatbuff(s,"WAIT ");
#else
- strcat(s,"RECEIVE ");
+ strcatbuff(s,"RECEIVE ");
#endif
aff=1;
}
@@ -2396,47 +2831,47 @@ void back_infostr(lien_back* back,int i,int j,char* s) {
if (back[i].status==0) {
switch (back[i].r.statuscode) {
case 200:
- strcat(s,"READY ");
+ strcatbuff(s,"READY ");
aff=1;
break;
#if HTS_ANALYSTE==2
default:
- strcat(s,"ERROR ");
+ strcatbuff(s,"ERROR ");
break;
#else
case -1:
- strcat(s,"ERROR ");
+ strcatbuff(s,"ERROR ");
aff=1;
break;
case -2:
- strcat(s,"TIMEOUT ");
+ strcatbuff(s,"TIMEOUT ");
aff=1;
break;
case -3:
- strcat(s,"TOOSLOW ");
+ strcatbuff(s,"TOOSLOW ");
aff=1;
break;
case 400:
- strcat(s,"BADREQUEST ");
+ strcatbuff(s,"BADREQUEST ");
aff=1;
break;
case 401: case 403:
- strcat(s,"FORBIDDEN ");
+ strcatbuff(s,"FORBIDDEN ");
aff=1;
break;
case 404:
- strcat(s,"NOT FOUND ");
+ strcatbuff(s,"NOT FOUND ");
aff=1;
break;
case 500:
- strcat(s,"SERVERROR ");
+ strcatbuff(s,"SERVERROR ");
aff=1;
break;
default:
{
char s2[256];
sprintf(s2,"ERROR(%d)",back[i].r.statuscode);
- strcat(s,s2);
+ strcatbuff(s,s2);
}
aff=1;
#endif
@@ -2446,12 +2881,12 @@ void back_infostr(lien_back* back,int i,int j,char* s) {
if (aff) { // a status keyword was emitted above: append the quoted URL and the byte counters
{
- char s2[1024];
- sprintf(s2,"\"%s",back[i].url_adr); strcat(s,s2);
+ char s2[HTS_URLMAXSIZE*2+1024]; // scratch buffer: each piece is formatted here, then appended to s
+ sprintf(s2,"\"%s",back[i].url_adr); strcatbuff(s,s2);
- if (back[i].url_fil[0]!='/') strcat(s,"/");
- sprintf(s2,"%s\" ",back[i].url_fil); strcat(s,s2);
- sprintf(s,LLintP" "LLintP" ",back[i].r.size,back[i].r.totalsize); strcat(s,s2);
+ if (back[i].url_fil[0]!='/') strcatbuff(s,"/");
+ sprintf(s2,"%s\" ",back[i].url_fil); strcatbuff(s,s2);
+ // format the counters into s2, not s: writing into s erased everything built so far and then re-appended the stale url_fil text
+ sprintf(s2,LLintP" "LLintP" ",(LLint)back[i].r.size,(LLint)back[i].r.totalsize); strcatbuff(s,s2);
}
}
}
diff --git a/src/htsback.h b/src/htsback.h
index af5fe6c..74fd540 100644
--- a/src/htsback.h
+++ b/src/htsback.h
@@ -50,11 +50,19 @@ int back_available(lien_back* back,int back_max);
LLint back_incache(lien_back* back,int back_max);
HTS_INLINE int back_exist(lien_back* back,int back_max,char* adr,char* fil,char* sav);
int back_nsoc(lien_back* back,int back_max);
-int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* referer_adr,char* referer_fil,int test,short int* pass2_ptr);
+int back_nsoc_overall(lien_back* back,int back_max);
+int back_add(lien_back* back,int back_max,httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* referer_adr,char* referer_fil,int test,int* pass2_ptr);
int back_stack_available(lien_back* back,int back_max);
+int back_search(httrackp* opt, lien_back* back, int back_max);
void back_clean(httrackp* opt,cache_back* cache,lien_back* back,int back_max);
void back_wait(lien_back* back,int back_max,httrackp* opt,cache_back* cache,TStamp stat_timestart);
-int back_delete(lien_back* back,int p);
+int back_letlive(httrackp* opt, lien_back* back, int p);
+int back_searchlive(httrackp* opt, lien_back* back, int back_max, char* search_addr);
+void back_connxfr(htsblk* src, htsblk* dst);
+int back_delete(httrackp* opt,lien_back* back,int p);
+int back_maydelete(httrackp* opt, lien_back* back, int p);
+void back_maydeletehttp(httrackp* opt, lien_back* back, int back_max, int p);
+int back_trylive(httrackp* opt,lien_back* back, int back_max, int p);
int back_finalize(httrackp* opt,cache_back* cache,lien_back* back,int p);
void back_info(lien_back* back,int i,int j,FILE* fp);
void back_infostr(lien_back* back,int i,int j,char* s);
@@ -65,6 +73,7 @@ void back_solve(lien_back* back);
int host_wait(lien_back* back);
#endif
int back_checksize(httrackp* opt,lien_back* eback,int check_only_totalsize);
+int back_checkmirror(httrackp* opt);
#if HTS_XGETHOST
#if USE_BEGINTHREAD
diff --git a/src/htsbase.h b/src/htsbase.h
index 3e83471..139e3ed 100644
--- a/src/htsbase.h
+++ b/src/htsbase.h
@@ -38,20 +38,43 @@ Please visit our Website: http://www.httrack.com
#ifndef HTS_BASICH
#define HTS_BASICH
+#ifdef __cplusplus
+extern "C" {
+#endif
+
#include "htsglobal.h"
// size_t et mode_t
#include <stdio.h>
-#if HTS_WIN
-#else
-#include <fcntl.h>
+#include <stdlib.h>
+
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#ifdef HAVE_SYS_TYPES_H
+#include <sys/types.h>
+#endif
+#ifdef HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+#ifdef HAVE_DLFCN_H
+#include <dlfcn.h>
+#endif
+
+#ifndef _WIN32
+#include <errno.h>
#endif
#if HTS_WIN
#else
- #define min(a,b) ((a)>(b)?(b):(a))
- #define max(a,b) ((a)>(b)?(a):(b))
+#include <fcntl.h>
#endif
+#include <assert.h>
+
+#undef min
+#undef max
+#define min(a,b) ((a)>(b)?(b):(a))
+#define max(a,b) ((a)>(b)?(a):(b))
// teste égalité de 2 chars, case insensitive
#define hichar(a) ((((a)>='a') && ((a)<='z')) ? ((a)-('a'-'A')) : (a))
@@ -63,6 +86,7 @@ Please visit our Website: http://www.httrack.com
( (strfield2((a),"text/html")!=0)\
|| (strfield2((a),"application/x-javascript")!=0) \
|| (strfield2((a),"text/css")!=0) \
+ /*|| (strfield2((a),"text/vnd.wap.wml")!=0)*/ \
|| (strfield2((a),"image/svg+xml")!=0) \
|| (strfield2((a),"image/svg-xml")!=0) \
/*|| (strfield2((a),"audio/x-pn-realaudio")!=0) */\
@@ -77,60 +101,287 @@ Please visit our Website: http://www.httrack.com
// caractère maj
#define isUpperLetter(a) ( ((a) >= 'A') && ((a) <= 'Z') )
-// conversion éventuelle / vers antislash
-#if HTS_WIN
-char* antislash(char* s);
+// functions
+#ifdef _WIN32
+#define DynamicGet(handle, sym) GetProcAddress(handle, sym)
#else
-#define antislash(A) (A)
+#define DynamicGet(handle, sym) dlsym(handle, sym)
#endif
+// emergency log: abortLog(a) calls the abortLog__ handler with the current file and line
+typedef void (*t_abortLog)(char* msg, char* file, int line);
+extern HTSEXT_API t_abortLog abortLog__;
+#define abortLog(a) abortLog__(a, __FILE__, __LINE__)
+#define abortLogFmt(a) do { /* write a short crash report to a CRASH.TXT file */ \
+ FILE* fp = fopen("CRASH.TXT", "wb"); /* try the current directory first */ \
+ if (!fp) fp = fopen("/tmp/CRASH.TXT", "wb"); /* then fall back to /tmp */ \
+ if (!fp) fp = fopen("C:\\CRASH.TXT", "wb"); /* then to the root of C: */ \
+ if (fp) { /* if no file could be opened the report is silently dropped */ \
+ fprintf(fp, "HTTrack " HTTRACK_VERSIONID " closed at '" __FILE__ "', line %d\r\n", __LINE__); \
+ fprintf(fp, "Reason:\r\n"); \
+ fprintf(fp, a); /* a is passed as the format string itself, so it must be a trusted format */ \
+ fprintf(fp, "\r\n"); \
+ fflush(fp); \
+ fclose(fp); \
+ } \
+} while(0)
-// functions
-#if HTS_PLATFORM!=3
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if HTS_PLATFORM!=2
-#if HTS_PLATFORM!=1
- int open (const char *, int, ...);
-#endif
- //int read (int,const char*,int);
- //int write (int,char*,int);
-#endif
-#if HTS_PLATFORM!=1
- int close (int);
- void* calloc (size_t,size_t);
- void* malloc (size_t);
- void* realloc (void*,size_t);
- void free (void*);
-#endif
-#if HTS_WIN
-#else
- int mkdir (const char*,mode_t);
-#endif
-#ifdef __cplusplus
-}
-#endif
-#endif
+#define _ , /* presumably lets one macro argument carry commas: abortLogFmt("fmt %d" _ n) would expand to fprintf(fp, "fmt %d" , n) -- confirm against callers */
+#define abortLogFmt(a) do { /* NOTE(review): same body as the abortLogFmt defined earlier in this header -- looks redundant, confirm */ \
+ FILE* fp = fopen("CRASH.TXT", "wb"); /* try the current directory first */ \
+ if (!fp) fp = fopen("/tmp/CRASH.TXT", "wb"); /* then fall back to /tmp */ \
+ if (!fp) fp = fopen("C:\\CRASH.TXT", "wb"); /* then to the root of C: */ \
+ if (fp) { /* if no file could be opened the report is silently dropped */ \
+ fprintf(fp, "HTTrack " HTTRACK_VERSIONID " closed at '" __FILE__ "', line %d\r\n", __LINE__); \
+ fprintf(fp, "Reason:\r\n"); \
+ fprintf(fp, a); /* a is passed as the format string itself, so it must be a trusted format */ \
+ fprintf(fp, "\r\n"); \
+ fflush(fp); \
+ fclose(fp); \
+ } \
+} while(0)
+#define assertf(exp) do { /* fatal assert: reports the failure, then aborts; note that exp is evaluated more than once */ \
+ if (! ( exp ) ) { \
+ abortLog("assert failed: " #exp); /* emergency log receives the stringified expression */ \
+ if (htsCallbackErr != NULL) { \
+ htsCallbackErr("assert failed: " #exp, __FILE__ , __LINE__ ); /* optional error callback */ \
+ } \
+ assert(exp); /* standard assert diagnostic; compiled out when NDEBUG is defined */ \
+ abort(); /* reached when assert() is compiled out, so the failure stays fatal */ \
+ } \
+} while(0)
+/* non-fatal assert: same reporting as assertf, but neither assert() nor abort() is called, so execution continues */
+#define assertnf(exp) do { \
+ if (! ( exp ) ) { \
+ abortLog("assert failed: " #exp); /* emergency log receives the stringified expression */ \
+ if (htsCallbackErr != NULL) { \
+ htsCallbackErr("assert failed: " #exp, __FILE__ , __LINE__ ); /* optional error callback */ \
+ } \
+ } \
+} while(0)
-// tracer malloc()
-#if HTS_TRACE_MALLOC
-#define malloct(A) hts_malloc(A,0)
-#define calloct(A,B) hts_malloc(A,B)
-#define freet(A) hts_free(A)
+
+/* regular malloc's(): used when HTS_TRACE_MALLOC is not defined; thin wrappers over the C library allocator */
+#ifndef HTS_TRACE_MALLOC
+#define malloct(A) malloc(A)
+#define calloct(A,B) calloc((A), (B))
+#define freet(A) do { assertnf((A) != NULL); if ((A) != NULL) { free(A); (A) = NULL; } } while(0) /* reports a NULL argument without aborting; frees and resets A to NULL, so A must be an lvalue */
+#define realloct(A,B) ( ((A) != NULL) ? realloc((A), (B)) : malloc(B) ) /* a NULL A is routed to malloc explicitly */
+#define memcpybuff(A, B, N) memcpy((A), (B), (N)) /* plain memcpy: no bounds checking in this build */
+#else
+/* debug version */
+#define malloct(A) hts_malloc(A)
+#define calloct(A,B) hts_calloc(A,B)
+#define freet(A) do { hts_free(A); (A) = NULL; } while(0)
#define realloct(A,B) hts_realloc(A,B)
void hts_freeall();
-void* hts_malloc (size_t,size_t);
+void* hts_malloc (size_t);
+void* hts_calloc(size_t,size_t);
+void* hts_xmalloc(size_t,size_t);
void hts_free (void*);
void* hts_realloc (void*,size_t);
+mlink* hts_find(char* adr);
+/* protected memcpy: when hts_find() returns a record for (A), check both guard words and that the N bytes fit, then copy */
+#define memcpybuff(A, B, N) do { \
+ mlink* lnk = hts_find((void*)(A)); \
+ if (lnk != NULL) { /* destinations without a record are copied unchecked */ \
+ assertf(lnk != NULL); \
+ assertf( * ( (t_htsboundary*) ( ((char*) lnk->adr) - sizeof(htsboundary) ) ) == htsboundary ); /* guard word just before the block */ \
+ assertf( * ( (t_htsboundary*) ( ((char*) lnk->adr) + lnk->len ) ) == htsboundary ); /* guard word at adr+len */ \
+ assertf( ( ((char*)(A)) + (N)) <= (char*) (lnk->adr + lnk->len) ); /* <= : a copy ending exactly at adr+len still stops before the trailing guard word */ \
+ } \
+ memcpy(A, B, N); \
+} while(0)
+
+#endif
+
+typedef void (* htsErrorCallback)(char* msg, char* file, int line);
+extern HTSEXT_API htsErrorCallback htsCallbackErr;
+extern HTSEXT_API int htsMemoryFastXfr;
+
+/*
+*/
+
+
+#ifdef STRDEBUG
+
+/* protected strcat, strncat and strcpy - definitely useful (STRDEBUG build: full checks, selectable at run time through htsMemoryFastXfr) */
+#define strcatbuff(A, B) do { /* strcat(A, B) with NULL checks and, when A is an array, a capacity check */ \
+ assertf( (A) != NULL ); \
+ if ( ! (B) ) { assertf( 0 ); } \
+ if (htsMemoryFastXfr) { /* fast mode: real strcat, overflow detected afterwards through a sentinel */ \
+ if (sizeof(A) != sizeof(char*)) { /* A is taken to be an array when its size differs from a pointer's */ \
+ (A)[sizeof(A) - 1] = '\0'; /* plant a NUL in the last byte of the array */ \
+ } \
+ strcat(A, B); \
+ if (sizeof(A) != sizeof(char*)) { \
+ assertf((A)[sizeof(A) - 1] == '\0'); /* the last byte must be untouched, else the array was filled or overrun */ \
+ } \
+ } else { /* checked mode: measure both strings and copy by hand */ \
+ unsigned int sz = (unsigned int) strlen(A); \
+ unsigned int szf = (unsigned int) strlen(B); \
+ if (sizeof(A) != sizeof(char*)) { \
+ assertf(sz + szf + 1 < sizeof(A)); /* strict bound: one spare byte is required beyond the terminator */ \
+ if (szf > 0) { \
+ if (sz + szf + 1 < sizeof(A)) { /* same bound re-checked before the copy */ \
+ memcpy((A) + sz, (B), szf + 1); /* szf + 1 copies the terminator as well */ \
+ } \
+ } \
+ } else if (szf > 0) { /* pointer destination: size unknown here, checking is left to memcpybuff */ \
+ memcpybuff((A) + sz, (B), szf + 1); \
+ } \
+ } \
+} while(0)
+#define strncatbuff(A, B, N) do { /* strncat(A, B, N) with NULL checks and, when A is an array, a capacity check */ \
+ assertf( (A) != NULL ); \
+ if ( ! (B) ) { assertf( 0 ); } \
+ if (htsMemoryFastXfr) { /* fast mode: real strncat, overflow detected afterwards through a sentinel */ \
+ if (sizeof(A) != sizeof(char*)) { /* A is taken to be an array when its size differs from a pointer's */ \
+ (A)[sizeof(A) - 1] = '\0'; /* plant a NUL in the last byte of the array */ \
+ } \
+ strncat(A, B, N); \
+ if (sizeof(A) != sizeof(char*)) { \
+ assertf((A)[sizeof(A) - 1] == '\0'); /* the last byte must be untouched, else the array was filled or overrun */ \
+ } \
+ } else { /* checked mode: measure both strings and copy by hand */ \
+ unsigned int sz = (unsigned int) strlen(A); \
+ unsigned int szf = (unsigned int) strlen(B); \
+ if (szf > (unsigned int) (N)) szf = (unsigned int) (N); /* append at most N characters of B */ \
+ if (sizeof(A) != sizeof(char*)) { \
+ assertf(sz + szf + 1 < sizeof(A)); /* strict bound: one spare byte is required beyond the terminator */ \
+ if (szf > 0) { \
+ if (sz + szf + 1 < sizeof(A)) { /* same bound re-checked before the copy */ \
+ memcpy((A) + sz, (B), szf); \
+ * ( (A) + sz + szf) = '\0'; /* terminate explicitly, since only szf characters were copied */ \
+ } \
+ } \
+ } else if (szf > 0) { /* pointer destination: size unknown here, checking is left to memcpybuff */ \
+ memcpybuff((A) + sz, (B), szf); \
+ * ( (A) + sz + szf) = '\0'; /* terminate explicitly, since only szf characters were copied */ \
+ } \
+ } \
+} while(0)
+#define strcpybuff(A, B) do { /* strcpy(A, B) with NULL checks and, when A is an array, a capacity check */ \
+ assertf( (A) != NULL ); \
+ if ( ! (B) ) { assertf( 0 ); } \
+ if (htsMemoryFastXfr) { /* fast mode: real strcpy, overflow detected afterwards through a sentinel */ \
+ if (sizeof(A) != sizeof(char*)) { /* A is taken to be an array when its size differs from a pointer's */ \
+ (A)[sizeof(A) - 1] = '\0'; /* plant a NUL in the last byte of the array */ \
+ } \
+ strcpy(A, B); \
+ if (sizeof(A) != sizeof(char*)) { \
+ assertf((A)[sizeof(A) - 1] == '\0'); /* the last byte must be untouched, else the array was filled or overrun */ \
+ } \
+ } else { /* checked mode: measure the source and copy by hand */ \
+ unsigned int szf = (unsigned int) strlen(B); \
+ if (sizeof(A) != sizeof(char*)) { \
+ assertf(szf + 1 < sizeof(A)); /* strict bound: one spare byte is required beyond the terminator */ \
+ if (szf > 0) { \
+ if (szf + 1 < sizeof(A)) { /* same bound re-checked before the copy */ \
+ memcpy((A), (B), szf + 1); /* szf + 1 copies the terminator as well */ \
+ } else { \
+ * (A) = '\0'; /* source does not fit: leave an empty string */ \
+ } \
+ } else { \
+ * (A) = '\0'; /* empty source: result is the empty string */ \
+ } \
+ } else { /* pointer destination: size unknown here, checking is left to memcpybuff */ \
+ memcpybuff((A), (B), szf + 1); \
+ } \
+ } \
+} while(0)
+#define strncpybuff(A, B, N) do { /* strncpy(A, B, N) with NULL checks and, when A is an array, a capacity check */ \
+ assertf( (A) != NULL ); \
+ if ( ! (B) ) { assertf( 0 ); } \
+ if (htsMemoryFastXfr) { /* fast mode: real strncpy, overflow detected afterwards through a sentinel */ \
+ if (sizeof(A) != sizeof(char*)) { /* A is taken to be an array when its size differs from a pointer's */ \
+ (A)[sizeof(A) - 1] = '\0'; /* plant a NUL in the last byte of the array */ \
+ } \
+ strncpy(A, B, N); \
+ if (sizeof(A) != sizeof(char*)) { \
+ assertf((A)[sizeof(A) - 1] == '\0'); /* the last byte must be untouched, else the array was filled or overrun */ \
+ } \
+ } else { /* checked mode: measure the source and copy by hand */ \
+ unsigned int szf = (unsigned int) strlen(B); \
+ if (szf > (unsigned int) (N)) szf = (unsigned int) (N); /* copy at most N characters of B */ \
+ if (sizeof(A) != sizeof(char*)) { \
+ assertf(szf + 1 < sizeof(A)); /* strict bound: one spare byte is required beyond the copied text */ \
+ if (szf + 1 < sizeof(A)) { /* same bound re-checked before writing */ \
+ if (szf > 0) { \
+ memcpy((A), (B), szf); \
+ } \
+ if (szf < (unsigned int) (N)) { /* source shorter than N: strncpy leaves a terminated string, so do the same (empty source included) */ \
+ (A)[szf] = '\0'; \
+ } \
+ } \
+ } else { /* pointer destination: size unknown here, checking is left to memcpybuff */ \
+ memcpybuff((A), (B), szf); \
+ if (szf < (unsigned int) (N)) { /* strncpy would zero-fill up to N; only the terminator is written here */ \
+ (A)[szf] = '\0'; \
+ } \
+ } \
+ } \
+} while(0)
+
+#else
+
+#ifdef STRDEBUGFAST
+
+/* protected strcat, strncat and strcpy - definitely useful (STRDEBUGFAST build: always the real libc call, guarded by a last-byte sentinel when the destination is an array) */
+#define strcatbuff(A, B) do { /* checked strcat */ \
+ assertf( (A) != NULL ); \
+ if ( ! (B) ) { assertf( 0 ); } \
+ if (sizeof(A) != sizeof(char*)) { /* A is taken to be an array when its size differs from a pointer's */ \
+ (A)[sizeof(A) - 1] = '\0'; /* plant a NUL in the last byte of the array */ \
+ } \
+ strcat(A, B); \
+ if (sizeof(A) != sizeof(char*)) { \
+ assertf((A)[sizeof(A) - 1] == '\0'); /* the last byte must be untouched, else the array was filled or overrun */ \
+ } \
+} while(0)
+#define strncatbuff(A, B, N) do { /* checked strncat */ \
+ assertf( (A) != NULL ); \
+ if ( ! (B) ) { assertf( 0 ); } \
+ if (sizeof(A) != sizeof(char*)) { /* array destinations only */ \
+ (A)[sizeof(A) - 1] = '\0'; /* plant the sentinel */ \
+ } \
+ strncat(A, B, N); \
+ if (sizeof(A) != sizeof(char*)) { \
+ assertf((A)[sizeof(A) - 1] == '\0'); /* sentinel must have survived */ \
+ } \
+} while(0)
+#define strcpybuff(A, B) do { /* checked strcpy */ \
+ assertf( (A) != NULL ); \
+ if ( ! (B) ) { assertf( 0 ); } \
+ if (sizeof(A) != sizeof(char*)) { /* array destinations only */ \
+ (A)[sizeof(A) - 1] = '\0'; /* plant the sentinel */ \
+ } \
+ strcpy(A, B); \
+ if (sizeof(A) != sizeof(char*)) { \
+ assertf((A)[sizeof(A) - 1] == '\0'); /* sentinel must have survived */ \
+ } \
+} while(0)
+#define strncpybuff(A, B, N) do { /* checked strncpy */ \
+ assertf( (A) != NULL ); \
+ if ( ! (B) ) { assertf( 0 ); } \
+ if (sizeof(A) != sizeof(char*)) { /* array destinations only */ \
+ (A)[sizeof(A) - 1] = '\0'; /* plant the sentinel */ \
+ } \
+ strncpy(A, B, N); \
+ if (sizeof(A) != sizeof(char*)) { \
+ assertf((A)[sizeof(A) - 1] == '\0'); /* sentinel must have survived */ \
+ } \
+} while(0)
+
#else
-#define malloct(A) malloc(A)
-#define calloct(A,B) calloc(A,B)
-#define freet(A) free(A)
-#define realloct(A,B) realloc(A,B)
+
+/* Fallback when STRDEBUGFAST is not defined in this branch: the "buff" names
+   are plain aliases of the C library functions, with no additional checks. */
+#define strcatbuff strcat
+#define strncatbuff strncat
+#define strcpybuff strcpy
+#define strncpybuff strncpy
+
+#endif
+
#endif
+#ifdef __cplusplus
+ };
#endif
+#endif
diff --git a/src/htsbasenet.h b/src/htsbasenet.h
index d63a2e7..71ac9c9 100644
--- a/src/htsbasenet.h
+++ b/src/htsbasenet.h
@@ -70,9 +70,89 @@ Please visit our Website: http://www.httrack.com
#ifdef __cplusplus
extern "C" {
#endif
+
+/*
#include <openssl/ssl.h>
#include <openssl/crypto.h>
#include <openssl/err.h>
+*/
+
+/* OpenSSL definitions */
+/* The OpenSSL headers are no longer included (see the commented-out #include
+   lines above). Each OpenSSL entry point used by the code is renamed to an
+   hts_ptrfunc_* identifier, so that a call written SSL_xxx(...) goes through
+   the function pointer of that name declared further down.
+   NOTE(review): the pointers are presumably filled in at run time by a
+   dynamic loader elsewhere in the project - confirm. */
+#define SSL_shutdown hts_ptrfunc_SSL_shutdown
+#define SSL_free hts_ptrfunc_SSL_free
+#define SSL_new hts_ptrfunc_SSL_new
+#define SSL_clear hts_ptrfunc_SSL_clear
+#define SSL_set_fd hts_ptrfunc_SSL_set_fd
+#define SSL_set_connect_state hts_ptrfunc_SSL_set_connect_state
+#define SSL_connect hts_ptrfunc_SSL_connect
+#define SSL_get_error hts_ptrfunc_SSL_get_error
+#define SSL_write hts_ptrfunc_SSL_write
+#define SSL_read hts_ptrfunc_SSL_read
+#define SSL_library_init hts_ptrfunc_SSL_library_init
+#define ERR_load_crypto_strings hts_ptrfunc_ERR_load_crypto_strings
+#define ERR_load_SSL_strings hts_ptrfunc_ERR_load_SSL_strings
+#define SSLv23_client_method hts_ptrfunc_SSLv23_client_method
+#define SSL_CTX_new hts_ptrfunc_SSL_CTX_new
+#define ERR_error_string hts_ptrfunc_ERR_error_string
+#define SSL_load_error_strings hts_ptrfunc_SSL_load_error_strings
+#define SSL_CTX_ctrl hts_ptrfunc_SSL_CTX_ctrl
+/* Opaque stand-ins for the OpenSSL types, followed by one function-pointer
+   type (t_<name>) per remapped OpenSSL function.
+   Beware: SSL is declared here as void* (not void), so "SSL *" is a void**
+   and t_SSL_new returns an SSL by value.
+   NOTE(review): these prototypes are hand-written; verify them against the
+   OpenSSL version actually loaded (e.g. the type of SSL_CTX_ctrl's parg). */
+typedef void SSL_CTX;
+typedef void* SSL;
+typedef void SSL_METHOD;
+typedef int (*t_SSL_shutdown)(SSL *ssl);
+typedef void (*t_SSL_free)(SSL *ssl);
+typedef SSL (*t_SSL_new)(SSL_CTX *ctx);
+typedef int (*t_SSL_clear)(SSL *ssl);
+typedef int (*t_SSL_set_fd)(SSL *ssl, int fd);
+typedef void (*t_SSL_set_connect_state)(SSL *ssl);
+typedef int (*t_SSL_connect)(SSL *ssl);
+typedef int (*t_SSL_get_error)(SSL *ssl, int ret);
+typedef int (*t_SSL_write)(SSL *ssl, const void *buf, int num);
+typedef int (*t_SSL_read)(SSL *ssl, void *buf, int num);
+typedef int (*t_SSL_library_init)(void);
+typedef void (*t_ERR_load_crypto_strings)(void);
+typedef void (*t_ERR_load_SSL_strings)(void);
+typedef SSL_METHOD * (*t_SSLv23_client_method)(void);
+typedef SSL_CTX * (*t_SSL_CTX_new)(SSL_METHOD *method);
+typedef char * (*t_ERR_error_string)(unsigned long e, char *buf);
+typedef void (*t_SSL_load_error_strings)(void);
+typedef long (*t_SSL_CTX_ctrl)(SSL_CTX *ctx, int cmd, long larg, char *parg);
+/* Function pointers standing in for the OpenSSL API. Because of the #define
+   remapping above, each name below really declares the matching
+   hts_ptrfunc_* variable; the definitions live in another translation unit.
+   NOTE(review): SSL_is_available presumably tells whether the pointers were
+   resolved successfully - confirm before relying on it. */
+extern int SSL_is_available;
+extern t_SSL_shutdown SSL_shutdown;
+extern t_SSL_free SSL_free;
+extern t_SSL_new SSL_new;
+extern t_SSL_clear SSL_clear;
+extern t_SSL_set_fd SSL_set_fd;
+extern t_SSL_set_connect_state SSL_set_connect_state;
+extern t_SSL_connect SSL_connect;
+extern t_SSL_get_error SSL_get_error;
+extern t_SSL_write SSL_write;
+extern t_SSL_read SSL_read;
+extern t_SSL_library_init SSL_library_init;
+extern t_ERR_load_crypto_strings ERR_load_crypto_strings;
+extern t_ERR_load_SSL_strings ERR_load_SSL_strings;
+extern t_SSLv23_client_method SSLv23_client_method;
+extern t_SSL_CTX_new SSL_CTX_new;
+extern t_ERR_error_string ERR_error_string;
+extern t_SSL_load_error_strings SSL_load_error_strings;
+extern t_SSL_CTX_ctrl SSL_CTX_ctrl;
+/*
+From /usr/include/openssl/ssl.h
+Local copies of the constants needed by the code, since the OpenSSL headers
+themselves are not included.
+NOTE(review): the values are hard-coded; check them against the OpenSSL
+version loaded at run time.
+*/
+#define SSL_ERROR_NONE 0
+#define SSL_ERROR_SSL 1
+#define SSL_ERROR_WANT_READ 2
+#define SSL_ERROR_WANT_WRITE 3
+#define SSL_ERROR_WANT_X509_LOOKUP 4
+#define SSL_ERROR_SYSCALL 5 /* look at error stack/return value/errno */
+#define SSL_ERROR_ZERO_RETURN 6
+#define SSL_ERROR_WANT_CONNECT 7
+#define SSL_OP_ALL 0x000FFFFFL
+#define SSL_CTRL_OPTIONS 32
+/* Sets option bits on a context by calling SSL_CTX_ctrl() (i.e. the
+   hts_ptrfunc_SSL_CTX_ctrl pointer) with the SSL_CTRL_OPTIONS command. */
+#define SSL_CTX_set_options(ctx,op) \
+ SSL_CTX_ctrl(ctx,SSL_CTRL_OPTIONS,op,NULL)
+
//#include <openssl/bio.h>
#ifdef __cplusplus
};
diff --git a/src/htsbauth.c b/src/htsbauth.c
index a1506c1..23a22af 100644
--- a/src/htsbauth.c
+++ b/src/htsbauth.c
@@ -86,20 +86,20 @@ int cookie_add(t_cookie* cookie,char* cook_name,char* cook_value,char* domain,ch
}
}
// construction du cookie
- strcpy(cook,domain);
- strcat(cook,"\t");
- strcat(cook,"TRUE");
- strcat(cook,"\t");
- strcat(cook,path);
- strcat(cook,"\t");
- strcat(cook,"FALSE");
- strcat(cook,"\t");
- strcat(cook,"1999999999");
- strcat(cook,"\t");
- strcat(cook,cook_name);
- strcat(cook,"\t");
- strcat(cook,cook_value);
- strcat(cook,"\n");
+ strcpybuff(cook,domain);
+ strcatbuff(cook,"\t");
+ strcatbuff(cook,"TRUE");
+ strcatbuff(cook,"\t");
+ strcatbuff(cook,path);
+ strcatbuff(cook,"\t");
+ strcatbuff(cook,"FALSE");
+ strcatbuff(cook,"\t");
+ strcatbuff(cook,"1999999999");
+ strcatbuff(cook,"\t");
+ strcatbuff(cook,cook_name);
+ strcatbuff(cook,"\t");
+ strcatbuff(cook,cook_value);
+ strcatbuff(cook,"\n");
if (!( ((int) strlen(cookie->data) + (int) strlen(cook)) < cookie->max_len)) return -1; // impossible d'ajouter
cookie_insert(insert,cook);
#if DEBUG_COOK
@@ -179,8 +179,8 @@ int cookie_load(t_cookie* cookie,char* fpath,char* name) {
WIN32_FIND_DATA find;
HANDLE h;
char pth[MAX_PATH + 32];
- strcpy(pth,fpath);
- strcat(pth,"*@*.txt");
+ strcpybuff(pth,fpath);
+ strcatbuff(pth,"*@*.txt");
h = FindFirstFile(pth,&find);
if (h != INVALID_HANDLE_VALUE) {
do {
@@ -233,10 +233,10 @@ int cookie_load(t_cookie* cookie,char* fpath,char* name) {
char path[256]; // chemin (/)
char cook_name[256]; // nom cookie (MYCOOK)
char cook_value[8192]; // valeur (ID=toto,S=1234)
- strcpy(domain,cookie_get(line,0)); // host
- strcpy(path,cookie_get(line,2)); // path
- strcpy(cook_name,cookie_get(line,5)); // name
- strcpy(cook_value,cookie_get(line,6)); // value
+ strcpybuff(domain,cookie_get(line,0)); // host
+ strcpybuff(path,cookie_get(line,2)); // path
+ strcpybuff(cook_name,cookie_get(line,5)); // name
+ strcpybuff(cook_value,cookie_get(line,6)); // value
#if DEBUG_COOK
printf("%s\n",line);
#endif
@@ -277,14 +277,14 @@ int cookie_save(t_cookie* cookie,char* name) {
void cookie_insert(char* s,char* ins) {
char* buff;
if (strnotempty(s)==0) { // rien à faire, juste concat
- strcat(s,ins);
+ strcatbuff(s,ins);
} else {
- buff=(char*) malloc(strlen(s)+2);
+ buff=(char*) malloct(strlen(s)+2);
if (buff) {
- strcpy(buff,s); // copie temporaire
- strcpy(s,ins); // insérer
- strcat(s,buff); // copier
- free(buff);
+ strcpybuff(buff,s); // copie temporaire
+ strcpybuff(s,ins); // insérer
+ strcatbuff(s,buff); // copier
+ freet(buff);
}
}
}
@@ -294,11 +294,11 @@ void cookie_delete(char* s,int pos) {
if (strnotempty(s+pos)==0) { // rien à faire, effacer
s[0]='\0';
} else {
- buff=(char*) malloc(strlen(s+pos)+2);
+ buff=(char*) malloct(strlen(s+pos)+2);
if (buff) {
- strcpy(buff,s+pos); // copie temporaire
- strcpy(s,buff); // copier
- free(buff);
+ strcpybuff(buff,s+pos); // copie temporaire
+ strcpybuff(s,buff); // copier
+ freet(buff);
}
}
}
@@ -329,7 +329,7 @@ char* cookie_get(char* cookie_base,int param) {
char* a = cookie_base;
while( (*a) && (*a!='\t') && (*a!='\n')) a++;
buffer[0]='\0';
- strncat(buffer,cookie_base,(int) (a - cookie_base));
+ strncatbuff(buffer,cookie_base,(int) (a - cookie_base));
return buffer;
} else
return "";
@@ -357,8 +357,8 @@ int bauth_add(t_cookie* cookie,char* adr,char* fil,char* auth) {
if (chain->next) {
chain=chain->next;
chain->next=NULL;
- strcpy(chain->auth,auth);
- strcpy(chain->prefix,prefix);
+ strcpybuff(chain->auth,auth);
+ strcpybuff(chain->prefix,prefix);
return 1;
}
}
@@ -388,8 +388,8 @@ char* bauth_prefix(char* adr,char* fil) {
char* prefix;
char* a;
NOSTATIC_RESERVE(prefix, char, HTS_URLMAXSIZE*2);
- strcpy(prefix,jump_identification(adr));
- strcat(prefix,fil);
+ strcpybuff(prefix,jump_identification(adr));
+ strcatbuff(prefix,fil);
a=strchr(prefix,'?');
if (a) *a='\0';
if (strchr(prefix,'/')) {
diff --git a/src/htscache.c b/src/htscache.c
index da8791e..b90fa67 100644
--- a/src/htscache.c
+++ b/src/htscache.c
@@ -91,8 +91,14 @@ with
<int>(size)
<string>(msg)
<string>(contenttype)
+ <string>(charset) [version 3]
<string>(last-modified)
<string>(Etag)
+ <string>location
+ <string>Content-disposition [version 2]
+ <string>hostname [version 4]
+ <string>URI filename [version 4]
+ <string>local filename [version 4]
[<string>"SD" <string>(supplemental data)]
[<string>"SD" <string>(supplemental data)]
...
@@ -191,16 +197,22 @@ void cache_add(htsblk r,char* url_adr,char* url_fil,char* url_save,FILE* cache_n
// Construction header
ok=0;
- if (cache_wint(cache_dat,r.statuscode)!=-1) // statuscode
- if (cache_wLLint(cache_dat,r.size)!=-1) // size
- if (cache_wstr(cache_dat,r.msg)!=-1) // msg
- if (cache_wstr(cache_dat,r.contenttype)!=-1) // contenttype
- if (cache_wstr(cache_dat,r.lastmodified)!=-1) // last-modified
- if (cache_wstr(cache_dat,r.etag)!=-1) // Etag
- if (cache_wstr(cache_dat,(r.location!=NULL)?r.location:"")!=-1) // 'location' pour moved
- if (cache_wstr(cache_dat,r.cdispo)!=-1) // Content-disposition
- if (cache_wstr(cache_dat,"HTS")!=-1) // end of header
+ if (cache_wint(cache_dat,r.statuscode) != -1 // statuscode
+ && cache_wLLint(cache_dat,r.size) != -1 // size
+ && cache_wstr(cache_dat,r.msg) != -1 // msg
+ && cache_wstr(cache_dat,r.contenttype) != -1 // contenttype
+ && cache_wstr(cache_dat,r.charset) != -1 // charset
+ && cache_wstr(cache_dat,r.lastmodified) != -1 // last-modified
+ && cache_wstr(cache_dat,r.etag) != -1 // Etag
+ && cache_wstr(cache_dat,(r.location!=NULL)?r.location:"") != -1 // 'location' pour moved
+ && cache_wstr(cache_dat,r.cdispo) != -1 // Content-disposition
+ && cache_wstr(cache_dat,url_adr) != -1 // Original address
+ && cache_wstr(cache_dat,url_fil) != -1 // Original URI filename
+ && cache_wstr(cache_dat,url_save) != -1 // Original save filename
+ && cache_wstr(cache_dat,"HTS") != -1 // end of header
+ ) {
ok=1; /* ok */
+ }
// Fin construction header
/*if ((int) fwrite((char*) &r,1,sizeof(htsblk),cache_dat) == sizeof(htsblk)) {*/
@@ -212,7 +224,7 @@ void cache_add(htsblk r,char* url_adr,char* url_fil,char* url_save,FILE* cache_n
} else if (r.is_write==0) { // en mémoire, recopie directe
if (cache_wLLint(cache_dat,r.size)!=-1) {
if (r.size>0) { // taille>0
- if ((INTsys) fwrite(r.adr,1,(INTsys)r.size,cache_dat)!=r.size)
+ if (fwrite(r.adr,1,(INTsys)r.size,cache_dat)!=r.size)
ok=0;
} else // taille=0, ne rien écrire
ok=0;
@@ -227,11 +239,11 @@ void cache_add(htsblk r,char* url_adr,char* url_fil,char* url_save,FILE* cache_n
fp=fopen(fconv(url_save),"rb");
if (fp!=NULL) {
char buff[32768];
- int nl;
+ INTsys nl;
do {
nl=fread(buff,1,32768,fp);
if (nl>0) {
- if ((INTsys) fwrite(buff,1,(INTsys)nl,cache_dat)!=nl) { // erreur
+ if ((INTsys)fwrite(buff,1,(INTsys)nl,cache_dat)!=nl) { // erreur
nl=-1;
ok=0;
}
@@ -254,7 +266,7 @@ void cache_add(htsblk r,char* url_adr,char* url_fil,char* url_save,FILE* cache_n
// index
// adresse+cr+fichier+cr
if (ok) {
- buff[0]='\0'; strcat(buff,url_adr); strcat(buff,"\n"); strcat(buff,url_fil); strcat(buff,"\n");
+ buff[0]='\0'; strcatbuff(buff,url_adr); strcatbuff(buff,"\n"); strcatbuff(buff,url_fil); strcatbuff(buff,"\n");
cache_wstr(cache_ndx,buff);
fwrite(s,1,strlen(s),cache_ndx);
} // si ok=0 on a peut être écrit des données pour rien mais on s'en tape
@@ -264,9 +276,18 @@ void cache_add(htsblk r,char* url_adr,char* url_fil,char* url_save,FILE* cache_n
}
+/* Read an entry from the cache in normal (read-write) mode.
+   Thin wrapper around cache_readex(): no buffer is supplied to receive the
+   previously saved file name, and the readonly flag is cleared, which leaves
+   the direct-to-disk shortcut of cache_readex() enabled. */
+htsblk cache_read(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* location) {
+  char* const no_return_save = NULL;   /* caller does not want the saved name */
+  const int readonly = 0;
+  htsblk result = cache_readex(opt, cache, adr, fil, save, location, no_return_save, readonly);
+  return result;
+}
+
+/* Read an entry from the cache in read-only mode.
+   Thin wrapper around cache_readex(): no buffer is supplied to receive the
+   previously saved file name, and the readonly flag is set, which disables
+   the direct-to-disk shortcut of cache_readex(). */
+htsblk cache_read_ro(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* location) {
+  char* const no_return_save = NULL;   /* caller does not want the saved name */
+  const int readonly = 1;
+  htsblk result = cache_readex(opt, cache, adr, fil, save, location, no_return_save, readonly);
+  return result;
+}
+
// lecture d'un fichier dans le cache
// si save==null alors test unqiquement
-htsblk cache_read(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save) {
+htsblk cache_readex(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* location,
+ char* return_save, int readonly) {
#if HTS_FAST_CACHE
long int hash_pos;
int hash_pos_return;
@@ -274,30 +295,46 @@ htsblk cache_read(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save
char* a;
#endif
char buff[HTS_URLMAXSIZE*2];
- char location[HTS_URLMAXSIZE*2];
+ char location_default[HTS_URLMAXSIZE*2];
+ char previous_save[HTS_URLMAXSIZE*2];
htsblk r;
int ok=0;
int header_only=0;
- memset(&r, 0, sizeof(htsblk)); r.soc=INVALID_SOCKET; strcpy(location,""); r.location=location;
+ memset(&r, 0, sizeof(htsblk)); r.soc=INVALID_SOCKET;
+ if (location) {
+ r.location = location;
+ } else {
+ r.location = location_default;
+ }
+ strcpybuff(r.location, "");
#if HTS_FAST_CACHE
- strcpy(buff,adr); strcat(buff,fil);
+ strcpybuff(buff,adr); strcatbuff(buff,fil);
hash_pos_return=inthash_read((inthash)cache->hashtable,buff,(long int*)&hash_pos);
#else
- buff[0]='\0'; strcat(buff,"\n"); strcat(buff,adr); strcat(buff,"\n"); strcat(buff,fil); strcat(buff,"\n");
+ buff[0]='\0'; strcatbuff(buff,"\n"); strcatbuff(buff,adr); strcatbuff(buff,"\n"); strcatbuff(buff,fil); strcatbuff(buff,"\n");
if (cache->use)
a=strstr(cache->use,buff);
else
a=NULL; // forcer erreur
#endif
+ /* avoid errors on data entries */
+ if (adr[0] == '/' && adr[1] == '/' && adr[2] == '[') {
+#if HTS_FAST_CACHE
+ hash_pos_return = 0;
+#else
+ a = NULL;
+#endif
+ }
+
// en cas de succès
#if HTS_FAST_CACHE
if (hash_pos_return) {
#else
if (a!=NULL) { // OK existe en cache!
#endif
- int pos;
+ INTsys pos;
#if DEBUGCA
fprintf(stdout,"..cache: %s%s at ",adr,fil);
#endif
@@ -320,8 +357,8 @@ htsblk cache_read(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save
if (fread((char*) &old_r,1,sizeof(old_r),cache->olddat)==sizeof(old_r)) { // lire tout (y compris statuscode etc)
r.statuscode=old_r.statuscode;
r.size=old_r.size; // taille fichier
- strcpy(r.msg,old_r.msg);
- strcpy(r.contenttype,old_r.contenttype);
+ strcpybuff(r.msg,old_r.msg);
+ strcpybuff(r.contenttype,old_r.contenttype);
ok=1; /* import ok */
}
/* */
@@ -335,11 +372,22 @@ htsblk cache_read(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save
cache_rLLint(cache->olddat,&r.size);
cache_rstr(cache->olddat,r.msg);
cache_rstr(cache->olddat,r.contenttype);
+ if (cache->version >= 3)
+ cache_rstr(cache->olddat,r.charset);
cache_rstr(cache->olddat,r.lastmodified);
cache_rstr(cache->olddat,r.etag);
cache_rstr(cache->olddat,r.location);
if (cache->version >= 2)
cache_rstr(cache->olddat,r.cdispo);
+ if (cache->version >= 4) {
+ cache_rstr(cache->olddat, previous_save); // adr
+ cache_rstr(cache->olddat, previous_save); // fil
+ previous_save[0] = '\0';
+ cache_rstr(cache->olddat, previous_save); // save
+ if (return_save != NULL) {
+ strcpybuff(return_save, previous_save);
+ }
+ }
//
cache_rstr(cache->olddat,check);
if (strcmp(check,"HTS")==0) { /* intégrité OK */
@@ -377,14 +425,15 @@ htsblk cache_read(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save
#if HTS_DIRECTDISK
// Court-circuit:
// Peut-on stocker le fichier directement sur disque?
- if ((r.statuscode==200) && (!is_hypertext_mime(r.contenttype)) && (strnotempty(save))) { // pas HTML, écrire sur disk directement
+ if (!readonly && r.statuscode==200 && !is_hypertext_mime(r.contenttype) && strnotempty(save)) { // pas HTML, écrire sur disk directement
int ok=0;
r.is_write=1; // écrire
- if (fexist(antislash(save))) { // un fichier existe déja
- //if (fsize(antislash(save))==r.size) { // même taille -- NON tant pis (taille mal declaree)
+ if (fexist(fconv(save))) { // un fichier existe déja
+ //if (fsize(fconv(save))==r.size) { // même taille -- NON tant pis (taille mal declaree)
ok=1; // plus rien à faire
filenote(save,NULL); // noter comme connu
+ //xxusercommand(opt,0,NULL,save,adr,fil);
//}
}
@@ -393,11 +442,11 @@ htsblk cache_read(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save
filecreateempty(save);
//
r.statuscode=-1;
- strcpy(r.msg,"File deleted by user not recaught");
+ strcpybuff(r.msg,"File deleted by user not recaught");
ok=1; // ne pas récupérer (et pas d'erreur)
} else {
r.statuscode=-1;
- strcpy(r.msg,"Previous cache file not found");
+ strcpybuff(r.msg,"Previous cache file not found");
ok=1; // ne pas récupérer
}
}
@@ -409,29 +458,30 @@ htsblk cache_read(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save
#endif
if (r.out!=NULL) {
char buff[32768+4];
- LLint nl;
- LLint size;
- size=r.size;
- do {
- nl=fread(buff,1,(INTsys) minimum(size,32768),cache->olddat);
- if (nl>0) {
- size-=nl;
- if ((INTsys) fwrite(buff,1,(INTsys)nl,r.out)!=nl) { // erreur
- r.statuscode=-1;
- strcpy(r.msg,"Cache Read Error : Read To Disk");
+ LLint size = r.size;
+ if (size > 0) {
+ INTsys nl;
+ do {
+ nl=fread(buff,1,(INTsys) minimum(size,32768),cache->olddat);
+ if (nl>0) {
+ size-=nl;
+ if ((INTsys)fwrite(buff,1,(INTsys)nl,r.out)!=nl) { // erreur
+ r.statuscode=-1;
+ strcpybuff(r.msg,"Cache Read Error : Read To Disk");
+ }
}
- }
- } while((nl>0) && (size>0) && (r.statuscode!=-1));
+ } while((nl>0) && (size>0) && (r.statuscode!=-1));
+ }
fclose(r.out);
r.out=NULL;
#if HTS_WIN==0
chmod(save,HTS_ACCESS_FILE);
#endif
- usercommand(0,NULL,antislash(save));
+ //xxusercommand(opt,0,NULL,fconv(save), adr, fil);
} else {
r.statuscode=-1;
- strcpy(r.msg,"Cache Write Error : Unable to Create File");
+ strcpybuff(r.msg,"Cache Write Error : Unable to Create File");
//printf("%s\n",save);
}
}
@@ -440,24 +490,46 @@ htsblk cache_read(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save
#endif
{ // lire en mémoire
- if (pos<0) { // Pas de donnée en cache, bizarre car html!!!
- r.statuscode=-1;
- strcpy(r.msg,"Previous cache file not found (2)");
+ if (pos<0) {
+ if (strnotempty(save)) { // Pas de donnée en cache, bizarre car html!!!
+ r.statuscode=-1;
+ strcpybuff(r.msg,"Previous cache file not found (2)");
+ } else { /* Read in memory from cache */
+ /* No data stored in the cache file itself: reload the body from the file
+    previously saved on disk, whose name was read into return_save.
+    return_save is NULL when called through cache_read()/cache_read_ro(),
+    hence the explicit NULL test. */
+ if (return_save != NULL && strnotempty(return_save) && fexist(return_save)) {
+   FILE* fp = fopen(fconv(return_save), "rb");
+   if (fp != NULL) {
+     r.adr=(char*) malloct((INTsys)r.size + 4);
+     if (r.adr != NULL) {    /* check the new buffer (was wrongly testing 'adr') */
+       if (r.size > 0 && fread(r.adr, 1, (INTsys) r.size, fp) != r.size) {
+         /* short read: do not hand back a partially filled buffer */
+         freet(r.adr);
+         r.adr=NULL;
+         r.statuscode=-1;
+         strcpybuff(r.msg,"Read error in cache disk data");
+       } else {
+         /* terminate the data, as the in-cache read path does */
+         *(r.adr+r.size)='\0';
+       }
+     } else {
+       r.statuscode=-1;
+       strcpybuff(r.msg,"Read error (memory exhausted) from cache");
+     }
+     fclose(fp);
+   } else {
+     /* the file exists but cannot be opened: report it instead of
+        returning a successful status with no data */
+     r.statuscode=-1;
+     strcpybuff(r.msg,"Unable to open cache file on disk");
+   }
+ } else {
+   r.statuscode=-1;
+   strcpybuff(r.msg,"Cache file not found on disk");
+ }
+ }
} else {
// lire fichier (d'un coup)
r.adr=(char*) malloct((INTsys)r.size+4);
if (r.adr!=NULL) {
- if ((INTsys) fread(r.adr,1,(INTsys)r.size,cache->olddat)!=r.size) { // erreur
+ if (fread(r.adr,1,(INTsys)r.size,cache->olddat)!=r.size) { // erreur
freet(r.adr);
r.adr=NULL;
r.statuscode=-1;
- strcpy(r.msg,"Cache Read Error : Read Data");
+ strcpybuff(r.msg,"Cache Read Error : Read Data");
} else
*(r.adr+r.size)='\0';
//printf(">%s status %d\n",back[p].r.contenttype,back[p].r.statuscode);
} else { // erreur
r.statuscode=-1;
- strcpy(r.msg,"Cache Memory Error");
+ strcpybuff(r.msg,"Cache Memory Error");
}
}
}
@@ -467,28 +539,31 @@ htsblk cache_read(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save
printf("Cache Read Error : Bad Data");
#endif
r.statuscode=-1;
- strcpy(r.msg,"Cache Read Error : Bad Data");
+ strcpybuff(r.msg,"Cache Read Error : Bad Data");
}
} else { // erreur
#if DEBUGCA
printf("Cache Read Error : Read Header");
#endif
r.statuscode=-1;
- strcpy(r.msg,"Cache Read Error : Read Header");
+ strcpybuff(r.msg,"Cache Read Error : Read Header");
}
} else {
#if DEBUGCA
printf("Cache Read Error : Seek Failed");
#endif
r.statuscode=-1;
- strcpy(r.msg,"Cache Read Error : Seek Failed");
+ strcpybuff(r.msg,"Cache Read Error : Seek Failed");
}
} else {
#if DEBUGCA
printf("File Cache Not Found");
#endif
r.statuscode=-1;
- strcpy(r.msg,"File Cache Not Found");
+ strcpybuff(r.msg,"File Cache Entry Not Found");
+ }
+ if (!location) { /* don't export internal buffer */
+ r.location = NULL;
}
return r;
}
@@ -504,12 +579,12 @@ int cache_writedata(FILE* cache_ndx,FILE* cache_dat,char* str1,char* str2,char*
pos=ftell(cache_dat);
/* first write data */
if (cache_wint(cache_dat,len)!=-1) { // length
- if ((INTsys) fwrite(outbuff,1,(INTsys)len,cache_dat) == (INTsys) len) { // data
+ if ((INTsys)fwrite(outbuff,1,(INTsys)len,cache_dat) == (INTsys) len) { // data
/* then write index */
sprintf(s,"%d\n",pos);
- buff[0]='\0'; strcat(buff,str1); strcat(buff,"\n"); strcat(buff,str2); strcat(buff,"\n");
+ buff[0]='\0'; strcatbuff(buff,str1); strcatbuff(buff,"\n"); strcatbuff(buff,str2); strcatbuff(buff,"\n");
cache_wstr(cache_ndx,buff);
- if (fwrite(s,1,strlen(s),cache_ndx) == strlen(s)) {
+ if (fwrite(s,1,(INTsys)strlen(s),cache_ndx) == strlen(s)) {
fflush(cache_dat); fflush(cache_ndx);
return 1;
}
@@ -526,15 +601,15 @@ int cache_readdata(cache_back* cache,char* str1,char* str2,char** inbuff,int* in
if (cache->hashtable) {
char buff[HTS_URLMAXSIZE*4];
long int pos;
- strcpy(buff,str1); strcat(buff,str2);
+ strcpybuff(buff,str1); strcatbuff(buff,str2);
if (inthash_read((inthash)cache->hashtable,buff,(long int*)&pos)) {
if (fseek(cache->olddat,((pos>0)?pos:(-pos)),SEEK_SET) == 0) {
- int len;
+ INTsys len;
cache_rint(cache->olddat,&len);
if (len>0) {
char* mem_buff=(char*)malloct(len+4); /* Plus byte 0 */
if (mem_buff) {
- if ((int)fread(mem_buff,1,len,cache->olddat)==len) { // lire tout (y compris statuscode etc)*/
+ if ((INTsys)fread(mem_buff,1,len,cache->olddat)==len) { // lire tout (y compris statuscode etc)*/
*inbuff=mem_buff;
*inlen=len;
return 1;
@@ -552,17 +627,16 @@ int cache_readdata(cache_back* cache,char* str1,char* str2,char** inbuff,int* in
}
// renvoyer uniquement en tête, ou NULL si erreur
-htsblk* cache_header(httrackp* opt,cache_back* cache,char* adr,char* fil) {
- htsblk* r;
- NOSTATIC_RESERVE(r, htsblk, 1);
- *r=cache_read(opt,cache,adr,fil,NULL); // test uniquement
+// return NULL upon error, and set -1 to r.statuscode
+htsblk* cache_header(httrackp* opt,cache_back* cache,char* adr,char* fil,htsblk* r) {
+ *r=cache_read(opt,cache,adr,fil,NULL,NULL); // test uniquement
if (r->statuscode != -1)
return r;
else
return NULL;
}
-
+
// Initialisation du cache: créer nouveau, renomer ancien, charger..
void cache_init(cache_back* cache,httrackp* opt) {
// ---
@@ -571,80 +645,91 @@ void cache_init(cache_back* cache,httrackp* opt) {
#if DEBUGCA
printf("cache init: ");
#endif
+ if (!cache->ro) {
#if HTS_WIN
- mkdir(fconcat(opt->path_log,"hts-cache"));
+ mkdir(fconcat(opt->path_log,"hts-cache"));
#else
- mkdir(fconcat(opt->path_log,"hts-cache"),HTS_PROTECT_FOLDER);
+ mkdir(fconcat(opt->path_log,"hts-cache"),HTS_PROTECT_FOLDER);
#endif
- if ((fexist(fconcat(opt->path_log,"hts-cache/new.dat"))) && (fexist(fconcat(opt->path_log,"hts-cache/new.ndx")))) { // il existe déja un cache précédent.. renommer
+ if ((fexist(fconcat(opt->path_log,"hts-cache/new.dat"))) && (fexist(fconcat(opt->path_log,"hts-cache/new.ndx")))) { // il existe déja un cache précédent.. renommer
#if DEBUGCA
- printf("work with former cache\n");
+ printf("work with former cache\n");
#endif
- if (fexist(fconcat(opt->path_log,"hts-cache/old.dat")))
- remove(fconcat(opt->path_log,"hts-cache/old.dat"));
- if (fexist(fconcat(opt->path_log,"hts-cache/old.ndx")))
- remove(fconcat(opt->path_log,"hts-cache/old.ndx"));
-
- rename(fconcat(opt->path_log,"hts-cache/new.dat"),fconcat(opt->path_log,"hts-cache/old.dat"));
- rename(fconcat(opt->path_log,"hts-cache/new.ndx"),fconcat(opt->path_log,"hts-cache/old.ndx"));
- } else { // un des deux (ou les deux) fichiers cache absents: effacer l'autre éventuel
+ if (fexist(fconcat(opt->path_log,"hts-cache/old.dat")))
+ remove(fconcat(opt->path_log,"hts-cache/old.dat"));
+ if (fexist(fconcat(opt->path_log,"hts-cache/old.ndx")))
+ remove(fconcat(opt->path_log,"hts-cache/old.ndx"));
+
+ rename(fconcat(opt->path_log,"hts-cache/new.dat"),fconcat(opt->path_log,"hts-cache/old.dat"));
+ rename(fconcat(opt->path_log,"hts-cache/new.ndx"),fconcat(opt->path_log,"hts-cache/old.ndx"));
+ } else { // un des deux (ou les deux) fichiers cache absents: effacer l'autre éventuel
#if DEBUGCA
- printf("new cache\n");
+ printf("new cache\n");
#endif
- if (fexist(fconcat(opt->path_log,"hts-cache/new.dat")))
- remove(fconcat(opt->path_log,"hts-cache/new.dat"));
- if (fexist(fconcat(opt->path_log,"hts-cache/new.ndx")))
- remove(fconcat(opt->path_log,"hts-cache/new.ndx"));
+ if (fexist(fconcat(opt->path_log,"hts-cache/new.dat")))
+ remove(fconcat(opt->path_log,"hts-cache/new.dat"));
+ if (fexist(fconcat(opt->path_log,"hts-cache/new.ndx")))
+ remove(fconcat(opt->path_log,"hts-cache/new.ndx"));
+ }
}
// charger index cache précédent
- if ((fexist(fconcat(opt->path_log,"hts-cache/old.dat"))) && (fexist(fconcat(opt->path_log,"hts-cache/old.ndx")))) { // cache précédent
- if ((fsize(fconcat(opt->path_log,"hts-cache/old.dat"))>=0) && (fsize(fconcat(opt->path_log,"hts-cache/old.ndx"))>0)) {
- FILE* oldndx=NULL;
+ if (
+ (
+ !cache->ro &&
+ fsize(fconcat(opt->path_log,"hts-cache/old.dat")) >=0 && fsize(fconcat(opt->path_log,"hts-cache/old.ndx")) >0
+ )
+ ||
+ (
+ cache->ro &&
+ fsize(fconcat(opt->path_log,"hts-cache/new.dat")) >=0 && fsize(fconcat(opt->path_log,"hts-cache/new.ndx")) > 0
+ )
+ ) {
+ FILE* oldndx=NULL;
#if DEBUGCA
- printf("..load cache\n");
+ printf("..load cache\n");
#endif
+ if (!cache->ro) {
cache->olddat=fopen(fconcat(opt->path_log,"hts-cache/old.dat"),"rb");
oldndx=fopen(fconcat(opt->path_log,"hts-cache/old.ndx"),"rb");
- // les deux doivent être ouvrables
- if ((cache->olddat==NULL) && (oldndx!=NULL)) {
- fclose(oldndx);
- oldndx=NULL;
- }
- if ((cache->olddat!=NULL) && (oldndx==NULL)) {
- fclose(cache->olddat);
- cache->olddat=NULL;
- }
- // lire index
- if (oldndx!=NULL) {
- int buffl;
- fclose(oldndx); oldndx=NULL;
- // lire ndx, et lastmodified
+ } else {
+ cache->olddat=fopen(fconcat(opt->path_log,"hts-cache/new.dat"),"rb");
+ oldndx=fopen(fconcat(opt->path_log,"hts-cache/new.ndx"),"rb");
+ }
+ // les deux doivent être ouvrables
+ if ((cache->olddat==NULL) && (oldndx!=NULL)) {
+ fclose(oldndx);
+ oldndx=NULL;
+ }
+ if ((cache->olddat!=NULL) && (oldndx==NULL)) {
+ fclose(cache->olddat);
+ cache->olddat=NULL;
+ }
+ // lire index
+ if (oldndx!=NULL) {
+ int buffl;
+ fclose(oldndx); oldndx=NULL;
+ // lire ndx, et lastmodified
+ if (!cache->ro) {
buffl=fsize(fconcat(opt->path_log,"hts-cache/old.ndx"));
cache->use=readfile(fconcat(opt->path_log,"hts-cache/old.ndx"));
- if (cache->use!=NULL) {
- char firstline[256];
- char* a=cache->use;
- a+=cache_brstr(a,firstline);
- if (strncmp(firstline,"CACHE-",6)==0) { // Nouvelle version du cache
- if (strncmp(firstline,"CACHE-1.",8)==0) { // Version 1.1x
- cache->version=(int)(firstline[8]-'0'); // cache 1.x
- if (cache->version <= 2) {
- a+=cache_brstr(a,firstline);
- strcpy(cache->lastmodified,firstline);
- } else {
- if (opt->errlog) {
- fspc(opt->errlog,"error"); fprintf(opt->errlog,"Cache: version 1.%d not supported, ignoring current cache"LF,cache->version);
- fflush(opt->errlog);
- }
- fclose(cache->olddat);
- cache->olddat=NULL;
- freet(cache->use);
- cache->use=NULL;
- }
- } else { // non supporté
+ } else {
+ buffl=fsize(fconcat(opt->path_log,"hts-cache/new.ndx"));
+ cache->use=readfile(fconcat(opt->path_log,"hts-cache/new.ndx"));
+ }
+ if (cache->use!=NULL) {
+ char firstline[256];
+ char* a=cache->use;
+ a+=cache_brstr(a,firstline);
+ if (strncmp(firstline,"CACHE-",6)==0) { // Nouvelle version du cache
+ if (strncmp(firstline,"CACHE-1.",8)==0) { // Version 1.1x
+ cache->version=(int)(firstline[8]-'0'); // cache 1.x
+ if (cache->version <= 4) {
+ a+=cache_brstr(a,firstline);
+ strcpybuff(cache->lastmodified,firstline);
+ } else {
if (opt->errlog) {
- fspc(opt->errlog,"error"); fprintf(opt->errlog,"Cache: %s not supported, ignoring current cache"LF,firstline);
+ fspc(opt->errlog,"error"); fprintf(opt->errlog,"Cache: version 1.%d not supported, ignoring current cache"LF,cache->version);
fflush(opt->errlog);
}
fclose(cache->olddat);
@@ -652,126 +737,141 @@ void cache_init(cache_back* cache,httrackp* opt) {
freet(cache->use);
cache->use=NULL;
}
- /* */
- } else { // Vieille version du cache
- /* */
- if (opt->log) {
- fspc(opt->log,"warning"); fprintf(opt->log,"Cache: importing old cache format"LF);
- fflush(opt->log);
+ } else { // non supporté
+ if (opt->errlog) {
+ fspc(opt->errlog,"error"); fprintf(opt->errlog,"Cache: %s not supported, ignoring current cache"LF,firstline);
+ fflush(opt->errlog);
}
- cache->version=0; // cache 1.0
- strcpy(cache->lastmodified,firstline);
+ fclose(cache->olddat);
+ cache->olddat=NULL;
+ freet(cache->use);
+ cache->use=NULL;
}
- opt->is_update=1; // signaler comme update
-
- /* Create hash table for the cache (MUCH FASTER!) */
+ /* */
+ } else { // Vieille version du cache
+ /* */
+ if (opt->log) {
+ fspc(opt->log,"warning"); fprintf(opt->log,"Cache: importing old cache format"LF);
+ fflush(opt->log);
+ }
+ cache->version=0; // cache 1.0
+ strcpybuff(cache->lastmodified,firstline);
+ }
+ opt->is_update=1; // signaler comme update
+
+ /* Create hash table for the cache (MUCH FASTER!) */
#if HTS_FAST_CACHE
- if (cache->use) {
- char line[HTS_URLMAXSIZE*2];
- char linepos[256];
- int pos;
- while ( (a!=NULL) && (a < (cache->use+buffl) ) ) {
- a=strchr(a+1,'\n'); /* start of line */
- if (a) {
- a++;
- /* read "host/file" */
- a+=binput(a,line,HTS_URLMAXSIZE);
- a+=binput(a,line+strlen(line),HTS_URLMAXSIZE);
- /* read position */
- a+=binput(a,linepos,200);
- sscanf(linepos,"%d",&pos);
- inthash_add((inthash)cache->hashtable,line,pos);
- }
+ if (cache->use) {
+ char line[HTS_URLMAXSIZE*2];
+ char linepos[256];
+ int pos;
+ while ( (a!=NULL) && (a < (cache->use+buffl) ) ) {
+ a=strchr(a+1,'\n'); /* start of line */
+ if (a) {
+ a++;
+ /* read "host/file" */
+ a+=binput(a,line,HTS_URLMAXSIZE);
+ a+=binput(a,line+strlen(line),HTS_URLMAXSIZE);
+ /* read position */
+ a+=binput(a,linepos,200);
+ sscanf(linepos,"%d",&pos);
+ inthash_add((inthash)cache->hashtable,line,pos);
}
- /* Not needed anymore! */
- freet(cache->use);
- cache->use=NULL;
}
-#endif
+ /* Not needed anymore! */
+ freet(cache->use);
+ cache->use=NULL;
}
+#endif
}
+ }
} // taille cache>0
- } // cache precedent existe
-
+
#if DEBUGCA
- printf("..create cache\n");
+ printf("..create cache\n");
#endif
- // ouvrir caches actuels
- cache->dat=fopen(fconcat(opt->path_log,"hts-cache/new.dat"),"wb");
- cache->ndx=fopen(fconcat(opt->path_log,"hts-cache/new.ndx"),"wb");
- // les deux doivent être ouvrables
- if ((cache->dat==NULL) && (cache->ndx!=NULL)) {
- fclose(cache->ndx);
- cache->ndx=NULL;
- }
- if ((cache->dat!=NULL) && (cache->ndx==NULL)) {
- fclose(cache->dat);
- cache->dat=NULL;
- }
-
- if (cache->ndx!=NULL) {
- char s[256];
-
- cache_wstr(cache->dat,"CACHE-1.2");
- fflush(cache->dat);
- cache_wstr(cache->ndx,"CACHE-1.2");
- fflush(cache->ndx);
- //
- time_gmt_rfc822(s); // date et heure actuelle GMT pour If-Modified-Since..
- cache_wstr(cache->ndx,s);
- fflush(cache->ndx); // un petit fflush au cas où
-
- // supprimer old.lst
- if (fexist(fconcat(opt->path_log,"hts-cache/old.lst")))
- remove(fconcat(opt->path_log,"hts-cache/old.lst"));
- // renommer
- if (fexist(fconcat(opt->path_log,"hts-cache/new.lst")))
- rename(fconcat(opt->path_log,"hts-cache/new.lst"),fconcat(opt->path_log,"hts-cache/old.lst"));
- // ouvrir
- cache->lst=fopen(fconcat(opt->path_log,"hts-cache/new.lst"),"wb");
- {
- filecreate_params tmp;
- strcpy(tmp.path,opt->path_html); // chemin
- tmp.lst=cache->lst; // fichier lst
- filenote("",&tmp); // initialiser filecreate
- }
-
- // supprimer old.txt
- if (fexist(fconcat(opt->path_log,"hts-cache/old.txt")))
- remove(fconcat(opt->path_log,"hts-cache/old.txt"));
- // renommer
- if (fexist(fconcat(opt->path_log,"hts-cache/new.txt")))
- rename(fconcat(opt->path_log,"hts-cache/new.txt"),fconcat(opt->path_log,"hts-cache/old.txt"));
- // ouvrir
- cache->txt=fopen(fconcat(opt->path_log,"hts-cache/new.txt"),"wb");
- if (cache->txt) {
- fprintf(cache->txt,"date\tsize'/'remotesize\tflags(request:Update,Range state:File response:Modified,Chunked,gZipped)\t");
- fprintf(cache->txt,"statuscode\tstatus ('servermsg')\tMIME\tEtag|Date\tURL\tlocalfile\t(from URL)"LF);
+ if (!cache->ro) {
+ // ouvrir caches actuels
+ structcheck(fconcat(opt->path_log, "hts-cache/"));
+ cache->dat=fopen(fconcat(opt->path_log,"hts-cache/new.dat"),"wb");
+ cache->ndx=fopen(fconcat(opt->path_log,"hts-cache/new.ndx"),"wb");
+ // les deux doivent être ouvrables
+ if ((cache->dat==NULL) && (cache->ndx!=NULL)) {
+ fclose(cache->ndx);
+ cache->ndx=NULL;
+ }
+ if ((cache->dat!=NULL) && (cache->ndx==NULL)) {
+ fclose(cache->dat);
+ cache->dat=NULL;
+ }
+
+ if (cache->ndx!=NULL) {
+ char s[256];
+
+ cache_wstr(cache->dat,"CACHE-1.4");
+ fflush(cache->dat);
+ cache_wstr(cache->ndx,"CACHE-1.4");
+ fflush(cache->ndx);
+ //
+ time_gmt_rfc822(s); // date et heure actuelle GMT pour If-Modified-Since..
+ cache_wstr(cache->ndx,s);
+ fflush(cache->ndx); // un petit fflush au cas où
+
+ // supprimer old.lst
+ if (fexist(fconcat(opt->path_log,"hts-cache/old.lst")))
+ remove(fconcat(opt->path_log,"hts-cache/old.lst"));
+ // renommer
+ if (fexist(fconcat(opt->path_log,"hts-cache/new.lst")))
+ rename(fconcat(opt->path_log,"hts-cache/new.lst"),fconcat(opt->path_log,"hts-cache/old.lst"));
+ // ouvrir
+ cache->lst=fopen(fconcat(opt->path_log,"hts-cache/new.lst"),"wb");
+ {
+ filecreate_params tmp;
+ strcpybuff(tmp.path,opt->path_html); // chemin
+ tmp.lst=cache->lst; // fichier lst
+ filenote("",&tmp); // initialiser filecreate
+ }
+
+ // supprimer old.txt
+ if (fexist(fconcat(opt->path_log,"hts-cache/old.txt")))
+ remove(fconcat(opt->path_log,"hts-cache/old.txt"));
+ // renommer
+ if (fexist(fconcat(opt->path_log,"hts-cache/new.txt")))
+ rename(fconcat(opt->path_log,"hts-cache/new.txt"),fconcat(opt->path_log,"hts-cache/old.txt"));
+ // ouvrir
+ cache->txt=fopen(fconcat(opt->path_log,"hts-cache/new.txt"),"wb");
+ if (cache->txt) {
+ fprintf(cache->txt,"date\tsize'/'remotesize\tflags(request:Update,Range state:File response:Modified,Chunked,gZipped)\t");
+ fprintf(cache->txt,"statuscode\tstatus ('servermsg')\tMIME\tEtag|Date\tURL\tlocalfile\t(from URL)"LF);
+ }
+
+ // test
+ // cache_writedata(cache->ndx,cache->dat,"//[TEST]//","test1","TEST PIPO",9);
+ }
+
+ } else {
+ cache->lst = cache->dat = cache->ndx = NULL;
}
-
- // test
- // cache_writedata(cache->ndx,cache->dat,"//[TEST]//","test1","TEST PIPO",9);
- }
-
+
}
}
-
-
-
+
+
+
// lire un fichier.. (compatible \0)
char* readfile(char* fil) {
char* adr=NULL;
- int len=0;
+ INTsys len=0;
len=fsize(fil);
- if (len>0) { // existe
+ if (len >= 0) { // exists
FILE* fp;
fp=fopen(fconv(fil),"rb");
if (fp!=NULL) { // n'existe pas (!)
adr=(char*) malloct(len+1);
if (adr!=NULL) {
- if ((int) fread(adr,1,len,fp)!=len) { // fichier endommagé ?
+ if (len > 0 && (INTsys)fread(adr,1,len,fp) != len) { // fichier endommagé ?
freet(adr);
adr=NULL;
} else
@@ -794,7 +894,7 @@ char* readfile_or(char* fil,char* defaultdata) {
else {
char *adr=malloct(strlen(defaultdata)+2);
if (adr) {
- strcpy(adr,defaultdata);
+ strcpybuff(adr,defaultdata);
return adr;
}
}
@@ -804,22 +904,24 @@ char* readfile_or(char* fil,char* defaultdata) {
// écriture/lecture d'une chaîne sur un fichier
// -1 : erreur, sinon 0
int cache_wstr(FILE* fp,char* s) {
- int i;
+ INTsys i;
char buff[256+4];
i=strlen(s);
- sprintf(buff,"%d\n",i);
- if (fwrite(buff,1,strlen(buff),fp) != strlen(buff))
+ sprintf(buff,INTsysP "\n",i);
+ if (fwrite(buff,1,(INTsys)strlen(buff),fp) != strlen(buff))
return -1;
if (i>0)
- if ((int) fwrite(s,1,i,fp) != i)
+ if ((INTsys)fwrite(s,1,i,fp) != i)
return -1;
return 0;
}
void cache_rstr(FILE* fp,char* s) {
- int i;
+ INTsys i;
char buff[256+4];
linput(fp,buff,256);
- sscanf(buff,"%d",&i);
+ sscanf(buff,INTsysP,&i);
+ if (i < 0 || i > 32768) /* error, something nasty happened */
+ i=0;
if (i>0)
fread(s,1,i,fp);
*(s+i)='\0';
diff --git a/src/htscache.h b/src/htscache.h
index 08069d1..ef897f1 100644
--- a/src/htscache.h
+++ b/src/htscache.h
@@ -45,8 +45,10 @@ Please visit our Website: http://www.httrack.com
// cache
void cache_mayadd(httrackp* opt,cache_back* cache,htsblk* r,char* url_adr,char* url_fil,char* url_save);
void cache_add(htsblk r,char* url_adr,char* url_fil,char* url_save,FILE* cache_ndx,FILE* cache_dat,int all_in_cache);
-htsblk cache_read(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save);
-htsblk* cache_header(httrackp* opt,cache_back* cache,char* adr,char* fil);
+htsblk cache_read(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* location);
+htsblk cache_read_ro(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* location);
+htsblk cache_readex(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* location,char* return_save,int readonly);
+htsblk* cache_header(httrackp* opt,cache_back* cache,char* adr,char* fil,htsblk* r);
void cache_init(cache_back* cache,httrackp* opt);
int cache_writedata(FILE* cache_ndx,FILE* cache_dat,char* str1,char* str2,char* outbuff,int len);
diff --git a/src/htscatchurl.c b/src/htscatchurl.c
index c119677..8455ea0 100644
--- a/src/htscatchurl.c
+++ b/src/htscatchurl.c
@@ -67,7 +67,7 @@ Please visit our Website: http://www.httrack.com
// 0- Init the URL catcher with standard port
// catch_url_init(&port,&return_host);
-T_SOC catch_url_init_std(int* port_prox,char* adr_prox) {
+HTSEXT_API T_SOC catch_url_init_std(int* port_prox,char* adr_prox) {
T_SOC soc;
int try_to_listen_to[]={8080,3128,80,81,82,8081,3129,31337,0,-1};
int i=0;
@@ -83,28 +83,10 @@ T_SOC catch_url_init_std(int* port_prox,char* adr_prox) {
// 1- Init the URL catcher
// catch_url_init(&port,&return_host);
-T_SOC catch_url_init(int* port,char* adr) {
+HTSEXT_API T_SOC catch_url_init(int* port,char* adr) {
T_SOC soc = INVALID_SOCKET;
char h_loc[256+2];
- /*
-#ifdef _WIN32
- {
- WORD wVersionRequested;
- WSADATA wsadata;
- int stat;
- wVersionRequested = 0x0101;
- stat = WSAStartup( wVersionRequested, &wsadata );
- if (stat != 0) {
- return INVALID_SOCKET;
- } else if (LOBYTE(wsadata.wVersion) != 1 && HIBYTE(wsadata.wVersion) != 1) {
- WSACleanup();
- return INVALID_SOCKET;
- }
- }
-#endif
- */
-
if (gethostname(h_loc,256)==0) { // host name
SOCaddr server;
int server_size=sizeof(server);
@@ -132,7 +114,7 @@ T_SOC catch_url_init(int* port,char* adr) {
if (listen(soc,10)>=0) { // au pif le 10
SOCaddr_inetntoa(adr, 128, server2, len);
} else {
-#if _WIN32
+#ifdef _WIN32
closesocket(soc);
#else
close(soc);
@@ -142,7 +124,7 @@ T_SOC catch_url_init(int* port,char* adr) {
} else {
-#if _WIN32
+#ifdef _WIN32
closesocket(soc);
#else
close(soc);
@@ -152,7 +134,7 @@ T_SOC catch_url_init(int* port,char* adr) {
} else {
-#if _WIN32
+#ifdef _WIN32
closesocket(soc);
#else
close(soc);
@@ -171,7 +153,7 @@ T_SOC catch_url_init(int* port,char* adr) {
// returns 0 if error
// url: buffer where URL must be stored - or ip:port in case of failure
// data: 32Kb
-int catch_url(T_SOC soc,char* url,char* method,char* data) {
+HTSEXT_API int catch_url(T_SOC soc,char* url,char* method,char* data) {
int retour=0;
// connexion (accept)
@@ -234,11 +216,11 @@ int catch_url(T_SOC soc,char* url,char* method,char* data) {
while(strnotempty(line)) {
socinput(soc,line,1000);
treathead(NULL,NULL,NULL,&blkretour,line); // traiter
- strcat(data,line);
- strcat(data,"\r\n");
+ strcatbuff(data,line);
+ strcatbuff(data,"\r\n");
}
// CR/LF final de l'en tête inutile car déja placé via la ligne vide juste au dessus
- //strcat(data,"\r\n");
+ //strcatbuff(data,"\r\n");
if (blkretour.totalsize>0) {
int len=(int)min(blkretour.totalsize,32000);
int pos=strlen(data);
diff --git a/src/htscatchurl.h b/src/htscatchurl.h
index 77036fd..a2514ef 100644
--- a/src/htscatchurl.h
+++ b/src/htscatchurl.h
@@ -43,9 +43,11 @@ Please visit our Website: http://www.httrack.com
// Fonctions
void socinput(T_SOC soc,char* s,int max);
-T_SOC catch_url_init_std(int* port_prox,char* adr_prox);
-T_SOC catch_url_init(int* port,char* adr);
-int catch_url(T_SOC soc,char* url,char* method,char* data);
+#ifndef HTTRACK_DEFLIB
+HTSEXT_API T_SOC catch_url_init_std(int* port_prox,char* adr_prox);
+HTSEXT_API T_SOC catch_url_init(int* port,char* adr);
+HTSEXT_API int catch_url(T_SOC soc,char* url,char* method,char* data);
+#endif
#define CATCH_RESPONSE \
"HTTP/1.0 200 OK\r\n"\
diff --git a/src/htscore.c b/src/htscore.c
index 1b9db7a..ba1e226 100644
--- a/src/htscore.c
+++ b/src/htscore.c
@@ -51,30 +51,40 @@ Please visit our Website: http://www.httrack.com
#include "htsmd5.h"
#include "htsindex.h"
+/* external modules */
+#include "htsmodules.h"
+
// htswrap_add
#include "htswrap.h"
+
+// parser
+#include "htsparse.h"
+
/* END specific definitions */
/* HTML parsing */
#if HTS_ANALYSTE
-t_hts_htmlcheck_init hts_htmlcheck_init;
-t_hts_htmlcheck_uninit hts_htmlcheck_uninit;
-t_hts_htmlcheck_start hts_htmlcheck_start;
-t_hts_htmlcheck_end hts_htmlcheck_end;
-t_hts_htmlcheck_chopt hts_htmlcheck_chopt;
-t_hts_htmlcheck hts_htmlcheck;
-t_hts_htmlcheck_query hts_htmlcheck_query;
-t_hts_htmlcheck_query2 hts_htmlcheck_query2;
-t_hts_htmlcheck_query3 hts_htmlcheck_query3;
-t_hts_htmlcheck_loop hts_htmlcheck_loop;
-t_hts_htmlcheck_check hts_htmlcheck_check;
-t_hts_htmlcheck_pause hts_htmlcheck_pause;
-t_hts_htmlcheck_filesave hts_htmlcheck_filesave;
-t_hts_htmlcheck_linkdetected hts_htmlcheck_linkdetected;
-t_hts_htmlcheck_xfrstatus hts_htmlcheck_xfrstatus;
-t_hts_htmlcheck_savename hts_htmlcheck_savename;
+t_hts_htmlcheck_init hts_htmlcheck_init = NULL;
+t_hts_htmlcheck_uninit hts_htmlcheck_uninit = NULL;
+t_hts_htmlcheck_start hts_htmlcheck_start = NULL;
+t_hts_htmlcheck_end hts_htmlcheck_end = NULL;
+t_hts_htmlcheck_chopt hts_htmlcheck_chopt = NULL;
+t_hts_htmlcheck hts_htmlcheck = NULL;
+t_hts_htmlcheck_query hts_htmlcheck_query = NULL;
+t_hts_htmlcheck_query2 hts_htmlcheck_query2 = NULL;
+t_hts_htmlcheck_query3 hts_htmlcheck_query3 = NULL;
+t_hts_htmlcheck_loop hts_htmlcheck_loop = NULL;
+t_hts_htmlcheck_check hts_htmlcheck_check = NULL;
+t_hts_htmlcheck_pause hts_htmlcheck_pause = NULL;
+t_hts_htmlcheck_filesave hts_htmlcheck_filesave = NULL;
+t_hts_htmlcheck_linkdetected hts_htmlcheck_linkdetected = NULL;
+t_hts_htmlcheck_xfrstatus hts_htmlcheck_xfrstatus = NULL;
+t_hts_htmlcheck_savename hts_htmlcheck_savename = NULL;
+t_hts_htmlcheck_sendhead hts_htmlcheck_sendhead = NULL;
+t_hts_htmlcheck_receivehead hts_htmlcheck_receivehead = NULL;
+
char _hts_errmsg[1100]="";
int _hts_in_html_parsing=0;
@@ -84,6 +94,10 @@ int _hts_setpause=0;
//httrackp* _hts_setopt=NULL;
char** _hts_addurl=NULL;
+/* external modules */
+extern int hts_parse_externals(htsmoduleStruct* str);
+extern void htspe_init(void);
+
//
int _hts_cancel=0;
#endif
@@ -163,7 +177,7 @@ hts_htmlcheck_end(); \
#define HTMLCHECK_UNINIT
#endif
-#define XH_extuninit { \
+#define XH_extuninit do { \
int i; \
HTMLCHECK_UNINIT \
if (liens!=NULL) { \
@@ -187,7 +201,7 @@ hts_htmlcheck_end(); \
if (back) { \
int i; \
for(i=0;i<back_max;i++) { \
- back_delete(back,i); \
+ back_delete(&opt,back,i); \
} \
freet(back); back=NULL; \
} \
@@ -208,9 +222,9 @@ hts_htmlcheck_end(); \
if (template_header) { freet(template_header); template_header=NULL; } \
if (template_body) { freet(template_body); template_body=NULL; } \
if (template_footer) { freet(template_footer); template_footer=NULL; } \
- structcheck_init(-1); \
-}
-#define XH_uninit XH_extuninit if (r.adr) { freet(r.adr); r.adr=NULL; }
+ /*structcheck_init(-1);*/ \
+} while(0)
+#define XH_uninit do { XH_extuninit; if (r.adr) { freet(r.adr); r.adr=NULL; } } while(0)
// Enregistrement d'un lien:
// on calcule la taille nécessaire: taille des 3 chaînes à stocker (taille forcée paire, plus 2 octets de sécurité)
@@ -218,14 +232,13 @@ hts_htmlcheck_end(); \
// enfin on écrit à l'adresse courante du buffer, qu'on incrémente. on décrémente la taille dispo d'autant ensuite
// codebase: si non nul et si .class stockee on le note pour chemin primaire pour classes
// FA,FS: former_adr et former_fil, lien original
-#define REALLOC_SIZE 8192
#if HTS_HASH
#define liens_record_sav_len(A)
#else
#define liens_record_sav_len(A) (A)->sav_len=strlen((A)->sav)
#endif
-#define liens_record(A,F,S,FA,FF) { \
+#define liens_record(A,F,S,FA,FF,NORM) { \
int notecode=0; \
int lienurl_len=((sizeof(lien_url)+HTS_ALIGN-1)/HTS_ALIGN)*HTS_ALIGN,\
adr_len=strlen(A),\
@@ -257,179 +270,21 @@ liens[lien_tot]->adr=lien_buffer; lien_buffer+=adr_len; lien_size-=adr_len; \
liens[lien_tot]->fil=lien_buffer; lien_buffer+=fil_len; lien_size-=fil_len; \
liens[lien_tot]->sav=lien_buffer; lien_buffer+=sav_len; lien_size-=sav_len; \
liens[lien_tot]->cod=NULL; \
-if (notecode) { liens[lien_tot]->cod=lien_buffer; lien_buffer+=cod_len; lien_size-=cod_len; strcpy(liens[lien_tot]->cod,codebase); } \
+if (notecode) { liens[lien_tot]->cod=lien_buffer; lien_buffer+=cod_len; lien_size-=cod_len; strcpybuff(liens[lien_tot]->cod,codebase); } \
if (former_adr_len>0) {\
liens[lien_tot]->former_adr=lien_buffer; lien_buffer+=former_adr_len; lien_size-=former_adr_len; \
liens[lien_tot]->former_fil=lien_buffer; lien_buffer+=former_fil_len; lien_size-=former_fil_len; \
-strcpy(liens[lien_tot]->former_adr,FA); \
-strcpy(liens[lien_tot]->former_fil,FF); \
+strcpybuff(liens[lien_tot]->former_adr,FA); \
+strcpybuff(liens[lien_tot]->former_fil,FF); \
}\
-strcpy(liens[lien_tot]->adr,A); \
-strcpy(liens[lien_tot]->fil,F); \
-strcpy(liens[lien_tot]->sav,S); \
+strcpybuff(liens[lien_tot]->adr,A); \
+strcpybuff(liens[lien_tot]->fil,F); \
+strcpybuff(liens[lien_tot]->sav,S); \
liens_record_sav_len(liens[lien_tot]); \
-hash_write(&hash,lien_tot); \
+hash_write(hashptr,lien_tot,NORM); \
} \
}
-/* - abandonné (simplifie) -
-// Ajouter à un lien EXISTANT deux champs former_adr et former_fil pour indiquer le nom d'un fichier avant un "move"
-// NOTE: si un alloc est fait ici il n'y aura pas de freet() à la fin, tant pis (firstbloc)
-#define liens_add_former(index,A,F) { \
-int adr_len=strlen(A),fil_len=strlen(F); \
-adr_len=(adr_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN+4; fil_len=(fil_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN+4; \
-if ((int) lien_size < (int) (adr_len+fil_len)) { \
-lien_buffer=(char*) calloct(add_tab_alloc,1); \
-lien_size=add_tab_alloc; \
-} \
-if (lien_buffer!=NULL) { \
-if (liens[lien_tot]!=NULL) { \
-liens[lien_tot]->former_adr=lien_buffer; lien_buffer+=adr_len; lien_size-=adr_len; \
-liens[lien_tot]->former_fil=lien_buffer; lien_buffer+=fil_len; lien_size-=fil_len; \
-strcpy(liens[lien_tot]->former_adr,A); \
-strcpy(liens[lien_tot]->former_fil,F); \
-} \
-} \
-}
-*/
-
-#if 0
-#define HT_ADD_ADR { \
- fwrite(lastsaved,1,((int) (adr - lastsaved)),fp); \
- lastsaved=adr; }
-#define HT_ADD(A) fwrite(A,1,(int) strlen(A),fp);
-#define HT_ADD_START
-#define HT_ADD_END if (fp) { fclose(fp); fp=NULL; }
-#define HT_ADD_FOP { \
- fp=filecreate(savename); \
- if (fp==NULL) { \
- if (opt.errlog) { \
- fspc(opt.errlog,"error"); fprintf(opt.errlog,"Unable to create %s for %s%s"LF,savename,urladr,urlfil); \
- test_flush; \
- } \
- freet(r.adr); r.adr=NULL; \
- error=1; \
- } \
- }
-#else
-// version optimisée, qui permet de ne pas toucher aux html non modifiés (update)
-#define HT_ADD_CHK(A) if (((int) (A)+ht_len+1) >= ht_size) { \
- ht_size=(A)+ht_len+REALLOC_SIZE; \
- ht_buff=(char*) realloct(ht_buff,ht_size); \
- if (ht_buff==NULL) { \
- printf("PANIC! : Not enough memory [%d]\n",__LINE__); \
- XH_uninit; \
- exit(1); \
- } \
- } \
- ht_len+=A;
-/*
-(Optimized)
-#define HT_ADD_ADR { int i,j=ht_len; HT_ADD_CHK(((int) adr)- ((int) lastsaved)) \
- for(i=0;i<((int) adr)- ((int) lastsaved);i++) \
- ht_buff[j+i]=lastsaved[i]; \
- ht_buff[j+((int) adr)- ((int) lastsaved)]='\0'; \
- lastsaved=adr; }
-*/
-#define HT_ADD_ADR \
- if ((opt.getmode & 1) && (ptr>0)) { \
- int i=((int) (adr - lastsaved)),j=ht_len; HT_ADD_CHK(i) \
- memcpy(ht_buff+j, lastsaved, i); \
- ht_buff[j+i]='\0'; \
- lastsaved=adr; \
- }
-/*
-(Optimized)
-#define HT_ADD(A) { HT_ADD_CHK(strlen(A)) strcat(ht_buff,A); }
-*/
-#define HT_ADD(A) \
- if ((opt.getmode & 1) && (ptr>0)) { \
- int i=strlen(A),j=ht_len; \
- if (i) { \
- HT_ADD_CHK(i) \
- memcpy(ht_buff+j, A, i); \
- ht_buff[j+i]='\0'; \
- } }
-#define HT_ADD_START \
- int ht_size=(int)(r.size*5)/4+REALLOC_SIZE; \
- int ht_len=0; \
- char* ht_buff=NULL; \
- if ((opt.getmode & 1) && (ptr>0)) { \
- ht_buff=(char*) malloct(ht_size); \
- if (ht_buff==NULL) { \
- printf("PANIC! : Not enough memory [%d]\n",__LINE__); \
- XH_uninit; \
- exit(1); \
- } \
- ht_buff[0]='\0'; \
- }
-#define HT_ADD_END { \
- int ok=0;\
- if (ht_buff) { \
- int file_len=(int) strlen(ht_buff);\
- char digest[32+2];\
- digest[0]='\0';\
- domd5mem(ht_buff,file_len,digest,1);\
- if (fsize(antislash(savename))==file_len) { \
- int mlen;\
- char* mbuff;\
- cache_readdata(&cache,"//[HTML-MD5]//",savename,&mbuff,&mlen);\
- if (mlen) mbuff[mlen]='\0';\
- if ((mlen == 32) && (strcmp(((mbuff!=NULL)?mbuff:""),digest)==0)) {\
- ok=1;\
- if ( (opt.debug>1) && (opt.log!=NULL) ) {\
- fspc(opt.log,"debug"); fprintf(opt.log,"File not re-written (md5): %s"LF,savename);\
- test_flush;\
- }\
- } else {\
- ok=0;\
- } \
- }\
- if (!ok) { \
- fp=filecreate(savename); \
- if (fp) { \
- if (file_len>0) {\
- if ((int)fwrite(ht_buff,1,file_len,fp) != file_len) { \
- if (opt.errlog) { \
- fspc(opt.errlog,"error"); fprintf(opt.errlog,"Unable to write HTML file %s"LF,savename);\
- test_flush;\
- }\
- }\
- }\
- fclose(fp); fp=NULL; \
- if (strnotempty(r.lastmodified)) \
- set_filetime_rfc822(savename,r.lastmodified); \
- usercommand(0,NULL,antislash(savename)); \
- } else {\
- if (opt.errlog) { \
- fspc(opt.errlog,"error");\
- fprintf(opt.errlog,"Unable to save file %s"LF,savename);\
- test_flush;\
- }\
- }\
- } else {\
- filenote(savename,NULL); \
- }\
- if (cache.ndx)\
- cache_writedata(cache.ndx,cache.dat,"//[HTML-MD5]//",savename,digest,(int)strlen(digest));\
- } \
- freet(ht_buff); ht_buff=NULL; \
- }
-#define HT_ADD_FOP
-#endif
-
-// libérer filters[0] pour insérer un élément dans filters[0]
-#define HT_INSERT_FILTERS0 {\
- int i;\
- if (filptr>0) {\
- for(i=filptr-1;i>=0;i--) {\
- strcpy(filters[i+1],filters[i]);\
- }\
- }\
- strcpy(filters[0],"");\
- filptr++;\
- filptr=minimum(filptr,filter_max);\
-}
#define HT_INDEX_END do { \
if (!makeindex_done) { \
@@ -446,7 +301,7 @@ if (makeindex_fp) { \
fflush(makeindex_fp); \
fclose(makeindex_fp); /* à ne pas oublier sinon on passe une nuit blanche */ \
makeindex_fp=NULL; \
- usercommand(0,NULL,fconcat(opt.path_html,"index.html")); \
+ usercommand(&opt,0,NULL,fconcat(opt.path_html,"index.html"),"",""); \
} \
} \
makeindex_done=1; /* ok c'est fait */ \
@@ -463,6 +318,7 @@ int httpmirror(char* url1,httrackp* ptropt) {
int lien_tot=0; // nombre de liens pour le moment
lien_url** liens=NULL; // les pointeurs sur les liens
hash_struct hash; // système de hachage, accélère la recherche dans les liens
+ hash_struct* hashptr = &hash;
t_cookie cookie; // gestion des cookies
int lien_max=0;
int lien_size=0; // octets restants dans buffer liens dispo
@@ -522,8 +378,10 @@ int httpmirror(char* url1,httrackp* ptropt) {
/* reset stats */
HTS_STAT.HTS_TOTAL_RECV=0;
HTS_STAT.istat_bytes[0]=HTS_STAT.istat_bytes[1]=0;
+ /*
if (opt.aff_progress)
lastime=HTS_STAT.stat_timestart;
+ */
if (opt.shell) {
last_info_shell=HTS_STAT.stat_timestart;
}
@@ -533,16 +391,17 @@ int httpmirror(char* url1,httrackp* ptropt) {
// initialiser compteur erreurs
fspc(NULL,NULL);
+ // init external modules
+ htspe_init();
+
// initialiser cookie
if (opt.accept_cookie) {
opt.cookie=&cookie;
cookie.max_len=30000; // max len
- strcpy(cookie.data,"");
+ strcpybuff(cookie.data,"");
// Charger cookies.txt par défaut ou cookies.txt du miroir
- if (fexist(fconcat(opt.path_log,"cookies.txt")))
- cookie_load(opt.cookie,opt.path_log,"cookies.txt");
- else if (fexist("cookies.txt"))
- cookie_load(opt.cookie,"","cookies.txt");
+ cookie_load(opt.cookie,opt.path_log,"cookies.txt");
+ cookie_load(opt.cookie,"","cookies.txt");
} else
opt.cookie=NULL;
@@ -550,16 +409,16 @@ int httpmirror(char* url1,httrackp* ptropt) {
exit_xh=0; // sortir prématurément (var globale)
// initialiser usercommand
- usercommand(opt.sys_com_exec,opt.sys_com,"");
+ usercommand(&opt,opt.sys_com_exec,opt.sys_com,"","","");
// initialiser structcheck
- structcheck_init(1);
+ // structcheck_init(1);
// initialiser tableau options accessible par d'autres fonctions (signal)
hts_declareoptbuffer(&opt);
// initialiser verif_backblue
- verif_backblue(NULL);
+ verif_backblue(&opt,NULL);
verif_external(0,0);
verif_external(1,0);
@@ -597,7 +456,7 @@ int httpmirror(char* url1,httrackp* ptropt) {
_hts_lockdns(-999);
// robots.txt
- strcpy(robots.adr,"!"); // dummy
+ strcpybuff(robots.adr,"!"); // dummy
robots.token[0]='\0';
robots.next=NULL; // suivant
opt.robotsptr = &robots;
@@ -615,6 +474,9 @@ int httpmirror(char* url1,httrackp* ptropt) {
opt.filters.filptr=&filptr;
//opt.filters.filter_max=&filter_max;
+ // hash table
+ opt.hash = &hash;
+
// tableau de pointeurs sur les liens
lien_max=maximum(opt.maxlink,32);
liens=(lien_url**) malloct(lien_max*sizeof(lien_url*)); // tableau de pointeurs sur les liens
@@ -674,12 +536,6 @@ int httpmirror(char* url1,httrackp* ptropt) {
joker=1;
else if (*a=='-')
joker=1;
- /* NON, certaines URL ont des * (!)
- else {
- int i=0;
- while((a[i]!=0) && (a[i]!=' ')) if (a[i++]=='*') joker=1;
- }
- */
if (joker) { // joker ou filters
//char* p;
@@ -697,29 +553,29 @@ int httpmirror(char* url1,httrackp* ptropt) {
// recopier prochaine chaine (+ ou -)
i=0;
- while((*a!=0) && (*a!=' ')) { tempo[i++]=*a; a++; }
+ while((*a!=0) && (!isspace((unsigned char)*a))) { tempo[i++]=*a; a++; }
tempo[i++]='\0';
- while(*a==' ') { a++; }
+ while(isspace((unsigned char)*a)) { a++; }
// sauter les + sans rien après..
if (strnotempty(tempo)) {
if ((plus==0) && (type==1)) { // implicite: *www.edf.fr par exemple
if (tempo[strlen(tempo)-1]!='*') {
- strcat(tempo,"*"); // ajouter un *
+ strcatbuff(tempo,"*"); // ajouter un *
}
}
if (type)
- strcpy(filters[filptr],"+");
+ strcpybuff(filters[filptr],"+");
else
- strcpy(filters[filptr],"-");
+ strcpybuff(filters[filptr],"-");
/*
if (strfield(tempo,"http://"))
- strcat(filters[filptr],tempo+7); // ignorer http://
+ strcatbuff(filters[filptr],tempo+7); // ignorer http://
else if (strfield(tempo,"ftp://"))
- strcat(filters[filptr],tempo+6); // ignorer ftp://
+ strcatbuff(filters[filptr],tempo+6); // ignorer ftp://
else
*/
- strcat(filters[filptr],tempo);
+ strcatbuff(filters[filptr],tempo);
filptr++;
/* sanity check */
@@ -745,16 +601,16 @@ int httpmirror(char* url1,httrackp* ptropt) {
char url[HTS_URLMAXSIZE*2];
// prochaine adresse
i=0;
- while((*a!=0) && (*a!=' ')) { url[i++]=*a; a++; }
- while(*a==' ') { a++; }
+ while((*a!=0) && (!isspace((unsigned char)*a))) { url[i++]=*a; a++; }
+ while(isspace((unsigned char)*a)) { a++; }
url[i++]='\0';
- //strcat(primary,"<PRIMARY=\"");
+ //strcatbuff(primary,"<PRIMARY=\"");
if (strstr(url,":/")==NULL)
- strcat(primary,"http://");
- strcat(primary,url);
- //strcat(primary,"\">");
- strcat(primary,"\n");
+ strcatbuff(primary,"http://");
+ strcatbuff(primary,url);
+ //strcatbuff(primary,"\">");
+ strcatbuff(primary,"\n");
}
} // while
@@ -762,13 +618,13 @@ int httpmirror(char* url1,httrackp* ptropt) {
/* OPTIMIZED for fast load */
if (strnotempty(opt.filelist)) {
char* filelist_buff=NULL;
- int filelist_sz=fsize(opt.filelist);
+ INTsys filelist_sz=fsize(opt.filelist);
if (filelist_sz>0) {
FILE* fp=fopen(opt.filelist,"rb");
if (fp) {
filelist_buff=malloct(filelist_sz + 2);
if (filelist_buff) {
- if ((int)fread(filelist_buff,1,filelist_sz,fp) != filelist_sz) {
+ if ((INTsys)fread(filelist_buff,1,filelist_sz,fp) != filelist_sz) {
freet(filelist_buff);
filelist_buff=NULL;
} else {
@@ -790,12 +646,12 @@ int httpmirror(char* url1,httrackp* ptropt) {
if (count && line[0]) {
n++;
if (strstr(line,":/")==NULL) {
- strcpy(primary_ptr, "http://");
+ strcpybuff(primary_ptr, "http://");
primary_ptr += strlen(primary_ptr);
}
- strcpy(primary_ptr, line);
+ strcpybuff(primary_ptr, line);
primary_ptr += strlen(primary_ptr);
- strcpy(primary_ptr, "\n");
+ strcpybuff(primary_ptr, "\n");
primary_ptr += 1;
}
}
@@ -815,7 +671,7 @@ int httpmirror(char* url1,httrackp* ptropt) {
// lien primaire
- liens_record("primary","/primary","primary.html","","");
+ liens_record("primary","/primary",fslash(fconcat(opt.path_html,"index.html")),"","",opt.urlhack);
if (liens[lien_tot]==NULL) { // erreur, pas de place réservée
printf("PANIC! : Not enough memory [%d]\n",__LINE__);
if (opt.errlog) {
@@ -836,7 +692,22 @@ int httpmirror(char* url1,httrackp* ptropt) {
lien_tot++;
// Initialiser cache
- cache_init(&cache,&opt);
+ {
+ int backupXFR = htsMemoryFastXfr;
+#if HTS_ANALYSTE
+ _hts_in_html_parsing=4;
+#endif
+ if (!hts_htmlcheck_loop(NULL,0,0,0,lien_tot,0,NULL)) {
+ exit_xh=1; // exit requested
+ }
+ htsMemoryFastXfr = 1; /* fast load */
+ cache_init(&cache,&opt);
+ htsMemoryFastXfr = backupXFR;
+#if HTS_ANALYSTE
+ _hts_in_html_parsing=0;
+#endif
+ }
+
}
#if BDEBUG==3
@@ -961,8 +832,10 @@ int httpmirror(char* url1,httrackp* ptropt) {
// note: recopie de plus haut
// noter heure actuelle de départ en secondes
HTS_STAT.stat_timestart=time_local();
+ /*
if (opt.aff_progress)
lastime=HTS_STAT.stat_timestart;
+ */
if (opt.shell) {
last_info_shell=HTS_STAT.stat_timestart;
}
@@ -1003,7 +876,7 @@ int httpmirror(char* url1,httrackp* ptropt) {
// recopier proxy
memcpy(&(r.req.proxy), &opt.proxy, sizeof(opt.proxy));
// et user-agent
- strcpy(r.req.user_agent,opt.user_agent);
+ strcpybuff(r.req.user_agent,opt.user_agent);
r.req.user_agent_send=opt.user_agent_send;
if (!error) {
@@ -1041,502 +914,93 @@ int httpmirror(char* url1,httrackp* ptropt) {
r.statuscode=200;
r.size=strlen(r.adr);
r.soc=INVALID_SOCKET;
- strcpy(r.contenttype,"text/html");
+ strcpybuff(r.contenttype,"text/html");
/*} else if (opt.maxsoc<=0) { // fichiers 1 à 1 en attente (pas de backing)
// charger le fichier en mémoire tout bêtement
r=xhttpget(urladr,urlfil);
//
*/
} else { // backing, multiples sockets
- //
- int b;
- int n;
-#if BDEBUG==1
- printf("\nBack test..\n");
-#endif
-
- // pause/lock files
- {
- int do_pause=0;
-
- // user pause lockfile : create hts-paused.lock --> HTTrack will be paused
- if (fexist(fconcat(opt.path_log,"hts-stop.lock"))) {
- // remove lockfile
- remove(fconcat(opt.path_log,"hts-stop.lock"));
- if (!fexist(fconcat(opt.path_log,"hts-stop.lock"))) {
- do_pause=1;
- }
- }
-
- // after receving N bytes, pause
- if (opt.fragment>0) {
- if ((HTS_STAT.stat_bytes-stat_fragment) > opt.fragment) {
- do_pause=1;
- }
- }
-
- // pause?
- if (do_pause) {
- if ( (opt.debug>0) && (opt.log!=NULL) ) {
- fspc(opt.log,"info"); fprintf(opt.log,"engine: pause requested.."LF);
- }
- while (back_nsoc(back,back_max)>0) { // attendre fin des transferts
- back_wait(back,back_max,&opt,&cache,HTS_STAT.stat_timestart);
- Sleep(200);
-#if HTS_ANALYSTE
- {
- back_wait(back,back_max,&opt,&cache,HTS_STAT.stat_timestart);
-
- // Transfer rate
- engine_stats();
-
- // Refresh various stats
- HTS_STAT.stat_nsocket=back_nsoc(back,back_max);
- HTS_STAT.stat_errors=fspc(NULL,"error");
- HTS_STAT.stat_warnings=fspc(NULL,"warning");
- HTS_STAT.stat_infos=fspc(NULL,"info");
- HTS_STAT.nbk=backlinks_done(liens,lien_tot,ptr);
- HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,back,back_max);
-
- b=0;
- if (!hts_htmlcheck_loop(back,back_max,b,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) {
- if (opt.errlog) {
- fspc(opt.errlog,"info"); fprintf(opt.errlog,"Exit requested by shell or user"LF);
- test_flush;
- }
- exit_xh=1; // exit requested
- XH_uninit;
- return 0;
- }
- }
-#endif
- }
- // On désalloue le buffer d'enregistrement des chemins créée, au cas où pendant la pause
- // l'utilisateur ferait un rm -r après avoir effectué un tar
- structcheck_init(1);
- {
- FILE* fp = fopen(fconcat(opt.path_log,"hts-paused.lock"),"wb");
- if (fp) {
- fspc(fp,"info"); // dater
- fprintf(fp,"Pause"LF"HTTrack is paused after retreiving "LLintP" bytes"LF"Delete this file to continue the mirror..."LF""LF"",HTS_STAT.stat_bytes);
- fclose(fp);
- }
- }
- stat_fragment=HTS_STAT.stat_bytes;
- /* Info for wrappers */
- if ( (opt.debug>0) && (opt.log!=NULL) ) {
- fspc(opt.log,"info"); fprintf(opt.log,"engine: pause: %s"LF,fconcat(opt.path_log,"hts-paused.lock"));
- }
-#if HTS_ANALYSTE
- hts_htmlcheck_pause(fconcat(opt.path_log,"hts-paused.lock"));
-#else
- while (fexist(fconcat(opt.path_log,"hts-paused.lock"))) {
- //back_wait(back,back_max,&opt,&cache,HTS_STAT.stat_timestart); inutile!! (plus de sockets actives)
- Sleep(1000);
- }
-#endif
- }
- //
- }
- // end of pause/lock files
-#if HTS_ANALYSTE
- // changement dans les préférences
-/*
- if (_hts_setopt) {
- copy_htsopt(_hts_setopt,&opt); // copier au besoin
- _hts_setopt=NULL; // effacer callback
- }
-*/
- if (_hts_addurl) {
- char add_adr[HTS_URLMAXSIZE*2];
- char add_fil[HTS_URLMAXSIZE*2];
- while(*_hts_addurl) {
- char add_url[HTS_URLMAXSIZE*2];
- add_adr[0]=add_fil[0]=add_url[0]='\0';
- if (!link_has_authority(*_hts_addurl))
- strcpy(add_url,"http://"); // ajouter http://
- strcat(add_url,*_hts_addurl);
- if (ident_url_absolute(add_url,add_adr,add_fil)>=0) {
- // ----Ajout----
- // noter NOUVEAU lien
- char add_sav[HTS_URLMAXSIZE*2];
- // calculer lien et éventuellement modifier addresse/fichier
- if (url_savename(add_adr,add_fil,add_sav,NULL,NULL,NULL,NULL,&opt,liens,lien_tot,back,back_max,&cache,&hash,ptr,numero_passe)!=-1) {
- if (hash_read(&hash,add_sav,"",0)<0) { // n'existe pas déja
- // enregistrer lien (MACRO)
- liens_record(add_adr,add_fil,add_sav,"","");
- if (liens[lien_tot]!=NULL) { // OK, pas d'erreur
- liens[lien_tot]->testmode=0; // mode test?
- liens[lien_tot]->link_import=0; // mode normal
- liens[lien_tot]->depth=opt.depth;
- liens[lien_tot]->pass2=max(0,numero_passe);
- liens[lien_tot]->retry=opt.retry;
- liens[lien_tot]->premier=lien_tot;
- liens[lien_tot]->precedent=lien_tot;
- lien_tot++;
- //
- if ((opt.debug>0) && (opt.log!=NULL)) {
- fspc(opt.log,"info"); fprintf(opt.log,"Link added by user: %s%s"LF,add_adr,add_fil); test_flush;
- }
- //
- } else { // oups erreur, plus de mémoire!!
- printf("PANIC! : Not enough memory [%d]\n",__LINE__);
- if (opt.errlog) {
- fprintf(opt.errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url)));
- test_flush;
- }
- //if (opt.getmode & 1) { if (fp) { fclose(fp); fp=NULL; } }
- XH_uninit; // désallocation mémoire & buffers
- return 0;
- }
- } else {
- if ( (opt.debug>0) && (opt.errlog!=NULL) ) {
- fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Existing link %s%s not added after user request"LF,add_adr,add_fil);
- test_flush;
- }
- }
-
- }
- } else {
- if (opt.errlog) {
- fspc(opt.errlog,"error");
- fprintf(opt.errlog,"Error during URL decoding for %s"LF,add_url);
- test_flush;
- }
- }
- // ----Fin Ajout----
- _hts_addurl++; // suivante
- }
- _hts_addurl=NULL; // libérer _hts_addurl
- }
- // si une pause a été demandée
- if (_hts_setpause) {
- // index du lien actuel
- int b=back_index(back,back_max,urladr,urlfil,savename);
- if (b<0) b=0; // forcer pour les stats
- while(_hts_setpause) { // on fait la pause..
- back_wait(back,back_max,&opt,&cache,HTS_STAT.stat_timestart);
-
- // Transfer rate
- engine_stats();
-
- // Refresh various stats
- HTS_STAT.stat_nsocket=back_nsoc(back,back_max);
- HTS_STAT.stat_errors=fspc(NULL,"error");
- HTS_STAT.stat_warnings=fspc(NULL,"warning");
- HTS_STAT.stat_infos=fspc(NULL,"info");
- HTS_STAT.nbk=backlinks_done(liens,lien_tot,ptr);
- HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,back,back_max);
-
- if (!hts_htmlcheck_loop(back,back_max,b,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) {
- if (opt.errlog) {
- fspc(opt.errlog,"info"); fprintf(opt.errlog,"Exit requested by shell or user"LF);
- test_flush;
- }
- exit_xh=1; // exit requested
- XH_uninit;
- return 0;
- }
- if (back_nsoc(back,back_max)==0)
- Sleep(250); // tite pause
- }
- }
-#endif
-
- // si le fichier n'est pas en backing, le mettre..
- if (!back_exist(back,back_max,urladr,urlfil,savename)) {
-#if BDEBUG==1
- printf("crash backing: %s%s\n",liens[ptr]->adr,liens[ptr]->fil);
-#endif
- if (back_add(back,back_max,&opt,&cache,urladr,urlfil,savename,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil,liens[ptr]->testmode,&liens[ptr]->pass2)==-1) {
- printf("PANIC! : Crash adding error, unexpected error found.. [%d]\n",__LINE__);
-#if BDEBUG==1
- printf("error while crash adding\n");
-#endif
- if (opt.errlog) {
- fspc(opt.errlog,"error"); fprintf(opt.errlog,"Unexpected backing error for %s%s"LF,urladr,urlfil);
- test_flush;
- }
-
- }
- }
-
-#if BDEBUG==1
- printf("test number of socks\n");
-#endif
-
- // ajouter autant de socket qu'on peut ajouter
- n=opt.maxsoc-back_nsoc(back,back_max);
-#if BDEBUG==1
- printf("%d sockets available for backing\n",n);
-#endif
-
-#if HTS_ANALYSTE
- if ((n>0) && (!_hts_setpause)) { // si sockets libre et pas en pause, ajouter
-#else
- if (n>0) { // si sockets libre
-#endif
- // remplir autant que l'on peut le cache (backing)
- back_fillmax(back,back_max,&opt,&cache,liens,ptr,numero_passe,lien_tot);
- }
-
- // index du lien actuel
-/*
- b=back_index(back,back_max,urladr,urlfil,savename);
-
- if (b>=0)
-*/
+ /*
+ **************************************
+ Get the next link, waiting for other files, handling external callbacks
+ */
{
- // ------------------------------------------------------------
- // attendre que le fichier actuel soit prêt - BOUCLE D'ATTENTE
- do {
-
- // index du lien actuel
- b=back_index(back,back_max,urladr,urlfil,savename);
-#if BDEBUG==1
- printf("back index %d, waiting\n",b);
-#endif
- // Continue to the loop if link still present
- if (b<0)
- continue;
-
- // Receive data
- if (back[b].status>0)
- back_wait(back,back_max,&opt,&cache,HTS_STAT.stat_timestart);
-
- // Continue to the loop if link still present
- b=back_index(back,back_max,urladr,urlfil,savename);
- if (b<0)
- continue;
-
- // And fill the backing stack
- if (back[b].status>0)
- back_fillmax(back,back_max,&opt,&cache,liens,ptr,numero_passe,lien_tot);
-
- // Continue to the loop if link still present
- b=back_index(back,back_max,urladr,urlfil,savename);
- if (b<0)
- continue;
-
- // autres occupations de HTTrack: statistiques, boucle d'attente, etc.
- if ((opt.makestat) || (opt.maketrack)) {
- TStamp l=time_local();
- if ((int) (l-makestat_time) >= 60) {
- if (makestat_fp != NULL) {
- fspc(makestat_fp,"info");
- fprintf(makestat_fp,"Rate= %d (/"LLintP") \11NewLinks= %d (/%d)"LF,(int) ((HTS_STAT.HTS_TOTAL_RECV-makestat_total)/(l-makestat_time)), HTS_STAT.HTS_TOTAL_RECV,(int) lien_tot-makestat_lnk,(int) lien_tot);
- fflush(makestat_fp);
- makestat_total=HTS_STAT.HTS_TOTAL_RECV;
- makestat_lnk=lien_tot;
- }
- if (maketrack_fp!=NULL) {
- int i;
- fspc(maketrack_fp,"info"); fprintf(maketrack_fp,LF);
- for(i=0;i<back_max;i++) {
- back_info(back,i,3,maketrack_fp);
- }
- fprintf(maketrack_fp,LF);
-
- }
- makestat_time=l;
- }
- }
-#if HTS_ANALYSTE
- {
- int i;
- {
- char* s=hts_cancel_file("");
- if (strnotempty(s)) { // fichier à canceller
- for(i=0;i<back_max;i++) {
- if ((back[i].status>0)) {
- if (strcmp(back[i].url_sav,s)==0) { // ok trouvé
- if (back[i].status != 1000) {
-#if HTS_DEBUG_CLOSESOCK
- DEBUG_W("user cancel: deletehttp\n");
-#endif
- if (back[i].r.soc!=INVALID_SOCKET) deletehttp(&back[i].r);
- back[i].r.soc=INVALID_SOCKET;
- back[i].r.statuscode=-1;
- strcpy(back[i].r.msg,"Cancelled by User");
- back[i].status=0; // terminé
- } else // cancel ftp.. flag à 1
- back[i].stop_ftp = 1;
- }
- }
- }
- s[0]='\0';
- }
- }
-
- // Transfer rate
- engine_stats();
-
- // Refresh various stats
- HTS_STAT.stat_nsocket=back_nsoc(back,back_max);
- HTS_STAT.stat_errors=fspc(NULL,"error");
- HTS_STAT.stat_warnings=fspc(NULL,"warning");
- HTS_STAT.stat_infos=fspc(NULL,"info");
- HTS_STAT.nbk=backlinks_done(liens,lien_tot,ptr);
- HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,back,back_max);
-
- if (!hts_htmlcheck_loop(back,back_max,b,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) {
- if (opt.errlog) {
- fspc(opt.errlog,"info"); fprintf(opt.errlog,"Exit requested by shell or user"LF);
- test_flush;
- }
- exit_xh=1; // exit requested
- XH_uninit;
- return 0;
- }
- }
-
-#endif
-#if HTS_POLL
- if ((opt.shell) || (opt.keyboard) || (opt.verbosedisplay) || (!opt.quiet)) {
- TStamp tl;
- info_shell=1;
-
- /* Toggle with ENTER */
- if (!opt.quiet) {
- if (check_stdin()) {
- char com[256];
- linput(stdin,com,200);
- if (opt.verbosedisplay==2)
- opt.verbosedisplay=1;
- else
- opt.verbosedisplay=2;
- /* Info for wrappers */
- if ( (opt.debug>0) && (opt.log!=NULL) ) {
- fspc(opt.log,"info"); fprintf(opt.log,"engine: change-options"LF);
- }
-#if HTS_ANALYSTE
- hts_htmlcheck_chopt(&opt);
-#endif
- }
- }
-
- /*
- ..useless..
- while (check_stdin()) { // données disponibles
- char com[256];
- com[0]='\0';
-
- if (!rcvd) rcvd=1;
- linput(stdin,com,256);
-
- if (strnotempty(com)) {
- if (strlen(com)<=2) {
- switch(*com) {
- case '?': { // Status?
- if (back[b].status>0) printf("WAIT\n");
- else printf("READY\n");
- }
- break;
- case 'f': { // Fichier en attente?
- if (back[b].status>0) printf("WAIT %s\n",back[b].url_fil);
- else printf("READY %s\n",back[b].url_fil);
- }
- break;
- case 'A': case 'F': { // filters
- int i;
- for(i=0;i<filptr;i++) {
- printf("%s ",filters[i]);
- }
- printf("\n");
- }
- break;
- case '#': { // Afficher statistique sur le nombre de liens, etc
- switch(*(com+1)) {
- case 'l': printf("%d\n",lien_tot); break; // nombre de liens enregistrés
- case 's': printf("%d\n",back_nsoc(back,back_max)); break; // nombre de sockets
- case 'r': printf("%d\n",(int) (HTS_STAT.HTS_TOTAL_RECV/(time_local()-HTS_STAT.stat_timestart))); break; // taux de transfert
- }
- }
- break;
- case 'K': if (*(com+1)=='!') { // Kill
- XH_uninit;
- return -1;
- }
- break;
- case 'X': if (*(com+1)=='!') { // exit
- exit_xh=1;
- }
- break;
- case 'I': if (*(com+1)=='+') info_shell=1; else info_shell=0;
- break;
- }
- io_flush;
- } else if (*com=='@') {
- printf("%s\n",com+1);
- io_flush;
- }
- }
-
- } // while
- */
- tl=time_local();
-
- // générer un message d'infos sur l'état actuel
- if (opt.shell) { // si shell
- if ((tl-last_info_shell)>0) { // toute les 1 sec
- FILE* fp=stdout;
- int a=0;
- last_info_shell=tl;
- if (fexist(fconcat(opt.path_log,"hts-autopsy"))) { // débuggage: teste si le robot est vivant
- // (oui je sais un robot vivant.. mais bon.. il a le droit de vivre lui aussi)
- // (libérons les robots esclaves de l'internet!)
- remove(fconcat(opt.path_log,"hts-autopsy"));
- fp=fopen(fconcat(opt.path_log,"hts-isalive"),"wb");
- a=1;
- }
- if ((info_shell) || a) {
- int i,j;
-
- fprintf(fp,"TIME %d"LF,(int) (tl-HTS_STAT.stat_timestart));
- fprintf(fp,"TOTAL %d"LF,(int) HTS_STAT.stat_bytes);
- fprintf(fp,"RATE %d"LF,(int) (HTS_STAT.HTS_TOTAL_RECV/(tl-HTS_STAT.stat_timestart)));
- fprintf(fp,"SOCKET %d"LF,back_nsoc(back,back_max));
- fprintf(fp,"LINK %d"LF,lien_tot);
- {
- LLint mem=0;
- for(i=0;i<back_max;i++)
- if (back[i].r.adr!=NULL)
- mem+=back[i].r.size;
- fprintf(fp,"INMEM "LLintP""LF,mem);
- }
- for(j=0;j<2;j++) { // passes pour ready et wait
- for(i=0;i<back_max;i++) {
- back_info(back,i,j+1,stdout); // maketrack_fp a la place de stdout ?? // **
- }
- }
- fprintf(fp,LF);
- if (a)
- fclose(fp);
- io_flush;
- }
- }
- } // si shell
-
- } // si shell ou keyboard (option)
- //
-#endif
- } while((b>=0) && (back[max(b,0)].status>0));
-
-
- // If link not found on the stack, it's because it has already been downloaded
- // in background
- // Then, skip it and go to the next one
- if (b<0) {
- if ((opt.debug>1) && (opt.log!=NULL)) {
- fspc(opt.log,"debug"); fprintf(opt.log,"link #%d is ready, no more on the stack, skipping: %s%s.."LF,ptr,urladr,urlfil);
- test_flush;
- }
-
- // prochain lien
- // ptr++;
-
+ char buff_err_msg[1024];
+ htsmoduleStruct str;
+ htsmoduleStructExtended stre;
+ buff_err_msg[0] = '\0';
+ memset(&str, 0, sizeof(str));
+ memset(&stre, 0, sizeof(stre));
+ /* */
+ str.err_msg = buff_err_msg;
+ str.filename = savename;
+ str.mime = r.contenttype;
+ str.url_host = urladr;
+ str.url_file = urlfil;
+ str.size = (int) r.size;
+ /* */
+ str.addLink = htsAddLink;
+ /* */
+ str.liens = liens;
+ str.opt = &opt;
+ str.back = back;
+ str.back_max = back_max;
+ str.cache = &cache;
+ str.hashptr = hashptr;
+ str.numero_passe = numero_passe;
+ str.add_tab_alloc = add_tab_alloc;
+ /* */
+ str.lien_tot_ = &lien_tot;
+ str.ptr_ = &ptr;
+ str.lien_size_ = &lien_size;
+ str.lien_buffer_ = &lien_buffer;
+ /* */
+ /* */
+ stre.r_ = &r;
+ /* */
+ stre.error_ = &error;
+ stre.exit_xh_ = &exit_xh;
+ stre.store_errpage_ = &store_errpage;
+ /* */
+ stre.base = base;
+ stre.codebase = codebase;
+ /* */
+ stre.filters_ = &filters;
+ stre.filptr_ = &filptr;
+ stre.robots_ = &robots;
+ stre.hash_ = &hash;
+ stre.lien_max_ = &lien_max;
+ /* */
+ stre.makeindex_done_ = &makeindex_done;
+ stre.makeindex_fp_ = &makeindex_fp;
+ stre.makeindex_links_ = &makeindex_links;
+ stre.makeindex_firstlink_ = makeindex_firstlink;
+ /* */
+ stre.template_header_ = template_header;
+ stre.template_body_ = template_body;
+ stre.template_footer_ = template_footer;
+ /* */
+ stre.stat_fragment_ = &stat_fragment;
+ stre.makestat_time = makestat_time;
+ stre.makestat_fp = makestat_fp;
+ stre.makestat_total_ = &makestat_total;
+ stre.makestat_lnk_ = &makestat_lnk;
+ stre.maketrack_fp = maketrack_fp;
+ /* FUNCTION DEPENDENT */
+ stre.loc_ = loc;
+ stre.last_info_shell_ = &last_info_shell;
+ stre.info_shell_ = &info_shell;
+
+ /* Parse */
+ switch(hts_mirror_wait_for_next_file(&str, &stre)) {
+ case -1:
+ XH_uninit;
+ return -1;
+ break;
+ case 2:
// Jump to 'continue'
// This is one of the very very rare cases where goto
// is acceptable
@@ -1544,136 +1008,21 @@ int httpmirror(char* url1,httrackp* ptropt) {
goto jump_if_done;
}
-
-#if HTS_ANALYSTE==2
-#else
- //if (!opt.quiet) { // petite animation
- if (!opt.verbosedisplay) {
- if (!opt.quiet) {
- static int roll=0; /* static: ok */
- roll=(roll+1)%4;
- printf("%c\x0d",("/-\\|")[roll]);
- fflush(stdout);
- }
- } else if (opt.verbosedisplay==1) {
- if (back[b].r.statuscode==200)
- printf("%d/%d: %s%s ("LLintP" bytes) - OK\33[K\r",ptr,lien_tot,back[b].url_adr,back[b].url_fil,back[b].r.size);
- else
- printf("%d/%d: %s%s ("LLintP" bytes) - %d\33[K\r",ptr,lien_tot,back[b].url_adr,back[b].url_fil,back[b].r.size,back[b].r.statuscode);
- fflush(stdout);
- }
- //}
-#endif
- // ------------------------------------------------------------
- // Vérificateur d'intégrité
-#if DEBUG_CHECKINT
- _CHECKINT(&back[b],"Retour de back_wait, après le while")
- {
- int i;
- for(i=0;i<back_max;i++) {
- char si[256];
- sprintf(si,"Test global après back_wait, index %d",i);
- _CHECKINT(&back[i],si)
- }
- }
-#endif
-
- // copier structure réponse htsblk
- memcpy(&r, &(back[b].r), sizeof(htsblk));
- r.location=loc; // ne PAS copier location!! adresse, pas de buffer
- if (back[b].r.location)
- strcpy(r.location,back[b].r.location);
- back[b].r.adr=NULL; // ne pas faire de desalloc ensuite
-
- // libérer emplacement backing
- back_delete(back,b);
-
- // progression
- if (opt.aff_progress) {
- TStamp tl=time_local();
- if ((tl-HTS_STAT.stat_timestart)>0) {
- char s[32];
- int i=0;
- lastime=tl;
- _CLRSCR; _GOTOXY("1","1");
- printf("Rate=%d B/sec\n",(int) (HTS_STAT.HTS_TOTAL_RECV/(tl-HTS_STAT.stat_timestart)));
- while(i<minimum(back_max,99)) { // **
- if (back[i].status>=0) { // loading..
- s[0]='\0';
- if (strlen(back[i].url_fil)>16)
- strcat(s,back[i].url_fil+strlen(back[i].url_fil)-16);
- else
- strncat(s,back[i].url_fil,16);
- printf("%s : ",s);
-
- printf("[");
- if (back[i].r.totalsize>0) {
- int p;
- int j;
- p=(int)((back[i].r.size*10)/back[i].r.totalsize);
- p=minimum(10,p);
- for(j=0;j<p;j++) printf("*");
- for(j=0;j<(10-p);j++) printf("-");
- } else {
- printf(LLintP,back[i].r.size);
- }
- printf("]");
-
- //} else if (back[i].status==0) {
- // strcpy(s,"ENDED");
- }
- printf("\n");
- i++;
- }
- io_flush;
- }
- }
-
- // débug graphique
-#if BDEBUG==2
- {
- char s[12];
- int i=0;
- _GOTOXY(1,1);
- printf("Rate=%d B/sec\n",(int) (HTS_STAT.HTS_TOTAL_RECV/(time_local()-HTS_STAT.stat_timestart)));
- while(i<minimum(back_max,160)) {
- if (back[i].status>0) {
- sprintf(s,"%d",back[i].r.size);
- } else if (back[i].status==0) {
- strcpy(s,"ENDED");
- } else
- strcpy(s," - ");
- while(strlen(s)<8) strcat(s," ");
- printf("%s",s); io_flush;
- i++;
- }
- }
-#endif
-
-
-#if BDEBUG==1
- printf("statuscode=%d with %s / msg=%s\n",r.statuscode,r.contenttype,r.msg);
-#endif
-
- }
- /*else {
-#if BDEBUG==1
- printf("back index error\n");
-#endif
}
- */
+
}
// FIN --RECUPERATION LIEN---
// ------------------------------------------------------------
-
-
-
+
+
+
} else { // lien vide..
if (opt.errlog) {
fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Warning, link #%d empty"LF,ptr); test_flush;
- error=1;
}
+ error=1;
+ goto jump_if_done;
} // test si url existe (non vide!)
@@ -1706,7 +1055,7 @@ int httpmirror(char* url1,httrackp* ptropt) {
}
// ---fin tester taille a posteriori---
-
+
// --------------------
// BOGUS MIME TYPE HACK
// Check if we have a bogus MIME type
@@ -1724,39 +1073,82 @@ int httpmirror(char* url1,httrackp* ptropt) {
if (strnotempty(r.cdispo)) { // Content-disposition set!
if (ishtml(savename) == 0) { // Non HTML!!
// patch it!
- strcpy(r.contenttype,"application/octet-stream");
+ strcpybuff(r.contenttype,"application/octet-stream");
}
}
}
}
}
- }
-
- // ------------------------------------
- // BOGUS MIME TYPE HACK II (the revenge)
- // Check if we have a bogus MIME type
- if ( (is_hypertext_mime(r.contenttype)) /* Is HTML or Js, .. */
- || (may_be_hypertext_mime(r.contenttype)) /* Is real media, .. */
- ) {
- if ((r.adr) && (r.size)) {
- unsigned int map[256];
- int i;
- unsigned int nspec = 0;
- map_characters((unsigned char*)r.adr, (unsigned int)r.size, (unsigned int*)map);
- for(i = 1 ; i < 32 ; i++) { // null chars ignored..
- if (!is_realspace(i)
- && i != 27 /* Damn you ISO2022-xx! */
+
+ // ------------------------------------
+ // BOGUS MIME TYPE HACK II (the revenge)
+ // Check if we have a bogus MIME type
+ if ( (is_hypertext_mime(r.contenttype)) /* Is HTML or Js, .. */
+ || (may_be_hypertext_mime(r.contenttype)) /* Is real media, .. */
+ ) {
+ if ((r.adr) && (r.size)) {
+ unsigned int map[256];
+ int i;
+ unsigned int nspec = 0;
+ map_characters((unsigned char*)r.adr, (unsigned int)r.size, (unsigned int*)map);
+ for(i = 1 ; i < 32 ; i++) { // null chars ignored..
+ if (!is_realspace(i)
+ && i != 27 /* Damn you ISO2022-xx! */
+ ) {
+ nspec += map[i];
+ }
+ }
+ /* On-the-fly UCS2 to ISO-8859-1 conversion (note: UCS2 should never be used on the net) */
+ if (
+ map[0] > r.size/10
+ &&
+ r.size % 2 == 0
+ &&
+ (
+ ( ((unsigned char) r.adr[0]) == 0xff && ((unsigned char) r.adr[1]) == 0xfe)
+ ||
+ ( ((unsigned char) r.adr[0]) == 0xfe && ((unsigned char) r.adr[1]) == 0xff)
+ )
) {
- nspec += map[i];
+ int lost=0;
+ int i;
+ int swap = (r.adr[0] == 0xff);
+ for(i = 0 ; i < r.size / 2 - 1 ; i++) {
+ unsigned int unic = 0;
+ if (swap)
+ unic = (r.adr[i*2 + 2] << 8) + r.adr[i*2 + 2 + 1];
+ else
+ unic = r.adr[i*2 + 2] + (r.adr[i*2 + 2 + 1] << 8);
+ if (unic <= 255)
+ r.adr[i] = (char) unic;
+ else {
+ r.adr[i] = '?';
+ lost++;
+ }
+ }
+ r.size = r.size / 2 - 1;
+ r.adr[r.size] = '\0';
+
+ if (opt.errlog) {
+ fspc(opt.errlog,"warning"); fprintf(opt.errlog,"File %s%s converted from UCS2 to 8-bit, %d characters lost during conversion (better to use UTF-8)"LF, urladr, urlfil, lost);
+ test_flush;
+ }
+ } else if ((nspec > r.size / 100) && (nspec > 10)) { // too many special characters
+ strcpybuff(r.contenttype,"application/octet-stream");
+ if (opt.errlog) {
+ fspc(opt.errlog,"warning"); fprintf(opt.errlog,"File not parsed, looks like binary: %s%s"LF,urladr,urlfil);
+ test_flush;
+ }
}
- }
- if ((nspec > r.size / 100) && (nspec > 10)) { // too many special characters
- strcpy(r.contenttype,"application/octet-stream");
- if (opt.errlog) {
- fspc(opt.errlog,"warning"); fprintf(opt.errlog,"File not parsed, looks like binary: %s%s"LF,urladr,urlfil);
- test_flush;
+
+ /* This hack avoids problems when parsing '\0' characters */
+ for(i = 0 ; i < r.size ; i++) {
+ if (r.adr[i] == '\0') r.adr[i] = ' ';
}
+
}
+
+
}
}
@@ -1776,8 +1168,15 @@ int httpmirror(char* url1,httrackp* ptropt) {
if (fp) {
r.adr=malloct((int)sz + 2);
if (r.adr) {
- fread(r.adr,(int)sz,1,fp);
- r.size=sz;
+ if (fread(r.adr,1,(INTsys)sz,fp) == sz) {
+ r.size=sz;
+ } else {
+ freet(r.adr);
+ r.size=0;
+ r.adr = NULL;
+ r.statuscode=-1;
+ strcpybuff(r.msg, ".RAM read error");
+ }
fclose(fp);
fp=NULL;
// remove (temporary) file!
@@ -1801,7 +1200,7 @@ int httpmirror(char* url1,httrackp* ptropt) {
if (!error) {
if (ptr>0) {
if (liens[ptr]) {
- cache_mayadd(&opt,&cache,&r,urladr,urlfil,savename);
+ xxcache_mayadd(&opt,&cache,&r,urladr,urlfil,savename);
} else
error=1;
}
@@ -1809,392 +1208,91 @@ int httpmirror(char* url1,httrackp* ptropt) {
*/
// ---fin stockage en cache---
-
-
- // DEBUT rattrapage des 301,302,307..
- // ------------------------------------------------------------
+
+
+ /*
+ **************************************
+ Check "Moved permanently" and other similar errors, retrying URLs if necessary and handling
+ redirect pages.
+ */
if (!error) {
- ////////{
- // on a chargé un fichier en plus
- // if (!error) stat_loaded+=r.size;
+ char buff_err_msg[1024];
+ htsmoduleStruct str;
+ htsmoduleStructExtended stre;
+ buff_err_msg[0] = '\0';
+ memset(&str, 0, sizeof(str));
+ memset(&stre, 0, sizeof(stre));
+ /* */
+ str.err_msg = buff_err_msg;
+ str.filename = savename;
+ str.mime = r.contenttype;
+ str.url_host = urladr;
+ str.url_file = urlfil;
+ str.size = (int) r.size;
+ /* */
+ str.addLink = htsAddLink;
+ /* */
+ str.liens = liens;
+ str.opt = &opt;
+ str.back = back;
+ str.back_max = back_max;
+ str.cache = &cache;
+ str.hashptr = hashptr;
+ str.numero_passe = numero_passe;
+ str.add_tab_alloc = add_tab_alloc;
+ /* */
+ str.lien_tot_ = &lien_tot;
+ str.ptr_ = &ptr;
+ str.lien_size_ = &lien_size;
+ str.lien_buffer_ = &lien_buffer;
+ /* */
+ /* */
+ stre.r_ = &r;
+ /* */
+ stre.error_ = &error;
+ stre.exit_xh_ = &exit_xh;
+ stre.store_errpage_ = &store_errpage;
+ /* */
+ stre.base = base;
+ stre.codebase = codebase;
+ /* */
+ stre.filters_ = &filters;
+ stre.filptr_ = &filptr;
+ stre.robots_ = &robots;
+ stre.hash_ = &hash;
+ stre.lien_max_ = &lien_max;
+ /* */
+ stre.makeindex_done_ = &makeindex_done;
+ stre.makeindex_fp_ = &makeindex_fp;
+ stre.makeindex_links_ = &makeindex_links;
+ stre.makeindex_firstlink_ = makeindex_firstlink;
+ /* */
+ stre.template_header_ = template_header;
+ stre.template_body_ = template_body;
+ stre.template_footer_ = template_footer;
+ /* */
+ stre.stat_fragment_ = &stat_fragment;
+ stre.makestat_time = makestat_time;
+ stre.makestat_fp = makestat_fp;
+ stre.makestat_total_ = &makestat_total;
+ stre.makestat_lnk_ = &makestat_lnk;
+ stre.maketrack_fp = maketrack_fp;
- // ------------------------------------------------------------
- // Rattrapage des 301,302,307 (moved) et 412,416 - les 304 le sont dans le backing
- // ------------------------------------------------------------
- if ( (r.statuscode==301)
- || (r.statuscode==302)
- || (r.statuscode==303)
- || (r.statuscode==307)
- ) {
- //if (r.adr!=NULL) { // adr==null si fichier direct. [catch: davename normalement si cgi]
- //int i=0;
- char *rn=NULL;
- // char* p;
-
- if ( (opt.debug>0) && (opt.errlog!=NULL) ) {
- //if (opt.errlog) {
- fspc(opt.errlog,"warning"); fprintf(opt.errlog,"%s for %s%s"LF,r.msg,urladr,urlfil);
- test_flush;
- }
-
-
- {
- char mov_url[HTS_URLMAXSIZE*2],mov_adr[HTS_URLMAXSIZE*2],mov_fil[HTS_URLMAXSIZE*2];
- int get_it=0; // ne pas prendre le fichier à la même adresse par défaut
- int reponse=0;
- mov_url[0]='\0'; mov_adr[0]='\0'; mov_fil[0]='\0';
- //
-
- strcpy(mov_url,r.location);
-
- // url qque -> adresse+fichier
- if ((reponse=ident_url_relatif(mov_url,urladr,urlfil,mov_adr,mov_fil))>=0) {
- int set_prio_to=0; // pas de priotité fixéd par wizard
-
- //if (ident_url_absolute(mov_url,mov_adr,mov_fil)!=-1) { // ok URL reconnue
- // c'est (en gros) la même URL..
- // si c'est un problème de casse dans le host c'est que le serveur est buggé
- // ("RFC says.." : host name IS case insensitive)
- if ((strfield2(mov_adr,urladr)!=0) && (strfield2(mov_fil,urlfil)!=0)) { // identique à casse près
- // on tourne en rond
- if (strcmp(mov_fil,urlfil)==0) {
- error=1;
- get_it=-1; // ne rien faire
- if (opt.errlog) {
- fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Can not bear crazy server (%s) for %s%s"LF,r.msg,urladr,urlfil);
- test_flush;
- }
- } else { // mauvaise casse, effacer entrée dans la pile et rejouer une fois
- get_it=1;
- }
- } else { // adresse différente
- if (ishtml(mov_url)==0) { // pas même adresse MAIS c'est un fichier non html (pas de page moved possible)
- // -> on prend à cette adresse, le lien sera enregistré avec lien_record() (hash)
- if ((opt.debug>1) && (opt.log!=NULL)) {
- fspc(opt.log,"debug"); fprintf(opt.log,"wizard link test for moved file at %s%s.."LF,mov_adr,mov_fil);
- test_flush;
- }
- // accepté?
- if (hts_acceptlink(&opt,ptr,lien_tot,liens,
- mov_adr,mov_fil,
- &filters,&filptr,opt.maxfilter,
- &robots,
- &set_prio_to,
- NULL) != 1) { /* nouvelle adresse non refusée ? */
- get_it=1;
- if ((opt.debug>1) && (opt.log!=NULL)) {
- fspc(opt.log,"debug"); fprintf(opt.log,"moved link accepted: %s%s"LF,mov_adr,mov_fil);
- test_flush;
- }
- }
- } /* sinon traité normalement */
- }
-
- //if ((strfield2(mov_adr,urladr)!=0) && (strfield2(mov_fil,urlfil)!=0)) { // identique à casse près
- if (get_it==1) {
- // court-circuiter le reste du traitement
- // et reculer pour mieux sauter
- if (opt.errlog) {
- fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Warning moved treated for %s%s (real one is %s%s)"LF,urladr,urlfil,mov_adr,mov_fil);
- test_flush;
- }
- // canceller lien actuel
- error=1;
- strcpy(liens[ptr]->adr,"!"); // caractère bidon (invalide hash)
-#if HTS_HASH
-#else
- liens[ptr]->sav_len=-1; // taille invalide
-#endif
- // noter NOUVEAU lien
- {
- char mov_sav[HTS_URLMAXSIZE*2];
- // calculer lien et éventuellement modifier addresse/fichier
- if (url_savename(mov_adr,mov_fil,mov_sav,NULL,NULL,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil,&opt,liens,lien_tot,back,back_max,&cache,&hash,ptr,numero_passe)!=-1) {
- if (hash_read(&hash,mov_sav,"",0)<0) { // n'existe pas déja
- // enregistrer lien (MACRO) avec SAV IDENTIQUE
- liens_record(mov_adr,mov_fil,liens[ptr]->sav,"","");
- //liens_record(mov_adr,mov_fil,mov_sav,"","");
- if (liens[lien_tot]!=NULL) { // OK, pas d'erreur
- // mode test?
- liens[lien_tot]->testmode=liens[ptr]->testmode;
- liens[lien_tot]->link_import=0; // mode normal
- if (!set_prio_to)
- liens[lien_tot]->depth=liens[ptr]->depth;
- else
- liens[lien_tot]->depth=max(0,min(set_prio_to-1,liens[ptr]->depth)); // PRIORITE NULLE (catch page)
- liens[lien_tot]->pass2=max(liens[ptr]->pass2,numero_passe);
- liens[lien_tot]->retry=liens[ptr]->retry;
- liens[lien_tot]->premier=liens[ptr]->premier;
- liens[lien_tot]->precedent=liens[ptr]->precedent;
- lien_tot++;
- } else { // oups erreur, plus de mémoire!!
- printf("PANIC! : Not enough memory [%d]\n",__LINE__);
- if (opt.errlog) {
- fprintf(opt.errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url)));
- test_flush;
- }
- //if (opt.getmode & 1) { if (fp) { fclose(fp); fp=NULL; } }
- XH_uninit; // désallocation mémoire & buffers
- return 0;
- }
- } else {
- if ( (opt.debug>0) && (opt.errlog!=NULL) ) {
- fspc(opt.errlog,"warning"); fprintf(opt.errlog,"moving %s to an existing file %s"LF,liens[ptr]->fil,urlfil);
- test_flush;
- }
- }
-
- }
- }
-
- //printf("-> %s %s %s\n",liens[lien_tot-1]->adr,liens[lien_tot-1]->fil,liens[lien_tot-1]->sav);
-
- // note métaphysique: il se peut qu'il y ait un index.html et un INDEX.HTML
- // sous DOS ca marche pas très bien... mais comme je suis génial url_savename()
- // est à même de régler ce problème
- }
- } // ident_url_xx
-
- if (get_it==0) { // adresse vraiment différente et potentiellement en html (pas de possibilité de bouger la page tel quel à cause des <img src..> et cie)
- rn=(char*) calloct(8192,1);
- if (rn!=NULL) {
- if (opt.errlog) {
- fspc(opt.errlog,"warning"); fprintf(opt.errlog,"File has moved from %s%s to %s"LF,urladr,urlfil,mov_url);
- test_flush;
- }
- escape_uri(mov_url);
- // On prépare une page qui sautera immédiatement sur la bonne URL
- // Le scanner re-changera, ensuite, cette URL, pour la mirrorer!
- strcpy(rn,"<HTML>"CRLF);
- strcat(rn,"<!-- Created by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->"CRLF);
- strcat(rn,"<HEAD>"CRLF"<TITLE>Page has moved</TITLE>"CRLF"</HEAD>"CRLF"<BODY>"CRLF);
- strcat(rn,"<META HTTP-EQUIV=\"Refresh\" CONTENT=\"0; URL=");
- strcat(rn,mov_url); // URL
- strcat(rn,"\">"CRLF);
- strcat(rn,"<A HREF=\"");
- strcat(rn,mov_url);
- strcat(rn,"\">");
- strcat(rn,"<B>Click here...</B></A>"CRLF);
- strcat(rn,"</BODY>"CRLF);
- strcat(rn,"<!-- Created by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->"CRLF);
- strcat(rn,"</HTML>"CRLF);
-
- // changer la page
- if (r.adr) { freet(r.adr); r.adr=NULL; }
- r.adr=rn;
- r.size=strlen(r.adr);
- strcpy(r.contenttype,"text/html");
- }
- } // get_it==0
-
- } // bloc
- // erreur HTTP (ex: 404, not found)
- } else if (
- (r.statuscode==412)
- || (r.statuscode==416)
- ) { // Precondition Failed, c'est à dire pour nous redemander TOUT le fichier
- if (fexist(liens[ptr]->sav)) {
- remove(liens[ptr]->sav); // Eliminer
- if (!fexist(liens[ptr]->sav)) { // Bien éliminé? (sinon on boucle..)
-#if HDEBUG
- printf("Partial content NOT up-to-date, reget all file for %s\n",liens[ptr]->sav);
-#endif
- if ( (opt.debug>1) && (opt.errlog!=NULL) ) {
- //if (opt.errlog) {
- fspc(opt.errlog,"debug"); fprintf(opt.errlog,"Partial file reget (%s) for %s%s"LF,r.msg,urladr,urlfil);
- test_flush;
- }
- // enregistrer le MEME lien (MACRO)
- liens_record(liens[ptr]->adr,liens[ptr]->fil,liens[ptr]->sav,"","");
- if (liens[lien_tot]!=NULL) { // OK, pas d'erreur
- liens[lien_tot]->testmode=liens[ptr]->testmode; // mode test?
- liens[lien_tot]->link_import=0; // pas mode import
- liens[lien_tot]->depth=liens[ptr]->depth;
- liens[lien_tot]->pass2=max(liens[ptr]->pass2,numero_passe);
- liens[lien_tot]->retry=liens[ptr]->retry;
- liens[lien_tot]->premier=liens[ptr]->premier;
- liens[lien_tot]->precedent=ptr;
- lien_tot++;
- //
- // canceller lien actuel
- error=1;
- strcpy(liens[ptr]->adr,"!"); // caractère bidon (invalide hash)
-#if HTS_HASH
-#else
- liens[ptr]->sav_len=-1; // taille invalide
-#endif
- //
- } else { // oups erreur, plus de mémoire!!
- printf("PANIC! : Not enough memory [%d]\n",__LINE__);
- if (opt.errlog) {
- fprintf(opt.errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url)));
- test_flush;
- }
- //if (opt.getmode & 1) { if (fp) { fclose(fp); fp=NULL; } }
- XH_uninit; // désallocation mémoire & buffers
- return 0;
- }
- } else {
- if (opt.errlog!=NULL) {
- fspc(opt.errlog,"error"); fprintf(opt.errlog,"Can not remove old file %s"LF,urlfil);
- test_flush;
- }
- }
- } else {
- if (opt.errlog!=NULL) {
- fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Unexpected 412/416 error (%s) for %s%s"LF,r.msg,urladr,urlfil);
- test_flush;
- }
- }
- } else if (r.statuscode!=200) {
- int can_retry=0;
-
- // cas où l'on peut reessayer
- // -2=timeout -3=rateout (interne à httrack)
- switch(r.statuscode) {
- //case -1: can_retry=1; break;
- case -2: if (opt.hostcontrol) { // timeout et retry épuisés
- if ((opt.hostcontrol & 1) && (liens[ptr]->retry<=0)) {
- if ((opt.debug>1) && (opt.log!=NULL)) {
- fspc(opt.log,"debug"); fprintf(opt.log,"Link banned: %s%s"LF,urladr,urlfil); test_flush;
- }
- host_ban(&opt,liens,ptr,lien_tot,back,back_max,filters,opt.maxfilter,&filptr,jump_identification(urladr));
- if ((opt.debug>1) && (opt.log!=NULL)) {
- fspc(opt.log,"debug"); fprintf(opt.log,"Info: previous log - link banned: %s%s"LF,urladr,urlfil); test_flush;
- }
- } else can_retry=1;
- } else can_retry=1;
- break;
- case -3: if ((opt.hostcontrol) && (liens[ptr]->retry<=0)) { // too slow
- if (opt.hostcontrol & 2) {
- if ((opt.debug>1) && (opt.log!=NULL)) {
- fspc(opt.log,"debug"); fprintf(opt.log,"Link banned: %s%s"LF,urladr,urlfil); test_flush;
- }
- host_ban(&opt,liens,ptr,lien_tot,back,back_max,filters,opt.maxfilter,&filptr,jump_identification(urladr));
- if ((opt.debug>1) && (opt.log!=NULL)) {
- fspc(opt.log,"debug"); fprintf(opt.log,"Info: previous log - link banned: %s%s"LF,urladr,urlfil); test_flush;
- }
- } else can_retry=1;
- } else can_retry=1;
- break;
- case -4: // connect closed
- can_retry=1;
- break;
- case -5: // other (non fatal) error
- can_retry=1;
- break;
- case -6: // bad SSL handskake
- can_retry=1;
- break;
- case 408: case 409: case 500: case 502: case 504: can_retry=1;
- break;
- }
-
- if ( strcmp(liens[ptr]->fil,"/primary") != 0 ) { // no primary (internal page 0)
- if ((liens[ptr]->retry<=0) || (!can_retry) ) { // retry épuisés (ou retry impossible)
- if (opt.errlog) {
- if ((opt.retry>0) && (can_retry)){
- fspc(opt.errlog,"error");
- fprintf(opt.errlog,"\"%s\" (%d) after %d retries at link %s%s (from %s%s)"LF,r.msg,r.statuscode,opt.retry,urladr,urlfil,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil);
- } else {
- if (r.statuscode==-10) { // test OK
- if ((opt.debug>0) && (opt.errlog!=NULL)) {
- fspc(opt.errlog,"info");
- fprintf(opt.errlog,"Test OK at link %s%s (from %s%s)"LF,urladr,urlfil,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil);
- }
- } else {
- if (strcmp(urlfil,"/robots.txt")) { // ne pas afficher d'infos sur robots.txt par défaut
- fspc(opt.errlog,"error");
- fprintf(opt.errlog,"\"%s\" (%d) at link %s%s (from %s%s)"LF,r.msg,r.statuscode,urladr,urlfil,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil);
- } else {
- if (opt.debug>1) {
- fspc(opt.errlog,"info"); fprintf(opt.errlog,"No robots.txt rules at %s"LF,urladr);
- test_flush;
- }
- }
- }
- }
- test_flush;
- }
-
- // NO error in trop level
- // due to the "no connection -> previous restored" hack
- // This prevent the engine from wiping all data if the website has been deleted (or moved)
- // since last time (which is quite annoying)
- if (liens[ptr]->precedent != 0) {
- // ici on teste si on doit enregistrer la page tout de même
- if (opt.errpage) {
- store_errpage=1;
- }
- } else {
- if (strcmp(urlfil,"/robots.txt") != 0) {
- /*
- This is an error caused by a link entered by the user
- That is, link(s) entered by user are invalid (404, 500, connect error, proxy error..)
- If all links entered are invalid, the session failed and we will attempt to restore
- the previous one
- Example: Try to update a website which has been deleted remotely: this may delete
- the website locally, which is really not desired (especially if the website disappeared!)
- With this hack, the engine won't wipe local files (how clever)
- */
- HTS_STAT.stat_errors_front++;
- }
- }
-
- } else { // retry!!
- if (opt.debug>0 && opt.errlog != NULL) { // on fera un alert si le retry échoue
- fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Retry after error %d (%s) at link %s%s (from %s%s)"LF,r.statuscode,r.msg,urladr,urlfil,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil);
- test_flush;
- }
- // redemander fichier
- liens_record(urladr,urlfil,savename,"","");
- if (liens[lien_tot]!=NULL) { // OK, pas d'erreur
- liens[lien_tot]->testmode=liens[ptr]->testmode; // mode test?
- liens[lien_tot]->link_import=0; // pas mode import
- liens[lien_tot]->depth=liens[ptr]->depth;
- liens[lien_tot]->pass2=max(liens[ptr]->pass2,numero_passe);
- liens[lien_tot]->retry=liens[ptr]->retry-1; // moins 1 retry!
- liens[lien_tot]->premier=liens[ptr]->premier;
- liens[lien_tot]->precedent=liens[ptr]->precedent;
- lien_tot++;
- } else { // oups erreur, plus de mémoire!!
- printf("PANIC! : Not enough memory [%d]\n",__LINE__);
- if (opt.errlog) {
- fspc(opt.errlog,"panic");
- fprintf(opt.errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url)));
- test_flush;
- }
- //if (opt.getmode & 1) { if (fp) { fclose(fp); fp=NULL; } }
- XH_uninit; // désallocation mémoire & buffers
- return 0;
- }
- }
- } else {
- if (opt.errlog) {
- if (opt.debug>1) {
- fspc(opt.errlog,"info");
- fprintf(opt.errlog,"Info: no robots.txt at %s%s"LF,urladr,urlfil);
- }
- }
- }
- if (!store_errpage) {
- if (r.adr) { freet(r.adr); r.adr=NULL; } // désalloc
- error=1; // erreur!
- }
+ /* Parse */
+ if (hts_mirror_check_moved(&str, &stre) != 0) {
+ XH_uninit;
+ return -1;
}
- // FIN rattrapage des 301,302,307..
- // ------------------------------------------------------------
-
+ }
- } // if !error
} // if !error
if (!error) {
#if DEBUG_SHOWTYPES
if (strstr(REG,r.contenttype)==NULL) {
- strcat(REG,r.contenttype);
- strcat(REG,"\n");
+ strcatbuff(REG,r.contenttype);
+ strcatbuff(REG,"\n");
printf("%s\n",r.contenttype);
io_flush;
}
@@ -2265,8 +1363,79 @@ int httpmirror(char* url1,httrackp* ptropt) {
fspc(opt.log,"info"); fprintf(opt.log,"engine: check-html: %s%s"LF,urladr,urlfil);
}
{
+ char buff_err_msg[1024];
+ htsmoduleStruct str;
+ htsmoduleStructExtended stre;
+ buff_err_msg[0] = '\0';
+ memset(&str, 0, sizeof(str));
+ memset(&stre, 0, sizeof(stre));
+ /* */
+ str.err_msg = buff_err_msg;
+ str.filename = savename;
+ str.mime = r.contenttype;
+ str.url_host = urladr;
+ str.url_file = urlfil;
+ str.size = (int) r.size;
+ /* */
+ str.addLink = htsAddLink;
+ /* */
+ str.liens = liens;
+ str.opt = &opt;
+ str.back = back;
+ str.back_max = back_max;
+ str.cache = &cache;
+ str.hashptr = hashptr;
+ str.numero_passe = numero_passe;
+ str.add_tab_alloc = add_tab_alloc;
+ /* */
+ str.lien_tot_ = &lien_tot;
+ str.ptr_ = &ptr;
+ str.lien_size_ = &lien_size;
+ str.lien_buffer_ = &lien_buffer;
+ /* */
+ /* */
+ stre.r_ = &r;
+ /* */
+ stre.error_ = &error;
+ stre.exit_xh_ = &exit_xh;
+ stre.store_errpage_ = &store_errpage;
+ /* */
+ stre.base = base;
+ stre.codebase = codebase;
+ /* */
+ stre.filters_ = &filters;
+ stre.filptr_ = &filptr;
+ stre.robots_ = &robots;
+ stre.hash_ = &hash;
+ stre.lien_max_ = &lien_max;
+ /* */
+ stre.makeindex_done_ = &makeindex_done;
+ stre.makeindex_fp_ = &makeindex_fp;
+ stre.makeindex_links_ = &makeindex_links;
+ stre.makeindex_firstlink_ = makeindex_firstlink;
+ /* */
+ stre.template_header_ = template_header;
+ stre.template_body_ = template_body;
+ stre.template_footer_ = template_footer;
+ /* */
+ stre.stat_fragment_ = &stat_fragment;
+ stre.makestat_time = makestat_time;
+ stre.makestat_fp = makestat_fp;
+ stre.makestat_total_ = &makestat_total;
+ stre.makestat_lnk_ = &makestat_lnk;
+ stre.maketrack_fp = maketrack_fp;
+
+ /* Parse */
+ if (htsparse(&str, &stre) != 0) {
+ XH_uninit;
+ return -1;
+ }
+
+
// I'll have to segment this part
-#include "htsparse.c"
+// #include "htsparse.c"
+
+
}
}
// Fin parsing HTML
@@ -2348,15 +1517,28 @@ int httpmirror(char* url1,httrackp* ptropt) {
printf("robots.txt dump:\n%s\n",r.adr);
#endif
do {
+ char* comm;
+ int llen;
bptr+=binput(r.adr+bptr, line, sizeof(line) - 2);
+ /* strip comment */
+ comm=strchr(line, '#');
+ if (comm != NULL) {
+ *comm = '\0';
+ }
+ /* strip spaces */
+ llen=strlen(line);
+ while(llen > 0 && is_realspace(line[llen - 1])) {
+ line[llen - 1] = '\0';
+ llen--;
+ }
if (strfield(line,"user-agent:")) {
char* a;
a=line+11;
- while(*a==' ') a++; // sauter espace(s)
- if (*a == '*') {
+ while(is_realspace(*a)) a++; // sauter espace(s)
+ if ( *a == '*') {
if (record != 2)
record=1; // c pour nous
- } else if (strfield(a,"httrack")) {
+ } else if (strfield(a,"httrack") || strfield(a,"winhttrack") || strfield(a,"webhttrack")) {
buff[0]='\0'; // re-enregistrer
infobuff[0]='\0';
record=2; // locked
@@ -2367,23 +1549,18 @@ int httpmirror(char* url1,httrackp* ptropt) {
else record=0;
} else if (record) {
if (strfield(line,"disallow:")) {
- char* a;
- a=strchr(line,'#');
- if (a) *a='\0';
- while((line[strlen(line)-1]==' ')
- || (line[strlen(line)-1]==10)
- || (line[strlen(line)-1]==13))
- line[strlen(line)-1]='\0'; // supprimer espaces
- a=line+9;
- while((*a==' ') || (*a==10) || (*a==13))
+ char* a=line+9;
+ while(is_realspace(*a))
a++; // sauter espace(s)
if (strnotempty(a)) {
if (strcmp(a,"/") != 0) { /* ignoring disallow: / */
if ( (strlen(buff) + strlen(a) + 8) < sizeof(buff)) {
- strcat(buff,a);
- strcat(buff,"\n");
- if (strnotempty(infobuff)) strcat(infobuff,", ");
- strcat(infobuff,a);
+ strcatbuff(buff,a);
+ strcatbuff(buff,"\n");
+ if ( (strlen(infobuff) + strlen(a) + 8) < sizeof(infobuff)) {
+ if (strnotempty(infobuff)) strcatbuff(infobuff,", ");
+ strcatbuff(infobuff,a);
+ }
}
} else {
if (opt.errlog!=NULL) {
@@ -2428,8 +1605,8 @@ int httpmirror(char* url1,httrackp* ptropt) {
char tempo[HTS_URLMAXSIZE*2];
FILE* fp;
tempo[0]='\0';
- strcpy(tempo,savename);
- strcat(tempo,".readme");
+ strcpybuff(tempo,savename);
+ strcatbuff(tempo,".readme");
#if HTS_DOSNAME
// remplacer / par des slash arrière
@@ -2445,7 +1622,7 @@ int httpmirror(char* url1,httrackp* ptropt) {
#endif
if ((fp=fopen(tempo,"wb"))!=NULL) {
- fprintf(fp,"Info-file generated by HTTrack Website Copier "HTTRACK_VERSION""CRLF""CRLF);
+ fprintf(fp,"Info-file generated by HTTrack Website Copier "HTTRACK_VERSION"%s"CRLF""CRLF, WHAT_is_available);
fprintf(fp,"The file %s has not been scanned by HTS"CRLF,savename);
fprintf(fp,"Some links contained in it may be unreachable locally."CRLF);
fprintf(fp,"If you want to get these files, you have to set an upper recurse level, ");
@@ -2454,7 +1631,7 @@ int httpmirror(char* url1,httrackp* ptropt) {
#if HTS_WIN==0
chmod(tempo,HTS_ACCESS_FILE);
#endif
- usercommand(0,NULL,antislash(tempo));
+ usercommand(&opt,0,NULL,fconv(tempo),"","");
}
@@ -2473,7 +1650,9 @@ int httpmirror(char* url1,httrackp* ptropt) {
fspc(opt.log,"debug"); fprintf(opt.log,"non-html file ignored after upload at %s : %s"LF,urladr,urlfil);
test_flush;
}
- freet(r.adr); r.adr=NULL;
+ if (r.adr) {
+ freet(r.adr); r.adr=NULL;
+ }
}
}
@@ -2481,9 +1660,17 @@ int httpmirror(char* url1,httrackp* ptropt) {
// ATTENTION C'EST ICI QU'ON SAUVE LE FICHIER!!
if (r.adr) {
- if (filesave(r.adr,(int)r.size,savename)!=0) {
+ if (filesave(&opt,r.adr,(int)r.size,savename,urladr,urlfil)!=0) {
+ int fcheck;
+ if ((fcheck=check_fatal_io_errno())) {
+ exit_xh=-1; /* fatal error */
+ }
if (opt.errlog) {
- fspc(opt.errlog,"error"); fprintf(opt.errlog,"Unable to save file %s"LF,savename);
+ fspc(opt.errlog,"error"); fprintf(opt.errlog,"Unable to save file %s : %s"LF, savename, strerror(errno));
+ if (fcheck) {
+ fspc(opt.errlog,"error");
+ fprintf(opt.errlog,"* * Fatal write error, giving up"LF);
+ }
test_flush;
}
} else {
@@ -2520,284 +1707,71 @@ int httpmirror(char* url1,httrackp* ptropt) {
}
}
} else */
- if (opt.parsejava) {
- if (strlen(savename)>6) { // fichier.class
- if (strfield(savename+strlen(savename)-6,".class")) { // ok c'est une classe
- if (fexist(savename)) { // ok, existe bien!
- char err_msg[1100];
- int r;
- err_msg[0]='\0';
-
- //##char* buffer;
- // JavaParsing f34R!
- if ((opt.debug>1) && (opt.log!=NULL)) {
- fspc(opt.log,"debug"); fprintf(opt.log,"(JavaClass catch file): parsing %s"LF,savename); test_flush;
- }
-
- //##buffer=(char*) malloct(32768);
- //##if (buffer) {
- //
- //##strcpy(buffer,"$BUFFER$");
- //##hts_add_file(buffer); // déclarer buffer
- while(hts_add_file(NULL,-1) >= 0); // clear chain
-
- r=hts_parse_java(savename,(char*) &err_msg); // parsing
- if (!r) { // error
- if (opt.errlog) {
- fspc(opt.errlog,"error"); fprintf(opt.errlog,"Unable to parse java file %s : %s"LF,savename,err_msg);
- test_flush;
- }
- } else { // ok
- char adr[HTS_URLMAXSIZE*2],fil[HTS_URLMAXSIZE*2],save[HTS_URLMAXSIZE*2]; // nom du fichier à sauver dans la boucle
- char codebase[HTS_URLMAXSIZE*2]; // codebase classe java
- char lien[HTS_URLMAXSIZE*2];
- //##char* a;
- int file_position;
- int pass_fix,prio_fix;
- codebase[0]='\0';
- //
-
- if ((opt.debug>1) && (opt.log!=NULL)) {
- fspc(opt.log,"debug"); fprintf(opt.log,"(JavaClass catch file): parsing finished, now copying links.."LF); test_flush;
- }
- // recopie de "creer le lien"
- //
-
- // adr = c'est la même
- // fil et save: save2 et fil2
- prio_fix=maximum(liens[ptr]->depth-1,0);
- pass_fix=max(liens[ptr]->pass2,numero_passe);
- if (liens[ptr]->cod) strcpy(codebase,liens[ptr]->cod); // codebase valable pour tt les classes descendantes
- if (strnotempty(codebase)==0) { // pas de codebase, construire
- char* a;
- strcpy(codebase,liens[ptr]->fil);
- a=codebase+strlen(codebase)-1;
- while((*a) && (*a!='/') && ( a > codebase)) a--;
- if (*a=='/')
- *(a+1)='\0'; // couper
- } else { // couper http:// éventuel
- if (strfield(codebase,"http://")) {
- char tempo[HTS_URLMAXSIZE*2];
- char* a=codebase+7;
- a=strchr(a,'/'); // après host
- if (a) { // ** msg erreur et vérifier?
- strcpy(tempo,a);
- strcpy(codebase,tempo); // couper host
- } else {
- if (opt.errlog) {
- fprintf(opt.errlog,"Unexpected strstr error in base %s"LF,codebase);
- test_flush;
- }
- }
- }
- }
- //##a=buffer;
- //##strcat(buffer,"&"); // fin du buffer
- if (!((int) strlen(codebase)<HTS_URLMAXSIZE)) { // trop long
- if (opt.errlog) {
- fprintf(opt.errlog,"Codebase too long, parsing skipped (%s)"LF,codebase);
- test_flush;
- }
- //##a=NULL;
- while(hts_add_file(NULL,-1) >= 0); // clear chain
- }
- while ( (file_position=hts_add_file(lien,-1)) >= 0 ) {
- int dejafait=0;
- /* //##
- char* b;
-
- // prochain fichier à noter!
- lien[0]='\0';
- b=strchr(a,'&'); // marqueur de fin de chaine (voir hts_add_file)
- if (b) {
- if ( ( ((int) b-(int) a) + strlen(codebase)) < HTS_URLMAXSIZE)
- strncat(lien,a,(int) b-(int) a); // nom du fichier
- else {
- if (opt.errlog) {
- fprintf(opt.errlog,"Error: Java-Parser generated link that exceeds %d bytes"LF,HTS_URLMAXSIZE);
- test_flush;
- }
- }
- } else a=NULL;
-
- if (strnotempty(lien)==0) a=NULL; // fin
- if (a)
- a=b+1;
- */
-
- if (strnotempty(lien)) {
-
- // calculer les chemins et noms de sauvegarde
- if (ident_url_relatif(lien,urladr,codebase,adr,fil)>=0) { // reformage selon chemin
- int r;
-
- // patcher opt pour garder structure originale!! (on ne patche pas les noms dans la classe java!)
- //##if (!strstr(lien,"://")) { // PAS tester les http://.. inutile (on ne va pas patcher le binaire :-( )
- if (1) {
- char tempo[HTS_URLMAXSIZE*2];
- int a,b;
- tempo[0]='\0';
- a=opt.savename_type;
- b=opt.savename_83;
- opt.savename_type=0;
- opt.savename_83=0;
- // note: adr,fil peuvent être patchés
- r=url_savename(adr,fil,save,NULL,NULL,NULL,NULL,&opt,liens,lien_tot,back,back_max,&cache,&hash,ptr,numero_passe);
- opt.savename_type=a;
- opt.savename_83=b;
- if (r != -1) {
- if (savename) {
- if (lienrelatif(tempo,save,savename)==0) {
- if ((opt.debug>1) && (opt.log!=NULL)) {
- fspc(opt.log,"debug"); fprintf(opt.log,"(JavaClass catch file): relative link at %s build with %s and %s: %s"LF,adr,save,savename,tempo);
- test_flush;
- }
- //
- // xxc xxc xxc xxc TODO java:
- // rebuild the java class with patched strings...
- //
- if (strlen(tempo)<=strlen(lien)) {
- FILE* fp=fopen(savename,"r+b");
- if (fp) {
- if (!fseek(fp,file_position,SEEK_SET)) {
- //unsigned short int string_length=strlen(tempo);
- //fwrite(&valint,sizeof(string_length),1,fp);
- // xxc xxc ARGH! SI la taille est <, décaler le code ?!
- } else {
- if (opt.log!=NULL) {
- fspc(opt.log,"debug"); fprintf(opt.log,"(JavaClass catch file): unable to patch: %s"LF,savename);
- test_flush;
- }
- }
- fclose(fp);
- } else {
- if (opt.log!=NULL) {
- fspc(opt.log,"debug"); fprintf(opt.log,"(JavaClass catch file): unable to open: %s"LF,savename);
- test_flush;
- }
- }
- } else {
- if (opt.log!=NULL) {
- fspc(opt.log,"debug"); fprintf(opt.log,"(JavaClass catch file): link too long, unable to write it: %s"LF,tempo);
- test_flush;
- }
- }
- }
- }
- }
- } else {
- if ((opt.debug>1) && (opt.log!=NULL)) {
- fspc(opt.log,"debug"); fprintf(opt.log,"(JavaClass catch file): file not caught: %s"LF,lien); test_flush;
- }
- r=-1;
- }
- //
- if (r != -1) {
- if ((opt.debug>1) && (opt.log!=NULL)) {
- fspc(opt.log,"debug"); fprintf(opt.log,"(JavaClass catch file): %s%s -> %s (base %s)"LF,adr,fil,save,codebase); test_flush;
- }
-
- // modifié par rapport à l'autre version (cf prio_fix notamment et save2)
-
- // vérifier que le lien n'a pas déja été noté
- // si c'est le cas, alors il faut s'assurer que la priorité associée
- // au fichier est la plus grande des deux priorités
- //
- // On part de la fin et on essaye de se presser (économise temps machine)
-#if HTS_HASH
- {
- int i=hash_read(&hash,save,"",0); // lecture type 0 (sav)
- if (i>=0) {
- liens[i]->depth=maximum(liens[i]->depth,prio_fix);
- dejafait=1;
- }
- }
-#else
- {
- int l;
- int i;
- l=strlen(save);
- for(i=lien_tot-1;(i>=0) && (dejafait==0);i--) {
- if (liens[i]->sav_len==l) { // même taille de chaîne
- if (strcmp(liens[i]->sav,save)==0) { // existe déja
- liens[i]->depth=maximum(liens[i]->depth,prio_fix);
- dejafait=1;
- }
- }
- }
- }
-#endif
-
-
- if (!dejafait) {
- //
- // >>>> CREER LE LIEN JAVA <<<<
-
- // enregistrer fichier de java (MACRO)
- liens_record(adr,fil,save,"","");
- if (liens[lien_tot]==NULL) { // erreur, pas de place réservée
- printf("PANIC! : Not enough memory [%d]\n",__LINE__);
- if (opt.errlog) {
- fprintf(opt.errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url)));
- test_flush;
- }
- // if ((opt.getmode & 1) && (ptr>0)) { if (fp) { fclose(fp); fp=NULL; } }
- XH_extuninit; // désallocation mémoire & buffers
- return 0;
- }
-
- // mode test?
- liens[lien_tot]->testmode=0; // pas mode test
-
- liens[lien_tot]->link_import=0; // pas mode import
-
- // écrire autres paramètres de la structure-lien
- //if (meme_adresse)
- liens[lien_tot]->premier=liens[ptr]->premier;
- //else // sinon l'objet père est le précédent lui même
- // liens[lien_tot]->premier=ptr;
-
- liens[lien_tot]->precedent=ptr;
- // noter la priorité
- liens[lien_tot]->depth=prio_fix;
- liens[lien_tot]->pass2=max(pass_fix,numero_passe);
- liens[lien_tot]->retry=opt.retry;
-
- //strcpy(liens[lien_tot]->adr,adr);
- //strcpy(liens[lien_tot]->fil,fil);
- //strcpy(liens[lien_tot]->sav,save);
- if ((opt.debug>1) && (opt.log!=NULL)) {
- fspc(opt.log,"debug"); fprintf(opt.log,"(JavaClass catch file): OK, NOTE: %s%s -> %s"LF,liens[lien_tot]->adr,liens[lien_tot]->fil,liens[lien_tot]->sav);
- test_flush;
- }
-
- lien_tot++; // UN LIEN DE PLUS
- }
- }
- }
-
- }
- }
-
- }
- //##// effacer buffer temporaire
- //##if (buffer) freet(buffer); buffer=NULL;
- //##} // if buffer
- } // if exist
- } // if .class
- } // if strlen-savename
- } // if opt.parsejava
-
-
+
+
+ /* External modules */
+ if (opt.parsejava && fexist(savename)) {
+ char buff_err_msg[1024];
+ htsmoduleStruct str;
+ buff_err_msg[0] = '\0';
+ memset(&str, 0, sizeof(str));
+ /* */
+ str.err_msg = buff_err_msg;
+ str.filename = savename;
+ str.mime = r.contenttype;
+ str.url_host = urladr;
+ str.url_file = urlfil;
+ str.size = (int) r.size;
+ /* */
+ str.addLink = htsAddLink;
+ /* */
+ str.liens = liens;
+ str.opt = &opt;
+ str.back = back;
+ str.back_max = back_max;
+ str.cache = &cache;
+ str.hashptr = hashptr;
+ str.numero_passe = numero_passe;
+ str.add_tab_alloc = add_tab_alloc;
+ /* */
+ str.lien_tot_ = &lien_tot;
+ str.ptr_ = &ptr;
+ str.lien_size_ = &lien_size;
+ str.lien_buffer_ = &lien_buffer;
+ /* Parse if recognized */
+ switch(hts_parse_externals(&str)) {
+ case 1:
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"(External module): parsed successfully %s"LF,savename); test_flush;
+ }
+ break;
+ case 0:
+ if ((opt.debug>1) && (opt.log!=NULL)) {
+ fspc(opt.log,"debug"); fprintf(opt.log,"(External module): couldn't parse successfully %s : %s"LF,savename, str.err_msg); test_flush;
+ }
+ break;
+ }
+ }
+
} // text/html ou autre
+
+ /* Post-processing */
+ if (fexist(savename)) {
+ usercommand(&opt, 0, NULL, savename, urladr, urlfil);
+ }
+
+
} // if !error
jump_if_done:
// libérer les liens
- if (r.adr) { freet(r.adr); r.adr=NULL; } // libérer la mémoire!
+ if (r.adr) {
+ freet(r.adr);
+ r.adr=NULL;
+ } // libérer la mémoire!
// prochain lien
ptr++;
@@ -2826,23 +1800,22 @@ jump_if_done:
}
}
}
-
+
+ // copy abort state if necessary from outside
+ if (!exit_xh && opt.state.exit_xh) {
+ exit_xh=opt.state.exit_xh;
+ }
// a-t-on dépassé le quota?
- if ((opt.maxsite>0) && (HTS_STAT.stat_bytes>=opt.maxsite)) {
- if (opt.errlog) {
- fprintf(opt.errlog,"More than "LLintP" bytes have been transfered.. giving up"LF,opt.maxsite);
- test_flush;
- }
- ptr=lien_tot;
- } else if ((opt.maxtime>0) && ((time_local()-HTS_STAT.stat_timestart)>opt.maxtime)) {
- if (opt.errlog) {
- fprintf(opt.errlog,"More than %d seconds passed.. giving up"LF,opt.maxtime);
- test_flush;
- }
+ if (!back_checkmirror(&opt)) {
ptr=lien_tot;
} else if (exit_xh) { // sortir
if (opt.errlog) {
- fspc(opt.errlog,"info"); fprintf(opt.errlog,"Exit requested by shell or user"LF);
+ fspc(opt.errlog,"info");
+ if (exit_xh==1) {
+ fprintf(opt.errlog,"Exit requested by shell or user"LF);
+ } else {
+ fprintf(opt.errlog,"Exit requested by engine"LF);
+ }
test_flush;
}
ptr=lien_tot;
@@ -2910,15 +1883,15 @@ jump_if_done:
if ((new_lst) && (sz>0)) {
char* adr=(char*) malloct((INTsys)sz);
if (adr) {
- if ((int) fread(adr,1,(INTsys)sz,new_lst) == sz) {
+ if (fread(adr,1,(INTsys)sz,new_lst) == sz) {
char line[1100];
int purge=0;
while(!feof(old_lst)) {
linput(old_lst,line,1000);
if (!strstr(adr,line)) { // fichier non trouvé dans le nouveau?
char file[HTS_URLMAXSIZE*2];
- strcpy(file,opt.path_html);
- strcat(file,line+1);
+ strcpybuff(file,opt.path_html);
+ strcatbuff(file,line+1);
file[strlen(file)-1]='\0';
if (fexist(file)) { // toujours sur disque: virer
if (opt.log) {
@@ -2940,8 +1913,8 @@ jump_if_done:
if (strnotempty(line))
if (!strstr(adr,line)) { // non trouvé?
char file[HTS_URLMAXSIZE*2];
- strcpy(file,opt.path_html);
- strcat(file,line+1);
+ strcpybuff(file,opt.path_html);
+ strcatbuff(file,line+1);
while ((strnotempty(file)) && (rmdir(file)==0)) { // ok, éliminé (existait)
purge=1;
if (opt.log) {
@@ -2987,15 +1960,39 @@ jump_if_done:
int warning = fspc(NULL,"warning");
int info = fspc(NULL,"info");
char htstime[256];
+ char infoupdated[256];
// int n=(int) (stat_loaded/(time_local()-HTS_STAT.stat_timestart));
- int n=(int) (HTS_STAT.HTS_TOTAL_RECV/(max(1,time_local()-HTS_STAT.stat_timestart)));
+ LLint n=(LLint) (HTS_STAT.HTS_TOTAL_RECV/(max(1,time_local()-HTS_STAT.stat_timestart)));
sec2str(htstime,time_local()-HTS_STAT.stat_timestart);
//fprintf(opt.log,LF"HTS-mirror complete in %s : %d links scanned, %d files written (%d bytes overall) [%d bytes received at %d bytes/sec]"LF,htstime,lien_tot-1,HTS_STAT.stat_files,stat_bytes,stat_loaded,n);
- fprintf(opt.log,LF"HTTrack mirror complete in %s : %d links scanned, %d files written (%d bytes overall) [%d bytes received at %d bytes/sec]",htstime,(int)lien_tot-1,(int)HTS_STAT.stat_files,(int)HTS_STAT.stat_bytes,(int)HTS_STAT.HTS_TOTAL_RECV,(int)n);
- if (HTS_STAT.total_packed) {
+ infoupdated[0] = '\0'; // stays empty unless this run is an update of an existing mirror
+ if (opt.is_update) {
+ if (HTS_STAT.stat_updated_files > 0) { // report the count only when at least one file was updated
+ sprintf(infoupdated, ", %d files updated", (int)HTS_STAT.stat_updated_files);
+ } else {
+ sprintf(infoupdated, ", no files updated");
+ }
+ }
+ fprintf(opt.log,LF
+ "HTTrack mirror complete in %s : "
+ "%d links scanned, %d files written ("LLintP" bytes overall)%s "
+ "["LLintP" bytes received at "LLintP" bytes/sec]",
+ htstime,
+ (int)lien_tot-1,
+ (int)HTS_STAT.stat_files,
+ (LLint)HTS_STAT.stat_bytes,
+ infoupdated,
+ (LLint)HTS_STAT.HTS_TOTAL_RECV,
+ (LLint)n
+ );
+ if (HTS_STAT.total_packed > 0 && HTS_STAT.total_unpacked > 0) {
int packed_ratio=(int)((LLint)(HTS_STAT.total_packed*100)/HTS_STAT.total_unpacked);
- fprintf(opt.log,", "LLintP" bytes transfered using HTTP compression in %d files, ratio %d%%",HTS_STAT.total_unpacked,HTS_STAT.total_packedfiles,packed_ratio);
+ fprintf(opt.log,", "LLintP" bytes transfered using HTTP compression in %d files, ratio %d%%",(LLint)HTS_STAT.total_unpacked,HTS_STAT.total_packedfiles,(int)packed_ratio);
+ }
+ if (!opt.nokeepalive && HTS_STAT.stat_sockid > 0 && HTS_STAT.stat_nrequests > HTS_STAT.stat_sockid) {
+ int rq = (HTS_STAT.stat_nrequests * 10) / HTS_STAT.stat_sockid;
+ fprintf(opt.log,", %d.%d requests per connection", rq/10, rq%10);
}
fprintf(opt.log,LF);
if (error)
@@ -3029,10 +2026,12 @@ jump_if_done:
}
#endif
// fin afficher résumé dans log
-
- // désallocation mémoire & buffers
- XH_uninit
+ // ending
+ usercommand(&opt,0,NULL,NULL,NULL,NULL);
+
+ // désallocation mémoire & buffers
+ XH_uninit;
return 1; // OK
}
@@ -3108,8 +2107,12 @@ fprintf(debug_fp,"resync timer 1\n"); fflush(debug_fp);
}
+#define _FILTERS (*opt->filters.filters)
+#define _FILTERS_PTR (opt->filters.filptr)
+#define _ROBOTS ((robots_wizard*)opt->robotsptr)
+
// bannir host (trop lent etc)
-void host_ban(httrackp* opt,lien_url** liens,int ptr,int lien_tot,lien_back* back,int back_max,char** filters,int filter_max,int* filptr,char* host) {
+void host_ban(httrackp* opt,lien_url** liens,int ptr,int lien_tot,lien_back* back,int back_max,char* host) {
//int l;
int i;
@@ -3117,26 +2120,26 @@ void host_ban(httrackp* opt,lien_url** liens,int ptr,int lien_tot,lien_back* bac
return; // erreur.. déja cancellé.. bizarre.. devrait pas arriver
/* sanity check */
- if (*filptr + 1 >= opt->maxfilter) {
+ if (*_FILTERS_PTR + 1 >= opt->maxfilter) {
opt->maxfilter += HTS_FILTERSINC;
- if (filters_init(&filters, opt->maxfilter, HTS_FILTERSINC) == 0) {
- printf("PANIC! : Too many filters : >%d [%d]\n",*filptr,__LINE__);
+ if (filters_init(&_FILTERS, opt->maxfilter, HTS_FILTERSINC) == 0) {
+ printf("PANIC! : Too many filters : >%d [%d]\n",*_FILTERS_PTR,__LINE__);
if (opt->errlog) {
- fprintf(opt->errlog,LF"Too many filters, giving up..(>%d)"LF,*filptr);
+ fprintf(opt->errlog,LF"Too many filters, giving up..(>%d)"LF,*_FILTERS_PTR);
fprintf(opt->errlog,"To avoid that: use #F option for more filters (example: -#F5000)"LF);
fflush(opt->errlog);
}
- abort();
+ assertf("too many filters - giving up" == NULL);
}
- //opt->filters.filters=&filters;
}
// interdire host
- if (*filptr < filter_max) {
- strcpy(filters[*filptr],"-");
- strcat(filters[*filptr],host);
- strcat(filters[*filptr],"/*"); // host/ * interdit
- (*filptr)++; *filptr=minimum(*filptr,filter_max);
+ assertf((*_FILTERS_PTR) < opt->maxfilter);
+ if (*_FILTERS_PTR < opt->maxfilter) {
+ strcpybuff(_FILTERS[*_FILTERS_PTR],"-");
+ strcatbuff(_FILTERS[*_FILTERS_PTR],host);
+ strcatbuff(_FILTERS[*_FILTERS_PTR],"/*"); // host/ * interdit
+ (*_FILTERS_PTR)++;
}
// oups
@@ -3161,7 +2164,7 @@ void host_ban(httrackp* opt,lien_url** liens,int ptr,int lien_tot,lien_back* bac
if (back[i].r.soc!=INVALID_SOCKET) deletehttp(&back[i].r);
back[i].r.soc=INVALID_SOCKET;
back[i].r.statuscode=-2; // timeout (peu importe si c'est un traffic jam)
- strcpy(back[i].r.msg,"Link Cancelled by host control");
+ strcpybuff(back[i].r.msg,"Link Cancelled by host control");
if ((opt->debug>1) && (opt->log!=NULL)) {
fprintf(opt->log,"Shutdown: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush;
@@ -3183,7 +2186,7 @@ void host_ban(httrackp* opt,lien_url** liens,int ptr,int lien_tot,lien_back* bac
if ((opt->debug>1) && (opt->log!=NULL)) {
fprintf(opt->log,"Cancel: %s%s"LF,liens[i]->adr,liens[i]->fil); test_flush;
}
- strcpy(liens[i]->adr,"!"); // cancel (invalide hash)
+ strcpybuff(liens[i]->adr,"!"); // cancel (invalide hash)
#if HTS_HASH
#else
liens[i]->sav_len=-1; // taille invalide
@@ -3194,7 +2197,7 @@ void host_ban(httrackp* opt,lien_url** liens,int ptr,int lien_tot,lien_back* bac
if (opt->log!=NULL) {
char dmp[1040];
dmp[0]='\0';
- strncat(dmp,liens[i]->adr,1024);
+ strncatbuff(dmp,liens[i]->adr,1024);
fprintf(opt->log,"WARNING! HostCancel detected memory leaks [len %d at %d]"LF,l,i); test_flush;
fprintf(opt->log,"dump 1024 bytes (address %p): "LF"%s"LF,liens[i]->adr,dmp); test_flush;
}
@@ -3214,42 +2217,46 @@ void host_ban(httrackp* opt,lien_url** liens,int ptr,int lien_tot,lien_back* bac
}
+#if 0
/* Init structure */
/* 1 : init */
/* -1 : off */
-char* structcheck_init(int init) {
- char** structcheck_buff;
- int* structcheck_buff_size;
- NOSTATIC_RESERVE(structcheck_buff, char*, 1);
- NOSTATIC_RESERVE(structcheck_buff_size, int, 1);
- if (init < 2) {
+/* 0 : query */
+/* 2 : LOCK */
+/* -2 : UNLOCK */
+void* structcheck_init(int init) {
+ int structcheck_size = 1024;
+ inthash structcheck_hash=NULL;
+ /* */
+ static PTHREAD_LOCK_TYPE structcheck_init_mutex;
+ static int structcheck_init_mutex_init=0;
+
+ if (init == 1 || init == -1) {
if (init) {
- if (*structcheck_buff)
- freet(*structcheck_buff);
- *structcheck_buff=NULL;
+ if (structcheck_hash)
+ inthash_delete(&structcheck_hash);
+ structcheck_hash=NULL;
}
if (init != -1) {
- if (*structcheck_buff==NULL) {
- *structcheck_buff_size = 65536;
- *structcheck_buff=(char*) malloct(*structcheck_buff_size); // désalloué xh_xx
- if (*structcheck_buff)
- strcpy(*structcheck_buff,"#");
+ if (structcheck_init_mutex_init == 0) {
+ htsSetLock(&structcheck_init_mutex, -999);
+ structcheck_init_mutex_init=1;
}
- }
- } else { /* Ensure enough room */
- if (*structcheck_buff_size < init) {
- *structcheck_buff_size = init + 65536;
- *structcheck_buff=(char*) realloct(*structcheck_buff, *structcheck_buff_size);
- if (*structcheck_buff == NULL) { /* Reset :( */
- *structcheck_buff_size = 65536;
- *structcheck_buff=(char*) malloct(*structcheck_buff_size); // désalloué xh_xx
- if (*structcheck_buff)
- strcpy(*structcheck_buff,"#");
+ if (structcheck_hash==NULL) {
+ structcheck_hash=inthash_new(structcheck_size); // désalloué xh_xx
}
}
}
- return *structcheck_buff;
+
+ /* Lock / Unlock */
+ if (init == 2) { // Lock
+ htsSetLock(&structcheck_init_mutex, 1);
+ } else if (init == -2) { // Unlock
+ htsSetLock(&structcheck_init_mutex, 0);
+ }
+ return structcheck_hash;
}
+#endif
int filters_init(char*** ptrfilters, int maxfilter, int filterinc) {
char** filters = *ptrfilters;
@@ -3291,94 +2298,78 @@ int filters_init(char*** ptrfilters, int maxfilter, int filterinc) {
}
// vérifier présence de l'arbo
-int structcheck(char* s) {
+HTSEXT_API int structcheck(char* s) {
// vérifier la présence des dossier(s)
char *a=s;
char nom[HTS_URLMAXSIZE*2];
char *b;
- char* structcheck_buff=NULL;
+ //inthash structcheck_hash=NULL;
if (strnotempty(s)==0) return 0;
if (strlen(s)>HTS_URLMAXSIZE) return 0;
// Get buffer address
- structcheck_buff=structcheck_init(0);
- if (!structcheck_buff)
+ /*
+ structcheck_hash = (inthash)structcheck_init(0);
+ if (structcheck_hash == NULL) {
return -1;
-
- if (strlen(structcheck_buff) > 65000) {
- strcpy(structcheck_buff,"#"); // réinit.. c'est idiot ** **
}
-
- if (structcheck_buff) {
- b=nom;
- do {
- if (*a) *b++=*a++;
- while((*a!='/') && (*a!='\0')) *b++=*a++;
- *b='\0'; // pas de ++ pour boucler
- if (*a=='/') { // toujours dossier
- if (strnotempty(nom)) {
- char tempo[HTS_URLMAXSIZE*2];
-
- strcpy(tempo,"#"); strcat(tempo,nom); strcat(tempo,"#");
- if (strstr(structcheck_buff,tempo)==NULL) { // non encore créé
-
- /* Check room */
- structcheck_init(strlen(structcheck_buff) + strlen(nom) + 8192);
- if (!structcheck_buff)
- return -1;
+ */
- strcat(structcheck_buff,"#"); strcat(structcheck_buff,nom); strcat(structcheck_buff,"#"); // ajouter à la liste
-
+ b=nom;
+ do {
+ if (*a) *b++=*a++;
+ while((*a!='/') && (*a!='\0')) *b++=*a++;
+ *b='\0'; // pas de ++ pour boucler
+ if (*a=='/') { // toujours dossier
+ if (strnotempty(nom)) {
+ //if (inthash_write(structcheck_hash, nom, 1)) { // non encore créé
#if HTS_WIN
- if (mkdir(fconv(nom))!=0)
+ if (mkdir(fconv(nom))!=0)
#else
- if (mkdir(fconv(nom),HTS_ACCESS_FOLDER)!=0)
+ if (mkdir(fconv(nom),HTS_ACCESS_FOLDER)!=0)
#endif
- {
+ {
#if HTS_REMOVE_ANNOYING_INDEX
- // might be a filename with same name than this folder
- // then, remove it to allow folder creation
- // it happends when servers gives a folder index while
- // requesting / page
- // -> if the file can be opened (not a folder) then rename it
- FILE* fp=fopen(fconv(nom),"ab");
- if (fp) {
- fclose(fp);
- rename(fconv(nom),fconcat(fconv(nom),".txt"));
- }
- // if it fails, that's too bad
+ // might be a filename with same name than this folder
+ // then, remove it to allow folder creation
+ // it happends when servers gives a folder index while
+ // requesting / page
+ // -> if the file can be opened (not a folder) then rename it
+ if (fexist(fconv(nom))) {
+ rename(fconv(nom),fconcat(fconv(nom),".txt"));
+ }
+ // if it fails, that's too bad
#if HTS_WIN
- mkdir(fconv(nom));
+ mkdir(fconv(nom));
#else
- mkdir(fconv(nom),HTS_ACCESS_FOLDER);
+ mkdir(fconv(nom),HTS_ACCESS_FOLDER);
#endif
#endif
- // Si existe déja renvoie une erreur.. tant pis
- }
+ // Si existe déja renvoie une erreur.. tant pis
+ }
#if HTS_WIN==0
- chmod(fconv(nom),HTS_ACCESS_FOLDER);
+ /*chmod(fconv(nom),HTS_ACCESS_FOLDER);*/
#endif
- }
- }
- *b++=*a++; // slash
- }
- } while(*a);
- }
+ //}
+ }
+ *b++=*a++; // slash
+ }
+ } while(*a);
return 0;
}
// sauver un fichier
-int filesave(char* adr,int len,char* s) {
+int filesave(httrackp* opt,char* adr,int len,char* s,char* url_adr,char* url_fil) {
FILE* fp;
// écrire le fichier
if ((fp=filecreate(s))!=NULL) {
int nl=0;
if (len>0) {
- nl=(int) fwrite(adr,1,len,fp);
+ nl=(int) fwrite(adr,1,(INTsys)len,fp);
}
fclose(fp);
- usercommand(0,NULL,antislash(s));
+ //xxusercommand(opt,0,NULL,fconv(s),url_adr,url_fil);
if (nl!=len) // erreur
return -1;
} else
@@ -3387,6 +2378,24 @@ int filesave(char* adr,int len,char* s) {
return 0;
}
+/*
+ * Tell whether the current errno denotes an I/O failure after which we
+ * should stop. Returns 1 when errno is EMFILE, ENOSPC or EROFS (each one
+ * only tested when the platform defines it), 0 for any other value.
+ */
+int check_fatal_io_errno(void) {
+  const int code = errno;
+#ifdef EMFILE
+  if (code == EMFILE)         /* Too many open files */
+    return 1;
+#endif
+#ifdef ENOSPC
+  if (code == ENOSPC)         /* No space left on device */
+    return 1;
+#endif
+#ifdef EROFS
+  if (code == EROFS)          /* Read-only file system */
+    return 1;
+#endif
+  (void) code;                /* silence "unused" when no code is defined */
+  return 0;
+}
+
// ouvrir un fichier (avec chemin Un*x)
FILE* filecreate(char* s) {
@@ -3397,8 +2406,8 @@ FILE* filecreate(char* s) {
// noter lst
filenote(s,NULL);
- // if (*s=='/') strcpy(fname,s+1); else strcpy(fname,s); // pas de / (root!!) // ** SIIIIIII!!! à cause de -O <path>
- strcpy(fname,s);
+ // if (*s=='/') strcpybuff(fname,s+1); else strcpybuff(fname,s); // pas de / (root!!) // ** SIIIIIII!!! à cause de -O <path>
+ strcpybuff(fname,s);
#if HTS_DOSNAME
// remplacer / par des slash arrière
@@ -3413,13 +2422,14 @@ FILE* filecreate(char* s) {
// a partir d'ici le slash devient antislash
#endif
- // construite le chemin si besoin est
- if (structcheck(s)!=0) {
- return NULL;
- }
-
// ouvrir
fp=fopen(fname,"wb");
+ if (fp == NULL) {
+ // construire le chemin si besoin est
+ (void)structcheck(s);
+ fp=fopen(fname,"wb");
+ }
+
#if HTS_WIN==0
if (fp!=NULL) chmod(fname,HTS_ACCESS_FILE);
#endif
@@ -3450,16 +2460,16 @@ int filenote(char* s,filecreate_params* params) {
// gestion du fichier liste liste
if (params) {
//filecreate_params* p = (filecreate_params*) params;
- strcpy(strc->path,params->path);
+ strcpybuff(strc->path,params->path);
strc->lst=params->lst;
return 0;
} else if (strc->lst) {
char savelst[HTS_URLMAXSIZE*2];
- strcpy(savelst,fslash(s));
+ strcpybuff(savelst,fslash(s));
// couper chemin?
if (strnotempty(strc->path)) {
if (strncmp(fslash(strc->path),savelst,strlen(strc->path))==0) { // couper
- strcpy(savelst,s+strlen(strc->path));
+ strcpybuff(savelst,s+strlen(strc->path));
}
}
fprintf(strc->lst,"[%s]"LF,savelst);
@@ -3469,30 +2479,35 @@ int filenote(char* s,filecreate_params* params) {
}
// executer commande utilisateur
+static void postprocess_file(httrackp* opt,char* save, char* adr, char* fil);
typedef struct {
int exe;
char cmd[2048];
} usercommand_strc;
-HTS_INLINE void usercommand(int _exe,char* _cmd,char* file) {
+HTS_INLINE void usercommand(httrackp* opt,int _exe,char* _cmd,char* file,char* adr,char* fil) {
usercommand_strc* strc;
NOSTATIC_RESERVE(strc, usercommand_strc, 1);
-
+
+ /* Callback */
if (_exe) {
- strcpy(strc->cmd,_cmd);
+ strcpybuff(strc->cmd,_cmd);
if (strnotempty(strc->cmd))
strc->exe=_exe;
else
strc->exe=0;
}
+ /* post-processing */
+ postprocess_file(opt, file, adr, fil);
+
#if HTS_ANALYSTE
- if (hts_htmlcheck_filesave)
- if (strnotempty(file))
+ if (hts_htmlcheck_filesave != NULL)
+ if (file != NULL && strnotempty(file))
hts_htmlcheck_filesave(file);
#endif
if (strc->exe) {
- if (strnotempty(file)) {
+ if (file != NULL && strnotempty(file)) {
if (strnotempty(strc->cmd)) {
usercommand_exe(strc->cmd,file);
}
@@ -3507,16 +2522,119 @@ void usercommand_exe(char* cmd,char* file) {
//
for(i=0;i<(int) strlen(cmd);i++) {
if ((cmd[i]=='$') && (cmd[i+1]=='0')) {
- strcat(temp,file);
+ strcatbuff(temp,file);
i++;
} else {
c[0]=cmd[i]; c[1]='\0';
- strcat(temp,c);
+ strcatbuff(temp,c);
}
}
system(temp);
}
+
+/*
+ * Post-processing hook called for each saved file.
+ *
+ * Only active when opt->mimehtml is set: the saved file is appended as one
+ * part of the multipart MIME archive "index.mht" created under
+ * opt->path_html.
+ *
+ *  opt  : engine options/state (may be NULL -> nothing is done)
+ *  save : local path of the file just saved; NULL means "finalize": the
+ *         closing boundary is written and the archive is closed
+ *  adr  : address part of the source URL; NULL, empty or "primary" -> the
+ *         file is not added
+ *  fil  : file part of the source URL; NULL -> the file is not added
+ *
+ * The archive is opened lazily on the first eligible file; if it can not be
+ * created, mimehtml_created is set to -1 and no further attempt is made.
+ */
+static void postprocess_file(httrackp* opt,char* save, char* adr, char* fil) {
+  int first = 0;    /* only referenced by the disabled "inline" disposition below */
+  /* MIME-html archive to build */
+  if (opt != NULL && opt->mimehtml) {
+    if (adr != NULL && strcmp(adr, "primary") == 0) {
+      adr = NULL;
+    }
+    /* fil is concatenated into the Content-ID below: it must not be NULL */
+    if (save != NULL && adr != NULL && adr[0] && fil != NULL && strnotempty(save) && fexist(save)) {
+      char* rsc_save = save;
+      int n;
+      /* Strip the mirror root from the name advertised in the archive */
+      if (strncmp(fslash(save), fslash(opt->path_html), (n = (int)strlen(opt->path_html))) == 0) {
+        rsc_save += n;
+      }
+
+      if (!opt->state.mimehtml_created) {
+        first = 1;
+        opt->state.mimefp = fopen(fconcat(opt->path_html,"index.mht"), "wb");
+        if (opt->state.mimefp != NULL) {
+          char rndtmp[1024], currtime[256];
+          srand(time(NULL));
+          time_gmt_rfc822(currtime);
+          sprintf(rndtmp, "%d_%d", (int)time(NULL), (int) rand());
+          sprintf(opt->state.mimemid, "----=_MIMEPart_%s_=----", rndtmp);
+          fprintf(opt->state.mimefp, "From: HTTrack Website Copier <nobody@localhost>\r\n"
+            "Subject: Local mirror\r\n"
+            "Date: %s\r\n"
+            "Message-ID: <httrack_%s@localhost>\r\n"
+            "Content-Type: multipart/related;\r\n"
+            "\tboundary=\"%s\";\r\n"
+            "\ttype=\"text/html\"\r\n"
+            "MIME-Version: 1.0\r\n"
+            "\r\nThis message is a RFC MIME-compliant multipart message.\r\n"
+            "\r\n"
+            , currtime, rndtmp, opt->state.mimemid);
+          opt->state.mimehtml_created = 1;
+        } else {
+          opt->state.mimehtml_created = -1;
+          if ( opt->errlog != NULL ) {
+            /* report on the stream that was actually checked (was opt->log) */
+            fspc(opt->errlog,"error"); fprintf(opt->errlog,"unable to create index.mht"LF);
+          }
+        }
+      }
+      if (opt->state.mimehtml_created == 1 && opt->state.mimefp != NULL) {
+        FILE* fp = fopen(save, "rb");
+        if (fp != NULL) {
+          char buff[60*100 + 2];
+          char mimebuff[256];
+          char cid[HTS_URLMAXSIZE*3];
+          int len;
+          int isHtml = ( ishtml(save) == 1 );
+          mimebuff[0] = '\0';
+
+          /* CID: escaped adr+fil, with every '%' replaced by 'X' */
+          strcpybuff(cid, adr);
+          strcatbuff(cid, fil);
+          escape_in_url(cid);
+          { char* a = cid; while((a = strchr(a, '%'))) { *a = 'X'; a++; } }
+
+          guess_httptype(mimebuff, save);
+          fprintf(opt->state.mimefp, "--%s\r\n", opt->state.mimemid);
+          /*if (first)
+            fprintf(opt->state.mimefp, "Content-disposition: inline\r\n");
+          else*/
+          fprintf(opt->state.mimefp, "Content-disposition: attachment; filename=\"%s\"\r\n", rsc_save);
+          fprintf(opt->state.mimefp,
+            "Content-Type: %s\r\n"
+            "Content-Transfer-Encoding: %s\r\n"
+            /*"Content-Location: http://localhost/%s\r\n"*/
+            "Content-ID: <%s>\r\n"
+            "\r\n"
+            , mimebuff
+            , isHtml ? "8bit" : "base64"
+            /*, rsc_save*/
+            , cid);
+          while((len = fread(buff, 1, sizeof(buff) - 2, fp)) > 0) {
+            buff[len] = '\0';
+            if (!isHtml) {
+              char base64buff[60*100*2];
+              code64((unsigned char*)buff, len, (unsigned char*)base64buff, 1);
+              fprintf(opt->state.mimefp, "%s", base64buff);
+            } else {
+              /* write exactly what was read: "%s" would stop at an embedded NUL */
+              fwrite(buff, 1, (size_t) len, opt->state.mimefp);
+            }
+          }
+          fclose(fp);
+          fprintf(opt->state.mimefp, "\r\n\r\n");
+        }
+      }
+    } else if (save == NULL) {
+      /* Finalize: closing boundary, then close the archive */
+      if (opt->state.mimehtml_created == 1 && opt->state.mimefp != NULL) {
+        fprintf(opt->state.mimefp,
+          "--%s--\r\n", opt->state.mimemid);
+        fclose(opt->state.mimefp);
+        opt->state.mimefp = NULL;
+      }
+    }
+  }
+}
+
// écrire n espaces dans fp
typedef struct {
int error;
@@ -3534,6 +2652,10 @@ HTS_INLINE int fspc(FILE* fp,char* type) {
struct tm* A;
tt=time(NULL);
A=localtime(&tt);
+ if (A == NULL) {
+ int localtime_returned_null=0;
+ assert(localtime_returned_null);
+ }
strftime(s,250,"%H:%M:%S",A);
if (strnotempty(type))
fprintf(fp,"%s\t%c%s: \t",s,hichar(*type),type+1);
@@ -3611,9 +2733,10 @@ HTS_INLINE int back_fillmax(lien_back* back,int back_max,httrackp* opt,cache_bac
// remplir backing
int back_fill(lien_back* back,int back_max,httrackp* opt,cache_back* cache,lien_url** liens,int ptr,int numero_passe,int lien_tot) {
int n;
+ int oneLess = ( (_hts_in_html_parsing == 2 && opt->maxsoc >= 2) || (_hts_in_html_parsing == 1 && opt->maxsoc >= 4) ) ? 1 : 0; // testing links
// ajouter autant de socket qu'on peut ajouter
- n=opt->maxsoc-back_nsoc(back,back_max);
+ n=opt->maxsoc-back_nsoc(back,back_max) - oneLess;
// vérifier qu'il restera assez de place pour les tests ensuite (en théorie, 1 entrée libre restante suffirait)
n=min( n, back_available(back,back_max) - 8 );
@@ -3633,7 +2756,7 @@ int back_fill(lien_back* back,int back_max,httrackp* opt,cache_back* cache,lien_
/* on a déja parcouru */
if (p<cache->ptr_ant)
p=cache->ptr_ant;
- while( (p<lien_tot) && (n>0) ) {
+ while( (p<lien_tot) && (n>0) && back_checkmirror(opt)) {
//while((p<lien_tot) && (n>0) && (p < ptr+opt->maxcache_anticipate)) {
int ok=1;
@@ -3750,6 +2873,7 @@ void sig_ask( int code ) { // demander
httrackp* opt=hts_declareoptbuffer(NULL);
if (opt) {
// ask for stop
+ printf("finishing pending transfers.. please wait\n");
opt->state.stop=1;
}
signal(code,sig_ask); // remettre signal
@@ -3762,14 +2886,16 @@ void sig_ask( int code ) { // demander
void sig_ignore( int code ) { // ignorer signal
}
void sig_brpipe( int code ) { // treat if necessary
+ /*
if (!sig_ignore_flag(-1)) {
sig_term(code);
}
+ */
}
void sig_doback(int blind) { // mettre en backing
int out=-1;
//
- printf("\nMoving to background to complete the mirror...\n"); fflush(stdout);
+ printf("\nMoving into background to complete the mirror...\n"); fflush(stdout);
{
httrackp* opt=hts_declareoptbuffer(NULL);
@@ -3839,6 +2965,29 @@ HTS_INLINE int check_stdin(void) {
#endif
#endif
+/*
+ * Poll socket s, without waiting, for an exceptional condition.
+ * s is placed alone in the "except" set of a zero-timeout select();
+ * the return value is FD_ISSET() on that set afterwards.
+ */
+HTS_INLINE int check_sockerror(T_SOC s) {
+  struct timeval no_wait;
+  fd_set except_set;
+
+  no_wait.tv_sec = 0;
+  no_wait.tv_usec = 0;
+  FD_ZERO(&except_set);
+  FD_SET((T_SOC) s, &except_set);
+  select(s + 1, NULL, NULL, &except_set, &no_wait);
+  return FD_ISSET(s, &except_set);
+}
+
+/*
+ * check incoming data
+ * Poll socket s, without waiting, for readability.
+ * s is placed alone in the "read" set of a zero-timeout select();
+ * the return value is FD_ISSET() on that set afterwards.
+ */
+HTS_INLINE int check_sockdata(T_SOC s) {
+  struct timeval no_wait;
+  fd_set read_set;
+
+  no_wait.tv_sec = 0;
+  no_wait.tv_usec = 0;
+  FD_ZERO(&read_set);
+  FD_SET((T_SOC) s, &read_set);
+  select(s + 1, &read_set, NULL, NULL, &no_wait);
+  return FD_ISSET(s, &read_set);
+}
+
// Attente de touche
#if HTS_ANALYSTE
int ask_continue(void) {
@@ -3896,12 +3045,18 @@ char* next_token(char* p,int flag) {
if (c) {
char tempo[8192];
tempo[0]=c; tempo[1]='\0';
- strcat(tempo,p+2);
- strcpy(p,tempo);
+ strcatbuff(tempo,p+2);
+ strcpybuff(p,tempo);
}
}
}
else if (*p==34) { // guillemets (de fin)
+ char tempo[8192];
+ tempo[0]='\0';
+ strcatbuff(tempo,p+1);
+ strcpybuff(p,tempo); /* wipe "" */
+ p--;
+ /* */
quote=!quote;
}
else if (*p==32) {
@@ -3920,18 +3075,18 @@ char* next_token(char* p,int flag) {
#if HTS_ANALYSTE
// canceller un fichier (noter comme cancellable)
// !!NOT THREAD SAFE!!
-char* hts_cancel_file(char * s) {
+HTSEXT_API char* hts_cancel_file(char * s) {
static char sav[HTS_URLMAXSIZE*2]="";
if (s[0]!='\0')
if (sav[0]=='\0')
- strcpy(sav,s);
+ strcpybuff(sav,s);
return sav;
}
-void hts_cancel_test(void) {
+HTSEXT_API void hts_cancel_test(void) {
if (_hts_in_html_parsing==2)
_hts_cancel=2;
}
-void hts_cancel_parsing(void) {
+HTSEXT_API void hts_cancel_parsing(void) {
if (_hts_in_html_parsing)
_hts_cancel=1;
}
@@ -3940,7 +3095,7 @@ void hts_cancel_parsing(void) {
// i=(back_index+_i)%back_max; // commencer par le "premier" (l'actuel)
// if (back[i].status>=0) { // signifie "lien actif"
-
+#if 0
/*
hts_add_file, add/get elements in the add chain for java parsing
if file_position >= 0
@@ -3958,7 +3113,7 @@ typedef struct addfile_chain {
struct addfile_chain* next;
} addfile_chain;
typedef addfile_chain* addfile_chain_ptr;
-int hts_add_file(char* file,int file_position) {
+int opt->(char* file,int file_position) {
addfile_chain** chain;
NOSTATIC_RESERVE(chain, addfile_chain_ptr, 1);
@@ -3977,7 +3132,7 @@ int hts_add_file(char* file,int file_position) {
(*current)->name[0]='\0';
}
if (*current) {
- strcpy((*current)->name,file);
+ strcpybuff((*current)->name,file);
(*current)->pos=file_position;
return 1;
} else {
@@ -3995,7 +3150,7 @@ int hts_add_file(char* file,int file_position) {
current=&( (*current)->next ); /* 'next' address */
}
if (file)
- strcpy(file,(*current)->name);
+ strcpybuff(file,(*current)->name);
pos=(*current)->pos;
freet(*current);
*current=NULL;
@@ -4006,11 +3161,12 @@ int hts_add_file(char* file,int file_position) {
return 0;
}
+#endif
#if HTS_ANALYSTE
// en train de parser un fichier html? réponse: % effectués
// flag>0 : refresh demandé
-int hts_is_parsing(int flag) {
+HTSEXT_API int hts_is_parsing(int flag) {
if (_hts_in_html_parsing) { // parsing?
if (flag>=0) _hts_in_html_poll=1; // faudrait un tit refresh
return max(_hts_in_html_done,1); // % effectués
@@ -4018,24 +3174,29 @@ int hts_is_parsing(int flag) {
return 0; // non
}
}
-int hts_is_testing(void) { // 0 non 1 test 2 purge
+HTSEXT_API int hts_is_testing(void) { // 0 non 1 test 2 purge
if (_hts_in_html_parsing==2)
return 1;
else if (_hts_in_html_parsing==3)
return 2;
+ else if (_hts_in_html_parsing==4)
+ return 3;
return 0;
}
+/* Exported accessor: returns the current value of the exit_xh flag. */
+HTSEXT_API int hts_is_exiting(void) {
+ return exit_xh;
+}
// message d'erreur?
char* hts_errmsg(void) {
return _hts_errmsg;
}
// mode pause transfer
-int hts_setpause(int p) {
+HTSEXT_API int hts_setpause(int p) {
if (p>=0) _hts_setpause=p;
return _hts_setpause;
}
// ask for termination
-int hts_request_stop(int force) {
+HTSEXT_API int hts_request_stop(int force) {
httrackp* opt=hts_declareoptbuffer(NULL);
if (opt) {
opt->state.stop=1;
@@ -4044,7 +3205,7 @@ int hts_request_stop(int force) {
}
// régler en cours de route les paramètres réglables..
// -1 : erreur
-int hts_setopt(httrackp* set_opt) {
+HTSEXT_API int hts_setopt(httrackp* set_opt) {
if (set_opt) {
httrackp* engine_opt=hts_declareoptbuffer(NULL);
if (engine_opt) {
@@ -4056,16 +3217,16 @@ int hts_setopt(httrackp* set_opt) {
}
// ajout d'URL
// -1 : erreur
-int hts_addurl(char** url) {
+HTSEXT_API int hts_addurl(char** url) {
if (url) _hts_addurl=url;
return (_hts_addurl!=NULL);
}
-int hts_resetaddurl(void) {
+HTSEXT_API int hts_resetaddurl(void) {
_hts_addurl=NULL;
return (_hts_addurl!=NULL);
}
// copier nouveaux paramètres si besoin
-int copy_htsopt(httrackp* from,httrackp* to) {
+HTSEXT_API int copy_htsopt(httrackp* from,httrackp* to) {
if (from->maxsite > -1)
to->maxsite = from->maxsite;
@@ -4094,7 +3255,7 @@ int copy_htsopt(httrackp* from,httrackp* to) {
to->maxrate = from->maxrate;
if (strnotempty(from->user_agent))
- strcpy(to->user_agent , from->user_agent);
+ strcpybuff(to->user_agent , from->user_agent);
if (from->retry > -1)
to->retry = from->retry;
@@ -4124,6 +3285,248 @@ int copy_htsopt(httrackp* from,httrackp* to) {
#endif
//
+/* External modules callback */
+/*
+ * Register a link detected by an external parsing module.
+ *
+ *  str  : module context; carries the engine state (links table, options,
+ *         back/cache/hash structures) plus pointers to the counters that
+ *         are read on entry and written back on exit (lien_tot_, ptr_,
+ *         lien_size_, lien_buffer_)
+ *  link : link as found by the module, resolved against the codebase of
+ *         the current link (or of its parent when str->relativeToHtmlLink
+ *         is set)
+ *
+ * If str->localLink is set and large enough (str->localLinkSize), it
+ * receives either the relative local path of the accepted link, or the
+ * absolute URL when the link is not caught.
+ *
+ * Returns 1 when the link was accepted by hts_acceptlink(), 0 otherwise
+ * (NULL/empty arguments, link refused by the external wrapper, link
+ * rejected, or out of memory - in which case exit_xh is set to -1).
+ */
+int htsAddLink(htsmoduleStruct* str, char* link) {
+  if (link != NULL && str != NULL && link[0] != '\0') {
+    lien_url** liens = (lien_url**) str->liens;
+    httrackp* opt = (httrackp*) str->opt;
+    lien_back* back = (lien_back*) str->back;
+    cache_back* cache = (cache_back*) str->cache;
+    hash_struct* hashptr = (hash_struct*) str->hashptr;
+    int back_max = str->back_max;
+    int numero_passe = str->numero_passe;
+    int add_tab_alloc = str->add_tab_alloc;
+    /* */
+    int lien_tot = * ( (int*) (str->lien_tot_) );
+    int ptr = * ( (int*) (str->ptr_) );
+    int lien_size = * ( (int*) (str->lien_size_) );
+    char* lien_buffer = * ( (char**) (str->lien_buffer_) );
+    /* */
+    /* */
+    char adr[HTS_URLMAXSIZE*2],
+         fil[HTS_URLMAXSIZE*2],
+         save[HTS_URLMAXSIZE*2];
+    char codebase[HTS_URLMAXSIZE*2];
+    /* */
+    int pass_fix, prio_fix;
+    /* */
+    int forbidden_url = 1;
+
+    codebase[0]='\0';
+
+    if ((opt->debug>1) && (opt->log!=NULL)) {
+      fspc(opt->log,"debug"); fprintf(opt->log,"(module): adding link : '%s'"LF, link); test_flush;
+    }
+    // copied from the "create the link" code
+    //
+
+#if HTS_ANALYSTE
+    if (!hts_htmlcheck_linkdetected(link)) {
+      if (opt->errlog) {
+        fspc(opt->errlog,"error"); fprintf(opt->errlog,"Link %s refused by external wrapper"LF, link);
+        test_flush;
+      }
+      return 0;
+    }
+#endif
+
+    // adr = the same one
+    // fil and save: save2 and fil2
+    prio_fix=maximum(liens[ptr]->depth-1,0);
+    pass_fix=max(liens[ptr]->pass2,numero_passe);
+    if (liens[ptr]->cod) strcpybuff(codebase,liens[ptr]->cod);       // codebase valid for all descendant classes
+    if (strnotempty(codebase)==0) {    // no codebase, build one
+      char* a;
+      if (str->relativeToHtmlLink == 0)
+        strcpybuff(codebase,liens[ptr]->fil);
+      else
+        strcpybuff(codebase,liens[liens[ptr]->precedent]->fil);
+      a=codebase+strlen(codebase)-1;
+      while((*a) && (*a!='/') && ( a > codebase)) a--;
+      if (*a=='/')
+        *(a+1)='\0';    // cut after the last slash
+    } else {    // strip a possible leading http://
+      if (strfield(codebase,"http://")) {
+        char tempo[HTS_URLMAXSIZE*2];
+        char* a=codebase+7;
+        a=strchr(a,'/');    // after the host
+        if (a) {  // ** error message and check?
+          strcpybuff(tempo,a);
+          strcpybuff(codebase,tempo);    // strip the host
+        } else {
+          if (opt->errlog) {
+            fprintf(opt->errlog,"Unexpected strstr error in base %s"LF,codebase);
+            test_flush;
+          }
+        }
+      }
+    }
+
+    if (!((int) strlen(codebase)<HTS_URLMAXSIZE)) {    // too long
+      if (opt->errlog) {
+        fprintf(opt->errlog,"Codebase too long, parsing skipped (%s)"LF,codebase);
+        test_flush;
+      }
+    }
+
+    {
+      char* lien = link;
+      int dejafait=0;
+
+      if (strnotempty(lien) && strlen(lien) < HTS_URLMAXSIZE) {
+
+        // compute the paths and save names
+        if (ident_url_relatif(lien,urladr,codebase,adr,fil)>=0) { // rebuilt according to the path
+          int r;
+          int set_prio_to = 0;
+          int just_test_it = 0;
+          forbidden_url = hts_acceptlink(opt, ptr, lien_tot, liens,
+            adr,fil,
+            &set_prio_to,
+            &just_test_it);
+          if ((opt->debug>1) && (opt->log!=NULL)) {
+            fspc(opt->log,"debug"); fprintf(opt->log,"result for wizard external module link: %d"LF,forbidden_url);
+            test_flush;
+          }
+
+          /* Link accepted */
+          if (!forbidden_url) {
+            char tempo[HTS_URLMAXSIZE*2];
+            int a,b;
+            tempo[0]='\0';
+            a=opt->savename_type;
+            b=opt->savename_83;
+            opt->savename_type=0;
+            opt->savename_83=0;
+            // note: adr,fil may be patched
+            r=url_savename(adr,fil,save,NULL,NULL,NULL,NULL,opt,liens,lien_tot,back,back_max,cache,hashptr,ptr,numero_passe);
+            opt->savename_type=a;
+            opt->savename_83=b;
+            if (r != -1) {
+              if (savename) {
+                if (lienrelatif(tempo,save,savename)==0) {
+                  if ((opt->debug>1) && (opt->log!=NULL)) {
+                    fspc(opt->log,"debug"); fprintf(opt->log,"(module): relative link at %s build with %s and %s: %s"LF,adr,save,savename,tempo);
+                    test_flush;
+                  }
+                  /* Hand the relative local link back to the module; this must
+                     not depend on debug logging being enabled */
+                  if (str->localLink && str->localLinkSize > (int) strlen(tempo) + 1) {
+                    strcpybuff(str->localLink, tempo);
+                  }
+                }
+              }
+            }
+          } else {
+            if ((opt->debug>1) && (opt->log!=NULL)) {
+              fspc(opt->log,"debug"); fprintf(opt->log,"(module): file not caught: %s"LF,lien); test_flush;
+            }
+            /* Not mirrored: give the module the absolute URL instead */
+            if (str->localLink && str->localLinkSize > (int) ( strlen(adr) + strlen(fil) + 8 ) ) {
+              str->localLink[0] = '\0';
+              if (!link_has_authority(adr))
+                strcpybuff(str->localLink,"http://");
+              strcatbuff(str->localLink, adr);
+              strcatbuff(str->localLink, fil);
+            }
+            r=-1;
+          }
+          //
+          if (r != -1) {
+            if ((opt->debug>1) && (opt->log!=NULL)) {
+              fspc(opt->log,"debug"); fprintf(opt->log,"(module): %s%s -> %s (base %s)"LF,adr,fil,save,codebase); test_flush;
+            }
+
+            // changed compared to the other version (see prio_fix and save2)
+
+            // check that the link has not already been recorded
+            // if it has, make sure the priority associated with the file
+            // is the greater of the two priorities
+            //
+            // Start from the end and try to be quick (saves CPU time)
+#if HTS_HASH
+            {
+              int i=hash_read(hashptr,save,"",0,opt->urlhack);      // type 0 lookup (sav)
+              if (i>=0) {
+                liens[i]->depth=maximum(liens[i]->depth,prio_fix);
+                dejafait=1;
+              }
+            }
+#else
+            {
+              int l;
+              int i;
+              l=strlen(save);
+              for(i=lien_tot-1;(i>=0) && (dejafait==0);i--) {
+                if (liens[i]->sav_len==l) {    // same string length
+                  if (strcmp(liens[i]->sav,save)==0) {    // already exists
+                    liens[i]->depth=maximum(liens[i]->depth,prio_fix);
+                    dejafait=1;
+                  }
+                }
+              }
+            }
+#endif
+
+
+            if (!dejafait) {
+              //
+              // >>>> CREATE THE JAVA LINK <<<<
+
+              // record the file (MACRO)
+              liens_record(adr,fil,save,"","",opt->urlhack);
+              if (liens[lien_tot]==NULL) {  // error, no room reserved
+                printf("PANIC! : Not enough memory [%d]\n",__LINE__);
+                if (opt->errlog) {
+                  fprintf(opt->errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url)));
+                  test_flush;
+                }
+                exit_xh=-1;    /* fatal error -> exit */
+                return 0;
+              }
+
+              // test mode?
+              liens[lien_tot]->testmode=0;          // not in test mode
+
+              liens[lien_tot]->link_import=0;       // not in import mode
+
+              // write the other fields of the link structure
+              //if (meme_adresse)
+              liens[lien_tot]->premier=liens[ptr]->premier;
+              //else    // otherwise the parent object is the previous one itself
+              //  liens[lien_tot]->premier=ptr;
+
+              liens[lien_tot]->precedent=ptr;
+              // record the priority
+              if (!set_prio_to)
+                liens[lien_tot]->depth=prio_fix;
+              else
+                liens[lien_tot]->depth=max(0,min(liens[ptr]->depth-1,set_prio_to-1));         // NULL PRIORITY (catch page)
+              liens[lien_tot]->pass2=max(pass_fix,numero_passe);
+              liens[lien_tot]->retry=opt->retry;
+
+              //strcpybuff(liens[lien_tot]->adr,adr);
+              //strcpybuff(liens[lien_tot]->fil,fil);
+              //strcpybuff(liens[lien_tot]->sav,save);
+              if ((opt->debug>1) && (opt->log!=NULL)) {
+                fspc(opt->log,"debug"); fprintf(opt->log,"(module): OK, NOTE: %s%s -> %s"LF,liens[lien_tot]->adr,liens[lien_tot]->fil,liens[lien_tot]->sav);
+                test_flush;
+              }
+
+              lien_tot++;  // ONE MORE LINK
+            }
+          }
+        }
+      }
+    }
+
+    /* Apply changes */
+    * ( (int*) (str->lien_tot_) ) = lien_tot;
+    * ( (int*) (str->ptr_) ) = ptr;
+    * ( (int*) (str->lien_size_) ) = lien_size;
+    * ( (char**) (str->lien_buffer_) ) = lien_buffer;
+    return (forbidden_url == 0);
+  }
+  return 0;
+}
+
diff --git a/src/htscore.h b/src/htscore.h
index a50aac8..d9e5d0a 100644
--- a/src/htscore.h
+++ b/src/htscore.h
@@ -55,8 +55,10 @@ Please visit our Website: http://www.httrack.com
#include <direct.h>
#else
#include <signal.h>
+#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
+#endif
/* END specific definitions */
@@ -69,13 +71,13 @@ Please visit our Website: http://www.httrack.com
typedef struct {
char firstblock; // flag 1=premier malloc
char link_import; // lien importé à la suite d'un moved - ne pas appliquer les règles classiques up/down
- short int depth; // profondeur autorisée lien ; >0 forte 0=faible
- short int pass2; // traiter après les autres, seconde passe. si == -1, lien traité en background
+ int depth; // profondeur autorisée lien ; >0 forte 0=faible
+ int pass2; // traiter après les autres, seconde passe. si == -1, lien traité en background
int premier; // pointeur sur le premier lien qui a donné lieu aux autres liens du domaine
int precedent; // pointeur sur le lien qui a donné lieu à ce lien précis
- //int moved; // pointeur sur moved
- short int retry; // nombre de retry restants
- short int testmode; // mode test uniquement, envoyer juste un head!
+ //int moved; // pointeur sur moved
+ int retry; // nombre de retry restants
+ int testmode; // mode test uniquement, envoyer juste un head!
char* adr; // adresse
char* fil; // nom du fichier distant
char* sav; // nom à sauver sur disque (avec chemin éventuel)
@@ -101,10 +103,11 @@ typedef struct {
char referer_adr[HTS_URLMAXSIZE*2]; // adresse host page referer
char referer_fil[HTS_URLMAXSIZE*2]; // fichier page referer
char location_buffer[HTS_URLMAXSIZE*2]; // "location" en cas de "moved" (302,..)
- char tmpfile[HTS_URLMAXSIZE*2]; // nom à sauver temporairement (compressé)
+ char* tmpfile; // nom à sauver temporairement (compressé)
+ char tmpfile_buffer[HTS_URLMAXSIZE*2]; // buffer pour le nom à sauver temporairement
char send_too[1024]; // données à envoyer en même temps que le header
- int status; // status (-1=non utilisé, 0: prêt, >0: opération en cours)
- int testmode; // mode de test
+ int status; // status (-1=non utilisé, 0: prêt, >0: opération en cours)
+ int testmode; // mode de test
int timeout; // gérer des timeouts? (!=0 : nombre de secondes)
TStamp timeout_refresh; // si oui, time refresh
int rateout; // timeout refresh? (!=0 : taux minimum toléré en octets/s)
@@ -112,20 +115,23 @@ typedef struct {
LLint maxfile_nonhtml; // taille max d'un fichier non html
LLint maxfile_html; // idem pour un ficheir html
htsblk r; // structure htsblk de chaque objet en background
- short int is_update; // mode update
+ int is_update; // mode update
int head_request; // requète HEAD?
LLint range_req_size; // range utilisé
+ TStamp ka_time_start; // refresh time for KA
//
int http11; // L'en tête doit être signé HTTP/1.1 et non HTTP/1.0
int is_chunk; // chunk?
char* chunk_adr; // adresse chunk en cours de chargement
LLint chunk_size; // taille chunk en cours de chargement
+ LLint chunk_blocksize; // taille data declaree par le chunk
LLint compressed_size; // taille compressés (stats uniquement)
//
- short int* pass2_ptr; // pointeur sur liens[ptr]->pass2
+ int* pass2_ptr; // pointeur sur liens[ptr]->pass2
//
- char info[256]; // éventuel status pour le ftp
- int stop_ftp; // flag stop pour ftp
+ char info[256]; // éventuel status pour le ftp
+ int stop_ftp; // flag stop pour ftp
+ int finalized; // finalized (optim memory)
#if DEBUG_CHECKINT
char magic2;
#endif
@@ -136,6 +142,7 @@ typedef struct {
int version; // 0 ou 1
/* */
int type;
+ int ro;
FILE *dat,*ndx,*olddat;
char *use; // liste des adr+fil
FILE *lst; // liste des fichiers pour la "purge"
@@ -209,6 +216,7 @@ typedef struct {
// gestion hashage
#include "htshash.h"
+#include "htsinthash.h"
// gestion réentrance
#include "htsnostatic.h"
@@ -236,42 +244,57 @@ typedef int (* t_hts_htmlcheck)(char* html,int len,char* url_adresse,char* url
typedef char* (* t_hts_htmlcheck_query)(char* question);
typedef char* (* t_hts_htmlcheck_query2)(char* question);
typedef char* (* t_hts_htmlcheck_query3)(char* question);
-typedef int (* t_hts_htmlcheck_loop)(lien_back* back,int back_max,int back_index,int lien_tot,int lien_ntot,LLint stat_bytes,LLint stat_bytes_recv,int stat_time,int stat_nsocket, LLint stat_written, int stat_updated, int stat_errors, int irate, int nbk );
+typedef int (* t_hts_htmlcheck_loop)(lien_back* back,int back_max,int back_index,int lien_tot,int lien_ntot,int stat_time,hts_stat_struct* stats);
typedef int (* t_hts_htmlcheck_check)(char* adr,char* fil,int status);
typedef void (* t_hts_htmlcheck_pause)(char* lockfile);
+typedef void (* t_hts_htmlcheck_filesave)(char* file);
+typedef int (* t_hts_htmlcheck_linkdetected)(char* link);
+typedef int (* t_hts_htmlcheck_xfrstatus)(lien_back* back);
+typedef int (* t_hts_htmlcheck_savename)(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save);
+typedef int (* t_hts_htmlcheck_sendhead)(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* outgoing);
+typedef int (* t_hts_htmlcheck_receivehead)(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* incoming);
*/
// demande d'interaction avec le shell
#if HTS_ANALYSTE
//char HTbuff[1024];
/*
-extern t_hts_htmlcheck_init hts_htmlcheck_init;
-extern t_hts_htmlcheck_uninit hts_htmlcheck_uninit;
-extern t_hts_htmlcheck_start hts_htmlcheck_start;
-extern t_hts_htmlcheck_end hts_htmlcheck_end;
-extern t_hts_htmlcheck_chopt hts_htmlcheck_chopt;
-extern t_hts_htmlcheck hts_htmlcheck;
-extern t_hts_htmlcheck_query hts_htmlcheck_query;
-extern t_hts_htmlcheck_query2 hts_htmlcheck_query2;
-extern t_hts_htmlcheck_query3 hts_htmlcheck_query3;
-extern t_hts_htmlcheck_loop hts_htmlcheck_loop;
-extern t_hts_htmlcheck_check hts_htmlcheck_check;
-extern t_hts_htmlcheck_pause hts_htmlcheck_pause;
+extern t_hts_htmlcheck_init hts_htmlcheck_init;
+extern t_hts_htmlcheck_uninit hts_htmlcheck_uninit;
+extern t_hts_htmlcheck_start hts_htmlcheck_start;
+extern t_hts_htmlcheck_end hts_htmlcheck_end;
+extern t_hts_htmlcheck_chopt hts_htmlcheck_chopt;
+extern t_hts_htmlcheck hts_htmlcheck;
+extern t_hts_htmlcheck_query hts_htmlcheck_query;
+extern t_hts_htmlcheck_query2 hts_htmlcheck_query2;
+extern t_hts_htmlcheck_query3 hts_htmlcheck_query3;
+extern t_hts_htmlcheck_loop hts_htmlcheck_loop;
+extern t_hts_htmlcheck_check hts_htmlcheck_check;
+extern t_hts_htmlcheck_pause hts_htmlcheck_pause;
+extern t_hts_htmlcheck_filesave hts_htmlcheck_filesave;
+extern t_hts_htmlcheck_linkdetected hts_htmlcheck_linkdetected;
+extern t_hts_htmlcheck_xfrstatus hts_htmlcheck_xfrstatus;
+extern t_hts_htmlcheck_savename hts_htmlcheck_savename;
+extern t_hts_htmlcheck_sendhead hts_htmlcheck_sendhead;
+extern t_hts_htmlcheck_receivehead hts_htmlcheck_receivehead;
*/
//
-int hts_is_parsing(int flag);
-int hts_is_testing(void);
-int hts_setopt(httrackp* opt);
-int hts_addurl(char** url);
-int hts_resetaddurl(void);
-int copy_htsopt(httrackp* from,httrackp* to);
-char* hts_errmsg(void);
-int hts_setpause(int); // pause transfer
-int hts_request_stop(int force);
+#ifndef HTTRACK_DEFLIB
+HTSEXT_API int hts_is_parsing(int flag);
+HTSEXT_API int hts_is_testing(void);
+HTSEXT_API int hts_is_exiting(void);
+HTSEXT_API int hts_setopt(httrackp* opt);
+HTSEXT_API int hts_addurl(char** url);
+HTSEXT_API int hts_resetaddurl(void);
+HTSEXT_API int copy_htsopt(httrackp* from,httrackp* to);
+HTSEXT_API char* hts_errmsg(void);
+HTSEXT_API int hts_setpause(int); // pause transfer
+HTSEXT_API int hts_request_stop(int force);
//
-char* hts_cancel_file(char * s);
-void hts_cancel_test(void);
-void hts_cancel_parsing(void);
+HTSEXT_API char* hts_cancel_file(char * s);
+HTSEXT_API void hts_cancel_test(void);
+HTSEXT_API void hts_cancel_parsing(void);
+#endif
//
// Variables globales
extern int _hts_in_html_parsing;
@@ -291,17 +314,20 @@ extern int _hts_cancel;
//int httpmirror(char* url,int level,httrackp opt);
int httpmirror(char* url1,httrackp* opt);
-int filesave(char* adr,int len,char* s);
+int filesave(httrackp* opt,char* adr,int len,char* s,char* url_adr /* = NULL */,char* url_fil /* = NULL */);
+int check_fatal_io_errno(void);
int engine_stats(void);
-void host_ban(httrackp* opt,lien_url** liens,int ptr,int lien_tot,lien_back* back,int back_max,char** filters,int filter_max,int* filptr,char* host);
+void host_ban(httrackp* opt,lien_url** liens,int ptr,int lien_tot,lien_back* back,int back_max,char* host);
FILE* filecreate(char* s);
int filecreateempty(char* filename);
int filenote(char* s,filecreate_params* params);
-HTS_INLINE void usercommand(int exe,char* cmd,char* file);
+HTS_INLINE void usercommand(httrackp* opt,int exe,char* cmd,char* file,char* adr,char* fil);
void usercommand_exe(char* cmd,char* file);
-char* structcheck_init(int init);
+//void* structcheck_init(int init);
int filters_init(char*** ptrfilters, int maxfilter, int filterinc);
-int structcheck(char* s);
+#ifndef HTTRACK_DEFLIB
+HTSEXT_API int structcheck(char* s);
+#endif
HTS_INLINE int fspc(FILE* fp,char* type);
char* next_token(char* p,int flag);
//
@@ -322,16 +348,20 @@ int back_fillmax(lien_back* back,int back_max,httrackp* opt,cache_back* cache,li
// cancel file
#if HTS_ANALYSTE
-char* hts_cancel_file(char * s);
-void hts_cancel_test(void);
-void hts_cancel_parsing(void);
+#ifndef HTTRACK_DEFLIB
+HTSEXT_API char* hts_cancel_file(char * s);
+HTSEXT_API void hts_cancel_test(void);
+HTSEXT_API void hts_cancel_parsing(void);
+#endif
#endif
int ask_continue(void);
int nombre_digit(int n);
// Java
+#if 0
int hts_add_file(char* file,int file_position);
+#endif
// Polling
#if HTS_POLL
@@ -339,6 +369,8 @@ HTS_INLINE int check_flot(T_SOC s);
HTS_INLINE int check_stdin(void);
int read_stdin(char* s,int max);
#endif
+HTS_INLINE int check_sockerror(T_SOC s);
+HTS_INLINE int check_sockdata(T_SOC s);
httrackp* hts_declareoptbuffer(httrackp* optdecl);
void sig_finish( int code ); // finir et quitter
@@ -353,6 +385,9 @@ void sig_brpipe( int code ); // treat if necessary
void sig_doback(int); // mettre en arrière plan
#endif
+/* external modules */
+int htsAddLink(htsmoduleStruct* str, char* link);
+
// Void
void voidf(void);
diff --git a/src/htscoremain.c b/src/htscoremain.c
index a03635f..1162c18 100644
--- a/src/htscoremain.c
+++ b/src/htscoremain.c
@@ -42,15 +42,19 @@ Please visit our Website: http://www.httrack.com
#include "htsdefines.h"
#include "htsalias.h"
#include "htswrap.h"
+#include "htsmodules.h"
+
#include <ctype.h>
#if HTS_WIN
#else
#ifndef HTS_DO_NOT_USE_UID
/* setuid */
#include <pwd.h>
+#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#endif
+#endif
extern int exit_xh; // sortir prématurément
@@ -61,7 +65,7 @@ extern int IPV6_resolver;
// Add a command in the argc/argv
#define cmdl_add(token,argc,argv,buff,ptr) \
argv[argc]=(buff+ptr); \
- strcpy(argv[argc],token); \
+ strcpybuff(argv[argc],token); \
ptr += (strlen(argv[argc])+2); \
argc++
@@ -73,15 +77,56 @@ extern int IPV6_resolver;
argv[i]=argv[i-1];\
} \
argv[0]=(buff+ptr); \
- strcpy(argv[0],token); \
+ strcpybuff(argv[0],token); \
ptr += (strlen(argv[0])+2); \
argc++
#define htsmain_free() do { if (url != NULL) { free(url); } } while(0)
+#define ensureUrlCapacity(url, urlsize, size) do { /* Ensure the buffer `url` exists and holds at least `size` bytes, storing the new capacity in `urlsize`. Expands to statements containing `return -1`, so it is usable only inside a function returning int. `url`, `urlsize` and `size` are each evaluated more than once. */ \
+ if (urlsize < size || url == NULL) { /* skipped entirely when the buffer exists and urlsize is already >= size */ \
+ urlsize = size; \
+ if (url == NULL) { /* no buffer yet: allocate one */ \
+ url = (char*) malloct(urlsize); \
+ if (url != NULL) url[0]='\0'; /* a freshly allocated buffer starts as an empty string */ \
+ } else { /* buffer already exists: pass it to realloct with the new size */ \
+ url = (char*) realloct(url, urlsize); /* NOTE(review): the result overwrites url directly; if realloct behaves like realloc, a failure loses the old pointer before htsmain_free() runs - confirm */ \
+ } \
+ if (url == NULL) { /* allocation failed: report, call htsmain_free(), and return -1 from the enclosing function */ \
+ HTS_PANIC_PRINTF("* memory exhausted"); \
+ htsmain_free(); \
+ return -1; \
+ } \
+ } \
+} while(0)
+
+static void set_wrappers(void) { /* (Re)assign every global hts_htmlcheck_* callback pointer from htswrap_read(), looked up by string key; takes no arguments and returns nothing */
+#if HTS_ANALYSTE /* the assignments are compiled only when HTS_ANALYSTE is non-zero; otherwise the function body is empty */
+ // custom wrappers: each global below receives the value htswrap_read() returns for the given key, cast to the matching callback type
+ hts_htmlcheck_init = (t_hts_htmlcheck_init) htswrap_read("init");
+ hts_htmlcheck_uninit = (t_hts_htmlcheck_uninit) htswrap_read("free"); // note: the key is "free", not "uninit"
+ hts_htmlcheck_start = (t_hts_htmlcheck_start) htswrap_read("start");
+ hts_htmlcheck_end = (t_hts_htmlcheck_end) htswrap_read("end");
+ hts_htmlcheck_chopt = (t_hts_htmlcheck_chopt) htswrap_read("change-options");
+ hts_htmlcheck = (t_hts_htmlcheck) htswrap_read("check-html");
+ hts_htmlcheck_query = (t_hts_htmlcheck_query) htswrap_read("query");
+ hts_htmlcheck_query2 = (t_hts_htmlcheck_query2) htswrap_read("query2");
+ hts_htmlcheck_query3 = (t_hts_htmlcheck_query3) htswrap_read("query3");
+ hts_htmlcheck_loop = (t_hts_htmlcheck_loop) htswrap_read("loop");
+ hts_htmlcheck_check = (t_hts_htmlcheck_check) htswrap_read("check-link");
+ hts_htmlcheck_pause = (t_hts_htmlcheck_pause) htswrap_read("pause");
+ hts_htmlcheck_filesave = (t_hts_htmlcheck_filesave) htswrap_read("save-file");
+ hts_htmlcheck_linkdetected = (t_hts_htmlcheck_linkdetected) htswrap_read("link-detected");
+ hts_htmlcheck_xfrstatus = (t_hts_htmlcheck_xfrstatus) htswrap_read("transfer-status");
+ hts_htmlcheck_savename = (t_hts_htmlcheck_savename) htswrap_read("save-name");
+ hts_htmlcheck_sendhead = (t_hts_htmlcheck_sendhead) htswrap_read("send-header");
+ hts_htmlcheck_receivehead = (t_hts_htmlcheck_receivehead) htswrap_read("receive-header");
+#endif /* HTS_ANALYSTE */
+}
+
// Main, récupère les paramètres et appelle le robot
#if HTS_ANALYSTE
-int hts_main(int argc, char **argv) {
+HTSEXT_API int hts_main(int argc, char **argv) {
#else
int main(int argc, char **argv) {
#endif
@@ -96,6 +141,7 @@ int main(int argc, char **argv) {
int argv_url=-1; // ==0 : utiliser cache et doit.log
char* argv_firsturl=NULL; // utilisé pour nommage par défaut
char* url = NULL; // URLS séparées par un espace
+ int url_sz = 65535;
//char url[65536]; // URLS séparées par un espace
// the parametres
httrackp httrack;
@@ -113,33 +159,12 @@ int main(int argc, char **argv) {
int switch_chroot=0; /* chroot ? */
#endif
//
- url = malloc(65536);
- if (url == NULL) {
- HTS_PANIC_PRINTF("* memory exhausted");
- htsmain_free();
- return -1;
- }
- url[0]='\0';
+ ensureUrlCapacity(url, url_sz, 65536);
//
#if HTS_ANALYSTE
// custom wrappers
- hts_htmlcheck_init = (t_hts_htmlcheck_init) htswrap_read("init");
- hts_htmlcheck_uninit = (t_hts_htmlcheck_uninit) htswrap_read("free");
- hts_htmlcheck_start = (t_hts_htmlcheck_start) htswrap_read("start");
- hts_htmlcheck_end = (t_hts_htmlcheck_end) htswrap_read("end");
- hts_htmlcheck_chopt = (t_hts_htmlcheck_chopt) htswrap_read("change-options");
- hts_htmlcheck = (t_hts_htmlcheck) htswrap_read("check-html");
- hts_htmlcheck_query = (t_hts_htmlcheck_query) htswrap_read("query");
- hts_htmlcheck_query2 = (t_hts_htmlcheck_query2) htswrap_read("query2");
- hts_htmlcheck_query3 = (t_hts_htmlcheck_query3) htswrap_read("query3");
- hts_htmlcheck_loop = (t_hts_htmlcheck_loop) htswrap_read("loop");
- hts_htmlcheck_check = (t_hts_htmlcheck_check) htswrap_read("check-link");
- hts_htmlcheck_pause = (t_hts_htmlcheck_pause) htswrap_read("pause");
- hts_htmlcheck_filesave = (t_hts_htmlcheck_filesave) htswrap_read("save-file");
- hts_htmlcheck_linkdetected = (t_hts_htmlcheck_linkdetected) htswrap_read("link-detected");
- hts_htmlcheck_xfrstatus = (t_hts_htmlcheck_xfrstatus) htswrap_read("transfer-status");
- hts_htmlcheck_savename = (t_hts_htmlcheck_savename) htswrap_read("save-name");
+ set_wrappers();
#endif
// options par défaut
@@ -157,7 +182,7 @@ int main(int argc, char **argv) {
httrack.maxsite=-1; // taille max site (aucune)
httrack.maxfile_nonhtml=-1; // taille max fichier non html
httrack.maxfile_html=-1; // idem pour html
- httrack.maxsoc=8; // nbre socket max
+ httrack.maxsoc=4; // nbre socket max
httrack.fragment=-1; // pas de fragmentation
httrack.nearlink=0; // ne pas prendre les liens non-html "adjacents"
httrack.makeindex=1; // faire un index
@@ -169,10 +194,12 @@ int main(int argc, char **argv) {
httrack.cache=1; // cache prioritaire
httrack.shell=0; // pas de shell par defaut
httrack.proxy.active=0; // pas de proxy
+ strcpybuff(httrack.proxy.bindhost, ""); // bind default host
httrack.user_agent_send=1; // envoyer un user-agent
- strcpy(httrack.user_agent,"Mozilla/4.5 (compatible; HTTrack 3.0x; Windows 98)");
+ strcpybuff(httrack.user_agent,"Mozilla/4.5 (compatible; HTTrack 3.0x; Windows 98)");
httrack.savename_83=0; // noms longs par défaut
httrack.savename_type=0; // avec structure originale
+ httrack.mimehtml=0; // pas MIME-html
httrack.parsejava=1; // parser classes
httrack.hostcontrol=0; // PAS de control host pour timeout et traffic jammer
httrack.retry=2; // 2 retry par défaut
@@ -187,26 +214,29 @@ int main(int argc, char **argv) {
httrack.accept_cookie=1; // gérer les cookies
httrack.cookie=NULL;
httrack.http10=0; // laisser http/1.1
+ httrack.nokeepalive = 0; // pas keep-alive
httrack.nocompression=0; // pas de compression
httrack.tolerant=0; // ne pas accepter content-length incorrect
httrack.parseall=1; // tout parser (tags inconnus, par exemple)
httrack.norecatch=0; // ne pas reprendre les fichiers effacés par l'utilisateur
httrack.verbosedisplay=0; // pas d'animation texte
- strcpy(httrack.footer,HTS_DEFAULT_FOOTER);
+ httrack.sizehack=0; // size hack
+ httrack.urlhack=1; // url hack (normalizer)
+ strcpybuff(httrack.footer,HTS_DEFAULT_FOOTER);
httrack.ftp_proxy=1; // proxy http pour ftp
- strcpy(httrack.filelist,"");
- strcpy(httrack.lang_iso,"en, *");
- strcpy(httrack.mimedefs,"\n"); // aucun filtre mime (\n IMPORTANT)
+ strcpybuff(httrack.filelist,"");
+ strcpybuff(httrack.lang_iso,"en, *");
+ strcpybuff(httrack.mimedefs,"\n"); // aucun filtre mime (\n IMPORTANT)
//
httrack.log=stdout;
httrack.errlog=stderr;
httrack.flush=1; // flush sur les fichiers log
- httrack.aff_progress=0;
+ //httrack.aff_progress=0;
httrack.keyboard=0;
//
- strcpy(httrack.path_html,"");
- strcpy(httrack.path_log,"");
- strcpy(httrack.path_bin,"");
+ strcpybuff(httrack.path_html,"");
+ strcpybuff(httrack.path_log,"");
+ strcpybuff(httrack.path_bin,"");
//
httrack.maxlink=100000; // 100,000 liens max par défaut (400Kb)
httrack.maxfilter=200; // 200 filtres max par défaut
@@ -222,8 +252,10 @@ int main(int argc, char **argv) {
httrack.dir_topindex=0; // do not built top index (yet)
//
httrack.state.stop=0; // stopper
+ httrack.state.exit_xh=0; // abort
//
_DEBUG_HEAD=0; // pas de debuggage en têtes
+
#if HTS_WIN
#if HTS_ANALYSTE!=2
@@ -269,9 +301,9 @@ int main(int argc, char **argv) {
lien_back r;
char* path;
FILE* fp;
- strcpy(r.url_adr,argv[2]);
- strcpy(r.url_fil,argv[3]);
- strcpy(r.url_sav,argv[4]);
+ strcpybuff(r.url_adr,argv[2]);
+ strcpybuff(r.url_fil,argv[3]);
+ strcpybuff(r.url_sav,argv[4]);
path=argv[5];
r.status=1000;
run_launch_ftp(&r);
@@ -298,11 +330,11 @@ int main(int argc, char **argv) {
char* a;
if ((a=strrchr(path,'/'))) {
httrack.path_bin[0]='\0';
- strncat(httrack.path_bin,argv[0],(int) a - (int) path);
+ strncatbuff(httrack.path_bin,argv[0],(int) a - (int) path);
}
}
#else
- strcpy(httrack.path_bin,HTS_HTTRACKDIR);
+ strcpybuff(httrack.path_bin, HTS_HTTRACKDIR);
#endif
@@ -316,7 +348,7 @@ int main(int argc, char **argv) {
while( (a=strchr(argv[na],9)) ) *a=' ';
/* equivalent to "empty parameter" */
if ((strcmp(argv[na],HTS_NOPARAM)==0) || (strcmp(argv[na],HTS_NOPARAM2)==0)) // (none)
- strcpy(argv[na],"\"\"");
+ strcpybuff(argv[na],"\"\"");
if (strncmp(argv[na],"-&",2)==0)
argv[na][1]='%';
}
@@ -402,6 +434,11 @@ int main(int argc, char **argv) {
argv_url=-1; /* forcer */
httrack.quiet=1;
}
+ } else if (strcmp(tmp_argv[0] + 2,"quiet") == 0) {
+ httrack.quiet=1; // ne pas poser de questions! (nohup par exemple)
+ } else if (strcmp(tmp_argv[0] + 2,"continue") == 0) {
+ argv_url=-1; /* forcer */
+ httrack.quiet=1;
}
}
}
@@ -436,7 +473,7 @@ int main(int argc, char **argv) {
FILE* fp;
int x_argc2;
- //strcpy(x_argvblk2,"httrack ");
+ //strcpybuff(x_argvblk2,"httrack ");
fp=fopen("config","rb");
if (fp) {
linput(fp,x_argvblk2+strlen(x_argvblk2),32000);
@@ -482,7 +519,7 @@ int main(int argc, char **argv) {
if (argv[na][0]=='"') {
char tempo[HTS_CDLMAXSIZE];
- strcpy(tempo,argv[na]+1);
+ strcpybuff(tempo,argv[na]+1);
if (tempo[strlen(tempo)-1]!='"') {
char s[HTS_CDLMAXSIZE];
sprintf(s,"Missing quote in %s",argv[na]);
@@ -491,7 +528,7 @@ int main(int argc, char **argv) {
return -1;
}
tempo[strlen(tempo)-1]='\0';
- strcpy(argv[na],tempo);
+ strcpybuff(argv[na],tempo);
}
if (cmdl_opt(argv[na])) { // option
@@ -509,34 +546,34 @@ int main(int argc, char **argv) {
} else {
char* a;
na++;
- strcpy(httrack.path_html,"");
- strcpy(httrack.path_log,"");
+ strcpybuff(httrack.path_html,"");
+ strcpybuff(httrack.path_log,"");
a=strstr(argv[na],"\",\""); // rechercher en premier, au cas ou -O "c:\pipo,test","c:\test"
if (!a)
a=strchr(argv[na],','); // 2 path
else
a++; // position ,
if (a) {
- strncat(httrack.path_html,argv[na],(int) (a-argv[na]));
- strcat(httrack.path_log,a+1);
+ strncatbuff(httrack.path_html,argv[na],(int) (a-argv[na]));
+ strcatbuff(httrack.path_log,a+1);
} else {
- strcpy(httrack.path_log,argv[na]);
- strcpy(httrack.path_html,argv[na]);
+ strcpybuff(httrack.path_log,argv[na]);
+ strcpybuff(httrack.path_html,argv[na]);
}
// Eliminer les cas comme -O "C:\mirror\"
if (httrack.path_log[0]=='"') { // Guillemets
char tmp[256];
- strcpy(tmp,httrack.path_log+1);
+ strcpybuff(tmp,httrack.path_log+1);
if (tmp[strlen(tmp)-1]=='"')
tmp[strlen(tmp)-1]='\0';
- strcpy(httrack.path_log,tmp);
+ strcpybuff(httrack.path_log,tmp);
}
if (httrack.path_html[0]=='"') {
char tmp[256];
- strcpy(tmp,httrack.path_html+1);
+ strcpybuff(tmp,httrack.path_html+1);
if (tmp[strlen(tmp)-1]=='"')
tmp[strlen(tmp)-1]='\0';
- strcpy(httrack.path_html,tmp);
+ strcpybuff(httrack.path_html,tmp);
}
check_path(httrack.path_log,argv_firsturl);
if (check_path(httrack.path_html,argv_firsturl)) {
@@ -583,8 +620,6 @@ int main(int argc, char **argv) {
} // traiter -O
-
-
/* load doit.log and insert in current command line */
if ( fexist(fconcat(httrack.path_log,"hts-cache/doit.log")) && (argv_url<=0) ) {
FILE* fp=fopen(fconcat(httrack.path_log,"hts-cache/doit.log"),"rb");
@@ -611,8 +646,21 @@ int main(int argc, char **argv) {
/* Insert parameters BUT so that they can be in the same order */
if (lastp) {
if (strnotempty(lastp)) {
+ //char* argv0;
+ //int len;
insert_after_argc=argc-insert_after;
+ //argv0 = (argv+insert_after)[0];
cmdl_ins(lastp,insert_after_argc,(argv+insert_after),x_argvblk,x_ptr);
+ /*
+ DONE IN 'next_token'
+ len = strlen(argv0);
+ if (len >= 2 && argv0[0]=='\"' && argv0[len-1]=='\"') { // "foo"
+ char tempo[1024];
+ tempo[0] = '\0';
+ strncatbuff(tempo, argv0+1, len-2);
+ strcpybuff(argv0, tempo);
+ }
+ */
argc=insert_after_argc+insert_after;
insert_after++;
}
@@ -668,7 +716,7 @@ int main(int argc, char **argv) {
if (argv[i][0]=='-') {
if (argv[i][1]=='-') { // --xxx
if ((strfield2(argv[i]+2,"clean")) || (strfield2(argv[i]+2,"tide"))) { // nettoyer
- strcpy(argv[i]+1,"");
+ strcpybuff(argv[i]+1,"");
if (fexist(fconcat(httrack.path_log,"hts-log.txt")))
remove(fconcat(httrack.path_log,"hts-log.txt"));
if (fexist(fconcat(httrack.path_log,"hts-err.txt")))
@@ -699,7 +747,7 @@ int main(int argc, char **argv) {
//
} else if (strfield2(argv[i]+2,"catchurl")) { // capture d'URL via proxy temporaire!
argv_url=1; // forcer a passer les parametres
- strcpy(argv[i]+1,"#P");
+ strcpybuff(argv[i]+1,"#P");
//
} else if (strfield2(argv[i]+2,"updatehttrack")) {
#ifdef _WIN32
@@ -714,10 +762,10 @@ int main(int argc, char **argv) {
char *args[8];
printf("Cheking for updates...\n");
- strcpy(_args[0],argv[0]);
- strcpy(_args[1],"--get");
+ strcpybuff(_args[0],argv[0]);
+ strcpybuff(_args[1],"--get");
sprintf(_args[2],HTS_UPDATE_WEBSITE,HTS_PLATFORM,"");
- strcpy(_args[3],"--quickinfo");
+ strcpybuff(_args[3],"--quickinfo");
args[0]=_args[0];
args[1]=_args[1];
args[2]=_args[2];
@@ -781,7 +829,7 @@ int main(int argc, char **argv) {
FILE* fp;
int x_argc;
- //strcpy(x_argvblk,"httrack ");
+ //strcpybuff(x_argvblk,"httrack ");
fp=fopen(fconcat(httrack.path_log,"hts-cache/doit.log"),"rb");
if (fp) {
linput(fp,x_argvblk+strlen(x_argvblk),8192);
@@ -892,6 +940,10 @@ int main(int argc, char **argv) {
}
} else { // aucune URL définie et pas de cache
+ if (argc > 1 && strcmp(argv[0], "-#h") == 0) {
+ printf("HTTrack version "HTTRACK_VERSION"%s\n", WHAT_is_available);
+ exit(0);
+ }
#if HTS_ANALYSTE!=2
if (httrack.quiet) {
#endif
@@ -976,7 +1028,7 @@ int main(int argc, char **argv) {
if (argv[na][0]=='"') {
char tempo[HTS_CDLMAXSIZE];
- strcpy(tempo,argv[na]+1);
+ strcpybuff(tempo,argv[na]+1);
if (tempo[strlen(tempo)-1]!='"') {
char s[HTS_CDLMAXSIZE];
sprintf(s,"Missing quote in %s",argv[na]);
@@ -985,7 +1037,7 @@ int main(int argc, char **argv) {
return -1;
}
tempo[strlen(tempo)-1]='\0';
- strcpy(argv[na],tempo);
+ strcpybuff(argv[na],tempo);
}
if (cmdl_opt(argv[na])) { // option
@@ -1009,6 +1061,7 @@ int main(int argc, char **argv) {
httrack.savename_type=1003; // mettre dans le répertoire courant
httrack.depth=0; // ne pas explorer la page
httrack.accept_cookie=0; // pas de cookies
+ httrack.robots=0; // pas de robots
break;
case 'w': httrack.wizard=2; // wizard 'soft' (ne pose pas de questions)
httrack.travel=0;
@@ -1078,7 +1131,7 @@ int main(int argc, char **argv) {
sscanf(com+1,"%d",&httrack.maxsoc);
while(isdigit((unsigned char)*(com+1))) com++;
httrack.maxsoc=max(httrack.maxsoc,1); // FORCER A 1
- } else httrack.maxsoc=8;
+ } else httrack.maxsoc=4;
break;
//
@@ -1122,7 +1175,7 @@ int main(int argc, char **argv) {
htsmain_free();
return -1;
}
- strcpy(httrack.savename_userdef,argv[na]);
+ strcpybuff(httrack.savename_userdef,argv[na]);
if (strnotempty(httrack.savename_userdef))
httrack.savename_type = -1; // userdef!
else
@@ -1175,6 +1228,8 @@ int main(int argc, char **argv) {
case '&': case '%': { // deuxième jeu d'options
com++;
switch(*com) {
+ case 'M': httrack.mimehtml = 1; if (*(com+1)=='0') { httrack.mimehtml=0; com++; } break;
+ case 'k': httrack.nokeepalive = 0; if (*(com+1)=='0') { httrack.nokeepalive = 1; com++; } break;
case 'x': httrack.passprivacy=1; if (*(com+1)=='0') { httrack.passprivacy=0; com++; } break; // No passwords in html files
case 'q': httrack.includequery=1; if (*(com+1)=='0') { httrack.includequery=0; com++; } break; // No passwords in html files
case 'I': httrack.kindex=1; if (isdigit((unsigned char)*(com+1))) { sscanf(com+1,"%d",&httrack.kindex); while(isdigit((unsigned char)*(com+1))) com++; }
@@ -1188,7 +1243,9 @@ int main(int argc, char **argv) {
case 'P': httrack.parseall=1; if (*(com+1)=='0') { httrack.parseall=0; com++; } break; // tout parser
case 'n': httrack.norecatch=1; if (*(com+1)=='0') { httrack.norecatch=0; com++; } break; // ne pas reprendre fichiers effacés localement
case 's': httrack.sizehack=1; if (*(com+1)=='0') { httrack.sizehack=0; com++; } break; // hack sur content-length
+ case 'u': httrack.urlhack=1; if (*(com+1)=='0') { httrack.urlhack=0; com++; } break; // url hack
case 'v': httrack.verbosedisplay=2; if (isdigit((unsigned char)*(com+1))) { sscanf(com+1,"%d",&httrack.verbosedisplay); while(isdigit((unsigned char)*(com+1))) com++; } break;
+ case 'i': httrack.dir_topindex = 1; if (*(com+1)=='0') { httrack.dir_topindex=0; com++; } break;
// preserve: no footer, original links
case 'p':
@@ -1208,7 +1265,53 @@ int main(int argc, char **argv) {
htsmain_free();
return -1;
}
- strcpy(httrack.filelist,argv[na]);
+ strcpybuff(httrack.filelist,argv[na]);
+ }
+ break;
+ case 'b': // bind
+ if ((na+1>=argc) || (argv[na+1][0]=='-')) {
+ HTS_PANIC_PRINTF("Option %b needs to be followed by a blank space, and a local hostname");
+ printf("Example: -%%b \"ip4.localhost\"\n");
+ htsmain_free();
+ return -1;
+ } else{
+ na++;
+ if (strlen(argv[na])>=254) {
+ HTS_PANIC_PRINTF("Hostname string too long");
+ htsmain_free();
+ return -1;
+ }
+ strcpybuff(httrack.proxy.bindhost,argv[na]);
+ }
+ break;
+ case 'S': // Scan Rules list
+ if ((na+1>=argc) || (argv[na+1][0]=='-')) {
+ HTS_PANIC_PRINTF("Option %S needs to be followed by a blank space, and a text filename");
+ printf("Example: -%%S \"myfilterlist.txt\"\n");
+ htsmain_free();
+ return -1;
+ } else{
+ INTsys fz;
+ na++;
+ fz = fsize(argv[na]);
+ if (fz < 0) {
+ HTS_PANIC_PRINTF("File url list could not be opened");
+ htsmain_free();
+ return -1;
+ } else {
+ FILE* fp = fopen(argv[na], "rb");
+ if (fp != NULL) {
+ int cl = (int) strlen(url);
+ ensureUrlCapacity(url, url_sz, cl + fz + 8192);
+ if ((INTsys)fread(url + cl, 1, fz, fp) != fz) {
+ HTS_PANIC_PRINTF("File url list could not be read");
+ htsmain_free();
+ return -1;
+ }
+ fclose(fp);
+ *(url + cl + fz) = '\0';
+ }
+ }
}
break;
case 'A': // assume
@@ -1227,12 +1330,12 @@ int main(int argc, char **argv) {
}
// --assume standard
if (strcmp(argv[na],"standard") == 0) {
- strcpy(httrack.mimedefs,"\n");
- strcat(httrack.mimedefs,HTS_ASSUME_STANDARD);
- strcat(httrack.mimedefs,"\n");
+ strcpybuff(httrack.mimedefs,"\n");
+ strcatbuff(httrack.mimedefs,HTS_ASSUME_STANDARD);
+ strcatbuff(httrack.mimedefs,"\n");
} else {
- strcat(httrack.mimedefs,argv[na]);
- strcat(httrack.mimedefs,"\n");
+ strcatbuff(httrack.mimedefs,argv[na]);
+ strcatbuff(httrack.mimedefs,"\n");
}
a=httrack.mimedefs;
while(*a) {
@@ -1259,7 +1362,7 @@ int main(int argc, char **argv) {
htsmain_free();
return -1;
}
- strcpy(httrack.lang_iso,argv[na]);
+ strcpybuff(httrack.lang_iso,argv[na]);
}
break;
//
@@ -1276,7 +1379,7 @@ int main(int argc, char **argv) {
htsmain_free();
return -1;
}
- strcpy(httrack.footer,argv[na]);
+ strcpybuff(httrack.footer,argv[na]);
}
break;
case 'H': // debug headers
@@ -1316,6 +1419,81 @@ int main(int argc, char **argv) {
}
break;
+ case 'W': // Wrapper callback
+ // --wrapper check-link=obj.so:check_link
+ if ((na+1>=argc) || (argv[na+1][0]=='-')) {
+ HTS_PANIC_PRINTF("Option %W needs to be followed by a blank space, and a <callback-name>=<myfile.so>:<function-name> field");
+ printf("Example: -%%W check-link=checklink.so:check\n");
+ htsmain_free();
+ return -1;
+ } else {
+ char callbackname[128];
+ char* a = argv[na + 1];
+ char* pos = strchr(a, '=');
+ na++;
+ if (pos != NULL && (pos - a) > 0 && (pos - a + 2) < sizeof(callbackname)) {
+ char* posf = strchr(pos + 1, ':');
+ char filename[1024];
+ callbackname[0] = '\0';
+ strncatbuff(callbackname, a, pos - a);
+ pos++;
+ if (posf != NULL && (posf - pos) > 0 && (posf - pos + 2) < sizeof(filename)) {
+ void* userfunction;
+ filename[0] = '\0';
+ strncatbuff(filename, pos, posf - pos);
+ posf++;
+ userfunction = getFunctionPtr(filename, posf);
+ if (userfunction != NULL) {
+ if ((void*)htswrap_read(callbackname) != NULL) {
+ if (htswrap_add(callbackname, userfunction)) {
+ if (!httrack.quiet) {
+ set_wrappers();
+ if ((void*)htswrap_read(callbackname) == userfunction) {
+ printf("successfully plugged [%s -> %s:%s]\n", callbackname, posf, filename);
+ } else {
+ char tmp[1024 * 2];
+ sprintf(tmp, "option %%W : unable to (re)plug the function %s from the file %s for the callback %s", posf, filename, callbackname);
+ HTS_PANIC_PRINTF(tmp);
+ htsmain_free();
+ return -1;
+ }
+ }
+ } else {
+ char tmp[1024 * 2];
+ sprintf(tmp, "option %%W : unable to plug the function %s from the file %s for the callback %s", posf, filename, callbackname);
+ HTS_PANIC_PRINTF(tmp);
+ htsmain_free();
+ return -1;
+ }
+ } else {
+ char tmp[1024 * 2];
+ sprintf(tmp, "option %%W : unknown or undefined callback %s", callbackname);
+ HTS_PANIC_PRINTF(tmp);
+ htsmain_free();
+ return -1;
+ }
+ } else {
+ char tmp[1024 * 2];
+ sprintf(tmp, "option %%W : unable to load the function %s in the file %s for the callback %s", posf, filename, callbackname);
+ HTS_PANIC_PRINTF(tmp);
+ htsmain_free();
+ return -1;
+ }
+ } else {
+ HTS_PANIC_PRINTF("Syntax error in option %W : filename error : this function needs to be followed by a blank space, and a <callback-name>=<myfile.so>:<function-name> field");
+ printf("Example: -%%W check-link=checklink.so:check\n");
+ htsmain_free();
+ return -1;
+ }
+ } else {
+ HTS_PANIC_PRINTF("Syntax error in option %W : this function needs to be followed by a blank space, and a <callback-name>=<myfile.so>:<function-name> field");
+ printf("Example: -%%W check-link=checklink.so:check\n");
+ htsmain_free();
+ return -1;
+ }
+ }
+ break;
+
default: {
char s[HTS_CDLMAXSIZE];
sprintf(s,"invalid option %%%c\n",*com);
@@ -1376,17 +1554,185 @@ int main(int argc, char **argv) {
}
}
break;
-
+
//
- case '#': { // non documenté (appel de l'interface)
+ case '#': { // non documenté
com++;
switch(*com) {
+ case 'C': // list cache files : httrack -#C '*spid*.gif' will attempt to find the matching file
+ {
+ int hasFilter = 0;
+ int found = 0;
+ char* filter=NULL;
+ cache_back cache;
+ inthash cache_hashtable=inthash_new(HTS_HASH_SIZE);
+ int backupXFR = htsMemoryFastXfr;
+ int sendb = 0;
+ if (isdigit((unsigned char)*(com+1))) {
+ sscanf(com+1,"%d",&sendb);
+ while(isdigit((unsigned char)*(com+1))) com++;
+ } else sendb=0;
+ if (!((na+1>=argc) || (argv[na+1][0]=='-'))) {
+ na++;
+ hasFilter = 1;
+ filter=argv[na];
+ }
+ htsMemoryFastXfr = 1; /* fast load */
+
+ memset(&cache, 0, sizeof(cache_back));
+ cache.type=1; // cache?
+ cache.log=stdout; // log?
+ cache.errlog=stderr; // err log?
+ cache.ptr_ant=cache.ptr_last=0; // pointeur pour anticiper
+ cache.hashtable=(void*)cache_hashtable; /* copy backcache hash */
+ cache.ro = 1; /* read only */
+ if (cache.hashtable) {
+ char adr[HTS_URLMAXSIZE*2];
+ char fil[HTS_URLMAXSIZE*2];
+ char url[HTS_URLMAXSIZE*2];
+ char linepos[256];
+ int pos;
+ char* cacheNdx = readfile(fconcat(httrack.path_log,"hts-cache/new.ndx"));
+ cache_init(&cache,&httrack); /* load cache */
+ if (cacheNdx != NULL) {
+ char firstline[256];
+ char* a = cacheNdx;
+ a+=cache_brstr(a, firstline);
+ a+=cache_brstr(a, firstline);
+ while ( a != NULL ) {
+ a=strchr(a+1,'\n'); /* start of line */
+ if (a) {
+ htsblk r;
+ /* */
+ a++;
+ /* read "host/file" */
+ a+=binput(a,adr,HTS_URLMAXSIZE);
+ a+=binput(a,fil,HTS_URLMAXSIZE);
+ url[0]='\0';
+ if (!link_has_authority(adr))
+ strcatbuff(url, "http://");
+ strcatbuff(url, adr);
+ strcatbuff(url, fil);
+ /* read position */
+ a+=binput(a,linepos,200);
+ sscanf(linepos,"%d",&pos);
+ if (!hasFilter
+ ||
+ (strjoker(url, filter, NULL, NULL) != NULL)
+ ) {
+ r = cache_read(&httrack, &cache, adr, fil, "", NULL); // lire entrée cache + data
+ if (r.statuscode != -1) { // No errors
+ found++;
+ if (!hasFilter) {
+ fprintf(stdout, "%s%s%s\r\n",
+ (link_has_authority(adr)) ? "" : "http://",
+ adr, fil);
+ } else {
+ char msg[256], cdate[256];
+ char sav[HTS_URLMAXSIZE*2];
+ infostatuscode(msg, r.statuscode);
+ time_gmt_rfc822(cdate);
+
+ fprintf(stdout, "HTTP/1.1 %d %s\r\n",
+ r.statuscode,
+ r.msg[0] ? r.msg : msg
+ );
+ fprintf(stdout, "X-Host: %s\r\n", adr);
+ fprintf(stdout, "X-File: %s\r\n", fil);
+ fprintf(stdout, "X-URL: %s%s%s\r\n",
+ (link_has_authority(adr)) ? "" : "http://",
+ adr, fil);
+ if (url_savename(adr, fil, sav, NULL, NULL, NULL, NULL,
+ &httrack, NULL, 0, NULL, 0, &cache, NULL, 0, 0)!=-1) {
+ if (fexist(sav)) {
+ fprintf(stdout, "Content-location: %s\r\n", sav);
+ }
+ }
+ fprintf(stdout, "Date: %s\r\n", cdate);
+ fprintf(stdout, "Server: HTTrack Website Copier/"HTTRACK_VERSION"\r\n");
+ if (r.lastmodified[0]) {
+ fprintf(stdout, "Last-Modified: %s\r\n", r.lastmodified);
+ }
+ if (r.etag[0]) {
+ fprintf(stdout, "Etag: %s\r\n", r.etag);
+ }
+ if (r.totalsize >= 0) {
+ fprintf(stdout, "Content-Length: "LLintP"\r\n", r.totalsize);
+ }
+ fprintf(stdout, "X-Content-Length: "LLintP"\r\n", (r.size >= 0) ? r.size : (-r.size) );
+ if (r.contenttype >= 0) {
+ fprintf(stdout, "Content-Type: %s\r\n", r.contenttype);
+ }
+ if (r.cdispo[0]) {
+ fprintf(stdout, "Content-Disposition: %s\r\n", r.cdispo);
+ }
+ if (r.contentencoding[0]) {
+ fprintf(stdout, "Content-Encoding: %s\r\n", r.contentencoding);
+ }
+ if (r.is_chunk) {
+ fprintf(stdout, "Transfer-Encoding: chunked\r\n");
+ }
+#if HTS_USEOPENSSL
+ if (r.ssl) {
+ fprintf(stdout, "X-SSL: yes\r\n");
+ }
+#endif
+ if (r.is_write) {
+ fprintf(stdout, "X-Direct-To-Disk: yes\r\n");
+ }
+ if (r.compressed) {
+ fprintf(stdout, "X-Compressed: yes\r\n");
+ }
+ if (r.notmodified) {
+ fprintf(stdout, "X-Not-Modified: yes\r\n");
+ }
+ if (r.is_chunk) {
+ fprintf(stdout, "X-Chunked: yes\r\n");
+ }
+ fprintf(stdout, "\r\n");
+ /* Send the body */
+ if (sendb && r.adr) {
+ fprintf(stdout, "%s\r\n", r.adr);
+ }
+ }
+ }
+ }
+ }
+ }
+ freet(cacheNdx);
+ }
+ }
+ if (!found) {
+ fprintf(stderr, "No cache entry found%s%s%s\r\n",
+ (hasFilter)?" for '":"",
+ (hasFilter)?filter:"",
+ (hasFilter)?"'":""
+ );
+ }
+ htsMemoryFastXfr = backupXFR;
+ return 0;
+ }
+ break;
+ case 'X':
+#ifndef STRDEBUG
+ fprintf(stderr, "warning: no string debugging support built, option has no effect\n");
+#endif
+ htsMemoryFastXfr=1;
+ if (*(com+1)=='0') { htsMemoryFastXfr=0; com++; }
+ break;
+ case '~': /* internal lib test */
+ {
+ char thisIsATestYouShouldSeeAnError[12];
+ strcpybuff(thisIsATestYouShouldSeeAnError, "0123456789012345678901234567890123456789");
+ return 0;
+ }
+ break;
case 'f': httrack.flush=1; break;
case 'h':
- printf("HTTrack version "HTTRACK_VERSION"\n");
- exit(1);
+ printf("HTTrack version "HTTRACK_VERSION"%s\n", WHAT_is_available);
+ return 0;
break;
- case 'p': httrack.aff_progress=1; break;
+ case 'p': /* httrack.aff_progress=1; deprecated */ break;
case 'S': httrack.shell=1; break; // stdin sur un shell
case 'K': httrack.keyboard=1; break; // vérifier stdin
//
@@ -1458,10 +1804,10 @@ int main(int argc, char **argv) {
if (*a == ':') { // un port est présent, <proxy>:port
sscanf(a+1,"%d",&httrack.proxy.port);
httrack.proxy.name[0]='\0';
- strncat(httrack.proxy.name,argv[na],(int) (a - argv[na]));
+ strncatbuff(httrack.proxy.name,argv[na],(int) (a - argv[na]));
} else { // <proxy>
httrack.proxy.port=8080;
- strcpy(httrack.proxy.name,argv[na]);
+ strcpybuff(httrack.proxy.name,argv[na]);
}
}
break;
@@ -1478,7 +1824,7 @@ int main(int argc, char **argv) {
htsmain_free();
return -1;
}
- strcpy(httrack.user_agent,argv[na]);
+ strcpybuff(httrack.user_agent,argv[na]);
if (strnotempty(httrack.user_agent))
httrack.user_agent_send=1;
else
@@ -1499,7 +1845,7 @@ int main(int argc, char **argv) {
htsmain_free();
return -1;
}
- strcpy(httrack.sys_com,argv[na]);
+ strcpybuff(httrack.sys_com,argv[na]);
if (strnotempty(httrack.sys_com))
httrack.sys_com_exec=1;
else
@@ -1521,10 +1867,10 @@ int main(int argc, char **argv) {
} else { // URL/filters
char tempo[1024];
- if (strnotempty(url)) strcat(url," "); // espace de séparation
- strcpy(tempo,unescape_http_unharm(argv[na],1));
+ if (strnotempty(url)) strcatbuff(url," "); // espace de séparation
+ strcpybuff(tempo,unescape_http_unharm(argv[na],1));
escape_spc_url(tempo);
- strcat(url,tempo);
+ strcatbuff(url,tempo);
} // if argv=- etc.
} // for
@@ -1563,28 +1909,28 @@ int main(int argc, char **argv) {
rpath[0]='\0';
if (c != httrack.path_html) {
if (httrack.path_html[0]!='/')
- strcat(rpath,"./");
- strncat(rpath,httrack.path_html,(int) (c - httrack.path_html));
+ strcatbuff(rpath,"./");
+ strncatbuff(rpath,httrack.path_html,(int) (c - httrack.path_html));
}
{
char tmp[1024];
- strcpy(tmp,c); strcpy(httrack.path_html,tmp);
- strcpy(tmp,d); strcpy(httrack.path_log,tmp);
+ strcpybuff(tmp,c); strcpybuff(httrack.path_html,tmp);
+ strcpybuff(tmp,d); strcpybuff(httrack.path_log,tmp);
}
} else {
- strcpy(rpath,"./");
- strcpy(httrack.path_html,"/");
- strcpy(httrack.path_log,"/");
+ strcpybuff(rpath,"./");
+ strcpybuff(httrack.path_html,"/");
+ strcpybuff(httrack.path_log,"/");
}
if (rpath[0]) {
printf("[changing root path to %s (path_data=%s,path_log=%s)]\n",rpath,httrack.path_html,httrack.path_log);
if (chroot(rpath)) {
printf("ERROR! Can not chroot to %s!\n",rpath);
- exit(0);
+ return -1;
}
if (chdir("/")) { /* new root */
printf("ERROR! Can not chdir to %s!\n",rpath);
- exit(0);
+ return -1;
}
} else
printf("WARNING: chroot not possible with these paths\n");
@@ -1668,6 +2014,9 @@ int main(int argc, char **argv) {
if (fexist(fconcat(httrack.path_log,"hts-err.txt")))
remove(fconcat(httrack.path_log,"hts-err.txt"));
+ /* Check FS directory structure created */
+ structcheck(httrack.path_log);
+
httrack.log=fopen(fconcat(httrack.path_log,"hts-log.txt"),"w");
if (httrack_logmode==2)
httrack.errlog=fopen(fconcat(httrack.path_log,"hts-err.txt"),"w");
@@ -1705,7 +2054,7 @@ int main(int argc, char **argv) {
if (fp) {
fprintf(fp,"What's in this folder?"LF);
fprintf(fp,""LF);
- fprintf(fp,"This folder (hts-cache) has been generated by WinHTTrack "HTTRACK_VERSION""LF);
+ fprintf(fp,"This folder (hts-cache) has been generated by WinHTTrack "HTTRACK_VERSION"%s"LF, WHAT_is_available);
fprintf(fp,"and is used for updating this website."LF);
fprintf(fp,"(The HTML website structure is stored here to allow fast updates)"LF""LF);
fprintf(fp,"DO NOT delete this folder unless you do not want to update the mirror in the future!!"LF);
@@ -1732,8 +2081,8 @@ int main(int argc, char **argv) {
}*/
// vérifier existence de la structure
- structcheck(httrack.path_html);
- structcheck(httrack.path_log);
+ structcheck(fconcat(httrack.path_html, "/"));
+ structcheck(fconcat(httrack.path_log, "/"));
// reprise/update
if (httrack.cache) {
@@ -1799,7 +2148,9 @@ int main(int argc, char **argv) {
// fichier log
if (httrack.log) {
int i;
- fprintf(httrack.log,"HTTrack"HTTRACK_VERSION" launched on %s at %s"LF,t,url);
+ fprintf(httrack.log,"HTTrack"HTTRACK_VERSION"%s launched on %s at %s"LF,
+ WHAT_is_available,
+ t, url);
fprintf(httrack.log,"(");
for(i=0;i<argc;i++) {
if ((strchr(argv[i],' ')==NULL) || (strchr(argv[i],'\"')))
@@ -1814,8 +2165,8 @@ int main(int argc, char **argv) {
fprintf(httrack.log,LF);
}
- if (httrack_logmode) {
- printf("Mirror launched on %s by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS""LF,t);
+ if (httrack_logmode) {
+ printf("Mirror launched on %s by HTTrack Website Copier/"HTTRACK_VERSION"%s "HTTRACK_AFF_AUTHORS""LF,t,WHAT_is_available);
if (httrack.wizard==0) {
printf("mirroring %s with %d levels, %d sockets,t=%d,s=%d,logm=%d,lnk=%d,mdg=%d\n",url,httrack.depth,httrack.maxsoc,httrack.travel,httrack.seeker,httrack_logmode,httrack.urlmode,httrack.getmode);
} else { // the magic wizard
@@ -1877,7 +2228,7 @@ deprecated - see SIGCHLD
if (httrack.dir_topindex) {
char rpath[1024*2];
char* a;
- strcpy(rpath,httrack.path_html);
+ strcpybuff(rpath,httrack.path_html);
if (rpath[0]) {
if (rpath[strlen(rpath)-1]=='/')
rpath[strlen(rpath)-1]='\0';
@@ -1885,7 +2236,7 @@ deprecated - see SIGCHLD
a=strrchr(rpath,'/');
if (a) {
*a='\0';
- hts_buildtopindex(rpath,httrack.path_bin);
+ hts_buildtopindex(&httrack,rpath,httrack.path_bin);
if (httrack.log) {
fspc(httrack.log,"info"); fprintf(httrack.log,"Top index rebuilt (done)"LF);
}
@@ -1931,7 +2282,7 @@ deprecated - see SIGCHLD
// WSACleanup(); // ** non en cas de thread tjs présent!..
#endif
#endif
-#if HTS_TRACE_MALLOC
+#ifdef HTS_TRACE_MALLOC
hts_freeall();
#endif
@@ -1968,9 +2319,9 @@ int check_path(char* s,char* defaultname) {
char* a=strchr(defaultname,'#'); // we never know..
if (a) *a='\0';
tempo[0]='\0';
- strncat(tempo,s,i-1);
- strcat(tempo,defaultname);
- strcpy(s,tempo);
+ strncatbuff(tempo,s,i-1);
+ strcatbuff(tempo,defaultname);
+ strcpybuff(s,tempo);
} else
s[0]='\0'; // Clear path (no name/default url given)
return_value=1; // expanded
@@ -1980,7 +2331,7 @@ int check_path(char* s,char* defaultname) {
// ending /
if (strnotempty(s))
if (s[strlen(s)-1]!='/') // ajouter slash à la fin
- strcat(s,"/");
+ strcatbuff(s,"/");
return return_value;
}
diff --git a/src/htscoremain.h b/src/htscoremain.h
index 0775492..3662793 100644
--- a/src/htscoremain.h
+++ b/src/htscoremain.h
@@ -42,13 +42,15 @@ Please visit our Website: http://www.httrack.com
// --assume standard
#define HTS_ASSUME_STANDARD \
- "php2,php3,php4,php,cgi,asp,jsp,pl,cfm=text/html"
+ "php2,php3,php4,php,cgi,asp,jsp,pl,cfm,nsf=text/html"
#include "htsglobal.h"
// Main, récupère les paramètres et appelle le robot
#if HTS_ANALYSTE
-int hts_main(int argc, char **argv);
+#ifndef HTTRACK_DEFLIB
+HTSEXT_API int hts_main(int argc, char **argv);
+#endif
#else
int main(int argc, char **argv);
#endif
diff --git a/src/htsdefines.h b/src/htsdefines.h
index 223fae1..0ab2cfa 100644
--- a/src/htsdefines.h
+++ b/src/htsdefines.h
@@ -54,6 +54,8 @@ typedef void (* t_hts_htmlcheck_filesave)(char* file);
typedef int (* t_hts_htmlcheck_linkdetected)(char* link);
typedef int (* t_hts_htmlcheck_xfrstatus)(lien_back* back);
typedef int (* t_hts_htmlcheck_savename)(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save);
+typedef int (* t_hts_htmlcheck_sendhead)(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* outgoing);
+typedef int (* t_hts_htmlcheck_receivehead)(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* incoming);
// demande d'interaction avec le shell
#if HTS_ANALYSTE
@@ -74,17 +76,19 @@ extern t_hts_htmlcheck_filesave hts_htmlcheck_filesave;
extern t_hts_htmlcheck_linkdetected hts_htmlcheck_linkdetected;
extern t_hts_htmlcheck_xfrstatus hts_htmlcheck_xfrstatus;
extern t_hts_htmlcheck_savename hts_htmlcheck_savename;
+extern t_hts_htmlcheck_sendhead hts_htmlcheck_sendhead;
+extern t_hts_htmlcheck_receivehead hts_htmlcheck_receivehead;
#endif
-#if HTS_ANALYSTE==2
-#define HT_PRINT(A) strcat(HTbuff,A);
+#if HTS_ANALYSTE
+#define HT_PRINT(A) strcatbuff(HTbuff,A);
#define HT_REQUEST_START HTbuff[0]='\0';
#define HT_REQUEST_END
#define HTT_REQUEST_START HTbuff[0]='\0';
#define HTT_REQUEST_END
#define HTS_REQUEST_START HTbuff[0]='\0';
#define HTS_REQUEST_END
-#define HTS_PANIC_PRINTF(S) strcpy(_hts_errmsg,S);
+#define HTS_PANIC_PRINTF(S) strcpybuff(_hts_errmsg,S);
#else
#define HT_PRINT(A) printf("%s",A);
#define HT_REQUEST_START /*printf("§\n");*/
diff --git a/src/htsfilters.c b/src/htsfilters.c
index ed0dee4..be8b482 100644
--- a/src/htsfilters.c
+++ b/src/htsfilters.c
@@ -301,7 +301,7 @@ HTS_INLINE char* strjoker(char* chaine,char* joker,LLint* size,int* size_flag) {
}
// recherche multiple
-// exemple: find dans un texte de strcpy(*[A-Z,a-z],"*[0-9]"); va rechercher la première occurence
+// exemple: find dans un texte de strcpybuff(*[A-Z,a-z],"*[0-9]"); va rechercher la première occurence
// d'un strcpy sur une variable ayant un nom en lettres et copiant une chaine de chiffres
// ATTENTION!! Eviter les jokers en début, où gare au temps machine!
char* strjokerfind(char* chaine,char* joker) {
diff --git a/src/htsftp.c b/src/htsftp.c
index 5fbe895..68a8af5 100644
--- a/src/htsftp.c
+++ b/src/htsftp.c
@@ -152,13 +152,13 @@ void launch_ftp(lien_back* back,char* path,char* exec) {
char *args[8];
fclose(fp); fp=NULL;
- strcpy(_args[0],exec);
- strcpy(_args[1],"-#R");
- strcpy(_args[2],back->url_adr);
- strcpy(_args[3],back->url_fil);
- strcpy(_args[4],back->url_sav);
- strcpy(_args[5],path);
- //strcpy(_args[6],"");
+ strcpybuff(_args[0],exec);
+ strcpybuff(_args[1],"-#R");
+ strcpybuff(_args[2],back->url_adr);
+ strcpybuff(_args[3],back->url_fil);
+ strcpybuff(_args[4],back->url_sav);
+ strcpybuff(_args[5],path);
+ //strcpybuff(_args[6],"");
args[0]=_args[0];
args[1]=_args[1];
args[2]=_args[2];
@@ -234,7 +234,7 @@ int run_launch_ftp(lien_back* back) {
timeout=300;
// effacer
- strcpy(back->r.msg,"");
+ strcpybuff(back->r.msg,"");
back->r.statuscode=0;
back->r.size=0;
@@ -265,10 +265,19 @@ int run_launch_ftp(lien_back* back) {
// Calculer RETR <nom>
{
char* a;
+#if 0
a=back->url_fil + strlen(back->url_fil)-1;
while( (a > back->url_fil) && (*a!='/')) a--;
- if (*a == '/') { // ok repéré
+ if (*a != '/') {
+ a = NULL;
+ }
+#else
+ a = back->url_fil;
+#endif
+ if (a != NULL && *a != '\0') {
+#if 0
a++; // sauter /
+#endif
ftp_filename=a;
if (strnotempty(a)) {
char* ua=unescape_http(a);
@@ -288,7 +297,7 @@ int run_launch_ftp(lien_back* back) {
sprintf(line_retr,"LIST -A");
}
} else {
- strcpy(back->r.msg,"Unexpected PORT error");
+ strcpybuff(back->r.msg,"Unexpected PORT error");
back->status=FTP_STATUS_READY; // fini
back->r.statuscode=-1;
}
@@ -314,15 +323,15 @@ int run_launch_ftp(lien_back* back) {
a=strchr(adr,':'); // port
if (a) {
sscanf(a+1,"%d",&port);
- strncat(_adr,adr,(int) (a - adr));
+ strncatbuff(_adr,adr,(int) (a - adr));
} else
- strcpy(_adr,adr);
+ strcpybuff(_adr,adr);
// récupérer adresse résolue
- strcpy(back->info,"host name");
+ strcpybuff(back->info,"host name");
hp = hts_gethostbyname(_adr, &fullhostent_buffer);
if (hp == NULL) {
- strcpy(back->r.msg,"Unable to get server's address");
+ strcpybuff(back->r.msg,"Unable to get server's address");
back->status=FTP_STATUS_READY; // fini
back->r.statuscode=-5;
_HALT_FTP
@@ -339,7 +348,7 @@ int run_launch_ftp(lien_back* back) {
// créer ("attachement") une socket (point d'accès) internet,en flot
soc_ctl=socket(SOCaddr_sinfamily(server), SOCK_STREAM, 0);
if (soc_ctl==INVALID_SOCKET) {
- strcpy(back->r.msg,"Unable to create a socket");
+ strcpybuff(back->r.msg,"Unable to create a socket");
back->status=FTP_STATUS_READY; // fini
back->r.statuscode=-1;
_HALT_FTP
@@ -350,14 +359,14 @@ int run_launch_ftp(lien_back* back) {
// server.sin_port = htons((unsigned short int) port);
// connexion (bloquante, on est en thread)
- strcpy(back->info,"connect");
+ strcpybuff(back->info,"connect");
#if HTS_WIN
if (connect(soc_ctl, (const struct sockaddr FAR *)&server, server_size) != 0) {
#else
if (connect(soc_ctl, (struct sockaddr *)&server, server_size) == -1) {
#endif
- strcpy(back->r.msg,"Unable to connect to the server");
+ strcpybuff(back->r.msg,"Unable to connect to the server");
back->status=FTP_STATUS_READY; // fini
back->r.statuscode=-1;
_HALT_FTP
@@ -378,19 +387,20 @@ int run_launch_ftp(lien_back* back) {
_CHECK_HALT_FTP;
if (line[0]=='2') { // ok, connecté
- strcpy(back->info,"login: user");
+ strcpybuff(back->info,"login: user");
sprintf(line,"USER %s",user);
send_line(soc_ctl,line);
get_ftp_line(soc_ctl,line,timeout);
_CHECK_HALT_FTP;
if ((line[0]=='3') || (line[0]=='2')) {
// --PASS--
- strcpy(back->info,"login: pass");
+ strcpybuff(back->info,"login: pass");
sprintf(line,"PASS %s",pass);
send_line(soc_ctl,line);
get_ftp_line(soc_ctl,line,timeout);
_CHECK_HALT_FTP;
if (line[0]=='2') { // ok
+#if 0
// --CWD--
char* a;
a=back->url_fil + strlen(back->url_fil)-1;
@@ -398,10 +408,10 @@ int run_launch_ftp(lien_back* back) {
if (*a == '/') { // ok repéré
char target[1024];
target[0]='\0';
- strncat(target,back->url_fil,(int) (a - back->url_fil));
+ strncatbuff(target,back->url_fil,(int) (a - back->url_fil));
if (strnotempty(target)==0)
- strcat(target,"/");
- strcpy(back->info,"cwd");
+ strcatbuff(target,"/");
+ strcpybuff(back->info,"cwd");
sprintf(line,"CWD %s",target);
send_line(soc_ctl,line);
get_ftp_line(soc_ctl,line,timeout);
@@ -413,7 +423,7 @@ int run_launch_ftp(lien_back* back) {
if (line[0]=='2') {
// ok..
} else {
- strcpy(back->r.msg,"TYPE I error");
+ strcpybuff(back->r.msg,"TYPE I error");
back->status=FTP_STATUS_READY; // fini
back->r.statuscode=-1;
}
@@ -423,10 +433,11 @@ int run_launch_ftp(lien_back* back) {
back->r.statuscode=-1;
} // sinon on est prêts
} else {
- strcpy(back->r.msg,"Unexpected ftp error");
+ strcpybuff(back->r.msg,"Unexpected ftp error");
back->status=FTP_STATUS_READY; // fini
back->r.statuscode=-1;
}
+#endif
} else {
sprintf(back->r.msg,"Bad password: %s",linejmp(line));
@@ -453,7 +464,7 @@ int run_launch_ftp(lien_back* back) {
//
#if FTP_PASV
if (SOCaddr_getproto(server, server_size) == '1') {
- strcpy(back->info,"pasv");
+ strcpybuff(back->info,"pasv");
sprintf(line,"PASV");
send_line(soc_ctl,line);
get_ftp_line(soc_ctl,line,timeout);
@@ -475,7 +486,7 @@ int run_launch_ftp(lien_back* back) {
c=a; while( (c=strchr(c,',')) ) *c='.'; // remplacer , par .
if (b) *b='\0';
//
- strcpy(adr_ip,a); // copier adresse ip
+ strcpybuff(adr_ip,a); // copier adresse ip
//
if (b) {
a=b+1; // début du port
@@ -506,7 +517,7 @@ int run_launch_ftp(lien_back* back) {
/*
* try epsv (ipv6) *
*/
- strcpy(back->info,"pasv");
+ strcpybuff(back->info,"pasv");
sprintf(line,"EPSV");
send_line(soc_ctl,line);
get_ftp_line(soc_ctl,line,timeout);
@@ -564,14 +575,23 @@ int run_launch_ftp(lien_back* back) {
}
// SIZE?
- strcpy(back->info,"size");
+ strcpybuff(back->info,"size");
send_line(soc_ctl,line);
get_ftp_line(soc_ctl,line,timeout);
_CHECK_HALT_FTP;
if (line[0]=='2') { // SIZE compris, ALORS tester REST (sinon pas tester: cf probleme des txt.gz decompresses a la volee)
+ char* szstr = strchr(line, ' ');
+ if (szstr) {
+ LLint size = 0;
+ szstr++;
+ if (sscanf(szstr, LLintP, &size) == 1) {
+ back->r.totalsize = size;
+ }
+ }
+
// REST?
if (fexist(back->url_sav) && (transfer_list==0)) {
- strcpy(back->info,"rest");
+ strcpybuff(back->info,"rest");
sprintf(line,"REST "LLintP,(LLint)fsize(back->url_sav));
send_line(soc_ctl,line);
get_ftp_line(soc_ctl,line,timeout);
@@ -600,7 +620,7 @@ int run_launch_ftp(lien_back* back) {
memset(&server, 0, sizeof(server));
// infos
- strcpy(back->info,"resolv");
+ strcpybuff(back->info,"resolv");
// résoudre
if (adr_ip[0]) {
@@ -616,7 +636,7 @@ int run_launch_ftp(lien_back* back) {
}
// infos
- strcpy(back->info,"cnxdata");
+ strcpybuff(back->info,"cnxdata");
#if FTP_DEBUG
printf("Data: Connecting to %s:%d...\n", adr_ip, port_pasv);
#endif
@@ -632,8 +652,8 @@ int run_launch_ftp(lien_back* back) {
#else
if (connect(soc_dat, (struct sockaddr *)&server, server_size) != -1) {
#endif
- strcpy(back->info,"retr");
- strcpy(line,line_retr);
+ strcpybuff(back->info,"retr");
+ strcpybuff(line,line_retr);
send_line(soc_ctl,line);
get_ftp_line(soc_ctl,line,timeout);
_CHECK_HALT_FTP;
@@ -652,12 +672,12 @@ int run_launch_ftp(lien_back* back) {
#endif
deletesoc(soc_dat); soc_dat=INVALID_SOCKET;
//
- strcpy(back->r.msg,"Unable to connect");
+ strcpybuff(back->r.msg,"Unable to connect");
back->status=FTP_STATUS_READY; // fini
back->r.statuscode=-1;
} // sinon on est prêts
} else {
- strcpy(back->r.msg,"Unable to create a socket");
+ strcpybuff(back->r.msg,"Unable to create a socket");
back->status=FTP_STATUS_READY; // fini
back->r.statuscode=-1;
} // sinon on est prêts
@@ -673,15 +693,15 @@ int run_launch_ftp(lien_back* back) {
} // sinon on est prêts
#else
//T_SOC soc_servdat;
- strcpy(back->info,"listening");
+ strcpybuff(back->info,"listening");
if ( (soc_servdat = get_datasocket(line)) != INVALID_SOCKET) {
_CHECK_HALT_FTP;
send_line(soc_ctl,line); // envoi du RETR
get_ftp_line(soc_ctl,line,timeout);
_CHECK_HALT_FTP;
if (line[0]=='2') { // ok
- strcpy(back->info,"retr");
- strcpy(line,line_retr);
+ strcpybuff(back->info,"retr");
+ strcpybuff(line,line_retr);
send_line(soc_ctl,line);
get_ftp_line(soc_ctl,line,timeout);
_CHECK_HALT_FTP;
@@ -690,7 +710,7 @@ int run_launch_ftp(lien_back* back) {
struct sockaddr dummyaddr;
int dummylen = sizeof(struct sockaddr);
if ( (soc_dat=accept(soc_servdat,&dummyaddr,&dummylen)) == INVALID_SOCKET) {
- strcpy(back->r.msg,"Unable to accept connection");
+ strcpybuff(back->r.msg,"Unable to accept connection");
back->status=FTP_STATUS_READY; // fini
back->r.statuscode=-1;
}
@@ -710,7 +730,7 @@ int run_launch_ftp(lien_back* back) {
close(soc_servdat);
#endif
} else {
- strcpy(back->r.msg,"Unable to listen to a port");
+ strcpybuff(back->r.msg,"Unable to listen to a port");
back->status=FTP_STATUS_READY; // fini
back->r.statuscode=-1;
}
@@ -725,7 +745,7 @@ int run_launch_ftp(lien_back* back) {
back->r.fp = fopen(fconv(back->url_sav),"ab");
} else
back->r.fp = filecreate(back->url_sav);
- strcpy(back->info,"receiving");
+ strcpybuff(back->info,"receiving");
if (back->r.fp != NULL) {
char buff[1024];
int len=1;
@@ -737,7 +757,7 @@ int run_launch_ftp(lien_back* back) {
len=1; // pas d'erreur pour le moment
switch(wait_socket_receive(soc_dat,timeout)) {
case -1:
- strcpy(back->r.msg,"Read error");
+ strcpybuff(back->r.msg,"FTP read error");
back->status=FTP_STATUS_READY; // fini
back->r.statuscode=-1;
len=0; // fin
@@ -757,21 +777,30 @@ int run_launch_ftp(lien_back* back) {
back->r.size+=len;
HTS_STAT.HTS_TOTAL_RECV+=len;
if (back->r.fp) {
- if ((int) fwrite(buff,1,len,back->r.fp) != len) {
- strcpy(back->r.msg,"Write error");
+ if ((INTsys)fwrite(buff,1,(INTsys)len,back->r.fp) != len) {
+ /*
+ int fcheck;
+ if ((fcheck=check_fatal_io_errno())) {
+ opt->state.exit_xh=-1;
+ }
+ */
+ strcpybuff(back->r.msg,"Write error");
back->status=FTP_STATUS_READY; // fini
back->r.statuscode=-1;
len=0; // error
}
} else {
- strcpy(back->r.msg,"Unexpected write error");
+ strcpybuff(back->r.msg,"Unexpected write error");
back->status=FTP_STATUS_READY; // fini
back->r.statuscode=-1;
}
} else { // Erreur ou terminé
- //strcpy(back->r.msg,"Read error");
back->status=FTP_STATUS_READY; // fini
back->r.statuscode=0;
+ if (back->r.totalsize > 0 && back->r.size != back->r.totalsize) {
+ back->r.statuscode=-1;
+ strcpybuff(back->r.msg,"FTP file incomplete");
+ }
}
read_len=1024;
//HTS_TOTAL_RECV_CHECK(read_len); // Diminuer au besoin si trop de données reçues
@@ -782,7 +811,7 @@ int run_launch_ftp(lien_back* back) {
back->r.fp=NULL;
}
} else {
- strcpy(back->r.msg,"Unable to write file");
+ strcpybuff(back->r.msg,"Unable to write file");
back->status=FTP_STATUS_READY; // fini
back->r.statuscode=-1;
}
@@ -798,7 +827,7 @@ int run_launch_ftp(lien_back* back) {
// récupérer 226 transfer complete
get_ftp_line(soc_ctl,line,timeout);
if (line[0]=='2') { // OK
- strcpy(back->r.msg,"OK");
+ strcpybuff(back->r.msg,"OK");
back->status=FTP_STATUS_READY; // fini
back->r.statuscode=200;
} else {
@@ -807,7 +836,7 @@ int run_launch_ftp(lien_back* back) {
back->r.statuscode=-1;
}
} else {
- strcpy(back->r.msg,"Read error");
+ strcpybuff(back->r.msg,"FTP read error");
back->status=FTP_STATUS_READY; // fini
back->r.statuscode=-1;
}
@@ -823,7 +852,7 @@ int run_launch_ftp(lien_back* back) {
}
_CHECK_HALT_FTP;
- strcpy(back->info,"quit");
+ strcpybuff(back->info,"quit");
send_line(soc_ctl,"QUIT"); // bye bye
get_ftp_line(soc_ctl,NULL,timeout);
#if HTS_WIN
@@ -835,7 +864,7 @@ int run_launch_ftp(lien_back* back) {
if (back->r.statuscode!=-1) {
back->r.statuscode=200;
- strcpy(back->r.msg,"OK");
+ strcpybuff(back->r.msg,"OK");
}
back->status=FTP_STATUS_READY; // fini
return 0;
@@ -887,7 +916,7 @@ T_SOC get_datasocket(char* to_send) {
SOCaddr_inetntoa(dot, 256, server2, sizeof(server2));
//
dots[0]='\0';
- strncat(dots, dot, 128);
+ strncatbuff(dots, dot, 128);
while( (a=strchr(dots,'.')) ) *a=','; // virgules!
while( (a=strchr(dots,':')) ) *a=','; // virgules!
sprintf(to_send,"PORT %s,%d,%d",dots,n1,n2);
@@ -992,7 +1021,7 @@ int get_ftp_line(T_SOC soc,char* line,int timeout) {
// vérifier données
switch(wait_socket_receive(soc,timeout)) {
case -1: // erreur de lecture
- if (line) strcpy(line,"500 *read error");
+ if (line) strcpybuff(line,"500 *read error");
return 0;
break;
case 0:
@@ -1010,7 +1039,7 @@ int get_ftp_line(T_SOC soc,char* line,int timeout) {
data[i++]=b;
break;
default:
- if (line) strcpy(line,"500 *read error");
+ if (line) strcpybuff(line,"500 *read error");
return 0; // error
break;
}
@@ -1041,7 +1070,7 @@ int get_ftp_line(T_SOC soc,char* line,int timeout) {
fprintf(dd,"<--- %s\n",data); fflush(dd);
printf("<--- %s\n",data);
#endif
- if (line) strcpy(line,data);
+ if (line) strcpybuff(line,data);
return (strnotempty(data));
}
@@ -1122,7 +1151,7 @@ int wait_socket_receive(T_SOC soc,int timeout) {
// cancel reçu?
int stop_ftp(lien_back* back) {
if (back->stop_ftp) {
- strcpy(back->r.msg,"Cancelled by User");
+ strcpybuff(back->r.msg,"Cancelled by User");
back->status=FTP_STATUS_READY; // fini
back->r.statuscode=-1;
return 1;
diff --git a/src/htsglobal.h b/src/htsglobal.h
index ce54d3d..38faebc 100644
--- a/src/htsglobal.h
+++ b/src/htsglobal.h
@@ -40,10 +40,10 @@ Please visit our Website: http://www.httrack.com
#define HTTRACK_GLOBAL_DEFH
// Version
-#define HTTRACK_VERSION "3.20-2"
-#define HTTRACK_VERSIONID "3.20.02"
+#define HTTRACK_VERSION "3.30"
+#define HTTRACK_VERSIONID "3.30.01"
#define HTTRACK_AFF_VERSION "3.x"
-//#define HTTRACK_AFF_WARNING "This is a RELEASE CANDIDATE version of WinHTTrack Website Copier 3.0\nPlease report us any bug or problem"
+//#define HTTRACK_AFF_WARNING "This is a BETA release of WinHTTrack Website Copier ("HTTRACK_VERSION")\nPlease report any crashes, bugs or problems"
@@ -51,11 +51,79 @@ Please visit our Website: http://www.httrack.com
#include "htssystem.h"
#include "htsconfig.h"
+// config.h
+#ifdef _WIN32
+
+#define HAVE_SYS_STAT_H 1
+#define HAVE_SYS_TYPES_H 1
+#define HAVE_SYS_STAT_H 1
+#ifndef DLLIB
+#define DLLIB 1
+#endif
+#ifndef HTS_INET6
+#define HTS_INET6 1
+#endif
+#ifndef S_ISREG
+#define S_ISREG(m) ((m) & _S_IFREG)
+#endif
+
+#else
+
+#include "config.h"
+
+#ifndef FTIME
+#define HTS_DO_NOT_USE_FTIME
+#endif
+
+#ifndef SETUID
+#define HTS_DO_NOT_USE_UID
+#endif
+
+#ifndef HTS_LONGLONG
+#ifdef SIZEOF_LONG_LONG
+#if SIZEOF_LONG_LONG==8
+#define HTS_LONGLONG 1
+#endif
+#endif
+
+#ifndef HTS_LONGLONG
+#ifdef __sun
+#define HTS_LONGLONG 0
+#endif
+#ifdef __osf__
+#define HTS_LONGLONG 0
+#endif
+#ifdef __linux
+#define HTS_LONGLONG 1
+#endif
+#ifdef _WIN32
+#define HTS_LONGLONG 1
+#endif
+#endif
+#endif
+
+#ifdef DLLIB
+#define HTS_DLOPEN 1
+#else
+#define HTS_DLOPEN 0
+#endif
+
+#endif
+
+
// Socket windows ou socket unix
-#if HTS_PLATFORM==1
+#ifdef _WIN32
+#undef HTS_PLATFORM
+#define HTS_PLATFORM 1
#define HTS_WIN 1
+
#else
+
#define HTS_WIN 0
+#ifdef __linux
+#undef HTS_PLATFORM
+#define HTS_PLATFORM 3
+#endif
#endif
// compatibilité DOS
@@ -66,12 +134,10 @@ Please visit our Website: http://www.httrack.com
#endif
// utiliser zlib?
-#if HTS_USEZLIB
-#else
-#ifdef _WINDOWS
+#ifndef HTS_USEZLIB
+// autoload
#define HTS_USEZLIB 1
#endif
-#endif
#ifndef HTS_INET6
#define HTS_INET6 0
@@ -79,28 +145,27 @@ Please visit our Website: http://www.httrack.com
// utiliser openssl?
#ifndef HTS_USEOPENSSL
+// autoload
#define HTS_USEOPENSSL 1
#endif
-#if HTS_WIN
-#else
-#define __cdecl
+#ifndef HTS_DLOPEN
+#define HTS_DLOPEN 1
#endif
-/*
-#if HTS_XGETHOST
-#if HTS_PLATFORM==1
-#ifndef __cplusplus
-#undef HTS_XGMETHOD
-#undef HTS_XGETHOST
-#endif
+#ifndef HTS_USESWF
+#define HTS_USESWF 1
#endif
+
+#if HTS_WIN
#else
-#undef HTS_XGMETHOD
-#undef HTS_XGETHOST
+#define __cdecl
#endif
-*/
+#ifdef HTS_ANALYSTE_CONSOLE
+#undef HTS_ANALYSTE_CONSOLE
+#define HTS_ANALYSTE_CONSOLE 1
+#endif
#if HTS_ANALYSTE
#else
@@ -134,19 +199,25 @@ Please visit our Website: http://www.httrack.com
#define HTS_HTTRACKRC ".httrackrc"
#define HTS_HTTRACKCNF HTS_ETCPATH"/httrack.conf"
-#define HTS_HTTRACKDIR HTS_PREFIX"/doc/httrack/"
+
+#ifdef DATADIR
+#define HTS_HTTRACKDIR DATADIR"/httrack/"
+#else
+#define HTS_HTTRACKDIR HTS_PREFIX"/share/httrack/"
+#endif
#endif
/* Gestion des tables de hashage */
#define HTS_HASH_SIZE 20147
/* Taille max d'une URL */
-#define HTS_URLMAXSIZE 512
+#define HTS_URLMAXSIZE 1024
/* Taille max ligne de commande (>=HTS_URLMAXSIZE*2) */
#define HTS_CDLMAXSIZE 1024
/* Copyright (C) Xavier Roche and other contributors */
-#define HTTRACK_AFF_AUTHORS "[XR&CO'2002]"
+#define HTTRACK_AFF_AUTHORS "[XR&CO'2003]"
#define HTS_DEFAULT_FOOTER "<!-- Mirrored from %s%s by HTTrack Website Copier/"HTTRACK_AFF_VERSION" "HTTRACK_AFF_AUTHORS", %s -->"
+#define HTTRACK_WEB "http://www.httrack.com"
#define HTS_UPDATE_WEBSITE "http://www.httrack.com/update.php3?Product=HTTrack&Version="HTTRACK_VERSIONID"&VersionStr="HTTRACK_VERSION"&Platform=%d&Language=%s"
#define H_CRLF "\x0d\x0a"
@@ -175,43 +246,69 @@ Please visit our Website: http://www.httrack.com
#define HTS_INLINE
#endif
+#ifdef _WIN32
+#ifdef LIBHTTRACK_EXPORTS
+#define HTSEXT_API __declspec(dllexport)
+#else
+#define HTSEXT_API __declspec(dllimport)
+#endif
+#else
+#define HTSEXT_API
+#endif
+
+#ifndef HTS_LONGLONG
#ifdef HTS_NO_64_BIT
#define HTS_LONGLONG 0
#else
#define HTS_LONGLONG 1
#endif
+#endif
// long long int? (or int)
// (and int cast for system functions like malloc() )
+
#if HTS_LONGLONG
+#ifdef LLINT_FORMAT
+ typedef LLINT_TYPE LLint;
+ typedef LLINT_TYPE TStamp;
+ #define LLintP LLINT_FORMAT
+#else
#if HTS_WIN
typedef __int64 LLint;
typedef __int64 TStamp;
- typedef int INTsys;
#define LLintP "%I64d"
#else
#if HTS_PLATFORM==0
typedef long long int LLint;
typedef long long int TStamp;
- typedef int INTsys;
#define LLintP "%lld"
#else
typedef long long int LLint;
typedef long long int TStamp;
- typedef int INTsys;
#define LLintP "%Ld"
#endif
#endif
+#endif
#else
typedef int LLint;
- typedef int INTsys;
- typedef double TStamp;
#define LLintP "%d"
+ typedef double TStamp;
+#endif
+
+#ifdef LFS_FLAG
+typedef LLint INTsys;
+#define INTsysP LLintP
+#ifdef __linux
+#define HTS_FSEEKO
+#endif
+#else
+typedef int INTsys;
+#define INTsysP "%d"
#endif
-/* Alignement */
+/* Default alignement */
#ifndef HTS_ALIGN
-#define HTS_ALIGN 4
+#define HTS_ALIGN (sizeof(void*))
#endif
/* IPV4, IPV6 and various unified structures */
@@ -265,6 +362,11 @@ Please visit our Website: http://www.httrack.com
#if HTS_WIN
#else
// use pthreads.h
+
+#ifndef THREADS
+#define HTS_DO_NOT_USE_PTHREAD
+#endif
+
#ifdef HTS_DO_NOT_USE_PTHREAD
#define USE_PTHREAD 0
#else
@@ -283,6 +385,27 @@ Please visit our Website: http://www.httrack.com
#endif
#endif
+#ifdef _DEBUG
+// trace mallocs
+//#define HTS_TRACE_MALLOC
+#ifdef HTS_TRACE_MALLOC
+typedef unsigned long int t_htsboundary;
+typedef struct _mlink {
+ char* adr;
+ int len;
+ int id;
+ struct _mlink* next;
+} mlink;
+static const t_htsboundary htsboundary = 0xDEADBEEF;
+#endif
+#endif
+
+/* strxxx debugging */
+#ifndef NOSTRDEBUG
+#define STRDEBUG 1
+#endif
+
+
/* ------------------------------------------------------------ */
/* Debugging */
/* ------------------------------------------------------------ */
@@ -309,8 +432,6 @@ Please visit our Website: http://www.httrack.com
#define DEBUG_CHECKINT 0
// nbr sockets debug
#define NSDEBUG 0
-// tracer mallocs
-#define HTS_TRACE_MALLOC 0
// débuggage HTSLib
#define HDEBUG 0
diff --git a/src/htshash.c b/src/htshash.c
index b02f2ba..3cbdb5f 100644
--- a/src/htshash.c
+++ b/src/htshash.c
@@ -39,12 +39,21 @@ Please visit our Website: http://www.httrack.com
/* specific definitions */
#include "htsbase.h"
+#include "htsglobal.h"
#include "htsmd5.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* END specific definitions */
+/* Specific macros */
+#ifndef malloct
+#define malloct malloc
+#define freet free
+#define calloct calloc
+#define strcpybuff strcpy
+#endif
+
// GESTION DES TABLES DE HACHAGE
// Méthode à 2 clés (adr+fil), 2e cle facultative
// hash[no_enregistrement][pos]->hash est un index dans le tableau général liens
@@ -53,7 +62,10 @@ Please visit our Website: http://www.httrack.com
#if HTS_HASH
// recherche dans la table selon nom1,nom2 et le no d'enregistrement
// retour: position ou -1 si non trouvé
-int hash_read(hash_struct* hash,char* nom1,char* nom2,int type) {
+int hash_read(hash_struct* hash,char* nom1,char* nom2,int type,int normalized) {
+ char normfil_[HTS_URLMAXSIZE*2];
+ char* normfil;
+ char* normadr;
unsigned int cle;
int pos;
// calculer la clé de recherche, non modulée
@@ -64,7 +76,7 @@ int hash_read(hash_struct* hash,char* nom1,char* nom2,int type) {
// la position se calcule en modulant
pos = (int) (cle%HTS_HASH_SIZE);
// entrée trouvée?
- if (hash->hash[type][pos] >= 0) { // un enregistrement avec une telle clé existe..
+ if (hash->hash[type][pos] >= 0) { // un ou plusieurs enregistrement(s) avec une telle clé existe..
// tester table de raccourcis (hash)
// pos est maintenant la position recherchée dans liens
pos = hash->hash[type][pos];
@@ -79,20 +91,42 @@ int hash_read(hash_struct* hash,char* nom1,char* nom2,int type) {
}
break;
case 1: // adr+fil
- if ((strcmp(nom1,jump_identification(hash->liens[pos]->adr))==0) && (strcmp(nom2,hash->liens[pos]->fil)==0)) {
+ {
+ if (!normalized)
+ normfil=hash->liens[pos]->fil;
+ else
+ normfil=fil_normalized(hash->liens[pos]->fil,normfil_);
+ if (!normalized)
+ normadr = jump_identification(hash->liens[pos]->adr);
+ else
+ normadr = jump_normalized(hash->liens[pos]->adr);
+ if ((strfield2(nom1,normadr)!=0) && (strcmp(nom2,normfil)==0)) {
#if DEBUG_HASH==2
- printf("hash: found shortcut at %d\n",pos);
+ printf("hash: found shortcut at %d\n",pos);
#endif
- return pos;
+ return pos;
+ }
}
break;
case 2: // former_adr+former_fil
- if (hash->liens[pos]->former_adr)
- if ((strcmp(nom1,jump_identification(hash->liens[pos]->former_adr))==0) && (strcmp(nom2,hash->liens[pos]->former_fil)==0)) {
+ {
+ if (hash->liens[pos]->former_adr) {
+ if (!normalized)
+ normfil=hash->liens[pos]->former_fil;
+ else
+ normfil=fil_normalized(hash->liens[pos]->former_fil,normfil_);
+ if (!normalized)
+ normadr = jump_identification(hash->liens[pos]->former_adr);
+ else
+ normadr = jump_normalized(hash->liens[pos]->former_adr);
+
+ if ((strfield2(nom1,normadr)!=0) && (strcmp(nom2,normfil)==0)) {
#if DEBUG_HASH==2
- printf("hash: found shortcut at %d\n",pos);
+ printf("hash: found shortcut at %d\n",pos);
#endif
- return pos;
+ return pos;
+ }
+ }
}
break;
}
@@ -164,7 +198,9 @@ int hash_read(hash_struct* hash,char* nom1,char* nom2,int type) {
}
// enregistrement lien lpos dans les 3 tables hash1..3
-void hash_write(hash_struct* hash,int lpos) {
+void hash_write(hash_struct* hash,int lpos,int normalized) {
+ char normfil_[HTS_URLMAXSIZE*2];
+ char* normfil;
unsigned int cle;
int pos;
int* ptr;
@@ -185,7 +221,14 @@ void hash_write(hash_struct* hash,int lpos) {
printf("[%d",pos);
#endif
//
- cle = hash_cle(jump_identification(hash->liens[lpos]->adr),hash->liens[lpos]->fil);
+ if (!normalized)
+ normfil=hash->liens[lpos]->fil;
+ else
+ normfil=fil_normalized(hash->liens[lpos]->fil,normfil_);
+ if (!normalized)
+ cle = hash_cle(jump_identification(hash->liens[lpos]->adr),normfil);
+ else
+ cle = hash_cle(jump_normalized(hash->liens[lpos]->adr),normfil);
pos = (int) (cle%HTS_HASH_SIZE);
ptr = hash_calc_chaine(hash,1,pos); // calculer adresse chaine
*ptr = lpos; // noter dernier enregistré
@@ -194,7 +237,14 @@ void hash_write(hash_struct* hash,int lpos) {
#endif
//
if (hash->liens[lpos]->former_adr) { // former_adr existe?
- cle = hash_cle(jump_identification(hash->liens[lpos]->former_adr),hash->liens[lpos]->former_fil);
+ if (!normalized)
+ normfil=hash->liens[lpos]->former_fil;
+ else
+ normfil=fil_normalized(hash->liens[lpos]->former_fil,normfil_);
+ if (!normalized)
+ cle = hash_cle(jump_identification(hash->liens[lpos]->former_adr),normfil);
+ else
+ cle = hash_cle(jump_normalized(hash->liens[lpos]->former_adr),normfil);
pos = (int) (cle%HTS_HASH_SIZE);
ptr = hash_calc_chaine(hash,2,pos); // calculer adresse chaine
*ptr = lpos; // noter dernier enregistré
@@ -209,6 +259,7 @@ void hash_write(hash_struct* hash,int lpos) {
#if DEBUT_HASH
else {
printf("* hash_write=0!!\n");
+ abortLogFmt("unexpected error in hash_write (pos=%d)" _ pos);
exit(1);
}
#endif
@@ -263,191 +314,3 @@ int* hash_calc_chaine(hash_struct* hash,int type,int pos) {
#endif
// FIN GESTION DES TABLES DE HACHAGE
-
-
-
-
-
-
-
-
-
-
-
-// inthash -- simple hash table, using a key (char[]) and a value (ulong int)
-
-unsigned long int inthash_key(char* value) {
- return md5sum32(value);
-}
-
-// Check for duplicate entry (==1 : added)
-int inthash_write(inthash hashtable,char* name,long int value) {
- int pos = (inthash_key(name) % hashtable->hash_size);
- inthash_chain* h=hashtable->hash[pos];
- while (h) {
- if (strcmp(h->name,name)==0) {
- h->value.intg=value;
- return 0;
- }
- h=h->next;
- }
- // Not found, add it!
- inthash_add(hashtable,name,value);
- return 1;
-}
-
-// Increment pos value, create one if necessary (=0)
-// (==1 : created)
-int inthash_inc(inthash hashtable,char* name) {
- long int value=0;
- int r=0;
- if (inthash_read(hashtable,name,&value)) {
- value++;
- }
- else { /* create new value */
- value=0;
- r=1;
- }
- inthash_write(hashtable,name,value);
- return (r);
-}
-
-
-// Does not check for duplicate entry
-void inthash_add(inthash hashtable,char* name,long int value) {
- int pos = (inthash_key(name) % hashtable->hash_size);
- inthash_chain** h=&hashtable->hash[pos];
-
- while (*h)
- h=&((*h)->next);
- *h=(inthash_chain*)calloc(1,
- sizeof(inthash_chain)
- +
- strlen(name)+2
- );
- if (*h) {
- (*h)->name=((char*)(*h)) + sizeof(inthash_chain);
- (*h)->next=NULL;
- strcpy((*h)->name,name);
- (*h)->value.intg=value;
- }
-}
-
-void* inthash_addblk(inthash hashtable,char* name,int blksize) {
- int pos = (inthash_key(name) % hashtable->hash_size);
- inthash_chain** h=&hashtable->hash[pos];
-
- while (*h)
- h=&((*h)->next);
- *h=(inthash_chain*)calloc(1,
- sizeof(inthash_chain)
- +
- strlen(name)+2
- +
- blksize
- );
- if (*h) {
- (*h)->name = ((char*)(*h)) + sizeof(inthash_chain);
- (*h)->next=NULL;
- strcpy((*h)->name,name);
- (*h)->value.intg = (unsigned long) (char*) ((char*)(*h)) + sizeof(inthash_chain) + strlen(name) + 2;
- return (void*)(*h)->value.intg;
- }
- return NULL;
-}
-
-int inthash_read(inthash hashtable,char* name,long int* value) {
- int pos = (inthash_key(name) % hashtable->hash_size);
- inthash_chain* h=hashtable->hash[pos];
- while (h) {
- if (strcmp(h->name,name)==0) {
- *value=h->value.intg;
- return 1;
- }
- h=h->next;
- }
- return 0;
-}
-
-void inthash_init(inthash hashtable) {
- unsigned int i;
- for(i=0;i<hashtable->hash_size;i++) {
- hashtable->hash[i]=NULL;
- }
-}
-
-void inthash_delchain(inthash_chain* hash,t_inthash_freehandler free_handler) {
- if (hash) {
- inthash_delchain(hash->next,free_handler);
- if (free_handler) { // pos is a malloc() block, delete it!
- if (hash->value.intg) {
- if (free_handler)
- free_handler((void*)hash->value.intg);
- else
- free((void*)hash->value.intg);
- }
- hash->value.intg=0;
- }
- free(hash);
- }
-}
-
-void inthash_default_free_handler(void* value) {
- if (value)
- free(value);
-}
-
-// --
-
-inthash inthash_new(int size) {
- inthash hashtable=(inthash)calloc(1,sizeof(struct_inthash));
- if (hashtable) {
- hashtable->hash_size=0;
- hashtable->flag_valueismalloc=0;
- if ((hashtable->hash=(inthash_chain**)calloc(size,sizeof(inthash_chain*)))) {
- hashtable->hash_size=size;
- inthash_init(hashtable);
- }
- }
- return hashtable;
-}
-
-int inthash_created(inthash hashtable) {
- if (hashtable)
- if (hashtable->hash)
- return 1;
- return 0;
-}
-
-void inthash_value_is_malloc(inthash hashtable,int flag) {
- hashtable->flag_valueismalloc=flag;
-}
-
-void inthash_value_set_free_handler(inthash hashtable, t_inthash_freehandler free_handler) {
- hashtable->free_handler = free_handler;
-}
-
-void inthash_delete(inthash* hashtable) {
- if (hashtable) {
- if (*hashtable) {
- if ((*hashtable)->hash) {
- unsigned int i;
- t_inthash_freehandler free_handler=NULL;
- if ( (*hashtable)->flag_valueismalloc ) {
- if ( (*hashtable)->free_handler )
- free_handler=(*hashtable)->free_handler;
- else
- free_handler=inthash_default_free_handler;
- }
- for(i=0;i<(*hashtable)->hash_size;i++) {
- inthash_delchain((*hashtable)->hash[i],(*hashtable)->free_handler);
- (*hashtable)->hash[i]=NULL;
- }
- }
- free(*hashtable);
- *hashtable=NULL;
- }
- }
-}
-
-
diff --git a/src/htshash.h b/src/htshash.h
index 9a54710..c4acff1 100644
--- a/src/htshash.h
+++ b/src/htshash.h
@@ -43,62 +43,9 @@ Please visit our Website: http://www.httrack.com
#include "htscore.h"
// tables de hashage
-int hash_read(hash_struct* hash,char* nom1,char* nom2,int type);
-void hash_write(hash_struct* hash,int lpos);
+int hash_read(hash_struct* hash,char* nom1,char* nom2,int type,int normalized);
+void hash_write(hash_struct* hash,int lpos,int normalized);
int* hash_calc_chaine(hash_struct* hash,int type,int pos);
unsigned long int hash_cle(char* nom1,char* nom2);
-
-
-
-// inthash -- simple hash table, using a key (char[]) and a value (ulong int)
-
-// simple hash table for other routines
-typedef struct inthash_chain {
- char* name; /* key (name) */
- union {
- unsigned long int intg; /* integer value */
- void* ptr; /* ptr value */
- } value;
- struct inthash_chain* next; /* next element */
-} inthash_chain;
-
-// structure behind inthash
-typedef void (* t_inthash_freehandler)(void* value);
-typedef struct {
- inthash_chain** hash;
- t_inthash_freehandler free_handler;
- unsigned int hash_size;
- unsigned short flag_valueismalloc;
-} struct_inthash;
-
-// main inthash type
-typedef struct_inthash* inthash;
-
-// subfunctions
-unsigned long int inthash_key(char* value);
-void inthash_init(inthash hashtable);
-void inthash_delchain(inthash_chain* hash,t_inthash_freehandler free_handler);
-void inthash_default_free_handler(void* value);
-
-// main functions:
-
-
-/* Hash functions: */
-inthash inthash_new(int size); /* Create a new hash table */
-int inthash_created(inthash hashtable); /* Test if the hash table was successfully created */
-void inthash_delete(inthash* hashtable); /* Delete an hash table */
-void inthash_value_is_malloc(inthash hashtable,int flag); /* Is the 'value' member a value that needs to be free()'ed ? */
-void inthash_value_set_free_handler(inthash hashtable, /* value free() handler (default one is 'free') */
- t_inthash_freehandler free_handler);
-/* */
-int inthash_read(inthash hashtable,char* name,long int* value); /* Read entry from the hash table */
-/* */
-void inthash_add(inthash hashtable,char* name,long int value); /* Add entry in the hash table */
-void* inthash_addblk(inthash hashtable,char* name,int blksize); /* Add entry in the hash table and set value to a new memory block */
-int inthash_write(inthash hashtable,char* name,long int value); /* Overwrite/add entry in the hash table */
-int inthash_inc(inthash hashtable,char* name); /* Increment entry in the hash table */
-/* End of hash functions: */
-
-
#endif
diff --git a/src/htshelp.c b/src/htshelp.c
index 3d743fe..7046929 100644
--- a/src/htshelp.c
+++ b/src/htshelp.c
@@ -48,8 +48,10 @@ Please visit our Website: http://www.httrack.com
#include <string.h>
#if HTS_WIN
#else
+#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
+#endif
/* END specific definitions */
#define waitkey if (more) { char s[4]; printf("\nMORE.. q to quit\n"); linput(stdin,s,4); if (strcmp(s,"q")==0) quit=1; else printf("Page %d\n\n",++m); }
@@ -115,15 +117,15 @@ void infomsg(char* msg) {
}
}
void help_wizard(httrackp* opt) {
- char* urls = (char*) malloc(HTS_URLMAXSIZE*2);
- char* mainpath = (char*) malloc(256);
- char* projname = (char*) malloc(256);
- char* stropt = (char*) malloc(2048); // options
- char* stropt2 = (char*) malloc(2048); // options longues
- char* strwild = (char*) malloc(2048); // wildcards
- char* cmd = (char*) malloc(4096);
- char* str = (char*) malloc(256);
- char** argv = (char**) malloc(256 * sizeof(char*));
+ char* urls = (char*) malloct(HTS_URLMAXSIZE*2);
+ char* mainpath = (char*) malloct(256);
+ char* projname = (char*) malloct(256);
+ char* stropt = (char*) malloct(2048); // options
+ char* stropt2 = (char*) malloct(2048); // options longues
+ char* strwild = (char*) malloct(2048); // wildcards
+ char* cmd = (char*) malloct(4096);
+ char* str = (char*) malloct(256);
+ char** argv = (char**) malloct(256 * sizeof(char*));
//
char* a;
//
@@ -134,12 +136,12 @@ void help_wizard(httrackp* opt) {
}
urls[0] = mainpath[0] = projname[0] = stropt[0] = stropt2[0] = strwild[0] = cmd[0] = str[0] = '\0';
//
- strcpy(stropt,"-");
+ strcpybuff(stropt,"-");
mainpath[0]=projname[0]=stropt2[0]=strwild[0]='\0';
//
printf("\n");
- printf("Welcome to HTTrack Website Copier (Offline Browser) "HTTRACK_VERSION"\n");
+ printf("Welcome to HTTrack Website Copier (Offline Browser) "HTTRACK_VERSION"%s\n", WHAT_is_available);
printf("Copyright (C) Xavier Roche and other contributors\n");
#ifdef _WIN32
printf("Note: You are running the commandline version,\n");
@@ -174,13 +176,13 @@ void help_wizard(httrackp* opt) {
printf("\nBase path (return=current directory) :");
linput(stdin,str,250);
if (!strnotempty(str)) {
- strcat(str,hts_gethome());
- strcat(str,"/websites/");
+ strcatbuff(str,hts_gethome());
+ strcatbuff(str,"/websites/");
}
if (strnotempty(str))
if ((str[strlen(str)-1]!='/') && (str[strlen(str)-1]!='\\'))
- strcat(str,"/");
- strcat(stropt2,"-O \""); strcat(stropt2,str); strcat(stropt2,projname); strcat(stropt2,"\" ");
+ strcatbuff(str,"/");
+ strcatbuff(stropt2,"-O \""); strcatbuff(stropt2,str); strcatbuff(stropt2,projname); strcatbuff(stropt2,"\" ");
// Créer si ce n'est fait un index.html 1er niveau
make_empty_index(str);
//
@@ -196,11 +198,11 @@ void help_wizard(httrackp* opt) {
printf("\nAction:\n");
switch(help_query("Mirror Web Site(s)|Mirror Web Site(s) with Wizard|Just Get Files Indicated|Mirror ALL links in URLs (Multiple Mirror)|Test Links In URLs (Bookmark Test)|Update/Continue a Mirror",1)) {
case 1: break;
- case 2: strcat(stropt,"W"); break;
- case 3: strcat(stropt2,"--get "); break;
- case 4: strcat(stropt2,"--mirrorlinks "); break;
- case 5: strcat(stropt2,"--testlinks "); break;
- case 6: strcat(stropt2,"--update "); break;
+ case 2: strcatbuff(stropt,"W"); break;
+ case 3: strcatbuff(stropt2,"--get "); break;
+ case 4: strcatbuff(stropt2,"--mirrorlinks "); break;
+ case 5: strcatbuff(stropt2,"--testlinks "); break;
+ case 6: strcatbuff(stropt2,"--update "); break;
case 0: return; break;
}
@@ -213,17 +215,17 @@ void help_wizard(httrackp* opt) {
char str2[256];
printf("\nProxy port (return=8080) :");
linput(stdin,str2,250);
- strcat(str,":");
+ strcatbuff(str,":");
if (strnotempty(str2)==0)
- strcat(str,"8080");
+ strcatbuff(str,"8080");
else
- strcat(str,str2);
+ strcatbuff(str,str2);
}
- strcat(stropt2,"-P "); strcat(stropt2,str); strcat(stropt2," ");
+ strcatbuff(stropt2,"-P "); strcatbuff(stropt2,str); strcatbuff(stropt2," ");
}
// Display
- strcat(stropt2," -%v ");
+ strcatbuff(stropt2," -%v ");
// Wildcards
printf("\nYou can define wildcards, like: -*.gif +www.*.com/*.zip -*img_*.zip\n");
@@ -239,8 +241,8 @@ void help_wizard(httrackp* opt) {
if (strfield2(str,"help")) {
help("httrack",2);
} else if (strnotempty(str)) {
- strcat(stropt2,str);
- strcat(stropt2," ");
+ strcatbuff(stropt2,str);
+ strcatbuff(stropt2," ");
}
} while(strfield2(str,"help"));
@@ -288,14 +290,14 @@ void help_wizard(httrackp* opt) {
}
/* Free buffers */
- free(urls);
- free(mainpath);
- free(projname);
- free(stropt);
- free(stropt2);
- free(strwild);
- free(cmd);
- free(str);
+ freet(urls);
+ freet(mainpath);
+ freet(projname);
+ freet(stropt);
+ freet(stropt2);
+ freet(strwild);
+ freet(cmd);
+ freet(str);
}
int help_query(char* list,int def) {
char s[256];
@@ -309,7 +311,7 @@ int help_query(char* list,int def) {
char str[256];
str[0]='\0';
//
- strncat(str,a,(int) (b - a));
+ strncatbuff(str,a,(int) (b - a));
if (n==def)
printf("(enter)\t%d\t%s\n",n++,str);
else
@@ -394,12 +396,13 @@ void make_empty_index(char* str) {
// mini-aide (h: help)
// y
void help(char* app,int more) {
+ char info[2048];
infomsg("");
if (more)
infomsg("1");
if (more != 2) {
- char info[2048];
- infomsg("HTTrack version "HTTRACK_VERSION" (compiled "__DATE__")");
+ sprintf(info, "HTTrack version "HTTRACK_VERSION"%s (compiled "__DATE__")", WHAT_is_available);
+ infomsg(info);
#ifdef HTTRACK_AFF_WARNING
infomsg("NOTE: "HTTRACK_AFF_WARNING);
#endif
@@ -424,6 +427,7 @@ void help(char* app,int more) {
infomsg("Proxy options:");
infomsg(" P proxy use (-P proxy:port or -P user:pass@proxy:port)");
infomsg(" %f *use proxy for ftp (f0 don't use)");
+ infomsg(" %b use this local hostname to make/send requests (-%b hostname)");
infomsg("");
infomsg("Limits options:");
infomsg(" rN set the mirror depth to N (* r9999)");
@@ -448,10 +452,12 @@ void help(char* app,int more) {
infomsg(" n get non-html files 'near' an html file (ex: an image located outside)");
infomsg(" t test all URLs (even forbidden ones)");
infomsg(" %L <file> add all URL located in this text file (one URL per line)");
+ infomsg(" %S <file> add all scan rules located in this text file (one scan rule per line)");
infomsg("");
infomsg("Build options:");
infomsg(" NN structure type (0 *original structure, 1+: see below)");
infomsg(" or user defined structure (-N \"%h%p/%n%q.%t\")");
+ infomsg(" %M generate a RFC MIME-encapsulated full-archive (.mht)");
infomsg(" LN long names (L1 *long names / L0 8-3 conversion / L2 ISO9660 compatible)");
infomsg(" KN keep original links (e.g. http://www.adr/link) (K0 *relative link, K absolute links, K4 original links, K3 absolute URI links)");
infomsg(" x replace external html links by error pages");
@@ -467,8 +473,10 @@ void help(char* app,int more) {
infomsg(" j *parse Java Classes (j0 don't parse)");
infomsg(" sN follow robots.txt and meta robots tags (0=never,1=sometimes,* 2=always)");
infomsg(" %h force HTTP/1.0 requests (reduce update features, only for old servers or proxies)");
+ infomsg(" %k use keep-alive if possible, greately reducing latency for small files and test requests (%k0 don't use)");
infomsg(" %B tolerant requests (accept bogus responses on some servers, but not standard!)");
infomsg(" %s update hacks: various hacks to limit re-transfers when updating (identical size, bogus response..)");
+ infomsg(" %u url hacks: various hacks to limit duplicate URLs (strip //, www.foo.com==foo.com..)");
infomsg(" %A assume that a type (cgi,asp..) is always linked with a mime type (-%A php3,cgi=text/html;dat,bin=application/x-zip)");
infomsg(" shortcut: '--assume standard' is equivalent to -%A "HTS_ASSUME_STANDARD);
infomsg(" @iN internet protocol (0=both ipv6+ipv4, 4=ipv4 only, 6=ipv6 only)");
@@ -491,6 +499,7 @@ void help(char* app,int more) {
infomsg(" f *log in files");
infomsg(" f2 one single log file");
infomsg(" I *make an index (I0 don't make)");
+ infomsg(" %i make a top index for a project folder (* %i0 don't make)");
infomsg(" %I make an searchable index for this mirror (* %I0 don't make)");
infomsg("");
infomsg("Expert options:");
@@ -511,23 +520,26 @@ void help(char* app,int more) {
infomsg(" %H debug HTTP headers in logfile");
infomsg("");
infomsg("Guru options: (do NOT use if possible)");
- infomsg(" #0 Filter test (-#0 '*.gif' 'www.bar.com/foo.gif')");
- infomsg(" #f Always flush log files");
- infomsg(" #FN Maximum number of filters");
- infomsg(" #h Version info");
- infomsg(" #K Scan stdin (debug)");
- infomsg(" #L Maximum number of links (-#L1000000)");
- infomsg(" #p Display ugly progress information");
- infomsg(" #P Catch URL");
- infomsg(" #R Old FTP routines (debug)");
- infomsg(" #T Generate transfer ops. log every minutes");
- infomsg(" #u Wait time");
- infomsg(" #Z Generate transfer rate statictics every minutes");
- infomsg(" #! Execute a shell command (-#! \"echo hello\")");
+ infomsg(" #X *use optimized engine (limited memory boundary checks)");
+ infomsg(" #0 filter test (-#0 '*.gif' 'www.bar.com/foo.gif')");
+ infomsg(" #C cache list (-#C '*.com/spider*.gif'");
+ infomsg(" #f always flush log files");
+ infomsg(" #FN maximum number of filters");
+ infomsg(" #h version info");
+ infomsg(" #K scan stdin (debug)");
+ infomsg(" #L maximum number of links (-#L1000000)");
+ infomsg(" #p display ugly progress information");
+ infomsg(" #P catch URL");
+ infomsg(" #R old FTP routines (debug)");
+ infomsg(" #T generate transfer ops. log every minutes");
+ infomsg(" #u wait time");
+ infomsg(" #Z generate transfer rate statictics every minutes");
+ infomsg(" #! execute a shell command (-#! \"echo hello\")");
infomsg("");
infomsg("Command-line specific options:");
infomsg(" V execute system command after each files ($0 is the filename: -V \"rm \\$0\")");
infomsg(" %U run the engine with another id when called as root (-%U smith)");
+ infomsg(" %W use an external library function as a wrapper (-%W link-detected=foo.so:myfunction)");
/* infomsg(" %O do a chroot before setuid"); */
infomsg("");
infomsg("Details: Option N");
@@ -562,6 +574,15 @@ void help(char* app,int more) {
infomsg(" '%q' small query string MD5 (16 bits, 4 ascii bytes)");
infomsg(" '%s?' Short name version (ex: %sN)");
infomsg(" '%[param]' param variable in query string");
+ infomsg(" '%[param:before:after:notfound:empty]' advanced variable extraction");
+ infomsg("Details: User-defined option N and advanced variable extraction");
+ infomsg(" %[param:before:after:notfound:empty]");
+ infomsg(" param : parameter name");
+ infomsg(" before : string to prepend if the parameter was found");
+ infomsg(" after : string to append if the parameter was found");
+ infomsg(" notfound : string replacement if the parameter could not be found");
+ infomsg(" empty : string replacement if the parameter was empty");
+ infomsg(" all fields, except the first one (the parameter name), can be empty");
infomsg("");
infomsg("Details: Option K");
infomsg(" K0 foo.cgi?q=45 -> foo4B54.html?q=45 (relative URI, default)");
@@ -586,6 +607,24 @@ void help(char* app,int more) {
infomsg("");
infomsg("--http10 force http/1.0 requests (-%h)");
infomsg("");
+ infomsg("Details: Option %W: External callbacks prototypes");
+ infomsg("'init' : void (* myfunction)(void);");
+ infomsg("'free' : void (* myfunction)(void);");
+ infomsg("'start' : int (* myfunction)(httrackp* opt);");
+ infomsg("'end' : int (* myfunction)(void);");
+ infomsg("'change-options' : int (* myfunction)(httrackp* opt);");
+ infomsg("'check-html' : int (* myfunction)(char* html,int len,char* url_adresse,char* url_fichier);");
+ infomsg("'query' : char* (* myfunction)(char* question);");
+ infomsg("'query2' : char* (* myfunction)(char* question);");
+ infomsg("'query3' : char* (* myfunction)(char* question);");
+ infomsg("'loop' : int (* myfunction)(lien_back* back,int back_max,int back_index,int lien_tot,int lien_ntot,int stat_time,hts_stat_struct* stats);");
+ infomsg("'check-link' : int (* myfunction)(char* adr,char* fil,int status);");
+ infomsg("'pause' : void (* myfunction)(char* lockfile);");
+ infomsg("'save-file' : void (* myfunction)(char* file);");
+ infomsg("'link-detected' : int (* myfunction)(char* link);");
+ infomsg("'transfer-status' : int (* myfunction)(lien_back* back);");
+ infomsg("'save-name' : int (* myfunction)(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save);");
+ infomsg("");
infomsg("");
infomsg("example: httrack www.someweb.com/bob/");
infomsg("means: mirror site www.someweb.com/bob/ and only this site");
@@ -608,7 +647,8 @@ void help(char* app,int more) {
infomsg("example: httrack --continue");
infomsg("continues a mirror in the current folder");
infomsg("");
- infomsg("HTTrack version "HTTRACK_VERSION" (compiled "__DATE__")");
+ sprintf(info, "HTTrack version "HTTRACK_VERSION"%s (compiled "__DATE__")", WHAT_is_available);
+ infomsg(info);
infomsg("Copyright (C) Xavier Roche and other contributors");
#ifdef HTS_PLATFORM_NAME
infomsg("[compiled: "HTS_PLATFORM_NAME"]");
diff --git a/src/htsindex.c b/src/htsindex.c
index 5a66724..1a75103 100644
--- a/src/htsindex.c
+++ b/src/htsindex.c
@@ -44,6 +44,7 @@ Please visit our Website: http://www.httrack.com
#if HTS_MAKE_KEYWORD_INDEX
#include "htshash.h"
+#include "htsinthash.h"
/* Keyword Indexer Parameters */
@@ -92,7 +93,7 @@ Please visit our Website: http://www.httrack.com
// Words begining with these (accepted) characters will be ignored
#define KEYW_NOT_BEG "0123456789"
// Treat these characters as space characters - MUST NOT BE EMPTY!!!
-#define KEYW_SPACE " ',;:!?\"\x0d\x0a\x09\x0c"
+#define KEYW_SPACE " ',;:!?\"\x0d\x0a\x09\x0b\x0c"
// Common words (the,for..) detector
// If a word represents more than KEYW_USELESS1K (%1000) of total words, then ignore it
// 5 (0.5%)
@@ -184,6 +185,8 @@ int index_keyword(const char* html_data,LLint size,const char* mime,const char*
|| (strfield2(mime,"text/css"))
) {
inscript=1;
+ //} else if (strfield2(mime, "text/vnd.wap.wml")) { // humm won't work in many cases
+ // inscript=0;
} else
return 0;
@@ -299,9 +302,9 @@ int index_keyword(const char* html_data,LLint size,const char* mime,const char*
e++; /* 0 means "once" */
if (strncmp((const char*)fslash((char*)indexpath),filename,strlen(indexpath))==0) // couper
- strcpy(savelst,filename+strlen(indexpath));
+ strcpybuff(savelst,filename+strlen(indexpath));
else
- strcpy(savelst,filename);
+ strcpybuff(savelst,filename);
// Add entry for this file and word
fprintf(fp_tmpproject,"%s %d %s\n",line,(int) (KEYW_SORT_MAXCOUNT - e),savelst);
@@ -331,7 +334,7 @@ void index_finish(const char* indexpath,int mode) {
#if HTS_MAKE_KEYWORD_INDEX
char** tab;
char* blk;
- int size;
+ INTsys size;
size=fpsize(fp_tmpproject);
if (size>0) {
@@ -342,7 +345,7 @@ void index_finish(const char* indexpath,int mode) {
blk = malloct(size+4);
if (blk) {
fseek(fp_tmpproject,0,SEEK_SET);
- if ((int)fread(blk,1,size,fp_tmpproject) == size) {
+ if ((INTsys)fread(blk,1,size,fp_tmpproject) == size) {
char *a=blk,*b;
int index=0;
int i;
@@ -430,7 +433,7 @@ void index_finish(const char* indexpath,int mode) {
fprintf(fp,"<tr>\r\n<td>%s</td>\r\n<td>\r\n",word);
}
fflush(fp); last_pos=ftell(fp);
- strcpy(current_word,word);
+ strcpybuff(current_word,word);
total_hit=total_line=0;
}
total_hit+=hit;
diff --git a/src/htsinthash.c b/src/htsinthash.c
new file mode 100644
index 0000000..95b8711
--- /dev/null
+++ b/src/htsinthash.c
@@ -0,0 +1,252 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: httrack.c subroutines: */
+/* hash table system (fast index) */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+#include "htsinthash.h"
+
+/* specific definitions */
+#include "htsbase.h"
+#include "htsglobal.h"
+#include "htsmd5.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+/* END specific definitions */
+
+/* Specific macros */
+#ifndef malloct
+#define malloct malloc
+#define freet free
+#define calloct calloc
+#define strcpybuff strcpy
+#endif
+
+// inthash -- simple hash table, using a key (char[]) and a value (ulong int)
+
+/* Compute the hash key of a string by delegating to md5sum32(). */
+unsigned long int inthash_key(char* value) {
+  unsigned long int key = md5sum32(value);
+  return key;
+}
+
+// Overwrite the value stored under 'name', or add a new entry if none exists.
+// Returns 0 when an existing entry was overwritten, 1 when a new one was added.
+int inthash_write(inthash hashtable,char* name,long int value) {
+  int bucket = (inthash_key(name) % hashtable->hash_size);
+  inthash_chain* item;
+  for(item=hashtable->hash[bucket]; item != NULL; item=item->next) {
+    if (strcmp(item->name,name) != 0)
+      continue;
+    /* Release the previous value when the table holds malloc'ed values */
+    if (hashtable->flag_valueismalloc && item->value.intg) {
+      if (hashtable->free_handler)
+        hashtable->free_handler((void*)item->value.intg);
+      else
+        freet((void*)item->value.intg);
+    }
+    /* Store the new value */
+    item->value.intg=value;
+    return 0;
+  }
+  // Not found: append a new entry
+  inthash_add(hashtable,name,value);
+  return 1;
+}
+
+// Increment the value stored under 'name'; a missing entry is created with value 0.
+// Returns 1 if the entry was created, 0 if an existing one was incremented.
+int inthash_inc(inthash hashtable,char* name) {
+  long int counter=0;
+  int created;
+  if (inthash_read(hashtable,name,&counter)) {
+    counter++;
+    created=0;
+  } else {
+    counter=0;    /* new entries start at zero */
+    created=1;
+  }
+  inthash_write(hashtable,name,counter);
+  return created;
+}
+
+
+// Append a new entry at the end of its bucket chain.
+// Does not check for duplicate entry; if the allocation fails nothing is added.
+void inthash_add(inthash hashtable,char* name,long int value) {
+  int bucket = (inthash_key(name) % hashtable->hash_size);
+  inthash_chain** link=&hashtable->hash[bucket];
+  inthash_chain* item;
+
+  /* Walk to the terminating NULL link of the chain */
+  while (*link)
+    link=&((*link)->next);
+  /* The node and its key string share one zeroed allocation */
+  item=(inthash_chain*)calloct(1,
+                               sizeof(inthash_chain)
+                               +
+                               strlen(name)+2
+                               );
+  *link=item;
+  if (item) {
+    item->name=((char*)item) + sizeof(inthash_chain);
+    item->next=NULL;
+    strcpybuff(item->name,name);
+    item->value.intg=value;
+  }
+}
+
+// Add an entry whose value is the address of a zeroed block of 'blksize' bytes
+// allocated together with the node. Returns that block, or NULL on failure.
+// Does not check for duplicate entry.
+void* inthash_addblk(inthash hashtable,char* name,int blksize) {
+  int bucket = (inthash_key(name) % hashtable->hash_size);
+  inthash_chain** link=&hashtable->hash[bucket];
+  inthash_chain* item;
+
+  /* Walk to the terminating NULL link of the chain */
+  while (*link)
+    link=&((*link)->next);
+  /* Layout: [inthash_chain][name, strlen(name)+2 bytes][blksize bytes] */
+  item=(inthash_chain*)calloct(1,
+                               sizeof(inthash_chain)
+                               +
+                               strlen(name)+2
+                               +
+                               blksize
+                               );
+  *link=item;
+  if (!item)
+    return NULL;
+  item->name = ((char*)item) + sizeof(inthash_chain);
+  item->next=NULL;
+  strcpybuff(item->name,name);
+  /* The stored value is the address of the trailing block */
+  item->value.intg = (unsigned long) (char*) ((char*)item) + sizeof(inthash_chain) + strlen(name) + 2;
+  return (void*)item->value.intg;
+}
+
+// Look up 'name'; on success store its value in *value and return 1.
+// Returns 0 (leaving *value untouched) when the key is not present.
+int inthash_read(inthash hashtable,char* name,long int* value) {
+  int bucket = (inthash_key(name) % hashtable->hash_size);
+  inthash_chain* item;
+  for(item=hashtable->hash[bucket]; item != NULL; item=item->next) {
+    if (strcmp(item->name,name) != 0)
+      continue;
+    *value=item->value.intg;
+    return 1;
+  }
+  return 0;
+}
+
+// Same as inthash_read(), but *value is first reset to 0 and a stored
+// value of 0 (null pointer) is reported as "not found" (returns 0).
+int inthash_readptr(inthash hashtable,char* name,long int* value) {
+  *value = 0;
+  if (!inthash_read(hashtable, name, value))
+    return 0;
+  return (*value != 0) ? 1 : 0;
+}
+
+// Reset every bucket of the table to an empty (NULL) chain.
+void inthash_init(inthash hashtable) {
+  unsigned int i=0;
+  while (i<hashtable->hash_size) {
+    hashtable->hash[i]=NULL;
+    i++;
+  }
+}
+
+// Free a bucket chain, tail first. When 'free_handler' is non-NULL, each
+// non-zero value is handed to it before the node itself is released.
+void inthash_delchain(inthash_chain* hash,t_inthash_freehandler free_handler) {
+  if (!hash)
+    return;
+  inthash_delchain(hash->next,free_handler);
+  if (free_handler) {         // value is a malloc() block, delete it!
+    if (hash->value.intg)
+      free_handler((void*)hash->value.intg);
+    hash->value.intg=0;
+  }
+  freet(hash);
+}
+
+// Default value release handler: frees a non-NULL block with freet().
+void inthash_default_free_handler(void* value) {
+  if (value != NULL) {
+    freet(value);
+  }
+}
+
+// --
+
+// Create a hash table with 'size' buckets. Returns NULL if the table
+// structure could not be allocated; if only the bucket array allocation
+// fails, the table is returned with hash==NULL and hash_size==0
+// (see inthash_created()).
+inthash inthash_new(int size) {
+  inthash table=(inthash)calloct(1,sizeof(struct_inthash));
+  if (!table)
+    return table;
+  table->hash_size=0;
+  table->flag_valueismalloc=0;
+  table->hash=(inthash_chain**)calloct(size,sizeof(inthash_chain*));
+  if (table->hash) {
+    table->hash_size=size;
+    inthash_init(table);
+  }
+  return table;
+}
+
+// Returns 1 when both the table and its bucket array exist, 0 otherwise.
+int inthash_created(inthash hashtable) {
+  return (hashtable && hashtable->hash) ? 1 : 0;
+}
+
+/* Set the table's flag_valueismalloc: non-zero means stored values are
+   blocks that must be released (see inthash_write / inthash_delete). */
+void inthash_value_is_malloc(inthash hashtable,int flag) {
+ hashtable->flag_valueismalloc=flag;
+}
+
+/* Register the handler used to release stored values. */
+void inthash_value_set_free_handler(inthash hashtable, t_inthash_freehandler free_handler) {
+ hashtable->free_handler = free_handler;
+}
+
+// Delete a hash table: frees every chain, the bucket array and the table
+// itself, then sets *hashtable to NULL. A NULL 'hashtable' or a NULL
+// '*hashtable' is ignored.
+void inthash_delete(inthash* hashtable) {
+  if (hashtable) {
+    if (*hashtable) {
+      if ((*hashtable)->hash) {
+        unsigned int i;
+        /* Values are released only when the table owns them
+           (flag_valueismalloc); use the registered handler if any,
+           otherwise fall back to the default one. */
+        t_inthash_freehandler free_handler=NULL;
+        if ( (*hashtable)->flag_valueismalloc ) {
+          if ( (*hashtable)->free_handler )
+            free_handler=(*hashtable)->free_handler;
+          else
+            free_handler=inthash_default_free_handler;
+        }
+        for(i=0;i<(*hashtable)->hash_size;i++) {
+          /* Pass the handler selected above, not the raw table field:
+             the raw field ignores flag_valueismalloc and the default handler */
+          inthash_delchain((*hashtable)->hash[i],free_handler);
+          (*hashtable)->hash[i]=NULL;
+        }
+        freet((*hashtable)->hash);
+        (*hashtable)->hash = NULL;
+      }
+      freet(*hashtable);
+      *hashtable=NULL;
+    }
+  }
+}
diff --git a/src/htsinthash.h b/src/htsinthash.h
new file mode 100644
index 0000000..c667cd4
--- /dev/null
+++ b/src/htsinthash.h
@@ -0,0 +1,94 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: httrack.c subroutines: */
+/* hash table system (fast index) */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+
+
+#ifndef HTSINTHASH_DEFH
+#define HTSINTHASH_DEFH
+
+// inthash -- simple hash table, using a key (char[]) and a value (ulong int)
+
+// simple hash table for other routines
+// One node of a bucket chain: a key, its value (integer or pointer view
+// of the same storage) and the link to the next node of the chain.
+typedef struct inthash_chain {
+ char* name; /* key (name) */
+ union {
+ unsigned long int intg; /* integer value */
+ void* ptr; /* ptr value */
+ } value;
+ struct inthash_chain* next; /* next element */
+} inthash_chain;
+
+// structure behind inthash
+// t_inthash_freehandler: callback used to release a stored value
+typedef void (* t_inthash_freehandler)(void* value);
+// hash               : array of hash_size bucket chain heads
+// free_handler       : handler used to release values (may be unset)
+// hash_size          : number of buckets (0 if the array was not allocated)
+// flag_valueismalloc : non-zero when values are blocks that must be released
+typedef struct {
+ inthash_chain** hash;
+ t_inthash_freehandler free_handler;
+ unsigned int hash_size;
+ unsigned short flag_valueismalloc;
+} struct_inthash;
+
+// main inthash type
+typedef struct_inthash* inthash;
+
+// subfunctions
+unsigned long int inthash_key(char* value);
+void inthash_init(inthash hashtable);
+void inthash_delchain(inthash_chain* hash,t_inthash_freehandler free_handler);
+void inthash_default_free_handler(void* value);
+
+// main functions:
+
+
+/* Hash functions: */
+inthash inthash_new(int size); /* Create a new hash table */
+int inthash_created(inthash hashtable); /* Test if the hash table was successfully created */
+void inthash_delete(inthash* hashtable); /* Delete an hash table */
+void inthash_value_is_malloc(inthash hashtable,int flag); /* Is the 'value' member a value that needs to be free()'ed ? */
+void inthash_value_set_free_handler(inthash hashtable, /* value free() handler (default one is 'free') */
+ t_inthash_freehandler free_handler);
+/* */
+int inthash_read(inthash hashtable,char* name,long int* value); /* Read entry from the hash table */
+int inthash_readptr(inthash hashtable,char* name,long int* value); /* Same function, but returns 0 upon null ptr */
+/* */
+void inthash_add(inthash hashtable,char* name,long int value); /* Add entry in the hash table */
+void* inthash_addblk(inthash hashtable,char* name,int blksize); /* Add entry in the hash table and set value to a new memory block */
+int inthash_write(inthash hashtable,char* name,long int value); /* Overwrite/add entry in the hash table */
+int inthash_inc(inthash hashtable,char* name); /* Increment entry in the hash table */
+/* End of hash functions: */
+
+
+#endif
diff --git a/src/htsjava.c b/src/htsjava.c
index bb29692..afb166b 100644
--- a/src/htsjava.c
+++ b/src/htsjava.c
@@ -41,8 +41,9 @@ Please visit our Website: http://www.httrack.com
// htsjava.c - Parseur de classes java
#include "stdio.h"
-#include "htssystem.h"
+#include "htsglobal.h"
#include "htscore.h"
+
#include "htsjava.h"
#include <stdio.h>
@@ -53,11 +54,10 @@ Please visit our Website: http://www.httrack.com
//#include <math.h>
-#ifndef HTS_LITTLE_ENDIAN
-#define REVERSE_ENDIAN 1
-#else
-#define REVERSE_ENDIAN 0
-#endif
+/* Tell whether multi-byte values read from the class file must be byte-swapped.
+   Returns non-zero when the lowest-addressed byte of an int holding 1 is 1
+   (least significant byte first); callers in this file swap in that case. */
+static int reverse_endian(void) {
+ const int probe = 1;
+ const char* lowest_byte = (const char*) &probe;
+ return (*lowest_byte == 1);
+}
/* big/little endian swap */
#define hts_swap16(A) ( (((A) & 0xFF)<<8) | (((A) & 0xFF00)>>8) )
@@ -80,19 +80,33 @@ Please visit our Website: http://www.httrack.com
#define JAVADEBUG 0
-int hts_parse_java(char *file,char* err_msg)
+/* Tell whether the file named by str->filename looks like a Java class file.
+   Returns 1 when the name is longer than six characters and its last six
+   characters match ".class" according to strfield(); returns 0 otherwise,
+   including when str->filename is NULL. */
+int hts_detect_java(htsmoduleStruct* str) {
+ char* name = str->filename;
+ int name_len;
+
+ if (!name)
+  return 0;
+ name_len = (int) strlen(name);
+ if (name_len <= 6) /* need at least one character before the extension */
+  return 0;
+ return strfield(name + name_len - 6,".class") ? 1 : 0;
+}
+
+int hts_parse_java(htsmoduleStruct* str)
{
FILE *fpout;
JAVA_HEADER header;
RESP_STRUCT *tab;
+ char* file = str->filename;
+ str->relativeToHtmlLink = 1;
+
#if JAVADEBUG
printf("fopen\n");
#endif
if ((fpout = fopen(fconv(file), "r+b")) == NULL)
{
//fprintf(stderr, "Cannot open input file.\n");
- sprintf(err_msg,"Unable to open file %s",file);
+ sprintf(str->err_msg,"Unable to open file %s",file);
return 0; // une erreur..
}
@@ -102,7 +116,7 @@ int hts_parse_java(char *file,char* err_msg)
//if (fread(&header,1,sizeof(JAVA_HEADER),fpout) != sizeof(JAVA_HEADER)) { // pas complet..
if (fread(&header,1,10,fpout) != 10) { // pas complet..
fclose(fpout);
- sprintf(err_msg,"File header too small (file len = "LLintP")",(LLint)fsize(file));
+ sprintf(str->err_msg,"File header too small (file len = "LLintP")",(LLint)fsize(file));
return 0;
}
@@ -110,19 +124,19 @@ int hts_parse_java(char *file,char* err_msg)
printf("header\n");
#endif
// tester en tête
-#if REVERSE_ENDIAN
- header.magic = hts_swap32(header.magic);
- header.count = hts_swap16(header.count);
-#endif
+ if (reverse_endian()) {
+ header.magic = hts_swap32(header.magic);
+ header.count = hts_swap16(header.count);
+ }
if(header.magic!=0xCAFEBABE) {
- sprintf(err_msg,"non java file");
+ sprintf(str->err_msg,"non java file");
if (fpout) { fclose(fpout); fpout=NULL; }
return 0;
}
tab =(RESP_STRUCT*)calloct(header.count,sizeof(RESP_STRUCT));
if (!tab) {
- sprintf(err_msg,"Unable to alloc %d bytes",(int)sizeof(RESP_STRUCT));
+ sprintf(str->err_msg,"Unable to alloc %d bytes",(int)sizeof(RESP_STRUCT));
if (fpout) { fclose(fpout); fpout=NULL; }
return 0; // erreur..
}
@@ -135,12 +149,12 @@ int hts_parse_java(char *file,char* err_msg)
for (i = 1; i < header.count; i++) {
int err=0; // ++
- tab[i]=readtable(fpout,tab[i],&err,err_msg);
+ tab[i]=readtable(str,fpout,tab[i],&err);
if (!err) {
if ((tab[i].type == HTS_LONG) ||(tab[i].type == HTS_DOUBLE)) i++; //2 element si double ou float
} else { // ++ une erreur est survenue!
- if (strnotempty(err_msg)==0)
- strcpy(err_msg,"Internal readtable error");
+ if (strnotempty(str->err_msg)==0)
+ strcpybuff(str->err_msg,"Internal readtable error");
freet(tab);
if (fpout) { fclose(fpout); fpout=NULL; }
return 0;
@@ -180,7 +194,7 @@ int hts_parse_java(char *file,char* err_msg)
printf("add %s\n",tempo);
#endif
if (tab[tab[i].index1].file_position >= 0)
- hts_add_file(tempo,tab[tab[i].index1].file_position);
+ str->addLink(str,tempo); /* tab[tab[i].index1].file_position */
}
}
@@ -205,7 +219,8 @@ int hts_parse_java(char *file,char* err_msg)
// error: !=0 si erreur fatale
-RESP_STRUCT readtable(FILE *fp,RESP_STRUCT trans,int* error,char* err_msg)
+RESP_STRUCT readtable(htsmoduleStruct* str,
+ FILE *fp, RESP_STRUCT trans, int* error)
{
unsigned short int length;
int j;
@@ -214,54 +229,54 @@ RESP_STRUCT readtable(FILE *fp,RESP_STRUCT trans,int* error,char* err_msg)
trans.type = (int)(unsigned char)fgetc(fp);
switch (trans.type) {
case HTS_CLASS:
- strcpy(trans.name,"Class");
+ strcpybuff(trans.name,"Class");
trans.index1 = readshort(fp);
break;
case HTS_FIELDREF:
- strcpy(trans.name,"Field Reference");
+ strcpybuff(trans.name,"Field Reference");
trans.index1 = readshort(fp);
readshort(fp);
break;
case HTS_METHODREF:
- strcpy(trans.name,"Method Reference");
+ strcpybuff(trans.name,"Method Reference");
trans.index1 = readshort(fp);
readshort(fp);
break;
case HTS_INTERFACE:
- strcpy(trans.name,"Interface Method Reference");
+ strcpybuff(trans.name,"Interface Method Reference");
trans.index1 =readshort(fp);
readshort(fp);
break;
case HTS_NAMEANDTYPE:
- strcpy(trans.name,"Name and Type");
+ strcpybuff(trans.name,"Name and Type");
trans.index1 = readshort(fp);
readshort(fp);
break;
case HTS_STRING: // CONSTANT_String
- strcpy(trans.name,"String");
+ strcpybuff(trans.name,"String");
trans.index1 = readshort(fp);
break;
case HTS_INTEGER:
- strcpy(trans.name,"Integer");
+ strcpybuff(trans.name,"Integer");
for(j=0;j<4;j++) fgetc(fp);
break;
case HTS_FLOAT:
- strcpy(trans.name,"Float");
+ strcpybuff(trans.name,"Float");
for(j=0;j<4;j++) fgetc(fp);
break;
case HTS_LONG:
- strcpy(trans.name,"Long");
+ strcpybuff(trans.name,"Long");
for(j=0;j<8;j++) fgetc(fp);
break;
case HTS_DOUBLE:
- strcpy(trans.name,"Double");
+ strcpybuff(trans.name,"Double");
for(j=0;j<8;j++) fgetc(fp);
break;
@@ -269,9 +284,9 @@ RESP_STRUCT readtable(FILE *fp,RESP_STRUCT trans,int* error,char* err_msg)
case HTS_UNICODE:
if (trans.type == HTS_ASCIZ)
- strcpy(trans.name,"HTS_ASCIZ");
+ strcpybuff(trans.name,"HTS_ASCIZ");
else
- strcpy(trans.name,"HTS_UNICODE");
+ strcpybuff(trans.name,"HTS_UNICODE");
{
char buffer[1024];
@@ -295,10 +310,10 @@ RESP_STRUCT readtable(FILE *fp,RESP_STRUCT trans,int* error,char* err_msg)
// if(tris(buffer)==1) printf("%s\n ",buffer);
// if(tris(buffer)==2) printf("%s\n ",printname(buffer));
//#endif
- if(tris(buffer)==1) hts_add_file(buffer,trans.file_position);
- else if(tris(buffer)==2) hts_add_file(printname(buffer),trans.file_position);
+ if(tris(buffer)==1) str->addLink(str, buffer); /* trans.file_position */
+ else if(tris(buffer)==2) str->addLink(str, printname(buffer));
- strcpy(trans.name,buffer);
+ strcpybuff(trans.name,buffer);
} else { // gros pb
while ( (length > 0) && (!feof(fp))) {
fgetc(fp);
@@ -307,7 +322,7 @@ RESP_STRUCT readtable(FILE *fp,RESP_STRUCT trans,int* error,char* err_msg)
if (!feof(fp)) {
trans.type=-1;
} else {
- sprintf(err_msg,"Internal stucture error (ASCII)");
+ sprintf(str->err_msg,"Internal stucture error (ASCII)");
*error = 1;
}
return(trans);
@@ -317,7 +332,7 @@ RESP_STRUCT readtable(FILE *fp,RESP_STRUCT trans,int* error,char* err_msg)
default:
// printf("Type inconnue\n");
// on arrête tout
- sprintf(err_msg,"Internal structure unknown (type %d)",trans.type);
+ sprintf(str->err_msg,"Internal structure unknown (type %d)",trans.type);
*error = 1;
return(trans);
break;
@@ -331,11 +346,10 @@ unsigned short int readshort(FILE *fp)
unsigned short int valint;
fread(&valint,sizeof(valint),1,fp);
-#if REVERSE_ENDIAN
- return hts_swap16(valint);
-#else
- return valint;
-#endif
+ if (reverse_endian())
+ return hts_swap16(valint);
+ else
+ return valint;
}
@@ -383,7 +397,7 @@ char * printname(char name[1024])
for (j = 0; j < (int) strlen(name); j++,p++) {
if (*p == '/') *p1='.';
if (*p==';'){*p1='\0';
- strcat(rname,".class");
+ strcatbuff(rname,".class");
return (rname);}
else *p1=*p;
p1++;
diff --git a/src/htsjava.h b/src/htsjava.h
index 66a75a5..b3d17d4 100644
--- a/src/htsjava.h
+++ b/src/htsjava.h
@@ -38,8 +38,8 @@ Please visit our Website: http://www.httrack.com
#ifndef HTSJAVA_DEFH
#define HTSJAVA_DEFH
-/* LLint fsize(char* s); */
-int fsize(char* s);
+#include <stdio.h>
+#include "htsmodules.h"
typedef struct {
unsigned long int magic;
@@ -57,10 +57,11 @@ typedef struct {
} RESP_STRUCT;
-int hts_parse_java(char *file,char* err_msg);
+int hts_detect_java(htsmoduleStruct* str);
+int hts_parse_java(htsmoduleStruct* str);
RESP_STRUCT affecte(int i1,int i2,RESP_STRUCT *i3,RESP_STRUCT *i4,int i5);
//unsigned int swap(long int nomber,int digit);
-RESP_STRUCT readtable(FILE *fp,RESP_STRUCT,int*,char*);
+RESP_STRUCT readtable(htsmoduleStruct* str,FILE *fp,RESP_STRUCT,int*);
unsigned short int readshort(FILE *fp);
int tris(char*);
char * printname(char [1024]);
diff --git a/src/htslib.c b/src/htslib.c
index e4e6006..3954f9c 100644
--- a/src/htslib.c
+++ b/src/htslib.c
@@ -50,8 +50,16 @@ Please visit our Website: http://www.httrack.com
#if HTS_WIN
#include <direct.h>
#else
+#ifdef HAVE_SYS_TYPES_H
+#include <sys/types.h>
+#endif
+#ifdef HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
+#endif
#include <stdlib.h>
#include <string.h>
#include <time.h>
@@ -61,11 +69,7 @@ Please visit our Website: http://www.httrack.com
#if HTS_WIN
#include <sys/utime.h>
#else
-#if HTS_PLATFORM!=3
#include <utime.h>
-#else
-#include <utime.h>
-#endif
#endif
/* END specific definitions */
@@ -95,7 +99,7 @@ int IPV6_resolver = 0;
/* détection complémentaire */
-const char hts_detect[][32] = {
+const char* hts_detect[] = {
"archive",
"background",
"data", // OBJECT
@@ -112,13 +116,13 @@ const char hts_detect[][32] = {
};
/* detect on the beginning of the name */
-const char hts_detectbeg[][32] = {
+const char* hts_detectbeg[] = {
"hotspot", /* hotspot1=..,hotspot2=.. */
""
};
/* ne pas détcter de liens dedans */
-const char hts_nodetect[][32] = {
+const char* hts_nodetect[] = {
"accept-charset",
"accesskey",
"action",
@@ -160,7 +164,7 @@ const char hts_nodetect[][32] = {
/* détection de mini-code javascript */
/* ALSO USED: detection based on the name: onXXX="<tag>" where XXX starts with upper case letter */
-const char hts_detect_js[][32] = {
+const char* hts_detect_js[] = {
"onAbort",
"onBlur",
"onChange",
@@ -184,30 +188,42 @@ const char hts_detect_js[][32] = {
"onSelect",
"onSubmit",
"onUnload",
+ "style", /* hack for CSS code data */
+ ""
+};
+
+const char* hts_main_mime[] = { /* top-level MIME media types (the part before '/') */
+ "application",
+ "audio",
+ "image",
+ "message",
+ "multipart",
+ "text",
+ "video",
"" /* empty string: presumably the end-of-list marker -- confirm against the code that scans this table */
};
/* detection of "...URL=<url>" */
-const char hts_detectURL[][32] = {
+const char* hts_detectURL[] = {
"content",
""
};
/* tags whose URL must be rewritten but not captured */
-const char hts_detectandleave[][32] = {
+const char* hts_detectandleave[] = {
"action",
""
};
-/* ne pas renommer les types renvoyés (couvent types inconnus) */
-const char hts_mime_keep[][32] = {
+/* do not rename for these returned types (often unknown types) */
+const char* hts_mime_keep[] = {
"application/octet-stream",
"text/plain",
""
};
/* pas de type mime connu, mais extension connue */
-const char hts_ext_dynamic[][32] = {
+const char* hts_ext_dynamic[] = {
"php3",
"php",
"php4",
@@ -218,11 +234,14 @@ const char hts_ext_dynamic[][32] = {
"pl",
/*"exe",*/
"cfm",
+ "nsf", /* lotus */
""
};
-/* types MIME */
-const char hts_mime[][2][32] = {
+/* types MIME
+ note: application/octet-stream should not be used here
+*/
+const char* hts_mime[][2] = {
{"application/acad","dwg"},
{"application/arj","arj"},
{"application/clariscad","ccad"},
@@ -386,24 +405,167 @@ const char hts_mime[][2][32] = {
{"x-conference/x-cooltalk","ice"},
/*{"application/x-httpd-cgi","cgi"},*/
{"x-world/x-vrml","wrl"},
-
+
+ /* More from w3schools.com */
+ { "application/envoy", "evy" },
+ { "application/fractals", "fif" },
+ { "application/futuresplash", "spl" },
+ { "application/hta", "hta" },
+ { "application/internet-property-stream", "acx" },
+ { "application/msword", "dot" },
+ { "application/olescript", "axs" },
+ { "application/pics-rules", "prf" },
+ { "application/pkcs10", "p10" },
+ { "application/pkix-crl", "crl" },
+ { "application/set-payment-initiation", "setpay" },
+ { "application/set-registration-initiation", "setreg" },
+ { "application/vnd.ms-excel", "xla" },
+ { "application/vnd.ms-excel", "xlc" },
+ { "application/vnd.ms-excel", "xlm" },
+ { "application/vnd.ms-excel", "xls" },
+ { "application/vnd.ms-excel", "xlt" },
+ { "application/vnd.ms-excel", "xlw" },
+ { "application/vnd.ms-pkicertstore", "sst" },
+ { "application/vnd.ms-pkiseccat", "cat" },
+ { "application/vnd.ms-powerpoint", "pot" },
+ { "application/vnd.ms-powerpoint", "pps" },
+ { "application/vnd.ms-powerpoint", "ppt" },
+ { "application/vnd.ms-project", "mpp" },
+ { "application/vnd.ms-works", "wcm" },
+ { "application/vnd.ms-works", "wdb" },
+ { "application/vnd.ms-works", "wks" },
+ { "application/vnd.ms-works", "wps" },
+ { "application/x-compress", "z" },
+ { "application/x-compressed", "tgz" },
+ { "application/x-internet-signup", "ins" },
+ { "application/x-internet-signup", "isp" },
+ { "application/x-iphone", "iii" },
+ { "application/x-javascript", "js" },
+ { "application/x-msaccess", "mdb" },
+ { "application/x-mscardfile", "crd" },
+ { "application/x-msclip", "clp" },
+ { "application/x-msmediaview", "m13" },
+ { "application/x-msmediaview", "m14" },
+ { "application/x-msmediaview", "mvb" },
+ { "application/x-msmetafile", "wmf" },
+ { "application/x-msmoney", "mny" },
+ { "application/x-mspublisher", "pub" },
+ { "application/x-msschedule", "scd" },
+ { "application/x-msterminal", "trm" },
+ { "application/x-perfmon", "pma" },
+ { "application/x-perfmon", "pmc" },
+ { "application/x-perfmon", "pml" },
+ { "application/x-perfmon", "pmr" },
+ { "application/x-perfmon", "pmw" },
+ { "application/x-pkcs12", "p12" },
+ { "application/x-pkcs12", "pfx" },
+ { "application/x-pkcs7-certificates", "p7b" },
+ { "application/x-pkcs7-certificates", "spc" },
+ { "application/x-pkcs7-certreqresp", "p7r" },
+ { "application/x-pkcs7-mime", "p7c" },
+ { "application/x-pkcs7-mime", "p7m" },
+ { "application/x-pkcs7-signature", "p7s" },
+ { "application/x-troff-me", "me" },
+ { "application/x-x509-ca-cert", "cer" },
+ { "application/x-x509-ca-cert", "crt" },
+ { "application/x-x509-ca-cert", "der" },
+ { "application/vnd.ms-pkipko", "pko" }, /* "vnd." vendor tree (was misspelled "ynd."), like the other vnd.ms-pki* rows */
+ { "audio/mid", "mid" },
+ { "audio/mid", "rmi" },
+ { "audio/mpeg", "mp3" },
+ { "audio/x-mpegurl", "m3u" },
+ { "image/bmp", "bmp" },
+ { "image/cis-cod", "cod" },
+ { "image/pipeg", "jfif" },
+ { "image/x-cmx", "cmx" },
+ { "image/x-icon", "ico" },
+ { "image/x-portable-bitmap", "pbm" },
+ { "message/rfc822", "mht" },
+ { "message/rfc822", "mhtml" },
+ { "message/rfc822", "nws" },
+ { "text/css", "css" },
+ { "text/h323", "323" },
+ { "text/html", "stm" },
+ { "text/iuls", "uls" },
+ { "text/plain", "bas" },
+ { "text/scriptlet", "sct" },
+ { "text/webviewhtml", "htt" },
+ { "text/x-component", "htc" },
+ { "text/x-vcard", "vcf" },
+ { "video/mpeg", "mp2" },
+ { "video/mpeg", "mpa" },
+ { "video/mpeg", "mpv2" },
+ { "video/x-la-asf", "lsf" },
+ { "video/x-la-asf", "lsx" },
+ { "video/x-ms-asf", "asf" },
+ { "video/x-ms-asf", "asr" },
+ { "video/x-ms-asf", "asx" },
+ { "x-world/x-vrml", "flr" },
+ { "x-world/x-vrml", "vrml" },
+ { "x-world/x-vrml", "wrz" },
+ { "x-world/x-vrml", "xaf" },
+ { "x-world/x-vrml", "xof" },
+
+ /* Various */
+ { "application/ogg", "ogg" },
+
{"*","class"},
{"",""}};
// Reserved (RFC2396)
-#define CHAR_RESERVED(c) ( strchr(";/?:@&=+$,",(unsigned char)(c)) != 0 )
+#define CIS(c,ch) ( ((unsigned char)(c)) == (ch) ) /* true when character c equals ch; unlike the strchr() forms kept in comments below, a set built from CIS never matches '\0' (strchr also finds the string terminator) */
+#define CHAR_RESERVED(c) ( CIS(c,';') \
+ || CIS(c,'/') \
+ || CIS(c,'?') \
+ || CIS(c,':') \
+ || CIS(c,'@') \
+ || CIS(c,'&') \
+ || CIS(c,'=') \
+ || CIS(c,'+') \
+ || CIS(c,'$') \
+ || CIS(c,',') )
+//#define CHAR_RESERVED(c) ( strchr(";/?:@&=+$,",(unsigned char)(c)) != 0 )
// Delimiters (RFC2396)
-#define CHAR_DELIM(c) ( strchr("<>#%\"",(unsigned char)(c)) != 0 )
+#define CHAR_DELIM(c) ( CIS(c,'<') \
+ || CIS(c,'>') \
+ || CIS(c,'#') \
+ || CIS(c,'%') \
+ || CIS(c,'\"') )
+//#define CHAR_DELIM(c) ( strchr("<>#%\"",(unsigned char)(c)) != 0 )
// Unwise (RFC2396)
-#define CHAR_UNWISE(c) ( strchr("{}|\\^[]`",(unsigned char)(c)) != 0 )
+#define CHAR_UNWISE(c) ( CIS(c,'{') \
+ || CIS(c,'}') \
+ || CIS(c,'|') \
+ || CIS(c,'\\') \
+ || CIS(c,'^') \
+ || CIS(c,'[') \
+ || CIS(c,']') \
+ || CIS(c,'`') )
+//#define CHAR_UNWISE(c) ( strchr("{}|\\^[]`",(unsigned char)(c)) != 0 )
// Special (escape chars) (RFC2396 + >=127 )
#define CHAR_LOW(c) ( ((unsigned char)(c) <= 31) ) /* byte values 0..31 */
#define CHAR_HIG(c) ( ((unsigned char)(c) >= 127) ) /* byte values 127 and above */
#define CHAR_SPECIAL(c) ( CHAR_LOW(c) || CHAR_HIG(c) )
// We try to avoid them and encode them instead
-#define CHAR_XXAVOID(c) ( strchr(" *'\"!",(unsigned char)(c)) != 0 )
+#define CHAR_XXAVOID(c) ( CIS(c,' ') \
+ || CIS(c,'*') \
+ || CIS(c,'\'') \
+ || CIS(c,'\"') \
+ || CIS(c,'!') )
+//#define CHAR_XXAVOID(c) ( strchr(" *'\"!",(unsigned char)(c)) != 0 )
+#define CHAR_MARK(c) ( CIS(c,'-') \
+ || CIS(c,'_') \
+ || CIS(c,'.') \
+ || CIS(c,'!') \
+ || CIS(c,'~') \
+ || CIS(c,'*') \
+ || CIS(c,'\'') \
+ || CIS(c,'(') \
+ || CIS(c,')') ) /* same character set as the "mark" rule of RFC2396 */
+//#define CHAR_MARK(c) ( strchr("-_.!~*'()",(unsigned char)(c)) != 0 )
+
// conversion éventuelle / vers antislash
@@ -413,7 +575,7 @@ char* antislash(char* s) {
char* a;
NOSTATIC_RESERVE(buff, char, HTS_URLMAXSIZE*2);
- strcpy(buff,s);
+ strcpybuff(buff,s);
while(a=strchr(buff,'/')) *a='\\';
return buff;
}
@@ -442,7 +604,7 @@ htsblk httpget(char* url) {
retour.size=0;
retour.msg[0]='\0';
retour.statuscode=-1;
- strcpy(retour.msg,"Error invalid URL");
+ strcpybuff(retour.msg,"Error invalid URL");
return retour;
}
@@ -514,7 +676,7 @@ int http_xfopen(int mode,int treat,int waitconnect,char* xsend,char* adr,char* f
if (retour) {
if (retour->msg) {
if (!strnotempty(retour->msg)) {
- strcpy(retour->msg,"Connect error");
+ strcpybuff(retour->msg,"Connect error");
}
}
}
@@ -531,8 +693,8 @@ int http_xfopen(int mode,int treat,int waitconnect,char* xsend,char* adr,char* f
if (!fexist(fconv(unescape_http(fil))))
if (fexist(fconv(unescape_http(fil+1)))) {
char tempo[HTS_URLMAXSIZE*2];
- strcpy(tempo,fil+1);
- strcpy(fil,tempo);
+ strcpybuff(tempo,fil+1);
+ strcpybuff(fil,tempo);
}
// Ouvrir
@@ -540,9 +702,9 @@ int http_xfopen(int mode,int treat,int waitconnect,char* xsend,char* adr,char* f
retour->msg[0]='\0';
soc=INVALID_SOCKET;
if (retour->totalsize<0)
- strcpy(retour->msg,"Unable to open file");
+ strcpybuff(retour->msg,"Unable to open local file");
else if (retour->totalsize==0)
- strcpy(retour->msg,"File empty");
+ strcpybuff(retour->msg,"File empty");
else {
// Note: On passe par un FILE* (plus propre)
//soc=open(fil,O_RDONLY,0); // en lecture seule!
@@ -555,13 +717,13 @@ int http_xfopen(int mode,int treat,int waitconnect,char* xsend,char* adr,char* f
retour->soc=soc;
if (soc!=INVALID_SOCKET) {
retour->statuscode=200; // OK
- strcpy(retour->msg,"OK");
+ strcpybuff(retour->msg,"OK");
guess_httptype(retour->contenttype,fil);
} else if (strnotempty(retour->msg)==0)
- strcpy(retour->msg,"Unable to open file");
+ strcpybuff(retour->msg,"Unable to open local file");
return soc; // renvoyer
} else { // HEAD ou POST : interdit sur un local!!!! (c'est idiot!)
- strcpy(retour->msg,"Unexpected Head/Post local request");
+ strcpybuff(retour->msg,"Unexpected Head/Post local request");
soc=INVALID_SOCKET; // erreur
retour->soc=soc;
return soc;
@@ -647,10 +809,10 @@ int http_sendhead(t_cookie* cookie,int mode,char* xsend,char* adr,char* fil,char
buff[0]='\0';
// header Date
- //strcat(buff,"Date: ");
+ //strcatbuff(buff,"Date: ");
//time_gmt_rfc822(buff); // obtenir l'heure au format rfc822
//sendc("\n");
- //strcat(buff,buff);
+ //strcatbuff(buff,buff);
// possibilité non documentée: >post: et >postfile:
// si présence d'un tag >post: alors executer un POST
@@ -686,58 +848,58 @@ int http_sendhead(t_cookie* cookie,int mode,char* xsend,char* adr,char* fil,char
if (strnotempty(buff)==0) { // PAS POSTFILE
// Type de requète?
if ((search_tag) && (mode==0)) {
- strcat(buff,"POST ");
+ strcatbuff(buff,"POST ");
} else if (mode==0) { // GET
- strcat(buff,"GET ");
+ strcatbuff(buff,"GET ");
} else { // if (mode==1) {
if (!retour->req.http11) // forcer HTTP/1.0
- strcat(buff,"GET "); // certains serveurs (cgi) buggent avec HEAD
+ strcatbuff(buff,"GET "); // certains serveurs (cgi) buggent avec HEAD
else
- strcat(buff,"HEAD ");
+ strcatbuff(buff,"HEAD ");
}
// si on gère un proxy, il faut une Absolute URI: on ajoute avant http://www.adr.dom
- if (retour->req.proxy.active) {
+ if ( retour->req.proxy.active && (strncmp(adr,"https://", 8) != 0) ) {
if (!link_has_authority(adr)) { // default http
#if HDEBUG
printf("Proxy Use: for %s%s proxy %d port %d\n",adr,fil,retour->req.proxy.name,retour->req.proxy.port);
#endif
- strcat(buff,"http://");
- strcat(buff,jump_identification(adr));
+ strcatbuff(buff,"http://");
+ strcatbuff(buff,jump_identification(adr));
} else { // ftp:// en proxy http
#if HDEBUG
printf("Proxy Use for ftp: for %s%s proxy %d port %d\n",adr,fil,retour->req.proxy.name,retour->req.proxy.port);
#endif
direct_url=1; // ne pas analyser user/pass
- strcat(buff,adr);
+ strcatbuff(buff,adr);
}
}
// NOM DU FICHIER
// on slash doit être présent en début, sinon attention aux bad request! (400)
- if (*fil!='/') strcat(buff,"/");
+ if (*fil!='/') strcatbuff(buff,"/");
{
char tempo[HTS_URLMAXSIZE*2];
tempo[0]='\0';
if (search_tag)
- strncat(tempo,fil,(int) (search_tag - fil));
+ strncatbuff(tempo,fil,(int) (search_tag - fil));
else
- strcpy(tempo,fil);
+ strcpybuff(tempo,fil);
escape_check_url(tempo);
- strcat(buff,tempo); // avec échappement
+ strcatbuff(buff,tempo); // avec échappement
}
// protocole
if (!retour->req.http11) { // forcer HTTP/1.0
//use_11=0;
- strcat(buff," HTTP/1.0\x0d\x0a");
+ strcatbuff(buff," HTTP/1.0\x0d\x0a");
} else { // Requète 1.1
//use_11=1;
- strcat(buff," HTTP/1.1\x0d\x0a");
+ strcatbuff(buff," HTTP/1.1\x0d\x0a");
}
/* supplemental data */
- if (xsend) strcat(buff,xsend); // éventuelles autres lignes
+ if (xsend) strcatbuff(buff,xsend); // éventuelles autres lignes
// tester proxy authentication
if (retour->req.proxy.active) {
@@ -748,12 +910,12 @@ int http_sendhead(t_cookie* cookie,int mode,char* xsend,char* adr,char* fil,char
char user_pass[256];
autorisation[0]=user_pass[0]='\0';
//
- strncat(user_pass,astart,(int) (a - astart) - 1);
- strcpy(user_pass,unescape_http(user_pass));
- code64(user_pass,autorisation);
- strcat(buff,"Proxy-Authorization: Basic ");
- strcat(buff,autorisation);
- strcat(buff,H_CRLF);
+ strncatbuff(user_pass,astart,(int) (a - astart) - 1);
+ strcpybuff(user_pass,unescape_http(user_pass));
+ code64((unsigned char*)user_pass,(int)strlen(user_pass),(unsigned char*)autorisation,0);
+ strcatbuff(buff,"Proxy-Authorization: Basic ");
+ strcatbuff(buff,autorisation);
+ strcatbuff(buff,H_CRLF);
#if HDEBUG
printf("Proxy-Authenticate, %s (code: %s)\n",user_pass,autorisation);
#endif
@@ -772,11 +934,11 @@ int http_sendhead(t_cookie* cookie,int mode,char* xsend,char* adr,char* fil,char
(strncmp(adr, "https://", 8) == 0) /* or referer AND addresses are https */
)
) { // PAS file://
- strcat(buff,"Referer: ");
- strcat(buff,"http://");
- strcat(buff,jump_identification(referer_adr));
- strcat(buff,referer_fil);
- strcat(buff,H_CRLF);
+ strcatbuff(buff,"Referer: ");
+ strcatbuff(buff,"http://");
+ strcatbuff(buff,jump_identification(referer_adr));
+ strcatbuff(buff,referer_fil);
+ strcatbuff(buff,H_CRLF);
}
}
}
@@ -786,7 +948,7 @@ int http_sendhead(t_cookie* cookie,int mode,char* xsend,char* adr,char* fil,char
if (search_tag) {
char clen[256];
sprintf(clen,"Content-length: %d"H_CRLF,(int)(strlen(unescape_http(search_tag+strlen(POSTTOK)+1))));
- strcat(buff,clen);
+ strcatbuff(buff,clen);
}
}
@@ -802,40 +964,40 @@ int http_sendhead(t_cookie* cookie,int mode,char* xsend,char* adr,char* fil,char
if (b) {
max_cookies--;
if (!cook) {
- strcat(buff,"Cookie: ");
- strcat(buff,"$Version=1; ");
+ strcatbuff(buff,"Cookie: ");
+ strcatbuff(buff,"$Version=1; ");
cook=1;
} else
- strcat(buff,"; ");
- strcat(buff,cookie_get(b,5));
- strcat(buff,"=");
- strcat(buff,cookie_get(b,6));
- strcat(buff,"; $Path=");
- strcat(buff,cookie_get(b,2));
+ strcatbuff(buff,"; ");
+ strcatbuff(buff,cookie_get(b,5));
+ strcatbuff(buff,"=");
+ strcatbuff(buff,cookie_get(b,6));
+ strcatbuff(buff,"; $Path=");
+ strcatbuff(buff,cookie_get(b,2));
b=cookie_nextfield(b);
}
} while( (b) && (max_cookies>0) && ((int)strlen(buff)<max_size));
if (cook) { // on a envoyé un (ou plusieurs) cookie?
- strcat(buff,H_CRLF);
+ strcatbuff(buff,H_CRLF);
#if DEBUG_COOK
printf("Header:\n%s\n",buff);
#endif
}
}
- // connection close?
- //if (use_11) // Si on envoie une requète 1.1, préciser qu'on ne veut pas de keep-alive!!
- strcat(buff,"Connection: close"H_CRLF);
-
// gérer le keep-alive (garder socket)
- //strcat(buff,"Connection: Keep-Alive\n");
+ if (retour->req.http11 && !retour->req.nokeepalive) {
+ strcatbuff(buff,"Connection: Keep-Alive"H_CRLF);
+ } else {
+ strcatbuff(buff,"Connection: close"H_CRLF);
+ }
{
char* real_adr=jump_identification(adr);
//if ((use_11) || (retour->user_agent_send)) { // Pour le 1.1 on utilise un Host:
if (!direct_url) { // pas ftp:// par exemple
//if (!retour->req.proxy.active) {
- strcat(buff,"Host: "); strcat(buff,real_adr); strcat(buff,H_CRLF);
+ strcatbuff(buff,"Host: "); strcatbuff(buff,real_adr); strcatbuff(buff,H_CRLF);
//}
}
//}
@@ -845,26 +1007,45 @@ int http_sendhead(t_cookie* cookie,int mode,char* xsend,char* adr,char* fil,char
char s[256];
// HyperTextSeeker/"HTSVERSION
sprintf(s,"User-Agent: %s"H_CRLF,retour->req.user_agent);
- strcat(buff,s);
+ strcatbuff(buff,s);
// pour les serveurs difficiles
- strcat(buff,"Accept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, image/svg+xml, */*"H_CRLF);
+ strcatbuff(buff,"Accept: "
+ "image/png, image/jpeg, image/pjpeg, image/x-xbitmap, image/svg+xml" /* Accepted */
+ ", "
+ "image/gif;q=0.9" /* also accepted but with lower preference */
+ ", "
+ "*/*;q=0.1" /* also accepted but with even lower preference */
+ H_CRLF);
if (strnotempty(retour->req.lang_iso)) {
- strcat(buff,"Accept-Language: "); strcat(buff,retour->req.lang_iso); strcat(buff,H_CRLF);
+ strcatbuff(buff,"Accept-Language: "); strcatbuff(buff,retour->req.lang_iso); strcatbuff(buff,H_CRLF);
}
- strcat(buff,"Accept-Charset: iso-8859-1, *"H_CRLF);
+ strcatbuff(buff,"Accept-Charset: "
+ "iso-8859-1" /* we prefer ISO-8859-1 */
+ ", "
+ "iso-8859-*;q=0.9" /* or ISO-8859-* */
+ ", "
+ "utf-8;q=0.66" /* UTF8 is also accepted */
+ ", "
+ "*;q=0.33" /* and any other charset */
+ H_CRLF);
if (retour->req.http11) {
#if HTS_USEZLIB
- if ((!retour->req.range_used) && (!retour->req.nocompression))
- strcat(buff,"Accept-Encoding: gzip, deflate, compress, identity"H_CRLF);
+ //strcatbuff(buff,"Accept-Encoding: gzip, deflate, compress, identity"H_CRLF);
+ if (gz_is_available && (!retour->req.range_used) && (!retour->req.nocompression))
+ strcatbuff(buff,"Accept-Encoding: "
+ "gzip" /* gzip is the preferred encoding */
+ ", "
+ "identity;q=0.9"
+ H_CRLF);
else
- strcat(buff,"Accept-Encoding: identity"H_CRLF); /* no compression */
+ strcatbuff(buff,"Accept-Encoding: identity"H_CRLF); /* no compression */
#else
- strcat(buff,"Accept-Encoding: identity"H_CRLF); /* no compression */
+ strcatbuff(buff,"Accept-Encoding: identity"H_CRLF); /* no compression */
#endif
}
} else {
- strcat(buff,"Accept: */*"H_CRLF); // le minimum
+ strcatbuff(buff,"Accept: */*"H_CRLF); // le minimum
}
/* Authentification */
@@ -878,40 +1059,40 @@ int http_sendhead(t_cookie* cookie,int mode,char* xsend,char* adr,char* fil,char
if (!direct_url) { // pas ftp:// par exemple
char user_pass[256];
user_pass[0]='\0';
- strncat(user_pass,astart,(int) (a - astart) - 1);
- strcpy(user_pass,unescape_http(user_pass));
- code64(user_pass,autorisation);
+ strncatbuff(user_pass,astart,(int) (a - astart) - 1);
+ strcpybuff(user_pass,unescape_http(user_pass));
+ code64((unsigned char*)user_pass,(int)strlen(user_pass),(unsigned char*)autorisation,0);
if (strcmp(fil,"/robots.txt")) /* pas robots.txt */
bauth_add(cookie,astart,fil,autorisation);
}
} else if ( (a=bauth_check(cookie,real_adr,fil)) )
- strcpy(autorisation,a);
+ strcpybuff(autorisation,a);
/* On a une autorisation a donner? */
if (strnotempty(autorisation)) {
- strcat(buff,"Authorization: Basic ");
- strcat(buff,autorisation);
- strcat(buff,H_CRLF);
+ strcatbuff(buff,"Authorization: Basic ");
+ strcatbuff(buff,autorisation);
+ strcatbuff(buff,H_CRLF);
}
}
}
- //strcat(buff,"Accept-Language: en\n");
- //strcat(buff,"Accept-Charset: iso-8859-1,*,utf-8\n");
+ //strcatbuff(buff,"Accept-Language: en\n");
+ //strcatbuff(buff,"Accept-Charset: iso-8859-1,*,utf-8\n");
// CRLF de fin d'en tête
- strcat(buff,H_CRLF);
+ strcatbuff(buff,H_CRLF);
// données complémentaires?
if (search_tag)
if (mode==0) // GET!
- strcat(buff,unescape_http(search_tag+strlen(POSTTOK)+1));
+ strcatbuff(buff,unescape_http(search_tag+strlen(POSTTOK)+1));
}
#if HDEBUG
#endif
if (_DEBUG_HEAD) {
if (ioinfo) {
- fprintf(ioinfo,"request for %s%s:\r\n",jump_identification(adr),fil);
+ fprintf(ioinfo,"[%d] request for %s%s:\r\n",retour->debugid,jump_identification(adr),fil);
fprintfio(ioinfo,buff,"<<< ");
fprintf(ioinfo,"\r\n");
fflush(ioinfo);
@@ -919,13 +1100,25 @@ int http_sendhead(t_cookie* cookie,int mode,char* xsend,char* adr,char* fil,char
} // Fin test pas postfile
//
+ // Callback
+#if HTS_ANALYSTE
+ if (hts_htmlcheck_sendhead != NULL) {
+ int test_head=hts_htmlcheck_sendhead(buff, adr, fil, referer_adr, referer_fil, retour);
+ if (test_head!=1) {
+ deletesoc_r(retour);
+ strcpybuff(retour->msg,"Header refused by external wrapper");
+ retour->soc=INVALID_SOCKET;
+ }
+ }
+#endif
+
// Envoi
if (sendc(retour, buff)<0) { // ERREUR, socket rompue?...
//if (sendc(retour->soc,buff) != strlen(buff)) { // ERREUR, socket rompue?...
deletesoc_r(retour); // fermer tout de même
// et tenter de reconnecter
- strcpy(retour->msg,"Broken pipe");
+ strcpybuff(retour->msg,"Write error");
retour->soc=INVALID_SOCKET;
}
@@ -955,44 +1148,46 @@ void treatfirstline(htsblk* retour,char* rcvd) {
while ((*a!=' ') && (*a!='\0') && (*a!=10) && (*a!=13) && (*a!=9)) a++;
while ((*a==' ') || (*a==10) || (*a==13) || (*a==9)) a++; // épurer espaces
if ((strlen(a) > 1) && (strlen(a) < 64) ) // message retour
- strcpy(retour->msg,a);
+ strcpybuff(retour->msg,a);
else
infostatuscode(retour->msg,retour->statuscode);
// type MIME par défaut2
- strcpy(retour->contenttype,HTS_HYPERTEXT_DEFAULT_MIME);
+ strcpybuff(retour->contenttype,HTS_HYPERTEXT_DEFAULT_MIME);
} else { // pas de code!
retour->statuscode=-1;
- strcpy(retour->msg,"Unknown response structure");
+ strcpybuff(retour->msg,"Unknown response structure");
}
} else { // euhh??
retour->statuscode=-1;
- strcpy(retour->msg,"Unknown response structure");
+ strcpybuff(retour->msg,"Unknown response structure");
}
} else {
if (*a == '<') {
/* This is dirty .. */
retour->statuscode=200;
- strcpy(retour->msg, "Unknown, assuming junky server");
- strcpy(retour->contenttype,HTS_HYPERTEXT_DEFAULT_MIME);
+ retour->keep_alive=0;
+ strcpybuff(retour->msg, "Unknown, assuming junky server");
+ strcpybuff(retour->contenttype,HTS_HYPERTEXT_DEFAULT_MIME);
} else if (strnotempty(a)) {
retour->statuscode=-1;
- strcpy(retour->msg,"Unknown response structure, no HTTP/ response given");
+ strcpybuff(retour->msg,"Unknown (not HTTP/xx) response structure");
} else {
/* This is dirty .. */
retour->statuscode=200;
- strcpy(retour->msg, "Unknown, assuming junky server");
- strcpy(retour->contenttype,HTS_HYPERTEXT_DEFAULT_MIME);
+ retour->keep_alive=0;
+ strcpybuff(retour->msg, "Unknown, assuming junky server");
+ strcpybuff(retour->contenttype,HTS_HYPERTEXT_DEFAULT_MIME);
}
}
} else { // vide!
/*
retour->statuscode=-1;
- strcpy(retour->msg,"Empty reponse or internal error");
+ strcpybuff(retour->msg,"Empty reponse or internal error");
*/
/* This is dirty .. */
retour->statuscode=200;
- strcpy(retour->msg, "Unknown, assuming junky server");
- strcpy(retour->contenttype,HTS_HYPERTEXT_DEFAULT_MIME);
+ strcpybuff(retour->msg, "Unknown, assuming junky server");
+ strcpybuff(retour->contenttype,HTS_HYPERTEXT_DEFAULT_MIME);
}
}
@@ -1005,13 +1200,16 @@ void treathead(t_cookie* cookie,char* adr,char* fil,htsblk* retour,char* rcvd) {
printf("ok, Content-length: détecté\n");
#endif
sscanf(rcvd+p,LLintP,&(retour->totalsize));
+ if (retour->totalsize == 0) {
+ retour->empty = 1;
+ }
}
else if ((p=strfield(rcvd,"Content-Disposition:"))!=0) {
- while(*(rcvd+p)==' ') p++; // sauter espaces
+ while(is_realspace(*(rcvd+p))) p++; // sauter espaces
if ((int) strlen(rcvd+p)<250) { // pas trop long?
char tmp[256];
char *a=NULL,*b=NULL;
- strcpy(tmp,rcvd+p);
+ strcpybuff(tmp,rcvd+p);
a=strstr(tmp,"filename=");
if (a) {
a+=strlen("filename=");
@@ -1029,7 +1227,7 @@ void treathead(t_cookie* cookie,char* adr,char* fil,htsblk* retour,char* rcvd) {
if (b) {
*b='\0';
if ((int) strlen(a) < 200) { // pas trop long?
- strcpy(retour->cdispo,a);
+ strcpybuff(retour->cdispo,a);
}
}
}
@@ -1037,36 +1235,40 @@ void treathead(t_cookie* cookie,char* adr,char* fil,htsblk* retour,char* rcvd) {
}
}
else if ((p=strfield(rcvd,"Last-Modified:"))!=0) {
- while(*(rcvd+p)==' ') p++; // sauter espaces
+ while(is_realspace(*(rcvd+p))) p++; // sauter espaces
if ((int) strlen(rcvd+p)<64) { // pas trop long?
//struct tm* tm_time=convert_time_rfc822(rcvd+p);
- strcpy(retour->lastmodified,rcvd+p);
+ strcpybuff(retour->lastmodified,rcvd+p);
}
}
else if ((p=strfield(rcvd,"Date:"))!=0) {
if (strnotempty(retour->lastmodified)==0) { /* pas encore de last-modified */
- while(*(rcvd+p)==' ') p++; // sauter espaces
+ while(is_realspace(*(rcvd+p))) p++; // sauter espaces
if ((int) strlen(rcvd+p)<64) { // pas trop long?
//struct tm* tm_time=convert_time_rfc822(rcvd+p);
- strcpy(retour->lastmodified,rcvd+p);
+ strcpybuff(retour->lastmodified,rcvd+p);
}
}
}
else if ((p=strfield(rcvd,"Etag:"))!=0) { /* Etag */
if (retour) {
- while(*(rcvd+p)==' ') p++; // sauter espaces
+ while(is_realspace(*(rcvd+p))) p++; // sauter espaces
if ((int) strlen(rcvd+p)<64) // pas trop long?
- strcpy(retour->etag,rcvd+p);
+ strcpybuff(retour->etag,rcvd+p);
else // erreur.. ignorer
retour->etag[0]='\0';
}
}
- else if ((p=strfield(rcvd,"Transfer-Encoding: chunked"))!=0) { // chunk!
- retour->is_chunk=1; // chunked
- //retour->http11=2; // chunked
+ // else if ((p=strfield(rcvd,"Transfer-Encoding: chunked"))!=0) { // chunk!
+ else if ((p=strfield(rcvd,"Transfer-Encoding:"))!=0) { // chunk!
+ while(is_realspace(*(rcvd+p))) p++; // sauter espaces
+ if (strfield(rcvd+p,"chunked")) {
+ retour->is_chunk=1; // chunked
+ //retour->http11=2; // chunked
#if HDEBUG
- printf("ok, Transfer-Encoding: détecté\n");
+ printf("ok, Transfer-Encoding: détecté\n");
#endif
+ }
}
else if ((p=strfield(rcvd,"Content-type:"))!=0) {
if (retour) {
@@ -1074,13 +1276,37 @@ void treathead(t_cookie* cookie,char* adr,char* fil,htsblk* retour,char* rcvd) {
// éviter les text/html; charset=foo
{
char* a=strchr(rcvd+p,';');
- if (a) *a='\0';
+ if (a) { // extended information
+ *a='\0';
+ a++;
+ while(is_space(*a)) a++;
+ if (strfield(a, "charset")) {
+ a += 7;
+ while(is_space(*a)) a++;
+ if (*a == '=') {
+ a++;
+ while(is_space(*a)) a++;
+ if (*a == '\"') a++;
+ while(is_space(*a)) a++;
+ if (*a) {
+ char* chs = a;
+ while(*a && !is_space(*a) && *a != '\"' && *a != ';') a++;
+ *a = '\0';
+ if (*chs) {
+ if (strlen(chs) < sizeof(retour->charset) - 2) {
+ strcpybuff(retour->charset, chs);
+ }
+ }
+ }
+ }
+ }
+ }
}
sscanf(rcvd+p,"%s",tempo);
- if (strlen(tempo)<64) // pas trop long!!
- strcpy(retour->contenttype,tempo);
+ if (strlen(tempo) < sizeof(retour->contenttype) - 2) // pas trop long!!
+ strcpybuff(retour->contenttype,tempo);
else
- strcpy(retour->contenttype,"application/octet-stream-unknown"); // erreur
+ strcpybuff(retour->contenttype,"application/octet-stream-unknown"); // erreur
}
}
else if ((p=strfield(rcvd,"Content-Range:"))!=0) {
@@ -1091,16 +1317,63 @@ void treathead(t_cookie* cookie,char* adr,char* fil,htsblk* retour,char* rcvd) {
}
}
}
- else if ((p=strfield(rcvd,"Content-Encoding:"))!=0) {
- if (retour) {
- char tempo[1100];
- {
- char* a=strchr(rcvd+p,';');
- if (a) *a='\0';
+ else if ((p=strfield(rcvd,"Connection:"))!=0) {
+ char* a = rcvd + p;
+ while(is_space(*a)) a++;
+ if (*a) {
+ if (strfield(a, "Keep-Alive")) {
+ if (!retour->keep_alive) {
+ retour->keep_alive_max = 10;
+ retour->keep_alive_t = 15;
+ }
+ retour->keep_alive = 1;
+ } else {
+ retour->keep_alive = 0;
}
- sscanf(rcvd+p,"%s",tempo);
+ }
+ }
+ else if ((p=strfield(rcvd,"Keep-Alive:"))!=0) {
+ char* a = rcvd + p;
+ while(is_space(*a)) a++;
+ if (*a) {
+ char* p;
+ retour->keep_alive = 1;
+ retour->keep_alive_max = 10;
+ retour->keep_alive_t = 15;
+ if ((p=strstr(a, "timeout="))) {
+ p+=strlen("timeout=");
+ sscanf(p, "%d", &retour->keep_alive_t);
+ }
+ if ((p=strstr(a, "max="))) {
+ p+=strlen("max=");
+ sscanf(p, "%d", &retour->keep_alive_max);
+ }
+ if (retour->keep_alive_max <= 1 || retour->keep_alive_t < 3) {
+ retour->keep_alive = 0;
+ }
+ }
+ }
+ else if ((p=strfield(rcvd,"TE:"))!=0) {
+ char* a = rcvd + p;
+ while(is_space(*a)) a++;
+ if (*a) {
+ if (strfield(a, "trailers")) {
+ retour->keep_alive_trailers=1;
+ }
+ }
+ }
+ else if ((p=strfield(rcvd,"Content-Encoding:"))!=0) {
+ if (retour) {
+ char tempo[1100];
+ char* a = rcvd + p;
+ while(is_space(*a)) a++;
+ {
+ char* a=strchr(rcvd+p,';');
+ if (a) *a='\0';
+ }
+ sscanf(a,"%s",tempo);
if (strlen(tempo)<64) // pas trop long!!
- strcpy(retour->contentencoding,tempo);
+ strcpybuff(retour->contentencoding,tempo);
else
retour->contentencoding[0]='\0'; // erreur
#if HTS_USEZLIB
@@ -1125,20 +1398,14 @@ void treathead(t_cookie* cookie,char* adr,char* fil,htsblk* retour,char* rcvd) {
else if ((p=strfield(rcvd,"Location:"))!=0) {
if (retour) {
if (retour->location) {
- while(*(rcvd+p)==' ') p++; // sauter espaces
+ while(is_realspace(*(rcvd+p))) p++; // sauter espaces
if ((int) strlen(rcvd+p)<HTS_URLMAXSIZE) // pas trop long?
- strcpy(retour->location,rcvd+p);
+ strcpybuff(retour->location,rcvd+p);
else // erreur.. ignorer
retour->location[0]='\0';
}
}
}
- else if ((p=strfield(rcvd,"Connection: Keep-Alive"))!=0) {
- // non, pas de keep-alive! on déconnectera..
- }
- else if ((p=strfield(rcvd,"Keep-Alive:"))!=0) { // params keep-alive
- // rien à faire
- }
else if ( ((p=strfield(rcvd,"Set-Cookie:"))!=0) && (cookie) ) { // ohh un cookie
char* a = rcvd+p; // pointeur
char domain[256]; // domaine cookie (.netscape.com)
@@ -1159,10 +1426,10 @@ void treathead(t_cookie* cookie,char* adr,char* fil,htsblk* retour,char* rcvd) {
// initialiser cookie lu actuellement
if (adr)
- strcpy(domain,jump_identification(adr)); // domaine
- strcpy(path,"/"); // chemin (/)
- strcpy(cook_name,""); // nom cookie (MYCOOK)
- strcpy(cook_value,""); // valeur (ID=toto,S=1234)
+ strcpybuff(domain,jump_identification(adr)); // domaine
+ strcpybuff(path,"/"); // chemin (/)
+ strcpybuff(cook_name,""); // nom cookie (MYCOOK)
+ strcpybuff(cook_value,""); // valeur (ID=toto,S=1234)
// boucler jusqu'au prochain cookie ou la fin
do {
char* start_loop=a;
@@ -1184,16 +1451,16 @@ void treathead(t_cookie* cookie,char* adr,char* fil,htsblk* retour,char* rcvd) {
&& (((int) (token_end - token_st))>0) && (((int) (value_end - value_st))>0) ) {
name[0]='\0';
value[0]='\0';
- strncat(name,token_st,(int) (token_end - token_st));
- strncat(value,value_st,(int) (value_end - value_st));
+ strncatbuff(name,token_st,(int) (token_end - token_st));
+ strncatbuff(value,value_st,(int) (value_end - value_st));
#if DEBUG_COOK
printf("detected cookie-av: name=\"%s\" value=\"%s\"\n",name,value);
#endif
if (strfield2(name,"domain")) {
- strcpy(domain,value);
+ strcpybuff(domain,value);
}
else if (strfield2(name,"path")) {
- strcpy(path,value);
+ strcpybuff(path,value);
}
else if (strfield2(name,"max-age")) {
// ignoré..
@@ -1212,8 +1479,8 @@ void treathead(t_cookie* cookie,char* adr,char* fil,htsblk* retour,char* rcvd) {
}
else {
if (strnotempty(cook_name)==0) { // noter premier: nom et valeur cookie
- strcpy(cook_name,name);
- strcpy(cook_value,value);
+ strcpybuff(cook_name,name);
+ strcpybuff(cook_value,value);
} else { // prochain cookie
a=start_loop; // on devra recommencer à cette position
next=1; // enregistrer
@@ -1238,52 +1505,52 @@ void treathead(t_cookie* cookie,char* adr,char* fil,htsblk* retour,char* rcvd) {
// transforme le message statuscode en chaîne
-void infostatuscode(char* msg,int statuscode) {
+HTSEXT_API void infostatuscode(char* msg,int statuscode) {
switch( statuscode) {
// Erreurs HTTP, selon RFC
- case 100: strcpy( msg,"Continue"); break;
- case 101: strcpy( msg,"Switching Protocols"); break;
- case 200: strcpy( msg,"OK"); break;
- case 201: strcpy( msg,"Created"); break;
- case 202: strcpy( msg,"Accepted"); break;
- case 203: strcpy( msg,"Non-Authoritative Information"); break;
- case 204: strcpy( msg,"No Content"); break;
- case 205: strcpy( msg,"Reset Content"); break;
- case 206: strcpy( msg,"Partial Content"); break;
- case 300: strcpy( msg,"Multiple Choices"); break;
- case 301: strcpy( msg,"Moved Permanently"); break;
- case 302: strcpy( msg,"Moved Temporarily"); break;
- case 303: strcpy( msg,"See Other"); break;
- case 304: strcpy( msg,"Not Modified"); break;
- case 305: strcpy( msg,"Use Proxy"); break;
- case 306: strcpy( msg,"Undefined 306 error"); break;
- case 307: strcpy( msg,"Temporary Redirect"); break;
- case 400: strcpy( msg,"Bad Request"); break;
- case 401: strcpy( msg,"Unauthorized"); break;
- case 402: strcpy( msg,"Payment Required"); break;
- case 403: strcpy( msg,"Forbidden"); break;
- case 404: strcpy( msg,"Not Found"); break;
- case 405: strcpy( msg,"Method Not Allowed"); break;
- case 406: strcpy( msg,"Not Acceptable"); break;
- case 407: strcpy( msg,"Proxy Authentication Required"); break;
- case 408: strcpy( msg,"Request Time-out"); break;
- case 409: strcpy( msg,"Conflict"); break;
- case 410: strcpy( msg,"Gone"); break;
- case 411: strcpy( msg,"Length Required"); break;
- case 412: strcpy( msg,"Precondition Failed"); break;
- case 413: strcpy( msg,"Request Entity Too Large"); break;
- case 414: strcpy( msg,"Request-URI Too Large"); break;
- case 415: strcpy( msg,"Unsupported Media Type"); break;
- case 416: strcpy( msg,"Requested Range Not Satisfiable"); break;
- case 417: strcpy( msg,"Expectation Failed"); break;
- case 500: strcpy( msg,"Internal Server Error"); break;
- case 501: strcpy( msg,"Not Implemented"); break;
- case 502: strcpy( msg,"Bad Gateway"); break;
- case 503: strcpy( msg,"Service Unavailable"); break;
- case 504: strcpy( msg,"Gateway Time-out"); break;
- case 505: strcpy( msg,"HTTP Version Not Supported"); break;
+ case 100: strcpybuff( msg,"Continue"); break;
+ case 101: strcpybuff( msg,"Switching Protocols"); break;
+ case 200: strcpybuff( msg,"OK"); break;
+ case 201: strcpybuff( msg,"Created"); break;
+ case 202: strcpybuff( msg,"Accepted"); break;
+ case 203: strcpybuff( msg,"Non-Authoritative Information"); break;
+ case 204: strcpybuff( msg,"No Content"); break;
+ case 205: strcpybuff( msg,"Reset Content"); break;
+ case 206: strcpybuff( msg,"Partial Content"); break;
+ case 300: strcpybuff( msg,"Multiple Choices"); break;
+ case 301: strcpybuff( msg,"Moved Permanently"); break;
+ case 302: strcpybuff( msg,"Moved Temporarily"); break;
+ case 303: strcpybuff( msg,"See Other"); break;
+ case 304: strcpybuff( msg,"Not Modified"); break;
+ case 305: strcpybuff( msg,"Use Proxy"); break;
+ case 306: strcpybuff( msg,"Undefined 306 error"); break;
+ case 307: strcpybuff( msg,"Temporary Redirect"); break;
+ case 400: strcpybuff( msg,"Bad Request"); break;
+ case 401: strcpybuff( msg,"Unauthorized"); break;
+ case 402: strcpybuff( msg,"Payment Required"); break;
+ case 403: strcpybuff( msg,"Forbidden"); break;
+ case 404: strcpybuff( msg,"Not Found"); break;
+ case 405: strcpybuff( msg,"Method Not Allowed"); break;
+ case 406: strcpybuff( msg,"Not Acceptable"); break;
+ case 407: strcpybuff( msg,"Proxy Authentication Required"); break;
+ case 408: strcpybuff( msg,"Request Time-out"); break;
+ case 409: strcpybuff( msg,"Conflict"); break;
+ case 410: strcpybuff( msg,"Gone"); break;
+ case 411: strcpybuff( msg,"Length Required"); break;
+ case 412: strcpybuff( msg,"Precondition Failed"); break;
+ case 413: strcpybuff( msg,"Request Entity Too Large"); break;
+ case 414: strcpybuff( msg,"Request-URI Too Large"); break;
+ case 415: strcpybuff( msg,"Unsupported Media Type"); break;
+ case 416: strcpybuff( msg,"Requested Range Not Satisfiable"); break;
+ case 417: strcpybuff( msg,"Expectation Failed"); break;
+ case 500: strcpybuff( msg,"Internal Server Error"); break;
+ case 501: strcpybuff( msg,"Not Implemented"); break;
+ case 502: strcpybuff( msg,"Bad Gateway"); break;
+ case 503: strcpybuff( msg,"Service Unavailable"); break;
+ case 504: strcpybuff( msg,"Gateway Time-out"); break;
+ case 505: strcpybuff( msg,"HTTP Version Not Supported"); break;
//
- default: if (strnotempty(msg)==0) strcpy( msg,"Unknown error"); break;
+ default: if (strnotempty(msg)==0) strcpybuff( msg,"Unknown error"); break;
}
}
@@ -1376,6 +1643,25 @@ int check_readinput(htsblk* r) {
return 0;
}
+// check if data is available
+int check_readinput_t(T_SOC soc, int timeout) {
+ if (soc != INVALID_SOCKET) {
+ fd_set fds; // poll structures
+ struct timeval tv; // structure for select
+ FD_ZERO(&fds);
+ FD_SET(soc,&fds);
+ tv.tv_sec=timeout;
+ tv.tv_usec=0;
+ select(soc + 1,&fds,NULL,NULL,&tv);
+ if (FD_ISSET(soc,&fds))
+ return 1;
+ else
+ return 0;
+ } else
+ return 0;
+}
+
+
// lecture d'un bloc sur une socket (ou un fichier!)
// >=0 : nombre d'octets lus
// <0 : fin ou erreur
@@ -1462,9 +1748,9 @@ LLint http_xfread1(htsblk* r,int bufl) {
// nouvelle taille
if (nl > 0) {
r->size+=nl;
- if ((int) fwrite(buff,1,nl,r->out)!=nl) {
+ if ((INTsys)fwrite(buff,1,nl,r->out)!=nl) {
r->statuscode=-1;
- strcpy(r->msg,"Write error on disk");
+ strcpybuff(r->msg,"Write error on disk");
nl=-1;
}
}
@@ -1669,7 +1955,7 @@ htsblk http_test(char* adr,char* fil,char* loc) {
}
} else {
retour.statuscode=-2;
- strcpy(retour.msg,"Timeout While Testing");
+ strcpybuff(retour.msg,"Timeout While Testing");
}
@@ -1730,7 +2016,7 @@ int newhttp(char* _iadr,htsblk* retour,int port,int waitconnect) {
}
// adresse véritable (sans :xx)
- strncat(iadr2,iadr,(int) (a - iadr));
+ strncatbuff(iadr2,iadr,(int) (a - iadr));
// adresse sans le :xx
hp = hts_gethostbyname(iadr2, &fullhostent_buffer);
@@ -1755,7 +2041,7 @@ int newhttp(char* _iadr,htsblk* retour,int port,int waitconnect) {
#endif
if (retour)
if (retour->msg)
- strcpy(retour->msg,"Unable to get server's address");
+ strcpybuff(retour->msg,"Unable to get server's address");
return INVALID_SOCKET;
}
// copie adresse
@@ -1770,15 +2056,33 @@ int newhttp(char* _iadr,htsblk* retour,int port,int waitconnect) {
DEBUG_W("socket\n");
#endif
soc=socket(SOCaddr_sinfamily(server), SOCK_STREAM, 0);
+ if (retour != NULL) {
+ retour->debugid = HTS_STAT.stat_sockid++;
+ }
#if HTS_WIDE_DEBUG
DEBUG_W("socket done\n");
#endif
if (soc==INVALID_SOCKET) {
if (retour)
if (retour->msg)
- strcpy(retour->msg,"Unable to create a socket");
+ strcpybuff(retour->msg,"Unable to create a socket");
return INVALID_SOCKET; // erreur création socket impossible
}
+
+ // bind this address
+ if (retour != NULL && retour->req.proxy.bindhost[0] != '\0') {
+ t_fullhostent bind_buffer;
+ hp = hts_gethostbyname(retour->req.proxy.bindhost, &bind_buffer);
+ if (hp == NULL ||
+ bind(soc, (struct sockaddr *)hp->h_addr_list[0], hp->h_length) != 0) {
+ if (retour)
+ if (retour->msg)
+ strcpybuff(retour->msg,"Unable to bind the specificied server address");
+ deletesoc(soc);
+ return INVALID_SOCKET;
+ }
+ }
+
// structure: connexion au domaine internet, port 80 (ou autre)
SOCaddr_initport(server, port);
#if HDEBUG
@@ -1820,7 +2124,7 @@ int newhttp(char* _iadr,htsblk* retour,int port,int waitconnect) {
#endif
if (retour)
if (retour->msg)
- strcpy(retour->msg,"Unable to connect to the server");
+ strcpybuff(retour->msg,"Unable to connect to the server");
/* Close the socket and notify the error!!! */
deletesoc(soc);
return INVALID_SOCKET;
@@ -1877,15 +2181,15 @@ int ident_url_absolute(char* url,char* adr,char* fil) {
// 1. optional scheme ":"
if ((pos=strfield(url,"file:"))) { // fichier local!! (pour les tests)
//!! p+=3;
- strcpy(adr,"file://");
+ strcpybuff(adr,"file://");
} else if ((pos=strfield(url,"http:"))) { // HTTP
//!!p+=3;
} else if ((pos=strfield(url,"ftp:"))) { // FTP
- strcpy(adr,"ftp://"); // FTP!!
+ strcpybuff(adr,"ftp://"); // FTP!!
//!!p+=3;
#if HTS_USEOPENSSL
- } else if ((pos=strfield(url,"https:"))) { // HTTPS
- strcpy(adr,"https://");
+ } else if (SSL_is_available && (pos=strfield(url,"https:"))) { // HTTPS
+ strcpybuff(adr,"https://");
#endif
} else if (scheme) {
return -1; // erreur non reconnu
@@ -1911,17 +2215,17 @@ int ident_url_absolute(char* url,char* adr,char* fil) {
// chemin www... trop long!!
if ( ( ((int) (q - p)) ) > HTS_URLMAXSIZE) {
- //strcpy(retour.msg,"Path too long");
+ //strcpybuff(retour.msg,"Path too long");
return -1; // erreur
}
// recopier adresse www..
- strncat(adr,p, ((int) (q - p)) );
+ strncatbuff(adr,p, ((int) (q - p)) );
// *( adr+( ((int) q) - ((int) p) ) )=0; // faut arrêter la fumette!
// recopier chemin /pub/..
if (q[0] != '/') // page par défaut (/)
- strcat(fil,"/");
- strcat(fil,q);
+ strcatbuff(fil,"/");
+ strcatbuff(fil,q);
// SECURITE:
// simplifier url pour les ../
fil_simplifie(fil);
@@ -1931,8 +2235,13 @@ int ident_url_absolute(char* url,char* adr,char* fil) {
char* a;
p=url+pos;
-
- strcat(fil,p); // fichier local ; adr="#"
+ if (*p == '/' || *p == '\\') { /* file:///.. */
+ strcatbuff(fil,p); // fichier local ; adr="#"
+ } else {
+ strcatbuff(fil,"//"); /* file://server/foo */
+ strcatbuff(fil,p);
+ }
+
a=strchr(fil,'?');
if (a)
*a='\0'; /* couper query (inutile pour file:// lors de la requête) */
@@ -1948,7 +2257,7 @@ int ident_url_absolute(char* url,char* adr,char* fil) {
// nommer au besoin.. (non utilisé normalement)
if (!strnotempty(fil))
- strcpy(fil,"default-index.html");
+ strcpybuff(fil,"default-index.html");
// case insensitive pour adresse
{
@@ -1981,12 +2290,12 @@ void fil_simplifie(char* f) {
tempo[0]='\0';
//
if (!last) /* can't go upper.. */
- strcpy(tempo,"/");
+ strcpybuff(tempo,"/");
else
strncpy(tempo,f,last+1);
tempo[last+1]='\0';
- strcat(tempo,f+i+4);
- strcpy(f,tempo); // remplacer
+ strcatbuff(tempo,f+i+4);
+ strcpybuff(f,tempo); // remplacer
i=-1; // recommencer
last=0;
}
@@ -2004,20 +2313,19 @@ void fil_simplifie(char* f) {
while ( (a=strstr(f,"./")) ) {
char tempo[HTS_URLMAXSIZE*2];
tempo[0]='\0';
- strcpy(tempo,a+2);
- strcpy(a,tempo);
+ strcpybuff(tempo,a+2);
+ strcpybuff(a,tempo);
}
// delete all remaining ../ (potential threat)
while ( (a=strstr(f,"../")) ) {
char tempo[HTS_URLMAXSIZE*2];
tempo[0]='\0';
- strcpy(tempo,a+3);
- strcpy(a,tempo);
+ strcpybuff(tempo,a+3);
+ strcpybuff(a,tempo);
}
}
-
// fermer liaison fichier ou socket
HTS_INLINE void deletehttp(htsblk* r) {
#if HTS_DEBUG_CLOSESOCK
@@ -2025,6 +2333,14 @@ HTS_INLINE void deletehttp(htsblk* r) {
sprintf(info,"deletehttp: (htsblk*) %d\n",r);
DEBUG_W2(info);
#endif
+#if HTS_USEOPENSSL
+ /* Free OpenSSL structures */
+ if (SSL_is_available && r->ssl_con) {
+ SSL_shutdown(r->ssl_con);
+ SSL_free(r->ssl_con);
+ r->ssl_con=NULL;
+ }
+#endif
if (r->soc!=INVALID_SOCKET) {
if (r->is_file) {
if (r->fp)
@@ -2038,6 +2354,16 @@ HTS_INLINE void deletehttp(htsblk* r) {
}
}
+// free the addr buffer
+// always returns 1
+HTS_INLINE int deleteaddr(htsblk* r) {
+ if (r->adr) {
+ freet(r->adr);
+ r->adr = NULL;
+ }
+ return 1;
+}
+
// fermer une socket
HTS_INLINE void deletesoc(T_SOC soc) {
if (soc!=INVALID_SOCKET) {
@@ -2067,7 +2393,7 @@ HTS_INLINE void deletesoc(T_SOC soc) {
/* Will also clean other things */
HTS_INLINE void deletesoc_r(htsblk* r) {
#if HTS_USEOPENSSL
- if (r->ssl_con) {
+ if (SSL_is_available && r->ssl_con) {
SSL_shutdown(r->ssl_con);
// SSL_CTX_set_quiet_shutdown(r->ssl_con->ctx, 1);
SSL_free(r->ssl_con);
@@ -2084,7 +2410,7 @@ HTS_INLINE TStamp time_local(void) {
}
// number of millisec since 1970
-HTS_INLINE TStamp mtime_local(void) {
+HTSEXT_API HTS_INLINE TStamp mtime_local(void) {
#ifndef HTS_DO_NOT_USE_FTIME
struct timeb B;
ftime( &B );
@@ -2120,7 +2446,7 @@ void sec2str(char *st,TStamp t) {
}
// idem, plus court (chaine)
-void qsec2str(char *st,TStamp t) {
+HTSEXT_API void qsec2str(char *st,TStamp t) {
int j,h,m,s;
j=(int) (t/(3600*24));
@@ -2181,7 +2507,7 @@ struct tm* convert_time_rfc822(char* s) {
if ((int) strlen(s) > 200)
return NULL;
- strcpy(str,s);
+ strcpybuff(str,s);
hts_lowcase(str);
/* éliminer :,- */
while( (a=strchr(str,'-')) ) *a=' ';
@@ -2200,7 +2526,7 @@ struct tm* convert_time_rfc822(char* s) {
tok[0]='\0';
if (first!=last) {
char* pos;
- strncat(tok,first,(int) (last - first));
+ strncatbuff(tok,first,(int) (last - first));
/* analyser */
if ( (pos=strstr(months,tok)) ) { /* month always in letters */
result_mm=((int) (pos - months))/4;
@@ -2274,36 +2600,44 @@ int set_filetime_rfc822(char* file,char* date) {
// heure au format rfc (taille buffer 256o)
HTS_INLINE void time_rfc822(char* s,struct tm * A) {
+ if (A == NULL) {
+ int localtime_returned_null=0;
+ assert(localtime_returned_null);
+ }
strftime(s,256,"%a, %d %b %Y %H:%M:%S GMT",A);
}
// heure locale au format rfc (taille buffer 256o)
HTS_INLINE void time_rfc822_local(char* s,struct tm * A) {
+ if (A == NULL) {
+ int localtime_returned_null=0;
+ assert(localtime_returned_null);
+ }
strftime(s,256,"%a, %d %b %Y %H:%M:%S",A);
}
// conversion en b,Kb,Mb
-char* int2bytes(LLint n) {
+HTSEXT_API char* int2bytes(LLint n) {
char** a=int2bytes2(n);
char* buff;
NOSTATIC_RESERVE(buff, char, 256);
- strcpy(buff,a[0]);
- strcat(buff,a[1]);
+ strcpybuff(buff,a[0]);
+ strcatbuff(buff,a[1]);
return concat(buff,"");
}
// conversion en b/s,Kb/s,Mb/s
-char* int2bytessec(long int n) {
+HTSEXT_API char* int2bytessec(long int n) {
char* buff;
char** a=int2bytes2(n);
NOSTATIC_RESERVE(buff, char, 256);
- strcpy(buff,a[0]);
- strcat(buff,a[1]);
+ strcpybuff(buff,a[0]);
+ strcatbuff(buff,a[1]);
return concat(buff,"/s");
}
-char* int2char(int n) {
+HTSEXT_API char* int2char(int n) {
char* buffer;
NOSTATIC_RESERVE(buffer, char, 32);
sprintf(buffer,"%d",n);
@@ -2327,35 +2661,35 @@ typedef struct {
char buff2[32];
char* buffadr[2];
} strc_int2bytes2;
-char** int2bytes2(LLint n) {
+HTSEXT_API char** int2bytes2(LLint n) {
strc_int2bytes2* strc;
NOSTATIC_RESERVE(strc, strc_int2bytes2, 1);
if (n < ToLLintKiB) {
sprintf(strc->buff1,"%d",(int)(LLint)n);
- strcpy(strc->buff2,"B");
+ strcpybuff(strc->buff2,"B");
} else if (n < ToLLintMiB) {
sprintf(strc->buff1,"%d,%02d",(int)((LLint)(n/ToLLintKiB)),(int)((LLint)((n%ToLLintKiB)*100)/ToLLintKiB));
- strcpy(strc->buff2,"KiB");
+ strcpybuff(strc->buff2,"KiB");
}
#ifdef HTS_LONGLONG
else if (n < ToLLintGiB) {
sprintf(strc->buff1,"%d,%02d",(int)((LLint)(n/(ToLLintMiB))),(int)((LLint)(((n%(ToLLintMiB))*100)/(ToLLintMiB))));
- strcpy(strc->buff2,"MiB");
+ strcpybuff(strc->buff2,"MiB");
} else if (n < ToLLintTiB) {
sprintf(strc->buff1,"%d,%02d",(int)((LLint)(n/(ToLLintGiB))),(int)((LLint)(((n%(ToLLintGiB))*100)/(ToLLintGiB))));
- strcpy(strc->buff2,"GiB");
+ strcpybuff(strc->buff2,"GiB");
} else if (n < ToLLintPiB) {
sprintf(strc->buff1,"%d,%02d",(int)((LLint)(n/(ToLLintTiB))),(int)((LLint)(((n%(ToLLintTiB))*100)/(ToLLintTiB))));
- strcpy(strc->buff2,"TiB");
+ strcpybuff(strc->buff2,"TiB");
} else {
sprintf(strc->buff1,"%d,%02d",(int)((LLint)(n/(ToLLintPiB))),(int)((LLint)(((n%(ToLLintPiB))*100)/(ToLLintPiB))));
- strcpy(strc->buff2,"PiB");
+ strcpybuff(strc->buff2,"PiB");
}
#else
else {
sprintf(strc->buff1,"%d,%02d",(int)((LLint)(n/(ToLLintMiB))),(int)((LLint)(((n%(ToLLintMiB))*100)/(ToLLintMiB))));
- strcpy(strc->buff2,"MiB");
+ strcpybuff(strc->buff2,"MiB");
}
#endif
strc->buffadr[0]=strc->buff1;
@@ -2376,34 +2710,34 @@ int sig_ignore_flag( int setflag ) { // flag ignore
// envoi de texte (en têtes généralement) sur la socket soc
HTS_INLINE int sendc(htsblk* r, char* s) {
- int n;
+ int n, ssz = (int)strlen(s);
#if HTS_WIN
#else
sig_ignore_flag(1);
#endif
#if HDEBUG
- write(0,s,strlen(s));
+ write(0,s,ssz);
#endif
#if HTS_USEOPENSSL
- if (r->ssl) {
- n = SSL_write(r->ssl_con, s, strlen(s));
+ if (SSL_is_available && r->ssl) {
+ n = SSL_write(r->ssl_con, s, ssz);
} else
#endif
- n = send(r->soc,s,strlen(s),0);
+ n = send(r->soc,s,ssz,0);
#if HTS_WIN
#else
sig_ignore_flag(0);
#endif
- return n;
+ return ( n == ssz ) ? n : -1;
}
// Remplace read
-void finput(int fd,char* s,int max) {
+int finput(int fd,char* s,int max) {
char c;
int j=0;
do {
@@ -2419,7 +2753,8 @@ void finput(int fd,char* s,int max) {
}
}
} while((c!=0) && (j<max-1));
- s[j++]='\0';
+ s[j]='\0';
+ return j;
}
// Like linput, but in memory (optimized)
@@ -2444,7 +2779,7 @@ int binput(char* buff,char* s,int max) {
count--;
// copy
if (count > 0) {
- strncat(s, buff, count);
+ strncatbuff(s, buff, count);
}
// and terminate with a null char
s[count]='\0';
@@ -2470,6 +2805,34 @@ int linput(FILE* fp,char* s,int max) {
s[j]='\0';
return j;
}
+int linputsoc(T_SOC soc, char* s, int max) {
+ int c;
+ int j=0;
+ do {
+ unsigned char ch;
+ if (recv(soc, &ch, 1, 0) == 1) {
+ c = ch;
+ } else {
+ c = EOF;
+ }
+ if (c!=EOF) {
+ switch(c) {
+ case 13: break; // sauter CR
+ case 10: c=-1; break;
+ case 9: case 12: break; // sauter ces caractères
+ default: s[j++]=(char) c; break;
+ }
+ }
+ } while((c!=-1) && (c!=EOF) && (j<(max-1)));
+ s[j]='\0';
+ return j;
+}
+int linputsoc_t(T_SOC soc, char* s, int max, int timeout) {
+ if (check_readinput_t(soc, timeout)) {
+ return linputsoc(soc, s, max);
+ }
+ return -1;
+}
int linput_trim(FILE* fp,char* s,int max) {
int rlen=0;
char* ls=(char*) malloct(max+2);
@@ -2660,7 +3023,7 @@ int ishtml(char* fil) {
char fil_noquery[HTS_URLMAXSIZE*2];
fil_noquery[0]='\0';
a++; // pointer sur extension
- strncat(fil_noquery,a,HTS_URLMAXSIZE);
+ strncatbuff(fil_noquery,a,HTS_URLMAXSIZE);
a=strchr(fil_noquery,'?');
if (a)
*a='\0';
@@ -2710,7 +3073,7 @@ HTS_INLINE int ishttperror(int err) {
// retourne le pointeur ou le pointeur + offset si il existe dans la chaine un @ signifiant
// une identification
-char* jump_identification(char* source) {
+HTSEXT_API char* jump_identification(char* source) {
char *a,*trytofind;
// rechercher dernier @ (car parfois email transmise dans adresse!)
// mais sauter ftp:// éventuel
@@ -2719,9 +3082,55 @@ char* jump_identification(char* source) {
return (trytofind != NULL)?trytofind:a;
}
+HTSEXT_API char* jump_normalized(char* source) {
+ source = jump_identification(source);
+ if (strfield(source, "www") && source[3] != '\0') {
+ if (source[3] == '.') { // www.foo.com -> foo.com
+ source += 4;
+ } else { // www-4.foo.com -> foo.com
+ char* a = source + 3;
+ while(*a && ( isdigit(*a) || *a == '-') ) a++;
+ if (*a == '.') {
+ source = a + 1;
+ }
+ }
+ }
+ return source;
+}
+
+HTSEXT_API char* fil_normalized(char* source, char* dest_) {
+ char* dest=dest_;
+ char lastc = 0;
+ int gotquery=0;
+ while(*source) {
+ if (*source == '?')
+ gotquery=1;
+ if (
+ (!gotquery && lastc == '/' && *source == '/') // foo//bar -> foo/bar
+ ) {
+ }
+ else {
+ *dest++ = *source;
+ }
+ lastc = *source;
+ source++;
+ }
+ *dest++ = '\0';
+ return dest_;
+}
+
+#define endwith(a) ( (len >= (sizeof(a)-1)) ? ( strncmp(dest, a+len-(sizeof(a)-1), sizeof(a)-1) == 0 ) : 0 );
+HTSEXT_API char* adr_normalized(char* source, char* dest) {
+ /* not yet too aggressive (no com<->net<->org checkings) */
+ strcpybuff(dest, jump_normalized(source));
+ return dest;
+}
+#undef endwith
+
+
// find port (:80) or NULL if not found
// can handle IPV6 addresses
-char* jump_toport(char* source) {
+HTSEXT_API char* jump_toport(char* source) {
char *a,*trytofind;
a = jump_identification(source);
trytofind = strrchr_limit(a, ']', strchr(source, '/')); // find last ] (http://[3ffe:b80:1234::1]:80/foo.html)
@@ -2732,7 +3141,7 @@ char* jump_toport(char* source) {
// strrchr, but not too far
char* strrchr_limit(char* s, char c, char* limit) {
if (limit == NULL) {
- char* p = strchr(s, c);
+ char* p = strrchr(s, c);
return p?(p+1):NULL;
} else {
char *a=NULL, *p;
@@ -2765,17 +3174,18 @@ HTS_INLINE char* jump_protocol(char* source) {
}
// codage base 64 a vers b
-void code64(char* a,char* b) {
+void code64(unsigned char* a,int size_a,unsigned char* b,int crlf) {
int i1=0,i2=0,i3=0,i4=0;
- unsigned long store;
+ int loop=0;
+ unsigned long int store;
int n;
const char _hts_base64[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
- b[0]='\0';
- while(*a) {
+ while(size_a-- > 0) {
// 24 bits
- n=1; store=0; store |= ((*a++) & 0xff);
- if (*a) { n=2; store <<= 8; store |= ((*a++) & 0xff); }
- if (*a) { n=3; store <<= 8; store |= ((*a++) & 0xff); }
+ n=1;
+ store = *a++;
+ if (size_a-- > 0) { n=2; store <<= 8; store |= *a++; }
+ if (size_a-- > 0) { n=3; store <<= 8; store |= *a++; }
if (n==3) {
i4=store & 63;
i3=(store>>6) & 63;
@@ -2802,6 +3212,11 @@ void code64(char* a,char* b) {
*b++ = _hts_base64[i4];
else
*b++ = '=';
+
+ if (crlf && ( ( loop += 3 ) % 60) == 0 ) {
+ *b++ = '\r';
+ *b++ = '\n';
+ }
}
*b++='\0';
}
@@ -2809,7 +3224,7 @@ void code64(char* a,char* b) {
// remplacer &quot; par " etc..
// buffer MAX 1Ko
#define strcmpbeg(a, b) strncmp(a, b, strlen(b))
-void unescape_amp(char* s) {
+HTSEXT_API void unescape_amp(char* s) {
while(*s) {
if (*s=='&') {
char* end=strchr(s,';');
@@ -3043,8 +3458,8 @@ void unescape_amp(char* s) {
if (c) {
char buff[HTS_URLMAXSIZE*2];
buff[0]=(char) c;
- strcpy(buff+1,end+1);
- strcpy(s,buff);
+ strcpybuff(buff+1,end+1);
+ strcpybuff(s,buff);
}
}
}
@@ -3054,7 +3469,7 @@ void unescape_amp(char* s) {
// remplacer %20 par ' ', | par : etc..
// buffer MAX 1Ko
-char* unescape_http(char* s) {
+HTSEXT_API char* unescape_http(char* s) {
char* tempo;
int i,j=0;
NOSTATIC_RESERVE(tempo, char, HTS_URLMAXSIZE*2);
@@ -3078,7 +3493,7 @@ char* unescape_http(char* s) {
}
// unescape in URL/URI ONLY what has to be escaped, to form a standard URL/URI
-char* unescape_http_unharm(char* s, int no_high) {
+HTSEXT_API char* unescape_http_unharm(char* s, int no_high) {
char* tempo;
int i,j=0;
NOSTATIC_RESERVE(tempo, char, HTS_URLMAXSIZE*2);
@@ -3120,32 +3535,46 @@ char* unescape_http_unharm(char* s, int no_high) {
// remplacer " par %xx etc..
// buffer MAX 1Ko
-void escape_spc_url(char* s) {
+HTSEXT_API void escape_spc_url(char* s) {
x_escape_http(s,2);
}
// smith / john -> smith%20%2f%20john
-void escape_in_url(char* s) {
+HTSEXT_API void escape_in_url(char* s) {
x_escape_http(s,1);
}
// smith / john -> smith%20/%20john
-void escape_uri(char* s) {
+HTSEXT_API void escape_uri(char* s) {
x_escape_http(s,3);
}
-void escape_uri_utf(char* s) {
+HTSEXT_API void escape_uri_utf(char* s) {
x_escape_http(s,30);
}
-void escape_check_url(char* s) {
+HTSEXT_API void escape_check_url(char* s) {
x_escape_http(s,0);
}
// same as escape_check_url, but returns char*
-char* escape_check_url_addr(char* s) {
+HTSEXT_API char* escape_check_url_addr(char* s) {
char* adr;
escape_check_url(adr = concat(s,""));
return adr;
}
+// strip all control characters
+HTSEXT_API void escape_remove_control(char* s) {
+ unsigned char* ss = (unsigned char*) s;
+ while(*ss) {
+ if (*ss < 32) { /* CONTROL characters go away! */
+ char tmp[HTS_URLMAXSIZE*2];
+ strcpybuff(tmp, ss+1);
+ strcpybuff(ss, tmp);
+ } else {
+ ss++;
+ }
+ }
+}
+
-void x_escape_http(char* s,int mode) {
+HTSEXT_API void x_escape_http(char* s,int mode) {
while(*s) {
int test=0;
if (mode == 0)
@@ -3155,7 +3584,8 @@ void x_escape_http(char* s,int mode) {
|| CHAR_DELIM(*s)
|| CHAR_UNWISE(*s)
|| CHAR_SPECIAL(*s)
- || CHAR_XXAVOID(*s) );
+ || CHAR_XXAVOID(*s)
+ || CHAR_MARK(*s));
}
else if (mode==2)
test=(strchr(" ",*s)!=0); // n'escaper que espace
@@ -3171,12 +3601,12 @@ void x_escape_http(char* s,int mode) {
}
if (test) {
- char buffer[HTS_URLMAXSIZE*2];
+ char buffer[HTS_URLMAXSIZE*3];
int n;
n=(int)(unsigned char) *s;
- strcpy(buffer,s+1);
+ strcpybuff(buffer,s+1);
sprintf(s,"%%%02x",n);
- strcat(s,buffer);
+ strcatbuff(s,buffer);
}
s++;
}
@@ -3206,8 +3636,8 @@ char* concat(const char* a,const char* b) {
concat_strc* strc;
NOSTATIC_RESERVE(strc, concat_strc, 1);
strc->rol=((strc->rol+1)%16); // roving pointer
- strcpy(strc->buff[strc->rol],a);
- if (b) strcat(strc->buff[strc->rol],b);
+ strcpybuff(strc->buff[strc->rol],a);
+ if (b) strcatbuff(strc->buff[strc->rol],b);
return strc->buff[strc->rol];
}
// conversion fichier / -> antislash
@@ -3244,7 +3674,7 @@ char* convtolower(char* a) {
concat_strc* strc;
NOSTATIC_RESERVE(strc, concat_strc, 1);
strc->rol=((strc->rol+1)%16); // roving pointer
- strcpy(strc->buff[strc->rol],a);
+ strcpybuff(strc->buff[strc->rol],a);
hts_lowcase(strc->buff[strc->rol]); // lower case
return strc->buff[strc->rol];
}
@@ -3308,7 +3738,7 @@ void guess_httptype(char *s,char *fil) {
// flag: 1 si toujours renvoyer un type
void get_httptype(char *s,char *fil,int flag) {
if (ishtml(fil)==1)
- strcpy(s,"text/html");
+ strcpybuff(s,"text/html");
else {
char *a=fil+strlen(fil)-1;
while ( (*a!='.') && (*a!='/') && (a>fil)) a--;
@@ -3319,7 +3749,7 @@ void get_httptype(char *s,char *fil,int flag) {
while( (!ok) && (strnotempty(hts_mime[j][1])) ) {
if (strfield2(hts_mime[j][1],a)) {
if (hts_mime[j][0][0]!='*') { // Une correspondance existe
- strcpy(s,hts_mime[j][0]);
+ strcpybuff(s,hts_mime[j][0]);
ok=1;
}
}
@@ -3328,7 +3758,7 @@ void get_httptype(char *s,char *fil,int flag) {
if (!ok) if (flag) sprintf(s,"application/%s",a);
} else {
- if (flag) strcpy(s,"application/octet-stream");
+ if (flag) strcpybuff(s,"application/octet-stream");
}
}
}
@@ -3364,7 +3794,7 @@ int get_userhttptype(int setdefs,char *s,char *ext) {
char* a;
a=strchr(detect,'\n');
if (a) {
- strncat(s,detect,(int) (a - detect));
+ strncatbuff(s,detect,(int) (a - detect));
}
}
return 1;
@@ -3383,7 +3813,7 @@ void give_mimext(char *s,char *st) {
while( (!ok) && (strnotempty(hts_mime[j][1])) ) {
if (strfield2(hts_mime[j][0],st)) {
if (hts_mime[j][1][0]!='*') { // Une correspondance existe
- strcpy(s,hts_mime[j][1]);
+ strcpybuff(s,hts_mime[j][1]);
ok=1;
}
}
@@ -3403,7 +3833,7 @@ void give_mimext(char *s,char *st) {
if (a) {
if ((int)strlen(a) >= 1) {
if ((int)strlen(a) <= 4) {
- strcpy(s,a);
+ strcpybuff(s,a);
ok=1;
}
}
@@ -3441,7 +3871,7 @@ char* get_ext(char *fil) {
if (*a=='.') {
fil_noquery[0]='\0';
a++; // pointer sur extension
- strncat(fil_noquery,a,HTS_URLMAXSIZE);
+ strncatbuff(fil_noquery,a,HTS_URLMAXSIZE);
a=strchr(fil_noquery,'?');
if (a)
*a='\0';
@@ -3531,60 +3961,54 @@ void fprintfio(FILE* fp,char* buff,char* prefix) {
/* Le fichier existe-t-il? (ou est-il accessible?) */
int fexist(char* s) {
- FILE* fp;
- if (strnotempty(s)==0) // nom vide: non trouvé
- return 0;
- fp=fopen(fconv(s),"rb");
- if (fp!=NULL) fclose(fp);
- return (fp!=NULL);
+ struct stat st;
+ memset(&st, 0, sizeof(st));
+ if (stat(s, &st) == 0) {
+ if (S_ISREG(st.st_mode)) {
+ return 1;
+ }
+ }
+ return 0;
}
/* Taille d'un fichier, -1 si n'existe pas */
/* fp->_cnt ne fonctionne pas sur toute les plate-formes :-(( */
/* Note: NOT YET READY FOR 64-bit */
-//LLint fsize(char* s) {
-int fsize(char* s) {
- /*
-#if HTS_WIN
- HANDLE hFile;
- DWORD dwSizeHigh = 0;
- DWORD dwSizeLow = 0;
- hFile = CreateFile(s,0,0,NULL,OPEN_EXISTING,0,NULL);
- if (hFile) {
- dwSizeLow = GetFileSize (hFile, & dwSizeHigh) ;
- CloseHandle(hFile);
- if (dwSizeLow != 0xFFFFFFFF)
- return (dwSizeLow & (dwSizeHigh<<32));
- else
- return -1;
- } else
- return -1;
-#else
- */
+INTsys fsize(char* s) {
FILE* fp;
if (strnotempty(s)==0) // nom vide: erreur
return -1;
fp=fopen(fconv(s),"rb");
if (fp!=NULL) {
- int i;
+ INTsys i;
fseek(fp,0,SEEK_END);
+#ifdef HTS_FSEEKO
+ i=ftello(fp);
+#else
i=ftell(fp);
+#endif
fclose(fp);
return i;
} else return -1;
- /*
-#endif
- */
}
-int fpsize(FILE* fp) {
- int oldpos,size;
+INTsys fpsize(FILE* fp) {
+ INTsys oldpos,size;
if (!fp)
return -1;
+#ifdef HTS_FSEEKO
+ oldpos=ftello(fp);
+#else
oldpos=ftell(fp);
+#endif
fseek(fp,0,SEEK_END);
+#ifdef HTS_FSEEKO
+ size=ftello(fp);
+ fseeko(fp,oldpos,SEEK_SET);
+#else
size=ftell(fp);
fseek(fp,oldpos,SEEK_SET);
+#endif
return size;
}
@@ -3593,7 +4017,7 @@ typedef struct {
char path[1024+4];
int init;
} hts_rootdir_strc;
-char* hts_rootdir(char* file) {
+HTSEXT_API char* hts_rootdir(char* file) {
static hts_rootdir_strc strc = {"", 0};
//NOSTATIC_RESERVE(strc, hts_rootdir_strc, 1);
if (file) {
@@ -3602,7 +4026,7 @@ char* hts_rootdir(char* file) {
strc.init=1;
if (strnotempty(file)) {
char* a;
- strcpy(strc.path,file);
+ strcpybuff(strc.path,file);
while((a=strrchr(strc.path,'\\'))) *a='/';
if ((a=strrchr(strc.path,'/'))) {
*(a+1)='\0';
@@ -3613,7 +4037,7 @@ char* hts_rootdir(char* file) {
if( getcwd( strc.path, 1024 ) == NULL )
strc.path[0]='\0';
else
- strcat(strc.path,"/");
+ strcatbuff(strc.path,"/");
}
}
return NULL;
@@ -3625,7 +4049,7 @@ char* hts_rootdir(char* file) {
-hts_stat_struct HTS_STAT;
+HTSEXT_API hts_stat_struct HTS_STAT;
//
// return number of downloadable bytes, depending on rate limiter
// see engine_stats() routine, too
@@ -3693,7 +4117,7 @@ HTS_INLINE int hts_read(htsblk* r,char* buff,int size) {
DEBUG_W("read\n");
#endif
if (r->fp)
- retour=fread(buff,1,size,r->fp);
+ retour=(int)fread(buff,1,size,r->fp);
else
retour=-1;
} else {
@@ -3704,7 +4128,7 @@ HTS_INLINE int hts_read(htsblk* r,char* buff,int size) {
#endif
//HTS_TOTAL_RECV_CHECK(size); // Diminuer au besoin si trop de données reçues
#if HTS_USEOPENSSL
- if (r->ssl) {
+ if (SSL_is_available && r->ssl) {
retour = SSL_read(r->ssl_con, buff, size);
if (retour <= 0) {
int err_code = SSL_get_error(r->ssl_con, retour);
@@ -3745,6 +4169,21 @@ t_dnscache* _hts_cache(void) {
NOSTATIC_RESERVE(cache, t_dnscache, 1);
return cache;
}
+// free a chain of DNS cache entries (iteratively: the stack stays flat whatever the chain length)
+static void hts_cache_free_(t_dnscache* cache) {
+  t_dnscache* next;
+  while(cache != NULL) {
+    next = cache->n;   /* read the link before the entry is released */
+    freet(cache);
+    cache = next;
+  }
+}
+void hts_cache_free(t_dnscache* cache) {  /* releases every entry chained after the head */
+  if (cache == NULL)
+    return;
+  hts_cache_free_(cache->n);
+  cache->n = NULL;                         /* the head itself is kept, now with an empty chain */
+}
// lock le cache dns pour tout opération d'ajout
// plus prudent quand plusieurs threads peuvent écrire dedans..
@@ -3819,7 +4258,7 @@ int hts_dnstest(char* _iadr) {
NOSTATIC_RESERVE(iadr, char, HTS_URLMAXSIZE*2);
// sauter user:pass@ éventuel
- strcpy(iadr,jump_identification(_iadr));
+ strcpybuff(iadr,jump_identification(_iadr));
// couper éventuel :
{
char *a;
@@ -3852,7 +4291,7 @@ int hts_dnstest(char* _iadr) {
}
-t_hostent* vxgethostbyname(char* hostname, void* v_buffer) {
+HTSEXT_API t_hostent* vxgethostbyname(char* hostname, void* v_buffer) {
t_fullhostent* buffer = (t_fullhostent*) v_buffer;
/* Clear */
fullhostent_init(buffer);
@@ -3869,8 +4308,8 @@ t_hostent* vxgethostbyname(char* hostname, void* v_buffer) {
if ((hostname[0] == '[') && (hostname[strlen(hostname)-1] == ']')) {
char tempo[HTS_URLMAXSIZE*2];
tempo[0]='\0';
- strncat(tempo, hostname+1, strlen(hostname)-2);
- strcpy(hostname, tempo);
+ strncatbuff(tempo, hostname+1, strlen(hostname)-2);
+ strcpybuff(hostname, tempo);
}
{
@@ -3935,7 +4374,7 @@ t_hostent* hts_gethostbyname(char* _iadr, void* v_buffer) {
/* Clear */
fullhostent_init(buffer);
- strcpy(iadr,jump_identification(_iadr));
+ strcpybuff(iadr,jump_identification(_iadr));
// couper éventuel :
{
char *a;
@@ -3993,7 +4432,7 @@ t_hostent* hts_gethostbyname(char* _iadr, void* v_buffer) {
#endif
cache->n=(t_dnscache*) calloct(1,sizeof(t_dnscache));
if (cache->n!=NULL) {
- strcpy(cache->n->iadr,iadr);
+ strcpybuff(cache->n->iadr,iadr);
if (hp!=NULL) {
memcpy(cache->n->host_addr, hp->h_addr_list[0], hp->h_length);
cache->n->host_length=hp->h_length;
@@ -4028,102 +4467,161 @@ HTS_INLINE t_hostent* hts_gethostbyname(char* iadr, t_fullhostent* buffer) {
// --- Tracage des mallocs() ---
-#if HTS_TRACE_MALLOC
-typedef struct _mlink {
+#ifdef HTS_TRACE_MALLOC
+//#define htsLocker(A, N) htsLocker(A, N)
+#define htsLocker(A, N) do {} while(0)
+static mlink trmalloc = {NULL,0,0,NULL};
+static int trmalloc_id=0;
+static PTHREAD_LOCK_TYPE* mallocMutex = NULL;
+static void hts_meminit(void) {
+ //if (mallocMutex == NULL) {
+ // mallocMutex = calloc(sizeof(*mallocMutex), 1);
+ // htsLocker(mallocMutex, -999);
+ //}
+}
+void* hts_malloc(size_t len) {
void* adr;
- int len;
- int id;
- struct _mlink* next;
-} mlink;
-mlink trmalloc = {NULL,0,0,NULL};
-int trmalloc_id=0;
-
-HTS_INLINE void* hts_malloc(size_t len,size_t len2) {
+ hts_meminit();
+ htsLocker(mallocMutex, 1);
+ fassert(len > 0);
+ adr = hts_xmalloc(len, 0);
+ htsLocker(mallocMutex, 0);
+ return adr;
+}
+void* hts_calloc(size_t len,size_t len2) {  /* traced allocation of len * len2 bytes, zero-filled */
+  void* adr;
+  hts_meminit();
+  fassert(len > 0);
+  fassert(len2 > 0);
+  htsLocker(mallocMutex, 1);
+  adr = hts_xmalloc(len, len2);
+  htsLocker(mallocMutex, 0);
+  if (adr != NULL) memset(adr, 0, len * len2);  /* hts_xmalloc() can return NULL: never clear through it */
+  return adr;
+}
+void* hts_xmalloc(size_t len,size_t len2) {  /* tracked allocation of len bytes (len2 == 0) or len*len2 bytes, fenced by two t_htsboundary guards */
mlink* lnk = (mlink*) calloc(1,sizeof(mlink));
- void* r = NULL;
+ fassert(lnk != NULL);
+ fassert(len > 0);
+ fassert(len2 >= 0); /* len2 == 0 selects the single-size form below */
if (lnk) {
+ void* r = NULL;
+ int size, bsize = sizeof(t_htsboundary);
if (len2)
- r = calloc(len,len2);
+ size = len * len2;
else
- r = malloc(len);
+ size = len;
+ size += ((bsize - (size % bsize)) % bsize); /* round size up to a multiple of bsize */
+ r = malloc(size + bsize*2); /* payload plus one guard on each side */
+ fassert(r != NULL);
if (r) {
- lnk->adr=r;
- if (len2)
- lnk->len=len*len2;
- else
- lnk->len=len;
- lnk->id=trmalloc_id++;
- lnk->next=trmalloc.next;
- trmalloc.next=lnk;
-#if MEMDEBUG
- //printf("malloc: %d\n",r);
-#endif
- } else free(lnk);
+ * ( (t_htsboundary*) ((char*) r ) )
+ = * ( (t_htsboundary*) ( (char*) r + size + bsize ) )
+ = htsboundary;
+ r = (char*) r + bsize; /* skip the leading guard; a cast is not an lvalue in ISO C, so assign instead of "+=" on the cast */
+ lnk->adr = r;
+ lnk->len = size;
+ lnk->id = trmalloc_id++;
+ lnk->next = trmalloc.next;
+ trmalloc.next = lnk;
+ return r;
+ } else {
+ free(lnk);
+ }
}
- return r;
+ return NULL;
}
-HTS_INLINE void hts_free(void* adr) {
+void hts_free(void* adr) {
mlink* lnk = &trmalloc;
+ int bsize = sizeof(t_htsboundary);
+ fassert(adr != NULL);
if (!adr) {
-#if MEMDEBUG
- printf("* unexpected free() error at %d\n",adr);
-#endif
return;
}
- do {
- if (lnk->next->adr==adr) {
+ htsLocker(mallocMutex, 1);
+ while(lnk->next != NULL) {
+ if (lnk->next->adr == adr) {
mlink* blk_free=lnk->next;
-#if 1
+ fassert(blk_free->id != -1);
+ fassert( * ( (t_htsboundary*) ( (char*) adr - bsize ) ) == htsboundary );
+ fassert( * ( (t_htsboundary*) ( (char*) adr + blk_free->len ) ) == htsboundary );
lnk->next=lnk->next->next;
free((void*) blk_free);
-#else
-#if MEMDEBUG
- if (blk_free->id==-1) {
- printf("* memory has already been freed: %d (id=%d)\n",blk_free->adr,blk_free->id);
- }
-#endif
- blk_free->id=-1;
-#endif
- free(adr);
-#if MEMDEBUG
- //printf("free: %d (id=%d)\n",blk_free->adr,blk_free->id);
-#endif
+ //blk_free->id=-1;
+ free((char*) adr - bsize);
+ htsLocker(mallocMutex, 0);
return;
}
- lnk=lnk->next;
- } while(lnk->next != NULL);
-#if MEMDEBUG
- printf("* unexpected free() error at %d\n",adr);
-#endif
+ lnk = lnk->next;
+ fassert(lnk->next != NULL);
+ }
free(adr);
+ htsLocker(mallocMutex, 0);
}
-HTS_INLINE void* hts_realloc(void* adr,size_t len) {
+void* hts_realloc(void* adr,size_t len) {
+ int bsize = sizeof(t_htsboundary);
+ len += ((bsize - (len % bsize)) % bsize); /* check alignement */
+ if (adr != NULL) {
+ mlink* lnk = &trmalloc;
+ htsLocker(mallocMutex, 1);
+ while(lnk->next != NULL) {
+ if (lnk->next->adr==adr) {
+ {
+ mlink* blk_free=lnk->next;
+ fassert(blk_free->id != -1);
+ fassert( * ( (t_htsboundary*) ( (char*) adr - bsize ) ) == htsboundary );
+ fassert( * ( (t_htsboundary*) ( (char*) adr + blk_free->len ) ) == htsboundary );
+ }
+ adr = realloc((char*) adr - bsize, len + bsize * 2);
+ fassert(adr != NULL);
+ lnk->next->adr = (char*) adr + bsize;
+ lnk->next->len = len;
+ * ( (t_htsboundary*) ( (char*) adr ) )
+ = * ( (t_htsboundary*) ( (char*) adr + len + bsize) )
+ = htsboundary;
+ htsLocker(mallocMutex, 0);
+ return (char*) adr + bsize;
+ }
+ lnk = lnk->next;
+ fassert(lnk->next != NULL);
+ }
+ htsLocker(mallocMutex, 0);
+ }
+ return hts_malloc(len);
+}
+mlink* hts_find(char* adr) {
+ char* stkframe = (char*) &stkframe;
mlink* lnk = &trmalloc;
- do {
- if (lnk->next->adr==adr) {
- adr = realloc(adr,len);
- lnk->next->adr = adr;
- lnk->next->len = len;
-#if MEMDEBUG
- //printf("realloc: %d (id=%d)\n",lnk->next->adr,lnk->next->id);
-#endif
- return adr;
+ int bsize = sizeof(t_htsboundary);
+ fassert(adr != NULL);
+ if (!adr) {
+ return NULL;
+ }
+ htsLocker(mallocMutex, 1);
+ while(lnk->next != NULL) {
+ if (adr >= lnk->next->adr && adr <= lnk->next->adr + lnk->next->len) { /* found */
+ htsLocker(mallocMutex, 0);
+ return lnk->next;
}
- lnk=lnk->next;
- } while(lnk->next != NULL);
-#if MEMDEBUG
- printf("* unexpected realloc() error at %d\n",adr);
-#endif
- return realloc(adr,len);
+ lnk = lnk->next;
+ }
+ htsLocker(mallocMutex, 0);
+ {
+ int depl = (int) (adr - stkframe);
+ if (depl < 0) depl = -depl;
+ //fassert(depl < 512000); /* near the stack frame.. doesn't look like malloc but stack variable */
+ return NULL;
+ }
}
// check the malloct() and calloct() trace stack
void hts_freeall(void) {
+ /* every block is released through hts_free(), which also unlinks its entry: that is what makes the loop below terminate */
while(trmalloc.next) {
#if MEMDEBUG
printf("* block %d\t not released: at %d\t (%d\t bytes)\n",trmalloc.next->id,trmalloc.next->adr,trmalloc.next->len);
#endif
if (trmalloc.next->id != -1) {
- freet(trmalloc.next->adr);
+ hts_free(trmalloc.next->adr); /* a bare free() here would leave trmalloc.next unchanged and free the same block forever */
}
}
}
@@ -4145,8 +4643,8 @@ void cut_path(char* fullpath,char* path,char* pname) {
a=fullpath+strlen(fullpath)-2;
while( (*a!='/') && ( a > fullpath)) a--;
if (*a=='/') a++;
- strcpy(pname,a);
- strncat(path,fullpath,(int) (a - fullpath));
+ strcpybuff(pname,a);
+ strncatbuff(path,fullpath,(int) (a - fullpath));
}
}
}
@@ -4168,8 +4666,12 @@ int ftp_available(void) {
-int hts_init(void) {
+HTSEXT_API int hts_init(void) {
static int hts_init_ok = 0;
+
+ /* Ensure external modules are loaded */
+ htspe_init();
+
if (!hts_init_ok) {
hts_init_ok = 1;
// default wrappers
@@ -4196,17 +4698,18 @@ int hts_init(void) {
/*
Initialize the OpensSSL library
*/
- if (!openssl_ctx) {
+ if (!openssl_ctx && SSL_is_available) {
+ if (SSL_load_error_strings) SSL_load_error_strings();
SSL_library_init();
- SSL_load_error_strings();
- ERR_load_crypto_strings();
- ERR_load_SSL_strings();
- SSLeay_add_ssl_algorithms();
+ ///if (SSL_load_error_strings) SSL_load_error_strings();
+ //if (ERR_load_crypto_strings) ERR_load_crypto_strings();
+ // if (ERR_load_SSL_strings) ERR_load_SSL_strings(); ???!!!
// OpenSSL_add_all_algorithms();
openssl_ctx = SSL_CTX_new(SSLv23_client_method());
if (!openssl_ctx) {
fprintf(stderr, "fatal: unable to initialize TLS: SSL_CTX_new(SSLv23_client_method)\n");
- abort();
+ abortLog("unable to initialize TLS: SSL_CTX_new(SSLv23_client_method)");
+ assertf("unable to initialize TLS" == NULL);
}
}
#endif
@@ -4214,9 +4717,13 @@ int hts_init(void) {
/* Init vars and thread-specific values */
hts_initvar();
+ /* initialiser structcheck */
+ // structcheck_init(1);
+
return 1;
}
-int hts_uninit(void) {
+HTSEXT_API int hts_uninit(void) {
+ hts_cache_free(_hts_cache());
hts_freevar();
/* htswrap_free(); */
return 1;
diff --git a/src/htslib.h b/src/htslib.h
index 9b2aca3..d3881d3 100644
--- a/src/htslib.h
+++ b/src/htslib.h
@@ -60,9 +60,11 @@ Please visit our Website: http://www.httrack.com
typedef struct {
short int user_agent_send; // user agent (ex: httrack/1.0 [sun])
short int http11; // l'en tête peut (doit) être signé HTTP/1.1 et non HTTP/1.0
+ short int nokeepalive; // pas de keep-alive
short int range_used; // Range utilisé
short int nocompression; // Pas de compression
- char user_agent[64];
+ short int flush_garbage; // recycled
+ char user_agent[128];
char lang_iso[64];
t_proxy proxy; // proxy
} htsrequest;
@@ -75,11 +77,17 @@ typedef struct {
short int is_write; // sortie sur disque (out) ou en mémoire (adr)
short int is_chunk; // mode chunk
short int compressed; // compressé?
+ short int empty; // vide?
+ short int keep_alive; // Keep-Alive?
+ short int keep_alive_trailers; // ..with trailers extension
+ int keep_alive_t; // KA timeout
+ int keep_alive_max; // KA number of requests
char* adr; // adresse du bloc de mémoire, NULL=vide
FILE* out; // écriture directe sur disque (si is_write=1)
LLint size; // taille fichier
char msg[80]; // message éventuel si échec ("\0"=non précisé)
char contenttype[64]; // content-type ("text/html" par exemple)
+ char charset[64]; // charset ("iso-8859-1" par exemple)
char contentencoding[64]; // content-encoding ("gzip" par exemple)
char* location; // on copie dedans éventuellement la véritable 'location'
LLint totalsize; // taille totale à télécharger (-1=inconnue)
@@ -95,6 +103,7 @@ typedef struct {
char etag[64]; // Etag
char cdispo[256]; // Content-Disposition coupé
LLint crange; // Content-Range
+ int debugid; // debug connection
/* */
htsrequest req; // paramètres pour la requête
/*char digest[32+2]; // digest md5 généré par le moteur ("" si non généré)*/
@@ -147,9 +156,10 @@ int hts_read(htsblk* r,char* buff,int size);
//int HTS_TOTAL_RECV_CHECK(int var);
LLint check_downloadable_bytes(int rate);
-int hts_init(void);
-int hts_uninit(void);
-
+#ifndef HTTRACK_DEFLIB
+HTSEXT_API int hts_init(void);
+HTSEXT_API int hts_uninit(void);
+#endif
// fonctions principales
int http_fopen(char* adr,char* fil,htsblk* retour);
@@ -159,26 +169,33 @@ htsblk httpget(char* url);
//int newhttp(char* iadr,char* err=NULL);
int newhttp(char* iadr,htsblk* retour,int port,int waitconnect);
HTS_INLINE void deletehttp(htsblk* r);
+HTS_INLINE int deleteaddr(htsblk* r);
HTS_INLINE void deletesoc(T_SOC soc);
HTS_INLINE void deletesoc_r(htsblk* r);
htsblk http_location(char* adr,char* fil,char* loc);
htsblk http_test(char* adr,char* fil,char* loc);
int check_readinput(htsblk* r);
+int check_readinput_t(T_SOC soc, int timeout);
void http_fread(T_SOC soc,htsblk* retour);
LLint http_fread1(htsblk* r);
void treathead(t_cookie* cookie,char* adr,char* fil,htsblk* retour,char* rcvd);
void treatfirstline(htsblk* retour,char* rcvd);
-void infostatuscode(char* msg,int statuscode);
+#ifndef HTTRACK_DEFLIB
+HTSEXT_API void infostatuscode(char* msg,int statuscode);
+#endif
// sous-fonctions
htsblk xhttpget(char* adr,char* fil);
htsblk http_gethead(char* adr,char* fil);
LLint http_xfread1(htsblk* r,int bufl);
HTS_INLINE t_hostent* hts_gethostbyname(char* iadr, void* v_buffer);
-t_hostent* vxgethostbyname(char* hostname, void* v_buffer);
+#ifndef HTTRACK_DEFLIB
+HTSEXT_API t_hostent* vxgethostbyname(char* hostname, void* v_buffer);
+#endif
t_hostent* _hts_ghbn(t_dnscache* cache,char* iadr,t_hostent* retour);
int ftp_available(void);
#if HTS_DNSCACHE
+void hts_cache_free(t_dnscache* cache);
int hts_dnstest(char* _iadr);
t_dnscache* _hts_cache(void);
int _hts_lockdns(int i);
@@ -186,9 +203,13 @@ int _hts_lockdns(int i);
// outils divers
HTS_INLINE TStamp time_local(void);
-HTS_INLINE TStamp mtime_local(void);
+#ifndef HTTRACK_DEFLIB
+HTSEXT_API HTS_INLINE TStamp mtime_local(void);
+#endif
void sec2str(char *s,TStamp t);
-void qsec2str(char *st,TStamp t);
+#ifndef HTTRACK_DEFLIB
+HTSEXT_API void qsec2str(char *st,TStamp t);
+#endif
void time_gmt_rfc822(char* s);
void time_local_rfc822(char* s);
struct tm* convert_time_rfc822(char* s);
@@ -196,14 +217,18 @@ int set_filetime(char* file,struct tm* tm_time);
int set_filetime_rfc822(char* file,char* date);
HTS_INLINE void time_rfc822(char* s,struct tm * A);
HTS_INLINE void time_rfc822_local(char* s,struct tm * A);
-char* int2char(int n);
-char* int2bytes(LLint n);
-char* int2bytessec(long int n);
-char** int2bytes2(LLint n);
+#ifndef HTTRACK_DEFLIB
+HTSEXT_API char* int2char(int n);
+HTSEXT_API char* int2bytes(LLint n);
+HTSEXT_API char* int2bytessec(long int n);
+HTSEXT_API char** int2bytes2(LLint n);
+#endif
HTS_INLINE int sendc(htsblk* r, char* s);
-void finput(int fd,char* s,int max);
+int finput(int fd,char* s,int max);
int binput(char* buff,char* s,int max);
int linput(FILE* fp,char* s,int max);
+int linputsoc(T_SOC soc, char* s, int max);
+int linputsoc_t(T_SOC soc, char* s, int max, int timeout);
int linput_trim(FILE* fp,char* s,int max);
int linput_cpp(FILE* fp,char* s,int max);
void rawlinput(FILE* fp,char* s,int max);
@@ -226,23 +251,33 @@ int is_userknowntype(char *fil);
int is_dyntype(char *fil);
char* get_ext(char *fil);
int may_unknown(char* st);
-char* jump_identification(char*);
-char* jump_toport(char*);
+#ifndef HTTRACK_DEFLIB
+HTSEXT_API char* jump_identification(char*);
+HTSEXT_API char* jump_normalized(char*);
+HTSEXT_API char* jump_toport(char*);
+HTSEXT_API char* fil_normalized(char* source, char* dest);
+HTSEXT_API char* adr_normalized(char* source, char* dest);
+#endif
char* strrchr_limit(char* s, char c, char* limit);
HTS_INLINE char* jump_protocol(char* source);
-void code64(char* a,char* b);
-void unescape_amp(char* s);
-void escape_spc_url(char* s);
-void escape_in_url(char* s);
-void escape_uri(char* s);
-void escape_uri_utf(char* s);
-void escape_check_url(char* s);
-char* escape_check_url_addr(char* s);
-void x_escape_http(char* s,int mode);
-HTS_INLINE int ehexh(char c);
-char* unescape_http(char* s);
-char* unescape_http_unharm(char* s, int no_high);
-char* antislash_unescaped(char* s);
+void code64(unsigned char* a,int size_a,unsigned char* b,int crlf);
+#ifndef HTTRACK_DEFLIB
+HTSEXT_API void unescape_amp(char* s);
+HTSEXT_API void escape_spc_url(char* s);
+HTSEXT_API void escape_in_url(char* s);
+HTSEXT_API void escape_uri(char* s);
+HTSEXT_API void escape_uri_utf(char* s);
+HTSEXT_API void escape_check_url(char* s);
+HTSEXT_API char* escape_check_url_addr(char* s);
+HTSEXT_API void x_escape_http(char* s,int mode);
+HTSEXT_API void escape_remove_control(char* s);
+#endif
+int ehexh(char c);
+#ifndef HTTRACK_DEFLIB
+HTSEXT_API char* unescape_http(char* s);
+HTSEXT_API char* unescape_http_unharm(char* s, int no_high);
+HTSEXT_API char* antislash_unescaped(char* s);
+#endif
int ehex(char* s);
char* concat(const char* a,const char* b);
#define copychar(a) concat((a),NULL)
@@ -262,10 +297,11 @@ void hts_lowcase(char* s);
void hts_replace(char *s,char from,char to);
/* Spaces: CR,LF,TAB,FF */
-#define is_space(c) ( ((c)==' ') || ((c)=='\"') || ((c)==10) || ((c)==13) || ((c)==9) || ((c)==12) || ((c)=='\'') )
-#define is_realspace(c) ( ((c)==' ') || ((c)==10) || ((c)==13) || ((c)==9) || ((c)==12) )
+#define is_space(c) ( ((c)==' ') || ((c)=='\"') || ((c)==10) || ((c)==13) || ((c)==9) || ((c)==12) || ((c)==11) || ((c)=='\'') )
+#define is_realspace(c) ( ((c)==' ') || ((c)==10) || ((c)==13) || ((c)==9) || ((c)==12) || ((c)==11) )
#define is_taborspace(c) ( ((c)==' ') || ((c)==9) )
#define is_quote(c) ( ((c)=='\"') || ((c)=='\'') )
+#define is_retorsep(c) ( ((c)==10) || ((c)==13) || ((c)==9) )
//HTS_INLINE int is_space(char);
//HTS_INLINE int is_realspace(char);
@@ -279,10 +315,12 @@ int sig_ignore_flag( int setflag ); // flag ignore
void cut_path(char* fullpath,char* path,char* pname);
int fexist(char* s);
/*LLint fsize(char* s); */
-int fpsize(FILE* fp);
-int fsize(char* s);
+INTsys fpsize(FILE* fp);
+INTsys fsize(char* s);
/* root dir */
-char* hts_rootdir(char* file);
+#ifndef HTTRACK_DEFLIB
+HTSEXT_API char* hts_rootdir(char* file);
+#endif
// Threads
#if USE_PTHREAD
@@ -301,19 +339,20 @@ unsigned long _beginthread( beginthread_type start_address, unsigned stack_size,
/* variables globales */
//extern LLint HTS_TOTAL_RECV; // flux entrant reçu
//extern int HTS_TOTAL_RECV_STATE; // status: 0 tout va bien 1: ralentir un peu 2: ralentir 3: beaucoup
-extern hts_stat_struct HTS_STAT;
+extern HTSEXT_API hts_stat_struct HTS_STAT;
extern int _DEBUG_HEAD;
extern FILE* ioinfo;
/* constantes */
-extern const char hts_mime_keep[][32];
-extern const char hts_mime[][2][32];
-extern const char hts_detect[][32];
-extern const char hts_detectbeg[][32];
-extern const char hts_nodetect[][32];
-extern const char hts_detectURL[][32];
-extern const char hts_detectandleave[][32];
-extern const char hts_detect_js[][32];
+extern const char* hts_mime_keep[];
+extern const char* hts_mime[][2];
+extern const char* hts_main_mime[];
+extern const char* hts_detect[];
+extern const char* hts_detectbeg[];
+extern const char* hts_nodetect[];
+extern const char* hts_detectURL[];
+extern const char* hts_detectandleave[];
+extern const char* hts_detect_js[];
// defaut wrappers
void __cdecl htsdefault_init(void);
diff --git a/src/htsmodules.c b/src/htsmodules.c
new file mode 100644
index 0000000..27ab855
--- /dev/null
+++ b/src/htsmodules.c
@@ -0,0 +1,305 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: htsmodules.c subroutines: */
+/* external modules (parsers) */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+#ifndef _WIN32
+#if HTS_DLOPEN
+#include <dlfcn.h>
+#endif
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "htsglobal.h"
+#include "htsmodules.h"
+#include "htsopt.h"
+extern int fspc(FILE* fp,char* type);
+
+/* >>> Put all modules definitions here */
+#include "htszlib.h"
+#include "htsbase.h"
+
+typedef int (*t_hts_detect_swf)(htsmoduleStruct* str);
+typedef int (*t_hts_parse_swf)(htsmoduleStruct* str);
+/* <<< */
+
+/* >>> Put all modules includes here */
+#include "htsjava.h"
+#if HTS_USESWF
+#endif
+/* <<< */
+
+/* >>> Put all modules variables here */
+
+int swf_is_available = 0;
+t_hts_detect_swf hts_detect_swf = NULL;
+t_hts_parse_swf hts_parse_swf = NULL;
+
+int gz_is_available = 0;
+t_gzopen gzopen = NULL;
+t_gzread gzread = NULL;
+t_gzclose gzclose = NULL;
+
+int SSL_is_available = 0;
+t_SSL_shutdown SSL_shutdown = NULL;
+t_SSL_free SSL_free = NULL;
+t_SSL_CTX_ctrl SSL_CTX_ctrl = NULL;
+t_SSL_new SSL_new = NULL;
+t_SSL_clear SSL_clear = NULL;
+t_SSL_set_fd SSL_set_fd = NULL;
+t_SSL_set_connect_state SSL_set_connect_state = NULL;
+t_SSL_connect SSL_connect = NULL;
+t_SSL_get_error SSL_get_error = NULL;
+t_SSL_write SSL_write = NULL;
+t_SSL_read SSL_read = NULL;
+t_SSL_library_init SSL_library_init = NULL;
+t_ERR_load_crypto_strings ERR_load_crypto_strings = NULL;
+t_ERR_load_SSL_strings ERR_load_SSL_strings = NULL;
+t_SSLv23_client_method SSLv23_client_method = NULL;
+t_SSL_CTX_new SSL_CTX_new = NULL;
+t_ERR_error_string ERR_error_string = NULL;
+t_SSL_load_error_strings SSL_load_error_strings = NULL;
+
+int V6_is_available = HTS_INET6;
+
+char WHAT_is_available[64]="";
+/* <<< */
+
+/* memory checks */
+HTSEXT_API htsErrorCallback htsCallbackErr = NULL;
+HTSEXT_API int htsMemoryFastXfr = 1; /* fast xfr by default */
+void abortLog__fnc(char* msg, char* file, int line);
+void abortLog__fnc(char* msg, char* file, int line) {   /* dump the abort reason into a CRASH.TXT file */
+  static const char* const where[] = { "CRASH.TXT", "/tmp/CRASH.TXT", "C:\\CRASH.TXT" };
+  FILE* out = NULL;
+  int k;
+  for(k = 0 ; out == NULL && k < 3 ; k++) out = fopen(where[k], "wb");   /* first location that opens wins */
+  if (out == NULL) return;   /* no location could be opened: nothing is written */
+  fprintf(out, "HTTrack " HTTRACK_VERSIONID " closed at '%s', line %d\r\n", file, line);
+  fprintf(out, "Reason:\r\n%s\r\n", msg);
+  fflush(out);
+  fclose(out);
+}
+HTSEXT_API t_abortLog abortLog__ = abortLog__fnc; /* avoid VC++ inlining */
+
+static void htspe_log(htsmoduleStruct* str, char* msg);
+
+int hts_parse_externals(htsmoduleStruct* str) { /* hands str to the first module whose detector accepts it; returns that parser's result, or -1 */
+ /* >>> Put all module calls here */
+
+ /* JAVA: tried first */
+ if (hts_detect_java(str)) {
+ htspe_log(str, "java-lib");
+ return hts_parse_java(str);
+ }
+
+#if HTS_USESWF
+ /* FLASH
+ (external module derived from Macromedia(tm)'s classes)
+ */
+ else if (swf_is_available && hts_detect_swf(str)) { /* this "else" continues the JAVA "if" above; only consulted once the module was loaded */
+ htspe_log(str, "swf-lib");
+ return hts_parse_swf(str);
+ }
+#endif
+
+ /* <<< */
+
+ /* Not detected: no module claimed this object */
+ return -1;
+}
+
+/* Load library file_ and look up fncname in it. NOTE: the handle is NOT closed when the symbol is found */
+void* getFunctionPtr(char* file_, char* fncname) {
+ char file[1024]; /* local copy of the name, so that an extension can be appended */
+ void* handle;
+ void* userfunction = NULL; /* returned as-is: stays NULL if the library or the symbol is missing */
+ strcpybuff(file, file_);
+#ifdef _WIN32
+ handle = LoadLibrary(file);
+ if (handle == NULL) { /* retry with the ".dll" extension appended */
+ strcatbuff(file, ".dll");
+ handle = LoadLibrary(file);
+ }
+#else
+ handle = dlopen(file, RTLD_LAZY);
+ if (handle == NULL) { /* retry with the ".so" extension appended */
+ strcatbuff(file, ".so");
+ handle = dlopen(file, RTLD_LAZY);
+ }
+#endif
+ if (handle) {
+ userfunction = (void*) DynamicGet(handle, fncname);
+ if (userfunction == NULL) { /* symbol not found: the library is of no use, release it */
+#ifdef _WIN32
+ FreeLibrary(handle);
+#else
+ dlclose(handle);
+#endif
+ }
+ }
+ return userfunction;
+}
+
+void htspe_init() { /* one-time lookup of the optional libraries (zlib, OpenSSL, swf); fills the *_is_available flags */
+ static int initOk = 0; /* set on the first call, so later calls do nothing */
+ if (!initOk) {
+ initOk = 1;
+
+ /* >>> Put all module initializations here */
+
+ /* Zlib: available only if gzopen, gzread and gzclose are all resolved */
+#if HTS_DLOPEN
+ {
+ void* handle;
+#ifdef _WIN32
+ handle = LoadLibrary("zlib");
+#else
+ handle = dlopen("libz.so.1", RTLD_LAZY);
+#endif
+ if (handle) {
+ gzopen = (t_gzopen) DynamicGet(handle, "gzopen");
+ gzread = (t_gzread) DynamicGet(handle, "gzread");
+ gzclose = (t_gzclose) DynamicGet(handle, "gzclose");
+ if (gzopen && gzread && gzclose) {
+ gz_is_available = 1;
+ }
+ }
+ }
+#endif
+
+ /* OpenSSL: entry points are resolved one by one into the global function pointers */
+#if HTS_DLOPEN
+ {
+ void* handle;
+#ifdef _WIN32
+ handle = LoadLibrary("ssleay32");
+#else
+ /* We are compatible with 0.9.6/7 and potentially above */
+ handle = dlopen("libssl.so.0.9.7", RTLD_LAZY);
+ if (handle == NULL) {
+ handle = dlopen("libssl.so.0.9.6", RTLD_LAZY);
+ }
+ if (handle == NULL) {
+ /* Try harder: fall back to the unversioned-minor name */
+ handle = dlopen("libssl.so.0", RTLD_LAZY);
+ }
+#endif
+ if (handle) {
+ SSL_shutdown = (t_SSL_shutdown) DynamicGet(handle, "SSL_shutdown");
+ SSL_free = (t_SSL_free) DynamicGet(handle, "SSL_free");
+ SSL_new = (t_SSL_new) DynamicGet(handle, "SSL_new");
+ SSL_clear = (t_SSL_clear) DynamicGet(handle, "SSL_clear");
+ SSL_set_fd = (t_SSL_set_fd) DynamicGet(handle, "SSL_set_fd");
+ SSL_set_connect_state = (t_SSL_set_connect_state) DynamicGet(handle, "SSL_set_connect_state");
+ SSL_connect = (t_SSL_connect) DynamicGet(handle, "SSL_connect");
+ SSL_get_error = (t_SSL_get_error) DynamicGet(handle, "SSL_get_error");
+ SSL_write = (t_SSL_write) DynamicGet(handle, "SSL_write");
+ SSL_read = (t_SSL_read) DynamicGet(handle, "SSL_read");
+ SSL_library_init = (t_SSL_library_init) DynamicGet(handle, "SSL_library_init");
+ ERR_load_SSL_strings = (t_ERR_load_SSL_strings) DynamicGet(handle, "ERR_load_SSL_strings");
+ SSLv23_client_method = (t_SSLv23_client_method) DynamicGet(handle, "SSLv23_client_method");
+ SSL_CTX_new = (t_SSL_CTX_new) DynamicGet(handle, "SSL_CTX_new");
+ SSL_load_error_strings = (t_SSL_load_error_strings) DynamicGet(handle, "SSL_load_error_strings");
+ SSL_CTX_ctrl = (t_SSL_CTX_ctrl) DynamicGet(handle, "SSL_CTX_ctrl");
+#ifdef _WIN32
+ handle = LoadLibrary("libeay32"); /* NOTE(review): result is not checked before the two lookups below - confirm DynamicGet tolerates a NULL handle */
+#endif
+ ERR_load_crypto_strings = (t_ERR_load_crypto_strings) DynamicGet(handle, "ERR_load_crypto_strings");
+ ERR_error_string = (t_ERR_error_string) DynamicGet(handle, "ERR_error_string");
+
+ if (SSL_shutdown && SSL_free && SSL_CTX_ctrl && SSL_new && SSL_clear && /* ERR_load_SSL_strings and ERR_load_crypto_strings are not part of this test */
+ SSL_set_fd && SSL_set_connect_state && SSL_connect && SSL_get_error && SSL_write
+ && SSL_read && SSL_library_init && SSLv23_client_method && SSL_CTX_new
+ && SSL_load_error_strings && ERR_error_string) {
+ SSL_is_available = 1;
+ }
+ }
+ }
+#endif
+ /* */
+
+ /*
+ FLASH
+ Load the library on-the-fly, if available
+ If not, that's not a problem
+ */
+#if HTS_DLOPEN
+ {
+#ifdef _WIN32
+ void* handle = LoadLibrary("htsswf");
+#else
+ void* handle = dlopen("libhtsswf.so.1", RTLD_LAZY);
+#endif
+ if (handle) {
+ hts_detect_swf = (t_hts_detect_swf) DynamicGet(handle, "hts_detect_swf");
+ hts_parse_swf = (t_hts_parse_swf) DynamicGet(handle, "hts_parse_swf");
+ if (hts_detect_swf && hts_parse_swf) {
+ swf_is_available = 1;
+ }
+ }
+ // FreeLibrary(handle);
+ // dlclose(handle);
+ }
+#endif
+
+ /* <<< */
+
+ /* Options availability: "-noXXX" for each missing feature, "+swf" when the swf module was loaded */
+ sprintf(WHAT_is_available, "%s%s%s%s",
+ V6_is_available ? "" : "-noV6",
+ gz_is_available ? "" : "-nozip",
+ SSL_is_available ? "" : "-nossl",
+ swf_is_available ? "+swf" : "");
+
+
+ }
+}
+
+static void htspe_log(htsmoduleStruct* str, char* msg) {  /* debug trace: which module parses which file */
+  httrackp* const settings = (httrackp*) str->opt;
+  if (settings->debug <= 1 || settings->log == NULL)      /* only when debug > 1 and a log stream is set */
+    return;
+  fspc(settings->log,"debug");
+  fprintf(settings->log,"(External module): parsing %s using module %s"LF,
+    str->filename, msg);
+}
+
+HTSEXT_API const char* hts_is_available(void) { /* returns the availability string that htspe_init() assembles (empty until it has run) */
+ return WHAT_is_available;
+}
diff --git a/src/htsmodules.h b/src/htsmodules.h
new file mode 100644
index 0000000..7d1154b
--- /dev/null
+++ b/src/htsmodules.h
@@ -0,0 +1,111 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: htsmodules.h subroutines: */
+/* external modules (parsers) */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+#ifndef HTS_MODULES
+#define HTS_MODULES
+
+/* Function type to add links inside the module
+ link : link to add (absolute or relative)
+ str : structure defined below
+ Returns 1 if the link was added, 0 if not
+*/
+typedef struct htsmoduleStruct htsmoduleStruct;
+typedef int (* t_htsAddLink)(htsmoduleStruct* str, char* link);
+
+/* Structure passed to the module */
+struct htsmoduleStruct {
+ /* Read-only elements */
+ char* filename; /* filename (C:\My Web Sites\...) */
+ int size; /* size of filename (should be > 0) */
+ char* mime; /* MIME type of the object */
+ char* url_host; /* incoming hostname (www.foo.com) */
+ char* url_file; /* incoming filename (/bar/bar.gny) */
+
+ /* Write-only */
+ char* err_msg; /* if an error occurred, the error message (max. 1KB) */
+
+ /* Read/Write */
+ int relativeToHtmlLink; /* set this to 1 if all urls you pass to addLink
+ are in fact relative to the html file where your
+ module was originally */
+
+ /* Callbacks */
+ t_htsAddLink addLink; /* call this function when links are
+ being detected. it is not your responsibility to decide
+ if the engine will keep them, or not. */
+
+ /* Optional */
+ char* localLink; /* if non null, the engine will write there the local
+ relative filename of the link added by addLink(), or
+ the absolute path if the link was refused by the wizard */
+ int localLinkSize; /* size of the optional buffer */
+
+ /* User-defined */
+ void* userdef; /* can be used by callback routines
+ */
+
+ /* ---- ---- ---- */
+
+ /* Internal use - please don't touch */
+ void* liens;
+ void* opt;
+ void* back;
+ int back_max;
+ void* cache;
+ void* hashptr;
+ int numero_passe;
+ int add_tab_alloc;
+ /* */
+ int* lien_tot_;
+ int* ptr_;
+ int* lien_size_;
+ char** lien_buffer_;
+ /* Internal use - please don't touch */
+
+};
+
+extern void htspe_init(void); /* NOTE(review): presumably probes the optional libraries and sets the flags below -- confirm in htsmodules.c */
+extern int hts_parse_externals(htsmoduleStruct* str); /* NOTE(review): presumably runs the external parser modules on str -- confirm */
+extern void* getFunctionPtr(char* file, char* fncname); /* NOTE(review): presumably resolves symbol fncname in library file -- confirm */
+
+extern int gz_is_available; /* when 0, "-nozip" is added to WHAT_is_available */
+extern int swf_is_available; /* set to 1 once hts_detect_swf and hts_parse_swf are both resolved; adds "+swf" */
+extern int SSL_is_available; /* set to 1 once the required SSL symbols are resolved; when 0, "-nossl" is added */
+extern int V6_is_available; /* when 0, "-noV6" is added to WHAT_is_available */
+extern char WHAT_is_available[64]; /* option summary string, returned by hts_is_available() */
+
+#endif
diff --git a/src/htsname.c b/src/htsname.c
index 2df0c98..56fa6a6 100644
--- a/src/htsname.c
+++ b/src/htsname.c
@@ -53,7 +53,7 @@ Please visit our Website: http://www.httrack.com
{ /* ajout nom */\
char buff[HTS_URLMAXSIZE*2];\
buff[0]='\0';\
- strncat(buff,start_pos,(int) (nom_pos - start_pos));\
+ strncatbuff(buff,start_pos,(int) (nom_pos - start_pos));\
url_savename_addstr(save,buff);\
}
@@ -83,6 +83,10 @@ static const char *hts_tbdev[] =
// système intelligent, qui renomme en cas de besoin (exemple: deux INDEX.HTML et index.html)
int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_adr,char* former_fil,char* referer_adr,char* referer_fil,httrackp* opt,lien_url** liens,int lien_tot,lien_back* back,int back_max,cache_back* cache,hash_struct* hash,int ptr,int numero_passe) {
char newfil[HTS_URLMAXSIZE*2]; /* ="" */
+ /*char normadr_[HTS_URLMAXSIZE*2];*/
+ char normfil_[HTS_URLMAXSIZE*2];
+ char* normadr;
+ char* normfil;
char* fil;
char* adr;
char* print_adr;
@@ -111,11 +115,25 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
save[0]='\0';
// fil
fil = fil_complete;
+ // copy of fil, used for lookups (see urlhack)
+ normfil = fil;
// et adr (sauter user/pass)
// on prend le parti de mettre les fichiers avec login/pass au même endroit que si ils
// étaient capturés sans ces paramètres
// c'est pour cette raison qu'on ignore totalement adr_complete (même pour la recherche en table de hachage)
- adr=jump_identification(adr_complete);
+ adr = jump_identification(adr_complete);
+ // copy of adr, used for lookups (see urlhack)
+ normadr = adr;
+
+ // normalize the URL:
+ // www.foo.com -> foo.com
+ // www-42.foo.com -> foo.com
+ // foo.com/bar//foobar -> foo.com/bar/foobar
+ if (opt->urlhack) {
+ // copy of adr (without protocol), used for lookups (see urlhack)
+ normadr=jump_normalized(adr);
+ normfil=fil_normalized(fil,normfil_);
+ }
// à afficher sans ftp://
print_adr=jump_protocol(adr);
@@ -123,7 +141,7 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
// court-circuit pour lien primaire
if (strnotempty(adr)==0) {
if (strcmp(fil,"primary")==0) {
- strcat(save,"primary.html");
+ strcatbuff(save,"primary.html");
return 0;
}
}
@@ -136,43 +154,43 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
int i;
#if HTS_HASH
- i=hash_read(hash,adr,fil_complete,1); // recherche table 1 (adr+fil)
+ i=hash_read(hash,normadr,normfil,1,opt->urlhack); // recherche table 1 (adr+fil)
if (i>=0) { // ok, trouvé
- strcpy(save,liens[i]->sav);
+ strcpybuff(save,liens[i]->sav);
return 0;
}
- i=hash_read(hash,adr,fil_complete,2); // recherche table 2 (former_adr+former_fil)
+ i=hash_read(hash,normadr,normfil,2,opt->urlhack); // recherche table 2 (former_adr+former_fil)
if (i>=0) { // ok, trouvé
// copier location moved!
- strcpy(adr_complete,liens[i]->adr);
- strcpy(fil_complete,liens[i]->fil);
+ strcpybuff(adr_complete,liens[i]->adr);
+ strcpybuff(fil_complete,liens[i]->fil);
// et save
- strcpy(save,liens[i]->sav); // copier (formé à partir du nouveau lien!)
+ strcpybuff(save,liens[i]->sav); // copier (formé à partir du nouveau lien!)
return 0;
}
#else
for(i=lien_tot-1;i>=0;i--) {
#if HTS_CASSE
- if ((strcmp(liens[i]->adr,adr)==0) && (strcmp(liens[i]->fil,fil_complete)==0))
+ if ((strcmp(liens[i]->adr,normadr)==0) && (strcmp(liens[i]->fil,normfil)==0))
#else
- if ((strfield2(liens[i]->adr,adr)) && (strfield2(liens[i]->fil,fil_complete)))
+ if ((strfield2(liens[i]->adr,normadr)) && (strfield2(liens[i]->fil,normfil)))
#endif
{ // ok c'est le même lien, adresse déja définie
- strcpy(save,liens[i]->sav);
+ strcpybuff(save,liens[i]->sav);
return 0;
}
if (liens[i]->former_adr) { // tester ancienne loc?
#if HTS_CASSE
- if ((strcmp(liens[i]->former_adr,adr)==0) && (strcmp(liens[i]->former_fil,fil_complete)==0))
+ if ((strcmp(liens[i]->former_adr,normadr)==0) && (strcmp(liens[i]->former_fil,normfil)==0))
#else
- if ((strfield2(liens[i]->former_adr,adr)) && (strfield2(liens[i]->former_fil,fil_complete)))
+ if ((strfield2(liens[i]->former_adr,normadr)) && (strfield2(liens[i]->former_fil,normfil)))
#endif
{
// copier location moved!
- strcpy(adr_complete,liens[i]->adr);
- strcpy(fil_complete,liens[i]->fil);
+ strcpybuff(adr_complete,liens[i]->adr);
+ strcpybuff(fil_complete,liens[i]->fil);
// et save
- strcpy(save,liens[i]->sav); // copier (formé à partir du nouveau lien!)
+ strcpybuff(save,liens[i]->sav); // copier (formé à partir du nouveau lien!)
return 0;
}
}
@@ -182,20 +200,20 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
// chercher sans / ou avec / dans former
{
char fil_complete_patche[HTS_URLMAXSIZE*2];
- strcpy(fil_complete_patche,fil_complete);
+ strcpybuff(fil_complete_patche,normfil);
// Version avec ou sans /
if (fil_complete_patche[strlen(fil_complete_patche)-1]=='/')
fil_complete_patche[strlen(fil_complete_patche)-1]='\0';
else
- strcat(fil_complete_patche,"/");
+ strcatbuff(fil_complete_patche,"/");
#if HTS_HASH
- i=hash_read(hash,adr,fil_complete_patche,2); // recherche table 2 (former_adr+former_fil)
+ i=hash_read(hash,normadr,fil_complete_patche,2,opt->urlhack); // recherche table 2 (former_adr+former_fil)
if (i>=0) {
// écraser fil et adr (pas former_fil?????)
- strcpy(adr_complete,liens[i]->adr);
- strcpy(fil_complete,liens[i]->fil);
+ strcpybuff(adr_complete,liens[i]->adr);
+ strcpybuff(fil_complete,liens[i]->fil);
// écrire save
- strcpy(save,liens[i]->sav);
+ strcpybuff(save,liens[i]->sav);
return 0;
}
#else
@@ -203,16 +221,16 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
for(i=lien_tot-1;i>=0;i--) {
if (liens[i]->former_adr) { // former-adr?
#if HTS_CASSE
- if ((strcmp(liens[i]->former_adr,adr)==0) && (strcmp(liens[i]->former_fil,fil_complete_patche)==0))
+ if ((strcmp(liens[i]->former_adr,normadr)==0) && (strcmp(liens[i]->former_fil,fil_complete_patche)==0))
#else
- if ((strfield2(liens[i]->former_adr,adr)) && (strfield2(liens[i]->former_fil,fil_complete_patche)))
+ if ((strfield2(liens[i]->former_adr,normadr)) && (strfield2(liens[i]->former_fil,fil_complete_patche)))
#endif
{ // ok c'est le même lien, adresse déja définie
// écraser fil et adr (pas former_fil?????)
- strcpy(adr_complete,liens[i]->adr);
- strcpy(fil_complete,liens[i]->fil);
+ strcpybuff(adr_complete,liens[i]->adr);
+ strcpybuff(fil_complete,liens[i]->fil);
// écrire save
- strcpy(save,liens[i]->sav);
+ strcpybuff(save,liens[i]->sav);
return 0;
}
}
@@ -228,14 +246,14 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
char* a;
a=strchr(fil,'?');
if (a!=NULL) {
- strncat(newfil,fil,(int) (a - fil));
+ strncatbuff(newfil,fil,(int) (a - fil));
} else {
- strcpy(newfil,fil);
+ strcpybuff(newfil,fil);
}
fil=newfil;
}
// décoder %
- strcpy(fil,unescape_http(fil));
+ strcpybuff(fil,unescape_http(fil));
/*
{
char tempo[HTS_URLMAXSIZE*2];
@@ -249,7 +267,7 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
tempo[j++]=fil[i];
}
tempo[j++]='\0';
- strcpy(fil,tempo);
+ strcpybuff(fil,tempo);
}
*/
@@ -261,7 +279,7 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
(strcmp(get_ext(fil),"html") != 0)
&& (strcmp(get_ext(fil),"htm") != 0)
) {
- strcpy(ext,"html");
+ strcpybuff(ext,"html");
ext_chg=1;
}
break;
@@ -285,14 +303,15 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
// si option check_type activée
if ((opt->check_type) && (!ext_chg)) {
+ int ishtest;
if ( (!strfield(adr_complete,"file://"))
&& (!strfield(adr_complete,"ftp://"))
) {
// tester type avec requète HEAD si on ne connait pas le type du fichier
if (!( (opt->check_type==1) && (fil[strlen(fil)-1]=='/') )) // slash doit être html?
- if (ishtml(fil)<0) { // on ne sait pas si c'est un html ou un fichier..
+ if ((ishtest=ishtml(fil)) < 0) { // on ne sait pas si c'est un html ou un fichier..
// lire dans le cache
- htsblk r = cache_read(opt,cache,adr,fil,NULL); // test uniquement
+ htsblk r = cache_read(opt,cache,adr,fil,NULL,NULL); // test uniquement
if (r.statuscode != -1) { // pas d'erreur de lecture cache
char s[16]; s[0]='\0';
if ( (opt->debug>1) && (opt->log!=NULL) ) {
@@ -301,13 +320,13 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
}
if (strnotempty(r.cdispo)) { /* filename given */
ext_chg=2; /* change filename */
- strcpy(ext,r.cdispo);
+ strcpybuff(ext,r.cdispo);
}
- else if (!may_unknown(r.contenttype)) { // on peut patcher à priori?
+ else if (!may_unknown(r.contenttype) || ishtest == -2) { // on peut patcher à priori?
give_mimext(s,r.contenttype); // obtenir extension
if (strnotempty(s)>0) { // on a reconnu l'extension
ext_chg=1;
- strcpy(ext,s);
+ strcpybuff(ext,s);
}
}
//
@@ -326,21 +345,25 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
fspc(opt->log,"debug"); fprintf(opt->log,"Testing link type %s%s"LF,adr_complete,fil_complete);
test_flush;
}
- strcpy(curr_adr,adr_complete);
- strcpy(curr_fil,fil_complete);
+ strcpybuff(curr_adr,adr_complete);
+ strcpybuff(curr_fil,fil_complete);
// ajouter dans le backing le fichier en mode test
// savename: rien car en mode test
if (back_add(back,back_max,opt,cache,curr_adr,curr_fil,BACK_ADD_TEST,referer_adr,referer_fil,1,NULL)!=-1) {
int b;
b=back_index(back,back_max,curr_adr,curr_fil,BACK_ADD_TEST);
if (b>=0) {
+ int stop_looping=0;
int petits_tours=0;
int get_test_request=0; // en cas de bouclage sur soi même avec HEAD, tester avec GET.. parfois c'est la cause des problèmes
do {
// temps à attendre, et remplir autant que l'on peut le cache (backing)
- if (back[b].status>0) back_wait(back,back_max,opt,cache,0);
- if (ptr>=0)
+ if (back[b].status>0) {
+ back_wait(back,back_max,opt,cache,0);
+ }
+ if (ptr>=0) {
back_fillmax(back,back_max,opt,cache,liens,ptr,numero_passe,lien_tot);
+ }
// on est obligé d'appeler le shell pour le refresh..
#if HTS_ANALYSTE
@@ -359,8 +382,9 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
if (!hts_htmlcheck_loop(back,back_max,b,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) {
return -1;
- } else if (_hts_cancel) { // cancel 2 ou 1 (cancel parsing)
- back_delete(back,b); // cancel test
+ } else if (_hts_cancel || !back_checkmirror(opt)) { // cancel 2 ou 1 (cancel parsing)
+ back_delete(opt,back,b); // cancel test
+ stop_looping = 1;
}
}
#endif
@@ -378,7 +402,7 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
char mov_url[HTS_URLMAXSIZE*2],mov_adr[HTS_URLMAXSIZE*2],mov_fil[HTS_URLMAXSIZE*2];
mov_url[0]=mov_adr[0]=mov_fil[0]='\0';
//
- strcpy(mov_url,back[b].r.location); // copier URL
+ strcpybuff(mov_url,back[b].r.location); // copier URL
if (ident_url_relatif(mov_url,curr_adr,curr_fil,mov_adr,mov_fil)>=0) {
// si non bouclage sur soi même, ou si test avec GET non testé
if ((strcmp(mov_adr,curr_adr)) || (strcmp(mov_fil,curr_fil)) || (get_test_request==0)) {
@@ -389,8 +413,8 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
// recopier former_adr/fil?
if ((former_adr) && (former_fil)) {
if (strnotempty(former_adr)==0) { // Pas déja noté
- strcpy(former_adr,curr_adr);
- strcpy(former_fil,curr_fil);
+ strcpybuff(former_adr,curr_adr);
+ strcpybuff(former_fil,curr_fil);
}
}
@@ -400,25 +424,25 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
robots_wizard* robots = (robots_wizard*) opt->robotsptr;
if (hts_acceptlink(opt,ptr,lien_tot,liens,
mov_adr,mov_fil,
- opt->filters.filters,opt->filters.filptr,opt->maxfilter,
- robots,
&set_prio_to,
NULL) == 1)
{ /* forbidden */
has_been_moved = 1;
- back_delete(back,b); // ok
- strcpy(curr_adr,mov_adr);
- strcpy(curr_fil,mov_fil);
+ back_maydelete(opt,back,b); // ok
+ strcpybuff(curr_adr,mov_adr);
+ strcpybuff(curr_fil,mov_fil);
mov_url[0]='\0';
+ stop_looping = 1;
}
}
// ftp: stop!
if (strfield(mov_url,"ftp://")) { // ftp, ok on arrête
has_been_moved = 1;
- back_delete(back,b); // ok
- strcpy(curr_adr,mov_adr);
- strcpy(curr_fil,mov_fil);
+ back_maydelete(opt,back,b); // ok
+ strcpybuff(curr_adr,mov_adr);
+ strcpybuff(curr_fil,mov_fil);
+ stop_looping = 1;
} else if (*mov_url) {
char* methode;
if (!get_test_request)
@@ -438,9 +462,9 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
}
// libérer emplacement backing actuel et attendre le prochain
- back_delete(back,b);
- strcpy(curr_adr,mov_adr);
- strcpy(curr_fil,mov_fil);
+ back_maydelete(opt,back,b);
+ strcpybuff(curr_adr,mov_adr);
+ strcpybuff(curr_fil,mov_fil);
b=back_index(back,back_max,curr_adr,curr_fil,methode);
if (!get_test_request)
has_been_moved = 1; // sinon ne pas forcer has_been_moved car non déplacé
@@ -470,21 +494,20 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
}
} // ok, leaving
}
-
- } while(back[b].status>0);
+ } while(!stop_looping && back[b].status > 0 && back[b].status < 1000);
// Si non déplacé, forcer type?
if (!has_been_moved) {
if (back[b].r.statuscode!=-10) { // erreur
if (strnotempty(back[b].r.contenttype)==0)
- strcpy(back[b].r.contenttype,"text/html"); // message d'erreur en html
+ strcpybuff(back[b].r.contenttype,"text/html"); // message d'erreur en html
// Finalement on, renvoie un erreur, pour ne toucher à rien dans le code
// libérer emplacement backing
/*if (opt->errlog!=NULL) {
fspc(opt->errlog,0); fprintf(opt->errlog,"Error: (during prefetch) %s (%d) to link %s at %s%s"LF,back[b].r.msg,back[b].r.statuscode,back[b].r.location,curr_adr,curr_fil);
test_flush;
}
- back_delete(back,b);
+ back_delete(opt,back,b);
return -1; // ERREUR (404 par exemple)
*/
}
@@ -494,13 +517,13 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
s[0]='\0';
if (strnotempty(back[b].r.cdispo)) { /* filename given */
ext_chg=2; /* change filename */
- strcpy(ext,back[b].r.cdispo);
+ strcpybuff(ext,back[b].r.cdispo);
}
- else if ((!may_unknown(back[b].r.contenttype)) || (!get_ext(back[b].url_fil)) ) { // on peut patcher à priori? (pas interdit ou pas de type)
+ else if (!may_unknown(back[b].r.contenttype) || ishtest == -2 ) { // on peut patcher à priori? (pas interdit ou pas de type)
give_mimext(s,back[b].r.contenttype); // obtenir extension
if (strnotempty(s)>0) { // on a reconnu l'extension
ext_chg=1;
- strcpy(ext,s);
+ strcpybuff(ext,s);
}
}
}
@@ -508,14 +531,14 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
// FIN Si non déplacé, forcer type?
// libérer emplacement backing
- back_delete(back,b);
+ back_maydelete(opt,back,b);
// --- --- ---
// oops, a été déplacé.. on recalcule en récursif (osons!)
if (has_been_moved) {
// copier adr, fil (optionnel, mais sinon marche pas pour le rip)
- strcpy(adr_complete,curr_adr);
- strcpy(fil_complete,curr_fil);
+ strcpybuff(adr_complete,curr_adr);
+ strcpybuff(fil_complete,curr_fil);
// copier adr, fil
return url_savename(curr_adr,curr_fil,save,NULL,NULL,referer_adr,referer_fil,opt,liens,lien_tot,back,back_max,cache,hash,ptr,numero_passe);
@@ -551,12 +574,12 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
// Donner nom par défaut?
if (fil[strlen(fil)-1]=='/') {
if (!strfield(adr_complete,"ftp://"))
- strcat(fil,DEFAULT_HTML); // nommer page par défaut!!
+ strcatbuff(fil,DEFAULT_HTML); // nommer page par défaut!!
else {
if (!opt->proxy.active)
- strcat(fil,DEFAULT_FTP); // nommer page par défaut (texte)
+ strcatbuff(fil,DEFAULT_FTP); // nommer page par défaut (texte)
else
- strcat(fil,DEFAULT_HTML); // nommer page par défaut (à priori ici html depuis un proxy http)
+ strcatbuff(fil,DEFAULT_HTML); // nommer page par défaut (à priori ici html depuis un proxy http)
}
}
// Changer extension?
@@ -574,13 +597,13 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
if (ext_chg==1) {
while((a > fil) && (*a!='.') && (*a!='/')) a--;
if (*a=='.') *a='\0'; // couper
- strcat(fil,"."); // recopier point
+ strcatbuff(fil,"."); // recopier point
} else {
while(( a > fil) && (*a!='/')) a--;
if (*a=='/') a++;
*a='\0';
}
- strcat(fil,ext); // copier ext/nom
+ strcatbuff(fil,ext); // copier ext/nom
}
// Rechercher premier / et dernier .
@@ -636,24 +659,63 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
}
*b='\0';
switch(tok=*a++) {
- case '[': // %[param]
+ case '[': // %[param:prefix_if_not_empty:suffix_if_not_empty:empty_replacement:notfound_replacement]
if (strchr(a,']')) {
- char name[256];
- char* c=name;
+ int pos=0;
+ char name[5][256]; /* [0]=param name, [1]=prefix, [2]=suffix, [3]=empty replacement, [4]=not-found replacement */
+ char* c=name[0];
+ for(pos = 0 ; pos < 5 ; pos++) {
+ name[pos][0]='\0';
+ }
+ pos=0;
while(*a!=']') {
- *c++=*a++;
+ if (*a == ':' && pos < 4) { // next token (5 tokens at most; any further ':' stays in the last token)
+ c=name[++pos];
+ a++;
+ } else if (c < name[pos] + sizeof(name[pos]) - 2) { // room left (keeps one byte for "=" and one for the NUL)
+ *c++=*a++;
+ *c='\0';
+ } else {
+ a++; // token too long: drop the character, but always advance so the loop terminates
+ }
}
a++;
- *c++='\0';
- strcat(name,"="); /* param=.. */
+ strcatbuff(name[0],"="); /* param=.. */
c=strchr(fil_complete,'?');
/* parameters exists */
if (c) {
- c=strstr(c,name); /* finds param= */
- if (c) {
- c+=strlen(name); /* jumps "param=" */
- while( (*c) && (*c!='&'))
- *b++=*c++;
+ char* cp;
+ while((cp = strstr(c+1, name[0])) && *(cp-1) != '?' && *(cp-1) != '&') { /* finds [?&]param= */
+ c = cp;
+ }
+ if (cp) {
+ c = cp + strlen(name[0]); /* jumps "param=" */
+ strcpybuff(b, name[1]); /* prefix */
+ b += strlen(b);
+ if (*c != '\0' && *c != '&') {
+ char* d = name[0];
+ /* copy the raw value (taken from the URL) into name[0], truncated to the buffer size */
+ while(*c != '\0' && *c != '&' && d < name[0] + sizeof(name[0]) - 1) {
+ *d++ = *c++;
+ }
+ *d = '\0';
+ d = unescape_http(name[0]);
+ if (d && *d) {
+ strcpybuff(b, d); /* value */
+ b += strlen(b);
+ } else {
+ strcpybuff(b, name[3]); /* empty replacement if any */
+ b += strlen(b);
+ }
+ } else {
+ strcpybuff(b, name[3]); /* empty replacement if any */
+ b += strlen(b);
+ }
+ strcpybuff(b, name[2]); /* suffix */
+ b += strlen(b);
+ } else {
+ strcpybuff(b, name[4]); /* not found replacement if any */
+ b += strlen(b);
}
}
}
@@ -662,14 +724,14 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
case 'n': // nom sans ext
if (dot_pos) {
if (!short_ver) // Noms longs
- strncat(b,nom_pos,(int) (dot_pos - nom_pos));
+ strncatbuff(b,nom_pos,(int) (dot_pos - nom_pos));
else
- strncat(b,nom_pos,min((int) (dot_pos - nom_pos),8));
+ strncatbuff(b,nom_pos,min((int) (dot_pos - nom_pos),8));
} else {
if (!short_ver) // Noms longs
- strcpy(b,nom_pos);
+ strcpybuff(b,nom_pos);
else
- strncat(b,nom_pos,8);
+ strncatbuff(b,nom_pos,8);
}
b+=strlen(b); // pointer à la fin
break;
@@ -678,28 +740,28 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
*b='\0';
if (dot_pos) {
if (!short_ver) // Noms longs
- strncat(b,nom_pos,(int) (dot_pos - nom_pos));
+ strncatbuff(b,nom_pos,(int) (dot_pos - nom_pos));
else
- strncat(b,nom_pos,min((int) (dot_pos - nom_pos),8));
+ strncatbuff(b,nom_pos,min((int) (dot_pos - nom_pos),8));
} else {
if (!short_ver) // Noms longs
- strcpy(b,nom_pos);
+ strcpybuff(b,nom_pos);
else
- strncat(b,nom_pos,8);
+ strncatbuff(b,nom_pos,8);
}
b+=strlen(b); // pointer à la fin
// RECOPIE NOM + EXT
*b='\0';
if (dot_pos) {
if (!short_ver) // Noms longs
- strcpy(b,dot_pos+1);
+ strcpybuff(b,dot_pos+1);
else
- strncat(b,dot_pos+1,3);
+ strncatbuff(b,dot_pos+1,3);
} else {
if (!short_ver) // Noms longs
- strcpy(b,DEFAULT_EXT); // pas de..
+ strcpybuff(b,DEFAULT_EXT); // pas de..
else
- strcpy(b,DEFAULT_EXT_SHORT); // pas de..
+ strcpybuff(b,DEFAULT_EXT_SHORT); // pas de..
}
b+=strlen(b); // pointer à la fin
//
@@ -708,14 +770,14 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
*b='\0';
if (dot_pos) {
if (!short_ver) // Noms longs
- strcpy(b,dot_pos+1);
+ strcpybuff(b,dot_pos+1);
else
- strncat(b,dot_pos+1,3);
+ strncatbuff(b,dot_pos+1,3);
} else {
if (!short_ver) // Noms longs
- strcpy(b,DEFAULT_EXT); // pas de..
+ strcpybuff(b,DEFAULT_EXT); // pas de..
else
- strcpy(b,DEFAULT_EXT_SHORT); // pas de..
+ strcpybuff(b,DEFAULT_EXT_SHORT); // pas de..
}
b+=strlen(b); // pointer à la fin
break;
@@ -723,14 +785,14 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
*b='\0';
if (nom_pos != fil + 1) { // pas: /index.html (chemin nul)
if (!short_ver) { // Noms longs
- strncat(b,fil,(int) (nom_pos - fil) - 1);
+ strncatbuff(b,fil,(int) (nom_pos - fil) - 1);
} else {
char pth[HTS_URLMAXSIZE*2],n83[HTS_URLMAXSIZE*2];
pth[0]=n83[0]='\0';
//
- strncat(pth,fil,(int) (nom_pos - fil) - 1);
+ strncatbuff(pth,fil,(int) (nom_pos - fil) - 1);
long_to_83(opt->savename_83,n83,pth);
- strcpy(b,n83);
+ strcpybuff(b,n83);
}
}
b+=strlen(b); // pointer à la fin
@@ -739,14 +801,14 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
*b='\0';
if (strcmp(adr_complete,"file://")==0) {
if (!short_ver) // Noms longs
- strcpy(b,"localhost");
+ strcpybuff(b,"localhost");
else
- strcpy(b,"local");
+ strcpybuff(b,"local");
} else {
if (!short_ver) // Noms longs
- strcpy(b,print_adr);
+ strcpybuff(b,print_adr);
else
- strncat(b,print_adr,8);
+ strncatbuff(b,print_adr,8);
}
b+=strlen(b); // pointer à la fin
break;
@@ -756,17 +818,17 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
char digest[32+2];
char buff[HTS_URLMAXSIZE*2];
digest[0]=buff[0]='\0';
- strcpy(buff,adr);
- strcat(buff,fil_complete);
+ strcpybuff(buff,adr);
+ strcatbuff(buff,fil_complete);
domd5mem(buff,strlen(buff),digest,1);
- strcpy(b,digest);
+ strcpybuff(b,digest);
}
b+=strlen(b); // pointer à la fin
break;
case 'Q': case 'q': /* query MD5 (128-bits/16-bits)
GENERATED ONLY IF query string exists! */
*b='\0';
- strncat(b,url_md5(fil_complete),(tok == 'Q')?32:4);
+ strncatbuff(b,url_md5(fil_complete),(tok == 'Q')?32:4);
b+=strlen(b); // pointer à la fin
break;
}
@@ -788,23 +850,23 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
if (strcmp(adr_complete,"file://")==0) {
//## if (*adr==lOCAL_CHAR) {
if (opt->savename_83 != 1) // noms longs
- strcat(save,"localhost");
+ strcatbuff(save,"localhost");
else
- strcat(save,"local");
+ strcatbuff(save,"local");
} else {
// adresse url
if (!opt->savename_83) { // noms longs (et pas de .)
- strcat(save,print_adr);
+ strcatbuff(save,print_adr);
} else { // noms 8-3
if (strlen(print_adr)>4) {
if (strfield(print_adr,"www."))
- strncat(save,print_adr+4,max_char);
+ strncatbuff(save,print_adr+4,max_char);
else
- strncat(save,print_adr,8);
- } else strncat(save,print_adr,max_char);
+ strncatbuff(save,print_adr,8);
+ } else strncatbuff(save,print_adr,max_char);
}
}
- if (*fil!='/') strcat(save,"/");
+ if (*fil!='/') strcatbuff(save,"/");
}
}
@@ -819,7 +881,7 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
else // index.html ou /index.html
url_savename_addstr(save,fil);
if (save[strlen(save)-1]=='/')
- strcat(save,DEFAULT_HTML); // nommer page par défaut!!
+ strcatbuff(save,DEFAULT_HTML); // nommer page par défaut!!
*/
/* add name */
@@ -836,38 +898,38 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
if (strcmp(adr_complete,"file://")==0) {
//## if (*adr==lOCAL_CHAR) {
if (opt->savename_83 != 1) // noms longs
- strcat(save,"localhost/");
+ strcatbuff(save,"localhost/");
else
- strcat(save,"local/");
+ strcatbuff(save,"local/");
} else {
// adresse url
if (!opt->savename_83) { // noms longs
- strcat(save,print_adr); strcat(save,"/");
+ strcatbuff(save,print_adr); strcatbuff(save,"/");
} else { // noms 8-3
if (strlen(print_adr)>4) {
if (strfield(print_adr,"www."))
- strncat(save,print_adr+4,max_char);
+ strncatbuff(save,print_adr+4,max_char);
else
- strncat(save,print_adr,max_char);
- strcat(save,"/");
+ strncatbuff(save,print_adr,max_char);
+ strcatbuff(save,"/");
} else {
- strncat(save,print_adr,max_char); strcat(save,"/");
+ strncatbuff(save,print_adr,max_char); strcatbuff(save,"/");
}
}
}
} else {
- strcat(save,"web/"); // répertoire général
+ strcatbuff(save,"web/"); // répertoire général
}
}
// si un html à coup sûr
if ( (ext_chg!=0) ? (ishtml_ext(ext)==1) : (ishtml(fil)==1) ) {
if (opt->savename_type%100==2) { // html/
- strcat(save,"html/");
+ strcatbuff(save,"html/");
}
} else {
if ((opt->savename_type%100==1) || (opt->savename_type%100==2)) { // html & images
- strcat(save,"images/");
+ strcatbuff(save,"images/");
}
}
@@ -881,17 +943,17 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
// html?
if ( (ext_chg!=0) ? (ishtml_ext(ext)==1) : (ishtml(fil)==1) ) {
if (opt->savename_type%100==5)
- strcat(save,"html/");
+ strcatbuff(save,"html/");
} else {
char* a=fil+strlen(fil)-1;
while(( a> fil) && (*a != '/') && (*a != '.')) a--;
if (*a!='.')
- strcat(save,"other");
+ strcatbuff(save,"other");
else
- strcat(save,a+1);
- strcat(save,"/");
+ strcatbuff(save,a+1);
+ strcatbuff(save,"/");
}
- /*strcat(save,a);*/
+ /*strcatbuff(save,a);*/
/* add name */
ADD_STANDARD_NAME(0);
}
@@ -923,7 +985,7 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
a=fil+strlen(fil)-1;
while(( a > fil) && (*a != '/') && (*a != '.')) a--;
if (*a=='.') {
- strcat(save,a); // ajouter
+ strcatbuff(save,a); // ajouter
}
}
break;
@@ -933,7 +995,7 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
char* a=fil+strlen(fil)-1;
while(((int) a>(int) fil) && (*a != '/') && (*a != '\\')) a--;
if ((*a=='/') || (*a=='\\')) a++;
- strcat(save,a);
+ strcatbuff(save,a);
*/
/* add name */
@@ -947,7 +1009,7 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
#endif
if (save[strlen(save)-1]=='/')
- strcat(save,DEFAULT_HTML); // nommer page par défaut!!
+ strcatbuff(save,DEFAULT_HTML); // nommer page par défaut!!
}
@@ -958,8 +1020,8 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
while(((int) a>(int) save) && (*a!='.') && (*a!='/')) a--;
if (*a=='.') *a='\0'; // couper
// recopier extension
- strcat(save,".");
- strcat(save,ext); // copier ext
+ strcatbuff(save,".");
+ strcatbuff(save,ext); // copier ext
}*/
// de même en cas de manque d'extension on en place une de manière forcée..
// cela évite les /chez/toto et les /chez/toto/index.html incompatibles
@@ -967,8 +1029,8 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
char* a=save+strlen(save)-1;
while(( a > save) && (*a!='.') && (*a!='/')) a--;
if (*a!='.') { // agh pas de point
- //strcat(save,".none"); // a éviter
- strcat(save,".html"); // préférable!
+ //strcatbuff(save,".none"); // a éviter
+ strcatbuff(save,".html"); // préférable!
if ( (opt->debug>1) && (opt->errlog!=NULL) ) {
fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Default HTML type set for %s%s"LF,adr_complete,fil_complete);
test_flush;
@@ -985,14 +1047,14 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
char tempo[HTS_URLMAXSIZE*2];
char *b;
tempo[0]='\0';
- strcpy(tempo,"[");
+ strcpybuff(tempo,"[");
b=strchr(save,':');
if (!b) b=strchr(save,'@');
if (b)
- strncat(tempo,save,(int) b-(int) a);
- strcat(tempo,"]");
- strcat(tempo,a);
- strcpy(save,a);
+ strncatbuff(tempo,save,(int) b-(int) a);
+ strcatbuff(tempo,"]");
+ strcatbuff(tempo,a);
+ strcpybuff(save,a);
}
}
*/
@@ -1000,8 +1062,8 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
// éviter les / au début (cause: N100)
if (save[0]=='/') {
char tempo[HTS_URLMAXSIZE*2];
- strcpy(tempo,save+1);
- strcpy(save,tempo);
+ strcpybuff(tempo,save+1);
+ strcpybuff(save,tempo);
}
// changer les ~,:,",*,? en _ pour sauver sur disque
@@ -1017,6 +1079,12 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
hts_replace(save,'|','_'); // interdit sous windows
//
hts_replace(save,'@','_');
+ if (opt->savename_83 == 2) { // CDROM
+ // maybe other ones?
+ hts_replace(save,'-','_');
+ hts_replace(save,'=','_');
+ hts_replace(save,'+','_');
+ }
//
{ // éliminer les // (comme ftp://)
char* a;
@@ -1031,20 +1099,22 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
#if HTS_OVERRIDE_DOS_FOLDERS
- /* Replace /foo/nul/bar by /foo/nul-/bar */
+ /* Replace /foo/nul/bar by /foo/nul_/bar */
{
int i=0;
while(hts_tbdev[i][0]) {
char* a=save;
- while((a=strstr(a,hts_tbdev[i]))) {
+ while((a=strstrcase(a,(char*)hts_tbdev[i]))) {
switch ( (int) a[strlen(hts_tbdev[i])] ) {
case '\0':
- case '/': {
+ case '/':
+ case '.':
+ {
char tempo[HTS_URLMAXSIZE*2]; tempo[0]='\0';
- strncat(tempo,save,(int) (a - save) + strlen(hts_tbdev[i]));
- strcat(tempo,"-");
- strcat(tempo,a+strlen(hts_tbdev[i]));
- strcpy(save,tempo);
+ strncatbuff(tempo,save,(int) (a - save) + strlen(hts_tbdev[i]));
+ strcatbuff(tempo,"_");
+ strcatbuff(tempo,a+strlen(hts_tbdev[i]));
+ strcpybuff(save,tempo);
}
break;
}
@@ -1059,7 +1129,7 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
if (opt->savename_83) {
char n83[HTS_URLMAXSIZE*2];
long_to_83(opt->savename_83,n83,save);
- strcpy(save,n83);
+ strcpybuff(save,n83);
}
@@ -1079,9 +1149,9 @@ int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_a
// chemin primaire éventuel A METTRE AVANT
if (strnotempty(opt->path_html)) {
char tempo[HTS_URLMAXSIZE*2];
- strcpy(tempo,opt->path_html);
- strcat(tempo,save);
- strcpy(save,tempo);
+ strcpybuff(tempo,opt->path_html);
+ strcatbuff(tempo,save);
+ strcpybuff(save,tempo);
}
@@ -1100,7 +1170,7 @@ printf("\nStart search\n");
#endif
#if HTS_HASH
- i=hash_read(hash,save,"",0); // lecture type 0 (sav)
+ i=hash_read(hash,save,"",0,0); // lecture type 0 (sav)
if (i>=0)
#else
for(i=lien_tot-1;i>=0;i--) {
@@ -1143,9 +1213,9 @@ printf("\nWRONG CASE UNMATCH : \n%s\n%s, REDEFINE\n",liens[i]->fil,fil_complete)
while(( a > save) && (*a!='.') && (*a!='\\') && (*a!='/')) a--;
if (*a=='.')
- strncat(tempo,save,(int) (a - save));
+ strncatbuff(tempo,save,(int) (a - save));
else
- strcat(tempo,save);
+ strcatbuff(tempo,save);
// tester la présence d'un -xx (ex: index-2.html -> index-3.html)
b=tempo+strlen(tempo)-1;
@@ -1172,9 +1242,9 @@ printf("\nWRONG CASE UNMATCH : \n%s\n%s, REDEFINE\n",liens[i]->fil,fil_complete)
// ajouter extension
if (*a=='.')
- strcat(tempo,a);
+ strcatbuff(tempo,a);
- strcpy(save,tempo);
+ strcpybuff(save,tempo);
//printf("switched: %s\n",save);
@@ -1204,29 +1274,29 @@ void standard_name(char* b,char* dot_pos,char* nom_pos,char* fil_complete,int sh
/* Nom */
if (dot_pos) {
if (!short_ver) // Noms longs
- strncat(b,nom_pos,(int) (dot_pos - nom_pos));
+ strncatbuff(b,nom_pos,(int) (dot_pos - nom_pos));
else
- strncat(b,nom_pos,min((int) (dot_pos - nom_pos),8));
+ strncatbuff(b,nom_pos,min((int) (dot_pos - nom_pos),8));
} else {
if (!short_ver) // Noms longs
- strcat(b,nom_pos);
+ strcatbuff(b,nom_pos);
else
- strncat(b,nom_pos,8);
+ strncatbuff(b,nom_pos,8);
}
/* MD5 - 16 bits */
- strncat(b,url_md5(fil_complete),4);
+ strncatbuff(b,url_md5(fil_complete),4);
/* Ext */
if (dot_pos) {
- strcat(b,".");
+ strcatbuff(b,".");
if (!short_ver) // Noms longs
- strcat(b,dot_pos+1);
+ strcatbuff(b,dot_pos+1);
else
- strncat(b,dot_pos+1,3);
+ strncatbuff(b,dot_pos+1,3);
} else {
if (!short_ver) // Noms longs
- strcat(b,DEFAULT_EXT); // pas de..
+ strcatbuff(b,DEFAULT_EXT); // pas de..
else
- strcat(b,DEFAULT_EXT_SHORT); // pas de..
+ strcatbuff(b,DEFAULT_EXT_SHORT); // pas de..
}
}
@@ -1243,7 +1313,7 @@ char* url_md5(char* fil_complete) {
char buff[HTS_URLMAXSIZE*2];
a++;
digest[0]=buff[0]='\0';
- strcat(buff,a); /* query string MD5 */
+ strcatbuff(buff,a); /* query string MD5 */
domd5mem(buff,strlen(buff),digest,1);
}
}
diff --git a/src/htsnet.h b/src/htsnet.h
index d12b1e4..dbdbcc6 100644
--- a/src/htsnet.h
+++ b/src/htsnet.h
@@ -56,23 +56,26 @@ Please visit our Website: http://www.httrack.com
#include <sys/socket.h>
#include <netinet/in.h>
#include <sys/time.h>
+ /* Force for sun env. */
+ #ifndef BSD_COMP
+ #define BSD_COMP
+ #endif
#include <sys/ioctl.h>
/* gethostname & co */
+#ifdef HAVE_UNISTD_H
#include <unistd.h>
+#endif
/* inet_addr */
#include <arpa/inet.h>
// pas la peine normalement..
-#if HTS_PLATFORM!=3
- #include <sys/filio.h>
-#else
#ifndef HTS_DO_NOT_REDEFINE_in_addr_t
typedef unsigned long in_addr_t;
#endif
-#endif
-#ifndef min
- #define min(a,b) ((a)>(b)?(b):(a))
- #define max(a,b) ((a)>(b)?(a):(b))
-#endif
+#undef min /* drop any earlier definition so the versions below are always the ones in effect */
+#undef max /* same for max */
+#undef Sleep /* same for Sleep */
+#define min(a,b) ((a)>(b)?(b):(a)) /* smaller of a and b; an argument may be evaluated twice */
+#define max(a,b) ((a)>(b)?(a):(b)) /* larger of a and b; an argument may be evaluated twice */
#define Sleep(a) { if (((a)*1000)%1000000) usleep(((a)*1000)%1000000); if (((a)*1000)/1000000) sleep(((a)*1000)/1000000); } /* pause for a milliseconds: the sub-second remainder goes to usleep() in microseconds, whole seconds go to sleep(); expands to a bare { } block, not a do/while(0) */
#endif
@@ -104,9 +107,16 @@ typedef struct {
/* Set port to sockaddr structure */
#define SOCaddr_initport(server, port) do { \
- SOCaddr_sinport(server) = htons((unsigned short int) (port)); \
+ SOCaddr_sinport(server) = htons((unsigned short int) (port)); \
+} while(0)
+
+#define SOCaddr_initany(server, server_len) do { /* set server to the IPv4 wildcard ("any") address and store the IPv4 sockaddr length in server_len */ \
+ SOCaddr_sinfamily(server) = AF_INET; \
+ memset(&SOCaddr_sinaddr(server), 0, sizeof(SOCaddr_sinaddr(server))); /* clear exactly the object whose address is taken; the previous sizeof(struct sockaddr_in) length could run past it */ \
+ server_len=sizeof(struct sockaddr_in); \
} while(0)
+
/* Copy sockaddr to another one */
#define SOCaddr_copyaddr(server, server_len, hpaddr, hpsize) do { \
if (hpsize == sizeof(struct sockaddr_in)) { \
@@ -171,6 +181,12 @@ typedef struct {
SOCaddr_sinport(server) = htons((unsigned short int) (port)); \
} while(0)
+#define SOCaddr_initany(server, server_len) do { /* set server to the IPv4 wildcard ("any") address and store the IPv4 sockaddr length in server_len */ \
+ SOCaddr_sinfamily(server) = AF_INET; \
+ memset(&SOCaddr_sinaddr(server), 0, sizeof(SOCaddr_sinaddr(server))); /* clear exactly the object whose address is taken; the previous sizeof(struct sockaddr_in) length could run past it */ \
+ server_len=sizeof(struct sockaddr_in); \
+} while(0)
+
/*
Copy sockaddr to SOCaddr
diff --git a/src/htsnostatic.c b/src/htsnostatic.c
index 5971d5d..eff6184 100644
--- a/src/htsnostatic.c
+++ b/src/htsnostatic.c
@@ -39,6 +39,7 @@ Please visit our Website: http://www.httrack.com
#include "htsbase.h"
#include "htshash.h"
+#include "htsinthash.h"
typedef struct {
/*
@@ -156,7 +157,7 @@ int hts_freevar() {
return 1;
}
-int hts_resetvar() {
+HTSEXT_API int hts_resetvar() {
int r;
hts_lockvar();
{
diff --git a/src/htsnostatic.h b/src/htsnostatic.h
index 6dbb072..f24f0ad 100644
--- a/src/htsnostatic.h
+++ b/src/htsnostatic.h
@@ -88,7 +88,9 @@ Please visit our Website: http://www.httrack.com
*/
int hts_initvar(void);
int hts_freevar(void);
-int hts_resetvar(void);
+#ifndef HTTRACK_DEFLIB
+HTSEXT_API int hts_resetvar(void);
+#endif
int hts_maylockvar(void);
int hts_lockvar(void);
int hts_unlockvar(void);
@@ -183,6 +185,7 @@ if ( cKey.localInit ) { \
} \
if ( ( ! cKey.localInit ) || ( name == NULL ) ) { \
if (!hts_maylockvar()) { \
+ abortLog("unable to lock mutex (not initialized?!)"); \
abort(); \
} \
hts_lockvar(); \
@@ -190,6 +193,7 @@ if ( ( ! cKey.localInit ) || ( name == NULL ) ) { \
{ \
name = (type *) calloc((nelt), sizeof(type)); \
if (name == NULL) { \
+ abortLog("unable to allocate memory for variable!"); \
abort(); \
} \
{ \
@@ -202,6 +206,7 @@ if ( ( ! cKey.localInit ) || ( name == NULL ) ) { \
name = NULL; \
PTHREAD_KEY_GET(cKey.localKey, &name, type*); \
if (name == NULL) { \
+ abortLog("unable to load thread key!"); \
abort(); \
} \
if ( ! cKey.localInit ) { \
@@ -214,6 +219,7 @@ if ( ( ! cKey.localInit ) || ( name == NULL ) ) { \
else { \
PTHREAD_KEY_GET(cKey.localKey, &name, type*); \
if (name == NULL) { \
+ abortLog("unable to load thread key! (2)"); \
abort(); \
} \
} \
diff --git a/src/htsopt.h b/src/htsopt.h
index 13bc962..77910b6 100644
--- a/src/htsopt.h
+++ b/src/htsopt.h
@@ -48,6 +48,7 @@ typedef struct {
int active;
char name[1024];
int port;
+ char bindhost[256]; // bind this host
} t_proxy;
/* Structure utile pour copier en bloc les paramètres */
@@ -60,6 +61,12 @@ typedef struct {
/* Structure état du miroir */
typedef struct {
int stop;
+ int exit_xh;
+ int back_add_stats;
+ /* */
+ int mimehtml_created;
+ char mimemid[256];
+ FILE* mimefp;
} htsoptstate;
@@ -92,12 +99,13 @@ typedef struct {
int maxconn; // nombre max de connexions/s
int waittime; // démarrage programmé
int cache; // génération d'un cache
- int aff_progress; // barre de progression
+ //int aff_progress; // barre de progression
int shell; // gestion d'un shell par pipe stdin/stdout
t_proxy proxy; // configuration du proxy
int savename_83; // conversion 8-3 pour les noms de fichiers
int savename_type; // type de noms: structure originale/html-images en un seul niveau
char savename_userdef[256]; // structure userdef (ex: %h%p/%n%q.%t)
+ int mimehtml; // MIME-html
int user_agent_send; // user agent (ex: httrack/1.0 [sun])
char user_agent[128];
char path_log[1024]; // chemin pour cache et log
@@ -121,8 +129,10 @@ typedef struct {
int accept_cookie; // gestion des cookies
t_cookie* cookie;
int http10; // forcer http 1.0
+ int nokeepalive; // pas de keep-alive
int nocompression; // pas de compression
int sizehack; // forcer réponse "mis à jour" si taille identique
+ int urlhack; // force "url normalization" to avoid loops
int tolerant; // accepter content-length incorrect
int parseall; // essayer de tout parser (tags inconnus contenant des liens, par exemple)
int norecatch; // ne pas reprendre les fichiers effacés localement par l'utilisateur
@@ -132,7 +142,9 @@ typedef struct {
//int maxcache_anticipate; // maximum de liens à anticiper (majorant)
int ftp_proxy; // proxy http pour ftp
char filelist[1024]; // fichier liste URL à inclure
+ char urllist[1024]; // fichier liste de filtres à inclure
htsfilters filters; // contient les pointeurs pour les filtres
+ void* hash; // hash structure
void* robotsptr; // robots ptr
char lang_iso[64]; // en, fr ..
char mimedefs[2048]; // ext1=mimetype1\next2=mimetype2..
@@ -170,6 +182,8 @@ typedef struct {
int stat_files; // nombre de fichiers écrits
int stat_updated_files; // nombre de fichiers mis à jour
//
+ int stat_nrequests; // nombre de requêtes sur socket
+ int stat_sockid; // nombre de sockets allouées au total
int stat_nsocket; // nombre de sockets
int stat_errors; // nombre d'erreurs
int stat_errors_front; // idem, mais au tout premier niveau
diff --git a/src/htsparse.c b/src/htsparse.c
index b012a8d..3d35252 100644
--- a/src/htsparse.c
+++ b/src/htsparse.c
@@ -30,641 +30,1040 @@ Please visit our Website: http://www.httrack.com
/* ------------------------------------------------------------ */
-/* File: Main source */
-/* DIRECT INCLUDE TO httrack.c */
+/* File: htsparse.c parser */
+/* html/javascript/css parser */
+/* and other parser routines */
/* Author: Xavier Roche */
/* ------------------------------------------------------------ */
-#if HTS_ANALYSTE
-if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) {
-#endif
- FILE* fp=NULL; // fichier écrit localement
- char* adr=r.adr; // pointeur (on parcourt)
- char* lastsaved; // adresse du dernier octet sauvé + 1
- if ( (opt.debug>1) && (opt.log!=NULL) ) {
- fspc(opt.log,"debug"); fprintf(opt.log,"scan file.."LF); test_flush;
- }
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <fcntl.h>
+#include <ctype.h>
- // Indexing!
-#if HTS_MAKE_KEYWORD_INDEX
- if (opt.kindex) {
- if (index_keyword(r.adr,r.size,r.contenttype,savename,opt.path_html)) {
- if ( (opt.debug>1) && (opt.log!=NULL) ) {
- fspc(opt.log,"debug"); fprintf(opt.log,"indexing file..done"LF); test_flush;
- }
- } else {
- if ( (opt.debug>1) && (opt.log!=NULL) ) {
- fspc(opt.log,"debug"); fprintf(opt.log,"indexing file..error!"LF); test_flush;
- }
- }
+/* File defs */
+#include "htscore.h"
+
+/* specific definitions */
+#include "htsbase.h"
+#include "htsnet.h"
+#include "htsbauth.h"
+#include "htsmd5.h"
+#include "htsindex.h"
+
+/* external modules */
+#include "htsmodules.h"
+
+// htswrap_add
+#include "htswrap.h"
+
+// parser
+#include "htsparse.h"
+
+
+// specific defines: shorthands that expand against the liens, ptr, opt and parent_relative names in scope where they are used
+#define urladr (liens[ptr]->adr) /* adr field of the link at index ptr */
+#define urlfil (liens[ptr]->fil) /* fil field of the link at index ptr */
+#define savename (liens[ptr]->sav) /* sav field of the link at index ptr */
+#define parenturladr (liens[liens[ptr]->precedent]->adr) /* adr of the link whose index is stored in the current link's precedent field */
+#define parenturlfil (liens[liens[ptr]->precedent]->fil) /* fil of that same preceding link */
+#define parentsavename (liens[liens[ptr]->precedent]->sav) /* sav of that same preceding link */
+#define relativeurladr ((!parent_relative)?urladr:parenturladr) /* current link's adr, or the preceding link's when parent_relative is non-zero */
+#define relativeurlfil ((!parent_relative)?urlfil:parenturlfil) /* same selection for fil */
+#define relativesavename ((!parent_relative)?savename:parentsavename) /* same selection for sav */
+
+#define test_flush if (opt->flush) { if (opt->log) { fflush(opt->log); } if (opt->errlog) { fflush(opt->errlog); } } /* when opt->flush is set, flush whichever of opt->log and opt->errlog is non-NULL; expands to a bare if statement */
+
+// does nothing: placeholder that expands to an empty statement
+#define XH_uninit do {} while(0)
+
+// optimized version, which makes it possible not to touch unmodified html files (update)
+#define REALLOC_SIZE 8192 /* extra bytes added on top of the needed size whenever ht_buff is sized or grown */
+#define HT_ADD_CHK(A) if (((int) (A)+ht_len+1) >= ht_size) { /* reserve A more bytes in ht_buff: grow it when A bytes plus a terminator would not fit in ht_size */ \
+ ht_size=(A)+ht_len+REALLOC_SIZE; \
+ ht_buff=(char*) realloct(ht_buff,ht_size); \
+ if (ht_buff==NULL) { /* allocation failure is fatal: report, log, and terminate the process */ \
+ printf("PANIC! : Not enough memory [%d]\n",__LINE__); \
+ XH_uninit; \
+ abortLogFmt("not enough memory for current html document in HT_ADD_CHK : realloct(%d) failed" _ ht_size); \
+ exit(1); \
+ } \
+ } \
+ ht_len+=A; /* second, separate statement: always runs, so the expansion is not a single statement and call sites use it without a trailing semicolon */
+#define HT_ADD_ADR /* append the input bytes from lastsaved up to (not including) adr to ht_buff */ \
+ if ((opt->getmode & 1) && (ptr>0)) { /* only when bit 0 of opt->getmode is set and ptr is above 0 */ \
+ int i=((int) (adr - lastsaved)),j=ht_len; HT_ADD_CHK(i) /* i = byte count, j = write offset captured before HT_ADD_CHK advances ht_len */ \
+ memcpy(ht_buff+j, lastsaved, i); \
+ ht_buff[j+i]='\0'; /* keep ht_buff NUL-terminated */ \
+ lastsaved=adr; /* everything before adr is now copied */ \
+ }
+#define HT_ADD(A) /* append the NUL-terminated string A to ht_buff; A is expanded twice (strlen and memcpy) */ \
+ if ((opt->getmode & 1) && (ptr>0)) { /* same gate as HT_ADD_ADR */ \
+ int i=strlen(A),j=ht_len; /* i = length to append, j = write offset before ht_len is advanced */ \
+ if (i) { /* nothing to do for an empty string */ \
+ HT_ADD_CHK(i) \
+ memcpy(ht_buff+j, A, i); \
+ ht_buff[j+i]='\0'; /* keep ht_buff NUL-terminated */ \
+ } }
+#define HT_ADD_START /* declare the output buffer state (ht_size, ht_len, ht_buff) in the enclosing scope and allocate it when output is enabled */ \
+ int ht_size=(int)(r->size*5)/4+REALLOC_SIZE; /* initial capacity: 5/4 of r->size plus REALLOC_SIZE */ \
+ int ht_len=0; \
+ char* ht_buff=NULL; /* stays NULL when the gate below is false */ \
+ if ((opt->getmode & 1) && (ptr>0)) { \
+ ht_buff=(char*) malloct(ht_size); \
+ if (ht_buff==NULL) { /* allocation failure is fatal: report, log, and terminate the process */ \
+ printf("PANIC! : Not enough memory [%d]\n",__LINE__); \
+ XH_uninit; \
+ abortLogFmt("not enough memory for current html document in HT_ADD_START : malloct(%d) failed" _ ht_size); \
+ exit(1); \
+ } \
+ ht_buff[0]='\0'; /* start with an empty string */ \
+ }
+#define HT_ADD_END { /* finish the document held in ht_buff: write it to savename unless size and stored MD5 show it is unchanged, record the MD5, then free ht_buff */ \
+ int ok=0; /* 1 when the existing file is considered identical and is not rewritten */ \
+ if (ht_buff) { \
+ INTsys file_len=(INTsys) strlen(ht_buff);\
+ char digest[32+2];\
+ digest[0]='\0';\
+ domd5mem(ht_buff,file_len,digest,1);\
+ if (fsize(fconv(savename))==file_len) { /* same size on disk: compare against the digest stored in the cache */ \
+ int mlen=0; /* initialized so the tests below are defined even if cache_readdata() leaves it untouched */ \
+ char* mbuff=NULL; /* NOTE(review): never released here; confirm who owns the buffer returned by cache_readdata() */ \
+ cache_readdata(cache,"//[HTML-MD5]//",savename,&mbuff,&mlen);\
+ if (mlen && mbuff!=NULL) mbuff[mlen]='\0'; /* terminate only when a buffer was really returned; the comparison below already allows mbuff to be NULL */ \
+ if ((mlen == 32) && (strcmp(((mbuff!=NULL)?mbuff:""),digest)==0)) {\
+ ok=1;\
+ if ( (opt->debug>1) && (opt->log!=NULL) ) {\
+ fspc(opt->log,"debug"); fprintf(opt->log,"File not re-written (md5): %s"LF,savename);\
+ test_flush;\
+ }\
+ } else {\
+ ok=0;\
+ } \
+ }\
+ if (!ok) { /* content differs or is unknown: (re)write the file */ \
+ fp=filecreate(savename); \
+ if (fp) { \
+ if (file_len>0) {\
+ if ((INTsys)fwrite(ht_buff,1,file_len,fp) != file_len) { /* short write */ \
+ int fcheck;\
+ if ((fcheck=check_fatal_io_errno())) {\
+ opt->state.exit_xh=-1; /* set when check_fatal_io_errno() reports a fatal error */ \
+ }\
+ if (opt->errlog) { \
+ fspc(opt->errlog,"error"); fprintf(opt->errlog,"Unable to write HTML file %s: %s"LF, savename, strerror(errno));\
+ if (fcheck) {\
+ fspc(opt->errlog,"error");\
+ fprintf(opt->errlog,"* * Fatal write error, giving up"LF);\
+ }\
+ test_flush;\
+ }\
+ }\
+ }\
+ fclose(fp); fp=NULL; \
+ if (strnotempty(r->lastmodified)) /* apply the Last-Modified value from the response to the file, when there is one */ \
+ set_filetime_rfc822(savename,r->lastmodified); \
+ } else { /* the file could not be created */ \
+ int fcheck;\
+ if ((fcheck=check_fatal_io_errno())) {\
+ opt->state.exit_xh=-1;\
+ }\
+ if (opt->errlog) { \
+ fspc(opt->errlog,"error");\
+ fprintf(opt->errlog,"Unable to save file %s : %s"LF, savename, strerror(errno));\
+ if (fcheck) {\
+ fspc(opt->errlog,"error");\
+ fprintf(opt->errlog,"* * Fatal write error, giving up"LF);\
+ }\
+ test_flush;\
+ }\
+ }\
+ } else {\
+ filenote(savename,NULL); /* file kept as is */ \
+ }\
+ if (cache->ndx) /* store the digest of the new content under the same cache key that was read above */ \
+ cache_writedata(cache->ndx,cache->dat,"//[HTML-MD5]//",savename,digest,(int)strlen(digest));\
+ } \
+ freet(ht_buff); ht_buff=NULL; \
}
+#define HT_ADD_FOP /* expands to nothing */
+
+// COPY IN HTSCORE.C
+#define HT_INDEX_END do { /* close the generated index: write the footer once, close makeindex_fp, run usercommand on index.html, then mark the index as done */ \
+if (!makeindex_done) { \
+if (makeindex_fp) { \
+ char tempo[1024]; /* optional meta-refresh line handed to the footer template */ \
+ if (makeindex_links == 1 && strlen(makeindex_firstlink) < sizeof(tempo) - 64) { /* single link: redirect to it, but only when the URL fits; 64 bytes are kept for the fixed text around %s, CRLF and the terminator, so an oversized URL no longer overruns tempo */ \
+ sprintf(tempo,"<meta HTTP-EQUIV=\"Refresh\" CONTENT=\"0; URL=%s\">"CRLF,makeindex_firstlink); \
+ } else \
+ tempo[0]='\0'; /* no redirect line */ \
+ fprintf(makeindex_fp,template_footer, \
+ "<!-- Mirror and index made by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->", \
+ tempo \
+ ); \
+ fflush(makeindex_fp); \
+ fclose(makeindex_fp); /* do not forget this, or you are in for a sleepless night */ \
+ makeindex_fp=NULL; \
+ usercommand(opt,0,NULL,fconcat(opt->path_html,"index.html"),"primary","primary"); \
+} \
+} \
+makeindex_done=1; /* ok, it is done */ \
+} while(0)
+
+// Recording a link:
+// the required size is computed first: size of the 3 strings to store (size forced even, plus 2 safety bytes)
+// then we check that there is enough room left in the buffer - otherwise another one is allocated
+// finally we write at the current buffer address, which is advanced; the available size is then decreased by the same amount
+// codebase: if not empty and a .class is being stored, it is noted as the primary path for classes
+// FA,FS: former_adr and former_fil, the original link
+#if HTS_HASH
+#define liens_record_sav_len(A) /* with HTS_HASH enabled: expands to nothing */
+#else
+#define liens_record_sav_len(A) (A)->sav_len=strlen((A)->sav) /* otherwise: store the length of the sav string in sav_len */
 #endif
- // Now, parsing
- if ((opt.getmode & 1) && (ptr>0)) { // récupérer les html sur disque
- // créer le fichier html local
- HT_ADD_FOP; // écrire peu à peu le fichier
- }
-
- if (!error) {
- int detect_title=0; // détection du title
- //
- char* in_media=NULL; // in other media type (real media and so..)
- int intag=0; // on est dans un tag
- int incomment=0; // dans un <!--
- int inscript=0; // dans un scipt pour applets javascript)
- int inscript_tag=0; // on est dans un <body onLoad="... terminé par >
- char inscript_tag_lastc='\0';
- // terminaison (" ou ') du "<body onLoad=.."
- int inscriptgen=0; // on est dans un code générant, ex après obj.write("..
- char scriptgen_q='\0'; // caractère faisant office de guillemet (' ou ")
- int no_esc_utf=0; // ne pas echapper chars > 127
- int nofollow=0; // ne pas scanner
- //
- int parseall_lastc='\0'; // dernier caractère parsé pour parseall
- int parseall_incomment=0; // dans un /* */ (exemple: a = /* URL */ "img.gif";)
- //
- char* intag_start=adr;
- char* intag_startattr=NULL;
- int intag_start_valid=0;
- HT_ADD_START; // débuter
+// COPY OF HTSCORE.C
+#define liens_record(A,F,S,FA,FF) { /* store a new link at liens[lien_tot]: strings A, F, S (adr, fil, sav) plus optional FA, FF (former_adr, former_fil), carved out of lien_buffer */ \
+int notecode=0; /* set to 1 when codebase must be stored with this link */ \
+int lienurl_len=((sizeof(lien_url)+HTS_ALIGN-1)/HTS_ALIGN)*HTS_ALIGN, /* sizeof(lien_url) rounded up to a multiple of HTS_ALIGN */ \
+ adr_len=strlen(A),\
+ fil_len=strlen(F),\
+ sav_len=strlen(S),\
+ cod_len=0,\
+ former_adr_len=strlen(FA),\
+ former_fil_len=strlen(FF); \
+if (former_adr_len>0) { /* former strings are stored only when FA is not empty */ \
+ former_adr_len=(former_adr_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; \
+ former_fil_len=(former_fil_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; \
+} else former_adr_len=former_fil_len=0;\
+if (strlen(F)>6) if (strnotempty(codebase)) if (strfield(F+strlen(F)-6,".class")) { notecode=1; /* F ends with .class and codebase is not empty */ \
+cod_len=strlen(codebase); cod_len=(cod_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; } \
+adr_len=(adr_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; fil_len=(fil_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; sav_len=(sav_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; /* each length: rounded down to a multiple of HTS_ALIGN, plus two more HTS_ALIGN units */ \
+if ((int) lien_size < (int) (adr_len+fil_len+sav_len+cod_len+former_adr_len+former_fil_len+lienurl_len)) { /* not enough room left: take a fresh zeroed block of add_tab_alloc bytes */ \
+lien_buffer=(char*) ((void*) calloct(add_tab_alloc,1)); \
+lien_size=add_tab_alloc; \
+if (lien_buffer!=NULL) { /* NOTE(review): when calloct fails liens[lien_tot] is not assigned here, so the NULL test further down sees its previous value - confirm it is cleared beforehand */ \
+liens[lien_tot]=(lien_url*) (void*) lien_buffer; lien_buffer+=lienurl_len; lien_size-=lienurl_len; \
+liens[lien_tot]->firstblock=1; /* this record starts a newly allocated block */ \
+} \
+} else { \
+liens[lien_tot]=(lien_url*) (void*) lien_buffer; lien_buffer+=lienurl_len; lien_size-=lienurl_len; \
+liens[lien_tot]->firstblock=0; \
+} \
+if (liens[lien_tot]!=NULL) { /* carve each string area out of the buffer, advancing lien_buffer and shrinking lien_size by the same amount */ \
+liens[lien_tot]->adr=lien_buffer; lien_buffer+=adr_len; lien_size-=adr_len; \
+liens[lien_tot]->fil=lien_buffer; lien_buffer+=fil_len; lien_size-=fil_len; \
+liens[lien_tot]->sav=lien_buffer; lien_buffer+=sav_len; lien_size-=sav_len; \
+liens[lien_tot]->cod=NULL; \
+if (notecode) { liens[lien_tot]->cod=lien_buffer; lien_buffer+=cod_len; lien_size-=cod_len; strcpybuff(liens[lien_tot]->cod,codebase); } \
+if (former_adr_len>0) {\
+liens[lien_tot]->former_adr=lien_buffer; lien_buffer+=former_adr_len; lien_size-=former_adr_len; \
+liens[lien_tot]->former_fil=lien_buffer; lien_buffer+=former_fil_len; lien_size-=former_fil_len; \
+strcpybuff(liens[lien_tot]->former_adr,FA); \
+strcpybuff(liens[lien_tot]->former_fil,FF); \
+}\
+strcpybuff(liens[lien_tot]->adr,A); \
+strcpybuff(liens[lien_tot]->fil,F); \
+strcpybuff(liens[lien_tot]->sav,S); \
+liens_record_sav_len(liens[lien_tot]); \
+hash_write(hashptr,lien_tot,opt->urlhack); /* register the new entry with the hash; lien_tot itself is not incremented by this macro */ \
+} \
+}
+#define ENGINE_LOAD_CONTEXT() /* declare local copies of the engine state reachable from str and stre; no trailing semicolon, the caller supplies it */ \
+ lien_url** liens = (lien_url**) str->liens; \
+ httrackp* opt = (httrackp*) str->opt; \
+ lien_back* back = (lien_back*) str->back; \
+ cache_back* cache = (cache_back*) str->cache; \
+ hash_struct* hashptr = (hash_struct*) str->hashptr; \
+ int back_max = str->back_max; \
+ int numero_passe = str->numero_passe; \
+ int add_tab_alloc = str->add_tab_alloc; \
+ /* values read through pointers held in str */ \
+ int lien_tot = * ( (int*) (str->lien_tot_) ); \
+ int ptr = * ( (int*) (str->ptr_) ); \
+ int lien_size = * ( (int*) (str->lien_size_) ); \
+ char* lien_buffer = * ( (char**) (str->lien_buffer_) ); \
+ /* */ \
+ /* values taken from the extended structure stre */ \
+ htsblk* r = stre->r_; \
+ hash_struct* hash = stre->hash_; \
+ int lien_max = *stre->lien_max_; \
+ /* */ \
+ int error = * stre->error_; \
+ int store_errpage = * stre->store_errpage_; \
+ char* codebase = stre->codebase; \
+ char* base = stre->base; \
+ /* index generation state */ \
+ int makeindex_done = *stre->makeindex_done_; \
+ FILE* makeindex_fp = *stre->makeindex_fp_; \
+ int makeindex_links = *stre->makeindex_links_; \
+ char* makeindex_firstlink = stre->makeindex_firstlink_; \
+ /* index templates */ \
+ char *template_header = stre->template_header_; \
+ char *template_body = stre->template_body_; \
+ char *template_footer = stre->template_footer_; \
+ /* statistics */ \
+ LLint stat_fragment = *stre->stat_fragment_; \
+ TStamp makestat_time = stre->makestat_time; \
+ FILE* makestat_fp = stre->makestat_fp
- /* statistics */
- if ((opt.getmode & 1) && (ptr>0)) {
- /*
- HTS_STAT.stat_files++;
- HTS_STAT.stat_bytes+=r.size;
- */
- }
+#define ENGINE_SAVE_CONTEXT() /* write the local copies back through the pointers held in str and stre; no trailing semicolon, the caller supplies it */ \
+ /* Apply changes */ \
+ * ( (int*) (str->lien_tot_) ) = lien_tot; \
+ * ( (int*) (str->ptr_) ) = ptr; \
+ * ( (int*) (str->lien_size_) ) = lien_size; \
+ * ( (char**) (str->lien_buffer_) ) = lien_buffer; \
+ /* values stored through pointers held in stre */ \
+ * stre->error_ = error; \
+ * stre->store_errpage_ = store_errpage; \
+ * stre->lien_max_ = lien_max; \
+ /* index generation state */ \
+ *stre->makeindex_done_ = makeindex_done; \
+ *stre->makeindex_fp_ = makeindex_fp; \
+ *stre->makeindex_links_ = makeindex_links; \
+ /* statistics */ \
+ *stre->stat_fragment_ = stat_fragment
+
+#define _FILTERS (*opt->filters.filters) /* object pointed to by opt->filters.filters */
+#define _FILTERS_PTR (opt->filters.filptr) /* the opt->filters.filptr member itself */
+#define _ROBOTS ((robots_wizard*)opt->robotsptr) /* opt->robotsptr (declared void*) viewed as a robots_wizard pointer */
- /* Primary list or URLs */
- if (ptr == 0) {
- intag=1;
- intag_start_valid=0;
+
+/* Main parser */
+int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
+ /* Load engine variables */
+ ENGINE_LOAD_CONTEXT();
+
+#if HTS_ANALYSTE
+ if (hts_htmlcheck(r->adr,(int)r->size,urladr,urlfil)) {
+#endif
+ FILE* fp=NULL; // fichier écrit localement
+ char* adr=r->adr; // pointeur (on parcourt)
+ char* lastsaved; // adresse du dernier octet sauvé + 1
+ if ( (opt->debug>1) && (opt->log!=NULL) ) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"scan file.."LF); test_flush;
}
- /* Check is the file is a .js file */
- else if (
- (strfield2(r.contenttype,"application/x-javascript")!=0)
- || (strfield2(r.contenttype,"text/css")!=0)
- ) { /* JavaScript js file */
- inscript=1;
- intag=1; // because après <script> on y est .. - pas utile
- intag_start_valid=0; // OUI car nous sommes dans du code, plus dans du "vrai" tag
- if ((opt.debug>1) && (opt.log!=NULL)) {
- fspc(opt.log,"debug"); fprintf(opt.log,"note: this file is a javascript file"LF); test_flush;
+
+
+ // Indexing!
+#if HTS_MAKE_KEYWORD_INDEX
+ if (opt->kindex) {
+ if (index_keyword(r->adr,r->size,r->contenttype,savename,opt->path_html)) {
+ if ( (opt->debug>1) && (opt->log!=NULL) ) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"indexing file..done"LF); test_flush;
+ }
+ } else {
+ if ( (opt->debug>1) && (opt->log!=NULL) ) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"indexing file..error!"LF); test_flush;
+ }
}
}
- /* Or a real audio */
- else if (strfield2(r.contenttype,"audio/x-pn-realaudio")!=0) { /* realaudio link file */
- inscript=intag=1;
- intag_start_valid=0;
- in_media="RAM"; // real media!
- }
- // Detect UTF8 format
- if (is_unicode_utf8((unsigned char*) r.adr, (unsigned int) r.size) == 1) {
- no_esc_utf=1;
- } else {
- no_esc_utf=0;
+#endif
+
+ // Now, parsing
+ if ((opt->getmode & 1) && (ptr>0)) { // récupérer les html sur disque
+ // créer le fichier html local
+ HT_ADD_FOP; // écrire peu à peu le fichier
}
- // Hack to prevent any problems with ram files of other files
- * ( r.adr + r.size ) = '\0';
+
+ if (!error) {
+ int detect_title=0; // détection du title
+ int back_add_stats = opt->state.back_add_stats;
+ //
+ char* in_media=NULL; // in other media type (real media and so..)
+ int intag=0; // on est dans un tag
+ int incomment=0; // dans un <!--
+ int inscript=0; // dans un scipt pour applets javascript)
+ signed char inscript_state[10][257];
+ typedef enum {
+ INSCRIPT_START=0,
+ INSCRIPT_ANTISLASH,
+ INSCRIPT_INQUOTE,
+ INSCRIPT_INQUOTE2,
+ INSCRIPT_SLASH,
+ INSCRIPT_SLASHSLASH,
+ INSCRIPT_COMMENT,
+ INSCRIPT_COMMENT2,
+ INSCRIPT_ANTISLASH_IN_QUOTE,
+ INSCRIPT_ANTISLASH_IN_QUOTE2,
+ INSCRIPT_DEFAULT=256
+ } INSCRIPT;
+ INSCRIPT inscript_state_pos=INSCRIPT_START;
+ char* inscript_name=NULL; // script tag name
+ int inscript_tag=0; // on est dans un <body onLoad="... terminé par >
+ char inscript_tag_lastc='\0';
+ // terminaison (" ou ') du "<body onLoad=.."
+ int inscriptgen=0; // on est dans un code générant, ex après obj.write("..
+ //int inscript_check_comments=0, inscript_in_comments=0; // javascript comments
+ char scriptgen_q='\0'; // caractère faisant office de guillemet (' ou ")
+ int no_esc_utf=0; // ne pas echapper chars > 127
+ int nofollow=0; // ne pas scanner
+ //
+ int parseall_lastc='\0'; // dernier caractère parsé pour parseall
+ //int parseall_incomment=0; // dans un /* */ (exemple: a = /* URL */ "img.gif";)
+ //
+ char* intag_start=adr;
+ char* intag_startattr=NULL;
+ int intag_start_valid=0;
+ //
+ int parent_relative=0; // the parent is the base path (.js, .css..)
+ HT_ADD_START; // débuter
+
+ /* Initialize script automate for comments, quotes.. */
+ memset(inscript_state, 0xff, sizeof(inscript_state));
+ inscript_state[INSCRIPT_START][INSCRIPT_DEFAULT]=INSCRIPT_START; /* by default, stay in START */
+ inscript_state[INSCRIPT_START]['\\']=INSCRIPT_ANTISLASH; /* #1: \ escapes the next character whatever it is */
+ inscript_state[INSCRIPT_ANTISLASH][INSCRIPT_DEFAULT]=INSCRIPT_START;
+ inscript_state[INSCRIPT_START]['\'']=INSCRIPT_INQUOTE; /* #2: ' opens quote and only ' returns to 0 */
+ inscript_state[INSCRIPT_INQUOTE][INSCRIPT_DEFAULT]=INSCRIPT_INQUOTE;
+ inscript_state[INSCRIPT_INQUOTE]['\'']=INSCRIPT_START;
+ inscript_state[INSCRIPT_INQUOTE]['\\']=INSCRIPT_ANTISLASH_IN_QUOTE;
+ inscript_state[INSCRIPT_START]['\"']=INSCRIPT_INQUOTE2; /* #3: " opens double-quote and only " returns to 0 */
+ inscript_state[INSCRIPT_INQUOTE2][INSCRIPT_DEFAULT]=INSCRIPT_INQUOTE2;
+ inscript_state[INSCRIPT_INQUOTE2]['\"']=INSCRIPT_START;
+ inscript_state[INSCRIPT_INQUOTE2]['\\']=INSCRIPT_ANTISLASH_IN_QUOTE2;
+ inscript_state[INSCRIPT_START]['/']=INSCRIPT_SLASH; /* #4: / state, default to #0 */
+ inscript_state[INSCRIPT_SLASH][INSCRIPT_DEFAULT]=INSCRIPT_START;
+ inscript_state[INSCRIPT_SLASH]['/']=INSCRIPT_SLASHSLASH; /* #5: // with only LF to escape */
+ inscript_state[INSCRIPT_SLASHSLASH][INSCRIPT_DEFAULT]=INSCRIPT_SLASHSLASH;
+ inscript_state[INSCRIPT_SLASHSLASH]['\n']=INSCRIPT_START;
+ inscript_state[INSCRIPT_SLASH]['*']=INSCRIPT_COMMENT; /* #6: / * with only * / to escape */
+ inscript_state[INSCRIPT_COMMENT][INSCRIPT_DEFAULT]=INSCRIPT_COMMENT;
+ inscript_state[INSCRIPT_COMMENT]['*']=INSCRIPT_COMMENT2; /* #7: closing comments */
+ inscript_state[INSCRIPT_COMMENT2][INSCRIPT_DEFAULT]=INSCRIPT_COMMENT;
+ inscript_state[INSCRIPT_COMMENT2]['/']=INSCRIPT_START;
+ inscript_state[INSCRIPT_COMMENT2]['*']=INSCRIPT_COMMENT2;
+ inscript_state[INSCRIPT_ANTISLASH_IN_QUOTE][INSCRIPT_DEFAULT]=INSCRIPT_INQUOTE; /* #8: escape in "" */
+ inscript_state[INSCRIPT_ANTISLASH_IN_QUOTE2][INSCRIPT_DEFAULT]=INSCRIPT_INQUOTE2; /* #9: escape in '' */
- // ------------------------------------------------------------
- // analyser ce qu'il y a en mémoire (fichier html)
- // on scanne les balises
- // ------------------------------------------------------------
+ /* statistics */
+ if ((opt->getmode & 1) && (ptr>0)) {
+ /*
+ HTS_STAT.stat_files++;
+ HTS_STAT.stat_bytes+=r->size;
+ */
+ }
+
+ /* Primary list or URLs */
+ if (ptr == 0) {
+ intag=1;
+ intag_start_valid=0;
+ }
+ /* Check is the file is a .js file */
+ else if (
+ (strfield2(r->contenttype,"application/x-javascript")!=0)
+ || (strfield2(r->contenttype,"text/css")!=0)
+ ) { /* JavaScript js file */
+ inscript=1;
+ inscript_name="script";
+ intag=1; // because après <script> on y est .. - pas utile
+ intag_start_valid=0; // OUI car nous sommes dans du code, plus dans du "vrai" tag
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"note: this file is a javascript file"LF); test_flush;
+ }
+ // all links must be checked against parent, not this link
+ if (liens[ptr]->precedent != 0) {
+ parent_relative=1;
+ }
+ }
+ /* Or a real audio */
+ else if (strfield2(r->contenttype,"audio/x-pn-realaudio")!=0) { /* realaudio link file */
+ inscript=intag=1;
+ inscript_name="media";
+ intag_start_valid=0;
+ in_media="RAM"; // real media!
+ }
+ // Detect UTF8 format
+ if (is_unicode_utf8((unsigned char*) r->adr, (unsigned int) r->size) == 1) {
+ no_esc_utf=1;
+ } else {
+ no_esc_utf=0;
+ }
+ // Hack to prevent any problems with ram files of other files
+ * ( r->adr + r->size ) = '\0';
+
+
+ // ------------------------------------------------------------
+ // analyser ce qu'il y a en mémoire (fichier html)
+ // on scanne les balises
+ // ------------------------------------------------------------
#if HTS_ANALYSTE
- _hts_in_html_done=0; // 0% scannés
- _hts_cancel=0; // pas de cancel
- _hts_in_html_parsing=1; // flag pour indiquer un parsing
+ _hts_in_html_done=0; // 0% scannés
+ _hts_cancel=0; // pas de cancel
+ _hts_in_html_parsing=1; // flag pour indiquer un parsing
#endif
- base[0]='\0'; // effacer base-href
- lastsaved=adr;
- do {
- int p=0;
- int valid_p=0; // force to take p even if == 0
- int ending_p='\0'; // ending quote?
- error=0;
-
- /* Hack to avoid NULL char problems with C syntax */
- /* Yes, some bogus HTML pages can embed null chars
- and therefore can not be properly handled if this hack is not done
- */
- if ( ! (*adr) ) {
- if ( ((int) (adr - r.adr)) < r.size)
- *adr=' ';
- }
-
-
-
- /*
- index.html built here
- */
- // Construction index.html (sommaire)
- // Avant de tester les a href,
- // Ici on teste si l'on doit construire l'index vers le(s) site(s) miroir(s)
- if (!makeindex_done) { // autoriation d'écrire un index
- if (!detect_title) {
- if (opt.depth == liens[ptr]->depth) { // on note toujours les premiers liens
- if (!in_media) {
- if (opt.makeindex && (ptr>0)) {
- if (opt.getmode & 1) { // autorisation d'écrire
- p=strfield(adr,"title");
- if (p) {
- if (*(adr-1)=='/') p=0; // /title
- } else {
- if (strfield(adr,"/html"))
- p=-1; // noter, mais sans titre
- else if (strfield(adr,"body"))
- p=-1; // noter, mais sans titre
- else if ( ((int) (adr - r.adr) ) >= (r.size-1) )
- p=-1; // noter, mais sans titre
- else if ( (int) (adr - r.adr) >= r.size - 2) // we got to hurry
- p=-1; // xxc xxc xxc
- }
- } else
- p=0;
-
- if (p) { // ok center
- if (makeindex_fp==NULL) {
- verif_backblue(opt.path_html); // générer gif
- makeindex_fp=filecreate(fconcat(opt.path_html,"index.html"));
- if (makeindex_fp!=NULL) {
-
- // Header
- fprintf(makeindex_fp,template_header,
- "<!-- Mirror and index made by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->"
- );
-
- } else makeindex_done=-1; // fait, erreur
- }
+ base[0]='\0'; // effacer base-href
+ lastsaved=adr;
+ do {
+ int p=0;
+ int valid_p=0; // force to take p even if == 0
+ int ending_p='\0'; // ending quote?
+ int archivetag_p=0; // avoid multiple-archives with commas
+ INSCRIPT inscript_state_pos_prev=inscript_state_pos;
+ error=0;
+
+ /* Hack to avoid NULL char problems with C syntax */
+ /* Yes, some bogus HTML pages can embed null chars
+ and therefore can not be properly handled if this hack is not done
+ */
+ if ( ! (*adr) ) {
+ if ( ((int) (adr - r->adr)) < r->size)
+ *adr=' ';
+ }
+
+
+
+ /*
+ index.html built here
+ */
+ // Construction index.html (sommaire)
+ // Avant de tester les a href,
+ // Ici on teste si l'on doit construire l'index vers le(s) site(s) miroir(s)
+ if (!makeindex_done) { // autoriation d'écrire un index
+ if (!detect_title) {
+ if (opt->depth == liens[ptr]->depth) { // on note toujours les premiers liens
+ if (!in_media) {
+ if (opt->makeindex && (ptr>0)) {
+ if (opt->getmode & 1) { // autorisation d'écrire
+ p=strfield(adr,"title");
+ if (p) {
+ if (*(adr-1)=='/') p=0; // /title
+ } else {
+ if (strfield(adr,"/html"))
+ p=-1; // noter, mais sans titre
+ else if (strfield(adr,"body"))
+ p=-1; // noter, mais sans titre
+ else if ( ((int) (adr - r->adr) ) >= (r->size-1) )
+ p=-1; // noter, mais sans titre
+ else if ( (int) (adr - r->adr) >= r->size - 2) // we got to hurry
+ p=-1; // xxc xxc xxc
+ }
+ } else
+ p=0;
- if (makeindex_fp!=NULL) {
- char tempo[HTS_URLMAXSIZE*2];
- char s[HTS_URLMAXSIZE*2];
- char* a=NULL;
- char* b=NULL;
- s[0]='\0';
- if (p>0) {
- a=strchr(adr,'>');
- if (a!=NULL) {
- a++;
- while(is_space(*a)) a++; // sauter espaces & co
- b=strchr(a,'<'); // prochain tag
- }
+ if (p) { // ok center
+ if (makeindex_fp==NULL) {
+ verif_backblue(opt,opt->path_html); // générer gif
+ makeindex_fp=filecreate(fconcat(opt->path_html,"index.html"));
+ if (makeindex_fp!=NULL) {
+
+ // Header
+ fprintf(makeindex_fp,template_header,
+ "<!-- Mirror and index made by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->"
+ );
+
+ } else makeindex_done=-1; // fait, erreur
}
- if (lienrelatif(tempo,liens[ptr]->sav,concat(opt.path_html,"index.html"))==0) {
- detect_title=1; // ok détecté pour cette page!
- makeindex_links++; // un de plus
- strcpy(makeindex_firstlink,tempo);
- //
- if ((b==a) || (a==NULL) || (b==NULL)) { // pas de titre
- strcpy(s,tempo);
- } else if ((b-a)<256) {
- b--;
- while(is_space(*b)) b--;
- strncpy(s,a,b-a+1);
- *(s+(b-a)+1)='\0';
+
+ if (makeindex_fp!=NULL) {
+ char tempo[HTS_URLMAXSIZE*2];
+ char s[HTS_URLMAXSIZE*2];
+ char* a=NULL;
+ char* b=NULL;
+ s[0]='\0';
+ if (p>0) {
+ a=strchr(adr,'>');
+ if (a!=NULL) {
+ a++;
+ while(is_space(*a)) a++; // sauter espaces & co
+ b=strchr(a,'<'); // prochain tag
+ }
}
+ if (lienrelatif(tempo,liens[ptr]->sav,concat(opt->path_html,"index.html"))==0) {
+ detect_title=1; // ok détecté pour cette page!
+ makeindex_links++; // un de plus
+ strcpybuff(makeindex_firstlink,tempo);
+ //
- // Body
- fprintf(makeindex_fp,template_body,
- tempo,
- s
- );
+ /* Hack */
+ if (opt->mimehtml) {
+ strcpybuff(makeindex_firstlink, "cid:primary/primary");
+ }
+ if ((b==a) || (a==NULL) || (b==NULL)) { // pas de titre
+ strcpybuff(s,tempo);
+ } else if ((b-a)<256) {
+ b--;
+ while(is_space(*b)) b--;
+ strncpy(s,a,b-a+1);
+ *(s+(b-a)+1)='\0';
+ }
+
+ // Body
+ fprintf(makeindex_fp,template_body,
+ tempo,
+ s
+ );
+
+ }
}
}
}
}
+
+ } else if (liens[ptr]->depth<opt->depth) { // on a sauté level1+1 et level1
+ HT_INDEX_END;
}
-
- } else if (liens[ptr]->depth<opt.depth) { // on a sauté level1+1 et level1
- HT_INDEX_END;
- }
- } // if (opt.makeindex)
- }
- // FIN Construction index.html (sommaire)
- /*
- end -- index.html built here
- */
-
-
-
- /* Parse */
- if (
- (*adr=='<') /* No starting tag */
- && (!inscript) /* Not in (java)script */
- && (!incomment) /* Not in comment (<!--) */
- ) {
- intag=1;
- parseall_incomment=0;
- //inquote=0; // effacer quote
- intag_start=adr; intag_start_valid=1;
- codebase[0]='\0'; // effacer éventuel codebase
+ } // if (opt->makeindex)
+ }
+ // FIN Construction index.html (sommaire)
+ /*
+ end -- index.html built here
+ */
- if (opt.getmode & 1) { // sauver html
- p=strfield(adr,"</html");
- if (p==0) p=strfield(adr,"<head>");
- // if (p==0) p=strfield(adr,"<doctype");
- if (p) {
- if (strnotempty(opt.footer)) {
- char tempo[1024+HTS_URLMAXSIZE*2];
- char gmttime[256];
+
+
+ /* Parse */
+ if (
+ (*adr=='<') /* No starting tag */
+ && (!inscript) /* Not in (java)script */
+ && (!incomment) /* Not in comment (<!--) */
+ ) {
+ intag=1;
+ //parseall_incomment=0;
+ //inquote=0; // effacer quote
+ intag_start=adr; intag_start_valid=1;
+ codebase[0]='\0'; // effacer éventuel codebase
+
+ if (opt->getmode & 1) { // sauver html
+ p=strfield(adr,"</html");
+ if (p==0) p=strfield(adr,"<head>");
+ // if (p==0) p=strfield(adr,"<doctype");
+ if (p) {
char* eol="\n";
- tempo[0]='\0';
- if (strchr(r.adr,'\r'))
+ if (strchr(r->adr,'\r'))
eol="\r\n";
- time_gmt_rfc822(gmttime);
- strcat(tempo,eol);
- sprintf(tempo+strlen(tempo),opt.footer,jump_identification(urladr),urlfil,gmttime,"","","","","","","","");
- strcat(tempo,eol);
- //fwrite(tempo,1,strlen(tempo),fp);
- HT_ADD(tempo);
- }
- }
- }
-
- // éliminer les <!-- (commentaires) : intag dévalidé
- if (*(adr+1)=='!')
- if (*(adr+2)=='-')
- if (*(adr+3)=='-') {
- intag=0;
- incomment=1;
- intag_start_valid=0;
+ if (strnotempty(opt->footer)) {
+ char tempo[1024+HTS_URLMAXSIZE*2];
+ char gmttime[256];
+ tempo[0]='\0';
+ time_gmt_rfc822(gmttime);
+ strcatbuff(tempo,eol);
+ sprintf(tempo+strlen(tempo),opt->footer,jump_identification(urladr),urlfil,gmttime,HTTRACK_VERSIONID,"","","","","","","");
+ strcatbuff(tempo,eol);
+ //fwrite(tempo,1,strlen(tempo),fp);
+ HT_ADD(tempo);
+ }
+ if (r->charset[0]) {
+ HT_ADD("<meta http-equiv=\"content-type\" content=\"text/html;charset=");
+ HT_ADD(r->charset);
+ HT_ADD("\">");
+ HT_ADD(eol);
+ }
}
-
- }
- else if (
- (*adr=='>') /* ending tag */
- && ( (!inscript) || (inscript_tag) ) /* and in tag (or in script) */
- ) {
- if (inscript_tag) {
- inscript_tag=inscript=0;
- intag=0;
- incomment=0;
- intag_start_valid=0;
- } else if (!incomment) {
- intag=0; //inquote=0;
+ }
- // entrée dans du javascript?
- // on parse ICI car il se peut qu'on ait eu a parser les src=.. dedans
- //if (!inscript) { // sinon on est dans un obj.write("..
- if ((intag_start_valid) &&
- (
- check_tag(intag_start,"script")
- ||
- check_tag(intag_start,"style")
- )
- ) {
- char* a=intag_start; // <
- // ** while(is_realspace(*(--a)));
- if (*a=='<') { // sûr que c'est un tag?
- inscript=1;
- intag=1; // because après <script> on y est .. - pas utile
- intag_start_valid=0; // OUI car nous sommes dans du code, plus dans du "vrai" tag
- }
- }
- } else { /* end of comment? */
- // vérifier fermeture correcte
- if ( (*(adr-1)=='-') && (*(adr-2)=='-') ) {
+ // éliminer les <!-- (commentaires) : intag dévalidé
+ if (*(adr+1)=='!')
+ if (*(adr+2)=='-')
+ if (*(adr+3)=='-') {
+ intag=0;
+ incomment=1;
+ intag_start_valid=0;
+ }
+
+ }
+ else if (
+ (*adr=='>') /* ending tag */
+ && ( (!inscript) || (inscript_tag) ) /* and in tag (or in script) */
+ ) {
+ if (inscript_tag) {
+ inscript_tag=inscript=0;
intag=0;
incomment=0;
intag_start_valid=0;
- }
-#if GT_ENDS_COMMENT
- /* wrong comment ending */
- else {
- /* check if correct ending does not exists
- <!-- foo > example <!-- bar > is sometimes accepted by browsers
- when no --> is used somewhere else.. darn those browsers are dirty
- */
- if (!strstr(adr,"-->")) {
+ } else if (!incomment) {
+ intag=0; //inquote=0;
+
+ // entrée dans du javascript?
+ // on parse ICI car il se peut qu'on ait eu a parser les src=.. dedans
+ //if (!inscript) { // sinon on est dans un obj.write("..
+ if ((intag_start_valid) &&
+ (
+ check_tag(intag_start,"script")
+ ||
+ check_tag(intag_start,"style")
+ )
+ ) {
+ char* a=intag_start; // <
+ // ** while(is_realspace(*(--a)));
+ if (*a=='<') { // sûr que c'est un tag?
+ if (check_tag(intag_start,"script"))
+ inscript_name="script";
+ else
+ inscript_name="style";
+ inscript=1;
+ inscript_state_pos=INSCRIPT_START;
+ intag=1; // because après <script> on y est .. - pas utile
+ intag_start_valid=0; // OUI car nous sommes dans du code, plus dans du "vrai" tag
+ }
+ }
+ } else { /* end of comment? */
+ // vérifier fermeture correcte
+ if ( (*(adr-1)=='-') && (*(adr-2)=='-') ) {
intag=0;
incomment=0;
intag_start_valid=0;
}
- }
-#endif
- }
- //}
- }
- //else if (*adr==34) {
- // inquote=(inquote?0:1);
- //}
- else if (intag || inscript) { // nous sommes dans un tag/commentaire, tester si on recoit un tag
- int p_type=0;
- int p_nocatch=0;
- int p_searchMETAURL=0; // chercher ..URL=<url>
- int add_class=0; // ajouter .class
- int add_class_dots_to_patch=0; // number of '.' in code="x.y.z<realname>"
- char* p_flush=NULL;
-
-
- // ------------------------------------------------------------
- // parsing évolé
- // ------------------------------------------------------------
- if (((isalpha((unsigned char)*adr)) || (*adr=='/') || (inscript) || (inscriptgen))) { // sinon pas la peine de tester..
-
-
- /* caractère de terminaison pour "miniparsing" javascript=.. ?
- (ex: <a href="javascript:()" action="foo"> ) */
- if (inscript_tag) {
- if (inscript_tag_lastc) {
- if (*adr == inscript_tag_lastc) {
- /* sortir */
- inscript_tag=inscript=0;
+#if GT_ENDS_COMMENT
+ /* wrong comment ending */
+ else {
+ /* check if correct ending does not exists
+ <!-- foo > example <!-- bar > is sometimes accepted by browsers
+ when no --> is used somewhere else.. darn those browsers are dirty
+ */
+ if (!strstr(adr,"-->")) {
+ intag=0;
incomment=0;
+ intag_start_valid=0;
}
}
+#endif
}
+ //}
+ }
+ //else if (*adr==34) {
+ // inquote=(inquote?0:1);
+ //}
+ else if (intag || inscript) { // nous sommes dans un tag/commentaire, tester si on recoit un tag
+ int p_type=0;
+ int p_nocatch=0;
+ int p_searchMETAURL=0; // chercher ..URL=<url>
+ int add_class=0; // ajouter .class
+ int add_class_dots_to_patch=0; // number of '.' in code="x.y.z<realname>"
+ char* p_flush=NULL;
- // Note:
- // Certaines pages ne respectent pas le html
- // notamment les guillements ne sont pas fixés
- // Nous sommes dans un tag, donc on peut faire un test plus
- // large pour pouvoi prendre en compte ces particularités
-
- // à vérifier: ACTION, CODEBASE, VRML
-
- if (in_media) {
- if (strcmp(in_media,"RAM")==0) { // real media
- p=0;
- valid_p=1;
- }
- } else if (ptr>0) { /* pas première page 0 (primary) */
- p=0; // saut pour le nom de fichier: adresse nom fichier=adr+p
+ // ------------------------------------------------------------
+ // parsing évolé
+ // ------------------------------------------------------------
+ if (((isalpha((unsigned char)*adr)) || (*adr=='/') || (inscript) || (inscriptgen))) { // sinon pas la peine de tester..
- // ------------------------------
- // détection d'écriture JavaScript.
- // osons les obj.write et les obj.href=.. ! osons!
- // note: inscript==1 donc on sautera après les \"
+
+ /* caractère de terminaison pour "miniparsing" javascript=.. ?
+ (ex: <a href="javascript:()" action="foo"> ) */
+ if (inscript_tag) {
+ if (inscript_tag_lastc) {
+ if (*adr == inscript_tag_lastc) {
+ /* sortir */
+ inscript_tag=inscript=0;
+ incomment=0;
+ }
+ }
+ }
+
+ /* automate */
if (inscript) {
- if (inscriptgen) { // on est déja dans un objet générant..
- if (*adr==scriptgen_q) { // fermeture des " ou '
- if (*(adr-1)!='\\') { // non
- inscriptgen=0; // ok parsing terminé
+ int new_state_pos;
+ new_state_pos=inscript_state[inscript_state_pos][(unsigned char)*adr];
+ if (new_state_pos < 0) {
+ new_state_pos=inscript_state[inscript_state_pos][INSCRIPT_DEFAULT];
+ }
+ assertf(new_state_pos >= 0);
+ assertf(new_state_pos*sizeof(inscript_state[0]) < sizeof(inscript_state));
+ inscript_state_pos=new_state_pos;
+ }
+
+
+ // Note:
+ // Certaines pages ne respectent pas le html
+ // notamment les guillements ne sont pas fixés
+ // Nous sommes dans un tag, donc on peut faire un test plus
+ // large pour pouvoi prendre en compte ces particularités
+
+ // à vérifier: ACTION, CODEBASE, VRML
+
+ if (in_media) {
+ if (strcmp(in_media,"RAM")==0) { // real media
+ p=0;
+ valid_p=1;
+ }
+ } else if (ptr>0) { /* pas première page 0 (primary) */
+ p=0; // saut pour le nom de fichier: adresse nom fichier=adr+p
+
+ // ------------------------------
+ // détection d'écriture JavaScript.
+ // osons les obj.write et les obj.href=.. ! osons!
+ // note: inscript==1 donc on sautera après les \"
+ if (inscript) {
+ if (inscriptgen) { // on est déja dans un objet générant..
+ if (*adr==scriptgen_q) { // fermeture des " ou '
+ if (*(adr-1)!='\\') { // non
+ inscriptgen=0; // ok parsing terminé
+ }
}
- }
- } else {
- char* a=NULL;
- char check_this_fking_line=0; // parsing code javascript..
- char must_be_terminated=0; // caractère obligatoire de terminaison!
- int token_size;
- if (!(token_size=strfield(adr,".writeln"))) // détection ...objet.write[ln]("code html")...
- token_size=strfield(adr,".write");
- if (token_size) {
- a=adr+token_size;
+ } else {
+ char* a=NULL;
+ char check_this_fking_line=0; // parsing code javascript..
+ char must_be_terminated=0; // caractère obligatoire de terminaison!
+ int token_size;
+ if (!(token_size=strfield(adr,".writeln"))) // détection ...objet.write[ln]("code html")...
+ token_size=strfield(adr,".write");
+ if (token_size) {
+ a=adr+token_size;
+ while(is_realspace(*a)) a++; // sauter espaces
+ if (*a=='(') { // début parenthèse
+ check_this_fking_line=2; // à parser!
+ must_be_terminated=')';
+ a++; // sauter (
+ }
+ }
+ // euhh ??? ???
+ /* else if (strfield(adr,".href")) { // détection ...objet.href="...
+ a=adr+5;
while(is_realspace(*a)) a++; // sauter espaces
- if (*a=='(') { // début parenthèse
- check_this_fking_line=2; // à parser!
- must_be_terminated=')';
- a++; // sauter (
+ if (*a=='=') { // ohh un égal
+ check_this_fking_line=1; // à noter!
+ must_be_terminated=';'; // et si t'as oublié le ; tu sais pas coder
+ a++; // sauter =
}
- }
- // euhh ??? ???
- /* else if (strfield(adr,".href")) { // détection ...objet.href="...
- a=adr+5;
- while(is_realspace(*a)) a++; // sauter espaces
- if (*a=='=') { // ohh un égal
- check_this_fking_line=1; // à noter!
- must_be_terminated=';'; // et si t'as oublié le ; tu sais pas coder
- a++; // sauter =
- }
-
+
}*/
-
- // on a un truc du genre instruction"code généré" dont on parse le code
- if (check_this_fking_line) {
- while(is_realspace(*a)) a++;
- if ((*a=='\'') || (*a=='"')) { // départ de '' ou ""
- char *b;
- int ex=0;
- scriptgen_q=*a; // quote
- b=a+1; // départ de la chaîne
- // vérifier forme ("code") et pas ("code"+var), ingérable
- do {
- a++; // caractère suivant
- if (*a==scriptgen_q) if (*(a-1)!='\\') // quote non slash
- ex=1; // sortie
- if ((*a==10) || (*a==13))
- ex=1;
- } while(!ex);
- if (*a==scriptgen_q) { // fin du quote
- a++;
- while(is_realspace(*a)) a++;
- if (*a==must_be_terminated) { // parenthèse fermante: ("..")
-
- // bon, on doit parser une ligne javascript
- // 1) si check.. ==1 alors c'est un nom de fichier direct, donc
- // on fixe p sur le saut nécessaire pour atteindre le nom du fichier
- // et le moteur se débrouillera ensuite tout seul comme un grand
- // 2) si check==2 c'est un peu plus tordu car là on génére du
- // code html au sein de code javascript au sein de code html
- // dans ce cas on doit fixer un flag à un puis ensuite dans la boucle
- // on devra parser les instructions standard comme <a href etc
- // NOTE: le code javascript autogénéré n'est pas pris en compte!!
- // (et ne marche pas dans 50% des cas de toute facon!)
- if (check_this_fking_line==1) {
- p=(int) (b - adr); // calculer saut!
- } else {
- inscriptgen=1; // SCRIPTGEN actif
- adr=b; // jump
+
+ // on a un truc du genre instruction"code généré" dont on parse le code
+ if (check_this_fking_line) {
+ while(is_realspace(*a)) a++;
+ if ((*a=='\'') || (*a=='"')) { // départ de '' ou ""
+ char *b;
+ int ex=0;
+ scriptgen_q=*a; // quote
+ b=a+1; // départ de la chaîne
+ // vérifier forme ("code") et pas ("code"+var), ingérable
+ do {
+ a++; // caractère suivant
+ if (*a==scriptgen_q && *(a-1)!='\\') // quote non slash
+ ex=1; // sortie
+ if (*a==10 && *(a-1) != '\\' /* LF and no continue (\) character */
+ && ( *(a-1) != '\r' || *(a-2) != '\\' ) ) /* and not CRLF and no .. */
+ ex=1;
+ } while(!ex);
+ if (*a==scriptgen_q) { // fin du quote
+ a++;
+ while(is_realspace(*a)) a++;
+ if (*a==must_be_terminated) { // parenthèse fermante: ("..")
+
+ // bon, on doit parser une ligne javascript
+ // 1) si check.. ==1 alors c'est un nom de fichier direct, donc
+ // on fixe p sur le saut nécessaire pour atteindre le nom du fichier
+ // et le moteur se débrouillera ensuite tout seul comme un grand
+ // 2) si check==2 c'est un peu plus tordu car là on génére du
+ // code html au sein de code javascript au sein de code html
+ // dans ce cas on doit fixer un flag à un puis ensuite dans la boucle
+ // on devra parser les instructions standard comme <a href etc
+ // NOTE: le code javascript autogénéré n'est pas pris en compte!!
+ // (et ne marche pas dans 50% des cas de toute facon!)
+ if (check_this_fking_line==1) {
+ p=(int) (b - adr); // calculer saut!
+ } else {
+ inscriptgen=1; // SCRIPTGEN actif
+ adr=b; // jump
+ }
+
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ char str[512];
+ str[0]='\0';
+ strncatbuff(str,b,minimum((int) (a - b + 1), 32));
+ fspc(opt->log,"debug"); fprintf(opt->log,"active code (%s) detected in javascript: %s"LF,(check_this_fking_line==2)?"parse":"pickup",str); test_flush;
+ }
}
- if ((opt.debug>1) && (opt.log!=NULL)) {
- char str[512];
- str[0]='\0';
- strncat(str,b,minimum((int) (a - b + 1), 32));
- fspc(opt.log,"debug"); fprintf(opt.log,"active code (%s) detected in javascript: %s"LF,(check_this_fking_line==2)?"parse":"pickup",str); test_flush;
- }
}
}
+
}
-
-
}
}
- }
- // fin detection code générant javascript vers html
- // ------------------------------
-
-
- // analyse proprement dite, A HREF=.. etc..
- if (!p) {
- // si dans un tag, et pas dans un script - sauf si on analyse un obj.write("..
- if ((intag && (!inscript)) || inscriptgen) {
- if ( (*(adr-1)=='<') || (is_space(*(adr-1))) ) { // <tag < tag etc
- // <A HREF=.. pour les liens HTML
- p=rech_tageq(adr,"href");
- if (p) { // href.. tester si c'est une bas href!
- if ((intag_start_valid) && check_tag(intag_start,"base")) { // oui!
- // ** note: base href et codebase ne font pas bon ménage..
- p_type=2; // c'est un chemin
- }
- }
-
- /* Tags supplémentaires à vérifier (<img src=..> etc) */
- if (p==0) {
- int i=0;
- while( (p==0) && (strnotempty(hts_detect[i])) ) {
- p=rech_tageq(adr,hts_detect[i]);
- i++;
- }
- }
-
- /* Tags supplémentaires en début à vérifier (<object .. hotspot1=..> etc) */
- if (p==0) {
- int i=0;
- while( (p==0) && (strnotempty(hts_detectbeg[i])) ) {
- p=rech_tageqbegdigits(adr,hts_detectbeg[i]);
- i++;
+ // fin detection code générant javascript vers html
+ // ------------------------------
+
+
+ // analyse proprement dite, A HREF=.. etc..
+ if (!p) {
+ // si dans un tag, et pas dans un script - sauf si on analyse un obj.write("..
+ if ((intag && (!inscript)) || inscriptgen) {
+ if ( (*(adr-1)=='<') || (is_space(*(adr-1))) ) { // <tag < tag etc
+ // <A HREF=.. pour les liens HTML
+ p=rech_tageq(adr,"href");
+ if (p) { // href.. tester si c'est une bas href!
+ if ((intag_start_valid) && check_tag(intag_start,"base")) { // oui!
+ // ** note: base href et codebase ne font pas bon ménage..
+ p_type=2; // c'est un chemin
+ }
}
- }
-
- /* Tags supplémentaires à vérifier : URL=.. */
- if (p==0) {
- int i=0;
- while( (p==0) && (strnotempty(hts_detectURL[i])) ) {
- p=rech_tageq(adr,hts_detectURL[i]);
- i++;
+
+ /* Tags supplémentaires à vérifier (<img src=..> etc) */
+ if (p==0) {
+ int i=0;
+ while( (p==0) && (strnotempty(hts_detect[i])) ) {
+ p=rech_tageq(adr,hts_detect[i]);
+ if (p) {
+ /* This is a temporary hack to avoid archive=foo.jar,bar.jar .. */
+ if (strcmp(hts_detect[i], "archive") == 0) {
+ archivetag_p = 1;
+ }
+ }
+ i++;
+ }
}
- if (p)
- p_searchMETAURL=1;
- }
-
- /* Tags supplémentaires à vérifier, mais à ne pas capturer */
- if (p==0) {
- int i=0;
- while( (p==0) && (strnotempty(hts_detectandleave[i])) ) {
- p=rech_tageq(adr,hts_detectandleave[i]);
- i++;
+
+ /* Tags supplémentaires en début à vérifier (<object .. hotspot1=..> etc) */
+ if (p==0) {
+ int i=0;
+ while( (p==0) && (strnotempty(hts_detectbeg[i])) ) {
+ p=rech_tageqbegdigits(adr,hts_detectbeg[i]);
+ i++;
+ }
}
- if (p)
- p_nocatch=1; /* ne pas rechercher */
- }
-
- /* Evénements */
- if (p==0) {
- int i=0;
- /* détection onLoad etc */
- while( (p==0) && (strnotempty(hts_detect_js[i])) ) {
- p=rech_tageq(adr,hts_detect_js[i]);
- i++;
+
+ /* Tags supplémentaires à vérifier : URL=.. */
+ if (p==0) {
+ int i=0;
+ while( (p==0) && (strnotempty(hts_detectURL[i])) ) {
+ p=rech_tageq(adr,hts_detectURL[i]);
+ i++;
+ }
+ if (p)
+ p_searchMETAURL=1;
}
- /* non détecté - détecter également les onXxxxx= */
+
+ /* Tags supplémentaires à vérifier, mais à ne pas capturer */
if (p==0) {
- if ( (*adr=='o') && (*(adr+1)=='n') && isUpperLetter(*(adr+2)) ) {
- p=0;
- while(isalpha((unsigned char)adr[p]) && (p<64) ) p++;
- if (p<64) {
- while(is_space(adr[p])) p++;
- if (adr[p]=='=')
- p++;
- else p=0;
- } else p=0;
+ int i=0;
+ while( (p==0) && (strnotempty(hts_detectandleave[i])) ) {
+ p=rech_tageq(adr,hts_detectandleave[i]);
+ i++;
}
+ if (p)
+ p_nocatch=1; /* ne pas rechercher */
}
- /* OK, événement repéré */
- if (p) {
- inscript_tag_lastc=*(adr+p); /* à attendre à la fin */
- adr+=p; /* saut */
- /*
- On est désormais dans du code javascript
- */
- inscript_tag=inscript=1;
+
+ /* Evénements */
+ if (p==0 &&
+ ! inscript /* we don't want events inside document.write */
+ ) {
+ int i=0;
+ /* détection onLoad etc */
+ while( (p==0) && (strnotempty(hts_detect_js[i])) ) {
+ p=rech_tageq(adr,hts_detect_js[i]);
+ i++;
+ }
+ /* non détecté - détecter également les onXxxxx= */
+ if (p==0) {
+ if ( (*adr=='o') && (*(adr+1)=='n') && isUpperLetter(*(adr+2)) ) {
+ p=0;
+ while(isalpha((unsigned char)adr[p]) && (p<64) ) p++;
+ if (p<64) {
+ while(is_space(adr[p])) p++;
+ if (adr[p]=='=')
+ p++;
+ else p=0;
+ } else p=0;
+ }
+ }
+ /* OK, événement repéré */
+ if (p) {
+ inscript_tag_lastc=*(adr+p); /* à attendre à la fin */
+ adr+=p+1; /* saut */
+ /*
+ On est désormais dans du code javascript
+ */
+ inscript_name="";
+ inscript=inscript_tag=1;
+ inscript_state_pos=INSCRIPT_START;
+ }
+ p=0; /* quoi qu'il arrive, ne rien démarrer ici */
}
- p=0; /* quoi qu'il arrive, ne rien démarrer ici */
- }
-
- // <APPLET CODE=.. pour les applet java.. [CODEBASE (chemin..) à faire]
- if (p==0) {
- p=rech_tageq(adr,"code");
- if (p) {
- if ((intag_start_valid) && check_tag(intag_start,"applet")) { // dans un <applet !
- p_type=-1; // juste le nom de fichier+dossier, écire avant codebase
- add_class=1; // ajouter .class au besoin
-
- // vérifier qu'il n'y a pas de codebase APRES
- // sinon on swappe les deux.
- // pas très propre mais c'est ce qu'il y a de plus simple à faire!!
-
- {
- char *a;
- a=adr;
- while((*a) && (*a!='>') && (!rech_tageq(a,"codebase"))) a++;
- if (rech_tageq(a,"codebase")) { // banzai! codebase=
- char* b;
- b=strchr(a,'>');
- if (b) {
- if (((int) (b - adr)) < 1000) { // au total < 1Ko
- char tempo[HTS_URLMAXSIZE*2];
- tempo[0]='\0';
- strncat(tempo,a,(int) (b - a) );
- strcat( tempo," ");
- strncat(tempo,adr,(int) (a - adr - 1));
- // éventuellement remplire par des espaces pour avoir juste la taille
- while((int) strlen(tempo)<((int) (b - adr)))
- strcat(tempo," ");
- // pas d'erreur?
- if ((int) strlen(tempo) == ((int) (b - adr) )) {
- strncpy(adr,tempo,strlen(tempo)); // PAS d'octet nul à la fin!
- p=0; // DEVALIDER!!
- p_type=0;
- add_class=0;
+
+ // <APPLET CODE=.. pour les applet java.. [CODEBASE (chemin..) à faire]
+ if (p==0) {
+ p=rech_tageq(adr,"code");
+ if (p) {
+ if ((intag_start_valid) && check_tag(intag_start,"applet")) { // dans un <applet !
+ p_type=-1; // juste le nom de fichier+dossier, écire avant codebase
+ add_class=1; // ajouter .class au besoin
+
+ // vérifier qu'il n'y a pas de codebase APRES
+ // sinon on swappe les deux.
+ // pas très propre mais c'est ce qu'il y a de plus simple à faire!!
+
+ {
+ char *a;
+ a=adr;
+ while((*a) && (*a!='>') && (!rech_tageq(a,"codebase"))) a++;
+ if (rech_tageq(a,"codebase")) { // banzai! codebase=
+ char* b;
+ b=strchr(a,'>');
+ if (b) {
+ if (((int) (b - adr)) < 1000) { // au total < 1Ko
+ char tempo[HTS_URLMAXSIZE*2];
+ tempo[0]='\0';
+ strncatbuff(tempo,a,(int) (b - a) );
+ strcatbuff( tempo," ");
+ strncatbuff(tempo,adr,(int) (a - adr - 1));
+ // éventuellement remplire par des espaces pour avoir juste la taille
+ while((int) strlen(tempo)<((int) (b - adr)))
+ strcatbuff(tempo," ");
+ // pas d'erreur?
+ if ((int) strlen(tempo) == ((int) (b - adr) )) {
+ strncpy(adr,tempo,strlen(tempo)); // PAS d'octet nul à la fin!
+ p=0; // DEVALIDER!!
+ p_type=0;
+ add_class=0;
+ }
}
}
}
}
+
}
-
}
}
- }
-
- // liens à patcher mais pas à charger (ex: codebase)
- if (p==0) { // note: si non chargé (ex: ignorer .class) patché tout de même
- p=rech_tageq(adr,"codebase");
- if (p) {
- if ((intag_start_valid) && check_tag(intag_start,"applet")) { // dans un <applet !
- p_type=-2;
- } else p=-1; // ne plus chercher
+
+ // liens à patcher mais pas à charger (ex: codebase)
+ if (p==0) { // note: si non chargé (ex: ignorer .class) patché tout de même
+ p=rech_tageq(adr,"codebase");
+ if (p) {
+ if ((intag_start_valid) && check_tag(intag_start,"applet")) { // dans un <applet !
+ p_type=-2;
+ } else p=-1; // ne plus chercher
+ }
}
- }
-
-
- // Meta tags pour robots
- if (p==0) {
- if (opt.robots) {
- if ((intag_start_valid) && check_tag(intag_start,"meta")) {
- if (rech_tageq(adr,"name")) { // name=robots.txt
- char tempo[1100];
- char* a;
- tempo[0]='\0';
- a=strchr(adr,'>');
+
+
+ // Meta tags pour robots
+ if (p==0) {
+ if (opt->robots) {
+ if ((intag_start_valid) && check_tag(intag_start,"meta")) {
+ if (rech_tageq(adr,"name")) { // name=robots.txt
+ char tempo[1100];
+ char* a;
+ tempo[0]='\0';
+ a=strchr(adr,'>');
#if DEBUG_ROBOTS
- printf("robots.txt meta tag detected\n");
+ printf("robots.txt meta tag detected\n");
#endif
- if (a) {
- if (((int) (a - adr)) < 999 ) {
- strncat(tempo,adr,(int) (a - adr));
- if (strstrcase(tempo,"content")) {
- if (strstrcase(tempo,"robots")) {
- if (strstrcase(tempo,"nofollow")) {
+ if (a) {
+ if (((int) (a - adr)) < 999 ) {
+ strncatbuff(tempo,adr,(int) (a - adr));
+ if (strstrcase(tempo,"content")) {
+ if (strstrcase(tempo,"robots")) {
+ if (strstrcase(tempo,"nofollow")) {
#if DEBUG_ROBOTS
- printf("robots.txt meta tag: nofollow in %s%s\n",urladr,urlfil);
+ printf("robots.txt meta tag: nofollow in %s%s\n",urladr,urlfil);
#endif
- nofollow=1; // NE PLUS suivre liens dans cette page
- if (opt.errlog) {
- fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Link %s%s not scanned (follow robots meta tag)"LF,urladr,urlfil);
- test_flush;
+ nofollow=1; // NE PLUS suivre liens dans cette page
+ if (opt->errlog) {
+ fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Link %s%s not scanned (follow robots meta tag)"LF,urladr,urlfil);
+ test_flush;
+ }
}
}
}
@@ -674,28 +1073,46 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) {
}
}
}
- }
-
- // entrée dans une applet javascript
- /*if (!inscript) { // sinon on est dans un obj.write("..
- if (p==0)
- if (rech_sampletag(adr,"script"))
- if (check_tag(intag_start,"script")) {
- inscript=1;
- }
+
+ // entrée dans une applet javascript
+ /*if (!inscript) { // sinon on est dans un obj.write("..
+ if (p==0)
+ if (rech_sampletag(adr,"script"))
+ if (check_tag(intag_start,"script")) {
+ inscript=1;
+ }
}*/
-
- // Ici on procède à une analyse du code javascript pour tenter de récupérer
- // certains fichiers évidents.
- // C'est devenu obligatoire vu le nombre de pages qui intègrent
- // des images réactives par exemple
+
+ // Ici on procède à une analyse du code javascript pour tenter de récupérer
+ // certains fichiers évidents.
+ // C'est devenu obligatoire vu le nombre de pages qui intègrent
+ // des images réactives par exemple
}
} else if (inscript) {
+
+#if 0
+ /* Check // javascript comments */
+ if (*adr == 10 || *adr == 13) {
+ inscript_check_comments = 1;
+ inscript_in_comments = 0;
+ }
+ else if (inscript_check_comments) {
+ if (!is_realspace(*adr)) {
+ inscript_check_comments = 0;
+ if (adr[0] == '/' && adr[1] == '/') {
+ inscript_in_comments = 1;
+ }
+ }
+ }
+#endif
+
+ /* Parse */
+ assertf(inscript_name != NULL);
if (
(
- (strfield(adr,"/script"))
+ (strfield(adr,"/script") && strfield(inscript_name, "script"))
||
- (strfield(adr,"/style"))
+ (strfield(adr,"/style") && strfield(inscript_name, "style"))
)
) {
char* a=adr;
@@ -705,26 +1122,29 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) {
if (*a=='<') { // sûr que c'est un tag?
inscript=0;
}
- } else {
- /*
- Script Analyzing - different types supported:
- foo="url"
- foo("url") or foo(url)
- foo "url"
+ } else if (inscript_state_pos == INSCRIPT_START /*!inscript_in_comments*/) {
+ /*
+ Script Analyzing - different types supported:
+ foo="url"
+ foo("url") or foo(url)
+ foo "url"
*/
int nc;
char expected = '='; // caractère attendu après
char* expected_end = ";";
int can_avoid_quotes=0;
char quotes_replacement='\0';
+ int ensure_not_mime=0;
if (inscript_tag)
expected_end=";\"\'"; // voir a href="javascript:doc.location='foo'"
nc = strfield(adr,".src"); // nom.src="image";
if (!nc) nc = strfield(adr,".location"); // document.location="doc"
+ if (!nc) nc = strfield(adr,":location"); // javascript:location="doc"
if (!nc) nc = strfield(adr,".href"); // document.location="doc"
if (!nc) if ( (nc = strfield(adr,".open")) ) { // window.open("doc",..
expected='('; // parenthèse
expected_end="),"; // fin: virgule ou parenthèse
+ ensure_not_mime=1; //* ensure the url is not a mime type */
}
if (!nc) if ( (nc = strfield(adr,".replace")) ) { // window.replace("url")
expected='('; // parenthèse
@@ -734,7 +1154,9 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) {
expected='('; // parenthèse
expected_end=")"; // fin: parenthèse
}
- if (!nc) if ( (nc = strfield(adr,"url")) ) { // url(url)
+ if (!nc) if ( (nc = strfield(adr,"url")) && (!isalnum(*(adr - 1)))
+ && *(adr - 1) != '_'
+ ) { // url(url)
expected='('; // parenthèse
expected_end=")"; // fin: parenthèse
can_avoid_quotes=1;
@@ -773,15 +1195,27 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) {
if ((strchr(expected_end,*c)) || (*c=='\n') || (*c=='\r')) {
c-=(ndelim+1);
if ((int) (c - a + 1)) {
- if ((opt.debug>1) && (opt.log!=NULL)) {
- char str[512];
- str[0]='\0';
- strncat(str,a,minimum((int) (c - a + 1),32));
- fspc(opt.log,"debug"); fprintf(opt.log,"link detected in javascript: %s"LF,str); test_flush;
+ if (ensure_not_mime) {
+ int i = 0;
+ while(a != NULL && hts_main_mime[i] != NULL && hts_main_mime[i][0] != '\0') {
+ int p;
+ if ((p=strfield(a, hts_main_mime[i])) && a[p] == '/') {
+ a=NULL;
+ }
+ i++;
+ }
}
- p=(int) (a - adr); // p non nul: TRAITER CHAINE COMME FICHIER
- if (can_avoid_quotes) {
- ending_p=quotes_replacement;
+ if (a != NULL) {
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ char str[512];
+ str[0]='\0';
+ strncatbuff(str,a,minimum((int) (c - a + 1),32));
+ fspc(opt->log,"debug"); fprintf(opt->log,"link detected in javascript: %s"LF,str); test_flush;
+ }
+ p=(int) (a - adr); // p non nul: TRAITER CHAINE COMME FICHIER
+ if (can_avoid_quotes) {
+ ending_p=quotes_replacement;
+ }
}
}
}
@@ -808,176 +1242,205 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) {
} else if (isspace((unsigned char)*adr)) {
intag_startattr=adr+1; // attribute in tag (for dirty parsing)
}
-
-
- // ------------------------------------------------------------
- // dernier recours - parsing "sale" : détection systématique des .gif, etc.
- // risque: générer de faux fichiers parazites
- // fix: ne parse plus dans les commentaires
- // ------------------------------------------------------------
- if ( (opt.parseall) && (ptr>0) && (!in_media) ) { // option parsing "brut"
- int incomment_justquit=0;
- if (!is_realspace(*adr)) {
- int noparse=0;
-
- // Gestion des /* */
- if (inscript) {
- if (parseall_incomment) {
- if ((*adr=='/') && (*(adr-1)=='*'))
- parseall_incomment=0;
- incomment_justquit=1; // ne pas noter dernier caractère
- } else {
- if ((*adr=='/') && (*(adr+1)=='*'))
- parseall_incomment=1;
- }
- } else
- parseall_incomment=0;
-
- /* vérifier que l'on est pas dans un <!-- --> pur */
- if ( (!intag) && (incomment) && (!inscript))
- noparse=1; /* commentaire */
-
- // recherche d'URLs
- if ((!parseall_incomment) && (!noparse)) {
- if (!p) { // non déja trouvé
- if (adr != r.adr) { // >1 caractère
- // scanner les chaines
- if ((*adr == '\"') || (*adr=='\'')) { // "xx.gif" 'xx.gif'
- if (strchr("=(,",parseall_lastc)) { // exemple: a="img.gif..
- char *a=adr;
- char stop=*adr; // " ou '
- int count=0;
-
- // sauter caractères
+
+
+ // ------------------------------------------------------------
+ // dernier recours - parsing "sale" : détection systématique des .gif, etc.
+ // risque: générer de faux fichiers parazites
+ // fix: ne parse plus dans les commentaires
+ // ------------------------------------------------------------
+ if ( (opt->parseall) && (ptr>0) && (!in_media) /* && (!inscript_in_comments)*/ ) { // option parsing "brut"
+ //int incomment_justquit=0;
+ if (!is_realspace(*adr)) {
+ int noparse=0;
+
+ // Gestion des /* */
+#if 0
+ if (inscript) {
+ if (parseall_incomment) {
+ if ((*adr=='/') && (*(adr-1)=='*'))
+ parseall_incomment=0;
+ incomment_justquit=1; // ne pas noter dernier caractère
+ } else {
+ if ((*adr=='/') && (*(adr+1)=='*'))
+ parseall_incomment=1;
+ }
+ } else
+ parseall_incomment=0;
+#endif
+ /* ensure automate state 0 (not in comments, quotes..) */
+ if (inscript && (
+ inscript_state_pos != INSCRIPT_INQUOTE && inscript_state_pos != INSCRIPT_INQUOTE2
+ ) ) {
+ noparse=1;
+ }
+
+ /* vérifier que l'on est pas dans un <!-- --> pur */
+ if ( (!intag) && (incomment) && (!inscript))
+ noparse=1; /* commentaire */
+
+ // recherche d'URLs
+ if (!noparse) {
+ //if ((!parseall_incomment) && (!noparse)) {
+ if (!p) { // non déja trouvé
+ if (adr != r->adr) { // >1 caractère
+ // scanner les chaines
+ if ((*adr == '\"') || (*adr=='\'')) { // "xx.gif" 'xx.gif'
+ if (strchr("=(,",parseall_lastc)) { // exemple: a="img.gif.. (handles comments)
+ char *a=adr;
+ char stop=*adr; // " ou '
+ int count=0;
+
+ // sauter caractères
+ a++;
+ // copier
+ while((*a) && (*a!='\'') && (*a!='\"') && (count<HTS_URLMAXSIZE)) { count++; a++; }
+
+ // ok chaine terminée par " ou '
+ if ((*a == stop) && (count<HTS_URLMAXSIZE) && (count>0)) {
+ char c;
+ char* aend;
+ //
+ aend=a; // sauver début
a++;
- // copier
- while((*a) && (*a!='\'') && (*a!='\"') && (count<HTS_URLMAXSIZE)) { count++; a++; }
-
- // ok chaine terminée par " ou '
- if ((*a == stop) && (count<HTS_URLMAXSIZE) && (count>0)) {
- char c;
- char* aend;
+ while(is_taborspace(*a)) a++;
+ c=*a;
+ if (strchr("),;>/+\r\n",c)) { // exemple: ..img.gif";
+ // le / est pour funct("img.gif" /* URL */);
+ char tempo[HTS_URLMAXSIZE*2];
+ char type[256];
+ int url_ok=0; // url valide?
+ tempo[0]='\0'; type[0]='\0';
//
- aend=a; // sauver début
- a++;
- while(is_taborspace(*a)) a++;
- c=*a;
- if (strchr("),;>/+\r\n",c)) { // exemple: ..img.gif";
- // le / est pour funct("img.gif" /* URL */);
- char tempo[HTS_URLMAXSIZE*2];
- char type[256];
- int url_ok=0; // url valide?
- tempo[0]='\0'; type[0]='\0';
- //
- strncat(tempo,adr+1,count);
- //
- if ((!strchr(tempo,' ')) || inscript) { // espace dedans: méfiance! (sauf dans code javascript)
- int invalid_url=0;
-
- // escape
- unescape_amp(tempo);
-
- // Couper au # ou ? éventuel
- {
- char* a=strchr(tempo,'#');
- if (a)
- *a='\0';
- a=strchr(tempo,'?');
- if (a)
- *a='\0';
- }
-
- // vérifier qu'il n'y a pas de caractères spéciaux
- if (!strnotempty(tempo))
- invalid_url=1;
- else if (strchr(tempo,'*')
- || strchr(tempo,'<')
- || strchr(tempo,'>'))
- invalid_url=1;
-
- /* non invalide? */
- if (!invalid_url) {
- // Un plus à la fin? Alors ne pas prendre sauf si extension ("/toto.html#"+tag)
- if (c!='+') { // PAS de plus à la fin
- char* a;
- // "Comparisons of scheme names MUST be case-insensitive" (RFC2616)
- //if ((strncmp(tempo,"http://",7)==0) || (strncmp(tempo,"ftp://",6)==0)) // ok pas de problème
- if (
- (strfield(tempo,"http:"))
- || (strfield(tempo,"ftp:"))
+ strncatbuff(tempo,adr+1,count);
+ //
+ if ((!strchr(tempo,' ')) || inscript) { // espace dedans: méfiance! (sauf dans code javascript)
+ int invalid_url=0;
+
+ // escape
+ unescape_amp(tempo);
+
+ // Couper au # ou ? éventuel
+ {
+ char* a=strchr(tempo,'#');
+ if (a)
+ *a='\0';
+ a=strchr(tempo,'?');
+ if (a)
+ *a='\0';
+ }
+
+ // vérifier qu'il n'y a pas de caractères spéciaux
+ if (!strnotempty(tempo))
+ invalid_url=1;
+ else if (strchr(tempo,'*')
+ || strchr(tempo,'<')
+ || strchr(tempo,'>')
+ || strchr(tempo,',') /* list of files ? */
+ || strchr(tempo,'\"') /* potential parsing bug */
+ || strchr(tempo,'\'') /* potential parsing bug */
+ )
+ invalid_url=1;
+ else if (tempo[0] == '.' && isalnum(tempo[1])) // ".gif"
+ invalid_url=1;
+
+ /* non invalide? */
+ if (!invalid_url) {
+ // Un plus à la fin? Alors ne pas prendre sauf si extension ("/toto.html#"+tag)
+ if (c!='+') { // PAS de plus à la fin
+#if 0
+ char* a;
+#endif
+ // "Comparisons of scheme names MUST be case-insensitive" (RFC2616)
+ //if ((strncmp(tempo,"http://",7)==0) || (strncmp(tempo,"ftp://",6)==0)) // ok pas de problème
+ if (
+ (strfield(tempo,"http:"))
+ || (strfield(tempo,"ftp:"))
#if HTS_USEOPENSSL
- || (strfield(tempo,"https:"))
+ || (
+ SSL_is_available &&
+ (strfield(tempo,"https:"))
+ )
#endif
- ) // ok pas de problème
+ ) // ok pas de problème
+ url_ok=1;
+ else if (tempo[strlen(tempo)-1]=='/') { // un slash: ok..
+ if (inscript) // sinon si pas javascript, méfiance (répertoire style base?)
url_ok=1;
- else if (tempo[strlen(tempo)-1]=='/') { // un slash: ok..
- if (inscript) // sinon si pas javascript, méfiance (répertoire style base?)
+ }
+#if 0
+ else if ((a=strchr(tempo,'/'))) { // un slash: ok..
+ if (inscript) { // sinon si pas javascript, méfiance (style "text/css")
+ if (strchr(a+1,'/')) // un seul / : abandon (STYLE type='text/css')
+ if (!strchr(tempo,' ')) // avoid spaces (too dangerous for comments)
url_ok=1;
- } else if ((a=strchr(tempo,'/'))) { // un slash: ok..
- if (inscript) { // sinon si pas javascript, méfiance (style "text/css")
- if (strchr(a+1,'/')) // un seul / : abandon (STYLE type='text/css')
- url_ok=1;
- }
}
}
- // Prendre si extension reconnue
- if (!url_ok) {
- get_httptype(type,tempo,0);
- if (strnotempty(type)) // type reconnu!
- url_ok=1;
- else if (is_dyntype(get_ext(tempo))) // reconnu php,cgi,asp..
- url_ok=1;
- // MAIS pas les foobar@aol.com !!
- if (strchr(tempo,'@'))
- url_ok=0;
- }
- //
- // Ok, cela pourrait être une URL
- if (url_ok) {
-
- // Check if not fodbidden tag (id,name..)
- if (intag_start_valid) {
- if (intag_start)
- if (intag_startattr)
- if (intag)
- if (!inscript)
- if (!incomment) {
- int i=0,nop=0;
- while( (nop==0) && (strnotempty(hts_nodetect[i])) ) {
- nop=rech_tageq(intag_startattr,hts_nodetect[i]);
- i++;
- }
- // Forbidden tag
- if (nop) {
- url_ok=0;
- if ((opt.debug>1) && (opt.log!=NULL)) {
- fspc(opt.log,"debug"); fprintf(opt.log,"dirty parsing: bad tag avoided: %s"LF,hts_nodetect[i-1]); test_flush;
- }
+#endif
+ }
+ // Prendre si extension reconnue
+ if (!url_ok) {
+ get_httptype(type,tempo,0);
+ if (strnotempty(type)) // type reconnu!
+ url_ok=1;
+ else if (is_dyntype(get_ext(tempo))) // reconnu php,cgi,asp..
+ url_ok=1;
+ // MAIS pas les foobar@aol.com !!
+ if (strchr(tempo,'@'))
+ url_ok=0;
+ }
+ //
+ // Ok, cela pourrait être une URL
+ if (url_ok) {
+
+ // Check if not forbidden tag (id,name..)
+ if (intag_start_valid) {
+ if (intag_start)
+ if (intag_startattr)
+ if (intag)
+ if (!inscript)
+ if (!incomment) {
+ int i=0,nop=0;
+ while( (nop==0) && (strnotempty(hts_nodetect[i])) ) {
+ nop=rech_tageq(intag_startattr,hts_nodetect[i]);
+ i++;
+ }
+ // Forbidden tag
+ if (nop) {
+ url_ok=0;
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"dirty parsing: bad tag avoided: %s"LF,hts_nodetect[i-1]); test_flush;
}
}
- }
-
-
- // Accepter URL, on la traitera comme une URL normale!!
- if (url_ok)
- p=1;
-
+ }
+ }
+
+
+ // Accepter URL, on la traitera comme une URL normale!!
+ if (url_ok) {
+ valid_p = 1;
+ p = 0;
}
+
}
}
}
+ }
}
}
}
}
- } // p == 0
-
- // plus dans un commentaire
- if (!incomment_justquit)
- parseall_lastc=*adr; // caractère avant le prochain
+ } // p == 0
} // not in comment
+ // plus dans un commentaire
+ if ( inscript_state_pos == INSCRIPT_START
+ && inscript_state_pos_prev == INSCRIPT_START) {
+ parseall_lastc=*adr; // caractère avant le prochain
+ }
+
+
} // if realspace
} // if parseall
@@ -992,14 +1455,16 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) {
char* quote_adr=NULL; /* adresse du ? dans l'adresse */
int ok=1;
char quote='\0';
+ int quoteinscript=0;
+ int noquote=0;
// si nofollow ou un stop a été déclenché, réécrire tous les liens en externe
- if ((nofollow) || (opt.state.stop))
+ if ((nofollow) || (opt->state.stop))
p_nocatch=1;
-
+
// écrire codebase avant, flusher avant code
if ((p_type==-1) || (p_type==-2)) {
- if ((opt.getmode & 1) && (ptr>0)) {
+ if ((opt->getmode & 1) && (ptr>0)) {
HT_ADD_ADR; // refresh
}
lastsaved=adr; // dernier écrit+1
@@ -1007,20 +1472,31 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) {
// sauter espaces
adr+=p;
- while((is_space(*adr)) && (quote=='\0')) {
+ while( ( is_space(*adr) || (
+ inscriptgen
+ && adr[0] == '\\'
+ && is_space(adr[1])
+ )
+ )
+ && quote == '\0'
+ ) {
if (!quote)
- if ((*adr=='\"') || (*adr=='\''))
+ if ((*adr=='\"') || (*adr=='\'')) {
quote=*adr; // on doit attendre cela à la fin
- // puis quitter
+ if (inscriptgen && *(adr - 1) == '\\') {
+ quoteinscript=1; /* will wait for \" */
+ }
+ }
+ // puis quitter
adr++; // sauter les espaces, "" et cie
}
-
+
/* Stop at \n (LF) if primary links*/
if (ptr == 0)
quote='\n';
/* s'arrêter que ce soit un ' ou un " : pour document.write('<img src="foo'+a); par exemple! */
else if (inscript)
- quote='\0';
+ noquote=1;
// sauter éventuel \" ou \' javascript
if (inscript) { // on est dans un obj.write("..
@@ -1035,7 +1511,7 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) {
if (p_searchMETAURL) {
int l=0;
while(
- (adr + l + 4 < r.adr + r.size)
+ (adr + l + 4 < r->adr + r->size)
&& (!strfield(adr+l,"URL="))
&& (l<128) ) l++;
if (!strfield(adr+l,"URL="))
@@ -1043,15 +1519,19 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) {
else
adr+=(l+4);
}
-
+
/* éviter les javascript:document.location=.. : les parser, plutôt */
if (ok!=-1) {
- if (strfield(adr,"javascript:")) {
+ if (strfield(adr,"javascript:")
+ && ! inscript /* we don't want to parse 'javascript:' inside document.write inside scripts */
+ ) {
ok=-1;
/*
On est désormais dans du code javascript
*/
+ inscript_name="";
inscript_tag=inscript=1;
+ inscript_state_pos=INSCRIPT_START;
inscript_tag_lastc=quote; /* à attendre à la fin */
}
}
@@ -1065,7 +1545,7 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) {
// ne pas flusher après code si on doit écrire le codebase avant!
if ((p_type!=-1) && (p_type!=2) && (p_type!=-2)) {
- if ((opt.getmode & 1) && (ptr>0)) {
+ if ((opt->getmode & 1) && (ptr>0)) {
HT_ADD_ADR; // refresh
}
lastsaved=adr; // dernier écrit+1
@@ -1086,7 +1566,11 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) {
if (ok > 0) {
//if (*eadr!=' ') {
if (is_space(*eadr)) { // guillemets,CR, etc
- if ((!quote) || (*eadr==quote)) // si pas d'attente de quote spéciale ou si quote atteinte
+ if (
+ ( *eadr == quote && ( !quoteinscript || *(eadr -1) == '\\') ) // end quote
+ || ( noquote && (*eadr == '\"' || *eadr == '\'') ) // end at any quote
+ || (!noquote && quote == '\0' && is_realspace(*eadr) ) // unquoted href
+ ) // si pas d'attente de quote spéciale ou si quote atteinte
ok=0;
} else if (ending_p && (*eadr==ending_p))
ok=0;
@@ -1114,7 +1598,7 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) {
//}
}
eadr++;
- } while(ok==1);
+ } while(ok==1);
// Empty link detected
if ( (((int) (eadr - adr))) <= 1) { // link empty
@@ -1122,13 +1606,18 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) {
if (*adr != '#') { // Not empty+unique #
if ( (((int) (eadr - adr)) == 1)) { // 1=link empty with delim (end_adr-start_adr)
if (quote) {
- if ((opt.getmode & 1) && (ptr>0)) {
+ if ((opt->getmode & 1) && (ptr>0)) {
HT_ADD("#"); // We add this for a <href="">
}
}
}
}
}
+
+ // This is a dirty and horrible hack to avoid parsing an Adobe GoLive bogus tag
+ if (strfield(adr, "(Empty Reference!)")) {
+ ok=-1; // No
+ }
}
@@ -1145,7 +1634,7 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) {
//printf("link: %s\n",lien);
// supprimer les espaces
while((lien[strlen(lien)-1]==' ') && (strnotempty(lien))) lien[strlen(lien)-1]='\0';
-
+
#if HTS_STRIP_DOUBLE_SLASH
// supprimer les // en / (sauf pour http://)
@@ -1169,14 +1658,14 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) {
} else {
char tempo[HTS_URLMAXSIZE*2];
tempo[0]='\0';
- strncat(tempo,a,(int) p - (int) a);
- strcat (tempo,p+1);
- strcpy(a,tempo); // recopier
+ strncatbuff(tempo,a,(int) p - (int) a);
+ strcatbuff (tempo,p+1);
+ strcpybuff(a,tempo); // recopier
}
}
}
#endif
-
+
} else
lien[0]='\0'; // erreur
@@ -1194,57 +1683,61 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) {
// 0: autorisé
// 1: interdit (patcher tout de même adresse)
- if ((opt.debug>1) && (opt.log!=NULL)) {
- fspc(opt.log,"debug"); fprintf(opt.log,"link detected in html: %s"LF,lien); test_flush;
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"link detected in html: %s"LF,lien); test_flush;
}
-
+
// external check
#if HTS_ANALYSTE
if (!hts_htmlcheck_linkdetected(lien)) {
error=1; // erreur
- if (opt.errlog) {
- fspc(opt.errlog,"error"); fprintf(opt.errlog,"Link %s refused by external wrapper"LF,lien);
+ if (opt->errlog) {
+ fspc(opt->errlog,"error"); fprintf(opt->errlog,"Link %s refused by external wrapper"LF,lien);
test_flush;
}
}
#endif
// purger espaces de début et fin, CR,LF résiduels
- // (IMG SRC="foo.<\n>gif")
+ // (IMG SRC="foo.<\n><\t>gif<\t>")
{
- char* a;
- while (is_realspace(lien[0])) {
- char tempo[HTS_URLMAXSIZE*2];
- tempo[0]='\0';
- strcpy(tempo,lien+1);
- strcpy(lien,tempo);
- }
- while(strnotempty(lien)
- && (is_realspace(lien[max(0,(int)(strlen(lien))-1)])) ) {
- lien[strlen(lien)-1]='\0';
+ char* a = lien;
+ int llen;
+
+ // strip ending spaces
+ llen = ( *a != '\0' ) ? strlen(a) : 0;
+ while(llen > 0 && is_realspace(lien[llen - 1]) ) {
+ a[--llen]='\0';
}
- while ((a=strchr(lien,'\n'))) {
- char tempo[HTS_URLMAXSIZE*2];
- tempo[0]='\0';
- strncat(tempo,lien,(int) (a - lien));
- strcat(tempo,a+1);
- strcpy(lien,tempo);
- }
- while ((a=strchr(lien,'\r'))) {
- char tempo[HTS_URLMAXSIZE*2];
- tempo[0]='\0';
- strncat(tempo,lien,(int) (a - lien));
- strcat(tempo,a+1);
- strcpy(lien,tempo);
+ // skip leading ones
+ while(is_realspace(*a)) a++;
+ // strip cr, lf, tab inside URL
+ llen = 0;
+ while(*a) {
+ if (*a != '\n' && *a != '\r' && *a != '\t') {
+ lien[llen++] = *a;
+ }
+ a++;
}
+ lien[llen] = '\0';
}
+
+ // commas are forbidden
+ if (archivetag_p) {
+ if (strchr(lien, ',')) {
+ error=1; // erreur
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"link rejected (multiple-archive) %s"LF,lien); test_flush;
+ }
+ }
+ }
/* Unescape/escape %20 and other &nbsp; */
{
char query[HTS_URLMAXSIZE*2];
char* a=strchr(lien,'?');
if (a) {
- strcpy(query,a);
+ strcpybuff(query,a);
*a='\0';
} else
query[0]='\0';
@@ -1252,10 +1745,11 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) {
unescape_amp(lien);
unescape_amp(query);
// décoder l'inutile (%2E par exemple) et coder espaces
- // XXXXXXXXXXXXXXXXX strcpy(lien,unescape_http(lien));
- strcpy(lien,unescape_http_unharm(lien, (no_esc_utf)?0:1));
+ // XXXXXXXXXXXXXXXXX strcpybuff(lien,unescape_http(lien));
+ strcpybuff(lien,unescape_http_unharm(lien, (no_esc_utf)?0:1));
+ escape_remove_control(lien);
escape_spc_url(lien);
- strcat(lien,query); /* restore */
+ strcatbuff(lien,query); /* restore */
}
// convertir les éventuels \ en des / pour éviter des problèmes de reconnaissance!
@@ -1267,11 +1761,11 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) {
// supprimer le(s) ./
while ((lien[0]=='.') && (lien[1]=='/')) {
char tempo[HTS_URLMAXSIZE*2];
- strcpy(tempo,lien+2);
- strcpy(lien,tempo);
+ strcpybuff(tempo,lien+2);
+ strcpybuff(lien,tempo);
}
if (strnotempty(lien)==0) // sauf si plus de nom de fichier
- strcpy(lien,"./");
+ strcpybuff(lien,"./");
// vérifie les /~machin -> /~machin/
// supposition dangereuse?
@@ -1282,7 +1776,7 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) {
// éviter aussi index~1.html
while (((int) a>(int) lien) && (*a!='~') && (*a!='/') && (*a!='.')) a--;
if (*a=='~') {
- strcat(lien,"/"); // ajouter slash
+ strcatbuff(lien,"/"); // ajouter slash
}
}
#endif
@@ -1305,7 +1799,7 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) {
} while((b != a) && (b));
}
}
-
+
// éliminer les éventuels :80 (port par défaut!)
if (link_has_authority(lien)) {
char * a;
@@ -1329,9 +1823,9 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) {
if (port==defport) { // port 80, default - c'est débile
char tempo[HTS_URLMAXSIZE*2];
tempo[0]='\0';
- strncat(tempo,lien,(int) (a - lien));
- strcat(tempo,a+3); // sauter :80
- strcpy(lien,tempo);
+ strncatbuff(tempo,lien,(int) (a - lien));
+ strcatbuff(tempo,a+3); // sauter :80
+ strcpybuff(lien,tempo);
}
}
}
@@ -1339,9 +1833,9 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) {
// filtrer les parazites (mailto & cie)
/*
if (strfield(lien,"mailto:")) { // ne pas traiter
- error=1;
+ error=1;
} else if (strfield(lien,"news:")) { // ne pas traiter
- error=1;
+ error=1;
}
*/
@@ -1351,16 +1845,16 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) {
char *a = lien+strlen(lien)-1;
while(( a > lien) && (*a!='/') && (*a!='.')) a--;
if (*a != '.')
- strcat(lien,".class"); // ajouter .class
+ strcatbuff(lien,".class"); // ajouter .class
else if (!strfield2(a,".class"))
- strcat(lien,".class"); // idem
+ strcatbuff(lien,".class"); // idem
}
}
// si c'est un chemin, alors vérifier (toto/toto.html -> http://www/toto/)
if (!error) {
- if ((opt.debug>1) && (opt.log!=NULL)) {
- fspc(opt.log,"debug"); fprintf(opt.log,"position link check %s"LF,lien); test_flush;
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"position link check %s"LF,lien); test_flush;
}
if ((p_type==2) || (p_type==-2)) { // code ou codebase
@@ -1368,14 +1862,24 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) {
if (p_type==-2) { // codebase
if (strnotempty(lien)) {
if (fil[strlen(lien)-1]!='/') { // pas répertoire
- strcat(lien,"/");
+ strcatbuff(lien,"/");
}
}
}
+
+ /* base has always authority */
+ if (p_type==2 && !link_has_authority(lien)) {
+ char tmp[HTS_URLMAXSIZE*2];
+ strcpybuff(tmp, "http://");
+ strcatbuff(tmp, lien);
+ strcpybuff(lien, tmp);
+ }
+
/* only one ending / (bug on some pages) */
if ((int)strlen(lien)>2) {
- while( (lien[strlen(lien)-2]=='/') && ((int)strlen(lien)>2) ) /* double // (bug) */
- lien[strlen(lien)-1]='\0';
+ int len = (int) strlen(lien);
+ while(len > 1 && lien[len-1] == '/' && lien[len-2] == '/' ) /* double // (bug) */
+ lien[--len]='\0';
}
// copier nom host si besoin est
if (!link_has_authority(lien)) { // pas de http://
@@ -1383,11 +1887,11 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) {
if (ident_url_relatif(lien,urladr,urlfil,adr2,fil2)<0) {
error=1;
} else {
- strcpy(lien,"http://");
- strcat(lien,adr2);
+ strcpybuff(lien,"http://");
+ strcatbuff(lien,adr2);
if (*fil2!='/')
- strcat(lien,"/");
- strcat(lien,fil2);
+ strcatbuff(lien,"/");
+ strcatbuff(lien,fil2);
{
char* a;
a=lien+strlen(lien)-1;
@@ -1397,12 +1901,12 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) {
}
}
//char tempo[HTS_URLMAXSIZE*2];
- //strcpy(tempo,"http://");
- //strcat(tempo,urladr); // host
+ //strcpybuff(tempo,"http://");
+ //strcatbuff(tempo,urladr); // host
//if (*lien!='/')
- // strcat(tempo,"/");
- //strcat(tempo,lien);
- //strcpy(lien,tempo);
+ // strcatbuff(tempo,"/");
+ //strcatbuff(tempo,lien);
+ //strcpybuff(lien,tempo);
}
}
@@ -1421,19 +1925,19 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) {
// stocker base ou codebase?
switch(p_type) {
case 2: {
- //if (*lien!='/') strcat(base,"/");
- strcpy(base,lien);
+ //if (*lien!='/') strcatbuff(base,"/");
+ strcpybuff(base,lien);
}
break; // base
case -2: {
- //if (*lien!='/') strcat(codebase,"/");
- strcpy(codebase,lien);
+ //if (*lien!='/') strcatbuff(codebase,"/");
+ strcpybuff(codebase,lien);
}
break; // base
}
- if ((opt.debug>1) && (opt.log!=NULL)) {
- fspc(opt.log,"debug"); fprintf(opt.log,"code/codebase link %s base %s"LF,lien,base); test_flush;
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"code/codebase link %s base %s"LF,lien,base); test_flush;
}
//printf("base code: %s - %s\n",lien,base);
}
@@ -1449,438 +1953,463 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) {
// ajouter chemin de base href..
if (strnotempty(_base)) { // considérer base
if (!link_has_authority(lien)) { // non absolue
- //if (*lien!='/') { // non absolu sur le site (/)
- if ( ((int) strlen(_base)+(int) strlen(lien))<HTS_URLMAXSIZE) {
- // mailto: and co: do NOT add base
- if (ident_url_relatif(lien,urladr,urlfil,adr,fil)>=0) {
- char tempo[HTS_URLMAXSIZE*2];
- // base est absolue
- strcpy(tempo,_base);
- strcat(tempo,lien + ((*lien=='/')?1:0) );
- strcpy(lien,tempo); // patcher en considérant base
- // ** vérifier que ../ fonctionne (ne doit pas arriver mais bon..)
-
- if ((opt.debug>1) && (opt.log!=NULL)) {
- fspc(opt.log,"debug"); fprintf(opt.log,"link modified with code/codebase %s"LF,lien); test_flush;
+ if (*lien!='/') { // non absolu sur le site (/)
+ if ( ((int) strlen(_base)+(int) strlen(lien))<HTS_URLMAXSIZE) {
+ // mailto: and co: do NOT add base
+ if (ident_url_relatif(lien,urladr,urlfil,adr,fil)>=0) {
+ char tempo[HTS_URLMAXSIZE*2];
+ // base est absolue
+ strcpybuff(tempo,_base);
+ strcatbuff(tempo,lien + ((*lien=='/')?1:0) );
+ strcpybuff(lien,tempo); // patcher en considérant base
+ // ** vérifier que ../ fonctionne (ne doit pas arriver mais bon..)
+
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"link modified with code/codebase %s"LF,lien); test_flush;
+ }
+ }
+ } else {
+ error=1; // erreur
+ if (opt->errlog) {
+ fspc(opt->errlog,"error"); fprintf(opt->errlog,"Link %s too long with base href"LF,lien);
+ test_flush;
}
}
} else {
- error=1; // erreur
- if (opt.errlog) {
- fspc(opt.errlog,"error"); fprintf(opt.errlog,"Link %s too long with base href"LF,lien);
- test_flush;
+ char badr[HTS_URLMAXSIZE*2], bfil[HTS_URLMAXSIZE*2];
+ if (ident_url_absolute(_base, badr, bfil) >=0 ) {
+ if ( ((int) strlen(badr)+(int) strlen(lien)) < HTS_URLMAXSIZE) {
+ char tempo[HTS_URLMAXSIZE*2];
+ // base est absolue
+ tempo[0] = '\0';
+ if (!link_has_authority(badr)) {
+ strcatbuff(tempo, "http://");
+ }
+ strcatbuff(tempo,badr);
+ strcatbuff(tempo,lien);
+ strcpybuff(lien,tempo); // patcher en considérant base
+
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"link modified with code/codebase %s"LF,lien); test_flush;
+ }
+ } else {
+ error=1; // erreur
+ if (opt->errlog) {
+ fspc(opt->errlog,"error"); fprintf(opt->errlog,"Link %s too long with base href"LF,lien);
+ test_flush;
+ }
+ }
}
}
- //}
}
}
}
+ }
+
+
+ // transformer lien quelconque (http, relatif, etc) en une adresse
+ // et un chemin+fichier (adr,fil)
+ if (!error) {
+ int reponse;
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"build relative link %s with %s%s"LF,lien,relativeurladr,relativeurlfil); test_flush;
}
-
-
- // transformer lien quelconque (http, relatif, etc) en une adresse
- // et un chemin+fichier (adr,fil)
- if (!error) {
- int reponse;
- if ((opt.debug>1) && (opt.log!=NULL)) {
- fspc(opt.log,"debug"); fprintf(opt.log,"build relative link %s with %s%s"LF,lien,urladr,urlfil); test_flush;
- }
- if ((reponse=ident_url_relatif(lien,urladr,urlfil,adr,fil))<0) {
- adr[0]='\0'; // erreur
- if (reponse==-2) {
- if (opt.errlog) {
- fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Link %s not caught (unknown ftp:// protocol)"LF,lien);
- test_flush;
- }
- } else {
- if ((opt.debug>1) && (opt.errlog!=NULL)) {
- fspc(opt.errlog,"debug"); fprintf(opt.errlog,"ident_url_relatif failed for %s with %s%s"LF,lien,urladr,urlfil); test_flush;
- }
+ if ((reponse=ident_url_relatif(lien,relativeurladr,relativeurlfil,adr,fil))<0) {
+ adr[0]='\0'; // erreur
+ if (reponse==-2) {
+ if (opt->errlog) {
+ fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Link %s not caught (unknown protocol)"LF,lien);
+ test_flush;
+ }
+ } else {
+ if ((opt->debug>1) && (opt->errlog!=NULL)) {
+ fspc(opt->errlog,"debug"); fprintf(opt->errlog,"ident_url_relatif failed for %s with %s%s"LF,lien,relativeurladr,relativeurlfil); test_flush;
}
}
} else {
- if ((opt.debug>1) && (opt.log!=NULL)) {
- fspc(opt.log,"debug"); fprintf(opt.log,"link %s not build, error detected before"LF,lien); test_flush;
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"built relative link %s with %s%s -> %s%s"LF,lien,relativeurladr,relativeurlfil,adr,fil); test_flush;
}
- adr[0]='\0';
}
-
+ } else {
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"link %s not build, error detected before"LF,lien); test_flush;
+ }
+ adr[0]='\0';
+ }
+
#if HTS_CHECK_STRANGEDIR
- // !ATTENTION!
- // Ici on teste les exotiques du genre www.truc.fr/machin (sans slash à la fin)
- // je n'ai pas encore trouvé le moyen de faire la différence entre un répertoire
- // et un fichier en http A PRIORI : je fais donc un test
- // En cas de moved xxx, on recalcule adr et fil, tout simplement
- // DEFAUT: test effectué plusieurs fois! à revoir!!!
- if ((adr[0]!='\0') && (strcmp(adr,"file://") && (p_type!=2) && (p_type!=-2)) {
- //## if ((adr[0]!='\0') && (adr[0]!=lOCAL_CHAR) && (p_type!=2) && (p_type!=-2)) {
- if (fil[strlen(fil)-1]!='/') { // pas répertoire
- if (ishtml(fil)==-2) { // pas d'extension
- char loc[HTS_URLMAXSIZE*2]; // éventuelle nouvelle position
- loc[0]='\0';
- if ((opt.debug>1) && (opt.log!=NULL)) {
- fspc(opt.log,"debug"); fprintf(opt.log,"link-check-directory: %s%s"LF,adr,fil);
- test_flush;
- }
-
- // tester éventuelle nouvelle position
- switch (http_location(adr,fil,loc).statuscode) {
- case 200: // ok au final
- if (strnotempty(loc)) { // a changé d'adresse
- if (opt.errlog) {
- fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Link %s%s has moved to %s for %s%s"LF,adr,fil,loc,urladr,urlfil);
+ // !ATTENTION!
+ // Ici on teste les exotiques du genre www.truc.fr/machin (sans slash à la fin)
+ // je n'ai pas encore trouvé le moyen de faire la différence entre un répertoire
+ // et un fichier en http A PRIORI : je fais donc un test
+ // En cas de moved xxx, on recalcule adr et fil, tout simplement
+ // DEFAUT: test effectué plusieurs fois! à revoir!!!
+ if ((adr[0]!='\0') && (strcmp(adr,"file://") && (p_type!=2) && (p_type!=-2)) {
+ //## if ((adr[0]!='\0') && (adr[0]!=lOCAL_CHAR) && (p_type!=2) && (p_type!=-2)) {
+ if (fil[strlen(fil)-1]!='/') { // pas répertoire
+ if (ishtml(fil)==-2) { // pas d'extension
+ char loc[HTS_URLMAXSIZE*2]; // éventuelle nouvelle position
+ loc[0]='\0';
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"link-check-directory: %s%s"LF,adr,fil);
+ test_flush;
+ }
+
+ // tester éventuelle nouvelle position
+ switch (http_location(adr,fil,loc).statuscode) {
+ case 200: // ok au final
+ if (strnotempty(loc)) { // a changé d'adresse
+ if (opt->errlog) {
+ fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Link %s%s has moved to %s for %s%s"LF,adr,fil,loc,urladr,urlfil);
+ test_flush;
+ }
+
+ // recalculer adr et fil!
+ if (ident_url_absolute(loc,adr,fil)==-1) {
+ adr[0]='\0'; // cancel
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"link-check-dir: %s%s"LF,adr,fil);
test_flush;
}
-
- // recalculer adr et fil!
- if (ident_url_absolute(loc,adr,fil)==-1) {
- adr[0]='\0'; // cancel
- if ((opt.debug>1) && (opt.log!=NULL)) {
- fspc(opt.log,"debug"); fprintf(opt.log,"link-check-dir: %s%s"LF,adr,fil);
- test_flush;
- }
- }
-
- }
- break;
- case -2: case -3: // timeout ou erreur grave
- if (opt.errlog) {
- fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Connection too slow for testing link %s%s (from %s%s)"LF,adr,fil,urladr,urlfil);
- test_flush;
}
- break;
+ }
+ break;
+ case -2: case -3: // timeout ou erreur grave
+ if (opt->errlog) {
+ fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Connection too slow for testing link %s%s (from %s%s)"LF,adr,fil,urladr,urlfil);
+ test_flush;
}
+ break;
}
- }
- }
-#endif
-
- // Le lien doit juste être réécrit, mais ne doit pas générer un lien
- // exemple: <FORM ACTION="url_cgi">
- if (p_nocatch) {
- forbidden_url=1; // interdire récupération du lien
- if ((opt.debug>1) && (opt.log!=NULL)) {
- fspc(opt.log,"debug"); fprintf(opt.log,"link forced external at %s%s"LF,adr,fil);
- test_flush;
+
}
+ }
+ }
+#endif
+
+ // Le lien doit juste être réécrit, mais ne doit pas générer un lien
+ // exemple: <FORM ACTION="url_cgi">
+ if (p_nocatch) {
+ forbidden_url=1; // interdire récupération du lien
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"link forced external at %s%s"LF,adr,fil);
+ test_flush;
}
-
- // Tester si un lien doit être accepté ou refusé (wizard)
- // forbidden_url=1 : lien refusé
- // forbidden_url=0 : lien accepté
- //if ((ptr>0) && (p_type!=2) && (p_type!=-2)) { // tester autorisations?
- if ((p_type!=2) && (p_type!=-2)) { // tester autorisations?
- if (!p_nocatch) {
- if (adr[0]!='\0') {
- if ((opt.debug>1) && (opt.log!=NULL)) {
- fspc(opt.log,"debug"); fprintf(opt.log,"wizard link test at %s%s.."LF,adr,fil);
- test_flush;
- }
- forbidden_url=hts_acceptlink(&opt,ptr,lien_tot,liens,
- adr,fil,
- &filters,&filptr,opt.maxfilter,
- &robots,
- &set_prio_to,
- &just_test_it);
- if ((opt.debug>1) && (opt.log!=NULL)) {
- fspc(opt.log,"debug"); fprintf(opt.log,"result for wizard link test: %d"LF,forbidden_url);
- test_flush;
- }
+ }
+
+ // Tester si un lien doit être accepté ou refusé (wizard)
+ // forbidden_url=1 : lien refusé
+ // forbidden_url=0 : lien accepté
+ //if ((ptr>0) && (p_type!=2) && (p_type!=-2)) { // tester autorisations?
+ if ((p_type!=2) && (p_type!=-2)) { // tester autorisations?
+ if (!p_nocatch) {
+ if (adr[0]!='\0') {
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"wizard link test at %s%s.."LF,adr,fil);
+ test_flush;
+ }
+ forbidden_url=hts_acceptlink(opt,ptr,lien_tot,liens,
+ adr,fil,
+ &set_prio_to,
+ &just_test_it);
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"result for wizard link test: %d"LF,forbidden_url);
+ test_flush;
}
}
}
+ }
+
+ // calculer meme_adresse
+ meme_adresse=strfield2(jump_identification(adr),jump_identification(urladr));
+
+
+
+ // Début partie sauvegarde
+
+ // ici on forme le nom du fichier à sauver, et on patche l'URL
+ if (adr[0]!='\0') {
+ // savename: simplifier les ../ et autres joyeusetés
+ char save[HTS_URLMAXSIZE*2];
+ int r_sv=0;
+ // En cas de moved, adresse première
+ char former_adr[HTS_URLMAXSIZE*2];
+ char former_fil[HTS_URLMAXSIZE*2];
+ //
+ save[0]='\0'; former_adr[0]='\0'; former_fil[0]='\0';
+ //
- // calculer meme_adresse
- meme_adresse=strfield2(jump_identification(adr),jump_identification(urladr));
-
-
-
- // Début partie sauvegarde
-
- // ici on forme le nom du fichier à sauver, et on patche l'URL
- if (adr[0]!='\0') {
- // savename: simplifier les ../ et autres joyeusetés
- char save[HTS_URLMAXSIZE*2];
- int r_sv=0;
- // En cas de moved, adresse première
- char former_adr[HTS_URLMAXSIZE*2];
- char former_fil[HTS_URLMAXSIZE*2];
- //
- save[0]='\0'; former_adr[0]='\0'; former_fil[0]='\0';
- //
-
- // nom du chemin à sauver si on doit le calculer
- // note: url_savename peut décider de tester le lien si il le trouve
- // suspect, et modifier alors adr et fil
- // dans ce cas on aura une référence directe au lieu des traditionnels
- // moved en cascade (impossible à reproduire à priori en local, lorsque des fichiers
- // gif sont impliqués par exemple)
- if ((p_type!=2) && (p_type!=-2)) { // pas base href ou codebase
- if (forbidden_url!=1) {
- char last_adr[HTS_URLMAXSIZE*2];
- last_adr[0]='\0';
- //char last_fil[HTS_URLMAXSIZE*2]="";
- strcpy(last_adr,adr); // ancienne adresse
- //strcpy(last_fil,fil); // ancien chemin
- r_sv=url_savename(adr,fil,save,former_adr,former_fil,liens[ptr]->adr,liens[ptr]->fil,&opt,liens,lien_tot,back,back_max,&cache,&hash,ptr,numero_passe);
- if (strcmp(jump_identification(last_adr),jump_identification(adr)) != 0) { // a changé
-
- // 2e test si moved
-
- // Tester si un lien doit être accepté ou refusé (wizard)
- // forbidden_url=1 : lien refusé
- // forbidden_url=0 : lien accepté
- if ((ptr>0) && (p_type!=2) && (p_type!=-2)) { // tester autorisations?
- if (!p_nocatch) {
- if (adr[0]!='\0') {
- if ((opt.debug>1) && (opt.log!=NULL)) {
- fspc(opt.log,"debug"); fprintf(opt.log,"wizard moved link retest at %s%s.."LF,adr,fil);
- test_flush;
- }
- forbidden_url=hts_acceptlink(&opt,ptr,lien_tot,liens,
- adr,fil,
- &filters,&filptr,opt.maxfilter,
- &robots,
- &set_prio_to,
- &just_test_it);
- if ((opt.debug>1) && (opt.log!=NULL)) {
- fspc(opt.log,"debug"); fprintf(opt.log,"result for wizard moved link retest: %d"LF,forbidden_url);
- test_flush;
- }
+ // nom du chemin à sauver si on doit le calculer
+ // note: url_savename peut décider de tester le lien si il le trouve
+ // suspect, et modifier alors adr et fil
+ // dans ce cas on aura une référence directe au lieu des traditionnels
+ // moved en cascade (impossible à reproduire à priori en local, lorsque des fichiers
+ // gif sont impliqués par exemple)
+ if ((p_type!=2) && (p_type!=-2)) { // pas base href ou codebase
+ if (forbidden_url!=1) {
+ char last_adr[HTS_URLMAXSIZE*2];
+ last_adr[0]='\0';
+ //char last_fil[HTS_URLMAXSIZE*2]="";
+ strcpybuff(last_adr,adr); // ancienne adresse
+ //strcpybuff(last_fil,fil); // ancien chemin
+ r_sv=url_savename(adr,fil,save,former_adr,former_fil,liens[ptr]->adr,liens[ptr]->fil,opt,liens,lien_tot,back,back_max,cache,hash,ptr,numero_passe);
+ if (strcmp(jump_identification(last_adr),jump_identification(adr)) != 0) { // a changé
+
+ // 2e test si moved
+
+ // Tester si un lien doit être accepté ou refusé (wizard)
+ // forbidden_url=1 : lien refusé
+ // forbidden_url=0 : lien accepté
+ if ((ptr>0) && (p_type!=2) && (p_type!=-2)) { // tester autorisations?
+ if (!p_nocatch) {
+ if (adr[0]!='\0') {
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"wizard moved link retest at %s%s.."LF,adr,fil);
+ test_flush;
+ }
+ forbidden_url=hts_acceptlink(opt,ptr,lien_tot,liens,
+ adr,fil,
+ &set_prio_to,
+ &just_test_it);
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"result for wizard moved link retest: %d"LF,forbidden_url);
+ test_flush;
}
}
}
-
- //import_done=1; // c'est un import!
- meme_adresse=0; // on a changé
}
- } else {
- strcpy(save,""); // dummy
+
+ //import_done=1; // c'est un import!
+ meme_adresse=0; // on a changé
}
+ } else {
+ strcpybuff(save,""); // dummy
}
- if (r_sv!=-1) { // pas d'erreur, on continue
- /* log */
- if ((opt.debug>1) && (opt.log!=NULL)) {
- fspc(opt.log,"debug");
- if (forbidden_url!=1) { // le lien va être chargé
- if ((p_type==2) || (p_type==-2)) { // base href ou codebase, pas un lien
- fprintf(opt.log,"Code/Codebase: %s%s"LF,adr,fil);
- } else if ((opt.getmode & 4)==0) {
- fprintf(opt.log,"Record: %s%s -> %s"LF,adr,fil,save);
- } else {
- if (!ishtml(fil))
- fprintf(opt.log,"Record after: %s%s -> %s"LF,adr,fil,save);
- else
- fprintf(opt.log,"Record: %s%s -> %s"LF,adr,fil,save);
- }
- } else
- fprintf(opt.log,"External: %s%s"LF,adr,fil);
- test_flush;
- }
- /* FIN log */
-
- // écrire lien
- if ((p_type==2) || (p_type==-2)) { // base href ou codebase, sauter
- lastsaved=eadr-1+1; // sauter "
- }
- /* */
- else if (opt.urlmode==0) { // URL absolue dans tous les cas
- if ((opt.getmode & 1) && (ptr>0)) { // ecrire les html
- if (!link_has_authority(adr)) {
- HT_ADD("http://");
- } else {
- char* aut = strstr(adr, "//");
- if (aut) {
- char tmp[256];
- tmp[0]='\0';
- strncat(tmp, adr, (int) (aut - adr)); // scheme
- HT_ADD(tmp); // Protocol
- HT_ADD("//");
- }
- }
-
- if (!opt.passprivacy) {
- HT_ADD(jump_protocol(adr)); // Password
- } else {
- HT_ADD(jump_identification(adr)); // No Password
+ }
+ if (r_sv!=-1) { // pas d'erreur, on continue
+ /* log */
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug");
+ if (forbidden_url!=1) { // le lien va être chargé
+ if ((p_type==2) || (p_type==-2)) { // base href ou codebase, pas un lien
+ fprintf(opt->log,"Code/Codebase: %s%s"LF,adr,fil);
+ } else if ((opt->getmode & 4)==0) {
+ fprintf(opt->log,"Record: %s%s -> %s"LF,adr,fil,save);
+ } else {
+ if (!ishtml(fil))
+ fprintf(opt->log,"Record after: %s%s -> %s"LF,adr,fil,save);
+ else
+ fprintf(opt->log,"Record: %s%s -> %s"LF,adr,fil,save);
+ }
+ } else
+ fprintf(opt->log,"External: %s%s"LF,adr,fil);
+ test_flush;
+ }
+ /* FIN log */
+
+ // écrire lien
+ if ((p_type==2) || (p_type==-2)) { // base href ou codebase, sauter
+ lastsaved=eadr-1+1; // sauter "
+ }
+ /* */
+ else if (opt->urlmode==0) { // URL absolue dans tous les cas
+ if ((opt->getmode & 1) && (ptr>0)) { // ecrire les html
+ if (!link_has_authority(adr)) {
+ HT_ADD("http://");
+ } else {
+ char* aut = strstr(adr, "//");
+ if (aut) {
+ char tmp[256];
+ tmp[0]='\0';
+ strncatbuff(tmp, adr, (int) (aut - adr)); // scheme
+ HT_ADD(tmp); // Protocol
+ HT_ADD("//");
}
- if (*fil!='/')
- HT_ADD("/");
- HT_ADD(fil);
}
- lastsaved=eadr-1; // dernier écrit+1 (enfin euh apres on fait un ++ alors hein)
+
+ if (!opt->passprivacy) {
+ HT_ADD(jump_protocol(adr)); // Password
+ } else {
+ HT_ADD(jump_identification(adr)); // No Password
+ }
+ if (*fil!='/')
+ HT_ADD("/");
+ HT_ADD(fil);
+ }
+ lastsaved=eadr-1; // dernier écrit+1 (enfin euh apres on fait un ++ alors hein)
/* */
- } else if (opt.urlmode >= 4) { // ne rien faire dans tous les cas!
+ } else if (opt->urlmode >= 4) { // ne rien faire dans tous les cas!
/* */
/* leave the link 'as is' */
/* Sinon, dépend de interne/externe */
- } else if (forbidden_url==1) { // le lien ne sera pas chargé, référence externe!
- if ((opt.getmode & 1) && (ptr>0)) {
- if (p_type!=-1) { // pas que le nom de fichier (pas classe java)
- if (!opt.external) {
- if (!link_has_authority(adr)) {
- HT_ADD("http://");
- if (!opt.passprivacy) {
- HT_ADD(adr); // Password
+ } else if (forbidden_url==1) { // le lien ne sera pas chargé, référence externe!
+ if ((opt->getmode & 1) && (ptr>0)) {
+ if (p_type!=-1) { // pas que le nom de fichier (pas classe java)
+ if (!opt->external) {
+ if (!link_has_authority(adr)) {
+ HT_ADD("http://");
+ if (!opt->passprivacy) {
+ HT_ADD(adr); // Password
+ } else {
+ HT_ADD(jump_identification(adr)); // No Password
+ }
+ if (*fil!='/')
+ HT_ADD("/");
+ HT_ADD(fil);
+ } else {
+ char* aut = strstr(adr, "//");
+ if (aut) {
+ char tmp[256];
+ tmp[0]='\0';
+ strncatbuff(tmp, adr, (int) (aut - adr)); // scheme
+ HT_ADD(tmp); // Protocol
+ HT_ADD("//");
+ if (!opt->passprivacy) {
+ HT_ADD(jump_protocol(adr)); // Password
} else {
HT_ADD(jump_identification(adr)); // No Password
}
if (*fil!='/')
HT_ADD("/");
HT_ADD(fil);
- } else {
- char* aut = strstr(adr, "//");
- if (aut) {
- char tmp[256];
- tmp[0]='\0';
- strncat(tmp, adr, (int) (aut - adr)); // scheme
- HT_ADD(tmp); // Protocol
- HT_ADD("//");
- if (!opt.passprivacy) {
- HT_ADD(jump_protocol(adr)); // Password
- } else {
- HT_ADD(jump_identification(adr)); // No Password
- }
- if (*fil!='/')
- HT_ADD("/");
- HT_ADD(fil);
- }
}
- //
- } else { // fichier/page externe, mais on veut générer une erreur
- //
- int patch_it=0;
- int add_url=0;
- char* cat_name=NULL;
- char* cat_data=NULL;
- int cat_nb=0;
- int cat_data_len=0;
-
- // ajouter lien external
- switch ( (link_has_authority(adr)) ? 1 : ( (fil[strlen(fil)-1]=='/')?1:(ishtml(fil)) ) ) {
- case 1: case -2: // html ou répertoire
- if (opt.getmode & 1) { // sauver html
- patch_it=1; // redirect
- add_url=1; // avec link?
- cat_name="external.html";
- cat_nb=0;
- cat_data=HTS_DATA_UNKNOWN_HTML;
- cat_data_len=HTS_DATA_UNKNOWN_HTML_LEN;
- }
- break;
- default: // inconnu
- // asp, cgi..
- if (is_dyntype(get_ext(fil))) {
- patch_it=1; // redirect
- add_url=1; // avec link?
- cat_name="external.html";
- cat_nb=0;
- cat_data=HTS_DATA_UNKNOWN_HTML;
- cat_data_len=HTS_DATA_UNKNOWN_HTML_LEN;
- } else if ( (strfield2(fil+max(0,(int)strlen(fil)-4),".gif"))
- || (strfield2(fil+max(0,(int)strlen(fil)-4),".jpg"))
- || (strfield2(fil+max(0,(int)strlen(fil)-4),".xbm"))
- || (ishtml(fil)!=0) ) {
- patch_it=1; // redirect
- add_url=1; // avec link aussi
- cat_name="external.gif";
- cat_nb=1;
- cat_data=HTS_DATA_UNKNOWN_GIF;
- cat_data_len=HTS_DATA_UNKNOWN_GIF_LEN;
- }
- break;
- }// html,gif
-
- if (patch_it) {
- char save[HTS_URLMAXSIZE*2];
- char tempo[HTS_URLMAXSIZE*2];
- strcpy(save,opt.path_html);
- strcat(save,cat_name);
- if (lienrelatif(tempo,save,savename)==0) {
- if (!no_esc_utf)
- escape_uri(tempo); // escape with %xx
- else
- escape_uri_utf(tempo); // escape with %xx
- HT_ADD(tempo); // page externe
- if (add_url) {
- HT_ADD("?link="); // page externe
-
- // same as above
- if (!link_has_authority(adr)) {
- HT_ADD("http://");
- if (!opt.passprivacy) {
- HT_ADD(adr); // Password
+ }
+ //
+ } else { // fichier/page externe, mais on veut générer une erreur
+ //
+ int patch_it=0;
+ int add_url=0;
+ char* cat_name=NULL;
+ char* cat_data=NULL;
+ int cat_nb=0;
+ int cat_data_len=0;
+
+ // ajouter lien external
+ switch ( (link_has_authority(adr)) ? 1 : ( (fil[strlen(fil)-1]=='/')?1:(ishtml(fil)) ) ) {
+ case 1: case -2: // html ou répertoire
+ if (opt->getmode & 1) { // sauver html
+ patch_it=1; // redirect
+ add_url=1; // avec link?
+ cat_name="external.html";
+ cat_nb=0;
+ cat_data=HTS_DATA_UNKNOWN_HTML;
+ cat_data_len=HTS_DATA_UNKNOWN_HTML_LEN;
+ }
+ break;
+ default: // inconnu
+ // asp, cgi..
+ if ( (strfield2(fil+max(0,(int)strlen(fil)-4),".gif"))
+ || (strfield2(fil+max(0,(int)strlen(fil)-4),".jpg"))
+ || (strfield2(fil+max(0,(int)strlen(fil)-4),".xbm"))
+ /*|| (ishtml(fil)!=0)*/ ) {
+ patch_it=1; // redirect
+ add_url=1; // avec link aussi
+ cat_name="external.gif";
+ cat_nb=1;
+ cat_data=HTS_DATA_UNKNOWN_GIF;
+ cat_data_len=HTS_DATA_UNKNOWN_GIF_LEN;
+ } else /* if (is_dyntype(get_ext(fil))) */ {
+ patch_it=1; // redirect
+ add_url=1; // avec link?
+ cat_name="external.html";
+ cat_nb=0;
+ cat_data=HTS_DATA_UNKNOWN_HTML;
+ cat_data_len=HTS_DATA_UNKNOWN_HTML_LEN;
+ }
+ break;
+ }// html,gif
+
+ if (patch_it) {
+ char save[HTS_URLMAXSIZE*2];
+ char tempo[HTS_URLMAXSIZE*2];
+ strcpybuff(save,opt->path_html);
+ strcatbuff(save,cat_name);
+ if (lienrelatif(tempo,save, relativesavename)==0) {
+ if (!no_esc_utf)
+ escape_uri(tempo); // escape with %xx
+ else
+ escape_uri_utf(tempo); // escape with %xx
+ HT_ADD(tempo); // page externe
+ if (add_url) {
+ HT_ADD("?link="); // page externe
+
+ // same as above
+ if (!link_has_authority(adr)) {
+ HT_ADD("http://");
+ if (!opt->passprivacy) {
+ HT_ADD(adr); // Password
+ } else {
+ HT_ADD(jump_identification(adr)); // No Password
+ }
+ if (*fil!='/')
+ HT_ADD("/");
+ HT_ADD(fil);
+ } else {
+ char* aut = strstr(adr, "//");
+ if (aut) {
+ char tmp[256];
+ tmp[0]='\0';
+ strncatbuff(tmp, adr, (int) (aut - adr) + 2); // scheme
+ HT_ADD(tmp);
+ if (!opt->passprivacy) {
+ HT_ADD(jump_protocol(adr)); // Password
} else {
HT_ADD(jump_identification(adr)); // No Password
}
if (*fil!='/')
HT_ADD("/");
HT_ADD(fil);
- } else {
- char* aut = strstr(adr, "//");
- if (aut) {
- char tmp[256];
- tmp[0]='\0';
- strncat(tmp, adr, (int) (aut - adr) + 2); // scheme
- HT_ADD(tmp);
- if (!opt.passprivacy) {
- HT_ADD(jump_protocol(adr)); // Password
- } else {
- HT_ADD(jump_identification(adr)); // No Password
- }
- if (*fil!='/')
- HT_ADD("/");
- HT_ADD(fil);
- }
}
- //
-
}
+ //
+
}
-
- // écrire fichier?
- if (verif_external(cat_nb,1)) {
- //if (!fexist(fconcat(opt.path_html,cat_name))) {
- FILE* fp = filecreate(fconcat(opt.path_html,cat_name));
- if (fp) {
- if (cat_data_len==0) { // texte
- verif_backblue(opt.path_html);
- fprintf(fp,"%s%s","<!-- Created by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->"LF,cat_data);
- } else { // data
- fwrite(cat_data,cat_data_len,1,fp);
- }
- fclose(fp);
- usercommand(0,NULL,fconcat(opt.path_html,cat_name));
+ }
+
+ // écrire fichier?
+ if (verif_external(cat_nb,1)) {
+ //if (!fexist(fconcat(opt->path_html,cat_name))) {
+ FILE* fp = filecreate(fconcat(opt->path_html,cat_name));
+ if (fp) {
+ if (cat_data_len==0) { // texte
+ verif_backblue(opt,opt->path_html);
+ fprintf(fp,"%s%s","<!-- Created by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->"LF,cat_data);
+ } else { // data
+ fwrite(cat_data,cat_data_len,1,fp);
}
+ fclose(fp);
+ usercommand(opt,0,NULL,fconcat(opt->path_html,cat_name),"","");
}
- } else { // écrire normalement le nom de fichier
- HT_ADD("http://");
- if (!opt.passprivacy) {
- HT_ADD(adr); // Password
- } else {
- HT_ADD(jump_identification(adr)); // No Password
- }
- if (*fil!='/')
- HT_ADD("/");
- HT_ADD(fil);
- }// patcher?
+ }
+ } else { // écrire normalement le nom de fichier
+ HT_ADD("http://");
+ if (!opt->passprivacy) {
+ HT_ADD(adr); // Password
+ } else {
+ HT_ADD(jump_identification(adr)); // No Password
+ }
+ if (*fil!='/')
+ HT_ADD("/");
+ HT_ADD(fil);
+ }// patcher?
} // external
} else { // que le nom de fichier (classe java)
// en gros recopie de plus bas: copier codebase et base
if (p_flush) {
char tempo[HTS_URLMAXSIZE*2]; // <-- ajouté
char tempo_pat[HTS_URLMAXSIZE*2];
-
+
// Calculer chemin
tempo_pat[0]='\0';
- strcpy(tempo,fil); // <-- ajouté
+ strcpybuff(tempo,fil); // <-- ajouté
{
char* a=strrchr(tempo,'/');
-
+
// Example: we converted code="x.y.z.foo.class" into "x/y/z/foo.class"
// we have to do the contrary now
if (add_class_dots_to_patch>0) {
@@ -1891,30 +2420,30 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) {
}
// if add_class_dots_to_patch, this is because there is a problem!!
if (add_class_dots_to_patch) {
- if (opt.errlog) {
- fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Error: can not rewind java path %s, check html code"LF,tempo);
+ if (opt->errlog) {
+ fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Error: can not rewind java path %s, check html code"LF,tempo);
test_flush;
}
}
}
-
+
// Cut path/filename
if (a) {
char tempo2[HTS_URLMAXSIZE*2];
- strcpy(tempo2,a+1); // FICHIER
- strncat(tempo_pat,tempo,(int) (a - tempo)+1); // chemin
- strcpy(tempo,tempo2); // fichier
+ strcpybuff(tempo2,a+1); // FICHIER
+ strncatbuff(tempo_pat,tempo,(int) (a - tempo)+1); // chemin
+ strcpybuff(tempo,tempo2); // fichier
}
}
// érire codebase="chemin"
- if ((opt.getmode & 1) && (ptr>0)) {
+ if ((opt->getmode & 1) && (ptr>0)) {
char tempo4[HTS_URLMAXSIZE*2];
tempo4[0]='\0';
if (strnotempty(tempo_pat)) {
HT_ADD("codebase=\"http://");
- if (!opt.passprivacy) {
+ if (!opt->passprivacy) {
HT_ADD(adr); // Password
} else {
HT_ADD(jump_identification(adr)); // No Password
@@ -1924,7 +2453,7 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) {
HT_ADD("\" ");
}
- strncat(tempo4,lastsaved,(int) (p_flush - lastsaved));
+ strncatbuff(tempo4,lastsaved,(int) (p_flush - lastsaved));
HT_ADD(tempo4); // refresh code="
HT_ADD(tempo);
}
@@ -1934,9 +2463,9 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) {
lastsaved=eadr-1;
}
/*
- else if (opt.urlmode==1) { // ABSOLU, c'est le cas le moins courant
+ else if (opt->urlmode==1) { // ABSOLU, c'est le cas le moins courant
// NE FONCTIONNE PAS!! (et est inutile)
- if ((opt.getmode & 1) && (ptr>0)) { // ecrire les html
+ if ((opt->getmode & 1) && (ptr>0)) { // ecrire les html
// écrire le lien modifié, absolu
HT_ADD("file:");
if (*save=='/')
@@ -1947,24 +2476,34 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) {
lastsaved=eadr-1; // dernier écrit+1 (enfin euh apres on fait un ++ alors hein)
}
*/
- else if (opt.urlmode==3) { // URI absolue /
- if ((opt.getmode & 1) && (ptr>0)) { // ecrire les html
- HT_ADD(fil);
- }
- lastsaved=eadr-1; // dernier écrit+1 (enfin euh apres on fait un ++ alors hein)
+ else if (opt->mimehtml) {
+ char buff[HTS_URLMAXSIZE*3];
+ HT_ADD("cid:");
+ strcpybuff(buff, adr);
+ strcatbuff(buff, fil);
+ escape_in_url(buff);
+ { char* a = buff; while((a = strchr(a, '%'))) { *a = 'X'; a++; } }
+ HT_ADD(buff);
+ lastsaved=eadr-1; // dernier écrit+1 (enfin euh apres on fait un ++ alors hein)
+ }
+ else if (opt->urlmode==3) { // URI absolue /
+ if ((opt->getmode & 1) && (ptr>0)) { // ecrire les html
+ HT_ADD(fil);
}
- else if (opt.urlmode==2) { // RELATIF
+ lastsaved=eadr-1; // dernier écrit+1 (enfin euh apres on fait un ++ alors hein)
+ }
+ else if (opt->urlmode==2) { // RELATIF
char tempo[HTS_URLMAXSIZE*2];
tempo[0]='\0';
// calculer le lien relatif
- if (lienrelatif(tempo,save,savename)==0) {
+ if (lienrelatif(tempo,save,relativesavename)==0) {
if (!no_esc_utf)
escape_uri(tempo); // escape with %xx
else
escape_uri_utf(tempo); // escape with %xx
- if ((opt.debug>1) && (opt.log!=NULL)) {
- fspc(opt.log,"debug"); fprintf(opt.log,"relative link at %s build with %s and %s: %s"LF,adr,save,savename,tempo);
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"relative link at %s build with %s and %s: %s"LF,adr,save,relativesavename,tempo);
test_flush;
}
@@ -1976,7 +2515,7 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) {
tempo_pat[0]='\0';
{
char* a=strrchr(tempo,'/');
-
+
// Example: we converted code="x.y.z.foo.class" into "x/y/z/foo.class"
// we have to do the contrary now
if (add_class_dots_to_patch>0) {
@@ -1987,23 +2526,23 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) {
}
// if add_class_dots_to_patch, this is because there is a problem!!
if (add_class_dots_to_patch) {
- if (opt.errlog) {
- fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Error: can not rewind java path %s, check html code"LF,tempo);
+ if (opt->errlog) {
+ fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Error: can not rewind java path %s, check html code"LF,tempo);
test_flush;
}
}
}
-
+
if (a) {
char tempo2[HTS_URLMAXSIZE*2];
- strcpy(tempo2,a+1);
- strncat(tempo_pat,tempo,(int) (a - tempo)+1); // chemin
- strcpy(tempo,tempo2); // fichier
+ strcpybuff(tempo2,a+1);
+ strncatbuff(tempo_pat,tempo,(int) (a - tempo)+1); // chemin
+ strcpybuff(tempo,tempo2); // fichier
}
}
// érire codebase="chemin"
- if ((opt.getmode & 1) && (ptr>0)) {
+ if ((opt->getmode & 1) && (ptr>0)) {
char tempo4[HTS_URLMAXSIZE*2];
tempo4[0]='\0';
@@ -2013,20 +2552,20 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) {
HT_ADD("\" ");
}
- strncat(tempo4,lastsaved,(int) (p_flush - lastsaved));
+ strncatbuff(tempo4,lastsaved,(int) (p_flush - lastsaved));
HT_ADD(tempo4); // refresh code="
}
}
//lastsaved=adr; // dernier écrit+1
}
- if ((opt.getmode & 1) && (ptr>0)) {
+ if ((opt->getmode & 1) && (ptr>0)) {
// écrire le lien modifié, relatif
HT_ADD(tempo);
-
+
// Add query-string, for informational purpose only
// Useless, because all parameters-pages are saved into different targets
- if (opt.includequery) {
+ if (opt->includequery) {
char* a=strchr(lien,'?');
if (a) {
HT_ADD(a);
@@ -2035,8 +2574,8 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) {
}
lastsaved=eadr-1; // dernier écrit+1 (enfin euh apres on fait un ++ alors hein)
} else {
- if (opt.errlog) {
- fprintf(opt.errlog,"Error building relative link %s and %s"LF,save,savename);
+ if (opt->errlog) {
+ fprintf(opt->errlog,"Error building relative link %s and %s"LF,save,relativesavename);
test_flush;
}
}
@@ -2046,9 +2585,9 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) {
#if 0
if (fexist(save)) { // le fichier existe..
adr[0]='\0';
- //if ((opt.debug>0) && (opt.log!=NULL)) {
- if (opt.errlog) {
- fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Link has already been written on disk, cancelled: %s"LF,save);
+ //if ((opt->debug>0) && (opt->log!=NULL)) {
+ if (opt->errlog) {
+ fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Link has already been written on disk, cancelled: %s"LF,save);
test_flush;
}
}
@@ -2057,30 +2596,30 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) {
/* Security check */
if (strlen(save) >= HTS_URLMAXSIZE) {
adr[0]='\0';
- if (opt.errlog) {
- fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Link is too long: %s"LF,save);
+ if (opt->errlog) {
+ fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Link is too long: %s"LF,save);
test_flush;
}
}
-
- if ((adr[0]!='\0') && (p_type!=2) && (p_type!=-2) && ( (forbidden_url!=1) || (just_test_it))) { // si le fichier n'existe pas, ajouter à la liste
+
+ if ((adr[0]!='\0') && (p_type!=2) && (p_type!=-2) && (forbidden_url!=1) ) { // si le fichier n'existe pas, ajouter à la liste
// n'y a-t-il pas trop de liens?
if (lien_tot+1 >= lien_max-4) { // trop de liens!
printf("PANIC! : Too many URLs : >%d [%d]\n",lien_tot,__LINE__);
- if (opt.errlog) {
- fprintf(opt.errlog,LF"Too many URLs, giving up..(>%d)"LF,lien_max);
- fprintf(opt.errlog,"To avoid that: use #L option for more links (example: -#L1000000)"LF);
+ if (opt->errlog) {
+ fprintf(opt->errlog,LF"Too many URLs, giving up..(>%d)"LF,lien_max);
+ fprintf(opt->errlog,"To avoid that: use #L option for more links (example: -#L1000000)"LF);
test_flush;
}
- if ((opt.getmode & 1) && (ptr>0)) { if (fp) { fclose(fp); fp=NULL; } }
+ if ((opt->getmode & 1) && (ptr>0)) { if (fp) { fclose(fp); fp=NULL; } }
XH_uninit; // désallocation mémoire & buffers
- return 0;
+ return -1;
} else { // noter le lien sur la listes des liens à charger
int pass_fix,dejafait=0;
// Calculer la priorité de ce lien
- if ((opt.getmode & 4)==0) { // traiter html après
+ if ((opt->getmode & 4)==0) { // traiter html après
pass_fix=0;
} else { // vérifier que ce n'est pas un !html
if (!ishtml(fil))
@@ -2092,11 +2631,11 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) {
/* If the file seems to be an html file, get depth-1 */
/*
if (strnotempty(save)) {
- if (ishtml(save) == 1) {
- // descore_prio = 2;
- } else {
- // descore_prio = 1;
- }
+ if (ishtml(save) == 1) {
+ // descore_prio = 2;
+ } else {
+ // descore_prio = 1;
+ }
}
*/
@@ -2107,8 +2646,17 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) {
// On part de la fin et on essaye de se presser (économise temps machine)
#if HTS_HASH
{
- int i=hash_read(&hash,save,"",0); // lecture type 0 (sav)
+ int i=hash_read(hash,save,"",0,opt->urlhack); // lecture type 0 (sav)
if (i>=0) {
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ if (
+ strcmp(adr, liens[i]->adr) != 0
+ || strcmp(fil, liens[i]->fil) != 0
+ ) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"merging similar links %s%s and %s%s"LF,adr,fil,liens[i]->adr,liens[i]->fil);
+ test_flush;
+ }
+ }
liens[i]->depth=maximum(liens[i]->depth,liens[ptr]->depth - 1);
dejafait=1;
}
@@ -2143,23 +2691,23 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) {
if (!just_test_it) {
if (
(!strfield(adr,"ftp://")) // non ftp
- && (!strfield(adr,"file://")) ) { // non file
- if (opt.robots) { // récupérer robots
+ && (!strfield(adr,"file://")) ) { // non file
+ if (opt->robots) { // récupérer robots
if (ishtml(fil)!=0) { // pas la peine pour des fichiers isolés
- if (checkrobots(&robots,adr,"") != -1) { // robots.txt ?
- checkrobots_set(&robots,adr,""); // ajouter entrée vide
- if (checkrobots(&robots,adr,"") == -1) { // robots.txt ?
+ if (checkrobots(_ROBOTS,adr,"") != -1) { // robots.txt ?
+ checkrobots_set(_ROBOTS ,adr,""); // ajouter entrée vide
+ if (checkrobots(_ROBOTS,adr,"") == -1) { // robots.txt ?
// enregistrer robots.txt (MACRO)
liens_record(adr,"/robots.txt","","","");
if (liens[lien_tot]==NULL) { // erreur, pas de place réservée
printf("PANIC! : Not enough memory [%d]\n",__LINE__);
- if (opt.errlog) {
- fprintf(opt.errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url)));
+ if (opt->errlog) {
+ fprintf(opt->errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url)));
test_flush;
}
- if ((opt.getmode & 1) && (ptr>0)) { if (fp) { fclose(fp); fp=NULL; } }
+ if ((opt->getmode & 1) && (ptr>0)) { if (fp) { fclose(fp); fp=NULL; } }
XH_uninit; // désallocation mémoire & buffers
- return 0;
+ return -1;
}
liens[lien_tot]->testmode=0; // pas mode test
liens[lien_tot]->link_import=0; // pas mode import
@@ -2172,13 +2720,13 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) {
#if DEBUG_ROBOTS
printf("robots.txt: added file robots.txt for %s\n",adr);
#endif
- if ((opt.debug>1) && (opt.log!=NULL)) {
- fspc(opt.log,"debug"); fprintf(opt.log,"robots.txt added at %s"LF,adr);
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"robots.txt added at %s"LF,adr);
test_flush;
}
} else {
- if (opt.errlog) {
- fprintf(opt.errlog,"Unexpected robots.txt error at %d"LF,__LINE__);
+ if (opt->errlog) {
+ fprintf(opt->errlog,"Unexpected robots.txt error at %d"LF,__LINE__);
test_flush;
}
}
@@ -2193,13 +2741,13 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) {
liens_record(adr,fil,save,former_adr,former_fil);
if (liens[lien_tot]==NULL) { // erreur, pas de place réservée
printf("PANIC! : Not enough memory [%d]\n",__LINE__);
- if (opt.errlog) {
- fprintf(opt.errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url)));
+ if (opt->errlog) {
+ fprintf(opt->errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url)));
test_flush;
}
- if ((opt.getmode & 1) && (ptr>0)) { if (fp) { fclose(fp); fp=NULL; } }
+ if ((opt->getmode & 1) && (ptr>0)) { if (fp) { fclose(fp); fp=NULL; } }
XH_uninit; // désallocation mémoire & buffers
- return 0;
+ return -1;
}
// mode test?
@@ -2226,24 +2774,24 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) {
liens[lien_tot]->depth=max(0,min(liens[ptr]->depth-1,set_prio_to-1)); // PRIORITE NULLE (catch page)
// noter pass
liens[lien_tot]->pass2=pass_fix;
- liens[lien_tot]->retry=opt.retry;
+ liens[lien_tot]->retry=opt->retry;
- //strcpy(liens[lien_tot]->adr,adr);
- //strcpy(liens[lien_tot]->fil,fil);
- //strcpy(liens[lien_tot]->sav,save);
- if ((opt.debug>1) && (opt.log!=NULL)) {
+ //strcpybuff(liens[lien_tot]->adr,adr);
+ //strcpybuff(liens[lien_tot]->fil,fil);
+ //strcpybuff(liens[lien_tot]->sav,save);
+ if ((opt->debug>1) && (opt->log!=NULL)) {
if (!just_test_it) {
- fspc(opt.log,"debug"); fprintf(opt.log,"OK, NOTE: %s%s -> %s"LF,liens[lien_tot]->adr,liens[lien_tot]->fil,liens[lien_tot]->sav);
+ fspc(opt->log,"debug"); fprintf(opt->log,"OK, NOTE: %s%s -> %s"LF,liens[lien_tot]->adr,liens[lien_tot]->fil,liens[lien_tot]->sav);
} else {
- fspc(opt.log,"debug"); fprintf(opt.log,"OK, TEST: %s%s"LF,liens[lien_tot]->adr,liens[lien_tot]->fil);
+ fspc(opt->log,"debug"); fprintf(opt->log,"OK, TEST: %s%s"LF,liens[lien_tot]->adr,liens[lien_tot]->fil);
}
test_flush;
}
lien_tot++; // UN LIEN DE PLUS
} else { // if !dejafait
- if ((opt.debug>1) && (opt.log!=NULL)) {
- fspc(opt.log,"debug"); fprintf(opt.log,"link has already been recorded, cancelled: %s"LF,save);
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"link has already been recorded, cancelled: %s"LF,save);
test_flush;
}
@@ -2263,15 +2811,20 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) {
} // if ok==0
adr=eadr-1; // ** sauter
-
+
+ /* We skipped bytes and skip the " : reset state */
+ if (inscript) {
+ inscript_state_pos = INSCRIPT_START;
+ }
+
} // if (p)
} // si '<' ou '>'
// plus loin
adr++;
-
-
+
+
/* Optimization: if we are scanning in HTML data (not in tag or script),
then jump to the next starting tag */
if (ptr>0) {
@@ -2282,18 +2835,23 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) {
)
{
/* Not at the end */
- if (( ((int) (adr - r.adr)) ) < r.size) {
+ if (( ((int) (adr - r->adr)) ) < r->size) {
/* Not on a starting tag yet */
if (*adr != '<') {
- char* adr_next = strchr(adr,'<');
+ /* strchr does not behave well with null chars.. */
+ /* char* adr_next = strchr(adr,'<'); */
+ char* adr_next = adr;
+ while(*adr_next != '<' && (adr_next - r->adr) < r->size ) {
+ adr_next++;
+ }
/* Jump to near end (index hack) */
- if (!adr_next) {
+ if (!adr_next || *adr_next != '<') {
if (
- ( (int)(adr - r.adr) < (r.size - 4))
+ ( (int)(adr - r->adr) < (r->size - 4))
&&
- (r.size > 4)
+ (r->size > 4)
) {
- adr = r.adr + r.size - 2;
+ adr = r->adr + r->size - 2;
}
} else {
adr = adr_next;
@@ -2305,20 +2863,30 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) {
// ----------
// écrire peu à peu
- if ((opt.getmode & 1) && (ptr>0)) HT_ADD_ADR;
+ if ((opt->getmode & 1) && (ptr>0)) HT_ADD_ADR;
lastsaved=adr; // dernier écrit+1
// ----------
-
+
+ // Checks
+ if (back_add_stats != opt->state.back_add_stats) {
+ back_add_stats = opt->state.back_add_stats;
+
+ // Check max time
+ if (!back_checkmirror(opt)) {
+ adr = r->adr + r->size;
+ }
+ }
+
// pour les stats du shell si parsing trop long
#if HTS_ANALYSTE
- if (r.size)
- _hts_in_html_done=(100 * ((int) (adr - r.adr)) ) / (int)(r.size);
+ if (r->size)
+ _hts_in_html_done=(100 * ((int) (adr - r->adr)) ) / (int)(r->size);
if (_hts_in_html_poll) {
_hts_in_html_poll=0;
// temps à attendre, et remplir autant que l'on peut le cache (backing)
- back_wait(back,back_max,&opt,&cache,HTS_STAT.stat_timestart);
- back_fillmax(back,back_max,&opt,&cache,liens,ptr,numero_passe,lien_tot);
-
+ back_wait(back,back_max,opt,cache,HTS_STAT.stat_timestart);
+ back_fillmax(back,back_max,opt,cache,liens,ptr,numero_passe,lien_tot);
+
// Transfer rate
engine_stats();
@@ -2329,35 +2897,35 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) {
HTS_STAT.stat_infos=fspc(NULL,"info");
HTS_STAT.nbk=backlinks_done(liens,lien_tot,ptr);
HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,back,back_max);
-
+
if (!hts_htmlcheck_loop(back,back_max,0,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) {
- if (opt.errlog) {
- fspc(opt.errlog,"info"); fprintf(opt.errlog,"Exit requested by shell or user"LF);
+ if (opt->errlog) {
+ fspc(opt->errlog,"info"); fprintf(opt->errlog,"Exit requested by shell or user"LF);
test_flush;
}
- exit_xh=1; // exit requested
+ *stre->exit_xh_=1; // exit requested
XH_uninit;
- return 0;
- //adr = r.adr + r.size; // exit
+ return -1;
+ //adr = r->adr + r->size; // exit
} else if (_hts_cancel==1) {
- // adr = r.adr + r.size; // exit
+ // adr = r->adr + r->size; // exit
nofollow=1; // moins violent
_hts_cancel=0;
}
}
-
+
// refresh the backing system each 2 seconds
if (engine_stats()) {
- back_wait(back,back_max,&opt,&cache,HTS_STAT.stat_timestart);
- back_fillmax(back,back_max,&opt,&cache,liens,ptr,numero_passe,lien_tot);
+ back_wait(back,back_max,opt,cache,HTS_STAT.stat_timestart);
+ back_fillmax(back,back_max,opt,cache,liens,ptr,numero_passe,lien_tot);
}
#endif
- } while(( ((int) (adr - r.adr)) ) < r.size);
+ } while(( ((int) (adr - r->adr)) ) < r->size);
#if HTS_ANALYSTE
_hts_in_html_parsing=0; // flag
_hts_cancel=0; // pas de cancel
#endif
- if ((opt.getmode & 1) && (ptr>0)) {
+ if ((opt->getmode & 1) && (ptr>0)) {
HT_ADD_END; // achever
}
//
@@ -2366,12 +2934,1030 @@ if (hts_htmlcheck(r.adr,(int)r.size,urladr,urlfil)) {
} // if !error
- if (opt.getmode & 1) { if (fp) { fclose(fp); fp=NULL; } }
+ if (opt->getmode & 1) { if (fp) { fclose(fp); fp=NULL; } }
// sauver fichier
//structcheck(savename);
- //filesave(r.adr,r.size,savename);
+ //filesave(opt,r->adr,r->size,savename);
#if HTS_ANALYSTE
} // analyse OK
#endif
+
+ /* Apply changes */
+ ENGINE_SAVE_CONTEXT();
+
+ return 0;
+}
+
+
+
+
+/*
+ Check 301, 302, .. statuscodes (moved)
+
+ Post-processes the response r of the current link (liens[ptr]) when no error is pending:
+ - 301/302/303/307: either cancels the current link and records a new link entry at the
+   new location, or replaces the response body with a small generated HTML
+   "Page has moved" page pointing to the new URL;
+ - 412/416: removes the local file and records the same link again;
+ - any other status != 200: decides whether a retry is possible, then either records the
+   link again with one retry less, or logs the error (and frees the body unless
+   store_errpage was set).
+ Every return statement in this function returns 0, including the out-of-memory paths
+ that run XH_uninit first.
+ NOTE(review): those failure paths are therefore not distinguishable from success by the
+ return value -- confirm against the caller that this is intended.
+ NOTE(review): the variables used below (r, opt, liens, ptr, lien_tot, error, urladr,
+ urlfil, savename, store_errpage, ...) are presumably provided by ENGINE_LOAD_CONTEXT();
+ the macro is not visible here.
+*/
+int hts_mirror_check_moved(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
+ /* Load engine variables */
+ ENGINE_LOAD_CONTEXT();
+
+ // BEGIN handling of 301,302,307..
+ // ------------------------------------------------------------
+ if (!error) {
+ ////////{
+ // one more file has been loaded
+ // if (!error) stat_loaded+=r.size;
+
+ // ------------------------------------------------------------
+ // Handling of 301,302,307 (moved) and 412,416 - 304 replies are handled in the backing code
+ // ------------------------------------------------------------
+ if ( (r->statuscode==301)
+ || (r->statuscode==302)
+ || (r->statuscode==303)
+ || (r->statuscode==307)
+ ) {
+ //if (r->adr!=NULL) { // adr==null for a direct file. [catch: davename (sic) normally if cgi]
+ //int i=0;
+ char *rn=NULL; // buffer for the generated "Page has moved" page (allocated below when get_it==0)
+ // char* p;
+
+ if ( (opt->debug>0) && (opt->errlog!=NULL) ) {
+ //if (opt->errlog) {
+ fspc(opt->errlog,"warning"); fprintf(opt->errlog,"%s for %s%s"LF,r->msg,urladr,urlfil);
+ test_flush;
+ }
+
+
+ {
+ char mov_url[HTS_URLMAXSIZE*2],mov_adr[HTS_URLMAXSIZE*2],mov_fil[HTS_URLMAXSIZE*2];
+ int get_it=0; // by default, do not fetch the file at the same address (1: record link at new location, -1: do nothing)
+ int reponse=0;
+ mov_url[0]='\0'; mov_adr[0]='\0'; mov_fil[0]='\0';
+ //
+
+ strcpybuff(mov_url,r->location);
+ // arbitrary url -> address+file
+ if ((reponse=ident_url_relatif(mov_url,urladr,urlfil,mov_adr,mov_fil))>=0) {
+ int set_prio_to=0; // no priority set by the wizard
+
+ //if (ident_url_absolute(mov_url,mov_adr,mov_fil)!=-1) { // ok URL recognized
+ // this is (roughly) the same URL..
+ // if it is a case problem in the host, then the server is buggy
+ // ("RFC says.." : host name IS case insensitive)
+ if ((strfield2(mov_adr,urladr)!=0) && (strfield2(mov_fil,urlfil)!=0)) { // identical except for case
+ // we are going in circles
+ if (strcmp(mov_fil,urlfil)==0) {
+ error=1;
+ get_it=-1; // do nothing
+ if (opt->errlog) {
+ fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Can not bear crazy server (%s) for %s%s"LF,r->msg,urladr,urlfil);
+ test_flush;
+ }
+ } else { // wrong case, erase entry in the stack and replay once
+ get_it=1;
+ }
+ } else { // different address
+ if (ishtml(mov_url)==0) { // not the same address BUT it is a non-html file (no "moved" page possible)
+ // -> fetch at this address, the link will be recorded with lien_record() (hash)
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"wizard link test for moved file at %s%s.."LF,mov_adr,mov_fil);
+ test_flush;
+ }
+ // accepted?
+ if (hts_acceptlink(opt,ptr,lien_tot,liens,
+ mov_adr,mov_fil,
+ &set_prio_to,
+ NULL) != 1) { /* new address not refused? */
+ get_it=1;
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"moved link accepted: %s%s"LF,mov_adr,mov_fil);
+ test_flush;
+ }
+ }
+ } /* otherwise handled normally */
+ }
+
+ //if ((strfield2(mov_adr,urladr)!=0) && (strfield2(mov_fil,urlfil)!=0)) { // identical except for case
+ if (get_it==1) {
+ // short-circuit the rest of the processing
+ // and step back to take a better leap
+ if (opt->errlog) {
+ fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Warning moved treated for %s%s (real one is %s%s)"LF,urladr,urlfil,mov_adr,mov_fil);
+ test_flush;
+ }
+ // cancel current link
+ error=1;
+ strcpybuff(liens[ptr]->adr,"!"); // dummy character (invalidates hash)
+#if HTS_HASH
+#else
+ liens[ptr]->sav_len=-1; // invalid size
+#endif
+ // record NEW link
+ //xxc xxc
+ // set_prio_to=0+1; // protection if the moved URL is an html page!!
+ //xxc xxc
+ {
+ char mov_sav[HTS_URLMAXSIZE*2];
+ // compute link and possibly modify address/file
+ if (url_savename(mov_adr,mov_fil,mov_sav,NULL,NULL,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil,opt,liens,lien_tot,back,back_max,cache,hash,ptr,numero_passe)!=-1) {
+ if (hash_read(hash,mov_sav,"",0,0)<0) { // does not exist yet
+ // record link (MACRO) with IDENTICAL SAV
+ liens_record(mov_adr,mov_fil,liens[ptr]->sav,"","");
+ //liens_record(mov_adr,mov_fil,mov_sav,"","");
+ if (liens[lien_tot]!=NULL) { // OK, no error
+ // test mode?
+ liens[lien_tot]->testmode=liens[ptr]->testmode;
+ liens[lien_tot]->link_import=0; // normal mode
+ if (!set_prio_to)
+ liens[lien_tot]->depth=liens[ptr]->depth;
+ else
+ liens[lien_tot]->depth=max(0,min(set_prio_to-1,liens[ptr]->depth)); // NULL PRIORITY (catch page)
+ liens[lien_tot]->pass2=max(liens[ptr]->pass2,numero_passe);
+ liens[lien_tot]->retry=liens[ptr]->retry;
+ liens[lien_tot]->premier=liens[ptr]->premier;
+ liens[lien_tot]->precedent=liens[ptr]->precedent;
+ lien_tot++;
+ } else { // oops, error: out of memory!!
+ printf("PANIC! : Not enough memory [%d]\n",__LINE__);
+ if (opt->errlog) {
+ fprintf(opt->errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url)));
+ test_flush;
+ }
+ //if (opt->getmode & 1) { if (fp) { fclose(fp); fp=NULL; } }
+ XH_uninit; // free memory & buffers
+ return 0;
+ }
+ } else {
+ if ( (opt->debug>0) && (opt->errlog!=NULL) ) {
+ fspc(opt->errlog,"warning"); fprintf(opt->errlog,"moving %s to an existing file %s"LF,liens[ptr]->fil,urlfil);
+ test_flush;
+ }
+ }
+
+ }
+ }
+
+ //printf("-> %s %s %s\n",liens[lien_tot-1]->adr,liens[lien_tot-1]->fil,liens[lien_tot-1]->sav);
+
+ // metaphysical note: there may be both an index.html and an INDEX.HTML
+ // under DOS this does not work very well... but since I am brilliant, url_savename()
+ // is able to sort this problem out
+ }
+ } // ident_url_xx
+
+ if (get_it==0) { // really different address and potentially html (the page can not be moved as is because of <img src..> and co)
+ rn=(char*) calloct(8192,1);
+ if (rn!=NULL) {
+ if (opt->errlog) {
+ fspc(opt->errlog,"warning"); fprintf(opt->errlog,"File has moved from %s%s to %s"LF,urladr,urlfil,mov_url);
+ test_flush;
+ }
+ if (!opt->mimehtml) {
+ escape_uri(mov_url);
+ } else {
+ char buff[HTS_URLMAXSIZE*3];
+ strcpybuff(buff, mov_adr);
+ strcatbuff(buff, mov_fil);
+ escape_in_url(buff);
+ { char* a = buff; while((a = strchr(a, '%'))) { *a = 'X'; a++; } } // replace every '%' in buff with 'X'
+ strcpybuff(mov_url, "cid:");
+ strcatbuff(mov_url, buff);
+ }
+ // Prepare a page that will immediately jump to the right URL
+ // The scanner will then rewrite this URL again, to mirror it!
+ strcpybuff(rn,"<HTML>"CRLF);
+ strcatbuff(rn,"<!-- Created by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->"CRLF);
+ strcatbuff(rn,"<HEAD>"CRLF"<TITLE>Page has moved</TITLE>"CRLF"</HEAD>"CRLF"<BODY>"CRLF);
+ strcatbuff(rn,"<META HTTP-EQUIV=\"Refresh\" CONTENT=\"0; URL=");
+ strcatbuff(rn,mov_url); // URL
+ strcatbuff(rn,"\">"CRLF);
+ strcatbuff(rn,"<A HREF=\"");
+ strcatbuff(rn,mov_url);
+ strcatbuff(rn,"\">");
+ strcatbuff(rn,"<B>Click here...</B></A>"CRLF);
+ strcatbuff(rn,"</BODY>"CRLF);
+ strcatbuff(rn,"<!-- Created by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->"CRLF);
+ strcatbuff(rn,"</HTML>"CRLF);
+
+ // replace the page
+ if (r->adr) {
+ freet(r->adr);
+ r->adr=NULL;
+ }
+ r->adr=rn;
+ r->size=strlen(r->adr);
+ strcpybuff(r->contenttype,"text/html");
+ }
+ } // get_it==0
+
+ } // bloc
+ // HTTP error (e.g. 404, not found)
+ } else if (
+ (r->statuscode==412)
+ || (r->statuscode==416)
+ ) { // Precondition Failed, which for us means: request the WHOLE file again
+ if (fexist(liens[ptr]->sav)) {
+ remove(liens[ptr]->sav); // Delete
+ if (!fexist(liens[ptr]->sav)) { // Really deleted? (otherwise we would loop..)
+#if HDEBUG
+ printf("Partial content NOT up-to-date, reget all file for %s\n",liens[ptr]->sav);
+#endif
+ if ( (opt->debug>1) && (opt->errlog!=NULL) ) {
+ //if (opt->errlog) {
+ fspc(opt->errlog,"debug"); fprintf(opt->errlog,"Partial file reget (%s) for %s%s"LF,r->msg,urladr,urlfil);
+ test_flush;
+ }
+ // record the SAME link (MACRO)
+ liens_record(liens[ptr]->adr,liens[ptr]->fil,liens[ptr]->sav,"","");
+ if (liens[lien_tot]!=NULL) { // OK, no error
+ liens[lien_tot]->testmode=liens[ptr]->testmode; // test mode?
+ liens[lien_tot]->link_import=0; // not import mode
+ liens[lien_tot]->depth=liens[ptr]->depth;
+ liens[lien_tot]->pass2=max(liens[ptr]->pass2,numero_passe);
+ liens[lien_tot]->retry=liens[ptr]->retry;
+ liens[lien_tot]->premier=liens[ptr]->premier;
+ liens[lien_tot]->precedent=ptr;
+ lien_tot++;
+ //
+ // cancel current link
+ error=1;
+ strcpybuff(liens[ptr]->adr,"!"); // dummy character (invalidates hash)
+#if HTS_HASH
+#else
+ liens[ptr]->sav_len=-1; // invalid size
+#endif
+ //
+ } else { // oops, error: out of memory!!
+ printf("PANIC! : Not enough memory [%d]\n",__LINE__);
+ if (opt->errlog) {
+ fprintf(opt->errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url)));
+ test_flush;
+ }
+ //if (opt->getmode & 1) { if (fp) { fclose(fp); fp=NULL; } }
+ XH_uninit; // free memory & buffers
+ return 0;
+ }
+ } else {
+ if (opt->errlog!=NULL) {
+ fspc(opt->errlog,"error"); fprintf(opt->errlog,"Can not remove old file %s"LF,urlfil);
+ test_flush;
+ }
+ }
+ } else {
+ if (opt->errlog!=NULL) {
+ fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Unexpected 412/416 error (%s) for %s%s"LF,r->msg,urladr,urlfil);
+ test_flush;
+ }
+ }
+ } else if (r->statuscode!=200) {
+ int can_retry=0;
+
+ // cases where we can retry
+ // -2=timeout -3=rateout (internal to httrack)
+ switch(r->statuscode) {
+ //case -1: can_retry=1; break;
+ case -2: if (opt->hostcontrol) { // timeout and retries exhausted
+ if ((opt->hostcontrol & 1) && (liens[ptr]->retry<=0)) {
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"Link banned: %s%s"LF,urladr,urlfil); test_flush;
+ }
+ host_ban(opt,liens,ptr,lien_tot,back,back_max,jump_identification(urladr));
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"Info: previous log - link banned: %s%s"LF,urladr,urlfil); test_flush;
+ }
+ } else can_retry=1;
+ } else can_retry=1;
+ break;
+ case -3: if ((opt->hostcontrol) && (liens[ptr]->retry<=0)) { // too slow
+ if (opt->hostcontrol & 2) {
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"Link banned: %s%s"LF,urladr,urlfil); test_flush;
+ }
+ host_ban(opt,liens,ptr,lien_tot,back,back_max,jump_identification(urladr));
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"Info: previous log - link banned: %s%s"LF,urladr,urlfil); test_flush;
+ }
+ } else can_retry=1;
+ } else can_retry=1;
+ break;
+ case -4: // connect closed
+ can_retry=1;
+ break;
+ case -5: // other (non fatal) error
+ can_retry=1;
+ break;
+ case -6: // bad SSL handshake
+ can_retry=1;
+ break;
+ case 408: case 409: case 500: case 502: case 504: can_retry=1;
+ break;
+ }
+
+ if ( strcmp(liens[ptr]->fil,"/primary") != 0 ) { // no primary (internal page 0)
+ if ((liens[ptr]->retry<=0) || (!can_retry) ) { // retries exhausted (or retry impossible)
+ if (opt->errlog) {
+ if ((opt->retry>0) && (can_retry)){
+ fspc(opt->errlog,"error");
+ fprintf(opt->errlog,"\"%s\" (%d) after %d retries at link %s%s (from %s%s)"LF,r->msg,r->statuscode,opt->retry,urladr,urlfil,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil);
+ } else {
+ if (r->statuscode==-10) { // test OK
+ if ((opt->debug>0) && (opt->errlog!=NULL)) {
+ fspc(opt->errlog,"info");
+ fprintf(opt->errlog,"Test OK at link %s%s (from %s%s)"LF,urladr,urlfil,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil);
+ }
+ } else {
+ if (strcmp(urlfil,"/robots.txt")) { // do not print info about robots.txt by default
+ fspc(opt->errlog,"error");
+ fprintf(opt->errlog,"\"%s\" (%d) at link %s%s (from %s%s)"LF,r->msg,r->statuscode,urladr,urlfil,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil);
+ } else {
+ if (opt->debug>1) {
+ fspc(opt->errlog,"info"); fprintf(opt->errlog,"No robots.txt rules at %s"LF,urladr);
+ test_flush;
+ }
+ }
+ }
+ }
+ test_flush;
+ }
+
+ // NO error in top level
+ // due to the "no connection -> previous restored" hack
+ // This prevent the engine from wiping all data if the website has been deleted (or moved)
+ // since last time (which is quite annoying)
+ if (liens[ptr]->precedent != 0) {
+ // here we test whether the page should be saved anyway
+ if (opt->errpage) {
+ store_errpage=1;
+ }
+ } else {
+ if (strcmp(urlfil,"/robots.txt") != 0) {
+ /*
+ This is an error caused by a link entered by the user
+ That is, link(s) entered by user are invalid (404, 500, connect error, proxy error..)
+ If all links entered are invalid, the session failed and we will attempt to restore
+ the previous one
+ Example: Try to update a website which has been deleted remotely: this may delete
+ the website locally, which is really not desired (especially if the website disappeared!)
+ With this hack, the engine won't wipe local files (how clever)
+ */
+ HTS_STAT.stat_errors_front++;
+ }
+ }
+
+ } else { // retry!!
+ if (opt->debug>0 && opt->errlog != NULL) { // an alert will be raised if the retry fails
+ fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Retry after error %d (%s) at link %s%s (from %s%s)"LF,r->statuscode,r->msg,urladr,urlfil,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil);
+ test_flush;
+ }
+ // request the file again
+ liens_record(urladr,urlfil,savename,"","");
+ if (liens[lien_tot]!=NULL) { // OK, no error
+ liens[lien_tot]->testmode=liens[ptr]->testmode; // test mode?
+ liens[lien_tot]->link_import=0; // not import mode
+ liens[lien_tot]->depth=liens[ptr]->depth;
+ liens[lien_tot]->pass2=max(liens[ptr]->pass2,numero_passe);
+ liens[lien_tot]->retry=liens[ptr]->retry-1; // one retry less!
+ liens[lien_tot]->premier=liens[ptr]->premier;
+ liens[lien_tot]->precedent=liens[ptr]->precedent;
+ lien_tot++;
+ } else { // oops, error: out of memory!!
+ printf("PANIC! : Not enough memory [%d]\n",__LINE__);
+ if (opt->errlog) {
+ fspc(opt->errlog,"panic");
+ fprintf(opt->errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url)));
+ test_flush;
+ }
+ //if (opt->getmode & 1) { if (fp) { fclose(fp); fp=NULL; } }
+ XH_uninit; // free memory & buffers
+ return 0;
+ }
+ }
+ } else {
+ if (opt->errlog) {
+ if (opt->debug>1) {
+ fspc(opt->errlog,"info");
+ fprintf(opt->errlog,"Info: no robots.txt at %s%s"LF,urladr,urlfil);
+ }
+ }
+ }
+ if (!store_errpage) {
+ if (r->adr) { // free the response body
+ freet(r->adr);
+ r->adr=NULL;
+ }
+ error=1; // error!
+ }
+ }
+ // END handling of 301,302,307..
+ // ------------------------------------------------------------
+
+ } // if !error
+
+
+ /* Apply changes */
+ ENGINE_SAVE_CONTEXT();
+
+ return 0;
+
+
+}
+
+
+
+/*
+ Wait for next file and
+ check 301, 302, .. statuscodes (moved)
+*/
+int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
+ /* Load engine variables */
+ ENGINE_LOAD_CONTEXT();
+ /* */
+ int b;
+ int n;
+
+#if BDEBUG==1
+ printf("\nBack test..\n");
+#endif
+
+ // pause/lock files
+ {
+ int do_pause=0;
+
+ // user pause lockfile : create hts-paused.lock --> HTTrack will be paused
+ if (fexist(fconcat(opt->path_log,"hts-stop.lock"))) {
+ // remove lockfile
+ remove(fconcat(opt->path_log,"hts-stop.lock"));
+ if (!fexist(fconcat(opt->path_log,"hts-stop.lock"))) {
+ do_pause=1;
+ }
+ }
+
+ // after receving N bytes, pause
+ if (opt->fragment>0) {
+ if ((HTS_STAT.stat_bytes-stat_fragment) > opt->fragment) {
+ do_pause=1;
+ }
+ }
+
+ // pause?
+ if (do_pause) {
+ if ( (opt->debug>0) && (opt->log!=NULL) ) {
+ fspc(opt->log,"info"); fprintf(opt->log,"engine: pause requested.."LF);
+ }
+ while (back_nsoc(back,back_max)>0) { // attendre fin des transferts
+ back_wait(back,back_max,opt,cache,HTS_STAT.stat_timestart);
+ Sleep(200);
+#if HTS_ANALYSTE
+ {
+ back_wait(back,back_max,opt,cache,HTS_STAT.stat_timestart);
+
+ // Transfer rate
+ engine_stats();
+
+ // Refresh various stats
+ HTS_STAT.stat_nsocket=back_nsoc(back,back_max);
+ HTS_STAT.stat_errors=fspc(NULL,"error");
+ HTS_STAT.stat_warnings=fspc(NULL,"warning");
+ HTS_STAT.stat_infos=fspc(NULL,"info");
+ HTS_STAT.nbk=backlinks_done(liens,lien_tot,ptr);
+ HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,back,back_max);
+
+ b=0;
+ if (!hts_htmlcheck_loop(back,back_max,b,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)
+ || !back_checkmirror(opt)) {
+ if (opt->errlog) {
+ fspc(opt->errlog,"info"); fprintf(opt->errlog,"Exit requested by shell or user"LF);
+ test_flush;
+ }
+ *stre->exit_xh_=1; // exit requested
+ XH_uninit;
+ return 0;
+ }
+ }
+#endif
+ }
+ // On désalloue le buffer d'enregistrement des chemins créée, au cas où pendant la pause
+ // l'utilisateur ferait un rm -r après avoir effectué un tar
+ // structcheck_init(1);
+ {
+ FILE* fp = fopen(fconcat(opt->path_log,"hts-paused.lock"),"wb");
+ if (fp) {
+ fspc(fp,"info"); // dater
+ fprintf(fp,"Pause"LF"HTTrack is paused after retreiving "LLintP" bytes"LF"Delete this file to continue the mirror->.."LF""LF"",(LLint)HTS_STAT.stat_bytes);
+ fclose(fp);
+ }
+ }
+ stat_fragment=HTS_STAT.stat_bytes;
+ /* Info for wrappers */
+ if ( (opt->debug>0) && (opt->log!=NULL) ) {
+ fspc(opt->log,"info"); fprintf(opt->log,"engine: pause: %s"LF,fconcat(opt->path_log,"hts-paused.lock"));
+ }
+#if HTS_ANALYSTE
+ hts_htmlcheck_pause(fconcat(opt->path_log,"hts-paused.lock"));
+#else
+ while (fexist(fconcat(opt->path_log,"hts-paused.lock"))) {
+ //back_wait(back,back_max,opt,cache,HTS_STAT.stat_timestart); inutile!! (plus de sockets actives)
+ Sleep(1000);
+ }
+#endif
+ }
+ //
+ }
+ // end of pause/lock files
+
+#if HTS_ANALYSTE
+ // changement dans les préférences
+ /*
+ if (_hts_setopt) {
+ copy_htsopt(_hts_setopt,opt); // copier au besoin
+ _hts_setopt=NULL; // effacer callback
+ }
+ */
+ if (_hts_addurl) {
+ char add_adr[HTS_URLMAXSIZE*2];
+ char add_fil[HTS_URLMAXSIZE*2];
+ while(*_hts_addurl) {
+ char add_url[HTS_URLMAXSIZE*2];
+ add_adr[0]=add_fil[0]=add_url[0]='\0';
+ if (!link_has_authority(*_hts_addurl))
+ strcpybuff(add_url,"http://"); // ajouter http://
+ strcatbuff(add_url,*_hts_addurl);
+ if (ident_url_absolute(add_url,add_adr,add_fil)>=0) {
+ // ----Ajout----
+ // noter NOUVEAU lien
+ char add_sav[HTS_URLMAXSIZE*2];
+ // calculer lien et éventuellement modifier addresse/fichier
+ if (url_savename(add_adr,add_fil,add_sav,NULL,NULL,NULL,NULL,opt,liens,lien_tot,back,back_max,cache,hash,ptr,numero_passe)!=-1) {
+ if (hash_read(hash,add_sav,"",0,0)<0) { // n'existe pas déja
+ // enregistrer lien (MACRO)
+ liens_record(add_adr,add_fil,add_sav,"","");
+ if (liens[lien_tot]!=NULL) { // OK, pas d'erreur
+ liens[lien_tot]->testmode=0; // mode test?
+ liens[lien_tot]->link_import=0; // mode normal
+ liens[lien_tot]->depth=opt->depth;
+ liens[lien_tot]->pass2=max(0,numero_passe);
+ liens[lien_tot]->retry=opt->retry;
+ liens[lien_tot]->premier=lien_tot;
+ liens[lien_tot]->precedent=lien_tot;
+ lien_tot++;
+ //
+ if ((opt->debug>0) && (opt->log!=NULL)) {
+ fspc(opt->log,"info"); fprintf(opt->log,"Link added by user: %s%s"LF,add_adr,add_fil); test_flush;
+ }
+ //
+ } else { // oups erreur, plus de mémoire!!
+ printf("PANIC! : Not enough memory [%d]\n",__LINE__);
+ if (opt->errlog) {
+ fprintf(opt->errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url)));
+ test_flush;
+ }
+ //if (opt->getmode & 1) { if (fp) { fclose(fp); fp=NULL; } }
+ XH_uninit; // désallocation mémoire & buffers
+ return 0;
+ }
+ } else {
+ if ( (opt->debug>0) && (opt->errlog!=NULL) ) {
+ fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Existing link %s%s not added after user request"LF,add_adr,add_fil);
+ test_flush;
+ }
+ }
+
+ }
+ } else {
+ if (opt->errlog) {
+ fspc(opt->errlog,"error");
+ fprintf(opt->errlog,"Error during URL decoding for %s"LF,add_url);
+ test_flush;
+ }
+ }
+ // ----Fin Ajout----
+ _hts_addurl++; // suivante
+ }
+ _hts_addurl=NULL; // libérer _hts_addurl
+ }
+ // si une pause a été demandée
+ if (_hts_setpause) {
+ // index du lien actuel
+ int b=back_index(back,back_max,urladr,urlfil,savename);
+ if (b<0) b=0; // forcer pour les stats
+ while(_hts_setpause) { // on fait la pause..
+ back_wait(back,back_max,opt,cache,HTS_STAT.stat_timestart);
+
+ // Transfer rate
+ engine_stats();
+
+ // Refresh various stats
+ HTS_STAT.stat_nsocket=back_nsoc(back,back_max);
+ HTS_STAT.stat_errors=fspc(NULL,"error");
+ HTS_STAT.stat_warnings=fspc(NULL,"warning");
+ HTS_STAT.stat_infos=fspc(NULL,"info");
+ HTS_STAT.nbk=backlinks_done(liens,lien_tot,ptr);
+ HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,back,back_max);
+
+ if (!hts_htmlcheck_loop(back,back_max,b,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) {
+ if (opt->errlog) {
+ fspc(opt->errlog,"info"); fprintf(opt->errlog,"Exit requested by shell or user"LF);
+ test_flush;
+ }
+ *stre->exit_xh_=1; // exit requested
+ XH_uninit;
+ return 0;
+ }
+ if (back_nsoc(back,back_max)==0)
+ Sleep(250); // tite pause
+ }
+ }
+#endif
+
+ // si le fichier n'est pas en backing, le mettre..
+ if (!back_exist(back,back_max,urladr,urlfil,savename)) {
+#if BDEBUG==1
+ printf("crash backing: %s%s\n",liens[ptr]->adr,liens[ptr]->fil);
+#endif
+ if (back_add(back,back_max,opt,cache,urladr,urlfil,savename,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil,liens[ptr]->testmode,&liens[ptr]->pass2)==-1) {
+ printf("PANIC! : Crash adding error, unexpected error found.. [%d]\n",__LINE__);
+#if BDEBUG==1
+ printf("error while crash adding\n");
+#endif
+ if (opt->errlog) {
+ fspc(opt->errlog,"error"); fprintf(opt->errlog,"Unexpected backing error for %s%s"LF,urladr,urlfil);
+ test_flush;
+ }
+
+ }
+ }
+
+#if BDEBUG==1
+ printf("test number of socks\n");
+#endif
+
+ // ajouter autant de socket qu'on peut ajouter
+ n=opt->maxsoc-back_nsoc(back,back_max);
+#if BDEBUG==1
+ printf("%d sockets available for backing\n",n);
+#endif
+
+#if HTS_ANALYSTE
+ if ((n>0) && (!_hts_setpause)) { // si sockets libre et pas en pause, ajouter
+#else
+ if (n>0) { // si sockets libre
+#endif
+ // remplir autant que l'on peut le cache (backing)
+ back_fillmax(back,back_max,opt,cache,liens,ptr,numero_passe,lien_tot);
+ }
+
+ // index du lien actuel
+ /*
+ b=back_index(back,back_max,urladr,urlfil,savename);
+
+ if (b>=0)
+ */
+ {
+ // ------------------------------------------------------------
+ // attendre que le fichier actuel soit prêt - BOUCLE D'ATTENTE
+ do {
+
+ // index du lien actuel
+ b=back_index(back,back_max,urladr,urlfil,savename);
+#if BDEBUG==1
+ printf("back index %d, waiting\n",b);
+#endif
+ // Continue to the loop if link still present
+ if (b<0)
+ continue;
+
+ // Receive data
+ if (back[b].status>0)
+ back_wait(back,back_max,opt,cache,HTS_STAT.stat_timestart);
+
+ // Continue to the loop if link still present
+ b=back_index(back,back_max,urladr,urlfil,savename);
+ if (b<0)
+ continue;
+
+ // Stop the mirror
+ if (!back_checkmirror(opt)) {
+ *stre->exit_xh_=1; // exit requested
+ XH_uninit;
+ return 0;
+ }
+
+ // And fill the backing stack
+ if (back[b].status>0)
+ back_fillmax(back,back_max,opt,cache,liens,ptr,numero_passe,lien_tot);
+
+ // Continue to the loop if link still present
+ b=back_index(back,back_max,urladr,urlfil,savename);
+ if (b<0)
+ continue;
+
+ // autres occupations de HTTrack: statistiques, boucle d'attente, etc.
+ if ((opt->makestat) || (opt->maketrack)) {
+ TStamp l=time_local();
+ if ((int) (l-makestat_time) >= 60) {
+ if (makestat_fp != NULL) {
+ fspc(makestat_fp,"info");
+ fprintf(makestat_fp,"Rate= %d (/"LLintP") \11NewLinks= %d (/%d)"LF,(int) ((HTS_STAT.HTS_TOTAL_RECV-*stre->makestat_total_)/(l-makestat_time)), (LLint)HTS_STAT.HTS_TOTAL_RECV,(int) lien_tot-*stre->makestat_lnk_,(int) lien_tot);
+ fflush(makestat_fp);
+ *stre->makestat_total_=HTS_STAT.HTS_TOTAL_RECV;
+ *stre->makestat_lnk_=lien_tot;
+ }
+ if (stre->maketrack_fp != NULL) {
+ int i;
+ fspc(stre->maketrack_fp,"info"); fprintf(stre->maketrack_fp,LF);
+ for(i=0;i<back_max;i++) {
+ back_info(back,i,3,stre->maketrack_fp);
+ }
+ fprintf(stre->maketrack_fp,LF);
+
+ }
+ makestat_time=l;
+ }
+ }
+#if HTS_ANALYSTE
+ {
+ int i;
+ {
+ char* s=hts_cancel_file("");
+ if (strnotempty(s)) { // fichier à canceller
+ for(i=0;i<back_max;i++) {
+ if ((back[i].status>0)) {
+ if (strcmp(back[i].url_sav,s)==0) { // ok trouvé
+ if (back[i].status != 1000) {
+#if HTS_DEBUG_CLOSESOCK
+ DEBUG_W("user cancel: deletehttp\n");
+#endif
+ if (back[i].r.soc!=INVALID_SOCKET) deletehttp(&back[i].r);
+ back[i].r.soc=INVALID_SOCKET;
+ back[i].r.statuscode=-1;
+ strcpybuff(back[i].r.msg,"Cancelled by User");
+ back[i].status=0; // terminé
+ } else // cancel ftp.. flag à 1
+ back[i].stop_ftp = 1;
+ }
+ }
+ }
+ s[0]='\0';
+ }
+ }
+
+ // Transfer rate
+ engine_stats();
+
+ // Refresh various stats
+ HTS_STAT.stat_nsocket=back_nsoc(back,back_max);
+ HTS_STAT.stat_errors=fspc(NULL,"error");
+ HTS_STAT.stat_warnings=fspc(NULL,"warning");
+ HTS_STAT.stat_infos=fspc(NULL,"info");
+ HTS_STAT.nbk=backlinks_done(liens,lien_tot,ptr);
+ HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,back,back_max);
+
+ if (!hts_htmlcheck_loop(back,back_max,b,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) {
+ if (opt->errlog) {
+ fspc(opt->errlog,"info"); fprintf(opt->errlog,"Exit requested by shell or user"LF);
+ test_flush;
+ }
+ *stre->exit_xh_=1; // exit requested
+ XH_uninit;
+ return 0;
+ }
+ }
+
+#endif
+#if HTS_POLL
+ if ((opt->shell) || (opt->keyboard) || (opt->verbosedisplay) || (!opt->quiet)) {
+ TStamp tl;
+ *stre->info_shell_=1;
+
+ /* Toggle with ENTER */
+ if (!opt->quiet) {
+ if (check_stdin()) {
+ char com[256];
+ linput(stdin,com,200);
+ if (opt->verbosedisplay==2)
+ opt->verbosedisplay=1;
+ else
+ opt->verbosedisplay=2;
+ /* Info for wrappers */
+ if ( (opt->debug>0) && (opt->log!=NULL) ) {
+ fspc(opt->log,"info"); fprintf(opt->log,"engine: change-options"LF);
+ }
+#if HTS_ANALYSTE
+ hts_htmlcheck_chopt(opt);
+#endif
+ }
+ }
+
+ tl=time_local();
+
+ // générer un message d'infos sur l'état actuel
+ if (opt->shell) { // si shell
+ if ((tl-*stre->last_info_shell_)>0) { // toute les 1 sec
+ FILE* fp=stdout;
+ int a=0;
+ *stre->last_info_shell_=tl;
+ if (fexist(fconcat(opt->path_log,"hts-autopsy"))) { // débuggage: teste si le robot est vivant
+ // (oui je sais un robot vivant.. mais bon.. il a le droit de vivre lui aussi)
+ // (libérons les robots esclaves de l'internet!)
+ remove(fconcat(opt->path_log,"hts-autopsy"));
+ fp=fopen(fconcat(opt->path_log,"hts-isalive"),"wb");
+ a=1;
+ }
+ if ((*stre->info_shell_) || a) {
+ int i,j;
+
+ fprintf(fp,"TIME %d"LF,(int) (tl-HTS_STAT.stat_timestart));
+ fprintf(fp,"TOTAL %d"LF,(int) HTS_STAT.stat_bytes);
+ fprintf(fp,"RATE %d"LF,(int) (HTS_STAT.HTS_TOTAL_RECV/(tl-HTS_STAT.stat_timestart)));
+ fprintf(fp,"SOCKET %d"LF,back_nsoc(back,back_max));
+ fprintf(fp,"LINK %d"LF,lien_tot);
+ {
+ LLint mem=0;
+ for(i=0;i<back_max;i++)
+ if (back[i].r.adr!=NULL)
+ mem+=back[i].r.size;
+ fprintf(fp,"INMEM "LLintP""LF,(LLint)mem);
+ }
+ for(j=0;j<2;j++) { // passes pour ready et wait
+ for(i=0;i<back_max;i++) {
+ back_info(back,i,j+1,stdout); // maketrack_fp a la place de stdout ?? // **
+ }
+ }
+ fprintf(fp,LF);
+ if (a)
+ fclose(fp);
+ io_flush;
+ }
+ }
+ } // si shell
+
+ } // si shell ou keyboard (option)
+ //
+#endif
+ } while((b>=0) && (back[max(b,0)].status>0));
+
+
+ // If link not found on the stack, it's because it has already been downloaded
+ // in background
+ // Then, skip it and go to the next one
+ if (b<0) {
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"link #%d is ready, no more on the stack, skipping: %s%s.."LF,ptr,urladr,urlfil);
+ test_flush;
+ }
+
+ // prochain lien
+ // ptr++;
+
+ return 2; // goto jump_if_done;
+
+ }
+ /* link put in cache by the backing system for memory spare - reclaim */
+ else if (back[b].finalized) {
+ assertf(back[b].r.adr == NULL);
+ /* read file in cache */
+ back[b].r = cache_read_ro(opt,cache,back[b].url_adr,back[b].url_fil,back[b].url_sav, back[b].location_buffer);
+ /* ensure correct location buffer set */
+ back[b].r.location=back[b].location_buffer;
+ if (back[b].r.statuscode == -1) {
+ if (opt->errlog) {
+ fspc(opt->errlog,"error"); fprintf(opt->errlog,"Unexpected error: %s%s not found anymore in cache"LF,back[b].url_adr,back[b].url_fil);
+ test_flush;
+ }
+ } else {
+ if ( (opt->debug>1) && (opt->log!=NULL) ) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"reclaim file %s%s (%d)"LF,back[b].url_adr,back[b].url_fil,back[b].r.statuscode); test_flush;
+ }
+ }
+ }
+
+
+#if HTS_ANALYSTE==2
+#else
+ //if (!opt->quiet) { // petite animation
+ if (!opt->verbosedisplay) {
+ if (!opt->quiet) {
+ static int roll=0; /* static: ok */
+ roll=(roll+1)%4;
+ printf("%c\x0d",("/-\\|")[roll]);
+ fflush(stdout);
+ }
+ } else if (opt->verbosedisplay==1) {
+ if (back[b].r.statuscode==200)
+ printf("%d/%d: %s%s ("LLintP" bytes) - OK\33[K\r",ptr,lien_tot,back[b].url_adr,back[b].url_fil,(LLint)back[b].r.size);
+ else
+ printf("%d/%d: %s%s ("LLintP" bytes) - %d\33[K\r",ptr,lien_tot,back[b].url_adr,back[b].url_fil,(LLint)back[b].r.size,back[b].r.statuscode);
+ fflush(stdout);
+ }
+ //}
+#endif
+ // ------------------------------------------------------------
+ // Vérificateur d'intégrité
+#if DEBUG_CHECKINT
+ _CHECKINT(&back[b],"Retour de back_wait, après le while")
+ {
+ int i;
+ for(i=0;i<back_max;i++) {
+ char si[256];
+ sprintf(si,"Test global après back_wait, index %d",i);
+ _CHECKINT(&back[i],si)
+ }
+ }
+#endif
+
+ // copier structure réponse htsblk
+ memcpy(r, &(back[b].r), sizeof(htsblk));
+ r->location=stre->loc_; // ne PAS copier location!! adresse, pas de buffer
+ if (back[b].r.location)
+ strcpybuff(r->location,back[b].r.location);
+ back[b].r.adr=NULL; // ne pas faire de desalloc ensuite
+
+ // libérer emplacement backing
+ back_maydelete(opt,back,b);
+
+ // progression
+#if 0
+ if (opt->aff_progress) {
+ TStamp tl=time_local();
+ if ((tl-HTS_STAT.stat_timestart)>0) {
+ char s[32];
+ int i=0;
+ lastime=tl;
+ _CLRSCR; _GOTOXY("1","1");
+ printf("Rate=%d B/sec\n",(int) (HTS_STAT.HTS_TOTAL_RECV/(tl-HTS_STAT.stat_timestart)));
+ while(i<minimum(back_max,99)) { // **
+ if (back[i].status>=0) { // loading..
+ s[0]='\0';
+ if (strlen(back[i].url_fil)>16)
+ strcatbuff(s,back[i].url_fil+strlen(back[i].url_fil)-16);
+ else
+ strncatbuff(s,back[i].url_fil,16);
+ printf("%s : ",s);
+
+ printf("[");
+ if (back[i].r.totalsize>0) {
+ int p;
+ int j;
+ p=(int)((back[i].r.size*10)/back[i].r.totalsize);
+ p=minimum(10,p);
+ for(j=0;j<p;j++) printf("*");
+ for(j=0;j<(10-p);j++) printf("-");
+ } else {
+ printf(LLintP,(LLint)back[i].r.size);
+ }
+ printf("]");
+
+ //} else if (back[i].status==0) {
+ // strcpybuff(s,"ENDED");
+ }
+ printf("\n");
+ i++;
+ }
+ io_flush;
+ }
+ }
+#endif
+
+ // débug graphique
+#if BDEBUG==2
+ {
+ char s[12];
+ int i=0;
+ _GOTOXY(1,1);
+ printf("Rate=%d B/sec\n",(int) (HTS_STAT.HTS_TOTAL_RECV/(time_local()-HTS_STAT.stat_timestart)));
+ while(i<minimum(back_max,160)) {
+ if (back[i].status>0) {
+ sprintf(s,"%d",back[i].r.size);
+ } else if (back[i].status==0) {
+ strcpybuff(s,"ENDED");
+ } else
+ strcpybuff(s," - ");
+ while(strlen(s)<8) strcatbuff(s," ");
+ printf("%s",s); io_flush;
+ i++;
+ }
+ }
+#endif
+
+
+#if BDEBUG==1
+ printf("statuscode=%d with %s / msg=%s\n",r->statuscode,r->contenttype,r->msg);
+#endif
+
+ }
+ /*else {
+ #if BDEBUG==1
+ printf("back index error\n");
+ #endif
+ }
+ */
+
+
+
+ ENGINE_SAVE_CONTEXT();
+
+ return 0;
+
+
+}
+
+
diff --git a/src/htsparse.h b/src/htsparse.h
new file mode 100644
index 0000000..4efc386
--- /dev/null
+++ b/src/htsparse.h
@@ -0,0 +1,108 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: htsparse.h parser */
+/* html/javascript/css parser */
+/* and other parser routines */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+
+typedef struct { /* Engine-private companion of htsmoduleStruct: the prototypes
+   in this header call it "the private module variables".
+   Field notes marked (wait) are based on what the visible part of
+   hts_mirror_wait_for_next_file() does with the field. */
+ /* Main object */
+ htsblk* r_;
+
+ /* Error handling */
+ int* error_;
+ int* exit_xh_; /* (wait) *exit_xh_ is set to 1 when the shell or user requested an exit */
+ int* store_errpage_;
+
+ /* Structural */
+ int* filptr_;
+ char*** filters_;
+ robots_wizard* robots_;
+ hash_struct* hash_;
+ int* lien_max_;
+
+ /* Base & codebase */
+ char* base;
+ char* codebase;
+
+ /* Index */
+ int* makeindex_done_;
+ FILE** makeindex_fp_;
+ int* makeindex_links_;
+ char* makeindex_firstlink_;
+
+ /* Html templates */
+ char *template_header_;
+ char *template_body_;
+ char *template_footer_;
+
+ /* Specific to downloads */
+ LLint* stat_fragment_;
+ TStamp makestat_time;
+ FILE* makestat_fp;
+ LLint* makestat_total_; /* (wait) HTS_TOTAL_RECV as recorded when the last "Rate=" stats line was written */
+ int* makestat_lnk_; /* (wait) lien_tot as recorded when the last stats line was written */
+ FILE* maketrack_fp; /* (wait) when non-NULL, receives a back_info() dump of every slot along with the stats */
+
+ /* Function-dependent */
+ char* loc_; /* (wait) buffer used as r->location; the slot's location string is copied into it */
+ TStamp* last_info_shell_; /* (wait) time of the last status report written for the shell */
+ int* info_shell_; /* (wait) set to 1 when shell, keyboard, verbose or non-quiet reporting is active */
+
+} htsmoduleStructExtended;
+
+
+/*
+ Main parser, attempt to scan links inside the html/css/js file
+ Parameters: The public module structure, and the private module variables
+*/
+int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre);
+
+/*
+ Check for 301,302.. errors ("moved") and handle them; re-issue requests, make
+ redirect file, handle filters considerations..
+ Parameters: The public module structure, and the private module variables
+ Returns 0 upon success
+*/
+int hts_mirror_check_moved(htsmoduleStruct* str, htsmoduleStructExtended* stre);
+
+/*
+ Get the next file on the queue, waiting for it, handling other files in background..
+ Parameters: The public module structure, and the private module variables
+ Returns 0 upon success; 0 is also returned, with *stre->exit_xh_ set to 1,
+ when the shell or the user requested an exit while waiting.
+ Returns 2 when the link is no longer on the stack because it has already
+ been downloaded in background: the caller should skip to the next link.
+ NOTE(review): other return values may exist in the part of the function
+ not covered by this description - confirm against the implementation.
+*/
+int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended* stre);
+
+
diff --git a/src/htsrobots.c b/src/htsrobots.c
index 8aabdd4..58e97fb 100644
--- a/src/htsrobots.c
+++ b/src/htsrobots.c
@@ -79,10 +79,11 @@ int checkrobots(robots_wizard* robots,char* adr,char* fil) {
return 0;
}
int checkrobots_set(robots_wizard* robots,char* adr,char* data) {
- if (((int) strlen(data)) > 999) return 0;
+ if (((int) strlen(adr)) >= sizeof(robots->adr) - 2) return 0;
+ if (((int) strlen(data)) >= sizeof(robots->token) - 2) return 0;
while(robots) {
if (strfield2(robots->adr,adr)) { // entrée existe
- strcpy(robots->token,data);
+ strcpybuff(robots->token,data);
#if DEBUG_ROBOTS
printf("robots.txt: set %s to %s\n",adr,data);
#endif
@@ -92,8 +93,8 @@ int checkrobots_set(robots_wizard* robots,char* adr,char* data) {
robots->next=(robots_wizard*) calloct(1,sizeof(robots_wizard));
if (robots->next) {
robots->next->next=NULL;
- strcpy(robots->next->adr,adr);
- strcpy(robots->next->token,data);
+ strcpybuff(robots->next->adr,adr);
+ strcpybuff(robots->next->token,data);
#if DEBUG_ROBOTS
printf("robots.txt: new set %s to %s\n",adr,data);
#endif
diff --git a/src/htsrobots.h b/src/htsrobots.h
index 62b9689..ef08183 100644
--- a/src/htsrobots.h
+++ b/src/htsrobots.h
@@ -41,8 +41,8 @@ Please visit our Website: http://www.httrack.com
// robots wizard
typedef struct robots_wizard {
- char adr[1024];
- char token[1024];
+ char adr[128];
+ char token[4096];
struct robots_wizard* next;
} robots_wizard;
diff --git a/src/htsserver.c b/src/htsserver.c
new file mode 100644
index 0000000..0408976
--- /dev/null
+++ b/src/htsserver.c
@@ -0,0 +1,1814 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: Mini-server */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+
+/* specific definitions */
+/* specific definitions */
+#include "htsbase.h"
+#include "htsnet.h"
+#include "htslib.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <fcntl.h>
+#if HTS_WIN
+#else
+#include <arpa/inet.h>
+#endif
+#ifndef _WIN32
+#include <signal.h>
+#endif
+/* END specific definitions */
+
+/* définitions globales */
+#include "htsglobal.h"
+
+/* htslib */
+/*#include "htslib.h"*/
+
+/* HTTrack Website Copier Library */
+#include "httrack-library.h"
+
+/* Language files */
+#include "htsinthash.h"
+int NewLangStrSz=1024;
+inthash NewLangStr=NULL;
+int NewLangStrKeysSz=1024;
+inthash NewLangStrKeys=NULL;
+int NewLangListSz=1024;
+inthash NewLangList=NULL;
+/* Language files */
+
+
+#include "htsserver.h"
+
+char* gethomedir(void); /* used below to build the default "<home>/websites" path */
+int commandRunning = 0; /* tested before control commands (cancel, pause, add-url..) are honoured and before a new "httrack" command is started; reset by smallserver_init() */
+int commandEndRequested = 0; /* 1 after a first "cancel", 2 after a second "cancel" or an "abort" */
+int commandEnd = 0; /* non-zero once the command is over: return values get published and finished.html is served */
+int commandReturn = 0; /* when non-zero, published under the "commandReturn" key together with the two strings below */
+char* commandReturnMsg = NULL; /* published under "commandReturnMsg"; freed and reset by smallserver_init() */
+char* commandReturnCmdl = NULL; /* published under "commandReturnCmdl"; freed and reset by smallserver_init() */
+int commandReturnSet = 0; /* set to 1 once the return values have been written to NewLangList */
+
+/* Extern */
+extern void webhttrack_main(char* cmd);
+extern void webhttrack_lock(int lock);
+
+static int is_image(char* file) {
+  /* Substring test: ".gif" anywhere in the name qualifies, not only as a suffix */
+  const char* const hit = strstr(file, ".gif");
+  if (hit == NULL)
+    return 0;
+  return 1;
+}
+static int is_text(char* file) {
+  /* Substring test: ".txt" anywhere in the name qualifies, not only as a suffix */
+  const char* const hit = strstr(file, ".txt");
+  if (hit == NULL)
+    return 0;
+  return 1;
+}
+static int is_html(char* file) {
+  /* Substring test on ".htm", which therefore also accepts ".html" names */
+  const char* const hit = strstr(file, ".htm");
+  if (hit == NULL)
+    return 0;
+  return 1;
+}
+
+static void sig_brpipe( int code ) {
+  /* Deliberately empty handler (installed for SIGPIPE by smallserver()):
+     the signal is simply swallowed */
+  (void) code;
+}
+
+
+// URL Link catcher
+
+// 0- Init the URL catcher with standard port
+
+// smallserver_init(&port,&return_host);
+T_SOC smallserver_init_std(int* port_prox,char* adr_prox) {
+  /* Candidate ports, tried in order; the negative entry ends the list.
+     The 0 entry is handed to smallserver_init() like any other port. */
+  int candidates[]={8080,8081,8082,8083,8084,8085,8086,8087,8088,8089,
+    32000,32001,32002,32003,32004,32005,32006,32007,32008,32009,
+    42000,42001,42002,42003,42004,42005,42006,42007,42008,42009,
+    0,-1};
+  T_SOC sock;
+  int idx;
+
+  for(idx = 0 ; ; ) {
+    sock = smallserver_init(&candidates[idx], adr_prox);
+    /* always report the port of the last attempt, successful or not */
+    *port_prox = candidates[idx];
+    idx++;
+    if (sock != INVALID_SOCKET || candidates[idx] < 0)
+      break;
+  }
+  return sock;
+}
+
+
+// 1- Init the URL catcher
+
+// smallserver_init(&port,&return_host);
+/*
+  Reset the command state and open the listening socket of the mini-server.
+  port: in, TCP port to bind to (any local address). The bound port is not
+        read back after bind(), so *port is left untouched.
+        NOTE(review): with *port == 0 the caller presumably never learns
+        which port was actually bound - confirm whether that case matters.
+  adr:  out, receives the local host name upon success only.
+        NOTE(review): copied with strcpy(); the host name may be up to 256
+        characters long, so the caller's buffer must be large enough -
+        verify against callers.
+  Returns the listening socket, or INVALID_SOCKET upon failure.
+*/
+T_SOC smallserver_init(int* port,char* adr) {
+  T_SOC soc = INVALID_SOCKET;
+  char h_loc[256+2];
+
+  /* Forget everything about a previous command */
+  commandRunning =
+    commandEnd =
+    commandReturn =
+    commandReturnSet =
+    commandEndRequested = 0;
+  if (commandReturnMsg)
+    free(commandReturnMsg);
+  commandReturnMsg = NULL;
+  if (commandReturnCmdl)
+    free(commandReturnCmdl);
+  commandReturnCmdl = NULL;
+
+  if (gethostname(h_loc,256)==0) {    // host name
+    SOCaddr server;
+    int server_size=sizeof(server);
+
+    /* gethostname() may leave a truncated name without its terminating
+       NUL: force one before the name is used as a string */
+    h_loc[256] = '\0';
+
+    // clear structure
+    memset(&server, 0, sizeof(server));
+
+    // bind on all addresses rather than on the address of our host name
+    SOCaddr_initany(server, server_size);
+    if ( (soc=socket(SOCaddr_sinfamily(server), SOCK_STREAM, 0)) != INVALID_SOCKET) {
+      SOCaddr_initport(server, *port);
+      if ( bind(soc,(struct sockaddr*) &server, server_size) == 0
+        && listen(soc,10)>=0 ) {     // backlog of 10: arbitrary value
+        strcpy(adr, h_loc);
+      } else {
+        /* bind() or listen() failed: release the socket */
+#ifdef _WIN32
+        closesocket(soc);
+#else
+        close(soc);
+#endif
+        soc=INVALID_SOCKET;
+      }
+    }
+  }
+  return soc;
+}
+
+// 2 - Wait for URL
+
+/*
+  Read up to len bytes from soc into buffer, looping over partial reads.
+  The timeout is applied before every recv() call, not only before the first
+  one, so that a peer announcing more data than it sends cannot block the
+  server forever.
+  NOTE(review): assumes check_readinput_t() returns non-zero when the socket
+  becomes readable within timeout - confirm against its definition.
+  Returns the number of bytes read (possibly less than len if the peer
+  closed the connection, an error occurred or the timeout elapsed), or -1 if
+  nothing was readable at all within the timeout.
+*/
+static int recv_bl(T_SOC soc, void* buffer, size_t len, int timeout) {
+  size_t remaining = len;
+  size_t offs = 0;
+
+  if (!check_readinput_t(soc, timeout))
+    return -1;
+
+  while(remaining > 0) {
+    int n = recv(soc, ((char*)buffer) + offs, (int) remaining, 0);
+    if (n <= 0)     /* connection closed, or error */
+      break;
+    offs += n;
+    remaining -= n;
+    /* more data expected: wait for it, but not longer than the timeout */
+    if (remaining > 0 && !check_readinput_t(soc, timeout))
+      break;
+  }
+  return (int)offs;
+}
+
+
+// smallserver
+// returns 0 if error
+// url: buffer where URL must be stored - or ip:port in case of failure
+// data: 32Kb
+
+typedef struct { /* name/integer pair: one entry of the table of default integer settings in smallserver() */
+ char* name; /* key written to NewLangList; a NULL name ends the table */
+ int value; /* stored in the hash as its "%d" text form */
+} initIntElt;
+typedef struct { /* name/string pair: one entry of the table of default string settings in smallserver() */
+ char* name; /* key written to NewLangList; a NULL name ends the table */
+ char* value; /* a strdup() copy of it is stored in the hash */
+} initStrElt;
+
+/* Store a heap copy of value under key in NewLangList; returns what
+   inthash_write() returns. */
+int smallserver_setkey(char* key, char* value) {
+  char* const copy = strdup(value);
+  return inthash_write(NewLangList, key, (unsigned long int) copy);
+}
+/* Store the LLintP text form of value under key in NewLangList; returns
+   what inthash_write() returns. */
+int smallserver_setkeyint(char* key, LLint value) {
+  char digits[256];
+  char* copy;
+
+  sprintf(digits, LLintP, value);
+  copy = strdup(digits);
+  return inthash_write(NewLangList, key, (unsigned long int) copy);
+}
+/*
+  Store a heap copy of value in NewLangList under the composed key
+  "<key><id><key2>".
+  A composed key that would not fit the local buffer is rejected instead of
+  overflowing the stack: nothing is stored and 0 is returned.
+  NOTE(review): 0 is assumed to be a harmless "nothing added" result for
+  callers - confirm against the inthash_write() return convention.
+  Otherwise returns what inthash_write() returns.
+*/
+int smallserver_setkeyarr(char* key, int id, char* key2, char* value) {
+  char tmp[256];
+
+  /* 16 bytes are kept for the decimal form of id (sign included) and the
+     terminating NUL */
+  if (strlen(key) + strlen(key2) + 16 >= sizeof(tmp))
+    return 0;
+  sprintf(tmp, "%s%d%s", key, id, key2);
+  return inthash_write(NewLangList, tmp, (unsigned long int)strdup(value));
+}
+
+#define SET_ERROR(err) do { /* record a copy of err under the "error" key and route the response to the error page; expects a char* error_redirect variable in scope */ \
+ inthash_write(NewLangList, "error", (unsigned long int)strdup(err)); \
+ error_redirect = "/server/error.html"; \
+} while(0)
+
+int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) {
+ int timeout=30;
+ int retour=0;
+ int willexit=0;
+ int buffer_size = 32768;
+ char* buffer = (char*)malloc(buffer_size);
+ String headers = STRING_EMPTY;
+ String output = STRING_EMPTY;
+ String tmpbuff = STRING_EMPTY;
+ String fspath = STRING_EMPTY;
+
+ /* Load strings */
+ htslang_init();
+ if (!htslang_load(NULL, path)) {
+ fprintf(stderr, "unable to find lang.def and/or lang/ strings in %s\n", path);
+ return 0;
+ }
+ LANG_T(path, 0);
+
+ /* Init various values */
+ {
+ char pth[1024];
+ char* initOn[] = { "parseall", "Cache", "ka",
+ "cookies", "parsejava", "testall", "updhack", "index", NULL };
+ initIntElt initInt[] = {
+ { "filter", 4 },
+ { "travel", 2 },
+ { "travel2", 1 },
+ { "travel3", 1 },
+ /* */
+ { "connexion", 4 },
+ /* */
+ { "maxrate", 25000 },
+ /* */
+ { "build", 1 },
+ /* */
+ { "checktype", 2},
+ { "robots", 3 },
+
+ { NULL, 0 }
+ };
+ initStrElt initStr[] = {
+ { "user", "Mozilla/4.5 (compatible; HTTrack 3.0x; Windows 98)" },
+ { "footer", "<!-- Mirrored from %s%s by HTTrack Website Copier/3.x [XR&CO'2002], %s -->" },
+ { "url2", "+*.png +*.gif +*.jpg +*.css +*.js -ad.doubleclick.net/*" },
+ { NULL, NULL }
+ };
+ int i = 0;
+ for(i = 0 ; initInt[i].name ; i++) {
+ char tmp[32];
+ sprintf(tmp, "%d", initInt[i].value);
+ inthash_write(NewLangList, initInt[i].name, (unsigned long int)strdup(tmp));
+ }
+ for(i = 0 ; initOn[i] ; i++) {
+ inthash_write(NewLangList, initOn[i], (unsigned long int)strdup("1")); /* "on" */
+ }
+ for(i = 0 ; initStr[i].name ; i++) {
+ inthash_write(NewLangList, initStr[i].name, (unsigned long int)strdup(initStr[i].value));
+ }
+ strcpybuff(pth, gethomedir());
+ strcatbuff(pth, "/websites");
+ inthash_write(NewLangList, "path", (unsigned long int)strdup(pth));
+ }
+
+ /* Lock */
+ webhttrack_lock(1);
+
+ // connexion (accept)
+ while(!willexit && buffer != NULL && soc != INVALID_SOCKET) {
+ char line1[1024];
+ char line[8192];
+ char line2[1024];
+ T_SOC soc_c;
+ struct sockaddr dummyaddr;
+ int dummylen = sizeof(struct sockaddr);
+ LLint length = 0;
+ char* error_redirect = NULL;
+
+ line[0] = '\0';
+ buffer[0] = '\0';
+ StringClear(&headers);
+ StringClear(&output);
+ StringClear(&tmpbuff);
+ StringClear(&fspath);
+ StringStrcat(&headers, "");
+ StringStrcat(&output, "");
+ StringStrcat(&tmpbuff, "");
+ StringStrcat(&fspath, "");
+ memset(&dummyaddr, 0, sizeof(dummyaddr));
+
+ /* UnLock */
+ webhttrack_lock(0);
+
+ /* sigpipe */
+#ifndef _WIN32
+ signal( SIGPIPE , sig_brpipe );
+#endif
+
+ /* Accept */
+ while ( (soc_c = accept(soc, &dummyaddr, &dummylen)) == INVALID_SOCKET);
+
+ /* Lock */
+ webhttrack_lock(1);
+
+ if(linputsoc_t(soc_c, line1, sizeof(line1) - 2, timeout) > 0) {
+ int meth = 0;
+ if (strfield(line1, "get ")) {
+ meth = 1;
+ } else if (strfield(line1, "post ")) {
+ meth = 2;
+ } else if (strfield(line1, "head ")) { /* yes, we can do that */
+ meth = 10;
+ } else {
+#ifdef _DEBUG
+ // assert(FALSE);
+#endif
+ }
+ if (meth) {
+ /* Flush headers */
+ length = buffer_size - 2;
+ while(linputsoc_t(soc_c, line, sizeof(line) - 2, timeout) > 0) {
+ int p;
+ if ((p=strfield(line,"Content-length:"))!=0) {
+ sscanf(line+p, LLintP, &(length));
+ }
+ else if ((p=strfield(line,"Accept-language:"))!=0) {
+ char tmp[32];
+ char* s = line + p;
+ /*int l;*/
+ while(*s == ' ') s++;
+ tmp[0] = '\0';
+ strncatbuff(tmp, s, 2);
+ /*l = LANG_SEARCH(path, tmp);*/
+ }
+ }
+ if (meth == 2) {
+ int sz = 0;
+ if (length > buffer_size - 2) {
+ length = buffer_size - 2;
+ }
+ if (length > 0 && (sz=recv_bl(soc_c, buffer, (int)length, timeout)) < 0) {
+ meth = 0;
+ } else {
+ buffer[sz] = '\0';
+ }
+ }
+ }
+
+ /* Generated variables */
+ if (commandEnd && !commandReturnSet) {
+ commandReturnSet = 1;
+ if (commandReturn) {
+ char tmp[32];
+ sprintf(tmp, "%d", commandReturn);
+ inthash_write(NewLangList, "commandReturn", (unsigned long int)strdup(tmp));
+ inthash_write(NewLangList, "commandReturnMsg", (unsigned long int)commandReturnMsg);
+ inthash_write(NewLangList, "commandReturnCmdl", (unsigned long int)commandReturnCmdl);
+ } else {
+ inthash_write(NewLangList, "commandReturn", (unsigned long int)NULL);
+ inthash_write(NewLangList, "commandReturnMsg", (unsigned long int)NULL);
+ inthash_write(NewLangList, "commandReturnCmdl", (unsigned long int)NULL);
+ }
+ }
+
+ /* SID check */
+ {
+ unsigned long int adr = 0;
+ if (inthash_readptr(NewLangList, "_sid", (long int *)&adr)) {
+ if (inthash_write(NewLangList, "sid", (unsigned long int)strdup((char*)adr))) {
+ }
+ }
+ }
+
+ /* check variables */
+ if (meth && buffer[0]) {
+ char* s = buffer;
+ char *e, *f;
+ strcatbuff(buffer, "&");
+ while( s && (e = strchr(s, '=')) && (f = strchr(s, '&')) ) {
+ char* ua;
+ int len;
+ String sua = STRING_EMPTY;
+ *e = *f = '\0';
+ ua = e + 1;
+ if (strfield2(ua, "on")) /* hack : "on" == 1 */
+ ua = "1";
+ len = strlen(ua);
+ unescapehttp(ua, &sua);
+ inthash_write(NewLangList, s, (unsigned long int)StringAcquire(&sua));
+ s = f + 1;
+ }
+ }
+
+
+ /* Error check */
+ {
+ unsigned long int adr = 0;
+ unsigned long int adr2 = 0;
+ if (inthash_readptr(NewLangList, "sid", (long int *)&adr)) {
+ if (inthash_readptr(NewLangList, "_sid", (long int *)&adr2)) {
+ if (strcmp((char*)adr, (char*)adr2) != 0) {
+ meth = 0;
+ }
+ }
+ }
+ }
+
+ /* Check variables (internal) */
+ if (meth) {
+ int doLoad=0;
+ unsigned long int adr = 0;
+ if (inthash_readptr(NewLangList, "lang", (long int *)&adr)) {
+ int n = 0;
+ if (sscanf((char*)adr, "%d", &n) == 1 && n - 1 != LANG_T(path, -1)) {
+ LANG_T(path, n - 1);
+ }
+ }
+
+ /* Load existing project settings */
+ if (inthash_readptr(NewLangList, "loadprojname", (long int *)&adr)) {
+ char* pname = (char*) adr;
+ if (*pname) {
+ inthash_write(NewLangList, "projname", (unsigned long int)strdup(pname));
+ }
+ inthash_write(NewLangList, "loadprojname", (unsigned long int)NULL);
+ doLoad=1;
+ }
+
+ /* path : <path>/<project> */
+ if (!commandRunning) {
+ unsigned long int adrw = 0, adrpath = 0, adrprojname = 0;
+ if (inthash_readptr(NewLangList, "path", (long int *)&adrpath)
+ && inthash_readptr(NewLangList, "projname", (long int *)&adrprojname)) {
+ StringClear(&fspath);
+ StringStrcat(&fspath, (char*)adrpath);
+ StringStrcat(&fspath, "/");
+ StringStrcat(&fspath, (char*)adrprojname);
+ }
+ }
+
+ /* Load existing project settings */
+ if (doLoad) {
+ FILE* fp;
+ StringStrcat(&fspath, "/hts-cache/winprofile.ini");
+ fp = fopen(StringBuff(&fspath), "rb");
+ if (fp) {
+ /* Read file */
+ while(!feof(fp)) {
+ char* str = line;
+ char* pos;
+ if (!linput(fp, line, sizeof(line) - 2)) {
+ *str = '\0';
+ }
+ pos=strchr(line, '=');
+ if (pos) {
+ String escline = STRING_EMPTY;
+ *pos++='\0';
+ if (pos[0] == '0' && pos[1] == '\0')
+ *pos = '\0'; /* 0 => empty */
+ unescapeini(pos, &escline);
+ inthash_write(NewLangList, line, (unsigned long int)StringAcquire(&escline));
+ }
+ }
+
+ fclose(fp);
+ }
+ }
+
+ }
+
+ /* Execute command */
+ {
+ unsigned long int adr = 0;
+ int p = 0;
+ if (inthash_readptr(NewLangList, "command", (long int *)&adr)) {
+ if (strcmp((char*)adr, "cancel") == 0) {
+ if (commandRunning) {
+ if (!commandEndRequested) {
+ commandEndRequested=1;
+ hts_request_stop(0);
+ } else {
+ hts_request_stop(1); /* note: the force flag does not have anyeffect yet */
+ commandEndRequested=2; /* will break the loop() callback */
+ }
+ }
+ } else if ((p=strfield((char*)adr, "cancel-file="))) {
+ if (commandRunning) {
+ hts_cancel_file((char*)adr + p);
+ }
+ } else if (strcmp((char*)adr, "cancel-parsing") == 0) {
+ if (commandRunning) {
+ hts_cancel_parsing();
+ }
+ } else if ((p=strfield((char*)adr, "pause="))) {
+ if (commandRunning) {
+ hts_setpause(1);
+ }
+ } else if ((p=strfield((char*)adr, "unpause"))) {
+ if (commandRunning) {
+ hts_setpause(0);
+ }
+ } else if (strcmp((char*)adr, "abort") == 0) {
+ if (commandRunning) {
+ hts_request_stop(1);
+ commandEndRequested=2; /* will break the loop() callback */
+ }
+ } else if ((p=strfield((char*)adr, "add-url="))) {
+ if (commandRunning) {
+ char* ptraddr[2];
+ ptraddr[0] = (char*)adr + p;
+ ptraddr[1] = NULL;
+ hts_addurl(ptraddr);
+ }
+ } else if ((p=strfield((char*)adr, "httrack"))) {
+ if (!commandRunning) {
+ unsigned long int adrcd = 0;
+ if (inthash_readptr(NewLangList, "command_do", (long int *)&adrcd)) {
+ unsigned long int adrw = 0, adrpath = 0, adrprojname = 0;
+ if (inthash_readptr(NewLangList, "winprofile", (long int *)&adrw)) {
+ StringClear(&tmpbuff);
+ StringStrcat(&tmpbuff, StringBuff(&fspath));
+ StringStrcat(&tmpbuff, "/hts-cache/");
+
+ /* Create minimal directory structure */
+ if (!structcheck(StringBuff(&tmpbuff))) {
+ FILE* fp;
+ StringStrcat(&tmpbuff, "winprofile.ini");
+ fp = fopen(StringBuff(&tmpbuff), "wb");
+ if (fp != NULL) {
+ int count = (int) strlen((char*)adrw);
+ if ((int)fwrite((void*)adrw, 1, count, fp) == count) {
+
+ /* Wipe the doit.log file, useless here (all options are replicated) and
+ even a bit annoying (duplicate/ghost options)
+ The behaviour is exactly the same as in WinHTTrack
+ */
+ StringClear(&tmpbuff);
+ StringStrcat(&tmpbuff, StringBuff(&fspath));
+ StringStrcat(&tmpbuff, "/hts-cache/doit.log");
+ remove(StringBuff(&tmpbuff));
+
+ /*
+ RUN THE SERVER
+ */
+ if (strcmp((char*)adrcd, "start") == 0) {
+ webhttrack_main((char*)adr + p);
+ } else {
+ commandRunning = 0;
+ commandEnd = 1;
+ }
+ } else {
+ char tmp[1024];
+ sprintf(tmp, "Unable to write %d bytes in the the init file %s", count, StringBuff(&fspath));
+ SET_ERROR(tmp);
+ }
+ fclose(fp);
+ } else {
+ char tmp[1024];
+ sprintf(tmp, "Unable to create the init file %s", StringBuff(&fspath));
+ SET_ERROR(tmp);
+ }
+ } else {
+ char tmp[1024];
+ sprintf(tmp, "Unable to create the directory structure in %s", StringBuff(&fspath));
+ SET_ERROR(tmp);
+ }
+
+ } else {
+ SET_ERROR("Internal server error: unable to fetch project name or path");
+ }
+ }
+ }
+ } else if (strcmp((char*)adr, "quit") == 0) {
+ willexit=1;
+ }
+ inthash_write(NewLangList, "command", (unsigned long int)NULL);
+ }
+ }
+
+ /* Response */
+ if (meth) {
+ int virtualpath = 0;
+ char* pos;
+ char* url = strchr(line1, ' ');
+ if (url && *++url == '/' && (pos = strchr(url, ' ')) && !(*pos = '\0') ) {
+ char fsfile[1024];
+ char* file;
+ FILE* fp;
+ char* qpos;
+
+ /* get the URL */
+ if (error_redirect == NULL) {
+ if ( (qpos = strchr(url, '?')) ) {
+ *qpos = '\0';
+ }
+ fsfile[0] = '\0';
+ if (strcmp(url, "/") == 0) {
+ file = "/server/index.html";
+ meth = 2;
+ } else {
+ file = url;
+ }
+ } else {
+ file = error_redirect;
+ meth = 2;
+ }
+
+ if (strncmp(file, "/website/", 9) == 0) {
+ virtualpath = 1;
+ }
+
+ if (commandRunning) {
+ if (!is_image(file)) {
+ file = "/server/refresh.html";
+ }
+ } else if (commandEnd && !virtualpath && !willexit) {
+ if (!is_image(file)) {
+ file = "/server/finished.html";
+ }
+ }
+
+ if (strlen(path) + strlen(file) + 32 < sizeof(fsfile)) {
+ if (strncmp(file, "/website/", 9) != 0) {
+ sprintf(fsfile, "%shtml%s", path, file);
+ } else {
+ unsigned long int adr = 0;
+ if (inthash_readptr(NewLangList, "projpath", (long int *)&adr)) {
+ sprintf(fsfile, "%s%s", (char*)adr, file + 9);
+ }
+ }
+ }
+
+ if (fsfile[0] && strstr(file, "..") == NULL && (fp = fopen(fsfile, "rb"))) {
+ char ok[] = "HTTP/1.0 200 OK\r\n"
+ "Connection: close\r\n"
+ "Server: httrack-small-server\r\n"
+ "Content-type: text/html\r\n"
+ "Cache-Control: no-cache, must-revalidate, private\r\n"
+ "Pragma: no-cache\r\n"
+ ;
+ char ok_img[] = "HTTP/1.0 200 OK\r\n"
+ "Connection: close\r\n"
+ "Server: httrack small server\r\n"
+ "Content-type: image/gif\r\n"
+ ;
+ char ok_text[] = "HTTP/1.0 200 OK\r\n"
+ "Connection: close\r\n"
+ "Server: httrack small server\r\n"
+ "Content-type: text/plain\r\n"
+ ;
+
+ /* register current page */
+ inthash_write(NewLangList, "thisfile", (unsigned long int)strdup(file));
+
+ /* Force GET for the last request */
+ if (meth == 2 && willexit) {
+ meth = 1;
+ }
+
+ /* posted data are redirected to get protocol */
+ if (meth == 2) {
+ char redir[] = "HTTP/1.0 302 Redirect\r\n"
+ "Connection: close\r\n"
+ "Server: httrack-small-server\r\n";
+ unsigned long int adr = 0;
+ char* newfile = file;
+ if (inthash_readptr(NewLangList, "redirect", (long int *)&adr) && adr != 0) {
+ char* newadr = (char*)adr;
+ if (*newadr) {
+ newfile = newadr;
+ }
+ }
+ StringMemcat(&headers, redir, strlen(redir));
+ {
+ char tmp[256];
+ if (strlen(file) < sizeof(tmp) - 32) {
+ sprintf(tmp, "Location: %s\r\n", newfile);
+ StringMemcat(&headers, tmp, strlen(tmp));
+ }
+ }
+ inthash_write(NewLangList, "redirect", (unsigned long int)NULL);
+ }
+ else if (is_html(file)) {
+ int outputmode = 0;
+ StringMemcat(&headers, ok, sizeof(ok) - 1);
+ while(!feof(fp)) {
+ char* str = line;
+ int prevlen = StringLength(&output);
+ int nocr = 0;
+ if (!linput(fp, line, sizeof(line) - 2)) {
+ *str = '\0';
+ }
+ if (*str && str[strlen(str) - 1] == '\\') {
+ nocr = 1;
+ str[strlen(str) - 1] = '\0';
+ }
+ while(*str) {
+ char* pos;
+ int n;
+ if (*str == '$' && *++str == '{' && (pos = strchr(++str, '}')) && (n = (pos - str) ) && n < 1024 ) {
+ char name_[1024 + 2];
+ char* name = name_;
+ char* langstr = NULL;
+ int p;
+ int format = 0;
+ int listDefault = 0;
+ name[0] = '\0';
+ strncatbuff(name, str, n);
+ if (strncmp(name, "/*", 2) == 0) {
+ /* comments */
+ }
+ else if (( p = strfield(name, "html:"))) {
+ name += p;
+ format = 1;
+ }
+ else if (( p = strfield(name, "list:"))) {
+ name += p;
+ format = 2;
+ }
+ else if (( p = strfield(name, "liststr:"))) {
+ name += p;
+ format = -2;
+ }
+ else if (( p = strfield(name, "file-exists:"))) {
+ char* pos2;
+ name += p;
+ format = 0;
+ pos2 = strchr(name, ':');
+ langstr = "";
+ if (pos2 != NULL) {
+ *pos2 = '\0';
+ if (strstr(name, "..") == NULL) {
+ if (fexist(fconcat(path, name))) {
+ langstr = pos2 + 1;
+ }
+ }
+ }
+ }
+ else if (( p = strfield(name, "do:"))) {
+ char* pos2;
+ name += p;
+ format = 1;
+ pos2 = strchr(name, ':');
+ langstr = "";
+ if (pos2 != NULL) {
+ *pos2 = '\0';
+ pos2++;
+ } else {
+ pos2="";
+ }
+ if (strcmp(name, "output-mode") == 0) {
+ if (strcmp(pos2, "html") == 0) {
+ outputmode = 1;
+ } else if (strcmp(pos2, "inifile") == 0) {
+ outputmode = 2;
+ } else if (strcmp(pos2, "html-urlescaped") == 0) {
+ outputmode = 3;
+ } else {
+ outputmode = 0;
+ }
+ } else if (strcmp(name, "if-file-exists") == 0) {
+ if (strstr(pos2, "..") == NULL) {
+ if (!fexist(fconcat(path, pos2))) {
+ outputmode = -1;
+ }
+ }
+ } else if (strcmp(name, "if-project-file-exists") == 0) {
+ if (strstr(pos2, "..") == NULL) {
+ if (!fexist(fconcat(StringBuff(&fspath), pos2))) {
+ outputmode = -1;
+ }
+ }
+ } else if (strcmp(name, "if-file-do-not-exists") == 0) {
+ if (strstr(pos2, "..") == NULL) {
+ if (fexist(fconcat(path, pos2))) {
+ outputmode = -1;
+ }
+ }
+ } else if (strcmp(name, "if-not-empty") == 0) {
+ unsigned long int adr = 0;
+ if (!inthash_readptr(NewLangList, pos2, (long int *)&adr) || *((char*)adr) == 0 ) {
+ outputmode = -1;
+ }
+ } else if (strcmp(name, "if-empty") == 0) {
+ unsigned long int adr = 0;
+ if (inthash_readptr(NewLangList, pos2, (long int *)&adr) && *((char*)adr) != 0 ) {
+ outputmode = -1;
+ }
+ } else if (strcmp(name, "end-if") == 0) {
+ outputmode = 0;
+ } else if (strcmp(name, "loadhash") == 0) {
+ unsigned long int adr = 0;
+ if (inthash_readptr(NewLangList, "path", (long int *)&adr)) {
+ char* rpath = (char*) adr;
+ find_handle h;
+ if (rpath[0]) {
+ if (rpath[strlen(rpath)-1]=='/') {
+ rpath[strlen(rpath)-1]='\0'; /* note: patching stored (inhash) value */
+ }
+ }
+ h = hts_findfirst(rpath);
+ if (h) {
+ struct topindex_chain * chain=NULL;
+ struct topindex_chain * startchain=NULL;
+ StringClear(&tmpbuff);
+ do {
+ if (hts_findisdir(h)) {
+ char iname[HTS_URLMAXSIZE*2];
+ strcpybuff(iname,rpath);
+ strcatbuff(iname,"/");
+ strcatbuff(iname,hts_findgetname(h));
+ strcatbuff(iname,"/hts-cache/winprofile.ini");
+ if (fexist(iname)) {
+ if (StringLength(&tmpbuff) > 0) {
+ StringStrcat(&tmpbuff, "\r\n");
+ }
+ StringStrcat(&tmpbuff, hts_findgetname(h));
+ }
+
+ }
+ } while(hts_findnext(h));
+ hts_findclose(h);
+ inthash_write(NewLangList, "winprofile", (unsigned long int)StringAcquire(&tmpbuff));
+ }
+ }
+ } else if (strcmp(name, "copy") == 0) {
+ if (*pos2) {
+ char* pos3 = strchr(pos2, ':');
+ if ( pos3 && *(pos3 + 1) ) {
+ unsigned long int adr = 0;
+ *pos3++ = '\0';
+ if (inthash_readptr(NewLangList, pos2, (long int *)&adr)) {
+ inthash_write(NewLangList, pos3, (unsigned long int)strdup((char*)adr));
+ inthash_write(NewLangList, pos2, (unsigned long int)NULL);
+ }
+ }
+ }
+ } else if (strcmp(name, "set") == 0) {
+ if (*pos2) {
+ char* pos3 = strchr(pos2, ':');
+ if ( pos3 ) {
+ *pos3++ = '\0';
+ inthash_write(NewLangList, pos2, (unsigned long int)strdup(pos3));
+ } else {
+ inthash_write(NewLangList, pos2, (unsigned long int)NULL);
+ }
+ }
+ }
+ }
+ /*
+ test:<if exist>
+ test:<if ==0>:<if ==1>:<if == 2>..
+ ztest:<if == 0 || !exist>:<if == 1>:<if == 2>..
+ */
+ else if ( ( p = strfield(name, "test:")) || ( p = strfield(name, "ztest:")) ) {
+ unsigned long int adr = 0;
+ char* pos2;
+ int ztest = (name[0] == 'z');
+ langstr = "";
+ name += p;
+ pos2 = strchr(name, ':');
+ if (pos2 != NULL) {
+ *pos2 = '\0';
+ if (inthash_readptr(NewLangList, name, (long int *)&adr) || ztest) {
+ char* newadr = (char*)adr;
+ if (!newadr)
+ newadr = "";
+ if (*newadr || ztest) {
+ int npos = 0;
+ name = pos2 + 1;
+ format = 4;
+ if (strchr(name, ':') == NULL) {
+ npos = 0; /* first is good if only one : */
+ format = 0;
+ } else {
+ if (sscanf(newadr, "%d", &npos) != 1) {
+ if (strfield(newadr, "on")) {
+ npos = 1;
+ } else {
+ npos = 0; /* first one will be ok */
+ format = 0;
+ }
+ }
+ }
+ while( *name && *name != '}' && npos >= 0) {
+ int end=0;
+ char* fpos = strchr(name, ':');
+ int n2;
+ if (fpos == NULL) {
+ fpos = name + strlen(name);
+ end=1;
+ }
+ n2 = (int) (fpos - name);
+ if (npos == 0) {
+ langstr = name;
+ *fpos='\0';
+ } else if (end) {
+ npos=0;
+ }
+ name += n2 + 1;
+ npos--;
+ }
+ }
+ }
+ }
+ }
+ else if (( p = strfield(name, "listid:"))) {
+ char* pos2;
+ name += p;
+ format = 2;
+ pos2 = strchr(name, ':');
+ if (pos2) {
+ char dname[32];
+ int n2 = (int) (pos2 - name);
+ if (n2 > 0 && n2 < sizeof(dname) - 2) {
+ unsigned long int adr = 0;
+ dname[0] = '\0';
+ strncatbuff(dname, name, n2);
+ if (inthash_readptr(NewLangList, dname, (long int *)&adr)) {
+ int n = 0;
+ if (sscanf((char*)adr, "%d", &n) == 1) {
+ listDefault = n;
+ }
+ }
+ name += n2 + 1;
+ }
+ }
+ }
+ else if (( p = strfield(name, "checked:"))) {
+ name += p;
+ format = 3;
+ }
+ if (langstr == NULL) {
+ if (strfield2(name, "#iso")) {
+ langstr = line2;
+ langstr[0] = '\0';
+ LANG_LIST(path, langstr);
+ assertf(strlen(langstr) < sizeof(line2) - 2);
+ } else {
+ langstr = LANGSEL(name);
+ if (langstr == NULL || *langstr == '\0') {
+ unsigned long int adr = 0;
+ if (inthash_readptr(NewLangList, name, (long int *)&adr)) {
+ char* newadr = (char*)adr;
+ langstr = newadr;
+ }
+ }
+ }
+ }
+ if (langstr && outputmode != -1) {
+ switch(format) {
+ case 0:
+ {
+ char* a = langstr;
+ while(*a) {
+ if (a[0] == '\\' && isxdigit(a[1]) && isxdigit(a[2])) {
+ int n;
+ char c;
+ if (sscanf(a+1, "%x", &n) == 1) {
+ c = (char)n;
+ StringMemcat(&output, &c, 1);
+ }
+ a += 2;
+ } else if (outputmode && a[0] == '<') {
+ StringStrcat(&output, "&lt;");
+ } else if (outputmode && a[0] == '>') {
+ StringStrcat(&output, "&gt;");
+ } else if (outputmode && a[0] == '&') {
+ StringStrcat(&output, "&amp;");
+ } else if (outputmode == 3 && a[0] == ' ') {
+ StringStrcat(&output, "%20");
+ } else if (outputmode >= 2 && ((unsigned char)a[0]) < 32) {
+ char tmp[32];
+ sprintf(tmp, "%%%02x", (unsigned char)a[0]);
+ StringStrcat(&output, tmp);
+ } else if (outputmode == 2 && a[0] == '%') {
+ StringStrcat(&output, "%%");
+ } else if (outputmode == 3 && a[0] == '%') {
+ StringStrcat(&output, "%25");
+ } else {
+ StringMemcat(&output, a, 1);
+ }
+ a++;
+ }
+ }
+ break;
+ case 3:
+ if (*langstr) {
+ StringStrcat(&output, "checked");
+ }
+ break;
+ default:
+ if (*langstr) {
+ int id=1;
+ char* fstr = langstr;
+ StringClear(&tmpbuff);
+ if (format == 2) {
+ StringStrcat(&output, "<option value=1>");
+ } else if (format == -2) {
+ StringStrcat(&output, "<option value=\"");
+ }
+ while(*fstr) {
+ switch(*fstr) {
+ case 13: break;
+ case 10:
+ if (format == 1) {
+ StringStrcat(&output, StringBuff(&tmpbuff));
+ StringStrcat(&output, "<br>\r\n");
+ } else if (format == -2) {
+ StringStrcat(&output, StringBuff(&tmpbuff));
+ StringStrcat(&output, "\">");
+ StringStrcat(&output, StringBuff(&tmpbuff));
+ StringStrcat(&output, "</option>\r\n");
+ StringStrcat(&output, "<option value=\"");
+ } else {
+ char tmp[32];
+ sprintf(tmp, "%d", ++id);
+ StringStrcat(&output, StringBuff(&tmpbuff));
+ StringStrcat(&output, "</option>\r\n");
+ StringStrcat(&output, "<option value=");
+ StringStrcat(&output, tmp);
+ if (listDefault == id) {
+ StringStrcat(&output, " selected");
+ }
+ StringStrcat(&output, ">");
+ }
+ StringClear(&tmpbuff);
+ break;
+ case '<':
+ StringStrcat(&tmpbuff, "&lt;");
+ break;
+ case '>':
+ StringStrcat(&tmpbuff, "&gt;");
+ break;
+ case '&':
+ StringStrcat(&tmpbuff, "&amp;");
+ break;
+ default:
+ StringMemcat(&tmpbuff, fstr, 1);
+ break;
+ }
+ fstr++;
+ }
+ if (format == 2) {
+ StringStrcat(&output, StringBuff(&tmpbuff));
+ StringStrcat(&output, "</option>");
+ } else if (format == -2) {
+ StringStrcat(&output, StringBuff(&tmpbuff));
+ StringStrcat(&output, "\">");
+ StringStrcat(&output, StringBuff(&tmpbuff));
+ StringStrcat(&output, "</option>");
+ } else {
+ StringStrcat(&output, StringBuff(&tmpbuff));
+ }
+ StringClear(&tmpbuff);
+ }
+ }
+ }
+ str = pos;
+ } else {
+ if (outputmode != -1) {
+ StringMemcat(&output, str, 1);
+ }
+ }
+ str++;
+ }
+ if (!nocr && prevlen != StringLength(&output)) {
+ StringStrcat(&output, "\r\n");
+ }
+ }
+#ifdef _DEBUG
+ {
+ int len = (int)strlen((char*)StringBuff(&output));
+ assert(len == (int)StringLength(&output));
+ }
+#endif
+ } else if (is_text(file)) {
+ StringMemcat(&headers, ok_text, sizeof(ok_text) - 1);
+ while(!feof(fp)) {
+ int n = fread(line, 1, sizeof(line) - 2, fp);
+ if (n > 0) {
+ StringMemcat(&output, line, n);
+ }
+ }
+ } else {
+ StringMemcat(&headers, ok_img, sizeof(ok_img) - 1);
+ while(!feof(fp)) {
+ int n = fread(line, 1, sizeof(line) - 2, fp);
+ if (n > 0) {
+ StringMemcat(&output, line, n);
+ }
+ }
+ }
+ fclose(fp);
+ } else {
+ char error_hdr[] = "HTTP/1.0 404 Not Found\r\n"
+ "Server: httrack small server\r\n"
+ "Content-type: text/html\r\n";
+ char error[] =
+ "Page not found.\r\n";
+ StringStrcat(&headers, error_hdr);
+ StringStrcat(&output, error);
+ //assert(file == NULL);
+ }
+ }
+ } else {
+#ifdef _DEBUG
+ char error_hdr[] = "HTTP/1.0 500 Server Error\r\n"
+ "Server: httrack small server\r\n"
+ "Content-type: text/html\r\n";
+ char error[] =
+ "Server error.\r\n";
+ StringStrcat(&headers, error_hdr);
+ StringStrcat(&output, error);
+#endif
+ }
+ {
+ char tmp[256];
+ sprintf(tmp, "Content-length: %d\r\n", (int) StringLength(&output));
+ StringStrcat(&headers, tmp);
+ }
+ StringStrcat(&headers, "\r\n");
+ if (
+ (send(soc_c, StringBuff(&headers), StringLength(&headers), 0) != StringLength(&headers))
+ ||
+ ( (meth == 1) && (send(soc_c, StringBuff(&output), StringLength(&output), 0) != StringLength(&output)) )
+ ) {
+#ifdef _DEBUG
+ //assert(FALSE);
+#endif
+ }
+ } else {
+#ifdef _DEBUG
+ // assert(FALSE);
+#endif
+ }
+
+ /* Shutdown (FIN) and wait until confirmed */
+ {
+ char c;
+#ifdef _WIN32
+ shutdown(soc_c, SD_SEND);
+#else
+ shutdown(soc_c, 1);
+#endif
+ /* This is necessary as IE sometimes (!) sends an additional CRLF after POST data */
+ while(recv(soc_c, ((char*)&c), 1, 0) > 0);
+ }
+
+#if HTS_WIN
+ closesocket(soc_c);
+#else
+ close(soc_c);
+#endif
+ }
+
+ if (soc != INVALID_SOCKET) {
+#ifdef _WIN32
+ closesocket(soc);
+#else
+ close(soc);
+#endif
+ }
+
+ StringFree(&headers);
+ StringFree(&output);
+ StringFree(&tmpbuff);
+ StringFree(&fspath);
+
+ if (buffer)
+ free(buffer);
+
+ if (commandReturnMsg)
+ free(commandReturnMsg);
+ commandReturnMsg = NULL;
+ if (commandReturnCmdl)
+ free(commandReturnCmdl);
+ commandReturnCmdl = NULL;
+
+ /* Unlock */
+ webhttrack_lock(0);
+
+ return retour;
+}
+
+
+
+/* Language files */
+
+
+/*
+ * Create the global NewLangList hash table on first use.
+ * If the table already exists nothing is done. On allocation failure
+ * abortLog() is called; otherwise the table is flagged as holding
+ * malloc'ed values via inthash_value_is_malloc().
+ * Always returns 1.
+ */
+int htslang_init() {
+  if (NewLangList==NULL) {
+    NewLangList=inthash_new(NewLangListSz);
+    if (NewLangList==NULL) {
+      abortLog("Error in lang.h: not enough memory");
+    } else {
+      inthash_value_is_malloc(NewLangList,1);
+    }
+  }
+  return 1;
+}
+
+/* Delete the global NewLangList table if one exists. Always returns 1. */
+int htslang_uninit() {
+  if (NewLangList == NULL)
+    return 1;
+  inthash_delete(&NewLangList);
+  return 1;
+}
+
+/*
+ * Load the language tables from the files found under 'path'.
+ *
+ * limit_to == NULL: full load. Both tables (NewLangStr, NewLangStrKeys) are
+ *   rebuilt, "lang.def" is read as (internal key, key) line pairs, then
+ *   "lang/<name>.txt" is read twice: first for the currently selected
+ *   language, then for LANGUAGE_1 to fill in strings still undefined.
+ *   Returns 1 on success, 0 if a file cannot be opened.
+ * limit_to != NULL: nothing is loaded; the value stored under
+ *   "LANGUAGE_<selected+1>" is copied into limit_to and 0 is returned.
+ *   The incoming content of limit_to is not read.
+ */
+int htslang_load(char* limit_to, char* path) {
+ char* hashname;
+ //
+ int selected_lang=LANG_T(path, -1);
+ //
+ /* Full load only: drop the previous tables and create fresh ones */
+ if (!limit_to) {
+ LANG_DELETE();
+ NewLangStr=inthash_new(NewLangStrSz);
+ NewLangStrKeys=inthash_new(NewLangStrKeysSz);
+ if ((NewLangStr==NULL) || (NewLangStrKeys==NULL)) {
+ abortLog("Error in lang.h: not enough memory");
+ } else {
+ inthash_value_is_malloc(NewLangStr,1);
+ inthash_value_is_malloc(NewLangStrKeys,1);
+ }
+ }
+
+ /* Load master file (list of keys and internal keys) */
+ if (!limit_to) {
+ char* mname = "lang.def";
+ FILE* fp=fopen(fconcat(path, mname),"rb");
+ if (fp) {
+ char intkey[8192];
+ char key[8192];
+ while(!feof(fp)) {
+ /* entries come in pairs of lines: internal key, then key */
+ linput_cpp(fp,intkey,8000);
+ linput_cpp(fp,key,8000);
+ if (strnotempty(intkey) && strnotempty(key)) {
+ char* test=LANGINTKEY(key);
+
+ /* Increment for multiple definitions */
+ if (strnotempty(test)) {
+ int increment=0;
+ int pos=strlen(key);
+ do {
+ increment++;
+ sprintf(key+pos,"%d",increment);
+ test=LANGINTKEY(key);
+ } while (strnotempty(test));
+ }
+
+ if (!strnotempty(test)) { // avoid duplicates
+ // conv_printf(key,key);
+ int len;
+ char* buff;
+ len=strlen(intkey);
+ buff=(char*)malloc(len+2);
+ if (buff) {
+ /* the table stores a heap copy of the internal key */
+ strcpybuff(buff,intkey);
+ inthash_add(NewLangStrKeys,key,(long int)(char*)buff);
+ }
+ }
+ } // if
+ } // while
+ fclose(fp);
+ } else {
+ return 0;
+ }
+ }
+
+ /* Language Name? */
+ {
+ char name[256];
+ sprintf(name,"LANGUAGE_%d",selected_lang+1);
+ hashname=LANGINTKEY(name);
+ }
+
+ /* Get only language name */
+ if (limit_to) {
+ /* NOTE(review): LANGINTKEY() returns "" rather than NULL when the key is
+ missing, so the "???" branch looks unreachable - confirm */
+ if (hashname)
+ strcpybuff(limit_to, hashname);
+ else
+ strcpybuff(limit_to, "???");
+ return 0;
+ }
+
+ /* Error */
+ if (!hashname)
+ return 0;
+
+ /* Load specific language file */
+ {
+ int loops;
+ // 2nd loop: load undefined strings
+ for(loops=0;loops<2;loops++) {
+ FILE* fp;
+ char lbasename[1024];
+ {
+ char name[256];
+ /* pass 0: selected language; pass 1: LANGUAGE_1 */
+ sprintf(name,"LANGUAGE_%d",(loops==0)?(selected_lang+1):1);
+ hashname=LANGINTKEY(name);
+ }
+ sprintf(lbasename, "lang/%s.txt",hashname);
+ fp=fopen(fconcat(path, lbasename), "rb");
+ if (fp) {
+ char extkey[8192];
+ char value[8192];
+ while(!feof(fp)) {
+ /* entries come in pairs of lines: key, then translated value */
+ linput_cpp(fp,extkey,8000);
+ linput_cpp(fp,value,8000);
+ if (strnotempty(extkey) && strnotempty(value)) {
+ int len;
+ char* buff;
+ char* intkey;
+
+ intkey=LANGINTKEY(extkey);
+
+ if (strnotempty(intkey)) {
+
+ /* Increment for multiple definitions */
+ {
+ char* test=LANGSEL(intkey);
+ if (strnotempty(test)) {
+ if (loops == 0) {
+ int increment=0;
+ int pos=strlen(extkey);
+ do {
+ increment++;
+ sprintf(extkey+pos,"%d",increment);
+ intkey=LANGINTKEY(extkey);
+ if (strnotempty(intkey))
+ test=LANGSEL(intkey);
+ else
+ test="";
+ } while (strnotempty(test));
+ } else
+ /* second pass: string already defined, skip it */
+ intkey="";
+ } else {
+ if (loops > 0) {
+ //err_msg += intkey;
+ //err_msg += " ";
+ }
+ }
+ }
+
+ /* Add key */
+ if (strnotempty(intkey)) {
+ len=strlen(value);
+ buff=(char*)malloc(len+2);
+ if (buff) {
+ /* escapes are decoded while copying into the heap buffer */
+ conv_printf(value,buff);
+ inthash_add(NewLangStr,intkey,(long int)(char*)buff);
+ }
+ }
+
+ }
+ } // if
+ } // while
+ fclose(fp);
+ } else {
+ return 0;
+ }
+ }
+ }
+
+ // Control limit_to
+ if (limit_to)
+ limit_to[0]='\0';
+
+ return 1;
+}
+
+/*
+ * Copy 'from' into 'to', decoding C-style backslash escapes
+ * (\a \b \f \n \r \t \v; any other escaped character is kept as is).
+ * 'to' must be able to hold strlen(from)+2 bytes.
+ * NOTE : also contains the "webhttrack" hack: every "WinHTTrack" in the
+ * result is rewritten in place as "WebHTTrack".
+ */
+void conv_printf(char* from,char* to) {
+  const int len = (int) strlen(from);
+  int src, dst;
+  char* hit;
+  for (src = 0, dst = 0; src < len; src++, dst++) {
+    char c = from[src];
+    if (c == '\\') {
+      /* escape sequence: decode the character following the backslash */
+      c = from[++src];
+      switch (c) {
+        case 'a': c = '\a'; break;
+        case 'b': c = '\b'; break;
+        case 'f': c = '\f'; break;
+        case 'n': c = '\n'; break;
+        case 'r': c = '\r'; break;
+        case 't': c = '\t'; break;
+        case 'v': c = '\v'; break;
+        default: break; /* quote, double quote, backslash, '?', others: unchanged */
+      }
+    }
+    to[dst] = c;
+  }
+  to[dst] = '\0';
+  /* Dirty hack */
+  for (hit = strstr(to, "WinHTTrack"); hit != NULL; hit = strstr(hit + 1, "WinHTTrack")) {
+    hit[1] = 'e';
+    hit[2] = 'b';
+  }
+}
+
+/* Delete both language tables (NewLangStr and NewLangStrKeys). */
+void LANG_DELETE() {
+ inthash_delete(&NewLangStr);
+ inthash_delete(&NewLangStrKeys);
+}
+
+/*
+ * Language selection: select language 0 and load its strings from 'path'.
+ * The removed commented-out code read the language id from an application
+ * profile entry ("Language"/"IntId"); this version always uses id 0.
+ */
+void LANG_INIT(char* path) {
+  LANG_T(path, 0);
+}
+
+/*
+ * Query or change the current language.
+ * l < 0 : only return the current language id.
+ * l >= 0: make 'l' the current language, reload the language tables from
+ *         'path', then return the current language id.
+ */
+int LANG_T(char* path, int l) {
+  if (l < 0)
+    return QLANG_T(-1); // 0=default (english)
+  QLANG_T(l);
+  htslang_load(NULL, path);
+  return QLANG_T(-1); // 0=default (english)
+}
+
+/*
+ * Search the language whose identifier (as returned by htslang_load() in
+ * "limit_to" mode) is a case-insensitive prefix match for 'iso'.
+ * Language ids are tried from 0 upwards until htslang_load() yields an
+ * empty string; the previously selected language is restored afterwards.
+ * Returns the id of the last matching language, or 0 if none matched.
+ */
+int LANG_SEARCH(char* path, char* iso) {
+  char lang_str[1024];
+  int i = 0;
+  int curr_lng=LANG_T(path, -1);
+  int found = 0;
+  do {
+    QLANG_T(i);
+    strcpybuff(lang_str,"LANGUAGE_ISO");
+    htslang_load(lang_str, path);   /* overwrites lang_str */
+    if (strfield(iso, lang_str)) {
+      found = i;
+    }
+    i++;
+  } while(strlen(lang_str) > 0);
+  QLANG_T(curr_lng);
+  return found;
+}
+
+/*
+ * Build in 'buffer' the list of language names, one per line (separated by
+ * "\n", no trailing newline). Language ids are tried from 0 upwards until
+ * htslang_load() (in "limit_to" mode) yields an empty string; the previously
+ * selected language is restored afterwards.
+ * Returns the number of ids tried (including the final, empty one).
+ * The caller must provide a buffer large enough for the whole list.
+ */
+int LANG_LIST(char* path, char* buffer) {
+  char lang_str[1024];
+  int i = 0;
+  int curr_lng=LANG_T(path, -1);
+  buffer[0] = '\0';
+  do {
+    QLANG_T(i);
+    strcpybuff(lang_str, "LANGUAGE_NAME");
+    htslang_load(lang_str, path);   /* overwrites lang_str */
+    if (strlen(lang_str) > 0) {
+      if (buffer[0])
+        strcatbuff(buffer, "\n");
+      strcatbuff(buffer, lang_str);
+    }
+    i++;
+  } while(strlen(lang_str) > 0);
+  QLANG_T(curr_lng);
+  return i;
+}
+
+/*
+ * Get/set the current language id, kept in a function-local static.
+ * l >= 0 stores 'l' as the new id; l < 0 leaves it untouched.
+ * Returns the (possibly updated) current id; 0=default (english).
+ */
+int QLANG_T(int l) {
+  static int current = 0;
+  if (l < 0)
+    return current;
+  current = l;
+  return current;
+}
+
+/*
+ * Look up 'name' in the NewLangStr table.
+ * Returns the stored string, or "" when the table does not exist, the key
+ * is missing, or the stored value is null. Never returns NULL.
+ */
+char* LANGSEL(char* name) {
+  unsigned long int adr = 0;
+  if (NewLangStr != NULL && !inthash_read(NewLangStr,name,(long int *)&adr))
+    adr = 0;
+  return adr ? (char*)adr : "";
+}
+
+/*
+ * Look up 'name' in the NewLangStrKeys table.
+ * Returns the stored string, or "" when the table does not exist, the key
+ * is missing, or the stored value is null. Never returns NULL.
+ */
+char* LANGINTKEY(char* name) {
+  unsigned long int adr = 0;
+  if (NewLangStrKeys != NULL && !inthash_read(NewLangStrKeys,name,(long int *)&adr))
+    adr = 0;
+  return adr ? (char*)adr : "";
+}
+
+/* Return the value of the HOME environment variable, or "." if it is unset. */
+char* gethomedir(void) {
+  char* dir = getenv( "HOME" );
+  return (dir != NULL) ? dir : ".";
+}
+
+/*
+ * Read one logical line from 'fp' into 's', joining physical lines that end
+ * with a backslash (the backslash itself is dropped). Each physical line is
+ * read with linput_trim(). Reading stops when the text no longer ends with
+ * a backslash or when 'max' characters have been collected.
+ * Returns the length of the resulting string.
+ */
+int linput_cpp(FILE* fp,char* s,int max) {
+  int total = 0;
+  s[0] = '\0';
+  for (;;) {
+    int got;
+    /* cut the trailing backslash left by the previous physical line */
+    if (total > 0 && s[total-1] == '\\')
+      s[--total] = '\0';
+    /* read the next physical line right after what we already have */
+    got = linput_trim(fp, s+total, max-total);
+    if (got > 0)
+      total += got;
+    if (s[max(total-1,0)] != '\\' || total >= max)
+      break;
+  }
+  return total;
+}
+
+// copy of concat
+/* Ring of 16 scratch buffers handed out in turn by concat() */
+typedef struct {
+  char buff[16][HTS_URLMAXSIZE*2*2];
+  int rol;
+} concat_strc;
+/*
+ * Return the concatenation of 'a' and 'b' ('b' may be NULL) in one of 16
+ * rotating static buffers: the result stays valid until concat() has been
+ * called 16 more times. Not reentrant.
+ * The ring is allocated once, on first use; a single concat_strc already
+ * holds all 16 buffers, so only one is allocated.
+ */
+char* concat(const char* a,const char* b) {
+  static concat_strc* strc = NULL;
+  if (strc == NULL) {
+    strc = (concat_strc*) calloc(1, sizeof(concat_strc));
+    assertf(strc != NULL);   /* fail loudly instead of dereferencing NULL below */
+  }
+  strc->rol=((strc->rol+1)%16);    // roving pointer
+  strcpybuff(strc->buff[strc->rol],a);
+  if (b) strcatbuff(strc->buff[strc->rol],b);
+  return strc->buff[strc->rol];
+}
+#ifdef _WIN32
+/*
+ * Convert every '/' of 'a' into '\\', in place (Windows path separators).
+ * Returns 'a'.
+ */
+char* __fconv(char* a) {
+  char* p;
+  /* single pass; the string length never changes so no strlen() is needed */
+  for (p = a; *p != '\0'; p++) {
+    if (*p == '/')   // convert
+      *p = '\\';
+  }
+  return a;
+}
+/* Concatenate 'a' and 'b' with concat(), then convert '/' to '\\' in the result. */
+char* fconcat(char* a,char* b) {
+  char* joined = concat(a,b);
+  return __fconv(joined);
+}
+/* Copy 'a' into a concat() buffer and convert '/' to '\\' in that copy. */
+char* fconv(char* a) {
+  char* copy = concat(a,"");
+  return __fconv(copy);
+}
+#endif
+
+/* *** Various functions *** */
+
+
+/* Return 1 if 's' names an existing regular file (per stat()), 0 otherwise. */
+int fexist(char* s) {
+  struct stat info;
+  memset(&info, 0, sizeof(info));
+  if (stat(s, &info) != 0)
+    return 0;
+  return S_ISREG(info.st_mode) ? 1 : 0;
+}
+
+/*
+ * Read one line from 'fp' into 's' (always NUL-terminated).
+ * CR, NUL, TAB and form-feed characters are skipped; LF ends the line and is
+ * not stored. Reading also stops at end of file or once max-1 characters
+ * have been stored.
+ * Returns the number of characters stored.
+ */
+int linput(FILE* fp,char* s,int max) {
+  int len = 0;
+  int more = 1;
+  while (more) {
+    int ch = fgetc(fp);
+    if (ch == EOF || ch == 10) {
+      more = 0;                        /* end of file or end of line */
+    } else if (ch != 13 && ch != 0 && ch != 9 && ch != 12) {
+      s[len++] = (char) ch;            /* regular character: keep it */
+    }
+    if (len >= max - 1)
+      more = 0;                        /* buffer full */
+  }
+  s[len] = '\0';
+  return len;
+}
+
+/*
+ * Read one line with linput() and copy it into 's' without trailing
+ * whitespace (as defined by is_realspace()) and without leading spaces/tabs.
+ * 's' is set to "" when nothing remains or when the temporary buffer cannot
+ * be allocated.
+ * Returns the length of the trimmed line.
+ */
+int linput_trim(FILE* fp,char* s,int max) {
+  int n = 0;
+  char* tmp = (char*) malloct(max+2);
+  s[0] = '\0';
+  if (tmp) {
+    // read the raw line
+    n = linput(fp, tmp, max);
+    if (n) {
+      char* start = tmp;
+      // drop trailing spaces and tabs
+      while ( (n > 0) && is_realspace(tmp[max(n-1,0)]) )
+        tmp[--n] = '\0';
+      // skip leading spaces and tabs
+      for ( ; (n > 0) && ((*start == ' ') || (*start == '\t')); start++)
+        n--;
+      if (n > 0) {
+        memcpy(s, start, n);    // can copy \0 chars
+        s[n] = '\0';
+      }
+    }
+    freet(tmp);
+  }
+  return n;
+}
+
+/*
+ * Read one line from socket 'soc' into 's' (always NUL-terminated), one byte
+ * per recv() call. CR, TAB and form-feed are skipped; LF ends the line and is
+ * not stored. Reading also stops when recv() does not return exactly one
+ * byte, or once max-1 characters have been stored.
+ * Returns the number of characters stored.
+ */
+int linputsoc(T_SOC soc, char* s, int max) {
+  int len = 0;
+  int more = 1;
+  while (more) {
+    unsigned char ch;
+    if (recv(soc, &ch, 1, 0) != 1 || ch == 10) {
+      more = 0;                        /* connection closed/error, or end of line */
+    } else if (ch != 13 && ch != 9 && ch != 12) {
+      s[len++] = (char) ch;            /* regular character: keep it */
+    }
+    if (len >= max - 1)
+      more = 0;                        /* buffer full */
+  }
+  s[len] = '\0';
+  return len;
+}
+
+/*
+ * Same as linputsoc(), but first waits up to 'timeout' seconds for data.
+ * Returns -1 if check_readinput_t() reports no data, otherwise the
+ * linputsoc() result.
+ */
+int linputsoc_t(T_SOC soc, char* s, int max, int timeout) {
+  if (!check_readinput_t(soc, timeout))
+    return -1;
+  return linputsoc(soc, s, max);
+}
+
+// check if data is available
+/*
+ * Non-blocking poll (zero timeout) of r->soc for readability.
+ * Returns 1 if select() reports the socket readable, 0 otherwise
+ * (including invalid socket and select() failure).
+ */
+int check_readinput(htsblk* r) {
+  fd_set fds;           // poll structures
+  struct timeval tv;    // structure for select
+  if (r->soc == INVALID_SOCKET)
+    return 0;
+  FD_ZERO(&fds);
+  FD_SET(r->soc,&fds);
+  tv.tv_sec=0;
+  tv.tv_usec=0;
+  /* On failure select() leaves the set untouched, so FD_ISSET() would still
+     be true for our socket: only trust the set when select() succeeded. */
+  if (select(r->soc + 1,&fds,NULL,NULL,&tv) <= 0)
+    return 0;
+  return FD_ISSET(r->soc,&fds) ? 1 : 0;
+}
+
+// check if data is available
+/*
+ * Wait up to 'timeout' seconds for 'soc' to become readable.
+ * Returns 1 if select() reports the socket readable, 0 otherwise
+ * (including invalid socket, timeout and select() failure).
+ */
+int check_readinput_t(T_SOC soc, int timeout) {
+  fd_set fds;           // poll structures
+  struct timeval tv;    // structure for select
+  if (soc == INVALID_SOCKET)
+    return 0;
+  FD_ZERO(&fds);
+  FD_SET(soc,&fds);
+  tv.tv_sec=timeout;
+  tv.tv_usec=0;
+  /* On failure select() leaves the set untouched, so FD_ISSET() would still
+     be true for our socket: only trust the set when select() succeeded. */
+  if (select(soc + 1,&fds,NULL,NULL,&tv) <= 0)
+    return 0;
+  return FD_ISSET(soc,&fds) ? 1 : 0;
+}
+
+/*
+ * Test whether 'f' begins with 's', comparing characters with streql().
+ * Returns the length of 's' when all of 's' was matched, 0 otherwise
+ * (so an empty 's' also yields 0).
+ */
+int strfield(const char* f,const char* s) {
+  int matched;
+  for (matched = 0; streql(*f,*s) && ((*f)!=0) && ((*s)!=0); matched++) {
+    f++;
+    s++;
+  }
+  return (*s == 0) ? matched : 0;
+}
+
+/* Value (0..15) of one hexadecimal digit, either case; 0 for any other character. */
+int ehexh(char c) {
+  if (c >= '0' && c <= '9')
+    return c - '0';
+  if (c >= 'a' && c <= 'f')
+    return c - 'a' + 10;
+  if (c >= 'A' && c <= 'F')
+    return c - 'A' + 10;
+  return 0;
+}
+
+/* Value (0..255) of the two hexadecimal digits s[0] and s[1]. */
+int ehex(char* s) {
+  int hi = ehexh(s[0]);
+  int lo = ehexh(s[1]);
+  return 16*hi + lo;
+}
+
+/*
+ * Append to 'tempo' the decoded form of the URL-escaped string 's':
+ *   "%%" -> '%',  "%XY" -> byte with hex value XY,  '+' -> ' ',
+ *   anything else is copied unchanged.
+ * A lone '%' at the very end of 's' is copied literally; decoding it would
+ * read past the string terminator.
+ */
+void unescapehttp(char* s, String* tempo) {
+  const int len = (int) strlen(s);
+  int i;
+  for (i = 0; i < len; i++) {
+    if (s[i]=='%' && s[i+1]=='%') {
+      i++;
+      StringAddchar(tempo, '%');
+    } else if (s[i]=='%' && s[i+1]=='\0') {
+      /* truncated escape: no digit follows, keep the '%' as is */
+      StringAddchar(tempo, '%');
+    } else if (s[i]=='%') {
+      char hc;
+      i++;
+      hc = (char) ehex(s+i);
+      StringAddchar(tempo, (char) hc);
+      i++;    // two characters skipped in total
+    }
+    else if (s[i]=='+') {
+      StringAddchar(tempo, ' ');
+    }
+    else {
+      StringAddchar(tempo, s[i]);
+    }
+  }
+}
+
+/* same, except + */
+/*
+ * Same as unescapehttp(), except that '+' is not translated, and that a
+ * decoded "%XY" character is dropped when both it and the previously
+ * appended character satisfy is_retorsep().
+ * A lone '%' at the very end of 's' is copied literally; decoding it would
+ * read past the string terminator.
+ */
+void unescapeini(char* s, String* tempo) {
+  const int len = (int) strlen(s);
+  int i;
+  char lastc=0;
+  for (i = 0; i < len; i++) {
+    if (s[i]=='%' && s[i+1]=='%') {
+      i++;
+      StringAddchar(tempo, lastc = '%');
+    } else if (s[i]=='%' && s[i+1]=='\0') {
+      /* truncated escape: no digit follows, keep the '%' as is */
+      StringAddchar(tempo, lastc = '%');
+    } else if (s[i]=='%') {
+      char hc;
+      i++;
+      hc = (char) ehex(s+i);
+      if (!is_retorsep(hc) || !is_retorsep(lastc)) {
+        StringAddchar(tempo, lastc = (char) hc);
+      }
+      i++;    // two characters skipped in total
+    }
+    else {
+      StringAddchar(tempo, lastc = s[i]);
+    }
+  }
+}
+
diff --git a/src/htsserver.h b/src/htsserver.h
new file mode 100644
index 0000000..2818b34
--- /dev/null
+++ b/src/htsserver.h
@@ -0,0 +1,149 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: Mini-server */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+// URL catcher header file (.h)
+
+#ifndef HTS_SERVER_DEFH
+#define HTS_SERVER_DEFH
+
+#include "htsbasenet.h"
+
+// Functions
+void socinput(T_SOC soc,char* s,int max);
+T_SOC smallserver_init_std(int* port_prox,char* adr_prox);
+T_SOC smallserver_init(int* port,char* adr);
+int smallserver(T_SOC soc,char* url,char* method,char* data, char* path);
+
+#define CATCH_RESPONSE \
+ "HTTP/1.0 200 OK\r\n"\
+ "Content-type: text/html\r\n"\
+ "\r\n"\
+ "<!-- Generated by HTTrack Website Copier -->\r\n"\
+ "<HTML><HEAD>\r\n"\
+ "<TITLE>Link caught!</TITLE>\r\n"\
+ "<SCRIPT LANGUAGE=\"Javascript\">\r\n"\
+ "<!--\r\n"\
+ "function back() {\r\n"\
+ " history.go(-1);\r\n"\
+ "}\r\n"\
+ "// -->\r\n"\
+ "</SCRIPT>\r\n"\
+ "</HEAD>\r\n"\
+ "<BODY>\r\n"\
+ "<H2>Link captured into HTTrack Website Copier, you can now restore your proxy preferences!</H2>\r\n"\
+ "<BR><BR>\r\n"\
+ "<H3><A HREF=\"javascript:back();\">Clic here to go back</A></H3>\r\n"\
+ "</BODY></HTML>"\
+ "<!-- Generated by HTTrack Website Copier -->\r\n"\
+ "\r\n"\
+
+
+/* String */
+
+typedef struct {
+ char* buff;
+ int len;
+ int capa;
+} String;
+
+#define STRING_EMPTY {NULL, 0, 0}
+#define BLK_SIZE 8192
+#define StringBuff(blk) ((blk)->buff)
+#define StringLength(blk) ((blk)->len)
+#define StringCapacity(blk) ((blk)->capa)
+#define StringClear(blk) do { \
+ if ((blk)->capa > 0) { \
+ (blk)->buff[0] = '\0'; \
+ }\
+ (blk)->len = 0; \
+} while(0)
+/* Release the buffer of 'blk' and reset it to the empty state
+   (buff == NULL, len == 0, capa == 0) so that it can safely be reused
+   with StringClear()/StringMemcat() afterwards. */
+#define StringFree(blk) do { \
+  if ((blk)->buff != NULL) { \
+    freet((blk)->buff); \
+    (blk)->buff = NULL; \
+  } \
+  (blk)->capa = 0; \
+  (blk)->len = 0; \
+} while(0)
+/* Append 'size' bytes taken from 'str' to 'blk' and keep the buffer
+   NUL-terminated. When len + size + 1 exceeds the capacity, the buffer is
+   reallocated to len + size + BLK_SIZE bytes; allocation failure trips
+   assertf(). Note that 'size' is expanded several times by this macro. */
+#define StringMemcat(blk, str, size) do { \
+ if ((blk)->len + (int)(size) + 1 > (blk)->capa) { \
+ (blk)->capa = (blk)->len + (size) + BLK_SIZE; \
+ (blk)->buff = (char*) realloct((blk)->buff, (blk)->capa); \
+ assertf((blk)->buff != NULL); \
+ } \
+ if ((int)(size) > 0) { \
+ memcpy((blk)->buff + (blk)->len, (str), (size)); \
+ (blk)->len += (size); \
+ } \
+ *((blk)->buff + (blk)->len) = '\0'; \
+} while(0)
+#define StringAddchar(blk, c) do { \
+ char __c = (c); \
+ StringMemcat(blk, &__c, 1); \
+} while(0)
+/* Detach and return the buffer of 'blk' (may be NULL), leaving 'blk' empty
+   (buff == NULL, len == 0, capa == 0). The buffer is not freed here. */
+static void* StringAcquire(String* blk) {
+  void* const taken = blk->buff;
+  blk->len = 0;
+  blk->capa = 0;
+  blk->buff = NULL;
+  return taken;
+}
+
+/* Append the NUL-terminated string 'str' to 'blk'. */
+static void StringStrcat(String* blk, char* str) {
+  const size_t n = strlen(str);
+  StringMemcat(blk, str, n);
+}
+
+
+/* Language files */
+int htslang_load(char* limit_to, char* apppath);
+void conv_printf(char* from,char* to);
+void LANG_DELETE(void);
+void LANG_INIT(char* path);
+int LANG_T(char* path, int l);
+int QLANG_T(int l);
+char* LANGSEL(char* name);
+char* LANGINTKEY(char* name);
+int LANG_SEARCH(char* path, char* iso);
+int LANG_LIST(char* path, char* buffer);
+
+int htslang_init(void);
+int htslang_uninit(void);
+
+int linput_cpp(FILE* fp,char* s,int max);
+void unescapehttp(char* s, String* tempo);
+void unescapeini(char* s, String* tempo);
+
+int smallserver_setkey(char* key, char* value);
+int smallserver_setkeyint(char* key, LLint value);
+int smallserver_setkeyarr(char* key, int id, char* key2, char* value);
+
+#endif
+
+
+
diff --git a/src/htssystem.h b/src/htssystem.h
index 989607d..6c4d216 100644
--- a/src/htssystem.h
+++ b/src/htssystem.h
@@ -1,15 +1 @@
-// Définition de la plate-forme utilisée
-
-// Sun Solaris .......... 0
-// Windows/95 ........... 1
-// Ibm 580 .............. 2
-
-#define HTS_PLATFORM 1
-
-// SHELL
-#define HTS_ANALYSTE 2
-
-
-// Fin de la définition
-
-
+/* (empty file) */
diff --git a/src/htssystem.h.windows9x b/src/htssystem.h.windows9x
deleted file mode 100644
index 0689e0c..0000000
--- a/src/htssystem.h.windows9x
+++ /dev/null
@@ -1,11 +0,0 @@
-/* HTTrack, Offline Browser for Windows and Unix */
-
-/* HTTrack system definition for Windows */
-/* This should be the only file you have to change */
-
-/* Solaris: 0 / Windows: 1 / AIX: 2 / Linux: 3 */
-
-
-/* Fix plateform number to 1 (Windows) */
-/* If it doesn't compile, try another one */
-#define HTS_PLATEFORM 1
diff --git a/src/htsthread.c b/src/htsthread.c
index 0a3bee6..d403730 100644
--- a/src/htsthread.c
+++ b/src/htsthread.c
@@ -68,7 +68,7 @@ unsigned long _beginthread( void* ( *start_address )( void * ), unsigned stack_s
[-1 check if locked (always return 0 with mutex)]
-999 initialize
*/
-int htsSetLock(PTHREAD_LOCK_TYPE* hMutex,int lock) {
+HTSEXT_API int htsSetLock(PTHREAD_LOCK_TYPE* hMutex,int lock) {
#if HTS_WIN
/* lock */
if (lock==1)
diff --git a/src/htsthread.h b/src/htsthread.h
index cb3a139..326c8cb 100644
--- a/src/htsthread.h
+++ b/src/htsthread.h
@@ -85,7 +85,7 @@ Please visit our Website: http://www.httrack.com
#endif
-int htsSetLock(PTHREAD_LOCK_TYPE * hMutex,int lock);
+HTSEXT_API int htsSetLock(PTHREAD_LOCK_TYPE * hMutex,int lock);
#if USE_PTHREAD
unsigned long _beginthread( void* ( *start_address )( void * ), unsigned stack_size, void *arglist );
diff --git a/src/htstools.c b/src/htstools.c
index 1eeafbf..44e5137 100644
--- a/src/htstools.c
+++ b/src/htstools.c
@@ -90,7 +90,7 @@ int ident_url_relatif(char *lien,char* origin_adr,char* origin_fil,char* adr,cha
ok=-2; // non supporté
}
#if HTS_USEOPENSSL
- } else if (strfield(lien,"https://")) {
+ } else if (SSL_is_available && strfield(lien,"https://")) {
// Note: ftp:foobar.gif is not valid
if (ident_url_absolute(lien,adr,fil)==-1) {
ok=-1; // erreur URL
@@ -114,45 +114,56 @@ int ident_url_relatif(char *lien,char* origin_adr,char* origin_fil,char* adr,cha
/* patch scheme if necessary */
if (strfield(lien,"http:")) {
lien+=5;
- strcpy(adr, jump_protocol(origin_adr)); // même adresse ; protocole vide (http)
+ strcpybuff(adr, jump_protocol(origin_adr)); // même adresse ; protocole vide (http)
} else if (strfield(lien,"https:")) {
lien+=6;
- strcpy(adr, "https://"); // même adresse forcée en https
- strcat(adr, jump_protocol(origin_adr));
+ strcpybuff(adr, "https://"); // même adresse forcée en https
+ strcatbuff(adr, jump_protocol(origin_adr));
} else if (strfield(lien,"ftp:")) {
lien+=4;
- strcpy(adr, "ftp://"); // même adresse forcée en ftp
- strcat(adr, jump_protocol(origin_adr));
+ strcpybuff(adr, "ftp://"); // même adresse forcée en ftp
+ strcatbuff(adr, jump_protocol(origin_adr));
} else {
- strcpy(adr,origin_adr); // même adresse ; et même éventuel protocole
+ strcpybuff(adr,origin_adr); // même adresse ; et même éventuel protocole
}
-
+
if (*lien!='/') { // sinon c'est un lien absolu
- a=strchr(origin_fil,'?');
- if (!a) a=origin_fil+strlen(origin_fil);
- while((*a!='/') && ( a > origin_fil) ) a--;
- if (*a=='/') { // ok on a un '/'
- if ( (((int) (a - origin_fil))+1+strlen(lien)) < HTS_URLMAXSIZE) {
- // copier chemin
- strncpy(fil,origin_fil,((int) (a - origin_fil))+1);
- *(fil + ((int) (a - origin_fil))+1)='\0';
-
- // copier chemin relatif
- if (((int) strlen(fil)+(int) strlen(lien)) < HTS_URLMAXSIZE) {
- strcat(fil,lien + ((*lien=='/')?1:0) );
- // simplifier url pour les ../
- fil_simplifie(fil);
- } else
- ok=-1; // erreur
+ if (*lien == '\0') {
+ strcpybuff(fil,origin_fil);
+ } else if (*lien == '?') { // example: a href="?page=2"
+ char* a;
+ strcpybuff(fil,origin_fil);
+ a=strchr(fil,'?');
+ if (a) *a='\0';
+ strcatbuff(fil,lien);
+ } else {
+ a=strchr(origin_fil,'?');
+ if (a == NULL) a=origin_fil+strlen(origin_fil);
+ while((*a!='/') && ( a > origin_fil) ) a--;
+ if (*a=='/') { // ok on a un '/'
+ if ( (((int) (a - origin_fil))+1+strlen(lien)) < HTS_URLMAXSIZE) {
+ // copier chemin
+ strncpy(fil,origin_fil,((int) (a - origin_fil))+1);
+ *(fil + ((int) (a - origin_fil))+1)='\0';
+
+ // copier chemin relatif
+ if (((int) strlen(fil)+(int) strlen(lien)) < HTS_URLMAXSIZE) {
+ strcatbuff(fil,lien + ((*lien=='/')?1:0) );
+ // simplifier url pour les ../
+ fil_simplifie(fil);
+ } else
+ ok=-1; // erreur
+ } else { // erreur
+ ok=-1; // erreur URL
+ }
} else { // erreur
ok=-1; // erreur URL
}
- } else { // erreur
- ok=-1; // erreur URL
}
} else { // chemin absolu
// copier chemin directement
- strcat(fil,lien);
+ strcatbuff(fil,lien);
+ fil_simplifie(fil);
} // *lien!='/'
} else
ok=-1;
@@ -191,17 +202,17 @@ int lienrelatif(char* s,char* link,char* curr_fil) {
// patch: éliminer les ? (paramètres) sinon bug
if ( (a=strchr(curr_fil,'?')) ) {
- strncat(newcurr_fil,curr_fil,(int) (a - curr_fil));
+ strncatbuff(newcurr_fil,curr_fil,(int) (a - curr_fil));
curr_fil = newcurr_fil;
}
if ( (a=strchr(link,'?')) ) {
- strncat(newlink,link,(int) (a - link));
+ strncatbuff(newlink,link,(int) (a - link));
link = newlink;
}
// recopier uniquement le chemin courant
curr=_curr;
- strcpy(curr,curr_fil);
+ strcpybuff(curr,curr_fil);
if ((a=strchr(curr,'?'))==NULL) // couper au ? (params)
a=curr+strlen(curr)-1; // pas de params: aller à la fin
while((*a!='/') && ( a> curr)) a--; // chercher dernier / du chemin courant
@@ -234,13 +245,13 @@ int lienrelatif(char* s,char* link,char* curr_fil) {
// LES ../ ONT ETE SIMPLIFIES
a=curr;
if (*a=='/') a++;
- while(*a) if (*(a++)=='/') strcat(s,"../");
- //if (strlen(s)==0) strcat(s,"/");
+ while(*a) if (*(a++)=='/') strcatbuff(s,"../");
+ //if (strlen(s)==0) strcatbuff(s,"/");
- if (slash) strcat(s,"/"); // garder absolu!!
+ if (slash) strcatbuff(s,"/"); // garder absolu!!
// on est dans le répertoire de départ, copier
- strcat(s,link + ((*link=='/')?1:0) );
+ strcatbuff(s,link + ((*link=='/')?1:0) );
/* Security check */
if (strlen(s) >= HTS_URLMAXSIZE)
@@ -294,10 +305,10 @@ void long_to_83(int mode,char* n83,char* save) {
fnl[i]='\0';
// conversion
longfile_to_83(mode,fn83,fnl);
- strcat(n83,fn83);
+ strcatbuff(n83,fn83);
save+=i;
- if (*save=='/') { strcat(n83,"/"); save++; }
+ if (*save=='/') { strcatbuff(n83,"/"); save++; }
}
}
@@ -375,15 +386,15 @@ void longfile_to_83(int mode,char* n83,char* save) {
}
// corriger vers 8-3
n83[0]='\0';
- strncat(n83,nom,8);
+ strncatbuff(n83,nom,8);
if (strnotempty(ext)) {
- strcat(n83,".");
- strncat(n83,ext,3);
+ strcatbuff(n83,".");
+ strncatbuff(n83,ext,3);
}
}
// écrire backblue.gif
-int verif_backblue(char* base) {
+int verif_backblue(httrackp* opt,char* base) {
int* done;
int ret=0;
NOSTATIC_RESERVE(done, int, 1);
@@ -400,7 +411,7 @@ int verif_backblue(char* base) {
if (fwrite(HTS_DATA_BACK_GIF,HTS_DATA_BACK_GIF_LEN,1,fp) != HTS_DATA_BACK_GIF_LEN)
ret=1;
fclose(fp);
- usercommand(0,NULL,fconcat(base,"backblue.gif"));
+ usercommand(opt,0,NULL,fconcat(base,"backblue.gif"),"","");
} else
ret=1;
//
@@ -409,7 +420,7 @@ int verif_backblue(char* base) {
if (fwrite(HTS_DATA_FADE_GIF,HTS_DATA_FADE_GIF_LEN,1,fp) != HTS_DATA_FADE_GIF_LEN)
ret=1;
fclose(fp);
- usercommand(0,NULL,fconcat(base,"fade.gif"));
+ usercommand(opt,0,NULL,fconcat(base,"fade.gif"),"","");
} else
ret=1;
}
@@ -524,7 +535,7 @@ int istoobig(LLint size,LLint maxhtml,LLint maxnhtml,char* type) {
}
-int hts_buildtopindex(char* path,char* binpath) {
+HTSEXT_API int hts_buildtopindex(httrackp* opt,char* path,char* binpath) {
FILE* fpo;
int retval=0;
char rpath[1024*2];
@@ -537,7 +548,7 @@ int hts_buildtopindex(char* path,char* binpath) {
if (toptemplate_header && toptemplate_body && toptemplate_footer) {
- strcpy(rpath,path);
+ strcpybuff(rpath,path);
if (rpath[0]) {
if (rpath[strlen(rpath)-1]=='/')
rpath[strlen(rpath)-1]='\0';
@@ -546,7 +557,7 @@ int hts_buildtopindex(char* path,char* binpath) {
fpo=fopen(fconcat(rpath,"/index.html"),"wb");
if (fpo) {
find_handle h;
- verif_backblue(concat(rpath,"/")); // générer gif
+ verif_backblue(opt,concat(rpath,"/")); // générer gif
// Header
fprintf(fpo,toptemplate_header,
"<!-- Mirror and index made by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->"
@@ -560,10 +571,10 @@ int hts_buildtopindex(char* path,char* binpath) {
do {
if (hts_findisdir(h)) {
char iname[HTS_URLMAXSIZE*2];
- strcpy(iname,rpath);
- strcat(iname,"/");
- strcat(iname,hts_findgetname(h));
- strcat(iname,"/index.html");
+ strcpybuff(iname,rpath);
+ strcatbuff(iname,"/");
+ strcatbuff(iname,hts_findgetname(h));
+ strcatbuff(iname,"/index.html");
if (fexist(iname)) {
struct topindex_chain * oldchain=chain;
chain=calloc(sizeof(struct topindex_chain), 1);
@@ -575,7 +586,7 @@ int hts_buildtopindex(char* path,char* binpath) {
oldchain->next=chain;
}
chain->next=NULL;
- strcpy(chain->name, hts_findgetname(h));
+ strcpybuff(chain->name, hts_findgetname(h));
}
}
@@ -587,7 +598,7 @@ int hts_buildtopindex(char* path,char* binpath) {
chain=startchain;
while(chain) {
char hname[HTS_URLMAXSIZE*2];
- strcpy(hname,chain->name);
+ strcpybuff(hname,chain->name);
escape_check_url(hname);
fprintf(fpo,toptemplate_body,
hname,
@@ -639,7 +650,7 @@ if (h) {
hts_findclose(h);
}
*/
-find_handle hts_findfirst(char* path) {
+HTSEXT_API find_handle hts_findfirst(char* path) {
if (path) {
if (strnotempty(path)) {
find_handle_struct* find = (find_handle_struct*) calloc(1,sizeof(find_handle_struct));
@@ -648,22 +659,22 @@ find_handle hts_findfirst(char* path) {
#if HTS_WIN
{
char rpath[1024*2];
- strcpy(rpath,path);
+ strcpybuff(rpath,path);
if (rpath[0]) {
if (rpath[strlen(rpath)-1]!='\\')
- strcat(rpath,"\\");
+ strcatbuff(rpath,"\\");
}
- strcat(rpath,"*.*");
+ strcatbuff(rpath,"*.*");
find->handle = FindFirstFile(rpath,&find->hdata);
if (find->handle != INVALID_HANDLE_VALUE)
return find;
}
#else
- strcpy(find->path,path);
+ strcpybuff(find->path,path);
{
if (find->path[0]) {
if (find->path[strlen(find->path)-1]!='/')
- strcat(find->path,"/");
+ strcatbuff(find->path,"/");
}
}
find->hdir=opendir(path);
@@ -678,7 +689,8 @@ find_handle hts_findfirst(char* path) {
}
return NULL;
}
-int hts_findnext(find_handle find) {
+
+HTSEXT_API int hts_findnext(find_handle find) {
if (find) {
#if HTS_WIN
if ( (FindNextFile(find->handle,&find->hdata)))
@@ -693,7 +705,8 @@ int hts_findnext(find_handle find) {
}
return 0;
}
-int hts_findclose(find_handle find) {
+
+HTSEXT_API int hts_findclose(find_handle find) {
if (find) {
#if HTS_WIN
if (find->handle) {
@@ -710,7 +723,8 @@ int hts_findclose(find_handle find) {
}
return 0;
}
-char* hts_findgetname(find_handle find) {
+
+HTSEXT_API char* hts_findgetname(find_handle find) {
if (find) {
#if HTS_WIN
return find->hdata.cFileName;
@@ -721,7 +735,8 @@ char* hts_findgetname(find_handle find) {
}
return NULL;
}
-int hts_findgetsize(find_handle find) {
+
+HTSEXT_API int hts_findgetsize(find_handle find) {
if (find) {
#if HTS_WIN
return find->hdata.nFileSizeLow;
@@ -731,7 +746,8 @@ int hts_findgetsize(find_handle find) {
}
return -1;
}
-int hts_findisdir(find_handle find) {
+
+HTSEXT_API int hts_findisdir(find_handle find) {
if (find) {
if (!hts_findissystem(find)) {
#if HTS_WIN
@@ -745,7 +761,7 @@ int hts_findisdir(find_handle find) {
}
return 0;
}
-int hts_findisfile(find_handle find) {
+HTSEXT_API int hts_findisfile(find_handle find) {
if (find) {
if (!hts_findissystem(find)) {
#if HTS_WIN
@@ -759,7 +775,7 @@ int hts_findisfile(find_handle find) {
}
return 0;
}
-int hts_findissystem(find_handle find) {
+HTSEXT_API int hts_findissystem(find_handle find) {
if (find) {
#if HTS_WIN
if (find->hdata.dwFileAttributes & (FILE_ATTRIBUTE_SYSTEM|FILE_ATTRIBUTE_HIDDEN|FILE_ATTRIBUTE_TEMPORARY))
diff --git a/src/htstools.h b/src/htstools.h
index b3e2c7e..e3f7dd7 100644
--- a/src/htstools.h
+++ b/src/htstools.h
@@ -45,10 +45,12 @@ Please visit our Website: http://www.httrack.com
#include "htsbase.h"
#include "htscore.h"
-#if HTS_WIN
+#ifdef _WIN32
#else
#include <dirent.h>
+#ifdef HAVE_UNISTD_H
#include <unistd.h>
+#endif
#include <sys/stat.h>
#endif
@@ -83,56 +85,47 @@ HTS_INLINE int __rech_tageqbegdigits(const char* adr,const char* s);
//HTS_INLINE int rech_tageq(const char* adr,const char* s);
HTS_INLINE int rech_sampletag(const char* adr,const char* s);
HTS_INLINE int check_tag(char* from,const char* tag);
-int verif_backblue(char* base);
+int verif_backblue(httrackp* opt,char* base);
int verif_external(int nb,int test);
int istoobig(LLint size,LLint maxhtml,LLint maxnhtml,char* type);
-int hts_buildtopindex(char* path,char* binpath);
-
+#ifndef HTTRACK_DEFLIB
+HTSEXT_API int hts_buildtopindex(httrackp* opt,char* path,char* binpath);
+#endif
// Portable directory find functions
-#if HTS_WIN
-
+#ifndef HTTRACK_DEFLIB
+#ifdef _WIN32
typedef struct {
WIN32_FIND_DATA hdata;
HANDLE handle;
} find_handle_struct;
-
-
#else
-
typedef struct {
DIR * hdir;
struct dirent* dirp;
struct stat filestat;
char path[2048];
} find_handle_struct;
-
#endif
-
typedef find_handle_struct* find_handle;
-
typedef struct topindex_chain {
char name[2048]; /* path */
struct topindex_chain* next; /* next element */
} topindex_chain ;
-
-
// Directory find functions
-find_handle hts_findfirst(char* path);
-int hts_findnext(find_handle find);
-int hts_findclose(find_handle find);
+HTSEXT_API find_handle hts_findfirst(char* path);
+HTSEXT_API int hts_findnext(find_handle find);
+HTSEXT_API int hts_findclose(find_handle find);
//
-char* hts_findgetname(find_handle find);
-int hts_findgetsize(find_handle find);
-int hts_findisdir(find_handle find);
-int hts_findisfile(find_handle find);
-int hts_findissystem(find_handle find);
-
-
-
+HTSEXT_API char* hts_findgetname(find_handle find);
+HTSEXT_API int hts_findgetsize(find_handle find);
+HTSEXT_API int hts_findisdir(find_handle find);
+HTSEXT_API int hts_findisfile(find_handle find);
+HTSEXT_API int hts_findissystem(find_handle find);
+#endif
#endif
diff --git a/src/htsweb.c b/src/htsweb.c
new file mode 100644
index 0000000..a5e1902
--- /dev/null
+++ b/src/htsweb.c
@@ -0,0 +1,653 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: webhttrack.c routines */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#ifdef HAVE_SYS_TYPES_H
+#include <sys/types.h>
+#endif
+#ifdef HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#include <ctype.h>
+#ifndef _WIN32
+#include <signal.h>
+#endif
+// htswrap_add
+#include "htsglobal.h"
+#include "htswrap.h"
+#include "httrack-library.h"
+
+/* Threads */
+#include "htsthread.h"
+
+/* External modules */
+#include "htsinthash.c"
+#include "htsmd5.c"
+#include "md5.c"
+
+#include "htsserver.h"
+#include "htsweb.h"
+
+#if USE_BEGINTHREAD==0
+#error fatal: no threads support
+#endif
+
+#if HTS_WIN
+#ifndef __cplusplus
+// DOS
+#include <process.h> /* _beginthread, _endthread */
+#endif
+#else
+#endif
+
+static PTHREAD_LOCK_TYPE refreshMutex;
+
+static int help_server(char* dest_path);
+extern int commandRunning;
+extern int commandEnd;
+extern int commandReturn;
+extern int commandEndRequested;
+extern char* commandReturnMsg;
+extern char* commandReturnCmdl;
+
+/* Signal handler installed for SIGPIPE by main(): deliberately does nothing,
+   so that the signal is simply swallowed. */
+static void htsweb_sig_brpipe(int code) {
+  (void) code;  /* signal number is not used */
+}
+
+/* Entry point of the webhttrack server process.
+   Usage: <program> <path-to-html-root-dir> [key value [key value]..]
+   Starts Winsock on Windows, initializes the HTTrack library, publishes the
+   build-time settings and a session id as server keys, applies the key/value
+   pairs given on the command line, then runs the embedded server through
+   help_server().
+   Returns help_server()'s result, 1 on bad usage, or -1 when Winsock 1.1
+   cannot be started. */
+int main(int argc, char* argv[])
+{
+  int i;
+  int ret = 0;
+  printf("Initialzing the server..\n");
+
+#ifdef _WIN32
+  {
+    WORD   wVersionRequested; /* requested version of the WinSock API */
+    WSADATA wsadata;          /* Windows Sockets API data */
+    int stat;
+    wVersionRequested = 0x0101;
+    stat = WSAStartup( wVersionRequested, &wsadata );
+    if (stat != 0) {
+      fprintf(stderr, "Winsock not found!\n");
+      return -1;
+    } else if (LOBYTE(wsadata.wVersion) != 1 || HIBYTE(wsadata.wVersion) != 1) {
+      /* Version 1.1 was requested: reject as soon as EITHER byte differs
+         (the former '&&' only rejected when both bytes were wrong) */
+      fprintf(stderr, "WINSOCK.DLL does not support version 1.1\n");
+      WSACleanup();
+      return -1;
+    }
+  }
+#endif
+
+  /* Expect the html root directory followed by complete key/value pairs,
+     that is an even argc of at least 2 */
+  if (argc < 2 || (argc % 2) != 0) {
+    fprintf(stderr, "** Warning: use the webhttrack frontend if available\n");
+    fprintf(stderr, "usage: %s <path-to-html-root-dir> [key value [key value]..]\n", argv[0]);
+    fprintf(stderr, "example: %s /usr/share/httrack\n", argv[0]);
+    return 1;
+  }
+
+  /* init and launch */
+  hts_init();
+  htslang_init();
+  webhttrack_lock(-999);
+
+  /* set general keys */
+#ifdef HTS_ETCPATH
+  smallserver_setkey("ETCPATH", HTS_ETCPATH);
+#endif
+#ifdef HTS_BINPATH
+  smallserver_setkey("BINPATH", HTS_BINPATH);
+#endif
+#ifdef HTS_LIBPATH
+  smallserver_setkey("LIBPATH", HTS_LIBPATH);
+#endif
+#ifdef HTS_PREFIX
+  smallserver_setkey("PREFIX", HTS_PREFIX);
+#endif
+#ifdef HTS_HTTRACKCNF
+  smallserver_setkey("HTTRACKCNF", HTS_HTTRACKCNF);
+#endif
+#ifdef HTS_HTTRACKDIR
+  smallserver_setkey("HTTRACKDIR", HTS_HTTRACKDIR);
+#endif
+#ifdef HTS_INET6
+  smallserver_setkey("INET6", "1");
+#endif
+#ifdef HTS_USEOPENSSL
+  smallserver_setkey("USEOPENSSL", "1");
+#endif
+#ifdef HTS_DLOPEN
+  smallserver_setkey("DLOPEN", "1");
+#endif
+#ifdef HTS_USESWF
+  smallserver_setkey("USESWF", "1");
+#endif
+#ifdef HTS_USEZLIB
+  smallserver_setkey("USEZLIB", "1");
+#endif
+#ifdef _WIN32
+  smallserver_setkey("WIN32", "1");
+#endif
+  smallserver_setkey("HTTRACK_VERSION", HTTRACK_VERSION);
+  smallserver_setkey("HTTRACK_VERSIONID", HTTRACK_VERSIONID);
+  smallserver_setkey("HTTRACK_AFF_VERSION", HTTRACK_AFF_VERSION);
+  {
+    char tmp[32];
+    sprintf(tmp, "%d", HTS_PLATFORM);
+    smallserver_setkey("HTS_PLATFORM", tmp);
+  }
+  smallserver_setkey("HTTRACK_WEB", HTTRACK_WEB);
+
+  /* protected session-id: digest of "<time>-<rand>", stored under both the
+     "sid" and "_sid" keys.
+     NOTE(review): time() and rand() are predictable inputs; this is not a
+     cryptographically strong secret. */
+  {
+    char buff[1024];
+    char digest[32 + 2];
+    srand(time(NULL));
+    sprintf(buff, "%d-%d", (int)time(NULL), (int)rand());
+    domd5mem(buff,strlen(buff),digest,1);
+    smallserver_setkey("sid", digest);
+    smallserver_setkey("_sid", digest);
+  }
+
+  /* set commandline keys (argv[2], argv[3]), (argv[4], argv[5]), .. */
+  for(i = 2 ; i < argc ; i += 2) {
+    smallserver_setkey(argv[i], argv[i + 1]);
+  }
+
+  /* sigpipe */
+#ifndef _WIN32
+  signal( SIGPIPE , htsweb_sig_brpipe );   // broken pipe (write into non-opened socket)
+#endif
+
+  /* launch */
+  ret = help_server(argv[1]);
+
+  hts_uninit();
+
+#ifdef _WIN32
+  WSACleanup();
+#endif
+
+  return ret;
+}
+
+static int webhttrack_runmain(int argc, char** argv);
+/* Thread entry point: runs one HTTrack job described by a command line.
+   pP is a heap-allocated, writable command-line string; this function frees
+   it before returning. The string is split in place on unquoted spaces to
+   build argv, which is then handed to webhttrack_runmain().
+   The outcome is published through the globals commandReturn,
+   commandReturnMsg, commandReturnCmdl, commandRunning and commandEnd. */
+static PTHREAD_TYPE back_launch_cmd( void* pP ) {
+  enum { MAX_ARGS = 1024 };   /* number of slots allocated for argv[] */
+  char* cmd = (char*) pP;
+  char** argv = (char**) malloct(MAX_ARGS * sizeof(char*));
+  int argc = 0;
+  int i = 0;
+  int g = 0;                  /* non-zero while inside a "quoted" section */
+
+  /* Allocation failure (here or in the caller's strdup()): report a failed
+     command instead of dereferencing a NULL pointer */
+  if (cmd == NULL || argv == NULL) {
+    if (argv != NULL) {
+      freet(argv);
+    }
+    if (cmd != NULL) {
+      free(cmd);
+    }
+    commandReturn = -1;
+    commandRunning = 0;
+    commandEnd = 1;
+    return PTHREAD_RETURN;
+  }
+
+  /* copy commandline */
+  if (commandReturnCmdl)
+    free(commandReturnCmdl);
+  commandReturnCmdl = strdup(cmd);
+
+  /* split */
+  /* NOTE(review): argc is 1 after this, so the first split below overwrites
+     argv[1]; the leading token of cmd is thus dropped from argv whenever cmd
+     holds several tokens - presumably it is the program name; confirm. */
+  argv[0]="webhttrack";
+  argv[1]=cmd;
+  argc++;
+  /* normalize tabs and line breaks to plain spaces */
+  i = 0;
+  while(cmd[i]) {
+    if (cmd[i] == '\t' || cmd[i] == '\r' || cmd[i] == '\n') {
+      cmd[i] = ' ';
+    }
+    i++;
+  }
+  /* cut on every space that is not enclosed in double quotes */
+  i = 0;
+  while(cmd[i]) {
+    if(cmd[i]=='\"') g=!g;
+    if(cmd[i]==' ') {
+      /* never write past the MAX_ARGS slots of argv[]: once the array is
+         full, the remaining text stays attached to the last argument */
+      if(!g && argc < MAX_ARGS){
+        cmd[i]='\0';
+        argv[argc++]=cmd+i+1;
+      }
+    }
+    i++;
+  }
+
+  /* run */
+  commandReturn = webhttrack_runmain(argc, argv);
+  if (commandReturn) {
+    if (commandReturnMsg)
+      free(commandReturnMsg);
+    commandReturnMsg = strdup(hts_errmsg());
+  }
+
+  /* okay */
+  commandRunning = 0;
+
+  /* finished */
+  commandEnd = 1;
+
+  /* free */
+  free(cmd);
+  freet(argv);
+  return PTHREAD_RETURN;
+}
+
+/* Launch a command in the background: flags a command as running, then
+   starts a thread executing back_launch_cmd() on a private copy of cmd
+   (the copy is released by back_launch_cmd() itself). */
+void webhttrack_main(char* cmd) {
+  char* cmd_copy;
+
+  commandRunning = 1;
+  cmd_copy = strdup(cmd);
+  _beginthread(back_launch_cmd, 0, (void*) cmd_copy);
+}
+
+/* Forward a lock request on the file-local refresh mutex to htsSetLock();
+   the meaning of the lock value is the one defined by htsSetLock(). */
+void webhttrack_lock(int lock) {
+  PTHREAD_LOCK_TYPE* const mutex = &refreshMutex;
+
+  htsSetLock(mutex, lock);
+}
+
+/* Register the htsshow_* callbacks under their wrapper names with
+   htswrap_add(), then run the engine with hts_main(argc, argv) and return
+   its result.
+   NOTE(review): hts_init() is called before the registrations and
+   hts_uninit() right after them, i.e. before hts_main() runs; presumably
+   hts_main() performs its own initialization - confirm. */
+static int webhttrack_runmain(int argc, char** argv) {
+  hts_init();
+  htswrap_add("init",htsshow_init);
+  htswrap_add("free",htsshow_uninit);
+  htswrap_add("start",htsshow_start);
+  htswrap_add("change-options",htsshow_chopt);
+  htswrap_add("end",htsshow_end);
+  htswrap_add("check-html",htsshow_checkhtml);
+  htswrap_add("loop",htsshow_loop);
+  htswrap_add("query",htsshow_query);
+  htswrap_add("query2",htsshow_query2);
+  htswrap_add("query3",htsshow_query3);
+  htswrap_add("check-link",htsshow_check);
+  htswrap_add("pause",htsshow_pause);
+  htswrap_add("save-file",htsshow_filesave);
+  htswrap_add("link-detected",htsshow_linkdetected);
+  htswrap_add("transfer-status",htsshow_xfrstatus);
+  htswrap_add("save-name",htsshow_savename);
+  hts_uninit();
+  return hts_main(argc,argv);
+
+}
+
+/* Set up the temporary server with smallserver_init_std(), print its URL
+   (and the process id on non-Windows builds) on stdout, then hand control
+   to smallserver() with dest_path.
+   Always prints "EXITED" before returning.
+   Returns 0 when smallserver() reports success, 1 when the server socket
+   could not be obtained or smallserver() failed. */
+static int help_server(char* dest_path) {
+  char adr_prox[HTS_URLMAXSIZE*2];
+  int port_prox;
+  int returncode = 1;   /* pessimistic default, cleared on success only */
+  T_SOC soc = smallserver_init_std(&port_prox, adr_prox);
+
+  if (soc == INVALID_SOCKET) {
+    fprintf(stderr, "Unable to initialize a temporary server (no remaining port)\n");
+  } else {
+    char url[HTS_URLMAXSIZE*2];
+    char method[32];
+    char data[32768];
+
+    /* start with empty work buffers */
+    url[0] = '\0';
+    method[0] = '\0';
+    data[0] = '\0';
+
+    /* announce where the server can be reached */
+    printf("Okay, temporary server installed.\nThe URL is:\n");
+    printf("URL=http://%s:%d/\n", adr_prox, port_prox);
+#ifndef _WIN32
+    printf("PID=%d\n", (int) getpid());
+#endif
+    fflush(stdout);
+    fflush(stderr);
+
+    /* serve */
+    if (smallserver(soc, url, method, data, dest_path)) {
+      returncode = 0;
+    } else {
+      fprintf(stderr, "Unable to create the server\n");
+#ifdef _WIN32
+      closesocket(soc);
+#else
+      close(soc);
+#endif
+      printf("Done\n");
+    }
+  }
+
+  printf("EXITED\n");
+  fflush(stdout);
+  fflush(stderr);
+  return returncode;
+}
+
+
+/* CALLBACK FUNCTIONS */
+
+/* Callbacks registered with htswrap_add() in webhttrack_runmain().
+   The ones below are stubs: they do nothing, or return a fixed value. */
+
+/* "init" callback: nothing to do (Winsock is started in main()). */
+void __cdecl htsshow_init(void) {
+}
+/* "free" callback: nothing to release. */
+void __cdecl htsshow_uninit(void) {
+}
+/* "start" callback: ignores opt and always returns 1. */
+int __cdecl htsshow_start(httrackp* opt) {
+  return 1;
+}
+/* "change-options" callback: same result as htsshow_start(). */
+int __cdecl htsshow_chopt(httrackp* opt) {
+  return htsshow_start(opt);
+}
+/* "end" callback: always returns 1. */
+int __cdecl htsshow_end(void) {
+  return 1;
+}
+/* "check-html" callback: ignores its arguments and always returns 1. */
+int __cdecl htsshow_checkhtml(char* html,int len,char* url_adresse,char* url_fichier) {
+  return 1;
+}
+/* "loop" callback, called on each HTTrack loop: publishes the progress of
+   the running mirror as "info.*" server keys.
+   back/back_max/back_index describe the array of background transfers,
+   lien_n/lien_tot the scanned/total link counters, stat_time the elapsed
+   time and stats the global counters (may be NULL; negative inputs leave
+   the previously published value untouched).
+   Returns 0 as soon as commandEndRequested equals 2 (stop now), 1 otherwise.
+   The refresh mutex is held while the keys are being updated. */
+int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int lien_n,int lien_tot,int stat_time, hts_stat_struct* stats) {
+  static TStamp prev_mytime=0; /* ok */
+  static t_InpInfo SInfo; /* ok */
+  //
+  TStamp mytime;
+  long int rate=0;
+  //
+  int stat_written=-1;
+  int stat_updated=-1;
+  int stat_errors=-1;
+  int stat_warnings=-1;
+  int stat_infos=-1;
+  int nbk=-1;
+  int stat_nsocket=-1;
+  LLint stat_bytes=-1;
+  LLint stat_bytes_recv=-1;
+  int irate=-1;
+  //
+  char st[256];
+
+  /* Exit now */
+  if (commandEndRequested == 2)
+    return 0;
+
+  /* Lock */
+  webhttrack_lock(1);
+
+  if (stats) {
+    stat_written=stats->stat_files;
+    stat_updated=stats->stat_updated_files;
+    stat_errors=stats->stat_errors;
+    stat_warnings=stats->stat_warnings;
+    stat_infos=stats->stat_infos;
+    nbk=stats->nbk;
+    stat_nsocket=stats->stat_nsocket;
+    irate=(int)stats->rate;
+    stat_bytes=stats->nb;
+    stat_bytes_recv=stats->HTS_TOTAL_RECV;
+  }
+
+  mytime=mtime_local();
+  if ((stat_time>0) && (stat_bytes_recv>0))
+    rate=(int)(stat_bytes_recv/stat_time);
+  else
+    rate=0;    // no information
+
+  /* Infos */
+  if (stat_bytes>=0) SInfo.stat_bytes=stat_bytes;      // bytes
+  if (stat_time>=0) SInfo.stat_time=stat_time;         // time
+  if (lien_tot>=0) SInfo.lien_tot=lien_tot;            // number of links
+  if (lien_n>=0) SInfo.lien_n=lien_n;                  // scanned
+  SInfo.stat_nsocket=stat_nsocket;                     // socks
+  if (rate>0)  SInfo.rate=rate;                        // rate
+  if (irate>=0) SInfo.irate=irate;                     // irate
+  if (SInfo.irate<0) SInfo.irate=SInfo.rate;
+  if (nbk>=0) SInfo.stat_back=nbk;
+  if (stat_written>=0) SInfo.stat_written=stat_written;
+  if (stat_updated>=0) SInfo.stat_updated=stat_updated;
+  if (stat_errors>=0)  SInfo.stat_errors=stat_errors;
+  if (stat_warnings>=0)  SInfo.stat_warnings=stat_warnings;
+  if (stat_infos>=0)  SInfo.stat_infos=stat_infos;
+
+
+  st[0]='\0';
+  qsec2str(st,stat_time);
+
+  /* Set keys */
+  smallserver_setkeyint("info.stat_bytes", SInfo.stat_bytes);
+  smallserver_setkeyint("info.stat_time", SInfo.stat_time);
+  smallserver_setkeyint("info.lien_tot", SInfo.lien_tot);
+  smallserver_setkeyint("info.lien_n", SInfo.lien_n);
+  smallserver_setkeyint("info.stat_nsocket", SInfo.stat_nsocket);
+  smallserver_setkeyint("info.rate", SInfo.rate);
+  smallserver_setkeyint("info.irate", SInfo.irate);
+  smallserver_setkeyint("info.stat_back", SInfo.stat_back);
+  smallserver_setkeyint("info.stat_written", SInfo.stat_written);
+  smallserver_setkeyint("info.stat_updated", SInfo.stat_updated);
+  smallserver_setkeyint("info.stat_errors", SInfo.stat_errors);
+  smallserver_setkeyint("info.stat_warnings", SInfo.stat_warnings);
+  smallserver_setkeyint("info.stat_infos", SInfo.stat_infos);
+  /* */
+  smallserver_setkey("info.stat_time_str", st);
+
+  /* Refresh the per-transfer details only when more than 100 time units
+     (as returned by mtime_local()) elapsed, or when the clock went back */
+  if ( ((mytime - prev_mytime)>100) || ((mytime - prev_mytime)<0) ) {
+    prev_mytime=mytime;
+
+
+    // walk through the table of background links
+    if (back_index>=0 && back_max > 0) {  // only if an index was given
+      int j,k;
+      int index=0;
+      int ok=0;
+      int l;
+      //
+      t_StatsBuffer StatsBuffer[NStatsBuffer];
+
+      /* Clear every slot, including url_adr and url_fil which are published
+         below and must therefore never be left uninitialized */
+      {
+        int i;
+        for(i=0;i<NStatsBuffer;i++) {
+          strcpybuff(StatsBuffer[i].state,"");
+          strcpybuff(StatsBuffer[i].name,"");
+          strcpybuff(StatsBuffer[i].file,"");
+          strcpybuff(StatsBuffer[i].url_sav,"");
+          strcpybuff(StatsBuffer[i].url_adr,"");
+          strcpybuff(StatsBuffer[i].url_fil,"");
+          StatsBuffer[i].back=0;
+          StatsBuffer[i].size=0;
+          StatsBuffer[i].sizetot=0;
+        }
+      }
+      for(k=0;k<2;k++) {    // 0: current link 1: other links
+        for(j=0;(j<3) && (index<NStatsBuffer);j++) {  // priority pass
+          int _i;
+          for(_i=0+k;(_i< max(back_max*k,1) ) && (index<NStatsBuffer);_i++) {  // link number
+            int i=(back_index+_i)%back_max;    // start with the "first" one (the current one)
+            if (back[i].status>=0) {     // means "active link"
+              ok=0;
+              switch(j) {
+              case 0:     // highest priority: transfers being received
+                if ((back[i].status>0) && (back[i].status<99)) {
+                  strcpybuff(StatsBuffer[index].state,"receive"); ok=1;
+                }
+                break;
+              case 1:     // then: transfers being set up
+                if (back[i].status==99) {
+                  strcpybuff(StatsBuffer[index].state,"request"); ok=1;
+                }
+                else if (back[i].status==100) {
+                  strcpybuff(StatsBuffer[index].state,"connect"); ok=1;
+                }
+                else if (back[i].status==101) {
+                  strcpybuff(StatsBuffer[index].state,"search"); ok=1;
+                }
+                else if (back[i].status==1000) {    // ftp transfer
+                  sprintf(StatsBuffer[index].state,"ftp: %s",back[i].info); ok=1;
+                }
+                break;
+              default:    // last: finished transfers
+                if (back[i].status==0) {  // ready
+                  if ((back[i].r.statuscode==200)) {
+                    strcpybuff(StatsBuffer[index].state,"ready"); ok=1;
+                  }
+                  else if ((back[i].r.statuscode>=100) && (back[i].r.statuscode<=599)) {
+                    char tempo[256]; tempo[0]='\0';
+                    infostatuscode(tempo,back[i].r.statuscode);
+                    strcpybuff(StatsBuffer[index].state,tempo); ok=1;
+                  }
+                  else {
+                    strcpybuff(StatsBuffer[index].state,"error"); ok=1;
+                  }
+                }
+                break;
+              }
+
+              if (ok) {
+                char s[HTS_URLMAXSIZE*2];
+                //
+                StatsBuffer[index].back=i;        // index, for more information
+                //
+                s[0]='\0';
+                strcpybuff(StatsBuffer[index].url_sav,back[i].url_sav);   // for cancel
+                /* keep the raw address and path, published as
+                   info.url_adr[] and info.url_fil[] */
+                strcpybuff(StatsBuffer[index].url_adr,back[i].url_adr);
+                strcpybuff(StatsBuffer[index].url_fil,back[i].url_fil);
+                if (strcmp(back[i].url_adr,"file://"))
+                  strcatbuff(s,back[i].url_adr);
+                else
+                  strcatbuff(s,"localhost");
+                if (back[i].url_fil[0]!='/')
+                  strcatbuff(s,"/");
+                strcatbuff(s,back[i].url_fil);
+
+                /* file: last path component (at most 200 characters);
+                   s is cut there and keeps the host and directory part */
+                StatsBuffer[index].file[0]='\0';
+                {
+                  char* a=strrchr(s,'/');
+                  if (a) {
+                    strncatbuff(StatsBuffer[index].file,a,200);
+                    *a='\0';
+                  }
+                }
+
+                if ((l=strlen(s))<MAX_LEN_INPROGRESS)
+                  strcpybuff(StatsBuffer[index].name,s);
+                else {
+                  // too long: keep the beginning and the end, "..." in between
+                  StatsBuffer[index].name[0]='\0';
+                  strncatbuff(StatsBuffer[index].name,s,MAX_LEN_INPROGRESS/2-2);
+                  strcatbuff(StatsBuffer[index].name,"...");
+                  strcatbuff(StatsBuffer[index].name,s+l-MAX_LEN_INPROGRESS/2+2);
+                }
+
+                if (back[i].r.totalsize>0) {  // total size is known
+                  StatsBuffer[index].sizetot=back[i].r.totalsize;
+                  StatsBuffer[index].size=back[i].r.size;
+                } else {  // total size is not known
+                  if (back[i].status==0) {  // ready
+                    StatsBuffer[index].sizetot=back[i].r.size;
+                    StatsBuffer[index].size=back[i].r.size;
+                  } else {
+                    StatsBuffer[index].sizetot=8192;
+                    StatsBuffer[index].size=(back[i].r.size % 8192);
+                  }
+                }
+                index++;
+              }
+            }
+          }
+        }
+      }
+
+      /* Display current job */
+      {
+        int parsing=0;
+        if (commandEndRequested)
+          smallserver_setkey("info.currentjob", "finishing pending transfers - Select [Cancel] to stop now!");
+        else if (!(parsing=hts_is_parsing(-1)))
+          smallserver_setkey("info.currentjob", "receiving files");
+        else {
+          char tmp[1024];
+          tmp[0] = '\0';
+          switch(hts_is_testing()) {
+          case 0:
+            sprintf(tmp, "parsing HTML file (%d%%)",parsing);
+            break;
+          case 1:
+            sprintf(tmp, "parsing HTML file: testing links (%d%%)",parsing);
+            break;
+          case 2:
+            sprintf(tmp, "purging files");
+            break;
+          }
+          smallserver_setkey("info.currentjob", tmp);
+        }
+      }
+
+      /* Display background jobs */
+      {
+        int i;
+        for(i=0;i<NStatsBuffer;i++) {
+          if (strnotempty(StatsBuffer[i].state)) {
+            smallserver_setkeyarr("info.state[", i, "]", StatsBuffer[i].state);
+            smallserver_setkeyarr("info.name[", i, "]", StatsBuffer[i].name);
+            smallserver_setkeyarr("info.file[", i, "]", StatsBuffer[i].file);
+            smallserver_setkeyarr("info.size[", i, "]", int2bytes(StatsBuffer[i].size));
+            smallserver_setkeyarr("info.sizetot[", i, "]", int2bytes(StatsBuffer[i].sizetot));
+            smallserver_setkeyarr("info.url_adr[", i, "]", StatsBuffer[i].url_adr);
+            smallserver_setkeyarr("info.url_fil[", i, "]", StatsBuffer[i].url_fil);
+            smallserver_setkeyarr("info.url_sav[", i, "]", StatsBuffer[i].url_sav);
+          }
+        }
+      }
+
+
+    }
+
+  }
+
+  /* UnLock */
+  webhttrack_lock(0);
+
+  return 1;
+}
+/* "query" callback: ignores the question and returns an empty string
+   (a static buffer, not to be freed by the caller). */
+char* __cdecl htsshow_query(char* question) {
+  static char s[]=""; /* ok */
+  return s;
+}
+/* "query2" callback: same as htsshow_query(), returns an empty string. */
+char* __cdecl htsshow_query2(char* question) {
+  static char s[]=""; /* ok */
+  return s;
+}
+/* "query3" callback: same as htsshow_query(), returns an empty string. */
+char* __cdecl htsshow_query3(char* question) {
+  static char s[]=""; /* ok */
+  return s;
+}
+/* "check-link" callback: ignores its arguments and always returns -1. */
+int __cdecl htsshow_check(char* adr,char* fil,int status) {
+  return -1;
+}
+/* "pause" callback: does nothing. */
+void __cdecl htsshow_pause(char* lockfile) {
+}
+/* "save-file" callback: does nothing. */
+void __cdecl htsshow_filesave(char* file) {
+}
+/* "link-detected" callback: ignores the link and always returns 1. */
+int __cdecl htsshow_linkdetected(char* link) {
+  return 1;
+}
+/* "transfer-status" callback: ignores back and always returns 1. */
+int __cdecl htsshow_xfrstatus(lien_back* back) {
+  return 1;
+}
+/* "save-name" callback: ignores its arguments and always returns 1. */
+int __cdecl htsshow_savename(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save) {
+  return 1;
+}
+
+
diff --git a/src/htsweb.h b/src/htsweb.h
new file mode 100644
index 0000000..272b363
--- /dev/null
+++ b/src/htsweb.h
@@ -0,0 +1,110 @@
+/* ------------------------------------------------------------ */
+/*
+HTTrack Website Copier, Offline Browser for Windows and Unix
+Copyright (C) Xavier Roche and other contributors
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+Important notes:
+
+- We hereby ask people using this source NOT to use it in purpose of grabbing
+emails addresses, or collecting any other private information on persons.
+This would disgrace our work, and spoil the many hours we spent on it.
+
+
+Please visit our Website: http://www.httrack.com
+*/
+
+
+/* ------------------------------------------------------------ */
+/* File: webhttrack.c routines */
+/* Author: Xavier Roche */
+/* ------------------------------------------------------------ */
+
+#ifndef WEBHTTRACK_WBC
+#define WEBHTTRACK_WBC
+
+#include "htsglobal.h"
+#include "htscore.h"
+
+#define NStatsBuffer 14
+#define MAX_LEN_INPROGRESS 40
+
+/* One displayed transfer slot; htsshow_loop() fills an array of
+   NStatsBuffer of these from the background transfer table. */
+typedef struct {
+  char name[1024];                   // displayed host and directory, shortened when too long
+  char file[1024];                   // last path component, starting at its '/'
+  char state[256];                   // status text ("receive", "connect", ..); empty when the slot is unused
+  char url_sav[HTS_URLMAXSIZE*2];    // for cancel
+  char url_adr[HTS_URLMAXSIZE*2];    // published as info.url_adr[]
+  char url_fil[HTS_URLMAXSIZE*2];    // published as info.url_fil[]
+  LLint size;                        // size received so far
+  LLint sizetot;                     // total size (or a placeholder when it is not known)
+  int offset;                        // NOTE(review): not used by htsweb.c - purpose to be confirmed
+  //
+  int back;                          // index of the entry in the background transfer table
+  //
+  int actived;                       // for disabled
+} t_StatsBuffer;
+
+/* Last known global progress values; htsshow_loop() keeps one static
+   instance and publishes most fields as "info.<field>" server keys. */
+typedef struct {
+  int ask_refresh;         // NOTE(review): not used by htsweb.c - purpose to be confirmed
+  int refresh;             // NOTE(review): not used by htsweb.c - purpose to be confirmed
+  LLint stat_bytes;        // bytes counter (stats->nb)
+  int stat_time;           // elapsed time, as received by htsshow_loop()
+  int lien_n;              // links scanned
+  int lien_tot;            // total number of links
+  int stat_nsocket;        // sockets counter (stats->stat_nsocket)
+  int rate;                // average rate: received bytes divided by stat_time
+  int irate;               // rate reported by the engine (stats->rate)
+  int ft;                  // NOTE(review): not used by htsweb.c - purpose to be confirmed
+  LLint stat_written;      // files counter (stats->stat_files)
+  int stat_updated;        // updated files counter
+  int stat_errors;         // errors counter
+  int stat_warnings;       // warnings counter
+  int stat_infos;          // infos counter
+  TStamp stat_timestart;   // NOTE(review): not used by htsweb.c - purpose to be confirmed
+  int stat_back;           // stats->nbk, published as info.stat_back
+} t_InpInfo;
+
+// wrappers
+void __cdecl htsshow_init(void);
+void __cdecl htsshow_uninit(void);
+int __cdecl htsshow_start(httrackp* opt);
+int __cdecl htsshow_chopt(httrackp* opt);
+int __cdecl htsshow_end(void);
+int __cdecl htsshow_checkhtml(char* html,int len,char* url_adresse,char* url_fichier);
+int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int lien_n,int lien_tot,int stat_time,hts_stat_struct* stats);
+char* __cdecl htsshow_query(char* question);
+char* __cdecl htsshow_query2(char* question);
+char* __cdecl htsshow_query3(char* question);
+int __cdecl htsshow_check(char* adr,char* fil,int status);
+void __cdecl htsshow_pause(char* lockfile);
+void __cdecl htsshow_filesave(char* file);
+int __cdecl htsshow_linkdetected(char* link);
+int __cdecl htsshow_xfrstatus(lien_back* back);
+int __cdecl htsshow_savename(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save);
+
+
+int main(int argc, char **argv);
+void webhttrack_main(char* cmd);
+void webhttrack_lock(int lock);
+
+#ifndef _WIN32
+#define fconv(a) (a)
+#define fconcat(a,b) concat(a,b)
+#endif
+
+#endif
diff --git a/src/htswizard.c b/src/htswizard.c
index b23f5fb..e976ffd 100644
--- a/src/htswizard.c
+++ b/src/htswizard.c
@@ -55,17 +55,17 @@ Please visit our Website: http://www.httrack.com
#define urlfil (liens[ptr]->fil)
// libérer filters[0] pour insérer un élément dans filters[0]
-#define HT_INSERT_FILTERS0 {\
+#define HT_INSERT_FILTERS0 do {\
int i;\
- if (*filptr > 0) {\
- for(i = (*filptr)-1 ; i>=0 ; i--) {\
- strcpy(filters[i+1],filters[i]);\
+ if (*opt->filters.filptr > 0) {\
+ for(i = (*opt->filters.filptr)-1 ; i>=0 ; i--) {\
+ strcpybuff((*opt->filters.filters)[i+1],(*opt->filters.filters)[i]);\
}\
}\
- strcpy(filters[0],"");\
- (*filptr)++;\
- (*filptr)=minimum((*filptr),filter_max);\
-}
+ (*opt->filters.filters)[0][0]='\0';\
+ (*opt->filters.filptr)++;\
+ assertf((*opt->filters.filptr) < opt->maxfilter); \
+} while(0)
@@ -91,22 +91,34 @@ retour:
int hts_acceptlink(httrackp* opt,
int ptr,int lien_tot,lien_url** liens,
char* adr,char* fil,
- char*** ptrfilters,int* filptr,int filter_max,
- robots_wizard* robots,
int* set_prio_to,
int* just_test_it) {
int forbidden_url=-1;
int meme_adresse;
- char** filters = *ptrfilters;
+#define _FILTERS (*opt->filters.filters)
+#define _FILTERS_PTR (opt->filters.filptr)
+#define _ROBOTS ((robots_wizard*)opt->robotsptr)
+ int may_set_prio_to=0;
- // -------------------- PHASE 1 --------------------
+ // -------------------- PHASE 0 --------------------
/* Infos */
if ((opt->debug>1) && (opt->log!=NULL)) {
fspc(opt->log,"debug"); fprintf(opt->log,"wizard test begins: %s%s"LF,adr,fil);
test_flush;
}
+
+ /* Already exists? Then, we know that we knew that this link had to be known */
+ if (adr[0] != '\0'
+ && fil[0] != '\0'
+ && opt->hash != NULL
+ && hash_read((hash_struct*)opt->hash, adr, fil, 1, opt->urlhack) >= 0
+ ) {
+ return 0; /* Yokai */
+ }
+
+ // -------------------- PHASE 1 --------------------
/* Doit-on traiter les non html? */
if ((opt->getmode & 2)==0) { // non on ne doit pas
@@ -156,6 +168,7 @@ int hts_acceptlink(httrackp* opt,
// problème: si un fichier est virtuellement accessible via une page mais dont le lien est sur une autre *uniquement*..
char tempo[HTS_URLMAXSIZE*2];
char tempo2[HTS_URLMAXSIZE*2];
+ tempo[0] = tempo2[0] = '\0';
// note (up/down): on calcule à partir du lien primaire, ET du lien précédent.
// ex: si on descend 2 fois on peut remonter 1 fois
@@ -177,12 +190,14 @@ int hts_acceptlink(httrackp* opt,
// (test même niveau (NOUVEAU à cause de certains problèmes de filtres non intégrés))
// NEW
- if ( (!strchr(tempo+1,'/')) || (!strchr(tempo2+1,'/')) ) {
- if (!liens[ptr]->link_import) { // ne résulte pas d'un 'moved'
- forbidden_url=0;
- if ((opt->debug>1) && (opt->log!=NULL)) {
- fspc(opt->log,"debug"); fprintf(opt->log,"same level link authorized: %s%s"LF,adr,fil);
- test_flush;
+ if (tempo[0] != '\0' && tempo[1] != '\0') {
+ if ( (!strchr(tempo+1,'/')) || (!strchr(tempo2+1,'/')) ) {
+ if (!liens[ptr]->link_import) { // ne résulte pas d'un 'moved'
+ forbidden_url=0;
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"same level link authorized: %s%s"LF,adr,fil);
+ test_flush;
+ }
}
}
}
@@ -380,6 +395,7 @@ int hts_acceptlink(httrackp* opt,
if (!ishtml(fil)) { // non html
//printf("ok %s%s\n",ad,fil);
forbidden_url=0; // autoriser
+ may_set_prio_to=1+1; // set prio to 1 (parse but skip urls) if near is the winner
if ((opt->debug>1) && (opt->log!=NULL)) {
fspc(opt->log,"debug"); fprintf(opt->log,"near link authorized: %s%s"LF,adr,fil);
test_flush;
@@ -404,17 +420,17 @@ int hts_acceptlink(httrackp* opt,
if (forbidden_url!=-1) question=0; // pas de question, résolu
// former URL complète du lien actuel
- strcpy(l,jump_identification(adr));
- if (*fil!='/') strcat(l,"/");
- strcat(l,fil);
+ strcpybuff(l,jump_identification(adr));
+ if (*fil!='/') strcatbuff(l,"/");
+ strcatbuff(l,fil);
// full version (http://foo:bar@www.foo.com/bar.html)
if (!link_has_authority(adr))
- strcpy(lfull,"http://");
+ strcpybuff(lfull,"http://");
else
lfull[0]='\0';
- strcat(lfull,adr);
- if (*fil!='/') strcat(lfull,"/");
- strcat(lfull,fil);
+ strcatbuff(lfull,adr);
+ if (*fil!='/') strcatbuff(lfull,"/");
+ strcatbuff(lfull,fil);
// tester filters (URLs autorisées ou interdites explicitement)
@@ -422,41 +438,80 @@ int hts_acceptlink(httrackp* opt,
if (ptr==0) { // lien primaire, autoriser
question=1; // la question sera résolue automatiquement
forbidden_url=0;
+ may_set_prio_to=0; // clear may-set flag
} else {
- int jok;
- // filters, 0=sait pas 1=ok -1=interdit
- {
- int jokDepth1=0,jokDepth2=0;
- int jok1=0,jok2=0;
- jok1 = fa_strjoker(filters,*filptr,lfull,NULL,NULL,&jokDepth1);
- jok2 = fa_strjoker(filters,*filptr,l, NULL,NULL,&jokDepth2);
- if (jok2 == 0) // #2 doesn't know
- jok = jok1; // then, use #1
- else if (jok1 == 0) // #1 doesn't know
- jok = jok2; // then, use #2
- else if (jokDepth1 >= jokDepth2) // #1 matching rule is "after" #2, then it is prioritary
- jok = jok1;
- else // #2 matching rule is "after" #1, then it is prioritary
- jok = jok2;
- }
-
- if (jok == 1) { // autorisé
- filters_answer=1; // décision prise par les filtres
- question=0; // ne pas poser de question, autorisé
- forbidden_url=0; // URL autorisée
- if ((opt->debug>1) && (opt->log!=NULL)) {
- fspc(opt->log,"debug"); fprintf(opt->log,"(wizard) explicit authorized link: link %s at %s%s"LF,l,urladr,urlfil);
- test_flush;
+ // eternal depth first
+ // vérifier récursivité extérieure
+ if (opt->extdepth>0) {
+ if ( /*question && */ (ptr>0) && (!force_mirror)) {
+ // well, this is kinda a hack
+ // we don't want to mirror EVERYTHING, and we have to decide where to stop
+ // there is no way yet to tag "external" links, and therefore links that are
+ // "weak" (authorized depth < external depth) are just not considered for external
+ // hack
+ if (liens[ptr]->depth > opt->extdepth) {
+ // *set_prio_to = opt->extdepth + 1;
+ *set_prio_to = 1 + (opt->extdepth);
+ may_set_prio_to=0; // clear may-set flag
+ forbidden_url=0; // autorisé
+ question=0; // résolution auto
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ if (question) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"(wizard) ambiguous link accepted (external depth): link %s at %s%s"LF,l,urladr,urlfil);
+ } else {
+ fspc(opt->log,"debug"); fprintf(opt->log,"(wizard) forced to accept link (external depth): link %s at %s%s"LF,l,urladr,urlfil);
+ }
+ test_flush;
+ }
+
+ }
}
- } else if (jok == -1) {
- filters_answer=1; // décision prise par les filtres
- question=0; // ne pas poser de question:
- forbidden_url=1; // URL interdite
- if ((opt->debug>1) && (opt->log!=NULL)) {
- fspc(opt->log,"debug"); fprintf(opt->log,"(wizard) explicit forbidden link: link %s at %s%s"LF,l,urladr,urlfil);
- test_flush;
+ }
+
+ // filters
+ {
+ int jok;
+ char* mdepth="";
+ // filters, 0=sait pas 1=ok -1=interdit
+ {
+ int jokDepth1=0,jokDepth2=0;
+ int jok1=0,jok2=0;
+ jok1 = fa_strjoker(_FILTERS,*_FILTERS_PTR,lfull,NULL,NULL,&jokDepth1);
+ jok2 = fa_strjoker(_FILTERS,*_FILTERS_PTR,l, NULL,NULL,&jokDepth2);
+ if (jok2 == 0) { // #2 doesn't know
+ jok = jok1; // then, use #1
+ mdepth = _FILTERS[jokDepth1];
+ } else if (jok1 == 0) { // #1 doesn't know
+ jok = jok2; // then, use #2
+ mdepth = _FILTERS[jokDepth2];
+ } else if (jokDepth1 >= jokDepth2) { // #1 matching rule is "after" #2, then it is prioritary
+ jok = jok1;
+ mdepth = _FILTERS[jokDepth1];
+ } else { // #2 matching rule is "after" #1, then it is prioritary
+ jok = jok2;
+ mdepth = _FILTERS[jokDepth2];
+ }
}
- } // sinon on touche à rien
+
+ if (jok == 1) { // autorisé
+ filters_answer=1; // décision prise par les filtres
+ question=0; // ne pas poser de question, autorisé
+ forbidden_url=0; // URL autorisée
+ may_set_prio_to=0; // clear may-set flag
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"(wizard) explicit authorized (%s) link: link %s at %s%s"LF,mdepth,l,urladr,urlfil);
+ test_flush;
+ }
+ } else if (jok == -1) { // forbidden
+ filters_answer=1; // décision prise par les filtres
+ question=0; // ne pas poser de question:
+ forbidden_url=1; // URL interdite
+ if ((opt->debug>1) && (opt->log!=NULL)) {
+ fspc(opt->log,"debug"); fprintf(opt->log,"(wizard) explicit forbidden (%s) link: link %s at %s%s"LF,mdepth,l,urladr,urlfil);
+ test_flush;
+ }
+ } // sinon on touche à rien
+ }
}
// vérifier mode mirror links
@@ -464,6 +519,7 @@ int hts_acceptlink(httrackp* opt,
if (opt->mirror_first_page) { // mode mirror links
if (liens[ptr]->precedent==0) { // parent=primary!
forbidden_url=0; // autorisé
+ may_set_prio_to=0; // clear may-set flag
question=1; // résolution auto
force_mirror=5; // mirror (5)
if ((opt->debug>1) && (opt->log!=NULL)) {
@@ -473,20 +529,6 @@ int hts_acceptlink(httrackp* opt,
}
}
}
-
- // vérifier récursivité extérieure
- if ((question) && (ptr>0) && (!force_mirror)) {
- if (opt->extdepth>0) {
- // *set_prio_to = opt->extdepth + 1;
- *set_prio_to = opt->extdepth + 1;
- forbidden_url=0; // autorisé
- question=0; // résolution auto
- if ((opt->debug>1) && (opt->log!=NULL)) {
- fspc(opt->log,"debug"); fprintf(opt->log,"(wizard) ambiguous link accepted (external depth): link %s at %s%s"LF,l,urladr,urlfil);
- test_flush;
- }
- }
- }
// on doit poser la question.. peut on la poser?
// (oui je sais quel preuve de délicatesse, merci merci)
@@ -503,7 +545,7 @@ int hts_acceptlink(httrackp* opt,
// vérifier robots.txt
if (opt->robots) {
- int r = checkrobots(robots,adr,fil);
+ int r = checkrobots(_ROBOTS,adr,fil);
if (r == -1) { // interdiction
#if DEBUG_ROBOTS
printf("robots.txt forbidden: %s%s\n",adr,fil);
@@ -578,9 +620,9 @@ int hts_acceptlink(httrackp* opt,
{
char tempo[HTS_URLMAXSIZE*2];
tempo[0]='\0';
- strcat(tempo,adr);
- strcat(tempo,"/");
- strcat(tempo,fil);
+ strcatbuff(tempo,adr);
+ strcatbuff(tempo,"/");
+ strcatbuff(tempo,fil);
s=hts_htmlcheck_query3(tempo);
}
#else
@@ -616,25 +658,7 @@ int hts_acceptlink(httrackp* opt,
} while(n==-999);
#endif
io_flush;
- } else { // lien primaire: autoriser répertoire entier
-
- /* sanity check */
- if ((*filptr) + 1 >= opt->maxfilter) {
- opt->maxfilter += HTS_FILTERSINC;
- if (filters_init(&filters, opt->maxfilter, HTS_FILTERSINC) == 0) {
- printf("PANIC! : Too many filters : >%d [%d]\n", (*filptr),__LINE__);
- fflush(stdout);
- if (opt->errlog) {
- fprintf(opt->errlog,LF"Too many filters, giving up..(>%d)"LF, (*filptr) );
- fprintf(opt->errlog,"To avoid that: use #F option for more filters (example: -#F5000)"LF);
- test_flush;
- }
- abort(); // wild..
- }
- //opt->filters.filters=filters;
- //*ptrfilters = filters;
- }
-
+ } else { // lien primaire: autoriser répertoire entier
if (!force_mirror) {
if ((opt->seeker & 1)==0) { // interdiction de descendre
n=7;
@@ -645,6 +669,22 @@ int hts_acceptlink(httrackp* opt,
n=force_mirror;
}
+ /* sanity check - reallocate filters HERE */
+ if ((*_FILTERS_PTR) + 1 >= opt->maxfilter) {
+ opt->maxfilter += HTS_FILTERSINC;
+ if (filters_init(&_FILTERS, opt->maxfilter, HTS_FILTERSINC) == 0) {
+ printf("PANIC! : Too many filters : >%d [%d]\n", (*_FILTERS_PTR),__LINE__);
+ fflush(stdout);
+ if (opt->errlog) {
+ fprintf(opt->errlog,LF"Too many filters, giving up..(>%d)"LF, (*_FILTERS_PTR) );
+ fprintf(opt->errlog,"To avoid that: use #F option for more filters (example: -#F5000)"LF);
+ test_flush;
+ }
+ assertf("too many filters - giving up" == NULL); // wild..
+ }
+ }
+
+ // here we have enough room for a new filter if necessary
switch(n) {
case -1: // sauter tout le reste
forbidden_url=1;
@@ -653,10 +693,10 @@ int hts_acceptlink(httrackp* opt,
case 0: // interdire les mêmes liens: adr/fil
forbidden_url=1;
HT_INSERT_FILTERS0; // insérer en 0
- strcpy(filters[0],"-");
- strcat(filters[0],jump_identification(adr));
- if (*fil!='/') strcat(filters[0],"/");
- strcat(filters[0],fil);
+ strcpybuff(_FILTERS[0],"-");
+ strcatbuff(_FILTERS[0],jump_identification(adr));
+ if (*fil!='/') strcatbuff(_FILTERS[0],"/");
+ strcatbuff(_FILTERS[0],fil);
break;
case 1: // éliminer répertoire entier et sous rép: adr/path/ *
@@ -666,12 +706,13 @@ int hts_acceptlink(httrackp* opt,
while((fil[i]!='/') && (i>0)) i--;
if (fil[i]=='/') {
HT_INSERT_FILTERS0; // insérer en 0
- strcpy(filters[0],"-");
- strcat(filters[0],jump_identification(adr));
- if (*fil!='/') strcat(filters[0],"/");
- strncat(filters[0],fil,i);
- if (filters[0][strlen(filters[0])-1]!='/') strcat(filters[0],"/");
- strcat(filters[0],"*");
+ strcpybuff(_FILTERS[0],"-");
+ strcatbuff(_FILTERS[0],jump_identification(adr));
+ if (*fil!='/') strcatbuff(_FILTERS[0],"/");
+ strncatbuff(_FILTERS[0] ,fil,i);
+ if (_FILTERS[0][strlen(_FILTERS[0])-1]!='/')
+ strcatbuff(_FILTERS[0],"/");
+ strcatbuff(_FILTERS[0],"*");
}
}
@@ -681,9 +722,9 @@ int hts_acceptlink(httrackp* opt,
case 2: // adresse adr*
forbidden_url=1;
HT_INSERT_FILTERS0; // insérer en 0
- strcpy(filters[0],"-");
- strcat(filters[0],jump_identification(adr));
- strcat(filters[0],"*");
+ strcpybuff(_FILTERS[0],"-");
+ strcatbuff(_FILTERS[0],jump_identification(adr));
+ strcatbuff(_FILTERS[0],"*");
break;
case 3: // ** A FAIRE
@@ -703,10 +744,10 @@ int hts_acceptlink(httrackp* opt,
case 4: // same link
// PAS BESOIN!!
/*HT_INSERT_FILTERS0; // insérer en 0
- strcpy(filters[0],"+");
- strcat(filters[0],adr);
- if (*fil!='/') strcat(filters[0],"/");
- strcat(filters[0],fil);*/
+ strcpybuff(_FILTERS[0],"+");
+ strcatbuff(_FILTERS[0],adr);
+ if (*fil!='/') strcatbuff(_FILTERS[0],"/");
+ strcatbuff(_FILTERS[0],fil);*/
// étant donné le renversement wizard/primary filter (les primary autorisent up/down ET interdisent)
@@ -722,25 +763,25 @@ int hts_acceptlink(httrackp* opt,
while((fil[i]!='/') && (i>0)) i--;
if (fil[i]=='/') {
HT_INSERT_FILTERS0; // insérer en 0
- strcpy(filters[0],"+");
- strcat(filters[0],jump_identification(adr));
- if (*fil!='/') strcat(filters[0],"/");
- strncat(filters[0],fil,i+1);
- strcat(filters[0],"*");
+ strcpybuff(_FILTERS[0],"+");
+ strcatbuff(_FILTERS[0],jump_identification(adr));
+ if (*fil!='/') strcatbuff(_FILTERS[0],"/");
+ strncatbuff(_FILTERS[0],fil,i+1);
+ strcatbuff(_FILTERS[0],"*");
}
} else { // autoriser domaine alors!!
- HT_INSERT_FILTERS0; // insérer en 0 strcpy(filters[filptr],"+");
- strcpy(filters[0],"+");
- strcat(filters[0],jump_identification(adr));
- strcat(filters[0],"*");
+ HT_INSERT_FILTERS0; // insérer en 0 strcpybuff(filters[filptr],"+");
+ strcpybuff(_FILTERS[0],"+");
+ strcatbuff(_FILTERS[0],jump_identification(adr));
+ strcatbuff(_FILTERS[0],"*");
}
break;
case 6: // same domain
- HT_INSERT_FILTERS0; // insérer en 0 strcpy(filters[filptr],"+");
- strcpy(filters[0],"+");
- strcat(filters[0],jump_identification(adr));
- strcat(filters[0],"*");
+ HT_INSERT_FILTERS0; // insérer en 0 strcpybuff(filters[filptr],"+");
+ strcpybuff(_FILTERS[0],"+");
+ strcatbuff(_FILTERS[0],jump_identification(adr));
+ strcatbuff(_FILTERS[0],"*");
break;
//
case 7: // autoriser ce répertoire
@@ -749,11 +790,11 @@ int hts_acceptlink(httrackp* opt,
while((fil[i]!='/') && (i>0)) i--;
if (fil[i]=='/') {
HT_INSERT_FILTERS0; // insérer en 0
- strcpy(filters[0],"+");
- strcat(filters[0],jump_identification(adr));
- if (*fil!='/') strcat(filters[0],"/");
- strncat(filters[0],fil,i+1);
- strcat(filters[0],"*[file]");
+ strcpybuff(_FILTERS[0],"+");
+ strcatbuff(_FILTERS[0],jump_identification(adr));
+ if (*fil!='/') strcatbuff(_FILTERS[0],"/");
+ strncatbuff(_FILTERS[0],fil,i+1);
+ strcatbuff(_FILTERS[0],"*[file]");
}
}
@@ -788,10 +829,19 @@ int hts_acceptlink(httrackp* opt,
#if HTS_ANALYSTE
{
int test_url=hts_htmlcheck_check(adr,fil,forbidden_url);
- if (test_url!=-1)
+ if (test_url!=-1) {
forbidden_url=test_url;
+ may_set_prio_to=0; // clear may-set flag
+ }
}
-#endif
+#endif
+
+ // -------------------- FINAL PHASE --------------------
+ // Test if the "Near" test won
+ if (may_set_prio_to && forbidden_url == 0) {
+ *set_prio_to = may_set_prio_to;
+ }
+
return forbidden_url;
}
@@ -808,17 +858,17 @@ int hts_testlinksize(httrackp* opt,
int size_flag=0;
// former URL complète du lien actuel
- strcpy(l,jump_identification(adr));
- if (*fil!='/') strcat(l,"/");
- strcat(l,fil);
+ strcpybuff(l,jump_identification(adr));
+ if (*fil!='/') strcatbuff(l,"/");
+ strcatbuff(l,fil);
//
if (!link_has_authority(adr))
- strcpy(lfull,"http://");
+ strcpybuff(lfull,"http://");
else
lfull[0]='\0';
- strcat(lfull,adr);
- if (*fil!='/') strcat(l,"/");
- strcat(lfull,fil);
+ strcatbuff(lfull,adr);
+ if (*fil!='/') strcatbuff(l,"/");
+ strcatbuff(lfull,fil);
// tester filtres (taille)
// jok = fa_strjoker(opt->filters.filters,*opt->filters.filptr,l,&sz,&size_flag,NULL);
diff --git a/src/htswizard.h b/src/htswizard.h
index 28c5d2f..147c7b7 100644
--- a/src/htswizard.h
+++ b/src/htswizard.h
@@ -43,8 +43,6 @@ Please visit our Website: http://www.httrack.com
int hts_acceptlink(httrackp* opt,
int ptr,int lien_tot,lien_url** liens,
char* adr,char* fil,
- char*** filters,int* filptr,int filter_max,
- robots_wizard* robots,
int* set_prio_to_0,
int* just_test_it);
int hts_testlinksize(httrackp* opt,
diff --git a/src/htswrap.c b/src/htswrap.c
index 824af7e..28c4c71 100644
--- a/src/htswrap.c
+++ b/src/htswrap.c
@@ -37,30 +37,31 @@ Please visit our Website: http://www.httrack.com
#include "htswrap.h"
#include "htshash.h"
+#include "htsinthash.h"
// typedef long (__stdcall * XSHBFF_WndProc_type)(HWND ,UINT ,WPARAM ,LPARAM);
inthash wrappers=NULL;
-int htswrap_init(void) {
+HTSEXT_API int htswrap_init(void) { /* Create the global `wrappers` table if it does not exist yet; returns inthash_created(wrappers). */
if (!wrappers) /* allocate only when no table exists yet */
wrappers=inthash_new(42); /* 42 is the size argument handed to inthash_new */
return inthash_created(wrappers);
}
-int htswrap_free(void) {
+HTSEXT_API int htswrap_free(void) { /* Release the global `wrappers` table; always returns 1. */
inthash_delete(&wrappers); /* passes the address of the global so inthash_delete can update it */
return 1;
}
-int htswrap_add(char* name,void* fct) {
+HTSEXT_API int htswrap_add(char* name,void* fct) { /* Store pointer `fct` under key `name` in the global `wrappers` table; always returns 1. */
if (!wrappers) /* create the table on demand */
htswrap_init();
inthash_write(wrappers,name,(unsigned long int)fct); /* NOTE(review): pointer is stored as unsigned long - confirm this is wide enough on every target platform */
return 1;
}
-unsigned long int htswrap_read(char* name) {
+HTSEXT_API unsigned long int htswrap_read(char* name) {
unsigned long int fct=0;
if (!wrappers)
htswrap_init();
diff --git a/src/htswrap.h b/src/htswrap.h
index 03bf73f..b87bf11 100644
--- a/src/htswrap.h
+++ b/src/htswrap.h
@@ -35,14 +35,16 @@ Please visit our Website: http://www.httrack.com
/* Author: Xavier Roche */
/* ------------------------------------------------------------ */
-
-
#ifndef HTSWRAP_DEFH
#define HTSWRAP_DEFH
-int htswrap_init(void);
-int htswrap_add(char* name,void* fct);
-int htswrap_free(void);
-unsigned long int htswrap_read(char* name);
+#include "htsglobal.h"
+
+#ifndef HTTRACK_DEFLIB
+HTSEXT_API int htswrap_init(void);
+HTSEXT_API int htswrap_add(char* name,void* fct);
+HTSEXT_API int htswrap_free(void);
+HTSEXT_API unsigned long int htswrap_read(char* name);
+#endif
#endif
diff --git a/src/htszlib.c b/src/htszlib.c
index d138a1c..faf4e88 100644
--- a/src/htszlib.c
+++ b/src/htszlib.c
@@ -42,23 +42,26 @@ Please visit our Website: http://www.httrack.com
#include "htsbase.h"
#include "htscore.h"
-#if HTS_USEZLIB
+#include "htszlib.h"
+#if HTS_USEZLIB
/* zlib */
+/*
#include <zlib.h>
#include "htszlib.h"
+*/
/*
Unpack file into a new file
Return value: size of the new file, or -1 if an error occurred
*/
int hts_zunpack(char* filename,char* newfile) {
- if (filename && newfile) {
+ if (gz_is_available && filename && newfile) {
if (filename[0] && newfile[0]) {
gzFile gz = gzopen (filename, "rb");
if (gz) {
FILE* fpout=fopen(fconv(newfile),"wb");
- int size=0;
+ INTsys size=0;
if (fpout) {
int nr;
do {
@@ -66,7 +69,7 @@ int hts_zunpack(char* filename,char* newfile) {
nr=gzread (gz, buff, 1024);
if (nr>0) {
size+=nr;
- if ((int)fwrite(buff,1,nr,fpout) != nr)
+ if ((INTsys)fwrite(buff,1,nr,fpout) != nr)
nr=size=-1;
}
} while(nr>0);
diff --git a/src/htszlib.h b/src/htszlib.h
index 63310b8..173d966 100644
--- a/src/htszlib.h
+++ b/src/htszlib.h
@@ -43,6 +43,27 @@ Please visit our Website: http://www.httrack.com
int hts_zunpack(char* filename,char* newfile);
+#define gzopen hts_ptrfunc_gzopen
+#define gzread hts_ptrfunc_gzread
+#define gzclose hts_ptrfunc_gzclose
+
+#ifdef _WIN32
+#define ZEXPORT WINAPI
+#else
+#define ZEXPORT
+#endif
+
+typedef void* voidp;
+typedef voidp gzFile;
+typedef gzFile (ZEXPORT *t_gzopen)(const char *path, const char *mode);
+typedef int (ZEXPORT *t_gzread)(gzFile file, voidp buf, unsigned len);
+typedef int (ZEXPORT *t_gzclose)(gzFile file);
+
+extern int gz_is_available;
+extern t_gzopen gzopen;
+extern t_gzread gzread;
+extern t_gzclose gzclose;
+
#endif
#endif
diff --git a/src/httrack-library.h b/src/httrack-library.h
index 13ecb46..aeea70f 100644
--- a/src/httrack-library.h
+++ b/src/httrack-library.h
@@ -42,9 +42,157 @@ Please visit our Website: http://www.httrack.com
#include "htsopt.h"
#include "htswrap.h"
-int hts_init(void);
-int hts_main(int argc, char **argv);
+/* Main functions */
+HTSEXT_API int hts_init(void);
+HTSEXT_API int hts_uninit(void);
+HTSEXT_API int hts_main(int argc, char **argv);
+
+/* Wrapper functions */
+HTSEXT_API int htswrap_init(void);
+HTSEXT_API int htswrap_add(char* name,void* fct);
+HTSEXT_API int htswrap_free(void);
+HTSEXT_API unsigned long int htswrap_read(char* name);
+HTSEXT_API const char* hts_is_available(void);
+
+/* Other functions */
+HTSEXT_API int hts_resetvar(void);
+HTSEXT_API int hts_buildtopindex(httrackp* opt,char* path,char* binpath);
+
+/* Catch-URL */
+HTSEXT_API T_SOC catch_url_init_std(int* port_prox,char* adr_prox);
+HTSEXT_API T_SOC catch_url_init(int* port,char* adr);
+HTSEXT_API int catch_url(T_SOC soc,char* url,char* method,char* data);
+
+/* State */
+HTSEXT_API int hts_is_parsing(int flag);
+HTSEXT_API int hts_is_testing(void);
+HTSEXT_API int hts_is_exiting(void);
+HTSEXT_API int hts_setopt(httrackp* opt);
+HTSEXT_API int hts_addurl(char** url);
+HTSEXT_API int hts_resetaddurl(void);
+HTSEXT_API int copy_htsopt(httrackp* from, httrackp* to);
+HTSEXT_API char* hts_errmsg(void);
+HTSEXT_API int hts_setpause(int); // pause transfer
+HTSEXT_API int hts_request_stop(int force);
+HTSEXT_API char* hts_cancel_file(char * s);
+HTSEXT_API void hts_cancel_test(void);
+HTSEXT_API void hts_cancel_parsing(void);
+HTSEXT_API char* hts_cancel_file(char * s);
+HTSEXT_API void hts_cancel_test(void);
+HTSEXT_API void hts_cancel_parsing(void);
+
+/* Tools */
+HTSEXT_API int structcheck(char* s);
+HTSEXT_API void infostatuscode(char* msg,int statuscode);
+HTSEXT_API HTS_INLINE TStamp mtime_local(void);
+HTSEXT_API void qsec2str(char *st,TStamp t);
+HTSEXT_API char* int2char(int n);
+HTSEXT_API char* int2bytes(LLint n);
+HTSEXT_API char* int2bytessec(long int n);
+HTSEXT_API char** int2bytes2(LLint n);
+HTSEXT_API char* jump_identification(char*);
+HTSEXT_API char* jump_normalized(char*);
+HTSEXT_API char* jump_toport(char*);
+HTSEXT_API char* fil_normalized(char* source, char* dest);
+HTSEXT_API char* adr_normalized(char* source, char* dest);
+HTSEXT_API char* hts_rootdir(char* file);
+
+/* Escaping URLs */
+HTSEXT_API void unescape_amp(char* s);
+HTSEXT_API void escape_spc_url(char* s);
+HTSEXT_API void escape_in_url(char* s);
+HTSEXT_API void escape_uri(char* s);
+HTSEXT_API void escape_uri_utf(char* s);
+HTSEXT_API void escape_check_url(char* s);
+HTSEXT_API char* escape_check_url_addr(char* s);
+HTSEXT_API void x_escape_http(char* s,int mode);
+HTSEXT_API char* unescape_http(char* s);
+HTSEXT_API char* unescape_http_unharm(char* s, int no_high);
+HTSEXT_API char* antislash_unescaped(char* s);
+HTSEXT_API void escape_remove_control(char* s);
+
+/* Portable directory API */
+
+typedef struct find_handle_struct find_handle_struct;
+typedef find_handle_struct* find_handle;
+typedef struct topindex_chain {
+ char name[2048]; /* path */
+ struct topindex_chain* next; /* next element */
+} topindex_chain ;
+HTSEXT_API find_handle hts_findfirst(char* path);
+HTSEXT_API int hts_findnext(find_handle find);
+HTSEXT_API int hts_findclose(find_handle find);
+HTSEXT_API char* hts_findgetname(find_handle find);
+HTSEXT_API int hts_findgetsize(find_handle find);
+HTSEXT_API int hts_findisdir(find_handle find);
+HTSEXT_API int hts_findisfile(find_handle find);
+HTSEXT_API int hts_findissystem(find_handle find);
+
+/* Wrapper functions types (commented) : */
+/*
+typedef void (* t_hts_htmlcheck_init)(void);
+typedef void (* t_hts_htmlcheck_uninit)(void);
+typedef int (* t_hts_htmlcheck_start)(httrackp* opt);
+typedef int (* t_hts_htmlcheck_end)(void);
+typedef int (* t_hts_htmlcheck_chopt)(httrackp* opt);
+typedef int (* t_hts_htmlcheck)(char* html,int len,char* url_adresse,char* url_fichier);
+typedef char* (* t_hts_htmlcheck_query)(char* question);
+typedef char* (* t_hts_htmlcheck_query2)(char* question);
+typedef char* (* t_hts_htmlcheck_query3)(char* question);
+typedef int (* t_hts_htmlcheck_loop)(lien_back* back,int back_max,int back_index,int lien_tot,int lien_ntot,int stat_time,hts_stat_struct* stats);
+typedef int (* t_hts_htmlcheck_check)(char* adr,char* fil,int status);
+typedef void (* t_hts_htmlcheck_pause)(char* lockfile);
+typedef void (* t_hts_htmlcheck_filesave)(char* file);
+typedef int (* t_hts_htmlcheck_linkdetected)(char* link);
+typedef int (* t_hts_htmlcheck_xfrstatus)(lien_back* back);
+typedef int (* t_hts_htmlcheck_savename)(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save);
+typedef int (* t_hts_htmlcheck_sendhead)(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* outgoing);
+typedef int (* t_hts_htmlcheck_receivehead)(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* incoming);
+*/
+/* Wrapper functions names : */
+/*
+ hts_htmlcheck_init = (t_hts_htmlcheck_init) htswrap_read("init");
+Log: "engine: init"
+
+ hts_htmlcheck_uninit = (t_hts_htmlcheck_uninit) htswrap_read("free");
+Log: "engine: free"
+
+ hts_htmlcheck_start = (t_hts_htmlcheck_start) htswrap_read("start");
+Log: "engine: start"
+
+ hts_htmlcheck_end = (t_hts_htmlcheck_end) htswrap_read("end");
+Log: "engine: end"
+
+ hts_htmlcheck_chopt = (t_hts_htmlcheck_chopt) htswrap_read("change-options");
+Log: "engine: change-options"
+
+ hts_htmlcheck = (t_hts_htmlcheck) htswrap_read("check-html");
+Log: "check-html: <url>"
+
+ hts_htmlcheck_query = (t_hts_htmlcheck_query) htswrap_read("query");
+ hts_htmlcheck_query2 = (t_hts_htmlcheck_query2) htswrap_read("query2");
+ hts_htmlcheck_query3 = (t_hts_htmlcheck_query3) htswrap_read("query3");
+ hts_htmlcheck_loop = (t_hts_htmlcheck_loop) htswrap_read("loop");
+ hts_htmlcheck_check = (t_hts_htmlcheck_check) htswrap_read("check-link");
+Log: none
+
+ hts_htmlcheck_pause = (t_hts_htmlcheck_pause) htswrap_read("pause");
+Log: "pause: <lockfile>"
+
+ hts_htmlcheck_filesave = (t_hts_htmlcheck_filesave) htswrap_read("save-file");
+ hts_htmlcheck_linkdetected = (t_hts_htmlcheck_linkdetected) htswrap_read("link-detected");
+Log: none
+
+ hts_htmlcheck_xfrstatus = (t_hts_htmlcheck_xfrstatus) htswrap_read("transfer-status");
+Log:
+ "engine: transfer-status: link updated: <url> -> <file>"
+ | "engine: transfer-status: link added: <url> -> <file>"
+ | "engine: transfer-status: link recorded: <url> -> <file>"
+ | "engine: transfer-status: link link error (<errno>, '<err_msg>'): <url>"
+ hts_htmlcheck_savename = (t_hts_htmlcheck_savename ) htswrap_read("save-name");
+Log:
+ "engine: save-name: local name: <url> -> <file>"
+*/
#endif
-
diff --git a/src/httrack.c b/src/httrack.c
index 0289fca..c69a600 100644
--- a/src/httrack.c
+++ b/src/httrack.c
@@ -35,8 +35,7 @@ Please visit our Website: http://www.httrack.com
/* Author: Xavier Roche */
/* ------------------------------------------------------------ */
-#if HTS_WIN
-#else
+#ifndef _WIN32
#ifndef Sleep
#define Sleep(a) { if (((a)*1000)%1000000) usleep(((a)*1000)%1000000); if (((a)*1000)/1000000) sleep(((a)*1000)/1000000); }
#endif
@@ -51,13 +50,22 @@ Please visit our Website: http://www.httrack.com
#if HTS_ANALYSTE_CONSOLE
/* specific definitions */
-#include "htsbase.h"
+//#include "htsbase.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#ifdef HAVE_SYS_TYPES_H
+#include <sys/types.h>
+#endif
+#ifdef HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
#include <ctype.h>
#ifdef _WIN32
-#include "Winsock.h"
+//#include "Winsock.h"
#endif
/* END specific definitions */
@@ -118,8 +126,8 @@ void vt_home(void) {
static int use_show;
-
int main(int argc, char **argv) {
+ int ret = 0;
hts_init();
/*
@@ -183,7 +191,11 @@ Log:
htswrap_add("transfer-status",htsshow_xfrstatus);
htswrap_add("save-name",htsshow_savename);
- return hts_main(argc,argv);
+ ret = hts_main(argc,argv);
+ if (ret) {
+ fprintf(stderr, "* %s\n", hts_errmsg());
+ }
+ return ret;
}
@@ -224,7 +236,7 @@ int __cdecl htsshow_start(httrackp* opt) {
return 1;
}
int __cdecl htsshow_chopt(httrackp* opt) { /* Delegates to htsshow_start(opt) and returns its result. */
- return __cdecl htsshow_start(opt);
+ return htsshow_start(opt); /* plain call: the calling-convention keyword belongs in declarations, not in a call expression */
}
int __cdecl htsshow_end(void) {
return 1;
@@ -283,7 +295,7 @@ int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int lien_n,
if (rate>0) SInfo.rate=rate; // rate
if (irate>=0) SInfo.irate=irate; // irate
if (SInfo.irate<0) SInfo.irate=SInfo.rate;
- if (SInfo.stat_back>=0) SInfo.stat_back=nbk;
+ if (nbk>=0) SInfo.stat_back=nbk;
if (stat_written>=0) SInfo.stat_written=stat_written;
if (stat_updated>=0) SInfo.stat_updated=stat_updated;
if (stat_errors>=0) SInfo.stat_errors=stat_errors;
@@ -369,10 +381,10 @@ int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int lien_n,
{
int i;
for(i=0;i<NStatsBuffer;i++) {
- strcpy(StatsBuffer[i].state,"");
- strcpy(StatsBuffer[i].name,"");
- strcpy(StatsBuffer[i].file,"");
- strcpy(StatsBuffer[i].url_sav,"");
+ strcpybuff(StatsBuffer[i].state,"");
+ strcpybuff(StatsBuffer[i].name,"");
+ strcpybuff(StatsBuffer[i].file,"");
+ strcpybuff(StatsBuffer[i].url_sav,"");
StatsBuffer[i].back=0;
StatsBuffer[i].size=0;
StatsBuffer[i].sizetot=0;
@@ -389,18 +401,18 @@ int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int lien_n,
switch(j) {
case 0: // prioritaire
if ((back[i].status>0) && (back[i].status<99)) {
- strcpy(StatsBuffer[index].state,"receive"); ok=1;
+ strcpybuff(StatsBuffer[index].state,"receive"); ok=1;
}
break;
case 1:
if (back[i].status==99) {
- strcpy(StatsBuffer[index].state,"request"); ok=1;
+ strcpybuff(StatsBuffer[index].state,"request"); ok=1;
}
else if (back[i].status==100) {
- strcpy(StatsBuffer[index].state,"connect"); ok=1;
+ strcpybuff(StatsBuffer[index].state,"connect"); ok=1;
}
else if (back[i].status==101) {
- strcpy(StatsBuffer[index].state,"search"); ok=1;
+ strcpybuff(StatsBuffer[index].state,"search"); ok=1;
}
else if (back[i].status==1000) { // ohh le beau ftp
sprintf(StatsBuffer[index].state,"ftp: %s",back[i].info); ok=1;
@@ -409,15 +421,15 @@ int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int lien_n,
default:
if (back[i].status==0) { // prêt
if ((back[i].r.statuscode==200)) {
- strcpy(StatsBuffer[index].state,"ready"); ok=1;
+ strcpybuff(StatsBuffer[index].state,"ready"); ok=1;
}
else if ((back[i].r.statuscode>=100) && (back[i].r.statuscode<=599)) {
char tempo[256]; tempo[0]='\0';
infostatuscode(tempo,back[i].r.statuscode);
- strcpy(StatsBuffer[index].state,tempo); ok=1;
+ strcpybuff(StatsBuffer[index].state,tempo); ok=1;
}
else {
- strcpy(StatsBuffer[index].state,"error"); ok=1;
+ strcpybuff(StatsBuffer[index].state,"error"); ok=1;
}
}
break;
@@ -429,32 +441,32 @@ int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int lien_n,
StatsBuffer[index].back=i; // index pour + d'infos
//
s[0]='\0';
- strcpy(StatsBuffer[index].url_sav,back[i].url_sav); // pour cancel
+ strcpybuff(StatsBuffer[index].url_sav,back[i].url_sav); // pour cancel
if (strcmp(back[i].url_adr,"file://"))
- strcat(s,back[i].url_adr);
+ strcatbuff(s,back[i].url_adr);
else
- strcat(s,"localhost");
+ strcatbuff(s,"localhost");
if (back[i].url_fil[0]!='/')
- strcat(s,"/");
- strcat(s,back[i].url_fil);
+ strcatbuff(s,"/");
+ strcatbuff(s,back[i].url_fil);
StatsBuffer[index].file[0]='\0';
{
char* a=strrchr(s,'/');
if (a) {
- strncat(StatsBuffer[index].file,a,200);
+ strncatbuff(StatsBuffer[index].file,a,200);
*a='\0';
}
}
if ((l=strlen(s))<MAX_LEN_INPROGRESS)
- strcpy(StatsBuffer[index].name,s);
+ strcpybuff(StatsBuffer[index].name,s);
else {
// couper
StatsBuffer[index].name[0]='\0';
- strncat(StatsBuffer[index].name,s,MAX_LEN_INPROGRESS/2-2);
- strcat(StatsBuffer[index].name,"...");
- strcat(StatsBuffer[index].name,s+l-MAX_LEN_INPROGRESS/2+2);
+ strncatbuff(StatsBuffer[index].name,s,MAX_LEN_INPROGRESS/2-2);
+ strcatbuff(StatsBuffer[index].name,"...");
+ strcatbuff(StatsBuffer[index].name,s+l-MAX_LEN_INPROGRESS/2+2);
}
if (back[i].r.totalsize>0) { // taille prédéfinie
@@ -568,4 +580,36 @@ int __cdecl htsshow_savename(char* adr_complete,char* fil_complete,char* referer
}
+/* *** Various functions *** */
+
+
+int fexist(char* s) { /* Returns 1 when path `s` can be stat()ed and is a regular file, 0 otherwise. */
+ struct stat st;
+ memset(&st, 0, sizeof(st)); /* start from a zeroed structure */
+ if (stat(s, &st) == 0) { /* stat() succeeded: the path exists and is accessible */
+ if (S_ISREG(st.st_mode)) { /* directories and other non-regular entries fall through to 0 */
+ return 1;
+ }
+ }
+ return 0; /* stat() failed or the entry is not a regular file */
+}
+
+int linput(FILE* fp,char* s,int max) { /* Reads one line from `fp` into `s` (NUL-terminated) and returns the number of characters stored. */
+ int c;
+ int j=0; /* number of characters written to s so far */
+ do {
+ c=fgetc(fp);
+ if (c!=EOF) {
+ switch(c) {
+ case 13: break; // skip CR
+ case 10: c=-1; break; /* LF ends the line: -1 is the loop's stop marker, the LF itself is not stored */
+ case 9: case 12: break; // skip these characters (tab, form feed)
+ default: s[j++]=(char) c; break; /* NOTE(review): stored before the j<(max-1) check below, so max<=1 writes past a max-sized buffer - confirm callers pass max>=2 */
+ }
+ }
+ } while((c!=-1) && (c!=EOF) && (j<(max-1))); /* stop on end of line, end of file, or when only room for the terminator is left */
+ s[j]='\0';
+ return j;
+}
+
#endif
diff --git a/src/httrack.dsp b/src/httrack.dsp
index 7fc08da..590f025 100644
--- a/src/httrack.dsp
+++ b/src/httrack.dsp
@@ -43,8 +43,8 @@ RSC=rc.exe
# PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
-# ADD CPP /nologo /MT /W3 /GX /Ot /Oi /Oy /Ob2 /I "C:\Dev\IPv6Kit\inc\\" /I "C:\Dev\zlib\\" /I "C:\Dev\openssl\include" /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /FR /YX /FD /c
-# SUBTRACT CPP /Ox /Oa /Ow /Og /Os
+# ADD CPP /nologo /MD /W3 /Gi /O2 /Op /Ob2 /I "C:\Dev\IPv6Kit\inc\\" /I "C:\Dev\zlib\\" /I "C:\Dev\openssl\include" /I "C:\Dev\Winhttrack" /D "NDEBUG" /D "WIN32" /D "_CONSOLE" /D "_MBCS" /D "HTS_ANALYSTE_CONSOLE" /YX /FD /Zm200 /c
+# SUBTRACT CPP /Fr
# ADD BASE RSC /l 0x40c /d "NDEBUG"
# ADD RSC /l 0x409 /d "NDEBUG"
BSC32=bscmake.exe
@@ -52,7 +52,7 @@ BSC32=bscmake.exe
# ADD BSC32 /nologo
LINK32=link.exe
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
-# ADD LINK32 wsock32.lib zlib.lib ssleay32.lib libeay32.lib /nologo /subsystem:console /machine:I386 /out:"c:\temp\httrack.exe" /libpath:"C:\Dev\openssl\lib" /libpath:"C:\Dev\zlib\dll32" /libpath:"C:\Dev\openssl\lib\out32dll"
+# ADD LINK32 wsock32.lib libhttrack.lib /nologo /subsystem:console /machine:I386 /out:"L:\HTTrack\httrack\httrack.exe" /libpath:"C:\Dev\openssl\lib" /libpath:"C:\Dev\zlib\dll32" /libpath:"C:\Dev\openssl\lib\out32dll" /libpath:"C:\temp\Releaselib"
# SUBTRACT LINK32 /verbose
!ELSEIF "$(CFG)" == "httrack - Win32 Debug"
@@ -69,7 +69,7 @@ LINK32=link.exe
# PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
# ADD BASE CPP /nologo /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
-# ADD CPP /nologo /MT /W3 /Gm /GR /GX /ZI /Od /I "C:\Dev\IPv6Kit\inc\\" /I "C:\Dev\zlib\\" /I "C:\Dev\openssl\include" /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /FAcs /Fr /YX /FD /c
+# ADD CPP /nologo /MDd /W3 /Gm /GR /ZI /Od /I "C:\Dev\IPv6Kit\inc\\" /I "C:\Dev\zlib\\" /I "C:\Dev\openssl\include" /I "C:\Dev\Winhttrack" /D "_DEBUG" /D "WIN32" /D "_CONSOLE" /D "_MBCS" /D "HTS_ANALYSTE_CONSOLE" /FAcs /Fr /FD /Zm200 /c
# ADD BASE RSC /l 0x40c /d "_DEBUG"
# ADD RSC /l 0x409 /d "_DEBUG"
BSC32=bscmake.exe
@@ -77,8 +77,8 @@ BSC32=bscmake.exe
# ADD BSC32 /nologo
LINK32=link.exe
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
-# ADD LINK32 wsock32.lib zlib.lib ssleay32.lib libeay32.lib /nologo /subsystem:console /map /debug /debugtype:both /machine:I386 /out:"c:\temp\test\httrack.exe" /pdbtype:sept /libpath:"C:\Dev\openssl\lib" /libpath:"C:\Dev\zlib\dll32" /libpath:"C:\Dev\openssl\lib\out32dll"
-# SUBTRACT LINK32 /profile
+# ADD LINK32 wsock32.lib libhttrack.lib /nologo /subsystem:console /debug /debugtype:both /machine:I386 /out:"C:\temp\httrack.exe" /pdbtype:sept /libpath:"C:\Dev\openssl\lib" /libpath:"C:\Dev\zlib\dll32" /libpath:"C:\Dev\openssl\lib\out32dll" /libpath:"C:\temp\Debuglib"
+# SUBTRACT LINK32 /profile /map
!ELSEIF "$(CFG)" == "httrack - Win32 Release avec debug"
@@ -96,8 +96,7 @@ LINK32=link.exe
# PROP Target_Dir ""
# ADD BASE CPP /nologo /MT /W3 /GX /Ot /Oi /Oy /Ob2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
# SUBTRACT BASE CPP /Ox /Oa /Ow /Og /Os
-# ADD CPP /nologo /MT /W3 /GX /Zi /Ot /Oi /Oy /Ob2 /I "C:\Dev\IPv6Kit\inc\\" /I "C:\Dev\zlib\\" /I "C:\Dev\openssl\include" /D "WIN32" /D "_CONSOLE" /D "_MBCS" /FAcs /FR /YX /FD /c
-# SUBTRACT CPP /Ox /Oa /Ow /Og /Os
+# ADD CPP /nologo /MD /W3 /GX /Zi /Ot /Oi /Oy /Ob2 /I "C:\Dev\IPv6Kit\inc\\" /I "C:\Dev\zlib\\" /I "C:\Dev\openssl\include" /I "C:\Dev\Winhttrack" /D "WIN32" /D "_CONSOLE" /D "_MBCS" /D "HTS_ANALYSTE_CONSOLE" /FAcs /FR /YX /FD /Zm200 /c
# ADD BASE RSC /l 0x409 /d "NDEBUG"
# ADD RSC /l 0x409 /d "NDEBUG"
BSC32=bscmake.exe
@@ -106,7 +105,7 @@ BSC32=bscmake.exe
LINK32=link.exe
# ADD BASE LINK32 wsock32.lib /nologo /subsystem:console /machine:I386 /out:"c:\temp\httrack.exe"
# SUBTRACT BASE LINK32 /verbose
-# ADD LINK32 wsock32.lib zlib.lib ssleay32.lib libeay32.lib /nologo /subsystem:console /debug /machine:I386 /out:"c:\temp\httrack.exe" /libpath:"C:\Dev\openssl\lib" /libpath:"C:\Dev\zlib\dll32" /libpath:"C:\Dev\openssl\lib\out32dll"
+# ADD LINK32 wsock32.lib libhttrack.lib /nologo /subsystem:console /debug /machine:I386 /out:"L:\HTTrack\httrack\httrack.exe" /libpath:"C:\Dev\openssl\lib" /libpath:"C:\Dev\zlib\dll32" /libpath:"C:\Dev\openssl\lib\out32dll"
# SUBTRACT LINK32 /verbose
!ENDIF
@@ -118,207 +117,11 @@ LINK32=link.exe
# Name "httrack - Win32 Release avec debug"
# Begin Source File
-SOURCE=.\htsalias.c
-# End Source File
-# Begin Source File
-
-SOURCE=.\htsalias.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\htsback.c
-# End Source File
-# Begin Source File
-
-SOURCE=.\htsback.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\htsbauth.c
-# End Source File
-# Begin Source File
-
-SOURCE=.\htsbauth.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\htscache.c
-# End Source File
-# Begin Source File
-
-SOURCE=.\htscache.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\htscatchurl.c
-# End Source File
-# Begin Source File
-
-SOURCE=.\htscatchurl.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\htsconfig.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\htscore.c
-# End Source File
-# Begin Source File
-
-SOURCE=.\htscore.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\htscoremain.c
-# End Source File
-# Begin Source File
-
-SOURCE=.\htscoremain.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\htsdefines.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\htsfilters.c
-# End Source File
-# Begin Source File
-
-SOURCE=.\htsfilters.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\htsftp.c
-# End Source File
-# Begin Source File
-
-SOURCE=.\htsftp.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\htsglobal.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\htshash.c
-# End Source File
-# Begin Source File
-
-SOURCE=.\htshash.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\htshelp.c
-# End Source File
-# Begin Source File
-
-SOURCE=.\htshelp.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\htsindex.c
-# End Source File
-# Begin Source File
-
-SOURCE=.\htsindex.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\htsjava.c
-# End Source File
-# Begin Source File
-
-SOURCE=.\htsjava.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\htslib.c
-# End Source File
-# Begin Source File
-
-SOURCE=.\htslib.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\htsmd5.c
-# End Source File
-# Begin Source File
-
-SOURCE=.\htsmd5.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\htsname.c
-# End Source File
-# Begin Source File
-
-SOURCE=.\htsname.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\htsnostatic.c
-# End Source File
-# Begin Source File
-
-SOURCE=.\htsnostatic.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\htsrobots.c
-# End Source File
-# Begin Source File
-
-SOURCE=.\htsrobots.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\htsthread.c
-# End Source File
-# Begin Source File
-
-SOURCE=.\htsthread.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\htstools.c
-# End Source File
-# Begin Source File
-
-SOURCE=.\htstools.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\htswizard.c
-# End Source File
-# Begin Source File
-
-SOURCE=.\htswizard.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\htswrap.c
-# End Source File
-# Begin Source File
-
-SOURCE=.\htswrap.h
-# End Source File
-# Begin Source File
-
SOURCE=.\httrack.c
# End Source File
# Begin Source File
SOURCE=.\httrack.h
# End Source File
-# Begin Source File
-
-SOURCE=.\md5.c
-# End Source File
-# Begin Source File
-
-SOURCE=.\md5.h
-# End Source File
# End Target
# End Project
diff --git a/src/httrack.dsw b/src/httrack.dsw
index 9aa199f..3cbb30f 100644
--- a/src/httrack.dsw
+++ b/src/httrack.dsw
@@ -11,6 +11,21 @@ Package=<5>
Package=<4>
{{{
+ Begin Project Dependency
+ Project_Dep_Name libhttrack
+ End Project Dependency
+}}}
+
+###############################################################################
+
+Project: "libhttrack"=.\libhttrack\libhttrack.dsp - Package Owner=<4>
+
+Package=<5>
+{{{
+}}}
+
+Package=<4>
+{{{
}}}
###############################################################################
diff --git a/src/httrack.h b/src/httrack.h
index a8633de..f297e00 100644
--- a/src/httrack.h
+++ b/src/httrack.h
@@ -43,9 +43,9 @@ Please visit our Website: http://www.httrack.com
#include "htscore.h"
typedef struct {
- char name[1000];
- char file[256];
- char state[20];
+ char name[1024];
+ char file[1024];
+ char state[256];
char url_sav[HTS_URLMAXSIZE*2]; // pour cancel
char url_adr[HTS_URLMAXSIZE*2];
char url_fil[HTS_URLMAXSIZE*2];
diff --git a/src/postinst-config.in b/src/postinst-config.in
deleted file mode 100755
index f0edc72..0000000
--- a/src/postinst-config.in
+++ /dev/null
@@ -1,55 +0,0 @@
-#!/bin/sh
-
-# Config file location
-cnfdir="__ETCPATH__"
-cnf="__ETCPATH__/httrack.conf"
-
-if test "`id -u`" -eq 0; then
- mkdir -p "$cnfdir"
- if ! test -f "$cnf"; then
- echo "creating $cnf (please modify it) .."
- cat>"$cnf" << EOF
-# HTTrack Website Copier Settings
-# See httrack --help for more information
-
-# Examples: (to uncomment)
-
-# set proxy proxy.myisp.com:8080
-# retries=2
-# set max-size 10000000
-# set max-time 36000
-# set user-agent Mouzilla/17.0 (compatible; HTTrack; I)
-#
-# There are MUCH more options.. try 'httrack --quiet --help | more'
-
-# Deny and allow for links
-# this will be used by default for all mirrors
-allow *.gif
-allow *.png
-deny ad.doubleclick.net/*
-
-# Path and other options
-# '~' in the *begining* means 'home dir'
-# '#' at the *end* means "projectname" (that is, the first URL given)
-# Example: '~/websites/#' will create /home/smith/websites/www.foo.com
-# folder when launching 'httrack www.foo.com'
-set path ~/websites/#
-
-EOF
- fi
-
- if ! grep "set path" "$cnf" >/dev/null; then
- echo "default path set to <home dir>/websites/<first_site_name>"
- fi
-
- chown root:__ROOTGROUP__ "$cnf"
- chmod 744 "$cnf"
-else
- cat << EOF
-
-You are not root, therefore $cnf configuration file hasn't been created
-Re-run this sript ($0) as root if you want to do that
-
-EOF
-fi
-
diff --git a/src/strip_cr.in b/src/strip_cr.in
deleted file mode 100755
index 03af084..0000000
--- a/src/strip_cr.in
+++ /dev/null
@@ -1,32 +0,0 @@
-__PERL__
-# A simple script to convert DOS text files to
-# Unix one. Useful to strip all CR on .c and .h
-# sourcefiles.
-# Usage: strip_cr <files>
-foreach $fname (@ARGV) {
- $ad=1;
- if (open(FL,$fname)) {
- if (open(FO,">".$fname.".tmp")) {
- while(<FL>) {
- s/\r\n$/\n/g;
- print FO "$_";
- }
- close(FL);
- close(FO);
- if ((-s $fname) != (-s $fname.".tmp")) {
- print("Stripping ".$fname."..\n");
- rename($fname.".tmp",$fname);
- } else {
- unlink($fname.".tmp");
- }
- } else {
- print "Unable to open ".$fname.".tmp\n";
- }
- } else {
- print "Unable to open $fname\n";
- }
-}
-if (!$ad) {
- print "Ensure that a text file has no lines ended with CR (DOS)\n";
- print "Usage: strip_cr <file>\n";
-}
diff --git a/src/webhttrack b/src/webhttrack
new file mode 100755
index 0000000..3b433bb
--- /dev/null
+++ b/src/webhttrack
@@ -0,0 +1,101 @@
+#!/bin/bash
+#
+# WebHTTrack launcher script
+# Initializes the htsserver GUI frontend and launches the default browser
+BROWSEREXE=
+SRCHBROWSEREXE="x-www-browser www-browser mozilla galeon konqueror opera netscape"
+if test -n "${BROWSER}"; then
+# sensible-browser misbehaves when BROWSER is not set, so only try it (first) when it is set
+SRCHBROWSEREXE="sensible-browser ${SRCHBROWSEREXE}"
+fi
+SRCHPATH="/usr/local/bin /usr/share/bin /usr/bin /usr/lib/httrack /usr/local/lib/httrack /usr/local/share/httrack ${HOME}/usr/bin ${HOME}/bin"
+SRCHDISTPATH="/usr/share /usr/local /usr /local /usr/local/share ${HOME}/usr ${HOME}/usr/share ${HOME}/usr/local ${HOME}/usr/share"
+
+###
+# And now some famous cuisine
+
+log() {
+echo "$0($$): $*" >&2
+return 0
+}
+
+# First ensure that we can launch the server: locate htsserver (BINPATH) and the data directory (DISTPATH), else exit 1. NOTE(review): DISTPATH is not reset first, so a value inherited from the environment skips the search; confirm this is intended.
+BINPATH=
+for i in ${SRCHPATH}; do
+ ! test -n "${BINPATH}" && test -x ${i}/htsserver && BINPATH=${i}
+done
+for i in ${SRCHDISTPATH}; do
+ ! test -n "${DISTPATH}" && test -f "${i}/httrack/lang.def" && DISTPATH="${i}/httrack"
+done
+test -n "${BINPATH}" || ! log "could not find htsserver" || exit 1
+test -n "${DISTPATH}" || ! log "could not find httrack directory" || exit 1
+test -f ${DISTPATH}/lang.def || ! log "could not find ${DISTPATH}/lang.def" || exit 1
+test -f ${DISTPATH}/lang.indexes || ! log "could not find ${DISTPATH}/lang.indexes" || exit 1
+test -d ${DISTPATH}/lang || ! log "could not find ${DISTPATH}/lang" || exit 1
+test -d ${DISTPATH}/html || ! log "could not find ${DISTPATH}/html" || exit 1
+
+# Locale: two-letter lowercase code taken from LC_MESSAGES, LC_ALL or LANG (first non-empty), mapped to a language index through lang.indexes (index 1 when nothing matches)
+HTSLANG="${LC_MESSAGES}"
+! test -n "${HTSLANG}" && HTSLANG="${LC_ALL}"
+! test -n "${HTSLANG}" && HTSLANG="${LANG}"
+test -n "${HTSLANG}" && HTSLANG="`echo ${HTSLANG} | cut -c1-2 | tr 'A-Z' 'a-z'`"
+LANGN=`test -n "${HTSLANG}" && grep "${HTSLANG}:" "${DISTPATH}/lang.indexes" | cut -f2 -d':'`
+! test -n "${LANGN}" && LANGN=1
+
+# Find the browser: first executable ${SRCHPATH}/${SRCHBROWSEREXE} combination wins, browsers being tried in list order
+# note: not all systems have a sensible-browser or www-browser alternative,
+# therefore we have to search a bit more if sensible-browser could not be found
+for i in ${SRCHBROWSEREXE}; do
+for j in ${SRCHPATH}; do
+if test -x "${j}/${i}"; then
+BROWSEREXE=${j}/${i}
+fi
+test -n "$BROWSEREXE" && break
+done
+test -n "$BROWSEREXE" && break
+done
+test -n "$BROWSEREXE" || ! log "could not find any suitable browser" || exit 1
+
+# "browse" command: when the first argument is "browse", open ${HOME}/websites/index.html in the browser and exit with the browser's status
+if test "$1" = "browse"; then
+${BROWSEREXE} "file://${HOME}/websites/index.html"
+exit $?
+fi
+
+# Create a temporary filename built from the PID plus the MD5 of 16 random bytes
+TMPSRVFILE="/tmp/.webhttrack.$$.`/usr/bin/head -c16 /dev/random | /usr/bin/md5sum | /usr/bin/cut -f1 -d' '`"
+>"${TMPSRVFILE}" || ! log "could not create the temporary file ${TMPSRVFILE}" || exit 1
+# Launch htsserver binary in the background, forwarding our arguments unchanged; "SRVURL=error" is appended to the file once the server exits
+("${BINPATH}/htsserver" "${DISTPATH}/" path "${HOME}/websites" lang "${LANGN}" "$@"; echo SRVURL=error) > "${TMPSRVFILE}"&
+# Find the generated SRVURL: re-read the server output, sleeping one second between tries, until a URL= line shows up or MAXCOUNT runs out
+SRVURL=
+MAXCOUNT=60
+while ! test -n "$SRVURL"; do
+MAXCOUNT=$((MAXCOUNT - 1))
+test $MAXCOUNT -gt 0 || exit 1
+test $MAXCOUNT -lt 50 && echo "waiting for server to reply.."
+SRVURL=`/bin/grep -E URL= "${TMPSRVFILE}" | /usr/bin/cut -f2- -d=`
+test ! "$SRVURL" = "error" || ! log "could not spawn htsserver" || exit 1
+test -n "$SRVURL" || /bin/sleep 1
+done
+
+# Cleanup: kill the server whose PID= line is recorded in the temporary file, then delete that file; a non-empty $1 turns on the log messages
+cleanup() {
+[ -n "$1" ] && log "nasty signal caught, cleaning up.."
+[ -f ${TMPSRVFILE} ] && SRVPID=`/bin/grep -E PID= ${TMPSRVFILE} | /usr/bin/cut -f2- -d=`
+[ -n "${SRVPID}" ] && kill -9 ${SRVPID}
+[ -f ${TMPSRVFILE} ] && rm ${TMPSRVFILE}
+[ -n "$1" ] && log "..done"
+return 0
+}
+
+# Cleanup in case of emergency: on any of the listed signal numbers, run cleanup with logging enabled, then exit
+trap "cleanup now; exit" 1 2 3 4 5 6 7 8 9 11 13 14 15 16 19 24 25
+
+# Got SRVURL, launch browser (runs in the foreground; the script continues once it returns)
+${BROWSEREXE} "${SRVURL}"
+
+# That's all, folks! Ignore the same signals from here on and clean up quietly
+trap "" 1 2 3 4 5 6 7 8 9 11 13 14 15 16 19 24 25
+cleanup
+exit 0
diff --git a/src/webhttrack.dsp b/src/webhttrack.dsp
new file mode 100755
index 0000000..a5940e8
--- /dev/null
+++ b/src/webhttrack.dsp
@@ -0,0 +1,120 @@
+# Microsoft Developer Studio Project File - Name="webhttrack" - Package Owner=<4>
+# Microsoft Developer Studio Generated Build File, Format Version 6.00
+# ** DO NOT EDIT **
+
+# TARGTYPE "Win32 (x86) Console Application" 0x0103
+
+CFG=webhttrack - Win32 Debug
+!MESSAGE This is not a valid makefile. To build this project using NMAKE,
+!MESSAGE use the Export Makefile command and run
+!MESSAGE
+!MESSAGE NMAKE /f "webhttrack.mak".
+!MESSAGE
+!MESSAGE You can specify a configuration when running NMAKE
+!MESSAGE by defining the macro CFG on the command line. For example:
+!MESSAGE
+!MESSAGE NMAKE /f "webhttrack.mak" CFG="webhttrack - Win32 Debug"
+!MESSAGE
+!MESSAGE Possible choices for configuration are:
+!MESSAGE
+!MESSAGE "webhttrack - Win32 Release" (based on "Win32 (x86) Console Application")
+!MESSAGE "webhttrack - Win32 Debug" (based on "Win32 (x86) Console Application")
+!MESSAGE
+
+# Begin Project
+# PROP AllowPerConfigDependencies 0
+# PROP Scc_ProjName ""
+# PROP Scc_LocalPath ""
+CPP=cl.exe
+RSC=rc.exe
+
+!IF "$(CFG)" == "webhttrack - Win32 Release"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 0
+# PROP BASE Output_Dir "Release"
+# PROP BASE Intermediate_Dir "Release"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 0
+# PROP Output_Dir "Release"
+# PROP Intermediate_Dir "Release"
+# PROP Ignore_Export_Lib 0
+# PROP Target_Dir ""
+# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /Yu"stdafx.h" /FD /c
+# ADD CPP /nologo /MD /W3 /GX /O2 /I "C:\Dev\\" /I "C:\Dev\IPv6Kit\inc\\" /I "C:\Dev\zlib\\" /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /FD /c
+# SUBTRACT CPP /YX /Yc /Yu
+# ADD BASE RSC /l 0x40c /d "NDEBUG"
+# ADD RSC /l 0x40c /d "NDEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LINK32=link.exe
+# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
+# ADD LINK32 wsock32.lib libhttrack.lib /nologo /subsystem:console /pdb:none /machine:I386 /force /out:"L:\HTTrack\httrack\webhttrack.exe" /libpath:"C:\Dev\openssl\lib" /libpath:"C:\Dev\zlib\dll32" /libpath:"C:\Dev\openssl\lib\out32dll" /libpath:"C:\temp\Releaselib"
+
+!ELSEIF "$(CFG)" == "webhttrack - Win32 Debug"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 1
+# PROP BASE Output_Dir "Debug"
+# PROP BASE Intermediate_Dir "Debug"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 1
+# PROP Output_Dir "Debug"
+# PROP Intermediate_Dir "Debug"
+# PROP Ignore_Export_Lib 0
+# PROP Target_Dir ""
+# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /Yu"stdafx.h" /FD /GZ /c
+# ADD CPP /nologo /MDd /W3 /Gm /GR /GX /ZI /Od /I "C:\Dev\\" /I "C:\Dev\IPv6Kit\inc\\" /I "C:\Dev\zlib\\" /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /FAcs /FR /FD /GZ /c
+# SUBTRACT CPP /YX /Yc /Yu
+# ADD BASE RSC /l 0x40c /d "_DEBUG"
+# ADD RSC /l 0x40c /d "_DEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LINK32=link.exe
+# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
+# ADD LINK32 wsock32.lib libhttrack.lib /nologo /subsystem:console /debug /machine:I386 /out:"C:\temp\webhttrack.exe" /pdbtype:sept /libpath:"C:\Dev\openssl\lib" /libpath:"C:\Dev\zlib\dll32" /libpath:"C:\Dev\openssl\lib\out32dll" /libpath:"C:\temp\Debuglib"
+
+!ENDIF
+
+# Begin Target
+
+# Name "webhttrack - Win32 Release"
+# Name "webhttrack - Win32 Debug"
+# Begin Group "Source Files"
+
+# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"
+# Begin Source File
+
+SOURCE=.\htsserver.c
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsweb.c
+# End Source File
+# End Group
+# Begin Group "Header Files"
+
+# PROP Default_Filter "h;hpp;hxx;hm;inl"
+# Begin Source File
+
+SOURCE=.\htsserver.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\htsweb.h
+# End Source File
+# End Group
+# Begin Group "Resource Files"
+
+# PROP Default_Filter "ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe"
+# End Group
+# Begin Source File
+
+SOURCE=.\ReadMe.txt
+# End Source File
+# End Target
+# End Project
diff --git a/src/webhttrack.dsw b/src/webhttrack.dsw
new file mode 100755
index 0000000..ea9ea0b
--- /dev/null
+++ b/src/webhttrack.dsw
@@ -0,0 +1,44 @@
+Microsoft Developer Studio Workspace File, Format Version 6.00
+# WARNING: DO NOT EDIT OR DELETE THIS WORKSPACE FILE!
+
+###############################################################################
+
+Project: "libhttrack"=..\libhttrack\libhttrack.dsp - Package Owner=<4>
+
+Package=<5>
+{{{
+}}}
+
+Package=<4>
+{{{
+}}}
+
+###############################################################################
+
+Project: "webhttrack"=.\webhttrack.dsp - Package Owner=<4>
+
+Package=<5>
+{{{
+}}}
+
+Package=<4>
+{{{
+ Begin Project Dependency
+ Project_Dep_Name libhttrack
+ End Project Dependency
+}}}
+
+###############################################################################
+
+Global:
+
+Package=<5>
+{{{
+}}}
+
+Package=<3>
+{{{
+}}}
+
+###############################################################################
+