diff options
167 files changed, 9780 insertions, 7701 deletions
diff --git a/config.h.in b/config.h.in index e4cf45f..80bb079 100644 --- a/config.h.in +++ b/config.h.in @@ -54,9 +54,6 @@ /* Check for large files support */ #undef HTS_LFS -/* Default value used */ -#undef HTS_PLATFORM - /* Check for libsocket */ #undef LIBSOCKET @@ -1776,7 +1776,7 @@ fi # Define the identity of the package. PACKAGE=httrack - VERSION=3.40.4 + VERSION=3.41.20 cat >>confdefs.h <<_ACEOF @@ -1902,7 +1902,7 @@ INSTALL_STRIP_PROGRAM="\${SHELL} \$(install_sh) -c -s" -VERSION_INFO="1:40:0" +VERSION_INFO="2:41:0" echo "$as_me:$LINENO: checking whether to enable maintainer-specific portions of Makefiles" >&5 echo $ECHO_N "checking whether to enable maintainer-specific portions of Makefiles... $ECHO_C" >&6 # Check whether --enable-maintainer-mode or --disable-maintainer-mode was given. @@ -19038,28 +19038,12 @@ DEFAULT_CFLAGS="-O -g3 -Wall -Wcast-align -Wstrict-prototypes \ ### Check for platform -case $host in -AIX) -cat >>confdefs.h <<\_ACEOF -#define HTS_PLATFORM 1 -_ACEOF -;; -*-solaris*) -cat >>confdefs.h <<\_ACEOF -#define HTS_PLATFORM 2 -_ACEOF -;; -*-linux-gnu | *-irix6*) -cat >>confdefs.h <<\_ACEOF -#define HTS_PLATFORM 3 -_ACEOF -;; -*) -cat >>confdefs.h <<\_ACEOF -#define HTS_PLATFORM 3 -_ACEOF -;; -esac +#case $host in +#AIX) AC_DEFINE(HTS_PLATFORM, 1, [Defined to build under AIX]);; +#*-solaris*) AC_DEFINE(HTS_PLATFORM, 2, [Defined to build under solaris]);; +#*-linux-gnu | *-irix6*) AC_DEFINE(HTS_PLATFORM, 3, [Defined to build under Linux]);; +#*) AC_DEFINE(HTS_PLATFORM, 3, [Default value used]);; +#esac ### Check size of long and long long. 
echo "$as_me:$LINENO: checking for long" >&5 @@ -21655,7 +21639,7 @@ _ACEOF fi - ac_config_files="$ac_config_files Makefile src/Makefile man/Makefile m4/Makefile libtest/Makefile templates/Makefile lang/Makefile html/Makefile" + ac_config_files="$ac_config_files Makefile src/Makefile man/Makefile m4/Makefile templates/Makefile lang/Makefile html/Makefile libtest/Makefile" cat >confcache <<\_ACEOF # This file is a shell script that caches the results of configure # tests run on this system so they can be shared between configure @@ -22223,10 +22207,10 @@ do "src/Makefile" ) CONFIG_FILES="$CONFIG_FILES src/Makefile" ;; "man/Makefile" ) CONFIG_FILES="$CONFIG_FILES man/Makefile" ;; "m4/Makefile" ) CONFIG_FILES="$CONFIG_FILES m4/Makefile" ;; - "libtest/Makefile" ) CONFIG_FILES="$CONFIG_FILES libtest/Makefile" ;; "templates/Makefile" ) CONFIG_FILES="$CONFIG_FILES templates/Makefile" ;; "lang/Makefile" ) CONFIG_FILES="$CONFIG_FILES lang/Makefile" ;; "html/Makefile" ) CONFIG_FILES="$CONFIG_FILES html/Makefile" ;; + "libtest/Makefile" ) CONFIG_FILES="$CONFIG_FILES libtest/Makefile" ;; "depfiles" ) CONFIG_COMMANDS="$CONFIG_COMMANDS depfiles" ;; "config.h" ) CONFIG_HEADERS="$CONFIG_HEADERS config.h" ;; *) { { echo "$as_me:$LINENO: error: invalid argument: $ac_config_target" >&5 diff --git a/configure.in b/configure.in index da40415..9228af7 100644 --- a/configure.in +++ b/configure.in @@ -1,7 +1,7 @@ AC_INIT(src/httrack.c) AM_CONFIG_HEADER(config.h) -AM_INIT_AUTOMAKE(httrack, 3.40.4) -VERSION_INFO="1:40:0" +AM_INIT_AUTOMAKE(httrack, 3.41.20) +VERSION_INFO="2:41:0" AM_MAINTAINER_MODE AC_PREREQ(2.50) @@ -21,12 +21,12 @@ AC_SUBST(DEFAULT_CFLAGS) AC_SUBST(VERSION_INFO) ### Check for platform -case $host in -AIX) AC_DEFINE(HTS_PLATFORM, 1, [Defined to build under AIX]);; -*-solaris*) AC_DEFINE(HTS_PLATFORM, 2, [Defined to build under solaris]);; -*-linux-gnu | *-irix6*) AC_DEFINE(HTS_PLATFORM, 3, [Defined to build under Linux]);; -*) AC_DEFINE(HTS_PLATFORM, 3, [Default value 
used]);; -esac +#case $host in +#AIX) AC_DEFINE(HTS_PLATFORM, 1, [Defined to build under AIX]);; +#*-solaris*) AC_DEFINE(HTS_PLATFORM, 2, [Defined to build under solaris]);; +#*-linux-gnu | *-irix6*) AC_DEFINE(HTS_PLATFORM, 3, [Defined to build under Linux]);; +#*) AC_DEFINE(HTS_PLATFORM, 3, [Default value used]);; +#esac ### Check size of long and long long. AC_CHECK_SIZEOF(long) @@ -175,8 +175,8 @@ AC_OUTPUT([ Makefile src/Makefile man/Makefile m4/Makefile -libtest/Makefile templates/Makefile lang/Makefile html/Makefile +libtest/Makefile ]) diff --git a/history.txt b/history.txt index 126c458..0a298d6 100644 --- a/history.txt +++ b/history.txt @@ -4,6 +4,16 @@ HTTrack Website Copier release history: This file lists all changes and fixes that have been made for HTTrack.
+3.41-beta
++ New: changed API/ABI to thread-safe ones (libhttrack1 2), big cleanup in all .h definitions
++ Fixed: Major memory usage bug when downloading large sites
++ Fixed: do not rename files if the original MIME type was compatible
++ Fixed: several source fixes for FreeBSD (especially time problems)
++ New: option %w to disable specific modules (java, flash..)
++ Fixed: 'no space left in stack for back_add' error
++ Fixed: redirected images with "html" type
++ Fixed: 'Crash adding error, unexpected error found.. [4268]' error
+
3.40-2
+ Fixed: bogus '.del' filenames with ISO-9660 option
+ Fixed: now merges the header charset even with an empty footer string
diff --git a/html/Makefile.am b/html/Makefile.am index 416dbf5..2bd25bc 100755 --- a/html/Makefile.am +++ b/html/Makefile.am @@ -34,7 +34,7 @@ EXTRA_DIST = $(HelpHtml_DATA) $(HelpHtmlimg_DATA) $(HelpHtmlimages_DATA) \ httrack.css install-data-hook: - if test ! -f $(DESTDIR)$(prefix)/share/httrack/html ; then \ + if test ! -L $(DESTDIR)$(prefix)/share/httrack/html ; then \ ( cd $(DESTDIR)$(prefix)/share/httrack \ && mv -f ../doc/httrack/html html \ && cd ../doc/httrack/ \ diff --git a/html/Makefile.in b/html/Makefile.in index fd5387c..8855606 100644 --- a/html/Makefile.in +++ b/html/Makefile.in @@ -570,7 +570,7 @@ uninstall-am: uninstall-HelpHtmlDATA uninstall-HelpHtmlTxtDATA \ install-data-hook: - if test ! -f $(DESTDIR)$(prefix)/share/httrack/html ; then \ + if test ! -L $(DESTDIR)$(prefix)/share/httrack/html ; then \ ( cd $(DESTDIR)$(prefix)/share/httrack \ && mv -f ../doc/httrack/html html \ && cd ../doc/httrack/ \ diff --git a/html/abuse.html b/html/abuse.html index 1d98f95..92c46fa 100644 --- a/html/abuse.html +++ b/html/abuse.html @@ -579,7 +579,7 @@ And then, put the email address in your pages through: <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> + <td id="footer"><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> </tr> </table> diff --git a/html/addurl.html b/html/addurl.html index 46a163a..59f5a66 100644 --- a/html/addurl.html +++ b/html/addurl.html @@ -144,7 +144,7 @@ h4 { margin: 0; font-weight: bold; font-size: 1.18em; } <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> + <td id="footer"><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> </tr> </table> diff --git 
a/html/cache.html b/html/cache.html index df28dc3..a17cb70 100755 --- a/html/cache.html +++ b/html/cache.html @@ -282,7 +282,7 @@ Libraries should generally handle this peculiar format, however. <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> + <td id="footer"><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> </tr> </table> diff --git a/html/cmddoc.html b/html/cmddoc.html index 7879071..4260b09 100644 --- a/html/cmddoc.html +++ b/html/cmddoc.html @@ -145,7 +145,7 @@ The command-line version <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> + <td id="footer"><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> </tr> </table> diff --git a/html/contact.html b/html/contact.html index 0706f6e..2a2e81d 100644 --- a/html/contact.html +++ b/html/contact.html @@ -243,7 +243,7 @@ roche at httrack dot com (Xavier ROCHE)<br> <br><hr><br> <br> This program is covered by the GNU General Public License.<br> - HTTrack/HTTrack Website Copier is Copyright (C) 1998-2003 Xavier Roche and other contributors + HTTrack/HTTrack Website Copier is Copyright (C) 1998-2007 Xavier Roche and other contributors <br> <!-- ==================== Start epilogue ==================== --> @@ -259,7 +259,7 @@ roche at httrack dot com (Xavier ROCHE)<br> <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> + <td id="footer"><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> </tr> </table> diff --git a/html/dev.html b/html/dev.html index 
66ae8a1..01fbdf0 100644 --- a/html/dev.html +++ b/html/dev.html @@ -116,7 +116,8 @@ You can use tens of options (see <tt>httrack --help</tt>) to control precisely t <br><br>
<li><a href="plug.html">More complex use: plugging external C functions to the httrack library</a></li><br>
For advanced functions, you may have to use external C wrappers ; for example when adding advanced crawl features, such as "tuned" filestructure type
-<br><i>Important note: please read the license information of httrack before developing add-ons</i>
+<br><i>Important note: please read the license information of httrack before developing add-ons</i><br />
+See also the page <a href="plug_330.html">for versions prior to 3.41</a>
<br><br>
<li><a href="library.html">Advanced use: using the library</a></li><br>
The library can be used to write graphical GUIs for httrack, or to run mirrors from a program.
@@ -146,7 +147,7 @@ This page describes the HTTrack cache format. <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> + <td id="footer"><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> </tr> </table> diff --git a/html/faq.html b/html/faq.html index 9225147..3b4a051 100644 --- a/html/faq.html +++ b/html/faq.html @@ -934,7 +934,7 @@ A: <em>Feel free to <a href="contact.html">contact us</a>! <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> + <td id="footer"><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> </tr> </table> diff --git a/html/fcguide.html b/html/fcguide.html index e2f03d4..f86702f 100644 --- a/html/fcguide.html +++ b/html/fcguide.html @@ -2708,7 +2708,7 @@ for only 2 simultaneous sesions. 
<table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> + <td id="footer"><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> </tr> </table> diff --git a/html/filters.html b/html/filters.html index dac8545..fa79ca9 100644 --- a/html/filters.html +++ b/html/filters.html @@ -466,7 +466,7 @@ See also: The <a href="faq.html#VF1">FAQ</a><br> <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> + <td id="footer"><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> </tr> </table> diff --git a/html/httrack.man.html b/html/httrack.man.html index 116cbb5..13ee1a0 100644 --- a/html/httrack.man.html +++ b/html/httrack.man.html @@ -1,5 +1,5 @@ <!-- Creator : groff version 1.18.1 --> -<!-- CreationDate: Sun Apr 16 11:34:04 2006 --> +<!-- CreationDate: Sat Feb 3 14:35:31 2007 --> <html> <head> <meta name="generator" content="groff -Thtml, see www.gnu.org"> @@ -46,14 +46,15 @@ local directory</p> <td width="10%"></td> <td width="89%"> <p><b>httrack [ url ]... [ -filter ]... [ +filter ]... 
[</b> -] [ <b>-w, --mirror</b> ] [ <b>-W, --mirror-wizard</b> ] [ -<b>-g, --get-files</b> ] [ <b>-i, --continue</b> ] [ <b>-Y, ---mirrorlinks</b> ] [ <b>-P, --proxy</b> ] [ <b>-%f, ---httpproxy-ftp[=N]</b> ] [ <b>-%b, --bind</b> ] [ <b>-rN, ---depth[=N]</b> ] [ <b>-%eN, --ext-depth[=N]</b> ] [ <b>-mN, ---max-files[=N]</b> ] [ <b>-MN, --max-size[=N]</b> ] [ -<b>-EN, --max-time[=N]</b> ] [ <b>-AN, --max-rate[=N]</b> ] -[ <b>-%cN, --connection-per-second[=N]</b> ] [ <b>-GN, +] [ <b>-%O, --chroot</b> ] [ <b>-w, --mirror</b> ] [ <b>-W, +--mirror-wizard</b> ] [ <b>-g, --get-files</b> ] [ <b>-i, +--continue</b> ] [ <b>-Y, --mirrorlinks</b> ] [ <b>-P, +--proxy</b> ] [ <b>-%f, --httpproxy-ftp[=N]</b> ] [ <b>-%b, +--bind</b> ] [ <b>-rN, --depth[=N]</b> ] [ <b>-%eN, +--ext-depth[=N]</b> ] [ <b>-mN, --max-files[=N]</b> ] [ +<b>-MN, --max-size[=N]</b> ] [ <b>-EN, --max-time[=N]</b> ] +[ <b>-AN, --max-rate[=N]</b> ] [ <b>-%cN, +--connection-per-second[=N]</b> ] [ <b>-GN, --max-pause[=N]</b> ] [ <b>-%mN, --max-mms-time[=N]</b> ] [ <b>-cN, --sockets[=N]</b> ] [ <b>-TN, --timeout</b> ] [ <b>-RN, --retries[=N]</b> ] [ <b>-JN, --min-rate[=N]</b> ] [ @@ -72,20 +73,20 @@ local directory</p> ] [ <b>-%h, --http-10</b> ] [ <b>-%k, --keep-alive</b> ] [ <b>-%B, --tolerant</b> ] [ <b>-%s, --updatehack</b> ] [ <b>-%u, --urlhack</b> ] [ <b>-%A, --assume</b> ] [ <b>-@iN, ---protocol[=N]</b> ] [ <b>-F, --user-agent</b> ] [ <b>-%R, ---referer</b> ] [ <b>-%E, --from</b> ] [ <b>-%F, ---footer</b> ] [ <b>-%l, --language</b> ] [ <b>-C, ---cache[=N]</b> ] [ <b>-k, --store-all-in-cache</b> ] [ -<b>-%n, --do-not-recatch</b> ] [ <b>-%v, --display</b> ] [ -<b>-Q, --do-not-log</b> ] [ <b>-q, --quiet</b> ] [ <b>-z, ---extra-log</b> ] [ <b>-Z, --debug-log</b> ] [ <b>-v, ---verbose</b> ] [ <b>-f, --file-log</b> ] [ <b>-f2, ---single-log</b> ] [ <b>-I, --index</b> ] [ <b>-%i, ---build-top-index</b> ] [ <b>-%I, --search-index</b> ] [ -<b>-pN, --priority[=N]</b> ] [ <b>-S, --stay-on-same-dir</b> -] [ <b>-D, 
--can-go-down</b> ] [ <b>-U, --can-go-up</b> ] [ -<b>-B, --can-go-up-and-down</b> ] [ <b>-a, ---stay-on-same-address</b> ] [ <b>-d, +--protocol[=N]</b> ] [ <b>-%w, --disable-module</b> ] [ +<b>-F, --user-agent</b> ] [ <b>-%R, --referer</b> ] [ +<b>-%E, --from</b> ] [ <b>-%F, --footer</b> ] [ <b>-%l, +--language</b> ] [ <b>-C, --cache[=N]</b> ] [ <b>-k, +--store-all-in-cache</b> ] [ <b>-%n, --do-not-recatch</b> ] +[ <b>-%v, --display</b> ] [ <b>-Q, --do-not-log</b> ] [ +<b>-q, --quiet</b> ] [ <b>-z, --extra-log</b> ] [ <b>-Z, +--debug-log</b> ] [ <b>-v, --verbose</b> ] [ <b>-f, +--file-log</b> ] [ <b>-f2, --single-log</b> ] [ <b>-I, +--index</b> ] [ <b>-%i, --build-top-index</b> ] [ <b>-%I, +--search-index</b> ] [ <b>-pN, --priority[=N]</b> ] [ <b>-S, +--stay-on-same-dir</b> ] [ <b>-D, --can-go-down</b> ] [ +<b>-U, --can-go-up</b> ] [ <b>-B, --can-go-up-and-down</b> ] +[ <b>-a, --stay-on-same-address</b> ] [ <b>-d, --stay-on-same-domain</b> ] [ <b>-l, --stay-on-same-tld</b> ] [ <b>-e, --go-everywhere</b> ] [ <b>-%H, --debug-headers</b> ] [ <b>-%!, @@ -255,8 +256,8 @@ proxy</p> <table width="100%" border=0 rules="none" frame="void" cols="4" cellspacing="0" cellpadding="0"> <tr valign="top" align="left"> -<td width="13%"></td> -<td width="2%"> +<td width="11%"></td> +<td width="4%"> <p>-O</p> </td> @@ -266,6 +267,18 @@ proxy</p> <p>path for mirror/logfiles+cache (-O path mirror[,path cache and logfiles]) (--path <param>)</p> </td> +<tr valign="top" align="left"> +<td width="11%"></td> +<td width="4%"> + +<p>-%O</p> +</td> +<td width="5%"></td> +<td width="77%"> + +<p>chroot path to, must be r00t (-%O root path) (--chroot +<param>)</p> +</td> </table> <!-- INDENTATION --> <table width="100%" border=0 rules="none" frame="void" @@ -351,11 +364,11 @@ cache and logfiles]) (--path <param>)</p> cols="4" cellspacing="0" cellpadding="0"> <tr valign="top" align="left"> <td width="11%"></td> -<td width="5%"> +<td width="4%"> <p>-P</p> </td> -<td width="4%"></td> +<td 
width="5%"></td> <td width="77%"> <p>proxy use (-P proxy:port or -P user:pass@proxy:port) @@ -363,11 +376,11 @@ cache and logfiles]) (--path <param>)</p> </td> <tr valign="top" align="left"> <td width="11%"></td> -<td width="5%"> +<td width="4%"> <p>-%f</p> </td> -<td width="4%"></td> +<td width="5%"></td> <td width="77%"> <p>*use proxy for ftp (f0 don t use) @@ -375,11 +388,11 @@ cache and logfiles]) (--path <param>)</p> </td> <tr valign="top" align="left"> <td width="11%"></td> -<td width="5%"> +<td width="4%"> <p>-%b</p> </td> -<td width="4%"></td> +<td width="5%"></td> <td width="77%"> <p>use this local hostname to make/send requests (-%b @@ -529,22 +542,22 @@ minute, 3600=1 hour) (--max-mms-time[=N])</p> cols="4" cellspacing="0" cellpadding="0"> <tr valign="top" align="left"> <td width="11%"></td> -<td width="8%"> +<td width="7%"> <p>-cN</p> </td> -<td width="1%"></td> +<td width="2%"></td> <td width="77%"> <p>number of multiple connections (*c8) (--sockets[=N])</p> </td> <tr valign="top" align="left"> <td width="11%"></td> -<td width="8%"> +<td width="7%"> <p>-TN</p> </td> -<td width="1%"></td> +<td width="2%"></td> <td width="77%"> <p>timeout, number of seconds after a non-responding link @@ -552,11 +565,11 @@ is shutdown (--timeout)</p> </td> <tr valign="top" align="left"> <td width="11%"></td> -<td width="8%"> +<td width="7%"> <p>-RN</p> </td> -<td width="1%"></td> +<td width="2%"></td> <td width="77%"> <p>number of retries, in case of timeout or non-fatal @@ -564,11 +577,11 @@ errors (*R1) (--retries[=N])</p> </td> <tr valign="top" align="left"> <td width="11%"></td> -<td width="8%"> +<td width="7%"> <p>-JN</p> </td> -<td width="1%"></td> +<td width="2%"></td> <td width="77%"> <p>traffic jam control, minimum transfert rate @@ -576,11 +589,11 @@ errors (*R1) (--retries[=N])</p> </td> <tr valign="top" align="left"> <td width="11%"></td> -<td width="8%"> +<td width="7%"> <p>-HN</p> </td> -<td width="1%"></td> +<td width="2%"></td> <td width="77%"> <p>host is 
abandonned if: 0=never, 1=timeout, 2=slow, @@ -672,11 +685,11 @@ file (one scan rule per line) (--urllist <param>)</p> cols="4" cellspacing="0" cellpadding="0"> <tr valign="top" align="left"> <td width="11%"></td> -<td width="7%"> +<td width="5%"> <p>-NN</p> </td> -<td width="2%"></td> +<td width="4%"></td> <td width="77%"> <p>structure type (0 *original structure, 1+: see below) @@ -684,22 +697,22 @@ file (one scan rule per line) (--urllist <param>)</p> </td> <tr valign="top" align="left"> <td width="11%"></td> -<td width="7%"> +<td width="5%"> <p>-or</p> </td> -<td width="2%"></td> +<td width="4%"></td> <td width="77%"> <p>user defined structure (-N "%h%p/%n%q.%t")</p> </td> <tr valign="top" align="left"> <td width="11%"></td> -<td width="7%"> +<td width="5%"> <p>-%N</p> </td> -<td width="2%"></td> +<td width="4%"></td> <td width="77%"> <p>delayed type check, don t make any link test but wait @@ -708,11 +721,11 @@ t use, %N1 use for unknown extensions, * %N2 always use)</p> </td> <tr valign="top" align="left"> <td width="11%"></td> -<td width="7%"> +<td width="5%"> <p>-%D</p> </td> -<td width="2%"></td> +<td width="4%"></td> <td width="77%"> <p>cached delayed type check, don t wait for remote type @@ -721,11 +734,11 @@ during updates, to speedup them (%D0 wait, * %D1 don t wait) </td> <tr valign="top" align="left"> <td width="11%"></td> -<td width="7%"> +<td width="5%"> <p>-%M</p> </td> -<td width="2%"></td> +<td width="4%"></td> <td width="77%"> <p>generate a RFC MIME-encapsulated full-archive (.mht) @@ -733,11 +746,11 @@ during updates, to speedup them (%D0 wait, * %D1 don t wait) </td> <tr valign="top" align="left"> <td width="11%"></td> -<td width="7%"> +<td width="5%"> <p>-LN</p> </td> -<td width="2%"></td> +<td width="4%"></td> <td width="77%"> <p>long names (L1 *long names / L0 8-3 conversion / L2 @@ -745,11 +758,11 @@ ISO9660 compatible) (--long-names[=N])</p> </td> <tr valign="top" align="left"> <td width="11%"></td> -<td width="7%"> +<td width="5%"> 
<p>-KN</p> </td> -<td width="2%"></td> +<td width="4%"></td> <td width="77%"> <p>keep original links (e.g. http://www.adr/link) (K0 @@ -758,11 +771,11 @@ absolute URI links) (--keep-links[=N])</p> </td> <tr valign="top" align="left"> <td width="11%"></td> -<td width="7%"> +<td width="5%"> <p>-x</p> </td> -<td width="2%"></td> +<td width="4%"></td> <td width="77%"> <p>replace external html links by error pages @@ -770,11 +783,11 @@ absolute URI links) (--keep-links[=N])</p> </td> <tr valign="top" align="left"> <td width="11%"></td> -<td width="7%"> +<td width="5%"> <p>-%x</p> </td> -<td width="2%"></td> +<td width="4%"></td> <td width="77%"> <p>do not include any password for external password @@ -782,11 +795,11 @@ protected websites (%x0 include) (--disable-passwords)</p> </td> <tr valign="top" align="left"> <td width="11%"></td> -<td width="7%"> +<td width="5%"> <p>-%q</p> </td> -<td width="2%"></td> +<td width="4%"></td> <td width="77%"> <p>*include query string for local files (useless, for @@ -795,11 +808,11 @@ information purpose only) (%q0 don t include) </td> <tr valign="top" align="left"> <td width="11%"></td> -<td width="7%"> +<td width="5%"> <p>-o</p> </td> -<td width="2%"></td> +<td width="4%"></td> <td width="77%"> <p>*generate output html file in case of error (404..) 
(o0 @@ -807,11 +820,11 @@ don t generate) (--generate-errors)</p> </td> <tr valign="top" align="left"> <td width="11%"></td> -<td width="7%"> +<td width="5%"> <p>-X</p> </td> -<td width="2%"></td> +<td width="4%"></td> <td width="77%"> <p>*purge old files after update (X0 keep delete) @@ -819,11 +832,11 @@ don t generate) (--generate-errors)</p> </td> <tr valign="top" align="left"> <td width="11%"></td> -<td width="7%"> +<td width="5%"> <p>-%p</p> </td> -<td width="2%"></td> +<td width="4%"></td> <td width="77%"> <p>preserve html files as is (identical to -K4 -%F @@ -843,11 +856,11 @@ don t generate) (--generate-errors)</p> cols="4" cellspacing="0" cellpadding="0"> <tr valign="top" align="left"> <td width="11%"></td> -<td width="7%"> +<td width="8%"> <p>-bN</p> </td> -<td width="2%"></td> +<td width="1%"></td> <td width="77%"> <p>accept cookies in cookies.txt (0=do not accept,* @@ -855,11 +868,11 @@ don t generate) (--generate-errors)</p> </td> <tr valign="top" align="left"> <td width="11%"></td> -<td width="7%"> +<td width="8%"> <p>-u</p> </td> -<td width="2%"></td> +<td width="1%"></td> <td width="77%"> <p>check document type if unknown (cgi,asp..) 
(u0 don t @@ -868,23 +881,24 @@ check, * u1 check but /, u2 check always) </td> <tr valign="top" align="left"> <td width="11%"></td> -<td width="7%"> +<td width="8%"> <p>-j</p> </td> -<td width="2%"></td> +<td width="1%"></td> <td width="77%"> -<p>*parse Java Classes (j0 don t parse) -(--parse-java[=N])</p> +<p>*parse Java Classes (j0 don t parse, bitmask: |1 parse +default, |2 don t parse .class |4 don t parse .js |8 don t +be aggressive) (--parse-java[=N])</p> </td> <tr valign="top" align="left"> <td width="11%"></td> -<td width="7%"> +<td width="8%"> <p>-sN</p> </td> -<td width="2%"></td> +<td width="1%"></td> <td width="77%"> <p>follow robots.txt and meta robots tags @@ -893,11 +907,11 @@ rules)) (--robots[=N])</p> </td> <tr valign="top" align="left"> <td width="11%"></td> -<td width="7%"> +<td width="8%"> <p>-%h</p> </td> -<td width="2%"></td> +<td width="1%"></td> <td width="77%"> <p>force HTTP/1.0 requests (reduce update features, only @@ -905,11 +919,11 @@ for old servers or proxies) (--http-10)</p> </td> <tr valign="top" align="left"> <td width="11%"></td> -<td width="7%"> +<td width="8%"> <p>-%k</p> </td> -<td width="2%"></td> +<td width="1%"></td> <td width="77%"> <p>use keep-alive if possible, greately reducing latency @@ -918,11 +932,11 @@ for small files and test requests (%k0 don t use) </td> <tr valign="top" align="left"> <td width="11%"></td> -<td width="7%"> +<td width="8%"> <p>-%B</p> </td> -<td width="2%"></td> +<td width="1%"></td> <td width="77%"> <p>tolerant requests (accept bogus responses on some @@ -930,11 +944,11 @@ servers, but not standard!) (--tolerant)</p> </td> <tr valign="top" align="left"> <td width="11%"></td> -<td width="7%"> +<td width="8%"> <p>-%s</p> </td> -<td width="2%"></td> +<td width="1%"></td> <td width="77%"> <p>update hacks: various hacks to limit re-transfers when @@ -943,11 +957,11 @@ updating (identical size, bogus response..) 
</td> <tr valign="top" align="left"> <td width="11%"></td> -<td width="7%"> +<td width="8%"> <p>-%u</p> </td> -<td width="2%"></td> +<td width="1%"></td> <td width="77%"> <p>url hacks: various hacks to limit duplicate URLs (strip @@ -955,11 +969,11 @@ updating (identical size, bogus response..) </td> <tr valign="top" align="left"> <td width="11%"></td> -<td width="7%"> +<td width="8%"> <p>-%A</p> </td> -<td width="2%"></td> +<td width="1%"></td> <td width="77%"> <p>assume that a type (cgi,asp..) is always linked with a @@ -968,11 +982,11 @@ mime type (-%A php3,cgi=text/html;dat,bin=application/x-zip) </td> <tr valign="top" align="left"> <td width="11%"></td> -<td width="7%"> +<td width="8%"> <p>-can</p> </td> -<td width="2%"></td> +<td width="1%"></td> <td width="77%"> <p>also be used to force a specific file type: --assume @@ -980,16 +994,28 @@ foo.cgi=text/html</p> </td> <tr valign="top" align="left"> <td width="11%"></td> -<td width="7%"> +<td width="8%"> <p>-@iN</p> </td> -<td width="2%"></td> +<td width="1%"></td> <td width="77%"> <p>internet protocol (0=both ipv6+ipv4, 4=ipv4 only, 6=ipv6 only) (--protocol[=N])</p> </td> +<tr valign="top" align="left"> +<td width="11%"></td> +<td width="8%"> + +<p>-%w</p> +</td> +<td width="1%"></td> +<td width="77%"> + +<p>disable a specific external mime module (-%w htsswf -%w +htsjava) (--disable-module <param>)</p> +</td> </table> <!-- INDENTATION --> <table width="100%" border=0 rules="none" frame="void" @@ -1495,7 +1521,7 @@ make) (--search-index)</p> <td width="2%"></td> <td width="77%"> -<p>cache repair (damaged cache) (--debug-oldftp)</p> +<p>cache repair (damaged cache) (--repair-cache)</p> </td> <tr valign="top" align="left"> <td width="11%"></td> @@ -1612,7 +1638,7 @@ make) (--search-index)</p> <td width="1%"></td> <td width="77%"> -<p>old FTP routines (debug) (--debug-oldftp)</p> +<p>old FTP routines (debug) (--repair-cache)</p> </td> <tr valign="top" align="left"> <td width="11%"></td> @@ -1768,8 +1794,7 @@ 
smith) (--user <param>)</p> <td width="77%"> <p>use an external library function as a wrapper (-%W -link-detected=foo.so:myfunction[,myparameters]) (--callback -<param>)</p> +myfoo.so[,myparameters]) (--callback <param>)</p> </td> </table> <!-- INDENTATION --> @@ -1782,58 +1807,53 @@ link-detected=foo.so:myfunction[,myparameters]) (--callback </table> <!-- TABS --> <table width="100%" border=0 rules="none" frame="void" - cols="4" cellspacing="0" cellpadding="0"> + cols="3" cellspacing="0" cellpadding="0"> <tr valign="top" align="left"> <td width="11%"></td> -<td width="8%"> +<td width="10%"> <p>-N0</p> </td> -<td width="1%"></td> <td width="77%"> <p>Site-structure (default)</p> </td> <tr valign="top" align="left"> <td width="11%"></td> -<td width="8%"> +<td width="10%"> <p>-N1</p> </td> -<td width="1%"></td> <td width="77%"> <p>HTML in web/, images/other files in web/images/</p> </td> <tr valign="top" align="left"> <td width="11%"></td> -<td width="8%"> +<td width="10%"> <p>-N2</p> </td> -<td width="1%"></td> <td width="77%"> <p>HTML in web/HTML, images/other in web/images</p> </td> <tr valign="top" align="left"> <td width="11%"></td> -<td width="8%"> +<td width="10%"> <p>-N3</p> </td> -<td width="1%"></td> <td width="77%"> <p>HTML in web/, images/other in web/</p> </td> <tr valign="top" align="left"> <td width="11%"></td> -<td width="8%"> +<td width="10%"> <p>-N4</p> </td> -<td width="1%"></td> <td width="77%"> <p>HTML in web/, images/other in web/xxx, where xxx is the @@ -1842,44 +1862,40 @@ example)</p> </td> <tr valign="top" align="left"> <td width="11%"></td> -<td width="8%"> +<td width="10%"> <p>-N5</p> </td> -<td width="1%"></td> <td width="77%"> <p>Images/other in web/xxx and HTML in web/HTML</p> </td> <tr valign="top" align="left"> <td width="11%"></td> -<td width="8%"> +<td width="10%"> <p>-N99</p> </td> -<td width="1%"></td> <td width="77%"> <p>All files in web/, with random names (gadget !)</p> </td> <tr valign="top" align="left"> <td width="11%"></td> 
-<td width="8%"> +<td width="10%"> <p>-N100</p> </td> -<td width="1%"></td> <td width="77%"> <p>Site-structure, without www.domain.xxx/</p> </td> <tr valign="top" align="left"> <td width="11%"></td> -<td width="8%"> +<td width="10%"> <p>-N101</p> </td> -<td width="1%"></td> <td width="77%"> <p>Identical to N1 exept that "web" is replaced @@ -1887,11 +1903,10 @@ by the site s name</p> </td> <tr valign="top" align="left"> <td width="11%"></td> -<td width="8%"> +<td width="10%"> <p>-N102</p> </td> -<td width="1%"></td> <td width="77%"> <p>Identical to N2 exept that "web" is replaced @@ -1899,11 +1914,10 @@ by the site s name</p> </td> <tr valign="top" align="left"> <td width="11%"></td> -<td width="8%"> +<td width="10%"> <p>-N103</p> </td> -<td width="1%"></td> <td width="77%"> <p>Identical to N3 exept that "web" is replaced @@ -1911,11 +1925,10 @@ by the site s name</p> </td> <tr valign="top" align="left"> <td width="11%"></td> -<td width="8%"> +<td width="10%"> <p>-N104</p> </td> -<td width="1%"></td> <td width="77%"> <p>Identical to N4 exept that "web" is replaced @@ -1923,11 +1936,10 @@ by the site s name</p> </td> <tr valign="top" align="left"> <td width="11%"></td> -<td width="8%"> +<td width="10%"> <p>-N105</p> </td> -<td width="1%"></td> <td width="77%"> <p>Identical to N5 exept that "web" is replaced @@ -1935,11 +1947,10 @@ by the site s name</p> </td> <tr valign="top" align="left"> <td width="11%"></td> -<td width="8%"> +<td width="10%"> <p>-N199</p> </td> -<td width="1%"></td> <td width="77%"> <p>Identical to N99 exept that "web" is replaced @@ -1947,11 +1958,10 @@ by the site s name</p> </td> <tr valign="top" align="left"> <td width="11%"></td> -<td width="8%"> +<td width="10%"> <p>-N1001</p> </td> -<td width="1%"></td> <td width="77%"> <p>Identical to N1 exept that there is no "web" @@ -1959,11 +1969,10 @@ directory</p> </td> <tr valign="top" align="left"> <td width="11%"></td> -<td width="8%"> +<td width="10%"> <p>-N1002</p> </td> -<td width="1%"></td> 
<td width="77%"> <p>Identical to N2 exept that there is no "web" @@ -1971,11 +1980,10 @@ directory</p> </td> <tr valign="top" align="left"> <td width="11%"></td> -<td width="8%"> +<td width="10%"> <p>-N1003</p> </td> -<td width="1%"></td> <td width="77%"> <p>Identical to N3 exept that there is no "web" @@ -1983,11 +1991,10 @@ directory (option set for g option)</p> </td> <tr valign="top" align="left"> <td width="11%"></td> -<td width="8%"> +<td width="10%"> <p>-N1004</p> </td> -<td width="1%"></td> <td width="77%"> <p>Identical to N4 exept that there is no "web" @@ -1995,11 +2002,10 @@ directory</p> </td> <tr valign="top" align="left"> <td width="11%"></td> -<td width="8%"> +<td width="10%"> <p>-N1005</p> </td> -<td width="1%"></td> <td width="77%"> <p>Identical to N5 exept that there is no "web" @@ -2007,11 +2013,10 @@ directory</p> </td> <tr valign="top" align="left"> <td width="11%"></td> -<td width="8%"> +<td width="10%"> <p>-N1099</p> </td> -<td width="1%"></td> <td width="77%"> <p>Identical to N99 exept that there is no "web" @@ -2171,11 +2176,11 @@ be empty</p> cols="4" cellspacing="0" cellpadding="0"> <tr valign="top" align="left"> <td width="11%"></td> -<td width="7%"> +<td width="5%"> <p>-K0</p> </td> -<td width="2%"></td> +<td width="4%"></td> <td width="77%"> <p>foo.cgi?q=45 -> foo4B54.html?q=45 (relative URI, @@ -2183,11 +2188,11 @@ default)</p> </td> <tr valign="top" align="left"> <td width="11%"></td> -<td width="7%"> +<td width="5%"> <p>-K</p> </td> -<td width="2%"></td> +<td width="4%"></td> <td width="77%"> <p>-> http://www.foobar.com/folder/foo.cgi?q=45 @@ -2195,22 +2200,22 @@ default)</p> </td> <tr valign="top" align="left"> <td width="11%"></td> -<td width="7%"> +<td width="5%"> <p>-K4</p> </td> -<td width="2%"></td> +<td width="4%"></td> <td width="77%"> <p>-> foo.cgi?q=45 (original URL)</p> </td> <tr valign="top" align="left"> <td width="11%"></td> -<td width="7%"> +<td width="5%"> <p>-K3</p> </td> -<td width="2%"></td> +<td width="4%"></td> 
<td width="77%"> <p>-> /folder/foo.cgi?q=45 (absolute URI)</p> @@ -2243,14 +2248,13 @@ default)</p> </table> <!-- TABS --> <table width="100%" border=0 rules="none" frame="void" - cols="4" cellspacing="0" cellpadding="0"> + cols="3" cellspacing="0" cellpadding="0"> <tr valign="top" align="left"> <td width="11%"></td> -<td width="8%"> +<td width="10%"> <p>--get</p> </td> -<td width="1%"></td> <td width="77%"> <p><URLs> get the files indicated, do not seek other @@ -2258,11 +2262,10 @@ URLs (-qg)</p> </td> <tr valign="top" align="left"> <td width="11%"></td> -<td width="8%"> +<td width="10%"> <p>--list</p> </td> -<td width="1%"></td> <td width="77%"> <p><text file> add all URL located in this text file @@ -2457,264 +2460,7 @@ prototypes</b></p></td> <tr valign="top" align="left"> <td width="4%"></td> <td width="95%"> -<p><b>init : void (* myfunction)(void);</b></p></td> -</table> -<!-- INDENTATION --> -<table width="100%" border=0 rules="none" frame="void" - cols="2" cellspacing="0" cellpadding="0"> -<tr valign="top" align="left"> -<td width="4%"></td> -<td width="95%"> -<p><b>free : void (* myfunction)(void);</b></p></td> -</table> -<!-- INDENTATION --> -<table width="100%" border=0 rules="none" frame="void" - cols="2" cellspacing="0" cellpadding="0"> -<tr valign="top" align="left"> -<td width="4%"></td> -<td width="95%"> -<p><b>start : int (* myfunction)(httrackp* -opt);</b></p></td> -</table> -<!-- INDENTATION --> -<table width="100%" border=0 rules="none" frame="void" - cols="2" cellspacing="0" cellpadding="0"> -<tr valign="top" align="left"> -<td width="4%"></td> -<td width="95%"> -<p><b>end : int (* myfunction)(void);</b></p></td> -</table> -<!-- INDENTATION --> -<table width="100%" border=0 rules="none" frame="void" - cols="2" cellspacing="0" cellpadding="0"> -<tr valign="top" align="left"> -<td width="4%"></td> -<td width="95%"> -<p><b>change-options : int (* myfunction)(httrackp* -opt);</b></p></td> -</table> -<!-- INDENTATION --> -<table width="100%" 
border=0 rules="none" frame="void" - cols="2" cellspacing="0" cellpadding="0"> -<tr valign="top" align="left"> -<td width="4%"></td> -<td width="95%"> -<p><b>preprocess-html : int (* myfunction)(char** html,int* -len,char* url</b></p></td> -</table> -<!-- INDENTATION --> -<table width="100%" border=0 rules="none" frame="void" - cols="2" cellspacing="0" cellpadding="0"> -<tr valign="top" align="left"> -<td width="10%"></td> -<td width="89%"> -<p>adresse,char* url fichier);</p> -</td> -</table> -<!-- INDENTATION --> -<table width="100%" border=0 rules="none" frame="void" - cols="2" cellspacing="0" cellpadding="0"> -<tr valign="top" align="left"> -<td width="4%"></td> -<td width="95%"> -<p><b>postprocess-html : int (* myfunction)(char** html,int* -len,char* url</b></p></td> -</table> -<!-- INDENTATION --> -<table width="100%" border=0 rules="none" frame="void" - cols="2" cellspacing="0" cellpadding="0"> -<tr valign="top" align="left"> -<td width="10%"></td> -<td width="89%"> -<p>adresse,char* url fichier);</p> -</td> -</table> -<!-- INDENTATION --> -<table width="100%" border=0 rules="none" frame="void" - cols="2" cellspacing="0" cellpadding="0"> -<tr valign="top" align="left"> -<td width="4%"></td> -<td width="95%"> -<p><b>check-html : int (* myfunction)(char* html,int -len,char* url</b></p></td> -</table> -<!-- INDENTATION --> -<table width="100%" border=0 rules="none" frame="void" - cols="2" cellspacing="0" cellpadding="0"> -<tr valign="top" align="left"> -<td width="10%"></td> -<td width="89%"> -<p>adresse,char* url fichier);</p> -</td> -</table> -<!-- INDENTATION --> -<table width="100%" border=0 rules="none" frame="void" - cols="2" cellspacing="0" cellpadding="0"> -<tr valign="top" align="left"> -<td width="4%"></td> -<td width="95%"> -<p><b>query : char* (* myfunction)(char* -question);</b></p></td> -</table> -<!-- INDENTATION --> -<table width="100%" border=0 rules="none" frame="void" - cols="2" cellspacing="0" cellpadding="0"> -<tr valign="top" align="left"> 
-<td width="4%"></td> -<td width="95%"> -<p><b>query2 : char* (* myfunction)(char* -question);</b></p></td> -</table> -<!-- INDENTATION --> -<table width="100%" border=0 rules="none" frame="void" - cols="2" cellspacing="0" cellpadding="0"> -<tr valign="top" align="left"> -<td width="4%"></td> -<td width="95%"> -<p><b>query3 : char* (* myfunction)(char* -question);</b></p></td> -</table> -<!-- INDENTATION --> -<table width="100%" border=0 rules="none" frame="void" - cols="2" cellspacing="0" cellpadding="0"> -<tr valign="top" align="left"> -<td width="4%"></td> -<td width="95%"> -<p><b>loop : int (* myfunction)(lien</b></p></td> -</table> -<!-- INDENTATION --> -<table width="100%" border=0 rules="none" frame="void" - cols="2" cellspacing="0" cellpadding="0"> -<tr valign="top" align="left"> -<td width="10%"></td> -<td width="89%"> -<p>back* back,int back max,int back index,int lien tot,int -lien ntot,int stat time,hts stat struct* stats);</p> -</td> -</table> -<!-- INDENTATION --> -<table width="100%" border=0 rules="none" frame="void" - cols="2" cellspacing="0" cellpadding="0"> -<tr valign="top" align="left"> -<td width="4%"></td> -<td width="95%"> -<p><b>check-link : int (* myfunction)(char* adr,char* -fil,int status);</b></p></td> -</table> -<!-- INDENTATION --> -<table width="100%" border=0 rules="none" frame="void" - cols="2" cellspacing="0" cellpadding="0"> -<tr valign="top" align="left"> -<td width="4%"></td> -<td width="95%"> -<p><b>pause : void (* myfunction)(char* -lockfile);</b></p></td> -</table> -<!-- INDENTATION --> -<table width="100%" border=0 rules="none" frame="void" - cols="2" cellspacing="0" cellpadding="0"> -<tr valign="top" align="left"> -<td width="4%"></td> -<td width="95%"> -<p><b>save-file : void (* myfunction)(char* -file);</b></p></td> -</table> -<!-- INDENTATION --> -<table width="100%" border=0 rules="none" frame="void" - cols="2" cellspacing="0" cellpadding="0"> -<tr valign="top" align="left"> -<td width="4%"></td> -<td width="95%"> 
-<p><b>save-file2 : void (* myfunction)(char* hostname,char* -filename,char* localfile,int is</b></p></td> -</table> -<!-- INDENTATION --> -<table width="100%" border=0 rules="none" frame="void" - cols="2" cellspacing="0" cellpadding="0"> -<tr valign="top" align="left"> -<td width="10%"></td> -<td width="89%"> -<p>new,int is modified);</p> -</td> -</table> -<!-- INDENTATION --> -<table width="100%" border=0 rules="none" frame="void" - cols="2" cellspacing="0" cellpadding="0"> -<tr valign="top" align="left"> -<td width="4%"></td> -<td width="95%"> -<p><b>link-detected : int (* myfunction)(char* -link);</b></p></td> -</table> -<!-- INDENTATION --> -<table width="100%" border=0 rules="none" frame="void" - cols="2" cellspacing="0" cellpadding="0"> -<tr valign="top" align="left"> -<td width="4%"></td> -<td width="95%"> -<p><b>link-detected2 : int (* myfunction)(char* link, char* -start</b></p></td> -</table> -<!-- INDENTATION --> -<table width="100%" border=0 rules="none" frame="void" - cols="2" cellspacing="0" cellpadding="0"> -<tr valign="top" align="left"> -<td width="10%"></td> -<td width="89%"> -<p>tag);</p> -</td> -</table> -<!-- INDENTATION --> -<table width="100%" border=0 rules="none" frame="void" - cols="2" cellspacing="0" cellpadding="0"> -<tr valign="top" align="left"> -<td width="4%"></td> -<td width="95%"> -<p><b>transfer-status : int (* myfunction)(lien</b></p></td> -</table> -<!-- INDENTATION --> -<table width="100%" border=0 rules="none" frame="void" - cols="2" cellspacing="0" cellpadding="0"> -<tr valign="top" align="left"> -<td width="10%"></td> -<td width="89%"> -<p>back* back);</p> -</td> -</table> -<!-- INDENTATION --> -<table width="100%" border=0 rules="none" frame="void" - cols="2" cellspacing="0" cellpadding="0"> -<tr valign="top" align="left"> -<td width="4%"></td> -<td width="95%"> -<p><b>save-name : int (* myfunction)(char* adr</b></p></td> -</table> -<!-- INDENTATION --> -<table width="100%" border=0 rules="none" frame="void" - cols="2" 
cellspacing="0" cellpadding="0"> -<tr valign="top" align="left"> -<td width="10%"></td> -<td width="89%"> -<p>complete,char* fil complete,char* referer adr,char* -referer fil,char* save);</p> -</td> -</table> -<!-- INDENTATION --> -<table width="100%" border=0 rules="none" frame="void" - cols="2" cellspacing="0" cellpadding="0"> -<tr valign="top" align="left"> -<td width="4%"></td> -<td width="95%"> -<p><b>And <wrappername></b></p></td> -</table> -<!-- INDENTATION --> -<table width="100%" border=0 rules="none" frame="void" - cols="2" cellspacing="0" cellpadding="0"> -<tr valign="top" align="left"> -<td width="10%"></td> -<td width="89%"> -<p>init() functions if defined, called upon plug</p> -</td> +<p><b>see htsdefines.h</b></p></td> </table> <a name="FILES"></a> <h2>FILES</h2> diff --git a/html/index.html b/html/index.html index 993e9fc..a393563 100644 --- a/html/index.html +++ b/html/index.html @@ -142,7 +142,7 @@ h4 { margin: 0; font-weight: bold; font-size: 1.18em; } <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> + <td id="footer"><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> </tr> </table> diff --git a/html/library.html b/html/library.html index 2b61131..4d555bc 100755 --- a/html/library.html +++ b/html/library.html @@ -125,7 +125,7 @@ You may also want to check the <tt>httrack.c</tt> and <tt>httrack.h<tt> files to <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> + <td id="footer"><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> </tr> </table> diff --git a/html/options.html b/html/options.html index 4f552a9..f59b941 100644 --- a/html/options.html +++ b/html/options.html @@ 
-352,7 +352,7 @@ Add debug informations on log files <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> + <td id="footer"><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> </tr> </table> diff --git a/html/overview.html b/html/overview.html index 934d992..7cd28c8 100644 --- a/html/overview.html +++ b/html/overview.html @@ -145,7 +145,7 @@ downloads. HTTrack is fully configurable, and has an integrated help system. <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> + <td id="footer"><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> </tr> </table> diff --git a/html/plug.html b/html/plug.html index 42b0895..1b82c46 100755 --- a/html/plug.html +++ b/html/plug.html @@ -108,85 +108,215 @@ We'll see there some examples. <br><br> -The <tt>httrack</tt> commandline tool allows (since the 3.30 release) to plug external functions to various callbacks defined in httrack.<br>
-See also: the <tt>httrack-library.h</tt> prototype file, and the <tt>callbacks-example.c</tt> given in the httrack archive.<br>
+The <tt>httrack</tt> commandline tool allows (since the 3.30 release) to plug external functions to various callbacks defined in httrack.
+The 3.41 release introduces a cleaned up version of callbacks, with three major changes:
+<ul>
+<li>Cleaned up function prototypes, with two arguments always passed (the caller carg structure, and the httrackp* object), convenient to pass a user-defined pointer (see <tt>CALLBACKARG_USERDEF(carg)</tt>)</li>
+<li>The httrackp* option structure can be directly accessed to plug callbacks (no need to give the callback name and function name in the commandline!)</li>
+<li>The callback plug is made through the CHAIN_FUNCTION() helper, allowing to chain multiple callbacks of the same type (the callbacks MUST preserve the chain by calling ancestors)</li>
+</ul>
<br>
+References:
+<ul>
+<li>the <tt>httrack-library.h</tt> prototype file
+<br />
+Note: the <i>Initialization</i>, <i>Main functions</i>, <i>Options handling</i> and <i>Wrapper functions</i> sections are generally the only ones to be considered. +</li> +<li>the <tt>htsdefines.h</tt> prototype file, which describes callback function prototypes</li>
+<li>the <tt>htsopt.h</tt> prototype file, which describes the full httrackp* structure</li>
+<li>the <tt>callbacks-example*.c</tt> files given in the httrack archive</li>
+<li>the <tt>htsjava.c</tt> source file (the java class plugin ; overrides 'detect' and 'parse')</li>
+<li>the example given at the end of this document</li>
+</ul>
+
+<br />
+Below the list of functions to be defined in the module (plugin).<br />
+<br />
+
+<table width="100%">
+<tr><td><b><i>module function name</i></b></td><td><b>function description</b></td><td><b>function signature</b></td></tr>
+<tr><td background="img/fade.gif"><i>hts_plug</i></td><td background="img/fade.gif">
+The module entry point. The opt structure can be used to plug callbacks, using the CHAIN_FUNCTION() macro helper. The argv optional argument is the one passed in the commandline as --wrapper parameter.<br>return value: 1 upon success, 0 upon error (the mirror will then be aborted)<br />
+
+<br />
+Wrappers can be plugged inside hts_plug() using:<br />
+<tt>
+CHAIN_FUNCTION(opt, <callback name>, <our callback function name>, <our callback function optional custom pointer argument>);
+</tt>
+<br />
+
+<br />
Example:
+<br />
<tt>
-httrack --wrapper check-html=callback:process_file ..
+CHAIN_FUNCTION(opt, check_html, process, userdef);
</tt>
-<br>
-With the callback.so (or callback.dll) module defined as below:
+<br />
-<pre>
-int process_file(char* html, int len, char* url_adresse, char* url_fichier) {
- printf("now parsing %s%s..\n", url_adresse, url_fichier);
- strcpy(currentURLBeingParsed, url_adresse);
- strcat(currentURLBeingParsed, url_fichier);
- return 1; /* success */
-}
-</pre>
+</td><td background="img/fade.gif"><tt>extern int hts_plug(httrackp *opt, const char* argv);</tt></td></tr>
+
+<!-- -->
+
+<tr><td background="img/fade.gif"><i>hts_unplug</i></td><td background="img/fade.gif">
+The module exit point. To free allocated resources without using global variables, use the uninit callback (see below)</td><td background="img/fade.gif"><tt>extern int hts_unplug(httrackp *opt);</tt></td></tr>
+
+</table>
-Below the list of callbacks, and associated external wrappers:<br>
+
+<br />
+Note that all callbacks (except init and uninit) take the following as their first two arguments:
+<ul>
+<li>the t_hts_callbackarg structure<br />
+this structure holds the callback chain (parent callbacks defined before the current callback) pointers, and the user-defined pointer (see <tt>CALLBACKARG_USERDEF(carg)</tt>)
+</li>
+<li>the httrackp structure<br />
+this structure, holding all current httrack options and mirror state, can be read or modified
+</li>
+</ul>
+
+<br />
+Below the list of callbacks, and associated external wrappers.
<table width="100%">
-<tr><td><b>"<i>callback name</i>"</b></td><td><b>callback description</b></td><td><b>callback function signature</b></td></tr>
-
-<tr><td background="img/fade.gif">"<i>init</i>"</td><td background="img/fade.gif"><font color="red">Note: deprecated, should not be used anymore (unsafe callback) - see "start" callback or wrapper_init() module function below this table.</font>Called during initialization ; use of htswrap_add (see <tt>httrack-library.h</tt>) is permitted inside this function to setup other callbacks.<br>return value: none</td><td background="img/fade.gif"><tt>void (* myfunction)(void);</tt></td></tr>
-<tr><td background="img/fade.gif">"<i>free</i>"</td><td background="img/fade.gif"><font color="red">Note: deprecated, should not be used anymore (unsafe callback) - see "end" callback or wrapper_exit() module function below this table.</font><br />Called during un-initialization<br>return value: none</td><td background="img/fade.gif"><tt>void (* myfunction)(void);</tt></td></tr>
-<tr><td background="img/fade.gif">"<i>start</i>"</td><td background="img/fade.gif">Called when the mirror starts. The <tt>opt</tt> structure passed lists all options defined for this mirror. You may modify the <tt>opt</tt> structure to fit your needs. Besides, use of htswrap_add (see <tt>httrack-library.h</tt>) is permitted inside this function to setup other callbacks.<br>return value: 1 upon success, 0 upon error (the mirror will then be aborted)</td><td background="img/fade.gif"><tt>int (* myfunction)(httrackp* opt);</tt></td></tr>
-<tr><td background="img/fade.gif">"<i>end</i>"</td><td background="img/fade.gif">Called when the mirror ends<br>return value: 1 upon success, 0 upon error (the mirror will then be considered aborted)</td><td background="img/fade.gif"><tt>int (* myfunction)(void);</tt></td></tr>
-<tr><td background="img/fade.gif">"<i>change-options</i>"</td><td background="img/fade.gif">Called when options are to be changed. The <tt>opt</tt> structure passed lists all options, updated to take account of recent changes<br>return value: 1 upon success, 0 upon error (the mirror will then be aborted)</td><td background="img/fade.gif"><tt>int (* myfunction)(httrackp* opt);</tt></td></tr>
-<tr><td background="img/fade.gif">"<i>check-html</i>"</td><td background="img/fade.gif">Called when a document (which may not be an html document) is to be parsed. The <tt>html</tt> address points to the document data, of lenth <tt>len</tt>. The <tt>url_adresse</tt> and <tt>url_fichier</tt> are the address and URI of the file being processed<br>return value: 1 if the parsing can be processed, 0 if the file must be skipped without being parsed</td><td background="img/fade.gif"><tt>int (* myfunction)(char* html,int len,char* url_adresse,char* url_fichier);</tt></td></tr>
-<tr><td background="img/fade.gif">"<i>preprocess-html</i>"</td><td background="img/fade.gif">Called when a document (which is an html document) is to be parsed (original, not yet modified document). The <tt>html</tt> address points to the document data address (char**), and the <tt>length</tt> address points to the lenth of this document. Both pointer values (address and size) can be modified to change the document. It is up to the callback function to reallocate the given pointer (using standard C library realloc()/free() functions), which will be free()'ed by the engine. Hence, return of static buffers is strictly forbidden, and the use of strdup() in such cases is advised. The <tt>url_adresse</tt> and <tt>url_fichier</tt> are the address and URI of the file being processed<br>return value: 1 if the new pointers can be applied (default value)</td><td background="img/fade.gif"><tt>int (* myfunction)(char** html,int* len,char* url_adresse,char* url_fichier);</tt></td></tr>
-<tr><td background="img/fade.gif">"<i>postprocess-html</i>"</td><td background="img/fade.gif">Called when a document (which is an html document) is parsed and transformed (links rewritten). The <tt>html</tt> address points to the document data address (char**), and the <tt>length</tt> address points to the lenth of this document. Both pointer values (address and size) can be modified to change the document. It is up to the callback function to reallocate the given pointer (using standard C library realloc()/free() functions), which will be free()'ed by the engine. Hence, return of static buffers is strictly forbidden, and the use of strdup() in such cases is advised. The <tt>url_adresse</tt> and <tt>url_fichier</tt> are the address and URI of the file being processed<br>return value: 1 if the new pointers can be applied (default value)</td><td background="img/fade.gif"><tt>int (* myfunction)(char** html,int* len,char* url_adresse,char* url_fichier);</tt></td></tr>
-<tr><td background="img/fade.gif">"<i>query</i>"</td><td background="img/fade.gif">Called when the wizard needs to ask a question. The <tt>question</tt> string contains the question for the (human) user<br>return value: the string answer ("" for default reply)</td><td background="img/fade.gif"><tt>char* (* myfunction)(char* question);</tt></td></tr>
-<tr><td background="img/fade.gif">"<i>query2</i>"</td><td background="img/fade.gif">Called when the wizard needs to ask a question</td><td background="img/fade.gif"><tt>char* (* myfunction)(char* question);</tt></td></tr>
-<tr><td background="img/fade.gif">"<i>query3</i>"</td><td background="img/fade.gif">Called when the wizard needs to ask a question</td><td background="img/fade.gif"><tt>char* (* myfunction)(char* question);</tt></td></tr>
-<tr><td background="img/fade.gif">"<i>loop</i>"</td><td background="img/fade.gif">Called periodically (informational, to display statistics)<br>return value: 1 if the mirror can continue, 0 if the mirror must be aborted</td><td background="img/fade.gif"><tt>int (* myfunction)(lien_back* back,int back_max,int back_index,int lien_tot,int lien_ntot,int stat_time,hts_stat_struct* stats);</tt></td></tr>
-<tr><td background="img/fade.gif">"<i>check-link</i>"</td><td background="img/fade.gif">Called when a link has to be tested. The <tt>adr</tt> and <tt>fil</tt> are the address and URI of the link being tested. The passed <tt>status</tt> value has the following meaning: 0 if the link is to be accepted by default, 1 if the link is to be refused by default, and -1 if no decision has yet been taken by the engine<br>return value: same meaning as the passed <tt>status</tt> value ; you may generally return -1 to let the engine take the decision by itself</td><td background="img/fade.gif"><tt>int (* myfunction)(char* adr,char* fil,int status);</tt></td></tr>
-<tr><td background="img/fade.gif">"<i>check-mime</i>"</td><td background="img/fade.gif">Called when a link download has begun, and needs to be tested against its MIME type. The <tt>adr</tt> and <tt>fil</tt> are the address and URI of the link being tested, and the <tt>mime</tt> string contains the link type being processed. The passed <tt>status</tt> value has the following meaning: 0 if the link is to be accepted by default, 1 if the link is to be refused by default, and -1 if no decision has yet been taken by the engine<br>return value: same meaning as the passed <tt>status</tt> value ; you may generally return -1 to let the engine take the decision by itself</td><td background="img/fade.gif"><tt>int (* myfunction)(char* adr,char* fil,char* mime,int status);</tt></td></tr>
-<tr><td background="img/fade.gif">"<i>pause</i>"</td><td background="img/fade.gif">Called when the engine must pause. When the <tt>lockfile</tt> passed is deleted, the function can return<br>return value: none</td><td background="img/fade.gif"><tt>void (* myfunction)(char* lockfile);</tt></td></tr>
-<tr><td background="img/fade.gif">"<i>save-file</i>"</td><td background="img/fade.gif">Called when a file is to be saved on disk<br>return value: none</td><td background="img/fade.gif"><tt>void (* myfunction)(char* file);</tt></td></tr>
-<tr><td background="img/fade.gif">"<i>save-file2</i>"</td><td background="img/fade.gif">Called when a file is to be saved or checked on disk<br>The hostname, filename and local filename are given. Two additional flags tells if the file is new (is_new) and is the file is to be modified (is_modified).<br>(!is_new && !is_modified): the file is up-to-date, and will not be modified<br>(is_new && is_modified): a new file will be written (or an updated file is being written)<br>(!is_new && is_modified): a file is being updated (append)<br>(is_new && !is_modified): an empty file will be written ("do not recatch locally erased files")<br>return value: none</td><td background="img/fade.gif"><tt>void (* myfunction)(char* hostname,char* filename,char* localfile,int is_new,int is_modified);</tt></td></tr>
-
-typedef void (* t_hts_htmlcheck_filesave2)(); -
-
-<tr><td background="img/fade.gif">"<i>link-detected</i>"</td><td background="img/fade.gif">Called when a link has been detected<br>return value: 1 if the link can be analyzed, 0 if the link must not even be considered</td><td background="img/fade.gif"><tt>int (* myfunction)(char* link);</tt></td></tr>
-<tr><td background="img/fade.gif">"<i>transfer-status</i>"</td><td background="img/fade.gif">Called when a file has been processed (downloaded, updated, or error)<br>return value: must return 1</td><td background="img/fade.gif"><tt>int (* myfunction)(lien_back* back);</tt></td></tr>
-<tr><td background="img/fade.gif">"<i>save-name</i>"</td><td background="img/fade.gif">Called when a local filename has to be processed. The <tt>adr_complete</tt> and <tt>fil_complete</tt> are the address and URI of the file being saved ; the <tt>referer_adr</tt> and <tt>referer_fil</tt> are the address and URI of the referer link. The <tt>save</tt> string contains the local filename being used. You may modifiy the <tt>save</tt> string to fit your needs, up to 1024 bytes (note: filename collisions, if any, will be handled by the engine by renaming the file into file-2.ext, file-3.ext ..).<br>return value: must return 1</td><td background="img/fade.gif"><tt>int (* myfunction)(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save);</tt></td></tr>
-<tr><td background="img/fade.gif">"<i>send-header</i>"</td><td background="img/fade.gif">Called when HTTP headers are to be sent to the remote server. The <tt>buff</tt> buffer contains text headers, <tt>adr</tt> and <tt>fil</tt> the URL, and <tt>referer_adr</tt> and <tt>referer_fil</tt> the referer URL. The <tt>outgoing</tt> structure contains all information related to the current slot.<br>return value: 1 if the mirror can continue, 0 if the mirror must be aborted</td><td background="img/fade.gif"><tt>int (* myfunction)(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* outgoing);</tt></td></tr>
-<tr><td background="img/fade.gif">"<i>receive-header</i>"</td><td background="img/fade.gif">Called when HTTP headers are recevived from the remote server. The <tt>buff</tt> buffer contains text headers, <tt>adr</tt> and <tt>fil</tt> the URL, and <tt>referer_adr</tt> and <tt>referer_fil</tt> the referer URL. The <tt>incoming</tt> structure contains all information related to the current slot.<br>return value: 1 if the mirror can continue, 0 if the mirror must be aborted</td><td background="img/fade.gif"><tt>int (* myfunction)(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* incoming);</tt></td></tr>
+<tr><td><b><i>callback name</i></b></td><td><b>callback description</b></td><td><b>callback function signature</b></td></tr>
+
+<tr><td background="img/fade.gif"><i>init</i></td><td background="img/fade.gif">Note: the use of the "start" callback is advised. Called during initialization.<br>return value: none</td><td background="img/fade.gif"><tt>void mycallback(t_hts_callbackarg *carg);</tt></td></tr>
+<tr><td background="img/fade.gif"><i>uninit</i></td><td background="img/fade.gif">Note: the use of the "end" callback is advised.<br />Called during un-initialization<br>return value: none</td><td background="img/fade.gif"><tt>void mycallback(t_hts_callbackarg *carg);</tt></td></tr>
+<tr><td background="img/fade.gif"><i>start</i></td><td background="img/fade.gif">Called when the mirror starts. The <tt>opt</tt> structure passed lists all options defined for this mirror. You may modify the <tt>opt</tt> structure to fit your needs.<br>return value: 1 upon success, 0 upon error (the mirror will then be aborted)</td><td background="img/fade.gif"><tt>int mycallback(t_hts_callbackarg *carg, httrackp* opt);</tt></td></tr>
+<tr><td background="img/fade.gif"><i>end</i></td><td background="img/fade.gif">Called when the mirror ends<br>return value: 1 upon success, 0 upon error (the mirror will then be considered aborted)</td><td background="img/fade.gif"><tt>int mycallback(t_hts_callbackarg *carg, httrackp* opt);</tt></td></tr>
+<tr><td background="img/fade.gif"><i>chopt</i></td><td background="img/fade.gif">Called when options are to be changed. The <tt>opt</tt> structure passed lists all options, updated to take account of recent changes<br>return value: 1 upon success, 0 upon error (the mirror will then be aborted)</td><td background="img/fade.gif"><tt>int mycallback(t_hts_callbackarg *carg, httrackp* opt);</tt></td></tr>
+<tr><td background="img/fade.gif"><i>preprocess</i></td><td background="img/fade.gif">Called when a document (which is an html document) is to be parsed (original, not yet modified document). The <tt>html</tt> address points to the document data address (char**), and the <tt>len</tt> address points to the length of this document. Both pointer values (address and size) can be modified to change the document. It is up to the callback function to reallocate the given pointer (using the hts_realloc()/hts_free() library functions), which will be free()'ed by the engine. Hence, return of static buffers is strictly forbidden, and the use of hts_strdup() in such cases is advised. The <tt>url_address</tt> and <tt>url_file</tt> are the address and URI of the file being processed<br>return value: 1 if the new pointers can be applied (default value)</td><td background="img/fade.gif"><tt>int mycallback(t_hts_callbackarg *carg, httrackp* opt, char** html, int* len, const char* url_address, const char* url_file);</tt></td></tr>
+<tr><td background="img/fade.gif"><i>postprocess</i></td><td background="img/fade.gif">Called when a document (which is an html document) is parsed and transformed (links rewritten). The <tt>html</tt> address points to the document data address (char**), and the <tt>len</tt> address points to the length of this document. Both pointer values (address and size) can be modified to change the document. It is up to the callback function to reallocate the given pointer (using the hts_realloc()/hts_free() library functions), which will be free()'ed by the engine. Hence, return of static buffers is strictly forbidden, and the use of hts_strdup() in such cases is advised. The <tt>url_address</tt> and <tt>url_file</tt> are the address and URI of the file being processed<br>return value: 1 if the new pointers can be applied (default value)</td><td background="img/fade.gif"><tt>int mycallback(t_hts_callbackarg *carg, httrackp* opt, char** html, int* len, const char* url_address, const char* url_file);</tt></td></tr>
+<tr><td background="img/fade.gif"><i>check_html</i></td><td background="img/fade.gif">Called when a document (which may not be an html document) is to be parsed. The <tt>html</tt> address points to the document data, of length <tt>len</tt>. The <tt>url_address</tt> and <tt>url_file</tt> are the address and URI of the file being processed<br>return value: 1 if the parsing can be processed, 0 if the file must be skipped without being parsed</td><td background="img/fade.gif"><tt>int mycallback(t_hts_callbackarg *carg, httrackp* opt, char* html, int len, const char* url_address, const char* url_file);</tt></td></tr>
+<tr><td background="img/fade.gif"><i>query</i></td><td background="img/fade.gif">Called when the wizard needs to ask a question. The <tt>question</tt> string contains the question for the (human) user<br>return value: the string answer ("" for default reply)</td><td background="img/fade.gif"><tt>const char* mycallback(t_hts_callbackarg *carg, httrackp* opt, const char* question);</tt></td></tr>
+<tr><td background="img/fade.gif"><i>query2</i></td><td background="img/fade.gif">Called when the wizard needs to ask a question</td><td background="img/fade.gif"><tt>const char* mycallback(t_hts_callbackarg *carg, httrackp* opt, const char* question);</tt></td></tr>
+<tr><td background="img/fade.gif"><i>query3</i></td><td background="img/fade.gif">Called when the wizard needs to ask a question</td><td background="img/fade.gif"><tt>const char* mycallback(t_hts_callbackarg *carg, httrackp* opt, const char* question);</tt></td></tr>
+<tr><td background="img/fade.gif"><i>loop</i></td><td background="img/fade.gif">Called periodically (informational, to display statistics)<br>return value: 1 if the mirror can continue, 0 if the mirror must be aborted</td><td background="img/fade.gif"><tt>int mycallback(t_hts_callbackarg *carg, httrackp* opt, lien_back* back, int back_max, int back_index, int lien_tot, int lien_ntot, int stat_time, hts_stat_struct* stats);</tt></td></tr>
+<tr><td background="img/fade.gif"><i>check_link</i></td><td background="img/fade.gif">Called when a link has to be tested. The <tt>adr</tt> and <tt>fil</tt> are the address and URI of the link being tested. The passed <tt>status</tt> value has the following meaning: 0 if the link is to be accepted by default, 1 if the link is to be refused by default, and -1 if no decision has yet been taken by the engine<br>return value: same meaning as the passed <tt>status</tt> value ; you may generally return -1 to let the engine take the decision by itself</td><td background="img/fade.gif"><tt>int mycallback(t_hts_callbackarg *carg, httrackp* opt, const char* adr, const char* fil, int status);</tt></td></tr>
+<tr><td background="img/fade.gif"><i>check_mime</i></td><td background="img/fade.gif">Called when a link download has begun, and needs to be tested against its MIME type. The <tt>adr</tt> and <tt>fil</tt> are the address and URI of the link being tested, and the <tt>mime</tt> string contains the link type being processed. The passed <tt>status</tt> value has the following meaning: 0 if the link is to be accepted by default, 1 if the link is to be refused by default, and -1 if no decision has yet been taken by the engine<br>return value: same meaning as the passed <tt>status</tt> value ; you may generally return -1 to let the engine take the decision by itself</td><td background="img/fade.gif"><tt>int mycallback(t_hts_callbackarg *carg, httrackp* opt, const char* adr, const char* fil, const char* mime, int status);</tt></td></tr>
+<tr><td background="img/fade.gif"><i>pause</i></td><td background="img/fade.gif">Called when the engine must pause. When the <tt>lockfile</tt> passed is deleted, the function can return<br>return value: none</td><td background="img/fade.gif"><tt>void mycallback(t_hts_callbackarg *carg, httrackp* opt, const char* lockfile);</tt></td></tr>
+<tr><td background="img/fade.gif"><i>filesave</i></td><td background="img/fade.gif">Called when a file is to be saved on disk<br>return value: none</td><td background="img/fade.gif"><tt>void mycallback(t_hts_callbackarg *carg, httrackp* opt, const char* file);</tt></td></tr>
+<tr><td background="img/fade.gif"><i>filesave2</i></td><td background="img/fade.gif">Called when a file is to be saved or checked on disk<br>The hostname, filename and local filename are given. Three additional flags tell whether the local file is new (is_new), whether the local file is to be modified (is_modified), and whether the file was not updated remotely (not_updated).<br>(!is_new && !is_modified): the file is up-to-date, and will not be modified<br>(is_new && is_modified): a new file will be written (or an updated file is being written)<br>(!is_new && is_modified): a file is being updated (append)<br>(is_new && !is_modified): an empty file will be written ("do not recatch locally erased files")<br>not_updated: the file was not re-downloaded because it was up-to-date (no data transferred again)<br><br>return value: none</td><td background="img/fade.gif"><tt>void mycallback(t_hts_callbackarg *carg, httrackp* opt, const char* hostname, const char* filename, const char* localfile, int is_new, int is_modified, int not_updated);</tt></td></tr>
+
+<tr><td background="img/fade.gif"><i>linkdetected</i></td><td background="img/fade.gif">Called when a link has been detected<br>return value: 1 if the link can be analyzed, 0 if the link must not even be considered</td><td background="img/fade.gif"><tt>int mycallback(t_hts_callbackarg *carg, httrackp* opt, char* link);</tt></td></tr>
+<tr><td background="img/fade.gif"><i>linkdetected2</i></td><td background="img/fade.gif">Called when a link has been detected<br>return value: 1 if the link can be analyzed, 0 if the link must not even be considered</td><td background="img/fade.gif"><tt>int mycallback(t_hts_callbackarg *carg, httrackp* opt, char* link, const char* tag_start);</tt></td></tr>
+<tr><td background="img/fade.gif"><i>xfrstatus</i></td><td background="img/fade.gif">Called when a file has been processed (downloaded, updated, or error)<br>return value: must return 1</td><td background="img/fade.gif"><tt>int mycallback(t_hts_callbackarg *carg, httrackp* opt, lien_back* back);</tt></td></tr>
+<tr><td background="img/fade.gif"><i>savename</i></td><td background="img/fade.gif">Called when a local filename has to be processed. The <tt>adr_complete</tt> and <tt>fil_complete</tt> are the address and URI of the file being saved ; the <tt>referer_adr</tt> and <tt>referer_fil</tt> are the address and URI of the referer link. The <tt>save</tt> string contains the local filename being used. You may modify the <tt>save</tt> string to fit your needs, up to 1024 bytes (note: filename collisions, if any, will be handled by the engine by renaming the file into file-2.ext, file-3.ext ..).<br>return value: must return 1</td><td background="img/fade.gif"><tt>int mycallback(t_hts_callbackarg *carg, httrackp* opt, const char* adr_complete, const char* fil_complete, const char* referer_adr, const char* referer_fil, char* save);</tt></td></tr>
+<tr><td background="img/fade.gif"><i>sendhead</i></td><td background="img/fade.gif">Called when HTTP headers are to be sent to the remote server. The <tt>buff</tt> buffer contains text headers, <tt>adr</tt> and <tt>fil</tt> the URL, and <tt>referer_adr</tt> and <tt>referer_fil</tt> the referer URL. The <tt>outgoing</tt> structure contains all information related to the current slot.<br>return value: 1 if the mirror can continue, 0 if the mirror must be aborted</td><td background="img/fade.gif"><tt>int mycallback(t_hts_callbackarg *carg, httrackp* opt, char* buff, const char* adr, const char* fil, const char* referer_adr, const char* referer_fil, htsblk* outgoing);</tt></td></tr>
+<tr><td background="img/fade.gif"><i>receivehead</i></td><td background="img/fade.gif">Called when HTTP headers are received from the remote server. The <tt>buff</tt> buffer contains text headers, <tt>adr</tt> and <tt>fil</tt> the URL, and <tt>referer_adr</tt> and <tt>referer_fil</tt> the referer URL. The <tt>incoming</tt> structure contains all information related to the current slot.<br>return value: 1 if the mirror can continue, 0 if the mirror must be aborted</td><td background="img/fade.gif"><tt>int mycallback(t_hts_callbackarg *carg, httrackp* opt, char* buff, const char* adr, const char* fil, const char* referer_adr, const char* referer_fil, htsblk* incoming);</tt></td></tr>
+
+<tr><td background="img/fade.gif"><i>detect</i></td><td background="img/fade.gif">Called when an unknown document is to be parsed. The <tt>str</tt> structure contains all information related to the document.<br>return value: 1 if the type is known and can be parsed, 0 if the document type is unknown</td><td background="img/fade.gif"><tt>int mycallback(t_hts_callbackarg *carg, httrackp* opt, htsmoduleStruct* str);</tt></td></tr>
+<tr><td background="img/fade.gif"><i>parse</i></td><td background="img/fade.gif">The <tt>str</tt> structure contains all information related to the document.<br>return value: 1 if the document was successfully parsed, 0 if an error occurred</td><td background="img/fade.gif"><tt>int mycallback(t_hts_callbackarg *carg, httrackp* opt, htsmoduleStruct* str);</tt></td></tr>
+
</table>
<br><br> -Below additional function names that can be defined inside the module (DLL/.so):<br>
- -<table width="100%" ID="Table1">
-<tr><td><b>"<i>module function name</i>"</b></td><td><b>function description</b></td></tr>
+Note: the optional libhttrack-plugin module (libhttrack-plugin.dll or libhttrack-plugin.so), if found in the library environment, is loaded automatically, and its <tt>hts_plug()</tt> function is called.<br />
-<tr><td background="img/fade.gif"><i>int <b>function-name</b>_init(char *args);</i></td><td background="img/fade.gif">Called when a function named <b>function-name</b> is extracted from the current module (same as wrapper_init). The optional <tt>args</tt> provides additional commandline parameters. Returns 1 upon success, 0 if the function should not be extracted.</td></tr>
-<tr><td background="img/fade.gif"><i>int wrapper_init(char *fname, char *args);</i></td><td background="img/fade.gif">Called when a function named <tt>fname</tt> is extracted from the current module. The optional <tt>args</tt> provides additional commandline parameters. Besides, use of htswrap_add (see <tt>httrack-library.h</tt>) is permitted inside this function to setup other callbacks. Returns 1 upon success, 0 if the function should not be extracted.</td></tr>
-<tr><td background="img/fade.gif"><i>int wrapper_exit(void);</i></td><td background="img/fade.gif">Called when the module is unloaded. The function should return 1 (but the result is ignored).</td></tr>
-</table> - -<br><br> -Below additional function names that can be defined inside the optional libhttrack-plugin module (libhttrack-plugin.dll or libhttrack-plugin.so) searched inside common library path:<br>
- -<table width="100%" ID="Table2">
-<tr><td><b>"<i>module function name</i>"</b></td><td><b>function description</b></td></tr>
+<br />
+An example is generally more efficient than anything else, so let's write our first module, which simply prints all parsed html files:
+<table width="100%" border="2">
+<tr><td>
+<pre>
+/* system includes */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* standard httrack module includes */
+#include "httrack-library.h"
+#include "htsopt.h"
+#include "htsdefines.h"
+
+/* local function called as "check_html" callback */
+static int process_file(t_hts_callbackarg /*the carg structure, holding various information*/*carg, /*the option settings*/httrackp *opt,
+ /*other parameters are callback-specific*/
+ char* html, int len, const char* url_address, const char* url_file) {
+ void *ourDummyArg = (void*) CALLBACKARG_USERDEF(carg); /*optional user-defined arg*/
+
+ /* call parent functions if multiple callbacks are chained. you can skip this part, if you don't want previous callbacks to be called. */
+ if (CALLBACKARG_PREV_FUN(carg, check_html) != NULL) {
+ if (!CALLBACKARG_PREV_FUN(carg, check_html)(CALLBACKARG_PREV_CARG(carg), opt,
+ html, len, url_address, url_file)) {
+ return 0; /* abort */
+ }
+ }
+
+ printf("file %s%s content: %s\n", url_address, url_file, html);
+ return 1; /* success */
+}
+
+/* local function called as "end" callback */
+static int end_of_mirror(t_hts_callbackarg /*the carg structure, holding various information*/*carg, /*the option settings*/httrackp *opt) {
+ void *ourDummyArg = (void*) CALLBACKARG_USERDEF(carg); /*optional user-defined arg*/
+
+ /* processing */
+ fprintf(stderr, "That's all, folks!\n");
+
+ /* call parent functions if multiple callbacks are chained. you can skip this part, if you don't want previous callbacks to be called. */
+ if (CALLBACKARG_PREV_FUN(carg, end) != NULL) {
+ /* status is ok on our side, return other callback's status */
+ return CALLBACKARG_PREV_FUN(carg, end)(CALLBACKARG_PREV_CARG(carg), opt);
+ }
+
+ return 1; /* success */
+}
+
+/*
+module entry point
+the function name and prototype MUST match this prototype
+*/
+EXTERNAL_FUNCTION int hts_plug(httrackp *opt, const char* argv) {
+ /* optional argument passed in the commandline we won't be using here */
+ const char *arg = strchr(argv, ',');
+ if (arg != NULL)
+ arg++;
+
+ /* plug callback functions */
+ CHAIN_FUNCTION(opt, check_html, process_file, /*optional user-defined arg*/NULL);
+ CHAIN_FUNCTION(opt, end, end_of_mirror, /*optional user-defined arg*/NULL);
+
+ return 1; /* success */
+}
+
+/*
+module exit point
+the function name and prototype MUST match this prototype
+*/
+EXTERNAL_FUNCTION int hts_unplug(httrackp *opt) {
+ fprintf(stderr, "Module unplugged");
+
+ return 1; /* success */
+}
+</pre>
+</td></tr></table>
+
+<br />
+Compile this file ; for example:
+<br />
+<tt>
+gcc -O -g3 -shared -o mylibrary.so myexample.c
+</tt>
+<br />
+and plug the module using the commandline ; for example:
+<br />
+<tt>
+httrack --wrapper mylibrary http://www.example.com
+</tt>
+<br />
+or, if some parameters are desired:
+<br />
+<tt>
+httrack --wrapper mylibrary,myparameter-string http://www.example.com
+</tt>
+<br />
+(the "myparameter-string" string follows the comma in the 'argv' parameter passed to the hts_plug entry point; the example above extracts it into the 'arg' pointer)
+<br />
-<tr><td background="img/fade.gif"><i>void plugin_init(void);</i></td><td background="img/fade.gif">Called if the module (named libhttrack-plugin.(so|dll)) is found in the library path. Use of htswrap_add (see <tt>httrack-library.h</tt>) is permitted inside this function to setup other callbacks.</td></tr>
- -</table> - -<br><br> - <br><br> <!-- ==================== Start epilogue ==================== --> @@ -202,7 +332,7 @@ Below additional function names that can be defined inside the optional libhttra <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> + <td id="footer"><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> </tr> </table> diff --git a/html/plug_330.html b/html/plug_330.html new file mode 100644 index 0000000..d2b7ffb --- /dev/null +++ b/html/plug_330.html @@ -0,0 +1,215 @@ +<html xmlns="http://www.w3.org/1999/xhtml" lang="en"> + +<head> + <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" /> + <meta name="description" content="HTTrack is an easy-to-use website mirror utility. It allows you to download a World Wide website from the Internet to a local directory,building recursively all structures, getting html, images, and other files from the server to your computer. Links are rebuiltrelatively so that you can freely browse to the local site (works with any browser). You can mirror several sites together so that you can jump from one toanother. You can, also, update an existing mirror site, or resume an interrupted download. 
The robot is fully configurable, with an integrated help" /> + <meta name="keywords" content="httrack, HTTRACK, HTTrack, winhttrack, WINHTTRACK, WinHTTrack, offline browser, web mirror utility, aspirateur web, surf offline, web capture, www mirror utility, browse offline, local site builder, website mirroring, aspirateur www, internet grabber, capture de site web, internet tool, hors connexion, unix, dos, windows 95, windows 98, solaris, ibm580, AIX 4.0, HTS, HTGet, web aspirator, web aspirateur, libre, GPL, GNU, free software" /> + <title>HTTrack Website Copier - Offline Browser</title> + + <style type="text/css"> + <!-- + +body { + margin: 0; padding: 0; margin-bottom: 15px; margin-top: 8px; + background: #77b; +} +body, td { + font: 14px "Trebuchet MS", Verdana, Arial, Helvetica, sans-serif; + } + +#subTitle { + background: #000; color: #fff; padding: 4px; font-weight: bold; + } + +#siteNavigation a, #siteNavigation .current { + font-weight: bold; color: #448; + } +#siteNavigation a:link { text-decoration: none; } +#siteNavigation a:visited { text-decoration: none; } + +#siteNavigation .current { background-color: #ccd; } + +#siteNavigation a:hover { text-decoration: none; background-color: #fff; color: #000; } +#siteNavigation a:active { text-decoration: none; background-color: #ccc; } + + +a:link { text-decoration: underline; color: #00f; } +a:visited { text-decoration: underline; color: #000; } +a:hover { text-decoration: underline; color: #c00; } +a:active { text-decoration: underline; } + +#pageContent { + clear: both; + border-bottom: 6px solid #000; + padding: 10px; padding-top: 20px; + line-height: 1.65em; + background-image: url(images/bg_rings.gif); + background-repeat: no-repeat; + background-position: top right; + } + +#pageContent, #siteNavigation { + background-color: #ccd; + } + + +.imgLeft { float: left; margin-right: 10px; margin-bottom: 10px; } +.imgRight { float: right; margin-left: 10px; margin-bottom: 10px; } + +hr { height: 1px; color: 
#000; background-color: #000; margin-bottom: 15px; } + +h1 { margin: 0; font-weight: bold; font-size: 2em; } +h2 { margin: 0; font-weight: bold; font-size: 1.6em; } +h3 { margin: 0; font-weight: bold; font-size: 1.3em; } +h4 { margin: 0; font-weight: bold; font-size: 1.18em; } + +.blak { background-color: #000; } +.hide { display: none; } +.tableWidth { min-width: 400px; } + +.tblRegular { border-collapse: collapse; } +.tblRegular td { padding: 6px; background-image: url(fade.gif); border: 2px solid #99c; } +.tblHeaderColor, .tblHeaderColor td { background: #99c; } +.tblNoBorder td { border: 0; } + + +// --> +</style> + +</head> + +<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth"> + <tr> + <td><img src="images/header_title_4.gif" width="400" height="34" alt="HTTrack Website Copier" title="" border="0" id="title" /></td> + </tr> +</table> +<table width="76%" border="0" align="center" cellspacing="0" cellpadding="3" class="tableWidth"> + <tr> + <td id="subTitle">Open Source offline browser</td> + </tr> +</table> +<table width="76%" border="0" align="center" cellspacing="0" cellpadding="0" class="tableWidth"> +<tr class="blak"> +<td> + <table width="100%" border="0" align="center" cellspacing="1" cellpadding="0"> + <tr> + <td colspan="6"> + <table width="100%" border="0" align="center" cellspacing="0" cellpadding="10"> + <tr> + <td id="pageContent"> +<!-- ==================== End prologue ==================== --> + +<h2 align="center"><em>HTTrack Programming page - plugging functions<br > +releases 3.30 to 3.40 (not beyond) +</em></h2> + +<br> + +You can write external functions to be plugged in the httrack library very easily.
+We'll see some examples here.
+ +<br><br> +
+The <tt>httrack</tt> commandline tool allows you (since the 3.30 release) to plug external functions into various callbacks defined in httrack.<br>
+See also: the <tt>httrack-library.h</tt> prototype file, and the <tt>callbacks-example.c</tt> given in the httrack archive.<br>
+
+<br>
+Example:
+<tt>
+httrack --wrapper check-html=callback:process_file ..
+</tt>
+<br>
+With the callback.so (or callback.dll) module defined as below:
+
+<pre>
+int process_file(char* html, int len, char* url_adresse, char* url_fichier) {
+ printf("now parsing %s%s..\n", url_adresse, url_fichier);
+ strcpy(currentURLBeingParsed, url_adresse);
+ strcat(currentURLBeingParsed, url_fichier);
+ return 1; /* success */
+}
+</pre>
+
+Below the list of callbacks, and associated external wrappers:<br>
+
+<table width="100%">
+<tr><td><b>"<i>callback name</i>"</b></td><td><b>callback description</b></td><td><b>callback function signature</b></td></tr>
+
+<tr><td background="img/fade.gif">"<i>init</i>"</td><td background="img/fade.gif"><font color="red">Note: deprecated, should not be used anymore (unsafe callback) - see "start" callback or wrapper_init() module function below this table.</font>Called during initialization ; use of htswrap_add (see <tt>httrack-library.h</tt>) is permitted inside this function to setup other callbacks.<br>return value: none</td><td background="img/fade.gif"><tt>void (* myfunction)(void);</tt></td></tr>
+<tr><td background="img/fade.gif">"<i>free</i>"</td><td background="img/fade.gif"><font color="red">Note: deprecated, should not be used anymore (unsafe callback) - see "end" callback or wrapper_exit() module function below this table.</font><br />Called during un-initialization<br>return value: none</td><td background="img/fade.gif"><tt>void (* myfunction)(void);</tt></td></tr>
+<tr><td background="img/fade.gif">"<i>start</i>"</td><td background="img/fade.gif">Called when the mirror starts. The <tt>opt</tt> structure passed lists all options defined for this mirror. You may modify the <tt>opt</tt> structure to fit your needs. Besides, use of htswrap_add (see <tt>httrack-library.h</tt>) is permitted inside this function to setup other callbacks.<br>return value: 1 upon success, 0 upon error (the mirror will then be aborted)</td><td background="img/fade.gif"><tt>int (* myfunction)(httrackp* opt);</tt></td></tr>
+<tr><td background="img/fade.gif">"<i>end</i>"</td><td background="img/fade.gif">Called when the mirror ends<br>return value: 1 upon success, 0 upon error (the mirror will then be considered aborted)</td><td background="img/fade.gif"><tt>int (* myfunction)(void);</tt></td></tr>
+<tr><td background="img/fade.gif">"<i>change-options</i>"</td><td background="img/fade.gif">Called when options are to be changed. The <tt>opt</tt> structure passed lists all options, updated to take account of recent changes<br>return value: 1 upon success, 0 upon error (the mirror will then be aborted)</td><td background="img/fade.gif"><tt>int (* myfunction)(httrackp* opt);</tt></td></tr>
+<tr><td background="img/fade.gif">"<i>check-html</i>"</td><td background="img/fade.gif">Called when a document (which may not be an html document) is to be parsed. The <tt>html</tt> address points to the document data, of length <tt>len</tt>. The <tt>url_adresse</tt> and <tt>url_fichier</tt> are the address and URI of the file being processed<br>return value: 1 if the parsing can be processed, 0 if the file must be skipped without being parsed</td><td background="img/fade.gif"><tt>int (* myfunction)(char* html,int len,char* url_adresse,char* url_fichier);</tt></td></tr>
+<tr><td background="img/fade.gif">"<i>preprocess-html</i>"</td><td background="img/fade.gif">Called when a document (which is an html document) is to be parsed (original, not yet modified document). The <tt>html</tt> address points to the document data address (char**), and the <tt>len</tt> address points to the length of this document. Both pointer values (address and size) can be modified to change the document. It is up to the callback function to reallocate the given pointer (using standard C library realloc()/free() functions), which will be free()'ed by the engine. Hence, return of static buffers is strictly forbidden, and the use of strdup() in such cases is advised. The <tt>url_adresse</tt> and <tt>url_fichier</tt> are the address and URI of the file being processed<br>return value: 1 if the new pointers can be applied (default value)</td><td background="img/fade.gif"><tt>int (* myfunction)(char** html,int* len,char* url_adresse,char* url_fichier);</tt></td></tr>
+<tr><td background="img/fade.gif">"<i>postprocess-html</i>"</td><td background="img/fade.gif">Called when a document (which is an html document) is parsed and transformed (links rewritten). The <tt>html</tt> address points to the document data address (char**), and the <tt>len</tt> address points to the length of this document. Both pointer values (address and size) can be modified to change the document. It is up to the callback function to reallocate the given pointer (using standard C library realloc()/free() functions), which will be free()'ed by the engine. Hence, return of static buffers is strictly forbidden, and the use of strdup() in such cases is advised. The <tt>url_adresse</tt> and <tt>url_fichier</tt> are the address and URI of the file being processed<br>return value: 1 if the new pointers can be applied (default value)</td><td background="img/fade.gif"><tt>int (* myfunction)(char** html,int* len,char* url_adresse,char* url_fichier);</tt></td></tr>
+<tr><td background="img/fade.gif">"<i>query</i>"</td><td background="img/fade.gif">Called when the wizard needs to ask a question. The <tt>question</tt> string contains the question for the (human) user<br>return value: the string answer ("" for default reply)</td><td background="img/fade.gif"><tt>char* (* myfunction)(char* question);</tt></td></tr>
+<tr><td background="img/fade.gif">"<i>query2</i>"</td><td background="img/fade.gif">Called when the wizard needs to ask a question</td><td background="img/fade.gif"><tt>char* (* myfunction)(char* question);</tt></td></tr>
+<tr><td background="img/fade.gif">"<i>query3</i>"</td><td background="img/fade.gif">Called when the wizard needs to ask a question</td><td background="img/fade.gif"><tt>char* (* myfunction)(char* question);</tt></td></tr>
+<tr><td background="img/fade.gif">"<i>loop</i>"</td><td background="img/fade.gif">Called periodically (informational, to display statistics)<br>return value: 1 if the mirror can continue, 0 if the mirror must be aborted</td><td background="img/fade.gif"><tt>int (* myfunction)(lien_back* back,int back_max,int back_index,int lien_tot,int lien_ntot,int stat_time,hts_stat_struct* stats);</tt></td></tr>
+<tr><td background="img/fade.gif">"<i>check-link</i>"</td><td background="img/fade.gif">Called when a link has to be tested. The <tt>adr</tt> and <tt>fil</tt> are the address and URI of the link being tested. The passed <tt>status</tt> value has the following meaning: 0 if the link is to be accepted by default, 1 if the link is to be refused by default, and -1 if no decision has yet been taken by the engine<br>return value: same meaning as the passed <tt>status</tt> value ; you may generally return -1 to let the engine take the decision by itself</td><td background="img/fade.gif"><tt>int (* myfunction)(char* adr,char* fil,int status);</tt></td></tr>
+<tr><td background="img/fade.gif">"<i>check-mime</i>"</td><td background="img/fade.gif">Called when a link download has begun, and needs to be tested against its MIME type. The <tt>adr</tt> and <tt>fil</tt> are the address and URI of the link being tested, and the <tt>mime</tt> string contains the link type being processed. The passed <tt>status</tt> value has the following meaning: 0 if the link is to be accepted by default, 1 if the link is to be refused by default, and -1 if no decision has yet been taken by the engine<br>return value: same meaning as the passed <tt>status</tt> value ; you may generally return -1 to let the engine take the decision by itself</td><td background="img/fade.gif"><tt>int (* myfunction)(char* adr,char* fil,char* mime,int status);</tt></td></tr>
+<tr><td background="img/fade.gif">"<i>pause</i>"</td><td background="img/fade.gif">Called when the engine must pause. When the <tt>lockfile</tt> passed is deleted, the function can return<br>return value: none</td><td background="img/fade.gif"><tt>void (* myfunction)(char* lockfile);</tt></td></tr>
+<tr><td background="img/fade.gif">"<i>save-file</i>"</td><td background="img/fade.gif">Called when a file is to be saved on disk<br>return value: none</td><td background="img/fade.gif"><tt>void (* myfunction)(char* file);</tt></td></tr>
+<tr><td background="img/fade.gif">"<i>save-file2</i>"</td><td background="img/fade.gif">Called when a file is to be saved or checked on disk<br>The hostname, filename and local filename are given. Two additional flags tell whether the file is new (is_new) and whether the file is to be modified (is_modified).<br>(!is_new && !is_modified): the file is up-to-date, and will not be modified<br>(is_new && is_modified): a new file will be written (or an updated file is being written)<br>(!is_new && is_modified): a file is being updated (append)<br>(is_new && !is_modified): an empty file will be written ("do not recatch locally erased files")<br>return value: none</td><td background="img/fade.gif"><tt>void (* myfunction)(char* hostname,char* filename,char* localfile,int is_new,int is_modified);</tt></td></tr>
+
+typedef void (* t_hts_htmlcheck_filesave2)(); +
+
+<tr><td background="img/fade.gif">"<i>link-detected</i>"</td><td background="img/fade.gif">Called when a link has been detected<br>return value: 1 if the link can be analyzed, 0 if the link must not even be considered</td><td background="img/fade.gif"><tt>int (* myfunction)(char* link);</tt></td></tr>
+<tr><td background="img/fade.gif">"<i>transfer-status</i>"</td><td background="img/fade.gif">Called when a file has been processed (downloaded, updated, or error)<br>return value: must return 1</td><td background="img/fade.gif"><tt>int (* myfunction)(lien_back* back);</tt></td></tr>
+<tr><td background="img/fade.gif">"<i>save-name</i>"</td><td background="img/fade.gif">Called when a local filename has to be processed. The <tt>adr_complete</tt> and <tt>fil_complete</tt> are the address and URI of the file being saved ; the <tt>referer_adr</tt> and <tt>referer_fil</tt> are the address and URI of the referer link. The <tt>save</tt> string contains the local filename being used. You may modify the <tt>save</tt> string to fit your needs, up to 1024 bytes (note: filename collisions, if any, will be handled by the engine by renaming the file into file-2.ext, file-3.ext ..).<br>return value: must return 1</td><td background="img/fade.gif"><tt>int (* myfunction)(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save);</tt></td></tr>
+<tr><td background="img/fade.gif">"<i>send-header</i>"</td><td background="img/fade.gif">Called when HTTP headers are to be sent to the remote server. The <tt>buff</tt> buffer contains text headers, <tt>adr</tt> and <tt>fil</tt> the URL, and <tt>referer_adr</tt> and <tt>referer_fil</tt> the referer URL. The <tt>outgoing</tt> structure contains all information related to the current slot.<br>return value: 1 if the mirror can continue, 0 if the mirror must be aborted</td><td background="img/fade.gif"><tt>int (* myfunction)(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* outgoing);</tt></td></tr>
+<tr><td background="img/fade.gif">"<i>receive-header</i>"</td><td background="img/fade.gif">Called when HTTP headers are received from the remote server. The <tt>buff</tt> buffer contains text headers, <tt>adr</tt> and <tt>fil</tt> the URL, and <tt>referer_adr</tt> and <tt>referer_fil</tt> the referer URL. The <tt>incoming</tt> structure contains all information related to the current slot.<br>return value: 1 if the mirror can continue, 0 if the mirror must be aborted</td><td background="img/fade.gif"><tt>int (* myfunction)(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* incoming);</tt></td></tr>
+
+</table>
+ +<br><br> +Below additional function names that can be defined inside the module (DLL/.so):<br>
+ +<table width="100%" ID="Table1">
+<tr><td><b>"<i>module function name</i>"</b></td><td><b>function description</b></td></tr>
+
+<tr><td background="img/fade.gif"><i>int <b>function-name</b>_init(char *args);</i></td><td background="img/fade.gif">Called when a function named <b>function-name</b> is extracted from the current module (same as wrapper_init). The optional <tt>args</tt> provides additional commandline parameters. Returns 1 upon success, 0 if the function should not be extracted.</td></tr>
+<tr><td background="img/fade.gif"><i>int wrapper_init(char *fname, char *args);</i></td><td background="img/fade.gif">Called when a function named <tt>fname</tt> is extracted from the current module. The optional <tt>args</tt> provides additional commandline parameters. Besides, use of htswrap_add (see <tt>httrack-library.h</tt>) is permitted inside this function to setup other callbacks. Returns 1 upon success, 0 if the function should not be extracted.</td></tr>
+<tr><td background="img/fade.gif"><i>int wrapper_exit(void);</i></td><td background="img/fade.gif">Called when the module is unloaded. The function should return 1 (but the result is ignored).</td></tr>
+ +</table> + +<br><br> +Below additional function names that can be defined inside the optional libhttrack-plugin module (libhttrack-plugin.dll or libhttrack-plugin.so) searched inside common library path:<br>
+ +<table width="100%" ID="Table2">
+<tr><td><b>"<i>module function name</i>"</b></td><td><b>function description</b></td></tr>
+
+<tr><td background="img/fade.gif"><i>void plugin_init(void);</i></td><td background="img/fade.gif">Called if the module (named libhttrack-plugin.(so|dll)) is found in the library path. Use of htswrap_add (see <tt>httrack-library.h</tt>) is permitted inside this function to setup other callbacks.</td></tr>
+ +</table> + +<br><br> +
+ +<br><br> + +<!-- ==================== Start epilogue ==================== --> + </td> + </tr> + </table> + </td> + </tr> + </table> +</td> +</tr> +</table> + +<table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> + <tr> + <td id="footer"><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> + </tr> +</table> + +</body> + +</html> + + diff --git a/html/scripting.html b/html/scripting.html index 2752a0d..02abb93 100755 --- a/html/scripting.html +++ b/html/scripting.html @@ -250,7 +250,7 @@ Script example: <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> + <td id="footer"><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> </tr> </table> diff --git a/html/server/about.html b/html/server/about.html index 087f68e..82af482 100755 --- a/html/server/about.html +++ b/html/server/about.html @@ -162,7 +162,7 @@ ${LANG_K3} : ${HTTRACK_WEB} <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></small></td> + <td id="footer"><small><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></small></td> </tr> </table> diff --git a/html/server/addurl.html b/html/server/addurl.html index ee1f5a7..21a0953 100755 --- a/html/server/addurl.html +++ b/html/server/addurl.html @@ -218,7 +218,7 @@ ${do:end-if} <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></small></td> + <td id="footer"><small><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></small></td> </tr> 
</table> diff --git a/html/server/error.html b/html/server/error.html index d8ea4d2..c600805 100755 --- a/html/server/error.html +++ b/html/server/error.html @@ -139,7 +139,7 @@ ${error} <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></small></td> + <td id="footer"><small><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></small></td> </tr> </table> diff --git a/html/server/file.html b/html/server/file.html index 2dd3df6..4108afd 100755 --- a/html/server/file.html +++ b/html/server/file.html @@ -167,7 +167,7 @@ ${do:loadhash} <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></small></td> + <td id="footer"><small><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></small></td> </tr> </table> diff --git a/html/server/finished.html b/html/server/finished.html index 5777a27..27130de 100755 --- a/html/server/finished.html +++ b/html/server/finished.html @@ -213,7 +213,7 @@ ${path}/${projname} <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></small></td> + <td id="footer"><small><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></small></td> </tr> </table> diff --git a/html/server/help.html b/html/server/help.html index ef7f830..19decdb 100755 --- a/html/server/help.html +++ b/html/server/help.html @@ -174,7 +174,7 @@ ${do:end-if} <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small><small>© 2003 Xavier Roche & other contributors - Web Design: 
Leto Kauler.</small></small></td> + <td id="footer"><small><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></small></td> </tr> </table> diff --git a/html/server/index.html b/html/server/index.html index 6d48219..6998976 100755 --- a/html/server/index.html +++ b/html/server/index.html @@ -200,7 +200,7 @@ ${LANG_THANKYOU}! <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></small></td> + <td id="footer"><small><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></small></td> </tr> </table> diff --git a/html/server/option1.html b/html/server/option1.html index 7e75e9e..9ba1d5b 100755 --- a/html/server/option1.html +++ b/html/server/option1.html @@ -229,7 +229,7 @@ ${do:end-if} <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></small></td> + <td id="footer"><small><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></small></td> </tr> </table> diff --git a/html/server/option10.html b/html/server/option10.html index 0ebe51f..99d0a4d 100755 --- a/html/server/option10.html +++ b/html/server/option10.html @@ -221,7 +221,7 @@ ${LANG_IOPT10}: <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></small></td> + <td id="footer"><small><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></small></td> </tr> </table> diff --git a/html/server/option11.html b/html/server/option11.html index 7aea791..4b688e8 100755 --- a/html/server/option11.html +++ b/html/server/option11.html @@ -321,7 +321,7 @@ 
${LANG_W3} <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></small></td> + <td id="footer"><small><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></small></td> </tr> </table> diff --git a/html/server/option2.html b/html/server/option2.html index fd3ab8c..6df78f3 100755 --- a/html/server/option2.html +++ b/html/server/option2.html @@ -247,7 +247,7 @@ ${listid:build:LISTDEF_3} <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></small></td> + <td id="footer"><small><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></small></td> </tr> </table> diff --git a/html/server/option2b.html b/html/server/option2b.html index d227c30..ef6d0b2 100755 --- a/html/server/option2b.html +++ b/html/server/option2b.html @@ -211,7 +211,7 @@ ${do:output-mode:} <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></small></td> + <td id="footer"><small><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></small></td> </tr> </table> diff --git a/html/server/option3.html b/html/server/option3.html index 743dc81..9c6149e 100755 --- a/html/server/option3.html +++ b/html/server/option3.html @@ -262,7 +262,7 @@ ${listid:travel3:LISTDEF_11} <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></small></td> + <td id="footer"><small><small>© 2007 Xavier Roche & other contributors - Web Design: Leto 
Kauler.</small></small></td> </tr> </table> diff --git a/html/server/option4.html b/html/server/option4.html index d686ec1..c2176c3 100755 --- a/html/server/option4.html +++ b/html/server/option4.html @@ -255,7 +255,7 @@ ${LANG_I46} <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></small></td> + <td id="footer"><small><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></small></td> </tr> </table> diff --git a/html/server/option5.html b/html/server/option5.html index 64ce3c5..0da4e32 100755 --- a/html/server/option5.html +++ b/html/server/option5.html @@ -291,7 +291,7 @@ ${LANG_I64b} <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></small></td> + <td id="footer"><small><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></small></td> </tr> </table> diff --git a/html/server/option6.html b/html/server/option6.html index 4e4a0ac..c0a18c5 100755 --- a/html/server/option6.html +++ b/html/server/option6.html @@ -227,7 +227,7 @@ ${LANG_I43b} <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></small></td> + <td id="footer"><small><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></small></td> </tr> </table> diff --git a/html/server/option7.html b/html/server/option7.html index d689de2..676c11b 100755 --- a/html/server/option7.html +++ b/html/server/option7.html @@ -219,7 +219,7 @@ ${LANG_B13} <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td 
id="footer"><small><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></small></td> + <td id="footer"><small><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></small></td> </tr> </table> diff --git a/html/server/option8.html b/html/server/option8.html index 397e33c..5f32e0e 100755 --- a/html/server/option8.html +++ b/html/server/option8.html @@ -256,7 +256,7 @@ ${listid:robots:LISTDEF_8} <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></small></td> + <td id="footer"><small><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></small></td> </tr> </table> diff --git a/html/server/option9.html b/html/server/option9.html index b77cccb..2185f9f 100755 --- a/html/server/option9.html +++ b/html/server/option9.html @@ -237,7 +237,7 @@ ${listid:logtype:LISTDEF_9} <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></small></td> + <td id="footer"><small><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></small></td> </tr> </table> diff --git a/html/server/refresh.html b/html/server/refresh.html index 35d3ecf..ca840a6 100755 --- a/html/server/refresh.html +++ b/html/server/refresh.html @@ -271,7 +271,7 @@ ${LANG_H20} ${info.currentjob} <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></small></td> + <td id="footer"><small><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></small></td> </tr> </table> diff --git a/html/server/step2.html b/html/server/step2.html 
index 246a264..e2d74db 100755 --- a/html/server/step2.html +++ b/html/server/step2.html @@ -343,7 +343,7 @@ ${do:end-if:} <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></small></td> + <td id="footer"><small><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></small></td> </tr> </table> diff --git a/html/server/step3.html b/html/server/step3.html index 54cc9da..a1b035f 100755 --- a/html/server/step3.html +++ b/html/server/step3.html @@ -276,7 +276,7 @@ ${do:output-mode:} <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></small></td> + <td id="footer"><small><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></small></td> </tr> </table> diff --git a/html/server/step4.html b/html/server/step4.html index a65cf22..452edac 100755 --- a/html/server/step4.html +++ b/html/server/step4.html @@ -378,7 +378,7 @@ ${do:output-mode:} <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></small></td> + <td id="footer"><small><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></small></td> </tr> </table> diff --git a/html/shelldoc.html b/html/shelldoc.html index 05e0c80..737794b 100644 --- a/html/shelldoc.html +++ b/html/shelldoc.html @@ -141,7 +141,7 @@ You may encounter minor differences (in the display, or in various options) betw <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small>© 2003 Xavier Roche & other contributors - Web Design: Leto 
Kauler.</small></td> + <td id="footer"><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> </tr> </table> diff --git a/html/step.html b/html/step.html index fff295f..26193cd 100644 --- a/html/step.html +++ b/html/step.html @@ -128,7 +128,7 @@ h4 { margin: 0; font-weight: bold; font-size: 1.18em; } <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> + <td id="footer"><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> </tr> </table> diff --git a/html/step1.html b/html/step1.html index 645bf32..e61a02a 100644 --- a/html/step1.html +++ b/html/step1.html @@ -143,7 +143,7 @@ h4 { margin: 0; font-weight: bold; font-size: 1.18em; } <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> + <td id="footer"><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> </tr> </table> diff --git a/html/step2.html b/html/step2.html index c861e03..44cea6c 100644 --- a/html/step2.html +++ b/html/step2.html @@ -157,7 +157,7 @@ h4 { margin: 0; font-weight: bold; font-size: 1.18em; } <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> + <td id="footer"><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> </tr> </table> diff --git a/html/step3.html b/html/step3.html index 00b6b66..add093b 100644 --- a/html/step3.html +++ b/html/step3.html @@ -151,7 +151,7 @@ h4 { margin: 0; font-weight: bold; font-size: 1.18em; } <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0">
<tr>
- <td id="footer"><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td>
+ <td id="footer"><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td>
</tr>
</table>
diff --git a/html/step4.html b/html/step4.html index 0de1c54..ab05d2f 100644 --- a/html/step4.html +++ b/html/step4.html @@ -128,7 +128,7 @@ h4 { margin: 0; font-weight: bold; font-size: 1.18em; } <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> + <td id="footer"><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> </tr> </table> diff --git a/html/step5.html b/html/step5.html index cae81bd..eee809a 100644 --- a/html/step5.html +++ b/html/step5.html @@ -127,7 +127,7 @@ h4 { margin: 0; font-weight: bold; font-size: 1.18em; } <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> + <td id="footer"><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> </tr> </table> diff --git a/html/step9.html b/html/step9.html index 0400822..af565cd 100644 --- a/html/step9.html +++ b/html/step9.html @@ -144,7 +144,7 @@ h4 { margin: 0; font-weight: bold; font-size: 1.18em; } <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> + <td id="footer"><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> </tr> </table> diff --git a/html/step9_opt1.html b/html/step9_opt1.html index cf79c85..e263076 100644 --- a/html/step9_opt1.html +++ b/html/step9_opt1.html @@ -145,7 +145,7 @@ h4 { margin: 0; font-weight: bold; font-size: 1.18em; } <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> + <td 
id="footer"><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> </tr> </table> diff --git a/html/step9_opt10.html b/html/step9_opt10.html index 658b82f..c1d2b1f 100644 --- a/html/step9_opt10.html +++ b/html/step9_opt10.html @@ -150,7 +150,7 @@ h4 { margin: 0; font-weight: bold; font-size: 1.18em; } <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> + <td id="footer"><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> </tr> </table> diff --git a/html/step9_opt11.html b/html/step9_opt11.html index e4c0395..12caeee 100644 --- a/html/step9_opt11.html +++ b/html/step9_opt11.html @@ -182,7 +182,7 @@ In this case, HTTrack won't check the type, because it has learned that "foo" is <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> + <td id="footer"><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> </tr> </table> diff --git a/html/step9_opt2.html b/html/step9_opt2.html index 3fd774d..3b7b8ff 100644 --- a/html/step9_opt2.html +++ b/html/step9_opt2.html @@ -181,7 +181,7 @@ h4 { margin: 0; font-weight: bold; font-size: 1.18em; } <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> + <td id="footer"><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> </tr> </table> diff --git a/html/step9_opt3.html b/html/step9_opt3.html index 98511f2..df96322 100644 --- a/html/step9_opt3.html +++ b/html/step9_opt3.html @@ -145,7 +145,7 @@ h4 { margin: 0; font-weight: bold; font-size: 1.18em; } 
<table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> + <td id="footer"><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> </tr> </table> diff --git a/html/step9_opt4.html b/html/step9_opt4.html index 3a3a9d5..54b4713 100644 --- a/html/step9_opt4.html +++ b/html/step9_opt4.html @@ -112,11 +112,11 @@ h4 { margin: 0; font-weight: bold; font-size: 1.18em; } <br><br> <!-- --> <li>Exclude link(s)</li> - <br><small>This button lets you add a filter to authorize either a directory, a domain, a certain file type... + <br><small>This button lets you add a filter to exclude either a directory, a domain, a certain file type... <br>See <a href="#add">below</a> to find out how to add a filter rule... </small><br><br> <!-- --> - <li>Exclude link(s)</li> + <li>Include link(s)</li> <br><small>This button lets you add a filter to authorize either a directory, a domain, a certain file type... <br>See <a href="#add">below</a> to find out how to add a filter rule... 
</small><br><br> @@ -176,7 +176,7 @@ h4 { margin: 0; font-weight: bold; font-size: 1.18em; } <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> + <td id="footer"><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> </tr> </table> diff --git a/html/step9_opt5.html b/html/step9_opt5.html index c13666c..c9fbf60 100644 --- a/html/step9_opt5.html +++ b/html/step9_opt5.html @@ -165,7 +165,7 @@ h4 { margin: 0; font-weight: bold; font-size: 1.18em; } <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> + <td id="footer"><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> </tr> </table> diff --git a/html/step9_opt6.html b/html/step9_opt6.html index 110b27f..41f4d5e 100644 --- a/html/step9_opt6.html +++ b/html/step9_opt6.html @@ -162,7 +162,7 @@ h4 { margin: 0; font-weight: bold; font-size: 1.18em; } <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> + <td id="footer"><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> </tr> </table> diff --git a/html/step9_opt7.html b/html/step9_opt7.html index 876fb3e..3be0a9e 100644 --- a/html/step9_opt7.html +++ b/html/step9_opt7.html @@ -151,7 +151,7 @@ h4 { margin: 0; font-weight: bold; font-size: 1.18em; } <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> + <td id="footer"><small>© 2007 Xavier Roche & other contributors - Web 
Design: Leto Kauler.</small></td> </tr> </table> diff --git a/html/step9_opt8.html b/html/step9_opt8.html index 97d424f..2f74b00 100644 --- a/html/step9_opt8.html +++ b/html/step9_opt8.html @@ -141,7 +141,7 @@ h4 { margin: 0; font-weight: bold; font-size: 1.18em; } <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> + <td id="footer"><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> </tr> </table> diff --git a/html/step9_opt9.html b/html/step9_opt9.html index b706121..0568554 100644 --- a/html/step9_opt9.html +++ b/html/step9_opt9.html @@ -156,7 +156,7 @@ h4 { margin: 0; font-weight: bold; font-size: 1.18em; } <table width="76%" border="0" align="center" valign="bottom" cellspacing="0" cellpadding="0"> <tr> - <td id="footer"><small>© 2003 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> + <td id="footer"><small>© 2007 Xavier Roche & other contributors - Web Design: Leto Kauler.</small></td> </tr> </table> diff --git a/lang/Makefile.am b/lang/Makefile.am index 8748e61..89582ab 100644 --- a/lang/Makefile.am +++ b/lang/Makefile.am @@ -5,3 +5,5 @@ langrootdir = $(datadir)/httrack langroot_DATA = ../lang.def ../lang.indexes EXTRA_DIST = $(lang_DATA) $(langroot_DATA) + +#dist-hook: diff --git a/lang/Makefile.in b/lang/Makefile.in index 45b6c18..241e842 100644 --- a/lang/Makefile.in +++ b/lang/Makefile.in @@ -335,6 +335,8 @@ uninstall-am: uninstall-info-am uninstall-langDATA \ ps ps-am uninstall uninstall-am uninstall-info-am \ uninstall-langDATA uninstall-langrootDATA + +#dist-hook: # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. 
.NOEXPORT: diff --git a/libtest/Makefile.am b/libtest/Makefile.am index 878c05b..eb500c0 100644 --- a/libtest/Makefile.am +++ b/libtest/Makefile.am @@ -20,22 +20,31 @@ AM_LDFLAGS = -L../src libbaselinks_la_SOURCES = callbacks-example-baselinks.c libbaselinks_la_LIBADD = $(THREADS_LIBS) $(SOCKET_LIBS) -lhttrack libbaselinks_la_LDFLAGS = -version-info 1:0:0 +libchangecontent_la_SOURCES = callbacks-example-changecontent.c +libchangecontent_la_LIBADD = $(THREADS_LIBS) $(SOCKET_LIBS) -lhttrack +libchangecontent_la_LDFLAGS = -version-info 1:0:0 libcontentfilter_la_SOURCES = callbacks-example-contentfilter.c libcontentfilter_la_LIBADD = $(THREADS_LIBS) $(SOCKET_LIBS) -lhttrack libcontentfilter_la_LDFLAGS = -version-info 1:0:0 libdisplayheader_la_SOURCES = callbacks-example-displayheader.c libdisplayheader_la_LIBADD = $(THREADS_LIBS) $(SOCKET_LIBS) -lhttrack libdisplayheader_la_LDFLAGS = -version-info 1:0:0 -libfilename_la_SOURCES = callbacks-example-filename.c -libfilename_la_LIBADD = $(THREADS_LIBS) $(SOCKET_LIBS) -lhttrack -libfilename_la_LDFLAGS = -version-info 1:0:0 libfilename2_la_SOURCES = callbacks-example-filename2.c libfilename2_la_LIBADD = $(THREADS_LIBS) $(SOCKET_LIBS) -lhttrack libfilename2_la_LDFLAGS = -version-info 1:0:0 +libfilename_la_SOURCES = callbacks-example-filename.c +libfilename_la_LIBADD = $(THREADS_LIBS) $(SOCKET_LIBS) -lhttrack +libfilename_la_LDFLAGS = -version-info 1:0:0 libfilenameiisbug_la_SOURCES = callbacks-example-filenameiisbug.c libfilenameiisbug_la_LIBADD = $(THREADS_LIBS) $(SOCKET_LIBS) -lhttrack libfilenameiisbug_la_LDFLAGS = -version-info 1:0:0 liblistlinks_la_SOURCES = callbacks-example-listlinks.c liblistlinks_la_LIBADD = $(THREADS_LIBS) $(SOCKET_LIBS) -lhttrack liblistlinks_la_LDFLAGS = -version-info 1:0:0 -pkglib_LTLIBRARIES = libbaselinks.la libcontentfilter.la libdisplayheader.la libfilename.la libfilename2.la libfilenameiisbug.la liblistlinks.la +liblog_la_SOURCES = callbacks-example-log.c +liblog_la_LIBADD = $(THREADS_LIBS) 
$(SOCKET_LIBS) -lhttrack +liblog_la_LDFLAGS = -version-info 1:0:0 +libsimple_la_SOURCES = callbacks-example-simple.c +libsimple_la_LIBADD = $(THREADS_LIBS) $(SOCKET_LIBS) -lhttrack +libsimple_la_LDFLAGS = -version-info 1:0:0 +pkglib_LTLIBRARIES = libbaselinks.la libchangecontent.la libcontentfilter.la libdisplayheader.la libfilename2.la libfilename.la libfilenameiisbug.la liblistlinks.la liblog.la libsimple.la diff --git a/libtest/Makefile.in b/libtest/Makefile.in index 5dabcbe..0c6d904 100644 --- a/libtest/Makefile.in +++ b/libtest/Makefile.in @@ -163,25 +163,34 @@ AM_LDFLAGS = -L../src libbaselinks_la_SOURCES = callbacks-example-baselinks.c libbaselinks_la_LIBADD = $(THREADS_LIBS) $(SOCKET_LIBS) -lhttrack libbaselinks_la_LDFLAGS = -version-info 1:0:0 +libchangecontent_la_SOURCES = callbacks-example-changecontent.c +libchangecontent_la_LIBADD = $(THREADS_LIBS) $(SOCKET_LIBS) -lhttrack +libchangecontent_la_LDFLAGS = -version-info 1:0:0 libcontentfilter_la_SOURCES = callbacks-example-contentfilter.c libcontentfilter_la_LIBADD = $(THREADS_LIBS) $(SOCKET_LIBS) -lhttrack libcontentfilter_la_LDFLAGS = -version-info 1:0:0 libdisplayheader_la_SOURCES = callbacks-example-displayheader.c libdisplayheader_la_LIBADD = $(THREADS_LIBS) $(SOCKET_LIBS) -lhttrack libdisplayheader_la_LDFLAGS = -version-info 1:0:0 -libfilename_la_SOURCES = callbacks-example-filename.c -libfilename_la_LIBADD = $(THREADS_LIBS) $(SOCKET_LIBS) -lhttrack -libfilename_la_LDFLAGS = -version-info 1:0:0 libfilename2_la_SOURCES = callbacks-example-filename2.c libfilename2_la_LIBADD = $(THREADS_LIBS) $(SOCKET_LIBS) -lhttrack libfilename2_la_LDFLAGS = -version-info 1:0:0 +libfilename_la_SOURCES = callbacks-example-filename.c +libfilename_la_LIBADD = $(THREADS_LIBS) $(SOCKET_LIBS) -lhttrack +libfilename_la_LDFLAGS = -version-info 1:0:0 libfilenameiisbug_la_SOURCES = callbacks-example-filenameiisbug.c libfilenameiisbug_la_LIBADD = $(THREADS_LIBS) $(SOCKET_LIBS) -lhttrack libfilenameiisbug_la_LDFLAGS = 
-version-info 1:0:0 liblistlinks_la_SOURCES = callbacks-example-listlinks.c liblistlinks_la_LIBADD = $(THREADS_LIBS) $(SOCKET_LIBS) -lhttrack liblistlinks_la_LDFLAGS = -version-info 1:0:0 -pkglib_LTLIBRARIES = libbaselinks.la libcontentfilter.la libdisplayheader.la libfilename.la libfilename2.la libfilenameiisbug.la liblistlinks.la +liblog_la_SOURCES = callbacks-example-log.c +liblog_la_LIBADD = $(THREADS_LIBS) $(SOCKET_LIBS) -lhttrack +liblog_la_LDFLAGS = -version-info 1:0:0 +libsimple_la_SOURCES = callbacks-example-simple.c +libsimple_la_LIBADD = $(THREADS_LIBS) $(SOCKET_LIBS) -lhttrack +libsimple_la_LDFLAGS = -version-info 1:0:0 +pkglib_LTLIBRARIES = libbaselinks.la libchangecontent.la libcontentfilter.la libdisplayheader.la libfilename2.la libfilename.la libfilenameiisbug.la liblistlinks.la liblog.la libsimple.la subdir = libtest ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs @@ -192,6 +201,9 @@ LTLIBRARIES = $(pkglib_LTLIBRARIES) libbaselinks_la_DEPENDENCIES = am_libbaselinks_la_OBJECTS = callbacks-example-baselinks.lo libbaselinks_la_OBJECTS = $(am_libbaselinks_la_OBJECTS) +libchangecontent_la_DEPENDENCIES = +am_libchangecontent_la_OBJECTS = callbacks-example-changecontent.lo +libchangecontent_la_OBJECTS = $(am_libchangecontent_la_OBJECTS) libcontentfilter_la_DEPENDENCIES = am_libcontentfilter_la_OBJECTS = callbacks-example-contentfilter.lo libcontentfilter_la_OBJECTS = $(am_libcontentfilter_la_OBJECTS) @@ -210,17 +222,26 @@ libfilenameiisbug_la_OBJECTS = $(am_libfilenameiisbug_la_OBJECTS) liblistlinks_la_DEPENDENCIES = am_liblistlinks_la_OBJECTS = callbacks-example-listlinks.lo liblistlinks_la_OBJECTS = $(am_liblistlinks_la_OBJECTS) +liblog_la_DEPENDENCIES = +am_liblog_la_OBJECTS = callbacks-example-log.lo +liblog_la_OBJECTS = $(am_liblog_la_OBJECTS) +libsimple_la_DEPENDENCIES = +am_libsimple_la_OBJECTS = callbacks-example-simple.lo +libsimple_la_OBJECTS = $(am_libsimple_la_OBJECTS) DEFAULT_INCLUDES = -I. 
-I$(srcdir) -I$(top_builddir) depcomp = $(SHELL) $(top_srcdir)/depcomp am__depfiles_maybe = depfiles @AMDEP_TRUE@DEP_FILES = ./$(DEPDIR)/callbacks-example-baselinks.Plo \ +@AMDEP_TRUE@ ./$(DEPDIR)/callbacks-example-changecontent.Plo \ @AMDEP_TRUE@ ./$(DEPDIR)/callbacks-example-contentfilter.Plo \ @AMDEP_TRUE@ ./$(DEPDIR)/callbacks-example-displayheader.Plo \ @AMDEP_TRUE@ ./$(DEPDIR)/callbacks-example-filename.Plo \ @AMDEP_TRUE@ ./$(DEPDIR)/callbacks-example-filename2.Plo \ @AMDEP_TRUE@ ./$(DEPDIR)/callbacks-example-filenameiisbug.Plo \ -@AMDEP_TRUE@ ./$(DEPDIR)/callbacks-example-listlinks.Plo +@AMDEP_TRUE@ ./$(DEPDIR)/callbacks-example-listlinks.Plo \ +@AMDEP_TRUE@ ./$(DEPDIR)/callbacks-example-log.Plo \ +@AMDEP_TRUE@ ./$(DEPDIR)/callbacks-example-simple.Plo COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) LTCOMPILE = $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) \ @@ -228,14 +249,15 @@ LTCOMPILE = $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) \ CCLD = $(CC) LINK = $(LIBTOOL) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ $(AM_LDFLAGS) $(LDFLAGS) -o $@ -DIST_SOURCES = $(libbaselinks_la_SOURCES) $(libcontentfilter_la_SOURCES) \ - $(libdisplayheader_la_SOURCES) $(libfilename_la_SOURCES) \ - $(libfilename2_la_SOURCES) $(libfilenameiisbug_la_SOURCES) \ - $(liblistlinks_la_SOURCES) +DIST_SOURCES = $(libbaselinks_la_SOURCES) $(libchangecontent_la_SOURCES) \ + $(libcontentfilter_la_SOURCES) $(libdisplayheader_la_SOURCES) \ + $(libfilename_la_SOURCES) $(libfilename2_la_SOURCES) \ + $(libfilenameiisbug_la_SOURCES) $(liblistlinks_la_SOURCES) \ + $(liblog_la_SOURCES) $(libsimple_la_SOURCES) DATA = $(exemples_DATA) DIST_COMMON = $(srcdir)/Makefile.in Makefile.am -SOURCES = $(libbaselinks_la_SOURCES) $(libcontentfilter_la_SOURCES) $(libdisplayheader_la_SOURCES) $(libfilename_la_SOURCES) $(libfilename2_la_SOURCES) $(libfilenameiisbug_la_SOURCES) $(liblistlinks_la_SOURCES) +SOURCES = 
$(libbaselinks_la_SOURCES) $(libchangecontent_la_SOURCES) $(libcontentfilter_la_SOURCES) $(libdisplayheader_la_SOURCES) $(libfilename_la_SOURCES) $(libfilename2_la_SOURCES) $(libfilenameiisbug_la_SOURCES) $(liblistlinks_la_SOURCES) $(liblog_la_SOURCES) $(libsimple_la_SOURCES) all: all-am @@ -276,6 +298,8 @@ clean-pkglibLTLIBRARIES: done libbaselinks.la: $(libbaselinks_la_OBJECTS) $(libbaselinks_la_DEPENDENCIES) $(LINK) -rpath $(pkglibdir) $(libbaselinks_la_LDFLAGS) $(libbaselinks_la_OBJECTS) $(libbaselinks_la_LIBADD) $(LIBS) +libchangecontent.la: $(libchangecontent_la_OBJECTS) $(libchangecontent_la_DEPENDENCIES) + $(LINK) -rpath $(pkglibdir) $(libchangecontent_la_LDFLAGS) $(libchangecontent_la_OBJECTS) $(libchangecontent_la_LIBADD) $(LIBS) libcontentfilter.la: $(libcontentfilter_la_OBJECTS) $(libcontentfilter_la_DEPENDENCIES) $(LINK) -rpath $(pkglibdir) $(libcontentfilter_la_LDFLAGS) $(libcontentfilter_la_OBJECTS) $(libcontentfilter_la_LIBADD) $(LIBS) libdisplayheader.la: $(libdisplayheader_la_OBJECTS) $(libdisplayheader_la_DEPENDENCIES) @@ -288,6 +312,10 @@ libfilenameiisbug.la: $(libfilenameiisbug_la_OBJECTS) $(libfilenameiisbug_la_DEP $(LINK) -rpath $(pkglibdir) $(libfilenameiisbug_la_LDFLAGS) $(libfilenameiisbug_la_OBJECTS) $(libfilenameiisbug_la_LIBADD) $(LIBS) liblistlinks.la: $(liblistlinks_la_OBJECTS) $(liblistlinks_la_DEPENDENCIES) $(LINK) -rpath $(pkglibdir) $(liblistlinks_la_LDFLAGS) $(liblistlinks_la_OBJECTS) $(liblistlinks_la_LIBADD) $(LIBS) +liblog.la: $(liblog_la_OBJECTS) $(liblog_la_DEPENDENCIES) + $(LINK) -rpath $(pkglibdir) $(liblog_la_LDFLAGS) $(liblog_la_OBJECTS) $(liblog_la_LIBADD) $(LIBS) +libsimple.la: $(libsimple_la_OBJECTS) $(libsimple_la_DEPENDENCIES) + $(LINK) -rpath $(pkglibdir) $(libsimple_la_LDFLAGS) $(libsimple_la_OBJECTS) $(libsimple_la_LIBADD) $(LIBS) mostlyclean-compile: -rm -f *.$(OBJEXT) core *.core @@ -296,12 +324,15 @@ distclean-compile: -rm -f *.tab.c @AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/callbacks-example-baselinks.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/callbacks-example-changecontent.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/callbacks-example-contentfilter.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/callbacks-example-displayheader.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/callbacks-example-filename.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/callbacks-example-filename2.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/callbacks-example-filenameiisbug.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/callbacks-example-listlinks.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/callbacks-example-log.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/callbacks-example-simple.Plo@am__quote@ .c.o: @am__fastdepCC_TRUE@ if $(COMPILE) -MT $@ -MD -MP -MF "$(DEPDIR)/$*.Tpo" \ diff --git a/libtest/callbacks-example-baselinks.c b/libtest/callbacks-example-baselinks.c index 9f6eb11..63b5175 100755 --- a/libtest/callbacks-example-baselinks.c +++ b/libtest/callbacks-example-baselinks.c @@ -3,89 +3,116 @@ Can be useful to make copies of site's archives using site's URL base href as root reference
.c file
+ How to build: (mycallback.so or mycallback.dll)
+ With GNU-GCC:
+ gcc -O -g3 -Wall -D_REENTRANT -shared -o mycallback.so callbacks-example.c -lhttrack1
+ With MS-Visual C++:
+ cl -LD -nologo -W3 -Zi -Zp4 -DWIN32 -Fe"mycallback.dll" callbacks-example.c libhttrack1.lib
+
+ Note: the httrack library linker option is only necessary when using libhttrack's functions inside the callback
+
How to use:
- - compile this file as a module (callback.so or callback.dll)
- example:
- (with gcc)
- gcc -O -g3 -Wall -D_REENTRANT -DINET6 -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -shared -o callback.so callbacks-example.c
- or (with visual c++)
- cl -LD -nologo -W3 -Zi -Zp4 -DWIN32 -Fe"callback.dll" callbacks-example.c
- - use the --wrapper option in httrack:
- httrack --wrapper check-html=callback:process_file
- --wrapper link-detected=callback:check_detectedlink
+ httrack --wrapper mycallback,base .. (the base argument is mandatory, e.g. mycallback,http://www.example.com/)
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+/* Standard httrack module includes */
#include "httrack-library.h"
+#include "htsopt.h"
+#include "htsdefines.h"
-/* "External" */
-#ifdef _WIN32
-#define EXTERNAL_FUNCTION __declspec(dllexport)
-#else
-#define EXTERNAL_FUNCTION
-#endif
-
-/* Function definitions */
-EXTERNAL_FUNCTION int process_file(char* html, int len, char* url_adresse, char* url_fichier);
-EXTERNAL_FUNCTION int check_detectedlink(char* link);
-EXTERNAL_FUNCTION int check_detectedlink_init(char* initString);
-EXTERNAL_FUNCTION int check_detectedlink_exit(void);
+/* Local function definitions */
+static int process_file(t_hts_callbackarg *carg, httrackp* opt, char* html, int len, const char* url_address, const char* url_file);
+static int check_detectedlink(t_hts_callbackarg *carg, httrackp* opt, char* link);
+static int check_detectedlink_end(t_hts_callbackarg *carg, httrackp *opt);
-static char base[HTS_URLMAXSIZE + 2] = "";
+/* external functions */
+EXTERNAL_FUNCTION int hts_plug(httrackp *opt, const char* argv);
-/*
-"check-html" callback
-typedef int (* t_hts_htmlcheck)(char* html,int len,char* url_adresse,char* url_fichier);
+/*
+module entry point
*/
-EXTERNAL_FUNCTION int process_file(char* html, int len, char* url_adresse, char* url_fichier) {
+EXTERNAL_FUNCTION int hts_plug(httrackp *opt, const char* argv) {
+ const char *arg = strchr(argv, ',');
+ if (arg != NULL)
+ arg++;
+
+ /* Check args */
+ fprintf(stderr, "Plugged..\n");
+ if (arg == NULL || *arg == '\0' || strlen(arg) >= HTS_URLMAXSIZE / 2) {
+ fprintf(stderr, "** callback error: arguments expected or bad arguments\n");
+ fprintf(stderr, "usage: httrack --wrapper modulename,base\n");
+ fprintf(stderr, "example: httrack --wrapper callback,http://www.example.com/\n");
+ return 0; /* failed */
+ } else {
+ char *callbacks_userdef = strdup(arg); /* userdef */
+
+ /* Plug callback functions */
+ CHAIN_FUNCTION(opt, check_html, process_file, callbacks_userdef);
+ CHAIN_FUNCTION(opt, linkdetected, check_detectedlink, callbacks_userdef);
+ CHAIN_FUNCTION(opt, end, check_detectedlink_end, callbacks_userdef);
+
+ fprintf(stderr, "Using root '%s'\n", callbacks_userdef);
+ }
+
+ return 1; /* success */
+}
+
+static int process_file(t_hts_callbackarg *carg, httrackp* opt, char* html, int len, const char* url_address, const char* url_file) {
+ char* prevBase;
+
+ /* Call parent functions if multiple callbacks are chained. */
+ if (CALLBACKARG_PREV_FUN(carg, check_html) != NULL) {
+ if (!CALLBACKARG_PREV_FUN(carg, check_html)(CALLBACKARG_PREV_CARG(carg), opt, html, len, url_address, url_file)) {
+ return 0; /* Abort */
+ }
+ }
+
/* Disable base href, if any */
- char* prevBase = strstr(html, "<BASE HREF=\"");
- if (prevBase != NULL) {
+ if ( ( prevBase = strstr(html, "<BASE HREF=\"") ) != NULL) {
prevBase[1] = 'X';
}
+
return 1; /* success */
}
-/*
-"link-detected" callback
-typedef int (* t_hts_htmlcheck_linkdetected)(char* link);
-*/
-EXTERNAL_FUNCTION int check_detectedlink(char* link) {
- if (!base[0]) {
- fprintf(stderr, "** ERROR! check_detectedlink_init() was not called by httrack - you are probably using an old version (<3.31) or called the wrapper with bad arguments\n");
- fprintf(stderr, "** bailing out..\n");
- exit(1);
+static int check_detectedlink(t_hts_callbackarg *carg, httrackp* opt, char* link) {
+ const char *base = (char*) CALLBACKARG_USERDEF(carg);
+
+ /* Call parent functions if multiple callbacks are chained. */
+ if (CALLBACKARG_PREV_FUN(carg, linkdetected) != NULL) {
+ if (!CALLBACKARG_PREV_FUN(carg, linkdetected)(CALLBACKARG_PREV_CARG(carg), opt, link)) {
+ return 0; /* Abort */
+ }
}
+
/* The incoming (read/write) buffer is at least HTS_URLMAXSIZE bytes long */
if (strncmp(link, "http://", 7) == 0 || strncmp(link, "https://", 8) == 0) {
char temp[HTS_URLMAXSIZE * 2];
- strcpy(temp, base);
- strcat(temp, link);
- strcpy(link, temp);
+ strcpy(temp, base);
+ strcat(temp, link);
+ strcpy(link, temp);
}
+
return 1; /* success */
}
-/* <wrappername>_init() will be called, if exists, upon startup */
-EXTERNAL_FUNCTION int check_detectedlink_init(char* initString) {
- fprintf(stderr, "Plugged..\n");
- if (initString != NULL && *initString != '\0' && strlen(initString) < HTS_URLMAXSIZE / 2) {
- strcpy(base, initString);
- fprintf(stderr, "Using root '%s'\n", base);
- return 1; /* success */
- } else {
- fprintf(stderr, "** callback error: arguments expected or bad arguments\n");
- fprintf(stderr, "usage: httrack --wrapper save-name=callback:mysavename,base\n");
- fprintf(stderr, "example: httrack --wrapper save-name=callback:mysavename,http://www.example.com/\n");
- return 0; /* failed */
- }
-}
+static int check_detectedlink_end(t_hts_callbackarg *carg, httrackp *opt) {
+ char *base = (char*) CALLBACKARG_USERDEF(carg);
-/* <wrappername>_exit() will be called, if exists, upon exit */
-EXTERNAL_FUNCTION int check_detectedlink_exit(void) {
fprintf(stderr, "Unplugged ..\n");
- return 1; /* success (result ignored anyway in xx_exit) */
+ if (base != NULL) {
+ free(base);
+ base = NULL;
+ }
+
+ /* Call parent functions if multiple callbacks are chained. */
+ if (CALLBACKARG_PREV_FUN(carg, end) != NULL) {
+ return CALLBACKARG_PREV_FUN(carg, end)(CALLBACKARG_PREV_CARG(carg), opt);
+ }
+
+ return 1; /* success */
}
diff --git a/libtest/callbacks-example-changecontent.c b/libtest/callbacks-example-changecontent.c new file mode 100755 index 0000000..7e0e0a9 --- /dev/null +++ b/libtest/callbacks-example-changecontent.c @@ -0,0 +1,65 @@ +/*
+ HTTrack external callbacks example : changing the content of downloaded pages ("postprocess" callback)
+ Example of the hts_plug() module entry point chaining a "postprocess" callback
+ .c file
+
+ How to build: (callback.so or callback.dll)
+ With GNU-GCC:
+ gcc -O -g3 -Wall -D_REENTRANT -shared -o mycallback.so callbacks-example.c -lhttrack1
+ With MS-Visual C++:
+ cl -LD -nologo -W3 -Zi -Zp4 -DWIN32 -Fe"mycallback.dll" callbacks-example.c libhttrack1.lib
+
+ Note: the httrack library linker option is only necessary when using libhttrack's functions inside the callback
+
+ How to use:
+ httrack --wrapper mycallback ..
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* Standard httrack module includes */
+#include "httrack-library.h"
+#include "htsopt.h"
+#include "htsdefines.h"
+
+/* Local function definitions */
+static int postprocess(t_hts_callbackarg *carg, httrackp *opt,
+ char** html, int* len,
+ const char* url_address, const char* url_file);
+
+/* external functions */
+EXTERNAL_FUNCTION int hts_plug(httrackp *opt, const char* argv);
+
+/*
+module entry point
+*/
+EXTERNAL_FUNCTION int hts_plug(httrackp *opt, const char* argv) { + const char *arg = strchr(argv, ','); + if (arg != NULL) + arg++; +
+ /* Plug callback functions */
+ CHAIN_FUNCTION(opt, postprocess, postprocess, NULL); + + return 1; /* success */
+}
+
+static int postprocess(t_hts_callbackarg *carg, httrackp *opt, char** html,int* len,const char* url_address,const char* url_file) {
+ char *old = *html;
+
+ /* Call parent functions if multiple callbacks are chained. */
+ if (CALLBACKARG_PREV_FUN(carg, postprocess) != NULL) {
+ if (CALLBACKARG_PREV_FUN(carg, postprocess)(CALLBACKARG_PREV_CARG(carg), opt, html, len, url_address, url_file)) {
+ /* Modified *html */
+ old = *html;
+ }
+ }
+
+ /* Process */
+ *html = strdup(*html);
+ hts_free(old);
+
+ return 1;
+}
diff --git a/libtest/callbacks-example-contentfilter.c b/libtest/callbacks-example-contentfilter.c index 54ee9c0..069a99f 100755 --- a/libtest/callbacks-example-contentfilter.c +++ b/libtest/callbacks-example-contentfilter.c @@ -3,32 +3,33 @@ Example of <wrappername>_init and <wrappername>_exit call (httrack >> 3.31)
.c file
+ How to build: (callback.so or callback.dll)
+ With GNU-GCC:
+ gcc -O -g3 -Wall -D_REENTRANT -shared -o mycallback.so callbacks-example.c -lhttrack1
+ With MS-Visual C++:
+ cl -LD -nologo -W3 -Zi -Zp4 -DWIN32 -Fe"mycallback.dll" callbacks-example.c libhttrack1.lib
+
+ Note: the httrack library linker option is only necessary when using libhttrack's functions inside the callback
+
How to use:
- - compile this file as a module (callback.so or callback.dll)
- example:
- (with gcc)
- gcc -O -g3 -Wall -D_REENTRANT -DINET6 -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -shared -o callback.so callbacks-example-contentfilter.c
- or (with visual c++)
- cl -LD -nologo -W3 -Zi -Zp4 -DWIN32 -Fe"callback.dll" callbacks-example-contentfilter.c
- - use the --wrapper option in httrack:
- httrack --wrapper save-name=callback:process,string[,string..]
+ httrack --wrapper mycallback,stringtofind,stringtofind.. ..
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-/* "External" */
-#ifdef _WIN32
-#define EXTERNAL_FUNCTION __declspec(dllexport)
-#else
-#define EXTERNAL_FUNCTION
-#endif
+/* Standard httrack module includes */
+#include "httrack-library.h"
+#include "htsopt.h"
+#include "htsdefines.h"
-/* Function definitions */
-EXTERNAL_FUNCTION int process(char* html, int len, char* address, char* filename);
-EXTERNAL_FUNCTION int wrapper_init(char* module, char* initString);
-EXTERNAL_FUNCTION int wrapper_exit(void);
+/* Local function definitions */
+static int process(t_hts_callbackarg *carg, httrackp *opt, char* html, int len, const char* address, const char* filename);
+static int end(t_hts_callbackarg *carg, httrackp *opt);
+
+/* external functions */
+EXTERNAL_FUNCTION int hts_plug(httrackp *opt, const char* argv);
/* TOLOWER */
#define TOLOWER_(a) (a >= 'A' && a <= 'Z') ? (a + ('a' - 'A')) : a
@@ -38,24 +39,74 @@ EXTERNAL_FUNCTION int wrapper_exit(void); This sample just crawls pages that contains certain keywords, and skips the other ones
*/
-static char stringfilter[8192];
-static char* stringfilters[128];
-static int initialized = 0;
+typedef struct t_my_userdef {
+ char stringfilter[8192];
+ char* stringfilters[128];
+} t_my_userdef;
-/*
-"check-html" callback
-from htsdefines.h:
-typedef int (* t_hts_htmlcheck)(char* html,int len,char* address,char* filename);
+/*
+module entry point
*/
-EXTERNAL_FUNCTION int process(char* html, int len, char* address, char* filename) {
+EXTERNAL_FUNCTION int hts_plug(httrackp *opt, const char* argv) {
+ const char *arg = strchr(argv, ',');
+ if (arg != NULL)
+ arg++;
+
+ /* Check args */
+ if (arg == NULL || *arg == '\0') {
+ fprintf(stderr, "** callback error: arguments expected or bad arguments\n");
+ fprintf(stderr, "usage: httrack --wrapper callback,stringtofind,stringtofind..\n");
+ fprintf(stderr, "example: httrack --wrapper callback,apple,orange,lemon\n");
+ return 0;
+ } else {
+ t_my_userdef *userdef = (t_my_userdef*) malloc(sizeof(t_my_userdef)); /* userdef */
+ char * const stringfilter = userdef->stringfilter;
+ char** const stringfilters = userdef->stringfilters;
+ /* */
+ char* a = stringfilter;
+ int i = 0;
+ fprintf(stderr, "** info: wrapper_init(%s) called!\n", arg);
+ fprintf(stderr, "** callback example: crawling pages only if specific keywords are found\n");
+
+ /* stringfilters = split(arg, ','); */
+ strcpy(stringfilter, arg);
+ while(a != NULL) {
+ stringfilters[i] = a;
+ a = strchr(a, ',');
+ if (a != NULL) {
+ *a = '\0';
+ a ++;
+ }
+ fprintf(stderr, "** callback info: will crawl pages with '%s' in them\n", stringfilters[i]);
+ i++;
+ }
+ stringfilters[i++] = NULL;
+
+ /* Plug callback functions */
+ CHAIN_FUNCTION(opt, check_html, process, userdef);
+ CHAIN_FUNCTION(opt, end, end, userdef);
+ }
+
+ return 1; /* success */
+}
+
+static int process(t_hts_callbackarg *carg, httrackp *opt, char* html, int len, const char* address, const char* filename) {
+ t_my_userdef *userdef = (t_my_userdef*) CALLBACKARG_USERDEF(carg);
+ char * const stringfilter = userdef->stringfilter;
+ char** const stringfilters = userdef->stringfilters;
+ /* */
int i = 0;
int getIt = 0;
char* pos;
- if (!initialized) {
- fprintf(stderr, "** ERROR! process_init() was not called by httrack - you are probably using an old version (<3.31)\n");
- fprintf(stderr, "** bailing out..\n");
- exit(1);
+
+ /* Call parent functions if multiple callbacks are chained. */
+ if (CALLBACKARG_PREV_FUN(carg, check_html) != NULL) {
+ if (!CALLBACKARG_PREV_FUN(carg, check_html)(CALLBACKARG_PREV_CARG(carg), opt, html, len, address, filename)) {
+ return 0; /* Abort */
+ }
}
+
+ /* Process */
if (strcmp(address, "primary") == 0 && strcmp(filename, "/primary") == 0) /* primary page (list of links) */
return 1;
while(stringfilters[i] != NULL && ! getIt) {
@@ -82,39 +133,18 @@ EXTERNAL_FUNCTION int process(char* html, int len, char* address, char* filename }
}
-/* <wrappername>_init() will be called, if exists, upon startup */
-EXTERNAL_FUNCTION int wrapper_init(char* module, char* initString) {
- char* a = stringfilter;
- int i = 0;
- fprintf(stderr, "** info: wrapper_init(%s, %s) called!\n", module, initString);
- fprintf(stderr, "** callback example: crawling pages only if specific keywords are found\n");
- if (initString == NULL || *initString == '\0') {
- fprintf(stderr, "** callback error: arguments expected or bad arguments\n");
- fprintf(stderr, "usage: httrack --wrapper save-name=callback:process,stringtofind,stringtofind..\n");
- fprintf(stderr, "example: httrack --wrapper save-name=callback:process,apple,orange,lemon\n");
- return 0;
+static int end(t_hts_callbackarg *carg, httrackp *opt) {
+ t_my_userdef *userdef = (t_my_userdef*) CALLBACKARG_USERDEF(carg);
+ fprintf(stderr, "** info: wrapper_exit() called!\n");
+ if (userdef != NULL) {
+ free(userdef);
+ userdef = NULL;
}
- /* stringfilters = split(initString, ','); */
- strcpy(stringfilter, initString);
- while(a != NULL) {
- stringfilters[i] = a;
- a = strchr(a, ',');
- if (a != NULL) {
- *a = '\0';
- a ++;
- }
- fprintf(stderr, "** callback info: will crawl pages with '%s' in them\n", stringfilters[i]);
- i++;
+ /* Call parent functions if multiple callbacks are chained. */
+ if (CALLBACKARG_PREV_FUN(carg, end) != NULL) {
+ return CALLBACKARG_PREV_FUN(carg, end)(CALLBACKARG_PREV_CARG(carg), opt);
}
- stringfilters[i++] = NULL;
- initialized = 1; /* we're ok */
- return 1; /* success */
-}
-/* <wrappername>_exit() will be called, if exists, upon exit */
-EXTERNAL_FUNCTION int wrapper_exit(void) {
- fprintf(stderr, "** info: wrapper_exit() called!\n");
- initialized = 0;
- return 1; /* success (result ignored anyway in xx_exit) */
+ return 1; /* success */
}
diff --git a/libtest/callbacks-example-displayheader.c b/libtest/callbacks-example-displayheader.c index 1f9c471..a8333b3 100755 --- a/libtest/callbacks-example-displayheader.c +++ b/libtest/callbacks-example-displayheader.c @@ -3,55 +3,64 @@ Example of <wrappername>_init and <wrappername>_exit call (httrack >> 3.31)
.c file
+ How to build: (callback.so or callback.dll)
+ With GNU-GCC:
+ gcc -O -g3 -Wall -D_REENTRANT -shared -o mycallback.so callbacks-example.c -lhttrack1
+ With MS-Visual C++:
+ cl -LD -nologo -W3 -Zi -Zp4 -DWIN32 -Fe"mycallback.dll" callbacks-example.c libhttrack1.lib
+
+ Note: the httrack library linker option is only necessary when using libhttrack's functions inside the callback
+
How to use:
- - compile this file as a module (callback.so or callback.dll)
- example:
- (with gcc)
- gcc -O -g3 -Wall -D_REENTRANT -DINET6 -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -shared -o callback.so callbacks-example-contentfilter.c
- or (with visual c++)
- cl -LD -nologo -W3 -Zi -Zp4 -DWIN32 -Fe"callback.dll" callbacks-example-displayheader.c
- - use the --wrapper option in httrack:
- httrack --wrapper save-name=callback:process,string[,string..]
+ httrack --wrapper mycallback ..
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+/* Standard httrack module includes */
#include "httrack-library.h"
+#include "htsopt.h"
+#include "htsdefines.h"
-/* "External" */
-#ifdef _WIN32
-#define EXTERNAL_FUNCTION __declspec(dllexport)
-#else
-#define EXTERNAL_FUNCTION
-#endif
+/* Local function definitions */
+static int process(t_hts_callbackarg *carg, httrackp *opt,
+ char* buff, const char* adr, const char* fil,
+ const char* referer_adr, const char* referer_fil,
+ htsblk* incoming);
-/* Function definitions */
-EXTERNAL_FUNCTION int process(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, void* incoming);
-EXTERNAL_FUNCTION int wrapper_init(char* module, char* initString);
-EXTERNAL_FUNCTION int wrapper_exit(void);
+/* external functions */
+EXTERNAL_FUNCTION int hts_plug(httrackp *opt, const char* argv);
-/*
-"receive-header" callback
-from htsdefines.h:
-typedef int (* t_hts_htmlcheck_receivehead)(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* incoming);
+/*
+module entry point
*/
-EXTERNAL_FUNCTION int process(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, void* incoming) {
- printf("[ %s%s ]\n%s\n", adr, fil, buff);
- return 1; /* success */
-}
+EXTERNAL_FUNCTION int hts_plug(httrackp *opt, const char* argv) {
+ const char *arg = strchr(argv, ',');
+ if (arg != NULL)
+ arg++;
+
+ /* Plug callback functions */
+ CHAIN_FUNCTION(opt, receivehead, process, NULL);
-/* <wrappername>_init() will be called, if exists, upon startup */
-static char* thisModule = NULL;
-EXTERNAL_FUNCTION int wrapper_init(char* module, char* initString) {
- fprintf(stderr, "Plugged %s\n", module);
- thisModule = module;
return 1; /* success */
}
-/* <wrappername>_exit() will be called, if exists, upon exit */
-EXTERNAL_FUNCTION int wrapper_exit(void) {
- fprintf(stderr, "Unplugged %s\n", thisModule);
- return 1; /* success (result ignored anyway in xx_exit) */
+static int process(t_hts_callbackarg *carg, httrackp *opt,
+ char* buff, const char* adr, const char* fil,
+ const char* referer_adr, const char* referer_fil,
+ htsblk* incoming) {
+
+ /* Call parent functions if multiple callbacks are chained. */
+ if (CALLBACKARG_PREV_FUN(carg, receivehead) != NULL) {
+ if (!CALLBACKARG_PREV_FUN(carg, receivehead)(CALLBACKARG_PREV_CARG(carg), opt, buff, adr, fil, referer_adr, referer_fil, incoming)) {
+ return 0; /* Abort */
+ }
+ }
+
+ /* Process */
+ printf("[ %s%s ]\n%s\n", adr, fil, buff);
+
+ return 1; /* success */
}
diff --git a/libtest/callbacks-example-filename.c b/libtest/callbacks-example-filename.c index 0755b4d..a6c7fa1 100755 --- a/libtest/callbacks-example-filename.c +++ b/libtest/callbacks-example-filename.c @@ -2,30 +2,35 @@ HTTrack external callbacks example : changing the destination filename
.c file
+ How to build: (callback.so or callback.dll)
+ With GNU-GCC:
+ gcc -O -g3 -Wall -D_REENTRANT -shared -o mycallback.so callbacks-example.c -lhttrack1
+ With MS-Visual C++:
+ cl -LD -nologo -W3 -Zi -Zp4 -DWIN32 -Fe"mycallback.dll" callbacks-example.c libhttrack1.lib
+
+ Note: the httrack library linker option is only necessary when using libhttrack's functions inside the callback
+
How to use:
- - compile this file as a module (callback.so or callback.dll)
- example:
- (with gcc)
- gcc -O -g3 -Wall -D_REENTRANT -DINET6 -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -shared -o callback.so callbacks-example-filename.c
- or (with visual c++)
- cl -LD -nologo -W3 -Zi -Zp4 -DWIN32 -Fe"callback.dll" callbacks-example-filename.c
- - use the --wrapper option in httrack:
- httrack --wrapper save-name=callback:mysavename
+ httrack --wrapper mycallback ..
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-/* "External" */
-#ifdef _WIN32
-#define EXTERNAL_FUNCTION __declspec(dllexport)
-#else
-#define EXTERNAL_FUNCTION
-#endif
+/* Standard httrack module includes */
+#include "httrack-library.h"
+#include "htsopt.h"
+#include "htsdefines.h"
+
+/* Local function definitions */
+static int mysavename(t_hts_callbackarg *carg, httrackp *opt, const char* adr_complete, const char* fil_complete, const char* referer_adr, const char* referer_fil, char* save);
+
+/* external functions */
+EXTERNAL_FUNCTION int hts_plug(httrackp *opt, const char* argv);
-/* Function definitions */
-EXTERNAL_FUNCTION int mysavename(char* adr_complete, char* fil_complete, char* referer_adr, char* referer_fil, char* save);
+/* Options settings */
+#include "htsopt.h"
/* TOLOWER */
#define TOLOWER_(a) (a >= 'A' && a <= 'Z') ? (a + ('a' - 'A')) : a
@@ -46,18 +51,35 @@ EXTERNAL_FUNCTION int mysavename(char* adr_complete, char* fil_complete, char* r This sample can be improved, for example, to make a map of a website.
*/
-/*
-"check-html" callback
-from htsdefines.h:
-typedef int (* t_hts_htmlcheck_savename)(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save);
+/*
+module entry point
*/
-EXTERNAL_FUNCTION int mysavename(char* adr_complete, char* fil_complete, char* referer_adr, char* referer_fil, char* save) {
- char* a = save;
- while(*a) {
+EXTERNAL_FUNCTION int hts_plug(httrackp *opt, const char* argv) {
+ const char *arg = strchr(argv, ',');
+ if (arg != NULL)
+ arg++;
+
+ /* Plug callback functions */
+ CHAIN_FUNCTION(opt, savename, mysavename, NULL);
+
+ return 1; /* success */
+}
+
+static int mysavename(t_hts_callbackarg *carg, httrackp *opt, const char* adr_complete, const char* fil_complete, const char* referer_adr, const char* referer_fil, char* save) {
+ char* a;
+
+ /* Call parent functions if multiple callbacks are chained. */
+ if (CALLBACKARG_PREV_FUN(carg, savename) != NULL) {
+ if (!CALLBACKARG_PREV_FUN(carg, savename)(CALLBACKARG_PREV_CARG(carg), opt, adr_complete, fil_complete, referer_adr, referer_fil, save)) {
+ return 0; /* Abort */
+ }
+ }
+
+ /* Process */
+ for(a = save ; *a != 0 ; a++) {
char c = TOLOWER(*a);
if (c >= 'a' && c <= 'z')
*a = ( ( ( c - 'a' ) + 13 ) % 26 ) + 'a'; // ROT-13
- a++;
}
return 1; /* success */
diff --git a/libtest/callbacks-example-filename2.c b/libtest/callbacks-example-filename2.c index ded2e8f..a66420e 100755 --- a/libtest/callbacks-example-filename2.c +++ b/libtest/callbacks-example-filename2.c @@ -1,34 +1,31 @@ /*
- HTTrack external callbacks example : changing the destination filename
- Example of <wrappername>_init and <wrappername>_exit call (httrack >> 3.31)
- .c file
+ How to build: (callback.so or callback.dll)
+ With GNU-GCC:
+ gcc -O -g3 -Wall -D_REENTRANT -shared -o mycallback.so callbacks-example.c -lhttrack1
+ With MS-Visual C++:
+ cl -LD -nologo -W3 -Zi -Zp4 -DWIN32 -Fe"mycallback.dll" callbacks-example.c libhttrack1.lib
+
+ Note: the httrack library linker option is only necessary when using libhttrack's functions inside the callback
How to use:
- - compile this file as a module (callback.so or callback.dll)
- example:
- (with gcc)
- gcc -O -g3 -Wall -D_REENTRANT -DINET6 -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -shared -o callback.so callbacks-example-filename.c
- or (with visual c++)
- cl -LD -nologo -W3 -Zi -Zp4 -DWIN32 -Fe"callback.dll" callbacks-example-filename.c
- - use the --wrapper option in httrack:
- httrack --wrapper save-name=callback:mysavename,string1,string2
+ httrack --wrapper mycallback,string1,string2 ..
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-/* "External" */
-#ifdef _WIN32
-#define EXTERNAL_FUNCTION __declspec(dllexport)
-#else
-#define EXTERNAL_FUNCTION
-#endif
+/* Standard httrack module includes */
+#include "httrack-library.h"
+#include "htsopt.h"
+#include "htsdefines.h"
/* Function definitions */
-EXTERNAL_FUNCTION int mysavename(char* adr_complete, char* fil_complete, char* referer_adr, char* referer_fil, char* save);
-EXTERNAL_FUNCTION int wrapper_init(char* module, char* initString);
-EXTERNAL_FUNCTION int wrapper_exit(void);
+static int mysavename(t_hts_callbackarg *carg, httrackp *opt, const char* adr_complete, const char* fil_complete, const char* referer_adr, const char* referer_fil, char* save);
+static int myend(t_hts_callbackarg *carg, httrackp *opt);
+
+/* external functions */
+EXTERNAL_FUNCTION int hts_plug(httrackp *opt, const char* argv);
/* TOLOWER */
#define TOLOWER_(a) (a >= 'A' && a <= 'Z') ? (a + ('a' - 'A')) : a
@@ -40,24 +37,82 @@ EXTERNAL_FUNCTION int wrapper_exit(void); httrack --wrapper save-name=callback:mysavename,string1,string2 ..
*/
-static char string1[256];
-static char string2[256];
-static int initialized = 0;
+typedef struct t_my_userdef {
+ char string1[256];
+ char string2[256];
+} t_my_userdef;
-/*
-"check-html" callback
-from htsdefines.h:
-typedef int (* t_hts_htmlcheck_savename)(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save);
+/*
+module entry point
*/
-EXTERNAL_FUNCTION int mysavename(char* adr_complete, char* fil_complete, char* referer_adr, char* referer_fil, char* save) {
- char* buff = strdup(save);
- char* a = buff;
- char* b = save;
- if (!initialized) {
- fprintf(stderr, "** ERROR! mysavename_init() was not called by httrack - you are probably using an old version (<3.31)\n");
- fprintf(stderr, "** bailing out..\n");
- exit(1);
+EXTERNAL_FUNCTION int hts_plug(httrackp *opt, const char* argv) {
+ const char *arg = strchr(argv, ',');
+ if (arg != NULL)
+ arg++;
+
+ /* Check args */
+ if (arg == NULL || *arg == '\0' || strchr(arg, ',') == NULL) {
+ fprintf(stderr, "** callback error: arguments expected or bad arguments\n");
+ fprintf(stderr, "usage: httrack --wrapper save-name=callback:mysavename,string1,string2\n");
+ fprintf(stderr, "example: httrack --wrapper save-name=callback:mysavename,foo,bar\n");
+ return 0; /* failed */
+ } else {
+ char *pos = strchr(arg, ',');
+ t_my_userdef *userdef = (t_my_userdef*) malloc(sizeof(t_my_userdef));
+ char * const string1 = userdef->string1;
+ char * const string2 = userdef->string2;
+
+ /* Split args */
+ fprintf(stderr, "** info: wrapper_init(%s) called!\n", arg);
+ fprintf(stderr, "** callback example: changing destination filename word by another one\n");
+ string1[0] = string1[1] = '\0';
+ strncat(string1, arg, pos - arg);
+ strcpy(string2, pos + 1);
+ fprintf(stderr, "** callback info: will replace %s by %s in filenames!\n", string1, string2);
+
+ /* Plug callback functions */
+ CHAIN_FUNCTION(opt, savename, mysavename, userdef);
+ CHAIN_FUNCTION(opt, end, myend, userdef);
+ }
+
+ return 1; /* success */
+}
+
+static int myend(t_hts_callbackarg *carg, httrackp *opt) {
+ t_my_userdef *userdef = (t_my_userdef*) CALLBACKARG_USERDEF(carg);
+
+ fprintf(stderr, "** info: wrapper_exit() called!\n");
+ if (userdef != NULL) {
+ free(userdef);
+ userdef = NULL;
+ }
+
+ /* Call parent functions if multiple callbacks are chained. */
+ if (CALLBACKARG_PREV_FUN(carg, end) != NULL) {
+ return CALLBACKARG_PREV_FUN(carg, end)(CALLBACKARG_PREV_CARG(carg), opt);
}
+
+ return 1; /* success */
+}
+
+static int mysavename(t_hts_callbackarg *carg, httrackp *opt, const char* adr_complete, const char* fil_complete, const char* referer_adr, const char* referer_fil, char* save) {
+ t_my_userdef *userdef = (t_my_userdef*) CALLBACKARG_USERDEF(carg);
+ char * const string1 = userdef->string1;
+ char * const string2 = userdef->string2;
+ /* */
+ char *buff, *a, *b;
+
+ /* Call parent functions if multiple callbacks are chained. */
+ if (CALLBACKARG_PREV_FUN(carg, savename) != NULL) {
+ if (!CALLBACKARG_PREV_FUN(carg, savename)(CALLBACKARG_PREV_CARG(carg), opt, adr_complete, fil_complete, referer_adr, referer_fil, save)) {
+ return 0; /* Abort */
+ }
+ }
+
+ /* Process */
+ buff = strdup(save);
+ a = buff;
+ b = save;
*b = '\0'; /* the "save" variable points to a buffer with "sufficient" space */
while(*a) {
if (strncmp(a, string1, (int)strlen(string1)) == 0) {
@@ -70,31 +125,6 @@ EXTERNAL_FUNCTION int mysavename(char* adr_complete, char* fil_complete, char* r }
}
free(buff);
- return 1; /* success */
-}
-/* <wrappername>_init() will be called, if exists, upon startup */
-EXTERNAL_FUNCTION int wrapper_init(char* module, char* initString) {
- char* pos;
- fprintf(stderr, "** info: wrapper_init(%s, %s) called!\n", module, initString);
- fprintf(stderr, "** callback example: changing destination filename word by another one\n");
- if (initString == NULL || *initString == '\0' || (pos = strchr(initString, ',') ) == NULL) {
- fprintf(stderr, "** callback error: arguments expected or bad arguments\n");
- fprintf(stderr, "usage: httrack --wrapper save-name=callback:mysavename,string1,string2\n");
- fprintf(stderr, "example: httrack --wrapper save-name=callback:mysavename,foo,bar\n");
- return 0;
- }
- string1[0] = string1[1] = '\0';
- strncat(string1, initString, pos - initString);
- strcpy(string2, pos + 1);
- fprintf(stderr, "** callback info: will replace %s by %s in filenames!\n", string1, string2);
- initialized = 1; /* we're ok */
return 1; /* success */
}
-
-/* <wrappername>_exit() will be called, if exists, upon exit */
-EXTERNAL_FUNCTION int wrapper_exit(void) {
- fprintf(stderr, "** info: wrapper_exit() called!\n");
- initialized = 0;
- return 1; /* success (result ignored anyway in xx_exit) */
-}
diff --git a/libtest/callbacks-example-filenameiisbug.c b/libtest/callbacks-example-filenameiisbug.c index eb162d9..59c42f5 100755 --- a/libtest/callbacks-example-filenameiisbug.c +++ b/libtest/callbacks-example-filenameiisbug.c @@ -2,43 +2,57 @@ HTTrack external callbacks example : changing folder names ending with ".com"
with ".c0m" as a workaround of IIS bug (see KB 275601)
- How to use:
- - compile this file as a module (callback.so or callback.dll)
- example:
- (with gcc)
- gcc -O -g3 -Wall -D_REENTRANT -DINET6 -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -shared -o callback.so callbacks-example-filename.c
- or (with visual c++)
- cl -LD -nologo -W3 -Zi -Zp4 -DWIN32 -Fe"callback.dll" callbacks-example-filename.c
- - use the --wrapper option in httrack:
- httrack --wrapper save-name=callback:mysavename
+ How to build: (callback.so or callback.dll)
+ With GNU-GCC:
+ gcc -O -g3 -Wall -D_REENTRANT -shared -o mycallback.so callbacks-example.c -lhttrack1
+ With MS-Visual C++:
+ cl -LD -nologo -W3 -Zi -Zp4 -DWIN32 -Fe"mycallback.dll" callbacks-example.c libhttrack1.lib
+
+ Note: the httrack library linker option is only necessary when using libhttrack's functions inside the callback
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-/* "External" */
-#ifdef _WIN32
-#define EXTERNAL_FUNCTION __declspec(dllexport)
-#else
-#define EXTERNAL_FUNCTION
-#endif
+/* Standard httrack module includes */
+#include "httrack-library.h"
+#include "htsopt.h"
+#include "htsdefines.h"
/* Function definitions */
-EXTERNAL_FUNCTION int mysavename(char* adr_complete, char* fil_complete, char* referer_adr, char* referer_fil, char* save);
+static int mysavename(t_hts_callbackarg *carg, httrackp *opt, const char* adr_complete, const char* fil_complete, const char* referer_adr, const char* referer_fil, char* save);
-/*
- Replaces all "offending" IIS extensions (exe, dll..) with "nice" ones
+/* external functions */
+EXTERNAL_FUNCTION int hts_plug(httrackp *opt, const char* argv);
+
+/*
+module entry point
*/
+EXTERNAL_FUNCTION int hts_plug(httrackp *opt, const char* argv) {
+ const char *arg = strchr(argv, ',');
+ if (arg != NULL)
+ arg++;
+ CHAIN_FUNCTION(opt, savename, mysavename, NULL);
+ return 1; /* success */
+}
+
/*
-"check-html" callback
-from htsdefines.h:
-typedef int (* t_hts_htmlcheck_savename)(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save);
+ Replaces all "offending" IIS extensions (exe, dll..) with "nice" ones
*/
-EXTERNAL_FUNCTION int mysavename(char* adr_complete, char* fil_complete, char* referer_adr, char* referer_fil, char* save) {
+static int mysavename(t_hts_callbackarg *carg, httrackp *opt, const char* adr_complete, const char* fil_complete, const char* referer_adr, const char* referer_fil, char* save) {
static const char* iisBogus[] = { ".com", ".exe", ".dll", ".sh", NULL };
static const char* iisBogusReplace[] = { ".c0m", ".ex3", ".dl1", ".5h", NULL }; /* MUST be the same sizes */
char* a;
+
+ /* Call parent functions if multiple callbacks are chained. */
+ if (CALLBACKARG_PREV_FUN(carg, savename) != NULL) {
+ if (!CALLBACKARG_PREV_FUN(carg, savename)(CALLBACKARG_PREV_CARG(carg), opt, adr_complete, fil_complete, referer_adr, referer_fil, save)) {
+ return 0; /* Abort */
+ }
+ }
+
+ /* Process */
for(a = save ; *a != '\0' ; a++) {
int i;
for(i = 0 ; iisBogus[i] != NULL ; i++) {
@@ -50,5 +64,6 @@ EXTERNAL_FUNCTION int mysavename(char* adr_complete, char* fil_complete, char* r }
}
}
+
return 1; /* success */
}
diff --git a/libtest/callbacks-example-listlinks.c b/libtest/callbacks-example-listlinks.c index 26c2055..9aef247 100755 --- a/libtest/callbacks-example-listlinks.c +++ b/libtest/callbacks-example-listlinks.c @@ -2,35 +2,35 @@ HTTrack external callbacks example
.c file
+ How to build: (callback.so or callback.dll)
+ With GNU-GCC:
+ gcc -O -g3 -Wall -D_REENTRANT -shared -o mycallback.so callbacks-example.c -lhttrack1
+ With MS-Visual C++:
+ cl -LD -nologo -W3 -Zi -Zp4 -DWIN32 -Fe"mycallback.dll" callbacks-example.c libhttrack1.lib
+
+ Note: the httrack library linker option is only necessary when using libhttrack's functions inside the callback
+
How to use:
- - compile this file as a module (callback.so or callback.dll)
- example:
- (with gcc)
- gcc -O -g3 -Wall -D_REENTRANT -DINET6 -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -shared -o callback.so callbacks-example.c
- or (with visual c++)
- cl -LD -nologo -W3 -Zi -Zp4 -DWIN32 -Fe"callback.dll" callbacks-example.c
- - use the --wrapper option in httrack:
- httrack --wrapper check-html=callback:process_file
- --wrapper link-detected=callback:check_detectedlink
- --wrapper loop=callback:check_loop
+ httrack --wrapper mycallback ..
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-/* "External" */
-#ifdef _WIN32
-#define EXTERNAL_FUNCTION __declspec(dllexport)
-#else
-#define EXTERNAL_FUNCTION
-#endif
+/* Standard httrack module includes */
+#include "httrack-library.h"
+#include "htsopt.h"
+#include "htsdefines.h"
/* Function definitions */
-EXTERNAL_FUNCTION int process_file(char* html, int len, char* url_adresse, char* url_fichier);
-EXTERNAL_FUNCTION int check_detectedlink(char* link);
-EXTERNAL_FUNCTION int check_loop(void* back,int back_max,int back_index,int lien_tot,int lien_ntot,int stat_time,void* stats);
-EXTERNAL_FUNCTION int check_void(void);
+static int process_file(t_hts_callbackarg *carg, httrackp *opt, char* html, int len, const char* url_address, const char* url_file);
+static int check_detectedlink(t_hts_callbackarg *carg, httrackp *opt, char* link);
+static int check_loop(t_hts_callbackarg *carg, httrackp *opt, void* back,int back_max,int back_index,int lien_tot,int lien_ntot,int stat_time,void* stats);
+static int end(t_hts_callbackarg *carg, httrackp *opt);
+
+/* external functions */
+EXTERNAL_FUNCTION int hts_plug(httrackp *opt, const char* argv);
/*
This sample just lists all links in documents with the parent link:
@@ -38,42 +38,96 @@ EXTERNAL_FUNCTION int check_void(void); This sample can be improved, for example, to make a map of a website.
*/
-static char currentURLBeingParsed[2048];
+typedef struct t_my_userdef {
+ char currentURLBeingParsed[2048];
+} t_my_userdef;
-/*
-"check-html" callback
-typedef int (* t_hts_htmlcheck)(char* html,int len,char* url_adresse,char* url_fichier);
+/*
+module entry point
*/
-EXTERNAL_FUNCTION int process_file(char* html, int len, char* url_adresse, char* url_fichier) {
- printf("now parsing %s%s..\n", url_adresse, url_fichier);
- strcpy(currentURLBeingParsed, url_adresse);
- strcat(currentURLBeingParsed, url_fichier);
+EXTERNAL_FUNCTION int hts_plug(httrackp *opt, const char* argv) {
+ t_my_userdef *userdef;
+ /* */
+ const char *arg = strchr(argv, ',');
+ if (arg != NULL)
+ arg++;
+
+ /* Create user-defined structure */
+ userdef = (t_my_userdef*) malloc(sizeof(t_my_userdef)); /* userdef */
+ userdef->currentURLBeingParsed[0] = '\0';
+
+ /* Plug callback functions */
+ CHAIN_FUNCTION(opt, check_html, process_file, userdef);
+ CHAIN_FUNCTION(opt, end, end, userdef);
+ CHAIN_FUNCTION(opt, linkdetected, check_detectedlink, userdef);
+ CHAIN_FUNCTION(opt, loop, check_loop, userdef);
+
return 1; /* success */
}
-/*
-"link-detected" callback
-typedef int (* t_hts_htmlcheck_linkdetected)(char* link);
-*/
-EXTERNAL_FUNCTION int check_detectedlink(char* link) {
+/*
+ "check_html" callback: remembers which page is being parsed so that
+ check_detectedlink() can print it next to each detected link.
+ Returns 0 as soon as a previously chained callback returns 0, else 1.
+*/
+static int process_file(t_hts_callbackarg *carg, httrackp *opt, char* html, int len, const char* url_address, const char* url_file) {
+  t_my_userdef *userdef = (t_my_userdef*) CALLBACKARG_USERDEF(carg);
+  char * const currentURLBeingParsed = userdef->currentURLBeingParsed;
+  /* sizeof must be taken on the array member, not on the pointer alias above */
+  const size_t capacity = sizeof(userdef->currentURLBeingParsed);
+
+  /* Call parent functions if multiple callbacks are chained. */
+  if (CALLBACKARG_PREV_FUN(carg, check_html) != NULL) {
+    if (!CALLBACKARG_PREV_FUN(carg, check_html)(CALLBACKARG_PREV_CARG(carg), opt, html, len, url_address, url_file)) {
+      return 0;  /* Abort */
+    }
+  }
+
+  /* Process */
+  printf("now parsing %s%s..\n", url_address, url_file);
+  /* Bounded concatenation: an over-long URL is truncated instead of
+     overflowing the fixed-size buffer (strncat always NUL-terminates). */
+  currentURLBeingParsed[0] = '\0';
+  strncat(currentURLBeingParsed, url_address, capacity - 1);
+  strncat(currentURLBeingParsed, url_file, capacity - 1 - strlen(currentURLBeingParsed));
+
+  return 1;  /* success */
+}
+
+/* "linkdetected" callback: prints the page recorded by process_file() together with the link found in it. */
+static int check_detectedlink(t_hts_callbackarg *carg, httrackp *opt, char* link) {
+ t_my_userdef *userdef = (t_my_userdef*) CALLBACKARG_USERDEF(carg);
+ char * const currentURLBeingParsed = userdef->currentURLBeingParsed;
+
+ /* Call parent functions if multiple callbacks are chained; a 0 from the parent aborts. */
+ if (CALLBACKARG_PREV_FUN(carg, linkdetected) != NULL) {
+ if (!CALLBACKARG_PREV_FUN(carg, linkdetected)(CALLBACKARG_PREV_CARG(carg), opt, link)) {
+ return 0; /* Abort */
+ }
+ }
+
+ /* Process: "[page] -> [link]" */
printf("[%s] -> [%s]\n", currentURLBeingParsed, link);
+
return 1; /* success */
}
-/*
-"loop" callback
-typedef int (* t_hts_htmlcheck_loop)(void* back,int back_max,int back_index,int lien_tot,int lien_ntot,int stat_time,void* stats);
-*/
-EXTERNAL_FUNCTION int check_loop(void* back,int back_max,int back_index,int lien_tot,int lien_ntot,int stat_time,void* stats) {
+/* "loop" callback: prints one spinner character followed by a carriage return on every call. */
+static int check_loop(t_hts_callbackarg *carg, httrackp *opt, void* back,int back_max,int back_index,int lien_tot,int lien_ntot,int stat_time,void* stats) {
static int fun_animation=0;
+
+ /* Call parent functions if multiple callbacks are chained; a 0 from the parent aborts. */
+ if (CALLBACKARG_PREV_FUN(carg, loop) != NULL) {
+ if (!CALLBACKARG_PREV_FUN(carg, loop)(CALLBACKARG_PREV_CARG(carg), opt, back, back_max, back_index, lien_tot, lien_ntot, stat_time, stats)) {
+ return 0; /* Abort */
+ }
+ }
+
+ /* Process: the static call counter selects the next of the four spinner characters */
printf("%c\r", "/-\\|"[(fun_animation++)%4]);
return 1;
}
-/*
-a default callback for testing purpose
-*/
-EXTERNAL_FUNCTION int check_void(void) {
- printf("\n* * * default callback function called! * * *\n\n");
- return 1;
+/*
+ "end" callback: releases the state allocated in hts_plug(), then defers to
+ any previously chained "end" callback and returns its status (1 if none).
+*/
+static int end(t_hts_callbackarg *carg, httrackp *opt) {
+  t_my_userdef *state = (t_my_userdef*) CALLBACKARG_USERDEF(carg);
+
+  fprintf(stderr, "** info: wrapper_exit() called!\n");
+  free(state);  /* free(NULL) is a no-op, so no guard is needed */
+
+  /* Nothing chained before us: we are done. */
+  if (CALLBACKARG_PREV_FUN(carg, end) == NULL) {
+    return 1;  /* success */
+  }
+
+  /* Otherwise the parent's status is the overall status. */
+  return CALLBACKARG_PREV_FUN(carg, end)(CALLBACKARG_PREV_CARG(carg), opt);
}
diff --git a/libtest/callbacks-example-log.c b/libtest/callbacks-example-log.c new file mode 100755 index 0000000..2834d05 --- /dev/null +++ b/libtest/callbacks-example-log.c @@ -0,0 +1,114 @@ +/*
+ HTTrack external callbacks example : dummy plugin that logs events for debugging purposes
+
+ How to build: (callback.so or callback.dll)
+ With GNU-GCC:
+ gcc -O -g3 -Wall -D_REENTRANT -shared -o mycallback.so callbacks-example-log.c -lhttrack1
+ With MS-Visual C++:
+ cl -LD -nologo -W3 -Zi -Zp4 -DWIN32 -Fe"mycallback.dll" callbacks-example-log.c libhttrack1.lib
+
+ Note: the httrack library linker option is only necessary when using libhttrack's functions inside the callback
+
+ How to use:
+ httrack --wrapper mycallback ..
+*/
+
+/* system includes */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* standard httrack module includes */
+#include "httrack-library.h"
+#include "htsopt.h"
+#include "htsdefines.h"
+
+/* external functions */
+EXTERNAL_FUNCTION int hts_plug(httrackp *opt, const char* argv);
+EXTERNAL_FUNCTION int hts_unplug(httrackp *opt);
+
+/* local function called as "check_html" callback */
+/*
+ Traces the parsed file on stderr and writes the same message to the
+ httrack log. Returns 0 as soon as a previously chained callback returns 0,
+ else 1. If the temporary message buffer cannot be allocated, the log entry
+ is skipped and the mirror continues.
+*/
+static int process_file(t_hts_callbackarg *carg, httrackp *opt,
+                        char* html, int len, const char* url_address, const char* url_file) {
+  void *ourDummyArg = (void*) CALLBACKARG_USERDEF(carg);   /*optional user-defined arg*/
+  char *fmt;
+
+  (void) ourDummyArg;  /* fetched for illustration only */
+
+  /* call parent functions if multiple callbacks are chained. you can skip this part, if you don't want previous callbacks to be called. */
+  if (CALLBACKARG_PREV_FUN(carg, check_html) != NULL) {
+    if (!CALLBACKARG_PREV_FUN(carg, check_html)(CALLBACKARG_PREV_CARG(carg), opt,
+                                                html, len, url_address, url_file)) {
+      return 0;  /* abort */
+    }
+  }
+
+  /* log */
+  fprintf(stderr, "* parsing file %s%s\n", url_address, url_file);
+  /* 128 spare bytes cover the fixed text and the terminator */
+  fmt = malloc(strlen(url_address) + strlen(url_file) + 128);
+  if (fmt != NULL) {
+    sprintf(fmt, " parsing file %s%s", url_address, url_file);
+    hts_log(opt, "log-wrapper-info", fmt);
+    free(fmt);
+  }
+
+  return 1;  /* success */
+}
+
+/*
+ local function called as "start" callback
+ Traces the mirror start on stderr and in the httrack log, then returns the
+ status of any previously chained "start" callback (1 if there is none).
+*/
+static int start_of_mirror(t_hts_callbackarg *carg, httrackp *opt) {
+  /* user-defined arg: the log prefix string registered by hts_plug() */
+  const char *arginfo = (char*) CALLBACKARG_USERDEF(carg);
+
+  fprintf(stderr, "* mirror start\n");
+  hts_log(opt, arginfo, "mirror started");
+
+  /* call parent functions if multiple callbacks are chained. you can skip this part, if you don't want previous callbacks to be called. */
+  /* the slot tested here must be the one invoked below ("start", not "end") */
+  if (CALLBACKARG_PREV_FUN(carg, start) != NULL) {
+    /* status is ok on our side, return other callback's status */
+    return CALLBACKARG_PREV_FUN(carg, start)(CALLBACKARG_PREV_CARG(carg), opt);
+  }
+
+  return 1;  /* success */
+}
+
+/* local function called as "end" callback */
+static int end_of_mirror(t_hts_callbackarg *carg, httrackp *opt) {
+  /* the user-defined arg is the log prefix string registered by hts_plug() */
+  const char *prefix = (char*) CALLBACKARG_USERDEF(carg);
+
+  /* trace on stderr and in the httrack log */
+  fprintf(stderr, "* mirror end\n");
+  hts_log(opt, prefix, "mirror ended");
+
+  /* no callback was chained before this one: report success ourselves */
+  if (CALLBACKARG_PREV_FUN(carg, end) == NULL) {
+    return 1;  /* success */
+  }
+
+  /* our side is fine, so the previous callback's status is the result */
+  return CALLBACKARG_PREV_FUN(carg, end)(CALLBACKARG_PREV_CARG(carg), opt);
+}
+
+/*
+module entry point
+the function name and prototype MUST match this prototype
+*/
+EXTERNAL_FUNCTION int hts_plug(httrackp *opt, const char* argv) {
+  /* the text after the first ',' of the wrapper argument, when present,
+     is used as the log prefix; otherwise a default prefix is used */
+  const char *comma = strchr(argv, ',');
+  const char *arg = (comma != NULL) ? comma + 1 : "log-wrapper-info";
+
+  /* plug callback functions, handing each of them the prefix as user-defined arg */
+  hts_log(opt, arg, "* plugging functions");
+  CHAIN_FUNCTION(opt, check_html, process_file, (char*) arg);
+  CHAIN_FUNCTION(opt, start, start_of_mirror, (char*) arg);
+  CHAIN_FUNCTION(opt, end, end_of_mirror, (char*) arg);
+  hts_log(opt, arg, "* module successfully plugged");
+
+  return 1;  /* success */
+}
+
+/*
+module exit point
+the function name and prototype MUST match this prototype
+*/
+EXTERNAL_FUNCTION int hts_unplug(httrackp *opt) {
+ /* always logged under the default prefix, even when hts_plug() was given a custom one */
+ hts_log(opt, "log-wrapper-info", "* module successfully unplugged");
+ return 1;
+}
diff --git a/libtest/callbacks-example-simple.c b/libtest/callbacks-example-simple.c new file mode 100755 index 0000000..e1f835a --- /dev/null +++ b/libtest/callbacks-example-simple.c @@ -0,0 +1,89 @@ +/*
+ HTTrack external callbacks example : print all downloaded html documents
+
+ How to build: (callback.so or callback.dll)
+ With GNU-GCC:
+ gcc -O -g3 -Wall -D_REENTRANT -shared -o mycallback.so callbacks-example-simple.c -lhttrack1
+ With MS-Visual C++:
+ cl -LD -nologo -W3 -Zi -Zp4 -DWIN32 -Fe"mycallback.dll" callbacks-example-simple.c libhttrack1.lib
+
+ Note: the httrack library linker option is only necessary when using libhttrack's functions inside the callback
+
+ How to use:
+ httrack --wrapper mycallback ..
+*/
+
+/* system includes */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* standard httrack module includes */
+#include "httrack-library.h"
+#include "htsopt.h"
+#include "htsdefines.h"
+
+/* external functions */
+EXTERNAL_FUNCTION int hts_plug(httrackp *opt, const char* argv);
+EXTERNAL_FUNCTION int hts_unplug(httrackp *opt);
+
+/* local function called as "check_html" callback */
+/*
+ carg: the callback argument structure, holding various information
+ opt:  the option settings
+ the remaining parameters are specific to the "check_html" callback
+*/
+static int process_file(t_hts_callbackarg *carg, httrackp *opt,
+                        char* html, int len, const char* url_address, const char* url_file) {
+  /* optional user-defined arg (this module registers NULL) */
+  void *ourDummyArg = (void*) CALLBACKARG_USERDEF(carg);
+
+  /* give previously chained callbacks the first say; this step can be
+     dropped if earlier callbacks should not be called */
+  if (CALLBACKARG_PREV_FUN(carg, check_html) != NULL
+      && !CALLBACKARG_PREV_FUN(carg, check_html)(CALLBACKARG_PREV_CARG(carg), opt,
+                                                 html, len, url_address, url_file)) {
+    return 0;  /* abort */
+  }
+
+  /* dump the document on stdout */
+  printf("file %s%s content: %s\n", url_address, url_file, html);
+  return 1;  /* success */
+}
+
+/* local function called as "end" callback */
+/*
+ carg: the callback argument structure, holding various information
+ opt:  the option settings
+*/
+static int end_of_mirror(t_hts_callbackarg *carg, httrackp *opt) {
+  /* optional user-defined arg (this module registers NULL) */
+  void *ourDummyArg = (void*) CALLBACKARG_USERDEF(carg);
+
+  /* processing */
+  fprintf(stderr, "That's all, folks!\n");
+
+  /* no callback was chained before this one: report success ourselves */
+  if (CALLBACKARG_PREV_FUN(carg, end) == NULL) {
+    return 1;  /* success */
+  }
+
+  /* our side is fine, so the previous callback's status is the result */
+  return CALLBACKARG_PREV_FUN(carg, end)(CALLBACKARG_PREV_CARG(carg), opt);
+}
+
+/*
+module entry point
+the function name and prototype MUST match this prototype
+*/
+EXTERNAL_FUNCTION int hts_plug(httrackp *opt, const char* argv) {
+  /* locate the optional commandline argument (the text after the first ',');
+     this sample does nothing with it */
+  const char *arg = strchr(argv, ',');
+  arg = (arg != NULL) ? arg + 1 : NULL;
+
+  /* plug callback functions; the last parameter is the optional
+     user-defined arg, not needed by this module */
+  CHAIN_FUNCTION(opt, check_html, process_file, NULL);
+  CHAIN_FUNCTION(opt, end, end_of_mirror, NULL);
+
+  return 1;  /* success */
+}
+
+/*
+module exit point
+the function name and prototype MUST match this prototype
+*/
+EXTERNAL_FUNCTION int hts_unplug(httrackp *opt) {
+  /* newline-terminated, like the module's other stderr message, so that
+     later output does not run on from this line */
+  fprintf(stderr, "Module unplugged\n");
+
+  return 1;  /* success */
+}
diff --git a/libtest/example.c b/libtest/example.c index 3361872..df6e503 100644 --- a/libtest/example.c +++ b/libtest/example.c @@ -2,29 +2,32 @@ HTTrack library example
.c file
- To Build on Windows:
- - install winhttrack
- - set the proper path in the project settings (especially for the httrack lib and dll)
- - compile in multithreaded DLL
- - avoid precompiled headers with VC
-
- To Build on Linux:
- - install httrack
- - link with libhttrack.so and compile using something like:
- gcc example.c -I/usr/include/httrack -lhttrack
+ Prerequisites:
+ - install winhttrack
+ - set the proper path in the project settings (especially for the httrack lib and dll)
+
+ How to build:
+ With GNU-GCC:
+ gcc -I/usr/include/httrack -O -g3 -Wall -D_REENTRANT -o example example.c -lhttrack1
+ With MS-Visual C++:
+ cl -nologo -W3 -Zi -Zp4 -DWIN32 -Fe"example.exe" example.c wsock32.lib libhttrack.lib
*/
-
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#ifdef _WIN32
+#include <windows.h>
+#endif
+/* Standard httrack module includes */
#include "httrack-library.h"
+#include "htsopt.h"
+#include "htsdefines.h"
+/* Local definitions */
#include "example.h"
-
-
/*
* Name: main
* Description: main() function
@@ -39,6 +42,8 @@ int main(void) { char _argv[][256] = {"httrack_test" , "<URL>" , "-r3" , "--testscan" , "" };
char* argv[] = {NULL , NULL , NULL , NULL , NULL};
int argc = 0;
+ httrackp *opt;
+ int ret;
while(strlen(_argv[argc])) {
argv[argc]=_argv[argc];
argc++;
@@ -49,40 +54,7 @@ int main(void) { scanf("%s",argv[1]);
printf("Test: 1 depth\n");
- hts_init();
- htswrap_add("init",httrack_wrapper_init);
- htswrap_add("free",httrack_wrapper_uninit);
- htswrap_add("start",httrack_wrapper_start);
- htswrap_add("change-options",httrack_wrapper_chopt);
- htswrap_add("end",httrack_wrapper_end);
- htswrap_add("check-html",httrack_wrapper_checkhtml);
- htswrap_add("loop",httrack_wrapper_loop);
- htswrap_add("query",httrack_wrapper_query);
- htswrap_add("query2",httrack_wrapper_query2);
- htswrap_add("query3",httrack_wrapper_query3);
- htswrap_add("check-link",httrack_wrapper_check);
- htswrap_add("pause",httrack_wrapper_pause);
- htswrap_add("save-file",httrack_wrapper_filesave);
- htswrap_add("link-detected",httrack_wrapper_linkdetected);
- htswrap_add("transfer-status",httrack_wrapper_xfrstatus);
-
- /* Then, launch the mirror */
- hts_main(argc,argv);
-
- /* Wait for a key */
- printf("\nPress ENTER key to exit\n");
- scanf("%s",argv[1]);
-
- /* That's all! */
- return 0;
-}
-
-
-/* CALLBACK FUNCTIONS */
-
-/* Initialize the Winsock */
-void CDECL httrack_wrapper_init(void) {
- printf("Engine started\n");
+ /* Initialize the library */
#ifdef _WIN32
{
WORD wVersionRequested; // requested version WinSock API
@@ -100,59 +72,130 @@ void CDECL httrack_wrapper_init(void) { }
}
#endif
+ hts_init();
-}
-void CDECL httrack_wrapper_uninit(void) {
- printf("Engine exited\n");
+ /* Create option settings and set callbacks (wrappers) */
+ opt = hts_create_opt();
+
+ CHAIN_FUNCTION(opt, init, httrack_wrapper_init, NULL);
+ CHAIN_FUNCTION(opt, uninit, httrack_wrapper_uninit, NULL);
+ CHAIN_FUNCTION(opt, start, httrack_wrapper_start, NULL);
+ CHAIN_FUNCTION(opt, end, httrack_wrapper_end, NULL);
+ CHAIN_FUNCTION(opt, chopt, httrack_wrapper_chopt, NULL);
+ CHAIN_FUNCTION(opt, preprocess, httrack_wrapper_preprocesshtml, NULL);
+ CHAIN_FUNCTION(opt, postprocess, httrack_wrapper_postprocesshtml, NULL);
+ CHAIN_FUNCTION(opt, check_html, httrack_wrapper_checkhtml, NULL);
+ CHAIN_FUNCTION(opt, query, httrack_wrapper_query, NULL);
+ CHAIN_FUNCTION(opt, query2, httrack_wrapper_query2, NULL);
+ CHAIN_FUNCTION(opt, query3, httrack_wrapper_query3, NULL);
+ CHAIN_FUNCTION(opt, loop, httrack_wrapper_loop, NULL);
+ CHAIN_FUNCTION(opt, check_link, httrack_wrapper_check, NULL);
+ CHAIN_FUNCTION(opt, check_mime, httrack_wrapper_check_mime, NULL);
+ CHAIN_FUNCTION(opt, pause, httrack_wrapper_pause, NULL);
+ CHAIN_FUNCTION(opt, filesave, httrack_wrapper_filesave, NULL);
+ CHAIN_FUNCTION(opt, filesave2, httrack_wrapper_filesave2, NULL);
+ CHAIN_FUNCTION(opt, linkdetected, httrack_wrapper_linkdetected, NULL);
+ CHAIN_FUNCTION(opt, linkdetected2, httrack_wrapper_linkdetected2, NULL);
+ CHAIN_FUNCTION(opt, xfrstatus, httrack_wrapper_xfrstatus, NULL);
+ CHAIN_FUNCTION(opt, savename, httrack_wrapper_savename, NULL);
+ CHAIN_FUNCTION(opt, sendhead, httrack_wrapper_sendheader, NULL);
+ CHAIN_FUNCTION(opt, receivehead, httrack_wrapper_receiveheader, NULL);
+
+ /* Then, launch the mirror */
+ ret = hts_main2(argc, argv, opt);
+
+ /* Wait for a key */
+ printf("\nPress ENTER key to exit\n");
+ scanf("%s",argv[1]);
+
+ /* Clear option state */
+ hts_free_opt(opt);
+ hts_uninit();
#ifdef _WIN32
WSACleanup();
#endif
+
+ /* That's all! */
+ return 0;
}
-int CDECL httrack_wrapper_start(httrackp* opt) {
+
+
+/* CALLBACK FUNCTIONS */
+
+/* "init" callback: only prints a trace (the Winsock start-up code is in main()) */
+static void CDECL httrack_wrapper_init(t_hts_callbackarg *carg) {
+ printf("Engine started\n");
+}
+/* "uninit" callback: only prints a trace */
+static void CDECL httrack_wrapper_uninit(t_hts_callbackarg *carg) {
+ printf("Engine exited\n");
+}
+static int CDECL httrack_wrapper_start(t_hts_callbackarg *carg, httrackp* opt) {
printf("Start of mirror\n");
return 1;
}
-int CDECL httrack_wrapper_chopt(httrackp* opt) {
- return CDECL httrack_wrapper_start(opt);
+static int CDECL httrack_wrapper_chopt(t_hts_callbackarg *carg, httrackp* opt) {
+ return 1;
}
-int CDECL httrack_wrapper_end(void) {
+static int CDECL httrack_wrapper_end(t_hts_callbackarg *carg, httrackp* opt) {
printf("End of mirror\n");
return 1;
}
-int CDECL httrack_wrapper_checkhtml(char* html,int len,char* url_adresse,char* url_fichier) {
- printf("Parsing html file: http://%s%s\n",url_adresse,url_fichier);
+static int CDECL httrack_wrapper_checkhtml(t_hts_callbackarg *carg, httrackp *opt, char* html,int len,const char* url_address,const char* url_file) {
+ printf("Parsing html file: http://%s%s\n",url_address,url_file);
return 1;
}
-int CDECL httrack_wrapper_loop(void* _back,int back_max,int back_index,int lien_n,int lien_tot,int stat_time,hts_stat_struct* stats) {
+static int CDECL httrack_wrapper_loop(t_hts_callbackarg *carg, httrackp *opt, void* _back,int back_max,int back_index,int lien_n,int lien_tot,int stat_time,hts_stat_struct* stats) {
/* printf("..httrack_wrapper_loop called\n"); */
return 1;
}
-char* CDECL httrack_wrapper_query(char* question) {
+static const char* CDECL httrack_wrapper_query(t_hts_callbackarg *carg, httrackp *opt, const char* question) {
/* Answer is No */
return "N";
}
-char* CDECL httrack_wrapper_query2(char* question) {
+static const char* CDECL httrack_wrapper_query2(t_hts_callbackarg *carg, httrackp *opt, const char* question) {
/* Answer is No */
return "N";
}
-char* CDECL httrack_wrapper_query3(char* question) {
+static const char* CDECL httrack_wrapper_query3(t_hts_callbackarg *carg, httrackp *opt, const char* question) {
/* Answer is "" */
return "";
}
-int CDECL httrack_wrapper_check(char* adr,char* fil,int status) {
+static int CDECL httrack_wrapper_check(t_hts_callbackarg *carg, httrackp *opt, const char* adr,const char* fil,int status) {
printf("Link status tested: http://%s%s\n",adr,fil);
return -1;
}
-void CDECL httrack_wrapper_pause(char* lockfile) {
+static void CDECL httrack_wrapper_pause(t_hts_callbackarg *carg, httrackp *opt, const char* lockfile) {
/* Wait until lockfile is removed.. */
}
-void CDECL httrack_wrapper_filesave(char* file) {
+static void CDECL httrack_wrapper_filesave(t_hts_callbackarg *carg, httrackp *opt, const char* file) {
}
-int CDECL httrack_wrapper_linkdetected(char* link) {
+static int CDECL httrack_wrapper_linkdetected(t_hts_callbackarg *carg, httrackp *opt, char* link) {
printf("Link detected: %s\n",link);
return 1;
}
-int CDECL httrack_wrapper_xfrstatus(void* back) {
+static int CDECL httrack_wrapper_xfrstatus(t_hts_callbackarg *carg, httrackp *opt, void* back) {
+ return 1;
+}
+/* Placeholder callbacks: each one ignores its arguments and returns a constant (or nothing). */
+static int CDECL httrack_wrapper_preprocesshtml(t_hts_callbackarg *carg, httrackp *opt, char** html,int* len,const char* url_address,const char* url_file) {
+ return 1;
+}
+static int CDECL httrack_wrapper_postprocesshtml(t_hts_callbackarg *carg, httrackp *opt, char** html,int* len,const char* url_address,const char* url_file) {
+ return 1;
+}
+/* NOTE(review): -1 is presumably the "no decision" answer, as in httrack_wrapper_check() - confirm against htsdefines.h */
+static int CDECL httrack_wrapper_check_mime(t_hts_callbackarg *carg, httrackp *opt, const char* adr,const char* fil,const char* mime,int status) {
+ return -1;
+}
+static void CDECL httrack_wrapper_filesave2(t_hts_callbackarg *carg, httrackp *opt, const char* adr, const char* fil, const char* save, int is_new, int is_modified,int not_updated) {
+}
+static int CDECL httrack_wrapper_linkdetected2(t_hts_callbackarg *carg, httrackp *opt, char* link, const char* start_tag) {
+ return 1;
+}
+static int CDECL httrack_wrapper_savename(t_hts_callbackarg *carg, httrackp *opt, const char* adr_complete,const char* fil_complete,const char* referer_adr,const char* referer_fil,char* save) {
+ return 1;
+}
+static int CDECL httrack_wrapper_sendheader(t_hts_callbackarg *carg, httrackp *opt, char* buff, const char* adr, const char* fil, const char* referer_adr, const char* referer_fil, htsblk* outgoing) {
+ return 1;
+}
+static int CDECL httrack_wrapper_receiveheader(t_hts_callbackarg *carg, httrackp *opt, char* buff, const char* adr, const char* fil, const char* referer_adr, const char* referer_fil, htsblk* incoming) {
return 1;
}
-
diff --git a/libtest/example.dsp b/libtest/example.dsp deleted file mode 100644 index 0f87ae0..0000000 --- a/libtest/example.dsp +++ /dev/null @@ -1,95 +0,0 @@ -# Microsoft Developer Studio Project File - Name="example" - Package Owner=<4>
-# Microsoft Developer Studio Generated Build File, Format Version 6.00
-# ** DO NOT EDIT **
-
-# TARGTYPE "Win32 (x86) Console Application" 0x0103
-
-CFG=example - Win32 Debug
-!MESSAGE This is not a valid makefile. To build this project using NMAKE,
-!MESSAGE use the Export Makefile command and run
-!MESSAGE
-!MESSAGE NMAKE /f "example.mak".
-!MESSAGE
-!MESSAGE You can specify a configuration when running NMAKE
-!MESSAGE by defining the macro CFG on the command line. For example:
-!MESSAGE
-!MESSAGE NMAKE /f "example.mak" CFG="example - Win32 Debug"
-!MESSAGE
-!MESSAGE Possible choices for configuration are:
-!MESSAGE
-!MESSAGE "example - Win32 Release" (based on "Win32 (x86) Console Application")
-!MESSAGE "example - Win32 Debug" (based on "Win32 (x86) Console Application")
-!MESSAGE
-
-# Begin Project
-# PROP AllowPerConfigDependencies 0
-# PROP Scc_ProjName ""
-# PROP Scc_LocalPath ""
-CPP=cl.exe
-RSC=rc.exe
-
-!IF "$(CFG)" == "example - Win32 Release"
-
-# PROP BASE Use_MFC 0
-# PROP BASE Use_Debug_Libraries 0
-# PROP BASE Output_Dir "Release"
-# PROP BASE Intermediate_Dir "Release"
-# PROP BASE Target_Dir ""
-# PROP Use_MFC 0
-# PROP Use_Debug_Libraries 0
-# PROP Output_Dir "Release"
-# PROP Intermediate_Dir "Release"
-# PROP Ignore_Export_Lib 0
-# PROP Target_Dir ""
-# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
-# ADD CPP /nologo /MT /W3 /GX /O2 /I "C:\Dev" /I "C:\Dev\IPv6Kit\inc" /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
-# ADD BASE RSC /l 0x40c /d "NDEBUG"
-# ADD RSC /l 0x40c /d "NDEBUG"
-BSC32=bscmake.exe
-# ADD BASE BSC32 /nologo
-# ADD BSC32 /nologo
-LINK32=link.exe
-# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
-# ADD LINK32 wsock32.lib libhttrack.lib /nologo /subsystem:console /machine:I386
-
-!ELSEIF "$(CFG)" == "example - Win32 Debug"
-
-# PROP BASE Use_MFC 0
-# PROP BASE Use_Debug_Libraries 1
-# PROP BASE Output_Dir "Debug"
-# PROP BASE Intermediate_Dir "Debug"
-# PROP BASE Target_Dir ""
-# PROP Use_MFC 0
-# PROP Use_Debug_Libraries 1
-# PROP Output_Dir "Debug"
-# PROP Intermediate_Dir "Debug"
-# PROP Ignore_Export_Lib 0
-# PROP Target_Dir ""
-# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /GZ /c
-# ADD CPP /nologo /MTd /W3 /Gm /GX /ZI /Od /I "C:\Dev\IPv6Kit\inc" /I "C:\Dev" /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /FR /FD /GZ /c
-# SUBTRACT CPP /YX
-# ADD BASE RSC /l 0x40c /d "_DEBUG"
-# ADD RSC /l 0x40c /d "_DEBUG"
-BSC32=bscmake.exe
-# ADD BASE BSC32 /nologo
-# ADD BSC32 /nologo
-LINK32=link.exe
-# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
-# ADD LINK32 wsock32.lib libhttrack.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
-
-!ENDIF
-
-# Begin Target
-
-# Name "example - Win32 Release"
-# Name "example - Win32 Debug"
-# Begin Source File
-
-SOURCE=.\example.c
-# End Source File
-# Begin Source File
-
-SOURCE=.\example.h
-# End Source File
-# End Target
-# End Project
diff --git a/libtest/example.dsw b/libtest/example.dsw deleted file mode 100644 index adba05b..0000000 --- a/libtest/example.dsw +++ /dev/null @@ -1,29 +0,0 @@ -Microsoft Developer Studio Workspace File, Format Version 6.00
-# WARNING: DO NOT EDIT OR DELETE THIS WORKSPACE FILE!
-
-###############################################################################
-
-Project: "example"=".\example.dsp" - Package Owner=<4>
-
-Package=<5>
-{{{
-}}}
-
-Package=<4>
-{{{
-}}}
-
-###############################################################################
-
-Global:
-
-Package=<5>
-{{{
-}}}
-
-Package=<3>
-{{{
-}}}
-
-###############################################################################
-
diff --git a/libtest/example.h b/libtest/example.h index 161dce5..57bf15d 100644 --- a/libtest/example.h +++ b/libtest/example.h @@ -9,19 +9,26 @@ #define CDECL
#endif
-void CDECL httrack_wrapper_init(void);
-void CDECL httrack_wrapper_uninit(void);
-int CDECL httrack_wrapper_start(httrackp* opt);
-int CDECL httrack_wrapper_chopt(httrackp* opt);
-int CDECL httrack_wrapper_end(void);
-int CDECL httrack_wrapper_checkhtml(char* html,int len,char* url_adresse,char* url_fichier);
-int CDECL httrack_wrapper_loop(void* _back,int back_max,int back_index,int lien_n,int lien_tot,int stat_time,hts_stat_struct* stats);
-char* CDECL httrack_wrapper_query(char* question);
-char* CDECL httrack_wrapper_query2(char* question);
-char* CDECL httrack_wrapper_query3(char* question);
-int CDECL httrack_wrapper_check(char* adr,char* fil,int status);
-void CDECL httrack_wrapper_pause(char* lockfile);
-void CDECL httrack_wrapper_filesave(char* file);
-int CDECL httrack_wrapper_linkdetected(char* link);
-int CDECL httrack_wrapper_xfrstatus(void* back);
-
+/* Prototypes of the callbacks defined in example.c; being static, they are private to the translation unit that includes this header. */
+static void CDECL httrack_wrapper_init(t_hts_callbackarg *carg);
+static void CDECL httrack_wrapper_uninit(t_hts_callbackarg *carg);
+static int CDECL httrack_wrapper_start(t_hts_callbackarg *carg, httrackp* opt);
+static int CDECL httrack_wrapper_chopt(t_hts_callbackarg *carg, httrackp* opt);
+static int CDECL httrack_wrapper_end(t_hts_callbackarg *carg, httrackp* opt);
+static int CDECL httrack_wrapper_checkhtml(t_hts_callbackarg *carg, httrackp *opt, char* html,int len,const char* url_address,const char* url_file);
+static int CDECL httrack_wrapper_loop(t_hts_callbackarg *carg, httrackp *opt, void* _back,int back_max,int back_index,int lien_n,int lien_tot,int stat_time,hts_stat_struct* stats);
+static const char* CDECL httrack_wrapper_query(t_hts_callbackarg *carg, httrackp *opt, const char* question);
+static const char* CDECL httrack_wrapper_query2(t_hts_callbackarg *carg, httrackp *opt, const char* question);
+static const char* CDECL httrack_wrapper_query3(t_hts_callbackarg *carg, httrackp *opt, const char* question);
+static int CDECL httrack_wrapper_check(t_hts_callbackarg *carg, httrackp *opt, const char* adr,const char* fil,int status);
+static void CDECL httrack_wrapper_pause(t_hts_callbackarg *carg, httrackp *opt, const char* lockfile);
+static void CDECL httrack_wrapper_filesave(t_hts_callbackarg *carg, httrackp *opt, const char* file);
+static int CDECL httrack_wrapper_linkdetected(t_hts_callbackarg *carg, httrackp *opt, char* link);
+static int CDECL httrack_wrapper_xfrstatus(t_hts_callbackarg *carg, httrackp *opt, void* back);
+static int CDECL httrack_wrapper_preprocesshtml(t_hts_callbackarg *carg, httrackp *opt, char** html,int* len,const char* url_address,const char* url_file);
+static int CDECL httrack_wrapper_postprocesshtml(t_hts_callbackarg *carg, httrackp *opt, char** html,int* len,const char* url_address,const char* url_file);
+static int CDECL httrack_wrapper_check_mime(t_hts_callbackarg *carg, httrackp *opt, const char* adr,const char* fil,const char* mime,int status);
+static void CDECL httrack_wrapper_filesave2(t_hts_callbackarg *carg, httrackp *opt, const char* adr, const char* fil, const char* save, int is_new, int is_modified,int not_updated);
+static int CDECL httrack_wrapper_linkdetected2(t_hts_callbackarg *carg, httrackp *opt, char* link, const char* start_tag);
+static int CDECL httrack_wrapper_savename(t_hts_callbackarg *carg, httrackp *opt, const char* adr_complete,const char* fil_complete,const char* referer_adr,const char* referer_fil,char* save);
+static int CDECL httrack_wrapper_sendheader(t_hts_callbackarg *carg, httrackp *opt, char* buff, const char* adr, const char* fil, const char* referer_adr, const char* referer_fil, htsblk* outgoing);
+static int CDECL httrack_wrapper_receiveheader(t_hts_callbackarg *carg, httrackp *opt, char* buff, const char* adr, const char* fil, const char* referer_adr, const char* referer_fil, htsblk* incoming);
diff --git a/libtest/htssystem.h b/libtest/htssystem.h deleted file mode 100644 index 58941ed..0000000 --- a/libtest/htssystem.h +++ /dev/null @@ -1,2 +0,0 @@ -#define HTS_ANALYSTE 1
-
diff --git a/libtest/readme.txt b/libtest/readme.txt index 3c23e4c..c204e29 100644 --- a/libtest/readme.txt +++ b/libtest/readme.txt @@ -32,3 +32,25 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+========================================================================
+ MAKEFILE PROJECT : libtest Project Overview
+========================================================================
+
+AppWizard has created this libtest project for you.
+
+This file contains a summary of what you will find in each of the files that
+make up your libtest project.
+
+
+libtest.vcproj
+ This is the main project file for VC++ projects generated using an Application Wizard.
+ It contains information about the version of Visual C++ that generated the file, and
+ information about the platforms, configurations, and project features selected with the
+ Application Wizard.
+
+This project allows you to build/clean/rebuild from within Visual Studio by calling the commands you have input
+in the wizard. The build command can be nmake or any other tool you use.
+
+This project does not contain any files, so there are none displayed in Solution Explorer.
+
+/////////////////////////////////////////////////////////////////////////////
diff --git a/man/httrack.1 b/man/httrack.1 index 7df56df..70bad85 100644 --- a/man/httrack.1 +++ b/man/httrack.1 @@ -1,13 +1,15 @@ .\" Process this file with .\" groff -man -Tascii httrack.1 .\" -.TH httrack 1 "HTTrack version 3.40-2 (compiled Apr 9 2006)" "httrack website copier" +.TH httrack 1 "Jan 2007" "httrack website copier" .SH NAME httrack \- offline browser : copy websites to a local directory .SH SYNOPSIS .B httrack [ url ]... [ -filter ]... [ +filter ]... [ .B-O, --path ] [ +.B -%O, --chroot +] [ .B -w, --mirror ] [ .B -W, --mirror-wizard @@ -106,6 +108,8 @@ httrack \- offline browser : copy websites to a local directory ] [ .B -@iN, --protocol[=N] ] [ +.B -%w, --disable-module +] [ .B -F, --user-agent ] [ .B -%R, --referer @@ -208,6 +212,9 @@ mirror[,path cache and logfiles]) (--path <param>) +.IP -%O +chroot path to, must be r00t (-%O root +path) (--chroot <param>) .SS Action options: .IP -w @@ -309,7 +316,7 @@ accept cookies in cookies.txt (0=do not accept,* 1=accept) (--cookies[=N]) .IP -u check document type if unknown (cgi,asp..) (u0 don t check, * u1 check but /, u2 check always) (--check-type[=N]) .IP -j -*parse Java Classes (j0 don t parse) (--parse-java[=N]) +*parse Java Classes (j0 don t parse, bitmask: |1 parse default, |2 don t parse .class |4 don t parse .js |8 don t be aggressive) (--parse-java[=N]) .IP -sN follow robots.txt and meta robots tags (0=never,1=sometimes,* 2=always, 3=always (even strict rules)) (--robots[=N]) .IP -%h @@ -328,6 +335,8 @@ assume that a type (cgi,asp..) 
is always linked with a mime type (-%A php3,cgi=t also be used to force a specific file type: --assume foo.cgi=text/html .IP -@iN internet protocol (0=both ipv6+ipv4, 4=ipv4 only, 6=ipv6 only) (--protocol[=N]) +.IP -%w +disable a specific external mime module (-%w htsswf -%w htsjava) (--disable-module <param>) .SS Browser ID: .IP -F @@ -415,7 +424,7 @@ type test (-#2 /foo/bar.php) .IP -#C cache list (-#C *.com/spider*.gif (--debug-cache <param>) .IP -#R -cache repair (damaged cache) (--debug-oldftp) +cache repair (damaged cache) (--repair-cache) .IP -#d debug parser (--debug-parsing) .IP -#E @@ -435,7 +444,7 @@ display ugly progress information (--advanced-progressinfo) .IP -#P catch URL (--catch-url) .IP -#R -old FTP routines (debug) (--debug-oldftp) +old FTP routines (debug) (--repair-cache) .IP -#T generate transfer ops. log every minutes (--debug-xfrstats) .IP -#u @@ -459,7 +468,7 @@ execute system command after each files ($0 is the filename: -V "rm \$0") (--use .IP -%U run the engine with another id when called as root (-%U smith) (--user <param>) .IP -%W -use an external library function as a wrapper (-%W link-detected=foo.so:myfunction[,myparameters]) (--callback <param>) +use an external library function as a wrapper (-%W myfoo.so[,myparameters]) (--callback <param>) .SS Details: Option N .IP -N0 @@ -571,50 +580,7 @@ foo.cgi?q=45 -> foo4B54.html?q=45 (relative URI, default) force http/1.0 requests (-%h) .SS Details: Option %W: External callbacks prototypes -.SS init : void (* myfunction)(void); -.SS free : void (* myfunction)(void); -.SS start : int (* myfunction)(httrackp* opt); -.SS end : int (* myfunction)(void); -.SS change-options : int (* myfunction)(httrackp* opt); -.SS preprocess-html : int (* myfunction)(char** html,int* len,char* url -adresse,char* url -fichier); -.SS postprocess-html : int (* myfunction)(char** html,int* len,char* url -adresse,char* url -fichier); -.SS check-html : int (* myfunction)(char* html,int len,char* url -adresse,char* 
url -fichier); -.SS query : char* (* myfunction)(char* question); -.SS query2 : char* (* myfunction)(char* question); -.SS query3 : char* (* myfunction)(char* question); -.SS loop : int (* myfunction)(lien -back* back,int back -max,int back -index,int lien -tot,int lien -ntot,int stat -time,hts -stat -struct* stats); -.SS check-link : int (* myfunction)(char* adr,char* fil,int status); -.SS pause : void (* myfunction)(char* lockfile); -.SS save-file : void (* myfunction)(char* file); -.SS save-file2 : void (* myfunction)(char* hostname,char* filename,char* localfile,int is -new,int is -modified); -.SS link-detected : int (* myfunction)(char* link); -.SS link-detected2 : int (* myfunction)(char* link, char* start -tag); -.SS transfer-status : int (* myfunction)(lien -back* back); -.SS save-name : int (* myfunction)(char* adr -complete,char* fil -complete,char* referer -adr,char* referer -fil,char* save); -.SS And <wrappername> -init() functions if defined, called upon plug +.SS see htsdefines.h .SH FILES .I /etc/httrack.conf .RS diff --git a/src/Makefile.am b/src/Makefile.am index 587535a..c609b07 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1,16 +1,17 @@ #SUBDIRS = swf DevIncludesdir = $(includedir)/httrack -DevIncludes_DATA = httrack-library.h \ +DevIncludes_DATA = \ + httrack-library.h \ htsglobal.h \ htsopt.h \ htswrap.h \ - htssystem.h \ htsconfig.h \ ../config.h \ htsmodules.h \ htsbasenet.h \ - htsbauth.h + htsbauth.h \ + htsdefines.h INCLUDES = \ @DEFAULT_CFLAGS@ \ @@ -29,7 +30,7 @@ htsserver_LDADD = $(THREADS_LIBS) $(SOCKET_LIBS) -lhttrack proxytrack_LDADD = $(THREADS_LIBS) $(SOCKET_LIBS) proxytrack_CFLAGS = $(AM_CFLAGS) -DNO_MALLOCT -lib_LTLIBRARIES = libhttrack.la +lib_LTLIBRARIES = libhttrack.la libhtsjava.la htsserver_SOURCES = htsserver.c htsserver.h htsweb.c htsweb.h proxytrack_SOURCES = proxy/main.c \ @@ -42,10 +43,10 @@ whttrackrun_SCRIPTS = webhttrack libhttrack_la_SOURCES = htscore.c htsparse.c htsback.c htscache.c \ htscatchurl.c 
htsfilters.c htsftp.c htshash.c htsinthash.c \ - htshelp.c htsjava.c htslib.c htscoremain.c \ + htshelp.c htslib.c htscoremain.c \ htsname.c htsrobots.c htstools.c htswizard.c \ htsalias.c htsthread.c htsindex.c htsbauth.c \ - htsmd5.c htszlib.c htsnostatic.c htswrap.c \ + htsmd5.c htszlib.c htswrap.c \ htsmodules.c \ md5.c \ htsmms.c \ @@ -55,22 +56,23 @@ libhttrack_la_SOURCES = htscore.c htsparse.c htsback.c htscache.c \ htsbasenet.h htsbauth.h htscache.h htscatchurl.h \ htsconfig.h htscore.h htsparse.h htscoremain.h htsdefines.h \ htsfilters.h htsftp.h htsglobal.h htshash.h htsinthash.h \ - htshelp.h htsindex.h htsjava.h htslib.h htsmd5.h \ - htsmodules.h htsname.h htsnet.h htsnostatic.h \ - htsopt.h htsrobots.h htssystem.h htsthread.h \ + htshelp.h htsindex.h htslib.h htsmd5.h \ + htsmodules.h htsname.h htsnet.h \ + htsopt.h htsrobots.h htsthread.h \ htstools.h htswizard.h htswrap.h htszlib.h \ htsstrings.h httrack-library.h \ md5.h \ htsmms.h \ minizip/crypt.h minizip/ioapi.h minizip/mztools.h minizip/unzip.h minizip/zip.h \ mmsrip/error.h mmsrip/mms.h - libhttrack_la_LIBADD = $(THREADS_LIBS) $(ZLIB_LIBS) $(DL_LIBS) $(SOCKET_LIBS) libhttrack_la_LDFLAGS = -version-info $(VERSION_INFO) +libhtsjava_la_SOURCES = htsjava.c htsjava.h +libhtsjava_la_LIBADD = $(THREADS_LIBS) $(DL_LIBS) +libhtsjava_la_LDFLAGS = -version-info $(VERSION_INFO) + EXTRA_DIST = httrack.h webhttrack \ - httrack.dsp httrack.dsw \ - webhttrack.dsp webhttrack.dsw \ minizip/ChangeLogUnzip \ minizip/iowin32.c \ minizip/iowin32.h \ @@ -86,4 +88,5 @@ EXTRA_DIST = httrack.h webhttrack \ proxy/changelog.txt \ proxy/proxystrings.h \ proxy/proxytrack.h \ - proxy/store.h + proxy/store.h \ + *.dsw *.dsp *.vcproj diff --git a/src/Makefile.in b/src/Makefile.in index 888071a..b22b2fb 100644 --- a/src/Makefile.in +++ b/src/Makefile.in @@ -144,16 +144,17 @@ sysconfdir = @sysconfdir@ target_alias = @target_alias@ DevIncludesdir = $(includedir)/httrack -DevIncludes_DATA = httrack-library.h \ +DevIncludes_DATA = 
\ + httrack-library.h \ htsglobal.h \ htsopt.h \ htswrap.h \ - htssystem.h \ htsconfig.h \ ../config.h \ htsmodules.h \ htsbasenet.h \ - htsbauth.h + htsbauth.h \ + htsdefines.h INCLUDES = \ @@ -174,7 +175,7 @@ htsserver_LDADD = $(THREADS_LIBS) $(SOCKET_LIBS) -lhttrack proxytrack_LDADD = $(THREADS_LIBS) $(SOCKET_LIBS) proxytrack_CFLAGS = $(AM_CFLAGS) -DNO_MALLOCT -lib_LTLIBRARIES = libhttrack.la +lib_LTLIBRARIES = libhttrack.la libhtsjava.la htsserver_SOURCES = htsserver.c htsserver.h htsweb.c htsweb.h proxytrack_SOURCES = proxy/main.c \ @@ -188,10 +189,10 @@ whttrackrun_SCRIPTS = webhttrack libhttrack_la_SOURCES = htscore.c htsparse.c htsback.c htscache.c \ htscatchurl.c htsfilters.c htsftp.c htshash.c htsinthash.c \ - htshelp.c htsjava.c htslib.c htscoremain.c \ + htshelp.c htslib.c htscoremain.c \ htsname.c htsrobots.c htstools.c htswizard.c \ htsalias.c htsthread.c htsindex.c htsbauth.c \ - htsmd5.c htszlib.c htsnostatic.c htswrap.c \ + htsmd5.c htszlib.c htswrap.c \ htsmodules.c \ md5.c \ htsmms.c \ @@ -201,9 +202,9 @@ libhttrack_la_SOURCES = htscore.c htsparse.c htsback.c htscache.c \ htsbasenet.h htsbauth.h htscache.h htscatchurl.h \ htsconfig.h htscore.h htsparse.h htscoremain.h htsdefines.h \ htsfilters.h htsftp.h htsglobal.h htshash.h htsinthash.h \ - htshelp.h htsindex.h htsjava.h htslib.h htsmd5.h \ - htsmodules.h htsname.h htsnet.h htsnostatic.h \ - htsopt.h htsrobots.h htssystem.h htsthread.h \ + htshelp.h htsindex.h htslib.h htsmd5.h \ + htsmodules.h htsname.h htsnet.h \ + htsopt.h htsrobots.h htsthread.h \ htstools.h htswizard.h htswrap.h htszlib.h \ htsstrings.h httrack-library.h \ md5.h \ @@ -211,13 +212,14 @@ libhttrack_la_SOURCES = htscore.c htsparse.c htsback.c htscache.c \ minizip/crypt.h minizip/ioapi.h minizip/mztools.h minizip/unzip.h minizip/zip.h \ mmsrip/error.h mmsrip/mms.h - libhttrack_la_LIBADD = $(THREADS_LIBS) $(ZLIB_LIBS) $(DL_LIBS) $(SOCKET_LIBS) libhttrack_la_LDFLAGS = -version-info $(VERSION_INFO) +libhtsjava_la_SOURCES = 
htsjava.c htsjava.h +libhtsjava_la_LIBADD = $(THREADS_LIBS) $(DL_LIBS) +libhtsjava_la_LDFLAGS = -version-info $(VERSION_INFO) + EXTRA_DIST = httrack.h webhttrack \ - httrack.dsp httrack.dsw \ - webhttrack.dsp webhttrack.dsw \ minizip/ChangeLogUnzip \ minizip/iowin32.c \ minizip/iowin32.h \ @@ -233,7 +235,8 @@ EXTRA_DIST = httrack.h webhttrack \ proxy/changelog.txt \ proxy/proxystrings.h \ proxy/proxytrack.h \ - proxy/store.h + proxy/store.h \ + *.dsw *.dsp *.vcproj subdir = src ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 @@ -242,14 +245,17 @@ CONFIG_HEADER = $(top_builddir)/config.h CONFIG_CLEAN_FILES = LTLIBRARIES = $(lib_LTLIBRARIES) +libhtsjava_la_DEPENDENCIES = +am_libhtsjava_la_OBJECTS = htsjava.lo +libhtsjava_la_OBJECTS = $(am_libhtsjava_la_OBJECTS) libhttrack_la_DEPENDENCIES = am_libhttrack_la_OBJECTS = htscore.lo htsparse.lo htsback.lo htscache.lo \ htscatchurl.lo htsfilters.lo htsftp.lo htshash.lo htsinthash.lo \ - htshelp.lo htsjava.lo htslib.lo htscoremain.lo htsname.lo \ - htsrobots.lo htstools.lo htswizard.lo htsalias.lo htsthread.lo \ - htsindex.lo htsbauth.lo htsmd5.lo htszlib.lo htsnostatic.lo \ - htswrap.lo htsmodules.lo md5.lo htsmms.lo ioapi.lo mztools.lo \ - unzip.lo zip.lo error.lo mms.lo + htshelp.lo htslib.lo htscoremain.lo htsname.lo htsrobots.lo \ + htstools.lo htswizard.lo htsalias.lo htsthread.lo htsindex.lo \ + htsbauth.lo htsmd5.lo htszlib.lo htswrap.lo htsmodules.lo \ + md5.lo htsmms.lo ioapi.lo mztools.lo unzip.lo zip.lo error.lo \ + mms.lo libhttrack_la_OBJECTS = $(am_libhttrack_la_OBJECTS) bin_PROGRAMS = proxytrack$(EXEEXT) httrack$(EXEEXT) htsserver$(EXEEXT) PROGRAMS = $(bin_PROGRAMS) @@ -288,7 +294,6 @@ am__depfiles_maybe = depfiles @AMDEP_TRUE@ ./$(DEPDIR)/htsjava.Plo ./$(DEPDIR)/htslib.Plo \ @AMDEP_TRUE@ ./$(DEPDIR)/htsmd5.Plo ./$(DEPDIR)/htsmms.Plo \ @AMDEP_TRUE@ ./$(DEPDIR)/htsmodules.Plo ./$(DEPDIR)/htsname.Plo \ -@AMDEP_TRUE@ ./$(DEPDIR)/htsnostatic.Plo \ @AMDEP_TRUE@ ./$(DEPDIR)/htsparse.Plo ./$(DEPDIR)/htsrobots.Plo \ 
@AMDEP_TRUE@ ./$(DEPDIR)/htsserver.Po ./$(DEPDIR)/htsthread.Plo \ @AMDEP_TRUE@ ./$(DEPDIR)/htstools.Plo ./$(DEPDIR)/htsweb.Po \ @@ -314,12 +319,12 @@ LTCOMPILE = $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) \ CCLD = $(CC) LINK = $(LIBTOOL) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ $(AM_LDFLAGS) $(LDFLAGS) -o $@ -DIST_SOURCES = $(libhttrack_la_SOURCES) $(htsserver_SOURCES) httrack.c \ - $(proxytrack_SOURCES) +DIST_SOURCES = $(libhtsjava_la_SOURCES) $(libhttrack_la_SOURCES) \ + $(htsserver_SOURCES) httrack.c $(proxytrack_SOURCES) DATA = $(DevIncludes_DATA) DIST_COMMON = $(srcdir)/Makefile.in Makefile.am -SOURCES = $(libhttrack_la_SOURCES) $(htsserver_SOURCES) httrack.c $(proxytrack_SOURCES) +SOURCES = $(libhtsjava_la_SOURCES) $(libhttrack_la_SOURCES) $(htsserver_SOURCES) httrack.c $(proxytrack_SOURCES) all: all-am @@ -358,6 +363,8 @@ clean-libLTLIBRARIES: echo "rm -f \"$${dir}/so_locations\""; \ rm -f "$${dir}/so_locations"; \ done +libhtsjava.la: $(libhtsjava_la_OBJECTS) $(libhtsjava_la_DEPENDENCIES) + $(LINK) -rpath $(libdir) $(libhtsjava_la_LDFLAGS) $(libhtsjava_la_OBJECTS) $(libhtsjava_la_LIBADD) $(LIBS) libhttrack.la: $(libhttrack_la_OBJECTS) $(libhttrack_la_DEPENDENCIES) $(LINK) -rpath $(libdir) $(libhttrack_la_LDFLAGS) $(libhttrack_la_OBJECTS) $(libhttrack_la_LIBADD) $(LIBS) binPROGRAMS_INSTALL = $(INSTALL_PROGRAM) @@ -445,7 +452,6 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htsmms.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htsmodules.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htsname.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htsnostatic.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htsparse.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htsrobots.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htsserver.Po@am__quote@ diff --git a/src/hts-indextmpl.h b/src/hts-indextmpl.h index b9aff10..24706d5 100644 
--- a/src/hts-indextmpl.h +++ b/src/hts-indextmpl.h @@ -174,7 +174,7 @@ regen: " <BR>"LF\ " <BR>"LF\ " <H6 ALIGN=\"RIGHT\">"LF\ - " <I>Mirror and index made by HTTrack Website Copier [XR&CO'2006]</I>"LF\ + " <I>Mirror and index made by HTTrack Website Copier [XR&CO'2007]</I>"LF\ " </H6>"LF\ " %s"LF\ " <!-- Thanks for using HTTrack Website Copier! -->"LF\ @@ -193,7 +193,7 @@ regen: ""LF\ "<table width=\"76%%\" border=\"0\" align=\"center\" valign=\"bottom\" cellspacing=\"0\" cellpadding=\"0\">"LF\ " <tr>"LF\ - " <td id=\"footer\"><small>© 2006 Xavier Roche & other contributors - Web Design: Kauler Leto.</small></td>"LF\ + " <td id=\"footer\"><small>© 2007 Xavier Roche & other contributors - Web Design: Kauler Leto.</small></td>"LF\ " </tr>"LF\ "</table>"LF\ ""LF\ @@ -324,7 +324,7 @@ regen: " </TABLE>"LF\ " <BR>"LF\ " <H6 ALIGN=\"RIGHT\">"LF\ - " <I>Mirror and index made by HTTrack Website Copier [XR&CO'2006]</I>"LF\ + " <I>Mirror and index made by HTTrack Website Copier [XR&CO'2007]</I>"LF\ " </H6>"LF\ " %s"LF\ " <!-- Thanks for using HTTrack Website Copier! 
-->"LF\ @@ -342,7 +342,7 @@ regen: ""LF\ "<table width=\"76%%\" border=\"0\" align=\"center\" valign=\"bottom\" cellspacing=\"0\" cellpadding=\"0\">"LF\ " <tr>"LF\ - " <td id=\"footer\"><small>© 2006 Xavier Roche & other contributors - Web Design: Kauler Leto.</small></td>"LF\ + " <td id=\"footer\"><small>© 2007 Xavier Roche & other contributors - Web Design: Kauler Leto.</small></td>"LF\ " </tr>"LF\ "</table>"LF\ ""LF\ @@ -483,7 +483,7 @@ regen: ""LF\ "<table width=\"76%%\" height=\"100%%\" border=\"0\" align=\"center\" valign=\"bottom\" cellspacing=\"0\" cellpadding=\"0\">"LF\ " <tr>"LF\ - " <td id=\"footer\"><small>© 2006 Xavier Roche & other contributors - Web Design: Kauler Leto.</small></td>"LF\ + " <td id=\"footer\"><small>© 2007 Xavier Roche & other contributors - Web Design: Kauler Leto.</small></td>"LF\ " </tr>"LF\ "</table>"LF\ ""LF\ @@ -620,7 +620,7 @@ regen: ""LF\ "<table width=\"76%%\" height=\"100%%\" border=\"0\" align=\"center\" valign=\"bottom\" cellspacing=\"0\" cellpadding=\"0\">"LF\ " <tr>"LF\ - " <td id=\"footer\"><small>© 2006 Xavier Roche & other contributors - Web Design: Kauler Leto.</small></td>"LF\ + " <td id=\"footer\"><small>© 2007 Xavier Roche & other contributors - Web Design: Kauler Leto.</small></td>"LF\ " </tr>"LF\ "</table>"LF\ ""LF\ diff --git a/src/htsalias.c b/src/htsalias.c index c6bfef4..5845837 100644 --- a/src/htsalias.c +++ b/src/htsalias.c @@ -66,7 +66,7 @@ void hts_lowcase(char* s); } \ argv[0]=(buff+ptr); \ strcpybuff(argv[0],token); \ - ptr += (strlen(argv[0])+1); \ + ptr += (int) (strlen(argv[0])+1); \ argc++ // END OF COPY OF cmdl_ins in htsmain.c @@ -172,6 +172,7 @@ const char* hts_optalias[][4] = { {"display","-%v","single","show files transfered and other funny realtime information"}, {"dos83","-L0","single",""}, {"iso9660","-L2","single",""}, + {"disable-module","-%w","param1",""}, /* */ /* DEPRECATED */ @@ -193,7 +194,7 @@ const char* hts_optalias[][4] = { {"advanced-maxlinks","-#L","single",""}, 
{"advanced-progressinfo","-#p","single","deprecated"}, {"catch-url","-#P","single","catch complex URL through proxy"}, - {"debug-oldftp","-#R","single",""}, + /*{"debug-oldftp","-#R","single",""},*/ {"debug-xfrstats","-#T","single",""}, {"advanced-wait","-#u","single",""}, {"debug-ratestats","-#Z","single",""}, @@ -544,11 +545,11 @@ char* hts_gethome(void) { } /* Convert ~/foo into /home/smith/foo */ -void expand_home(char* str) { - if (str[0] == '~') { +void expand_home(String *str) { + if (StringSub(*str, 1) == '~') { char BIGSTK tempo[HTS_URLMAXSIZE*2]; - strcpybuff(tempo,hts_gethome()); - strcatbuff(tempo,str+1); - strcpybuff(str,tempo); + strcpybuff(tempo, hts_gethome()); + strcatbuff(tempo, StringBuff(*str) + 1); + StringCopy(*str, tempo); } } diff --git a/src/htsalias.h b/src/htsalias.h index 21c3142..bf52f3b 100644 --- a/src/htsalias.h +++ b/src/htsalias.h @@ -55,7 +55,7 @@ const char* optalias_value(int p); const char* opttype_value(int p); const char* opthelp_value(int p); char* hts_gethome(void); -void expand_home(char* str); +void expand_home(String *str); #endif #endif diff --git a/src/htsback.c b/src/htsback.c index 8a9aac5..2f06b09 100644 --- a/src/htsback.c +++ b/src/htsback.c @@ -38,16 +38,16 @@ Please visit our Website: http://www.httrack.com /* Internal engine bytecode */ #define HTS_INTERNAL_BYTECODE -#include "htsback.h" - /* specific definitions */ -#include "htsbase.h" #include "htsnet.h" +#include "htscore.h" #include "htsthread.h" #include <time.h> /* END specific definitions */ -//#if HTS_WIN +#include "htsback.h" + +//#ifdef _WIN32 #include "htsftp.h" #if HTS_USEZLIB #include "htszlib.h" @@ -56,7 +56,7 @@ Please visit our Website: http://www.httrack.com #endif //#endif -#if HTS_WIN +#ifdef _WIN32 #ifndef __cplusplus // DOS #ifndef _WIN32_WCE @@ -71,22 +71,28 @@ Please visit our Website: http://www.httrack.com #endif #undef test_flush -#define test_flush if (opt->flush) { if (opt->log) { fflush(opt->log); } if (opt->errlog) { 
fflush(opt->errlog); } } +#define test_flush if (opt->flush) { if (opt->log) { fflush(opt->log); } if (opt->log) { fflush(opt->log); } } #define VT_CLREOL "\33[K" +/* Slot operations */ +static int slot_can_be_cached_on_disk(const lien_back* back); +static int slot_can_be_cleaned(const lien_back* back); +static int slot_can_be_finalized(httrackp* opt, const lien_back* back); + struct_back* back_new(int back_max) { int i; struct_back* sback = calloct(1, sizeof(struct_back)); sback->count = back_max; sback->lnk = (lien_back*) calloct((back_max + 1), sizeof(lien_back)); - sback->ready = inthash_new(8191); + sback->ready = inthash_new(32767); + sback->ready_size_bytes = 0; inthash_value_is_malloc(sback->ready, 1); // init for(i = 0 ; i < sback->count ; i++){ sback->lnk[i].r.location = sback->lnk[i].location_buffer; - sback->lnk[i].status = -1; + sback->lnk[i].status = STATUS_FREE; sback->lnk[i].r.soc = INVALID_SOCKET; } return sback; @@ -99,7 +105,8 @@ void back_free(struct_back** sback) { (*sback)->lnk = NULL; } if ((*sback)->ready != NULL) { - inthash_delete((inthash *)&(*sback)->ready); + inthash_delete(&(*sback)->ready); + (*sback)->ready_size_bytes = 0; } freet(*sback); *sback = NULL; @@ -115,15 +122,23 @@ void back_delete_all(httrackp* opt, cache_back* cache, struct_back* sback) { } // delete stored slots if (sback->ready != NULL) { - struct_inthash_enum e = inthash_enum_new((inthash)sback->ready); + struct_inthash_enum e = inthash_enum_new(sback->ready); inthash_chain* item; while((item = inthash_enum_next(&e))) { - struct_back back1; - back1.count = 1; - back1.lnk = (lien_back*) item->value.ptr; - back1.ready = NULL; - back_delete(opt, cache, &back1, 0); +#ifndef HTS_NO_BACK_ON_DISK + char *filename = (char*) item->value.ptr; + if (filename != NULL) { + (void) unlink(filename); + } +#else + /* clear entry content (but not yet the entry) */ + lien_back *back = (lien_back*) item->value.ptr; + back_clear_entry(back); +#endif } + /* delete hashtable & content */ + 
inthash_delete(&sback->ready); + sback->ready_size_bytes = 0; } } } @@ -131,64 +146,113 @@ void back_delete_all(httrackp* opt, cache_back* cache, struct_back* sback) { // --- // routines de backing -static int back_index_ready(struct_back* sback, char* adr, char* fil, char* sav, int getIndex); -static int back_index_fetch(struct_back* sback, char* adr, char* fil, char* sav, int getIndex); +static int back_index_ready(httrackp* opt, struct_back* sback, char* adr, char* fil, char* sav, int getIndex); +static int back_index_fetch(httrackp* opt, struct_back* sback, char* adr, char* fil, char* sav, int getIndex); // retourne l'index d'un lien dans un tableau de backing -int back_index(struct_back* sback,char* adr,char* fil,char* sav) { - return back_index_fetch(sback, adr, fil, sav, 1); +int back_index(httrackp* opt, struct_back* sback,char* adr,char* fil,char* sav) { + return back_index_fetch(opt,sback, adr, fil, sav, 1); } -static int back_index_fetch(struct_back* sback, char* adr, char* fil, char* sav, int getIndex) { +static int back_index_fetch(httrackp* opt, struct_back* sback, char* adr, char* fil, char* sav, int getIndex) { lien_back* const back = sback->lnk; const int back_max = sback->count; - int i=0; int index=-1; - while( i < back_max ) { - if (back[i].status>=0) // réception OU prêt - if (strfield2(back[i].url_adr,adr)) { - if (strcmp(back[i].url_fil,fil)==0) { - if (index==-1) /* first time we meet, store it */ - index=i; - else if (sav != NULL && strcmp(back[i].url_sav, sav) == 0) { /* oops, check sav too */ - index=i; - return index; - } - } + int i; + for( i = 0 ; i < back_max ; i++ ) { + if (back[i].status >= 0 /* not free or alive */ + && strfield2(back[i].url_adr,adr) + && strcmp(back[i].url_fil,fil)==0) + { + if (index==-1) /* first time we meet, store it */ + index=i; + else if (sav != NULL && strcmp(back[i].url_sav, sav) == 0) { /* oops, check sav too */ + index=i; + return index; } - i++; + } } // not found in fast repository - search in the 
storage hashtable if (index == -1 && sav != NULL) { - index = back_index_ready(sback, adr, fil, sav, getIndex); + index = back_index_ready(opt, sback, adr, fil, sav, getIndex); } return index; } -static int back_index_ready(struct_back* sback, char* adr, char* fil, char* sav, int getIndex) { +/* resurrect stored entry */ +static int back_index_ready(httrackp* opt, struct_back* sback, char* adr, char* fil, char* sav, int getIndex) { lien_back* const back = sback->lnk; - const int back_max = sback->count; - int index=-1; void* ptr = NULL; - if (inthash_read_pvoid((inthash)sback->ready, sav, &ptr)) { - lien_back* itemback = (lien_back*) ptr; - if (itemback != NULL) { - if (!getIndex) { - return sback->count; // positive (but invalid) result + if (inthash_read_pvoid(sback->ready, sav, &ptr)) { + if (!getIndex) { /* don't "pagefault" the entry */ + if (ptr != NULL) { + return sback->count; /* (invalid but) positive result */ + } else { + return -1; /* not found */ + } + } else if (ptr != NULL) { + lien_back* itemback = NULL; +#ifndef HTS_NO_BACK_ON_DISK + FILE *fp; + char* fileback = (char*) ptr; + char catbuff[CATBUFF_SIZE]; + if (( fp = fopen(fconv(catbuff, fileback), "rb") ) != NULL ) { + if (back_unserialize(fp, &itemback) != 0) { + if (itemback != NULL) { + back_clear_entry(itemback); + freet(itemback); + itemback = NULL; + } + if (opt->log != NULL) { + int last_errno = errno; + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"engine: warning: unserialize error for %s%s (%s): %s"LF,adr,fil,sav,strerror(last_errno)); + test_flush; + } + } + fclose(fp); } else { - // move from hashtable to fast repository - int q = back_search_quick(sback); - if (q != -1) { - deletehttp(&back[q].r); // security check - back_move(itemback, &back[q]); - inthash_remove((inthash)sback->ready, sav); // delete item - back[q].locked = 1; /* locked */ - index = q; + if (opt->log != NULL) { + int last_errno = errno; + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"engine: warning: unserialize error for 
%s%s (%s), file disappeared: %s"LF,adr,fil,sav,strerror(last_errno)); + test_flush; } } - } + (void) unlink(fileback); +#else + itemback = (lien_back*) ptr; +#endif + if (itemback != NULL) { + // move from hashtable to fast repository + int q = back_search(opt, sback); + if (q != -1) { + deletehttp(&back[q].r); // security check + back_move(itemback, &back[q]); + back_clear_entry(itemback); /* delete entry content */ + freet(itemback); /* delete item */ + itemback = NULL; + inthash_remove(sback->ready, sav); // delete item + sback->ready_size_bytes -= back[q].r.size; /* substract for stats */ + back_set_locked(sback, q); /* locked */ + return q; + } else { + if (opt->log != NULL) { + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"engine: warning: unserialize error for %s%s (%s): no more space to wakeup frozen slots"LF,adr,fil,sav); + test_flush; + } + } + } + } } - return index; + return -1; +} + +static int slot_can_be_cached_on_disk(const lien_back* back) { + return + (back->status == STATUS_READY && back->locked == 0 + && back->url_sav[0] != '\0' + && strcmp(back->url_sav, BACK_ADD_TEST) != 0 + ); + /* Note: not checking !IS_DELAYED_EXT(back->url_sav) or it will quickly cause the slots to be filled! 
*/ } /* Put all backing entries that are ready in the storage hashtable to spare space and CPU */ @@ -199,19 +263,15 @@ int back_cleanup_background(httrackp* opt,cache_back* cache,struct_back* sback) int i; for( i = 0 ; i < back_max ; i++ ) { // ready, not locked and suitable - if (back[i].status == 0 && back[i].locked == 0 - && back[i].url_sav[0] != '\0' - && strcmp(back[i].url_sav, BACK_ADD_TEST) != 0 - && !IS_DELAYED_EXT(back[i].url_sav) - ) - { - lien_back* itemback = calloct(1, sizeof(lien_back)); + if (slot_can_be_cached_on_disk(&back[i])) { +#ifdef HTS_NO_BACK_ON_DISK + lien_back* itemback; +#endif /* Security check */ - int checkIndex = back_index_ready(sback, back[i].url_adr, back[i].url_fil, back[i].url_sav, 1); + int checkIndex = back_index_ready(opt, sback, back[i].url_adr, back[i].url_fil, back[i].url_sav, 1); if (checkIndex != -1) { if (opt->log) { - fspc(opt->log,"warning"); - fprintf(opt->log,"engine: unexpected duplicate file entry: %s%s -> %s (%d '%s') / %s%s -> %s (%d '%s')"LF, + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"engine: unexpected duplicate file entry: %s%s -> %s (%d '%s') / %s%s -> %s (%d '%s')"LF, back[checkIndex].url_adr, back[checkIndex].url_fil, back[checkIndex].url_sav, back[checkIndex].r.statuscode, back[checkIndex].r.msg, back[i].url_adr, back[i].url_fil, back[i].url_sav, back[i].r.statuscode, back[i].r.msg ); @@ -222,10 +282,67 @@ int back_cleanup_background(httrackp* opt,cache_back* cache,struct_back* sback) /* This should NOT happend! 
*/ { int duplicateEntryInBacklog = 1; assertf(!duplicateEntryInBacklog); } #endif - } + } +#ifndef HTS_NO_BACK_ON_DISK + /* temporarily serialize the entry on disk */ + { + int fsz = (int) strlen(back[i].url_sav); + char *filename = malloc(fsz + 8 + 1); + if (filename != NULL) { + FILE *fp; + if (opt->getmode != 0) { + sprintf(filename, "%s.tmp", back[i].url_sav); + } else { + sprintf(filename, "%stmpfile%d.tmp", StringBuff(opt->path_html), opt->state.tmpnameid++); + } + /* Security check */ + if (fexist(filename)) { + if (opt->log != NULL) { + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"engine: warning: temporary file %s already exists"LF, filename); + test_flush; + } + } + /* Create file and serialize slot */ + if ((fp = filecreate(NULL, filename)) != NULL) + { + if (back_serialize(fp, &back[i]) == 0) + { + inthash_add_pvoid(sback->ready, back[i].url_sav, filename); + filename = NULL; + sback->ready_size_bytes += back[i].r.size; /* add for stats */ + nclean++; + back_clear_entry(&back[i]); /* entry is now recycled */ + } else { + if (opt->log != NULL) { + int last_errno = errno; + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"engine: warning: serialize error for %s%s to %s: write error: %s"LF,back[i].url_adr,back[i].url_fil,filename,strerror(last_errno)); + test_flush; + } + } + fclose(fp); + } else { + if (opt->log != NULL) { + int last_errno = errno; + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"engine: warning: serialize error for %s%s to %s: open error: %s (%s, %s)"LF, back[i].url_adr, back[i].url_fil, filename, strerror(last_errno), dir_exists(filename) ? "directory exists" : "directory does NOT exist!", fexist(filename) ? "file already exists!" 
: "file does not exist"); + test_flush; + } + } + if (filename != NULL) + free(filename); + } else { + if (opt->log != NULL) { + int last_errno = errno; + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"engine: warning: serialize error for %s%s to %s: memory full: %s"LF,back[i].url_adr,back[i].url_fil,filename,strerror(last_errno)); + test_flush; + } + } + } +#else + itemback = calloct(1, sizeof(lien_back)); back_move(&back[i], itemback); - inthash_add_pvoid((inthash)sback->ready, itemback->url_sav, itemback); + inthash_add_pvoid(sback->ready, itemback->url_sav, itemback); nclean++; +#endif } } return nclean; @@ -238,7 +355,7 @@ int back_available(struct_back* sback) { int i; int nb=0; for(i=0;i<back_max;i++) - if (back[i].status==-1) /* libre */ + if (back[i].status==STATUS_FREE) /* libre */ nb++; return nb; } @@ -254,8 +371,9 @@ LLint back_incache(struct_back* sback) { if (back[i].r.adr) // ne comptabilier que les blocs en mémoire sum+=max(back[i].r.size,back[i].r.totalsize); // stored (ready) slots +#ifdef HTS_NO_BACK_ON_DISK if (sback->ready != NULL) { - struct_inthash_enum e = inthash_enum_new((inthash)sback->ready); + struct_inthash_enum e = inthash_enum_new(sback->ready); inthash_chain* item; while((item = inthash_enum_next(&e))) { lien_back* ritem = (lien_back*) item->value.ptr; @@ -264,6 +382,7 @@ LLint back_incache(struct_back* sback) { sum+=max(ritem->r.size,ritem->r.totalsize); } } +#endif return sum; } @@ -272,27 +391,31 @@ int back_done_incache(struct_back* sback) { lien_back* const back = sback->lnk; const int back_max = sback->count; int i; - int n=0; - for(i=0;i<back_max;i++) - if (back[i].status==0) + int n = 0; + for(i = 0 ; i < back_max ; i++) + if (back[i].status == STATUS_READY) n++; // stored (ready) slots if (sback->ready != NULL) { - struct_inthash_enum e = inthash_enum_new((inthash)sback->ready); +#ifndef HTS_NO_BACK_ON_DISK + n += inthash_nitems(sback->ready); +#else + struct_inthash_enum e = inthash_enum_new(sback->ready); inthash_chain* item; 
while((item = inthash_enum_next(&e))) { lien_back* ritem = (lien_back*) item->value.ptr; - if (ritem->status==0) + if (ritem->status==STATUS_READY) n++; } +#endif } return n; } // le lien a-t-il été mis en backing? -HTS_INLINE int back_exist(struct_back* sback,char* adr,char* fil,char* sav) { - return (back_index_fetch(sback, adr, fil, sav, /*don't fetch*/0) >= 0); +HTS_INLINE int back_exist(struct_back* sback,httrackp* opt,char* adr,char* fil,char* sav) { + return (back_index_fetch(opt, sback, adr, fil, sav, /*don't fetch*/0) >= 0); } // nombre de sockets en tâche de fond @@ -313,7 +436,7 @@ int back_nsoc_overall(struct_back* sback) { int n=0; int i; for(i=0;i<back_max;i++) - if (back[i].status > 0 || back[i].status == -103) + if (back[i].status > 0 || back[i].status == STATUS_ALIVE) n++; return n; @@ -324,6 +447,7 @@ int back_nsoc_overall(struct_back* sback) { // fermer les paramètres de transfert, // et notamment vérifier les fichiers compressés (décompresser), callback etc. int back_finalize(httrackp* opt,cache_back* cache,struct_back* sback,int p) { + char catbuff[CATBUFF_SIZE]; lien_back* const back = sback->lnk; const int back_max = sback->count; assertf(p >= 0 && p < back_max); @@ -338,9 +462,9 @@ int back_finalize(httrackp* opt,cache_back* cache,struct_back* sback,int p) { } if ( - (back[p].status == 0) // ready + (back[p].status == STATUS_READY) // ready && - (back[p].r.statuscode>0) // not internal error + (back[p].r.statuscode > 0) // not internal error ) { if (!back[p].testmode) { // not test mode @@ -361,7 +485,7 @@ int back_finalize(httrackp* opt,cache_back* cache,struct_back* sback,int p) { back[p].r.out=fopen(back[p].tmpfile,"wb"); if (back[p].r.out) { if ((back[p].r.adr) && (back[p].r.size>0)) { - if (fwrite(back[p].r.adr,1,(INTsys)back[p].r.size,back[p].r.out) != back[p].r.size) { + if (fwrite(back[p].r.adr,1,(size_t)back[p].r.size,back[p].r.out) != back[p].r.size) { back[p].r.statuscode=STATUSCODE_INVALID; strcpybuff(back[p].r.msg,"Write error 
when decompressing"); } @@ -386,8 +510,8 @@ int back_finalize(httrackp* opt,cache_back* cache,struct_back* sback,int p) { if (back[p].tmpfile != NULL && back[p].tmpfile[0] != '\0') { if (back[p].url_sav[0]) { LLint size; - file_notify(back[p].url_adr, back[p].url_fil, back[p].url_sav, 1, 1, back[p].r.notmodified); - filecreateempty(back[p].url_sav); // filenote & co + file_notify(opt,back[p].url_adr, back[p].url_fil, back[p].url_sav, 1, 1, back[p].r.notmodified); + filecreateempty(&opt->state.strc, back[p].url_sav); // filenote & co if ((size = hts_zunpack(back[p].tmpfile,back[p].url_sav))>=0) { back[p].r.size=back[p].r.totalsize=size; // fichier -> mémoire @@ -426,10 +550,10 @@ int back_finalize(httrackp* opt,cache_back* cache,struct_back* sback,int p) { REAL MEDIA HACK Check if we have to load locally the file ************************************************************************ */ - if (back[p].r.statuscode == 200) { // OK (ou 304 en backing) + if (back[p].r.statuscode == HTTP_OK) { // OK (ou 304 en backing) if (back[p].r.is_write) { // Written file - if (may_be_hypertext_mime(back[p].r.contenttype, back[p].url_fil)) { // to parse! - LLint sz; + if (may_be_hypertext_mime(opt,back[p].r.contenttype, back[p].url_fil)) { // to parse! + off_t sz; sz=fsize(back[p].url_sav); if (sz>0) { // ok, exists! if (sz < 8192) { // ok, small file --> to parse! @@ -437,7 +561,7 @@ int back_finalize(httrackp* opt,cache_back* cache,struct_back* sback,int p) { if (fp) { back[p].r.adr=malloct((int)sz + 2); if (back[p].r.adr) { - if (fread(back[p].r.adr,1,(INTsys)sz,fp) == sz) { + if (fread(back[p].r.adr,1,sz,fp) == sz) { back[p].r.size=sz; back[p].r.adr[sz] = '\0'; back[p].r.is_write = 0; /* not anymore a direct-to-disk file */ @@ -451,7 +575,7 @@ int back_finalize(httrackp* opt,cache_back* cache,struct_back* sback,int p) { fclose(fp); fp=NULL; // remove (temporary) file! 
- unlink(fconv(back[p].url_sav)); + unlink(fconv(catbuff,back[p].url_sav)); } if (fp) fclose(fp); @@ -512,7 +636,7 @@ int back_finalize(httrackp* opt,cache_back* cache,struct_back* sback,int p) { fprintf(cache->txt,LLintP,(LLint)back[p].r.totalsize); fprintf(cache->txt,"\t%s\t",flags); } - if (back[p].r.statuscode == 200) { + if (back[p].r.statuscode == HTTP_OK) { if (back[p].r.size>=0) { if (strcmp(back[p].url_fil,"/robots.txt") !=0 ) { HTS_STAT.stat_bytes+=back[p].r.size; @@ -521,7 +645,7 @@ int back_finalize(httrackp* opt,cache_back* cache,struct_back* sback,int p) { if ( (!back[p].r.notmodified) && (opt->is_update) ) { HTS_STAT.stat_updated_files++; // page modifiée if (opt->log!=NULL) { - fspc(opt->log,"info"); + HTS_LOG(opt,LOG_INFO); if (back[p].is_update) { fprintf(opt->log,"engine: transfer-status: link updated: %s%s -> %s"LF,back[p].url_adr,back[p].url_fil,back[p].url_sav); } else { @@ -538,7 +662,7 @@ int back_finalize(httrackp* opt,cache_back* cache,struct_back* sback,int p) { } } else { if ( (opt->debug>0) && (opt->log!=NULL) ) { - fspc(opt->log,"info"); fprintf(opt->log,"engine: transfer-status: link recorded: %s%s -> %s"LF,back[p].url_adr,back[p].url_fil,back[p].url_sav); + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"engine: transfer-status: link recorded: %s%s -> %s"LF,back[p].url_adr,back[p].url_fil,back[p].url_sav); test_flush; } if (cache->txt) { @@ -550,7 +674,7 @@ int back_finalize(httrackp* opt,cache_back* cache,struct_back* sback,int p) { } } else { if ( (opt->debug>0) && (opt->log!=NULL) ) { - fspc(opt->log,"info"); fprintf(opt->log,"engine: transfer-status: empty file? (%d, '%s'): %s%s"LF,back[p].r.statuscode,back[p].r.msg,back[p].url_adr,back[p].url_fil); + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"engine: transfer-status: empty file? 
(%d, '%s'): %s%s"LF,back[p].r.statuscode,back[p].r.msg,back[p].url_adr,back[p].url_fil); test_flush; } if (cache->txt) { @@ -559,7 +683,7 @@ int back_finalize(httrackp* opt,cache_back* cache,struct_back* sback,int p) { } } else { if ( (opt->debug>0) && (opt->log!=NULL) ) { - fspc(opt->log,"info"); fprintf(opt->log,"engine: transfer-status: link error (%d, '%s'): %s%s"LF,back[p].r.statuscode,back[p].r.msg,back[p].url_adr,back[p].url_fil); + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"engine: transfer-status: link error (%d, '%s'): %s%s"LF,back[p].r.statuscode,back[p].r.msg,back[p].url_adr,back[p].url_fil); } if (cache->txt) { state="error"; @@ -575,11 +699,11 @@ int back_finalize(httrackp* opt,cache_back* cache,struct_back* sback,int p) { "(from %s%s%s)" LF, back[p].r.statuscode, - state, escape_check_url_addr(back[p].r.msg), - escape_check_url_addr(back[p].r.contenttype), - ((back[p].r.etag[0])?"etag:":((back[p].r.lastmodified[0])?"date:":"")), escape_check_url_addr((back[p].r.etag[0])?back[p].r.etag:(back[p].r.lastmodified)), - (link_has_authority(back[p].url_adr) ? "" : "http://"),escape_check_url_addr(back[p].url_adr),escape_check_url_addr(back[p].url_fil),escape_check_url_addr(back[p].url_sav), - (link_has_authority(back[p].referer_adr) || !back[p].referer_adr[0]) ? "" : "http://",escape_check_url_addr(back[p].referer_adr),escape_check_url_addr(back[p].referer_fil) + state, escape_check_url_addr(OPT_GET_BUFF(opt),back[p].r.msg), + escape_check_url_addr(OPT_GET_BUFF(opt),back[p].r.contenttype), + ((back[p].r.etag[0])?"etag:":((back[p].r.lastmodified[0])?"date:":"")), escape_check_url_addr(OPT_GET_BUFF(opt),(back[p].r.etag[0])?back[p].r.etag:(back[p].r.lastmodified)), + (link_has_authority(back[p].url_adr) ? "" : "http://"),escape_check_url_addr(OPT_GET_BUFF(opt),back[p].url_adr),escape_check_url_addr(OPT_GET_BUFF(opt),back[p].url_fil),escape_check_url_addr(OPT_GET_BUFF(opt),back[p].url_sav), + (link_has_authority(back[p].referer_adr) || !back[p].referer_adr[0]) ? 
"" : "http://",escape_check_url_addr(OPT_GET_BUFF(opt),back[p].referer_adr),escape_check_url_addr(OPT_GET_BUFF(opt),back[p].referer_fil) ); if (opt->flush) fflush(cache->txt); @@ -591,23 +715,21 @@ int back_finalize(httrackp* opt,cache_back* cache,struct_back* sback,int p) { } else { if (!HTTP_IS_OK(back[p].r.statuscode)) { if ( (opt->debug>0) && (opt->log!=NULL) ) { - fspc(opt->log,"debug"); fprintf(opt->log,"redirect to %s%s"LF,back[p].url_adr,back[p].url_fil); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"redirect to %s%s"LF,back[p].url_adr,back[p].url_fil); } /* Store only header reference */ cache_mayadd(opt,cache,&back[p].r,back[p].url_adr,back[p].url_fil,NULL); } else { + /* Partial file, but marked as "ok" ? */ if (opt->log!=NULL) { - fspc(opt->log,"warning"); fprintf(opt->log,"file not stored in cache due to bogus state (incomplete type): %s%s"LF,back[p].url_adr,back[p].url_fil); + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"file not stored in cache due to bogus state (incomplete type): %s%s"LF,back[p].url_adr,back[p].url_fil); } } } // status finished callback -#if HTS_ANALYSTE - if (hts_htmlcheck_xfrstatus != NULL) { - hts_htmlcheck_xfrstatus(&back[p]); - } -#endif + RUN_CALLBACK1(opt, xfrstatus, &back[p]); + return 0; } else { // testmode if (back[p].r.statuscode / 100 >= 3) { /* Store 3XX, 4XX, 5XX test response codes, but NOT 2XX */ @@ -670,7 +792,7 @@ void back_move(lien_back* src, lien_back* dst) { memcpy(dst, src, sizeof(lien_back)); memset(src, 0, sizeof(lien_back)); src->r.soc=INVALID_SOCKET; - src->status=-1; + src->status=STATUS_FREE; src->r.location = src->location_buffer; dst->r.location = dst->location_buffer; } @@ -688,6 +810,80 @@ void back_copy_static(const lien_back* src, lien_back* dst) { #endif } +static int back_data_serialize(FILE *fp, const void *data, size_t size) { + if ( fwrite(&size, 1, sizeof(size), fp) == sizeof(size) + && ( size == 0 || fwrite(data, 1, size, fp) == size ) + ) + return 0; + return 1; /* error */ +} + +static int 
back_string_serialize(FILE *fp, const char *str) { + size_t size = ( str != NULL ) ? ( strlen(str) + 1 ) : 0; + return back_data_serialize(fp, str, size); +} + +static int back_data_unserialize(FILE *fp, void **str, size_t *size) { + *str = NULL; + if (fread(size, 1, sizeof(*size), fp) == sizeof(*size)) { + if (*size == 0) /* serialized NULL ptr */ + return 0; + *str = malloct(*size + 1); + if (*str == NULL) + return 1; /* error */ + ((char*) *str)[*size] = 0; /* guard byte */ + if (fread(*str, 1, *size, fp) == *size) + return 0; + } + return 1; /* error */ +} + +static int back_string_unserialize(FILE *fp, char **str) { + size_t dummy; + return back_data_unserialize(fp, (void**) str, &dummy); +} + +int back_serialize(FILE *fp, const lien_back* src) { + if (back_data_serialize(fp, src, sizeof(lien_back)) == 0 + && back_data_serialize(fp, src->r.adr, src->r.adr ? (size_t)src->r.size : 0) == 0 + && back_string_serialize(fp, src->r.headers) == 0 + && fflush(fp) == 0) + return 0; + return 1; +} + +int back_unserialize(FILE *fp, lien_back** dst) { + size_t size; + *dst = NULL; + errno = 0; + if (back_data_unserialize(fp, (void**) dst, &size) == 0 && size == sizeof(lien_back)) { + (*dst)->tmpfile = NULL; + (*dst)->chunk_adr = NULL; + (*dst)->r.adr = NULL; + (*dst)->r.out = NULL; + (*dst)->r.location = (*dst)->location_buffer; + (*dst)->r.fp = NULL; +#if HTS_USEOPENSSL + (*dst)->r.ssl_con = NULL; +#endif + if (back_data_unserialize(fp, (void**) &(*dst)->r.adr, &size) == 0) + { + (*dst)->r.size = size; + (*dst)->r.headers = NULL; + if (back_string_unserialize(fp, &(*dst)->r.headers) == 0) + return 0; /* ok */ + if ((*dst)->r.headers != NULL) + freet((*dst)->r.headers); + } + if ((*dst)->r.adr != NULL) + freet((*dst)->r.adr); + } + if (dst != NULL) + freet(dst); + *dst = NULL; + return 1; /* error */ +} + // clear, or leave for keep-alive int back_maydelete(httrackp* opt,cache_back* cache,struct_back* sback, int p) { lien_back* const back = sback->lnk; @@ -712,9 +908,9 @@ 
int back_maydelete(httrackp* opt,cache_back* cache,struct_back* sback, int p) { strcpybuff(tmp.url_adr, back[p].url_adr); if (back_letlive(opt, cache, sback, p)) { strcpybuff(back[p].url_adr, tmp.url_adr); - back[p].status = -103; // alive & waiting + back[p].status = STATUS_ALIVE; // alive & waiting if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"(Keep-Alive): successfully saved #%d (%s)"LF, + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(Keep-Alive): successfully saved #%d (%s)"LF, back[p].r.debugid, back[p].url_adr); test_flush; } @@ -754,7 +950,7 @@ void back_maydeletehttp(httrackp* opt, cache_back* cache, struct_back* sback, in /* Connection delay must not exceed keep-alive timeout */ && ( opt->maxconn <= 0 || ( back[p].r.keep_alive_t > ( 1.0 / opt->maxconn ) ) ) /* Available slot in backing */ - && ( q = back_search(opt, cache, sback) ) >= 0 + && ( q = back_search(opt, sback) ) >= 0 ) { lien_back tmp; @@ -764,9 +960,9 @@ void back_maydeletehttp(httrackp* opt, cache_back* cache, struct_back* sback, in back[q].ka_time_start = back[p].ka_time_start; // refresh back[p].r.soc = INVALID_SOCKET; strcpybuff(back[q].url_adr, tmp.url_adr); // address - back[q].status = -103; // alive & waiting + back[q].status = STATUS_ALIVE; // alive & waiting if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"(Keep-Alive): successfully preserved #%d (%s)"LF, + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(Keep-Alive): successfully preserved #%d (%s)"LF, back[q].r.debugid, back[q].url_adr); test_flush; } @@ -783,13 +979,13 @@ int back_trylive(httrackp* opt,cache_back* cache,struct_back* sback, int p) { lien_back* const back = sback->lnk; const int back_max = sback->count; assertf(p >= 0 && p < back_max); - if (p>=0 && back[p].status != -103) { // we never know.. + if (p>=0 && back[p].status != STATUS_ALIVE) { // we never know.. 
int i = back_searchlive(opt,sback, back[p].url_adr); // search slot if (i >= 0 && i != p) { deletehttp(&back[p].r); // security check back_connxfr(&back[i].r, &back[p].r); // transfer live connection settings from i to p - back_delete(opt,cache,sback, i); // delete old slot - back[p].status=100; // ready to connect + back_delete(opt,cache,sback, i); // delete old slot + back[p].status=STATUS_CONNECTING; // ready to connect return 1; // success: will reuse live connection } } @@ -804,7 +1000,7 @@ int back_searchlive(httrackp* opt, struct_back* sback, char* search_addr) { /* search for a live socket */ for(i = 0 ; i < back_max ; i++ ) { - if (back[i].status == -103) { + if (back[i].status == STATUS_ALIVE) { if (strfield2(back[i].url_adr, search_addr)) { /* same location (xxc: check also virtual hosts?) */ if (time_local() < back[i].ka_time_start + back[i].r.keep_alive_t) { return i; @@ -822,7 +1018,7 @@ int back_search_quick(struct_back* sback) { /* try to find an empty place */ for(i = 0 ; i < back_max ; i++ ) { - if (back[i].status == -1) { + if (back[i].status == STATUS_FREE) { return i; } } @@ -831,7 +1027,7 @@ int back_search_quick(struct_back* sback) { return -1; } -int back_search(httrackp* opt,cache_back* cache,struct_back* sback) { +int back_search(httrackp* opt,struct_back* sback) { lien_back* const back = sback->lnk; const int back_max = sback->count; int i; @@ -842,9 +1038,11 @@ int back_search(httrackp* opt,cache_back* cache,struct_back* sback) { /* couldn't find an empty place, try to requisition a keep-alive place */ for(i = 0 ; i < back_max ; i++ ) { - if (back[i].status == -103) { + if (back[i].status == STATUS_ALIVE) { + lien_back* const back = sback->lnk; /* close this place */ - back_delete(opt,cache,sback, i); + back_clear_entry(&back[i]); /* Already finalized (this is the night of the living dead) */ + /*back_delete(opt,cache,sback, i);*/ return i; } } @@ -859,7 +1057,7 @@ void back_set_finished(struct_back* sback, int p) { assertf(p >= 0 && p < 
back_max); if (p >= 0 && p < sback->count) { // we never know.. /* status: finished (waiting to be validated) */ - back[p].status=0; /* finished */ + back[p].status=STATUS_READY; /* finished */ /* close open r/w streams, if any */ if (back[p].r.fp!=NULL) { fclose(back[p].r.fp); @@ -872,6 +1070,26 @@ void back_set_finished(struct_back* sback, int p) { } } +void back_set_locked(struct_back* sback, int p) { + lien_back* const back = sback->lnk; + const int back_max = sback->count; + assertf(p >= 0 && p < back_max); + if (p >= 0 && p < sback->count) { + /* status: locked (in process, do not swap on disk) */ + back[p].locked = 1; /* locked */ + } +} + +void back_set_unlocked(struct_back* sback, int p) { + lien_back* const back = sback->lnk; + const int back_max = sback->count; + assertf(p >= 0 && p < back_max); + if (p >= 0 && p < sback->count) { + /* status: unlocked (can be swapped on disk) */ + back[p].locked = 0; /* unlocked */ + } +} + int back_flush_output(httrackp* opt, cache_back* cache, struct_back* sback, int p) { lien_back* const back = sback->lnk; const int back_max = sback->count; @@ -906,18 +1124,6 @@ int back_flush_output(httrackp* opt, cache_back* cache, struct_back* sback, int } // effacer entrée -int back_set_passe2_ptr(httrackp* opt, cache_back* cache, struct_back* sback, int p, int* pass2_ptr) { - lien_back* const back = sback->lnk; - const int back_max = sback->count; - assertf(p >= 0 && p < back_max); - if (p >= 0 && p < sback->count) { // on sait jamais.. 
- back[p].pass2_ptr = pass2_ptr; - return 1; - } - return 0; -} - -// effacer entrée int back_delete(httrackp* opt, cache_back* cache, struct_back* sback, int p) { lien_back* const back = sback->lnk; const int back_max = sback->count; @@ -934,14 +1140,14 @@ int back_delete(httrackp* opt, cache_back* cache, struct_back* sback, int p) { // Finalize if (!back[p].finalized) { if ( - (back[p].status == 0) // ready + (back[p].status == STATUS_READY) // ready && (!back[p].testmode) // not test mode && (back[p].r.statuscode>0) // not internal error ) { if (opt != NULL && opt->debug>1 && opt->log!=NULL) { - fspc(opt->log,"debug"); fprintf(opt->log,"File '%s%s' -> %s not yet saved in cache - saving now"LF, back[p].url_adr, back[p].url_fil, back[p].url_sav); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"File '%s%s' -> %s not yet saved in cache - saving now"LF, back[p].url_adr, back[p].url_fil, back[p].url_sav); test_flush; } } if (cache != NULL) { @@ -952,46 +1158,70 @@ int back_delete(httrackp* opt, cache_back* cache, struct_back* sback, int p) { // flush output buffers (void) back_flush_output(opt, cache, sback, p); - + + return back_clear_entry(&back[p]); + } + return 0; +} + +/* ensure that the entry is not locked */ +void back_index_unlock(struct_back* sback, int p) { + lien_back* const back = sback->lnk; + if (back[p].locked) { + back[p].locked = 0; /* not locked anymore */ + } +} + +/* the entry is available again */ +static void back_set_free(lien_back* back) { + back->locked = 0; + back->status = STATUS_FREE; +} + +/* delete entry content (clear the entry), but don't unallocate the entry itself */ +int back_clear_entry(lien_back* back) { + if (back != NULL) { // Libérer tous les sockets, handles, buffers.. 
- if (back[p].r.soc!=INVALID_SOCKET) { + if (back->r.soc!=INVALID_SOCKET) { #if HTS_DEBUG_CLOSESOCK DEBUG_W("back_delete: deletehttp\n"); #endif - deletehttp(&back[p].r); - back[p].r.soc=INVALID_SOCKET; + deletehttp(&back->r); + back->r.soc=INVALID_SOCKET; } - if (back[p].r.adr!=NULL) { // reste un bloc à désallouer - freet(back[p].r.adr); - back[p].r.adr=NULL; + if (back->r.adr!=NULL) { // reste un bloc à désallouer + freet(back->r.adr); + back->r.adr=NULL; } - if (back[p].chunk_adr!=NULL) { // reste un bloc à désallouer - freet(back[p].chunk_adr); - back[p].chunk_adr=NULL; - back[p].chunk_size=0; - back[p].chunk_blocksize=0; - back[p].is_chunk=0; + if (back->chunk_adr!=NULL) { // reste un bloc à désallouer + freet(back->chunk_adr); + back->chunk_adr=NULL; + back->chunk_size=0; + back->chunk_blocksize=0; + back->is_chunk=0; } // only for security - if (back[p].tmpfile && back[p].tmpfile[0] != '\0') { - (void) unlink(back[p].tmpfile); - back[p].tmpfile = NULL; + if (back->tmpfile && back->tmpfile[0] != '\0') { + (void) unlink(back->tmpfile); + back->tmpfile = NULL; } // headers - if (back[p].r.headers != NULL) { - freet(back[p].r.headers); - back[p].r.headers = NULL; + if (back->r.headers != NULL) { + freet(back->r.headers); + back->r.headers = NULL; } // Tout nettoyer - memset(&back[p], 0, sizeof(lien_back)); - back[p].r.soc=INVALID_SOCKET; back[p].r.location=back[p].location_buffer; + memset(back, 0, sizeof(lien_back)); + back->r.soc = INVALID_SOCKET; + back->r.location = back->location_buffer; // Le plus important: libérer le champ - back[p].status=-1; + back_set_free(back); + return 1; } return 0; @@ -1003,37 +1233,46 @@ int back_stack_available(struct_back* sback) { const int back_max = sback->count; int p=0,n=0; for( ; p < back_max ; p++ ) - if ( back[p].status == -1 ) + if ( back[p].status == STATUS_FREE ) n++; return n; } // ajouter un lien en backing -int back_add_if_not_exists(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* fil,char* 
save,char* referer_adr,char* referer_fil,int test,int* pass2_ptr) { - int index = back_index(sback, adr, fil, save); - if (index < 0) { - return back_add(sback, opt, cache, adr, fil, save, referer_adr, referer_fil, test, pass2_ptr); - } else { - /* Ensure that the reference to pass2_ptr is set */ - return back_set_passe2_ptr(opt,cache,sback,index,pass2_ptr); +int back_add_if_not_exists(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* referer_adr,char* referer_fil,int test) { + back_clean(opt, cache, sback); /* first cleanup the backlog to ensure that we have some entry left */ + if (!back_exist(sback,opt,adr,fil,save)) { + return back_add(sback, opt, cache, adr, fil, save, referer_adr, referer_fil, test); } + return 0; } -int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* referer_adr,char* referer_fil,int test,int* pass2_ptr) { +int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* referer_adr,char* referer_fil,int test) { lien_back* const back = sback->lnk; const int back_max = sback->count; int p=0; + char catbuff[CATBUFF_SIZE]; + char catbuff2[CATBUFF_SIZE]; + +#if (defined(_DEBUG) || defined(DEBUG)) + if (!test && back_exist(sback,opt,adr,fil,save)) { + int already_there = 0; + if (opt->log!=NULL) { + HTS_LOG(opt,LOG_ERROR); fprintf(opt->log,"error: back_add(%s,%s,%s) duplicate"LF, adr, fil, save); + } + } +#endif // vérifier cohérence de adr et fil (non vide!) if (strnotempty(adr)==0) { - if ((opt->debug>1) && (opt->errlog!=NULL)) { - fspc(opt->errlog,"debug"); fprintf(opt->errlog,"error: adr is empty for back_add"LF); + if (opt->log!=NULL) { + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"error: adr is empty for back_add"LF); } return -1; // erreur! 
} if (strnotempty(fil)==0) { - if ((opt->debug>1) && (opt->errlog!=NULL)) { - fspc(opt->errlog,"debug"); fprintf(opt->errlog,"error: fil is empty for back_add"LF); + if (opt->log!=NULL) { + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"error: fil is empty for back_add"LF); } return -1; // erreur! } @@ -1044,7 +1283,7 @@ int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* // rechercher emplacement back_clean(opt, cache, sback); - if ( ( p = back_search(opt, cache, sback) ) >= 0) { + if ( ( p = back_search(opt, sback) ) >= 0) { back[p].send_too[0]='\0'; // éventuels paramètres supplémentaires à transmettre au serveur // clear r @@ -1059,7 +1298,7 @@ int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* strcpybuff(back[p].url_adr,adr); strcpybuff(back[p].url_fil,fil); strcpybuff(back[p].url_sav,save); - back[p].pass2_ptr=pass2_ptr; + //back[p].links_index = links_index; // copier referer si besoin strcpybuff(back[p].referer_adr,""); strcpybuff(back[p].referer_fil,""); @@ -1094,24 +1333,25 @@ int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* if (opt->state.stop) { back[p].r.statuscode=STATUSCODE_INVALID; // fatal strcpybuff(back[p].r.msg,"mirror stopped by user"); - back[p].status=0; // terminé + back[p].status=STATUS_READY; // terminé back_set_finished(sback, p); if ((opt->debug>0) && (opt->log!=NULL)) { - fspc(opt->log,"warning"); fprintf(opt->log,"File not added due to mirror cancel: %s%s"LF,adr,fil); test_flush; + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"File not added due to mirror cancel: %s%s"LF,adr,fil); test_flush; } return 0; } // test "fast header" cache ; that is, tests we did that lead to 3XX/4XX/5XX response codes if (cache->cached_tests != NULL) { - long int ptr = 0; - if (inthash_read((inthash)cache->cached_tests, concat(adr, fil), (long int*)&ptr)) { // gotcha + intptr_t ptr = 0; + if (inthash_read(cache->cached_tests, concat(OPT_GET_BUFF(opt), adr, fil), &ptr)) { // 
gotcha if (ptr != 0) { char* text = (char*) ptr; char* lf = strchr(text, '\n'); int code = 0; if (sscanf(text, "%d", &code) == 1) { // got code back[p].r.statuscode=code; + back[p].status=STATUS_READY; // terminé if (lf != NULL && *lf != '\0') { // got location ? strcpybuff(back[p].r.location, lf + 1); } @@ -1126,7 +1366,7 @@ int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* && ( (!test) || (cache->type==1) ) /* cache prioritaire, laisser passer en test! */ && ( (strnotempty(save)) || (strcmp(fil,"/robots.txt")==0) ) ) { // si en test on ne doit pas utiliser le cache sinon telescopage avec le 302.. #if HTS_FAST_CACHE - long int hash_pos; + intptr_t hash_pos; int hash_pos_return=0; #else char* a=NULL; @@ -1139,7 +1379,7 @@ int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* char BIGSTK buff[HTS_URLMAXSIZE*4]; #if HTS_FAST_CACHE strcpybuff(buff,adr); strcatbuff(buff,fil); - hash_pos_return=inthash_read((inthash)cache->hashtable,buff,(long int*)&hash_pos); + hash_pos_return=inthash_read(cache->hashtable,buff,&hash_pos); #else buff[0]='\0'; strcatbuff(buff,"\n"); strcatbuff(buff,adr); strcatbuff(buff,"\n"); strcatbuff(buff,fil); strcatbuff(buff,"\n"); a=strstr(cache->use,buff); @@ -1153,7 +1393,7 @@ int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* #endif if (!test) { // non mode test #if HTS_FAST_CACHE - int pos=hash_pos; + uintptr_t pos=hash_pos; #else int pos=-1; a+=strlen(buff); @@ -1161,7 +1401,7 @@ int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* #endif if (pos<0) { // pas de mise en cache data, vérifier existence /* note: no check with IS_DELAYED_EXT() enabled - postcheck by client please! */ - if (!IS_DELAYED_EXT(save) && fsize(fconv(save)) <= 0) { // fichier existe pas ou est vide! + if (!IS_DELAYED_EXT(save) && fsize(fconv(catbuff,save)) <= 0) { // fichier existe pas ou est vide! 
int found=0; /* It is possible that the file has been moved due to changes in build structure */ @@ -1169,16 +1409,16 @@ int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* char BIGSTK previous_save[HTS_URLMAXSIZE*2]; previous_save[0] = '\0'; back[p].r = cache_readex(opt, cache, adr, fil, NULL, back[p].location_buffer, previous_save, 0); - if (previous_save[0] != '\0' && fexist(fconv(previous_save))) { - rename(fconv(previous_save), fconv(save)); - if (fexist(fconv(save))) { + if (previous_save[0] != '\0' && fexist(fconv(catbuff,previous_save))) { + rename(fconv(catbuff,previous_save), fconv(catbuff2,save)); + if (fexist(fconv(catbuff,save))) { found = 1; if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"File '%s' has been renamed since last mirror to '%s' ; applying changes"LF, previous_save, save); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"File '%s' has been renamed since last mirror to '%s' ; applying changes"LF, previous_save, save); test_flush; } } else { if ((opt->debug>0) && (opt->log!=NULL)) { - fspc(opt->log,"error"); fprintf(opt->log,"Could not rename '%s' to '%s' ; will have to retransfer it"LF, previous_save, save); test_flush; + HTS_LOG(opt,LOG_ERROR); fprintf(opt->log,"Could not rename '%s' to '%s' ; will have to retransfer it"LF, previous_save, save); test_flush; } } } @@ -1194,11 +1434,11 @@ int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* // sinon, le fichier est ok à priori, mais on renverra un if-modified-since pour // en être sûr if (opt->norecatch) { // tester norecatch - if (!fexist(fconv(save))) { // fichier existe pas mais déclaré: on l'a effacé - FILE* fp=fopen(fconv(save),"wb"); + if (!fexist(fconv(catbuff,save))) { // fichier existe pas mais déclaré: on l'a effacé + FILE* fp=fopen(fconv(catbuff,save),"wb"); if (fp) fclose(fp); if (opt->log!=NULL) { - fspc(opt->log,"warning"); fprintf(opt->log,"File must have been erased by user, 
ignoring: %s%s"LF,back[p].url_adr,back[p].url_fil); test_flush; + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"File must have been erased by user, ignoring: %s%s"LF,back[p].url_adr,back[p].url_fil); test_flush; } } } @@ -1235,7 +1475,7 @@ int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* /* Interdiction taille par le wizard? --> détruire */ if (back[p].r.statuscode != -1) { // pas d'erreur de lecture if (!back_checksize(opt,&back[p],0)) { - back[p].status=0; // FINI + back[p].status=STATUS_READY; // FINI back_set_finished(sback, p); back[p].r.statuscode=STATUSCODE_TOO_BIG; if (!back[p].testmode) @@ -1249,13 +1489,13 @@ int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* if (back[p].r.statuscode != -1 || IS_DELAYED_EXT(save)) { // pas d'erreur de lecture ou test retardé if ((opt->debug>0) && (opt->log!=NULL)) { if (!test) { - fspc(opt->log,"debug"); fprintf(opt->log,"File immediately loaded from cache: %s%s"LF,back[p].url_adr,back[p].url_fil); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"File immediately loaded from cache: %s%s"LF,back[p].url_adr,back[p].url_fil); test_flush; } else { - fspc(opt->log,"debug"); fprintf(opt->log,"File immediately tested from cache: %s%s"LF,back[p].url_adr,back[p].url_fil); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"File immediately tested from cache: %s%s"LF,back[p].url_adr,back[p].url_fil); test_flush; } } back[p].r.notmodified=1; // fichier non modifié - back[p].status=0; // OK prêt + back[p].status=STATUS_READY; // OK prêt //file_notify(back[p].url_adr, back[p].url_fil, back[p].url_sav, 0, 0, back[p].r.notmodified); // not modified back_set_finished(sback, p); @@ -1284,7 +1524,7 @@ int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* if (!back_checksize(opt,&back[p],1)) { r.statuscode = STATUSCODE_INVALID; // - back[p].status=0; // FINI + back[p].status=STATUS_READY; // FINI back_set_finished(sback, p); 
back[p].r.statuscode=STATUSCODE_TOO_BIG; deletehttp(&back[p].r); back[p].r.soc=INVALID_SOCKET; @@ -1298,7 +1538,7 @@ int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* } if (r.statuscode != -1) { - if (r.statuscode==200) { // uniquement des 200 (OK) + if (r.statuscode==HTTP_OK) { // uniquement des 200 (OK) if (strnotempty(r.etag)) { // ETag (RFC2616) /* - If both an entity tag and a Last-Modified value have been @@ -1332,12 +1572,12 @@ int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* // Pas dans le cache: fichier n'a pas été transféré du tout, donc pas sur disque? } else { if (fexist(save)) { // fichier existe? aghl! - LLint sz=fsize(save); + off_t sz=fsize(save); // Bon, là il est possible que le fichier ait été partiellement transféré // (s'il l'avait été en totalité il aurait été inscrit dans le cache ET existerait sur disque) // PAS de If-Modified-Since, on a pas connaissance des données à la date du cache // On demande juste les données restantes si le date est valide (206), tout sinon (200) - if ((ishtml(save) != 1) && (ishtml(back[p].url_fil)!=1)) { // NON HTML (liens changés!!) + if ((ishtml(opt,save) != 1) && (ishtml(opt,back[p].url_fil)!=1)) { // NON HTML (liens changés!!) if (sz>0) { // Fichier non vide? (question bête, sinon on transfert tout!) 
char lastmodified[256]; get_filetime_rfc822(save, lastmodified); @@ -1346,7 +1586,7 @@ int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* printf("..if unmodified since %s size "LLintP"\n", lastmodified, (LLint)sz); #endif if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"File partially present ("LLintP" bytes): %s%s"LF,(LLint)sz,back[p].url_adr,back[p].url_fil); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"File partially present ("LLintP" bytes): %s%s"LF,(LLint)sz,back[p].url_adr,back[p].url_fil); test_flush; } /* impossible - don't have etag or date @@ -1368,37 +1608,37 @@ int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* back[p].r.req.nocompression=1; } else { if ((opt->debug>0) && (opt->log!=NULL)) { - fspc(opt->log,"warning"); fprintf(opt->log,"Could not find timestamp for partially present file, restarting (lost "LLintP" bytes): %s%s"LF,(LLint)sz,back[p].url_adr,back[p].url_fil); test_flush; + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Could not find timestamp for partially present file, restarting (lost "LLintP" bytes): %s%s"LF,(LLint)sz,back[p].url_adr,back[p].url_fil); test_flush; } } } else { - if ((opt->debug>0) && (opt->errlog!=NULL)) { - fspc(opt->errlog,"warning"); + if ((opt->debug>0) && (opt->log!=NULL)) { + HTS_LOG(opt,LOG_WARNING); /* if (opt->http10) - fprintf(opt->errlog,"File partially present (%d bytes) retransfered due to HTTP/1.0 settings: %s%s"LF,sz,back[p].url_adr,back[p].url_fil); + fprintf(opt->log,"File partially present (%d bytes) retransfered due to HTTP/1.0 settings: %s%s"LF,sz,back[p].url_adr,back[p].url_fil); else */ - fprintf(opt->errlog,"File partially present ("LLintP" bytes) retransfered due to lack of cache: %s%s"LF,(LLint)sz,back[p].url_adr,back[p].url_fil); + fprintf(opt->log,"File partially present ("LLintP" bytes) retransfered due to lack of cache: %s%s"LF,(LLint)sz,back[p].url_adr,back[p].url_fil); test_flush; } /* 
Sinon requête normale... */ back[p].http11=0; } } else if (opt->norecatch) { // tester norecatch - filenote(save,NULL); // ne pas purger tout de même - file_notify(back[p].url_adr, back[p].url_fil, back[p].url_sav, 0, 0, back[p].r.notmodified); - back[p].status=0; // OK prêt + filenote(&opt->state.strc,save,NULL); // ne pas purger tout de même + file_notify(opt,back[p].url_adr, back[p].url_fil, back[p].url_sav, 0, 0, back[p].r.notmodified); + back[p].status=STATUS_READY; // OK prêt back_set_finished(sback, p); back[p].r.statuscode=STATUSCODE_INVALID; // erreur strcpybuff(back[p].r.msg,"Null-size file not recaught"); return 0; } } else { - if ((opt->debug>0) && (opt->errlog!=NULL)) { - fspc(opt->errlog,"warning"); - fprintf(opt->errlog,"HTML file ("LLintP" bytes) retransfered due to lack of cache: %s%s"LF,(LLint)sz,back[p].url_adr,back[p].url_fil); + if ((opt->debug>0) && (opt->log!=NULL)) { + HTS_LOG(opt,LOG_WARNING); + fprintf(opt->log,"HTML file ("LLintP" bytes) retransfered due to lack of cache: %s%s"LF,(LLint)sz,back[p].url_adr,back[p].url_fil); test_flush; } /* Sinon requête normale... 
*/ @@ -1417,12 +1657,18 @@ int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* // ne pas traiter ou recevoir l'en tête immédiatement memset(&(back[p].r), 0, sizeof(htsblk)); back[p].r.soc=INVALID_SOCKET; back[p].r.location=back[p].location_buffer; // recopier proxy - memcpy(&(back[p].r.req.proxy), &opt->proxy, sizeof(opt->proxy)); + if ((back[p].r.req.proxy.active = opt->proxy.active)) { + if (StringBuff(opt->proxy.bindhost) != NULL) + strcpybuff(back[p].r.req.proxy.bindhost, StringBuff(opt->proxy.bindhost)); + if (StringBuff(opt->proxy.name) != NULL) + strcpybuff(back[p].r.req.proxy.name, StringBuff(opt->proxy.name)); + back[p].r.req.proxy.port = opt->proxy.port; + } // et user-agent - strcpybuff(back[p].r.req.user_agent,opt->user_agent); - strcpybuff(back[p].r.req.referer,opt->referer); - strcpybuff(back[p].r.req.from,opt->from); - strcpybuff(back[p].r.req.lang_iso,opt->lang_iso); + strcpy(back[p].r.req.user_agent,StringBuff(opt->user_agent)); + strcpy(back[p].r.req.referer,StringBuff(opt->referer)); + strcpy(back[p].r.req.from,StringBuff(opt->from)); + strcpy(back[p].r.req.lang_iso,StringBuff(opt->lang_iso)); back[p].r.req.user_agent_send=opt->user_agent_send; // et http11 back[p].r.req.http11=back[p].http11; @@ -1432,22 +1678,21 @@ int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* // mode ftp, court-circuit! 
if (strfield(back[p].url_adr,"ftp://")) { if (back[p].testmode) { - if ((opt->debug>1) && (opt->errlog!=NULL)) { - fspc(opt->errlog,"debug"); fprintf(opt->errlog,"error: forbidden test with ftp link for back_add"LF); + if (opt->log!=NULL) { + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"error: forbidden test with ftp link for back_add"LF); } return -1; // erreur pas de test permis } if (!(back[p].r.req.proxy.active && opt->ftp_proxy)) { // connexion directe, gérée en thread - back[p].status=1000; // connexion ftp + FTPDownloadStruct *str = (FTPDownloadStruct*) malloc(sizeof(FTPDownloadStruct)); + str->pBack = &back[p]; + str->pOpt = opt; + /* */ + back[p].status=STATUS_FTP_TRANSFER; // connexion ftp #if USE_BEGINTHREAD - launch_ftp(&(back[p])); + launch_ftp(str); #else - { - char nid[32]; - sprintf(nid,"htsftp%d-in_progress.lock",p); - strcpybuff(back[p].location_buffer,fconcat(opt->path_log,nid)); - } - launch_ftp(&(back[p]),back[p].location_buffer,opt->exec); +#error Must have pthreads #endif return 0; } @@ -1456,15 +1701,15 @@ int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* else if (strfield(back[p].url_adr,"mms://")) { MMSDownloadStruct str; if (back[p].testmode) { - if ((opt->debug>1) && (opt->errlog!=NULL)) { - fspc(opt->errlog,"debug"); fprintf(opt->errlog,"error: forbidden test with mms link for back_add"LF); + if (opt->log!=NULL) { + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"error: forbidden test with mms link for back_add"LF); } return -1; // erreur pas de test permis } if (back[p].r.req.proxy.active) { - fspc(opt->errlog,"warning"); fprintf(opt->errlog,"warning: direct connection for mms links (proxy settings ignored)"LF); + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"warning: direct connection for mms links (proxy settings ignored)"LF); } - back[p].status=1000; // connexion externe + back[p].status=STATUS_FTP_TRANSFER; // connexion externe str.pBack = &back[p]; str.pOpt = opt; launch_mms(&str); @@ -1484,16 +1729,16 @@ int 
back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* #if HDEBUG printf("back_solve..\n"); #endif - back[p].status=101; // tentative de résolution du nom de host + back[p].status=STATUS_WAIT_DNS; // tentative de résolution du nom de host soc=INVALID_SOCKET; // pas encore ouverte - back_solve(&back[p]); // préparer - if (host_wait(&back[p])) { // prêt, par ex fichier ou dispo dans dns + back_solve(opt, &back[p]); // préparer + if (host_wait(opt, &back[p])) { // prêt, par ex fichier ou dispo dans dns #if HDEBUG printf("ok, dns cache ready..\n"); #endif - soc=http_xfopen(0,0,0,back[p].send_too,adr,fil,&(back[p].r)); + soc=http_xfopen(opt,0,0,0,back[p].send_too,adr,fil,&(back[p].r)); if (soc==INVALID_SOCKET) { - back[p].status=0; // fini, erreur + back[p].status=STATUS_READY; // fini, erreur back_set_finished(sback, p); } } @@ -1521,7 +1766,7 @@ int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* soc = back[p].r.soc; if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"(Keep-Alive): successfully linked #%d (for %s%s)"LF, + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(Keep-Alive): successfully linked #%d (for %s%s)"LF, back[p].r.debugid, back[p].url_adr, back[p].url_fil); test_flush; } @@ -1543,7 +1788,7 @@ int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* // Note: on charge les code-page erreurs (erreur 404, etc) dans le cas où cela est // rattrapable (exemple: 301,302 moved xxx -> refresh sur la page!) 
- //if ((back[p].statuscode!=200) || (soc<0)) { // ERREUR HTTP/autre + //if ((back[p].statuscode!=HTTP_OK) || (soc<0)) { // ERREUR HTTP/autre #if CNXDEBUG printf("Xfopen ok, poll..\n"); @@ -1551,23 +1796,23 @@ printf("Xfopen ok, poll..\n"); #if HTS_XGETHOST if (soc!=INVALID_SOCKET) - if (back[p].status==101) { // pas d'erreur + if (back[p].status==STATUS_WAIT_DNS) { // pas d'erreur if (!back[p].r.is_file) - back[p].status=100; // connexion en cours + back[p].status=STATUS_CONNECTING; // connexion en cours else back[p].status=1; // fichier } #else if (soc==INVALID_SOCKET) { // erreur socket - back[p].status=0; // FINI + back[p].status=STATUS_READY; // FINI back_set_finished(sback, p); //if (back[p].soc!=INVALID_SOCKET) deletehttp(back[p].soc); back[p].r.soc=INVALID_SOCKET; } else { if (!back[p].r.is_file) #if HTS_XCONN - back[p].status=100; // connexion en cours + back[p].status=STATUS_CONNECTING; // connexion en cours #else back[p].status=99; // chargement en tête en cours #endif @@ -1589,8 +1834,45 @@ printf("Xfopen ok, poll..\n"); return 0; } else { - if ((opt->debug>1) && (opt->errlog!=NULL)) { - fspc(opt->errlog,"debug"); fprintf(opt->errlog,"error: no space left in stack for back_add"LF); + if (opt->log!=NULL) { + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"error: no space left in stack for back_add"LF); + if ( ( opt->state.debug_state & 1 ) == 0 ) { /* debug_state<0> == debug 'no space left in stack' */ + int i; + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"debug: DUMPING %d BLOCKS"LF, back_max); + opt->state.debug_state |= 1; /* once */ + /* OUTPUT FULL DEBUG INFORMATION THE FIRST TIME WE SEE THIS VERY ANNOYING BUG, + HOPING THAT SOME USER REPORT WILL QUICKLY SOLVE THIS PROBLEM :p */ + for(i = 0 ; i < back_max ; i++ ) { + if (back[i].status != -1) { + int may_clean = slot_can_be_cleaned(&back[i]); + int may_finalize = may_clean && slot_can_be_finalized(opt, &back[i]); + int may_serialize = slot_can_be_cached_on_disk(&back[i]); + HTS_LOG(opt,LOG_INFO); + 
fprintf(opt->log, + "debug: back[%03d]: may_clean=%d, may_finalize_disk=%d, may_serialize=%d:"LF + "\t" "finalized(%d), status(%d), locked(%d), delayed(%d), test(%d), "LF + "\t" "statuscode(%d), size(%d), is_write(%d), may_hypertext(%d), "LF + "\t" "contenttype(%s), url(%s%s), save(%s)"LF, + i, + may_clean, may_finalize, may_serialize, + back[i].finalized, + back[i].status, + back[i].locked, + IS_DELAYED_EXT(back[i].url_sav), + back[i].testmode, + back[i].r.statuscode, + (int) back[i].r.size, + back[i].r.is_write, + may_be_hypertext_mime(opt,back[i].r.contenttype, back[i].url_fil), + /* */ + back[i].r.contenttype, + back[i].url_adr, back[i].url_fil, + back[i].url_sav ? back[i].url_sav : "<null>" + ); + } + } + } + } return -1; // plus de place } @@ -1601,9 +1883,14 @@ printf("Xfopen ok, poll..\n"); #if HTS_XGETHOST #if USE_BEGINTHREAD // lancement multithread du robot -PTHREAD_TYPE PTHREAD_TYPE_FNC Hostlookup(void* iadr_p) { +typedef struct { + char iadr_p[HTS_URLMAXSIZE]; + httrackp *opt; +} HostlookupStruct; +void Hostlookup(void* pP) { + HostlookupStruct *str = (HostlookupStruct*) pP; char iadr[256]; - t_dnscache* cache=_hts_cache(); // adresse du cache + t_dnscache* cache=_hts_cache(str->opt); // adresse du cache t_hostent* hp; int error_found=0; @@ -1611,18 +1898,17 @@ PTHREAD_TYPE PTHREAD_TYPE_FNC Hostlookup(void* iadr_p) { #if DEBUGDNS printf("resolv in background: %s\n",jump_identification(iadr_p)); #endif - strcpybuff(iadr,jump_identification(iadr_p)); + strcpybuff(iadr,jump_identification(str->iadr_p)); // couper éventuel : { char *a; if ( (a=jump_toport(iadr)) ) *a='\0'; // get rid of it } - freet(iadr_p); + freet(pP); + + hts_mutexlock(&dns_lock); - // attendre que le cache dns soit prêt - while(_hts_lockdns(-1)); // attendre libération - _hts_lockdns(1); // locker while(cache->n) { if (strcmp(cache->iadr,iadr)==0) { error_found=1; @@ -1641,7 +1927,6 @@ PTHREAD_TYPE PTHREAD_TYPE_FNC Hostlookup(void* iadr_p) { strcpybuff(cache->n->iadr,iadr); 
cache->n->host_length=0; /* pour le moment rien */ cache->n->n=NULL; - _hts_lockdns(0); // délocker /* resolve */ #if DEBUGDNS @@ -1654,28 +1939,25 @@ PTHREAD_TYPE PTHREAD_TYPE_FNC Hostlookup(void* iadr_p) { memcpy(cache->n->host_addr, hp->h_addr, hp->h_length); cache->n->host_length = hp->h_length; } - } else - _hts_lockdns(0); // délocker + } } else { #if DEBUGDNS printf("aborting resolv for %s (found)\n",iadr); #endif - _hts_lockdns(0); // délocker } - // fin de copie de hts_gethostbyname + + hts_mutexrelease(&dns_lock); #if DEBUGDNS printf("quitting resolv for %s (result: %d)\n",iadr,(cache->n!=NULL)?cache->n->host_length:(-999)); #endif - - return PTHREAD_RETURN; /* _endthread implied */ } #endif // attendre que le host (ou celui du proxy) ait été résolu // si c'est un fichier, la résolution est immédiate // idem pour ftp:// -void back_solve(lien_back* back) { +void back_solve(httrackp *opt, lien_back* back) { if ((!strfield(back->url_adr,"file://")) && ! strfield(back->url_adr,"ftp://") #if HTS_USEMMS @@ -1683,22 +1965,23 @@ void back_solve(lien_back* back) { #endif ) { //## if (back->url_adr[0]!=lOCAL_CHAR) { // qq chose à préparer - char* a; + const char* a; if (!(back->r.req.proxy.active)) a=back->url_adr; else a=back->r.req.proxy.name; a = jump_protocol(a); - if (!hts_dnstest(a)) { // non encore testé!.. + if (!hts_dnstest(opt, a)) { // non encore testé!.. 
// inscire en thread -#if HTS_WIN +#ifdef _WIN32 // Windows #if USE_BEGINTHREAD { - char* p = calloct(strlen(a)+2,1); - if (p) { - strcpybuff(p,a); - (void)hts_newthread( Hostlookup , 0, p ); + HostlookupStruct *str = (HostlookupStruct*)malloct(sizeof(HostlookupStruct)); + if (str) { + strcpybuff(str->iadr_p, a); + str->opt = opt; + hts_newthread(Hostlookup, str); } } #else @@ -1710,7 +1993,7 @@ void back_solve(lien_back* back) { char* p = calloct(strlen(a)+2,1); if (p) { strcpybuff(p,a); - (void)hts_newthread( Hostlookup , 0, p ); + hts_newthread( Hostlookup , p ); } #else // Sous Unix, le gethostbyname() est bloquant.. @@ -1723,7 +2006,7 @@ void back_solve(lien_back* back) { } // détermine si le host a pu être résolu -int host_wait(lien_back* back) { +int host_wait(httrackp *opt, lien_back* back) { if ((!strfield(back->url_adr,"file://")) && (!strfield(back->url_adr,"ftp://")) #if HTS_USEMMS @@ -1732,9 +2015,9 @@ int host_wait(lien_back* back) { ) { //## if (back->url_adr[0]!=lOCAL_CHAR) { if (!(back->r.req.proxy.active)) { - return (hts_dnstest(back->url_adr)); + return (hts_dnstest(opt, back->url_adr)); } else { - return (hts_dnstest(back->r.req.proxy.name)); + return (hts_dnstest(opt, back->r.req.proxy.name)); } } else return 1; // prêt, fichier local } @@ -1745,82 +2028,101 @@ int host_wait(lien_back* back) { // cleanup non-html files in backing to save backing space // and allow faster "save in cache" operation // also cleanup keep-alive sockets and ensure that not too many sockets are being opened + +static int slot_can_be_cleaned(const lien_back* back) { + return + (back->status == STATUS_READY) // ready + /* Check autoclean */ + && (!back->testmode) // not test mode + && (strnotempty(back->url_sav)) // filename exists + && (HTTP_IS_OK(back->r.statuscode)) // HTTP "OK" + && (back->r.size > 0) // size>0 + ; +} + +static int slot_can_be_finalized(httrackp* opt, const lien_back* back) { + return + (back->r.is_write // not in memory (on disk, ready) + && 
!is_hypertext_mime(opt,back->r.contenttype, back->url_fil) // not HTML/hypertext + && !may_be_hypertext_mime(opt,back->r.contenttype, back->url_fil) // may NOT be parseable mime type + ); +} + void back_clean(httrackp* opt,cache_back* cache,struct_back* sback) { lien_back* const back = sback->lnk; const int back_max = sback->count; -#if HTS_ANALYSTE - int oneMore = ( (_hts_in_html_parsing == 2 && opt->maxsoc >= 2) || (_hts_in_html_parsing == 1 && opt->maxsoc >= 4) ) ? 1 : 0; // testing links -#endif + int oneMore = ( (opt->state._hts_in_html_parsing == 2 && opt->maxsoc >= 2) || (opt->state._hts_in_html_parsing == 1 && opt->maxsoc >= 4) ) ? 1 : 0; // testing links int i; for(i=0;i<back_max;i++) { - if (back[i].status == 0) { // ready - /* Check autoclean */ - if (!back[i].testmode) { // not test mode - if (strnotempty(back[i].url_sav)) { // filename exists - if (back[i].r.statuscode==200) { // HTTP "OK" - if (back[i].r.size>0) { // size>0 - if (back[i].r.is_write // not in memory (on disk, ready) - && !is_hypertext_mime(back[i].r.contenttype, back[i].url_fil) // not HTML/hypertext - && !may_be_hypertext_mime(back[i].r.contenttype, back[i].url_fil) // may NOT be parseable mime type - ) - { - if (back[i].pass2_ptr) { - (void) back_flush_output(opt, cache, sback, i); // flush output buffers - usercommand(opt, 0, NULL, back[i].url_sav, back[i].url_adr, back[i].url_fil); - *back[i].pass2_ptr=-1; // Done! 
- HTS_STAT.stat_background++; - if ((opt->debug>0) && (opt->log!=NULL)) { - fspc(opt->log,"info"); fprintf(opt->log,"File successfully written in background: %s"LF,back[i].url_sav); test_flush; + if (slot_can_be_cleaned(&back[i])) { + if (slot_can_be_finalized(opt, &back[i])) { + (void) back_flush_output(opt, cache, sback, i); // flush output buffers + usercommand(opt, 0, NULL, back[i].url_sav, back[i].url_adr, back[i].url_fil); + //if (back[i].links_index >= 0) { + // assertf(back[i].links_index < opt->hash->max_lien); + // opt->hash->liens[back[i].links_index]->pass2 = -1; + // // *back[i].pass2_ptr=-1; // Done! + //} + /* MANDATORY if we don't want back_fill() to endlessly put the same file on download! */ + { + int index = hash_read(opt->hash,back[i].url_sav,"",0,opt->urlhack); // lecture type 0 (sav) + if (index >= 0) { + opt->hash->liens[index]->pass2 = -1; /* DONE! */ + } else { + if (opt->log != NULL) { + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"engine: warning: entry cleaned up, but no trace on heap: %s%s (%s)"LF,back[i].url_adr, back[i].url_fil,back[i].url_sav); + test_flush; + } + } + } + HTS_STAT.stat_background++; + if ((opt->debug>0) && (opt->log!=NULL)) { + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"File successfully written in background: %s"LF,back[i].url_sav); test_flush; + } + back_maydelete(opt,cache,sback,i); // May delete backing entry + } else { + if (!back[i].finalized) { + if (1) { + /* Ensure deleted or recycled socket */ + /* BUT DO NOT YET WIPE back[i].r.adr */ + if ( (opt->debug>1) && (opt->log!=NULL) ) { + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"file %s%s validated (cached, left in memory)"LF,back[i].url_adr,back[i].url_fil); test_flush; + } + back_maydeletehttp(opt, cache, sback, i); + } else { + /* + NOT YET HANDLED CORRECTLY (READ IN NEW CACHE TO DO) + */ + /* Lock the entry but do not keep the html data in memory (in cache) */ + if (opt->cache) { + htsblk r; + + /* Ensure deleted or recycled socket */ + back_maydeletehttp(opt, 
cache, sback, i); + assertf(back[i].r.soc == INVALID_SOCKET); + + /* Check header */ + cache_header(opt,cache,back[i].url_adr,back[i].url_fil,&r); + if (r.statuscode == HTTP_OK) { + if (back[i].r.soc == INVALID_SOCKET) { + /* Delete buffer and sockets */ + deleteaddr(&back[i].r); + deletehttp(&back[i].r); + if ( (opt->debug>1) && (opt->log!=NULL) ) { + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"file %s%s temporarily left in cache to spare memory"LF,back[i].url_adr,back[i].url_fil); test_flush; } - back_maydelete(opt,cache,sback,i); // May delete backing entry } } else { - if (!back[i].finalized) { - if (1) { - /* Ensure deleted or recycled socket */ - /* BUT DO NOT YET WIPE back[i].r.adr */ - if ( (opt->debug>1) && (opt->log!=NULL) ) { - fspc(opt->log,"debug"); fprintf(opt->log,"file %s%s validated (cached, left in memory)"LF,back[i].url_adr,back[i].url_fil); test_flush; - } - back_maydeletehttp(opt, cache, sback, i); - } else { - /* - NOT YET HANDLED CORRECTLY (READ IN NEW CACHE TO DO) - */ - /* Lock the entry but do not keep the html data in memory (in cache) */ - if (opt->cache) { - htsblk r; - - /* Ensure deleted or recycled socket */ - back_maydeletehttp(opt, cache, sback, i); - assertf(back[i].r.soc == INVALID_SOCKET); - - /* Check header */ - cache_header(opt,cache,back[i].url_adr,back[i].url_fil,&r); - if (r.statuscode == 200) { - if (back[i].r.soc == INVALID_SOCKET) { - /* Delete buffer and sockets */ - deleteaddr(&back[i].r); - deletehttp(&back[i].r); - if ( (opt->debug>1) && (opt->log!=NULL) ) { - fspc(opt->log,"debug"); fprintf(opt->log,"file %s%s temporarily left in cache to spare memory"LF,back[i].url_adr,back[i].url_fil); test_flush; - } - } - } else { - if ((opt->debug>0) && (opt->log!=NULL)) { - fspc(opt->log,"warning"); fprintf(opt->log,"Unexpected html cache lookup error during back clean"LF); test_flush; - } - } - // xxc xxc - } - } - } + if ((opt->debug>0) && (opt->log!=NULL)) { + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Unexpected html cache 
lookup error during back clean"LF); test_flush; + } } + // xxc xxc } } } } - } else if (back[i].status == -103) { // waiting (keep-alive) + } else if (back[i].status == STATUS_ALIVE) { // waiting (keep-alive) if ( ! back[i].r.keep_alive || back[i].r.soc == INVALID_SOCKET @@ -1828,7 +2130,7 @@ void back_clean(httrackp* opt,cache_back* cache,struct_back* sback) { || time_local() >= back[i].ka_time_start + back[i].r.keep_alive_t ) { if ((opt->debug>0) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"(Keep-Alive): live socket closed #%d (%s)"LF, + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(Keep-Alive): live socket closed #%d (%s)"LF, back[i].r.debugid, back[i].url_adr); test_flush; @@ -1839,7 +2141,7 @@ void back_clean(httrackp* opt,cache_back* cache,struct_back* sback) { } /* switch connections to live ones */ for(i=0;i<back_max;i++) { - if (back[i].status == 0) { // ready + if (back[i].status == STATUS_READY) { // ready if (back[i].r.soc != INVALID_SOCKET) { back_maydeletehttp(opt,cache,sback, i); } @@ -1851,12 +2153,12 @@ void back_clean(httrackp* opt,cache_back* cache,struct_back* sback) { int curr = back_nsoc_overall(sback); if (curr > max) { if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"(Keep-Alive): deleting #%d sockets"LF, + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(Keep-Alive): deleting #%d sockets"LF, curr - max); test_flush; } } for(i = 0 ; i < back_max && curr > max ; i++) { - if (back[i].status == -103) { + if (back[i].status == STATUS_ALIVE) { back_delete(opt,cache,sback, i); // delete backing entry curr--; } @@ -1866,7 +2168,7 @@ void back_clean(httrackp* opt,cache_back* cache,struct_back* sback) { { int nxfr = back_cleanup_background(opt,cache,sback); if (nxfr > 0 && (opt->debug>0) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"(htsback): %d slots ready moved to background"LF, nxfr); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(htsback): %d slots ready moved to background"LF, 
nxfr); test_flush; } } @@ -1875,6 +2177,7 @@ void back_clean(httrackp* opt,cache_back* cache,struct_back* sback) { // attente (gestion des buffers des sockets) void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_timestart) { + char catbuff[CATBUFF_SIZE]; lien_back* const back = sback->lnk; const int back_max = sback->count; unsigned int i_mod; @@ -1888,9 +2191,7 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti int busy_recv=0; // pas de données pour le moment int busy_state=0; // pas de connexions int max_loop; // nombre de boucles max à parcourir.. -#if HTS_ANALYSTE int max_loop_chk=0; -#endif unsigned int mod_random = (unsigned int) ( time_local() + HTS_STAT.HTS_TOTAL_RECV ); // max. number of loops @@ -1926,7 +2227,7 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti // en cas de gestion du connect préemptif #if HTS_XCONN - if (back[i].status==100) { // connexion + if (back[i].status==STATUS_CONNECTING) { // connexion do_wait=1; // noter socket write @@ -1947,7 +2248,7 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti } else #endif #if HTS_XGETHOST - if (back[i].status==101) { // attente + if (back[i].status==STATUS_WAIT_DNS) { // attente // rien à faire.. 
} else #endif @@ -1984,14 +2285,14 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti } } else { back[i].r.statuscode=STATUSCODE_CONNERROR; - if (back[i].status==100) + if (back[i].status==STATUS_CONNECTING) strcpybuff(back[i].r.msg,"Connect Error"); else strcpybuff(back[i].r.msg,"Receive Error"); - back[i].status=0; // terminé + back[i].status=STATUS_READY; // terminé back_set_finished(sback, i); if ((opt->debug>0) && (opt->log!=NULL)) { - fspc(opt->log,"warning"); fprintf(opt->log,"Unexpected socket error during pre-loop"LF); test_flush; + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Unexpected socket error during pre-loop"LF); test_flush; } } #if WIDE_DEBUG @@ -2058,14 +2359,14 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti } back[i].r.soc=INVALID_SOCKET; back[i].r.statuscode=STATUSCODE_CONNERROR; - if (back[i].status==100) + if (back[i].status==STATUS_CONNECTING) strcpybuff(back[i].r.msg,"Connect Error"); else strcpybuff(back[i].r.msg,"Receive Error"); - if (back[i].status == -103) { /* Keep-alive socket */ + if (back[i].status == STATUS_ALIVE) { /* Keep-alive socket */ back_delete(opt,cache,sback, i); } else { - back[i].status=0; // terminé + back[i].status=STATUS_READY; // terminé back_set_finished(sback, i); } } @@ -2074,7 +2375,7 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti } // ---- FLAG WRITE MIS A UN?: POUR LE CONNECT - if (back[i].status==100) { // attendre connect + if (back[i].status==STATUS_CONNECTING) { // attendre connect int dispo=0; // vérifier l'existance de timeout-check if (!gestion_timeout) @@ -2098,7 +2399,7 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti SSL_clear(back[i].r.ssl_con); if (SSL_set_fd(back[i].r.ssl_con, back[i].r.soc) == 1) { SSL_set_connect_state(back[i].r.ssl_con); - back[i].status = 102; /* handshake wait */ + back[i].status = STATUS_SSL_WAIT_HANDSHAKE; /* handshake wait */ } else 
back[i].r.statuscode=STATUSCODE_SSL_HANDSHAKE; } else @@ -2110,7 +2411,7 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET; back[i].r.statuscode=STATUSCODE_NON_FATAL; - back[i].status=0; + back[i].status=STATUS_READY; back_set_finished(sback, i); } } @@ -2121,7 +2422,7 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti printf("..connect ok on socket %d\n",back[i].r.soc); #endif - if ((back[i].r.soc != INVALID_SOCKET) && (back[i].status==100)) { + if ((back[i].r.soc != INVALID_SOCKET) && (back[i].status==STATUS_CONNECTING)) { /* limit nb. connections/seconds to avoid server overload */ /*if (opt->maxconn>0) { Sleep(1000/opt->maxconn); @@ -2138,11 +2439,11 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti //if (strcmp(back[i].url_sav,BACK_ADD_TEST)!=0) // vrai get HTS_STAT.stat_nrequests++; if (!back[i].head_request) - http_sendhead(opt->cookie,0,back[i].send_too,back[i].url_adr,back[i].url_fil,back[i].referer_adr,back[i].referer_fil,&back[i].r); + http_sendhead(opt, opt->cookie,0,back[i].send_too,back[i].url_adr,back[i].url_fil,back[i].referer_adr,back[i].referer_fil,&back[i].r); else if (back[i].head_request==2) // test en GET! - http_sendhead(opt->cookie,0,back[i].send_too,back[i].url_adr,back[i].url_fil,back[i].referer_adr,back[i].referer_fil,&back[i].r); + http_sendhead(opt, opt->cookie,0,back[i].send_too,back[i].url_adr,back[i].url_fil,back[i].referer_adr,back[i].referer_fil,&back[i].r); else // test! 
- http_sendhead(opt->cookie,1,back[i].send_too,back[i].url_adr,back[i].url_fil,back[i].referer_adr,back[i].referer_fil,&back[i].r); + http_sendhead(opt, opt->cookie,1,back[i].send_too,back[i].url_adr,back[i].url_fil,back[i].referer_adr,back[i].referer_fil,&back[i].r); back[i].status=99; // attendre en tête maintenant } } @@ -2150,7 +2451,7 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti // attente gethostbyname } #if HTS_USEOPENSSL - else if (SSL_is_available && back[i].status==102) { // wait for SSL handshake + else if (SSL_is_available && back[i].status == STATUS_SSL_WAIT_HANDSHAKE) { // wait for SSL handshake /* SSL mode */ if (back[i].r.ssl) { int conn_code; @@ -2173,25 +2474,25 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET; back[i].r.statuscode=STATUSCODE_NON_FATAL; - back[i].status=0; + back[i].status=STATUS_READY; back_set_finished(sback, i); } } else { /* got it! 
*/ - back[i].status=100; // back to waitconnect + back[i].status=STATUS_CONNECTING; // back to waitconnect } } else { strcpybuff(back[i].r.msg, "unexpected SSL/TLS error"); deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET; back[i].r.statuscode=STATUSCODE_NON_FATAL; - back[i].status=0; + back[i].status=STATUS_READY; back_set_finished(sback, i); } } #endif #if HTS_XGETHOST - else if (back[i].status==101) { // attendre gethostbyname + else if (back[i].status==STATUS_WAIT_DNS) { // attendre gethostbyname #if DEBUGDNS //printf("status 101 for %s\n",back[i].url_adr); #endif @@ -2200,8 +2501,8 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti if (back[i].timeout>0) gestion_timeout=1; - if (host_wait(&back[i])) { // prêt - back[i].status=100; // attente connexion + if (host_wait(opt, &back[i])) { // prêt + back[i].status=STATUS_CONNECTING; // attente connexion if (back[i].timeout>0) { // refresh timeout si besoin est back[i].timeout_refresh=time_local(); } @@ -2209,9 +2510,9 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti back[i].rateout_time=time_local(); } - back[i].r.soc=http_xfopen(0,0,0,back[i].send_too,back[i].url_adr,back[i].url_fil,&(back[i].r)); + back[i].r.soc=http_xfopen(opt,0,0,0,back[i].send_too,back[i].url_adr,back[i].url_fil,&(back[i].r)); if (back[i].r.soc==INVALID_SOCKET) { - back[i].status=0; // fini, erreur + back[i].status=STATUS_READY; // fini, erreur back_set_finished(sback, i); if (back[i].r.soc!=INVALID_SOCKET) { #if HTS_DEBUG_CLOSESOCK @@ -2233,10 +2534,10 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti #if USE_BEGINTHREAD // ..rien à faire, c'est magic les threads #else - else if (back[i].status==1000) { // en réception ftp + else if (back[i].status==STATUS_FTP_TRANSFER) { // en réception ftp if (!fexist(back[i].location_buffer)) { // terminé FILE* fp; - fp=fopen(fconcat(back[i].location_buffer,".ok"),"rb"); + 
fp=fopen(fconcat(OPT_GET_BUFF(opt), back[i].location_buffer,".ok"),"rb"); if (fp) { int j=0; fscanf(fp,"%d ",&(back[i].r.statuscode)); @@ -2247,13 +2548,13 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti } back[i].r.msg[j++]='\0'; fclose(fp); - unlink(fconcat(back[i].location_buffer,".ok")); - strcpybuff(fconcat(back[i].location_buffer,".ok"),""); + unlink(fconcat(OPT_GET_BUFF(opt), back[i].location_buffer,".ok")); + strcpybuff(fconcat(OPT_GET_BUFF(opt), back[i].location_buffer,".ok"),""); } else { strcpybuff(back[i].r.msg,"Unknown ftp result, check if file is ok"); back[i].r.statuscode=STATUSCODE_INVALID; } - back[i].status=0; + back[i].status=STATUS_READY; back_set_finished(sback, i); // finalize transfer if (back[i].r.statuscode>0) { @@ -2262,8 +2563,8 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti } } #endif - else if (back[i].status==1001) { // ftp ready - back[i].status=0; + else if (back[i].status==STATUS_FTP_READY) { // ftp ready + back[i].status=STATUS_READY; back_set_finished(sback, i); // finalize transfer if (back[i].r.statuscode>0) { @@ -2311,28 +2612,35 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti if (!back[i].testmode) { // pas mode test if (strnotempty(back[i].url_sav)) { if (strcmp(back[i].url_fil,"/robots.txt")) { - if (back[i].r.statuscode==200) { // 'OK' - if (!is_hypertext_mime(back[i].r.contenttype, back[i].url_fil)) { // pas HTML + if (back[i].r.statuscode==HTTP_OK) { // 'OK' + if (!is_hypertext_mime(opt,back[i].r.contenttype, back[i].url_fil)) { // pas HTML if (opt->getmode&2) { // on peut ecrire des non html int fcheck=0; + int last_errno = 0; back[i].r.is_write=1; // écrire if (back[i].r.compressed && /* .gz are *NOT* depacked!! 
*/ - (strfield(get_ext(back[i].url_sav),"gz") == 0) + (strfield(get_ext(catbuff,back[i].url_sav),"gz") == 0) ) { back[i].tmpfile_buffer[0]='\0'; back[i].tmpfile=tmpnam(back[i].tmpfile_buffer); - if (back[i].tmpfile != NULL && back[i].tmpfile[0]) - back[i].r.out=fopen(back[i].tmpfile,"wb"); + if (back[i].tmpfile != NULL && back[i].tmpfile[0]) { + if ((back[i].r.out=fopen(back[i].tmpfile,"wb")) == NULL) { + last_errno = errno; + } + } } else { - file_notify(back[i].url_adr, back[i].url_fil, back[i].url_sav, 1, 1, back[i].r.notmodified); + file_notify(opt,back[i].url_adr, back[i].url_fil, back[i].url_sav, 1, 1, back[i].r.notmodified); back[i].r.compressed=0; - back[i].r.out=filecreate(back[i].url_sav); + if ((back[i].r.out=filecreate(&opt->state.strc, back[i].url_sav)) == NULL) { + last_errno = errno; + } } if (back[i].r.out==NULL) { + errno = last_errno; if ((fcheck=check_fatal_io_errno())) { - fspc(opt->log,"error"); fprintf(opt->log,"Mirror aborted: disk full or filesystem problems"LF); test_flush; + HTS_LOG(opt,LOG_ERROR); fprintf(opt->log,"Mirror aborted: disk full or filesystem problems"LF); test_flush; opt->state.exit_xh=-1; /* fatal error */ } } @@ -2340,16 +2648,16 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti printf("direct-disk: %s\n",back[i].url_sav); #endif if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"File received from net to disk: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"File received from net to disk: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush; } if (back[i].r.out==NULL) { - if (opt->errlog) { - fspc(opt->errlog,"error"); - fprintf(opt->errlog,"Unable to save file %s : %s"LF,back[i].url_sav, strerror(errno)); + if (opt->log) { + HTS_LOG(opt,LOG_ERROR); + fprintf(opt->log,"Unable to save file %s : %s"LF,back[i].url_sav, strerror(last_errno)); if (fcheck) { - fspc(opt->errlog,"error"); - fprintf(opt->errlog,"* 
* Fatal write error, giving up"LF); + HTS_LOG(opt,LOG_ERROR); + fprintf(opt->log,"* * Fatal write error, giving up"LF); } test_flush; } @@ -2358,14 +2666,14 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti printf("..error!\n"); #endif } -#if HTS_WIN==0 +#ifndef _WIN32 else chmod(back[i].url_sav,HTS_ACCESS_FILE); #endif } else { // on coupe tout! if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"File cancelled (non HTML): %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"File cancelled (non HTML): %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush; } - back[i].status=0; // terminé + back[i].status=STATUS_READY; // terminé back_set_finished(sback, i); if (!back[i].testmode) back[i].r.statuscode=STATUSCODE_INVALID; // EUHH CANCEL @@ -2391,9 +2699,9 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti // réception de données depuis socket ou fichier if (back[i].status) { - if (back[i].status==99) // recevoir par bloc de lignes + if (back[i].status==STATUS_WAIT_HEADERS) // recevoir par bloc de lignes retour_fread=http_xfread1(&(back[i].r),0); - else if (back[i].status==98 || back[i].status==97) { // recevoir longueur chunk en hexa caractère par caractère + else if (back[i].status==STATUS_CHUNK_WAIT || back[i].status==STATUS_CHUNK_CR) { // recevoir longueur chunk en hexa caractère par caractère // backuper pour lire dans le buffer chunk htsblk r; memcpy(&r, &(back[i].r), sizeof(htsblk)); @@ -2427,7 +2735,7 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti if (back[i].is_chunk) { // attendre prochain chunk if (back[i].r.size==back[i].r.totalsize) { // fin chunk! 
//printf("chunk end at %d\n",back[i].r.size); - back[i].status=97; /* fetch ending CRLF */ + back[i].status=STATUS_CHUNK_CR; /* fetch ending CRLF */ if (back[i].chunk_adr!=NULL) { freet(back[i].chunk_adr); back[i].chunk_adr=NULL; @@ -2446,7 +2754,7 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti } if (retour_fread < 0) { // fin réception - back[i].status=0; // terminé + back[i].status=STATUS_READY; // terminé back_set_finished(sback, i); /*KA back[i].r.soc=INVALID_SOCKET; */ #if CHUNKDEBUG==1 @@ -2455,18 +2763,18 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti #endif if (retour_fread < 0 && retour_fread != READ_EOF) { if (back[i].r.size > 0) - strcatbuff(back[i].r.msg, "Interrupted transfer"); + strcpybuff(back[i].r.msg, "Interrupted transfer"); else - strcatbuff(back[i].r.msg, "No data (connection closed)"); + strcpybuff(back[i].r.msg, "No data (connection closed)"); back[i].r.statuscode=STATUSCODE_CONNERROR; } else if ((back[i].r.statuscode <= 0) && (strnotempty(back[i].r.msg)==0)) { #if HDEBUG printf("error interruped: %s\n",back[i].r.adr); #endif if (back[i].r.size>0) - strcatbuff(back[i].r.msg,"Interrupted transfer"); + strcpybuff(back[i].r.msg,"Interrupted transfer"); else - strcatbuff(back[i].r.msg,"No data (connection closed)"); + strcpybuff(back[i].r.msg,"No data (connection closed)"); back[i].r.statuscode=STATUSCODE_CONNERROR; } @@ -2487,7 +2795,7 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti } if (back[i].r.totalsize>0) { // tester totalsize - //if ((back[i].r.totalsize>0) && (back[i].status==99)) { // tester totalsize + //if ((back[i].r.totalsize>0) && (back[i].status==STATUS_WAIT_HEADERS)) { // tester totalsize if (back[i].r.totalsize!=back[i].r.size) { // pas la même! 
if (!opt->tolerant) { //#if HTS_CL_IS_FATAL @@ -2498,8 +2806,8 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti } else { //#else // Un warning suffira.. - if (cache->errlog!=NULL) { - fspc(cache->errlog,"warning"); fprintf(cache->errlog,"Incorrect length ("LLintP"!="LLintP" expected) for %s%s"LF,(LLint)back[i].r.size,(LLint)back[i].r.totalsize,back[i].url_adr,back[i].url_fil); + if (cache->log!=NULL) { + fspc(opt,cache->log,"warning"); fprintf(cache->log,"Incorrect length ("LLintP"!="LLintP" expected) for %s%s"LF,(LLint)back[i].r.size,(LLint)back[i].r.totalsize,back[i].url_adr,back[i].url_fil); } //#endif } @@ -2514,7 +2822,7 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti } // Traitement des en têtes chunks ou en têtes - if (back[i].status==98 || back[i].status==97) { // réception taille chunk en hexa ( après les en têtes, peut ne pas + if (back[i].status==STATUS_CHUNK_WAIT || back[i].status==STATUS_CHUNK_CR) { // réception taille chunk en hexa ( après les en têtes, peut ne pas if (back[i].chunk_size > 0 && back[i].chunk_adr[back[i].chunk_size-1]==10) { int chunk_size=-1; char chunk_data[64]; @@ -2534,7 +2842,7 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti #endif if (back[i].r.totalsize<0) back[i].r.totalsize=0; // initialiser à 0 - if (back[i].status==98) { // "real" chunk + if (back[i].status==STATUS_CHUNK_WAIT) { // "real" chunk if (sscanf(chunk_data,"%x",&chunk_size) == 1) { if (chunk_size > 0) back[i].chunk_blocksize = chunk_size; /* the data block chunk size */ @@ -2542,10 +2850,10 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti back[i].chunk_blocksize = -1; /* ending */ back[i].r.totalsize+=chunk_size; // noter taille if (back[i].r.adr != NULL || !back[i].r.is_write) { // Not to disk - back[i].r.adr=(char*) realloct(back[i].r.adr,(INTsys) back[i].r.totalsize + 1); + back[i].r.adr=(char*) realloct(back[i].r.adr, 
(size_t)back[i].r.totalsize + 1); if (!back[i].r.adr) { - if (cache->errlog!=NULL) { - fprintf(cache->errlog,"Error: Not enough memory ("LLintP") for %s%s"LF,(LLint)back[i].r.totalsize,back[i].url_adr,back[i].url_fil); + if (cache->log!=NULL) { + fprintf(cache->log,"Error: Not enough memory ("LLintP") for %s%s"LF,(LLint)back[i].r.totalsize,back[i].url_adr,back[i].url_fil); } } } @@ -2553,11 +2861,11 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti printf("[%d] chunk length: %d - next total "LLintP":\n",(int)back[i].r.soc,(int)chunk_size,(LLint)back[i].r.totalsize); #endif } else { - if (cache->errlog!=NULL) { - fprintf(cache->errlog,"Warning: Illegal chunk (%s) for %s%s"LF,back[i].chunk_adr,back[i].url_adr,back[i].url_fil); + if (cache->log!=NULL) { + fprintf(cache->log,"Warning: Illegal chunk (%s) for %s%s"LF,back[i].chunk_adr,back[i].url_adr,back[i].url_fil); } } - } else { /* back[i].status==97 : just receiving ending CRLF after data */ + } else { /* back[i].status==STATUS_CHUNK_CR : just receiving ending CRLF after data */ if (chunk_data[0] == '\0') { if (back[i].chunk_blocksize > 0) chunk_size=(int)back[i].chunk_blocksize; /* recent data chunk size */ @@ -2569,8 +2877,8 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti printf("[%d] chunk CRLF seen\n", (int)back[i].r.soc); #endif } else { - if (cache->errlog!=NULL) { - fprintf(cache->errlog,"Warning: Illegal chunk CRLF (%s) for %s%s"LF,back[i].chunk_adr,back[i].url_adr,back[i].url_fil); + if (cache->log!=NULL) { + fprintf(cache->log,"Warning: Illegal chunk CRLF (%s) for %s%s"LF,back[i].chunk_adr,back[i].url_adr,back[i].url_fil); } #if CHUNKDEBUG==1 printf("[%d] chunk CRLF ERROR!! 
: '%s'\n", (int)back[i].r.soc, chunk_data); @@ -2578,25 +2886,25 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti } } } else { - if (cache->errlog!=NULL) { - fprintf(cache->errlog,"Warning: Chunk too big ("LLintP") for %s%s"LF,(LLint)back[i].chunk_size,back[i].url_adr,back[i].url_fil); + if (cache->log!=NULL) { + fprintf(cache->log,"Warning: Chunk too big ("LLintP") for %s%s"LF,(LLint)back[i].chunk_size,back[i].url_adr,back[i].url_fil); } } // ok, continuer sur le body // si chunk non nul continuer (ou commencer) - if (back[i].status==97 && chunk_size > 0) { - back[i].status = 98; /* waiting for next chunk (NN\r\n<data>\r\nNN\r\n<data>..\r\n0\r\n\r\n) */ + if (back[i].status==STATUS_CHUNK_CR && chunk_size > 0) { + back[i].status = STATUS_CHUNK_WAIT; /* waiting for next chunk (NN\r\n<data>\r\nNN\r\n<data>..\r\n0\r\n\r\n) */ #if CHUNKDEBUG==1 printf("[%d] waiting for next chunk\n", (int)back[i].r.soc); #endif - } else if (back[i].status==98 && chunk_size == 0) { /* final chunk */ - back[i].status=97; /* final CRLF */ + } else if (back[i].status==STATUS_CHUNK_WAIT && chunk_size == 0) { /* final chunk */ + back[i].status=STATUS_CHUNK_CR; /* final CRLF */ #if CHUNKDEBUG==1 printf("[%d] waiting for final CRLF (chunk)\n", (int)back[i].r.soc); #endif - } else if (back[i].status==98 && chunk_size >= 0) { /* will fetch data now */ + } else if (back[i].status==STATUS_CHUNK_WAIT && chunk_size >= 0) { /* will fetch data now */ back[i].status=1; // continuer body #if CHUNKDEBUG==1 printf("[%d] waiting for body (chunk)\n", (int)back[i].r.soc); @@ -2606,8 +2914,8 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti printf("[%d] chunk end, total: %d\n",(int)back[i].r.soc,back[i].r.size); #endif /* End */ - //if (back[i].status==97) { - back[i].status=0; // fin + //if (back[i].status==STATUS_CHUNK_CR) { + back[i].status=STATUS_READY; // fin back_set_finished(sback, i); //} @@ -2615,9 +2923,9 @@ void 
back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti if (!IS_DELAYED_EXT(back[i].url_sav)) { back_finalize(opt,cache,sback,i); } else { - if (back[i].r.statuscode == 200) { - if (cache->errlog!=NULL) { - fspc(cache->errlog,"warning"); fprintf(cache->errlog,"Unexpected incomplete type with 200 code at %s%s"LF, back[i].url_adr, back[i].url_fil); + if (back[i].r.statuscode == HTTP_OK) { + if (cache->log!=NULL) { + fspc(opt,cache->log,"warning"); fprintf(cache->log,"Unexpected incomplete type with 200 code at %s%s"LF, back[i].url_adr, back[i].url_fil); } } } @@ -2648,8 +2956,8 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti strcpybuff(back[i].r.msg,"Incorrect length"); } else { // Un warning suffira.. - if (cache->errlog!=NULL) { - fspc(cache->errlog,"warning"); fprintf(cache->errlog,"Incorrect length ("LLintP"!="LLintP" expected) for %s%s"LF,(LLint)back[i].r.size,(LLint)back[i].r.totalsize,back[i].url_adr,back[i].url_fil); + if (cache->log!=NULL) { + fspc(opt,cache->log,"warning"); fprintf(cache->log,"Incorrect length ("LLintP"!="LLintP" expected) for %s%s"LF,(LLint)back[i].r.size,(LLint)back[i].r.totalsize,back[i].url_adr,back[i].url_fil); } } } @@ -2676,7 +2984,7 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti } // taille buffer chunk > 1 && LF // - } else if (back[i].status==99) { // en têtes (avant le chunk si il est présent) + } else if (back[i].status==STATUS_WAIT_HEADERS) { // en têtes (avant le chunk si il est présent) // if (back[i].r.size>=2) { // double LF @@ -2741,7 +3049,7 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti treathead(opt->cookie,back[i].url_adr,back[i].url_fil,&back[i].r,rcvd); // traiter // parfois les serveurs buggés renvoient un content-range avec un 200 - if (back[i].r.statuscode==200) // 'OK' + if (back[i].r.statuscode==HTTP_OK) // 'OK' if (strfield(rcvd,"content-range:")) // Avec un content-range: relisez 
les RFC.. back[i].r.statuscode=206; // FORCER A 206 !!!!! @@ -2755,21 +3063,20 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti } // Callback -#if HTS_ANALYSTE - if (hts_htmlcheck_receivehead != NULL) { - int test_head=hts_htmlcheck_receivehead(back[i].r.adr, back[i].url_adr, back[i].url_fil, back[i].referer_adr, back[i].referer_fil, &back[i].r); + { + int test_head = RUN_CALLBACK6(opt, receivehead, + back[i].r.adr, back[i].url_adr, back[i].url_fil, back[i].referer_adr, back[i].referer_fil, &back[i].r); if (test_head!=1) { if ((opt->debug>0) && (opt->log!=NULL)) { - fspc(opt->log,"warning"); fprintf(opt->log,"External wrapper aborted transfer, breaking connection: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush; + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"External wrapper aborted transfer, breaking connection: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush; } - back[i].status=0; // FINI + back[i].status=STATUS_READY; // FINI back_set_finished(sback, i); deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET; strcpybuff(back[i].r.msg,"External wrapper aborted transfer"); back[i].r.statuscode = STATUSCODE_INVALID; } } -#endif // Free headers memory now // Actually, save them for informational purpose @@ -2786,17 +3093,17 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti // Check response : 203 == 200 - if (back[i].r.statuscode==203) { // 'Non-Authoritative Information' - back[i].r.statuscode=200; // forcer "OK" - } else if (back[i].r.statuscode == 100) { - back[i].status=99; + if (back[i].r.statuscode==HTTP_NON_AUTHORITATIVE_INFORMATION) { + back[i].r.statuscode=HTTP_OK; // forcer "OK" + } else if (back[i].r.statuscode == HTTP_CONTINUE) { + back[i].status=STATUS_WAIT_HEADERS; back[i].r.size=0; back[i].r.totalsize=0; back[i].chunk_size=0; back[i].r.statuscode=STATUSCODE_INVALID; back[i].r.msg[0]='\0'; if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); 
fprintf(opt->log,"Status 100 detected for %s%s, continuing headers"LF,back[i].url_adr,back[i].url_fil); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"Status 100 detected for %s%s, continuing headers"LF,back[i].url_adr,back[i].url_fil); test_flush; } continue; } @@ -2810,22 +3117,22 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti // -> // Content-Range: bytes */2830 if (back[i].range_req_size == back[i].r.crange) { - filenote(back[i].url_sav,NULL); - file_notify(back[i].url_adr, back[i].url_fil, back[i].url_sav, 0, 0, back[i].r.notmodified); + filenote(&opt->state.strc,back[i].url_sav,NULL); + file_notify(opt,back[i].url_adr, back[i].url_fil, back[i].url_sav, 0, 0, back[i].r.notmodified); deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET; - back[i].status=0; // READY + back[i].status=STATUS_READY; // READY back_set_finished(sback, i); back[i].r.size=back[i].r.totalsize=back[i].range_req_size; - back[i].r.statuscode=304; // NOT MODIFIED + back[i].r.statuscode=HTTP_NOT_MODIFIED; // NOT MODIFIED if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"File seems complete (good 416 message), breaking connection: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"File seems complete (good 416 message), breaking connection: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush; } } } // transform 406 into 200 ; we'll catch embedded links inside the choice page if (back[i].r.statuscode==406) { // 'Not Acceptable' - back[i].r.statuscode=200; + back[i].r.statuscode=HTTP_OK; } // 'do not erase already downloaded file' @@ -2835,9 +3142,9 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti if (HTTP_IS_ERROR(back[i].r.statuscode) && back[i].is_update && !back[i].testmode) { if (back[i].url_sav[0] && fexist(back[i].url_sav)) { if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"Error ignored 
%d (%s) because of 'no purge' option for %s%s"LF,back[i].r.statuscode,back[i].r.msg,back[i].url_adr,back[i].url_fil); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"Error ignored %d (%s) because of 'no purge' option for %s%s"LF,back[i].r.statuscode,back[i].r.msg,back[i].url_adr,back[i].url_fil); test_flush; } - back[i].r.statuscode = 304; + back[i].r.statuscode = HTTP_NOT_MODIFIED; deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET; } } @@ -2854,9 +3161,9 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti // If the size is the same, and the option has been set, we assume // that the file is identical - and therefore let's break the connection if (back[i].is_update) { // mise à jour - if (back[i].r.statuscode==200 && !back[i].testmode) { // 'OK' + if (back[i].r.statuscode==HTTP_OK && !back[i].testmode) { // 'OK' htsblk r = cache_read(opt,cache,back[i].url_adr,back[i].url_fil,NULL,NULL); // lire entrée cache - if (r.statuscode == 200) { // OK pas d'erreur cache + if (r.statuscode == HTTP_OK) { // OK pas d'erreur cache LLint len1,len2; len1=r.totalsize; len2=back[i].r.totalsize; @@ -2864,16 +3171,16 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti len1=r.size; if (len1>0) { if (len1 == len2) { // tailles identiques - back[i].r.statuscode=304; // forcer NOT MODIFIED + back[i].r.statuscode=HTTP_NOT_MODIFIED; // forcer NOT MODIFIED deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET; if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"File seems complete (same size), breaking connection: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"File seems complete (same size), breaking connection: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush; } } } } else { - if (opt->errlog!=NULL) { - fspc(opt->errlog,"warning"); fprintf(opt->errlog,"File seems complete (same size), but there was a cache read error: 
%s%s"LF,back[i].url_adr,back[i].url_fil); test_flush; + if (opt->log!=NULL) { + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"File seems complete (same size), but there was a cache read error: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush; } } if (r.adr) { @@ -2888,21 +3195,21 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti // Detect already downloaded file (with another browser, for example) if (opt->sizehack) { if (!back[i].is_update) { // mise à jour - if (back[i].r.statuscode==200 && !back[i].testmode) { // 'OK' - if (!is_hypertext_mime(back[i].r.contenttype, back[i].url_fil)) { // not HTML + if (back[i].r.statuscode==HTTP_OK && !back[i].testmode) { // 'OK' + if (!is_hypertext_mime(opt,back[i].r.contenttype, back[i].url_fil)) { // not HTML if (strnotempty(back[i].url_sav)) { // target found int size = fsize(back[i].url_sav); // target size if (size >= 0) { if (back[i].r.totalsize == size) { // same size! deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET; - back[i].status=0; // READY + back[i].status=STATUS_READY; // READY back_set_finished(sback, i); back[i].r.size=back[i].r.totalsize; - filenote(back[i].url_sav,NULL); - file_notify(back[i].url_adr, back[i].url_fil, back[i].url_sav, 0, 0, back[i].r.notmodified); - back[i].r.statuscode=304; // NOT MODIFIED + filenote(&opt->state.strc,back[i].url_sav,NULL); + file_notify(opt,back[i].url_adr, back[i].url_fil, back[i].url_sav, 0, 0, back[i].r.notmodified); + back[i].r.statuscode=HTTP_NOT_MODIFIED; // NOT MODIFIED if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"File seems complete (same size file discovered), breaking connection: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"File seems complete (same size file discovered), breaking connection: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush; } } } @@ -2924,23 +3231,23 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* 
cache,TStamp stat_ti if (!back[i].testmode) { // pas mode test if (strnotempty(back[i].url_sav)) { if (strcmp(back[i].url_fil,"/robots.txt")) { - if (back[i].r.statuscode==200) { // 'OK' - if (!is_hypertext_mime(back[i].r.contenttype, back[i].url_fil)) { // pas HTML - if (back[i].r.statuscode==200) { // "OK" + if (back[i].r.statuscode==HTTP_OK) { // 'OK' + if (!is_hypertext_mime(opt,back[i].r.contenttype, back[i].url_fil)) { // pas HTML + if (back[i].r.statuscode==HTTP_OK) { // "OK" if (back[i].range_req_size>0) { // but Range: requested if (back[i].range_req_size == back[i].r.totalsize) { // And same size #if HTS_DEBUG_CLOSESOCK DEBUG_W("back_wait(skip_range): deletehttp\n"); #endif deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET; - back[i].status=0; // READY + back[i].status=STATUS_READY; // READY back_set_finished(sback, i); back[i].r.size=back[i].r.totalsize; - filenote(back[i].url_sav,NULL); - file_notify(back[i].url_adr, back[i].url_fil, back[i].url_sav, 0, 0, back[i].r.notmodified); - back[i].r.statuscode=304; // NOT MODIFIED + filenote(&opt->state.strc,back[i].url_sav,NULL); + file_notify(opt,back[i].url_adr, back[i].url_fil, back[i].url_sav, 0, 0, back[i].r.notmodified); + back[i].r.statuscode=HTTP_NOT_MODIFIED; // NOT MODIFIED if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"File seems complete (reget failed), breaking connection: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"File seems complete (reget failed), breaking connection: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush; } } } @@ -2965,7 +3272,7 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti /* Interdiction taille par le wizard? 
*/ if (back[i].r.soc!=INVALID_SOCKET) { if (!back_checksize(opt,&back[i],1)) { - back[i].status=0; // FINI + back[i].status=STATUS_READY; // FINI back_set_finished(sback, i); back[i].r.statuscode=STATUSCODE_TOO_BIG; deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET; @@ -2981,7 +3288,7 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti // head: terminé if (back[i].head_request) { if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"Tested file: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"Tested file: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush; } #if HTS_DEBUG_CLOSESOCK DEBUG_W("back_wait(head request): deletehttp\n"); @@ -2990,11 +3297,11 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti if (!back[i].http11) { /* NO KA */ deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET; } - back[i].status=0; // terminé + back[i].status=STATUS_READY; // terminé back_set_finished(sback, i); } // traiter une éventuelle erreur 304 (cache à jour utilisable) - else if (back[i].r.statuscode==304) { // document à jour dans le cache + else if (back[i].r.statuscode==HTTP_NOT_MODIFIED) { // document à jour dans le cache // lire dans le cache // ** NOTE: pas de vérif de la taille ici!! 
#if HTS_DEBUG_CLOSESOCK @@ -3015,24 +3322,24 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti // then, force 'ok' status if (back[i].r.statuscode == STATUSCODE_INVALID) { if (fexist(back[i].url_sav)) { - back[i].r.statuscode=200; // OK + back[i].r.statuscode=HTTP_OK; // OK strcpybuff(back[i].r.msg, "OK (cached)"); back[i].r.is_file=1; back[i].r.totalsize = back[i].r.size = fsize(back[i].url_sav); - get_httptype(back[i].r.contenttype, back[i].url_sav, 1); + get_httptype(opt,back[i].r.contenttype, back[i].url_sav, 1); if ((opt->debug>0) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"Not-modified status without cache guessed: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"Not-modified status without cache guessed: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush; } } } // Status is okay? if (back[i].r.statuscode!=-1) { // pas d'erreur de lecture - back[i].status=0; // OK prêt + back[i].status=STATUS_READY; // OK prêt back_set_finished(sback, i); back[i].r.notmodified=1; // NON modifié! 
if ((opt->debug>0) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"File loaded after test from cache: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"File loaded after test from cache: %s%s"LF,back[i].url_adr,back[i].url_fil); test_flush; } // finalize @@ -3047,7 +3354,7 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti //printf(">%s status %d\n",back[p].r.contenttype,back[p].r.statuscode); } else { // erreur - back[i].status=0; // terminé + back[i].status=STATUS_READY; // terminé back_set_finished(sback, i); //printf("erreur cache\n"); @@ -3066,7 +3373,7 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti /*KA deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET;*/ back_maydeletehttp(opt, cache, sback, i); - back[i].status=0; // terminé + back[i].status=STATUS_READY; // terminé back_set_finished(sback, i); // finalize if (back[i].r.statuscode>0) { @@ -3085,20 +3392,20 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti // traiter 206 (partial content) // xxc SI CHUNK VERIFIER QUE CA MARCHE?? if (back[i].r.statuscode==206) { // on nous envoie un morceau (la fin) coz une partie sur disque! 
- LLint sz=fsize(back[i].url_sav); + off_t sz=fsize(back[i].url_sav); #if HDEBUG printf("partial content: "LLintP" on disk..\n",(LLint)sz); #endif if (sz>=0) { - if (!is_hypertext_mime(back[i].r.contenttype, back[i].url_sav)) { // pas HTML + if (!is_hypertext_mime(opt,back[i].r.contenttype, back[i].url_sav)) { // pas HTML if (opt->getmode&2) { // on peut ecrire des non html **sinon ben euhh sera intercepté plus loin, donc rap sur ce qui va sortir** - filenote(back[i].url_sav,NULL); // noter fichier comme connu - file_notify(back[i].url_adr, back[i].url_fil, back[i].url_sav, 0, 1, back[i].r.notmodified); - back[i].r.out=fopen(fconv(back[i].url_sav),"ab"); // append + filenote(&opt->state.strc,back[i].url_sav,NULL); // noter fichier comme connu + file_notify(opt,back[i].url_adr, back[i].url_fil, back[i].url_sav, 0, 1, back[i].r.notmodified); + back[i].r.out=fopen(fconv(catbuff,back[i].url_sav),"ab"); // append if (back[i].r.out) { back[i].r.is_write=1; // écrire back[i].r.size=sz; // déja écrit - back[i].r.statuscode=200; // Forcer 'OK' + back[i].r.statuscode=HTTP_OK; // Forcer 'OK' if (back[i].r.totalsize>0) back[i].r.totalsize+=sz; // plus en fait fseek(back[i].r.out,0,SEEK_END); // à la fin @@ -3106,50 +3413,50 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti printf("continue interrupted file\n"); #endif } else { // On est dans la m** - back[i].status=0; // terminé (voir plus loin) + back[i].status=STATUS_READY; // terminé (voir plus loin) back_set_finished(sback, i); strcpybuff(back[i].r.msg,"Can not open partial file"); } } } else { // mémoire - FILE* fp=fopen(fconv(back[i].url_sav),"rb"); + FILE* fp=fopen(fconv(catbuff,back[i].url_sav),"rb"); if (fp) { LLint alloc_mem=sz + 1; if (back[i].r.totalsize>0) alloc_mem+=back[i].r.totalsize; // AJOUTER RESTANT! 
- if ( deleteaddr(&back[i].r) && (back[i].r.adr=(char*) malloct((INTsys) alloc_mem)) ) { + if ( deleteaddr(&back[i].r) && (back[i].r.adr=(char*) malloct((size_t)alloc_mem)) ) { back[i].r.size=sz; if (back[i].r.totalsize>0) back[i].r.totalsize+=sz; // plus en fait - if (( fread(back[i].r.adr,1,(INTsys)sz,fp)) != sz) { - back[i].status=0; // terminé (voir plus loin) + if (( fread(back[i].r.adr,1,sz,fp)) != sz) { + back[i].status=STATUS_READY; // terminé (voir plus loin) back_set_finished(sback, i); strcpybuff(back[i].r.msg,"Can not read partial file"); } else { - back[i].r.statuscode=200; // Forcer 'OK' + back[i].r.statuscode=HTTP_OK; // Forcer 'OK' #if HDEBUG printf("continue in mem interrupted file\n"); #endif } } else { - back[i].status=0; // terminé (voir plus loin) + back[i].status=STATUS_READY; // terminé (voir plus loin) back_set_finished(sback, i); strcpybuff(back[i].r.msg,"No memory for partial file"); } fclose(fp); } else { // Argh.. - back[i].status=0; // terminé (voir plus loin) + back[i].status=STATUS_READY; // terminé (voir plus loin) back_set_finished(sback, i); strcpybuff(back[i].r.msg,"Can not open partial file"); } } } else { // Non trouvé?? - back[i].status=0; // terminé (voir plus loin) + back[i].status=STATUS_READY; // terminé (voir plus loin) back_set_finished(sback, i); strcpybuff(back[i].r.msg,"Can not find partial file"); } // Erreur? - if (back[i].status==0) { + if (back[i].status==STATUS_READY) { if (back[i].r.soc!=INVALID_SOCKET) { #if HTS_DEBUG_CLOSESOCK DEBUG_W("back_wait(206 solve problems): deletehttp\n"); @@ -3167,13 +3474,13 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti if (back[i].status!=0) { // non terminé (erreur) if (!back[i].testmode) { // fichier normal - if (back[i].r.empty /* ?? && back[i].r.statuscode==200 */) { // empty response + if (back[i].r.empty /* ?? 
&& back[i].r.statuscode==HTTP_OK */) { // empty response // Couper connexion back_maydeletehttp(opt, cache, sback, i); /* KA deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET; */ - back[i].status=0; // terminé + back[i].status=STATUS_READY; // terminé back_set_finished(sback, i); - if ( deleteaddr(&back[i].r) && (back[i].r.adr=(char*) malloct((INTsys) 2)) ) { + if ( deleteaddr(&back[i].r) && (back[i].r.adr=(char*) malloct( 2)) ) { back[i].r.adr[0] = 0; } back_finalize(opt,cache,sback,i); @@ -3190,7 +3497,7 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti back[i].chunk_adr=NULL; back[i].chunk_size=0; back[i].chunk_blocksize=0; - back[i].status=98; // start body wait chunk + back[i].status=STATUS_CHUNK_WAIT; // start body wait chunk back[i].r.totalsize=0; /* devalidate size! (rfc) */ } if (back[i].rateout>0) { @@ -3200,13 +3507,13 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti printf("(buffer) start body!\n"); #endif } else { // mode test, ne pas passer en 1!! 
- back[i].status=0; // READY + back[i].status=STATUS_READY; // READY back_set_finished(sback, i); #if HTS_DEBUG_CLOSESOCK DEBUG_W("back_wait(test ok): deletehttp\n"); #endif deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET; - if (back[i].r.statuscode==200) { + if (back[i].r.statuscode==HTTP_OK) { strcpybuff(back[i].r.msg,"Test: OK"); back[i].r.statuscode=STATUSCODE_TEST_OK; // test réussi } @@ -3248,8 +3555,8 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti //if (!opt->quiet) { // petite animation if (opt->verbosedisplay==1) { - if (back[i].status==0) { - if (back[i].r.statuscode==200) + if (back[i].status==STATUS_READY) { + if (back[i].r.statuscode==HTTP_OK) printf("* %s%s ("LLintP" bytes) - OK"VT_CLREOL"\r",back[i].url_adr,back[i].url_fil,(LLint)back[i].r.size); else printf("* %s%s ("LLintP" bytes) - %d"VT_CLREOL"\r",back[i].url_adr,back[i].url_fil,(LLint)back[i].r.size,back[i].r.statuscode); @@ -3281,18 +3588,18 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* cache,TStamp stat_ti } back[i].r.soc=INVALID_SOCKET; back[i].r.statuscode=STATUSCODE_TIMEOUT; - if (back[i].status==100) + if (back[i].status==STATUS_CONNECTING) strcpybuff(back[i].r.msg,"Connect Time Out"); - else if (back[i].status==101) + else if (back[i].status==STATUS_WAIT_DNS) strcpybuff(back[i].r.msg,"DNS Time Out"); else strcpybuff(back[i].r.msg,"Receive Time Out"); - back[i].status=0; // terminé + back[i].status=STATUS_READY; // terminé back_set_finished(sback, i); } else if ((back[i].rateout>0) && (back[i].status<99)) { if (((int) (act-back[i].rateout_time))>=HTS_WATCHRATE) { // checker au bout de 15s if ( (int) ((back[i].r.size)/(act-back[i].rateout_time)) < back[i].rateout ) { // trop lent - back[i].status=0; // terminé + back[i].status=STATUS_READY; // terminé back_set_finished(sback, i); if (back[i].r.soc!=INVALID_SOCKET) { #if HTS_DEBUG_CLOSESOCK @@ -3311,17 +3618,13 @@ void back_wait(struct_back* sback,httrackp* opt,cache_back* 
cache,TStamp stat_ti } } max_loop--; -#if HTS_ANALYSTE max_loop_chk++; -#endif } while((busy_state) && (busy_recv) && (max_loop>0)); -#if HTS_ANALYSTE if ((!busy_recv) && (!busy_state)) { if (max_loop_chk>=1) { Sleep(10); // un tite pause pour éviter les lag.. } } -#endif } int back_checksize(httrackp* opt,lien_back* eback,int check_only_totalsize) { @@ -3338,7 +3641,7 @@ int back_checksize(httrackp* opt,lien_back* eback,int check_only_totalsize) { } /* vérifier taille classique (heml et non html) */ - if ((istoobig(size_to_test,eback->maxfile_html,eback->maxfile_nonhtml,eback->r.contenttype))) { + if ((istoobig(opt,size_to_test,eback->maxfile_html,eback->maxfile_nonhtml,eback->r.contenttype))) { return 0; /* interdit */ } } @@ -3348,14 +3651,14 @@ int back_checksize(httrackp* opt,lien_back* eback,int check_only_totalsize) { int back_checkmirror(httrackp* opt) { // Check max time if ((opt->maxsite>0) && (HTS_STAT.stat_bytes >= opt->maxsite)) { - if (opt->errlog) { - fprintf(opt->errlog,"More than "LLintP" bytes have been transfered.. giving up"LF,(LLint)opt->maxsite); + if (opt->log) { + fprintf(opt->log,"More than "LLintP" bytes have been transfered.. giving up"LF,(LLint)opt->maxsite); test_flush; } return 0; } else if ((opt->maxtime>0) && ((time_local()-HTS_STAT.stat_timestart)>opt->maxtime)) { - if (opt->errlog) { - fprintf(opt->errlog,"More than %d seconds passed.. giving up"LF,opt->maxtime); + if (opt->log) { + fprintf(opt->log,"More than %d seconds passed.. 
giving up"LF,opt->maxtime); test_flush; } return 0; @@ -3372,16 +3675,20 @@ LLint back_transfered(LLint nb,struct_back* sback) { // ajouter octets en instance for(i=0;i<back_max;i++) if ((back[i].status>0) && (back[i].status<99 || back[i].status>=1000)) - nb+=back[i].r.size; + nb += back[i].r.size; // stored (ready) slots if (sback->ready != NULL) { - struct_inthash_enum e = inthash_enum_new((inthash)sback->ready); +#ifndef HTS_NO_BACK_ON_DISK + nb += sback->ready_size_bytes; +#else + struct_inthash_enum e = inthash_enum_new(sback->ready); inthash_chain* item; while((item = inthash_enum_next(&e))) { lien_back* ritem = (lien_back*) item->value.ptr; if ((ritem->status>0) && (ritem->status<99 || ritem->status>=1000)) - nb+=ritem->r.size; + nb += ritem->r.size; } +#endif } return nb; } @@ -3410,36 +3717,27 @@ void back_infostr(struct_back* sback,int i,int j,char* s) { if (back[i].status>=0) { int aff=0; if (j & 1) { - if (back[i].status==100) { + if (back[i].status==STATUS_CONNECTING) { strcatbuff(s,"CONNECT "); - } else if (back[i].status==99) { + } else if (back[i].status==STATUS_WAIT_HEADERS) { strcatbuff(s,"INFOS "); aff=1; - } else if (back[i].status==98 || back[i].status==97) { + } else if (back[i].status==STATUS_CHUNK_WAIT || back[i].status==STATUS_CHUNK_CR) { strcatbuff(s,"INFOSC"); // infos chunk aff=1; } else if (back[i].status>0) { -#if HTS_ANALYSTE==2 - strcatbuff(s,"WAIT "); -#else strcatbuff(s,"RECEIVE "); -#endif aff=1; } } if (j & 2) { - if (back[i].status==0) { + if (back[i].status==STATUS_READY) { switch (back[i].r.statuscode) { case 200: strcatbuff(s,"READY "); aff=1; break; -#if HTS_ANALYSTE==2 - default: - strcatbuff(s,"ERROR "); - break; -#else case -1: strcatbuff(s,"ERROR "); aff=1; @@ -3475,7 +3773,6 @@ void back_infostr(struct_back* sback,int i,int j,char* s) { strcatbuff(s,s2); } aff=1; -#endif } } } diff --git a/src/htsback.h b/src/htsback.h index 90c36db..57751d1 100644 --- a/src/htsback.h +++ b/src/htsback.h @@ -39,19 +39,34 @@ Please visit 
our Website: http://www.httrack.com #define HTSBACK_DEFH #include "htsglobal.h" -#include "htsbasenet.h" -#include "htscore.h" - -typedef enum BackStatusCode { - STATUSCODE_INVALID = -1, - STATUSCODE_TIMEOUT = -2, - STATUSCODE_SLOW = -3, - STATUSCODE_CONNERROR = -4, - STATUSCODE_NON_FATAL = -5, - STATUSCODE_SSL_HANDSHAKE = -6, - STATUSCODE_TOO_BIG = -7, - STATUSCODE_TEST_OK = -10 -} BackStatusCode; + +#if HTS_XGETHOST +#if USE_BEGINTHREAD +#include "htsthread.h" +#endif +#endif + +/* Forward definitions */ +#ifndef HTS_DEF_FWSTRUCT_httrackp +#define HTS_DEF_FWSTRUCT_httrackp +typedef struct httrackp httrackp; +#endif +#ifndef HTS_DEF_FWSTRUCT_struct_back +#define HTS_DEF_FWSTRUCT_struct_back +typedef struct struct_back struct_back; +#endif +#ifndef HTS_DEF_FWSTRUCT_cache_back +#define HTS_DEF_FWSTRUCT_cache_back +typedef struct cache_back cache_back; +#endif +#ifndef HTS_DEF_FWSTRUCT_lien_back +#define HTS_DEF_FWSTRUCT_lien_back +typedef struct lien_back lien_back; +#endif +#ifndef HTS_DEF_FWSTRUCT_htsblk +#define HTS_DEF_FWSTRUCT_htsblk +typedef struct htsblk htsblk; +#endif /* Library internal definictions */ #ifdef HTS_INTERNAL_BYTECODE @@ -63,17 +78,17 @@ void back_free(struct_back** sback); // backing #define BACK_ADD_TEST "(dummy)" #define BACK_ADD_TEST2 "(dummy2)" -int back_index(struct_back* sback,char* adr,char* fil,char* sav); +int back_index(httrackp* opt, struct_back* sback,char* adr,char* fil,char* sav); int back_available(struct_back* sback); LLint back_incache(struct_back* sback); int back_done_incache(struct_back* sback); -HTS_INLINE int back_exist(struct_back* sback,char* adr,char* fil,char* sav); +HTS_INLINE int back_exist(struct_back* sback,httrackp* opt,char* adr,char* fil,char* sav); int back_nsoc(struct_back* sback); int back_nsoc_overall(struct_back* sback); -int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* referer_adr,char* referer_fil,int test,int* pass2_ptr); -int 
back_add_if_not_exists(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* referer_adr,char* referer_fil,int test,int* pass2_ptr); +int back_add(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* referer_adr,char* referer_fil,int test); +int back_add_if_not_exists(struct_back* sback,httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* referer_adr,char* referer_fil,int test); int back_stack_available(struct_back* sback); -int back_search(httrackp* opt, cache_back* cache, struct_back* sback); +int back_search(httrackp* opt, struct_back* sback); int back_search_quick(struct_back* sback); void back_clean(httrackp* opt,cache_back* cache,struct_back* sback); int back_cleanup_background(httrackp* opt,cache_back* cache,struct_back* sback); @@ -83,10 +98,15 @@ int back_searchlive(httrackp* opt, struct_back* sback, char* search_addr); void back_connxfr(htsblk* src, htsblk* dst); void back_move(lien_back* src, lien_back* dst); void back_copy_static(const lien_back* src, lien_back* dst); +int back_serialize(FILE *fp, const lien_back* src); +int back_unserialize(FILE *fp, lien_back** dst); void back_set_finished(struct_back* sback, int p); +void back_set_locked(struct_back* sback, int p); +void back_set_unlocked(struct_back* sback, int p); int back_delete(httrackp* opt,cache_back* cache,struct_back* sback,int p); +void back_index_unlock(struct_back* sback, int p); +int back_clear_entry(lien_back* back); int back_flush_output(httrackp* opt, cache_back* cache, struct_back* sback, int p); -int back_set_passe2_ptr(httrackp* opt, cache_back* cache, struct_back* sback, int p, int* pass2_ptr); void back_delete_all(httrackp* opt, cache_back* cache, struct_back* sback); int back_maydelete(httrackp* opt, cache_back* cache, struct_back* sback, int p); void back_maydeletehttp(httrackp* opt, cache_back* cache, struct_back* sback, int p); @@ -97,15 +117,15 @@ void back_infostr(struct_back* sback,int 
i,int j,char* s); LLint back_transfered(LLint add,struct_back* sback); // hostback #if HTS_XGETHOST -void back_solve(lien_back* sback); -int host_wait(lien_back* sback); +void back_solve(httrackp *opt,lien_back* sback); +int host_wait(httrackp *opt, lien_back* sback); #endif int back_checksize(httrackp* opt,lien_back* eback,int check_only_totalsize); int back_checkmirror(httrackp* opt); #if HTS_XGETHOST #if USE_BEGINTHREAD -PTHREAD_TYPE PTHREAD_TYPE_FNC Hostlookup(void* iadr_p); +void Hostlookup(void* iadr_p); #endif #endif diff --git a/src/htsbase.h b/src/htsbase.h index 7faec95..e3aec57 100644 --- a/src/htsbase.h +++ b/src/htsbase.h @@ -43,6 +43,7 @@ extern "C" { #endif #include "htsglobal.h" +#include "htsstrings.h" #include <string.h> #include <time.h> @@ -50,7 +51,7 @@ extern "C" { #ifdef HAVE_UNISTD_H #include <unistd.h> #endif -#ifdef HAVE_SYS_TYPES_H +#if ( defined(_WIN32) ||defined(HAVE_SYS_TYPES_H) ) #include <sys/types.h> #endif #ifdef HAVE_SYS_STAT_H @@ -60,16 +61,25 @@ extern "C" { #include <dlfcn.h> #endif -#ifndef _WIN32 #include <errno.h> -#endif -#if HTS_WIN +#ifdef _WIN32 #else #include <fcntl.h> #endif #include <assert.h> +/* GCC extension */ +#ifndef HTS_UNUSED +#ifdef __GNUC__ +#define HTS_UNUSED __attribute__ ((unused)) +#define HTS_STATIC static __attribute__ ((unused)) +#else +#define HTS_UNUSED +#define HTS_STATIC static +#endif +#endif + #undef min #undef max #define min(a,b) ((a)>(b)?(b):(a)) @@ -149,6 +159,35 @@ extern HTSEXT_API t_abortLog abortLog__; } \ } while(0) +/* logging */ +typedef enum { + LOG_DEBUG, + LOG_INFO, + LOG_WARNING, + LOG_ERROR, + LOG_PANIC +} HTS_LogType; +#define HTS_LOG(OPT,TYPE) do { \ + int last_errno = errno; \ + switch(TYPE) { \ + case LOG_DEBUG: \ + fspc(OPT,(OPT)->log, "debug"); \ + break; \ + case LOG_INFO: \ + fspc(OPT,(OPT)->log, "info"); \ + break; \ + case LOG_WARNING: \ + fspc(OPT,(OPT)->log, "warning"); \ + break; \ + case LOG_ERROR: \ + fspc(OPT,(OPT)->log, "error"); \ + break; \ + case LOG_PANIC: \ 
+ fspc(OPT,(OPT)->log, "panic"); \ + break; \ + } \ + errno = last_errno; \ +} while(0) /* regular malloc's() */ #ifndef HTS_TRACE_MALLOC @@ -194,6 +233,7 @@ extern HTSEXT_API int htsMemoryFastXfr; /* */ +#define stringdup() #ifdef STRDEBUG diff --git a/src/htsbasenet.h b/src/htsbasenet.h index f2a6c53..26f1392 100644 --- a/src/htsbasenet.h +++ b/src/htsbasenet.h @@ -31,14 +31,14 @@ Please visit our Website: http://www.httrack.com /* ------------------------------------------------------------ */ /* File: Basic net definitions */ -/* Used in .c and .h files that needs T_SOC and so */ +/* Used in .c and .h files that needs hostent and so */ /* Author: Xavier Roche */ /* ------------------------------------------------------------ */ #ifndef HTS_DEFBASENETH #define HTS_DEFBASENETH -#if HTS_WIN +#ifdef _WIN32 #if HTS_INET6==0 #include <winsock2.h> @@ -57,13 +57,11 @@ Please visit our Website: http://www.httrack.com #endif -typedef SOCKET T_SOC; - typedef struct hostent FAR t_hostent; +typedef struct hostent FAR t_hostent; #else #define HTS_USESCOPEID #define INVALID_SOCKET -1 - typedef int T_SOC; typedef struct hostent t_hostent; #endif @@ -178,4 +176,76 @@ extern SSL_CTX *openssl_ctx; #endif #endif +/** RFC2616 status-codes ('statuscode' member of htsblk) **/ +typedef enum HTTPStatusCode { + HTTP_CONTINUE = 100, + HTTP_SWITCHING_PROTOCOLS = 101, + HTTP_OK = 200, + HTTP_CREATED = 201, + HTTP_ACCEPTED = 202, + HTTP_NON_AUTHORITATIVE_INFORMATION = 203, + HTTP_NO_CONTENT = 204, + HTTP_RESET_CONTENT = 205, + HTTP_PARTIAL_CONTENT = 206, + HTTP_MULTIPLE_CHOICES = 300, + HTTP_MOVED_PERMANENTLY = 301, + HTTP_FOUND = 302, + HTTP_SEE_OTHER = 303, + HTTP_NOT_MODIFIED = 304, + HTTP_USE_PROXY = 305, + HTTP_TEMPORARY_REDIRECT = 307, + HTTP_BAD_REQUEST = 400, + HTTP_UNAUTHORIZED = 401, + HTTP_PAYMENT_REQUIRED = 402, + HTTP_FORBIDDEN = 403, + HTTP_NOT_FOUND = 404, + HTTP_METHOD_NOT_ALLOWED = 405, + HTTP_NOT_ACCEPTABLE = 406, + HTTP_PROXY_AUTHENTICATION_REQUIRED = 407, + 
HTTP_REQUEST_TIME_OUT = 408, + HTTP_CONFLICT = 409, + HTTP_GONE = 410, + HTTP_LENGTH_REQUIRED = 411, + HTTP_PRECONDITION_FAILED = 412, + HTTP_REQUEST_ENTITY_TOO_LARGE = 413, + HTTP_REQUEST_URI_TOO_LARGE = 414, + HTTP_UNSUPPORTED_MEDIA_TYPE = 415, + HTTP_REQUESTED_RANGE_NOT_SATISFIABLE = 416, + HTTP_EXPECTATION_FAILED = 417, + HTTP_INTERNAL_SERVER_ERROR = 500, + HTTP_NOT_IMPLEMENTED = 501, + HTTP_BAD_GATEWAY = 502, + HTTP_SERVICE_UNAVAILABLE = 503, + HTTP_GATEWAY_TIME_OUT = 504, + HTTP_HTTP_VERSION_NOT_SUPPORTED = 505 +} HTTPStatusCode; + +/** Internal HTTrack status-codes ('statuscode' member of htsblk) **/ +typedef enum BackStatusCode { + STATUSCODE_INVALID = -1, + STATUSCODE_TIMEOUT = -2, + STATUSCODE_SLOW = -3, + STATUSCODE_CONNERROR = -4, + STATUSCODE_NON_FATAL = -5, + STATUSCODE_SSL_HANDSHAKE = -6, + STATUSCODE_TOO_BIG = -7, + STATUSCODE_TEST_OK = -10 +} BackStatusCode; + +/** HTTrack status ('status' member of of 'lien_back') **/ +typedef enum HTTrackStatus { + STATUS_ALIVE = -103, + STATUS_FREE = -1, + STATUS_READY = 0, + STATUS_TRANSFER = 1, + STATUS_CHUNK_CR = 97, + STATUS_CHUNK_WAIT = 98, + STATUS_WAIT_HEADERS = 99, + STATUS_CONNECTING = 100, + STATUS_WAIT_DNS = 101, + STATUS_SSL_WAIT_HANDSHAKE = 102, + STATUS_FTP_TRANSFER = 1000, + STATUS_FTP_READY = 1001 +} HTTrackStatus; + #endif diff --git a/src/htsbauth.c b/src/htsbauth.c index cdc7f1c..df47d13 100644 --- a/src/htsbauth.c +++ b/src/htsbauth.c @@ -44,14 +44,13 @@ Please visit our Website: http://www.httrack.com #include "htsglobal.h" #include "htslib.h" -#include "htsnostatic.h" - /* END specific definitions */ // gestion des cookie // ajoute, dans l'ordre // !=0 : erreur int cookie_add(t_cookie* cookie,char* cook_name,char* cook_value,char* domain,char* path) { + char buffer[8192]; char* a=cookie->data; char* insert; char cook[16384]; @@ -72,7 +71,7 @@ int cookie_add(t_cookie* cookie,char* cook_name,char* cook_value,char* domain,ch insert=a; // insérer ici while (*a) { - if ( strlen(cookie_get(a,2)) 
< strlen(path) ) // long. path (le + long est prioritaire) + if ( strlen(cookie_get(buffer, a,2)) < strlen(path) ) // long. path (le + long est prioritaire) a=cookie->data+strlen(cookie->data); // fin else { a=strchr(a,'\n'); // prochain champ @@ -127,20 +126,21 @@ int cookie_del(t_cookie* cookie,char* cook_name,char* domain,char* path) { // path est aligné à droite et cook_name peut être vide (chercher alors tout cookie) // .doubleclick.net TRUE / FALSE 1999999999 id A char* cookie_find(char* s,char* cook_name,char* domain,char* path) { + char buffer[8192]; char* a=s; while (*a) { int t; if (strnotempty(cook_name)==0) t=1; // accepter par défaut else - t=( strcmp(cookie_get(a,5),cook_name)==0 ); // tester si même nom + t=( strcmp(cookie_get(buffer, a,5),cook_name)==0 ); // tester si même nom if (t) { // même nom ou nom qualconque // - char* chk_dom=cookie_get(a,0); // domaine concerné par le cookie + char* chk_dom=cookie_get(buffer,a,0); // domaine concerné par le cookie if ((int) strlen(chk_dom) <= (int) strlen(domain)) { if ( strcmp(chk_dom,domain+strlen(domain)-strlen(chk_dom))==0 ) { // même domaine // - char* chk_path=cookie_get(a,2); // chemin concerné par le cookie + char* chk_path=cookie_get(buffer,a,2); // chemin concerné par le cookie if ((int) strlen(chk_path) <= (int) strlen(path)) { if (strncmp(path,chk_path,strlen(chk_path))==0 ) { // même chemin return a; @@ -169,11 +169,13 @@ char* cookie_nextfield(char* a) { // lire cookies.txt // lire également (Windows seulement) les *@*.txt (cookies IE copiés) // !=0 : erreur -int cookie_load(t_cookie* cookie,char* fpath,char* name) { +int cookie_load(t_cookie* cookie, const char* fpath, const char* name) { + char catbuff[CATBUFF_SIZE]; + char buffer[8192]; // cookie->data[0]='\0'; // Fusionner d'abord les éventuels cookies IE -#if HTS_WIN +#ifdef _WIN32 { WIN32_FIND_DATAA find; HANDLE h; @@ -185,7 +187,7 @@ int cookie_load(t_cookie* cookie,char* fpath,char* name) { do { if (!(find.dwFileAttributes & 
FILE_ATTRIBUTE_DIRECTORY )) if (!(find.dwFileAttributes & FILE_ATTRIBUTE_SYSTEM )) { - FILE* fp=fopen(fconcat(fpath,find.cFileName),"rb"); + FILE* fp=fopen(fconcat(catbuff, fpath, find.cFileName),"rb"); if (fp) { char cook_name[256]; char cook_value[1000]; @@ -223,7 +225,7 @@ int cookie_load(t_cookie* cookie,char* fpath,char* name) { } fclose(fp); if (cookie_merged) - remove(fconcat(fpath,find.cFileName)); + remove(fconcat(catbuff,fpath,find.cFileName)); } // if fp } } while(FindNextFileA(h,&find)); @@ -234,7 +236,7 @@ int cookie_load(t_cookie* cookie,char* fpath,char* name) { // Ensuite, cookies.txt { - FILE* fp = fopen(fconcat(fpath,name),"rb"); + FILE* fp = fopen(fconcat(catbuff, fpath, name),"rb"); if (fp) { char BIGSTK line[8192]; while( (!feof(fp)) && (((int) strlen(cookie->data)) < cookie->max_len)) { @@ -246,10 +248,10 @@ int cookie_load(t_cookie* cookie,char* fpath,char* name) { char path[256]; // chemin (/) char cook_name[256]; // nom cookie (MYCOOK) char BIGSTK cook_value[8192]; // valeur (ID=toto,S=1234) - strcpybuff(domain,cookie_get(line,0)); // host - strcpybuff(path,cookie_get(line,2)); // path - strcpybuff(cook_name,cookie_get(line,5)); // name - strcpybuff(cook_value,cookie_get(line,6)); // value + strcpybuff(domain,cookie_get(buffer,line,0)); // host + strcpybuff(path,cookie_get(buffer,line,2)); // path + strcpybuff(cook_name,cookie_get(buffer,line,5)); // name + strcpybuff(cook_value,cookie_get(buffer,line,6)); // value #if DEBUG_COOK printf("%s\n",line); #endif @@ -268,9 +270,10 @@ int cookie_load(t_cookie* cookie,char* fpath,char* name) { // écrire cookies.txt // !=0 : erreur int cookie_save(t_cookie* cookie,char* name) { + char catbuff[CATBUFF_SIZE]; if (strnotempty(cookie->data)) { char BIGSTK line[8192]; - FILE* fp = fopen(fconv(name),"wb"); + FILE* fp = fopen(fconv(catbuff,name),"wb"); if (fp) { char* a=cookie->data; fprintf(fp,"# HTTrack Website Copier Cookie File"LF"# This file format is compatible with Netscape cookies"LF); @@ -318,11 
+321,8 @@ void cookie_delete(char* s,int pos) { // renvoie champ param de la chaine cookie_base // ex: cookie_get("ceci est<tab>un<tab>exemple",1) renvoi "un" -char* cookie_get(char* cookie_base,int param) { - char* buffer; - // +char* cookie_get(char *buffer,char* cookie_base,int param) { char * limit; - NOSTATIC_RESERVE(buffer, char, 8192); while(*cookie_base=='\n') cookie_base++; limit = strchr(cookie_base,'\n'); @@ -359,10 +359,11 @@ char* cookie_get(char* cookie_base,int param) { /* déclarer un répertoire comme possédant une authentification propre */ int bauth_add(t_cookie* cookie,char* adr,char* fil,char* auth) { - if (cookie) { + char buffer[HTS_URLMAXSIZE*2]; + if (cookie) { if (!bauth_check(cookie,adr,fil)) { // n'existe pas déja bauth_chain* chain=&cookie->auth; - char* prefix=bauth_prefix(adr,fil); + char* prefix=bauth_prefix(buffer,adr,fil); /* fin de la chaine */ while(chain->next) chain=chain->next; @@ -382,9 +383,10 @@ int bauth_add(t_cookie* cookie,char* adr,char* fil,char* auth) { /* tester adr et fil, et retourner authentification si nécessaire */ /* sinon, retourne NULL */ char* bauth_check(t_cookie* cookie,char* adr,char* fil) { + char buffer[HTS_URLMAXSIZE*2]; if (cookie) { bauth_chain* chain=&cookie->auth; - char* prefix=bauth_prefix(adr,fil); + char* prefix=bauth_prefix(buffer,adr,fil); while(chain) { if (strnotempty(chain->prefix)) { if (strncmp(prefix,chain->prefix,strlen(chain->prefix))==0) { @@ -397,10 +399,8 @@ char* bauth_check(t_cookie* cookie,char* adr,char* fil) { return NULL; } -char* bauth_prefix(char* adr,char* fil) { - char* prefix; +char* bauth_prefix(char *prefix,char* adr,char* fil) { char* a; - NOSTATIC_RESERVE(prefix, char, HTS_URLMAXSIZE*2); strcpybuff(prefix,jump_identification(adr)); strcatbuff(prefix,fil); a=strchr(prefix,'?'); diff --git a/src/htsbauth.h b/src/htsbauth.h index 4066ece..2f585da 100644 --- a/src/htsbauth.h +++ b/src/htsbauth.h @@ -40,19 +40,27 @@ Please visit our Website: http://www.httrack.com #define 
HTSBAUTH_DEFH // robots wizard -typedef struct bauth_chain { +#ifndef HTS_DEF_FWSTRUCT_bauth_chain +#define HTS_DEF_FWSTRUCT_bauth_chain +typedef struct bauth_chain bauth_chain; +#endif +struct bauth_chain { char prefix[1024]; /* www.foo.com/secure/ */ char auth[1024]; /* base-64 encoded user:pass */ struct bauth_chain* next; /* next element */ -} bauth_chain; +}; // buffer pour les cookies et authentification -typedef struct t_cookie { +#ifndef HTS_DEF_FWSTRUCT_t_cookie +#define HTS_DEF_FWSTRUCT_t_cookie +typedef struct t_cookie t_cookie; +#endif +struct t_cookie { int max_len; char data[32768]; bauth_chain auth; -} t_cookie; +}; /* Library internal definictions */ @@ -61,18 +69,18 @@ typedef struct t_cookie { // cookies int cookie_add(t_cookie* cookie,char* cook_name,char* cook_value,char* domain,char* path); int cookie_del(t_cookie* cookie,char* cook_name,char* domain,char* path); -int cookie_load(t_cookie* cookie,char* path,char* name); +int cookie_load(t_cookie* cookie, const char* path, const char* name); int cookie_save(t_cookie* cookie,char* name); void cookie_insert(char* s,char* ins); void cookie_delete(char* s,int pos); -char* cookie_get(char* cookie_base,int param); +char* cookie_get(char *buffer, char* cookie_base,int param); char* cookie_find(char* s,char* cook_name,char* domain,char* path); char* cookie_nextfield(char* a); // basic auth int bauth_add(t_cookie* cookie,char* adr,char* fil,char* auth); char* bauth_check(t_cookie* cookie,char* adr,char* fil); -char* bauth_prefix(char* adr,char* fil); +char* bauth_prefix(char *buffer, char* adr,char* fil); #endif diff --git a/src/htscache.c b/src/htscache.c index de2273c..98ba328 100644 --- a/src/htscache.c +++ b/src/htscache.c @@ -41,19 +41,16 @@ Please visit our Website: http://www.httrack.com #include "htscache.h" /* specific definitions */ -#include "htsbase.h" +#include "htscore.h" #include "htsbasenet.h" #include "htsmd5.h" #include <time.h> #include "htszlib.h" - - -#include "htsnostatic.h" /* END 
specific definitions */ #undef test_flush -#define test_flush if (opt->flush) { fflush(opt->log); fflush(opt->errlog); } +#define test_flush if (opt->flush) { fflush(opt->log); fflush(opt->log); } // routines de mise en cache @@ -113,9 +110,9 @@ with // Nouveau: si != text/html ne stocke que la taille -void cache_mayadd(httrackp* opt,cache_back* cache,htsblk* r,char* url_adr,char* url_fil,char* url_save) { +void cache_mayadd(httrackp* opt,cache_back* cache,htsblk* r,const char* url_adr,const char* url_fil,const char* url_save) { if ((opt->debug>0) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"File checked by cache: %s"LF,url_adr); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"File checked by cache: %s"LF,url_adr); } // ---stockage en cache--- // stocker dans le cache? @@ -124,7 +121,7 @@ void cache_mayadd(httrackp* opt,cache_back* cache,htsblk* r,char* url_adr,char* // ensure not a temporary filename (should not happend ?!) if (IS_DELAYED_EXT(url_save)) { if (opt->log!=NULL) { - fspc(opt->log,"warning"); fprintf(opt->log, "aborted cache validation: %s%s still has temporary name %s"LF, url_adr, url_fil, url_save); + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log, "aborted cache validation: %s%s still has temporary name %s"LF, url_adr, url_fil, url_save); } return ; } @@ -136,7 +133,7 @@ void cache_mayadd(httrackp* opt,cache_back* cache,htsblk* r,char* url_adr,char* r->statuscode > 0 #else /* We don't store 5XX errors, because it might be a server problem */ - (r->statuscode==200) /* stocker réponse standard, plus */ + (r->statuscode==HTTP_OK) /* stocker réponse standard, plus */ || (r->statuscode==204) /* no content */ || HTTP_IS_REDIRECT(r->statuscode) /* redirect */ || (r->statuscode==401) /* authorization */ @@ -150,14 +147,14 @@ void cache_mayadd(httrackp* opt,cache_back* cache,htsblk* r,char* url_adr,char* // stocker fichiers (et robots.txt) if ( url_save == NULL || (strnotempty(url_save)) || (strcmp(url_fil,"/robots.txt")==0)) { // ajouter le 
fichier au cache - cache_add(cache,*r,url_adr,url_fil,url_save,opt->all_in_cache,opt->path_html); + cache_add(opt,cache,r,url_adr,url_fil,url_save,opt->all_in_cache,StringBuff(opt->path_html)); // // store a reference NOT to redo the same test zillions of times! // (problem reported by Lars Clausen) // we just store statuscode + location (if any) if (url_save == NULL && r->statuscode / 100 >= 3) { // cached "fast" header doesn't uet exists - if (inthash_read((inthash)cache->cached_tests, concat(url_adr, url_fil), NULL) == 0) { + if (inthash_read(cache->cached_tests, concat(OPT_GET_BUFF(opt), url_adr, url_fil), NULL) == 0) { char BIGSTK tempo[HTS_URLMAXSIZE*2]; sprintf(tempo, "%d", (int)r->statuscode); if (r->location != NULL && r->location[0] != '\0') { @@ -165,9 +162,9 @@ void cache_mayadd(httrackp* opt,cache_back* cache,htsblk* r,char* url_adr,char* strcatbuff(tempo, r->location); } if ((opt->debug>0) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log, "Cached fast-header response: %s%s is %d"LF, url_adr, url_fil, (int)r->statuscode); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log, "Cached fast-header response: %s%s is %d"LF, url_adr, url_fil, (int)r->statuscode); } - inthash_add((inthash)cache->cached_tests, concat(url_adr, url_fil), (long int)strdupt(tempo)); + inthash_add(cache->cached_tests, concat(OPT_GET_BUFF(opt), url_adr, url_fil), (intptr_t)strdupt(tempo)); } } } @@ -178,11 +175,7 @@ void cache_mayadd(httrackp* opt,cache_back* cache,htsblk* r,char* url_adr,char* // ---fin stockage en cache--- } - - -#if 01 - -/* test only - to be removed */ +#if 1 #define ZIP_FIELD_STRING(headers, headersSize, field, value) do { \ if ( (value != NULL) && (value)[0] != '\0') { \ @@ -224,15 +217,16 @@ struct cache_back_zip_entry { /* Ajout d'un fichier en cache */ -void cache_add(cache_back* cache,htsblk r,char* url_adr,char* url_fil,char* url_save,int all_in_cache,char* path_prefix) { +void cache_add(httrackp* opt,cache_back* cache,const htsblk *r,const char* 
url_adr,const char* url_fil,const char* url_save,int all_in_cache,const char* path_prefix) { char BIGSTK filename[HTS_URLMAXSIZE*4]; + char catbuff[CATBUFF_SIZE]; int dataincache=0; // put data in cache ? char BIGSTK headers[8192]; int headersSize = 0; int entryBodySize = 0; int entryFilenameSize = 0; zip_fileinfo fi; - char* url_save_suffix = url_save; + const char* url_save_suffix = url_save; int zErr; // robots.txt hack @@ -248,8 +242,8 @@ void cache_add(cache_back* cache,htsblk r,char* url_adr,char* url_fil,char* url_ } /* Data in cache ? */ - if (is_hypertext_mime(r.contenttype, url_fil) - || (may_be_hypertext_mime(r.contenttype, url_fil) && r.adr != NULL) + if (is_hypertext_mime(opt,r->contenttype, url_fil) + || (may_be_hypertext_mime(opt,r->contenttype, url_fil) && r->adr != NULL) ) { dataincache=1; @@ -258,13 +252,13 @@ void cache_add(cache_back* cache,htsblk r,char* url_adr,char* url_fil,char* url_ } } - if (r.size < 0) // error + if (r->size < 0) // error return; // data in cache if (dataincache) { - assertf(((int) r.size) == r.size); - entryBodySize = (int) r.size; + assertf(((int) r->size) == r->size); + entryBodySize = (int) r->size; } /* Fields */ @@ -272,14 +266,14 @@ void cache_add(cache_back* cache,htsblk r,char* url_adr,char* url_fil,char* url_ headersSize = 0; /* */ { - char* message; - if (strlen(r.msg) < 32) { - message = r.msg; + const char* message; + if (strlen(r->msg) < 32) { + message = r->msg; } else { message = "(See X-StatusMessage)"; } /* 64 characters MAX for first line */ - sprintf(headers + headersSize, "HTTP/1.%c %d %s\r\n", '1', r.statuscode, r.msg); + sprintf(headers + headersSize, "HTTP/1.%c %d %s\r\n", '1', r->statuscode, r->msg); } headersSize += (int) strlen(headers + headersSize); @@ -292,15 +286,15 @@ void cache_add(cache_back* cache,htsblk r,char* url_adr,char* url_fil,char* url_ /* Second line MUST ALWAYS be X-In-Cache */ ZIP_FIELD_INT_FORCE(headers, headersSize, "X-In-Cache", dataincache); - ZIP_FIELD_INT(headers, 
headersSize, "X-StatusCode", r.statuscode); - ZIP_FIELD_STRING(headers, headersSize, "X-StatusMessage", r.msg); - ZIP_FIELD_INT(headers, headersSize, "X-Size", r.size); // size - ZIP_FIELD_STRING(headers, headersSize, "Content-Type", r.contenttype); // contenttype - ZIP_FIELD_STRING(headers, headersSize, "X-Charset", r.charset); // contenttype - ZIP_FIELD_STRING(headers, headersSize, "Last-Modified", r.lastmodified); // last-modified - ZIP_FIELD_STRING(headers, headersSize, "Etag", r.etag); // Etag - ZIP_FIELD_STRING(headers, headersSize, "Location", r.location); // 'location' pour moved - ZIP_FIELD_STRING(headers, headersSize, "Content-Disposition", r.cdispo); // Content-disposition + ZIP_FIELD_INT(headers, headersSize, "X-StatusCode", r->statuscode); + ZIP_FIELD_STRING(headers, headersSize, "X-StatusMessage", r->msg); + ZIP_FIELD_INT(headers, headersSize, "X-Size", r->size); // size + ZIP_FIELD_STRING(headers, headersSize, "Content-Type", r->contenttype); // contenttype + ZIP_FIELD_STRING(headers, headersSize, "X-Charset", r->charset); // contenttype + ZIP_FIELD_STRING(headers, headersSize, "Last-Modified", r->lastmodified); // last-modified + ZIP_FIELD_STRING(headers, headersSize, "Etag", r->etag); // Etag + ZIP_FIELD_STRING(headers, headersSize, "Location", r->location); // 'location' pour moved + ZIP_FIELD_STRING(headers, headersSize, "Content-Disposition", r->cdispo); // Content-disposition ZIP_FIELD_STRING(headers, headersSize, "X-Addr", url_adr); // Original address ZIP_FIELD_STRING(headers, headersSize, "X-Fil", url_fil); // Original URI filename ZIP_FIELD_STRING(headers, headersSize, "X-Save", url_save_suffix); // Original save filename @@ -318,8 +312,9 @@ void cache_add(cache_back* cache,htsblk r,char* url_adr,char* url_fil,char* url_ /* Time */ memset(&fi, 0, sizeof(fi)); - if (r.lastmodified[0] != '\0') { - struct tm* tm_s=convert_time_rfc822(r.lastmodified); + if (r->lastmodified[0] != '\0') { + struct tm buffer; + struct tm* 
tm_s=convert_time_rfc822(&buffer, r->lastmodified); if (tm_s) { fi.tmz_date.tm_sec = (uInt) tm_s->tm_sec; fi.tmz_date.tm_min = (uInt) tm_s->tm_min; @@ -352,22 +347,22 @@ void cache_add(cache_back* cache,htsblk r,char* url_adr,char* url_fil,char* url_ /* Write data in cache */ if (dataincache) { - if (r.is_write == 0) { - if (r.size > 0 && r.adr != NULL) { - if ((zErr = zipWriteInFileInZip((zipFile) cache->zipOutput, r.adr, (int) r.size)) != Z_OK) { + if (r->is_write == 0) { + if (r->size > 0 && r->adr != NULL) { + if ((zErr = zipWriteInFileInZip((zipFile) cache->zipOutput, r->adr, (int) r->size)) != Z_OK) { int zip_zipWriteInFileInZip_failed = 0; assertf(zip_zipWriteInFileInZip_failed); } } } else { FILE* fp; - // On recopie le fichier.. - LLint file_size=fsize(fconv(url_save)); + // On recopie le fichier->. + off_t file_size=fsize(fconv(catbuff, url_save)); if (file_size>=0) { - fp=fopen(fconv(url_save),"rb"); + fp=fopen(fconv(catbuff, url_save),"rb"); if (fp!=NULL) { char BIGSTK buff[32768]; - INTsys nl; + size_t nl; do { nl=fread(buff,1,32768,fp); if (nl>0) { @@ -401,7 +396,7 @@ void cache_add(cache_back* cache,htsblk r,char* url_adr,char* url_fil,char* url_ #else /* Ajout d'un fichier en cache */ -void cache_add(cache_back* cache,htsblk r,char* url_adr,char* url_fil,char* url_save,int all_in_cache) { +void cache_add(httrackp* opt,cache_back* cache,const htsblk *r,char* url_adr,char* url_fil,char* url_save,int all_in_cache) { int pos; char s[256]; char BIGSTK buff[HTS_URLMAXSIZE*4]; @@ -422,20 +417,20 @@ void cache_add(cache_back* cache,htsblk r,char* url_adr,char* url_fil,char* url_ return; // erreur (sauf robots.txt) } - if (r.size <= 0) // taille <= 0 + if (r->size <= 0) // taille <= 0 return; // refusé.. // Mettre les *donées* en cache ? 
- if (is_hypertext_mime(r.contenttype, url_fil)) // html, mise en cache des données et + if (is_hypertext_mime(opt,r->contenttype, url_fil)) // html, mise en cache des données et dataincache=1; // pas uniquement de l'en tête else if (all_in_cache) dataincache=1; // forcer tout en cache /* calcul md5 ? */ /* - if (is_hypertext_mime(r.contenttype)) { // html, calcul MD5 - if (r.adr) { - domd5mem(r.adr,r.size,digest,1); + if (is_hypertext_mime(opt,r->contenttype)) { // html, calcul MD5 + if (r->adr) { + domd5mem(r->adr,r->size,digest,1); } }*/ @@ -451,24 +446,24 @@ void cache_add(cache_back* cache,htsblk r,char* url_adr,char* url_fil,char* url_ // data // écrire données en-tête, données fichier /*if (!dataincache) { // patcher - r.size=-r.size; // négatif + r->size=-r->size; // négatif }*/ // Construction header ok=0; - if (cache_wint(cache_dat,r.statuscode) != -1 // statuscode - && cache_wLLint(cache_dat,r.size) != -1 // size - && cache_wstr(cache_dat,r.msg) != -1 // msg - && cache_wstr(cache_dat,r.contenttype) != -1 // contenttype - && cache_wstr(cache_dat,r.charset) != -1 // contenttype - && cache_wstr(cache_dat,r.lastmodified) != -1 // last-modified - && cache_wstr(cache_dat,r.etag) != -1 // Etag - && cache_wstr(cache_dat,(r.location!=NULL)?r.location:"") != -1 // 'location' pour moved - && cache_wstr(cache_dat,r.cdispo) != -1 // Content-disposition + if (cache_wint(cache_dat,r->statuscode) != -1 // statuscode + && cache_wLLint(cache_dat,r->size) != -1 // size + && cache_wstr(cache_dat,r->msg) != -1 // msg + && cache_wstr(cache_dat,r->contenttype) != -1 // contenttype + && cache_wstr(cache_dat,r->charset) != -1 // contenttype + && cache_wstr(cache_dat,r->lastmodified) != -1 // last-modified + && cache_wstr(cache_dat,r->etag) != -1 // Etag + && cache_wstr(cache_dat,(r->location!=NULL)?r->location:"") != -1 // 'location' pour moved + && cache_wstr(cache_dat,r->cdispo) != -1 // Content-disposition && cache_wstr(cache_dat,url_adr) != -1 // Original address && 
cache_wstr(cache_dat,url_fil) != -1 // Original URI filename && cache_wstr(cache_dat,url_save) != -1 // Original save filename - && cache_wstr(cache_dat,r.headers) != -1 // Full HTTP Headers + && cache_wstr(cache_dat,r->headers) != -1 // Full HTTP Headers && cache_wstr(cache_dat,"HTS") != -1 // end of header ) { ok=1; /* ok */ @@ -478,13 +473,13 @@ void cache_add(cache_back* cache,htsblk r,char* url_adr,char* url_fil,char* url_ /*if ((int) fwrite((char*) &r,1,sizeof(htsblk),cache_dat) == sizeof(htsblk)) {*/ if (ok) { if (dataincache) { // mise en cache? - if (!r.adr) { /* taille nulle (parfois en cas de 301 */ + if (!r->adr) { /* taille nulle (parfois en cas de 301 */ if (cache_wLLint(cache_dat,0)==-1) /* 0 bytes */ ok=0; - } else if (r.is_write==0) { // en mémoire, recopie directe - if (cache_wLLint(cache_dat,r.size)!=-1) { - if (r.size>0) { // taille>0 - if (fwrite(r.adr,1,(INTsys)r.size,cache_dat)!=r.size) + } else if (r->is_write==0) { // en mémoire, recopie directe + if (cache_wLLint(cache_dat,r->size)!=-1) { + if (r->size>0) { // taille>0 + if (fwrite(r->adr,1,r->size,cache_dat)!=r->size) ok=0; } else // taille=0, ne rien écrire ok=0; @@ -492,18 +487,18 @@ void cache_add(cache_back* cache,htsblk r,char* url_adr,char* url_fil,char* url_ ok=0; } else { // recopier fichier dans cache FILE* fp; - // On recopie le fichier.. - LLint file_size=fsize(fconv(url_save)); + // On recopie le fichier->. 
+ off_t file_size=fsize(fconv(catbuff, url_save)); if (file_size>=0) { if (cache_wLLint(cache_dat,file_size)!=-1) { - fp=fopen(fconv(url_save),"rb"); + fp=fopen(fconv(catbuff, url_save),"rb"); if (fp!=NULL) { char BIGSTK buff[32768]; - INTsys nl; + ssize_t nl; do { nl=fread(buff,1,32768,fp); if (nl>0) { - if ((INTsys)fwrite(buff,1,(INTsys)nl,cache_dat)!=nl) { // erreur + if (fwrite(buff,1,nl,cache_dat)!=nl) { // erreur nl=-1; ok=0; } @@ -520,7 +515,7 @@ void cache_add(cache_back* cache,htsblk r,char* url_adr,char* url_fil,char* url_ } } else ok=0; /*if (!dataincache) { // dépatcher - r.size=-r.size; + r->size=-r->size; }*/ // index @@ -538,23 +533,23 @@ void cache_add(cache_back* cache,htsblk r,char* url_adr,char* url_fil,char* url_ #endif -htsblk cache_read(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* location) { +htsblk cache_read(httrackp* opt,cache_back* cache,const char* adr,const char* fil,const char* save,char* location) { return cache_readex(opt,cache,adr,fil,save,location,NULL,0); } -htsblk cache_read_ro(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* location) { +htsblk cache_read_ro(httrackp* opt,cache_back* cache,const char* adr,const char* fil,const char* save,char* location) { return cache_readex(opt,cache,adr,fil,save,location,NULL,1); } -static htsblk cache_readex_old(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* location, +static htsblk cache_readex_old(httrackp* opt,cache_back* cache,const char* adr,const char* fil,const char* save,char* location, char* return_save, int readonly); -static htsblk cache_readex_new(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* location, +static htsblk cache_readex_new(httrackp* opt,cache_back* cache,const char* adr,const char* fil,const char* save,char* location, char* return_save, int readonly); // lecture d'un fichier dans le cache // si save==null alors test unqiquement -htsblk cache_readex(httrackp* opt,cache_back* 
cache,char* adr,char* fil,char* save,char* location, +htsblk cache_readex(httrackp* opt,cache_back* cache,const char* adr,const char* fil,const char* save,char* location, char* return_save, int readonly) { if (cache->zipInput != NULL) { return cache_readex_new(opt, cache, adr, fil, save, location, return_save, readonly); @@ -565,13 +560,14 @@ htsblk cache_readex(httrackp* opt,cache_back* cache,char* adr,char* fil,char* sa // lecture d'un fichier dans le cache // si save==null alors test unqiquement -static htsblk cache_readex_new(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* location, +static htsblk cache_readex_new(httrackp* opt,cache_back* cache,const char* adr,const char* fil,const char* save,char* location, char* return_save, int readonly) { char BIGSTK location_default[HTS_URLMAXSIZE*2]; char BIGSTK buff[HTS_URLMAXSIZE*2]; char BIGSTK previous_save[HTS_URLMAXSIZE*2]; char BIGSTK previous_save_[HTS_URLMAXSIZE*2]; - long int hash_pos; + char catbuff[CATBUFF_SIZE]; + intptr_t hash_pos; int hash_pos_return; htsblk r; memset(&r, 0, sizeof(htsblk)); r.soc=INVALID_SOCKET; @@ -586,7 +582,7 @@ static htsblk cache_readex_new(httrackp* opt,cache_back* cache,char* adr,char* f strcpybuff(r.location, ""); strcpybuff(buff, adr); strcatbuff(buff,fil); - hash_pos_return = inthash_read((inthash)cache->hashtable, buff, (long int*)&hash_pos); + hash_pos_return = inthash_read(cache->hashtable, buff, &hash_pos); /* avoid errors on data entries */ if (adr[0] == '/' && adr[1] == '/' && adr[2] == '[') { #if HTS_FAST_CACHE @@ -656,9 +652,9 @@ static htsblk cache_readex_new(httrackp* opt,cache_back* cache,char* adr,char* f /* Previous entry */ if (previous_save_[0] != '\0') { - int pathLen = (int) strlen(opt->path_html); - if (pathLen != 0 && strncmp(previous_save_, opt->path_html, pathLen) != 0) { // old (<3.40) buggy format - sprintf(previous_save, "%s%s", opt->path_html, previous_save_); + int pathLen = (int) strlen(StringBuff(opt->path_html)); + if (pathLen != 
0 && strncmp(previous_save_, StringBuff(opt->path_html), pathLen) != 0) { // old (<3.40) buggy format + sprintf(previous_save, "%s%s", StringBuff(opt->path_html), previous_save_); } else { strcpy(previous_save, previous_save_); } @@ -684,28 +680,28 @@ static htsblk cache_readex_new(httrackp* opt,cache_back* cache,char* adr,char* f strcpybuff(r.msg,"Cache Read Error : Unexpected error"); } } - else if (!readonly && r.statuscode==200 && !is_hypertext_mime(r.contenttype, fil) && strnotempty(save)) { // pas HTML, écrire sur disk directement + else if (!readonly && r.statuscode==HTTP_OK && !is_hypertext_mime(opt,r.contenttype, fil) && strnotempty(save)) { // pas HTML, écrire sur disk directement r.is_write=1; // écrire if (!dataincache) { - if (fexist(fconv(save))) { // un fichier existe déja + if (fexist(fconv(catbuff, save))) { // un fichier existe déja //if (fsize(fconv(save))==r.size) { // même taille -- NON tant pis (taille mal declaree) ok=1; // plus rien à faire - filenote(save,NULL); // noter comme connu - file_notify(adr, fil, save, 0, 0, 1); // data in cache + filenote(&opt->state.strc,save,NULL); // noter comme connu + file_notify(opt,adr, fil, save, 0, 0, 1); // data in cache } } if (!dataincache && !ok) { // Pas de donnée en cache et fichier introuvable : erreur! 
if (opt->norecatch) { - file_notify(adr, fil, save, 1, 0, 0); - filecreateempty(save); + file_notify(opt,adr, fil, save, 1, 0, 0); + filecreateempty(&opt->state.strc, save); // r.statuscode=STATUSCODE_INVALID; strcpybuff(r.msg,"File deleted by user not recaught"); ok=1; // ne pas récupérer (et pas d'erreur) } else { - file_notify(adr, fil, save, 1, 1, 0); + file_notify(opt,adr, fil, save, 1, 1, 0); r.statuscode=STATUSCODE_INVALID; strcpybuff(r.msg,"Previous cache file not found"); ok=1; // ne pas récupérer @@ -713,8 +709,8 @@ static htsblk cache_readex_new(httrackp* opt,cache_back* cache,char* adr,char* f } if (!ok) { // load from cache - file_notify(adr, fil, save, 1, 1, 1); // data in cache - r.out=filecreate(save); + file_notify(opt,adr, fil, save, 1, 1, 1); // data in cache + r.out=filecreate(&opt->state.strc, save); #if HDEBUG printf("direct-disk: %s\n",save); #endif @@ -722,14 +718,15 @@ static htsblk cache_readex_new(httrackp* opt,cache_back* cache,char* adr,char* f char BIGSTK buff[32768+4]; LLint size = r.size; if (size > 0) { - INTsys nl; + size_t nl; do { nl = unzReadCurrentFile((unzFile) cache->zipInput, buff, (int)minimum(size, 32768)); if (nl>0) { size-=nl; - if ((INTsys)fwrite(buff,1,(INTsys)nl,r.out)!=nl) { // erreur + if (fwrite(buff,1,nl,r.out)!=nl) { // erreur + int last_errno = errno; r.statuscode=STATUSCODE_INVALID; - sprintf(r.msg,"Cache Read Error : Read To Disk: %s", strerror(errno)); + sprintf(r.msg,"Cache Read Error : Read To Disk: %s", strerror(last_errno)); } } } while((nl>0) && (size>0) && (r.statuscode!=-1)); @@ -737,7 +734,7 @@ static htsblk cache_readex_new(httrackp* opt,cache_back* cache,char* adr,char* f fclose(r.out); r.out=NULL; -#if HTS_WIN==0 +#ifndef _WIN32 chmod(save,HTS_ACCESS_FILE); #endif } else { @@ -757,13 +754,14 @@ static htsblk cache_readex_new(httrackp* opt,cache_back* cache,char* adr,char* f strcpybuff(r.msg,"Previous cache file not found (2)"); } else { /* Read in memory from cache */ if (strnotempty(previous_save) 
&& fexist(previous_save)) { - FILE* fp = fopen(fconv(previous_save), "rb"); + FILE* fp = fopen(fconv(catbuff, previous_save), "rb"); if (fp != NULL) { - r.adr=(char*) malloct((INTsys)r.size + 4); + r.adr = (char*) malloct((int) r.size + 4); if (r.adr != NULL) { - if (r.size > 0 && fread(r.adr, 1, (INTsys) r.size, fp) != r.size) { + if (r.size > 0 && fread(r.adr, 1, (int) r.size, fp) != r.size) { + int last_errno = errno; r.statuscode=STATUSCODE_INVALID; - sprintf(r.msg,"Read error in cache disk data: %s", strerror(errno)); + sprintf(r.msg,"Read error in cache disk data: %s", strerror(last_errno)); } } else { r.statuscode=STATUSCODE_INVALID; @@ -778,9 +776,9 @@ static htsblk cache_readex_new(httrackp* opt,cache_back* cache,char* adr,char* f } } else { // lire fichier (d'un coup) - r.adr=(char*) malloct((INTsys)r.size+4); + r.adr = (char*) malloct((int) r.size+4); if (r.adr!=NULL) { - if (unzReadCurrentFile((unzFile) cache->zipInput, r.adr, (INTsys)r.size) != r.size) { // erreur + if (unzReadCurrentFile((unzFile) cache->zipInput, r.adr, (int) r.size) != r.size) { // erreur freet(r.adr); r.adr=NULL; r.statuscode=STATUSCODE_INVALID; @@ -824,10 +822,10 @@ static htsblk cache_readex_new(httrackp* opt,cache_back* cache,char* adr,char* f // lecture d'un fichier dans le cache // si save==null alors test unqiquement -static htsblk cache_readex_old(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* location, +static htsblk cache_readex_old(httrackp* opt,cache_back* cache,const char* adr,const char* fil,const char* save,char* location, char* return_save, int readonly) { #if HTS_FAST_CACHE - long int hash_pos; + intptr_t hash_pos; int hash_pos_return; #else char* a; @@ -835,6 +833,7 @@ static htsblk cache_readex_old(httrackp* opt,cache_back* cache,char* adr,char* f char BIGSTK buff[HTS_URLMAXSIZE*2]; char BIGSTK location_default[HTS_URLMAXSIZE*2]; char BIGSTK previous_save[HTS_URLMAXSIZE*2]; + char catbuff[CATBUFF_SIZE]; htsblk r; int ok=0; int header_only=0; 
@@ -848,7 +847,7 @@ static htsblk cache_readex_old(httrackp* opt,cache_back* cache,char* adr,char* f strcpybuff(r.location, ""); #if HTS_FAST_CACHE strcpybuff(buff,adr); strcatbuff(buff,fil); - hash_pos_return=inthash_read((inthash)cache->hashtable,buff,(long int*)&hash_pos); + hash_pos_return=inthash_read(cache->hashtable,buff,&hash_pos); #else buff[0]='\0'; strcatbuff(buff,"\n"); strcatbuff(buff,adr); strcatbuff(buff,"\n"); strcatbuff(buff,fil); strcatbuff(buff,"\n"); if (cache->use) @@ -872,13 +871,13 @@ static htsblk cache_readex_old(httrackp* opt,cache_back* cache,char* adr,char* f #else if (a!=NULL) { // OK existe en cache! #endif - INTsys pos; + intptr_t pos; #if DEBUGCA fprintf(stdout,"..cache: %s%s at ",adr,fil); #endif #if HTS_FAST_CACHE - pos=hash_pos; /* simply */ + pos = hash_pos; /* simply */ #else a+=strlen(buff); sscanf(a,"%d",&pos); // lire position @@ -888,7 +887,7 @@ static htsblk cache_readex_old(httrackp* opt,cache_back* cache,char* adr,char* f #endif fflush(cache->olddat); - if (fseek(cache->olddat,((pos>0)?pos:(-pos)),SEEK_SET) == 0) { + if (fseek(cache->olddat,(long)((pos>0)?pos:(-pos)),SEEK_SET) == 0) { /* Importer cache1.0 */ if (cache->version==0) { OLD_htsblk old_r; @@ -938,7 +937,7 @@ static htsblk cache_readex_old(httrackp* opt,cache_back* cache,char* adr,char* f if (size_read>0) { /* si inscrite ici */ r.size=size_read; } else { /* pas de données directement dans le cache, fichier présent? */ - if (r.statuscode!=200) + if (r.statuscode!=HTTP_OK) header_only=1; /* que l'en tête ici! */ } } @@ -966,22 +965,22 @@ static htsblk cache_readex_old(httrackp* opt,cache_back* cache,char* adr,char* f #if HTS_DIRECTDISK // Court-circuit: // Peut-on stocker le fichier directement sur disque? 
- if (!readonly && r.statuscode==200 && !is_hypertext_mime(r.contenttype, fil) && strnotempty(save)) { // pas HTML, écrire sur disk directement + if (!readonly && r.statuscode==HTTP_OK && !is_hypertext_mime(opt,r.contenttype, fil) && strnotempty(save)) { // pas HTML, écrire sur disk directement int ok=0; r.is_write=1; // écrire - if (fexist(fconv(save))) { // un fichier existe déja + if (fexist(fconv(catbuff, save))) { // un fichier existe déja //if (fsize(fconv(save))==r.size) { // même taille -- NON tant pis (taille mal declaree) ok=1; // plus rien à faire - filenote(save,NULL); // noter comme connu - file_notify(adr, fil, save, 0, 0, 0); + filenote(&opt->state.strc,save,NULL); // noter comme connu + file_notify(opt,adr, fil, save, 0, 0, 0); //} } if ((pos<0) && (!ok)) { // Pas de donnée en cache et fichier introuvable : erreur! if (opt->norecatch) { - file_notify(adr, fil, save, 1, 0, 0); - filecreateempty(save); + file_notify(opt,adr, fil, save, 1, 0, 0); + filecreateempty(&opt->state.strc, save); // r.statuscode=STATUSCODE_INVALID; strcpybuff(r.msg,"File deleted by user not recaught"); @@ -994,20 +993,20 @@ static htsblk cache_readex_old(httrackp* opt,cache_back* cache,char* adr,char* f } if (!ok) { - r.out=filecreate(save); + r.out=filecreate(&opt->state.strc, save); #if HDEBUG printf("direct-disk: %s\n",save); #endif if (r.out!=NULL) { char BIGSTK buff[32768+4]; - LLint size = r.size; + size_t size = (size_t) r.size; if (size > 0) { - INTsys nl; + size_t nl; do { - nl=fread(buff,1,(INTsys) minimum(size,32768),cache->olddat); + nl=fread(buff,1,minimum(size,32768),cache->olddat); if (nl>0) { size-=nl; - if ((INTsys)fwrite(buff,1,(INTsys)nl,r.out)!=nl) { // erreur + if (fwrite(buff,1,nl,r.out)!=nl) { // erreur r.statuscode=STATUSCODE_INVALID; strcpybuff(r.msg,"Cache Read Error : Read To Disk"); } @@ -1017,7 +1016,7 @@ static htsblk cache_readex_old(httrackp* opt,cache_back* cache,char* adr,char* f fclose(r.out); r.out=NULL; -#if HTS_WIN==0 +#ifndef _WIN32 
chmod(save,HTS_ACCESS_FILE); #endif } else { @@ -1037,11 +1036,11 @@ static htsblk cache_readex_old(httrackp* opt,cache_back* cache,char* adr,char* f strcpybuff(r.msg,"Previous cache file not found (2)"); } else { /* Read in memory from cache */ if (strnotempty(return_save) && fexist(return_save)) { - FILE* fp = fopen(fconv(return_save), "rb"); + FILE* fp = fopen(fconv(catbuff, return_save), "rb"); if (fp != NULL) { - r.adr=(char*) malloct((INTsys)r.size + 4); + r.adr = (char*) malloct((size_t)r.size + 4); if (r.adr != NULL) { - if (r.size > 0 && fread(r.adr, 1, (INTsys) r.size, fp) != r.size) { + if (r.size > 0 && fread(r.adr, 1, (size_t)r.size, fp) != r.size) { r.statuscode=STATUSCODE_INVALID; strcpybuff(r.msg,"Read error in cache disk data"); } @@ -1058,9 +1057,9 @@ static htsblk cache_readex_old(httrackp* opt,cache_back* cache,char* adr,char* f } } else { // lire fichier (d'un coup) - r.adr=(char*) malloct((INTsys)r.size+4); + r.adr=(char*) malloct((size_t)r.size+4); if (r.adr!=NULL) { - if (fread(r.adr,1,(INTsys)r.size,cache->olddat)!=r.size) { // erreur + if (fread(r.adr,1,(size_t)r.size,cache->olddat)!=r.size) { // erreur freet(r.adr); r.adr=NULL; r.statuscode=STATUSCODE_INVALID; @@ -1111,7 +1110,7 @@ static htsblk cache_readex_old(httrackp* opt,cache_back* cache,char* adr,char* f /* write (string1-string2)-data in cache */ /* 0 if failed */ -int cache_writedata(FILE* cache_ndx,FILE* cache_dat,char* str1,char* str2,char* outbuff,int len) { +int cache_writedata(FILE* cache_ndx,FILE* cache_dat,const char* str1,const char* str2,char* outbuff,int len) { if (cache_dat) { char BIGSTK buff[HTS_URLMAXSIZE*4]; char s[256]; @@ -1120,12 +1119,12 @@ int cache_writedata(FILE* cache_ndx,FILE* cache_dat,char* str1,char* str2,char* pos=ftell(cache_dat); /* first write data */ if (cache_wint(cache_dat,len)!=-1) { // length - if ((INTsys)fwrite(outbuff,1,(INTsys)len,cache_dat) == (INTsys) len) { // data + if (fwrite(outbuff,1,len,cache_dat) == len) { // data /* then write 
index */ sprintf(s,"%d\n",pos); buff[0]='\0'; strcatbuff(buff,str1); strcatbuff(buff,"\n"); strcatbuff(buff,str2); strcatbuff(buff,"\n"); cache_wstr(cache_ndx,buff); - if (fwrite(s,1,(INTsys)strlen(s),cache_ndx) == strlen(s)) { + if (fwrite(s,1,strlen(s),cache_ndx) == strlen(s)) { fflush(cache_dat); fflush(cache_ndx); return 1; } @@ -1137,20 +1136,20 @@ int cache_writedata(FILE* cache_ndx,FILE* cache_dat,char* str1,char* str2,char* /* read the data corresponding to (string1-string2) in cache */ /* 0 if failed */ -int cache_readdata(cache_back* cache,char* str1,char* str2,char** inbuff,int* inlen) { +int cache_readdata(cache_back* cache,const char* str1,const char* str2,char** inbuff,int* inlen) { #if HTS_FAST_CACHE if (cache->hashtable) { char BIGSTK buff[HTS_URLMAXSIZE*4]; - long int pos; + intptr_t pos; strcpybuff(buff,str1); strcatbuff(buff,str2); - if (inthash_read((inthash)cache->hashtable,buff,(long int*)&pos)) { - if (fseek(cache->olddat,((pos>0)?pos:(-pos)),SEEK_SET) == 0) { + if (inthash_read(cache->hashtable,buff,&pos)) { + if (fseek(cache->olddat,(long)((pos>0)?pos:(-pos)),SEEK_SET) == 0) { INTsys len; cache_rint(cache->olddat,&len); if (len>0) { char* mem_buff=(char*)malloct(len+4); /* Plus byte 0 */ if (mem_buff) { - if ((INTsys)fread(mem_buff,1,len,cache->olddat)==len) { // lire tout (y compris statuscode etc)*/ + if (fread(mem_buff,1,len,cache->olddat)==len) { // lire tout (y compris statuscode etc)*/ *inbuff=mem_buff; *inlen=len; return 1; @@ -1169,7 +1168,7 @@ int cache_readdata(cache_back* cache,char* str1,char* str2,char** inbuff,int* in // renvoyer uniquement en tête, ou NULL si erreur // return NULL upon error, and set -1 to r.statuscode -htsblk* cache_header(httrackp* opt,cache_back* cache,char* adr,char* fil,htsblk* r) { +htsblk* cache_header(httrackp* opt,cache_back* cache,const char* adr,const char* fil,htsblk* r) { *r=cache_read(opt,cache,adr,fil,NULL,NULL); // test uniquement if (r->statuscode != -1) return r; @@ -1187,52 +1186,52 @@ void 
cache_init(cache_back* cache,httrackp* opt) { printf("cache init: "); #endif if (!cache->ro) { -#if HTS_WIN - mkdir(fconcat(opt->path_log,"hts-cache")); +#ifdef _WIN32 + mkdir(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache")); #else - mkdir(fconcat(opt->path_log,"hts-cache"),HTS_PROTECT_FOLDER); + mkdir(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache"),HTS_PROTECT_FOLDER); #endif - if ((fexist(fconcat(opt->path_log,"hts-cache/new.zip")))) { // il existe déja un cache précédent.. renommer + if ((fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.zip")))) { // il existe déja un cache précédent.. renommer /* Previous cache from the previous cache version */ #if 0 /* No.. reuse with old httrack releases! */ - if (fexist(fconcat(opt->path_log,"hts-cache/old.dat"))) - remove(fconcat(opt->path_log,"hts-cache/old.dat")); - if (fexist(fconcat(opt->path_log,"hts-cache/old.ndx"))) - remove(fconcat(opt->path_log,"hts-cache/old.ndx")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.dat"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.dat")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.ndx"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.ndx")); #endif /* Previous cache version */ - if ((fexist(fconcat(opt->path_log,"hts-cache/new.dat"))) && (fexist(fconcat(opt->path_log,"hts-cache/new.ndx")))) { // il existe déja un cache précédent.. renommer - rename(fconcat(opt->path_log,"hts-cache/new.dat"),fconcat(opt->path_log,"hts-cache/old.dat")); - rename(fconcat(opt->path_log,"hts-cache/new.ndx"),fconcat(opt->path_log,"hts-cache/old.ndx")); + if ((fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.dat"))) && (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.ndx")))) { // il existe déja un cache précédent.. 
renommer + rename(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.dat"),fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.dat")); + rename(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.ndx"),fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.ndx")); } /* Remove OLD cache */ - if (fexist(fconcat(opt->path_log,"hts-cache/old.zip"))) - remove(fconcat(opt->path_log,"hts-cache/old.zip")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.zip"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.zip")); /* Rename */ - rename(fconcat(opt->path_log,"hts-cache/new.zip"),fconcat(opt->path_log,"hts-cache/old.zip")); + rename(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.zip"),fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.zip")); } - else if ((fexist(fconcat(opt->path_log,"hts-cache/new.dat"))) && (fexist(fconcat(opt->path_log,"hts-cache/new.ndx")))) { // il existe déja un cache précédent.. renommer + else if ((fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.dat"))) && (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.ndx")))) { // il existe déja un cache précédent.. 
renommer #if DEBUGCA printf("work with former cache\n"); #endif - if (fexist(fconcat(opt->path_log,"hts-cache/old.dat"))) - remove(fconcat(opt->path_log,"hts-cache/old.dat")); - if (fexist(fconcat(opt->path_log,"hts-cache/old.ndx"))) - remove(fconcat(opt->path_log,"hts-cache/old.ndx")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.dat"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.dat")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.ndx"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.ndx")); - rename(fconcat(opt->path_log,"hts-cache/new.dat"),fconcat(opt->path_log,"hts-cache/old.dat")); - rename(fconcat(opt->path_log,"hts-cache/new.ndx"),fconcat(opt->path_log,"hts-cache/old.ndx")); + rename(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.dat"),fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.dat")); + rename(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.ndx"),fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.ndx")); } else { // un des deux (ou les deux) fichiers cache absents: effacer l'autre éventuel #if DEBUGCA printf("new cache\n"); #endif - if (fexist(fconcat(opt->path_log,"hts-cache/new.dat"))) - remove(fconcat(opt->path_log,"hts-cache/new.dat")); - if (fexist(fconcat(opt->path_log,"hts-cache/new.ndx"))) - remove(fconcat(opt->path_log,"hts-cache/new.ndx")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.dat"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.dat")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.ndx"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.ndx")); } } @@ -1240,19 +1239,19 @@ void cache_init(cache_back* cache,httrackp* opt) { if ( ( !cache->ro && - 
fsize(fconcat(opt->path_log,"hts-cache/old.zip")) > 0 + fsize(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.zip")) > 0 ) || ( cache->ro && - fsize(fconcat(opt->path_log,"hts-cache/new.zip")) > 0 + fsize(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.zip")) > 0 ) ) { if (!cache->ro) { - cache->zipInput = unzOpen(fconcat(opt->path_log,"hts-cache/old.zip")); + cache->zipInput = unzOpen(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.zip")); } else { - cache->zipInput = unzOpen(fconcat(opt->path_log,"hts-cache/new.zip")); + cache->zipInput = unzOpen(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.zip")); } // Corrupted ZIP file ? Try to repair! @@ -1261,30 +1260,30 @@ void cache_init(cache_back* cache,httrackp* opt) { uLong repaired = 0; uLong repairedBytes = 0; if (!cache->ro) { - name = fconcat(opt->path_log,"hts-cache/old.zip"); + name = fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.zip"); } else { - name = fconcat(opt->path_log,"hts-cache/new.zip"); + name = fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.zip"); } if (opt->log) { - fspc(opt->log,"warning"); fprintf(opt->log,"Cache: damaged cache, trying to repair"LF); + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Cache: damaged cache, trying to repair"LF); fflush(opt->log); } if (unzRepair(name, - fconcat(opt->path_log,"hts-cache/repair.zip"), - fconcat(opt->path_log,"hts-cache/repair.tmp"), + fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/repair.zip"), + fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/repair.tmp"), &repaired, &repairedBytes ) == Z_OK) { unlink(name); - rename(fconcat(opt->path_log,"hts-cache/repair.zip"), name); + rename(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/repair.zip"), name); cache->zipInput = unzOpen(name); if (opt->log) { - fspc(opt->log,"warning"); fprintf(opt->log,"Cache: %d bytes successfully recovered in %d 
entries"LF, + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Cache: %d bytes successfully recovered in %d entries"LF, (int) repairedBytes, (int) repaired); fflush(opt->log); } } else { if (opt->log) { - fspc(opt->log,"warning"); fprintf(opt->log,"Cache: could not repair the cache"LF); + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Cache: could not repair the cache"LF); fflush(opt->log); } } @@ -1338,28 +1337,28 @@ void cache_init(cache_back* cache,httrackp* opt) { } } if (dataincache) - inthash_add((inthash)cache->hashtable, filenameIndex, pos); + inthash_add(cache->hashtable, filenameIndex, pos); else - inthash_add((inthash)cache->hashtable, filenameIndex, -pos); + inthash_add(cache->hashtable, filenameIndex, -pos); } else { if (opt->log!=NULL) { - fspc(opt->log,"warning"); fprintf(opt->log,"Corrupted cache meta entry #%d"LF, (int)entries); + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Corrupted cache meta entry #%d"LF, (int)entries); } } } else { if (opt->log!=NULL) { - fspc(opt->log,"warning"); fprintf(opt->log,"Corrupted cache entry #%d"LF, (int)entries); + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Corrupted cache entry #%d"LF, (int)entries); } } unzCloseCurrentFile((unzFile) cache->zipInput); } else { if (opt->log!=NULL) { - fspc(opt->log,"warning"); fprintf(opt->log,"Corrupted cache entry #%d"LF, (int)entries); + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Corrupted cache entry #%d"LF, (int)entries); } } } while( unzGoToNextFile((unzFile) cache->zipInput) == Z_OK ); if ((opt->debug>0) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"Cache index loaded: %d entries loaded"LF, (int)entries); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"Cache index loaded: %d entries loaded"LF, (int)entries); } opt->is_update=1; // signaler comme update @@ -1370,12 +1369,12 @@ void cache_init(cache_back* cache,httrackp* opt) { } else if ( ( !cache->ro && - fsize(fconcat(opt->path_log,"hts-cache/old.dat")) >=0 && fsize(fconcat(opt->path_log,"hts-cache/old.ndx")) >0 
+ fsize(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.dat")) >=0 && fsize(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.ndx")) >0 ) || ( cache->ro && - fsize(fconcat(opt->path_log,"hts-cache/new.dat")) >=0 && fsize(fconcat(opt->path_log,"hts-cache/new.ndx")) > 0 + fsize(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.dat")) >=0 && fsize(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.ndx")) > 0 ) ) { FILE* oldndx=NULL; @@ -1383,11 +1382,11 @@ void cache_init(cache_back* cache,httrackp* opt) { printf("..load cache\n"); #endif if (!cache->ro) { - cache->olddat=fopen(fconcat(opt->path_log,"hts-cache/old.dat"),"rb"); - oldndx=fopen(fconcat(opt->path_log,"hts-cache/old.ndx"),"rb"); + cache->olddat=fopen(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.dat"),"rb"); + oldndx=fopen(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.ndx"),"rb"); } else { - cache->olddat=fopen(fconcat(opt->path_log,"hts-cache/new.dat"),"rb"); - oldndx=fopen(fconcat(opt->path_log,"hts-cache/new.ndx"),"rb"); + cache->olddat=fopen(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.dat"),"rb"); + oldndx=fopen(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.ndx"),"rb"); } // les deux doivent être ouvrables if ((cache->olddat==NULL) && (oldndx!=NULL)) { @@ -1404,11 +1403,11 @@ void cache_init(cache_back* cache,httrackp* opt) { fclose(oldndx); oldndx=NULL; // lire ndx, et lastmodified if (!cache->ro) { - buffl=fsize(fconcat(opt->path_log,"hts-cache/old.ndx")); - cache->use=readfile(fconcat(opt->path_log,"hts-cache/old.ndx")); + buffl=fsize(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.ndx")); + cache->use=readfile(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.ndx")); } else { - buffl=fsize(fconcat(opt->path_log,"hts-cache/new.ndx")); - cache->use=readfile(fconcat(opt->path_log,"hts-cache/new.ndx")); + 
buffl=fsize(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.ndx")); + cache->use=readfile(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.ndx")); } if (cache->use!=NULL) { char firstline[256]; @@ -1421,9 +1420,9 @@ void cache_init(cache_back* cache,httrackp* opt) { a+=cache_brstr(a,firstline); strcpybuff(cache->lastmodified,firstline); } else { - if (opt->errlog) { - fspc(opt->errlog,"error"); fprintf(opt->errlog,"Cache: version 1.%d not supported, ignoring current cache"LF,cache->version); - fflush(opt->errlog); + if (opt->log) { + HTS_LOG(opt,LOG_ERROR); fprintf(opt->log,"Cache: version 1.%d not supported, ignoring current cache"LF,cache->version); + fflush(opt->log); } fclose(cache->olddat); cache->olddat=NULL; @@ -1431,9 +1430,9 @@ void cache_init(cache_back* cache,httrackp* opt) { cache->use=NULL; } } else { // non supporté - if (opt->errlog) { - fspc(opt->errlog,"error"); fprintf(opt->errlog,"Cache: %s not supported, ignoring current cache"LF,firstline); - fflush(opt->errlog); + if (opt->log) { + HTS_LOG(opt,LOG_ERROR); fprintf(opt->log,"Cache: %s not supported, ignoring current cache"LF,firstline); + fflush(opt->log); } fclose(cache->olddat); cache->olddat=NULL; @@ -1444,7 +1443,7 @@ void cache_init(cache_back* cache,httrackp* opt) { } else { // Vieille version du cache /* */ if (opt->log) { - fspc(opt->log,"warning"); fprintf(opt->log,"Cache: importing old cache format"LF); + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Cache: importing old cache format"LF); fflush(opt->log); } cache->version=0; // cache 1.0 @@ -1468,7 +1467,7 @@ void cache_init(cache_back* cache,httrackp* opt) { /* read position */ a+=binput(a,linepos,200); sscanf(linepos,"%d",&pos); - inthash_add((inthash)cache->hashtable,line,pos); + inthash_add(cache->hashtable,line,pos); } } /* Not needed anymore! 
*/ @@ -1485,44 +1484,46 @@ void cache_init(cache_back* cache,httrackp* opt) { #endif if (!cache->ro) { // ouvrir caches actuels - structcheck(fconcat(opt->path_log, "hts-cache/")); + structcheck(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log), "hts-cache/")); if (1) { /* Create ZIP file cache */ - cache->zipOutput = (void*) zipOpen(fconcat(opt->path_log,"hts-cache/new.zip"), 0); + cache->zipOutput = (void*) zipOpen(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.zip"), 0); if (cache->zipOutput != NULL) { // supprimer old.lst - if (fexist(fconcat(opt->path_log,"hts-cache/old.lst"))) - remove(fconcat(opt->path_log,"hts-cache/old.lst")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.lst"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.lst")); // renommer - if (fexist(fconcat(opt->path_log,"hts-cache/new.lst"))) - rename(fconcat(opt->path_log,"hts-cache/new.lst"),fconcat(opt->path_log,"hts-cache/old.lst")); - // ouvrir - cache->lst=fopen(fconcat(opt->path_log,"hts-cache/new.lst"),"wb"); - { - filecreate_params tmp; - strcpybuff(tmp.path,opt->path_html); // chemin - tmp.lst=cache->lst; // fichier lst - filenote("",&tmp); // initialiser filecreate - } - + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.lst"))) + rename(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.lst"),fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.lst")); + // ouvrir + cache->lst=fopen(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.lst"),"wb"); + strcpybuff(opt->state.strc.path, StringBuff(opt->path_html)); + opt->state.strc.lst = cache->lst; + //{ + //filecreate_params tmp; + //strcpybuff(tmp.path,StringBuff(opt->path_html)); // chemin + //tmp.lst=cache->lst; // fichier lst + //filenote("",&tmp); // initialiser filecreate + //} + // supprimer old.txt - if (fexist(fconcat(opt->path_log,"hts-cache/old.txt"))) - 
remove(fconcat(opt->path_log,"hts-cache/old.txt")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.txt"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.txt")); // renommer - if (fexist(fconcat(opt->path_log,"hts-cache/new.txt"))) - rename(fconcat(opt->path_log,"hts-cache/new.txt"),fconcat(opt->path_log,"hts-cache/old.txt")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.txt"))) + rename(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.txt"),fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.txt")); // ouvrir - cache->txt=fopen(fconcat(opt->path_log,"hts-cache/new.txt"),"wb"); + cache->txt=fopen(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.txt"),"wb"); if (cache->txt) { fprintf(cache->txt,"date\tsize'/'remotesize\tflags(request:Update,Range state:File response:Modified,Chunked,gZipped)\t"); fprintf(cache->txt,"statuscode\tstatus ('servermsg')\tMIME\tEtag|Date\tURL\tlocalfile\t(from URL)"LF); } } } else { - cache->dat=fopen(fconcat(opt->path_log,"hts-cache/new.dat"),"wb"); - cache->ndx=fopen(fconcat(opt->path_log,"hts-cache/new.ndx"),"wb"); + cache->dat=fopen(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.dat"),"wb"); + cache->ndx=fopen(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.ndx"),"wb"); // les deux doivent être ouvrables if ((cache->dat==NULL) && (cache->ndx!=NULL)) { fclose(cache->ndx); @@ -1546,28 +1547,30 @@ void cache_init(cache_back* cache,httrackp* opt) { fflush(cache->ndx); // un petit fflush au cas où // supprimer old.lst - if (fexist(fconcat(opt->path_log,"hts-cache/old.lst"))) - remove(fconcat(opt->path_log,"hts-cache/old.lst")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.lst"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.lst")); // renommer - if 
(fexist(fconcat(opt->path_log,"hts-cache/new.lst"))) - rename(fconcat(opt->path_log,"hts-cache/new.lst"),fconcat(opt->path_log,"hts-cache/old.lst")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.lst"))) + rename(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.lst"),fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.lst")); // ouvrir - cache->lst=fopen(fconcat(opt->path_log,"hts-cache/new.lst"),"wb"); - { - filecreate_params tmp; - strcpybuff(tmp.path,opt->path_html); // chemin - tmp.lst=cache->lst; // fichier lst - filenote("",&tmp); // initialiser filecreate - } + cache->lst=fopen(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.lst"),"wb"); + strcpybuff(opt->state.strc.path, StringBuff(opt->path_html)); + opt->state.strc.lst = cache->lst; + //{ + // filecreate_params tmp; + // strcpybuff(tmp.path,StringBuff(opt->path_html)); // chemin + // tmp.lst=cache->lst; // fichier lst + // filenote("",&tmp); // initialiser filecreate + //} // supprimer old.txt - if (fexist(fconcat(opt->path_log,"hts-cache/old.txt"))) - remove(fconcat(opt->path_log,"hts-cache/old.txt")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.txt"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.txt")); // renommer - if (fexist(fconcat(opt->path_log,"hts-cache/new.txt"))) - rename(fconcat(opt->path_log,"hts-cache/new.txt"),fconcat(opt->path_log,"hts-cache/old.txt")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.txt"))) + rename(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.txt"),fconcat(OPT_GET_BUFF(opt),StringBuff(opt->path_log),"hts-cache/old.txt")); // ouvrir - cache->txt=fopen(fconcat(opt->path_log,"hts-cache/new.txt"),"wb"); + cache->txt=fopen(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.txt"),"wb"); if (cache->txt) { 
fprintf(cache->txt,"date\tsize'/'remotesize\tflags(request:Update,Range state:File response:Modified,Chunked,gZipped)\t"); fprintf(cache->txt,"statuscode\tstatus ('servermsg')\tMIME\tEtag|Date\tURL\tlocalfile\t(from URL)"LF); @@ -1596,17 +1599,18 @@ char* readfile(char* fil) { char* readfile2(char* fil, LLint* size) { char* adr=NULL; + char catbuff[CATBUFF_SIZE]; INTsys len=0; len=fsize(fil); if (len >= 0) { // exists FILE* fp; - fp=fopen(fconv(fil),"rb"); + fp=fopen(fconv(catbuff, fil),"rb"); if (fp!=NULL) { // n'existe pas (!) adr=(char*) malloct(len+1); if (size != NULL) *size = len; if (adr!=NULL) { - if (len > 0 && (INTsys)fread(adr,1,len,fp) != len) { // fichier endommagé ? + if (len > 0 && fread(adr,1,len,fp) != len) { // fichier endommagé ? freet(adr); adr=NULL; } else @@ -1621,8 +1625,9 @@ char* readfile2(char* fil, LLint* size) { char* readfile_or(char* fil,char* defaultdata) { char* realfile=fil; char* ret; + char catbuff[CATBUFF_SIZE]; if (!fexist(fil)) - realfile=fconcat(hts_rootdir(NULL),fil); + realfile=fconcat(catbuff,hts_rootdir(NULL),fil); ret=readfile(realfile); if (ret) return ret; @@ -1638,14 +1643,14 @@ char* readfile_or(char* fil,char* defaultdata) { // écriture/lecture d'une chaîne sur un fichier // -1 : erreur, sinon 0 -int cache_wstr(FILE* fp,char* s) { +int cache_wstr(FILE* fp,const char* s) { INTsys i; char buff[256+4]; - i = s != NULL ? strlen(s) : 0; + i = (s != NULL) ? 
((INTsys)strlen(s)) : 0; sprintf(buff,INTsysP "\n",i); - if (fwrite(buff,1,(INTsys)strlen(buff),fp) != strlen(buff)) + if (fwrite(buff,1,strlen(buff),fp) != strlen(buff)) return -1; - if (i > 0 && (INTsys)fwrite(s,1,i,fp) != i) + if (i > 0 && fwrite(s,1,i,fp) != i) return -1; return 0; } diff --git a/src/htscache.h b/src/htscache.h index b80a0ee..7a4bb5c 100644 --- a/src/htscache.h +++ b/src/htscache.h @@ -40,32 +40,49 @@ Please visit our Website: http://www.httrack.com #ifndef HTSCACHE_DEFH #define HTSCACHE_DEFH -#include "htscore.h" - /* Library internal definictions */ #ifdef HTS_INTERNAL_BYTECODE +#include "htsglobal.h" + +#include <stdlib.h> + +/* Forward definitions */ +#ifndef HTS_DEF_FWSTRUCT_httrackp +#define HTS_DEF_FWSTRUCT_httrackp +typedef struct httrackp httrackp; +#endif +#ifndef HTS_DEF_FWSTRUCT_cache_back +#define HTS_DEF_FWSTRUCT_cache_back +typedef struct cache_back cache_back; +#endif +#ifndef HTS_DEF_FWSTRUCT_htsblk +#define HTS_DEF_FWSTRUCT_htsblk +typedef struct htsblk htsblk; +#endif + // cache -void cache_mayadd(httrackp* opt,cache_back* cache,htsblk* r,char* url_adr,char* url_fil,char* url_save); -void cache_add(cache_back* cache,htsblk r,char* url_adr,char* url_fil,char* url_save,int all_in_cache,char* path_prefix); -htsblk cache_read(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* location); -htsblk cache_read_ro(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* location); -htsblk cache_readex(httrackp* opt,cache_back* cache,char* adr,char* fil,char* save,char* location,char* return_save,int readonly); -htsblk* cache_header(httrackp* opt,cache_back* cache,char* adr,char* fil,htsblk* r); +void cache_mayadd(httrackp* opt,cache_back* cache,htsblk* r,const char* url_adr,const char* url_fil,const char* url_save); +void cache_add(httrackp* opt,cache_back* cache,const htsblk *r,const char* url_adr,const char* url_fil,const char* url_save,int all_in_cache,const char* path_prefix); +htsblk 
cache_read(httrackp* opt,cache_back* cache,const char* adr,const char* fil,const char* save,char* location); +htsblk cache_read_ro(httrackp* opt,cache_back* cache,const char* adr,const char* fil,const char* save,char* location); +htsblk cache_readex(httrackp* opt,cache_back* cache,const char* adr,const char* fil,const char* save,char* location,char* return_save,int readonly); +htsblk* cache_header(httrackp* opt,cache_back* cache,const char* adr,const char* fil,htsblk* r); void cache_init(cache_back* cache,httrackp* opt); -int cache_writedata(FILE* cache_ndx,FILE* cache_dat,char* str1,char* str2,char* outbuff,int len); -int cache_readdata(cache_back* cache,char* str1,char* str2,char** inbuff,int* len); +int cache_writedata(FILE* cache_ndx,FILE* cache_dat,const char* str1,const char* str2,char* outbuff,int len); +int cache_readdata(cache_back* cache,const char* str1,const char* str2,char** inbuff,int* len); -int cache_wstr(FILE* fp,char* s); void cache_rstr(FILE* fp,char* s); char* cache_rstr_addr(FILE* fp); int cache_brstr(char* adr,char* s); int cache_quickbrstr(char* adr,char* s); int cache_brint(char* adr,int* i); void cache_rint(FILE* fp,int* i); -int cache_wint(FILE* fp,int i); void cache_rLLint(FILE* fp,LLint* i); + +int cache_wstr(FILE* fp,const char* s); +int cache_wint(FILE* fp,int i); int cache_wLLint(FILE* fp,LLint i); #endif diff --git a/src/htscatchurl.c b/src/htscatchurl.c index 3832019..74a2439 100644 --- a/src/htscatchurl.c +++ b/src/htscatchurl.c @@ -47,7 +47,7 @@ Please visit our Website: http://www.httrack.com #ifndef _WIN32_WCE #include <fcntl.h> #endif -#if HTS_WIN +#ifdef _WIN32 #else #include <arpa/inet.h> #endif @@ -102,7 +102,7 @@ HTSEXT_API T_SOC catch_url_init(int* port,char* adr) { // copie adresse SOCaddr_copyaddr(server, server_size, hp_loc->h_addr_list[0], hp_loc->h_length); - if ( (soc=socket(SOCaddr_sinfamily(server), SOCK_STREAM, 0)) != INVALID_SOCKET) { + if ( (soc = (T_SOC) socket(SOCaddr_sinfamily(server), SOCK_STREAM, 0)) != 
INVALID_SOCKET) { SOCaddr_initport(server, *port); if ( bind(soc,(struct sockaddr*) &server,server_size) == 0 ) { SOCaddr server2; @@ -162,7 +162,7 @@ HTSEXT_API int catch_url(T_SOC soc,char* url,char* method,char* data) { T_SOC soc2; struct sockaddr dummyaddr; int dummylen = sizeof(struct sockaddr); - while ( (soc2=accept(soc,&dummyaddr,&dummylen)) == INVALID_SOCKET); + while ( (soc2 = (T_SOC) accept(soc,&dummyaddr,&dummylen)) == INVALID_SOCKET); /* #ifdef _WIN32 closesocket(soc); @@ -224,7 +224,7 @@ HTSEXT_API int catch_url(T_SOC soc,char* url,char* method,char* data) { //strcatbuff(data,"\r\n"); if (blkretour.totalsize>0) { int len=(int)min(blkretour.totalsize,32000); - int pos=strlen(data); + int pos = (int) strlen(data); // Copier le reste (post éventuel) while((len>0) && ((r=recv(soc,(char*) data+pos,len,0))>0) ) { pos+=r; @@ -234,7 +234,7 @@ HTSEXT_API int catch_url(T_SOC soc,char* url,char* method,char* data) { } // Envoyer page sprintf(line,CATCH_RESPONSE); - send(soc,line,strlen(line),0); + send(soc,line,(int)strlen(line),0); // OK! 
retour=1; } diff --git a/src/htscore.c b/src/htscore.c index 48d776f..370f529 100644 --- a/src/htscore.c +++ b/src/htscore.c @@ -67,56 +67,10 @@ Please visit our Website: http://www.httrack.com /* END specific definitions */ - -/* HTML parsing */ -#if HTS_ANALYSTE - -t_hts_htmlcheck_init hts_htmlcheck_init = NULL; -t_hts_htmlcheck_uninit hts_htmlcheck_uninit = NULL; -t_hts_htmlcheck_start hts_htmlcheck_start = NULL; -t_hts_htmlcheck_end hts_htmlcheck_end = NULL; -t_hts_htmlcheck_chopt hts_htmlcheck_chopt = NULL; -t_hts_htmlcheck_process hts_htmlcheck_preprocess = NULL; -t_hts_htmlcheck_process hts_htmlcheck_postprocess = NULL; -t_hts_htmlcheck hts_htmlcheck = NULL; -t_hts_htmlcheck_query hts_htmlcheck_query = NULL; -t_hts_htmlcheck_query2 hts_htmlcheck_query2 = NULL; -t_hts_htmlcheck_query3 hts_htmlcheck_query3 = NULL; -t_hts_htmlcheck_loop hts_htmlcheck_loop = NULL; -t_hts_htmlcheck_check hts_htmlcheck_check = NULL; -t_hts_htmlcheck_check_mime hts_htmlcheck_check_mime = NULL; -t_hts_htmlcheck_pause hts_htmlcheck_pause = NULL; -t_hts_htmlcheck_filesave hts_htmlcheck_filesave = NULL; -t_hts_htmlcheck_filesave2 hts_htmlcheck_filesave2 = NULL; -t_hts_htmlcheck_linkdetected hts_htmlcheck_linkdetected = NULL; -t_hts_htmlcheck_linkdetected2 hts_htmlcheck_linkdetected2 = NULL; -t_hts_htmlcheck_xfrstatus hts_htmlcheck_xfrstatus = NULL; -t_hts_htmlcheck_savename hts_htmlcheck_savename = NULL; -t_hts_htmlcheck_sendhead hts_htmlcheck_sendhead = NULL; -t_hts_htmlcheck_receivehead hts_htmlcheck_receivehead = NULL; - -extern void set_wrappers(void); - -char _hts_errmsg[1100]=""; -int _hts_in_html_parsing=0; -int _hts_in_html_done=0; // % done -int _hts_in_html_poll=0; // parsing -int _hts_setpause=0; -//httrackp* _hts_setopt=NULL; -char** _hts_addurl=NULL; - /* external modules */ extern int hts_parse_externals(htsmoduleStruct* str); extern void htspe_init(void); -// -int _hts_cancel=0; -#endif - - - -int exit_xh; /* quick exit (fatal error or interrupt) */ - /* debug */ #if 
DEBUG_SHOWTYPES char REG[32768]="\n"; @@ -155,18 +109,11 @@ int nsocDEBUG=0; int longest_hash[3]={0,0,0},hashnumber=0; #endif -// demande d'interaction avec le shell -#if HTS_ANALYSTE -char HTbuff[2048]; -#endif - - - // Début de httpmirror, routines annexes // version 1 pour httpmirror // flusher si on doit lire peu à peu le fichier -#define test_flush if (opt.flush) { fflush(opt.log); fflush(opt.errlog); } +#define test_flush if (opt->flush) { fflush(opt->log); fflush(opt->log); } // pour alléger la syntaxe, des raccourcis sont créés #define urladr (liens[ptr]->adr) @@ -177,18 +124,12 @@ char HTbuff[2048]; // au cas où nous devons quitter rapidement xhttpmirror (plus de mémoire, etc) // note: partir de liens_max.. vers 0.. sinon erreur de violation de mémoire: les liens suivants // ne sont plus à nous.. agh! [dur celui-là] -#if HTS_ANALYSTE #define HTMLCHECK_UNINIT { \ -if ( (opt.debug>0) && (opt.log!=NULL) ) { \ -fspc(opt.log,"info"); fprintf(opt.log,"engine: end"LF); \ -} \ -if (hts_htmlcheck_end != NULL) { \ - hts_htmlcheck_end(); \ +if ( (opt->debug>0) && (opt->log!=NULL) ) { \ +HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"engine: end"LF); \ } \ +RUN_CALLBACK0(opt, end); \ } -#else - #define HTMLCHECK_UNINIT -#endif #define XH_extuninit do { \ int i; \ @@ -211,7 +152,7 @@ if (hts_htmlcheck_end != NULL) { \ if (filters) { \ freet(filters); filters=NULL; \ } \ - back_delete_all(&opt,&cache,sback); \ + back_delete_all(opt,&cache,sback); \ back_free(&sback); \ checkrobots_free(&robots);\ if (cache.use) { freet(cache.use); cache.use=NULL; } \ @@ -228,18 +169,18 @@ if (hts_htmlcheck_end != NULL) { \ if (cache.olddat) { fclose(cache.olddat); cache.olddat=NULL; } \ if (cache.lst) { fclose(cache.lst); cache.lst=NULL; } \ if (cache.txt) { fclose(cache.txt); cache.txt=NULL; } \ - if (opt.log) fflush(opt.log); \ - if (opt.errlog) fflush(opt.errlog);\ + if (opt->log) fflush(opt->log); \ + if (opt->log) fflush(opt->log);\ if (makestat_fp) { fclose(makestat_fp); 
makestat_fp=NULL; } \ if (maketrack_fp){ fclose(maketrack_fp); maketrack_fp=NULL; } \ - if (opt.accept_cookie) cookie_save(opt.cookie,fconcat(opt.path_log,"cookies.txt")); \ + if (opt->accept_cookie) cookie_save(opt->cookie,fconcat(OPT_GET_BUFF(opt),StringBuff(opt->path_log),"cookies.txt")); \ if (makeindex_fp) { fclose(makeindex_fp); makeindex_fp=NULL; } \ if (cache_hashtable) { inthash_delete(&cache_hashtable); } \ if (cache_tests) { inthash_delete(&cache_tests); } \ if (template_header) { freet(template_header); template_header=NULL; } \ if (template_body) { freet(template_body); template_body=NULL; } \ if (template_footer) { freet(template_footer); template_footer=NULL; } \ - clearCallbacks(&opt.state.callbacks); \ + clearCallbacks(&opt->state.callbacks); \ /*structcheck_init(-1);*/ \ } while(0) #define XH_uninit do { XH_extuninit; if (r.adr) { freet(r.adr); r.adr=NULL; } } while(0) @@ -254,7 +195,7 @@ if (hts_htmlcheck_end != NULL) { \ #define liens_record(A,F,S,FA,FF,NORM) { \ int notecode=0; \ -int lienurl_len=((sizeof(lien_url)+HTS_ALIGN-1)/HTS_ALIGN)*HTS_ALIGN,\ +size_t lienurl_len=((sizeof(lien_url)+HTS_ALIGN-1)/HTS_ALIGN)*HTS_ALIGN,\ adr_len=strlen(A),\ fil_len=strlen(F),\ sav_len=strlen(S),\ @@ -315,7 +256,7 @@ if (makeindex_fp) { \ fflush(makeindex_fp); \ fclose(makeindex_fp); /* à ne pas oublier sinon on passe une nuit blanche */ \ makeindex_fp=NULL; \ - usercommand(&opt,0,NULL,fconcat(opt.path_html,"index.html"),"",""); \ + usercommand(opt,0,NULL,fconcat(OPT_GET_BUFF(opt),StringBuff(opt->path_html),"index.html"),"",""); \ } \ } \ makeindex_done=1; /* ok c'est fait */ \ @@ -326,8 +267,7 @@ makeindex_done=1; /* ok c'est fait */ \ // Début de httpmirror, robot // url1 peut être multiple -int httpmirror(char* url1,httrackp* ptropt) { - httrackp BIGSTK opt; // structure d'options +int httpmirror(char* url1, httrackp* opt) { char* primary=NULL; // première page, contenant les liens à scanner int lien_tot=0; // nombre de liens pour le moment lien_url** 
liens=NULL; // les pointeurs sur les liens @@ -335,7 +275,7 @@ int httpmirror(char* url1,httrackp* ptropt) { hash_struct* hashptr = &hash; t_cookie BIGSTK cookie; // gestion des cookies int lien_max=0; - int lien_size=0; // octets restants dans buffer liens dispo + size_t lien_size=0; // octets restants dans buffer liens dispo char* lien_buffer=NULL; // buffer liens actuel int add_tab_alloc=256000; // +256K de liens à chaque fois //char* tab_alloc=NULL; @@ -344,7 +284,6 @@ int httpmirror(char* url1,httrackp* ptropt) { int numero_passe=0; // deux passes pour html puis images struct_back* sback=NULL; htsblk BIGSTK r; // retour de certaines fonctions - TStamp lastime=0; // pour affichage infos de tmp en tmp // pour les stats, nombre de fichiers & octets écrits LLint stat_fragment=0; // pour la fragmentation //TStamp istat_timestart; // départ pour calcul instantanné @@ -378,8 +317,6 @@ int httpmirror(char* url1,httrackp* ptropt) { // char *template_header=NULL,*template_body=NULL,*template_footer=NULL; // - opt = *ptropt; - // codebase[0]='\0'; base[0]='\0'; // cookie.auth.next=NULL; @@ -394,66 +331,56 @@ int httpmirror(char* url1,httrackp* ptropt) { /* reset stats */ HTS_STAT.HTS_TOTAL_RECV=0; HTS_STAT.istat_bytes[0]=HTS_STAT.istat_bytes[1]=0; - /* - if (opt.aff_progress) - lastime=HTS_STAT.stat_timestart; - */ - if (opt.shell) { + if (opt->shell) { last_info_shell=HTS_STAT.stat_timestart; } - if ((opt.makestat) || (opt.maketrack)){ + if ((opt->makestat) || (opt->maketrack)){ makestat_time=HTS_STAT.stat_timestart; } - // initialiser compteur erreurs - fspc(NULL,NULL); - // init external modules htspe_init(); // initialiser cookie - if (opt.accept_cookie) { - opt.cookie=&cookie; + if (opt->accept_cookie) { + opt->cookie=&cookie; cookie.max_len=30000; // max len strcpybuff(cookie.data,""); // Charger cookies.txt par défaut ou cookies.txt du miroir - cookie_load(opt.cookie,opt.path_log,"cookies.txt"); - cookie_load(opt.cookie,"","cookies.txt"); + 
cookie_load(opt->cookie,StringBuff(opt->path_log),"cookies.txt"); + cookie_load(opt->cookie,"","cookies.txt"); } else - opt.cookie=NULL; + opt->cookie=NULL; // initialiser exit_xh - exit_xh=0; // sortir prématurément (var globale) + opt->state.exit_xh=0; // sortir prématurément (var globale) // initialiser usercommand - usercommand(&opt,opt.sys_com_exec,opt.sys_com,"","",""); + usercommand(opt,opt->sys_com_exec,StringBuff(opt->sys_com),"","",""); // initialiser structcheck // structcheck_init(1); - // initialiser tableau options accessible par d'autres fonctions (signal) - hts_declareoptbuffer(&opt); - // initialiser verif_backblue - verif_backblue(&opt,NULL); - verif_external(0,0); - verif_external(1,0); + verif_backblue(opt,NULL); + verif_external(opt,0,0); + verif_external(opt,1,0); // et templates html - template_header=readfile_or(fconcat(opt.path_bin,"templates/index-header.html"),HTS_INDEX_HEADER); - template_body=readfile_or(fconcat(opt.path_bin,"templates/index-body.html"),HTS_INDEX_BODY); - template_footer=readfile_or(fconcat(opt.path_bin,"templates/index-footer.html"),HTS_INDEX_FOOTER); + template_header=readfile_or(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_bin),"templates/index-header.html"),HTS_INDEX_HEADER); + template_body=readfile_or(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_bin),"templates/index-body.html"),HTS_INDEX_BODY); + template_footer=readfile_or(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_bin),"templates/index-footer.html"),HTS_INDEX_FOOTER); // initialiser mimedefs - get_userhttptype(1,opt.mimedefs,NULL); + //get_userhttptype(opt,1,StringBuff(opt->mimedefs),NULL); // Initialiser indexation - if (opt.kindex) - index_init(opt.path_html); + if (opt->kindex) + index_init(StringBuff(opt->path_html)); // effacer bloc cache memset(&cache, 0, sizeof(cache_back)); - cache.type=opt.cache; // cache? - cache.errlog=opt.errlog; // err log? + cache.type=opt->cache; // cache? + cache.errlog=cache.log=opt->log; // err log? 
cache.ptr_ant=cache.ptr_last=0; // pointeur pour anticiper // initialiser hash cache @@ -471,32 +398,29 @@ int httpmirror(char* url1,httrackp* ptropt) { cache.hashtable=(void*)cache_hashtable; /* copy backcache hash */ cache.cached_tests=(void*)cache_tests; /* copy of cache_tests */ - // initialiser cache DNS - _hts_lockdns(-999); - // robots.txt strcpybuff(robots.adr,"!"); // dummy robots.token[0]='\0'; robots.next=NULL; // suivant - opt.robotsptr = &robots; + opt->robotsptr = &robots; // effacer filters - opt.maxfilter = maximum(opt.maxfilter, 128); - if (filters_init(&filters, opt.maxfilter, 0) == 0) { + opt->maxfilter = maximum(opt->maxfilter, 128); + if (filters_init(&filters, opt->maxfilter, 0) == 0) { printf("PANIC! : Not enough memory [%d]\n",__LINE__); XH_extuninit; return 0; } - opt.filters.filters=&filters; + opt->filters.filters=&filters; // - opt.filters.filptr=&filptr; - //opt.filters.filter_max=&filter_max; + opt->filters.filptr=&filptr; + //opt->filters.filter_max=&filter_max; // hash table - opt.hash = &hash; + opt->hash = &hash; // tableau de pointeurs sur les liens - lien_max=maximum(opt.maxlink,32); + lien_max=maximum(opt->maxlink,32); liens=(lien_url**) malloct(lien_max*sizeof(lien_url*)); // tableau de pointeurs sur les liens if (liens==NULL) { printf("PANIC! 
: Not enough memory [%d]\n",__LINE__); @@ -519,15 +443,15 @@ int httpmirror(char* url1,httrackp* ptropt) { hash.liens = liens; hash.max_lien=0; } - + // copier adresse(s) dans liste des adresses { char *a=url1; int primary_len=8192; - if (strnotempty(opt.filelist)) { - primary_len+=max(0,fsize(opt.filelist)*2); + if (StringNotEmpty(opt->filelist)) { + primary_len += max(0, fsize(StringBuff(opt->filelist))*2); } - primary_len+=strlen(url1)*2; + primary_len += (int) strlen(url1)*2; // création de la première page, qui contient les liens de base à scanner // c'est plus propre et plus logique que d'entrer à la main les liens dans la pile @@ -586,19 +510,19 @@ int httpmirror(char* url1,httrackp* ptropt) { filptr++; /* sanity check */ - if (filptr + 1 >= opt.maxfilter) { - opt.maxfilter += HTS_FILTERSINC; - if (filters_init(&filters, opt.maxfilter, HTS_FILTERSINC) == 0) { + if (filptr + 1 >= opt->maxfilter) { + opt->maxfilter += HTS_FILTERSINC; + if (filters_init(&filters, opt->maxfilter, HTS_FILTERSINC) == 0) { printf("PANIC! 
: Too many filters : >%d [%d]\n",filptr,__LINE__); - if (opt.errlog) { - fprintf(opt.errlog,LF"Too many filters, giving up..(>%d)"LF,filptr); - fprintf(opt.errlog,"To avoid that: use #F option for more filters (example: -#F5000)"LF); + if (opt->log) { + fprintf(opt->log,LF"Too many filters, giving up..(>%d)"LF,filptr); + fprintf(opt->log,"To avoid that: use #F option for more filters (example: -#F5000)"LF); test_flush; } XH_extuninit; return 0; } - //opt.filters.filters=filters; + //opt->filters.filters=filters; } } @@ -622,15 +546,15 @@ int httpmirror(char* url1,httrackp* ptropt) { /* load URL file list */ /* OPTIMIZED for fast load */ - if (strnotempty(opt.filelist)) { + if (StringNotEmpty(opt->filelist)) { char* filelist_buff=NULL; - INTsys filelist_sz=fsize(opt.filelist); + off_t filelist_sz = fsize(StringBuff(opt->filelist)); if (filelist_sz>0) { - FILE* fp=fopen(opt.filelist,"rb"); + FILE* fp=fopen(StringBuff(opt->filelist),"rb"); if (fp) { - filelist_buff=malloct(filelist_sz + 2); + filelist_buff = malloct(filelist_sz + 2); if (filelist_buff) { - if ((INTsys)fread(filelist_buff,1,filelist_sz,fp) != filelist_sz) { + if (fread(filelist_buff,1,filelist_sz,fp) != filelist_sz) { freet(filelist_buff); filelist_buff=NULL; } else { @@ -662,26 +586,26 @@ int httpmirror(char* url1,httrackp* ptropt) { } } // fclose(fp); - if (opt.log!=NULL) { - fspc(opt.log,"info"); fprintf(opt.log,"%d links added from %s"LF,n,opt.filelist); test_flush; + if (opt->log!=NULL) { + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"%d links added from %s"LF,n,StringBuff(opt->filelist)); test_flush; } // Free buffer freet(filelist_buff); } else { - if (opt.errlog!=NULL) { - fspc(opt.errlog,"error"); fprintf(opt.errlog,"Could not include URL list: %s"LF,opt.filelist); test_flush; + if (opt->log!=NULL) { + HTS_LOG(opt,LOG_ERROR); fprintf(opt->log,"Could not include URL list: %s"LF,StringBuff(opt->filelist)); test_flush; } } } // lien primaire - 
liens_record("primary","/primary",fslash(fconcat(opt.path_html,"index.html")),"","",opt.urlhack); + liens_record("primary","/primary",fslash(OPT_GET_BUFF(opt),fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_html),"index.html")),"","",opt->urlhack); if (liens[lien_tot]==NULL) { // erreur, pas de place réservée printf("PANIC! : Not enough memory [%d]\n",__LINE__); - if (opt.errlog) { - fprintf(opt.errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url))); + if (opt->log) { + fprintf(opt->log,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url))); test_flush; } XH_extuninit; // désallocation mémoire & buffers @@ -689,9 +613,9 @@ int httpmirror(char* url1,httrackp* ptropt) { } liens[lien_tot]->testmode=0; // pas mode test liens[lien_tot]->link_import=0; // pas mode import - liens[lien_tot]->depth=opt.depth+1; // lien de priorité maximale + liens[lien_tot]->depth=opt->depth+1; // lien de priorité maximale liens[lien_tot]->pass2=0; // 1ère passe - liens[lien_tot]->retry=opt.retry; // lien de priorité maximale + liens[lien_tot]->retry=opt->retry; // lien de priorité maximale liens[lien_tot]->premier=lien_tot; // premier lien, objet-père=objet liens[lien_tot]->precedent=lien_tot; // lien précédent lien_tot++; @@ -699,18 +623,14 @@ int httpmirror(char* url1,httrackp* ptropt) { // Initialiser cache { int backupXFR = htsMemoryFastXfr; -#if HTS_ANALYSTE - _hts_in_html_parsing=4; -#endif - if (!hts_htmlcheck_loop(NULL,0,0,0,lien_tot,0,NULL)) { - exit_xh=1; // exit requested + opt->state._hts_in_html_parsing=4; + if (!RUN_CALLBACK7(opt, loop, NULL,0,0,0,lien_tot,0,NULL)) { + opt->state.exit_xh=1; // exit requested } htsMemoryFastXfr = 1; /* fast load */ - cache_init(&cache,&opt); + cache_init(&cache,opt); htsMemoryFastXfr = backupXFR; -#if HTS_ANALYSTE - _hts_in_html_parsing=0; -#endif + opt->state._hts_in_html_parsing=0; } } @@ -728,8 +648,8 @@ int httpmirror(char* url1,httrackp* ptropt) { 
#endif // backing - //soc_max=opt.maxsoc; - if (opt.maxsoc>0) { + //soc_max=opt->maxsoc; + if (opt->maxsoc>0) { #if BDEBUG==2 _CLRSCR; #endif @@ -737,10 +657,10 @@ int httpmirror(char* url1,httrackp* ptropt) { // On prévoit large: les fichiers HTML ne prennent que peu de place en mémoire, et les // fichiers non html sont sauvés en direct sur disque. // --> 1024 entrées + 32 entrées par socket en supplément - sback = back_new(opt.maxsoc*32+1024); + sback = back_new(opt->maxsoc*32+1024); if (sback == NULL) { - if (opt.errlog) - fprintf(opt.errlog,"Not enough memory, can not allocate %d bytes"LF,(int)((opt.maxsoc+1)*sizeof(lien_back))); + if (opt->log) + fprintf(opt->log,"Not enough memory, can not allocate %d bytes"LF,(int)((opt->maxsoc+1)*sizeof(lien_back))); return 0; } } @@ -750,8 +670,8 @@ int httpmirror(char* url1,httrackp* ptropt) { test_flush; // statistiques - if (opt.makestat) { - makestat_fp=fopen(fconcat(opt.path_log,"hts-stats.txt"),"wb"); + if (opt->makestat) { + makestat_fp=fopen(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-stats.txt"),"wb"); if (makestat_fp != NULL) { fprintf(makestat_fp,"HTTrack statistics report, every minutes"LF LF); fflush(makestat_fp); @@ -759,8 +679,8 @@ int httpmirror(char* url1,httrackp* ptropt) { } // tracking -- débuggage - if (opt.maketrack) { - maketrack_fp=fopen(fconcat(opt.path_log,"hts-track.txt"),"wb"); + if (opt->maketrack) { + maketrack_fp=fopen(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-track.txt"),"wb"); if (maketrack_fp != NULL) { fprintf(maketrack_fp,"HTTrack tracking report, every minutes"LF LF); fflush(maketrack_fp); @@ -769,20 +689,16 @@ int httpmirror(char* url1,httrackp* ptropt) { // on n'a pas de liens!! (exemple: httrack www.* impossible sans départ..) if (lien_tot<=0) { - if (opt.errlog) { - fprintf(opt.errlog,"Error! You MUST specify at least one complete URL, and not only wildcards!"LF); + if (opt->log) { + fprintf(opt->log,"Error! 
You MUST specify at least one complete URL, and not only wildcards!"LF); } } /* Send options to callback functions */ -#if HTS_ANALYSTE - if (hts_htmlcheck_chopt != NULL) { - hts_htmlcheck_chopt(&opt); - } -#endif + RUN_CALLBACK0(opt, chopt); // attendre une certaine heure.. - if (opt.waittime>0) { + if (opt->waittime>0) { int rollover=0; int ok=0; { @@ -794,12 +710,12 @@ int httpmirror(char* url1,httrackp* ptropt) { tl+=A->tm_sec; tl+=A->tm_min*60; tl+=A->tm_hour*60*60; - if (tl>opt.waittime) // attendre minuit + if (tl>opt->waittime) // attendre minuit rollover=1; } // attendre.. - _hts_in_html_parsing=5; + opt->state._hts_in_html_parsing=5; do { TStamp tl=0; time_t tt; @@ -811,60 +727,49 @@ int httpmirror(char* url1,httrackp* ptropt) { tl+=A->tm_hour*60*60; if (rollover) { - if (tl<=opt.waittime) + if (tl<=opt->waittime) rollover=0; // attendre heure } else { - if (tl>opt.waittime) + if (tl>opt->waittime) ok=1; // ok! } -#if HTS_ANALYSTE - if (hts_htmlcheck_loop != NULL) { + { int r; if (rollover) - r=hts_htmlcheck_loop(sback->lnk, sback->count,0,0,lien_tot,(int) (opt.waittime-tl+24*3600),NULL); + r = RUN_CALLBACK7(opt, loop, sback->lnk, sback->count,0,0,lien_tot,(int) (opt->waittime-tl+24*3600),NULL); else - r=hts_htmlcheck_loop(sback->lnk, sback->count,0,0,lien_tot,(int) (opt.waittime-tl),NULL); + r = RUN_CALLBACK7(opt, loop, sback->lnk, sback->count,0,0,lien_tot,(int) (opt->waittime-tl),NULL); if (!r) { - exit_xh=1; // exit requested + opt->state.exit_xh=1; // exit requested ok=1; } else Sleep(100); } -#endif - } while(!ok); - _hts_in_html_parsing=0; + + } while(!ok); + opt->state._hts_in_html_parsing=0; // note: recopie de plus haut // noter heure actuelle de départ en secondes HTS_STAT.stat_timestart=time_local(); - /* - if (opt.aff_progress) - lastime=HTS_STAT.stat_timestart; - */ - if (opt.shell) { + if (opt->shell) { last_info_shell=HTS_STAT.stat_timestart; } - if ((opt.makestat) || (opt.maketrack)){ + if ((opt->makestat) || (opt->maketrack)){ 
makestat_time=HTS_STAT.stat_timestart; } } /* Info for wrappers */ - if ( (opt.debug>0) && (opt.log!=NULL) ) { - fspc(opt.log,"info"); fprintf(opt.log,"engine: start"LF); + if ( (opt->debug>0) && (opt->log!=NULL) ) { + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"engine: start"LF); } -#if HTS_ANALYSTE - if (hts_htmlcheck_start != NULL) { - if (!hts_htmlcheck_start(&opt)) { - XH_extuninit; - return 1; - } + if (!RUN_CALLBACK0(opt, start)) { + XH_extuninit; + return 1; } - set_wrappers(); // _start() is allowed to set other wrappers -#endif - // ------------------------------------------------------------ @@ -883,13 +788,19 @@ int httpmirror(char* url1,httrackp* ptropt) { memset(&r, 0, sizeof(htsblk)); r.soc=INVALID_SOCKET; r.location=loc; // en cas d'erreur 3xx (moved) // recopier proxy - memcpy(&(r.req.proxy), &opt.proxy, sizeof(opt.proxy)); + if ((r.req.proxy.active = opt->proxy.active)) { + if (StringBuff(opt->proxy.bindhost) != NULL) + strcpybuff(r.req.proxy.bindhost, StringBuff(opt->proxy.bindhost)); + if (StringBuff(opt->proxy.name) != NULL) + strcpybuff(r.req.proxy.name, StringBuff(opt->proxy.name)); + r.req.proxy.port = opt->proxy.port; + } // et user-agent - strcpybuff(r.req.user_agent,opt.user_agent); - strcpybuff(r.req.referer,opt.referer); - strcpybuff(r.req.from,opt.from); - strcpybuff(r.req.lang_iso,opt.lang_iso); - r.req.user_agent_send=opt.user_agent_send; + strcpy(r.req.user_agent,StringBuff(opt->user_agent)); + strcpy(r.req.referer,StringBuff(opt->referer)); + strcpy(r.req.from,StringBuff(opt->from)); + strcpy(r.req.lang_iso,StringBuff(opt->lang_iso)); + r.req.user_agent_send=opt->user_agent_send; if (!error) { @@ -901,11 +812,11 @@ int httpmirror(char* url1,httrackp* ptropt) { ( (liens[ptr]->pass2 == -1) ) ) ) { // sauter si lien annulé (ou fil vide) - if ((opt.debug>1) && (opt.log!=NULL)) { + if ((opt->debug>1) && (opt->log!=NULL)) { if (liens[ptr] != NULL && liens[ptr]->pass2 == -1) { - fspc(opt.log,"debug"); fprintf(opt.log,"link #%d is ready, 
skipping: %s%s.."LF,ptr,((urladr != NULL)?(urladr):(" ")),((urlfil != NULL)?(urlfil):(" "))); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"link #%d is ready, skipping: %s%s.."LF,ptr,((urladr != NULL)?(urladr):(" ")),((urlfil != NULL)?(urlfil):(" "))); } else { - fspc(opt.log,"debug"); fprintf(opt.log,"link #%d seems ready, skipping: %s%s.."LF,ptr,((urladr != NULL)?(urladr):(" ")),((urlfil != NULL)?(urlfil):(" "))); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"link #%d seems ready, skipping: %s%s.."LF,ptr,((urladr != NULL)?(urladr):(" ")),((urlfil != NULL)?(urlfil):(" "))); } test_flush; } @@ -918,8 +829,8 @@ int httpmirror(char* url1,httrackp* ptropt) { } if (liens[ptr]) { // on a qq chose à récupérer? - if ( (opt.debug>1) && (opt.log!=NULL) ) { - fspc(opt.log,"debug"); fprintf(opt.log,"Wait get: %s%s"LF,urladr,urlfil); + if ( (opt->debug>1) && (opt->log!=NULL) ) { + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"Wait get: %s%s"LF,urladr,urlfil); test_flush; #if DEBUG_ROBOTS if (strcmp(urlfil,"/robots.txt") == 0) { @@ -931,11 +842,11 @@ int httpmirror(char* url1,httrackp* ptropt) { // DEBUT --RECUPERATION LIEN--- if (ptr==0) { // premier lien à parcourir: lien primaire construit avant r.adr=primary; primary=NULL; - r.statuscode=200; + r.statuscode=HTTP_OK; r.size=strlen(r.adr); r.soc=INVALID_SOCKET; strcpybuff(r.contenttype,"text/html"); - /*} else if (opt.maxsoc<=0) { // fichiers 1 à 1 en attente (pas de backing) + /*} else if (opt->maxsoc<=0) { // fichiers 1 à 1 en attente (pas de backing) // charger le fichier en mémoire tout bêtement r=xhttpget(urladr,urlfil); // @@ -960,12 +871,12 @@ int httpmirror(char* url1,httrackp* ptropt) { str.mime = r.contenttype; str.url_host = urladr; str.url_file = urlfil; - str.size = (int) r.size; + str.size = (const int) r.size; /* */ str.addLink = htsAddLink; /* */ str.liens = liens; - str.opt = &opt; + str.opt = opt; str.sback = sback; str.cache = &cache; str.hashptr = hashptr; @@ -981,7 +892,7 @@ int httpmirror(char* url1,httrackp* 
ptropt) { stre.r_ = &r; /* */ stre.error_ = &error; - stre.exit_xh_ = &exit_xh; + stre.exit_xh_ = &opt->state.exit_xh; stre.store_errpage_ = &store_errpage; /* */ stre.base = base; @@ -1037,8 +948,8 @@ int httpmirror(char* url1,httrackp* ptropt) { } else { // lien vide.. - if (opt.errlog && opt.debug > 0) { - fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Warning, link #%d empty"LF,ptr); test_flush; + if (opt->log && opt->debug > 0) { + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Warning, link #%d empty"LF,ptr); test_flush; } error=1; goto jump_if_done; @@ -1061,11 +972,11 @@ int httpmirror(char* url1,httrackp* ptropt) { // error=1; // peut être que le fichier était trop gros? - if ((istoobig(r.totalsize,opt.maxfile_html,opt.maxfile_nonhtml,r.contenttype)) - || (istoobig(r.totalsize,opt.maxfile_html,opt.maxfile_nonhtml,r.contenttype))) { + if ((istoobig(opt,r.totalsize,opt->maxfile_html,opt->maxfile_nonhtml,r.contenttype)) + || (istoobig(opt,r.totalsize,opt->maxfile_html,opt->maxfile_nonhtml,r.contenttype))) { error=0; - if (opt.errlog) { - fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Big file cancelled according to user's preferences: %s%s"LF,urladr,urlfil); + if (opt->log) { + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Big file cancelled according to user's preferences: %s%s"LF,urladr,urlfil); test_flush; } } @@ -1084,14 +995,14 @@ int httpmirror(char* url1,httrackp* ptropt) { // Content-disposition="foo.jpg" // -------------------- if (!error) { - if (r.statuscode == 200) { // OK (ou 304 en backing) + if (r.statuscode == HTTP_OK) { // OK (ou 304 en backing) if (r.adr) { // Written file - if ( (is_hypertext_mime(r.contenttype, urlfil)) /* Is HTML or Js, .. */ + if ( (is_hypertext_mime(opt,r.contenttype, urlfil)) /* Is HTML or Js, .. */ /* NO - real media is real media, and mms is mms, not HTML */ /*|| (may_be_hypertext_mime(r.contenttype, urlfil) && (r.adr) )*/ /* Is real media, .. */ ) { if (strnotempty(r.cdispo)) { // Content-disposition set! 
- if (ishtml(savename) == 0) { // Non HTML!! + if (ishtml(opt, savename) == 0) { // Non HTML!! // patch it! strcpybuff(r.contenttype,"application/octet-stream"); } @@ -1103,8 +1014,8 @@ int httpmirror(char* url1,httrackp* ptropt) { // ------------------------------------ // BOGUS MIME TYPE HACK II (the revenge) // Check if we have a bogus MIME type - if ( (is_hypertext_mime(r.contenttype, urlfil)) /* Is HTML or Js, .. */ - || (may_be_hypertext_mime(r.contenttype, urlfil)) /* Is real media, .. */ + if ( (is_hypertext_mime(opt,r.contenttype, urlfil)) /* Is HTML or Js, .. */ + || (may_be_hypertext_mime(opt,r.contenttype, urlfil)) /* Is real media, .. */ ) { if ((r.adr) && (r.size)) { unsigned int map[256]; @@ -1150,7 +1061,7 @@ int httpmirror(char* url1,httrackp* ptropt) { #define CH_ADD_RNG2(c, r, r2, o) do { \ CH_ADD_RNG1(c, (r) * (r2), o); \ } while(0) - int new_capa = r.size / 2 + 1; + int new_capa = (int) ( r.size / 2 + 1 ); int new_offs = 0; unsigned char* prev_adr = (unsigned char*) r.adr; unsigned char* new_adr = (unsigned char*) malloct(new_capa); @@ -1203,7 +1114,7 @@ int httpmirror(char* url1,httrackp* ptropt) { /* U+D800..U+DFFF */ CH_ADD('?'); /* ill-formed */ - } else if (unic <= 0xFFFF) { + } else /* if (unic <= 0xFFFF) */ { /* U+E000..U+FFFF EE..EF 80..BF 80..BF */ unic -= 0xE000; CH_ADD_RNG2( unic, 0xbf - 0x80 + 1, 0xbf - 0x80 + 1, 0xee ); @@ -1211,8 +1122,8 @@ int httpmirror(char* url1,httrackp* ptropt) { CH_ADD_RNG0( unic, 0x80 ); } } - if (opt.errlog) { - fspc(opt.errlog,"warning"); fprintf(opt.errlog,"File %s%s converted from UCS2 to UTF-8 (old size: %d bytes, new size: %d bytes)"LF, urladr, urlfil, (int)r.size, new_offs); + if (opt->log) { + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"File %s%s converted from UCS2 to UTF-8 (old size: %d bytes, new size: %d bytes)"LF, urladr, urlfil, (int)r.size, new_offs); test_flush; } freet(r.adr); @@ -1226,8 +1137,8 @@ int httpmirror(char* url1,httrackp* ptropt) { #undef CH_ADD_RNG2 } else if ((nspec > r.size 
/ 100) && (nspec > 10)) { // too many special characters strcpybuff(r.contenttype,"application/octet-stream"); - if (opt.errlog) { - fspc(opt.errlog,"warning"); fprintf(opt.errlog,"File not parsed, looks like binary: %s%s"LF,urladr,urlfil); + if (opt->log) { + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"File not parsed, looks like binary: %s%s"LF,urladr,urlfil); test_flush; } } @@ -1250,7 +1161,7 @@ int httpmirror(char* url1,httrackp* ptropt) { // Check if we have to load locally the file // -------------------- //if (!error) { - // if (r.statuscode == 200) { // OK (ou 304 en backing) + // if (r.statuscode == HTTP_OK) { // OK (ou 304 en backing) // if (r.adr==NULL) { // Written file // if (may_be_hypertext_mime(r.contenttype, urlfil)) { // to parse! // LLint sz; @@ -1261,7 +1172,7 @@ int httpmirror(char* url1,httrackp* ptropt) { // if (fp) { // r.adr=malloct((int)sz + 2); // if (r.adr) { - // if (fread(r.adr,1,(INTsys)sz,fp) == sz) { + // if (fread(r.adr,1,sz,fp) == sz) { // r.size=sz; // r.adr[sz] = '\0'; // r.is_write = 0; @@ -1295,7 +1206,7 @@ int httpmirror(char* url1,httrackp* ptropt) { if (!error) { if (ptr>0) { if (liens[ptr]) { - xxcache_mayadd(&opt,&cache,&r,urladr,urlfil,savename); + xxcache_mayadd(opt,&cache,&r,urladr,urlfil,savename); } else error=1; } @@ -1328,7 +1239,7 @@ int httpmirror(char* url1,httrackp* ptropt) { str.addLink = htsAddLink; /* */ str.liens = liens; - str.opt = &opt; + str.opt = opt; str.sback = sback; str.cache = &cache; str.hashptr = hashptr; @@ -1344,7 +1255,7 @@ int httpmirror(char* url1,httrackp* ptropt) { stre.r_ = &r; /* */ stre.error_ = &error; - stre.exit_xh_ = &exit_xh; + stre.exit_xh_ = &opt->state.exit_xh; stre.store_errpage_ = &store_errpage; /* */ stre.base = base; @@ -1394,7 +1305,7 @@ int httpmirror(char* url1,httrackp* ptropt) { /* Load file if necessary */ if ( - is_hypertext_mime(r.contenttype, urlfil) /* Is HTML or Js, .. */ + may_be_hypertext_mime(opt,r.contenttype, urlfil) /* Is HTML or Js, .. 
*/ && (liens[ptr]->depth>0) /* Depth > 0 (recurse depth) */ && (r.adr==NULL) /* HTML Data exists */ && (!store_errpage) /* Not an html error page */ @@ -1402,15 +1313,15 @@ int httpmirror(char* url1,httrackp* ptropt) { ) { r.adr = readfile2(savename, &r.size); - (void) unlink(fconv(savename)); + (void) unlink(fconv(OPT_GET_BUFF(opt),savename)); if (r.adr != NULL) { - if ( (opt.debug>0) && (opt.log!=NULL) ) { - fspc(opt.log,"info"); fprintf(opt.log,"File successfully loaded for parsing: %s%s (%d bytes)"LF,urladr,urlfil,(int)r.size); + if ( (opt->debug>0) && (opt->log!=NULL) ) { + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"File successfully loaded for parsing: %s%s (%d bytes)"LF,urladr,urlfil,(int)r.size); test_flush; } } else { - if ( opt.log != NULL ) { - fspc(opt.log,"error"); fprintf(opt.log,"File could not be loaded for parsing: %s%s"LF,urladr,urlfil); + if ( opt->log != NULL ) { + HTS_LOG(opt,LOG_ERROR); fprintf(opt->log,"File could not be loaded for parsing: %s%s"LF,urladr,urlfil); test_flush; } } @@ -1437,19 +1348,19 @@ int httpmirror(char* url1,httrackp* ptropt) { /* if (ptr>0) { // "mis à jour" - if ((!r.notmodified) && (opt.is_update) && (!store_errpage)) { // page modifiée + if ((!r.notmodified) && (opt->is_update) && (!store_errpage)) { // page modifiée if (strnotempty(savename)) { HTS_STAT.stat_updated_files++; - if (opt.log!=NULL) { - //if ((opt.debug>0) && (opt.log!=NULL)) { - fspc(opt.log,"info"); fprintf(opt.log,"File updated: %s%s"LF,urladr,urlfil); + if (opt->log!=NULL) { + //if ((opt->debug>0) && (opt->log!=NULL)) { + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"File updated: %s%s"LF,urladr,urlfil); test_flush; } } } else { if (!store_errpage) { - if ( (opt.debug>0) && (opt.log!=NULL) ) { - fspc(opt.log,"info"); fprintf(opt.log,"File recorded: %s%s"LF,urladr,urlfil); + if ( (opt->debug>0) && (opt->log!=NULL) ) { + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"File recorded: %s%s"LF,urladr,urlfil); test_flush; } } @@ -1463,8 +1374,8 @@ int httpmirror(char* 
url1,httrackp* ptropt) { // traiter if ( - ( (is_hypertext_mime(r.contenttype, urlfil)) /* Is HTML or Js, .. */ - || (may_be_hypertext_mime(r.contenttype, urlfil) && r.adr != NULL ) /* Is real media, .. */ + ( (is_hypertext_mime(opt,r.contenttype, urlfil)) /* Is HTML or Js, .. */ + || (may_be_hypertext_mime(opt,r.contenttype, urlfil) && r.adr != NULL ) /* Is real media, .. */ ) && (liens[ptr]->depth>0) /* Depth > 0 (recurse depth) */ && (r.adr!=NULL) /* HTML Data exists */ @@ -1476,8 +1387,8 @@ int httpmirror(char* url1,httrackp* ptropt) { // Parsing HTML if (!error) { /* Info for wrappers */ - if ( (opt.debug>0) && (opt.log!=NULL) ) { - fspc(opt.log,"info"); fprintf(opt.log,"engine: check-html: %s%s"LF,urladr,urlfil); + if ( (opt->debug>0) && (opt->log!=NULL) ) { + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"engine: check-html: %s%s"LF,urladr,urlfil); } { char BIGSTK buff_err_msg[1024]; @@ -1497,7 +1408,7 @@ int httpmirror(char* url1,httrackp* ptropt) { str.addLink = htsAddLink; /* */ str.liens = liens; - str.opt = &opt; + str.opt = opt; str.sback = sback; str.cache = &cache; str.hashptr = hashptr; @@ -1513,7 +1424,7 @@ int httpmirror(char* url1,httrackp* ptropt) { stre.r_ = &r; /* */ stre.error_ = &error; - stre.exit_xh_ = &exit_xh; + stre.exit_xh_ = &opt->state.exit_xh; stre.store_errpage_ = &store_errpage; /* */ stre.base = base; @@ -1571,11 +1482,11 @@ int httpmirror(char* url1,httrackp* ptropt) { if (store_errpage) { // c'est une page d'erreur int create_html_warning=0; int create_gif_warning=0; - switch (ishtml(urlfil)) { /* pas fichier html */ + switch (ishtml(opt,urlfil)) { /* pas fichier html */ case 0: /* non html */ { char buff[256]; - guess_httptype(buff,urlfil); + guess_httptype(opt,buff,urlfil); if (strcmp(buff,"image/gif")==0) create_gif_warning=1; } @@ -1590,8 +1501,8 @@ int httpmirror(char* url1,httrackp* ptropt) { /* Créer message d'erreur ? 
*/ if (create_html_warning) { char* adr=(char*)malloct(strlen(HTS_DATA_ERROR_HTML)+1100); - if ( (opt.debug>0) && (opt.log!=NULL) ) { - fspc(opt.log,"info"); fprintf(opt.log,"Creating HTML warning file (%s)"LF,r.msg); + if ( (opt->debug>0) && (opt->log!=NULL) ) { + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"Creating HTML warning file (%s)"LF,r.msg); test_flush; } if (adr) { @@ -1604,8 +1515,8 @@ int httpmirror(char* url1,httrackp* ptropt) { } } else if (create_gif_warning) { char* adr=(char*)malloct(HTS_DATA_UNKNOWN_GIF_LEN); - if ( (opt.debug>0) && (opt.log!=NULL) ) { - fspc(opt.log,"info"); fprintf(opt.log,"Creating GIF dummy file (%s)"LF,r.msg); + if ( (opt->debug>0) && (opt->log!=NULL) ) { + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"Creating GIF dummy file (%s)"LF,r.msg); test_flush; } if (r.adr) { @@ -1642,7 +1553,7 @@ int httpmirror(char* url1,httrackp* ptropt) { *comm = '\0'; } /* strip spaces */ - llen=strlen(line); + llen = (int) strlen(line); while(llen > 0 && is_realspace(line[llen - 1])) { line[llen - 1] = '\0'; llen--; @@ -1670,7 +1581,7 @@ int httpmirror(char* url1,httrackp* ptropt) { a++; // sauter espace(s) if (strnotempty(a)) { #ifdef IGNORE_RESTRICTIVE_ROBOTS - if (strcmp(a,"/") != 0 || opt.robots >= 3) + if (strcmp(a,"/") != 0 || opt->robots >= 3) #endif { /* ignoring disallow: / */ if ( (strlen(buff) + strlen(a) + 8) < sizeof(buff)) { @@ -1684,8 +1595,8 @@ int httpmirror(char* url1,httrackp* ptropt) { } #ifdef IGNORE_RESTRICTIVE_ROBOTS else { - if (opt.errlog!=NULL) { - fspc(opt.errlog,"info"); fprintf(opt.errlog,"Note: %s robots.txt rules are too restrictive, ignoring /"LF,urladr); + if (opt->log!=NULL) { + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"Note: %s robots.txt rules are too restrictive, ignoring /"LF,urladr); test_flush; } } @@ -1696,14 +1607,14 @@ int httpmirror(char* url1,httrackp* ptropt) { } while( (bptr<r.size) && (strlen(buff) < (sizeof(buff) - 32) ) ); if (strnotempty(buff)) { checkrobots_set(&robots,urladr,buff); - if 
(opt.log!=NULL) { - if (opt.log != opt.errlog) { - fspc(opt.log,"info"); fprintf(opt.log,"Note: robots.txt forbidden links for %s are: %s"LF,urladr,infobuff); + if (opt->log!=NULL) { + if (opt->log != opt->log) { + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"Note: robots.txt forbidden links for %s are: %s"LF,urladr,infobuff); test_flush; } } - if (opt.errlog!=NULL) { - fspc(opt.errlog,"info"); fprintf(opt.errlog,"Note: due to %s remote robots.txt rules, links begining with these path will be forbidden: %s (see in the options to disable this)"LF,urladr,infobuff); + if (opt->log!=NULL) { + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"Note: due to %s remote robots.txt rules, links begining with these path will be forbidden: %s (see in the options to disable this)"LF,urladr,infobuff); test_flush; } } @@ -1723,7 +1634,7 @@ int httpmirror(char* url1,httrackp* ptropt) { // Si par la suite on doit retraiter ce fichier avec un niveau de récursion plus // fort, on supprimera le readme, et on scannera le fichier html! // note: sauté si store_errpage (càd si page d'erreur, non à scanner!) - if ( (is_hypertext_mime(r.contenttype, urlfil)) && (!store_errpage) && (r.size>0)) { // c'est du html!! + if ( (is_hypertext_mime(opt,r.contenttype, urlfil)) && (!store_errpage) && (r.size>0)) { // c'est du html!! 
char BIGSTK tempo[HTS_URLMAXSIZE*2]; FILE* fp; tempo[0]='\0'; @@ -1744,32 +1655,32 @@ int httpmirror(char* url1,httrackp* ptropt) { #endif if ((fp=fopen(tempo,"wb"))!=NULL) { - fprintf(fp,"Info-file generated by HTTrack Website Copier "HTTRACK_VERSION"%s"CRLF""CRLF, WHAT_is_available); + fprintf(fp,"Info-file generated by HTTrack Website Copier "HTTRACK_VERSION"%s"CRLF""CRLF, hts_get_version_info(opt)); fprintf(fp,"The file %s has not been scanned by HTS"CRLF,savename); fprintf(fp,"Some links contained in it may be unreachable locally."CRLF); fprintf(fp,"If you want to get these files, you have to set an upper recurse level, "); fprintf(fp,"and to rescan the URL."CRLF); fclose(fp); -#if HTS_WIN==0 +#ifndef _WIN32 chmod(tempo,HTS_ACCESS_FILE); #endif - usercommand(&opt,0,NULL,fconv(tempo),"",""); + usercommand(opt,0,NULL,fconv(OPT_GET_BUFF(opt),tempo),"",""); } - if ( (opt.debug>0) && (opt.errlog!=NULL) ) { - fspc(opt.errlog,"warning"); fprintf(opt.errlog,"Warning: store %s without scan: %s"LF,r.contenttype,savename); + if ( (opt->debug>0) && (opt->log!=NULL) ) { + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Warning: store %s without scan: %s"LF,r.contenttype,savename); test_flush; } } else { - if ((opt.getmode & 2)!=0) { // ok autorisé - if ( (opt.debug>1) && (opt.log!=NULL) ) { - fspc(opt.log,"debug"); fprintf(opt.log,"Store %s: %s"LF,r.contenttype,savename); + if ((opt->getmode & 2)!=0) { // ok autorisé + if ( (opt->debug>1) && (opt->log!=NULL) ) { + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"Store %s: %s"LF,r.contenttype,savename); test_flush; } } else { // lien non autorisé! 
(ex: cgi-bin en html) - if ((opt.debug>1) && (opt.log!=NULL)) { - fspc(opt.log,"debug"); fprintf(opt.log,"non-html file ignored after upload at %s : %s"LF,urladr,urlfil); + if ((opt->debug>1) && (opt->log!=NULL)) { + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"non-html file ignored after upload at %s : %s"LF,urladr,urlfil); test_flush; } if (r.adr) { @@ -1782,18 +1693,19 @@ int httpmirror(char* url1,httrackp* ptropt) { // ATTENTION C'EST ICI QU'ON SAUVE LE FICHIER!! if (r.adr) { - file_notify(urladr,urlfil, savename, 1, 1, r.notmodified); - if (filesave(&opt,r.adr,(int)r.size,savename,urladr,urlfil)!=0) { + file_notify(opt, urladr,urlfil, savename, 1, 1, r.notmodified); + if (filesave(opt,r.adr,(int)r.size,savename,urladr,urlfil)!=0) { int fcheck; if ((fcheck=check_fatal_io_errno())) { - fspc(opt.log,"error"); fprintf(opt.log,"Mirror aborted: disk full or filesystem problems"LF); test_flush; - exit_xh=-1; /* fatal error */ + HTS_LOG(opt,LOG_ERROR); fprintf(opt->log,"Mirror aborted: disk full or filesystem problems"LF); test_flush; + opt->state.exit_xh=-1; /* fatal error */ } - if (opt.errlog) { - fspc(opt.errlog,"error"); fprintf(opt.errlog,"Unable to save file %s : %s"LF, savename, strerror(errno)); + if (opt->log) { + int last_errno = errno; + HTS_LOG(opt,LOG_ERROR); fprintf(opt->log,"Unable to save file %s : %s"LF, savename, strerror(last_errno)); if (fcheck) { - fspc(opt.errlog,"error"); - fprintf(opt.errlog,"* * Fatal write error, giving up"LF); + HTS_LOG(opt,LOG_ERROR); + fprintf(opt->log,"* * Fatal write error, giving up"LF); } test_flush; } @@ -1812,8 +1724,8 @@ int httpmirror(char* url1,httrackp* ptropt) { /* Parsing of other media types (java, ram..) 
*/ /* if (strfield2(r.contenttype,"audio/x-pn-realaudio")) { - if ((opt.debug>1) && (opt.log!=NULL)) { - fspc(opt.log,"debug"); fprintf(opt.log,"(Real Media): parsing %s"LF,savename); test_flush; + if ((opt->debug>1) && (opt->log!=NULL)) { + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(Real Media): parsing %s"LF,savename); test_flush; } if (fexist(savename)) { // ok, existe bien! FILE* fp=fopen(savename,"r+b"); @@ -1822,8 +1734,8 @@ int httpmirror(char* url1,httrackp* ptropt) { char BIGSTK line[HTS_URLMAXSIZE*2]; linput(fp,line,HTS_URLMAXSIZE); if (strnotempty(line)) { - if ((opt.debug>1) && (opt.log!=NULL)) { - fspc(opt.log,"debug"); fprintf(opt.log,"(Real Media): detected %s"LF,line); test_flush; + if ((opt->debug>1) && (opt->log!=NULL)) { + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(Real Media): detected %s"LF,line); test_flush; } } } @@ -1834,7 +1746,7 @@ int httpmirror(char* url1,httrackp* ptropt) { /* External modules */ - if (opt.parsejava && fexist(savename)) { + if ( opt->parsejava && ( opt->parsejava & HTSPARSE_NO_CLASS ) == 0 && fexist(savename)) { char BIGSTK buff_err_msg[1024]; htsmoduleStruct BIGSTK str; buff_err_msg[0] = '\0'; @@ -1850,7 +1762,7 @@ int httpmirror(char* url1,httrackp* ptropt) { str.addLink = htsAddLink; /* */ str.liens = liens; - str.opt = &opt; + str.opt = opt; str.sback = sback; str.cache = &cache; str.hashptr = hashptr; @@ -1864,13 +1776,13 @@ int httpmirror(char* url1,httrackp* ptropt) { /* Parse if recognized */ switch(hts_parse_externals(&str)) { case 1: - if ((opt.debug>1) && (opt.log!=NULL)) { - fspc(opt.log,"debug"); fprintf(opt.log,"(External module): parsed successfully %s"LF,savename); test_flush; + if ((opt->debug>1) && (opt->log!=NULL)) { + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(External module): parsed successfully %s"LF,savename); test_flush; } break; case 0: - if ((opt.debug>1) && (opt.log!=NULL)) { - fspc(opt.log,"debug"); fprintf(opt.log,"(External module): couldn't parse successfully %s : %s"LF,savename, 
str.err_msg); test_flush; + if ((opt->debug>1) && (opt->log!=NULL)) { + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(External module): couldn't parse successfully %s : %s"LF,savename, str.err_msg); test_flush; } break; } @@ -1882,7 +1794,7 @@ int httpmirror(char* url1,httrackp* ptropt) { /* Post-processing */ if (fexist(savename)) { - usercommand(&opt, 0, NULL, savename, urladr, urlfil); + usercommand(opt, 0, NULL, savename, urladr, urlfil); } } // if !error @@ -1898,7 +1810,7 @@ jump_if_done: ptr++; // faut-il sauter le(s) lien(s) suivant(s)? (fichiers images à passer après les html) - if (opt.getmode & 4) { // sauver les non html après + if (opt->getmode & 4) { // sauver les non html après // sauter les fichiers selon la passe if (!numero_passe) { while((ptr<lien_tot)?( liens[ptr]->pass2):0) ptr++; @@ -1907,8 +1819,8 @@ jump_if_done: } if (ptr>=lien_tot) { // fin de boucle if (!numero_passe) { // première boucle - if ((opt.debug>1) && (opt.log!=NULL)) { - fprintf(opt.log,LF"Now getting non-html files..."LF); + if ((opt->debug>1) && (opt->log!=NULL)) { + fprintf(opt->log,LF"Now getting non-html files..."LF); test_flush; } numero_passe=1; // seconde boucle @@ -1923,19 +1835,19 @@ jump_if_done: } // copy abort state if necessary from outside - if (!exit_xh && opt.state.exit_xh) { - exit_xh=opt.state.exit_xh; - } + //if (!exit_xh && opt->state.exit_xh) { + // exit_xh=opt->state.exit_xh; + //} // a-t-on dépassé le quota? 
- if (!back_checkmirror(&opt)) { + if (!back_checkmirror(opt)) { ptr=lien_tot; - } else if (exit_xh) { // sortir - if (opt.errlog) { - fspc(opt.errlog,"info"); - if (exit_xh==1) { - fprintf(opt.errlog,"Exit requested by shell or user"LF); + } else if (opt->state.exit_xh) { // sortir + if (opt->log) { + HTS_LOG(opt,LOG_INFO); + if (opt->state.exit_xh==1) { + fprintf(opt->log,"Exit requested by shell or user"LF); } else { - fprintf(opt.errlog,"Exit requested by engine"LF); + fprintf(opt->log,"Exit requested by engine"LF); } test_flush; } @@ -1963,22 +1875,22 @@ jump_if_done: && (HTS_STAT.HTS_TOTAL_RECV < 32768) /* should be fine */ ) { - if (opt.errlog) { - fspc(opt.errlog,"info"); fprintf(opt.errlog,"No data seems to have been transfered during this session! : restoring previous one!"LF); + if (opt->log) { + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"No data seems to have been transfered during this session! : restoring previous one!"LF); test_flush; } XH_uninit; - if ( (fexist(fconcat(opt.path_log,"hts-cache/old.dat"))) && (fexist(fconcat(opt.path_log,"hts-cache/old.ndx"))) ) { - remove(fconcat(opt.path_log,"hts-cache/new.dat")); - remove(fconcat(opt.path_log,"hts-cache/new.ndx")); - remove(fconcat(opt.path_log,"hts-cache/new.lst")); - remove(fconcat(opt.path_log,"hts-cache/new.txt")); - rename(fconcat(opt.path_log,"hts-cache/old.dat"),fconcat(opt.path_log,"hts-cache/new.dat")); - rename(fconcat(opt.path_log,"hts-cache/old.ndx"),fconcat(opt.path_log,"hts-cache/new.ndx")); - rename(fconcat(opt.path_log,"hts-cache/old.lst"),fconcat(opt.path_log,"hts-cache/new.lst")); - rename(fconcat(opt.path_log,"hts-cache/old.txt"),fconcat(opt.path_log,"hts-cache/new.txt")); - } - exit_xh=2; /* interrupted (no connection detected) */ + if ( (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.dat"))) && (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.ndx"))) ) { + remove(fconcat(OPT_GET_BUFF(opt), 
StringBuff(opt->path_log),"hts-cache/new.dat")); + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.ndx")); + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.lst")); + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.txt")); + rename(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.dat"),fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.dat")); + rename(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.ndx"),fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.ndx")); + rename(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.lst"),fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.lst")); + rename(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.txt"),fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.txt")); + } + opt->state.exit_xh=2; /* interrupted (no connection detected) */ return 1; } @@ -1990,33 +1902,31 @@ jump_if_done: // purger! 
if (cache.lst) { fclose(cache.lst); cache.lst=NULL; - if (opt.delete_old) { + if (opt->delete_old) { FILE *old_lst,*new_lst; // -#if HTS_ANALYSTE - _hts_in_html_parsing=3; -#endif + opt->state._hts_in_html_parsing=3; // - old_lst=fopen(fconcat(opt.path_log,"hts-cache/old.lst"),"rb"); + old_lst=fopen(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.lst"),"rb"); if (old_lst) { - LLint sz=fsize(fconcat(opt.path_log,"hts-cache/new.lst")); - new_lst=fopen(fconcat(opt.path_log,"hts-cache/new.lst"),"rb"); + off_t sz=fsize(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.lst")); + new_lst=fopen(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.lst"),"rb"); if ((new_lst) && (sz>0)) { - char* adr=(char*) malloct((INTsys)sz); + char* adr=(char*) malloct(sz); if (adr) { - if (fread(adr,1,(INTsys)sz,new_lst) == sz) { + if (fread(adr,1,sz,new_lst) == sz) { char line[1100]; int purge=0; while(!feof(old_lst)) { linput(old_lst,line,1000); if (!strstr(adr,line)) { // fichier non trouvé dans le nouveau? char BIGSTK file[HTS_URLMAXSIZE*2]; - strcpybuff(file,opt.path_html); + strcpybuff(file,StringBuff(opt->path_html)); strcatbuff(file,line+1); file[strlen(file)-1]='\0'; if (fexist(file)) { // toujours sur disque: virer - if (opt.log) { - fspc(opt.log,"info"); fprintf(opt.log,"Purging %s"LF,file); + if (opt->log) { + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"Purging %s"LF,file); } remove(file); purge=1; } @@ -2034,12 +1944,12 @@ jump_if_done: if (strnotempty(line)) if (!strstr(adr,line)) { // non trouvé? 
char BIGSTK file[HTS_URLMAXSIZE*2]; - strcpybuff(file,opt.path_html); + strcpybuff(file,StringBuff(opt->path_html)); strcatbuff(file,line+1); while ((strnotempty(file)) && (rmdir(file)==0)) { // ok, éliminé (existait) purge=1; - if (opt.log) { - fspc(opt.log,"info"); fprintf(opt.log,"Purging directory %s/"LF,file); + if (opt->log) { + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"Purging directory %s/"LF,file); while(strnotempty(file) && (file[strlen(file)-1]!='/') && (file[strlen(file)-1]!='\\')) { file[strlen(file)-1]='\0'; } @@ -2052,8 +1962,8 @@ jump_if_done: } // if (!purge) { - if (opt.log) { - fprintf(opt.log,"No files purged"LF); + if (opt->log) { + fprintf(opt->log,"No files purged"LF); } } } @@ -2064,23 +1974,21 @@ jump_if_done: fclose(old_lst); } // -#if HTS_ANALYSTE - _hts_in_html_parsing=0; -#endif + opt->state._hts_in_html_parsing=0; } } // fin purge! // Indexation - if (opt.kindex) - index_finish(opt.path_html,opt.kindex); + if (opt->kindex) + index_finish(StringBuff(opt->path_html),opt->kindex); // afficher résumé dans log - if (opt.log!=NULL) { + if (opt->log!=NULL) { char BIGSTK finalInfo[8192]; - int error = fspc(NULL,"error"); - int warning = fspc(NULL,"warning"); - int info = fspc(NULL,"info"); + int error = fspc(opt,NULL,"error"); + int warning = fspc(opt,NULL,"warning"); + int info = fspc(opt,NULL,"info"); char BIGSTK htstime[256]; char BIGSTK infoupdated[256]; // int n=(int) (stat_loaded/(time_local()-HTS_STAT.stat_timestart)); @@ -2089,7 +1997,7 @@ jump_if_done: sec2str(htstime,time_local()-HTS_STAT.stat_timestart); //sprintf(finalInfo + strlen(finalInfo),LF"HTS-mirror complete in %s : %d links scanned, %d files written (%d bytes overall) [%d bytes received at %d bytes/sec]"LF,htstime,lien_tot-1,HTS_STAT.stat_files,stat_bytes,stat_loaded,n); infoupdated[0] = '\0'; - if (opt.is_update) { + if (opt->is_update) { if (HTS_STAT.stat_updated_files > 0) { sprintf(infoupdated, ", %d files updated", (int)HTS_STAT.stat_updated_files); } else { @@ -2114,7 
+2022,7 @@ jump_if_done: int packed_ratio=(int)((LLint)(HTS_STAT.total_packed*100)/HTS_STAT.total_unpacked); sprintf(finalInfo + strlen(finalInfo),", "LLintP" bytes transfered using HTTP compression in %d files, ratio %d%%",(LLint)HTS_STAT.total_unpacked,HTS_STAT.total_packedfiles,(int)packed_ratio); } - if (!opt.nokeepalive && HTS_STAT.stat_sockid > 0 && HTS_STAT.stat_nrequests > HTS_STAT.stat_sockid) { + if (!opt->nokeepalive && HTS_STAT.stat_sockid > 0 && HTS_STAT.stat_nrequests > HTS_STAT.stat_sockid) { int rq = (HTS_STAT.stat_nrequests * 10) / HTS_STAT.stat_sockid; sprintf(finalInfo + strlen(finalInfo),", %d.%d requests per connection", rq/10, rq%10); } @@ -2125,7 +2033,7 @@ jump_if_done: sprintf(finalInfo + strlen(finalInfo),"(No errors, %d warnings, %d messages)"LF,warning,info); // Log - fprintf(opt.log,LF"%s", finalInfo); + fprintf(opt->log,LF"%s", finalInfo); // Close ZIP if (cache.zipOutput) { @@ -2162,7 +2070,7 @@ jump_if_done: // fin afficher résumé dans log // ending - usercommand(&opt,0,NULL,NULL,NULL,NULL); + usercommand(opt,0,NULL,NULL,NULL,NULL); // désallocation mémoire & buffers XH_uninit; @@ -2172,7 +2080,7 @@ jump_if_done: // version 2 pour le reste // flusher si on doit lire peu à peu le fichier #undef test_flush -#define test_flush if (opt->flush) { fflush(opt->log); fflush(opt->errlog); } +#define test_flush if (opt->flush) { fflush(opt->log); fflush(opt->log); } // Estimate transfer rate @@ -2260,10 +2168,10 @@ void host_ban(httrackp* opt,lien_url** liens,int ptr,int lien_tot,struct_back* s opt->maxfilter += HTS_FILTERSINC; if (filters_init(&_FILTERS, opt->maxfilter, HTS_FILTERSINC) == 0) { printf("PANIC! 
: Too many filters : >%d [%d]\n",*_FILTERS_PTR,__LINE__); - if (opt->errlog) { - fprintf(opt->errlog,LF"Too many filters, giving up..(>%d)"LF,*_FILTERS_PTR); - fprintf(opt->errlog,"To avoid that: use #F option for more filters (example: -#F5000)"LF); - fflush(opt->errlog); + if (opt->log) { + fprintf(opt->log,LF"Too many filters, giving up..(>%d)"LF,*_FILTERS_PTR); + fprintf(opt->log,"To avoid that: use #F option for more filters (example: -#F5000)"LF); + fflush(opt->log); } assertf("too many filters - giving up" == NULL); } @@ -2349,48 +2257,6 @@ void host_ban(httrackp* opt,lien_url** liens,int ptr,int lien_tot,struct_back* s } } - -#if 0 -/* Init structure */ -/* 1 : init */ -/* -1 : off */ -/* 0 : query */ -/* 2 : LOCK */ -/* -2 : UNLOCK */ -void* structcheck_init(int init) { - int structcheck_size = 1024; - inthash structcheck_hash=NULL; - /* */ - static PTHREAD_LOCK_TYPE structcheck_init_mutex; - static int structcheck_init_mutex_init=0; - - if (init == 1 || init == -1) { - if (init) { - if (structcheck_hash) - inthash_delete(&structcheck_hash); - structcheck_hash=NULL; - } - if (init != -1) { - if (structcheck_init_mutex_init == 0) { - htsSetLock(&structcheck_init_mutex, -999); - structcheck_init_mutex_init=1; - } - if (structcheck_hash==NULL) { - structcheck_hash=inthash_new(structcheck_size); // désalloué xh_xx - } - } - } - - /* Lock / Unlock */ - if (init == 2) { // Lock - htsSetLock(&structcheck_init_mutex, 1); - } else if (init == -2) { // Unlock - htsSetLock(&structcheck_init_mutex, 0); - } - return structcheck_hash; -} -#endif - int filters_init(char*** ptrfilters, int maxfilter, int filterinc) { char** filters = *ptrfilters; int filter_max=maximum(maxfilter, 128); @@ -2430,76 +2296,155 @@ int filters_init(char*** ptrfilters, int maxfilter, int filterinc) { return (filters != NULL) ? 
filter_max : 0; } -// vérifier présence de l'arbo -HTSEXT_API int structcheck(char* s) { - // vérifier la présence des dossier(s) - char *a=s; - char BIGSTK nom[HTS_URLMAXSIZE*2]; - char *b; - //inthash structcheck_hash=NULL; - if (strnotempty(s)==0) return 0; - if (strlen(s)>HTS_URLMAXSIZE) return 0; - - // Get buffer address - /* - structcheck_hash = (inthash)structcheck_init(0); - if (structcheck_hash == NULL) { - return -1; +static int mkdir_compat(const char *pathname) { +#ifdef _WIN32 + return mkdir(pathname); +#else + return mkdir(pathname, HTS_ACCESS_FOLDER); +#endif +} + +/* path must end with "/" or with the finename (/tmp/bar/ or /tmp/bar/foo.zip) */ +HTSEXT_API int dir_exists(const char* path) { + struct stat st; + char BIGSTK file[HTS_URLMAXSIZE*2]; + int i = 0; + if (strnotempty(path) == 0) { + errno = EINVAL; + return 0; + } + if (strlen(path) > HTS_URLMAXSIZE) { + errno = EINVAL; + return 0; } - */ - b=nom; - do { - if (*a) *b++=*a++; - while((*a!='/') && (*a!='\0')) *b++=*a++; - *b='\0'; // pas de ++ pour boucler - if (*a=='/') { // toujours dossier - if (strnotempty(nom)) { - //if (inthash_write(structcheck_hash, nom, 1)) { // non encore créé -#if HTS_WIN - if (mkdir(fconv(nom))!=0) -#else - if (mkdir(fconv(nom),HTS_ACCESS_FOLDER)!=0) + /* Get a copy */ + strcpybuff(file, path); +#ifdef _WIN32 + /* To system name */ + for(i = 0 ; file[i] != 0 ; i++) { + if (file[i] == '/') { + file[i] = PATH_SEPARATOR; + } + } #endif - { -#if HTS_REMOVE_ANNOYING_INDEX - // might be a filename with same name than this folder - // then, remove it to allow folder creation - // it happends when servers gives a folder index while - // requesting / page - // -> if the file can be opened (not a folder) then rename it - if (fexist(fconv(nom))) { - rename(fconv(nom),fconcat(fconv(nom),".txt")); - } - // if it fails, that's too bad -#if HTS_WIN - mkdir(fconv(nom)); -#else - mkdir(fconv(nom),HTS_ACCESS_FOLDER); + /* Get prefix (note: file can not be empty here) */ + for(i = 
(int) strlen(file) - 1 ; i > 0 && file[i] != PATH_SEPARATOR ; i--); + for( ; i > 0 && file[i] == PATH_SEPARATOR ; i--); + file[i + 1] = '\0'; + + /* Check the final dir */ + if (stat(file, &st) == 0 && S_ISDIR(st.st_mode)) { + errno = 0; + return 1; /* EXISTS */ + } + errno = 0; + return 0; /* DOES NOT EXISTS */ +} + +/* path must end with "/" or with the finename (/tmp/bar/ or /tmp/bar/foo.zip) */ +HTSEXT_API int structcheck(const char* path) { + struct stat st; + char BIGSTK tmpbuf[HTS_URLMAXSIZE*2]; + char BIGSTK file[HTS_URLMAXSIZE*2]; + int i = 0; + int npaths; + if (strnotempty(path) == 0) + return 0; + if (strlen(path) > HTS_URLMAXSIZE) { + errno = EINVAL; + return -1; + } + + /* Get a copy */ + strcpybuff(file, path); +#ifdef _WIN32 + /* To system name */ + for(i = 0 ; file[i] != 0 ; i++) { + if (file[i] == '/') { + file[i] = PATH_SEPARATOR; + } + } #endif + /* Get prefix (note: file can not be empty here) */ + for(i = (int) strlen(file) - 1 ; i > 0 && file[i] != PATH_SEPARATOR ; i--); + for( ; i > 0 && file[i] == PATH_SEPARATOR ; i--); + file[i + 1] = '\0'; + + /* First check the final dir */ + if (stat(file, &st) == 0 && S_ISDIR(st.st_mode)) { + return 0; /* OK */ + } + + /* Start from the begining */ + i = 0; + + /* Skip irrelevant part (the root slash, or the drive path) */ +#ifdef _WIN32 + if (file[0] != 0 && file[1] == ':') { /* f:\ */ + i+= 2; + if (file[i] == PATH_SEPARATOR) { /* f:\ */ + i++; + } + } else if (file[0] == PATH_SEPARATOR && file[1] == PATH_SEPARATOR) { /* \\mch */ + i+= 2; + } #endif - // Si existe déja renvoie une erreur.. 
tant pis + + /* Check paths */ + for(npaths = 1 ; ; npaths++) { + char end_char; + + /* Go to next path */ + + /* Skip separator(s) */ + for( ; file[i] == PATH_SEPARATOR ; i++); + /* Next separator */ + for( ; file[i] != 0 && file[i] != PATH_SEPARATOR ; i++); + + /* Check */ + end_char = file[i]; + if (end_char != 0) { + file[i] = '\0'; + } + if (stat(file, &st) == 0) { /* Something exists */ + if (!S_ISDIR(st.st_mode)) { +#if HTS_REMOVE_ANNOYING_INDEX + if (S_ISREG(st.st_mode)) { /* Regular file in place ; move it and create directory */ + sprintf(tmpbuf, "%s.txt", file); + if (rename(file, tmpbuf) != 0) { /* Can't rename regular file */ + return -1; } -#if HTS_WIN==0 - /*chmod(fconv(nom),HTS_ACCESS_FOLDER);*/ + if (mkdir_compat(file) != 0) { /* Can't create directory */ + return -1; + } + } +#else +#error Not implemented #endif - //} } - *b++=*a++; // slash - } - } while(*a); + } else { /* Nothing exists ; create directory */ + if (mkdir_compat(file) != 0) { /* Can't create directory */ + return -1; + } + } + if (end_char == 0) { /* End */ + break; + } else { + file[i] = end_char; /* Restore / */ + } + } return 0; } - // sauver un fichier -int filesave(httrackp* opt,char* adr,int len,char* s,char* url_adr,char* url_fil) { +int filesave(httrackp* opt,const char* adr,int len,const char* s,const char* url_adr,const char* url_fil) { FILE* fp; // écrire le fichier - if ((fp=filecreate(s))!=NULL) { + if ((fp = filecreate(&opt->state.strc, s))!=NULL) { int nl=0; if (len>0) { - nl=(int) fwrite(adr,1,(INTsys)len,fp); + nl=(int) fwrite(adr,1,len,fp); } fclose(fp); if (nl!=len) // erreur @@ -2530,17 +2475,18 @@ int check_fatal_io_errno(void) { // ouvrir un fichier (avec chemin Un*x) -FILE* filecreate(char* s) { +FILE* filecreate(filenote_strc *strc, const char* s) { char BIGSTK fname[HTS_URLMAXSIZE*2]; FILE* fp; + int last_errno = 0; fname[0]='\0'; // noter lst - filenote(s,NULL); + if (strc != NULL) { + filenote(strc, s, NULL); + } - // if (*s=='/') strcpybuff(fname,s+1); 
else strcpybuff(fname,s); // pas de / (root!!) // ** SIIIIIII!!! à cause de -O <path> - strcpybuff(fname,s); - + strcpybuff(fname, s); #if HTS_DOSNAME // remplacer / par des slash arrière { @@ -2554,29 +2500,37 @@ FILE* filecreate(char* s) { // a partir d'ici le slash devient antislash #endif - // ouvrir - fp=fopen(fname,"wb"); + /* Try to open the file */ + fp = fopen(fname, "wb"); + + /* Error ? Check the directory structure and retry. */ if (fp == NULL) { - // construire le chemin si besoin est - (void)structcheck(s); - fp=fopen(fname,"wb"); + last_errno = errno; + if (structcheck(s) != 0) { + last_errno = errno; + } else { + last_errno = 0; + } + fp = fopen(fname, "wb"); } - -#if HTS_WIN==0 - if (fp!=NULL) chmod(fname,HTS_ACCESS_FILE); + if (fp == NULL && last_errno != 0) { + errno = last_errno; + } +#ifndef _WIN32 + if (fp != NULL) + chmod(fname, HTS_ACCESS_FILE); #endif - return fp; } // ouvrir un fichier (avec chemin Un*x) -FILE* fileappend(char* s) { +FILE* fileappend(filenote_strc *strc,const char* s) { char BIGSTK fname[HTS_URLMAXSIZE*2]; FILE* fp; fname[0]='\0'; // noter lst - filenote(s,NULL); + filenote(strc,s,NULL); // if (*s=='/') strcpybuff(fname,s+1); else strcpybuff(fname,s); // pas de / (root!!) // ** SIIIIIII!!! 
à cause de -O <path> strcpybuff(fname,s); @@ -2597,7 +2551,7 @@ FILE* fileappend(char* s) { // ouvrir fp=fopen(fname,"ab"); -#if HTS_WIN==0 +#ifndef _WIN32 if (fp!=NULL) chmod(fname,HTS_ACCESS_FILE); #endif @@ -2606,9 +2560,9 @@ FILE* fileappend(char* s) { // create an empty file -int filecreateempty(char* filename) { +int filecreateempty(filenote_strc *strc, const char* filename) { FILE* fp; - fp=filecreate(filename); // filenote & co + fp=filecreate(strc, filename); // filenote & co if (fp) { fclose(fp); return 1; @@ -2617,14 +2571,7 @@ int filecreateempty(char* filename) { } // noter fichier -typedef struct { - FILE* lst; - char path[HTS_URLMAXSIZE*2]; -} filenote_strc; -int filenote(char* s,filecreate_params* params) { - filenote_strc* strc; - NOSTATIC_RESERVE(strc, filenote_strc, 1); - +int filenote(filenote_strc *strc, const char* s, filecreate_params* params) { // gestion du fichier liste liste if (params) { //filecreate_params* p = (filecreate_params*) params; @@ -2633,10 +2580,11 @@ int filenote(char* s,filecreate_params* params) { return 0; } else if (strc->lst) { char BIGSTK savelst[HTS_URLMAXSIZE*2]; - strcpybuff(savelst,fslash(s)); + char catbuff[CATBUFF_SIZE]; + strcpybuff(savelst,fslash(catbuff,s)); // couper chemin? 
if (strnotempty(strc->path)) { - if (strncmp(fslash(strc->path),savelst,strlen(strc->path))==0) { // couper + if (strncmp(fslash(catbuff,strc->path),savelst,strlen(strc->path))==0) { // couper strcpybuff(savelst,s+strlen(strc->path)); } } @@ -2646,23 +2594,14 @@ int filenote(char* s,filecreate_params* params) { return 1; } -void file_notify(char* adr,char* fil,char* save,int create,int modify,int not_updated) { -#if HTS_ANALYSTE - if (hts_htmlcheck_filesave2 != NULL) { - hts_htmlcheck_filesave2(adr, fil, save, create, modify, not_updated); - } -#endif +void file_notify(httrackp* opt,const char* adr,const char* fil,const char* save,int create,int modify,int not_updated) { + RUN_CALLBACK6(opt, filesave2, adr, fil, save, create, modify, not_updated); } // executer commande utilisateur -static void postprocess_file(httrackp* opt,char* save, char* adr, char* fil); -typedef struct { - int exe; - char cmd[2048]; -} usercommand_strc; -HTS_INLINE void usercommand(httrackp* opt,int _exe,char* _cmd,char* file,char* adr,char* fil) { - usercommand_strc* strc; - NOSTATIC_RESERVE(strc, usercommand_strc, 1); +static void postprocess_file(httrackp* opt, const char* save, const char* adr, const char* fil); +HTS_INLINE void usercommand(httrackp* opt,int _exe,const char* _cmd,const char* file,const char* adr,const char* fil) { + usercommand_strc* strc = &opt->state.usercmd; /* Callback */ if (_exe) { @@ -2676,12 +2615,9 @@ HTS_INLINE void usercommand(httrackp* opt,int _exe,char* _cmd,char* file,char* a /* post-processing */ postprocess_file(opt, file, adr, fil); -#if HTS_ANALYSTE - if (hts_htmlcheck_filesave != NULL) { - if (file != NULL && strnotempty(file)) - hts_htmlcheck_filesave(file); - } -#endif + if (file != NULL && strnotempty(file)) { + RUN_CALLBACK1(opt, filesave, file); + } if (strc->exe) { if (file != NULL && strnotempty(file)) { @@ -2691,7 +2627,7 @@ HTS_INLINE void usercommand(httrackp* opt,int _exe,char* _cmd,char* file,char* a } } } -void usercommand_exe(char* 
cmd,char* file) { +void usercommand_exe(const char* cmd,const char* file) { char BIGSTK temp[8192]; char c[2]=""; int i; @@ -2710,7 +2646,7 @@ void usercommand_exe(char* cmd,char* file) { } -static void postprocess_file(httrackp* opt,char* save, char* adr, char* fil) { +static void postprocess_file(httrackp* opt,const char* save, const char* adr, const char* fil) { int first = 0; /* MIME-html archive to build */ if (opt != NULL && opt->mimehtml) { @@ -2718,24 +2654,26 @@ static void postprocess_file(httrackp* opt,char* save, char* adr, char* fil) { adr = NULL; } if (save != NULL && opt != NULL && adr != NULL && adr[0] && strnotempty(save) && fexist(save)) { - char* rsc_save = save; - char* rsc_fil = strrchr(fil, '/'); + const char* rsc_save = save; + const char* rsc_fil = strrchr(fil, '/'); int n; if (rsc_fil == NULL) rsc_fil = fil; - if (strncmp(fslash(save), fslash(opt->path_html), (n = (int)strlen(opt->path_html))) == 0) { + if (strncmp(fslash(OPT_GET_BUFF(opt),save), fslash(OPT_GET_BUFF(opt),StringBuff(opt->path_html)), (n = (int)strlen(StringBuff(opt->path_html)))) == 0) { rsc_save += n; } if (!opt->state.mimehtml_created) { first = 1; - opt->state.mimefp = fopen(fconcat(opt->path_html,"index.mht"), "wb"); + opt->state.mimefp = fopen(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_html),"index.mht"), "wb"); if (opt->state.mimefp != NULL) { char BIGSTK rndtmp[1024], currtime[256]; - srand(time(NULL)); + srand((unsigned int)time(NULL)); time_gmt_rfc822(currtime); sprintf(rndtmp, "%d_%d", (int)time(NULL), (int) rand()); - sprintf(opt->state.mimemid, "----=_MIMEPart_%s_=----", rndtmp); + StringRoom(opt->state.mimemid, 256); + sprintf(StringBuffRW(opt->state.mimemid), "----=_MIMEPart_%s_=----", rndtmp); + StringSetLength(opt->state.mimemid, -1); fprintf(opt->state.mimefp, "From: HTTrack Website Copier <nobody@localhost>\r\n" "Subject: Local mirror\r\n" "Date: %s\r\n" @@ -2746,12 +2684,12 @@ static void postprocess_file(httrackp* opt,char* save, char* adr, char* 
fil) { "MIME-Version: 1.0\r\n" "\r\nThis message is a RFC MIME-compliant multipart message.\r\n" "\r\n" - , currtime, rndtmp, opt->state.mimemid); + , currtime, rndtmp, StringBuff(opt->state.mimemid)); opt->state.mimehtml_created = 1; } else { opt->state.mimehtml_created = -1; - if ( opt->errlog != NULL ) { - fspc(opt->errlog,"error"); fprintf(opt->log,"unable to create index.mht"LF); + if ( opt->log != NULL ) { + HTS_LOG(opt,LOG_ERROR); fprintf(opt->log,"unable to create index.mht"LF); } } } @@ -2761,8 +2699,8 @@ static void postprocess_file(httrackp* opt,char* save, char* adr, char* fil) { char buff[60*100 + 2]; char mimebuff[256]; char BIGSTK cid[HTS_URLMAXSIZE*3]; - int len; - int isHtml = ( ishtml(save) == 1 ); + size_t len; + int isHtml = ( ishtml(opt,save) == 1 ); mimebuff[0] = '\0'; /* CID */ @@ -2771,8 +2709,8 @@ static void postprocess_file(httrackp* opt,char* save, char* adr, char* fil) { escape_in_url(cid); { char* a = cid; while((a = strchr(a, '%'))) { *a = 'X'; a++; } } - guess_httptype(mimebuff, save); - fprintf(opt->state.mimefp, "--%s\r\n", opt->state.mimemid); + guess_httptype(opt,mimebuff, save); + fprintf(opt->state.mimefp, "--%s\r\n", StringBuff(opt->state.mimemid)); /*if (first) fprintf(opt->state.mimefp, "Content-disposition: inline\r\n"); else*/ @@ -2791,7 +2729,7 @@ static void postprocess_file(httrackp* opt,char* save, char* adr, char* fil) { buff[len] = '\0'; if (!isHtml) { char base64buff[60*100*2]; - code64((unsigned char*)buff, len, (unsigned char*)base64buff, 1); + code64((unsigned char*)buff, (int)len, (unsigned char*)base64buff, 1); fprintf(opt->state.mimefp, "%s", base64buff); } else { fprintf(opt->state.mimefp, "%s", buff); @@ -2804,7 +2742,7 @@ static void postprocess_file(httrackp* opt,char* save, char* adr, char* fil) { } else if (save == NULL) { if (opt->state.mimehtml_created == 1 && opt->state.mimefp != NULL) { fprintf(opt->state.mimefp, - "--%s--\r\n", opt->state.mimemid); + "--%s--\r\n", StringBuff(opt->state.mimemid)); 
fclose(opt->state.mimefp); opt->state.mimefp = NULL; } @@ -2813,17 +2751,9 @@ static void postprocess_file(httrackp* opt,char* save, char* adr, char* fil) { } // écrire n espaces dans fp -typedef struct { - int error; - int warning; - int info; -} fspc_strc; -HTS_INLINE int fspc(FILE* fp,char* type) { - fspc_strc* strc; - NOSTATIC_RESERVE(strc, fspc_strc, 1); // log.. - - // - if (fp) { +HTS_INLINE int fspc(httrackp *opt,FILE* fp,const char* type) { + fspc_strc* const strc = ( opt != NULL ) ? &opt->state.fspc : NULL; + if (fp != NULL) { char s[256]; time_t tt; struct tm* A; @@ -2835,19 +2765,25 @@ HTS_INLINE int fspc(FILE* fp,char* type) { } strftime(s,250,"%H:%M:%S",A); if (strnotempty(type)) - fprintf(fp,"%s\t%c%s: \t",s,hichar(*type),type+1); - else - fprintf(fp,"%s\t \t",s); - if (strcmp(type,"warning")==0) - strc->warning++; - else if (strcmp(type,"error")==0) - strc->error++; - else if (strcmp(type,"info")==0) - strc->info++; - } - else if (!type) - strc->error=strc->warning=strc->info=0; // reset - else if (strcmp(type,"warning")==0) + fprintf(fp,"%s\t%c%s: \t",s,hichar(*type),type+1); + else + fprintf(fp,"%s\t \t",s); + if (strc != NULL) { + if (strcmp(type,"warning")==0) + strc->warning++; + else if (strcmp(type,"error")==0) + strc->error++; + else if (strcmp(type,"info")==0) + strc->info++; + } + } + else if (strc == NULL) { + return 0; + } + else if (!type) { + strc->error=strc->warning=strc->info=0; // reset + } + else if (strcmp(type,"warning")==0) return strc->warning; else if (strcmp(type,"error")==0) return strc->error; @@ -2914,8 +2850,6 @@ HTS_INLINE int back_fillmax(struct_back* sback,httrackp* opt,cache_back* cache,l } int back_pluggable_sockets_strict(struct_back* sback, httrackp* opt) { - lien_back* const back = sback->lnk; - const int back_max = sback->count; int n = opt->maxsoc - back_nsoc(sback); // connect limiter @@ -2936,8 +2870,6 @@ int back_pluggable_sockets_strict(struct_back* sback, httrackp* opt) { } int 
back_pluggable_sockets(struct_back* sback, httrackp* opt) { - lien_back* const back = sback->lnk; - const int back_max = sback->count; int n; // ajouter autant de socket qu'on peut ajouter @@ -2955,8 +2887,6 @@ int back_pluggable_sockets(struct_back* sback, httrackp* opt) { // remplir backing int back_fill(struct_back* sback,httrackp* opt,cache_back* cache,lien_url** liens,int ptr,int numero_passe,int lien_tot) { - lien_back* const back = sback->lnk; - const int back_max = sback->count; int n = back_pluggable_sockets(sback, opt); if (opt->savename_delayed == 2 && !opt->delayed_cached) /* cancel (always delayed) */ return 0; @@ -2975,7 +2905,7 @@ int back_fill(struct_back* sback,httrackp* opt,cache_back* cache,lien_url** lien //while((p<lien_tot) && (n>0) && (p < ptr+opt->maxcache_anticipate)) { int ok=1; - // on ne met pas le fichier en backing si il doit être traité après + // on ne met pas le fichier en backing si il doit être traité après ou s'il a déja été traité if (liens[p]->pass2) { // 2è passe if (numero_passe!=1) ok=0; @@ -2983,15 +2913,19 @@ int back_fill(struct_back* sback,httrackp* opt,cache_back* cache,lien_url** lien if (numero_passe!=0) ok=0; } + if (ok && liens[p]->sav != NULL && liens[p]->sav[0] != '\0' + && hash_read(opt->hash,liens[p]->sav,"",0,opt->urlhack) >= 0) // lookup in liens_record + { + ok = 0; + } // note: si un backing est fini, il reste en mémoire jusqu'à ce que // le ptr l'atteigne if (ok) { - int index = back_index(sback, liens[p]->adr,liens[p]->fil,liens[p]->sav); - if (index < 0) { - if (back_add(sback,opt,cache,liens[p]->adr,liens[p]->fil,liens[p]->sav,liens[liens[p]->precedent]->adr,liens[liens[p]->precedent]->fil,liens[p]->testmode,&liens[p]->pass2)==-1) { - if ( (opt->debug>1) && (opt->errlog!=NULL) ) { - fspc(opt->errlog,"debug"); fprintf(opt->errlog,"error: unable to add more links through back_add for back_fill"LF); + if (!back_exist(sback, opt, liens[p]->adr,liens[p]->fil,liens[p]->sav)) { + if 
(back_add(sback,opt,cache,liens[p]->adr,liens[p]->fil,liens[p]->sav,liens[liens[p]->precedent]->adr,liens[liens[p]->precedent]->fil,liens[p]->testmode)==-1) { + if ( (opt->debug>1) && (opt->log!=NULL) ) { + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"error: unable to add more links through back_add for back_fill"LF); test_flush; } #if BDEBUG==1 @@ -3004,8 +2938,6 @@ int back_fill(struct_back* sback,httrackp* opt,cache_back* cache,lien_url** lien printf("backing: %s%s\n",liens[p]->adr,liens[p]->fil); #endif } - } else { - back_set_passe2_ptr(opt,cache,sback,index,&liens[p]->pass2); } } p++; @@ -3035,116 +2967,6 @@ int back_fill(struct_back* sback,httrackp* opt,cache_back* cache,lien_url** lien -// routines de détournement de SIGHUP & co (Unix) -// -httrackp* hts_declareoptbuffer(httrackp* optdecl) { - static httrackp* opt=NULL; /* OK */ - if (optdecl) opt=optdecl; - return opt; -} -// -void sig_finish( int code ) { // finir et quitter - signal(code,sig_term); // quitter si encore - exit_xh=1; - fprintf(stderr,"\nExit requested to engine (signal %d)\n",code); -} -void sig_term( int code ) { // quitter brutalement - fprintf(stderr,"\nProgram terminated (signal %d)\n",code); - exit(0); -} -#if HTS_WIN -void sig_ask( int code ) { // demander - char s[256]; - signal(code,sig_term); // quitter si encore - printf("\nQuit program/Interrupt/Cancel? 
(Q/I/C) "); - fflush(stdout); - scanf("%s",s); - if ( (s[0]=='y') || (s[0]=='Y') || (s[0]=='o') || (s[0]=='O') || (s[0]=='q') || (s[0]=='Q')) - exit(0); // quitter - else if ( (s[0]=='i') || (s[0]=='I') ) { - httrackp* opt=hts_declareoptbuffer(NULL); - if (opt) { - // ask for stop - opt->state.stop=1; - } - } - signal(code,sig_ask); // remettre signal -} -#else -void sig_back( int code ) { // ignorer et mettre en backing - signal(code,sig_ignore); - sig_doback(0); -} -void sig_ask( int code ) { // demander - char s[256]; - signal(code,sig_term); // quitter si encore - printf("\nQuit program/Interrupt/Background/bLind background/Cancel? (Q/I/B/L/C) "); - fflush(stdout); - scanf("%s",s); - if ( (s[0]=='y') || (s[0]=='Y') || (s[0]=='o') || (s[0]=='O') || (s[0]=='q') || (s[0]=='Q')) - exit(0); // quitter - else if ( (s[0]=='b') || (s[0]=='B') || (s[0]=='a') || (s[0]=='A') ) - sig_doback(0); // arrière plan - else if ( (s[0]=='l') || (s[0]=='L') ) - sig_doback(1); // arrière plan - else if ( (s[0]=='i') || (s[0]=='I') ) { - httrackp* opt=hts_declareoptbuffer(NULL); - if (opt) { - // ask for stop - printf("finishing pending transfers.. 
please wait\n"); - opt->state.stop=1; - } - signal(code,sig_ask); // remettre signal - } - else { - printf("cancel..\n"); - signal(code,sig_ask); // remettre signal - } -} -void sig_ignore( int code ) { // ignorer signal -} -void sig_brpipe( int code ) { // treat if necessary - signal(code, sig_brpipe); -} -void sig_doback(int blind) { // mettre en backing - int out=-1; - // - printf("\nMoving into background to complete the mirror...\n"); fflush(stdout); - - { - httrackp* opt=hts_declareoptbuffer(NULL); - if (opt) { - // suppress logging and asking lousy questions - opt->quiet=1; - opt->verbosedisplay=0; - } - } - - if (!blind) - out = open("hts-nohup.out",O_CREAT|O_WRONLY,S_IRUSR|S_IWUSR); - if (out == -1) - out = open("/dev/null",O_WRONLY,S_IRUSR|S_IWUSR); - close(0); - close(1); - dup(out); - close(2); - dup(out); - // - switch (fork()) { - case 0: - break; - case -1: - fprintf(stderr,"Error: can not fork process\n"); - break; - default: // pere - usleep(100000); // pause 1/10s "A microsecond is .000001s" - _exit(0); - break; - } -} -#endif -// fin routines de détournement de SIGHUP & co - // Poll stdin.. 
si besoin #if HTS_POLL // lecture stdin des caractères disponibles @@ -3207,10 +3029,9 @@ HTS_INLINE int check_sockdata(T_SOC s) { } // Attente de touche -#if HTS_ANALYSTE -int ask_continue(void) { - char* s; - s=hts_htmlcheck_query2(HTbuff); +int ask_continue(httrackp *opt) { + const char* s; + s = RUN_CALLBACK1(opt, query2, opt->state.HTbuff); if (s) { if (strnotempty(s)) { if ((strfield2(s,"N")) || (strfield2(s,"NO")) || (strfield2(s,"NON"))) @@ -3220,19 +3041,6 @@ int ask_continue(void) { } return 1; } -#else -int ask_continue(void) { - char s[12]; - s[0]='\0'; - printf("Press <Y><Enter> to confirm, <N><Enter> to abort\n"); - io_flush; linput(stdin,s,4); - if (strnotempty(s)) { - if ((strfield2(s,"N")) || (strfield2(s,"NO")) || (strfield2(s,"NON"))) - return 0; - } - return 1; -} -#endif // nombre de digits dans un nombre int nombre_digit(int n) { @@ -3289,166 +3097,130 @@ char* next_token(char* p,int flag) { return p; } -// routines annexes -#if HTS_ANALYSTE -// canceller un fichier (noter comme cancellable) -// !!NOT THREAD SAFE!! 
-HTSEXT_API char* hts_cancel_file(char * s) { - static char sav[HTS_URLMAXSIZE*2]=""; - if (s[0]!='\0') - if (sav[0]=='\0') - strcpybuff(sav,s); - return sav; -} -HTSEXT_API void hts_cancel_test(void) { - if (_hts_in_html_parsing==2) - _hts_cancel=2; +static int hts_cancel_file_push_(httrackp *opt, const char *url) { + if (url != NULL && url[0] != '\0') { + htsoptstatecancel **cancel; + /* search for available place to store a new htsoptstatecancel* */ + for( cancel = &opt->state.cancel ; *cancel != NULL ; cancel = & ( (*cancel)->next ) ) { + if (strcmp((*cancel)->url, url) == 0) { + return 1; /* already there */ + } + } + *cancel = malloct(sizeof(htsoptstatecancel)); + (*cancel)->next = NULL; + (*cancel)->url = strdupt(url); + return 0; + } + return 1; } -HTSEXT_API void hts_cancel_parsing(void) { - if (_hts_in_html_parsing) - _hts_cancel=1; + +/* cancel a file (locked) */ +HTSEXT_API int hts_cancel_file_push(httrackp *opt, const char *url) { + int ret; + hts_mutexlock(&opt->state.lock); + ret = hts_cancel_file_push_(opt, url); + hts_mutexrelease(&opt->state.lock); + return ret; } -#endif -// for(_i=0;(_i<back_max) && (index<NStatsBuffer);_i++) { -// i=(back_index+_i)%back_max; // commencer par le "premier" (l'actuel) -// if (back[i].status>=0) { // signifie "lien actif" -#if 0 -/* -hts_add_file, add/get elements in the add chain for java parsing -if file_position >= 0 - push 'file/file_position' - return 1 (return 0 if exists) -else - pop file -> 'file' - return 'file_position' -else if empty/error - return -1; -*/ -typedef struct addfile_chain { - char name[1024]; - int pos; - struct addfile_chain* next; -} addfile_chain; -typedef addfile_chain* addfile_chain_ptr; -int opt->(char* file,int file_position) { - addfile_chain** chain; - NOSTATIC_RESERVE(chain, addfile_chain_ptr, 1); - - if (file_position>=0) { /* copy file to the chain */ - struct addfile_chain** current; - current=chain; /* start from */ - while(*current) { - if (strcmp((*current)->name,file)==0) - 
return 0; /* already exists */ - current=&( (*current)->next ); /* 'next' address */ - } - *current=calloct(1,sizeof(addfile_chain)); - if (*current) { - (*current)->next=NULL; - (*current)->pos=-1; - (*current)->name[0]='\0'; - } - if (*current) { - strcpybuff((*current)->name,file); - (*current)->pos=file_position; - return 1; - } else { - printf("PANIC! Too many Java files during parsing [1]\n"); - return -1; - } - } else { /* copy last element in file and delete it */ - if (file) - file[0]='\0'; - if (*chain) { - struct addfile_chain** current; - int pos=-1; - current=chain; /* start from */ - while( (*current)->next ) { - current=&( (*current)->next ); /* 'next' address */ - } - if (file) - strcpybuff(file,(*current)->name); - pos=(*current)->pos; - freet(*current); - *current=NULL; - return pos; - } - return -1; /* no more elements */ +static char* hts_cancel_file_pop_(httrackp *opt) { + if (opt->state.cancel != NULL) { + htsoptstatecancel **cancel; + htsoptstatecancel *ret; + for( cancel = &opt->state.cancel ; (*cancel)->next != NULL ; cancel = & ( (*cancel)->next ) ); + ret = *cancel; + *cancel = NULL; + return ret->url; } + return NULL; /* no entry */ +} - return 0; +char* hts_cancel_file_pop(httrackp *opt) { + char* ret; + hts_mutexlock(&opt->state.lock); + ret = hts_cancel_file_pop_(opt); + hts_mutexrelease(&opt->state.lock); + return ret; +} + +HTSEXT_API void hts_cancel_test(httrackp *opt) { + if (opt->state._hts_in_html_parsing==2) + opt->state._hts_cancel=2; +} +HTSEXT_API void hts_cancel_parsing(httrackp *opt) { + if (opt->state._hts_in_html_parsing) + opt->state._hts_cancel=1; } -#endif -#if HTS_ANALYSTE // en train de parser un fichier html? réponse: % effectués // flag>0 : refresh demandé -HTSEXT_API int hts_is_parsing(int flag) { - if (_hts_in_html_parsing) { // parsing? 
- if (flag>=0) _hts_in_html_poll=1; // faudrait un tit refresh - return max(_hts_in_html_done,1); // % effectués +HTSEXT_API int hts_is_parsing(httrackp *opt, int flag) { + if (opt->state._hts_in_html_parsing) { // parsing? + if (flag >= 0) + opt->state._hts_in_html_poll = 1; // faudrait un tit refresh + return max(opt->state._hts_in_html_done, 1); // % effectués } else { return 0; // non } } -HTSEXT_API int hts_is_testing(void) { // 0 non 1 test 2 purge - if (_hts_in_html_parsing==2) +HTSEXT_API int hts_is_testing(httrackp *opt) { // 0 non 1 test 2 purge + if (opt->state._hts_in_html_parsing==2) return 1; - else if (_hts_in_html_parsing==3) + else if (opt->state._hts_in_html_parsing==3) return 2; - else if (_hts_in_html_parsing==4) + else if (opt->state._hts_in_html_parsing==4) return 3; - else if (_hts_in_html_parsing==5) // scheduling + else if (opt->state._hts_in_html_parsing==5) // scheduling return 4; - else if (_hts_in_html_parsing==6) // wait for slot + else if (opt->state._hts_in_html_parsing==6) // wait for slot return 5; return 0; } -HTSEXT_API int hts_is_exiting(void) { - return exit_xh; +HTSEXT_API int hts_is_exiting(httrackp *opt) { + return opt->state.exit_xh; } // message d'erreur? -char* hts_errmsg(void) { - return _hts_errmsg; +char* hts_errmsg(httrackp *opt) { + return opt->state._hts_errmsg; } // mode pause transfer -HTSEXT_API int hts_setpause(int p) { - if (p>=0) _hts_setpause=p; - return _hts_setpause; +HTSEXT_API int hts_setpause(httrackp *opt, int p) { + if (p >= 0) + opt->state._hts_setpause = p; + return opt->state._hts_setpause; } // ask for termination -HTSEXT_API int hts_request_stop(int force) { - httrackp* opt=hts_declareoptbuffer(NULL); - if (opt) { - opt->state.stop=1; +HTSEXT_API int hts_request_stop(httrackp* opt, int force) { + if (opt != NULL) { + opt->state.stop = 1; } return 0; } // régler en cours de route les paramètres réglables.. 
// -1 : erreur -HTSEXT_API int hts_setopt(httrackp* set_opt) { - if (set_opt) { - httrackp* engine_opt=hts_declareoptbuffer(NULL); - if (engine_opt) { - //_hts_setopt=opt; - copy_htsopt(set_opt,engine_opt); - } - } - return 0; -} +//HTSEXT_API int hts_setopt(httrackp* set_opt) { +// if (set_opt) { +// httrackp* engine_opt=hts_declareoptbuffer(NULL); +// if (engine_opt) { +// //_hts_setopt=opt; +// copy_htsopt(set_opt,engine_opt); +// } +// } +// return 0; +//} // ajout d'URL // -1 : erreur -HTSEXT_API int hts_addurl(char** url) { - if (url) _hts_addurl=url; - return (_hts_addurl!=NULL); +HTSEXT_API int hts_addurl(httrackp *opt, char** url) { + if (url) + opt->state._hts_addurl = url; + return (opt->state._hts_addurl != NULL); } -HTSEXT_API int hts_resetaddurl(void) { - _hts_addurl=NULL; - return (_hts_addurl!=NULL); +HTSEXT_API int hts_resetaddurl(httrackp *opt) { + opt->state._hts_addurl = NULL; + return (opt->state._hts_addurl != NULL); } // copier nouveaux paramètres si besoin -HTSEXT_API int copy_htsopt(httrackp* from,httrackp* to) { +HTSEXT_API int copy_htsopt(const httrackp* from,httrackp* to) { if (from->maxsite > -1) to->maxsite = from->maxsite; @@ -3484,8 +3256,8 @@ HTSEXT_API int copy_htsopt(httrackp* from,httrackp* to) { if (from->maxconn > 0) to->maxconn = from->maxconn; - if (strnotempty(from->user_agent)) - strcpybuff(to->user_agent , from->user_agent); + if (StringNotEmpty(from->user_agent)) + StringCopyS(to->user_agent, from->user_agent); if (from->retry > -1) to->retry = from->retry; @@ -3512,7 +3284,6 @@ HTSEXT_API int copy_htsopt(httrackp* from,httrackp* to) { return 0; } -#endif // /* External modules callback */ @@ -3532,27 +3303,25 @@ int htsAddLink(htsmoduleStruct* str, char* link) { codebase[0]='\0'; if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"(module): adding link : '%s'"LF, link); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(module): adding link : '%s'"LF, link); test_flush; } // 
recopie de "creer le lien" // -#if HTS_ANALYSTE - if (hts_htmlcheck_linkdetected != NULL && !hts_htmlcheck_linkdetected(link)) { - if (opt->errlog) { - fspc(opt->errlog,"error"); fprintf(opt->errlog,"Link %s refused by external wrapper"LF, link); + if (!RUN_CALLBACK1(opt, linkdetected, link)) { + if (opt->log) { + HTS_LOG(opt,LOG_ERROR); fprintf(opt->log,"Link %s refused by external wrapper"LF, link); test_flush; } return 0; } - if (hts_htmlcheck_linkdetected2 != NULL && !hts_htmlcheck_linkdetected2(link, NULL)) { - if (opt->errlog) { - fspc(opt->errlog,"error"); fprintf(opt->errlog,"Link %s refused by external wrapper(2)"LF, link); + if (!RUN_CALLBACK2(opt, linkdetected2, link, NULL)) { + if (opt->log) { + HTS_LOG(opt,LOG_ERROR); fprintf(opt->log,"Link %s refused by external wrapper(2)"LF, link); test_flush; } return 0; } -#endif // adr = c'est la même // fil et save: save2 et fil2 @@ -3578,8 +3347,8 @@ int htsAddLink(htsmoduleStruct* str, char* link) { strcpybuff(tempo,a); strcpybuff(codebase,tempo); // couper host } else { - if (opt->errlog) { - fprintf(opt->errlog,"Unexpected strstr error in base %s"LF,codebase); + if (opt->log) { + fprintf(opt->log,"Unexpected strstr error in base %s"LF,codebase); test_flush; } } @@ -3587,8 +3356,8 @@ int htsAddLink(htsmoduleStruct* str, char* link) { } if (!((int) strlen(codebase)<HTS_URLMAXSIZE)) { // trop long - if (opt->errlog) { - fprintf(opt->errlog,"Codebase too long, parsing skipped (%s)"LF,codebase); + if (opt->log) { + fprintf(opt->log,"Codebase too long, parsing skipped (%s)"LF,codebase); test_flush; } } @@ -3610,7 +3379,7 @@ int htsAddLink(htsmoduleStruct* str, char* link) { &set_prio_to, &just_test_it); if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"result for wizard external module link: %d"LF,forbidden_url); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"result for wizard external module link: %d"LF,forbidden_url); test_flush; } @@ -3634,7 +3403,7 @@ int 
htsAddLink(htsmoduleStruct* str, char* link) { char BIGSTK former_adr[HTS_URLMAXSIZE*2]; char BIGSTK former_fil[HTS_URLMAXSIZE*2]; former_adr[0] = former_fil[0] = '\0'; - r = hts_wait_delayed(str, adr, fil, save, former_adr, former_fil, &forbidden_url); + r = hts_wait_delayed(str, adr, fil, save, NULL, NULL, former_adr, former_fil, &forbidden_url); } // end resolve unresolved type opt->savename_type=a; @@ -3643,7 +3412,7 @@ int htsAddLink(htsmoduleStruct* str, char* link) { if (savename) { if (lienrelatif(tempo,save,savename)==0) { if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"(module): relative link at %s build with %s and %s: %s"LF,adr,save,savename,tempo); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(module): relative link at %s build with %s and %s: %s"LF,adr,save,savename,tempo); test_flush; if (str->localLink && str->localLinkSize > (int) strlen(tempo) + 1) { strcpybuff(str->localLink, tempo); @@ -3656,7 +3425,7 @@ int htsAddLink(htsmoduleStruct* str, char* link) { if (forbidden_url) { if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"(module): file not caught: %s"LF,lien); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(module): file not caught: %s"LF,lien); test_flush; } if (str->localLink && str->localLinkSize > (int) ( strlen(adr) + strlen(fil) + 8 ) ) { str->localLink[0] = '\0'; @@ -3671,7 +3440,7 @@ int htsAddLink(htsmoduleStruct* str, char* link) { // if (r != -1) { if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"(module): %s%s -> %s (base %s)"LF,adr,fil,save,codebase); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(module): %s%s -> %s (base %s)"LF,adr,fil,save,codebase); test_flush; } // modifié par rapport à l'autre version (cf prio_fix notamment et save2) @@ -3697,11 +3466,11 @@ int htsAddLink(htsmoduleStruct* str, char* link) { liens_record(adr,fil,save,"","",opt->urlhack); if (liens[lien_tot]==NULL) { // erreur, pas 
de place réservée printf("PANIC! : Not enough memory [%d]\n",__LINE__); - if (opt->errlog) { - fprintf(opt->errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url))); + if (opt->log) { + fprintf(opt->log,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url))); test_flush; } - exit_xh=-1; /* fatal error -> exit */ + opt->state.exit_xh=-1; /* fatal error -> exit */ return 0; } @@ -3729,7 +3498,7 @@ int htsAddLink(htsmoduleStruct* str, char* link) { //strcpybuff(liens[lien_tot]->fil,fil); //strcpybuff(liens[lien_tot]->sav,save); if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"(module): OK, NOTE: %s%s -> %s"LF,liens[lien_tot]->adr,liens[lien_tot]->fil,liens[lien_tot]->sav); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(module): OK, NOTE: %s%s -> %s"LF,liens[lien_tot]->adr,liens[lien_tot]->fil,liens[lien_tot]->sav); test_flush; } diff --git a/src/htscore.h b/src/htscore.h index e1966d3..5e88313 100644 --- a/src/htscore.h +++ b/src/htscore.h @@ -35,16 +35,15 @@ Please visit our Website: http://www.httrack.com /* ------------------------------------------------------------ */ // Fichier librairie .h -#ifndef HTTRACK_DEFH -#define HTTRACK_DEFH - +#ifndef HTS_CORE_DEFH +#define HTS_CORE_DEFH #include "htsglobal.h" /* specific definitions */ #include "htsbase.h" // Includes & définitions -#ifdef HAVE_SYS_TYPES_H +#if ( defined(_WIN32) ||defined(HAVE_SYS_TYPES_H) ) #include <sys/types.h> #endif #ifdef HAVE_SYS_STAT_H @@ -55,29 +54,99 @@ Please visit our Website: http://www.httrack.com #include <conio.h> #endif #ifndef _WIN32_WCE -#include <signal.h> #include <direct.h> -#else -#ifndef HTS_CECOMPAT -#include "signal.h" -#endif #endif #else -#include <signal.h> #ifdef HAVE_UNISTD_H #include <unistd.h> #endif #endif /* END specific definitions */ +/* Forward definitions */ +#ifndef HTS_DEF_FWSTRUCT_lien_url +#define HTS_DEF_FWSTRUCT_lien_url +typedef struct 
lien_url lien_url; +#endif +#ifndef HTS_DEF_FWSTRUCT_lien_back +#define HTS_DEF_FWSTRUCT_lien_back +typedef struct lien_back lien_back; +#endif +#ifndef HTS_DEF_FWSTRUCT_struct_back +#define HTS_DEF_FWSTRUCT_struct_back +typedef struct struct_back struct_back; +#endif +#ifndef HTS_DEF_FWSTRUCT_cache_back +#define HTS_DEF_FWSTRUCT_cache_back +typedef struct cache_back cache_back; +#endif +#ifndef HTS_DEF_FWSTRUCT_hash_struct +#define HTS_DEF_FWSTRUCT_hash_struct +typedef struct hash_struct hash_struct; +#endif +#ifndef HTS_DEF_FWSTRUCT_filecreate_params +#define HTS_DEF_FWSTRUCT_filecreate_params +typedef struct filecreate_params filecreate_params; +#endif // Include htslib.h for all types #include "htslib.h" +// options #include "htsopt.h" +// INCLUDES .H PARTIES DE CODE HTTRACK + +// routine main +#include "htscoremain.h" + +// core routines +#include "htscore.h" + +// divers outils pour httrack.c +#include "htstools.h" + +// aide pour la version en ligne de commande +#include "htshelp.h" + +// génération du nom de fichier à sauver +#include "htsname.h" + +// gestion ftp +#include "htsftp.h" + +// gestion interception d'URL +#include "htscatchurl.h" + +// gestion robots.txt +#include "htsrobots.h" + +// routines d'acceptation de liens +#include "htswizard.h" + +// routines de regexp +#include "htsfilters.h" + +// gestion backing +#include "htsback.h" + +// gestion cache +#include "htscache.h" + +// gestion hashage +#include "htshash.h" +#include "htsinthash.h" + +#include "htsdefines.h" + +#include "hts-indextmpl.h" + // structure d'un lien -typedef struct lien_url { +#ifndef HTS_DEF_FWSTRUCT_lien_url +#define HTS_DEF_FWSTRUCT_lien_url +typedef struct lien_url lien_url; +#endif +struct lien_url { char firstblock; // flag 1=premier malloc char link_import; // lien importé à la suite d'un moved - ne pas appliquer les règles classiques up/down int depth; // profondeur autorisée lien ; >0 forte 0=faible @@ -95,10 +164,14 @@ typedef struct lien_url { char* former_fil; 
// nom du fichier distant initial (avant éventuel moved), peut être nul // pour optimisation: int hash_next[3]; // prochain lien avec même valeur hash -} lien_url; +}; // chargement de fichiers en 'arrière plan' -typedef struct lien_back { +#ifndef HTS_DEF_FWSTRUCT_lien_back +#define HTS_DEF_FWSTRUCT_lien_back +typedef struct lien_back lien_back; +#endif +struct lien_back { #if DEBUG_CHECKINT char magic; #endif @@ -133,7 +206,7 @@ typedef struct lien_back { LLint chunk_blocksize; // taille data declaree par le chunk LLint compressed_size; // taille compressés (stats uniquement) // - int* pass2_ptr; // pointeur sur liens[ptr]->pass2 + //int links_index; // to access liens[links_index] // char info[256]; // éventuel status pour le ftp int stop_ftp; // flag stop pour ftp @@ -141,18 +214,27 @@ typedef struct lien_back { #if DEBUG_CHECKINT char magic2; #endif -} lien_back; +}; -typedef struct struct_back { +#ifndef HTS_DEF_FWSTRUCT_struct_back +#define HTS_DEF_FWSTRUCT_struct_back +typedef struct struct_back struct_back; +#endif +struct struct_back { lien_back* lnk; int count; - void* ready; -} struct_back; + inthash ready; + LLint ready_size_bytes; +}; typedef struct cache_back_zip_entry cache_back_zip_entry; // cache -typedef struct cache_back { +#ifndef HTS_DEF_FWSTRUCT_cache_back +#define HTS_DEF_FWSTRUCT_cache_back +typedef struct cache_back cache_back; +#endif +struct cache_back { int version; // 0 ou 1 /* */ int type; @@ -163,9 +245,9 @@ typedef struct cache_back { FILE *txt; // liste des fichiers (info) char lastmodified[256]; // HASH - void* hashtable; + inthash hashtable; // HASH for tests (naming subsystem) - void* cached_tests; + inthash cached_tests; // fichiers log optionnels FILE* log; FILE* errlog; @@ -173,32 +255,40 @@ typedef struct cache_back { int ptr_ant; // pointeur pour anticiper int ptr_last; // pointeur pour anticiper // - void* zipInput; - void* zipOutput; + void *zipInput; + void *zipOutput; cache_back_zip_entry* zipEntries; int 
zipEntriesOffs; int zipEntriesCapa; -} cache_back; +}; -typedef struct hash_struct { +#ifndef HTS_DEF_FWSTRUCT_hash_struct +#define HTS_DEF_FWSTRUCT_hash_struct +typedef struct hash_struct hash_struct; +#endif +struct hash_struct { lien_url** liens; // pointeur sur liens int max_lien; // indice le plus grand rencontré int hash[3][HTS_HASH_SIZE]; // tables pour sav/adr-fil/former_adr-former_fil -} hash_struct; +}; -typedef struct filecreate_params { +#ifndef HTS_DEF_FWSTRUCT_filecreate_params +#define HTS_DEF_FWSTRUCT_filecreate_params +typedef struct filecreate_params filecreate_params; +#endif +struct filecreate_params { FILE* lst; char path[HTS_URLMAXSIZE*2]; -} filecreate_params; +}; /* Library internal definictions */ #ifdef HTS_INTERNAL_BYTECODE -static int cache_writable(cache_back* cache) { +HTS_STATIC int cache_writable(cache_back* cache) { return (cache != NULL && ( cache->dat != NULL || cache->zipOutput != NULL ) ); } -static int cache_readable(cache_back* cache) { +HTS_STATIC int cache_readable(cache_back* cache) { return (cache != NULL && ( cache->olddat != NULL || cache->zipInput != NULL ) ); } @@ -208,146 +298,29 @@ static int cache_readable(cache_back* cache) { // INCLUDES .H PARTIES DE CODE HTTRACK -// routine main -#include "htscoremain.h" - -// divers outils pour httrack.c -#include "htstools.h" - -// aide pour la version en ligne de commande -#include "htshelp.h" - -// génération du nom de fichier à sauver -#include "htsname.h" - -// gestion ftp -#include "htsftp.h" - -// routine parser java -#include "htsjava.h" - -// gestion interception d'URL -#include "htscatchurl.h" - -// gestion robots.txt -#include "htsrobots.h" - -// routines d'acceptation de liens -#include "htswizard.h" - -// routines de regexp -#include "htsfilters.h" - -// gestion backing -#include "htsback.h" - -// gestion cache -#include "htscache.h" - -// gestion hashage -#include "htshash.h" -#include "htsinthash.h" - -// gestion réentrance -#include "htsnostatic.h" - -// infos 
console -#if HTS_ANALYSTE_CONSOLE -#include "httrack.h" -#endif - -#include "htsdefines.h" - -#include "hts-indextmpl.h" - -// INCLUDES .H PARTIES DE CODE HTTRACK - -// - -/* -typedef void (* t_hts_htmlcheck_init)(void); -typedef void (* t_hts_htmlcheck_uninit)(void); -typedef int (* t_hts_htmlcheck_start)(httrackp* opt); -typedef int (* t_hts_htmlcheck_end)(void); -typedef int (* t_hts_htmlcheck_chopt)(httrackp* opt); -typedef int (* t_hts_htmlcheck_process)(char** html,int* len,char* url_adresse,char* url_fichier); -typedef int (* t_hts_htmlcheck)(char* html,int len,char* url_adresse,char* url_fichier); -typedef char* (* t_hts_htmlcheck_query)(char* question); -typedef char* (* t_hts_htmlcheck_query2)(char* question); -typedef char* (* t_hts_htmlcheck_query3)(char* question); -typedef int (* t_hts_htmlcheck_loop)(struct_back* sback,int back_index,int lien_tot,int lien_ntot,int stat_time,hts_stat_struct* stats); -typedef int (* t_hts_htmlcheck_check)(char* adr,char* fil,int status); -typedef int (* t_hts_htmlcheck_check_mime)(char* adr,char* fil,char* mime,int status); -typedef void (* t_hts_htmlcheck_pause)(char* lockfile); -typedef void (* t_hts_htmlcheck_filesave)(char* file); -typedef void (* t_hts_htmlcheck_filesave2)(char* hostname,char* filename,char* localfile,int is_new,int is_modified, int not_updated); -typedef int (* t_hts_htmlcheck_linkdetected)(char* link); -typedef int (* t_hts_htmlcheck_linkdetected2)(char* link, char* tag_start); -typedef int (* t_hts_htmlcheck_xfrstatus)(lien_back* back); -typedef int (* t_hts_htmlcheck_savename)(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save); -typedef int (* t_hts_htmlcheck_sendhead)(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* outgoing); -typedef int (* t_hts_htmlcheck_receivehead)(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* incoming); -*/ - -// demande d'interaction avec le shell -#if HTS_ANALYSTE 
-//char HTbuff[1024]; -/* -extern t_hts_htmlcheck_init hts_htmlcheck_init; -extern t_hts_htmlcheck_uninit hts_htmlcheck_uninit; -extern t_hts_htmlcheck_start hts_htmlcheck_start; -extern t_hts_htmlcheck_end hts_htmlcheck_end; -extern t_hts_htmlcheck_chopt hts_htmlcheck_chopt; -extern t_hts_htmlcheck_process hts_htmlcheck_preprocess; -extern t_hts_htmlcheck_process hts_htmlcheck_postprocess; -extern t_hts_htmlcheck hts_htmlcheck; -extern t_hts_htmlcheck_query hts_htmlcheck_query; -extern t_hts_htmlcheck_query2 hts_htmlcheck_query2; -extern t_hts_htmlcheck_query3 hts_htmlcheck_query3; -extern t_hts_htmlcheck_loop hts_htmlcheck_loop; -extern t_hts_htmlcheck_check hts_htmlcheck_check; -extern t_hts_htmlcheck_check_mime hts_htmlcheck_check_mime; -extern t_hts_htmlcheck_pause hts_htmlcheck_pause; -extern t_hts_htmlcheck_filesave hts_htmlcheck_filesave; -extern t_hts_htmlcheck_filesave2 hts_htmlcheck_filesave2; -extern t_hts_htmlcheck_linkdetected hts_htmlcheck_linkdetected; -extern t_hts_htmlcheck_linkdetected2 hts_htmlcheck_linkdetected2; -extern t_hts_htmlcheck_xfrstatus hts_htmlcheck_xfrstatus; -extern t_hts_htmlcheck_savename hts_htmlcheck_savename; -extern t_hts_htmlcheck_sendhead hts_htmlcheck_sendhead; -extern t_hts_htmlcheck_receivehead hts_htmlcheck_receivehead; -*/ - /* Library internal definictions */ #ifdef HTS_INTERNAL_BYTECODE // #ifndef HTTRACK_DEFLIB -HTSEXT_API int hts_is_parsing(int flag); -HTSEXT_API int hts_is_testing(void); -HTSEXT_API int hts_is_exiting(void); -HTSEXT_API int hts_setopt(httrackp* opt); -HTSEXT_API int hts_addurl(char** url); -HTSEXT_API int hts_resetaddurl(void); -HTSEXT_API int copy_htsopt(httrackp* from,httrackp* to); -HTSEXT_API char* hts_errmsg(void); -HTSEXT_API int hts_setpause(int); // pause transfer -HTSEXT_API int hts_request_stop(int force); +HTSEXT_API int hts_is_parsing(httrackp *opt, int flag); +HTSEXT_API int hts_is_testing(httrackp *opt); +HTSEXT_API int hts_addurl(httrackp *opt, char** url); +HTSEXT_API int 
hts_resetaddurl(httrackp *opt); +HTSEXT_API int copy_htsopt(const httrackp* from,httrackp* to); +HTSEXT_API char* hts_errmsg(httrackp *opt); +HTSEXT_API int hts_setpause(httrackp *opt, int); // -HTSEXT_API char* hts_cancel_file(char * s); -HTSEXT_API void hts_cancel_test(void); -HTSEXT_API void hts_cancel_parsing(void); -#endif +HTSEXT_API int hts_is_exiting(httrackp *opt); +HTSEXT_API int hts_request_stop(httrackp* opt, int force); // -// Variables globales -extern int _hts_in_html_parsing; -extern int _hts_in_html_done; // % réalisés -extern int _hts_in_html_poll; // parsing -extern char _hts_errmsg[1100]; -extern int _hts_setpause; -//extern httrackp* _hts_setopt; -extern char** _hts_addurl; -extern int _hts_cancel; +HTSEXT_API int hts_cancel_file_push(httrackp *opt, const char *url); +HTSEXT_API void hts_cancel_test(httrackp *opt); +HTSEXT_API void hts_cancel_parsing(httrackp *opt); +#endif + +char* hts_cancel_file_pop(httrackp *opt); + #endif // @@ -355,23 +328,24 @@ extern int _hts_cancel; //int httpmirror(char* url,int level,httrackp opt); int httpmirror(char* url1,httrackp* opt); -int filesave(httrackp* opt,char* adr,int len,char* s,char* url_adr /* = NULL */,char* url_fil /* = NULL */); +int filesave(httrackp* opt,const char* adr,int len,const char* s,const char* url_adr /* = NULL */,const char* url_fil /* = NULL */); +char* hts_cancel_file_pop(httrackp *opt); int check_fatal_io_errno(void); int engine_stats(void); void host_ban(httrackp* opt,lien_url** liens,int ptr,int lien_tot,struct_back* sback,char* host); -FILE* filecreate(char* s); -FILE* fileappend(char* s); -int filecreateempty(char* filename); -int filenote(char* s,filecreate_params* params); -void file_notify(char* adr,char* fil,char* save,int create,int modify,int wasupdated); -HTS_INLINE void usercommand(httrackp* opt,int exe,char* cmd,char* file,char* adr,char* fil); -void usercommand_exe(char* cmd,char* file); -//void* structcheck_init(int init); +FILE* filecreate(filenote_strc *strct,const 
char* s); +FILE* fileappend(filenote_strc *strct,const char* s); +int filecreateempty(filenote_strc *strct, const char* filename); +int filenote(filenote_strc *strct,const char* s,filecreate_params* params); +void file_notify(httrackp* opt,const char* adr,const char* fil,const char* save,int create,int modify,int wasupdated); +HTS_INLINE void usercommand(httrackp* opt,int exe,const char* cmd,const char* file,const char* adr,const char* fil); +void usercommand_exe(const char* cmd,const char* file); int filters_init(char*** ptrfilters, int maxfilter, int filterinc); #ifndef HTTRACK_DEFLIB -HTSEXT_API int structcheck(char* s); +HTSEXT_API int structcheck(const char* path); +HTSEXT_API int dir_exists(const char* path); #endif -HTS_INLINE int fspc(FILE* fp,char* type); +HTS_INLINE int fspc(httrackp *opt,FILE* fp,const char* type); char* next_token(char* p,int flag); // char* readfile(char* fil); @@ -393,15 +367,13 @@ int backlinks_done(struct_back* sback,lien_url** liens,int lien_tot,int ptr); int back_fillmax(struct_back* sback,httrackp* opt,cache_back* cache,lien_url** liens,int ptr,int numero_passe,int lien_tot); // cancel file -#if HTS_ANALYSTE #ifndef HTTRACK_DEFLIB -HTSEXT_API char* hts_cancel_file(char * s); -HTSEXT_API void hts_cancel_test(void); -HTSEXT_API void hts_cancel_parsing(void); -#endif +HTSEXT_API int hts_cancel_file_push(httrackp *opt, const char *url); +HTSEXT_API void hts_cancel_test(httrackp *opt); +HTSEXT_API void hts_cancel_parsing(httrackp *opt); #endif -int ask_continue(void); +int ask_continue(httrackp *opt); int nombre_digit(int n); // Java @@ -418,19 +390,6 @@ int read_stdin(char* s,int max); HTS_INLINE int check_sockerror(T_SOC s); HTS_INLINE int check_sockdata(T_SOC s); -httrackp* hts_declareoptbuffer(httrackp* optdecl); -void sig_finish( int code ); // finir et quitter -void sig_term( int code ); // quitter -#if HTS_WIN -void sig_ask( int code ); // demander -#else -void sig_back( int code ); // ignorer et mettre en backing -void 
sig_ask( int code ); // demander -void sig_ignore( int code ); // ignorer signal -void sig_brpipe( int code ); // treat if necessary -void sig_doback(int); // mettre en arrière plan -#endif - /* external modules */ int htsAddLink(htsmoduleStruct* str, char* link); @@ -440,7 +399,3 @@ void voidf(void); #define HTS_TOPINDEX "TOP_INDEX_HTTRACK" #endif - -#endif - - diff --git a/src/htscoremain.c b/src/htscoremain.c index bdebfb0..4231ee0 100644 --- a/src/htscoremain.c +++ b/src/htscoremain.c @@ -30,7 +30,7 @@ Please visit our Website: http://www.httrack.com /* ------------------------------------------------------------ */ -/* File: httrack.c subroutines: */ +/* File: opt->c subroutines: */ /* main routine (first called) */ /* Author: Xavier Roche */ /* ------------------------------------------------------------ */ @@ -50,11 +50,11 @@ Please visit our Website: http://www.httrack.com #include <ctype.h> #if USE_BEGINTHREAD -#if HTS_WIN +#ifdef _WIN32 #include <process.h> #endif #endif -#if HTS_WIN +#ifdef _WIN32 #else #ifndef HTS_DO_NOT_USE_UID /* setuid */ @@ -65,8 +65,6 @@ Please visit our Website: http://www.httrack.com #endif #endif -extern int exit_xh; // sortir prématurément - /* Resolver */ extern int IPV6_resolver; @@ -75,7 +73,7 @@ extern int IPV6_resolver; #define cmdl_add(token,argc,argv,buff,ptr) \ argv[argc]=(buff+ptr); \ strcpybuff(argv[argc],token); \ - ptr += (strlen(argv[argc])+2); \ + ptr += (int) (strlen(argv[argc])+2); \ argc++ // Insert a command in the argc/argv @@ -87,7 +85,7 @@ extern int IPV6_resolver; } \ argv[0]=(buff+ptr); \ strcpybuff(argv[0],token); \ - ptr += (strlen(argv[0])+2); \ + ptr += (int) (strlen(argv[0])+2); \ argc++ #define htsmain_free() do { if (url != NULL) { free(url); } } while(0) @@ -109,41 +107,16 @@ extern int IPV6_resolver; } \ } while(0) -void set_wrappers(void) { -#if HTS_ANALYSTE - // custom wrappers - hts_htmlcheck_init = (t_hts_htmlcheck_init) htswrap_read("init"); - hts_htmlcheck_uninit = (t_hts_htmlcheck_uninit) 
htswrap_read("free"); - hts_htmlcheck_start = (t_hts_htmlcheck_start) htswrap_read("start"); - hts_htmlcheck_end = (t_hts_htmlcheck_end) htswrap_read("end"); - hts_htmlcheck_chopt = (t_hts_htmlcheck_chopt) htswrap_read("change-options"); - hts_htmlcheck_preprocess = (t_hts_htmlcheck_process) htswrap_read("preprocess-html"); - hts_htmlcheck_postprocess = (t_hts_htmlcheck_process) htswrap_read("postprocess-html"); - hts_htmlcheck = (t_hts_htmlcheck) htswrap_read("check-html"); - hts_htmlcheck_query = (t_hts_htmlcheck_query) htswrap_read("query"); - hts_htmlcheck_query2 = (t_hts_htmlcheck_query2) htswrap_read("query2"); - hts_htmlcheck_query3 = (t_hts_htmlcheck_query3) htswrap_read("query3"); - hts_htmlcheck_loop = (t_hts_htmlcheck_loop) htswrap_read("loop"); - hts_htmlcheck_check = (t_hts_htmlcheck_check) htswrap_read("check-link"); - hts_htmlcheck_check_mime = (t_hts_htmlcheck_check_mime) htswrap_read("check-mime"); - hts_htmlcheck_pause = (t_hts_htmlcheck_pause) htswrap_read("pause"); - hts_htmlcheck_filesave = (t_hts_htmlcheck_filesave) htswrap_read("save-file"); - hts_htmlcheck_filesave2 = (t_hts_htmlcheck_filesave2) htswrap_read("save-file2"); - hts_htmlcheck_linkdetected = (t_hts_htmlcheck_linkdetected) htswrap_read("link-detected"); - hts_htmlcheck_linkdetected2 = (t_hts_htmlcheck_linkdetected2) htswrap_read("link-detected2"); - hts_htmlcheck_xfrstatus = (t_hts_htmlcheck_xfrstatus) htswrap_read("transfer-status"); - hts_htmlcheck_savename = (t_hts_htmlcheck_savename) htswrap_read("save-name"); - hts_htmlcheck_sendhead = (t_hts_htmlcheck_sendhead) htswrap_read("send-header"); - hts_htmlcheck_receivehead = (t_hts_htmlcheck_receivehead) htswrap_read("receive-header"); -#endif +HTSEXT_API int hts_main(int argc, char **argv) +{ + httrackp *opt = hts_create_opt(); + int ret = hts_main2(argc, argv, opt); + hts_free_opt(opt); + return ret; } // Main, récupère les paramètres et appelle le robot -#if HTS_ANALYSTE -HTSEXT_API int hts_main(int argc, char **argv) { -#else 
-int main(int argc, char **argv) { -#endif +HTSEXT_API int hts_main2(int argc, char **argv, httrackp *opt) { char** x_argv=NULL; // Patch pour argv et argc: en cas de récupération de ligne de commande char* x_argvblk=NULL; // (reprise ou update) int x_ptr=0; // offset @@ -154,15 +127,9 @@ int main(int argc, char **argv) { int url_sz = 65535; //char url[65536]; // URLS séparées par un espace // the parametres - httrackp BIGSTK httrack; int httrack_logmode=3; // ONE log file int recuperer=0; // récupérer un plantage (n'arrive jamais, à supprimer) -#if HTS_WIN -#if HTS_ANALYSTE!=2 - WORD wVersionRequested; /* requested version WinSock API */ - WSADATA BIGSTK wsadata; /* Windows Sockets API data */ -#endif -#else +#ifndef _WIN32 #ifndef HTS_DO_NOT_USE_UID int switch_uid=-1,switch_gid=-1; /* setuid/setgid */ #endif @@ -170,207 +137,41 @@ int main(int argc, char **argv) { #endif // ensureUrlCapacity(url, url_sz, 65536); - // - -#if HTS_ANALYSTE - // custom wrappers - set_wrappers(); -#endif - - // options par défaut - memset(&httrack, 0, sizeof(httrackp)); - httrack.wizard=2; // wizard automatique - httrack.quiet=0; // questions - // - httrack.travel=0; // même adresse - httrack.depth=9999; // mirror total par défaut - httrack.extdepth=0; // mais pas à l'extérieur - httrack.seeker=1; // down - httrack.urlmode=2; // relatif par défaut - httrack.debug=0; // pas de débug en plus - httrack.getmode=3; // linear scan - httrack.maxsite=-1; // taille max site (aucune) - httrack.maxfile_nonhtml=-1; // taille max fichier non html - httrack.maxfile_html=-1; // idem pour html - httrack.maxsoc=4; // nbre socket max - httrack.fragment=-1; // pas de fragmentation - httrack.nearlink=0; // ne pas prendre les liens non-html "adjacents" - httrack.makeindex=1; // faire un index - httrack.kindex=0; // index 'keyword' - httrack.delete_old=1; // effacer anciens fichiers - httrack.makestat=0; // pas de fichier de stats - httrack.maketrack=0; // ni de tracking - httrack.timeout=120; // timeout 
par défaut (2 minutes) - httrack.cache=1; // cache prioritaire - httrack.shell=0; // pas de shell par defaut - httrack.proxy.active=0; // pas de proxy - strcpybuff(httrack.proxy.bindhost, ""); // bind default host - httrack.user_agent_send=1; // envoyer un user-agent - strcpybuff(httrack.user_agent,"Mozilla/4.5 (compatible; HTTrack 3.0x; Windows 98)"); - strcpybuff(httrack.referer, ""); - strcpybuff(httrack.from, ""); - httrack.savename_83=0; // noms longs par défaut - httrack.savename_type=0; // avec structure originale - httrack.savename_delayed=2;// hard delayed type (default) - httrack.delayed_cached=1; // cached delayed type (default) - httrack.mimehtml=0; // pas MIME-html - httrack.parsejava=1; // parser classes - httrack.hostcontrol=0; // PAS de control host pour timeout et traffic jammer - httrack.retry=2; // 2 retry par défaut - httrack.errpage=1; // copier ou générer une page d'erreur en cas d'erreur (404 etc.) - httrack.check_type=1; // vérifier type si inconnu (cgi,asp..) SAUF / considéré comme html - httrack.all_in_cache=0; // ne pas tout stocker en cache - httrack.robots=2; // traiter les robots.txt - httrack.external=0; // liens externes normaux - httrack.passprivacy=0; // mots de passe dans les fichiers - httrack.includequery=1; // include query-string par défaut - httrack.mirror_first_page=0; // pas mode mirror links - httrack.accept_cookie=1; // gérer les cookies - httrack.cookie=NULL; - httrack.http10=0; // laisser http/1.1 - httrack.nokeepalive = 0; // pas keep-alive - httrack.nocompression=0; // pas de compression - httrack.tolerant=0; // ne pas accepter content-length incorrect - httrack.parseall=1; // tout parser (tags inconnus, par exemple) - httrack.parsedebug=0; // pas de mode débuggage - httrack.norecatch=0; // ne pas reprendre les fichiers effacés par l'utilisateur - httrack.verbosedisplay=0; // pas d'animation texte - httrack.sizehack=0; // size hack - httrack.urlhack=1; // url hack (normalizer) - 
strcpybuff(httrack.footer,HTS_DEFAULT_FOOTER); - httrack.ftp_proxy=1; // proxy http pour ftp - strcpybuff(httrack.filelist,""); - strcpybuff(httrack.lang_iso,"en, *"); - strcpybuff(httrack.mimedefs,"\n"); // aucun filtre mime (\n IMPORTANT) - // - httrack.log=stdout; - httrack.errlog=stderr; - httrack.flush=1; // flush sur les fichiers log - //httrack.aff_progress=0; - httrack.keyboard=0; - // - strcpybuff(httrack.path_html,""); - strcpybuff(httrack.path_log,""); - strcpybuff(httrack.path_bin,""); - // -#if HTS_SPARE_MEMORY==0 - httrack.maxlink=100000; // 100,000 liens max par défaut (400Kb) - httrack.maxfilter=200; // 200 filtres max par défaut -#else - httrack.maxlink=10000; // 10,000 liens max par défaut (40Kb) - httrack.maxfilter=50; // 50 filtres max par défaut -#endif - httrack.maxcache=1048576*32; // a peu près 32Mo en cache max -- OPTION NON PARAMETRABLE POUR L'INSTANT -- - //httrack.maxcache_anticipate=256; // maximum de liens à anticiper - httrack.maxtime=-1; // temps max en secondes -#if HTS_USEMMS - httrack.mms_maxtime = 60*3600; // max time for mms streams (one hour) -#endif - httrack.maxrate=25000; // taux maxi - httrack.maxconn=5.0; // nombre connexions/s - httrack.waittime=-1; // wait until.. 
hh*3600+mm*60+ss - // - httrack.exec=argv[0]; - httrack.is_update=0; // not an update (yet) - httrack.dir_topindex=0; // do not built top index (yet) - // - httrack.bypass_limits=0; // enforce limits by default - httrack.state.stop=0; // stopper - httrack.state.exit_xh=0; // abort - // - _DEBUG_HEAD=0; // pas de debuggage en têtes + // Create options + _DEBUG_HEAD=0; // pas de debuggage en têtes -#if HTS_WIN -#if HTS_ANALYSTE!=2 - { - int stat; - wVersionRequested = 0x0101; - stat = WSAStartup( wVersionRequested, &wsadata ); - if (stat != 0) { - HTS_PANIC_PRINTF("Winsock not found!\n"); - htsmain_free(); - return -1; - } else if (LOBYTE(wsadata.wVersion) != 1 && HIBYTE(wsadata.wVersion) != 1) { - HTS_PANIC_PRINTF("WINSOCK.DLL does not support version 1.1\n"); - WSACleanup(); - htsmain_free(); - return -1; - } - } -#endif -#endif - /* Init root dir */ hts_rootdir(argv[0]); -#if HTS_WIN +#ifdef _WIN32 #else /* Terminal is a tty, may ask questions and display funny information */ if (isatty(1)) { - httrack.quiet=0; - httrack.verbosedisplay=1; + opt->quiet=0; + opt->verbosedisplay=1; } /* Not a tty, no stdin input or funny output! */ else { - httrack.quiet=1; - httrack.verbosedisplay=0; + opt->quiet=1; + opt->verbosedisplay=0; } #endif - /* First test: if -#R then only launch ftp */ - if (argc > 2) { - if (strcmp(argv[1],"-#R")==0) { - if (argc==6) { - lien_back r; - char* path; - FILE* fp; - strcpybuff(r.url_adr,argv[2]); - strcpybuff(r.url_fil,argv[3]); - strcpybuff(r.url_sav,argv[4]); - path=argv[5]; - r.status=1000; - run_launch_ftp(&r); - fp=fopen(fconv(path),"wb"); - if (fp) { - fprintf(fp,"%d %s",r.r.statuscode,r.r.msg); - fclose(fp); fp=NULL; - rename(fconv(path),fconcat(path,".ok")); - } else remove(fconv(path)); - } else { - printf("htsftp error, wrong parameter number (%d)\n",argc); - } - exit(0); // pas _exit() - } - } - - // ok, non ftp, continuer - - // Binary program path? 
#ifndef HTS_HTTRACKDIR { - char* path=fslash(argv[0]); + char catbuff[CATBUFF_SIZE]; + char* path=fslash(catbuff,argv[0]); char* a; if ((a=strrchr(path,'/'))) { - httrack.path_bin[0]='\0'; - strncatbuff(httrack.path_bin,argv[0],(int) a - (int) path); + StringCopyN(opt->path_bin,argv[0],a - path); } } #else - strcpybuff(httrack.path_bin, HTS_HTTRACKDIR); + StringCopy(opt->path_bin, HTS_HTTRACKDIR); #endif - /* libhttrack-plugin DLL preload (libhttrack-plugin.so or libhttrack-plugin.dll) */ - { - void* userfunction = getFunctionPtr(&httrack, "libhttrack-plugin", "plugin_init"); - if (userfunction != NULL) { - t_hts_htmlcheck_init initFnc = (t_hts_htmlcheck_init) userfunction; - initFnc(); - set_wrappers(); /* Re-read wrappers internal static functions */ - } - } - /* filter CR, LF, TAB.. */ { int na; @@ -387,15 +188,13 @@ int main(int argc, char **argv) { } } - - /* create x_argvblk buffer for transformed command line */ { int current_size=0; int size; int na; for(na=0;na<argc;na++) - current_size += (strlen(argv[na]) + 1); + current_size += (int) (strlen(argv[na]) + 1); if ((size=fsize("config"))>0) current_size += size; x_argvblk=(char*) malloct(current_size+32768); @@ -459,22 +258,22 @@ int main(int argc, char **argv) { //} } else { if (strcmp(tmp_argv[0],"-h")==0) { - help(argv[0],!httrack.quiet); + help(argv[0],!opt->quiet); htsmain_free(); return 0; } else { if (strncmp(tmp_argv[0],"--",2)) { /* pas */ if ((strchr(tmp_argv[0],'q')!=NULL)) - httrack.quiet=1; // ne pas poser de questions! (nohup par exemple) + opt->quiet=1; // ne pas poser de questions! (nohup par exemple) if ((strchr(tmp_argv[0],'i')!=NULL)) { // doit.log! argv_url=-1; /* forcer */ - httrack.quiet=1; + opt->quiet=1; } } else if (strcmp(tmp_argv[0] + 2,"quiet") == 0) { - httrack.quiet=1; // ne pas poser de questions! (nohup par exemple) + opt->quiet=1; // ne pas poser de questions! 
(nohup par exemple) } else if (strcmp(tmp_argv[0] + 2,"continue") == 0) { argv_url=-1; /* forcer */ - httrack.quiet=1; + opt->quiet=1; } } } @@ -535,40 +334,41 @@ int main(int argc, char **argv) { } else { int i, j; int inQuote; - char* path; + String *path; int noDbl = 0; if (com[1] == '1') { /* only 1 arg */ com++; noDbl = 1; } na++; - httrack.path_html[0] = '\0'; - httrack.path_log[0] = '\0'; - for(i = 0, j = 0, inQuote = 0, path = httrack.path_html ; argv[na][i] != 0 ; i++) { + StringClear(opt->path_html); + StringClear(opt->path_log); + for(i = 0, j = 0, inQuote = 0, path = &opt->path_html ; argv[na][i] != 0 ; i++) { if (argv[na][i] == '"') { if (inQuote) inQuote = 0; else inQuote = 1; } else if (!inQuote && !noDbl && argv[na][i] == ',') { - path[j++] = '\0'; + //StringAddchar(path, '\0'); j = 0; - path = httrack.path_log; + path = &opt->path_log; } else { - path[j++] = argv[na][i]; + StringAddchar(*path, argv[na][i]); + //path[j++] = argv[na][i]; } } - path[j++] = '\0'; - if (httrack.path_log[0] == '\0') { - strcpybuff(httrack.path_log, httrack.path_html); + //path[j++] = '\0'; + if (StringLength(opt->path_log) == 0) { + StringCopyS(opt->path_log, opt->path_html); } - check_path(httrack.path_log,argv_firsturl); - if (check_path(httrack.path_html,argv_firsturl)) { - httrack.dir_topindex=1; // rebuilt top index + check_path(&opt->path_log, argv_firsturl); + if (check_path(&opt->path_html, argv_firsturl)) { + opt->dir_topindex=1; // rebuilt top index } - //printf("-->%s\n%s\n",httrack.path_html,httrack.path_log); + //printf("-->%s\n%s\n",StringBuff(opt->path_html),StringBuff(opt->path_log)); } break; } // switch @@ -586,13 +386,13 @@ int main(int argc, char **argv) { printf("Loading httrackrc/doit.log\n"); #endif /* recreate a doit.log (no old doit.log or new URLs (and parameters)) */ - if ((strnotempty(httrack.path_log)) || (strnotempty(httrack.path_html))) + if ((strnotempty(StringBuff(opt->path_log))) || (strnotempty(StringBuff(opt->path_html)))) loops++; // 
do not loop once again and do not include rc file (O option exists) else { - if ( (!fexist(fconcat(httrack.path_log,"hts-cache/doit.log"))) || (argv_url>0) ) { - if (!optinclude_file(fconcat(httrack.path_log,HTS_HTTRACKRC),&argc,argv,x_argvblk,&x_ptr)) + if ( (!fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/doit.log"))) || (argv_url>0) ) { + if (!optinclude_file(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),HTS_HTTRACKRC),&argc,argv,x_argvblk,&x_ptr)) if (!optinclude_file(HTS_HTTRACKRC,&argc,argv,x_argvblk,&x_ptr)) { - if (!optinclude_file(fconcat(hts_gethome(),"/"HTS_HTTRACKRC),&argc,argv,x_argvblk,&x_ptr)) { + if (!optinclude_file(fconcat(OPT_GET_BUFF(opt), hts_gethome(),"/"HTS_HTTRACKRC),&argc,argv,x_argvblk,&x_ptr)) { #ifdef HTS_HTTRACKCNF optinclude_file(HTS_HTTRACKCNF,&argc,argv,x_argvblk,&x_ptr); #endif @@ -608,8 +408,8 @@ int main(int argc, char **argv) { } // traiter -O /* load doit.log and insert in current command line */ - if ( fexist(fconcat(httrack.path_log,"hts-cache/doit.log")) && (argv_url<=0) ) { - FILE* fp=fopen(fconcat(httrack.path_log,"hts-cache/doit.log"),"rb"); + if ( fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/doit.log")) && (argv_url<=0) ) { + FILE* fp=fopen(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/doit.log"),"rb"); if (fp) { int insert_after=1; /* insérer après nom au début */ // @@ -649,30 +449,30 @@ int main(int argc, char **argv) { #if DEBUG_STEPS printf("Checking cache\n"); #endif - if (!fexist(fconcat(httrack.path_log,"hts-cache/new.zip"))) { - if ( fexist(fconcat(httrack.path_log,"hts-cache/old.zip")) ) { - rename(fconcat(httrack.path_log,"hts-cache/old.zip"),fconcat(httrack.path_log,"hts-cache/new.zip")); + if (!fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.zip"))) { + if ( fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.zip")) ) { + rename(fconcat(OPT_GET_BUFF(opt), 
StringBuff(opt->path_log),"hts-cache/old.zip"),fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.zip")); } - } else if ( (!fexist(fconcat(httrack.path_log,"hts-cache/new.dat"))) || (!fexist(fconcat(httrack.path_log,"hts-cache/new.ndx"))) ) { - if ( (fexist(fconcat(httrack.path_log,"hts-cache/old.dat"))) && (fexist(fconcat(httrack.path_log,"hts-cache/old.ndx"))) ) { - remove(fconcat(httrack.path_log,"hts-cache/new.dat")); - remove(fconcat(httrack.path_log,"hts-cache/new.ndx")); - //remove(fconcat(httrack.path_log,"hts-cache/new.lst")); - rename(fconcat(httrack.path_log,"hts-cache/old.dat"),fconcat(httrack.path_log,"hts-cache/new.dat")); - rename(fconcat(httrack.path_log,"hts-cache/old.ndx"),fconcat(httrack.path_log,"hts-cache/new.ndx")); - //rename(fconcat(httrack.path_log,"hts-cache/old.lst"),fconcat(httrack.path_log,"hts-cache/new.lst")); + } else if ( (!fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.dat"))) || (!fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.ndx"))) ) { + if ( (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.dat"))) && (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.ndx"))) ) { + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.dat")); + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.ndx")); + //remove(fconcat(StringBuff(opt->path_log),"hts-cache/new.lst")); + rename(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.dat"),fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.dat")); + rename(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.ndx"),fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.ndx")); + //rename(fconcat(StringBuff(opt->path_log),"hts-cache/old.lst"),fconcat(StringBuff(opt->path_log),"hts-cache/new.lst")); } } /* Interrupted mirror detected */ - if (!httrack.quiet) { - if 
(fexist(fconcat(httrack.path_log,"hts-in_progress.lock"))) { + if (!opt->quiet) { + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-in_progress.lock"))) { /* Old cache */ - if ( (fexist(fconcat(httrack.path_log,"hts-cache/old.dat"))) && (fexist(fconcat(httrack.path_log,"hts-cache/old.ndx"))) ) { - if (httrack.log != NULL) { - fprintf(httrack.log,"Warning!\n"); - fprintf(httrack.log,"An aborted mirror has been detected!\nThe current temporary cache is required for any update operation and only contains data downloaded during the last aborted session.\nThe former cache might contain more complete information; if you do not want to lose that information, you have to restore it and delete the current cache.\nThis can easily be done here by erasing the hts-cache/new.* files\n"); - fprintf(httrack.log,"Please restart HTTrack with --continue (-iC1) option to override this message!\n"); + if ( (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.dat"))) && (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.ndx"))) ) { + if (opt->log != NULL) { + fprintf(opt->log,"Warning!\n"); + fprintf(opt->log,"An aborted mirror has been detected!\nThe current temporary cache is required for any update operation and only contains data downloaded during the last aborted session.\nThe former cache might contain more complete information; if you do not want to lose that information, you have to restore it and delete the current cache.\nThis can easily be done here by erasing the hts-cache/new.* files\n"); + fprintf(opt->log,"Please restart HTTrack with --continue (-iC1) option to override this message!\n"); } exit(0); } @@ -695,38 +495,38 @@ int main(int argc, char **argv) { if (argv[i][1]=='-') { // --xxx if ((strfield2(argv[i]+2,"clean")) || (strfield2(argv[i]+2,"tide"))) { // nettoyer strcpybuff(argv[i]+1,""); - if (fexist(fconcat(httrack.path_log,"hts-log.txt"))) - remove(fconcat(httrack.path_log,"hts-log.txt")); - if 
(fexist(fconcat(httrack.path_log,"hts-err.txt"))) - remove(fconcat(httrack.path_log,"hts-err.txt")); - if (fexist(fconcat(httrack.path_html,"index.html"))) - remove(fconcat(httrack.path_html,"index.html")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-log.txt"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-log.txt")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-err.txt"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-err.txt")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_html),"index.html"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_html),"index.html")); /* */ - if (fexist(fconcat(httrack.path_log,"hts-cache/new.zip"))) - remove(fconcat(httrack.path_log,"hts-cache/new.zip")); - if (fexist(fconcat(httrack.path_log,"hts-cache/old.zip"))) - remove(fconcat(httrack.path_log,"hts-cache/old.zip")); - if (fexist(fconcat(httrack.path_log,"hts-cache/new.dat"))) - remove(fconcat(httrack.path_log,"hts-cache/new.dat")); - if (fexist(fconcat(httrack.path_log,"hts-cache/new.ndx"))) - remove(fconcat(httrack.path_log,"hts-cache/new.ndx")); - if (fexist(fconcat(httrack.path_log,"hts-cache/old.dat"))) - remove(fconcat(httrack.path_log,"hts-cache/old.dat")); - if (fexist(fconcat(httrack.path_log,"hts-cache/old.ndx"))) - remove(fconcat(httrack.path_log,"hts-cache/old.ndx")); - if (fexist(fconcat(httrack.path_log,"hts-cache/new.lst"))) - remove(fconcat(httrack.path_log,"hts-cache/new.lst")); - if (fexist(fconcat(httrack.path_log,"hts-cache/old.lst"))) - remove(fconcat(httrack.path_log,"hts-cache/old.lst")); - if (fexist(fconcat(httrack.path_log,"hts-cache/new.txt"))) - remove(fconcat(httrack.path_log,"hts-cache/new.txt")); - if (fexist(fconcat(httrack.path_log,"hts-cache/old.txt"))) - remove(fconcat(httrack.path_log,"hts-cache/old.txt")); - if (fexist(fconcat(httrack.path_log,"hts-cache/doit.log"))) - remove(fconcat(httrack.path_log,"hts-cache/doit.log")); 
- if (fexist(fconcat(httrack.path_log,"hts-in_progress.lock"))) - remove(fconcat(httrack.path_log,"hts-in_progress.lock")); - rmdir(fconcat(httrack.path_log,"hts-cache")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.zip"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.zip")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.zip"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.zip")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.dat"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.dat")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.ndx"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.ndx")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.dat"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.dat")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.ndx"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.ndx")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.lst"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.lst")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.lst"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.lst")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.txt"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.txt")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.txt"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.txt")); + if (fexist(fconcat(OPT_GET_BUFF(opt), 
StringBuff(opt->path_log),"hts-cache/doit.log"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/doit.log")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-in_progress.lock"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-in_progress.lock")); + rmdir(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache")); // } else if (strfield2(argv[i]+2,"catchurl")) { // capture d'URL via proxy temporaire! argv_url=1; // forcer a passer les parametres @@ -747,7 +547,7 @@ int main(int argc, char **argv) { printf("Cheking for updates...\n"); strcpybuff(_args[0],argv[0]); strcpybuff(_args[1],"--get"); - sprintf(_args[2],HTS_UPDATE_WEBSITE,HTS_PLATFORM,""); + sprintf(_args[2],HTS_UPDATE_WEBSITE,0,""); strcpybuff(_args[3],"--quickinfo"); args[0]=_args[0]; args[1]=_args[1]; @@ -784,12 +584,12 @@ int main(int argc, char **argv) { } else if (!cmdl_opt(argv[na])) { argv_url++; // un de plus } else if (strcmp(argv[na],"-h")==0) { - help(argv[0],!httrack.quiet); + help(argv[0],!opt->quiet); htsmain_free(); return 0; } else { if ((strchr(argv[na],'q')!=NULL)) - httrack.quiet=1; // ne pas poser de questions! (nohup par exemple) + opt->quiet=1; // ne pas poser de questions! (nohup par exemple) if ((strchr(argv[na],'i')!=NULL)) { // doit.log! argv_url=0; na=argc; @@ -803,8 +603,8 @@ int main(int argc, char **argv) { // Exemple: httrack www.truc.fr -L0 puis ^C puis httrack sans URL : ajouter URL précédente /* if (argv_url==0) { - //if ((fexist(fconcat(httrack.path_log,"hts-cache/new.dat"))) && (fexist(fconcat(httrack.path_log,"hts-cache/new.ndx")))) { // il existe déja un cache précédent.. renommer - if (fexist(fconcat(httrack.path_log,"hts-cache/doit.log"))) { // un cache est présent + //if ((fexist(fconcat(StringBuff(opt->path_log),"hts-cache/new.dat"))) && (fexist(fconcat(StringBuff(opt->path_log),"hts-cache/new.ndx")))) { // il existe déja un cache précédent.. 
renommer + if (fexist(fconcat(StringBuff(opt->path_log),"hts-cache/doit.log"))) { // un cache est présent x_argvblk=(char*) calloct(32768,1); @@ -813,7 +613,7 @@ int main(int argc, char **argv) { int x_argc; //strcpybuff(x_argvblk,"httrack "); - fp=fopen(fconcat(httrack.path_log,"hts-cache/doit.log"),"rb"); + fp=fopen(fconcat(StringBuff(opt->path_log),"hts-cache/doit.log"),"rb"); if (fp) { linput(fp,x_argvblk+strlen(x_argvblk),8192); fclose(fp); fp=NULL; @@ -865,7 +665,7 @@ int main(int argc, char **argv) { na++; // sauter nom de proxy } else { if ((strchr(argv[na],'q')!=NULL) || (strchr(argv[na],'i')!=NULL)) - httrack.quiet=1; // ne pas poser de questions! (nohup par exemple) + opt->quiet=1; // ne pas poser de questions! (nohup par exemple) } } } @@ -878,26 +678,26 @@ int main(int argc, char **argv) { if (argv_url==0) { // Présence d'un cache, que faire?.. if ( - ( fexist(fconcat(httrack.path_log,"hts-cache/new.zip")) ) + ( fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.zip")) ) || - ( fexist(fconcat(httrack.path_log,"hts-cache/new.dat")) && fexist(fconcat(httrack.path_log,"hts-cache/new.ndx")) ) + ( fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.dat")) && fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.ndx")) ) ) { // il existe déja un cache précédent.. 
renommer - if (fexist(fconcat(httrack.path_log,"hts-cache/doit.log"))) { // un cache est présent + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/doit.log"))) { // un cache est présent if (x_argvblk!=NULL) { int m; // établir mode - mode cache: 1 (cache valide) 2 (cache à vérifier) - if (fexist(fconcat(httrack.path_log,"hts-in_progress.lock"))) { // cache prioritaire + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-in_progress.lock"))) { // cache prioritaire m=1; recuperer=1; } else { m=2; } - httrack.cache=m; + opt->cache=m; - if (httrack.quiet==0) { // sinon on continue automatiquement + if (opt->quiet==0) { // sinon on continue automatiquement HT_REQUEST_START; HT_PRINT("A cache (hts-cache/) has been found in the directory "); - HT_PRINT(httrack.path_log); + HT_PRINT(StringBuff(opt->path_log)); HT_PRINT(LF); if (m==1) { HT_PRINT("That means that a transfer has been aborted"LF); @@ -908,7 +708,7 @@ int main(int argc, char **argv) { } HT_PRINT("httrack "); HT_PRINT(x_argvblk); HT_PRINT("?"LF); HT_REQUEST_END; - if (!ask_continue()) { + if (!ask_continue(opt)) { htsmain_free(); return 0; } @@ -928,79 +728,75 @@ int main(int argc, char **argv) { } else { // aucune URL définie et pas de cache if (argc > 1 && strcmp(argv[0], "-#h") == 0) { - printf("HTTrack version "HTTRACK_VERSION"%s\n", WHAT_is_available); + printf("HTTrack version "HTTRACK_VERSION"%s\n", hts_get_version_info(opt)); exit(0); } -#if HTS_ANALYSTE!=2 - if (httrack.quiet) { -#endif - help(argv[0],!httrack.quiet); + if (opt->quiet) { + help(argv[0],!opt->quiet); htsmain_free(); return -1; -#if HTS_ANALYSTE!=2 } else { - help_wizard(&httrack); + help_wizard(opt); htsmain_free(); return -1; } -#endif htsmain_free(); return 0; } } else { // plus de 2 paramètres // un fichier log existe? - if (fexist(fconcat(httrack.path_log,"hts-in_progress.lock"))) { // fichier lock? 
+ if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-in_progress.lock"))) { // fichier lock? //char s[32]; - httrack.cache=1; // cache prioritaire - if (httrack.quiet==0) { + opt->cache=1; // cache prioritaire + if (opt->quiet==0) { if ( - ( fexist(fconcat(httrack.path_log,"hts-cache/new.zip")) ) + ( fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.zip")) ) || - ( fexist(fconcat(httrack.path_log,"hts-cache/new.dat")) && fexist(fconcat(httrack.path_log,"hts-cache/new.ndx")) ) + ( fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.dat")) && fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.ndx")) ) ) { HT_REQUEST_START; HT_PRINT("There is a lock-file in the directory "); - HT_PRINT(httrack.path_log); + HT_PRINT(StringBuff(opt->path_log)); HT_PRINT(LF"That means that a mirror has not been terminated"LF); HT_PRINT("Be sure you call httrack with proper parameters"LF); HT_PRINT("(The cache allows you to restart faster the transfer)"LF); HT_REQUEST_END; - if (!ask_continue()) { + if (!ask_continue(opt)) { htsmain_free(); return 0; } } } - } else if (fexist(fconcat(httrack.path_html,"index.html"))) { + } else if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_html),"index.html"))) { //char s[32]; - httrack.cache=2; // cache vient après test de validité - if (httrack.quiet==0) { + opt->cache=2; // cache vient après test de validité + if (opt->quiet==0) { if ( - ( fexist(fconcat(httrack.path_log,"hts-cache/new.zip")) ) + ( fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.zip")) ) || - ( fexist(fconcat(httrack.path_log,"hts-cache/new.dat")) && fexist(fconcat(httrack.path_log,"hts-cache/new.ndx")) ) + ( fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.dat")) && fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.ndx")) ) ) { HT_REQUEST_START; HT_PRINT("There is an index.html and a hts-cache folder in the 
directory "); - HT_PRINT(httrack.path_log); + HT_PRINT(StringBuff(opt->path_log)); HT_PRINT(LF"A site may have been mirrored here, that could mean that you want to update it"LF); HT_PRINT("Be sure parameters are ok"LF); HT_REQUEST_END; - if (!ask_continue()) { + if (!ask_continue(opt)) { htsmain_free(); return 0; } } else { HT_REQUEST_START; HT_PRINT("There is an index.html in the directory "); - HT_PRINT(httrack.path_log); + HT_PRINT(StringBuff(opt->path_log)); HT_PRINT(" but no cache"LF); HT_PRINT("There is an index.html in the directory, but no cache"LF); HT_PRINT("A site may have been mirrored here, and erased.."LF); HT_PRINT("Be sure parameters are ok"LF); HT_REQUEST_END; - if (!ask_continue()) { + if (!ask_continue(opt)) { htsmain_free(); return 0; } @@ -1048,113 +844,119 @@ int main(int argc, char **argv) { return 0; // déja fait normalement // case 'g': // récupérer un (ou plusieurs) fichiers isolés - httrack.wizard=2; // le wizard on peut plus s'en passer.. - //httrack.wizard=0; // pas de wizard - httrack.cache=0; // ni de cache - httrack.makeindex=0; // ni d'index + opt->wizard=2; // le wizard on peut plus s'en passer.. 
+ //opt->wizard=0; // pas de wizard + opt->cache=0; // ni de cache + opt->makeindex=0; // ni d'index httrack_logmode=1; // erreurs à l'écran - httrack.savename_type=1003; // mettre dans le répertoire courant - httrack.depth=0; // ne pas explorer la page - httrack.accept_cookie=0; // pas de cookies - httrack.robots=0; // pas de robots + opt->savename_type=1003; // mettre dans le répertoire courant + opt->depth=0; // ne pas explorer la page + opt->accept_cookie=0; // pas de cookies + opt->robots=0; // pas de robots break; - case 'w': httrack.wizard=2; // wizard 'soft' (ne pose pas de questions) - httrack.travel=0; - httrack.seeker=1; + case 'w': opt->wizard=2; // wizard 'soft' (ne pose pas de questions) + opt->travel=0; + opt->seeker=1; break; - case 'W': httrack.wizard=1; // Wizard-Help (pose des questions) - httrack.travel=0; - httrack.seeker=1; + case 'W': opt->wizard=1; // Wizard-Help (pose des questions) + opt->travel=0; + opt->seeker=1; break; case 'r': // n'est plus le recurse get bestial mais wizard itou! 
if (isdigit((unsigned char)*(com+1))) { - sscanf(com+1,"%d",&httrack.depth); + sscanf(com+1,"%d",&opt->depth); while(isdigit((unsigned char)*(com+1))) com++; - } else httrack.depth=3; + } else opt->depth=3; break; /* - case 'r': httrack.wizard=0; + case 'r': opt->wizard=0; if (isdigit((unsigned char)*(com+1))) { - sscanf(com+1,"%d",&httrack.depth); + sscanf(com+1,"%d",&opt->depth); while(isdigit((unsigned char)*(com+1))) com++; - } else httrack.depth=3; + } else opt->depth=3; break; */ // - // note: les tests httrack.depth sont pour éviter de faire + // note: les tests opt->depth sont pour éviter de faire // un miroir du web (:-O) accidentelement ;-) - case 'a': /*if (httrack.depth==9999) httrack.depth=3;*/ - httrack.travel=0+(httrack.travel&256); break; - case 'd': /*if (httrack.depth==9999) httrack.depth=3;*/ - httrack.travel=1+(httrack.travel&256); break; - case 'l': /*if (httrack.depth==9999) httrack.depth=3;*/ - httrack.travel=2+(httrack.travel&256); break; - case 'e': /*if (httrack.depth==9999) httrack.depth=3;*/ - httrack.travel=7+(httrack.travel&256); break; - case 't': httrack.travel|=256; break; - case 'n': httrack.nearlink=1; break; - case 'x': httrack.external=1; break; + case 'a': /*if (opt->depth==9999) opt->depth=3;*/ + opt->travel=0+(opt->travel&256); break; + case 'd': /*if (opt->depth==9999) opt->depth=3;*/ + opt->travel=1+(opt->travel&256); break; + case 'l': /*if (opt->depth==9999) opt->depth=3;*/ + opt->travel=2+(opt->travel&256); break; + case 'e': /*if (opt->depth==9999) opt->depth=3;*/ + opt->travel=7+(opt->travel&256); break; + case 't': opt->travel|=256; break; + case 'n': opt->nearlink=1; break; + case 'x': opt->external=1; break; // - case 'U': httrack.seeker=2; break; - case 'D': httrack.seeker=1; break; - case 'S': httrack.seeker=0; break; - case 'B': httrack.seeker=3; break; + case 'U': opt->seeker=2; break; + case 'D': opt->seeker=1; break; + case 'S': opt->seeker=0; break; + case 'B': opt->seeker=3; break; // - case 'Y': 
httrack.mirror_first_page=1; break; + case 'Y': opt->mirror_first_page=1; break; // - case 'q': case 'i': httrack.quiet=1; break; + case 'q': case 'i': opt->quiet=1; break; // case 'Q': httrack_logmode=0; break; case 'v': httrack_logmode=1; break; case 'f': httrack_logmode=2; if (*(com+1)=='2') httrack_logmode=3; while(isdigit((unsigned char)*(com+1))) com++; break; // - //case 'A': httrack.urlmode=1; break; - //case 'R': httrack.urlmode=2; break; - case 'K': httrack.urlmode=0; + //case 'A': opt->urlmode=1; break; + //case 'R': opt->urlmode=2; break; + case 'K': opt->urlmode=0; if (isdigit((unsigned char)*(com+1))) { - sscanf(com+1,"%d",&httrack.urlmode); - if (httrack.urlmode == 0) { // in fact K0 ==> K2 + sscanf(com+1,"%d",&opt->urlmode); + if (opt->urlmode == 0) { // in fact K0 ==> K2 // and K ==> K0 - httrack.urlmode=2; + opt->urlmode=2; } while(isdigit((unsigned char)*(com+1))) com++; } - //if (*(com+1)=='0') { httrack.urlmode=2; com++; } break; + //if (*(com+1)=='0') { opt->urlmode=2; com++; } break; // case 'c': if (isdigit((unsigned char)*(com+1))) { - sscanf(com+1,"%d",&httrack.maxsoc); + sscanf(com+1,"%d",&opt->maxsoc); while(isdigit((unsigned char)*(com+1))) com++; - httrack.maxsoc=max(httrack.maxsoc,1); // FORCER A 1 - } else httrack.maxsoc=4; + opt->maxsoc=max(opt->maxsoc,1); // FORCER A 1 + } else opt->maxsoc=4; break; // - case 'p': sscanf(com+1,"%d",&httrack.getmode); while(isdigit((unsigned char)*(com+1))) com++; break; + case 'p': sscanf(com+1,"%d",&opt->getmode); while(isdigit((unsigned char)*(com+1))) com++; break; // - case 'G': sscanf(com+1,LLintP,&httrack.fragment); while(isdigit((unsigned char)*(com+1))) com++; break; - case 'M': sscanf(com+1,LLintP,&httrack.maxsite); while(isdigit((unsigned char)*(com+1))) com++; break; - case 'm': sscanf(com+1,LLintP,&httrack.maxfile_nonhtml); while(isdigit((unsigned char)*(com+1))) com++; + case 'G': sscanf(com+1,LLintP,&opt->fragment); while(isdigit((unsigned char)*(com+1))) com++; break; + case 'M': 
sscanf(com+1,LLintP,&opt->maxsite); while(isdigit((unsigned char)*(com+1))) com++; break; + case 'm': sscanf(com+1,LLintP,&opt->maxfile_nonhtml); while(isdigit((unsigned char)*(com+1))) com++; if (*(com+1)==',') { com++; - sscanf(com+1,LLintP,&httrack.maxfile_html); while(isdigit((unsigned char)*(com+1))) com++; - } else httrack.maxfile_html=-1; + sscanf(com+1,LLintP,&opt->maxfile_html); while(isdigit((unsigned char)*(com+1))) com++; + } else opt->maxfile_html=-1; break; // - case 'T': sscanf(com+1,"%d",&httrack.timeout); while(isdigit((unsigned char)*(com+1))) com++; break; - case 'J': sscanf(com+1,"%d",&httrack.rateout); while(isdigit((unsigned char)*(com+1))) com++; break; - case 'R': sscanf(com+1,"%d",&httrack.retry); while(isdigit((unsigned char)*(com+1))) com++; break; - case 'E': sscanf(com+1,"%d",&httrack.maxtime); while(isdigit((unsigned char)*(com+1))) com++; break; - case 'H': sscanf(com+1,"%d",&httrack.hostcontrol); while(isdigit((unsigned char)*(com+1))) com++; break; - case 'A': sscanf(com+1,"%d",&httrack.maxrate); while(isdigit((unsigned char)*(com+1))) com++; break; - - case 'j': httrack.parsejava=1; if (*(com+1)=='0') { httrack.parsejava=0; com++; } break; + case 'T': sscanf(com+1,"%d",&opt->timeout); while(isdigit((unsigned char)*(com+1))) com++; break; + case 'J': sscanf(com+1,"%d",&opt->rateout); while(isdigit((unsigned char)*(com+1))) com++; break; + case 'R': sscanf(com+1,"%d",&opt->retry); while(isdigit((unsigned char)*(com+1))) com++; break; + case 'E': sscanf(com+1,"%d",&opt->maxtime); while(isdigit((unsigned char)*(com+1))) com++; break; + case 'H': sscanf(com+1,"%d",&opt->hostcontrol); while(isdigit((unsigned char)*(com+1))) com++; break; + case 'A': sscanf(com+1,"%d",&opt->maxrate); while(isdigit((unsigned char)*(com+1))) com++; break; + + case 'j': + opt->parsejava = HTSPARSE_DEFAULT; + if (isdigit((unsigned char)*(com+1))) { + sscanf(com+1,"%d",&opt->parsejava); + while(isdigit((unsigned char)*(com+1))) com++; + } + break; // - case 
'I': httrack.makeindex=1; if (*(com+1)=='0') { httrack.makeindex=0; com++; } break; + case 'I': opt->makeindex=1; if (*(com+1)=='0') { opt->makeindex=0; com++; } break; // - case 'X': httrack.delete_old=1; if (*(com+1)=='0') { httrack.delete_old=0; com++; } break; + case 'X': opt->delete_old=1; if (*(com+1)=='0') { opt->delete_old=0; com++; } break; // - case 'b': sscanf(com+1,"%d",&httrack.accept_cookie); while(isdigit((unsigned char)*(com+1))) com++; break; + case 'b': sscanf(com+1,"%d",&opt->accept_cookie); while(isdigit((unsigned char)*(com+1))) com++; break; // case 'N': if (strcmp(argv[na],"-N")==0) { // Tout seul @@ -1170,28 +972,28 @@ int main(int argc, char **argv) { htsmain_free(); return -1; } - strcpybuff(httrack.savename_userdef,argv[na]); - if (strnotempty(httrack.savename_userdef)) - httrack.savename_type = -1; // userdef! + StringCopy(opt->savename_userdef, argv[na]); + if (StringLength(opt->savename_userdef) > 0) + opt->savename_type = -1; // userdef! else - httrack.savename_type = 0; // -N "" : par défaut + opt->savename_type = 0; // -N "" : par défaut } } else { - sscanf(com+1,"%d",&httrack.savename_type); while(isdigit((unsigned char)*(com+1))) com++; + sscanf(com+1,"%d",&opt->savename_type); while(isdigit((unsigned char)*(com+1))) com++; } break; case 'L': { - sscanf(com+1,"%d",&httrack.savename_83); - switch(httrack.savename_83) { + sscanf(com+1,"%d",&opt->savename_83); + switch(opt->savename_83) { case 0: // 8-3 (ISO9660 L1) - httrack.savename_83=1; + opt->savename_83=1; break; case 1: - httrack.savename_83=0; + opt->savename_83=0; break; default: // 2 == ISO9660 (ISO9660 L2) - httrack.savename_83=2; + opt->savename_83=2; break; } while(isdigit((unsigned char)*(com+1))) com++; @@ -1199,59 +1001,71 @@ int main(int argc, char **argv) { break; case 's': if (isdigit((unsigned char)*(com+1))) { - sscanf(com+1,"%d",&httrack.robots); + sscanf(com+1,"%d",&opt->robots); while(isdigit((unsigned char)*(com+1))) com++; - } else httrack.robots=1; + } else 
opt->robots=1; #if DEBUG_ROBOTS - printf("robots.txt mode set to %d\n",httrack.robots); + printf("robots.txt mode set to %d\n",opt->robots); #endif break; - case 'o': sscanf(com+1,"%d",&httrack.errpage); while(isdigit((unsigned char)*(com+1))) com++; break; - case 'u': sscanf(com+1,"%d",&httrack.check_type); while(isdigit((unsigned char)*(com+1))) com++; break; + case 'o': sscanf(com+1,"%d",&opt->errpage); while(isdigit((unsigned char)*(com+1))) com++; break; + case 'u': sscanf(com+1,"%d",&opt->check_type); while(isdigit((unsigned char)*(com+1))) com++; break; // case 'C': if (isdigit((unsigned char)*(com+1))) { - sscanf(com+1,"%d",&httrack.cache); + sscanf(com+1,"%d",&opt->cache); while(isdigit((unsigned char)*(com+1))) com++; - } else httrack.cache=1; + } else opt->cache=1; break; - case 'k': httrack.all_in_cache=1; break; + case 'k': opt->all_in_cache=1; break; // - case 'z': httrack.debug=1; break; // petit debug - case 'Z': httrack.debug=2; break; // GROS debug + case 'z': opt->debug=1; break; // petit debug + case 'Z': opt->debug=2; break; // GROS debug // case '&': case '%': { // deuxième jeu d'options com++; switch(*com) { - case 'M': httrack.mimehtml = 1; if (*(com+1)=='0') { httrack.mimehtml=0; com++; } break; - case 'k': httrack.nokeepalive = 0; if (*(com+1)=='0') { httrack.nokeepalive = 1; com++; } break; - case 'x': httrack.passprivacy=1; if (*(com+1)=='0') { httrack.passprivacy=0; com++; } break; // No passwords in html files - case 'q': httrack.includequery=1; if (*(com+1)=='0') { httrack.includequery=0; com++; } break; // No passwords in html files - case 'I': httrack.kindex=1; if (isdigit((unsigned char)*(com+1))) { sscanf(com+1,"%d",&httrack.kindex); while(isdigit((unsigned char)*(com+1))) com++; } + case 'M': opt->mimehtml = 1; if (*(com+1)=='0') { opt->mimehtml=0; com++; } break; + case 'k': opt->nokeepalive = 0; if (*(com+1)=='0') { opt->nokeepalive = 1; com++; } break; + case 'x': opt->passprivacy=1; if (*(com+1)=='0') { opt->passprivacy=0; 
com++; } break; // No passwords in html files + case 'q': opt->includequery=1; if (*(com+1)=='0') { opt->includequery=0; com++; } break; // No passwords in html files + case 'I': opt->kindex=1; if (isdigit((unsigned char)*(com+1))) { sscanf(com+1,"%d",&opt->kindex); while(isdigit((unsigned char)*(com+1))) com++; } break; // Keyword Index - case 'c': sscanf(com+1,"%f",&httrack.maxconn); while(isdigit((unsigned char)*(com+1)) || *(com+1) == '.') com++; break; - case 'e': sscanf(com+1,"%d",&httrack.extdepth); while(isdigit((unsigned char)*(com+1))) com++; break; - case 'B': httrack.tolerant=1; if (*(com+1)=='0') { httrack.tolerant=0; com++; } break; // HTTP/1.0 notamment - case 'h': httrack.http10=1; if (*(com+1)=='0') { httrack.http10=0; com++; } break; // HTTP/1.0 - case 'z': httrack.nocompression=1; if (*(com+1)=='0') { httrack.nocompression=0; com++; } break; // pas de compression - case 'f': httrack.ftp_proxy=1; if (*(com+1)=='0') { httrack.ftp_proxy=0; com++; } break; // proxy http pour ftp - case 'P': httrack.parseall=1; if (*(com+1)=='0') { httrack.parseall=0; com++; } break; // tout parser - case 'n': httrack.norecatch=1; if (*(com+1)=='0') { httrack.norecatch=0; com++; } break; // ne pas reprendre fichiers effacés localement - case 's': httrack.sizehack=1; if (*(com+1)=='0') { httrack.sizehack=0; com++; } break; // hack sur content-length - case 'u': httrack.urlhack=1; if (*(com+1)=='0') { httrack.urlhack=0; com++; } break; // url hack - case 'v': httrack.verbosedisplay=2; if (isdigit((unsigned char)*(com+1))) { sscanf(com+1,"%d",&httrack.verbosedisplay); while(isdigit((unsigned char)*(com+1))) com++; } break; - case 'i': httrack.dir_topindex = 1; if (*(com+1)=='0') { httrack.dir_topindex=0; com++; } break; - case 'N': httrack.savename_delayed = 2; if (isdigit((unsigned char)*(com+1))) { sscanf(com+1,"%d",&httrack.savename_delayed); while(isdigit((unsigned char)*(com+1))) com++; } break; - case 'D': httrack.delayed_cached=1; if (*(com+1)=='0') { 
httrack.delayed_cached=0; com++; } break; // url hack - case '!': httrack.bypass_limits = 1; if (*(com+1)=='0') { httrack.bypass_limits=0; com++; } break; + case 'c': sscanf(com+1,"%f",&opt->maxconn); while(isdigit((unsigned char)*(com+1)) || *(com+1) == '.') com++; break; + case 'e': sscanf(com+1,"%d",&opt->extdepth); while(isdigit((unsigned char)*(com+1))) com++; break; + case 'B': opt->tolerant=1; if (*(com+1)=='0') { opt->tolerant=0; com++; } break; // HTTP/1.0 notamment + case 'h': opt->http10=1; if (*(com+1)=='0') { opt->http10=0; com++; } break; // HTTP/1.0 + case 'z': opt->nocompression=1; if (*(com+1)=='0') { opt->nocompression=0; com++; } break; // pas de compression + case 'f': opt->ftp_proxy=1; if (*(com+1)=='0') { opt->ftp_proxy=0; com++; } break; // proxy http pour ftp + case 'P': opt->parseall=1; if (*(com+1)=='0') { opt->parseall=0; com++; } break; // tout parser + case 'n': opt->norecatch=1; if (*(com+1)=='0') { opt->norecatch=0; com++; } break; // ne pas reprendre fichiers effacés localement + case 's': opt->sizehack=1; if (*(com+1)=='0') { opt->sizehack=0; com++; } break; // hack sur content-length + case 'u': opt->urlhack=1; if (*(com+1)=='0') { opt->urlhack=0; com++; } break; // url hack + case 'v': opt->verbosedisplay=2; if (isdigit((unsigned char)*(com+1))) { sscanf(com+1,"%d",&opt->verbosedisplay); while(isdigit((unsigned char)*(com+1))) com++; } break; + case 'i': opt->dir_topindex = 1; if (*(com+1)=='0') { opt->dir_topindex=0; com++; } break; + case 'N': opt->savename_delayed = 2; if (isdigit((unsigned char)*(com+1))) { sscanf(com+1,"%d",&opt->savename_delayed); while(isdigit((unsigned char)*(com+1))) com++; } break; + case 'D': opt->delayed_cached=1; if (*(com+1)=='0') { opt->delayed_cached=0; com++; } break; // url hack + case '!': opt->bypass_limits = 1; if (*(com+1)=='0') { opt->bypass_limits=0; com++; } break; #if HTS_USEMMS - case 'm': sscanf(com+1,"%d",&httrack.mms_maxtime); while(isdigit((unsigned char)*(com+1))) com++; break; + 
case 'm': sscanf(com+1,"%d",&opt->mms_maxtime); while(isdigit((unsigned char)*(com+1))) com++; break; #endif + case 'w': // disable specific plugin + if ((na+1>=argc) || (argv[na+1][0]=='-')) { + HTS_PANIC_PRINTF("Option %w needs to be followed by a blank space, and a module name"); + printf("Example: -%%w htsswf\n"); + htsmain_free(); + return -1; + } else{ + na++; + StringCat(opt->mod_blacklist, argv[na]); + StringCat(opt->mod_blacklist, "\n"); + } + break; // preserve: no footer, original links case 'p': - httrack.footer[0]='\0'; - httrack.urlmode=4; + StringClear(opt->footer); + opt->urlmode=4; break; case 'L': // URL list if ((na+1>=argc) || (argv[na+1][0]=='-')) { @@ -1266,7 +1080,7 @@ int main(int argc, char **argv) { htsmain_free(); return -1; } - strcpybuff(httrack.filelist,argv[na]); + StringCopy(opt->filelist,argv[na]); } break; case 'b': // bind @@ -1282,7 +1096,7 @@ int main(int argc, char **argv) { htsmain_free(); return -1; } - strcpybuff(httrack.proxy.bindhost,argv[na]); + StringCopy(opt->proxy.bindhost, argv[na]); } break; case 'S': // Scan Rules list @@ -1292,7 +1106,7 @@ int main(int argc, char **argv) { htsmain_free(); return -1; } else{ - INTsys fz; + off_t fz; na++; fz = fsize(argv[na]); if (fz < 0) { @@ -1304,7 +1118,7 @@ int main(int argc, char **argv) { if (fp != NULL) { int cl = (int) strlen(url); ensureUrlCapacity(url, url_sz, cl + fz + 8192); - if ((INTsys)fread(url + cl, 1, fz, fp) != fz) { + if (fread(url + cl, 1, fz, fp) != fz) { HTS_PANIC_PRINTF("File url list could not be read"); htsmain_free(); return -1; @@ -1323,30 +1137,29 @@ int main(int argc, char **argv) { return -1; } else{ na++; - if ( (strlen(argv[na]) + strlen(httrack.mimedefs) + 4) >= sizeof(httrack.mimedefs)) { - HTS_PANIC_PRINTF("Mime definition string too long"); - htsmain_free(); - return -1; - } // --assume standard if (strcmp(argv[na], "standard") == 0) { - strcpybuff(httrack.mimedefs,"\n"); - strcatbuff(httrack.mimedefs,HTS_ASSUME_STANDARD); - 
strcatbuff(httrack.mimedefs,"\n"); + StringCopy(opt->mimedefs,"\n"); + StringCat(opt->mimedefs,HTS_ASSUME_STANDARD); + StringCat(opt->mimedefs,"\n"); } else { char* a; - char* b = httrack.mimedefs + strlen(httrack.mimedefs); + //char* b = StringBuff(opt->mimedefs) + StringLength(opt->mimedefs); for(a = argv[na] ; *a != '\0' ; a++) { if (*a == ';') { /* next one */ - *b++ = '\n'; + StringAddchar(opt->mimedefs, '\n'); + //*b++ = '\n'; } else if (*a == ',' || *a == '\n' || *a == '\r' || *a == '\t') { - *b++ = ' '; + StringAddchar(opt->mimedefs, ' '); + //*b++ = ' '; } else { - *b++ = *a; + StringAddchar(opt->mimedefs, *a); + //*b++ = *a; } } - *b++ = '\n'; /* next def */ - *b++ = '\0'; + StringAddchar(opt->mimedefs, '\n'); + //*b++ = '\n'; /* next def */ + //*b++ = '\0'; } } break; @@ -1364,7 +1177,7 @@ int main(int argc, char **argv) { htsmain_free(); return -1; } - strcpybuff(httrack.lang_iso,argv[na]); + StringCopy(opt->lang_iso,argv[na]); } break; // @@ -1381,14 +1194,14 @@ int main(int argc, char **argv) { htsmain_free(); return -1; } - strcpybuff(httrack.footer,argv[na]); + StringCopy(opt->footer,argv[na]); } break; case 'H': // debug headers _DEBUG_HEAD=1; break; case 'O': -#if HTS_WIN +#ifdef _WIN32 printf("Warning option -%%O has no effect in this system (chroot)\n"); #else switch_chroot=1; @@ -1402,7 +1215,7 @@ int main(int argc, char **argv) { return -1; } else { na++; -#if HTS_WIN +#ifdef _WIN32 printf("Warning option -%%U has no effect on this system (setuid)\n"); #else #ifndef HTS_DO_NOT_USE_UID @@ -1431,64 +1244,36 @@ int main(int argc, char **argv) { } else { char callbackname[128]; char* a = argv[na + 1]; - char* pos = strchr(a, '='); - na++; - if (pos != NULL && (pos - a) > 0 && (pos - a + 2) < sizeof(callbackname)) { - char* posf = strchr(pos + 1, ':'); - char BIGSTK filename[1024]; - callbackname[0] = '\0'; - strncatbuff(callbackname, a, pos - a); - pos++; - if (posf != NULL && (posf - pos) > 0 && (posf - pos + 2) < sizeof(filename)) { - void* 
userfunction; - filename[0] = '\0'; - strncatbuff(filename, pos, posf - pos); - posf++; - userfunction = getFunctionPtr(&httrack, filename, posf); - if (userfunction != NULL) { - if ((void*)htswrap_read(callbackname) != NULL) { - if (htswrap_add(callbackname, userfunction)) { - set_wrappers(); /* Re-read wrappers internal static functions */ - if ((void*)htswrap_read(callbackname) == userfunction) { - if (!httrack.quiet) { - fprintf(stderr, "successfully plugged [%s -> %s:%s]\n", callbackname, posf, filename); - } - } else { - char BIGSTK tmp[1024 * 2]; - sprintf(tmp, "option %%W : unable to (re)plug the function %s from the file %s for the callback %s", posf, filename, callbackname); - HTS_PANIC_PRINTF(tmp); - htsmain_free(); - return -1; - } - } else { - char BIGSTK tmp[1024 * 2]; - sprintf(tmp, "option %%W : unable to plug the function %s from the file %s for the callback %s", posf, filename, callbackname); - HTS_PANIC_PRINTF(tmp); - htsmain_free(); - return -1; - } - } else { - char BIGSTK tmp[1024 * 2]; - sprintf(tmp, "option %%W : unknown or undefined callback %s", callbackname); - HTS_PANIC_PRINTF(tmp); - htsmain_free(); - return -1; - } - } else { - char BIGSTK tmp[1024 * 2]; - sprintf(tmp, "option %%W : unable to load the function %s in the file %s for the callback %s", posf, filename, callbackname); - HTS_PANIC_PRINTF(tmp); - htsmain_free(); - return -1; - } - } else { - HTS_PANIC_PRINTF("Syntax error in option %W : filename error : this function needs to be followed by a blank space, and a <callback-name>=<myfile.so>:<function-name> field"); - printf("Example: -%%W check-link=checklink.so:check\n"); + char* pos; /* = strchr(a, '='); */ + for(pos = a ; *pos != '\0' && *pos != '=' && *pos != ',' && *pos != ':' ; pos++); + /* httrack --wrapper callback[,foo] */ + if (*pos == 0 || *pos == ',' || *pos == ':') { + int ret = plug_wrapper(opt, argv[na + 1], argv[na + 1]); + if (ret == 0) { + char BIGSTK tmp[1024 * 2]; + sprintf(tmp, "option %%W : unable to plug 
the module %s (returncode != 1)", a); + HTS_PANIC_PRINTF(tmp); + htsmain_free(); + return -1; + } else if (ret == -1) { + char BIGSTK tmp[1024 * 2]; + int last_errno = errno; + sprintf(tmp, "option %%W : unable to load the module %s: %s (check the library path ?)", a, strerror(last_errno)); + HTS_PANIC_PRINTF(tmp); htsmain_free(); return -1; } + } + /* Old style */ + /* httrack --wrapper save-name=callback:process,string */ + else if (*pos == '=' && (pos - a) > 0 && (pos - a + 2) < sizeof(callbackname)) { + fprintf(stderr, "Syntax error in option %%W : the old (<3.41) API is no more supported!\n"); + HTS_PANIC_PRINTF("Syntax error in option %W : this function needs to be followed by a blank space, and a module name"); + printf("Example: -%%W check-link=checklink.so:check\n"); + htsmain_free(); + return -1; } else { - HTS_PANIC_PRINTF("Syntax error in option %W : this function needs to be followed by a blank space, and a <callback-name>=<myfile.so>:<function-name> field"); + HTS_PANIC_PRINTF("Syntax error in option %W : this function needs to be followed by a blank space, and a module name"); printf("Example: -%%W check-link=checklink.so:check\n"); htsmain_free(); return -1; @@ -1509,7 +1294,7 @@ int main(int argc, char **argv) { htsmain_free(); return -1; } - strcpybuff(httrack.referer, argv[na]); + StringCopy(opt->referer, argv[na]); } break; case 'E': // From Email address @@ -1525,7 +1310,7 @@ int main(int argc, char **argv) { htsmain_free(); return -1; } - strcpybuff(httrack.from, argv[na]); + StringCopy(opt->from, argv[na]); } break; @@ -1585,7 +1370,7 @@ int main(int argc, char **argv) { } break; - //case 's': httrack.sslengine=1; if (isdigit((unsigned char)*(com+1))) { sscanf(com+1,"%d",&httrack.sslengine); while(isdigit((unsigned char)*(com+1))) com++; } break; + //case 's': opt->sslengine=1; if (isdigit((unsigned char)*(com+1))) { sscanf(com+1,"%d",&opt->sslengine); while(isdigit((unsigned char)*(com+1))) com++; } break; } } break; @@ -1627,8 +1412,8 @@ 
int main(int argc, char **argv) { char BIGSTK url[HTS_URLMAXSIZE*2]; char linepos[256]; int pos; - char* cacheNdx = readfile(fconcat(httrack.path_log,"hts-cache/new.ndx")); - cache_init(&cache,&httrack); /* load cache */ + char* cacheNdx = readfile(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.ndx")); + cache_init(&cache,opt); /* load cache */ if (cacheNdx != NULL) { char firstline[256]; char* a = cacheNdx; @@ -1655,7 +1440,7 @@ int main(int argc, char **argv) { || (strjoker(url, filter, NULL, NULL) != NULL) ) { - r = cache_read_ro(&httrack, &cache, adr, fil, "", NULL); // lire entrée cache + data + r = cache_read_ro(opt, &cache, adr, fil, "", NULL); // lire entrée cache + data if (r.statuscode != -1) { // No errors found++; if (!hasFilter) { @@ -1678,7 +1463,7 @@ int main(int argc, char **argv) { (link_has_authority(adr)) ? "" : "http://", adr, fil); if (url_savename(adr, fil, sav, /*former_adr*/NULL, /*former_fil*/NULL, /*referer_adr*/NULL, /*referer_fil*/NULL, - /*opt*/&httrack, /*liens*/NULL, /*lien_tot*/0, /*sback*/NULL, /*cache*/&cache, /*hash*/NULL, /*ptr*/0, /*numero_passe*/0, /*mime_type*/NULL)!=-1) { + /*opt*/opt, /*liens*/NULL, /*lien_tot*/0, /*sback*/NULL, /*cache*/&cache, /*hash*/NULL, /*ptr*/0, /*numero_passe*/0, /*mime_type*/NULL)!=-1) { if (fexist(sav)) { fprintf(stdout, "Content-location: %s\r\n", sav); } @@ -1749,7 +1534,7 @@ int main(int argc, char **argv) { } break; case 'E': // extract cache - if (!hts_extract_meta(httrack.path_log)) { + if (!hts_extract_meta(StringBuff(opt->path_log))) { fprintf(stderr, "* error extracting meta-data\n"); return 1; } @@ -1768,22 +1553,22 @@ int main(int argc, char **argv) { char* name; uLong repaired = 0; uLong repairedBytes = 0; - if (fexist(fconcat(httrack.path_log,"hts-cache/new.zip"))) { - name = fconcat(httrack.path_log,"hts-cache/new.zip"); - } else if (fexist(fconcat(httrack.path_log,"hts-cache/old.zip"))) { - name = fconcat(httrack.path_log,"hts-cache/old.zip"); + if 
(fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.zip"))) { + name = fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.zip"); + } else if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.zip"))) { + name = fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.zip"); } else { - fprintf(stderr, "* error: no cache found in %s\n", fconcat(httrack.path_log,"hts-cache/new.zip")); + fprintf(stderr, "* error: no cache found in %s\n", fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.zip")); return 1; } fprintf(stderr, "Cache: trying to repair %s\n", name); if (unzRepair(name, - fconcat(httrack.path_log,"hts-cache/repair.zip"), - fconcat(httrack.path_log,"hts-cache/repair.tmp"), + fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/repair.zip"), + fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/repair.tmp"), &repaired, &repairedBytes ) == Z_OK) { unlink(name); - rename(fconcat(httrack.path_log,"hts-cache/repair.zip"), name); + rename(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/repair.zip"), name); fprintf(stderr,"Cache: %d bytes successfully recovered in %d entries\n", (int) repairedBytes, (int) repaired); } else { fprintf(stderr, "Cache: could not repair the cache\n"); @@ -1798,20 +1583,20 @@ int main(int argc, char **argv) { return 0; } break; - case 'f': httrack.flush=1; break; + case 'f': opt->flush=1; break; case 'h': - printf("HTTrack version "HTTRACK_VERSION"%s\n", WHAT_is_available); + printf("HTTrack version "HTTRACK_VERSION"%s\n", hts_get_version_info(opt)); return 0; break; - case 'p': /* httrack.aff_progress=1; deprecated */ break; - case 'S': httrack.shell=1; break; // stdin sur un shell - case 'K': httrack.keyboard=1; break; // vérifier stdin + case 'p': /* opt->aff_progress=1; deprecated */ break; + case 'S': opt->shell=1; break; // stdin sur un shell + case 'K': opt->keyboard=1; break; // vérifier stdin // - 
case 'L': sscanf(com+1,"%d",&httrack.maxlink); while(isdigit((unsigned char)*(com+1))) com++; break; - case 'F': sscanf(com+1,"%d",&httrack.maxfilter); while(isdigit((unsigned char)*(com+1))) com++; break; - case 'Z': httrack.makestat=1; break; - case 'T': httrack.maketrack=1; break; - case 'u': sscanf(com+1,"%d",&httrack.waittime); while(isdigit((unsigned char)*(com+1))) com++; break; + case 'L': sscanf(com+1,"%d",&opt->maxlink); while(isdigit((unsigned char)*(com+1))) com++; break; + case 'F': sscanf(com+1,"%d",&opt->maxfilter); while(isdigit((unsigned char)*(com+1))) com++; break; + case 'Z': opt->makestat=1; break; + case 'T': opt->maketrack=1; break; + case 'u': sscanf(com+1,"%d",&opt->waittime); while(isdigit((unsigned char)*(com+1))) com++; break; /*case 'R': // ohh ftp, catch->ftpget HTS_PANIC_PRINTF("Unexpected internal error with -#R command"); @@ -1820,7 +1605,7 @@ int main(int argc, char **argv) { break; */ case 'P': { // catchurl - help_catchurl(httrack.path_log); + help_catchurl(StringBuff(opt->path_log)); htsmain_free(); return 0; } @@ -1863,10 +1648,10 @@ int main(int argc, char **argv) { } else { char mime[256]; // initialiser mimedefs - get_userhttptype(1,httrack.mimedefs,NULL); + //get_userhttptype(opt,1,opt->mimedefs,NULL); // check mime[0] = '\0'; - get_httptype(mime, argv[na+1], 0); + get_httptype(opt, mime, argv[na+1], 0); if (mime[0] != '\0') { char ext[256]; printf("%s is '%s'\n", argv[na+1], mime); @@ -1893,7 +1678,7 @@ int main(int argc, char **argv) { } break; case 'd': - httrack.parsedebug = 1; + opt->parsedebug = 1; break; /* autotest */ @@ -1921,18 +1706,17 @@ int main(int argc, char **argv) { } else { char* a; na++; - httrack.proxy.active=1; + opt->proxy.active=1; // Rechercher MAIS en partant de la fin à cause de user:pass@proxy:port a = argv[na] + strlen(argv[na]) -1; // a=strstr(argv[na],":"); // port while( (a > argv[na]) && (*a != ':') && (*a != '@') ) a--; if (*a == ':') { // un port est présent, <proxy>:port - 
sscanf(a+1,"%d",&httrack.proxy.port); - httrack.proxy.name[0]='\0'; - strncatbuff(httrack.proxy.name,argv[na],(int) (a - argv[na])); + sscanf(a+1,"%d",&opt->proxy.port); + StringCopyN(opt->proxy.name,argv[na],(int) (a - argv[na])); } else { // <proxy> - httrack.proxy.port=8080; - strcpybuff(httrack.proxy.name,argv[na]); + opt->proxy.port=8080; + StringCopy(opt->proxy.name,argv[na]); } } break; @@ -1949,11 +1733,11 @@ int main(int argc, char **argv) { htsmain_free(); return -1; } - strcpybuff(httrack.user_agent,argv[na]); - if (strnotempty(httrack.user_agent)) - httrack.user_agent_send=1; + StringCopy(opt->user_agent,argv[na]); + if (StringNotEmpty(opt->user_agent)) + opt->user_agent_send=1; else - httrack.user_agent_send=0; // -F "" désactive l'option + opt->user_agent_send=0; // -F "" désactive l'option } break; // @@ -1970,11 +1754,11 @@ int main(int argc, char **argv) { htsmain_free(); return -1; } - strcpybuff(httrack.sys_com,argv[na]); - if (strnotempty(httrack.sys_com)) - httrack.sys_com_exec=1; + StringCopy(opt->sys_com,argv[na]); + if (StringNotEmpty(opt->sys_com)) + opt->sys_com_exec=1; else - httrack.sys_com_exec=0; // -V "" désactive l'option + opt->sys_com_exec=0; // -V "" désactive l'option } break; // @@ -1991,9 +1775,10 @@ int main(int argc, char **argv) { } // while } else { // URL/filters + char catbuff[CATBUFF_SIZE]; char BIGSTK tempo[1024]; if (strnotempty(url)) strcatbuff(url," "); // espace de séparation - strcpybuff(tempo,unescape_http_unharm(argv[na],1)); + strcpybuff(tempo,unescape_http_unharm(catbuff,argv[na],1)); escape_spc_url(tempo); strcatbuff(url,tempo); } // if argv=- etc. 
@@ -2010,7 +1795,7 @@ int main(int argc, char **argv) { #endif -#if HTS_WIN +#ifdef _WIN32 #else #ifndef HTS_DO_NOT_USE_UID /* Chroot - xxc */ @@ -2021,9 +1806,9 @@ int main(int argc, char **argv) { if (!userid) { //if (strcmp(userdef->pw_name,"root")==0) { char BIGSTK rpath[1024]; - //printf("html=%s log=%s\n",httrack.path_html,httrack.path_log); // xxc - if ((httrack.path_html[0]) && (httrack.path_log[0])) { - char *a=httrack.path_html,*b=httrack.path_log,*c=NULL,*d=NULL; + //printf("html=%s log=%s\n",StringBuff(opt->path_html),StringBuff(opt->path_log)); // xxc + if ((StringBuff(opt->path_html)[0]) && (StringBuff(opt->path_log)[0])) { + const char *a=StringBuff(opt->path_html),*b=StringBuff(opt->path_log),*c=NULL,*d=NULL; c=a; d=b; while ((*a) && (*a == *b)) { if (*a=='/') { c=a; d=b; } @@ -2032,23 +1817,20 @@ int main(int argc, char **argv) { } rpath[0]='\0'; - if (c != httrack.path_html) { - if (httrack.path_html[0]!='/') + if (c != StringBuff(opt->path_html)) { + if (StringBuff(opt->path_html)[0]!='/') strcatbuff(rpath,"./"); - strncatbuff(rpath,httrack.path_html,(int) (c - httrack.path_html)); - } - { - char BIGSTK tmp[1024]; - strcpybuff(tmp,c); strcpybuff(httrack.path_html,tmp); - strcpybuff(tmp,d); strcpybuff(httrack.path_log,tmp); + strncatbuff(rpath,StringBuff(opt->path_html),(int) (c - StringBuff(opt->path_html))); } + StringCopyOverlapped(opt->path_html, c); + StringCopyOverlapped(opt->path_log, d); } else { strcpybuff(rpath,"./"); - strcpybuff(httrack.path_html,"/"); - strcpybuff(httrack.path_log,"/"); + StringCopy(opt->path_html,"/"); + StringCopy(opt->path_log,"/"); } if (rpath[0]) { - printf("[changing root path to %s (path_data=%s,path_log=%s)]\n",rpath,httrack.path_html,httrack.path_log); + printf("[changing root path to %s (path_data=%s,path_log=%s)]\n",rpath,StringBuff(opt->path_html),StringBuff(opt->path_log)); if (chroot(rpath)) { printf("ERROR! 
Can not chroot to %s!\n",rpath); return -1; @@ -2094,32 +1876,32 @@ int main(int argc, char **argv) { // on utilise le cache.. // en cas de présence des deux versions, garder la version la plus avancée, // cad la version contenant le plus de fichiers - if (httrack.cache) { - if (fexist(fconcat(httrack.path_log,"hts-in_progress.lock"))) { // problemes.. - if ( fexist(fconcat(httrack.path_log,"hts-cache/new.dat")) ) { - if ( fexist(fconcat(httrack.path_log,"hts-cache/old.zip")) ) { - if (fsize(fconcat(httrack.path_log,"hts-cache/new.zip"))<32768) { - if (fsize(fconcat(httrack.path_log,"hts-cache/old.zip"))>65536) { - if (fsize(fconcat(httrack.path_log,"hts-cache/old.zip")) > fsize(fconcat(httrack.path_log,"hts-cache/new.zip"))) { - remove(fconcat(httrack.path_log,"hts-cache/new.zip")); - rename(fconcat(httrack.path_log,"hts-cache/old.zip"), fconcat(httrack.path_log,"hts-cache/new.zip")); + if (opt->cache) { + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-in_progress.lock"))) { // problemes.. 
+ if ( fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.dat")) ) { + if ( fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.zip")) ) { + if (fsize(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.zip"))<32768) { + if (fsize(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.zip"))>65536) { + if (fsize(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.zip")) > fsize(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.zip"))) { + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.zip")); + rename(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.zip"), fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.zip")); } } } } } - else if (fexist(fconcat(httrack.path_log,"hts-cache/new.dat")) && fexist(fconcat(httrack.path_log,"hts-cache/new.ndx"))) { - if (fexist(fconcat(httrack.path_log,"hts-cache/old.dat")) && fexist(fconcat(httrack.path_log,"hts-cache/old.ndx"))) { + else if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.dat")) && fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.ndx"))) { + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.dat")) && fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.ndx"))) { // switcher si new<32Ko et old>65Ko (tailles arbitraires) ? 
// ce cas est peut être une erreur ou un crash d'un miroir ancien, prendre // alors l'ancien cache - if (fsize(fconcat(httrack.path_log,"hts-cache/new.dat"))<32768) { - if (fsize(fconcat(httrack.path_log,"hts-cache/old.dat"))>65536) { - if (fsize(fconcat(httrack.path_log,"hts-cache/old.dat")) > fsize(fconcat(httrack.path_log,"hts-cache/new.dat"))) { - remove(fconcat(httrack.path_log,"hts-cache/new.dat")); - remove(fconcat(httrack.path_log,"hts-cache/new.ndx")); - rename(fconcat(httrack.path_log,"hts-cache/old.dat"),fconcat(httrack.path_log,"hts-cache/new.dat")); - rename(fconcat(httrack.path_log,"hts-cache/old.ndx"),fconcat(httrack.path_log,"hts-cache/new.ndx")); + if (fsize(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.dat"))<32768) { + if (fsize(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.dat"))>65536) { + if (fsize(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.dat")) > fsize(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.dat"))) { + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.dat")); + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.ndx")); + rename(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.dat"),fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.dat")); + rename(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/old.ndx"),fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/new.ndx")); //} else { // ne rien faire // remove("hts-cache/old.dat"); // remove("hts-cache/old.ndx"); @@ -2133,7 +1915,7 @@ int main(int argc, char **argv) { // Débuggage des en têtes if (_DEBUG_HEAD) { - ioinfo=fopen(fconcat(httrack.path_log,"hts-ioinfo.txt"),"wb"); + ioinfo=fopen(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-ioinfo.txt"),"wb"); } { @@ -2141,41 +1923,41 @@ int main(int argc, char **argv) { // on peut pas avoir un affichage ET un fichier log // ca 
sera pour la version 2 if (httrack_logmode==1) { - httrack.log=stdout; - httrack.errlog=stderr; + opt->log=stdout; + opt->errlog=stderr; } else if (httrack_logmode>=2) { // deux fichiers log - structcheck(httrack.path_log); - if (fexist(fconcat(httrack.path_log,"hts-log.txt"))) - remove(fconcat(httrack.path_log,"hts-log.txt")); - if (fexist(fconcat(httrack.path_log,"hts-err.txt"))) - remove(fconcat(httrack.path_log,"hts-err.txt")); + structcheck(StringBuff(opt->path_log)); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-log.txt"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-log.txt")); + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-err.txt"))) + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-err.txt")); /* Check FS directory structure created */ - structcheck(httrack.path_log); + structcheck(StringBuff(opt->path_log)); - httrack.log=fopen(fconcat(httrack.path_log,"hts-log.txt"),"w"); + opt->log=fopen(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-log.txt"),"w"); if (httrack_logmode==2) - httrack.errlog=fopen(fconcat(httrack.path_log,"hts-err.txt"),"w"); + opt->errlog=fopen(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-err.txt"),"w"); else - httrack.errlog=httrack.log; - if (httrack.log==NULL) { + opt->errlog=opt->log; + if (opt->log==NULL) { char s[HTS_CDLMAXSIZE]; - sprintf(s,"Unable to create log file %s",fconcat(httrack.path_log,"hts-log.txt")); + sprintf(s,"Unable to create log file %s",fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-log.txt")); HTS_PANIC_PRINTF(s); htsmain_free(); return -1; - } else if (httrack.errlog==NULL) { + } else if (opt->errlog==NULL) { char s[HTS_CDLMAXSIZE]; - sprintf(s,"Unable to create log file %s",fconcat(httrack.path_log,"hts-err.txt")); + sprintf(s,"Unable to create log file %s",fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-err.txt")); HTS_PANIC_PRINTF(s); htsmain_free(); return -1; } } else { - 
httrack.log=NULL; - httrack.errlog=NULL; + opt->log=NULL; + opt->errlog=NULL; } // un petit lock-file pour indiquer un miroir en cours, ainsi qu'un éventuel fichier log @@ -2187,11 +1969,11 @@ int main(int argc, char **argv) { /* readme for information purpose */ { - FILE* fp=fopen(fconcat(httrack.path_log,"hts-cache/readme.txt"),"wb"); + FILE* fp=fopen(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/readme.txt"),"wb"); if (fp) { fprintf(fp,"What's in this folder?"LF); fprintf(fp,""LF); - fprintf(fp,"This folder (hts-cache) has been generated by WinHTTrack "HTTRACK_VERSION"%s"LF, WHAT_is_available); + fprintf(fp,"This folder (hts-cache) has been generated by WinHTTrack "HTTRACK_VERSION"%s"LF, hts_get_version_info(opt)); fprintf(fp,"and is used for updating this website."LF); fprintf(fp,"(The HTML website structure is stored here to allow fast updates)"LF""LF); fprintf(fp,"DO NOT delete this folder unless you do not want to update the mirror in the future!!"LF); @@ -2202,15 +1984,15 @@ int main(int argc, char **argv) { } } - sprintf(n_lock,fconcat(httrack.path_log,"hts-in_progress.lock")); - //sprintf(n_lock,fconcat(httrack.path_log,"hts-in_progress.lock"),n); + sprintf(n_lock,fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-in_progress.lock")); + //sprintf(n_lock,fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-in_progress.lock"),n); /*do { if (!n) - sprintf(n_lock,fconcat(httrack.path_log,"hts-in_progress.lock"),n); + sprintf(n_lock,fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-in_progress.lock"),n); else - sprintf(n_lock,fconcat(httrack.path_log,"hts-in_progress%d.lock"),n); + sprintf(n_lock,fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-in_progress%d.lock"),n); n++; - } while((fexist(n_lock)) && httrack.quiet); + } while((fexist(n_lock)) && opt->quiet); if (fexist(n_lock)) { if (!recuperer) { remove(n_lock); @@ -2218,19 +2000,19 @@ int main(int argc, char **argv) { }*/ // vérifier existence de la structure 
- structcheck(fconcat(httrack.path_html, "/")); - structcheck(fconcat(httrack.path_log, "/")); + structcheck(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_html), "/")); + structcheck(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log), "/")); // reprise/update - if (httrack.cache) { + if (opt->cache) { FILE* fp; int i; -#if HTS_WIN - mkdir(fconcat(httrack.path_log,"hts-cache")); +#ifdef _WIN32 + mkdir(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache")); #else - mkdir(fconcat(httrack.path_log,"hts-cache"),HTS_PROTECT_FOLDER); + mkdir(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache"),HTS_PROTECT_FOLDER); #endif - fp=fopen(fconcat(httrack.path_log,"hts-cache/doit.log"),"wb"); + fp=fopen(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-cache/doit.log"),"wb"); if (fp) { for(i=0+1;i<argc;i++) { if ( ((strchr(argv[i],' ')!=NULL) || (strchr(argv[i],'"')!=NULL) || (strchr(argv[i],'\\')!=NULL)) && (argv[i][0]!='"') ) { @@ -2262,7 +2044,7 @@ int main(int argc, char **argv) { fprintf(fp,"To continue an interrupted mirror, just launch httrack without any parameters"LF); fprintf(fp,LF); fclose(fp); fp=NULL; - //} else if (httrack.debug>1) { + //} else if (opt->debug>1) { // printf("! 
FileOpen error, \"%s\"\n",strerror(errno)); } } @@ -2291,29 +2073,29 @@ int main(int argc, char **argv) { } // fichier log - if (httrack.log) { + if (opt->log) { int i; - fprintf(httrack.log,"HTTrack"HTTRACK_VERSION"%s launched on %s at %s"LF, - WHAT_is_available, + fprintf(opt->log,"HTTrack"HTTRACK_VERSION"%s launched on %s at %s"LF, + hts_get_version_info(opt), t, url); - fprintf(httrack.log,"("); + fprintf(opt->log,"("); for(i=0;i<argc;i++) { - if ((strchr(argv[i],' ')==NULL) || (strchr(argv[i],'\"'))) - fprintf(httrack.log,"%s ",argv[i]); + if (strchr(argv[i],' ') == NULL || strchr(argv[i],'\"') != NULL) + fprintf(opt->log,"%s ",argv[i]); else // entre "" (si espace(s) et pas déja de ") - fprintf(httrack.log,"\"%s\" ",argv[i]); + fprintf(opt->log,"\"%s\" ",argv[i]); } - fprintf(httrack.log,")"LF); - fprintf(httrack.log,LF); - fprintf(httrack.log,"Information, Warnings and Errors reported for this mirror:"LF); - fprintf(httrack.log,HTS_LOG_SECURITY_WARNING ); - fprintf(httrack.log,LF); + fprintf(opt->log,")"LF); + fprintf(opt->log,LF); + fprintf(opt->log,"Information, Warnings and Errors reported for this mirror:"LF); + fprintf(opt->log,HTS_LOG_SECURITY_WARNING ); + fprintf(opt->log,LF); } if (httrack_logmode) { - printf("Mirror launched on %s by HTTrack Website Copier/"HTTRACK_VERSION"%s "HTTRACK_AFF_AUTHORS""LF,t,WHAT_is_available); - if (httrack.wizard==0) { - printf("mirroring %s with %d levels, %d sockets,t=%d,s=%d,logm=%d,lnk=%d,mdg=%d\n",url,httrack.depth,httrack.maxsoc,httrack.travel,httrack.seeker,httrack_logmode,httrack.urlmode,httrack.getmode); + printf("Mirror launched on %s by HTTrack Website Copier/"HTTRACK_VERSION"%s "HTTRACK_AFF_AUTHORS""LF,t,hts_get_version_info(opt)); + if (opt->wizard==0) { + printf("mirroring %s with %d levels, %d sockets,t=%d,s=%d,logm=%d,lnk=%d,mdg=%d\n",url,opt->depth,opt->maxsoc,opt->travel,opt->seeker,httrack_logmode,opt->urlmode,opt->getmode); } else { // the magic wizard printf("mirroring %s with the wizard 
help..\n",url); } @@ -2323,73 +2105,50 @@ int main(int argc, char **argv) { io_flush; /* Enforce limits to avoid bandwith abuse. The bypass_limits should only be used by administrators and experts. */ - if (!httrack.bypass_limits) { - if (httrack.maxsoc <= 0 || httrack.maxsoc > 4) { - httrack.maxsoc = 4; - if (httrack.log != NULL) { - fspc(httrack.log,"warning"); fprintf(httrack.log,"* security warning: maximum number of simultaneous connections limited to %d to avoid server overload"LF, (int)httrack.maxsoc); + if (!opt->bypass_limits) { + if (opt->maxsoc <= 0 || opt->maxsoc > 4) { + opt->maxsoc = 4; + if (opt->log != NULL) { + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"* security warning: maximum number of simultaneous connections limited to %d to avoid server overload"LF, (int)opt->maxsoc); } } - if (httrack.maxrate <= 0 || httrack.maxrate > 100000) { - httrack.maxrate = 100000; - if (httrack.log != NULL) { - fspc(httrack.log,"warning"); fprintf(httrack.log,"* security warning: maximum bandwidth limited to %d to avoid server overload"LF, (int)httrack.maxrate); + if (opt->maxrate <= 0 || opt->maxrate > 100000) { + opt->maxrate = 100000; + if (opt->log != NULL) { + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"* security warning: maximum bandwidth limited to %d to avoid server overload"LF, (int)opt->maxrate); } } - if (httrack.maxconn <= 0 || httrack.maxconn > 5.0) { - httrack.maxconn = 5.0; - if (httrack.log != NULL) { - fspc(httrack.log,"warning"); fprintf(httrack.log,"* security warning: maximum number of connections per second limited to %f to avoid server overload"LF, (float)httrack.maxconn); + if (opt->maxconn <= 0 || opt->maxconn > 5.0) { + opt->maxconn = 5.0; + if (opt->log != NULL) { + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"* security warning: maximum number of connections per second limited to %f to avoid server overload"LF, (float)opt->maxconn); } } } else { - if (httrack.log != NULL) { - fspc(httrack.log,"warning"); fprintf(httrack.log,"* security 
warning: !!! BYPASSING SECURITY LIMITS - MONITOR THIS SESSION WITH EXTREME CARE !!!"LF); + if (opt->log != NULL) { + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"* security warning: !!! BYPASSING SECURITY LIMITS - MONITOR THIS SESSION WITH EXTREME CARE !!!"LF); } } /* Info for wrappers */ - if ( (httrack.debug>0) && (httrack.log!=NULL) ) { - fspc(httrack.log,"info"); fprintf(httrack.log,"engine: init"LF); + if ( (opt->debug>0) && (opt->log!=NULL) ) { + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"engine: init"LF); } -#if HTS_ANALYSTE - if (hts_htmlcheck_init != NULL) { - hts_htmlcheck_init(); - } - set_wrappers(); // init() is allowed to set other wrappers -#endif + + /* Init external */ + RUN_CALLBACK_NOARG(opt, init); // détourner SIGHUP etc. -#if HTS_WIN -#ifndef _WIN32_WCE - signal( SIGINT , sig_ask ); // ^C - signal( SIGTERM , sig_finish ); // kill <process> -#endif -#else - signal( SIGHUP , sig_back ); // close window - signal( SIGTSTP , sig_back ); // ^Z - signal( SIGTERM , sig_finish ); // kill <process> - signal( SIGINT , sig_ask ); // ^C - signal( SIGPIPE , sig_brpipe ); // broken pipe (write into non-opened socket) -/* -deprecated - see SIGCHLD -#ifndef HTS_DO_NOT_SIGCLD - signal( SIGCLD , sig_ignore ); // child change status -#endif -*/ - signal( SIGCHLD , sig_ignore ); // child change status -#endif #if DEBUG_STEPS printf("Launching the mirror\n"); #endif - // Lancement du miroir // ------------------------------------------------------------ - if (httpmirror(url, &httrack)==0) { + if (httpmirror(url, opt)==0) { printf("Error during operation (see log file), site has not been successfully mirrored\n"); } else { - if (httrack.shell) { + if (opt->shell) { HTT_REQUEST_START; HT_PRINT("TRANSFER DONE"LF); HTT_REQUEST_END @@ -2401,10 +2160,10 @@ deprecated - see SIGCHLD // // Build top index - if (httrack.dir_topindex) { + if (opt->dir_topindex) { char BIGSTK rpath[1024*2]; char* a; - strcpybuff(rpath,httrack.path_html); + 
strcpybuff(rpath,StringBuff(opt->path_html)); if (rpath[0]) { if (rpath[strlen(rpath)-1]=='/') rpath[strlen(rpath)-1]='\0'; @@ -2412,33 +2171,31 @@ deprecated - see SIGCHLD a=strrchr(rpath,'/'); if (a) { *a='\0'; - hts_buildtopindex(&httrack,rpath,httrack.path_bin); - if (httrack.log) { - fspc(httrack.log,"info"); fprintf(httrack.log,"Top index rebuilt (done)"LF); + hts_buildtopindex(opt,rpath,StringBuff(opt->path_bin)); + if (opt->log) { + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"Top index rebuilt (done)"LF); } } } - if (exit_xh ==1) { - if (httrack.log) { - fprintf(httrack.log,"* * MIRROR ABORTED! * *\nThe current temporary cache is required for any update operation and only contains data downloaded during the present aborted session.\nThe former cache might contain more complete information; if you do not want to lose that information, you have to restore it and delete the current cache.\nThis can easily be done here by erasing the hts-cache/new.* files]\n"); + if (opt->state.exit_xh ==1) { + if (opt->log) { + fprintf(opt->log,"* * MIRROR ABORTED! 
* *\nThe current temporary cache is required for any update operation and only contains data downloaded during the present aborted session.\nThe former cache might contain more complete information; if you do not want to lose that information, you have to restore it and delete the current cache.\nThis can easily be done here by erasing the hts-cache/new.* files]\n"); } } /* Info for wrappers */ - if ( (httrack.debug>0) && (httrack.log!=NULL) ) { - fspc(httrack.log,"info"); fprintf(httrack.log,"engine: free"LF); + if ( (opt->debug>0) && (opt->log!=NULL) ) { + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"engine: free"LF); } -#if HTS_ANALYSTE - if (hts_htmlcheck_uninit != NULL) { - hts_htmlcheck_uninit(); - } -#endif + + /* UnInit */ + RUN_CALLBACK_NOARG(opt, uninit); if (httrack_logmode!=1) { - if (httrack.errlog == httrack.log) httrack.errlog=NULL; - if (httrack.log) { fclose(httrack.log); httrack.log=NULL; } - if (httrack.errlog) { fclose(httrack.errlog); httrack.errlog=NULL; } + if (opt->errlog == opt->log) opt->errlog=NULL; + if (opt->log) { fclose(opt->log); opt->log=NULL; } + if (opt->errlog) { fclose(opt->errlog); opt->errlog=NULL; } } // Débuggage des en têtes @@ -2456,12 +2213,9 @@ deprecated - see SIGCHLD freet(x_argvblk); if (x_argv) freet(x_argv); - -#if HTS_WIN -#if HTS_ANALYSTE!=2 -// WSACleanup(); // ** non en cas de thread tjs présent!.. 
-#endif -#endif + if (url) + freet(url); + #ifdef HTS_TRACE_MALLOC hts_freeall(); #endif @@ -2476,42 +2230,39 @@ deprecated - see SIGCHLD // main() subroutines // vérifier chemin path -int check_path(char* s,char* defaultname) { +int check_path(String* s, char* defaultname) { int i; int return_value=0; // Replace name: ~/mywebsites/# -> /home/foo/mywebsites/# expand_home(s); - for(i=0;i<(int) strlen(s);i++) // conversion \ -> / - if (s[i]=='\\') - s[i]='/'; + for(i = 0 ; i < (int) StringLength(*s) ; i++) // conversion \ -> / + if (StringSub(*s, i) == '\\') + StringSubRW(*s, i) = '/'; // remove ending / - if (strnotempty(s)) - if (s[strlen(s)-1]=='/') - s[strlen(s)-1]='\0'; + if (StringNotEmpty(*s) && StringRight(*s, 1) == '/') + StringPopRight(*s); // Replace name: /home/foo/mywebsites/# -> /home/foo/mywebsites/wonderfulsite - if (strnotempty(s)) { - if (s[(i=strlen(s))-1]=='#') { + if (StringNotEmpty(*s)) { + if (StringRight(*s, 1) == '#') { if (strnotempty((defaultname?defaultname:""))) { - char BIGSTK tempo[HTS_URLMAXSIZE*2]; - char* a=strchr(defaultname,'#'); // we never know.. - if (a) *a='\0'; - tempo[0]='\0'; - strncatbuff(tempo,s,i-1); - strcatbuff(tempo,defaultname); - strcpybuff(s,tempo); - } else - s[0]='\0'; // Clear path (no name/default url given) + char* a = strchr(defaultname,'#'); // we never know.. 
+ if (a) + *a='\0'; + StringPopRight(*s); + StringCat(*s, defaultname); + } else { + StringClear(*s); // Clear path (no name/default url given) + } return_value=1; // expanded } } // ending / - if (strnotempty(s)) - if (s[strlen(s)-1]!='/') // ajouter slash à la fin - strcatbuff(s,"/"); + if (StringNotEmpty(*s) && StringRight(*s, 1) != '/') // ajouter slash à la fin + StringCat(*s, "/"); return return_value; } diff --git a/src/htscoremain.h b/src/htscoremain.h index db781eb..a3a4025 100644 --- a/src/htscoremain.h +++ b/src/htscoremain.h @@ -45,20 +45,18 @@ Please visit our Website: http://www.httrack.com "php2 php3 php4 php cgi asp jsp pl cfm nsf=text/html" #include "htsglobal.h" +#include "htsopt.h" /* Library internal definictions */ #ifdef HTS_INTERNAL_BYTECODE // Main, récupère les paramètres et appelle le robot -#if HTS_ANALYSTE #ifndef HTTRACK_DEFLIB HTSEXT_API int hts_main(int argc, char **argv); -#endif -#else -int main(int argc, char **argv); +HTSEXT_API int hts_main2(int argc, char **argv, httrackp *opt); #endif int cmdl_opt(char* s); -int check_path(char* s,char* defaultname); +int check_path(String* s,char* defaultname); #endif diff --git a/src/htsdefines.h b/src/htsdefines.h index 9f1de79..8a915ae 100644 --- a/src/htsdefines.h +++ b/src/htsdefines.h @@ -17,7 +17,6 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
- Important notes: - We hereby ask people using this source NOT to use it in purpose of grabbing @@ -38,78 +37,189 @@ Please visit our Website: http://www.httrack.com #ifndef HTS_DEFINES_DEFH #define HTS_DEFINES_DEFH -typedef void (* t_hts_htmlcheck_init)(void); -typedef void (* t_hts_htmlcheck_uninit)(void); -typedef int (* t_hts_htmlcheck_start)(httrackp* opt); -typedef int (* t_hts_htmlcheck_end)(void); -typedef int (* t_hts_htmlcheck_chopt)(httrackp* opt); -typedef int (* t_hts_htmlcheck_process)(char** html,int* len,char* url_adresse,char* url_fichier); -typedef int (* t_hts_htmlcheck)(char* html,int len,char* url_adresse,char* url_fichier); -typedef char* (* t_hts_htmlcheck_query)(char* question); -typedef char* (* t_hts_htmlcheck_query2)(char* question); -typedef char* (* t_hts_htmlcheck_query3)(char* question); -typedef int (* t_hts_htmlcheck_loop)(lien_back* back,int back_max,int back_index,int lien_tot,int lien_ntot,int stat_time,hts_stat_struct* stats); -typedef int (* t_hts_htmlcheck_check)(char* adr,char* fil,int status); -typedef int (* t_hts_htmlcheck_check_mime)(char* adr,char* fil,char* mime,int status); -typedef void (* t_hts_htmlcheck_pause)(char* lockfile); -typedef void (* t_hts_htmlcheck_filesave)(char* file); -typedef void (* t_hts_htmlcheck_filesave2)(char* hostname,char* filename,char* localfile,int is_new,int is_modified,int not_updated); -typedef int (* t_hts_htmlcheck_linkdetected)(char* link); -typedef int (* t_hts_htmlcheck_linkdetected2)(char* link, char* tag_start); -typedef int (* t_hts_htmlcheck_xfrstatus)(lien_back* back); -typedef int (* t_hts_htmlcheck_savename)(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save); -typedef int (* t_hts_htmlcheck_sendhead)(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* outgoing); -typedef int (* t_hts_htmlcheck_receivehead)(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* incoming); - -/* Library 
internal definictions */ +/* Forward definitions */ +#ifndef HTS_DEF_FWSTRUCT_httrackp +#define HTS_DEF_FWSTRUCT_httrackp +typedef struct httrackp httrackp; +#endif +#ifndef HTS_DEF_FWSTRUCT_lien_back +#define HTS_DEF_FWSTRUCT_lien_back +typedef struct lien_back lien_back; +#endif +#ifndef HTS_DEF_FWSTRUCT_htsblk +#define HTS_DEF_FWSTRUCT_htsblk +typedef struct htsblk htsblk; +#endif +#ifndef HTS_DEF_FWSTRUCT_hts_stat_struct +#define HTS_DEF_FWSTRUCT_hts_stat_struct +typedef struct hts_stat_struct hts_stat_struct; +#endif +#ifndef HTS_DEF_FWSTRUCT_htsmoduleStruct +#define HTS_DEF_FWSTRUCT_htsmoduleStruct +typedef struct htsmoduleStruct htsmoduleStruct; +#endif +#ifndef HTS_DEF_FWSTRUCT_t_hts_callbackarg +#define HTS_DEF_FWSTRUCT_t_hts_callbackarg +typedef struct t_hts_callbackarg t_hts_callbackarg; +#endif +#ifndef HTS_DEF_FWSTRUCT_t_hts_callbackarg +#define HTS_DEF_FWSTRUCT_t_hts_callbackarg +typedef struct t_hts_callbackarg t_hts_callbackarg; +#endif + +/* External callbacks */ +#ifndef EXTERNAL_FUNCTION +#ifdef _WIN32 +#define EXTERNAL_FUNCTION __declspec(dllexport) +#else +#define EXTERNAL_FUNCTION +#endif +#endif + +/* --wrapper plug function prototype */ + +typedef int (*t_hts_plug)(httrackp *opt, const char* argv); +typedef int (*t_hts_unplug)(httrackp *opt); + +/* htsopt function callbacks definitions */ + +typedef void (* t_hts_htmlcheck_init)(t_hts_callbackarg *carg); +typedef void (* t_hts_htmlcheck_uninit)(t_hts_callbackarg *carg); +typedef int (* t_hts_htmlcheck_start)(t_hts_callbackarg *carg, httrackp* opt); +typedef int (* t_hts_htmlcheck_end)(t_hts_callbackarg *carg, httrackp *opt); +typedef int (* t_hts_htmlcheck_chopt)(t_hts_callbackarg *carg, httrackp* opt); +typedef int (* t_hts_htmlcheck_process)(t_hts_callbackarg *carg, httrackp *opt, + char** html, int* len, const char* url_adresse, const char* url_fichier); +typedef t_hts_htmlcheck_process t_hts_htmlcheck_preprocess; +typedef t_hts_htmlcheck_process t_hts_htmlcheck_postprocess; +typedef int 
(* t_hts_htmlcheck_check_html)(t_hts_callbackarg *carg, httrackp *opt, + char* html, int len, const char* url_adresse, const char* url_fichier); +typedef const char* (* t_hts_htmlcheck_query)(t_hts_callbackarg *carg, httrackp *opt, + const char* question); +typedef const char* (* t_hts_htmlcheck_query2)(t_hts_callbackarg *carg, httrackp *opt, + const char* question); +typedef const char* (* t_hts_htmlcheck_query3)(t_hts_callbackarg *carg, httrackp *opt, + const char* question); +typedef int (* t_hts_htmlcheck_loop)(t_hts_callbackarg *carg, httrackp *opt, + lien_back* back, int back_max, int back_index, + int lien_tot, int lien_ntot, + int stat_time, hts_stat_struct* stats); +typedef int (* t_hts_htmlcheck_check_link)(t_hts_callbackarg *carg, httrackp *opt, + const char* adr, const char* fil, int status); +typedef int (* t_hts_htmlcheck_check_mime)(t_hts_callbackarg *carg, httrackp *opt, + const char* adr, const char* fil, const char* mime, int status); +typedef void (* t_hts_htmlcheck_pause)(t_hts_callbackarg *carg, httrackp *opt, + const char* lockfile); +typedef void (* t_hts_htmlcheck_filesave)(t_hts_callbackarg *carg, httrackp *opt, + const char* file); +typedef void (* t_hts_htmlcheck_filesave2)(t_hts_callbackarg *carg, httrackp *opt, + const char* hostname, const char* filename, const char* localfile, + int is_new, int is_modified, int not_updated); +typedef int (* t_hts_htmlcheck_linkdetected)(t_hts_callbackarg *carg, httrackp *opt, + char* link); +typedef int (* t_hts_htmlcheck_linkdetected2)(t_hts_callbackarg *carg, httrackp *opt, + char* link, const char* tag_start); +typedef int (* t_hts_htmlcheck_xfrstatus)(t_hts_callbackarg *carg, httrackp *opt, + lien_back* back); +typedef int (* t_hts_htmlcheck_savename)(t_hts_callbackarg *carg, httrackp *opt, + const char* adr_complete, const char* fil_complete, + const char* referer_adr, const char* referer_fil, char* save); +typedef int (* t_hts_htmlcheck_sendhead)(t_hts_callbackarg *carg, httrackp *opt, + char* 
buff, const char* adr, const char* fil, + const char* referer_adr, const char* referer_fil, + htsblk* outgoing); +typedef int (* t_hts_htmlcheck_receivehead)(t_hts_callbackarg *carg, httrackp *opt, + char* buff, const char* adr, const char* fil, + const char* referer_adr, const char* referer_fil, + htsblk* incoming); + +/* External additional parsing module(s) */ +typedef int (*t_hts_htmlcheck_detect)(t_hts_callbackarg *carg, httrackp *opt, htsmoduleStruct* str); +typedef int (*t_hts_htmlcheck_parse)(t_hts_callbackarg *carg, httrackp *opt, htsmoduleStruct* str); + +/* Callbacks */ +#ifndef HTS_DEF_FWSTRUCT_t_hts_htmlcheck_callbacks +#define HTS_DEF_FWSTRUCT_t_hts_htmlcheck_callbacks +typedef struct t_hts_htmlcheck_callbacks t_hts_htmlcheck_callbacks; +#endif + +/* Callabck array */ +#define DEFCALLBACK(NAME) \ + struct NAME { \ + t_hts_htmlcheck_ ##NAME fun; \ + t_hts_callbackarg *carg; \ + } NAME + +/* Callback items */ +typedef void* t_hts_htmlcheck_t_hts_htmlcheck_callbacks_item; +typedef DEFCALLBACK(t_hts_htmlcheck_callbacks_item); + +/* Linked list, which should be used for the 'arg' user-defined argument */ +struct t_hts_callbackarg { + /* User-defined agument for the called function */ + void *userdef; + + /* Previous function, if any (fun != NULL) */ + struct prev { + void *fun; + t_hts_callbackarg *carg; + } prev; +}; + +/* Callback structure */ +struct t_hts_htmlcheck_callbacks { + /* v3.41 */ + DEFCALLBACK(init); + DEFCALLBACK(uninit); + DEFCALLBACK(start); + DEFCALLBACK(end); + DEFCALLBACK(chopt); + DEFCALLBACK(preprocess); + DEFCALLBACK(postprocess); + DEFCALLBACK(check_html); + DEFCALLBACK(query); + DEFCALLBACK(query2); + DEFCALLBACK(query3); + DEFCALLBACK(loop); + DEFCALLBACK(check_link); + DEFCALLBACK(check_mime); + DEFCALLBACK(pause); + DEFCALLBACK(filesave); + DEFCALLBACK(filesave2); + DEFCALLBACK(linkdetected); + DEFCALLBACK(linkdetected2); + DEFCALLBACK(xfrstatus); + DEFCALLBACK(savename); + DEFCALLBACK(sendhead); + DEFCALLBACK(receivehead); + 
DEFCALLBACK(detect); + DEFCALLBACK(parse); + /* >3.41 */ +}; + +/* Library internal definitions */ #ifdef HTS_INTERNAL_BYTECODE -// demande d'interaction avec le shell -#if HTS_ANALYSTE -extern char HTbuff[2048]; -extern t_hts_htmlcheck_init hts_htmlcheck_init; -extern t_hts_htmlcheck_uninit hts_htmlcheck_uninit; -extern t_hts_htmlcheck_start hts_htmlcheck_start; -extern t_hts_htmlcheck_end hts_htmlcheck_end; -extern t_hts_htmlcheck_chopt hts_htmlcheck_chopt; -extern t_hts_htmlcheck_process hts_htmlcheck_preprocess; -extern t_hts_htmlcheck_process hts_htmlcheck_postprocess; -extern t_hts_htmlcheck hts_htmlcheck; -extern t_hts_htmlcheck_query hts_htmlcheck_query; -extern t_hts_htmlcheck_query2 hts_htmlcheck_query2; -extern t_hts_htmlcheck_query3 hts_htmlcheck_query3; -extern t_hts_htmlcheck_loop hts_htmlcheck_loop; -extern t_hts_htmlcheck_check hts_htmlcheck_check; -extern t_hts_htmlcheck_check_mime hts_htmlcheck_check_mime; -extern t_hts_htmlcheck_pause hts_htmlcheck_pause; -extern t_hts_htmlcheck_filesave hts_htmlcheck_filesave; -extern t_hts_htmlcheck_filesave2 hts_htmlcheck_filesave2; -extern t_hts_htmlcheck_linkdetected hts_htmlcheck_linkdetected; -extern t_hts_htmlcheck_linkdetected2 hts_htmlcheck_linkdetected2; -extern t_hts_htmlcheck_xfrstatus hts_htmlcheck_xfrstatus; -extern t_hts_htmlcheck_savename hts_htmlcheck_savename; -extern t_hts_htmlcheck_sendhead hts_htmlcheck_sendhead; -extern t_hts_htmlcheck_receivehead hts_htmlcheck_receivehead; + +#ifndef HTS_DEF_FWSTRUCT_t_hts_callback_ref +#define HTS_DEF_FWSTRUCT_t_hts_callback_ref +typedef struct t_hts_callback_ref t_hts_callback_ref; #endif +struct t_hts_callback_ref { + const char *name; + size_t offset; +}; + +extern const t_hts_htmlcheck_callbacks default_callbacks; +extern const t_hts_callback_ref default_callbacks_ref[]; -#if HTS_ANALYSTE -#define HT_PRINT(A) strcatbuff(HTbuff,A); -#define HT_REQUEST_START HTbuff[0]='\0'; +#define HT_PRINT(A) strcatbuff(opt->state.HTbuff,A); +#define HT_REQUEST_START 
opt->state.HTbuff[0]='\0'; #define HT_REQUEST_END -#define HTT_REQUEST_START HTbuff[0]='\0'; +#define HTT_REQUEST_START opt->state.HTbuff[0]='\0'; #define HTT_REQUEST_END -#define HTS_REQUEST_START HTbuff[0]='\0'; +#define HTS_REQUEST_START opt->state.HTbuff[0]='\0'; #define HTS_REQUEST_END -#define HTS_PANIC_PRINTF(S) strcpybuff(_hts_errmsg,S); -#else -#define HT_PRINT(A) printf("%s",A); -#define HT_REQUEST_START /*printf("§\n");*/ -#define HT_REQUEST_END /*printf("§\n");*/ -#define HTT_REQUEST_START /*if (httrack.shell) printf("§\n");*/ -#define HTT_REQUEST_END /*if (httrack.shell) printf("§\n");*/ -#define HTS_REQUEST_START if (opt->shell) { HT_REQUEST_START } -#define HTS_REQUEST_END if (opt->shell) { HT_REQUEST_END } -#define HTS_PANIC_PRINTF(S) printf("%s\n",S); -#endif +#define HTS_PANIC_PRINTF(S) strcpybuff(opt->state._hts_errmsg,S); #endif diff --git a/src/htsfilters.c b/src/htsfilters.c index cd7abdd..d78848f 100644 --- a/src/htsfilters.c +++ b/src/htsfilters.c @@ -265,7 +265,7 @@ HTS_INLINE char* strjoker(char* chaine,char* joker,LLint* size,int* size_flag) { // tester i=0; if (!unique) - max=strlen(chaine); + max = (int) strlen(chaine); else /* *(a) only match a (not aaaaa) */ max=1; while(i<(int) max) { diff --git a/src/htsfilters.h b/src/htsfilters.h index a1ba329..6526400 100644 --- a/src/htsfilters.h +++ b/src/htsfilters.h @@ -40,10 +40,11 @@ Please visit our Website: http://www.httrack.com #ifndef HTSFILT_DEFH #define HTSFILT_DEFH -#include "htsbase.h" - /* Library internal definictions */ #ifdef HTS_INTERNAL_BYTECODE + +#include "htsbase.h" + int fa_strjoker(int type,char** filters,int nfil,char* nom,LLint* size,int* size_flag,int* depth); HTS_INLINE char* strjoker(char* chaine,char* joker,LLint* size,int* size_flag); char* strjokerfind(char* chaine,char* joker); diff --git a/src/htsftp.c b/src/htsftp.c index 1084558..e8797ca 100644 --- a/src/htsftp.c +++ b/src/htsftp.c @@ -42,17 +42,15 @@ Please visit our Website: http://www.httrack.com #include 
"htsftp.h" -#include "htsglobal.h" -#include "htsbase.h" -#include "htsnet.h" +#include "htscore.h" #include "htsthread.h" -#if HTS_WIN +#ifdef _WIN32 #else //inet_ntoa #include <arpa/inet.h> #endif -#if HTS_WIN +#ifdef _WIN32 #ifndef __cplusplus // DOS #ifndef _WIN32_WCE @@ -72,17 +70,18 @@ Please visit our Website: http://www.httrack.com #define FTP_DEBUG 0 //#define FORK_DEBUG 0 -#define FTP_STATUS_READY 1001 - #if USE_BEGINTHREAD -PTHREAD_TYPE PTHREAD_TYPE_FNC back_launch_ftp( void* pP ) { - lien_back* back=(lien_back*) pP; - if (back == NULL) { +void back_launch_ftp( void* pP ) { + FTPDownloadStruct *pStruct = (FTPDownloadStruct*)pP; + if (pStruct == NULL) + return ; + + if (pStruct == NULL) { #if FTP_DEBUG printf("[ftp error: no args]\n"); #endif - return PTHREAD_RETURN; + return ; } /* Initialize */ @@ -92,72 +91,28 @@ PTHREAD_TYPE PTHREAD_TYPE_FNC back_launch_ftp( void* pP ) { #if FTP_DEBUG printf("[Launching main ftp routine]\n"); #endif - run_launch_ftp(back); + run_launch_ftp(pStruct); // prêt - back->status=FTP_STATUS_READY; - + pStruct->pBack->status=STATUS_FTP_READY; + + /* Delete structure */ + free(pP); + /* Uninitialize */ hts_uninit(); - return PTHREAD_RETURN; + return ; } // lancer en back -void launch_ftp(lien_back* back) { +void launch_ftp(FTPDownloadStruct* params) { // DOS #if FTP_DEBUG printf("[Launching main ftp thread]\n"); #endif - (void)hts_newthread(back_launch_ftp, 0, (void*) back); + hts_newthread(back_launch_ftp, (void*) params); } #else -// Unix sans pthread -int back_launch_ftp(lien_back* back) { - // lancer ftp - run_launch_ftp(back); - // prêt - back->status=FTP_STATUS_READY; - return 0; -} -void launch_ftp(lien_back* back,char* path,char* exec) { - FILE* fp = fopen(fconv(path),"wb"); - if (fp) { - char _args[8][256]; - char *args[8]; - fclose(fp); fp=NULL; - - strcpybuff(_args[0],exec); - strcpybuff(_args[1],"-#R"); - strcpybuff(_args[2],back->url_adr); - strcpybuff(_args[3],back->url_fil); - strcpybuff(_args[4],back->url_sav); 
- strcpybuff(_args[5],path); - //strcpybuff(_args[6],""); - args[0]=_args[0]; - args[1]=_args[1]; - args[2]=_args[2]; - args[3]=_args[3]; - args[4]=_args[4]; - args[5]=_args[5]; - args[6]=NULL; - switch (fork()) { // note: vfork déconne un max' - case -1: printf("Can not vfork() process\n"); break; - case 0: - if (execvp(args[0],args)==-1) { - fp=fopen(fconv(path),"wb"); - if (fp) { - fprintf(fp,"-1 unable to launch %s",args[0]); - fclose(fp); fp=NULL; - rename(path,concat(path,".ok")); - } else remove(path); - } - _exit(0); // exit 'propre' - break; - default: // parent - // bah on fait rien.. - break; - } - } -} +#error No more supported #endif // pour l'arrêt du ftp @@ -178,7 +133,9 @@ void launch_ftp(lien_back* back,char* path,char* exec) { } // la véritable fonction une fois lancées les routines thread/fork -int run_launch_ftp(lien_back* back) { +int run_launch_ftp(FTPDownloadStruct *pStruct) { + lien_back* back = pStruct->pBack; + httrackp *opt = pStruct->pOpt; char user[256]="anonymous"; char pass[256]="user@"; char line_retr[2048]; @@ -253,7 +210,8 @@ int run_launch_ftp(lien_back* back) { #endif ftp_filename=a; if (strnotempty(a)) { - char* ua=unescape_http(a); + char catbuff[CATBUFF_SIZE]; + char* ua=unescape_http(catbuff,a); int len_a = (int) strlen(ua); if (len_a > 0 && ua[len_a -1] == '/') { /* obviously a directory listing */ transfer_list=1; @@ -275,7 +233,7 @@ int run_launch_ftp(lien_back* back) { } } else { strcpybuff(back->r.msg,"Unexpected PORT error"); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; } } @@ -306,10 +264,10 @@ int run_launch_ftp(lien_back* back) { // récupérer adresse résolue strcpybuff(back->info,"host name"); - hp = hts_gethostbyname(_adr, &fullhostent_buffer); + hp = hts_gethostbyname(opt,_adr, &fullhostent_buffer); if (hp == NULL) { strcpybuff(back->r.msg,"Unable to get server's address"); - // back->status=FTP_STATUS_READY; // fini + // 
back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_NON_FATAL; _HALT_FTP return 0; @@ -323,10 +281,10 @@ int run_launch_ftp(lien_back* back) { // memcpy(&server.sin_addr, hp->h_addr, hp->h_length); // créer ("attachement") une socket (point d'accès) internet,en flot - soc_ctl=socket(SOCaddr_sinfamily(server), SOCK_STREAM, 0); + soc_ctl = (T_SOC) socket(SOCaddr_sinfamily(server), SOCK_STREAM, 0); if (soc_ctl==INVALID_SOCKET) { strcpybuff(back->r.msg,"Unable to create a socket"); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; _HALT_FTP return 0; @@ -338,17 +296,17 @@ int run_launch_ftp(lien_back* back) { // connexion (bloquante, on est en thread) strcpybuff(back->info,"connect"); -#if HTS_WIN +#ifdef _WIN32 if (connect(soc_ctl, (const struct sockaddr FAR *)&server, server_size) != 0) { #else if (connect(soc_ctl, (struct sockaddr *)&server, server_size) == -1) { #endif strcpybuff(back->r.msg,"Unable to connect to the server"); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; _HALT_FTP return 0; -#if HTS_WIN +#ifdef _WIN32 } #else } @@ -384,7 +342,7 @@ int run_launch_ftp(lien_back* back) { // ok } else { strcpybuff(back->r.msg,"TYPE I error"); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; } #if 0 @@ -411,34 +369,34 @@ int run_launch_ftp(lien_back* back) { // ok.. 
} else { strcpybuff(back->r.msg,"TYPE I error"); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; } } else { sprintf(back->r.msg,"CWD error: %s",linejmp(line)); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; } // sinon on est prêts } else { strcpybuff(back->r.msg,"Unexpected ftp error"); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; } #endif } else { sprintf(back->r.msg,"Bad password: %s",linejmp(line)); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; } } else { sprintf(back->r.msg,"Bad user name: %s",linejmp(line)); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; } } else { sprintf(back->r.msg,"Connection refused: %s",linejmp(line)); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; } @@ -497,7 +455,7 @@ int run_launch_ftp(lien_back* back) { // -- fin analyse de l'adresse IP et du port -- } else { sprintf(back->r.msg,"PASV incorrect: %s",linejmp(line)); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; } // sinon on est prêts } else { @@ -529,12 +487,12 @@ int run_launch_ftp(lien_back* back) { } } else { sprintf(back->r.msg,"EPSV incorrect: %s",linejmp(line)); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; } } else { sprintf(back->r.msg,"PASV/EPSV error: %s",linejmp(line)); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; } // sinon on est prêts } @@ -548,7 +506,8 @@ 
int run_launch_ftp(lien_back* back) { // SIZE if (back->r.statuscode != -1) { if (!transfer_list) { - char* ua=unescape_http(ftp_filename); + char catbuff[CATBUFF_SIZE]; + char* ua=unescape_http(catbuff,ftp_filename); if ( (strchr(ua, ' ')) || @@ -611,7 +570,7 @@ int run_launch_ftp(lien_back* back) { // résoudre if (adr_ip[0]) { - hp = hts_gethostbyname(adr_ip, &fullhostent_buffer); + hp = hts_gethostbyname(opt,adr_ip, &fullhostent_buffer); if (hp) { SOCaddr_copyaddr(server, server_size, hp->h_addr_list[0], hp->h_length); } else { @@ -629,12 +588,12 @@ int run_launch_ftp(lien_back* back) { #endif if (server_size > 0) { // socket - soc_dat=socket(SOCaddr_sinfamily(server), SOCK_STREAM, 0); + soc_dat = (T_SOC) socket(SOCaddr_sinfamily(server), SOCK_STREAM, 0); if (soc_dat != INVALID_SOCKET) { // structure: connexion au domaine internet, port 80 (ou autre) SOCaddr_initport(server, port_pasv); // server.sin_port = htons((unsigned short int) port_pasv); -#if HTS_WIN +#ifdef _WIN32 if (connect(soc_dat, (const struct sockaddr FAR *)&server, server_size) == 0) { #else if (connect(soc_dat, (struct sockaddr *)&server, server_size) != -1) { @@ -650,7 +609,7 @@ int run_launch_ftp(lien_back* back) { deletesoc(soc_dat); soc_dat=INVALID_SOCKET; // sprintf(back->r.msg,"RETR command errror: %s",linejmp(line)); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; } // sinon on est prêts } else { @@ -660,22 +619,22 @@ int run_launch_ftp(lien_back* back) { deletesoc(soc_dat); soc_dat=INVALID_SOCKET; // strcpybuff(back->r.msg,"Unable to connect"); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; } // sinon on est prêts } else { strcpybuff(back->r.msg,"Unable to create a socket"); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; } // sinon on est prêts } else { 
sprintf(back->r.msg,"Unable to resolve IP %s",adr_ip); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; } // sinon on est prêts } else { sprintf(back->r.msg,"PASV incorrect: %s",linejmp(line)); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; } // sinon on est prêts #else @@ -698,27 +657,27 @@ int run_launch_ftp(lien_back* back) { int dummylen = sizeof(struct sockaddr); if ( (soc_dat=accept(soc_servdat,&dummyaddr,&dummylen)) == INVALID_SOCKET) { strcpybuff(back->r.msg,"Unable to accept connection"); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; } } else { sprintf(back->r.msg,"RETR command errror: %s",linejmp(line)); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; } } else { sprintf(back->r.msg,"PORT command error: %s",linejmp(line)); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; } -#if HTS_WIN +#ifdef _WIN32 closesocket(soc_servdat); #else close(soc_servdat); #endif } else { strcpybuff(back->r.msg,"Unable to listen to a port"); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; } #endif @@ -728,11 +687,11 @@ int run_launch_ftp(lien_back* back) { // if (soc_dat != INVALID_SOCKET) { if (rest_understood) { // REST envoyée et comprise - file_notify(back->url_adr, back->url_fil, back->url_sav, 0, 1, 0); - back->r.fp = fileappend(back->url_sav); + file_notify(opt, back->url_adr, back->url_fil, back->url_sav, 0, 1, 0); + back->r.fp = fileappend(&opt->state.strc, back->url_sav); } else { - file_notify(back->url_adr, back->url_fil, back->url_sav, 1, 1, 0); - back->r.fp = filecreate(back->url_sav); + 
file_notify(opt, back->url_adr, back->url_fil, back->url_sav, 1, 1, 0); + back->r.fp = filecreate(&opt->state.strc, back->url_sav); } strcpybuff(back->info,"receiving"); if (back->r.fp != NULL) { @@ -747,13 +706,13 @@ int run_launch_ftp(lien_back* back) { switch(wait_socket_receive(soc_dat,timeout)) { case -1: strcpybuff(back->r.msg,"FTP read error"); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; len=0; // fin break; case 0: sprintf(back->r.msg,"Time out (%d)",timeout); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; len=0; // fin break; @@ -774,17 +733,17 @@ int run_launch_ftp(lien_back* back) { } */ strcpybuff(back->r.msg,"Write error"); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; len=0; // error } } else { strcpybuff(back->r.msg,"Unexpected write error"); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; } } else { // Erreur ou terminé - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=0; if (back->r.totalsize > 0 && back->r.size != back->r.totalsize) { back->r.statuscode=STATUSCODE_INVALID; @@ -801,10 +760,10 @@ int run_launch_ftp(lien_back* back) { } } else { strcpybuff(back->r.msg,"Unable to write file"); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; } -#if HTS_WIN +#ifdef _WIN32 closesocket(soc_dat); #else close(soc_dat); @@ -817,16 +776,16 @@ int run_launch_ftp(lien_back* back) { get_ftp_line(soc_ctl,line,timeout); if (line[0]=='2') { // OK strcpybuff(back->r.msg,"OK"); - // back->status=FTP_STATUS_READY; // fini - back->r.statuscode=200; + // back->status=STATUS_FTP_READY; // fini + 
back->r.statuscode=HTTP_OK; } else { sprintf(back->r.msg,"RETR incorrect: %s",linejmp(line)); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; } } else { strcpybuff(back->r.msg,"FTP read error"); - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; } } @@ -844,7 +803,7 @@ int run_launch_ftp(lien_back* back) { strcpybuff(back->info,"quit"); send_line(soc_ctl,"QUIT"); // bye bye get_ftp_line(soc_ctl,NULL,timeout); -#if HTS_WIN +#ifdef _WIN32 closesocket(soc_ctl); #else close(soc_ctl); @@ -852,10 +811,10 @@ int run_launch_ftp(lien_back* back) { } if (back->r.statuscode!=-1) { - back->r.statuscode=200; + back->r.statuscode=HTTP_OK; strcpybuff(back->r.msg,"OK"); } - // back->status=FTP_STATUS_READY; // fini + // back->status=STATUS_FTP_READY; // fini return 0; } @@ -881,7 +840,7 @@ T_SOC get_datasocket(char* to_send) { // copie adresse SOCaddr_copyaddr(server, server_size, hp_loc->h_addr_list[0], hp_loc->h_length); - if ( (soc=socket(SOCaddr_sinfamily(server), SOCK_STREAM, 0)) != INVALID_SOCKET) { + if ( (soc = (T_SOC) socket(SOCaddr_sinfamily(server), SOCK_STREAM, 0)) != INVALID_SOCKET) { if ( bind(soc,(struct sockaddr*) &server, server_size) == 0 ) { SOCaddr server2; @@ -923,7 +882,7 @@ T_SOC get_datasocket(char* to_send) { #endif } else { -#if HTS_WIN +#ifdef _WIN32 closesocket(soc); #else close(soc); @@ -933,7 +892,7 @@ T_SOC get_datasocket(char* to_send) { } else { -#if HTS_WIN +#ifdef _WIN32 closesocket(soc); #else close(soc); @@ -943,7 +902,7 @@ T_SOC get_datasocket(char* to_send) { } else { -#if HTS_WIN +#ifdef _WIN32 closesocket(soc); #else close(soc); @@ -991,7 +950,7 @@ int send_line(T_SOC soc,char* data) { return r; } #else - return (send(soc,line,strlen(line),0) == (int) strlen(line)); + return (send(soc,line,(int)strlen(line),0) == (int) strlen(line)); #endif } @@ -1140,8 +1099,8 @@ int 
wait_socket_receive(T_SOC soc,int timeout) { // cancel reçu? int stop_ftp(lien_back* back) { if (back->stop_ftp) { - strcpybuff(back->r.msg,"Cancelled by User"); - // back->status=FTP_STATUS_READY; // fini + strcpybuff(back->r.msg, "Cancelled by User"); + // back->status=STATUS_FTP_READY; // fini back->r.statuscode=STATUSCODE_INVALID; return 1; } diff --git a/src/htsftp.h b/src/htsftp.h index 08ab784..1899164 100644 --- a/src/htsftp.h +++ b/src/htsftp.h @@ -42,20 +42,37 @@ Please visit our Website: http://www.httrack.com #include "htsbasenet.h" #include "htsthread.h" -// lien_back -#include "htscore.h" +/* Forward definitions */ +#ifndef HTS_DEF_FWSTRUCT_lien_back +#define HTS_DEF_FWSTRUCT_lien_back +typedef struct lien_back lien_back; +#endif +#ifndef HTS_DEF_FWSTRUCT_httrackp +#define HTS_DEF_FWSTRUCT_httrackp +typedef struct httrackp httrackp; +#endif + +/* Download structure */ +#ifndef HTS_DEF_FWSTRUCT_FTPDownloadStruct +#define HTS_DEF_FWSTRUCT_FTPDownloadStruct +typedef struct FTPDownloadStruct FTPDownloadStruct; +#endif +struct FTPDownloadStruct { + lien_back *pBack; + httrackp *pOpt; +}; /* Library internal definictions */ #ifdef HTS_INTERNAL_BYTECODE #if USE_BEGINTHREAD -void launch_ftp(lien_back* back); -PTHREAD_TYPE PTHREAD_TYPE_FNC back_launch_ftp( void* pP ); +void launch_ftp(FTPDownloadStruct *params); +void back_launch_ftp( void* pP ); #else -void launch_ftp(lien_back* back,char* path,char* exec); -int back_launch_ftp(lien_back* back); +void launch_ftp(FTPDownloadStruct *params,char* path,char* exec); +int back_launch_ftp(FTPDownloadStruct *params); #endif -int run_launch_ftp(lien_back* back); +int run_launch_ftp(FTPDownloadStruct *params); int send_line(T_SOC soc,char* data); int get_ftp_line(T_SOC soc,char* line,int timeout); T_SOC get_datasocket(char* to_send); diff --git a/src/htsglobal.h b/src/htsglobal.h index eef3ab3..dc39198 100644 --- a/src/htsglobal.h +++ b/src/htsglobal.h @@ -40,10 +40,10 @@ Please visit our Website: 
http://www.httrack.com #define HTTRACK_GLOBAL_DEFH // Version -#define HTTRACK_VERSION "3.40-2" -#define HTTRACK_VERSIONID "3.40.4" +#define HTTRACK_VERSION "3.41" +#define HTTRACK_VERSIONID "3.41.20" #define HTTRACK_AFF_VERSION "3.x" -//#define HTTRACK_AFF_WARNING "This is a BETA release of WinHTTrack Website Copier ("HTTRACK_VERSION")\nPlease report any crashes, bugs or problems" +#define HTTRACK_LIB_VERSION "2.0" #ifndef HTS_NOINCLUDES #ifndef _WIN32_WCE @@ -61,7 +61,6 @@ Please visit our Website: http://www.httrack.com #endif // Définition plate-forme -#include "htssystem.h" #include "htsconfig.h" // WIN32 types @@ -90,6 +89,7 @@ Please visit our Website: http://www.httrack.com #endif #ifndef S_ISREG #define S_ISREG(m) ((m) & _S_IFREG) +#define S_ISDIR(m) ((m) & _S_IFDIR) #endif #else @@ -164,21 +164,6 @@ Please visit our Website: http://www.httrack.com #endif -// Socket windows ou socket unix -#ifdef _WIN32 -#undef HTS_PLATFORM -#define HTS_PLATFORM 1 -#define HTS_WIN 1 - -#else - -#define HTS_WIN 0 -#ifdef __linux -#undef HTS_PLATFORM -#define HTS_PLATFORM 3 -#endif -#endif - // don't spare memory usage by default #ifndef HTS_SPARE_MEMORY #define HTS_SPARE_MEMORY 0 @@ -189,7 +174,7 @@ Please visit our Website: http://www.httrack.com #endif // compatibilité DOS -#if HTS_WIN +#ifdef _WIN32 #define HTS_DOSNAME 1 #else #define HTS_DOSNAME 0 @@ -224,30 +209,13 @@ Please visit our Website: http://www.httrack.com #define HTS_USESWF 1 #endif -#if HTS_WIN +#ifdef _WIN32 #else #define __cdecl #endif -#ifdef HTS_ANALYSTE_CONSOLE -#undef HTS_ANALYSTE_CONSOLE -#define HTS_ANALYSTE_CONSOLE 1 -#endif - -#if HTS_ANALYSTE -#else -#if HTS_WIN -#else -#undef HTS_ANALYSTE -// Analyste -#define HTS_ANALYSTE 1 -#define HTS_ANALYSTE_CONSOLE 1 -#endif -#endif - - /* rc file */ -#if HTS_WIN +#ifdef _WIN32 #define HTS_HTTRACKRC "httrackrc" #else @@ -292,14 +260,14 @@ Please visit our Website: http://www.httrack.com #endif /* Copyright (C) Xavier Roche and other contributors */ 
-#define HTTRACK_AFF_AUTHORS "[XR&CO'2006]" +#define HTTRACK_AFF_AUTHORS "[XR&CO'2007]" #define HTS_DEFAULT_FOOTER "<!-- Mirrored from %s%s by HTTrack Website Copier/"HTTRACK_AFF_VERSION" "HTTRACK_AFF_AUTHORS", %s -->" #define HTTRACK_WEB "http://www.httrack.com" #define HTS_UPDATE_WEBSITE "http://www.httrack.com/update.php3?Product=HTTrack&Version="HTTRACK_VERSIONID"&VersionStr="HTTRACK_VERSION"&Platform=%d&Language=%s" #define H_CRLF "\x0d\x0a" #define CRLF "\x0d\x0a" -#if HTS_WIN +#ifdef _WIN32 #define LF "\x0d\x0a" #else #define LF "\x0a" @@ -350,22 +318,23 @@ Please visit our Website: http://www.httrack.com typedef LLINT_TYPE TStamp; #define LLintP LLINT_FORMAT #else - #if HTS_WIN - typedef __int64 LLint; - typedef __int64 TStamp; - #define LLintP "%I64d" - #else - #if HTS_PLATFORM==0 + +#ifdef _WIN32 + typedef __int64 LLint; + typedef __int64 TStamp; + #define LLintP "%I64d" +#elif (defined(__x86_64__) || defined(_LP64) || defined(__64BIT__)) + typedef unsigned long int LLint; + typedef unsigned long int TStamp; + #define LLintP "%ld" +#else typedef long long int LLint; typedef long long int TStamp; #define LLintP "%lld" - #else - typedef long long int LLint; - typedef long long int TStamp; - #define LLintP "%Ld" - #endif - #endif #endif + +#endif /* HTS_LONGLONG */ + #else typedef int LLint; #define LLintP "%d" @@ -383,6 +352,16 @@ typedef int INTsys; #define INTsysP "%d" #endif +#ifdef _WIN32 +#if defined(_WIN64) +typedef unsigned __int64 T_SOC; +#else +typedef unsigned __int32 T_SOC; +#endif +#else +typedef int T_SOC; +#endif + /* Default alignement */ #ifndef HTS_ALIGN #define HTS_ALIGN (sizeof(void*)) @@ -391,7 +370,7 @@ typedef int INTsys; /* IPV4, IPV6 and various unified structures */ #define HTS_MAXADDRLEN 64 -#if HTS_WIN +#ifdef _WIN32 #else #define __cdecl #endif @@ -440,43 +419,26 @@ typedef int INTsys; #define TAILLE_BUFFER 8192 #endif -#if HTS_WIN -#else -// use pthreads.h - -#ifndef THREADS -#define HTS_DO_NOT_USE_PTHREAD -#endif - #ifdef 
HTS_DO_NOT_USE_PTHREAD -#define USE_PTHREAD 0 -#else -#define USE_PTHREAD 1 +#error needs threads support #endif -#endif - -#if HTS_WIN #define USE_BEGINTHREAD 1 -#else -#if USE_PTHREAD -#define USE_BEGINTHREAD 1 -#else -/* sh*t.. */ -#define USE_BEGINTHREAD 0 -#endif -#endif #ifdef _DEBUG // trace mallocs //#define HTS_TRACE_MALLOC #ifdef HTS_TRACE_MALLOC typedef unsigned long int t_htsboundary; -typedef struct mlink { +#ifndef HTS_DEF_FWSTRUCT_mlink +#define HTS_DEF_FWSTRUCT_mlink +typedef struct mlink mlink; +#endif +struct mlink { char* adr; int len; int id; struct mlink* next; -} mlink; +}; static const t_htsboundary htsboundary = 0xDEADBEEF; #endif #endif diff --git a/src/htshash.c b/src/htshash.c index 67d34d0..cf6b3ec 100644 --- a/src/htshash.c +++ b/src/htshash.c @@ -42,8 +42,10 @@ Please visit our Website: http://www.httrack.com /* specific definitions */ #include "htsbase.h" +#include "htsopt.h" #include "htsglobal.h" #include "htsmd5.h" +#include "htscore.h" /* END specific definitions */ /* Specific macros */ @@ -63,6 +65,7 @@ Please visit our Website: http://www.httrack.com // retour: position ou -1 si non trouvé int hash_read(hash_struct* hash,char* nom1,char* nom2,int type,int normalized) { char BIGSTK normfil_[HTS_URLMAXSIZE*2]; + char catbuff[CATBUFF_SIZE]; char* normfil; char* normadr; unsigned int cle; @@ -71,7 +74,7 @@ int hash_read(hash_struct* hash,char* nom1,char* nom2,int type,int normalized) { if (type) cle = hash_cle(nom1,nom2); else - cle = hash_cle(convtolower(nom1),nom2); // case insensitive + cle = hash_cle(convtolower(catbuff,nom1),nom2); // case insensitive // la position se calcule en modulant pos = (int) (cle%HTS_HASH_SIZE); // entrée trouvée? 
@@ -199,6 +202,7 @@ int hash_read(hash_struct* hash,char* nom1,char* nom2,int type,int normalized) { // enregistrement lien lpos dans les 3 tables hash1..3 void hash_write(hash_struct* hash,int lpos,int normalized) { char BIGSTK normfil_[HTS_URLMAXSIZE*2]; + char catbuff[CATBUFF_SIZE]; char* normfil; unsigned int cle; int pos; @@ -212,7 +216,7 @@ void hash_write(hash_struct* hash,int lpos,int normalized) { // élément actuel sur -1 (fin de chaine) hash->liens[lpos]->hash_next[0]=hash->liens[lpos]->hash_next[1]=hash->liens[lpos]->hash_next[2]=-1; // - cle = hash_cle(convtolower(hash->liens[lpos]->sav),""); // CASE INSENSITIVE + cle = hash_cle(convtolower(catbuff,hash->liens[lpos]->sav),""); // CASE INSENSITIVE pos = (int) (cle%HTS_HASH_SIZE); ptr = hash_calc_chaine(hash,0,pos); // calculer adresse chaine *ptr = lpos; // noter dernier enregistré diff --git a/src/htshash.h b/src/htshash.h index 43b5003..15f111e 100644 --- a/src/htshash.h +++ b/src/htshash.h @@ -40,11 +40,16 @@ Please visit our Website: http://www.httrack.com #ifndef HTSHASH_DEFH #define HTSHASH_DEFH -#include "htscore.h" - /* Library internal definictions */ #ifdef HTS_INTERNAL_BYTECODE -// tables de hashage + +/* Forward definitions */ +#ifndef HTS_DEF_FWSTRUCT_hash_struct +#define HTS_DEF_FWSTRUCT_hash_struct +typedef struct hash_struct hash_struct; +#endif + +// tables de hachage int hash_read(hash_struct* hash,char* nom1,char* nom2,int type,int normalized); void hash_write(hash_struct* hash,int lpos,int normalized); int* hash_calc_chaine(hash_struct* hash,int type,int pos); diff --git a/src/htshelp.c b/src/htshelp.c index d1557ac..1aa0945 100644 --- a/src/htshelp.c +++ b/src/htshelp.c @@ -46,7 +46,8 @@ Please visit our Website: http://www.httrack.com #include "htscatchurl.h" #include "htslib.h" #include "htsalias.h" -#if HTS_WIN +#include "htsmodules.h" +#ifdef _WIN32 #else #ifdef HAVE_UNISTD_H #include <unistd.h> @@ -83,7 +84,7 @@ void infomsg(char* msg) { while(cmd[p]==' ') p++; 
sscanf(msg+p,"%s",cmd+strlen(cmd)); /* clears cN -> c */ - if ((p=strlen(cmd))>2) + if ((p = (int) strlen(cmd))>2) if (cmd[p-1]=='N') cmd[p-1]='\0'; /* finds alias (if any) */ @@ -141,7 +142,7 @@ void help_wizard(httrackp* opt) { // printf("\n"); - printf("Welcome to HTTrack Website Copier (Offline Browser) "HTTRACK_VERSION"%s\n", WHAT_is_available); + printf("Welcome to HTTrack Website Copier (Offline Browser) "HTTRACK_VERSION"%s\n", hts_get_version_info(opt)); printf("Copyright (C) Xavier Roche and other contributors\n"); #ifdef _WIN32 printf("Note: You are running the commandline version,\n"); @@ -279,11 +280,7 @@ void help_wizard(httrackp* opt) { } i++; } -#if HTS_ANALYSTE hts_main(argc,argv); -#else - main(argc,argv); -#endif } //} else { // help("httrack",1); @@ -333,7 +330,7 @@ int help_query(char* list,int def) { } // Capture d'URL -void help_catchurl(char* dest_path) { +void help_catchurl(const char* dest_path) { char BIGSTK adr_prox[HTS_URLMAXSIZE*2]; int port_prox; T_SOC soc=catch_url_init_std(&port_prox,adr_prox); @@ -401,7 +398,7 @@ void help(char* app,int more) { if (more) infomsg("1"); if (more != 2) { - sprintf(info, "HTTrack version "HTTRACK_VERSION"%s (compiled "__DATE__")", WHAT_is_available); + sprintf(info, "HTTrack version "HTTRACK_VERSION"%s (compiled "__DATE__")", hts_is_available()); infomsg(info); #ifdef HTTRACK_AFF_WARNING infomsg("NOTE: "HTTRACK_AFF_WARNING); @@ -413,7 +410,7 @@ void help(char* app,int more) { } infomsg("General options:"); infomsg(" O path for mirror/logfiles+cache (-O path_mirror[,path_cache_and_logfiles])"); -#ifndef HTS_WIN +#ifndef _WIN32 infomsg(" %O chroot path to, must be r00t (-%O root_path)"); #endif infomsg(""); @@ -475,7 +472,7 @@ void help(char* app,int more) { infomsg("Spider options:"); infomsg(" bN accept cookies in cookies.txt (0=do not accept,* 1=accept)"); infomsg(" u check document type if unknown (cgi,asp..) 
(u0 don't check, * u1 check but /, u2 check always)"); - infomsg(" j *parse Java Classes (j0 don't parse)"); + infomsg(" j *parse Java Classes (j0 don't parse, bitmask: |1 parse default, |2 don't parse .class |4 don't parse .js |8 don't be aggressive)"); infomsg(" sN follow robots.txt and meta robots tags (0=never,1=sometimes,* 2=always, 3=always (even strict rules))"); infomsg(" %h force HTTP/1.0 requests (reduce update features, only for old servers or proxies)"); infomsg(" %k use keep-alive if possible, greately reducing latency for small files and test requests (%k0 don't use)"); @@ -486,6 +483,7 @@ void help(char* app,int more) { infomsg(" shortcut: '--assume standard' is equivalent to -%A "HTS_ASSUME_STANDARD); infomsg(" can also be used to force a specific file type: --assume foo.cgi=text/html"); infomsg(" @iN internet protocol (0=both ipv6+ipv4, 4=ipv4 only, 6=ipv6 only)"); + infomsg(" %w disable a specific external mime module (-%w htsswf -%w htsjava)"); infomsg(""); infomsg("Browser ID:"); infomsg(" F user-agent field sent in HTTP headers (-F \"user-agent name\")"); @@ -557,7 +555,7 @@ void help(char* app,int more) { infomsg("Command-line specific options:"); infomsg(" V execute system command after each files ($0 is the filename: -V \"rm \\$0\")"); infomsg(" %U run the engine with another id when called as root (-%U smith)"); - infomsg(" %W use an external library function as a wrapper (-%W link-detected=foo.so:myfunction[,myparameters])"); + infomsg(" %W use an external library function as a wrapper (-%W myfoo.so[,myparameters])"); /* infomsg(" %O do a chroot before setuid"); */ infomsg(""); infomsg("Details: Option N"); @@ -627,28 +625,7 @@ void help(char* app,int more) { infomsg("--http10 force http/1.0 requests (-%h)"); infomsg(""); infomsg("Details: Option %W: External callbacks prototypes"); - infomsg("'init' : void (* myfunction)(void);"); - infomsg("'free' : void (* myfunction)(void);"); - infomsg("'start' : int (* myfunction)(httrackp* opt);"); 
- infomsg("'end' : int (* myfunction)(void);"); - infomsg("'change-options' : int (* myfunction)(httrackp* opt);"); - infomsg("'preprocess-html' : int (* myfunction)(char** html,int* len,char* url_adresse,char* url_fichier);"); - infomsg("'postprocess-html' : int (* myfunction)(char** html,int* len,char* url_adresse,char* url_fichier);"); - infomsg("'check-html' : int (* myfunction)(char* html,int len,char* url_adresse,char* url_fichier);"); - infomsg("'query' : char* (* myfunction)(char* question);"); - infomsg("'query2' : char* (* myfunction)(char* question);"); - infomsg("'query3' : char* (* myfunction)(char* question);"); - infomsg("'loop' : int (* myfunction)(lien_back* back,int back_max,int back_index,int lien_tot,int lien_ntot,int stat_time,hts_stat_struct* stats);"); - infomsg("'check-link' : int (* myfunction)(char* adr,char* fil,int status);"); - infomsg("'pause' : void (* myfunction)(char* lockfile);"); - infomsg("'save-file' : void (* myfunction)(char* file);"); - infomsg("'save-file2' : void (* myfunction)(char* hostname,char* filename,char* localfile,int is_new,int is_modified);"); - infomsg("'link-detected' : int (* myfunction)(char* link);"); - infomsg("'link-detected2' : int (* myfunction)(char* link, char* start_tag);"); - infomsg("'transfer-status' : int (* myfunction)(lien_back* back);"); - infomsg("'save-name' : int (* myfunction)(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save);"); - infomsg("And <wrappername>_init() functions if defined, called upon plug"); - infomsg(""); + infomsg("see htsdefines.h"); infomsg(""); infomsg("example: httrack www.someweb.com/bob/"); infomsg("means: mirror site www.someweb.com/bob/ and only this site"); @@ -671,7 +648,7 @@ void help(char* app,int more) { infomsg("example: httrack --continue"); infomsg("continues a mirror in the current folder"); infomsg(""); - sprintf(info, "HTTrack version "HTTRACK_VERSION"%s (compiled "__DATE__")", WHAT_is_available); + sprintf(info, 
"HTTrack version "HTTRACK_VERSION"%s (compiled "__DATE__")", hts_is_available()); infomsg(info); infomsg("Copyright (C) Xavier Roche and other contributors"); #ifdef HTS_PLATFORM_NAME diff --git a/src/htshelp.h b/src/htshelp.h index 67354c7..1ec16e2 100644 --- a/src/htshelp.h +++ b/src/htshelp.h @@ -40,17 +40,22 @@ Please visit our Website: http://www.httrack.com #ifndef HTSHELP_DEFH #define HTSHELP_DEFH -#include "htsglobal.h" -#include "htscore.h" - /* Library internal definictions */ #ifdef HTS_INTERNAL_BYTECODE + +/* Forward definitions */ +#ifndef HTS_DEF_FWSTRUCT_httrackp +#define HTS_DEF_FWSTRUCT_httrackp +typedef struct httrackp httrackp; +#endif + void infomsg(char* msg); void help(char* app,int more); void make_empty_index(char* str); void help_wizard(httrackp* opt); int help_query(char* list,int def); -void help_catchurl(char* dest_path); +void help_catchurl(const char* dest_path); + #endif #endif diff --git a/src/htsindex.c b/src/htsindex.c index 0546b2f..4a7bd67 100644 --- a/src/htsindex.c +++ b/src/htsindex.c @@ -144,6 +144,7 @@ void index_init(const char* indexpath) { */ int index_keyword(const char* html_data,LLint size,const char* mime,const char* filename,const char* indexpath) { #if HTS_MAKE_KEYWORD_INDEX + char catbuff[CATBUFF_SIZE]; int intag=0,inscript=0,incomment=0; char keyword[KEYW_LEN+32]; int i=0; @@ -165,8 +166,8 @@ int index_keyword(const char* html_data,LLint size,const char* mime,const char* // Init ? if (hts_index_init) { - remove(concat(indexpath,"index.txt")); - remove(concat(indexpath,"sindex.html")); + remove(concat(catbuff,indexpath,"index.txt")); + remove(concat(catbuff,indexpath,"sindex.html")); hts_index_init=0; } @@ -236,7 +237,7 @@ int index_keyword(const char* html_data,LLint size,const char* mime,const char* if ( (!inscript) && (!incomment) && (!intag) ) { char cchar=html_data[i]; int pos; - int len=strlen(keyword); + int len = (int) strlen(keyword); // Replace (ignore case, and so on..) 
if ((pos=strcpos(KEYW_TRANSCODE_FROM,cchar))>=0) @@ -261,7 +262,7 @@ int index_keyword(const char* html_data,LLint size,const char* mime,const char* /* Strip ending . and so */ { int ok=0; - while((len=strlen(keyword)) && (!ok)) { + while((len = (int) strlen(keyword)) && (!ok)) { if (strchr(KEYW_STRIP_END,keyword[len-1])) { /* strip it */ keyword[len-1]='\0'; } else @@ -302,13 +303,13 @@ int index_keyword(const char* html_data,LLint size,const char* mime,const char* char line[KEYW_LEN + 32]; linput(tmpfp,line,KEYW_LEN + 2); if (strnotempty(line)) { - unsigned long int e=0; + intptr_t e=0; if (inthash_read(WordIndexHash,line,&e)) { //if (e) { char BIGSTK savelst[HTS_URLMAXSIZE*2]; e++; /* 0 means "once" */ - if (strncmp((const char*)fslash((char*)indexpath),filename,strlen(indexpath))==0) // couper + if (strncmp((const char*)fslash(catbuff,(char*)indexpath),filename,strlen(indexpath))==0) // couper strcpybuff(savelst,filename+strlen(indexpath)); else strcpybuff(savelst,filename); @@ -339,11 +340,10 @@ int index_keyword(const char* html_data,LLint size,const char* mime,const char* */ void index_finish(const char* indexpath,int mode) { #if HTS_MAKE_KEYWORD_INDEX + char catbuff[CATBUFF_SIZE]; char** tab; char* blk; - INTsys size; - - size=fpsize(fp_tmpproject); + off_t size = fpsize(fp_tmpproject); if (size>0) { //FILE* fp=fopen(concat(indexpath,"index.txt"),"rb"); if (fp_tmpproject) { @@ -373,9 +373,9 @@ void index_finish(const char* indexpath,int mode) { // Write new file if (mode == 1) // TEXT - fp=fopen(concat(indexpath,"index.txt"),"wb"); + fp=fopen(concat(catbuff,indexpath,"index.txt"),"wb"); else // HTML - fp=fopen(concat(indexpath,"sindex.html"),"wb"); + fp=fopen(concat(catbuff,indexpath,"sindex.html"),"wb"); if (fp) { char current_word[KEYW_LEN + 32]; char word[KEYW_LEN + 32]; diff --git a/src/htsindex.h b/src/htsindex.h index b773034..13e139d 100644 --- a/src/htsindex.h +++ b/src/htsindex.h @@ -39,10 +39,11 @@ Please visit our Website: http://www.httrack.com 
#ifndef HTSKINDEX_DEFH #define HTSKINDEX_DEFH -#include "htsglobal.h" - /* Library internal definictions */ #ifdef HTS_INTERNAL_BYTECODE + +#include "htsglobal.h" + int index_keyword(const char* html_data,LLint size,const char* mime,const char* filename,const char* indexpath); void index_init(const char* indexpath); void index_finish(const char* indexpath,int mode); diff --git a/src/htsinthash.c b/src/htsinthash.c index e81a74f..a8fcba9 100644 --- a/src/htsinthash.c +++ b/src/htsinthash.c @@ -94,7 +94,7 @@ void inthash_add_pvoid(inthash hashtable, const char* name, void* pvalue) { } // Check for duplicate entry (==1 : added) -int inthash_write(inthash hashtable,const char* name,long int intvalue) { +int inthash_write(inthash hashtable,const char* name,intptr_t intvalue) { inthash_value value = INTHASH_VALUE_NULL; value.intg = intvalue; return inthash_write_value(hashtable, name, value); @@ -129,7 +129,7 @@ int inthash_write_value(inthash hashtable,const char* name,inthash_value value) // Increment pos value, create one if necessary (=0) // (==1 : created) int inthash_inc(inthash hashtable,const char* name) { - long int value=0; + intptr_t value=0; int r=0; if (inthash_read(hashtable,name,&value)) { value++; @@ -144,7 +144,7 @@ int inthash_inc(inthash hashtable,const char* name) { // Does not check for duplicate entry -void inthash_add(inthash hashtable, const char* name, long int intvalue) { +void inthash_add(inthash hashtable, const char* name, intptr_t intvalue) { inthash_value value = INTHASH_VALUE_NULL; memset(&value, 0, sizeof(value)); value.intg = intvalue; @@ -195,7 +195,7 @@ void* inthash_addblk(inthash hashtable,const char* name,int blksize) { return NULL; } -int inthash_read(inthash hashtable,const char* name,long int* intvalue) { +int inthash_read(inthash hashtable,const char* name,intptr_t* intvalue) { inthash_value value = INTHASH_VALUE_NULL; int ret = inthash_read_value(hashtable, name, (intvalue != NULL) ? 
&value : NULL); if (intvalue != NULL) @@ -255,7 +255,7 @@ int inthash_remove(inthash hashtable,const char* name) { return 0; } -int inthash_readptr(inthash hashtable,const char* name,long int* value) { +int inthash_readptr(inthash hashtable,const char* name,intptr_t* value) { int ret; *value = 0; ret = inthash_read(hashtable, name, value); diff --git a/src/htsinthash.h b/src/htsinthash.h index b11b7ac..f839d2d 100644 --- a/src/htsinthash.h +++ b/src/htsinthash.h @@ -35,47 +35,65 @@ Please visit our Website: http://www.httrack.com /* Author: Xavier Roche */ /* ------------------------------------------------------------ */ - +// inthash -- simple hash table, using a key (char[]) and a value (uintptr_t) #ifndef HTSINTHASH_DEFH #define HTSINTHASH_DEFH -// inthash -- simple hash table, using a key (char[]) and a value (ulong int) +/* Includes */ +#ifdef _WIN32 +#include <stddef.h> +#elif (defined(SOLARIS) || defined(sun) || defined(HAVE_INTTYPES_H) \ + || defined(BSD) || defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) || defined(__FreeBSD_kernel__)) +#include <inttypes.h> +#else +#include <stdint.h> +#endif // value typedef union inthash_value { - unsigned long int intg; /* integer value */ + uintptr_t intg; /* integer value */ void* ptr; /* ptr value */ } inthash_value; #define INTHASH_VALUE_NULL { 0 } // simple hash table for other routines -typedef struct inthash_chain { +#ifndef HTS_DEF_FWSTRUCT_inthash_chain +#define HTS_DEF_FWSTRUCT_inthash_chain +typedef struct inthash_chain inthash_chain; +#endif +struct inthash_chain { char* name; /* key (name) */ inthash_value value; /* value */ struct inthash_chain* next; /* next element */ -} inthash_chain; +}; -// structure behind inthash typedef void (* t_inthash_freehandler)(void* value); -typedef struct struct_inthash { + +/* inthash structure */ +#ifndef HTS_DEF_FWSTRUCT_struct_inthash +#define HTS_DEF_FWSTRUCT_struct_inthash +typedef struct struct_inthash struct_inthash, *inthash; +#endif +struct 
struct_inthash { inthash_chain** hash; unsigned int nitems; t_inthash_freehandler free_handler; unsigned int hash_size; unsigned short flag_valueismalloc; -} struct_inthash; - -// main inthash type -typedef struct_inthash* inthash; +}; // enumeration -typedef struct struct_inthash_enum { +#ifndef HTS_DEF_FWSTRUCT_struct_inthash_enum +#define HTS_DEF_FWSTRUCT_struct_inthash_enum +typedef struct struct_inthash_enum struct_inthash_enum; +#endif +struct struct_inthash_enum { inthash table; int index; inthash_chain* item; -} struct_inthash_enum; +}; /* Library internal definictions */ #ifdef HTS_INTERNAL_BYTECODE @@ -91,8 +109,8 @@ void inthash_value_is_malloc(inthash hashtable,int flag); /* Is void inthash_value_set_free_handler(inthash hashtable, /* value free() handler (default one is 'free') */ t_inthash_freehandler free_handler); /* */ -int inthash_read(inthash hashtable,const char* name,long int* intvalue); /* Read entry from the hash table */ -int inthash_readptr(inthash hashtable,const char* name,long int* intvalue); /* Same function, but returns 0 upon null ptr */ +int inthash_read(inthash hashtable,const char* name,intptr_t* intvalue); /* Read entry from the hash table */ +int inthash_readptr(inthash hashtable,const char* name,intptr_t* intvalue); /* Same function, but returns 0 upon null ptr */ int inthash_exists(inthash hashtable, const char* name); /* Is the key existing ? 
*/ /* */ int inthash_read_value(inthash hashtable,const char* name,inthash_value* value); @@ -103,9 +121,9 @@ int inthash_read_pvoid(inthash hashtable,const char* name, void** value); int inthash_write_pvoid(inthash hashtable,const char* name, void* value); void inthash_add_pvoid(inthash hashtable, const char* name, void* value); /* */ -void inthash_add(inthash hashtable,const char* name,long int value); /* Add entry in the hash table */ +void inthash_add(inthash hashtable,const char* name,intptr_t value); /* Add entry in the hash table */ void* inthash_addblk(inthash hashtable,const char* name,int blksize); /* Add entry in the hash table and set value to a new memory block */ -int inthash_write(inthash hashtable,const char* name,long int value); /* Overwrite/add entry in the hash table */ +int inthash_write(inthash hashtable,const char* name,intptr_t value); /* Overwrite/add entry in the hash table */ int inthash_inc(inthash hashtable,const char* name); /* Increment entry in the hash table */ int inthash_remove(inthash hashtable,const char* name); /* Remove an entry from the hashtable */ /* */ diff --git a/src/htsjava.c b/src/htsjava.c index 3536b9b..a52aea2 100644 --- a/src/htsjava.c +++ b/src/htsjava.c @@ -35,23 +35,37 @@ Please visit our Website: http://www.httrack.com /* ------------------------------------------------------------ */ -/* Internal engine bytecode */ -#define HTS_INTERNAL_BYTECODE - /* Version: Oct/2000 */ /* Fixed: problems with class structure (10/2000) */ // htsjava.c - Parseur de classes java -#include "stdio.h" -#include "htsglobal.h" -#include "htscore.h" +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#if ( defined(_WIN32) ||defined(HAVE_SYS_TYPES_H) ) +#include <sys/types.h> +#endif +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif -#include "htsjava.h" +/* Standard httrack module includes */ +#include "httrack-library.h" +#include "htsopt.h" +#include "htsdefines.h" 
-#include "htsnostatic.h" +/* Module structures */ +#include "htsmodules.h" -//#include <math.h> +/* We link to libhttrack, we can use its functions */ +#include "httrack-library.h" + +/* This file */ +#include "htsjava.h" static int reverse_endian(void) { int endian = 1; @@ -62,6 +76,11 @@ static int reverse_endian(void) { #define hts_swap16(A) ( (((A) & 0xFF)<<8) | (((A) & 0xFF00)>>8) ) #define hts_swap32(A) ( (( (hts_swap16(A)) & 0xFFFF)<<16) | (( (hts_swap16(A>>16)) & 0xFFFF)) ) +/* Static definitions */ +static RESP_STRUCT readtable(htsmoduleStruct* str,FILE *fp,RESP_STRUCT,int*); +static unsigned short int readshort(FILE *fp); +static int tris(httrackp *opt,char*); +static char * printname(char [1024],char [1024]); // ** HTS_xx sinon pas pris par VC++ #define HTS_CLASS 7 @@ -79,148 +98,217 @@ static int reverse_endian(void) { #define JAVADEBUG 0 -int hts_detect_java(htsmoduleStruct* str) { - char* savename = str->filename; +static const char *libName = "htsjava"; + +#ifdef _WIN32 +#define strcasecmp(a,b) stricmp(a,b) +#define strncasecmp(a,b,n) strnicmp(a,b,n) +#endif + +static int detect_mime(htsmoduleStruct* str) { + const char* savename = str->filename; if (savename) { int len = (int) strlen(savename); - if (len > 6 && strfield(savename + len - 6,".class")) { + if (len > 6 && strcasecmp(savename + len - 6,".class") == 0) { return 1; } } return 0; } -int hts_parse_java(htsmoduleStruct* str) +static int hts_detect_java(t_hts_callbackarg *carg, httrackp *opt, + htsmoduleStruct* str) { - FILE *fpout; - JAVA_HEADER header; - RESP_STRUCT *tab; - char* file = str->filename; - - str->relativeToHtmlLink = 1; + /* Call parent functions if multiple callbacks are chained. 
*/ + if (CALLBACKARG_PREV_FUN(carg, detect) != NULL) { + if (CALLBACKARG_PREV_FUN(carg, detect)(CALLBACKARG_PREV_CARG(carg), opt, str)) { + return 1; /* Found before us, let them have the priority */ + } + } -#if JAVADEBUG - printf("fopen\n"); -#endif - if ((fpout = fopen(fconv(file), "r+b")) == NULL) - { - //fprintf(stderr, "Cannot open input file.\n"); - sprintf(str->err_msg,"Unable to open file %s",file); - return 0; // une erreur.. + /* Check MIME */ + if (detect_mime(str)) { + str->wrapper_name = libName; /* Our ID */ + return 1; /* Known format, we take it */ } - + + return 0; /* Unknown format */ +} + +static off_t fsize(const char* s) { + FILE* fp; + fp=fopen(s,"rb"); + if (fp!=NULL) { + off_t i; + fseek(fp,0,SEEK_END); + i = ftell(fp); + fclose(fp); + return i; + } else + return -1; +} + +static int hts_parse_java(t_hts_callbackarg *carg, httrackp *opt, + htsmoduleStruct* str) +{ + /* The wrapper_name memebr has changed: not for us anymore */ + if (str->wrapper_name == NULL || strcmp(str->wrapper_name, libName) != 0) { + /* Call parent functions if multiple callbacks are chained. */ + if (CALLBACKARG_PREV_FUN(carg, parse) != NULL) { + return CALLBACKARG_PREV_FUN(carg, parse)(CALLBACKARG_PREV_CARG(carg), opt, str); + } + strcpy(str->err_msg, "unexpected error: bad wrapper_name and no previous wrapper"); + return 0; /* Unexpected error */ + } else { + if (detect_mime(str)) { + + /* (Legacy code) */ + char catbuff[CATBUFF_SIZE]; + FILE *fpout; + JAVA_HEADER header; + RESP_STRUCT *tab; + const char* file = str->filename; + + str->relativeToHtmlLink = 1; + #if JAVADEBUG - printf("fread\n"); + printf("fopen\n"); #endif - //if (fread(&header,1,sizeof(JAVA_HEADER),fpout) != sizeof(JAVA_HEADER)) { // pas complet.. - if (fread(&header,1,10,fpout) != 10) { // pas complet.. 
- fclose(fpout); - sprintf(str->err_msg,"File header too small (file len = "LLintP")",(LLint)fsize(file)); - return 0; - } + if ((fpout = fopen(fconv(catbuff, file), "r+b")) == NULL) + { + //fprintf(stderr, "Cannot open input file.\n"); + sprintf(str->err_msg,"Unable to open file %s",file); + return 0; // une erreur.. + } #if JAVADEBUG - printf("header\n"); + printf("fread\n"); #endif - // tester en tête - if (reverse_endian()) { - header.magic = hts_swap32(header.magic); - header.count = hts_swap16(header.count); - } - if(header.magic!=0xCAFEBABE) { - sprintf(str->err_msg,"non java file"); - if (fpout) { fclose(fpout); fpout=NULL; } - return 0; - } - - tab =(RESP_STRUCT*)calloct(header.count,sizeof(RESP_STRUCT)); - if (!tab) { - sprintf(str->err_msg,"Unable to alloc %d bytes",(int)sizeof(RESP_STRUCT)); - if (fpout) { fclose(fpout); fpout=NULL; } - return 0; // erreur.. - } + //if (fread(&header,1,sizeof(JAVA_HEADER),fpout) != sizeof(JAVA_HEADER)) { // pas complet.. + if (fread(&header,1,10,fpout) != 10) { // pas complet.. + fclose(fpout); + sprintf(str->err_msg,"File header too small (file len = "LLintP")",(LLint)fsize(file)); + return 0; + } #if JAVADEBUG - printf("calchead\n"); + printf("header\n"); #endif - { - int i; - - for (i = 1; i < header.count; i++) { - int err=0; // ++ - tab[i]=readtable(str,fpout,tab[i],&err); - if (!err) { - if ((tab[i].type == HTS_LONG) ||(tab[i].type == HTS_DOUBLE)) i++; //2 element si double ou float - } else { // ++ une erreur est survenue! 
- if (strnotempty(str->err_msg)==0) - strcpybuff(str->err_msg,"Internal readtable error"); - freet(tab); + // tester en tête + if (reverse_endian()) { + header.magic = hts_swap32(header.magic); + header.count = hts_swap16(header.count); + } + if(header.magic!=0xCAFEBABE) { + sprintf(str->err_msg,"non java file"); if (fpout) { fclose(fpout); fpout=NULL; } return 0; } - } - - } - + tab =(RESP_STRUCT*)calloc(header.count,sizeof(RESP_STRUCT)); + if (!tab) { + sprintf(str->err_msg,"Unable to alloc %d bytes",(int)sizeof(RESP_STRUCT)); + if (fpout) { fclose(fpout); fpout=NULL; } + return 0; // erreur.. + } + #if JAVADEBUG - printf("addfiles\n"); + printf("calchead\n"); #endif - { - unsigned int acess; - unsigned int Class; - unsigned int SClass; - int i; - acess = readshort(fpout); - Class = readshort(fpout); - SClass = readshort(fpout); - - for (i = 1; i <header.count; i++) { - - if (tab[i].type == HTS_CLASS) { - - if ((tab[i].index1<header.count) && (tab[i].index1>=0)) { - - - if((tab[i].index1!=SClass) && (tab[i].index1!=Class) && (tab[tab[i].index1].name[0]!='[')) { - - if(!strstr(tab[tab[i].index1].name,"java/")) { - char BIGSTK tempo[1024]; - tempo[0]='\0'; - - sprintf(tempo,"%s.class",tab[tab[i].index1].name); + { + int i; + + for (i = 1; i < header.count; i++) { + int err=0; // ++ + tab[i]=readtable(str,fpout,tab[i],&err); + if (!err) { + if ((tab[i].type == HTS_LONG) ||(tab[i].type == HTS_DOUBLE)) i++; //2 element si double ou float + } else { // ++ une erreur est survenue! 
+ if (strnotempty(str->err_msg)==0) + strcpy(str->err_msg,"Internal readtable error"); + free(tab); + if (fpout) { fclose(fpout); fpout=NULL; } + return 0; + } + } + + } + + +#if JAVADEBUG + printf("addfiles\n"); +#endif + { + unsigned int acess; + unsigned int Class; + unsigned int SClass; + int i; + acess = readshort(fpout); + Class = readshort(fpout); + SClass = readshort(fpout); + + for (i = 1; i <header.count; i++) { + + if (tab[i].type == HTS_CLASS) { + + if ((tab[i].index1<header.count) && (tab[i].index1>=0)) { + + + if((tab[i].index1!=SClass) && (tab[i].index1!=Class) && (tab[tab[i].index1].name[0]!='[')) { + + if(!strstr(tab[tab[i].index1].name,"java/")) { + char BIGSTK tempo[1024]; + tempo[0]='\0'; + + sprintf(tempo,"%s.class",tab[tab[i].index1].name); #if JAVADEBUG - printf("add %s\n",tempo); + printf("add %s\n",tempo); #endif - if (tab[tab[i].index1].file_position >= 0) - str->addLink(str,tempo); /* tab[tab[i].index1].file_position */ + if (tab[tab[i].index1].file_position >= 0) + str->addLink(str,tempo); /* tab[tab[i].index1].file_position */ + } + + } + } else { + i=header.count; // exit } - } - } else { - i=header.count; // exit + } } - - } - } - - + + #if JAVADEBUG - printf("end\n"); + printf("end\n"); #endif - freet(tab); - if (fpout) { fclose(fpout); fpout=NULL; } - return 1; -} + free(tab); + if (fpout) { fclose(fpout); fpout=NULL; } + return 1; + } else { + strcpy(str->err_msg, "bad MIME type"); + } + } + return 0; /* Error */ +} +/* +module entry point +*/ +EXTERNAL_FUNCTION int hts_plug(httrackp *opt, const char* argv); +EXTERNAL_FUNCTION int hts_plug(httrackp *opt, const char* argv) { + /* Plug callback functions */ + CHAIN_FUNCTION(opt, detect, hts_detect_java, NULL); + CHAIN_FUNCTION(opt, parse, hts_parse_java, NULL); + return 1; /* success */ +} // error: !=0 si erreur fatale -RESP_STRUCT readtable(htsmoduleStruct* str, - FILE *fp, RESP_STRUCT trans, int* error) +static RESP_STRUCT readtable(htsmoduleStruct* str, + FILE *fp, RESP_STRUCT 
trans, int* error) { + char rname[1024]; unsigned short int length; int j; *error = 0; // pas d'erreur @@ -228,54 +316,54 @@ RESP_STRUCT readtable(htsmoduleStruct* str, trans.type = (int)(unsigned char)fgetc(fp); switch (trans.type) { case HTS_CLASS: - strcpybuff(trans.name,"Class"); + strcpy(trans.name,"Class"); trans.index1 = readshort(fp); break; case HTS_FIELDREF: - strcpybuff(trans.name,"Field Reference"); + strcpy(trans.name,"Field Reference"); trans.index1 = readshort(fp); readshort(fp); break; case HTS_METHODREF: - strcpybuff(trans.name,"Method Reference"); + strcpy(trans.name,"Method Reference"); trans.index1 = readshort(fp); readshort(fp); break; case HTS_INTERFACE: - strcpybuff(trans.name,"Interface Method Reference"); + strcpy(trans.name,"Interface Method Reference"); trans.index1 =readshort(fp); readshort(fp); break; case HTS_NAMEANDTYPE: - strcpybuff(trans.name,"Name and Type"); + strcpy(trans.name,"Name and Type"); trans.index1 = readshort(fp); readshort(fp); break; case HTS_STRING: // CONSTANT_String - strcpybuff(trans.name,"String"); + strcpy(trans.name,"String"); trans.index1 = readshort(fp); break; case HTS_INTEGER: - strcpybuff(trans.name,"Integer"); + strcpy(trans.name,"Integer"); for(j=0;j<4;j++) fgetc(fp); break; case HTS_FLOAT: - strcpybuff(trans.name,"Float"); + strcpy(trans.name,"Float"); for(j=0;j<4;j++) fgetc(fp); break; case HTS_LONG: - strcpybuff(trans.name,"Long"); + strcpy(trans.name,"Long"); for(j=0;j<8;j++) fgetc(fp); break; case HTS_DOUBLE: - strcpybuff(trans.name,"Double"); + strcpy(trans.name,"Double"); for(j=0;j<8;j++) fgetc(fp); break; @@ -283,9 +371,9 @@ RESP_STRUCT readtable(htsmoduleStruct* str, case HTS_UNICODE: if (trans.type == HTS_ASCIZ) - strcpybuff(trans.name,"HTS_ASCIZ"); + strcpy(trans.name,"HTS_ASCIZ"); else - strcpybuff(trans.name,"HTS_UNICODE"); + strcpy(trans.name,"HTS_UNICODE"); { char BIGSTK buffer[1024]; @@ -309,10 +397,10 @@ RESP_STRUCT readtable(htsmoduleStruct* str, // if(tris(buffer)==1) printf("%s\n 
",buffer); // if(tris(buffer)==2) printf("%s\n ",printname(buffer)); //#endif - if(tris(buffer)==1) str->addLink(str, buffer); /* trans.file_position */ - else if(tris(buffer)==2) str->addLink(str, printname(buffer)); + if(tris(str->opt,buffer)==1) str->addLink(str, buffer); /* trans.file_position */ + else if(tris(str->opt,buffer)==2) str->addLink(str, printname(rname,buffer)); - strcpybuff(trans.name,buffer); + strcpy(trans.name,buffer); } else { // gros pb while ( (length > 0) && (!feof(fp))) { fgetc(fp); @@ -340,7 +428,7 @@ RESP_STRUCT readtable(htsmoduleStruct* str, } -unsigned short int readshort(FILE *fp) +static unsigned short int readshort(FILE *fp) { unsigned short int valint; fread(&valint,sizeof(valint),1,fp); @@ -352,8 +440,9 @@ unsigned short int readshort(FILE *fp) } -int tris(char * buffer) +static int tris(httrackp *opt,char * buffer) { + char catbuff[CATBUFF_SIZE]; // // Java if((buffer[0]=='[') && buffer[1]=='L' && (!strstr(buffer,"java/")) ) @@ -365,25 +454,21 @@ int tris(char * buffer) { char type[256]; type[0]='\0'; - get_httptype(type,buffer,0); + get_httptype(opt,type,buffer,0); if (strnotempty(type)) // type reconnu! return 1; // ajout RX 05/2001 - else if (is_dyntype(get_ext(buffer))) // asp,cgi... + else if (is_dyntype(get_ext(catbuff, buffer))) // asp,cgi... 
return 1; } return 0; } - -char * printname(char name[1024]) +static char * printname(char rname[1024], char name[1024]) { - char* rname; - //char *rname; char *p; char *p1; int j; - NOSTATIC_RESERVE(rname, char, 1024); rname[0]='\0'; // @@ -396,7 +481,7 @@ char * printname(char name[1024]) for (j = 0; j < (int) strlen(name); j++,p++) { if (*p == '/') *p1='.'; if (*p==';'){*p1='\0'; - strcatbuff(rname,".class"); + strcat(rname,".class"); return (rname);} else *p1=*p; p1++; diff --git a/src/htsjava.h b/src/htsjava.h index 915824b..b95155e 100644 --- a/src/htsjava.h +++ b/src/htsjava.h @@ -38,35 +38,35 @@ Please visit our Website: http://www.httrack.com #ifndef HTSJAVA_DEFH #define HTSJAVA_DEFH -#include <stdio.h> -#include "htsmodules.h" - -typedef struct { +#ifndef HTS_DEF_FWSTRUCT_JAVA_HEADER +#define HTS_DEF_FWSTRUCT_JAVA_HEADER +typedef struct JAVA_HEADER JAVA_HEADER; +#endif +struct JAVA_HEADER { unsigned long int magic; unsigned short int minor; unsigned short int major; unsigned short int count; -} JAVA_HEADER; +}; -typedef struct { +#ifndef HTS_DEF_FWSTRUCT_RESP_STRUCT +#define HTS_DEF_FWSTRUCT_RESP_STRUCT +typedef struct RESP_STRUCT RESP_STRUCT; +#endif +struct RESP_STRUCT { int file_position; // unsigned int index1; unsigned int type; char name[1024]; -} RESP_STRUCT; +}; /* Library internal definictions */ #ifdef HTS_INTERNAL_BYTECODE -int hts_detect_java(htsmoduleStruct* str); -int hts_parse_java(htsmoduleStruct* str); -RESP_STRUCT affecte(int i1,int i2,RESP_STRUCT *i3,RESP_STRUCT *i4,int i5); -//unsigned int swap(long int nomber,int digit); -RESP_STRUCT readtable(htsmoduleStruct* str,FILE *fp,RESP_STRUCT,int*); -unsigned short int readshort(FILE *fp); -int tris(char*); -char * printname(char [1024]); + +EXTERNAL_FUNCTION int hts_plug_java(httrackp *opt, const char* argv); + #endif #endif diff --git a/src/htslib.c b/src/htslib.c index 93119df..c398e3f 100644 --- a/src/htslib.c +++ b/src/htslib.c @@ -39,8 +39,7 @@ Please visit our Website: 
http://www.httrack.com // Fichier librairie .c -#include "htslib.h" -#include "htsbauth.h" +#include "htscore.h" #ifdef _WIN32_WCE #ifndef HTS_CECOMPAT @@ -53,10 +52,12 @@ Please visit our Website: http://www.httrack.com #include "htsnet.h" #include "htsbauth.h" #include "htsthread.h" -#include "htsnostatic.h" +#include "htsback.h" #include "htswrap.h" #include "htsmd5.h" -#if HTS_WIN +#include "htsmodules.h" + +#ifdef _WIN32 #ifndef _WIN32_WCE #include <direct.h> #endif @@ -70,21 +71,22 @@ Please visit our Website: http://www.httrack.com #ifdef HAVE_UNISTD_H #include <unistd.h> #endif -#endif +#endif /* _WIN32 */ + #include <string.h> #include <time.h> + #ifndef _WIN32_WCE #include <sys/timeb.h> +#include <fcntl.h> #else #ifndef HTS_CECOMPAT #include <sys/timeb.h> #endif -#endif -#ifndef _WIN32_WCE -#include <fcntl.h> -#endif +#endif /* _WIN32_WCE */ + // pour utimbuf -#if HTS_WIN +#ifdef _WIN32 #ifndef _WIN32_WCE #include <sys/utime.h> #else @@ -94,7 +96,8 @@ Please visit our Website: http://www.httrack.com #endif #else #include <utime.h> -#endif +#endif /* _WIN32 */ + #ifndef _WIN32_WCE #include <sys/stat.h> #endif @@ -115,7 +118,6 @@ FILE* ioinfo; #endif int IPV6_resolver = 0; - /* détection complémentaire */ const char* hts_detect[] = { "archive", @@ -300,6 +302,7 @@ const char* hts_mime[][2] = { {"application/x-authorware-map","aam"}, {"application/x-authorware-seg","aas"}, {"application/x-authorware-bin","aab"}, + {"application/x-bzip2","bz2"}, {"application/x-cocoa","cco"}, {"application/x-csh","csh"}, {"application/x-director","dir"}, @@ -347,6 +350,7 @@ const char* hts_mime[][2] = { {"application/x-tar","tar"}, {"application/x-ustar","ustar"}, {"application/x-winhelp","hlp"}, + {"application/xml","xml"}, {"audio/midi","mid"}, {"audio/midi","midi"}, {"audio/midi","kar"}, @@ -437,17 +441,17 @@ const char* hts_mime[][2] = { { "application/pkix-crl", "crl" }, { "application/set-payment-initiation", "setpay" }, { "application/set-registration-initiation", 
"setreg" }, + { "application/vnd.ms-excel", "xls" }, { "application/vnd.ms-excel", "xla" }, { "application/vnd.ms-excel", "xlc" }, { "application/vnd.ms-excel", "xlm" }, - { "application/vnd.ms-excel", "xls" }, { "application/vnd.ms-excel", "xlt" }, { "application/vnd.ms-excel", "xlw" }, { "application/vnd.ms-pkicertstore", "sst" }, { "application/vnd.ms-pkiseccat", "cat" }, + { "application/vnd.ms-powerpoint", "ppt" }, { "application/vnd.ms-powerpoint", "pot" }, { "application/vnd.ms-powerpoint", "pps" }, - { "application/vnd.ms-powerpoint", "ppt" }, { "application/vnd.ms-project", "mpp" }, { "application/vnd.ms-works", "wcm" }, { "application/vnd.ms-works", "wdb" }, @@ -527,7 +531,7 @@ const char* hts_mime[][2] = { /* Various */ { "application/ogg", "ogg" }, - {"*","class"}, + {"application/x-java-vm","class"}, {"",""}}; @@ -588,15 +592,12 @@ const char* hts_mime[][2] = { // conversion éventuelle / vers antislash -#if HTS_WIN -char* antislash(char* s) { - char* buff; +#ifdef _WIN32 +char* antislash(char *catbuff, const char* s) { char* a; - NOSTATIC_RESERVE(buff, char, HTS_URLMAXSIZE*2); - - strcpybuff(buff,s); - while(a=strchr(buff,'/')) *a='\\'; - return buff; + strcpybuff(catbuff,s); + while(a=strchr(catbuff,'/')) *a='\\'; + return catbuff; } #endif @@ -612,7 +613,7 @@ char cwd[MAX_PATH+1] = ""; // suivre l'évolution du chargement si le process a été lancé // en background -htsblk httpget(char* url) { +htsblk httpget(httrackp *opt,char* url) { char BIGSTK adr[HTS_URLMAXSIZE*2]; // adresse char BIGSTK fil[HTS_URLMAXSIZE*2]; // chemin @@ -629,14 +630,14 @@ htsblk httpget(char* url) { return retour; } - return xhttpget(adr,fil); + return xhttpget(opt,adr,fil); } // ouvre une liaison http, envoie une requète GET et réceptionne le header // retour: socket -int http_fopen(char* adr,char* fil,htsblk* retour) { +int http_fopen(httrackp *opt,char* adr,char* fil,htsblk* retour) { // / GET, traiter en-tête - return http_xfopen(0,1,1,NULL,adr,fil,retour); + return 
http_xfopen(opt,0,1,1,NULL,adr,fil,retour); } // ouverture d'une liaison http, envoi d'une requète @@ -644,7 +645,7 @@ int http_fopen(char* adr,char* fil,htsblk* retour) { // treat: traiter header? // waitconnect: attendre le connect() // note: dans retour, on met les params du proxy -int http_xfopen(int mode,int treat,int waitconnect,char* xsend,char* adr,char* fil,htsblk* retour) { +int http_xfopen(httrackp *opt,int mode,int treat,int waitconnect,char* xsend,char* adr,char* fil,htsblk* retour) { //htsblk retour; //int bufl=TAILLE_BUFFER; // 8Ko de buffer T_SOC soc=INVALID_SOCKET; @@ -681,12 +682,12 @@ int http_xfopen(int mode,int treat,int waitconnect,char* xsend,char* adr,char* f (strncmp(adr,"https://", 8)==0) ) ) { /* pas de proxy, ou non utilisable ici */ - soc=newhttp(adr,retour,-1,waitconnect); + soc=newhttp(opt,adr,retour,-1,waitconnect); } else { - soc=newhttp(retour->req.proxy.name,retour,retour->req.proxy.port,waitconnect); // ouvrir sur le proxy à la place + soc=newhttp(opt, retour->req.proxy.name, retour,retour->req.proxy.port, waitconnect); // ouvrir sur le proxy à la place } } else { - soc=newhttp(adr,NULL,-1,waitconnect); + soc=newhttp(opt,adr,NULL,-1,waitconnect); } // copier index socket retour @@ -698,9 +699,11 @@ int http_xfopen(int mode,int treat,int waitconnect,char* xsend,char* adr,char* f if (retour->msg) { if (!strnotempty(retour->msg)) { #ifdef _WIN32 - sprintf(retour->msg,"Connect error: %s", strerror(WSAGetLastError())); + int last_errno = WSAGetLastError(); + sprintf(retour->msg,"Connect error: %s", strerror(last_errno)); #else - sprintf(retour->msg,"Connect error: %s", strerror(errno)); + int last_errno = errno; + sprintf(retour->msg,"Connect error: %s", strerror(last_errno)); #endif } } @@ -715,15 +718,15 @@ int http_xfopen(int mode,int treat,int waitconnect,char* xsend,char* adr,char* f if (mode==0) { // GET // Test en cas de file:///C|... 
- if (!fexist(fconv(unescape_http(fil)))) - if (fexist(fconv(unescape_http(fil+1)))) { + if (!fexist(fconv(OPT_GET_BUFF(opt), unescape_http(OPT_GET_BUFF(opt),fil)))) + if (fexist(fconv(OPT_GET_BUFF(opt), unescape_http(OPT_GET_BUFF(opt),fil+1)))) { char BIGSTK tempo[HTS_URLMAXSIZE*2]; strcpybuff(tempo,fil+1); strcpybuff(fil,tempo); } // Ouvrir - retour->totalsize=fsize(fconv(unescape_http(fil))); // taille du fichier + retour->totalsize=fsize(fconv(OPT_GET_BUFF(opt), unescape_http(OPT_GET_BUFF(opt),fil))); // taille du fichier retour->msg[0]='\0'; soc=INVALID_SOCKET; if (retour->totalsize<0) @@ -733,7 +736,7 @@ int http_xfopen(int mode,int treat,int waitconnect,char* xsend,char* adr,char* f else { // Note: On passe par un FILE* (plus propre) //soc=open(fil,O_RDONLY,0); // en lecture seule! - retour->fp=fopen(fconv(unescape_http(fil)),"rb"); // ouvrir + retour->fp=fopen(fconv(OPT_GET_BUFF(opt), unescape_http(OPT_GET_BUFF(opt),fil)),"rb"); // ouvrir if (retour->fp==NULL) soc=INVALID_SOCKET; else @@ -741,9 +744,9 @@ int http_xfopen(int mode,int treat,int waitconnect,char* xsend,char* adr,char* f } retour->soc=soc; if (soc!=INVALID_SOCKET) { - retour->statuscode=200; // OK + retour->statuscode=HTTP_OK; // OK strcpybuff(retour->msg,"OK"); - guess_httptype(retour->contenttype,fil); + guess_httptype(opt,retour->contenttype,fil); } else if (strnotempty(retour->msg)==0) strcpybuff(retour->msg,"Unable to open local file"); return soc; // renvoyer @@ -765,7 +768,7 @@ int http_xfopen(int mode,int treat,int waitconnect,char* xsend,char* adr,char* f // connecté? 
if (waitconnect) { - http_sendhead(NULL,mode,xsend,adr,fil,NULL,NULL,retour); + http_sendhead(opt,NULL,mode,xsend,adr,fil,NULL,NULL,retour); } if (soc!=INVALID_SOCKET) { @@ -826,7 +829,7 @@ int http_xfopen(int mode,int treat,int waitconnect,char* xsend,char* adr,char* f // envoi d'une requète -int http_sendhead(t_cookie* cookie,int mode,char* xsend,char* adr,char* fil,char* referer_adr,char* referer_fil,htsblk* retour) { +int http_sendhead(httrackp *opt,t_cookie* cookie,int mode,char* xsend,char* adr,char* fil,char* referer_adr,char* referer_fil,htsblk* retour) { char BIGSTK buff[8192]; //int use_11=0; // HTTP 1.1 utilisé int direct_url=0; // ne pas analyser l'url (exemple: ftp://) @@ -849,7 +852,7 @@ int http_sendhead(t_cookie* cookie,int mode,char* xsend,char* adr,char* fil,char search_tag=strstr(fil,POSTTOK"file:"); if (search_tag) { // postfile if (mode==0) { // GET! - FILE* fp=fopen(unescape_http(search_tag+strlen(POSTTOK)+5),"rb"); + FILE* fp=fopen(unescape_http(OPT_GET_BUFF(opt),search_tag+strlen(POSTTOK)+5),"rb"); if (fp) { char BIGSTK line[1100]; char BIGSTK protocol[256],url[HTS_URLMAXSIZE*2],method[256]; @@ -929,14 +932,14 @@ int http_sendhead(t_cookie* cookie,int mode,char* xsend,char* adr,char* fil,char // tester proxy authentication if (retour->req.proxy.active) { if (link_has_authorization(retour->req.proxy.name)) { // et hop, authentification proxy! 
- char* a=jump_identification(retour->req.proxy.name); - char* astart=jump_protocol(retour->req.proxy.name); + const char* a = jump_identification(retour->req.proxy.name); + const char* astart = jump_protocol(retour->req.proxy.name); char autorisation[1100]; char user_pass[256]; autorisation[0]=user_pass[0]='\0'; // strncatbuff(user_pass,astart,(int) (a - astart) - 1); - strcpybuff(user_pass,unescape_http(user_pass)); + strcpybuff(user_pass,unescape_http(OPT_GET_BUFF(opt),user_pass)); code64((unsigned char*)user_pass,(int)strlen(user_pass),(unsigned char*)autorisation,0); strcatbuff(buff,"Proxy-Authorization: Basic "); strcatbuff(buff,autorisation); @@ -978,17 +981,18 @@ int http_sendhead(t_cookie* cookie,int mode,char* xsend,char* adr,char* fil,char if (mode==0) { // GET! if (search_tag) { char clen[256]; - sprintf(clen,"Content-length: %d"H_CRLF,(int)(strlen(unescape_http(search_tag+strlen(POSTTOK)+1)))); + sprintf(clen,"Content-length: %d"H_CRLF,(int)(strlen(unescape_http(OPT_GET_BUFF(opt),search_tag+strlen(POSTTOK)+1)))); strcatbuff(buff,clen); } } // gestion cookies? 
if (cookie) { + char buffer[8192]; char* b=cookie->data; int cook=0; int max_cookies=8; - int max_size=2048; + size_t max_size=2048; max_size+=strlen(buff); do { b=cookie_find(b,"",jump_identification(adr),fil); // prochain cookie satisfaisant aux conditions @@ -1000,11 +1004,11 @@ int http_sendhead(t_cookie* cookie,int mode,char* xsend,char* adr,char* fil,char cook=1; } else strcatbuff(buff,"; "); - strcatbuff(buff,cookie_get(b,5)); + strcatbuff(buff,cookie_get(buffer,b,5)); strcatbuff(buff,"="); - strcatbuff(buff,cookie_get(b,6)); + strcatbuff(buff,cookie_get(buffer,b,6)); strcatbuff(buff,"; $Path="); - strcatbuff(buff,cookie_get(b,2)); + strcatbuff(buff,cookie_get(buffer,b,2)); b=cookie_nextfield(b); } } while( (b) && (max_cookies>0) && ((int)strlen(buff)<max_size)); @@ -1098,7 +1102,7 @@ int http_sendhead(t_cookie* cookie,int mode,char* xsend,char* adr,char* fil,char char user_pass[256]; user_pass[0]='\0'; strncatbuff(user_pass,astart,(int) (a - astart) - 1); - strcpybuff(user_pass,unescape_http(user_pass)); + strcpybuff(user_pass,unescape_http(OPT_GET_BUFF(opt),user_pass)); code64((unsigned char*)user_pass,(int)strlen(user_pass),(unsigned char*)autorisation,0); if (strcmp(fil,"/robots.txt")) /* pas robots.txt */ bauth_add(cookie,astart,fil,autorisation); @@ -1123,7 +1127,7 @@ int http_sendhead(t_cookie* cookie,int mode,char* xsend,char* adr,char* fil,char // données complémentaires? if (search_tag) if (mode==0) // GET! 
- strcatbuff(buff,unescape_http(search_tag+strlen(POSTTOK)+1)); + strcatbuff(buff,unescape_http(OPT_GET_BUFF(opt),search_tag+strlen(POSTTOK)+1)); } #if HDEBUG @@ -1139,16 +1143,14 @@ int http_sendhead(t_cookie* cookie,int mode,char* xsend,char* adr,char* fil,char // // Callback -#if HTS_ANALYSTE - if (hts_htmlcheck_sendhead != NULL) { - int test_head=hts_htmlcheck_sendhead(buff, adr, fil, referer_adr, referer_fil, retour); + { + int test_head = RUN_CALLBACK6(opt, sendhead, buff, adr, fil, referer_adr, referer_fil, retour); if (test_head!=1) { deletesoc_r(retour); strcpybuff(retour->msg,"Header refused by external wrapper"); retour->soc=INVALID_SOCKET; } } -#endif // Envoi HTS_STAT.last_request = mtime_local(); @@ -1203,7 +1205,7 @@ void treatfirstline(htsblk* retour,char* rcvd) { } else { if (*a == '<') { /* This is dirty .. */ - retour->statuscode=200; + retour->statuscode=HTTP_OK; retour->keep_alive=0; strcpybuff(retour->msg, "Unknown, assuming junky server"); strcpybuff(retour->contenttype,HTS_HYPERTEXT_DEFAULT_MIME); @@ -1212,7 +1214,7 @@ void treatfirstline(htsblk* retour,char* rcvd) { strcpybuff(retour->msg,"Unknown (not HTTP/xx) response structure"); } else { /* This is dirty .. */ - retour->statuscode=200; + retour->statuscode=HTTP_OK; retour->keep_alive=0; strcpybuff(retour->msg, "Unknown, assuming junky server"); strcpybuff(retour->contenttype,HTS_HYPERTEXT_DEFAULT_MIME); @@ -1224,7 +1226,7 @@ void treatfirstline(htsblk* retour,char* rcvd) { strcpybuff(retour->msg,"Empty reponse or internal error"); */ /* This is dirty .. 
*/ - retour->statuscode=200; + retour->statuscode=HTTP_OK; strcpybuff(retour->msg, "Unknown, assuming junky server"); strcpybuff(retour->contenttype,HTS_HYPERTEXT_DEFAULT_MIME); } @@ -1613,12 +1615,12 @@ HTSEXT_API void infostatuscode(char* msg,int statuscode) { // identique au précédent, sauf que l'on donne adr+fil et non url complète -htsblk xhttpget(char* adr,char* fil) { +htsblk xhttpget(httrackp *opt,char* adr,char* fil) { T_SOC soc; htsblk retour; memset(&retour, 0, sizeof(htsblk)); - soc=http_fopen(adr,fil,&retour); + soc=http_fopen(opt,adr,fil,&retour); if (soc!=INVALID_SOCKET) { http_fread(soc,&retour); @@ -1634,12 +1636,12 @@ htsblk xhttpget(char* adr,char* fil) { // variation sur un thème... // réceptionne uniquement un en-tête (HEAD) // retourne dans xx.adr l'adresse pointant sur le bloc de mémoire de l'en tête -htsblk http_gethead(char* adr,char* fil) { +htsblk http_gethead(httrackp *opt,char* adr,char* fil) { T_SOC soc; htsblk retour; memset(&retour, 0, sizeof(htsblk)); - soc=http_xfopen(1,0,1,NULL,adr,fil,&retour); // HEAD, pas de traitement en-tête + soc=http_xfopen(opt,1,0,1,NULL,adr,fil,&retour); // HEAD, pas de traitement en-tête if (soc!=INVALID_SOCKET) { http_fread(soc,&retour); // réception en-tête @@ -1745,10 +1747,10 @@ LLint http_xfread1(htsblk* r,int bufl) { if (!r->is_write) { // stocker en mémoire if (r->totalsize>0) { // totalsize déterminé ET ALLOUE if (r->adr==NULL) { - r->adr=(char*) malloct((INTsys) r->totalsize + 1); - r->size=0; + r->adr = (char*) malloct((size_t) r->totalsize + 1); + r->size = 0; } - if (r->adr!=NULL) { + if (r->adr != NULL) { // lecture nl = hts_read(r,r->adr + ((int) r->size),(int) (r->totalsize-r->size) ); /* NO 32 bit overlow possible here (no 4GB html!) 
*/ // nouvelle taille @@ -1812,7 +1814,7 @@ LLint http_xfread1(htsblk* r,int bufl) { // nouvelle taille if (nl > 0) { r->size+=nl; - if ((INTsys)fwrite(buff,1,nl,r->out)!=nl) { + if (fwrite(buff,1,nl,r->out)!=nl) { r->statuscode=STATUSCODE_INVALID; strcpybuff(r->msg,"Write error on disk"); nl=READ_ERROR; @@ -1905,7 +1907,7 @@ LLint http_xfread1(htsblk* r,int bufl) { // teste une adresse, et suit l'éventuel chemin "moved" // retourne 200 ou le code d'erreur (404=NOT FOUND, etc) // copie dans loc la véritable adresse si celle-ci est différente -htsblk http_location(char* adr,char* fil,char* loc) { +htsblk http_location(httrackp *opt,char* adr,char* fil,char* loc) { htsblk retour; int retry=0; int tryagain; @@ -1914,9 +1916,13 @@ htsblk http_location(char* adr,char* fil,char* loc) { // sinon abandon.. do { tryagain=0; - switch ((retour=http_test(adr,fil,loc)).statuscode) { - case 200: break; // ok! - case 301: case 302: case 303: case 307: // moved! + switch ((retour=http_test(opt,adr,fil,loc)).statuscode) { + case HTTP_OK: + break; // ok! + case HTTP_MOVED_PERMANENTLY: + case HTTP_FOUND: + case HTTP_SEE_OTHER: + case HTTP_TEMPORARY_REDIRECT: // moved! // recalculer adr et fil! if (ident_url_absolute(loc,adr,fil)!=-1) { tryagain=1; // retenter @@ -1933,7 +1939,7 @@ htsblk http_location(char* adr,char* fil,char* loc) { // en cas de moved xx, dans location // abandonne désormais au bout de 30 secondes (aurevoir les sites // qui nous font poireauter 5 heures..) 
-> -2=timeout -htsblk http_test(char* adr,char* fil,char* loc) { +htsblk http_test(httrackp *opt,char* adr,char* fil,char* loc) { T_SOC soc; htsblk retour; //int rcvsize=-1; @@ -1952,7 +1958,7 @@ htsblk http_test(char* adr,char* fil,char* loc) { //soc=http_fopen(adr,fil,&retour,NULL); // ouvrir, + header // on ouvre en head, et on traite l'en tête - soc=http_xfopen(1,0,1,NULL,adr,fil,&retour); // ouvrir HEAD, + envoi header + soc=http_xfopen(opt,1,0,1,NULL,adr,fil,&retour); // ouvrir HEAD, + envoi header if (soc!=INVALID_SOCKET) { int e=0; @@ -2033,7 +2039,7 @@ htsblk http_test(char* adr,char* fil,char* loc) { // Crée un lien (http) vers une adresse internet iadr // retour: structure (adresse, taille, message si erreur (si !adr)) // peut ouvrir avec des connect() non bloquants: waitconnect=0/1 -int newhttp(char* _iadr,htsblk* retour,int port,int waitconnect) { +int newhttp(httrackp *opt,const char* _iadr,htsblk* retour,int port,int waitconnect) { t_fullhostent fullhostent_buffer; // buffer pour resolver T_SOC soc; // descipteur de la socket char* iadr; @@ -2081,17 +2087,17 @@ int newhttp(char* _iadr,htsblk* retour,int port,int waitconnect) { strncatbuff(iadr2,iadr,(int) (a - iadr)); // adresse sans le :xx - hp = hts_gethostbyname(iadr2, &fullhostent_buffer); + hp = hts_gethostbyname(opt,iadr2, &fullhostent_buffer); } else { // adresse normale (port par défaut par la suite) - hp = hts_gethostbyname(iadr, &fullhostent_buffer); + hp = hts_gethostbyname(opt,iadr, &fullhostent_buffer); } } else // port défini - hp = hts_gethostbyname(iadr, &fullhostent_buffer); + hp = hts_gethostbyname(opt,iadr, &fullhostent_buffer); // Conversion iadr -> adresse @@ -2101,13 +2107,15 @@ int newhttp(char* _iadr,htsblk* retour,int port,int waitconnect) { #if DEBUG printf("erreur gethostbyname\n"); #endif - if (retour) - if (retour->msg) + if (retour && retour->msg) { #ifdef _WIN32 - sprintf(retour->msg,"Unable to get server's address: %s", strerror(WSAGetLastError())); + int last_errno = 
WSAGetLastError(); + sprintf(retour->msg,"Unable to get server's address: %s", strerror(last_errno)); #else - sprintf(retour->msg,"Unable to get server's address: %s", strerror(errno)); + int last_errno = errno; + sprintf(retour->msg,"Unable to get server's address: %s", strerror(last_errno)); #endif + } return INVALID_SOCKET; } // copie adresse @@ -2124,7 +2132,7 @@ int newhttp(char* _iadr,htsblk* retour,int port,int waitconnect) { #if HTS_WIDE_DEBUG DEBUG_W("socket\n"); #endif - soc=socket(SOCaddr_sinfamily(server), SOCK_STREAM, 0); + soc = (T_SOC) socket(SOCaddr_sinfamily(server), SOCK_STREAM, 0); if (retour != NULL) { retour->debugid = HTS_STAT.stat_sockid++; } @@ -2132,29 +2140,33 @@ int newhttp(char* _iadr,htsblk* retour,int port,int waitconnect) { DEBUG_W("socket()=%d\n" _ (int) soc); #endif if (soc==INVALID_SOCKET) { - if (retour) - if (retour->msg) + if (retour && retour->msg) { #ifdef _WIN32 - sprintf(retour->msg,"Unable to create a socket: %s", strerror(WSAGetLastError())); + int last_errno = WSAGetLastError(); + sprintf(retour->msg,"Unable to create a socket: %s", strerror(last_errno)); #else - sprintf(retour->msg,"Unable to create a socket: %s", strerror(errno)); + int last_errno = errno; + sprintf(retour->msg,"Unable to create a socket: %s", strerror(last_errno)); #endif + } return INVALID_SOCKET; // erreur création socket impossible } // bind this address - if (retour != NULL && retour->req.proxy.bindhost[0] != '\0') { + if (retour != NULL && retour->req.proxy.bindhost[0] != 0) { t_fullhostent bind_buffer; - hp = hts_gethostbyname(retour->req.proxy.bindhost, &bind_buffer); + hp = hts_gethostbyname(opt, retour->req.proxy.bindhost, &bind_buffer); if (hp == NULL || bind(soc, (struct sockaddr *)hp->h_addr_list[0], hp->h_length) != 0) { - if (retour) - if (retour->msg) + if (retour && retour->msg) { #ifdef _WIN32 - sprintf(retour->msg,"Unable to bind the specificied server address: %s", strerror(WSAGetLastError())); + int last_errno = WSAGetLastError(); + 
sprintf(retour->msg,"Unable to bind the specificied server address: %s", strerror(last_errno)); #else - sprintf(retour->msg,"Unable to bind the specificied server address: %s", strerror(errno)); + int last_errno = errno; + sprintf(retour->msg,"Unable to bind the specificied server address: %s", strerror(last_errno)); #endif + } deletesoc(soc); return INVALID_SOCKET; } @@ -2169,7 +2181,7 @@ int newhttp(char* _iadr,htsblk* retour,int port,int waitconnect) { // connexion non bloquante? if (!waitconnect ) { unsigned long p=1; // non bloquant -#if HTS_WIN +#ifdef _WIN32 ioctlsocket(soc,FIONBIO,&p); #else ioctl(soc,FIONBIO,&p); @@ -2185,7 +2197,7 @@ int newhttp(char* _iadr,htsblk* retour,int port,int waitconnect) { #if HTS_WIDE_DEBUG DEBUG_W("connect\n"); #endif -#if HTS_WIN +#ifdef _WIN32 if (connect(soc, (const struct sockaddr FAR *)&server, server_size) != 0) { #else if (connect(soc, (struct sockaddr *)&server, server_size) == -1) { @@ -2196,13 +2208,15 @@ int newhttp(char* _iadr,htsblk* retour,int port,int waitconnect) { #if HDEBUG printf("unable to connect!\n"); #endif - if (retour) - if (retour->msg) + if (retour && retour->msg) { #ifdef _WIN32 - sprintf(retour->msg,"Unable to connect to the server: %s", strerror(WSAGetLastError())); + int last_errno = WSAGetLastError(); + sprintf(retour->msg,"Unable to connect to the server: %s", strerror(last_errno)); #else - sprintf(retour->msg,"Unable to connect to the server: %s", strerror(errno)); + int last_errno = errno; + sprintf(retour->msg,"Unable to connect to the server: %s", strerror(last_errno)); #endif + } /* Close the socket and notify the error!!! */ deletesoc(soc); return INVALID_SOCKET; @@ -2236,7 +2250,7 @@ int newhttp(char* _iadr,htsblk* retour,int port,int waitconnect) { // couper http://www.truc.fr/pub/index.html -> www.truc.fr /pub/index.html // retour=-1 si erreur. // si file://... 
alors adresse=file:// (et coupe le ?query dans ce cas) -int ident_url_absolute(char* url,char* adr,char* fil) { +int ident_url_absolute(const char* url,char* adr,char* fil) { int pos=0; int scheme=0; @@ -2249,7 +2263,7 @@ int ident_url_absolute(char* url,char* adr,char* fil) { // Scheme? { - char* a=url; + const char* a=url; while (isalpha((unsigned char)*a)) a++; if (*a == ':') @@ -2286,7 +2300,7 @@ int ident_url_absolute(char* url,char* adr,char* fil) { //## if (adr[0]!=lOCAL_CHAR) { // adresse normale http if (!strfield(adr,"file:")) { // PAS file:// - char *p,*q; + const char *p,*q; p=url+pos; // p pointe sur le début de l'adresse, ex: www.truc.fr/sommaire/index.html @@ -2312,7 +2326,7 @@ int ident_url_absolute(char* url,char* adr,char* fil) { // simplifier url pour les ../ fil_simplifie(fil); } else { // localhost file:// - char *p; + const char *p; int i; char* a; @@ -2458,7 +2472,7 @@ HTS_INLINE void deletesoc(T_SOC soc) { #if HTS_WIDE_DEBUG DEBUG_W("close %d\n" _ (int) soc); #endif -#if HTS_WIN +#ifdef _WIN32 closesocket(soc); #else close(soc); @@ -2570,9 +2584,7 @@ void time_local_rfc822(char* s) { } /* convertir une chaine en temps */ -struct tm* convert_time_rfc822(char* s) { - struct tm* result; - /* */ +struct tm* convert_time_rfc822(struct tm *result, const char* s) { char months[]="jan feb mar apr may jun jul aug sep oct nov dec"; char str[256]; char* a; @@ -2584,7 +2596,6 @@ struct tm* convert_time_rfc822(char* s) { int result_n3=-1; int result_n4=-1; /* */ - NOSTATIC_RESERVE(result, struct tm, 1); if ((int) strlen(s) > 200) return NULL; @@ -2655,30 +2666,41 @@ struct tm* convert_time_rfc822(char* s) { return NULL; } -/* sets file time. -1 if error */ -int set_filetime(char* file,struct tm* tm_time) { - struct utimbuf tim; -#ifndef HTS_DO_NOT_USE_FTIME - struct timeb B; - B.timezone=0; - ftime( &B ); - tim.actime=tim.modtime=mktime(tm_time) - B.timezone*60; -#else - // bogus time (GMT/local).. 
- tim.actime=tim.modtime=mktime(tm_time); +static time_t getGMT(struct tm *tm) { /* hey, time_t is local! */ + time_t t = mktime(tm); + if (t != (time_t) -1 && t != (time_t) 0) { + /* BSD does not have static "timezone" declared */ +#if (defined(BSD) || defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) || defined(__FreeBSD_kernel__)) + time_t now = time(NULL); + time_t timezone = - localtime(&now)->tm_gmtoff; #endif - return utime(file,&tim); + return (time_t) (t - timezone); + } + return (time_t) -1; +} + +/* sets file time. -1 if error */ +int set_filetime(const char* file, struct tm* tm_time) { + time_t t = getGMT(tm_time); + if (t != (time_t) -1) { + struct utimbuf tim; + memset(&tim, 0, sizeof(tim)); + tim.actime = tim.modtime = t; + return utime(file, &tim); + } + return -1; } /* sets file time from RFC822 date+time, -1 if error*/ -int set_filetime_rfc822(char* file,char* date) { - struct tm* tm_s=convert_time_rfc822(date); +int set_filetime_rfc822(const char* file, const char* date) { + struct tm buffer; + struct tm* tm_s = convert_time_rfc822(&buffer, date); if (tm_s) { return set_filetime(file,tm_s); } else return -1; } -int get_filetime_rfc822(char* file,char* date) { +int get_filetime_rfc822(const char* file, char* date) { struct stat buf; date[0] = '\0'; if (stat(file, &buf) == 0) { @@ -2714,31 +2736,24 @@ HTS_INLINE void time_rfc822_local(char* s,struct tm * A) { } // conversion en b,Kb,Mb -HTSEXT_API char* int2bytes(LLint n) { - char** a=int2bytes2(n); - char* buff; - NOSTATIC_RESERVE(buff, char, 256); - - strcpybuff(buff,a[0]); - strcatbuff(buff,a[1]); - return concat(buff,""); +HTSEXT_API char* int2bytes(strc_int2bytes2* strc, LLint n) { + char** a = int2bytes2(strc, n); + strcpybuff(strc->catbuff, a[0]); + strcatbuff(strc->catbuff, a[1]); + return strc->catbuff; } // conversion en b/s,Kb/s,Mb/s -HTSEXT_API char* int2bytessec(long int n) { - char* buff; - char** a=int2bytes2(n); - NOSTATIC_RESERVE(buff, char, 256); - - 
strcpybuff(buff,a[0]); - strcatbuff(buff,a[1]); - return concat(buff,"/s"); +HTSEXT_API char* int2bytessec(strc_int2bytes2* strc, long int n) { + char buff[256]; + char** a = int2bytes2(strc, n); + strcpybuff(buff, a[0]); + strcatbuff(buff, a[1]); + return concat(strc->catbuff, buff, "/s"); } -HTSEXT_API char* int2char(int n) { - char* buffer; - NOSTATIC_RESERVE(buffer, char, 32); - sprintf(buffer,"%d",n); - return concat(buffer,""); +HTSEXT_API char* int2char(strc_int2bytes2* strc, int n) { + sprintf(strc->buff2, "%d", n); + return strc->buff2; } // conversion en b,Kb,Mb, nombre et type séparés @@ -2753,15 +2768,7 @@ HTSEXT_API char* int2char(int n) { #define ToLLintTiB (ToLLintKiB*ToLLintKiB*ToLLintKiB*ToLLintKiB) #define ToLLintPiB (ToLLintKiB*ToLLintKiB*ToLLintKiB*ToLLintKiB*ToLLintKiB) #endif -typedef struct { - char buff1[256]; - char buff2[32]; - char* buffadr[2]; -} strc_int2bytes2; -HTSEXT_API char** int2bytes2(LLint n) { - strc_int2bytes2* strc; - NOSTATIC_RESERVE(strc, strc_int2bytes2, 1); - +HTSEXT_API char** int2bytes2(strc_int2bytes2* strc, LLint n) { if (n < ToLLintKiB) { sprintf(strc->buff1,"%d",(int)(LLint)n); strcpybuff(strc->buff2,"B"); @@ -2794,7 +2801,7 @@ HTSEXT_API char** int2bytes2(LLint n) { return strc->buffadr; } -#if HTS_WIN +#ifdef _WIN32 #else // ignore sigpipe? 
int sig_ignore_flag( int setflag ) { // flag ignore @@ -2806,10 +2813,10 @@ int sig_ignore_flag( int setflag ) { // flag ignore #endif // envoi de texte (en têtes généralement) sur la socket soc -HTS_INLINE int sendc(htsblk* r, char* s) { +HTS_INLINE int sendc(htsblk* r, const char* s) { int n, ssz = (int)strlen(s); -#if HTS_WIN +#ifdef _WIN32 #else sig_ignore_flag(1); #endif @@ -2824,7 +2831,7 @@ HTS_INLINE int sendc(htsblk* r, char* s) { #endif n = send(r->soc,s,ssz,0); -#if HTS_WIN +#ifdef _WIN32 #else sig_ignore_flag(0); #endif @@ -3079,7 +3086,7 @@ void map_characters(unsigned char* buffer, unsigned int size, unsigned int* map) // 1 : oui // -1 : on sait pas // -2 : on sait pas, pas d'extension -int ishtml(const char* fil) { +int ishtml(httrackp *opt,const char* fil) { /* User-defined MIME types (overrides ishtml()) */ char BIGSTK fil_noquery[HTS_URLMAXSIZE*2]; char mime[256]; @@ -3088,7 +3095,7 @@ int ishtml(const char* fil) { if ((a = strchr(fil_noquery, '?')) != NULL) { *a = '\0'; } - if (get_userhttptype(0, mime, fil_noquery)) { + if (get_userhttptype(opt, mime, fil_noquery)) { if (strfield2(mime, "text/html")) { return 1; } else { @@ -3111,7 +3118,7 @@ int ishtml(const char* fil) { *b='\0'; ret = ishtml_ext(fil_noquery); // retour if (ret == -1) { - switch(is_knowntype(dotted)) { + switch(is_knowntype(opt,dotted)) { case 1: ret = 0; // connu, non html break; @@ -3174,33 +3181,33 @@ HTS_INLINE int ishttperror(int err) { // retourne le pointeur ou le pointeur + offset si il existe dans la chaine un @ signifiant // une identification -HTSEXT_API char* jump_identification(char* source) { - char *a,*trytofind; +HTSEXT_API char* jump_identification(const char* source) { + const char *a,*trytofind; if (strcmp(source, "file://") == 0) - return source; + return (char*) source; // rechercher dernier @ (car parfois email transmise dans adresse!) 
// mais sauter ftp:// éventuel a = jump_protocol(source); trytofind = strrchr_limit(a, '@', strchr(a,'/')); - return (trytofind != NULL)?trytofind:a; + return (char*) ( (trytofind != NULL) ? trytofind : a ); } -HTSEXT_API char* jump_normalized(char* source) { +HTSEXT_API char* jump_normalized(const char* source) { if (strcmp(source, "file://") == 0) - return source; + return (char*) source; source = jump_identification(source); if (strfield(source, "www") && source[3] != '\0') { if (source[3] == '.') { // www.foo.com -> foo.com source += 4; } else { // www-4.foo.com -> foo.com - char* a = source + 3; + const char* a = source + 3; while(*a && ( isdigit(*a) || *a == '-') ) a++; if (*a == '.') { source = a + 1; } } } - return source; + return (char*) source; } static int sortNormFnc(const void * a_, const void * b_) { @@ -3210,7 +3217,7 @@ static int sortNormFnc(const void * a_, const void * b_) { } -HTSEXT_API char* fil_normalized(char* source, char* dest) { +HTSEXT_API char* fil_normalized(const char* source, char* dest) { char lastc = 0; int gotquery=0; int ampargs=0; @@ -3280,7 +3287,7 @@ HTSEXT_API char* fil_normalized(char* source, char* dest) { } #define endwith(a) ( (len >= (sizeof(a)-1)) ? 
( strncmp(dest, a+len-(sizeof(a)-1), sizeof(a)-1) == 0 ) : 0 ); -HTSEXT_API char* adr_normalized(char* source, char* dest) { +HTSEXT_API char* adr_normalized(const char* source, char* dest) { /* not yet too aggressive (no com<->net<->org checkings) */ strcpybuff(dest, jump_normalized(source)); return dest; @@ -3290,47 +3297,47 @@ HTSEXT_API char* adr_normalized(char* source, char* dest) { // find port (:80) or NULL if not found // can handle IPV6 addresses -HTSEXT_API char* jump_toport(char* source) { - char *a,*trytofind; +HTSEXT_API char* jump_toport(const char* source) { + const char *a,*trytofind; a = jump_identification(source); trytofind = strrchr_limit(a, ']', strchr(source, '/')); // find last ] (http://[3ffe:b80:1234::1]:80/foo.html) a = strchr( (trytofind)?trytofind:a, ':'); - return a; + return (char*)a; } // strrchr, but not too far -char* strrchr_limit(char* s, char c, char* limit) { +char* strrchr_limit(const char* s, char c, const char* limit) { if (limit == NULL) { - char* p = strrchr(s, c); - return p?(p+1):NULL; + const char* p = strrchr(s, c); + return (char*) ( p ? (p+1) : NULL ); } else { - char *a=NULL, *p; + const char *a = NULL, *p; for(;;) { - p=strchr((a)?a:s, c); + p = strchr( (a) ? 
a : s, c); if ((p >= limit) || (p == NULL)) - return a; + return (char*) a; a=p+1; } } } // strrchr, but not too far -char* strstr_limit(char* s, char* sub, char* limit) { +char* strstr_limit(const char* s, const char* sub, const char* limit) { if (limit == NULL) { return strstr(s, sub); } else { - char* pos = strstr(s, sub); + const char* pos = strstr(s, sub); if (pos != NULL) { - char* farpos = strstr(s, limit); + const char* farpos = strstr(s, limit); if (farpos == NULL || pos < farpos) - return pos; + return (char*) pos; } } return NULL; } // retourner adr sans ftp:// -HTS_INLINE char* jump_protocol(char* source) { +HTS_INLINE char* jump_protocol(const char* source) { int p; // scheme // "Comparisons of scheme names MUST be case-insensitive" (RFC2616) @@ -3349,7 +3356,7 @@ HTS_INLINE char* jump_protocol(char* source) { // net_path if (strncmp(source,"//",2)==0) source+=2; - return source; + return (char*) source; } // codage base 64 a vers b @@ -3414,15 +3421,16 @@ HTSEXT_API void unescape_amp(char* s) { if (strcmpbeg(s, "&#") == 0) { int num=0; if ( (s[2] == 'x') || (s[2] == 'X')) { - if (sscanf(s+3, "%x", &num) == 1) { - c=(unsigned char)num; + if (sscanf(s+3, "%x", &num) == 1 && num <= 0xff) { + c=(unsigned char) num; } } else { - if (sscanf(s+2, "%d", &num) == 1) { - c=(unsigned char)num; + if (sscanf(s+2, "%d", &num) == 1 && num <= 0xff) { + c=(unsigned char) num; } } - } else if (strcmpbeg(s, " ")==0) + } + else if (strcmpbeg(s, " ")==0) c=32; // hack - c=160; else if (strcmpbeg(s, "¡")==0) c=161; @@ -3653,20 +3661,18 @@ static int ehexh(char c) { return 0; } -static int ehex(char* s) { +static int ehex(const char* s) { return 16*ehexh(*s)+ehexh(*(s+1)); } // remplacer %20 par ' ', | par : etc.. 
// buffer MAX 1Ko -HTSEXT_API char* unescape_http(char* s) { - char* tempo; +HTSEXT_API char* unescape_http(char *catbuff, const char* s) { int i,j=0; - NOSTATIC_RESERVE(tempo, char, HTS_URLMAXSIZE*2); for (i=0;i<(int) strlen(s);i++) { if (s[i]=='%') { i++; - tempo[j++]=(char) ehex(s+i); + catbuff[j++]=(char) ehex(s+i); i++; // sauter 2 caractères finalement } /* @@ -3676,18 +3682,16 @@ HTSEXT_API char* unescape_http(char* s) { } */ else - tempo[j++]=s[i]; + catbuff[j++]=s[i]; } - tempo[j++]='\0'; - return tempo; + catbuff[j++]='\0'; + return catbuff; } // unescape in URL/URI ONLY what has to be escaped, to form a standard URL/URI // DOES NOT DECODE %25 -HTSEXT_API char* unescape_http_unharm(char* s, int no_high) { - char* tempo; +HTSEXT_API char* unescape_http_unharm(char *catbuff, const char* s, int no_high) { int i,j=0; - NOSTATIC_RESERVE(tempo, char, HTS_URLMAXSIZE*2); for (i=0;i<(int) strlen(s);i++) { if (s[i]=='%') { int nchar=(char) ehex(s+i+1); @@ -3705,10 +3709,10 @@ HTSEXT_API char* unescape_http_unharm(char* s, int no_high) { ); if (!test) { - tempo[j++]=(char) ehex(s+i+1); + catbuff[j++]=(char) ehex(s+i+1); i+=2; } else { - tempo[j++]='%'; + catbuff[j++]='%'; } } /* @@ -3718,10 +3722,10 @@ HTSEXT_API char* unescape_http_unharm(char* s, int no_high) { } */ else - tempo[j++]=s[i]; + catbuff[j++]=s[i]; } - tempo[j++]='\0'; - return tempo; + catbuff[j++]='\0'; + return catbuff; } // remplacer " par %xx etc.. 
@@ -3744,9 +3748,9 @@ HTSEXT_API void escape_check_url(char* s) { x_escape_http(s,0); } // same as escape_check_url, but returns char* -HTSEXT_API char* escape_check_url_addr(char* s) { +HTSEXT_API char* escape_check_url_addr(char *catbuff, const char* s) { char* adr; - escape_check_url(adr = concat(s,"")); + escape_check_url(adr = concat(catbuff, s, "")); return adr; } @@ -3854,55 +3858,52 @@ HTSEXT_API void escape_for_html_print_full(char* s, char* d) { // concat, concatène deux chaines et renvoi le résultat // permet d'alléger grandement le code // il faut savoir qu'on ne peut mettre plus de 16 concat() dans une expression -typedef struct { - char buff[16][HTS_URLMAXSIZE*2*2]; - int rol; -} concat_strc; -char* concat(const char* a,const char* b) { - concat_strc* strc; - NOSTATIC_RESERVE(strc, concat_strc, 1); - strc->rol=((strc->rol+1)%16); // roving pointer - strcpybuff(strc->buff[strc->rol],a); - if (b) strcatbuff(strc->buff[strc->rol],b); - return strc->buff[strc->rol]; +HTSEXT_API char* concat(char *catbuff,const char* a,const char* b) { + if (a != NULL && a[0] != '\0') { + strcpybuff(catbuff, a); + } else { + catbuff[0] = '\0'; + } + if (b != NULL && b[0] != '\0') { + strcatbuff(catbuff, b); + } + return catbuff; } // conversion fichier / -> antislash +static char* __fconv(char* a) { #if HTS_DOSNAME -char* __fconv(char* a) { int i; - for(i=0;i<(int) strlen(a);i++) - if (a[i]=='/') // convertir - a[i]='\\'; + for(i = 0 ; a[i] != 0 ; i++) + if (a[i] == '/') // Unix-to-DOS style + a[i] = '\\'; +#endif return a; } -char* fconcat(char* a,char* b) { - return __fconv(concat(a,b)); + +HTSEXT_API char* fconcat(char *catbuff, const char* a, const char* b) { + return __fconv(concat(catbuff,a,b)); } -char* fconv(char* a) { - return __fconv(concat(a,"")); +HTSEXT_API char* fconv(char *catbuff, const char* a) { + return __fconv(concat(catbuff,a,"")); } -#endif /* / et \\ en / */ -char* __fslash(char* a) { +static char* __fslash(char* a) { int i; - for(i=0;i<(int) 
strlen(a);i++) - if (a[i]=='\\') // convertir - a[i]='/'; + for(i = 0 ; a[i] != 0 ; i++) + if (a[i] == '\\') // convertir + a[i] = '/'; return a; } -char* fslash(char* a) { - return __fslash(concat(a,"")); +char* fslash(char *catbuff, const char* a) { + return __fslash(concat(catbuff,a,NULL)); } // conversion minuscules, avec buffer -char* convtolower(char* a) { - concat_strc* strc; - NOSTATIC_RESERVE(strc, concat_strc, 1); - strc->rol=((strc->rol+1)%16); // roving pointer - strcpybuff(strc->buff[strc->rol],a); - hts_lowcase(strc->buff[strc->rol]); // lower case - return strc->buff[strc->rol]; +char* convtolower(char *catbuff, const char* a) { + strcpybuff(catbuff,a); + hts_lowcase(catbuff); // lower case + return catbuff; } // conversion en minuscules @@ -3957,18 +3958,18 @@ HTS_INLINE int is_realspace(char c) { // deviner type d'un fichier local.. // ex: fil="toto.gif" -> s="image/gif" -void guess_httptype(char *s,const char *fil) { - get_httptype(s, fil, 1); +void guess_httptype(httrackp *opt,char *s,const char *fil) { + get_httptype(opt,s, fil, 1); } // idem // flag: 1 si toujours renvoyer un type -void get_httptype(char *s,const char *fil,int flag) { +HTSEXT_API void get_httptype(httrackp *opt,char *s,const char *fil,int flag) { // userdef overrides get_httptype - if (get_userhttptype(0, s, fil)) { + if (get_userhttptype(opt, s, fil)) { return ; } // regular tests - if (ishtml(fil) == 1) { + if (ishtml(opt,fil) == 1) { strcpybuff(s,"text/html"); } else { /* Check html -> text/html */ @@ -3999,26 +4000,21 @@ void get_httptype(char *s,const char *fil,int flag) { // get type of fil (php) // s: buffer (text/html) or NULL // return: 1 if known by user -int get_userhttptype(int setdefs, char *s, const char *fil) { - char** buffer=NULL; - NOSTATIC_RESERVE(buffer, char*, 1); - if (setdefs) { - *buffer=s; - return 1; - } else { +int get_userhttptype(httrackp *opt, char *s, const char *fil) { + if (s != NULL) { if (s) s[0]='\0'; if (fil == NULL || *fil == '\0') return 
0; #if 1 - if (*buffer) { + if (StringLength(opt->mimedefs) > 0) { /* Check --assume foooo/foo/bar.cgi=text/html, then foo/bar.cgi=text/html, then bar.cgi=text/html */ /* also: --assume baz,bar,foooo/foo/bar.cgi=text/html */ /* start from path begining */ do { - char* next; - char* mimedefs = *buffer; /* loop through mime definitions : \nfoo=bar\nzoo=baz\n.. */ + const char* next; + const char* mimedefs = StringBuff(opt->mimedefs); /* loop through mime definitions : \nfoo=bar\nzoo=baz\n.. */ while(*mimedefs != '\0') { const char* segment = fil + 1; if (*mimedefs == '\n') { @@ -4139,12 +4135,13 @@ void give_mimext(char *s,const char *st) { // 0 : non // 1 : oui // 2 : html -int is_knowntype(const char *fil) { +HTSEXT_API int is_knowntype(httrackp *opt,const char *fil) { + char catbuff[CATBUFF_SIZE]; const char *ext; int j=0; if (!fil) return 0; - ext = get_ext(fil); + ext = get_ext(catbuff, fil); while(strnotempty(hts_mime[j][1])) { if (strfield2(hts_mime[j][1], ext)) { if (strfield2(hts_mime[j][0], "text/html")) @@ -4156,16 +4153,15 @@ int is_knowntype(const char *fil) { } // Known by user? - return (is_userknowntype(fil)); + return (is_userknowntype(opt,fil)); } // extension : html,gif.. 
-char* get_ext(const char *fil) { - char* fil_noquery; +HTSEXT_API char* get_ext(char *catbuff, const char *fil) { const char *a=fil+strlen(fil)-1; - NOSTATIC_RESERVE(fil_noquery, char, HTS_URLMAXSIZE*2); while ( (*a!='.') && (*a!='/') && (a>fil)) a--; if (*a=='.') { + char fil_noquery[HTS_URLMAXSIZE*2]; char* b; fil_noquery[0]='\0'; a++; // pointer sur extension @@ -4173,7 +4169,7 @@ char* get_ext(const char *fil) { b=strchr(fil_noquery,'?'); if (b) *b='\0'; - return concat(fil_noquery,""); + return concat(catbuff,fil_noquery,""); } else return ""; @@ -4184,14 +4180,14 @@ char* get_ext(const char *fil) { // 2 : html // setdefs : set mime buffer: // file=(char*) "asp=text/html\nphp=text/html\n" -int is_userknowntype(const char *fil) { +HTSEXT_API int is_userknowntype(httrackp *opt,const char *fil) { char BIGSTK mime[1024]; if (!fil) return 0; if (!strnotempty(fil)) return 0; mime[0]='\0'; - get_userhttptype(0,mime,fil); + get_userhttptype(opt, mime, fil); if (!strnotempty(mime)) return 0; else if (strfield2(mime,"text/html")) @@ -4202,7 +4198,7 @@ int is_userknowntype(const char *fil) { // page dynamique? // is_dyntype(get_ext("foo.asp")) -int is_dyntype(const char *fil) { +HTSEXT_API int is_dyntype(const char *fil) { int j=0; if (!fil) return 0; @@ -4219,10 +4215,10 @@ int is_dyntype(const char *fil) { // types critiques qui ne doivent pas être changés car renvoyés par des serveurs qui ne // connaissent pas le type -int may_unknown(const char* st) { +int may_unknown(httrackp *opt,const char* st) { int j=0; // types média - if (may_be_hypertext_mime(st, "")) { + if (may_be_hypertext_mime(opt,st, "")) { return 1; } while(strnotempty(hts_mime_keep[j])) { @@ -4258,10 +4254,11 @@ void fprintfio(FILE* fp,char* buff,char* prefix) { } /* Le fichier existe-t-il? (ou est-il accessible?) 
*/ -int fexist(char* s) { +int fexist(const char* s) { + char catbuff[CATBUFF_SIZE]; struct stat st; memset(&st, 0, sizeof(st)); - if (stat(s, &st) == 0) { + if (stat(fconv(catbuff,s), &st) == 0) { if (S_ISREG(st.st_mode)) { return 1; } @@ -4272,13 +4269,14 @@ int fexist(char* s) { /* Taille d'un fichier, -1 si n'existe pas */ /* fp->_cnt ne fonctionne pas sur toute les plate-formes :-(( */ /* Note: NOT YET READY FOR 64-bit */ -INTsys fsize(char* s) { - FILE* fp; +off_t fsize(const char* s) { + char catbuff[CATBUFF_SIZE]; + FILE* fp; if (strnotempty(s)==0) // nom vide: erreur return -1; - fp=fopen(fconv(s),"rb"); + fp=fopen(fconv(catbuff,s),"rb"); if (fp!=NULL) { - INTsys i; + off_t i; fseek(fp,0,SEEK_END); #ifdef HTS_FSEEKO i=ftello(fp); @@ -4287,11 +4285,12 @@ INTsys fsize(char* s) { #endif fclose(fp); return i; - } else return -1; + } else + return -1; } -INTsys fpsize(FILE* fp) { - INTsys oldpos,size; +off_t fpsize(FILE* fp) { + off_t oldpos,size; if (!fp) return -1; #ifdef HTS_FSEEKO @@ -4317,7 +4316,6 @@ typedef struct { } hts_rootdir_strc; HTSEXT_API char* hts_rootdir(char* file) { static hts_rootdir_strc strc = {"", 0}; - //NOSTATIC_RESERVE(strc, hts_rootdir_strc, 1); if (file) { if (!strc.init) { strc.path[0]='\0'; @@ -4470,10 +4468,12 @@ HTS_INLINE int hts_read(htsblk* r,char* buff,int size) { #if HTS_DNSCACHE // 'capsule' contenant uniquement le cache -t_dnscache* _hts_cache(void) { - t_dnscache* cache; - NOSTATIC_RESERVE(cache, t_dnscache, 1); - return cache; +t_dnscache* _hts_cache(httrackp *opt) { + if (opt->state.dns_cache == NULL) { + opt->state.dns_cache = (t_dnscache*)malloct(sizeof(t_dnscache)); + memset(opt->state.dns_cache, 0, sizeof(t_dnscache)); + } + return opt->state.dns_cache; } // free the cache static void hts_cache_free_(t_dnscache* cache) { @@ -4485,10 +4485,10 @@ static void hts_cache_free_(t_dnscache* cache) { } } void hts_cache_free(t_dnscache* cache) { - if (cache != NULL && cache->n != NULL) { - hts_cache_free_(cache->n); - 
cache->n = NULL; - } + if (cache != NULL && cache->n != NULL) { + hts_cache_free_(cache->n); + cache->n = NULL; + } } // lock le cache dns pour tout opération d'ajout @@ -4496,104 +4496,86 @@ void hts_cache_free(t_dnscache* cache) { // -1: status? 0: libérer 1:locker /* - Simple lock function for cache - - Return value: always 0 - Parameter: - 1 wait for lock (mutex) available and lock it - 0 unlock the mutex - [-1 check if locked (always return 0 with mutex)] - -999 initialize + Simple lock for cache */ -#if USE_BEGINTHREAD -int _hts_lockdns(int i) { - static PTHREAD_LOCK_TYPE hMutex; - return htsSetLock(&hMutex,i); -} -#else -int _hts_lockdns(int i) { - int l=0; - if (i>=0) - l=i; - return l; -} -#endif +htsmutex dns_lock = HTSMUTEX_INIT; // routine pour le cache - retour optionnel à donner à chaque fois // NULL: nom non encore testé dans le cache // si h_length==0 alors le nom n'existe pas dans le dns -t_hostent* _hts_ghbn(t_dnscache* cache,char* iadr,t_hostent* retour) { - // attendre que le cache dns soit prêt - //while(_hts_lockdns(-1)); // attendre libération - _hts_lockdns(1); // locker - - while(1) { - if (strcmp(cache->iadr,iadr)==0) { // ok trouvé - if (cache->host_length>0) { // entrée valide +t_hostent* _hts_ghbn(t_dnscache* cache,const char* iadr,t_hostent* retour) { + t_hostent* ret = NULL; + hts_mutexlock(&dns_lock); + for(;;) { + if (strcmp(cache->iadr,iadr) == 0) { // ok trouvé + if (cache->host_length > 0) { // entrée valide if (retour->h_addr_list[0]) memcpy(retour->h_addr_list[0], cache->host_addr, cache->host_length); retour->h_length=cache->host_length; - } else if (cache->host_length==0) { // en cours - _hts_lockdns(0); // délocker - return NULL; + } else if (cache->host_length == 0) { // en cours + ret = NULL; + break; } else { // erreur dans le dns, déja vérifié if (retour->h_addr_list[0]) retour->h_addr_list[0][0]='\0'; retour->h_length=0; // erreur, n'existe pas } - _hts_lockdns(0); // délocker - return retour; + ret = retour; + break; } 
else { // on a pas encore trouvé if (cache->n!=NULL) { // chercher encore - cache=cache->n; // suivant! + cache = cache->n; // suivant! } else { - _hts_lockdns(0); // délocker - return NULL; // non présent + ret = NULL; + break; } } } + hts_mutexrelease(&dns_lock); + return ret; } // tester si iadr a déja été testé (ou en cours de test) // 0 non encore // 1 ok // 2 non présent -int hts_dnstest(char* _iadr) { - char* iadr; - t_dnscache* cache=_hts_cache(); // adresse du cache - NOSTATIC_RESERVE(iadr, char, HTS_URLMAXSIZE*2); +int hts_dnstest(httrackp *opt, const char* _iadr) { + int ret = 0; + t_dnscache* cache=_hts_cache(opt); // adresse du cache + char iadr[HTS_URLMAXSIZE*2]; // sauter user:pass@ éventuel - strcpybuff(iadr,jump_identification(_iadr)); + strcpybuff(iadr, jump_identification(_iadr)); // couper éventuel : { char *a; - if ( (a=jump_toport(iadr)) ) + if ( (a = jump_toport(iadr)) ) *a='\0'; } -#if HTS_WIN +#ifdef _WIN32 if (inet_addr(iadr)!=INADDR_NONE) // numérique #else if (inet_addr(iadr)!=(in_addr_t) -1 ) // numérique #endif return 1; - // while(_hts_lockdns(-1)); // attendre libération - _hts_lockdns(1); // locker - while(1) { - if (strcmp(cache->iadr,iadr)==0) { // ok trouvé - _hts_lockdns(0); // délocker - return 1; // présent! + hts_mutexlock(&dns_lock); + for(;;) { + if (strcmp(cache->iadr, iadr)==0) { // ok trouvé + ret = 1; + break; } else { // on a pas encore trouvé if (cache->n!=NULL) { // chercher encore cache=cache->n; // suivant! 
} else { - _hts_lockdns(0); // délocker - return 2; // non présent + ret = 2; // non présent + break ; } } } + hts_mutexrelease(&dns_lock); + return ret; } @@ -4655,7 +4637,7 @@ HTSEXT_API t_hostent* vxgethostbyname(char* hostname, void* v_buffer) { if (res) { if ( (res->ai_addr) && (res->ai_addrlen) && (res->ai_addrlen <= buffer->addr_maxlen) ) { memcpy(buffer->hp.h_addr_list[0], res->ai_addr, res->ai_addrlen); - buffer->hp.h_length = res->ai_addrlen; + buffer->hp.h_length = (short) res->ai_addrlen; freeaddrinfo(res); return &(buffer->hp); } @@ -4671,10 +4653,10 @@ HTSEXT_API t_hostent* vxgethostbyname(char* hostname, void* v_buffer) { } // cache dns interne à HTS // ** FREE A FAIRE sur la chaine -t_hostent* hts_gethostbyname(char* _iadr, void* v_buffer) { +t_hostent* hts_gethostbyname(httrackp *opt,const char* _iadr, void* v_buffer) { char BIGSTK iadr[HTS_URLMAXSIZE*2]; t_fullhostent* buffer = (t_fullhostent*) v_buffer; - t_dnscache* cache=_hts_cache(); // adresse du cache + t_dnscache* cache=_hts_cache(opt); // adresse du cache t_hostent* hp; /* Clear */ @@ -4718,7 +4700,7 @@ t_hostent* hts_gethostbyname(char* _iadr, void* v_buffer) { #endif { unsigned long inetaddr; -#if HTS_WIN +#ifdef _WIN32 if ((inetaddr=inet_addr(iadr))==INADDR_NONE) { #else if ((inetaddr=inet_addr(iadr))==(in_addr_t) -1 ) { @@ -4755,7 +4737,7 @@ t_hostent* hts_gethostbyname(char* _iadr, void* v_buffer) { } #else -HTS_INLINE t_hostent* hts_gethostbyname(char* iadr, t_fullhostent* buffer) { +HTS_INLINE t_hostent* hts_gethostbyname(httrackp *opt,char* iadr, t_fullhostent* buffer) { t_hostent* retour; #if HTS_WIDE_DEBUG DEBUG_W("gethostbyname (2)\n"); @@ -4778,7 +4760,7 @@ HTS_INLINE t_hostent* hts_gethostbyname(char* iadr, t_fullhostent* buffer) { #define htsLocker(A, N) do {} while(0) static mlink trmalloc = {NULL,0,0,NULL}; static int trmalloc_id=0; -static PTHREAD_LOCK_TYPE* mallocMutex = NULL; +static htsmutex* mallocMutex = NULL; static void hts_meminit(void) { //if (mallocMutex == NULL) 
{ // mallocMutex = calloc(sizeof(*mallocMutex), 1); @@ -4966,30 +4948,32 @@ void cut_path(char* fullpath,char* path,char* pname) { // -- Gestion protocole ftp -- -#if HTS_WIN +#ifdef _WIN32 int ftp_available(void) { return 1; } #else int ftp_available(void) { return 1; // ok! - //return 0; // SOUS UNIX, PROBLEMES + //return 0; // SOUS UNIX, PROBLEMESs } #endif int hts_dgb_init = 0; FILE* hts_dgb_init_fp = NULL; -static void hts_dgb(char* msg); HTSEXT_API void hts_debug(int level) { hts_dgb_init = level; if (hts_dgb_init > 0) { - hts_dgb("hts_debug() called"); + HTS_DBG("hts_debug() called"); } } -static void hts_dgb(char* msg) { - if (hts_dgb_init > 0) { - if (hts_dgb_init_fp == NULL) { + +FILE *hts_dgb_(void) { + if (hts_dgb_init_fp == NULL) { + if ((hts_dgb_init & 0x80) == 0) { + hts_dgb_init_fp = stderr; + } else { #ifdef _WIN32_WCE hts_dgb_init_fp = fopen("\\Temp\\hts-debug.txt", "wb"); #else @@ -4999,17 +4983,28 @@ static void hts_dgb(char* msg) { fprintf(hts_dgb_init_fp, "* Creating file\r\n"); } } - if (hts_dgb_init_fp != NULL) { - fprintf(hts_dgb_init_fp, "%s\r\n", msg); - fflush(hts_dgb_init_fp); - } } + return hts_dgb_init_fp; } +static int hts_init_ok = 0; HTSEXT_API int hts_init(void) { - static int hts_init_ok = 0; + const char *dbg_env; + /* */ + if (hts_init_ok) + return 1; + hts_init_ok = 1; + + /* enable debugging ? 
*/ + dbg_env = getenv("HTS_LOG"); + if (dbg_env != NULL && *dbg_env != 0) { + int level = 0; + if (sscanf(dbg_env, "%d", &level) == 1) { + hts_debug(level); + } + } - hts_dgb("entering hts_init()"); /* debug */ + HTS_DBG("entering hts_init()"); /* debug */ #ifdef _WIN32_WCE #ifndef HTS_CECOMPAT @@ -5017,19 +5012,17 @@ HTSEXT_API int hts_init(void) { #endif #endif - /* Init threads */ - if (!hts_init_ok) { - htsthread_init(); - } + /* Init threads (lazy init) */ + htsthread_init(); /* Ensure external modules are loaded */ - hts_dgb("calling htspe_init()"); /* debug */ - htspe_init(); + HTS_DBG("calling htspe_init()"); /* debug */ + htspe_init(); /* module load (lazy) */ /* MD5 Auto-test */ { char digest[32 + 2]; - unsigned char* atest = (unsigned char*)"MD5 Checksum Autotest"; + const unsigned char* atest = (const unsigned char*)"MD5 Checksum Autotest"; digest[0] = '\0'; domd5mem(atest, strlen(atest), digest, 1); /* a42ec44369da07ace5ec1d660ba4a69a */ if (strcmp(digest, "a42ec44369da07ace5ec1d660ba4a69a") != 0) { @@ -5038,36 +5031,7 @@ HTSEXT_API int hts_init(void) { } } - hts_dgb("initializing default wrappers"); /* debug */ - if (!hts_init_ok) { - hts_init_ok = 1; - // default wrappers - htswrap_init(); - htswrap_add("init",htsdefault_init); - htswrap_add("free",htsdefault_uninit); - htswrap_add("start",htsdefault_start); - htswrap_add("change-options",htsdefault_chopt); - htswrap_add("end",htsdefault_end); - htswrap_add("preprocess-html",htsdefault_preprocesshtml); - htswrap_add("postprocess-html",htsdefault_postprocesshtml); - htswrap_add("check-html",htsdefault_checkhtml); - htswrap_add("loop",htsdefault_loop); - htswrap_add("query",htsdefault_query); - htswrap_add("query2",htsdefault_query2); - htswrap_add("query3",htsdefault_query3); - htswrap_add("check-link",htsdefault_check); - htswrap_add("pause",htsdefault_pause); - htswrap_add("save-file",htsdefault_filesave); - htswrap_add("save-file2",htsdefault_filesave2); - 
htswrap_add("link-detected",htsdefault_linkdetected); - htswrap_add("link-detected2",htsdefault_linkdetected2); - htswrap_add("transfer-status",htsdefault_xfrstatus); - htswrap_add("save-name",htsdefault_savename); - htswrap_add("send-header",htsdefault_sendheader); - htswrap_add("receive-header",htsdefault_receiveheader); - } - - hts_dgb("initializing SSL"); /* debug */ + HTS_DBG("initializing SSL"); /* debug */ #if HTS_USEOPENSSL /* Initialize the OpensSSL library @@ -5088,96 +5052,522 @@ HTSEXT_API int hts_init(void) { } #endif - /* Init vars and thread-specific values */ - hts_dgb("initializing variables"); /* debug */ - hts_initvar(); - - /* initialiser structcheck */ - // structcheck_init(1); - - hts_dgb("ending hts_init()"); /* debug */ + HTS_DBG("ending hts_init()"); /* debug */ return 1; } + +/* will not free thread env. */ HTSEXT_API int hts_uninit(void) { - //htsthread_uninit(); - hts_cache_free(_hts_cache()); - hts_freevar(); - /* htswrap_free(); */ + /* hts_init() is a lazy initializer, with limited a allocation (one or two mutexes) ; + we won't free anything here as the .h semantic was never being very clear */ return 1; } +HTSEXT_API int hts_uninit_module(void) { + if (!hts_init_ok) + return 1; + htsthread_uninit(); + htspe_uninit(); + hts_init_ok = 0; + return 1; +} + +HTSEXT_API int hts_log(httrackp *opt, const char* prefix, const char *msg) { + if (opt->log != NULL) { + fspc(opt, opt->log, prefix); + fprintf(opt->log, "%s"LF, msg); + return 0; + } + return 1; /* Error */ +} + +HTSEXT_API void set_wrappers(httrackp *opt) { // LEGACY +} + +HTSEXT_API int plug_wrapper(httrackp *opt, const char *moduleName, const char* argv) { + void* handle = openFunctionLib(moduleName); + if (handle != NULL) { + t_hts_plug plug = (t_hts_plug) getFunctionPtr(handle, "hts_plug"); + t_hts_unplug unplug = (t_hts_unplug) getFunctionPtr(handle, "hts_unplug"); + if (plug != NULL) { + int ret = plug(opt, argv); + if (hts_dgb_init > 0 && opt->log != NULL) { + 
HTS_DBG("plugged module '%s' (return code=%d)" _ moduleName _ ret); + } + if (ret == 1) { /* Success! */ + opt->libHandles.handles = (htslibhandle*) realloct(opt->libHandles.handles, ( opt->libHandles.count + 1 )*sizeof(htslibhandle)); + opt->libHandles.handles[opt->libHandles.count].handle = handle; + opt->libHandles.handles[opt->libHandles.count].moduleName = strdupt(moduleName); + opt->libHandles.count++; + return 1; + } else { + HTS_DBG("* note: error while running entry point 'hts_plug' in %s"LF _ moduleName); + if (unplug) + unplug(opt); + } + } else { + int last_errno = errno; + HTS_DBG("* note: can't find entry point 'hts_plug' in %s: %s"LF _ moduleName _ strerror(last_errno)); + } + closeFunctionLib(handle); + return 0; + } else { + int last_errno = errno; + HTS_DBG("* note: can't load %s: %s"LF _ moduleName _ strerror(last_errno)); + } + return -1; +} + +static void unplug_wrappers(httrackp *opt) { + if (opt->libHandles.handles != NULL) { + int i; + for(i = 0 ; i < opt->libHandles.count ; i++) { + if (opt->libHandles.handles[i].handle != NULL) { + /* hts_unplug(), the dll exit point (finalizer) */ + t_hts_unplug unplug = (t_hts_unplug) getFunctionPtr(opt->libHandles.handles[i].handle, "hts_unplug"); + if (unplug != NULL) + unplug(opt); + closeFunctionLib(opt->libHandles.handles[i].handle); + opt->libHandles.handles[i].handle = NULL; + } + if (opt->libHandles.handles[i].moduleName != NULL) { + freet(opt->libHandles.handles[i].moduleName); + opt->libHandles.handles[i].moduleName = NULL; + } + } + freet(opt->libHandles.handles); + opt->libHandles.handles = NULL; + opt->libHandles.count = 0; + } +} + +int multipleStringMatch(const char *s, const char *match) { + int ret = 0; + String name = STRING_EMPTY; + if (match == NULL || s == NULL || *s == 0) + return 0; + for( ; *match != 0 ; match++) { + StringClear(name); + for( ; *match != 0 && *match != '\n' ; match++) { + StringAddchar(name, *match); + } + if (StringLength(name) > 0 && strstr(s, StringBuff(name)) 
!= NULL) { + ret = 1; + break ; + } + } + StringFree(name); + return ret; +} + +HTSEXT_API httrackp *hts_create_opt(void) { +#ifdef _WIN32 + static const char *defaultModules[] = { + "htsswf", "htsjava", "httrack-plugin", NULL + }; +#else + static const char *defaultModules[] = { + "libhtsswf.so.1", "libhtsjava.so.2", "httrack-plugin", NULL + }; +#endif + httrackp *opt = malloc(sizeof(httrackp)); + + /* default options */ + memset(opt, 0, sizeof(httrackp)); + opt->size_httrackp = sizeof(httrackp); + + /* mutexes */ + hts_mutexinit(&opt->state.lock); + + /* custom wrappers */ + opt->libHandles.count = 0; + + /* default settings */ + + opt->wizard=2; // wizard automatique + opt->quiet=0; // questions + // + opt->travel=0; // même adresse + opt->depth=9999; // mirror total par défaut + opt->extdepth=0; // mais pas à l'extérieur + opt->seeker=1; // down + opt->urlmode=2; // relatif par défaut + opt->debug=0; // pas de débug en plus + opt->getmode=3; // linear scan + opt->maxsite=-1; // taille max site (aucune) + opt->maxfile_nonhtml=-1; // taille max fichier non html + opt->maxfile_html=-1; // idem pour html + opt->maxsoc=4; // nbre socket max + opt->fragment=-1; // pas de fragmentation + opt->nearlink=0; // ne pas prendre les liens non-html "adjacents" + opt->makeindex=1; // faire un index + opt->kindex=0; // index 'keyword' + opt->delete_old=1; // effacer anciens fichiers + opt->makestat=0; // pas de fichier de stats + opt->maketrack=0; // ni de tracking + opt->timeout=120; // timeout par défaut (2 minutes) + opt->cache=1; // cache prioritaire + opt->shell=0; // pas de shell par defaut + opt->proxy.active=0; // pas de proxy + opt->user_agent_send=1; // envoyer un user-agent + StringCopy(opt->user_agent, "Mozilla/4.5 (compatible; HTTrack 3.0x; Windows 98)"); + StringCopy(opt->referer, ""); + StringCopy(opt->from, ""); + opt->savename_83=0; // noms longs par défaut + opt->savename_type=0; // avec structure originale + opt->savename_delayed=2;// hard delayed type 
(default) + opt->delayed_cached=1; // cached delayed type (default) + opt->mimehtml=0; // pas MIME-html + opt->parsejava=HTSPARSE_DEFAULT; // parser classes + opt->hostcontrol=0; // PAS de control host pour timeout et traffic jammer + opt->retry=2; // 2 retry par défaut + opt->errpage=1; // copier ou générer une page d'erreur en cas d'erreur (404 etc.) + opt->check_type=1; // vérifier type si inconnu (cgi,asp..) SAUF / considéré comme html + opt->all_in_cache=0; // ne pas tout stocker en cache + opt->robots=2; // traiter les robots.txt + opt->external=0; // liens externes normaux + opt->passprivacy=0; // mots de passe dans les fichiers + opt->includequery=1; // include query-string par défaut + opt->mirror_first_page=0; // pas mode mirror links + opt->accept_cookie=1; // gérer les cookies + opt->cookie=NULL; + opt->http10=0; // laisser http/1.1 + opt->nokeepalive = 0; // pas keep-alive + opt->nocompression=0; // pas de compression + opt->tolerant=0; // ne pas accepter content-length incorrect + opt->parseall=1; // tout parser (tags inconnus, par exemple) + opt->parsedebug=0; // pas de mode débuggage + opt->norecatch=0; // ne pas reprendre les fichiers effacés par l'utilisateur + opt->verbosedisplay=0; // pas d'animation texte + opt->sizehack=0; // size hack + opt->urlhack=1; // url hack (normalizer) + StringCopy(opt->footer,HTS_DEFAULT_FOOTER); + opt->ftp_proxy=1; // proxy http pour ftp + StringCopy(opt->filelist,""); + StringCopy(opt->lang_iso,"en, *"); + StringCopy(opt->mimedefs,"\n"); // aucun filtre mime (\n IMPORTANT) + StringClear(opt->mod_blacklist); + // + opt->log = stdout; + opt->errlog = stderr; + opt->flush = 1; // flush sur les fichiers log + //opt->aff_progress=0; + opt->keyboard=0; + // + StringCopy(opt->path_html,""); + StringCopy(opt->path_log,""); + StringCopy(opt->path_bin,""); + // +#if HTS_SPARE_MEMORY==0 + opt->maxlink=100000; // 100,000 liens max par défaut (400Kb) + opt->maxfilter=200; // 200 filtres max par défaut +#else + 
opt->maxlink=10000; // 10,000 liens max par défaut (40Kb) + opt->maxfilter=50; // 50 filtres max par défaut +#endif + opt->maxcache=1048576*32; // a peu près 32Mo en cache max -- OPTION NON PARAMETRABLE POUR L'INSTANT -- + //opt->maxcache_anticipate=256; // maximum de liens à anticiper + opt->maxtime=-1; // temps max en secondes +#if HTS_USEMMS + opt->mms_maxtime = 60*3600; // max time for mms streams (one hour) +#endif + opt->maxrate=25000; // taux maxi + opt->maxconn=5.0; // nombre connexions/s + opt->waittime=-1; // wait until.. hh*3600+mm*60+ss + // + opt->exec=""; + opt->is_update=0; // not an update (yet) + opt->dir_topindex=0; // do not built top index (yet) + // + opt->bypass_limits=0; // enforce limits by default + opt->state.stop=0; // stopper + opt->state.exit_xh=0; // abort + + /* Alocated buffers */ + + opt->callbacks_fun = (t_hts_htmlcheck_callbacks*) malloct(sizeof(t_hts_htmlcheck_callbacks)); + memset(opt->callbacks_fun, 0, sizeof(t_hts_htmlcheck_callbacks)); + + /* Preload callbacks : java and flash parser, and the automatic user-defined callback */ + + { + int i; + for(i = 0 ; defaultModules[i] != NULL ; i++) { + int ret = plug_wrapper(opt, defaultModules[i], defaultModules[i]); + if (ret == 0) { /* Module aborted initialization */ + /* Ignored. 
*/ + } + } + } + + return opt; +} + +HTSEXT_API void hts_free_opt(httrackp *opt) { + if (opt != NULL) { + + /* Alocated callbacks */ + + if (opt->callbacks_fun != NULL) { + int i; + t_hts_htmlcheck_callbacks_item *items = (t_hts_htmlcheck_callbacks_item*) opt->callbacks_fun; + const int size = (int) sizeof(t_hts_htmlcheck_callbacks) / sizeof(t_hts_htmlcheck_callbacks_item); + assertf(sizeof(t_hts_htmlcheck_callbacks_item)*size == sizeof(t_hts_htmlcheck_callbacks)); + + /* Free all linked lists */ + for(i = 0 ; i < size ; i++) { + t_hts_callbackarg *carg, *next_carg; + for(carg = items[i].carg ; carg != NULL && (next_carg = carg->prev.carg, carg != NULL) ; carg = next_carg ) { + hts_free(carg); + } + } + + freet(opt->callbacks_fun); + opt->callbacks_fun = NULL; + } + + /* Close library handles */ + unplug_wrappers(opt); + + /* Cache */ + if (opt->state.dns_cache != NULL) { + hts_cache_free(opt->state.dns_cache); + opt->state.dns_cache = NULL; + } + + /* Cancel chain */ + if (opt->state.cancel != NULL) { + htsoptstatecancel *cancel; + for(cancel = opt->state.cancel ; cancel != NULL ; ) { + htsoptstatecancel *next = cancel->next; + if (cancel->url != NULL) { + freet(cancel->url); + } + freet(cancel); + cancel = next; + } + opt->state.cancel = NULL; + } + + /* Free strings */ + + StringFree(opt->proxy.name); + StringFree(opt->proxy.bindhost); + + StringFree(opt->savename_userdef); + StringFree(opt->user_agent); + StringFree(opt->referer); + StringFree(opt->from); + StringFree(opt->lang_iso); + StringFree(opt->sys_com); + StringFree(opt->mimedefs); + StringFree(opt->filelist); + StringFree(opt->urllist); + StringFree(opt->footer); + StringFree(opt->mod_blacklist); + + StringFree(opt->path_html); + StringFree(opt->path_log); + StringFree(opt->path_bin); + + /* mutexes */ + hts_mutexfree(&opt->state.lock); + + /* Free structure */ + free(opt); + } +} + // defaut wrappers -void __cdecl htsdefault_init(void) { +static void __cdecl htsdefault_init(t_hts_callbackarg *carg) { 
} -void __cdecl htsdefault_uninit(void) { - hts_freevar(); +static void __cdecl htsdefault_uninit(t_hts_callbackarg *carg) { + // hts_freevar(); } -int __cdecl htsdefault_start(void* opt) { +static int __cdecl htsdefault_start(t_hts_callbackarg *carg, httrackp* opt) { return 1; } -int __cdecl htsdefault_chopt(void* opt) { +static int __cdecl htsdefault_chopt(t_hts_callbackarg *carg, httrackp* opt) { return 1; } -int __cdecl htsdefault_end(void) { +static int __cdecl htsdefault_end(t_hts_callbackarg *carg, httrackp* opt) { return 1; } -int __cdecl htsdefault_preprocesshtml(char** html,int* len,char* url_adresse,char* url_fichier) { +static int __cdecl htsdefault_preprocesshtml(t_hts_callbackarg *carg, httrackp *opt, char** html,int* len,const char* url_adresse,const char* url_fichier) { return 1; } -int __cdecl htsdefault_postprocesshtml(char** html,int* len,char* url_adresse,char* url_fichier) { +static int __cdecl htsdefault_postprocesshtml(t_hts_callbackarg *carg, httrackp *opt, char** html,int* len,const char* url_adresse,const char* url_fichier) { return 1; } -int __cdecl htsdefault_checkhtml(char* html,int len,char* url_adresse,char* url_fichier) { +static int __cdecl htsdefault_checkhtml(t_hts_callbackarg *carg, httrackp *opt, char* html,int len,const char* url_adresse,const char* url_fichier) { return 1; } -int __cdecl htsdefault_loop(void* back,int back_max,int back_index,int lien_n,int lien_tot,int stat_time,hts_stat_struct* stats) { // appelé à chaque boucle de HTTrack +static int __cdecl htsdefault_loop(t_hts_callbackarg *carg, httrackp *opt, lien_back* back,int back_max,int back_index,int lien_n,int lien_tot,int stat_time,hts_stat_struct* stats) { // appelé à chaque boucle de HTTrack return 1; } -char* __cdecl htsdefault_query(char* question) { +static const char* __cdecl htsdefault_query(t_hts_callbackarg *carg, httrackp *opt, const char* question) { return ""; } -char* __cdecl htsdefault_query2(char* question) { +static const char* __cdecl 
htsdefault_query2(t_hts_callbackarg *carg, httrackp *opt, const char* question) { return ""; } -char* __cdecl htsdefault_query3(char* question) { +static const char* __cdecl htsdefault_query3(t_hts_callbackarg *carg, httrackp *opt, const char* question) { return ""; } -int __cdecl htsdefault_check(char* adr,char* fil,int status) { +static int __cdecl htsdefault_check(t_hts_callbackarg *carg, httrackp *opt, const char* adr,const char* fil,int status) { return -1; } -int __cdecl htsdefault_check_mime(char* adr,char* fil,char* mime,int status) { +static int __cdecl htsdefault_check_mime(t_hts_callbackarg *carg, httrackp *opt, const char* adr,const char* fil,const char* mime,int status) { return -1; } -void __cdecl htsdefault_pause(char* lockfile) { +static void __cdecl htsdefault_pause(t_hts_callbackarg *carg, httrackp *opt, const char* lockfile) { while (fexist(lockfile)) { Sleep(1000); } } -void __cdecl htsdefault_filesave(char* file) { +static void __cdecl htsdefault_filesave(t_hts_callbackarg *carg, httrackp *opt, const char* file) { } -void __cdecl htsdefault_filesave2(char* adr, char* file, char* sav, int is_new, int is_modified, int not_updated) { +static void __cdecl htsdefault_filesave2(t_hts_callbackarg *carg, httrackp *opt, const char* adr, const char* file, const char* sav, int is_new, int is_modified, int not_updated) { } -int __cdecl htsdefault_linkdetected(char* link) { +static int __cdecl htsdefault_linkdetected(t_hts_callbackarg *carg, httrackp *opt, char* link) { return 1; } -int __cdecl htsdefault_linkdetected2(char* link, char* start_tag) { +static int __cdecl htsdefault_linkdetected2(t_hts_callbackarg *carg, httrackp *opt, char* link, const char* start_tag) { return 1; } -int __cdecl htsdefault_xfrstatus(void* back) { +static int __cdecl htsdefault_xfrstatus(t_hts_callbackarg *carg, httrackp *opt, lien_back* back) { return 1; } -int __cdecl htsdefault_savename(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save) { 
+static int __cdecl htsdefault_savename(t_hts_callbackarg *carg, httrackp *opt, const char* adr_complete,const char* fil_complete,const char* referer_adr,const char* referer_fil,char* save) { return 1; } -int __cdecl htsdefault_sendheader(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* outgoing) { +static int __cdecl htsdefault_sendhead(t_hts_callbackarg *carg, httrackp *opt, char* buff, const char* adr, const char* fil, const char* referer_adr, const char* referer_fil, htsblk* outgoing) { return 1; } -int __cdecl htsdefault_receiveheader(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* incoming) { +static int __cdecl htsdefault_receivehead(t_hts_callbackarg *carg, httrackp *opt, char* buff, const char* adr, const char* fil, const char* referer_adr, const char* referer_fil, htsblk* incoming) { return 1; } +static int __cdecl htsdefault_detect(t_hts_callbackarg *carg, httrackp *opt, htsmoduleStruct* str) { + return 0; +} +static int __cdecl htsdefault_parse(t_hts_callbackarg *carg, httrackp *opt, htsmoduleStruct* str) { + return 0; +} + + +/* Default internal dummy callbacks */ +const t_hts_htmlcheck_callbacks default_callbacks = { + { htsdefault_init, NULL }, + { htsdefault_uninit, NULL }, + { htsdefault_start, NULL }, + { htsdefault_end, NULL }, + { htsdefault_chopt, NULL }, + { htsdefault_preprocesshtml, NULL }, + { htsdefault_postprocesshtml, NULL }, + { htsdefault_checkhtml, NULL }, + { htsdefault_query, NULL }, + { htsdefault_query2, NULL }, + { htsdefault_query3, NULL }, + { htsdefault_loop, NULL }, + { htsdefault_check, NULL }, + { htsdefault_check_mime, NULL }, + { htsdefault_pause, NULL }, + { htsdefault_filesave, NULL }, + { htsdefault_filesave2, NULL }, + { htsdefault_linkdetected, NULL }, + { htsdefault_linkdetected2, NULL }, + { htsdefault_xfrstatus, NULL }, + { htsdefault_savename, NULL }, + { htsdefault_sendhead, NULL }, + { htsdefault_receivehead, NULL }, + { htsdefault_detect, NULL }, + 
{ htsdefault_parse, NULL } +}; + +#define CHARCAST(A) ( (char*) (A) ) +#define OFFSET_OF(TYPE, MEMBER) ( (size_t) ( CHARCAST(&(((TYPE*) NULL)->MEMBER)) - CHARCAST((TYPE*) NULL) ) ) +#define CALLBACK_REF(name, fun) \ + { name, OFFSET_OF(t_hts_htmlcheck_callbacks, fun) } +#define MEMBER_OF(STRUCT, OFFSET, TYPE) ( * ((TYPE*)((char*)(STRUCT) + (OFFSET))) ) + +const t_hts_callback_ref default_callbacks_ref[] = { + CALLBACK_REF("init", init), + CALLBACK_REF("free", uninit), + CALLBACK_REF("start", start), + CALLBACK_REF("end", end), + CALLBACK_REF("change-options", chopt), + CALLBACK_REF("preprocess-html", preprocess), + CALLBACK_REF("postprocess-html", postprocess), + CALLBACK_REF("check-html", check_html), + CALLBACK_REF("query", query), + CALLBACK_REF("query2", query2), + CALLBACK_REF("query3", query3), + CALLBACK_REF("loop", loop), + CALLBACK_REF("check-link", check_link), + CALLBACK_REF("check-mime", check_mime), + CALLBACK_REF("pause", pause), + CALLBACK_REF("save-file", filesave), + CALLBACK_REF("save-file2", filesave2), + CALLBACK_REF("link-detected", linkdetected), + CALLBACK_REF("link-detected2", linkdetected2), + CALLBACK_REF("transfer-status", xfrstatus), + CALLBACK_REF("save-name", savename), + CALLBACK_REF("send-header", sendhead), + CALLBACK_REF("receive-header", receivehead), + { NULL, 0 } +}; + +size_t hts_get_callback_offs(const char *name) { + const t_hts_callback_ref *ref; + for(ref = &default_callbacks_ref[0] ; ref->name != NULL ; ref++) { + if (strcmp(name, ref->name) == 0) { + return ref->offset; + } + } + return (size_t)(-1); +} + +int hts_set_callback(t_hts_htmlcheck_callbacks *callbacks, const char *name, void *function) { + size_t offs = hts_get_callback_offs(name); + if (offs != (size_t) -1) { + MEMBER_OF(callbacks, offs, void*) = function; + return 0; + } + return 1; +} + +void *hts_get_callback(t_hts_htmlcheck_callbacks *callbacks, const char *name) { + size_t offs = hts_get_callback_offs(name); + if (offs != (size_t) -1) { + return 
MEMBER_OF(callbacks, offs, void*); + } + return NULL; +} + // end defaut wrappers +/* libc stubs */ +HTSEXT_API char* hts_strdup(const char* str) { + return strdup(str); +} + +HTSEXT_API void* hts_malloc(size_t size) { + return malloc(size); +} + +HTSEXT_API void* hts_realloc(void* data, size_t size) { + return realloc(data, size); +} + +HTSEXT_API void hts_free(void* data) { + free(data); +} + +/* Dummy functions */ +HTSEXT_API int hts_resetvar(void) { + return 0; +} // Fin diff --git a/src/htslib.h b/src/htslib.h index 2a720da..1061aee 100644 --- a/src/htslib.h +++ b/src/htslib.h @@ -39,6 +39,20 @@ Please visit our Website: http://www.httrack.com #ifndef HTS_DEFH #define HTS_DEFH +/* Forward definitions */ +#ifndef HTS_DEF_FWSTRUCT_htsrequest +#define HTS_DEF_FWSTRUCT_htsrequest +typedef struct htsrequest htsrequest; +#endif +#ifndef HTS_DEF_FWSTRUCT_htsblk +#define HTS_DEF_FWSTRUCT_htsblk +typedef struct htsblk htsblk; +#endif +#ifndef HTS_DEF_FWSTRUCT_t_dnscache +#define HTS_DEF_FWSTRUCT_t_dnscache +typedef struct t_dnscache t_dnscache; +#endif + /* définitions globales */ #include "htsglobal.h" @@ -46,6 +60,7 @@ Please visit our Website: http://www.httrack.com #include "htsbase.h" #include "htsbasenet.h" #include "htsnet.h" +#include "htsdefines.h" /* cookies et auth */ #include "htsbauth.h" @@ -61,8 +76,29 @@ Please visit our Website: http://www.httrack.com #define READ_TIMEOUT (-3) #define READ_INTERNAL_ERROR (-4) +/* concat */ +HTS_STATIC char* getHtsOptBuff_(httrackp *opt) { + opt->state.concat.index = ( opt->state.concat.index + 1 ) % 16; + return opt->state.concat.buff[opt->state.concat.index]; +} +#define OPT_GET_BUFF(OPT) ( getHtsOptBuff_(OPT) ) + // structure pour paramètres supplémentaires lors de la requête -typedef struct htsrequest { +#ifndef HTS_DEF_FWSTRUCT_htsrequest_proxy +#define HTS_DEF_FWSTRUCT_htsrequest_proxy +typedef struct htsrequest_proxy htsrequest_proxy; +#endif +struct htsrequest_proxy { + int active; + char name[1024]; + int port; 
+ char bindhost[256]; // bind this host +}; +#ifndef HTS_DEF_FWSTRUCT_htsrequest +#define HTS_DEF_FWSTRUCT_htsrequest +typedef struct htsrequest htsrequest; +#endif +struct htsrequest { short int user_agent_send; // user agent (ex: httrack/1.0 [sun]) short int http11; // l'en tête peut (doit) être signé HTTP/1.1 et non HTTP/1.0 short int nokeepalive; // pas de keep-alive @@ -73,12 +109,16 @@ typedef struct htsrequest { char referer[256]; char from[256]; char lang_iso[64]; - t_proxy proxy; // proxy -} htsrequest; + htsrequest_proxy proxy; // proxy +}; // structure pour retour d'une connexion/prise d'en tête -typedef struct htsblk { +#ifndef HTS_DEF_FWSTRUCT_htsblk +#define HTS_DEF_FWSTRUCT_htsblk +typedef struct htsblk htsblk; +#endif +struct htsblk { int statuscode; // status-code, -1=erreur, 200=OK,201=..etc (cf RFC1945) short int notmodified; // page ou fichier NON modifié (transféré) short int is_write; // sortie sur disque (out) ou en mémoire (adr) @@ -117,16 +157,24 @@ typedef struct htsblk { /* */ htsrequest req; // paramètres pour la requête /*char digest[32+2]; // digest md5 généré par le moteur ("" si non généré)*/ -} htsblk; +}; /* ANCIENNE STURCTURE pour cache 1.0 */ -typedef struct { +#ifndef HTS_DEF_FWSTRUCT_OLD_t_proxy +#define HTS_DEF_FWSTRUCT_OLD_t_proxy +typedef struct OLD_t_proxy OLD_t_proxy; +#endif +struct OLD_t_proxy { int active; char name[1024]; int port; -} OLD_t_proxy; -typedef struct { +}; +#ifndef HTS_DEF_FWSTRUCT_OLD_htsblk +#define HTS_DEF_FWSTRUCT_OLD_htsblk +typedef struct OLD_htsblk OLD_htsblk; +#endif +struct OLD_htsblk { int statuscode; // ANCIENNE STURCTURE - status-code, -1=erreur, 200=OK,201=..etc (cf RFC1945) int notmodified; // ANCIENNE STURCTURE - page ou fichier NON modifié (transféré) int is_write; // ANCIENNE STURCTURE - sortie sur disque (out) ou en mémoire (adr) @@ -144,24 +192,29 @@ typedef struct { int user_agent_send; // ANCIENNE STURCTURE - user agent (ex: httrack/1.0 [sun]) char user_agent[64]; int http11; // 
ANCIENNE STURCTURE - l'en tête doit être signé HTTP/1.1 et non HTTP/1.0 -} OLD_htsblk; +}; /* fin ANCIENNE STURCTURE pour cache 1.0 */ // cache pour le dns, pour éviter de faire des gethostbyname sans arrêt -typedef struct t_dnscache { +#ifndef HTS_DEF_FWSTRUCT_t_dnscache +#define HTS_DEF_FWSTRUCT_t_dnscache +typedef struct t_dnscache t_dnscache; +#endif +struct t_dnscache { char iadr[1024]; struct t_dnscache* n; char host_addr[HTS_MAXADDRLEN]; // 4 octets (v4), ou 16 octets (v6) int host_length; // 4 normalement - ==0 alors en cours de résolution // ou >16 si sockaddr // ==-1 alors erreur (host n'éxiste pas) -} t_dnscache; - +}; /* Library internal definictions */ #ifdef HTS_INTERNAL_BYTECODE +extern htsmutex dns_lock; + // fonctions unix/winsock int hts_read(htsblk* r,char* buff,int size); //int HTS_TOTAL_RECV_CHECK(int var); @@ -170,21 +223,45 @@ LLint check_downloadable_bytes(int rate); #ifndef HTTRACK_DEFLIB HTSEXT_API int hts_init(void); HTSEXT_API int hts_uninit(void); +HTSEXT_API int hts_uninit_module(void); +HTSEXT_API int hts_resetvar(void); /* dummy */ +HTSEXT_API void hts_debug(int level); +HTSEXT_API httrackp* hts_create_opt(void); +HTSEXT_API void hts_free_opt(httrackp *opt); +HTSEXT_API void set_wrappers(httrackp *opt); /* LEGACY */ +HTSEXT_API int plug_wrapper(httrackp *opt, const char *moduleName, const char* argv); + +HTSEXT_API char* hts_strdup(const char* string); +HTSEXT_API void* hts_malloc(size_t size); +HTSEXT_API void* hts_realloc(void* data, size_t size); +HTSEXT_API void hts_free(void* data); #endif +extern int hts_dgb_init; +extern FILE* hts_dgb_(void); +#undef _ +#define _ , +#define HTS_DBG(FMT) do { \ + if (hts_dgb_init > 0) { \ + FILE *fp = hts_dgb_(); \ + fprintf(fp, FMT); \ + fprintf(fp, "\n"); \ + fflush(fp); \ + } \ +} while(0) // fonctions principales -int http_fopen(char* adr,char* fil,htsblk* retour); -int http_xfopen(int mode,int treat,int waitconnect,char* xsend,char* adr,char* fil,htsblk* retour); -int 
http_sendhead(t_cookie* cookie,int mode,char* xsend,char* adr,char* fil,char* referer_adr,char* referer_fil,htsblk* retour); -htsblk httpget(char* url); +int http_fopen(httrackp *opt,char* adr,char* fil,htsblk* retour); +int http_xfopen(httrackp *opt,int mode,int treat,int waitconnect,char* xsend,char* adr,char* fil,htsblk* retour); +int http_sendhead(httrackp *opt,t_cookie* cookie,int mode,char* xsend,char* adr,char* fil,char* referer_adr,char* referer_fil,htsblk* retour); +htsblk httpget(httrackp *opt,char* url); //int newhttp(char* iadr,char* err=NULL); -int newhttp(char* iadr,htsblk* retour,int port,int waitconnect); +int newhttp(httrackp *opt,const char* iadr,htsblk* retour,int port,int waitconnect); HTS_INLINE void deletehttp(htsblk* r); HTS_INLINE int deleteaddr(htsblk* r); HTS_INLINE void deletesoc(T_SOC soc); HTS_INLINE void deletesoc_r(htsblk* r); -htsblk http_location(char* adr,char* fil,char* loc); -htsblk http_test(char* adr,char* fil,char* loc); +htsblk http_location(httrackp *opt,char* adr,char* fil,char* loc); +htsblk http_test(httrackp *opt,char* adr,char* fil,char* loc); int check_readinput(htsblk* r); int check_readinput_t(T_SOC soc, int timeout); void http_fread(T_SOC soc,htsblk* retour); @@ -196,20 +273,19 @@ HTSEXT_API void infostatuscode(char* msg,int statuscode); #endif // sous-fonctions -htsblk xhttpget(char* adr,char* fil); -htsblk http_gethead(char* adr,char* fil); +htsblk xhttpget(httrackp *opt,char* adr,char* fil); +htsblk http_gethead(httrackp *opt,char* adr,char* fil); LLint http_xfread1(htsblk* r,int bufl); -HTS_INLINE t_hostent* hts_gethostbyname(char* iadr, void* v_buffer); +HTS_INLINE t_hostent* hts_gethostbyname(httrackp *opt,const char* iadr, void* v_buffer); #ifndef HTTRACK_DEFLIB HTSEXT_API t_hostent* vxgethostbyname(char* hostname, void* v_buffer); #endif -t_hostent* _hts_ghbn(t_dnscache* cache,char* iadr,t_hostent* retour); +t_hostent* _hts_ghbn(t_dnscache* cache,const char* iadr,t_hostent* retour); int ftp_available(void); 
#if HTS_DNSCACHE void hts_cache_free(t_dnscache* cache); -int hts_dnstest(char* _iadr); -t_dnscache* _hts_cache(void); -int _hts_lockdns(int i); +int hts_dnstest(httrackp *opt, const char* _iadr); +t_dnscache* _hts_cache(httrackp *opt); #endif // outils divers @@ -223,19 +299,19 @@ HTSEXT_API void qsec2str(char *st,TStamp t); #endif void time_gmt_rfc822(char* s); void time_local_rfc822(char* s); -struct tm* convert_time_rfc822(char* s); -int set_filetime(char* file,struct tm* tm_time); -int set_filetime_rfc822(char* file,char* date); -int get_filetime_rfc822(char* file,char* date); +struct tm* convert_time_rfc822(struct tm* buffer, const char* s); +int set_filetime(const char* file,struct tm* tm_time); +int set_filetime_rfc822(const char* file,const char* date); +int get_filetime_rfc822(const char* file,char* date); HTS_INLINE void time_rfc822(char* s,struct tm * A); HTS_INLINE void time_rfc822_local(char* s,struct tm * A); #ifndef HTTRACK_DEFLIB -HTSEXT_API char* int2char(int n); -HTSEXT_API char* int2bytes(LLint n); -HTSEXT_API char* int2bytessec(long int n); -HTSEXT_API char** int2bytes2(LLint n); +HTSEXT_API char* int2char(strc_int2bytes2* strc, int n); +HTSEXT_API char* int2bytes(strc_int2bytes2* strc, LLint n); +HTSEXT_API char* int2bytessec(strc_int2bytes2* strc, long int n); +HTSEXT_API char** int2bytes2(strc_int2bytes2* strc, LLint n); #endif -HTS_INLINE int sendc(htsblk* r, char* s); +HTS_INLINE int sendc(htsblk* r, const char* s); int finput(int fd,char* s,int max); int binput(char* buff,char* s,int max); int linput(FILE* fp,char* s,int max); @@ -245,32 +321,36 @@ int linput_trim(FILE* fp,char* s,int max); int linput_cpp(FILE* fp,char* s,int max); void rawlinput(FILE* fp,char* s,int max); char* strstrcase(char *s,char *o); -int ident_url_absolute(char* url,char* adr,char* fil); +int ident_url_absolute(const char* url,char* adr,char* fil); void fil_simplifie(char* f); int is_unicode_utf8(unsigned char* buffer, unsigned int size); void 
map_characters(unsigned char* buffer, unsigned int size, unsigned int* map); -int ishtml(const char* urlfil); +int ishtml(httrackp *opt,const char* urlfil); int ishtml_ext(const char* a); int ishttperror(int err); -void guess_httptype(char *s,const char *fil); -void get_httptype(char *s,const char *fil,int flag); -int get_userhttptype(int setdefs,char *s,const char *fil); +void guess_httptype(httrackp *opt,char *s,const char *fil); +#ifndef HTTRACK_DEFLIB +HTSEXT_API void get_httptype(httrackp *opt,char *s,const char *fil,int flag); +#endif +int get_userhttptype(httrackp *opt,char *s,const char *fil); void give_mimext(char *s,const char *st); -int is_knowntype(const char *fil); -int is_userknowntype(const char *fil); -int is_dyntype(const char *fil); -char* get_ext(const char *fil); -int may_unknown(const char* st); #ifndef HTTRACK_DEFLIB -HTSEXT_API char* jump_identification(char*); -HTSEXT_API char* jump_normalized(char*); -HTSEXT_API char* jump_toport(char*); -HTSEXT_API char* fil_normalized(char* source, char* dest); -HTSEXT_API char* adr_normalized(char* source, char* dest); +HTSEXT_API int is_knowntype(httrackp *opt,const char *fil); +HTSEXT_API int is_userknowntype(httrackp *opt,const char *fil); +HTSEXT_API int is_dyntype(const char *fil); +HTSEXT_API char* get_ext(char *catbuff, const char *fil); #endif -char* strrchr_limit(char* s, char c, char* limit); -char* strstr_limit(char* s, char* sub, char* limit); -HTS_INLINE char* jump_protocol(char* source); +int may_unknown(httrackp *opt,const char* st); +#ifndef HTTRACK_DEFLIB +HTSEXT_API char* jump_identification(const char*); +HTSEXT_API char* jump_normalized(const char*); +HTSEXT_API char* jump_toport(const char*); +HTSEXT_API char* fil_normalized(const char* source, char* dest); +HTSEXT_API char* adr_normalized(const char* source, char* dest); +#endif +char* strrchr_limit(const char* s, char c, const char* limit); +char* strstr_limit(const char* s, const char* sub, const char* limit); +HTS_INLINE char* 
jump_protocol(const char* source); void code64(unsigned char* a,int size_a,unsigned char* b,int crlf); #ifndef HTTRACK_DEFLIB HTSEXT_API void unescape_amp(char* s); @@ -279,7 +359,7 @@ HTSEXT_API void escape_in_url(char* s); HTSEXT_API void escape_uri(char* s); HTSEXT_API void escape_uri_utf(char* s); HTSEXT_API void escape_check_url(char* s); -HTSEXT_API char* escape_check_url_addr(char* s); +HTSEXT_API char* escape_check_url_addr(char *catbuff, const char* s); HTSEXT_API void x_escape_http(char* s,int mode); HTSEXT_API void x_escape_html(char* s); HTSEXT_API void escape_remove_control(char* s); @@ -287,57 +367,47 @@ HTSEXT_API void escape_for_html_print(char* s, char* d); HTSEXT_API void escape_for_html_print_full(char* s, char* d); #endif #ifndef HTTRACK_DEFLIB -HTSEXT_API char* unescape_http(char* s); -HTSEXT_API char* unescape_http_unharm(char* s, int no_high); -HTSEXT_API char* antislash_unescaped(char* s); +HTSEXT_API char* unescape_http(char *catbuff, const char* s); +HTSEXT_API char* unescape_http_unharm(char *catbuff, const char* s, int no_high); +HTSEXT_API char* antislash_unescaped(char *catbuff, const char* s); +HTSEXT_API char* concat(char *catbuff,const char* a,const char* b); +HTSEXT_API char* fconcat(char *catbuff, const char* a, const char* b); +HTSEXT_API char* fconv(char *catbuff, const char* a); #endif -char* concat(const char* a,const char* b); -#define copychar(a) concat((a),NULL) -#if HTS_DOSNAME -char* fconcat(char* a,char* b); -char* fconv(char* a); -#else -#define fconv(a) (a) -#define fconcat(a,b) concat(a,b) +#define copychar(catbuff,a) concat(catbuff,(a),NULL) +char* fslash(char *catbuff, const char* a); +#ifndef HTTRACK_DEFLIB +HTSEXT_API int hts_log(httrackp *opt, const char* prefix, const char *msg); #endif -char* fslash(char* a); -char* __fslash(char* a); -char* convtolower(char* a); -char* concat(const char* a,const char* b); +char* convtolower(char *catbuff, const char* a); void hts_lowcase(char* s); void hts_replace(char *s,char 
from,char to); +int multipleStringMatch(const char *s, const char *match); void fprintfio(FILE* fp,char* buff,char* prefix); -#if HTS_WIN +#ifdef _WIN32 #else int sig_ignore_flag( int setflag ); // flag ignore #endif void cut_path(char* fullpath,char* path,char* pname); -int fexist(char* s); -/*LLint fsize(char* s); */ -INTsys fpsize(FILE* fp); -INTsys fsize(char* s); +int fexist(const char* s); +/*LLint fsize(const char* s); */ +off_t fpsize(FILE* fp); +off_t fsize(const char* s); /* root dir */ #ifndef HTTRACK_DEFLIB HTSEXT_API char* hts_rootdir(char* file); #endif // Threads -#if USE_PTHREAD typedef void* ( *beginthread_type )( void * ); -unsigned long _beginthread( beginthread_type start_address, unsigned stack_size, void *arglist ); -#endif - - - +/*unsigned long _beginthread( beginthread_type start_address, unsigned stack_size, void *arglist );*/ /* variables globales */ -//extern LLint HTS_TOTAL_RECV; // flux entrant reçu -//extern int HTS_TOTAL_RECV_STATE; // status: 0 tout va bien 1: ralentir un peu 2: ralentir 3: beaucoup extern HTSEXT_API hts_stat_struct HTS_STAT; extern int _DEBUG_HEAD; extern FILE* ioinfo; @@ -353,42 +423,63 @@ extern const char* hts_detectURL[]; extern const char* hts_detectandleave[]; extern const char* hts_detect_js[]; -// defaut wrappers -void __cdecl htsdefault_init(void); -void __cdecl htsdefault_uninit(void); -int __cdecl htsdefault_start(void* opt); -int __cdecl htsdefault_chopt(void* opt); -int __cdecl htsdefault_end(void); -int __cdecl htsdefault_preprocesshtml(char** html,int* len,char* url_adresse,char* url_fichier); -int __cdecl htsdefault_postprocesshtml(char** html,int* len,char* url_adresse,char* url_fichier); -int __cdecl htsdefault_checkhtml(char* html,int len,char* url_adresse,char* url_fichier); -int __cdecl htsdefault_loop(void* back,int back_max,int back_index,int lien_n,int lien_tot,int stat_time,hts_stat_struct* stats); -char* __cdecl htsdefault_query(char* question); -char* __cdecl htsdefault_query2(char* 
question); -char* __cdecl htsdefault_query3(char* question); -int __cdecl htsdefault_check(char* adr,char* fil,int status); -int __cdecl htsdefault_check_mime(char* adr,char* fil,char* mime,int status); -void __cdecl htsdefault_pause(char* lockfile); -void __cdecl htsdefault_filesave(char*); -void __cdecl htsdefault_filesave2(char* adr, char* file, char* sav, int is_new, int is_modified,int not_updated); -int __cdecl htsdefault_linkdetected(char* link); -int __cdecl htsdefault_linkdetected2(char* link, char* tag_start); -int __cdecl htsdefault_xfrstatus(void* back); -int __cdecl htsdefault_savename(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save); -int __cdecl htsdefault_sendheader(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* outgoing); -int __cdecl htsdefault_receiveheader(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* incoming); - -// end defaut wrappers - - // htsmodule.c definitions -extern void* getFunctionPtr(httrackp* opt, char* file, char* fncname); -extern void clearCallbacks(htscallbacks* chain); +extern void* openFunctionLib(const char* file_); +extern void* getFunctionPtr(void* handle, const char* fncname); +extern void closeFunctionLib(void* handle); +extern void clearCallbacks(htscallbacks* chain); +extern size_t hts_get_callback_offs(const char *name); +int hts_set_callback(t_hts_htmlcheck_callbacks *callbacks, const char *name, void *function); +void *hts_get_callback(t_hts_htmlcheck_callbacks *callbacks, const char *name); + +#define CBSTRUCT(OPT) ((t_hts_htmlcheck_callbacks*) ((OPT)->callbacks_fun)) +#define GET_USERCALLBACK(OPT, NAME) ( CBSTRUCT(OPT)-> NAME .fun ) +#define GET_USERARG(OPT, NAME) ( CBSTRUCT(OPT)-> NAME .carg ) +#define GET_USERDEF(OPT, NAME) ( \ + (CBSTRUCT(OPT) != NULL && CBSTRUCT(OPT)-> NAME .fun != NULL) \ + ? ( GET_USERARG(OPT, NAME) ) \ + : ( default_callbacks. 
NAME .carg ) \ +) +#define GET_CALLBACK(OPT, NAME) ( \ + (CBSTRUCT(OPT) != NULL && CBSTRUCT(OPT)-> NAME .fun != NULL) \ + ? ( GET_USERCALLBACK(OPT, NAME ) ) \ + : ( default_callbacks. NAME .fun ) \ +) + +/* Predefined macros */ +#define RUN_CALLBACK_NOARG(OPT, NAME) GET_CALLBACK(OPT, NAME)(GET_USERARG(OPT, NAME)) +#define RUN_CALLBACK0(OPT, NAME) GET_CALLBACK(OPT, NAME)(GET_USERARG(OPT, NAME), OPT) +#define RUN_CALLBACK1(OPT, NAME, ARG1) GET_CALLBACK(OPT, NAME)(GET_USERARG(OPT, NAME), OPT, ARG1) +#define RUN_CALLBACK2(OPT, NAME, ARG1, ARG2) GET_CALLBACK(OPT, NAME)(GET_USERARG(OPT, NAME), OPT, ARG1, ARG2) +#define RUN_CALLBACK3(OPT, NAME, ARG1, ARG2, ARG3) GET_CALLBACK(OPT, NAME)(GET_USERARG(OPT, NAME), OPT, ARG1, ARG2, ARG3) +#define RUN_CALLBACK4(OPT, NAME, ARG1, ARG2, ARG3, ARG4) GET_CALLBACK(OPT, NAME)(GET_USERARG(OPT, NAME), OPT, ARG1, ARG2, ARG3, ARG4) +#define RUN_CALLBACK5(OPT, NAME, ARG1, ARG2, ARG3, ARG4, ARG5) GET_CALLBACK(OPT, NAME)(GET_USERARG(OPT, NAME), OPT, ARG1, ARG2, ARG3, ARG4, ARG5) +#define RUN_CALLBACK6(OPT, NAME, ARG1, ARG2, ARG3, ARG4, ARG5, ARG6) GET_CALLBACK(OPT, NAME)(GET_USERARG(OPT, NAME), OPT, ARG1, ARG2, ARG3, ARG4, ARG5, ARG6) +#define RUN_CALLBACK7(OPT, NAME, ARG1, ARG2, ARG3, ARG4, ARG5, ARG6, ARG7) GET_CALLBACK(OPT, NAME)(GET_USERARG(OPT, NAME), OPT, ARG1, ARG2, ARG3, ARG4, ARG5, ARG6, ARG7) +#define RUN_CALLBACK8(OPT, NAME, ARG1, ARG2, ARG3, ARG4, ARG5, ARG6, ARG7, ARG8) GET_CALLBACK(OPT, NAME)(GET_USERARG(OPT, NAME), OPT, ARG1, ARG2, ARG3, ARG4, ARG5, ARG6, ARG7, ARG8) +/* +#define GET_CALLBACK(OPT, NAME, ARG) ( \ + ( \ + ( ARG ) = GET_USERDEF(OPT, NAME), \ + ( \ + (CBSTRUCT(OPT) != NULL && CBSTRUCT(OPT)-> NAME .fun != NULL) \ + ? ( GET_USERCALLBACK(OPT, NAME ) ) \ + : ( default_callbacks. 
NAME .fun ) \ + ) \ + ) \ +) +*/ #endif // internals +#undef PATH_SEPARATOR +#ifdef _WIN32 +#define PATH_SEPARATOR '\\' +#else +#define PATH_SEPARATOR '/' +#endif /* Spaces: CR,LF,TAB,FF */ #define is_space(c) ( ((c)==' ') || ((c)=='\"') || ((c)==10) || ((c)==13) || ((c)==9) || ((c)==12) || ((c)==11) || ((c)=='\'') ) @@ -413,7 +504,7 @@ extern void clearCallbacks(htscallbacks* chain); // compare le début de f avec s et retourne la position de la fin // 'A=a' (case insensitive) -static int strfield(const char* f,const char* s) { +HTS_STATIC int strfield(const char* f,const char* s) { int r=0; while (streql(*f,*s) && ((*f)!=0) && ((*s)!=0)) { f++; s++; r++; } if (*s==0) @@ -421,7 +512,7 @@ static int strfield(const char* f,const char* s) { else return 0; } -static int strcmpnocase(char* a,char* b) { +HTS_STATIC int strcmpnocase(char* a,char* b) { while(*a) { int cmp = hichar(*a) - hichar(*b); if (cmp != 0) @@ -445,7 +536,7 @@ static int strcmpnocase(char* a,char* b) { #if HTS_USEMMS #define OPT_MMS(a) (strfield2((a), "video/x-ms-asf") != 0) #else -#define OPT_MMS(a) (false) +#define OPT_MMS(a) (0) #endif #define is_hypertext_mime__(a) \ ( (strfield2((a),"text/html")!=0)\ @@ -461,6 +552,7 @@ static int strcmpnocase(char* a,char* b) { (\ (strfield2((a),"audio/x-pn-realaudio")!=0) \ || (strfield2((a),"audio/x-mpegurl")!=0) \ + /*|| (strfield2((a),"text/xml")!=0) || (strfield2((a),"application/xml")!=0) : TODO: content check */ \ || OPT_MMS(a) \ ) @@ -469,39 +561,39 @@ static int strcmpnocase(char* a,char* b) { #ifdef HTS_INTERNAL_BYTECODE // check if (mime, file) is hypertext -static int is_hypertext_mime(const char* mime, const char* file) { +HTS_STATIC int is_hypertext_mime(httrackp *opt,const char* mime, const char* file) { if (is_hypertext_mime__(mime)) return 1; - if (may_unknown(mime)) { + if (may_unknown(opt,mime)) { char guessed[256]; guessed[0] = '\0'; - guess_httptype(guessed, file); + guess_httptype(opt,guessed, file); return is_hypertext_mime__(guessed); } 
return 0; } // check if (mime, file) might be "false" hypertext -static int may_be_hypertext_mime(const char* mime, const char* file) { +HTS_STATIC int may_be_hypertext_mime(httrackp *opt,const char* mime, const char* file) { if (may_be_hypertext_mime__(mime)) return 1; - if (file != NULL && file[0] != '\0' && may_unknown(mime)) { + if (file != NULL && file[0] != '\0' && may_unknown(opt,mime)) { char guessed[256]; guessed[0] = '\0'; - guess_httptype(guessed, file); + guess_httptype(opt,guessed, file); return may_be_hypertext_mime__(guessed); } return 0; } // compare (mime, file) with reference -static int compare_mime(const char* mime, const char* file, const char* reference) { +HTS_STATIC int compare_mime(httrackp *opt,const char* mime, const char* file, const char* reference) { if (is_hypertext_mime__(mime) || may_be_hypertext_mime__(mime)) return strfield2(mime, reference); - if (file != NULL && file[0] != '\0' && may_unknown(mime)) { + if (file != NULL && file[0] != '\0' && may_unknown(opt,mime)) { char guessed[256]; guessed[0] = '\0'; - guess_httptype(guessed, file); + guess_httptype(opt,guessed, file); return strfield2(guessed, reference); } return 0; @@ -511,8 +603,7 @@ static int compare_mime(const char* mime, const char* file, const char* referenc #ifdef _WIN32_WCE_XXC extern char cwd[MAX_PATH+1]; -static char *getcwd_ce(char *buffer, int maxlen) -{ +HTS_STATIC char *getcwd_ce(char *buffer, int maxlen) { TCHAR fileUnc[MAX_PATH+1]; char* plast; diff --git a/src/htsmd5.c b/src/htsmd5.c index adbdb67..114ae7a 100644 --- a/src/htsmd5.c +++ b/src/htsmd5.c @@ -42,12 +42,13 @@ Please visit our Website: http://www.httrack.com /* Internal engine bytecode */ #define HTS_INTERNAL_BYTECODE -#include "htsmd5.h" -#include "md5.h" #include <string.h> #include <stdio.h> +#include <stdlib.h> +#include "htsmd5.h" +#include "md5.h" -int domd5mem(const unsigned char * buf, int len, +int domd5mem(const unsigned char * buf, size_t len, unsigned char * digest, int asAscii) { int 
endian = 1; unsigned char bindigest[16]; @@ -56,7 +57,7 @@ int domd5mem(const unsigned char * buf, int len, MD5_CTX ctx; MD5Init(&ctx, * ( (char*) &endian)); - MD5Update(&ctx, buf, len); + MD5Update(&ctx, buf, (unsigned int) len); MD5Final(bindigest, &ctx); #else /* Broken md5.. temporary hack */ @@ -87,7 +88,6 @@ int domd5mem(const unsigned char * buf, int len, unsigned long int md5sum32(const char* buff) { unsigned char md5digest[16]; - unsigned char* md5digest_ = md5digest; - domd5mem(buff,strlen(buff),md5digest,0); + domd5mem(buff,(int)strlen(buff),md5digest,0); return *( (long int*)(char*)md5digest ); } diff --git a/src/htsmd5.h b/src/htsmd5.h index 8892895..3a8c9bb 100644 --- a/src/htsmd5.h +++ b/src/htsmd5.h @@ -44,7 +44,7 @@ Please visit our Website: http://www.httrack.com /* Library internal definictions */ #ifdef HTS_INTERNAL_BYTECODE -int domd5mem(const unsigned char * buf, int len, +int domd5mem(const unsigned char * buf, size_t len, unsigned char * digest, int asAscii); unsigned long int md5sum32(const char* buff); #endif diff --git a/src/htsmms.c b/src/htsmms.c index 3d76cda..3c25d80 100644 --- a/src/htsmms.c +++ b/src/htsmms.c @@ -47,9 +47,7 @@ Please visit our Website: http://www.httrack.com #if HTS_USEMMS -#include "htsbase.h" -#include "htsnet.h" -#include "htsthread.h" +#include "htscore.h" #include "htsmms.h" #include "mmsrip/mms.h" @@ -57,10 +55,10 @@ Please visit our Website: http://www.httrack.com #define FTP_STATUS_READY 1001 static int run_launch_mms(MMSDownloadStruct* back); -static PTHREAD_TYPE PTHREAD_TYPE_FNC back_launch_mms( void* pP ) { +static void back_launch_mms( void* pP ) { MMSDownloadStruct *pStruct = (MMSDownloadStruct*)pP; if (pStruct == NULL) - return PTHREAD_RETURN; + return ; /* Initialize */ hts_init(); @@ -79,7 +77,7 @@ static PTHREAD_TYPE PTHREAD_TYPE_FNC back_launch_mms( void* pP ) { /* Uninitialize */ hts_uninit(); - return PTHREAD_RETURN; + return ; } /* download cancelled */ @@ -96,7 +94,7 @@ static int 
stop_mms(lien_back* back) { void launch_mms(const MMSDownloadStruct* pStruct) { MMSDownloadStruct *pCopy = calloc(sizeof(MMSDownloadStruct), 1); memcpy(pCopy, pStruct, sizeof(*pCopy)); - (void) hts_newthread(back_launch_mms, 0, (void*) pCopy); + hts_newthread(back_launch_mms, (void*) pCopy); } /* Code mainly written by Nicolas BENOIT */ @@ -105,6 +103,8 @@ static int run_launch_mms(MMSDownloadStruct* pStruct) { httrackp* opt = pStruct->pOpt; /* */ char url[HTS_URLMAXSIZE*2]; + char catbuff[CATBUFF_SIZE]; + char catbuff2[CATBUFF_SIZE]; MMS *mms; FILE *f; ssize_t len_written; @@ -112,25 +112,25 @@ static int run_launch_mms(MMSDownloadStruct* pStruct) { int delay = opt->mms_maxtime; time_t end = time(NULL) + delay; short checkPending = 0; - INTsys existingSize = fsize(back->url_sav); + ssize_t existingSize = fsize(back->url_sav); // effacer strcpybuff(back->r.msg,""); - back->status=1000; - back->r.statuscode=200; + back->status=STATUS_FTP_TRANSFER; + back->r.statuscode=HTTP_OK; back->r.size=0; /* Create file */ if (existingSize > 0) { /* back->r.out = fileappend(back->url_sav); */ - (void) unlink(fconcat(back->url_sav, ".old")); - if (rename(fconcat(back->url_sav, ""), fconcat(back->url_sav, ".old")) == 0) { + (void) unlink(fconcat(catbuff,back->url_sav, ".old")); + if (rename(fconcat(catbuff,back->url_sav, ""), fconcat(catbuff2,back->url_sav, ".old")) == 0) { checkPending = 1; } - back->r.out = filecreate(back->url_sav); + back->r.out = filecreate(&pStruct->pOpt->state.strc, back->url_sav); } else { - back->r.out = filecreate(back->url_sav); + back->r.out = filecreate(&pStruct->pOpt->state.strc, back->url_sav); } if ((f = back->r.out) != NULL) { // create mms resource @@ -151,18 +151,18 @@ static int run_launch_mms(MMSDownloadStruct* pStruct) { { fclose(back->r.out); f = back->r.out = NULL; - if (unlink(fconcat(back->url_sav, "")) == 0 - && rename(fconcat(back->url_sav, ".old"), fconcat(back->url_sav, "")) == 0) + if (unlink(fconcat(catbuff, back->url_sav, "")) == 0 
+ && rename(fconcat(catbuff, back->url_sav, ".old"), fconcat(catbuff2, back->url_sav, "")) == 0) { back->r.notmodified = 1; - back->r.statuscode = 200; + back->r.statuscode = HTTP_OK; strcpybuff(back->r.msg, "Not modified"); } else { - back->r.statuscode = 500; + back->r.statuscode = HTTP_INTERNAL_SERVER_ERROR; strcpybuff(back->r.msg, "Unable to rename previous file (not updated)"); } } else { - (void) unlink(fconcat(back->url_sav, ".old")); + (void) unlink(fconcat(catbuff, back->url_sav, ".old")); } } @@ -180,8 +180,7 @@ static int run_launch_mms(MMSDownloadStruct* pStruct) { if ( len_written == 0 ) { break; } else if ( len_written == -1 ) { - back->r.statuscode = -1; - back->r.statuscode = 500; + back->r.statuscode = HTTP_INTERNAL_SERVER_ERROR; strcpybuff(back->r.msg, "Unable to write stream data"); break; } @@ -194,51 +193,39 @@ static int run_launch_mms(MMSDownloadStruct* pStruct) { if ( delay != 0 && end <= time(NULL) ) { delay = -1; - back->r.statuscode = 200; + back->r.statuscode = HTTP_OK; strcpybuff(back->r.msg, "Download interrupted"); break; } } // while - back->r.statuscode = 0; /* Finished */ + back->r.statuscode = HTTP_OK; /* Finished */ } else if (f != NULL) { - back->r.statuscode = -1; - back->r.statuscode = 500; + back->r.statuscode = HTTP_INTERNAL_SERVER_ERROR; strcpybuff(back->r.msg, "Can not begin ripping"); } } else { - back->r.statuscode = -1; - back->r.statuscode = 500; + back->r.statuscode = HTTP_INTERNAL_SERVER_ERROR; strcpybuff(back->r.msg, "Can not write stream header"); } } else { - back->r.statuscode = -1; - back->r.statuscode = 500; + back->r.statuscode = HTTP_INTERNAL_SERVER_ERROR; strcpybuff(back->r.msg, "Can not handshake"); } mms_disconnect ( mms ); } else { - back->r.statuscode = -1; - back->r.statuscode = 500; + back->r.statuscode = HTTP_INTERNAL_SERVER_ERROR; strcpybuff(back->r.msg, "Can not connect"); } mms_destroy ( mms ); } else { - back->r.statuscode = -1; - back->r.statuscode = 500; + back->r.statuscode = 
HTTP_INTERNAL_SERVER_ERROR; strcpybuff(back->r.msg, "Can not create mms resource"); } } else { - back->r.statuscode = -1; - back->r.statuscode = 500; + back->r.statuscode = HTTP_INTERNAL_SERVER_ERROR; strcpybuff(back->r.msg, "Unable to open local output file"); } - - // End - if (back->r.statuscode != -1) { - back->r.statuscode=200; - strcpybuff(back->r.msg, "OK"); - } return 0; } diff --git a/src/htsmms.h b/src/htsmms.h index 43a6c1e..7ae22c1 100644 --- a/src/htsmms.h +++ b/src/htsmms.h @@ -42,21 +42,26 @@ Please visit our Website: http://www.httrack.com #ifndef HTSMMS_DEFH #define HTSMMS_DEFH -#include "htsglobal.h" - #if HTS_USEMMS -#include "htsbase.h" -#include "htsbasenet.h" -#include "htsthread.h" - -// lien_back -#include "htscore.h" +/* Forware definitions */ +#ifndef HTS_DEF_FWSTRUCT_lien_back +#define HTS_DEF_FWSTRUCT_lien_back +typedef struct lien_back lien_back; +#endif +#ifndef HTS_DEF_FWSTRUCT_httrackp +#define HTS_DEF_FWSTRUCT_httrackp +typedef struct httrackp httrackp; +#endif -typedef struct MMSDownloadStruct { +#ifndef HTS_DEF_FWSTRUCT_MMSDownloadStruct +#define HTS_DEF_FWSTRUCT_MMSDownloadStruct +typedef struct MMSDownloadStruct MMSDownloadStruct; +#endif +struct MMSDownloadStruct { lien_back *pBack; httrackp *pOpt; -} MMSDownloadStruct; +}; void launch_mms(const MMSDownloadStruct* pStruct); #endif diff --git a/src/htsmodules.c b/src/htsmodules.c index ba3927e..1049d36 100644 --- a/src/htsmodules.c +++ b/src/htsmodules.c @@ -41,7 +41,10 @@ Please visit our Website: http://www.httrack.com #include "htsglobal.h" #include "htsmodules.h" #include "htsopt.h" -extern int fspc(FILE* fp,char* type); +#include "htsbasenet.h" +#include "htslib.h" + +extern int fspc(httrackp *opt,FILE* fp,const char* type); #ifndef _WIN32 #if HTS_DLOPEN @@ -52,23 +55,10 @@ extern int fspc(FILE* fp,char* type); /* >>> Put all modules definitions here */ #include "htszlib.h" #include "htsbase.h" - -typedef int (*t_hts_detect_swf)(htsmoduleStruct* str); -typedef int 
(*t_hts_parse_swf)(htsmoduleStruct* str); -/* <<< */ - -/* >>> Put all modules includes here */ -#include "htsjava.h" -#if HTS_USESWF -#endif /* <<< */ /* >>> Put all modules variables here */ -int swf_is_available = 0; -t_hts_detect_swf hts_detect_swf = NULL; -t_hts_parse_swf hts_parse_swf = NULL; - int gz_is_available = 0; #if 0 t_gzopen gzopen = NULL; @@ -98,9 +88,28 @@ t_SSL_load_error_strings SSL_load_error_strings = NULL; int V6_is_available = HTS_INET6; -char WHAT_is_available[64]=""; +static char WHAT_is_available[64]=""; /* <<< */ +HTSEXT_API const char* hts_get_version_info(httrackp *opt) { + size_t size; + int i; + strcpy(opt->state.HTbuff, WHAT_is_available); + size = strlen(opt->state.HTbuff); + for(i = 0 ; i < opt->libHandles.count ; i++) { + const char *name = opt->libHandles.handles[i].moduleName; + if (name != NULL) { + size_t nsize = strlen(name) + sizeof("+"); + size += nsize; + if (size + 1 >= sizeof(opt->state.HTbuff)) + break; + strcat(opt->state.HTbuff, "+"); + strcat(opt->state.HTbuff, name); + } + } + return opt->state.HTbuff; +} + /* memory checks */ HTSEXT_API htsErrorCallback htsCallbackErr = NULL; HTSEXT_API int htsMemoryFastXfr = 1; /* fast xfr by default */ @@ -119,44 +128,39 @@ void abortLog__fnc(char* msg, char* file, int line) { } HTSEXT_API t_abortLog abortLog__ = abortLog__fnc; /* avoid VC++ inlining */ -static void htspe_log(htsmoduleStruct* str, char* msg); +static void htspe_log(htsmoduleStruct* str, const char* msg); int hts_parse_externals(htsmoduleStruct* str) { - /* >>> Put all module calls here */ - - /* JAVA */ - if (hts_detect_java(str)) { - htspe_log(str, "java-lib"); - return hts_parse_java(str); - } - -#if HTS_USESWF - /* FLASH - (external module derivated from Macromedia(tm)'s classes) - */ - else if (swf_is_available && hts_detect_swf(str)) { - htspe_log(str, "swf-lib"); - return hts_parse_swf(str); + str->wrapper_name = "wrapper-lib"; + + /* External callback */ + if (RUN_CALLBACK1(str->opt, detect, str)) { + if 
(str->wrapper_name == NULL) + str->wrapper_name = "wrapper-lib"; + /* Blacklisted */ + if (multipleStringMatch(str->wrapper_name, StringBuff(str->opt->mod_blacklist))) { + return -1; + } else { + htspe_log(str, str->wrapper_name); + return RUN_CALLBACK1(str->opt, parse, str); + } } -#endif - /* <<< */ - /* Not detected */ return -1; } -static void addCallback(htscallbacks* chain, void* moduleHandle, htscallbacksfncptr exitFnc) { - while(chain->next != NULL) { - chain = chain->next; - } - chain->next = calloct(1, sizeof(htscallbacks)); - assertf(chain->next != NULL); - chain = chain->next; - memset(chain, 0, sizeof(*chain)); - chain->exitFnc = exitFnc; - chain->moduleHandle = moduleHandle; -} +//static void addCallback(htscallbacks* chain, void* moduleHandle, htscallbacksfncptr exitFnc) { +// while(chain->next != NULL) { +// chain = chain->next; +// } +// chain->next = calloct(1, sizeof(htscallbacks)); +// assertf(chain->next != NULL); +// chain = chain->next; +// memset(chain, 0, sizeof(*chain)); +// chain->exitFnc = exitFnc; +// chain->moduleHandle = moduleHandle; +//} void clearCallbacks(htscallbacks* chain_); void clearCallbacks(htscallbacks* chain_) { @@ -189,116 +193,67 @@ void clearCallbacks(htscallbacks* chain_) { chain_->next = NULL; // Empty } -void* getFunctionPtr(httrackp* opt, char* file_, char* fncname); -void* getFunctionPtr(httrackp* opt, char* file_, char* fncname) { - char BIGSTK file[1024]; +void* openFunctionLib(const char* file_) { void* handle; - void* userfunction = NULL; - strcpybuff(file, file_); + char *file = malloct(strlen(file_) + 32); + strcpy(file, file_); #ifdef _WIN32 - handle = LoadLibraryA((char*)file); + handle = LoadLibraryA(file); if (handle == NULL) { - strcatbuff(file, ".dll"); - handle = LoadLibraryA((char*)file); + sprintf(file, "%s.dll", file_); + handle = LoadLibraryA(file); } #else handle = dlopen(file, RTLD_LAZY); if (handle == NULL) { - strcatbuff(file, ".so"); + sprintf(file, "lib%s.so", file_); handle = dlopen(file, 
RTLD_LAZY); } #endif + freet(file); + return handle; +} + +void closeFunctionLib(void* handle) { +#ifdef _WIN32 + FreeLibrary(handle); +#else + dlclose(handle); +#endif +} + +void* getFunctionPtr(void* handle, const char* fncname_) { if (handle) { - /* Thanks to Lars Clausen for the "wrapper-init" patch */ - /* If given arguments, call "<wrappername>_init" */ - char BIGSTK tmpName[1024]; + void* userfunction = NULL; + char *fncname = strdupt(fncname_); + + /* Strip optional comma */ char *comma; if ((comma = strchr(fncname, ',')) != NULL) { /* empty arg */ *comma++ = '\0'; } - /* speficic plug init */ - { - t_htsWrapperPlugInit initfunction; - sprintf(tmpName, "%s_init", fncname); - initfunction = (t_htsWrapperPlugInit)DynamicGet(handle, (char*)tmpName); - if (initfunction != NULL) { - int result = (int) initfunction(comma); - if (!result) { - if (userfunction == NULL) { -#ifdef _WIN32 - FreeLibrary(handle); -#else - dlclose(handle); -#endif - } - return NULL; - } - } - } - /* wrapper_init() */ - { - t_htsWrapperInit initfunction = (t_htsWrapperInit)DynamicGet(handle, (char*)"wrapper_init"); - if (initfunction != NULL) { - if (! 
initfunction(fncname, comma)) { - if (userfunction == NULL) { -#ifdef _WIN32 - FreeLibrary(handle); -#else - dlclose(handle); -#endif - } - return NULL; - } - } - } /* the function itself */ userfunction = (void*) DynamicGet(handle, (char*)fncname); - if (userfunction == NULL) { -#ifdef _WIN32 - FreeLibrary(handle); -#else - dlclose(handle); -#endif - } else { - /* optional exit wrapper */ - t_htsWrapperExit exitFnc = (t_htsWrapperExit) DynamicGet(handle, (char*)"wrapper_exit"); - addCallback(&opt->state.callbacks, handle, exitFnc); // exitFnc can be null - } + + freet(fncname); + + return userfunction; } - return userfunction; + return NULL; } -void htspe_init() { +void* ssl_handle = NULL; +#ifdef _WIN32 +void* ssl_handle_2 = NULL; +#endif +void htspe_init(void) { static int initOk = 0; if (!initOk) { initOk = 1; - /* >>> Put all module initializations here */ - - - /* Zlib */ + /* Zlib is now statically linked */ gz_is_available = 1; - /* -#if HTS_DLOPEN - { - void* handle; -#ifdef _WIN32 - handle = LoadLibrary("zlib"); -#else - handle = dlopen("libz.so.1", RTLD_LAZY); -#endif - if (handle) { - gzopen = (t_gzopen) DynamicGet(handle, "gzopen"); - gzread = (t_gzread) DynamicGet(handle, "gzread"); - gzclose = (t_gzclose) DynamicGet(handle, "gzclose"); - if (gzopen && gzread && gzclose) { - gz_is_available = 1; - } - } - } -#endif - */ /* OpenSSL */ #if HTS_DLOPEN @@ -317,9 +272,14 @@ void htspe_init() { } if (handle == NULL) { /* Try harder */ + handle = dlopen("libssl.so", RTLD_LAZY); + } + if (handle == NULL) { + /* Try harder */ handle = dlopen("libssl.so.0", RTLD_LAZY); } #endif + ssl_handle = handle; if (handle) { SSL_shutdown = (t_SSL_shutdown) DynamicGet(handle, (char*)"SSL_shutdown"); SSL_free = (t_SSL_free) DynamicGet(handle, (char*)"SSL_free"); @@ -339,6 +299,7 @@ void htspe_init() { SSL_CTX_ctrl = (t_SSL_CTX_ctrl) DynamicGet(handle, (char*)"SSL_CTX_ctrl"); #ifdef _WIN32 handle = LoadLibraryA((char*)"libeay32"); + ssl_handle_2 = handle; #endif 
ERR_load_crypto_strings = (t_ERR_load_crypto_strings) DynamicGet(handle, (char*)"ERR_load_crypto_strings"); ERR_error_string = (t_ERR_error_string) DynamicGet(handle, (char*)"ERR_error_string"); @@ -354,53 +315,35 @@ void htspe_init() { #endif /* */ - /* - FLASH - Load the library on-the-fly, if available - If not, that's not a problem - */ -#if HTS_DLOPEN - { -#ifdef _WIN32 - void* handle = LoadLibraryA((char*)"htsswf"); -#else - void* handle = dlopen("libhtsswf.so.1", RTLD_LAZY); -#endif - if (handle) { - hts_detect_swf = (t_hts_detect_swf) DynamicGet(handle, "hts_detect_swf"); - hts_parse_swf = (t_hts_parse_swf) DynamicGet(handle, "hts_parse_swf"); - if (hts_detect_swf && hts_parse_swf) { - swf_is_available = 1; - } - } - // FreeLibrary(handle); - // dlclose(handle); - } -#endif - - /* <<< */ - /* Options availability */ - sprintf(WHAT_is_available, "%s%s%s%s", + sprintf(WHAT_is_available, "%s%s%s", V6_is_available ? "" : "-noV6", gz_is_available ? "" : "-nozip", - SSL_is_available ? "" : "-nossl", - swf_is_available ? "+swf" : ""); - - + SSL_is_available ? 
"" : "-nossl"); } } -static void htspe_log(htsmoduleStruct* str, char* msg) { - char* savename = str->filename; +void htspe_uninit(void) { +#ifdef _WIN32 + CloseHandle(ssl_handle); + CloseHandle(ssl_handle_2); + ssl_handle = NULL; + ssl_handle_2 = NULL; +#else + dlclose(ssl_handle); + ssl_handle = NULL; +#endif +} + +static void htspe_log(htsmoduleStruct* str, const char* msg) { + const char* savename = str->filename; httrackp* opt = (httrackp*) str->opt; if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"(External module): parsing %s using module %s"LF, + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(External module): parsing %s using module %s"LF, savename, msg); } } -HTSEXT_API const char* hts_is_available(void); HTSEXT_API const char* hts_is_available(void) { return WHAT_is_available; } diff --git a/src/htsmodules.h b/src/htsmodules.h index 6c4a305..5d0c6c1 100644 --- a/src/htsmodules.h +++ b/src/htsmodules.h @@ -38,24 +38,50 @@ Please visit our Website: http://www.httrack.com #ifndef HTS_MODULES #define HTS_MODULES +/* Forware definitions */ +#ifndef HTS_DEF_FWSTRUCT_lien_url +#define HTS_DEF_FWSTRUCT_lien_url +typedef struct lien_url lien_url; +#endif +#ifndef HTS_DEF_FWSTRUCT_httrackp +#define HTS_DEF_FWSTRUCT_httrackp +typedef struct httrackp httrackp; +#endif +#ifndef HTS_DEF_FWSTRUCT_struct_back +#define HTS_DEF_FWSTRUCT_struct_back +typedef struct struct_back struct_back; +#endif +#ifndef HTS_DEF_FWSTRUCT_cache_back +#define HTS_DEF_FWSTRUCT_cache_back +typedef struct cache_back cache_back; +#endif +#ifndef HTS_DEF_FWSTRUCT_hash_struct +#define HTS_DEF_FWSTRUCT_hash_struct +typedef struct hash_struct hash_struct; +#endif + /* Function type to add links inside the module link : link to add (absolute or relative) str : structure defined below Returns 1 if the link was added, 0 if not */ +#ifndef HTS_DEF_FWSTRUCT_htsmoduleStruct +#define HTS_DEF_FWSTRUCT_htsmoduleStruct typedef struct htsmoduleStruct htsmoduleStruct; +#endif 
typedef int (* t_htsAddLink)(htsmoduleStruct* str, char* link); /* Structure passed to the module */ struct htsmoduleStruct { /* Read-only elements */ - char* filename; /* filename (C:\My Web Sites\...) */ + const char* filename; /* filename (C:\My Web Sites\...) */ int size; /* size of filename (should be > 0) */ - char* mime; /* MIME type of the object */ - char* url_host; /* incoming hostname (www.foo.com) */ - char* url_file; /* incoming filename (/bar/bar.gny) */ + const char* mime; /* MIME type of the object */ + const char* url_host; /* incoming hostname (www.foo.com) */ + const char* url_file; /* incoming filename (/bar/bar.gny) */ /* Write-only */ + const char* wrapper_name; /* name of wrapper (static string) */ char* err_msg; /* if an error occured, the error message (max. 1KB) */ /* Read/Write */ @@ -78,20 +104,20 @@ struct htsmoduleStruct { void* userdef; /* can be used by callback routines */ - /* ---- ---- ---- */ + /* The parser httrackp structure (may be used) */ + httrackp* opt; /* Internal use - please don't touch */ - void* liens; - void* opt; - void* sback; - void* cache; - void* hashptr; + lien_url** liens; + struct_back* sback; + cache_back* cache; + hash_struct* hashptr; int numero_passe; int add_tab_alloc; /* */ int* lien_tot_; int* ptr_; - int* lien_size_; + size_t* lien_size_; char** lien_buffer_; /* Internal use - please don't touch */ @@ -105,14 +131,16 @@ typedef int (*t_htsWrapperPlugInit)(char *args); /* Library internal definictions */ #ifdef HTS_INTERNAL_BYTECODE +HTSEXT_API const char* hts_get_version_info(httrackp *opt); +HTSEXT_API const char* hts_is_available(void); extern void htspe_init(void); +extern void htspe_uninit(void); extern int hts_parse_externals(htsmoduleStruct* str); extern int gz_is_available; -extern int swf_is_available; +/*extern int swf_is_available;*/ extern int SSL_is_available; extern int V6_is_available; -extern char WHAT_is_available[64]; #endif #endif diff --git a/src/htsname.c b/src/htsname.c index 
0176c5c..c0f74d8 100644 --- a/src/htsname.c +++ b/src/htsname.c @@ -38,17 +38,14 @@ Please visit our Website: http://www.httrack.com /* Internal engine bytecode */ #define HTS_INTERNAL_BYTECODE +#include "htscore.h" #include "htsname.h" - -/* specific definitions */ -#include "htsbase.h" -#include "htstools.h" #include "htsmd5.h" +#include "htstools.h" #include <ctype.h> -/* END specific definitions */ #undef test_flush -#define test_flush if (opt->flush) { fflush(opt->log); fflush(opt->errlog); } +#define test_flush if (opt->flush) { fflush(opt->log); fflush(opt->log); } #define ADD_STANDARD_PATH \ { /* ajout nom */\ @@ -80,26 +77,28 @@ static const char *hts_tbdev[] = #define URLSAVENAME_WAIT_FOR_AVAILABLE_SOCKET() do { \ - int prev = _hts_in_html_parsing; \ + int prev = opt->state._hts_in_html_parsing; \ while(back_pluggable_sockets_strict(sback, opt) <= 0) { \ - _hts_in_html_parsing = 6; \ + opt->state. _hts_in_html_parsing = 6; \ /* Wait .. */ \ back_wait(sback,opt,cache,0); \ /* Transfer rate */ \ engine_stats(); \ /* Refresh various stats */ \ HTS_STAT.stat_nsocket=back_nsoc(sback); \ - HTS_STAT.stat_errors=fspc(NULL,"error"); \ - HTS_STAT.stat_warnings=fspc(NULL,"warning"); \ - HTS_STAT.stat_infos=fspc(NULL,"info"); \ + HTS_STAT.stat_errors=fspc(opt,NULL,"error"); \ + HTS_STAT.stat_warnings=fspc(opt,NULL,"warning"); \ + HTS_STAT.stat_infos=fspc(opt,NULL,"info"); \ HTS_STAT.nbk=backlinks_done(sback,liens,lien_tot,ptr); \ HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,sback); \ /* Check */ \ - if (!hts_htmlcheck_loop(sback->lnk, sback->count,-1,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) { \ - return -1; \ + { \ + if (!RUN_CALLBACK7(opt, loop, sback->lnk, sback->count,-1,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) { \ + return -1; \ + } \ } \ } \ - _hts_in_html_parsing = prev; \ + opt->state._hts_in_html_parsing = prev; \ } while(0) @@ -112,9 +111,9 @@ int url_savename(char* adr_complete, char* 
fil_complete, char* save, lien_url** liens, int lien_tot, struct_back* sback, cache_back* cache, hash_struct* hash, int ptr, int numero_passe, const lien_back* headers) { - const char* mime_type = headers ? headers->r.contenttype : NULL; + char catbuff[CATBUFF_SIZE]; + const char* mime_type = ( headers && HTTP_IS_OK(headers->r.statuscode) ) ? headers->r.contenttype : NULL; lien_back* const back = sback->lnk; - const int back_max = sback->count; /* */ char BIGSTK newfil[HTS_URLMAXSIZE*2]; /* ="" */ /*char BIGSTK normadr_[HTS_URLMAXSIZE*2];*/ @@ -263,7 +262,7 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, fil=newfil; } // Decode remaining % - strcpybuff(fil,unescape_http(fil)); + strcpybuff(fil,unescape_http(catbuff,fil)); // , BUT do not decode high chars //strcpybuff(fil,unescape_http_unharm(fil, 1)); // YES (not server side, but fs/client side) @@ -272,7 +271,7 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, /* .asx hack */ if (headers != NULL && headers->r.cdispo[0] != 0 && strfield(headers->r.contenttype, "video/") - && strfield2(get_ext(headers->r.cdispo), "asx") == 0) + && strfield2(get_ext(OPT_GET_BUFF(opt),headers->r.cdispo), "asx") == 0) { ext_chg = 1; strcpybuff(ext, "asx"); @@ -280,7 +279,7 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, else if (headers != NULL && headers->r.contenttype[0] != 0 && strfield2(headers->r.contenttype, "video/x-ms-asf")) { - char *exts = get_ext(headers->url_fil); + char *exts = get_ext(OPT_GET_BUFF(opt),headers->url_fil); if (strfield2(exts, "wmv") == 0) { ext_chg = 1; @@ -308,12 +307,12 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, if (opt->savename_delayed == 2) is_html = -1; /* ALWAYS delay type */ else - is_html = ishtml(fil); + is_html = ishtml(opt,fil); switch ( is_html ) { /* .html,.shtml,.. 
*/ case 1: if ( - (strfield2(get_ext(fil),"html") == 0) - && (strfield2(get_ext(fil),"htm") == 0) + (strfield2(get_ext(OPT_GET_BUFF(opt),fil),"html") == 0) + && (strfield2(get_ext(OPT_GET_BUFF(opt),fil),"htm") == 0) ) { strcpybuff(ext,"html"); @@ -322,10 +321,10 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, break; case 0: if (!strnotempty(ext)) { - if (is_userknowntype(fil)) { // mime known by user + if (is_userknowntype(opt,fil)) { // mime known by user char BIGSTK mime[1024]; mime[0]=ext[0]='\0'; - get_userhttptype(0,mime,fil); + get_userhttptype(opt,mime,fil); if (strnotempty(mime)) { give_mimext(ext,mime); if (strnotempty(ext)) { @@ -348,20 +347,20 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, ) { // tester type avec requète HEAD si on ne connait pas le type du fichier if (!( (opt->check_type==1) && (fil[strlen(fil)-1]=='/') )) // slash doit être html? - if (opt->savename_delayed == 2 || (ishtest=ishtml(fil)) < 0) { // on ne sait pas si c'est un html ou un fichier.. + if ( opt->savename_delayed == 2 || (ishtest=ishtml(opt,fil)) < 0) { // on ne sait pas si c'est un html ou un fichier.. // lire dans le cache htsblk r = cache_read(opt,cache,adr,fil,NULL,NULL); // test uniquement if (r.statuscode != -1) { // pas d'erreur de lecture cache char s[16]; s[0]='\0'; if ( (opt->debug>1) && (opt->log!=NULL) ) { - fspc(opt->log,"debug"); fprintf(opt->log,"Testing link type (from cache) %s%s"LF,adr_complete,fil_complete); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"Testing link type (from cache) %s%s"LF,adr_complete,fil_complete); test_flush; } if (strnotempty(r.cdispo)) { /* filename given */ ext_chg=2; /* change filename */ strcpybuff(ext,r.cdispo); } - else if (!may_unknown(r.contenttype) || ishtest == -2) { // on peut patcher à priori? + else if (!may_unknown(opt,r.contenttype) || ishtest == -2) { // on peut patcher à priori? 
give_mimext(s,r.contenttype); // obtenir extension if (strnotempty(s)>0) { // on a reconnu l'extension ext_chg=1; @@ -369,14 +368,14 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, } } // - } else if (opt->savename_delayed != 2 && is_userknowntype(fil)) { /* PATCH BY BRIAN SCHRÖDER. + } else if ( opt->savename_delayed != 2 && is_userknowntype(opt,fil)) { /* PATCH BY BRIAN SCHRÖDER. Lookup mimetype not only by extension, but also by filename */ /* Note: "foo.cgi => text/html" means that foo.cgi shall have the text/html MIME file type, that is, ".html" */ char BIGSTK mime[1024]; mime[0]=ext[0]='\0'; - get_userhttptype(0, mime, fil); + get_userhttptype(opt, mime, fil); if (strnotempty(mime)) { give_mimext(ext, mime); if (strnotempty(ext)) { @@ -392,7 +391,14 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, give_mimext(ext, mime_type); } if (strnotempty(ext)) { - ext_chg = 1; + char mime_from_file[128]; + mime_from_file[0] = 0; + get_httptype(opt, mime_from_file, fil, 1); + if (!strnotempty(mime_from_file) || strcasecmp(mime_type, mime_from_file) != 0) { /* different mime for this type */ + ext_chg = 1; + } else { + ext_chg = 0; + } } } else { /* Avoid collisions (no collisionning detection) */ @@ -404,9 +410,7 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, // test imposible dans le cache, faire une requête else { // -#if HTS_ANALYSTE - int hihp=_hts_in_html_parsing; -#endif + int hihp = opt->state._hts_in_html_parsing; int has_been_moved=0; char BIGSTK curr_adr[HTS_URLMAXSIZE*2],curr_fil[HTS_URLMAXSIZE*2]; @@ -418,20 +422,18 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, /* Rock'in */ curr_adr[0]=curr_fil[0]='\0'; -#if HTS_ANALYSTE - _hts_in_html_parsing=2; // test -#endif + opt->state. 
_hts_in_html_parsing=2; // test if ( (opt->debug>1) && (opt->log!=NULL) ) { - fspc(opt->log,"debug"); fprintf(opt->log,"Testing link type %s%s"LF,adr_complete,fil_complete); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"Testing link type %s%s"LF,adr_complete,fil_complete); test_flush; } strcpybuff(curr_adr,adr_complete); strcpybuff(curr_fil,fil_complete); // ajouter dans le backing le fichier en mode test // savename: rien car en mode test - if (back_add(sback,opt,cache,curr_adr,curr_fil,BACK_ADD_TEST,referer_adr,referer_fil,1,NULL)!=-1) { + if (back_add(sback,opt,cache,curr_adr,curr_fil,BACK_ADD_TEST,referer_adr,referer_fil,1)!=-1) { int b; - b=back_index(sback,curr_adr,curr_fil,BACK_ADD_TEST); + b=back_index(opt,sback,curr_adr,curr_fil,BACK_ADD_TEST); if (b>=0) { int stop_looping=0; int petits_tours=0; @@ -442,34 +444,28 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, back_wait(sback,opt,cache,0); } if (ptr>=0) { - back_fillmax(sback,opt,cache,liens,ptr,numero_passe,lien_tot); + back_fillmax(sback,opt,cache,liens,ptr,numero_passe,lien_tot); + } + + // on est obligé d'appeler le shell pour le refresh.. + // Transfer rate + engine_stats(); + + // Refresh various stats + HTS_STAT.stat_nsocket=back_nsoc(sback); + HTS_STAT.stat_errors=fspc(opt,NULL,"error"); + HTS_STAT.stat_warnings=fspc(opt,NULL,"warning"); + HTS_STAT.stat_infos=fspc(opt,NULL,"info"); + HTS_STAT.nbk=backlinks_done(sback,liens,lien_tot,ptr); + HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,sback); + + if (!RUN_CALLBACK7(opt, loop, sback->lnk, sback->count,b,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) { + return -1; + } else if (opt->state._hts_cancel || !back_checkmirror(opt)) { // cancel 2 ou 1 (cancel parsing) + back_delete(opt,cache,sback,b); // cancel test + stop_looping = 1; } - - // on est obligé d'appeler le shell pour le refresh.. 
-#if HTS_ANALYSTE - { - - // Transfer rate - engine_stats(); - - // Refresh various stats - HTS_STAT.stat_nsocket=back_nsoc(sback); - HTS_STAT.stat_errors=fspc(NULL,"error"); - HTS_STAT.stat_warnings=fspc(NULL,"warning"); - HTS_STAT.stat_infos=fspc(NULL,"info"); - HTS_STAT.nbk=backlinks_done(sback,liens,lien_tot,ptr); - HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,sback); - - if (!hts_htmlcheck_loop(sback->lnk, sback->count,b,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) { - return -1; - } else if (_hts_cancel || !back_checkmirror(opt)) { // cancel 2 ou 1 (cancel parsing) - back_delete(opt,cache,sback,b); // cancel test - stop_looping = 1; - } - } -#endif - - + // traitement des 304,303.. if (back[b].status<=0) { if (HTTP_IS_REDIRECT(back[b].r.statuscode)) { // agh moved.. un tit tour de plus @@ -497,7 +493,6 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, // check explicit forbidden - don't follow 3xx in this case { int set_prio_to=0; - robots_wizard* robots = (robots_wizard*) opt->robotsptr; if (hts_acceptlink(opt,ptr,lien_tot,liens, mov_adr,mov_fil, NULL, NULL, @@ -531,16 +526,16 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, methode=BACK_ADD_TEST; // tester avec HEAD else { methode=BACK_ADD_TEST2; // tester avec GET - if ( opt->errlog!=NULL ) { - fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Loop with HEAD request (during prefetch) at %s%s"LF,curr_adr,curr_fil); + if ( opt->log!=NULL ) { + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Loop with HEAD request (during prefetch) at %s%s"LF,curr_adr,curr_fil); test_flush; } } // Ajouter URLSAVENAME_WAIT_FOR_AVAILABLE_SOCKET(); - if (back_add(sback,opt,cache,mov_adr,mov_fil,methode,referer_adr,referer_fil,1,NULL)!=-1) { // OK - if ( (opt->debug>1) && (opt->errlog!=NULL) ) { - fspc(opt->errlog,"warning"); fprintf(opt->errlog,"(during prefetch) %s (%d) to link %s at 
%s%s"LF,back[b].r.msg,back[b].r.statuscode,back[b].r.location,curr_adr,curr_fil); + if (back_add(sback,opt,cache,mov_adr,mov_fil,methode,referer_adr,referer_fil,1)!=-1) { // OK + if ( (opt->debug>1) && (opt->log!=NULL) ) { + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"(during prefetch) %s (%d) to link %s at %s%s"LF,back[b].r.msg,back[b].r.statuscode,back[b].r.location,curr_adr,curr_fil); test_flush; } @@ -548,21 +543,21 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, back_maydelete(opt,cache,sback,b); strcpybuff(curr_adr,mov_adr); strcpybuff(curr_fil,mov_fil); - b=back_index(sback,curr_adr,curr_fil,methode); + b=back_index(opt,sback,curr_adr,curr_fil,methode); if (!get_test_request) has_been_moved = 1; // sinon ne pas forcer has_been_moved car non déplacé petits_tours++; // } else {// sinon on fait rien et on s'en va.. (ftp etc) - if ( (opt->debug>1) && (opt->errlog)) { - fspc(opt->errlog,"debug"); fprintf(opt->errlog,"Warning: Savename redirect backing error at %s%s"LF,mov_adr,mov_fil); + if ( (opt->debug>1) && (opt->log)) { + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"Warning: Savename redirect backing error at %s%s"LF,mov_adr,mov_fil); test_flush; } } } } else { - if ( opt->errlog!=NULL ) { - fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Unable to test %s%s (loop to same filename)"LF,adr_complete,fil_complete); + if ( opt->log!=NULL ) { + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Unable to test %s%s (loop to same filename)"LF,adr_complete,fil_complete); test_flush; } } @@ -570,8 +565,8 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, } } } else{ // arrêter les frais - if ( opt->errlog!=NULL ) { - fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Unable to test %s%s (loop)"LF,adr_complete,fil_complete); + if ( opt->log!=NULL ) { + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Unable to test %s%s (loop)"LF,adr_complete,fil_complete); test_flush; } } @@ -586,8 +581,8 @@ int url_savename(char* adr_complete, char* 
fil_complete, char* save, strcpybuff(back[b].r.contenttype,"text/html"); // message d'erreur en html // Finalement on, renvoie un erreur, pour ne toucher à rien dans le code // libérer emplacement backing - /*if (opt->errlog!=NULL) { - fspc(opt->errlog,0); fprintf(opt->errlog,"Error: (during prefetch) %s (%d) to link %s at %s%s"LF,back[b].r.msg,back[b].r.statuscode,back[b].r.location,curr_adr,curr_fil); + /*if (opt->log!=NULL) { + fspc(opt->log,0); fprintf(opt->log,"Error: (during prefetch) %s (%d) to link %s at %s%s"LF,back[b].r.msg,back[b].r.statuscode,back[b].r.location,curr_adr,curr_fil); test_flush; } back_delete(opt,cache,sback,b); @@ -602,7 +597,7 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, ext_chg=2; /* change filename */ strcpybuff(ext,back[b].r.cdispo); } - else if (!may_unknown(back[b].r.contenttype) || ishtest == -2 ) { // on peut patcher à priori? (pas interdit ou pas de type) + else if (!may_unknown(opt,back[b].r.contenttype) || ishtest == -2 ) { // on peut patcher à priori? (pas interdit ou pas de type) give_mimext(s,back[b].r.contenttype); // obtenir extension if (strnotempty(s)>0) { // on a reconnu l'extension ext_chg=1; @@ -635,16 +630,14 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, #if BDEBUG==1 printf("error while savename crash adding\n"); #endif - if (opt->errlog) { - fspc(opt->errlog,"error"); fprintf(opt->errlog,"Unexpected savename backing error at %s%s"LF,adr,fil_complete); + if (opt->log) { + HTS_LOG(opt,LOG_ERROR); fprintf(opt->log,"Unexpected savename backing error at %s%s"LF,adr,fil_complete); test_flush; } } // restaurer -#if HTS_ANALYSTE - _hts_in_html_parsing=hihp; -#endif + opt->state._hts_in_html_parsing=hihp; } // caché? 
} } @@ -679,7 +672,7 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, if (ext_chg) { // changer ext char* a=fil+strlen(fil)-1; if ( (opt->debug>1) && (opt->log!=NULL) ) { - fspc(opt->log,"debug"); + HTS_LOG(opt,LOG_DEBUG); if (ext_chg==1) fprintf(opt->log,"Changing link extension %s%s to .%s"LF,adr_complete,fil_complete,ext); else @@ -720,7 +713,7 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, // ajouter nom du site éventuellement en premier if (opt->savename_type == -1) { // utiliser savename_userdef! (%h%p/%n%q.%t) - char* a = opt->savename_userdef; + const char* a = StringBuff(opt->savename_userdef); char* b = save; /*char *nom_pos=NULL,*dot_pos=NULL; // Position nom et point */ char tok; @@ -791,7 +784,7 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, *d++ = *c++; } *d = '\0'; - d = unescape_http(name[0]); + d = unescape_http(catbuff,name[0]); if (d && *d) { strcpybuff(b, d); /* value */ b += strlen(b); @@ -923,9 +916,12 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, break; case 'Q': case 'q': /* query MD5 (128-bits/16-bits) GENERATED ONLY IF query string exists! */ - *b='\0'; - strncatbuff(b,url_md5(fil_complete),(tok == 'Q')?32:4); - b+=strlen(b); // pointer à la fin + { + char md5[32 + 2]; + *b='\0'; + strncatbuff(b,url_md5(md5, fil_complete),(tok == 'Q')?32:4); + b+=strlen(b); // pointer à la fin + } break; case 'r': case 'R': // protocol *b='\0'; @@ -1035,7 +1031,7 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, } // si un html à coup sûr - if ( (ext_chg!=0) ? (ishtml_ext(ext) == 1) : (ishtml(fil)==1) ) { + if ( (ext_chg!=0) ? (ishtml_ext(ext) == 1) : (ishtml(opt,fil)==1) ) { if (opt->savename_type%100==2) { // html/ strcatbuff(save, "html/"); } @@ -1053,7 +1049,7 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, if ((*a=='/') || (*a=='\\')) a++; // html? - if ( (ext_chg!=0) ? 
(ishtml_ext(ext)==1) : (ishtml(fil)==1) ) { + if ( (ext_chg!=0) ? (ishtml_ext(ext)==1) : (ishtml(opt,fil)==1) ) { if (opt->savename_type%100==5) strcatbuff(save,"html/"); } else { @@ -1078,7 +1074,7 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, int L; // pseudo-CRC sur fil et adr pour initialiser générateur aléatoire.. unsigned int s=0; - L=strlen(C); + L = (int) strlen(C); for(i=0;i<(int) strlen(fil_complete);i++) { s+=(unsigned int) fil_complete[i]; } @@ -1087,14 +1083,14 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, } srand(s); - j=strlen(save); + j = (int) strlen(save); for(i=0;i<8;i++) { char c=C[(rand()%L)]; save[i+j]=c; } save[i+j]='\0'; // ajouter extension - a=fil+strlen(fil)-1; + a = fil + strlen(fil) - 1; while(( a > fil) && (*a != '/') && (*a != '.')) a--; if (*a=='.') { strcatbuff(save,a); // ajouter @@ -1141,8 +1137,8 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, if (*a!='.') { // agh pas de point //strcatbuff(save,".none"); // a éviter strcatbuff(save,".html"); // préférable! 
- if ( (opt->debug>1) && (opt->errlog!=NULL) ) { - fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Default HTML type set for %s%s"LF,adr_complete,fil_complete); + if ( (opt->debug>1) && (opt->log!=NULL) ) { + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Default HTML type set for %s%s"LF,adr_complete,fil_complete); test_flush; } } @@ -1288,15 +1284,13 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, /* ensure that there is no ../ (potential vulnerability) */ fil_simplifie(save); -#if HTS_ANALYSTE - { - hts_htmlcheck_savename(adr_complete,fil_complete,referer_adr,referer_fil,save); - if ( (opt->debug>0) && (opt->log!=NULL) ) { - fspc(opt->log,"info"); fprintf(opt->log,"engine: save-name: local name: %s%s -> %s"LF,adr,fil,save); - test_flush; - } - } -#endif + /* callback */ + RUN_CALLBACK5(opt, savename, adr_complete,fil_complete,referer_adr,referer_fil,save); + + if ( (opt->debug>0) && (opt->log!=NULL) ) { + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"engine: save-name: local name: %s%s -> %s"LF,adr,fil,save); + test_flush; + } /* Ensure that the MANDATORY "temporary" extension is set */ if (ext_chg_delayed) { @@ -1317,9 +1311,9 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, } // chemin primaire éventuel A METTRE AVANT - if (strnotempty(opt->path_html)) { + if (strnotempty(StringBuff(opt->path_html))) { char BIGSTK tempo[HTS_URLMAXSIZE*2]; - strcpybuff(tempo,opt->path_html); + strcpybuff(tempo,StringBuff(opt->path_html)); strcatbuff(tempo,save); strcpybuff(save,tempo); } @@ -1330,8 +1324,6 @@ int url_savename(char* adr_complete, char* fil_complete, char* save, int nom_ok; do { int i; - int len; - len=strlen(save); // taille // nom_ok=1; // à priori bon // on part de la fin pour optimiser, plus les opti de taille pour aller encore plus vite.. 
@@ -1426,6 +1418,8 @@ printf("\nEnd search, %s\n",fil_complete); /* nom avec md5 urilisé partout */ void standard_name(char* b,char* dot_pos,char* nom_pos,char* fil_complete,int short_ver) { + char md5[32 + 2]; + b[0]='\0'; /* Nom */ if (dot_pos) { @@ -1440,7 +1434,7 @@ void standard_name(char* b,char* dot_pos,char* nom_pos,char* fil_complete,int sh strncatbuff(b,nom_pos,8); } /* MD5 - 16 bits */ - strncatbuff(b,url_md5(fil_complete),4); + strncatbuff(b,url_md5(md5,fil_complete),4); /* Ext */ if (dot_pos) { strcatbuff(b,"."); @@ -1458,10 +1452,8 @@ void standard_name(char* b,char* dot_pos,char* nom_pos,char* fil_complete,int sh /* Petit md5 */ -char* url_md5(char* fil_complete) { - char* digest; +char* url_md5(char* digest, char* fil_complete) { char* a; - NOSTATIC_RESERVE(digest, char, 32+2); digest[0]='\0'; a=strchr(fil_complete,'?'); if (a) { @@ -1478,7 +1470,7 @@ char* url_md5(char* fil_complete) { // interne à url_savename: ajoute une chaîne à une autre avec \ -> / void url_savename_addstr(char* d,char* s) { - int i=strlen(d); + int i = (int) strlen(d); while(*s) { if (*s=='\\') // remplacer \ par des / d[i++]='/'; diff --git a/src/htsname.h b/src/htsname.h index 908e8e6..9d98622 100644 --- a/src/htsname.h +++ b/src/htsname.h @@ -40,11 +40,11 @@ Please visit our Website: http://www.httrack.com #ifndef HTSNAME_DEFH #define HTSNAME_DEFH -#include "htscore.h" +#include "htsglobal.h" #define DELAYED_EXT "delayed" #define IS_DELAYED_EXT(a) ( ((a) != NULL) && ((a)[0] != 0) && strendwith_(a, "." 
DELAYED_EXT) ) -static int strendwith_(const char* a, const char* b) { +HTS_STATIC int strendwith_(const char* a, const char* b) { int i, j; for(i = 0 ; a[i] != 0 ; i++); for(j = 0 ; b[j] != 0 ; j++); @@ -55,14 +55,49 @@ static int strendwith_(const char* a, const char* b) { return (j == -1); } - /* Library internal definictions */ #ifdef HTS_INTERNAL_BYTECODE + +/* Forward definitions */ +#ifndef HTS_DEF_FWSTRUCT_httrackp +#define HTS_DEF_FWSTRUCT_httrackp +typedef struct httrackp httrackp; +#endif +#ifndef HTS_DEF_FWSTRUCT_lien_url +#define HTS_DEF_FWSTRUCT_lien_url +typedef struct lien_url lien_url; +#endif +#ifndef HTS_DEF_FWSTRUCT_struct_back +#define HTS_DEF_FWSTRUCT_struct_back +typedef struct struct_back struct_back; +#endif +#ifndef HTS_DEF_FWSTRUCT_cache_back +#define HTS_DEF_FWSTRUCT_cache_back +typedef struct cache_back cache_back; +#endif +#ifndef HTS_DEF_FWSTRUCT_hash_struct +#define HTS_DEF_FWSTRUCT_hash_struct +typedef struct hash_struct hash_struct; +#endif +#ifndef HTS_DEF_FWSTRUCT_lien_back +#define HTS_DEF_FWSTRUCT_lien_back +typedef struct lien_back lien_back; +#endif + // note: 'headers' can either be null, or incomplete (only r member filled) -int url_savename(char* adr_complete,char* fil_complete,char* save,char* former_adr,char* former_fil,char* referer_adr,char* referer_fil,httrackp* opt,lien_url** liens,int lien_tot,struct_back* sback,cache_back* cache,hash_struct* hash,int ptr,int numero_passe,const lien_back* headers); +int url_savename(char* adr_complete, char* fil_complete, char* save, + char* former_adr, char* former_fil, + char* referer_adr, char* referer_fil, + httrackp* opt, + lien_url** liens, int lien_tot, + struct_back* sback, + cache_back* cache, + hash_struct* hash, + int ptr, int numero_passe, + const lien_back* headers); void standard_name(char* b,char* dot_pos,char* nom_pos,char* fil_complete,int short_ver); void url_savename_addstr(char* d,char* s); -char* url_md5(char* fil_complete); +char* url_md5(char* digest_buffer, 
char* fil_complete); #endif #endif diff --git a/src/htsnet.h b/src/htsnet.h index 4880721..7c7fbe3 100644 --- a/src/htsnet.h +++ b/src/htsnet.h @@ -40,10 +40,11 @@ Please visit our Website: http://www.httrack.com #define HTS_DEFNETH /* basic net definitions */ +#include "htsglobal.h" #include "htsbasenet.h" #include <ctype.h> -#if HTS_WIN +#ifdef _WIN32 // pour read #ifndef _WIN32_WCE #include <io.h> @@ -83,7 +84,11 @@ Please visit our Website: http://www.httrack.com /* Ipv4 structures */ typedef struct in_addr INaddr; /* This should handle all cases */ -typedef struct SOCaddr { +#ifndef HTS_DEF_FWSTRUCT_SOCaddr +#define HTS_DEF_FWSTRUCT_SOCaddr +typedef struct SOCaddr SOCaddr; +#endif +struct SOCaddr { union { struct sockaddr_in in; struct sockaddr sa; @@ -91,7 +96,7 @@ typedef struct SOCaddr { unsigned char v6data[16]; unsigned char pad[128]; } m_addr; -} SOCaddr; +}; /* Ipv4 structure members */ #define SOCaddr_sinaddr(server) ((server).m_addr.in.sin_addr) @@ -153,7 +158,11 @@ strcpy(namebuf, dot); \ /* Ipv4 structures */ typedef struct in6_addr INaddr; /* This should handle all cases */ -typedef struct SOCaddr { +#ifndef HTS_DEF_FWSTRUCT_SOCaddr +#define HTS_DEF_FWSTRUCT_SOCaddr +typedef struct SOCaddr SOCaddr; +#endif +struct SOCaddr { union { struct sockaddr_in6 in6; struct sockaddr_in in; @@ -162,7 +171,7 @@ typedef struct SOCaddr { unsigned char v6data[16]; unsigned char pad[128]; } m_addr; -} SOCaddr; +}; /* Ipv4 structure members */ #define SOCaddr_sinaddr(server) ((server).m_addr.in6.sin6_addr) @@ -239,12 +248,16 @@ getnameinfo((struct sockaddr *)&(ss), sslen, \ #endif /* Buffer structure to copy various hostent structures */ -typedef struct t_fullhostent { +#ifndef HTS_DEF_FWSTRUCT_t_fullhostent +#define HTS_DEF_FWSTRUCT_t_fullhostent +typedef struct t_fullhostent t_fullhostent; +#endif +struct t_fullhostent { t_hostent hp; char* list[2]; char addr[HTS_MAXADDRLEN]; /* various struct sockaddr structures */ unsigned int addr_maxlen; -} t_fullhostent; +}; 
/* Initialize a t_fullhostent structure */ #define fullhostent_init(h) do { \ diff --git a/src/htsnostatic.c b/src/htsnostatic.c deleted file mode 100644 index 22e7d7a..0000000 --- a/src/htsnostatic.c +++ /dev/null @@ -1,264 +0,0 @@ -/* ------------------------------------------------------------ */ -/* -HTTrack Website Copier, Offline Browser for Windows and Unix -Copyright (C) Xavier Roche and other contributors - -This program is free software; you can redistribute it and/or -modify it under the terms of the GNU General Public License -as published by the Free Software Foundation; either version 2 -of the License, or any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - - -Important notes: - -- We hereby ask people using this source NOT to use it in purpose of grabbing -emails addresses, or collecting any other private information on persons. -This would disgrace our work, and spoil the many hours we spent on it. 
- - -Please visit our Website: http://www.httrack.com -*/ - - -/* ------------------------------------------------------------ */ -/* File: htsnostatic.c subroutines: */ -/* thread-safe routines for reentrancy */ -/* Author: Xavier Roche */ -/* ------------------------------------------------------------ */ - -/* Internal engine bytecode */ -#define HTS_INTERNAL_BYTECODE - -#include "htsnostatic.h" - -#include "htsbase.h" -#include "htshash.h" -#include "htsinthash.h" - -typedef struct hts_varhash { - /* - inthash values; - */ - inthash blocks; -} hts_varhash; - -#if USE_BEGINTHREAD -static PTHREAD_LOCK_TYPE hts_static_Mutex; -#endif -static int hts_static_Mutex_init=0; -#if HTS_WIN -#else -static PTHREAD_KEY_TYPE hts_static_key; -#endif - -int hts_initvar() { - if (!hts_static_Mutex_init) { - /* Init done */ - hts_static_Mutex_init=1; -#if USE_BEGINTHREAD - /* Init mutex */ - htsSetLock(&hts_static_Mutex, -999); - -#if HTS_WIN -#else - /* Init hash */ - PTHREAD_KEY_CREATE(&hts_static_key, hts_destroyvar); -#endif -#endif - } - - /* Set specific thread value */ -#if USE_BEGINTHREAD -#if HTS_WIN -#else - { - void* thread_val; - hts_varhash* hts_static_hash = (hts_varhash*) malloc(sizeof(hts_static_hash)); - if (!hts_static_hash) - return 0; - /* - hts_static_hash->values = inthash_new(HTS_VAR_MAIN_HASH); - if (!hts_static_hash->values) - return 0; - */ - hts_static_hash->blocks = inthash_new(HTS_VAR_MAIN_HASH); - if (!hts_static_hash->blocks) - return 0; - /* inthash_value_is_malloc(hts_static_hash->values, 0); */ /* Regular values */ - inthash_value_is_malloc(hts_static_hash->blocks, 1); /* We'll have to free them upon term! 
*/ - inthash_value_set_free_handler(hts_static_hash->blocks, hts_destroyvar_key); /* free handler */ - thread_val = (void*) hts_static_hash; - - PTHREAD_KEY_SET(hts_static_key, thread_val, inthash); - } -#endif -#endif - - return 1; -} - -/* - hash table free handler to free all keys -*/ -void hts_destroyvar_key(void* adr) { -#if HTS_WIN -#else - hts_NostaticComplexKey* cKey = (hts_NostaticComplexKey*) adr; - if (cKey) { - void* block_address = NULL; - PTHREAD_KEY_GET(cKey->localKey, &block_address, void*); - /* Free block */ - if (block_address) { - free(block_address); - } - cKey->localInit = 0; - } -#endif -} - -void hts_destroyvar(void* ptrkey) { -#if HTS_WIN -#else - if (ptrkey) { - hts_varhash* hashtables = (hts_varhash*) ptrkey; - PTHREAD_KEY_SET(hts_static_key, NULL, inthash); /* unregister */ - - /* Destroy has table */ - inthash_delete(&(hashtables->blocks)); /* will magically call hts_destroyvar_key(), too */ - /* - inthash_delete(&(hashtables->values)); - */ - free(ptrkey); - } -#endif -} - -/* - destroy all key values (for the current thread) -*/ -int hts_freevar() { -#if HTS_WIN -#if 0 - void* thread_val = NULL; - PTHREAD_KEY_GET(hts_static_key, &thread_val, inthash); - hts_destroyvar(thread_val); - PTHREAD_KEY_SET(hts_static_key, NULL, inthash); /* unregister */ - /* - PTHREAD_KEY_DELETE(hts_static_key); NO - */ -#endif -#endif - return 1; -} - -HTSEXT_API int hts_resetvar() { - int r; - hts_lockvar(); - { - hts_freevar(); - r = hts_initvar(); - } - hts_unlockvar(); - return r; -} - -int hts_maylockvar() { - return hts_static_Mutex_init; -} - -int hts_lockvar() { -#if USE_BEGINTHREAD - htsSetLock(&hts_static_Mutex, 1); -#endif - return 1; -} - -int hts_unlockvar() { -#if USE_BEGINTHREAD - htsSetLock(&hts_static_Mutex, 0); -#endif - return 1; -} - -int hts_setvar(char* name, long int value) { - return hts_setextvar(name, (long int)value, 0); -} - -int hts_setblkvar(char* name, void* value) { - return hts_setextvar(name, (long int)value, 1); -} - -int 
hts_setextvar(char* name, long int value, int flag) { -#if HTS_WIN -#else - void* thread_val = NULL; - hts_varhash* hashtables; - - /* - hts_lockvar(); // NO - MUST be protected by caller - { - */ - PTHREAD_KEY_GET(hts_static_key, &thread_val, inthash); - hashtables = (hts_varhash*) thread_val; - if (hashtables) { // XXc XXC hack for win version - inthash_write(hashtables->blocks, name, value); - } -#endif - - return 1; -} - - -int hts_getvar(char* name, long int* ptrvalue) { - return hts_getextvar(name, (long int*)ptrvalue, 0); -} - -int hts_getblkvar(char* name, void** ptrvalue) { - return hts_getextvar(name, (long int*)ptrvalue, 1); -} - -int hts_getextvar(char* name, long int* ptrvalue, int flag) { -#if HTS_WIN -#else - void* thread_val = NULL; - hts_varhash* hashtables; - - hts_lockvar(); - { - PTHREAD_KEY_GET(hts_static_key, &thread_val, inthash); - hashtables = (hts_varhash*) thread_val; - /* if (flag) { - */ - inthash_read(hashtables->blocks, name, ptrvalue); - /* - } else { - inthash_read(hashtables->values, name, ptrvalue); - } - */ - } - hts_unlockvar(); -#endif - - return 1; -} - -long int hts_directgetvar(char* name) { - long int value=0; - hts_getvar(name, &value); - return value; -} - -void* hts_directgetblkvar(char* name) { - void* value=NULL; - hts_getblkvar(name, &value); - return value; -} diff --git a/src/htsnostatic.h b/src/htsnostatic.h deleted file mode 100644 index 3bf4ec9..0000000 --- a/src/htsnostatic.h +++ /dev/null @@ -1,278 +0,0 @@ -/* ------------------------------------------------------------ */ -/* -HTTrack Website Copier, Offline Browser for Windows and Unix -Copyright (C) Xavier Roche and other contributors - -This program is free software; you can redistribute it and/or -modify it under the terms of the GNU General Public License -as published by the Free Software Foundation; either version 2 -of the License, or any later version. 
- -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - - -Important notes: - -- We hereby ask people using this source NOT to use it in purpose of grabbing -emails addresses, or collecting any other private information on persons. -This would disgrace our work, and spoil the many hours we spent on it. - - -Please visit our Website: http://www.httrack.com -*/ - - -/* ------------------------------------------------------------ */ -/* File: htsnostatic.c subroutines: */ -/* thread-safe routines for reentrancy */ -/* Author: Xavier Roche */ -/* ------------------------------------------------------------ */ - -/* - Okay, with these routines, the engine should be fully reentrant (thread-safe) - All static references have been changed: - - from - function foo() { - static bartype bar; - } - to: - function foo() { - bartype* bar; - NOSTATIC_RESERVE(bar, bartype, 1); - } -*/ - -#ifndef HTSNOSTATIC_DEFH -#define HTSNOSTATIC_DEFH - -/* Library internal definictions */ -#ifdef HTS_INTERNAL_BYTECODE - -#include "htscore.h" -#include "htsthread.h" - -#define HTS_VAR_MAIN_HASH 127 - -/* - MutEx -*/ - - -/* Magic per-thread variables functions - - Example: - hts_lockvar(); - hts_setvar("MyFoo", (long int)(void*)&foo); - hts_unlockvar(); - .. 
- foo=(void*)(long int)hts_directgetvar("MyFoo"); - - Do not forget to initialize (hts_initvar()) the library once per thread -*/ -int hts_initvar(void); -int hts_freevar(void); -#ifndef HTTRACK_DEFLIB -HTSEXT_API int hts_resetvar(void); -#endif -int hts_maylockvar(void); -int hts_lockvar(void); -int hts_unlockvar(void); - -int hts_setvar(char* name, long int value); -int hts_getvar(char* name, long int* ptrvalue); -long int hts_directgetvar(char* name); - -int hts_setblkvar(char* name, void* value); -int hts_getblkvar(char* name, void** ptrvalue); -void* hts_directgetblkvar(char* name); - -/* Internal */ -int hts_setextvar(char* name, long int value, int flag); -int hts_getextvar(char* name, long int* ptrvalue, int flag); -void hts_destroyvar(void* ptrkey); -void hts_destroyvar_key(void* adr); - -/* - Ensure that the variable 'name' has 'nelts' of type 'type' reserved - fnc is an UNIQUE function name -*/ -#define NOSTATIC_RESERVE(name, type, nelt) NOSTATIC_XRESERVE(name, type, nelt) - -/* - Note: - Yes, we first read the localInit flag variable without MutEx protection, - for optimization purpose, because the flag is set once initialization DONE. - If the first read fails, we *securely* re-check and initialize *if* necessary. 
- The abort() things should NEVER be called, and are here for safety reasons -*/ -/* - function-specific static cKey: - cKey = { localKey, localInit } - || \ - \/ \ ==1 upon initialization - thread variable - || - \/ - void* - || - \/ - 'thread-static' value - - the function-specific static cKey is also referenced in the global - hashtable for free() purpose: (see hts_destroyvar()) - - global static key variable - 'hts_static_key' - || - \/ - thread variable - || - \/ - void* - || - \/ - hashtable - || - \/ - function-specific hash key - || - \/ - &cKey - -*/ -#ifdef _WIN32 - -#ifdef _WIN32_WCE - -/* Windows CE: static only */ -#define NOSTATIC_XRESERVE(name, type, nelt) do { \ - /*__declspec( thread )*/ static type thValue[nelt]; \ - /* __declspec( thread ) */ int static initValue = 0; \ - name = thValue; \ - if (!initValue) { \ - initValue = 1; \ - memset(&thValue, 0, sizeof(thValue)); \ - } \ -} while(0) - -#elif 1 - -/* New Windows version: TLS */ -/* Suggested by daan at zwif.com to be more gentle with LoadLibrary (04/2004) -See http://msdn.microsoft.com/library/en-us/vccore/html/_core_rules_and_limitations_for_tls.asp -And especially the "DLL declares any nonlocal data or object as __declspec( thread )" section -*/ -#define NOSTATIC_XRESERVE(name,type,nelt) do { \ - static DWORD tlsIndex = 0; \ - static int initValue = 0; \ - if (initValue == 0) \ - { \ - if (!hts_maylockvar()) { \ - abortLog("unable to lock mutex (not initialized?!)"); \ - abort(); \ - } \ - hts_lockvar(); \ - if (initValue == 0) { \ - tlsIndex = TlsAlloc(); \ - if (tlsIndex == 0xFFFFFFFF) { \ - abortLog("unable to allocate thread local storage (TLS) for variable!"); \ - abort(); \ - } \ - initValue = 1; \ - } \ - hts_unlockvar(); \ - } \ - name = (type*)TlsGetValue(tlsIndex); \ - if (name == NULL) { \ - name = (type*)malloc(sizeof(type)*nelt); \ - if (name == NULL) { \ - abortLog("unable to allocate memory for variable!"); \ - abort(); \ - } \ - memset(name, 0, sizeof(type)*nelt); \ - 
TlsSetValue(tlsIndex, name); \ - } \ -} while(0) - -#else - -/* Windows: handled by the compiler */ -#define NOSTATIC_XRESERVE(name, type, nelt) do { \ - __declspec( thread ) static type thValue[nelt]; \ - __declspec( thread ) int static initValue = 0; \ - name = thValue; \ - if (!initValue) { \ - initValue = 1; \ - memset(&thValue, 0, sizeof(thValue)); \ - } \ -} while(0) - -#endif - -#else - -/* Un*x : slightly more complex, we have to create a thread-key */ -typedef struct { - PTHREAD_KEY_TYPE localKey; - unsigned char localInit; -} hts_NostaticComplexKey; -#define NOSTATIC_XRESERVE(name, type, nelt) do { \ -static hts_NostaticComplexKey cKey={0,0}; \ -name = NULL; \ -if ( cKey.localInit ) { \ - PTHREAD_KEY_GET(cKey.localKey, &name, type*); \ -} \ -if ( ( ! cKey.localInit ) || ( name == NULL ) ) { \ - if (!hts_maylockvar()) { \ - abortLog("unable to lock mutex (not initialized?!)"); \ - abort(); \ - } \ - hts_lockvar(); \ - { \ - { \ - name = (type *) calloc((nelt), sizeof(type)); \ - if (name == NULL) { \ - abortLog("unable to allocate memory for variable!"); \ - abort(); \ - } \ - { \ - char elt_name[64+8]; \ - sprintf(elt_name, #name "_%d", (int) __LINE__); \ - PTHREAD_KEY_CREATE(&(cKey.localKey), NULL); \ - hts_setblkvar(elt_name, &cKey); \ - } \ - PTHREAD_KEY_SET(cKey.localKey, name, type*); \ - name = NULL; \ - PTHREAD_KEY_GET(cKey.localKey, &name, type*); \ - if (name == NULL) { \ - abortLog("unable to load thread key!"); \ - abort(); \ - } \ - if ( ! cKey.localInit ) { \ - cKey.localInit = 1; \ - } \ - } \ - } \ - hts_unlockvar(); \ -} \ -else { \ - PTHREAD_KEY_GET(cKey.localKey, &name, type*); \ - if (name == NULL) { \ - abortLog("unable to load thread key! 
(2)"); \ - abort(); \ - } \ -} \ -} while(0) -#endif - -#endif - -#endif diff --git a/src/htsopt.h b/src/htsopt.h index bf62c72..0b304f8 100644 --- a/src/htsopt.h +++ b/src/htsopt.h @@ -40,51 +40,231 @@ Please visit our Website: http://www.httrack.com #define HTTRACK_DEFOPT #include <stdio.h> -#include "htsbasenet.h" -#include "htsbauth.h" +#include "htsglobal.h" -// structure proxy -typedef struct t_proxy { +/* Forward definitions */ +#ifndef HTS_DEF_FWSTRUCT_t_hts_htmlcheck_callbacks +#define HTS_DEF_FWSTRUCT_t_hts_htmlcheck_callbacks +typedef struct t_hts_htmlcheck_callbacks t_hts_htmlcheck_callbacks; +#endif +#ifndef HTS_DEF_FWSTRUCT_t_dnscache +#define HTS_DEF_FWSTRUCT_t_dnscache +typedef struct t_dnscache t_dnscache; +#endif +#ifndef HTS_DEF_FWSTRUCT_hash_struct +#define HTS_DEF_FWSTRUCT_hash_struct +typedef struct hash_struct hash_struct; +#endif +#ifndef HTS_DEF_FWSTRUCT_robots_wizard +#define HTS_DEF_FWSTRUCT_robots_wizard +typedef struct robots_wizard robots_wizard; +#endif +#ifndef HTS_DEF_FWSTRUCT_t_cookie +#define HTS_DEF_FWSTRUCT_t_cookie +typedef struct t_cookie t_cookie; +#endif + +/** Forward definitions **/ +#ifndef HTS_DEF_FWSTRUCT_String +#define HTS_DEF_FWSTRUCT_String +typedef struct String String; +#endif +#ifndef HTS_DEF_STRUCT_String +#define HTS_DEF_STRUCT_String +struct String { + char* buffer_; + size_t length_; + size_t capacity_; +}; +#endif + +/* Defines */ +#define CATBUFF_SIZE (STRING_SIZE*2*2) +#define STRING_SIZE 2048 + +/* Proxy structure */ +#ifndef HTS_DEF_FWSTRUCT_t_proxy +#define HTS_DEF_FWSTRUCT_t_proxy +typedef struct t_proxy t_proxy; +#endif +struct t_proxy { int active; - char name[1024]; + String name; int port; - char bindhost[256]; // bind this host -} t_proxy; + String bindhost; // bind this host +}; /* Structure utile pour copier en bloc les paramètres */ -typedef struct htsfilters { +#ifndef HTS_DEF_FWSTRUCT_htsfilters +#define HTS_DEF_FWSTRUCT_htsfilters +typedef struct htsfilters htsfilters; +#endif +struct 
htsfilters { char*** filters; int* filptr; //int* filter_max; -} htsfilters; +}; /* User callbacks chain */ typedef int (*htscallbacksfncptr)(void); typedef struct htscallbacks htscallbacks; struct htscallbacks { - char callbackName[128]; void* moduleHandle; htscallbacksfncptr exitFnc; htscallbacks * next; }; +/* filenote() internal file structure */ +#ifndef HTS_DEF_FWSTRUCT_filenote_strc +#define HTS_DEF_FWSTRUCT_filenote_strc +typedef struct filenote_strc filenote_strc; +#endif +struct filenote_strc { + FILE* lst; + char path[STRING_SIZE*2]; +}; + +/* concat() functions */ +#ifndef HTS_DEF_FWSTRUCT_concat_strc +#define HTS_DEF_FWSTRUCT_concat_strc +typedef struct concat_strc concat_strc; +#endif +struct concat_strc { + int index; + char buff[16][STRING_SIZE*2*2]; +}; + +/* int2 functions */ +#ifndef HTS_DEF_FWSTRUCT_strc_int2bytes2 +#define HTS_DEF_FWSTRUCT_strc_int2bytes2 +typedef struct strc_int2bytes2 strc_int2bytes2; +#endif +struct strc_int2bytes2 { + char catbuff[CATBUFF_SIZE]; + char buff1[256]; + char buff2[32]; + char* buffadr[2]; +}; + +/* cmd callback */ +#ifndef HTS_DEF_FWSTRUCT_usercommand_strc +#define HTS_DEF_FWSTRUCT_usercommand_strc +typedef struct usercommand_strc usercommand_strc; +#endif +struct usercommand_strc { + int exe; + char cmd[2048]; +}; + +/* error logging */ +#ifndef HTS_DEF_FWSTRUCT_fspc_strc +#define HTS_DEF_FWSTRUCT_fspc_strc +typedef struct fspc_strc fspc_strc; +#endif +struct fspc_strc { + int error; + int warning; + int info; +}; + /* Structure état du miroir */ -typedef struct htsoptstate { +#ifndef HTS_DEF_FWSTRUCT_htsoptstatecancel +#define HTS_DEF_FWSTRUCT_htsoptstatecancel +typedef struct htsoptstatecancel htsoptstatecancel; +#endif +struct htsoptstatecancel { + char *url; + htsoptstatecancel *next; +}; + +/* Mutexes */ +#ifndef HTS_DEF_FWSTRUCT_htsmutex_s +#define HTS_DEF_FWSTRUCT_htsmutex_s +typedef struct htsmutex_s htsmutex_s, *htsmutex; +#endif + +/* Hashtables */ +#ifndef HTS_DEF_FWSTRUCT_struct_inthash +#define 
HTS_DEF_FWSTRUCT_struct_inthash +typedef struct struct_inthash struct_inthash, *inthash; +#endif + +/* Structure état du miroir */ +#ifndef HTS_DEF_FWSTRUCT_htsoptstate +#define HTS_DEF_FWSTRUCT_htsoptstate +typedef struct htsoptstate htsoptstate; +#endif +struct htsoptstate { + htsmutex lock; /* 3.41 */ + /* */ int stop; int exit_xh; int back_add_stats; /* */ int mimehtml_created; - char mimemid[256]; + String mimemid; FILE* mimefp; int delayedId; /* */ + filenote_strc strc; + /* Functions context (avoir thread variables!) */ htscallbacks callbacks; -} htsoptstate; + concat_strc concat; + usercommand_strc usercmd; + fspc_strc fspc; + char *userhttptype; + int verif_backblue_done; + int verif_external_status; + t_dnscache *dns_cache; + /* HTML parsing state */ + char _hts_errmsg[1024]; + int _hts_in_html_parsing; + int _hts_in_html_done; + int _hts_in_html_poll; + int _hts_setpause; + char** _hts_addurl; + int _hts_cancel; + htsoptstatecancel *cancel; /* 3.41 */ + char HTbuff[2048]; + unsigned int debug_state; + unsigned int tmpnameid; /* 3.41 */ +}; +/* Library handles */ +#ifndef HTS_DEF_FWSTRUCT_htslibhandles +#define HTS_DEF_FWSTRUCT_htslibhandles +typedef struct htslibhandles htslibhandles; +#endif +#ifndef HTS_DEF_FWSTRUCT_htslibhandle +#define HTS_DEF_FWSTRUCT_htslibhandle +typedef struct htslibhandle htslibhandle; +#endif +struct htslibhandle { + char *moduleName; + void *handle; +}; +struct htslibhandles { + int count; + htslibhandle *handles; +}; + +/* Javascript parser flags */ +typedef enum htsparsejava_flags { + HTSPARSE_NONE = 0, // don't parse + HTSPARSE_DEFAULT = 1, // parse default (all) + HTSPARSE_NO_CLASS = 2, // don't parse .java + HTSPARSE_NO_JAVASCRIPT = 4, // don't parse .js + HTSPARSE_NO_AGGRESSIVE = 8 // don't aggressively parse .js or .java +} htsparsejava_flags; // paramètres httrack (options) -typedef struct httrackp { +#ifndef HTS_DEF_FWSTRUCT_httrackp +#define HTS_DEF_FWSTRUCT_httrackp +typedef struct httrackp httrackp; +#endif +struct 
httrackp { + size_t size_httrackp; // size of this structure + /* */ int wizard; // wizard aucun/grand/petit int flush; // fflush sur les fichiers log int travel; // type de déplacements (same domain etc) @@ -109,9 +289,7 @@ typedef struct httrackp { int rateout; // nombre d'octets minium pour le transfert int maxtime; // temps max en secondes int maxrate; // taux de transfert max -#if HTS_USEMMS int mms_maxtime; // max duration of a mms file -#endif float maxconn; // nombre max de connexions/s int waittime; // démarrage programmé int cache; // génération d'un cache @@ -120,21 +298,21 @@ typedef struct httrackp { t_proxy proxy; // configuration du proxy int savename_83; // conversion 8-3 pour les noms de fichiers int savename_type; // type de noms: structure originale/html-images en un seul niveau - char savename_userdef[256]; // structure userdef (ex: %h%p/%n%q.%t) + String savename_userdef; // structure userdef (ex: %h%p/%n%q.%t) int savename_delayed; // delayed type check int delayed_cached; // delayed type check can be cached to speedup updates int mimehtml; // MIME-html int user_agent_send; // user agent (ex: httrack/1.0 [sun]) - char user_agent[128]; - char referer[256]; // referer - char from[256]; // from - char path_log[1024]; // chemin pour cache et log - char path_html[1024]; // chemin pour miroir - char path_bin[1024]; // chemin pour templates + String user_agent; // + String referer; // referer + String from; // from + String path_log; // chemin pour cache et log + String path_html; // chemin pour miroir + String path_bin; // chemin pour templates int retry; // nombre d'essais supplémentaires en cas d'échec int makestat; // mettre à jour un fichier log de statistiques de transfert int maketrack; // mettre à jour un fichier log de statistiques d'opérations - int parsejava; // parsing des classes java pour récupérer les class, gif & cie + int parsejava; // parsing des classes java pour récupérer les class, gif & cie ; see htsparsejava_flags int 
hostcontrol; // abandon d'un host trop lent etc. int errpage; // générer une page d'erreur en cas de 404 etc. int check_type; // si type inconnu (cgi,asp,/) alors tester lien (et gérer moved éventuellement) @@ -144,7 +322,7 @@ typedef struct httrackp { int passprivacy; // pas de mot de pass dans les liens externes? int includequery; // include la query-string int mirror_first_page; // miroir des liens - char sys_com[2048]; // commande système + String sys_com; // commande système int sys_com_exec; // executer commande int accept_cookie; // gestion des cookies t_cookie* cookie; @@ -158,17 +336,18 @@ typedef struct httrackp { int parsedebug; // débugger parser (debug!) int norecatch; // ne pas reprendre les fichiers effacés localement par l'utilisateur int verbosedisplay; // animation textuelle - char footer[256]; // ligne d'infos + String footer; // ligne d'infos int maxcache; // maximum en mémoire au niveau du cache (backing) //int maxcache_anticipate; // maximum de liens à anticiper (majorant) int ftp_proxy; // proxy http pour ftp - char filelist[1024]; // fichier liste URL à inclure - char urllist[1024]; // fichier liste de filtres à inclure + String filelist; // fichier liste URL à inclure + String urllist; // fichier liste de filtres à inclure htsfilters filters; // contient les pointeurs pour les filtres - void* hash; // hash structure - void* robotsptr; // robots ptr - char lang_iso[64]; // en, fr .. - char mimedefs[2048]; // ext1=mimetype1\next2=mimetype2.. + hash_struct* hash; // hash structure + robots_wizard* robotsptr; // robots ptr + String lang_iso; // en, fr .. + String mimedefs; // ext1=mimetype1\next2=mimetype2.. 
+ String mod_blacklist; // (3.41) // int maxlink; // nombre max de liens int maxfilter; // nombre max de filtres @@ -182,11 +361,20 @@ typedef struct httrackp { int is_update; // c'est une update (afficher "File updated...") int dir_topindex; // reconstruire top index par la suite // - htsoptstate state; // état -} httrackp; + // callbacks + t_hts_htmlcheck_callbacks *callbacks_fun; + // store library handles + htslibhandles libHandles; + // + htsoptstate state; // state +}; // stats for httrack -typedef struct hts_stat_struct { +#ifndef HTS_DEF_FWSTRUCT_hts_stat_struct +#define HTS_DEF_FWSTRUCT_hts_stat_struct +typedef struct hts_stat_struct hts_stat_struct; +#endif +struct hts_stat_struct { LLint HTS_TOTAL_RECV; // flux entrant reçu LLint stat_bytes; // octets écrits sur disque // int HTS_TOTAL_RECV_STATE; // status: 0 tout va bien 1: ralentir un peu 2: ralentir 3: beaucoup @@ -219,8 +407,7 @@ typedef struct hts_stat_struct { // TStamp last_connect; // last connect() call TStamp last_request; // last request issued -} hts_stat_struct; - +}; #endif diff --git a/src/htsparse.c b/src/htsparse.c index 4aa1b7e..b39b41f 100644 --- a/src/htsparse.c +++ b/src/htsparse.c @@ -76,7 +76,7 @@ Please visit our Website: http://www.httrack.com #define relativeurlfil ((!parent_relative)?urlfil:parenturlfil) #define relativesavename ((!parent_relative)?savename:parentsavename) -#define test_flush if (opt->flush) { if (opt->log) { fflush(opt->log); } if (opt->errlog) { fflush(opt->errlog); } } +#define test_flush if (opt->flush) { if (opt->log) { fflush(opt->log); } if (opt->log) { fflush(opt->log); } } // does nothing #define XH_uninit do {} while(0) @@ -96,14 +96,14 @@ Please visit our Website: http://www.httrack.com ht_len+=A; #define HT_ADD_ADR \ if ((opt->getmode & 1) && (ptr>0)) { \ - int i=((int) (adr - lastsaved)),j=ht_len; HT_ADD_CHK(i) \ + size_t i = ((size_t) (adr - lastsaved)),j=ht_len; HT_ADD_CHK(i) \ memcpy(ht_buff+j, lastsaved, i); \ ht_buff[j+i]='\0'; \ 
lastsaved=adr; \ } #define HT_ADD(A) \ if ((opt->getmode & 1) && (ptr>0)) { \ - int i_=strlen(A),j_=ht_len; \ + size_t i_ = strlen(A), j_ = ht_len; \ if (i_) { \ HT_ADD_CHK(i_) \ memcpy(ht_buff+j_, A, i_); \ @@ -111,7 +111,7 @@ Please visit our Website: http://www.httrack.com } } #define HT_ADD_HTMLESCAPED(A) \ if ((opt->getmode & 1) && (ptr>0)) { \ - int i_, j_; \ + size_t i_, j_; \ char BIGSTK tempo_[HTS_URLMAXSIZE*2]; \ escape_for_html_print(A, tempo_); \ i_=strlen(tempo_); \ @@ -123,7 +123,7 @@ Please visit our Website: http://www.httrack.com } } #define HT_ADD_HTMLESCAPED_FULL(A) \ if ((opt->getmode & 1) && (ptr>0)) { \ - int i_, j_; \ + size_t i_, j_; \ char BIGSTK tempo_[HTS_URLMAXSIZE*2]; \ escape_for_html_print_full(A, tempo_); \ i_=strlen(tempo_); \ @@ -134,15 +134,15 @@ Please visit our Website: http://www.httrack.com ht_buff[j_+i_]='\0'; \ } } #define HT_ADD_START \ - int ht_size=(int)(r->size*5)/4+REALLOC_SIZE; \ - int ht_len=0; \ + size_t ht_size=(size_t)(r->size*5)/4+REALLOC_SIZE; \ + size_t ht_len=0; \ char* ht_buff=NULL; \ if ((opt->getmode & 1) && (ptr>0)) { \ ht_buff=(char*) malloct(ht_size); \ if (ht_buff==NULL) { \ printf("PANIC! 
: Not enough memory [%d]\n",__LINE__); \ XH_uninit; \ - abortLogFmt("not enough memory for current html document in HT_ADD_START : malloct(%d) failed" _ ht_size); \ + abortLogFmt("not enough memory for current html document in HT_ADD_START : malloct(%d) failed" _ (int) ht_size); \ exit(1); \ } \ ht_buff[0]='\0'; \ @@ -151,7 +151,7 @@ Please visit our Website: http://www.httrack.com int ok=0;\ if (ht_buff) { \ char digest[32+2];\ - INTsys fsize_old=fsize(fconv(savename));\ + off_t fsize_old = fsize(fconv(OPT_GET_BUFF(opt),savename));\ digest[0]='\0';\ domd5mem(ht_buff,ht_len,digest,1);\ if (fsize_old==ht_len) { \ @@ -163,7 +163,7 @@ Please visit our Website: http://www.httrack.com if ((mlen == 32) && (strcmp(((mbuff!=NULL)?mbuff:""),digest)==0)) {\ ok=1;\ if ( (opt->debug>1) && (opt->log!=NULL) ) {\ - fspc(opt->log,"debug"); fprintf(opt->log,"File not re-written (md5): %s"LF,savename);\ + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"File not re-written (md5): %s"LF,savename);\ test_flush;\ }\ } else {\ @@ -171,20 +171,21 @@ Please visit our Website: http://www.httrack.com } \ }\ if (!ok) { \ - file_notify(urladr, urlfil, savename, 1, 1, r->notmodified); \ - fp=filecreate(savename); \ + file_notify(opt,urladr, urlfil, savename, 1, 1, r->notmodified); \ + fp=filecreate(&opt->state.strc, savename); \ if (fp) { \ if (ht_len>0) {\ - if ((INTsys)fwrite(ht_buff,1,ht_len,fp) != ht_len) { \ + if (fwrite(ht_buff,1,ht_len,fp) != ht_len) { \ int fcheck;\ if ((fcheck=check_fatal_io_errno())) {\ opt->state.exit_xh=-1;\ }\ - if (opt->errlog) { \ - fspc(opt->errlog,"error"); fprintf(opt->errlog,"Unable to write HTML file %s: %s"LF, savename, strerror(errno));\ + if (opt->log) { \ + int last_errno = errno; \ + HTS_LOG(opt,LOG_ERROR); fprintf(opt->log,"Unable to write HTML file %s: %s"LF, savename, strerror(last_errno));\ if (fcheck) {\ - fspc(opt->errlog,"error");\ - fprintf(opt->errlog,"* * Fatal write error, giving up"LF);\ + HTS_LOG(opt,LOG_ERROR);\ + fprintf(opt->log,"* * Fatal 
write error, giving up"LF);\ }\ test_flush;\ }\ @@ -196,23 +197,24 @@ Please visit our Website: http://www.httrack.com } else {\ int fcheck;\ if ((fcheck=check_fatal_io_errno())) {\ - fspc(opt->log,"error"); fprintf(opt->log,"Mirror aborted: disk full or filesystem problems"LF); \ + HTS_LOG(opt,LOG_ERROR); fprintf(opt->log,"Mirror aborted: disk full or filesystem problems"LF); \ test_flush; \ opt->state.exit_xh=-1;\ }\ - if (opt->errlog) { \ - fspc(opt->errlog,"error");\ - fprintf(opt->errlog,"Unable to save file %s : %s"LF, savename, strerror(errno));\ + if (opt->log) { \ + int last_errno = errno; \ + HTS_LOG(opt,LOG_ERROR);\ + fprintf(opt->log,"Unable to save file %s : %s"LF, savename, strerror(last_errno));\ if (fcheck) {\ - fspc(opt->errlog,"error");\ - fprintf(opt->errlog,"* * Fatal write error, giving up"LF);\ + HTS_LOG(opt,LOG_ERROR);\ + fprintf(opt->log,"* * Fatal write error, giving up"LF);\ }\ test_flush;\ }\ }\ } else {\ - file_notify(urladr, urlfil, savename, 0, 0, r->notmodified); \ - filenote(savename,NULL); \ + file_notify(opt,urladr, urlfil, savename, 0, 0, r->notmodified); \ + filenote(&opt->state.strc, savename,NULL); \ }\ if (cache->ndx)\ cache_writedata(cache->ndx,cache->dat,"//[HTML-MD5]//",savename,digest,(int)strlen(digest));\ @@ -237,7 +239,7 @@ Please visit our Website: http://www.httrack.com fflush(makeindex_fp); \ fclose(makeindex_fp); /* à ne pas oublier sinon on passe une nuit blanche */ \ makeindex_fp=NULL; \ - usercommand(opt,0,NULL,fconcat(opt->path_html,"index.html"),"primary","primary"); \ + usercommand(opt,0,NULL,fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_html),"index.html"),"primary","primary"); \ } \ } \ makeindex_done=1; /* ok c'est fait */ \ @@ -255,7 +257,7 @@ Please visit our Website: http://www.httrack.com #define liens_record(A,F,S,FA,FF) { \ int notecode=0; \ - int lienurl_len=((sizeof(lien_url)+HTS_ALIGN-1)/HTS_ALIGN)*HTS_ALIGN,\ + size_t lienurl_len=((sizeof(lien_url)+HTS_ALIGN-1)/HTS_ALIGN)*HTS_ALIGN,\ 
adr_len=strlen(A),\ fil_len=strlen(F),\ sav_len=strlen(S),\ @@ -314,27 +316,27 @@ Please visit our Website: http://www.httrack.com #define ENGINE_LOAD_CONTEXT() \ ENGINE_LOAD_CONTEXT_BASE(); \ /* */ \ - htsblk* r = stre->r_; \ - hash_struct* hash = stre->hash_; \ - int lien_max = *stre->lien_max_; \ + htsblk* r HTS_UNUSED = stre->r_; \ + hash_struct* hash HTS_UNUSED = stre->hash_; \ + int lien_max HTS_UNUSED = *stre->lien_max_; \ /* */ \ - int error = * stre->error_; \ - int store_errpage = * stre->store_errpage_; \ - char* codebase = stre->codebase; \ - char* base = stre->base; \ + int error HTS_UNUSED = * stre->error_; \ + int store_errpage HTS_UNUSED = * stre->store_errpage_; \ + char* codebase HTS_UNUSED = stre->codebase; \ + char* base HTS_UNUSED = stre->base; \ /* */ \ - int makeindex_done = *stre->makeindex_done_; \ - FILE* makeindex_fp = *stre->makeindex_fp_; \ - int makeindex_links = *stre->makeindex_links_; \ - char* makeindex_firstlink = stre->makeindex_firstlink_; \ + int makeindex_done HTS_UNUSED = *stre->makeindex_done_; \ + FILE* makeindex_fp HTS_UNUSED = *stre->makeindex_fp_; \ + int makeindex_links HTS_UNUSED = *stre->makeindex_links_; \ + char* makeindex_firstlink HTS_UNUSED = stre->makeindex_firstlink_; \ /* */ \ - char *template_header = stre->template_header_; \ - char *template_body = stre->template_body_; \ - char *template_footer = stre->template_footer_; \ + char *template_header HTS_UNUSED = stre->template_header_; \ + char *template_body HTS_UNUSED = stre->template_body_; \ + char *template_footer HTS_UNUSED = stre->template_footer_; \ /* */ \ - LLint stat_fragment = *stre->stat_fragment_; \ - TStamp makestat_time = stre->makestat_time; \ - FILE* makestat_fp = stre->makestat_fp + LLint stat_fragment HTS_UNUSED = *stre->stat_fragment_; \ + TStamp makestat_time HTS_UNUSED = stre->makestat_time; \ + FILE* makestat_fp HTS_UNUSED = stre->makestat_fp #define ENGINE_SAVE_CONTEXT() \ ENGINE_SAVE_CONTEXT_BASE(); \ @@ -369,7 +371,7 @@ Please visit 
our Website: http://www.httrack.com /* Increment current pointer to 'steps' characters, modifying automate if necessary */ #define INCREMENT_CURRENT_ADR(steps) do { \ - int steps__ = (steps); \ + int steps__ = (int) ( steps ); \ while(steps__ > 0) { \ adr++; \ AUTOMATE_LOOKUP_CURRENT_ADR(); \ @@ -382,39 +384,38 @@ Please visit our Website: http://www.httrack.com int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { /* Load engine variables */ ENGINE_LOAD_CONTEXT(); + char catbuff[CATBUFF_SIZE]; -#if HTS_ANALYSTE { char* cAddr = r->adr; int cSize = (int) r->size; if ( (opt->debug>0) && (opt->log!=NULL) ) { - fspc(opt->log,"info"); fprintf(opt->log,"engine: preprocess-html: %s%s"LF, urladr, urlfil); + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"engine: preprocess-html: %s%s"LF, urladr, urlfil); } - if (hts_htmlcheck_preprocess(&cAddr, &cSize, urladr, urlfil) == 1) { + if (RUN_CALLBACK4(opt, preprocess, &cAddr, &cSize, urladr, urlfil) == 1) { r->adr = cAddr; r->size = cSize; } } - if (hts_htmlcheck(r->adr,(int)r->size,urladr,urlfil)) { -#endif + if (RUN_CALLBACK4(opt, check_html, r->adr,(int)r->size,urladr,urlfil)) { FILE* fp=NULL; // fichier écrit localement char* adr=r->adr; // pointeur (on parcourt) char* lastsaved; // adresse du dernier octet sauvé + 1 if ( (opt->debug>1) && (opt->log!=NULL) ) { - fspc(opt->log,"debug"); fprintf(opt->log,"scanning file %s%s (%s).."LF, urladr, urlfil, savename); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"scanning file %s%s (%s).."LF, urladr, urlfil, savename); test_flush; } // Indexing! 
#if HTS_MAKE_KEYWORD_INDEX if (opt->kindex) { - if (index_keyword(r->adr,r->size,r->contenttype,savename,opt->path_html)) { + if (index_keyword(r->adr,r->size,r->contenttype,savename,StringBuff(opt->path_html))) { if ( (opt->debug>1) && (opt->log!=NULL) ) { - fspc(opt->log,"debug"); fprintf(opt->log,"indexing file..done"LF); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"indexing file..done"LF); test_flush; } } else { if ( (opt->debug>1) && (opt->log!=NULL) ) { - fspc(opt->log,"debug"); fprintf(opt->log,"indexing file..error!"LF); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"indexing file..error!"LF); test_flush; } } } @@ -470,6 +471,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { // int parent_relative=0; // the parent is the base path (.js, .css..) HT_ADD_START; // débuter + lastsaved=adr; /* Initialize script automate for comments, quotes.. */ memset(inscript_state, 0xff, sizeof(inscript_state)); @@ -498,15 +500,6 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { inscript_state[INSCRIPT_ANTISLASH_IN_QUOTE][INSCRIPT_DEFAULT]=INSCRIPT_INQUOTE; /* #8: escape in "" */ inscript_state[INSCRIPT_ANTISLASH_IN_QUOTE2][INSCRIPT_DEFAULT]=INSCRIPT_INQUOTE2; /* #9: escape in '' */ - - /* statistics */ - if ((opt->getmode & 1) && (ptr>0)) { - /* - HTS_STAT.stat_files++; - HTS_STAT.stat_bytes+=r->size; - */ - } - /* Primary list or URLs */ if (ptr == 0) { intag=1; @@ -515,8 +508,8 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { } /* Check is the file is a .js file */ else if ( - (compare_mime(r->contenttype, str->url_file, "application/x-javascript")!=0) - || (compare_mime(r->contenttype, str->url_file, "text/css")!=0) + (compare_mime(opt,r->contenttype, str->url_file, "application/x-javascript")!=0) + || (compare_mime(opt,r->contenttype, str->url_file, "text/css")!=0) ) { /* JavaScript js file */ inscript=1; if (opt->parsedebug) { HT_ADD("<@@ inscript @@>"); } @@ -524,10 +517,10 @@ int 
htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { intag=1; // because après <script> on y est .. - pas utile intag_start_valid=0; // OUI car nous sommes dans du code, plus dans du "vrai" tag if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"note: this file is a javascript file"LF); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"note: this file is a javascript file"LF); test_flush; } // for javascript only - if (compare_mime(r->contenttype, str->url_file, "application/x-javascript") != 0) { + if (compare_mime(opt,r->contenttype, str->url_file, "application/x-javascript") != 0) { // all links must be checked against parent, not this link if (liens[ptr]->precedent != 0) { parent_relative=1; @@ -535,25 +528,42 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { } } /* Or a real audio */ - else if (compare_mime(r->contenttype, str->url_file, "audio/x-pn-realaudio")!=0) { /* realaudio link file */ + else if (compare_mime(opt,r->contenttype, str->url_file, "audio/x-pn-realaudio")!=0) { /* realaudio link file */ inscript=intag=0; inscript_name="media"; intag_start_valid=0; in_media="LNK"; // real media! -> links } /* Or a m3u playlist */ - else if (compare_mime(r->contenttype, str->url_file, "audio/x-mpegurl")!=0) { /* mp3 link file */ + else if (compare_mime(opt,r->contenttype, str->url_file, "audio/x-mpegurl")!=0) { /* mp3 link file */ inscript=intag=0; inscript_name="media"; intag_start_valid=0; in_media="LNK"; // m3u! 
-> links } - else if (compare_mime(r->contenttype, str->url_file, "application/x-authorware-map")!=0) { /* macromedia aam file */ + else if (compare_mime(opt,r->contenttype, str->url_file, "application/x-authorware-map")!=0) { /* macromedia aam file */ inscript=intag=0; inscript_name="media"; intag_start_valid=0; in_media="AAM"; // aam } + /* Or a RSS file */ + else if ( + compare_mime(opt,r->contenttype, str->url_file, "text/xml") != 0 + || compare_mime(opt,r->contenttype, str->url_file, "application/xml") != 0 + ) + { + if (strstr(adr, "http://purl.org/rss/") != NULL) // Hmm, this is a bit lame ; will have to cleanup + { /* RSS file */ + inscript=intag=0; + intag_start_valid=0; + in_media=NULL; // regular XML + } else { // cancel: write all + adr = r->adr + r->size; + HT_ADD_ADR; + lastsaved=adr; + } + } // Detect UTF8 format //if (is_unicode_utf8((unsigned char*) r->adr, (unsigned int) r->size) == 1) { @@ -569,13 +579,10 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { // analyser ce qu'il y a en mémoire (fichier html) // on scanne les balises // ------------------------------------------------------------ -#if HTS_ANALYSTE - _hts_in_html_done=0; // 0% scannés - _hts_cancel=0; // pas de cancel - _hts_in_html_parsing=1; // flag pour indiquer un parsing -#endif - base[0]='\0'; // effacer base-href - lastsaved=adr; + opt->state._hts_in_html_done=0; // 0% scannés + opt->state._hts_in_html_parsing=1; // flag pour indiquer un parsing + + base[0]='\0'; // effacer base-href do { int p=0; int valid_p=0; // force to take p even if == 0 @@ -585,6 +592,10 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { INSCRIPT inscript_state_pos_prev=inscript_state_pos; error=0; + /* Break if we are done yet */ + if ( ( adr - r->adr ) >= r->size) + break; + /* Hack to avoid NULL char problems with C syntax */ /* Yes, some bogus HTML pages can embed null chars and therefore can not be properly handled if this hack is not done @@ -594,8 +605,6 @@ 
int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { *adr=' '; } - - /* index.html built here */ @@ -626,9 +635,9 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { if (p) { // ok center if (makeindex_fp==NULL) { - file_notify("", "", fconcat(opt->path_html,"index.html"), 1, 1, 0); - verif_backblue(opt,opt->path_html); // générer gif - makeindex_fp=filecreate(fconcat(opt->path_html,"index.html")); + file_notify(opt,"", "", fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_html),"index.html"), 1, 1, 0); + verif_backblue(opt,StringBuff(opt->path_html)); // générer gif + makeindex_fp=filecreate(&opt->state.strc, fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_html),"index.html")); if (makeindex_fp!=NULL) { // Header @@ -653,7 +662,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { b=strchr(a,'<'); // prochain tag } } - if (lienrelatif(tempo,liens[ptr]->sav,concat(opt->path_html,"index.html"))==0) { + if (lienrelatif(tempo,liens[ptr]->sav,concat(OPT_GET_BUFF(opt),StringBuff(opt->path_html),"index.html"))==0) { detect_title=1; // ok détecté pour cette page! 
makeindex_links++; // un de plus strcpybuff(makeindex_firstlink,tempo); @@ -739,14 +748,14 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { char* eol="\n"; if (strchr(r->adr,'\r')) eol="\r\n"; - if (strnotempty(opt->footer) || opt->urlmode != 4) { /* != preserve */ - if (strnotempty(opt->footer)) { + if (StringNotEmpty(opt->footer) || opt->urlmode != 4) { /* != preserve */ + if (StringNotEmpty(opt->footer)) { char BIGSTK tempo[1024+HTS_URLMAXSIZE*2]; char gmttime[256]; tempo[0]='\0'; time_gmt_rfc822(gmttime); strcatbuff(tempo,eol); - sprintf(tempo+strlen(tempo),opt->footer,jump_identification(urladr),urlfil,gmttime,HTTRACK_VERSIONID,"","","","","","",""); + sprintf(tempo+strlen(tempo),StringBuff(opt->footer),jump_identification(urladr),urlfil,gmttime,HTTRACK_VERSIONID,"","","","","","",""); strcatbuff(tempo,eol); //fwrite(tempo,1,strlen(tempo),fp); HT_ADD(tempo); @@ -901,7 +910,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { char BIGSTK tmp[HTS_URLMAXSIZE/2 + 2]; tmp[0] = '\0'; strncat(tmp, a + dot + 1, n - dot - 1); - if (is_knowntype(tmp) || ishtml_ext(tmp) != -1) { + if (is_knowntype(opt,tmp) || ishtml_ext(tmp) != -1) { adr++; p = 0; valid_p = 1; @@ -995,7 +1004,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { char str[512]; str[0]='\0'; strncatbuff(str,b,minimum((int) (a - b + 1), 32)); - fspc(opt->log,"debug"); fprintf(opt->log,"active code (%s) detected in javascript: %s"LF,(check_this_fking_line==2)?"parse":"pickup",str); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"active code (%s) detected in javascript: %s"LF,(check_this_fking_line==2)?"parse":"pickup",str); test_flush; } } @@ -1217,8 +1226,8 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { printf("robots.txt meta tag: nofollow in %s%s\n",urladr,urlfil); #endif nofollow=1; // NE PLUS suivre liens dans cette page - if (opt->errlog) { - fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Link %s%s not 
scanned (follow robots meta tag)"LF,urladr,urlfil); + if (opt->log) { + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Link %s%s not scanned (follow robots meta tag)"LF,urladr,urlfil); test_flush; } } @@ -1288,7 +1297,6 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { foo("url") or foo(url) foo "url" */ - int nc; char expected = '='; // caractère attendu après char* expected_end = ";"; int can_avoid_quotes=0; @@ -1296,99 +1304,105 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { int ensure_not_mime=0; if (inscript_tag) expected_end=";\"\'"; // voir a href="javascript:doc.location='foo'" - nc = strfield(adr,".src"); // nom.src="image"; - if (!nc) nc = strfield(adr,".location"); // document.location="doc" - if (!nc) nc = strfield(adr,":location"); // javascript:location="doc" - if (!nc) { // location="doc" - if ( ( nc = strfield(adr,"location") ) - && !isspace(*(adr - 1)) - ) - nc = 0; - } - if (!nc) nc = strfield(adr,".href"); // document.location="doc" - if (!nc) if ( (nc = strfield(adr,".open")) ) { // window.open("doc",.. - expected='('; // parenthèse - expected_end="),"; // fin: virgule ou parenthèse - ensure_not_mime=1; //* ensure the url is not a mime type */ - } - if (!nc) if ( (nc = strfield(adr,".replace")) ) { // window.replace("url") - expected='('; // parenthèse - expected_end=")"; // fin: parenthèse - } - if (!nc) if ( (nc = strfield(adr,".link")) ) { // window.link("url") - expected='('; // parenthèse - expected_end=")"; // fin: parenthèse - } - if (!nc) if ( (nc = strfield(adr,"url")) && (!isalnum(*(adr - 1))) - && *(adr - 1) != '_' - ) { // url(url) + + /* Can we parse javascript ? 
*/ + if ( (opt->parsejava & HTSPARSE_NO_JAVASCRIPT) == 0) { + int nc; + nc = strfield(adr,".src"); // nom.src="image"; + if (!nc) nc = strfield(adr,".location"); // document.location="doc" + if (!nc) nc = strfield(adr,":location"); // javascript:location="doc" + if (!nc) { // location="doc" + if ( ( nc = strfield(adr,"location") ) + && !isspace(*(adr - 1)) + ) + nc = 0; + } + if (!nc) nc = strfield(adr,".href"); // document.location="doc" + if (!nc) if ( (nc = strfield(adr,".open")) ) { // window.open("doc",.. + expected='('; // parenthèse + expected_end="),"; // fin: virgule ou parenthèse + ensure_not_mime=1; //* ensure the url is not a mime type */ + } + if (!nc) if ( (nc = strfield(adr,".replace")) ) { // window.replace("url") expected='('; // parenthèse expected_end=")"; // fin: parenthèse - can_avoid_quotes=1; - quotes_replacement=')'; } - if (!nc) if ( (nc = strfield(adr,"import")) ) { // import "url" - if (is_space(*(adr+nc))) { - expected=0; // no char expected - } else - nc=0; + if (!nc) if ( (nc = strfield(adr,".link")) ) { // window.link("url") + expected='('; // parenthèse + expected_end=")"; // fin: parenthèse } - if (nc) { - char *a; - a=adr+nc; - while(is_realspace(*a)) a++; - if ((*a == expected) || (!expected)) { - if (expected) - a++; + if (!nc) if ( (nc = strfield(adr,"url")) && (!isalnum(*(adr - 1))) + && *(adr - 1) != '_' + ) { // url(url) + expected='('; // parenthèse + expected_end=")"; // fin: parenthèse + can_avoid_quotes=1; + quotes_replacement=')'; + } + if (!nc) if ( (nc = strfield(adr,"import")) ) { // import "url" + if (is_space(*(adr+nc))) { + expected=0; // no char expected + } else + nc=0; + } + if (nc) { + char *a; + a=adr+nc; while(is_realspace(*a)) a++; - if ((*a==34) || (*a=='\'') || (can_avoid_quotes)) { - char *b,*c; - int ndelim=1; - if ((*a==34) || (*a=='\'')) + if ((*a == expected) || (!expected)) { + if (expected) a++; - else - ndelim=0; - b=a; - if (ndelim) { - while((*b!=34) && (*b!='\'') && (*b!='\0')) b++; - } - else { 
- while((*b != quotes_replacement) && (*b!='\0')) b++; - } - c=b--; c+=ndelim; - while(*c==' ') c++; - if ((strchr(expected_end,*c)) || (*c=='\n') || (*c=='\r')) { - c-=(ndelim+1); - if ((int) (c - a + 1)) { - if (ensure_not_mime) { - int i = 0; - while(a != NULL && hts_main_mime[i] != NULL && hts_main_mime[i][0] != '\0') { - int p; - if ((p=strfield(a, hts_main_mime[i])) && a[p] == '/') { - a=NULL; + while(is_realspace(*a)) a++; + if ((*a==34) || (*a=='\'') || (can_avoid_quotes)) { + char *b,*c; + int ndelim=1; + if ((*a==34) || (*a=='\'')) + a++; + else + ndelim=0; + b=a; + if (ndelim) { + while((*b!=34) && (*b!='\'') && (*b!='\0')) b++; + } + else { + while((*b != quotes_replacement) && (*b!='\0')) b++; + } + c=b--; c+=ndelim; + while(*c==' ') c++; + if ((strchr(expected_end,*c)) || (*c=='\n') || (*c=='\r')) { + c-=(ndelim+1); + if ((int) (c - a + 1)) { + if (ensure_not_mime) { + int i = 0; + while(a != NULL && hts_main_mime[i] != NULL && hts_main_mime[i][0] != '\0') { + int p; + if ((p=strfield(a, hts_main_mime[i])) && a[p] == '/') { + a=NULL; + } + i++; } - i++; } - } - if (a != NULL) { - if ((opt->debug>1) && (opt->log!=NULL)) { - char str[512]; - str[0]='\0'; - strncatbuff(str,a,minimum((int) (c - a + 1),32)); - fspc(opt->log,"debug"); fprintf(opt->log,"link detected in javascript: %s"LF,str); test_flush; - } - p=(int) (a - adr); // p non nul: TRAITER CHAINE COMME FICHIER - if (can_avoid_quotes) { - ending_p=quotes_replacement; + if (a != NULL) { + if ((opt->debug>1) && (opt->log!=NULL)) { + char str[512]; + str[0]='\0'; + strncatbuff(str,a,minimum((int) (c - a + 1),32)); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"link detected in javascript: %s"LF,str); test_flush; + } + p=(int) (a - adr); // p non nul: TRAITER CHAINE COMME FICHIER + if (can_avoid_quotes) { + ending_p=quotes_replacement; + } } } } - } + } } } - } + + } /* HTSPARSE_NO_JAVASCRIPT */ } } @@ -1414,7 +1428,8 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { // risque: 
générer de faux fichiers parazites // fix: ne parse plus dans les commentaires // ------------------------------------------------------------ - if ( (opt->parseall) && (ptr>0) && (!in_media) /* && (!inscript_in_comments)*/ ) { // option parsing "brut" + if ( opt->parseall && (opt->parsejava & HTSPARSE_NO_AGGRESSIVE) == 0 + && (ptr>0) && (!in_media) /* && (!inscript_in_comments)*/ ) { // option parsing "brut" //int incomment_justquit=0; if (!is_realspace(*adr)) { int noparse=0; @@ -1547,10 +1562,10 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { } // Prendre si extension reconnue if (!url_ok) { - get_httptype(type,tempo,0); + get_httptype(opt,type,tempo,0); if (strnotempty(type)) // type reconnu! url_ok=1; - else if (is_dyntype(get_ext(tempo))) // reconnu php,cgi,asp.. + else if (is_dyntype(get_ext(OPT_GET_BUFF(opt),tempo))) // reconnu php,cgi,asp.. url_ok=1; // MAIS pas les foobar@aol.com !! if (strchr(tempo,'@')) @@ -1576,7 +1591,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { if (nop) { url_ok=0; if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"dirty parsing: bad tag avoided: %s"LF,hts_nodetect[i-1]); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"dirty parsing: bad tag avoided: %s"LF,hts_nodetect[i-1]); test_flush; } } } @@ -1828,19 +1843,17 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { // 1: interdit (patcher tout de même adresse) if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"link detected in html (tag): %s"LF,lien); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"link detected in html (tag): %s"LF,lien); test_flush; } // external check -#if HTS_ANALYSTE - if (!hts_htmlcheck_linkdetected(lien) || !hts_htmlcheck_linkdetected2(lien, intag_start)) { + if (!RUN_CALLBACK1(opt, linkdetected, lien) || !RUN_CALLBACK2(opt, linkdetected2, lien, intag_start)) { error=1; // erreur - if (opt->errlog) { - 
fspc(opt->errlog,"error"); fprintf(opt->errlog,"Link %s refused by external wrapper"LF,lien); + if (opt->log) { + HTS_LOG(opt,LOG_ERROR); fprintf(opt->log,"Link %s refused by external wrapper"LF,lien); test_flush; } } -#endif #if HTS_STRIP_DOUBLE_SLASH // supprimer les // en / (sauf pour http://) @@ -1876,7 +1889,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { // (IMG SRC="foo.<\n><\t>gif<\t>") { char* a = lien; - int llen; + size_t llen; // strip ending spaces llen = ( *a != '\0' ) ? strlen(a) : 0; @@ -1901,7 +1914,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { if (strchr(lien, ',')) { error=1; // erreur if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"link rejected (multiple-archive) %s"LF,lien); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"link rejected (multiple-archive) %s"LF,lien); test_flush; } } } @@ -1922,7 +1935,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { // XXXXXXXXXXXXXXXXX strcpybuff(lien,unescape_http(lien)); //strcpybuff(lien,unescape_http_unharm(lien, (no_esc_utf)?0:1)); /* Never unescape high-chars (we don't know the encoding!!) 
*/ - strcpybuff(lien,unescape_http_unharm(lien, 1)); /* note: '%' is still escaped */ + strcpybuff(lien,unescape_http_unharm(catbuff,lien, 1)); /* note: '%' is still escaped */ escape_remove_control(lien); escape_spc_url(lien); strcatbuff(lien,query); /* restore */ @@ -2034,7 +2047,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { // si c'est un chemin, alors vérifier (toto/toto.html -> http://www/toto/) if (!error) { if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"position link check %s"LF,lien); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"position link check %s"LF,lien); test_flush; } if ((p_type==2) || (p_type==-2)) { // code ou codebase @@ -2125,7 +2138,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { } if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"code/codebase link %s base %s"LF,lien,base); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"code/codebase link %s base %s"LF,lien,base); test_flush; } //printf("base code: %s - %s\n",lien,base); } @@ -2153,13 +2166,13 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { // ** vérifier que ../ fonctionne (ne doit pas arriver mais bon..) 
if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"link modified with code/codebase %s"LF,lien); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"link modified with code/codebase %s"LF,lien); test_flush; } } } else { error=1; // erreur - if (opt->errlog) { - fspc(opt->errlog,"error"); fprintf(opt->errlog,"Link %s too long with base href"LF,lien); + if (opt->log) { + HTS_LOG(opt,LOG_ERROR); fprintf(opt->log,"Link %s too long with base href"LF,lien); test_flush; } } @@ -2178,12 +2191,12 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { strcpybuff(lien,tempo); // patcher en considérant base if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"link modified with code/codebase %s"LF,lien); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"link modified with code/codebase %s"LF,lien); test_flush; } } else { error=1; // erreur - if (opt->errlog) { - fspc(opt->errlog,"error"); fprintf(opt->errlog,"Link %s too long with base href"LF,lien); + if (opt->log) { + HTS_LOG(opt,LOG_ERROR); fprintf(opt->log,"Link %s too long with base href"LF,lien); test_flush; } } @@ -2196,34 +2209,33 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { } } - // transformer lien quelconque (http, relatif, etc) en une adresse // et un chemin+fichier (adr,fil) if (!error) { int reponse; if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"build relative link %s with %s%s"LF,lien,relativeurladr,relativeurlfil); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"build relative link %s with %s%s"LF,lien,relativeurladr,relativeurlfil); test_flush; } if ((reponse=ident_url_relatif(lien,relativeurladr,relativeurlfil,adr,fil))<0) { adr[0]='\0'; // erreur if (reponse==-2) { - if (opt->errlog) { - fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Link %s not caught (unknown protocol)"LF,lien); + if (opt->log) { + HTS_LOG(opt,LOG_WARNING); 
fprintf(opt->log,"Link %s not caught (unknown protocol)"LF,lien); test_flush; } } else { - if ((opt->debug>1) && (opt->errlog!=NULL)) { - fspc(opt->errlog,"debug"); fprintf(opt->errlog,"ident_url_relatif failed for %s with %s%s"LF,lien,relativeurladr,relativeurlfil); test_flush; + if ((opt->debug>1) && (opt->log!=NULL)) { + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"ident_url_relatif failed for %s with %s%s"LF,lien,relativeurladr,relativeurlfil); test_flush; } } } else { if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"built relative link %s with %s%s -> %s%s"LF,lien,relativeurladr,relativeurlfil,adr,fil); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"built relative link %s with %s%s -> %s%s"LF,lien,relativeurladr,relativeurlfil,adr,fil); test_flush; } } } else { if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"link %s not build, error detected before"LF,lien); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"link %s not build, error detected before"LF,lien); test_flush; } adr[0]='\0'; } @@ -2238,11 +2250,11 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { if ((adr[0]!='\0') && (strcmp(adr,"file://") && (p_type!=2) && (p_type!=-2)) { //## if ((adr[0]!='\0') && (adr[0]!=lOCAL_CHAR) && (p_type!=2) && (p_type!=-2)) { if (fil[strlen(fil)-1]!='/') { // pas répertoire - if (ishtml(fil)==-2) { // pas d'extension + if (ishtml(opt,fil)==-2) { // pas d'extension char BIGSTK loc[HTS_URLMAXSIZE*2]; // éventuelle nouvelle position loc[0]='\0'; if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"link-check-directory: %s%s"LF,adr,fil); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"link-check-directory: %s%s"LF,adr,fil); test_flush; } @@ -2250,8 +2262,8 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { switch (http_location(adr,fil,loc).statuscode) { case 200: // ok au final if (strnotempty(loc)) { // a changé 
d'adresse - if (opt->errlog) { - fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Link %s%s has moved to %s for %s%s"LF,adr,fil,loc,urladr,urlfil); + if (opt->log) { + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Link %s%s has moved to %s for %s%s"LF,adr,fil,loc,urladr,urlfil); test_flush; } @@ -2259,7 +2271,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { if (ident_url_absolute(loc,adr,fil)==-1) { adr[0]='\0'; // cancel if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"link-check-dir: %s%s"LF,adr,fil); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"link-check-dir: %s%s"LF,adr,fil); test_flush; } } @@ -2267,8 +2279,8 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { } break; case -2: case -3: // timeout ou erreur grave - if (opt->errlog) { - fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Connection too slow for testing link %s%s (from %s%s)"LF,adr,fil,urladr,urlfil); + if (opt->log) { + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Connection too slow for testing link %s%s (from %s%s)"LF,adr,fil,urladr,urlfil); test_flush; } @@ -2285,7 +2297,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { if (p_nocatch) { forbidden_url=1; // interdire récupération du lien if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"link forced external at %s%s"LF,adr,fil); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"link forced external at %s%s"LF,adr,fil); test_flush; } } @@ -2298,7 +2310,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { if (!p_nocatch) { if (adr[0]!='\0') { if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"wizard link test at %s%s.."LF,adr,fil); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"wizard link test at %s%s.."LF,adr,fil); test_flush; } forbidden_url=hts_acceptlink(opt,ptr,lien_tot,liens, @@ -2307,7 +2319,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* 
stre) { &set_prio_to, &just_test_it); if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"result for wizard link test: %d"LF,forbidden_url); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"result for wizard link test: %d"LF,forbidden_url); test_flush; } } @@ -2356,7 +2368,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { if (!p_nocatch) { if (adr[0]!='\0') { if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"wizard moved link retest at %s%s.."LF,adr,fil); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"wizard moved link retest at %s%s.."LF,adr,fil); test_flush; } forbidden_url=hts_acceptlink(opt,ptr,lien_tot,liens, @@ -2365,7 +2377,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { &set_prio_to, &just_test_it); if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"result for wizard moved link retest: %d"LF,forbidden_url); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"result for wizard moved link retest: %d"LF,forbidden_url); test_flush; } } @@ -2385,23 +2397,23 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { && p_type != 2 && p_type != -2 && forbidden_url == 0 && IS_DELAYED_EXT(save) - ) + ) { // pas d'erreur, on continue - r_sv = hts_wait_delayed(str, adr, fil, save, former_adr, former_fil, &forbidden_url); + r_sv = hts_wait_delayed(str, adr, fil, save, parenturladr, parenturlfil, former_adr, former_fil, &forbidden_url); } // record! 
if (r_sv!=-1) { // pas d'erreur, on continue /* log */ if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); + HTS_LOG(opt,LOG_DEBUG); if (forbidden_url!=1) { // le lien va être chargé if ((p_type==2) || (p_type==-2)) { // base href ou codebase, pas un lien fprintf(opt->log,"Code/Codebase: %s%s"LF,adr,fil); } else if ((opt->getmode & 4)==0) { fprintf(opt->log,"Record: %s%s -> %s"LF,adr,fil,save); } else { - if (!ishtml(fil)) + if (!ishtml(opt,fil)) fprintf(opt->log,"Record after: %s%s -> %s"LF,adr,fil,save); else fprintf(opt->log,"Record: %s%s -> %s"LF,adr,fil,save); @@ -2490,7 +2502,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { int cat_data_len=0; // ajouter lien external - switch ( (link_has_authority(adr)) ? 1 : ( (fil[strlen(fil)-1]=='/')?1:(ishtml(fil)) ) ) { + switch ( (link_has_authority(adr)) ? 1 : ( (fil[strlen(fil)-1]=='/')?1:(ishtml(opt,fil)) ) ) { case 1: case -2: // html ou répertoire if (opt->getmode & 1) { // sauver html patch_it=1; // redirect @@ -2506,7 +2518,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { if ( (strfield2(fil+max(0,(int)strlen(fil)-4),".gif")) || (strfield2(fil+max(0,(int)strlen(fil)-4),".jpg")) || (strfield2(fil+max(0,(int)strlen(fil)-4),".xbm")) - /*|| (ishtml(fil)!=0)*/ ) { + /*|| (ishtml(opt,fil)!=0)*/ ) { patch_it=1; // redirect add_url=1; // avec link aussi cat_name="external.gif"; @@ -2527,7 +2539,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { if (patch_it) { char BIGSTK save[HTS_URLMAXSIZE*2]; char BIGSTK tempo[HTS_URLMAXSIZE*2]; - strcpybuff(save,opt->path_html); + strcpybuff(save,StringBuff(opt->path_html)); strcatbuff(save,cat_name); if (lienrelatif(tempo,save, relativesavename)==0) { /* Never escape high-chars (we don't know the encoding!!) */ @@ -2574,18 +2586,18 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { } // écrire fichier? 
- if (verif_external(cat_nb,1)) { - //if (!fexist(fconcat(opt->path_html,cat_name))) { - FILE* fp = filecreate(fconcat(opt->path_html,cat_name)); + if (verif_external(opt,cat_nb,1)) { + //if (!fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_html),cat_name))) { + FILE* fp = filecreate(&opt->state.strc, fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_html),cat_name)); if (fp) { if (cat_data_len==0) { // texte - verif_backblue(opt,opt->path_html); + verif_backblue(opt,StringBuff(opt->path_html)); fprintf(fp,"%s%s","<!-- Created by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->"LF,cat_data); } else { // data fwrite(cat_data,cat_data_len,1,fp); } fclose(fp); - usercommand(opt,0,NULL,fconcat(opt->path_html,cat_name),"",""); + usercommand(opt,0,NULL,fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_html),cat_name),"",""); } } } else { // écrire normalement le nom de fichier @@ -2622,8 +2634,8 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { } // if add_class_dots_to_patch, this is because there is a problem!! if (add_class_dots_to_patch) { - if (opt->errlog) { - fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Error: can not rewind java path %s, check html code"LF,tempo); + if (opt->log) { + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Error: can not rewind java path %s, check html code"LF,tempo); test_flush; } } @@ -2714,7 +2726,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { //} } if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"relative link at %s build with %s and %s: %s"LF,adr,save,relativesavename,tempo); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"relative link at %s build with %s and %s: %s"LF,adr,save,relativesavename,tempo); test_flush; } @@ -2737,8 +2749,8 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { } // if add_class_dots_to_patch, this is because there is a problem!! 
if (add_class_dots_to_patch) { - if (opt->errlog) { - fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Error: can not rewind java path %s, check html code"LF,tempo); + if (opt->log) { + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Error: can not rewind java path %s, check html code"LF,tempo); test_flush; } } @@ -2786,8 +2798,8 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { } lastsaved=eadr-1; // dernier écrit+1 (enfin euh apres on fait un ++ alors hein) } else { - if (opt->errlog) { - fprintf(opt->errlog,"Error building relative link %s and %s"LF,save,relativesavename); + if (opt->log) { + fprintf(opt->log,"Error building relative link %s and %s"LF,save,relativesavename); test_flush; } } @@ -2798,8 +2810,8 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { if (fexist(save)) { // le fichier existe.. adr[0]='\0'; //if ((opt->debug>0) && (opt->log!=NULL)) { - if (opt->errlog) { - fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Link has already been written on disk, cancelled: %s"LF,save); + if (opt->log) { + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Link has already been written on disk, cancelled: %s"LF,save); test_flush; } } @@ -2808,8 +2820,8 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { /* Security check */ if (strlen(save) >= HTS_URLMAXSIZE) { adr[0]='\0'; - if (opt->errlog) { - fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Link is too long: %s"LF,save); + if (opt->log) { + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Link is too long: %s"LF,save); test_flush; } } @@ -2818,9 +2830,9 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { // n'y a-t-il pas trop de liens? if (lien_tot+1 >= lien_max-4) { // trop de liens! printf("PANIC! 
: Too many URLs : >%d [%d]\n",lien_tot,__LINE__); - if (opt->errlog) { - fprintf(opt->errlog,LF"Too many URLs, giving up..(>%d)"LF,lien_max); - fprintf(opt->errlog,"To avoid that: use #L option for more links (example: -#L1000000)"LF); + if (opt->log) { + fprintf(opt->log,LF"Too many URLs, giving up..(>%d)"LF,lien_max); + fprintf(opt->log,"To avoid that: use #L option for more links (example: -#L1000000)"LF); test_flush; } if ((opt->getmode & 1) && (ptr>0)) { if (fp) { fclose(fp); fp=NULL; } } @@ -2834,7 +2846,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { if ((opt->getmode & 4)==0) { // traiter html après pass_fix=0; } else { // vérifier que ce n'est pas un !html - if (!ishtml(fil)) + if (!ishtml(opt,fil)) pass_fix=1; // priorité inférieure (traiter après) else pass_fix=max(0,numero_passe); // priorité normale @@ -2843,7 +2855,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { /* If the file seems to be an html file, get depth-1 */ /* if (strnotempty(save)) { - if (ishtml(save) == 1) { + if (ishtml(opt,save) == 1) { // descore_prio = 2; } else { // descore_prio = 1; @@ -2864,7 +2876,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { strcmp(adr, liens[i]->adr) != 0 || strcmp(fil, liens[i]->fil) != 0 ) { - fspc(opt->log,"debug"); fprintf(opt->log,"merging similar links %s%s and %s%s"LF,adr,fil,liens[i]->adr,liens[i]->fil); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"merging similar links %s%s and %s%s"LF,adr,fil,liens[i]->adr,liens[i]->fil); test_flush; } } @@ -2893,7 +2905,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { ) { // non file if (opt->robots) { // récupérer robots - if (ishtml(fil)!=0) { // pas la peine pour des fichiers isolés + if (ishtml(opt,fil)!=0) { // pas la peine pour des fichiers isolés if (checkrobots(_ROBOTS,adr,"") != -1) { // robots.txt ? 
checkrobots_set(_ROBOTS ,adr,""); // ajouter entrée vide if (checkrobots(_ROBOTS,adr,"") == -1) { // robots.txt ? @@ -2901,8 +2913,8 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { liens_record(adr,"/robots.txt","","",""); if (liens[lien_tot]==NULL) { // erreur, pas de place réservée printf("PANIC! : Not enough memory [%d]\n",__LINE__); - if (opt->errlog) { - fprintf(opt->errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url))); + if (opt->log) { + fprintf(opt->log,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url))); test_flush; } if ((opt->getmode & 1) && (ptr>0)) { if (fp) { fclose(fp); fp=NULL; } } @@ -2921,12 +2933,12 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { printf("robots.txt: added file robots.txt for %s\n",adr); #endif if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"robots.txt added at %s"LF,adr); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"robots.txt added at %s"LF,adr); test_flush; } } else { - if (opt->errlog) { - fprintf(opt->errlog,"Unexpected robots.txt error at %d"LF,__LINE__); + if (opt->log) { + fprintf(opt->log,"Unexpected robots.txt error at %d"LF,__LINE__); test_flush; } } @@ -2941,8 +2953,8 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { liens_record(adr,fil,save,former_adr,former_fil); if (liens[lien_tot]==NULL) { // erreur, pas de place réservée printf("PANIC! 
: Not enough memory [%d]\n",__LINE__); - if (opt->errlog) { - fprintf(opt->errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url))); + if (opt->log) { + fprintf(opt->log,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url))); test_flush; } if ((opt->getmode & 1) && (ptr>0)) { if (fp) { fclose(fp); fp=NULL; } } @@ -2981,9 +2993,9 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { //strcpybuff(liens[lien_tot]->sav,save); if ((opt->debug>1) && (opt->log!=NULL)) { if (!just_test_it) { - fspc(opt->log,"debug"); fprintf(opt->log,"OK, NOTE: %s%s -> %s"LF,liens[lien_tot]->adr,liens[lien_tot]->fil,liens[lien_tot]->sav); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"OK, NOTE: %s%s -> %s"LF,liens[lien_tot]->adr,liens[lien_tot]->fil,liens[lien_tot]->sav); } else { - fspc(opt->log,"debug"); fprintf(opt->log,"OK, TEST: %s%s"LF,liens[lien_tot]->adr,liens[lien_tot]->fil); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"OK, TEST: %s%s"LF,liens[lien_tot]->adr,liens[lien_tot]->fil); } test_flush; } @@ -2991,7 +3003,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { lien_tot++; // UN LIEN DE PLUS } else { // if !dejafait if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"link has already been recorded, cancelled: %s"LF,save); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"link has already been recorded, cancelled: %s"LF,save); test_flush; } @@ -3083,11 +3095,10 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { } // pour les stats du shell si parsing trop long -#if HTS_ANALYSTE if (r->size) - _hts_in_html_done=(100 * ((int) (adr - r->adr)) ) / (int)(r->size); - if (_hts_in_html_poll) { - _hts_in_html_poll=0; + opt->state._hts_in_html_done=(100 * ((int) (adr - r->adr)) ) / (int)(r->size); + if (opt->state._hts_in_html_poll) { + opt->state._hts_in_html_poll=0; // temps à attendre, et remplir autant que l'on peut le 
cache (backing) back_wait(sback,opt,cache,HTS_STAT.stat_timestart); back_fillmax(sback,opt,cache,liens,ptr,numero_passe,lien_tot); @@ -3097,26 +3108,27 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { // Refresh various stats HTS_STAT.stat_nsocket=back_nsoc(sback); - HTS_STAT.stat_errors=fspc(NULL,"error"); - HTS_STAT.stat_warnings=fspc(NULL,"warning"); - HTS_STAT.stat_infos=fspc(NULL,"info"); + HTS_STAT.stat_errors=fspc(opt, NULL,"error"); + HTS_STAT.stat_warnings=fspc(opt, NULL,"warning"); + HTS_STAT.stat_infos=fspc(opt, NULL,"info"); HTS_STAT.nbk=backlinks_done(sback,liens,lien_tot,ptr); HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,sback); - if (!hts_htmlcheck_loop(sback->lnk, sback->count, 0,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) { - if (opt->errlog) { - fspc(opt->errlog,"info"); fprintf(opt->errlog,"Exit requested by shell or user"LF); + if (!RUN_CALLBACK7(opt, loop, sback->lnk, sback->count, 0,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) { + if (opt->log) { + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"Exit requested by shell or user"LF); test_flush; } *stre->exit_xh_=1; // exit requested XH_uninit; return -1; //adr = r->adr + r->size; // exit - } else if (_hts_cancel==1) { + } else if (opt->state._hts_cancel == 1) { // adr = r->adr + r->size; // exit nofollow=1; // moins violent - _hts_cancel=0; + opt->state._hts_cancel = 0; } + } // refresh the backing system each 2 seconds @@ -3124,20 +3136,19 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { back_wait(sback,opt,cache,HTS_STAT.stat_timestart); back_fillmax(sback,opt,cache,liens,ptr,numero_passe,lien_tot); } -#endif } while(( ((int) (adr - r->adr)) ) < r->size); -#if HTS_ANALYSTE - _hts_in_html_parsing=0; // flag - _hts_cancel=0; // pas de cancel -#endif - if ((opt->getmode & 1) && (ptr>0)) { + + opt->state._hts_in_html_parsing=0; // flag + opt->state._hts_cancel=0; // pas de cancel + + if ((opt->getmode & 1) 
&& (ptr>0)) { { char* cAddr = ht_buff; - int cSize = ht_len; + int cSize = (int) ht_len; if ( (opt->debug>0) && (opt->log!=NULL) ) { - fspc(opt->log,"info"); fprintf(opt->log,"engine: postprocess-html: %s%s"LF, urladr, urlfil); + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"engine: postprocess-html: %s%s"LF, urladr, urlfil); } - if (hts_htmlcheck_postprocess(&cAddr, &cSize, urladr, urlfil) == 1) { + if (RUN_CALLBACK4(opt, postprocess, &cAddr, &cSize, urladr, urlfil) == 1) { ht_buff = cAddr; ht_len = cSize; } @@ -3157,9 +3168,7 @@ int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) { //structcheck(savename); //filesave(opt,r->adr,r->size,savename); -#if HTS_ANALYSTE } // analyse OK -#endif /* Apply changes */ ENGINE_SAVE_CONTEXT(); @@ -3193,9 +3202,9 @@ int hts_mirror_check_moved(htsmoduleStruct* str, htsmoduleStructExtended* stre) char *rn=NULL; // char* p; - if ( (opt->debug>0) && (opt->errlog!=NULL) ) { - //if (opt->errlog) { - fspc(opt->errlog,"warning"); fprintf(opt->errlog,"%s for %s%s"LF,r->msg,urladr,urlfil); + if ( (opt->debug>0) && (opt->log!=NULL) ) { + //if (opt->log) { + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"%s for %s%s"LF,r->msg,urladr,urlfil); test_flush; } @@ -3223,8 +3232,8 @@ int hts_mirror_check_moved(htsmoduleStruct* str, htsmoduleStructExtended* stre) (void) adr_normalized(urladr, pn_adr); (void) fil_normalized(urlfil, pn_fil); if (strcasecmp(n_adr, pn_adr) == 0 && strcasecmp(n_fil, pn_fil) == 0) { - if (opt->errlog) { - fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Redirected link is identical because of 'URL Hack' option: %s%s and %s%s"LF, urladr, urlfil, mov_adr, mov_fil); + if (opt->log) { + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Redirected link is identical because of 'URL Hack' option: %s%s and %s%s"LF, urladr, urlfil, mov_adr, mov_fil); test_flush; } } @@ -3239,18 +3248,18 @@ int hts_mirror_check_moved(htsmoduleStruct* str, htsmoduleStructExtended* stre) if (strcmp(mov_fil,urlfil)==0) { error=1; get_it=-1; // ne 
rien faire - if (opt->errlog) { - fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Can not bear crazy server (%s) for %s%s"LF,r->msg,urladr,urlfil); + if (opt->log) { + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Can not bear crazy server (%s) for %s%s"LF,r->msg,urladr,urlfil); test_flush; } } else { // mauvaise casse, effacer entrée dans la pile et rejouer une fois get_it=1; } } else { // adresse différente - if (ishtml(mov_url)==0) { // pas même adresse MAIS c'est un fichier non html (pas de page moved possible) + if (ishtml(opt,mov_url)==0) { // pas même adresse MAIS c'est un fichier non html (pas de page moved possible) // -> on prend à cette adresse, le lien sera enregistré avec lien_record() (hash) if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"wizard link test for moved file at %s%s.."LF,mov_adr,mov_fil); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"wizard link test for moved file at %s%s.."LF,mov_adr,mov_fil); test_flush; } // accepté? @@ -3261,7 +3270,7 @@ int hts_mirror_check_moved(htsmoduleStruct* str, htsmoduleStructExtended* stre) NULL) != 1) { /* nouvelle adresse non refusée ? 
*/ get_it=1; if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"moved link accepted: %s%s"LF,mov_adr,mov_fil); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"moved link accepted: %s%s"LF,mov_adr,mov_fil); test_flush; } } @@ -3272,8 +3281,8 @@ int hts_mirror_check_moved(htsmoduleStruct* str, htsmoduleStructExtended* stre) if (get_it==1) { // court-circuiter le reste du traitement // et reculer pour mieux sauter - if (opt->errlog) { - fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Warning moved treated for %s%s (real one is %s%s)"LF,urladr,urlfil,mov_adr,mov_fil); + if (opt->log) { + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Warning moved treated for %s%s (real one is %s%s)"LF,urladr,urlfil,mov_adr,mov_fil); test_flush; } // canceller lien actuel @@ -3306,8 +3315,8 @@ int hts_mirror_check_moved(htsmoduleStruct* str, htsmoduleStructExtended* stre) lien_tot++; } else { // oups erreur, plus de mémoire!! printf("PANIC! : Not enough memory [%d]\n",__LINE__); - if (opt->errlog) { - fprintf(opt->errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url))); + if (opt->log) { + fprintf(opt->log,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url))); test_flush; } //if (opt->getmode & 1) { if (fp) { fclose(fp); fp=NULL; } } @@ -3315,8 +3324,8 @@ int hts_mirror_check_moved(htsmoduleStruct* str, htsmoduleStructExtended* stre) return 0; } } else { - if ( (opt->debug>0) && (opt->errlog!=NULL) ) { - fspc(opt->errlog,"warning"); fprintf(opt->errlog,"moving %s to an existing file %s"LF,liens[ptr]->fil,urlfil); + if ( (opt->debug>0) && (opt->log!=NULL) ) { + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"moving %s to an existing file %s"LF,liens[ptr]->fil,urlfil); test_flush; } } @@ -3335,8 +3344,8 @@ int hts_mirror_check_moved(htsmoduleStruct* str, htsmoduleStructExtended* stre) if (get_it==0) { // adresse vraiment différente et potentiellement en html (pas de 
possibilité de bouger la page tel quel à cause des <img src..> et cie) rn=(char*) calloct(8192,1); if (rn!=NULL) { - if (opt->errlog) { - fspc(opt->errlog,"warning"); fprintf(opt->errlog,"File has moved from %s%s to %s"LF,urladr,urlfil,mov_url); + if (opt->log) { + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"File has moved from %s%s to %s"LF,urladr,urlfil,mov_url); test_flush; } if (!opt->mimehtml) { @@ -3389,9 +3398,9 @@ int hts_mirror_check_moved(htsmoduleStruct* str, htsmoduleStructExtended* stre) #if HDEBUG printf("Partial content NOT up-to-date, reget all file for %s\n",liens[ptr]->sav); #endif - if ( (opt->debug>1) && (opt->errlog!=NULL) ) { - //if (opt->errlog) { - fspc(opt->errlog,"debug"); fprintf(opt->errlog,"Partial file reget (%s) for %s%s"LF,r->msg,urladr,urlfil); + if ( (opt->debug>1) && (opt->log!=NULL) ) { + //if (opt->log) { + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"Partial file reget (%s) for %s%s"LF,r->msg,urladr,urlfil); test_flush; } // enregistrer le MEME lien (MACRO) @@ -3412,8 +3421,8 @@ int hts_mirror_check_moved(htsmoduleStruct* str, htsmoduleStructExtended* stre) // } else { // oups erreur, plus de mémoire!! printf("PANIC! 
: Not enough memory [%d]\n",__LINE__); - if (opt->errlog) { - fprintf(opt->errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url))); + if (opt->log) { + fprintf(opt->log,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url))); test_flush; } //if (opt->getmode & 1) { if (fp) { fclose(fp); fp=NULL; } } @@ -3421,18 +3430,18 @@ int hts_mirror_check_moved(htsmoduleStruct* str, htsmoduleStructExtended* stre) return 0; } } else { - if (opt->errlog!=NULL) { - fspc(opt->errlog,"error"); fprintf(opt->errlog,"Can not remove old file %s"LF,urlfil); + if (opt->log!=NULL) { + HTS_LOG(opt,LOG_ERROR); fprintf(opt->log,"Can not remove old file %s"LF,urlfil); test_flush; } } } else { - if (opt->errlog!=NULL) { - fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Unexpected 412/416 error (%s) for %s%s"LF,r->msg,urladr,urlfil); + if (opt->log!=NULL) { + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Unexpected 412/416 error (%s) for %s%s"LF,r->msg,urladr,urlfil); test_flush; } } - } else if (r->statuscode!=200) { + } else if (r->statuscode!=HTTP_OK) { int can_retry=0; // cas où l'on peut reessayer @@ -3442,11 +3451,11 @@ int hts_mirror_check_moved(htsmoduleStruct* str, htsmoduleStructExtended* stre) if (opt->hostcontrol) { // timeout et retry épuisés if ((opt->hostcontrol & 1) && (liens[ptr]->retry<=0)) { if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"Link banned: %s%s"LF,urladr,urlfil); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"Link banned: %s%s"LF,urladr,urlfil); test_flush; } host_ban(opt,liens,ptr,lien_tot,sback,jump_identification(urladr)); if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"Info: previous log - link banned: %s%s"LF,urladr,urlfil); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"Info: previous log - link banned: %s%s"LF,urladr,urlfil); test_flush; } } else can_retry=1; } else can_retry=1; @@ 
-3455,11 +3464,11 @@ int hts_mirror_check_moved(htsmoduleStruct* str, htsmoduleStructExtended* stre) if ((opt->hostcontrol) && (liens[ptr]->retry<=0)) { // too slow if (opt->hostcontrol & 2) { if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"Link banned: %s%s"LF,urladr,urlfil); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"Link banned: %s%s"LF,urladr,urlfil); test_flush; } host_ban(opt,liens,ptr,lien_tot,sback,jump_identification(urladr)); if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"Info: previous log - link banned: %s%s"LF,urladr,urlfil); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"Info: previous log - link banned: %s%s"LF,urladr,urlfil); test_flush; } } else can_retry=1; } else can_retry=1; @@ -3480,23 +3489,23 @@ int hts_mirror_check_moved(htsmoduleStruct* str, htsmoduleStructExtended* stre) if ( strcmp(liens[ptr]->fil,"/primary") != 0 ) { // no primary (internal page 0) if ((liens[ptr]->retry<=0) || (!can_retry) ) { // retry épuisés (ou retry impossible) - if (opt->errlog) { + if (opt->log) { if ((opt->retry>0) && (can_retry)){ - fspc(opt->errlog,"error"); - fprintf(opt->errlog,"\"%s\" (%d) after %d retries at link %s%s (from %s%s)"LF,r->msg,r->statuscode,opt->retry,urladr,urlfil,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil); + HTS_LOG(opt,LOG_ERROR); + fprintf(opt->log,"\"%s\" (%d) after %d retries at link %s%s (from %s%s)"LF,r->msg,r->statuscode,opt->retry,urladr,urlfil,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil); } else { if (r->statuscode==STATUSCODE_TEST_OK) { // test OK - if ((opt->debug>0) && (opt->errlog!=NULL)) { - fspc(opt->errlog,"info"); - fprintf(opt->errlog,"Test OK at link %s%s (from %s%s)"LF,urladr,urlfil,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil); + if ((opt->debug>0) && (opt->log!=NULL)) { + HTS_LOG(opt,LOG_INFO); + fprintf(opt->log,"Test OK at link %s%s (from 
%s%s)"LF,urladr,urlfil,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil); } } else { if (strcmp(urlfil,"/robots.txt")) { // ne pas afficher d'infos sur robots.txt par défaut - fspc(opt->errlog,"error"); - fprintf(opt->errlog,"\"%s\" (%d) at link %s%s (from %s%s)"LF,r->msg,r->statuscode,urladr,urlfil,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil); + HTS_LOG(opt,LOG_ERROR); + fprintf(opt->log,"\"%s\" (%d) at link %s%s (from %s%s)"LF,r->msg,r->statuscode,urladr,urlfil,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil); } else { if (opt->debug>1) { - fspc(opt->errlog,"info"); fprintf(opt->errlog,"No robots.txt rules at %s"LF,urladr); + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"No robots.txt rules at %s"LF,urladr); test_flush; } } @@ -3530,8 +3539,8 @@ int hts_mirror_check_moved(htsmoduleStruct* str, htsmoduleStructExtended* stre) } } else { // retry!! - if (opt->debug>0 && opt->errlog != NULL) { // on fera un alert si le retry échoue - fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Retry after error %d (%s) at link %s%s (from %s%s)"LF,r->statuscode,r->msg,urladr,urlfil,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil); + if (opt->debug>0 && opt->log != NULL) { // on fera un alert si le retry échoue + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Retry after error %d (%s) at link %s%s (from %s%s)"LF,r->statuscode,r->msg,urladr,urlfil,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil); test_flush; } // redemander fichier @@ -3547,9 +3556,9 @@ int hts_mirror_check_moved(htsmoduleStruct* str, htsmoduleStructExtended* stre) lien_tot++; } else { // oups erreur, plus de mémoire!! printf("PANIC! 
: Not enough memory [%d]\n",__LINE__); - if (opt->errlog) { - fspc(opt->errlog,"panic"); - fprintf(opt->errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url))); + if (opt->log) { + HTS_LOG(opt,LOG_PANIC); + fprintf(opt->log,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url))); test_flush; } //if (opt->getmode & 1) { if (fp) { fclose(fp); fp=NULL; } } @@ -3558,10 +3567,10 @@ int hts_mirror_check_moved(htsmoduleStruct* str, htsmoduleStructExtended* stre) } } } else { - if (opt->errlog) { + if (opt->log) { if (opt->debug>1) { - fspc(opt->errlog,"info"); - fprintf(opt->errlog,"Info: no robots.txt at %s%s"LF,urladr,urlfil); + HTS_LOG(opt,LOG_INFO); + fprintf(opt->log,"Info: no robots.txt at %s%s"LF,urladr,urlfil); } } } @@ -3609,10 +3618,10 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended* int do_pause=0; // user pause lockfile : create hts-paused.lock --> HTTrack will be paused - if (fexist(fconcat(opt->path_log,"hts-stop.lock"))) { + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-stop.lock"))) { // remove lockfile - remove(fconcat(opt->path_log,"hts-stop.lock")); - if (!fexist(fconcat(opt->path_log,"hts-stop.lock"))) { + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-stop.lock")); + if (!fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-stop.lock"))) { do_pause=1; } } @@ -3627,12 +3636,11 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended* // pause? 
if (do_pause) { if ( (opt->debug>0) && (opt->log!=NULL) ) { - fspc(opt->log,"info"); fprintf(opt->log,"engine: pause requested.."LF); + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"engine: pause requested.."LF); } while (back_nsoc(sback)>0) { // attendre fin des transferts back_wait(sback,opt,cache,HTS_STAT.stat_timestart); Sleep(200); -#if HTS_ANALYSTE { back_wait(sback,opt,cache,HTS_STAT.stat_timestart); @@ -3641,17 +3649,17 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended* // Refresh various stats HTS_STAT.stat_nsocket=back_nsoc(sback); - HTS_STAT.stat_errors=fspc(NULL,"error"); - HTS_STAT.stat_warnings=fspc(NULL,"warning"); - HTS_STAT.stat_infos=fspc(NULL,"info"); + HTS_STAT.stat_errors=fspc(opt,NULL,"error"); + HTS_STAT.stat_warnings=fspc(opt,NULL,"warning"); + HTS_STAT.stat_infos=fspc(opt,NULL,"info"); HTS_STAT.nbk=backlinks_done(sback,liens,lien_tot,ptr); HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,sback); b=0; - if (!hts_htmlcheck_loop(sback->lnk, sback->count, b,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT) + if (!RUN_CALLBACK7(opt, loop, sback->lnk, sback->count, b,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT) || !back_checkmirror(opt)) { - if (opt->errlog) { - fspc(opt->errlog,"info"); fprintf(opt->errlog,"Exit requested by shell or user"LF); + if (opt->log) { + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"Exit requested by shell or user"LF); test_flush; } *stre->exit_xh_=1; // exit requested @@ -3659,15 +3667,14 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended* return 0; } } -#endif } // On désalloue le buffer d'enregistrement des chemins créée, au cas où pendant la pause // l'utilisateur ferait un rm -r après avoir effectué un tar // structcheck_init(1); { - FILE* fp = fopen(fconcat(opt->path_log,"hts-paused.lock"),"wb"); + FILE* fp = fopen(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-paused.lock"),"wb"); if (fp) { - 
fspc(fp,"info"); // dater + fspc(NULL,fp,"info"); // dater fprintf(fp,"Pause"LF"HTTrack is paused after retreiving "LLintP" bytes"LF"Delete this file to continue the mirror->.."LF""LF"",(LLint)HTS_STAT.stat_bytes); fclose(fp); } @@ -3675,38 +3682,24 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended* stat_fragment=HTS_STAT.stat_bytes; /* Info for wrappers */ if ( (opt->debug>0) && (opt->log!=NULL) ) { - fspc(opt->log,"info"); fprintf(opt->log,"engine: pause: %s"LF,fconcat(opt->path_log,"hts-paused.lock")); - } -#if HTS_ANALYSTE - hts_htmlcheck_pause(fconcat(opt->path_log,"hts-paused.lock")); -#else - while (fexist(fconcat(opt->path_log,"hts-paused.lock"))) { - //back_wait(sback,opt,cache,HTS_STAT.stat_timestart); inutile!! (plus de sockets actives) - Sleep(1000); + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"engine: pause: %s"LF,fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-paused.lock")); } -#endif + RUN_CALLBACK1(opt, pause, fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-paused.lock")); } // } // end of pause/lock files -#if HTS_ANALYSTE // changement dans les préférences - /* - if (_hts_setopt) { - copy_htsopt(_hts_setopt,opt); // copier au besoin - _hts_setopt=NULL; // effacer callback - } - */ - if (_hts_addurl) { + if (opt->state._hts_addurl) { char BIGSTK add_adr[HTS_URLMAXSIZE*2]; char BIGSTK add_fil[HTS_URLMAXSIZE*2]; - while(*_hts_addurl) { + while(*opt->state._hts_addurl) { char BIGSTK add_url[HTS_URLMAXSIZE*2]; add_adr[0]=add_fil[0]=add_url[0]='\0'; - if (!link_has_authority(*_hts_addurl)) + if (!link_has_authority(*opt->state._hts_addurl)) strcpybuff(add_url,"http://"); // ajouter http:// - strcatbuff(add_url,*_hts_addurl); + strcatbuff(add_url,*opt->state._hts_addurl); if (ident_url_absolute(add_url,add_adr,add_fil)>=0) { // ----Ajout---- // noter NOUVEAU lien @@ -3727,13 +3720,13 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended* lien_tot++; // if ((opt->debug>0) && 
(opt->log!=NULL)) { - fspc(opt->log,"info"); fprintf(opt->log,"Link added by user: %s%s"LF,add_adr,add_fil); test_flush; + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"Link added by user: %s%s"LF,add_adr,add_fil); test_flush; } // } else { // oups erreur, plus de mémoire!! printf("PANIC! : Not enough memory [%d]\n",__LINE__); - if (opt->errlog) { - fprintf(opt->errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url))); + if (opt->log) { + fprintf(opt->log,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url))); test_flush; } //if (opt->getmode & 1) { if (fp) { fclose(fp); fp=NULL; } } @@ -3741,33 +3734,33 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended* return 0; } } else { - if ( (opt->debug>0) && (opt->errlog!=NULL) ) { - fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Existing link %s%s not added after user request"LF,add_adr,add_fil); + if ( (opt->debug>0) && (opt->log!=NULL) ) { + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Existing link %s%s not added after user request"LF,add_adr,add_fil); test_flush; } } } } else { - if (opt->errlog) { - fspc(opt->errlog,"error"); - fprintf(opt->errlog,"Error during URL decoding for %s"LF,add_url); + if (opt->log) { + HTS_LOG(opt,LOG_ERROR); + fprintf(opt->log,"Error during URL decoding for %s"LF,add_url); test_flush; } } // ----Fin Ajout---- - _hts_addurl++; // suivante + opt->state._hts_addurl++; // suivante } - _hts_addurl=NULL; // libérer _hts_addurl + opt->state._hts_addurl=NULL; // libérer _hts_addurl } // si une pause a été demandée - if (_hts_setpause || back_pluggable_sockets_strict(sback, opt) <= 0) { + if (opt->state._hts_setpause || back_pluggable_sockets_strict(sback, opt) <= 0) { // index du lien actuel - int b=back_index(sback,urladr,urlfil,savename); - int prev = _hts_in_html_parsing; + int b=back_index(opt,sback,urladr,urlfil,savename); + int prev = opt->state._hts_in_html_parsing; if (b<0) 
b=0; // forcer pour les stats - while(_hts_setpause || back_pluggable_sockets_strict(sback, opt) <= 0) { // on fait la pause.. - _hts_in_html_parsing = 6; + while(opt->state._hts_setpause || back_pluggable_sockets_strict(sback, opt) <= 0) { // on fait la pause.. + opt->state._hts_in_html_parsing = 6; back_wait(sback,opt,cache,HTS_STAT.stat_timestart); // Transfer rate @@ -3775,15 +3768,15 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended* // Refresh various stats HTS_STAT.stat_nsocket=back_nsoc(sback); - HTS_STAT.stat_errors=fspc(NULL,"error"); - HTS_STAT.stat_warnings=fspc(NULL,"warning"); - HTS_STAT.stat_infos=fspc(NULL,"info"); + HTS_STAT.stat_errors=fspc(opt,NULL,"error"); + HTS_STAT.stat_warnings=fspc(opt,NULL,"warning"); + HTS_STAT.stat_infos=fspc(opt,NULL,"info"); HTS_STAT.nbk=backlinks_done(sback,liens,lien_tot,ptr); HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,sback); - if (!hts_htmlcheck_loop(sback->lnk, sback->count, b,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) { - if (opt->errlog) { - fspc(opt->errlog,"info"); fprintf(opt->errlog,"Exit requested by shell or user"LF); + if (!RUN_CALLBACK7(opt, loop, sback->lnk, sback->count, b,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) { + if (opt->log) { + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"Exit requested by shell or user"LF); test_flush; } *stre->exit_xh_=1; // exit requested @@ -3792,22 +3785,21 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended* } Sleep(100); // pause } - _hts_in_html_parsing = prev; + opt->state._hts_in_html_parsing = prev; } -#endif // si le fichier n'est pas en backing, le mettre.. 
- if (!back_exist(sback,urladr,urlfil,savename)) { + if (!back_exist(sback,opt,urladr,urlfil,savename)) { #if BDEBUG==1 printf("crash backing: %s%s\n",liens[ptr]->adr,liens[ptr]->fil); #endif - if (back_add(sback,opt,cache,urladr,urlfil,savename,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil,liens[ptr]->testmode,&liens[ptr]->pass2)==-1) { + if (back_add(sback,opt,cache,urladr,urlfil,savename,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil,liens[ptr]->testmode)==-1) { printf("PANIC! : Crash adding error, unexpected error found.. [%d]\n",__LINE__); #if BDEBUG==1 printf("error while crash adding\n"); #endif - if (opt->errlog) { - fspc(opt->errlog,"error"); fprintf(opt->errlog,"Unexpected backing error for %s%s"LF,urladr,urlfil); + if (opt->log) { + HTS_LOG(opt,LOG_ERROR); fprintf(opt->log,"Unexpected backing error for %s%s"LF,urladr,urlfil); test_flush; } @@ -3824,28 +3816,19 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended* printf("%d sockets available for backing\n",n); #endif -#if HTS_ANALYSTE - if ((n>0) && (!_hts_setpause)) { // si sockets libre et pas en pause, ajouter -#else - if (n>0) { // si sockets libre -#endif + if ((n>0) && (!opt->state._hts_setpause)) { // si sockets libre et pas en pause, ajouter // remplir autant que l'on peut le cache (backing) back_fillmax(sback,opt,cache,liens,ptr,numero_passe,lien_tot); } // index du lien actuel - /* - b=back_index(sback,urladr,urlfil,savename); - - if (b>=0) - */ { // ------------------------------------------------------------ // attendre que le fichier actuel soit prêt - BOUCLE D'ATTENTE do { // index du lien actuel - b=back_index(sback,urladr,urlfil,savename); + b=back_index(opt,sback,urladr,urlfil,savename); #if BDEBUG==1 printf("back index %d, waiting\n",b); #endif @@ -3858,7 +3841,7 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended* back_wait(sback,opt,cache,HTS_STAT.stat_timestart); // Continue to the 
loop if link still present - b=back_index(sback,urladr,urlfil,savename); + b=back_index(opt,sback,urladr,urlfil,savename); if (b<0) break; @@ -3874,7 +3857,7 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended* back_fillmax(sback,opt,cache,liens,ptr,numero_passe,lien_tot); // Continue to the loop if link still present - b=back_index(sback,urladr,urlfil,savename); + b=back_index(opt,sback,urladr,urlfil,savename); if (b<0) break; @@ -3883,7 +3866,7 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended* TStamp l=time_local(); if ((int) (l-makestat_time) >= 60) { if (makestat_fp != NULL) { - fspc(makestat_fp,"info"); + fspc(NULL,makestat_fp,"info"); fprintf(makestat_fp,"Rate= %d (/"LLintP") \11NewLinks= %d (/%d)"LF,(int) ((HTS_STAT.HTS_TOTAL_RECV-*stre->makestat_total_)/(l-makestat_time)), (LLint)HTS_STAT.HTS_TOTAL_RECV,(int) lien_tot-*stre->makestat_lnk_,(int) lien_tot); fflush(makestat_fp); *stre->makestat_total_=HTS_STAT.HTS_TOTAL_RECV; @@ -3891,7 +3874,7 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended* } if (stre->maketrack_fp != NULL) { int i; - fspc(stre->maketrack_fp,"info"); fprintf(stre->maketrack_fp,LF); + fspc(NULL,stre->maketrack_fp,"info"); fprintf(stre->maketrack_fp,LF); for(i=0;i<back_max;i++) { back_info(sback,i,3,stre->maketrack_fp); } @@ -3902,20 +3885,22 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended* makestat_time=l; } } -#if HTS_ANALYSTE - { + + /* cancel links */ + { int i; - { - char* s=hts_cancel_file(""); + char* s; + while(( s = hts_cancel_file_pop(opt) ) != NULL) { if (strnotempty(s)) { // fichier à canceller - for(i=0;i<back_max;i++) { - if ((back[i].status>0)) { - if (strcmp(back[i].url_sav,s)==0) { // ok trouvé + for(i = 0 ; i < back_max ; i++) { + if ((back[i].status > 0)) { + if (strcmp(back[i].url_sav,s) == 0) { // ok trouvé if (back[i].status != 1000) { #if HTS_DEBUG_CLOSESOCK DEBUG_W("user cancel: 
deletehttp\n"); #endif - if (back[i].r.soc!=INVALID_SOCKET) deletehttp(&back[i].r); + if (back[i].r.soc!=INVALID_SOCKET) + deletehttp(&back[i].r); back[i].r.soc=INVALID_SOCKET; back[i].r.statuscode=STATUSCODE_INVALID; strcpybuff(back[i].r.msg,"Cancelled by User"); @@ -3928,6 +3913,7 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended* } s[0]='\0'; } + freet(s); } // Transfer rate @@ -3935,24 +3921,24 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended* // Refresh various stats HTS_STAT.stat_nsocket=back_nsoc(sback); - HTS_STAT.stat_errors=fspc(NULL,"error"); - HTS_STAT.stat_warnings=fspc(NULL,"warning"); - HTS_STAT.stat_infos=fspc(NULL,"info"); + HTS_STAT.stat_errors=fspc(opt,NULL,"error"); + HTS_STAT.stat_warnings=fspc(opt,NULL,"warning"); + HTS_STAT.stat_infos=fspc(opt,NULL,"info"); HTS_STAT.nbk=backlinks_done(sback,liens,lien_tot,ptr); HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,sback); - if (!hts_htmlcheck_loop(sback->lnk, sback->count, b,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) { - if (opt->errlog) { - fspc(opt->errlog,"info"); fprintf(opt->errlog,"Exit requested by shell or user"LF); + if (!RUN_CALLBACK7(opt, loop, sback->lnk, sback->count, b,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) { + if (opt->log) { + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"Exit requested by shell or user"LF); test_flush; } *stre->exit_xh_=1; // exit requested XH_uninit; return 0; } + } -#endif #if HTS_POLL if ((opt->shell) || (opt->keyboard) || (opt->verbosedisplay) || (!opt->quiet)) { TStamp tl; @@ -3969,11 +3955,9 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended* opt->verbosedisplay=2; /* Info for wrappers */ if ( (opt->debug>0) && (opt->log!=NULL) ) { - fspc(opt->log,"info"); fprintf(opt->log,"engine: change-options"LF); + HTS_LOG(opt,LOG_INFO); fprintf(opt->log,"engine: change-options"LF); } -#if HTS_ANALYSTE - 
hts_htmlcheck_chopt(opt); -#endif + RUN_CALLBACK0(opt, chopt); } } @@ -3985,11 +3969,11 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended* FILE* fp=stdout; int a=0; *stre->last_info_shell_=tl; - if (fexist(fconcat(opt->path_log,"hts-autopsy"))) { // débuggage: teste si le robot est vivant + if (fexist(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-autopsy"))) { // débuggage: teste si le robot est vivant // (oui je sais un robot vivant.. mais bon.. il a le droit de vivre lui aussi) // (libérons les robots esclaves de l'internet!) - remove(fconcat(opt->path_log,"hts-autopsy")); - fp=fopen(fconcat(opt->path_log,"hts-isalive"),"wb"); + remove(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-autopsy")); + fp=fopen(fconcat(OPT_GET_BUFF(opt), StringBuff(opt->path_log),"hts-isalive"),"wb"); a=1; } if ((*stre->info_shell_) || a) { @@ -4031,7 +4015,7 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended* // Then, skip it and go to the next one if (b<0) { if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"link #%d is ready, no more on the stack, skipping: %s%s.."LF,ptr,urladr,urlfil); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"link #%d is ready, no more on the stack, skipping: %s%s.."LF,ptr,urladr,urlfil); test_flush; } @@ -4051,21 +4035,18 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended* /* ensure correct location buffer set */ back[b].r.location=back[b].location_buffer; if (back[b].r.statuscode == STATUSCODE_INVALID) { - if (opt->errlog) { - fspc(opt->errlog,"error"); fprintf(opt->errlog,"Unexpected error: %s%s not found anymore in cache"LF,back[b].url_adr,back[b].url_fil); + if (opt->log) { + HTS_LOG(opt,LOG_ERROR); fprintf(opt->log,"Unexpected error: %s%s not found anymore in cache"LF,back[b].url_adr,back[b].url_fil); test_flush; } } else { if ( (opt->debug>1) && (opt->log!=NULL) ) { - fspc(opt->log,"debug"); 
fprintf(opt->log,"reclaim file %s%s (%d)"LF,back[b].url_adr,back[b].url_fil,back[b].r.statuscode); test_flush; + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"reclaim file %s%s (%d)"LF,back[b].url_adr,back[b].url_fil,back[b].r.statuscode); test_flush; } } } #endif -#if HTS_ANALYSTE==2 -#else - //if (!opt->quiet) { // petite animation if (!opt->verbosedisplay) { if (!opt->quiet) { static int roll=0; /* static: ok */ @@ -4075,18 +4056,18 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended* } } else if (opt->verbosedisplay==1) { if (b >= 0) { - if (back[b].r.statuscode==200) + if (back[b].r.statuscode==HTTP_OK) printf("%d/%d: %s%s ("LLintP" bytes) - OK\33[K\r",ptr,lien_tot,back[b].url_adr,back[b].url_fil,(LLint)back[b].r.size); else printf("%d/%d: %s%s ("LLintP" bytes) - %d\33[K\r",ptr,lien_tot,back[b].url_adr,back[b].url_fil,(LLint)back[b].r.size,back[b].r.statuscode); } else { - fspc(opt->errlog,"error"); fprintf(opt->errlog,"Link disappeared"); + HTS_LOG(opt,LOG_ERROR); fprintf(opt->log,"Link disappeared"); } fflush(stdout); } //} -#endif - // ------------------------------------------------------------ + + // ------------------------------------------------------------ // Vérificateur d'intégrité #if DEBUG_CHECKINT _CHECKINT(&back[b],"Retour de back_wait, après le while") @@ -4144,7 +4125,7 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended* } printf("]"); - //} else if (back[i].status==0) { + //} else if (back[i].status==STATUS_READY) { // strcpybuff(s,"ENDED"); } printf("\n"); @@ -4165,7 +4146,7 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended* while(i<minimum(back_max,160)) { if (back[i].status>0) { sprintf(s,"%d",back[i].r.size); - } else if (back[i].status==0) { + } else if (back[i].status==STATUS_READY) { strcpybuff(s,"ENDED"); } else strcpybuff(s," - "); @@ -4196,10 +4177,11 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended* /* Wait 
for delayed types */ int hts_wait_delayed(htsmoduleStruct* str, char* adr, char* fil, char* save, + char* parent_adr, char* parent_fil, char* former_adr, char* former_fil, int* forbidden_url) { ENGINE_LOAD_CONTEXT_BASE(); - hash_struct* hash = hashptr; + hash_struct* const hash = hashptr; int r_sv=0; @@ -4210,15 +4192,15 @@ int hts_wait_delayed(htsmoduleStruct* str, && !opt->state.stop ) { - int loops=0; - int continue_loop = 1; + int loops; + int continue_loop; if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"Waiting for type to be known: %s%s"LF, adr, fil); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"Waiting for type to be known: %s%s"LF, adr, fil); test_flush; } /* Follow while type is unknown and redirects occurs */ - while(IS_DELAYED_EXT(save) && continue_loop && loops++ < 7) { + for( loops = 0, continue_loop = 1 ; IS_DELAYED_EXT(save) && continue_loop && loops < 7 ; loops++ ) { continue_loop = 0; /* @@ -4231,9 +4213,9 @@ int hts_wait_delayed(htsmoduleStruct* str, lien_back back; memset(&back, 0, sizeof(back)); back.r = cache_read(opt, cache, adr, fil, NULL, NULL); // test uniquement - if (back.r.statuscode == 200 && strnotempty(back.r.contenttype)) { // cache found, and aswer is 'OK' + if (back.r.statuscode == HTTP_OK && strnotempty(back.r.contenttype)) { // cache found, and aswer is 'OK' if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"Direct type lookup in cache (-%%D1): %s"LF, back.r.contenttype); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"Direct type lookup in cache (-%%D1): %s"LF, back.r.contenttype); test_flush; } @@ -4246,13 +4228,13 @@ int hts_wait_delayed(htsmoduleStruct* str, int new_forbidden_url = hts_acceptmime(opt, ptr, lien_tot, liens, adr,fil, back.r.contenttype); if (new_forbidden_url != -1) { if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"result for wizard mime test: %s"LF,new_forbidden_url); + HTS_LOG(opt,LOG_DEBUG); 
fprintf(opt->log,"result for wizard mime test: %d"LF,new_forbidden_url); test_flush; } if (new_forbidden_url == 1) { *forbidden_url = new_forbidden_url; if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"link forbidden because of MIME types restrictions: %s%s"LF, adr, fil); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"link forbidden because of MIME types restrictions: %s%s"LF, adr, fil); test_flush; } break; // exit loop @@ -4265,10 +4247,23 @@ int hts_wait_delayed(htsmoduleStruct* str, } } + /* Check if the file was recorded already (necessary for redirects) */ + if (hash_read(hash,save,"",0,opt->urlhack) >= 0) { + if (loops == 0) { /* Should not happend */ + if ( opt->log!=NULL ) { + HTS_LOG(opt,LOG_ERROR); fprintf(opt->log, "Duplicate entry in hts_wait_delayed() cancelled: %s%s -> %s"LF,adr,fil,save); + test_flush; + } + } + /* Exit loop (we're done) */ + continue_loop = 0; + break ; + } + /* Add in backing (back_index() will respond correctly) */ - if (back_add_if_not_exists(sback,opt,cache,adr,fil,save,NULL,NULL,0,NULL) != -1) { + if (back_add_if_not_exists(sback,opt,cache,adr,fil,save,parent_adr,parent_fil,0) != -1) { int b; - b=back_index(sback,adr,fil,save); + b=back_index(opt,sback,adr,fil,save); if (b<0) { printf("PANIC! : Crash adding error, unexpected error found.. 
[%d]\n",__LINE__); XH_uninit; // désallocation mémoire & buffers @@ -4298,13 +4293,13 @@ int hts_wait_delayed(htsmoduleStruct* str, int new_forbidden_url = hts_acceptmime(opt, ptr, lien_tot, liens, adr,fil, delayed_back.r.contenttype); if (new_forbidden_url != -1) { if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"result for wizard mime test: %d"LF,*forbidden_url); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"result for wizard mime test: %d"LF,*forbidden_url); test_flush; } if (new_forbidden_url == 1) { *forbidden_url = new_forbidden_url; if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"link forbidden because of MIME types restrictions: %s%s"LF, adr, fil); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"link forbidden because of MIME types restrictions: %s%s"LF, adr, fil); test_flush; } break; // exit loop @@ -4313,8 +4308,8 @@ int hts_wait_delayed(htsmoduleStruct* str, } /* Re-Add wiht correct type */ - if (back_add_if_not_exists(sback,opt,cache,adr,fil,save,NULL,NULL,0,NULL) != -1) { - b=back_index(sback,adr,fil,save); + if (back_add_if_not_exists(sback,opt,cache,adr,fil,save,parent_adr,parent_fil,0) != -1) { + b=back_index(opt,sback,adr,fil,save); } if (b<0) { printf("PANIC! : Crash adding error, unexpected error found.. [%d]\n",__LINE__); @@ -4322,12 +4317,15 @@ int hts_wait_delayed(htsmoduleStruct* str, return -1; } if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"Type immediately loaded from cache: %s"LF, delayed_back.r.contenttype); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"Type immediately loaded from cache: %s"LF, delayed_back.r.contenttype); test_flush; } } /* Wait for headers to be received */ + if (b >= 0) { + back_set_locked(sback, b); // Locked entry + } do { if (b < 0) break; @@ -4341,7 +4339,6 @@ int hts_wait_delayed(htsmoduleStruct* str, } // on est obligé d'appeler le shell pour le refresh.. 
-#if HTS_ANALYSTE { // Transfer rate @@ -4349,20 +4346,19 @@ int hts_wait_delayed(htsmoduleStruct* str, // Refresh various stats HTS_STAT.stat_nsocket=back_nsoc(sback); - HTS_STAT.stat_errors=fspc(NULL,"error"); - HTS_STAT.stat_warnings=fspc(NULL,"warning"); - HTS_STAT.stat_infos=fspc(NULL,"info"); + HTS_STAT.stat_errors=fspc(opt,NULL,"error"); + HTS_STAT.stat_warnings=fspc(opt,NULL,"warning"); + HTS_STAT.stat_infos=fspc(opt,NULL,"info"); HTS_STAT.nbk=backlinks_done(sback,liens,lien_tot,ptr); HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,sback); - if (!hts_htmlcheck_loop(sback->lnk, sback->count, b,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) { + if (!RUN_CALLBACK7(opt, loop, sback->lnk, sback->count, b,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) { return -1; - } else if (_hts_cancel || !back_checkmirror(opt)) { // cancel 2 ou 1 (cancel parsing) + } else if (opt->state._hts_cancel || !back_checkmirror(opt)) { // cancel 2 ou 1 (cancel parsing) back_delete(opt,cache,sback,b); // cancel test break; } } -#endif } while( /* dns/connect/request */ ( back[b].status >= 99 && back[b].status <= 101 ) @@ -4373,6 +4369,9 @@ int hts_wait_delayed(htsmoduleStruct* str, /* Same for errors */ ( HTTP_IS_ERROR(back[b].r.statuscode) && back[b].status > 0 ) ); + if (b >= 0) { + back_set_unlocked(sback, b); // Unlocked entry + } /* ready (chunked) or ready (regular download) or ready (completed) */ // Note: filename NOT in hashtable yet - liens_record will do it, with the correct ext! @@ -4392,9 +4391,9 @@ int hts_wait_delayed(htsmoduleStruct* str, *forbidden_url = 1; /* Forbidden! 
*/ if (opt->log != NULL) { if (back[b].r.statuscode == STATUSCODE_TOO_BIG) { - fspc(opt->log,"error"); fprintf(opt->log,"link not taken because of its size (%d bytes) at %s%s"LF,(int)back[b].r.totalsize,adr,fil); + HTS_LOG(opt,LOG_ERROR); fprintf(opt->log,"link not taken because of its size (%d bytes) at %s%s"LF,(int)back[b].r.totalsize,adr,fil); } else { - fspc(opt->log,"error"); fprintf(opt->log,"link not taken because of error (%d '%s') at %s%s"LF,back[b].r.statuscode,back[b].r.msg,adr,fil); + HTS_LOG(opt,LOG_ERROR); fprintf(opt->log,"link not taken because of error (%d '%s') at %s%s"LF,back[b].r.statuscode,back[b].r.msg,adr,fil); } test_flush; } @@ -4409,10 +4408,10 @@ int hts_wait_delayed(htsmoduleStruct* str, strcpybuff(mov_url, back[b].r.location); // copier URL /* Remove (temporarily created) file if it was created */ - unlink(fconv(back[b].url_sav)); + unlink(fconv(OPT_GET_BUFF(opt),back[b].url_sav)); /* Remove slot! */ - if (back[b].status == 0) { + if (back[b].status == STATUS_READY) { back_maydelete(opt, cache, sback, b); } else { /* should not happend */ back_delete(opt, cache, sback, b); @@ -4426,7 +4425,7 @@ int hts_wait_delayed(htsmoduleStruct* str, // if (ident_url_relatif(mov_url,adr,fil,mov_adr,mov_fil)>=0) { if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"Redirect while resolving type: %s%s -> %s%s"LF, adr, fil, mov_adr, mov_fil); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"Redirect while resolving type: %s%s -> %s%s"LF, adr, fil, mov_adr, mov_fil); test_flush; } // si non bouclage sur soi même, ou si test avec GET non testé @@ -4443,7 +4442,6 @@ int hts_wait_delayed(htsmoduleStruct* str, // check explicit forbidden - don't follow 3xx in this case { int set_prio_to=0; - robots_wizard* robots = (robots_wizard*) opt->robotsptr; if (hts_acceptlink(opt,ptr,lien_tot,liens, mov_adr,mov_fil, NULL, NULL, @@ -4453,7 +4451,7 @@ int hts_wait_delayed(htsmoduleStruct* str, /* Note: the cache 'cached_tests' system will 
remember this error, and we'll only issue ONE request */ *forbidden_url = 1; /* Forbidden! */ if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"link forbidden because of redirect beyond the mirror scope at %s%s"LF,adr,fil); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"link forbidden because of redirect beyond the mirror scope at %s%s -> %s%s"LF,adr,fil,mov_adr,mov_fil); test_flush; } strcpybuff(adr,mov_adr); @@ -4478,9 +4476,13 @@ int hts_wait_delayed(htsmoduleStruct* str, strcpybuff(adr,mov_adr); strcpybuff(fil,mov_fil); continue_loop = 1; + + /* Recompute filename for hash lookup */ + save[0] = '\0'; + r_sv=url_savename(adr,fil,save,former_adr,former_fil,liens[ptr]->adr,liens[ptr]->fil,opt,liens,lien_tot,sback,cache,hash,ptr,numero_passe,&delayed_back); } else { - if ( opt->errlog!=NULL ) { - fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Unable to test %s%s (loop to same filename)"LF,adr,fil); + if ( opt->log!=NULL ) { + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Unable to test %s%s (loop to same filename)"LF,adr,fil); test_flush; } } // loop to same location @@ -4488,37 +4490,40 @@ int hts_wait_delayed(htsmoduleStruct* str, } // location } // redirect if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"Final type for %s%s: '%s'"LF, adr, fil, delayed_back.r.contenttype); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"Final type for %s%s: '%s'"LF, adr, fil, delayed_back.r.contenttype); test_flush; } - /* Recompute filename with MIME type */ - save[0] = '\0'; - r_sv=url_savename(adr,fil,save,former_adr,former_fil,liens[ptr]->adr,liens[ptr]->fil,opt,liens,lien_tot,sback,cache,hash,ptr,numero_passe,&delayed_back); - - /* Recompute authorization with MIME type */ - { - int new_forbidden_url = hts_acceptmime(opt, ptr, lien_tot, liens, adr,fil, delayed_back.r.contenttype); - if (new_forbidden_url != -1) { - if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"result 
for wizard mime test: %d"LF,forbidden_url); - test_flush; - } - if (new_forbidden_url == 1) { - *forbidden_url = new_forbidden_url; - if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"link forbidden because of MIME types restrictions: %s%s"LF, adr, fil); - test_flush; - } - break; // exit loop - } - } - } + /* If we are done, do additional checks with final type and authorizations */ + if (!continue_loop) { + /* Recompute filename with MIME type */ + save[0] = '\0'; + r_sv=url_savename(adr,fil,save,former_adr,former_fil,liens[ptr]->adr,liens[ptr]->fil,opt,liens,lien_tot,sback,cache,hash,ptr,numero_passe,&delayed_back); + + /* Recompute authorization with MIME type */ + { + int new_forbidden_url = hts_acceptmime(opt, ptr, lien_tot, liens, adr,fil, delayed_back.r.contenttype); + if (new_forbidden_url != -1) { + if ((opt->debug>1) && (opt->log!=NULL)) { + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"result for wizard mime test: %d"LF,*forbidden_url); + test_flush; + } + if (new_forbidden_url == 1) { + *forbidden_url = new_forbidden_url; + if ((opt->debug>1) && (opt->log!=NULL)) { + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"link forbidden because of MIME types restrictions: %s%s"LF, adr, fil); + test_flush; + } + break; // exit loop + } + } + } + } /* Still have a back reference */ if (b >= 0) { /* Finalize now as we have the type */ - if (back[b].status == 0) { + if (back[b].status == STATUS_READY) { if (!back[b].finalized) { back_finalize(opt,cache,sback,b); } @@ -4541,7 +4546,7 @@ int hts_wait_delayed(htsmoduleStruct* str, && IS_DELAYED_EXT(save)) { *forbidden_url = 1; if (opt->log!=NULL) { - fspc(opt->log,"warning"); fprintf(opt->log,"link is probably looping, type unknown, aborting: %s%s"LF, adr, fil); + HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"link is probably looping, type unknown, aborting: %s%s"LF, adr, fil); test_flush; } } diff --git a/src/htsparse.h b/src/htsparse.h index 561ae68..afd9e88 100644 --- a/src/htsparse.h +++ 
b/src/htsparse.h @@ -36,8 +36,27 @@ Please visit our Website: http://www.httrack.com /* Author: Xavier Roche */ /* ------------------------------------------------------------ */ +#include "htsglobal.h" -typedef struct htsmoduleStructExtended { +/* Forward definitions */ +#ifndef HTS_DEF_FWSTRUCT_htsblk +#define HTS_DEF_FWSTRUCT_htsblk +typedef struct htsblk htsblk; +#endif +#ifndef HTS_DEF_FWSTRUCT_robots_wizard +#define HTS_DEF_FWSTRUCT_robots_wizard +typedef struct robots_wizard robots_wizard; +#endif +#ifndef HTS_DEF_FWSTRUCT_hash_struct +#define HTS_DEF_FWSTRUCT_hash_struct +typedef struct hash_struct hash_struct; +#endif + +#ifndef HTS_DEF_FWSTRUCT_htsmoduleStructExtended +#define HTS_DEF_FWSTRUCT_htsmoduleStructExtended +typedef struct htsmoduleStructExtended htsmoduleStructExtended; +#endif +struct htsmoduleStructExtended { /* Main object */ htsblk* r_; @@ -81,7 +100,7 @@ typedef struct htsmoduleStructExtended { TStamp* last_info_shell_; int* info_shell_; -} htsmoduleStructExtended; +}; /* Library internal definictions */ @@ -115,6 +134,7 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended* */ int hts_wait_delayed(htsmoduleStruct* str, char* adr, char* fil, char* save, + char* parent_adr, char* parent_fil, char* former_adr, char* former_fil, int* forbidden_url); @@ -122,49 +142,49 @@ int hts_wait_delayed(htsmoduleStruct* str, /* Context state */ #define ENGINE_LOAD_CONTEXT_BASE() \ - lien_url** liens = (lien_url**) str->liens; \ - httrackp* opt = (httrackp*) str->opt; \ - struct_back* sback = (struct_back*) str->sback; \ - lien_back* const back = sback->lnk; \ - const int back_max = sback->count; \ - cache_back* cache = (cache_back*) str->cache; \ - hash_struct* hashptr = (hash_struct*) str->hashptr; \ - int numero_passe = str->numero_passe; \ - int add_tab_alloc = str->add_tab_alloc; \ + lien_url** const liens HTS_UNUSED = (lien_url**) str->liens; \ + httrackp* const opt HTS_UNUSED = (httrackp*) str->opt; \ + struct_back* 
const sback HTS_UNUSED = (struct_back*) str->sback; \ + lien_back* const back HTS_UNUSED = sback->lnk; \ + const int back_max HTS_UNUSED = sback->count; \ + cache_back* const cache HTS_UNUSED = (cache_back*) str->cache; \ + hash_struct* const hashptr HTS_UNUSED = (hash_struct*) str->hashptr; \ + const int numero_passe HTS_UNUSED = str->numero_passe; \ + const int add_tab_alloc HTS_UNUSED = str->add_tab_alloc; \ /* */ \ - int lien_tot = * ( (int*) (str->lien_tot_) ); \ - int ptr = * ( (int*) (str->ptr_) ); \ - int lien_size = * ( (int*) (str->lien_size_) ); \ - char* lien_buffer = * ( (char**) (str->lien_buffer_) ) + int lien_tot HTS_UNUSED = *str->lien_tot_; \ + int ptr HTS_UNUSED = *str->ptr_; \ + size_t lien_size HTS_UNUSED = *str->lien_size_; \ + char* lien_buffer HTS_UNUSED = *str->lien_buffer_ #define ENGINE_SAVE_CONTEXT_BASE() \ /* Apply changes */ \ - * ( (int*) (str->lien_tot_) ) = lien_tot; \ - * ( (int*) (str->ptr_) ) = ptr; \ - * ( (int*) (str->lien_size_) ) = lien_size; \ - * ( (char**) (str->lien_buffer_) ) = lien_buffer + * str->lien_tot_ = lien_tot; \ + * str->ptr_ = ptr; \ + * str->lien_size_ = lien_size; \ + * str->lien_buffer_ = lien_buffer #define WAIT_FOR_AVAILABLE_SOCKET() do { \ - int prev = _hts_in_html_parsing; \ + int prev = opt->state._hts_in_html_parsing; \ while(back_pluggable_sockets_strict(sback, opt) <= 0) { \ - _hts_in_html_parsing = 6; \ + opt->state._hts_in_html_parsing = 6; \ /* Wait .. 
*/ \ back_wait(sback,opt,cache,0); \ /* Transfer rate */ \ engine_stats(); \ /* Refresh various stats */ \ HTS_STAT.stat_nsocket=back_nsoc(sback); \ - HTS_STAT.stat_errors=fspc(NULL,"error"); \ - HTS_STAT.stat_warnings=fspc(NULL,"warning"); \ - HTS_STAT.stat_infos=fspc(NULL,"info"); \ + HTS_STAT.stat_errors=fspc(opt,NULL,"error"); \ + HTS_STAT.stat_warnings=fspc(opt,NULL,"warning"); \ + HTS_STAT.stat_infos=fspc(opt,NULL,"info"); \ HTS_STAT.nbk=backlinks_done(sback,liens,lien_tot,ptr); \ HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,sback); \ /* Check */ \ - if (!hts_htmlcheck_loop(sback->lnk, sback->count, -1,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) { \ + if (!RUN_CALLBACK7(opt, loop, sback->lnk, sback->count, -1,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) { \ return -1; \ } \ } \ - _hts_in_html_parsing = prev; \ + opt->state._hts_in_html_parsing = prev; \ } while(0) #endif diff --git a/src/htsrobots.c b/src/htsrobots.c index 5ca7640..47982cd 100644 --- a/src/htsrobots.c +++ b/src/htsrobots.c @@ -38,13 +38,13 @@ Please visit our Website: http://www.httrack.com /* Internal engine bytecode */ #define HTS_INTERNAL_BYTECODE -#include "htsrobots.h" - /* specific definitions */ +#include "htscore.h" #include "htsbase.h" #include "htslib.h" /* END specific definitions */ +#include "htsrobots.h" // -- robots -- diff --git a/src/htsrobots.h b/src/htsrobots.h index 195bbde..b60334d 100644 --- a/src/htsrobots.h +++ b/src/htsrobots.h @@ -40,11 +40,15 @@ Please visit our Website: http://www.httrack.com #define HTSROBOTS_DEFH // robots wizard -typedef struct robots_wizard { +#ifndef HTS_DEF_FWSTRUCT_robots_wizard +#define HTS_DEF_FWSTRUCT_robots_wizard +typedef struct robots_wizard robots_wizard; +#endif +struct robots_wizard { char adr[128]; char token[4096]; struct robots_wizard* next; -} robots_wizard; +}; /* Library internal definictions */ diff --git a/src/htsserver.c b/src/htsserver.c index bb30640..abcd1be 100644 
--- a/src/htsserver.c +++ b/src/htsserver.c @@ -50,7 +50,7 @@ Please visit our Website: http://www.httrack.com #include <string.h> #include <time.h> #include <fcntl.h> -#if HTS_WIN +#ifdef _WIN32 #else #include <arpa/inet.h> #endif @@ -94,9 +94,12 @@ char* commandReturnMsg = NULL; char* commandReturnCmdl = NULL; int commandReturnSet = 0; +httrackp *global_opt = NULL; + /* Extern */ extern void webhttrack_main(char* cmd); -extern void webhttrack_lock(int lock); +extern void webhttrack_lock(void); +extern void webhttrack_release(void); static int is_image(char* file) { return ( (strstr(file, ".gif") != NULL) ); @@ -183,7 +186,7 @@ T_SOC smallserver_init(int* port,char* adr) { // SOCaddr_copyaddr(server, server_size, hp_loc->h_addr_list[0], hp_loc->h_length); SOCaddr_initany(server, server_size); - if ( (soc=socket(SOCaddr_sinfamily(server), SOCK_STREAM, 0)) != INVALID_SOCKET) { + if ( (soc = (T_SOC) socket(SOCaddr_sinfamily(server), SOCK_STREAM, 0)) != INVALID_SOCKET) { SOCaddr_initport(server, *port); if ( bind(soc,(struct sockaddr*) &server, server_size) == 0 ) { /*int len; @@ -250,7 +253,7 @@ typedef struct { } initStrElt; #define SET_ERROR(err) do { \ - inthash_write(NewLangList, "error", (unsigned long int)strdup(err)); \ + inthash_write(NewLangList, "error", (intptr_t)strdup(err)); \ error_redirect = "/server/error.html"; \ } while(0) @@ -265,6 +268,7 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { String tmpbuff = STRING_EMPTY; String tmpbuff2 = STRING_EMPTY; String fspath = STRING_EMPTY; + char catbuff[CATBUFF_SIZE]; /* Load strings */ htslang_init(); @@ -306,21 +310,21 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { for(i = 0 ; initInt[i].name ; i++) { char tmp[32]; sprintf(tmp, "%d", initInt[i].value); - inthash_write(NewLangList, initInt[i].name, (unsigned long int)strdup(tmp)); + inthash_write(NewLangList, initInt[i].name, (intptr_t)strdup(tmp)); } for(i = 0 ; initOn[i] ; i++) { - 
inthash_write(NewLangList, initOn[i], (unsigned long int)strdup("1")); /* "on" */ + inthash_write(NewLangList, initOn[i], (intptr_t)strdup("1")); /* "on" */ } for(i = 0 ; initStr[i].name ; i++) { - inthash_write(NewLangList, initStr[i].name, (unsigned long int)strdup(initStr[i].value)); + inthash_write(NewLangList, initStr[i].name, (intptr_t)strdup(initStr[i].value)); } strcpybuff(pth, gethomedir()); strcatbuff(pth, "/websites"); - inthash_write(NewLangList, "path", (unsigned long int)strdup(pth)); + inthash_write(NewLangList, "path", (intptr_t)strdup(pth)); } /* Lock */ - webhttrack_lock(1); + webhttrack_lock(); // connexion (accept) while(!willexit && buffer != NULL && soc != INVALID_SOCKET) { @@ -340,15 +344,15 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { StringClear(tmpbuff); StringClear(tmpbuff2); StringClear(fspath); - StringStrcat(headers, ""); - StringStrcat(output, ""); - StringStrcat(tmpbuff, ""); - StringStrcat(tmpbuff2, ""); - StringStrcat(fspath, ""); + StringCat(headers, ""); + StringCat(output, ""); + StringCat(tmpbuff, ""); + StringCat(tmpbuff2, ""); + StringCat(fspath, ""); memset(&dummyaddr, 0, sizeof(dummyaddr)); /* UnLock */ - webhttrack_lock(0); + webhttrack_release(); /* sigpipe */ #ifndef _WIN32 @@ -356,10 +360,10 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { #endif /* Accept */ - while ( (soc_c = accept(soc, &dummyaddr, &dummylen)) == INVALID_SOCKET); + while ( (soc_c = (T_SOC) accept(soc, &dummyaddr, &dummylen)) == INVALID_SOCKET); /* Lock */ - webhttrack_lock(1); + webhttrack_lock(); if(linputsoc_t(soc_c, line1, sizeof(line1) - 2, timeout) > 0) { int meth = 0; @@ -411,21 +415,21 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { if (commandReturn) { char tmp[32]; sprintf(tmp, "%d", commandReturn); - inthash_write(NewLangList, "commandReturn", (unsigned long int)strdup(tmp)); - inthash_write(NewLangList, "commandReturnMsg", (unsigned long 
int)commandReturnMsg); - inthash_write(NewLangList, "commandReturnCmdl", (unsigned long int)commandReturnCmdl); + inthash_write(NewLangList, "commandReturn", (intptr_t)strdup(tmp)); + inthash_write(NewLangList, "commandReturnMsg", (intptr_t)commandReturnMsg); + inthash_write(NewLangList, "commandReturnCmdl", (intptr_t)commandReturnCmdl); } else { - inthash_write(NewLangList, "commandReturn", (unsigned long int)NULL); - inthash_write(NewLangList, "commandReturnMsg", (unsigned long int)NULL); - inthash_write(NewLangList, "commandReturnCmdl", (unsigned long int)NULL); + inthash_write(NewLangList, "commandReturn", (intptr_t)NULL); + inthash_write(NewLangList, "commandReturnMsg", (intptr_t)NULL); + inthash_write(NewLangList, "commandReturnCmdl", (intptr_t)NULL); } } /* SID check */ { - unsigned long int adr = 0; - if (inthash_readptr(NewLangList, "_sid", (long int *)&adr)) { - if (inthash_write(NewLangList, "sid", (unsigned long int)strdup((char*)adr))) { + intptr_t adr = 0; + if (inthash_readptr(NewLangList, "_sid", &adr)) { + if (inthash_write(NewLangList, "sid", (intptr_t)strdup((char*)adr))) { } } } @@ -443,9 +447,9 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { ua = e + 1; if (strfield2(ua, "on")) /* hack : "on" == 1 */ ua = "1"; - len = strlen(ua); + len = (int) strlen(ua); unescapehttp(ua, &sua); - inthash_write(NewLangList, s, (unsigned long int)StringAcquire(&sua)); + inthash_write(NewLangList, s, (intptr_t)StringAcquire(&sua)); s = f + 1; } } @@ -453,10 +457,10 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { /* Error check */ { - unsigned long int adr = 0; - unsigned long int adr2 = 0; - if (inthash_readptr(NewLangList, "sid", (long int *)&adr)) { - if (inthash_readptr(NewLangList, "_sid", (long int *)&adr2)) { + intptr_t adr = 0; + intptr_t adr2 = 0; + if (inthash_readptr(NewLangList, "sid", &adr)) { + if (inthash_readptr(NewLangList, "_sid", &adr2)) { if (strcmp((char*)adr, (char*)adr2) != 0) { 
meth = 0; } @@ -467,50 +471,50 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { /* Check variables (internal) */ if (meth) { int doLoad=0; - unsigned long int adr = 0; - if (inthash_readptr(NewLangList, "lang", (long int *)&adr)) { + intptr_t adr = 0; + if (inthash_readptr(NewLangList, "lang", &adr)) { int n = 0; if (sscanf((char*)adr, "%d", &n) == 1 && n > 0 && n - 1 != LANG_T(path, -1)) { LANG_T(path, n - 1); /* make a backup, because the GUI will override it */ - inthash_write(NewLangList, "lang_", (unsigned long int)strdup((char*)adr)); + inthash_write(NewLangList, "lang_", (intptr_t)strdup((char*)adr)); } } /* Load existing project settings */ - if (inthash_readptr(NewLangList, "loadprojname", (long int *)&adr)) { + if (inthash_readptr(NewLangList, "loadprojname", &adr)) { char* pname = (char*) adr; if (*pname) { - inthash_write(NewLangList, "projname", (unsigned long int)strdup(pname)); + inthash_write(NewLangList, "projname", (intptr_t)strdup(pname)); } - inthash_write(NewLangList, "loadprojname", (unsigned long int)NULL); + inthash_write(NewLangList, "loadprojname", (intptr_t)NULL); doLoad=1; } - else if (inthash_readptr(NewLangList, "loadprojcateg", (long int *)&adr)) { + else if (inthash_readptr(NewLangList, "loadprojcateg", &adr)) { char* pname = (char*) adr; if (*pname) { - inthash_write(NewLangList, "projcateg", (unsigned long int)strdup(pname)); + inthash_write(NewLangList, "projcateg", (intptr_t)strdup(pname)); } - inthash_write(NewLangList, "loadprojcateg", (unsigned long int)NULL); + inthash_write(NewLangList, "loadprojcateg", (intptr_t)NULL); } /* intial configuration */ { if (!inthash_read(NewLangList, "conf_file_loaded", NULL)) { - inthash_write(NewLangList, "conf_file_loaded", (unsigned long int)strdup("true")); + inthash_write(NewLangList, "conf_file_loaded", (intptr_t)strdup("true")); doLoad = 2; } } /* path : <path>/<project> */ if (!commandRunning) { - unsigned long int adrw = 0, adrpath = 0, adrprojname = 0; - 
if (inthash_readptr(NewLangList, "path", (long int *)&adrpath) - && inthash_readptr(NewLangList, "projname", (long int *)&adrprojname)) { + intptr_t adrw = 0, adrpath = 0, adrprojname = 0; + if (inthash_readptr(NewLangList, "path", &adrpath) + && inthash_readptr(NewLangList, "projname", &adrprojname)) { StringClear(fspath); - StringStrcat(fspath, (char*)adrpath); - StringStrcat(fspath, "/"); - StringStrcat(fspath, (char*)adrprojname); + StringCat(fspath, (char*)adrpath); + StringCat(fspath, "/"); + StringCat(fspath, (char*)adrprojname); } } @@ -518,13 +522,13 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { if (doLoad) { FILE* fp; if (doLoad == 1) { - StringStrcat(fspath, "/hts-cache/winprofile.ini"); + StringCat(fspath, "/hts-cache/winprofile.ini"); } else if (doLoad == 2) { - StringStrcpy(fspath, gethomedir()); + StringCopy(fspath, gethomedir()); #ifdef _WIN32 - StringStrcat(fspath, "/httrack.ini"); + StringCat(fspath, "/httrack.ini"); #else - StringStrcat(fspath, "/.httrack.ini"); + StringCat(fspath, "/.httrack.ini"); #endif } fp = fopen(StringBuff(fspath), "rb"); @@ -543,7 +547,7 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { if (pos[0] == '0' && pos[1] == '\0') *pos = '\0'; /* 0 => empty */ unescapeini(pos, &escline); - inthash_write(NewLangList, line, (unsigned long int)StringAcquire(&escline)); + inthash_write(NewLangList, line, (intptr_t)StringAcquire(&escline)); } } @@ -555,38 +559,38 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { /* Execute command */ { - unsigned long int adr = 0; + intptr_t adr = 0; int p = 0; - if (inthash_readptr(NewLangList, "command", (long int *)&adr)) { + if (inthash_readptr(NewLangList, "command", &adr)) { if (strcmp((char*)adr, "cancel") == 0) { if (commandRunning) { if (!commandEndRequested) { commandEndRequested=1; - hts_request_stop(0); + hts_request_stop(global_opt, 0); } else { - hts_request_stop(1); /* note: the force flag does not 
have anyeffect yet */ + hts_request_stop(global_opt, 1); /* note: the force flag does not have anyeffect yet */ commandEndRequested=2; /* will break the loop() callback */ } } } else if ((p=strfield((char*)adr, "cancel-file="))) { if (commandRunning) { - hts_cancel_file((char*)adr + p); + hts_cancel_file_push(global_opt, (char*)adr + p); } } else if (strcmp((char*)adr, "cancel-parsing") == 0) { if (commandRunning) { - hts_cancel_parsing(); + hts_cancel_parsing(global_opt); } } else if ((p=strfield((char*)adr, "pause="))) { if (commandRunning) { - hts_setpause(1); + hts_setpause(global_opt, 1); } } else if ((p=strfield((char*)adr, "unpause"))) { if (commandRunning) { - hts_setpause(0); + hts_setpause(global_opt, 0); } } else if (strcmp((char*)adr, "abort") == 0) { if (commandRunning) { - hts_request_stop(1); + hts_request_stop(global_opt, 1); commandEndRequested=2; /* will break the loop() callback */ } } else if ((p=strfield((char*)adr, "add-url="))) { @@ -594,28 +598,28 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { char* ptraddr[2]; ptraddr[0] = (char*)adr + p; ptraddr[1] = NULL; - hts_addurl(ptraddr); + hts_addurl(global_opt, ptraddr); } } else if ((p=strfield((char*)adr, "httrack"))) { if (!commandRunning) { - unsigned long int adrcd = 0; - if (inthash_readptr(NewLangList, "command_do", (long int *)&adrcd)) { - unsigned long int adrw = 0, adrpath = 0, adrprojname = 0; - if (inthash_readptr(NewLangList, "winprofile", (long int *)&adrw)) { + intptr_t adrcd = 0; + if (inthash_readptr(NewLangList, "command_do", &adrcd)) { + intptr_t adrw = 0, adrpath = 0, adrprojname = 0; + if (inthash_readptr(NewLangList, "winprofile", &adrw)) { /* User general profile */ - unsigned long int adruserprofile = 0; - if (inthash_readptr(NewLangList, "userprofile", (long int *)&adruserprofile) + intptr_t adruserprofile = 0; + if (inthash_readptr(NewLangList, "userprofile", &adruserprofile) && adruserprofile != 0) { int count = (int) 
strlen((char*)adruserprofile); if (count > 0) { FILE* fp; StringClear(tmpbuff); - StringStrcpy(tmpbuff, gethomedir()); + StringCopy(tmpbuff, gethomedir()); #ifdef _WIN32 - StringStrcat(tmpbuff, "/httrack.ini"); + StringCat(tmpbuff, "/httrack.ini"); #else - StringStrcat(tmpbuff, "/.httrack.ini"); + StringCat(tmpbuff, "/.httrack.ini"); #endif fp = fopen(StringBuff(tmpbuff), "wb"); if (fp != NULL) { @@ -627,13 +631,13 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { /* Profile */ StringClear(tmpbuff); - StringStrcat(tmpbuff, StringBuff(fspath)); - StringStrcat(tmpbuff, "/hts-cache/"); + StringCat(tmpbuff, StringBuff(fspath)); + StringCat(tmpbuff, "/hts-cache/"); /* Create minimal directory structure */ if (!structcheck(StringBuff(tmpbuff))) { FILE* fp; - StringStrcat(tmpbuff, "winprofile.ini"); + StringCat(tmpbuff, "winprofile.ini"); fp = fopen(StringBuff(tmpbuff), "wb"); if (fp != NULL) { int count = (int) strlen((char*)adrw); @@ -644,8 +648,8 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { The behaviour is exactly the same as in WinHTTrack */ StringClear(tmpbuff); - StringStrcat(tmpbuff, StringBuff(fspath)); - StringStrcat(tmpbuff, "/hts-cache/doit.log"); + StringCat(tmpbuff, StringBuff(fspath)); + StringCat(tmpbuff, "/hts-cache/doit.log"); remove(StringBuff(tmpbuff)); /* @@ -682,7 +686,7 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { } else if (strcmp((char*)adr, "quit") == 0) { willexit=1; } - inthash_write(NewLangList, "command", (unsigned long int)NULL); + inthash_write(NewLangList, "command", (intptr_t)NULL); } } @@ -732,8 +736,8 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { if (strncmp(file, "/website/", 9) != 0) { sprintf(fsfile, "%shtml%s", path, file); } else { - unsigned long int adr = 0; - if (inthash_readptr(NewLangList, "projpath", (long int *)&adr)) { + intptr_t adr = 0; + if (inthash_readptr(NewLangList, "projpath", &adr)) { 
sprintf(fsfile, "%s%s", (char*)adr, file + 9); } } @@ -759,7 +763,7 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { ; /* register current page */ - inthash_write(NewLangList, "thisfile", (unsigned long int)strdup(file)); + inthash_write(NewLangList, "thisfile", (intptr_t)strdup(file)); /* Force GET for the last request */ if (meth == 2 && willexit) { @@ -771,9 +775,9 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { char redir[] = "HTTP/1.0 302 Redirect\r\n" "Connection: close\r\n" "Server: httrack-small-server\r\n"; - unsigned long int adr = 0; + intptr_t adr = 0; char* newfile = file; - if (inthash_readptr(NewLangList, "redirect", (long int *)&adr) && adr != 0) { + if (inthash_readptr(NewLangList, "redirect", &adr) && adr != 0) { char* newadr = (char*)adr; if (*newadr) { newfile = newadr; @@ -787,14 +791,14 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { StringMemcat(headers, tmp, strlen(tmp)); } } - inthash_write(NewLangList, "redirect", (unsigned long int)NULL); + inthash_write(NewLangList, "redirect", (intptr_t)NULL); } else if (is_html(file)) { int outputmode = 0; StringMemcat(headers, ok, sizeof(ok) - 1); while(!feof(fp)) { char* str = line; - int prevlen = StringLength(output); + int prevlen = (int) StringLength(output); int nocr = 0; if (!linput(fp, line, sizeof(line) - 2)) { *str = '\0'; @@ -805,7 +809,7 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { } while(*str) { char* pos; - int n; + size_t n; if (*str == '$' && *++str == '{' && (pos = strchr(++str, '}')) && (n = (pos - str) ) && n < 1024 ) { char name_[1024 + 2]; char* name = name_; @@ -839,7 +843,7 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { if (pos2 != NULL) { *pos2 = '\0'; if (strstr(name, "..") == NULL) { - if (fexist(fconcat(path, name))) { + if (fexist(fconcat(catbuff, path, name))) { langstr = pos2 + 1; } } @@ -869,37 +873,37 @@ int 
smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { } } else if (strcmp(name, "if-file-exists") == 0) { if (strstr(pos2, "..") == NULL) { - if (!fexist(fconcat(path, pos2))) { + if (!fexist(fconcat(catbuff, path, pos2))) { outputmode = -1; } } } else if (strcmp(name, "if-project-file-exists") == 0) { if (strstr(pos2, "..") == NULL) { - if (!fexist(fconcat(StringBuff(fspath), pos2))) { + if (!fexist(fconcat(catbuff, StringBuff(fspath), pos2))) { outputmode = -1; } } } else if (strcmp(name, "if-file-do-not-exists") == 0) { if (strstr(pos2, "..") == NULL) { - if (fexist(fconcat(path, pos2))) { + if (fexist(fconcat(catbuff, path, pos2))) { outputmode = -1; } } } else if (strcmp(name, "if-not-empty") == 0) { - unsigned long int adr = 0; - if (!inthash_readptr(NewLangList, pos2, (long int *)&adr) || *((char*)adr) == 0 ) { + intptr_t adr = 0; + if (!inthash_readptr(NewLangList, pos2, &adr) || *((char*)adr) == 0 ) { outputmode = -1; } } else if (strcmp(name, "if-empty") == 0) { - unsigned long int adr = 0; - if (inthash_readptr(NewLangList, pos2, (long int *)&adr) && *((char*)adr) != 0 ) { + intptr_t adr = 0; + if (inthash_readptr(NewLangList, pos2, &adr) && *((char*)adr) != 0 ) { outputmode = -1; } } else if (strcmp(name, "end-if") == 0) { outputmode = 0; } else if (strcmp(name, "loadhash") == 0) { - unsigned long int adr = 0; - if (inthash_readptr(NewLangList, "path", (long int *)&adr)) { + intptr_t adr = 0; + if (inthash_readptr(NewLangList, "path", &adr)) { char* rpath = (char*) adr; //find_handle h; if (rpath[0]) { @@ -908,21 +912,21 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { } } { - char* profiles = hts_getcategories(rpath, 0); - char* categ = hts_getcategories(rpath,1 ); - inthash_write(NewLangList, "winprofile", (unsigned long int)profiles); - inthash_write(NewLangList, "wincateg", (unsigned long int)categ); + const char* profiles = hts_getcategories(rpath, 0); + const char* categ = hts_getcategories(rpath,1 ); 
+ inthash_write(NewLangList, "winprofile", (intptr_t)profiles); + inthash_write(NewLangList, "wincateg", (intptr_t)categ); } } } else if (strcmp(name, "copy") == 0) { if (*pos2) { char* pos3 = strchr(pos2, ':'); if ( pos3 && *(pos3 + 1) ) { - unsigned long int adr = 0; + intptr_t adr = 0; *pos3++ = '\0'; - if (inthash_readptr(NewLangList, pos2, (long int *)&adr)) { - inthash_write(NewLangList, pos3, (unsigned long int)strdup((char*)adr)); - inthash_write(NewLangList, pos2, (unsigned long int)NULL); + if (inthash_readptr(NewLangList, pos2, &adr)) { + inthash_write(NewLangList, pos3, (intptr_t)strdup((char*)adr)); + inthash_write(NewLangList, pos2, (intptr_t)NULL); } } } @@ -931,9 +935,9 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { char* pos3 = strchr(pos2, ':'); if ( pos3 ) { *pos3++ = '\0'; - inthash_write(NewLangList, pos2, (unsigned long int)strdup(pos3)); + inthash_write(NewLangList, pos2, (intptr_t)strdup(pos3)); } else { - inthash_write(NewLangList, pos2, (unsigned long int)NULL); + inthash_write(NewLangList, pos2, (intptr_t)NULL); } } } @@ -944,7 +948,7 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { ztest:<if == 0 || !exist>:<if == 1>:<if == 2>.. 
*/ else if ( ( p = strfield(name, "test:")) || ( p = strfield(name, "ztest:")) ) { - unsigned long int adr = 0; + intptr_t adr = 0; char* pos2; int ztest = (name[0] == 'z'); langstr = ""; @@ -952,7 +956,7 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { pos2 = strchr(name, ':'); if (pos2 != NULL) { *pos2 = '\0'; - if (inthash_readptr(NewLangList, name, (long int *)&adr) || ztest) { + if (inthash_readptr(NewLangList, name, &adr) || ztest) { char* newadr = (char*)adr; if (!newadr) newadr = ""; @@ -1004,10 +1008,10 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { char dname[32]; int n2 = (int) (pos2 - name); if (n2 > 0 && n2 < sizeof(dname) - 2) { - unsigned long int adr = 0; + intptr_t adr = 0; dname[0] = '\0'; strncatbuff(dname, name, n2); - if (inthash_readptr(NewLangList, dname, (long int *)&adr)) { + if (inthash_readptr(NewLangList, dname, &adr)) { int n = 0; if (sscanf((char*)adr, "%d", &n) == 1) { listDefault = n; @@ -1030,8 +1034,8 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { } else { langstr = LANGSEL(name); if (langstr == NULL || *langstr == '\0') { - unsigned long int adr = 0; - if (inthash_readptr(NewLangList, name, (long int *)&adr)) { + intptr_t adr = 0; + if (inthash_readptr(NewLangList, name, &adr)) { char* newadr = (char*)adr; langstr = newadr; } @@ -1053,21 +1057,21 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { } a += 2; } else if (outputmode && a[0] == '<') { - StringStrcat(output, "<"); + StringCat(output, "<"); } else if (outputmode && a[0] == '>') { - StringStrcat(output, ">"); + StringCat(output, ">"); } else if (outputmode && a[0] == '&') { - StringStrcat(output, "&"); + StringCat(output, "&"); } else if (outputmode == 3 && a[0] == ' ') { - StringStrcat(output, "%20"); + StringCat(output, "%20"); } else if (outputmode >= 2 && ((unsigned char)a[0]) < 32) { char tmp[32]; sprintf(tmp, "%%%02x", (unsigned char)a[0]); - 
StringStrcat(output, tmp); + StringCat(output, tmp); } else if (outputmode == 2 && a[0] == '%') { - StringStrcat(output, "%%"); + StringCat(output, "%%"); } else if (outputmode == 3 && a[0] == '%') { - StringStrcat(output, "%25"); + StringCat(output, "%25"); } else { StringMemcat(output, a, 1); } @@ -1077,7 +1081,7 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { break; case 3: if (*langstr) { - StringStrcat(output, "checked"); + StringCat(output, "checked"); } break; default: @@ -1086,45 +1090,45 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { char* fstr = langstr; StringClear(tmpbuff); if (format == 2) { - StringStrcat(output, "<option value=1>"); + StringCat(output, "<option value=1>"); } else if (format == -2) { - StringStrcat(output, "<option value=\""); + StringCat(output, "<option value=\""); } while(*fstr) { switch(*fstr) { case 13: break; case 10: if (format == 1) { - StringStrcat(output, StringBuff(tmpbuff)); - StringStrcat(output, "<br>\r\n"); + StringCat(output, StringBuff(tmpbuff)); + StringCat(output, "<br>\r\n"); } else if (format == -2) { - StringStrcat(output, StringBuff(tmpbuff)); - StringStrcat(output, "\">"); - StringStrcat(output, StringBuff(tmpbuff)); - StringStrcat(output, "</option>\r\n"); - StringStrcat(output, "<option value=\""); + StringCat(output, StringBuff(tmpbuff)); + StringCat(output, "\">"); + StringCat(output, StringBuff(tmpbuff)); + StringCat(output, "</option>\r\n"); + StringCat(output, "<option value=\""); } else { char tmp[32]; sprintf(tmp, "%d", ++id); - StringStrcat(output, StringBuff(tmpbuff)); - StringStrcat(output, "</option>\r\n"); - StringStrcat(output, "<option value="); - StringStrcat(output, tmp); + StringCat(output, StringBuff(tmpbuff)); + StringCat(output, "</option>\r\n"); + StringCat(output, "<option value="); + StringCat(output, tmp); if (listDefault == id) { - StringStrcat(output, " selected"); + StringCat(output, " selected"); } - StringStrcat(output, 
">"); + StringCat(output, ">"); } StringClear(tmpbuff); break; case '<': - StringStrcat(tmpbuff, "<"); + StringCat(tmpbuff, "<"); break; case '>': - StringStrcat(tmpbuff, ">"); + StringCat(tmpbuff, ">"); break; case '&': - StringStrcat(tmpbuff, "&"); + StringCat(tmpbuff, "&"); break; default: StringMemcat(tmpbuff, fstr, 1); @@ -1133,15 +1137,15 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { fstr++; } if (format == 2) { - StringStrcat(output, StringBuff(tmpbuff)); - StringStrcat(output, "</option>"); + StringCat(output, StringBuff(tmpbuff)); + StringCat(output, "</option>"); } else if (format == -2) { - StringStrcat(output, StringBuff(tmpbuff)); - StringStrcat(output, "\">"); - StringStrcat(output, StringBuff(tmpbuff)); - StringStrcat(output, "</option>"); + StringCat(output, StringBuff(tmpbuff)); + StringCat(output, "\">"); + StringCat(output, StringBuff(tmpbuff)); + StringCat(output, "</option>"); } else { - StringStrcat(output, StringBuff(tmpbuff)); + StringCat(output, StringBuff(tmpbuff)); } StringClear(tmpbuff); } @@ -1156,7 +1160,7 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { str++; } if (!nocr && prevlen != StringLength(output)) { - StringStrcat(output, "\r\n"); + StringCat(output, "\r\n"); } } #ifdef _DEBUG @@ -1168,7 +1172,7 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { } else if (is_text(file)) { StringMemcat(headers, ok_text, sizeof(ok_text) - 1); while(!feof(fp)) { - int n = fread(line, 1, sizeof(line) - 2, fp); + int n = (int) fread(line, 1, sizeof(line) - 2, fp); if (n > 0) { StringMemcat(output, line, n); } @@ -1176,7 +1180,7 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { } else { StringMemcat(headers, ok_img, sizeof(ok_img) - 1); while(!feof(fp)) { - int n = fread(line, 1, sizeof(line) - 2, fp); + int n = (int) fread(line, 1, sizeof(line) - 2, fp); if (n > 0) { StringMemcat(output, line, n); } @@ -1189,8 +1193,8 @@ int 
smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { "Content-type: text/html\r\n"; char error[] = "Page not found.\r\n"; - StringStrcat(headers, error_hdr); - StringStrcat(output, error); + StringCat(headers, error_hdr); + StringCat(output, error); //assert(file == NULL); } } @@ -1201,20 +1205,20 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { "Content-type: text/html\r\n"; char error[] = "Server error.\r\n"; - StringStrcat(headers, error_hdr); - StringStrcat(output, error); + StringCat(headers, error_hdr); + StringCat(output, error); #endif } { char tmp[256]; sprintf(tmp, "Content-length: %d\r\n", (int) StringLength(output)); - StringStrcat(headers, tmp); + StringCat(headers, tmp); } - StringStrcat(headers, "\r\n"); + StringCat(headers, "\r\n"); if ( - (send(soc_c, StringBuff(headers), StringLength(headers), 0) != StringLength(headers)) + (send(soc_c, StringBuff(headers), (int) StringLength(headers), 0) != StringLength(headers)) || - ( (meth == 1) && (send(soc_c, StringBuff(output), StringLength(output), 0) != StringLength(output)) ) + ( (meth == 1) && (send(soc_c, StringBuff(output), (int) StringLength(output), 0) != StringLength(output)) ) ) { #ifdef _DEBUG //assert(FALSE); @@ -1238,7 +1242,7 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { while(recv(soc_c, ((char*)&c), 1, 0) > 0); } -#if HTS_WIN +#ifdef _WIN32 closesocket(soc_c); #else close(soc_c); @@ -1270,7 +1274,7 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { commandReturnCmdl = NULL; /* Unlock */ - webhttrack_lock(0); + webhttrack_release(); return retour; } @@ -1280,7 +1284,7 @@ int smallserver(T_SOC soc,char* url,char* method,char* data, char* path) { /* Language files */ -int htslang_init() { +int htslang_init(void) { if (NewLangList==NULL) { int i = 0; NewLangList=inthash_new(NewLangListSz); @@ -1293,7 +1297,7 @@ int htslang_init() { return 1; } -int htslang_uninit() { +int 
htslang_uninit(void) { if (NewLangList!=NULL) { inthash_delete(&NewLangList); } @@ -1301,21 +1305,22 @@ int htslang_uninit() { } int smallserver_setkey(char* key, char* value) { - return inthash_write(NewLangList, key, (unsigned long int)strdup(value)); + return inthash_write(NewLangList, key, (intptr_t)strdup(value)); } int smallserver_setkeyint(char* key, LLint value) { char tmp[256]; sprintf(tmp, LLintP, value); - return inthash_write(NewLangList, key, (unsigned long int)strdup(tmp)); + return inthash_write(NewLangList, key, (intptr_t)strdup(tmp)); } int smallserver_setkeyarr(char* key, int id, char* key2, char* value) { char tmp[256]; sprintf(tmp, "%s%d%s", key, id, key2); - return inthash_write(NewLangList, tmp, (unsigned long int)strdup(value)); + return inthash_write(NewLangList, tmp, (intptr_t)strdup(value)); } static int htslang_load(char* limit_to, char* path) { char* hashname; + char catbuff[CATBUFF_SIZE]; // int selected_lang=LANG_T(path, -1); // @@ -1334,7 +1339,7 @@ static int htslang_load(char* limit_to, char* path) { /* Load master file (list of keys and internal keys) */ if (!limit_to) { char* mname = "lang.def"; - FILE* fp=fopen(fconcat(path, mname),"rb"); + FILE* fp=fopen(fconcat(catbuff, path, mname),"rb"); if (fp) { char intkey[8192]; char key[8192]; @@ -1347,7 +1352,7 @@ static int htslang_load(char* limit_to, char* path) { /* Increment for multiple definitions */ if (strnotempty(test)) { int increment=0; - int pos=strlen(key); + size_t pos = strlen(key); do { increment++; sprintf(key+pos,"%d",increment); @@ -1359,11 +1364,11 @@ static int htslang_load(char* limit_to, char* path) { // conv_printf(key,key); int len; char* buff; - len=strlen(intkey); + len = (int) strlen(intkey); buff=(char*)malloc(len+2); if (buff) { strcpybuff(buff,intkey); - inthash_add(NewLangStrKeys,key,(long int)(char*)buff); + inthash_add(NewLangStrKeys,key,(intptr_t)buff); } } } // if @@ -1407,7 +1412,7 @@ static int htslang_load(char* limit_to, char* path) { 
hashname=LANGINTKEY(name); } sprintf(lbasename, "lang/%s.txt",hashname); - fp=fopen(fconcat(path, lbasename), "rb"); + fp=fopen(fconcat(catbuff, path, lbasename), "rb"); if (fp) { char extkey[8192]; char value[8192]; @@ -1429,7 +1434,7 @@ static int htslang_load(char* limit_to, char* path) { if (strnotempty(test)) { if (loops == 0) { int increment=0; - int pos=strlen(extkey); + size_t pos=strlen(extkey); do { increment++; sprintf(extkey+pos,"%d",increment); @@ -1451,11 +1456,11 @@ static int htslang_load(char* limit_to, char* path) { /* Add key */ if (strnotempty(intkey)) { - len=strlen(value); + len = (int) strlen(value); buff=(char*)malloc(len+2); if (buff) { conv_printf(value,buff); - inthash_add(NewLangStr,intkey,(long int)(char*)buff); + inthash_add(NewLangStr,intkey,(intptr_t)buff); } } @@ -1479,7 +1484,7 @@ static int htslang_load(char* limit_to, char* path) { /* NOTE : also contains the "webhttrack" hack */ static void conv_printf(char* from,char* to) { int i=0,j=0,len; - len=strlen(from); + len = (int) strlen(from); while(i<len) { switch(from[i]) { case '\\': @@ -1519,7 +1524,7 @@ static void conv_printf(char* from,char* to) { } } -static void LANG_DELETE() { +static void LANG_DELETE(void) { inthash_delete(&NewLangStr); inthash_delete(&NewLangStrKeys); } @@ -1546,7 +1551,6 @@ static int LANG_SEARCH(char* path, char* iso) { int i = 0; int curr_lng=LANG_T(path, -1); int found = 0; - unsigned long int adr = 0; do { QLANG_T(i); strcpybuff(lang_str,"LANGUAGE_ISO"); @@ -1590,9 +1594,9 @@ static int QLANG_T(int l) { } static char* LANGSEL(char* name) { - unsigned long int adr = 0; + intptr_t adr = 0; if (NewLangStr) - if (!inthash_read(NewLangStr,name,(long int *)&adr)) + if (!inthash_read(NewLangStr,name,&adr)) adr=0; if (adr) { return (char*)adr; @@ -1601,9 +1605,9 @@ static char* LANGSEL(char* name) { } static char* LANGINTKEY(char* name) { - unsigned long int adr=0; + intptr_t adr=0; if (NewLangStrKeys) - if (!inthash_read(NewLangStrKeys,name,(long int 
*)&adr)) + if (!inthash_read(NewLangStrKeys,name,&adr)) adr=0; if (adr) { return (char*)adr; diff --git a/src/htsserver.h b/src/htsserver.h index 9a633cb..74f9825 100644 --- a/src/htsserver.h +++ b/src/htsserver.h @@ -81,6 +81,8 @@ extern inthash NewLangStrKeys; extern int NewLangListSz; extern inthash NewLangList; +extern httrackp *global_opt; + /* Spaces: CR,LF,TAB,FF */ #define is_space(c) ( ((c)==' ') || ((c)=='\"') || ((c)==10) || ((c)==13) || ((c)==9) || ((c)==12) || ((c)==11) || ((c)=='\'') ) #define is_realspace(c) ( ((c)==' ') || ((c)==10) || ((c)==13) || ((c)==9) || ((c)==12) || ((c)==11) ) @@ -113,8 +115,7 @@ int htslang_uninit(void); static char* gethomedir(void); static int linput_cpp(FILE* fp,char* s,int max); static int linput_trim(FILE* fp,char* s,int max); -static char* concat(const char* a,const char* b); -static int fexist(char* s); +static int fexist(const char* s); static int linput(FILE* fp,char* s,int max); static int linputsoc(T_SOC soc, char* s, int max) { @@ -186,23 +187,8 @@ static int linput_cpp(FILE* fp,char* s,int max) { } while((s[max(rlen-1,0)]=='\\') && (rlen<max)); return rlen; } -// copy of concat -typedef struct concat_strc { - char buff[16][HTS_URLMAXSIZE*2*2]; - int rol; -} concat_strc; -static char* concat(const char* a,const char* b) { - static concat_strc* strc = NULL; - if (strc == NULL) { - strc = (concat_strc*) calloc(16, sizeof(concat_strc)); - } - strc->rol=((strc->rol+1)%16); // roving pointer - strcpybuff(strc->buff[strc->rol],a); - if (b) strcatbuff(strc->buff[strc->rol],b); - return strc->buff[strc->rol]; -} -static int fexist(char* s) { +static int fexist(const char* s) { struct stat st; memset(&st, 0, sizeof(st)); if (stat(s, &st) == 0) { @@ -258,6 +244,38 @@ static int linput_trim(FILE* fp,char* s,int max) { return rlen; } +static int ehexh(char c) { + if ((c>='0') && (c<='9')) return c-'0'; + if ((c>='a') && (c<='f')) c-=('a'-'A'); + if ((c>='A') && (c<='F')) return (c-'A'+10); + return 0; +} + +static int 
ehex(char* s) { + return 16*ehexh(*s)+ehexh(*(s+1)); +} + +static void unescapehttp(char* s, String* tempo) { + int i; + for (i=0;i<(int) strlen(s);i++) { + if (s[i]=='%' && s[i+1]=='%') { + i++; + StringAddchar(*tempo, '%'); + } else if (s[i]=='%') { + char hc; + i++; + hc = (char) ehex(s+i); + StringAddchar(*tempo, (char) hc); + i++; // sauter 2 caractères finalement + } + else if (s[i]=='+') { + StringAddchar(*tempo, ' '); + } + else + StringAddchar(*tempo, s[i]); + } +} + static void unescapeini(char* s, String* tempo) { int i; char lastc=0; @@ -279,28 +297,5 @@ static void unescapeini(char* s, String* tempo) { } } -#ifndef _WIN32 -#define fconv(a) (a) -#define fconcat(a,b) concat(a,b) -#endif - -#ifdef _WIN32 -static char* __fconv(char* a) { - int i; - for(i=0;i<(int) strlen(a);i++) - if (a[i]=='/') // convertir - a[i]='\\'; - return a; -} -static char* fconcat(char* a,char* b) { - return __fconv(concat(a,b)); -} -static char* fconv(char* a) { - return __fconv(concat(a,"")); -} -#endif - #endif - - diff --git a/src/htsstrings.h b/src/htsstrings.h index 14e851f..6dba2b0 100755 --- a/src/htsstrings.h +++ b/src/htsstrings.h @@ -34,105 +34,249 @@ Please visit our Website: http://www.httrack.com /* Author: Xavier Roche */ /* ------------------------------------------------------------ */ -// Strings a bit safer than static buffers +/* Safer Strings ; standalone .h library */ #ifndef HTS_STRINGS_DEFSTATIC #define HTS_STRINGS_DEFSTATIC -typedef struct String { - char* buff; - int len; - int capa; -} String; - -#define STRING_EMPTY {NULL, 0, 0} -#define STRING_BLK_SIZE 256 -#define StringBuff(blk) ((blk).buff) -#define StringLength(blk) ((blk).len) -#define StringCapacity(blk) ((blk).capa) -#define StringRoom(blk, size) do { \ - if ((blk).len + (int)(size) + 1 > (blk).capa) { \ - (blk).capa = ((blk).len + (size) + 1) * 2; \ - (blk).buff = (char*) realloct((blk).buff, (blk).capa); \ - assertf((blk).buff != NULL); \ +/* System definitions. 
*/ +#include <string.h> + +/* GCC extension */ +#ifndef HTS_UNUSED +#ifdef __GNUC__ +#define HTS_UNUSED __attribute__ ((unused)) +#define HTS_STATIC static __attribute__ ((unused)) +#else +#define HTS_UNUSED +#define HTS_STATIC static +#endif +#endif + +/** Forward definitions **/ +#ifndef HTS_DEF_FWSTRUCT_String +#define HTS_DEF_FWSTRUCT_String +typedef struct String String; +#endif +#ifndef HTS_DEF_STRUCT_String +#define HTS_DEF_STRUCT_String +struct String { + char* buffer_; + size_t length_; + size_t capacity_; +}; +#endif + +/** Allocator **/ +#ifndef STRING_REALLOC +#define STRING_REALLOC(BUFF, SIZE) ( (char*) realloc(BUFF, SIZE) ) +#define STRING_FREE(BUFF) free(BUFF) +#endif +#ifndef STRING_ASSERT +#include <assert.h> +#define STRING_ASSERT(EXP) assert(EXP) +#endif + +/** An empty string **/ +#define STRING_EMPTY { (char*) NULL, 0, 0 } + +/** String buffer **/ +#define StringBuff(BLK) ( (const char*) ((BLK).buffer_) ) + +/** String buffer (read/write) **/ +#define StringBuffRW(BLK) ((BLK).buffer_) + +/** String length **/ +#define StringLength(BLK) ((BLK).length_) + +/** String not empty ? **/ +#define StringNotEmpty(BLK) ( StringLength(BLK) > 0 ) + +/** String capacity **/ +#define StringCapacity(BLK) ((BLK).capacity_) + +/** Subcharacter **/ +#define StringSub(BLK, POS) ( StringBuff(BLK)[POS] ) + +/** Subcharacter (read/write) **/ +#define StringSubRW(BLK, POS) ( StringBuffRW(BLK)[POS] ) + +/** Subcharacter (read/write) **/ +#define StringSubRW(BLK, POS) ( StringBuffRW(BLK)[POS] ) + +/** Right subcharacter **/ +#define StringRight(BLK, POS) ( StringBuff(BLK)[StringLength(BLK) - POS] ) + +/** Right subcharacter (read/write) **/ +#define StringRightRW(BLK, POS) ( StringBuffRW(BLK)[StringLength(BLK) - POS] ) + +/** Remove the utter right character from the string. 
**/ +#define StringPopRight(BLK) do { \ + StringBuffRW(BLK)[--StringLength(BLK)] = '\0'; \ +} while(0) + +/** Ensure the string is large enough **/ +#define StringRoom(BLK, SIZE) do { \ + if ((BLK).length_ + (int)(SIZE) + 1 > (BLK).capacity_) { \ + (BLK).capacity_ = ((BLK).length_ + (SIZE) + 1) * 2; \ + (BLK).buffer_ = (char*) STRING_REALLOC((BLK).buffer_, (BLK).capacity_); \ + STRING_ASSERT((BLK).buffer_ != NULL); \ } \ } while(0) -#define StringBuffN(blk, size) StringBuffN_(&(blk), size) -static char* StringBuffN_(String* blk, int size) { - StringRoom(*blk, (blk->len) + size); - return StringBuff(*blk); +#define StringBuffN(BLK, SIZE) StringBuffN_(&(BLK), SIZE) +HTS_STATIC char* StringBuffN_(String* blk, int size) { + StringRoom(*blk, StringLength(*blk) + size); + return StringBuffRW(*blk); } -#define StringClear(blk) do { \ - StringRoom(blk, 0); \ - (blk).buff[0] = '\0'; \ - (blk).len = 0; \ + +/** Initialize a string. **/ +#define StringInit(BLK) do { \ + (BLK).buffer_ = NULL; \ + (BLK).capacity_ = 0; \ + (BLK).length_ = 0; \ +} while(0) + +/** Clear a string (set its length to 0) **/ +#define StringClear(BLK) do { \ + StringRoom(BLK, 0); \ + (BLK).buffer_[0] = '\0'; \ + (BLK).length_ = 0; \ } while(0) -#define StringFree(blk) do { \ - if ((blk).buff != NULL) { \ - freet((blk).buff); \ - (blk).buff = NULL; \ + +/** Set the length of a string to 'SIZE'. If SIZE is negative, check the size using strlen(). 
**/ +#define StringSetLength(BLK, SIZE) do { \ + if (SIZE >= 0) { \ + (BLK).length_ = SIZE; \ + } else { \ + (BLK).length_ = strlen((BLK).buffer_); \ } \ - (blk).capa = 0; \ - (blk).len = 0; \ } while(0) -#define StringMemcat(blk, str, size) do { \ - StringRoom(blk, size); \ - if ((int)(size) > 0) { \ - memcpy((blk).buff + (blk).len, (str), (size)); \ - (blk).len += (size); \ + +/** Free a string (release memory) **/ +#define StringFree(BLK) do { \ + if ((BLK).buffer_ != NULL) { \ + STRING_FREE((BLK).buffer_); \ + (BLK).buffer_ = NULL; \ } \ - *((blk).buff + (blk).len) = '\0'; \ + (BLK).capacity_ = 0; \ + (BLK).length_ = 0; \ } while(0) -#define StringAddchar(blk, c) do { \ - char __c = (c); \ - StringMemcat(blk, &__c, 1); \ + +/** Assign an allocated pointer to a a string. +The pointer _MUST_ be compatible with STRING_REALLOC() and STRING_FREE() **/ +#define StringSetBuffer(BLK, STR) do { \ + size_t len__ = strlen( STR ); \ + StringFree(BLK); \ + (BLK).buffer_ = ( STR ); \ + (BLK).capacity_ = len__; \ + (BLK).length_ = len__; \ } while(0) -static void* StringAcquire(String* blk) { - void* buff = blk->buff; - blk->buff = NULL; - blk->capa = 0; - blk->len = 0; - return buff; -} -#define StringStrcat(blk, str) StringMemcat(blk, str, ((str) != NULL) ? 
(int)strlen(str) : 0) -#define StringStrcpy(blk, str) do { \ - StringClear(blk); \ - StringStrcat(blk, str); \ + +/** Append a memory block to a string **/ +#define StringMemcat(BLK, STR, SIZE) do { \ + StringRoom(BLK, SIZE); \ + if ((int)(SIZE) > 0) { \ + memcpy((BLK).buffer_ + (BLK).length_, (STR), (SIZE)); \ + (BLK).length_ += (int)(SIZE); \ + } \ + *((BLK).buffer_ + (BLK).length_) = '\0'; \ } while(0) -/* Tools */ +/** Copy a memory block to a string **/ +#define StringMemcpy(BLK, STR, SIZE) do { \ + (BLK).length_ = 0; \ + StringMemcat(BLK, STR, SIZE); \ +} while(0) + +/** Add a character **/ +#define StringAddchar(BLK, c) do { \ + String * const s__ = &(BLK); \ + char c__ = (c); \ + StringRoom(*s__, 1); \ + StringBuffRW(*s__)[StringLength(*s__)++] = c__; \ + StringBuffRW(*s__)[StringLength(*s__) ] = 0; \ +} while(0) -static int ehexh(char c) { - if ((c>='0') && (c<='9')) return c-'0'; - if ((c>='a') && (c<='f')) c-=('a'-'A'); - if ((c>='A') && (c<='F')) return (c-'A'+10); - return 0; +/** Acquire a string ; it's the client's responsability to free() it **/ +HTS_STATIC char* StringAcquire(String* blk) { + char* buff = StringBuffRW(*blk); + StringBuffRW(*blk) = NULL; + StringCapacity(*blk) = 0; + StringLength(*blk) = 0; + return buff; } -static int ehex(char* s) { - return 16*ehexh(*s)+ehexh(*(s+1)); +/** Clone a string. **/ +HTS_STATIC String StringDup(const String* src) { + String s = STRING_EMPTY; + StringMemcat(s, StringBuff(*src), StringLength(*src)); + return s; } -static void unescapehttp(char* s, String* tempo) { - int i; - for (i=0;i<(int) strlen(s);i++) { - if (s[i]=='%' && s[i+1]=='%') { - i++; - StringAddchar(*tempo, '%'); - } else if (s[i]=='%') { - char hc; - i++; - hc = (char) ehex(s+i); - StringAddchar(*tempo, (char) hc); - i++; // sauter 2 caractères finalement - } - else if (s[i]=='+') { - StringAddchar(*tempo, ' '); - } - else - StringAddchar(*tempo, s[i]); +/** Attach a string using a pointer. 
**/ +HTS_STATIC void StringAttach(String* blk, char** str) { + StringFree(*blk); + if (str != NULL && *str != NULL) { + StringBuffRW(*blk) = *str; + StringCapacity(*blk) = StringLength(*blk) = strlen(StringBuff(*blk)); + *str = NULL; } } +/** Append a string to another one. **/ +#define StringCat(BLK, STR) do { \ + const char *str__ = ( STR ); \ + if (str__ != NULL) { \ + size_t size__ = strlen(str__); \ + StringMemcat(BLK, str__, size__); \ + } \ +} while(0) + +#define StringCatN(BLK, STR, SIZE) do { \ + const char *str__ = ( STR ); \ + if (str__ != NULL) { \ + size_t size__ = strlen(str__); \ + if (size__ > (SIZE)) { \ + size__ = (SIZE); \ + } \ + StringMemcat(BLK, str__, size__); \ + } \ +} while(0) + +#define StringCopyN(BLK, STR, SIZE) do { \ + const char *str__ = ( STR ); \ + const size_t usize__ = (SIZE); \ + (BLK).length_ = 0; \ + if (str__ != NULL) { \ + size_t size__ = strlen(str__); \ + if (size__ > usize__ ) { \ + size__ = usize__; \ + } \ + StringMemcat(BLK, str__, size__); \ + } else { \ + StringClear(BLK); \ + } \ +} while(0) + +#define StringCopyS(blk, blk2) StringCopyN(blk, (blk2).buffer_, (blk2).length_) + +/** Copy a string to another one. **/ +#define StringCopy(BLK, STR) do { \ + const char *str__ = ( STR ); \ + if (str__ != NULL) { \ + size_t size__ = strlen(str__); \ + StringMemcpy(BLK, str__, size__); \ + } else { \ + StringClear(BLK); \ + } \ +} while(0) + +/** Copy a (potentially overlapping) string to another one. 
**/ +#define StringCopyOverlapped(BLK, STR) do { \ + String s__ = STRING_EMPTY; \ + StringCopy(s__, STR); \ + StringCopyS(BLK, s__); \ + StringFree(s__); \ +} while(0) #endif diff --git a/src/htssystem.h b/src/htssystem.h deleted file mode 100644 index 6c4d216..0000000 --- a/src/htssystem.h +++ /dev/null @@ -1 +0,0 @@ -/* (empty file) */ diff --git a/src/htsthread.c b/src/htsthread.c index a766a40..5fbb4f1 100644 --- a/src/htsthread.c +++ b/src/htsthread.c @@ -42,13 +42,13 @@ Please visit our Website: http://www.httrack.com #include "htsthread.h" #if USE_BEGINTHREAD -#if HTS_WIN +#ifdef _WIN32 #include <process.h> #endif #endif static int process_chain = 0; -static PTHREAD_LOCK_TYPE process_chain_mutex; +static htsmutex process_chain_mutex = HTSMUTEX_INIT; HTSEXT_API void htsthread_wait(void ) { htsthread_wait_n(0); @@ -58,75 +58,94 @@ HTSEXT_API void htsthread_wait_n(int n_wait) { #if USE_BEGINTHREAD int wait = 0; do { - htsSetLock(&process_chain_mutex, 1); + hts_mutexlock(&process_chain_mutex); wait = (process_chain > n_wait ); - htsSetLock(&process_chain_mutex, 0); + hts_mutexrelease(&process_chain_mutex); if (wait) Sleep(100); } while(wait); #endif } +/* ensure initialized */ HTSEXT_API void htsthread_init(void ) { #if USE_BEGINTHREAD +#if (defined(_DEBUG) || defined(DEBUG)) assertf(process_chain == 0); - htsSetLock(&process_chain_mutex, -999); +#endif + if (process_chain_mutex == HTSMUTEX_INIT) { + hts_mutexinit(&process_chain_mutex); + } #endif } HTSEXT_API void htsthread_uninit(void ) { htsthread_wait(); #if USE_BEGINTHREAD - htsSetLock(&process_chain_mutex, -998); + hts_mutexfree(&process_chain_mutex); #endif } -typedef struct { - PTHREAD_TYPE ( PTHREAD_TYPE_FNC *start_address )( void * ); - void** arglist; -} execth_args; -static PTHREAD_TYPE PTHREAD_TYPE_FNC execth( void * arg ) +typedef struct hts_thread_s { + void *arg; + void (*fun)(void *arg); +} hts_thread_s; + +#ifdef _WIN32 +static unsigned int __stdcall hts_entry_point(void *tharg) +#else +static 
void* hts_entry_point(void *tharg) +#endif { - execth_args* args = (execth_args*) arg; - assertf(args != NULL); + hts_thread_s *s_args = (hts_thread_s*) tharg; + void * const arg = s_args->arg; + void (*fun)(void *arg) = s_args->fun; + free(tharg); - htsSetLock(&process_chain_mutex, 1); + hts_mutexlock(&process_chain_mutex); process_chain++; assertf(process_chain > 0); - htsSetLock(&process_chain_mutex, 0); + hts_mutexrelease(&process_chain_mutex); - (void) args->start_address(args->arglist); + /* run */ + fun(arg); - htsSetLock(&process_chain_mutex, 1); + hts_mutexlock(&process_chain_mutex); process_chain--; assertf(process_chain >= 0); - htsSetLock(&process_chain_mutex, 0); - - free(arg); - return PTHREAD_RETURN; + hts_mutexrelease(&process_chain_mutex); +#ifdef _WIN32 + return 0; +#else + return NULL; +#endif } - -HTSEXT_API int hts_newthread( PTHREAD_TYPE ( PTHREAD_TYPE_FNC *start_address )( void * ), unsigned stack_size, void *arglist ) +/* create a thread */ +HTSEXT_API int hts_newthread( void (*fun)(void *arg), void *arg) { - execth_args* args = (execth_args*) malloc(sizeof(execth_args)); - assertf(args != NULL); - args->start_address = start_address; - args->arglist = arglist; - - /* create a thread */ + hts_thread_s *s_args = malloc(sizeof(hts_thread_s)); + assertf(s_args != NULL); + s_args->arg = arg; + s_args->fun = fun; #ifdef _WIN32 - if (_beginthread(execth, stack_size, args) == -1) { - free(args); - return -1; + { + unsigned int idt; + HANDLE handle = (HANDLE) _beginthreadex(NULL, 0, hts_entry_point, s_args, 0, &idt); + if (handle == 0) { + free(s_args); + return -1; + } else { + /* detach the thread from the main process so that is can be independent */ + CloseHandle(handle); + } } #else { - PTHREAD_HANDLE handle = 0; - int retcode; - retcode = pthread_create(&handle, NULL, execth, args); + pthread_t handle = 0; + int retcode = pthread_create(&handle, NULL, hts_entry_point, s_args); if (retcode != 0) { /* error */ - free(args); + free(s_args); 
return -1; } else { /* detach the thread from the main process so that is can be independent */ @@ -137,83 +156,54 @@ HTSEXT_API int hts_newthread( PTHREAD_TYPE ( PTHREAD_TYPE_FNC *start_address )( return 0; } +#if USE_BEGINTHREAD + +/* Note: new 3.41 cleaned up functions. */ -// Threads - emulate _beginthread under Linux/Unix using pthread_XX -// Some changes will have to be done, see PTHREAD_RETURN,PTHREAD_TYPE -#if USE_PTHREAD -#include <pthread.h> /* _beginthread, _endthread */ -#if 0 -unsigned long _beginthread( void* ( *start_address )( void * ), unsigned stack_size, void *arglist ) -{ - pthread_t th; - int retcode; - /* create a thread */ - retcode = pthread_create(&th, NULL, start_address, arglist); - if (retcode != 0) /* error */ - return -1; - /* detach the thread from the main process so that is can be independent */ - pthread_detach(th); - return 0; -} +HTSEXT_API void hts_mutexinit(htsmutex* mutex) { + htsmutex_s* smutex = malloct(sizeof(htsmutex_s)); +#ifdef _WIN32 + smutex->handle = CreateMutex(NULL, FALSE, NULL); +#else + pthread_mutex_init(&smutex->handle, 0); #endif + *mutex = smutex; +} + +HTSEXT_API void hts_mutexfree(htsmutex* mutex) { + if (mutex != NULL && *mutex != NULL) { +#ifdef _WIN32 + CloseHandle((*mutex)->handle); +#else + pthread_mutex_destroy(& ( (*mutex)->handle ) ); #endif + freet(*mutex); + *mutex = NULL; + } +} -#if USE_BEGINTHREAD -/* - Simple lock function - - Return value: always 0 - Parameter: - 1 wait for lock (mutex) available and lock it - 0 unlock the mutex - [-1 check if locked (always return 0 with mutex)] - -999 initialize - -998 free - */ -HTSEXT_API int htsSetLock(PTHREAD_LOCK_TYPE* hMutex,int lock) { -#if HTS_WIN - /* lock */ - switch(lock) { - case 1: /* lock */ - assertf(*hMutex != NULL); - WaitForSingleObject(*hMutex,INFINITE); - break; - case 0: /* unlock */ - assertf(*hMutex != NULL); - ReleaseMutex(*hMutex); - break; - case -999: /* create */ - *hMutex=CreateMutex(NULL,FALSE,NULL); - break; - case -998: /* 
destroy */ - CloseHandle(*hMutex); - *hMutex = NULL; - break; - default: - assert(FALSE); - break; +HTSEXT_API void hts_mutexlock(htsmutex* mutex) { + assertf(mutex != NULL); + if (*mutex == HTSMUTEX_INIT) { /* must be initialized */ + hts_mutexinit(mutex); } + assertf(*mutex != NULL); +#ifdef _WIN32 + assert((*mutex)->handle != NULL); + WaitForSingleObject((*mutex)->handle, INFINITE); #else - switch(lock) { - case 1: /* lock */ - pthread_mutex_lock(hMutex); - break; - case 0: /* unlock */ - pthread_mutex_unlock(hMutex); - break; - case -999: /* create */ - pthread_mutex_init(hMutex,0); - break; - case -998: /* destroy */ - pthread_mutex_destroy(hMutex); - break; - default: - assert(0); - break; - } + pthread_mutex_lock(&(*mutex)->handle); #endif - return 0; } +HTSEXT_API void hts_mutexrelease(htsmutex* mutex) { + assertf(mutex != NULL && *mutex != NULL); +#ifdef _WIN32 + assert((*mutex)->handle != NULL); + ReleaseMutex((*mutex)->handle); +#else + pthread_mutex_unlock(&(*mutex)->handle); #endif +} +#endif diff --git a/src/htsthread.h b/src/htsthread.h index f62c39c..1bcecc6 100644 --- a/src/htsthread.h +++ b/src/htsthread.h @@ -38,10 +38,10 @@ Please visit our Website: http://www.httrack.com #define HTS_DEFTHREAD #include "htsglobal.h" -#if USE_PTHREAD -#include <pthread.h> /* _beginthread, _endthread */ +#ifndef _WIN32 +#include <pthread.h> #endif -#if HTS_WIN +#ifdef _WIN32 #include "windows.h" #ifdef _WIN32_WCE #ifndef HTS_CECOMPAT @@ -49,68 +49,44 @@ Please visit our Website: http://www.httrack.com #endif #endif #endif - -#if USE_BEGINTHREAD -#if HTS_WIN - -#define PTHREAD_RETURN -#define PTHREAD_TYPE void -#define PTHREAD_TYPE_FNC __cdecl -#define PTHREAD_LOCK_TYPE HANDLE -#define PTHREAD_HANDLE HANDLE - - -/* Useless - see '__declspec( thread )' */ -/* -#define PTHREAD_KEY_TYPE void* -#define PTHREAD_KEY_CREATE(ptrkey, uninit) do { *(ptrkey)=(void*)NULL; } while(0) -#define PTHREAD_KEY_DELETE(key) do { key=(void*)NULL; } while(0) -#define PTHREAD_KEY_SET(key, 
val, ptrtype) do { key=(void*)(val); } while(0) -#define PTHREAD_KEY_GET(key, ptrval, ptrtype) do { *(ptrval)=(ptrtype)(key); } while(0) -*/ - -#else - -#define PTHREAD_RETURN NULL -#define PTHREAD_TYPE void* -#define PTHREAD_TYPE_FNC -#define PTHREAD_LOCK_TYPE pthread_mutex_t -#define PTHREAD_KEY_TYPE pthread_key_t -#define PTHREAD_KEY_CREATE(ptrkey, uninit) pthread_key_create(ptrkey, uninit) -#define PTHREAD_KEY_DELETE(key) pthread_key_delete(key) -#define PTHREAD_KEY_SET(key, val, ptrtype) pthread_setspecific(key, (void*)val) -#define PTHREAD_KEY_GET(key, ptrval, ptrtype) do { *(ptrval)=(ptrtype)pthread_getspecific(key); } while(0) -#define PTHREAD_HANDLE pthread_t - +#ifndef USE_BEGINTHREAD +#error needs USE_BEGINTHREAD #endif -#else - -#define PTHREAD_LOCK_TYPE void* -#define PTHREAD_KEY_TYPE void* -#define PTHREAD_KEY_CREATE(ptrkey, uninit) do { *(ptrkey)=(void*)NULL; } while(0) -#define PTHREAD_KEY_DELETE(key) do { key=(void*)NULL; } while(0) -#define PTHREAD_KEY_SET(key, val, ptrtype) do { key=(void*)(val); } while(0) -#define PTHREAD_KEY_GET(key, ptrval, ptrtype) do { *(ptrval)=(ptrtype)(key); } while(0) -#define PTHREAD_HANDLE void - +/* Forward definition */ +#ifndef HTS_DEF_FWSTRUCT_htsmutex_s +#define HTS_DEF_FWSTRUCT_htsmutex_s +typedef struct htsmutex_s htsmutex_s, *htsmutex; #endif +#define HTSMUTEX_INIT NULL + +#ifdef _WIN32 +struct htsmutex_s { + HANDLE handle; +}; +#else /* #ifdef _WIN32 */ +struct htsmutex_s { + pthread_mutex_t handle; +}; +#endif /* #ifdef _WIN32 */ /* Library internal definictions */ -HTSEXT_API int hts_newthread( PTHREAD_TYPE ( PTHREAD_TYPE_FNC *start_address )( void * ), unsigned stack_size, void *arglist ); +HTSEXT_API int hts_newthread( void (*fun)(void *arg), void *arg); +#ifndef HTTRACK_DEFLIB HTSEXT_API void htsthread_wait(void ); +#endif HTSEXT_API void htsthread_wait_n(int n_wait); -#ifdef HTS_INTERNAL_BYTECODE -HTSEXT_API int htsSetLock(PTHREAD_LOCK_TYPE * hMutex,int lock); -HTSEXT_API void htsthread_init(void ); 
-HTSEXT_API void htsthread_uninit(void ); - -#if USE_PTHREAD -// unsigned long _beginthread( void* ( *start_address )( void * ), unsigned stack_size, void *arglist ); +/* Locking functions */ +HTSEXT_API void hts_mutexinit(htsmutex* mutex); +HTSEXT_API void hts_mutexfree(htsmutex* mutex); +HTSEXT_API void hts_mutexlock(htsmutex* mutex); +HTSEXT_API void hts_mutexrelease(htsmutex* mutex); -#endif +#ifdef HTS_INTERNAL_BYTECODE +/* Thread initialization */ +HTSEXT_API void htsthread_init(void); +HTSEXT_API void htsthread_uninit(void); #endif #endif - diff --git a/src/htstools.c b/src/htstools.c index ee83b09..69b6b3b 100644 --- a/src/htstools.c +++ b/src/htstools.c @@ -38,22 +38,91 @@ Please visit our Website: http://www.httrack.com /* Internal engine bytecode */ #define HTS_INTERNAL_BYTECODE -#include "htstools.h" - -/* specific definitions */ -#include "htsbase.h" -#include <ctype.h> /* String */ +#include <ctype.h> +#include "htscore.h" +#include "htstools.h" #include "htsstrings.h" -/* END specific definitions */ +#ifdef _WIN32 +#include "windows.h" +#else +#include <dirent.h> +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#include <sys/stat.h> +#endif + +// Portable directory find functions +#ifndef HTS_DEF_FWSTRUCT_find_handle_struct +#define HTS_DEF_FWSTRUCT_find_handle_struct +typedef struct find_handle_struct find_handle_struct; +#endif +#ifdef _WIN32 +struct find_handle_struct { + WIN32_FIND_DATAA hdata; + HANDLE handle; +}; +#else +struct find_handle_struct { + DIR * hdir; + struct dirent* dirp; + struct stat filestat; + char path[2048]; +}; +#endif +#ifndef HTS_DEF_FWSTRUCT_topindex_chain +#define HTS_DEF_FWSTRUCT_topindex_chain +typedef struct topindex_chain topindex_chain; +#endif +struct topindex_chain { + int level; /* sort level */ + char* category; /* category */ + char name[2048]; /* path */ + struct topindex_chain* next; /* next element */ +}; +/* Tools */ + +static int ehexh(char c) { + if ((c>='0') && (c<='9')) return c-'0'; + if ((c>='a') 
&& (c<='f')) c-=('a'-'A'); + if ((c>='A') && (c<='F')) return (c-'A'+10); + return 0; +} + +static int ehex(char* s) { + return 16*ehexh(*s)+ehexh(*(s+1)); +} + +static void unescapehttp(char* s, String* tempo) { + int i; + for (i=0;i<(int) strlen(s);i++) { + if (s[i]=='%' && s[i+1]=='%') { + i++; + StringAddchar(*tempo, '%'); + } else if (s[i]=='%') { + char hc; + i++; + hc = (char) ehex(s+i); + StringAddchar(*tempo, (char) hc); + i++; // sauter 2 caractères finalement + } + else if (s[i]=='+') { + StringAddchar(*tempo, ' '); + } + else + StringAddchar(*tempo, s[i]); + } +} + // forme à partir d'un lien et du contexte (origin_fil et origin_adr d'où il est tiré) adr et fil // [adr et fil sont des buffers de 1ko] // 0 : ok // -1 : erreur // -2 : protocole non supporté (ftp) -int ident_url_relatif(char *lien,char* origin_adr,char* origin_fil,char* adr,char* fil) { +int ident_url_relatif(const char *lien,const char* origin_adr,const char* origin_fil,char* adr,char* fil) { int ok=0; int scheme=0; @@ -64,7 +133,7 @@ int ident_url_relatif(char *lien,char* origin_adr,char* origin_fil,char* adr,cha // Scheme? { - char* a=lien; + const char* a=lien; while (isalpha((unsigned char)*a)) a++; if (*a == ':') @@ -118,8 +187,6 @@ int ident_url_relatif(char *lien,char* origin_adr,char* origin_fil,char* adr,cha )) { ok=-1; // unknown scheme } else { // c'est un lien relatif - char* a; - // On forme l'URL complète à partie de l'url actuelle // et du chemin actuel si besoin est. 
@@ -158,7 +225,7 @@ int ident_url_relatif(char *lien,char* origin_adr,char* origin_fil,char* adr,cha if (a) *a='\0'; strcatbuff(fil,lien); } else { - a=strchr(origin_fil,'?'); + const char *a=strchr(origin_fil,'?'); if (a == NULL) a=origin_fil+strlen(origin_fil); while((*a!='/') && ( a > origin_fil) ) a--; if (*a=='/') { // ok on a un '/' @@ -210,7 +277,7 @@ int ident_url_relatif(char *lien,char* origin_adr,char* origin_fil,char* adr,cha // créer dans s, à partir du chemin courant curr_fil, le lien vers link (absolu) // un ident_url_relatif a déja été fait avant, pour que link ne soit pas un chemin relatif -int lienrelatif(char* s,char* link,char* curr_fil) { +int lienrelatif(char* s,const char* link,const char* curr_fil) { char BIGSTK _curr[HTS_URLMAXSIZE*2]; char BIGSTK newcurr_fil[HTS_URLMAXSIZE*2],newlink[HTS_URLMAXSIZE*2]; char* curr; @@ -222,13 +289,16 @@ int lienrelatif(char* s,char* link,char* curr_fil) { // // patch: éliminer les ? (paramètres) sinon bug - if ( (a=strchr(curr_fil,'?')) ) { - strncatbuff(newcurr_fil,curr_fil,(int) (a - curr_fil)); - curr_fil = newcurr_fil; - } - if ( (a=strchr(link,'?')) ) { - strncatbuff(newlink,link,(int) (a - link)); - link = newlink; + { + const char* a; + if ( (a=strchr(curr_fil,'?')) ) { + strncatbuff(newcurr_fil,curr_fil,(int) (a - curr_fil)); + curr_fil = newcurr_fil; + } + if ( (a=strchr(link,'?')) ) { + strncatbuff(newlink,link,(int) (a - link)); + link = newlink; + } } // recopier uniquement le chemin courant @@ -244,7 +314,7 @@ int lienrelatif(char* s,char* link,char* curr_fil) { // sauter ce qui est commun aux 2 chemins { - char *l,*c; + const char *l,*c; if (*link=='/') link++; // sauter slash if (*curr=='/') curr++; l=link; @@ -279,8 +349,8 @@ int lienrelatif(char* s,char* link,char* curr_fil) { } /* Is the link absolute (http://www..) or relative (/bar/foo.html) ? 
*/ -int link_has_authority(char* lien) { - char* a=lien; +int link_has_authority(const char* lien) { + const char* a=lien; if (isalpha((unsigned char)*a)) { // Skip scheme? while (isalpha((unsigned char)*a)) @@ -295,10 +365,10 @@ int link_has_authority(char* lien) { return 0; } -int link_has_authorization(char* lien) { - char* adr = jump_protocol(lien); - char* firstslash = strchr(adr, '/'); - char* detect = strchr(adr, '@'); +int link_has_authorization(const char* lien) { + const char* adr = jump_protocol(lien); + const char* firstslash = strchr(adr, '/'); + const char* detect = strchr(adr, '@'); if (firstslash) { if (detect) { return (detect < firstslash); @@ -332,7 +402,8 @@ void long_to_83(int mode,char* n83,char* save) { // conversion nom de fichier/dossier isolé vers 8-3 ou ISO9660 void longfile_to_83(int mode,char* n83,char* save) { - int i=0,j=0,max=0; + int j=0,max=0; + int i = 0; char nom[256]; char ext[256]; nom[0]=ext[0]='\0'; @@ -391,7 +462,7 @@ void longfile_to_83(int mode,char* n83,char* save) { } // recopier nom nom[i]='\0'; if (save[j]) { // il reste au moins un point - i=strlen(save)-1; + i = (int) strlen(save)-1; while((i>0) && (save[i]!='.') && (save[i]!='/')) i--; // rechercher dernier . if (save[i]=='.') { // point! 
int j=0; @@ -410,33 +481,32 @@ void longfile_to_83(int mode,char* n83,char* save) { } // écrire backblue.gif -int verif_backblue(httrackp* opt,char* base) { - int* done; +int verif_backblue(httrackp* opt, const char* base) { + int* done = &opt->state.verif_backblue_done; int ret=0; - NOSTATIC_RESERVE(done, int, 1); // if (!base) { // init *done=0; return 0; } if ( (!*done) - || (fsize(fconcat(base,"backblue.gif")) != HTS_DATA_BACK_GIF_LEN)) { - FILE* fp = filecreate(fconcat(base,"backblue.gif")); + || (fsize(fconcat(OPT_GET_BUFF(opt), base,"backblue.gif")) != HTS_DATA_BACK_GIF_LEN)) { + FILE* fp = filecreate(&opt->state.strc, fconcat(OPT_GET_BUFF(opt), base,"backblue.gif")); *done=1; if (fp) { if (fwrite(HTS_DATA_BACK_GIF,HTS_DATA_BACK_GIF_LEN,1,fp) != HTS_DATA_BACK_GIF_LEN) ret=1; fclose(fp); - usercommand(opt,0,NULL,fconcat(base,"backblue.gif"),"",""); + usercommand(opt,0,NULL,fconcat(OPT_GET_BUFF(opt), base,"backblue.gif"),"",""); } else ret=1; // - fp = filecreate(fconcat(base,"fade.gif")); + fp = filecreate(&opt->state.strc, fconcat(OPT_GET_BUFF(opt), base,"fade.gif")); if (fp) { if (fwrite(HTS_DATA_FADE_GIF,HTS_DATA_FADE_GIF_LEN,1,fp) != HTS_DATA_FADE_GIF_LEN) ret=1; fclose(fp); - usercommand(opt,0,NULL,fconcat(base,"fade.gif"),"",""); + usercommand(opt,0,NULL,fconcat(OPT_GET_BUFF(opt), base,"fade.gif"),"",""); } else ret=1; } @@ -444,9 +514,8 @@ int verif_backblue(httrackp* opt,char* base) { } // flag -int verif_external(int nb,int test) { - int* status; - NOSTATIC_RESERVE(status, int, 2); +int verif_external(httrackp* opt,int nb,int test) { + int* status = &opt->state.verif_external_status; if (!test) status[nb]=0; // reset else if (!status[nb]) { @@ -580,10 +649,10 @@ HTS_INLINE int check_tag(char* from,const char* tag) { } // teste si un fichier dépasse le quota -int istoobig(LLint size,LLint maxhtml,LLint maxnhtml,char* type) { +int istoobig(httrackp *opt,LLint size,LLint maxhtml,LLint maxnhtml,char* type) { int ok=1; if (size>0) { - if 
(is_hypertext_mime(type, "")) { + if (is_hypertext_mime(opt,type, "")) { if (maxhtml>0) { if (size>maxhtml) ok=0; @@ -612,19 +681,20 @@ static int sortTopIndexFnc(const void * a_, const void * b_) { return cmp; } -HTSEXT_API char* hts_getcategory(char* filename); +HTSEXT_API char* hts_getcategory(const char* filename); -HTSEXT_API int hts_buildtopindex(httrackp* opt,char* path,char* binpath) { +HTSEXT_API int hts_buildtopindex(httrackp* opt,const char* path,const char* binpath) { FILE* fpo; int retval=0; char BIGSTK rpath[1024*2]; char *toptemplate_header=NULL,*toptemplate_body=NULL,*toptemplate_footer=NULL,*toptemplate_bodycat=NULL; - + char catbuff[CATBUFF_SIZE]; + // et templates html - toptemplate_header=readfile_or(fconcat(binpath,"templates/topindex-header.html"),HTS_INDEX_HEADER); - toptemplate_body=readfile_or(fconcat(binpath,"templates/topindex-body.html"),HTS_INDEX_BODY); - toptemplate_bodycat=readfile_or(fconcat(binpath,"templates/topindex-bodycat.html"),HTS_INDEX_BODYCAT); - toptemplate_footer=readfile_or(fconcat(binpath,"templates/topindex-footer.html"),HTS_INDEX_FOOTER); + toptemplate_header=readfile_or(fconcat(catbuff, binpath,"templates/topindex-header.html"),HTS_INDEX_HEADER); + toptemplate_body=readfile_or(fconcat(catbuff, binpath,"templates/topindex-body.html"),HTS_INDEX_BODY); + toptemplate_bodycat=readfile_or(fconcat(catbuff, binpath,"templates/topindex-bodycat.html"),HTS_INDEX_BODYCAT); + toptemplate_footer=readfile_or(fconcat(catbuff, binpath,"templates/topindex-footer.html"),HTS_INDEX_FOOTER); if (toptemplate_header && toptemplate_body && toptemplate_footer && toptemplate_bodycat) { @@ -634,11 +704,10 @@ HTSEXT_API int hts_buildtopindex(httrackp* opt,char* path,char* binpath) { rpath[strlen(rpath)-1]='\0'; } - fpo=fopen(fconcat(rpath,"/index.html"),"wb"); + fpo=fopen(fconcat(catbuff, rpath,"/index.html"),"wb"); if (fpo) { - String iname = STRING_EMPTY; find_handle h; - verif_backblue(opt,concat(rpath,"/")); // générer gif + 
verif_backblue(opt,concat(catbuff, rpath,"/")); // générer gif // Header fprintf(fpo,toptemplate_header, "<!-- Mirror and index made by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->" @@ -653,20 +722,20 @@ HTSEXT_API int hts_buildtopindex(httrackp* opt,char* path,char* binpath) { int chainSize = 0; do { if (hts_findisdir(h)) { - StringStrcpy(iname,rpath); - StringStrcat(iname,"/"); - StringStrcat(iname,hts_findgetname(h)); - StringStrcat(iname,"/index.html"); + StringCopy(iname,rpath); + StringCat(iname,"/"); + StringCat(iname,hts_findgetname(h)); + StringCat(iname,"/index.html"); if (fexist(StringBuff(iname))) { int level = 0; char* category = NULL; struct topindex_chain * oldchain=chain; /* Check for an existing category */ - StringStrcpy(iname,rpath); - StringStrcat(iname,"/"); - StringStrcat(iname,hts_findgetname(h)); - StringStrcat(iname,"/hts-cache/winprofile.ini"); + StringCopy(iname,rpath); + StringCat(iname,"/"); + StringCat(iname,hts_findgetname(h)); + StringCat(iname,"/hts-cache/winprofile.ini"); if (fexist(StringBuff(iname))) { category = hts_getcategory(StringBuff(iname)); if (category != NULL) { @@ -774,7 +843,7 @@ HTSEXT_API int hts_buildtopindex(httrackp* opt,char* path,char* binpath) { return retval; } -HTSEXT_API char* hts_getcategory(char* filename) { +HTSEXT_API char* hts_getcategory(const char* filename) { String categ = STRING_EMPTY; if (fexist(filename)) { FILE* fp = fopen(filename, "rb"); @@ -793,7 +862,7 @@ HTSEXT_API char* hts_getcategory(char* filename) { fclose(fp); } } - return StringBuff(categ); + return StringBuffRW(categ); } HTSEXT_API char* hts_getcategories(char* path, int type) { @@ -809,21 +878,19 @@ HTSEXT_API char* hts_getcategories(char* path, int type) { } h = hts_findfirst(rpath); if (h) { - struct topindex_chain * chain=NULL; - struct topindex_chain * startchain=NULL; String iname = STRING_EMPTY; if (type == 1) { hashCateg = inthash_new(127); - StringStrcat(categ, "Test category 1"); - 
StringStrcat(categ, "\r\nTest category 2"); + StringCat(categ, "Test category 1"); + StringCat(categ, "\r\nTest category 2"); } do { if (hts_findisdir(h)) { char BIGSTK line2[1024]; - StringStrcpy(iname,rpath); - StringStrcat(iname,"/"); - StringStrcat(iname,hts_findgetname(h)); - StringStrcat(iname,"/hts-cache/winprofile.ini"); + StringCopy(iname,rpath); + StringCat(iname,"/"); + StringCat(iname,hts_findgetname(h)); + StringCat(iname,"/hts-cache/winprofile.ini"); if (fexist(StringBuff(iname))) { if (type == 1) { FILE* fp = fopen(StringBuff(iname), "rb"); @@ -837,7 +904,7 @@ HTSEXT_API char* hts_getcategories(char* path, int type) { if (!inthash_read(hashCateg, line2+9, NULL)) { inthash_write(hashCateg, line2+9, 0); if (StringLength(categ) > 0) { - StringStrcat(categ, "\r\n"); + StringCat(categ, "\r\n"); } unescapehttp(line2+9, &categ); } @@ -851,9 +918,9 @@ HTSEXT_API char* hts_getcategories(char* path, int type) { } } else { if (StringLength(profiles) > 0) { - StringStrcat(profiles, "\r\n"); + StringCat(profiles, "\r\n"); } - StringStrcat(profiles, hts_findgetname(h)); + StringCat(profiles, hts_findgetname(h)); } } @@ -867,9 +934,9 @@ HTSEXT_API char* hts_getcategories(char* path, int type) { hashCateg = NULL; } if (type == 1) - return StringBuff(categ); + return StringBuffRW(categ); else - return StringBuff(profiles); + return StringBuffRW(profiles); } @@ -895,7 +962,7 @@ HTSEXT_API find_handle hts_findfirst(char* path) { find_handle_struct* find = (find_handle_struct*) calloc(1,sizeof(find_handle_struct)); if (find) { memset(find, 0, sizeof(find_handle_struct)); -#if HTS_WIN +#ifdef _WIN32 { char BIGSTK rpath[1024*2]; strcpybuff(rpath,path); @@ -931,14 +998,15 @@ HTSEXT_API find_handle hts_findfirst(char* path) { HTSEXT_API int hts_findnext(find_handle find) { if (find) { -#if HTS_WIN +#ifdef _WIN32 if ( (FindNextFileA(find->handle,&find->hdata))) return 1; #else + char catbuff[CATBUFF_SIZE]; memset(&(find->filestat), 0, sizeof(find->filestat)); if 
((find->dirp=readdir(find->hdir))) if (find->dirp->d_name) - if (!stat(concat(find->path,find->dirp->d_name),&find->filestat)) + if (!stat(concat(catbuff, find->path,find->dirp->d_name),&find->filestat)) return 1; #endif } @@ -947,7 +1015,7 @@ HTSEXT_API int hts_findnext(find_handle find) { HTSEXT_API int hts_findclose(find_handle find) { if (find) { -#if HTS_WIN +#ifdef _WIN32 if (find->handle) { FindClose(find->handle); find->handle=NULL; @@ -965,7 +1033,7 @@ HTSEXT_API int hts_findclose(find_handle find) { HTSEXT_API char* hts_findgetname(find_handle find) { if (find) { -#if HTS_WIN +#ifdef _WIN32 return find->hdata.cFileName; #else if (find->dirp) @@ -977,7 +1045,7 @@ HTSEXT_API char* hts_findgetname(find_handle find) { HTSEXT_API int hts_findgetsize(find_handle find) { if (find) { -#if HTS_WIN +#ifdef _WIN32 return find->hdata.nFileSizeLow; #else return find->filestat.st_size; @@ -989,7 +1057,7 @@ HTSEXT_API int hts_findgetsize(find_handle find) { HTSEXT_API int hts_findisdir(find_handle find) { if (find) { if (!hts_findissystem(find)) { -#if HTS_WIN +#ifdef _WIN32 if (find->hdata.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) return 1; #else @@ -1003,7 +1071,7 @@ HTSEXT_API int hts_findisdir(find_handle find) { HTSEXT_API int hts_findisfile(find_handle find) { if (find) { if (!hts_findissystem(find)) { -#if HTS_WIN +#ifdef _WIN32 if (!(find->hdata.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) return 1; #else @@ -1016,7 +1084,7 @@ HTSEXT_API int hts_findisfile(find_handle find) { } HTSEXT_API int hts_findissystem(find_handle find) { if (find) { -#if HTS_WIN +#ifdef _WIN32 if (find->hdata.dwFileAttributes & (FILE_ATTRIBUTE_SYSTEM|FILE_ATTRIBUTE_HIDDEN|FILE_ATTRIBUTE_TEMPORARY)) return 1; else if ( (!strcmp(find->hdata.cFileName,"..")) || (!strcmp(find->hdata.cFileName,".")) ) diff --git a/src/htstools.h b/src/htstools.h index 90d5d7b..f2dceb8 100644 --- a/src/htstools.h +++ b/src/htstools.h @@ -40,49 +40,25 @@ Please visit our Website: http://www.httrack.com 
#define HTSTOOLS_DEFH /* specific definitions */ -#include "htsbase.h" -#include "htscore.h" - -#ifdef _WIN32 -#else -#include <dirent.h> -#ifdef HAVE_UNISTD_H -#include <unistd.h> -#endif -#include <sys/stat.h> -#endif - -#ifndef HTTRACK_DEFLIB +#include "htsglobal.h" -// Portable directory find functions -#ifdef _WIN32 -typedef struct find_handle_struct { - WIN32_FIND_DATAA hdata; - HANDLE handle; -} find_handle_struct; -#else -typedef struct find_handle_struct { - DIR * hdir; - struct dirent* dirp; - struct stat filestat; - char path[2048]; -} find_handle_struct; +/* Forward definitions */ +#ifndef HTS_DEF_FWSTRUCT_httrackp +#define HTS_DEF_FWSTRUCT_httrackp +typedef struct httrackp httrackp; #endif +#ifndef HTS_DEF_FWSTRUCT_find_handle_struct +#define HTS_DEF_FWSTRUCT_find_handle_struct +typedef struct find_handle_struct find_handle_struct; typedef find_handle_struct* find_handle; -typedef struct topindex_chain { - int level; /* sort level */ - char* category; /* category */ - char name[2048]; /* path */ - struct topindex_chain* next; /* next element */ -} topindex_chain ; #endif /* Library internal definictions */ #ifdef HTS_INTERNAL_BYTECODE -int ident_url_relatif(char *lien,char* urladr,char* urlfil,char* adr,char* fil); -int lienrelatif(char* s,char* link,char* curr); -int link_has_authority(char* lien); -int link_has_authorization(char* lien); +int ident_url_relatif(const char *lien, const char* urladr, const char* urlfil, char* adr, char* fil); +int lienrelatif(char* s,const char* link,const char* curr); +int link_has_authority(const char* lien); +int link_has_authorization(const char* lien); void long_to_83(int mode,char* n83,char* save); void longfile_to_83(int mode,char* n83,char* save); HTS_INLINE int __rech_tageq(const char* adr,const char* s); @@ -112,11 +88,11 @@ HTS_INLINE int rech_tageq_all(const char* adr, const char* s); HTS_INLINE int rech_sampletag(const char* adr,const char* s); HTS_INLINE int rech_endtoken(const char* adr, const char** 
start); HTS_INLINE int check_tag(char* from,const char* tag); -int verif_backblue(httrackp* opt,char* base); -int verif_external(int nb,int test); +int verif_backblue(httrackp* opt, const char* base); +int verif_external(httrackp *opt,int nb,int test); -int istoobig(LLint size,LLint maxhtml,LLint maxnhtml,char* type); -HTSEXT_API int hts_buildtopindex(httrackp* opt,char* path,char* binpath); +int istoobig(httrackp *opt,LLint size,LLint maxhtml,LLint maxnhtml,char* type); +HTSEXT_API int hts_buildtopindex(httrackp* opt,const char* path,const char* binpath); // Portable directory find functions // Directory find functions @@ -130,6 +106,11 @@ HTSEXT_API int hts_findisdir(find_handle find); HTSEXT_API int hts_findisfile(find_handle find); HTSEXT_API int hts_findissystem(find_handle find); +#ifndef HTTRACK_DEFLIB +HTSEXT_API char* hts_getcategory(const char* filename); +HTSEXT_API char* hts_getcategories(char* path, int type); +#endif + #endif #endif diff --git a/src/htsweb.c b/src/htsweb.c index 51c85df..a713fe3 100644 --- a/src/htsweb.c +++ b/src/htsweb.c @@ -53,8 +53,10 @@ Please visit our Website: http://www.httrack.com #endif // htswrap_add #include "htsglobal.h" +#include "htsbasenet.h" #include "htswrap.h" #include "httrack-library.h" +#include "htsdefines.h" /* Threads */ #include "htsthread.h" @@ -71,7 +73,7 @@ Please visit our Website: http://www.httrack.com #error fatal: no threads support #endif -#if HTS_WIN +#ifdef _WIN32 #ifndef __cplusplus // DOS #include <process.h> /* _beginthread, _endthread */ @@ -79,7 +81,7 @@ Please visit our Website: http://www.httrack.com #else #endif -static PTHREAD_LOCK_TYPE refreshMutex; +static htsmutex refreshMutex = HTSMUTEX_INIT; static int help_server(char* dest_path, int defaultPort); extern int commandRunning; @@ -128,7 +130,6 @@ int main(int argc, char* argv[]) /* init and launch */ hts_init(); htslang_init(); - webhttrack_lock(-999); /* set general keys */ #ifdef HTS_ETCPATH @@ -172,7 +173,7 @@ int main(int argc, 
char* argv[]) smallserver_setkey("HTTRACK_AFF_VERSION", HTTRACK_AFF_VERSION); { char tmp[32]; - sprintf(tmp, "%d", HTS_PLATFORM); + sprintf(tmp, "%d", -1); smallserver_setkey("HTS_PLATFORM", tmp); } smallserver_setkey("HTTRACK_WEB", HTTRACK_WEB); @@ -181,7 +182,7 @@ int main(int argc, char* argv[]) { char buff[1024]; char digest[32 + 2]; - srand(time(NULL)); + srand((unsigned int)time(NULL)); sprintf(buff, "%d-%d", (int)time(NULL), (int)rand()); domd5mem(buff,strlen(buff),digest,1); smallserver_setkey("sid", digest); @@ -192,7 +193,7 @@ int main(int argc, char* argv[]) for(i = 2 ; i < argc ; i += 2) { if (strcmp(argv[i], "--port") == 0) { if (sscanf(argv[i + 1], "%d", &defaultPort) != 1 || defaultPort < 0 || defaultPort >= 65535 ) { - fprintf(stderr, "couldn't set the port number to %d\n", argv[i + 1]); + fprintf(stderr, "couldn't set the port number to %s\n", argv[i + 1]); return -1; } } else { @@ -218,13 +219,15 @@ int main(int argc, char* argv[]) return ret; } -static int webhttrack_runmain(int argc, char** argv); -static PTHREAD_TYPE PTHREAD_TYPE_FNC back_launch_cmd( void* pP ) { +static int webhttrack_runmain(httrackp *opt, int argc, char** argv); +static void back_launch_cmd( void* pP ) { char* cmd = (char*) pP; char** argv = (char**) malloct(1024 * sizeof(char*)); int argc = 0; int i = 0; int g = 0; + // + httrackp *opt; /* copy commandline */ if (commandReturnCmdl) @@ -253,15 +256,24 @@ static PTHREAD_TYPE PTHREAD_TYPE_FNC back_launch_cmd( void* pP ) { } i++; } - + + /* init */ + hts_init(); + global_opt = opt = hts_create_opt(); + /* run */ - commandReturn = webhttrack_runmain(argc, argv); + commandReturn = webhttrack_runmain(opt, argc, argv); if (commandReturn) { if (commandReturnMsg) free(commandReturnMsg); - commandReturnMsg = strdup(hts_errmsg()); + commandReturnMsg = strdup(hts_errmsg(opt)); } + /* free */ + global_opt = NULL; + hts_free_opt(opt); + hts_uninit(); + /* okay */ commandRunning = 0; @@ -271,47 +283,53 @@ static PTHREAD_TYPE 
PTHREAD_TYPE_FNC back_launch_cmd( void* pP ) { /* free */ free(cmd); freet(argv); - return PTHREAD_RETURN; + return ; } void webhttrack_main(char* cmd) { commandRunning = 1; - (void)hts_newthread(back_launch_cmd, 0, (void*) strdup(cmd)); + hts_newthread(back_launch_cmd, (void*) strdup(cmd)); } -/* Internal locking */ -HTSEXT_API int htsSetLock(PTHREAD_LOCK_TYPE * hMutex,int lock); +void webhttrack_lock(void) { + hts_mutexlock(&refreshMutex); +} -void webhttrack_lock(int lock) { - htsSetLock(&refreshMutex, lock); +void webhttrack_release(void) { + hts_mutexrelease(&refreshMutex); } -static int webhttrack_runmain(int argc, char** argv) { - hts_init(); - htswrap_add("init",htsshow_init); - htswrap_add("free",htsshow_uninit); - htswrap_add("start",htsshow_start); - htswrap_add("change-options",htsshow_chopt); - htswrap_add("end",htsshow_end); - htswrap_add("preprocess-html",htsshow_preprocesshtml); - htswrap_add("check-html",htsshow_checkhtml); - htswrap_add("loop",htsshow_loop); - htswrap_add("query",htsshow_query); - htswrap_add("query2",htsshow_query2); - htswrap_add("query3",htsshow_query3); - htswrap_add("check-link",htsshow_check); - htswrap_add("check-mime",htsshow_check_mime); - htswrap_add("pause",htsshow_pause); - htswrap_add("save-file",htsshow_filesave); - htswrap_add("save-file2",htsshow_filesave2); - htswrap_add("link-detected",htsshow_linkdetected); - htswrap_add("link-detected2",htsshow_linkdetected2); - htswrap_add("transfer-status",htsshow_xfrstatus); - htswrap_add("save-name",htsshow_savename); +static int webhttrack_runmain(httrackp *opt, int argc, char** argv) { + int ret; + + CHAIN_FUNCTION(opt, init, htsshow_init, NULL); + CHAIN_FUNCTION(opt, uninit, htsshow_uninit, NULL); + CHAIN_FUNCTION(opt, start, htsshow_start, NULL); + CHAIN_FUNCTION(opt, end, htsshow_end, NULL); + CHAIN_FUNCTION(opt, chopt, htsshow_chopt, NULL); + CHAIN_FUNCTION(opt, preprocess, htsshow_preprocesshtml, NULL); + CHAIN_FUNCTION(opt, postprocess, htsshow_postprocesshtml, 
NULL); + CHAIN_FUNCTION(opt, check_html, htsshow_checkhtml, NULL); + CHAIN_FUNCTION(opt, query, htsshow_query, NULL); + CHAIN_FUNCTION(opt, query2, htsshow_query2, NULL); + CHAIN_FUNCTION(opt, query3, htsshow_query3, NULL); + CHAIN_FUNCTION(opt, loop, htsshow_loop, NULL); + CHAIN_FUNCTION(opt, check_link, htsshow_check, NULL); + CHAIN_FUNCTION(opt, check_mime, htsshow_check_mime, NULL); + CHAIN_FUNCTION(opt, pause, htsshow_pause, NULL); + CHAIN_FUNCTION(opt, filesave, htsshow_filesave, NULL); + CHAIN_FUNCTION(opt, filesave2, htsshow_filesave2, NULL); + CHAIN_FUNCTION(opt, linkdetected, htsshow_linkdetected, NULL); + CHAIN_FUNCTION(opt, linkdetected2, htsshow_linkdetected2, NULL); + CHAIN_FUNCTION(opt, xfrstatus, htsshow_xfrstatus, NULL); + CHAIN_FUNCTION(opt, savename, htsshow_savename, NULL); + CHAIN_FUNCTION(opt, sendhead, htsshow_sendheader, NULL); + CHAIN_FUNCTION(opt, receivehead, htsshow_receiveheader, NULL); + + ret = hts_main2(argc, argv, opt); htsthread_wait_n(1); - hts_uninit(); - return hts_main(argc,argv); - + + return ret; } static int help_server(char* dest_path, int defaultPort) { @@ -337,7 +355,8 @@ static int help_server(char* dest_path, int defaultPort) { fflush(stderr); // if (!smallserver(soc,url,method,data,dest_path)) { - fprintf(stderr, "Unable to create the server: %s\n", strerror(errno)); + int last_errno = errno; + fprintf(stderr, "Unable to create the server: %s\n", strerror(last_errno)); #ifdef _WIN32 closesocket(soc); #else @@ -362,26 +381,29 @@ static int help_server(char* dest_path, int defaultPort) { /* CALLBACK FUNCTIONS */ /* Initialize the Winsock */ -void __cdecl htsshow_init(void) { +void __cdecl htsshow_init(t_hts_callbackarg *carg) { } -void __cdecl htsshow_uninit(void) { +void __cdecl htsshow_uninit(t_hts_callbackarg *carg) { } -int __cdecl htsshow_start(httrackp* opt) { +int __cdecl htsshow_start(t_hts_callbackarg *carg, httrackp* opt) { return 1; } -int __cdecl htsshow_chopt(httrackp* opt) { - return htsshow_start(opt); 
+int __cdecl htsshow_chopt(t_hts_callbackarg *carg, httrackp* opt) { + return htsshow_start(carg, opt); } -int __cdecl htsshow_end(void) { +int __cdecl htsshow_end(t_hts_callbackarg *carg, httrackp* opt) { return 1; } -int __cdecl htsshow_preprocesshtml(char** html,int* len,char* url_adresse,char* url_fichier) { +int __cdecl htsshow_preprocesshtml(t_hts_callbackarg *carg, httrackp *opt, char** html,int* len,const char* url_address,const char* url_file) { + return 1; +} +int __cdecl htsshow_postprocesshtml(t_hts_callbackarg *carg, httrackp *opt, char** html,int* len,const char* url_address,const char* url_file) { return 1; } -int __cdecl htsshow_checkhtml(char* html,int len,char* url_adresse,char* url_fichier) { +int __cdecl htsshow_checkhtml(t_hts_callbackarg *carg, httrackp *opt, char* html,int len,const char* url_address,const char* url_file) { return 1; } -int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int lien_n,int lien_tot,int stat_time, hts_stat_struct* stats) { // appelé à chaque boucle de HTTrack +int __cdecl htsshow_loop(t_hts_callbackarg *carg, httrackp *opt, lien_back* back,int back_max,int back_index,int lien_n,int lien_tot,int stat_time, hts_stat_struct* stats) { // appelé à chaque boucle de HTTrack static TStamp prev_mytime=0; /* ok */ static t_InpInfo SInfo; /* ok */ // @@ -407,7 +429,7 @@ int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int lien_n, return 0; /* Lock */ - webhttrack_lock(1); + webhttrack_lock(); if (stats) { stat_written=stats->stat_files; @@ -506,22 +528,22 @@ int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int lien_n, } break; case 1: - if (back[i].status==99) { + if (back[i].status==STATUS_WAIT_HEADERS) { strcpybuff(StatsBuffer[index].state,"request"); ok=1; } - else if (back[i].status==100) { + else if (back[i].status==STATUS_CONNECTING) { strcpybuff(StatsBuffer[index].state,"connect"); ok=1; } - else if (back[i].status==101) { + else if 
(back[i].status==STATUS_WAIT_DNS) { strcpybuff(StatsBuffer[index].state,"search"); ok=1; } - else if (back[i].status==1000) { // ohh le beau ftp + else if (back[i].status==STATUS_FTP_TRANSFER) { // ohh le beau ftp char proto[] = "ftp"; if (back[i].url_adr[0]) { char* ep = strchr(back[i].url_adr, ':'); char* eps = strchr(back[i].url_adr, '/'); int count; - if (ep != NULL && ep < eps && (count = (ep - back[i].url_adr) ) < 4) { + if (ep != NULL && ep < eps && (count = (int) (ep - back[i].url_adr) ) < 4) { proto[0] = '\0'; strncat(proto, back[i].url_adr, count); } @@ -530,8 +552,8 @@ int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int lien_n, } break; default: - if (back[i].status==0) { // prêt - if ((back[i].r.statuscode==200)) { + if (back[i].status==STATUS_READY) { // prêt + if ((back[i].r.statuscode==HTTP_OK)) { strcpybuff(StatsBuffer[index].state,"ready"); ok=1; } else if ((back[i].r.statuscode>=100) && (back[i].r.statuscode<=599)) { @@ -570,7 +592,7 @@ int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int lien_n, } } - if ((l=strlen(s))<MAX_LEN_INPROGRESS) + if ((l = (int) strlen(s))<MAX_LEN_INPROGRESS) strcpybuff(StatsBuffer[index].name,s); else { // couper @@ -584,7 +606,7 @@ int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int lien_n, StatsBuffer[index].sizetot=back[i].r.totalsize; StatsBuffer[index].size=back[i].r.size; } else { // pas de taille prédéfinie - if (back[i].status==0) { // prêt + if (back[i].status==STATUS_READY) { // prêt StatsBuffer[index].sizetot=back[i].r.size; StatsBuffer[index].size=back[i].r.size; } else { @@ -604,12 +626,12 @@ int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int lien_n, int parsing=0; if (commandEndRequested) smallserver_setkey("info.currentjob", "finishing pending transfers - Select [Cancel] to stop now!"); - else if (!(parsing=hts_is_parsing(-1))) + else if (!(parsing=hts_is_parsing(opt, -1))) smallserver_setkey("info.currentjob", "receiving 
files"); else { char tmp[1024]; tmp[0] = '\0'; - switch(hts_is_testing()) { + switch(hts_is_testing(opt)) { case 0: sprintf(tmp, "parsing HTML file (%d%%)",parsing); break; @@ -638,11 +660,12 @@ int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int lien_n, int i; for(i=0;i<NStatsBuffer;i++) { if (strnotempty(StatsBuffer[i].state)) { + strc_int2bytes2 strc; smallserver_setkeyarr("info.state[", i, "]", StatsBuffer[i].state); smallserver_setkeyarr("info.name[", i, "]", StatsBuffer[i].name); smallserver_setkeyarr("info.file[", i, "]", StatsBuffer[i].file); - smallserver_setkeyarr("info.size[", i, "]", int2bytes(StatsBuffer[i].size)); - smallserver_setkeyarr("info.sizetot[", i, "]", int2bytes(StatsBuffer[i].sizetot)); + smallserver_setkeyarr("info.size[", i, "]", int2bytes(&strc,StatsBuffer[i].size)); + smallserver_setkeyarr("info.sizetot[", i, "]", int2bytes(&strc,StatsBuffer[i].sizetot)); smallserver_setkeyarr("info.url_adr[", i, "]", StatsBuffer[i].url_adr); smallserver_setkeyarr("info.url_fil[", i, "]", StatsBuffer[i].url_fil); smallserver_setkeyarr("info.url_sav[", i, "]", StatsBuffer[i].url_sav); @@ -656,50 +679,50 @@ int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int lien_n, } /* UnLock */ - webhttrack_lock(0); + webhttrack_release(); return 1; } -char* __cdecl htsshow_query(char* question) { +const char* __cdecl htsshow_query(t_hts_callbackarg *carg, httrackp *opt, const char* question) { static char s[]=""; /* ok */ return s; } -char* __cdecl htsshow_query2(char* question) { +const char* __cdecl htsshow_query2(t_hts_callbackarg *carg, httrackp *opt, const char* question) { static char s[]=""; /* ok */ return s; } -char* __cdecl htsshow_query3(char* question) { +const char* __cdecl htsshow_query3(t_hts_callbackarg *carg, httrackp *opt, const char* question) { static char s[]=""; /* ok */ return s; } -int __cdecl htsshow_check(char* adr,char* fil,int status) { +int __cdecl htsshow_check(t_hts_callbackarg *carg, httrackp 
*opt, const char* adr,const char* fil,int status) { return -1; } -int __cdecl htsshow_check_mime(char* adr,char* fil,char* mime,int status) { +int __cdecl htsshow_check_mime(t_hts_callbackarg *carg, httrackp *opt, const char* adr,const char* fil,const char* mime,int status) { return -1; } -void __cdecl htsshow_pause(char* lockfile) { +void __cdecl htsshow_pause(t_hts_callbackarg *carg, httrackp *opt, const char* lockfile) { } -void __cdecl htsshow_filesave(char* file) { +void __cdecl htsshow_filesave(t_hts_callbackarg *carg, httrackp *opt, const char* file) { } -void __cdecl htsshow_filesave2(char* adr, char* fil, char* save, int is_new, int is_modified,int not_updated) { +void __cdecl htsshow_filesave2(t_hts_callbackarg *carg, httrackp *opt, const char* adr, const char* fil, const char* save, int is_new, int is_modified,int not_updated) { } -int __cdecl htsshow_linkdetected(char* link) { +int __cdecl htsshow_linkdetected(t_hts_callbackarg *carg, httrackp *opt, char* link) { return 1; } -int __cdecl htsshow_linkdetected2(char* link, char* start_tag) { +int __cdecl htsshow_linkdetected2(t_hts_callbackarg *carg, httrackp *opt, char* link, const char* start_tag) { return 1; } -int __cdecl htsshow_xfrstatus(lien_back* back) { +int __cdecl htsshow_xfrstatus(t_hts_callbackarg *carg, httrackp *opt, lien_back* back) { return 1; } -int __cdecl htsshow_savename(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save) { +int __cdecl htsshow_savename(t_hts_callbackarg *carg, httrackp *opt, const char* adr_complete,const char* fil_complete,const char* referer_adr,const char* referer_fil,char* save) { return 1; } -int __cdecl htsshow_sendheader(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* outgoing) { +int __cdecl htsshow_sendheader(t_hts_callbackarg *carg, httrackp *opt, char* buff, const char* adr, const char* fil, const char* referer_adr, const char* referer_fil, htsblk* outgoing) { return 1; } -int __cdecl 
htsshow_receiveheader(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* incoming) { +int __cdecl htsshow_receiveheader(t_hts_callbackarg *carg, httrackp *opt, char* buff, const char* adr, const char* fil, const char* referer_adr, const char* referer_fil, htsblk* incoming) { return 1; } diff --git a/src/htsweb.h b/src/htsweb.h index 4f9439d..d2ee716 100644 --- a/src/htsweb.h +++ b/src/htsweb.h @@ -80,31 +80,33 @@ typedef struct t_InpInfo { } t_InpInfo; // wrappers -void __cdecl htsshow_init(void); -void __cdecl htsshow_uninit(void); -int __cdecl htsshow_start(httrackp* opt); -int __cdecl htsshow_chopt(httrackp* opt); -int __cdecl htsshow_end(void); -int __cdecl htsshow_preprocesshtml(char** html,int* len,char* url_adresse,char* url_fichier); -int __cdecl htsshow_checkhtml(char* html,int len,char* url_adresse,char* url_fichier); -int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int lien_n,int lien_tot,int stat_time,hts_stat_struct* stats); -char* __cdecl htsshow_query(char* question); -char* __cdecl htsshow_query2(char* question); -char* __cdecl htsshow_query3(char* question); -int __cdecl htsshow_check(char* adr,char* fil,int status); -int __cdecl htsshow_check_mime(char* adr,char* fil,char* mime,int status); -void __cdecl htsshow_pause(char* lockfile); -void __cdecl htsshow_filesave(char* file); -void __cdecl htsshow_filesave2(char* adr, char* fil, char* save, int is_new, int is_modified, int not_updated); -int __cdecl htsshow_linkdetected(char* link); -int __cdecl htsshow_linkdetected2(char* link, char* start_tag); -int __cdecl htsshow_xfrstatus(lien_back* back); -int __cdecl htsshow_savename(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save); -int __cdecl htsshow_sendheader(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* outgoing); -int __cdecl htsshow_receiveheader(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* 
incoming); +void __cdecl htsshow_init(t_hts_callbackarg *carg); +void __cdecl htsshow_uninit(t_hts_callbackarg *carg); +int __cdecl htsshow_start(t_hts_callbackarg *carg, httrackp* opt); +int __cdecl htsshow_chopt(t_hts_callbackarg *carg, httrackp* opt); +int __cdecl htsshow_end(t_hts_callbackarg *carg, httrackp* opt); +int __cdecl htsshow_preprocesshtml(t_hts_callbackarg *carg, httrackp *opt, char** html,int* len,const char* url_address,const char* url_file); +int __cdecl htsshow_postprocesshtml(t_hts_callbackarg *carg, httrackp *opt, char** html,int* len,const char* url_address,const char* url_file); +int __cdecl htsshow_checkhtml(t_hts_callbackarg *carg, httrackp *opt, char* html,int len,const char* url_address,const char* url_file); +int __cdecl htsshow_loop(t_hts_callbackarg *carg, httrackp *opt, lien_back* back,int back_max,int back_index,int lien_n,int lien_tot,int stat_time,hts_stat_struct* stats); +const char* __cdecl htsshow_query(t_hts_callbackarg *carg, httrackp *opt, const char* question); +const char* __cdecl htsshow_query2(t_hts_callbackarg *carg, httrackp *opt, const char* question); +const char* __cdecl htsshow_query3(t_hts_callbackarg *carg, httrackp *opt, const char* question); +int __cdecl htsshow_check(t_hts_callbackarg *carg, httrackp *opt, const char* adr,const char* fil,int status); +int __cdecl htsshow_check_mime(t_hts_callbackarg *carg, httrackp *opt, const char* adr,const char* fil,const char* mime,int status); +void __cdecl htsshow_pause(t_hts_callbackarg *carg, httrackp *opt, const char* lockfile); +void __cdecl htsshow_filesave(t_hts_callbackarg *carg, httrackp *opt, const char* file); +void __cdecl htsshow_filesave2(t_hts_callbackarg *carg, httrackp *opt, const char* adr, const char* fil, const char* save, int is_new, int is_modified, int not_updated); +int __cdecl htsshow_linkdetected(t_hts_callbackarg *carg, httrackp *opt, char* link); +int __cdecl htsshow_linkdetected2(t_hts_callbackarg *carg, httrackp *opt, char* link, const char* 
start_tag); +int __cdecl htsshow_xfrstatus(t_hts_callbackarg *carg, httrackp *opt, lien_back* back); +int __cdecl htsshow_savename(t_hts_callbackarg *carg, httrackp *opt, const char* adr_complete,const char* fil_complete,const char* referer_adr,const char* referer_fil,char* save); +int __cdecl htsshow_sendheader(t_hts_callbackarg *carg, httrackp *opt, char* buff, const char* adr, const char* fil, const char* referer_adr, const char* referer_fil, htsblk* outgoing); +int __cdecl htsshow_receiveheader(t_hts_callbackarg *carg, httrackp *opt, char* buff, const char* adr, const char* fil, const char* referer_adr, const char* referer_fil, htsblk* incoming); int main(int argc, char **argv); void webhttrack_main(char* cmd); -void webhttrack_lock(int lock); +void webhttrack_lock(void); +void webhttrack_release(void); #endif diff --git a/src/htswizard.c b/src/htswizard.c index ab851bf..80cbbda 100644 --- a/src/htswizard.c +++ b/src/htswizard.c @@ -38,8 +38,8 @@ Please visit our Website: http://www.httrack.com /* Internal engine bytecode */ #define HTS_INTERNAL_BYTECODE +#include "htscore.h" #include "htswizard.h" -#include "htsdefines.h" /* specific definitions */ #include "htsbase.h" @@ -48,7 +48,7 @@ Please visit our Website: http://www.httrack.com // version 1 pour httpmirror // flusher si on doit lire peu à peu le fichier -#define test_flush if (opt->flush) { fflush(opt->log); fflush(opt->errlog); } +#define test_flush if (opt->flush) { fflush(opt->log); fflush(opt->log); } // pour alléger la syntaxe, des raccourcis sont créés #define urladr (liens[ptr]->adr) @@ -126,16 +126,14 @@ int hts_acceptlink(httrackp* opt, int prev_prio = set_prio_to ? 
*set_prio_to : 0; // -------------------- PHASE 6 -------------------- -#if HTS_ANALYSTE - if (hts_htmlcheck_check != NULL) { - int test_url = hts_htmlcheck_check(adr, fil, forbidden_url); + { + int test_url = RUN_CALLBACK3(opt, check_link, adr, fil, forbidden_url); if (test_url != -1) { forbidden_url = test_url; if (set_prio_to) *set_prio_to = prev_prio; } } -#endif return forbidden_url; } @@ -165,7 +163,7 @@ static int hts_acceptlink_(httrackp* opt, /* Infos */ if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"wizard test begins: %s%s"LF,adr,fil); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"wizard test begins: %s%s"LF,adr,fil); test_flush; } @@ -173,7 +171,7 @@ static int hts_acceptlink_(httrackp* opt, if (adr[0] != '\0' && fil[0] != '\0' && opt->hash != NULL - && hash_read((hash_struct*)opt->hash, adr, fil, 1, opt->urlhack) >= 0 + && hash_read(opt->hash, adr, fil, 1, opt->urlhack) >= 0 ) { return 0; /* Yokai */ } @@ -199,11 +197,11 @@ static int hts_acceptlink_(httrackp* opt, /* Doit-on traiter les non html? 
*/ if ((opt->getmode & 2)==0) { // non on ne doit pas - if (!ishtml(fil)) { // non il ne faut pas + if (!ishtml(opt,fil)) { // non il ne faut pas //adr[0]='\0'; // ne pas traiter ce lien, pas traiter forbidden_url=1; // interdire récupération du lien if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"non-html file ignored at %s : %s"LF,adr,fil); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"non-html file ignored at %s : %s"LF,adr,fil); test_flush; } @@ -215,7 +213,7 @@ static int hts_acceptlink_(httrackp* opt, if ( ( liens[ptr]->depth <= 0 ) || ( liens[ptr]->depth <= 1 && !embedded_triggered ) ) { forbidden_url=1; // interdire récupération du lien if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"file from too far level ignored at %s : %s"LF,adr,fil); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"file from too far level ignored at %s : %s"LF,adr,fil); test_flush; } } @@ -232,7 +230,7 @@ static int hts_acceptlink_(httrackp* opt, // doit-on traiter ce lien?.. 
vérifier droits de déplacement meme_adresse=strfield2(adr,urladr); if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); + HTS_LOG(opt,LOG_DEBUG); if (meme_adresse) fprintf(opt->log,"Compare addresses: %s=%s"LF,adr,urladr); else @@ -253,7 +251,7 @@ static int hts_acceptlink_(httrackp* opt, if (lienrelatif(tempo,fil,liens[liens[ptr]->premier]->fil)==0) { if (lienrelatif(tempo2,fil,liens[ptr]->fil)==0) { if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"build relative links to test: %s %s (with %s and %s)"LF,tempo,tempo2,liens[liens[ptr]->premier]->fil,liens[ptr]->fil); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"build relative links to test: %s %s (with %s and %s)"LF,tempo,tempo2,liens[liens[ptr]->premier]->fil,liens[ptr]->fil); test_flush; } @@ -275,7 +273,7 @@ static int hts_acceptlink_(httrackp* opt, if (!liens[ptr]->link_import) { // ne résulte pas d'un 'moved' forbidden_url=0; if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"same level link authorized: %s%s"LF,adr,fil); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"same level link authorized: %s%s"LF,adr,fil); test_flush; } } @@ -296,14 +294,14 @@ static int hts_acceptlink_(httrackp* opt, if ((opt->seeker & 1)==0) { // interdiction de descendre forbidden_url=1; if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"lower link canceled: %s%s"LF,adr,fil); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"lower link canceled: %s%s"LF,adr,fil); test_flush; } } else { // autorisé à priori - NEW if (!liens[ptr]->link_import) { // ne résulte pas d'un 'moved' forbidden_url=0; if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"lower link authorized: %s%s"LF,adr,fil); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"lower link authorized: %s%s"LF,adr,fil); test_flush; } } @@ -313,7 +311,7 @@ static int hts_acceptlink_(httrackp* opt, if (!liens[ptr]->link_import) { // ne résulte pas 
d'un 'moved' forbidden_url=0; if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"lower link authorized: %s%s"LF,adr,fil); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"lower link authorized: %s%s"LF,adr,fil); test_flush; } } @@ -327,14 +325,14 @@ static int hts_acceptlink_(httrackp* opt, if ((opt->seeker & 2)==0) { // interdiction de monter forbidden_url=1; if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"upper link canceled: %s%s"LF,adr,fil); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"upper link canceled: %s%s"LF,adr,fil); test_flush; } } else { // autorisé à monter - NEW if (!liens[ptr]->link_import) { // ne résulte pas d'un 'moved' forbidden_url=0; if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"upper link authorized: %s%s"LF,adr,fil); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"upper link authorized: %s%s"LF,adr,fil); test_flush; } } @@ -344,7 +342,7 @@ static int hts_acceptlink_(httrackp* opt, if (!liens[ptr]->link_import) { // ne résulte pas d'un 'moved' forbidden_url=0; if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"upper link authorized: %s%s"LF,adr,fil); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"upper link authorized: %s%s"LF,adr,fil); test_flush; } } @@ -353,14 +351,14 @@ static int hts_acceptlink_(httrackp* opt, } else { - if (opt->errlog) { - fprintf(opt->errlog,"Error building relative link %s and %s"LF,fil,liens[ptr]->fil); + if (opt->log) { + fprintf(opt->log,"Error building relative link %s and %s"LF,fil,liens[ptr]->fil); test_flush; } } } else { - if (opt->errlog) { - fprintf(opt->errlog,"Error building relative link %s and %s"LF,fil,liens[liens[ptr]->premier]->fil); + if (opt->log) { + fprintf(opt->log,"Error building relative link %s and %s"LF,fil,liens[liens[ptr]->premier]->fil); test_flush; } } @@ -373,15 +371,15 @@ static int hts_acceptlink_(httrackp* opt, if 
(lienrelatif(tempo,fil,liens[liens[ptr]->premier]->fil)==0) { if (lienrelatif(tempo2,fil,liens[ptr]->fil)==0) { } else { - if (opt->errlog) { - fprintf(opt->errlog,"Error building relative link %s and %s"LF,fil,liens[ptr]->fil); + if (opt->log) { + fprintf(opt->log,"Error building relative link %s and %s"LF,fil,liens[ptr]->fil); test_flush; } } } else { - if (opt->errlog) { - fprintf(opt->errlog,"Error building relative link %s and %s"LF,fil,liens[liens[ptr]->premier]->fil); + if (opt->log) { + fprintf(opt->log,"Error building relative link %s and %s"LF,fil,liens[liens[ptr]->premier]->fil); test_flush; } @@ -397,8 +395,8 @@ static int hts_acceptlink_(httrackp* opt, if (!opt->wizard) // mode non wizard forbidden_url=1; break; // interdicton de sortir au dela de l'adresse case 1: { // sortie sur le même dom.xxx - int i=strlen(adr)-1; - int j=strlen(urladr)-1; + size_t i = strlen(adr)-1; + size_t j = strlen(urladr)-1; while( (i>0) && (adr[i]!='.')) i--; while( (j>0) && (urladr[j]!='.')) j--; i--; j--; @@ -410,7 +408,7 @@ static int hts_acceptlink_(httrackp* opt, //printf("refused: %s\n",adr); forbidden_url=1; // pas même domaine if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"foreign domain link canceled: %s%s"LF,adr,fil); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"foreign domain link canceled: %s%s"LF,adr,fil); test_flush; } } @@ -419,7 +417,7 @@ static int hts_acceptlink_(httrackp* opt, if (opt->wizard) { // mode wizard forbidden_url=0; // même domaine if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"same domain link authorized: %s%s"LF,adr,fil); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"same domain link authorized: %s%s"LF,adr,fil); test_flush; } } @@ -430,8 +428,8 @@ static int hts_acceptlink_(httrackp* opt, } break; case 2: { // sortie sur le même .xxx - int i=strlen(adr)-1; - int j=strlen(urladr)-1; + size_t i = strlen(adr)-1; + size_t j = strlen(urladr)-1; while( (i>0) && 
(adr[i]!='.')) i--; while( (j>0) && (urladr[j]!='.')) j--; if ((i>0) && (j>0)) { @@ -440,7 +438,7 @@ static int hts_acceptlink_(httrackp* opt, //printf("refused: %s\n",adr); forbidden_url=1; // pas même .xx if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"foreign location link canceled: %s%s"LF,adr,fil); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"foreign location link canceled: %s%s"LF,adr,fil); test_flush; } } @@ -448,7 +446,7 @@ static int hts_acceptlink_(httrackp* opt, if (opt->wizard) { // mode wizard forbidden_url=0; // même domaine if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"same location link authorized: %s%s"LF,adr,fil); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"same location link authorized: %s%s"LF,adr,fil); test_flush; } } @@ -471,12 +469,12 @@ static int hts_acceptlink_(httrackp* opt, // récupérer les liens à côtés d'un lien (nearlink) (nvelle pos) if (forbidden_url != 0 && opt->nearlink) { - if (!ishtml(fil)) { // non html + if (!ishtml(opt,fil)) { // non html //printf("ok %s%s\n",ad,fil); forbidden_url=0; // autoriser may_set_prio_to=1+1; // set prio to 1 (parse but skip urls) if near is the winner if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"near link authorized: %s%s"LF,adr,fil); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"near link authorized: %s%s"LF,adr,fil); test_flush; } } @@ -489,7 +487,7 @@ static int hts_acceptlink_(httrackp* opt, forbidden_url=0; // autoriser may_set_prio_to=1+1; // set prio to 1 (parse but skip urls) if near is the winner if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"near link authorized (friendly tag): %s%s"LF,adr,fil); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"near link authorized (friendly tag): %s%s"LF,adr,fil); test_flush; } } @@ -549,9 +547,9 @@ static int hts_acceptlink_(httrackp* opt, question=0; // résolution auto if ((opt->debug>1) && 
(opt->log!=NULL)) { if (question) { - fspc(opt->log,"debug"); fprintf(opt->log,"(wizard) ambiguous link accepted (external depth): link %s at %s%s"LF,l,urladr,urlfil); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(wizard) ambiguous link accepted (external depth): link %s at %s%s"LF,l,urladr,urlfil); } else { - fspc(opt->log,"debug"); fprintf(opt->log,"(wizard) forced to accept link (external depth): link %s at %s%s"LF,l,urladr,urlfil); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(wizard) forced to accept link (external depth): link %s at %s%s"LF,l,urladr,urlfil); } test_flush; } @@ -591,7 +589,7 @@ static int hts_acceptlink_(httrackp* opt, forbidden_url=0; // URL autorisée may_set_prio_to=0; // clear may-set flag if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"(wizard) explicit authorized (%s) link: link %s at %s%s"LF,mdepth,l,urladr,urlfil); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(wizard) explicit authorized (%s) link: link %s at %s%s"LF,mdepth,l,urladr,urlfil); test_flush; } } else if (jok == -1) { // forbidden @@ -599,7 +597,7 @@ static int hts_acceptlink_(httrackp* opt, question=0; // ne pas poser de question: forbidden_url=1; // URL interdite if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"(wizard) explicit forbidden (%s) link: link %s at %s%s"LF,mdepth,l,urladr,urlfil); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(wizard) explicit forbidden (%s) link: link %s at %s%s"LF,mdepth,l,urladr,urlfil); test_flush; } } // sinon on touche à rien @@ -615,7 +613,7 @@ static int hts_acceptlink_(httrackp* opt, question=1; // résolution auto force_mirror=5; // mirror (5) if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"(wizard) explicit mirror link: link %s at %s%s"LF,l,urladr,urlfil); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(wizard) explicit mirror link: link %s at %s%s"LF,l,urladr,urlfil); test_flush; } } @@ -629,7 +627,7 @@ static int 
hts_acceptlink_(httrackp* opt, question=0; forbidden_url=1; if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"(wizard) ambiguous forbidden link: link %s at %s%s"LF,l,urladr,urlfil); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(wizard) ambiguous forbidden link: link %s at %s%s"LF,l,urladr,urlfil); test_flush; } } @@ -647,7 +645,7 @@ static int hts_acceptlink_(httrackp* opt, r=0; // annuler interdiction des robots if (!forbidden_url) { if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"Warning link followed against robots.txt: link %s at %s%s"LF,l,adr,fil); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"Warning link followed against robots.txt: link %s at %s%s"LF,l,adr,fil); test_flush; } } @@ -656,7 +654,7 @@ static int hts_acceptlink_(httrackp* opt, forbidden_url=1; question=0; if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"(robots.txt) forbidden link: link %s at %s%s"LF,l,adr,fil); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(robots.txt) forbidden link: link %s at %s%s"LF,l,adr,fil); test_flush; } } @@ -666,9 +664,9 @@ static int hts_acceptlink_(httrackp* opt, if (!question) { if ((opt->debug>1) && (opt->log!=NULL)) { if (!forbidden_url) { - fspc(opt->log,"debug"); fprintf(opt->log,"(wizard) shared foreign domain link: link %s at %s%s"LF,l,urladr,urlfil); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(wizard) shared foreign domain link: link %s at %s%s"LF,l,urladr,urlfil); } else { - fspc(opt->log,"debug"); fprintf(opt->log,"(wizard) cancelled foreign domain link: link %s at %s%s"LF,l,urladr,urlfil); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(wizard) cancelled foreign domain link: link %s at %s%s"LF,l,urladr,urlfil); } test_flush; } @@ -681,11 +679,7 @@ static int hts_acceptlink_(httrackp* opt, /* en cas de question, ou lien primaire (enregistrer autorisations) */ if (question || (ptr==0)) { -#if HTS_ANALYSTE - char* s; -#else - char s[4]; -#endif + const 
char* s; int n=0; // si primaire (plus bas) alors ... @@ -704,23 +698,15 @@ static int hts_acceptlink_(httrackp* opt, HT_PRINT("5 Mirror this link (useful)"LF); HT_PRINT("6 Mirror links located in the same domain"LF); HT_PRINT(LF); -//#if HTS_ANALYSTE!=2 -//HT_PRINT("! View extract of html code where the link is located"LF); -//#endif HTS_REQUEST_END; -#if HTS_ANALYSTE { char BIGSTK tempo[HTS_URLMAXSIZE*2]; tempo[0]='\0'; strcatbuff(tempo,adr); strcatbuff(tempo,"/"); strcatbuff(tempo,fil); - s=hts_htmlcheck_query3(tempo); + s = RUN_CALLBACK1(opt, query3, tempo); } -#else - do { - io_flush; linput(stdin,s,2); -#endif if (strnotempty(s)==0) // entrée n=0; else if (isdigit((unsigned char)*s)) @@ -745,10 +731,6 @@ static int hts_acceptlink_(httrackp* opt, } } -#if HTS_ANALYSTE -#else - } while(n==-999); -#endif io_flush; } else { // lien primaire: autoriser répertoire entier if (!force_mirror) { @@ -767,9 +749,9 @@ static int hts_acceptlink_(httrackp* opt, if (filters_init(&_FILTERS, opt->maxfilter, HTS_FILTERSINC) == 0) { printf("PANIC! : Too many filters : >%d [%d]\n", (*_FILTERS_PTR),__LINE__); fflush(stdout); - if (opt->errlog) { - fprintf(opt->errlog,LF"Too many filters, giving up..(>%d)"LF, (*_FILTERS_PTR) ); - fprintf(opt->errlog,"To avoid that: use #F option for more filters (example: -#F5000)"LF); + if (opt->log) { + fprintf(opt->log,LF"Too many filters, giving up..(>%d)"LF, (*_FILTERS_PTR) ); + fprintf(opt->log,"To avoid that: use #F option for more filters (example: -#F5000)"LF); test_flush; } assertf("too many filters - giving up" == NULL); // wild.. 
@@ -794,7 +776,7 @@ static int hts_acceptlink_(httrackp* opt, case 1: // éliminer répertoire entier et sous rép: adr/path/ * forbidden_url=1; { - int i=strlen(fil)-1; + size_t i = strlen(fil)-1; while((fil[i]!='/') && (i>0)) i--; if (fil[i]=='/') { HT_INSERT_FILTERS0; // insérer en 0 @@ -851,7 +833,7 @@ static int hts_acceptlink_(httrackp* opt, case 5: // autoriser répertoire entier et fils if ((opt->seeker & 2)==0) { // interdiction de monter - int i=strlen(fil)-1; + size_t i = strlen(fil)-1; while((fil[i]!='/') && (i>0)) i--; if (fil[i]=='/') { HT_INSERT_FILTERS0; // insérer en 0 @@ -878,7 +860,7 @@ static int hts_acceptlink_(httrackp* opt, // case 7: // autoriser ce répertoire { - int i=strlen(fil)-1; + size_t i = strlen(fil)-1; while((fil[i]!='/') && (i>0)) i--; if (fil[i]=='/') { HT_INSERT_FILTERS0; // insérer en 0 @@ -913,7 +895,7 @@ static int hts_acceptlink_(httrackp* opt, forbidden_url=1; // oui oui toujours interdit (note: sert à rien car ==1 mais c pour comprendre) *just_test_it=1; // mais on teste if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"Testing link %s%s"LF,adr,fil); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"Testing link %s%s"LF,adr,fil); } } } @@ -953,26 +935,24 @@ int hts_acceptmime(httrackp* opt, if (jok == 1) { // autorisé forbidden_url=0; // URL autorisée if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"(wizard) explicit authorized (%s) link %s%s: mime '%s'"LF,mdepth,adr,fil,mime); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(wizard) explicit authorized (%s) link %s%s: mime '%s'"LF,mdepth,adr,fil,mime); test_flush; } } else if (jok == -1) { // forbidden forbidden_url=1; // URL interdite if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"(wizard) explicit forbidden (%s) link %s%s: mime '%s'"LF,mdepth,adr,fil,mime); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"(wizard) explicit forbidden (%s) link %s%s: mime 
'%s'"LF,mdepth,adr,fil,mime); test_flush; } } // sinon on touche à rien } /* userdef test */ -#if HTS_ANALYSTE - if (hts_htmlcheck_check_mime != NULL) { - int test_url=hts_htmlcheck_check_mime(adr,fil,mime,forbidden_url); + { + int test_url = RUN_CALLBACK4(opt, check_mime, adr, fil, mime, forbidden_url); if (test_url!=-1) { forbidden_url=test_url; } - } -#endif + } return forbidden_url; #undef _FILTERS #undef _FILTERS_PTR @@ -1035,12 +1015,12 @@ int hts_testlinksize(httrackp* opt, // log if (jok==1) { if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"File confirmed (size test): %s%s ("LLintP")"LF,adr,fil,(LLint)(size)); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"File confirmed (size test): %s%s ("LLintP")"LF,adr,fil,(LLint)(size)); } } else if (jok==-1) { if (size_flag) { /* interdit à cause de la taille */ if ((opt->debug>1) && (opt->log!=NULL)) { - fspc(opt->log,"debug"); fprintf(opt->log,"File cancelled due to its size: %s%s ("LLintP", limit: "LLintP")"LF,adr,fil,(LLint)(size),(LLint)(sz)); + HTS_LOG(opt,LOG_DEBUG); fprintf(opt->log,"File cancelled due to its size: %s%s ("LLintP", limit: "LLintP")"LF,adr,fil,(LLint)(size),(LLint)(sz)); } } else { jok=1; diff --git a/src/htswizard.h b/src/htswizard.h index 7236573..0f94eeb 100644 --- a/src/htswizard.h +++ b/src/htswizard.h @@ -38,11 +38,21 @@ Please visit our Website: http://www.httrack.com #ifndef HTSWIZARD_DEFH #define HTSWIZARD_DEFH -#include "htscore.h" - - /* Library internal definictions */ #ifdef HTS_INTERNAL_BYTECODE + +#include "htsglobal.h" + +/* Forward definitions */ +#ifndef HTS_DEF_FWSTRUCT_httrackp +#define HTS_DEF_FWSTRUCT_httrackp +typedef struct httrackp httrackp; +#endif +#ifndef HTS_DEF_FWSTRUCT_lien_url +#define HTS_DEF_FWSTRUCT_lien_url +typedef struct lien_url lien_url; +#endif + int hts_acceptlink(httrackp* opt, int ptr,int lien_tot,lien_url** liens, char* adr,char* fil, diff --git a/src/htswrap.c b/src/htswrap.c index 3150f1d..ccb83fb 100644 --- 
a/src/htswrap.c +++ b/src/htswrap.c @@ -41,33 +41,21 @@ Please visit our Website: http://www.httrack.com #include "htswrap.h" #include "htshash.h" #include "htsinthash.h" +#include "htslib.h" -// typedef long (__stdcall * XSHBFF_WndProc_type)(HWND ,UINT ,WPARAM ,LPARAM); - -inthash wrappers=NULL; - -HTSEXT_API int htswrap_init(void) { - if (!wrappers) - wrappers=inthash_new(42); - return inthash_created(wrappers); +HTSEXT_API int htswrap_init(void) { // LEGACY + return 1; } -HTSEXT_API int htswrap_free(void) { - inthash_delete(&wrappers); +HTSEXT_API int htswrap_free(void) { // LEGACY return 1; } -HTSEXT_API int htswrap_add(char* name,void* fct) { - if (!wrappers) - htswrap_init(); - inthash_write(wrappers,name,(unsigned long int)fct); - return 1; +HTSEXT_API int htswrap_add(httrackp *opt, const char* name,void* fct) { + return hts_set_callback((t_hts_htmlcheck_callbacks*)opt->callbacks_fun, name, fct); } -HTSEXT_API unsigned long int htswrap_read(char* name) { - unsigned long int fct=0; - if (!wrappers) - htswrap_init(); - inthash_read(wrappers,name,(void*)&fct); - return fct; +HTSEXT_API uintptr_t htswrap_read(httrackp *opt, const char* name) { + return (uintptr_t) hts_get_callback((t_hts_htmlcheck_callbacks*)opt->callbacks_fun, name); } + diff --git a/src/htswrap.h b/src/htswrap.h index f97157a..7de1f5c 100644 --- a/src/htswrap.h +++ b/src/htswrap.h @@ -38,14 +38,24 @@ Please visit our Website: http://www.httrack.com #ifndef HTSWRAP_DEFH #define HTSWRAP_DEFH -#include "htsglobal.h" - /* Library internal definictions */ #ifdef HTS_INTERNAL_BYTECODE -HTSEXT_API int htswrap_init(void); -HTSEXT_API int htswrap_add(char* name,void* fct); -HTSEXT_API int htswrap_free(void); -HTSEXT_API unsigned long int htswrap_read(char* name); + +#include "htsglobal.h" +#include "htsinthash.h" + +/* Forward definitions */ +#ifndef HTS_DEF_FWSTRUCT_httrackp +#define HTS_DEF_FWSTRUCT_httrackp +typedef struct httrackp httrackp; +#endif + +HTSEXT_API int htswrap_init(void); // LEGACY 
+HTSEXT_API int htswrap_free(void); // LEGACY + +HTSEXT_API int htswrap_add(httrackp *opt, const char* name, void* fct); +HTSEXT_API uintptr_t htswrap_read(httrackp *opt, const char* name); + #endif #endif diff --git a/src/htszlib.c b/src/htszlib.c index 19e3abb..9227bbb 100644 --- a/src/htszlib.c +++ b/src/htszlib.c @@ -55,12 +55,13 @@ Please visit our Website: http://www.httrack.com Return value: size of the new file, or -1 if an error occured */ int hts_zunpack(char* filename,char* newfile) { + char catbuff[CATBUFF_SIZE]; if (gz_is_available && filename && newfile) { if (filename[0] && newfile[0]) { gzFile gz = gzopen (filename, "rb"); if (gz) { - FILE* fpout=fopen(fconv(newfile),"wb"); - INTsys size=0; + FILE* fpout=fopen(fconv(catbuff, newfile),"wb"); + int size=0; if (fpout) { int nr; do { @@ -68,7 +69,7 @@ int hts_zunpack(char* filename,char* newfile) { nr=gzread (gz, buff, 1024); if (nr>0) { size+=nr; - if ((INTsys)fwrite(buff,1,nr,fpout) != nr) + if (fwrite(buff,1,nr,fpout) != nr) nr=size=-1; } } while(nr>0); @@ -76,23 +77,23 @@ int hts_zunpack(char* filename,char* newfile) { } else size=-1; gzclose(gz); - return size; + return (int) size; } } } return -1; } -int hts_extract_meta(char* path) { - unzFile zFile = unzOpen(fconcat(path,"hts-cache/new.zip")); - zipFile zFileOut = zipOpen(fconcat(path,"hts-cache/meta.zip"), 0); +int hts_extract_meta(const char* path) { + char catbuff[CATBUFF_SIZE]; + unzFile zFile = unzOpen(fconcat(catbuff,path,"hts-cache/new.zip")); + zipFile zFileOut = zipOpen(fconcat(catbuff,path,"hts-cache/meta.zip"), 0); if (zFile != NULL && zFileOut != NULL) { if (unzGoToFirstFile(zFile) == Z_OK) { zip_fileinfo fi; unz_file_info ufi; char BIGSTK filename[HTS_URLMAXSIZE * 4]; char BIGSTK comment[8192]; - int entries = 0; memset(comment, 0, sizeof(comment)); // for truncated reads memset(&fi, 0, sizeof(fi)); memset(&ufi, 0, sizeof(ufi)); diff --git a/src/htszlib.h b/src/htszlib.h index 8f8b565..3d91dc3 100644 --- a/src/htszlib.h +++ 
b/src/htszlib.h @@ -52,7 +52,7 @@ Please visit our Website: http://www.httrack.com #ifdef HTS_INTERNAL_BYTECODE extern int gz_is_available; extern int hts_zunpack(char* filename,char* newfile); -extern int hts_extract_meta(char* path); +extern int hts_extract_meta(const char* path); #endif #endif diff --git a/src/httrack-library.h b/src/httrack-library.h index 2ee2511..b651cee 100644 --- a/src/httrack-library.h +++ b/src/httrack-library.h @@ -39,26 +39,87 @@ Please visit our Website: http://www.httrack.com #define HTTRACK_DEFLIB #include "htsglobal.h" -#include "htsopt.h" -#include "htswrap.h" -/* Main functions */ +#ifndef HTS_DEF_FWSTRUCT_httrackp +#define HTS_DEF_FWSTRUCT_httrackp +typedef struct httrackp httrackp; +#endif +#ifndef HTS_DEF_FWSTRUCT_strc_int2bytes2 +#define HTS_DEF_FWSTRUCT_strc_int2bytes2 +typedef struct strc_int2bytes2 strc_int2bytes2; +#endif + +/* Helpers for plugging callbacks +requires: htsdefines.h */ + +/* +Add a function callback 'FUNCTION' to the option structure 'OPT' callback member 'MEMBER', +with an optional (may be NULL) argument 'ARGUMENT' +*/ +#define CHAIN_FUNCTION(OPT, MEMBER, FUNCTION, ARGUMENT) do { \ + t_hts_callbackarg *carg = (t_hts_callbackarg*) hts_malloc(sizeof(t_hts_callbackarg)); \ + carg->userdef = ( ARGUMENT ); \ + carg->prev.fun = (void*) ( OPT )->callbacks_fun-> MEMBER .fun; \ + carg->prev.carg = ( OPT )->callbacks_fun-> MEMBER .carg; \ + ( OPT )->callbacks_fun-> MEMBER .fun = ( FUNCTION ); \ + ( OPT )->callbacks_fun-> MEMBER .carg = carg; \ +} while(0) + +/* The following helpers are useful only if you know that an existing callback migh be existing before before the call to CHAIN_FUNCTION() +If your functions were added just after hts_create_opt(), no need to make the previous function check */ + +/* Get the user-defined pointer initially passed to CHAIN_FUNCTION(), given the callback's carg argument */ +#define CALLBACKARG_USERDEF(CARG) ( ( (CARG) != NULL ) ? 
(CARG)->userdef : NULL ) + +/* Get the previously existing function before the call to CHAIN_FUNCTION(), given the callback's carg argument */ +#define CALLBACKARG_PREV_FUN(CARG, NAME) ( (t_hts_htmlcheck_ ##NAME) ( ( (CARG) != NULL ) ? (CARG)->prev.fun : NULL ) ) + +/* Get the previously existing function argument before the call to CHAIN_FUNCTION(), given the callback's carg argument */ +#define CALLBACKARG_PREV_CARG(CARG) ( ( (CARG) != NULL ) ? (CARG)->prev.carg : NULL ) + +/* Functions */ + +/* Initialization */ HTSEXT_API int hts_init(void); HTSEXT_API int hts_uninit(void); +HTSEXT_API void htsthread_wait(void); + +/* Main functions */ HTSEXT_API int hts_main(int argc, char **argv); +HTSEXT_API int hts_main2(int argc, char **argv, httrackp *opt); -/* Wrapper functions */ -HTSEXT_API int htswrap_init(void); -HTSEXT_API int htswrap_add(char* name,void* fct); -HTSEXT_API int htswrap_free(void); -HTSEXT_API unsigned long int htswrap_read(char* name); +/* Options handling */ +HTSEXT_API httrackp* hts_create_opt(void); +HTSEXT_API void hts_free_opt(httrackp *opt); +HTSEXT_API void set_wrappers(httrackp *opt); // DEPRECATED - DUMMY FUNCTION +HTSEXT_API int plug_wrapper(httrackp *opt, const char *moduleName, const char* argv); + +/* Logging */ +HTSEXT_API int hts_log(httrackp *opt, const char* prefix, const char *msg); + +/* Infos */ +HTSEXT_API const char* hts_get_version_info(httrackp *opt); HTSEXT_API const char* hts_is_available(void); +/* Wrapper functions */ +HTSEXT_API int htswrap_init(void); // DEPRECATED - DUMMY FUNCTION +HTSEXT_API int htswrap_free(void); // DEPRECATED - DUMMY FUNCTION +HTSEXT_API int htswrap_add(httrackp *opt, const char* name, void* fct); +HTSEXT_API unsigned long int htswrap_read(httrackp *opt, const char* name); +HTSEXT_API int htswrap_set_userdef(httrackp *opt, void *userdef); +HTSEXT_API void* htswrap_get_userdef(httrackp *opt); + +/* Internal library allocators, if a different libc is being used by the client */ +HTSEXT_API char* 
hts_strdup(const char* string); +HTSEXT_API void* hts_malloc(size_t size); +HTSEXT_API void* hts_realloc(void* data, size_t size); +HTSEXT_API void hts_free(void* data); + /* Other functions */ -HTSEXT_API int hts_resetvar(void); -HTSEXT_API int hts_buildtopindex(httrackp* opt,char* path,char* binpath); -HTSEXT_API char* hts_getcategories(char* path, int type); -HTSEXT_API char* hts_getcategory(char* filename); +HTSEXT_API int hts_resetvar(void); // DEPRECATED - DUMMY FUNCTION +HTSEXT_API int hts_buildtopindex(httrackp* opt,const char* path,const char* binpath); +HTSEXT_API const char* hts_getcategories(const char* path, int type); +HTSEXT_API const char* hts_getcategory(const char* filename); /* Catch-URL */ HTSEXT_API T_SOC catch_url_init_std(int* port_prox,char* adr_prox); @@ -66,32 +127,32 @@ HTSEXT_API T_SOC catch_url_init(int* port,char* adr); HTSEXT_API int catch_url(T_SOC soc,char* url,char* method,char* data); /* State */ -HTSEXT_API int hts_is_parsing(int flag); -HTSEXT_API int hts_is_testing(void); -HTSEXT_API int hts_is_exiting(void); -HTSEXT_API int hts_setopt(httrackp* opt); -HTSEXT_API int hts_addurl(char** url); -HTSEXT_API int hts_resetaddurl(void); +HTSEXT_API int hts_is_parsing(httrackp *opt, int flag); +HTSEXT_API int hts_is_testing(httrackp *opt); +HTSEXT_API int hts_is_exiting(httrackp *opt); +/*HTSEXT_API int hts_setopt(httrackp* opt); DEPRECATED ; see copy_htsopt() */ +HTSEXT_API int hts_addurl(httrackp *opt, char** url); +HTSEXT_API int hts_resetaddurl(httrackp *opt); HTSEXT_API int copy_htsopt(httrackp* from, httrackp* to); -HTSEXT_API char* hts_errmsg(void); -HTSEXT_API int hts_setpause(int); // pause transfer -HTSEXT_API int hts_request_stop(int force); -HTSEXT_API char* hts_cancel_file(char * s); -HTSEXT_API void hts_cancel_test(void); -HTSEXT_API void hts_cancel_parsing(void); -HTSEXT_API char* hts_cancel_file(char * s); -HTSEXT_API void hts_cancel_test(void); -HTSEXT_API void hts_cancel_parsing(void); +HTSEXT_API char* 
hts_errmsg(httrackp *opt); +HTSEXT_API int hts_setpause(httrackp *opt, int); // pause transfer +HTSEXT_API int hts_request_stop(httrackp* opt, int force); +HTSEXT_API int hts_cancel_file_push(httrackp *opt, const char *url); +HTSEXT_API void hts_cancel_test(httrackp *opt); +HTSEXT_API void hts_cancel_parsing(httrackp *opt); +HTSEXT_API void hts_cancel_test(httrackp *opt); +HTSEXT_API void hts_cancel_parsing(httrackp *opt); /* Tools */ -HTSEXT_API int structcheck(char* s); +HTSEXT_API int structcheck(const char* path); +HTSEXT_API int dir_exists(const char* path); HTSEXT_API void infostatuscode(char* msg,int statuscode); HTSEXT_API HTS_INLINE TStamp mtime_local(void); HTSEXT_API void qsec2str(char *st,TStamp t); -HTSEXT_API char* int2char(int n); -HTSEXT_API char* int2bytes(LLint n); -HTSEXT_API char* int2bytessec(long int n); -HTSEXT_API char** int2bytes2(LLint n); +HTSEXT_API char* int2char(strc_int2bytes2* strc, int n); +HTSEXT_API char* int2bytes(strc_int2bytes2* strc, LLint n); +HTSEXT_API char* int2bytessec(strc_int2bytes2* strc, long int n); +HTSEXT_API char** int2bytes2(strc_int2bytes2* strc, LLint n); HTSEXT_API char* jump_identification(char*); HTSEXT_API char* jump_normalized(char*); HTSEXT_API char* jump_toport(char*); @@ -108,25 +169,42 @@ HTSEXT_API void escape_uri_utf(char* s); HTSEXT_API void escape_check_url(char* s); HTSEXT_API char* escape_check_url_addr(char* s); HTSEXT_API void x_escape_http(char* s,int mode); -HTSEXT_API char* unescape_http(char* s); -HTSEXT_API char* unescape_http_unharm(char* s, int no_high); -HTSEXT_API char* antislash_unescaped(char* s); +HTSEXT_API char* unescape_http(char *catbuff, const char* s); +HTSEXT_API char* unescape_http_unharm(char *catbuff, const char* s, int no_high); +HTSEXT_API char* antislash_unescaped(char *catbuff, const char* s); HTSEXT_API void escape_remove_control(char* s); +HTSEXT_API void get_httptype(httrackp *opt,char *s,const char *fil,int flag); +HTSEXT_API int is_knowntype(httrackp *opt,const 
char *fil); +HTSEXT_API int is_userknowntype(httrackp *opt,const char *fil); +HTSEXT_API int is_dyntype(const char *fil); +HTSEXT_API char* get_ext(char *catbuff, const char *fil); + +/* Ugly string tools */ +HTSEXT_API char* concat(char *catbuff,const char* a,const char* b) ; +HTSEXT_API char* fconcat(char *catbuff, const char* a, const char* b); +HTSEXT_API char* fconv(char *catbuff, const char* a); /* Debugging */ HTSEXT_API void hts_debug(int level); /* Portable directory API */ +#ifndef HTS_DEF_FWSTRUCT_find_handle_struct +#define HTS_DEF_FWSTRUCT_find_handle_struct typedef struct find_handle_struct find_handle_struct; typedef find_handle_struct* find_handle; +#endif -typedef struct topindex_chain { +#ifndef HTS_DEF_FWSTRUCT_topindex_chain +#define HTS_DEF_FWSTRUCT_topindex_chain +typedef struct topindex_chain topindex_chain; +#endif +struct topindex_chain { int level; /* sort level */ char* category; /* category */ char name[2048]; /* path */ struct topindex_chain* next; /* next element */ -} topindex_chain ; +}; HTSEXT_API find_handle hts_findfirst(char* path); HTSEXT_API int hts_findnext(find_handle find); HTSEXT_API int hts_findclose(find_handle find); @@ -136,71 +214,4 @@ HTSEXT_API int hts_findisdir(find_handle find); HTSEXT_API int hts_findisfile(find_handle find); HTSEXT_API int hts_findissystem(find_handle find); -/* Wrapper functions types (commented) : */ -/* -typedef void (* t_hts_htmlcheck_init)(void); -typedef void (* t_hts_htmlcheck_uninit)(void); -typedef int (* t_hts_htmlcheck_start)(httrackp* opt); -typedef int (* t_hts_htmlcheck_end)(void); -typedef int (* t_hts_htmlcheck_chopt)(httrackp* opt); -typedef int (* t_hts_htmlcheck)(char* html,int len,char* url_adresse,char* url_fichier); -typedef char* (* t_hts_htmlcheck_query)(char* question); -typedef char* (* t_hts_htmlcheck_query2)(char* question); -typedef char* (* t_hts_htmlcheck_query3)(char* question); -typedef int (* t_hts_htmlcheck_loop)(lien_back* back,int back_max,int back_index,int 
lien_tot,int lien_ntot,int stat_time,hts_stat_struct* stats); -typedef int (* t_hts_htmlcheck_check)(char* adr,char* fil,int status); -typedef void (* t_hts_htmlcheck_pause)(char* lockfile); -typedef void (* t_hts_htmlcheck_filesave)(char* file); -typedef int (* t_hts_htmlcheck_linkdetected)(char* link); -typedef int (* t_hts_htmlcheck_xfrstatus)(lien_back* back); -typedef int (* t_hts_htmlcheck_savename)(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save); -typedef int (* t_hts_htmlcheck_sendhead)(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* outgoing); -typedef int (* t_hts_htmlcheck_receivehead)(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* incoming); -*/ - -/* Wrapper functions names : */ -/* - hts_htmlcheck_init = (t_hts_htmlcheck_init) htswrap_read("init"); -Log: "engine: init" - - hts_htmlcheck_uninit = (t_hts_htmlcheck_uninit) htswrap_read("free"); -Log: "engine: free" - - hts_htmlcheck_start = (t_hts_htmlcheck_start) htswrap_read("start"); -Log: "engine: start" - - hts_htmlcheck_end = (t_hts_htmlcheck_end) htswrap_read("end"); -Log: "engine: end" - - hts_htmlcheck_chopt = (t_hts_htmlcheck_chopt) htswrap_read("change-options"); -Log: "engine: change-options" - - hts_htmlcheck = (t_hts_htmlcheck) htswrap_read("check-html"); -Log: "check-html: <url>" - - hts_htmlcheck_query = (t_hts_htmlcheck_query) htswrap_read("query"); - hts_htmlcheck_query2 = (t_hts_htmlcheck_query2) htswrap_read("query2"); - hts_htmlcheck_query3 = (t_hts_htmlcheck_query3) htswrap_read("query3"); - hts_htmlcheck_loop = (t_hts_htmlcheck_loop) htswrap_read("loop"); - hts_htmlcheck_check = (t_hts_htmlcheck_check) htswrap_read("check-link"); -Log: none - - hts_htmlcheck_pause = (t_hts_htmlcheck_pause) htswrap_read("pause"); -Log: "pause: <lockfile>" - - hts_htmlcheck_filesave = (t_hts_htmlcheck_filesave) htswrap_read("save-file"); - hts_htmlcheck_linkdetected = (t_hts_htmlcheck_linkdetected) 
htswrap_read("link-detected"); -Log: none - - hts_htmlcheck_xfrstatus = (t_hts_htmlcheck_xfrstatus) htswrap_read("transfer-status"); -Log: - "engine: transfer-status: link updated: <url> -> <file>" - | "engine: transfer-status: link added: <url> -> <file>" - | "engine: transfer-status: link recorded: <url> -> <file>" - | "engine: transfer-status: link link error (<errno>, '<err_msg>'): <url>" - hts_htmlcheck_savename = (t_hts_htmlcheck_savename ) htswrap_read("save-name"); -Log: - "engine: save-name: local name: <url> -> <file>" -*/ - #endif diff --git a/src/httrack.c b/src/httrack.c index e2b6729..778ca41 100644 --- a/src/httrack.c +++ b/src/httrack.c @@ -46,23 +46,24 @@ Please visit our Website: http://www.httrack.com #include "htsglobal.h" #include "htsbase.h" #include "htsopt.h" +#include "htsdefines.h" #include "httrack.h" +#include "htslib.h" /* Static definitions */ -static int fexist(char* s); +static int fexist(const char* s); static int linput(FILE* fp,char* s,int max); // htswrap_add #include "htswrap.h" -#if HTS_ANALYSTE_CONSOLE - /* specific definitions */ //#include "htsbase.h" #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <signal.h> #ifdef HAVE_SYS_TYPES_H #include <sys/types.h> #endif @@ -75,6 +76,34 @@ static int linput(FILE* fp,char* s,int max); #include <ctype.h> /* END specific definitions */ +static void __cdecl htsshow_init(t_hts_callbackarg *carg); +static void __cdecl htsshow_uninit(t_hts_callbackarg *carg); +static int __cdecl htsshow_start(t_hts_callbackarg *carg, httrackp* opt); +static int __cdecl htsshow_chopt(t_hts_callbackarg *carg, httrackp* opt); +static int __cdecl htsshow_end(t_hts_callbackarg *carg, httrackp* opt); +static int __cdecl htsshow_preprocesshtml(t_hts_callbackarg *carg, httrackp *opt, char** html,int* len,const char* url_address,const char* url_file); +static int __cdecl htsshow_postprocesshtml(t_hts_callbackarg *carg, httrackp *opt, char** html,int* len,const char* url_address,const char* 
url_file); +static int __cdecl htsshow_checkhtml(t_hts_callbackarg *carg, httrackp *opt, char* html,int len,const char* url_address,const char* url_file); +static int __cdecl htsshow_loop(t_hts_callbackarg *carg, httrackp *opt, lien_back* back,int back_max,int back_index,int lien_n,int lien_tot,int stat_time,hts_stat_struct* stats); +static const char* __cdecl htsshow_query(t_hts_callbackarg *carg, httrackp *opt, const char* question); +static const char* __cdecl htsshow_query2(t_hts_callbackarg *carg, httrackp *opt, const char* question); +static const char* __cdecl htsshow_query3(t_hts_callbackarg *carg, httrackp *opt, const char* question); +static int __cdecl htsshow_check(t_hts_callbackarg *carg, httrackp *opt, const char* adr,const char* fil,int status); +static int __cdecl htsshow_check_mime(t_hts_callbackarg *carg, httrackp *opt, const char* adr,const char* fil,const char* mime,int status); +static void __cdecl htsshow_pause(t_hts_callbackarg *carg, httrackp *opt, const char* lockfile); +static void __cdecl htsshow_filesave(t_hts_callbackarg *carg, httrackp *opt, const char* file); +static void __cdecl htsshow_filesave2(t_hts_callbackarg *carg, httrackp *opt, const char* adr, const char* fil, const char* save, int is_new, int is_modified,int not_updated); +static int __cdecl htsshow_linkdetected(t_hts_callbackarg *carg, httrackp *opt, char* link); +static int __cdecl htsshow_linkdetected2(t_hts_callbackarg *carg, httrackp *opt, char* link, const char* start_tag); +static int __cdecl htsshow_xfrstatus(t_hts_callbackarg *carg, httrackp *opt, lien_back* back); +static int __cdecl htsshow_savename(t_hts_callbackarg *carg, httrackp *opt, const char* adr_complete,const char* fil_complete,const char* referer_adr,const char* referer_fil,char* save); +static int __cdecl htsshow_sendheader(t_hts_callbackarg *carg, httrackp *opt, char* buff, const char* adr, const char* fil, const char* referer_adr, const char* referer_fil, htsblk* outgoing); +static int __cdecl 
htsshow_receiveheader(t_hts_callbackarg *carg, httrackp *opt, char* buff, const char* adr, const char* fil, const char* referer_adr, const char* referer_fil, htsblk* incoming); + +static void vt_color(int text,int back); +static void vt_clear(void); +static void vt_home(void); + // ISO VT100/220 definitions #define VT_COL_TEXT_BLACK "30" #define VT_COL_TEXT_RED "31" @@ -131,100 +160,14 @@ static void vt_home(void) { #define MAX_LEN_INPROGRESS 40 static int use_show; +static httrackp *global_opt = NULL; + +static void signal_handlers(void); int main(int argc, char **argv) { int ret = 0; - hts_init(); + httrackp *opt; - /* - hts_htmlcheck_init = (t_hts_htmlcheck_init) htswrap_read("init"); -Log: "engine: init" - - hts_htmlcheck_uninit = (t_hts_htmlcheck_uninit) htswrap_read("free"); -Log: "engine: free" - - hts_htmlcheck_start = (t_hts_htmlcheck_start) htswrap_read("start"); -Log: "engine: start" - - hts_htmlcheck_end = (t_hts_htmlcheck_end) htswrap_read("end"); -Log: "engine: end" - - hts_htmlcheck_chopt = (t_hts_htmlcheck_chopt) htswrap_read("change-options"); -Log: "engine: change-options" - - hts_htmlcheck_preprocess = (t_hts_htmlcheck_process) htswrap_read("preprocess-html"); -Log: "preprocess-html: <url>" - - hts_htmlcheck_postprocess = (t_hts_htmlcheck_process) htswrap_read("postprocess-html"); -Log: "postprocess-html: <url>" - -hts_htmlcheck = (t_hts_htmlcheck) htswrap_read("check-html"); -Log: "check-html: <url>" - - hts_htmlcheck_query = (t_hts_htmlcheck_query) htswrap_read("query"); - hts_htmlcheck_query2 = (t_hts_htmlcheck_query2) htswrap_read("query2"); - hts_htmlcheck_query3 = (t_hts_htmlcheck_query3) htswrap_read("query3"); - hts_htmlcheck_loop = (t_hts_htmlcheck_loop) htswrap_read("loop"); - hts_htmlcheck_check = (t_hts_htmlcheck_check) htswrap_read("check-link"); - hts_htmlcheck_check_mime = (t_hts_htmlcheck_check_mime) htswrap_read("check-mime"); -Log: none - - hts_htmlcheck_pause = (t_hts_htmlcheck_pause) htswrap_read("pause"); -Log: "pause: 
<lockfile>" - - hts_htmlcheck_filesave = (t_hts_htmlcheck_filesave) htswrap_read("save-file"); - hts_htmlcheck_filesave2 = (t_hts_htmlcheck_filesave2) htswrap_read("save-file2"); - hts_htmlcheck_linkdetected = (t_hts_htmlcheck_linkdetected) htswrap_read("link-detected"); - hts_htmlcheck_linkdetected2 = (t_hts_htmlcheck_linkdetected2) htswrap_read("link-detected2"); -Log: none - - hts_htmlcheck_xfrstatus = (t_hts_htmlcheck_xfrstatus) htswrap_read("transfer-status"); -Log: - "engine: transfer-status: link updated: <url> -> <file>" - | "engine: transfer-status: link added: <url> -> <file>" - | "engine: transfer-status: link recorded: <url> -> <file>" - | "engine: transfer-status: link link error (<errno>, '<err_msg>'): <url>" - hts_htmlcheck_savename = (t_hts_htmlcheck_savename ) htswrap_read("save-name"); -Log: - "engine: save-name: local name: <url> -> <file>" -*/ - - htswrap_add("init",htsshow_init); - htswrap_add("free",htsshow_uninit); - htswrap_add("start",htsshow_start); - htswrap_add("change-options",htsshow_chopt); - htswrap_add("end",htsshow_end); - htswrap_add("preprocess-html",htsshow_preprocesshtml); - htswrap_add("postprocess-html",htsshow_preprocesshtml); - htswrap_add("check-html",htsshow_checkhtml); - htswrap_add("loop",htsshow_loop); - htswrap_add("query",htsshow_query); - htswrap_add("query2",htsshow_query2); - htswrap_add("query3",htsshow_query3); - htswrap_add("check-link",htsshow_check); - htswrap_add("check-mime",htsshow_check_mime); - htswrap_add("pause",htsshow_pause); - htswrap_add("save-file",htsshow_filesave); - htswrap_add("save-file2",htsshow_filesave2); - htswrap_add("link-detected",htsshow_linkdetected); - htswrap_add("link-detected2",htsshow_linkdetected2); - htswrap_add("transfer-status",htsshow_xfrstatus); - htswrap_add("save-name",htsshow_savename); - htswrap_add("send-header", htsshow_sendheader); - htswrap_add("receive-header", htsshow_receiveheader); - - ret = hts_main(argc,argv); - if (ret) { - fprintf(stderr, "* %s\n", 
hts_errmsg()); - } - return ret; -} - - -/* CALLBACK FUNCTIONS */ - -/* Initialize the Winsock */ -static void __cdecl htsshow_init(void) { #ifdef _WIN32 { WORD wVersionRequested; // requested version WinSock API @@ -243,13 +186,59 @@ static void __cdecl htsshow_init(void) { } #endif -} -static void __cdecl htsshow_uninit(void) { + signal_handlers(); + hts_init(); + opt = global_opt = hts_create_opt(); + + CHAIN_FUNCTION(opt, init, htsshow_init, NULL); + CHAIN_FUNCTION(opt, uninit, htsshow_uninit, NULL); + CHAIN_FUNCTION(opt, start, htsshow_start, NULL); + CHAIN_FUNCTION(opt, end, htsshow_end, NULL); + CHAIN_FUNCTION(opt, chopt, htsshow_chopt, NULL); + CHAIN_FUNCTION(opt, preprocess, htsshow_preprocesshtml, NULL); + CHAIN_FUNCTION(opt, postprocess, htsshow_postprocesshtml, NULL); + CHAIN_FUNCTION(opt, check_html, htsshow_checkhtml, NULL); + CHAIN_FUNCTION(opt, query, htsshow_query, NULL); + CHAIN_FUNCTION(opt, query2, htsshow_query2, NULL); + CHAIN_FUNCTION(opt, query3, htsshow_query3, NULL); + CHAIN_FUNCTION(opt, loop, htsshow_loop, NULL); + CHAIN_FUNCTION(opt, check_link, htsshow_check, NULL); + CHAIN_FUNCTION(opt, check_mime, htsshow_check_mime, NULL); + CHAIN_FUNCTION(opt, pause, htsshow_pause, NULL); + CHAIN_FUNCTION(opt, filesave, htsshow_filesave, NULL); + CHAIN_FUNCTION(opt, filesave2, htsshow_filesave2, NULL); + CHAIN_FUNCTION(opt, linkdetected, htsshow_linkdetected, NULL); + CHAIN_FUNCTION(opt, linkdetected2, htsshow_linkdetected2, NULL); + CHAIN_FUNCTION(opt, xfrstatus, htsshow_xfrstatus, NULL); + CHAIN_FUNCTION(opt, savename, htsshow_savename, NULL); + CHAIN_FUNCTION(opt, sendhead, htsshow_sendheader, NULL); + CHAIN_FUNCTION(opt, receivehead, htsshow_receiveheader, NULL); + + ret = hts_main2(argc, argv, opt); + if (ret) { + fprintf(stderr, "* %s\n", hts_errmsg(opt)); + } + global_opt = NULL; + hts_free_opt(opt); + htsthread_wait(); /* wait for pending threads */ + hts_uninit(); + #ifdef _WIN32 WSACleanup(); #endif + + return ret; +} + + +/* CALLBACK 
FUNCTIONS */ + +/* Initialize the Winsock */ +static void __cdecl htsshow_init(t_hts_callbackarg *carg) { +} +static void __cdecl htsshow_uninit(t_hts_callbackarg *carg) { } -static int __cdecl htsshow_start(httrackp* opt) { +static int __cdecl htsshow_start(t_hts_callbackarg *carg, httrackp* opt) { use_show=0; if (opt->verbosedisplay==2) { use_show=1; @@ -257,19 +246,22 @@ static int __cdecl htsshow_start(httrackp* opt) { } return 1; } -static int __cdecl htsshow_chopt(httrackp* opt) { - return htsshow_start(opt); +static int __cdecl htsshow_chopt(t_hts_callbackarg *carg, httrackp* opt) { + return htsshow_start(carg, opt); } -static int __cdecl htsshow_end(void) { +static int __cdecl htsshow_end(t_hts_callbackarg *carg, httrackp* opt) { return 1; } -static int __cdecl htsshow_preprocesshtml(char** html,int* len,char* url_adresse,char* url_fichier) { +static int __cdecl htsshow_preprocesshtml(t_hts_callbackarg *carg, httrackp *opt, char** html,int* len,const char* url_address,const char* url_file) { return 1; } -static int __cdecl htsshow_checkhtml(char* html,int len,char* url_adresse,char* url_fichier) { +static int __cdecl htsshow_postprocesshtml(t_hts_callbackarg *carg, httrackp *opt, char** html,int* len,const char* url_address,const char* url_file) { return 1; } -static int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int lien_n,int lien_tot,int stat_time, hts_stat_struct* stats) { // appelé à chaque boucle de HTTrack +static int __cdecl htsshow_checkhtml(t_hts_callbackarg *carg, httrackp *opt, char* html,int len,const char* url_address,const char* url_file) { + return 1; +} +static int __cdecl htsshow_loop(t_hts_callbackarg *carg, httrackp *opt, lien_back* back,int back_max,int back_index,int lien_n,int lien_tot,int stat_time, hts_stat_struct* stats) { // appelé à chaque boucle de HTTrack static TStamp prev_mytime=0; /* ok */ static t_InpInfo SInfo; /* ok */ // @@ -329,6 +321,7 @@ static int __cdecl htsshow_loop(lien_back* back,int 
back_max,int back_index,int if ( ((mytime - prev_mytime)>100) || ((mytime - prev_mytime)<0) ) { + strc_int2bytes2 strc, strc2, strc3; prev_mytime=mytime; @@ -382,11 +375,11 @@ static int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int STYLE_STATRESET , /* */ - (char*)int2bytes(SInfo.stat_bytes), + (char*)int2bytes(&strc,SInfo.stat_bytes), (int)lien_n,(int)SInfo.lien_tot,(int)nbk, (char*)st, (int)SInfo.stat_written, - (char*)int2bytessec(SInfo.irate),(char*)int2bytessec(SInfo.rate), + (char*)int2bytessec(&strc2,SInfo.irate),(char*)int2bytessec(&strc3,SInfo.rate), (int)SInfo.stat_updated, (int)SInfo.stat_nsocket, (int)SInfo.stat_errors @@ -430,21 +423,21 @@ static int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int } break; case 1: - if (back[i].status==99) { + if (back[i].status==STATUS_WAIT_HEADERS) { strcpybuff(StatsBuffer[index].state,"request"); ok=1; } - else if (back[i].status==100) { + else if (back[i].status==STATUS_CONNECTING) { strcpybuff(StatsBuffer[index].state,"connect"); ok=1; } - else if (back[i].status==101) { + else if (back[i].status==STATUS_WAIT_DNS) { strcpybuff(StatsBuffer[index].state,"search"); ok=1; } - else if (back[i].status==1000) { // ohh le beau ftp + else if (back[i].status==STATUS_FTP_TRANSFER) { // ohh le beau ftp sprintf(StatsBuffer[index].state,"ftp: %s",back[i].info); ok=1; } break; default: - if (back[i].status==0) { // prêt + if (back[i].status==STATUS_READY) { // prêt if ((back[i].r.statuscode==200)) { strcpybuff(StatsBuffer[index].state,"ready"); ok=1; } @@ -484,7 +477,7 @@ static int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int } } - if ((l=strlen(s))<MAX_LEN_INPROGRESS) + if ((l = (int) strlen(s))<MAX_LEN_INPROGRESS) strcpybuff(StatsBuffer[index].name,s); else { // couper @@ -498,7 +491,7 @@ static int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int StatsBuffer[index].sizetot=back[i].r.totalsize; StatsBuffer[index].size=back[i].r.size; } 
else { // pas de taille prédéfinie - if (back[i].status==0) { // prêt + if (back[i].status==STATUS_READY) { // prêt StatsBuffer[index].sizetot=back[i].r.size; StatsBuffer[index].size=back[i].r.size; } else { @@ -520,10 +513,10 @@ static int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int { int parsing=0; printf("Current job: "); - if (!(parsing=hts_is_parsing(-1))) + if (!(parsing=hts_is_parsing(opt, -1))) printf("receiving files"); else { - switch(hts_is_testing()) { + switch(hts_is_testing(opt)) { case 0: printf("parsing HTML file (%d%%)",parsing); break; @@ -556,8 +549,8 @@ static int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int StatsBuffer[i].state, StatsBuffer[i].name, StatsBuffer[i].file, - int2bytes(StatsBuffer[i].size), - int2bytes(StatsBuffer[i].sizetot) + int2bytes(&strc,StatsBuffer[i].size), + int2bytes(&strc2,StatsBuffer[i].sizetot) ); } printf("%s\n",VT_CLREOL); @@ -573,19 +566,19 @@ static int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int return 1; } -static char* __cdecl htsshow_query(char* question) { +static const char* __cdecl htsshow_query(t_hts_callbackarg *carg, httrackp *opt, const char* question) { static char s[12]=""; /* ok */ printf("%s\nPress <Y><Enter> to confirm, <N><Enter> to abort\n",question); io_flush; linput(stdin,s,4); return s; } -static char* __cdecl htsshow_query2(char* question) { +static const char* __cdecl htsshow_query2(t_hts_callbackarg *carg, httrackp *opt, const char* question) { static char s[12]=""; /* ok */ printf("%s\nPress <Y><Enter> to confirm, <N><Enter> to abort\n",question); io_flush; linput(stdin,s,4); return s; } -static char* __cdecl htsshow_query3(char* question) { +static const char* __cdecl htsshow_query3(t_hts_callbackarg *carg, httrackp *opt, const char* question) { static char line[256]; /* ok */ do { io_flush; linput(stdin,line,206); @@ -593,44 +586,44 @@ static char* __cdecl htsshow_query3(char* question) { printf("ok..\n"); return 
line; } -static int __cdecl htsshow_check(char* adr,char* fil,int status) { +static int __cdecl htsshow_check(t_hts_callbackarg *carg, httrackp *opt, const char* adr,const char* fil,int status) { return -1; } -static int __cdecl htsshow_check_mime(char* adr,char* fil,char* mime,int status) { +static int __cdecl htsshow_check_mime(t_hts_callbackarg *carg, httrackp *opt, const char* adr,const char* fil,const char* mime,int status) { return -1; } -static void __cdecl htsshow_pause(char* lockfile) { +static void __cdecl htsshow_pause(t_hts_callbackarg *carg, httrackp *opt, const char* lockfile) { while (fexist(lockfile)) { Sleep(1000); } } -static void __cdecl htsshow_filesave(char* file) { +static void __cdecl htsshow_filesave(t_hts_callbackarg *carg, httrackp *opt, const char* file) { } -static void __cdecl htsshow_filesave2(char* adr, char* fil, char* save, int is_new, int is_modified,int not_updated) { +static void __cdecl htsshow_filesave2(t_hts_callbackarg *carg, httrackp *opt, const char* adr, const char* fil, const char* save, int is_new, int is_modified,int not_updated) { } -static int __cdecl htsshow_linkdetected(char* link) { +static int __cdecl htsshow_linkdetected(t_hts_callbackarg *carg, httrackp *opt, char* link) { return 1; } -static int __cdecl htsshow_linkdetected2(char* link, char* start_tag) { +static int __cdecl htsshow_linkdetected2(t_hts_callbackarg *carg, httrackp *opt, char* link, const char* start_tag) { return 1; } -static int __cdecl htsshow_xfrstatus(lien_back* back) { +static int __cdecl htsshow_xfrstatus(t_hts_callbackarg *carg, httrackp *opt, lien_back* back) { return 1; } -static int __cdecl htsshow_savename(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save) { +static int __cdecl htsshow_savename(t_hts_callbackarg *carg, httrackp *opt, const char* adr_complete,const char* fil_complete,const char* referer_adr,const char* referer_fil,char* save) { return 1; } -static int __cdecl htsshow_sendheader(char* 
buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* outgoing) { +static int __cdecl htsshow_sendheader(t_hts_callbackarg *carg, httrackp *opt, char* buff, const char* adr, const char* fil, const char* referer_adr, const char* referer_fil, htsblk* outgoing) { return 1; } -static int __cdecl htsshow_receiveheader(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* incoming) { +static int __cdecl htsshow_receiveheader(t_hts_callbackarg *carg, httrackp *opt, char* buff, const char* adr, const char* fil, const char* referer_adr, const char* referer_fil, htsblk* incoming) { return 1; } /* *** Various functions *** */ -static int fexist(char* s) { +static int fexist(const char* s) { struct stat st; memset(&st, 0, sizeof(st)); if (stat(s, &st) == 0) { @@ -659,4 +652,128 @@ static int linput(FILE* fp,char* s,int max) { return j; } + +// routines de détournement de SIGHUP & co (Unix) +// +static void sig_ignore( int code ) { // ignorer signal +} +static void sig_term( int code ) { // quitter brutalement + fprintf(stderr,"\nProgram terminated (signal %d)\n",code); + exit(0); +} +static void sig_finish( int code ) { // finir et quitter + signal(code,sig_term); // quitter si encore + if (global_opt != NULL) { + global_opt->state.exit_xh=1; + } + fprintf(stderr,"\nExit requested to engine (signal %d)\n",code); +} +#ifdef _WIN32 +static void sig_ask( int code ) { // demander + char s[256]; + signal(code,sig_term); // quitter si encore + printf("\nQuit program/Interrupt/Cancel? 
(Q/I/C) "); + fflush(stdout); + scanf("%s",s); + if ( (s[0]=='y') || (s[0]=='Y') || (s[0]=='o') || (s[0]=='O') || (s[0]=='q') || (s[0]=='Q')) + exit(0); // quitter + else if ( (s[0]=='i') || (s[0]=='I') ) { + if (global_opt != NULL) { + // ask for stop + global_opt->state.stop=1; + } + } + signal(code,sig_ask); // remettre signal +} +#else +static void sig_doback(int blind); +static void sig_back( int code ) { // ignorer et mettre en backing + signal(code,sig_ignore); + sig_doback(0); +} +static void sig_ask( int code ) { // demander + char s[256]; + signal(code,sig_term); // quitter si encore + printf("\nQuit program/Interrupt/Background/bLind background/Cancel? (Q/I/B/L/C) "); + fflush(stdout); + scanf("%s",s); + if ( (s[0]=='y') || (s[0]=='Y') || (s[0]=='o') || (s[0]=='O') || (s[0]=='q') || (s[0]=='Q')) + exit(0); // quitter + else if ( (s[0]=='b') || (s[0]=='B') || (s[0]=='a') || (s[0]=='A') ) + sig_doback(0); // arrière plan + else if ( (s[0]=='l') || (s[0]=='L') ) + sig_doback(1); // arrière plan + else if ( (s[0]=='i') || (s[0]=='I') ) { + if (global_opt != NULL) { + // ask for stop + printf("finishing pending transfers.. 
please wait\n"); + global_opt->state.stop=1; + } + signal(code,sig_ask); // remettre signal + } + else { + printf("cancel..\n"); + signal(code,sig_ask); // remettre signal + } +} +static void sig_brpipe( int code ) { // treat if necessary + signal(code, sig_brpipe); +} +static void sig_doback(int blind) { // mettre en backing + int out=-1; + // + printf("\nMoving into background to complete the mirror...\n"); fflush(stdout); + + if (global_opt != NULL) { + // suppress logging and asking lousy questions + global_opt->quiet=1; + global_opt->verbosedisplay=0; + } + + if (!blind) + out = open("hts-nohup.out",O_CREAT|O_WRONLY,S_IRUSR|S_IWUSR); + if (out == -1) + out = open("/dev/null",O_WRONLY,S_IRUSR|S_IWUSR); + close(0); + close(1); + dup(out); + close(2); + dup(out); + // + switch (fork()) { + case 0: + break; + case -1: + fprintf(stderr,"Error: can not fork process\n"); + break; + default: // pere + usleep(100000); // pause 1/10s "A microsecond is .000001s" + _exit(0); + break; + } +} +#endif + +static void signal_handlers(void) { +#ifdef _WIN32 +#ifndef _WIN32_WCE +#if 0 /* BUG366763 */ + signal( SIGINT , sig_ask ); // ^C +#endif + signal( SIGTERM , sig_finish ); // kill <process> +#endif +#else +#if 0 /* BUG366763 */ + signal( SIGHUP , sig_back ); // close window +#endif + signal( SIGTSTP , sig_back ); // ^Z + signal( SIGTERM , sig_finish ); // kill <process> +#if 0 /* BUG366763 */ + signal( SIGINT , sig_ask ); // ^C #endif + signal( SIGPIPE , sig_brpipe ); // broken pipe (write into non-opened socket) + signal( SIGCHLD , sig_ignore ); // child change status +#endif +} + +// fin routines de détournement de SIGHUP & co diff --git a/src/httrack.h b/src/httrack.h index 229251c..f8350f6 100644 --- a/src/httrack.h +++ b/src/httrack.h @@ -37,12 +37,14 @@ Please visit our Website: http://www.httrack.com #ifndef HTSTOOLS_DEFH #define HTSTOOLS_DEFH -#if HTS_ANALYSTE_CONSOLE - #include "htsglobal.h" #include "htscore.h" -typedef struct t_StatsBuffer { +#ifndef 
HTS_DEF_FWSTRUCT_t_StatsBuffer +#define HTS_DEF_FWSTRUCT_t_StatsBuffer +typedef struct t_StatsBuffer t_StatsBuffer; +#endif +struct t_StatsBuffer { char name[1024]; char file[1024]; char state[256]; @@ -56,9 +58,13 @@ typedef struct t_StatsBuffer { int back; // int actived; // pour disabled -} t_StatsBuffer; +}; -typedef struct t_InpInfo { +#ifndef HTS_DEF_FWSTRUCT_t_InpInfo +#define HTS_DEF_FWSTRUCT_t_InpInfo +typedef struct t_InpInfo t_InpInfo; +#endif +struct t_InpInfo { int ask_refresh; int refresh; LLint stat_bytes; @@ -76,38 +82,9 @@ typedef struct t_InpInfo { int stat_infos; TStamp stat_timestart; int stat_back; -} t_InpInfo; - -// wrappers -static void __cdecl htsshow_init(void); -static void __cdecl htsshow_uninit(void); -static int __cdecl htsshow_start(httrackp* opt); -static int __cdecl htsshow_chopt(httrackp* opt); -static int __cdecl htsshow_end(void); -static int __cdecl htsshow_preprocesshtml(char** html,int* len,char* url_adresse,char* url_fichier); -static int __cdecl htsshow_postprocesshtml(char** html,int* len,char* url_adresse,char* url_fichier); -static int __cdecl htsshow_checkhtml(char* html,int len,char* url_adresse,char* url_fichier); -static int __cdecl htsshow_loop(lien_back* back,int back_max,int back_index,int lien_n,int lien_tot,int stat_time,hts_stat_struct* stats); -static char* __cdecl htsshow_query(char* question); -static char* __cdecl htsshow_query2(char* question); -static char* __cdecl htsshow_query3(char* question); -static int __cdecl htsshow_check(char* adr,char* fil,int status); -static int __cdecl htsshow_check_mime(char* adr,char* fil,char* mime,int status); -static void __cdecl htsshow_pause(char* lockfile); -static void __cdecl htsshow_filesave(char* file); -static void __cdecl htsshow_filesave2(char* adr, char* fil, char* save, int is_new, int is_modified,int not_updated); -static int __cdecl htsshow_linkdetected(char* link); -static int __cdecl htsshow_linkdetected2(char* link, char* start_tag); -static int __cdecl 
htsshow_xfrstatus(lien_back* back); -static int __cdecl htsshow_savename(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save); -static int __cdecl htsshow_sendheader(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* outgoing); -static int __cdecl htsshow_receiveheader(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* incoming); +}; int main(int argc, char **argv); -static void vt_color(int text,int back); -static void vt_clear(void); -static void vt_home(void); - #endif /* */ @@ -269,6 +246,3 @@ extern HTSEXT_API t_abortLog abortLog__; #define strdupt(A) strdup(A) #define realloct(A,B) ( ((A) != NULL) ? realloc((A), (B)) : malloc(B) ) #define memcpybuff(A, B, N) memcpy((A), (B), (N)) - -#endif - diff --git a/src/httrack.vcproj b/src/httrack.vcproj new file mode 100755 index 0000000..2e8bd28 --- /dev/null +++ b/src/httrack.vcproj @@ -0,0 +1,253 @@ +<?xml version="1.0" encoding="Windows-1252"?>
+<VisualStudioProject
+ ProjectType="Visual C++"
+ Version="7.10"
+ Name="httrack"
+ ProjectGUID="{D0E894E7-F64C-4722-9807-9ABB1054D43A}"
+ SccProjectName=""
+ SccLocalPath="">
+ <Platforms>
+ <Platform
+ Name="Win32"/>
+ </Platforms>
+ <Configurations>
+ <!-- "Release avec debug" (release with debug): builds httrack.exe against wsock32.lib and libhttrack.lib with debug information generated (GenerateDebugInformation="TRUE", httrack.pdb). NOTE(review): the include, library, intermediate and output paths below are absolute (C:\Dev\..., c:\temp\vcpp, L:\HTTrack\httrack), presumably the author's machine layout; adjust them before building elsewhere. -->
+ <Configuration
+ Name="Release avec debug|Win32"
+ OutputDirectory=".\Release_avec_debug"
+ IntermediateDirectory="c:\temp\vcpp"
+ ConfigurationType="1"
+ UseOfMFC="0"
+ ATLMinimizesCRunTimeLibraryUsage="FALSE"
+ CharacterSet="2">
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalOptions="/Zm200 "
+ Optimization="4"
+ InlineFunctionExpansion="2"
+ EnableIntrinsicFunctions="TRUE"
+ FavorSizeOrSpeed="1"
+ OmitFramePointers="TRUE"
+ AdditionalIncludeDirectories="C:\Dev\IPv6Kit\inc\,C:\Dev\zlib\,C:\Dev\openssl\include,C:\Dev\Winhttrack"
+ PreprocessorDefinitions="WIN32;_CONSOLE;HTS_ANALYSTE_CONSOLE"
+ RuntimeLibrary="2"
+ UsePrecompiledHeader="2"
+ PrecompiledHeaderFile="c:\temp\vcpp/httrack.pch"
+ AssemblerOutput="2"
+ AssemblerListingLocation="c:\temp\vcpp/"
+ ObjectFile="c:\temp\vcpp/"
+ ProgramDataBaseFileName="c:\temp\vcpp/"
+ BrowseInformation="1"
+ WarningLevel="3"
+ SuppressStartupBanner="TRUE"
+ Detect64BitPortabilityProblems="TRUE"
+ DebugInformationFormat="3"
+ CompileAs="0"/>
+ <Tool
+ Name="VCCustomBuildTool"/>
+ <Tool
+ Name="VCLinkerTool"
+ AdditionalDependencies="wsock32.lib libhttrack.lib"
+ OutputFile="L:\HTTrack\httrack\httrack.exe"
+ LinkIncremental="1"
+ SuppressStartupBanner="TRUE"
+ AdditionalLibraryDirectories="C:\Dev\openssl\lib,C:\Dev\zlib\dll32,C:\Dev\openssl\lib\out32dll"
+ GenerateDebugInformation="TRUE"
+ ProgramDatabaseFile=".\Release_avec_debug/httrack.pdb"
+ SubSystem="1"
+ TargetMachine="1"/>
+ <Tool
+ Name="VCMIDLTool"
+ TypeLibraryName=".\Release_avec_debug/httrack.tlb"
+ HeaderFileName=""/>
+ <Tool
+ Name="VCPostBuildEventTool"/>
+ <Tool
+ Name="VCPreBuildEventTool"/>
+ <Tool
+ Name="VCPreLinkEventTool"/>
+ <Tool
+ Name="VCResourceCompilerTool"
+ PreprocessorDefinitions="NDEBUG"
+ Culture="1033"/>
+ <Tool
+ Name="VCWebServiceProxyGeneratorTool"/>
+ <Tool
+ Name="VCXMLDataGeneratorTool"/>
+ <Tool
+ Name="VCWebDeploymentTool"/>
+ <Tool
+ Name="VCManagedWrapperGeneratorTool"/>
+ <Tool
+ Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
+ </Configuration>
+ <!-- Debug configuration: builds C:\temp\httrack.exe with _DEBUG defined, Optimization="0" and debug information generated. NOTE(review): ObjectFile, ProgramDataBaseFileName and the precompiled header point at c:\temp\vcpp/ (also the IntermediateDirectory of "Release avec debug") rather than at this configuration's own IntermediateDirectory; confirm that sharing is intended. All paths are absolute; adjust them before building elsewhere. -->
+ <Configuration
+ Name="Debug|Win32"
+ OutputDirectory="C:\temp\httrackcmd\Debug"
+ IntermediateDirectory="C:\temp\httrackcmd\Debug"
+ ConfigurationType="1"
+ UseOfMFC="0"
+ ATLMinimizesCRunTimeLibraryUsage="FALSE"
+ CharacterSet="2">
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalOptions="/Zm200 "
+ Optimization="0"
+ AdditionalIncludeDirectories="C:\Dev\IPv6Kit\inc\,C:\Dev\zlib\,C:\Dev\openssl\include,C:\Dev\Winhttrack"
+ PreprocessorDefinitions="_DEBUG;WIN32;_CONSOLE;HTS_ANALYSTE_CONSOLE"
+ RuntimeLibrary="3"
+ BufferSecurityCheck="TRUE"
+ RuntimeTypeInfo="TRUE"
+ PrecompiledHeaderFile="c:\temp\vcpp/httrack.pch"
+ AssemblerOutput="2"
+ AssemblerListingLocation="c:\temp\vcpp/"
+ ObjectFile="c:\temp\vcpp/"
+ ProgramDataBaseFileName="c:\temp\vcpp/"
+ BrowseInformation="1"
+ BrowseInformationFile="c:\temp\vcpp/"
+ WarningLevel="3"
+ SuppressStartupBanner="TRUE"
+ Detect64BitPortabilityProblems="TRUE"
+ DebugInformationFormat="4"
+ CompileAs="0"/>
+ <Tool
+ Name="VCCustomBuildTool"/>
+ <Tool
+ Name="VCLinkerTool"
+ AdditionalDependencies="wsock32.lib libhttrack.lib"
+ OutputFile="C:\temp\httrack.exe"
+ LinkIncremental="1"
+ SuppressStartupBanner="TRUE"
+ AdditionalLibraryDirectories="C:\Dev\openssl\lib,C:\Dev\zlib\dll32,C:\Dev\openssl\lib\out32dll,C:\temp\Debuglib"
+ GenerateDebugInformation="TRUE"
+ ProgramDatabaseFile=".\Debug/httrack.pdb"
+ SubSystem="1"
+ TargetMachine="1"/>
+ <Tool
+ Name="VCMIDLTool"
+ TypeLibraryName=".\Debug/httrack.tlb"
+ HeaderFileName=""/>
+ <Tool
+ Name="VCPostBuildEventTool"/>
+ <Tool
+ Name="VCPreBuildEventTool"/>
+ <Tool
+ Name="VCPreLinkEventTool"/>
+ <Tool
+ Name="VCResourceCompilerTool"
+ PreprocessorDefinitions="_DEBUG"
+ Culture="1033"/>
+ <Tool
+ Name="VCWebServiceProxyGeneratorTool"/>
+ <Tool
+ Name="VCXMLDataGeneratorTool"/>
+ <Tool
+ Name="VCWebDeploymentTool"/>
+ <Tool
+ Name="VCManagedWrapperGeneratorTool"/>
+ <Tool
+ Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
+ </Configuration>
+ <Configuration
+ Name="Release|Win32"
+ OutputDirectory="C:\temp\httrackcmd\Release"
+ IntermediateDirectory="C:\temp\httrackcmd\Release"
+ ConfigurationType="1"
+ UseOfMFC="0"
+ ATLMinimizesCRunTimeLibraryUsage="FALSE"
+ CharacterSet="2">
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalOptions="/Zm200 "
+ Optimization="3"
+ InlineFunctionExpansion="2"
+ ImproveFloatingPointConsistency="TRUE"
+ AdditionalIncludeDirectories="C:\Dev\IPv6Kit\inc\,C:\Dev\zlib\,C:\Dev\openssl\include,C:\Dev\Winhttrack"
+ PreprocessorDefinitions="NDEBUG;WIN32;_CONSOLE;HTS_ANALYSTE_CONSOLE"
+ StringPooling="TRUE"
+ RuntimeLibrary="2"
+ EnableFunctionLevelLinking="TRUE"
+ UsePrecompiledHeader="2"
+ PrecompiledHeaderFile="c:\temp\vcpp/httrack.pch"
+ AssemblerListingLocation="c:\temp\vcpp/"
+ ObjectFile="c:\temp\vcpp/"
+ ProgramDataBaseFileName="c:\temp\vcpp/"
+ WarningLevel="3"
+ SuppressStartupBanner="TRUE"
+ Detect64BitPortabilityProblems="TRUE"
+ CompileAs="0"/>
+ <Tool
+ Name="VCCustomBuildTool"/>
+ <Tool
+ Name="VCLinkerTool"
+ AdditionalDependencies="wsock32.lib libhttrack.lib"
+ OutputFile="L:\HTTrack\httrack\httrack.exe"
+ LinkIncremental="1"
+ SuppressStartupBanner="TRUE"
+ AdditionalLibraryDirectories="C:\Dev\openssl\lib,C:\Dev\zlib\dll32,C:\Dev\openssl\lib\out32dll,C:\temp\Releaselib"
+ ProgramDatabaseFile=".\Release/httrack.pdb"
+ SubSystem="1"
+ TargetMachine="1"/>
+ <Tool
+ Name="VCMIDLTool"
+ TypeLibraryName=".\Release/httrack.tlb"
+ HeaderFileName=""/>
+ <Tool
+ Name="VCPostBuildEventTool"/>
+ <Tool
+ Name="VCPreBuildEventTool"/>
+ <Tool
+ Name="VCPreLinkEventTool"/>
+ <Tool
+ Name="VCResourceCompilerTool"
+ PreprocessorDefinitions="NDEBUG"
+ Culture="1033"/>
+ <Tool
+ Name="VCWebServiceProxyGeneratorTool"/>
+ <Tool
+ Name="VCXMLDataGeneratorTool"/>
+ <Tool
+ Name="VCWebDeploymentTool"/>
+ <Tool
+ Name="VCManagedWrapperGeneratorTool"/>
+ <Tool
+ Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
+ </Configuration>
+ </Configurations>
+ <References>
+ </References>
+ <Files>
+ <File
+ RelativePath="httrack.c">
+ <FileConfiguration
+ Name="Release avec debug|Win32">
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ BrowseInformation="1"/>
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32">
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="0"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ BrowseInformation="1"/>
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|Win32">
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="2"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""/>
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath=".\httrack.h">
+ </File>
+ </Files>
+ <Globals>
+ </Globals>
+</VisualStudioProject>
diff --git a/src/mmsrip/mms.h b/src/mmsrip/mms.h index 600d7bd..9ce909f 100644 --- a/src/mmsrip/mms.h +++ b/src/mmsrip/mms.h @@ -28,6 +28,8 @@ #ifndef __MMS_H__ #define __MMS_H__ +#include "common.h" + #ifdef _WIN32 typedef signed long int ssize_t; #define bcopy(s, d, l) memcpy(d, s, l) @@ -46,7 +48,7 @@ typedef unsigned int uint32_t; #endif typedef unsigned long long int uint64_t; #else -#if defined(SOLARIS) || defined(sun) +#if defined(SOLARIS) || defined(sun)|| defined (__FreeBSD__) || defined(HAVE_INTTYPES_H) #include <inttypes.h> #else #include <stdint.h> diff --git a/src/proxy/changelog.txt b/src/proxy/changelog.txt index f5ae48b..b50a8d6 100644 --- a/src/proxy/changelog.txt +++ b/src/proxy/changelog.txt @@ -1,3 +1,7 @@ +0.5 - May 8 2006
+- added ARC (Internet Archive 1.0) file format
+- first index output routines (proxytrack --convert ..)
+
0.4 - Sept 18 2005
- implemented very limited WebDAV (RFC2518) primitives
- index enumeration fixes
diff --git a/src/proxy/main.c b/src/proxy/main.c index e48b51d..bf35377 100644 --- a/src/proxy/main.c +++ b/src/proxy/main.c @@ -64,7 +64,7 @@ int main(int argc, char* argv[]) { int i; int ret = 0; - int proxyPort, icpPort; + int proxyPort = 0, icpPort = 0; char proxyAddr[256 + 1], icpAddr[256 + 1]; PT_Indexes index; @@ -98,10 +98,21 @@ int main(int argc, char* argv[]) printf("*** This version is a development release ***\n"); printf("\n"); if (argc < 3 - || !scanHostPort(argv[1], proxyAddr, &proxyPort) - || !scanHostPort(argv[2], icpAddr, &icpPort)) + || ( + strcmp(argv[1], "--convert") != 0 + && + ( + !scanHostPort(argv[1], proxyAddr, &proxyPort) + || !scanHostPort(argv[2], icpAddr, &icpPort) + ) + ) + ) { - fprintf(stderr, "usage: %s <proxy-addr:proxy-port> <ICP-addr:ICP-port> [ ( <new.zip path> | <new.ndx path> | --list <file-list> ) ..]\n", argv[0]); + fprintf(stderr, "proxy mode:\n"); + fprintf(stderr, "usage: %s <proxy-addr:proxy-port> <ICP-addr:ICP-port> [ ( <new.zip path> | <new.ndx path> | <archive.arc path> | --list <file-list> ) ..]\n", argv[0]); + fprintf(stderr, "\texample:%s proxy:8080 localhost:3130 /home/archives/www-archive-01.zip /home/old-archives/www-archive-02.ndx\n", argv[0]); + fprintf(stderr, "convert mode:\n"); + fprintf(stderr, "usage: %s --convert <archive-output-path> [ ( <new.zip path> | <new.ndx path> | <archive.arc path> | --list <file-list> ) ..]\n", argv[0]); fprintf(stderr, "\texample:%s proxy:8080 localhost:3130 /home/archives/www-archive-01.zip /home/old-archives/www-archive-02.ndx\n", argv[0]); return 1; } @@ -150,7 +161,15 @@ int main(int argc, char* argv[]) #endif /* Go */ - ret = proxytrack_main(proxyAddr, proxyPort, icpAddr, icpPort, index); + if (strcmp(argv[1], "--convert") != 0) { + ret = proxytrack_main(proxyAddr, proxyPort, icpAddr, icpPort, index); + } else { + if ((ret = PT_SaveCache(index, argv[2])) == 0) { + fprintf(stderr, "processed: '%s'\n", argv[2]); + } else { + fprintf(stderr, "error: could not save '%s'\n", 
argv[2]); + } + } /* Wipe */ PT_Delete(index); diff --git a/src/proxy/proxystrings.h b/src/proxy/proxystrings.h index 87bcf34..9ee8fa7 100755 --- a/src/proxy/proxystrings.h +++ b/src/proxy/proxystrings.h @@ -28,77 +28,11 @@ Please visit our Website: http://www.httrack.com // Strings a bit safer than static buffers -#ifndef HTS_STRINGS_DEFSTATIC -#define HTS_STRINGS_DEFSTATIC - -typedef struct String { - char* buff; - int len; - int capa; -} String; - -#define STRING_EMPTY {NULL, 0, 0} -#define STRING_BLK_SIZE 256 -#define StringBuff(blk) ((blk).buff) -#define StringLength(blk) ((blk).len) -#define StringCapacity(blk) ((blk).capa) -#define StringRoom(blk, size) do { \ - if ((blk).len + (int)(size) + 1 > (blk).capa) { \ - (blk).capa = ((blk).len + (size) + 1) * 2; \ - (blk).buff = (char*) realloc((blk).buff, (blk).capa); \ - } \ -} while(0) -#define StringBuffN(blk, size) StringBuffN_(&(blk), size) -static char* StringBuffN_(String* blk, int size) { - StringRoom(*blk, (blk->len) + size); - return StringBuff(*blk); -} -#define StringClear(blk) do { \ - StringRoom(blk, 0); \ - (blk).buff[0] = '\0'; \ - (blk).len = 0; \ -} while(0) -#define StringFree(blk) do { \ - if ((blk).buff != NULL) { \ - free((blk).buff); \ - (blk).buff = NULL; \ - } \ - (blk).capa = 0; \ - (blk).len = 0; \ -} while(0) -#define StringMemcat(blk, str, size) do { \ - StringRoom(blk, size); \ - if ((int)(size) > 0) { \ - memcpy((blk).buff + (blk).len, (str), (size)); \ - (blk).len += (size); \ - } \ - *((blk).buff + (blk).len) = '\0'; \ -} while(0) -#define StringAddchar(blk, c) do { \ - char __c = (c); \ - StringMemcat(blk, &__c, 1); \ -} while(0) -static void* StringAcquire(String* blk) { - void* buff = blk->buff; - blk->buff = NULL; - blk->capa = 0; - blk->len = 0; - return buff; -} -static StringAttach(String* blk, char** str) { - StringFree(*blk); - if (str != NULL && *str != NULL) { - blk->buff = *str; - blk->capa = (int)strlen(blk->buff); - blk->len = blk->capa; - *str = NULL; - } -} -#define 
StringStrcat(blk, str) StringMemcat(blk, str, ((str) != NULL) ? (int)strlen(str) : 0) -#define StringStrcpy(blk, str) do { \ - StringClear(blk); \ - StringStrcat(blk, str); \ -} while(0) +#ifndef HTS_PROXYSTRINGS_DEFSTATIC +#define HTS_PROXYSTRINGS_DEFSTATIC + +#include "htsstrings.h" + /* Tools */ @@ -138,16 +72,46 @@ static void escapexml(const char* s, String* tempo) { int i; for (i=0 ; s[i] != '\0' ; i++) { if (s[i] == '&') - StringStrcat(*tempo, "&"); + StringCat(*tempo, "&"); else if (s[i] == '<') - StringStrcat(*tempo, "<"); + StringCat(*tempo, "<"); else if (s[i] == '>') - StringStrcat(*tempo, ">"); + StringCat(*tempo, ">"); else if (s[i] == '\"') - StringStrcat(*tempo, """); + StringCat(*tempo, """); else StringAddchar(*tempo, s[i]); } } +static char* concat(char *catbuff,const char* a,const char* b) { + if (a != NULL && a[0] != '\0') { + strcpy(catbuff, a); + } else { + catbuff[0] = '\0'; + } + if (b != NULL && b[0] != '\0') { + strcat(catbuff, b); + } + return catbuff; +} + +static char* __fconv(char* a) { +#ifdef WIN32 + int i; + for(i = 0 ; a[i] != 0 ; i++) + if (a[i] == '/') // Unix-to-DOS style + a[i] = '\\'; +#endif + return a; +} + +static char* fconcat(char *catbuff, const char* a, const char* b) { + return __fconv(concat(catbuff,a,b)); +} + +static char* fconv(char *catbuff, const char* a) { + return __fconv(concat(catbuff,a,"")); +} + #endif diff --git a/src/proxy/proxytrack.c b/src/proxy/proxytrack.c index 7604804..195c1a5 100644 --- a/src/proxy/proxytrack.c +++ b/src/proxy/proxytrack.c @@ -119,13 +119,10 @@ Remark: If no cache newer than the added one is found, all entries can be added #include <string.h> #include <time.h> #include <fcntl.h> -#if HTS_WIN +#ifdef _WIN32 #else #include <arpa/inet.h> #endif -#ifndef _WIN32 -#include <signal.h> -#endif /* END specific definitions */ /* String */ @@ -137,12 +134,6 @@ Remark: If no cache newer than the added one is found, all entries can be added /* définitions globales */ #include "htsglobal.h" -/* 
htslib */ -/*#include "htslib.h"*/ - -/* HTTrack Website Copier Library */ -#include "httrack-library.h" - /* htsweb */ #include "htsinthash.h" @@ -228,27 +219,6 @@ static int linputsoc_t(T_SOC soc, char* s, int max, int timeout) { return -1; } -static void unescapeini(char* s, String* tempo) { - int i; - char lastc=0; - for (i=0;i<(int) strlen(s);i++) { - if (s[i]=='%' && s[i+1]=='%') { - i++; - StringAddchar(*tempo, lastc = '%'); - } else if (s[i]=='%') { - char hc; - i++; - hc = (char) ehex(s+i); - if (!is_retorsep(hc) || !is_retorsep(lastc)) { - StringAddchar(*tempo, lastc = (char) hc); - } - i++; // sauter 2 caractères finalement - } - else - StringAddchar(*tempo, lastc = s[i]); - } -} - static int gethost(const char* hostname, SOCaddr *server, size_t server_size) { if (hostname != NULL && *hostname != '\0') { #if HTS_INET6==0 @@ -257,8 +227,8 @@ static int gethost(const char* hostname, SOCaddr *server, size_t server_size) { */ t_hostent* hp=gethostbyname(hostname); if (hp!=NULL) { - if ( (hp->h_length) && ( ((unsigned int) hp->h_length) <= buffer->addr_maxlen) ) { - SOCaddr_copyaddr(server, server_size, hp->h_addr_list[0], hp->h_length); + if (hp->h_length) { + SOCaddr_copyaddr(*server, server_size, hp->h_addr_list[0], hp->h_length); return 1; } } @@ -325,7 +295,7 @@ static T_SOC smallserver_init(const char* adr, int port, int family) { SOCaddr_initany(server, server_size); if (gethost(adr, &server, server_size)) { // host name T_SOC soc = INVALID_SOCKET; - if ( (soc = socket(SOCaddr_sinfamily(server), family, 0)) != INVALID_SOCKET) { + if ( (soc = (T_SOC) socket(SOCaddr_sinfamily(server), family, 0)) != INVALID_SOCKET) { SOCaddr_initport(server, port); if ( bind(soc,(struct sockaddr*) &server, (int)server_size) == 0 ) { if (family != SOCK_STREAM @@ -379,7 +349,8 @@ int proxytrack_main(char* proxyAddr, int proxyPort, fflush(stderr); // if (!proxytrack_start(index, soc, socICP)) { - fprintf(stderr, "Unable to create the server: %s\n", strerror(errno)); + int 
last_errno = errno; + fprintf(stderr, "Unable to create the server: %s\n", strerror(last_errno)); #ifdef _WIN32 closesocket(soc); #else @@ -391,7 +362,8 @@ int proxytrack_main(char* proxyAddr, int proxyPort, returncode = 0; } } else { - fprintf(stderr, "Unable to initialize a temporary server : %s\n", strerror(errno)); + int last_errno = errno; + fprintf(stderr, "Unable to initialize a temporary server : %s\n", strerror(last_errno)); returncode = 1; } printf("EXITED\n"); @@ -452,7 +424,7 @@ static const char* GetHttpMessage(int statuscode) { #ifndef NO_WEBDAV static void proxytrack_add_DAV_Item(String *item, String *buff, const char* filename, - unsigned long int size, + size_t size, time_t timestamp, const char* mime, int isDir, @@ -485,7 +457,7 @@ static void proxytrack_add_DAV_Item(String *item, String *buff, } StringRoom(*item, 1024); - sprintf(StringBuff(*item), + sprintf(StringBuffRW(*item), "<response xmlns=\"DAV:\">\r\n" "<href>/webdav%s%s</href>\r\n" "<propstat>\r\n" @@ -644,17 +616,17 @@ static PT_Element proxytrack_process_DAV_Request(PT_Indexes indexes, const char StringClear(buff); /* Canonize URL */ - StringStrcpy(url, file + ((file[0] == '/') ? 1 : 0)); + StringCopy(url, file + ((file[0] == '/') ? 
1 : 0)); if (StringLength(url) > 0) { if (StringBuff(url)[StringLength(url) - 1] == '/') { - StringBuff(url)[StringLength(url) - 1] = '\0'; + StringBuffRW(url)[StringLength(url) - 1] = '\0'; StringLength(url)--; } } /* Form response */ StringRoom(response, 1024); - sprintf(StringBuff(response), + sprintf(StringBuffRW(response), "<?xml version=\"1.0\" encoding=\"utf-8\"?>\r\n" "<multistatus xmlns=\"DAV:\">\r\n"); StringLength(response) = (int) strlen(StringBuff(response)); @@ -683,22 +655,22 @@ static PT_Element proxytrack_process_DAV_Request(PT_Indexes indexes, const char /* Item URL */ StringRoom(itemUrl, thisUrlLen + prefixLen + sizeof("/webdav/") + 1); StringClear(itemUrl); - sprintf(StringBuff(itemUrl), "/%s/%s", prefix, thisUrl); + sprintf(StringBuffRW(itemUrl), "/%s/%s", prefix, thisUrl); if (!thisIsDir) StringLength(itemUrl) = (int) strlen(StringBuff(itemUrl)); else StringLength(itemUrl) = (int) strlen(StringBuff(itemUrl)) - 1; - StringBuff(itemUrl)[StringLength(itemUrl)] = '\0'; + StringBuffRW(itemUrl)[StringLength(itemUrl)] = '\0'; if (thisIsDir == isDir) { - unsigned long size = 0; + size_t size = 0; time_t timestamp = (time_t) 0; PT_Element file = NULL; /* Item stats */ if (!isDir) { file = PT_ReadIndex(indexes, StringBuff(itemUrl) + 1, FETCH_HEADERS); - if (file != NULL && file->statuscode == 200 ) { + if (file != NULL && file->statuscode == HTTP_OK ) { size = file->size; if (file->lastmodified) { timestamp = get_time_rfc822(file->lastmodified); @@ -735,7 +707,7 @@ static PT_Element proxytrack_process_DAV_Request(PT_Indexes indexes, const char } /* Depth > 0 */ /* End of responses */ - StringStrcat(response, + StringCat(response, "</multistatus>\r\n" ); @@ -767,7 +739,7 @@ static PT_Element proxytrack_process_HTTP_List(PT_Indexes indexes, const char * int i, isDir; String html = STRING_EMPTY; StringClear(html); - StringStrcat(html, + StringCat(html, "<html>" PROXYTRACK_COMMENT_HEADER DISABLE_IE_FRIENDLY_HTTP_ERROR_MESSAGES @@ -787,28 +759,28 @@ static 
PT_Element proxytrack_process_HTTP_List(PT_Indexes indexes, const char * int thisIsDir = (thisUrl[thisUrlLen - 1] == '/') ? 1 : 0; if (thisIsDir == isDir) { if (isDir) - StringStrcat(html, "<tt>[DIR] "); + StringCat(html, "<tt>[DIR] "); else - StringStrcat(html, "<tt> "); - StringStrcat(html, "<a href=\""); + StringCat(html, "<tt> "); + StringCat(html, "<a href=\""); if (isDir) { - StringStrcat(html, "http://proxytrack/"); + StringCat(html, "http://proxytrack/"); } - StringStrcat(html, url); - StringStrcat(html, list[i]); - StringStrcat(html, "\">"); - StringStrcat(html, list[i]); - StringStrcat(html, "</a></tt><br />"); + StringCat(html, url); + StringCat(html, list[i]); + StringCat(html, "\">"); + StringCat(html, list[i]); + StringCat(html, "</a></tt><br />"); } } } - StringStrcat(html, + StringCat(html, "</body>" "</html>"); PT_Enumerate_Delete(&list); elt->size = StringLength(html); elt->adr = StringAcquire(&html); - elt->statuscode = 200; + elt->statuscode = HTTP_OK; strcpy(elt->charset, "iso-8859-1"); strcpy(elt->contenttype, "text/html"); strcpy(elt->msg, "OK"); @@ -843,10 +815,10 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) { #endif StringRoom(localhost, 256); - if (gethostname(StringBuff(localhost), StringCapacity(localhost) - 1) == 0) { + if (gethostname(StringBuffRW(localhost), (int) StringCapacity(localhost) - 1) == 0) { StringLength(localhost) = (int) strlen(StringBuff(localhost)); } else { - StringStrcpy(localhost, "localhost"); + StringCopy(localhost, "localhost"); } #ifdef _DEBUG @@ -855,7 +827,7 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) { if (buffer == NULL || line == NULL || line1 == NULL) { CRITICAL("proxytrack_process_HTTP:memory exhausted"); -#if HTS_WIN +#ifdef _WIN32 closesocket(soc_c); #else close(soc_c); @@ -918,7 +890,7 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) { char* chost = line + p; if (*chost == ' ') chost++; - StringStrcpy(host, chost); + 
StringCopy(host, chost); } #ifndef NO_WEBDAV else if ((p = strfield(line, "Depth: "))) { @@ -935,8 +907,8 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) { if (length > 0) { if (length < 32768) { StringRoom(davRequest, length + 1); - if (recv(soc_c, StringBuff(davRequest), length, 0) == length) { - StringBuff(davRequest)[length] = 0; + if (recv(soc_c, StringBuffRW(davRequest), length, 0) == length) { + StringBuffRW(davRequest)[length] = 0; } else { msgCode = 500; msgError = "Posted Data Read Error"; @@ -970,9 +942,9 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) { else if (strcasecmp(command, "options") == 0) { const char * options = "GET, HEAD, OPTIONS, POST, PROPFIND, TRACE" ", MKCOL, DELETE, PUT"; /* Not supported */ - msgCode = 200; + msgCode = HTTP_OK; StringRoom(headers, 8192); - sprintf(StringBuff(headers), + sprintf(StringBuffRW(headers), "HTTP/1.1 %d %s\r\n" "DAV: 1, 2\r\n" "MS-Author-Via: DAV\r\n" @@ -1020,11 +992,10 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) { /* Post-process request */ if (link_has_authority(surl)) { - const unsigned int prefixLen = sizeof("http://proxytrack/") - 1; - if (strncasecmp(surl, "http://proxytrack/", prefixLen) == 0) { + if (strncasecmp(surl, "http://proxytrack/", sizeof("http://proxytrack/") - 1) == 0) { directHit = 1; /* Another direct hit hack */ } - StringStrcpy(url, surl); + StringCopy(url, surl); } else { if (StringLength(host) > 0) { /* Direct hit */ @@ -1046,15 +1017,23 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) { } /* Direct hit */ directHit = 1; - StringStrcpy(url, ""); + StringCopy(url, ""); + if (!link_has_authority(toHit)) + StringCat(url, "http://"); + StringCat(url, toHit); + } else if (strncasecmp(surl, "/proxytrack/", sizeof("/proxytrack/") - 1) == 0) { + const char * toHit = surl + sizeof("/proxytrack/") - 1; + /* Direct hit */ + directHit = 1; + StringCopy(url, ""); if (!link_has_authority(toHit)) - 
StringStrcat(url, "http://"); - StringStrcat(url, toHit); + StringCat(url, "http://"); + StringCat(url, toHit); } else { /* Transparent proxy */ - StringStrcpy(url, "http://"); - StringStrcat(url, StringBuff(host)); - StringStrcat(url, surl); + StringCopy(url, "http://"); + StringCat(url, StringBuff(host)); + StringCat(url, surl); } } else { msgCode = 500; @@ -1073,7 +1052,7 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) { if ((element = proxytrack_process_DAV_Request(indexes, StringBuff(url), davDepth)) != NULL) { msgCode = element->statuscode; StringRoom(davHeaders, 1024); - sprintf(StringBuff(davHeaders), + sprintf(StringBuffRW(davHeaders), "DAV: 1, 2\r\n" "MS-Author-Via: DAV\r\n" "Cache-Control: private\r\n"); @@ -1100,7 +1079,7 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) { if (element != NULL) { msgCode = element->statuscode; StringRoom(headers, 8192); - sprintf(StringBuff(headers), + sprintf(StringBuffRW(headers), "HTTP/1.1 %d %s\r\n" #ifndef NO_WEBDAV "%s" @@ -1138,12 +1117,12 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) { } else { /* No query string, no ending / : check the the <url>/ page */ if (StringLength(url) > 0 && StringBuff(url)[StringLength(url) - 1] != '/' && strchr(StringBuff(url), '?') == NULL) { - StringStrcpy(urlRedirect, StringBuff(url)); - StringStrcat(urlRedirect, "/"); + StringCopy(urlRedirect, StringBuff(url)); + StringCat(urlRedirect, "/"); if (PT_LookupIndex(indexes, StringBuff(urlRedirect))) { msgCode = 301; /* Moved Permanently */ StringRoom(headers, 8192); - sprintf(StringBuff(headers), + sprintf(StringBuffRW(headers), "HTTP/1.1 %d %s\r\n" "Content-Type: text/html\r\n" "Location: %s\r\n", @@ -1155,7 +1134,7 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) { StringLength(headers) = (int) strlen(StringBuff(headers)); /* */ StringRoom(output, 1024 + sizeof(PROXYTRACK_COMMENT_HEADER) + sizeof(DISABLE_IE_FRIENDLY_HTTP_ERROR_MESSAGES)); - 
sprintf(StringBuff(output), + sprintf(StringBuffRW(output), "<html>" PROXYTRACK_COMMENT_HEADER DISABLE_IE_FRIENDLY_HTTP_ERROR_MESSAGES @@ -1195,14 +1174,14 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) { msgError = GetHttpMessage(msgCode); } StringRoom(headers, 256); - sprintf(StringBuff(headers), + sprintf(StringBuffRW(headers), "HTTP/1.1 %d %s\r\n" "Content-type: text/html\r\n", msgCode, msgError); StringLength(headers) = (int) strlen(StringBuff(headers)); StringRoom(output, 1024 + sizeof(PROXYTRACK_COMMENT_HEADER) + sizeof(DISABLE_IE_FRIENDLY_HTTP_ERROR_MESSAGES)); - sprintf(StringBuff(output), + sprintf(StringBuffRW(output), "<html>" PROXYTRACK_COMMENT_HEADER DISABLE_IE_FRIENDLY_HTTP_ERROR_MESSAGES @@ -1225,7 +1204,7 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) { } { char tmp[20 + 1]; /* 2^64 = 18446744073709551616 */ - unsigned int dataSize = 0; + size_t dataSize = 0; if (!headRequest) { dataSize = StringLength(output); if (dataSize == 0 && element != NULL) { @@ -1233,30 +1212,30 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) { } } sprintf(tmp, "%d", (int) dataSize); - StringStrcat(headers, "Content-length: "); - StringStrcat(headers, tmp); - StringStrcat(headers, "\r\n"); + StringCat(headers, "Content-length: "); + StringCat(headers, tmp); + StringCat(headers, "\r\n"); } if (keepAlive) { - StringStrcat(headers, + StringCat(headers, "Connection: Keep-Alive\r\n" "Proxy-Connection: Keep-Alive\r\n"); } else { - StringStrcat(headers, + StringCat(headers, "Connection: Close\r\n" "Proxy-Connection: Close\r\n"); } if (msgCode != 500) - StringStrcat(headers, "X-Cache: HIT from "); + StringCat(headers, "X-Cache: HIT from "); else - StringStrcat(headers, "X-Cache: MISS from "); - StringStrcat(headers, StringBuff(localhost)); - StringStrcat(headers, "\r\n"); + StringCat(headers, "X-Cache: MISS from "); + StringCat(headers, StringBuff(localhost)); + StringCat(headers, "\r\n"); /* Logging */ { 
const char * contentType = "text/html"; - unsigned long int size = StringLength(output) ? StringLength(output) : ( element ? element->size : 0 ); + size_t size = StringLength(output) ? StringLength(output) : ( element ? element->size : 0 ); /* */ String ip = STRING_EMPTY; SOCaddr serverClient; @@ -1265,7 +1244,7 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) { if (getsockname(soc_c, (struct sockaddr*) &serverClient, &lenServerClient) == 0) { ip = getip(&serverClient, lenServerClient); } else { - StringStrcpy(ip, "unknown"); + StringCopy(ip, "unknown"); } if (element != NULL && element->contenttype[0] != '\0') { contentType = element->contenttype; @@ -1275,11 +1254,11 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) { } /* Send reply */ - StringStrcat(headers, "Server: ProxyTrack " PROXYTRACK_VERSION " (HTTrack " HTTRACK_VERSIONID ")\r\n"); - StringStrcat(headers, "\r\n"); /* Headers separator */ - if (send(soc_c, StringBuff(headers), StringLength(headers), 0) != StringLength(headers) - || ( !headRequest && StringLength(output) > 0 && send(soc_c, StringBuff(output), StringLength(output), 0) != StringLength(output)) - || ( !headRequest && StringLength(output) == 0 && element != NULL && element->adr != NULL && send(soc_c, element->adr, element->size, 0) != element->size) + StringCat(headers, "Server: ProxyTrack " PROXYTRACK_VERSION " (HTTrack " HTTRACK_VERSIONID ")\r\n"); + StringCat(headers, "\r\n"); /* Headers separator */ + if (send(soc_c, StringBuff(headers), (int)StringLength(headers), 0) != StringLength(headers) + || ( !headRequest && StringLength(output) > 0 && send(soc_c, StringBuff(output), (int)StringLength(output), 0) != StringLength(output)) + || ( !headRequest && StringLength(output) == 0 && element != NULL && element->adr != NULL && send(soc_c, element->adr, (int)element->size, 0) != element->size) ) { keepAlive = 0; /* Error, abort connection */ @@ -1298,7 +1277,7 @@ static void 
proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) { } } while(keepAlive); -#if HTS_WIN +#ifdef _WIN32 closesocket(soc_c); #else close(soc_c); @@ -1314,19 +1293,8 @@ static void proxytrack_process_HTTP(PT_Indexes indexes, T_SOC soc_c) { free(buffer); } -#ifdef _WIN32 -#define PTHREAD_RETURN -#define PTHREAD_TYPE void -#define PTHREAD_TYPE_FNC __cdecl -#else -#define PTHREAD_RETURN NULL -#define PTHREAD_TYPE void* -#define PTHREAD_TYPE_FNC -#endif - /* Generic threaded function start */ -static int startThread(PTHREAD_TYPE (PTHREAD_TYPE_FNC * funct)(void* ), - void* param) +static int startThread(void (*funct)(void*), void* param) { if (param != NULL) { #ifdef _WIN32 @@ -1361,14 +1329,14 @@ typedef struct proxytrack_process_th_p { } proxytrack_process_th_p; /* Generic socket/index function stub */ -static PTHREAD_TYPE PTHREAD_TYPE_FNC proxytrack_process_th(void* param_) { +static void proxytrack_process_th(void* param_) { proxytrack_process_th_p *param = (proxytrack_process_th_p *) param_; T_SOC soc_c = param->soc_c; PT_Indexes indexes = param->indexes; void (*process)(PT_Indexes indexes, T_SOC soc_c) = param->process; free(param); process(indexes, soc_c); - return PTHREAD_RETURN ; + return ; } /* Process generic socket/index operation */ @@ -1400,7 +1368,7 @@ static int proxytrack_start_HTTP(PT_Indexes indexes, T_SOC soc) { struct sockaddr clientAddr; int clientAddrLen = sizeof(struct sockaddr); memset(&clientAddr, 0, sizeof(clientAddr)); - if ( (soc_c = accept(soc, &clientAddr, &clientAddrLen)) != INVALID_SOCKET) { + if ( (soc_c = (T_SOC) accept(soc, &clientAddr, &clientAddrLen)) != INVALID_SOCKET) { if (!proxytrack_process_HTTP_threaded(indexes, soc_c)) { CRITICAL("proxytrack_start_HTTP::Can not fork a thread"); } @@ -1587,7 +1555,7 @@ static int proxytrack_start_ICP(PT_Indexes indexes, T_SOC soc) { if (lenServerClient > 0) { ip = getip(&serverClient, lenServerClient); } else { - StringStrcpy(ip, "unknown"); + StringCopy(ip, "unknown"); } LOG("ICP %s %s/%s 
%s" _ StringBuff(ip) _ LogRequest _ LogReply _ (UrlRequest ? UrlRequest : "-") ); StringFree(ip); diff --git a/src/proxy/proxytrack.h b/src/proxy/proxytrack.h index 498f4d8..737c5ea 100644 --- a/src/proxy/proxytrack.h +++ b/src/proxy/proxytrack.h @@ -29,13 +29,26 @@ Please visit our Website: http://www.httrack.com #define WEBHTTRACK_PROXYTRACK /* Version */ -#define PROXYTRACK_VERSION "0.4" +#define PROXYTRACK_VERSION "0.5" /* Store manager */ #include "../minizip/mztools.h" #include "store.h" #include <sys/stat.h> +#ifndef HTS_DO_NOT_USE_FTIME +#ifdef _WIN32 +#include <sys/utime.h> +#else +#include <utime.h> +#endif +#include <sys/timeb.h> +#else +#include <utime.h> +#endif +#ifndef _WIN32 +#include <pthread.h> +#endif /* generic */ @@ -233,23 +246,6 @@ static int linput_trim(FILE* fp,char* s,int max) { return rlen; } -// copy of concat -#define HTS_URLMAXSIZE 1024 -typedef struct concat_strc { - char buff[16][HTS_URLMAXSIZE*2*2]; - int rol; -} concat_strc; -static char* concat(const char* a,const char* b) { - static concat_strc* strc = NULL; - if (strc == NULL) { - strc = (concat_strc*) calloc(16, sizeof(concat_strc)); - } - strc->rol=((strc->rol+1)%16); // roving pointer - strcpy(strc->buff[strc->rol],a); - if (b) strcat(strc->buff[strc->rol],b); - return strc->buff[strc->rol]; -} - #ifndef S_ISREG #define S_ISREG(m) ((m) & _S_IFREG) #endif @@ -264,25 +260,135 @@ static int fexist(char* s) { return 0; } -#ifndef _WIN32 -#define fconv(a) (a) -#define fconcat(a,b) concat(a,b) -#endif - -#ifdef _WIN32 -static char* __fconv(char* a) { +/* convertir une chaine en temps */ +static void set_lowcase(char* s) { int i; - for(i=0;i<(int) strlen(a);i++) - if (a[i]=='/') // convertir - a[i]='\\'; - return a; + for(i=0;i<(int) strlen(s);i++) + if ((s[i]>='A') && (s[i]<='Z')) + s[i]+=('a'-'A'); } -static char* fconcat(char* a,char* b) { - return __fconv(concat(a,b)); +static struct tm* convert_time_rfc822(struct tm *result,const char* s) { + char months[]="jan feb mar apr may 
jun jul aug sep oct nov dec"; + char str[256]; + char* a; + /* */ + int result_mm=-1; + int result_dd=-1; + int result_n1=-1; + int result_n2=-1; + int result_n3=-1; + int result_n4=-1; + /* */ + + if ((int) strlen(s) > 200) + return NULL; + strcpy(str,s); + set_lowcase(str); + /* éliminer :,- */ + while( (a=strchr(str,'-')) ) *a=' '; + while( (a=strchr(str,':')) ) *a=' '; + while( (a=strchr(str,',')) ) *a=' '; + /* tokeniser */ + a=str; + while(*a) { + char *first, *last; + char tok[256]; + /* découper mot */ + while(*a==' ') a++; /* sauter espaces */ + first=a; + while((*a) && (*a!=' ')) a++; + last=a; + tok[0]='\0'; + if (first!=last) { + char* pos; + strncat(tok,first,(int) (last - first)); + /* analyser */ + if ( (pos=strstr(months,tok)) ) { /* month always in letters */ + result_mm=((int) (pos - months))/4; + } else { + int number; + if (sscanf(tok,"%d",&number) == 1) { /* number token */ + if (result_dd<0) /* day always first number */ + result_dd=number; + else if (result_n1<0) + result_n1=number; + else if (result_n2<0) + result_n2=number; + else if (result_n3<0) + result_n3=number; + else if (result_n4<0) + result_n4=number; + } /* sinon, bruit de fond(+1GMT for exampel) */ + } + } + } + if ((result_n1>=0) && (result_mm>=0) && (result_dd>=0) && (result_n2>=0) && (result_n3>=0) && (result_n4>=0)) { + if (result_n4>=1000) { /* Sun Nov 6 08:49:37 1994 */ + result->tm_year=result_n4-1900; + result->tm_hour=result_n1; + result->tm_min=result_n2; + result->tm_sec=max(result_n3,0); + } else { /* Sun, 06 Nov 1994 08:49:37 GMT or Sunday, 06-Nov-94 08:49:37 GMT */ + result->tm_hour=result_n2; + result->tm_min=result_n3; + result->tm_sec=max(result_n4,0); + if (result_n1<=50) /* 00 means 2000 */ + result->tm_year=result_n1+100; + else if (result_n1<1000) /* 99 means 1999 */ + result->tm_year=result_n1; + else /* 2000 */ + result->tm_year=result_n1-1900; + } + result->tm_isdst=0; /* assume GMT */ + result->tm_yday=-1; /* don't know */ + result->tm_wday=-1; /* don't 
know */ + result->tm_mon=result_mm; + result->tm_mday=result_dd; + return result; + } + return NULL; } -static char* fconv(char* a) { - return __fconv(concat(a,"")); +static struct tm PT_GetTime(time_t t) { + struct tm tmbuf; +#ifdef _WIN32 + struct tm * tm = gmtime(&t); +#else + struct tm * tm = gmtime_r(&t, &tmbuf); +#endif + if (tm != NULL) + return *tm; + else { + memset(&tmbuf, 0, sizeof(tmbuf)); + return tmbuf; + } } +static int set_filetime(const char* file, struct tm* tm_time) { + struct utimbuf tim; +#ifndef HTS_DO_NOT_USE_FTIME + struct timeb B; + memset(&B, 0, sizeof(B)); + B.timezone=0; + ftime( &B ); + tim.actime = tim.modtime = mktime(tm_time) - B.timezone*60; +#else + // bogus time (GMT/local).. + tim.actime=tim.modtime=mktime(tm_time); #endif + return utime(file, &tim); +} +static int set_filetime_time_t(const char* file, time_t t) { + if (t != (time_t) 0 && t != (time_t) -1) { + struct tm tm = PT_GetTime(t); + return set_filetime(file, &tm); + } + return -1; +} +static int set_filetime_rfc822(const char* file, const char* date) { + struct tm buffer; + struct tm* tm_s = convert_time_rfc822(&buffer,date); + if (tm_s) { + return set_filetime(file,tm_s); + } else return -1; +} #endif diff --git a/src/proxy/store.c b/src/proxy/store.c index 1d17574..b8233a8 100644 --- a/src/proxy/store.c +++ b/src/proxy/store.c @@ -20,6 +20,8 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
Please visit our Website: http://www.httrack.com */ +/* Parts (inside ARC format routines) by Lars Clausen (lc@statsbiblioteket.dk) */ + /* ------------------------------------------------------------ */ /* File: Cache manager for ProxyTrack */ /* Author: Xavier Roche */ @@ -28,6 +30,7 @@ Please visit our Website: http://www.httrack.com #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <time.h> /* Locking */ #ifdef _WIN32 @@ -40,8 +43,10 @@ Please visit our Website: http://www.httrack.com #define HTS_INTERNAL_BYTECODE #include "htsinthash.h" +#include "htsmd5.h" #undef HTS_INTERNAL_BYTECODE #include "../minizip/mztools.h" +#include "../minizip/zip.h" #include "htscore.h" #include "htsback.h" @@ -58,6 +63,8 @@ static PT_Element PT_ReadCache__New_u(PT_Index index, const char* url, int flags static int PT_LookupCache__Old_u(PT_Index index, const char* url); static PT_Element PT_ReadCache__Old_u(PT_Index index, const char* url, int flags); +static int PT_LookupCache__Arc_u(PT_Index index, const char* url); +static PT_Element PT_ReadCache__Arc_u(PT_Index index, const char* url, int flags); /* Locking */ @@ -100,10 +107,12 @@ void MutexFree(PT_Mutex *pMutex) { typedef struct _PT_Index__New _PT_Index__New; typedef struct _PT_Index__Old _PT_Index__Old; +typedef struct _PT_Index__Arc _PT_Index__Arc; typedef struct _PT_Index_Functions _PT_Index_Functions; typedef struct _PT_Index__New *PT_Index__New; typedef struct _PT_Index__Old *PT_Index__Old; +typedef struct _PT_Index__Arc *PT_Index__Arc; typedef struct _PT_Index_Functions *PT_Index_Functions; enum { @@ -111,29 +120,42 @@ enum { PT_CACHE_MIN = 0, PT_CACHE__NEW = PT_CACHE_MIN, PT_CACHE__OLD, - PT_CACHE_MAX = PT_CACHE__OLD + PT_CACHE__ARC, + PT_CACHE_MAX = PT_CACHE__ARC }; static int PT_LoadCache__New(PT_Index index, const char *filename); static void PT_Index_Delete__New(PT_Index *pindex); static PT_Element PT_ReadCache__New(PT_Index index, const char* url, int flags); static int 
PT_LookupCache__New(PT_Index index, const char* url); +static int PT_SaveCache__New(PT_Indexes indexes, const char *filename); /**/ static int PT_LoadCache__Old(PT_Index index, const char *filename); static void PT_Index_Delete__Old(PT_Index *pindex); static PT_Element PT_ReadCache__Old(PT_Index index, const char* url, int flags); static int PT_LookupCache__Old(PT_Index index, const char* url); +/**/ +static int PT_LoadCache__Arc(PT_Index index, const char *filename); +static void PT_Index_Delete__Arc(PT_Index *pindex); +static PT_Element PT_ReadCache__Arc(PT_Index index, const char* url, int flags); +static int PT_LookupCache__Arc(PT_Index index, const char* url); +static int PT_SaveCache__Arc(PT_Indexes indexes, const char *filename); struct _PT_Index_Functions { + /* Mandatory services */ int (*PT_LoadCache)(PT_Index index, const char *filename); void (*PT_Index_Delete)(PT_Index *pindex); PT_Element (*PT_ReadCache)(PT_Index index, const char* url, int flags); int (*PT_LookupCache)(PT_Index index, const char* url); + + /* Optional services */ + int (*PT_SaveCache)(PT_Indexes indexes, const char *filename); }; static _PT_Index_Functions _IndexFuncts[] = { - { PT_LoadCache__New, PT_Index_Delete__New, PT_ReadCache__New, PT_LookupCache__New }, - { PT_LoadCache__Old, PT_Index_Delete__Old, PT_ReadCache__Old, PT_LookupCache__Old }, + { PT_LoadCache__New, PT_Index_Delete__New, PT_ReadCache__New, PT_LookupCache__New, PT_SaveCache__New }, + { PT_LoadCache__Old, PT_Index_Delete__Old, PT_ReadCache__Old, PT_LookupCache__Old, NULL }, + { PT_LoadCache__Arc, PT_Index_Delete__Arc, PT_ReadCache__Arc, PT_LookupCache__Arc, PT_SaveCache__Arc }, { NULL, NULL, NULL, NULL } }; @@ -164,11 +186,22 @@ struct _PT_Index__Old { int safeCache; }; +struct _PT_Index__Arc { + PT_INDEX_COMMON_STRUCTURE; + FILE *file; + PT_Mutex fileLock; + int version; + char lastmodified[1024]; + char line[2048]; + char filenameIndexBuff[2048]; +}; + struct _PT_Index { int type; union { _PT_Index__New formatNew; 
_PT_Index__Old formatOld; + _PT_Index__Arc formatArc; struct { PT_INDEX_COMMON_STRUCTURE; } common; @@ -194,7 +227,7 @@ struct _PT_Cache { int count; }; -PT_Indexes PT_New() { +PT_Indexes PT_New(void) { PT_Indexes index = (PT_Indexes) calloc(sizeof(_PT_Indexes), 1); index->cil = inthash_new(127); index->index_size = 0; @@ -301,6 +334,16 @@ static void PT_Index_Delete__Old(PT_Index *pindex) { } } +static void PT_Index_Delete__Arc(PT_Index *pindex) { + if (pindex != NULL && (*pindex) != NULL) { + PT_Index__Arc index = &(*pindex)->slots.formatArc; + if (index->file != NULL) { + fclose(index->file); + } + MutexFree(&index->fileLock); + } +} + int PT_AddIndex(PT_Indexes indexes, const char *path) { PT_Index index = PT_LoadCache(path); if (index != NULL) { @@ -319,7 +362,7 @@ PT_Element PT_Index_HTML_BuildRootInfo(PT_Indexes indexes) { int i; String html = STRING_EMPTY; StringClear(html); - StringStrcat(html, + StringCat(html, "<html>" PROXYTRACK_COMMENT_HEADER DISABLE_IE_FRIENDLY_HTTP_ERROR_MESSAGES @@ -330,26 +373,26 @@ PT_Element PT_Index_HTML_BuildRootInfo(PT_Indexes indexes) { "<h3>Available sites in this cache:</h3><br />" "<br />" ); - StringStrcat(html, "<ul>\r\n"); + StringCat(html, "<ul>\r\n"); for(i = 0 ; i < indexes->index_size ; i++) { if (indexes->index[i] != NULL && indexes->index[i]->slots.common.startUrl[0] != '\0') { const char * url = indexes->index[i]->slots.common.startUrl; - StringStrcat(html, "<li>\r\n"); - StringStrcat(html, "<a href=\""); - StringStrcat(html, url); - StringStrcat(html, "\">"); - StringStrcat(html, url); - StringStrcat(html, "</a>\r\n"); - StringStrcat(html, "</li>\r\n"); + StringCat(html, "<li>\r\n"); + StringCat(html, "<a href=\""); + StringCat(html, url); + StringCat(html, "\">"); + StringCat(html, url); + StringCat(html, "</a>\r\n"); + StringCat(html, "</li>\r\n"); } } - StringStrcat(html, "</ul>\r\n"); - StringStrcat(html, "</body></html>\r\n"); + StringCat(html, "</ul>\r\n"); + StringCat(html, "</body></html>\r\n"); 
elt->size = StringLength(html); elt->adr = StringAcquire(&html); - elt->statuscode = 200; + elt->statuscode = HTTP_OK; strcpy(elt->charset, "iso-8859-1"); strcpy(elt->contenttype, "text/html"); strcpy(elt->msg, "OK"); @@ -404,9 +447,9 @@ char ** PT_Enumerate(PT_Indexes indexes, const char *url, int subtree) { char* ptr = NULL; ptr += StringLength(list); if (len > 0) - StringStrcat(list, StringBuff(subitem)); + StringCat(list, StringBuff(subitem)); if (isFolder) - StringStrcat(list, "/"); + StringCat(list, "/"); StringMemcat(list, "\0", 1); /* NULL terminated strings */ StringMemcat(listindexes, &ptr, sizeof(ptr)); listCount++; @@ -434,7 +477,7 @@ char ** PT_Enumerate(PT_Indexes indexes, const char *url, int subtree) { StringMemcat(listindexes, StringBuff(list), StringLength(list)); /* ---- no reallocation beyond this point (fixed addresses) ---- */ /* start of all strings (pointer) */ - startStrings = (startStrings - nullPointer) + StringBuff(listindexes); + startStrings = (startStrings - nullPointer) + StringBuffRW(listindexes); /* transform indexes into references */ for(i = 0 ; i < listCount ; i++) { char *ptr = NULL; @@ -442,7 +485,7 @@ char ** PT_Enumerate(PT_Indexes indexes, const char *url, int subtree) { memcpy(&ptr, &StringBuff(listindexes)[i*sizeof(char*)], sizeof(char*)); ndx = (unsigned int) (ptr - nullPointer); ptr = startStrings + ndx; - memcpy(&StringBuff(listindexes)[i*sizeof(char*)], &ptr, sizeof(char*)); + memcpy(&StringBuffRW(listindexes)[i*sizeof(char*)], &ptr, sizeof(char*)); } blk = StringAcquire(&listindexes); StringFree(list); @@ -460,16 +503,22 @@ void PT_Enumerate_Delete(char ***plist) { } } -PT_Index PT_LoadCache(const char *filename) { - int type = PT_CACHE_UNDEFINED; +static int PT_GetType(const char *filename) { char * dot = strrchr(filename, '.'); if (dot != NULL) { if (strcasecmp(dot, ".zip") == 0) { - type = PT_CACHE__NEW; + return PT_CACHE__NEW; } else if (strcasecmp(dot, ".ndx") == 0 || strcasecmp(dot, ".dat") == 0) { - type = 
PT_CACHE__OLD; + return PT_CACHE__OLD; + } else if (strcasecmp(dot, ".arc") == 0) { + return PT_CACHE__ARC; } } + return PT_CACHE_UNDEFINED; +} + +PT_Index PT_LoadCache(const char *filename) { + int type = PT_GetType(filename); if (type != PT_CACHE_UNDEFINED) { PT_Index index = calloc(sizeof(_PT_Index), 1); if (index != NULL) { @@ -524,6 +573,44 @@ int PT_LookupCache(PT_Index index, const char* url) { return 0; } +int PT_SaveCache(PT_Indexes indexes, const char *filename) { + int type = PT_GetType(filename); + if (type != PT_CACHE_UNDEFINED) { + if (_IndexFuncts[type].PT_SaveCache != NULL) { + int ret = _IndexFuncts[type].PT_SaveCache(indexes, filename); + if (ret == 0) { + (void) set_filetime_time_t(filename, PT_GetTimeIndex(indexes)); + return 0; + } + } + } + return -1; +} + +int PT_EnumCache(PT_Indexes indexes, int (*callback)(void *, const char *url, PT_Element), void *arg) { + if (indexes != NULL && indexes->cil != NULL) { + struct_inthash_enum en = inthash_enum_new(indexes->cil); + inthash_chain* chain; + while((chain = inthash_enum_next(&en))) { + const long int index_id = (long int)chain->value.intg; + const char *const url = chain->name; + if (index_id >= 0 && index_id <= indexes->index_size) { + PT_Element item = PT_ReadCache(indexes->index[index_id], url, FETCH_HEADERS | FETCH_BODY); + if (item != NULL) { + int ret = callback(arg, url, item); + PT_Element_Delete(&item); + if (ret != 0) + return ret; + } + } else { + CRITICAL("PT_ReadCache:Corrupted central index locator"); + return -1; + } + } + } + return 0; +} + time_t PT_Index_Timestamp(PT_Index index) { return index->slots.common.timestamp; } @@ -569,8 +656,8 @@ int PT_IndexMerge(PT_Indexes indexes, PT_Index *pindex) while((chain = inthash_enum_next(&en)) != NULL) { const char * url = chain->name; if (url != NULL && url[0] != '\0') { - long int previous_index_id = 0; - if (inthash_read(indexes->cil, url, (long int*)&previous_index_id)) { + intptr_t previous_index_id = 0; + if 
(inthash_read(indexes->cil, url, &previous_index_id)) { if (previous_index_id >= 0 && previous_index_id < indexes->index_size) { if (indexes->index[previous_index_id]->slots.common.timestamp > index->slots.common.timestamp) // existing entry is newer break; @@ -616,14 +703,14 @@ PT_Element PT_ReadIndex(PT_Indexes indexes, const char* url, int flags) { if (indexes != NULL) { - long int index_id; + intptr_t index_id; if (strncmp(url, "http://", 7) == 0) url += 7; if (inthash_read(indexes->cil, url, &index_id)) { if (index_id >= 0 && index_id <= indexes->index_size) { PT_Element item = PT_ReadCache(indexes->index[index_id], url, flags); if (item != NULL) { - item->indexId = index_id; + item->indexId = (int) index_id; return item; } } else { @@ -637,7 +724,7 @@ PT_Element PT_ReadIndex(PT_Indexes indexes, const char* url, int flags) int PT_LookupIndex(PT_Indexes indexes, const char* url) { if (indexes != NULL) { - long int index_id; + intptr_t index_id; if (strncmp(url, "http://", 7) == 0) url += 7; if (inthash_read(indexes->cil, url, &index_id)) { @@ -651,6 +738,22 @@ int PT_LookupIndex(PT_Indexes indexes, const char* url) { return 0; } +time_t PT_GetTimeIndex(PT_Indexes indexes) { + if (indexes != NULL && indexes->index_size > 0) + { + int i; + time_t maxt = indexes->index[0]->slots.common.timestamp; + for(i = 1 ; i < indexes->index_size ; i++) { + const time_t currt = indexes->index[i]->slots.common.timestamp; + if (currt > maxt) { + maxt = currt; + } + } + return maxt; + } + return (time_t) -1; +} + PT_Index PT_GetIndex(PT_Indexes indexes, int indexId) { if (indexes != NULL && indexId >= 0 && indexId < indexes->index_size) { @@ -659,7 +762,7 @@ PT_Index PT_GetIndex(PT_Indexes indexes, int indexId) { return NULL; } -PT_Element PT_ElementNew() { +PT_Element PT_ElementNew(void) { PT_Element r = NULL; if ((r = calloc(sizeof(_PT_Element), 1)) == NULL) return NULL; @@ -690,6 +793,22 @@ static PT_Element PT_ReadCache__New(PT_Index index, const char* url, int flags) /* New 
HTTrack cache (new.zip) format */ /* ------------------------------------------------------------ */ +#define ZIP_FIELD_STRING(headers, headersSize, field, value) do { \ + if ( (value != NULL) && (value)[0] != '\0') { \ + sprintf(headers + headersSize, "%s: %s\r\n", field, (value != NULL) ? (value) : ""); \ + (headersSize) += (int) strlen(headers + headersSize); \ + } \ +} while(0) +#define ZIP_FIELD_INT(headers, headersSize, field, value) do { \ + if ( (value != 0) ) { \ + sprintf(headers + headersSize, "%s: "LLintP"\r\n", field, (LLint)(value)); \ + (headersSize) += (int) strlen(headers + headersSize); \ + } \ +} while(0) +#define ZIP_FIELD_INT_FORCE(headers, headersSize, field, value) do { \ + sprintf(headers + headersSize, "%s: "LLintP"\r\n", field, (LLint)(value)); \ + (headersSize) += (int) strlen(headers + headersSize); \ +} while(0) #define ZIP_READFIELD_STRING(line, value, refline, refvalue) do { \ if (line[0] != '\0' && strfield2(line, refline)) { \ strcpy(refvalue, value); \ @@ -821,7 +940,8 @@ static PT_Element PT_ReadCache__New_u(PT_Index index_, const char* url, int flag char location_default[HTS_URLMAXSIZE*2]; char previous_save[HTS_URLMAXSIZE*2]; char previous_save_[HTS_URLMAXSIZE*2]; - long int hash_pos; + char catbuff[CATBUFF_SIZE]; + intptr_t hash_pos; int hash_pos_return; PT_Element r = NULL; if (index == NULL || index->hash == NULL || index->zFile == NULL || url == NULL || *url == 0) @@ -835,7 +955,7 @@ static PT_Element PT_ReadCache__New_u(PT_Index index_, const char* url, int flag strcpy(r->location, ""); if (strncmp(url, "http://", 7) == 0) url += 7; - hash_pos_return = inthash_read(index->hash, url, (long int*)&hash_pos); + hash_pos_return = inthash_read(index->hash, url, &hash_pos); if (hash_pos_return) { uLong posInZip; @@ -888,6 +1008,16 @@ static PT_Element PT_ReadCache__New_u(PT_Index index_, const char* url, int flag //ZIP_READFIELD_STRING(line, value, "X-Addr", ..); // Original address //ZIP_READFIELD_STRING(line, value, "X-Fil", 
..); // Original URI filename ZIP_READFIELD_STRING(line, value, "X-Save", previous_save_); // Original save filename + if (line[0] != '\0') { + int len = r->headers ? ((int) strlen(r->headers)) : 0; + int nlen = (int) ( strlen(line) + 2 + strlen(value) + sizeof("\r\n") + 1 ); + r->headers = realloc(r->headers, len + nlen); + r->headers[len] = '\0'; + strcat(r->headers, line); + strcat(r->headers, ": "); + strcat(r->headers, value); + strcat(r->headers, "\r\n"); + } } } while(offset < readSizeHeader && !lineEof); totalHeader = offset; @@ -955,13 +1085,14 @@ static PT_Element PT_ReadCache__New_u(PT_Index index_, const char* url, int flag /* Read in memory from cache */ if (flags & FETCH_BODY) { if (strnotempty(previous_save)) { - FILE* fp = fopen(fconv(previous_save), "rb"); + FILE* fp = fopen(fconv(catbuff,previous_save), "rb"); if (fp != NULL) { r->adr = (char*) malloc(r->size + 4); if (r->adr != NULL) { if (r->size > 0 && fread(r->adr, 1, r->size, fp) != r->size) { + int last_errno = errno; r->statuscode=STATUSCODE_INVALID; - sprintf(r->msg,"Read error in cache disk data: %s", strerror(errno)); + sprintf(r->msg,"Read error in cache disk data: %s", strerror(last_errno)); } } else { r->statuscode=STATUSCODE_INVALID; @@ -970,7 +1101,7 @@ static PT_Element PT_ReadCache__New_u(PT_Index index_, const char* url, int flag fclose(fp); } else { r->statuscode=STATUSCODE_INVALID; - sprintf(r->msg, "Read error (can't open '%s') from cache", fconv(previous_save)); + sprintf(r->msg, "Read error (can't open '%s') from cache", fconv(catbuff,previous_save)); } } else { r->statuscode=STATUSCODE_INVALID; @@ -982,7 +1113,7 @@ static PT_Element PT_ReadCache__New_u(PT_Index index_, const char* url, int flag if (flags & FETCH_BODY) { r->adr=(char*) malloc(r->size+1); if (r->adr!=NULL) { - if (unzReadCurrentFile(index->zFile, r->adr, r->size) != r->size) { // erreur + if (unzReadCurrentFile(index->zFile, r->adr, (unsigned int) r->size) != r->size) { // erreur free(r->adr); r->adr=NULL; 
r->statuscode=STATUSCODE_INVALID; @@ -1024,6 +1155,121 @@ static PT_Element PT_ReadCache__New_u(PT_Index index_, const char* url, int flag return r; } +static int PT_SaveCache__New_Fun(void *arg, const char *url, PT_Element element) { + zipFile zFileOut = (zipFile) arg; + char headers[8192]; + int headersSize; + zip_fileinfo fi; + int zErr; + const char *url_adr = ""; + const char *url_fil = ""; + + headers[0] = '\0'; + headersSize = 0; + + /* Fields */ + headers[0] = '\0'; + headersSize = 0; + /* */ + { + char* message; + if (strlen(element->msg) < 32) { + message = element->msg; + } else { + message = "(See X-StatusMessage)"; + } + /* 64 characters MAX for first line */ + sprintf(headers + headersSize, "HTTP/1.%c %d %s\r\n", '1', element->statuscode, element->msg); + } + headersSize += (int) strlen(headers + headersSize); + + /* Second line MUST ALWAYS be X-In-Cache */ + ZIP_FIELD_INT_FORCE(headers, headersSize, "X-In-Cache", 1); + ZIP_FIELD_INT(headers, headersSize, "X-StatusCode", element->statuscode); + ZIP_FIELD_STRING(headers, headersSize, "X-StatusMessage", element->msg); + ZIP_FIELD_INT(headers, headersSize, "X-Size", element->size); // size + ZIP_FIELD_STRING(headers, headersSize, "Content-Type", element->contenttype); // contenttype + ZIP_FIELD_STRING(headers, headersSize, "X-Charset", element->charset); // contenttype + ZIP_FIELD_STRING(headers, headersSize, "Last-Modified", element->lastmodified); // last-modified + ZIP_FIELD_STRING(headers, headersSize, "Etag", element->etag); // Etag + ZIP_FIELD_STRING(headers, headersSize, "Location", element->location); // 'location' pour moved + ZIP_FIELD_STRING(headers, headersSize, "Content-Disposition", element->cdispo); // Content-disposition + ZIP_FIELD_STRING(headers, headersSize, "X-Addr", url_adr); // Original address + ZIP_FIELD_STRING(headers, headersSize, "X-Fil", url_fil); // Original URI filename + ZIP_FIELD_STRING(headers, headersSize, "X-Save", ""); // Original save filename + + /* Time */ + 
memset(&fi, 0, sizeof(fi)); + if (element->lastmodified[0] != '\0') { + struct tm buffer; + struct tm* tm_s = convert_time_rfc822(&buffer, element->lastmodified); + if (tm_s) { + fi.tmz_date.tm_sec = (uInt) tm_s->tm_sec; + fi.tmz_date.tm_min = (uInt) tm_s->tm_min; + fi.tmz_date.tm_hour = (uInt) tm_s->tm_hour; + fi.tmz_date.tm_mday = (uInt) tm_s->tm_mday; + fi.tmz_date.tm_mon = (uInt) tm_s->tm_mon; + fi.tmz_date.tm_year = (uInt) tm_s->tm_year; + } + } + + /* Open file - NOTE: headers in "comment" */ + if ((zErr = zipOpenNewFileInZip(zFileOut, + url, + &fi, + /* + Store headers in realtime in the local file directory as extra field + In case of crash, we'll be able to recover the whole ZIP file by rescanning it + */ + headers, + (uInt) strlen(headers), + NULL, + 0, + NULL, /* comment */ + Z_DEFLATED, + Z_DEFAULT_COMPRESSION)) != Z_OK) + { + int zip_zipOpenNewFileInZip_failed = 0; + assertf(zip_zipOpenNewFileInZip_failed); + } + + /* Write data in cache */ + if (element->size > 0 && element->adr != NULL) { + if ((zErr = zipWriteInFileInZip(zFileOut, element->adr, (int) element->size)) != Z_OK) { + int zip_zipWriteInFileInZip_failed = 0; + assertf(zip_zipWriteInFileInZip_failed); + } + } + + /* Close */ + if ((zErr = zipCloseFileInZip(zFileOut)) != Z_OK) { + int zip_zipCloseFileInZip_failed = 0; + assertf(zip_zipCloseFileInZip_failed); + } + + /* Flush */ + if ((zErr = zipFlush(zFileOut)) != 0) { + int zip_zipFlush_failed = 0; + assertf(zip_zipFlush_failed); + } + + return 0; +} + +static int PT_SaveCache__New(PT_Indexes indexes, const char *filename) { + zipFile zFileOut = zipOpen(filename, 0); + if (zFileOut != NULL) { + int ret = PT_EnumCache(indexes, PT_SaveCache__New_Fun, (void *) zFileOut); + zipClose(zFileOut, "Created by HTTrack Website Copier/ProxyTrack "PROXYTRACK_VERSION); + zFileOut = NULL; + if (ret != 0) + (void) unlink(filename); + return ret; + } + return -1; +} + + /* ------------------------------------------------------------ */ /* Old HTTrack cache 
(dat/ndx) format */ @@ -1167,7 +1413,7 @@ static int PT_LoadCache__Old(PT_Index index_, const char *filename) { /* */ } else { // Vieille version du cache /* */ - // fspc(opt->log,"warning"); fprintf(opt->log,"Cache: importing old cache format"LF); + // HTS_LOG(opt,LOG_WARNING); fprintf(opt->log,"Cache: importing old cache format"LF); cache->version=0; // cache 1.0 strcpy(cache->lastmodified,firstline); } @@ -1257,7 +1503,7 @@ static PT_Element PT_ReadCache__Old(PT_Index index, const char* url, int flags) static PT_Element PT_ReadCache__Old_u(PT_Index index_, const char* url, int flags) { PT_Index__Old cache = (PT_Index__Old) &index_->slots.formatOld; - long int hash_pos; + intptr_t hash_pos; int hash_pos_return; char location_default[HTS_URLMAXSIZE*2]; char previous_save[HTS_URLMAXSIZE*2]; @@ -1276,7 +1522,7 @@ static PT_Element PT_ReadCache__Old_u(PT_Index index_, const char* url, int flag strcpy(r->location, ""); if (strncmp(url, "http://", 7) == 0) url += 7; - hash_pos_return=inthash_read(cache->hash, url, (long int*)&hash_pos); + hash_pos_return=inthash_read(cache->hash, url, &hash_pos); if (hash_pos_return) { int pos = (int) hash_pos; /* simply */ @@ -1325,10 +1571,12 @@ static PT_Element PT_ReadCache__Old_u(PT_Index index_, const char* url, int flag } else { char check[256]; unsigned long size_read; + unsigned long int size_; check[0]='\0'; // cache_rint(cache->dat,&r->statuscode); - cache_rLLint(cache->dat,&r->size); + cache_rLLint(cache->dat,&size_); + r->size = (size_t) size_; cache_rstr(cache->dat,r->msg); cache_rstr(cache->dat,r->contenttype); if (cache->version >= 3) @@ -1503,3 +1751,497 @@ static int PT_LookupCache__Old_u(PT_Index index_, const char* url) { return 0; } + +/* ------------------------------------------------------------ */ +/* Internet Archive Arc 1.0 (arc) format */ +/* Xavier Roche (roche@httrack.com) */ +/* Lars Clausen (lc@statsbiblioteket.dk) */ +/* ------------------------------------------------------------ */ + +#define ARC_SP ' 
' + +static const char* getArcField(const char *line, int pos) { + int i; + for(i = 0 ; line[i] != '\0' && pos > 0 ; i++) { + if (line[i] == ARC_SP) + pos--; + } + if (pos == 0) + return &line[i]; + return NULL; +} + +static char* copyArcField(const char *line, int npos, char *dest, int destMax) { + const char *pos; + if ((pos = getArcField(line, npos)) != NULL) { + int i; + for(i = 0 ; pos[i] != '\0' && pos[i] != ARC_SP && ( --destMax ) > 0; i++) { + dest[i] = pos[i]; + } + dest[i] = 0; + return dest; + } + dest[0] = 0; + return NULL; +} + +static int getArcLength(const char *line) { + const char *pos; + if ((pos = getArcField(line, 9)) != NULL + || (pos = getArcField(line, 4)) != NULL + || (pos = getArcField(line, 2)) != NULL + ) { + int length; + if (sscanf(pos, "%d", &length) == 1) { + return length; + } + } + return -1; +} + +static int skipArcNl(FILE* file) { + if (fgetc(file) == 0x0a) { + return 0; + } + return -1; +} + +static int skipArcData(FILE* file, const char *line) { + int jump = getArcLength(line); + if (jump != -1) { + if (fseek(file, jump, SEEK_CUR) == 0 /* && skipArcNl(file) == 0 */) { + return 0; + } + } + return -1; +} + +static int getDigit(const char digit) { + return (int) ( digit - '0' ); +} + +static int getDigit2(const char * const pos) { + return getDigit(pos[0])*10 + getDigit(pos[1]); +} + +static int getDigit4(const char * const pos) { + return getDigit(pos[0])*1000 + getDigit(pos[1])*100 + getDigit(pos[2])*10 + getDigit(pos[3]); +} + +static time_t getGMT(struct tm *tm) { /* hey, time_t is local! 
*/ + time_t t = mktime(tm); + if (t != (time_t) -1 && t != (time_t) 0) { + /* BSD does not have static "timezone" declared */ +#if (defined(BSD) || defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) || defined(__FreeBSD_kernel__)) + time_t now = time(NULL); + time_t timezone = - localtime(&now)->tm_gmtoff; +#endif + return (time_t) (t - timezone); + } + return (time_t) -1; +} + +static time_t getArcTimestamp(const char * const line) { + const char *pos; + if ((pos = getArcField(line, 2)) != NULL) { + int i; + /* date == YYYYMMDDhhmmss (Greenwich Mean Time) */ + /* example: 20050405154029 */ + for(i = 0 ; pos[i] >= '0' && pos[i] <= '9' ; i++); + if (i == 14) { + struct tm tm; + memset(&tm, 0, sizeof(tm)); + tm.tm_year = getDigit4(pos + 0) - 1900; /* current year minus 1900 */ + tm.tm_mon = getDigit2(pos + 4) - 1; /* 0 – 11 */ + tm.tm_mday = getDigit2(pos + 6); /* 1 – 31 */ + tm.tm_hour = getDigit2(pos + 8); /* 0 – 23 */ + tm.tm_min = getDigit2(pos + 10); /* 0 – 59 */ + tm.tm_sec = getDigit2(pos + 12); /* 0 – 59 */ + tm.tm_isdst = 0; + return getGMT(&tm); + } + } + return (time_t) -1; +} + +static int readArcURLRecord(PT_Index__Arc index) { + index->line[0] = '\0'; + if (linput(index->file, index->line, sizeof(index->line) - 1)) { + return 0; + } + return -1; +} + +#define str_begins(str, sstr) ( strncmp(str, sstr, sizeof(sstr) - 1) == 0 ) +static int PT_CompatibleScheme(const char *url) { + return (str_begins(url, "http:") + || str_begins(url, "https:") + || str_begins(url, "ftp:") + || str_begins(url, "file:")); +} + +int PT_LoadCache__Arc(PT_Index index_, const char *filename) { + if (index_ != NULL && filename != NULL) { + PT_Index__Arc index = &index_->slots.formatArc; + index->timestamp = file_timestamp(filename); + MutexInit(&index->fileLock); + index->file = fopen(filename, "rb"); + + // Opened ? 
+ if (index->file != NULL) { + inthash hashtable = index->hash; + if (readArcURLRecord(index) == 0) { + int entries = 0; + /* Read first line */ + if (strncmp(index->line, "filedesc://", sizeof("filedesc://") - 1) != 0) { + fprintf(stderr, "Unexpected bad signature #%s"LF, index->line); + fclose(index->file); + index->file = NULL; + return 0; + } + /* Timestamp */ + index->timestamp = getArcTimestamp(index->line); + /* Skip first entry */ + if (skipArcData(index->file, index->line) != 0 || skipArcNl(index->file) != 0) { + fprintf(stderr, "Unexpected bad data offset size first entry"LF); + fclose(index->file); + index->file = NULL; + return 0; + } + /* Read all meta-entries (not data) */ + while(!feof(index->file)) { + unsigned long int fpos = ftell(index->file); + if (skipArcNl(index->file) == 0 && readArcURLRecord(index) == 0) { + int length = getArcLength(index->line); + if (length >= 0) { + const char * filenameIndex = copyArcField(index->line, 0, + index->filenameIndexBuff, sizeof(index->filenameIndexBuff) - 1); /* can not be NULL */ + if (strncmp(filenameIndex, "http://", 7) == 0) { + filenameIndex += 7; + } + if (*filenameIndex != 0) { + if (skipArcData(index->file, index->line) != 0) { + fprintf(stderr, "Corrupted cache data entry #%d (truncated file?), aborting read"LF, (int)entries); + } + /*fprintf(stdout, "adding %s [%d]\n", filenameIndex, (int)fpos);*/ + if (PT_CompatibleScheme(index->filenameIndexBuff)) { + inthash_add(hashtable, filenameIndex, fpos); /* position of meta-data */ + entries++; + } + } else { + fprintf(stderr, "Corrupted cache meta entry #%d"LF, (int)entries); + } + } else { + fprintf(stderr, "Corrupted cache meta entry #%d, aborting read"LF, (int)entries); + break ; + } + } else { + break ; + } + } + + /* OK */ + return 1; + } else { + fprintf(stderr, "Bad file (empty ?)"LF); + } + } else { + fprintf(stderr, "Unable to open file"LF); + index = NULL; + } + } else { + fprintf(stderr, "Bad arguments"LF); + } + return 0; +} + +#define 
HTTP_READFIELD_STRING(line, value, refline, refvalue) do { \ + if (line[0] != '\0' && strfield2(line, refline)) { \ + strcpy(refvalue, value); \ + line[0] = '\0'; \ + } \ +} while(0) +#define HTTP_READFIELD_INT(line, value, refline, refvalue) do { \ + if (line[0] != '\0' && strfield2(line, refline)) { \ + int intval = 0; \ + sscanf(value, "%d", &intval); \ + (refvalue) = intval; \ + line[0] = '\0'; \ + } \ +} while(0) + +static PT_Element PT_ReadCache__Arc(PT_Index index, const char* url, int flags) { + PT_Element retCode; + MutexLock(&index->slots.formatArc.fileLock); + { + retCode = PT_ReadCache__Arc_u(index, url, flags); + } + MutexUnlock(&index->slots.formatArc.fileLock); + return retCode; +} + +static PT_Element PT_ReadCache__Arc_u(PT_Index index_, const char* url, int flags) +{ + PT_Index__Arc index = (PT_Index__Arc) &index_->slots.formatArc; + char location_default[HTS_URLMAXSIZE*2]; + intptr_t hash_pos; + int hash_pos_return; + PT_Element r = NULL; + if (index == NULL || index->hash == NULL || url == NULL || *url == 0) + return NULL; + if ((r = PT_ElementNew()) == NULL) + return NULL; + location_default[0] = '\0'; + memset(r, 0, sizeof(_PT_Element)); + r->location = location_default; + strcpy(r->location, ""); + if (strncmp(url, "http://", 7) == 0) + url += 7; + hash_pos_return = inthash_read(index->hash, url, &hash_pos); + + if (hash_pos_return) { + if (fseek(index->file, (long)hash_pos, SEEK_SET) == 0) { + if (skipArcNl(index->file) == 0 && readArcURLRecord(index) == 0) { + long int fposMeta = ftell(index->file); + int dataLength = getArcLength(index->line); + const char *pos; + + /* Read HTTP headers */ + /* HTTP/1.1 404 Not Found */ + if (linput(index->file, index->line, sizeof(index->line) - 1)) { + if ((pos = getArcField(index->line, 1)) != NULL) { + if (sscanf(pos, "%d", &r->statuscode) != 1) { + r->statuscode = STATUSCODE_INVALID; + } + } + if ((pos = getArcField(index->line, 2)) != NULL) { + r->msg[0] = '\0'; + strncat(r->msg, pos, sizeof(pos) - 
1); + } + while (linput(index->file, index->line, sizeof(index->line) - 1) && index->line[0] != '\0') { + char* const line = index->line; + char* value = strchr(line, ':'); + if (value != NULL) { + *value = '\0'; + for( value++ ; *value == ' ' || *value == '\t' ; value++); + HTTP_READFIELD_INT(line, value, "Content-Length", r->size); // size + HTTP_READFIELD_STRING(line, value, "Content-Type", r->contenttype); // contenttype + HTTP_READFIELD_STRING(line, value, "Last-Modified", r->lastmodified); // last-modified + HTTP_READFIELD_STRING(line, value, "Etag", r->etag); // Etag + HTTP_READFIELD_STRING(line, value, "Location", r->location); // 'location' pour moved + HTTP_READFIELD_STRING(line, value, "Content-Disposition", r->cdispo); // Content-disposition + if (line[0] != '\0') { + int len = r->headers ? ((int) strlen(r->headers)) : 0; + int nlen = (int) ( strlen(line) + 2 + strlen(value) + sizeof("\r\n") + 1 ); + r->headers = realloc(r->headers, len + nlen); + r->headers[len] = '\0'; + strcat(r->headers, line); + strcat(r->headers, ": "); + strcat(r->headers, value); + strcat(r->headers, "\r\n"); + } + } + } + + /* FIXME charset */ + if (r->contenttype[0] != '\0') { + char *pos = strchr(r->contenttype, ';'); + if (pos != NULL) { + /*char *chs = strchr(pos, "charset=");*/ + /*HTTP_READFIELD_STRING(line, value, "X-Charset", r->charset);*/ + *pos = 0; + if ((pos = strchr(r->contenttype, ' ')) != NULL) { + *pos = 0; + } + } + } + + /* Read data */ + if (r->statuscode != STATUSCODE_INVALID) { /* Can continue */ + if (flags & FETCH_BODY) { + long int fposCurrent = ftell(index->file); + long int metaSize = fposCurrent - fposMeta; + long int fetchSize = (long int) r->size; + if (fetchSize <= 0) { + fetchSize = dataLength - metaSize; + } else if (fetchSize > dataLength - metaSize) { + r->statuscode=STATUSCODE_INVALID; + strcpy(r->msg, "Cache Read Error : Truncated Data"); + } + r->size = 0; + if (r->statuscode != STATUSCODE_INVALID) { + r->adr = (char*) malloc(fetchSize); + 
if (r->adr != NULL) { + if (fetchSize > 0 && ( r->size = (int) fread(r->adr, 1, fetchSize, index->file) ) != fetchSize) { + int last_errno = errno; + r->statuscode=STATUSCODE_INVALID; + sprintf(r->msg,"Read error in cache disk data: %s", strerror(last_errno)); + } + } else { + r->statuscode=STATUSCODE_INVALID; + strcpy(r->msg,"Read error (memory exhausted) from cache"); + } + } + } + } + + } else { + r->statuscode=STATUSCODE_INVALID; + strcpy(r->msg, "Cache Read Error : Read Header Error"); + } + + } else { + r->statuscode=STATUSCODE_INVALID; + strcpy(r->msg, "Cache Read Error : Read Header Error"); + } + } else { + r->statuscode=STATUSCODE_INVALID; + strcpy(r->msg, "Cache Read Error : Seek Error"); + } + + } else { + r->statuscode=STATUSCODE_INVALID; + strcpy(r->msg,"File Cache Entry Not Found"); + } + if (r->location[0] != '\0') { + r->location = strdup(r->location); + } else { + r->location = NULL; + } + return r; +} + +static int PT_LookupCache__Arc(PT_Index index, const char* url) { + int retCode; + MutexLock(&index->slots.formatArc.fileLock); + { + retCode = PT_LookupCache__Arc_u(index, url); + } + MutexUnlock(&index->slots.formatArc.fileLock); + return retCode; +} + +static int PT_LookupCache__Arc_u(PT_Index index_, const char* url) { + if (index_ != NULL) { + PT_Index__New cache = (PT_Index__New) &index_->slots.formatNew; + if (cache == NULL || cache->hash == NULL || url == NULL || *url == 0) + return 0; + if (strncmp(url, "http://", 7) == 0) + url += 7; + if (inthash_read(cache->hash, url, NULL)) + return 1; + } + return 0; +} + +typedef struct PT_SaveCache__Arc_t { + PT_Indexes indexes; + FILE *fp; + time_t t; + char filename[64]; + struct tm buff; + char headers[8192]; + char md5[32 + 2]; +} PT_SaveCache__Arc_t; + +static int PT_SaveCache__Arc_Fun(void *arg, const char *url, PT_Element element) { + PT_SaveCache__Arc_t *st = (PT_SaveCache__Arc_t*) arg; + FILE * const fp = st->fp; + struct tm* tm = convert_time_rfc822(&st->buff, element->lastmodified); + 
int size_headers; + + sprintf(st->headers, + "HTTP/1.0 %d %s" "\r\n" + "X-Server: ProxyTrack " PROXYTRACK_VERSION "\r\n" + "Content-type: %s%s%s%s" "\r\n" + "Last-modified: %s" "\r\n" + "Content-length: %d" "\r\n" + , + element->statuscode, element->msg, + /**/ + element->contenttype, + (element->charset[0] ? "; charset=\"" : ""), + (element->charset[0] ? element->charset : ""), + (element->charset[0] ? "\"" : ""), + /**/ + element->lastmodified, + (int) element->size + ); + if (element->location != NULL && element->location[0] != '\0') { + sprintf(st->headers + strlen(st->headers), "Location: %s" "\r\n", element->location); + } + if (element->headers != NULL) { + if ( strlen(element->headers) < sizeof(st->headers) - strlen(element->headers) - 1 ) { + strcat(st->headers, element->headers); + } + } + strcat(st->headers, "\r\n"); + size_headers = (int) strlen(st->headers); + + /* doc == <nl><URL-record><nl><network_doc> */ + + /* Format: URL IP date mime result checksum location offset filename length */ + if (element->adr != NULL) { + domd5mem(element->adr, element->size, st->md5, 1); + } else { + strcpy(st->md5, "-"); + } + fprintf(fp, + /* nl */ + "\n" + /* URL-record */ + "%s%s %s %04d%02d%02d%02d%02d%02d %s %d %s %s %ld %s %ld" + /* nl */ + "\n", + /* args */ + ( link_has_authority(url) ? "" : "http://" ), url, + "0.0.0.0", + tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday, tm->tm_hour, tm->tm_min, tm->tm_sec, + element->contenttype, + element->statuscode, + st->md5, ( element->location ? 
element->location : "-" ), + (long int)ftell(fp), st->filename, + (long int)( size_headers + element->size )); + /* network_doc */ + if (fwrite(st->headers, 1, size_headers, fp) != size_headers + || ( element->size > 0 && fwrite(element->adr, 1, element->size, fp) != element->size ) + ) { + return 1; /* Error */ + } + + return 0; +} + +static int PT_SaveCache__Arc(PT_Indexes indexes, const char *filename) { + FILE *fp = fopen(filename, "wb"); + if (fp != NULL) { + PT_SaveCache__Arc_t st; + int ret; + time_t t = PT_GetTimeIndex(indexes); + struct tm tm = PT_GetTime(t); + + /* version-2-block == + filedesc://<path><sp><ip_address><sp><date><sp>text/plain<sp>200<sp>-<sp>-<sp>0<sp><filename><sp><length><nl> + 2<sp><reserved><sp><origin-code><nl> + URL<sp>IP-address<sp>Archive-date<sp>Content-type<sp>Result-code<sp>Checksum<sp>Location<sp> Offset<sp>Filename<sp>Archive-length<nl> + <nl> */ + const char* prefix = + "2 0 HTTrack Website Copier" "\n" + "URL IP-address Archive-Date Content-Type Result-code Checksum Location Offset Filename Archive-length" "\n" "\n"; + sprintf(st.filename, "httrack_%d.arc", (int) t); + fprintf(fp, "filedesc://%s 0.0.0.0 %04d%02d%02d%02d%02d%02d text/plain 200 - - 0 %s %d" "\n" + "%s", + st.filename, + tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec, + st.filename, (int)strlen(prefix), prefix); + st.fp = fp; + st.indexes = indexes; + st.t = t; + ret = PT_EnumCache(indexes, PT_SaveCache__Arc_Fun, (void *)&st); + fclose(fp); + if (ret != 0) + (void) unlink(filename); + return ret; + } + return -1; +} diff --git a/src/proxy/store.h b/src/proxy/store.h index 805bc20..5d4a76e 100644 --- a/src/proxy/store.h +++ b/src/proxy/store.h @@ -28,6 +28,13 @@ Please visit our Website: http://www.httrack.com #ifndef WEBHTTRACK_PROXYTRACK_STORE #define WEBHTTRACK_PROXYTRACK_STORE +/* Includes */ +#ifndef _WIN32 +#include <pthread.h> +#else +#include "windows.h" +#endif + /* Proxy */ typedef struct _PT_Index _PT_Index; @@ -43,12 
+50,12 @@ typedef struct _PT_CacheItem _PT_CacheItem; typedef struct _PT_CacheItem *PT_CacheItem; typedef struct _PT_Element { - int indexId; // index identifier, if suitable (!= -1) + int indexId; // index identifier, if suitable (!= -1) // int statuscode; // status-code, -1=erreur, 200=OK,201=..etc (cf RFC1945) char* adr; // adresse du bloc de mémoire, NULL=vide - char* headers; // adresse des en têtes si présents - unsigned long int size; // taille fichier + char* headers; // adresse des en têtes si présents (RFC822 format) + size_t size; // taille fichier char msg[1024]; // error message ("\0"=undefined) char contenttype[64]; // content-type ("text/html" par exemple) char charset[64]; // charset ("iso-8859-1" par exemple) @@ -85,11 +92,14 @@ int PT_AddIndex(PT_Indexes index, const char *path); int PT_RemoveIndex(PT_Indexes index, int indexId); int PT_IndexMerge(PT_Indexes indexes, PT_Index *pindex); PT_Index PT_GetIndex(PT_Indexes indexes, int indexId); +time_t PT_GetTimeIndex(PT_Indexes indexes); /* Indexes list */ PT_Element PT_Index_HTML_BuildRootInfo(PT_Indexes indexes); char ** PT_Enumerate(PT_Indexes indexes, const char *url, int subtree); void PT_Enumerate_Delete(char ***plist); +int PT_EnumCache(PT_Indexes indexes, int (*callback)(void *, const char *url, PT_Element), void *arg); +int PT_SaveCache(PT_Indexes indexes, const char *filename); /* Index */ PT_Index PT_LoadCache(const char *filename); diff --git a/src/webhttrack b/src/webhttrack index aa0edca..11682c1 100755 --- a/src/webhttrack +++ b/src/webhttrack @@ -4,7 +4,7 @@ # Initializes the htsserver GUI frontend and launch the default browser BROWSEREXE= -SRCHBROWSEREXE="x-www-browser www-browser mozilla firefox firebird galeon konqueror opera netscape" +SRCHBROWSEREXE="x-www-browser www-browser iceape mozilla firefox firebird galeon konqueror opera netscape" if test -n "${BROWSER}"; then # sensible-browser will f up if BROWSER is not set SRCHBROWSEREXE="sensible-browser ${SRCHBROWSEREXE}" @@ -23,6 
+23,8 @@ return 0 function mozillabrowser { # returns 0, if the browser is mozilla type +echo "$1" | grep -q "iceape" +[ $? -eq 0 ] && return 0 echo "$1" | grep -q "mozilla" [ $? -eq 0 ] && return 0 echo "$1" | grep -q "netscape" @@ -39,7 +41,7 @@ if ! test -n "${user_name}"; then user_name=`id -un` fi if test -n "${user_name}"; then -ps -e --user "$user_name" | grep -qE "(mozilla|netscape|firebird|firefox)" +ps -e --user "$user_name" | grep -qE "(iceape|mozilla|netscape|firebird|firefox)" else false fi |