From 660b569b0980fc8f71b03ed666dd02eec8388b4c Mon Sep 17 00:00:00 2001 From: Xavier Roche Date: Mon, 19 Mar 2012 12:59:03 +0000 Subject: httrack 3.41.2 --- html/Makefile.am | 2 +- html/Makefile.in | 2 +- html/abuse.html | 2 +- html/addurl.html | 2 +- html/cache.html | 2 +- html/cmddoc.html | 2 +- html/contact.html | 4 +- html/dev.html | 5 +- html/faq.html | 2 +- html/fcguide.html | 2 +- html/filters.html | 2 +- html/httrack.man.html | 564 +++++++++++++--------------------------------- html/index.html | 2 +- html/library.html | 2 +- html/options.html | 2 +- html/overview.html | 2 +- html/plug.html | 256 +++++++++++++++------ html/plug_330.html | 215 ++++++++++++++++++ html/scripting.html | 2 +- html/server/about.html | 2 +- html/server/addurl.html | 2 +- html/server/error.html | 2 +- html/server/file.html | 2 +- html/server/finished.html | 2 +- html/server/help.html | 2 +- html/server/index.html | 2 +- html/server/option1.html | 2 +- html/server/option10.html | 2 +- html/server/option11.html | 2 +- html/server/option2.html | 2 +- html/server/option2b.html | 2 +- html/server/option3.html | 2 +- html/server/option4.html | 2 +- html/server/option5.html | 2 +- html/server/option6.html | 2 +- html/server/option7.html | 2 +- html/server/option8.html | 2 +- html/server/option9.html | 2 +- html/server/refresh.html | 2 +- html/server/step2.html | 2 +- html/server/step3.html | 2 +- html/server/step4.html | 2 +- html/shelldoc.html | 2 +- html/step.html | 2 +- html/step1.html | 2 +- html/step2.html | 2 +- html/step3.html | 2 +- html/step4.html | 2 +- html/step5.html | 2 +- html/step9.html | 2 +- html/step9_opt1.html | 2 +- html/step9_opt10.html | 2 +- html/step9_opt11.html | 2 +- html/step9_opt2.html | 2 +- html/step9_opt3.html | 2 +- html/step9_opt4.html | 6 +- html/step9_opt5.html | 2 +- html/step9_opt6.html | 2 +- html/step9_opt7.html | 2 +- html/step9_opt8.html | 2 +- html/step9_opt9.html | 2 +- 61 files changed, 626 insertions(+), 534 deletions(-) create mode 100644 
html/plug_330.html (limited to 'html') diff --git a/html/Makefile.am b/html/Makefile.am index 416dbf5..2bd25bc 100755 --- a/html/Makefile.am +++ b/html/Makefile.am @@ -34,7 +34,7 @@ EXTRA_DIST = $(HelpHtml_DATA) $(HelpHtmlimg_DATA) $(HelpHtmlimages_DATA) \ httrack.css install-data-hook: - if test ! -f $(DESTDIR)$(prefix)/share/httrack/html ; then \ + if test ! -L $(DESTDIR)$(prefix)/share/httrack/html ; then \ ( cd $(DESTDIR)$(prefix)/share/httrack \ && mv -f ../doc/httrack/html html \ && cd ../doc/httrack/ \ diff --git a/html/Makefile.in b/html/Makefile.in index fd5387c..8855606 100644 --- a/html/Makefile.in +++ b/html/Makefile.in @@ -570,7 +570,7 @@ uninstall-am: uninstall-HelpHtmlDATA uninstall-HelpHtmlTxtDATA \ install-data-hook: - if test ! -f $(DESTDIR)$(prefix)/share/httrack/html ; then \ + if test ! -L $(DESTDIR)$(prefix)/share/httrack/html ; then \ ( cd $(DESTDIR)$(prefix)/share/httrack \ && mv -f ../doc/httrack/html html \ && cd ../doc/httrack/ \ diff --git a/html/abuse.html b/html/abuse.html index 1d98f95..92c46fa 100644 --- a/html/abuse.html +++ b/html/abuse.html @@ -579,7 +579,7 @@ And then, put the email address in your pages through: - +
diff --git a/html/addurl.html b/html/addurl.html index 46a163a..59f5a66 100644 --- a/html/addurl.html +++ b/html/addurl.html @@ -144,7 +144,7 @@ h4 { margin: 0; font-weight: bold; font-size: 1.18em; } - +
diff --git a/html/cache.html b/html/cache.html index df28dc3..a17cb70 100755 --- a/html/cache.html +++ b/html/cache.html @@ -282,7 +282,7 @@ Libraries should generally handle this peculiar format, however. - +
diff --git a/html/cmddoc.html b/html/cmddoc.html index 7879071..4260b09 100644 --- a/html/cmddoc.html +++ b/html/cmddoc.html @@ -145,7 +145,7 @@ The command-line version - +
diff --git a/html/contact.html b/html/contact.html index 0706f6e..2a2e81d 100644 --- a/html/contact.html +++ b/html/contact.html @@ -243,7 +243,7 @@ roche at httrack dot com (Xavier ROCHE)




This program is covered by the GNU General Public License.
- HTTrack/HTTrack Website Copier is Copyright (C) 1998-2003 Xavier Roche and other contributors + HTTrack/HTTrack Website Copier is Copyright (C) 1998-2007 Xavier Roche and other contributors
@@ -259,7 +259,7 @@ roche at httrack dot com (Xavier ROCHE)
- +
diff --git a/html/dev.html b/html/dev.html index 66ae8a1..01fbdf0 100644 --- a/html/dev.html +++ b/html/dev.html @@ -116,7 +116,8 @@ You can use tens of options (see httrack --help) to control precisely t

  • More complex use: plugging external C functions to the httrack library

  • For advanced functions, you may have to use external C wrappers ; for example when adding advanced crawl features, such as "tuned" filestructure type -
    Important note: please read the license information of httrack before developing add-ons +
    Important note: please read the license information of httrack before developing add-ons
    +See also the page for versions prior to 3.41

  • Advanced use: using the library

  • The library can be used to write graphical GUIs for httrack, or to run mirrors from a program. @@ -146,7 +147,7 @@ This page describes the HTTrack cache format. - +
    diff --git a/html/faq.html b/html/faq.html index 9225147..3b4a051 100644 --- a/html/faq.html +++ b/html/faq.html @@ -934,7 +934,7 @@ A: Feel free to contact us! - +
    diff --git a/html/fcguide.html b/html/fcguide.html index e2f03d4..f86702f 100644 --- a/html/fcguide.html +++ b/html/fcguide.html @@ -2708,7 +2708,7 @@ for only 2 simultaneous sesions. - +
    diff --git a/html/filters.html b/html/filters.html index dac8545..fa79ca9 100644 --- a/html/filters.html +++ b/html/filters.html @@ -466,7 +466,7 @@ See also: The FAQ
    - +
    diff --git a/html/httrack.man.html b/html/httrack.man.html index 116cbb5..13ee1a0 100644 --- a/html/httrack.man.html +++ b/html/httrack.man.html @@ -1,5 +1,5 @@ - + @@ -46,14 +46,15 @@ local directory

    httrack [ url ]... [ -filter ]... [ +filter ]... [ -] [ -w, --mirror ] [ -W, --mirror-wizard ] [ --g, --get-files ] [ -i, --continue ] [ -Y, ---mirrorlinks ] [ -P, --proxy ] [ -%f, ---httpproxy-ftp[=N] ] [ -%b, --bind ] [ -rN, ---depth[=N] ] [ -%eN, --ext-depth[=N] ] [ -mN, ---max-files[=N] ] [ -MN, --max-size[=N] ] [ --EN, --max-time[=N] ] [ -AN, --max-rate[=N] ] -[ -%cN, --connection-per-second[=N] ] [ -GN, +] [ -%O, --chroot ] [ -w, --mirror ] [ -W, +--mirror-wizard ] [ -g, --get-files ] [ -i, +--continue ] [ -Y, --mirrorlinks ] [ -P, +--proxy ] [ -%f, --httpproxy-ftp[=N] ] [ -%b, +--bind ] [ -rN, --depth[=N] ] [ -%eN, +--ext-depth[=N] ] [ -mN, --max-files[=N] ] [ +-MN, --max-size[=N] ] [ -EN, --max-time[=N] ] +[ -AN, --max-rate[=N] ] [ -%cN, +--connection-per-second[=N] ] [ -GN, --max-pause[=N] ] [ -%mN, --max-mms-time[=N] ] [ -cN, --sockets[=N] ] [ -TN, --timeout ] [ -RN, --retries[=N] ] [ -JN, --min-rate[=N] ] [ @@ -72,20 +73,20 @@ local directory

    ] [ -%h, --http-10 ] [ -%k, --keep-alive ] [ -%B, --tolerant ] [ -%s, --updatehack ] [ -%u, --urlhack ] [ -%A, --assume ] [ -@iN, ---protocol[=N] ] [ -F, --user-agent ] [ -%R, ---referer ] [ -%E, --from ] [ -%F, ---footer ] [ -%l, --language ] [ -C, ---cache[=N] ] [ -k, --store-all-in-cache ] [ --%n, --do-not-recatch ] [ -%v, --display ] [ --Q, --do-not-log ] [ -q, --quiet ] [ -z, ---extra-log ] [ -Z, --debug-log ] [ -v, ---verbose ] [ -f, --file-log ] [ -f2, ---single-log ] [ -I, --index ] [ -%i, ---build-top-index ] [ -%I, --search-index ] [ --pN, --priority[=N] ] [ -S, --stay-on-same-dir -] [ -D, --can-go-down ] [ -U, --can-go-up ] [ --B, --can-go-up-and-down ] [ -a, ---stay-on-same-address ] [ -d, +--protocol[=N] ] [ -%w, --disable-module ] [ +-F, --user-agent ] [ -%R, --referer ] [ +-%E, --from ] [ -%F, --footer ] [ -%l, +--language ] [ -C, --cache[=N] ] [ -k, +--store-all-in-cache ] [ -%n, --do-not-recatch ] +[ -%v, --display ] [ -Q, --do-not-log ] [ +-q, --quiet ] [ -z, --extra-log ] [ -Z, +--debug-log ] [ -v, --verbose ] [ -f, +--file-log ] [ -f2, --single-log ] [ -I, +--index ] [ -%i, --build-top-index ] [ -%I, +--search-index ] [ -pN, --priority[=N] ] [ -S, +--stay-on-same-dir ] [ -D, --can-go-down ] [ +-U, --can-go-up ] [ -B, --can-go-up-and-down ] +[ -a, --stay-on-same-address ] [ -d, --stay-on-same-domain ] [ -l, --stay-on-same-tld ] [ -e, --go-everywhere ] [ -%H, --debug-headers ] [ -%!, @@ -255,8 +256,8 @@ proxy

    - - + @@ -266,6 +267,18 @@ proxy

    path for mirror/logfiles+cache (-O path mirror[,path cache and logfiles]) (--path <param>)

    + + + + +
    +

    -O

    + +

    -%O

    +
    + +

    chroot path to, must be r00t (-%O root path) (--chroot +<param>)

    +
    cols="4" cellspacing="0" cellpadding="0"> - - + - - + - - + - - + - - + - - + - - + - - + - - + - - + - - + - - + - - + - - + - - + - - + - - + - - + - - + - - + - - + - - + - - + - - + - - + - - + - - + - - + - - + - - + - - + - - + - - + + + + + +
    +

    -P

    proxy use (-P proxy:port or -P user:pass@proxy:port) @@ -363,11 +376,11 @@ cache and logfiles]) (--path <param>)

    +

    -%f

    *use proxy for ftp (f0 don t use) @@ -375,11 +388,11 @@ cache and logfiles]) (--path <param>)

    +

    -%b

    use this local hostname to make/send requests (-%b @@ -529,22 +542,22 @@ minute, 3600=1 hour) (--max-mms-time[=N])

    cols="4" cellspacing="0" cellpadding="0">
    +

    -cN

    number of multiple connections (*c8) (--sockets[=N])

    +

    -TN

    timeout, number of seconds after a non-responding link @@ -552,11 +565,11 @@ is shutdown (--timeout)

    +

    -RN

    number of retries, in case of timeout or non-fatal @@ -564,11 +577,11 @@ errors (*R1) (--retries[=N])

    +

    -JN

    traffic jam control, minimum transfert rate @@ -576,11 +589,11 @@ errors (*R1) (--retries[=N])

    +

    -HN

    host is abandonned if: 0=never, 1=timeout, 2=slow, @@ -672,11 +685,11 @@ file (one scan rule per line) (--urllist <param>)

    cols="4" cellspacing="0" cellpadding="0">
    +

    -NN

    structure type (0 *original structure, 1+: see below) @@ -684,22 +697,22 @@ file (one scan rule per line) (--urllist <param>)

    +

    -or

    user defined structure (-N "%h%p/%n%q.%t")

    +

    -%N

    delayed type check, don t make any link test but wait @@ -708,11 +721,11 @@ t use, %N1 use for unknown extensions, * %N2 always use)

    +

    -%D

    cached delayed type check, don t wait for remote type @@ -721,11 +734,11 @@ during updates, to speedup them (%D0 wait, * %D1 don t wait)

    +

    -%M

    generate a RFC MIME-encapsulated full-archive (.mht) @@ -733,11 +746,11 @@ during updates, to speedup them (%D0 wait, * %D1 don t wait)

    +

    -LN

    long names (L1 *long names / L0 8-3 conversion / L2 @@ -745,11 +758,11 @@ ISO9660 compatible) (--long-names[=N])

    +

    -KN

    keep original links (e.g. http://www.adr/link) (K0 @@ -758,11 +771,11 @@ absolute URI links) (--keep-links[=N])

    +

    -x

    replace external html links by error pages @@ -770,11 +783,11 @@ absolute URI links) (--keep-links[=N])

    +

    -%x

    do not include any password for external password @@ -782,11 +795,11 @@ protected websites (%x0 include) (--disable-passwords)

    +

    -%q

    *include query string for local files (useless, for @@ -795,11 +808,11 @@ information purpose only) (%q0 don t include)

    +

    -o

    *generate output html file in case of error (404..) (o0 @@ -807,11 +820,11 @@ don t generate) (--generate-errors)

    +

    -X

    *purge old files after update (X0 keep delete) @@ -819,11 +832,11 @@ don t generate) (--generate-errors)

    +

    -%p

    preserve html files as is (identical to -K4 -%F @@ -843,11 +856,11 @@ don t generate) (--generate-errors)

    cols="4" cellspacing="0" cellpadding="0">
    +

    -bN

    accept cookies in cookies.txt (0=do not accept,* @@ -855,11 +868,11 @@ don t generate) (--generate-errors)

    +

    -u

    check document type if unknown (cgi,asp..) (u0 don t @@ -868,23 +881,24 @@ check, * u1 check but /, u2 check always)

    +

    -j

    -

    *parse Java Classes (j0 don t parse) -(--parse-java[=N])

    +

    *parse Java Classes (j0 don t parse, bitmask: |1 parse +default, |2 don t parse .class |4 don t parse .js |8 don t +be aggressive) (--parse-java[=N])

    +

    -sN

    follow robots.txt and meta robots tags @@ -893,11 +907,11 @@ rules)) (--robots[=N])

    +

    -%h

    force HTTP/1.0 requests (reduce update features, only @@ -905,11 +919,11 @@ for old servers or proxies) (--http-10)

    +

    -%k

    use keep-alive if possible, greately reducing latency @@ -918,11 +932,11 @@ for small files and test requests (%k0 don t use)

    +

    -%B

    tolerant requests (accept bogus responses on some @@ -930,11 +944,11 @@ servers, but not standard!) (--tolerant)

    +

    -%s

    update hacks: various hacks to limit re-transfers when @@ -943,11 +957,11 @@ updating (identical size, bogus response..)

    +

    -%u

    url hacks: various hacks to limit duplicate URLs (strip @@ -955,11 +969,11 @@ updating (identical size, bogus response..)

    +

    -%A

    assume that a type (cgi,asp..) is always linked with a @@ -968,11 +982,11 @@ mime type (-%A php3,cgi=text/html;dat,bin=application/x-zip)

    +

    -can

    also be used to force a specific file type: --assume @@ -980,16 +994,28 @@ foo.cgi=text/html

    +

    -@iN

    internet protocol (0=both ipv6+ipv4, 4=ipv4 only, 6=ipv6 only) (--protocol[=N])

    + +

    -%w

    +
    + +

    disable a specific external mime module (-%w htsswf -%w +htsjava) (--disable-module <param>)

    +
    @@ -1612,7 +1638,7 @@ make) (--search-index)

    @@ -1768,8 +1794,7 @@ smith) (--user <param>)

    -

    cache repair (damaged cache) (--debug-oldftp)

    +

    cache repair (damaged cache) (--repair-cache)

    -

    old FTP routines (debug) (--debug-oldftp)

    +

    old FTP routines (debug) (--repair-cache)

    use an external library function as a wrapper (-%W -link-detected=foo.so:myfunction[,myparameters]) (--callback -<param>)

    +myfoo.so[,myparameters]) (--callback <param>)

    @@ -1782,58 +1807,53 @@ link-detected=foo.so:myfunction[,myparameters]) (--callback + cols="3" cellspacing="0" cellpadding="0"> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + - - + - - + - - +
    +

    -N0

    Site-structure (default)

    +

    -N1

    HTML in web/, images/other files in web/images/

    +

    -N2

    HTML in web/HTML, images/other in web/images

    +

    -N3

    HTML in web/, images/other in web/

    +

    -N4

    HTML in web/, images/other in web/xxx, where xxx is the @@ -1842,44 +1862,40 @@ example)

    +

    -N5

    Images/other in web/xxx and HTML in web/HTML

    +

    -N99

    All files in web/, with random names (gadget !)

    +

    -N100

    Site-structure, without www.domain.xxx/

    +

    -N101

    Identical to N1 exept that "web" is replaced @@ -1887,11 +1903,10 @@ by the site s name

    +

    -N102

    Identical to N2 exept that "web" is replaced @@ -1899,11 +1914,10 @@ by the site s name

    +

    -N103

    Identical to N3 exept that "web" is replaced @@ -1911,11 +1925,10 @@ by the site s name

    +

    -N104

    Identical to N4 exept that "web" is replaced @@ -1923,11 +1936,10 @@ by the site s name

    +

    -N105

    Identical to N5 exept that "web" is replaced @@ -1935,11 +1947,10 @@ by the site s name

    +

    -N199

    Identical to N99 exept that "web" is replaced @@ -1947,11 +1958,10 @@ by the site s name

    +

    -N1001

    Identical to N1 exept that there is no "web" @@ -1959,11 +1969,10 @@ directory

    +

    -N1002

    Identical to N2 exept that there is no "web" @@ -1971,11 +1980,10 @@ directory

    +

    -N1003

    Identical to N3 exept that there is no "web" @@ -1983,11 +1991,10 @@ directory (option set for g option)

    +

    -N1004

    Identical to N4 exept that there is no "web" @@ -1995,11 +2002,10 @@ directory

    +

    -N1005

    Identical to N5 exept that there is no "web" @@ -2007,11 +2013,10 @@ directory

    +

    -N1099

    Identical to N99 exept that there is no "web" @@ -2171,11 +2176,11 @@ be empty

    cols="4" cellspacing="0" cellpadding="0">
    +

    -K0

    foo.cgi?q=45 -> foo4B54.html?q=45 (relative URI, @@ -2183,11 +2188,11 @@ default)

    +

    -K

    -> http://www.foobar.com/folder/foo.cgi?q=45 @@ -2195,22 +2200,22 @@ default)

    +

    -K4

    -> foo.cgi?q=45 (original URL)

    +

    -K3

    -> /folder/foo.cgi?q=45 (absolute URI)

    @@ -2243,14 +2248,13 @@ default)

    + cols="3" cellspacing="0" cellpadding="0"> - - - - -
    +

    --get

    <URLs> get the files indicated, do not seek other @@ -2258,11 +2262,10 @@ URLs (-qg)

    +

    --list

    <text file> add all URL located in this text file @@ -2457,264 +2460,7 @@ prototypes

    -

    init : void (* myfunction)(void);

    - - - - - -
    -

    free : void (* myfunction)(void);

    - - - - - -
    -

    start : int (* myfunction)(httrackp* -opt);

    - - - - - -
    -

    end : int (* myfunction)(void);

    - - - - - -
    -

    change-options : int (* myfunction)(httrackp* -opt);

    - - - - - -
    -

    preprocess-html : int (* myfunction)(char** html,int* -len,char* url

    - - - - - -
    -

    adresse,char* url fichier);

    -
    - - - - - -
    -

    postprocess-html : int (* myfunction)(char** html,int* -len,char* url

    - - - - - -
    -

    adresse,char* url fichier);

    -
    - - - - - -
    -

    check-html : int (* myfunction)(char* html,int -len,char* url

    - - - - - -
    -

    adresse,char* url fichier);

    -
    - - - - - -
    -

    query : char* (* myfunction)(char* -question);

    - - - - - -
    -

    query2 : char* (* myfunction)(char* -question);

    - - - - - -
    -

    query3 : char* (* myfunction)(char* -question);

    - - - - - -
    -

    loop : int (* myfunction)(lien

    - - - - - -
    -

    back* back,int back max,int back index,int lien tot,int -lien ntot,int stat time,hts stat struct* stats);

    -
    - - - - - -
    -

    check-link : int (* myfunction)(char* adr,char* -fil,int status);

    - - - - - -
    -

    pause : void (* myfunction)(char* -lockfile);

    - - - - - -
    -

    save-file : void (* myfunction)(char* -file);

    - - - - - -
    -

    save-file2 : void (* myfunction)(char* hostname,char* -filename,char* localfile,int is

    - - - - - -
    -

    new,int is modified);

    -
    - - - - - -
    -

    link-detected : int (* myfunction)(char* -link);

    - - - - - -
    -

    link-detected2 : int (* myfunction)(char* link, char* -start

    - - - - - -
    -

    tag);

    -
    - - - - - -
    -

    transfer-status : int (* myfunction)(lien

    - - - - - -
    -

    back* back);

    -
    - - - - - -
    -

    save-name : int (* myfunction)(char* adr

    - - - - - -
    -

    complete,char* fil complete,char* referer adr,char* -referer fil,char* save);

    -
    - - - - - -
    -

    And <wrappername>

    - - - - - +

    see htsdefines.h

    -

    init() functions if defined, called upon plug

    -

    FILES

    diff --git a/html/index.html b/html/index.html index 993e9fc..a393563 100644 --- a/html/index.html +++ b/html/index.html @@ -142,7 +142,7 @@ h4 { margin: 0; font-weight: bold; font-size: 1.18em; } - +
    diff --git a/html/library.html b/html/library.html index 2b61131..4d555bc 100755 --- a/html/library.html +++ b/html/library.html @@ -125,7 +125,7 @@ You may also want to check the httrack.c and httrack.h files to - +
    diff --git a/html/options.html b/html/options.html index 4f552a9..f59b941 100644 --- a/html/options.html +++ b/html/options.html @@ -352,7 +352,7 @@ Add debug informations on log files - +
    diff --git a/html/overview.html b/html/overview.html index 934d992..7cd28c8 100644 --- a/html/overview.html +++ b/html/overview.html @@ -145,7 +145,7 @@ downloads. HTTrack is fully configurable, and has an integrated help system. - +
    diff --git a/html/plug.html b/html/plug.html index 42b0895..1b82c46 100755 --- a/html/plug.html +++ b/html/plug.html @@ -108,85 +108,215 @@ We'll see there some examples.

    -The httrack commandline tool allows (since the 3.30 release) to plug external functions to various callbacks defined in httrack.
    -See also: the httrack-library.h prototype file, and the callbacks-example.c given in the httrack archive.
    +The httrack commandline tool allows (since the 3.30 release) to plug external functions to various callbacks defined in httrack. +The 3.41 release introduces a cleaned up version of callbacks, with two major changes: +
      +
    • Cleaned up function prototypes, with two arguments always passed (the caller carg structure, and the httrackp* object), convenient to pass a user-defined pointer (see CALLBACKARG_USERDEF(carg))
    • +
    • The httrackp* option structure can be directly accessed to plug callbacks (no need to give the callback name and function name in the commandline!)
    • +
    • The callback plug is made through the CHAIN_FUNCTION() helper, allowing to chain multiple callbacks of the same type (the callbacks MUST preserve the chain by calling ancestors)
    • +

    +References: +
      +
    • the httrack-library.h prototype file +
      +Note: the Initialization, Main functions, Options handling and Wrapper functions sections are generally the only ones to be considered. +
    • +
    • the htsdefines.h prototype file, which describes callback function prototypes
    • +
    • the htsopt.h prototype file, which describes the full httrackp* structure
    • +
    • the callbacks-example*.c files given in the httrack archive
    • +
    • the htsjava.c source file (the java class plugin ; overrides 'detect' and 'parse')
    • +
    • the example given at the end of this document
    • +
    + +
    +Below the list of functions to be defined in the module (plugin).
    +
    + + + + + + + + + +
    module function namefunction descriptionfunction signature
    hts_plug +The module entry point. The opt structure can be used to plug callbacks, using the CHAIN_FUNCTION() macro helper. The argv optional argument is the one passed in the commandline as --wrapper parameter.
    return value: 1 upon success, 0 upon error (the mirror will then be aborted)
    + +
    +Wrappers can be plugged inside hts_plug() using:
    + +CHAIN_FUNCTION(opt, <callback name>, <our callback function name>, <our callback function optional custom pointer argument>); + +
    + +
    Example: +
    -httrack --wrapper check-html=callback:process_file .. +CHAIN_FUNCTION(opt, check_html, process, userdef); -
    -With the callback.so (or callback.dll) module defined as below: +
    -
    -int process_file(char* html, int len, char* url_adresse, char* url_fichier) {
    -  printf("now parsing %s%s..\n", url_adresse, url_fichier);
    -  strcpy(currentURLBeingParsed, url_adresse);
    -  strcat(currentURLBeingParsed, url_fichier);
    -  return 1;  /* success */
    -}
    -
    +
    extern int hts_plug(httrackp *opt, const char* argv);
    hts_unplug +The module exit point. To free allocated resources without using global variables, use the uninit callback (see below)extern int hts_unplug(httrackp *opt);
    -Below the list of callbacks, and associated external wrappers:
    + +
    +Note that all callbacks (except init and uninit) take as their first two arguments: +
      +
    • the t_hts_callbackarg structure
    +this structure holds the callback chain (parent callbacks defined before the current callback) pointers, and the user-defined pointer ; see CALLBACKARG_USERDEF(carg) +
    • +
    • the httrackp structure
    +this structure, holding all current httrack options and mirror state, can be read or modified +
    • +
    + +
    +Below the list of callbacks, and associated external wrappers. - - - - - - - - - - - - - - - - - - - - -typedef void (* t_hts_htmlcheck_filesave2)(); - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    "callback name"callback descriptioncallback function signature
    "init"Note: deprecated, should not be used anymore (unsafe callback) - see "start" callback or wrapper_init() module function below this table.Called during initialization ; use of htswrap_add (see httrack-library.h) is permitted inside this function to setup other callbacks.
    return value: none
    void (* myfunction)(void);
    "free"Note: deprecated, should not be used anymore (unsafe callback) - see "end" callback or wrapper_exit() module function below this table.
    Called during un-initialization
    return value: none
    void (* myfunction)(void);
    "start"Called when the mirror starts. The opt structure passed lists all options defined for this mirror. You may modify the opt structure to fit your needs. Besides, use of htswrap_add (see httrack-library.h) is permitted inside this function to setup other callbacks.
    return value: 1 upon success, 0 upon error (the mirror will then be aborted)
    int (* myfunction)(httrackp* opt);
    "end"Called when the mirror ends
    return value: 1 upon success, 0 upon error (the mirror will then be considered aborted)
    int (* myfunction)(void);
    "change-options"Called when options are to be changed. The opt structure passed lists all options, updated to take account of recent changes
    return value: 1 upon success, 0 upon error (the mirror will then be aborted)
    int (* myfunction)(httrackp* opt);
    "check-html"Called when a document (which may not be an html document) is to be parsed. The html address points to the document data, of lenth len. The url_adresse and url_fichier are the address and URI of the file being processed
    return value: 1 if the parsing can be processed, 0 if the file must be skipped without being parsed
    int (* myfunction)(char* html,int len,char* url_adresse,char* url_fichier);
    "preprocess-html"Called when a document (which is an html document) is to be parsed (original, not yet modified document). The html address points to the document data address (char**), and the length address points to the lenth of this document. Both pointer values (address and size) can be modified to change the document. It is up to the callback function to reallocate the given pointer (using standard C library realloc()/free() functions), which will be free()'ed by the engine. Hence, return of static buffers is strictly forbidden, and the use of strdup() in such cases is advised. The url_adresse and url_fichier are the address and URI of the file being processed
    return value: 1 if the new pointers can be applied (default value)
    int (* myfunction)(char** html,int* len,char* url_adresse,char* url_fichier);
    "postprocess-html"Called when a document (which is an html document) is parsed and transformed (links rewritten). The html address points to the document data address (char**), and the length address points to the lenth of this document. Both pointer values (address and size) can be modified to change the document. It is up to the callback function to reallocate the given pointer (using standard C library realloc()/free() functions), which will be free()'ed by the engine. Hence, return of static buffers is strictly forbidden, and the use of strdup() in such cases is advised. The url_adresse and url_fichier are the address and URI of the file being processed
    return value: 1 if the new pointers can be applied (default value)
    int (* myfunction)(char** html,int* len,char* url_adresse,char* url_fichier);
    "query"Called when the wizard needs to ask a question. The question string contains the question for the (human) user
    return value: the string answer ("" for default reply)
    char* (* myfunction)(char* question);
    "query2"Called when the wizard needs to ask a questionchar* (* myfunction)(char* question);
    "query3"Called when the wizard needs to ask a questionchar* (* myfunction)(char* question);
    "loop"Called periodically (informational, to display statistics)
    return value: 1 if the mirror can continue, 0 if the mirror must be aborted
    int (* myfunction)(lien_back* back,int back_max,int back_index,int lien_tot,int lien_ntot,int stat_time,hts_stat_struct* stats);
    "check-link"Called when a link has to be tested. The adr and fil are the address and URI of the link being tested. The passed status value has the following meaning: 0 if the link is to be accepted by default, 1 if the link is to be refused by default, and -1 if no decision has yet been taken by the engine
    return value: same meaning as the passed status value ; you may generally return -1 to let the engine take the decision by itself
    int (* myfunction)(char* adr,char* fil,int status);
    "check-mime"Called when a link download has begun, and needs to be tested against its MIME type. The adr and fil are the address and URI of the link being tested, and the mime string contains the link type being processed. The passed status value has the following meaning: 0 if the link is to be accepted by default, 1 if the link is to be refused by default, and -1 if no decision has yet been taken by the engine
    return value: same meaning as the passed status value ; you may generally return -1 to let the engine take the decision by itself
    int (* myfunction)(char* adr,char* fil,char* mime,int status);
    "pause"Called when the engine must pause. When the lockfile passed is deleted, the function can return
    return value: none
    void (* myfunction)(char* lockfile);
    "save-file"Called when a file is to be saved on disk
    return value: none
    void (* myfunction)(char* file);
    "save-file2"Called when a file is to be saved or checked on disk
    The hostname, filename and local filename are given. Two additional flags tells if the file is new (is_new) and is the file is to be modified (is_modified).
    (!is_new && !is_modified): the file is up-to-date, and will not be modified
    (is_new && is_modified): a new file will be written (or an updated file is being written)
    (!is_new && is_modified): a file is being updated (append)
    (is_new && !is_modified): an empty file will be written ("do not recatch locally erased files")
    return value: none
    void (* myfunction)(char* hostname,char* filename,char* localfile,int is_new,int is_modified);
    "link-detected"Called when a link has been detected
    return value: 1 if the link can be analyzed, 0 if the link must not even be considered
    int (* myfunction)(char* link);
    "transfer-status"Called when a file has been processed (downloaded, updated, or error)
    return value: must return 1
    int (* myfunction)(lien_back* back);
    "save-name"Called when a local filename has to be processed. The adr_complete and fil_complete are the address and URI of the file being saved ; the referer_adr and referer_fil are the address and URI of the referer link. The save string contains the local filename being used. You may modifiy the save string to fit your needs, up to 1024 bytes (note: filename collisions, if any, will be handled by the engine by renaming the file into file-2.ext, file-3.ext ..).
    return value: must return 1
    int (* myfunction)(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save);
    "send-header"Called when HTTP headers are to be sent to the remote server. The buff buffer contains text headers, adr and fil the URL, and referer_adr and referer_fil the referer URL. The outgoing structure contains all information related to the current slot.
    return value: 1 if the mirror can continue, 0 if the mirror must be aborted
    int (* myfunction)(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* outgoing);
    "receive-header"Called when HTTP headers are recevived from the remote server. The buff buffer contains text headers, adr and fil the URL, and referer_adr and referer_fil the referer URL. The incoming structure contains all information related to the current slot.
    return value: 1 if the mirror can continue, 0 if the mirror must be aborted
    int (* myfunction)(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* incoming);
    callback namecallback descriptioncallback function signature
    initNote: the use of the "start" callback is advised. Called during initialization.
    return value: none
    void mycallback(t_hts_callbackarg *carg);
    uninitNote: the use of the "end" callback is advised.
    Called during un-initialization
    return value: none
    void mycallback(t_hts_callbackarg *carg);
    startCalled when the mirror starts. The opt structure passed lists all options defined for this mirror. You may modify the opt structure to fit your needs.
    return value: 1 upon success, 0 upon error (the mirror will then be aborted)
    int mycallback(t_hts_callbackarg *carg, httrackp* opt);
    endCalled when the mirror ends
    return value: 1 upon success, 0 upon error (the mirror will then be considered aborted)
    int mycallback(t_hts_callbackarg *carg, httrackp* opt);
    choptCalled when options are to be changed. The opt structure passed lists all options, updated to take account of recent changes
    return value: 1 upon success, 0 upon error (the mirror will then be aborted)
    int mycallback(t_hts_callbackarg *carg, httrackp* opt);
    preprocessCalled when a document (which is an html document) is to be parsed (original, not yet modified document). The html address points to the document data address (char**), and the length address points to the length of this document. Both pointer values (address and size) can be modified to change the document. It is up to the callback function to reallocate the given pointer (using the hts_realloc()/hts_free() library functions), which will be free()'ed by the engine. Hence, return of static buffers is strictly forbidden, and the use of hts_strdup() in such cases is advised. The url_address and url_file are the address and URI of the file being processed
    return value: 1 if the new pointers can be applied (default value)
    int mycallback(t_hts_callbackarg *carg, httrackp* opt, char** html, int* len, const char* url_address, const char* url_file);
    postprocessCalled when a document (which is an html document) is parsed and transformed (links rewritten). The html address points to the document data address (char**), and the length address points to the length of this document. Both pointer values (address and size) can be modified to change the document. It is up to the callback function to reallocate the given pointer (using the hts_realloc()/hts_free() library functions), which will be free()'ed by the engine. Hence, return of static buffers is strictly forbidden, and the use of hts_strdup() in such cases is advised. The url_address and url_file are the address and URI of the file being processed
    return value: 1 if the new pointers can be applied (default value)
    int mycallback(t_hts_callbackarg *carg, httrackp* opt, char** html, int* len, const char* url_address, const char* url_file);
    check_htmlCalled when a document (which may not be an html document) is to be parsed. The html address points to the document data, of length len. The url_address and url_file are the address and URI of the file being processed
    return value: 1 if the parsing can be processed, 0 if the file must be skipped without being parsed
    int mycallback(t_hts_callbackarg *carg, httrackp* opt, char* html, int len, const char* url_address, const char* url_file);
    queryCalled when the wizard needs to ask a question. The question string contains the question for the (human) user
    return value: the string answer ("" for default reply)
    const char* mycallback(t_hts_callbackarg *carg, httrackp* opt, const char* question);
    query2Called when the wizard needs to ask a questionconst char* mycallback(t_hts_callbackarg *carg, httrackp* opt, const char* question);
    query3Called when the wizard needs to ask a questionconst char* mycallback(t_hts_callbackarg *carg, httrackp* opt, const char* question);
    loopCalled periodically (informational, to display statistics)
    return value: 1 if the mirror can continue, 0 if the mirror must be aborted
    int mycallback(t_hts_callbackarg *carg, httrackp* opt, lien_back* back, int back_max, int back_index, int lien_tot, int lien_ntot, int stat_time, hts_stat_struct* stats);
    check_linkCalled when a link has to be tested. The adr and fil are the address and URI of the link being tested. The passed status value has the following meaning: 0 if the link is to be accepted by default, 1 if the link is to be refused by default, and -1 if no decision has yet been taken by the engine
    return value: same meaning as the passed status value ; you may generally return -1 to let the engine take the decision by itself
    int mycallback(t_hts_callbackarg *carg, httrackp* opt, const char* adr, const char* fil, int status);
    check_mimeCalled when a link download has begun, and needs to be tested against its MIME type. The adr and fil are the address and URI of the link being tested, and the mime string contains the link type being processed. The passed status value has the following meaning: 0 if the link is to be accepted by default, 1 if the link is to be refused by default, and -1 if no decision has yet been taken by the engine
    return value: same meaning as the passed status value ; you may generally return -1 to let the engine take the decision by itself
    int mycallback(t_hts_callbackarg *carg, httrackp* opt, const char* adr, const char* fil, const char* mime, int status);
    pauseCalled when the engine must pause. When the lockfile passed is deleted, the function can return
    return value: none
    void mycallback(t_hts_callbackarg *carg, httrackp* opt, const char* lockfile);
    filesaveCalled when a file is to be saved on disk
    return value: none
    void mycallback(t_hts_callbackarg *carg, httrackp* opt, const char* file);
    filesave2Called when a file is to be saved or checked on disk
    The hostname, filename and local filename are given. Three additional flags tell if the local file is new (is_new), if the local file is to be modified (is_modified), and if the file was not updated remotely (not_updated).
    (!is_new && !is_modified): the file is up-to-date, and will not be modified
    (is_new && is_modified): a new file will be written (or an updated file is being written)
    (!is_new && is_modified): a file is being updated (append)
    (is_new && !is_modified): an empty file will be written ("do not recatch locally erased files")
    not_updated: the file was not re-downloaded because it was up-to-date (no data transferred again)

    return value: none
    void mycallback(t_hts_callbackarg *carg, httrackp* opt, const char* hostname, const char* filename, const char* localfile, int is_new, int is_modified, int not_updated);
    linkdetectedCalled when a link has been detected
    return value: 1 if the link can be analyzed, 0 if the link must not even be considered
    int mycallback(t_hts_callbackarg *carg, httrackp* opt, char* link);
    linkdetected2Called when a link has been detected
    return value: 1 if the link can be analyzed, 0 if the link must not even be considered
    int mycallback(t_hts_callbackarg *carg, httrackp* opt, char* link, const char* tag_start);
    xfrstatusCalled when a file has been processed (downloaded, updated, or error)
    return value: must return 1
    int mycallback(t_hts_callbackarg *carg, httrackp* opt, lien_back* back);
    savenameCalled when a local filename has to be processed. The adr_complete and fil_complete are the address and URI of the file being saved ; the referer_adr and referer_fil are the address and URI of the referer link. The save string contains the local filename being used. You may modify the save string to fit your needs, up to 1024 bytes (note: filename collisions, if any, will be handled by the engine by renaming the file into file-2.ext, file-3.ext ..).
    return value: must return 1
    int mycallback(t_hts_callbackarg *carg, httrackp* opt, const char* adr_complete, const char* fil_complete, const char* referer_adr, const char* referer_fil, char* save);
    sendheadCalled when HTTP headers are to be sent to the remote server. The buff buffer contains text headers, adr and fil the URL, and referer_adr and referer_fil the referer URL. The outgoing structure contains all information related to the current slot.
    return value: 1 if the mirror can continue, 0 if the mirror must be aborted
    int mycallback(t_hts_callbackarg *carg, httrackp* opt, char* buff, const char* adr, const char* fil, const char* referer_adr, const char* referer_fil, htsblk* outgoing);
    receiveheadCalled when HTTP headers are received from the remote server. The buff buffer contains text headers, adr and fil the URL, and referer_adr and referer_fil the referer URL. The incoming structure contains all information related to the current slot.
    return value: 1 if the mirror can continue, 0 if the mirror must be aborted
    int mycallback(t_hts_callbackarg *carg, httrackp* opt, char* buff, const char* adr, const char* fil, const char* referer_adr, const char* referer_fil, htsblk* incoming);
    detectCalled when an unknown document is to be parsed. The str structure contains all information related to the document.
    return value: 1 if the type is known and can be parsed, 0 if the document type is unknown
    int mycallback(t_hts_callbackarg *carg, httrackp* opt, htsmoduleStruct* str);
    parseThe str structure contains all information related to the document.
    return value: 1 if the document was successfully parsed, 0 if an error occurred
    int mycallback(t_hts_callbackarg *carg, httrackp* opt, htsmoduleStruct* str);


    -Below additional function names that can be defined inside the module (DLL/.so):
    - - - +Note: the optional libhttrack-plugin module (libhttrack-plugin.dll or libhttrack-plugin.so), if found in the library environment, is loaded automatically, and its hts_plug() function is called.
    - - - -
    "module function name"function description
    int function-name_init(char *args);Called when a function named function-name is extracted from the current module (same as wrapper_init). The optional args provides additional commandline parameters. Returns 1 upon success, 0 if the function should not be extracted.
    int wrapper_init(char *fname, char *args);Called when a function named fname is extracted from the current module. The optional args provides additional commandline parameters. Besides, use of htswrap_add (see httrack-library.h) is permitted inside this function to setup other callbacks. Returns 1 upon success, 0 if the function should not be extracted.
    int wrapper_exit(void);Called when the module is unloaded. The function should return 1 (but the result is ignored).
    - -

    -Below additional function names that can be defined inside the optional libhttrack-plugin module (libhttrack-plugin.dll or libhttrack-plugin.so) searched inside common library path:
    - - - +
    +An example is generally more efficient than anything else, so let's write our first module, designed to simply print all parsed html files: +
    "module function name"function description
    +
    +
    +/* system includes */
    +#include <stdio.h>
    +#include <stdlib.h>
    +#include <string.h>
    +
    +/* standard httrack module includes */
    +#include "httrack-library.h"
    +#include "htsopt.h"
    +#include "htsdefines.h"
    +
    +/* local function called as "check_html" callback */
    +static int process_file(t_hts_callbackarg /*the carg structure, holding various information*/*carg, /*the option settings*/httrackp *opt, 
    +                        /*other parameters are callback-specific*/
    +                        char* html, int len, const char* url_address, const char* url_file) {
    +  void *ourDummyArg = (void*) CALLBACKARG_USERDEF(carg);    /*optional user-defined arg*/
    +
    +  /* call parent functions if multiple callbacks are chained. you can skip this part, if you don't want previous callbacks to be called. */
    +  if (CALLBACKARG_PREV_FUN(carg, check_html) != NULL) {
    +    if (!CALLBACKARG_PREV_FUN(carg, check_html)(CALLBACKARG_PREV_CARG(carg), opt,
    +                                                html, len, url_address, url_file)) {
    +        return 0;  /* abort */
    +      }
    +  }
    +
    +  printf("file %s%s content: %s\n", url_address, url_file, html);
    +  return 1;  /* success */
    +}
    +
    +/* local function called as "end" callback */
    +static int end_of_mirror(t_hts_callbackarg /*the carg structure, holding various information*/*carg, /*the option settings*/httrackp *opt) {
    +  void *ourDummyArg = (void*) CALLBACKARG_USERDEF(carg);    /*optional user-defined arg*/
    +
    +  /* processing */
    +  fprintf(stderr, "That's all, folks!\n");
    +
    +  /* call parent functions if multiple callbacks are chained. you can skip this part, if you don't want previous callbacks to be called. */
    +  if (CALLBACKARG_PREV_FUN(carg, end) != NULL) {
    +    /* status is ok on our side, return other callback's status */
    +    return CALLBACKARG_PREV_FUN(carg, end)(CALLBACKARG_PREV_CARG(carg), opt);
    +  }
    +
    +  return 1;  /* success */
    +}
    +
    +/*
    +module entry point
    +the function name and prototype MUST match this prototype
    +*/
    +EXTERNAL_FUNCTION int hts_plug(httrackp *opt, const char* argv) {
    +  /* optional argument passed in the commandline we won't be using here */
    +  const char *arg = strchr(argv, ',');
    +  if (arg != NULL)
    +    arg++;
    +
    +  /* plug callback functions */
    +  CHAIN_FUNCTION(opt, check_html, process_file, /*optional user-defined arg*/NULL);
    +  CHAIN_FUNCTION(opt, end, end_of_mirror, /*optional user-defined arg*/NULL);
    +
    +  return 1;  /* success */
    +}
    +
    +/*
    +module exit point
    +the function name and prototype MUST match this prototype
    +*/
    +EXTERNAL_FUNCTION int hts_unplug(httrackp *opt) {
    +  fprintf(stderr, "Module unplugged");
    +
    +  return 1;  /* success */
    +}
    +
    +
    + +
    +Compile this file ; for example: +
    + +gcc -O -g3 -shared -o mylibrary.so myexample.c + +
    +and plug the module using the commandline ; for example: +
    + +httrack --wrapper mylibrary http://www.example.com + +
    +or, if some parameters are desired: +
    + +httrack --wrapper mylibrary,myparameter-string http://www.example.com + +
    +(the "myparameter-string" string will be available in the 'arg' parameter passed to the hts_plug entry point) +
    -void plugin_init(void);Called if the module (named libhttrack-plugin.(so|dll)) is found in the library path. Use of htswrap_add (see httrack-library.h) is permitted inside this function to setup other callbacks. - - - -

    -

    @@ -202,7 +332,7 @@ Below additional function names that can be defined inside the optional libhttra - +
    diff --git a/html/plug_330.html b/html/plug_330.html new file mode 100644 index 0000000..d2b7ffb --- /dev/null +++ b/html/plug_330.html @@ -0,0 +1,215 @@ + + + + + + + HTTrack Website Copier - Offline Browser + + + + + + + + + +
    HTTrack Website Copier
    + + + + +
    Open Source offline browser
    + + + + +
    + + + + +
    + + + + +
    + + +

    HTTrack Programming page - plugging functions
    +releases 3.30 to 3.40 (not beyond) +

    + +
    + +You can write external functions to be plugged in the httrack library very easily. +We'll see there some examples. + +

    + +The httrack commandline tool allows (since the 3.30 release) to plug external functions to various callbacks defined in httrack.
    +See also: the httrack-library.h prototype file, and the callbacks-example.c given in the httrack archive.
    + +
    +Example: + +httrack --wrapper check-html=callback:process_file .. + +
    +With the callback.so (or callback.dll) module defined as below: + +
    +int process_file(char* html, int len, char* url_adresse, char* url_fichier) {
    +  printf("now parsing %s%s..\n", url_adresse, url_fichier);
    +  strcpy(currentURLBeingParsed, url_adresse);
    +  strcat(currentURLBeingParsed, url_fichier);
    +  return 1;  /* success */
    +}
    +
    + +Below the list of callbacks, and associated external wrappers:
    + + + + + + + + + + + + + + + + + + + + + + +typedef void (* t_hts_htmlcheck_filesave2)(); + + + + + + + + +
    "callback name"callback descriptioncallback function signature
    "init"Note: deprecated, should not be used anymore (unsafe callback) - see "start" callback or wrapper_init() module function below this table.Called during initialization ; use of htswrap_add (see httrack-library.h) is permitted inside this function to setup other callbacks.
    return value: none
    void (* myfunction)(void);
    "free"Note: deprecated, should not be used anymore (unsafe callback) - see "end" callback or wrapper_exit() module function below this table.
    Called during un-initialization
    return value: none
    void (* myfunction)(void);
    "start"Called when the mirror starts. The opt structure passed lists all options defined for this mirror. You may modify the opt structure to fit your needs. Besides, use of htswrap_add (see httrack-library.h) is permitted inside this function to setup other callbacks.
    return value: 1 upon success, 0 upon error (the mirror will then be aborted)
    int (* myfunction)(httrackp* opt);
    "end"Called when the mirror ends
    return value: 1 upon success, 0 upon error (the mirror will then be considered aborted)
    int (* myfunction)(void);
    "change-options"Called when options are to be changed. The opt structure passed lists all options, updated to take account of recent changes
    return value: 1 upon success, 0 upon error (the mirror will then be aborted)
    int (* myfunction)(httrackp* opt);
    "check-html"Called when a document (which may not be an html document) is to be parsed. The html address points to the document data, of lenth len. The url_adresse and url_fichier are the address and URI of the file being processed
    return value: 1 if the parsing can be processed, 0 if the file must be skipped without being parsed
    int (* myfunction)(char* html,int len,char* url_adresse,char* url_fichier);
    "preprocess-html"Called when a document (which is an html document) is to be parsed (original, not yet modified document). The html address points to the document data address (char**), and the length address points to the lenth of this document. Both pointer values (address and size) can be modified to change the document. It is up to the callback function to reallocate the given pointer (using standard C library realloc()/free() functions), which will be free()'ed by the engine. Hence, return of static buffers is strictly forbidden, and the use of strdup() in such cases is advised. The url_adresse and url_fichier are the address and URI of the file being processed
    return value: 1 if the new pointers can be applied (default value)
    int (* myfunction)(char** html,int* len,char* url_adresse,char* url_fichier);
    "postprocess-html"Called when a document (which is an html document) is parsed and transformed (links rewritten). The html address points to the document data address (char**), and the length address points to the lenth of this document. Both pointer values (address and size) can be modified to change the document. It is up to the callback function to reallocate the given pointer (using standard C library realloc()/free() functions), which will be free()'ed by the engine. Hence, return of static buffers is strictly forbidden, and the use of strdup() in such cases is advised. The url_adresse and url_fichier are the address and URI of the file being processed
    return value: 1 if the new pointers can be applied (default value)
    int (* myfunction)(char** html,int* len,char* url_adresse,char* url_fichier);
    "query"Called when the wizard needs to ask a question. The question string contains the question for the (human) user
    return value: the string answer ("" for default reply)
    char* (* myfunction)(char* question);
    "query2"Called when the wizard needs to ask a questionchar* (* myfunction)(char* question);
    "query3"Called when the wizard needs to ask a questionchar* (* myfunction)(char* question);
    "loop"Called periodically (informational, to display statistics)
    return value: 1 if the mirror can continue, 0 if the mirror must be aborted
    int (* myfunction)(lien_back* back,int back_max,int back_index,int lien_tot,int lien_ntot,int stat_time,hts_stat_struct* stats);
    "check-link"Called when a link has to be tested. The adr and fil are the address and URI of the link being tested. The passed status value has the following meaning: 0 if the link is to be accepted by default, 1 if the link is to be refused by default, and -1 if no decision has yet been taken by the engine
    return value: same meaning as the passed status value ; you may generally return -1 to let the engine take the decision by itself
    int (* myfunction)(char* adr,char* fil,int status);
    "check-mime"Called when a link download has begun, and needs to be tested against its MIME type. The adr and fil are the address and URI of the link being tested, and the mime string contains the link type being processed. The passed status value has the following meaning: 0 if the link is to be accepted by default, 1 if the link is to be refused by default, and -1 if no decision has yet been taken by the engine
    return value: same meaning as the passed status value ; you may generally return -1 to let the engine take the decision by itself
    int (* myfunction)(char* adr,char* fil,char* mime,int status);
    "pause"Called when the engine must pause. When the lockfile passed is deleted, the function can return
    return value: none
    void (* myfunction)(char* lockfile);
    "save-file"Called when a file is to be saved on disk
    return value: none
    void (* myfunction)(char* file);
    "save-file2"Called when a file is to be saved or checked on disk
    The hostname, filename and local filename are given. Two additional flags tell if the file is new (is_new) and if the file is to be modified (is_modified).
    (!is_new && !is_modified): the file is up-to-date, and will not be modified
    (is_new && is_modified): a new file will be written (or an updated file is being written)
    (!is_new && is_modified): a file is being updated (append)
    (is_new && !is_modified): an empty file will be written ("do not recatch locally erased files")
    return value: none
    void (* myfunction)(char* hostname,char* filename,char* localfile,int is_new,int is_modified);
    "link-detected"Called when a link has been detected
    return value: 1 if the link can be analyzed, 0 if the link must not even be considered
    int (* myfunction)(char* link);
    "transfer-status"Called when a file has been processed (downloaded, updated, or error)
    return value: must return 1
    int (* myfunction)(lien_back* back);
    "save-name"Called when a local filename has to be processed. The adr_complete and fil_complete are the address and URI of the file being saved ; the referer_adr and referer_fil are the address and URI of the referer link. The save string contains the local filename being used. You may modifiy the save string to fit your needs, up to 1024 bytes (note: filename collisions, if any, will be handled by the engine by renaming the file into file-2.ext, file-3.ext ..).
    return value: must return 1
    int (* myfunction)(char* adr_complete,char* fil_complete,char* referer_adr,char* referer_fil,char* save);
    "send-header"Called when HTTP headers are to be sent to the remote server. The buff buffer contains text headers, adr and fil the URL, and referer_adr and referer_fil the referer URL. The outgoing structure contains all information related to the current slot.
    return value: 1 if the mirror can continue, 0 if the mirror must be aborted
    int (* myfunction)(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* outgoing);
    "receive-header"Called when HTTP headers are recevived from the remote server. The buff buffer contains text headers, adr and fil the URL, and referer_adr and referer_fil the referer URL. The incoming structure contains all information related to the current slot.
    return value: 1 if the mirror can continue, 0 if the mirror must be aborted
    int (* myfunction)(char* buff, char* adr, char* fil, char* referer_adr, char* referer_fil, htsblk* incoming);
    + +

    +Below additional function names that can be defined inside the module (DLL/.so):
    + + + + + + + + +
    "module function name"function description
    int function-name_init(char *args);Called when a function named function-name is extracted from the current module (same as wrapper_init). The optional args provides additional commandline parameters. Returns 1 upon success, 0 if the function should not be extracted.
    int wrapper_init(char *fname, char *args);Called when a function named fname is extracted from the current module. The optional args provides additional commandline parameters. Besides, use of htswrap_add (see httrack-library.h) is permitted inside this function to setup other callbacks. Returns 1 upon success, 0 if the function should not be extracted.
    int wrapper_exit(void);Called when the module is unloaded. The function should return 1 (but the result is ignored).
    + +

    +Below additional function names that can be defined inside the optional libhttrack-plugin module (libhttrack-plugin.dll or libhttrack-plugin.so) searched inside common library path:
    + + + + + + +
    "module function name"function description
    void plugin_init(void);Called if the module (named libhttrack-plugin.(so|dll)) is found in the library path. Use of htswrap_add (see httrack-library.h) is permitted inside this function to setup other callbacks.
    + +

    + + +

    + + +
    +
    +
    + + + + + +
    + + + + + + diff --git a/html/scripting.html b/html/scripting.html index 2752a0d..02abb93 100755 --- a/html/scripting.html +++ b/html/scripting.html @@ -250,7 +250,7 @@ Script example: - +
    diff --git a/html/server/about.html b/html/server/about.html index 087f68e..82af482 100755 --- a/html/server/about.html +++ b/html/server/about.html @@ -162,7 +162,7 @@ ${LANG_K3} : ${HTTRACK_WEB} - +
    diff --git a/html/server/addurl.html b/html/server/addurl.html index ee1f5a7..21a0953 100755 --- a/html/server/addurl.html +++ b/html/server/addurl.html @@ -218,7 +218,7 @@ ${do:end-if} - +
    diff --git a/html/server/error.html b/html/server/error.html index d8ea4d2..c600805 100755 --- a/html/server/error.html +++ b/html/server/error.html @@ -139,7 +139,7 @@ ${error} - +
    diff --git a/html/server/file.html b/html/server/file.html index 2dd3df6..4108afd 100755 --- a/html/server/file.html +++ b/html/server/file.html @@ -167,7 +167,7 @@ ${do:loadhash} - +
    diff --git a/html/server/finished.html b/html/server/finished.html index 5777a27..27130de 100755 --- a/html/server/finished.html +++ b/html/server/finished.html @@ -213,7 +213,7 @@ ${path}/${projname} - +
    diff --git a/html/server/help.html b/html/server/help.html index ef7f830..19decdb 100755 --- a/html/server/help.html +++ b/html/server/help.html @@ -174,7 +174,7 @@ ${do:end-if} - +
    diff --git a/html/server/index.html b/html/server/index.html index 6d48219..6998976 100755 --- a/html/server/index.html +++ b/html/server/index.html @@ -200,7 +200,7 @@ ${LANG_THANKYOU}! - +
    diff --git a/html/server/option1.html b/html/server/option1.html index 7e75e9e..9ba1d5b 100755 --- a/html/server/option1.html +++ b/html/server/option1.html @@ -229,7 +229,7 @@ ${do:end-if} - +
    diff --git a/html/server/option10.html b/html/server/option10.html index 0ebe51f..99d0a4d 100755 --- a/html/server/option10.html +++ b/html/server/option10.html @@ -221,7 +221,7 @@ ${LANG_IOPT10}: - +
    diff --git a/html/server/option11.html b/html/server/option11.html index 7aea791..4b688e8 100755 --- a/html/server/option11.html +++ b/html/server/option11.html @@ -321,7 +321,7 @@ ${LANG_W3} - +
    diff --git a/html/server/option2.html b/html/server/option2.html index fd3ab8c..6df78f3 100755 --- a/html/server/option2.html +++ b/html/server/option2.html @@ -247,7 +247,7 @@ ${listid:build:LISTDEF_3} - +
    diff --git a/html/server/option2b.html b/html/server/option2b.html index d227c30..ef6d0b2 100755 --- a/html/server/option2b.html +++ b/html/server/option2b.html @@ -211,7 +211,7 @@ ${do:output-mode:} - +
    diff --git a/html/server/option3.html b/html/server/option3.html index 743dc81..9c6149e 100755 --- a/html/server/option3.html +++ b/html/server/option3.html @@ -262,7 +262,7 @@ ${listid:travel3:LISTDEF_11} - +
    diff --git a/html/server/option4.html b/html/server/option4.html index d686ec1..c2176c3 100755 --- a/html/server/option4.html +++ b/html/server/option4.html @@ -255,7 +255,7 @@ ${LANG_I46} - +
    diff --git a/html/server/option5.html b/html/server/option5.html index 64ce3c5..0da4e32 100755 --- a/html/server/option5.html +++ b/html/server/option5.html @@ -291,7 +291,7 @@ ${LANG_I64b} - +
    diff --git a/html/server/option6.html b/html/server/option6.html index 4e4a0ac..c0a18c5 100755 --- a/html/server/option6.html +++ b/html/server/option6.html @@ -227,7 +227,7 @@ ${LANG_I43b} - +
    diff --git a/html/server/option7.html b/html/server/option7.html index d689de2..676c11b 100755 --- a/html/server/option7.html +++ b/html/server/option7.html @@ -219,7 +219,7 @@ ${LANG_B13} - +
    diff --git a/html/server/option8.html b/html/server/option8.html index 397e33c..5f32e0e 100755 --- a/html/server/option8.html +++ b/html/server/option8.html @@ -256,7 +256,7 @@ ${listid:robots:LISTDEF_8} - +
    diff --git a/html/server/option9.html b/html/server/option9.html index b77cccb..2185f9f 100755 --- a/html/server/option9.html +++ b/html/server/option9.html @@ -237,7 +237,7 @@ ${listid:logtype:LISTDEF_9} - +
    diff --git a/html/server/refresh.html b/html/server/refresh.html index 35d3ecf..ca840a6 100755 --- a/html/server/refresh.html +++ b/html/server/refresh.html @@ -271,7 +271,7 @@ ${LANG_H20} ${info.currentjob} - +
    diff --git a/html/server/step2.html b/html/server/step2.html index 246a264..e2d74db 100755 --- a/html/server/step2.html +++ b/html/server/step2.html @@ -343,7 +343,7 @@ ${do:end-if:} - +
    diff --git a/html/server/step3.html b/html/server/step3.html index 54cc9da..a1b035f 100755 --- a/html/server/step3.html +++ b/html/server/step3.html @@ -276,7 +276,7 @@ ${do:output-mode:} - +
    diff --git a/html/server/step4.html b/html/server/step4.html index a65cf22..452edac 100755 --- a/html/server/step4.html +++ b/html/server/step4.html @@ -378,7 +378,7 @@ ${do:output-mode:} - +
    diff --git a/html/shelldoc.html b/html/shelldoc.html index 05e0c80..737794b 100644 --- a/html/shelldoc.html +++ b/html/shelldoc.html @@ -141,7 +141,7 @@ You may encounter minor differences (in the display, or in various options) betw - +
    diff --git a/html/step.html b/html/step.html index fff295f..26193cd 100644 --- a/html/step.html +++ b/html/step.html @@ -128,7 +128,7 @@ h4 { margin: 0; font-weight: bold; font-size: 1.18em; } - +
    diff --git a/html/step1.html b/html/step1.html index 645bf32..e61a02a 100644 --- a/html/step1.html +++ b/html/step1.html @@ -143,7 +143,7 @@ h4 { margin: 0; font-weight: bold; font-size: 1.18em; } - +
    diff --git a/html/step2.html b/html/step2.html index c861e03..44cea6c 100644 --- a/html/step2.html +++ b/html/step2.html @@ -157,7 +157,7 @@ h4 { margin: 0; font-weight: bold; font-size: 1.18em; } - +
    diff --git a/html/step3.html b/html/step3.html index 00b6b66..add093b 100644 --- a/html/step3.html +++ b/html/step3.html @@ -151,7 +151,7 @@ h4 { margin: 0; font-weight: bold; font-size: 1.18em; } - +
    diff --git a/html/step4.html b/html/step4.html index 0de1c54..ab05d2f 100644 --- a/html/step4.html +++ b/html/step4.html @@ -128,7 +128,7 @@ h4 { margin: 0; font-weight: bold; font-size: 1.18em; } - +
    diff --git a/html/step5.html b/html/step5.html index cae81bd..eee809a 100644 --- a/html/step5.html +++ b/html/step5.html @@ -127,7 +127,7 @@ h4 { margin: 0; font-weight: bold; font-size: 1.18em; } - +
    diff --git a/html/step9.html b/html/step9.html index 0400822..af565cd 100644 --- a/html/step9.html +++ b/html/step9.html @@ -144,7 +144,7 @@ h4 { margin: 0; font-weight: bold; font-size: 1.18em; } - +
    diff --git a/html/step9_opt1.html b/html/step9_opt1.html index cf79c85..e263076 100644 --- a/html/step9_opt1.html +++ b/html/step9_opt1.html @@ -145,7 +145,7 @@ h4 { margin: 0; font-weight: bold; font-size: 1.18em; } - +
    diff --git a/html/step9_opt10.html b/html/step9_opt10.html index 658b82f..c1d2b1f 100644 --- a/html/step9_opt10.html +++ b/html/step9_opt10.html @@ -150,7 +150,7 @@ h4 { margin: 0; font-weight: bold; font-size: 1.18em; } - +
    diff --git a/html/step9_opt11.html b/html/step9_opt11.html index e4c0395..12caeee 100644 --- a/html/step9_opt11.html +++ b/html/step9_opt11.html @@ -182,7 +182,7 @@ In this case, HTTrack won't check the type, because it has learned that "foo" is - +
    diff --git a/html/step9_opt2.html b/html/step9_opt2.html index 3fd774d..3b7b8ff 100644 --- a/html/step9_opt2.html +++ b/html/step9_opt2.html @@ -181,7 +181,7 @@ h4 { margin: 0; font-weight: bold; font-size: 1.18em; } - +
    diff --git a/html/step9_opt3.html b/html/step9_opt3.html index 98511f2..df96322 100644 --- a/html/step9_opt3.html +++ b/html/step9_opt3.html @@ -145,7 +145,7 @@ h4 { margin: 0; font-weight: bold; font-size: 1.18em; } - +
    diff --git a/html/step9_opt4.html b/html/step9_opt4.html index 3a3a9d5..54b4713 100644 --- a/html/step9_opt4.html +++ b/html/step9_opt4.html @@ -112,11 +112,11 @@ h4 { margin: 0; font-weight: bold; font-size: 1.18em; }

  • Exclude link(s)
  • -
    This button lets you add a filter to authorize either a directory, a domain, a certain file type... +
    This button lets you add a filter to exclude either a directory, a domain, a certain file type...
    See below to find out how to add a filter rule...


    -
  • Exclude link(s)
  • +
  • Include link(s)

  • This button lets you add a filter to authorize either a directory, a domain, a certain file type...
    See below to find out how to add a filter rule...


    @@ -176,7 +176,7 @@ h4 { margin: 0; font-weight: bold; font-size: 1.18em; } - +
    diff --git a/html/step9_opt5.html b/html/step9_opt5.html index c13666c..c9fbf60 100644 --- a/html/step9_opt5.html +++ b/html/step9_opt5.html @@ -165,7 +165,7 @@ h4 { margin: 0; font-weight: bold; font-size: 1.18em; } - +
    diff --git a/html/step9_opt6.html b/html/step9_opt6.html index 110b27f..41f4d5e 100644 --- a/html/step9_opt6.html +++ b/html/step9_opt6.html @@ -162,7 +162,7 @@ h4 { margin: 0; font-weight: bold; font-size: 1.18em; } - +
    diff --git a/html/step9_opt7.html b/html/step9_opt7.html index 876fb3e..3be0a9e 100644 --- a/html/step9_opt7.html +++ b/html/step9_opt7.html @@ -151,7 +151,7 @@ h4 { margin: 0; font-weight: bold; font-size: 1.18em; } - +
    diff --git a/html/step9_opt8.html b/html/step9_opt8.html index 97d424f..2f74b00 100644 --- a/html/step9_opt8.html +++ b/html/step9_opt8.html @@ -141,7 +141,7 @@ h4 { margin: 0; font-weight: bold; font-size: 1.18em; } - +
    diff --git a/html/step9_opt9.html b/html/step9_opt9.html index b706121..0568554 100644 --- a/html/step9_opt9.html +++ b/html/step9_opt9.html @@ -156,7 +156,7 @@ h4 { margin: 0; font-weight: bold; font-size: 1.18em; } - +
    -- cgit v1.2.3