From 660b569b0980fc8f71b03ed666dd02eec8388b4c Mon Sep 17 00:00:00 2001 From: Xavier Roche Date: Mon, 19 Mar 2012 12:59:03 +0000 Subject: httrack 3.41.2 --- libtest/callbacks-example-baselinks.c | 141 ++++++++++++++++++++-------------- 1 file changed, 84 insertions(+), 57 deletions(-) (limited to 'libtest/callbacks-example-baselinks.c') diff --git a/libtest/callbacks-example-baselinks.c b/libtest/callbacks-example-baselinks.c index 9f6eb11..63b5175 100755 --- a/libtest/callbacks-example-baselinks.c +++ b/libtest/callbacks-example-baselinks.c @@ -3,89 +3,116 @@ Can be useful to make copies of site's archives using site's URL base href as root reference .c file + How to build: (callback.so or callback.dll) + With GNU-GCC: + gcc -O -g3 -Wall -D_REENTRANT -shared -o mycallback.so callbacks-example.c -lhttrack1 + With MS-Visual C++: + cl -LD -nologo -W3 -Zi -Zp4 -DWIN32 -Fe"mycallback.dll" callbacks-example.c libhttrack1.lib + + Note: the httrack library linker option is only necessary when using libhttrack's functions inside the callback + How to use: - - compile this file as a module (callback.so or callback.dll) - example: - (with gcc) - gcc -O -g3 -Wall -D_REENTRANT -DINET6 -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -shared -o callback.so callbacks-example.c - or (with visual c++) - cl -LD -nologo -W3 -Zi -Zp4 -DWIN32 -Fe"callback.dll" callbacks-example.c - - use the --wrapper option in httrack: - httrack --wrapper check-html=callback:process_file - --wrapper link-detected=callback:check_detectedlink + httrack --wrapper mycallback .. */ #include #include #include +/* Standard httrack module includes */ #include "httrack-library.h" +#include "htsopt.h" +#include "htsdefines.h" -/* "External" */ -#ifdef _WIN32 -#define EXTERNAL_FUNCTION __declspec(dllexport) -#else -#define EXTERNAL_FUNCTION -#endif - -/* Function definitions */ -EXTERNAL_FUNCTION int process_file(char* html, int len, char* url_adresse, char* url_fichier); -EXTERNAL_FUNCTION int check_detectedlink(char* link); -EXTERNAL_FUNCTION int check_detectedlink_init(char* initString); -EXTERNAL_FUNCTION int check_detectedlink_exit(void); +/* Local function definitions */ +static int process_file(t_hts_callbackarg *carg, httrackp* opt, char* html, int len, const char* url_address, const char* url_file); +static int check_detectedlink(t_hts_callbackarg *carg, httrackp* opt, char* link); +static int check_detectedlink_end(t_hts_callbackarg *carg, httrackp *opt); -static char base[HTS_URLMAXSIZE + 2] = ""; +/* external functions */ +EXTERNAL_FUNCTION int hts_plug(httrackp *opt, const char* argv); -/* -"check-html" callback -typedef int (* t_hts_htmlcheck)(char* html,int len,char* url_adresse,char* url_fichier); +/* +module entry point */ -EXTERNAL_FUNCTION int process_file(char* html, int len, char* url_adresse, char* url_fichier) { +EXTERNAL_FUNCTION int hts_plug(httrackp *opt, const char* argv) { + const char *arg = strchr(argv, ','); + if (arg != NULL) + arg++; + + /* Check args */ + fprintf(stderr, "Plugged..\n"); + if (arg == NULL || *arg == '\0' || strlen(arg) >= HTS_URLMAXSIZE / 2) { + fprintf(stderr, "** callback error: arguments expected or bad arguments\n"); + fprintf(stderr, "usage: httrack --wrapper modulename,base\n"); + fprintf(stderr, "example: httrack --wrapper callback,http://www.example.com/\n"); + return 0; /* failed */ + } else { + char *callbacks_userdef = strdup(arg); /* userdef */ + + /* Plug callback functions */ + CHAIN_FUNCTION(opt, check_html, process_file, callbacks_userdef); + CHAIN_FUNCTION(opt, linkdetected, check_detectedlink, callbacks_userdef); + CHAIN_FUNCTION(opt, end, check_detectedlink_end, callbacks_userdef); + + fprintf(stderr, "Using root '%s'\n", callbacks_userdef); + } + + return 1; /* success */ +} + +static int process_file(t_hts_callbackarg *carg, httrackp* opt, char* html, int len, const char* url_address, const char* url_file) { + char* prevBase; + + /* Call parent functions if multiple callbacks are chained. */ + if (CALLBACKARG_PREV_FUN(carg, check_html) != NULL) { + if (!CALLBACKARG_PREV_FUN(carg, check_html)(CALLBACKARG_PREV_CARG(carg), opt, html, len, url_address, url_file)) { + return 0; /* Abort */ + } + } + /* Disable base href, if any */ - char* prevBase = strstr(html, "_init() will be called, if exists, upon startup */ -EXTERNAL_FUNCTION int check_detectedlink_init(char* initString) { - fprintf(stderr, "Plugged..\n"); - if (initString != NULL && *initString != '\0' && strlen(initString) < HTS_URLMAXSIZE / 2) { - strcpy(base, initString); - fprintf(stderr, "Using root '%s'\n", base); - return 1; /* success */ - } else { - fprintf(stderr, "** callback error: arguments expected or bad arguments\n"); - fprintf(stderr, "usage: httrack --wrapper save-name=callback:mysavename,base\n"); - fprintf(stderr, "example: httrack --wrapper save-name=callback:mysavename,http://www.example.com/\n"); - return 0; /* failed */ - } -} +static int check_detectedlink_end(t_hts_callbackarg *carg, httrackp *opt) { + char *base = (char*) CALLBACKARG_USERDEF(carg); -/* _exit() will be called, if exists, upon exit */ -EXTERNAL_FUNCTION int check_detectedlink_exit(void) { fprintf(stderr, "Unplugged ..\n"); - return 1; /* success (result ignored anyway in xx_exit) */ + if (base != NULL) { + free(base); + base = NULL; + } + + /* Call parent functions if multiple callbacks are chained. */ + if (CALLBACKARG_PREV_FUN(carg, end) != NULL) { + return CALLBACKARG_PREV_FUN(carg, end)(CALLBACKARG_PREV_CARG(carg), opt); + } + + return 1; /* success */ } -- cgit v1.2.3