/*
HTTrack external callbacks example: enforce a constant base href.
Can be useful to make copies of a site's archives, using the site's URL base href as the root reference.
.c file
How to build: (mycallback.so or mycallback.dll)
With GNU-GCC:
gcc -O -g3 -Wall -D_REENTRANT -shared -o mycallback.so callbacks-example.c -lhttrack2
With MS-Visual C++:
cl -LD -nologo -W3 -Zi -Zp4 -DWIN32 -Fe"mycallback.dll" callbacks-example.c libhttrack.lib
Note: the httrack library linker option is only necessary when using libhttrack's functions inside the callback
How to use (the base URL after the comma is mandatory):
httrack --wrapper mycallback,<base> ..
Example:
httrack --wrapper mycallback,http://www.example.com/ ..
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Standard httrack module includes */
#include "httrack-library.h"
#include "htsopt.h"
#include "htsdefines.h"
/* Forward declarations of the file-local callbacks plugged in by hts_plug() */
/* check_html callback: disables an existing <BASE HREF="..."> tag in the page buffer */
static int process_file(t_hts_callbackarg * carg, httrackp * opt, char *html,
int len, const char *url_address, const char *url_file);
/* linkdetected callback: prepends the user-supplied base to http:// and https:// links */
static int check_detectedlink(t_hts_callbackarg * carg, httrackp * opt,
char *link);
/* end callback: frees the user-supplied base string */
static int check_detectedlink_end(t_hts_callbackarg * carg, httrackp * opt);
/* Exported entry point of the module (see "httrack --wrapper" in the file header) */
EXTERNAL_FUNCTION int hts_plug(httrackp * opt, const char *argv);
/*
 * Module entry point.
 *
 * argv is the wrapper argument string, expected in the form
 * "modulename,base"; everything after the first comma is taken as the base
 * URL that check_detectedlink() prepends to detected links.
 *
 * Returns 1 when the callbacks were plugged, 0 on failure (missing, empty or
 * oversized base argument, or out of memory).
 *
 * Ownership: the duplicated base string is handed to the three callbacks as
 * their user-defined argument and is freed by check_detectedlink_end().
 */
EXTERNAL_FUNCTION int hts_plug(httrackp * opt, const char *argv) {
  /* Guard against a NULL argv: strchr(NULL, ...) is undefined behavior. */
  const char *arg = (argv != NULL) ? strchr(argv, ',') : NULL;
  char *callbacks_userdef;

  if (arg != NULL) {
    arg++;                      /* skip the comma itself */
  }

  fprintf(stderr, "Plugged..\n");

  /* Check args: the base must leave room for the link it will be joined to */
  if (arg == NULL || *arg == '\0' || strlen(arg) >= HTS_URLMAXSIZE / 2) {
    fprintf(stderr, "** callback error: arguments expected or bad arguments\n");
    fprintf(stderr, "usage: httrack --wrapper modulename,base\n");
    fprintf(stderr,
            "example: httrack --wrapper callback,http://www.example.com/\n");
    return 0;                   /* failed */
  }

  /* strdup() may fail; never chain callbacks with a NULL base. */
  callbacks_userdef = strdup(arg);
  if (callbacks_userdef == NULL) {
    fprintf(stderr, "** callback error: out of memory\n");
    return 0;                   /* failed */
  }

  /* Plug callback functions */
  CHAIN_FUNCTION(opt, check_html, process_file, callbacks_userdef);
  CHAIN_FUNCTION(opt, linkdetected, check_detectedlink, callbacks_userdef);
  CHAIN_FUNCTION(opt, end, check_detectedlink_end, callbacks_userdef);

  fprintf(stderr, "Using root '%s'\n", callbacks_userdef);
  return 1;                     /* success */
}
/* Folds an ASCII uppercase letter to lowercase; other bytes are unchanged. */
static char basehref_lower(char c) {
  return (c >= 'A' && c <= 'Z') ? (char) (c - 'A' + 'a') : c;
}

/*
 * Returns a pointer to the first ASCII case-insensitive occurrence of
 * needle inside the NUL-terminated string haystack, or NULL if absent.
 */
static char *basehref_find(char *haystack, const char *needle) {
  for (; *haystack != '\0'; haystack++) {
    size_t i = 0;

    /* A NUL in haystack never equals a non-NUL needle byte, so this cannot
       run past the end of haystack. */
    while (needle[i] != '\0'
           && basehref_lower(haystack[i]) == basehref_lower(needle[i])) {
      i++;
    }
    if (needle[i] == '\0') {
      return haystack;
    }
  }
  return NULL;
}

/*
 * check_html callback: disables the page's own base href, if any.
 *
 * html is modified in place: the first letter of the tag name of the first
 * <BASE HREF="..."> occurrence is overwritten with 'X', so it no longer
 * reads as a BASE element. The match is case-insensitive because HTML tag
 * and attribute names are, and "<base href=" is the usual spelling.
 *
 * NOTE(review): like the strstr() call this replaces, the search assumes
 * html is NUL-terminated; len is not consulted - confirm against caller.
 *
 * Returns 0 if a previously chained check_html callback asked to abort,
 * 1 otherwise.
 */
static int process_file(t_hts_callbackarg * carg, httrackp * opt, char *html,
                        int len, const char *url_address,
                        const char *url_file) {
  char *prevBase;

  /* Call parent functions if multiple callbacks are chained. */
  if (CALLBACKARG_PREV_FUN(carg, check_html) != NULL) {
    if (!CALLBACKARG_PREV_FUN(carg, check_html)
        (CALLBACKARG_PREV_CARG(carg), opt, html, len, url_address, url_file)) {
      return 0;                 /* Abort */
    }
  }

  /* Disable base href, if any */
  if (html != NULL
      && (prevBase = basehref_find(html, "<BASE HREF=\"")) != NULL) {
    prevBase[1] = 'X';
  }

  return 1;                     /* success */
}
/*
 * linkdetected callback: prepends the user-supplied base (the callback's
 * user-defined argument) to every detected link starting with "http://" or
 * "https://".
 *
 * link is a read/write buffer that is at least HTS_URLMAXSIZE bytes long.
 * The rewrite is done in place and only when base + link (plus the
 * terminating NUL) fits in HTS_URLMAXSIZE bytes; otherwise the link is left
 * untouched and a warning is printed, instead of overflowing the buffer.
 *
 * Returns 0 if a previously chained linkdetected callback asked to abort,
 * 1 otherwise.
 */
static int check_detectedlink(t_hts_callbackarg * carg, httrackp * opt,
                              char *link) {
  const char *base = (const char *) CALLBACKARG_USERDEF(carg);

  /* Call parent functions if multiple callbacks are chained. */
  if (CALLBACKARG_PREV_FUN(carg, linkdetected) != NULL) {
    if (!CALLBACKARG_PREV_FUN(carg, linkdetected)
        (CALLBACKARG_PREV_CARG(carg), opt, link)) {
      return 0;                 /* Abort */
    }
  }

  if (base != NULL && link != NULL
      && (strncmp(link, "http://", 7) == 0
          || strncmp(link, "https://", 8) == 0)) {
    const size_t base_len = strlen(base);
    const size_t link_len = strlen(link);

    if (base_len + link_len < (size_t) HTS_URLMAXSIZE) {
      /* Shift the link (with its NUL) right, then drop the base in front.
         memmove is required because source and destination overlap. */
      memmove(link + base_len, link, link_len + 1);
      memcpy(link, base, base_len);
    } else {
      fprintf(stderr,
              "** callback warning: link too long to be rebased, left unchanged: %s\n",
              link);
    }
  }

  return 1;                     /* success */
}
/*
 * end callback: releases the base string stored as the callback's
 * user-defined argument, then hands over to the previously chained end
 * callback when there is one.
 *
 * Returns the previous callback's result if one is chained, 1 otherwise.
 */
static int check_detectedlink_end(t_hts_callbackarg * carg, httrackp * opt) {
  char *root = (char *) CALLBACKARG_USERDEF(carg);

  fputs("Unplugged ..\n", stderr);

  /* free(NULL) is a no-op, so no guard is needed. */
  free(root);

  /* Nothing chained before us: report success directly. */
  if (CALLBACKARG_PREV_FUN(carg, end) == NULL) {
    return 1;                   /* success */
  }

  /* Call parent functions if multiple callbacks are chained. */
  return CALLBACKARG_PREV_FUN(carg, end) (CALLBACKARG_PREV_CARG(carg), opt);
}
|