summaryrefslogtreecommitdiff
path: root/libtest/callbacks-example-baselinks.c
blob: f6705e50f7baad39217fcf15cff9df8cc23f353b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
/*
    HTTrack external callbacks example : enforce a constant base href
    Can be useful to make copies of a site's archives, using the site's base URL (base href) as the root reference
    .c file

    How to build: (callback.so or callback.dll)
      With GNU-GCC:
        gcc -O -g3 -Wall -D_REENTRANT -shared -o mycallback.so callbacks-example-baselinks.c -lhttrack2
      With MS-Visual C++:
        cl -LD -nologo -W3 -Zi -Zp4 -DWIN32 -Fe"mycallback.dll" callbacks-example-baselinks.c libhttrack.lib

      Note: the httrack library linker option is only necessary when using libhttrack's functions inside the callback

    How to use:
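      httrack --wrapper mycallback,http://www.example.com/ ..
      (the text after the comma is the mandatory base prefix; the module refuses to load without it)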
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Standard httrack module includes */
#include "httrack-library.h"
#include "htsopt.h"
#include "htsdefines.h"

/* Local function declarations (the callbacks are defined further down) */

/* Plugged on the "check_html" callback: disables an existing
   <BASE HREF="..."> tag in the page buffer. */
static int process_file(t_hts_callbackarg * carg, httrackp * opt, char *html,
                        int len, const char *url_address, const char *url_file);
/* Plugged on the "linkdetected" callback: prepends the user-supplied base to
   links starting with "http://" or "https://". */
static int check_detectedlink(t_hts_callbackarg * carg, httrackp * opt,
                              char *link);
/* Plugged on the "end" callback: frees the user-supplied base string. */
static int check_detectedlink_end(t_hts_callbackarg * carg, httrackp * opt);

/* External functions: module entry point, which parses the "modulename,base"
   argument and plugs the three callbacks above. */
EXTERNAL_FUNCTION int hts_plug(httrackp * opt, const char *argv);

/*
 * Module entry point.
 *
 * argv is expected in the form "modulename,base": everything after the first
 * comma is the base prefix that check_detectedlink() prepends to absolute
 * links. The prefix must be non-empty and shorter than HTS_URLMAXSIZE / 2.
 *
 * Returns 1 on success, 0 on failure (missing, empty or too long argument,
 * or out of memory).
 *
 * The duplicated base string is shared by the three chained callbacks and is
 * released by check_detectedlink_end().
 */
EXTERNAL_FUNCTION int hts_plug(httrackp * opt, const char *argv) {
  /* Guard against a NULL argument string: strchr(NULL, ...) is undefined. */
  const char *arg = (argv != NULL) ? strchr(argv, ',') : NULL;
  char *callbacks_userdef;

  if (arg != NULL)
    arg++;                      /* skip the comma itself */

  /* Check args */
  fprintf(stderr, "Plugged..\n");
  if (arg == NULL || *arg == '\0' || strlen(arg) >= HTS_URLMAXSIZE / 2) {
    fprintf(stderr, "** callback error: arguments expected or bad arguments\n");
    fprintf(stderr, "usage: httrack --wrapper modulename,base\n");
    fprintf(stderr,
            "example: httrack --wrapper callback,http://www.example.com/\n");
    return 0;                   /* failed */
  }

  /* Private copy of the base: argv is owned by the caller. */
  callbacks_userdef = strdup(arg);
  if (callbacks_userdef == NULL) {
    /* Do not plug callbacks with a NULL base: they would dereference it. */
    fprintf(stderr, "** callback error: out of memory\n");
    return 0;                   /* failed */
  }

  /* Plug callback functions */
  CHAIN_FUNCTION(opt, check_html, process_file, callbacks_userdef);
  CHAIN_FUNCTION(opt, linkdetected, check_detectedlink, callbacks_userdef);
  CHAIN_FUNCTION(opt, end, check_detectedlink_end, callbacks_userdef);

  fprintf(stderr, "Using root '%s'\n", callbacks_userdef);

  return 1;                     /* success */
}

/*
 * Find the first occurrence of <base href=" in a NUL-terminated buffer,
 * ignoring ASCII letter case (HTML element and attribute names are
 * case-insensitive). The fold is done by hand so the result does not depend
 * on the current locale.
 *
 * Returns a pointer to the '<' of the match, or NULL if there is none.
 */
static char *find_base_href_tag(char *html) {
  static const char pattern[] = "<base href=\"";
  const size_t pattern_len = sizeof(pattern) - 1;
  char *pos;

  for (pos = html; *pos != '\0'; pos++) {
    size_t i;

    for (i = 0; i < pattern_len; i++) {
      char c = pos[i];

      if (c >= 'A' && c <= 'Z')
        c = (char) (c - 'A' + 'a');
      /* A terminating NUL never equals a pattern character, so the scan
         stops here before reading past the end of the buffer. */
      if (c != pattern[i])
        break;
    }
    if (i == pattern_len)
      return pos;
  }

  return NULL;
}

/*
 * "check_html" callback: neutralize an existing base href in the page.
 *
 * Any previously chained check_html callback runs first; if it returns 0 the
 * processing is aborted and 0 is returned. Otherwise the first <base href="
 * tag (in any letter case) has the first letter of its tag name overwritten
 * with 'X', so it no longer reads as a BASE element.
 *
 * The buffer is modified in place. As with the original strstr()-based
 * search, html is assumed to be NUL-terminated; len is not used.
 *
 * Returns 1 on success, 0 if a parent callback aborted.
 */
static int process_file(t_hts_callbackarg * carg, httrackp * opt, char *html,
                        int len, const char *url_address,
                        const char *url_file) {
  char *prevBase;

  /* Call parent functions if multiple callbacks are chained. */
  if (CALLBACKARG_PREV_FUN(carg, check_html) != NULL) {
    if (!CALLBACKARG_PREV_FUN(carg, check_html)
        (CALLBACKARG_PREV_CARG(carg), opt, html, len, url_address, url_file)) {
      return 0;                 /* Abort */
    }
  }

  /* Disable base href, if any */
  if (html != NULL && (prevBase = find_base_href_tag(html)) != NULL) {
    prevBase[1] = 'X';
  }

  return 1;                     /* success */
}

/*
 * "linkdetected" callback: prepend the user-supplied base to absolute links.
 *
 * Any previously chained linkdetected callback runs first; if it returns 0
 * the processing is aborted and 0 is returned. Links that start with
 * "http://" or "https://" are then rewritten in place as base + link. Other
 * links are left untouched.
 *
 * link is a read/write buffer of at least HTS_URLMAXSIZE bytes. If the
 * rewritten link (including its terminating NUL) would not fit in
 * HTS_URLMAXSIZE bytes, the link is left unchanged and a warning is printed
 * instead of overflowing the buffer.
 *
 * Returns 1 on success, 0 if a parent callback aborted.
 */
static int check_detectedlink(t_hts_callbackarg * carg, httrackp * opt,
                              char *link) {
  const char *base = (const char *) CALLBACKARG_USERDEF(carg);

  /* Call parent functions if multiple callbacks are chained. */
  if (CALLBACKARG_PREV_FUN(carg, linkdetected) != NULL) {
    if (!CALLBACKARG_PREV_FUN(carg, linkdetected)
        (CALLBACKARG_PREV_CARG(carg), opt, link)) {
      return 0;                 /* Abort */
    }
  }

  /* The incoming (read/write) buffer is at least HTS_URLMAXSIZE bytes long */
  if (base != NULL && link != NULL
      && (strncmp(link, "http://", 7) == 0
          || strncmp(link, "https://", 8) == 0)) {
    const size_t base_len = strlen(base);
    const size_t link_len = strlen(link);

    if (base_len + link_len >= (size_t) HTS_URLMAXSIZE) {
      /* Only HTS_URLMAXSIZE bytes are guaranteed: refuse to overflow. */
      fprintf(stderr,
              "** callback warning: link too long, left unchanged: %s\n",
              link);
    } else {
      /* Shift the link (with its NUL) to the right, then drop the base in
         front of it; memmove because source and destination overlap. */
      memmove(link + base_len, link, link_len + 1);
      memcpy(link, base, base_len);
    }
  }

  return 1;                     /* success */
}

/*
 * "end" callback: release the base string that hts_plug() duplicated and
 * registered as user-defined data, then hand over to any previously chained
 * end callback.
 *
 * Returns the parent callback's result if one is chained, 1 otherwise.
 */
static int check_detectedlink_end(t_hts_callbackarg * carg, httrackp * opt) {
  char *const userdef = (char *) CALLBACKARG_USERDEF(carg);

  fprintf(stderr, "Unplugged ..\n");
  free(userdef);                /* free(NULL) is a no-op, no guard needed */

  /* No parent callback chained: nothing more to do. */
  if (CALLBACKARG_PREV_FUN(carg, end) == NULL) {
    return 1;                   /* success */
  }

  /* Call parent functions if multiple callbacks are chained. */
  return CALLBACKARG_PREV_FUN(carg, end) (CALLBACKARG_PREV_CARG(carg), opt);
}