summaryrefslogtreecommitdiff
path: root/libtest/callbacks-example-listlinks.c
blob: 598f8d470cb75a17f27e1efe9c5b6f1566088b51 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
/*
    HTTrack external callbacks example
    .c file

    How to build: (mycallback.so or mycallback.dll)
      With GNU-GCC:
        gcc -O -g3 -Wall -D_REENTRANT -shared -o mycallback.so callbacks-example-listlinks.c -lhttrack2
      With MS-Visual C++:
        cl -LD -nologo -W3 -Zi -Zp4 -DWIN32 -Fe"mycallback.dll" callbacks-example-listlinks.c libhttrack.lib

      Note: the httrack library linker option is only necessary when using libhttrack's functions inside the callback

    How to use:
      httrack --wrapper mycallback ..
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Standard httrack module includes */
#include "httrack-library.h"
#include "htsopt.h"
#include "htsdefines.h"

/* Forward declarations of the callbacks that hts_plug() chains in */

/* check_html callback: stores url_address + url_file as the current URL */
static int process_file(t_hts_callbackarg * carg, httrackp * opt, char *html,
                        int len, const char *url_address, const char *url_file);
/* linkdetected callback: prints "[current URL] -> [link]" */
static int check_detectedlink(t_hts_callbackarg * carg, httrackp * opt,
                              char *link);
/* loop callback: prints one frame of a text spinner */
static int check_loop(t_hts_callbackarg * carg, httrackp * opt, lien_back *back,
                      int back_max, int back_index, int lien_tot, int lien_ntot,
                      int stat_time, hts_stat_struct * stats);
/* end callback: frees the user-defined structure */
static int end(t_hts_callbackarg * carg, httrackp * opt);

/* Exported module entry point (defined below) */
EXTERNAL_FUNCTION int hts_plug(httrackp * opt, const char *argv);

/*
  This sample just lists all links in documents with the parent link:
  <parent> -> <link>
  This sample can be improved, for example, to make a map of a website.
*/

/* State shared by this module's callbacks: allocated in hts_plug(),
   released in end(). */
typedef struct t_my_userdef {
  /* URL address followed by URL file, as written by process_file() and
     printed by check_detectedlink() */
  char currentURLBeingParsed[2048];
} t_my_userdef;

/*
  Module entry point.

  Allocates the state shared by this module's callbacks and chains the
  callbacks (check_html, end, linkdetected, loop) onto 'opt'.

  opt:  options structure the callbacks are plugged into
  argv: module argument string; the text following the first ',' (if any)
        is located but not otherwise used by this sample. May be NULL.

  Returns 1 on success, 0 if the shared state could not be allocated.
*/
EXTERNAL_FUNCTION int hts_plug(httrackp * opt, const char *argv) {
  t_my_userdef *userdef;

  /* Optional user argument: whatever follows the first comma in argv */
  const char *arg = (argv != NULL) ? strchr(argv, ',') : NULL;

  if (arg != NULL)
    arg++;
  (void) arg;                   /* not used by this sample */

  /* Create user-defined structure; it is released by end() */
  userdef = malloc(sizeof *userdef);
  if (userdef == NULL) {
    /* Nothing has been plugged yet, so failing here leaves 'opt' untouched */
    fprintf(stderr, "** error: hts_plug() could not allocate memory\n");
    return 0;                   /* failure */
  }
  userdef->currentURLBeingParsed[0] = '\0';

  /* Plug callback functions */
  CHAIN_FUNCTION(opt, check_html, process_file, userdef);
  CHAIN_FUNCTION(opt, end, end, userdef);
  CHAIN_FUNCTION(opt, linkdetected, check_detectedlink, userdef);
  CHAIN_FUNCTION(opt, loop, check_loop, userdef);

  return 1;                     /* success */
}

/*
  check_html callback: prints the URL being parsed and remembers it in the
  user-defined structure so check_detectedlink() can print it as the parent.

  The stored URL is url_address followed by url_file; it is truncated (and
  always NUL-terminated) if it does not fit in currentURLBeingParsed.

  Returns 0 if a previously chained check_html callback returned 0 (abort),
  1 otherwise.
*/
static int process_file(t_hts_callbackarg * carg, httrackp * opt, char *html,
                        int len, const char *url_address,
                        const char *url_file) {
  t_my_userdef *userdef = (t_my_userdef *) CALLBACKARG_USERDEF(carg);

  /* Call parent functions if multiple callbacks are chained. */
  if (CALLBACKARG_PREV_FUN(carg, check_html) != NULL) {
    if (!CALLBACKARG_PREV_FUN(carg, check_html)
        (CALLBACKARG_PREV_CARG(carg), opt, html, len, url_address, url_file)) {
      return 0;                 /* Abort */
    }
  }

  /* Process */
  printf("now parsing %s%s..\n", url_address, url_file);

  /* Bounded copy: sizeof is taken on the array member itself (not on a
     pointer to it) so the limit is the real buffer capacity. */
  snprintf(userdef->currentURLBeingParsed,
           sizeof(userdef->currentURLBeingParsed), "%s%s", url_address,
           url_file);

  return 1;                     /* success */
}

/*
  linkdetected callback: prints "[parent] -> [link]", where the parent is
  the URL held in the user-defined structure.

  Returns 0 if a previously chained linkdetected callback returned 0
  (abort), 1 otherwise.
*/
static int check_detectedlink(t_hts_callbackarg * carg, httrackp * opt,
                              char *link) {
  t_my_userdef *state = (t_my_userdef *) CALLBACKARG_USERDEF(carg);

  /* Give any previously chained callback the chance to abort first. */
  if (CALLBACKARG_PREV_FUN(carg, linkdetected) != NULL
      && !CALLBACKARG_PREV_FUN(carg, linkdetected)
      (CALLBACKARG_PREV_CARG(carg), opt, link)) {
    return 0;                   /* Abort */
  }

  /* Report the parent -> child relation. */
  printf("[%s] -> [%s]\n", state->currentURLBeingParsed, link);
  return 1;                     /* success */
}

/*
  loop callback: prints the next frame of a four-character text spinner
  ('/', '-', '\', '|') followed by a carriage return.

  Returns 0 if a previously chained loop callback returned 0 (abort),
  1 otherwise.
*/
static int check_loop(t_hts_callbackarg * carg, httrackp * opt, lien_back *back,
                      int back_max, int back_index, int lien_tot, int lien_ntot,
                      int stat_time, hts_stat_struct * stats) {
  static const char spinner[] = "/-\\|";
  /* Kept in 0..3: an ever-growing signed counter would eventually overflow
     (undefined behaviour) and could yield a negative index. */
  static unsigned int frame = 0;

  /* Call parent functions if multiple callbacks are chained. */
  if (CALLBACKARG_PREV_FUN(carg, loop) != NULL) {
    if (!CALLBACKARG_PREV_FUN(carg, loop)
        (CALLBACKARG_PREV_CARG(carg), opt, back, back_max, back_index, lien_tot,
         lien_ntot, stat_time, stats)) {
      return 0;                 /* Abort */
    }
  }

  /* Process */
  printf("%c\r", spinner[frame]);
  frame = (frame + 1u) % 4u;
  return 1;
}

/*
  end callback: logs that it was called, releases the user-defined
  structure, then defers to any previously chained end callback.

  Returns the previous callback's result when one is chained, 1 otherwise.
*/
static int end(t_hts_callbackarg * carg, httrackp * opt) {
  t_my_userdef *state = (t_my_userdef *) CALLBACKARG_USERDEF(carg);

  fprintf(stderr, "** info: wrapper_exit() called!\n");

  /* free(NULL) is a no-op, so no guard is required. */
  free(state);

  /* Nothing chained before us: report success ourselves. */
  if (CALLBACKARG_PREV_FUN(carg, end) == NULL) {
    return 1;                   /* success */
  }

  /* Otherwise the parent callback decides the result. */
  return CALLBACKARG_PREV_FUN(carg, end) (CALLBACKARG_PREV_CARG(carg), opt);
}