summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorXavier Roche <xroche@users.noreply.github.com>2014-05-29 15:42:53 +0000
committerXavier Roche <xroche@users.noreply.github.com>2014-05-29 15:42:53 +0000
commit4f5776a8fac840619f01a61a3655a4608e04d9fd (patch)
treeb9bb0b6ce0eb39118ed5646f66687a616890248a
parent8adeadbdb63e1cb8e4d62fe400ffefada9122d86 (diff)
Big cleanup: introducing cleaner lien_adrfilsave and lien_adrfil structures holding address/uri or address/uri/filename rather than passing opaque char* of unknown size.
-rw-r--r--src/htscore.c44
-rw-r--r--src/htscore.h21
-rw-r--r--src/htscoremain.c39
-rw-r--r--src/htslib.c94
-rw-r--r--src/htslib.h11
-rw-r--r--src/htsname.c332
-rw-r--r--src/htsname.h23
-rw-r--r--src/htsparse.c812
-rw-r--r--src/htsparse.h6
-rw-r--r--src/htstools.c57
-rw-r--r--src/htstools.h13
11 files changed, 730 insertions, 722 deletions
diff --git a/src/htscore.c b/src/htscore.c
index c20c2a0..d2d396d 100644
--- a/src/htscore.c
+++ b/src/htscore.c
@@ -998,6 +998,10 @@ int httpmirror(char *url1, httrackp * opt) {
}
ptr++;
}
+ // We're done!
+ if (ptr == opt->lien_tot) {
+ goto jump_if_done;
+ }
}
if (heap(ptr) != NULL) { // on a qq chose à récupérer?
@@ -3716,8 +3720,7 @@ int htsAddLink(htsmoduleStruct * str, char *link) {
if (link != NULL && str != NULL && link[0] != '\0') {
ENGINE_LOAD_CONTEXT_BASE();
/* */
- char BIGSTK adr[HTS_URLMAXSIZE * 2], fil[HTS_URLMAXSIZE * 2],
- save[HTS_URLMAXSIZE * 2];
+ lien_adrfilsave afs;
char BIGSTK codebase[HTS_URLMAXSIZE * 2];
/* */
@@ -3788,13 +3791,13 @@ int htsAddLink(htsmoduleStruct * str, char *link) {
if (strnotempty(lien) && strlen(lien) < HTS_URLMAXSIZE) {
// calculer les chemins et noms de sauvegarde
- if (ident_url_relatif(lien, urladr(), codebase, adr, fil) >= 0) { // reformage selon chemin
+ if (ident_url_relatif(lien, urladr(), codebase, &afs.af) >= 0) { // reformage selon chemin
int r;
int set_prio_to = 0;
int just_test_it = 0;
forbidden_url =
- hts_acceptlink(opt, ptr, adr, fil, NULL, NULL, &set_prio_to, &just_test_it);
+ hts_acceptlink(opt, ptr, afs.af.adr, afs.af.fil, NULL, NULL, &set_prio_to, &just_test_it);
hts_log_print(opt, LOG_DEBUG,
"result for wizard external module link: %d",
forbidden_url);
@@ -3811,29 +3814,26 @@ int htsAddLink(htsmoduleStruct * str, char *link) {
opt->savename_83 = 0;
// note: adr,fil peuvent être patchés
r =
- url_savename(adr, fil, save, NULL, NULL, NULL, NULL, opt, opt->liens,
- opt->lien_tot, sback, cache, hashptr, ptr, numero_passe,
+ url_savename(&afs, NULL, NULL, NULL, opt, sback, cache, hashptr, ptr, numero_passe,
NULL);
// resolve unresolved type
- if (r != -1 && forbidden_url == 0 && IS_DELAYED_EXT(save)
+ if (r != -1 && forbidden_url == 0 && IS_DELAYED_EXT(afs.save)
) { // pas d'erreur, on continue
- char BIGSTK former_adr[HTS_URLMAXSIZE * 2];
- char BIGSTK former_fil[HTS_URLMAXSIZE * 2];
+ lien_adrfil former;
- former_adr[0] = former_fil[0] = '\0';
+ former.adr[0] = former.fil[0] = '\0';
r =
- hts_wait_delayed(str, adr, fil, save, NULL, NULL, former_adr,
- former_fil, &forbidden_url);
+ hts_wait_delayed(str, &afs, NULL, NULL, &former, &forbidden_url);
}
// end resolve unresolved type
opt->savename_type = a;
opt->savename_83 = b;
if (r != -1 && !forbidden_url) {
if (savename()) {
- if (lienrelatif(tempo, save, savename()) == 0) {
+ if (lienrelatif(tempo, afs.save, savename()) == 0) {
hts_log_print(opt, LOG_DEBUG,
"(module): relative link at %s build with %s and %s: %s",
- adr, save, savename(), tempo);
+ afs.af.adr, afs.save, savename(), tempo);
if (str->localLink
&& str->localLinkSize > (int) strlen(tempo) + 1) {
strcpybuff(str->localLink, tempo);
@@ -3847,19 +3847,19 @@ int htsAddLink(htsmoduleStruct * str, char *link) {
hts_log_print(opt, LOG_DEBUG, "(module): file not caught: %s",
lien);
if (str->localLink
- && str->localLinkSize > (int) (strlen(adr) + strlen(fil) + 8)) {
+ && str->localLinkSize > (int) (strlen(afs.af.adr) + strlen(afs.af.fil) + 8)) {
str->localLink[0] = '\0';
- if (!link_has_authority(adr))
+ if (!link_has_authority(afs.af.adr))
strcpybuff(str->localLink, "http://");
- strcatbuff(str->localLink, adr);
- strcatbuff(str->localLink, fil);
+ strcatbuff(str->localLink, afs.af.adr);
+ strcatbuff(str->localLink, afs.af.fil);
}
r = -1;
}
//
if (r != -1) {
- hts_log_print(opt, LOG_DEBUG, "(module): %s%s -> %s (base %s)", adr,
- fil, save, codebase);
+ hts_log_print(opt, LOG_DEBUG, "(module): %s%s -> %s (base %s)", afs.af.adr,
+ afs.af.fil, afs.save, codebase);
// modifié par rapport à l'autre version (cf prio_fix notamment et save2)
@@ -3869,7 +3869,7 @@ int htsAddLink(htsmoduleStruct * str, char *link) {
//
// On part de la fin et on essaye de se presser (économise temps machine)
{
- int i = hash_read(hashptr, save, NULL, HASH_STRUCT_FILENAME ); // lecture type 0 (sav)
+ int i = hash_read(hashptr, afs.save, NULL, HASH_STRUCT_FILENAME ); // lecture type 0 (sav)
if (i >= 0) {
heap(i)->depth = maximum(heap(i)->depth, prio_fix);
@@ -3882,7 +3882,7 @@ int htsAddLink(htsmoduleStruct * str, char *link) {
// >>>> CREER LE LIEN JAVA <<<<
// enregistrer fichier (MACRO)
- if (!hts_record_link(opt, adr, fil, save, "", "", "")) { // erreur, pas de place réservée
+ if (!hts_record_link(opt, afs.af.adr, afs.af.fil, afs.save, "", "", "")) { // erreur, pas de place réservée
printf("PANIC! : Not enough memory [%d]\n", __LINE__);
hts_log_print(opt, LOG_PANIC, "Not enough memory");
opt->state.exit_xh = -1; /* fatal error -> exit */
diff --git a/src/htscore.h b/src/htscore.h
index 35593d1..774b1d7 100644
--- a/src/htscore.h
+++ b/src/htscore.h
@@ -152,6 +152,26 @@ struct lien_url {
int testmode; // mode test uniquement, envoyer juste un head!
};
+// adr, fil
+#ifndef HTS_DEF_FWSTRUCT_lien_adrfil
+#define HTS_DEF_FWSTRUCT_lien_adrfil
+typedef struct lien_adrfil lien_adrfil;
+#endif
+struct lien_adrfil { // address + remote file pair ("adr, fil")
+ char adr[HTS_URLMAXSIZE * 2]; // address
+ char fil[HTS_URLMAXSIZE * 2]; // remote file name
+};
+
+// adr, fil, save
+#ifndef HTS_DEF_FWSTRUCT_lien_adrfilsave
+#define HTS_DEF_FWSTRUCT_lien_adrfilsave
+typedef struct lien_adrfilsave lien_adrfilsave;
+#endif
+struct lien_adrfilsave { // address + remote file + local save name ("adr, fil, save")
+ lien_adrfil af; // address and remote file name
+ char save[HTS_URLMAXSIZE * 2]; // name to save on disk (with optional path)
+};
+
// chargement de fichiers en 'arrière plan'
#ifndef HTS_DEF_FWSTRUCT_lien_back
#define HTS_DEF_FWSTRUCT_lien_back
@@ -398,7 +418,6 @@ int htsAddLink(htsmoduleStruct * str, char *link);
// Void
void voidf(void);
-
#define HTS_TOPINDEX "TOP_INDEX_HTTRACK"
#endif
diff --git a/src/htscoremain.c b/src/htscoremain.c
index 1ca82e8..e5af672 100644
--- a/src/htscoremain.c
+++ b/src/htscoremain.c
@@ -2007,8 +2007,7 @@ HTSEXT_API int hts_main2(int argc, char **argv, httrackp * opt) {
cache.hashtable = (void *) cache_hashtable; /* copy backcache hash */
cache.ro = 1; /* read only */
if (cache.hashtable) {
- char BIGSTK adr[HTS_URLMAXSIZE * 2];
- char BIGSTK fil[HTS_URLMAXSIZE * 2];
+ lien_adrfilsave afs;
char BIGSTK url[HTS_URLMAXSIZE * 2];
char linepos[256];
int pos;
@@ -2031,52 +2030,50 @@ HTSEXT_API int hts_main2(int argc, char **argv, httrackp * opt) {
/* */
a++;
/* read "host/file" */
- a += binput(a, adr, HTS_URLMAXSIZE);
- a += binput(a, fil, HTS_URLMAXSIZE);
+ a += binput(a, afs.af.adr, HTS_URLMAXSIZE);
+ a += binput(a, afs.af.fil, HTS_URLMAXSIZE);
url[0] = '\0';
- if (!link_has_authority(adr))
+ if (!link_has_authority(afs.af.adr))
strcatbuff(url, "http://");
- strcatbuff(url, adr);
- strcatbuff(url, fil);
+ strcatbuff(url, afs.af.adr);
+ strcatbuff(url, afs.af.fil);
/* read position */
a += binput(a, linepos, 200);
sscanf(linepos, "%d", &pos);
if (!hasFilter
|| (strjoker(url, filter, NULL, NULL) != NULL)
) {
- r = cache_read_ro(opt, &cache, adr, fil, "", NULL); // lire entrée cache + data
+ r = cache_read_ro(opt, &cache, afs.af.adr, afs.af.fil, "", NULL); // lire entrée cache + data
if (r.statuscode != -1) { // No errors
found++;
if (!hasFilter) {
fprintf(stdout, "%s%s%s\r\n",
- (link_has_authority(adr)) ? "" :
- "http://", adr, fil);
+ (link_has_authority(afs.af.adr)) ? "" :
+ "http://", afs.af.adr, afs.af.fil);
} else {
char msg[256], cdate[256];
- char BIGSTK sav[HTS_URLMAXSIZE * 2];
infostatuscode(msg, r.statuscode);
time_gmt_rfc822(cdate);
fprintf(stdout, "HTTP/1.1 %d %s\r\n",
r.statuscode, r.msg[0] ? r.msg : msg);
- fprintf(stdout, "X-Host: %s\r\n", adr);
- fprintf(stdout, "X-File: %s\r\n", fil);
+ fprintf(stdout, "X-Host: %s\r\n", afs.af.adr);
+ fprintf(stdout, "X-File: %s\r\n", afs.af.fil);
fprintf(stdout, "X-URL: %s%s%s\r\n",
- (link_has_authority(adr)) ? "" :
- "http://", adr, fil);
+ (link_has_authority(afs.af.adr)) ? "" :
+ "http://", afs.af.adr, afs.af.fil);
if (url_savename
- (adr, fil, sav, /*former_adr */ NULL,
- /*former_fil */ NULL, /*referer_adr */
+ (&afs, /*former */ NULL,
+ /*referer_adr */
NULL, /*referer_fil */ NULL,
- /*opt */ opt, /*liens */ NULL,
- /*lien_tot */ 0, /*sback */ NULL,
+ /*opt */ opt, /*sback */ NULL,
/*cache */ &cache, /*hash */ NULL, /*ptr */
0, /*numero_passe */ 0, /*mime_type */
NULL) != -1) {
- if (fexist(sav)) {
+ if (fexist(afs.save)) {
fprintf(stdout, "Content-location: %s\r\n",
- sav);
+ afs.save);
}
}
fprintf(stdout, "Date: %s\r\n", cdate);
diff --git a/src/htslib.c b/src/htslib.c
index c058733..fe2863e 100644
--- a/src/htslib.c
+++ b/src/htslib.c
@@ -1984,36 +1984,6 @@ LLint http_xfread1(htsblk * r, int bufl) {
}
}
-// teste une adresse, et suit l'éventuel chemin "moved"
-// retourne 200 ou le code d'erreur (404=NOT FOUND, etc)
-// copie dans loc la véritable adresse si celle-ci est différente
-htsblk http_location(httrackp * opt, char *adr, char *fil, char *loc) {
- htsblk retour;
- int retry = 0;
- int tryagain;
-
- // note: "RFC says"
- // 5 boucles au plus, on en teste au plus 8 ici
- // sinon abandon..
- do {
- tryagain = 0;
- switch ((retour = http_test(opt, adr, fil, loc)).statuscode) {
- case HTTP_OK:
- break; // ok!
- case HTTP_MOVED_PERMANENTLY:
- case HTTP_FOUND:
- case HTTP_SEE_OTHER:
- case HTTP_TEMPORARY_REDIRECT: // moved!
- // recalculer adr et fil!
- if (ident_url_absolute(loc, adr, fil) != -1) {
- tryagain = 1; // retenter
- retry++; // ..encore une fois
- }
- }
- } while((tryagain) && (retry < 5 + 3));
- return retour;
-}
-
// teste si une URL (validité, header, taille)
// retourne 200 ou le code d'erreur (404=NOT FOUND, etc)
// en cas de moved xx, dans location
@@ -2340,12 +2310,12 @@ T_SOC newhttp(httrackp * opt, const char *_iadr, htsblk * retour, int port,
// couper http://www.truc.fr/pub/index.html -> www.truc.fr /pub/index.html
// retour=-1 si erreur.
// si file://... alors adresse=file:// (et coupe le ?query dans ce cas)
-int ident_url_absolute(const char *url, char *adr, char *fil) {
+int ident_url_absolute(const char *url, lien_adrfil *adrfil) {
int pos = 0;
int scheme = 0;
- // effacer adr et fil
- adr[0] = fil[0] = '\0';
+ // effacer adrfil->adr et adrfil->fil
+ adrfil->adr[0] = adrfil->fil[0] = '\0';
#if HDEBUG
printf("protocol: %s\n", url);
@@ -2364,15 +2334,15 @@ int ident_url_absolute(const char *url, char *adr, char *fil) {
// 1. optional scheme ":"
if ((pos = strfield(url, "file:"))) { // fichier local!! (pour les tests)
//!!p+=3;
- strcpybuff(adr, "file://");
+ strcpybuff(adrfil->adr, "file://");
} else if ((pos = strfield(url, "http:"))) { // HTTP
//!!p+=3;
} else if ((pos = strfield(url, "ftp:"))) { // FTP
- strcpybuff(adr, "ftp://"); // FTP!!
+ strcpybuff(adrfil->adr, "ftp://"); // FTP!!
//!!p+=3;
#if HTS_USEOPENSSL
} else if ((pos = strfield(url, "https:"))) { // HTTPS
- strcpybuff(adr, "https://");
+ strcpybuff(adrfil->adr, "https://");
#endif
} else if (scheme) {
return -1; // erreur non reconnu
@@ -2385,13 +2355,13 @@ int ident_url_absolute(const char *url, char *adr, char *fil) {
// (url+pos) now points to the path (not net path)
- //## if (adr[0]!=lOCAL_CHAR) { // adresse normale http
- if (!strfield(adr, "file:")) { // PAS file://
+ //## if (adrfil->adr[0]!=lOCAL_CHAR) { // regular http address
+ if (!strfield(adrfil->adr, "file:")) { // PAS adrfil->file://
const char *p, *q;
p = url + pos;
- // p pointe sur le début de l'adresse, ex: www.truc.fr/sommaire/index.html
+ // p points to the start of the address, e.g. www.truc.fr/sommaire/index.html
q = strchr(jump_identification(p), '/');
if (q == 0)
q = strchr(jump_identification(p), '?'); // http://www.foo.com?bar=1
@@ -2404,53 +2374,53 @@ int ident_url_absolute(const char *url, char *adr, char *fil) {
//strcpybuff(retour.msg,"Path too long");
return -1; // erreur
}
- // recopier adresse www..
- strncatbuff(adr, p, ((int) (q - p)));
- // *( adr+( ((int) q) - ((int) p) ) )=0; // faut arrêter la fumette!
+ // copy the address www..
+ strncatbuff(adrfil->adr, p, ((int) (q - p)));
+ // *( adrfil->adr+( ((int) q) - ((int) p) ) )=0; // faut arrêter la fumette!
// recopier chemin /pub/..
if (q[0] != '/') // page par défaut (/)
- strcatbuff(fil, "/");
- strcatbuff(fil, q);
+ strcatbuff(adrfil->fil, "/");
+ strcatbuff(adrfil->fil, q);
// SECURITE:
// simplifier url pour les ../
- fil_simplifie(fil);
- } else { // localhost file://
+ fil_simplifie(adrfil->fil);
+ } else { // localhost adrfil->file://
const char *p;
size_t i;
char *a;
p = url + pos;
- if (*p == '/' || *p == '\\') { /* file:///.. */
- strcatbuff(fil, p); // fichier local ; adr="#"
+ if (*p == '/' || *p == '\\') { /* adrfil->file:///.. */
+ strcatbuff(adrfil->fil, p); // fichier local ; adrfil->adr="#"
} else {
if (p[1] != ':') {
- strcatbuff(fil, "//"); /* file://server/foo */
- strcatbuff(fil, p);
+ strcatbuff(adrfil->fil, "//"); /* adrfil->file://server/foo */
+ strcatbuff(adrfil->fil, p);
} else {
- strcatbuff(fil, p); // file://C:\..
+ strcatbuff(adrfil->fil, p); // adrfil->file://C:\..
}
}
- a = strchr(fil, '?');
+ a = strchr(adrfil->fil, '?');
if (a)
- *a = '\0'; /* couper query (inutile pour file:// lors de la requête) */
- // filtrer les \\ -> / pour les fichiers DOS
- for(i = 0; fil[i] != '\0'; i++)
- if (fil[i] == '\\')
- fil[i] = '/';
+ *a = '\0'; /* couper query (inutile pour adrfil->file:// lors de la requête) */
+ // filter \\ -> / for DOS files
+ for(i = 0; adrfil->fil[i] != '\0'; i++)
+ if (adrfil->fil[i] == '\\')
+ adrfil->fil[i] = '/';
}
// no hostname
- if (!strnotempty(adr))
+ if (!strnotempty(adrfil->adr))
return -1; // erreur non reconnu
// nommer au besoin.. (non utilisé normalement)
- if (!strnotempty(fil))
- strcpybuff(fil, "default-index.html");
+ if (!strnotempty(adrfil->fil))
+ strcpybuff(adrfil->fil, "default-index.html");
- // case insensitive pour adresse
+ // case insensitive for the address
{
- char *a = jump_identification(adr);
+ char *a = jump_identification(adrfil->adr);
while(*a) {
if ((*a >= 'A') && (*a <= 'Z'))
diff --git a/src/htslib.h b/src/htslib.h
index 84c5666..cbd71aa 100644
--- a/src/htslib.h
+++ b/src/htslib.h
@@ -50,6 +50,14 @@ typedef struct htsblk htsblk;
#define HTS_DEF_FWSTRUCT_t_dnscache
typedef struct t_dnscache t_dnscache;
#endif
+#ifndef HTS_DEF_FWSTRUCT_lien_adrfil
+#define HTS_DEF_FWSTRUCT_lien_adrfil
+typedef struct lien_adrfil lien_adrfil;
+#endif
+#ifndef HTS_DEF_FWSTRUCT_lien_adrfilsave
+#define HTS_DEF_FWSTRUCT_lien_adrfilsave
+typedef struct lien_adrfilsave lien_adrfilsave;
+#endif
/* définitions globales */
#include "htsglobal.h"
@@ -264,7 +272,6 @@ HTS_INLINE void deletehttp(htsblk * r);
HTS_INLINE int deleteaddr(htsblk * r);
HTS_INLINE void deletesoc(T_SOC soc);
HTS_INLINE void deletesoc_r(htsblk * r);
-htsblk http_location(httrackp * opt, char *adr, char *fil, char *loc);
htsblk http_test(httrackp * opt, const char *adr, const char *fil, char *loc);
int check_readinput(htsblk * r);
int check_readinput_t(T_SOC soc, int timeout);
@@ -317,7 +324,7 @@ int linput_trim(FILE * fp, char *s, int max);
int linput_cpp(FILE * fp, char *s, int max);
void rawlinput(FILE * fp, char *s, int max);
char *strstrcase(char *s, const char *o);
-int ident_url_absolute(const char *url, char *adr, char *fil);
+int ident_url_absolute(const char *url, lien_adrfil *adrfil);
void fil_simplifie(char *f);
int is_unicode_utf8(const char *buffer, const size_t size);
void map_characters(unsigned char *buffer, unsigned int size,
diff --git a/src/htsname.c b/src/htsname.c
index ace3a33..ec6a839 100644
--- a/src/htsname.c
+++ b/src/htsname.c
@@ -47,15 +47,15 @@ Please visit our Website: http://www.httrack.com
{ /* ajout nom */\
char BIGSTK buff[HTS_URLMAXSIZE*2];\
buff[0]='\0';\
- strncatbuff(buff,start_pos,(int) (nom_pos - start_pos));\
- url_savename_addstr(save,buff);\
+ strncatbuff(buff,start_pos,nom_pos - start_pos);\
+ url_savename_addstr(afs->save, buff);\
}
#define ADD_STANDARD_NAME(shortname) \
{ /* ajout nom */\
char BIGSTK buff[HTS_URLMAXSIZE*2];\
standard_name(buff,dot_pos,nom_pos,fil_complete,(shortname));\
- url_savename_addstr(save,buff);\
+ url_savename_addstr(afs->save, buff);\
}
/* Avoid stupid DOS system folders/file such as 'nul' */
@@ -82,11 +82,11 @@ static const char *hts_tbdev[] = {
HTS_STAT.stat_errors=fspc(opt,NULL,"error"); \
HTS_STAT.stat_warnings=fspc(opt,NULL,"warning"); \
HTS_STAT.stat_infos=fspc(opt,NULL,"info"); \
- HTS_STAT.nbk=backlinks_done(sback,liens,lien_tot,ptr); \
+ HTS_STAT.nbk=backlinks_done(sback,opt->liens,opt->lien_tot,ptr); \
HTS_STAT.nb=back_transferred(HTS_STAT.stat_bytes,sback); \
/* Check */ \
{ \
- if (!RUN_CALLBACK7(opt, loop, sback->lnk, sback->count,-1,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) { \
+ if (!RUN_CALLBACK7(opt, loop, sback->lnk, sback->count,-1,ptr,opt->lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) { \
return -1; \
} \
} \
@@ -139,10 +139,10 @@ static void cleanEndingSpaceOrDot(char *s) {
// forme le nom du fichier à sauver (save) à partir de fil et adr
// système intelligent, qui renomme en cas de besoin (exemple: deux INDEX.HTML et index.html)
-int url_savename(char *adr_complete, char *fil_complete, char *save,
- char *former_adr, char *former_fil, const char *referer_adr,
- const char *referer_fil, httrackp * opt, lien_url ** liens,
- int lien_tot, struct_back * sback, cache_back * cache,
+int url_savename(lien_adrfilsave *const afs,
+ lien_adrfil *const former,
+ const char *referer_adr, const char *referer_fil,
+ httrackp * opt, struct_back * sback, cache_back * cache,
hash_struct * hash, int ptr, int numero_passe,
const lien_back * headers) {
char catbuff[CATBUFF_SIZE];
@@ -152,7 +152,10 @@ int url_savename(char *adr_complete, char *fil_complete, char *save,
lien_back *const back = sback->lnk;
/* */
- char BIGSTK newfil[HTS_URLMAXSIZE * 2]; /* ="" */
+ char BIGSTK fil[HTS_URLMAXSIZE * 2]; /* ="" */
+
+ const char *const adr_complete = afs->af.adr;
+ const char *const fil_complete = afs->af.fil;
/*char BIGSTK normadr_[HTS_URLMAXSIZE*2]; */
char BIGSTK normadr_[HTS_URLMAXSIZE * 2], normfil_[HTS_URLMAXSIZE * 2];
@@ -162,12 +165,11 @@ int url_savename(char *adr_complete, char *fil_complete, char *save,
{ "http", "https", "ftp", "file", "unknown" };
int protocol = PROTOCOL_HTTP;
const char *const adr = jump_identification(adr_complete);
- char *fil = fil_complete;
// copy of fil, used for lookups (see urlhack)
const char *normadr = adr;
- const char *normfil = fil;
+ const char *normfil = fil_complete;
const char *const print_adr = jump_protocol(adr);
- char *start_pos = NULL, *nom_pos = NULL, *dot_pos = NULL; // Position nom et point
+ const char *start_pos = NULL, *nom_pos = NULL, *dot_pos = NULL; // Position nom et point
// pour changement d'extension ou de nom (content-disposition)
int ext_chg = 0, ext_chg_delayed = 0;
@@ -176,8 +178,8 @@ int url_savename(char *adr_complete, char *fil_complete, char *save,
int max_char = 0;
//CLEAR
- newfil[0] = ext[0] = '\0';
- save[0] = '\0';
+ fil[0] = ext[0] = '\0';
+ afs->save[0] = '\0';
/* 8-3 ? */
switch (opt->savename_83) {
@@ -199,7 +201,7 @@ int url_savename(char *adr_complete, char *fil_complete, char *save,
if (opt->urlhack) {
// copy of adr (without protocol), used for lookups (see urlhack)
normadr = adr_normalized(adr, normadr_);
- normfil = fil_normalized(fil, normfil_);
+ normfil = fil_normalized(fil_complete, normfil_);
} else {
if (link_has_authority(adr_complete)) { // https or other protocols : in "http/" subfolder
char *pos = strchr(adr_complete, ':');
@@ -227,8 +229,8 @@ int url_savename(char *adr_complete, char *fil_complete, char *save,
// court-circuit pour lien primaire
if (strnotempty(adr) == 0) {
- if (strcmp(fil, "primary") == 0) {
- strcatbuff(save, "primary.html");
+ if (strcmp(fil_complete, "primary") == 0) {
+ strcatbuff(afs->save, "primary.html");
return 0;
}
}
@@ -259,21 +261,21 @@ int url_savename(char *adr_complete, char *fil_complete, char *save,
// vérifier que le nom n'a pas déja été calculé (si oui le renvoyer tel que)
// vérifier que le nom n'est pas déja pris...
// NOTE: si on cherche /toto/ et que /toto est trouvé on le prend (et réciproquqment) ** // **
- if (liens != NULL) {
+ if (opt->liens != NULL) {
int i;
i = hash_read(hash, normadr, normfil, HASH_STRUCT_ADR_PATH); // recherche table 1 (adr+fil)
if (i >= 0) { // ok, trouvé
- strcpybuff(save, liens[i]->sav);
+ strcpybuff(afs->save, heap(i)->sav);
return 0;
}
- i = hash_read(hash, normadr, normfil, HASH_STRUCT_ORIGINAL_ADR_PATH); // recherche table 2 (former_adr+former_fil)
+ i = hash_read(hash, normadr, normfil, HASH_STRUCT_ORIGINAL_ADR_PATH); // recherche table 2 (former->adr+former->fil)
if (i >= 0) { // ok, trouvé
// copier location moved!
- strcpybuff(adr_complete, liens[i]->adr);
- strcpybuff(fil_complete, liens[i]->fil);
+ strcpybuff(afs->af.adr, heap(i)->adr);
+ strcpybuff(afs->af.fil, heap(i)->fil);
// et save
- strcpybuff(save, liens[i]->sav); // copier (formé à partir du nouveau lien!)
+ strcpybuff(afs->save, heap(i)->sav); // copier (formé à partir du nouveau lien!)
return 0;
}
// chercher sans / ou avec / dans former
@@ -286,13 +288,13 @@ int url_savename(char *adr_complete, char *fil_complete, char *save,
fil_complete_patche[strlen(fil_complete_patche) - 1] = '\0';
else
strcatbuff(fil_complete_patche, "/");
- i = hash_read(hash, normadr, fil_complete_patche, HASH_STRUCT_ORIGINAL_ADR_PATH); // recherche table 2 (former_adr+former_fil)
+ i = hash_read(hash, normadr, fil_complete_patche, HASH_STRUCT_ORIGINAL_ADR_PATH); // recherche table 2 (former->adr+former->fil)
if (i >= 0) {
- // écraser fil et adr (pas former_fil?????)
- strcpybuff(adr_complete, liens[i]->adr);
- strcpybuff(fil_complete, liens[i]->fil);
+ // écraser fil et adr (pas former->fil?????)
+ strcpybuff(afs->af.adr, heap(i)->adr);
+ strcpybuff(afs->af.fil, heap(i)->fil);
// écrire save
- strcpybuff(save, liens[i]->sav);
+ strcpybuff(afs->save, heap(i)->sav);
return 0;
}
}
@@ -303,13 +305,12 @@ int url_savename(char *adr_complete, char *fil_complete, char *save,
{
char *a;
- a = strchr(fil, '?');
+ a = strchr(fil_complete, '?');
if (a != NULL) {
- strncatbuff(newfil, fil, (int) (a - fil));
+ strncatbuff(fil, fil_complete, a - fil_complete);
} else {
- strcpybuff(newfil, fil);
+ strcpybuff(fil, fil_complete);
}
- fil = newfil;
}
// decode remaining % (normally not necessary; already done in htsparse.c)
@@ -465,8 +466,7 @@ int url_savename(char *adr_complete, char *fil_complete, char *save,
//
int hihp = opt->state._hts_in_html_parsing;
int has_been_moved = 0;
- char BIGSTK curr_adr[HTS_URLMAXSIZE * 2],
- curr_fil[HTS_URLMAXSIZE * 2];
+ lien_adrfil current;
/* Ensure we don't use too many sockets by using a "testing" one
If we have only 1 simultaneous connection authorized, wait for pending download
@@ -475,20 +475,20 @@ int url_savename(char *adr_complete, char *fil_complete, char *save,
URLSAVENAME_WAIT_FOR_AVAILABLE_SOCKET();
/* Rock'in */
- curr_adr[0] = curr_fil[0] = '\0';
+ current.adr[0] = current.fil[0] = '\0';
opt->state._hts_in_html_parsing = 2; // test
hts_log_print(opt, LOG_DEBUG, "Testing link type %s%s",
adr_complete, fil_complete);
- strcpybuff(curr_adr, adr_complete);
- strcpybuff(curr_fil, fil_complete);
+ strcpybuff(current.adr, adr_complete);
+ strcpybuff(current.fil, fil_complete);
// ajouter dans le backing le fichier en mode test
// savename: rien car en mode test
if (back_add
- (sback, opt, cache, curr_adr, curr_fil, BACK_ADD_TEST,
+ (sback, opt, cache, current.adr, current.fil, BACK_ADD_TEST,
referer_adr, referer_fil, 1) != -1) {
int b;
- b = back_index(opt, sback, curr_adr, curr_fil, BACK_ADD_TEST);
+ b = back_index(opt, sback, current.adr, current.fil, BACK_ADD_TEST);
if (b >= 0) {
int stop_looping = 0;
int petits_tours = 0;
@@ -511,11 +511,11 @@ int url_savename(char *adr_complete, char *fil_complete, char *save,
HTS_STAT.stat_errors = fspc(opt, NULL, "error");
HTS_STAT.stat_warnings = fspc(opt, NULL, "warning");
HTS_STAT.stat_infos = fspc(opt, NULL, "info");
- HTS_STAT.nbk = backlinks_done(sback, liens, lien_tot, ptr);
+ HTS_STAT.nbk = backlinks_done(sback, opt->liens, opt->lien_tot, ptr);
HTS_STAT.nb = back_transferred(HTS_STAT.stat_bytes, sback);
if (!RUN_CALLBACK7
- (opt, loop, sback->lnk, sback->count, b, ptr, lien_tot,
+ (opt, loop, sback->lnk, sback->count, b, ptr, opt->lien_tot,
(int) (time_local() - HTS_STAT.stat_timestart),
&HTS_STAT)) {
return -1;
@@ -526,42 +526,40 @@ int url_savename(char *adr_complete, char *fil_complete, char *save,
// traitement des 304,303..
if (back[b].status <= 0) {
if (HTTP_IS_REDIRECT(back[b].r.statuscode)) { // agh moved.. un tit tour de plus
- if ((petits_tours < 5) && (former_adr) && (former_fil)) { // on va pas tourner en rond non plus!
- if ((int) strnotempty(back[b].r.location)) { // location existe!
- char BIGSTK mov_url[HTS_URLMAXSIZE * 2],
- mov_adr[HTS_URLMAXSIZE * 2],
- mov_fil[HTS_URLMAXSIZE * 2];
- mov_url[0] = mov_adr[0] = mov_fil[0] = '\0';
+ if ((petits_tours < 5) && former != NULL) { // on va pas tourner en rond non plus!
+ if (strnotempty(back[b].r.location)) { // location existe!
+ char BIGSTK mov_url[HTS_URLMAXSIZE * 2];
+ lien_adrfil moved;
+ mov_url[0] = moved.adr[0] = moved.fil[0] = '\0';
//
strcpybuff(mov_url, back[b].r.location); // copier URL
if (ident_url_relatif
- (mov_url, curr_adr, curr_fil, mov_adr,
- mov_fil) >= 0) {
+ (mov_url, current.adr, current.fil, &moved) >= 0) {
// si non bouclage sur soi même, ou si test avec GET non testé
- if ((strcmp(mov_adr, curr_adr))
- || (strcmp(mov_fil, curr_fil))
+ if ((strcmp(moved.adr, current.adr))
+ || (strcmp(moved.fil, current.fil))
|| (get_test_request == 0)) {
// bouclage?
- if ((!strcmp(mov_adr, curr_adr))
- && (!strcmp(mov_fil, curr_fil)))
+ if ((!strcmp(moved.adr, current.adr))
+ && (!strcmp(moved.fil, current.fil)))
get_test_request = 1; // faire requète avec GET
- // recopier former_adr/fil?
- if ((former_adr) && (former_fil)) {
- if (strnotempty(former_adr) == 0) { // Pas déja noté
- strcpybuff(former_adr, curr_adr);
- strcpybuff(former_fil, curr_fil);
+ // recopier former->adr/fil?
+ if (former != NULL) {
+ if (strnotempty(former->adr) == 0) { // Pas déja noté
+ strcpybuff(former->adr, current.adr);
+ strcpybuff(former->fil, current.fil);
}
}
// check explicit forbidden - don't follow 3xx in this case
{
int set_prio_to = 0;
- if (hts_acceptlink(opt, ptr, lien_tot, liens, mov_adr, mov_fil, NULL, NULL, &set_prio_to, NULL) == 1) { /* forbidden */
+ if (hts_acceptlink(opt, ptr, moved.adr, moved.fil, NULL, NULL, &set_prio_to, NULL) == 1) { /* forbidden */
has_been_moved = 1;
back_maydelete(opt, cache, sback, b); // ok
- strcpybuff(curr_adr, mov_adr);
- strcpybuff(curr_fil, mov_fil);
+ strcpybuff(current.adr, moved.adr);
+ strcpybuff(current.fil, moved.fil);
mov_url[0] = '\0';
stop_looping = 1;
}
@@ -572,8 +570,8 @@ int url_savename(char *adr_complete, char *fil_complete, char *save,
) { // ftp, ok on arrête
has_been_moved = 1;
back_maydelete(opt, cache, sback, b); // ok
- strcpybuff(curr_adr, mov_adr);
- strcpybuff(curr_fil, mov_fil);
+ strcpybuff(current.adr, moved.adr);
+ strcpybuff(current.fil, moved.fil);
stop_looping = 1;
} else if (*mov_url) {
const char *methode;
@@ -584,24 +582,24 @@ int url_savename(char *adr_complete, char *fil_complete, char *save,
methode = BACK_ADD_TEST2; // tester avec GET
hts_log_print(opt, LOG_WARNING,
"Loop with HEAD request (during prefetch) at %s%s",
- curr_adr, curr_fil);
+ current.adr, current.fil);
}
// Ajouter
URLSAVENAME_WAIT_FOR_AVAILABLE_SOCKET();
- if (back_add(sback, opt, cache, mov_adr, mov_fil, methode, referer_adr, referer_fil, 1) != -1) { // OK
+ if (back_add(sback, opt, cache, moved.adr, moved.fil, methode, referer_adr, referer_fil, 1) != -1) { // OK
hts_log_print(opt, LOG_DEBUG,
"(during prefetch) %s (%d) to link %s at %s%s",
back[b].r.msg,
back[b].r.statuscode,
- back[b].r.location, curr_adr,
- curr_fil);
+ back[b].r.location, current.adr,
+ current.fil);
// libérer emplacement backing actuel et attendre le prochain
back_maydelete(opt, cache, sback, b);
- strcpybuff(curr_adr, mov_adr);
- strcpybuff(curr_fil, mov_fil);
+ strcpybuff(current.adr, moved.adr);
+ strcpybuff(current.fil, moved.fil);
b =
- back_index(opt, sback, curr_adr, curr_fil,
+ back_index(opt, sback, current.adr, current.fil,
methode);
if (!get_test_request)
has_been_moved = 1; // sinon ne pas forcer has_been_moved car non déplacé
@@ -610,7 +608,7 @@ int url_savename(char *adr_complete, char *fil_complete, char *save,
} else { // sinon on fait rien et on s'en va.. (ftp etc)
hts_log_print(opt, LOG_DEBUG,
"Warning: Savename redirect backing error at %s%s",
- mov_adr, mov_fil);
+ moved.adr, moved.fil);
}
}
} else {
@@ -672,13 +670,13 @@ int url_savename(char *adr_complete, char *fil_complete, char *save,
// oops, a été déplacé.. on recalcule en récursif (osons!)
if (has_been_moved) {
// copier adr, fil (optionnel, mais sinon marche pas pour le rip)
- strcpybuff(adr_complete, curr_adr);
- strcpybuff(fil_complete, curr_fil);
+ strcpybuff(afs->af.adr, current.adr);
+ strcpybuff(afs->af.fil, current.fil);
// copier adr, fil
- return url_savename(curr_adr, curr_fil, save, NULL, NULL,
- referer_adr, referer_fil, opt, liens,
- lien_tot, sback, cache, hash, ptr,
+ return url_savename(afs, NULL,
+ referer_adr, referer_fil, opt,
+ sback, cache, hash, ptr,
numero_passe, NULL);
}
// --- --- ---
@@ -748,7 +746,7 @@ int url_savename(char *adr_complete, char *fil_complete, char *save,
}
// Rechercher premier / et dernier .
{
- char *a = fil + strlen(fil) - 1;
+ const char *a = fil + strlen(fil) - 1;
// passer structures
start_pos = fil;
@@ -769,7 +767,7 @@ int url_savename(char *adr_complete, char *fil_complete, char *save,
// ajouter nom du site éventuellement en premier
if (opt->savename_type == -1) { // utiliser savename_userdef! (%h%p/%n%q.%t)
const char *a = StringBuff(opt->savename_userdef);
- char *b = save;
+ char *b = afs->save;
/*char *nom_pos=NULL,*dot_pos=NULL; // Position nom et point */
char tok;
@@ -790,7 +788,7 @@ int url_savename(char *adr_complete, char *fil_complete, char *save,
*/
// Construire nom
- while((*a) && (((int) (b - save)) < HTS_URLMAXSIZE)) { // parser, et pas trop long..
+ while((*a) && (((int) (b - afs->save)) < HTS_URLMAXSIZE)) { // parser, et pas trop long..
if (*a == '%') {
int short_ver = 0;
@@ -1047,26 +1045,26 @@ int url_savename(char *adr_complete, char *fil_complete, char *save,
// adresse url
if (!opt->savename_83) { // noms longs (et pas de .)
- strcatbuff(save, final_adr);
+ strcatbuff(afs->save, final_adr);
} else { // noms 8-3
if (strlen(final_adr) > 4) {
if (strfield(final_adr, "www."))
- hts_appendStringUTF8(save, final_adr + 4, max_char);
+ hts_appendStringUTF8(afs->save, final_adr + 4, max_char);
else
- hts_appendStringUTF8(save, final_adr, max_char);
+ hts_appendStringUTF8(afs->save, final_adr, max_char);
} else
- hts_appendStringUTF8(save, final_adr, max_char);
+ hts_appendStringUTF8(afs->save, final_adr, max_char);
}
/* release */
RELEASE_ADR();
if (*fil != '/')
- strcatbuff(save, "/");
+ strcatbuff(afs->save, "/");
}
}
- hts_lowcase(save);
+ hts_lowcase(afs->save);
/*
// ne sert à rien car a déja été filtré normalement
@@ -1092,42 +1090,42 @@ int url_savename(char *adr_complete, char *fil_complete, char *save,
DECLARE_ADR(final_adr);
if (!opt->savename_83) { // noms longs
- strcatbuff(save, final_adr);
- strcatbuff(save, "/");
+ strcatbuff(afs->save, final_adr);
+ strcatbuff(afs->save, "/");
} else { // noms 8-3
if (strlen(final_adr) > 4) {
if (strfield(final_adr, "www."))
- hts_appendStringUTF8(save, final_adr + 4, max_char);
+ hts_appendStringUTF8(afs->save, final_adr + 4, max_char);
else
- hts_appendStringUTF8(save, final_adr, max_char);
- strcatbuff(save, "/");
+ hts_appendStringUTF8(afs->save, final_adr, max_char);
+ strcatbuff(afs->save, "/");
} else {
- hts_appendStringUTF8(save, final_adr, max_char);
- strcatbuff(save, "/");
+ hts_appendStringUTF8(afs->save, final_adr, max_char);
+ strcatbuff(afs->save, "/");
}
}
/* release */
RELEASE_ADR();
} else {
- strcatbuff(save, "web/"); // répertoire général
+ strcatbuff(afs->save, "web/"); // répertoire général
}
}
// si un html à coup sûr
if ((ext_chg != 0) ? (ishtml_ext(ext) == 1) : (ishtml(opt, fil) == 1)) {
if (opt->savename_type % 100 == 2) { // html/
- strcatbuff(save, "html/");
+ strcatbuff(afs->save, "html/");
}
} else {
if ((opt->savename_type % 100 == 1) || (opt->savename_type % 100 == 2)) { // html & images
- strcatbuff(save, "images/");
+ strcatbuff(afs->save, "images/");
}
}
switch (opt->savename_type % 100) {
case 4:
case 5:{ // séparer par types
- char *a = fil + strlen(fil) - 1;
+ const char *a = fil + strlen(fil) - 1;
// passer structures
while((a > fil) && (*a != '/') && (*a != '\\'))
@@ -1138,17 +1136,17 @@ int url_savename(char *adr_complete, char *fil_complete, char *save,
// html?
if ((ext_chg != 0) ? (ishtml_ext(ext) == 1) : (ishtml(opt, fil) == 1)) {
if (opt->savename_type % 100 == 5)
- strcatbuff(save, "html/");
+ strcatbuff(afs->save, "html/");
} else {
- char *a = fil + strlen(fil) - 1;
+ const char *a = fil + strlen(fil) - 1;
while((a > fil) && (*a != '/') && (*a != '.'))
a--;
if (*a != '.')
- strcatbuff(save, "other");
+ strcatbuff(afs->save, "other");
else
- strcatbuff(save, a + 1);
- strcatbuff(save, "/");
+ strcatbuff(afs->save, a + 1);
+ strcatbuff(afs->save, "/");
}
/*strcatbuff(save,a); */
/* add name */
@@ -1157,8 +1155,8 @@ int url_savename(char *adr_complete, char *fil_complete, char *save,
break;
case 99:{ // 'codé' .. c'est un gadget
size_t i;
- int j;
- char *a;
+ size_t j;
+ const char *a;
char C[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-";
int L;
@@ -1174,19 +1172,19 @@ int url_savename(char *adr_complete, char *fil_complete, char *save,
}
srand(s);
- j = (int) strlen(save);
+ j = strlen(afs->save);
for(i = 0; i < 8; i++) {
char c = C[(rand() % L)];
- save[i + j] = c;
+ afs->save[i + j] = c;
}
- save[i + j] = '\0';
+ afs->save[i + j] = '\0';
// ajouter extension
a = fil + strlen(fil) - 1;
while((a > fil) && (*a != '/') && (*a != '.'))
a--;
if (*a == '.') {
- strcatbuff(save, a); // ajouter
+ strcatbuff(afs->save, a); // ajouter
}
}
break;
@@ -1205,10 +1203,10 @@ int url_savename(char *adr_complete, char *fil_complete, char *save,
break;
}
- hts_lowcase(save);
+ hts_lowcase(afs->save);
- if (save[strlen(save) - 1] == '/')
- strcatbuff(save, DEFAULT_HTML); // nommer page par défaut!!
+ if (afs->save[strlen(afs->save) - 1] == '/')
+ strcatbuff(afs->save, DEFAULT_HTML); // nommer page par défaut!!
}
// vérifier qu'on ne doit pas forcer l'extension
@@ -1226,23 +1224,23 @@ int url_savename(char *adr_complete, char *fil_complete, char *save,
// de même en cas de manque d'extension on en place une de manière forcée..
// cela évite les /chez/toto et les /chez/toto/index.html incompatibles
if (opt->savename_type != -1 && opt->savename_delayed != 2) {
- char *a = save + strlen(save) - 1;
+ char *a = afs->save + strlen(afs->save) - 1;
- while((a > save) && (*a != '.') && (*a != '/'))
+ while((a > afs->save) && (*a != '.') && (*a != '/'))
a--;
if (*a != '.') { // agh pas de point
//strcatbuff(save,".none"); // a éviter
- strcatbuff(save, ".html"); // préférable!
+ strcatbuff(afs->save, ".html"); // préférable!
hts_log_print(opt, LOG_DEBUG, "Default HTML type set for %s%s => %s",
- adr_complete, fil_complete, save);
+ adr_complete, fil_complete, afs->save);
}
}
// effacer pass au besoin pour les autentifications
// (plus la peine : masqué au début)
/*
{
- char* a=jump_identification(save);
- if (a!=save) {
+ char* a=jump_identification(afs->save);
+ if (a!=afs->save) {
char BIGSTK tempo[HTS_URLMAXSIZE*2];
char *b;
tempo[0]='\0';
@@ -1259,18 +1257,18 @@ int url_savename(char *adr_complete, char *fil_complete, char *save,
*/
// éviter les / au début (cause: N100)
- if (save[0] == '/') {
+ if (afs->save[0] == '/') {
char BIGSTK tempo[HTS_URLMAXSIZE * 2];
- strcpybuff(tempo, save + 1);
- strcpybuff(save, tempo);
+ strcpybuff(tempo, afs->save + 1);
+ strcpybuff(afs->save, tempo);
}
/* Cleanup reserved or forbidden characters. */
{
size_t i;
- for(i = 0 ; save[i] != '\0' ; i++) {
- unsigned char c = (unsigned char) save[i];
+ for(i = 0 ; afs->save[i] != '\0' ; i++) {
+ unsigned char c = (unsigned char) afs->save[i];
if (c < 32 // control
|| c == 127 // unwise
|| c == '~' // unix unwise
@@ -1295,13 +1293,13 @@ int url_savename(char *adr_complete, char *fil_complete, char *save,
)
)
{
- save[i] = '_';
+ afs->save[i] = '_';
}
}
}
// éliminer les // (comme ftp://)
- cleanDoubleSlash(save);
+ cleanDoubleSlash(afs->save);
#if HTS_OVERRIDE_DOS_FOLDERS
/* Replace /foo/nul/bar by /foo/nul_/bar */
@@ -1309,7 +1307,7 @@ int url_savename(char *adr_complete, char *fil_complete, char *save,
int i = 0;
while(hts_tbdev[i][0]) {
- char *a = save;
+ char *a = afs->save;
while((a = strstrcase(a, (char *) hts_tbdev[i]))) {
switch ((int) a[strlen(hts_tbdev[i])]) {
@@ -1320,10 +1318,10 @@ int url_savename(char *adr_complete, char *fil_complete, char *save,
char BIGSTK tempo[HTS_URLMAXSIZE * 2];
tempo[0] = '\0';
- strncatbuff(tempo, save, (int) (a - save) + strlen(hts_tbdev[i]));
+ strncatbuff(tempo, afs->save, (int) (a - afs->save) + strlen(hts_tbdev[i]));
strcatbuff(tempo, "_");
strcatbuff(tempo, a + strlen(hts_tbdev[i]));
- strcpybuff(save, tempo);
+ strcpybuff(afs->save, tempo);
}
break;
}
@@ -1334,7 +1332,7 @@ int url_savename(char *adr_complete, char *fil_complete, char *save,
}
/* Strip ending . or ' ' forbidden on windoz */
- cleanEndingSpaceOrDot(save);
+ cleanEndingSpaceOrDot(afs->save);
#endif
@@ -1342,8 +1340,8 @@ int url_savename(char *adr_complete, char *fil_complete, char *save,
if (opt->savename_83) {
char BIGSTK n83[HTS_URLMAXSIZE * 2];
- long_to_83(opt->savename_83, n83, save);
- strcpybuff(save, n83);
+ long_to_83(opt->savename_83, n83, afs->save);
+ strcpybuff(afs->save, n83);
}
// enforce stricter ISO9660 compliance (bug reported by Steffo Carlsson)
// Level 1 File names are restricted to 8 characters with a 3 character extension,
@@ -1354,12 +1352,12 @@ int url_savename(char *adr_complete, char *fil_complete, char *save,
if (opt->savename_83 > 0) {
char *a, *last;
- for(last = save + strlen(save) - 1;
- last != save && *last != '/' && *last != '\\' && *last != '.'; last--) ;
+ for(last = afs->save + strlen(afs->save) - 1;
+ last != afs->save && *last != '/' && *last != '\\' && *last != '.'; last--) ;
if (*last != '.') {
last = NULL;
}
- for(a = save; *a != '\0'; a++) {
+ for(a = afs->save; *a != '\0'; a++) {
if (*a >= 'a' && *a <= 'z') {
*a -= 'a' - 'A';
} else if (*a == '.') {
@@ -1376,7 +1374,7 @@ int url_savename(char *adr_complete, char *fil_complete, char *save,
}
/* ensure that there is no ../ (potential vulnerability) */
- fil_simplifie(save);
+ fil_simplifie(afs->save);
/* convert name to UTF-8 ? Note: already done while parsing. */
//if (charset != NULL && charset[0] != '\0') {
@@ -1393,17 +1391,17 @@ int url_savename(char *adr_complete, char *fil_complete, char *save,
/* callback */
RUN_CALLBACK5(opt, savename, adr_complete, fil_complete, referer_adr,
- referer_fil, save);
+ referer_fil, afs->save);
hts_log_print(opt, LOG_DEBUG, "engine: save-name: local name: %s%s -> %s",
- adr, fil, save);
+ adr, fil, afs->save);
/* Ensure that the MANDATORY "temporary" extension is set */
if (ext_chg_delayed) {
char *ptr;
char *lastDot = NULL;
- for(ptr = save; *ptr != 0; ptr++) {
+ for(ptr = afs->save; *ptr != 0; ptr++) {
if (*ptr == '.') {
lastDot = ptr;
} else if (*ptr == '/' || *ptr == '\\') {
@@ -1411,8 +1409,8 @@ int url_savename(char *adr_complete, char *fil_complete, char *save,
}
}
if (lastDot == NULL) {
- strcatbuff(save, "." DELAYED_EXT);
- } else if (!IS_DELAYED_EXT(save)) {
+ strcatbuff(afs->save, "." DELAYED_EXT);
+ } else if (!IS_DELAYED_EXT(afs->save)) {
strcatbuff(lastDot, "." DELAYED_EXT);
}
}
@@ -1425,12 +1423,12 @@ int url_savename(char *adr_complete, char *fil_complete, char *save,
#define MIN_LAST_SEG_RESERVE 12
#define MAX_LAST_SEG_RESERVE 24
#define MAX_SEG_LEN 48
- if (hts_stringLengthUTF8(save) +
+ if (hts_stringLengthUTF8(afs->save) +
hts_stringLengthUTF8(StringBuff(opt->path_html_utf8)) >=
HTS_MAX_PATH_LEN) {
// convert to Unicode (much simpler)
size_t wsaveLen;
- hts_UCS4 *const wsave = hts_convertUTF8StringToUCS4(save, strlen(save), &wsaveLen);
+ hts_UCS4 *const wsave = hts_convertUTF8StringToUCS4(afs->save, strlen(afs->save), &wsaveLen);
if (wsave != NULL) {
const size_t parentLen =
hts_stringLengthUTF8(StringBuff(opt->path_html_utf8));
@@ -1485,7 +1483,7 @@ int url_savename(char *adr_complete, char *fil_complete, char *save,
// copy final name and cleanup
saveFinal = hts_convertUCS4StringToUTF8(wsave, j);
if (saveFinal != NULL) {
- strcpybuff(save, saveFinal);
+ strcpybuff(afs->save, saveFinal);
free(saveFinal);
} else {
hts_log_print(opt, LOG_ERROR, "Could not revert to UTF-8: %s%s",
@@ -1495,13 +1493,13 @@ int url_savename(char *adr_complete, char *fil_complete, char *save,
// log in debug
hts_log_print(opt, LOG_DEBUG, "Too long filename shortened: %s%s => %s",
- adr_complete, fil_complete, save);
+ adr_complete, fil_complete, afs->save);
} else {
- hts_log_print(opt, LOG_ERROR, "Could not read UTF-8: %s", save);
+ hts_log_print(opt, LOG_ERROR, "Could not read UTF-8: %s", afs->save);
}
// Re-check again ending space or dot after cut (see bug #5)
- cleanEndingSpaceOrDot(save);
+ cleanEndingSpaceOrDot(afs->save);
}
#undef MAX_UTF8_SEQ_CHARS
#undef MIN_LAST_SEG_RESERVE
@@ -1512,11 +1510,11 @@ int url_savename(char *adr_complete, char *fil_complete, char *save,
char BIGSTK tempo[HTS_URLMAXSIZE * 2];
strcpybuff(tempo, StringBuff(opt->path_html_utf8));
- strcatbuff(tempo, save);
- strcpybuff(save, tempo);
+ strcatbuff(tempo, afs->save);
+ strcpybuff(afs->save, tempo);
}
// vérifier que le nom n'est pas déja pris...
- if (liens != NULL) {
+ if (opt->liens != NULL) {
int nom_ok;
do {
@@ -1530,20 +1528,20 @@ int url_savename(char *adr_complete, char *fil_complete, char *save,
printf("\nStart search\n");
#endif
- i = hash_read(hash, save, NULL, HASH_STRUCT_FILENAME); // lecture type 0 (sav)
+ i = hash_read(hash, afs->save, NULL, HASH_STRUCT_FILENAME); // lecture type 0 (sav)
if (i >= 0) {
- int sameAdr = (strfield2(liens[i]->adr, normadr) != 0);
+ int sameAdr = (strfield2(heap(i)->adr, normadr) != 0);
int sameFil;
// NO - URL hack is only for stripping // and www.
//if (opt->urlhack != 0)
- // sameFil = ( strfield2(liens[i]->fil, normfil) != 0);
+ // sameFil = ( strfield2(heap(i)->fil, normfil) != 0);
//else
- sameFil = (strcmp(liens[i]->fil, normfil) == 0);
+ sameFil = (strcmp(heap(i)->fil, normfil) == 0);
if (sameAdr && sameFil) { // ok c'est le même lien, adresse déja définie
/* Take the existing name not to screw up with cAsE sEnSiTiViTy of Linux/Unix */
- if (strcmp(liens[i]->sav, save) != 0) {
- strcpybuff(save, liens[i]->sav);
+ if (strcmp(heap(i)->sav, afs->save) != 0) {
+ strcpybuff(afs->save, heap(i)->sav);
}
i = 0;
#if DEBUG_SAVENAME
@@ -1551,7 +1549,7 @@ int url_savename(char *adr_complete, char *fil_complete, char *save,
#endif
} else { // utilisé par un AUTRE, changer de nom
char BIGSTK tempo[HTS_URLMAXSIZE * 2];
- char *a = save + strlen(save) - 1;
+ char *a = afs->save + strlen(afs->save) - 1;
char *b;
int n = 2;
char collisionSeparator = ((opt->savename_83 != 2) ? '-' : '_');
@@ -1559,18 +1557,18 @@ int url_savename(char *adr_complete, char *fil_complete, char *save,
tempo[0] = '\0';
#if DEBUG_SAVENAME
- printf("\nWRONG CASE UNMATCH : \n%s\n%s, REDEFINE\n", liens[i]->fil,
+ printf("\nWRONG CASE UNMATCH : \n%s\n%s, REDEFINE\n", heap(i)->fil,
fil_complete);
#endif
nom_ok = 0;
i = 0;
- while((a > save) && (*a != '.') && (*a != '\\') && (*a != '/'))
+ while((a > afs->save) && (*a != '.') && (*a != '\\') && (*a != '/'))
a--;
if (*a == '.')
- strncatbuff(tempo, save, (int) (a - save));
+ strncatbuff(tempo, afs->save, a - afs->save);
else
- strcatbuff(tempo, save);
+ strcatbuff(tempo, afs->save);
// tester la présence d'un -xx (ex: index-2.html -> index-3.html)
b = tempo + strlen(tempo) - 1;
@@ -1601,7 +1599,7 @@ int url_savename(char *adr_complete, char *fil_complete, char *save,
if (*a == '.')
strcatbuff(tempo, a);
- strcpybuff(save, tempo);
+ strcpybuff(afs->save, tempo);
//printf("switched: %s\n",save);
@@ -1619,7 +1617,7 @@ int url_savename(char *adr_complete, char *fil_complete, char *save,
}
/* nom avec md5 urilisé partout */
-void standard_name(char *b, char *dot_pos, char *nom_pos, char *fil_complete,
+void standard_name(char *b, const char *dot_pos, const char *nom_pos, const char *fil,
int short_ver) {
char md5[32 + 2];
@@ -1627,9 +1625,9 @@ void standard_name(char *b, char *dot_pos, char *nom_pos, char *fil_complete,
/* Nom */
if (dot_pos) {
if (!short_ver) // Noms longs
- strncatbuff(b, nom_pos, (int) (dot_pos - nom_pos));
+ strncatbuff(b, nom_pos, (dot_pos - nom_pos));
else
- strncatbuff(b, nom_pos, min((int) (dot_pos - nom_pos), 8));
+ strncatbuff(b, nom_pos, min(dot_pos - nom_pos, 8));
} else {
if (!short_ver) // Noms longs
strcatbuff(b, nom_pos);
@@ -1637,7 +1635,7 @@ void standard_name(char *b, char *dot_pos, char *nom_pos, char *fil_complete,
strncatbuff(b, nom_pos, 8);
}
/* MD5 - 16 bits */
- strncatbuff(b, url_md5(md5, fil_complete), 4);
+ strncatbuff(b, url_md5(md5, fil), 4);
/* Ext */
if (dot_pos) {
strcatbuff(b, ".");
@@ -1658,11 +1656,11 @@ void standard_name(char *b, char *dot_pos, char *nom_pos, char *fil_complete,
}
/* Petit md5 */
-char *url_md5(char *digest, char *fil_complete) {
+char *url_md5(char *digest, const char *fil) {
char *a;
digest[0] = '\0';
- a = strchr(fil_complete, '?');
+ a = strchr(fil, '?');
if (a) {
if (strlen(a)) {
char BIGSTK buff[HTS_URLMAXSIZE * 2];
@@ -1677,7 +1675,7 @@ char *url_md5(char *digest, char *fil_complete) {
}
// interne à url_savename: ajoute une chaîne à une autre avec \ -> /
-void url_savename_addstr(char *d, char *s) {
+void url_savename_addstr(char *d, const char *s) {
int i = (int) strlen(d);
while(*s) {
diff --git a/src/htsname.h b/src/htsname.h
index 4c67893..675b0cb 100644
--- a/src/htsname.h
+++ b/src/htsname.h
@@ -80,18 +80,27 @@ typedef struct hash_struct hash_struct;
#define HTS_DEF_FWSTRUCT_lien_back
typedef struct lien_back lien_back;
#endif
+#ifndef HTS_DEF_FWSTRUCT_lien_adrfil
+#define HTS_DEF_FWSTRUCT_lien_adrfil
+typedef struct lien_adrfil lien_adrfil;
+#endif
+#ifndef HTS_DEF_FWSTRUCT_lien_adrfilsave
+#define HTS_DEF_FWSTRUCT_lien_adrfilsave
+typedef struct lien_adrfilsave lien_adrfilsave;
+#endif
// note: 'headers' can either be null, or incomplete (only r member filled)
-int url_savename(char *adr_complete, char *fil_complete, char *save,
- char *former_adr, char *former_fil, const char *referer_adr,
- const char *referer_fil, httrackp * opt, lien_url ** liens,
- int lien_tot, struct_back * sback, cache_back * cache,
+int url_savename(lien_adrfilsave *const afs,
+ lien_adrfil *const former,
+ const char *referer_adr, const char *referer_fil,
+ httrackp * opt, struct_back * sback, cache_back * cache,
hash_struct * hash, int ptr, int numero_passe,
const lien_back * headers);
-void standard_name(char *b, char *dot_pos, char *nom_pos, char *fil_complete,
+void standard_name(char *b, const char *dot_pos, const char *nom_pos,
+ const char *fil_complete,
int short_ver);
-void url_savename_addstr(char *d, char *s);
-char *url_md5(char *digest_buffer, char *fil_complete);
+void url_savename_addstr(char *d, const char *s);
+char *url_md5(char *digest_buffer, const char *fil_complete);
void url_savename_refname(const char *adr, const char *fil, char *filename);
char *url_savename_refname_fullpath(httrackp * opt, const char *adr,
const char *fil);
diff --git a/src/htsparse.c b/src/htsparse.c
index 820af38..6269450 100644
--- a/src/htsparse.c
+++ b/src/htsparse.c
@@ -78,12 +78,12 @@ Please visit our Website: http://www.httrack.com
} \
} \
ht_len+=A;
-#define HT_ADD_ADR \
+#define HT_add_adr \
if ((opt->getmode & 1) && (ptr>0)) { \
- size_t i = ((size_t) (adr - lastsaved)),j=ht_len; HT_ADD_CHK(i) \
+ size_t i = ((html - lastsaved)),j=ht_len; HT_ADD_CHK(i) \
memcpy(ht_buff+j, lastsaved, i); \
ht_buff[j+i]='\0'; \
- lastsaved=adr; \
+ lastsaved=html; \
}
#define HT_ADD(A) \
if ((opt->getmode & 1) && (ptr>0)) { \
@@ -283,7 +283,7 @@ Please visit our Website: http://www.httrack.com
#define AUTOMATE_LOOKUP_CURRENT_ADR() do { \
if (inscript) { \
int new_state_pos; \
- new_state_pos=inscript_state[inscript_state_pos][(unsigned char)*adr]; \
+ new_state_pos=inscript_state[inscript_state_pos][(unsigned char)*html]; \
if (new_state_pos < 0) { \
new_state_pos=inscript_state[inscript_state_pos][INSCRIPT_DEFAULT]; \
} \
@@ -297,7 +297,7 @@ Please visit our Website: http://www.httrack.com
#define INCREMENT_CURRENT_ADR(steps) do { \
int steps__ = (int) ( steps ); \
while(steps__ > 0) { \
- adr++; \
+ html++; \
AUTOMATE_LOOKUP_CURRENT_ADR(); \
steps__ --; \
} \
@@ -322,13 +322,26 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
}
}
if (RUN_CALLBACK4(opt, check_html, r->adr, (int) r->size, urladr(), urlfil())) {
- FILE *fp = NULL; // fichier écrit localement
- char *adr = r->adr; // pointeur (on parcourt)
- char *lastsaved; // adresse du dernier octet sauvé + 1
+ FILE *fp = NULL; // fichier écrit localement
+ const char *html = r->adr; // pointeur (on parcours)
+ const char *lastsaved; // adresse du dernier octet sauvé + 1
hts_log_print(opt, LOG_DEBUG, "scanning file %s%s (%s)..", urladr(), urlfil(),
savename());
+ /* Hack to avoid NULL char problems with C syntax */
+ /* Yes, some bogus HTML pages can embed null chars
+ and therefore can not be properly handled if this hack is not done
+ */
+ if (r->adr != NULL) {
+ size_t i;
+ for(i = 0 ; i < (size_t) r->size ; i++) {
+ if (r->adr[i] == '\0') {
+ r->adr[i] = ' ';
+ }
+ }
+ }
+
// Indexing!
#if HTS_MAKE_KEYWORD_INDEX
if (opt->kindex) {
@@ -392,7 +405,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
//int parseall_incomment=0; // dans un /* */ (exemple: a = /* URL */ "img.gif";)
//
- const char *intag_start = adr;
+ const char *intag_start = html;
const char *intag_name = NULL;
const char *intag_startattr = NULL;
int intag_start_valid = 0;
@@ -405,7 +418,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
int parent_relative = 0; // the parent is the base path (.js, .css..)
HT_ADD_START; // débuter
- lastsaved = adr;
+ lastsaved = html;
/* Initialize script automate for comments, quotes.. */
memset(inscript_state, 0xff, sizeof(inscript_state));
@@ -491,15 +504,15 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
else if (compare_mime(opt, r->contenttype, str->url_file, "text/xml") != 0
|| compare_mime(opt, r->contenttype, str->url_file,
"application/xml") != 0) {
- if (strstr(adr, "http://purl.org/rss/") != NULL) // Hmm, this is a bit lame ; will have to cleanup
+ if (strstr(html, "http://purl.org/rss/") != NULL) // Hmm, this is a bit lame ; will have to cleanup
{ /* RSS file */
inscript = intag = 0;
intag_start_valid = 0;
in_media = NULL; // regular XML
} else { // cancel: write all
- adr = r->adr + r->size;
- HT_ADD_ADR;
- lastsaved = adr;
+ html = r->adr + r->size;
+ HT_add_adr;
+ lastsaved = html;
}
}
// Detect UTF8 format
@@ -531,18 +544,9 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
error = 0;
/* Break if we are done yet */
- if ((adr - r->adr) >= r->size)
+ if (html - r->adr >= r->size)
break;
- /* Hack to avoid NULL char problems with C syntax */
- /* Yes, some bogus HTML pages can embed null chars
- and therefore can not be properly handled if this hack is not done
- */
- if (!(*adr)) {
- if (((int) (adr - r->adr)) < r->size)
- *adr = ' ';
- }
-
/*
index.html built here
*/
@@ -555,18 +559,18 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
if (!in_media) {
if (opt->makeindex && (ptr > 0)) {
if (opt->getmode & 1) { // autorisation d'écrire
- p = strfield(adr, "title");
+ p = strfield(html, "title");
if (p) {
- if (*(adr - 1) == '/')
+ if (*(html - 1) == '/')
p = 0; // /title
} else {
- if (strfield(adr, "/html"))
+ if (strfield(html, "/html"))
p = -1; // noter, mais sans titre
- else if (strfield(adr, "body"))
+ else if (strfield(html, "body"))
p = -1; // noter, mais sans titre
- else if (((int) (adr - r->adr)) >= (r->size - 1))
+ else if (((int) (html - r->adr)) >= (r->size - 1))
p = -1; // noter, mais sans titre
- else if ((int) (adr - r->adr) >= r->size - 2) // we got to hurry
+ else if ((int) (html - r->adr) >= r->size - 2) // we got to hurry
p = -1; // xxc xxc xxc
}
} else
@@ -603,7 +607,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
s[0] = '\0';
if (p > 0) {
- a = strchr(adr, '>');
+ a = strchr(html, '>');
if (a != NULL) {
a++;
while(is_space(*a))
@@ -669,7 +673,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
*/
/* Parse */
- if ((*adr == '<') /* No starting tag */
+ if ((*html == '<') /* No starting tag */
&&(!inscript) /* Not in (java)script */
&&(!incomment) /* Not in comment (<!--) */
&&(!in_media) /* Not in media */
@@ -678,8 +682,8 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
intag_ctype = 0;
//parseall_incomment=0;
//inquote=0; // effacer quote
- intag_start = adr;
- for(intag_name = adr + 1; is_realspace(*intag_name); intag_name++) ;
+ intag_start = html;
+ for(intag_name = html + 1; is_realspace(*intag_name); intag_name++) ;
intag_start_valid = 1;
codebase[0] = '\0'; // effacer éventuel codebase
@@ -688,9 +692,9 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
int pos;
// <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
- if ((pos = rech_tageq_all(adr, "http-equiv"))) {
+ if ((pos = rech_tageq_all(html, "http-equiv"))) {
const char *token = NULL;
- int len = rech_endtoken(adr + pos, &token);
+ int len = rech_endtoken(html + pos, &token);
if (len > 0) {
if (strfield(token, "content-type")) {
@@ -711,11 +715,11 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
// We are looking for the first head so that we can declare the HTTP-headers charset early
// Emit as soon as we see the first <head>, <meta>, or <body> tag.
// FIXME: we currently emit the tag BEFORE the <head> tag, actually, which is not clean
- if ((p = strfield(adr, "<head>")) != 0
- || ((p = strfield(adr, "<head")) != 0 && isspace(adr[p]))
- || (p = strfield(adr, "<body>")) != 0
- || ((p = strfield(adr, "<body")) != 0 && isspace(adr[p]))
- || ((p = strfield(adr, "<meta")) != 0 && isspace(adr[p]))
+ if ((p = strfield(html, "<head>")) != 0
+ || ((p = strfield(html, "<head")) != 0 && isspace(html[p]))
+ || (p = strfield(html, "<body>")) != 0
+ || ((p = strfield(html, "<body")) != 0 && isspace(html[p]))
+ || ((p = strfield(html, "<meta")) != 0 && isspace(html[p]))
) {
emited_footer++;
} else {
@@ -724,7 +728,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
break;
case 1:
// And the closing comment info tag
- if ((p = strfield(adr, "</html") != 0)) {
+ if ((p = strfield(html, "</html") != 0)) {
emited_footer++;
} else {
p = 0;
@@ -768,15 +772,15 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
}
}
// éliminer les <!-- (commentaires) : intag dévalidé
- if (*(adr + 1) == '!')
- if (*(adr + 2) == '-')
- if (*(adr + 3) == '-') {
+ if (*(html + 1) == '!')
+ if (*(html + 2) == '-')
+ if (*(html + 3) == '-') {
intag = 0;
incomment = 1;
intag_start_valid = 0;
}
- } else if ((*adr == '>') /* ending tag */
+ } else if ((*html == '>') /* ending tag */
&&((!inscript && !in_media) || (inscript_tag)) /* and in tag (or in script) */
) {
if (inscript_tag) {
@@ -817,7 +821,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
}
} else { /* end of comment? */
// vérifier fermeture correcte
- if ((*(adr - 1) == '-') && (*(adr - 2) == '-')) {
+ if ((*(html - 1) == '-') && (*(html - 2) == '-')) {
intag = 0;
incomment = 0;
intag_start_valid = 0;
@@ -830,7 +834,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
<!-- foo > example <!-- bar > is sometimes accepted by browsers
when no --> is used somewhere else.. darn those browsers are dirty
*/
- if (!strstr(adr, "-->")) {
+ if (!strstr(html, "-->")) {
intag = 0;
incomment = 0;
intag_start_valid = 0;
@@ -850,18 +854,18 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
int p_searchMETAURL = 0; // chercher ..URL=<url>
int add_class = 0; // ajouter .class
int add_class_dots_to_patch = 0; // number of '.' in code="x.y.z<realname>"
- char *p_flush = NULL;
+ const char *p_flush = NULL;
// ------------------------------------------------------------
// parsing évolé
// ------------------------------------------------------------
- if (((isalpha((unsigned char) *adr)) || (*adr == '/') || (inscript) || (in_media) || (inscriptgen))) { // sinon pas la peine de tester..
+ if (((isalpha((unsigned char) *html)) || (*html == '/') || (inscript) || (in_media) || (inscriptgen))) { // sinon pas la peine de tester..
/* caractère de terminaison pour "miniparsing" javascript=.. ?
(ex: <a href="javascript:()" action="foo"> ) */
if (inscript_tag) {
if (inscript_tag_lastc) {
- if (*adr == inscript_tag_lastc) {
+ if (*html == inscript_tag_lastc) {
/* sortir */
inscript_tag = inscript = 0;
incomment = 0;
@@ -888,9 +892,9 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
p = 0;
valid_p = 1;
} else if (strcmp(in_media, "AAM") == 0) { // AAM
- if (is_space((unsigned char) adr[0])
- && !is_space((unsigned char) adr[1])) {
- char *a = adr + 1;
+ if (is_space((unsigned char) html[0])
+ && !is_space((unsigned char) html[1])) {
+ const char *a = html + 1;
int n = 0;
int ok = 0;
int dot = 0;
@@ -909,7 +913,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
tmp[0] = '\0';
strncat(tmp, a + dot + 1, n - dot - 1);
if (is_knowntype(opt, tmp) || ishtml_ext(tmp) != -1) {
- adr++;
+ html++;
p = 0;
valid_p = 1;
unquoted_script = 1;
@@ -926,21 +930,21 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
// note: inscript==1 donc on sautera après les \"
if (inscript) {
if (inscriptgen) { // on est déja dans un objet générant..
- if (*adr == scriptgen_q) { // fermeture des " ou '
- if (*(adr - 1) != '\\') { // non
+ if (*html == scriptgen_q) { // fermeture des " ou '
+ if (*(html - 1) != '\\') { // non
inscriptgen = 0; // ok parsing terminé
}
}
} else {
- char *a = NULL;
+ const char *a = NULL;
char check_this_fking_line = 0; // parsing code javascript..
char must_be_terminated = 0; // caractère obligatoire de terminaison!
int token_size;
- if (!(token_size = strfield(adr, ".writeln"))) // détection ...objet.write[ln]("code html")...
- token_size = strfield(adr, ".write");
+ if (!(token_size = strfield(html, ".writeln"))) // détection ...objet.write[ln]("code html")...
+ token_size = strfield(html, ".write");
if (token_size) {
- a = adr + token_size;
+ a = html + token_size;
while(is_realspace(*a))
a++; // sauter espaces
if (*a == '(') { // début parenthèse
@@ -966,7 +970,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
while(is_realspace(*a))
a++;
if ((*a == '\'') || (*a == '"')) { // départ de '' ou ""
- char *b;
+ const char *b;
scriptgen_q = *a; // quote
b = a + 1; // départ de la chaîne
@@ -997,10 +1001,10 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
// NOTE: le code javascript autogénéré n'est pas pris en compte!!
// (et ne marche pas dans 50% des cas de toute facon!)
if (check_this_fking_line == 1) {
- p = (int) (b - adr); // calculer saut!
+ p = (int) (b - html); // calculer saut!
} else {
inscriptgen = 1; // SCRIPTGEN actif
- adr = b; // jump
+ html = b; // jump
}
if ((opt->debug > 1) && (opt->log != NULL)) {
@@ -1029,9 +1033,9 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
if (!p) {
// si dans un tag, et pas dans un script - sauf si on analyse un obj.write("..
if ((intag && (!inscript)) || inscriptgen) {
- if ((*(adr - 1) == '<') || (is_space(*(adr - 1)))) { // <tag < tag etc
+ if ((*(html - 1) == '<') || (is_space(*(html - 1)))) { // <tag < tag etc
// <A HREF=.. pour les liens HTML
- p = rech_tageq(adr, "href");
+ p = rech_tageq(html, "href");
if (p) { // href.. tester si c'est une bas href!
if ((intag_start_valid) && check_tag(intag_start, "base")) { // oui!
// ** note: base href et codebase ne font pas bon ménage..
@@ -1044,7 +1048,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
int i = 0;
while((p == 0) && (strnotempty(hts_detect[i]))) {
- p = rech_tageq(adr, hts_detect[i]);
+ p = rech_tageq(html, hts_detect[i]);
if (p) {
/* This is a temporary hack to avoid archive=foo.jar,bar.jar .. */
if (strcmp(hts_detect[i], "archive") == 0) {
@@ -1060,7 +1064,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
int i = 0;
while((p == 0) && (strnotempty(hts_detectbeg[i]))) {
- p = rech_tageqbegdigits(adr, hts_detectbeg[i]);
+ p = rech_tageqbegdigits(html, hts_detectbeg[i]);
i++;
}
}
@@ -1070,17 +1074,17 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
int i = 0;
while((p == 0) && (strnotempty(hts_detectURL[i]))) {
- p = rech_tageq(adr, hts_detectURL[i]);
+ p = rech_tageq(html, hts_detectURL[i]);
i++;
}
if (p) {
if (intag_ctype == 1) {
p = 0;
#if 0
- //if ((pos=rech_tageq(adr, "content"))) {
+ //if ((pos=rech_tageq(html, "content"))) {
char temp[256];
char *token = NULL;
- int len = rech_endtoken(adr + pos, &token);
+ int len = rech_endtoken(html + pos, &token);
if (len > 0 && len < sizeof(temp) - 2) {
char *chpos;
@@ -1113,7 +1117,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
int i = 0;
while((p == 0) && (strnotempty(hts_detectandleave[i]))) {
- p = rech_tageq(adr, hts_detectandleave[i]);
+ p = rech_tageq(html, hts_detectandleave[i]);
i++;
}
if (p)
@@ -1127,20 +1131,20 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
/* détection onLoad etc */
while((p == 0) && (strnotempty(hts_detect_js[i]))) {
- p = rech_tageq(adr, hts_detect_js[i]);
+ p = rech_tageq(html, hts_detect_js[i]);
i++;
}
/* non détecté - détecter également les onXxxxx= */
if (p == 0) {
- if ((*adr == 'o') && (*(adr + 1) == 'n')
- && isUpperLetter(*(adr + 2))) {
+ if ((*html == 'o') && (*(html + 1) == 'n')
+ && isUpperLetter(*(html + 2))) {
p = 0;
- while(isalpha((unsigned char) adr[p]) && (p < 64))
+ while(isalpha((unsigned char) html[p]) && (p < 64))
p++;
if (p < 64) {
- while(is_space(adr[p]))
+ while(is_space(html[p]))
p++;
- if (adr[p] == '=')
+ if (html[p] == '=')
p++;
else
p = 0;
@@ -1150,8 +1154,8 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
}
/* OK, événement repéré */
if (p) {
- inscript_tag_lastc = *(adr + p); /* à attendre à la fin */
- adr += p /*+ 1*/; /* saut */
+ inscript_tag_lastc = *(html + p); /* à attendre à la fin */
+ html += p /*+ 1*/; /* saut */
/*
On est désormais dans du code javascript
*/
@@ -1166,7 +1170,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
}
// <APPLET CODE=.. pour les applet java.. [CODEBASE (chemin..) à faire]
if (p == 0) {
- p = rech_tageq(adr, "code");
+ p = rech_tageq(html, "code");
if (p) {
if ((intag_start_valid) && check_tag(intag_start, "applet")) { // dans un <applet !
p_type = -1; // juste le nom de fichier+dossier, écire avant codebase
@@ -1177,9 +1181,9 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
// pas très propre mais c'est ce qu'il y a de plus simple à faire!!
{
- char *a;
+ const char *a;
- a = adr;
+ a = html;
while((*a) && (*a != '>')
&& (!rech_tageq(a, "codebase")))
a++;
@@ -1187,20 +1191,23 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
char *b;
b = strchr(a, '>');
- if (b) {
- if (((int) (b - adr)) < 1000) { // au total < 1Ko
+ if (b != NULL) {
+ if (b - html < 1000) { // au total < 1Ko
char BIGSTK tempo[HTS_URLMAXSIZE * 2];
+ const size_t offset = html - r->adr;
+ char *const modify = &r->adr[offset];
+ assertf(modify == html);
tempo[0] = '\0';
- strncatbuff(tempo, a, (int) (b - a));
+ strncatbuff(tempo, a, b - a);
strcatbuff(tempo, " ");
- strncatbuff(tempo, adr, (int) (a - adr - 1));
+ strncatbuff(tempo, html, a - html - 1);
// éventuellement remplire par des espaces pour avoir juste la taille
- while((int) strlen(tempo) < ((int) (b - adr)))
+ while(strlen(tempo) < (size_t) (b - html))
strcatbuff(tempo, " ");
// pas d'erreur?
- if ((int) strlen(tempo) == ((int) (b - adr))) {
- strncpy(adr, tempo, strlen(tempo)); // PAS d'octet nul à la fin!
+ if (strlen(tempo) == b - html) {
+ strncpy(modify, tempo, strlen(tempo)); // PAS d'octet nul à la fin!
p = 0; // DEVALIDER!!
p_type = 0;
add_class = 0;
@@ -1215,7 +1222,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
}
// liens à patcher mais pas à charger (ex: codebase)
if (p == 0) { // note: si non chargé (ex: ignorer .class) patché tout de même
- p = rech_tageq(adr, "codebase");
+ p = rech_tageq(html, "codebase");
if (p) {
if ((intag_start_valid) && check_tag(intag_start, "applet")) { // dans un <applet !
p_type = -2;
@@ -1229,18 +1236,18 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
if (opt->robots) {
if ((intag_start_valid)
&& check_tag(intag_start, "meta")) {
- if (rech_tageq(adr, "name")) { // name=robots.txt
+ if (rech_tageq(html, "name")) { // name=robots.txt
char tempo[1100];
char *a;
tempo[0] = '\0';
- a = strchr(adr, '>');
+ a = strchr(html, '>');
#if DEBUG_ROBOTS
printf("robots.txt meta tag detected\n");
#endif
if (a) {
- if (((int) (a - adr)) < 999) {
- strncatbuff(tempo, adr, (int) (a - adr));
+ if (((int) (a - html)) < 999) {
+ strncatbuff(tempo, html, (int) (a - html));
if (strstrcase(tempo, "content")) {
if (strstrcase(tempo, "robots")) {
if (strstrcase(tempo, "nofollow")) {
@@ -1265,7 +1272,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
// entrée dans une applet javascript
/*if (!inscript) { // sinon on est dans un obj.write("..
if (p==0)
- if (rech_sampletag(adr,"script"))
+ if (rech_sampletag(html,"script"))
if (check_tag(intag_start,"script")) {
inscript=1;
}
@@ -1280,13 +1287,13 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
#if 0
/* Check // javascript comments */
- if (*adr == 10 || *adr == 13) {
+ if (*html == 10 || *html == 13) {
inscript_check_comments = 1;
inscript_in_comments = 0;
} else if (inscript_check_comments) {
- if (!is_realspace(*adr)) {
+ if (!is_realspace(*html)) {
inscript_check_comments = 0;
- if (adr[0] == '/' && adr[1] == '/') {
+ if (html[0] == '/' && html[1] == '/') {
inscript_in_comments = 1;
}
}
@@ -1295,15 +1302,15 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
/* Parse */
assertf(inscript_name != NULL);
- if (*adr == '/'
+ if (*html == '/'
&&
- ((strfield(adr, "/script")
+ ((strfield(html, "/script")
&& strfield(inscript_name, "script"))
- || (strfield(adr, "/style")
+ || (strfield(html, "/style")
&& strfield(inscript_name, "style"))
)
&& inscript_locked == 0) {
- char *a = adr;
+ const char *a = html;
//while(is_realspace(*(--a)));
while(is_realspace(*a))
@@ -1336,54 +1343,54 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
if ((opt->parsejava & HTSPARSE_NO_JAVASCRIPT) == 0) {
int nc;
- nc = strfield(adr, ".src"); // nom.src="image";
- if (!nc && inscript_tag && inscript_tag_lastc == *(adr - 1))
- nc = strfield(adr, "src"); // onXXX='src="image";'
+ nc = strfield(html, ".src"); // nom.src="image";
+ if (!nc && inscript_tag && inscript_tag_lastc == *(html - 1))
+ nc = strfield(html, "src"); // onXXX='src="image";'
if (!nc)
- nc = strfield(adr, ".location"); // document.location="doc"
+ nc = strfield(html, ".location"); // document.location="doc"
if (!nc)
- nc = strfield(adr, ":location"); // javascript:location="doc"
+ nc = strfield(html, ":location"); // javascript:location="doc"
if (!nc) { // location="doc"
- if ((nc = strfield(adr, "location"))
- && !isspace(*(adr - 1))
+ if ((nc = strfield(html, "location"))
+ && !isspace(*(html - 1))
)
nc = 0;
}
if (!nc)
- nc = strfield(adr, ".href"); // document.location="doc"
+ nc = strfield(html, ".href"); // document.location="doc"
if (!nc)
- if ((nc = strfield(adr, ".open"))) { // window.open("doc",..
+ if ((nc = strfield(html, ".open"))) { // window.open("doc",..
expected = '('; // parenthèse
expected_end = "),"; // fin: virgule ou parenthèse
ensure_not_mime = 1; //* ensure the url is not a mime type */
}
if (!nc)
- if ((nc = strfield(adr, ".replace"))) { // window.replace("url")
+ if ((nc = strfield(html, ".replace"))) { // window.replace("url")
expected = '('; // parenthèse
expected_end = ")"; // fin: parenthèse
}
if (!nc)
- if ((nc = strfield(adr, ".link"))) { // window.link("url")
+ if ((nc = strfield(html, ".link"))) { // window.link("url")
expected = '('; // parenthèse
expected_end = ")"; // fin: parenthèse
}
- if (!nc && (nc = strfield(adr, "url")) && (!isalnum(*(adr - 1))) && *(adr - 1) != '_') { // url(url)
+ if (!nc && (nc = strfield(html, "url")) && (!isalnum(*(html - 1))) && *(html - 1) != '_') { // url(url)
expected = '('; // parenthèse
expected_end = ")"; // fin: parenthèse
can_avoid_quotes = 1;
quotes_replacement = ')';
}
if (!nc)
- if ((nc = strfield(adr, "import"))) { // import "url"
- if (is_space(*(adr + nc))) {
+ if ((nc = strfield(html, "import"))) { // import "url"
+ if (is_space(*(html + nc))) {
expected = 0; // no char expected
} else
nc = 0;
}
if (nc) {
- char *a;
+ const char *a;
- a = adr + nc;
+ a = html + nc;
while(is_realspace(*a))
a++;
if ((*a == expected) || (!expected)) {
@@ -1392,7 +1399,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
while(is_realspace(*a))
a++;
if ((*a == 34) || (*a == '\'') || (can_avoid_quotes)) {
- char *b, *c;
+ const char *b, *c;
int ndelim = 1;
if ((*a == 34) || (*a == '\''))
@@ -1465,7 +1472,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
"link detected in javascript: %s",
str);
}
- p = (int) (a - adr); // p non nul: TRAITER CHAINE COMME FICHIER
+ p = (int) (a - html); // p non nul: TRAITER CHAINE COMME FICHIER
if (can_avoid_quotes) {
ending_p = quotes_replacement;
}
@@ -1487,14 +1494,14 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
//p=rech_tageq(adr,"primary"); // lien primaire, yeah
p = 0; // No stupid tag anymore, raw link
valid_p = 1; // Valid even if p==0
- while((adr[p] == '\r') || (adr[p] == '\n'))
+ while((html[p] == '\r') || (html[p] == '\n'))
p++;
//can_avoid_quotes=1;
ending_p = '\r';
}
- } else if (isspace((unsigned char) *adr)) {
- intag_startattr = adr + 1; // attribute in tag (for dirty parsing)
+ } else if (isspace((unsigned char) *html)) {
+ intag_startattr = html + 1; // attribute in tag (for dirty parsing)
}
// ------------------------------------------------------------
@@ -1504,18 +1511,18 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
// ------------------------------------------------------------
if (opt->parseall && (opt->parsejava & HTSPARSE_NO_AGGRESSIVE) == 0 && (ptr > 0) && (!in_media) /* && (!inscript_in_comments) */ ) { // option parsing "brut"
//int incomment_justquit=0;
- if (!is_realspace(*adr)) {
+ if (!is_realspace(*html)) {
int noparse = 0;
// Gestion des /* */
#if 0
if (inscript) {
if (parseall_incomment) {
- if ((*adr == '/') && (*(adr - 1) == '*'))
+ if ((*html == '/') && (*(html - 1) == '*'))
parseall_incomment = 0;
incomment_justquit = 1; // ne pas noter dernier caractère
} else {
- if ((*adr == '/') && (*(adr + 1) == '*'))
+ if ((*html == '/') && (*(html + 1) == '*'))
parseall_incomment = 1;
}
} else
@@ -1536,12 +1543,12 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
if (!noparse) {
//if ((!parseall_incomment) && (!noparse)) {
if (!p) { // non déja trouvé
- if (adr != r->adr) { // >1 caractère
+ if (html != r->adr) { // >1 caractère
// scanner les chaines
- if ((*adr == '\"') || (*adr == '\'')) { // "xx.gif" 'xx.gif'
+ if ((*html == '\"') || (*html == '\'')) { // "xx.gif" 'xx.gif'
if (strchr("=(,", parseall_lastc)) { // exemple: a="img.gif.. (handles comments)
- char *a = adr;
- char stop = *adr; // " ou '
+ const char *a = html;
+ char stop = *html; // " ou '
int count = 0;
// sauter caractères
@@ -1574,7 +1581,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
tempo[0] = '\0';
type[0] = '\0';
//
- strncatbuff(tempo, adr + 1, count);
+ strncatbuff(tempo, html + 1, count);
//
if ((!strchr(tempo, ' ')) || inscript) { // espace dedans: méfiance! (sauf dans code javascript)
int invalid_url = 0;
@@ -1702,7 +1709,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
// plus dans un commentaire
if (inscript_state_pos == INSCRIPT_START
&& inscript_state_pos_prev == INSCRIPT_START) {
- parseall_lastc = *adr; // caractère avant le prochain
+ parseall_lastc = *html; // caractère avant le prochain
}
} // if realspace
@@ -1714,14 +1721,14 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
//
if ((p > 0) || (valid_p)) { // on a repéré un lien
//int lien_valide=0;
- char *eadr = NULL; /* fin de l'URL */
+ const char *eadr = NULL; /* fin de l'URL */
//char* quote_adr=NULL; /* adresse du ? dans l'adresse */
int ok = 1;
char quote = '\0';
int quoteinscript = 0;
int noquote = 0;
- char *tag_attr_start = adr;
+ const char *tag_attr_start = html;
// si nofollow ou un stop a été déclenché, réécrire tous les liens en externe
if ((nofollow)
@@ -1734,27 +1741,27 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
// écrire codebase avant, flusher avant code
if ((p_type == -1) || (p_type == -2)) {
if ((opt->getmode & 1) && (ptr > 0)) {
- HT_ADD_ADR; // refresh
+ HT_add_adr; // refresh
}
- lastsaved = adr; // dernier écrit+1
+ lastsaved = html; // dernier écrit+1
}
// sauter espaces
// adr+=p;
INCREMENT_CURRENT_ADR(p);
- while((is_space(*adr)
- || (inscriptgen && adr[0] == '\\' && is_space(adr[1])
+ while((is_space(*html)
+ || (inscriptgen && html[0] == '\\' && is_space(html[1])
)
)
&& quote == '\0') {
if (!quote)
- if ((*adr == '\"') || (*adr == '\'')) {
- quote = *adr; // on doit attendre cela à la fin
- if (inscriptgen && *(adr - 1) == '\\') {
+ if ((*html == '\"') || (*html == '\'')) {
+ quote = *html; // on doit attendre cela à la fin
+ if (inscriptgen && *(html - 1) == '\\') {
quoteinscript = 1; /* will wait for \" */
}
}
// puis quitter
- // adr++; // sauter les espaces, "" et cie
+ // html++; // sauter les espaces, "" et cie
INCREMENT_CURRENT_ADR(1);
}
@@ -1767,9 +1774,9 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
// sauter éventuel \" ou \' javascript
if (inscript) { // on est dans un obj.write("..
- if (*adr == '\\') {
- if ((*(adr + 1) == '\'') || (*(adr + 1) == '"')) { // \" ou \'
- // adr+=2; // sauter
+ if (*html == '\\') {
+ if ((*(html + 1) == '\'') || (*(html + 1) == '"')) { // \" ou \'
+ // html+=2; // sauter
INCREMENT_CURRENT_ADR(2);
}
}
@@ -1778,19 +1785,19 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
if (p_searchMETAURL) {
int l = 0;
- while((adr + l + 4 < r->adr + r->size)
- && (!strfield(adr + l, "URL="))
+ while((html + l + 4 < r->adr + r->size)
+ && (!strfield(html + l, "URL="))
&& (l < 128))
l++;
- if (!strfield(adr + l, "URL="))
+ if (!strfield(html + l, "URL="))
ok = -1;
else
- adr += (l + 4);
+ html += (l + 4);
}
/* éviter les javascript:document.location=.. : les parser, plutôt */
if (ok != -1) {
- if (strfield(adr, "javascript:")
+ if (strfield(html, "javascript:")
&& !inscript /* we don't want to parse 'javascript:' inside document.write inside scripts */
) {
ok = -1;
@@ -1808,22 +1815,22 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
}
if (p_type == 1) {
- if (*adr == '#') {
- adr++; // sauter # pour usemap etc
+ if (*html == '#') {
+ html++; // sauter # pour usemap etc
}
}
- eadr = adr;
+ eadr = html;
// ne pas flusher après code si on doit écrire le codebase avant!
if ((p_type != -1) && (p_type != 2) && (p_type != -2)) {
if ((opt->getmode & 1) && (ptr > 0)) {
- HT_ADD_ADR; // refresh
+ HT_add_adr; // refresh
}
- lastsaved = adr; // dernier écrit+1
+ lastsaved = html; // dernier écrit+1
// après on écrira soit les données initiales,
// soir une URL/lien modifié!
} else if (p_type == -1)
- p_flush = adr; // flusher jusqu'à adr ensuite
+ p_flush = html; // flusher jusqu'à adr ensuite
if (ok != -1) { // continuer
// découper le lien
@@ -1832,7 +1839,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
if (!is_space(*eadr))
ok = 0;
}
- if ((((int) (eadr - adr))) > HTS_URLMAXSIZE) // ** trop long, >HTS_URLMAXSIZE caractères (on prévoit HTS_URLMAXSIZE autres pour path)
+ if ((((int) (eadr - html))) > HTS_URLMAXSIZE) // ** trop long, >HTS_URLMAXSIZE caractères (on prévoit HTS_URLMAXSIZE autres pour path)
ok = -1; // ne pas traiter ce lien
if (ok > 0) {
@@ -1877,10 +1884,10 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
} while(ok == 1);
// Empty link detected
- if ((((int) (eadr - adr))) <= 1) { // link empty
+ if ((((int) (eadr - html))) <= 1) { // link empty
ok = -1; // No
- if (*adr != '#') { // Not empty+unique #
- if ((((int) (eadr - adr)) == 1)) { // 1=link empty with delim (end_adr-start_adr)
+ if (*html != '#') { // Not empty+unique #
+ if ((((int) (eadr - html)) == 1)) { // 1=link empty with delim (end_adr-start_adr)
if (quote) {
if ((opt->getmode & 1) && (ptr > 0)) {
HT_ADD("#"); // We add this for a <href="">
@@ -1890,7 +1897,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
}
}
// This is a dirty and horrible hack to avoid parsing an Adobe GoLive bogus tag
- if (strfield(adr, "(Empty Reference!)")) {
+ if (strfield(html, "(Empty Reference!)")) {
ok = -1; // No
}
@@ -1900,13 +1907,13 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
char BIGSTK lien[HTS_URLMAXSIZE * 2];
int meme_adresse = 0; // 0 par défaut pour primary
- //char *copie_de_adr=adr;
+ //char *copie_de_adr=html;
//char* p;
// construire lien (découpage)
- if ((((int) (eadr - adr)) - 1) < HTS_URLMAXSIZE) { // pas trop long?
- strncpy(lien, adr, ((int) (eadr - adr)) - 1);
- *(lien + (((int) (eadr - adr))) - 1) = '\0';
+ if ((((int) (eadr - html)) - 1) < HTS_URLMAXSIZE) { // pas trop long?
+ strncpy(lien, html, ((int) (eadr - html)) - 1);
+ *(lien + (((int) (eadr - html))) - 1) = '\0';
//printf("link: %s\n",lien);
// supprimer les espaces
while((lien[strlen(lien) - 1] == ' ') && (strnotempty(lien)))
@@ -1918,15 +1925,16 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
// ------------------------------------------------------
// Lien repéré et extrait
if (strnotempty(lien) > 0) { // construction du lien
- char BIGSTK adr[HTS_URLMAXSIZE * 2], fil[HTS_URLMAXSIZE * 2]; // ATTENTION adr cache le "vrai" adr
+ lien_adrfilsave afs;
int forbidden_url = -1; // lien non interdit (mais non autorisé..)
int just_test_it = 0; // mode de test des liens
int set_prio_to = 0; // pour capture de page isolée
int import_done = 0; // lien importé (ne pas scanner ensuite *à priori*)
//
- adr[0] = '\0';
- fil[0] = '\0';
+ afs.af.adr[0] = '\0';
+ afs.af.fil[0] = '\0';
+ afs.save[0] = '\0';
//
// 0: autorisé
// 1: interdit (patcher tout de même adresse)
@@ -2207,7 +2215,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
// Vérifier les codebase=applet (au lieu de applet/)
if (p_type == -2) { // codebase
if (strnotempty(lien)) {
- if (fil[strlen(lien) - 1] != '/') { // pas répertoire
+ if (lien[strlen(lien) - 1] != '/') { // pas répertoire
strcatbuff(lien, "/");
}
}
@@ -2231,17 +2239,16 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
}
// copier nom host si besoin est
if (!link_has_authority(lien)) { // pas de http://
- char BIGSTK adr2[HTS_URLMAXSIZE * 2], fil2[HTS_URLMAXSIZE * 2]; // ** euh ident_url_relatif??
+ lien_adrfil af2; // ** euh ident_url_relatif??
- if (ident_url_relatif(lien, urladr(), urlfil(), adr2, fil2) <
- 0) {
+ if (ident_url_relatif(lien, urladr(), urlfil(), &af2) < 0) {
error = 1;
} else {
strcpybuff(lien, "http://");
- strcatbuff(lien, adr2);
- if (*fil2 != '/')
+ strcatbuff(lien, af2.adr);
+ if (*af2.fil != '/')
strcatbuff(lien, "/");
- strcatbuff(lien, fil2);
+ strcatbuff(lien, af2.fil);
{
char *a;
@@ -2321,7 +2328,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
HTS_URLMAXSIZE) {
// mailto: and co: do NOT add base
if (ident_url_relatif
- (lien, urladr(), urlfil(), adr, fil) >= 0) {
+ (lien, urladr(), urlfil(), &afs.af) >= 0) {
char BIGSTK tempo[HTS_URLMAXSIZE * 2];
// base est absolue
@@ -2342,19 +2349,18 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
lien);
}
} else {
- char BIGSTK badr[HTS_URLMAXSIZE * 2],
- bfil[HTS_URLMAXSIZE * 2];
- if (ident_url_absolute(_base, badr, bfil) >= 0) {
- if (((int) strlen(badr) + (int) strlen(lien)) <
+ lien_adrfil baseaf;
+ if (ident_url_absolute(_base, &baseaf) >= 0) {
+ if (((int) strlen(baseaf.adr) + (int) strlen(lien)) <
HTS_URLMAXSIZE) {
char BIGSTK tempo[HTS_URLMAXSIZE * 2];
// base est absolue
tempo[0] = '\0';
- if (!link_has_authority(badr)) {
+ if (!link_has_authority(baseaf.adr)) {
strcatbuff(tempo, "http://");
}
- strcatbuff(tempo, badr);
+ strcatbuff(tempo, baseaf.adr);
strcatbuff(tempo, lien);
strcpybuff(lien, tempo); // patcher en considérant base
@@ -2384,8 +2390,8 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
relativeurladr(), relativeurlfil());
if ((reponse =
ident_url_relatif(lien, relativeurladr(), relativeurlfil(),
- adr, fil)) < 0) {
- adr[0] = '\0'; // erreur
+ &afs.af)) < 0) {
+ afs.af.adr[0] = '\0'; // erreur
if (reponse == -2) {
hts_log_print(opt, LOG_WARNING,
"Link %s not caught (unknown protocol)",
@@ -2398,14 +2404,14 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
} else {
hts_log_print(opt, LOG_DEBUG,
"built relative link %s with %s%s -> %s%s",
- lien, relativeurladr(), relativeurlfil(), adr,
- fil);
+ lien, relativeurladr(), relativeurlfil(), afs.af.adr,
+ afs.af.fil);
}
} else {
hts_log_print(opt, LOG_DEBUG,
"link %s not build, error detected before",
lien);
- adr[0] = '\0';
+ afs.af.adr[0] = '\0';
}
// Le lien doit juste être réécrit, mais ne doit pas générer un lien
@@ -2413,7 +2419,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
if (p_nocatch) {
forbidden_url = 1; // interdire récupération du lien
hts_log_print(opt, LOG_DEBUG, "link forced external at %s%s",
- adr, fil);
+ afs.af.adr, afs.af.fil);
}
// Tester si un lien doit être accepté ou refusé (wizard)
// forbidden_url=1 : lien refusé
@@ -2421,11 +2427,11 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
//if ((ptr>0) && (p_type!=2) && (p_type!=-2)) { // tester autorisations?
if ((p_type != 2) && (p_type != -2)) { // tester autorisations?
if (!p_nocatch) {
- if (adr[0] != '\0') {
+ if (afs.af.adr[0] != '\0') {
hts_log_print(opt, LOG_DEBUG,
- "wizard link test at %s%s..", adr, fil);
+ "wizard link test at %s%s..", afs.af.adr, afs.af.fil);
forbidden_url =
- hts_acceptlink(opt, ptr, adr, fil,
+ hts_acceptlink(opt, ptr, afs.af.adr, afs.af.fil,
intag_name ? intag_name : NULL,
intag_name ? tag_attr_start : NULL,
&set_prio_to, &just_test_it);
@@ -2437,25 +2443,23 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
}
// calculer meme_adresse
meme_adresse =
- strfield2(jump_identification(adr),
+ strfield2(jump_identification(afs.af.adr),
jump_identification(urladr()));
// Début partie sauvegarde
// ici on forme le nom du fichier à sauver, et on patche l'URL
- if (adr[0] != '\0') {
+ if (afs.af.adr[0] != '\0') {
// savename(): simplifier les ../ et autres joyeusetés
- char BIGSTK save[HTS_URLMAXSIZE * 2];
int r_sv = 0;
// En cas de moved, adresse première
- char BIGSTK former_adr[HTS_URLMAXSIZE * 2];
- char BIGSTK former_fil[HTS_URLMAXSIZE * 2];
+ lien_adrfil former;
//
- save[0] = '\0';
- former_adr[0] = '\0';
- former_fil[0] = '\0';
+ afs.save[0] = '\0';
+ former.adr[0] = '\0';
+ former.fil[0] = '\0';
//
// nom du chemin à sauver si on doit le calculer
@@ -2471,14 +2475,14 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
/* Calc */
last_adr[0] = '\0';
//char last_fil[HTS_URLMAXSIZE*2]="";
- strcpybuff(last_adr, adr); // ancienne adresse
+ strcpybuff(last_adr, afs.af.adr); // ancienne adresse
//strcpybuff(last_fil,fil); // ancien chemin
r_sv =
- url_savename(adr, fil, save, former_adr, former_fil,
- heap(ptr)->adr, heap(ptr)->fil, opt,
- opt->liens, opt->lien_tot, sback, cache, hash, ptr,
+ url_savename(&afs, &former, heap(ptr)->adr, heap(ptr)->fil, opt,
+ sback, cache, hash, ptr,
numero_passe, NULL);
- if (strcmp(jump_identification(last_adr), jump_identification(adr)) != 0) { // a changé
+ if (strcmp(jump_identification(last_adr),
+ jump_identification(afs.af.adr)) != 0) { // a changé
// 2e test si moved
@@ -2487,12 +2491,12 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
// forbidden_url=0 : lien accepté
if ((ptr > 0) && (p_type != 2) && (p_type != -2)) { // tester autorisations?
if (!p_nocatch) {
- if (adr[0] != '\0') {
+ if (afs.af.adr[0] != '\0') {
hts_log_print(opt, LOG_DEBUG,
"wizard moved link retest at %s%s..",
- adr, fil);
+ afs.af.adr, afs.af.fil);
forbidden_url =
- hts_acceptlink(opt, ptr, adr, fil,
+ hts_acceptlink(opt, ptr, afs.af.adr, afs.af.fil,
intag_name ? intag_name : NULL,
intag_name ? tag_attr_start :
NULL, &set_prio_to,
@@ -2507,19 +2511,19 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
meme_adresse = 0; // on a changé
}
} else {
- strcpybuff(save, ""); // dummy
+ strcpybuff(afs.save, ""); // dummy
}
}
// resolve unresolved type
if (r_sv != -1 && p_type != 2 && p_type != -2
- && forbidden_url == 0 && IS_DELAYED_EXT(save)
+ && forbidden_url == 0 && IS_DELAYED_EXT(afs.save)
) {
time_t t;
// pas d'erreur, on continue
r_sv =
- hts_wait_delayed(str, adr, fil, save, heap(ptr)->adr,
- heap(ptr)->fil, former_adr, former_fil,
+ hts_wait_delayed(str, &afs, heap(ptr)->adr,
+ heap(ptr)->fil, &former,
&forbidden_url);
/* User interaction, because hts_wait_delayed can be slow.. (3.43) */
@@ -2541,22 +2545,22 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
if (forbidden_url != 1) { // le lien va être chargé
if ((p_type == 2) || (p_type == -2)) { // base href ou codebase, pas un lien
hts_log_print(opt, LOG_DEBUG, "Code/Codebase: %s%s",
- adr, fil);
+ afs.af.adr, afs.af.fil);
} else if ((opt->getmode & 4) == 0) {
hts_log_print(opt, LOG_DEBUG, "Record: %s%s -> %s",
- adr, fil, save);
+ afs.af.adr, afs.af.fil, afs.save);
} else {
- if (!ishtml(opt, fil))
+ if (!ishtml(opt, afs.af.fil))
hts_log_print(opt, LOG_DEBUG,
- "Record after: %s%s -> %s", adr, fil,
- save);
+ "Record after: %s%s -> %s", afs.af.adr, afs.af.fil,
+ afs.save);
else
hts_log_print(opt, LOG_DEBUG, "Record: %s%s -> %s",
- adr, fil, save);
+ afs.af.adr, afs.af.fil, afs.save);
}
} else
- hts_log_print(opt, LOG_DEBUG, "External: %s%s", adr,
- fil);
+ hts_log_print(opt, LOG_DEBUG, "External: %s%s", afs.af.adr,
+ afs.af.fil);
}
/* FIN log */
@@ -2567,29 +2571,29 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
/* */
else if (opt->urlmode == 0) { // URL absolue dans tous les cas
if ((opt->getmode & 1) && (ptr > 0)) { // ecrire les html
- if (!link_has_authority(adr)) {
+ if (!link_has_authority(afs.af.adr)) {
HT_ADD("http://");
} else {
- char *aut = strstr(adr, "//");
+ char *aut = strstr(afs.af.adr, "//");
if (aut) {
char tmp[256];
tmp[0] = '\0';
- strncatbuff(tmp, adr, (int) (aut - adr)); // scheme
+ strncatbuff(tmp, afs.af.adr, aut - afs.af.adr); // scheme
HT_ADD(tmp); // Protocol
HT_ADD("//");
}
}
if (!opt->passprivacy) {
- HT_ADD_HTMLESCAPED(jump_protocol(adr)); // Password
+ HT_ADD_HTMLESCAPED(jump_protocol(afs.af.adr)); // Password
} else {
- HT_ADD_HTMLESCAPED(jump_identification(adr)); // No Password
+ HT_ADD_HTMLESCAPED(jump_identification(afs.af.adr)); // No Password
}
- if (*fil != '/')
+ if (afs.af.fil[0] != '/')
HT_ADD("/");
- HT_ADD_HTMLESCAPED(fil);
+ HT_ADD_HTMLESCAPED(afs.af.fil);
}
lastsaved = eadr - 1; // dernier écrit+1 (enfin euh apres on fait un ++ alors hein)
/* */
@@ -2601,34 +2605,34 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
if ((opt->getmode & 1) && (ptr > 0)) {
if (p_type != -1) { // pas que le nom de fichier (pas classe java)
if (!opt->external) {
- if (!link_has_authority(adr)) {
+ if (!link_has_authority(afs.af.adr)) {
HT_ADD("http://");
if (!opt->passprivacy) {
- HT_ADD_HTMLESCAPED(adr); // Password
+ HT_ADD_HTMLESCAPED(afs.af.adr); // Password
} else {
- HT_ADD_HTMLESCAPED(jump_identification(adr)); // No Password
+ HT_ADD_HTMLESCAPED(jump_identification(afs.af.adr)); // No Password
}
- if (*fil != '/')
+ if (afs.af.fil[0] != '/')
HT_ADD("/");
- HT_ADD_HTMLESCAPED(fil);
+ HT_ADD_HTMLESCAPED(afs.af.fil);
} else {
- char *aut = strstr(adr, "//");
+ char *aut = strstr(afs.af.adr, "//");
if (aut) {
char tmp[256];
tmp[0] = '\0';
- strncatbuff(tmp, adr, (int) (aut - adr)); // scheme
+ strncatbuff(tmp, afs.af.adr, (aut - afs.af.adr)); // scheme
HT_ADD(tmp); // Protocol
HT_ADD("//");
if (!opt->passprivacy) {
- HT_ADD_HTMLESCAPED(jump_protocol(adr)); // Password
+ HT_ADD_HTMLESCAPED(jump_protocol(afs.af.adr)); // Password
} else {
- HT_ADD_HTMLESCAPED(jump_identification(adr)); // No Password
+ HT_ADD_HTMLESCAPED(jump_identification(afs.af.adr)); // No Password
}
- if (*fil != '/')
+ if (afs.af.fil[0] != '/')
HT_ADD("/");
- HT_ADD_HTMLESCAPED(fil);
+ HT_ADD_HTMLESCAPED(afs.af.fil);
}
}
//
@@ -2642,9 +2646,9 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
int cat_data_len = 0;
// ajouter lien external
- switch ((link_has_authority(adr)) ? 1
- : ((fil[strlen(fil) - 1] ==
- '/') ? 1 : (ishtml(opt, fil)))) {
+ switch ((link_has_authority(afs.af.adr)) ? 1
+ : ((afs.af.fil[strlen(afs.af.fil) - 1] ==
+ '/') ? 1 : (ishtml(opt, afs.af.fil)))) {
case 1:
case -2: // html ou répertoire
if (opt->getmode & 1) { // sauver html
@@ -2659,15 +2663,15 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
default: // inconnu
// asp, cgi..
if ((strfield2
- (fil + max(0, (int) strlen(fil) - 4),
+ (afs.af.fil + max(0, (int) strlen(afs.af.fil) - 4),
".gif"))
||
(strfield2
- (fil + max(0, (int) strlen(fil) - 4),
+ (afs.af.fil + max(0, (int) strlen(afs.af.fil) - 4),
".jpg"))
||
(strfield2
- (fil + max(0, (int) strlen(fil) - 4),
+ (afs.af.fil + max(0, (int) strlen(afs.af.fil) - 4),
".xbm"))
/*|| (ishtml(opt,fil)!=0) */
) {
@@ -2690,13 +2694,12 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
} // html,gif
if (patch_it) {
- char BIGSTK save[HTS_URLMAXSIZE * 2];
+ char BIGSTK save[HTS_URLMAXSIZE * 2];
char BIGSTK tempo[HTS_URLMAXSIZE * 2];
strcpybuff(save, StringBuff(opt->path_html_utf8));
strcatbuff(save, cat_name);
- if (lienrelatif(tempo, save, relativesavename()) ==
- 0) {
+ if (lienrelatif(tempo, save, relativesavename()) == 0) {
/* Never escape high-chars (we don't know the encoding!!) */
inplace_escape_uri_utf(tempo, sizeof(tempo)); // escape with %xx
//if (!no_esc_utf)
@@ -2708,33 +2711,33 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
HT_ADD("?link="); // page externe
// same as above
- if (!link_has_authority(adr)) {
+ if (!link_has_authority(afs.af.adr)) {
HT_ADD("http://");
if (!opt->passprivacy) {
- HT_ADD_HTMLESCAPED(adr); // Password
+ HT_ADD_HTMLESCAPED(afs.af.adr); // Password
} else {
- HT_ADD_HTMLESCAPED(jump_identification(adr)); // No Password
+ HT_ADD_HTMLESCAPED(jump_identification(afs.af.adr)); // No Password
}
- if (*fil != '/')
+ if (afs.af.fil[0] != '/')
HT_ADD("/");
- HT_ADD_HTMLESCAPED(fil);
+ HT_ADD_HTMLESCAPED(afs.af.fil);
} else {
- char *aut = strstr(adr, "//");
+ char *aut = strstr(afs.af.adr, "//");
if (aut) {
char tmp[256];
tmp[0] = '\0';
- strncatbuff(tmp, adr, (int) (aut - adr) + 2); // scheme
+ strncatbuff(tmp, afs.af.adr, (aut - afs.af.adr) + 2); // scheme
HT_ADD(tmp);
if (!opt->passprivacy) {
- HT_ADD_HTMLESCAPED(jump_protocol(adr)); // Password
+ HT_ADD_HTMLESCAPED(jump_protocol(afs.af.adr)); // Password
} else {
- HT_ADD_HTMLESCAPED(jump_identification(adr)); // No Password
+ HT_ADD_HTMLESCAPED(jump_identification(afs.af.adr)); // No Password
}
- if (*fil != '/')
+ if (afs.af.fil[0] != '/')
HT_ADD("/");
- HT_ADD_HTMLESCAPED(fil);
+ HT_ADD_HTMLESCAPED(afs.af.fil);
}
}
//
@@ -2773,13 +2776,13 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
} else { // écrire normalement le nom de fichier
HT_ADD("http://");
if (!opt->passprivacy) {
- HT_ADD_HTMLESCAPED(adr); // Password
+ HT_ADD_HTMLESCAPED(afs.af.adr); // Password
} else {
- HT_ADD_HTMLESCAPED(jump_identification(adr)); // No Password
+ HT_ADD_HTMLESCAPED(jump_identification(afs.af.adr)); // No Password
}
- if (*fil != '/')
+ if (afs.af.fil[0] != '/')
HT_ADD("/");
- HT_ADD_HTMLESCAPED(fil);
+ HT_ADD_HTMLESCAPED(afs.af.fil);
} // patcher?
} // external
} else { // que le nom de fichier (classe java)
@@ -2790,7 +2793,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
// Calculer chemin
tempo_pat[0] = '\0';
- strcpybuff(tempo, fil); // <-- ajouté
+ strcpybuff(tempo, afs.af.fil); // <-- ajouté
{
char *a = strrchr(tempo, '/');
@@ -2814,7 +2817,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
char BIGSTK tempo2[HTS_URLMAXSIZE * 2];
strcpybuff(tempo2, a + 1); // FICHIER
- strncatbuff(tempo_pat, tempo, (int) (a - tempo) + 1); // chemin
+ strncatbuff(tempo_pat, tempo, (a - tempo) + 1); // chemin
strcpybuff(tempo, tempo2); // fichier
}
}
@@ -2828,9 +2831,9 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
if (strnotempty(tempo_pat)) {
HT_ADD("codebase=\"http://");
if (!opt->passprivacy) {
- HT_ADD_HTMLESCAPED(adr); // Password
+ HT_ADD_HTMLESCAPED(afs.af.adr); // Password
} else {
- HT_ADD_HTMLESCAPED(jump_identification(adr)); // No Password
+ HT_ADD_HTMLESCAPED(jump_identification(afs.af.adr)); // No Password
}
if (*tempo_pat != '/')
HT_ADD("/");
@@ -2866,12 +2869,12 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
char BIGSTK cid[HTS_URLMAXSIZE * 3];
HT_ADD("cid:");
- make_content_id(adr, fil, cid, sizeof(cid));
+ make_content_id(afs.af.adr, afs.af.fil, cid, sizeof(cid));
HT_ADD_HTMLESCAPED(cid);
lastsaved = eadr - 1; // dernier écrit+1 (enfin euh apres on fait un ++ alors hein)
} else if (opt->urlmode == 3) { // URI absolue /
if ((opt->getmode & 1) && (ptr > 0)) { // ecrire les html
- HT_ADD_HTMLESCAPED(fil);
+ HT_ADD_HTMLESCAPED(afs.af.fil);
}
lastsaved = eadr - 1; // dernier écrit+1 (enfin euh apres on fait un ++ alors hein)
} else if (opt->urlmode == 5) { // transparent proxy URL
@@ -2881,23 +2884,23 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
char *pos;
if ((opt->getmode & 1) && (ptr > 0)) { // ecrire les html
- if (!link_has_authority(adr)) {
+ if (!link_has_authority(afs.af.adr)) {
HT_ADD("http://");
} else {
- char *aut = strstr(adr, "//");
+ char *aut = strstr(afs.af.adr, "//");
if (aut) {
char tmp[256];
tmp[0] = '\0';
- strncatbuff(tmp, adr, (int) (aut - adr)); // scheme
+ strncatbuff(tmp, afs.af.adr, (aut - afs.af.adr)); // scheme
HT_ADD(tmp); // Protocol
HT_ADD("//");
}
}
// filename is taken as URI (ex: "C:\My Website\www.example.com\foo4242.html)
- uri = save;
+ uri = afs.save;
// .. after stripping the path prefix (ex: "www.example.com\foo4242.html)
if (strnotempty(StringBuff(opt->path_html_utf8))) {
@@ -2913,7 +2916,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
}
// put original query string if any (ex: "www.example.com/foo4242.html?q=45)
- pos = strchr(fil, '?');
+ pos = strchr(afs.af.fil, '?');
if (pos != NULL) {
strcatbuff(tempo, pos);
}
@@ -2927,7 +2930,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
tempo[0] = '\0';
// calculer le lien relatif
- if (lienrelatif(tempo, save, relativesavename()) == 0) {
+ if (lienrelatif(tempo, afs.save, relativesavename()) == 0) {
if (!in_media) { // In media (such as real audio): don't patch
/* Never escape high-chars (we don't know the encoding!!) */
inplace_escape_uri_utf(tempo, sizeof(tempo));
@@ -2944,7 +2947,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
}
hts_log_print(opt, LOG_DEBUG,
"relative link at %s build with %s and %s: %s",
- adr, save, relativesavename(), tempo);
+ afs.af.adr, afs.save, relativesavename(), tempo);
// lien applet (code) - il faut placer un codebase avant
if (p_type == -1) { // que le nom de fichier
@@ -3033,7 +3036,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
} else {
hts_log_print(opt, LOG_WARNING,
"Error building relative link %s and %s",
- save, relativesavename());
+ afs.save, relativesavename());
}
} // sinon le lien sera écrit normalement
@@ -3048,13 +3051,13 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
#endif
/* Security check */
- if (strlen(save) >= HTS_URLMAXSIZE) {
- adr[0] = '\0';
+ if (strlen(afs.save) >= HTS_URLMAXSIZE) {
+ afs.af.adr[0] = '\0';
hts_log_print(opt, LOG_WARNING, "Link is too long: %s",
- save);
+ afs.save);
}
- if ((adr[0] != '\0') && (p_type != 2) && (p_type != -2) && (forbidden_url != 1)) { // si le fichier n'existe pas, ajouter à la liste
+ if ((afs.af.adr[0] != '\0') && (p_type != 2) && (p_type != -2) && (forbidden_url != 1)) { // si le fichier n'existe pas, ajouter à la liste
// n'y a-t-il pas trop de liens?
if (0) {
// CLEANUP
@@ -3083,7 +3086,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
if ((opt->getmode & 4) == 0) { // traiter html après
pass_fix = 0;
} else { // vérifier que ce n'est pas un !html
- if (!ishtml(opt, fil))
+ if (!ishtml(opt, afs.af.fil))
pass_fix = 1; // priorité inférieure (traiter après)
else
pass_fix = max(0, numero_passe); // priorité normale
@@ -3106,15 +3109,15 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
//
// On part de la fin et on essaye de se presser (économise temps machine)
{
- int i = hash_read(hash, save, NULL, 0); // lecture type 0 (sav)
+ int i = hash_read(hash, afs.save, NULL, 0); // lecture type 0 (sav)
if (i >= 0) {
if ((opt->debug > 1) && (opt->log != NULL)) {
- if (strcmp(adr, heap(i)->adr) != 0
- || strcmp(fil, heap(i)->fil) != 0) {
+ if (strcmp(afs.af.adr, heap(i)->adr) != 0
+ || strcmp(afs.af.fil, heap(i)->fil) != 0) {
hts_log_print(opt, LOG_DEBUG,
"merging similar links %s%s and %s%s",
- adr, fil, heap(i)->adr,
+ afs.af.adr, afs.af.fil, heap(i)->adr,
heap(i)->fil);
}
}
@@ -3136,16 +3139,16 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
// DEBUT ROBOTS.TXT AJOUT
if (!just_test_it) {
- if ((!strfield(adr, "ftp://")) // non ftp
- && (!strfield(adr, "file://"))
+ if ((!strfield(afs.af.adr, "ftp://")) // non ftp
+ && (!strfield(afs.af.adr, "file://"))
) { // non file
if (opt->robots) { // récupérer robots
- if (ishtml(opt, fil) != 0) { // pas la peine pour des fichiers isolés
- if (checkrobots(_ROBOTS, adr, "") != -1) { // robots.txt ?
- checkrobots_set(_ROBOTS, adr, ""); // ajouter entrée vide
- if (checkrobots(_ROBOTS, adr, "") == -1) { // robots.txt ?
+ if (ishtml(opt, afs.af.fil) != 0) { // pas la peine pour des fichiers isolés
+ if (checkrobots(_ROBOTS, afs.af.adr, "") != -1) { // robots.txt ?
+ checkrobots_set(_ROBOTS, afs.af.adr, ""); // ajouter entrée vide
+ if (checkrobots(_ROBOTS, afs.af.adr, "") == -1) { // robots.txt ?
// enregistrer robots.txt (MACRO)
- if (!hts_record_link(opt, adr, "/robots.txt", "", "", "", NULL)) {
+ if (!hts_record_link(opt, afs.af.adr, "/robots.txt", "", "", "", NULL)) {
printf
("PANIC! : Not enough memory [%d]\n",
__LINE__);
@@ -3173,7 +3176,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
#endif
hts_log_print(opt, LOG_DEBUG,
"robots.txt added at %s",
- adr);
+ afs.af.adr);
} else {
hts_log_print(opt, LOG_ERROR,
"Unexpected robots.txt error at %d",
@@ -3187,7 +3190,8 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
// FIN ROBOTS.TXT AJOUT
// enregistrer
- if (!hts_record_link(opt, adr, fil, save, former_adr, former_fil, codebase)) {
+ if (!hts_record_link(opt, afs.af.adr, afs.af.fil, afs.save,
+ former.adr, former.fil, codebase)) {
printf("PANIC! : Not enough memory [%d]\n",
__LINE__);
hts_log_print(opt, LOG_PANIC, "Not enough memory");
@@ -3245,7 +3249,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
} else { // if !dejafait
hts_log_print(opt, LOG_DEBUG,
"link has already been recorded, cancelled: %s",
- save);
+ afs.save);
}
@@ -3260,9 +3264,9 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
} // if ok==0
- assertf(eadr - adr >= 0); // Should not go back
- if (eadr > adr) {
- INCREMENT_CURRENT_ADR(eadr - 1 - adr);
+ assertf(eadr - html >= 0); // Should not go back
+ if (eadr > html) {
+ INCREMENT_CURRENT_ADR(eadr - 1 - html);
}
// adr=eadr-1; // ** sauter
@@ -3276,7 +3280,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
} // si '<' ou '>'
// plus loin
- adr++; // automate will be checked next loop
+ html++; // automate will be checked next loop
/* Otimization: if we are scanning in HTML data (not in tag or script),
then jump to the next starting tag */
@@ -3288,25 +3292,25 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
&&(!inscript_tag) /* Not in tag with script inside */
) {
/* Not at the end */
- if ((((int) (adr - r->adr))) < r->size) {
+ if ((((int) (html - r->adr))) < r->size) {
/* Not on a starting tag yet */
- if (*adr != '<') {
+ if (*html != '<') {
/* strchr does not well behave with null chrs.. */
/* char* adr_next = strchr(adr,'<'); */
- char *adr_next = adr;
+ const char *adr_next = html;
while(*adr_next != '<' && (adr_next - r->adr) < r->size) {
adr_next++;
}
/* Jump to near end (index hack) */
if (!adr_next || *adr_next != '<') {
- if (((int) (adr - r->adr) < (r->size - 4))
+ if (((int) (html - r->adr) < (r->size - 4))
&& (r->size > 4)
) {
- adr = r->adr + r->size - 2;
+ html = r->adr + r->size - 2;
}
} else {
- adr = adr_next;
+ html = adr_next;
}
}
}
@@ -3315,8 +3319,8 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
// ----------
// écrire peu à peu
if ((opt->getmode & 1) && (ptr > 0))
- HT_ADD_ADR;
- lastsaved = adr; // dernier écrit+1
+ HT_add_adr;
+ lastsaved = html; // dernier écrit+1
// ----------
// Checks
@@ -3325,13 +3329,13 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
// Check max time
if (!back_checkmirror(opt)) {
- adr = r->adr + r->size;
+ html = r->adr + r->size;
}
}
// pour les stats du shell si parsing trop long
if (r->size)
opt->state._hts_in_html_done =
- (100 * ((int) (adr - r->adr))) / (int) (r->size);
+ (100 * ((int) (html - r->adr))) / (int) (r->size);
if (opt->state._hts_in_html_poll) {
opt->state._hts_in_html_poll = 0;
// temps à attendre, et remplir autant que l'on peut le cache (backing)
@@ -3369,7 +3373,7 @@ int htsparse(htsmoduleStruct * str, htsmoduleStructExtended * stre) {
back_wait(sback, opt, cache, HTS_STAT.stat_timestart);
back_fillmax(sback, opt, cache, ptr, numero_passe);
}
- } while((((int) (adr - r->adr))) < r->size);
+ } while(html - r->adr < r->size);
opt->state._hts_in_html_parsing = 0; // flag
opt->state._hts_cancel = 0; // pas de cancel
@@ -3440,22 +3444,23 @@ int hts_mirror_check_moved(htsmoduleStruct * str,
hts_log_print(opt, LOG_WARNING, "%s for %s%s", r->msg, urladr(), urlfil());
{
- char BIGSTK mov_url[HTS_URLMAXSIZE * 2], mov_adr[HTS_URLMAXSIZE * 2],
- mov_fil[HTS_URLMAXSIZE * 2];
+ char BIGSTK mov_url[HTS_URLMAXSIZE * 2];
+ lien_adrfilsave savedmoved;
+ lien_adrfil *const moved = &savedmoved.af;
int get_it = 0; // ne pas prendre le fichier à la même adresse par défaut
int reponse = 0;
mov_url[0] = '\0';
- mov_adr[0] = '\0';
- mov_fil[0] = '\0';
+ moved->adr[0] = '\0';
+ moved->fil[0] = '\0';
+ savedmoved.save[0] = '\0';
//
strcpybuff(mov_url, r->location);
// url qque -> adresse+fichier
if ((reponse =
- ident_url_relatif(mov_url, urladr(), urlfil(), mov_adr,
- mov_fil)) >= 0) {
+ ident_url_relatif(mov_url, urladr(), urlfil(), moved)) >= 0) {
int set_prio_to = 0; // pas de priotité fixéd par wizard
// check whether URLHack is harmless or not
@@ -3464,24 +3469,24 @@ int hts_mirror_check_moved(htsmoduleStruct * str,
char BIGSTK pn_adr[HTS_URLMAXSIZE * 2], pn_fil[HTS_URLMAXSIZE * 2];
n_adr[0] = n_fil[0] = '\0';
- (void) adr_normalized(mov_adr, n_adr);
- (void) fil_normalized(mov_fil, n_fil);
+ (void) adr_normalized(moved->adr, n_adr);
+ (void) fil_normalized(moved->fil, n_fil);
(void) adr_normalized(urladr(), pn_adr);
(void) fil_normalized(urlfil(), pn_fil);
if (strcasecmp(n_adr, pn_adr) == 0
&& strcasecmp(n_fil, pn_fil) == 0) {
hts_log_print(opt, LOG_WARNING,
"Redirected link is identical because of 'URL Hack' option: %s%s and %s%s",
- urladr(), urlfil(), mov_adr, mov_fil);
+ urladr(), urlfil(), moved->adr, moved->fil);
}
}
- //if (ident_url_absolute(mov_url,mov_adr,mov_fil)!=-1) { // ok URL reconnue
+ //if (ident_url_absolute(mov_url,moved->adr,moved->fil)!=-1) { // ok URL reconnue
// c'est (en gros) la même URL..
// si c'est un problème de casse dans le host c'est que le serveur est buggé
// ("RFC says.." : host name IS case insensitive)
- if ((strfield2(mov_adr, urladr()) != 0) && (strfield2(mov_fil, urlfil()) != 0)) { // identique à casse près
+ if ((strfield2(moved->adr, urladr()) != 0) && (strfield2(moved->fil, urlfil()) != 0)) { // identique à casse près
// on tourne en rond
- if (strcmp(mov_fil, urlfil()) == 0) {
+ if (strcmp(moved->fil, urlfil()) == 0) {
error = 1;
get_it = -1; // ne rien faire
hts_log_print(opt, LOG_WARNING,
@@ -3495,23 +3500,23 @@ int hts_mirror_check_moved(htsmoduleStruct * str,
// -> on prend à cette adresse, le lien sera enregistré avec lien_record() (hash)
hts_log_print(opt, LOG_DEBUG,
"wizard link test for moved file at %s%s..",
- mov_adr, mov_fil);
+ moved->adr, moved->fil);
// accepté?
- if (hts_acceptlink(opt, ptr, mov_adr, mov_fil, NULL, NULL, &set_prio_to, NULL) != 1) { /* nouvelle adresse non refusée ? */
+ if (hts_acceptlink(opt, ptr, moved->adr, moved->fil, NULL, NULL, &set_prio_to, NULL) != 1) { /* nouvelle adresse non refusée ? */
get_it = 1;
hts_log_print(opt, LOG_DEBUG, "moved link accepted: %s%s",
- mov_adr, mov_fil);
+ moved->adr, moved->fil);
}
} /* sinon traité normalement */
}
- //if ((strfield2(mov_adr,urladr())!=0) && (strfield2(mov_fil,urlfil())!=0)) { // identique à casse près
+ //if ((strfield2(moved->adr,urladr())!=0) && (strfield2(moved->fil,urlfil())!=0)) { // identique à casse près
if (get_it == 1) {
// court-circuiter le reste du traitement
// et reculer pour mieux sauter
hts_log_print(opt, LOG_WARNING,
"Warning moved treated for %s%s (real one is %s%s)",
- urladr(), urlfil(), mov_adr, mov_fil);
+ urladr(), urlfil(), moved->adr, moved->fil);
// canceller lien actuel
error = 1;
hash_invalidate_entry(hashptr, ptr); // invalidate hashtable entry
@@ -3520,17 +3525,14 @@ int hts_mirror_check_moved(htsmoduleStruct * str,
// set_prio_to=0+1; // protection if the moved URL is an html page!!
//xxc xxc
{
- char BIGSTK mov_sav[HTS_URLMAXSIZE * 2];
-
// calculer lien et éventuellement modifier addresse/fichier
- if (url_savename
- (mov_adr, mov_fil, mov_sav, NULL, NULL,
+ if (url_savename(&savedmoved, NULL,
heap(heap(ptr)->precedent)->adr,
- heap(heap(ptr)->precedent)->fil, opt, opt->liens, opt->lien_tot,
+ heap(heap(ptr)->precedent)->fil, opt,
sback, cache, hash, ptr, numero_passe, NULL) != -1) {
- if (hash_read(hash, mov_sav, NULL, HASH_STRUCT_FILENAME) < 0) { // n'existe pas déja
+ if (hash_read(hash, savedmoved.save, NULL, HASH_STRUCT_FILENAME) < 0) { // n'existe pas déja
// enregistrer lien avec SAV IDENTIQUE
- if (hts_record_link(opt, mov_adr, mov_fil, heap(ptr)->sav, "", "", NULL)) {
+ if (hts_record_link(opt, moved->adr, moved->fil, heap(ptr)->sav, "", "", NULL)) {
// mode test?
heap_top()->testmode = heap(ptr)->testmode;
heap_top()->link_import = 0; // mode normal
@@ -3577,7 +3579,7 @@ int hts_mirror_check_moved(htsmoduleStruct * str,
inplace_escape_uri(mov_url, sizeof(mov_url));
} else {
char BIGSTK cid[HTS_URLMAXSIZE * 3];
- make_content_id(mov_adr, mov_fil, cid, sizeof(cid));
+ make_content_id(moved->adr, moved->fil, cid, sizeof(cid));
strcpybuff(mov_url, "cid:");
strcatbuff(mov_url, cid);
}
@@ -3918,28 +3920,24 @@ void hts_mirror_process_user_interaction(htsmoduleStruct * str,
// changement dans les préférences
if (opt->state._hts_addurl) {
- char BIGSTK add_adr[HTS_URLMAXSIZE * 2];
- char BIGSTK add_fil[HTS_URLMAXSIZE * 2];
+ lien_adrfilsave add;
while(*opt->state._hts_addurl) {
char BIGSTK add_url[HTS_URLMAXSIZE * 2];
- add_adr[0] = add_fil[0] = add_url[0] = '\0';
+ add.af.adr[0] = add.af.fil[0] = add_url[0] = '\0';
if (!link_has_authority(*opt->state._hts_addurl))
strcpybuff(add_url, "http://"); // ajouter http://
strcatbuff(add_url, *opt->state._hts_addurl);
- if (ident_url_absolute(add_url, add_adr, add_fil) >= 0) {
+ if (ident_url_absolute(add_url, &add.af) >= 0) {
// ----Ajout----
- // noter NOUVEAU lien
- char BIGSTK add_sav[HTS_URLMAXSIZE * 2];
// calculer lien et éventuellement modifier addresse/fichier
if (url_savename
- (add_adr, add_fil, add_sav, NULL, NULL, NULL, NULL, opt, opt->liens,
- opt->lien_tot, sback, cache, hash, ptr, numero_passe, NULL) != -1) {
- if (hash_read(hash, add_sav, NULL, HASH_STRUCT_FILENAME) < 0) { // n'existe pas déja
+ (&add, NULL, NULL, NULL, opt, sback, cache, hash, ptr, numero_passe, NULL) != -1) {
+ if (hash_read(hash, add.save, NULL, HASH_STRUCT_FILENAME) < 0) { // n'existe pas déja
// enregistrer lien
- if (hts_record_link(opt, add_adr, add_fil, add_sav, "", "", NULL)) {
+ if (hts_record_link(opt, add.af.adr, add.af.fil, add.save, "", "", NULL)) {
heap_top()->testmode = 0; // mode test?
heap_top()->link_import = 0; // mode normal
heap_top()->depth = opt->depth;
@@ -3948,8 +3946,8 @@ void hts_mirror_process_user_interaction(htsmoduleStruct * str,
heap_top()->premier = heap_top_index();
heap_top()->precedent = heap_top_index();
//
- hts_log_print(opt, LOG_INFO, "Link added by user: %s%s", add_adr,
- add_fil);
+ hts_log_print(opt, LOG_INFO, "Link added by user: %s%s", add.af.adr,
+ add.af.fil);
//
} else { // oups erreur, plus de mémoire!!
printf("PANIC! : Not enough memory [%d]\n", __LINE__);
@@ -3961,7 +3959,7 @@ void hts_mirror_process_user_interaction(htsmoduleStruct * str,
} else {
hts_log_print(opt, LOG_NOTICE,
"Existing link %s%s not added after user request",
- add_adr, add_fil);
+ add.af.adr, add.af.fil);
}
}
@@ -4415,9 +4413,9 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct * str,
}
/* Wait for delayed types */
-int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save,
- char *parent_adr, char *parent_fil, char *former_adr,
- char *former_fil, int *forbidden_url) {
+int hts_wait_delayed(htsmoduleStruct * str, lien_adrfilsave *afs,
+ char *parent_adr, char *parent_fil, lien_adrfil *former,
+ int *forbidden_url) {
ENGINE_LOAD_CONTEXT_BASE();
hash_struct *const hash = hashptr;
@@ -4426,17 +4424,17 @@ int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save,
char in_error_msg[32];
// resolve unresolved type
- if (opt->savename_delayed != 0 && *forbidden_url == 0 && IS_DELAYED_EXT(save)
+ if (opt->savename_delayed != 0 && *forbidden_url == 0 && IS_DELAYED_EXT(afs->save)
&& !opt->state.stop) {
int loops;
int continue_loop;
- hts_log_print(opt, LOG_DEBUG, "Waiting for type to be known: %s%s", adr,
- fil);
+ hts_log_print(opt, LOG_DEBUG, "Waiting for type to be known: %s%s", afs->af.adr,
+ afs->af.fil);
/* Follow while type is unknown and redirects occurs */
for(loops = 0, continue_loop = 1;
- IS_DELAYED_EXT(save) && continue_loop && loops < 7; loops++) {
+ IS_DELAYED_EXT(afs->save) && continue_loop && loops < 7; loops++) {
continue_loop = 0;
/*
@@ -4449,22 +4447,22 @@ int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save,
lien_back back;
memset(&back, 0, sizeof(back));
- back.r = cache_read(opt, cache, adr, fil, NULL, NULL); // test uniquement
+ back.r = cache_read(opt, cache, afs->af.adr, afs->af.fil, NULL, NULL); // test uniquement
if (back.r.statuscode == HTTP_OK && strnotempty(back.r.contenttype)) { // cache found, and aswer is 'OK'
hts_log_print(opt, LOG_DEBUG,
"Direct type lookup in cache (-%%D1): %s",
back.r.contenttype);
/* Recompute filename with MIME type */
- save[0] = '\0';
- url_savename(adr, fil, save, former_adr, former_fil, heap(ptr)->adr,
- heap(ptr)->fil, opt, opt->liens, opt->lien_tot, sback, cache,
+ afs->save[0] = '\0';
+ url_savename(afs, former, heap(ptr)->adr,
+ heap(ptr)->fil, opt, sback, cache,
hash, ptr, numero_passe, &back);
/* Recompute authorization with MIME type */
{
int new_forbidden_url =
- hts_acceptmime(opt, ptr, adr, fil, back.r.contenttype);
+ hts_acceptmime(opt, ptr, afs->af.adr, afs->af.fil, back.r.contenttype);
if (new_forbidden_url != -1) {
hts_log_print(opt, LOG_DEBUG, "result for wizard mime test: %d",
new_forbidden_url);
@@ -4472,7 +4470,7 @@ int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save,
*forbidden_url = new_forbidden_url;
hts_log_print(opt, LOG_DEBUG,
"link forbidden because of MIME types restrictions: %s%s",
- adr, fil);
+ afs->af.adr, afs->af.fil);
break; // exit loop
}
}
@@ -4484,11 +4482,11 @@ int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save,
}
/* Check if the file was recorded already (necessary for redirects) */
- if (hash_read(hash, save, NULL, HASH_STRUCT_FILENAME) >= 0) {
+ if (hash_read(hash, afs->save, NULL, HASH_STRUCT_FILENAME) >= 0) {
if (loops == 0) { /* Should not happend */
hts_log_print(opt, LOG_ERROR,
"Duplicate entry in hts_wait_delayed() cancelled: %s%s -> %s",
- adr, fil, save);
+ afs->af.adr, afs->af.fil, afs->save);
}
/* Exit loop (we're done) */
continue_loop = 0;
@@ -4497,11 +4495,11 @@ int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save,
/* Add in backing (back_index() will respond correctly) */
if (back_add_if_not_exists
- (sback, opt, cache, adr, fil, save, parent_adr, parent_fil,
+ (sback, opt, cache, afs->af.adr, afs->af.fil, afs->save, parent_adr, parent_fil,
0) != -1) {
int b;
- b = back_index(opt, sback, adr, fil, save);
+ b = back_index(opt, sback, afs->af.adr, afs->af.fil, afs->save);
if (b < 0) {
printf("PANIC! : Crash adding error, unexpected error found.. [%d]\n",
__LINE__);
@@ -4529,15 +4527,15 @@ int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save,
b = -1;
/* Recompute filename with MIME type */
- save[0] = '\0';
- url_savename(adr, fil, save, former_adr, former_fil, heap(ptr)->adr,
- heap(ptr)->fil, opt, opt->liens, opt->lien_tot, sback, cache,
+ afs->save[0] = '\0';
+ url_savename(afs, former, heap(ptr)->adr,
+ heap(ptr)->fil, opt, sback, cache,
hash, ptr, numero_passe, &delayed_back);
/* Recompute authorization with MIME type */
{
int new_forbidden_url =
- hts_acceptmime(opt, ptr, adr, fil, delayed_back.r.contenttype);
+ hts_acceptmime(opt, ptr, afs->af.adr, afs->af.fil, delayed_back.r.contenttype);
if (new_forbidden_url != -1) {
hts_log_print(opt, LOG_DEBUG, "result for wizard mime test: %d",
*forbidden_url);
@@ -4545,7 +4543,7 @@ int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save,
*forbidden_url = new_forbidden_url;
hts_log_print(opt, LOG_DEBUG,
"link forbidden because of MIME types restrictions: %s%s",
- adr, fil);
+ afs->af.adr, afs->af.fil);
break; // exit loop
}
}
@@ -4553,9 +4551,9 @@ int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save,
/* Re-Add wiht correct type */
if (back_add_if_not_exists
- (sback, opt, cache, adr, fil, save, parent_adr, parent_fil,
+ (sback, opt, cache, afs->af.adr, afs->af.fil, afs->save, parent_adr, parent_fil,
0) != -1) {
- b = back_index(opt, sback, adr, fil, save);
+ b = back_index(opt, sback, afs->af.adr, afs->af.fil, afs->save);
}
if (b < 0) {
printf
@@ -4664,36 +4662,35 @@ int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save,
/* Handle redirect */
if ((int) strnotempty(mov_url)) { // location existe!
- char BIGSTK mov_adr[HTS_URLMAXSIZE * 2],
- mov_fil[HTS_URLMAXSIZE * 2];
- mov_adr[0] = mov_fil[0] = '\0';
+ lien_adrfil moved;
+ moved.adr[0] = moved.fil[0] = '\0';
//
- if (ident_url_relatif(mov_url, adr, fil, mov_adr, mov_fil) >= 0) {
+ if (ident_url_relatif(mov_url, afs->af.adr, afs->af.fil, &moved) >= 0) {
hts_log_print(opt, LOG_DEBUG,
"Redirect while resolving type: %s%s -> %s%s",
- adr, fil, mov_adr, mov_fil);
+ afs->af.adr, afs->af.fil, moved.adr, moved.fil);
// si non bouclage sur soi même, ou si test avec GET non testé
- if (strcmp(mov_adr, adr) != 0 || strcmp(mov_fil, fil) != 0) {
+ if (strcmp(moved.adr, afs->af.adr) != 0 || strcmp(moved.fil, afs->af.fil) != 0) {
- // recopier former_adr/fil?
- if ((former_adr) && (former_fil)) {
- if (strnotempty(former_adr) == 0) { // Pas déja noté
- strcpybuff(former_adr, adr);
- strcpybuff(former_fil, fil);
+ // recopier former->adr/fil?
+ if (former != NULL) {
+ if (strnotempty(former->adr) == 0) { // Pas déja noté
+ strcpybuff(former->adr, afs->af.adr);
+ strcpybuff(former->fil, afs->af.fil);
}
}
// check explicit forbidden - don't follow 3xx in this case
{
int set_prio_to = 0;
- if (hts_acceptlink(opt, ptr, mov_adr, mov_fil, NULL, NULL, &set_prio_to, NULL) == 1) { /* forbidden */
+ if (hts_acceptlink(opt, ptr, moved.adr, moved.fil, NULL, NULL, &set_prio_to, NULL) == 1) { /* forbidden */
/* Note: the cache 'cached_tests' system will remember this error, and we'll only issue ONE request */
*forbidden_url = 1; /* Forbidden! */
hts_log_print(opt, LOG_DEBUG,
"link forbidden because of redirect beyond the mirror scope at %s%s -> %s%s",
- adr, fil, mov_adr, mov_fil);
- strcpybuff(adr, mov_adr);
- strcpybuff(fil, mov_fil);
+ afs->af.adr, afs->af.fil, moved.adr, moved.fil);
+ strcpybuff(afs->af.adr, moved.adr);
+ strcpybuff(afs->af.fil, moved.fil);
mov_url[0] = '\0';
break;
}
@@ -4701,45 +4698,44 @@ int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save,
// ftp: stop!
if (strfield(mov_url, "ftp://")) {
- strcpybuff(adr, mov_adr);
- strcpybuff(fil, mov_fil);
+ strcpybuff(afs->af.adr, moved.adr);
+ strcpybuff(afs->af.fil, moved.fil);
break;
}
/* ok, continue */
- strcpybuff(adr, mov_adr);
- strcpybuff(fil, mov_fil);
+ strcpybuff(afs->af.adr, moved.adr);
+ strcpybuff(afs->af.fil, moved.fil);
continue_loop = 1;
/* Recompute filename for hash lookup */
- save[0] = '\0';
- url_savename(adr, fil, save, former_adr, former_fil,
- heap(ptr)->adr, heap(ptr)->fil, opt, opt->liens,
- opt->lien_tot, sback, cache, hash, ptr, numero_passe,
+ afs->save[0] = '\0';
+ url_savename(afs, former, heap(ptr)->adr, heap(ptr)->fil,
+ opt, sback, cache, hash, ptr, numero_passe,
&delayed_back);
} else {
hts_log_print(opt, LOG_WARNING,
"Unable to test %s%s (loop to same filename)",
- adr, fil);
+ afs->af.adr, afs->af.fil);
} // loop to same location
} // ident_url_relatif()
} // location
} // redirect
- hts_log_print(opt, LOG_DEBUG, "Final type for %s%s: '%s'", adr, fil,
+ hts_log_print(opt, LOG_DEBUG, "Final type for %s%s: '%s'", afs->af.adr, afs->af.fil,
delayed_back.r.contenttype);
/* If we are done, do additional checks with final type and authorizations */
if (!continue_loop) {
/* Recompute filename with MIME type */
- save[0] = '\0';
- url_savename(adr, fil, save, former_adr, former_fil,
- heap(ptr)->adr, heap(ptr)->fil, opt, opt->liens, opt->lien_tot,
+ afs->save[0] = '\0';
+ url_savename(afs, former,
+ heap(ptr)->adr, heap(ptr)->fil, opt,
sback, cache, hash, ptr, numero_passe, &delayed_back);
/* Recompute authorization with MIME type */
{
int new_forbidden_url =
- hts_acceptmime(opt, ptr, adr, fil, delayed_back.r.contenttype);
+ hts_acceptmime(opt, ptr, afs->af.adr, afs->af.fil, delayed_back.r.contenttype);
if (new_forbidden_url != -1) {
hts_log_print(opt, LOG_DEBUG, "result for wizard mime test: %d",
*forbidden_url);
@@ -4747,7 +4743,7 @@ int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save,
*forbidden_url = new_forbidden_url;
hts_log_print(opt, LOG_DEBUG,
"link forbidden because of MIME types restrictions: %s%s",
- adr, fil);
+ afs->af.adr, afs->af.fil);
break; // exit loop
}
}
@@ -4764,7 +4760,7 @@ int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save,
}
}
/* Patch destination filename for direct-to-disk mode */
- strcpybuff(back[b].url_sav, save);
+ strcpybuff(back[b].url_sav, afs->save);
}
} // b >= 0
@@ -4791,25 +4787,25 @@ int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save,
if (in_error == STATUSCODE_TOO_BIG) {
hts_log_print(opt, LOG_INFO,
"link not taken because of its size (%d bytes) at %s%s",
- (int) in_error_size, adr, fil);
+ (int) in_error_size, afs->af.adr, afs->af.fil);
} else {
hts_log_print(opt, LOG_INFO,
"link not taken because of error (%d '%s') at %s%s",
- in_error, in_error_msg, adr, fil);
+ in_error, in_error_msg, afs->af.adr, afs->af.fil);
}
}
}
// error
- if (*forbidden_url != 1 && IS_DELAYED_EXT(save)) {
+ if (*forbidden_url != 1 && IS_DELAYED_EXT(afs->save)) {
*forbidden_url = 1;
if (in_error) {
hts_log_print(opt, LOG_WARNING,
"link in error (%d '%s'), type unknown, aborting: %s%s",
- in_error, in_error_msg, adr, fil);
+ in_error, in_error_msg, afs->af.adr, afs->af.fil);
} else {
hts_log_print(opt, LOG_WARNING,
"link is probably looping, type unknown, aborting: %s%s",
- adr, fil);
+ afs->af.adr, afs->af.fil);
}
}
diff --git a/src/htsparse.h b/src/htsparse.h
index 2d1abd4..01eb953 100644
--- a/src/htsparse.h
+++ b/src/htsparse.h
@@ -135,9 +135,9 @@ int hts_mirror_wait_for_next_file(htsmoduleStruct * str,
to be ready for naming, having its header MIME type
If the final URL is to be forbidden, sets 'forbidden_url' to the corresponding value
*/
-int hts_wait_delayed(htsmoduleStruct * str, char *adr, char *fil, char *save,
- char *parent_adr, char *parent_fil, char *former_adr,
- char *former_fil, int *forbidden_url);
+int hts_wait_delayed(htsmoduleStruct * str, lien_adrfilsave *afs,
+ char *parent_adr, char *parent_fil, lien_adrfil *former,
+ int *forbidden_url);
/* Context state */
diff --git a/src/htstools.c b/src/htstools.c
index 1780a53..1422df4 100644
--- a/src/htstools.c
+++ b/src/htstools.c
@@ -121,13 +121,16 @@ static void unescapehttp(const char *s, String * tempo) {
// 0 : ok
// -1 : erreur
// -2 : protocole non supporté (ftp)
-int ident_url_relatif(const char *lien, const char *origin_adr,
- const char *origin_fil, char *adr, char *fil) {
+int ident_url_relatif(const char *lien, const char *origin_adr,
+ const char *origin_fil,
+ lien_adrfil* const adrfil) {
int ok = 0;
int scheme = 0;
- adr[0] = '\0';
- fil[0] = '\0'; //effacer buffers
+ assertf(adrfil != NULL);
+
+ adrfil->adr[0] = '\0';
+ adrfil->fil[0] = '\0'; //effacer buffers
// lien non vide!
if (strnotempty(lien) == 0)
@@ -149,13 +152,13 @@ int ident_url_relatif(const char *lien, const char *origin_adr,
|| (strfield(lien, "file://")) // scheme+//
|| (strncmp(lien, "//", 2) == 0) // // sans scheme (-> default)
) {
- if (ident_url_absolute(lien, adr, fil) == -1) {
+ if (ident_url_absolute(lien, adrfil) == -1) {
ok = -1; // erreur URL
}
} else if (strfield(lien, "ftp://")) {
// Note: ftp:foobar.gif is not valid
if (ftp_available()) { // ftp supporté
- if (ident_url_absolute(lien, adr, fil) == -1) {
+ if (ident_url_absolute(lien, adrfil) == -1) {
ok = -1; // erreur URL
}
} else {
@@ -164,7 +167,7 @@ int ident_url_relatif(const char *lien, const char *origin_adr,
#if HTS_USEOPENSSL
} else if (strfield(lien, "https://")) {
// Note: ftp:foobar.gif is not valid
- if (ident_url_absolute(lien, adr, fil) == -1) {
+ if (ident_url_absolute(lien, adrfil) == -1) {
ok = -1; // erreur URL
}
#endif
@@ -191,30 +194,30 @@ int ident_url_relatif(const char *lien, const char *origin_adr,
/* patch scheme if necessary */
if (strfield(lien, "http:")) {
lien += 5;
- strcpybuff(adr, jump_protocol(origin_adr)); // même adresse ; protocole vide (http)
+ strcpybuff(adrfil->adr, jump_protocol(origin_adr)); // même adresse ; protocole vide (http)
} else if (strfield(lien, "https:")) {
lien += 6;
- strcpybuff(adr, "https://"); // même adresse forcée en https
- strcatbuff(adr, jump_protocol(origin_adr));
+ strcpybuff(adrfil->adr, "https://"); // même adresse forcée en https
+ strcatbuff(adrfil->adr, jump_protocol(origin_adr));
} else if (strfield(lien, "ftp:")) {
lien += 4;
- strcpybuff(adr, "ftp://"); // même adresse forcée en ftp
- strcatbuff(adr, jump_protocol(origin_adr));
+ strcpybuff(adrfil->adr, "ftp://"); // même adresse forcée en ftp
+ strcatbuff(adrfil->adr, jump_protocol(origin_adr));
} else {
- strcpybuff(adr, origin_adr); // même adresse ; et même éventuel protocole
+ strcpybuff(adrfil->adr, origin_adr); // même adresse ; et même éventuel protocole
}
if (*lien != '/') { // sinon c'est un lien absolu
if (*lien == '\0') {
- strcpybuff(fil, origin_fil);
+ strcpybuff(adrfil->fil, origin_fil);
} else if (*lien == '?') { // example: a href="?page=2"
char *a;
- strcpybuff(fil, origin_fil);
- a = strchr(fil, '?');
+ strcpybuff(adrfil->fil, origin_fil);
+ a = strchr(adrfil->fil, '?');
if (a)
*a = '\0';
- strcatbuff(fil, lien);
+ strcatbuff(adrfil->fil, lien);
} else {
const char *a = strchr(origin_fil, '?');
@@ -225,14 +228,14 @@ int ident_url_relatif(const char *lien, const char *origin_adr,
if (*a == '/') { // ok on a un '/'
if ((((int) (a - origin_fil)) + 1 + strlen(lien)) < HTS_URLMAXSIZE) {
// copier chemin
- strncpy(fil, origin_fil, ((int) (a - origin_fil)) + 1);
- *(fil + ((int) (a - origin_fil)) + 1) = '\0';
+ strncpy(adrfil->fil, origin_fil, ((int) (a - origin_fil)) + 1);
+ *(adrfil->fil + ((int) (a - origin_fil)) + 1) = '\0';
// copier chemin relatif
- if (((int) strlen(fil) + (int) strlen(lien)) < HTS_URLMAXSIZE) {
- strcatbuff(fil, lien + ((*lien == '/') ? 1 : 0));
+ if (((int) strlen(adrfil->fil) + (int) strlen(lien)) < HTS_URLMAXSIZE) {
+ strcatbuff(adrfil->fil, lien + ((*lien == '/') ? 1 : 0));
// simplifier url pour les ../
- fil_simplifie(fil);
+ fil_simplifie(adrfil->fil);
} else
ok = -1; // erreur
} else { // erreur
@@ -244,8 +247,8 @@ int ident_url_relatif(const char *lien, const char *origin_adr,
}
} else { // chemin absolu
// copier chemin directement
- strcatbuff(fil, lien);
- fil_simplifie(fil);
+ strcatbuff(adrfil->fil, lien);
+ fil_simplifie(adrfil->fil);
} // *lien!='/'
} else
ok = -1;
@@ -254,7 +257,7 @@ int ident_url_relatif(const char *lien, const char *origin_adr,
// case insensitive pour adresse
{
- char *a = jump_identification(adr);
+ char *a = jump_identification(adrfil->adr);
while(*a) {
if ((*a >= 'A') && (*a <= 'Z'))
@@ -264,8 +267,8 @@ int ident_url_relatif(const char *lien, const char *origin_adr,
}
// IDNA / RFC 3492 (Punycode) handling for HTTP(s)
- if (!link_has_authority(adr) || strfield(adr, "https:")) {
- char *const a = jump_identification(adr);
+ if (!link_has_authority(adrfil->adr) || strfield(adrfil->adr, "https:")) {
+ char *const a = jump_identification(adrfil->adr);
// Non-ASCII characters (theorically forbidden, but browsers are lenient)
if (!hts_isStringAscii(a, strlen(a))) {
char *const idna = hts_convertStringUTF8ToIDNA(a, strlen(a));
diff --git a/src/htstools.h b/src/htstools.h
index 7198fef..94f7c4a 100644
--- a/src/htstools.h
+++ b/src/htstools.h
@@ -47,11 +47,20 @@ typedef struct httrackp httrackp;
typedef struct find_handle_struct find_handle_struct;
typedef find_handle_struct *find_handle;
#endif
+#ifndef HTS_DEF_FWSTRUCT_lien_adrfil
+#define HTS_DEF_FWSTRUCT_lien_adrfil
+typedef struct lien_adrfil lien_adrfil;
+#endif
+#ifndef HTS_DEF_FWSTRUCT_lien_adrfilsave
+#define HTS_DEF_FWSTRUCT_lien_adrfilsave
+typedef struct lien_adrfilsave lien_adrfilsave;
+#endif
/* Library internal definictions */
#ifdef HTS_INTERNAL_BYTECODE
-int ident_url_relatif(const char *lien, const char *urladr, const char *urlfil,
- char *adr, char *fil);
+int ident_url_relatif(const char *lien, const char *origin_adr,
+ const char *origin_fil,
+ lien_adrfil* const adrfil);
int lienrelatif(char *s, const char *link, const char *curr);
int link_has_authority(const char *lien);
int link_has_authorization(const char *lien);