summaryrefslogtreecommitdiff
path: root/src/htshash.c
diff options
context:
space:
mode:
authorXavier Roche <xroche@users.noreply.github.com>2013-08-12 13:42:55 +0000
committerXavier Roche <xroche@users.noreply.github.com>2013-08-12 13:42:55 +0000
commit4e903579b22f3698934294370f076119576041ab (patch)
treeb2007c09c0994a8a3c2d1aba4c8c04ae540daf9a /src/htshash.c
parente77141ceaefb55f427edcfb777f5f1a96b885583 (diff)
Big cleanup in core heap hashtable code, rewritten using new fancy hashtables.
Diffstat (limited to 'src/htshash.c')
-rw-r--r--src/htshash.c206
1 files changed, 149 insertions, 57 deletions
diff --git a/src/htshash.c b/src/htshash.c
index daf987c..d740de7 100644
--- a/src/htshash.c
+++ b/src/htshash.c
@@ -60,10 +60,143 @@ Please visit our Website: http://www.httrack.com
// type: numero enregistrement - 0 est case insensitive (sav) 1 (adr+fil) 2 (former_adr+former_fil)
// recherche dans la table selon nom1,nom2 et le no d'enregistrement
-void hash_init(hash_struct * hash) {
+/* Key free handler: deliberately a no-op, keys are stored by address and never copied */
+static void key_freehandler(void *arg, void *value) {
+}
+
+/* Key dup handler: no copy is made, the very same pointer is returned (the union only drops the const qualifier without a cast) */
+static char* key_duphandler(void *arg, const char *name) {
+ union {
+ const char *roname;
+ char *name;
+ } u;
+ u.roname = name;
+ return u.name;
+}
+
+/* Key sav hashes are computed on the convtolower() version of the name (built in hash->catbuff), making them case-insensitive */
+static inthash_keys key_sav_hashes(void *arg, const char *value) {
+ hash_struct *const hash = (hash_struct*) arg;
+ convtolower(hash->catbuff, value);
+ return inthash_hash_value(hash->catbuff);
+}
+
+/* Key sav comparison is case-insensitive (strcasecmp) ; returns non-zero when both names match */
+static int key_sav_equals(void *arg, const char *a, const char *b) {
+ return strcasecmp(a, b) == 0;
+}
+
+/* Pseudo-key (lien_url structure) hash function: hashes the address (scheme and
+   authentication skipped) followed by the path ; "former" selects former_adr/former_fil */
+static inthash_keys key_adrfil_hashes_generic(void *arg, const char *value_,
+                                              const int former) {
+  hash_struct *const hash = (hash_struct*) arg;
+  const lien_url*const lien = (lien_url*) value_;
+  const char *const adr = !former ? lien->adr : lien->former_adr;
+  const char *const fil = !former ? lien->fil : lien->former_fil;
+  const char *const adr_norm = adr != NULL ?
+    ( hash->normalized ? jump_normalized(adr) : jump_identification(adr) )
+    : NULL;
+
+  // copy address at the beginning of the scratch buffer
+  assertf(adr_norm != NULL);
+  strcpy(hash->normfil, adr_norm);
+
+  // append link right after the address (the offset is the current length in both modes)
+  assertf(fil != NULL);
+  if (hash->normalized) {
+    fil_normalized(fil, &hash->normfil[strlen(hash->normfil)]);
+  } else {
+    strcpy(&hash->normfil[strlen(hash->normfil)], fil);
+  }
+
+  // hash the concatenated address + path
+  return inthash_hash_value(hash->normfil);
+}
+
+/* Pseudo-key (lien_url structure) comparison function: returns non-zero when address and path both match ; "former" selects former_adr/former_fil instead of adr/fil */
+static int key_adrfil_equals_generic(void *arg, const char *a_, const char *b_,
+ const int former) {
+ hash_struct *const hash = (hash_struct*) arg;
+ const int normalized = hash->normalized;
+ const lien_url*const a = (lien_url*) a_;
+ const lien_url*const b = (lien_url*) b_;
+ const char *const a_adr = !former ? a->adr : a->former_adr;
+ const char *const b_adr = !former ? b->adr : b->former_adr;
+ const char *const a_fil = !former ? a->fil : a->former_fil;
+ const char *const b_fil = !former ? b->fil : b->former_fil;
+ const char *ja;
+ const char *jb;
+
+ // safety: both addresses must be set (paths are not checked here)
+ assertf(a_adr != NULL);
+ assertf(b_adr != NULL);
+
+ // skip scheme and authentication to the domain (possibly without www.) ; NOTE(review): compared case-insensitively here while the hash function hashes the address as-is - confirm addresses are case-normalized beforehand
+ ja = normalized ? jump_normalized(a_adr) : jump_identification(a_adr);
+ jb = normalized ? jump_normalized(b_adr) : jump_identification(b_adr);
+ if (strcasecmp(ja, jb) != 0) {
+ return 0;
+ }
+
+ // now compare paths (case-sensitive ; normalized into the hash->normfil / hash->normfil2 scratch buffers when requested)
+ if (normalized) {
+ fil_normalized(a_fil, hash->normfil);
+ fil_normalized(b_fil, hash->normfil2);
+ return strcmp(hash->normfil, hash->normfil2) == 0;
+ } else {
+ return strcmp(a_fil, b_fil) == 0;
+ }
+}
+
+/* "adr"/"fil" lien_url structure members hashing function (generic version with former = 0) */
+static inthash_keys key_adrfil_hashes(void *arg, const char *value_) {
+ return key_adrfil_hashes_generic(arg, value_, 0);
+}
+
+/* "adr"/"fil" lien_url structure members comparison function (generic version with former = 0) */
+static int key_adrfil_equals(void *arg, const char *a, const char *b) {
+ return key_adrfil_equals_generic(arg, a, b, 0);
+}
+
+/* "former_adr"/"former_fil" lien_url structure members hashing function (generic version with former = 1) */
+static inthash_keys key_former_adrfil_hashes(void *arg, const char *value_) {
+ return key_adrfil_hashes_generic(arg, value_, 1);
+}
+
+/* "former_adr"/"former_fil" lien_url structure members comparison function (generic version with former = 1) */
+static int key_former_adrfil_equals(void *arg, const char *a, const char *b) {
+ return key_adrfil_equals_generic(arg, a, b, 1);
+}
+
+void hash_init(hash_struct * hash, int normalized) {
hash->sav = inthash_new(0);
hash->adrfil = inthash_new(0);
hash->former_adrfil = inthash_new(0);
+ hash->normalized = normalized;
+
+ /* "sav" table: case-insensitive hashing and comparison ; keys are direct char* filenames */
+ inthash_value_set_key_handler(hash->sav,
+ key_duphandler,
+ key_freehandler,
+ key_sav_hashes,
+ key_sav_equals,
+ hash);
+
+ /* "adrfil" and "former_adrfil" tables: URL-style comparison ; keys are
+ lien_url structure pointers cast to char* */
+ inthash_value_set_key_handler(hash->adrfil,
+ key_duphandler,
+ key_freehandler,
+ key_adrfil_hashes,
+ key_adrfil_equals,
+ hash);
+ inthash_value_set_key_handler(hash->former_adrfil,
+ key_duphandler,
+ key_freehandler,
+ key_former_adrfil_hashes,
+ key_former_adrfil_equals,
+ hash);
}
void hash_free(hash_struct *hash) {
@@ -74,66 +207,36 @@ void hash_free(hash_struct *hash) {
}
}
-static char * normalize_key(const char *nom1, const char *nom2,
- hash_struct_type type, int normalized,
- char *normfil_, char *catbuff) {
- /* dispatch type */
- const char *normfil;
- switch(type) {
- case HASH_STRUCT_FILENAME:
- /* first entry: destination filename (lowercased) */
- assertf(nom2 == NULL || *nom2 == '\0');
- return convtolower(catbuff, nom1);
- break;
- case HASH_STRUCT_ADR_PATH:
- case HASH_STRUCT_ORIGINAL_ADR_PATH:
- /* second and third entries: URL address and path */
- if (!normalized)
- normfil = nom2;
- else
- normfil = fil_normalized(nom2, normfil_);
- if (!normalized) {
- strcpybuff(catbuff, jump_identification(nom1));
- } else {
- strcpybuff(catbuff, jump_normalized(nom1));
- }
- strcatbuff(catbuff, normfil);
- return catbuff;
- break;
- default:
- assertf(! "unexpected case");
- return NULL;
- break;
- }
-}
-
// retour: position ou -1 si non trouvé
int hash_read(const hash_struct * hash, const char *nom1, const char *nom2,
- hash_struct_type type, int normalized) {
- char BIGSTK normfil_[HTS_URLMAXSIZE * 2];
- char catbuff[CATBUFF_SIZE];
+ hash_struct_type type) {
intptr_t intvalue;
- char *const name = normalize_key(nom1, nom2, type, normalized,
- normfil_, catbuff);
+ lien_url lien;
/* read */
switch(type) {
case HASH_STRUCT_FILENAME:
- if (inthash_read(hash->sav, name, &intvalue)) {
+ if (inthash_read(hash->sav, nom1, &intvalue)) {
return (int) intvalue;
} else {
return -1;
}
break;
case HASH_STRUCT_ADR_PATH:
- if (inthash_read(hash->adrfil, name, &intvalue)) {
+ memset(&lien, 0, sizeof(lien));
+ lien.adr = key_duphandler(NULL, nom1);
+ lien.fil = key_duphandler(NULL, nom2);
+ if (inthash_read(hash->adrfil, (char*) &lien, &intvalue)) {
return (int) intvalue;
} else {
return -1;
}
break;
case HASH_STRUCT_ORIGINAL_ADR_PATH:
- if (inthash_read(hash->former_adrfil, name, &intvalue)) {
+ memset(&lien, 0, sizeof(lien));
+ lien.former_adr = key_duphandler(NULL, nom1);
+ lien.former_fil = key_duphandler(NULL, nom2);
+ if (inthash_read(hash->former_adrfil, (char*) &lien, &intvalue)) {
return (int) intvalue;
} else {
return -1;
@@ -147,26 +250,15 @@ int hash_read(const hash_struct * hash, const char *nom1, const char *nom2,
}
// enregistrement lien lpos dans les 3 tables hash1..3
-void hash_write(hash_struct * hash, int lpos, int normalized) {
- char BIGSTK normfil_[HTS_URLMAXSIZE * 2];
- char catbuff[CATBUFF_SIZE];
- const char *name;
-
+void hash_write(hash_struct * hash, int lpos) {
/* first entry: destination filename (lowercased) */
- name = normalize_key(hash->liens[lpos]->sav, NULL, HASH_STRUCT_FILENAME,
- normalized, normfil_, catbuff);
- inthash_write(hash->sav, name, lpos);
+ inthash_write(hash->sav, hash->liens[lpos]->sav, lpos);
/* second entry: URL address and path */
- name = normalize_key(hash->liens[lpos]->adr, hash->liens[lpos]->fil,
- HASH_STRUCT_ADR_PATH, normalized, normfil_, catbuff);
- inthash_write(hash->adrfil, name, lpos);
+ inthash_write(hash->adrfil, (char*) hash->liens[lpos], lpos);
/* third entry: URL address and path before redirect */
if (hash->liens[lpos]->former_adr) { // former_adr existe?
- name = normalize_key(hash->liens[lpos]->former_adr,
- hash->liens[lpos]->former_fil, HASH_STRUCT_ORIGINAL_ADR_PATH,
- normalized, normfil_, catbuff);
- inthash_write(hash->former_adrfil, name, lpos);
+ inthash_write(hash->former_adrfil, (char*) hash->liens[lpos], lpos);
}
}