summary | refs | log | tree | commit | diff
path: root/src/htshash.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/htshash.c')
-rw-r--r-- src/htshash.c 263
1 file changed, 63 insertions, 200 deletions
diff --git a/src/htshash.c b/src/htshash.c
index b02f2ba..3cbdb5f 100644
--- a/src/htshash.c
+++ b/src/htshash.c
@@ -39,12 +39,21 @@ Please visit our Website: http://www.httrack.com
/* specific definitions */
#include "htsbase.h"
+#include "htsglobal.h"
#include "htsmd5.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* END specific definitions */
+/* Specific macros */
+#ifndef malloct
+#define malloct malloc
+#define freet free
+#define calloct calloc
+#define strcpybuff strcpy
+#endif
+
// GESTION DES TABLES DE HACHAGE
// Méthode à 2 clés (adr+fil), 2e cle facultative
// hash[no_enregistrement][pos]->hash est un index dans le tableau général liens
@@ -53,7 +62,10 @@ Please visit our Website: http://www.httrack.com
#if HTS_HASH
// recherche dans la table selon nom1,nom2 et le no d'enregistrement
// retour: position ou -1 si non trouvé
-int hash_read(hash_struct* hash,char* nom1,char* nom2,int type) {
+int hash_read(hash_struct* hash,char* nom1,char* nom2,int type,int normalized) {
+ char normfil_[HTS_URLMAXSIZE*2];
+ char* normfil;
+ char* normadr;
unsigned int cle;
int pos;
// calculer la clé de recherche, non modulée
@@ -64,7 +76,7 @@ int hash_read(hash_struct* hash,char* nom1,char* nom2,int type) {
// la position se calcule en modulant
pos = (int) (cle%HTS_HASH_SIZE);
// entrée trouvée?
- if (hash->hash[type][pos] >= 0) { // un enregistrement avec une telle clé existe..
+ if (hash->hash[type][pos] >= 0) { // un ou plusieurs enregistrement(s) avec une telle clé existe..
// tester table de raccourcis (hash)
// pos est maintenant la position recherchée dans liens
pos = hash->hash[type][pos];
@@ -79,20 +91,42 @@ int hash_read(hash_struct* hash,char* nom1,char* nom2,int type) {
}
break;
case 1: // adr+fil
- if ((strcmp(nom1,jump_identification(hash->liens[pos]->adr))==0) && (strcmp(nom2,hash->liens[pos]->fil)==0)) {
+ {
+ if (!normalized)
+ normfil=hash->liens[pos]->fil;
+ else
+ normfil=fil_normalized(hash->liens[pos]->fil,normfil_);
+ if (!normalized)
+ normadr = jump_identification(hash->liens[pos]->adr);
+ else
+ normadr = jump_normalized(hash->liens[pos]->adr);
+ if ((strfield2(nom1,normadr)!=0) && (strcmp(nom2,normfil)==0)) {
#if DEBUG_HASH==2
- printf("hash: found shortcut at %d\n",pos);
+ printf("hash: found shortcut at %d\n",pos);
#endif
- return pos;
+ return pos;
+ }
}
break;
case 2: // former_adr+former_fil
- if (hash->liens[pos]->former_adr)
- if ((strcmp(nom1,jump_identification(hash->liens[pos]->former_adr))==0) && (strcmp(nom2,hash->liens[pos]->former_fil)==0)) {
+ {
+ if (hash->liens[pos]->former_adr) {
+ if (!normalized)
+ normfil=hash->liens[pos]->former_fil;
+ else
+ normfil=fil_normalized(hash->liens[pos]->former_fil,normfil_);
+ if (!normalized)
+ normadr = jump_identification(hash->liens[pos]->former_adr);
+ else
+ normadr = jump_normalized(hash->liens[pos]->former_adr);
+
+ if ((strfield2(nom1,normadr)!=0) && (strcmp(nom2,normfil)==0)) {
#if DEBUG_HASH==2
- printf("hash: found shortcut at %d\n",pos);
+ printf("hash: found shortcut at %d\n",pos);
#endif
- return pos;
+ return pos;
+ }
+ }
}
break;
}
@@ -164,7 +198,9 @@ int hash_read(hash_struct* hash,char* nom1,char* nom2,int type) {
}
// enregistrement lien lpos dans les 3 tables hash1..3
-void hash_write(hash_struct* hash,int lpos) {
+void hash_write(hash_struct* hash,int lpos,int normalized) {
+ char normfil_[HTS_URLMAXSIZE*2];
+ char* normfil;
unsigned int cle;
int pos;
int* ptr;
@@ -185,7 +221,14 @@ void hash_write(hash_struct* hash,int lpos) {
printf("[%d",pos);
#endif
//
- cle = hash_cle(jump_identification(hash->liens[lpos]->adr),hash->liens[lpos]->fil);
+ if (!normalized)
+ normfil=hash->liens[lpos]->fil;
+ else
+ normfil=fil_normalized(hash->liens[lpos]->fil,normfil_);
+ if (!normalized)
+ cle = hash_cle(jump_identification(hash->liens[lpos]->adr),normfil);
+ else
+ cle = hash_cle(jump_normalized(hash->liens[lpos]->adr),normfil);
pos = (int) (cle%HTS_HASH_SIZE);
ptr = hash_calc_chaine(hash,1,pos); // calculer adresse chaine
*ptr = lpos; // noter dernier enregistré
@@ -194,7 +237,14 @@ void hash_write(hash_struct* hash,int lpos) {
#endif
//
if (hash->liens[lpos]->former_adr) { // former_adr existe?
- cle = hash_cle(jump_identification(hash->liens[lpos]->former_adr),hash->liens[lpos]->former_fil);
+ if (!normalized)
+ normfil=hash->liens[lpos]->former_fil;
+ else
+ normfil=fil_normalized(hash->liens[lpos]->former_fil,normfil_);
+ if (!normalized)
+ cle = hash_cle(jump_identification(hash->liens[lpos]->former_adr),normfil);
+ else
+ cle = hash_cle(jump_normalized(hash->liens[lpos]->former_adr),normfil);
pos = (int) (cle%HTS_HASH_SIZE);
ptr = hash_calc_chaine(hash,2,pos); // calculer adresse chaine
*ptr = lpos; // noter dernier enregistré
@@ -209,6 +259,7 @@ void hash_write(hash_struct* hash,int lpos) {
#if DEBUT_HASH
else {
printf("* hash_write=0!!\n");
+ abortLogFmt("unexpected error in hash_write (pos=%d)" _ pos);
exit(1);
}
#endif
@@ -263,191 +314,3 @@ int* hash_calc_chaine(hash_struct* hash,int type,int pos) {
#endif
// FIN GESTION DES TABLES DE HACHAGE
-
-
-
-
-
-
-
-
-
-
-
-// inthash -- simple hash table, using a key (char[]) and a value (ulong int)
-
-unsigned long int inthash_key(char* value) {
- return md5sum32(value);
-}
-
-// Check for duplicate entry (==1 : added)
-int inthash_write(inthash hashtable,char* name,long int value) {
- int pos = (inthash_key(name) % hashtable->hash_size);
- inthash_chain* h=hashtable->hash[pos];
- while (h) {
- if (strcmp(h->name,name)==0) {
- h->value.intg=value;
- return 0;
- }
- h=h->next;
- }
- // Not found, add it!
- inthash_add(hashtable,name,value);
- return 1;
-}
-
-// Increment pos value, create one if necessary (=0)
-// (==1 : created)
-int inthash_inc(inthash hashtable,char* name) {
- long int value=0;
- int r=0;
- if (inthash_read(hashtable,name,&value)) {
- value++;
- }
- else { /* create new value */
- value=0;
- r=1;
- }
- inthash_write(hashtable,name,value);
- return (r);
-}
-
-
-// Does not check for duplicate entry
-void inthash_add(inthash hashtable,char* name,long int value) {
- int pos = (inthash_key(name) % hashtable->hash_size);
- inthash_chain** h=&hashtable->hash[pos];
-
- while (*h)
- h=&((*h)->next);
- *h=(inthash_chain*)calloc(1,
- sizeof(inthash_chain)
- +
- strlen(name)+2
- );
- if (*h) {
- (*h)->name=((char*)(*h)) + sizeof(inthash_chain);
- (*h)->next=NULL;
- strcpy((*h)->name,name);
- (*h)->value.intg=value;
- }
-}
-
-void* inthash_addblk(inthash hashtable,char* name,int blksize) {
- int pos = (inthash_key(name) % hashtable->hash_size);
- inthash_chain** h=&hashtable->hash[pos];
-
- while (*h)
- h=&((*h)->next);
- *h=(inthash_chain*)calloc(1,
- sizeof(inthash_chain)
- +
- strlen(name)+2
- +
- blksize
- );
- if (*h) {
- (*h)->name = ((char*)(*h)) + sizeof(inthash_chain);
- (*h)->next=NULL;
- strcpy((*h)->name,name);
- (*h)->value.intg = (unsigned long) (char*) ((char*)(*h)) + sizeof(inthash_chain) + strlen(name) + 2;
- return (void*)(*h)->value.intg;
- }
- return NULL;
-}
-
-int inthash_read(inthash hashtable,char* name,long int* value) {
- int pos = (inthash_key(name) % hashtable->hash_size);
- inthash_chain* h=hashtable->hash[pos];
- while (h) {
- if (strcmp(h->name,name)==0) {
- *value=h->value.intg;
- return 1;
- }
- h=h->next;
- }
- return 0;
-}
-
-void inthash_init(inthash hashtable) {
- unsigned int i;
- for(i=0;i<hashtable->hash_size;i++) {
- hashtable->hash[i]=NULL;
- }
-}
-
-void inthash_delchain(inthash_chain* hash,t_inthash_freehandler free_handler) {
- if (hash) {
- inthash_delchain(hash->next,free_handler);
- if (free_handler) { // pos is a malloc() block, delete it!
- if (hash->value.intg) {
- if (free_handler)
- free_handler((void*)hash->value.intg);
- else
- free((void*)hash->value.intg);
- }
- hash->value.intg=0;
- }
- free(hash);
- }
-}
-
-void inthash_default_free_handler(void* value) {
- if (value)
- free(value);
-}
-
-// --
-
-inthash inthash_new(int size) {
- inthash hashtable=(inthash)calloc(1,sizeof(struct_inthash));
- if (hashtable) {
- hashtable->hash_size=0;
- hashtable->flag_valueismalloc=0;
- if ((hashtable->hash=(inthash_chain**)calloc(size,sizeof(inthash_chain*)))) {
- hashtable->hash_size=size;
- inthash_init(hashtable);
- }
- }
- return hashtable;
-}
-
-int inthash_created(inthash hashtable) {
- if (hashtable)
- if (hashtable->hash)
- return 1;
- return 0;
-}
-
-void inthash_value_is_malloc(inthash hashtable,int flag) {
- hashtable->flag_valueismalloc=flag;
-}
-
-void inthash_value_set_free_handler(inthash hashtable, t_inthash_freehandler free_handler) {
- hashtable->free_handler = free_handler;
-}
-
-void inthash_delete(inthash* hashtable) {
- if (hashtable) {
- if (*hashtable) {
- if ((*hashtable)->hash) {
- unsigned int i;
- t_inthash_freehandler free_handler=NULL;
- if ( (*hashtable)->flag_valueismalloc ) {
- if ( (*hashtable)->free_handler )
- free_handler=(*hashtable)->free_handler;
- else
- free_handler=inthash_default_free_handler;
- }
- for(i=0;i<(*hashtable)->hash_size;i++) {
- inthash_delchain((*hashtable)->hash[i],(*hashtable)->free_handler);
- (*hashtable)->hash[i]=NULL;
- }
- }
- free(*hashtable);
- *hashtable=NULL;
- }
- }
-}
-
-