diff options
author | Xavier Roche <xroche@users.noreply.github.com> | 2012-03-19 12:51:31 +0000 |
---|---|---|
committer | Xavier Roche <xroche@users.noreply.github.com> | 2012-03-19 12:51:31 +0000 |
commit | 25adbdabb47499fe641c7bd9595024ff82667058 (patch) | |
tree | 4200bb5e746bc1c0606de615ec99f0a247d4d9ba /src/htshash.c | |
parent | ad5b7acc19290ff91e0f42a0de448a26760fcf99 (diff) |
httrack 3.30.1
Diffstat (limited to 'src/htshash.c')
-rw-r--r-- | src/htshash.c | 263 |
1 files changed, 63 insertions, 200 deletions
diff --git a/src/htshash.c b/src/htshash.c index b02f2ba..3cbdb5f 100644 --- a/src/htshash.c +++ b/src/htshash.c @@ -39,12 +39,21 @@ Please visit our Website: http://www.httrack.com /* specific definitions */ #include "htsbase.h" +#include "htsglobal.h" #include "htsmd5.h" #include <stdio.h> #include <stdlib.h> #include <string.h> /* END specific definitions */ +/* Specific macros */ +#ifndef malloct +#define malloct malloc +#define freet free +#define calloct calloc +#define strcpybuff strcpy +#endif + // GESTION DES TABLES DE HACHAGE // Méthode à 2 clés (adr+fil), 2e cle facultative // hash[no_enregistrement][pos]->hash est un index dans le tableau général liens @@ -53,7 +62,10 @@ Please visit our Website: http://www.httrack.com #if HTS_HASH // recherche dans la table selon nom1,nom2 et le no d'enregistrement // retour: position ou -1 si non trouvé -int hash_read(hash_struct* hash,char* nom1,char* nom2,int type) { +int hash_read(hash_struct* hash,char* nom1,char* nom2,int type,int normalized) { + char normfil_[HTS_URLMAXSIZE*2]; + char* normfil; + char* normadr; unsigned int cle; int pos; // calculer la clé de recherche, non modulée @@ -64,7 +76,7 @@ int hash_read(hash_struct* hash,char* nom1,char* nom2,int type) { // la position se calcule en modulant pos = (int) (cle%HTS_HASH_SIZE); // entrée trouvée? - if (hash->hash[type][pos] >= 0) { // un enregistrement avec une telle clé existe.. + if (hash->hash[type][pos] >= 0) { // un ou plusieurs enregistrement(s) avec une telle clé existe.. // tester table de raccourcis (hash) // pos est maintenant la position recherchée dans liens pos = hash->hash[type][pos]; @@ -79,20 +91,42 @@ int hash_read(hash_struct* hash,char* nom1,char* nom2,int type) { } break; case 1: // adr+fil - if ((strcmp(nom1,jump_identification(hash->liens[pos]->adr))==0) && (strcmp(nom2,hash->liens[pos]->fil)==0)) { + { + if (!normalized) + normfil=hash->liens[pos]->fil; + else + normfil=fil_normalized(hash->liens[pos]->fil,normfil_); + if (!normalized) + normadr = jump_identification(hash->liens[pos]->adr); + else + normadr = jump_normalized(hash->liens[pos]->adr); + if ((strfield2(nom1,normadr)!=0) && (strcmp(nom2,normfil)==0)) { #if DEBUG_HASH==2 - printf("hash: found shortcut at %d\n",pos); + printf("hash: found shortcut at %d\n",pos); #endif - return pos; + return pos; + } } break; case 2: // former_adr+former_fil - if (hash->liens[pos]->former_adr) - if ((strcmp(nom1,jump_identification(hash->liens[pos]->former_adr))==0) && (strcmp(nom2,hash->liens[pos]->former_fil)==0)) { + { + if (hash->liens[pos]->former_adr) { + if (!normalized) + normfil=hash->liens[pos]->former_fil; + else + normfil=fil_normalized(hash->liens[pos]->former_fil,normfil_); + if (!normalized) + normadr = jump_identification(hash->liens[pos]->former_adr); + else + normadr = jump_normalized(hash->liens[pos]->former_adr); + + if ((strfield2(nom1,normadr)!=0) && (strcmp(nom2,normfil)==0)) { #if DEBUG_HASH==2 - printf("hash: found shortcut at %d\n",pos); + printf("hash: found shortcut at %d\n",pos); #endif - return pos; + return pos; + } + } } break; } @@ -164,7 +198,9 @@ int hash_read(hash_struct* hash,char* nom1,char* nom2,int type) { } // enregistrement lien lpos dans les 3 tables hash1..3 -void hash_write(hash_struct* hash,int lpos) { +void hash_write(hash_struct* hash,int lpos,int normalized) { + char normfil_[HTS_URLMAXSIZE*2]; + char* normfil; unsigned int cle; int pos; int* ptr; @@ -185,7 +221,14 @@ void hash_write(hash_struct* hash,int lpos) { printf("[%d",pos); #endif // - cle = hash_cle(jump_identification(hash->liens[lpos]->adr),hash->liens[lpos]->fil); + if (!normalized) + normfil=hash->liens[lpos]->fil; + else + normfil=fil_normalized(hash->liens[lpos]->fil,normfil_); + if (!normalized) + cle = hash_cle(jump_identification(hash->liens[lpos]->adr),normfil); + else + cle = hash_cle(jump_normalized(hash->liens[lpos]->adr),normfil); pos = (int) (cle%HTS_HASH_SIZE); ptr = hash_calc_chaine(hash,1,pos); // calculer adresse chaine *ptr = lpos; // noter dernier enregistré @@ -194,7 +237,14 @@ void hash_write(hash_struct* hash,int lpos) { #endif // if (hash->liens[lpos]->former_adr) { // former_adr existe? - cle = hash_cle(jump_identification(hash->liens[lpos]->former_adr),hash->liens[lpos]->former_fil); + if (!normalized) + normfil=hash->liens[lpos]->former_fil; + else + normfil=fil_normalized(hash->liens[lpos]->former_fil,normfil_); + if (!normalized) + cle = hash_cle(jump_identification(hash->liens[lpos]->former_adr),normfil); + else + cle = hash_cle(jump_normalized(hash->liens[lpos]->former_adr),normfil); pos = (int) (cle%HTS_HASH_SIZE); ptr = hash_calc_chaine(hash,2,pos); // calculer adresse chaine *ptr = lpos; // noter dernier enregistré @@ -209,6 +259,7 @@ void hash_write(hash_struct* hash,int lpos) { #if DEBUT_HASH else { printf("* hash_write=0!!\n"); + abortLogFmt("unexpected error in hash_write (pos=%d)" _ pos); exit(1); } #endif @@ -263,191 +314,3 @@ int* hash_calc_chaine(hash_struct* hash,int type,int pos) { #endif // FIN GESTION DES TABLES DE HACHAGE - - - - - - - - - - - -// inthash -- simple hash table, using a key (char[]) and a value (ulong int) - -unsigned long int inthash_key(char* value) { - return md5sum32(value); -} - -// Check for duplicate entry (==1 : added) -int inthash_write(inthash hashtable,char* name,long int value) { - int pos = (inthash_key(name) % hashtable->hash_size); - inthash_chain* h=hashtable->hash[pos]; - while (h) { - if (strcmp(h->name,name)==0) { - h->value.intg=value; - return 0; - } - h=h->next; - } - // Not found, add it! - inthash_add(hashtable,name,value); - return 1; -} - -// Increment pos value, create one if necessary (=0) -// (==1 : created) -int inthash_inc(inthash hashtable,char* name) { - long int value=0; - int r=0; - if (inthash_read(hashtable,name,&value)) { - value++; - } - else { /* create new value */ - value=0; - r=1; - } - inthash_write(hashtable,name,value); - return (r); -} - - -// Does not check for duplicate entry -void inthash_add(inthash hashtable,char* name,long int value) { - int pos = (inthash_key(name) % hashtable->hash_size); - inthash_chain** h=&hashtable->hash[pos]; - - while (*h) - h=&((*h)->next); - *h=(inthash_chain*)calloc(1, - sizeof(inthash_chain) - + - strlen(name)+2 - ); - if (*h) { - (*h)->name=((char*)(*h)) + sizeof(inthash_chain); - (*h)->next=NULL; - strcpy((*h)->name,name); - (*h)->value.intg=value; - } -} - -void* inthash_addblk(inthash hashtable,char* name,int blksize) { - int pos = (inthash_key(name) % hashtable->hash_size); - inthash_chain** h=&hashtable->hash[pos]; - - while (*h) - h=&((*h)->next); - *h=(inthash_chain*)calloc(1, - sizeof(inthash_chain) - + - strlen(name)+2 - + - blksize - ); - if (*h) { - (*h)->name = ((char*)(*h)) + sizeof(inthash_chain); - (*h)->next=NULL; - strcpy((*h)->name,name); - (*h)->value.intg = (unsigned long) (char*) ((char*)(*h)) + sizeof(inthash_chain) + strlen(name) + 2; - return (void*)(*h)->value.intg; - } - return NULL; -} - -int inthash_read(inthash hashtable,char* name,long int* value) { - int pos = (inthash_key(name) % hashtable->hash_size); - inthash_chain* h=hashtable->hash[pos]; - while (h) { - if (strcmp(h->name,name)==0) { - *value=h->value.intg; - return 1; - } - h=h->next; - } - return 0; -} - -void inthash_init(inthash hashtable) { - unsigned int i; - for(i=0;i<hashtable->hash_size;i++) { - hashtable->hash[i]=NULL; - } -} - -void inthash_delchain(inthash_chain* hash,t_inthash_freehandler free_handler) { - if (hash) { - inthash_delchain(hash->next,free_handler); - if (free_handler) { // pos is a malloc() block, delete it! - if (hash->value.intg) { - if (free_handler) - free_handler((void*)hash->value.intg); - else - free((void*)hash->value.intg); - } - hash->value.intg=0; - } - free(hash); - } -} - -void inthash_default_free_handler(void* value) { - if (value) - free(value); -} - -// -- - -inthash inthash_new(int size) { - inthash hashtable=(inthash)calloc(1,sizeof(struct_inthash)); - if (hashtable) { - hashtable->hash_size=0; - hashtable->flag_valueismalloc=0; - if ((hashtable->hash=(inthash_chain**)calloc(size,sizeof(inthash_chain*)))) { - hashtable->hash_size=size; - inthash_init(hashtable); - } - } - return hashtable; -} - -int inthash_created(inthash hashtable) { - if (hashtable) - if (hashtable->hash) - return 1; - return 0; -} - -void inthash_value_is_malloc(inthash hashtable,int flag) { - hashtable->flag_valueismalloc=flag; -} - -void inthash_value_set_free_handler(inthash hashtable, t_inthash_freehandler free_handler) { - hashtable->free_handler = free_handler; -} - -void inthash_delete(inthash* hashtable) { - if (hashtable) { - if (*hashtable) { - if ((*hashtable)->hash) { - unsigned int i; - t_inthash_freehandler free_handler=NULL; - if ( (*hashtable)->flag_valueismalloc ) { - if ( (*hashtable)->free_handler ) - free_handler=(*hashtable)->free_handler; - else - free_handler=inthash_default_free_handler; - } - for(i=0;i<(*hashtable)->hash_size;i++) { - inthash_delchain((*hashtable)->hash[i],(*hashtable)->free_handler); - (*hashtable)->hash[i]=NULL; - } - } - free(*hashtable); - *hashtable=NULL; - } - } -} - - |