diff options
author | Xavier Roche <xroche@users.noreply.github.com> | 2013-06-23 18:36:38 +0000 |
---|---|---|
committer | Xavier Roche <xroche@users.noreply.github.com> | 2013-06-23 18:36:38 +0000 |
commit | b2a632795b24a88a3aa00dc7f7f4ef12315be8b2 (patch) | |
tree | 2f3cb78f5210b6b469adfcacbfcb95855cb831c5 /src | |
parent | 43524982270ef4097cb9fa30db93dd5002566838 (diff) |
Rewritten hashtables
* cuckoo hashing with stash, auto-resize
* pool of strings for keys (auto-resize, auto-compact)
- no auto-shrink for hashtable currently
Still experimental, expect bugs!
Diffstat (limited to 'src')
-rw-r--r-- | src/htsinthash.c | 759 | ||||
-rw-r--r-- | src/htsinthash.h | 38 |
2 files changed, 574 insertions, 223 deletions
diff --git a/src/htsinthash.c b/src/htsinthash.c index 426f815..65a1c49 100644 --- a/src/htsinthash.c +++ b/src/htsinthash.c @@ -28,46 +28,211 @@ Please visit our Website: http://www.httrack.com /* ------------------------------------------------------------ */ /* File: httrack.c subroutines: */ -/* hash table system (fast index) */ +/* hash tables */ /* Author: Xavier Roche */ /* ------------------------------------------------------------ */ +/** + * Library notes: + * This small hashtable library provides key/value hashtable, with a string + * key, and integer/pointer value (with an associated optional allocator) + * It features O(1) average insertion, O(1) lookup, and O(1) delete. + * + * Implementation notes: + * Implementation is auto-rehashable, and uses cuckoo hashing of size 2**n + * with a LCG hash function, with one additional auxiliary hash function. + * It also uses a small stash area to handle rare cases of collisions. + * Enumeration of all key/values is possible, deletion is also possible, but + * currently without any auto-shrinking (ie. table will never shrink). + * Overall, two main blocks are allocated: one for the items, and one for + * the keys (pool). + * + * References: + * Cuckoo Hashing http://en.wikipedia.org/wiki/Cuckoo_hashing + * LCG http://en.wikipedia.org/wiki/Linear_congruential_generator + * Cuckoo Stash http://research.microsoft.com/pubs/73856/stash-full.9-30.pdf + **/ + /* Internal engine bytecode */ #define HTS_INTERNAL_BYTECODE +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> + #include "htsinthash.h" -/* specific definitions */ +#ifdef LIBHTTRACK_EXPORTS #include "htsbase.h" -#include "htsmd5.h" -/* END specific definitions */ - -/* Specific macros */ -#ifdef NO_MALLOCT -#undef malloct -#undef freet -#undef calloct -#undef strcpybuff -#endif -#ifndef malloct -#define malloct malloc -#define freet free -#define calloct calloc -#define strcpybuff strcpy +#undef assert +#define assert assertf #endif -// static functions +/* Key (32-bit) */ +typedef unsigned int inthash_key; + +/* Numerical Recipes function. */ +#define HASH_PRIME ( 1664525 ) +#define HASH_CONST ( 1013904223 ) +#define HASH_ADD(HASH, C) do { \ + (HASH) *= HASH_PRIME; \ + (HASH) += HASH_CONST; \ + (HASH) += (C); \ + } while(0) + +/* 2**X */ +#define POW2(X) ( (size_t) 1 << (X) ) + +/* pair of hashes */ +typedef struct inthash_keys { + inthash_key hash1; + inthash_key hash2; +} inthash_keys; + +static inthash_keys inthash_calc_hashes(const char *value) { + /* compute two lcg hashes using different seeds */ + inthash_keys hash = { 0, (inthash_key) -1 }; + size_t i; + for(i = 0 ; value[i] != '\0' ; i++) { + const unsigned char c1 = value[i]; + const unsigned char c2 = 0xff - c1; + HASH_ADD(hash.hash1, c1); + HASH_ADD(hash.hash2, c2); + } + /* yes, it may happen */ + if (hash.hash1 == hash.hash2) { + hash.hash2 = ~hash.hash2; + } + return hash; +} + +static int inthash_is_free(const inthash hashtable, size_t pos) { + return hashtable->items[pos].name == NULL; +} + +static int inthash_matches(const inthash hashtable, + size_t pos, const char *name) { + return hashtable->items[pos].name != NULL + && strcmp(hashtable->items[pos].name, name) == 0; +} -static void inthash_delchain(inthash_chain * hash, - t_inthash_freehandler free_handler); -static void inthash_default_free_handler(void *value); -static unsigned long int inthash_key(const char *value); -static void inthash_init(inthash hashtable); +/* realloc (expand) string pool ; does not change the compacity */ +static void inthash_realloc_pool(inthash hashtable) { + const size_t hash_size = POW2(hashtable->hash_size_power); + char *const oldbase = hashtable->string_pool; + size_t i; + hashtable->string_pool = realloc(hashtable->string_pool, + hashtable->string_pool_capacity); + if (hashtable->string_pool == NULL) { + fprintf(stderr, + "** hashtable string pool allocation error: could not allocate %u bytes", + (int) hashtable->string_pool_capacity); + assert(! "hashtable string pool allocation error"); + } -// inthash -- simple hash table, using a key (char[]) and a value (ulong int) + /* recompute string address */ +#define RECOMPUTE_STRING(S) do { \ + if (S != NULL) { \ + const size_t offset = (S) - oldbase; \ + hashtable->items[i].name = &hashtable->string_pool[offset]; \ + } \ +} while(0) + + /* recompute */ + for(i = 0 ; i < hash_size ; i++) { + RECOMPUTE_STRING(hashtable->items[i].name); + } + for(i = 0 ; i < hashtable->stash_size ; i++) { + RECOMPUTE_STRING(hashtable->stash[i].name); + } -static unsigned long int inthash_key(const char *value) { - return md5sum32(value); +#undef RECOMPUTE_STRING +} + +/* compact string pool ; does not change the capacity */ +static void inthash_compact_pool(inthash hashtable) { + const size_t hash_size = POW2(hashtable->hash_size_power); + size_t i; + char* old_pool = hashtable->string_pool; + + hashtable->string_pool = malloc(hashtable->string_pool_capacity); + hashtable->string_pool_size = 0; + hashtable->string_pool_chars = 0; + if (hashtable->string_pool == NULL) { + fprintf(stderr, + "** hashtable string pool compaction error: could not allocate %u bytes", + (int) hashtable->string_pool_capacity); + assert(! "hashtable string pool compaction error"); + } + + /* relocate a string on a different pool */ +#define RELOCATE_STRING(S) do { \ + if (S != NULL) { \ + const size_t len = strlen(S) + 1; \ + assert(hashtable->string_pool_size + len \ + <= hashtable->string_pool_capacity); \ + memcpy(&hashtable->string_pool[hashtable->string_pool_size], \ + S, len); \ + S = &hashtable->string_pool[hashtable->string_pool_size]; \ + hashtable->string_pool_size += len; \ + } \ +} while(0) + + /* relocate */ + for(i = 0 ; i < hash_size ; i++) { + RELOCATE_STRING(hashtable->items[i].name); + } + for(i = 0 ; i < hashtable->stash_size ; i++) { + RELOCATE_STRING(hashtable->stash[i].name); + } + +#undef RELOCATE_STRING + + /* compacted (used chars == current size) */ + hashtable->string_pool_chars = hashtable->string_pool_size; + + /* wipe previous pool */ + free(old_pool); +} + +static char* inthash_dup_name(inthash hashtable, const char *name) { + const size_t len = strlen(name) + 1; + char *s; + + if (hashtable->string_pool_capacity - hashtable->string_pool_size < len) { + if (hashtable->string_pool_capacity == 0) { + hashtable->string_pool_capacity = 16; + } + for( ; hashtable->string_pool_capacity - hashtable->string_pool_size < len + ; hashtable->string_pool_capacity <<= 1) ; + inthash_realloc_pool(hashtable); + } + + s = &hashtable->string_pool[hashtable->string_pool_size]; + memcpy(s, name, len); + hashtable->string_pool_size += len; + hashtable->string_pool_chars += len; + + return s; +} + +static void inthash_free_name(inthash hashtable, char *name) { + const size_t len = strlen(name) + 1; + hashtable->string_pool_chars -= len; + if (hashtable->string_pool_chars < hashtable->string_pool_size / 2) { + inthash_compact_pool(hashtable); + } +} + +static size_t inthash_hash_to_pos_(size_t hash_size_power, inthash_key hash) { + const inthash_key mask = POW2(hash_size_power) - 1; + /* TODO: investigate if lower bits of LCG are sane */ + return hash & mask; +} + +static size_t inthash_hash_to_pos(const inthash hashtable, inthash_key hash) { + return inthash_hash_to_pos_(hashtable->hash_size_power, hash); } int inthash_read_pvoid(inthash hashtable, const char *name, void **pvalue) { @@ -90,10 +255,9 @@ void inthash_add_pvoid(inthash hashtable, const char *name, void *pvalue) { inthash_value value = INTHASH_VALUE_NULL; value.ptr = pvalue; - inthash_add_value(hashtable, name, value); + inthash_write_value(hashtable, name, value); } -// Check for duplicate entry (==1 : added) int inthash_write(inthash hashtable, const char *name, intptr_t intvalue) { inthash_value value = INTHASH_VALUE_NULL; @@ -101,33 +265,239 @@ int inthash_write(inthash hashtable, const char *name, intptr_t intvalue) { return inthash_write_value(hashtable, name, value); } +static void inthash_default_free_handler(void *value) { + if (value) + free(value); +} + +static void inthash_del_value_(inthash hashtable, inthash_value *pvalue) { + if (hashtable->flag_valueismalloc) { + if (hashtable->free_handler) + hashtable->free_handler(pvalue->ptr); + else + inthash_default_free_handler(pvalue->ptr); + } + pvalue->ptr = NULL; +} + +static void inthash_del_value(inthash hashtable, size_t pos) { + inthash_del_value_(hashtable, &hashtable->items[pos].value); +} + +static void inthash_del_name_(inthash hashtable, char **pname) { + char *const name = *pname; + *pname = NULL; + /* free after detach (we may compact the pool) */ + inthash_free_name(hashtable, name); +} + +static void inthash_del_name(inthash hashtable, size_t pos) { + inthash_del_name_(hashtable, &hashtable->items[pos].name); +} + +static void inthash_del_item(inthash hashtable, inthash_item *pitem) { + inthash_del_value_(hashtable, &pitem->value); + inthash_del_name_(hashtable, &pitem->name); +} + +static int inthash_write_value_(inthash hashtable, const char *name, + inthash_value value) { + inthash_item item; + size_t pos; + inthash_keys hashes = inthash_calc_hashes(name); + inthash_key cuckoo_hash, initial_cuckoo_hash; + size_t loops; + + /* replace at position 1 ? */ + pos = inthash_hash_to_pos(hashtable, hashes.hash1); + if (inthash_matches(hashtable, pos, name)) { + inthash_del_value(hashtable, pos); + hashtable->items[pos].value = value; + return 0; /* replaced */ + } + + /* replace at position 2 ? */ + pos = inthash_hash_to_pos(hashtable, hashes.hash2); + if (inthash_matches(hashtable, pos, name)) { + inthash_del_value(hashtable, pos); + hashtable->items[pos].value = value; + return 0; /* replaced */ + } + + /* replace in the stash ? */ + if (hashtable->stash_size != 0) { + size_t i; + for(i = 0 ; i < hashtable->stash_size ; i++) { + if (strcmp(hashtable->stash[i].name, name) == 0) { + inthash_del_value_(hashtable, &hashtable->stash[i].value); + hashtable->stash[i].value = value; + return 0; /* replaced */ + } + } + } + + /* otherwise we need to create a new item */ + item.name = inthash_dup_name(hashtable, name); + item.value = value; + + /* place at free position 1 ? */ + pos = inthash_hash_to_pos(hashtable, hashes.hash1); + if (inthash_is_free(hashtable, pos)) { + hashtable->items[pos] = item; + return 1; /* added */ + } else { + /* place at free position 2 ? */ + pos = inthash_hash_to_pos(hashtable, hashes.hash2); + if (inthash_is_free(hashtable, pos)) { + hashtable->items[pos] = item; + return 1; /* added */ + } + /* prepare cuckoo ; let's take position 1 */ + else { + cuckoo_hash = initial_cuckoo_hash = hashes.hash1; + } + } + + /* put 'item' in place with hash 'cuckoo_hash' */ + for(loops = POW2(hashtable->hash_size_power) ; loops != 0 ; --loops) { + const size_t pos = inthash_hash_to_pos(hashtable, cuckoo_hash); + + /* place at alternate free position ? */ + if (inthash_is_free(hashtable, pos)) { + hashtable->items[pos] = item; + return 1; /* added */ + } + /* then cuckoo's place it is */ + else { + const inthash_item backup_item = hashtable->items[pos]; + hashtable->items[pos] = item; + /* take care of new lost item */ + item = backup_item; + hashes = inthash_calc_hashes(item.name); + /* we just kicked this item from its position 1 */ + if (pos == inthash_hash_to_pos(hashtable, hashes.hash1)) { + /* then place it on position 2 on next run */ + cuckoo_hash = hashes.hash2; + } + /* we just kicked this item from its position 2 */ + else if (pos == inthash_hash_to_pos(hashtable, hashes.hash2)) { + /* then place it on position 1 on next run */ + cuckoo_hash = hashes.hash1; + + /* we are looping */ + if (cuckoo_hash == initial_cuckoo_hash) { + /* emergency stash */ + break; + } + } + else { + assert(! "hashtable internal error: unexpected position"); + } + } + } + + /* emergency stashing */ + if (hashtable->stash_size < STASH_SIZE) { + hashtable->stash[hashtable->stash_size] = item; + hashtable->stash_size++; + return 1; /* added */ + } else { + /* we are doomed. hopefully the probability is lower than being killed + by a wandering radioactive monkey */ + assert(! "hashtable internal error: cuckoo/stash collision"); + /* not reachable code */ + return -1; + } +} + int inthash_write_value(inthash hashtable, const char *name, inthash_value value) { - int pos = (inthash_key(name) % hashtable->hash_size); - inthash_chain *h = hashtable->hash[pos]; - - while(h) { - if (strcmp(h->name, name) == 0) { - /* Delete element */ - if (hashtable->flag_valueismalloc) { - void *ptr = h->value.ptr; - - if (ptr != NULL) { - if (hashtable->free_handler) - hashtable->free_handler(ptr); - else - freet(ptr); + /* replace of add item */ + const int ret = inthash_write_value_(hashtable, name, value); + if (ret) { + /* size of half of the table */ + const size_t half_size = POW2(hashtable->hash_size_power - 1); + + /* item was added: increase count */ + hashtable->nitems++; + + /* table is more than half-full */ + if (hashtable->nitems >= half_size) { + size_t i; + char *data; + + /* size before */ + const size_t prev_power = hashtable->hash_size_power; + const size_t prev_size = half_size * 2; + const size_t prev_alloc_size = prev_size*sizeof(inthash_item); + + /* size after doubling it */ + const size_t new_size = prev_size * 2; + const size_t alloc_size = prev_alloc_size * 2; + + /* realloc */ + hashtable->hash_size_power++; + hashtable->items = + (inthash_item *) realloc(hashtable->items, alloc_size); + if (hashtable->items == NULL) { + fprintf(stderr, + "** hashtable allocation error: could not allocate %u bytes", + (int) alloc_size); + assert(! "hashtable allocation error"); + } + + /* clear upper half */ + data = (char*) hashtable->items; + memset(data + prev_size, 0, prev_size); + + /* relocate lower half items when needed */ + for(i = 0 ; i < prev_size ; i++) { + const inthash_keys hashes = inthash_calc_hashes(hashtable->items[i].name); + size_t pos; + /* currently at old position 1 */ + pos = inthash_hash_to_pos_(prev_power, hashes.hash1); + if (pos == i) { + const size_t pos = inthash_hash_to_pos(hashtable, hashes.hash1); + /* no more the expected position */ + if (pos != i) { + hashtable->items[pos] = hashtable->items[i]; + memset(&hashtable->items[i], 0, sizeof(hashtable->items[i])); + } + } + else if (inthash_hash_to_pos_(prev_power, hashes.hash2) == i) { + const size_t pos = inthash_hash_to_pos(hashtable, hashes.hash2); + /* no more the expected position */ + if (pos != i) { + hashtable->items[pos] = hashtable->items[i]; + memset(&hashtable->items[i], 0, sizeof(hashtable->items[i])); + } + } + else { + assert(! "hashtable unexpected internal error"); + } + } + + /* attempt to merge the stash if present */ + if (hashtable->stash_size != 0) { + size_t i; + /* backup stash and reset it */ + inthash_item stash[STASH_SIZE]; + memcpy(&stash, hashtable->stash, sizeof(hashtable->stash)); + hashtable->stash_size = 0; + /* insert all items */ + for(i = 0 ; i < hashtable->stash_size ; i++) { + const int ret = inthash_write_value_(hashtable, stash[i].name, stash[i].value); + if (ret == 0) { + assert(! "hashtable duplicate key when merging the stash"); + } + inthash_free_name(hashtable, stash[i].name); } } - /* Insert */ - h->value = value; - return 0; + } - h = h->next; } - // Not found, add it! - inthash_add_value(hashtable, name, value); - return 1; + + return ret; } // Increment pos value, create one if necessary (=0) @@ -152,44 +522,7 @@ void inthash_add(inthash hashtable, const char *name, intptr_t intvalue) { memset(&value, 0, sizeof(value)); value.intg = intvalue; - inthash_add_value(hashtable, name, value); -} - -void inthash_add_value(inthash hashtable, const char *name, inthash_value value) { - int pos = (inthash_key(name) % hashtable->hash_size); - inthash_chain **h = &hashtable->hash[pos]; - - while(*h) - h = &((*h)->next); - *h = (inthash_chain *) calloct(1, sizeof(inthash_chain) - + strlen(name) + 2); - if (*h) { - (*h)->name = ((char *) (*h)) + sizeof(inthash_chain); - (*h)->next = NULL; - strcpybuff((*h)->name, name); - (*h)->value = value; - hashtable->nitems++; - } -} - -void *inthash_addblk(inthash hashtable, const char *name, int blksize) { - int pos = (inthash_key(name) % hashtable->hash_size); - inthash_chain **h = &hashtable->hash[pos]; - - while(*h) - h = &((*h)->next); - *h = (inthash_chain *) calloct(1, sizeof(inthash_chain) - + strlen(name) + 2 + blksize); - if (*h) { - (*h)->name = ((char *) (*h)) + sizeof(inthash_chain); - (*h)->next = NULL; - strcpybuff((*h)->name, name); - (*h)->value.ptr = - (void *) (((char *) (*h)) + sizeof(inthash_chain) + strlen(name) + 2); - hashtable->nitems++; - return (*h)->value.ptr; - } - return NULL; + inthash_write_value(hashtable, name, value); } int inthash_read(inthash hashtable, const char *name, intptr_t * intvalue) { @@ -203,17 +536,38 @@ int inthash_read(inthash hashtable, const char *name, intptr_t * intvalue) { int inthash_read_value(inthash hashtable, const char *name, inthash_value * value) { - int pos = (inthash_key(name) % hashtable->hash_size); - inthash_chain *h = hashtable->hash[pos]; - - while(h) { - if (strcmp(h->name, name) == 0) { - if (value != NULL) - *value = h->value; - return 1; + const inthash_keys hashes = inthash_calc_hashes(name); + size_t pos; + + /* found at position 1 ? */ + pos = inthash_hash_to_pos(hashtable, hashes.hash1); + if (inthash_matches(hashtable, pos, name)) { + if (value != NULL) + *value = hashtable->items[pos].value; + return 1; + } + + /* found at position 2 ? */ + pos = inthash_hash_to_pos(hashtable, hashes.hash2); + if (inthash_matches(hashtable, pos, name)) { + if (value != NULL) + *value = hashtable->items[pos].value; + return 1; + } + + /* find in stash ? */ + if (hashtable->stash_size != 0) { + size_t i; + for(i = 0 ; i < hashtable->stash_size ; i++) { + if (strcmp(hashtable->stash[i].name, name) == 0) { + if (value != NULL) + *value = hashtable->stash[i].value; + return 1; + } } - h = h->next; } + + /* not found */ return 0; } @@ -221,43 +575,47 @@ int inthash_exists(inthash hashtable, const char *name) { return inthash_read_value(hashtable, name, NULL); } -int inthash_remove(inthash hashtable, const char *name) { - int pos = (inthash_key(name) % hashtable->hash_size); - inthash_chain **h = &hashtable->hash[pos]; - t_inthash_freehandler free_handler = NULL; +static int inthash_remove_(inthash hashtable, const char *name) { + const inthash_keys hashes = inthash_calc_hashes(name); + size_t pos; - if (hashtable->flag_valueismalloc) { - if (hashtable->free_handler) - free_handler = hashtable->free_handler; - else - free_handler = inthash_default_free_handler; + /* found at position 1 ? */ + pos = inthash_hash_to_pos(hashtable, hashes.hash1); + if (inthash_matches(hashtable, pos, name)) { + inthash_del_item(hashtable, &hashtable->items[pos]); + return 1; } - while(*h) { - if (strcmp((*h)->name, name) == 0) { - inthash_chain *next; - - if (free_handler) { - if ((*h)->value.ptr) { - void *ptr = (*h)->value.ptr; - - if (free_handler) - free_handler(ptr); - else - freet(ptr); - (*h)->value.ptr = 0; + + /* found at position 2 ? */ + pos = inthash_hash_to_pos(hashtable, hashes.hash2); + if (inthash_matches(hashtable, pos, name)) { + inthash_del_item(hashtable, &hashtable->items[pos]); + return 1; + } + + /* find in stash ? */ + if (hashtable->stash_size != 0) { + size_t i; + for(i = 0 ; i < hashtable->stash_size ; i++) { + if (strcmp(hashtable->stash[i].name, name) == 0) { + inthash_del_item(hashtable, &hashtable->stash[i]); + for( ; i + 1 < hashtable->stash_size ; i++) { + hashtable->stash[i] = hashtable->stash[i + 1]; } + hashtable->stash_size--; + return 1; } - next = (*h)->next; - freet(*h); - *h = next; - hashtable->nitems--; - return 1; } - h = &((*h)->next); } + + /* not found */ return 0; } +int inthash_remove(inthash hashtable, const char *name) { + return inthash_remove_(hashtable, name); +} + int inthash_readptr(inthash hashtable, const char *name, intptr_t * value) { int ret; @@ -268,63 +626,29 @@ int inthash_readptr(inthash hashtable, const char *name, intptr_t * value) { return ret; } -static void inthash_init(inthash hashtable) { - unsigned int i; - - for(i = 0; i < hashtable->hash_size; i++) { - hashtable->hash[i] = NULL; - } -} - -static void inthash_delchain(inthash_chain * hash, - t_inthash_freehandler free_handler) { - while(hash != NULL) { - inthash_chain *next = hash->next; - - if (free_handler) { // pos is a malloc() block, delete it! - if (hash->value.ptr) { - void *ptr = hash->value.ptr; - - if (free_handler) - free_handler(ptr); - else - freet(ptr); - hash->value.ptr = 0; - } - } - freet(hash); - hash = next; - } -} - -static void inthash_default_free_handler(void *value) { - if (value) - freet(value); -} - -// -- - -inthash inthash_new(int size) { - inthash hashtable = (inthash) calloct(1, sizeof(struct_inthash)); +inthash inthash_new(size_t initial_size) { + inthash hashtable = (inthash) calloc(1, sizeof(struct_inthash)); if (hashtable) { - hashtable->hash_size = 0; + size_t size; + for(size = 8 ; POW2(size) < initial_size ; size++) ; hashtable->flag_valueismalloc = 0; - if ((hashtable->hash = - (inthash_chain **) calloct(size, sizeof(inthash_chain *)))) { - hashtable->hash_size = size; - inthash_init(hashtable); + if ((hashtable->items = + (inthash_item *) calloc(POW2(size), sizeof(inthash_item)))) { + hashtable->hash_size_power = size; } hashtable->nitems = 0; + hashtable->stash_size = 0; + hashtable->string_pool = NULL; + hashtable->string_pool_size = 0; + hashtable->string_pool_capacity = 0; + hashtable->string_pool_chars = 0; } return hashtable; } int inthash_created(inthash hashtable) { - if (hashtable) - if (hashtable->hash) - return 1; - return 0; + return hashtable != NULL && hashtable->items != NULL; } void inthash_value_is_malloc(inthash hashtable, int flag) { @@ -336,34 +660,52 @@ void inthash_value_set_free_handler(inthash hashtable, hashtable->free_handler = free_handler; } -unsigned int inthash_nitems(inthash hashtable) { +size_t inthash_nitems(inthash hashtable) { if (hashtable != NULL) return hashtable->nitems; return 0; } -void inthash_delete(inthash * hashtable) { - if (hashtable) { - if (*hashtable) { - if ((*hashtable)->hash) { - unsigned int i; - t_inthash_freehandler free_handler = NULL; - - if ((*hashtable)->flag_valueismalloc) { - if ((*hashtable)->free_handler) - free_handler = (*hashtable)->free_handler; - else - free_handler = inthash_default_free_handler; +void inthash_delete(inthash *phashtable) { + if (phashtable != NULL) { + inthash hashtable = *phashtable; + if (hashtable != NULL) { + if (hashtable->items != NULL) { + /* we need to delete values */ + const size_t hash_size = POW2(hashtable->hash_size_power); + const t_inthash_freehandler free_handler = + hashtable->flag_valueismalloc ? + ( hashtable->free_handler != NULL + ? hashtable->free_handler : inthash_default_free_handler ) + : NULL; + size_t i; + /* wipe hashtable */ + for(i = 0 ; i < hash_size ; i++) { + if (!inthash_is_free(hashtable, i)) { + if (free_handler != NULL) { + free_handler(hashtable->items[i].value.ptr); + } + hashtable->items[i].value.ptr = NULL; + inthash_free_name(hashtable, hashtable->items[i].name); + hashtable->items[i].name = NULL; + } } - for(i = 0; i < (*hashtable)->hash_size; i++) { - inthash_delchain((*hashtable)->hash[i], free_handler); - (*hashtable)->hash[i] = NULL; + /* wipe auxiliary stash if any */ + for(i = 0 ; i < hashtable->stash_size ; i++) { + if (free_handler != NULL) { + free_handler(hashtable->stash[i].value.ptr); + } + hashtable->stash[i].value.ptr = NULL; + inthash_free_name(hashtable, hashtable->stash[i].name); + hashtable->stash[i].name = NULL; } - freet((*hashtable)->hash); - (*hashtable)->hash = NULL; } - freet(*hashtable); - *hashtable = NULL; + /* wipe top-level */ + hashtable->hash_size_power = 0; + hashtable->nitems = 0; + free(hashtable->items); + free(hashtable); + *phashtable = NULL; } } } @@ -373,25 +715,30 @@ void inthash_delete(inthash * hashtable) { struct_inthash_enum inthash_enum_new(inthash hashtable) { struct_inthash_enum e; - memset(&e, 0, sizeof(e)); e.index = 0; - e.item = NULL; e.table = hashtable; return e; } -inthash_chain *inthash_enum_next(struct_inthash_enum * e) { - inthash_chain *item = NULL; - - if (e != NULL) { - while(e->item == NULL && e->index < (int) e->table->hash_size) { - e->item = e->table->hash[e->index]; - e->index++; - } - if (e->item != NULL) { - item = e->item; - e->item = e->item->next; - } +inthash_item *inthash_enum_next(struct_inthash_enum * e) { + const size_t hash_size = POW2(e->table->hash_size_power); + for( ; e->index < hash_size + && inthash_is_free(e->table, e->index) ; e->index++) ; + /* enumerate all table */ + if (e->index < hash_size) { + inthash_item *const next = &e->table->items[e->index]; + e->index++; + return next; + } + /* enumerate stash if present */ + else if (e->index < hash_size + e->table->stash_size) { + const size_t index = e->index - hash_size; + inthash_item *const next = &e->table->stash[index]; + e->index++; + return next; + } + /* eof */ + else { + return NULL; } - return item; } diff --git a/src/htsinthash.h b/src/htsinthash.h index a085b4b..7f2675e 100644 --- a/src/htsinthash.h +++ b/src/htsinthash.h @@ -56,16 +56,17 @@ typedef union inthash_value { #define INTHASH_VALUE_NULL { 0 } // simple hash table for other routines -#ifndef HTS_DEF_FWSTRUCT_inthash_chain -#define HTS_DEF_FWSTRUCT_inthash_chain -typedef struct inthash_chain inthash_chain; +#ifndef HTS_DEF_FWSTRUCT_inthash_item +#define HTS_DEF_FWSTRUCT_inthash_item +typedef struct inthash_item inthash_item; #endif -struct inthash_chain { +struct inthash_item { char *name; /* key (name) */ inthash_value value; /* value */ - struct inthash_chain *next; /* next element */ }; +typedef inthash_item inthash_chain; + typedef void (*t_inthash_freehandler) (void *value); /* inthash structure */ @@ -73,12 +74,19 @@ typedef void (*t_inthash_freehandler) (void *value); #define HTS_DEF_FWSTRUCT_struct_inthash typedef struct struct_inthash struct_inthash, *inthash; #endif +#define STASH_SIZE 16 struct struct_inthash { - inthash_chain **hash; - unsigned int nitems; + inthash_item *items; + size_t nitems; t_inthash_freehandler free_handler; - unsigned int hash_size; - unsigned short flag_valueismalloc; + size_t hash_size_power; + inthash_item stash[STASH_SIZE]; + size_t stash_size; + char *string_pool; + size_t string_pool_size; + size_t string_pool_capacity; + size_t string_pool_chars; + unsigned char flag_valueismalloc; }; // enumeration @@ -88,8 +96,7 @@ typedef struct struct_inthash_enum struct_inthash_enum; #endif struct struct_inthash_enum { inthash table; - int index; - inthash_chain *item; + size_t index; }; /* Library internal definictions */ @@ -98,9 +105,9 @@ struct struct_inthash_enum { // main functions: /* Hash functions: */ -inthash inthash_new(int size); /* Create a new hash table */ +inthash inthash_new(size_t size); /* Create a new hash table */ int inthash_created(inthash hashtable); /* Test if the hash table was successfully created */ -unsigned int inthash_nitems(inthash hashtable); /* Number of items */ +size_t inthash_nitems(inthash hashtable); /* Number of items */ void inthash_delete(inthash * hashtable); /* Delete an hash table */ void inthash_value_is_malloc(inthash hashtable, int flag); /* Is the 'value' member a value that needs to be free()'ed ? */ void inthash_value_set_free_handler(inthash hashtable, /* value free() handler (default one is 'free') */ @@ -116,8 +123,6 @@ int inthash_read_value(inthash hashtable, const char *name, inthash_value * value); int inthash_write_value(inthash hashtable, const char *name, inthash_value value); -void inthash_add_value(inthash hashtable, const char *name, - inthash_value value); /* */ int inthash_read_pvoid(inthash hashtable, const char *name, void **value); int inthash_write_pvoid(inthash hashtable, const char *name, void *value); @@ -125,14 +130,13 @@ void inthash_add_pvoid(inthash hashtable, const char *name, void *value); /* */ void inthash_add(inthash hashtable, const char *name, intptr_t value); /* Add entry in the hash table */ -void *inthash_addblk(inthash hashtable, const char *name, int blksize); /* Add entry in the hash table and set value to a new memory block */ int inthash_write(inthash hashtable, const char *name, intptr_t value); /* Overwrite/add entry in the hash table */ int inthash_inc(inthash hashtable, const char *name); /* Increment entry in the hash table */ int inthash_remove(inthash hashtable, const char *name); /* Remove an entry from the hashtable */ /* */ struct_inthash_enum inthash_enum_new(inthash hashtable); /* Start a new enumerator */ -inthash_chain *inthash_enum_next(struct_inthash_enum * e); /* Fetch an item in the enumerator */ +inthash_item *inthash_enum_next(struct_inthash_enum * e); /* Fetch an item in the enumerator */ /* End of hash functions: */ #endif |