diff options
-rw-r--r-- | src/htsinthash.c | 142 | ||||
-rw-r--r-- | src/htsinthash.h | 58 |
2 files changed, 164 insertions, 36 deletions
diff --git a/src/htsinthash.c b/src/htsinthash.c index d6a6e55..b273a01 100644 --- a/src/htsinthash.c +++ b/src/htsinthash.c @@ -146,8 +146,27 @@ struct struct_inthash { /** Settings. **/ struct { - /** Items value de-allocator. (may be NULL) **/ - t_inthash_freehandler free_handler; + /** How to handle values (might be NULL). **/ + struct { + /** free() **/ + t_inthash_freehandler free; + /** opaque argument **/ + void *arg; + } value; + + /** How to handle names (might be NULL). **/ + struct { + /** strdup() **/ + t_inthash_duphandler dup; + /** free() **/ + t_inthash_freehandler free; + /** hash **/ + t_inthash_hasheshandler hash; + /** comparison **/ + t_inthash_cmphandler equals; + /** opaque argument **/ + void *arg; + } key; } custom; }; @@ -206,7 +225,7 @@ static void inthash_nolog(const inthash hashtable, const char *format, ...) HTS_PRINTF_FUN(2, 3) { } -static inthash_keys inthash_calc_hashes(const char *value) { +inthash_keys inthash_hash_value(const char *value) { #if HTS_INTHASH_USES_MD5 == 1 /* compute a regular MD5 and extract two 32-bit integers */ MD5_CTX ctx; @@ -264,25 +283,41 @@ static inthash_keys inthash_calc_hashes(const char *value) { #endif } +static HTS_INLINE inthash_keys inthash_calc_hashes(inthash hashtable, + const char *value) { + return hashtable->custom.key.hash == NULL + ? inthash_hash_value(value) + : hashtable->custom.key.hash(hashtable->custom.key.arg, value); +} + /* position 'pos' is free ? */ static HTS_INLINE int inthash_is_free(const inthash hashtable, size_t pos) { return hashtable->items[pos].name == NULL; } -static HTS_INLINE int inthash_matches_(const inthash_item *const item, +/* compare two keys ; by default using strcmp() */ +static HTS_INLINE int inthash_equals(inthash hashtable, + const char *a, const char *b) { + return hashtable->custom.key.equals == NULL + ? strcmp(a, b) == 0 + : hashtable->custom.key.equals(hashtable->custom.key.arg, a, b); +} + +static HTS_INLINE int inthash_matches_(inthash hashtable, + const inthash_item *const item, const char *name, const inthash_keys *hashes) { return item->name != NULL && item->hashes.hash1 == hashes->hash1 && item->hashes.hash2 == hashes->hash2 - && strcmp(item->name, name) == 0; + && inthash_equals(hashtable, item->name, name); } -static HTS_INLINE int inthash_matches(const inthash hashtable, size_t pos, +static HTS_INLINE int inthash_matches(inthash hashtable, size_t pos, const char *name, const inthash_keys *hashes) { const inthash_item *const item = &hashtable->items[pos]; - return inthash_matches_(item, name, hashes); + return inthash_matches_(hashtable, item, name, hashes); } /* compact string pool ; does not change the capacity */ @@ -293,6 +328,9 @@ static void inthash_compact_pool(inthash hashtable, size_t capacity) { const size_t old_size = hashtable->pool.size; size_t count = 0; + /* we manage the string pool */ + assert(hashtable->custom.key.dup == NULL); + /* statistics */ hashtable->stats.pool_compact_count++; @@ -361,6 +399,9 @@ static void inthash_realloc_pool(inthash hashtable, size_t capacity) { size_t i; size_t count = 0; + /* we manage the string pool */ + assert(hashtable->custom.key.dup == NULL); + /* compact instead ? */ if (hashtable->pool.used < ( hashtable->pool.size*3 ) / 4) { inthash_compact_pool(hashtable, capacity); @@ -408,7 +449,7 @@ static void inthash_realloc_pool(inthash hashtable, size_t capacity) { (uint64_t) count, (uint64_t) hashtable->pool.capacity); } -static char* inthash_dup_name(inthash hashtable, const char *name) { +static char* inthash_dup_name_internal(inthash hashtable, const char *name) { const size_t len = strlen(name) + 1; char *s; @@ -428,8 +469,16 @@ static char* inthash_dup_name(inthash hashtable, const char *name) { return s; } -/* note: pointer must have been kicked from the pool first */ -static void inthash_free_name(inthash hashtable, char *name) { +/* duplicate a key. default is to use the internal pool. */ +static HTS_INLINE char* inthash_dup_name(inthash hashtable, const char *name) { + return hashtable->custom.key.dup == NULL + ? inthash_dup_name_internal(hashtable, name) + : hashtable->custom.key.dup(hashtable->custom.key.arg, name); +} + +/* internal pool free handler. + note: pointer must have been kicked from the pool first */ +static void inthash_free_key_internal(inthash hashtable, char *name) { const size_t len = strlen(name) + 1; hashtable->pool.used -= len; if (hashtable->pool.used < hashtable->pool.size / 2) { @@ -437,6 +486,16 @@ static void inthash_free_name(inthash hashtable, char *name) { } } +/* free a key. default is to use the internal pool. + note: pointer must have been kicked from the pool first */ +static void inthash_free_key(inthash hashtable, char *name) { + if (hashtable->custom.key.free == NULL) { + inthash_free_key_internal(hashtable, name); + } else { + hashtable->custom.key.free(hashtable->custom.key.arg, name); + } +} + static HTS_INLINE size_t inthash_hash_to_pos_(size_t lg_size, inthash_key hash) { const inthash_key mask = POW2(lg_size) - 1; @@ -450,7 +509,7 @@ static HTS_INLINE size_t inthash_hash_to_pos(const inthash hashtable, int inthash_read_pvoid(inthash hashtable, const char *name, void **pvalue) { inthash_value value = INTHASH_VALUE_NULL; - int ret = + const int ret = inthash_read_value(hashtable, name, (pvalue != NULL) ? &value : NULL); if (pvalue != NULL) *pvalue = value.ptr; @@ -478,14 +537,14 @@ int inthash_write(inthash hashtable, const char *name, intptr_t intvalue) { return inthash_write_value(hashtable, name, value); } -static void inthash_default_free_handler(void *value) { +static void inthash_default_free_handler(void *arg, void *value) { if (value) free(value); } static void inthash_del_value_(inthash hashtable, inthash_value *pvalue) { - if (hashtable->custom.free_handler != NULL) - hashtable->custom.free_handler(pvalue->ptr); + if (hashtable->custom.value.free != NULL) + hashtable->custom.value.free(hashtable->custom.value.arg, pvalue->ptr); pvalue->ptr = NULL; } @@ -499,7 +558,7 @@ static void inthash_del_name(inthash hashtable, inthash_item *item) { item->name = NULL; /* there must be no reference remaining */ item->hashes = nullHash; /* free after detach (we may compact the pool) */ - inthash_free_name(hashtable, name); + inthash_free_key(hashtable, name); } static void inthash_del_item(inthash hashtable, inthash_item *pitem) { @@ -511,7 +570,7 @@ static int inthash_write_value_(inthash hashtable, const char *name, inthash_value value) { inthash_item item; size_t pos; - const inthash_keys hashes = inthash_calc_hashes(name); + const inthash_keys hashes = inthash_calc_hashes(hashtable, name); inthash_key cuckoo_hash, initial_cuckoo_hash; size_t loops; @@ -538,7 +597,8 @@ static int inthash_write_value_(inthash hashtable, const char *name, if (hashtable->stash.size != 0) { size_t i; for(i = 0 ; i < hashtable->stash.size ; i++) { - if (inthash_matches_(&hashtable->stash.items[i], name, &hashes)) { + if (inthash_matches_(hashtable, &hashtable->stash.items[i], name, + &hashes)) { inthash_del_value_(hashtable, &hashtable->stash.items[i].value); hashtable->stash.items[i].value = value; return 0; /* replaced */ @@ -785,7 +845,7 @@ int inthash_read(inthash hashtable, const char *name, intptr_t * intvalue) { static inthash_value* inthash_read_value_(inthash hashtable, const char *name) { - const inthash_keys hashes = inthash_calc_hashes(name); + const inthash_keys hashes = inthash_calc_hashes(hashtable, name); size_t pos; /* found at position 1 ? */ @@ -804,7 +864,8 @@ static inthash_value* inthash_read_value_(inthash hashtable, if (hashtable->stash.size != 0) { size_t i; for(i = 0 ; i < hashtable->stash.size ; i++) { - if (inthash_matches_(&hashtable->stash.items[i], name, &hashes)) { + if (inthash_matches_(hashtable, &hashtable->stash.items[i], name, + &hashes)) { return &hashtable->stash.items[i].value; } } @@ -876,7 +937,8 @@ static int inthash_remove_(inthash hashtable, const char *name, if (hashtable->stash.size != 0) { size_t i; for(i = 0 ; i < hashtable->stash.size ; i++) { - if (inthash_matches_(&hashtable->stash.items[i], name, hashes)) { + if (inthash_matches_(hashtable, &hashtable->stash.items[i], name, + hashes)) { inthash_del_item(hashtable, &hashtable->stash.items[i]); for( ; i + 1 < hashtable->stash.size ; i++) { hashtable->stash.items[i] = hashtable->stash.items[i + 1]; @@ -895,7 +957,7 @@ static int inthash_remove_(inthash hashtable, const char *name, } int inthash_remove(inthash hashtable, const char *name) { - const inthash_keys hashes = inthash_calc_hashes(name); + const inthash_keys hashes = inthash_calc_hashes(hashtable, name); size_t removed; const int ret = inthash_remove_(hashtable, name, &hashes, &removed); @@ -967,6 +1029,13 @@ inthash inthash_new(size_t initial_size) { hashtable->stats.pool_compact_count = 0; hashtable->stats.pool_realloc_count = 0; hashtable->stats.rehash_count = 0; + hashtable->custom.value.free = NULL; + hashtable->custom.value.arg = NULL; + hashtable->custom.key.dup = NULL; + hashtable->custom.key.free = NULL; + hashtable->custom.key.hash = NULL; + hashtable->custom.key.equals = NULL; + hashtable->custom.key.arg = NULL; } return hashtable; } @@ -977,17 +1046,36 @@ int inthash_created(inthash hashtable) { void inthash_value_is_malloc(inthash hashtable, int flag) { if (flag) { - if (hashtable->custom.free_handler == NULL) { - hashtable->custom.free_handler = inthash_default_free_handler; + if (hashtable->custom.value.free == NULL) { + hashtable->custom.value.free = inthash_default_free_handler; + hashtable->custom.value.arg = NULL; } } else { - hashtable->custom.free_handler = NULL; + hashtable->custom.value.free = NULL; + hashtable->custom.value.arg = NULL; } } -void inthash_value_set_free_handler(inthash hashtable, - t_inthash_freehandler free_handler) { - hashtable->custom.free_handler = free_handler; +void inthash_value_set_value_handler(inthash hashtable, + t_inthash_freehandler free, + void *arg) { + hashtable->custom.value.free = free; + hashtable->custom.value.arg = arg; +} + +void inthash_value_set_key_handler(inthash hashtable, + t_inthash_duphandler dup, + t_inthash_freehandler free, + t_inthash_hasheshandler hash, + t_inthash_cmphandler equals, + void *arg) { + /* dup and free must be consistent */ + assert( ( dup == NULL ) == ( free == NULL ) ); + hashtable->custom.key.dup = dup; + hashtable->custom.key.free = free; + hashtable->custom.key.hash = hash; + hashtable->custom.key.equals = equals; + hashtable->custom.key.arg = arg; } size_t inthash_nitems(inthash hashtable) { diff --git a/src/htsinthash.h b/src/htsinthash.h index 732fbb4..43bdd38 100644 --- a/src/htsinthash.h +++ b/src/htsinthash.h @@ -117,8 +117,19 @@ struct inthash_item { /** Alias for legacy code. **/ typedef inthash_item inthash_chain; -/** Free handler **/ -typedef void (*t_inthash_freehandler) (void *value); +/** Value free handler **/ +typedef void (*t_inthash_freehandler) (void *arg, void *value); + +/** Name dup handler. **/ +typedef char* (*t_inthash_duphandler) (void *arg, const char *name); + +/** Hash computation handler. **/ +typedef inthash_keys (*t_inthash_hasheshandler)(void *arg, const char *value); + +/** + * Value comparison handler (returns non-zero value if strings are equal). + **/ +typedef int (*t_inthash_cmphandler)(void *arg, const char *a, const char *b); /** Hashtable (opaque structure). **/ #ifndef HTS_DEF_FWSTRUCT_struct_inthash @@ -170,17 +181,40 @@ size_t inthash_nitems(inthash hashtable); **/ size_t inthash_memory_size(inthash hashtable); +/**
+ * If 'flag' is non-zero, calls inthash_value_set_value_handler() with
+ * default system free() handler function, otherwise, free the value handlers.
+ **/
+void inthash_value_is_malloc(inthash hashtable, int flag);
+ /** - * Are the values inside this hashtable to be free'd ? + * Set handlers for values. + * free: this handler will be called when a value is to be removed from + * the hashtable. if NULL, values won't be free'd. + * arg: opaque custom argument to be used by functions. + * Handler(s) MUST NOT be changed once elements have been added. **/ -void inthash_value_is_malloc(inthash hashtable, int flag); +void inthash_value_set_value_handler(inthash hashtable, + t_inthash_freehandler free, + void *arg); /** - * Set a free handler for values. This handler will be called when a value - * is to be removed from the hashtable. + * Set handlers for keys. + * dup: handler called to duplicate a key. if NULL, the internal pool is used. + * free: handler called to free a key. if NULL, the internal pool is used. + * hash: hashing handler, called to hash a key. if NULL, the default hash + * function is used. + * equals: comparison handler, returning non-zero value when two keys are + * identical. if NULL, the default comparison function is used. + * arg: opaque custom argument to be used by functions. + * Handler(s) MUST NOT be changed once elements have been added. **/ -void inthash_value_set_free_handler(inthash hashtable, - t_inthash_freehandler free_handler); +void inthash_value_set_key_handler(inthash hashtable, + t_inthash_duphandler dup, + t_inthash_freehandler free, + t_inthash_hasheshandler hash, + t_inthash_cmphandler equals, + void *arg); /** * Read an integer entry from the hashtable. @@ -261,7 +295,8 @@ int inthash_remove(inthash hashtable, const char *name); /** * Return a new enumerator. - * Note: you may not add or delete entries while enumerating. + * Note: deleting entries is safe while enumerating, but adding entries + * lead to undefined enumeration behavior (yet safe). **/ struct_inthash_enum inthash_enum_new(inthash hashtable); @@ -270,6 +305,11 @@ struct_inthash_enum inthash_enum_new(inthash hashtable); **/ inthash_item *inthash_enum_next(struct_inthash_enum * e); +/** + * Compute a hash, given a string value. + **/ +inthash_keys inthash_hash_value(const char *value); + #endif #endif |