diff options
author | Xavier Roche <xroche@users.noreply.github.com> | 2013-06-26 20:05:49 +0000 |
---|---|---|
committer | Xavier Roche <xroche@users.noreply.github.com> | 2013-06-26 20:05:49 +0000 |
commit | b5e663cd80904d70ad2f0d6202a0c3ce6e582b58 (patch) | |
tree | c342b5214cdb589fd3bbf094ca7c9744af3a0948 | |
parent | 588d526aed19151bf6ae14e33f4bc433a7ee8b69 (diff) |
Cosmetic and documentation, logging.
-rw-r--r-- | src/htsinthash.c | 122 | ||||
-rw-r--r-- | src/htsinthash.h | 147 |
2 files changed, 201 insertions, 68 deletions
diff --git a/src/htsinthash.c b/src/htsinthash.c index 0110915..b45fed7 100644 --- a/src/htsinthash.c +++ b/src/htsinthash.c @@ -55,16 +55,26 @@ Please visit our Website: http://www.httrack.com #include "htsmd5.h" #endif +/** Size of auxiliary stash. **/ #define STASH_SIZE 16 +/** Minimum value for lg_size. **/ +#define MIN_LG_SIZE 8 + +/** Minimum value for pool.capacity. **/ +#define MIN_POOL_CAPACITY 256 + /* 64-bit constant */ #if defined(WIN32) #define UINT_64_CONST(X) ((uint64_t) (X)) +#define UINT_64_FORMAT "I64d" #elif (defined(_LP64) || defined(__x86_64__) \ || defined(__powerpc64__) || defined(__64BIT__)) #define UINT_64_CONST(X) ((uint64_t) X##UL) +#define UINT_64_FORMAT "ld" #else #define UINT_64_CONST(X) ((uint64_t) X##ULL) +#define UINT_64_FORMAT "lld" #endif /** Hashtable. **/ @@ -235,7 +245,7 @@ static void inthash_realloc_pool(inthash hashtable) { const size_t hash_size = POW2(hashtable->lg_size); char *const oldbase = hashtable->pool.buffer; size_t i; - int count = 0; + size_t count = 0; /* statistics */ hashtable->stats.pool_realloc_count++; @@ -244,9 +254,10 @@ static void inthash_realloc_pool(inthash hashtable) { hashtable->pool.buffer = realloc(hashtable->pool.buffer, hashtable->pool.capacity); if (hashtable->pool.buffer == NULL) { - fprintf(stderr, - "** hashtable string pool allocation error: could not allocate %u bytes", - (int) hashtable->pool.capacity); + inthash_log(hashtable, + "** hashtable string pool allocation error: could not allocate " + "%"UINT_64_FORMAT" bytes", + (uint64_t) hashtable->pool.capacity); inthash_assert(! "hashtable string pool allocation error"); } @@ -269,8 +280,9 @@ static void inthash_realloc_pool(inthash hashtable) { #undef RECOMPUTE_STRING - inthash_log(hashtable, "reallocated string pool for %d strings: %d bytes", - count, (int) hashtable->pool.capacity); + inthash_log(hashtable, "reallocated string pool for " + "%"UINT_64_FORMAT" strings: %"UINT_64_FORMAT" bytes", + (uint64_t) count, (uint64_t) hashtable->pool.capacity); } /* compact string pool ; does not change the capacity */ @@ -279,7 +291,7 @@ static void inthash_compact_pool(inthash hashtable) { size_t i; char* old_pool = hashtable->pool.buffer; const size_t old_size = hashtable->pool.size; - int count = 0; + size_t count = 0; /* statistics */ hashtable->stats.pool_compact_count++; @@ -289,9 +301,10 @@ static void inthash_compact_pool(inthash hashtable) { hashtable->pool.size = 0; hashtable->pool.used = 0; if (hashtable->pool.buffer == NULL) { - fprintf(stderr, - "** hashtable string pool compaction error: could not allocate %u bytes", - (int) hashtable->pool.capacity); + inthash_log(hashtable, + "** hashtable string pool compaction error: could not allocate " + "%"UINT_64_FORMAT" bytes", + (uint64_t) hashtable->pool.capacity); inthash_assert(! "hashtable string pool compaction error"); } @@ -326,8 +339,10 @@ static void inthash_compact_pool(inthash hashtable) { free(old_pool); inthash_log(hashtable, - "compacted string pool for %d strings: %d bytes => %d bytes", - count, (int) old_size, (int) hashtable->pool.size); + "compacted string pool for %"UINT_64_FORMAT" strings: " + "%"UINT_64_FORMAT" bytes => %"UINT_64_FORMAT" bytes", + (uint64_t) count, (uint64_t) old_size, + (uint64_t) hashtable->pool.size); } static char* inthash_dup_name(inthash hashtable, const char *name) { @@ -336,7 +351,7 @@ static char* inthash_dup_name(inthash hashtable, const char *name) { if (hashtable->pool.capacity - hashtable->pool.size < len) { if (hashtable->pool.capacity == 0) { - hashtable->pool.capacity = 16; + hashtable->pool.capacity = MIN_POOL_CAPACITY; } for( ; hashtable->pool.capacity - hashtable->pool.size < len ; hashtable->pool.capacity <<= 1) ; @@ -492,8 +507,9 @@ static int inthash_write_value_(inthash hashtable, const char *name, /* prepare cuckoo ; let's take position 1 */ else { cuckoo_hash = initial_cuckoo_hash = hashes.hash1; - inthash_log(hashtable, "debug:collision with '%s' at %d (%x)", - item.name, pos, cuckoo_hash); + inthash_log(hashtable, + "debug:collision with '%s' at %"UINT_64_FORMAT" (%x)", + item.name, (uint64_t) pos, cuckoo_hash); } } @@ -501,8 +517,9 @@ static int inthash_write_value_(inthash hashtable, const char *name, for(loops = POW2(hashtable->lg_size) ; loops != 0 ; --loops) { const size_t pos = inthash_hash_to_pos(hashtable, cuckoo_hash); - inthash_log(hashtable, "\tdebug:placing cuckoo '%s' at %d (%x)", - item.name, pos, cuckoo_hash); + inthash_log(hashtable, + "\tdebug:placing cuckoo '%s' at %"UINT_64_FORMAT" (%x)", + item.name, (uint64_t) pos, cuckoo_hash); /* place at alternate free position ? */ if (inthash_is_free(hashtable, pos)) { @@ -547,7 +564,7 @@ static int inthash_write_value_(inthash hashtable, const char *name, } } - /* emergency stashing */ + /* emergency stashing for the rare cases of collisions */ if (hashtable->stash.size < STASH_SIZE) { hashtable->stash.items[hashtable->stash.size] = item; hashtable->stash.size++; @@ -603,9 +620,10 @@ int inthash_write_value(inthash hashtable, const char *name, hashtable->items = (inthash_item *) realloc(hashtable->items, alloc_size); if (hashtable->items == NULL) { - fprintf(stderr, - "** hashtable allocation error: could not allocate %u bytes", - (int) alloc_size); + inthash_log(hashtable, + "** hashtable allocation error: " + "could not allocate %"UINT_64_FORMAT" bytes", + (uint64_t) alloc_size); inthash_assert(! "hashtable allocation error"); } @@ -643,8 +661,9 @@ int inthash_write_value(inthash hashtable, const char *name, } } - inthash_log(hashtable, "expanded hashtable to %d elements", - (int) POW2(hashtable->lg_size)); + inthash_log(hashtable, + "expanded hashtable to %"UINT_64_FORMAT" elements", + (uint64_t) POW2(hashtable->lg_size)); /* attempt to merge the stash if present */ if (hashtable->stash.size != 0) { @@ -673,11 +692,12 @@ int inthash_write_value(inthash hashtable, const char *name, /* logging */ inthash_assert(hashtable->stash.size <= old_size); if (hashtable->stash.size < old_size) { - inthash_log(hashtable, "reduced stash size from %d to %d", - (int) old_size, (int) hashtable->stash.size); + inthash_log(hashtable, "reduced stash size from %"UINT_64_FORMAT" " + "to %"UINT_64_FORMAT, + (uint64_t) old_size, (uint64_t) hashtable->stash.size); } else { - inthash_log(hashtable, "stash has still %d elements", - (int) hashtable->stash.size); + inthash_log(hashtable, "stash has still %"UINT_64_FORMAT" elements", + (uint64_t) hashtable->stash.size); } } @@ -869,7 +889,7 @@ inthash inthash_new(size_t initial_size) { if (hashtable) { size_t size; - for(size = 8 ; POW2(size) < initial_size ; size++) ; + for(size = MIN_LG_SIZE ; POW2(size) < initial_size ; size++) ; if ((hashtable->items = (inthash_item *) calloc(POW2(size), sizeof(inthash_item)))) { hashtable->lg_size = size; @@ -917,23 +937,43 @@ size_t inthash_nitems(inthash hashtable) { return 0; } +size_t inthash_memory_size(inthash hashtable) { + const size_t size_struct = sizeof(struct_inthash); + const size_t hash_size = POW2(hashtable->lg_size)*sizeof(inthash_item); + const size_t pool_size = hashtable->pool.capacity*sizeof(char); + return size_struct + hash_size + pool_size; +} + +static void inthash_log_stats(inthash hashtable) { + inthash_log(hashtable, "freeing table ; " + "writes=%"UINT_64_FORMAT" " + "(new=%"UINT_64_FORMAT") " + "moved=%"UINT_64_FORMAT " " + "stashed=%"UINT_64_FORMAT" " + "max-stash-size=%"UINT_64_FORMAT" " + "avg-moved=%g " + "rehash=%"UINT_64_FORMAT" " + "pool-compact=%"UINT_64_FORMAT" " + "pool-realloc=%"UINT_64_FORMAT" " + "memory=%"UINT_64_FORMAT, + (uint64_t) hashtable->stats.write_count, + (uint64_t) hashtable->stats.add_count, + (uint64_t) hashtable->stats.cuckoo_moved, + (uint64_t) hashtable->stats.stash_added, + (uint64_t) hashtable->stats.max_stash_size, + (double) hashtable->stats.cuckoo_moved / (double) hashtable->stats.add_count, + (uint64_t) hashtable->stats.rehash_count, + (uint64_t) hashtable->stats.pool_compact_count, + (uint64_t) hashtable->stats.pool_realloc_count, + (uint64_t) inthash_memory_size(hashtable) + ); +} + void inthash_delete(inthash *phashtable) { if (phashtable != NULL) { inthash hashtable = *phashtable; if (hashtable != NULL) { - inthash_log(hashtable, "freeing table ; " - "writes=%d (new=%d) moved=%d stashed=%d collision-room=%d " - "avg-moved=%g rehash=%d pool-compact=%d pool-realloc=%d", - (int) hashtable->stats.write_count, - (int) hashtable->stats.add_count, - (int) hashtable->stats.cuckoo_moved, - (int) hashtable->stats.stash_added, - (int) hashtable->stats.max_stash_size, - (double) hashtable->stats.cuckoo_moved / (double) hashtable->stats.add_count, - (int) hashtable->stats.rehash_count, - (int) hashtable->stats.pool_compact_count, - (int) hashtable->stats.pool_realloc_count - ); + inthash_log_stats(hashtable); if (hashtable->items != NULL) { /* we need to delete values */ const size_t hash_size = POW2(hashtable->lg_size); diff --git a/src/htsinthash.h b/src/htsinthash.h index b448840..90f8401 100644 --- a/src/htsinthash.h +++ b/src/htsinthash.h @@ -85,10 +85,10 @@ typedef union inthash_value { typedef struct inthash_item inthash_item; #endif -/* Hash key (32-bit) */ +/** Hash key (32-bit) **/ typedef uint32_t inthash_key; -/* Pair of hashes */ +/** Pair of hashes **/ typedef struct inthash_keys { inthash_key hash1; inthash_key hash2; @@ -109,10 +109,10 @@ struct inthash_item { inthash_keys hashes; }; -/* Alias for legacy code */ +/** Alias for legacy code. **/ typedef inthash_item inthash_chain; -/* Free handler */ +/** Free handler **/ typedef void (*t_inthash_freehandler) (void *value); /** Hashtable (opaque structure). **/ @@ -136,42 +136,135 @@ struct struct_inthash_enum { /* Library internal definictions */ #ifdef HTS_INTERNAL_BYTECODE -/* Hash functions: */ -inthash inthash_new(size_t size); /* Create a new hash table */ -int inthash_created(inthash hashtable); /* Test if the hash table was successfully created */ -size_t inthash_nitems(inthash hashtable); /* Number of items */ -void inthash_delete(inthash * hashtable); /* Delete an hash table */ -void inthash_value_is_malloc(inthash hashtable, int flag); /* Is the 'value' member a value that needs to be free()'ed ? */ -void inthash_value_set_free_handler(inthash hashtable, /* value free() handler (default one is 'free') */ +/** + * Create a new hashtable, with initial bucket size of 'size'. + * If size is 0, use the default minimal bucket size. + * Return a non-NULL pointer upon success. + **/ +inthash inthash_new(size_t size); + +/** + * Was the hashtable successfully created ? + * Return non-zero value if the hashtable is valid. + **/ +int inthash_created(inthash hashtable); + +/** + * Delete a hashtable, freeing all entries. + **/ +void inthash_delete(inthash * hashtable); + +/** + * Return the number of items in the hashtable. + **/ +size_t inthash_nitems(inthash hashtable); + +/** + * Return the memory size taken by the hashtable. + * (This does not take account of the possible memory taken by values) + **/ +size_t inthash_memory_size(inthash hashtable); + +/** + * Are the values inside this hashtable to be free'd ? + **/ +void inthash_value_is_malloc(inthash hashtable, int flag); + +/** + * Set a free handler for values. This handler will be called when a value + * is to be removed from the hashtable. + **/ +void inthash_value_set_free_handler(inthash hashtable, t_inthash_freehandler free_handler); -/* */ -int inthash_read(inthash hashtable, const char *name, intptr_t * intvalue); /* Read entry from the hash table */ -int inthash_readptr(inthash hashtable, const char *name, intptr_t * intvalue); /* Same function, but returns 0 upon null ptr */ -int inthash_exists(inthash hashtable, const char *name); /* Is the key existing ? */ +/** + * Read an integer entry from the hashtable. + * Return non-zero value upon success and sets intvalue. + **/ +int inthash_read(inthash hashtable, const char *name, intptr_t * intvalue); -/* */ +/** + * Same as inthash_read(), but return 0 is the value was zero. + **/ +int inthash_readptr(inthash hashtable, const char *name, intptr_t * intvalue); + +/** + * Return non-zero value if the given entry exists. + **/ +int inthash_exists(inthash hashtable, const char *name); + +/** + * Read an entry from the hashtable. + * Return non-zero value upon success and sets value. + **/ int inthash_read_value(inthash hashtable, const char *name, inthash_value * value); + +/** + * Write an entry to the hashtable. + * Return non-zero value if the entry was added, zero if it was replaced. + **/ int inthash_write_value(inthash hashtable, const char *name, inthash_value value); -/* */ +/** + * Read a pointer entry from the hashtable. + * Return non-zero value upon success and sets value. + **/ int inthash_read_pvoid(inthash hashtable, const char *name, void **value); + +/** + * Write a pointer entry to the hashtable. + * Return non-zero value if the entry was added, zero if it was replaced. + **/ int inthash_write_pvoid(inthash hashtable, const char *name, void *value); + +/** + * Alias to inthash_write_pvoid() + **/ void inthash_add_pvoid(inthash hashtable, const char *name, void *value); -/* */ -void inthash_add(inthash hashtable, const char *name, intptr_t value); /* Add entry in the hash table */ -int inthash_write(inthash hashtable, const char *name, intptr_t value); /* Overwrite/add entry in the hash table */ -int inthash_inc(inthash hashtable, const char *name); /* Increment entry in the hash table */ -int inthash_dec(inthash hashtable, const char *name); /* Decrement entry in the hash table */ -int inthash_remove(inthash hashtable, const char *name); /* Remove an entry from the hashtable */ +/** + * Write an integer entry to the hashtable. + * Return non-zero value if the entry was added, zero if it was replaced. + **/ +int inthash_write(inthash hashtable, const char *name, intptr_t value); + +/** + * Alias to inthash_write() + **/ +void inthash_add(inthash hashtable, const char *name, intptr_t value); + +/** + * Increment an entry value in the hashtable + * (or create a new entry with value 1 if it does not yet exist) + * Return non-zero value if the entry was added, zero if it was changed. + **/ +int inthash_inc(inthash hashtable, const char *name); + +/** + * Decrement an entry value in the hashtable + * (or create a new entry with value -1 if it does not yet exist) + * Return non-zero value if the entry was added, zero if it was changed. + **/ +int inthash_dec(inthash hashtable, const char *name); -/* */ -struct_inthash_enum inthash_enum_new(inthash hashtable); /* Start a new enumerator */ -inthash_item *inthash_enum_next(struct_inthash_enum * e); /* Fetch an item in the enumerator */ +/** + * Remove an entry from the hashtable + * Return non-zero value if the entry was removed, zero otherwise. + **/ +int inthash_remove(inthash hashtable, const char *name); + +/** + * Return a new enumerator. + * Note: you may not add or delete entries while enumerating. + **/ +struct_inthash_enum inthash_enum_new(inthash hashtable); + +/** + * Enumerate the next entry. + **/ +inthash_item *inthash_enum_next(struct_inthash_enum * e); -/* End of hash functions: */ #endif #endif |