summaryrefslogtreecommitdiff
path: root/src/htsinthash.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/htsinthash.c')
-rw-r--r--src/htsinthash.c122
1 files changed, 81 insertions, 41 deletions
diff --git a/src/htsinthash.c b/src/htsinthash.c
index 0110915..b45fed7 100644
--- a/src/htsinthash.c
+++ b/src/htsinthash.c
@@ -55,16 +55,26 @@ Please visit our Website: http://www.httrack.com
#include "htsmd5.h"
#endif
+/** Size of auxiliary stash. **/
#define STASH_SIZE 16
+/** Minimum value for lg_size. **/
+#define MIN_LG_SIZE 8
+
+/** Minimum value for pool.capacity. **/
+#define MIN_POOL_CAPACITY 256
+
/* 64-bit constant */
#if defined(WIN32)
#define UINT_64_CONST(X) ((uint64_t) (X))
+#define UINT_64_FORMAT "I64d"
#elif (defined(_LP64) || defined(__x86_64__) \
|| defined(__powerpc64__) || defined(__64BIT__))
#define UINT_64_CONST(X) ((uint64_t) X##UL)
+#define UINT_64_FORMAT "ld"
#else
#define UINT_64_CONST(X) ((uint64_t) X##ULL)
+#define UINT_64_FORMAT "lld"
#endif
/** Hashtable. **/
@@ -235,7 +245,7 @@ static void inthash_realloc_pool(inthash hashtable) {
const size_t hash_size = POW2(hashtable->lg_size);
char *const oldbase = hashtable->pool.buffer;
size_t i;
- int count = 0;
+ size_t count = 0;
/* statistics */
hashtable->stats.pool_realloc_count++;
@@ -244,9 +254,10 @@ static void inthash_realloc_pool(inthash hashtable) {
hashtable->pool.buffer = realloc(hashtable->pool.buffer,
hashtable->pool.capacity);
if (hashtable->pool.buffer == NULL) {
- fprintf(stderr,
- "** hashtable string pool allocation error: could not allocate %u bytes",
- (int) hashtable->pool.capacity);
+ inthash_log(hashtable,
+ "** hashtable string pool allocation error: could not allocate "
+ "%"UINT_64_FORMAT" bytes",
+ (uint64_t) hashtable->pool.capacity);
inthash_assert(! "hashtable string pool allocation error");
}
@@ -269,8 +280,9 @@ static void inthash_realloc_pool(inthash hashtable) {
#undef RECOMPUTE_STRING
- inthash_log(hashtable, "reallocated string pool for %d strings: %d bytes",
- count, (int) hashtable->pool.capacity);
+ inthash_log(hashtable, "reallocated string pool for "
+ "%"UINT_64_FORMAT" strings: %"UINT_64_FORMAT" bytes",
+ (uint64_t) count, (uint64_t) hashtable->pool.capacity);
}
/* compact string pool ; does not change the capacity */
@@ -279,7 +291,7 @@ static void inthash_compact_pool(inthash hashtable) {
size_t i;
char* old_pool = hashtable->pool.buffer;
const size_t old_size = hashtable->pool.size;
- int count = 0;
+ size_t count = 0;
/* statistics */
hashtable->stats.pool_compact_count++;
@@ -289,9 +301,10 @@ static void inthash_compact_pool(inthash hashtable) {
hashtable->pool.size = 0;
hashtable->pool.used = 0;
if (hashtable->pool.buffer == NULL) {
- fprintf(stderr,
- "** hashtable string pool compaction error: could not allocate %u bytes",
- (int) hashtable->pool.capacity);
+ inthash_log(hashtable,
+ "** hashtable string pool compaction error: could not allocate "
+ "%"UINT_64_FORMAT" bytes",
+ (uint64_t) hashtable->pool.capacity);
inthash_assert(! "hashtable string pool compaction error");
}
@@ -326,8 +339,10 @@ static void inthash_compact_pool(inthash hashtable) {
free(old_pool);
inthash_log(hashtable,
- "compacted string pool for %d strings: %d bytes => %d bytes",
- count, (int) old_size, (int) hashtable->pool.size);
+ "compacted string pool for %"UINT_64_FORMAT" strings: "
+ "%"UINT_64_FORMAT" bytes => %"UINT_64_FORMAT" bytes",
+ (uint64_t) count, (uint64_t) old_size,
+ (uint64_t) hashtable->pool.size);
}
static char* inthash_dup_name(inthash hashtable, const char *name) {
@@ -336,7 +351,7 @@ static char* inthash_dup_name(inthash hashtable, const char *name) {
if (hashtable->pool.capacity - hashtable->pool.size < len) {
if (hashtable->pool.capacity == 0) {
- hashtable->pool.capacity = 16;
+ hashtable->pool.capacity = MIN_POOL_CAPACITY;
}
for( ; hashtable->pool.capacity - hashtable->pool.size < len
; hashtable->pool.capacity <<= 1) ;
@@ -492,8 +507,9 @@ static int inthash_write_value_(inthash hashtable, const char *name,
/* prepare cuckoo ; let's take position 1 */
else {
cuckoo_hash = initial_cuckoo_hash = hashes.hash1;
- inthash_log(hashtable, "debug:collision with '%s' at %d (%x)",
- item.name, pos, cuckoo_hash);
+ inthash_log(hashtable,
+ "debug:collision with '%s' at %"UINT_64_FORMAT" (%x)",
+ item.name, (uint64_t) pos, cuckoo_hash);
}
}
@@ -501,8 +517,9 @@ static int inthash_write_value_(inthash hashtable, const char *name,
for(loops = POW2(hashtable->lg_size) ; loops != 0 ; --loops) {
const size_t pos = inthash_hash_to_pos(hashtable, cuckoo_hash);
- inthash_log(hashtable, "\tdebug:placing cuckoo '%s' at %d (%x)",
- item.name, pos, cuckoo_hash);
+ inthash_log(hashtable,
+ "\tdebug:placing cuckoo '%s' at %"UINT_64_FORMAT" (%x)",
+ item.name, (uint64_t) pos, cuckoo_hash);
/* place at alternate free position ? */
if (inthash_is_free(hashtable, pos)) {
@@ -547,7 +564,7 @@ static int inthash_write_value_(inthash hashtable, const char *name,
}
}
- /* emergency stashing */
+ /* emergency stashing for the rare cases of collisions */
if (hashtable->stash.size < STASH_SIZE) {
hashtable->stash.items[hashtable->stash.size] = item;
hashtable->stash.size++;
@@ -603,9 +620,10 @@ int inthash_write_value(inthash hashtable, const char *name,
hashtable->items =
(inthash_item *) realloc(hashtable->items, alloc_size);
if (hashtable->items == NULL) {
- fprintf(stderr,
- "** hashtable allocation error: could not allocate %u bytes",
- (int) alloc_size);
+ inthash_log(hashtable,
+ "** hashtable allocation error: "
+ "could not allocate %"UINT_64_FORMAT" bytes",
+ (uint64_t) alloc_size);
inthash_assert(! "hashtable allocation error");
}
@@ -643,8 +661,9 @@ int inthash_write_value(inthash hashtable, const char *name,
}
}
- inthash_log(hashtable, "expanded hashtable to %d elements",
- (int) POW2(hashtable->lg_size));
+ inthash_log(hashtable,
+ "expanded hashtable to %"UINT_64_FORMAT" elements",
+ (uint64_t) POW2(hashtable->lg_size));
/* attempt to merge the stash if present */
if (hashtable->stash.size != 0) {
@@ -673,11 +692,12 @@ int inthash_write_value(inthash hashtable, const char *name,
/* logging */
inthash_assert(hashtable->stash.size <= old_size);
if (hashtable->stash.size < old_size) {
- inthash_log(hashtable, "reduced stash size from %d to %d",
- (int) old_size, (int) hashtable->stash.size);
+ inthash_log(hashtable, "reduced stash size from %"UINT_64_FORMAT" "
+ "to %"UINT_64_FORMAT,
+ (uint64_t) old_size, (uint64_t) hashtable->stash.size);
} else {
- inthash_log(hashtable, "stash has still %d elements",
- (int) hashtable->stash.size);
+ inthash_log(hashtable, "stash has still %"UINT_64_FORMAT" elements",
+ (uint64_t) hashtable->stash.size);
}
}
@@ -869,7 +889,7 @@ inthash inthash_new(size_t initial_size) {
if (hashtable) {
size_t size;
- for(size = 8 ; POW2(size) < initial_size ; size++) ;
+ for(size = MIN_LG_SIZE ; POW2(size) < initial_size ; size++) ;
if ((hashtable->items =
(inthash_item *) calloc(POW2(size), sizeof(inthash_item)))) {
hashtable->lg_size = size;
@@ -917,23 +937,43 @@ size_t inthash_nitems(inthash hashtable) {
return 0;
}
+size_t inthash_memory_size(inthash hashtable) {
+ const size_t size_struct = sizeof(struct_inthash);
+ const size_t hash_size = POW2(hashtable->lg_size)*sizeof(inthash_item);
+ const size_t pool_size = hashtable->pool.capacity*sizeof(char);
+ return size_struct + hash_size + pool_size;
+}
+
+static void inthash_log_stats(inthash hashtable) {
+ inthash_log(hashtable, "freeing table ; "
+ "writes=%"UINT_64_FORMAT" "
+ "(new=%"UINT_64_FORMAT") "
+ "moved=%"UINT_64_FORMAT " "
+ "stashed=%"UINT_64_FORMAT" "
+ "max-stash-size=%"UINT_64_FORMAT" "
+ "avg-moved=%g "
+ "rehash=%"UINT_64_FORMAT" "
+ "pool-compact=%"UINT_64_FORMAT" "
+ "pool-realloc=%"UINT_64_FORMAT" "
+ "memory=%"UINT_64_FORMAT,
+ (uint64_t) hashtable->stats.write_count,
+ (uint64_t) hashtable->stats.add_count,
+ (uint64_t) hashtable->stats.cuckoo_moved,
+ (uint64_t) hashtable->stats.stash_added,
+ (uint64_t) hashtable->stats.max_stash_size,
+ (double) hashtable->stats.cuckoo_moved / (double) hashtable->stats.add_count,
+ (uint64_t) hashtable->stats.rehash_count,
+ (uint64_t) hashtable->stats.pool_compact_count,
+ (uint64_t) hashtable->stats.pool_realloc_count,
+ (uint64_t) inthash_memory_size(hashtable)
+ );
+}
+
void inthash_delete(inthash *phashtable) {
if (phashtable != NULL) {
inthash hashtable = *phashtable;
if (hashtable != NULL) {
- inthash_log(hashtable, "freeing table ; "
- "writes=%d (new=%d) moved=%d stashed=%d collision-room=%d "
- "avg-moved=%g rehash=%d pool-compact=%d pool-realloc=%d",
- (int) hashtable->stats.write_count,
- (int) hashtable->stats.add_count,
- (int) hashtable->stats.cuckoo_moved,
- (int) hashtable->stats.stash_added,
- (int) hashtable->stats.max_stash_size,
- (double) hashtable->stats.cuckoo_moved / (double) hashtable->stats.add_count,
- (int) hashtable->stats.rehash_count,
- (int) hashtable->stats.pool_compact_count,
- (int) hashtable->stats.pool_realloc_count
- );
+ inthash_log_stats(hashtable);
if (hashtable->items != NULL) {
/* we need to delete values */
const size_t hash_size = POW2(hashtable->lg_size);