summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorXavier Roche <xroche@users.noreply.github.com>2013-06-26 20:05:49 +0000
committerXavier Roche <xroche@users.noreply.github.com>2013-06-26 20:05:49 +0000
commitb5e663cd80904d70ad2f0d6202a0c3ce6e582b58 (patch)
treec342b5214cdb589fd3bbf094ca7c9744af3a0948
parent588d526aed19151bf6ae14e33f4bc433a7ee8b69 (diff)
Cosmetic and documentation, logging.
-rw-r--r--src/htsinthash.c122
-rw-r--r--src/htsinthash.h147
2 files changed, 201 insertions, 68 deletions
diff --git a/src/htsinthash.c b/src/htsinthash.c
index 0110915..b45fed7 100644
--- a/src/htsinthash.c
+++ b/src/htsinthash.c
@@ -55,16 +55,26 @@ Please visit our Website: http://www.httrack.com
#include "htsmd5.h"
#endif
+/** Size of auxiliary stash. **/
#define STASH_SIZE 16
+/** Minimum value for lg_size. **/
+#define MIN_LG_SIZE 8
+
+/** Minimum value for pool.capacity. **/
+#define MIN_POOL_CAPACITY 256
+
/* 64-bit constant */
#if defined(WIN32)
#define UINT_64_CONST(X) ((uint64_t) (X))
+#define UINT_64_FORMAT "I64d"
#elif (defined(_LP64) || defined(__x86_64__) \
|| defined(__powerpc64__) || defined(__64BIT__))
#define UINT_64_CONST(X) ((uint64_t) X##UL)
+#define UINT_64_FORMAT "ld"
#else
#define UINT_64_CONST(X) ((uint64_t) X##ULL)
+#define UINT_64_FORMAT "lld"
#endif
/** Hashtable. **/
@@ -235,7 +245,7 @@ static void inthash_realloc_pool(inthash hashtable) {
const size_t hash_size = POW2(hashtable->lg_size);
char *const oldbase = hashtable->pool.buffer;
size_t i;
- int count = 0;
+ size_t count = 0;
/* statistics */
hashtable->stats.pool_realloc_count++;
@@ -244,9 +254,10 @@ static void inthash_realloc_pool(inthash hashtable) {
hashtable->pool.buffer = realloc(hashtable->pool.buffer,
hashtable->pool.capacity);
if (hashtable->pool.buffer == NULL) {
- fprintf(stderr,
- "** hashtable string pool allocation error: could not allocate %u bytes",
- (int) hashtable->pool.capacity);
+ inthash_log(hashtable,
+ "** hashtable string pool allocation error: could not allocate "
+ "%"UINT_64_FORMAT" bytes",
+ (uint64_t) hashtable->pool.capacity);
inthash_assert(! "hashtable string pool allocation error");
}
@@ -269,8 +280,9 @@ static void inthash_realloc_pool(inthash hashtable) {
#undef RECOMPUTE_STRING
- inthash_log(hashtable, "reallocated string pool for %d strings: %d bytes",
- count, (int) hashtable->pool.capacity);
+ inthash_log(hashtable, "reallocated string pool for "
+ "%"UINT_64_FORMAT" strings: %"UINT_64_FORMAT" bytes",
+ (uint64_t) count, (uint64_t) hashtable->pool.capacity);
}
/* compact string pool ; does not change the capacity */
@@ -279,7 +291,7 @@ static void inthash_compact_pool(inthash hashtable) {
size_t i;
char* old_pool = hashtable->pool.buffer;
const size_t old_size = hashtable->pool.size;
- int count = 0;
+ size_t count = 0;
/* statistics */
hashtable->stats.pool_compact_count++;
@@ -289,9 +301,10 @@ static void inthash_compact_pool(inthash hashtable) {
hashtable->pool.size = 0;
hashtable->pool.used = 0;
if (hashtable->pool.buffer == NULL) {
- fprintf(stderr,
- "** hashtable string pool compaction error: could not allocate %u bytes",
- (int) hashtable->pool.capacity);
+ inthash_log(hashtable,
+ "** hashtable string pool compaction error: could not allocate "
+ "%"UINT_64_FORMAT" bytes",
+ (uint64_t) hashtable->pool.capacity);
inthash_assert(! "hashtable string pool compaction error");
}
@@ -326,8 +339,10 @@ static void inthash_compact_pool(inthash hashtable) {
free(old_pool);
inthash_log(hashtable,
- "compacted string pool for %d strings: %d bytes => %d bytes",
- count, (int) old_size, (int) hashtable->pool.size);
+ "compacted string pool for %"UINT_64_FORMAT" strings: "
+ "%"UINT_64_FORMAT" bytes => %"UINT_64_FORMAT" bytes",
+ (uint64_t) count, (uint64_t) old_size,
+ (uint64_t) hashtable->pool.size);
}
static char* inthash_dup_name(inthash hashtable, const char *name) {
@@ -336,7 +351,7 @@ static char* inthash_dup_name(inthash hashtable, const char *name) {
if (hashtable->pool.capacity - hashtable->pool.size < len) {
if (hashtable->pool.capacity == 0) {
- hashtable->pool.capacity = 16;
+ hashtable->pool.capacity = MIN_POOL_CAPACITY;
}
for( ; hashtable->pool.capacity - hashtable->pool.size < len
; hashtable->pool.capacity <<= 1) ;
@@ -492,8 +507,9 @@ static int inthash_write_value_(inthash hashtable, const char *name,
/* prepare cuckoo ; let's take position 1 */
else {
cuckoo_hash = initial_cuckoo_hash = hashes.hash1;
- inthash_log(hashtable, "debug:collision with '%s' at %d (%x)",
- item.name, pos, cuckoo_hash);
+ inthash_log(hashtable,
+ "debug:collision with '%s' at %"UINT_64_FORMAT" (%x)",
+ item.name, (uint64_t) pos, cuckoo_hash);
}
}
@@ -501,8 +517,9 @@ static int inthash_write_value_(inthash hashtable, const char *name,
for(loops = POW2(hashtable->lg_size) ; loops != 0 ; --loops) {
const size_t pos = inthash_hash_to_pos(hashtable, cuckoo_hash);
- inthash_log(hashtable, "\tdebug:placing cuckoo '%s' at %d (%x)",
- item.name, pos, cuckoo_hash);
+ inthash_log(hashtable,
+ "\tdebug:placing cuckoo '%s' at %"UINT_64_FORMAT" (%x)",
+ item.name, (uint64_t) pos, cuckoo_hash);
/* place at alternate free position ? */
if (inthash_is_free(hashtable, pos)) {
@@ -547,7 +564,7 @@ static int inthash_write_value_(inthash hashtable, const char *name,
}
}
- /* emergency stashing */
+ /* emergency stashing for the rare cases of collisions */
if (hashtable->stash.size < STASH_SIZE) {
hashtable->stash.items[hashtable->stash.size] = item;
hashtable->stash.size++;
@@ -603,9 +620,10 @@ int inthash_write_value(inthash hashtable, const char *name,
hashtable->items =
(inthash_item *) realloc(hashtable->items, alloc_size);
if (hashtable->items == NULL) {
- fprintf(stderr,
- "** hashtable allocation error: could not allocate %u bytes",
- (int) alloc_size);
+ inthash_log(hashtable,
+ "** hashtable allocation error: "
+ "could not allocate %"UINT_64_FORMAT" bytes",
+ (uint64_t) alloc_size);
inthash_assert(! "hashtable allocation error");
}
@@ -643,8 +661,9 @@ int inthash_write_value(inthash hashtable, const char *name,
}
}
- inthash_log(hashtable, "expanded hashtable to %d elements",
- (int) POW2(hashtable->lg_size));
+ inthash_log(hashtable,
+ "expanded hashtable to %"UINT_64_FORMAT" elements",
+ (uint64_t) POW2(hashtable->lg_size));
/* attempt to merge the stash if present */
if (hashtable->stash.size != 0) {
@@ -673,11 +692,12 @@ int inthash_write_value(inthash hashtable, const char *name,
/* logging */
inthash_assert(hashtable->stash.size <= old_size);
if (hashtable->stash.size < old_size) {
- inthash_log(hashtable, "reduced stash size from %d to %d",
- (int) old_size, (int) hashtable->stash.size);
+ inthash_log(hashtable, "reduced stash size from %"UINT_64_FORMAT" "
+ "to %"UINT_64_FORMAT,
+ (uint64_t) old_size, (uint64_t) hashtable->stash.size);
} else {
- inthash_log(hashtable, "stash has still %d elements",
- (int) hashtable->stash.size);
+ inthash_log(hashtable, "stash has still %"UINT_64_FORMAT" elements",
+ (uint64_t) hashtable->stash.size);
}
}
@@ -869,7 +889,7 @@ inthash inthash_new(size_t initial_size) {
if (hashtable) {
size_t size;
- for(size = 8 ; POW2(size) < initial_size ; size++) ;
+ for(size = MIN_LG_SIZE ; POW2(size) < initial_size ; size++) ;
if ((hashtable->items =
(inthash_item *) calloc(POW2(size), sizeof(inthash_item)))) {
hashtable->lg_size = size;
@@ -917,23 +937,43 @@ size_t inthash_nitems(inthash hashtable) {
return 0;
}
+size_t inthash_memory_size(inthash hashtable) {
+ const size_t size_struct = sizeof(struct_inthash);
+ const size_t hash_size = POW2(hashtable->lg_size)*sizeof(inthash_item);
+ const size_t pool_size = hashtable->pool.capacity*sizeof(char);
+ return size_struct + hash_size + pool_size;
+}
+
+static void inthash_log_stats(inthash hashtable) {
+ inthash_log(hashtable, "freeing table ; "
+ "writes=%"UINT_64_FORMAT" "
+ "(new=%"UINT_64_FORMAT") "
+ "moved=%"UINT_64_FORMAT " "
+ "stashed=%"UINT_64_FORMAT" "
+ "max-stash-size=%"UINT_64_FORMAT" "
+ "avg-moved=%g "
+ "rehash=%"UINT_64_FORMAT" "
+ "pool-compact=%"UINT_64_FORMAT" "
+ "pool-realloc=%"UINT_64_FORMAT" "
+ "memory=%"UINT_64_FORMAT,
+ (uint64_t) hashtable->stats.write_count,
+ (uint64_t) hashtable->stats.add_count,
+ (uint64_t) hashtable->stats.cuckoo_moved,
+ (uint64_t) hashtable->stats.stash_added,
+ (uint64_t) hashtable->stats.max_stash_size,
+ (double) hashtable->stats.cuckoo_moved / (double) hashtable->stats.add_count,
+ (uint64_t) hashtable->stats.rehash_count,
+ (uint64_t) hashtable->stats.pool_compact_count,
+ (uint64_t) hashtable->stats.pool_realloc_count,
+ (uint64_t) inthash_memory_size(hashtable)
+ );
+}
+
void inthash_delete(inthash *phashtable) {
if (phashtable != NULL) {
inthash hashtable = *phashtable;
if (hashtable != NULL) {
- inthash_log(hashtable, "freeing table ; "
- "writes=%d (new=%d) moved=%d stashed=%d collision-room=%d "
- "avg-moved=%g rehash=%d pool-compact=%d pool-realloc=%d",
- (int) hashtable->stats.write_count,
- (int) hashtable->stats.add_count,
- (int) hashtable->stats.cuckoo_moved,
- (int) hashtable->stats.stash_added,
- (int) hashtable->stats.max_stash_size,
- (double) hashtable->stats.cuckoo_moved / (double) hashtable->stats.add_count,
- (int) hashtable->stats.rehash_count,
- (int) hashtable->stats.pool_compact_count,
- (int) hashtable->stats.pool_realloc_count
- );
+ inthash_log_stats(hashtable);
if (hashtable->items != NULL) {
/* we need to delete values */
const size_t hash_size = POW2(hashtable->lg_size);
diff --git a/src/htsinthash.h b/src/htsinthash.h
index b448840..90f8401 100644
--- a/src/htsinthash.h
+++ b/src/htsinthash.h
@@ -85,10 +85,10 @@ typedef union inthash_value {
typedef struct inthash_item inthash_item;
#endif
-/* Hash key (32-bit) */
+/** Hash key (32-bit) **/
typedef uint32_t inthash_key;
-/* Pair of hashes */
+/** Pair of hashes **/
typedef struct inthash_keys {
inthash_key hash1;
inthash_key hash2;
@@ -109,10 +109,10 @@ struct inthash_item {
inthash_keys hashes;
};
-/* Alias for legacy code */
+/** Alias for legacy code. **/
typedef inthash_item inthash_chain;
-/* Free handler */
+/** Free handler **/
typedef void (*t_inthash_freehandler) (void *value);
/** Hashtable (opaque structure). **/
@@ -136,42 +136,135 @@ struct struct_inthash_enum {
/* Library internal definictions */
#ifdef HTS_INTERNAL_BYTECODE
-/* Hash functions: */
-inthash inthash_new(size_t size); /* Create a new hash table */
-int inthash_created(inthash hashtable); /* Test if the hash table was successfully created */
-size_t inthash_nitems(inthash hashtable); /* Number of items */
-void inthash_delete(inthash * hashtable); /* Delete an hash table */
-void inthash_value_is_malloc(inthash hashtable, int flag); /* Is the 'value' member a value that needs to be free()'ed ? */
-void inthash_value_set_free_handler(inthash hashtable, /* value free() handler (default one is 'free') */
+/**
+ * Create a new hashtable, with initial bucket size of 'size'.
+ * If size is 0, use the default minimal bucket size.
+ * Return a non-NULL pointer upon success.
+ **/
+inthash inthash_new(size_t size);
+
+/**
+ * Was the hashtable successfully created ?
+ * Return non-zero value if the hashtable is valid.
+ **/
+int inthash_created(inthash hashtable);
+
+/**
+ * Delete a hashtable, freeing all entries.
+ **/
+void inthash_delete(inthash * hashtable);
+
+/**
+ * Return the number of items in the hashtable.
+ **/
+size_t inthash_nitems(inthash hashtable);
+
+/**
+ * Return the memory size taken by the hashtable.
+ * (This does not take account of the possible memory taken by values)
+ **/
+size_t inthash_memory_size(inthash hashtable);
+
+/**
+ * Are the values inside this hashtable to be free'd ?
+ **/
+void inthash_value_is_malloc(inthash hashtable, int flag);
+
+/**
+ * Set a free handler for values. This handler will be called when a value
+ * is to be removed from the hashtable.
+ **/
+void inthash_value_set_free_handler(inthash hashtable,
t_inthash_freehandler free_handler);
-/* */
-int inthash_read(inthash hashtable, const char *name, intptr_t * intvalue); /* Read entry from the hash table */
-int inthash_readptr(inthash hashtable, const char *name, intptr_t * intvalue); /* Same function, but returns 0 upon null ptr */
-int inthash_exists(inthash hashtable, const char *name); /* Is the key existing ? */
+/**
+ * Read an integer entry from the hashtable.
+ * Return non-zero value upon success and sets intvalue.
+ **/
+int inthash_read(inthash hashtable, const char *name, intptr_t * intvalue);
-/* */
+/**
+ * Same as inthash_read(), but return 0 is the value was zero.
+ **/
+int inthash_readptr(inthash hashtable, const char *name, intptr_t * intvalue);
+
+/**
+ * Return non-zero value if the given entry exists.
+ **/
+int inthash_exists(inthash hashtable, const char *name);
+
+/**
+ * Read an entry from the hashtable.
+ * Return non-zero value upon success and sets value.
+ **/
int inthash_read_value(inthash hashtable, const char *name,
inthash_value * value);
+
+/**
+ * Write an entry to the hashtable.
+ * Return non-zero value if the entry was added, zero if it was replaced.
+ **/
int inthash_write_value(inthash hashtable, const char *name,
inthash_value value);
-/* */
+/**
+ * Read a pointer entry from the hashtable.
+ * Return non-zero value upon success and sets value.
+ **/
int inthash_read_pvoid(inthash hashtable, const char *name, void **value);
+
+/**
+ * Write a pointer entry to the hashtable.
+ * Return non-zero value if the entry was added, zero if it was replaced.
+ **/
int inthash_write_pvoid(inthash hashtable, const char *name, void *value);
+
+/**
+ * Alias to inthash_write_pvoid()
+ **/
void inthash_add_pvoid(inthash hashtable, const char *name, void *value);
-/* */
-void inthash_add(inthash hashtable, const char *name, intptr_t value); /* Add entry in the hash table */
-int inthash_write(inthash hashtable, const char *name, intptr_t value); /* Overwrite/add entry in the hash table */
-int inthash_inc(inthash hashtable, const char *name); /* Increment entry in the hash table */
-int inthash_dec(inthash hashtable, const char *name); /* Decrement entry in the hash table */
-int inthash_remove(inthash hashtable, const char *name); /* Remove an entry from the hashtable */
+/**
+ * Write an integer entry to the hashtable.
+ * Return non-zero value if the entry was added, zero if it was replaced.
+ **/
+int inthash_write(inthash hashtable, const char *name, intptr_t value);
+
+/**
+ * Alias to inthash_write()
+ **/
+void inthash_add(inthash hashtable, const char *name, intptr_t value);
+
+/**
+ * Increment an entry value in the hashtable
+ * (or create a new entry with value 1 if it does not yet exist)
+ * Return non-zero value if the entry was added, zero if it was changed.
+ **/
+int inthash_inc(inthash hashtable, const char *name);
+
+/**
+ * Decrement an entry value in the hashtable
+ * (or create a new entry with value -1 if it does not yet exist)
+ * Return non-zero value if the entry was added, zero if it was changed.
+ **/
+int inthash_dec(inthash hashtable, const char *name);
-/* */
-struct_inthash_enum inthash_enum_new(inthash hashtable); /* Start a new enumerator */
-inthash_item *inthash_enum_next(struct_inthash_enum * e); /* Fetch an item in the enumerator */
+/**
+ * Remove an entry from the hashtable
+ * Return non-zero value if the entry was removed, zero otherwise.
+ **/
+int inthash_remove(inthash hashtable, const char *name);
+
+/**
+ * Return a new enumerator.
+ * Note: you may not add or delete entries while enumerating.
+ **/
+struct_inthash_enum inthash_enum_new(inthash hashtable);
+
+/**
+ * Enumerate the next entry.
+ **/
+inthash_item *inthash_enum_next(struct_inthash_enum * e);
-/* End of hash functions: */
#endif
#endif