summaryrefslogtreecommitdiff
path: root/src/htsinthash.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/htsinthash.c')
-rw-r--r--src/htsinthash.c89
1 files changed, 69 insertions, 20 deletions
diff --git a/src/htsinthash.c b/src/htsinthash.c
index e8a72d3..d82a6d7 100644
--- a/src/htsinthash.c
+++ b/src/htsinthash.c
@@ -43,6 +43,12 @@ Please visit our Website: http://www.httrack.com
#include "htsinthash.h"
+#define USES_MD5
+
+#ifdef USES_MD5
+#include "htsmd5.h"
+#endif
+
/** Hashtable. **/
struct struct_inthash {
inthash_item *items;
@@ -77,7 +83,7 @@ static void inthash_fail(const char* exp, const char* file, int line) {
#define HASH_ADD(HASH, C) do { \
(HASH) *= HASH_PRIME; \
(HASH) += HASH_CONST; \
- (HASH) += (C); \
+ (HASH) ^= ( (C) << 24); \
} while(0)
/* 2**X */
@@ -95,7 +101,31 @@ static void inthash_log(inthash hashtable,
va_end(args);
}
+#ifndef USES_MD5
+static unsigned int rev32(unsigned int v) {
+ /* swap odd and even bits */
+ v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1);
+ /* swap consecutive pairs */
+ v = ((v >> 2) & 0x33333333) | ((v & 0x33333333) << 2);
+ /* swap nibbles ... */
+ v = ((v >> 4) & 0x0F0F0F0F) | ((v & 0x0F0F0F0F) << 4);
+ /* swap bytes */
+ v = ((v >> 8) & 0x00FF00FF) | ((v & 0x00FF00FF) << 8);
+ /* swap 2-byte long pairs */
+ v = ( v >> 16 ) | ( v << 16);
+ return v;
+}
+#endif
+
static inthash_keys inthash_calc_hashes(const char *value) {
+#ifdef USES_MD5
+ union {
+ char md5digest[16];
+ inthash_keys hashes;
+ } u;
+ domd5mem(value, strlen(value), u.md5digest, 0);
+ return u.hashes;
+#else
/* compute two lcg hashes using different seeds */
inthash_keys hash = { 0, ~0 };
size_t i;
@@ -109,7 +139,12 @@ static inthash_keys inthash_calc_hashes(const char *value) {
if (hash.hash1 == hash.hash2) {
hash.hash2 = ~hash.hash2;
}
+ /* reverse bits: "higher-order bits have longer periods than the lower
+ order bits" (Wikipedia) */
+ hash.hash1 ^= rev32(hash.hash1);
+ hash.hash2 ^= rev32(hash.hash2);
return hash;
+#endif
}
static HTS_INLINE int inthash_is_free(const inthash hashtable, size_t pos) {
@@ -167,7 +202,7 @@ static void inthash_realloc_pool(inthash hashtable) {
#undef RECOMPUTE_STRING
- inthash_log(hashtable, "reallocated string pool for %d strings: %d bytes\n",
+ inthash_log(hashtable, "reallocated string pool for %d strings: %d bytes",
count, (int) hashtable->string_pool_capacity);
}
@@ -257,7 +292,6 @@ static void inthash_free_name(inthash hashtable, char *name) {
static HTS_INLINE size_t inthash_hash_to_pos_(size_t hash_size_power,
inthash_key hash) {
const inthash_key mask = POW2(hash_size_power) - 1;
- /* TODO: investigate if lower bits of LCG are sane */
return hash & mask;
}
@@ -385,6 +419,8 @@ static int inthash_write_value_(inthash hashtable, const char *name,
/* prepare cuckoo ; let's take position 1 */
else {
cuckoo_hash = initial_cuckoo_hash = hashes.hash1;
+ inthash_log(hashtable, "debug:collision with '%s' at %d (%x)",
+ item.name, pos, cuckoo_hash);
}
}
@@ -392,8 +428,12 @@ static int inthash_write_value_(inthash hashtable, const char *name,
for(loops = POW2(hashtable->hash_size_power) ; loops != 0 ; --loops) {
const size_t pos = inthash_hash_to_pos(hashtable, cuckoo_hash);
+ inthash_log(hashtable, "\tdebug:placing cuckoo '%s' at %d (%x)",
+ item.name, pos, cuckoo_hash);
+
/* place at alternate free position ? */
if (inthash_is_free(hashtable, pos)) {
+ inthash_log(hashtable, "debug:free position");
hashtable->items[pos] = item;
return 1; /* added */
}
@@ -408,11 +448,13 @@ static int inthash_write_value_(inthash hashtable, const char *name,
/* we just kicked this item from its position 1 */
if (pos == inthash_hash_to_pos(hashtable, item.hashes.hash1)) {
/* then place it on position 2 on next run */
+ inthash_log(hashtable, "\tdebug:position 1");
cuckoo_hash = item.hashes.hash2;
}
/* we just kicked this item from its position 2 */
else if (pos == inthash_hash_to_pos(hashtable, item.hashes.hash2)) {
/* then place it on position 1 on next run */
+ inthash_log(hashtable, "\tdebug:position 2");
cuckoo_hash = item.hashes.hash1;
}
else {
@@ -421,7 +463,7 @@ static int inthash_write_value_(inthash hashtable, const char *name,
/* we are looping (back to same hash) */
/* TODO FIXME: we should actually check the positions */
- if (cuckoo_hash == initial_cuckoo_hash) {
+ if (cuckoo_hash == initial_cuckoo_hash && 0) {
/* emergency stash */
break;
}
@@ -433,7 +475,8 @@ static int inthash_write_value_(inthash hashtable, const char *name,
hashtable->stash[hashtable->stash_size] = item;
hashtable->stash_size++;
inthash_log(hashtable, "used stash because of collision (%d entries)",
- (int) hashtable->stash_size++);
+ (int) hashtable->stash_size);
+ inthash_log(hashtable, "debug:stash position");
return 1; /* added */
} else {
/* we are doomed. hopefully the probability is lower than being killed
@@ -483,18 +526,16 @@ int inthash_write_value(inthash hashtable, const char *name,
/* clear upper half */
data = (char*) hashtable->items;
- memset(data + prev_size, 0, prev_size);
+ memset(data + prev_alloc_size, 0, prev_alloc_size);
/* relocate lower half items when needed */
for(i = 0 ; i < prev_size ; i++) {
if (!inthash_is_free(hashtable, i)) {
- const inthash_keys hashes = hashtable->items[i].hashes;
- size_t pos;
+ const inthash_keys *const hashes = &hashtable->items[i].hashes;
/* currently at old position 1 */
- pos = inthash_hash_to_pos_(prev_power, hashes.hash1);
- if (pos == i) {
- const size_t pos = inthash_hash_to_pos(hashtable, hashes.hash1);
+ if (inthash_hash_to_pos_(prev_power, hashes->hash1) == i) {
+ const size_t pos = inthash_hash_to_pos(hashtable, hashes->hash1);
/* no more the expected position */
if (pos != i) {
inthash_assert(pos >= prev_size);
@@ -502,8 +543,8 @@ int inthash_write_value(inthash hashtable, const char *name,
memset(&hashtable->items[i], 0, sizeof(hashtable->items[i]));
}
}
- else if (inthash_hash_to_pos_(prev_power, hashes.hash2) == i) {
- const size_t pos = inthash_hash_to_pos(hashtable, hashes.hash2);
+ else if (inthash_hash_to_pos_(prev_power, hashes->hash2) == i) {
+ const size_t pos = inthash_hash_to_pos(hashtable, hashes->hash2);
/* no more the expected position */
if (pos != i) {
inthash_assert(pos >= prev_size);
@@ -512,7 +553,7 @@ int inthash_write_value(inthash hashtable, const char *name,
}
}
else {
- inthash_assert(! "hashtable unexpected internal error");
+ inthash_assert(! "hashtable unexpected internal error (bad position)");
}
}
}
@@ -531,7 +572,7 @@ int inthash_write_value(inthash hashtable, const char *name,
hashtable->stash_size = 0;
/* insert all items */
- for(i = 0 ; i < hashtable->stash_size ; i++) {
+ for(i = 0 ; i < old_size ; i++) {
const int ret = inthash_write_value_(hashtable, stash[i].name,
stash[i].value);
if (ret == 0) {
@@ -539,7 +580,7 @@ int inthash_write_value(inthash hashtable, const char *name,
}
}
- /* delete old names */
+ /* delete old names (not values) */
for(i = 0 ; i < hashtable->stash_size ; i++) {
inthash_del_name(hashtable, &stash[i]);
}
@@ -678,6 +719,8 @@ static int inthash_remove_(inthash hashtable, const char *name,
}
hashtable->stash_size--;
*removed = (size_t) -1;
+ inthash_log(hashtable, "debug:deleted item in stash (%d entries)",
+ (int) hashtable->stash_size);
return 1;
}
}
@@ -715,6 +758,9 @@ int inthash_remove(inthash hashtable, const char *name) {
hashtable->stash[i] = hashtable->stash[i + 1];
}
hashtable->stash_size--;
+ inthash_log(hashtable, "debug:moved item from stash (%d entries)",
+ (int) hashtable->stash_size);
+ break;
}
}
}
@@ -782,22 +828,25 @@ void inthash_delete(inthash *phashtable) {
const size_t hash_size = POW2(hashtable->hash_size_power);
size_t i;
- /* wipe hashtable */
+ /* wipe hashtable values (not names) */
for(i = 0 ; i < hash_size ; i++) {
if (!inthash_is_free(hashtable, i)) {
- inthash_del_item(hashtable, &hashtable->items[i]);
+ inthash_del_value(hashtable, i);
}
}
- /* wipe auxiliary stash if any */
+ /* wipe auxiliary stash values (not names) if any */
for(i = 0 ; i < hashtable->stash_size ; i++) {
- inthash_del_item(hashtable, &hashtable->stash[i]);
+ inthash_del_value_(hashtable, &hashtable->stash[i].value);
}
}
/* wipe top-level */
hashtable->hash_size_power = 0;
hashtable->nitems = 0;
+ free(hashtable->string_pool);
+ hashtable->string_pool = NULL;
free(hashtable->items);
+ hashtable->items = NULL;
free(hashtable);
*phashtable = NULL;
}