summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorXavier Roche <xroche@users.noreply.github.com>2014-06-15 10:24:06 +0000
committerXavier Roche <xroche@users.noreply.github.com>2014-06-15 10:24:06 +0000
commit8e72bb5deb74a68e7a909d97d7d395af9c33204d (patch)
treec97a10a736718ee490b76d038e832b0b070420e2 /src
parent2e6a99ce09cea47a9890a30c8180b823967e9a46 (diff)
Optional 64-bit hash for really big hashtables. (disabled by default)
Diffstat (limited to 'src')
-rw-r--r--src/coucal.c22
-rw-r--r--src/coucal.h29
2 files changed, 48 insertions, 3 deletions
diff --git a/src/coucal.c b/src/coucal.c
index 9b24b1a..2efe862 100644
--- a/src/coucal.c
+++ b/src/coucal.c
@@ -359,7 +359,9 @@ coucal_hashkeys coucal_hash_data(const void *data_, size_t size) {
MD5_CTX ctx;
union {
unsigned char md5digest[16];
+#if (COUCAL_HASH_SIZE == 32)
coucal_hashkeys mhashes[2];
+#endif
coucal_hashkeys hashes;
} u;
@@ -368,9 +370,11 @@ coucal_hashkeys coucal_hash_data(const void *data_, size_t size) {
MD5Update(&ctx, data, (unsigned int) size);
MD5Final(u.md5digest, &ctx);
+#if (COUCAL_HASH_SIZE == 32)
/* mix mix mix */
u.mhashes[0].hash1 ^= u.mhashes[1].hash1;
u.mhashes[0].hash2 ^= u.mhashes[1].hash2;
+#endif
/* do not keep identical hashes */
if (u.hashes.hash1 == u.hashes.hash2) {
@@ -383,12 +387,13 @@ coucal_hashkeys coucal_hash_data(const void *data_, size_t size) {
uint32_t result[4];
coucal_hashkeys hashes;
} u;
- MurmurHash3_x86_128(data, (const int) size,
- 42, &u.result) ;
+ MurmurHash3_x86_128(data, (const int) size, 42, &u.result);
+#if (COUCAL_HASH_SIZE == 32)
/* mix mix mix */
u.result[0] ^= u.result[2];
u.result[1] ^= u.result[3];
+#endif
/* do not keep identical hashes */
if (u.hashes.hash1 == u.hashes.hash2) {
@@ -417,9 +422,17 @@ coucal_hashkeys coucal_hash_data(const void *data_, size_t size) {
h2 = ( h2 * FNV1_PRIME ) ^ c2;
}
+#if (COUCAL_HASH_SIZE == 32)
/* XOR-folding to improve diffusion (Wikipedia) */
hashes.hash1 = ( (uint32_t) h1 ^ (uint32_t) ( h1 >> 32 ) );
hashes.hash2 = ( (uint32_t) h2 ^ (uint32_t) ( h2 >> 32 ) );
+#elif (COUCAL_HASH_SIZE == 64)
+ /* Direct hashes */
+ hashes.hash1 = h1;
+ hashes.hash2 = h2;
+#else
+#error "Unsupported COUCAL_HASH_SIZE"
+#endif
#undef FNV1_PRIME
#undef FNV1_OFFSET_BASIS
@@ -1025,6 +1038,7 @@ int coucal_write_value(coucal hashtable, coucal_key_const name,
hashtable->stats.rehash_count++;
/* realloc */
+ coucal_assert(hashtable, hashtable->lg_size < COUCAL_HASH_SIZE);
hashtable->lg_size++;
hashtable->items =
(coucal_item *) realloc(hashtable->items, alloc_size);
@@ -1417,6 +1431,10 @@ size_t coucal_memory_size(coucal hashtable) {
return size_struct + hash_size + pool_size;
}
+size_t coucal_hash_size() {
+ return COUCAL_HASH_SIZE;
+}
+
void coucal_delete(coucal *phashtable) {
if (phashtable != NULL) {
coucal hashtable = *phashtable;
diff --git a/src/coucal.h b/src/coucal.h
index 56814c8..5fb03e4 100644
--- a/src/coucal.h
+++ b/src/coucal.h
@@ -52,6 +52,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* the keys (pool).
* If the string pool is being used (to store dup'ed keys), deleting is only
* O(1) average, because the pool needs to be compacted time to time.
+ * Currently the default maximum number of elements in the hashtable is
+ * 2**31 - 1 (that is, 2,147,483,648 elements), but this default can be changed
+ * by setting COUCAL_HASH_SIZE to a higher value (64 is the only higher value
+ * currently supported), and rebuilding the library.
*
* References:
*
@@ -132,9 +136,26 @@ typedef const coucal_value coucal_value_const;
typedef struct coucal_item coucal_item;
#endif
-/** Hash key (32-bit) **/
+/** Coucal primary hash size. Default is 32-bit. **/
+#ifndef COUCAL_HASH_SIZE
+#define COUCAL_HASH_SIZE 32
+#endif
+
+/** Hash integer type. **/
+#if (COUCAL_HASH_SIZE == 32)
+
+/** 32-bit hash key. **/
typedef uint32_t coucal_hashkey;
+#elif (COUCAL_HASH_SIZE == 64)
+
+/** 64-bit hash key. **/
+typedef uint64_t coucal_hashkey;
+
+#else
+#error "Unsupported COUCAL_HASH_SIZE"
+#endif
+
/** Pair of hashes **/
typedef struct coucal_hashkeys {
coucal_hashkey hash1;
@@ -262,6 +283,12 @@ COUCAL_EXTERN size_t coucal_nitems(coucal hashtable);
COUCAL_EXTERN size_t coucal_memory_size(coucal hashtable);
/**
+ * Return the library hash size compiled.
+ * The returned value MUST match COUCAL_HASH_SIZE.
+ **/
+COUCAL_EXTERN size_t coucal_hash_size(void);
+
+/**
* If 'flag' is non-zero, calls coucal_value_set_value_handler() with
* default system free() handler function, otherwise, free the value handlers.
**/