From e0fe304f0b19a20c6eacca5ceda6312aa203fc91 Mon Sep 17 00:00:00 2001 From: Xavier Roche Date: Sat, 14 Jun 2014 09:50:17 +0000 Subject: Preparing to export the cuckoo hashtable library as "coucal" project --- src/Makefile.am | 6 +- src/Makefile.in | 48 +- src/coucal.c | 1476 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/coucal.h | 407 +++++++++++++++ src/htscore.h | 2 +- src/htshash.c | 2 +- src/htsindex.c | 2 +- src/htsinthash.c | 1476 ------------------------------------------------------ src/htsinthash.h | 407 --------------- src/htsserver.c | 2 +- src/htsweb.c | 2 +- src/htswrap.c | 2 +- src/htswrap.h | 2 +- 13 files changed, 1917 insertions(+), 1917 deletions(-) create mode 100644 src/coucal.c create mode 100644 src/coucal.h delete mode 100644 src/htsinthash.c delete mode 100644 src/htsinthash.h diff --git a/src/Makefile.am b/src/Makefile.am index f242109..476d137 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -37,14 +37,14 @@ lib_LTLIBRARIES = libhttrack.la libhtsjava.la htsserver_SOURCES = htsserver.c htsserver.h htsweb.c htsweb.h proxytrack_SOURCES = proxy/main.c \ proxy/proxytrack.c proxy/store.c \ - htsinthash.c htsmd5.c md5.c \ + coucal.c htsmd5.c md5.c \ minizip/ioapi.c minizip/mztools.c minizip/unzip.c minizip/zip.c whttrackrundir = $(bindir) whttrackrun_SCRIPTS = webhttrack libhttrack_la_SOURCES = htscore.c htsparse.c htsback.c htscache.c \ - htscatchurl.c htsfilters.c htsftp.c htshash.c htsinthash.c \ + htscatchurl.c htsfilters.c htsftp.c htshash.c coucal.c \ htshelp.c htslib.c htscoremain.c \ htsname.c htsrobots.c htstools.c htswizard.c \ htsalias.c htsthread.c htsindex.c htsbauth.c \ @@ -55,7 +55,7 @@ libhttrack_la_SOURCES = htscore.c htsparse.c htsback.c htscache.c \ hts-indextmpl.h htsalias.h htsback.h htsbase.h htssafe.h \ htsbasenet.h htsbauth.h htscache.h htscatchurl.h \ htsconfig.h htscore.h htsparse.h htscoremain.h htsdefines.h \ - htsfilters.h htsftp.h htsglobal.h htshash.h htsinthash.h \ + htsfilters.h htsftp.h htsglobal.h htshash.h coucal.h \ htshelp.h htsindex.h htslib.h htsmd5.h \ htsmodules.h htsname.h htsnet.h \ htsopt.h htsrobots.h htsthread.h \ diff --git a/src/Makefile.in b/src/Makefile.in index 74188ab..8f1860b 100644 --- a/src/Makefile.in +++ b/src/Makefile.in @@ -115,7 +115,7 @@ am_libhttrack_la_OBJECTS = libhttrack_la-htscore.lo \ libhttrack_la-htsparse.lo libhttrack_la-htsback.lo \ libhttrack_la-htscache.lo libhttrack_la-htscatchurl.lo \ libhttrack_la-htsfilters.lo libhttrack_la-htsftp.lo \ - libhttrack_la-htshash.lo libhttrack_la-htsinthash.lo \ + libhttrack_la-htshash.lo libhttrack_la-coucal.lo \ libhttrack_la-htshelp.lo libhttrack_la-htslib.lo \ libhttrack_la-htscoremain.lo libhttrack_la-htsname.lo \ libhttrack_la-htsrobots.lo libhttrack_la-htstools.lo \ @@ -143,9 +143,9 @@ httrack_OBJECTS = httrack.$(OBJEXT) httrack_DEPENDENCIES = $(am__DEPENDENCIES_1) libhttrack.la am_proxytrack_OBJECTS = proxy/proxytrack-main.$(OBJEXT) \ proxy/proxytrack-proxytrack.$(OBJEXT) \ - proxy/proxytrack-store.$(OBJEXT) \ - proxytrack-htsinthash.$(OBJEXT) proxytrack-htsmd5.$(OBJEXT) \ - proxytrack-md5.$(OBJEXT) minizip/proxytrack-ioapi.$(OBJEXT) \ + proxy/proxytrack-store.$(OBJEXT) proxytrack-coucal.$(OBJEXT) \ + proxytrack-htsmd5.$(OBJEXT) proxytrack-md5.$(OBJEXT) \ + minizip/proxytrack-ioapi.$(OBJEXT) \ minizip/proxytrack-mztools.$(OBJEXT) \ minizip/proxytrack-unzip.$(OBJEXT) \ minizip/proxytrack-zip.$(OBJEXT) @@ -348,13 +348,13 @@ lib_LTLIBRARIES = libhttrack.la libhtsjava.la htsserver_SOURCES = htsserver.c htsserver.h htsweb.c htsweb.h proxytrack_SOURCES = proxy/main.c \ proxy/proxytrack.c proxy/store.c \ - htsinthash.c htsmd5.c md5.c \ + coucal.c htsmd5.c md5.c \ minizip/ioapi.c minizip/mztools.c minizip/unzip.c minizip/zip.c whttrackrundir = $(bindir) whttrackrun_SCRIPTS = webhttrack libhttrack_la_SOURCES = htscore.c htsparse.c htsback.c htscache.c \ - htscatchurl.c htsfilters.c htsftp.c htshash.c htsinthash.c \ + htscatchurl.c htsfilters.c htsftp.c htshash.c coucal.c \ htshelp.c htslib.c htscoremain.c \ htsname.c htsrobots.c htstools.c htswizard.c \ htsalias.c htsthread.c htsindex.c htsbauth.c \ @@ -365,7 +365,7 @@ libhttrack_la_SOURCES = htscore.c htsparse.c htsback.c htscache.c \ hts-indextmpl.h htsalias.h htsback.h htsbase.h htssafe.h \ htsbasenet.h htsbauth.h htscache.h htscatchurl.h \ htsconfig.h htscore.h htsparse.h htscoremain.h htsdefines.h \ - htsfilters.h htsftp.h htsglobal.h htshash.h htsinthash.h \ + htsfilters.h htsftp.h htsglobal.h htshash.h coucal.h \ htshelp.h htsindex.h htslib.h htsmd5.h \ htsmodules.h htsname.h htsnet.h \ htsopt.h htsrobots.h htsthread.h \ @@ -625,6 +625,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htsserver.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htsweb.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/httrack.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libhttrack_la-coucal.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libhttrack_la-htsalias.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libhttrack_la-htsback.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libhttrack_la-htsbauth.Plo@am__quote@ @@ -640,7 +641,6 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libhttrack_la-htshash.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libhttrack_la-htshelp.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libhttrack_la-htsindex.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libhttrack_la-htsinthash.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libhttrack_la-htslib.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libhttrack_la-htsmd5.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libhttrack_la-htsmodules.Plo@am__quote@ @@ -654,7 +654,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libhttrack_la-htszlib.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libhttrack_la-md5.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libhttrack_la-punycode.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/proxytrack-htsinthash.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/proxytrack-coucal.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/proxytrack-htsmd5.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/proxytrack-md5.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@minizip/$(DEPDIR)/libhttrack_la-ioapi.Plo@am__quote@ @@ -749,12 +749,12 @@ libhttrack_la-htshash.lo: htshash.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libhttrack_la_CFLAGS) $(CFLAGS) -c -o libhttrack_la-htshash.lo `test -f 'htshash.c' || echo '$(srcdir)/'`htshash.c -libhttrack_la-htsinthash.lo: htsinthash.c -@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libhttrack_la_CFLAGS) $(CFLAGS) -MT libhttrack_la-htsinthash.lo -MD -MP -MF $(DEPDIR)/libhttrack_la-htsinthash.Tpo -c -o libhttrack_la-htsinthash.lo `test -f 'htsinthash.c' || echo '$(srcdir)/'`htsinthash.c -@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libhttrack_la-htsinthash.Tpo $(DEPDIR)/libhttrack_la-htsinthash.Plo -@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='htsinthash.c' object='libhttrack_la-htsinthash.lo' libtool=yes @AMDEPBACKSLASH@ +libhttrack_la-coucal.lo: coucal.c +@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libhttrack_la_CFLAGS) $(CFLAGS) -MT libhttrack_la-coucal.lo -MD -MP -MF $(DEPDIR)/libhttrack_la-coucal.Tpo -c -o libhttrack_la-coucal.lo `test -f 'coucal.c' || echo '$(srcdir)/'`coucal.c +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libhttrack_la-coucal.Tpo $(DEPDIR)/libhttrack_la-coucal.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='coucal.c' object='libhttrack_la-coucal.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libhttrack_la_CFLAGS) $(CFLAGS) -c -o libhttrack_la-htsinthash.lo `test -f 'htsinthash.c' || echo '$(srcdir)/'`htsinthash.c +@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libhttrack_la_CFLAGS) $(CFLAGS) -c -o libhttrack_la-coucal.lo `test -f 'coucal.c' || echo '$(srcdir)/'`coucal.c libhttrack_la-htshelp.lo: htshelp.c @am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libhttrack_la_CFLAGS) $(CFLAGS) -MT libhttrack_la-htshelp.lo -MD -MP -MF $(DEPDIR)/libhttrack_la-htshelp.Tpo -c -o libhttrack_la-htshelp.lo `test -f 'htshelp.c' || echo '$(srcdir)/'`htshelp.c @@ -966,19 +966,19 @@ proxy/proxytrack-store.obj: proxy/store.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(proxytrack_CFLAGS) $(CFLAGS) -c -o proxy/proxytrack-store.obj `if test -f 'proxy/store.c'; then $(CYGPATH_W) 'proxy/store.c'; else $(CYGPATH_W) '$(srcdir)/proxy/store.c'; fi` -proxytrack-htsinthash.o: htsinthash.c -@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(proxytrack_CFLAGS) $(CFLAGS) -MT proxytrack-htsinthash.o -MD -MP -MF $(DEPDIR)/proxytrack-htsinthash.Tpo -c -o proxytrack-htsinthash.o `test -f 'htsinthash.c' || echo '$(srcdir)/'`htsinthash.c -@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/proxytrack-htsinthash.Tpo $(DEPDIR)/proxytrack-htsinthash.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='htsinthash.c' object='proxytrack-htsinthash.o' libtool=no @AMDEPBACKSLASH@ +proxytrack-coucal.o: coucal.c +@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(proxytrack_CFLAGS) $(CFLAGS) -MT proxytrack-coucal.o -MD -MP -MF $(DEPDIR)/proxytrack-coucal.Tpo -c -o proxytrack-coucal.o `test -f 'coucal.c' || echo '$(srcdir)/'`coucal.c +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/proxytrack-coucal.Tpo $(DEPDIR)/proxytrack-coucal.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='coucal.c' object='proxytrack-coucal.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(proxytrack_CFLAGS) $(CFLAGS) -c -o proxytrack-htsinthash.o `test -f 'htsinthash.c' || echo '$(srcdir)/'`htsinthash.c +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(proxytrack_CFLAGS) $(CFLAGS) -c -o proxytrack-coucal.o `test -f 'coucal.c' || echo '$(srcdir)/'`coucal.c -proxytrack-htsinthash.obj: htsinthash.c -@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(proxytrack_CFLAGS) $(CFLAGS) -MT proxytrack-htsinthash.obj -MD -MP -MF $(DEPDIR)/proxytrack-htsinthash.Tpo -c -o proxytrack-htsinthash.obj `if test -f 'htsinthash.c'; then $(CYGPATH_W) 'htsinthash.c'; else $(CYGPATH_W) '$(srcdir)/htsinthash.c'; fi` -@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/proxytrack-htsinthash.Tpo $(DEPDIR)/proxytrack-htsinthash.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='htsinthash.c' object='proxytrack-htsinthash.obj' libtool=no @AMDEPBACKSLASH@ +proxytrack-coucal.obj: coucal.c +@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(proxytrack_CFLAGS) $(CFLAGS) -MT proxytrack-coucal.obj -MD -MP -MF $(DEPDIR)/proxytrack-coucal.Tpo -c -o proxytrack-coucal.obj `if test -f 'coucal.c'; then $(CYGPATH_W) 'coucal.c'; else $(CYGPATH_W) '$(srcdir)/coucal.c'; fi` +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/proxytrack-coucal.Tpo $(DEPDIR)/proxytrack-coucal.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='coucal.c' object='proxytrack-coucal.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(proxytrack_CFLAGS) $(CFLAGS) -c -o proxytrack-htsinthash.obj `if test -f 'htsinthash.c'; then $(CYGPATH_W) 'htsinthash.c'; else $(CYGPATH_W) '$(srcdir)/htsinthash.c'; fi` +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(proxytrack_CFLAGS) $(CFLAGS) -c -o proxytrack-coucal.obj `if test -f 'coucal.c'; then $(CYGPATH_W) 'coucal.c'; else $(CYGPATH_W) '$(srcdir)/coucal.c'; fi` proxytrack-htsmd5.o: htsmd5.c @am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(proxytrack_CFLAGS) $(CFLAGS) -MT proxytrack-htsmd5.o -MD -MP -MF $(DEPDIR)/proxytrack-htsmd5.Tpo -c -o proxytrack-htsmd5.o `test -f 'htsmd5.c' || echo '$(srcdir)/'`htsmd5.c diff --git a/src/coucal.c b/src/coucal.c new file mode 100644 index 0000000..593bf94 --- /dev/null +++ b/src/coucal.c @@ -0,0 +1,1476 @@ +/* ------------------------------------------------------------ */ +/* +HTTrack Website Copier, Offline Browser for Windows and Unix +Copyright (C) 1998-2014 Xavier Roche and other contributors + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . + +Important notes: + +- We hereby ask people using this source NOT to use it in purpose of grabbing +emails addresses, or collecting any other private information on persons. +This would disgrace our work, and spoil the many hours we spent on it. + +Please visit our Website: http://www.httrack.com +*/ + +/* ------------------------------------------------------------ */ +/* File: hash tables */ +/* Author: Xavier Roche */ +/* ------------------------------------------------------------ */ + +/* Internal engine bytecode */ +#define HTS_INTERNAL_BYTECODE + +#include +#include +#include +#include +#include + +#include "htsinthash.h" + +/* We use md5 as hashing function for its quality regarding diffusion and + collisions. MD5 is slower than other hashing functions, but is known to be + an excellent hashing function. FNV-1 is generally good enough for this + purpose, too, but the performance gain is not sufficient to use it by + default. + + On several benchmarks, both MD5 and FNV were quite good (0.45 cuckoo moved + on average for each new item inserted in the hashtable), but FNV-1 was more + prone to mutual collisions (creating cycles requiring stash handling), and + was causing the stash area to be more filled than the MD5 variant. + + Simpler hashing functions, such as rolling hashes (LCG) were also tested, + but with collision rate and diffusion were terrible. + + [ On a 10M key tests, both variants acheived 0.45 cuckoo/add ration, + but the FNV-1 variant collided 11 times with a maximum stash area + filled with 4 entries ; whereas the MD5 variant did only collide + once ] +*/ +#if (!defined(HTS_INTHASH_USES_MD5) && !defined(HTS_INTHASH_USES_MURMUR)) +#define HTS_INTHASH_USES_MD5 1 +#endif + +#if HTS_INTHASH_USES_MD5 == 1 +#include "md5.h" +#elif (defined(HTS_INTHASH_USES_MURMUR)) +#include "murmurhash3.h" +#else +/* use the Openssl implementation */ +#if 0 +#include +#define MD5Init MD5_Init +#define MD5Update MD5_Update +#define MD5Final MD5_Final +#endif +#endif + +/** Size of auxiliary stash. **/ +#define STASH_SIZE 16 + +/** Minimum value for lg_size. **/ +#define MIN_LG_SIZE 4 + +/** Minimum value for pool.capacity. **/ +#define MIN_POOL_CAPACITY 256 + +/* 64-bit constant */ +#if defined(WIN32) +#define UINT_64_CONST(X) ((uint64_t) (X)) +#define UINT_64_FORMAT "I64d" +#elif (defined(_LP64) || defined(__x86_64__) \ + || defined(__powerpc64__) || defined(__64BIT__)) +#define UINT_64_CONST(X) ((uint64_t) X##UL) +#define UINT_64_FORMAT "ld" +#else +#define UINT_64_CONST(X) ((uint64_t) X##ULL) +#define UINT_64_FORMAT "lld" +#endif + +/** Hashtable. **/ +struct struct_inthash { + /** Hashtable items. **/ + inthash_item *items; + + /** Log-2 of the hashtable size. **/ + size_t lg_size; + + /** Number of used items (<= POW2(lg_size)). **/ + size_t used; + + /** Stash area for collisions. **/ + struct { + /** Stash items. **/ + inthash_item items[STASH_SIZE]; + + /** Stash size (<= STASH_SIZE). **/ + size_t size; + } stash; + + /** String pool. **/ + struct { + /** String buffer. **/ + char *buffer; + /** Buffer used size (high watermark). **/ + size_t size; + /** Buffer capacity. **/ + size_t capacity; + /** Used chars (== size if compacted). **/ + size_t used; + } pool; + + /** Statistics **/ + struct { + /** Highest stash.size seen. **/ + size_t max_stash_size; + /** Number of writes. **/ + size_t write_count; + /** Number of writes causing an add. **/ + size_t add_count; + /** Number of cuckoo moved during adds. **/ + size_t cuckoo_moved; + /** Number of items added to stash. **/ + size_t stash_added; + /** Number of hashtable rehash/expand operations. **/ + size_t rehash_count; + /** Number of pool compact operations. **/ + size_t pool_compact_count; + /** Number of pool realloc operations. **/ + size_t pool_realloc_count; + } stats; + + /** Settings. **/ + struct { + /** How to handle values (might be NULL). **/ + struct { + /** free() **/ + t_inthash_value_freehandler free; + /** opaque argument **/ + inthash_opaque arg; + } value; + + /** How to handle names (might be NULL). **/ + struct { + /** strdup() **/ + t_inthash_duphandler dup; + /** free() **/ + t_inthash_key_freehandler free; + /** hash **/ + t_inthash_hasheshandler hash; + /** comparison **/ + t_inthash_cmphandler equals; + /** opaque argument **/ + inthash_opaque arg; + } key; + + /** How to handle fatal assertions (might be NULL). **/ + struct { + /** logging **/ + t_inthash_loghandler log; + /** abort() **/ + t_inthash_asserthandler fatal; + /** opaque argument **/ + inthash_opaque arg; + /** hashtable name for logging **/ + inthash_key_const name; + } error; + + /** How to handle pretty-print (debug) (might be NULL). **/ + struct { + /** key print() **/ + t_inthash_printkeyhandler key; + /** value print() **/ + t_inthash_printvaluehandler value; + /** opaque argument **/ + inthash_opaque arg; + } print; + } custom; +}; + +/* Assertion check. */ +#define inthash_assert(HASHTABLE, EXP) \ + (void)( (EXP) || (inthash_assert_failed(HASHTABLE, #EXP, __FILE__, __LINE__), 0) ) + +/* Compiler-specific. */ +#ifdef __GNUC__ +#define INTHASH_PRINTF_FUN(fmt, arg) __attribute__ ((format (printf, fmt, arg))) +#define INTHASH_INLINE __inline__ +#elif (defined(_MSC_VER)) +#define INTHASH_PRINTF_FUN(FMT, ARGS) +#define INTHASH_INLINE __inline +#else +#define INTHASH_PRINTF_FUN(FMT, ARGS) +#define INTHASH_INLINE +#endif + +/* Logging level. */ +static void inthash_log(const inthash hashtable, inthash_loglevel level, + const char *format, va_list args); +#define DECLARE_LOG_FUNCTION(NAME, LEVEL) \ +static void NAME(const inthash hashtable, const char *format, ...) \ + INTHASH_PRINTF_FUN(2, 3); \ +static void NAME(const inthash hashtable, const char *format, ...) { \ + va_list args; \ + va_start(args, format); \ + inthash_log(hashtable, LEVEL, format, args); \ + va_end(args); \ +} +#if 0 +/* Verbose. */ +DECLARE_LOG_FUNCTION(inthash_crit, inthash_log_critical) +DECLARE_LOG_FUNCTION(inthash_warning, inthash_log_warning) +DECLARE_LOG_FUNCTION(inthash_info, inthash_log_info) +DECLARE_LOG_FUNCTION(inthash_debug, inthash_log_debug) +DECLARE_LOG_FUNCTION(inthash_trace, inthash_log_trace) +#elif 0 +/* Info. */ +DECLARE_LOG_FUNCTION(inthash_crit, inthash_log_critical) +DECLARE_LOG_FUNCTION(inthash_warning, inthash_log_warning) +DECLARE_LOG_FUNCTION(inthash_info, inthash_log_info) +#define inthash_debug inthash_log +#define inthash_trace inthash_nolog +#else +/* No logging except stats and critical. */ +DECLARE_LOG_FUNCTION(inthash_crit, inthash_log_critical) +DECLARE_LOG_FUNCTION(inthash_warning, inthash_log_warning) +DECLARE_LOG_FUNCTION(inthash_info, inthash_log_info) +#define inthash_debug inthash_nolog +#define inthash_trace inthash_nolog +#endif + +/* 2**X */ +#define POW2(X) ( (size_t) 1 << (X) ) + +/* the empty string for the string pool */ +static char the_empty_string[1] = { 0 }; + +/* global assertion handler */ +static t_inthash_asserthandler global_assert_handler = NULL; + +/* global assertion handler */ +static t_inthash_loghandler global_log_handler = NULL; + +/* default assertion handler, if neither hashtable one nor global one + were defined */ +static void inthash_fail(const char* exp, const char* file, int line) { + fprintf(stderr, "assertion '%s' failed at %s:%d\n", exp, file, line); + abort(); +} + +/* assert failed handler. */ +static void inthash_assert_failed(const inthash hashtable, const char* exp, const char* file, int line) { + const char *const name = inthash_get_name(hashtable); + inthash_crit(hashtable, "hashtable %s: %s failed at %s:%d", + name != NULL ? name : "", exp, file, line); + if (hashtable != NULL && hashtable->custom.error.fatal != NULL) { + hashtable->custom.error.fatal(hashtable->custom.error.arg, exp, file, line); + } else if (global_assert_handler != NULL) { + global_assert_handler(hashtable, exp, file, line); + } else { + inthash_fail(exp, file, line); + } + abort(); +} + +/* Logging */ +static void inthash_log(const inthash hashtable, inthash_loglevel level, + const char *format, va_list args) { + inthash_assert(hashtable, format != NULL); + if (hashtable != NULL && hashtable->custom.error.log != NULL) { + hashtable->custom.error.log(hashtable->custom.error.arg, level, format, args); + } else if (global_log_handler != NULL) { + global_log_handler(hashtable, level, format, args); + } else { + fprintf(stderr, "[%p] ", (void*) hashtable); + (void) vfprintf(stderr, format, args); + putc('\n', stderr); + } +} + +/* No logging (should be dropped by the compiler) */ +static void inthash_nolog(const inthash hashtable, const char *format, ...) + INTHASH_PRINTF_FUN(2, 3); +static void inthash_nolog(const inthash hashtable, const char *format, ...) { + (void) hashtable; + (void) format; +} + +const char* inthash_get_name(inthash hashtable) { + return hashtable->custom.error.name; +} + +static void inthash_log_stats(inthash hashtable) { + const char *const name = inthash_get_name(hashtable); + inthash_info(hashtable, "hashtable %s%s%ssummary: " + "size=%"UINT_64_FORMAT" (lg2=%"UINT_64_FORMAT") " + "used=%"UINT_64_FORMAT" " + "stash-size=%"UINT_64_FORMAT" " + "pool-size=%"UINT_64_FORMAT" " + "pool-capacity=%"UINT_64_FORMAT" " + "pool-used=%"UINT_64_FORMAT" " + "writes=%"UINT_64_FORMAT" " + "(new=%"UINT_64_FORMAT") " + "moved=%"UINT_64_FORMAT " " + "stashed=%"UINT_64_FORMAT" " + "max-stash-size=%"UINT_64_FORMAT" " + "avg-moved=%g " + "rehash=%"UINT_64_FORMAT" " + "pool-compact=%"UINT_64_FORMAT" " + "pool-realloc=%"UINT_64_FORMAT" " + "memory=%"UINT_64_FORMAT, + name != NULL ? "\"" : "", + name != NULL ? name : "", + name != NULL ? "\" " : "", + (uint64_t) POW2(hashtable->lg_size), + (uint64_t) hashtable->lg_size, + (uint64_t) hashtable->used, + (uint64_t) hashtable->stash.size, + (uint64_t) hashtable->pool.size, + (uint64_t) hashtable->pool.capacity, + (uint64_t) hashtable->pool.used, + (uint64_t) hashtable->stats.write_count, + (uint64_t) hashtable->stats.add_count, + (uint64_t) hashtable->stats.cuckoo_moved, + (uint64_t) hashtable->stats.stash_added, + (uint64_t) hashtable->stats.max_stash_size, + (double) hashtable->stats.cuckoo_moved / (double) hashtable->stats.add_count, + (uint64_t) hashtable->stats.rehash_count, + (uint64_t) hashtable->stats.pool_compact_count, + (uint64_t) hashtable->stats.pool_realloc_count, + (uint64_t) inthash_memory_size(hashtable) + ); +} + +/* default hash function when key is a regular C-string */ +inthash_hashkeys inthash_hash_string(const char *name) { +#if HTS_INTHASH_USES_MD5 == 1 + /* compute a regular MD5 and extract two 32-bit integers */ + MD5_CTX ctx; + union { + unsigned char md5digest[16]; + inthash_hashkeys mhashes[2]; + inthash_hashkeys hashes; + } u; + + /* compute MD5 */ + MD5Init(&ctx, 0); + MD5Update(&ctx, (const unsigned char *) name, + (unsigned int) strlen(name)); + MD5Final(u.md5digest, &ctx); + + /* mix mix mix */ + u.mhashes[0].hash1 ^= u.mhashes[1].hash1; + u.mhashes[0].hash2 ^= u.mhashes[1].hash2; + + /* do not keep identical hashes */ + if (u.hashes.hash1 == u.hashes.hash2) { + u.hashes.hash2 = ~u.hashes.hash2; + } + + return u.hashes; +#elif (defined(HTS_INTHASH_USES_MURMUR)) + union { + uint32_t result[4]; + inthash_hashkeys hashes; + } u; + MurmurHash3_x86_128(name, (const int) strlen(name), + 42, &u.result) ; + + /* mix mix mix */ + u.result[0] ^= u.result[2]; + u.result[1] ^= u.result[3]; + + /* do not keep identical hashes */ + if (u.hashes.hash1 == u.hashes.hash2) { + u.hashes.hash2 = ~u.hashes.hash2; + } + + return u.hashes; +#else + /* compute two Fowler-Noll-Vo hashes (64-bit FNV-1 variant) ; + each 64-bit hash being XOR-folded into a single 32-bit hash. */ + size_t i; + inthash_hashkeys hashes; + uint64_t h1, h2; + + /* FNV-1, 64-bit. */ +#define FNV1_PRIME UINT_64_CONST(1099511628211) +#define FNV1_OFFSET_BASIS UINT_64_CONST(14695981039346656037) + + /* compute the hashes ; second variant is using xored data */ + h1 = FNV1_OFFSET_BASIS; + h2 = ~FNV1_OFFSET_BASIS; + for(i = 0 ; name[i] != '\0' ; i++) { + const unsigned char c1 = name[i]; + const unsigned char c2 = ~c1; + h1 = ( h1 * FNV1_PRIME ) ^ c1; + h2 = ( h2 * FNV1_PRIME ) ^ c2; + } + + /* XOR-folding to improve diffusion (Wikipedia) */ + hashes.hash1 = ( (uint32_t) h1 ^ (uint32_t) ( h1 >> 32 ) ); + hashes.hash2 = ( (uint32_t) h2 ^ (uint32_t) ( h2 >> 32 ) ); + +#undef FNV1_PRIME +#undef FNV1_OFFSET_BASIS + + /* do not keep identical hashes */ + if (hashes.hash1 == hashes.hash2) { + hashes.hash2 = ~hashes.hash2; + } + + return hashes; +#endif +} + +static INTHASH_INLINE inthash_hashkeys inthash_calc_hashes(inthash hashtable, + inthash_key_const value) { + return hashtable->custom.key.hash == NULL + ? inthash_hash_string(value) + : hashtable->custom.key.hash(hashtable->custom.key.arg, value); +} + +/* position 'pos' is free ? */ +static INTHASH_INLINE int inthash_is_free(const inthash hashtable, size_t pos) { + return hashtable->items[pos].name == NULL; +} + +/* compare two keys ; by default using strcmp() */ +static INTHASH_INLINE int inthash_equals(inthash hashtable, + inthash_key_const a, + inthash_key_const b) { + return hashtable->custom.key.equals == NULL + ? strcmp((const char*) a, (const char*) b) == 0 + : hashtable->custom.key.equals(hashtable->custom.key.arg, a, b); +} + +static INTHASH_INLINE int inthash_matches_(inthash hashtable, + const inthash_item *const item, + inthash_key_const name, + const inthash_hashkeys *hashes) { + return item->name != NULL + && item->hashes.hash1 == hashes->hash1 + && item->hashes.hash2 == hashes->hash2 + && inthash_equals(hashtable, item->name, name); +} + +static INTHASH_INLINE int inthash_matches(inthash hashtable, size_t pos, + inthash_key_const name, + const inthash_hashkeys *hashes) { + const inthash_item *const item = &hashtable->items[pos]; + return inthash_matches_(hashtable, item, name, hashes); +} + +/* compact string pool ; does not change the capacity */ +static void inthash_compact_pool(inthash hashtable, size_t capacity) { + const size_t hash_size = POW2(hashtable->lg_size); + size_t i; + char*const old_pool = hashtable->pool.buffer; + const size_t old_size = hashtable->pool.size; + size_t count = 0; + + /* we manage the string pool */ + inthash_assert(hashtable, hashtable->custom.key.dup == NULL); + + /* statistics */ + hashtable->stats.pool_compact_count++; + + /* change capacity now */ + if (hashtable->pool.capacity != capacity) { + hashtable->pool.capacity = capacity; + } + + /* realloc */ + hashtable->pool.buffer = malloc(hashtable->pool.capacity); + hashtable->pool.size = 0; + hashtable->pool.used = 0; + if (hashtable->pool.buffer == NULL) { + inthash_debug(hashtable, + "** hashtable string pool compaction error: could not allocate " + "%"UINT_64_FORMAT" bytes", + (uint64_t) hashtable->pool.capacity); + inthash_assert(hashtable, ! "hashtable string pool compaction error"); + } + + /* relocate a string on a different pool */ +#define RELOCATE_STRING(S) do { \ + if (S != NULL && S != the_empty_string) { \ + const char *const src = (S); \ + char *const dest = \ + &hashtable->pool.buffer[hashtable->pool.size]; \ + const size_t capacity = hashtable->pool.capacity; \ + char *const max_dest = \ + &hashtable->pool.buffer[capacity]; \ + /* copy string */ \ + inthash_assert(hashtable, dest < max_dest); \ + dest[0] = src[0]; \ + { \ + size_t i; \ + for(i = 1 ; src[i - 1] != '\0' ; i++) { \ + inthash_assert(hashtable, &dest[i] < max_dest); \ + dest[i] = src[i]; \ + } \ + /* update pool size */ \ + hashtable->pool.size += i; \ + inthash_assert(hashtable, \ + hashtable->pool.size <= capacity); \ + } \ + /* update source */ \ + S = dest; \ + count++; \ + } \ +} while(0) + + /* relocate */ + for(i = 0 ; i < hash_size ; i++) { + RELOCATE_STRING(hashtable->items[i].name); + } + for(i = 0 ; i < hashtable->stash.size ; i++) { + RELOCATE_STRING(hashtable->stash.items[i].name); + } + +#undef RELOCATE_STRING + + /* compacted (used chars == current size) */ + hashtable->pool.used = hashtable->pool.size; + + /* wipe previous pool */ + free(old_pool); + + inthash_debug(hashtable, + "compacted string pool for %"UINT_64_FORMAT" strings: " + "%"UINT_64_FORMAT" bytes => %"UINT_64_FORMAT" bytes", + (uint64_t) count, (uint64_t) old_size, + (uint64_t) hashtable->pool.size); +} + +/* realloc (expand) string pool ; does not change the compacity */ +static void inthash_realloc_pool(inthash hashtable, size_t capacity) { + const size_t hash_size = POW2(hashtable->lg_size); + char *const oldbase = hashtable->pool.buffer; + size_t i; + size_t count = 0; + + /* we manage the string pool */ + inthash_assert(hashtable, hashtable->custom.key.dup == NULL); + + /* compact instead ? */ + if (hashtable->pool.used < ( hashtable->pool.size*3 ) / 4) { + inthash_compact_pool(hashtable, capacity); + return ; + } + + /* statistics */ + hashtable->stats.pool_realloc_count++; + + /* change capacity now */ + hashtable->pool.capacity = capacity; + + /* realloc */ + hashtable->pool.buffer = realloc(hashtable->pool.buffer, + hashtable->pool.capacity); + if (hashtable->pool.buffer == NULL) { + inthash_crit(hashtable, + "** hashtable string pool allocation error: could not allocate " + "%"UINT_64_FORMAT" bytes", + (uint64_t) hashtable->pool.capacity); + inthash_assert(hashtable, ! "hashtable string pool allocation error"); + } + + /* recompute string address */ +#define RECOMPUTE_STRING(S) do { \ + if (S != NULL && S != the_empty_string) { \ + const size_t offset = (const char*) (S) - oldbase; \ + inthash_assert(hashtable, offset < hashtable->pool.capacity); \ + S = &hashtable->pool.buffer[offset]; \ + count++; \ + } \ +} while(0) + + /* recompute */ + for(i = 0 ; i < hash_size ; i++) { + RECOMPUTE_STRING(hashtable->items[i].name); + } + for(i = 0 ; i < hashtable->stash.size ; i++) { + RECOMPUTE_STRING(hashtable->stash.items[i].name); + } + +#undef RECOMPUTE_STRING + + inthash_debug(hashtable, "reallocated string pool for " + "%"UINT_64_FORMAT" strings: %"UINT_64_FORMAT" bytes", + (uint64_t) count, (uint64_t) hashtable->pool.capacity); +} + +static inthash_key inthash_dup_name_internal(inthash hashtable, + inthash_key_const name_) { + const char *const name = (const char*) name_; + const size_t len = strlen(name) + 1; + char *s; + + /* the pool does not allow empty strings for safety purpose ; handhe that + (keys are being emptied when free'd to detect duplicate free) */ + if (len == 1) { + inthash_assert(hashtable, the_empty_string[0] == '\0'); + return the_empty_string; + } + + /* expand pool capacity */ + inthash_assert(hashtable, hashtable->pool.size <= hashtable->pool.capacity); + if (hashtable->pool.capacity - hashtable->pool.size < len) { + size_t capacity; + for(capacity = MIN_POOL_CAPACITY ; capacity < hashtable->pool.size + len + ; capacity <<= 1) ; + inthash_assert(hashtable, hashtable->pool.size < capacity); + inthash_realloc_pool(hashtable, capacity); + } + + /* copy */ + inthash_assert(hashtable, len + hashtable->pool.size <= hashtable->pool.capacity); + s = &hashtable->pool.buffer[hashtable->pool.size]; + memcpy(s, name, len); + hashtable->pool.size += len; + hashtable->pool.used += len; + + return s; +} + +/* duplicate a key. default is to use the internal pool. */ +static INTHASH_INLINE inthash_key inthash_dup_name(inthash hashtable, + inthash_key_const name) { + return hashtable->custom.key.dup == NULL + ? inthash_dup_name_internal(hashtable, name) + : hashtable->custom.key.dup(hashtable->custom.key.arg, name); +} + +/* internal pool free handler. + note: pointer must have been kicked from the pool first */ +static void inthash_free_key_internal(inthash hashtable, inthash_key name_) { + char *const name = (char*) name_; + const size_t len = strlen(name) + 1; + + /* see inthash_dup_name_internal() handling */ + if (len == 1 && name == the_empty_string) { + inthash_assert(hashtable, the_empty_string[0] == '\0'); + return ; + } + + inthash_assert(hashtable, *name != '\0' || !"duplicate or bad string pool release"); + hashtable->pool.used -= len; + *name = '\0'; /* the string is now invalidated */ + + /* compact the pool is too many holes */ + if (hashtable->pool.used < hashtable->pool.size / 2) { + size_t capacity = hashtable->pool.capacity; + /* compact and shrink */ + if (hashtable->pool.used < capacity / 4) { + capacity /= 2; + } + inthash_assert(hashtable, hashtable->pool.used < capacity); + inthash_compact_pool(hashtable, capacity); + } +} + +/* free a key. default is to use the internal pool. + note: pointer must have been kicked from the pool first */ +static void inthash_free_key(inthash hashtable, inthash_key name) { + if (hashtable->custom.key.free == NULL) { + inthash_free_key_internal(hashtable, name); + } else { + hashtable->custom.key.free(hashtable->custom.key.arg, name); + } +} + +static INTHASH_INLINE size_t inthash_hash_to_pos_(size_t lg_size, + inthash_hashkey hash) { + const inthash_hashkey mask = POW2(lg_size) - 1; + return hash & mask; +} + +static INTHASH_INLINE size_t inthash_hash_to_pos(const inthash hashtable, + inthash_hashkey hash) { + return inthash_hash_to_pos_(hashtable->lg_size, hash); +} + +int inthash_read_pvoid(inthash hashtable, inthash_key_const name, void **pvalue) { + inthash_value value = INTHASH_VALUE_NULL; + const int ret = + inthash_read_value(hashtable, name, (pvalue != NULL) ? &value : NULL); + if (pvalue != NULL) + *pvalue = value.ptr; + return ret; +} + +int inthash_write_pvoid(inthash hashtable, inthash_key_const name, void *pvalue) { + inthash_value value = INTHASH_VALUE_NULL; + + value.ptr = pvalue; + return inthash_write_value(hashtable, name, value); +} + +void inthash_add_pvoid(inthash hashtable, inthash_key_const name, void *pvalue) { + inthash_value value = INTHASH_VALUE_NULL; + + value.ptr = pvalue; + inthash_write_value(hashtable, name, value); +} + +int inthash_write(inthash hashtable, inthash_key_const name, intptr_t intvalue) { + inthash_value value = INTHASH_VALUE_NULL; + + value.intg = intvalue; + return inthash_write_value(hashtable, name, value); +} + +static void inthash_default_free_handler(inthash_opaque arg, + inthash_value value) { + (void) arg; + if (value.ptr != NULL) + free(value.ptr); +} + +static void inthash_del_value_(inthash hashtable, inthash_value *pvalue) { + if (hashtable->custom.value.free != NULL) + hashtable->custom.value.free(hashtable->custom.value.arg, *pvalue); + pvalue->ptr = NULL; +} + +static void inthash_del_value(inthash hashtable, size_t pos) { + inthash_del_value_(hashtable, &hashtable->items[pos].value); +} + +static void inthash_del_name(inthash hashtable, inthash_item *item) { + const inthash_hashkeys nullHash = INTHASH_KEYS_NULL; + char *const name = (char*) item->name; + item->name = NULL; /* there must be no reference remaining */ + item->hashes = nullHash; + /* free after detach (we may compact the pool) */ + inthash_free_key(hashtable, name); +} + +static void inthash_del_item(inthash hashtable, inthash_item *pitem) { + inthash_del_value_(hashtable, &pitem->value); + inthash_del_name(hashtable, pitem); +} + +static int inthash_add_item_(inthash hashtable, inthash_item item); + +/* Write (add or replace) a value in the hashtable. */ +static int inthash_write_value_(inthash hashtable, inthash_key_const name, + inthash_value value) { + inthash_item item; + size_t pos; + const inthash_hashkeys hashes = inthash_calc_hashes(hashtable, name); + + /* Statistics */ + hashtable->stats.write_count++; + + /* replace at position 1 ? */ + pos = inthash_hash_to_pos(hashtable, hashes.hash1); + if (inthash_matches(hashtable, pos, name, &hashes)) { + inthash_del_value(hashtable, pos); + hashtable->items[pos].value = value; + return 0; /* replaced */ + } + + /* replace at position 2 ? */ + pos = inthash_hash_to_pos(hashtable, hashes.hash2); + if (inthash_matches(hashtable, pos, name, &hashes)) { + inthash_del_value(hashtable, pos); + hashtable->items[pos].value = value; + return 0; /* replaced */ + } + + /* replace in the stash ? */ + if (hashtable->stash.size != 0) { + size_t i; + for(i = 0 ; i < hashtable->stash.size ; i++) { + if (inthash_matches_(hashtable, &hashtable->stash.items[i], name, + &hashes)) { + inthash_del_value_(hashtable, &hashtable->stash.items[i].value); + hashtable->stash.items[i].value = value; + return 0; /* replaced */ + } + } + } + + /* Statistics */ + hashtable->stats.add_count++; + + /* otherwise we need to create a new item */ + item.name = inthash_dup_name(hashtable, name); + item.value = value; + item.hashes = hashes; + + return inthash_add_item_(hashtable, item); +} + +/* Return the string representation of a key */ +static const char* inthash_print_key(inthash hashtable, + inthash_key_const name) { + return hashtable->custom.print.key != NULL + ? hashtable->custom.print.key(hashtable->custom.print.arg, name) + : (const char*) name; +} + +/* Add a new item in the hashtable. The item SHALL NOT be alreasy present. */ +static int inthash_add_item_(inthash hashtable, inthash_item item) { + inthash_hashkey cuckoo_hash, initial_cuckoo_hash; + size_t loops; + size_t pos; + + /* place at free position 1 ? */ + pos = inthash_hash_to_pos(hashtable, item.hashes.hash1); + if (inthash_is_free(hashtable, pos)) { + hashtable->items[pos] = item; + return 1; /* added */ + } else { + /* place at free position 2 ? */ + pos = inthash_hash_to_pos(hashtable, item.hashes.hash2); + if (inthash_is_free(hashtable, pos)) { + hashtable->items[pos] = item; + return 1; /* added */ + } + /* prepare cuckoo ; let's take position 1 */ + else { + cuckoo_hash = initial_cuckoo_hash = item.hashes.hash1; + inthash_trace(hashtable, + "debug:collision with '%s' at %"UINT_64_FORMAT" (%x)", + inthash_print_key(hashtable, item.name), + (uint64_t) pos, cuckoo_hash); + } + } + + /* put 'item' in place with hash 'cuckoo_hash' */ + for(loops = POW2(hashtable->lg_size) ; loops != 0 ; --loops) { + const size_t pos = inthash_hash_to_pos(hashtable, cuckoo_hash); + + inthash_trace(hashtable, + "\tdebug:placing cuckoo '%s' at %"UINT_64_FORMAT" (%x)", + inthash_print_key(hashtable, item.name), + (uint64_t) pos, cuckoo_hash); + + /* place at alternate free position ? */ + if (inthash_is_free(hashtable, pos)) { + inthash_trace(hashtable, "debug:free position"); + hashtable->items[pos] = item; + return 1; /* added */ + } + /* then cuckoo's place it is */ + else { + /* replace */ + const inthash_item backup_item = hashtable->items[pos]; + hashtable->items[pos] = item; + + /* statistics */ + hashtable->stats.cuckoo_moved++; + + /* take care of new lost item */ + item = backup_item; + + /* we just kicked this item from its position 1 */ + if (pos == inthash_hash_to_pos(hashtable, item.hashes.hash1)) { + /* then place it on position 2 on next run */ + inthash_trace(hashtable, "\tdebug:position 1"); + cuckoo_hash = item.hashes.hash2; + } + /* we just kicked this item from its position 2 */ + else if (pos == inthash_hash_to_pos(hashtable, item.hashes.hash2)) { + /* then place it on position 1 on next run */ + inthash_trace(hashtable, "\tdebug:position 2"); + cuckoo_hash = item.hashes.hash1; + } + else { + inthash_assert(hashtable, ! "hashtable internal error: unexpected position"); + } + + /* we are looping (back to same hash) */ + /* TODO FIXME: we should actually check the positions */ + if (cuckoo_hash == initial_cuckoo_hash) { + /* emergency stash */ + break; + } + } + } + + /* emergency stashing for the rare cases of collisions */ + if (hashtable->stash.size < STASH_SIZE) { + hashtable->stash.items[hashtable->stash.size] = item; + hashtable->stash.size++; + /* for statistics */ + hashtable->stats.stash_added++; + if (hashtable->stash.size > hashtable->stats.max_stash_size) { + hashtable->stats.max_stash_size = hashtable->stash.size; + } + inthash_debug(hashtable, "used stash because of collision (%d entries)", + (int) hashtable->stash.size); + return 1; /* added */ + } else { + /* debugging */ + if (hashtable->custom.print.key != NULL + && hashtable->custom.print.value != NULL) { + size_t i; + for(i = 0 ; i < hashtable->stash.size ; i++) { + inthash_item *const item = &hashtable->stash.items[i]; + const size_t pos1 = inthash_hash_to_pos(hashtable, item->hashes.hash1); + const size_t pos2 = inthash_hash_to_pos(hashtable, item->hashes.hash2); + inthash_crit(hashtable, + "stash[%u]: key='%s' value='%s' pos1=%d pos2=%d hash1=%04x hash2=%04x", + (int) i, + hashtable->custom.print.key(hashtable->custom.print.arg, item->name), + hashtable->custom.print.value(hashtable->custom.print.arg, item->value), + (int) pos1, (int) pos2, + item->hashes.hash1, item->hashes.hash2); + if (!inthash_is_free(hashtable, pos1)) { + inthash_item *const item = &hashtable->items[pos1]; + const size_t pos1 = inthash_hash_to_pos(hashtable, item->hashes.hash1); + const size_t pos2 = inthash_hash_to_pos(hashtable, item->hashes.hash2); + inthash_crit(hashtable, + "\t.. collisionning with key='%s' value='%s' pos1=%d pos2=%d hash1=%04x hash2=%04x", + hashtable->custom.print.key(hashtable->custom.print.arg, item->name), + hashtable->custom.print.value(hashtable->custom.print.arg, item->value), + (int) pos1, (int) pos2, + item->hashes.hash1, item->hashes.hash2); + } else { + inthash_crit(hashtable, "\t.. collisionning with a free slot (%d)!", (int) pos1); + } + if (!inthash_is_free(hashtable, pos2)) { + inthash_item *const item = &hashtable->items[pos2]; + const size_t pos1 = inthash_hash_to_pos(hashtable, item->hashes.hash1); + const size_t pos2 = inthash_hash_to_pos(hashtable, item->hashes.hash2); + inthash_crit(hashtable, + "\t.. collisionning with key='%s' value='%s' pos1=%d pos2=%d hash1=%04x hash2=%04x", + hashtable->custom.print.key(hashtable->custom.print.arg, item->name), + hashtable->custom.print.value(hashtable->custom.print.arg, item->value), + (int) pos1, (int) pos2, + item->hashes.hash1, item->hashes.hash2); + } else { + inthash_crit(hashtable, "\t.. collisionning with a free slot (%d)!", (int) pos2); + } + } + //struct_inthash_enum e = inthash_enum_new(hashtable); + //while((item = inthash_enum_next(&e)) != NULL) { + // inthash_crit(hashtable, "element key='%s' value='%s' hash1=%04x hash2=%04x", + // hashtable->custom.print.key(hashtable->custom.print.arg, item->name), + // hashtable->custom.print.value(hashtable->custom.print.arg, item->value.ptr), + // item->hashes.hash1, item->hashes.hash2); + //} + } + + /* we are doomed. hopefully the probability is lower than being killed + by a wandering radioactive monkey */ + inthash_log_stats(hashtable); + inthash_assert(hashtable, ! "hashtable internal error: cuckoo/stash collision"); + + /* not reachable code */ + return -1; + } +} + +int inthash_write_value(inthash hashtable, inthash_key_const name, + inthash_value_const value) { + /* replace of add item */ + const int ret = inthash_write_value_(hashtable, name, value); + + /* added ? */ + if (ret) { + /* size of half of the table */ + const size_t half_size = POW2(hashtable->lg_size - 1); + + /* size of half of the stash */ + const size_t half_stash_size = STASH_SIZE / 2; + + /* item was added: increase count */ + hashtable->used++; + + /* table is more than half-full, or stash is more than half-full */ + if (hashtable->used >= half_size + || hashtable->stash.size >= half_stash_size) { + size_t i; + + /* size before */ + const size_t prev_power = hashtable->lg_size; + const size_t prev_size = half_size * 2; + const size_t prev_alloc_size = prev_size*sizeof(inthash_item); + + /* size after doubling it */ + const size_t alloc_size = prev_alloc_size * 2; + + /* log stash issues */ + if (hashtable->stash.size >= half_stash_size + && half_size > POW2(16) + && hashtable->used < half_size / 4) { + inthash_warning(hashtable, + "stash size still full despite %"UINT_64_FORMAT + " elements used out of %"UINT_64_FORMAT, + (uint64_t) hashtable->used, (uint64_t) half_size*2); + } + + /* statistics */ + hashtable->stats.rehash_count++; + + /* realloc */ + hashtable->lg_size++; + hashtable->items = + (inthash_item *) realloc(hashtable->items, alloc_size); + if (hashtable->items == NULL) { + inthash_crit(hashtable, + "** hashtable allocation error: " + "could not allocate %"UINT_64_FORMAT" bytes", + (uint64_t) alloc_size); + inthash_assert(hashtable, ! "hashtable allocation error"); + } + + /* clear upper half */ + memset(&hashtable->items[prev_size], 0, prev_alloc_size); + + /* relocate lower half items when needed */ + for(i = 0 ; i < prev_size ; i++) { + if (!inthash_is_free(hashtable, i)) { + const inthash_hashkeys *const hashes = &hashtable->items[i].hashes; + + /* currently at old position 1 */ + if (inthash_hash_to_pos_(prev_power, hashes->hash1) == i) { + const size_t pos = inthash_hash_to_pos(hashtable, hashes->hash1); + /* no more the expected position */ + if (pos != i) { + inthash_assert(hashtable, pos >= prev_size); + hashtable->items[pos] = hashtable->items[i]; + memset(&hashtable->items[i], 0, sizeof(hashtable->items[i])); + } + } + else if (inthash_hash_to_pos_(prev_power, hashes->hash2) == i) { + const size_t pos = inthash_hash_to_pos(hashtable, hashes->hash2); + /* no more the expected position */ + if (pos != i) { + inthash_assert(hashtable, pos >= prev_size); + hashtable->items[pos] = hashtable->items[i]; + memset(&hashtable->items[i], 0, sizeof(hashtable->items[i])); + } + } + else { + inthash_assert(hashtable, ! "hashtable unexpected internal error (bad position)"); + } + } + } + + inthash_debug(hashtable, + "expanded hashtable to %"UINT_64_FORMAT" elements", + (uint64_t) POW2(hashtable->lg_size)); + + /* attempt to merge the stash if present */ + if (hashtable->stash.size != 0) { + const size_t old_size = hashtable->stash.size; + size_t i; + + /* backup stash and reset it */ + inthash_item stash[STASH_SIZE]; + memcpy(&stash, hashtable->stash.items, sizeof(hashtable->stash.items)); + hashtable->stash.size = 0; + + /* insert all items */ + for(i = 0 ; i < old_size ; i++) { + const int ret = inthash_add_item_(hashtable, stash[i]); + if (ret == 0) { + inthash_assert(hashtable, ! "hashtable duplicate key when merging the stash"); + } + } + + /* logging */ + inthash_assert(hashtable, hashtable->stash.size <= old_size); + if (hashtable->stash.size < old_size) { + inthash_debug(hashtable, "reduced stash size from %"UINT_64_FORMAT" " + "to %"UINT_64_FORMAT, + (uint64_t) old_size, (uint64_t) hashtable->stash.size); + } else { + inthash_trace(hashtable, "stash has still %"UINT_64_FORMAT" elements", + (uint64_t) hashtable->stash.size); + } + } + + } + } + + return ret; +} + +void inthash_add(inthash hashtable, inthash_key_const name, intptr_t intvalue) { + inthash_value value = INTHASH_VALUE_NULL; + + memset(&value, 0, sizeof(value)); + value.intg = intvalue; + inthash_write_value(hashtable, name, value); +} + +int inthash_read(inthash hashtable, inthash_key_const name, intptr_t * intvalue) { + inthash_value value = INTHASH_VALUE_NULL; + int ret = + inthash_read_value(hashtable, name, (intvalue != NULL) ? &value : NULL); + if (intvalue != NULL) + *intvalue = value.intg; + return ret; +} + +static inthash_value* inthash_read_value_(inthash hashtable, + inthash_key_const name) { + const inthash_hashkeys hashes = inthash_calc_hashes(hashtable, name); + size_t pos; + + /* found at position 1 ? */ + pos = inthash_hash_to_pos(hashtable, hashes.hash1); + if (inthash_matches(hashtable, pos, name, &hashes)) { + return &hashtable->items[pos].value; + } + + /* found at position 2 ? */ + pos = inthash_hash_to_pos(hashtable, hashes.hash2); + if (inthash_matches(hashtable, pos, name, &hashes)) { + return &hashtable->items[pos].value; + } + + /* find in stash ? */ + if (hashtable->stash.size != 0) { + size_t i; + for(i = 0 ; i < hashtable->stash.size ; i++) { + if (inthash_matches_(hashtable, &hashtable->stash.items[i], name, + &hashes)) { + return &hashtable->stash.items[i].value; + } + } + } + + /* not found */ + return NULL; +} + +int inthash_read_value(inthash hashtable, inthash_key_const name, + inthash_value * pvalue) { + inthash_value* const value = inthash_read_value_(hashtable, name); + if (value != NULL) { + if (pvalue != NULL) { + *pvalue = *value; + } + return 1; + } + return 0; +} + +static size_t inthash_inc_(inthash hashtable, inthash_key_const name, + size_t inc) { + inthash_value* const value = inthash_read_value_(hashtable, name); + if (value != NULL) { + value->uintg += inc; + return value->uintg; + } else { + /* create a new value */ + const int ret = inthash_write(hashtable, name, inc); + inthash_assert(hashtable, ret); + return inc; + } +} + +int inthash_inc(inthash hashtable, inthash_key_const name) { + return (int) inthash_inc_(hashtable, name, 1); +} + +int inthash_dec(inthash hashtable, inthash_key_const name) { + return (int) inthash_inc_(hashtable, name, (size_t) -1); +} + +int inthash_exists(inthash hashtable, inthash_key_const name) { + return inthash_read_value(hashtable, name, NULL); +} + +static int inthash_remove_(inthash hashtable, inthash_key_const name, + const inthash_hashkeys *hashes, size_t *removed) { + size_t pos; + + /* found at position 1 ? */ + pos = inthash_hash_to_pos(hashtable, hashes->hash1); + if (inthash_matches(hashtable, pos, name, hashes)) { + inthash_del_item(hashtable, &hashtable->items[pos]); + *removed = pos; + return 1; + } + + /* found at position 2 ? */ + pos = inthash_hash_to_pos(hashtable, hashes->hash2); + if (inthash_matches(hashtable, pos, name, hashes)) { + inthash_del_item(hashtable, &hashtable->items[pos]); + *removed = pos; + return 1; + } + + /* find in stash ? */ + if (hashtable->stash.size != 0) { + size_t i; + for(i = 0 ; i < hashtable->stash.size ; i++) { + if (inthash_matches_(hashtable, &hashtable->stash.items[i], name, + hashes)) { + inthash_del_item(hashtable, &hashtable->stash.items[i]); + for( ; i + 1 < hashtable->stash.size ; i++) { + hashtable->stash.items[i] = hashtable->stash.items[i + 1]; + } + hashtable->stash.size--; + *removed = (size_t) -1; + inthash_debug(hashtable, "debug:deleted item in stash (%d entries)", + (int) hashtable->stash.size); + return 1; + } + } + } + + /* not found */ + return 0; +} + +int inthash_remove(inthash hashtable, inthash_key_const name) { + const inthash_hashkeys hashes = inthash_calc_hashes(hashtable, name); + size_t removed; + const int ret = inthash_remove_(hashtable, name, &hashes, &removed); + + if (ret) { + /* item was removed: decrease count */ + inthash_assert(hashtable, hashtable->used != 0); + hashtable->used--; + + /* can we place stash entry back to the table ? */ + if (hashtable->stash.size != 0 && removed != (size_t) -1) { + size_t i; + for(i = 0 ; i < hashtable->stash.size ; i++) { + const size_t pos1 = + inthash_hash_to_pos(hashtable, hashtable->stash.items[i].hashes.hash1); + const size_t pos2 = + inthash_hash_to_pos(hashtable, hashtable->stash.items[i].hashes.hash2); + if (pos1 == removed || pos2 == removed) { + if (pos1 == removed) { + hashtable->items[pos1] = hashtable->stash.items[i]; + } else if (pos2 == removed) { + hashtable->items[pos2] = hashtable->stash.items[i]; + } + for( ; i + 1 < hashtable->stash.size ; i++) { + hashtable->stash.items[i] = hashtable->stash.items[i + 1]; + } + hashtable->stash.size--; + inthash_debug(hashtable, "debug:moved item from stash (%d entries)", + (int) hashtable->stash.size); + break; + } + } + } + } + + return ret; +} + +int inthash_readptr(inthash hashtable, inthash_key_const name, intptr_t * value) { + int ret; + + *value = 0; + ret = inthash_read(hashtable, name, value); + if (*value == 0) + ret = 0; + return ret; +} + +inthash inthash_new(size_t initial_size) { + inthash hashtable = (inthash) calloc(1, sizeof(struct_inthash)); + + if (hashtable) { + size_t size; + for(size = MIN_LG_SIZE ; POW2(size) < initial_size ; size++) ; + if ((hashtable->items = + (inthash_item *) calloc(POW2(size), sizeof(inthash_item)))) { + hashtable->lg_size = size; + } + hashtable->used = 0; + hashtable->stash.size = 0; + hashtable->pool.buffer = NULL; + hashtable->pool.size = 0; + hashtable->pool.capacity = 0; + hashtable->pool.used = 0; + hashtable->stats.max_stash_size = 0; + hashtable->stats.write_count = 0; + hashtable->stats.add_count = 0; + hashtable->stats.cuckoo_moved = 0; + hashtable->stats.stash_added= 0; + hashtable->stats.pool_compact_count = 0; + hashtable->stats.pool_realloc_count = 0; + hashtable->stats.rehash_count = 0; + hashtable->custom.value.free = NULL; + hashtable->custom.value.arg = NULL; + hashtable->custom.key.dup = NULL; + hashtable->custom.key.free = NULL; + hashtable->custom.key.hash = NULL; + hashtable->custom.key.equals = NULL; + hashtable->custom.key.arg = NULL; + hashtable->custom.error.log = NULL; + hashtable->custom.error.fatal = NULL; + hashtable->custom.error.name = NULL; + hashtable->custom.error.arg = NULL; + hashtable->custom.print.key = NULL; + hashtable->custom.print.value = NULL; + hashtable->custom.print.arg = NULL; + } + return hashtable; +} + +int inthash_created(inthash hashtable) { + return hashtable != NULL && hashtable->items != NULL; +} + +void inthash_value_is_malloc(inthash hashtable, int flag) { + if (flag) { + if (hashtable->custom.value.free == NULL) { + hashtable->custom.value.free = inthash_default_free_handler; + hashtable->custom.value.arg = NULL; + } + } else { + hashtable->custom.value.free = NULL; + hashtable->custom.value.arg = NULL; + } +} + +void inthash_set_name(inthash hashtable, inthash_key_const name) { + hashtable->custom.error.name = name; +} + +void inthash_value_set_value_handler(inthash hashtable, + t_inthash_value_freehandler free, + inthash_opaque arg) { + hashtable->custom.value.free = free; + hashtable->custom.value.arg = arg; +} + +void inthash_value_set_key_handler(inthash hashtable, + t_inthash_duphandler dup, + t_inthash_key_freehandler free, + t_inthash_hasheshandler hash, + t_inthash_cmphandler equals, + inthash_opaque arg) { + /* dup and free must be consistent */ + inthash_assert(hashtable, ( dup == NULL ) == ( free == NULL ) ); + hashtable->custom.key.dup = dup; + hashtable->custom.key.free = free; + hashtable->custom.key.hash = hash; + hashtable->custom.key.equals = equals; + hashtable->custom.key.arg = arg; +} + +void inthash_set_assert_handler(inthash hashtable, + t_inthash_loghandler log, + t_inthash_asserthandler fatal, + inthash_opaque arg) { + hashtable->custom.error.log = log; + hashtable->custom.error.fatal = fatal; + hashtable->custom.error.arg = arg; +} + +void inthash_set_print_handler(inthash hashtable, + t_inthash_printkeyhandler key, + t_inthash_printvaluehandler value, + inthash_opaque arg) { + hashtable->custom.print.key = key; + hashtable->custom.print.value = value; + hashtable->custom.print.arg = arg; +} + +size_t inthash_nitems(inthash hashtable) { + if (hashtable != NULL) + return hashtable->used; + return 0; +} + +size_t inthash_memory_size(inthash hashtable) { + const size_t size_struct = sizeof(struct_inthash); + const size_t hash_size = POW2(hashtable->lg_size)*sizeof(inthash_item); + const size_t pool_size = hashtable->pool.capacity*sizeof(char); + return size_struct + hash_size + pool_size; +} + +void inthash_delete(inthash *phashtable) { + if (phashtable != NULL) { + inthash hashtable = *phashtable; + if (hashtable != NULL) { + inthash_log_stats(hashtable); + if (hashtable->items != NULL) { + /* we need to delete values */ + const size_t hash_size = POW2(hashtable->lg_size); + size_t i; + + /* wipe hashtable values (not names) */ + for(i = 0 ; i < hash_size ; i++) { + if (!inthash_is_free(hashtable, i)) { + inthash_del_value(hashtable, i); + } + } + + /* wipe auxiliary stash values (not names) if any */ + for(i = 0 ; i < hashtable->stash.size ; i++) { + inthash_del_value_(hashtable, &hashtable->stash.items[i].value); + } + } + /* wipe top-level */ + hashtable->lg_size = 0; + hashtable->used = 0; + free(hashtable->pool.buffer); + hashtable->pool.buffer = NULL; + free(hashtable->items); + hashtable->items = NULL; + free(hashtable); + *phashtable = NULL; + } + } +} + +/* Enumerator */ + +struct_inthash_enum inthash_enum_new(inthash hashtable) { + struct_inthash_enum e; + + e.index = 0; + e.table = hashtable; + return e; +} + +inthash_item *inthash_enum_next(struct_inthash_enum * e) { + const size_t hash_size = POW2(e->table->lg_size); + for( ; e->index < hash_size + && inthash_is_free(e->table, e->index) ; e->index++) ; + /* enumerate all table */ + if (e->index < hash_size) { + inthash_item *const next = &e->table->items[e->index]; + e->index++; + return next; + } + /* enumerate stash if present */ + else if (e->index < hash_size + e->table->stash.size) { + const size_t index = e->index - hash_size; + inthash_item *const next = &e->table->stash.items[index]; + e->index++; + return next; + } + /* eof */ + else { + return NULL; + } +} + +void inthash_set_global_assert_handler(t_inthash_loghandler log, + t_inthash_asserthandler fatal) { + global_log_handler = log; + global_assert_handler = fatal; +} diff --git a/src/coucal.h b/src/coucal.h new file mode 100644 index 0000000..fb62767 --- /dev/null +++ b/src/coucal.h @@ -0,0 +1,407 @@ +/* ------------------------------------------------------------ */ +/* +HTTrack Website Copier, Offline Browser for Windows and Unix +Copyright (C) 1998-2014 Xavier Roche and other contributors + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . + +Important notes: + +- We hereby ask people using this source NOT to use it in purpose of grabbing +emails addresses, or collecting any other private information on persons. +This would disgrace our work, and spoil the many hours we spent on it. + +Please visit our Website: http://www.httrack.com +*/ + +/* ------------------------------------------------------------ */ +/* File: hash table system (fast index) */ +/* Author: Xavier Roche */ +/* ------------------------------------------------------------ */ + +/** + * Library notes: + * This small hashtable library provides key/value hashtable, with a string + * key, and integer/pointer value (with an associated optional allocator) + * It features O(1) average insertion, O(1) lookup, and O(1) delete. + * + * Implementation notes: + * Implementation is auto-rehashable, and uses cuckoo hashing of size 2**n + * with a MD5 or FNV-1 hash function, with one additional auxiliary hash + * function. + * It also uses a small stash area to handle rare cases of collisions. + * Enumeration of all key/values is possible, deletion is also possible, but + * currently without any auto-shrinking (ie. table will never shrink). + * Overall, two main blocks are allocated: one for the items, and one for + * the keys (pool). + * + * References: + * Cuckoo Hashing http://en.wikipedia.org/wiki/Cuckoo_hashing + * Cuckoo Stash http://research.microsoft.com/pubs/73856/stash-full.9-30.pdf + * FNV http://www.isthe.com/chongo/tech/comp/fnv/ + * MD5 http://en.wikipedia.org/wiki/MD5 + **/ + +#ifndef HTSINTHASH_DEFH +#define HTSINTHASH_DEFH + +/* Includes */ +#ifdef _WIN32 +#include +typedef unsigned __int32 uint32_t; +typedef unsigned __int64 uint64_t; +#elif (defined(SOLARIS) || defined(sun) || defined(HAVE_INTTYPES_H) \ + || defined(BSD) || defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) || defined(__FreeBSD_kernel__)) +#include +#else +#include +#endif +#include + +/** Key opaque type. May be a regular 'const char*'. **/ +typedef void* inthash_key; + +/** Key constant (can not be modified) opaque type. **/ +typedef const void* inthash_key_const; + +/** Opaque user-defined pointer. **/ +typedef void* inthash_opaque; + +/** Value (union of any value). **/ +typedef union inthash_value { + /** Integer value. **/ + intptr_t intg; + + /** Unsigned integer value. **/ + uintptr_t uintg; + + /** Pointer value. **/ + void *ptr; +} inthash_value; + +/** Value constant. **/ +typedef const inthash_value inthash_value_const; + +/** NULL Value. **/ +#define INTHASH_VALUE_NULL { 0 } + +#ifndef HTS_DEF_FWSTRUCT_inthash_item +#define HTS_DEF_FWSTRUCT_inthash_item +typedef struct inthash_item inthash_item; +#endif + +/** Hash key (32-bit) **/ +typedef uint32_t inthash_hashkey; + +/** Pair of hashes **/ +typedef struct inthash_hashkeys { + inthash_hashkey hash1; + inthash_hashkey hash2; +} inthash_hashkeys; + +/** NULL pair of hashes. **/ +#define INTHASH_KEYS_NULL { 0, 0 } + +/** Item holding a value. **/ +struct inthash_item { + /** Key. **/ + inthash_key name; + + /** Value. **/ + inthash_value value; + + /** Hashes of the key. **/ + inthash_hashkeys hashes; +}; + +/** Log level. **/ +typedef enum inthash_loglevel { + inthash_log_critical, + inthash_log_warning, + inthash_log_info, + inthash_log_debug, + inthash_log_trace +} inthash_loglevel; + +/** free handler. Only used when values are markes as xxc **/ +typedef void (*t_inthash_key_freehandler)(inthash_opaque arg, + inthash_key key); + +/** Value free handler. Only used when values are markes as xxc **/ +typedef void (*t_inthash_value_freehandler)(inthash_opaque arg, + inthash_value value); + +/** Key dup handler. **/ +typedef inthash_key (*t_inthash_duphandler)(inthash_opaque arg, + inthash_key_const name); + +/** Key hash computation handler. **/ +typedef inthash_hashkeys (*t_inthash_hasheshandler)(inthash_opaque arg, + inthash_key_const name); + +/** Hashtable logging handler. **/ +typedef void (*t_inthash_loghandler)(inthash_opaque arg, inthash_loglevel level, + const char* format, va_list args); + +/** Hashtable fatal assertion failure. **/ +typedef void (*t_inthash_asserthandler)(inthash_opaque arg, const char* exp, + const char* file, int line); + +/** Key printer (debug) **/ +typedef const char* (*t_inthash_printkeyhandler)(inthash_opaque arg, + inthash_key_const name); + +/** Value printer (debug) **/ +typedef const char* (*t_inthash_printvaluehandler)(inthash_opaque arg, + inthash_value_const value); + +/** + * Value comparison handler (returns non-zero value if strings are equal). + **/ +typedef int (*t_inthash_cmphandler)(inthash_opaque arg, + inthash_key_const a, + inthash_key_const b); + +/** Hashtable (opaque structure). **/ +#ifndef HTS_DEF_FWSTRUCT_struct_inthash +#define HTS_DEF_FWSTRUCT_struct_inthash +typedef struct struct_inthash struct_inthash, *inthash; +#endif + +/** Hashtable enumeration (opaque structure). **/ +#ifndef HTS_DEF_FWSTRUCT_struct_inthash_enum +#define HTS_DEF_FWSTRUCT_struct_inthash_enum +typedef struct struct_inthash_enum struct_inthash_enum; +#endif + +/** Enumeration. **/ +struct struct_inthash_enum { + inthash table; + size_t index; +}; + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Create a new hashtable, with initial bucket size of 'size'. + * If size is 0, use the default minimal bucket size. + * Return a non-NULL pointer upon success. + **/ +inthash inthash_new(size_t size); + +/** + * Was the hashtable successfully created ? + * Return non-zero value if the hashtable is valid. + **/ +int inthash_created(inthash hashtable); + +/** + * Delete a hashtable, freeing all entries. + **/ +void inthash_delete(inthash * hashtable); + +/** + * Return the number of items in the hashtable. + **/ +size_t inthash_nitems(inthash hashtable); + +/** + * Return the memory size taken by the hashtable. + * (This does not take account of the possible memory taken by values) + **/ +size_t inthash_memory_size(inthash hashtable); + +/** + * If 'flag' is non-zero, calls inthash_value_set_value_handler() with + * default system free() handler function, otherwise, free the value handlers. + **/ +void inthash_value_is_malloc(inthash hashtable, int flag); + +/** + * Set handlers for values. + * free: this handler will be called when a value is to be removed from + * the hashtable. if NULL, values won't be free'd. + * arg: opaque custom argument to be used by functions. + * Handler(s) MUST NOT be changed once elements have been added. + **/ +void inthash_value_set_value_handler(inthash hashtable, + t_inthash_value_freehandler free, + inthash_opaque arg); + +/** + * Set handlers for keys. + * dup: handler called to duplicate a key. if NULL, the internal pool is used. + * free: handler called to free a key. if NULL, the internal pool is used. + * hash: hashing handler, called to hash a key. if NULL, the default hash + * function is used. + * equals: comparison handler, returning non-zero value when two keys are + * identical. if NULL, the default comparison function is used. + * arg: opaque custom argument to be used by functions. + * Handler(s) MUST NOT be changed once elements have been added. + **/ +void inthash_value_set_key_handler(inthash hashtable, + t_inthash_duphandler dup, + t_inthash_key_freehandler free, + t_inthash_hasheshandler hash, + t_inthash_cmphandler equals, + inthash_opaque arg); + +/** + * Set assertion failure handler. + * log: handler called upon serious programming error + * fatal: handler called upon serious programming error + **/ +void inthash_set_assert_handler(inthash hashtable, + t_inthash_loghandler log, + t_inthash_asserthandler fatal, + inthash_opaque arg); + +/** + * Set pretty print loggers (debug). Both handlers must return a string + * pointer which shall be valid until the next call. Both key and value + * pointers shall be valid at the same time. + * name: handler called to print the string representation of the name + * value: handler called to print the string representation of the value + **/ +void inthash_set_print_handler(inthash hashtable, + t_inthash_printkeyhandler key, + t_inthash_printvaluehandler value, + inthash_opaque arg); + +/** + * Set the hashtable name, for degugging purpose. + * name: the hashtable name (ASCII or UTF-8) + */ +void inthash_set_name(inthash hashtable, inthash_key_const name); + +/** + * Get the hashtable name, for degugging purpose. + * Return NULL if no name was defined. + **/ +const char* inthash_get_name(inthash hashtable); + +/** + * Read an integer entry from the hashtable. + * Return non-zero value upon success and sets intvalue. + **/ +int inthash_read(inthash hashtable, inthash_key_const name, intptr_t * intvalue); + +/** + * Same as inthash_read(), but return 0 is the value was zero. + **/ +int inthash_readptr(inthash hashtable, inthash_key_const name, intptr_t * intvalue); + +/** + * Return non-zero value if the given entry exists. + **/ +int inthash_exists(inthash hashtable, inthash_key_const name); + +/** + * Read an entry from the hashtable. + * Return non-zero value upon success and sets value. + **/ +int inthash_read_value(inthash hashtable, inthash_key_const name, + inthash_value *value); + +/** + * Write an entry to the hashtable. + * Return non-zero value if the entry was added, zero if it was replaced. + **/ +int inthash_write_value(inthash hashtable, inthash_key_const name, + inthash_value_const value); +/** + * Read a pointer entry from the hashtable. + * Return non-zero value upon success and sets value. + **/ +int inthash_read_pvoid(inthash hashtable, inthash_key_const name, void **value); + +/** + * Write a pointer entry to the hashtable. + * Return non-zero value if the entry was added, zero if it was replaced. + **/ +int inthash_write_pvoid(inthash hashtable, inthash_key_const name, void *value); + +/** + * Alias to inthash_write_pvoid() + **/ +void inthash_add_pvoid(inthash hashtable, inthash_key_const name, void *value); + +/** + * Write an integer entry to the hashtable. + * Return non-zero value if the entry was added, zero if it was replaced. + **/ +int inthash_write(inthash hashtable, inthash_key_const name, intptr_t value); + +/** + * Alias to inthash_write() + **/ +void inthash_add(inthash hashtable, inthash_key_const name, intptr_t value); + +/** + * Increment an entry value in the hashtable + * (or create a new entry with value 1 if it does not yet exist) + * Return non-zero value if the entry was added, zero if it was changed. + **/ +int inthash_inc(inthash hashtable, inthash_key_const name); + +/** + * Decrement an entry value in the hashtable + * (or create a new entry with value -1 if it does not yet exist) + * Return non-zero value if the entry was added, zero if it was changed. + **/ +int inthash_dec(inthash hashtable, inthash_key_const name); + +/** + * Remove an entry from the hashtable + * Return non-zero value if the entry was removed, zero otherwise. + **/ +int inthash_remove(inthash hashtable, inthash_key_const name); + +/** + * Return a new enumerator. + * Note: deleting entries is safe while enumerating, but adding entries + * lead to undefined enumeration behavior (yet safe). + **/ +struct_inthash_enum inthash_enum_new(inthash hashtable); + +/** + * Enumerate the next entry. + **/ +inthash_item *inthash_enum_next(struct_inthash_enum * e); + +/** + * Compute a hash, given a string. This is the default function used for + * hashing keys, which are by default strings. + **/ +inthash_hashkeys inthash_hash_string(const char *value); + +/** + * Set default global assertion failure handler. + * The handler will be used if no specific handler was defined in the + * hashtable itself. + * log: handler called upon serious error log (opaque argument + * is the hashtable itself) + * fatal: handler called upon serious programming error (opaque argument + * is the hashtable itself) + **/ +void inthash_set_global_assert_handler(t_inthash_loghandler log, + t_inthash_asserthandler fatal); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/htscore.h b/src/htscore.h index c3572cb..7f96039 100644 --- a/src/htscore.h +++ b/src/htscore.h @@ -123,7 +123,7 @@ typedef struct filecreate_params filecreate_params; // gestion hashage #include "htshash.h" -#include "htsinthash.h" +#include "coucal.h" #include "htsdefines.h" diff --git a/src/htshash.c b/src/htshash.c index 9cf4b66..5bdeb3e 100644 --- a/src/htshash.c +++ b/src/htshash.c @@ -42,7 +42,7 @@ Please visit our Website: http://www.httrack.com #include "htsglobal.h" #include "htsmd5.h" #include "htscore.h" -#include "htsinthash.h" +#include "coucal.h" /* END specific definitions */ /* Specific macros */ diff --git a/src/htsindex.c b/src/htsindex.c index 3c4c9da..8414d4b 100644 --- a/src/htsindex.c +++ b/src/htsindex.c @@ -40,7 +40,7 @@ Please visit our Website: http://www.httrack.com #if HTS_MAKE_KEYWORD_INDEX #include "htshash.h" -#include "htsinthash.h" +#include "coucal.h" /* Keyword Indexer Parameters */ diff --git a/src/htsinthash.c b/src/htsinthash.c deleted file mode 100644 index 593bf94..0000000 --- a/src/htsinthash.c +++ /dev/null @@ -1,1476 +0,0 @@ -/* ------------------------------------------------------------ */ -/* -HTTrack Website Copier, Offline Browser for Windows and Unix -Copyright (C) 1998-2014 Xavier Roche and other contributors - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . - -Important notes: - -- We hereby ask people using this source NOT to use it in purpose of grabbing -emails addresses, or collecting any other private information on persons. -This would disgrace our work, and spoil the many hours we spent on it. - -Please visit our Website: http://www.httrack.com -*/ - -/* ------------------------------------------------------------ */ -/* File: hash tables */ -/* Author: Xavier Roche */ -/* ------------------------------------------------------------ */ - -/* Internal engine bytecode */ -#define HTS_INTERNAL_BYTECODE - -#include -#include -#include -#include -#include - -#include "htsinthash.h" - -/* We use md5 as hashing function for its quality regarding diffusion and - collisions. MD5 is slower than other hashing functions, but is known to be - an excellent hashing function. FNV-1 is generally good enough for this - purpose, too, but the performance gain is not sufficient to use it by - default. - - On several benchmarks, both MD5 and FNV were quite good (0.45 cuckoo moved - on average for each new item inserted in the hashtable), but FNV-1 was more - prone to mutual collisions (creating cycles requiring stash handling), and - was causing the stash area to be more filled than the MD5 variant. - - Simpler hashing functions, such as rolling hashes (LCG) were also tested, - but with collision rate and diffusion were terrible. - - [ On a 10M key tests, both variants acheived 0.45 cuckoo/add ration, - but the FNV-1 variant collided 11 times with a maximum stash area - filled with 4 entries ; whereas the MD5 variant did only collide - once ] -*/ -#if (!defined(HTS_INTHASH_USES_MD5) && !defined(HTS_INTHASH_USES_MURMUR)) -#define HTS_INTHASH_USES_MD5 1 -#endif - -#if HTS_INTHASH_USES_MD5 == 1 -#include "md5.h" -#elif (defined(HTS_INTHASH_USES_MURMUR)) -#include "murmurhash3.h" -#else -/* use the Openssl implementation */ -#if 0 -#include -#define MD5Init MD5_Init -#define MD5Update MD5_Update -#define MD5Final MD5_Final -#endif -#endif - -/** Size of auxiliary stash. **/ -#define STASH_SIZE 16 - -/** Minimum value for lg_size. **/ -#define MIN_LG_SIZE 4 - -/** Minimum value for pool.capacity. **/ -#define MIN_POOL_CAPACITY 256 - -/* 64-bit constant */ -#if defined(WIN32) -#define UINT_64_CONST(X) ((uint64_t) (X)) -#define UINT_64_FORMAT "I64d" -#elif (defined(_LP64) || defined(__x86_64__) \ - || defined(__powerpc64__) || defined(__64BIT__)) -#define UINT_64_CONST(X) ((uint64_t) X##UL) -#define UINT_64_FORMAT "ld" -#else -#define UINT_64_CONST(X) ((uint64_t) X##ULL) -#define UINT_64_FORMAT "lld" -#endif - -/** Hashtable. **/ -struct struct_inthash { - /** Hashtable items. **/ - inthash_item *items; - - /** Log-2 of the hashtable size. **/ - size_t lg_size; - - /** Number of used items (<= POW2(lg_size)). **/ - size_t used; - - /** Stash area for collisions. **/ - struct { - /** Stash items. **/ - inthash_item items[STASH_SIZE]; - - /** Stash size (<= STASH_SIZE). **/ - size_t size; - } stash; - - /** String pool. **/ - struct { - /** String buffer. **/ - char *buffer; - /** Buffer used size (high watermark). **/ - size_t size; - /** Buffer capacity. **/ - size_t capacity; - /** Used chars (== size if compacted). **/ - size_t used; - } pool; - - /** Statistics **/ - struct { - /** Highest stash.size seen. **/ - size_t max_stash_size; - /** Number of writes. **/ - size_t write_count; - /** Number of writes causing an add. **/ - size_t add_count; - /** Number of cuckoo moved during adds. **/ - size_t cuckoo_moved; - /** Number of items added to stash. **/ - size_t stash_added; - /** Number of hashtable rehash/expand operations. **/ - size_t rehash_count; - /** Number of pool compact operations. **/ - size_t pool_compact_count; - /** Number of pool realloc operations. **/ - size_t pool_realloc_count; - } stats; - - /** Settings. **/ - struct { - /** How to handle values (might be NULL). **/ - struct { - /** free() **/ - t_inthash_value_freehandler free; - /** opaque argument **/ - inthash_opaque arg; - } value; - - /** How to handle names (might be NULL). **/ - struct { - /** strdup() **/ - t_inthash_duphandler dup; - /** free() **/ - t_inthash_key_freehandler free; - /** hash **/ - t_inthash_hasheshandler hash; - /** comparison **/ - t_inthash_cmphandler equals; - /** opaque argument **/ - inthash_opaque arg; - } key; - - /** How to handle fatal assertions (might be NULL). **/ - struct { - /** logging **/ - t_inthash_loghandler log; - /** abort() **/ - t_inthash_asserthandler fatal; - /** opaque argument **/ - inthash_opaque arg; - /** hashtable name for logging **/ - inthash_key_const name; - } error; - - /** How to handle pretty-print (debug) (might be NULL). **/ - struct { - /** key print() **/ - t_inthash_printkeyhandler key; - /** value print() **/ - t_inthash_printvaluehandler value; - /** opaque argument **/ - inthash_opaque arg; - } print; - } custom; -}; - -/* Assertion check. */ -#define inthash_assert(HASHTABLE, EXP) \ - (void)( (EXP) || (inthash_assert_failed(HASHTABLE, #EXP, __FILE__, __LINE__), 0) ) - -/* Compiler-specific. */ -#ifdef __GNUC__ -#define INTHASH_PRINTF_FUN(fmt, arg) __attribute__ ((format (printf, fmt, arg))) -#define INTHASH_INLINE __inline__ -#elif (defined(_MSC_VER)) -#define INTHASH_PRINTF_FUN(FMT, ARGS) -#define INTHASH_INLINE __inline -#else -#define INTHASH_PRINTF_FUN(FMT, ARGS) -#define INTHASH_INLINE -#endif - -/* Logging level. */ -static void inthash_log(const inthash hashtable, inthash_loglevel level, - const char *format, va_list args); -#define DECLARE_LOG_FUNCTION(NAME, LEVEL) \ -static void NAME(const inthash hashtable, const char *format, ...) \ - INTHASH_PRINTF_FUN(2, 3); \ -static void NAME(const inthash hashtable, const char *format, ...) { \ - va_list args; \ - va_start(args, format); \ - inthash_log(hashtable, LEVEL, format, args); \ - va_end(args); \ -} -#if 0 -/* Verbose. */ -DECLARE_LOG_FUNCTION(inthash_crit, inthash_log_critical) -DECLARE_LOG_FUNCTION(inthash_warning, inthash_log_warning) -DECLARE_LOG_FUNCTION(inthash_info, inthash_log_info) -DECLARE_LOG_FUNCTION(inthash_debug, inthash_log_debug) -DECLARE_LOG_FUNCTION(inthash_trace, inthash_log_trace) -#elif 0 -/* Info. */ -DECLARE_LOG_FUNCTION(inthash_crit, inthash_log_critical) -DECLARE_LOG_FUNCTION(inthash_warning, inthash_log_warning) -DECLARE_LOG_FUNCTION(inthash_info, inthash_log_info) -#define inthash_debug inthash_log -#define inthash_trace inthash_nolog -#else -/* No logging except stats and critical. */ -DECLARE_LOG_FUNCTION(inthash_crit, inthash_log_critical) -DECLARE_LOG_FUNCTION(inthash_warning, inthash_log_warning) -DECLARE_LOG_FUNCTION(inthash_info, inthash_log_info) -#define inthash_debug inthash_nolog -#define inthash_trace inthash_nolog -#endif - -/* 2**X */ -#define POW2(X) ( (size_t) 1 << (X) ) - -/* the empty string for the string pool */ -static char the_empty_string[1] = { 0 }; - -/* global assertion handler */ -static t_inthash_asserthandler global_assert_handler = NULL; - -/* global assertion handler */ -static t_inthash_loghandler global_log_handler = NULL; - -/* default assertion handler, if neither hashtable one nor global one - were defined */ -static void inthash_fail(const char* exp, const char* file, int line) { - fprintf(stderr, "assertion '%s' failed at %s:%d\n", exp, file, line); - abort(); -} - -/* assert failed handler. */ -static void inthash_assert_failed(const inthash hashtable, const char* exp, const char* file, int line) { - const char *const name = inthash_get_name(hashtable); - inthash_crit(hashtable, "hashtable %s: %s failed at %s:%d", - name != NULL ? name : "", exp, file, line); - if (hashtable != NULL && hashtable->custom.error.fatal != NULL) { - hashtable->custom.error.fatal(hashtable->custom.error.arg, exp, file, line); - } else if (global_assert_handler != NULL) { - global_assert_handler(hashtable, exp, file, line); - } else { - inthash_fail(exp, file, line); - } - abort(); -} - -/* Logging */ -static void inthash_log(const inthash hashtable, inthash_loglevel level, - const char *format, va_list args) { - inthash_assert(hashtable, format != NULL); - if (hashtable != NULL && hashtable->custom.error.log != NULL) { - hashtable->custom.error.log(hashtable->custom.error.arg, level, format, args); - } else if (global_log_handler != NULL) { - global_log_handler(hashtable, level, format, args); - } else { - fprintf(stderr, "[%p] ", (void*) hashtable); - (void) vfprintf(stderr, format, args); - putc('\n', stderr); - } -} - -/* No logging (should be dropped by the compiler) */ -static void inthash_nolog(const inthash hashtable, const char *format, ...) - INTHASH_PRINTF_FUN(2, 3); -static void inthash_nolog(const inthash hashtable, const char *format, ...) { - (void) hashtable; - (void) format; -} - -const char* inthash_get_name(inthash hashtable) { - return hashtable->custom.error.name; -} - -static void inthash_log_stats(inthash hashtable) { - const char *const name = inthash_get_name(hashtable); - inthash_info(hashtable, "hashtable %s%s%ssummary: " - "size=%"UINT_64_FORMAT" (lg2=%"UINT_64_FORMAT") " - "used=%"UINT_64_FORMAT" " - "stash-size=%"UINT_64_FORMAT" " - "pool-size=%"UINT_64_FORMAT" " - "pool-capacity=%"UINT_64_FORMAT" " - "pool-used=%"UINT_64_FORMAT" " - "writes=%"UINT_64_FORMAT" " - "(new=%"UINT_64_FORMAT") " - "moved=%"UINT_64_FORMAT " " - "stashed=%"UINT_64_FORMAT" " - "max-stash-size=%"UINT_64_FORMAT" " - "avg-moved=%g " - "rehash=%"UINT_64_FORMAT" " - "pool-compact=%"UINT_64_FORMAT" " - "pool-realloc=%"UINT_64_FORMAT" " - "memory=%"UINT_64_FORMAT, - name != NULL ? "\"" : "", - name != NULL ? name : "", - name != NULL ? "\" " : "", - (uint64_t) POW2(hashtable->lg_size), - (uint64_t) hashtable->lg_size, - (uint64_t) hashtable->used, - (uint64_t) hashtable->stash.size, - (uint64_t) hashtable->pool.size, - (uint64_t) hashtable->pool.capacity, - (uint64_t) hashtable->pool.used, - (uint64_t) hashtable->stats.write_count, - (uint64_t) hashtable->stats.add_count, - (uint64_t) hashtable->stats.cuckoo_moved, - (uint64_t) hashtable->stats.stash_added, - (uint64_t) hashtable->stats.max_stash_size, - (double) hashtable->stats.cuckoo_moved / (double) hashtable->stats.add_count, - (uint64_t) hashtable->stats.rehash_count, - (uint64_t) hashtable->stats.pool_compact_count, - (uint64_t) hashtable->stats.pool_realloc_count, - (uint64_t) inthash_memory_size(hashtable) - ); -} - -/* default hash function when key is a regular C-string */ -inthash_hashkeys inthash_hash_string(const char *name) { -#if HTS_INTHASH_USES_MD5 == 1 - /* compute a regular MD5 and extract two 32-bit integers */ - MD5_CTX ctx; - union { - unsigned char md5digest[16]; - inthash_hashkeys mhashes[2]; - inthash_hashkeys hashes; - } u; - - /* compute MD5 */ - MD5Init(&ctx, 0); - MD5Update(&ctx, (const unsigned char *) name, - (unsigned int) strlen(name)); - MD5Final(u.md5digest, &ctx); - - /* mix mix mix */ - u.mhashes[0].hash1 ^= u.mhashes[1].hash1; - u.mhashes[0].hash2 ^= u.mhashes[1].hash2; - - /* do not keep identical hashes */ - if (u.hashes.hash1 == u.hashes.hash2) { - u.hashes.hash2 = ~u.hashes.hash2; - } - - return u.hashes; -#elif (defined(HTS_INTHASH_USES_MURMUR)) - union { - uint32_t result[4]; - inthash_hashkeys hashes; - } u; - MurmurHash3_x86_128(name, (const int) strlen(name), - 42, &u.result) ; - - /* mix mix mix */ - u.result[0] ^= u.result[2]; - u.result[1] ^= u.result[3]; - - /* do not keep identical hashes */ - if (u.hashes.hash1 == u.hashes.hash2) { - u.hashes.hash2 = ~u.hashes.hash2; - } - - return u.hashes; -#else - /* compute two Fowler-Noll-Vo hashes (64-bit FNV-1 variant) ; - each 64-bit hash being XOR-folded into a single 32-bit hash. */ - size_t i; - inthash_hashkeys hashes; - uint64_t h1, h2; - - /* FNV-1, 64-bit. */ -#define FNV1_PRIME UINT_64_CONST(1099511628211) -#define FNV1_OFFSET_BASIS UINT_64_CONST(14695981039346656037) - - /* compute the hashes ; second variant is using xored data */ - h1 = FNV1_OFFSET_BASIS; - h2 = ~FNV1_OFFSET_BASIS; - for(i = 0 ; name[i] != '\0' ; i++) { - const unsigned char c1 = name[i]; - const unsigned char c2 = ~c1; - h1 = ( h1 * FNV1_PRIME ) ^ c1; - h2 = ( h2 * FNV1_PRIME ) ^ c2; - } - - /* XOR-folding to improve diffusion (Wikipedia) */ - hashes.hash1 = ( (uint32_t) h1 ^ (uint32_t) ( h1 >> 32 ) ); - hashes.hash2 = ( (uint32_t) h2 ^ (uint32_t) ( h2 >> 32 ) ); - -#undef FNV1_PRIME -#undef FNV1_OFFSET_BASIS - - /* do not keep identical hashes */ - if (hashes.hash1 == hashes.hash2) { - hashes.hash2 = ~hashes.hash2; - } - - return hashes; -#endif -} - -static INTHASH_INLINE inthash_hashkeys inthash_calc_hashes(inthash hashtable, - inthash_key_const value) { - return hashtable->custom.key.hash == NULL - ? inthash_hash_string(value) - : hashtable->custom.key.hash(hashtable->custom.key.arg, value); -} - -/* position 'pos' is free ? */ -static INTHASH_INLINE int inthash_is_free(const inthash hashtable, size_t pos) { - return hashtable->items[pos].name == NULL; -} - -/* compare two keys ; by default using strcmp() */ -static INTHASH_INLINE int inthash_equals(inthash hashtable, - inthash_key_const a, - inthash_key_const b) { - return hashtable->custom.key.equals == NULL - ? strcmp((const char*) a, (const char*) b) == 0 - : hashtable->custom.key.equals(hashtable->custom.key.arg, a, b); -} - -static INTHASH_INLINE int inthash_matches_(inthash hashtable, - const inthash_item *const item, - inthash_key_const name, - const inthash_hashkeys *hashes) { - return item->name != NULL - && item->hashes.hash1 == hashes->hash1 - && item->hashes.hash2 == hashes->hash2 - && inthash_equals(hashtable, item->name, name); -} - -static INTHASH_INLINE int inthash_matches(inthash hashtable, size_t pos, - inthash_key_const name, - const inthash_hashkeys *hashes) { - const inthash_item *const item = &hashtable->items[pos]; - return inthash_matches_(hashtable, item, name, hashes); -} - -/* compact string pool ; does not change the capacity */ -static void inthash_compact_pool(inthash hashtable, size_t capacity) { - const size_t hash_size = POW2(hashtable->lg_size); - size_t i; - char*const old_pool = hashtable->pool.buffer; - const size_t old_size = hashtable->pool.size; - size_t count = 0; - - /* we manage the string pool */ - inthash_assert(hashtable, hashtable->custom.key.dup == NULL); - - /* statistics */ - hashtable->stats.pool_compact_count++; - - /* change capacity now */ - if (hashtable->pool.capacity != capacity) { - hashtable->pool.capacity = capacity; - } - - /* realloc */ - hashtable->pool.buffer = malloc(hashtable->pool.capacity); - hashtable->pool.size = 0; - hashtable->pool.used = 0; - if (hashtable->pool.buffer == NULL) { - inthash_debug(hashtable, - "** hashtable string pool compaction error: could not allocate " - "%"UINT_64_FORMAT" bytes", - (uint64_t) hashtable->pool.capacity); - inthash_assert(hashtable, ! "hashtable string pool compaction error"); - } - - /* relocate a string on a different pool */ -#define RELOCATE_STRING(S) do { \ - if (S != NULL && S != the_empty_string) { \ - const char *const src = (S); \ - char *const dest = \ - &hashtable->pool.buffer[hashtable->pool.size]; \ - const size_t capacity = hashtable->pool.capacity; \ - char *const max_dest = \ - &hashtable->pool.buffer[capacity]; \ - /* copy string */ \ - inthash_assert(hashtable, dest < max_dest); \ - dest[0] = src[0]; \ - { \ - size_t i; \ - for(i = 1 ; src[i - 1] != '\0' ; i++) { \ - inthash_assert(hashtable, &dest[i] < max_dest); \ - dest[i] = src[i]; \ - } \ - /* update pool size */ \ - hashtable->pool.size += i; \ - inthash_assert(hashtable, \ - hashtable->pool.size <= capacity); \ - } \ - /* update source */ \ - S = dest; \ - count++; \ - } \ -} while(0) - - /* relocate */ - for(i = 0 ; i < hash_size ; i++) { - RELOCATE_STRING(hashtable->items[i].name); - } - for(i = 0 ; i < hashtable->stash.size ; i++) { - RELOCATE_STRING(hashtable->stash.items[i].name); - } - -#undef RELOCATE_STRING - - /* compacted (used chars == current size) */ - hashtable->pool.used = hashtable->pool.size; - - /* wipe previous pool */ - free(old_pool); - - inthash_debug(hashtable, - "compacted string pool for %"UINT_64_FORMAT" strings: " - "%"UINT_64_FORMAT" bytes => %"UINT_64_FORMAT" bytes", - (uint64_t) count, (uint64_t) old_size, - (uint64_t) hashtable->pool.size); -} - -/* realloc (expand) string pool ; does not change the compacity */ -static void inthash_realloc_pool(inthash hashtable, size_t capacity) { - const size_t hash_size = POW2(hashtable->lg_size); - char *const oldbase = hashtable->pool.buffer; - size_t i; - size_t count = 0; - - /* we manage the string pool */ - inthash_assert(hashtable, hashtable->custom.key.dup == NULL); - - /* compact instead ? */ - if (hashtable->pool.used < ( hashtable->pool.size*3 ) / 4) { - inthash_compact_pool(hashtable, capacity); - return ; - } - - /* statistics */ - hashtable->stats.pool_realloc_count++; - - /* change capacity now */ - hashtable->pool.capacity = capacity; - - /* realloc */ - hashtable->pool.buffer = realloc(hashtable->pool.buffer, - hashtable->pool.capacity); - if (hashtable->pool.buffer == NULL) { - inthash_crit(hashtable, - "** hashtable string pool allocation error: could not allocate " - "%"UINT_64_FORMAT" bytes", - (uint64_t) hashtable->pool.capacity); - inthash_assert(hashtable, ! "hashtable string pool allocation error"); - } - - /* recompute string address */ -#define RECOMPUTE_STRING(S) do { \ - if (S != NULL && S != the_empty_string) { \ - const size_t offset = (const char*) (S) - oldbase; \ - inthash_assert(hashtable, offset < hashtable->pool.capacity); \ - S = &hashtable->pool.buffer[offset]; \ - count++; \ - } \ -} while(0) - - /* recompute */ - for(i = 0 ; i < hash_size ; i++) { - RECOMPUTE_STRING(hashtable->items[i].name); - } - for(i = 0 ; i < hashtable->stash.size ; i++) { - RECOMPUTE_STRING(hashtable->stash.items[i].name); - } - -#undef RECOMPUTE_STRING - - inthash_debug(hashtable, "reallocated string pool for " - "%"UINT_64_FORMAT" strings: %"UINT_64_FORMAT" bytes", - (uint64_t) count, (uint64_t) hashtable->pool.capacity); -} - -static inthash_key inthash_dup_name_internal(inthash hashtable, - inthash_key_const name_) { - const char *const name = (const char*) name_; - const size_t len = strlen(name) + 1; - char *s; - - /* the pool does not allow empty strings for safety purpose ; handhe that - (keys are being emptied when free'd to detect duplicate free) */ - if (len == 1) { - inthash_assert(hashtable, the_empty_string[0] == '\0'); - return the_empty_string; - } - - /* expand pool capacity */ - inthash_assert(hashtable, hashtable->pool.size <= hashtable->pool.capacity); - if (hashtable->pool.capacity - hashtable->pool.size < len) { - size_t capacity; - for(capacity = MIN_POOL_CAPACITY ; capacity < hashtable->pool.size + len - ; capacity <<= 1) ; - inthash_assert(hashtable, hashtable->pool.size < capacity); - inthash_realloc_pool(hashtable, capacity); - } - - /* copy */ - inthash_assert(hashtable, len + hashtable->pool.size <= hashtable->pool.capacity); - s = &hashtable->pool.buffer[hashtable->pool.size]; - memcpy(s, name, len); - hashtable->pool.size += len; - hashtable->pool.used += len; - - return s; -} - -/* duplicate a key. default is to use the internal pool. */ -static INTHASH_INLINE inthash_key inthash_dup_name(inthash hashtable, - inthash_key_const name) { - return hashtable->custom.key.dup == NULL - ? inthash_dup_name_internal(hashtable, name) - : hashtable->custom.key.dup(hashtable->custom.key.arg, name); -} - -/* internal pool free handler. - note: pointer must have been kicked from the pool first */ -static void inthash_free_key_internal(inthash hashtable, inthash_key name_) { - char *const name = (char*) name_; - const size_t len = strlen(name) + 1; - - /* see inthash_dup_name_internal() handling */ - if (len == 1 && name == the_empty_string) { - inthash_assert(hashtable, the_empty_string[0] == '\0'); - return ; - } - - inthash_assert(hashtable, *name != '\0' || !"duplicate or bad string pool release"); - hashtable->pool.used -= len; - *name = '\0'; /* the string is now invalidated */ - - /* compact the pool is too many holes */ - if (hashtable->pool.used < hashtable->pool.size / 2) { - size_t capacity = hashtable->pool.capacity; - /* compact and shrink */ - if (hashtable->pool.used < capacity / 4) { - capacity /= 2; - } - inthash_assert(hashtable, hashtable->pool.used < capacity); - inthash_compact_pool(hashtable, capacity); - } -} - -/* free a key. default is to use the internal pool. - note: pointer must have been kicked from the pool first */ -static void inthash_free_key(inthash hashtable, inthash_key name) { - if (hashtable->custom.key.free == NULL) { - inthash_free_key_internal(hashtable, name); - } else { - hashtable->custom.key.free(hashtable->custom.key.arg, name); - } -} - -static INTHASH_INLINE size_t inthash_hash_to_pos_(size_t lg_size, - inthash_hashkey hash) { - const inthash_hashkey mask = POW2(lg_size) - 1; - return hash & mask; -} - -static INTHASH_INLINE size_t inthash_hash_to_pos(const inthash hashtable, - inthash_hashkey hash) { - return inthash_hash_to_pos_(hashtable->lg_size, hash); -} - -int inthash_read_pvoid(inthash hashtable, inthash_key_const name, void **pvalue) { - inthash_value value = INTHASH_VALUE_NULL; - const int ret = - inthash_read_value(hashtable, name, (pvalue != NULL) ? &value : NULL); - if (pvalue != NULL) - *pvalue = value.ptr; - return ret; -} - -int inthash_write_pvoid(inthash hashtable, inthash_key_const name, void *pvalue) { - inthash_value value = INTHASH_VALUE_NULL; - - value.ptr = pvalue; - return inthash_write_value(hashtable, name, value); -} - -void inthash_add_pvoid(inthash hashtable, inthash_key_const name, void *pvalue) { - inthash_value value = INTHASH_VALUE_NULL; - - value.ptr = pvalue; - inthash_write_value(hashtable, name, value); -} - -int inthash_write(inthash hashtable, inthash_key_const name, intptr_t intvalue) { - inthash_value value = INTHASH_VALUE_NULL; - - value.intg = intvalue; - return inthash_write_value(hashtable, name, value); -} - -static void inthash_default_free_handler(inthash_opaque arg, - inthash_value value) { - (void) arg; - if (value.ptr != NULL) - free(value.ptr); -} - -static void inthash_del_value_(inthash hashtable, inthash_value *pvalue) { - if (hashtable->custom.value.free != NULL) - hashtable->custom.value.free(hashtable->custom.value.arg, *pvalue); - pvalue->ptr = NULL; -} - -static void inthash_del_value(inthash hashtable, size_t pos) { - inthash_del_value_(hashtable, &hashtable->items[pos].value); -} - -static void inthash_del_name(inthash hashtable, inthash_item *item) { - const inthash_hashkeys nullHash = INTHASH_KEYS_NULL; - char *const name = (char*) item->name; - item->name = NULL; /* there must be no reference remaining */ - item->hashes = nullHash; - /* free after detach (we may compact the pool) */ - inthash_free_key(hashtable, name); -} - -static void inthash_del_item(inthash hashtable, inthash_item *pitem) { - inthash_del_value_(hashtable, &pitem->value); - inthash_del_name(hashtable, pitem); -} - -static int inthash_add_item_(inthash hashtable, inthash_item item); - -/* Write (add or replace) a value in the hashtable. */ -static int inthash_write_value_(inthash hashtable, inthash_key_const name, - inthash_value value) { - inthash_item item; - size_t pos; - const inthash_hashkeys hashes = inthash_calc_hashes(hashtable, name); - - /* Statistics */ - hashtable->stats.write_count++; - - /* replace at position 1 ? */ - pos = inthash_hash_to_pos(hashtable, hashes.hash1); - if (inthash_matches(hashtable, pos, name, &hashes)) { - inthash_del_value(hashtable, pos); - hashtable->items[pos].value = value; - return 0; /* replaced */ - } - - /* replace at position 2 ? */ - pos = inthash_hash_to_pos(hashtable, hashes.hash2); - if (inthash_matches(hashtable, pos, name, &hashes)) { - inthash_del_value(hashtable, pos); - hashtable->items[pos].value = value; - return 0; /* replaced */ - } - - /* replace in the stash ? */ - if (hashtable->stash.size != 0) { - size_t i; - for(i = 0 ; i < hashtable->stash.size ; i++) { - if (inthash_matches_(hashtable, &hashtable->stash.items[i], name, - &hashes)) { - inthash_del_value_(hashtable, &hashtable->stash.items[i].value); - hashtable->stash.items[i].value = value; - return 0; /* replaced */ - } - } - } - - /* Statistics */ - hashtable->stats.add_count++; - - /* otherwise we need to create a new item */ - item.name = inthash_dup_name(hashtable, name); - item.value = value; - item.hashes = hashes; - - return inthash_add_item_(hashtable, item); -} - -/* Return the string representation of a key */ -static const char* inthash_print_key(inthash hashtable, - inthash_key_const name) { - return hashtable->custom.print.key != NULL - ? hashtable->custom.print.key(hashtable->custom.print.arg, name) - : (const char*) name; -} - -/* Add a new item in the hashtable. The item SHALL NOT be alreasy present. */ -static int inthash_add_item_(inthash hashtable, inthash_item item) { - inthash_hashkey cuckoo_hash, initial_cuckoo_hash; - size_t loops; - size_t pos; - - /* place at free position 1 ? */ - pos = inthash_hash_to_pos(hashtable, item.hashes.hash1); - if (inthash_is_free(hashtable, pos)) { - hashtable->items[pos] = item; - return 1; /* added */ - } else { - /* place at free position 2 ? */ - pos = inthash_hash_to_pos(hashtable, item.hashes.hash2); - if (inthash_is_free(hashtable, pos)) { - hashtable->items[pos] = item; - return 1; /* added */ - } - /* prepare cuckoo ; let's take position 1 */ - else { - cuckoo_hash = initial_cuckoo_hash = item.hashes.hash1; - inthash_trace(hashtable, - "debug:collision with '%s' at %"UINT_64_FORMAT" (%x)", - inthash_print_key(hashtable, item.name), - (uint64_t) pos, cuckoo_hash); - } - } - - /* put 'item' in place with hash 'cuckoo_hash' */ - for(loops = POW2(hashtable->lg_size) ; loops != 0 ; --loops) { - const size_t pos = inthash_hash_to_pos(hashtable, cuckoo_hash); - - inthash_trace(hashtable, - "\tdebug:placing cuckoo '%s' at %"UINT_64_FORMAT" (%x)", - inthash_print_key(hashtable, item.name), - (uint64_t) pos, cuckoo_hash); - - /* place at alternate free position ? */ - if (inthash_is_free(hashtable, pos)) { - inthash_trace(hashtable, "debug:free position"); - hashtable->items[pos] = item; - return 1; /* added */ - } - /* then cuckoo's place it is */ - else { - /* replace */ - const inthash_item backup_item = hashtable->items[pos]; - hashtable->items[pos] = item; - - /* statistics */ - hashtable->stats.cuckoo_moved++; - - /* take care of new lost item */ - item = backup_item; - - /* we just kicked this item from its position 1 */ - if (pos == inthash_hash_to_pos(hashtable, item.hashes.hash1)) { - /* then place it on position 2 on next run */ - inthash_trace(hashtable, "\tdebug:position 1"); - cuckoo_hash = item.hashes.hash2; - } - /* we just kicked this item from its position 2 */ - else if (pos == inthash_hash_to_pos(hashtable, item.hashes.hash2)) { - /* then place it on position 1 on next run */ - inthash_trace(hashtable, "\tdebug:position 2"); - cuckoo_hash = item.hashes.hash1; - } - else { - inthash_assert(hashtable, ! "hashtable internal error: unexpected position"); - } - - /* we are looping (back to same hash) */ - /* TODO FIXME: we should actually check the positions */ - if (cuckoo_hash == initial_cuckoo_hash) { - /* emergency stash */ - break; - } - } - } - - /* emergency stashing for the rare cases of collisions */ - if (hashtable->stash.size < STASH_SIZE) { - hashtable->stash.items[hashtable->stash.size] = item; - hashtable->stash.size++; - /* for statistics */ - hashtable->stats.stash_added++; - if (hashtable->stash.size > hashtable->stats.max_stash_size) { - hashtable->stats.max_stash_size = hashtable->stash.size; - } - inthash_debug(hashtable, "used stash because of collision (%d entries)", - (int) hashtable->stash.size); - return 1; /* added */ - } else { - /* debugging */ - if (hashtable->custom.print.key != NULL - && hashtable->custom.print.value != NULL) { - size_t i; - for(i = 0 ; i < hashtable->stash.size ; i++) { - inthash_item *const item = &hashtable->stash.items[i]; - const size_t pos1 = inthash_hash_to_pos(hashtable, item->hashes.hash1); - const size_t pos2 = inthash_hash_to_pos(hashtable, item->hashes.hash2); - inthash_crit(hashtable, - "stash[%u]: key='%s' value='%s' pos1=%d pos2=%d hash1=%04x hash2=%04x", - (int) i, - hashtable->custom.print.key(hashtable->custom.print.arg, item->name), - hashtable->custom.print.value(hashtable->custom.print.arg, item->value), - (int) pos1, (int) pos2, - item->hashes.hash1, item->hashes.hash2); - if (!inthash_is_free(hashtable, pos1)) { - inthash_item *const item = &hashtable->items[pos1]; - const size_t pos1 = inthash_hash_to_pos(hashtable, item->hashes.hash1); - const size_t pos2 = inthash_hash_to_pos(hashtable, item->hashes.hash2); - inthash_crit(hashtable, - "\t.. collisionning with key='%s' value='%s' pos1=%d pos2=%d hash1=%04x hash2=%04x", - hashtable->custom.print.key(hashtable->custom.print.arg, item->name), - hashtable->custom.print.value(hashtable->custom.print.arg, item->value), - (int) pos1, (int) pos2, - item->hashes.hash1, item->hashes.hash2); - } else { - inthash_crit(hashtable, "\t.. collisionning with a free slot (%d)!", (int) pos1); - } - if (!inthash_is_free(hashtable, pos2)) { - inthash_item *const item = &hashtable->items[pos2]; - const size_t pos1 = inthash_hash_to_pos(hashtable, item->hashes.hash1); - const size_t pos2 = inthash_hash_to_pos(hashtable, item->hashes.hash2); - inthash_crit(hashtable, - "\t.. collisionning with key='%s' value='%s' pos1=%d pos2=%d hash1=%04x hash2=%04x", - hashtable->custom.print.key(hashtable->custom.print.arg, item->name), - hashtable->custom.print.value(hashtable->custom.print.arg, item->value), - (int) pos1, (int) pos2, - item->hashes.hash1, item->hashes.hash2); - } else { - inthash_crit(hashtable, "\t.. collisionning with a free slot (%d)!", (int) pos2); - } - } - //struct_inthash_enum e = inthash_enum_new(hashtable); - //while((item = inthash_enum_next(&e)) != NULL) { - // inthash_crit(hashtable, "element key='%s' value='%s' hash1=%04x hash2=%04x", - // hashtable->custom.print.key(hashtable->custom.print.arg, item->name), - // hashtable->custom.print.value(hashtable->custom.print.arg, item->value.ptr), - // item->hashes.hash1, item->hashes.hash2); - //} - } - - /* we are doomed. hopefully the probability is lower than being killed - by a wandering radioactive monkey */ - inthash_log_stats(hashtable); - inthash_assert(hashtable, ! "hashtable internal error: cuckoo/stash collision"); - - /* not reachable code */ - return -1; - } -} - -int inthash_write_value(inthash hashtable, inthash_key_const name, - inthash_value_const value) { - /* replace of add item */ - const int ret = inthash_write_value_(hashtable, name, value); - - /* added ? */ - if (ret) { - /* size of half of the table */ - const size_t half_size = POW2(hashtable->lg_size - 1); - - /* size of half of the stash */ - const size_t half_stash_size = STASH_SIZE / 2; - - /* item was added: increase count */ - hashtable->used++; - - /* table is more than half-full, or stash is more than half-full */ - if (hashtable->used >= half_size - || hashtable->stash.size >= half_stash_size) { - size_t i; - - /* size before */ - const size_t prev_power = hashtable->lg_size; - const size_t prev_size = half_size * 2; - const size_t prev_alloc_size = prev_size*sizeof(inthash_item); - - /* size after doubling it */ - const size_t alloc_size = prev_alloc_size * 2; - - /* log stash issues */ - if (hashtable->stash.size >= half_stash_size - && half_size > POW2(16) - && hashtable->used < half_size / 4) { - inthash_warning(hashtable, - "stash size still full despite %"UINT_64_FORMAT - " elements used out of %"UINT_64_FORMAT, - (uint64_t) hashtable->used, (uint64_t) half_size*2); - } - - /* statistics */ - hashtable->stats.rehash_count++; - - /* realloc */ - hashtable->lg_size++; - hashtable->items = - (inthash_item *) realloc(hashtable->items, alloc_size); - if (hashtable->items == NULL) { - inthash_crit(hashtable, - "** hashtable allocation error: " - "could not allocate %"UINT_64_FORMAT" bytes", - (uint64_t) alloc_size); - inthash_assert(hashtable, ! "hashtable allocation error"); - } - - /* clear upper half */ - memset(&hashtable->items[prev_size], 0, prev_alloc_size); - - /* relocate lower half items when needed */ - for(i = 0 ; i < prev_size ; i++) { - if (!inthash_is_free(hashtable, i)) { - const inthash_hashkeys *const hashes = &hashtable->items[i].hashes; - - /* currently at old position 1 */ - if (inthash_hash_to_pos_(prev_power, hashes->hash1) == i) { - const size_t pos = inthash_hash_to_pos(hashtable, hashes->hash1); - /* no more the expected position */ - if (pos != i) { - inthash_assert(hashtable, pos >= prev_size); - hashtable->items[pos] = hashtable->items[i]; - memset(&hashtable->items[i], 0, sizeof(hashtable->items[i])); - } - } - else if (inthash_hash_to_pos_(prev_power, hashes->hash2) == i) { - const size_t pos = inthash_hash_to_pos(hashtable, hashes->hash2); - /* no more the expected position */ - if (pos != i) { - inthash_assert(hashtable, pos >= prev_size); - hashtable->items[pos] = hashtable->items[i]; - memset(&hashtable->items[i], 0, sizeof(hashtable->items[i])); - } - } - else { - inthash_assert(hashtable, ! "hashtable unexpected internal error (bad position)"); - } - } - } - - inthash_debug(hashtable, - "expanded hashtable to %"UINT_64_FORMAT" elements", - (uint64_t) POW2(hashtable->lg_size)); - - /* attempt to merge the stash if present */ - if (hashtable->stash.size != 0) { - const size_t old_size = hashtable->stash.size; - size_t i; - - /* backup stash and reset it */ - inthash_item stash[STASH_SIZE]; - memcpy(&stash, hashtable->stash.items, sizeof(hashtable->stash.items)); - hashtable->stash.size = 0; - - /* insert all items */ - for(i = 0 ; i < old_size ; i++) { - const int ret = inthash_add_item_(hashtable, stash[i]); - if (ret == 0) { - inthash_assert(hashtable, ! "hashtable duplicate key when merging the stash"); - } - } - - /* logging */ - inthash_assert(hashtable, hashtable->stash.size <= old_size); - if (hashtable->stash.size < old_size) { - inthash_debug(hashtable, "reduced stash size from %"UINT_64_FORMAT" " - "to %"UINT_64_FORMAT, - (uint64_t) old_size, (uint64_t) hashtable->stash.size); - } else { - inthash_trace(hashtable, "stash has still %"UINT_64_FORMAT" elements", - (uint64_t) hashtable->stash.size); - } - } - - } - } - - return ret; -} - -void inthash_add(inthash hashtable, inthash_key_const name, intptr_t intvalue) { - inthash_value value = INTHASH_VALUE_NULL; - - memset(&value, 0, sizeof(value)); - value.intg = intvalue; - inthash_write_value(hashtable, name, value); -} - -int inthash_read(inthash hashtable, inthash_key_const name, intptr_t * intvalue) { - inthash_value value = INTHASH_VALUE_NULL; - int ret = - inthash_read_value(hashtable, name, (intvalue != NULL) ? &value : NULL); - if (intvalue != NULL) - *intvalue = value.intg; - return ret; -} - -static inthash_value* inthash_read_value_(inthash hashtable, - inthash_key_const name) { - const inthash_hashkeys hashes = inthash_calc_hashes(hashtable, name); - size_t pos; - - /* found at position 1 ? */ - pos = inthash_hash_to_pos(hashtable, hashes.hash1); - if (inthash_matches(hashtable, pos, name, &hashes)) { - return &hashtable->items[pos].value; - } - - /* found at position 2 ? */ - pos = inthash_hash_to_pos(hashtable, hashes.hash2); - if (inthash_matches(hashtable, pos, name, &hashes)) { - return &hashtable->items[pos].value; - } - - /* find in stash ? */ - if (hashtable->stash.size != 0) { - size_t i; - for(i = 0 ; i < hashtable->stash.size ; i++) { - if (inthash_matches_(hashtable, &hashtable->stash.items[i], name, - &hashes)) { - return &hashtable->stash.items[i].value; - } - } - } - - /* not found */ - return NULL; -} - -int inthash_read_value(inthash hashtable, inthash_key_const name, - inthash_value * pvalue) { - inthash_value* const value = inthash_read_value_(hashtable, name); - if (value != NULL) { - if (pvalue != NULL) { - *pvalue = *value; - } - return 1; - } - return 0; -} - -static size_t inthash_inc_(inthash hashtable, inthash_key_const name, - size_t inc) { - inthash_value* const value = inthash_read_value_(hashtable, name); - if (value != NULL) { - value->uintg += inc; - return value->uintg; - } else { - /* create a new value */ - const int ret = inthash_write(hashtable, name, inc); - inthash_assert(hashtable, ret); - return inc; - } -} - -int inthash_inc(inthash hashtable, inthash_key_const name) { - return (int) inthash_inc_(hashtable, name, 1); -} - -int inthash_dec(inthash hashtable, inthash_key_const name) { - return (int) inthash_inc_(hashtable, name, (size_t) -1); -} - -int inthash_exists(inthash hashtable, inthash_key_const name) { - return inthash_read_value(hashtable, name, NULL); -} - -static int inthash_remove_(inthash hashtable, inthash_key_const name, - const inthash_hashkeys *hashes, size_t *removed) { - size_t pos; - - /* found at position 1 ? */ - pos = inthash_hash_to_pos(hashtable, hashes->hash1); - if (inthash_matches(hashtable, pos, name, hashes)) { - inthash_del_item(hashtable, &hashtable->items[pos]); - *removed = pos; - return 1; - } - - /* found at position 2 ? */ - pos = inthash_hash_to_pos(hashtable, hashes->hash2); - if (inthash_matches(hashtable, pos, name, hashes)) { - inthash_del_item(hashtable, &hashtable->items[pos]); - *removed = pos; - return 1; - } - - /* find in stash ? */ - if (hashtable->stash.size != 0) { - size_t i; - for(i = 0 ; i < hashtable->stash.size ; i++) { - if (inthash_matches_(hashtable, &hashtable->stash.items[i], name, - hashes)) { - inthash_del_item(hashtable, &hashtable->stash.items[i]); - for( ; i + 1 < hashtable->stash.size ; i++) { - hashtable->stash.items[i] = hashtable->stash.items[i + 1]; - } - hashtable->stash.size--; - *removed = (size_t) -1; - inthash_debug(hashtable, "debug:deleted item in stash (%d entries)", - (int) hashtable->stash.size); - return 1; - } - } - } - - /* not found */ - return 0; -} - -int inthash_remove(inthash hashtable, inthash_key_const name) { - const inthash_hashkeys hashes = inthash_calc_hashes(hashtable, name); - size_t removed; - const int ret = inthash_remove_(hashtable, name, &hashes, &removed); - - if (ret) { - /* item was removed: decrease count */ - inthash_assert(hashtable, hashtable->used != 0); - hashtable->used--; - - /* can we place stash entry back to the table ? */ - if (hashtable->stash.size != 0 && removed != (size_t) -1) { - size_t i; - for(i = 0 ; i < hashtable->stash.size ; i++) { - const size_t pos1 = - inthash_hash_to_pos(hashtable, hashtable->stash.items[i].hashes.hash1); - const size_t pos2 = - inthash_hash_to_pos(hashtable, hashtable->stash.items[i].hashes.hash2); - if (pos1 == removed || pos2 == removed) { - if (pos1 == removed) { - hashtable->items[pos1] = hashtable->stash.items[i]; - } else if (pos2 == removed) { - hashtable->items[pos2] = hashtable->stash.items[i]; - } - for( ; i + 1 < hashtable->stash.size ; i++) { - hashtable->stash.items[i] = hashtable->stash.items[i + 1]; - } - hashtable->stash.size--; - inthash_debug(hashtable, "debug:moved item from stash (%d entries)", - (int) hashtable->stash.size); - break; - } - } - } - } - - return ret; -} - -int inthash_readptr(inthash hashtable, inthash_key_const name, intptr_t * value) { - int ret; - - *value = 0; - ret = inthash_read(hashtable, name, value); - if (*value == 0) - ret = 0; - return ret; -} - -inthash inthash_new(size_t initial_size) { - inthash hashtable = (inthash) calloc(1, sizeof(struct_inthash)); - - if (hashtable) { - size_t size; - for(size = MIN_LG_SIZE ; POW2(size) < initial_size ; size++) ; - if ((hashtable->items = - (inthash_item *) calloc(POW2(size), sizeof(inthash_item)))) { - hashtable->lg_size = size; - } - hashtable->used = 0; - hashtable->stash.size = 0; - hashtable->pool.buffer = NULL; - hashtable->pool.size = 0; - hashtable->pool.capacity = 0; - hashtable->pool.used = 0; - hashtable->stats.max_stash_size = 0; - hashtable->stats.write_count = 0; - hashtable->stats.add_count = 0; - hashtable->stats.cuckoo_moved = 0; - hashtable->stats.stash_added= 0; - hashtable->stats.pool_compact_count = 0; - hashtable->stats.pool_realloc_count = 0; - hashtable->stats.rehash_count = 0; - hashtable->custom.value.free = NULL; - hashtable->custom.value.arg = NULL; - hashtable->custom.key.dup = NULL; - hashtable->custom.key.free = NULL; - hashtable->custom.key.hash = NULL; - hashtable->custom.key.equals = NULL; - hashtable->custom.key.arg = NULL; - hashtable->custom.error.log = NULL; - hashtable->custom.error.fatal = NULL; - hashtable->custom.error.name = NULL; - hashtable->custom.error.arg = NULL; - hashtable->custom.print.key = NULL; - hashtable->custom.print.value = NULL; - hashtable->custom.print.arg = NULL; - } - return hashtable; -} - -int inthash_created(inthash hashtable) { - return hashtable != NULL && hashtable->items != NULL; -} - -void inthash_value_is_malloc(inthash hashtable, int flag) { - if (flag) { - if (hashtable->custom.value.free == NULL) { - hashtable->custom.value.free = inthash_default_free_handler; - hashtable->custom.value.arg = NULL; - } - } else { - hashtable->custom.value.free = NULL; - hashtable->custom.value.arg = NULL; - } -} - -void inthash_set_name(inthash hashtable, inthash_key_const name) { - hashtable->custom.error.name = name; -} - -void inthash_value_set_value_handler(inthash hashtable, - t_inthash_value_freehandler free, - inthash_opaque arg) { - hashtable->custom.value.free = free; - hashtable->custom.value.arg = arg; -} - -void inthash_value_set_key_handler(inthash hashtable, - t_inthash_duphandler dup, - t_inthash_key_freehandler free, - t_inthash_hasheshandler hash, - t_inthash_cmphandler equals, - inthash_opaque arg) { - /* dup and free must be consistent */ - inthash_assert(hashtable, ( dup == NULL ) == ( free == NULL ) ); - hashtable->custom.key.dup = dup; - hashtable->custom.key.free = free; - hashtable->custom.key.hash = hash; - hashtable->custom.key.equals = equals; - hashtable->custom.key.arg = arg; -} - -void inthash_set_assert_handler(inthash hashtable, - t_inthash_loghandler log, - t_inthash_asserthandler fatal, - inthash_opaque arg) { - hashtable->custom.error.log = log; - hashtable->custom.error.fatal = fatal; - hashtable->custom.error.arg = arg; -} - -void inthash_set_print_handler(inthash hashtable, - t_inthash_printkeyhandler key, - t_inthash_printvaluehandler value, - inthash_opaque arg) { - hashtable->custom.print.key = key; - hashtable->custom.print.value = value; - hashtable->custom.print.arg = arg; -} - -size_t inthash_nitems(inthash hashtable) { - if (hashtable != NULL) - return hashtable->used; - return 0; -} - -size_t inthash_memory_size(inthash hashtable) { - const size_t size_struct = sizeof(struct_inthash); - const size_t hash_size = POW2(hashtable->lg_size)*sizeof(inthash_item); - const size_t pool_size = hashtable->pool.capacity*sizeof(char); - return size_struct + hash_size + pool_size; -} - -void inthash_delete(inthash *phashtable) { - if (phashtable != NULL) { - inthash hashtable = *phashtable; - if (hashtable != NULL) { - inthash_log_stats(hashtable); - if (hashtable->items != NULL) { - /* we need to delete values */ - const size_t hash_size = POW2(hashtable->lg_size); - size_t i; - - /* wipe hashtable values (not names) */ - for(i = 0 ; i < hash_size ; i++) { - if (!inthash_is_free(hashtable, i)) { - inthash_del_value(hashtable, i); - } - } - - /* wipe auxiliary stash values (not names) if any */ - for(i = 0 ; i < hashtable->stash.size ; i++) { - inthash_del_value_(hashtable, &hashtable->stash.items[i].value); - } - } - /* wipe top-level */ - hashtable->lg_size = 0; - hashtable->used = 0; - free(hashtable->pool.buffer); - hashtable->pool.buffer = NULL; - free(hashtable->items); - hashtable->items = NULL; - free(hashtable); - *phashtable = NULL; - } - } -} - -/* Enumerator */ - -struct_inthash_enum inthash_enum_new(inthash hashtable) { - struct_inthash_enum e; - - e.index = 0; - e.table = hashtable; - return e; -} - -inthash_item *inthash_enum_next(struct_inthash_enum * e) { - const size_t hash_size = POW2(e->table->lg_size); - for( ; e->index < hash_size - && inthash_is_free(e->table, e->index) ; e->index++) ; - /* enumerate all table */ - if (e->index < hash_size) { - inthash_item *const next = &e->table->items[e->index]; - e->index++; - return next; - } - /* enumerate stash if present */ - else if (e->index < hash_size + e->table->stash.size) { - const size_t index = e->index - hash_size; - inthash_item *const next = &e->table->stash.items[index]; - e->index++; - return next; - } - /* eof */ - else { - return NULL; - } -} - -void inthash_set_global_assert_handler(t_inthash_loghandler log, - t_inthash_asserthandler fatal) { - global_log_handler = log; - global_assert_handler = fatal; -} diff --git a/src/htsinthash.h b/src/htsinthash.h deleted file mode 100644 index fb62767..0000000 --- a/src/htsinthash.h +++ /dev/null @@ -1,407 +0,0 @@ -/* ------------------------------------------------------------ */ -/* -HTTrack Website Copier, Offline Browser for Windows and Unix -Copyright (C) 1998-2014 Xavier Roche and other contributors - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . - -Important notes: - -- We hereby ask people using this source NOT to use it in purpose of grabbing -emails addresses, or collecting any other private information on persons. -This would disgrace our work, and spoil the many hours we spent on it. - -Please visit our Website: http://www.httrack.com -*/ - -/* ------------------------------------------------------------ */ -/* File: hash table system (fast index) */ -/* Author: Xavier Roche */ -/* ------------------------------------------------------------ */ - -/** - * Library notes: - * This small hashtable library provides key/value hashtable, with a string - * key, and integer/pointer value (with an associated optional allocator) - * It features O(1) average insertion, O(1) lookup, and O(1) delete. - * - * Implementation notes: - * Implementation is auto-rehashable, and uses cuckoo hashing of size 2**n - * with a MD5 or FNV-1 hash function, with one additional auxiliary hash - * function. - * It also uses a small stash area to handle rare cases of collisions. - * Enumeration of all key/values is possible, deletion is also possible, but - * currently without any auto-shrinking (ie. table will never shrink). - * Overall, two main blocks are allocated: one for the items, and one for - * the keys (pool). - * - * References: - * Cuckoo Hashing http://en.wikipedia.org/wiki/Cuckoo_hashing - * Cuckoo Stash http://research.microsoft.com/pubs/73856/stash-full.9-30.pdf - * FNV http://www.isthe.com/chongo/tech/comp/fnv/ - * MD5 http://en.wikipedia.org/wiki/MD5 - **/ - -#ifndef HTSINTHASH_DEFH -#define HTSINTHASH_DEFH - -/* Includes */ -#ifdef _WIN32 -#include -typedef unsigned __int32 uint32_t; -typedef unsigned __int64 uint64_t; -#elif (defined(SOLARIS) || defined(sun) || defined(HAVE_INTTYPES_H) \ - || defined(BSD) || defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) || defined(__FreeBSD_kernel__)) -#include -#else -#include -#endif -#include - -/** Key opaque type. May be a regular 'const char*'. **/ -typedef void* inthash_key; - -/** Key constant (can not be modified) opaque type. **/ -typedef const void* inthash_key_const; - -/** Opaque user-defined pointer. **/ -typedef void* inthash_opaque; - -/** Value (union of any value). **/ -typedef union inthash_value { - /** Integer value. **/ - intptr_t intg; - - /** Unsigned integer value. **/ - uintptr_t uintg; - - /** Pointer value. **/ - void *ptr; -} inthash_value; - -/** Value constant. **/ -typedef const inthash_value inthash_value_const; - -/** NULL Value. **/ -#define INTHASH_VALUE_NULL { 0 } - -#ifndef HTS_DEF_FWSTRUCT_inthash_item -#define HTS_DEF_FWSTRUCT_inthash_item -typedef struct inthash_item inthash_item; -#endif - -/** Hash key (32-bit) **/ -typedef uint32_t inthash_hashkey; - -/** Pair of hashes **/ -typedef struct inthash_hashkeys { - inthash_hashkey hash1; - inthash_hashkey hash2; -} inthash_hashkeys; - -/** NULL pair of hashes. **/ -#define INTHASH_KEYS_NULL { 0, 0 } - -/** Item holding a value. **/ -struct inthash_item { - /** Key. **/ - inthash_key name; - - /** Value. **/ - inthash_value value; - - /** Hashes of the key. **/ - inthash_hashkeys hashes; -}; - -/** Log level. **/ -typedef enum inthash_loglevel { - inthash_log_critical, - inthash_log_warning, - inthash_log_info, - inthash_log_debug, - inthash_log_trace -} inthash_loglevel; - -/** free handler. Only used when values are markes as xxc **/ -typedef void (*t_inthash_key_freehandler)(inthash_opaque arg, - inthash_key key); - -/** Value free handler. Only used when values are markes as xxc **/ -typedef void (*t_inthash_value_freehandler)(inthash_opaque arg, - inthash_value value); - -/** Key dup handler. **/ -typedef inthash_key (*t_inthash_duphandler)(inthash_opaque arg, - inthash_key_const name); - -/** Key hash computation handler. **/ -typedef inthash_hashkeys (*t_inthash_hasheshandler)(inthash_opaque arg, - inthash_key_const name); - -/** Hashtable logging handler. **/ -typedef void (*t_inthash_loghandler)(inthash_opaque arg, inthash_loglevel level, - const char* format, va_list args); - -/** Hashtable fatal assertion failure. **/ -typedef void (*t_inthash_asserthandler)(inthash_opaque arg, const char* exp, - const char* file, int line); - -/** Key printer (debug) **/ -typedef const char* (*t_inthash_printkeyhandler)(inthash_opaque arg, - inthash_key_const name); - -/** Value printer (debug) **/ -typedef const char* (*t_inthash_printvaluehandler)(inthash_opaque arg, - inthash_value_const value); - -/** - * Value comparison handler (returns non-zero value if strings are equal). - **/ -typedef int (*t_inthash_cmphandler)(inthash_opaque arg, - inthash_key_const a, - inthash_key_const b); - -/** Hashtable (opaque structure). **/ -#ifndef HTS_DEF_FWSTRUCT_struct_inthash -#define HTS_DEF_FWSTRUCT_struct_inthash -typedef struct struct_inthash struct_inthash, *inthash; -#endif - -/** Hashtable enumeration (opaque structure). **/ -#ifndef HTS_DEF_FWSTRUCT_struct_inthash_enum -#define HTS_DEF_FWSTRUCT_struct_inthash_enum -typedef struct struct_inthash_enum struct_inthash_enum; -#endif - -/** Enumeration. **/ -struct struct_inthash_enum { - inthash table; - size_t index; -}; - -#ifdef __cplusplus -extern "C" { -#endif - -/** - * Create a new hashtable, with initial bucket size of 'size'. - * If size is 0, use the default minimal bucket size. - * Return a non-NULL pointer upon success. - **/ -inthash inthash_new(size_t size); - -/** - * Was the hashtable successfully created ? - * Return non-zero value if the hashtable is valid. - **/ -int inthash_created(inthash hashtable); - -/** - * Delete a hashtable, freeing all entries. - **/ -void inthash_delete(inthash * hashtable); - -/** - * Return the number of items in the hashtable. - **/ -size_t inthash_nitems(inthash hashtable); - -/** - * Return the memory size taken by the hashtable. - * (This does not take account of the possible memory taken by values) - **/ -size_t inthash_memory_size(inthash hashtable); - -/** - * If 'flag' is non-zero, calls inthash_value_set_value_handler() with - * default system free() handler function, otherwise, free the value handlers. - **/ -void inthash_value_is_malloc(inthash hashtable, int flag); - -/** - * Set handlers for values. - * free: this handler will be called when a value is to be removed from - * the hashtable. if NULL, values won't be free'd. - * arg: opaque custom argument to be used by functions. - * Handler(s) MUST NOT be changed once elements have been added. - **/ -void inthash_value_set_value_handler(inthash hashtable, - t_inthash_value_freehandler free, - inthash_opaque arg); - -/** - * Set handlers for keys. - * dup: handler called to duplicate a key. if NULL, the internal pool is used. - * free: handler called to free a key. if NULL, the internal pool is used. - * hash: hashing handler, called to hash a key. if NULL, the default hash - * function is used. - * equals: comparison handler, returning non-zero value when two keys are - * identical. if NULL, the default comparison function is used. - * arg: opaque custom argument to be used by functions. - * Handler(s) MUST NOT be changed once elements have been added. - **/ -void inthash_value_set_key_handler(inthash hashtable, - t_inthash_duphandler dup, - t_inthash_key_freehandler free, - t_inthash_hasheshandler hash, - t_inthash_cmphandler equals, - inthash_opaque arg); - -/** - * Set assertion failure handler. - * log: handler called upon serious programming error - * fatal: handler called upon serious programming error - **/ -void inthash_set_assert_handler(inthash hashtable, - t_inthash_loghandler log, - t_inthash_asserthandler fatal, - inthash_opaque arg); - -/** - * Set pretty print loggers (debug). Both handlers must return a string - * pointer which shall be valid until the next call. Both key and value - * pointers shall be valid at the same time. - * name: handler called to print the string representation of the name - * value: handler called to print the string representation of the value - **/ -void inthash_set_print_handler(inthash hashtable, - t_inthash_printkeyhandler key, - t_inthash_printvaluehandler value, - inthash_opaque arg); - -/** - * Set the hashtable name, for degugging purpose. - * name: the hashtable name (ASCII or UTF-8) - */ -void inthash_set_name(inthash hashtable, inthash_key_const name); - -/** - * Get the hashtable name, for degugging purpose. - * Return NULL if no name was defined. - **/ -const char* inthash_get_name(inthash hashtable); - -/** - * Read an integer entry from the hashtable. - * Return non-zero value upon success and sets intvalue. - **/ -int inthash_read(inthash hashtable, inthash_key_const name, intptr_t * intvalue); - -/** - * Same as inthash_read(), but return 0 is the value was zero. - **/ -int inthash_readptr(inthash hashtable, inthash_key_const name, intptr_t * intvalue); - -/** - * Return non-zero value if the given entry exists. - **/ -int inthash_exists(inthash hashtable, inthash_key_const name); - -/** - * Read an entry from the hashtable. - * Return non-zero value upon success and sets value. - **/ -int inthash_read_value(inthash hashtable, inthash_key_const name, - inthash_value *value); - -/** - * Write an entry to the hashtable. - * Return non-zero value if the entry was added, zero if it was replaced. - **/ -int inthash_write_value(inthash hashtable, inthash_key_const name, - inthash_value_const value); -/** - * Read a pointer entry from the hashtable. - * Return non-zero value upon success and sets value. - **/ -int inthash_read_pvoid(inthash hashtable, inthash_key_const name, void **value); - -/** - * Write a pointer entry to the hashtable. - * Return non-zero value if the entry was added, zero if it was replaced. - **/ -int inthash_write_pvoid(inthash hashtable, inthash_key_const name, void *value); - -/** - * Alias to inthash_write_pvoid() - **/ -void inthash_add_pvoid(inthash hashtable, inthash_key_const name, void *value); - -/** - * Write an integer entry to the hashtable. - * Return non-zero value if the entry was added, zero if it was replaced. - **/ -int inthash_write(inthash hashtable, inthash_key_const name, intptr_t value); - -/** - * Alias to inthash_write() - **/ -void inthash_add(inthash hashtable, inthash_key_const name, intptr_t value); - -/** - * Increment an entry value in the hashtable - * (or create a new entry with value 1 if it does not yet exist) - * Return non-zero value if the entry was added, zero if it was changed. - **/ -int inthash_inc(inthash hashtable, inthash_key_const name); - -/** - * Decrement an entry value in the hashtable - * (or create a new entry with value -1 if it does not yet exist) - * Return non-zero value if the entry was added, zero if it was changed. - **/ -int inthash_dec(inthash hashtable, inthash_key_const name); - -/** - * Remove an entry from the hashtable - * Return non-zero value if the entry was removed, zero otherwise. - **/ -int inthash_remove(inthash hashtable, inthash_key_const name); - -/** - * Return a new enumerator. - * Note: deleting entries is safe while enumerating, but adding entries - * lead to undefined enumeration behavior (yet safe). - **/ -struct_inthash_enum inthash_enum_new(inthash hashtable); - -/** - * Enumerate the next entry. - **/ -inthash_item *inthash_enum_next(struct_inthash_enum * e); - -/** - * Compute a hash, given a string. This is the default function used for - * hashing keys, which are by default strings. - **/ -inthash_hashkeys inthash_hash_string(const char *value); - -/** - * Set default global assertion failure handler. - * The handler will be used if no specific handler was defined in the - * hashtable itself. - * log: handler called upon serious error log (opaque argument - * is the hashtable itself) - * fatal: handler called upon serious programming error (opaque argument - * is the hashtable itself) - **/ -void inthash_set_global_assert_handler(t_inthash_loghandler log, - t_inthash_asserthandler fatal); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/htsserver.c b/src/htsserver.c index 401f131..e9f426c 100644 --- a/src/htsserver.c +++ b/src/htsserver.c @@ -67,7 +67,7 @@ Please visit our Website: http://www.httrack.com /* Bypass internal definition protection */ #define HTS_INTERNAL_BYTECODE -#include "htsinthash.h" +#include "coucal.h" #undef HTS_INTERNAL_BYTECODE int NewLangStrSz = 1024; diff --git a/src/htsweb.c b/src/htsweb.c index 6e3f4a5..c695f3a 100644 --- a/src/htsweb.c +++ b/src/htsweb.c @@ -58,7 +58,7 @@ Please visit our Website: http://www.httrack.com #include "htsthread.h" /* External modules */ -#include "htsinthash.c" +#include "coucal.c" #include "htsmd5.c" #include "md5.c" diff --git a/src/htswrap.c b/src/htswrap.c index 345f84a..7683034 100644 --- a/src/htswrap.c +++ b/src/htswrap.c @@ -36,7 +36,7 @@ Please visit our Website: http://www.httrack.com #include "htswrap.h" #include "htshash.h" -#include "htsinthash.h" +#include "coucal.h" #include "htslib.h" HTSEXT_API int htswrap_init(void) { // LEGACY diff --git a/src/htswrap.h b/src/htswrap.h index eb010d3..443abda 100644 --- a/src/htswrap.h +++ b/src/htswrap.h @@ -38,7 +38,7 @@ Please visit our Website: http://www.httrack.com #ifdef HTS_INTERNAL_BYTECODE #include "htsglobal.h" -#include "htsinthash.h" +#include "coucal.h" /* Forward definitions */ #ifndef HTS_DEF_FWSTRUCT_httrackp -- cgit v1.2.3