summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Robillard <d@drobilla.net>2011-09-28 17:41:06 +0000
committerDavid Robillard <d@drobilla.net>2011-09-28 17:41:06 +0000
commit0e3ef580b5d265ee59d50d35053e989b8c4277c2 (patch)
treec3989bba215b866ae8c56a276cd4d577b43f26be
parent95fd16cd2d7d2394418210199f1275caab8965d0 (diff)
downloadzix-0e3ef580b5d265ee59d50d35053e989b8c4277c2.tar.gz
zix-0e3ef580b5d265ee59d50d35053e989b8c4277c2.tar.bz2
zix-0e3ef580b5d265ee59d50d35053e989b8c4277c2.zip
Add ZixHash
git-svn-id: http://svn.drobilla.net/zix/trunk@39 df6676b4-ccc9-40e5-b5d6-7c4628a128e3
-rw-r--r--src/hash.c226
-rw-r--r--test/hash_test.c157
-rw-r--r--test/patree_bench.c41
-rw-r--r--wscript2
-rw-r--r--zix/common.h8
-rw-r--r--zix/hash.h75
6 files changed, 504 insertions, 5 deletions
diff --git a/src/hash.c b/src/hash.c
new file mode 100644
index 0000000..c267757
--- /dev/null
+++ b/src/hash.c
@@ -0,0 +1,226 @@
+/*
+ Copyright 2011 David Robillard <http://drobilla.net>
+
+ Permission to use, copy, modify, and/or distribute this software for any
+ purpose with or without fee is hereby granted, provided that the above
+ copyright notice and this permission notice appear in all copies.
+
+ THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "zix/hash.h"
+
+/**
+ Primes, each slightly less than twice its predecessor, and as far away
+ from powers of two as possible.
+*/
+static const unsigned sizes[] = {
+ 53, 97, 193, 389, 769, 1543, 3079, 6151, 12289, 24593, 49157, 98317,
+ 196613, 393241, 786433, 1572869, 3145739, 6291469, 12582917, 25165843,
+ 50331653, 100663319, 201326611, 402653189, 805306457, 1610612741, 0
+};
+
+typedef struct _Entry {
+ const void* key; ///< Hash key
+ void* data; ///< Value
+ struct _Entry* next; ///< Next entry in bucket
+ unsigned hash; ///< Non-modulo hash value (for cheap rehash)
+} Entry;
+
+struct ZixHashImpl {
+ ZixHashFunc hash_func;
+ ZixEqualFunc key_equal_func;
+ Entry** buckets;
+ const unsigned* n_buckets;
+ unsigned count;
+};
+
+ZixHash*
+zix_hash_new(ZixHashFunc hash_func,
+ ZixEqualFunc key_equal_func)
+
+{
+ ZixHash* hash = (ZixHash*)malloc(sizeof(ZixHash));
+ hash->hash_func = hash_func;
+ hash->key_equal_func = key_equal_func;
+ hash->count = 0;
+ hash->n_buckets = &sizes[0];
+ hash->buckets = malloc(*hash->n_buckets * sizeof(Entry*));
+ memset(hash->buckets, 0, *hash->n_buckets * sizeof(Entry*));
+
+ return hash;
+}
+
+void
+zix_hash_free(ZixHash* hash)
+{
+ for (unsigned b = 0; b < *hash->n_buckets; ++b) {
+ Entry* bucket = hash->buckets[b];
+ for (Entry* e = bucket; e;) {
+ Entry* next = e->next;
+ free(e);
+ e = next;
+ }
+ }
+
+ free(hash->buckets);
+ free(hash);
+}
+
+unsigned
+zix_string_hash(const void* key)
+{
+ // Trusty old DJB hash
+ const char* str = (const char*)key;
+ unsigned h = 5381;
+ for (const char* s = str; *s != '\0'; ++s) {
+ h = (h << 5) + h + *s; // h = h * 33 + c
+ }
+ return h;
+}
+
+bool
+zix_string_equal(const void* a, const void* b)
+{
+ return !strcmp((const char*)a, (const char*)b);
+}
+
+static void
+insert_entry(Entry** bucket,
+ Entry* entry)
+{
+ entry->next = *bucket;
+ *bucket = entry;
+}
+
+static ZixStatus
+rehash(ZixHash* hash, unsigned new_n_buckets)
+{
+ Entry** new_buckets = (Entry**)malloc(new_n_buckets * sizeof(Entry*));
+ if (!new_buckets) {
+ return ZIX_STATUS_NO_MEM;
+ }
+
+ memset(new_buckets, 0, new_n_buckets * sizeof(Entry*));
+
+ for (unsigned b = 0; b < *hash->n_buckets; ++b) {
+ for (Entry* e = hash->buckets[b]; e;) {
+ Entry* const next = e->next;
+ const unsigned h = e->hash % new_n_buckets;
+ insert_entry(&new_buckets[h], e);
+ e = next;
+ }
+ }
+
+ free(hash->buckets);
+ hash->buckets = new_buckets;
+
+ return ZIX_STATUS_SUCCESS;
+}
+
+static Entry*
+find_entry(const ZixHash* hash,
+ const void* key,
+ unsigned h)
+{
+ for (Entry* e = hash->buckets[h]; e; e = e->next) {
+ if (hash->key_equal_func(e->key, key)) {
+ return e;
+ }
+ }
+
+ return NULL;
+}
+
+void*
+zix_hash_find(const ZixHash* hash, const void* key)
+{
+ const unsigned h = hash->hash_func(key) % *hash->n_buckets;
+ Entry* const entry = find_entry(hash, key, h);
+ return entry ? entry->data : 0;
+}
+
+ZixStatus
+zix_hash_insert(ZixHash* hash, const void* key, void* data)
+{
+ unsigned h_nomod = hash->hash_func(key);
+ unsigned h = h_nomod % *hash->n_buckets;
+
+ Entry* elem = find_entry(hash, key, h);
+ if (elem) {
+ assert(elem->hash == h_nomod);
+ return ZIX_STATUS_EXISTS;
+ }
+
+ elem = (Entry*)malloc(sizeof(Entry));
+ if (!elem) {
+ return ZIX_STATUS_NO_MEM;
+ }
+ elem->key = key;
+ elem->data = data;
+ elem->next = NULL;
+ elem->hash = h_nomod;
+ const unsigned next_n_buckets = *(hash->n_buckets + 1);
+ if (next_n_buckets != 0 && (hash->count + 1) >= next_n_buckets) {
+ if (!rehash(hash, next_n_buckets)) {
+ h = h_nomod % *(++hash->n_buckets);
+ }
+ }
+
+ insert_entry(&hash->buckets[h], elem);
+ ++hash->count;
+ return ZIX_STATUS_SUCCESS;
+}
+
+ZixStatus
+zix_hash_remove(ZixHash* hash, const void* key)
+{
+ unsigned h = hash->hash_func(key) % *hash->n_buckets;
+
+ Entry** next_ptr = &hash->buckets[h];
+ for (Entry* e = hash->buckets[h]; e; e = e->next) {
+ if (hash->key_equal_func(e->key, key)) {
+ *next_ptr = e->next;
+ free(e);
+ return ZIX_STATUS_SUCCESS;
+ }
+ next_ptr = &e->next;
+ }
+
+ if (hash->n_buckets != sizes) {
+ const unsigned prev_n_buckets = *(hash->n_buckets - 1);
+ if (hash->count - 1 <= prev_n_buckets) {
+ if (!rehash(hash, prev_n_buckets)) {
+ --hash->n_buckets;
+ }
+ }
+ }
+
+ --hash->count;
+ return ZIX_STATUS_NOT_FOUND;
+}
+
+ZIX_API
+void
+zix_hash_foreach(const ZixHash* hash,
+ void (*f)(const void* key, void* value, void* user_data),
+ void* user_data)
+{
+ for (unsigned b = 0; b < *hash->n_buckets; ++b) {
+ Entry* bucket = hash->buckets[b];
+ for (Entry* e = bucket; e; e = e->next) {
+ f(e->key, e->data, user_data);
+ }
+ }
+}
+
diff --git a/test/hash_test.c b/test/hash_test.c
new file mode 100644
index 0000000..ec8ea6f
--- /dev/null
+++ b/test/hash_test.c
@@ -0,0 +1,157 @@
+/*
+ Copyright 2011 David Robillard <http://drobilla.net>
+
+ Permission to use, copy, modify, and/or distribute this software for any
+ purpose with or without fee is hereby granted, provided that the above
+ copyright notice and this permission notice appear in all copies.
+
+ THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "zix/hash.h"
+
+static const char* strings[] = {
+ "one", "two", "three", "four", "five", "six", "seven", "eight",
+ "2one", "2two", "2three", "2four", "2five", "2six", "2seven", "2eight",
+ "3one", "3two", "3three", "3four", "3five", "3six", "3seven", "3eight",
+ "4one", "4two", "4three", "4four", "4five", "4six", "4seven", "4eight",
+ "5one", "5two", "5three", "5four", "5five", "5six", "5seven", "5eight",
+ "6one", "6two", "6three", "6four", "6five", "6six", "6seven", "6eight",
+ "7one", "7two", "7three", "7four", "7five", "7six", "7seven", "7eight",
+ "8one", "8two", "8three", "8four", "8five", "8six", "8seven", "8eight",
+ "9one", "9two", "9three", "9four", "9five", "9six", "9seven", "9eight",
+ "Aone", "Atwo", "Athree", "Afour", "Afive", "Asix", "Aseven", "Aeight",
+ "Bone", "Btwo", "Bthree", "Bfour", "Bfive", "Bsix", "Bseven", "Beight",
+ "Cone", "Ctwo", "Cthree", "Cfour", "Cfive", "Csix", "Cseven", "Ceight",
+ "Done", "Dtwo", "Dthree", "Dfour", "Dfive", "Dsix", "Dseven", "Deight",
+};
+
+static int
+test_fail(const char* fmt, ...)
+{
+ va_list args;
+ va_start(args, fmt);
+ fprintf(stderr, "error: ");
+ vfprintf(stderr, fmt, args);
+ va_end(args);
+ return 1;
+}
+
+static unsigned n_checked = 0;
+
+static void
+check(const void* key, void* value, void* user_data)
+{
+ if (key == value) {
+ ++n_checked;
+ } else {
+ fprintf(stderr, "ERROR: %s != %s\n",
+ (const char*)key, (const char*)value);
+ }
+}
+
+int
+main(int argc, char** argv)
+{
+ ZixHash* hash = zix_hash_new(zix_string_hash, zix_string_equal);
+
+ const size_t n_strings = sizeof(strings) / sizeof(char*);
+
+ // Insert each string
+ for (size_t i = 0; i < n_strings; ++i) {
+ ZixStatus st = zix_hash_insert(hash, strings[i], (char*)strings[i]);
+ if (st) {
+ return test_fail("Failed to insert `%s'\n", strings[i]);
+ }
+ }
+
+ //zix_hash_print_dot(hash, stdout);
+
+ // Attempt to insert each string again
+ for (size_t i = 0; i < n_strings; ++i) {
+ ZixStatus st = zix_hash_insert(hash, strings[i], (char*)strings[i]);
+ if (st != ZIX_STATUS_EXISTS) {
+ return test_fail("Double inserted `%s'\n", strings[i]);
+ }
+ }
+
+ // Search for each string
+ for (size_t i = 0; i < n_strings; ++i) {
+ const char* match = (const char*)zix_hash_find(hash, strings[i]);
+ if (!match) {
+ return test_fail("Failed to find `%s'\n", strings[i]);
+ }
+ if (match != strings[i]) {
+ return test_fail("Bad match for `%s'\n", strings[i]);
+ }
+ }
+
+ // Try some false matches
+ const char* not_indexed[] = {
+ "ftp://example.org/not-there-at-all",
+ "http://example.org/foobar",
+ "http://",
+ "http://otherdomain.com"
+ };
+ const size_t n_not_indexed = sizeof(not_indexed) / sizeof(char*);
+ for (size_t i = 0; i < n_not_indexed; ++i) {
+ const char* match = (const char*)zix_hash_find(hash, not_indexed[i]);
+ if (match) {
+ return test_fail("Unexpectedly found `%s'\n", not_indexed[i]);
+ }
+ }
+
+ // Remove strings
+ for (size_t i = 0; i < n_strings; ++i) {
+ // Remove string
+ ZixStatus st = zix_hash_remove(hash, strings[i]);
+ if (st) {
+ return test_fail("Failed to remove `%s'\n", strings[i]);
+ }
+
+ // Ensure second removal fails
+ st = zix_hash_remove(hash, strings[i]);
+ if (st != ZIX_STATUS_NOT_FOUND) {
+ return test_fail("Unexpectedly removed `%s' twice\n", strings[i]);
+ }
+
+ // Check to ensure remaining strings are still present
+ for (size_t j = i + 1; j < n_strings; ++j) {
+ const char* match = (const char*)zix_hash_find(hash, strings[j]);
+ if (!match) {
+ return test_fail("Failed to find `%s' after remove\n", strings[j]);
+ }
+ if (match != strings[j]) {
+ return test_fail("Bad match for `%s' after remove\n", strings[j]);
+ }
+ }
+ }
+
+ // Insert each string again (to check non-empty desruction)
+ for (size_t i = 0; i < n_strings; ++i) {
+ ZixStatus st = zix_hash_insert(hash, strings[i], (char*)strings[i]);
+ if (st) {
+ return test_fail("Failed to insert `%s'\n", strings[i]);
+ }
+ }
+
+ // Check key == value (and test zix_hash_foreach)
+ zix_hash_foreach(hash, check, NULL);
+ if (n_checked != n_strings) {
+ return test_fail("Check failed\n");
+ }
+
+ zix_hash_free(hash);
+
+ return 0;
+}
diff --git a/test/patree_bench.c b/test/patree_bench.c
index 02998d6..64e8042 100644
--- a/test/patree_bench.c
+++ b/test/patree_bench.c
@@ -23,8 +23,9 @@
#include <glib.h>
-#include "zix/patree.h"
#include "zix/fat_patree.h"
+#include "zix/hash.h"
+#include "zix/patree.h"
const unsigned seed = 1;
@@ -52,7 +53,7 @@ main(int argc, char** argv)
return test_fail("Failed to open file %s\n", file);
}
- size_t max_n_strings = 100000;
+ size_t max_n_strings = 10000000;
/* Read input strings */
char** strings = NULL;
@@ -87,12 +88,13 @@ main(int argc, char** argv)
FILE* insert_dat = fopen("insert.dat", "w");
FILE* search_dat = fopen("search.dat", "w");
- fprintf(insert_dat, "# n\tGHashTable\tZixPatree\tZixFatPatree\n");
- fprintf(search_dat, "# n\tGHashTable\tZixPatree\tZixFatPatree\n");
+ fprintf(insert_dat, "# n\tGHashTable\tZixHash\tZixPatree\tZixFatPatree\n");
+ fprintf(search_dat, "# n\tGHashTable\tZixHash\tZixPatree\tZixFatPatree\n");
for (size_t n = 1; n <= n_strings; n *= 2) {
printf("Benchmarking n = %zu\n", n);
ZixPatree* patree = zix_patree_new();
+ ZixHash* zhash = zix_hash_new(zix_string_hash, zix_string_equal);
ZixFatPatree* fat_patree = zix_fat_patree_new();
GHashTable* hash = g_hash_table_new(g_str_hash, g_str_equal);
fprintf(insert_dat, "%zu", n);
@@ -107,6 +109,16 @@ main(int argc, char** argv)
}
fprintf(insert_dat, "\t%lf", bench_end(&insert_start));
+ // ZixHash
+ insert_start = bench_start();
+ for (size_t i = 0; i < n; ++i) {
+ ZixStatus st = zix_hash_insert(zhash, strings[i], strings[i]);
+ if (st && st != ZIX_STATUS_EXISTS) {
+ return test_fail("Failed to insert `%s'\n", strings[i]);
+ }
+ }
+ fprintf(insert_dat, "\t%lf", bench_end(&insert_start));
+
// ZixPatree
insert_start = bench_start();
for (size_t i = 0; i < n; ++i) {
@@ -119,12 +131,14 @@ main(int argc, char** argv)
// ZixFatPatree
insert_start = bench_start();
+ /*
for (size_t i = 0; i < n; ++i) {
ZixStatus st = zix_fat_patree_insert(fat_patree, strings[i]);
if (st && st != ZIX_STATUS_EXISTS) {
return test_fail("Failed to insert `%s'\n", strings[i]);
}
}
+ */
fprintf(insert_dat, "\t%lf\n", bench_end(&insert_start));
// Benchmark search
@@ -141,6 +155,21 @@ main(int argc, char** argv)
}
fprintf(search_dat, "\t%lf", bench_end(&search_start));
+ // ZixHash
+ srand(seed);
+ search_start = bench_start();
+ for (size_t i = 0; i < n; ++i) {
+ const size_t index = rand() % n;
+ const char* match = NULL;
+ if (!(match = zix_hash_find(zhash, strings[index]))) {
+ return test_fail("Hash: Failed to find `%s'\n", strings[index]);
+ }
+ if (strcmp(match, strings[index])) {
+ return test_fail("Hash: Bad match for `%s'\n", strings[index]);
+ }
+ }
+ fprintf(search_dat, "\t%lf", bench_end(&search_start));
+
// ZixPatree
srand(seed);
search_start = bench_start();
@@ -151,6 +180,7 @@ main(int argc, char** argv)
return test_fail("Patree: Failed to find `%s'\n", strings[index]);
}
if (strcmp(match, strings[index])) {
+
return test_fail("Patree: Bad match for `%s'\n", strings[index]);
}
}
@@ -159,6 +189,7 @@ main(int argc, char** argv)
// ZixFatPatree
srand(seed);
search_start = bench_start();
+ /*
for (size_t i = 0; i < n; ++i) {
const size_t index = rand() % n;
char* match = NULL;
@@ -169,10 +200,12 @@ main(int argc, char** argv)
return test_fail("FatPatree: Bad match for `%s'\n", strings[index]);
}
}
+ */
fprintf(search_dat, "\t%lf\n", bench_end(&search_start));
zix_patree_free(patree);
zix_fat_patree_free(fat_patree);
+ zix_hash_free(zhash);
g_hash_table_unref(hash);
}
diff --git a/wscript b/wscript
index b2521f1..27eb91f 100644
--- a/wscript
+++ b/wscript
@@ -63,6 +63,7 @@ def configure(conf):
print('')
tests = [
+ 'hash_test',
'patree_test',
'ring_test',
'sorted_array_test',
@@ -79,6 +80,7 @@ def build(bld):
lib_source = '''
src/fat_patree.c
+ src/hash.c
src/patree.c
src/ring.c
src/sorted_array.c
diff --git a/zix/common.h b/zix/common.h
index 2107df5..a7edf76 100644
--- a/zix/common.h
+++ b/zix/common.h
@@ -17,6 +17,8 @@
#ifndef ZIX_COMMON_H
#define ZIX_COMMON_H
+#include <stdbool.h>
+
/**
@addtogroup zix
@{
@@ -55,7 +57,11 @@ typedef enum {
typedef int (*ZixComparator)(const void* a, const void* b, void* user_data);
/**
- @}
+ Function for testing equality of two elements.
+*/
+typedef bool (*ZixEqualFunc)(const void* a, const void* b);
+
+/**@}
*/
#endif /* ZIX_COMMON_H */
diff --git a/zix/hash.h b/zix/hash.h
new file mode 100644
index 0000000..44521f1
--- /dev/null
+++ b/zix/hash.h
@@ -0,0 +1,75 @@
+/*
+ Copyright 2011 David Robillard <http://drobilla.net>
+
+ Permission to use, copy, modify, and/or distribute this software for any
+ purpose with or without fee is hereby granted, provided that the above
+ copyright notice and this permission notice appear in all copies.
+
+ THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+
+#ifndef ZIX_HASH_H
+#define ZIX_HASH_H
+
+#include "zix/common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct ZixHashImpl ZixHash;
+
+/**
+ Function for computing the hash of an element.
+*/
+typedef unsigned (*ZixHashFunc)(const void* key);
+
+ZIX_API
+ZixHash*
+zix_hash_new(ZixHashFunc hash_func,
+ ZixEqualFunc key_equal_func);
+
+ZIX_API
+void
+zix_hash_free(ZixHash* hash);
+
+ZIX_API
+unsigned
+zix_string_hash(const void* key);
+
+ZIX_API
+bool
+zix_string_equal(const void* a, const void* b);
+
+ZIX_API
+ZixStatus
+zix_hash_insert(ZixHash* hash,
+ const void* key,
+ void* data);
+
+ZIX_API
+ZixStatus
+zix_hash_remove(ZixHash* hash, const void* key);
+
+ZIX_API
+void*
+zix_hash_find(const ZixHash* hash,
+ const void* key);
+
+ZIX_API
+void
+zix_hash_foreach(const ZixHash* hash,
+ void (*f)(const void* key, void* value, void* user_data),
+ void* user_data);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* ZIX_HASH_H */