From 0bb59462ed60f87eb18effdd06e74a750e274ca8 Mon Sep 17 00:00:00 2001 From: David Robillard Date: Fri, 10 Aug 2012 02:15:53 +0000 Subject: Minimal space overhead inline value hash table. Add ZixChunk. Add SSE 4.2 accelerated digest (with fallback) in zix/digest.h. Make library optionally header-only (define ZIX_INLINE). git-svn-id: http://svn.drobilla.net/zix/trunk@76 df6676b4-ccc9-40e5-b5d6-7c4628a128e3 --- test/hash_test.c | 55 ++++++++++++++++++++++++++++++++++++----------------- test/patree_bench.c | 33 ++++++++++++++++++++------------ 2 files changed, 59 insertions(+), 29 deletions(-) (limited to 'test') diff --git a/test/hash_test.c b/test/hash_test.c index 953673a..9b48db1 100644 --- a/test/hash_test.c +++ b/test/hash_test.c @@ -50,26 +50,44 @@ test_fail(const char* fmt, ...) static unsigned n_checked = 0; static void -check(const void* key, void* value, void* user_data) +check(const void* value, void* user_data) { - if (key == value) { + if (strlen(*(const char*const*)value) >= 3) { ++n_checked; } else { - fprintf(stderr, "ERROR: %s != %s\n", - (const char*)key, (const char*)value); + fprintf(stderr, "ERROR: %s\n", (const char*)value); } } +static uint32_t +string_ptr_hash(const void* value) +{ + // Trusty old DJB hash + const char* str = *(const char*const*)value; + unsigned h = 5381; + for (const char* s = str; *s != '\0'; ++s) { + h = (h << 5) + h + *s; // h = h * 33 + c + } + return h; +} + +static bool +string_ptr_equal(const void* a, const void* b) +{ + return !strcmp(*(const char*const*)a, *(const char*const*)b); +} + int main(int argc, char** argv) { - ZixHash* hash = zix_hash_new(zix_string_hash, zix_string_equal); + ZixHash* hash = zix_hash_new( + string_ptr_hash, string_ptr_equal, sizeof(const char*)); - const size_t n_strings = sizeof(strings) / sizeof(char*); + const size_t n_strings = sizeof(strings) / sizeof(const char*); // Insert each string for (size_t i = 0; i < n_strings; ++i) { - ZixStatus st = zix_hash_insert(hash, strings[i], (char*)strings[i]); + ZixStatus st = zix_hash_insert(hash, &strings[i], NULL); if (st) { return test_fail("Failed to insert `%s'\n", strings[i]); } @@ -79,7 +97,7 @@ main(int argc, char** argv) // Attempt to insert each string again for (size_t i = 0; i < n_strings; ++i) { - ZixStatus st = zix_hash_insert(hash, strings[i], (char*)strings[i]); + ZixStatus st = zix_hash_insert(hash, &strings[i], NULL); if (st != ZIX_STATUS_EXISTS) { return test_fail("Double inserted `%s'\n", strings[i]); } @@ -87,12 +105,13 @@ main(int argc, char** argv) // Search for each string for (size_t i = 0; i < n_strings; ++i) { - const char* match = (const char*)zix_hash_find(hash, strings[i]); + const char*const* match = (const char*const*)zix_hash_find( + hash, &strings[i]); if (!match) { return test_fail("Failed to find `%s'\n", strings[i]); } - if (match != strings[i]) { - return test_fail("Bad match for `%s'\n", strings[i]); + if (*match != strings[i]) { + return test_fail("Bad match for `%s': `%s'\n", strings[i], match); } } @@ -105,7 +124,8 @@ main(int argc, char** argv) }; const size_t n_not_indexed = sizeof(not_indexed) / sizeof(char*); for (size_t i = 0; i < n_not_indexed; ++i) { - const char* match = (const char*)zix_hash_find(hash, not_indexed[i]); + const char*const* match = (const char*const*)zix_hash_find( + hash, ¬_indexed[i]); if (match) { return test_fail("Unexpectedly found `%s'\n", not_indexed[i]); } @@ -114,24 +134,25 @@ main(int argc, char** argv) // Remove strings for (size_t i = 0; i < n_strings; ++i) { // Remove string - ZixStatus st = zix_hash_remove(hash, strings[i]); + ZixStatus st = zix_hash_remove(hash, &strings[i]); if (st) { return test_fail("Failed to remove `%s'\n", strings[i]); } // Ensure second removal fails - st = zix_hash_remove(hash, strings[i]); + st = zix_hash_remove(hash, &strings[i]); if (st != ZIX_STATUS_NOT_FOUND) { return test_fail("Unexpectedly removed `%s' twice\n", strings[i]); } // Check to ensure remaining strings are still present for (size_t j = i + 1; j < n_strings; ++j) { - const char* match = (const char*)zix_hash_find(hash, strings[j]); + const char*const* match = (const char*const*)zix_hash_find( + hash, &strings[j]); if (!match) { return test_fail("Failed to find `%s' after remove\n", strings[j]); } - if (match != strings[j]) { + if (*match != strings[j]) { return test_fail("Bad match for `%s' after remove\n", strings[j]); } } @@ -139,7 +160,7 @@ main(int argc, char** argv) // Insert each string again (to check non-empty desruction) for (size_t i = 0; i < n_strings; ++i) { - ZixStatus st = zix_hash_insert(hash, strings[i], (char*)strings[i]); + ZixStatus st = zix_hash_insert(hash, &strings[i], NULL); if (st) { return test_fail("Failed to insert `%s'\n", strings[i]); } diff --git a/test/patree_bench.c b/test/patree_bench.c index 64e8042..3c13154 100644 --- a/test/patree_bench.c +++ b/test/patree_bench.c @@ -23,6 +23,7 @@ #include +#include "zix/chunk.h" #include "zix/fat_patree.h" #include "zix/hash.h" #include "zix/patree.h" @@ -56,18 +57,21 @@ main(int argc, char** argv) size_t max_n_strings = 10000000; /* Read input strings */ - char** strings = NULL; - size_t n_strings = 0; - char* buf = calloc(1, 1); - size_t buf_len = 1; - size_t this_str_len = 0; + char** strings = NULL; + size_t* lengths = NULL; + size_t n_strings = 0; + char* buf = calloc(1, 1); + size_t buf_len = 1; + size_t this_str_len = 0; for (char c; (c = fgetc(fd)) != EOF;) { if (c == '\n') { if (this_str_len == 0) { continue; } strings = realloc(strings, (n_strings + 1) * sizeof(char*)); + lengths = realloc(lengths, (n_strings + 1) * sizeof(size_t)); strings[n_strings] = malloc(buf_len); + lengths[n_strings] = this_str_len; memcpy(strings[n_strings], buf, buf_len); this_str_len = 0; if (++n_strings == max_n_strings) { @@ -94,9 +98,11 @@ main(int argc, char** argv) for (size_t n = 1; n <= n_strings; n *= 2) { printf("Benchmarking n = %zu\n", n); ZixPatree* patree = zix_patree_new(); - ZixHash* zhash = zix_hash_new(zix_string_hash, zix_string_equal); ZixFatPatree* fat_patree = zix_fat_patree_new(); GHashTable* hash = g_hash_table_new(g_str_hash, g_str_equal); + ZixHash* zhash = zix_hash_new((ZixHashFunc)zix_chunk_hash, + (ZixEqualFunc)zix_chunk_equal, + sizeof(ZixChunk)); fprintf(insert_dat, "%zu", n); fprintf(search_dat, "%zu", n); @@ -112,7 +118,8 @@ main(int argc, char** argv) // ZixHash insert_start = bench_start(); for (size_t i = 0; i < n; ++i) { - ZixStatus st = zix_hash_insert(zhash, strings[i], strings[i]); + const ZixChunk chunk = { strings[i], lengths[i] + 1 }; + ZixStatus st = zix_hash_insert(zhash, &chunk, NULL); if (st && st != ZIX_STATUS_EXISTS) { return test_fail("Failed to insert `%s'\n", strings[i]); } @@ -159,13 +166,15 @@ main(int argc, char** argv) srand(seed); search_start = bench_start(); for (size_t i = 0; i < n; ++i) { - const size_t index = rand() % n; - const char* match = NULL; - if (!(match = zix_hash_find(zhash, strings[index]))) { + const size_t index = rand() % n; + const ZixChunk key = { strings[index], lengths[index] + 1 }; + const ZixChunk* match = NULL; + if (!(match = zix_hash_find(zhash, &key))) { return test_fail("Hash: Failed to find `%s'\n", strings[index]); } - if (strcmp(match, strings[index])) { - return test_fail("Hash: Bad match for `%s'\n", strings[index]); + if (strcmp(match->buf, strings[index])) { + return test_fail("Hash: Bad match %p for `%s': `%s'\n", + (void*)match, strings[index], match->buf); } } fprintf(search_dat, "\t%lf", bench_end(&search_start)); -- cgit v1.2.1