diff options
-rw-r--r-- | src/hash.c | 226 | ||||
-rw-r--r-- | test/hash_test.c | 55 | ||||
-rw-r--r-- | test/patree_bench.c | 33 | ||||
-rw-r--r-- | wscript | 19 | ||||
-rw-r--r-- | zix/chunk.c | 32 | ||||
-rw-r--r-- | zix/chunk.h | 47 | ||||
-rw-r--r-- | zix/common.h | 5 | ||||
-rw-r--r-- | zix/digest.c | 56 | ||||
-rw-r--r-- | zix/digest.h | 39 | ||||
-rw-r--r-- | zix/fat_patree.c (renamed from src/fat_patree.c) | 0 | ||||
-rw-r--r-- | zix/hash.c | 228 | ||||
-rw-r--r-- | zix/hash.h | 123 | ||||
-rw-r--r-- | zix/patree.c (renamed from src/patree.c) | 0 | ||||
-rw-r--r-- | zix/ring.c (renamed from src/ring.c) | 0 | ||||
-rw-r--r-- | zix/sorted_array.c (renamed from src/sorted_array.c) | 0 | ||||
-rw-r--r-- | zix/strindex.c (renamed from src/strindex.c) | 0 | ||||
-rw-r--r-- | zix/tree.c (renamed from src/tree.c) | 58 | ||||
-rw-r--r-- | zix/tree.h | 45 | ||||
-rw-r--r-- | zix/tree_debug.h (renamed from src/tree_debug.h) | 0 |
19 files changed, 608 insertions, 358 deletions
diff --git a/src/hash.c b/src/hash.c deleted file mode 100644 index f654e91..0000000 --- a/src/hash.c +++ /dev/null @@ -1,226 +0,0 @@ -/* - Copyright 2011 David Robillard <http://drobilla.net> - - Permission to use, copy, modify, and/or distribute this software for any - purpose with or without fee is hereby granted, provided that the above - copyright notice and this permission notice appear in all copies. - - THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -*/ - -#include <assert.h> -#include <stdlib.h> -#include <string.h> - -#include "zix/hash.h" - -/** - Primes, each slightly less than twice its predecessor, and as far away - from powers of two as possible. -*/ -static const unsigned sizes[] = { - 53, 97, 193, 389, 769, 1543, 3079, 6151, 12289, 24593, 49157, 98317, - 196613, 393241, 786433, 1572869, 3145739, 6291469, 12582917, 25165843, - 50331653, 100663319, 201326611, 402653189, 805306457, 1610612741, 0 -}; - -typedef struct _Entry { - const void* key; ///< Hash key - void* data; ///< Value - struct _Entry* next; ///< Next entry in bucket - unsigned hash; ///< Non-modulo hash value (for cheap rehash) -} Entry; - -struct ZixHashImpl { - ZixHashFunc hash_func; - ZixEqualFunc key_equal_func; - Entry** buckets; - const unsigned* n_buckets; - unsigned count; -}; - -ZixHash* -zix_hash_new(ZixHashFunc hash_func, - ZixEqualFunc key_equal_func) - -{ - ZixHash* hash = (ZixHash*)malloc(sizeof(ZixHash)); - hash->hash_func = hash_func; - hash->key_equal_func = key_equal_func; - hash->count = 0; - hash->n_buckets = &sizes[0]; - hash->buckets = (Entry**)malloc(*hash->n_buckets * sizeof(Entry*)); - memset(hash->buckets, 0, *hash->n_buckets * sizeof(Entry*)); - - return hash; -} - -void -zix_hash_free(ZixHash* hash) -{ - for (unsigned b = 0; b < *hash->n_buckets; ++b) { - Entry* bucket = hash->buckets[b]; - for (Entry* e = bucket; e;) { - Entry* next = e->next; - free(e); - e = next; - } - } - - free(hash->buckets); - free(hash); -} - -unsigned -zix_string_hash(const void* key) -{ - // Trusty old DJB hash - const char* str = (const char*)key; - unsigned h = 5381; - for (const char* s = str; *s != '\0'; ++s) { - h = (h << 5) + h + *s; // h = h * 33 + c - } - return h; -} - -bool -zix_string_equal(const void* a, const void* b) -{ - return !strcmp((const char*)a, (const char*)b); -} - -static void -insert_entry(Entry** bucket, - Entry* entry) -{ - entry->next = *bucket; - *bucket = entry; -} - -static ZixStatus -rehash(ZixHash* hash, unsigned new_n_buckets) -{ - Entry** new_buckets = (Entry**)malloc(new_n_buckets * sizeof(Entry*)); - if (!new_buckets) { - return ZIX_STATUS_NO_MEM; - } - - memset(new_buckets, 0, new_n_buckets * sizeof(Entry*)); - - for (unsigned b = 0; b < *hash->n_buckets; ++b) { - for (Entry* e = hash->buckets[b]; e;) { - Entry* const next = e->next; - const unsigned h = e->hash % new_n_buckets; - insert_entry(&new_buckets[h], e); - e = next; - } - } - - free(hash->buckets); - hash->buckets = new_buckets; - - return ZIX_STATUS_SUCCESS; -} - -static Entry* -find_entry(const ZixHash* hash, - const void* key, - unsigned h) -{ - for (Entry* e = hash->buckets[h]; e; e = e->next) { - if (hash->key_equal_func(e->key, key)) { - return e; - } - } - - return NULL; -} - -void* -zix_hash_find(const ZixHash* hash, const void* key) -{ - const unsigned h = hash->hash_func(key) % *hash->n_buckets; - Entry* const entry = find_entry(hash, key, h); - return entry ? entry->data : 0; -} - -ZixStatus -zix_hash_insert(ZixHash* hash, const void* key, void* data) -{ - unsigned h_nomod = hash->hash_func(key); - unsigned h = h_nomod % *hash->n_buckets; - - Entry* elem = find_entry(hash, key, h); - if (elem) { - assert(elem->hash == h_nomod); - return ZIX_STATUS_EXISTS; - } - - elem = (Entry*)malloc(sizeof(Entry)); - if (!elem) { - return ZIX_STATUS_NO_MEM; - } - elem->key = key; - elem->data = data; - elem->next = NULL; - elem->hash = h_nomod; - const unsigned next_n_buckets = *(hash->n_buckets + 1); - if (next_n_buckets != 0 && (hash->count + 1) >= next_n_buckets) { - if (!rehash(hash, next_n_buckets)) { - h = h_nomod % *(++hash->n_buckets); - } - } - - insert_entry(&hash->buckets[h], elem); - ++hash->count; - return ZIX_STATUS_SUCCESS; -} - -ZixStatus -zix_hash_remove(ZixHash* hash, const void* key) -{ - unsigned h = hash->hash_func(key) % *hash->n_buckets; - - Entry** next_ptr = &hash->buckets[h]; - for (Entry* e = hash->buckets[h]; e; e = e->next) { - if (hash->key_equal_func(e->key, key)) { - *next_ptr = e->next; - free(e); - return ZIX_STATUS_SUCCESS; - } - next_ptr = &e->next; - } - - if (hash->n_buckets != sizes) { - const unsigned prev_n_buckets = *(hash->n_buckets - 1); - if (hash->count - 1 <= prev_n_buckets) { - if (!rehash(hash, prev_n_buckets)) { - --hash->n_buckets; - } - } - } - - --hash->count; - return ZIX_STATUS_NOT_FOUND; -} - -ZIX_API -void -zix_hash_foreach(const ZixHash* hash, - void (*f)(const void* key, void* value, void* user_data), - void* user_data) -{ - for (unsigned b = 0; b < *hash->n_buckets; ++b) { - Entry* bucket = hash->buckets[b]; - for (Entry* e = bucket; e; e = e->next) { - f(e->key, e->data, user_data); - } - } -} - diff --git a/test/hash_test.c b/test/hash_test.c index 953673a..9b48db1 100644 --- a/test/hash_test.c +++ b/test/hash_test.c @@ -50,26 +50,44 @@ test_fail(const char* fmt, ...) static unsigned n_checked = 0; static void -check(const void* key, void* value, void* user_data) +check(const void* value, void* user_data) { - if (key == value) { + if (strlen(*(const char*const*)value) >= 3) { ++n_checked; } else { - fprintf(stderr, "ERROR: %s != %s\n", - (const char*)key, (const char*)value); + fprintf(stderr, "ERROR: %s\n", (const char*)value); } } +static uint32_t +string_ptr_hash(const void* value) +{ + // Trusty old DJB hash + const char* str = *(const char*const*)value; + unsigned h = 5381; + for (const char* s = str; *s != '\0'; ++s) { + h = (h << 5) + h + *s; // h = h * 33 + c + } + return h; +} + +static bool +string_ptr_equal(const void* a, const void* b) +{ + return !strcmp(*(const char*const*)a, *(const char*const*)b); +} + int main(int argc, char** argv) { - ZixHash* hash = zix_hash_new(zix_string_hash, zix_string_equal); + ZixHash* hash = zix_hash_new( + string_ptr_hash, string_ptr_equal, sizeof(const char*)); - const size_t n_strings = sizeof(strings) / sizeof(char*); + const size_t n_strings = sizeof(strings) / sizeof(const char*); // Insert each string for (size_t i = 0; i < n_strings; ++i) { - ZixStatus st = zix_hash_insert(hash, strings[i], (char*)strings[i]); + ZixStatus st = zix_hash_insert(hash, &strings[i], NULL); if (st) { return test_fail("Failed to insert `%s'\n", strings[i]); } @@ -79,7 +97,7 @@ main(int argc, char** argv) // Attempt to insert each string again for (size_t i = 0; i < n_strings; ++i) { - ZixStatus st = zix_hash_insert(hash, strings[i], (char*)strings[i]); + ZixStatus st = zix_hash_insert(hash, &strings[i], NULL); if (st != ZIX_STATUS_EXISTS) { return test_fail("Double inserted `%s'\n", strings[i]); } @@ -87,12 +105,13 @@ main(int argc, char** argv) // Search for each string for (size_t i = 0; i < n_strings; ++i) { - const char* match = (const char*)zix_hash_find(hash, strings[i]); + const char*const* match = (const char*const*)zix_hash_find( + hash, &strings[i]); if (!match) { return test_fail("Failed to find `%s'\n", strings[i]); } - if (match != strings[i]) { - return test_fail("Bad match for `%s'\n", strings[i]); + if (*match != strings[i]) { + return test_fail("Bad match for `%s': `%s'\n", strings[i], match); } } @@ -105,7 +124,8 @@ main(int argc, char** argv) }; const size_t n_not_indexed = sizeof(not_indexed) / sizeof(char*); for (size_t i = 0; i < n_not_indexed; ++i) { - const char* match = (const char*)zix_hash_find(hash, not_indexed[i]); + const char*const* match = (const char*const*)zix_hash_find( + hash, ¬_indexed[i]); if (match) { return test_fail("Unexpectedly found `%s'\n", not_indexed[i]); } @@ -114,24 +134,25 @@ main(int argc, char** argv) // Remove strings for (size_t i = 0; i < n_strings; ++i) { // Remove string - ZixStatus st = zix_hash_remove(hash, strings[i]); + ZixStatus st = zix_hash_remove(hash, &strings[i]); if (st) { return test_fail("Failed to remove `%s'\n", strings[i]); } // Ensure second removal fails - st = zix_hash_remove(hash, strings[i]); + st = zix_hash_remove(hash, &strings[i]); if (st != ZIX_STATUS_NOT_FOUND) { return test_fail("Unexpectedly removed `%s' twice\n", strings[i]); } // Check to ensure remaining strings are still present for (size_t j = i + 1; j < n_strings; ++j) { - const char* match = (const char*)zix_hash_find(hash, strings[j]); + const char*const* match = (const char*const*)zix_hash_find( + hash, &strings[j]); if (!match) { return test_fail("Failed to find `%s' after remove\n", strings[j]); } - if (match != strings[j]) { + if (*match != strings[j]) { return test_fail("Bad match for `%s' after remove\n", strings[j]); } } @@ -139,7 +160,7 @@ main(int argc, char** argv) // Insert each string again (to check non-empty desruction) for (size_t i = 0; i < n_strings; ++i) { - ZixStatus st = zix_hash_insert(hash, strings[i], (char*)strings[i]); + ZixStatus st = zix_hash_insert(hash, &strings[i], NULL); if (st) { return test_fail("Failed to insert `%s'\n", strings[i]); } diff --git a/test/patree_bench.c b/test/patree_bench.c index 64e8042..3c13154 100644 --- a/test/patree_bench.c +++ b/test/patree_bench.c @@ -23,6 +23,7 @@ #include <glib.h> +#include "zix/chunk.h" #include "zix/fat_patree.h" #include "zix/hash.h" #include "zix/patree.h" @@ -56,18 +57,21 @@ main(int argc, char** argv) size_t max_n_strings = 10000000; /* Read input strings */ - char** strings = NULL; - size_t n_strings = 0; - char* buf = calloc(1, 1); - size_t buf_len = 1; - size_t this_str_len = 0; + char** strings = NULL; + size_t* lengths = NULL; + size_t n_strings = 0; + char* buf = calloc(1, 1); + size_t buf_len = 1; + size_t this_str_len = 0; for (char c; (c = fgetc(fd)) != EOF;) { if (c == '\n') { if (this_str_len == 0) { continue; } strings = realloc(strings, (n_strings + 1) * sizeof(char*)); + lengths = realloc(lengths, (n_strings + 1) * sizeof(size_t)); strings[n_strings] = malloc(buf_len); + lengths[n_strings] = this_str_len; memcpy(strings[n_strings], buf, buf_len); this_str_len = 0; if (++n_strings == max_n_strings) { @@ -94,9 +98,11 @@ main(int argc, char** argv) for (size_t n = 1; n <= n_strings; n *= 2) { printf("Benchmarking n = %zu\n", n); ZixPatree* patree = zix_patree_new(); - ZixHash* zhash = zix_hash_new(zix_string_hash, zix_string_equal); ZixFatPatree* fat_patree = zix_fat_patree_new(); GHashTable* hash = g_hash_table_new(g_str_hash, g_str_equal); + ZixHash* zhash = zix_hash_new((ZixHashFunc)zix_chunk_hash, + (ZixEqualFunc)zix_chunk_equal, + sizeof(ZixChunk)); fprintf(insert_dat, "%zu", n); fprintf(search_dat, "%zu", n); @@ -112,7 +118,8 @@ main(int argc, char** argv) // ZixHash insert_start = bench_start(); for (size_t i = 0; i < n; ++i) { - ZixStatus st = zix_hash_insert(zhash, strings[i], strings[i]); + const ZixChunk chunk = { strings[i], lengths[i] + 1 }; + ZixStatus st = zix_hash_insert(zhash, &chunk, NULL); if (st && st != ZIX_STATUS_EXISTS) { return test_fail("Failed to insert `%s'\n", strings[i]); } @@ -159,13 +166,15 @@ main(int argc, char** argv) srand(seed); search_start = bench_start(); for (size_t i = 0; i < n; ++i) { - const size_t index = rand() % n; - const char* match = NULL; - if (!(match = zix_hash_find(zhash, strings[index]))) { + const size_t index = rand() % n; + const ZixChunk key = { strings[index], lengths[index] + 1 }; + const ZixChunk* match = NULL; + if (!(match = zix_hash_find(zhash, &key))) { return test_fail("Hash: Failed to find `%s'\n", strings[index]); } - if (strcmp(match, strings[index])) { - return test_fail("Hash: Bad match for `%s'\n", strings[index]); + if (strcmp(match->buf, strings[index])) { + return test_fail("Hash: Bad match %p for `%s': `%s'\n", + (void*)match, strings[index], match->buf); } } fprintf(search_dat, "\t%lf", bench_end(&search_start)); @@ -71,6 +71,7 @@ def configure(conf): tests = [ 'hash_test', + 'inline_test', 'patree_test', 'ring_test', 'sem_test', @@ -96,13 +97,15 @@ def build(bld): libflags = [] lib_source = ''' - src/fat_patree.c - src/hash.c - src/patree.c - src/ring.c - src/sorted_array.c - src/strindex.c - src/tree.c + zix/chunk.c + zix/digest.c + zix/fat_patree.c + zix/hash.c + zix/patree.c + zix/ring.c + zix/sorted_array.c + zix/strindex.c + zix/tree.c ''' # Library @@ -189,5 +192,5 @@ def upload_docs(ctx): def test(ctx): autowaf.pre_test(ctx, APPNAME) os.environ['PATH'] = 'test' + os.pathsep + os.getenv('PATH') - autowaf.run_tests(ctx, APPNAME, tests, dirs=['./src','./test']) + autowaf.run_tests(ctx, APPNAME, tests, dirs=['.','src','test']) autowaf.post_test(ctx, APPNAME) diff --git a/zix/chunk.c b/zix/chunk.c new file mode 100644 index 0000000..4533b26 --- /dev/null +++ b/zix/chunk.c @@ -0,0 +1,32 @@ +/* + Copyright 2012 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include <string.h> + +#include "zix/chunk.h" +#include "zix/digest.h" + +ZIX_API uint32_t +zix_chunk_hash(const ZixChunk* const chunk) +{ + return zix_digest_add(zix_digest_start(), chunk->buf, chunk->len); +} + +ZIX_API bool +zix_chunk_equal(const ZixChunk* a, const ZixChunk* b) +{ + return a->len == b->len && !memcmp(a->buf, b->buf, a->len); +} diff --git a/zix/chunk.h b/zix/chunk.h new file mode 100644 index 0000000..6efa766 --- /dev/null +++ b/zix/chunk.h @@ -0,0 +1,47 @@ +/* + Copyright 2012 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef ZIX_CHUNK_H +#define ZIX_CHUNK_H + +#include <stddef.h> +#include <stdint.h> + +#include "zix/common.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + A chunk of memory. +*/ +typedef struct { + void* buf; /**< Start of memory chunk */ + size_t len; /**< Length of memory chunk */ +} ZixChunk; + +ZIX_API uint32_t +zix_chunk_hash(const ZixChunk* chunk); + +ZIX_API bool +zix_chunk_equal(const ZixChunk* a, const ZixChunk* b); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* ZIX_CHUNK_H */ diff --git a/zix/common.h b/zix/common.h index 59e1f55..f126cd1 100644 --- a/zix/common.h +++ b/zix/common.h @@ -36,8 +36,13 @@ # else # define ZIX_API ZIX_LIB_IMPORT # endif +# define ZIX_PRIVATE static +#elif defined(ZIX_INLINE) +# define ZIX_API static inline +# define ZIX_PRIVATE static inline #else # define ZIX_API +# define ZIX_PRIVATE static #endif /** @endcond */ diff --git a/zix/digest.c b/zix/digest.c new file mode 100644 index 0000000..4e8e974 --- /dev/null +++ b/zix/digest.c @@ -0,0 +1,56 @@ +/* + Copyright 2012 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "zix/digest.h" + +#ifdef __SSE4_2__ +# include <smmintrin.h> +#endif + +ZIX_API uint32_t +zix_digest_start(void) +{ +#ifdef __SSE4_2__ + return 1; // CRC32 initial value +#else + return 5381; // DJB hash initial value +#endif +} + +ZIX_API uint32_t +zix_digest_add(uint32_t hash, const void* buf, const size_t len) +{ +#ifdef __SSE4_2__ + // SSE 4.2 CRC32 + for (size_t i = 0; i < (len / sizeof(uint32_t)); ++i) { + hash = _mm_crc32_u32(hash, *(uint32_t*)buf); + buf += sizeof(uint32_t); + } + if (len & sizeof(uint16_t)) { + hash = _mm_crc32_u16(hash, *(uint16_t*)buf); + buf += sizeof(uint16_t); + } + if (len & sizeof(uint8_t)) { + hash = _mm_crc32_u8(hash, *(uint8_t*)buf); + } +#else + // Classic DJB hash + for (size_t i = 0; i < len; ++i) { + hash = (hash << 5) + hash + ((const uint8_t*)buf)[i]; + } +#endif + return hash; +} diff --git a/zix/digest.h b/zix/digest.h new file mode 100644 index 0000000..34ab376 --- /dev/null +++ b/zix/digest.h @@ -0,0 +1,39 @@ +/* + Copyright 2012 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef ZIX_DIGEST_H +#define ZIX_DIGEST_H + +#include <stddef.h> +#include <stdint.h> + +#include "zix/common.h" + +#ifdef __cplusplus +extern "C" { +#endif + +ZIX_API uint32_t +zix_digest_start(void); + +ZIX_API uint32_t +zix_digest_add(uint32_t hash, const void* buf, const size_t len); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* ZIX_DIGEST_H */ diff --git a/src/fat_patree.c b/zix/fat_patree.c index 454cfba..454cfba 100644 --- a/src/fat_patree.c +++ b/zix/fat_patree.c diff --git a/zix/hash.c b/zix/hash.c new file mode 100644 index 0000000..b24ee78 --- /dev/null +++ b/zix/hash.c @@ -0,0 +1,228 @@ +/* + Copyright 2011 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include <assert.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#include "zix/hash.h" + +/** + Primes, each slightly less than twice its predecessor, and as far away + from powers of two as possible. +*/ +static const unsigned sizes[] = { + 53, 97, 193, 389, 769, 1543, 3079, 6151, 12289, 24593, 49157, 98317, + 196613, 393241, 786433, 1572869, 3145739, 6291469, 12582917, 25165843, + 50331653, 100663319, 201326611, 402653189, 805306457, 1610612741, 0 +}; + +typedef struct ZixHashEntry { + struct ZixHashEntry* next; ///< Next entry in bucket + uint32_t hash; ///< Non-modulo hash value + // Value follows here (access with zix_hash_value) +} ZixHashEntry; + +struct ZixHashImpl { + ZixHashFunc hash_func; + ZixEqualFunc equal_func; + ZixHashEntry** buckets; + const unsigned* n_buckets; + size_t value_size; + unsigned count; +}; + +static inline const void* +zix_hash_value(const ZixHashEntry* entry) +{ + return entry + 1; +} + +ZIX_API ZixHash* +zix_hash_new(ZixHashFunc hash_func, + ZixEqualFunc equal_func, + size_t value_size) +{ + ZixHash* hash = (ZixHash*)malloc(sizeof(ZixHash)); + hash->hash_func = hash_func; + hash->equal_func = equal_func; + hash->n_buckets = &sizes[0]; + hash->value_size = value_size; + hash->count = 0; + hash->buckets = (ZixHashEntry**)calloc(*hash->n_buckets, + sizeof(ZixHashEntry*)); + return hash; +} + +ZIX_API void +zix_hash_free(ZixHash* hash) +{ + for (unsigned b = 0; b < *hash->n_buckets; ++b) { + ZixHashEntry* bucket = hash->buckets[b]; + for (ZixHashEntry* e = bucket; e;) { + ZixHashEntry* next = e->next; + free(e); + e = next; + } + } + + free(hash->buckets); + free(hash); +} + +ZIX_API size_t +zix_hash_size(const ZixHash* hash) +{ + return hash->count; +} + +static inline void +insert_entry(ZixHashEntry** bucket, ZixHashEntry* entry) +{ + entry->next = *bucket; + *bucket = entry; +} + +static inline ZixStatus +rehash(ZixHash* hash, unsigned new_n_buckets) +{ + ZixHashEntry** new_buckets = (ZixHashEntry**)calloc( + new_n_buckets, sizeof(ZixHashEntry*)); + if (!new_buckets) { + return ZIX_STATUS_NO_MEM; + } + + const unsigned old_n_buckets = *hash->n_buckets; + for (unsigned b = 0; b < old_n_buckets; ++b) { + for (ZixHashEntry* e = hash->buckets[b]; e;) { + ZixHashEntry* const next = e->next; + const unsigned h = e->hash % new_n_buckets; + insert_entry(&new_buckets[h], e); + e = next; + } + } + + free(hash->buckets); + hash->buckets = new_buckets; + + return ZIX_STATUS_SUCCESS; +} + +static inline ZixHashEntry* +find_entry(const ZixHash* hash, + const void* key, + const unsigned h, + const unsigned h_nomod) +{ + for (ZixHashEntry* e = hash->buckets[h]; e; e = e->next) { + if (e->hash == h_nomod && hash->equal_func(zix_hash_value(e), key)) { + return e; + } + } + return NULL; +} + +ZIX_API const void* +zix_hash_find(const ZixHash* hash, const void* value) +{ + const unsigned h_nomod = hash->hash_func(value); + const unsigned h = h_nomod % *hash->n_buckets; + ZixHashEntry* const entry = find_entry(hash, value, h, h_nomod); + return entry ? zix_hash_value(entry) : 0; +} + +ZIX_API ZixStatus +zix_hash_insert(ZixHash* hash, const void* value, const void** inserted) +{ + unsigned h_nomod = hash->hash_func(value); + unsigned h = h_nomod % *hash->n_buckets; + + ZixHashEntry* elem = find_entry(hash, value, h, h_nomod); + if (elem) { + assert(elem->hash == h_nomod); + if (inserted) { + *inserted = zix_hash_value(elem); + } + return ZIX_STATUS_EXISTS; + } + + elem = (ZixHashEntry*)malloc(sizeof(ZixHashEntry) + hash->value_size); + if (!elem) { + return ZIX_STATUS_NO_MEM; + } + elem->next = NULL; + elem->hash = h_nomod; + memcpy(elem + 1, value, hash->value_size); + + const unsigned next_n_buckets = *(hash->n_buckets + 1); + if (next_n_buckets != 0 && (hash->count + 1) >= next_n_buckets) { + if (!rehash(hash, next_n_buckets)) { + h = h_nomod % *(++hash->n_buckets); + } + } + + insert_entry(&hash->buckets[h], elem); + ++hash->count; + if (inserted) { + *inserted = zix_hash_value(elem); + } + return ZIX_STATUS_SUCCESS; +} + +ZIX_API ZixStatus +zix_hash_remove(ZixHash* hash, const void* value) +{ + const unsigned h_nomod = hash->hash_func(value); + const unsigned h = h_nomod % *hash->n_buckets; + + ZixHashEntry** next_ptr = &hash->buckets[h]; + for (ZixHashEntry* e = hash->buckets[h]; e; e = e->next) { + if (h_nomod == e->hash && + hash->equal_func(zix_hash_value(e), value)) { + *next_ptr = e->next; + free(e); + return ZIX_STATUS_SUCCESS; + } + next_ptr = &e->next; + } + + if (hash->n_buckets != sizes) { + const unsigned prev_n_buckets = *(hash->n_buckets - 1); + if (hash->count - 1 <= prev_n_buckets) { + if (!rehash(hash, prev_n_buckets)) { + --hash->n_buckets; + } + } + } + + --hash->count; + return ZIX_STATUS_NOT_FOUND; +} + +ZIX_API void +zix_hash_foreach(const ZixHash* hash, + ZixHashVisitFunc f, + void* user_data) +{ + for (unsigned b = 0; b < *hash->n_buckets; ++b) { + ZixHashEntry* bucket = hash->buckets[b]; + for (const ZixHashEntry* e = bucket; e; e = e->next) { + f(zix_hash_value(e), user_data); + } + } +} + @@ -1,5 +1,5 @@ /* - Copyright 2011 David Robillard <http://drobilla.net> + Copyright 2011-2012 David Robillard <http://drobilla.net> Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above @@ -17,56 +17,121 @@ #ifndef ZIX_HASH_H #define ZIX_HASH_H +#include <stddef.h> +#include <stdint.h> + #include "zix/common.h" #ifdef __cplusplus extern "C" { #endif +/** + @addtogroup zix + @{ + @name Hash + @{ +*/ + typedef struct ZixHashImpl ZixHash; /** Function for computing the hash of an element. */ -typedef unsigned (*ZixHashFunc)(const void* key); +typedef uint32_t (*ZixHashFunc)(const void* value); + +/** + Function to visit a hash element. +*/ +typedef void (*ZixHashVisitFunc)(const void* value, + void* user_data); -ZIX_API -ZixHash* +/** + Create a new hash table. + + To minimize space overhead, unlike many hash tables this stores a single + value, not a key and a value. Any size of value can be stored, but all the + values in the hash table must be the same size, and the values must be safe + to copy with memcpy. To get key:value behaviour, simply insert a struct + with a key and value into the hash. + + @param hash_func The hashing function. + @param equal_func A function to test value equality. + @param value_size The size of the values to be stored. +*/ +ZIX_API ZixHash* zix_hash_new(ZixHashFunc hash_func, - ZixEqualFunc key_equal_func); + ZixEqualFunc equal_func, + size_t value_size); -ZIX_API -void +/** + Free @p hash. +*/ +ZIX_API void zix_hash_free(ZixHash* hash); -ZIX_API -unsigned -zix_string_hash(const void* key); +/** + Return the number of elements in @p hash. +*/ +ZIX_API size_t +zix_hash_size(const ZixHash* hash); + +/** + Insert an item into @p hash. + + If no matching value is found, ZIX_STATUS_SUCCESS will be returned, and @p + inserted will be pointed to the copy of @p value made in the new hash node. + + If a matching value already exists, ZIX_STATUS_EXISTS will be returned, and + @p inserted will be pointed to the existing value. + + @param hash The hash table. + @param value The value to be inserted. + @param inserted The copy of @p value in the hash table. + @return ZIX_STATUS_SUCCESS, ZIX_STATUS_EXISTS, or ZIX_STATUS_NO_MEM. +*/ +ZIX_API ZixStatus +zix_hash_insert(ZixHash* hash, + const void* value, + const void** inserted); -ZIX_API -bool -zix_string_equal(const void* a, const void* b); +/** + Remove an item from @p hash. -ZIX_API -ZixStatus -zix_hash_insert(ZixHash* hash, - const void* key, - void* data); + @param hash The hash table. + @param value The value to remove. + @return ZIX_STATUS_SUCCES or ZIX_STATUS_NOT_FOUND. +*/ +ZIX_API ZixStatus +zix_hash_remove(ZixHash* hash, + const void* value); -ZIX_API -ZixStatus -zix_hash_remove(ZixHash* hash, const void* key); +/** + Search for an item in @p hash. -ZIX_API -void* + @param hash The hash table. + @param value The value to search for. +*/ +ZIX_API const void* zix_hash_find(const ZixHash* hash, - const void* key); + const void* value); + +/** + Call @p f on each value in @p hash. -ZIX_API -void -zix_hash_foreach(const ZixHash* hash, - void (*f)(const void* key, void* value, void* user_data), - void* user_data); + @param hash The hash table. + @param f The function to call on each value. + @param user_data The user_data parameter passed to @p f. +*/ +ZIX_API void +zix_hash_foreach(const ZixHash* hash, + ZixHashVisitFunc f, + void* user_data); + +/** + @} + @} +*/ #ifdef __cplusplus } /* extern "C" */ diff --git a/src/patree.c b/zix/patree.c index f3603a2..f3603a2 100644 --- a/src/patree.c +++ b/zix/patree.c diff --git a/src/sorted_array.c b/zix/sorted_array.c index f8e785d..f8e785d 100644 --- a/src/sorted_array.c +++ b/zix/sorted_array.c diff --git a/src/strindex.c b/zix/strindex.c index bd97db5..bd97db5 100644 --- a/src/strindex.c +++ b/zix/strindex.c @@ -66,8 +66,7 @@ struct ZixTreeNodeImpl { # define DEBUG_PRINTF(fmt, ...) #endif -ZIX_API -ZixTree* +ZIX_API ZixTree* zix_tree_new(bool allow_duplicates, ZixComparator cmp, void* cmp_data, @@ -83,7 +82,7 @@ zix_tree_new(bool allow_duplicates, return t; } -static void +ZIX_PRIVATE void zix_tree_free_rec(ZixTree* t, ZixTreeNode* n) { if (n) { @@ -96,8 +95,7 @@ zix_tree_free_rec(ZixTree* t, ZixTreeNode* n) } } -ZIX_API -void +ZIX_API void zix_tree_free(ZixTree* t) { if (t) { @@ -106,13 +104,13 @@ zix_tree_free(ZixTree* t) } } -size_t +ZIX_API size_t zix_tree_size(const ZixTree* t) { return t->size; } -static void +ZIX_PRIVATE void rotate(ZixTreeNode* p, ZixTreeNode* q) { assert(q->parent == p); @@ -156,7 +154,7 @@ rotate(ZixTreeNode* p, ZixTreeNode* q) * / \ / \ * B C A B */ -static ZixTreeNode* +ZIX_PRIVATE ZixTreeNode* rotate_left(ZixTreeNode* p, int* height_change) { ZixTreeNode* const q = p->right; @@ -189,7 +187,7 @@ rotate_left(ZixTreeNode* p, int* height_change) * A B B C * */ -static ZixTreeNode* +ZIX_PRIVATE ZixTreeNode* rotate_right(ZixTreeNode* p, int* height_change) { ZixTreeNode* const q = p->left; @@ -224,7 +222,7 @@ rotate_right(ZixTreeNode* p, int* height_change) * B C * */ -static ZixTreeNode* +ZIX_PRIVATE ZixTreeNode* rotate_left_right(ZixTreeNode* p, int* height_change) { ZixTreeNode* const q = p->left; @@ -268,7 +266,7 @@ rotate_left_right(ZixTreeNode* p, int* height_change) * B C * */ -static ZixTreeNode* +ZIX_PRIVATE ZixTreeNode* rotate_right_left(ZixTreeNode* p, int* height_change) { ZixTreeNode* const q = p->right; @@ -301,7 +299,7 @@ rotate_right_left(ZixTreeNode* p, int* height_change) return r; } -static ZixTreeNode* +ZIX_PRIVATE ZixTreeNode* zix_tree_rebalance(ZixTree* t, ZixTreeNode* node, int* height_change) { #ifdef ZIX_TREE_HYPER_VERIFY @@ -338,8 +336,7 @@ zix_tree_rebalance(ZixTree* t, ZixTreeNode* node, int* height_change) return replacement; } -ZIX_API -ZixStatus +ZIX_API ZixStatus zix_tree_insert(ZixTree* t, void* e, ZixTreeIter** ti) { DEBUG_PRINTF("**** INSERT %ld\n", (intptr_t)e); @@ -438,8 +435,7 @@ zix_tree_insert(ZixTree* t, void* e, ZixTreeIter** ti) return ZIX_STATUS_SUCCESS; } -ZIX_API -ZixStatus +ZIX_API ZixStatus zix_tree_remove(ZixTree* t, ZixTreeIter* ti) { ZixTreeNode* const n = ti; @@ -596,8 +592,7 @@ zix_tree_remove(ZixTree* t, ZixTreeIter* ti) return ZIX_STATUS_SUCCESS; } -ZIX_API -ZixStatus +ZIX_API ZixStatus zix_tree_find(const ZixTree* t, const void* e, ZixTreeIter** ti) { ZixTreeNode* n = t->root; @@ -616,15 +611,13 @@ zix_tree_find(const ZixTree* t, const void* e, ZixTreeIter** ti) return (n) ? ZIX_STATUS_SUCCESS : ZIX_STATUS_NOT_FOUND; } -ZIX_API -void* +ZIX_API void* zix_tree_get(ZixTreeIter* ti) { return ti ? ti->data : NULL; } -ZIX_API -ZixTreeIter* +ZIX_API ZixTreeIter* zix_tree_begin(ZixTree* t) { if (!t->root) { @@ -638,15 +631,13 @@ zix_tree_begin(ZixTree* t) return n; } -ZIX_API -ZixTreeIter* +ZIX_API ZixTreeIter* zix_tree_end(ZixTree* t) { return NULL; } -ZIX_API -ZixTreeIter* +ZIX_API ZixTreeIter* zix_tree_rbegin(ZixTree* t) { if (!t->root) { @@ -660,29 +651,25 @@ zix_tree_rbegin(ZixTree* t) return n; } -ZIX_API -ZixTreeIter* +ZIX_API ZixTreeIter* zix_tree_rend(ZixTree* t) { return NULL; } -ZIX_API -bool +ZIX_API bool zix_tree_iter_is_end(ZixTreeIter* i) { return !i; } -ZIX_API -bool +ZIX_API bool zix_tree_iter_is_rend(ZixTreeIter* i) { return !i; } -ZIX_API -ZixTreeIter* +ZIX_API ZixTreeIter* zix_tree_iter_next(ZixTreeIter* i) { if (!i) { @@ -705,8 +692,7 @@ zix_tree_iter_next(ZixTreeIter* i) return i; } -ZIX_API -ZixTreeIter* +ZIX_API ZixTreeIter* zix_tree_iter_prev(ZixTreeIter* i) { if (!i) { @@ -45,8 +45,7 @@ typedef struct ZixTreeNodeImpl ZixTreeIter; /** Create a new (empty) tree. */ -ZIX_API -ZixTree* +ZIX_API ZixTree* zix_tree_new(bool allow_duplicates, ZixComparator cmp, void* cmp_data, @@ -55,100 +54,86 @@ zix_tree_new(bool allow_duplicates, /** Free @a t. */ -ZIX_API -void +ZIX_API void zix_tree_free(ZixTree* t); /** Return the number of elements in @a t. */ -ZIX_API -size_t +ZIX_API size_t zix_tree_size(const ZixTree* t); /** Insert the element @a e into @a t and point @a ti at the new element. */ -ZIX_API -ZixStatus +ZIX_API ZixStatus zix_tree_insert(ZixTree* t, void* e, ZixTreeIter** ti); /** Remove the item pointed at by @a ti from @a t. */ -ZIX_API -ZixStatus +ZIX_API ZixStatus zix_tree_remove(ZixTree* t, ZixTreeIter* ti); /** Set @a ti to an element equal to @a e in @a t. If no such item exists, @a ti is set to NULL. */ -ZIX_API -ZixStatus +ZIX_API ZixStatus zix_tree_find(const ZixTree* t, const void* e, ZixTreeIter** ti); /** Return the data associated with the given tree item. */ -ZIX_API -void* +ZIX_API void* zix_tree_get(ZixTreeIter* ti); /** Return an iterator to the first (smallest) element in @a t. */ -ZIX_API -ZixTreeIter* +ZIX_API ZixTreeIter* zix_tree_begin(ZixTree* t); /** Return an iterator the the element one past the last element in @a t. */ -ZIX_API -ZixTreeIter* +ZIX_API ZixTreeIter* zix_tree_end(ZixTree* t); /** Return true iff @a i is an iterator to the end of its tree. */ -ZIX_API -bool +ZIX_API bool zix_tree_iter_is_end(ZixTreeIter* i); /** Return an iterator to the last (largest) element in @a t. */ -ZIX_API -ZixTreeIter* +ZIX_API ZixTreeIter* zix_tree_rbegin(ZixTree* t); /** Return an iterator the the element one before the first element in @a t. */ -ZIX_API -ZixTreeIter* +ZIX_API ZixTreeIter* zix_tree_rend(ZixTree* t); /** Return true iff @a i is an iterator to the reverse end of its tree. */ -ZIX_API -bool +ZIX_API bool zix_tree_iter_is_rend(ZixTreeIter* i); /** Return an iterator that points to the element one past @a i. */ -ZIX_API -ZixTreeIter* +ZIX_API ZixTreeIter* zix_tree_iter_next(ZixTreeIter* i); /** Return an iterator that points to the element one before @a i. */ -ZIX_API -ZixTreeIter* +ZIX_API ZixTreeIter* zix_tree_iter_prev(ZixTreeIter* i); /** diff --git a/src/tree_debug.h b/zix/tree_debug.h index e903265..e903265 100644 --- a/src/tree_debug.h +++ b/zix/tree_debug.h |