From be0379f60c704cc737ac0063c341121c7de7b2fb Mon Sep 17 00:00:00 2001 From: David Robillard Date: Fri, 10 Aug 2012 02:10:46 +0000 Subject: Use a single node hash for all types of nodes. Inline nodes in hash table nodes and eliminate key+value pointer overhead. Use SSE 4.2 accelerated node and string hashing when available. git-svn-id: http://svn.drobilla.net/sord/trunk@250 3d64ff67-21c5-427c-a301-fe4f08042e5a --- src/zix/digest.c | 56 ++++++++++++++++++++++++++ src/zix/digest.h | 39 +++++++++++++++++++ src/zix/hash.c | 117 +++++++++++++++++++++++++++++-------------------------- src/zix/hash.h | 107 ++++++++++++++++++++++++++++++++++++++++++-------- 4 files changed, 247 insertions(+), 72 deletions(-) create mode 100644 src/zix/digest.c create mode 100644 src/zix/digest.h (limited to 'src/zix') diff --git a/src/zix/digest.c b/src/zix/digest.c new file mode 100644 index 0000000..4e8e974 --- /dev/null +++ b/src/zix/digest.c @@ -0,0 +1,56 @@ +/* + Copyright 2012 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "zix/digest.h" + +#ifdef __SSE4_2__ +# include +#endif + +ZIX_API uint32_t +zix_digest_start(void) +{ +#ifdef __SSE4_2__ + return 1; // CRC32 initial value +#else + return 5381; // DJB hash initial value +#endif +} + +ZIX_API uint32_t +zix_digest_add(uint32_t hash, const void* buf, const size_t len) +{ +#ifdef __SSE4_2__ + // SSE 4.2 CRC32 + for (size_t i = 0; i < (len / sizeof(uint32_t)); ++i) { + hash = _mm_crc32_u32(hash, *(uint32_t*)buf); + buf += sizeof(uint32_t); + } + if (len & sizeof(uint16_t)) { + hash = _mm_crc32_u16(hash, *(uint16_t*)buf); + buf += sizeof(uint16_t); + } + if (len & sizeof(uint8_t)) { + hash = _mm_crc32_u8(hash, *(uint8_t*)buf); + } +#else + // Classic DJB hash + for (size_t i = 0; i < len; ++i) { + hash = (hash << 5) + hash + ((const uint8_t*)buf)[i]; + } +#endif + return hash; +} diff --git a/src/zix/digest.h b/src/zix/digest.h new file mode 100644 index 0000000..34ab376 --- /dev/null +++ b/src/zix/digest.h @@ -0,0 +1,39 @@ +/* + Copyright 2012 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef ZIX_DIGEST_H +#define ZIX_DIGEST_H + +#include +#include + +#include "zix/common.h" + +#ifdef __cplusplus +extern "C" { +#endif + +ZIX_API uint32_t +zix_digest_start(void); + +ZIX_API uint32_t +zix_digest_add(uint32_t hash, const void* buf, const size_t len); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* ZIX_DIGEST_H */ diff --git a/src/zix/hash.c b/src/zix/hash.c index aacf993..b24ee78 100644 --- a/src/zix/hash.c +++ b/src/zix/hash.c @@ -15,6 +15,7 @@ */ #include +#include #include #include @@ -31,31 +32,39 @@ static const unsigned sizes[] = { }; typedef struct ZixHashEntry { - const void* key; ///< Hash key - void* data; ///< Value struct ZixHashEntry* next; ///< Next entry in bucket - unsigned hash; ///< Non-modulo hash value (for cheap rehash) + uint32_t hash; ///< Non-modulo hash value + // Value follows here (access with zix_hash_value) } ZixHashEntry; struct ZixHashImpl { ZixHashFunc hash_func; - ZixEqualFunc key_equal_func; + ZixEqualFunc equal_func; ZixHashEntry** buckets; const unsigned* n_buckets; + size_t value_size; unsigned count; }; +static inline const void* +zix_hash_value(const ZixHashEntry* entry) +{ + return entry + 1; +} + ZIX_API ZixHash* zix_hash_new(ZixHashFunc hash_func, - ZixEqualFunc key_equal_func) + ZixEqualFunc equal_func, + size_t value_size) { ZixHash* hash = (ZixHash*)malloc(sizeof(ZixHash)); - hash->hash_func = hash_func; - hash->key_equal_func = key_equal_func; - hash->count = 0; - hash->n_buckets = &sizes[0]; - hash->buckets = (ZixHashEntry**)calloc(*hash->n_buckets, - sizeof(ZixHashEntry*)); + hash->hash_func = hash_func; + hash->equal_func = equal_func; + hash->n_buckets = &sizes[0]; + hash->value_size = value_size; + hash->count = 0; + hash->buckets = (ZixHashEntry**)calloc(*hash->n_buckets, + sizeof(ZixHashEntry*)); return hash; } @@ -75,41 +84,30 @@ zix_hash_free(ZixHash* hash) free(hash); } -ZIX_API unsigned -zix_string_hash(const void* key) -{ - // Trusty old DJB hash - const char* str = (const char*)key; - unsigned h = 5381; - for (const char* s = str; *s != '\0'; ++s) { - h = (h << 5) + h + *s; // h = h * 33 + c - } - return h; -} - -ZIX_API bool -zix_string_equal(const void* a, const void* b) +ZIX_API size_t +zix_hash_size(const ZixHash* hash) { - return !strcmp((const char*)a, (const char*)b); + return hash->count; } -ZIX_PRIVATE void -insert_entry(ZixHashEntry** bucket, ZixHashEntry* entry) +static inline void +insert_entry(ZixHashEntry** bucket, ZixHashEntry* entry) { entry->next = *bucket; *bucket = entry; } -ZIX_PRIVATE ZixStatus +static inline ZixStatus rehash(ZixHash* hash, unsigned new_n_buckets) { - ZixHashEntry** new_buckets = (ZixHashEntry**)calloc(new_n_buckets, - sizeof(ZixHashEntry*)); + ZixHashEntry** new_buckets = (ZixHashEntry**)calloc( + new_n_buckets, sizeof(ZixHashEntry*)); if (!new_buckets) { return ZIX_STATUS_NO_MEM; } - for (unsigned b = 0; b < *hash->n_buckets; ++b) { + const unsigned old_n_buckets = *hash->n_buckets; + for (unsigned b = 0; b < old_n_buckets; ++b) { for (ZixHashEntry* e = hash->buckets[b]; e;) { ZixHashEntry* const next = e->next; const unsigned h = e->hash % new_n_buckets; @@ -124,48 +122,52 @@ rehash(ZixHash* hash, unsigned new_n_buckets) return ZIX_STATUS_SUCCESS; } -ZIX_PRIVATE ZixHashEntry* +static inline ZixHashEntry* find_entry(const ZixHash* hash, const void* key, - unsigned h) + const unsigned h, + const unsigned h_nomod) { for (ZixHashEntry* e = hash->buckets[h]; e; e = e->next) { - if (hash->key_equal_func(e->key, key)) { + if (e->hash == h_nomod && hash->equal_func(zix_hash_value(e), key)) { return e; } } - return NULL; } -ZIX_API void* -zix_hash_find(const ZixHash* hash, const void* key) +ZIX_API const void* +zix_hash_find(const ZixHash* hash, const void* value) { - const unsigned h = hash->hash_func(key) % *hash->n_buckets; - ZixHashEntry* const entry = find_entry(hash, key, h); - return entry ? entry->data : 0; + const unsigned h_nomod = hash->hash_func(value); + const unsigned h = h_nomod % *hash->n_buckets; + ZixHashEntry* const entry = find_entry(hash, value, h, h_nomod); + return entry ? zix_hash_value(entry) : 0; } ZIX_API ZixStatus -zix_hash_insert(ZixHash* hash, const void* key, void* data) +zix_hash_insert(ZixHash* hash, const void* value, const void** inserted) { - unsigned h_nomod = hash->hash_func(key); + unsigned h_nomod = hash->hash_func(value); unsigned h = h_nomod % *hash->n_buckets; - ZixHashEntry* elem = find_entry(hash, key, h); + ZixHashEntry* elem = find_entry(hash, value, h, h_nomod); if (elem) { assert(elem->hash == h_nomod); + if (inserted) { + *inserted = zix_hash_value(elem); + } return ZIX_STATUS_EXISTS; } - elem = (ZixHashEntry*)malloc(sizeof(ZixHashEntry)); + elem = (ZixHashEntry*)malloc(sizeof(ZixHashEntry) + hash->value_size); if (!elem) { return ZIX_STATUS_NO_MEM; } - elem->key = key; - elem->data = data; elem->next = NULL; elem->hash = h_nomod; + memcpy(elem + 1, value, hash->value_size); + const unsigned next_n_buckets = *(hash->n_buckets + 1); if (next_n_buckets != 0 && (hash->count + 1) >= next_n_buckets) { if (!rehash(hash, next_n_buckets)) { @@ -175,17 +177,22 @@ zix_hash_insert(ZixHash* hash, const void* key, void* data) insert_entry(&hash->buckets[h], elem); ++hash->count; + if (inserted) { + *inserted = zix_hash_value(elem); + } return ZIX_STATUS_SUCCESS; } ZIX_API ZixStatus -zix_hash_remove(ZixHash* hash, const void* key) +zix_hash_remove(ZixHash* hash, const void* value) { - unsigned h = hash->hash_func(key) % *hash->n_buckets; + const unsigned h_nomod = hash->hash_func(value); + const unsigned h = h_nomod % *hash->n_buckets; ZixHashEntry** next_ptr = &hash->buckets[h]; for (ZixHashEntry* e = hash->buckets[h]; e; e = e->next) { - if (hash->key_equal_func(e->key, key)) { + if (h_nomod == e->hash && + hash->equal_func(zix_hash_value(e), value)) { *next_ptr = e->next; free(e); return ZIX_STATUS_SUCCESS; @@ -207,14 +214,14 @@ zix_hash_remove(ZixHash* hash, const void* key) } ZIX_API void -zix_hash_foreach(const ZixHash* hash, - void (*f)(const void* key, void* value, void* user_data), - void* user_data) +zix_hash_foreach(const ZixHash* hash, + ZixHashVisitFunc f, + void* user_data) { for (unsigned b = 0; b < *hash->n_buckets; ++b) { ZixHashEntry* bucket = hash->buckets[b]; - for (ZixHashEntry* e = bucket; e; e = e->next) { - f(e->key, e->data, user_data); + for (const ZixHashEntry* e = bucket; e; e = e->next) { + f(zix_hash_value(e), user_data); } } } diff --git a/src/zix/hash.h b/src/zix/hash.h index 4d93eaa..c992117 100644 --- a/src/zix/hash.h +++ b/src/zix/hash.h @@ -1,5 +1,5 @@ /* - Copyright 2011 David Robillard + Copyright 2011-2012 David Robillard Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above @@ -17,48 +17,121 @@ #ifndef ZIX_HASH_H #define ZIX_HASH_H +#include +#include + #include "zix/common.h" #ifdef __cplusplus extern "C" { #endif +/** + @addtogroup zix + @{ + @name Hash + @{ +*/ + typedef struct ZixHashImpl ZixHash; /** Function for computing the hash of an element. */ -typedef unsigned (*ZixHashFunc)(const void* key); +typedef uint32_t (*ZixHashFunc)(const void* value); +/** + Function to visit a hash element. +*/ +typedef void (*ZixHashVisitFunc)(const void* value, + void* user_data); + +/** + Create a new hash table. + + To minimize space overhead, unlike many hash tables this stores a single + value, not a key and a value. Any size of value can be stored, but all the + values in the hash table must be the same size, and the values must be safe + to copy with memcpy. To get key:value behaviour, simply insert a struct + with a key and value into the hash. + + @param hash_func The hashing function. + @param equal_func A function to test value equality. + @param value_size The size of the values to be stored. +*/ ZIX_API ZixHash* zix_hash_new(ZixHashFunc hash_func, - ZixEqualFunc key_equal_func); + ZixEqualFunc equal_func, + size_t value_size); +/** + Free @p hash. +*/ ZIX_API void zix_hash_free(ZixHash* hash); -ZIX_API unsigned -zix_string_hash(const void* key); - -ZIX_API bool -zix_string_equal(const void* a, const void* b); +/** + Return the number of elements in @p hash. +*/ +ZIX_API size_t +zix_hash_size(const ZixHash* hash); +/** + Insert an item into @p hash. + + If no matching value is found, ZIX_STATUS_SUCCESS will be returned, and @p + inserted will be pointed to the copy of @p value made in the new hash node. + + If a matching value already exists, ZIX_STATUS_EXISTS will be returned, and + @p inserted will be pointed to the existing value. + + @param hash The hash table. + @param value The value to be inserted. + @param inserted The copy of @p value in the hash table. + @return ZIX_STATUS_SUCCESS, ZIX_STATUS_EXISTS, or ZIX_STATUS_NO_MEM. +*/ ZIX_API ZixStatus -zix_hash_insert(ZixHash* hash, - const void* key, - void* data); +zix_hash_insert(ZixHash* hash, + const void* value, + const void** inserted); + +/** + Remove an item from @p hash. + @param hash The hash table. + @param value The value to remove. + @return ZIX_STATUS_SUCCES or ZIX_STATUS_NOT_FOUND. +*/ ZIX_API ZixStatus -zix_hash_remove(ZixHash* hash, const void* key); +zix_hash_remove(ZixHash* hash, + const void* value); + +/** + Search for an item in @p hash. -ZIX_API void* + @param hash The hash table. + @param value The value to search for. +*/ +ZIX_API const void* zix_hash_find(const ZixHash* hash, - const void* key); + const void* value); + +/** + Call @p f on each value in @p hash. + @param hash The hash table. + @param f The function to call on each value. + @param user_data The user_data parameter passed to @p f. +*/ ZIX_API void -zix_hash_foreach(const ZixHash* hash, - void (*f)(const void* key, void* value, void* user_data), - void* user_data); +zix_hash_foreach(const ZixHash* hash, + ZixHashVisitFunc f, + void* user_data); + +/** + @} + @} +*/ #ifdef __cplusplus } /* extern "C" */ -- cgit v1.2.1