aboutsummaryrefslogtreecommitdiffstats
path: root/src/zix
diff options
context:
space:
mode:
authorDavid Robillard <d@drobilla.net>2021-07-22 18:35:31 -0400
committerDavid Robillard <d@drobilla.net>2022-01-14 19:37:51 -0500
commit955be33cead7596506cf86211da6b0e53827ef96 (patch)
tree0dac1a7a31e8c50d1ba7200648d6ce402f97867d /src/zix
parent34852e8faa380f12b11522cfa998df4f260e3856 (diff)
downloadserd-955be33cead7596506cf86211da6b0e53827ef96.tar.gz
serd-955be33cead7596506cf86211da6b0e53827ef96.tar.bz2
serd-955be33cead7596506cf86211da6b0e53827ef96.zip
Avoid dynamic allocation when fetching interned nodes
This is more or less a total rewrite of SerdNodes and the underlying ZixHash to make efficient use of the new node construction API.
Diffstat (limited to 'src/zix')
-rw-r--r--src/zix/digest.c250
-rw-r--r--src/zix/digest.h72
-rw-r--r--src/zix/hash.c408
-rw-r--r--src/zix/hash.h279
4 files changed, 726 insertions, 283 deletions
diff --git a/src/zix/digest.c b/src/zix/digest.c
index 47d27b94..dcfadf8f 100644
--- a/src/zix/digest.c
+++ b/src/zix/digest.c
@@ -1,5 +1,5 @@
/*
- Copyright 2012-2020 David Robillard <d@drobilla.net>
+ Copyright 2012-2021 David Robillard <d@drobilla.net>
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
@@ -16,126 +16,218 @@
#include "zix/digest.h"
-#ifdef __SSE4_2__
-# include <smmintrin.h>
-#endif
-
#include <assert.h>
#include <stdint.h>
+#include <string.h>
-#ifdef __SSE4_2__
+#if defined(__clang__) && __clang_major__ >= 12
+# define FALLTHROUGH() __attribute__((fallthrough))
+#elif defined(__GNUC__) && !defined(__clang__)
+# define FALLTHROUGH() __attribute__((fallthrough))
+#else
+# define FALLTHROUGH()
+#endif
-// SSE 4.2 CRC32
+/*
+ 64-bit hash: Essentially fasthash64, implemented here in an aligned/padded
+ and a general UB-free variant.
+*/
-uint32_t
-zix_digest_start(void)
+static inline uint64_t
+mix64(uint64_t h)
{
- return 1;
+ h ^= h >> 23u;
+ h *= 0x2127599BF4325C37ull;
+ h ^= h >> 47u;
+ return h;
}
-uint32_t
-zix_digest_add(uint32_t hash, const void* const buf, const size_t len)
+uint64_t
+zix_digest64(const uint64_t seed, const void* const key, const size_t len)
{
- const uint8_t* str = (const uint8_t*)buf;
+ static const uint64_t m = 0x880355F21E6D1965ull;
-# ifdef __x86_64__
- for (size_t i = 0; i < (len / sizeof(uint64_t)); ++i) {
- hash = (uint32_t)_mm_crc32_u64(hash, *(const uint64_t*)str);
- str += sizeof(uint64_t);
- }
- if (len & sizeof(uint32_t)) {
- hash = _mm_crc32_u32(hash, *(const uint32_t*)str);
- str += sizeof(uint32_t);
- }
-# else
- for (size_t i = 0; i < (len / sizeof(uint32_t)); ++i) {
- hash = _mm_crc32_u32(hash, *(const uint32_t*)str);
- str += sizeof(uint32_t);
- }
-# endif
- if (len & sizeof(uint16_t)) {
- hash = _mm_crc32_u16(hash, *(const uint16_t*)str);
- str += sizeof(uint16_t);
+ const uint64_t* const blocks = (const uint64_t*)key;
+ const size_t n_blocks = len / sizeof(uint64_t);
+
+ uint64_t h = seed ^ (len * m);
+ for (size_t i = 0u; i < n_blocks; ++i) {
+ h ^= mix64(blocks[i]);
+ h *= m;
}
- if (len & sizeof(uint8_t)) {
- hash = _mm_crc32_u8(hash, *(const uint8_t*)str);
+
+ const uint8_t* const tail = (const unsigned char*)(blocks + n_blocks);
+ uint64_t v = 0u;
+
+ switch (len & 7u) {
+ case 7:
+ v |= (uint64_t)tail[6] << 48u;
+ FALLTHROUGH();
+ case 6:
+ v |= (uint64_t)tail[5] << 40u;
+ FALLTHROUGH();
+ case 5:
+ v |= (uint64_t)tail[4] << 32u;
+ FALLTHROUGH();
+ case 4:
+ v |= (uint64_t)tail[3] << 24u;
+ FALLTHROUGH();
+ case 3:
+ v |= (uint64_t)tail[2] << 16u;
+ FALLTHROUGH();
+ case 2:
+ v |= (uint64_t)tail[1] << 8u;
+ FALLTHROUGH();
+ case 1:
+ v |= (uint64_t)tail[0];
+
+ h ^= mix64(v);
+ h *= m;
}
- return hash;
+ return mix64(h);
}
-uint32_t
-zix_digest_add_64(uint32_t hash, const void* const buf, const size_t len)
+uint64_t
+zix_digest64_aligned(const uint64_t seed, const void* const key, size_t len)
{
- assert((uintptr_t)buf % sizeof(uint64_t) == 0);
- assert(len % sizeof(uint64_t) == 0);
-
-# ifdef __x86_64__
- const uint64_t* ptr = (const uint64_t*)buf;
+ static const uint64_t m = 0x880355F21E6D1965ull;
- for (size_t i = 0; i < (len / sizeof(uint64_t)); ++i) {
- hash = (uint32_t)_mm_crc32_u64(hash, *ptr);
- ++ptr;
- }
+ assert((uintptr_t)key % sizeof(uint64_t) == 0u);
+ assert(len % sizeof(uint64_t) == 0u);
- return hash;
-# else
- const uint32_t* ptr = (const uint32_t*)buf;
+ const uint64_t* const blocks = (const uint64_t*)key;
+ const size_t n_blocks = len / sizeof(uint64_t);
+ uint64_t h = seed ^ (len * m);
- for (size_t i = 0; i < (len / sizeof(uint32_t)); ++i) {
- hash = _mm_crc32_u32(hash, *ptr);
- ++ptr;
+ for (size_t i = 0u; i < n_blocks; ++i) {
+ h ^= mix64(blocks[i]);
+ h *= m;
}
- return hash;
-# endif
+ return mix64(h);
}
-uint32_t
-zix_digest_add_ptr(const uint32_t hash, const void* const ptr)
-{
-# ifdef __x86_64__
- return (uint32_t)_mm_crc32_u64(hash, (uintptr_t)ptr);
-# else
- return _mm_crc32_u32(hash, (uintptr_t)ptr);
-# endif
-}
+/*
+ 32-bit hash: Essentially murmur3, reimplemented here in an aligned/padded and
+ a general UB-free variant.
+*/
-#else
+/**
+ Rotate left by some count of bits.
-// Classic DJB hash
+ This is recognized by any halfway decent compiler and compiled to a single
+ instruction on architectures that have one.
+*/
+static inline uint32_t
+rotl32(const uint32_t val, const uint32_t bits)
+{
+ return ((val << bits) | (val >> (32 - bits)));
+}
-uint32_t
-zix_digest_start(void)
+static inline uint32_t
+mix32(uint32_t h)
{
- return 5381;
+ h ^= h >> 16u;
+ h *= 0x85EBCA6Bu;
+ h ^= h >> 13u;
+ h *= 0xC2B2AE35u;
+ h ^= h >> 16u;
+ return h;
}
uint32_t
-zix_digest_add(uint32_t hash, const void* const buf, const size_t len)
+zix_digest32(const uint32_t seed, const void* const key, const size_t len)
{
- const uint8_t* str = (const uint8_t*)buf;
+ static const uint32_t c1 = 0xCC9E2D51u;
+ static const uint32_t c2 = 0x1B873593u;
+
+ // Process as many 32-bit blocks as possible
+ const size_t n_blocks = len / sizeof(uint32_t);
+ const uint8_t* data = (const uint8_t*)key;
+ const uint8_t* const blocks_end = data + (n_blocks * sizeof(uint32_t));
+ uint32_t h = seed;
+ for (; data != blocks_end; data += sizeof(uint32_t)) {
+ uint32_t k = 0u;
+ memcpy(&k, data, sizeof(uint32_t));
+
+ k *= c1;
+ k = rotl32(k, 15);
+ k *= c2;
+
+ h ^= k;
+ h = rotl32(h, 13);
+ h = h * 5u + 0xE6546B64u;
+ }
- for (size_t i = 0; i < len; ++i) {
- hash = (hash << 5u) + hash + str[i];
+ // Process any trailing bytes
+ uint32_t k = 0u;
+ switch (len & 3u) {
+ case 3u:
+ k ^= (uint32_t)data[2u] << 16u;
+ FALLTHROUGH();
+ case 2u:
+ k ^= (uint32_t)data[1u] << 8u;
+ FALLTHROUGH();
+ case 1u:
+ k ^= (uint32_t)data[0u];
+
+ k *= c1;
+ k = rotl32(k, 15u);
+ k *= c2;
+ h ^= k;
}
- return hash;
+ return mix32(h ^ (uint32_t)len);
}
uint32_t
-zix_digest_add_64(uint32_t hash, const void* const buf, const size_t len)
+zix_digest32_aligned(const uint32_t seed,
+ const void* const key,
+ const size_t len)
{
- assert((uintptr_t)buf % sizeof(uint64_t) == 0);
- assert(len % sizeof(uint64_t) == 0);
+ static const uint32_t c1 = 0xCC9E2D51u;
+ static const uint32_t c2 = 0x1B873593u;
+
+ assert((uintptr_t)key % sizeof(uint32_t) == 0u);
+ assert(len % sizeof(uint32_t) == 0u);
+
+ const uint32_t* const blocks = (const uint32_t*)key;
+ const size_t n_blocks = len / sizeof(uint32_t);
+ uint32_t h = seed;
+ for (size_t i = 0u; i < n_blocks; ++i) {
+ uint32_t k = blocks[i];
+
+ k *= c1;
+ k = rotl32(k, 15);
+ k *= c2;
- return zix_digest_add(hash, buf, len);
+ h ^= k;
+ h = rotl32(h, 13);
+ h = h * 5u + 0xE6546B64u;
+ }
+
+ return mix32(h ^ (uint32_t)len);
}
-uint32_t
-zix_digest_add_ptr(const uint32_t hash, const void* const ptr)
+// Native word size wrapper
+
+size_t
+zix_digest(const size_t seed, const void* const buf, const size_t len)
{
- return zix_digest_add(hash, &ptr, sizeof(ptr));
+#if UINTPTR_MAX >= UINT64_MAX
+ return zix_digest64(seed, buf, len);
+#else
+ return zix_digest32(seed, buf, len);
+#endif
}
+size_t
+zix_digest_aligned(const size_t seed, const void* const buf, const size_t len)
+{
+#if UINTPTR_MAX >= UINT64_MAX
+ return zix_digest64_aligned(seed, buf, len);
+#else
+ return zix_digest32_aligned(seed, buf, len);
#endif
+}
diff --git a/src/zix/digest.h b/src/zix/digest.h
index 1fde77a9..6df70024 100644
--- a/src/zix/digest.h
+++ b/src/zix/digest.h
@@ -1,5 +1,5 @@
/*
- Copyright 2012-2020 David Robillard <d@drobilla.net>
+ Copyright 2012-2021 David Robillard <d@drobilla.net>
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
@@ -27,38 +27,80 @@ extern "C" {
#endif
/**
- Return an initial empty digest value.
+ @addtogroup zix
+ @{
+ @name Digest
+ Functions to generate a short "digest" of data with minimal collisions.
+
+ These are good general-purpose hash functions for indexing arbitrary data,
+ but are not necessarily stable across platforms and should never be used for
+ cryptographic purposes.
+ @{
*/
-ZIX_CONST_API
-uint32_t
-zix_digest_start(void);
/**
- Update `hash` to include `buf`, a buffer of `len` bytes.
+ Return a 32-bit hash of a buffer.
This can be used for any size or alignment.
*/
ZIX_PURE_API
uint32_t
-zix_digest_add(uint32_t hash, const void* buf, size_t len);
+zix_digest32(uint32_t seed, const void* buf, size_t len);
/**
- Update `hash` to include `buf`, a 64-bit aligned buffer of `len` bytes.
+ Return a 32-bit hash of an aligned buffer.
- Both `buf` and `len` must be evenly divisible by 8 (64 bits).
+ Both the buffer and size must be aligned to 32 bits. For data that fits
+ these requirements, this is equivalent to, but faster than, zix_digest32().
*/
ZIX_PURE_API
uint32_t
-zix_digest_add_64(uint32_t hash, const void* buf, size_t len);
+zix_digest32_aligned(uint32_t seed, const void* buf, size_t len);
/**
- Update `hash` to include `ptr`.
+ Return a 64-bit hash of a buffer.
- This hashes the value of the pointer itself, and does not dereference `ptr`.
+ This can be used for any size or alignment.
*/
-ZIX_CONST_API
-uint32_t
-zix_digest_add_ptr(uint32_t hash, const void* ptr);
+ZIX_PURE_API
+uint64_t
+zix_digest64(uint64_t seed, const void* buf, size_t len);
+
+/**
+ Return a 64-bit hash of an aligned buffer.
+
+ Both the buffer and size must be aligned to 64 bits. For data that fits
+ these requirements, this is equivalent to, but faster than, zix_digest64().
+*/
+ZIX_PURE_API
+uint64_t
+zix_digest64_aligned(uint64_t seed, const void* buf, size_t len);
+
+/**
+ Return a pointer-sized hash of a buffer.
+
+ This can be used for any size or alignment.
+
+ Internally, this simply dispatches to zix_digest32() or zix_digest64() as
+ appropriate.
+*/
+ZIX_PURE_API
+size_t
+zix_digest(size_t seed, const void* buf, size_t len);
+
+/**
+ Return a pointer-sized hash of an aligned buffer.
+
+ Both the buffer and size must be aligned to the pointer size. For data that
+ fits these requirements, this is equivalent to, but faster than,
+ zix_digest().
+
+ Internally, this simply dispatches to zix_digest32_aligned() or
+ zix_digest64_aligned() as appropriate.
+*/
+ZIX_PURE_API
+size_t
+zix_digest_aligned(size_t seed, const void* buf, size_t len);
#ifdef __cplusplus
} /* extern "C" */
diff --git a/src/zix/hash.c b/src/zix/hash.c
index d9556edc..8837378e 100644
--- a/src/zix/hash.c
+++ b/src/zix/hash.c
@@ -1,5 +1,5 @@
/*
- Copyright 2011-2020 David Robillard <d@drobilla.net>
+ Copyright 2011-2021 David Robillard <d@drobilla.net>
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
@@ -17,214 +17,332 @@
#include "zix/hash.h"
#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
#include <stdlib.h>
-#include <string.h>
-
-/**
- Primes, each slightly less than twice its predecessor, and as far away
- from powers of two as possible.
-*/
-static const unsigned sizes[] = {
- 53, 97, 193, 389, 769, 1543, 3079,
- 6151, 12289, 24593, 49157, 98317, 196613, 393241,
- 786433, 1572869, 3145739, 6291469, 12582917, 25165843, 50331653,
- 100663319, 201326611, 402653189, 805306457, 1610612741, 0};
typedef struct ZixHashEntry {
- struct ZixHashEntry* next; ///< Next entry in bucket
- uint32_t hash; ///< Non-modulo hash value
- // Value follows here (access with zix_hash_value)
+ ZixHashCode hash; ///< Non-folded hash value
+ ZixHashRecord* value; ///< Pointer to user-owned record
} ZixHashEntry;
struct ZixHashImpl {
- ZixHashFunc hash_func;
- ZixEqualFunc equal_func;
- ZixHashEntry** buckets;
- const unsigned* n_buckets;
- size_t value_size;
- unsigned count;
+ ZixKeyFunc key_func; ///< User-provided key accessor
+ ZixHashFunc hash_func; ///< User-provided hashing function
+ ZixKeyEqualFunc equal_func; ///< User-provided equality comparison function
+ size_t count; ///< Number of records stored in the table
+ size_t mask; ///< Bit mask for fast modulo (n_entries - 1)
+ size_t n_entries; ///< Power of two table size
+ ZixHashEntry* entries; ///< Pointer to dynamically allocated table
};
-static inline void*
-zix_hash_value(ZixHashEntry* entry)
-{
- return entry + 1;
-}
+static const size_t min_n_entries = 4u;
+static const size_t tombstone = 0xDEADu;
ZixHash*
-zix_hash_new(ZixHashFunc hash_func, ZixEqualFunc equal_func, size_t value_size)
+zix_hash_new(const ZixKeyFunc key_func,
+ const ZixHashFunc hash_func,
+ const ZixKeyEqualFunc equal_func)
{
- ZixHash* hash = (ZixHash*)malloc(sizeof(ZixHash));
- if (hash) {
- hash->hash_func = hash_func;
- hash->equal_func = equal_func;
- hash->n_buckets = &sizes[0];
- hash->value_size = value_size;
- hash->count = 0;
- if (!(hash->buckets =
- (ZixHashEntry**)calloc(*hash->n_buckets, sizeof(ZixHashEntry*)))) {
- free(hash);
- return NULL;
- }
+ ZixHash* const hash = (ZixHash*)malloc(sizeof(ZixHash));
+ if (!hash) {
+ return NULL;
}
+
+ hash->key_func = key_func;
+ hash->hash_func = hash_func;
+ hash->equal_func = equal_func;
+ hash->count = 0u;
+ hash->n_entries = min_n_entries;
+ hash->mask = hash->n_entries - 1u;
+
+ hash->entries = (ZixHashEntry*)calloc(hash->n_entries, sizeof(ZixHashEntry));
+ if (!hash->entries) {
+ free(hash);
+ return NULL;
+ }
+
return hash;
}
void
-zix_hash_free(ZixHash* hash)
+zix_hash_free(ZixHash* const hash)
{
- if (!hash) {
- return;
+ if (hash) {
+ free(hash->entries);
+ free(hash);
}
+}
- for (unsigned b = 0; b < *hash->n_buckets; ++b) {
- ZixHashEntry* bucket = hash->buckets[b];
- for (ZixHashEntry* e = bucket; e;) {
- ZixHashEntry* next = e->next;
- free(e);
- e = next;
- }
- }
+ZixHashIter
+zix_hash_begin(const ZixHash* const hash)
+{
+ return hash->entries[0u].value ? 0u : zix_hash_next(hash, 0u);
+}
- free(hash->buckets);
- free(hash);
+ZixHashIter
+zix_hash_end(const ZixHash* const hash)
+{
+ return hash->n_entries;
+}
+
+ZixHashRecord*
+zix_hash_get(const ZixHash* hash, const ZixHashIter i)
+{
+ assert(i < hash->n_entries);
+
+ return hash->entries[i].value;
+}
+
+ZixHashIter
+zix_hash_next(const ZixHash* const hash, ZixHashIter i)
+{
+ do {
+ ++i;
+ } while (i < hash->n_entries && !hash->entries[i].value);
+
+ return i;
}
size_t
-zix_hash_size(const ZixHash* hash)
+zix_hash_size(const ZixHash* const hash)
{
return hash->count;
}
-static inline void
-insert_entry(ZixHashEntry** bucket, ZixHashEntry* entry)
+static inline size_t
+fold_hash(const ZixHashCode h_nomod, const size_t mask)
+{
+ return h_nomod & mask;
+}
+
+static inline bool
+is_tombstone(const ZixHashEntry* const entry)
+{
+ return !entry->value && entry->hash == tombstone;
+}
+
+static inline bool
+is_empty(const ZixHashEntry* const entry)
{
- entry->next = *bucket;
- *bucket = entry;
+ return !entry->value && !entry->hash;
}
-static inline ZixStatus
-rehash(ZixHash* hash, unsigned new_n_buckets)
+static inline bool
+is_match(const ZixHash* const hash,
+ const ZixHashCode code,
+ const size_t entry_index,
+ ZixKeyEqualFunc predicate,
+ const void* const user_data)
{
- ZixHashEntry** new_buckets =
- (ZixHashEntry**)calloc(new_n_buckets, sizeof(ZixHashEntry*));
- if (!new_buckets) {
+ const ZixHashEntry* const entry = &hash->entries[entry_index];
+
+ return entry->value && entry->hash == code &&
+ predicate(hash->key_func(entry->value), user_data);
+}
+
+static inline size_t
+next_index(const ZixHash* const hash, const size_t i)
+{
+ return (i == hash->mask) ? 0u : (i + 1u);
+}
+
+static inline ZixHashIter
+find_entry(const ZixHash* const hash,
+ const ZixHashKey* const key,
+ const size_t h,
+ const ZixHashCode code)
+{
+ size_t i = h;
+
+ while (!is_match(hash, code, i, hash->equal_func, key) &&
+ !is_empty(&hash->entries[i])) {
+ i = next_index(hash, i);
+ }
+
+ return i;
+}
+
+static ZixStatus
+rehash(ZixHash* const hash, const size_t old_n_entries)
+{
+ ZixHashEntry* const old_entries = hash->entries;
+ const size_t new_n_entries = hash->n_entries;
+
+ // Replace the array in the hash first so we can use find_entry() normally
+ hash->entries = (ZixHashEntry*)calloc(new_n_entries, sizeof(ZixHashEntry));
+ if (!hash->entries) {
return ZIX_STATUS_NO_MEM;
}
- const unsigned old_n_buckets = *hash->n_buckets;
- for (unsigned b = 0; b < old_n_buckets; ++b) {
- for (ZixHashEntry* e = hash->buckets[b]; e;) {
- ZixHashEntry* const next = e->next;
- const unsigned h = e->hash % new_n_buckets;
- insert_entry(&new_buckets[h], e);
- e = next;
+ // Reinsert every element into the new array
+ for (size_t i = 0u; i < old_n_entries; ++i) {
+ ZixHashEntry* const entry = &old_entries[i];
+
+ if (entry->value) {
+ assert(hash->mask == hash->n_entries - 1u);
+ const size_t new_h = fold_hash(entry->hash, hash->mask);
+ const size_t new_i = find_entry(hash, entry->value, new_h, entry->hash);
+
+ hash->entries[new_i] = *entry;
}
}
- free(hash->buckets);
- hash->buckets = new_buckets;
+ free(old_entries);
+ return ZIX_STATUS_SUCCESS;
+}
+
+static ZixStatus
+grow(ZixHash* const hash)
+{
+ const size_t old_n_entries = hash->n_entries;
+
+ hash->n_entries <<= 1u;
+ hash->mask = hash->n_entries - 1u;
+
+ return rehash(hash, old_n_entries);
+}
+
+static ZixStatus
+shrink(ZixHash* const hash)
+{
+ if (hash->n_entries > min_n_entries) {
+ const size_t old_n_entries = hash->n_entries;
+
+ hash->n_entries >>= 1u;
+ hash->mask = hash->n_entries - 1u;
+
+ return rehash(hash, old_n_entries);
+ }
return ZIX_STATUS_SUCCESS;
}
-static inline ZixHashEntry*
-find_entry(const ZixHash* hash,
- const void* key,
- const unsigned h,
- const unsigned h_nomod)
+ZixHashIter
+zix_hash_find(const ZixHash* const hash, const ZixHashKey* const key)
{
- for (ZixHashEntry* e = hash->buckets[h]; e; e = e->next) {
- if (e->hash == h_nomod && hash->equal_func(zix_hash_value(e), key)) {
- return e;
+ const ZixHashCode h_nomod = hash->hash_func(key);
+ const size_t h = fold_hash(h_nomod, hash->mask);
+ const ZixHashIter i = find_entry(hash, key, h, h_nomod);
+
+ return is_empty(&hash->entries[i]) ? hash->n_entries : i;
+}
+
+ZixHashRecord*
+zix_hash_find_record(const ZixHash* const hash, const ZixHashKey* const key)
+{
+ const ZixHashCode h_nomod = hash->hash_func(key);
+ const size_t h = fold_hash(h_nomod, hash->mask);
+
+ return hash->entries[find_entry(hash, key, h, h_nomod)].value;
+}
+
+ZixHashInsertPlan
+zix_hash_plan_insert_prehashed(const ZixHash* const hash,
+ const ZixHashCode code,
+ const ZixKeyMatchFunc predicate,
+ const void* const user_data)
+{
+ // Calculate an ideal initial position
+ ZixHashInsertPlan pos = {code, fold_hash(code, hash->mask)};
+
+ // Search for a free position starting at the ideal one
+ const size_t start_index = pos.index;
+ size_t first_tombstone = SIZE_MAX;
+ while (!is_empty(&hash->entries[pos.index])) {
+ if (is_match(hash, code, pos.index, predicate, user_data)) {
+ return pos;
+ }
+
+ if (first_tombstone == SIZE_MAX &&
+ is_tombstone(&hash->entries[pos.index])) {
+ first_tombstone = pos.index; // Remember the first/best free index
+ }
+
+ if ((pos.index = next_index(hash, pos.index)) == start_index) {
+ break;
}
}
- return NULL;
+
+ // If we found a tombstone before an empty slot, place the new element there
+ pos.index = first_tombstone < SIZE_MAX ? first_tombstone : pos.index;
+ assert(!hash->entries[pos.index].value);
+
+ return pos;
}
-void*
-zix_hash_find(const ZixHash* hash, const void* value)
+ZixHashInsertPlan
+zix_hash_plan_insert(const ZixHash* const hash, const ZixHashKey* const key)
{
- const unsigned h_nomod = hash->hash_func(value);
- const unsigned h = h_nomod % *hash->n_buckets;
- ZixHashEntry* const entry = find_entry(hash, value, h, h_nomod);
- return entry ? zix_hash_value(entry) : 0;
+ return zix_hash_plan_insert_prehashed(
+ hash, hash->hash_func(key), hash->equal_func, key);
}
-ZixStatus
-zix_hash_insert(ZixHash* hash, const void* value, void** inserted)
+ZixHashRecord*
+zix_hash_record_at(const ZixHash* const hash, const ZixHashInsertPlan position)
{
- unsigned h_nomod = hash->hash_func(value);
- unsigned h = h_nomod % *hash->n_buckets;
+ return hash->entries[position.index].value;
+}
- ZixHashEntry* elem = find_entry(hash, value, h, h_nomod);
- if (elem) {
- assert(elem->hash == h_nomod);
- if (inserted) {
- *inserted = zix_hash_value(elem);
- }
+ZixStatus
+zix_hash_insert_at(ZixHash* const hash,
+ const ZixHashInsertPlan position,
+ ZixHashRecord* const record)
+{
+ if (hash->entries[position.index].value) {
return ZIX_STATUS_EXISTS;
}
- elem = (ZixHashEntry*)malloc(sizeof(ZixHashEntry) + hash->value_size);
- if (!elem) {
- return ZIX_STATUS_NO_MEM;
- }
- elem->next = NULL;
- elem->hash = h_nomod;
- memcpy(elem + 1, value, hash->value_size);
-
- const unsigned next_n_buckets = *(hash->n_buckets + 1);
- if (next_n_buckets != 0 && (hash->count + 1) >= next_n_buckets) {
- if (!rehash(hash, next_n_buckets)) {
- h = h_nomod % *(++hash->n_buckets);
- }
- }
+ // Set entry to new value
+ ZixHashEntry* const entry = &hash->entries[position.index];
+ assert(!entry->value);
+ entry->hash = position.code;
+ entry->value = record;
- insert_entry(&hash->buckets[h], elem);
- ++hash->count;
- if (inserted) {
- *inserted = zix_hash_value(elem);
+ // Update size and rehash if we exceeded the maximum load
+ const size_t max_load = hash->n_entries / 2u + hash->n_entries / 8u;
+ if (++hash->count >= max_load) {
+ return grow(hash);
}
+
return ZIX_STATUS_SUCCESS;
}
ZixStatus
-zix_hash_remove(ZixHash* hash, const void* value)
-{
- const unsigned h_nomod = hash->hash_func(value);
- const unsigned h = h_nomod % *hash->n_buckets;
-
- ZixHashEntry** next_ptr = &hash->buckets[h];
- for (ZixHashEntry* e = hash->buckets[h]; e; e = e->next) {
- if (h_nomod == e->hash && hash->equal_func(zix_hash_value(e), value)) {
- *next_ptr = e->next;
- free(e);
- --hash->count;
- return ZIX_STATUS_SUCCESS;
- }
- next_ptr = &e->next;
- }
+zix_hash_insert(ZixHash* const hash, ZixHashRecord* const record)
+{
+ const ZixHashKey* const key = hash->key_func(record);
+ const ZixHashInsertPlan position = zix_hash_plan_insert(hash, key);
- if (hash->n_buckets != sizes) {
- const unsigned prev_n_buckets = *(hash->n_buckets - 1);
- if (hash->count - 1 <= prev_n_buckets) {
- if (!rehash(hash, prev_n_buckets)) {
- --hash->n_buckets;
- }
- }
+ return zix_hash_insert_at(hash, position, record);
+}
+
+ZixStatus
+zix_hash_erase(ZixHash* const hash,
+ const ZixHashIter i,
+ ZixHashRecord** const removed)
+{
+ // Replace entry with a tombstone
+ *removed = hash->entries[i].value;
+ hash->entries[i].hash = tombstone;
+ hash->entries[i].value = NULL;
+
+ // Decrease element count and rehash if necessary
+ --hash->count;
+ if (hash->count < hash->n_entries / 4u) {
+ return shrink(hash);
}
- return ZIX_STATUS_NOT_FOUND;
+ return ZIX_STATUS_SUCCESS;
}
-void
-zix_hash_foreach(ZixHash* hash, ZixHashVisitFunc f, void* user_data)
+ZixStatus
+zix_hash_remove(ZixHash* const hash,
+ const ZixHashKey* const key,
+ ZixHashRecord** const removed)
{
- for (unsigned b = 0; b < *hash->n_buckets; ++b) {
- ZixHashEntry* bucket = hash->buckets[b];
- for (ZixHashEntry* e = bucket; e; e = e->next) {
- f(zix_hash_value(e), user_data);
- }
- }
+ const ZixHashIter i = zix_hash_find(hash, key);
+
+ return i == hash->n_entries ? ZIX_STATUS_NOT_FOUND
+ : zix_hash_erase(hash, i, removed);
}
diff --git a/src/zix/hash.h b/src/zix/hash.h
index 39050381..f08d267b 100644
--- a/src/zix/hash.h
+++ b/src/zix/hash.h
@@ -1,5 +1,5 @@
/*
- Copyright 2011-2020 David Robillard <d@drobilla.net>
+ Copyright 2011-2021 David Robillard <d@drobilla.net>
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
@@ -19,8 +19,8 @@
#include "zix/common.h"
+#include <stdbool.h>
#include <stddef.h>
-#include <stdint.h>
#ifdef __cplusplus
extern "C" {
@@ -33,98 +33,289 @@ extern "C" {
@{
*/
+// ZIX_HASH_KEY_TYPE can be defined to make the API more type-safe
+#if defined(ZIX_HASH_KEY_TYPE)
+typedef ZIX_HASH_KEY_TYPE ZixHashKey;
+#else
+typedef void ZixHashKey;
+#endif
+
+// ZIX_HASH_RECORD_TYPE can be defined to make the API more type-safe
+#if defined(ZIX_HASH_RECORD_TYPE)
+typedef ZIX_HASH_RECORD_TYPE ZixHashRecord;
+#else
+typedef void ZixHashRecord;
+#endif
+
+// ZIX_HASH_SEARCH_DATA_TYPE can be defined to make the API more type-safe
+#if defined(ZIX_HASH_SEARCH_DATA_TYPE)
+typedef ZIX_HASH_SEARCH_DATA_TYPE ZixHashSearchData;
+#else
+typedef void ZixHashSearchData;
+#endif
+
+/**
+ A hash table.
+
+ This is an open addressing hash table that stores pointers to arbitrary user
+ data. Internally, everything is stored in a single flat array that is
+ resized when necessary.
+
+ The single user-provided pointer that is stored in the table is called a
+ "record". A record contains a "key", which is accessed via a user-provided
+ function. This design supports storing large records (which may be
+ expensive to allocate/construct) without requiring an entire record to
+ search. Simple atomic values can be stored by providing a trivial identity
+ function as a key function.
+
+ The table uses power of 2 sizes with a growth factor of 2, so that hash
+ values can be folded into an array index using bitwise AND as a fast modulo.
+ This means that only the necessary low bits of the hash value will be used,
+ so the hash function must be well-balanced within this range. More or less
+ any good modern hash algorithm will be fine, but beware, for example, hash
+ functions that assume they are targeting a table with a prime size.
+
+ Since this doubles and halves in size, it may not be an optimal choice if
+ memory reuse is a priority. A growth factor of 1.5 with fast range
+ reduction may be a better choice there, at the cost of requiring 128-bit
+ arithmetic on 64-bit platforms, and indexing operations being slightly more
+ expensive.
+*/
typedef struct ZixHashImpl ZixHash;
+/// A full hash code for a key which is not folded down to the table size
+typedef size_t ZixHashCode;
+
/**
- Function for computing the hash of an element.
+ An iterator to an entry in a hash table.
+
+ This is really just an index, but should be considered opaque to the user
+ and only used via the provided API and equality comparison.
*/
-typedef uint32_t (*ZixHashFunc)(const void* value);
+typedef size_t ZixHashIter;
+
+/// User function for computing the hash of a key
+typedef ZixHashCode (*ZixHashFunc)(const ZixHashKey* key);
+
+/// User function for determining if two keys are truly equal
+typedef bool (*ZixKeyEqualFunc)(const ZixHashKey* a, const ZixHashKey* b);
+
+/// User function for determining if a key matches in a custom search
+typedef bool (*ZixKeyMatchFunc)(const ZixHashKey* key,
+ const ZixHashSearchData* user_data);
+
+/// User function for getting the key of a record
+typedef const ZixHashKey* (*ZixKeyFunc)(const ZixHashRecord* record);
/**
- Function to visit a hash element.
+ A "plan" (position) to insert a record in a hash table.
+
+ This contains everything necessary to insert a record, except the record
+ itself. It is exposed to split up insertion so that records only need to be
+ allocated if an existing record is not found, but the contents should be
+ considered opaque to the user.
*/
-typedef void (*ZixHashVisitFunc)(void* value, void* user_data);
+typedef struct {
+ ZixHashCode code; ///< Hash code used to find this position
+ ZixHashIter index; ///< Index into hash table
+} ZixHashInsertPlan;
/**
Create a new hash table.
- To minimize space overhead, unlike many hash tables this stores a single
- value, not a key and a value. Any size of value can be stored, but all the
- values in the hash table must be the same size, and the values must be safe
- to copy with memcpy. To get key:value behaviour, simply insert a struct
- with a key and value into the hash.
-
- @param hash_func The hashing function.
- @param equal_func A function to test value equality.
- @param value_size The size of the values to be stored.
+ @param key_func A function to retrieve the key from a record.
+ @param hash_func The key hashing function.
+ @param equal_func A function to test keys for equality.
*/
ZIX_API
ZixHash*
-zix_hash_new(ZixHashFunc hash_func, ZixEqualFunc equal_func, size_t value_size);
+zix_hash_new(ZixKeyFunc key_func,
+ ZixHashFunc hash_func,
+ ZixKeyEqualFunc equal_func);
-/**
- Free `hash`.
-*/
+/// Free `hash`
ZIX_API
void
zix_hash_free(ZixHash* hash);
-/**
- Return the number of elements in `hash`.
-*/
+/// Return an iterator to the first record in a hash, or the end if it is empty
+ZIX_PURE_API
+ZixHashIter
+zix_hash_begin(const ZixHash* hash);
+
+/// Return an iterator one past the last possible record in a hash
+ZIX_PURE_API
+ZixHashIter
+zix_hash_end(const ZixHash* hash);
+
+/// Return the record pointed to by an iterator
+ZIX_PURE_API
+ZixHashRecord*
+zix_hash_get(const ZixHash* hash, ZixHashIter i);
+
+/// Return an iterator that has been advanced to the next record in a hash
+ZIX_PURE_API
+ZixHashIter
+zix_hash_next(const ZixHash* hash, ZixHashIter i);
+
+/// Return the number of elements in a hash
ZIX_PURE_API
size_t
zix_hash_size(const ZixHash* hash);
/**
- Insert an item into `hash`.
+ Find the best position to insert a record with the given key.
+
+ This searches the hash table and returns either the position of an existing
+ equivalent record, or the best available position to insert a new one. The
+ difference can be determined with zix_hash_record_at().
+
+ If the returned position is not occupied, then it is valid to insert a
+ record with this key using zix_hash_insert_at() until the hash table is
+ modified (which invalidates the position).
+*/
+ZIX_API
+ZixHashInsertPlan
+zix_hash_plan_insert(const ZixHash* hash, const ZixHashKey* key);
+
+/**
+ Find the best position to insert a record with a custom search.
- If no matching value is found, ZIX_STATUS_SUCCESS will be returned, and @p
- inserted will be pointed to the copy of `value` made in the new hash node.
+ This is an advanced low-level version of zix_hash_plan_insert(). It allows
+ a precalculated hash code to be given, along with a custom search predicate.
+ These must be compatible with the functions that manage the hash table:
+ trivial usage would be to pass the equality function used by the hash and
+ the key to search for.
- If a matching value already exists, ZIX_STATUS_EXISTS will be returned, and
- `inserted` will be pointed to the existing value.
+ This is exposed to make it possible to avoid constructing a key at all when
+ potentially inserting. For example, if keys are structures with a fixed
+ header and a variably-sized body, and you have a separate header and body
+ this can be used to find an insert position without having to allocate a
+ contiguous key.
+
+ Note that care must be taken when using this function: improper use can
+ corrupt the hash table. The hash code given must be correct for the key to
+ be inserted, and the predicate must return true only if the key it is called
+ with (the first argument) matches the key to be inserted.
+*/
+ZIX_API
+ZixHashInsertPlan
+zix_hash_plan_insert_prehashed(const ZixHash* hash,
+ ZixHashCode code,
+ ZixKeyMatchFunc predicate,
+ const ZixHashSearchData* user_data);
+
+/**
+ Return the record at the given position, or null.
+
+ This is used to determine if a position returned from zix_hash_plan_insert()
+ can be used to insert a new record, or to access the existing matching
+ record.
+*/
+ZIX_PURE_API
+ZixHashRecord*
+zix_hash_record_at(const ZixHash* hash, ZixHashInsertPlan position);
+
+/**
+ Insert a record at a specific position.
+
+ The position must have been found with an earlier call to
+ zix_hash_plan_insert(), and no modifications must have been made to the hash
+ table since.
+
+ @param hash The hash table.
+
+ @param position The position for the new record.
+
+ @param record The record to insert which, on success, can now be considered
+ owned by the hash table.
+
+ @return ZIX_STATUS_SUCCESS, ZIX_STATUS_EXISTS if a record already exists at
+ this position, or ZIX_STATUS_NO_MEM if growing the hash table failed.
+*/
+ZIX_API
+ZixStatus
+zix_hash_insert_at(ZixHash* hash,
+ ZixHashInsertPlan position,
+ ZixHashRecord* record);
+
+/**
+ Insert a record.
+
+ This is a trivial wrapper for zix_hash_plan_insert() and
+ zix_hash_insert_at() that is more convenient when record construction is not
+ expensive.
@param hash The hash table.
- @param value The value to be inserted.
- @param inserted The copy of `value` in the hash table.
+
+ @param record The record to insert which, on success, can now be considered
+ owned by the hash table.
+
@return ZIX_STATUS_SUCCESS, ZIX_STATUS_EXISTS, or ZIX_STATUS_NO_MEM.
*/
ZIX_API
ZixStatus
-zix_hash_insert(ZixHash* hash, const void* value, void** inserted);
+zix_hash_insert(ZixHash* hash, ZixHashRecord* record);
/**
- Remove an item from `hash`.
+ Erase a record at a specific position.
+
+ @param hash The hash table to remove the record from.
+
+ @param i Iterator to the record to remove. This must be a valid iterator
+ from an earlier call to zix_hash_find(), that is, the hash table must not
+ have been modified since.
+
+ @param removed Set to the removed record, or null.
+
+ @return ZIX_STATUS_SUCCES or ZIX_STATUS_BAD_ARG if `i` does not point at a
+ removable record.
+*/
+ZIX_API
+ZixStatus
+zix_hash_erase(ZixHash* hash, ZixHashIter i, ZixHashRecord** removed);
+
+/**
+ Remove a record.
@param hash The hash table.
- @param value The value to remove.
+ @param key The key of the record to remove.
+ @param removed Set to the removed record, or null.
@return ZIX_STATUS_SUCCES or ZIX_STATUS_NOT_FOUND.
*/
ZIX_API
ZixStatus
-zix_hash_remove(ZixHash* hash, const void* value);
+zix_hash_remove(ZixHash* hash, const ZixHashKey* key, ZixHashRecord** removed);
/**
- Search for an item in `hash`.
+ Find the position of a record with a given key.
- @param hash The hash table.
- @param value The value to search for.
+ @param hash The hash table to search.
+
+ @param key The key of the desired record.
+
+ @return An iterator to the matching record, or the end iterator if no such
+ record exists.
*/
ZIX_API
-void*
-zix_hash_find(const ZixHash* hash, const void* value);
+ZixHashIter
+zix_hash_find(const ZixHash* hash, const ZixHashKey* key);
/**
- Call `f` on each value in `hash`.
+ Find a record with a given key.
- @param hash The hash table.
- @param f The function to call on each value.
- @param user_data The user_data parameter passed to `f`.
+ This is essentially the same as zix_hash_find(), but returns a pointer to
+ the record for convenience.
+
+ @param hash The hash table to search.
+
+ @param key The key of the desired record.
+
+ @return A pointer to the matching record, of null if no such record exists.
*/
ZIX_API
-void
-zix_hash_foreach(ZixHash* hash, ZixHashVisitFunc f, void* user_data);
+ZixHashRecord*
+zix_hash_find_record(const ZixHash* hash, const ZixHashKey* key);
/**
@}