From 955be33cead7596506cf86211da6b0e53827ef96 Mon Sep 17 00:00:00 2001
From: David Robillard <d@drobilla.net>
Date: Thu, 22 Jul 2021 18:35:31 -0400
Subject: Avoid dynamic allocation when fetching interned nodes

This is more or less a total rewrite of SerdNodes and the underlying ZixHash to
make efficient use of the new node construction API.
---
 src/zix/digest.c | 250 +++++++++++++++++++++++-----------
 src/zix/digest.h |  72 ++++++++--
 src/zix/hash.c   | 408 +++++++++++++++++++++++++++++++++++--------------------
 src/zix/hash.h   | 279 +++++++++++++++++++++++++++++++------
 4 files changed, 726 insertions(+), 283 deletions(-)

(limited to 'src/zix')

diff --git a/src/zix/digest.c b/src/zix/digest.c
index 47d27b94..dcfadf8f 100644
--- a/src/zix/digest.c
+++ b/src/zix/digest.c
@@ -1,5 +1,5 @@
 /*
-  Copyright 2012-2020 David Robillard <d@drobilla.net>
+  Copyright 2012-2021 David Robillard <d@drobilla.net>
 
   Permission to use, copy, modify, and/or distribute this software for any
   purpose with or without fee is hereby granted, provided that the above
@@ -16,126 +16,218 @@
 
 #include "zix/digest.h"
 
-#ifdef __SSE4_2__
-#  include <smmintrin.h>
-#endif
-
 #include <assert.h>
 #include <stdint.h>
+#include <string.h>
 
-#ifdef __SSE4_2__
+#if defined(__clang__) && __clang_major__ >= 12
+#  define FALLTHROUGH() __attribute__((fallthrough))
+#elif defined(__GNUC__) && !defined(__clang__)
+#  define FALLTHROUGH() __attribute__((fallthrough))
+#else
+#  define FALLTHROUGH()
+#endif
 
-// SSE 4.2 CRC32
+/*
+  64-bit hash: Essentially fasthash64, implemented here in an aligned/padded
+  and a general UB-free variant.
+*/
 
-uint32_t
-zix_digest_start(void)
+static inline uint64_t
+mix64(uint64_t h)
 {
-  return 1;
+  h ^= h >> 23u;
+  h *= 0x2127599BF4325C37ull;
+  h ^= h >> 47u;
+  return h;
 }
 
-uint32_t
-zix_digest_add(uint32_t hash, const void* const buf, const size_t len)
+uint64_t
+zix_digest64(const uint64_t seed, const void* const key, const size_t len)
 {
-  const uint8_t* str = (const uint8_t*)buf;
+  static const uint64_t m = 0x880355F21E6D1965ull;
 
-#  ifdef __x86_64__
-  for (size_t i = 0; i < (len / sizeof(uint64_t)); ++i) {
-    hash = (uint32_t)_mm_crc32_u64(hash, *(const uint64_t*)str);
-    str += sizeof(uint64_t);
-  }
-  if (len & sizeof(uint32_t)) {
-    hash = _mm_crc32_u32(hash, *(const uint32_t*)str);
-    str += sizeof(uint32_t);
-  }
-#  else
-  for (size_t i = 0; i < (len / sizeof(uint32_t)); ++i) {
-    hash = _mm_crc32_u32(hash, *(const uint32_t*)str);
-    str += sizeof(uint32_t);
-  }
-#  endif
-  if (len & sizeof(uint16_t)) {
-    hash = _mm_crc32_u16(hash, *(const uint16_t*)str);
-    str += sizeof(uint16_t);
+  const uint64_t* const blocks   = (const uint64_t*)key;
+  const size_t          n_blocks = len / sizeof(uint64_t);
+
+  uint64_t h = seed ^ (len * m);
+  for (size_t i = 0u; i < n_blocks; ++i) {
+    h ^= mix64(blocks[i]);
+    h *= m;
   }
-  if (len & sizeof(uint8_t)) {
-    hash = _mm_crc32_u8(hash, *(const uint8_t*)str);
+
+  const uint8_t* const tail = (const unsigned char*)(blocks + n_blocks);
+  uint64_t             v    = 0u;
+
+  switch (len & 7u) {
+  case 7:
+    v |= (uint64_t)tail[6] << 48u;
+    FALLTHROUGH();
+  case 6:
+    v |= (uint64_t)tail[5] << 40u;
+    FALLTHROUGH();
+  case 5:
+    v |= (uint64_t)tail[4] << 32u;
+    FALLTHROUGH();
+  case 4:
+    v |= (uint64_t)tail[3] << 24u;
+    FALLTHROUGH();
+  case 3:
+    v |= (uint64_t)tail[2] << 16u;
+    FALLTHROUGH();
+  case 2:
+    v |= (uint64_t)tail[1] << 8u;
+    FALLTHROUGH();
+  case 1:
+    v |= (uint64_t)tail[0];
+
+    h ^= mix64(v);
+    h *= m;
   }
 
-  return hash;
+  return mix64(h);
 }
 
-uint32_t
-zix_digest_add_64(uint32_t hash, const void* const buf, const size_t len)
+uint64_t
+zix_digest64_aligned(const uint64_t seed, const void* const key, size_t len)
 {
-  assert((uintptr_t)buf % sizeof(uint64_t) == 0);
-  assert(len % sizeof(uint64_t) == 0);
-
-#  ifdef __x86_64__
-  const uint64_t* ptr = (const uint64_t*)buf;
+  static const uint64_t m = 0x880355F21E6D1965ull;
 
-  for (size_t i = 0; i < (len / sizeof(uint64_t)); ++i) {
-    hash = (uint32_t)_mm_crc32_u64(hash, *ptr);
-    ++ptr;
-  }
+  assert((uintptr_t)key % sizeof(uint64_t) == 0u);
+  assert(len % sizeof(uint64_t) == 0u);
 
-  return hash;
-#  else
-  const uint32_t* ptr = (const uint32_t*)buf;
+  const uint64_t* const blocks   = (const uint64_t*)key;
+  const size_t          n_blocks = len / sizeof(uint64_t);
+  uint64_t              h        = seed ^ (len * m);
 
-  for (size_t i = 0; i < (len / sizeof(uint32_t)); ++i) {
-    hash = _mm_crc32_u32(hash, *ptr);
-    ++ptr;
+  for (size_t i = 0u; i < n_blocks; ++i) {
+    h ^= mix64(blocks[i]);
+    h *= m;
   }
 
-  return hash;
-#  endif
+  return mix64(h);
 }
 
-uint32_t
-zix_digest_add_ptr(const uint32_t hash, const void* const ptr)
-{
-#  ifdef __x86_64__
-  return (uint32_t)_mm_crc32_u64(hash, (uintptr_t)ptr);
-#  else
-  return _mm_crc32_u32(hash, (uintptr_t)ptr);
-#  endif
-}
+/*
+  32-bit hash: Essentially murmur3, reimplemented here in an aligned/padded and
+  a general UB-free variant.
+*/
 
-#else
+/**
+   Rotate left by some count of bits.
 
-// Classic DJB hash
+   This is recognized by any halfway decent compiler and compiled to a single
+   instruction on architectures that have one.
+*/
+static inline uint32_t
+rotl32(const uint32_t val, const uint32_t bits)
+{
+  return ((val << bits) | (val >> (32 - bits)));
+}
 
-uint32_t
-zix_digest_start(void)
+static inline uint32_t
+mix32(uint32_t h)
 {
-  return 5381;
+  h ^= h >> 16u;
+  h *= 0x85EBCA6Bu;
+  h ^= h >> 13u;
+  h *= 0xC2B2AE35u;
+  h ^= h >> 16u;
+  return h;
 }
 
 uint32_t
-zix_digest_add(uint32_t hash, const void* const buf, const size_t len)
+zix_digest32(const uint32_t seed, const void* const key, const size_t len)
 {
-  const uint8_t* str = (const uint8_t*)buf;
+  static const uint32_t c1 = 0xCC9E2D51u;
+  static const uint32_t c2 = 0x1B873593u;
+
+  // Process as many 32-bit blocks as possible
+  const size_t         n_blocks   = len / sizeof(uint32_t);
+  const uint8_t*       data       = (const uint8_t*)key;
+  const uint8_t* const blocks_end = data + (n_blocks * sizeof(uint32_t));
+  uint32_t             h          = seed;
+  for (; data != blocks_end; data += sizeof(uint32_t)) {
+    uint32_t k = 0u;
+    memcpy(&k, data, sizeof(uint32_t));
+
+    k *= c1;
+    k = rotl32(k, 15);
+    k *= c2;
+
+    h ^= k;
+    h = rotl32(h, 13);
+    h = h * 5u + 0xE6546B64u;
+  }
 
-  for (size_t i = 0; i < len; ++i) {
-    hash = (hash << 5u) + hash + str[i];
+  // Process any trailing bytes
+  uint32_t k = 0u;
+  switch (len & 3u) {
+  case 3u:
+    k ^= (uint32_t)data[2u] << 16u;
+    FALLTHROUGH();
+  case 2u:
+    k ^= (uint32_t)data[1u] << 8u;
+    FALLTHROUGH();
+  case 1u:
+    k ^= (uint32_t)data[0u];
+
+    k *= c1;
+    k = rotl32(k, 15u);
+    k *= c2;
+    h ^= k;
   }
 
-  return hash;
+  return mix32(h ^ (uint32_t)len);
 }
 
 uint32_t
-zix_digest_add_64(uint32_t hash, const void* const buf, const size_t len)
+zix_digest32_aligned(const uint32_t    seed,
+                     const void* const key,
+                     const size_t      len)
 {
-  assert((uintptr_t)buf % sizeof(uint64_t) == 0);
-  assert(len % sizeof(uint64_t) == 0);
+  static const uint32_t c1 = 0xCC9E2D51u;
+  static const uint32_t c2 = 0x1B873593u;
+
+  assert((uintptr_t)key % sizeof(uint32_t) == 0u);
+  assert(len % sizeof(uint32_t) == 0u);
+
+  const uint32_t* const blocks   = (const uint32_t*)key;
+  const size_t          n_blocks = len / sizeof(uint32_t);
+  uint32_t              h        = seed;
+  for (size_t i = 0u; i < n_blocks; ++i) {
+    uint32_t k = blocks[i];
+
+    k *= c1;
+    k = rotl32(k, 15);
+    k *= c2;
 
-  return zix_digest_add(hash, buf, len);
+    h ^= k;
+    h = rotl32(h, 13);
+    h = h * 5u + 0xE6546B64u;
+  }
+
+  return mix32(h ^ (uint32_t)len);
 }
 
-uint32_t
-zix_digest_add_ptr(const uint32_t hash, const void* const ptr)
+// Native word size wrapper
+
+size_t
+zix_digest(const size_t seed, const void* const buf, const size_t len)
 {
-  return zix_digest_add(hash, &ptr, sizeof(ptr));
+#if UINTPTR_MAX >= UINT64_MAX
+  return zix_digest64(seed, buf, len);
+#else
+  return zix_digest32(seed, buf, len);
+#endif
 }
 
+size_t
+zix_digest_aligned(const size_t seed, const void* const buf, const size_t len)
+{
+#if UINTPTR_MAX >= UINT64_MAX
+  return zix_digest64_aligned(seed, buf, len);
+#else
+  return zix_digest32_aligned(seed, buf, len);
 #endif
+}
diff --git a/src/zix/digest.h b/src/zix/digest.h
index 1fde77a9..6df70024 100644
--- a/src/zix/digest.h
+++ b/src/zix/digest.h
@@ -1,5 +1,5 @@
 /*
-  Copyright 2012-2020 David Robillard <d@drobilla.net>
+  Copyright 2012-2021 David Robillard <d@drobilla.net>
 
   Permission to use, copy, modify, and/or distribute this software for any
   purpose with or without fee is hereby granted, provided that the above
@@ -27,38 +27,80 @@ extern "C" {
 #endif
 
 /**
-   Return an initial empty digest value.
+   @addtogroup zix
+   @{
+   @name Digest
+   Functions to generate a short "digest" of data with minimal collisions.
+
+   These are good general-purpose hash functions for indexing arbitrary data,
+   but are not necessarily stable across platforms and should never be used for
+   cryptographic purposes.
+   @{
 */
-ZIX_CONST_API
-uint32_t
-zix_digest_start(void);
 
 /**
-   Update `hash` to include `buf`, a buffer of `len` bytes.
+   Return a 32-bit hash of a buffer.
 
    This can be used for any size or alignment.
 */
 ZIX_PURE_API
 uint32_t
-zix_digest_add(uint32_t hash, const void* buf, size_t len);
+zix_digest32(uint32_t seed, const void* buf, size_t len);
 
 /**
-   Update `hash` to include `buf`, a 64-bit aligned buffer of `len` bytes.
+   Return a 32-bit hash of an aligned buffer.
 
-   Both `buf` and `len` must be evenly divisible by 8 (64 bits).
+   Both the buffer and size must be aligned to 32 bits.  For data that fits
+   these requirements, this is equivalent to, but faster than, zix_digest32().
 */
 ZIX_PURE_API
 uint32_t
-zix_digest_add_64(uint32_t hash, const void* buf, size_t len);
+zix_digest32_aligned(uint32_t seed, const void* buf, size_t len);
 
 /**
-   Update `hash` to include `ptr`.
+   Return a 64-bit hash of a buffer.
 
-   This hashes the value of the pointer itself, and does not dereference `ptr`.
+   This can be used for any size or alignment.
 */
-ZIX_CONST_API
-uint32_t
-zix_digest_add_ptr(uint32_t hash, const void* ptr);
+ZIX_PURE_API
+uint64_t
+zix_digest64(uint64_t seed, const void* buf, size_t len);
+
+/**
+   Return a 64-bit hash of an aligned buffer.
+
+   Both the buffer and size must be aligned to 64 bits.  For data that fits
+   these requirements, this is equivalent to, but faster than, zix_digest64().
+*/
+ZIX_PURE_API
+uint64_t
+zix_digest64_aligned(uint64_t seed, const void* buf, size_t len);
+
+/**
+   Return a pointer-sized hash of a buffer.
+
+   This can be used for any size or alignment.
+
+   Internally, this simply dispatches to zix_digest32() or zix_digest64() as
+   appropriate.
+*/
+ZIX_PURE_API
+size_t
+zix_digest(size_t seed, const void* buf, size_t len);
+
+/**
+   Return a pointer-sized hash of an aligned buffer.
+
+   Both the buffer and size must be aligned to the pointer size.  For data that
+   fits these requirements, this is equivalent to, but faster than,
+   zix_digest().
+
+   Internally, this simply dispatches to zix_digest32_aligned() or
+   zix_digest64_aligned() as appropriate.
+*/
+ZIX_PURE_API
+size_t
+zix_digest_aligned(size_t seed, const void* buf, size_t len);
 
 #ifdef __cplusplus
 } /* extern "C" */
diff --git a/src/zix/hash.c b/src/zix/hash.c
index d9556edc..8837378e 100644
--- a/src/zix/hash.c
+++ b/src/zix/hash.c
@@ -1,5 +1,5 @@
 /*
-  Copyright 2011-2020 David Robillard <d@drobilla.net>
+  Copyright 2011-2021 David Robillard <d@drobilla.net>
 
   Permission to use, copy, modify, and/or distribute this software for any
   purpose with or without fee is hereby granted, provided that the above
@@ -17,214 +17,332 @@
 #include "zix/hash.h"
 
 #include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
 #include <stdlib.h>
-#include <string.h>
-
-/**
-   Primes, each slightly less than twice its predecessor, and as far away
-   from powers of two as possible.
-*/
-static const unsigned sizes[] = {
-  53,        97,        193,       389,       769,        1543,     3079,
-  6151,      12289,     24593,     49157,     98317,      196613,   393241,
-  786433,    1572869,   3145739,   6291469,   12582917,   25165843, 50331653,
-  100663319, 201326611, 402653189, 805306457, 1610612741, 0};
 
 typedef struct ZixHashEntry {
-  struct ZixHashEntry* next; ///< Next entry in bucket
-  uint32_t             hash; ///< Non-modulo hash value
-                             // Value follows here (access with zix_hash_value)
+  ZixHashCode    hash;  ///< Non-folded hash value
+  ZixHashRecord* value; ///< Pointer to user-owned record
 } ZixHashEntry;
 
 struct ZixHashImpl {
-  ZixHashFunc     hash_func;
-  ZixEqualFunc    equal_func;
-  ZixHashEntry**  buckets;
-  const unsigned* n_buckets;
-  size_t          value_size;
-  unsigned        count;
+  ZixKeyFunc      key_func;   ///< User-provided key accessor
+  ZixHashFunc     hash_func;  ///< User-provided hashing function
+  ZixKeyEqualFunc equal_func; ///< User-provided equality comparison function
+  size_t          count;      ///< Number of records stored in the table
+  size_t          mask;       ///< Bit mask for fast modulo (n_entries - 1)
+  size_t          n_entries;  ///< Power of two table size
+  ZixHashEntry*   entries;    ///< Pointer to dynamically allocated table
 };
 
-static inline void*
-zix_hash_value(ZixHashEntry* entry)
-{
-  return entry + 1;
-}
+static const size_t min_n_entries = 4u;
+static const size_t tombstone     = 0xDEADu;
 
 ZixHash*
-zix_hash_new(ZixHashFunc hash_func, ZixEqualFunc equal_func, size_t value_size)
+zix_hash_new(const ZixKeyFunc      key_func,
+             const ZixHashFunc     hash_func,
+             const ZixKeyEqualFunc equal_func)
 {
-  ZixHash* hash = (ZixHash*)malloc(sizeof(ZixHash));
-  if (hash) {
-    hash->hash_func  = hash_func;
-    hash->equal_func = equal_func;
-    hash->n_buckets  = &sizes[0];
-    hash->value_size = value_size;
-    hash->count      = 0;
-    if (!(hash->buckets =
-            (ZixHashEntry**)calloc(*hash->n_buckets, sizeof(ZixHashEntry*)))) {
-      free(hash);
-      return NULL;
-    }
+  ZixHash* const hash = (ZixHash*)malloc(sizeof(ZixHash));
+  if (!hash) {
+    return NULL;
   }
+
+  hash->key_func   = key_func;
+  hash->hash_func  = hash_func;
+  hash->equal_func = equal_func;
+  hash->count      = 0u;
+  hash->n_entries  = min_n_entries;
+  hash->mask       = hash->n_entries - 1u;
+
+  hash->entries = (ZixHashEntry*)calloc(hash->n_entries, sizeof(ZixHashEntry));
+  if (!hash->entries) {
+    free(hash);
+    return NULL;
+  }
+
   return hash;
 }
 
 void
-zix_hash_free(ZixHash* hash)
+zix_hash_free(ZixHash* const hash)
 {
-  if (!hash) {
-    return;
+  if (hash) {
+    free(hash->entries);
+    free(hash);
   }
+}
 
-  for (unsigned b = 0; b < *hash->n_buckets; ++b) {
-    ZixHashEntry* bucket = hash->buckets[b];
-    for (ZixHashEntry* e = bucket; e;) {
-      ZixHashEntry* next = e->next;
-      free(e);
-      e = next;
-    }
-  }
+ZixHashIter
+zix_hash_begin(const ZixHash* const hash)
+{
+  return hash->entries[0u].value ? 0u : zix_hash_next(hash, 0u);
+}
 
-  free(hash->buckets);
-  free(hash);
+ZixHashIter
+zix_hash_end(const ZixHash* const hash)
+{
+  return hash->n_entries;
+}
+
+ZixHashRecord*
+zix_hash_get(const ZixHash* hash, const ZixHashIter i)
+{
+  assert(i < hash->n_entries);
+
+  return hash->entries[i].value;
+}
+
+ZixHashIter
+zix_hash_next(const ZixHash* const hash, ZixHashIter i)
+{
+  do {
+    ++i;
+  } while (i < hash->n_entries && !hash->entries[i].value);
+
+  return i;
 }
 
 size_t
-zix_hash_size(const ZixHash* hash)
+zix_hash_size(const ZixHash* const hash)
 {
   return hash->count;
 }
 
-static inline void
-insert_entry(ZixHashEntry** bucket, ZixHashEntry* entry)
+static inline size_t
+fold_hash(const ZixHashCode h_nomod, const size_t mask)
+{
+  return h_nomod & mask;
+}
+
+static inline bool
+is_tombstone(const ZixHashEntry* const entry)
+{
+  return !entry->value && entry->hash == tombstone;
+}
+
+static inline bool
+is_empty(const ZixHashEntry* const entry)
 {
-  entry->next = *bucket;
-  *bucket     = entry;
+  return !entry->value && !entry->hash;
 }
 
-static inline ZixStatus
-rehash(ZixHash* hash, unsigned new_n_buckets)
+static inline bool
+is_match(const ZixHash* const hash,
+         const ZixHashCode    code,
+         const size_t         entry_index,
+         ZixKeyEqualFunc      predicate,
+         const void* const    user_data)
 {
-  ZixHashEntry** new_buckets =
-    (ZixHashEntry**)calloc(new_n_buckets, sizeof(ZixHashEntry*));
-  if (!new_buckets) {
+  const ZixHashEntry* const entry = &hash->entries[entry_index];
+
+  return entry->value && entry->hash == code &&
+         predicate(hash->key_func(entry->value), user_data);
+}
+
+static inline size_t
+next_index(const ZixHash* const hash, const size_t i)
+{
+  return (i == hash->mask) ? 0u : (i + 1u);
+}
+
+static inline ZixHashIter
+find_entry(const ZixHash* const    hash,
+           const ZixHashKey* const key,
+           const size_t            h,
+           const ZixHashCode       code)
+{
+  size_t i = h;
+
+  while (!is_match(hash, code, i, hash->equal_func, key) &&
+         !is_empty(&hash->entries[i])) {
+    i = next_index(hash, i);
+  }
+
+  return i;
+}
+
+static ZixStatus
+rehash(ZixHash* const hash, const size_t old_n_entries)
+{
+  ZixHashEntry* const old_entries   = hash->entries;
+  const size_t        new_n_entries = hash->n_entries;
+
+  // Replace the array in the hash first so we can use find_entry() normally
+  hash->entries = (ZixHashEntry*)calloc(new_n_entries, sizeof(ZixHashEntry));
+  if (!hash->entries) {
     return ZIX_STATUS_NO_MEM;
   }
 
-  const unsigned old_n_buckets = *hash->n_buckets;
-  for (unsigned b = 0; b < old_n_buckets; ++b) {
-    for (ZixHashEntry* e = hash->buckets[b]; e;) {
-      ZixHashEntry* const next = e->next;
-      const unsigned      h    = e->hash % new_n_buckets;
-      insert_entry(&new_buckets[h], e);
-      e = next;
+  // Reinsert every element into the new array
+  for (size_t i = 0u; i < old_n_entries; ++i) {
+    ZixHashEntry* const entry = &old_entries[i];
+
+    if (entry->value) {
+      assert(hash->mask == hash->n_entries - 1u);
+      const size_t new_h = fold_hash(entry->hash, hash->mask);
+      const size_t new_i = find_entry(hash, entry->value, new_h, entry->hash);
+
+      hash->entries[new_i] = *entry;
     }
   }
 
-  free(hash->buckets);
-  hash->buckets = new_buckets;
+  free(old_entries);
+  return ZIX_STATUS_SUCCESS;
+}
+
+static ZixStatus
+grow(ZixHash* const hash)
+{
+  const size_t old_n_entries = hash->n_entries;
+
+  hash->n_entries <<= 1u;
+  hash->mask = hash->n_entries - 1u;
+
+  return rehash(hash, old_n_entries);
+}
+
+static ZixStatus
+shrink(ZixHash* const hash)
+{
+  if (hash->n_entries > min_n_entries) {
+    const size_t old_n_entries = hash->n_entries;
+
+    hash->n_entries >>= 1u;
+    hash->mask = hash->n_entries - 1u;
+
+    return rehash(hash, old_n_entries);
+  }
 
   return ZIX_STATUS_SUCCESS;
 }
 
-static inline ZixHashEntry*
-find_entry(const ZixHash* hash,
-           const void*    key,
-           const unsigned h,
-           const unsigned h_nomod)
+ZixHashIter
+zix_hash_find(const ZixHash* const hash, const ZixHashKey* const key)
 {
-  for (ZixHashEntry* e = hash->buckets[h]; e; e = e->next) {
-    if (e->hash == h_nomod && hash->equal_func(zix_hash_value(e), key)) {
-      return e;
+  const ZixHashCode h_nomod = hash->hash_func(key);
+  const size_t      h       = fold_hash(h_nomod, hash->mask);
+  const ZixHashIter i       = find_entry(hash, key, h, h_nomod);
+
+  return is_empty(&hash->entries[i]) ? hash->n_entries : i;
+}
+
+ZixHashRecord*
+zix_hash_find_record(const ZixHash* const hash, const ZixHashKey* const key)
+{
+  const ZixHashCode h_nomod = hash->hash_func(key);
+  const size_t      h       = fold_hash(h_nomod, hash->mask);
+
+  return hash->entries[find_entry(hash, key, h, h_nomod)].value;
+}
+
+ZixHashInsertPlan
+zix_hash_plan_insert_prehashed(const ZixHash* const  hash,
+                               const ZixHashCode     code,
+                               const ZixKeyMatchFunc predicate,
+                               const void* const     user_data)
+{
+  // Calculate an ideal initial position
+  ZixHashInsertPlan pos = {code, fold_hash(code, hash->mask)};
+
+  // Search for a free position starting at the ideal one
+  const size_t start_index     = pos.index;
+  size_t       first_tombstone = SIZE_MAX;
+  while (!is_empty(&hash->entries[pos.index])) {
+    if (is_match(hash, code, pos.index, predicate, user_data)) {
+      return pos;
+    }
+
+    if (first_tombstone == SIZE_MAX &&
+        is_tombstone(&hash->entries[pos.index])) {
+      first_tombstone = pos.index; // Remember the first/best free index
+    }
+
+    if ((pos.index = next_index(hash, pos.index)) == start_index) {
+      break;
     }
   }
-  return NULL;
+
+  // If we found a tombstone before an empty slot, place the new element there
+  pos.index = first_tombstone < SIZE_MAX ? first_tombstone : pos.index;
+  assert(!hash->entries[pos.index].value);
+
+  return pos;
 }
 
-void*
-zix_hash_find(const ZixHash* hash, const void* value)
+ZixHashInsertPlan
+zix_hash_plan_insert(const ZixHash* const hash, const ZixHashKey* const key)
 {
-  const unsigned      h_nomod = hash->hash_func(value);
-  const unsigned      h       = h_nomod % *hash->n_buckets;
-  ZixHashEntry* const entry   = find_entry(hash, value, h, h_nomod);
-  return entry ? zix_hash_value(entry) : 0;
+  return zix_hash_plan_insert_prehashed(
+    hash, hash->hash_func(key), hash->equal_func, key);
 }
 
-ZixStatus
-zix_hash_insert(ZixHash* hash, const void* value, void** inserted)
+ZixHashRecord*
+zix_hash_record_at(const ZixHash* const hash, const ZixHashInsertPlan position)
 {
-  unsigned h_nomod = hash->hash_func(value);
-  unsigned h       = h_nomod % *hash->n_buckets;
+  return hash->entries[position.index].value;
+}
 
-  ZixHashEntry* elem = find_entry(hash, value, h, h_nomod);
-  if (elem) {
-    assert(elem->hash == h_nomod);
-    if (inserted) {
-      *inserted = zix_hash_value(elem);
-    }
+ZixStatus
+zix_hash_insert_at(ZixHash* const          hash,
+                   const ZixHashInsertPlan position,
+                   ZixHashRecord* const    record)
+{
+  if (hash->entries[position.index].value) {
     return ZIX_STATUS_EXISTS;
   }
 
-  elem = (ZixHashEntry*)malloc(sizeof(ZixHashEntry) + hash->value_size);
-  if (!elem) {
-    return ZIX_STATUS_NO_MEM;
-  }
-  elem->next = NULL;
-  elem->hash = h_nomod;
-  memcpy(elem + 1, value, hash->value_size);
-
-  const unsigned next_n_buckets = *(hash->n_buckets + 1);
-  if (next_n_buckets != 0 && (hash->count + 1) >= next_n_buckets) {
-    if (!rehash(hash, next_n_buckets)) {
-      h = h_nomod % *(++hash->n_buckets);
-    }
-  }
+  // Set entry to new value
+  ZixHashEntry* const entry = &hash->entries[position.index];
+  assert(!entry->value);
+  entry->hash  = position.code;
+  entry->value = record;
 
-  insert_entry(&hash->buckets[h], elem);
-  ++hash->count;
-  if (inserted) {
-    *inserted = zix_hash_value(elem);
+  // Update size and rehash if we exceeded the maximum load
+  const size_t max_load = hash->n_entries / 2u + hash->n_entries / 8u;
+  if (++hash->count >= max_load) {
+    return grow(hash);
   }
+
   return ZIX_STATUS_SUCCESS;
 }
 
 ZixStatus
-zix_hash_remove(ZixHash* hash, const void* value)
-{
-  const unsigned h_nomod = hash->hash_func(value);
-  const unsigned h       = h_nomod % *hash->n_buckets;
-
-  ZixHashEntry** next_ptr = &hash->buckets[h];
-  for (ZixHashEntry* e = hash->buckets[h]; e; e = e->next) {
-    if (h_nomod == e->hash && hash->equal_func(zix_hash_value(e), value)) {
-      *next_ptr = e->next;
-      free(e);
-      --hash->count;
-      return ZIX_STATUS_SUCCESS;
-    }
-    next_ptr = &e->next;
-  }
+zix_hash_insert(ZixHash* const hash, ZixHashRecord* const record)
+{
+  const ZixHashKey* const key      = hash->key_func(record);
+  const ZixHashInsertPlan position = zix_hash_plan_insert(hash, key);
 
-  if (hash->n_buckets != sizes) {
-    const unsigned prev_n_buckets = *(hash->n_buckets - 1);
-    if (hash->count - 1 <= prev_n_buckets) {
-      if (!rehash(hash, prev_n_buckets)) {
-        --hash->n_buckets;
-      }
-    }
+  return zix_hash_insert_at(hash, position, record);
+}
+
+ZixStatus
+zix_hash_erase(ZixHash* const        hash,
+               const ZixHashIter     i,
+               ZixHashRecord** const removed)
+{
+  // Replace entry with a tombstone
+  *removed               = hash->entries[i].value;
+  hash->entries[i].hash  = tombstone;
+  hash->entries[i].value = NULL;
+
+  // Decrease element count and rehash if necessary
+  --hash->count;
+  if (hash->count < hash->n_entries / 4u) {
+    return shrink(hash);
   }
 
-  return ZIX_STATUS_NOT_FOUND;
+  return ZIX_STATUS_SUCCESS;
 }
 
-void
-zix_hash_foreach(ZixHash* hash, ZixHashVisitFunc f, void* user_data)
+ZixStatus
+zix_hash_remove(ZixHash* const          hash,
+                const ZixHashKey* const key,
+                ZixHashRecord** const   removed)
 {
-  for (unsigned b = 0; b < *hash->n_buckets; ++b) {
-    ZixHashEntry* bucket = hash->buckets[b];
-    for (ZixHashEntry* e = bucket; e; e = e->next) {
-      f(zix_hash_value(e), user_data);
-    }
-  }
+  const ZixHashIter i = zix_hash_find(hash, key);
+
+  return i == hash->n_entries ? ZIX_STATUS_NOT_FOUND
+                              : zix_hash_erase(hash, i, removed);
 }
diff --git a/src/zix/hash.h b/src/zix/hash.h
index 39050381..f08d267b 100644
--- a/src/zix/hash.h
+++ b/src/zix/hash.h
@@ -1,5 +1,5 @@
 /*
-  Copyright 2011-2020 David Robillard <d@drobilla.net>
+  Copyright 2011-2021 David Robillard <d@drobilla.net>
 
   Permission to use, copy, modify, and/or distribute this software for any
   purpose with or without fee is hereby granted, provided that the above
@@ -19,8 +19,8 @@
 
 #include "zix/common.h"
 
+#include <stdbool.h>
 #include <stddef.h>
-#include <stdint.h>
 
 #ifdef __cplusplus
 extern "C" {
@@ -33,98 +33,289 @@ extern "C" {
    @{
 */
 
+// ZIX_HASH_KEY_TYPE can be defined to make the API more type-safe
+#if defined(ZIX_HASH_KEY_TYPE)
+typedef ZIX_HASH_KEY_TYPE ZixHashKey;
+#else
+typedef void ZixHashKey;
+#endif
+
+// ZIX_HASH_RECORD_TYPE can be defined to make the API more type-safe
+#if defined(ZIX_HASH_RECORD_TYPE)
+typedef ZIX_HASH_RECORD_TYPE ZixHashRecord;
+#else
+typedef void ZixHashRecord;
+#endif
+
+// ZIX_HASH_SEARCH_DATA_TYPE can be defined to make the API more type-safe
+#if defined(ZIX_HASH_SEARCH_DATA_TYPE)
+typedef ZIX_HASH_SEARCH_DATA_TYPE ZixHashSearchData;
+#else
+typedef void ZixHashSearchData;
+#endif
+
+/**
+   A hash table.
+
+   This is an open addressing hash table that stores pointers to arbitrary user
+   data.  Internally, everything is stored in a single flat array that is
+   resized when necessary.
+
+   The single user-provided pointer that is stored in the table is called a
+   "record".  A record contains a "key", which is accessed via a user-provided
+   function.  This design supports storing large records (which may be
+   expensive to allocate/construct) without requiring an entire record to
+   search.  Simple atomic values can be stored by providing a trivial identity
+   function as a key function.
+
+   The table uses power of 2 sizes with a growth factor of 2, so that hash
+   values can be folded into an array index using bitwise AND as a fast modulo.
+   This means that only the necessary low bits of the hash value will be used,
+   so the hash function must be well-balanced within this range.  More or less
+   any good modern hash algorithm will be fine, but beware, for example, hash
+   functions that assume they are targeting a table with a prime size.
+
+   Since this doubles and halves in size, it may not be an optimal choice if
+   memory reuse is a priority.  A growth factor of 1.5 with fast range
+   reduction may be a better choice there, at the cost of requiring 128-bit
+   arithmetic on 64-bit platforms, and indexing operations being slightly more
+   expensive.
+*/
 typedef struct ZixHashImpl ZixHash;
 
+/// A full hash code for a key which is not folded down to the table size
+typedef size_t ZixHashCode;
+
 /**
-   Function for computing the hash of an element.
+   An iterator to an entry in a hash table.
+
+   This is really just an index, but should be considered opaque to the user
+   and only used via the provided API and equality comparison.
 */
-typedef uint32_t (*ZixHashFunc)(const void* value);
+typedef size_t ZixHashIter;
+
+/// User function for computing the hash of a key
+typedef ZixHashCode (*ZixHashFunc)(const ZixHashKey* key);
+
+/// User function for determining if two keys are truly equal
+typedef bool (*ZixKeyEqualFunc)(const ZixHashKey* a, const ZixHashKey* b);
+
+/// User function for determining if a key matches in a custom search
+typedef bool (*ZixKeyMatchFunc)(const ZixHashKey*        key,
+                                const ZixHashSearchData* user_data);
+
+/// User function for getting the key of a record
+typedef const ZixHashKey* (*ZixKeyFunc)(const ZixHashRecord* record);
 
 /**
-   Function to visit a hash element.
+   A "plan" (position) to insert a record in a hash table.
+
+   This contains everything necessary to insert a record, except the record
+   itself.  It is exposed to split up insertion so that records only need to be
+   allocated if an existing record is not found, but the contents should be
+   considered opaque to the user.
 */
-typedef void (*ZixHashVisitFunc)(void* value, void* user_data);
+typedef struct {
+  ZixHashCode code;  ///< Hash code used to find this position
+  ZixHashIter index; ///< Index into hash table
+} ZixHashInsertPlan;
 
 /**
    Create a new hash table.
 
-   To minimize space overhead, unlike many hash tables this stores a single
-   value, not a key and a value.  Any size of value can be stored, but all the
-   values in the hash table must be the same size, and the values must be safe
-   to copy with memcpy.  To get key:value behaviour, simply insert a struct
-   with a key and value into the hash.
-
-   @param hash_func The hashing function.
-   @param equal_func A function to test value equality.
-   @param value_size The size of the values to be stored.
+   @param key_func A function to retrieve the key from a record.
+   @param hash_func The key hashing function.
+   @param equal_func A function to test keys for equality.
 */
 ZIX_API
 ZixHash*
-zix_hash_new(ZixHashFunc hash_func, ZixEqualFunc equal_func, size_t value_size);
+zix_hash_new(ZixKeyFunc      key_func,
+             ZixHashFunc     hash_func,
+             ZixKeyEqualFunc equal_func);
 
-/**
-   Free `hash`.
-*/
+/// Free `hash`
 ZIX_API
 void
 zix_hash_free(ZixHash* hash);
 
-/**
-   Return the number of elements in `hash`.
-*/
+/// Return an iterator to the first record in a hash, or the end if it is empty
+ZIX_PURE_API
+ZixHashIter
+zix_hash_begin(const ZixHash* hash);
+
+/// Return an iterator one past the last possible record in a hash
+ZIX_PURE_API
+ZixHashIter
+zix_hash_end(const ZixHash* hash);
+
+/// Return the record pointed to by an iterator
+ZIX_PURE_API
+ZixHashRecord*
+zix_hash_get(const ZixHash* hash, ZixHashIter i);
+
+/// Return an iterator that has been advanced to the next record in a hash
+ZIX_PURE_API
+ZixHashIter
+zix_hash_next(const ZixHash* hash, ZixHashIter i);
+
+/// Return the number of elements in a hash
 ZIX_PURE_API
 size_t
 zix_hash_size(const ZixHash* hash);
 
 /**
-   Insert an item into `hash`.
+   Find the best position to insert a record with the given key.
+
+   This searches the hash table and returns either the position of an existing
+   equivalent record, or the best available position to insert a new one.  The
+   difference can be determined with zix_hash_record_at().
+
+   If the returned position is not occupied, then it is valid to insert a
+   record with this key using zix_hash_insert_at() until the hash table is
+   modified (which invalidates the position).
+*/
+ZIX_API
+ZixHashInsertPlan
+zix_hash_plan_insert(const ZixHash* hash, const ZixHashKey* key);
+
+/**
+   Find the best position to insert a record with a custom search.
 
-   If no matching value is found, ZIX_STATUS_SUCCESS will be returned, and @p
-   inserted will be pointed to the copy of `value` made in the new hash node.
+   This is an advanced low-level version of zix_hash_plan_insert().  It allows
+   a precalculated hash code to be given, along with a custom search predicate.
+   These must be compatible with the functions that manage the hash table:
+   trivial usage would be to pass the equality function used by the hash and
+   the key to search for.
 
-   If a matching value already exists, ZIX_STATUS_EXISTS will be returned, and
-   `inserted` will be pointed to the existing value.
+   This is exposed to make it possible to avoid constructing a key at all when
+   potentially inserting.  For example, if keys are structures with a fixed
+   header and a variably-sized body, and you have a separate header and body
+   this can be used to find an insert position without having to allocate a
+   contiguous key.
+
+   Note that care must be taken when using this function: improper use can
+   corrupt the hash table.  The hash code given must be correct for the key to
+   be inserted, and the predicate must return true only if the key it is called
+   with (the first argument) matches the key to be inserted.
+*/
+ZIX_API
+ZixHashInsertPlan
+zix_hash_plan_insert_prehashed(const ZixHash*           hash,
+                               ZixHashCode              code,
+                               ZixKeyMatchFunc          predicate,
+                               const ZixHashSearchData* user_data);
+
+/**
+   Return the record at the given position, or null.
+
+   This is used to determine if a position returned from zix_hash_plan_insert()
+   can be used to insert a new record, or to access the existing matching
+   record.
+*/
+ZIX_PURE_API
+ZixHashRecord*
+zix_hash_record_at(const ZixHash* hash, ZixHashInsertPlan position);
+
+/**
+   Insert a record at a specific position.
+
+   The position must have been found with an earlier call to
+   zix_hash_plan_insert(), and no modifications must have been made to the hash
+   table since.
+
+   @param hash The hash table.
+
+   @param position The position for the new record.
+
+   @param record The record to insert which, on success, can now be considered
+   owned by the hash table.
+
+   @return ZIX_STATUS_SUCCESS, ZIX_STATUS_EXISTS if a record already exists at
+   this position, or ZIX_STATUS_NO_MEM if growing the hash table failed.
+*/
+ZIX_API
+ZixStatus
+zix_hash_insert_at(ZixHash*          hash,
+                   ZixHashInsertPlan position,
+                   ZixHashRecord*    record);
+
+/**
+   Insert a record.
+
+   This is a trivial wrapper for zix_hash_plan_insert() and
+   zix_hash_insert_at() that is more convenient when record construction is not
+   expensive.
 
    @param hash The hash table.
-   @param value The value to be inserted.
-   @param inserted The copy of `value` in the hash table.
+
+   @param record The record to insert which, on success, can now be considered
+   owned by the hash table.
+
    @return ZIX_STATUS_SUCCESS, ZIX_STATUS_EXISTS, or ZIX_STATUS_NO_MEM.
 */
 ZIX_API
 ZixStatus
-zix_hash_insert(ZixHash* hash, const void* value, void** inserted);
+zix_hash_insert(ZixHash* hash, ZixHashRecord* record);
 
 /**
-   Remove an item from `hash`.
+   Erase a record at a specific position.
+
+   @param hash The hash table to remove the record from.
+
+   @param i Iterator to the record to remove.  This must be a valid iterator
+   from an earlier call to zix_hash_find(), that is, the hash table must not
+   have been modified since.
+
+   @param removed Set to the removed record, or null.
+
+   @return ZIX_STATUS_SUCCES or ZIX_STATUS_BAD_ARG if `i` does not point at a
+   removable record.
+*/
+ZIX_API
+ZixStatus
+zix_hash_erase(ZixHash* hash, ZixHashIter i, ZixHashRecord** removed);
+
+/**
+   Remove a record.
 
    @param hash The hash table.
-   @param value The value to remove.
+   @param key The key of the record to remove.
+   @param removed Set to the removed record, or null.
    @return ZIX_STATUS_SUCCES or ZIX_STATUS_NOT_FOUND.
 */
 ZIX_API
 ZixStatus
-zix_hash_remove(ZixHash* hash, const void* value);
+zix_hash_remove(ZixHash* hash, const ZixHashKey* key, ZixHashRecord** removed);
 
 /**
-   Search for an item in `hash`.
+   Find the position of a record with a given key.
 
-   @param hash The hash table.
-   @param value The value to search for.
+   @param hash The hash table to search.
+
+   @param key The key of the desired record.
+
+   @return An iterator to the matching record, or the end iterator if no such
+   record exists.
 */
 ZIX_API
-void*
-zix_hash_find(const ZixHash* hash, const void* value);
+ZixHashIter
+zix_hash_find(const ZixHash* hash, const ZixHashKey* key);
 
 /**
-   Call `f` on each value in `hash`.
+   Find a record with a given key.
 
-   @param hash The hash table.
-   @param f The function to call on each value.
-   @param user_data The user_data parameter passed to `f`.
+   This is essentially the same as zix_hash_find(), but returns a pointer to
+   the record for convenience.
+
+   @param hash The hash table to search.
+
+   @param key The key of the desired record.
+
+   @return A pointer to the matching record, of null if no such record exists.
 */
 ZIX_API
-void
-zix_hash_foreach(ZixHash* hash, ZixHashVisitFunc f, void* user_data);
+ZixHashRecord*
+zix_hash_find_record(const ZixHash* hash, const ZixHashKey* key);
 
 /**
    @}
-- 
cgit v1.2.1