Avoid dynamic allocation when fetching interned nodes

This is more or less a total rewrite of SerdNodes and the underlying ZixHash to make efficient use of the new node construction API.
author: David Robillard <d@drobilla.net> 2021-07-22 18:35:31 -0400
committer: David Robillard <d@drobilla.net> 2022-01-14 19:37:51 -0500
commit: 955be33cead7596506cf86211da6b0e53827ef96 (patch)
tree: 0dac1a7a31e8c50d1ba7200648d6ce402f97867d
parent: 34852e8faa380f12b11522cfa998df4f260e3856 (diff)
download: serd-955be33cead7596506cf86211da6b0e53827ef96.tar.gz
serd-955be33cead7596506cf86211da6b0e53827ef96.tar.bz2
serd-955be33cead7596506cf86211da6b0e53827ef96.zip
9 files changed, 1582 insertions, 391 deletions
diff --git a/include/serd/serd.h b/include/serd/serd.h
index f67752a5..77986150 100644
--- a/include/serd/serd.h
+++ b/include/serd/serd.h
@@ -1223,25 +1223,33 @@ serd_nodes_intern(SerdNodes* SERD_NONNULL       nodes,
                   const SerdNode* SERD_NULLABLE node);
 
 /**
-   Manage `node`.
+   Make a string node.
+
+   A new node will be added if an equivalent node is not already in the set.
+*/
+SERD_API
+const SerdNode* SERD_ALLOCATED
+serd_nodes_string(SerdNodes* SERD_NONNULL nodes, SerdStringView string);
 
-   Like `serd_nodes_intern`, but takes ownership of `node`, freeing it and
-   returning a previously interned/managed equivalent node if necessary.
+/**
+   Make a URI node from a string.
 
-   @return A node that is equivalent to `node`.
+   A new node will be constructed with serd_node_construct_token() if an
+   equivalent one is not already in the set.
 */
 SERD_API
 const SerdNode* SERD_ALLOCATED
-serd_nodes_manage(SerdNodes* SERD_NONNULL nodes, SerdNode* SERD_NULLABLE node);
+serd_nodes_uri(SerdNodes* SERD_NONNULL nodes, SerdStringView string);
 
 /**
-   Make a string node.
+   Make a URI node from a parsed URI.
 
-   A new node will be added if an equivalent node is not already in the set.
+   A new node will be constructed with serd_node_construct_uri() if an
+   equivalent one is not already in the set.
 */
 SERD_API
 const SerdNode* SERD_ALLOCATED
-serd_nodes_string(SerdNodes* SERD_NONNULL nodes, SerdStringView string);
+serd_nodes_parsed_uri(SerdNodes* SERD_NONNULL nodes, SerdURIView uri);
 
 /**
    Make a literal node with optional datatype or language.
@@ -1273,18 +1281,75 @@ serd_nodes_literal(SerdNodes* SERD_NONNULL nodes,
                    SerdStringView          meta);
 
 /**
-   Make a URI node.
+   Make a canonical xsd:boolean node.
 
-   A new node will be added if an equivalent node is not already in the set.
+   A new node will be constructed with serd_node_construct_boolean() if an
+   equivalent one is not already in the set.
 */
 SERD_API
 const SerdNode* SERD_ALLOCATED
-serd_nodes_uri(SerdNodes* SERD_NONNULL nodes, SerdStringView string);
+serd_nodes_boolean(SerdNodes* SERD_NONNULL nodes, bool value);
+
+/**
+   Make a canonical xsd:decimal node.
+
+   A new node will be constructed with serd_node_construct_decimal() if an
+   equivalent one is not already in the set.
+*/
+SERD_API
+const SerdNode* SERD_ALLOCATED
+serd_nodes_decimal(SerdNodes* SERD_NONNULL nodes, double value);
+
+/**
+   Make a canonical xsd:double node.
+
+   A new node will be constructed with serd_node_construct_double() if an
+   equivalent one is not already in the set.
+*/
+SERD_API
+const SerdNode* SERD_ALLOCATED
+serd_nodes_double(SerdNodes* SERD_NONNULL nodes, double value);
+
+/**
+   Make a canonical xsd:float node.
+
+   A new node will be constructed with serd_node_construct_float() if an
+   equivalent one is not already in the set.
+*/
+SERD_API
+const SerdNode* SERD_ALLOCATED
+serd_nodes_float(SerdNodes* SERD_NONNULL nodes, float value);
+
+/**
+   Make a canonical xsd:integer node.
+
+   A new node will be constructed with serd_node_construct_integer() if an
+   equivalent one is not already in the set.
+*/
+SERD_API
+const SerdNode* SERD_ALLOCATED
+serd_nodes_integer(SerdNodes* SERD_NONNULL nodes,
+                   int64_t                 value,
+                   SerdStringView          datatype);
+
+/**
+   Make a canonical xsd:base64Binary node.
+
+   A new node will be constructed with serd_node_construct_base64() if an
+   equivalent one is not already in the set.
+*/
+SERD_API
+const SerdNode* SERD_ALLOCATED
+serd_nodes_base64(SerdNodes* SERD_NONNULL  nodes,
+                  const void* SERD_NONNULL value,
+                  size_t                   value_size,
+                  SerdStringView           datatype);
 
 /**
    Make a blank node.
 
-   A new node will be added if an equivalent node is not already in the set.
+   A new node will be constructed with serd_node_construct_token() if an
+   equivalent one is not already in the set.
 */
 SERD_API
 const SerdNode* SERD_ALLOCATED
@@ -1299,8 +1364,8 @@ serd_nodes_blank(SerdNodes* SERD_NONNULL nodes, SerdStringView string);
 */
 SERD_API
 void
-serd_nodes_deref(SerdNodes* SERD_NONNULL      nodes,
-                 const SerdNode* SERD_NONNULL node);
+serd_nodes_deref(SerdNodes* SERD_NONNULL       nodes,
+                 const SerdNode* SERD_NULLABLE node);
 
 /**
    @}
diff --git a/src/node_spec.h b/src/node_spec.h
new file mode 100644
index 00000000..fcf19712
--- /dev/null
+++ b/src/node_spec.h
@@ -0,0 +1,53 @@
+/*
+  Copyright 2021 David Robillard <d@drobilla.net>
+
+  Permission to use, copy, modify, and/or distribute this software for any
+  purpose with or without fee is hereby granted, provided that the above
+  copyright notice and this permission notice appear in all copies.
+
+  THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+  ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+
+#ifndef SERD_NODE_SPEC_H
+#define SERD_NODE_SPEC_H
+
+#include "serd/serd.h"
+
+/**
+   A lightweight "specification" of a node.
+
+   This is essentially the arguments needed to construct any node combined into
+   a single structure.  Since it only refers to strings elsewhere, it is
+   convenient as a way to completely specify a node without having to actually
+   allocate one.
+*/
+typedef struct {
+  SerdNodeType   type;   ///< Basic type of this node
+  SerdStringView string; ///< String contents of this node
+  SerdNodeFlags  flags;  ///< Additional node flags
+  SerdStringView meta;   ///< String contents of datatype or language node
+} NodeSpec;
+
+static inline NodeSpec
+token_spec(const SerdNodeType type, const SerdStringView string)
+{
+  NodeSpec spec = {type, string, 0u, SERD_EMPTY_STRING()};
+  return spec;
+}
+
+static inline NodeSpec
+literal_spec(const SerdStringView string,
+             const SerdNodeFlags  flags,
+             const SerdStringView meta)
+{
+  NodeSpec spec = {SERD_LITERAL, string, flags, meta};
+  return spec;
+}
+
+#endif // SERD_NODE_SPEC_H
diff --git a/src/nodes.c b/src/nodes.c
index 885731cc..a7d0ab2b 100644
--- a/src/nodes.c
+++ b/src/nodes.c
@@ -14,10 +14,18 @@
   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
 
+#include "nodes.h"
+
 #include "node.h"
+#include "node_spec.h"
+#include "system.h"
+
+// Define the types used in the hash interface for more type safety
+#define ZIX_HASH_KEY_TYPE SerdNode
+#define ZIX_HASH_RECORD_TYPE NodesEntry
+#define ZIX_HASH_SEARCH_DATA_TYPE NodeSpec
 
 #include "serd/serd.h"
-#include "zix/common.h"
 #include "zix/digest.h"
 #include "zix/hash.h"
 
@@ -27,53 +35,175 @@
 #include <stdlib.h>
 #include <string.h>
 
-typedef struct {
-  size_t    refs;
-  SerdNode* node;
-} NodesEntry;
+#if ((defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112l) || \
+     (defined(__cplusplus) && __cplusplus >= 201103L))
+
+static_assert(sizeof(NodesEntryHead) == sizeof(SerdNode),
+              "NodesEntryHead must be the same size as SerdNode for alignment");
+
+#endif
+
+/*
+  The main goal here is to make getting an existing node as fast as possible,
+  so that this can be used as a convenient node cache with minimal performance
+  overhead.  Mainly this means striving to avoid allocation when potentially
+  inserting a new node.
+
+  This is achieved by generating a hash code from node components without
+  actually allocating that node, and using the low-level insertion interface of
+  ZixHash to do a custom search.  This way, an entry is only allocated when
+  necessary, and the hash table is only searched once.
+
+  The downside is that subtle and very bad things will happen if the hash
+  generated for the node does not match the actual hash of the node.  The
+  exessive number of assertions around this are there to provide some defense
+  against such mistakes.  Cave operatur.
+*/
+
+/**
+   The maximum size of a node that will be made statically on the stack.
+
+   This mostly applies to things like numeric literal nodes, where a small
+   maximum size is exploited to avoid allocation.  The largest static node
+   string is the longest xsd:decimal, which is 327 bytes.  We need a bit more
+   space than that here for the node header, padding, and datatype.
+*/
+#define MAX_STATIC_NODE_SIZE 384
 
 typedef struct {
-  size_t          refs;
-  const SerdNode* node;
-} NodesSearchKey;
+  SerdNode node;
+  char     body[MAX_STATIC_NODE_SIZE];
+} StaticNode;
 
 struct SerdNodesImpl {
   ZixHash* hash;
 };
 
-static uint32_t
-nodes_hash(const void* n)
+static const StaticNode empty_static_node = {{0u, 0u, SERD_LITERAL}, {'\0'}};
+
+static const SerdNode*
+nodes_key(const NodesEntry* const entry)
+{
+  return &entry->node;
+}
+
+static ZixHashCode
+token_hash(const ZixHashCode    seed,
+           const SerdNodeType   type,
+           const SerdStringView string)
 {
-  const SerdNode* node = ((const NodesEntry*)n)->node;
+  const SerdNode node_header = {string.len, 0u, type};
+  ZixHashCode    h           = seed;
 
-  return zix_digest_add(zix_digest_start(), node, serd_node_total_size(node));
+  h = zix_digest_aligned(h, &node_header, sizeof(node_header));
+  h = zix_digest(h, string.buf, string.len);
+  return h;
+}
+
+static ZixHashCode
+spec_hash(const NodeSpec spec)
+{
+  ZixHashCode h = token_hash(0u, spec.type, spec.string);
+
+  if (spec.flags & SERD_HAS_DATATYPE) {
+    h = token_hash(h, SERD_URI, spec.meta);
+  } else if (spec.flags & SERD_HAS_LANGUAGE) {
+    h = token_hash(h, SERD_LITERAL, spec.meta);
+  }
+
+  return h;
+}
+
+static ZixHashCode
+nodes_hash(const SerdNode* const node)
+{
+  /* If you're thinking "I know, I'll make this faster by just hashing the
+     entire node in one call!", think harder.  Since zero bytes affect the hash
+     value, that would mean that the hash code for the node won't match the one
+     for the node spec, and the above-mentioned subtle and very bad things will
+     happen.  This function deliberately mirrors spec_hash() above to make it
+     relatively easy to see that the two will match.
+
+     It would be more elegant to construct a spec from the node and then hash
+     that with the exact same code used for searching, but there's currently no
+     use for that anywhere else and this way is a bit faster. */
+
+  ZixHashCode h = token_hash(0u, node->type, serd_node_string_view(node));
+
+  if (node->flags & SERD_HAS_DATATYPE) {
+    h = token_hash(h, SERD_URI, serd_node_string_view(serd_node_meta_c(node)));
+  } else if (node->flags & SERD_HAS_LANGUAGE) {
+    h = token_hash(
+      h, SERD_LITERAL, serd_node_string_view(serd_node_meta_c(node)));
+  }
+
+  return h;
 }
 
 static bool
-nodes_equal(const void* a, const void* b)
+node_equals_spec(const SerdNode* const node, const NodeSpec* const spec)
 {
-  const SerdNode* a_node = ((const NodesEntry*)a)->node;
-  const SerdNode* b_node = ((const NodesEntry*)b)->node;
-  const size_t    a_size = serd_node_total_size(a_node);
-  const size_t    b_size = serd_node_total_size(b_node);
-  return ((a_node == b_node) ||
-          (a_size == b_size && !memcmp(a_node, b_node, a_size)));
+  // Only datatype and language are relevant for equality
+  static const SerdNodeFlags flag_mask = SERD_HAS_DATATYPE | SERD_HAS_LANGUAGE;
+
+  const SerdNodeFlags flags = spec->flags & flag_mask;
+
+  return serd_node_type(node) == spec->type &&
+         serd_node_length(node) == spec->string.len &&
+         (node->flags & flag_mask) == flags &&
+         !strcmp(serd_node_string_i(node), spec->string.buf) &&
+         (!flags ||
+          !strcmp(serd_node_string_i(serd_node_meta_c(node)), spec->meta.buf));
 }
 
-static void
-free_entry(void* value, void* user_data)
+static bool
+nodes_equal(const SerdNode* const a, const SerdNode* const b)
 {
-  (void)user_data;
+  static const SerdNodeFlags meta_mask =
+    (SERD_HAS_DATATYPE | SERD_HAS_LANGUAGE);
 
-  NodesEntry* entry = (NodesEntry*)value;
-  serd_node_free(entry->node);
+  if (a == b) {
+    return true;
+  }
+
+  if (a->length != b->length || a->flags != b->flags || a->type != b->type ||
+      !!memcmp(serd_node_string_i(a), serd_node_string_i(b), a->length)) {
+    return false;
+  }
+
+  if (a->flags & meta_mask) {
+    const size_t          padded_length = serd_node_pad_length(a->length);
+    const size_t          offset        = padded_length / sizeof(SerdNode);
+    const SerdNode* const am            = a + 1u + offset;
+    const SerdNode* const bm            = b + 1u + offset;
+
+    return am->length == bm->length && am->type == bm->type &&
+           !memcmp(serd_node_string_i(am), serd_node_string_i(bm), am->length);
+  }
+
+  return true;
+}
+
+static NodesEntry*
+new_entry(const size_t node_size)
+{
+  NodesEntry* const entry = (NodesEntry*)serd_calloc_aligned(
+    serd_node_align, sizeof(NodesEntryHead) + node_size);
+
+  if (entry) {
+    entry->head.refs = 1u;
+  }
+
+  return entry;
 }
 
 SerdNodes*
 serd_nodes_new(void)
 {
-  SerdNodes* nodes = (SerdNodes*)calloc(1, sizeof(SerdNodes));
-  nodes->hash      = zix_hash_new(nodes_hash, nodes_equal, sizeof(NodesEntry));
+  SerdNodes* const nodes = (SerdNodes*)calloc(1, sizeof(SerdNodes));
+
+  nodes->hash = zix_hash_new(nodes_key, nodes_hash, nodes_equal);
+
   return nodes;
 }
 
@@ -81,7 +211,12 @@ void
 serd_nodes_free(SerdNodes* nodes)
 {
   if (nodes) {
-    zix_hash_foreach(nodes->hash, free_entry, nodes);
+    for (ZixHashIter i = zix_hash_begin(nodes->hash);
+         i != zix_hash_end(nodes->hash);
+         i = zix_hash_next(nodes->hash, i)) {
+      serd_free_aligned(zix_hash_get(nodes->hash, i));
+    }
+
     zix_hash_free(nodes->hash);
     free(nodes);
   }
@@ -96,22 +231,27 @@ serd_nodes_size(const SerdNodes* nodes)
 const SerdNode*
 serd_nodes_intern(SerdNodes* nodes, const SerdNode* node)
 {
+  assert(nodes);
   if (!node) {
     return NULL;
   }
 
-  NodesSearchKey key      = {1, node};
-  NodesEntry*    inserted = NULL;
-
-  const ZixStatus st = zix_hash_insert(nodes->hash, &key, (void**)&inserted);
-  if (st == ZIX_STATUS_SUCCESS) {
-    inserted->node = serd_node_copy(node);
-  } else if (st == ZIX_STATUS_EXISTS) {
-    assert(serd_node_equals(inserted->node, node));
-    ++inserted->refs;
+  const ZixHashInsertPlan plan     = zix_hash_plan_insert(nodes->hash, node);
+  NodesEntry* const       existing = zix_hash_record_at(nodes->hash, plan);
+  if (existing) {
+    assert(serd_node_equals(&existing->node, node));
+    ++existing->head.refs;
+    return &existing->node;
   }
 
-  return inserted ? inserted->node : NULL;
+  const size_t      node_size = serd_node_total_size(node);
+  NodesEntry* const entry     = new_entry(node_size);
+
+  memcpy(&entry->node, node, node_size);
+
+  // Insert the entry (blissfully ignoring a failed hash size increase)
+  zix_hash_insert_at(nodes->hash, plan, entry);
+  return &entry->node;
 }
 
 const SerdNode*
@@ -121,72 +261,263 @@ serd_nodes_get(const SerdNodes* const nodes, const SerdNode* const node)
     return NULL;
   }
 
-  const NodesSearchKey key   = {1, node};
-  NodesEntry* const    entry = (NodesEntry*)zix_hash_find(nodes->hash, &key);
+  NodesEntry* const entry = zix_hash_find_record(nodes->hash, node);
+
+  return entry ? &entry->node : NULL;
+}
+
+static const SerdNode*
+serd_nodes_manage_entry(SerdNodes* const nodes, NodesEntry* const entry)
+{
+  const SerdNode* const   node     = &entry->node;
+  const ZixHashInsertPlan plan     = zix_hash_plan_insert(nodes->hash, node);
+  NodesEntry* const       existing = zix_hash_record_at(nodes->hash, plan);
+  if (existing) {
+    assert(serd_node_equals(&existing->node, node));
+    assert(nodes_hash(&existing->node) == plan.code);
+    ++existing->head.refs;
+    serd_free_aligned(entry);
+    return &existing->node;
+  }
 
-  return entry ? entry->node : NULL;
+  // Insert the entry (blissfully ignoring a failed hash size increase)
+  zix_hash_insert_at(nodes->hash, plan, entry);
+  assert(nodes_hash(&entry->node) == plan.code);
+  return &entry->node;
+}
+
+static const SerdNode*
+serd_nodes_token(SerdNodes* const     nodes,
+                 const SerdNodeType   type,
+                 const SerdStringView string)
+{
+  const NodeSpec          key  = token_spec(type, string);
+  const ZixHashCode       code = spec_hash(key);
+  const ZixHashInsertPlan plan =
+    zix_hash_plan_insert_prehashed(nodes->hash, code, node_equals_spec, &key);
+
+  NodesEntry* const existing = zix_hash_record_at(nodes->hash, plan);
+  if (existing) {
+    assert(nodes_hash(&existing->node) == code);
+    ++existing->head.refs;
+    return &existing->node;
+  }
+
+  const size_t      padded_length = serd_node_pad_length(string.len);
+  const size_t      node_size     = sizeof(SerdNode) + padded_length;
+  NodesEntry* const entry         = new_entry(node_size);
+  SerdNode* const   node          = entry ? &entry->node : NULL;
+
+  if (node) {
+    // Construct the token directly into the node in the new entry
+    const SerdWriteResult r =
+      serd_node_construct_token(node_size, &entry->node, type, string);
+
+    assert(!r.status); // Never fails with sufficient space
+    (void)r;
+
+    // Insert the entry (blissfully ignoring a failed hash size increase)
+    zix_hash_insert_at(nodes->hash, plan, entry);
+    assert(nodes_hash(node) == code);
+  }
+
+  return node;
 }
 
 const SerdNode*
-serd_nodes_manage(SerdNodes* nodes, SerdNode* node)
+serd_nodes_literal(SerdNodes* const     nodes,
+                   const SerdStringView string,
+                   const SerdNodeFlags  flags,
+                   const SerdStringView meta)
 {
-  if (!node) {
+  // Calculate a hash code for the literal without actually constructing it
+  const NodeSpec    spec = literal_spec(string, flags, meta);
+  const ZixHashCode code = spec_hash(spec);
+
+  // Find an insert position in the hash table
+  const ZixHashInsertPlan plan =
+    zix_hash_plan_insert_prehashed(nodes->hash, code, node_equals_spec, &spec);
+
+  // If we found an existing node, bump its reference count and return it
+  NodesEntry* const existing = zix_hash_record_at(nodes->hash, plan);
+  if (existing) {
+    assert(nodes_hash(&existing->node) == code);
+    ++existing->head.refs;
+    return &existing->node;
+  }
+
+  // We need to insert a new entry, so determine how much space the node needs
+  SerdWriteResult r = serd_node_construct_literal(0, NULL, string, flags, meta);
+  if (r.status != SERD_ERR_OVERFLOW) {
     return NULL;
   }
 
-  NodesSearchKey key      = {1, node};
-  NodesEntry*    inserted = NULL;
+  // Allocate a new entry with enough space for the node
+  NodesEntry* const entry = new_entry(r.count);
+  SerdNode* const   node  = entry ? &entry->node : NULL;
+
+  if (node) {
+    // Construct the literal directly into the node in the new entry
+    r = serd_node_construct_literal(r.count, node, string, flags, meta);
+    assert(!r.status);
+    (void)r;
 
-  const ZixStatus st = zix_hash_insert(nodes->hash, &key, (void**)&inserted);
-  if (st == ZIX_STATUS_EXISTS) {
-    assert(serd_node_equals(inserted->node, node));
-    serd_node_free(node);
-    ++inserted->refs;
+    // Insert the entry (blissfully ignoring a failed hash size increase)
+    zix_hash_insert_at(nodes->hash, plan, entry);
+    assert(nodes_hash(node) == code);
   }
 
-  return inserted ? inserted->node : NULL;
+  return node;
 }
 
-/* TODO: Make these methods faster by being smarter internally and avoiding
-   unnecessary allocatio in cases where the node is already in the set. */
-
 const SerdNode*
 serd_nodes_string(SerdNodes* const nodes, const SerdStringView string)
 {
-  return serd_nodes_manage(nodes, serd_new_string(string));
+  return serd_nodes_token(nodes, SERD_LITERAL, string);
 }
 
-const SerdNode* SERD_ALLOCATED
-serd_nodes_literal(SerdNodes* const     nodes,
-                   const SerdStringView string,
-                   const SerdNodeFlags  flags,
-                   const SerdStringView meta)
+static const SerdNode*
+try_intern(SerdNodes* const      nodes,
+           const SerdWriteResult r,
+           const SerdNode* const node)
+{
+  return r.status ? NULL : serd_nodes_intern(nodes, node);
+}
+
+const SerdNode*
+serd_nodes_boolean(SerdNodes* const nodes, bool value)
+{
+  StaticNode key = empty_static_node;
+
+  return try_intern(
+    nodes, serd_node_construct_boolean(sizeof(key), &key, value), &key.node);
+}
+
+const SerdNode*
+serd_nodes_decimal(SerdNodes* const nodes, const double value)
+{
+  StaticNode key = empty_static_node;
+
+  return try_intern(
+    nodes, serd_node_construct_decimal(sizeof(key), &key, value), &key.node);
+}
+
+const SerdNode*
+serd_nodes_double(SerdNodes* const nodes, const double value)
+{
+  StaticNode key = empty_static_node;
+
+  return try_intern(
+    nodes, serd_node_construct_double(sizeof(key), &key, value), &key.node);
+}
+
+const SerdNode*
+serd_nodes_float(SerdNodes* const nodes, const float value)
 {
-  return serd_nodes_manage(nodes, serd_new_literal(string, flags, meta));
+  StaticNode key = empty_static_node;
+
+  return try_intern(
+    nodes, serd_node_construct_float(sizeof(key), &key, value), &key.node);
+}
+
+const SerdNode*
+serd_nodes_integer(SerdNodes* const     nodes,
+                   const int64_t        value,
+                   const SerdStringView datatype)
+{
+  StaticNode key = empty_static_node;
+
+  return try_intern(
+    nodes,
+    serd_node_construct_integer(sizeof(key), &key, value, datatype),
+    &key.node);
+}
+
+const SerdNode*
+serd_nodes_base64(SerdNodes* const     nodes,
+                  const void* const    value,
+                  const size_t         value_size,
+                  const SerdStringView datatype)
+{
+  assert(nodes);
+  assert(value);
+
+  /* We're more or less forced to allocate and construct an entry here, since
+     we need the base64 string to hash.  Though it would be possible to
+     calculate it in a streaming fashion, that would be a severe pessimisation
+     in the presumably common case of raw data not being cached, since it would
+     only need to be serialised again.  Keeping a tentative entry buffer around
+     when possible would probably be a better improvement if this ever becomes
+     a performance issue.  More ambitiously, adding support for binary nodes
+     like a Real Database(TM) would largely avoid this problem. */
+
+  // Determine how much space the node needs
+  SerdWriteResult r =
+    serd_node_construct_base64(0, NULL, value_size, value, datatype);
+
+  // Allocate a new entry to and construct the node into it
+  NodesEntry* const entry = new_entry(r.count);
+
+  r = serd_node_construct_base64(
+    r.count, &entry->node, value_size, value, datatype);
+
+  assert(!r.status);
+  (void)r;
+  return serd_nodes_manage_entry(nodes, entry);
 }
 
 const SerdNode*
 serd_nodes_uri(SerdNodes* const nodes, const SerdStringView string)
 {
-  return serd_nodes_manage(nodes, serd_new_uri(string));
+  return serd_nodes_token(nodes, SERD_URI, string);
+}
+
+const SerdNode*
+serd_nodes_parsed_uri(SerdNodes* const nodes, const SerdURIView uri)
+{
+  assert(nodes);
+
+  /* Computing a hash for the serialised URI here would be quite complex, so,
+     since this isn't expected to be a particularly hot case, we just allocate
+     a new entry and try to do a normal insertion. */
+
+  // Determine how much space the node needs
+  SerdWriteResult r = serd_node_construct_uri(0u, NULL, uri);
+  assert(r.status == SERD_ERR_OVERFLOW); // Currently no other errors
+
+  // Allocate a new entry to write the URI node into
+  NodesEntry* const entry = new_entry(r.count);
+
+  r = serd_node_construct_uri(r.count, &entry->node, uri);
+  assert(!r.status);
+  (void)r;
+
+  return serd_nodes_manage_entry(nodes, entry);
 }
 
 const SerdNode*
 serd_nodes_blank(SerdNodes* const nodes, const SerdStringView string)
 {
-  return serd_nodes_manage(nodes, serd_new_token(SERD_BLANK, string));
+  return serd_nodes_token(nodes, SERD_BLANK, string);
 }
 
 void
 serd_nodes_deref(SerdNodes* const nodes, const SerdNode* const node)
 {
-  const NodesSearchKey key   = {1, node};
-  NodesEntry* const    entry = (NodesEntry*)zix_hash_find(nodes->hash, &key);
+  if (!node) {
+    return;
+  }
 
-  if (entry && --entry->refs == 0) {
-    SerdNode* const intern_node = entry->node;
+  ZixHashIter i = zix_hash_find(nodes->hash, node);
+  if (i == zix_hash_end(nodes->hash)) {
+    return;
+  }
 
-    zix_hash_remove(nodes->hash, entry);
-    serd_node_free(intern_node);
+  NodesEntry* const entry = zix_hash_get(nodes->hash, i);
+  if (--entry->head.refs == 0u) {
+    NodesEntry* removed = NULL;
+    zix_hash_erase(nodes->hash, i, &removed);
+    assert(removed == entry);
+    serd_free_aligned(removed);
   }
 }
diff --git a/src/nodes.h b/src/nodes.h
new file mode 100644
index 00000000..b39bac66
--- /dev/null
+++ b/src/nodes.h
@@ -0,0 +1,69 @@
+/*
+  Copyright 2021 David Robillard <d@drobilla.net>
+
+  Permission to use, copy, modify, and/or distribute this software for any
+  purpose with or without fee is hereby granted, provided that the above
+  copyright notice and this permission notice appear in all copies.
+
+  THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+  ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+
+/*
+  Nothing here is actually used outside the nodes implementation, but we need
+  the types to be defined before including zix/hash.h to enable its type-safe
+  interface.  Putting those here is a way of doing that without messy hacks
+  like including headers half-way through the implementation.
+*/
+
+#ifndef SERD_NODES_H
+#define SERD_NODES_H
+
+#include "node.h"
+
+#include "serd/serd.h"
+
+#include <stddef.h>
+
+/**
+   The header of an entry in the nodes table.
+
+   The table stores nodes with an additional reference count.  For efficiency,
+   entries are allocated as a single block of memory, which start with this
+   header and are followed by the body of the node.
+
+   This structure must be the same size as SerdNode to preserve the alignment
+   of the contained node.  This is a bit wasteful, but the alignment guarantee
+   allows the node implementation to avoid messy casts and byte-based pointer
+   arithmetic that could cause alignment problems.  This might be worth
+   reconsidering, since this wasted space has a real (if small) negative
+   impact, while the alignment guarantee just allows the implementation to use
+   stricter compiler settings.
+
+   Better yet, shrink SerdNode down to size_t, which is malloc's alignment
+   guarantee, and all of this goes away, at the cost of a reduced maximum
+   length for literal nodes.
+*/
+typedef struct {
+  size_t   refs; ///< Reference count
+  unsigned pad1; ///< Padding to align the following SerdNode
+  unsigned pad2; ///< Padding to align the following SerdNode
+} NodesEntryHead;
+
+/**
+   An entry in the nodes table.
+
+   This is a variably-sized structure that is allocated specifically to contain
+   the node.
+*/
+typedef struct {
+  NodesEntryHead head; ///< Extra data associated with the node
+  SerdNode       node; ///< Node header (body immediately follows)
+} NodesEntry;
+
+#endif // SERD_NODES_H
diff --git a/src/zix/digest.c b/src/zix/digest.c
index 47d27b94..dcfadf8f 100644
--- a/src/zix/digest.c
+++ b/src/zix/digest.c
@@ -1,5 +1,5 @@
 /*
-  Copyright 2012-2020 David Robillard <d@drobilla.net>
+  Copyright 2012-2021 David Robillard <d@drobilla.net>
 
   Permission to use, copy, modify, and/or distribute this software for any
   purpose with or without fee is hereby granted, provided that the above
@@ -16,126 +16,218 @@
 
 #include "zix/digest.h"
 
-#ifdef __SSE4_2__
-#  include <smmintrin.h>
-#endif
-
 #include <assert.h>
 #include <stdint.h>
+#include <string.h>
 
-#ifdef __SSE4_2__
+#if defined(__clang__) && __clang_major__ >= 12
+#  define FALLTHROUGH() __attribute__((fallthrough))
+#elif defined(__GNUC__) && !defined(__clang__)
+#  define FALLTHROUGH() __attribute__((fallthrough))
+#else
+#  define FALLTHROUGH()
+#endif
 
-// SSE 4.2 CRC32
+/*
+  64-bit hash: Essentially fasthash64, implemented here in an aligned/padded
+  and a general UB-free variant.
+*/
 
-uint32_t
-zix_digest_start(void)
+static inline uint64_t
+mix64(uint64_t h)
 {
-  return 1;
+  h ^= h >> 23u;
+  h *= 0x2127599BF4325C37ull;
+  h ^= h >> 47u;
+  return h;
 }
 
-uint32_t
-zix_digest_add(uint32_t hash, const void* const buf, const size_t len)
+uint64_t
+zix_digest64(const uint64_t seed, const void* const key, const size_t len)
 {
-  const uint8_t* str = (const uint8_t*)buf;
+  static const uint64_t m = 0x880355F21E6D1965ull;
 
-#  ifdef __x86_64__
-  for (size_t i = 0; i < (len / sizeof(uint64_t)); ++i) {
-    hash = (uint32_t)_mm_crc32_u64(hash, *(const uint64_t*)str);
-    str += sizeof(uint64_t);
-  }
-  if (len & sizeof(uint32_t)) {
-    hash = _mm_crc32_u32(hash, *(const uint32_t*)str);
-    str += sizeof(uint32_t);
-  }
-#  else
-  for (size_t i = 0; i < (len / sizeof(uint32_t)); ++i) {
-    hash = _mm_crc32_u32(hash, *(const uint32_t*)str);
-    str += sizeof(uint32_t);
-  }
-#  endif
-  if (len & sizeof(uint16_t)) {
-    hash = _mm_crc32_u16(hash, *(const uint16_t*)str);
-    str += sizeof(uint16_t);
+  const uint64_t* const blocks   = (const uint64_t*)key;
+  const size_t          n_blocks = len / sizeof(uint64_t);
+
+  uint64_t h = seed ^ (len * m);
+  for (size_t i = 0u; i < n_blocks; ++i) {
+    h ^= mix64(blocks[i]);
+    h *= m;
   }
-  if (len & sizeof(uint8_t)) {
-    hash = _mm_crc32_u8(hash, *(const uint8_t*)str);
+
+  const uint8_t* const tail = (const unsigned char*)(blocks + n_blocks);
+  uint64_t             v    = 0u;
+
+  switch (len & 7u) {
+  case 7:
+    v |= (uint64_t)tail[6] << 48u;
+    FALLTHROUGH();
+  case 6:
+    v |= (uint64_t)tail[5] << 40u;
+    FALLTHROUGH();
+  case 5:
+    v |= (uint64_t)tail[4] << 32u;
+    FALLTHROUGH();
+  case 4:
+    v |= (uint64_t)tail[3] << 24u;
+    FALLTHROUGH();
+  case 3:
+    v |= (uint64_t)tail[2] << 16u;
+    FALLTHROUGH();
+  case 2:
+    v |= (uint64_t)tail[1] << 8u;
+    FALLTHROUGH();
+  case 1:
+    v |= (uint64_t)tail[0];
+
+    h ^= mix64(v);
+    h *= m;
   }
 
-  return hash;
+  return mix64(h);
 }
 
-uint32_t
-zix_digest_add_64(uint32_t hash, const void* const buf, const size_t len)
+uint64_t
+zix_digest64_aligned(const uint64_t seed, const void* const key, size_t len)
 {
-  assert((uintptr_t)buf % sizeof(uint64_t) == 0);
-  assert(len % sizeof(uint64_t) == 0);
-
-#  ifdef __x86_64__
-  const uint64_t* ptr = (const uint64_t*)buf;
+  static const uint64_t m = 0x880355F21E6D1965ull;
 
-  for (size_t i = 0; i < (len / sizeof(uint64_t)); ++i) {
-    hash = (uint32_t)_mm_crc32_u64(hash, *ptr);
-    ++ptr;
-  }
+  assert((uintptr_t)key % sizeof(uint64_t) == 0u);
+  assert(len % sizeof(uint64_t) == 0u);
 
-  return hash;
-#  else
-  const uint32_t* ptr = (const uint32_t*)buf;
+  const uint64_t* const blocks   = (const uint64_t*)key;
+  const size_t          n_blocks = len / sizeof(uint64_t);
+  uint64_t              h        = seed ^ (len * m);
 
-  for (size_t i = 0; i < (len / sizeof(uint32_t)); ++i) {
-    hash = _mm_crc32_u32(hash, *ptr);
-    ++ptr;
+  for (size_t i = 0u; i < n_blocks; ++i) {
+    h ^= mix64(blocks[i]);
+    h *= m;
   }
 
-  return hash;
-#  endif
+  return mix64(h);
 }
 
-uint32_t
-zix_digest_add_ptr(const uint32_t hash, const void* const ptr)
-{
-#  ifdef __x86_64__
-  return (uint32_t)_mm_crc32_u64(hash, (uintptr_t)ptr);
-#  else
-  return _mm_crc32_u32(hash, (uintptr_t)ptr);
-#  endif
-}
+/*
+  32-bit hash: Essentially murmur3, reimplemented here in an aligned/padded and
+  a general UB-free variant.
+*/
 
-#else
+/**
+   Rotate left by some count of bits.
 
-// Classic DJB hash
+   This is recognized by any halfway decent compiler and compiled to a single
+   instruction on architectures that have one.
+*/
+static inline uint32_t
+rotl32(const uint32_t val, const uint32_t bits)
+{
+  return ((val << bits) | (val >> (32 - bits)));
+}
 
-uint32_t
-zix_digest_start(void)
+static inline uint32_t
+mix32(uint32_t h)
 {
-  return 5381;
+  h ^= h >> 16u;
+  h *= 0x85EBCA6Bu;
+  h ^= h >> 13u;
+  h *= 0xC2B2AE35u;
+  h ^= h >> 16u;
+  return h;
 }
 
 uint32_t
-zix_digest_add(uint32_t hash, const void* const buf, const size_t len)
+zix_digest32(const uint32_t seed, const void* const key, const size_t len)
 {
-  const uint8_t* str = (const uint8_t*)buf;
+  static const uint32_t c1 = 0xCC9E2D51u;
+  static const uint32_t c2 = 0x1B873593u;
+
+  // Process as many 32-bit blocks as possible
+  const size_t         n_blocks   = len / sizeof(uint32_t);
+  const uint8_t*       data       = (const uint8_t*)key;
+  const uint8_t* const blocks_end = data + (n_blocks * sizeof(uint32_t));
+  uint32_t             h          = seed;
+  for (; data != blocks_end; data += sizeof(uint32_t)) {
+    uint32_t k = 0u;
+    memcpy(&k, data, sizeof(uint32_t));
+
+    k *= c1;
+    k = rotl32(k, 15);
+    k *= c2;
+
+    h ^= k;
+    h = rotl32(h, 13);
+    h = h * 5u + 0xE6546B64u;
+  }
 
-  for (size_t i = 0; i < len; ++i) {
-    hash = (hash << 5u) + hash + str[i];
+  // Process any trailing bytes
+  uint32_t k = 0u;
+  switch (len & 3u) {
+  case 3u:
+    k ^= (uint32_t)data[2u] << 16u;
+    FALLTHROUGH();
+  case 2u:
+    k ^= (uint32_t)data[1u] << 8u;
+    FALLTHROUGH();
+  case 1u:
+    k ^= (uint32_t)data[0u];
+
+    k *= c1;
+    k = rotl32(k, 15u);
+    k *= c2;
+    h ^= k;
   }
 
-  return hash;
+  return mix32(h ^ (uint32_t)len);
 }
 
 uint32_t
-zix_digest_add_64(uint32_t hash, const void* const buf, const size_t len)
+zix_digest32_aligned(const uint32_t    seed,
+                     const void* const key,
+                     const size_t      len)
 {
-  assert((uintptr_t)buf % sizeof(uint64_t) == 0);
-  assert(len % sizeof(uint64_t) == 0);
+  static const uint32_t c1 = 0xCC9E2D51u;
+  static const uint32_t c2 = 0x1B873593u;
+
+  assert((uintptr_t)key % sizeof(uint32_t) == 0u);
+  assert(len % sizeof(uint32_t) == 0u);
+
+  const uint32_t* const blocks   = (const uint32_t*)key;
+  const size_t          n_blocks = len / sizeof(uint32_t);
+  uint32_t              h        = seed;
+  for (size_t i = 0u; i < n_blocks; ++i) {
+    uint32_t k = blocks[i];
+
+    k *= c1;
+    k = rotl32(k, 15);
+    k *= c2;
 
-  return zix_digest_add(hash, buf, len);
+    h ^= k;
+    h = rotl32(h, 13);
+    h = h * 5u + 0xE6546B64u;
+  }
+
+  return mix32(h ^ (uint32_t)len);
 }
 
-uint32_t
-zix_digest_add_ptr(const uint32_t hash, const void* const ptr)
+// Native word size wrapper
+
+size_t
+zix_digest(const size_t seed, const void* const buf, const size_t len)
 {
-  return zix_digest_add(hash, &ptr, sizeof(ptr));
+#if UINTPTR_MAX >= UINT64_MAX
+  return zix_digest64(seed, buf, len);
+#else
+  return zix_digest32(seed, buf, len);
+#endif
 }
 
+size_t
+zix_digest_aligned(const size_t seed, const void* const buf, const size_t len)
+{
+#if UINTPTR_MAX >= UINT64_MAX
+  return zix_digest64_aligned(seed, buf, len);
+#else
+  return zix_digest32_aligned(seed, buf, len);
 #endif
+}
diff --git a/src/zix/digest.h b/src/zix/digest.h
index 1fde77a9..6df70024 100644
--- a/src/zix/digest.h
+++ b/src/zix/digest.h
@@ -1,5 +1,5 @@
 /*
-  Copyright 2012-2020 David Robillard <d@drobilla.net>
+  Copyright 2012-2021 David Robillard <d@drobilla.net>
 
   Permission to use, copy, modify, and/or distribute this software for any
   purpose with or without fee is hereby granted, provided that the above
@@ -27,38 +27,80 @@ extern "C" {
 #endif
 
 /**
-   Return an initial empty digest value.
+   @addtogroup zix
+   @{
+   @name Digest
+   Functions to generate a short "digest" of data with minimal collisions.
+
+   These are good general-purpose hash functions for indexing arbitrary data,
+   but are not necessarily stable across platforms and should never be used for
+   cryptographic purposes.
+   @{
 */
-ZIX_CONST_API
-uint32_t
-zix_digest_start(void);
 
 /**
-   Update `hash` to include `buf`, a buffer of `len` bytes.
+   Return a 32-bit hash of a buffer.
 
    This can be used for any size or alignment.
 */
 ZIX_PURE_API
 uint32_t
-zix_digest_add(uint32_t hash, const void* buf, size_t len);
+zix_digest32(uint32_t seed, const void* buf, size_t len);
 
 /**
-   Update `hash` to include `buf`, a 64-bit aligned buffer of `len` bytes.
+   Return a 32-bit hash of an aligned buffer.
 
-   Both `buf` and `len` must be evenly divisible by 8 (64 bits).
+   Both the buffer and size must be aligned to 32 bits.  For data that fits
+   these requirements, this is equivalent to, but faster than, zix_digest32().
 */
 ZIX_PURE_API
 uint32_t
-zix_digest_add_64(uint32_t hash, const void* buf, size_t len);
+zix_digest32_aligned(uint32_t seed, const void* buf, size_t len);
 
 /**
-   Update `hash` to include `ptr`.
+   Return a 64-bit hash of a buffer.
 
-   This hashes the value of the pointer itself, and does not dereference `ptr`.
+   This can be used for any size or alignment.
 */
-ZIX_CONST_API
-uint32_t
-zix_digest_add_ptr(uint32_t hash, const void* ptr);
+ZIX_PURE_API
+uint64_t
+zix_digest64(uint64_t seed, const void* buf, size_t len);
+
+/**
+   Return a 64-bit hash of an aligned buffer.
+
+   Both the buffer and size must be aligned to 64 bits.  For data that fits
+   these requirements, this is equivalent to, but faster than, zix_digest64().
+*/
+ZIX_PURE_API
+uint64_t
+zix_digest64_aligned(uint64_t seed, const void* buf, size_t len);
+
+/**
+   Return a pointer-sized hash of a buffer.
+
+   This can be used for any size or alignment.
+
+   Internally, this simply dispatches to zix_digest32() or zix_digest64() as
+   appropriate.
+*/
+ZIX_PURE_API
+size_t
+zix_digest(size_t seed, const void* buf, size_t len);
+
+/**
+   Return a pointer-sized hash of an aligned buffer.
+
+   Both the buffer and size must be aligned to the pointer size.  For data that
+   fits these requirements, this is equivalent to, but faster than,
+   zix_digest().
+
+   Internally, this simply dispatches to zix_digest32_aligned() or
+   zix_digest64_aligned() as appropriate.
+*/
+ZIX_PURE_API
+size_t
+zix_digest_aligned(size_t seed, const void* buf, size_t len);
 
 #ifdef __cplusplus
 } /* extern "C" */
diff --git a/src/zix/hash.c b/src/zix/hash.c
index d9556edc..8837378e 100644
--- a/src/zix/hash.c
+++ b/src/zix/hash.c
@@ -1,5 +1,5 @@
 /*
-  Copyright 2011-2020 David Robillard <d@drobilla.net>
+  Copyright 2011-2021 David Robillard <d@drobilla.net>
 
   Permission to use, copy, modify, and/or distribute this software for any
   purpose with or without fee is hereby granted, provided that the above
@@ -17,214 +17,332 @@
 #include "zix/hash.h"
 
 #include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
 #include <stdlib.h>
-#include <string.h>
-
-/**
-   Primes, each slightly less than twice its predecessor, and as far away
-   from powers of two as possible.
-*/
-static const unsigned sizes[] = {
-  53,        97,        193,       389,       769,        1543,     3079,
-  6151,      12289,     24593,     49157,     98317,      196613,   393241,
-  786433,    1572869,   3145739,   6291469,   12582917,   25165843, 50331653,
-  100663319, 201326611, 402653189, 805306457, 1610612741, 0};
 
 typedef struct ZixHashEntry {
-  struct ZixHashEntry* next; ///< Next entry in bucket
-  uint32_t             hash; ///< Non-modulo hash value
-                             // Value follows here (access with zix_hash_value)
+  ZixHashCode    hash;  ///< Non-folded hash value
+  ZixHashRecord* value; ///< Pointer to user-owned record
 } ZixHashEntry;
 
 struct ZixHashImpl {
-  ZixHashFunc     hash_func;
-  ZixEqualFunc    equal_func;
-  ZixHashEntry**  buckets;
-  const unsigned* n_buckets;
-  size_t          value_size;
-  unsigned        count;
+  ZixKeyFunc      key_func;   ///< User-provided key accessor
+  ZixHashFunc     hash_func;  ///< User-provided hashing function
+  ZixKeyEqualFunc equal_func; ///< User-provided equality comparison function
+  size_t          count;      ///< Number of records stored in the table
+  size_t          mask;       ///< Bit mask for fast modulo (n_entries - 1)
+  size_t          n_entries;  ///< Power of two table size
+  ZixHashEntry*   entries;    ///< Pointer to dynamically allocated table
 };
 
-static inline void*
-zix_hash_value(ZixHashEntry* entry)
-{
-  return entry + 1;
-}
+static const size_t min_n_entries = 4u;
+static const size_t tombstone     = 0xDEADu;
 
 ZixHash*
-zix_hash_new(ZixHashFunc hash_func, ZixEqualFunc equal_func, size_t value_size)
+zix_hash_new(const ZixKeyFunc      key_func,
+             const ZixHashFunc     hash_func,
+             const ZixKeyEqualFunc equal_func)
 {
-  ZixHash* hash = (ZixHash*)malloc(sizeof(ZixHash));
-  if (hash) {
-    hash->hash_func  = hash_func;
-    hash->equal_func = equal_func;
-    hash->n_buckets  = &sizes[0];
-    hash->value_size = value_size;
-    hash->count      = 0;
-    if (!(hash->buckets =
-            (ZixHashEntry**)calloc(*hash->n_buckets, sizeof(ZixHashEntry*)))) {
-      free(hash);
-      return NULL;
-    }
+  ZixHash* const hash = (ZixHash*)malloc(sizeof(ZixHash));
+  if (!hash) {
+    return NULL;
   }
+
+  hash->key_func   = key_func;
+  hash->hash_func  = hash_func;
+  hash->equal_func = equal_func;
+  hash->count      = 0u;
+  hash->n_entries  = min_n_entries;
+  hash->mask       = hash->n_entries - 1u;
+
+  hash->entries = (ZixHashEntry*)calloc(hash->n_entries, sizeof(ZixHashEntry));
+  if (!hash->entries) {
+    free(hash);
+    return NULL;
+  }
+
   return hash;
 }
 
 void
-zix_hash_free(ZixHash* hash)
+zix_hash_free(ZixHash* const hash)
 {
-  if (!hash) {
-    return;
+  if (hash) {
+    free(hash->entries);
+    free(hash);
   }
+}
 
-  for (unsigned b = 0; b < *hash->n_buckets; ++b) {
-    ZixHashEntry* bucket = hash->buckets[b];
-    for (ZixHashEntry* e = bucket; e;) {
-      ZixHashEntry* next = e->next;
-      free(e);
-      e = next;
-    }
-  }
+ZixHashIter
+zix_hash_begin(const ZixHash* const hash)
+{
+  return hash->entries[0u].value ? 0u : zix_hash_next(hash, 0u);
+}
 
-  free(hash->buckets);
-  free(hash);
+ZixHashIter
+zix_hash_end(const ZixHash* const hash)
+{
+  return hash->n_entries;
+}
+
+ZixHashRecord*
+zix_hash_get(const ZixHash* hash, const ZixHashIter i)
+{
+  assert(i < hash->n_entries);
+
+  return hash->entries[i].value;
+}
+
+ZixHashIter
+zix_hash_next(const ZixHash* const hash, ZixHashIter i)
+{
+  do {
+    ++i;
+  } while (i < hash->n_entries && !hash->entries[i].value);
+
+  return i;
 }
 
 size_t
-zix_hash_size(const ZixHash* hash)
+zix_hash_size(const ZixHash* const hash)
 {
   return hash->count;
 }
 
-static inline void
-insert_entry(ZixHashEntry** bucket, ZixHashEntry* entry)
+static inline size_t
+fold_hash(const ZixHashCode h_nomod, const size_t mask)
+{
+  return h_nomod & mask;
+}
+
+static inline bool
+is_tombstone(const ZixHashEntry* const entry)
+{
+  return !entry->value && entry->hash == tombstone;
+}
+
+static inline bool
+is_empty(const ZixHashEntry* const entry)
 {
-  entry->next = *bucket;
-  *bucket     = entry;
+  return !entry->value && !entry->hash;
 }
 
-static inline ZixStatus
-rehash(ZixHash* hash, unsigned new_n_buckets)
+static inline bool
+is_match(const ZixHash* const hash,
+         const ZixHashCode    code,
+         const size_t         entry_index,
+         ZixKeyEqualFunc      predicate,
+         const void* const    user_data)
 {
-  ZixHashEntry** new_buckets =
-    (ZixHashEntry**)calloc(new_n_buckets, sizeof(ZixHashEntry*));
-  if (!new_buckets) {
+  const ZixHashEntry* const entry = &hash->entries[entry_index];
+
+  return entry->value && entry->hash == code &&
+         predicate(hash->key_func(entry->value), user_data);
+}
+
+static inline size_t
+next_index(const ZixHash* const hash, const size_t i)
+{
+  return (i == hash->mask) ? 0u : (i + 1u);
+}
+
+static inline ZixHashIter
+find_entry(const ZixHash* const    hash,
+           const ZixHashKey* const key,
+           const size_t            h,
+           const ZixHashCode       code)
+{
+  size_t i = h;
+
+  while (!is_match(hash, code, i, hash->equal_func, key) &&
+         !is_empty(&hash->entries[i])) {
+    i = next_index(hash, i);
+  }
+
+  return i;
+}
+
+static ZixStatus
+rehash(ZixHash* const hash, const size_t old_n_entries)
+{
+  ZixHashEntry* const old_entries   = hash->entries;
+  const size_t        new_n_entries = hash->n_entries;
+
+  // Replace the array in the hash first so we can use find_entry() normally
+  hash->entries = (ZixHashEntry*)calloc(new_n_entries, sizeof(ZixHashEntry));
+  if (!hash->entries) {
     return ZIX_STATUS_NO_MEM;
   }
 
-  const unsigned old_n_buckets = *hash->n_buckets;
-  for (unsigned b = 0; b < old_n_buckets; ++b) {
-    for (ZixHashEntry* e = hash->buckets[b]; e;) {
-      ZixHashEntry* const next = e->next;
-      const unsigned      h    = e->hash % new_n_buckets;
-      insert_entry(&new_buckets[h], e);
-      e = next;
+  // Reinsert every element into the new array
+  for (size_t i = 0u; i < old_n_entries; ++i) {
+    ZixHashEntry* const entry = &old_entries[i];
+
+    if (entry->value) {
+      assert(hash->mask == hash->n_entries - 1u);
+      const size_t new_h = fold_hash(entry->hash, hash->mask);
+      const size_t new_i = find_entry(hash, entry->value, new_h, entry->hash);
+
+      hash->entries[new_i] = *entry;
     }
   }
 
-  free(hash->buckets);
-  hash->buckets = new_buckets;
+  free(old_entries);
+  return ZIX_STATUS_SUCCESS;
+}
+
+static ZixStatus
+grow(ZixHash* const hash)
+{
+  const size_t old_n_entries = hash->n_entries;
+
+  hash->n_entries <<= 1u;
+  hash->mask = hash->n_entries - 1u;
+
+  return rehash(hash, old_n_entries);
+}
+
+static ZixStatus
+shrink(ZixHash* const hash)
+{
+  if (hash->n_entries > min_n_entries) {
+    const size_t old_n_entries = hash->n_entries;
+
+    hash->n_entries >>= 1u;
+    hash->mask = hash->n_entries - 1u;
+
+    return rehash(hash, old_n_entries);
+  }
 
   return ZIX_STATUS_SUCCESS;
 }
 
-static inline ZixHashEntry*
-find_entry(const ZixHash* hash,
-           const void*    key,
-           const unsigned h,
-           const unsigned h_nomod)
+ZixHashIter
+zix_hash_find(const ZixHash* const hash, const ZixHashKey* const key)
 {
-  for (ZixHashEntry* e = hash->buckets[h]; e; e = e->next) {
-    if (e->hash == h_nomod && hash->equal_func(zix_hash_value(e), key)) {
-      return e;
+  const ZixHashCode h_nomod = hash->hash_func(key);
+  const size_t      h       = fold_hash(h_nomod, hash->mask);
+  const ZixHashIter i       = find_entry(hash, key, h, h_nomod);
+
+  return is_empty(&hash->entries[i]) ? hash->n_entries : i;
+}
+
+ZixHashRecord*
+zix_hash_find_record(const ZixHash* const hash, const ZixHashKey* const key)
+{
+  const ZixHashCode h_nomod = hash->hash_func(key);
+  const size_t      h       = fold_hash(h_nomod, hash->mask);
+
+  return hash->entries[find_entry(hash, key, h, h_nomod)].value;
+}
+
+ZixHashInsertPlan
+zix_hash_plan_insert_prehashed(const ZixHash* const  hash,
+                               const ZixHashCode     code,
+                               const ZixKeyMatchFunc predicate,
+                               const void* const     user_data)
+{
+  // Calculate an ideal initial position
+  ZixHashInsertPlan pos = {code, fold_hash(code, hash->mask)};
+
+  // Search for a free position starting at the ideal one
+  const size_t start_index     = pos.index;
+  size_t       first_tombstone = SIZE_MAX;
+  while (!is_empty(&hash->entries[pos.index])) {
+    if (is_match(hash, code, pos.index, predicate, user_data)) {
+      return pos;
+    }
+
+    if (first_tombstone == SIZE_MAX &&
+        is_tombstone(&hash->entries[pos.index])) {
+      first_tombstone = pos.index; // Remember the first/best free index
+    }
+
+    if ((pos.index = next_index(hash, pos.index)) == start_index) {
+      break;
     }
   }
-  return NULL;
+
+  // If we found a tombstone before an empty slot, place the new element there
+  pos.index = first_tombstone < SIZE_MAX ? first_tombstone : pos.index;
+  assert(!hash->entries[pos.index].value);
+
+  return pos;
 }
 
-void*
-zix_hash_find(const ZixHash* hash, const void* value)
+ZixHashInsertPlan
+zix_hash_plan_insert(const ZixHash* const hash, const ZixHashKey* const key)
 {
-  const unsigned      h_nomod = hash->hash_func(value);
-  const unsigned      h       = h_nomod % *hash->n_buckets;
-  ZixHashEntry* const entry   = find_entry(hash, value, h, h_nomod);
-  return entry ? zix_hash_value(entry) : 0;
+  return zix_hash_plan_insert_prehashed(
+    hash, hash->hash_func(key), hash->equal_func, key);
 }
 
-ZixStatus
-zix_hash_insert(ZixHash* hash, const void* value, void** inserted)
+ZixHashRecord*
+zix_hash_record_at(const ZixHash* const hash, const ZixHashInsertPlan position)
 {
-  unsigned h_nomod = hash->hash_func(value);
-  unsigned h       = h_nomod % *hash->n_buckets;
+  return hash->entries[position.index].value;
+}
 
-  ZixHashEntry* elem = find_entry(hash, value, h, h_nomod);
-  if (elem) {
-    assert(elem->hash == h_nomod);
-    if (inserted) {
-      *inserted = zix_hash_value(elem);
-    }
+ZixStatus
+zix_hash_insert_at(ZixHash* const          hash,
+                   const ZixHashInsertPlan position,
+                   ZixHashRecord* const    record)
+{
+  if (hash->entries[position.index].value) {
     return ZIX_STATUS_EXISTS;
   }
 
-  elem = (ZixHashEntry*)malloc(sizeof(ZixHashEntry) + hash->value_size);
-  if (!elem) {
-    return ZIX_STATUS_NO_MEM;
-  }
-  elem->next = NULL;
-  elem->hash = h_nomod;
-  memcpy(elem + 1, value, hash->value_size);
-
-  const unsigned next_n_buckets = *(hash->n_buckets + 1);
-  if (next_n_buckets != 0 && (hash->count + 1) >= next_n_buckets) {
-    if (!rehash(hash, next_n_buckets)) {
-      h = h_nomod % *(++hash->n_buckets);
-    }
-  }
+  // Set entry to new value
+  ZixHashEntry* const entry = &hash->entries[position.index];
+  assert(!entry->value);
+  entry->hash  = position.code;
+  entry->value = record;
 
-  insert_entry(&hash->buckets[h], elem);
-  ++hash->count;
-  if (inserted) {
-    *inserted = zix_hash_value(elem);
+  // Update size and rehash if we exceeded the maximum load
+  const size_t max_load = hash->n_entries / 2u + hash->n_entries / 8u;
+  if (++hash->count >= max_load) {
+    return grow(hash);
   }
+
   return ZIX_STATUS_SUCCESS;
 }
 
 ZixStatus
-zix_hash_remove(ZixHash* hash, const void* value)
-{
-  const unsigned h_nomod = hash->hash_func(value);
-  const unsigned h       = h_nomod % *hash->n_buckets;
-
-  ZixHashEntry** next_ptr = &hash->buckets[h];
-  for (ZixHashEntry* e = hash->buckets[h]; e; e = e->next) {
-    if (h_nomod == e->hash && hash->equal_func(zix_hash_value(e), value)) {
-      *next_ptr = e->next;
-      free(e);
-      --hash->count;
-      return ZIX_STATUS_SUCCESS;
-    }
-    next_ptr = &e->next;
-  }
+zix_hash_insert(ZixHash* const hash, ZixHashRecord* const record)
+{
+  const ZixHashKey* const key      = hash->key_func(record);
+  const ZixHashInsertPlan position = zix_hash_plan_insert(hash, key);
 
-  if (hash->n_buckets != sizes) {
-    const unsigned prev_n_buckets = *(hash->n_buckets - 1);
-    if (hash->count - 1 <= prev_n_buckets) {
-      if (!rehash(hash, prev_n_buckets)) {
-        --hash->n_buckets;
-      }
-    }
+  return zix_hash_insert_at(hash, position, record);
+}
+
+ZixStatus
+zix_hash_erase(ZixHash* const        hash,
+               const ZixHashIter     i,
+               ZixHashRecord** const removed)
+{
+  // Replace entry with a tombstone
+  *removed               = hash->entries[i].value;
+  hash->entries[i].hash  = tombstone;
+  hash->entries[i].value = NULL;
+
+  // Decrease element count and rehash if necessary
+  --hash->count;
+  if (hash->count < hash->n_entries / 4u) {
+    return shrink(hash);
   }
 
-  return ZIX_STATUS_NOT_FOUND;
+  return ZIX_STATUS_SUCCESS;
 }
 
-void
-zix_hash_foreach(ZixHash* hash, ZixHashVisitFunc f, void* user_data)
+ZixStatus
+zix_hash_remove(ZixHash* const          hash,
+                const ZixHashKey* const key,
+                ZixHashRecord** const   removed)
 {
-  for (unsigned b = 0; b < *hash->n_buckets; ++b) {
-    ZixHashEntry* bucket = hash->buckets[b];
-    for (ZixHashEntry* e = bucket; e; e = e->next) {
-      f(zix_hash_value(e), user_data);
-    }
-  }
+  const ZixHashIter i = zix_hash_find(hash, key);
+
+  return i == hash->n_entries ? ZIX_STATUS_NOT_FOUND
+                              : zix_hash_erase(hash, i, removed);
 }
diff --git a/src/zix/hash.h b/src/zix/hash.h
index 39050381..f08d267b 100644
--- a/src/zix/hash.h
+++ b/src/zix/hash.h
@@ -1,5 +1,5 @@
 /*
-  Copyright 2011-2020 David Robillard <d@drobilla.net>
+  Copyright 2011-2021 David Robillard <d@drobilla.net>
 
   Permission to use, copy, modify, and/or distribute this software for any
   purpose with or without fee is hereby granted, provided that the above
@@ -19,8 +19,8 @@
 
 #include "zix/common.h"
 
+#include <stdbool.h>
 #include <stddef.h>
-#include <stdint.h>
 
 #ifdef __cplusplus
 extern "C" {
@@ -33,98 +33,289 @@ extern "C" {
    @{
 */
 
+// ZIX_HASH_KEY_TYPE can be defined to make the API more type-safe
+#if defined(ZIX_HASH_KEY_TYPE)
+typedef ZIX_HASH_KEY_TYPE ZixHashKey;
+#else
+typedef void ZixHashKey;
+#endif
+
+// ZIX_HASH_RECORD_TYPE can be defined to make the API more type-safe
+#if defined(ZIX_HASH_RECORD_TYPE)
+typedef ZIX_HASH_RECORD_TYPE ZixHashRecord;
+#else
+typedef void ZixHashRecord;
+#endif
+
+// ZIX_HASH_SEARCH_DATA_TYPE can be defined to make the API more type-safe
+#if defined(ZIX_HASH_SEARCH_DATA_TYPE)
+typedef ZIX_HASH_SEARCH_DATA_TYPE ZixHashSearchData;
+#else
+typedef void ZixHashSearchData;
+#endif
+
+/**
+   A hash table.
+
+   This is an open addressing hash table that stores pointers to arbitrary user
+   data.  Internally, everything is stored in a single flat array that is
+   resized when necessary.
+
+   The single user-provided pointer that is stored in the table is called a
+   "record".  A record contains a "key", which is accessed via a user-provided
+   function.  This design supports storing large records (which may be
+   expensive to allocate/construct) without requiring an entire record to
+   search.  Simple atomic values can be stored by providing a trivial identity
+   function as a key function.
+
+   The table uses power of 2 sizes with a growth factor of 2, so that hash
+   values can be folded into an array index using bitwise AND as a fast modulo.
+   This means that only the necessary low bits of the hash value will be used,
+   so the hash function must be well-balanced within this range.  More or less
+   any good modern hash algorithm will be fine, but beware, for example, hash
+   functions that assume they are targeting a table with a prime size.
+
+   Since this doubles and halves in size, it may not be an optimal choice if
+   memory reuse is a priority.  A growth factor of 1.5 with fast range
+   reduction may be a better choice there, at the cost of requiring 128-bit
+   arithmetic on 64-bit platforms, and indexing operations being slightly more
+   expensive.
+*/
 typedef struct ZixHashImpl ZixHash;
 
+/// A full hash code for a key which is not folded down to the table size
+typedef size_t ZixHashCode;
+
 /**
-   Function for computing the hash of an element.
+   An iterator to an entry in a hash table.
+
+   This is really just an index, but should be considered opaque to the user
+   and only used via the provided API and equality comparison.
 */
-typedef uint32_t (*ZixHashFunc)(const void* value);
+typedef size_t ZixHashIter;
+
+/// User function for computing the hash of a key
+typedef ZixHashCode (*ZixHashFunc)(const ZixHashKey* key);
+
+/// User function for determining if two keys are truly equal
+typedef bool (*ZixKeyEqualFunc)(const ZixHashKey* a, const ZixHashKey* b);
+
+/// User function for determining if a key matches in a custom search
+typedef bool (*ZixKeyMatchFunc)(const ZixHashKey*        key,
+                                const ZixHashSearchData* user_data);
+
+/// User function for getting the key of a record
+typedef const ZixHashKey* (*ZixKeyFunc)(const ZixHashRecord* record);
 
 /**
-   Function to visit a hash element.
+   A "plan" (position) to insert a record in a hash table.
+
+   This contains everything necessary to insert a record, except the record
+   itself.  It is exposed to split up insertion so that records only need to be
+   allocated if an existing record is not found, but the contents should be
+   considered opaque to the user.
 */
-typedef void (*ZixHashVisitFunc)(void* value, void* user_data);
+typedef struct {
+  ZixHashCode code;  ///< Hash code used to find this position
+  ZixHashIter index; ///< Index into hash table
+} ZixHashInsertPlan;
 
 /**
    Create a new hash table.
 
-   To minimize space overhead, unlike many hash tables this stores a single
-   value, not a key and a value.  Any size of value can be stored, but all the
-   values in the hash table must be the same size, and the values must be safe
-   to copy with memcpy.  To get key:value behaviour, simply insert a struct
-   with a key and value into the hash.
-
-   @param hash_func The hashing function.
-   @param equal_func A function to test value equality.
-   @param value_size The size of the values to be stored.
+   @param key_func A function to retrieve the key from a record.
+   @param hash_func The key hashing function.
+   @param equal_func A function to test keys for equality.
 */
 ZIX_API
 ZixHash*
-zix_hash_new(ZixHashFunc hash_func, ZixEqualFunc equal_func, size_t value_size);
+zix_hash_new(ZixKeyFunc      key_func,
+             ZixHashFunc     hash_func,
+             ZixKeyEqualFunc equal_func);
 
-/**
-   Free `hash`.
-*/
+/// Free `hash`
 ZIX_API
 void
 zix_hash_free(ZixHash* hash);
 
-/**
-   Return the number of elements in `hash`.
-*/
+/// Return an iterator to the first record in a hash, or the end if it is empty
+ZIX_PURE_API
+ZixHashIter
+zix_hash_begin(const ZixHash* hash);
+
+/// Return an iterator one past the last possible record in a hash
+ZIX_PURE_API
+ZixHashIter
+zix_hash_end(const ZixHash* hash);
+
+/// Return the record pointed to by an iterator
+ZIX_PURE_API
+ZixHashRecord*
+zix_hash_get(const ZixHash* hash, ZixHashIter i);
+
+/// Return an iterator that has been advanced to the next record in a hash
+ZIX_PURE_API
+ZixHashIter
+zix_hash_next(const ZixHash* hash, ZixHashIter i);
+
+/// Return the number of elements in a hash
 ZIX_PURE_API
 size_t
 zix_hash_size(const ZixHash* hash);
 
 /**
-   Insert an item into `hash`.
+   Find the best position to insert a record with the given key.
+
+   This searches the hash table and returns either the position of an existing
+   equivalent record, or the best available position to insert a new one.  The
+   difference can be determined with zix_hash_record_at().
+
+   If the returned position is not occupied, then it is valid to insert a
+   record with this key using zix_hash_insert_at() until the hash table is
+   modified (which invalidates the position).
+*/
+ZIX_API
+ZixHashInsertPlan
+zix_hash_plan_insert(const ZixHash* hash, const ZixHashKey* key);
+
+/**
+   Find the best position to insert a record with a custom search.
 
-   If no matching value is found, ZIX_STATUS_SUCCESS will be returned, and @p
-   inserted will be pointed to the copy of `value` made in the new hash node.
+   This is an advanced low-level version of zix_hash_plan_insert().  It allows
+   a precalculated hash code to be given, along with a custom search predicate.
+   These must be compatible with the functions that manage the hash table:
+   trivial usage would be to pass the equality function used by the hash and
+   the key to search for.
 
-   If a matching value already exists, ZIX_STATUS_EXISTS will be returned, and
-   `inserted` will be pointed to the existing value.
+   This is exposed to make it possible to avoid constructing a key at all when
+   potentially inserting.  For example, if keys are structures with a fixed
+   header and a variably-sized body, and you have a separate header and body
+   this can be used to find an insert position without having to allocate a
+   contiguous key.
+
+   Note that care must be taken when using this function: improper use can
+   corrupt the hash table.  The hash code given must be correct for the key to
+   be inserted, and the predicate must return true only if the key it is called
+   with (the first argument) matches the key to be inserted.
+*/
+ZIX_API
+ZixHashInsertPlan
+zix_hash_plan_insert_prehashed(const ZixHash*           hash,
+                               ZixHashCode              code,
+                               ZixKeyMatchFunc          predicate,
+                               const ZixHashSearchData* user_data);
+
+/**
+   Return the record at the given position, or null.
+
+   This is used to determine if a position returned from zix_hash_plan_insert()
+   can be used to insert a new record, or to access the existing matching
+   record.
+*/
+ZIX_PURE_API
+ZixHashRecord*
+zix_hash_record_at(const ZixHash* hash, ZixHashInsertPlan position);
+
+/**
+   Insert a record at a specific position.
+
+   The position must have been found with an earlier call to
+   zix_hash_plan_insert(), and no modifications must have been made to the hash
+   table since.
+
+   @param hash The hash table.
+
+   @param position The position for the new record.
+
+   @param record The record to insert which, on success, can now be considered
+   owned by the hash table.
+
+   @return ZIX_STATUS_SUCCESS, ZIX_STATUS_EXISTS if a record already exists at
+   this position, or ZIX_STATUS_NO_MEM if growing the hash table failed.
+*/
+ZIX_API
+ZixStatus
+zix_hash_insert_at(ZixHash*          hash,
+                   ZixHashInsertPlan position,
+                   ZixHashRecord*    record);
+
+/**
+   Insert a record.
+
+   This is a trivial wrapper for zix_hash_plan_insert() and
+   zix_hash_insert_at() that is more convenient when record construction is not
+   expensive.
 
    @param hash The hash table.
-   @param value The value to be inserted.
-   @param inserted The copy of `value` in the hash table.
+
+   @param record The record to insert which, on success, can now be considered
+   owned by the hash table.
+
    @return ZIX_STATUS_SUCCESS, ZIX_STATUS_EXISTS, or ZIX_STATUS_NO_MEM.
 */
 ZIX_API
 ZixStatus
-zix_hash_insert(ZixHash* hash, const void* value, void** inserted);
+zix_hash_insert(ZixHash* hash, ZixHashRecord* record);
 
 /**
-   Remove an item from `hash`.
+   Erase a record at a specific position.
+
+   @param hash The hash table to remove the record from.
+
+   @param i Iterator to the record to remove.  This must be a valid iterator
+   from an earlier call to zix_hash_find(), that is, the hash table must not
+   have been modified since.
+
+   @param removed Set to the removed record, or null.
+
+   @return ZIX_STATUS_SUCCES or ZIX_STATUS_BAD_ARG if `i` does not point at a
+   removable record.
+*/
+ZIX_API
+ZixStatus
+zix_hash_erase(ZixHash* hash, ZixHashIter i, ZixHashRecord** removed);
+
+/**
+   Remove a record.
 
    @param hash The hash table.
-   @param value The value to remove.
+   @param key The key of the record to remove.
+   @param removed Set to the removed record, or null.
    @return ZIX_STATUS_SUCCES or ZIX_STATUS_NOT_FOUND.
 */
 ZIX_API
 ZixStatus
-zix_hash_remove(ZixHash* hash, const void* value);
+zix_hash_remove(ZixHash* hash, const ZixHashKey* key, ZixHashRecord** removed);
 
 /**
-   Search for an item in `hash`.
+   Find the position of a record with a given key.
 
-   @param hash The hash table.
-   @param value The value to search for.
+   @param hash The hash table to search.
+
+   @param key The key of the desired record.
+
+   @return An iterator to the matching record, or the end iterator if no such
+   record exists.
 */
 ZIX_API
-void*
-zix_hash_find(const ZixHash* hash, const void* value);
+ZixHashIter
+zix_hash_find(const ZixHash* hash, const ZixHashKey* key);
 
 /**
-   Call `f` on each value in `hash`.
+   Find a record with a given key.
 
-   @param hash The hash table.
-   @param f The function to call on each value.
-   @param user_data The user_data parameter passed to `f`.
+   This is essentially the same as zix_hash_find(), but returns a pointer to
+   the record for convenience.
+
+   @param hash The hash table to search.
+
+   @param key The key of the desired record.
+
+   @return A pointer to the matching record, of null if no such record exists.
 */
 ZIX_API
-void
-zix_hash_foreach(ZixHash* hash, ZixHashVisitFunc f, void* user_data);
+ZixHashRecord*
+zix_hash_find_record(const ZixHash* hash, const ZixHashKey* key);
 
 /**
    @}
diff --git a/test/test_nodes.c b/test/test_nodes.c
index 74c99c11..c841acc4 100644
--- a/test/test_nodes.c
+++ b/test/test_nodes.c
@@ -19,9 +19,12 @@
 #include "serd/serd.h"
 
 #include <assert.h>
+#include <stdbool.h>
 #include <stddef.h>
 #include <string.h>
 
+#define NS_XSD "http://www.w3.org/2001/XMLSchema#"
+
 static void
 test_intern(void)
 {
@@ -45,24 +48,6 @@ test_intern(void)
 }
 
 static void
-test_manage(void)
-{
-  SerdNodes* nodes = serd_nodes_new();
-  SerdNode*  node  = serd_new_string(SERD_STRING("node"));
-
-  assert(!serd_nodes_manage(nodes, NULL));
-
-  const SerdNode* managed1 = serd_nodes_manage(nodes, node);
-  assert(managed1 == node);
-
-  SerdNode*       equal    = serd_new_string(SERD_STRING("node"));
-  const SerdNode* managed2 = serd_nodes_manage(nodes, equal);
-  assert(managed2 == node);
-
-  serd_nodes_free(nodes);
-}
-
-static void
 test_string(void)
 {
   static const SerdStringView string = SERD_STRING("string");
@@ -81,6 +66,29 @@ test_string(void)
 }
 
 static void
+test_invalid_literal(void)
+{
+  SerdNodes* const nodes = serd_nodes_new();
+
+  assert(!serd_nodes_literal(nodes,
+                             SERD_STRING("double meta"),
+                             SERD_HAS_LANGUAGE | SERD_HAS_DATATYPE,
+                             SERD_STRING("What am I?")));
+
+  assert(!serd_nodes_literal(nodes,
+                             SERD_STRING("empty language"),
+                             SERD_HAS_LANGUAGE,
+                             SERD_EMPTY_STRING()));
+
+  assert(!serd_nodes_literal(nodes,
+                             SERD_STRING("empty datatype"),
+                             SERD_HAS_DATATYPE,
+                             SERD_EMPTY_STRING()));
+
+  serd_nodes_free(nodes);
+}
+
+static void
 test_plain_literal(void)
 {
   static const SerdStringView string   = SERD_STRING("string");
@@ -102,6 +110,24 @@ test_plain_literal(void)
   assert(serd_node_length(node) == string.len);
   assert(!strcmp(serd_node_string(node), string.buf));
 
+  const SerdNode* const alias =
+    serd_nodes_literal(nodes, string, SERD_HAS_LANGUAGE, language);
+
+  assert(alias == node);
+
+  const SerdNode* const other =
+    serd_nodes_literal(nodes, string, SERD_HAS_LANGUAGE, SERD_STRING("de"));
+
+  assert(other != node);
+
+  const SerdNode* const other_language_node = serd_node_language(other);
+  assert(other_language_node);
+  assert(serd_node_type(other_language_node) == SERD_LITERAL);
+  assert(serd_node_length(other_language_node) == 2);
+  assert(!strcmp(serd_node_string(other_language_node), "de"));
+  assert(serd_node_length(other) == string.len);
+  assert(!strcmp(serd_node_string(other), string.buf));
+
   serd_nodes_free(nodes);
 }
 
@@ -128,6 +154,152 @@ test_typed_literal(void)
   assert(serd_node_length(node) == string.len);
   assert(!strcmp(serd_node_string(node), string.buf));
 
+  const SerdNode* const alias =
+    serd_nodes_literal(nodes, string, SERD_HAS_DATATYPE, datatype);
+
+  assert(alias == node);
+
+  serd_nodes_free(nodes);
+}
+
+static void
+test_boolean(void)
+{
+  SerdNodes* const nodes = serd_nodes_new();
+
+  const SerdNode* const false1 = serd_nodes_boolean(nodes, false);
+  const SerdNode* const false2 = serd_nodes_boolean(nodes, false);
+  const SerdNode* const true1  = serd_nodes_boolean(nodes, true);
+  const SerdNode* const true2  = serd_nodes_boolean(nodes, true);
+
+  assert(false1 == false2);
+  assert(true1 == true2);
+  assert(false1 != true1);
+  assert(!strcmp(serd_node_string(false1), "false"));
+  assert(!strcmp(serd_node_string(true1), "true"));
+  assert(serd_node_length(false1) == strlen(serd_node_string(false1)));
+  assert(serd_node_length(true1) == strlen(serd_node_string(true1)));
+  assert(serd_nodes_size(nodes) == 2);
+
+  const SerdNode* const true_datatype = serd_node_datatype(true1);
+  assert(true_datatype);
+  assert(!strcmp(serd_node_string(true_datatype), NS_XSD "boolean"));
+
+  const SerdNode* const false_datatype = serd_node_datatype(false1);
+  assert(false_datatype);
+  assert(!strcmp(serd_node_string(false_datatype), NS_XSD "boolean"));
+
+  serd_nodes_free(nodes);
+}
+
+static void
+test_decimal(void)
+{
+  SerdNodes* const      nodes = serd_nodes_new();
+  const SerdNode* const a     = serd_nodes_decimal(nodes, -12.3456789);
+  const SerdNode* const b     = serd_nodes_decimal(nodes, -12.3456789);
+
+  assert(a == b);
+  assert(!strcmp(serd_node_string(a), "-12.3456789"));
+  assert(serd_node_length(a) == strlen(serd_node_string(a)));
+  assert(serd_nodes_size(nodes) == 1);
+
+  const SerdNode* const default_datatype = serd_node_datatype(a);
+  assert(default_datatype);
+  assert(!strcmp(serd_node_string(default_datatype), NS_XSD "decimal"));
+
+  serd_nodes_free(nodes);
+}
+
+static void
+test_double(void)
+{
+  SerdNodes* const nodes = serd_nodes_new();
+
+  const SerdNode* const a = serd_nodes_double(nodes, -1.2E3);
+  const SerdNode* const b = serd_nodes_double(nodes, -1.2E3);
+
+  assert(a == b);
+  assert(!strcmp(serd_node_string(a), "-1.2E3"));
+  assert(serd_node_length(a) == strlen(serd_node_string(a)));
+  assert(serd_nodes_size(nodes) == 1);
+
+  serd_nodes_free(nodes);
+}
+
+static void
+test_float(void)
+{
+  SerdNodes* const nodes = serd_nodes_new();
+
+  const SerdNode* const a = serd_nodes_float(nodes, -1.2E3f);
+  const SerdNode* const b = serd_nodes_float(nodes, -1.2E3f);
+
+  assert(a == b);
+  assert(!strcmp(serd_node_string(a), "-1.2E3"));
+  assert(serd_node_length(a) == strlen(serd_node_string(a)));
+  assert(serd_nodes_size(nodes) == 1);
+
+  serd_nodes_free(nodes);
+}
+
+static void
+test_integer(void)
+{
+  SerdNodes* const nodes = serd_nodes_new();
+
+  const SerdNode* const a =
+    serd_nodes_integer(nodes, -1234567890, SERD_EMPTY_STRING());
+
+  const SerdNode* const b =
+    serd_nodes_integer(nodes, -1234567890, SERD_EMPTY_STRING());
+
+  assert(a == b);
+  assert(!strcmp(serd_node_string(a), "-1234567890"));
+  assert(serd_node_length(a) == strlen(serd_node_string(a)));
+  assert(serd_nodes_size(nodes) == 1);
+
+  const SerdNode* const default_datatype = serd_node_datatype(a);
+  assert(default_datatype);
+  assert(!strcmp(serd_node_string(default_datatype), NS_XSD "integer"));
+
+  serd_nodes_free(nodes);
+}
+
+static void
+test_base64(void)
+{
+  static const char data[] = {'f', 'o', 'o', 'b', 'a', 'r'};
+
+  SerdNodes* const nodes = serd_nodes_new();
+
+  const SerdNode* const a =
+    serd_nodes_base64(nodes, &data, sizeof(data), SERD_EMPTY_STRING());
+
+  const SerdNode* const b =
+    serd_nodes_base64(nodes, &data, sizeof(data), SERD_EMPTY_STRING());
+
+  assert(a == b);
+  assert(!strcmp(serd_node_string(a), "Zm9vYmFy"));
+  assert(serd_node_length(a) == strlen(serd_node_string(a)));
+  assert(serd_nodes_size(nodes) == 1);
+
+  const SerdNode* const default_datatype = serd_node_datatype(a);
+  assert(default_datatype);
+  assert(!strcmp(serd_node_string(default_datatype), NS_XSD "base64Binary"));
+
+  const SerdStringView user_datatype =
+    SERD_STRING("http://example.org/UserDatatype");
+
+  const SerdNode* const custom =
+    serd_nodes_base64(nodes, &data, sizeof(data), user_datatype);
+  assert(custom);
+  assert(!strcmp(serd_node_string(custom), "Zm9vYmFy"));
+
+  const SerdNode* const custom_datatype = serd_node_datatype(custom);
+  assert(custom_datatype);
+  assert(!strcmp(serd_node_string(custom_datatype), user_datatype.buf));
+
   serd_nodes_free(nodes);
 }
 
@@ -150,6 +322,34 @@ test_uri(void)
 }
 
 static void
+test_parsed_uri(void)
+{
+  static const SerdStringView string = SERD_STRING("http://example.org/");
+
+  SerdNodes* const      nodes = serd_nodes_new();
+  const SerdURIView     uri   = serd_parse_uri(string.buf);
+  const SerdNode* const node  = serd_nodes_parsed_uri(nodes, uri);
+
+  assert(node);
+  assert(serd_node_type(node) == SERD_URI);
+  assert(!serd_node_flags(node));
+  assert(!serd_node_datatype(node));
+  assert(!serd_node_language(node));
+  assert(serd_node_length(node) == string.len);
+  assert(!strcmp(serd_node_string(node), string.buf));
+
+  const SerdNode* const alias = serd_nodes_parsed_uri(nodes, uri);
+  assert(alias == node);
+
+  const SerdNode* const other =
+    serd_nodes_parsed_uri(nodes, serd_parse_uri("http://example.org/x"));
+
+  assert(other != node);
+
+  serd_nodes_free(nodes);
+}
+
+static void
 test_blank(void)
 {
   static const SerdStringView string = SERD_STRING("b42");
@@ -170,17 +370,40 @@ test_blank(void)
 static void
 test_deref(void)
 {
-  SerdNodes*      nodes   = serd_nodes_new();
-  const SerdNode* managed = serd_nodes_string(nodes, SERD_STRING("node"));
+  SerdNodes*      nodes    = serd_nodes_new();
+  const SerdNode* original = serd_nodes_string(nodes, SERD_STRING("node"));
+  const SerdNode* another  = serd_nodes_string(nodes, SERD_STRING("node"));
 
-  serd_nodes_deref(nodes, managed);
+  assert(original == another);
 
-  SerdNode*       node     = serd_new_string(SERD_STRING("node"));
-  const SerdNode* interned = serd_nodes_intern(nodes, node);
+  // Dereference the original and ensure the other reference kept it alive
+  serd_nodes_deref(nodes, original);
+  assert(serd_node_length(another) == 4);
+  assert(!strcmp(serd_node_string(another), "node"));
 
-  assert(interned != node);
+  // Drop the other/final reference (freeing the node)
+  serd_nodes_deref(nodes, another);
 
-  serd_node_free(node);
+  /* Intern some other irrelevant node first to (hopefully) avoid the allocator
+     just giving us back the same piece of memory. */
+
+  const SerdNode* other = serd_nodes_string(nodes, SERD_STRING("XXXX"));
+  assert(!strcmp(serd_node_string(other), "XXXX"));
+
+  // Intern a new equivalent node to the original and check that it's really new
+  const SerdNode* imposter = serd_nodes_string(nodes, SERD_STRING("node"));
+  assert(imposter != original);
+  assert(serd_node_length(imposter) == 4);
+  assert(!strcmp(serd_node_string(imposter), "node"));
+
+  // Check that dereferencing some random unknown node doesn't crash
+  SerdNode* unmanaged = serd_new_string(SERD_STRING("unmanaged"));
+  serd_nodes_deref(nodes, unmanaged);
+  serd_node_free(unmanaged);
+
+  serd_nodes_deref(nodes, NULL);
+  serd_nodes_deref(nodes, imposter);
+  serd_nodes_deref(nodes, other);
   serd_nodes_free(nodes);
 }
 
@@ -205,11 +428,18 @@ int
 main(void)
 {
   test_intern();
-  test_manage();
   test_string();
+  test_invalid_literal();
   test_plain_literal();
   test_typed_literal();
+  test_boolean();
+  test_decimal();
+  test_double();
+  test_float();
+  test_integer();
+  test_base64();
   test_uri();
+  test_parsed_uri();
   test_blank();
   test_deref();
   test_get();
author	David Robillard <d@drobilla.net>	2021-07-22 18:35:31 -0400
committer	David Robillard <d@drobilla.net>	2022-01-14 19:37:51 -0500
commit	955be33cead7596506cf86211da6b0e53827ef96 (patch)
tree	0dac1a7a31e8c50d1ba7200648d6ce402f97867d
parent	34852e8faa380f12b11522cfa998df4f260e3856 (diff)
download	serd-955be33cead7596506cf86211da6b0e53827ef96.tar.gz serd-955be33cead7596506cf86211da6b0e53827ef96.tar.bz2 serd-955be33cead7596506cf86211da6b0e53827ef96.zip