aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Robillard <d@drobilla.net>2021-07-22 18:35:31 -0400
committerDavid Robillard <d@drobilla.net>2022-01-14 19:37:51 -0500
commit955be33cead7596506cf86211da6b0e53827ef96 (patch)
tree0dac1a7a31e8c50d1ba7200648d6ce402f97867d
parent34852e8faa380f12b11522cfa998df4f260e3856 (diff)
downloadserd-955be33cead7596506cf86211da6b0e53827ef96.tar.gz
serd-955be33cead7596506cf86211da6b0e53827ef96.tar.bz2
serd-955be33cead7596506cf86211da6b0e53827ef96.zip
Avoid dynamic allocation when fetching interned nodes
This is more or less a total rewrite of SerdNodes and the underlying ZixHash to make efficient use of the new node construction API.
-rw-r--r--include/serd/serd.h93
-rw-r--r--src/node_spec.h53
-rw-r--r--src/nodes.c467
-rw-r--r--src/nodes.h69
-rw-r--r--src/zix/digest.c250
-rw-r--r--src/zix/digest.h72
-rw-r--r--src/zix/hash.c408
-rw-r--r--src/zix/hash.h279
-rw-r--r--test/test_nodes.c282
9 files changed, 1582 insertions, 391 deletions
diff --git a/include/serd/serd.h b/include/serd/serd.h
index f67752a5..77986150 100644
--- a/include/serd/serd.h
+++ b/include/serd/serd.h
@@ -1223,25 +1223,33 @@ serd_nodes_intern(SerdNodes* SERD_NONNULL nodes,
const SerdNode* SERD_NULLABLE node);
/**
- Manage `node`.
+ Make a string node.
+
+ A new node will be added if an equivalent node is not already in the set.
+*/
+SERD_API
+const SerdNode* SERD_ALLOCATED
+serd_nodes_string(SerdNodes* SERD_NONNULL nodes, SerdStringView string);
- Like `serd_nodes_intern`, but takes ownership of `node`, freeing it and
- returning a previously interned/managed equivalent node if necessary.
+/**
+ Make a URI node from a string.
- @return A node that is equivalent to `node`.
+ A new node will be constructed with serd_node_construct_token() if an
+ equivalent one is not already in the set.
*/
SERD_API
const SerdNode* SERD_ALLOCATED
-serd_nodes_manage(SerdNodes* SERD_NONNULL nodes, SerdNode* SERD_NULLABLE node);
+serd_nodes_uri(SerdNodes* SERD_NONNULL nodes, SerdStringView string);
/**
- Make a string node.
+ Make a URI node from a parsed URI.
- A new node will be added if an equivalent node is not already in the set.
+ A new node will be constructed with serd_node_construct_uri() if an
+ equivalent one is not already in the set.
*/
SERD_API
const SerdNode* SERD_ALLOCATED
-serd_nodes_string(SerdNodes* SERD_NONNULL nodes, SerdStringView string);
+serd_nodes_parsed_uri(SerdNodes* SERD_NONNULL nodes, SerdURIView uri);
/**
Make a literal node with optional datatype or language.
@@ -1273,18 +1281,75 @@ serd_nodes_literal(SerdNodes* SERD_NONNULL nodes,
SerdStringView meta);
/**
- Make a URI node.
+ Make a canonical xsd:boolean node.
- A new node will be added if an equivalent node is not already in the set.
+ A new node will be constructed with serd_node_construct_boolean() if an
+ equivalent one is not already in the set.
*/
SERD_API
const SerdNode* SERD_ALLOCATED
-serd_nodes_uri(SerdNodes* SERD_NONNULL nodes, SerdStringView string);
+serd_nodes_boolean(SerdNodes* SERD_NONNULL nodes, bool value);
+
+/**
+ Make a canonical xsd:decimal node.
+
+ A new node will be constructed with serd_node_construct_decimal() if an
+ equivalent one is not already in the set.
+*/
+SERD_API
+const SerdNode* SERD_ALLOCATED
+serd_nodes_decimal(SerdNodes* SERD_NONNULL nodes, double value);
+
+/**
+ Make a canonical xsd:double node.
+
+ A new node will be constructed with serd_node_construct_double() if an
+ equivalent one is not already in the set.
+*/
+SERD_API
+const SerdNode* SERD_ALLOCATED
+serd_nodes_double(SerdNodes* SERD_NONNULL nodes, double value);
+
+/**
+ Make a canonical xsd:float node.
+
+ A new node will be constructed with serd_node_construct_float() if an
+ equivalent one is not already in the set.
+*/
+SERD_API
+const SerdNode* SERD_ALLOCATED
+serd_nodes_float(SerdNodes* SERD_NONNULL nodes, float value);
+
+/**
+ Make a canonical xsd:integer node.
+
+ A new node will be constructed with serd_node_construct_integer() if an
+ equivalent one is not already in the set.
+*/
+SERD_API
+const SerdNode* SERD_ALLOCATED
+serd_nodes_integer(SerdNodes* SERD_NONNULL nodes,
+ int64_t value,
+ SerdStringView datatype);
+
+/**
+ Make a canonical xsd:base64Binary node.
+
+ A new node will be constructed with serd_node_construct_base64() if an
+ equivalent one is not already in the set.
+*/
+SERD_API
+const SerdNode* SERD_ALLOCATED
+serd_nodes_base64(SerdNodes* SERD_NONNULL nodes,
+ const void* SERD_NONNULL value,
+ size_t value_size,
+ SerdStringView datatype);
/**
Make a blank node.
- A new node will be added if an equivalent node is not already in the set.
+ A new node will be constructed with serd_node_construct_token() if an
+ equivalent one is not already in the set.
*/
SERD_API
const SerdNode* SERD_ALLOCATED
@@ -1299,8 +1364,8 @@ serd_nodes_blank(SerdNodes* SERD_NONNULL nodes, SerdStringView string);
*/
SERD_API
void
-serd_nodes_deref(SerdNodes* SERD_NONNULL nodes,
- const SerdNode* SERD_NONNULL node);
+serd_nodes_deref(SerdNodes* SERD_NONNULL nodes,
+ const SerdNode* SERD_NULLABLE node);
/**
@}
diff --git a/src/node_spec.h b/src/node_spec.h
new file mode 100644
index 00000000..fcf19712
--- /dev/null
+++ b/src/node_spec.h
@@ -0,0 +1,53 @@
+/*
+ Copyright 2021 David Robillard <d@drobilla.net>
+
+ Permission to use, copy, modify, and/or distribute this software for any
+ purpose with or without fee is hereby granted, provided that the above
+ copyright notice and this permission notice appear in all copies.
+
+ THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+
+#ifndef SERD_NODE_SPEC_H
+#define SERD_NODE_SPEC_H
+
+#include "serd/serd.h"
+
+/**
+ A lightweight "specification" of a node.
+
+ This is essentially the arguments needed to construct any node combined into
+ a single structure. Since it only refers to strings elsewhere, it is
+ convenient as a way to completely specify a node without having to actually
+ allocate one.
+*/
+typedef struct {
+ SerdNodeType type; ///< Basic type of this node
+ SerdStringView string; ///< String contents of this node
+ SerdNodeFlags flags; ///< Additional node flags
+ SerdStringView meta; ///< String contents of datatype or language node
+} NodeSpec;
+
+static inline NodeSpec
+token_spec(const SerdNodeType type, const SerdStringView string)
+{
+ NodeSpec spec = {type, string, 0u, SERD_EMPTY_STRING()};
+ return spec;
+}
+
+static inline NodeSpec
+literal_spec(const SerdStringView string,
+ const SerdNodeFlags flags,
+ const SerdStringView meta)
+{
+ NodeSpec spec = {SERD_LITERAL, string, flags, meta};
+ return spec;
+}
+
+#endif // SERD_NODE_SPEC_H
diff --git a/src/nodes.c b/src/nodes.c
index 885731cc..a7d0ab2b 100644
--- a/src/nodes.c
+++ b/src/nodes.c
@@ -14,10 +14,18 @@
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
+#include "nodes.h"
+
#include "node.h"
+#include "node_spec.h"
+#include "system.h"
+
+// Define the types used in the hash interface for more type safety
+#define ZIX_HASH_KEY_TYPE SerdNode
+#define ZIX_HASH_RECORD_TYPE NodesEntry
+#define ZIX_HASH_SEARCH_DATA_TYPE NodeSpec
#include "serd/serd.h"
-#include "zix/common.h"
#include "zix/digest.h"
#include "zix/hash.h"
@@ -27,53 +35,175 @@
#include <stdlib.h>
#include <string.h>
-typedef struct {
- size_t refs;
- SerdNode* node;
-} NodesEntry;
+#if ((defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112l) || \
+ (defined(__cplusplus) && __cplusplus >= 201103L))
+
+static_assert(sizeof(NodesEntryHead) == sizeof(SerdNode),
+ "NodesEntryHead must be the same size as SerdNode for alignment");
+
+#endif
+
+/*
+ The main goal here is to make getting an existing node as fast as possible,
+ so that this can be used as a convenient node cache with minimal performance
+ overhead. Mainly this means striving to avoid allocation when potentially
+ inserting a new node.
+
+ This is achieved by generating a hash code from node components without
+ actually allocating that node, and using the low-level insertion interface of
+ ZixHash to do a custom search. This way, an entry is only allocated when
+ necessary, and the hash table is only searched once.
+
+ The downside is that subtle and very bad things will happen if the hash
+ generated for the node does not match the actual hash of the node. The
+ exessive number of assertions around this are there to provide some defense
+ against such mistakes. Cave operatur.
+*/
+
+/**
+ The maximum size of a node that will be made statically on the stack.
+
+ This mostly applies to things like numeric literal nodes, where a small
+ maximum size is exploited to avoid allocation. The largest static node
+ string is the longest xsd:decimal, which is 327 bytes. We need a bit more
+ space than that here for the node header, padding, and datatype.
+*/
+#define MAX_STATIC_NODE_SIZE 384
typedef struct {
- size_t refs;
- const SerdNode* node;
-} NodesSearchKey;
+ SerdNode node;
+ char body[MAX_STATIC_NODE_SIZE];
+} StaticNode;
struct SerdNodesImpl {
ZixHash* hash;
};
-static uint32_t
-nodes_hash(const void* n)
+static const StaticNode empty_static_node = {{0u, 0u, SERD_LITERAL}, {'\0'}};
+
+static const SerdNode*
+nodes_key(const NodesEntry* const entry)
+{
+ return &entry->node;
+}
+
+static ZixHashCode
+token_hash(const ZixHashCode seed,
+ const SerdNodeType type,
+ const SerdStringView string)
{
- const SerdNode* node = ((const NodesEntry*)n)->node;
+ const SerdNode node_header = {string.len, 0u, type};
+ ZixHashCode h = seed;
- return zix_digest_add(zix_digest_start(), node, serd_node_total_size(node));
+ h = zix_digest_aligned(h, &node_header, sizeof(node_header));
+ h = zix_digest(h, string.buf, string.len);
+ return h;
+}
+
+static ZixHashCode
+spec_hash(const NodeSpec spec)
+{
+ ZixHashCode h = token_hash(0u, spec.type, spec.string);
+
+ if (spec.flags & SERD_HAS_DATATYPE) {
+ h = token_hash(h, SERD_URI, spec.meta);
+ } else if (spec.flags & SERD_HAS_LANGUAGE) {
+ h = token_hash(h, SERD_LITERAL, spec.meta);
+ }
+
+ return h;
+}
+
+static ZixHashCode
+nodes_hash(const SerdNode* const node)
+{
+ /* If you're thinking "I know, I'll make this faster by just hashing the
+ entire node in one call!", think harder. Since zero bytes affect the hash
+ value, that would mean that the hash code for the node won't match the one
+ for the node spec, and the above-mentioned subtle and very bad things will
+ happen. This function deliberately mirrors spec_hash() above to make it
+ relatively easy to see that the two will match.
+
+ It would be more elegant to construct a spec from the node and then hash
+ that with the exact same code used for searching, but there's currently no
+ use for that anywhere else and this way is a bit faster. */
+
+ ZixHashCode h = token_hash(0u, node->type, serd_node_string_view(node));
+
+ if (node->flags & SERD_HAS_DATATYPE) {
+ h = token_hash(h, SERD_URI, serd_node_string_view(serd_node_meta_c(node)));
+ } else if (node->flags & SERD_HAS_LANGUAGE) {
+ h = token_hash(
+ h, SERD_LITERAL, serd_node_string_view(serd_node_meta_c(node)));
+ }
+
+ return h;
}
static bool
-nodes_equal(const void* a, const void* b)
+node_equals_spec(const SerdNode* const node, const NodeSpec* const spec)
{
- const SerdNode* a_node = ((const NodesEntry*)a)->node;
- const SerdNode* b_node = ((const NodesEntry*)b)->node;
- const size_t a_size = serd_node_total_size(a_node);
- const size_t b_size = serd_node_total_size(b_node);
- return ((a_node == b_node) ||
- (a_size == b_size && !memcmp(a_node, b_node, a_size)));
+ // Only datatype and language are relevant for equality
+ static const SerdNodeFlags flag_mask = SERD_HAS_DATATYPE | SERD_HAS_LANGUAGE;
+
+ const SerdNodeFlags flags = spec->flags & flag_mask;
+
+ return serd_node_type(node) == spec->type &&
+ serd_node_length(node) == spec->string.len &&
+ (node->flags & flag_mask) == flags &&
+ !strcmp(serd_node_string_i(node), spec->string.buf) &&
+ (!flags ||
+ !strcmp(serd_node_string_i(serd_node_meta_c(node)), spec->meta.buf));
}
-static void
-free_entry(void* value, void* user_data)
+static bool
+nodes_equal(const SerdNode* const a, const SerdNode* const b)
{
- (void)user_data;
+ static const SerdNodeFlags meta_mask =
+ (SERD_HAS_DATATYPE | SERD_HAS_LANGUAGE);
- NodesEntry* entry = (NodesEntry*)value;
- serd_node_free(entry->node);
+ if (a == b) {
+ return true;
+ }
+
+ if (a->length != b->length || a->flags != b->flags || a->type != b->type ||
+ !!memcmp(serd_node_string_i(a), serd_node_string_i(b), a->length)) {
+ return false;
+ }
+
+ if (a->flags & meta_mask) {
+ const size_t padded_length = serd_node_pad_length(a->length);
+ const size_t offset = padded_length / sizeof(SerdNode);
+ const SerdNode* const am = a + 1u + offset;
+ const SerdNode* const bm = b + 1u + offset;
+
+ return am->length == bm->length && am->type == bm->type &&
+ !memcmp(serd_node_string_i(am), serd_node_string_i(bm), am->length);
+ }
+
+ return true;
+}
+
+static NodesEntry*
+new_entry(const size_t node_size)
+{
+ NodesEntry* const entry = (NodesEntry*)serd_calloc_aligned(
+ serd_node_align, sizeof(NodesEntryHead) + node_size);
+
+ if (entry) {
+ entry->head.refs = 1u;
+ }
+
+ return entry;
}
SerdNodes*
serd_nodes_new(void)
{
- SerdNodes* nodes = (SerdNodes*)calloc(1, sizeof(SerdNodes));
- nodes->hash = zix_hash_new(nodes_hash, nodes_equal, sizeof(NodesEntry));
+ SerdNodes* const nodes = (SerdNodes*)calloc(1, sizeof(SerdNodes));
+
+ nodes->hash = zix_hash_new(nodes_key, nodes_hash, nodes_equal);
+
return nodes;
}
@@ -81,7 +211,12 @@ void
serd_nodes_free(SerdNodes* nodes)
{
if (nodes) {
- zix_hash_foreach(nodes->hash, free_entry, nodes);
+ for (ZixHashIter i = zix_hash_begin(nodes->hash);
+ i != zix_hash_end(nodes->hash);
+ i = zix_hash_next(nodes->hash, i)) {
+ serd_free_aligned(zix_hash_get(nodes->hash, i));
+ }
+
zix_hash_free(nodes->hash);
free(nodes);
}
@@ -96,22 +231,27 @@ serd_nodes_size(const SerdNodes* nodes)
const SerdNode*
serd_nodes_intern(SerdNodes* nodes, const SerdNode* node)
{
+ assert(nodes);
if (!node) {
return NULL;
}
- NodesSearchKey key = {1, node};
- NodesEntry* inserted = NULL;
-
- const ZixStatus st = zix_hash_insert(nodes->hash, &key, (void**)&inserted);
- if (st == ZIX_STATUS_SUCCESS) {
- inserted->node = serd_node_copy(node);
- } else if (st == ZIX_STATUS_EXISTS) {
- assert(serd_node_equals(inserted->node, node));
- ++inserted->refs;
+ const ZixHashInsertPlan plan = zix_hash_plan_insert(nodes->hash, node);
+ NodesEntry* const existing = zix_hash_record_at(nodes->hash, plan);
+ if (existing) {
+ assert(serd_node_equals(&existing->node, node));
+ ++existing->head.refs;
+ return &existing->node;
}
- return inserted ? inserted->node : NULL;
+ const size_t node_size = serd_node_total_size(node);
+ NodesEntry* const entry = new_entry(node_size);
+
+ memcpy(&entry->node, node, node_size);
+
+ // Insert the entry (blissfully ignoring a failed hash size increase)
+ zix_hash_insert_at(nodes->hash, plan, entry);
+ return &entry->node;
}
const SerdNode*
@@ -121,72 +261,263 @@ serd_nodes_get(const SerdNodes* const nodes, const SerdNode* const node)
return NULL;
}
- const NodesSearchKey key = {1, node};
- NodesEntry* const entry = (NodesEntry*)zix_hash_find(nodes->hash, &key);
+ NodesEntry* const entry = zix_hash_find_record(nodes->hash, node);
+
+ return entry ? &entry->node : NULL;
+}
+
+static const SerdNode*
+serd_nodes_manage_entry(SerdNodes* const nodes, NodesEntry* const entry)
+{
+ const SerdNode* const node = &entry->node;
+ const ZixHashInsertPlan plan = zix_hash_plan_insert(nodes->hash, node);
+ NodesEntry* const existing = zix_hash_record_at(nodes->hash, plan);
+ if (existing) {
+ assert(serd_node_equals(&existing->node, node));
+ assert(nodes_hash(&existing->node) == plan.code);
+ ++existing->head.refs;
+ serd_free_aligned(entry);
+ return &existing->node;
+ }
- return entry ? entry->node : NULL;
+ // Insert the entry (blissfully ignoring a failed hash size increase)
+ zix_hash_insert_at(nodes->hash, plan, entry);
+ assert(nodes_hash(&entry->node) == plan.code);
+ return &entry->node;
+}
+
+static const SerdNode*
+serd_nodes_token(SerdNodes* const nodes,
+ const SerdNodeType type,
+ const SerdStringView string)
+{
+ const NodeSpec key = token_spec(type, string);
+ const ZixHashCode code = spec_hash(key);
+ const ZixHashInsertPlan plan =
+ zix_hash_plan_insert_prehashed(nodes->hash, code, node_equals_spec, &key);
+
+ NodesEntry* const existing = zix_hash_record_at(nodes->hash, plan);
+ if (existing) {
+ assert(nodes_hash(&existing->node) == code);
+ ++existing->head.refs;
+ return &existing->node;
+ }
+
+ const size_t padded_length = serd_node_pad_length(string.len);
+ const size_t node_size = sizeof(SerdNode) + padded_length;
+ NodesEntry* const entry = new_entry(node_size);
+ SerdNode* const node = entry ? &entry->node : NULL;
+
+ if (node) {
+ // Construct the token directly into the node in the new entry
+ const SerdWriteResult r =
+ serd_node_construct_token(node_size, &entry->node, type, string);
+
+ assert(!r.status); // Never fails with sufficient space
+ (void)r;
+
+ // Insert the entry (blissfully ignoring a failed hash size increase)
+ zix_hash_insert_at(nodes->hash, plan, entry);
+ assert(nodes_hash(node) == code);
+ }
+
+ return node;
}
const SerdNode*
-serd_nodes_manage(SerdNodes* nodes, SerdNode* node)
+serd_nodes_literal(SerdNodes* const nodes,
+ const SerdStringView string,
+ const SerdNodeFlags flags,
+ const SerdStringView meta)
{
- if (!node) {
+ // Calculate a hash code for the literal without actually constructing it
+ const NodeSpec spec = literal_spec(string, flags, meta);
+ const ZixHashCode code = spec_hash(spec);
+
+ // Find an insert position in the hash table
+ const ZixHashInsertPlan plan =
+ zix_hash_plan_insert_prehashed(nodes->hash, code, node_equals_spec, &spec);
+
+ // If we found an existing node, bump its reference count and return it
+ NodesEntry* const existing = zix_hash_record_at(nodes->hash, plan);
+ if (existing) {
+ assert(nodes_hash(&existing->node) == code);
+ ++existing->head.refs;
+ return &existing->node;
+ }
+
+ // We need to insert a new entry, so determine how much space the node needs
+ SerdWriteResult r = serd_node_construct_literal(0, NULL, string, flags, meta);
+ if (r.status != SERD_ERR_OVERFLOW) {
return NULL;
}
- NodesSearchKey key = {1, node};
- NodesEntry* inserted = NULL;
+ // Allocate a new entry with enough space for the node
+ NodesEntry* const entry = new_entry(r.count);
+ SerdNode* const node = entry ? &entry->node : NULL;
+
+ if (node) {
+ // Construct the literal directly into the node in the new entry
+ r = serd_node_construct_literal(r.count, node, string, flags, meta);
+ assert(!r.status);
+ (void)r;
- const ZixStatus st = zix_hash_insert(nodes->hash, &key, (void**)&inserted);
- if (st == ZIX_STATUS_EXISTS) {
- assert(serd_node_equals(inserted->node, node));
- serd_node_free(node);
- ++inserted->refs;
+ // Insert the entry (blissfully ignoring a failed hash size increase)
+ zix_hash_insert_at(nodes->hash, plan, entry);
+ assert(nodes_hash(node) == code);
}
- return inserted ? inserted->node : NULL;
+ return node;
}
-/* TODO: Make these methods faster by being smarter internally and avoiding
- unnecessary allocatio in cases where the node is already in the set. */
-
const SerdNode*
serd_nodes_string(SerdNodes* const nodes, const SerdStringView string)
{
- return serd_nodes_manage(nodes, serd_new_string(string));
+ return serd_nodes_token(nodes, SERD_LITERAL, string);
}
-const SerdNode* SERD_ALLOCATED
-serd_nodes_literal(SerdNodes* const nodes,
- const SerdStringView string,
- const SerdNodeFlags flags,
- const SerdStringView meta)
+static const SerdNode*
+try_intern(SerdNodes* const nodes,
+ const SerdWriteResult r,
+ const SerdNode* const node)
+{
+ return r.status ? NULL : serd_nodes_intern(nodes, node);
+}
+
+const SerdNode*
+serd_nodes_boolean(SerdNodes* const nodes, bool value)
+{
+ StaticNode key = empty_static_node;
+
+ return try_intern(
+ nodes, serd_node_construct_boolean(sizeof(key), &key, value), &key.node);
+}
+
+const SerdNode*
+serd_nodes_decimal(SerdNodes* const nodes, const double value)
+{
+ StaticNode key = empty_static_node;
+
+ return try_intern(
+ nodes, serd_node_construct_decimal(sizeof(key), &key, value), &key.node);
+}
+
+const SerdNode*
+serd_nodes_double(SerdNodes* const nodes, const double value)
+{
+ StaticNode key = empty_static_node;
+
+ return try_intern(
+ nodes, serd_node_construct_double(sizeof(key), &key, value), &key.node);
+}
+
+const SerdNode*
+serd_nodes_float(SerdNodes* const nodes, const float value)
{
- return serd_nodes_manage(nodes, serd_new_literal(string, flags, meta));
+ StaticNode key = empty_static_node;
+
+ return try_intern(
+ nodes, serd_node_construct_float(sizeof(key), &key, value), &key.node);
+}
+
+const SerdNode*
+serd_nodes_integer(SerdNodes* const nodes,
+ const int64_t value,
+ const SerdStringView datatype)
+{
+ StaticNode key = empty_static_node;
+
+ return try_intern(
+ nodes,
+ serd_node_construct_integer(sizeof(key), &key, value, datatype),
+ &key.node);
+}
+
+const SerdNode*
+serd_nodes_base64(SerdNodes* const nodes,
+ const void* const value,
+ const size_t value_size,
+ const SerdStringView datatype)
+{
+ assert(nodes);
+ assert(value);
+
+ /* We're more or less forced to allocate and construct an entry here, since
+ we need the base64 string to hash. Though it would be possible to
+ calculate it in a streaming fashion, that would be a severe pessimisation
+ in the presumably common case of raw data not being cached, since it would
+ only need to be serialised again. Keeping a tentative entry buffer around
+ when possible would probably be a better improvement if this ever becomes
+ a performance issue. More ambitiously, adding support for binary nodes
+ like a Real Database(TM) would largely avoid this problem. */
+
+ // Determine how much space the node needs
+ SerdWriteResult r =
+ serd_node_construct_base64(0, NULL, value_size, value, datatype);
+
+ // Allocate a new entry to and construct the node into it
+ NodesEntry* const entry = new_entry(r.count);
+
+ r = serd_node_construct_base64(
+ r.count, &entry->node, value_size, value, datatype);
+
+ assert(!r.status);
+ (void)r;
+ return serd_nodes_manage_entry(nodes, entry);
}
const SerdNode*
serd_nodes_uri(SerdNodes* const nodes, const SerdStringView string)
{
- return serd_nodes_manage(nodes, serd_new_uri(string));
+ return serd_nodes_token(nodes, SERD_URI, string);
+}
+
+const SerdNode*
+serd_nodes_parsed_uri(SerdNodes* const nodes, const SerdURIView uri)
+{
+ assert(nodes);
+
+ /* Computing a hash for the serialised URI here would be quite complex, so,
+ since this isn't expected to be a particularly hot case, we just allocate
+ a new entry and try to do a normal insertion. */
+
+ // Determine how much space the node needs
+ SerdWriteResult r = serd_node_construct_uri(0u, NULL, uri);
+ assert(r.status == SERD_ERR_OVERFLOW); // Currently no other errors
+
+ // Allocate a new entry to write the URI node into
+ NodesEntry* const entry = new_entry(r.count);
+
+ r = serd_node_construct_uri(r.count, &entry->node, uri);
+ assert(!r.status);
+ (void)r;
+
+ return serd_nodes_manage_entry(nodes, entry);
}
const SerdNode*
serd_nodes_blank(SerdNodes* const nodes, const SerdStringView string)
{
- return serd_nodes_manage(nodes, serd_new_token(SERD_BLANK, string));
+ return serd_nodes_token(nodes, SERD_BLANK, string);
}
void
serd_nodes_deref(SerdNodes* const nodes, const SerdNode* const node)
{
- const NodesSearchKey key = {1, node};
- NodesEntry* const entry = (NodesEntry*)zix_hash_find(nodes->hash, &key);
+ if (!node) {
+ return;
+ }
- if (entry && --entry->refs == 0) {
- SerdNode* const intern_node = entry->node;
+ ZixHashIter i = zix_hash_find(nodes->hash, node);
+ if (i == zix_hash_end(nodes->hash)) {
+ return;
+ }
- zix_hash_remove(nodes->hash, entry);
- serd_node_free(intern_node);
+ NodesEntry* const entry = zix_hash_get(nodes->hash, i);
+ if (--entry->head.refs == 0u) {
+ NodesEntry* removed = NULL;
+ zix_hash_erase(nodes->hash, i, &removed);
+ assert(removed == entry);
+ serd_free_aligned(removed);
}
}
diff --git a/src/nodes.h b/src/nodes.h
new file mode 100644
index 00000000..b39bac66
--- /dev/null
+++ b/src/nodes.h
@@ -0,0 +1,69 @@
+/*
+ Copyright 2021 David Robillard <d@drobilla.net>
+
+ Permission to use, copy, modify, and/or distribute this software for any
+ purpose with or without fee is hereby granted, provided that the above
+ copyright notice and this permission notice appear in all copies.
+
+ THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+
+/*
+ Nothing here is actually used outside the nodes implementation, but we need
+ the types to be defined before including zix/hash.h to enable its type-safe
+ interface. Putting those here is a way of doing that without messy hacks
+ like including headers half-way through the implementation.
+*/
+
+#ifndef SERD_NODES_H
+#define SERD_NODES_H
+
+#include "node.h"
+
+#include "serd/serd.h"
+
+#include <stddef.h>
+
+/**
+ The header of an entry in the nodes table.
+
+ The table stores nodes with an additional reference count. For efficiency,
+ entries are allocated as a single block of memory, which start with this
+ header and are followed by the body of the node.
+
+ This structure must be the same size as SerdNode to preserve the alignment
+ of the contained node. This is a bit wasteful, but the alignment guarantee
+ allows the node implementation to avoid messy casts and byte-based pointer
+ arithmetic that could cause alignment problems. This might be worth
+ reconsidering, since this wasted space has a real (if small) negative
+ impact, while the alignment guarantee just allows the implementation to use
+ stricter compiler settings.
+
+ Better yet, shrink SerdNode down to size_t, which is malloc's alignment
+ guarantee, and all of this goes away, at the cost of a reduced maximum
+ length for literal nodes.
+*/
+typedef struct {
+ size_t refs; ///< Reference count
+ unsigned pad1; ///< Padding to align the following SerdNode
+ unsigned pad2; ///< Padding to align the following SerdNode
+} NodesEntryHead;
+
+/**
+ An entry in the nodes table.
+
+ This is a variably-sized structure that is allocated specifically to contain
+ the node.
+*/
+typedef struct {
+ NodesEntryHead head; ///< Extra data associated with the node
+ SerdNode node; ///< Node header (body immediately follows)
+} NodesEntry;
+
+#endif // SERD_NODES_H
diff --git a/src/zix/digest.c b/src/zix/digest.c
index 47d27b94..dcfadf8f 100644
--- a/src/zix/digest.c
+++ b/src/zix/digest.c
@@ -1,5 +1,5 @@
/*
- Copyright 2012-2020 David Robillard <d@drobilla.net>
+ Copyright 2012-2021 David Robillard <d@drobilla.net>
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
@@ -16,126 +16,218 @@
#include "zix/digest.h"
-#ifdef __SSE4_2__
-# include <smmintrin.h>
-#endif
-
#include <assert.h>
#include <stdint.h>
+#include <string.h>
-#ifdef __SSE4_2__
+#if defined(__clang__) && __clang_major__ >= 12
+# define FALLTHROUGH() __attribute__((fallthrough))
+#elif defined(__GNUC__) && !defined(__clang__)
+# define FALLTHROUGH() __attribute__((fallthrough))
+#else
+# define FALLTHROUGH()
+#endif
-// SSE 4.2 CRC32
+/*
+ 64-bit hash: Essentially fasthash64, implemented here in an aligned/padded
+ and a general UB-free variant.
+*/
-uint32_t
-zix_digest_start(void)
+static inline uint64_t
+mix64(uint64_t h)
{
- return 1;
+ h ^= h >> 23u;
+ h *= 0x2127599BF4325C37ull;
+ h ^= h >> 47u;
+ return h;
}
-uint32_t
-zix_digest_add(uint32_t hash, const void* const buf, const size_t len)
+uint64_t
+zix_digest64(const uint64_t seed, const void* const key, const size_t len)
{
- const uint8_t* str = (const uint8_t*)buf;
+ static const uint64_t m = 0x880355F21E6D1965ull;
-# ifdef __x86_64__
- for (size_t i = 0; i < (len / sizeof(uint64_t)); ++i) {
- hash = (uint32_t)_mm_crc32_u64(hash, *(const uint64_t*)str);
- str += sizeof(uint64_t);
- }
- if (len & sizeof(uint32_t)) {
- hash = _mm_crc32_u32(hash, *(const uint32_t*)str);
- str += sizeof(uint32_t);
- }
-# else
- for (size_t i = 0; i < (len / sizeof(uint32_t)); ++i) {
- hash = _mm_crc32_u32(hash, *(const uint32_t*)str);
- str += sizeof(uint32_t);
- }
-# endif
- if (len & sizeof(uint16_t)) {
- hash = _mm_crc32_u16(hash, *(const uint16_t*)str);
- str += sizeof(uint16_t);
+ const uint64_t* const blocks = (const uint64_t*)key;
+ const size_t n_blocks = len / sizeof(uint64_t);
+
+ uint64_t h = seed ^ (len * m);
+ for (size_t i = 0u; i < n_blocks; ++i) {
+ h ^= mix64(blocks[i]);
+ h *= m;
}
- if (len & sizeof(uint8_t)) {
- hash = _mm_crc32_u8(hash, *(const uint8_t*)str);
+
+ const uint8_t* const tail = (const unsigned char*)(blocks + n_blocks);
+ uint64_t v = 0u;
+
+ switch (len & 7u) {
+ case 7:
+ v |= (uint64_t)tail[6] << 48u;
+ FALLTHROUGH();
+ case 6:
+ v |= (uint64_t)tail[5] << 40u;
+ FALLTHROUGH();
+ case 5:
+ v |= (uint64_t)tail[4] << 32u;
+ FALLTHROUGH();
+ case 4:
+ v |= (uint64_t)tail[3] << 24u;
+ FALLTHROUGH();
+ case 3:
+ v |= (uint64_t)tail[2] << 16u;
+ FALLTHROUGH();
+ case 2:
+ v |= (uint64_t)tail[1] << 8u;
+ FALLTHROUGH();
+ case 1:
+ v |= (uint64_t)tail[0];
+
+ h ^= mix64(v);
+ h *= m;
}
- return hash;
+ return mix64(h);
}
-uint32_t
-zix_digest_add_64(uint32_t hash, const void* const buf, const size_t len)
+uint64_t
+zix_digest64_aligned(const uint64_t seed, const void* const key, size_t len)
{
- assert((uintptr_t)buf % sizeof(uint64_t) == 0);
- assert(len % sizeof(uint64_t) == 0);
-
-# ifdef __x86_64__
- const uint64_t* ptr = (const uint64_t*)buf;
+ static const uint64_t m = 0x880355F21E6D1965ull;
- for (size_t i = 0; i < (len / sizeof(uint64_t)); ++i) {
- hash = (uint32_t)_mm_crc32_u64(hash, *ptr);
- ++ptr;
- }
+ assert((uintptr_t)key % sizeof(uint64_t) == 0u);
+ assert(len % sizeof(uint64_t) == 0u);
- return hash;
-# else
- const uint32_t* ptr = (const uint32_t*)buf;
+ const uint64_t* const blocks = (const uint64_t*)key;
+ const size_t n_blocks = len / sizeof(uint64_t);
+ uint64_t h = seed ^ (len * m);
- for (size_t i = 0; i < (len / sizeof(uint32_t)); ++i) {
- hash = _mm_crc32_u32(hash, *ptr);
- ++ptr;
+ for (size_t i = 0u; i < n_blocks; ++i) {
+ h ^= mix64(blocks[i]);
+ h *= m;
}
- return hash;
-# endif
+ return mix64(h);
}
-uint32_t
-zix_digest_add_ptr(const uint32_t hash, const void* const ptr)
-{
-# ifdef __x86_64__
- return (uint32_t)_mm_crc32_u64(hash, (uintptr_t)ptr);
-# else
- return _mm_crc32_u32(hash, (uintptr_t)ptr);
-# endif
-}
+/*
+ 32-bit hash: Essentially murmur3, reimplemented here in an aligned/padded and
+ a general UB-free variant.
+*/
-#else
+/**
+ Rotate left by some count of bits.
-// Classic DJB hash
+ This is recognized by any halfway decent compiler and compiled to a single
+ instruction on architectures that have one.
+*/
+static inline uint32_t
+rotl32(const uint32_t val, const uint32_t bits)
+{
+ return ((val << bits) | (val >> (32 - bits)));
+}
-uint32_t
-zix_digest_start(void)
+static inline uint32_t
+mix32(uint32_t h)
{
- return 5381;
+ h ^= h >> 16u;
+ h *= 0x85EBCA6Bu;
+ h ^= h >> 13u;
+ h *= 0xC2B2AE35u;
+ h ^= h >> 16u;
+ return h;
}
uint32_t
-zix_digest_add(uint32_t hash, const void* const buf, const size_t len)
+zix_digest32(const uint32_t seed, const void* const key, const size_t len)
{
- const uint8_t* str = (const uint8_t*)buf;
+ static const uint32_t c1 = 0xCC9E2D51u;
+ static const uint32_t c2 = 0x1B873593u;
+
+ // Process as many 32-bit blocks as possible
+ const size_t n_blocks = len / sizeof(uint32_t);
+ const uint8_t* data = (const uint8_t*)key;
+ const uint8_t* const blocks_end = data + (n_blocks * sizeof(uint32_t));
+ uint32_t h = seed;
+ for (; data != blocks_end; data += sizeof(uint32_t)) {
+ uint32_t k = 0u;
+ memcpy(&k, data, sizeof(uint32_t));
+
+ k *= c1;
+ k = rotl32(k, 15);
+ k *= c2;
+
+ h ^= k;
+ h = rotl32(h, 13);
+ h = h * 5u + 0xE6546B64u;
+ }
- for (size_t i = 0; i < len; ++i) {
- hash = (hash << 5u) + hash + str[i];
+ // Process any trailing bytes
+ uint32_t k = 0u;
+ switch (len & 3u) {
+ case 3u:
+ k ^= (uint32_t)data[2u] << 16u;
+ FALLTHROUGH();
+ case 2u:
+ k ^= (uint32_t)data[1u] << 8u;
+ FALLTHROUGH();
+ case 1u:
+ k ^= (uint32_t)data[0u];
+
+ k *= c1;
+ k = rotl32(k, 15u);
+ k *= c2;
+ h ^= k;
}
- return hash;
+ return mix32(h ^ (uint32_t)len);
}
uint32_t
-zix_digest_add_64(uint32_t hash, const void* const buf, const size_t len)
+zix_digest32_aligned(const uint32_t seed,
+ const void* const key,
+ const size_t len)
{
- assert((uintptr_t)buf % sizeof(uint64_t) == 0);
- assert(len % sizeof(uint64_t) == 0);
+ static const uint32_t c1 = 0xCC9E2D51u;
+ static const uint32_t c2 = 0x1B873593u;
+
+ assert((uintptr_t)key % sizeof(uint32_t) == 0u);
+ assert(len % sizeof(uint32_t) == 0u);
+
+ const uint32_t* const blocks = (const uint32_t*)key;
+ const size_t n_blocks = len / sizeof(uint32_t);
+ uint32_t h = seed;
+ for (size_t i = 0u; i < n_blocks; ++i) {
+ uint32_t k = blocks[i];
+
+ k *= c1;
+ k = rotl32(k, 15);
+ k *= c2;
- return zix_digest_add(hash, buf, len);
+ h ^= k;
+ h = rotl32(h, 13);
+ h = h * 5u + 0xE6546B64u;
+ }
+
+ return mix32(h ^ (uint32_t)len);
}
-uint32_t
-zix_digest_add_ptr(const uint32_t hash, const void* const ptr)
+// Native word size wrapper
+
+size_t
+zix_digest(const size_t seed, const void* const buf, const size_t len)
{
- return zix_digest_add(hash, &ptr, sizeof(ptr));
+#if UINTPTR_MAX >= UINT64_MAX
+ return zix_digest64(seed, buf, len);
+#else
+ return zix_digest32(seed, buf, len);
+#endif
}
+size_t
+zix_digest_aligned(const size_t seed, const void* const buf, const size_t len)
+{
+#if UINTPTR_MAX >= UINT64_MAX
+ return zix_digest64_aligned(seed, buf, len);
+#else
+ return zix_digest32_aligned(seed, buf, len);
#endif
+}
diff --git a/src/zix/digest.h b/src/zix/digest.h
index 1fde77a9..6df70024 100644
--- a/src/zix/digest.h
+++ b/src/zix/digest.h
@@ -1,5 +1,5 @@
/*
- Copyright 2012-2020 David Robillard <d@drobilla.net>
+ Copyright 2012-2021 David Robillard <d@drobilla.net>
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
@@ -27,38 +27,80 @@ extern "C" {
#endif
/**
- Return an initial empty digest value.
+ @addtogroup zix
+ @{
+ @name Digest
+ Functions to generate a short "digest" of data with minimal collisions.
+
+ These are good general-purpose hash functions for indexing arbitrary data,
+ but are not necessarily stable across platforms and should never be used for
+ cryptographic purposes.
+ @{
*/
-ZIX_CONST_API
-uint32_t
-zix_digest_start(void);
/**
- Update `hash` to include `buf`, a buffer of `len` bytes.
+ Return a 32-bit hash of a buffer.
This can be used for any size or alignment.
*/
ZIX_PURE_API
uint32_t
-zix_digest_add(uint32_t hash, const void* buf, size_t len);
+zix_digest32(uint32_t seed, const void* buf, size_t len);
/**
- Update `hash` to include `buf`, a 64-bit aligned buffer of `len` bytes.
+ Return a 32-bit hash of an aligned buffer.
- Both `buf` and `len` must be evenly divisible by 8 (64 bits).
+ Both the buffer and size must be aligned to 32 bits. For data that fits
+ these requirements, this is equivalent to, but faster than, zix_digest32().
*/
ZIX_PURE_API
uint32_t
-zix_digest_add_64(uint32_t hash, const void* buf, size_t len);
+zix_digest32_aligned(uint32_t seed, const void* buf, size_t len);
/**
- Update `hash` to include `ptr`.
+ Return a 64-bit hash of a buffer.
- This hashes the value of the pointer itself, and does not dereference `ptr`.
+ This can be used for any size or alignment.
*/
-ZIX_CONST_API
-uint32_t
-zix_digest_add_ptr(uint32_t hash, const void* ptr);
+ZIX_PURE_API
+uint64_t
+zix_digest64(uint64_t seed, const void* buf, size_t len);
+
+/**
+ Return a 64-bit hash of an aligned buffer.
+
+ Both the buffer and size must be aligned to 64 bits. For data that fits
+ these requirements, this is equivalent to, but faster than, zix_digest64().
+*/
+ZIX_PURE_API
+uint64_t
+zix_digest64_aligned(uint64_t seed, const void* buf, size_t len);
+
+/**
+ Return a pointer-sized hash of a buffer.
+
+ This can be used for any size or alignment.
+
+ Internally, this simply dispatches to zix_digest32() or zix_digest64() as
+ appropriate.
+*/
+ZIX_PURE_API
+size_t
+zix_digest(size_t seed, const void* buf, size_t len);
+
+/**
+ Return a pointer-sized hash of an aligned buffer.
+
+ Both the buffer and size must be aligned to the pointer size. For data that
+ fits these requirements, this is equivalent to, but faster than,
+ zix_digest().
+
+ Internally, this simply dispatches to zix_digest32_aligned() or
+ zix_digest64_aligned() as appropriate.
+*/
+ZIX_PURE_API
+size_t
+zix_digest_aligned(size_t seed, const void* buf, size_t len);
#ifdef __cplusplus
} /* extern "C" */
diff --git a/src/zix/hash.c b/src/zix/hash.c
index d9556edc..8837378e 100644
--- a/src/zix/hash.c
+++ b/src/zix/hash.c
@@ -1,5 +1,5 @@
/*
- Copyright 2011-2020 David Robillard <d@drobilla.net>
+ Copyright 2011-2021 David Robillard <d@drobilla.net>
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
@@ -17,214 +17,332 @@
#include "zix/hash.h"
#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
#include <stdlib.h>
-#include <string.h>
-
-/**
- Primes, each slightly less than twice its predecessor, and as far away
- from powers of two as possible.
-*/
-static const unsigned sizes[] = {
- 53, 97, 193, 389, 769, 1543, 3079,
- 6151, 12289, 24593, 49157, 98317, 196613, 393241,
- 786433, 1572869, 3145739, 6291469, 12582917, 25165843, 50331653,
- 100663319, 201326611, 402653189, 805306457, 1610612741, 0};
typedef struct ZixHashEntry {
- struct ZixHashEntry* next; ///< Next entry in bucket
- uint32_t hash; ///< Non-modulo hash value
- // Value follows here (access with zix_hash_value)
+ ZixHashCode hash; ///< Non-folded hash value
+ ZixHashRecord* value; ///< Pointer to user-owned record
} ZixHashEntry;
struct ZixHashImpl {
- ZixHashFunc hash_func;
- ZixEqualFunc equal_func;
- ZixHashEntry** buckets;
- const unsigned* n_buckets;
- size_t value_size;
- unsigned count;
+ ZixKeyFunc key_func; ///< User-provided key accessor
+ ZixHashFunc hash_func; ///< User-provided hashing function
+ ZixKeyEqualFunc equal_func; ///< User-provided equality comparison function
+ size_t count; ///< Number of records stored in the table
+ size_t mask; ///< Bit mask for fast modulo (n_entries - 1)
+ size_t n_entries; ///< Power of two table size
+ ZixHashEntry* entries; ///< Pointer to dynamically allocated table
};
-static inline void*
-zix_hash_value(ZixHashEntry* entry)
-{
- return entry + 1;
-}
+static const size_t min_n_entries = 4u;
+static const size_t tombstone = 0xDEADu;
ZixHash*
-zix_hash_new(ZixHashFunc hash_func, ZixEqualFunc equal_func, size_t value_size)
+zix_hash_new(const ZixKeyFunc key_func,
+ const ZixHashFunc hash_func,
+ const ZixKeyEqualFunc equal_func)
{
- ZixHash* hash = (ZixHash*)malloc(sizeof(ZixHash));
- if (hash) {
- hash->hash_func = hash_func;
- hash->equal_func = equal_func;
- hash->n_buckets = &sizes[0];
- hash->value_size = value_size;
- hash->count = 0;
- if (!(hash->buckets =
- (ZixHashEntry**)calloc(*hash->n_buckets, sizeof(ZixHashEntry*)))) {
- free(hash);
- return NULL;
- }
+ ZixHash* const hash = (ZixHash*)malloc(sizeof(ZixHash));
+ if (!hash) {
+ return NULL;
}
+
+ hash->key_func = key_func;
+ hash->hash_func = hash_func;
+ hash->equal_func = equal_func;
+ hash->count = 0u;
+ hash->n_entries = min_n_entries;
+ hash->mask = hash->n_entries - 1u;
+
+ hash->entries = (ZixHashEntry*)calloc(hash->n_entries, sizeof(ZixHashEntry));
+ if (!hash->entries) {
+ free(hash);
+ return NULL;
+ }
+
return hash;
}
void
-zix_hash_free(ZixHash* hash)
+zix_hash_free(ZixHash* const hash)
{
- if (!hash) {
- return;
+ if (hash) {
+ free(hash->entries);
+ free(hash);
}
+}
- for (unsigned b = 0; b < *hash->n_buckets; ++b) {
- ZixHashEntry* bucket = hash->buckets[b];
- for (ZixHashEntry* e = bucket; e;) {
- ZixHashEntry* next = e->next;
- free(e);
- e = next;
- }
- }
+ZixHashIter
+zix_hash_begin(const ZixHash* const hash)
+{
+ return hash->entries[0u].value ? 0u : zix_hash_next(hash, 0u);
+}
- free(hash->buckets);
- free(hash);
+ZixHashIter
+zix_hash_end(const ZixHash* const hash)
+{
+ return hash->n_entries;
+}
+
+ZixHashRecord*
+zix_hash_get(const ZixHash* hash, const ZixHashIter i)
+{
+ assert(i < hash->n_entries);
+
+ return hash->entries[i].value;
+}
+
+ZixHashIter
+zix_hash_next(const ZixHash* const hash, ZixHashIter i)
+{
+ do {
+ ++i;
+ } while (i < hash->n_entries && !hash->entries[i].value);
+
+ return i;
}
size_t
-zix_hash_size(const ZixHash* hash)
+zix_hash_size(const ZixHash* const hash)
{
return hash->count;
}
-static inline void
-insert_entry(ZixHashEntry** bucket, ZixHashEntry* entry)
+static inline size_t
+fold_hash(const ZixHashCode h_nomod, const size_t mask)
+{
+ return h_nomod & mask;
+}
+
+static inline bool
+is_tombstone(const ZixHashEntry* const entry)
+{
+ return !entry->value && entry->hash == tombstone;
+}
+
+static inline bool
+is_empty(const ZixHashEntry* const entry)
{
- entry->next = *bucket;
- *bucket = entry;
+ return !entry->value && !entry->hash;
}
-static inline ZixStatus
-rehash(ZixHash* hash, unsigned new_n_buckets)
+static inline bool
+is_match(const ZixHash* const hash,
+ const ZixHashCode code,
+ const size_t entry_index,
+ ZixKeyEqualFunc predicate,
+ const void* const user_data)
{
- ZixHashEntry** new_buckets =
- (ZixHashEntry**)calloc(new_n_buckets, sizeof(ZixHashEntry*));
- if (!new_buckets) {
+ const ZixHashEntry* const entry = &hash->entries[entry_index];
+
+ return entry->value && entry->hash == code &&
+ predicate(hash->key_func(entry->value), user_data);
+}
+
+static inline size_t
+next_index(const ZixHash* const hash, const size_t i)
+{
+ return (i == hash->mask) ? 0u : (i + 1u);
+}
+
+static inline ZixHashIter
+find_entry(const ZixHash* const hash,
+ const ZixHashKey* const key,
+ const size_t h,
+ const ZixHashCode code)
+{
+ size_t i = h;
+
+ while (!is_match(hash, code, i, hash->equal_func, key) &&
+ !is_empty(&hash->entries[i])) {
+ i = next_index(hash, i);
+ }
+
+ return i;
+}
+
+static ZixStatus
+rehash(ZixHash* const hash, const size_t old_n_entries)
+{
+ ZixHashEntry* const old_entries = hash->entries;
+ const size_t new_n_entries = hash->n_entries;
+
+ // Replace the array in the hash first so we can use find_entry() normally
+ hash->entries = (ZixHashEntry*)calloc(new_n_entries, sizeof(ZixHashEntry));
+ if (!hash->entries) {
return ZIX_STATUS_NO_MEM;
}
- const unsigned old_n_buckets = *hash->n_buckets;
- for (unsigned b = 0; b < old_n_buckets; ++b) {
- for (ZixHashEntry* e = hash->buckets[b]; e;) {
- ZixHashEntry* const next = e->next;
- const unsigned h = e->hash % new_n_buckets;
- insert_entry(&new_buckets[h], e);
- e = next;
+ // Reinsert every element into the new array
+ for (size_t i = 0u; i < old_n_entries; ++i) {
+ ZixHashEntry* const entry = &old_entries[i];
+
+ if (entry->value) {
+ assert(hash->mask == hash->n_entries - 1u);
+ const size_t new_h = fold_hash(entry->hash, hash->mask);
+ const size_t new_i = find_entry(hash, entry->value, new_h, entry->hash);
+
+ hash->entries[new_i] = *entry;
}
}
- free(hash->buckets);
- hash->buckets = new_buckets;
+ free(old_entries);
+ return ZIX_STATUS_SUCCESS;
+}
+
+static ZixStatus
+grow(ZixHash* const hash)
+{
+ const size_t old_n_entries = hash->n_entries;
+
+ hash->n_entries <<= 1u;
+ hash->mask = hash->n_entries - 1u;
+
+ return rehash(hash, old_n_entries);
+}
+
+static ZixStatus
+shrink(ZixHash* const hash)
+{
+ if (hash->n_entries > min_n_entries) {
+ const size_t old_n_entries = hash->n_entries;
+
+ hash->n_entries >>= 1u;
+ hash->mask = hash->n_entries - 1u;
+
+ return rehash(hash, old_n_entries);
+ }
return ZIX_STATUS_SUCCESS;
}
-static inline ZixHashEntry*
-find_entry(const ZixHash* hash,
- const void* key,
- const unsigned h,
- const unsigned h_nomod)
+ZixHashIter
+zix_hash_find(const ZixHash* const hash, const ZixHashKey* const key)
{
- for (ZixHashEntry* e = hash->buckets[h]; e; e = e->next) {
- if (e->hash == h_nomod && hash->equal_func(zix_hash_value(e), key)) {
- return e;
+ const ZixHashCode h_nomod = hash->hash_func(key);
+ const size_t h = fold_hash(h_nomod, hash->mask);
+ const ZixHashIter i = find_entry(hash, key, h, h_nomod);
+
+ return is_empty(&hash->entries[i]) ? hash->n_entries : i;
+}
+
+ZixHashRecord*
+zix_hash_find_record(const ZixHash* const hash, const ZixHashKey* const key)
+{
+ const ZixHashCode h_nomod = hash->hash_func(key);
+ const size_t h = fold_hash(h_nomod, hash->mask);
+
+ return hash->entries[find_entry(hash, key, h, h_nomod)].value;
+}
+
+ZixHashInsertPlan
+zix_hash_plan_insert_prehashed(const ZixHash* const hash,
+ const ZixHashCode code,
+ const ZixKeyMatchFunc predicate,
+ const void* const user_data)
+{
+ // Calculate an ideal initial position
+ ZixHashInsertPlan pos = {code, fold_hash(code, hash->mask)};
+
+ // Search for a free position starting at the ideal one
+ const size_t start_index = pos.index;
+ size_t first_tombstone = SIZE_MAX;
+ while (!is_empty(&hash->entries[pos.index])) {
+ if (is_match(hash, code, pos.index, predicate, user_data)) {
+ return pos;
+ }
+
+ if (first_tombstone == SIZE_MAX &&
+ is_tombstone(&hash->entries[pos.index])) {
+ first_tombstone = pos.index; // Remember the first/best free index
+ }
+
+ if ((pos.index = next_index(hash, pos.index)) == start_index) {
+ break;
}
}
- return NULL;
+
+ // If we found a tombstone before an empty slot, place the new element there
+ pos.index = first_tombstone < SIZE_MAX ? first_tombstone : pos.index;
+ assert(!hash->entries[pos.index].value);
+
+ return pos;
}
-void*
-zix_hash_find(const ZixHash* hash, const void* value)
+ZixHashInsertPlan
+zix_hash_plan_insert(const ZixHash* const hash, const ZixHashKey* const key)
{
- const unsigned h_nomod = hash->hash_func(value);
- const unsigned h = h_nomod % *hash->n_buckets;
- ZixHashEntry* const entry = find_entry(hash, value, h, h_nomod);
- return entry ? zix_hash_value(entry) : 0;
+ return zix_hash_plan_insert_prehashed(
+ hash, hash->hash_func(key), hash->equal_func, key);
}
-ZixStatus
-zix_hash_insert(ZixHash* hash, const void* value, void** inserted)
+ZixHashRecord*
+zix_hash_record_at(const ZixHash* const hash, const ZixHashInsertPlan position)
{
- unsigned h_nomod = hash->hash_func(value);
- unsigned h = h_nomod % *hash->n_buckets;
+ return hash->entries[position.index].value;
+}
- ZixHashEntry* elem = find_entry(hash, value, h, h_nomod);
- if (elem) {
- assert(elem->hash == h_nomod);
- if (inserted) {
- *inserted = zix_hash_value(elem);
- }
+ZixStatus
+zix_hash_insert_at(ZixHash* const hash,
+ const ZixHashInsertPlan position,
+ ZixHashRecord* const record)
+{
+ if (hash->entries[position.index].value) {
return ZIX_STATUS_EXISTS;
}
- elem = (ZixHashEntry*)malloc(sizeof(ZixHashEntry) + hash->value_size);
- if (!elem) {
- return ZIX_STATUS_NO_MEM;
- }
- elem->next = NULL;
- elem->hash = h_nomod;
- memcpy(elem + 1, value, hash->value_size);
-
- const unsigned next_n_buckets = *(hash->n_buckets + 1);
- if (next_n_buckets != 0 && (hash->count + 1) >= next_n_buckets) {
- if (!rehash(hash, next_n_buckets)) {
- h = h_nomod % *(++hash->n_buckets);
- }
- }
+ // Set entry to new value
+ ZixHashEntry* const entry = &hash->entries[position.index];
+ assert(!entry->value);
+ entry->hash = position.code;
+ entry->value = record;
- insert_entry(&hash->buckets[h], elem);
- ++hash->count;
- if (inserted) {
- *inserted = zix_hash_value(elem);
+ // Update size and rehash if we exceeded the maximum load
+ const size_t max_load = hash->n_entries / 2u + hash->n_entries / 8u;
+ if (++hash->count >= max_load) {
+ return grow(hash);
}
+
return ZIX_STATUS_SUCCESS;
}
ZixStatus
-zix_hash_remove(ZixHash* hash, const void* value)
-{
- const unsigned h_nomod = hash->hash_func(value);
- const unsigned h = h_nomod % *hash->n_buckets;
-
- ZixHashEntry** next_ptr = &hash->buckets[h];
- for (ZixHashEntry* e = hash->buckets[h]; e; e = e->next) {
- if (h_nomod == e->hash && hash->equal_func(zix_hash_value(e), value)) {
- *next_ptr = e->next;
- free(e);
- --hash->count;
- return ZIX_STATUS_SUCCESS;
- }
- next_ptr = &e->next;
- }
+zix_hash_insert(ZixHash* const hash, ZixHashRecord* const record)
+{
+ const ZixHashKey* const key = hash->key_func(record);
+ const ZixHashInsertPlan position = zix_hash_plan_insert(hash, key);
- if (hash->n_buckets != sizes) {
- const unsigned prev_n_buckets = *(hash->n_buckets - 1);
- if (hash->count - 1 <= prev_n_buckets) {
- if (!rehash(hash, prev_n_buckets)) {
- --hash->n_buckets;
- }
- }
+ return zix_hash_insert_at(hash, position, record);
+}
+
+ZixStatus
+zix_hash_erase(ZixHash* const hash,
+ const ZixHashIter i,
+ ZixHashRecord** const removed)
+{
+ // Replace entry with a tombstone
+ *removed = hash->entries[i].value;
+ hash->entries[i].hash = tombstone;
+ hash->entries[i].value = NULL;
+
+ // Decrease element count and rehash if necessary
+ --hash->count;
+ if (hash->count < hash->n_entries / 4u) {
+ return shrink(hash);
}
- return ZIX_STATUS_NOT_FOUND;
+ return ZIX_STATUS_SUCCESS;
}
-void
-zix_hash_foreach(ZixHash* hash, ZixHashVisitFunc f, void* user_data)
+ZixStatus
+zix_hash_remove(ZixHash* const hash,
+ const ZixHashKey* const key,
+ ZixHashRecord** const removed)
{
- for (unsigned b = 0; b < *hash->n_buckets; ++b) {
- ZixHashEntry* bucket = hash->buckets[b];
- for (ZixHashEntry* e = bucket; e; e = e->next) {
- f(zix_hash_value(e), user_data);
- }
- }
+ const ZixHashIter i = zix_hash_find(hash, key);
+
+ return i == hash->n_entries ? ZIX_STATUS_NOT_FOUND
+ : zix_hash_erase(hash, i, removed);
}
diff --git a/src/zix/hash.h b/src/zix/hash.h
index 39050381..f08d267b 100644
--- a/src/zix/hash.h
+++ b/src/zix/hash.h
@@ -1,5 +1,5 @@
/*
- Copyright 2011-2020 David Robillard <d@drobilla.net>
+ Copyright 2011-2021 David Robillard <d@drobilla.net>
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
@@ -19,8 +19,8 @@
#include "zix/common.h"
+#include <stdbool.h>
#include <stddef.h>
-#include <stdint.h>
#ifdef __cplusplus
extern "C" {
@@ -33,98 +33,289 @@ extern "C" {
@{
*/
+// ZIX_HASH_KEY_TYPE can be defined to make the API more type-safe
+#if defined(ZIX_HASH_KEY_TYPE)
+typedef ZIX_HASH_KEY_TYPE ZixHashKey;
+#else
+typedef void ZixHashKey;
+#endif
+
+// ZIX_HASH_RECORD_TYPE can be defined to make the API more type-safe
+#if defined(ZIX_HASH_RECORD_TYPE)
+typedef ZIX_HASH_RECORD_TYPE ZixHashRecord;
+#else
+typedef void ZixHashRecord;
+#endif
+
+// ZIX_HASH_SEARCH_DATA_TYPE can be defined to make the API more type-safe
+#if defined(ZIX_HASH_SEARCH_DATA_TYPE)
+typedef ZIX_HASH_SEARCH_DATA_TYPE ZixHashSearchData;
+#else
+typedef void ZixHashSearchData;
+#endif
+
+/**
+ A hash table.
+
+ This is an open addressing hash table that stores pointers to arbitrary user
+ data. Internally, everything is stored in a single flat array that is
+ resized when necessary.
+
+ The single user-provided pointer that is stored in the table is called a
+ "record". A record contains a "key", which is accessed via a user-provided
+ function. This design supports storing large records (which may be
+ expensive to allocate/construct) without requiring an entire record to
+ search. Simple atomic values can be stored by providing a trivial identity
+ function as a key function.
+
+ The table uses power of 2 sizes with a growth factor of 2, so that hash
+ values can be folded into an array index using bitwise AND as a fast modulo.
+ This means that only the necessary low bits of the hash value will be used,
+ so the hash function must be well-balanced within this range. More or less
+ any good modern hash algorithm will be fine, but beware, for example, hash
+ functions that assume they are targeting a table with a prime size.
+
+ Since this doubles and halves in size, it may not be an optimal choice if
+ memory reuse is a priority. A growth factor of 1.5 with fast range
+ reduction may be a better choice there, at the cost of requiring 128-bit
+ arithmetic on 64-bit platforms, and indexing operations being slightly more
+ expensive.
+*/
typedef struct ZixHashImpl ZixHash;
+/// A full hash code for a key which is not folded down to the table size
+typedef size_t ZixHashCode;
+
/**
- Function for computing the hash of an element.
+ An iterator to an entry in a hash table.
+
+ This is really just an index, but should be considered opaque to the user
+ and only used via the provided API and equality comparison.
*/
-typedef uint32_t (*ZixHashFunc)(const void* value);
+typedef size_t ZixHashIter;
+
+/// User function for computing the hash of a key
+typedef ZixHashCode (*ZixHashFunc)(const ZixHashKey* key);
+
+/// User function for determining if two keys are truly equal
+typedef bool (*ZixKeyEqualFunc)(const ZixHashKey* a, const ZixHashKey* b);
+
+/// User function for determining if a key matches in a custom search
+typedef bool (*ZixKeyMatchFunc)(const ZixHashKey* key,
+ const ZixHashSearchData* user_data);
+
+/// User function for getting the key of a record
+typedef const ZixHashKey* (*ZixKeyFunc)(const ZixHashRecord* record);
/**
- Function to visit a hash element.
+ A "plan" (position) to insert a record in a hash table.
+
+ This contains everything necessary to insert a record, except the record
+ itself. It is exposed to split up insertion so that records only need to be
+ allocated if an existing record is not found, but the contents should be
+ considered opaque to the user.
*/
-typedef void (*ZixHashVisitFunc)(void* value, void* user_data);
+typedef struct {
+ ZixHashCode code; ///< Hash code used to find this position
+ ZixHashIter index; ///< Index into hash table
+} ZixHashInsertPlan;
/**
Create a new hash table.
- To minimize space overhead, unlike many hash tables this stores a single
- value, not a key and a value. Any size of value can be stored, but all the
- values in the hash table must be the same size, and the values must be safe
- to copy with memcpy. To get key:value behaviour, simply insert a struct
- with a key and value into the hash.
-
- @param hash_func The hashing function.
- @param equal_func A function to test value equality.
- @param value_size The size of the values to be stored.
+ @param key_func A function to retrieve the key from a record.
+ @param hash_func The key hashing function.
+ @param equal_func A function to test keys for equality.
*/
ZIX_API
ZixHash*
-zix_hash_new(ZixHashFunc hash_func, ZixEqualFunc equal_func, size_t value_size);
+zix_hash_new(ZixKeyFunc key_func,
+ ZixHashFunc hash_func,
+ ZixKeyEqualFunc equal_func);
-/**
- Free `hash`.
-*/
+/// Free `hash`
ZIX_API
void
zix_hash_free(ZixHash* hash);
-/**
- Return the number of elements in `hash`.
-*/
+/// Return an iterator to the first record in a hash, or the end if it is empty
+ZIX_PURE_API
+ZixHashIter
+zix_hash_begin(const ZixHash* hash);
+
+/// Return an iterator one past the last possible record in a hash
+ZIX_PURE_API
+ZixHashIter
+zix_hash_end(const ZixHash* hash);
+
+/// Return the record pointed to by an iterator
+ZIX_PURE_API
+ZixHashRecord*
+zix_hash_get(const ZixHash* hash, ZixHashIter i);
+
+/// Return an iterator that has been advanced to the next record in a hash
+ZIX_PURE_API
+ZixHashIter
+zix_hash_next(const ZixHash* hash, ZixHashIter i);
+
+/// Return the number of elements in a hash
ZIX_PURE_API
size_t
zix_hash_size(const ZixHash* hash);
/**
- Insert an item into `hash`.
+ Find the best position to insert a record with the given key.
+
+ This searches the hash table and returns either the position of an existing
+ equivalent record, or the best available position to insert a new one. The
+ difference can be determined with zix_hash_record_at().
+
+ If the returned position is not occupied, then it is valid to insert a
+ record with this key using zix_hash_insert_at() until the hash table is
+ modified (which invalidates the position).
+*/
+ZIX_API
+ZixHashInsertPlan
+zix_hash_plan_insert(const ZixHash* hash, const ZixHashKey* key);
+
+/**
+ Find the best position to insert a record with a custom search.
- If no matching value is found, ZIX_STATUS_SUCCESS will be returned, and @p
- inserted will be pointed to the copy of `value` made in the new hash node.
+ This is an advanced low-level version of zix_hash_plan_insert(). It allows
+ a precalculated hash code to be given, along with a custom search predicate.
+ These must be compatible with the functions that manage the hash table:
+ trivial usage would be to pass the equality function used by the hash and
+ the key to search for.
- If a matching value already exists, ZIX_STATUS_EXISTS will be returned, and
- `inserted` will be pointed to the existing value.
+ This is exposed to make it possible to avoid constructing a key at all when
+ potentially inserting. For example, if keys are structures with a fixed
+ header and a variably-sized body, and you have a separate header and body
+ this can be used to find an insert position without having to allocate a
+ contiguous key.
+
+ Note that care must be taken when using this function: improper use can
+ corrupt the hash table. The hash code given must be correct for the key to
+ be inserted, and the predicate must return true only if the key it is called
+ with (the first argument) matches the key to be inserted.
+*/
+ZIX_API
+ZixHashInsertPlan
+zix_hash_plan_insert_prehashed(const ZixHash* hash,
+ ZixHashCode code,
+ ZixKeyMatchFunc predicate,
+ const ZixHashSearchData* user_data);
+
+/**
+ Return the record at the given position, or null.
+
+ This is used to determine if a position returned from zix_hash_plan_insert()
+ can be used to insert a new record, or to access the existing matching
+ record.
+*/
+ZIX_PURE_API
+ZixHashRecord*
+zix_hash_record_at(const ZixHash* hash, ZixHashInsertPlan position);
+
+/**
+ Insert a record at a specific position.
+
+ The position must have been found with an earlier call to
+ zix_hash_plan_insert(), and no modifications must have been made to the hash
+ table since.
+
+ @param hash The hash table.
+
+ @param position The position for the new record.
+
+ @param record The record to insert which, on success, can now be considered
+ owned by the hash table.
+
+ @return ZIX_STATUS_SUCCESS, ZIX_STATUS_EXISTS if a record already exists at
+ this position, or ZIX_STATUS_NO_MEM if growing the hash table failed.
+*/
+ZIX_API
+ZixStatus
+zix_hash_insert_at(ZixHash* hash,
+ ZixHashInsertPlan position,
+ ZixHashRecord* record);
+
+/**
+ Insert a record.
+
+ This is a trivial wrapper for zix_hash_plan_insert() and
+ zix_hash_insert_at() that is more convenient when record construction is not
+ expensive.
@param hash The hash table.
- @param value The value to be inserted.
- @param inserted The copy of `value` in the hash table.
+
+ @param record The record to insert which, on success, can now be considered
+ owned by the hash table.
+
@return ZIX_STATUS_SUCCESS, ZIX_STATUS_EXISTS, or ZIX_STATUS_NO_MEM.
*/
ZIX_API
ZixStatus
-zix_hash_insert(ZixHash* hash, const void* value, void** inserted);
+zix_hash_insert(ZixHash* hash, ZixHashRecord* record);
/**
- Remove an item from `hash`.
+ Erase a record at a specific position.
+
+ @param hash The hash table to remove the record from.
+
+ @param i Iterator to the record to remove. This must be a valid iterator
+ from an earlier call to zix_hash_find(), that is, the hash table must not
+ have been modified since.
+
+ @param removed Set to the removed record, or null.
+
+ @return ZIX_STATUS_SUCCES or ZIX_STATUS_BAD_ARG if `i` does not point at a
+ removable record.
+*/
+ZIX_API
+ZixStatus
+zix_hash_erase(ZixHash* hash, ZixHashIter i, ZixHashRecord** removed);
+
+/**
+ Remove a record.
@param hash The hash table.
- @param value The value to remove.
+ @param key The key of the record to remove.
+ @param removed Set to the removed record, or null.
@return ZIX_STATUS_SUCCES or ZIX_STATUS_NOT_FOUND.
*/
ZIX_API
ZixStatus
-zix_hash_remove(ZixHash* hash, const void* value);
+zix_hash_remove(ZixHash* hash, const ZixHashKey* key, ZixHashRecord** removed);
/**
- Search for an item in `hash`.
+ Find the position of a record with a given key.
- @param hash The hash table.
- @param value The value to search for.
+ @param hash The hash table to search.
+
+ @param key The key of the desired record.
+
+ @return An iterator to the matching record, or the end iterator if no such
+ record exists.
*/
ZIX_API
-void*
-zix_hash_find(const ZixHash* hash, const void* value);
+ZixHashIter
+zix_hash_find(const ZixHash* hash, const ZixHashKey* key);
/**
- Call `f` on each value in `hash`.
+ Find a record with a given key.
- @param hash The hash table.
- @param f The function to call on each value.
- @param user_data The user_data parameter passed to `f`.
+ This is essentially the same as zix_hash_find(), but returns a pointer to
+ the record for convenience.
+
+ @param hash The hash table to search.
+
+ @param key The key of the desired record.
+
+ @return A pointer to the matching record, of null if no such record exists.
*/
ZIX_API
-void
-zix_hash_foreach(ZixHash* hash, ZixHashVisitFunc f, void* user_data);
+ZixHashRecord*
+zix_hash_find_record(const ZixHash* hash, const ZixHashKey* key);
/**
@}
diff --git a/test/test_nodes.c b/test/test_nodes.c
index 74c99c11..c841acc4 100644
--- a/test/test_nodes.c
+++ b/test/test_nodes.c
@@ -19,9 +19,12 @@
#include "serd/serd.h"
#include <assert.h>
+#include <stdbool.h>
#include <stddef.h>
#include <string.h>
+#define NS_XSD "http://www.w3.org/2001/XMLSchema#"
+
static void
test_intern(void)
{
@@ -45,24 +48,6 @@ test_intern(void)
}
static void
-test_manage(void)
-{
- SerdNodes* nodes = serd_nodes_new();
- SerdNode* node = serd_new_string(SERD_STRING("node"));
-
- assert(!serd_nodes_manage(nodes, NULL));
-
- const SerdNode* managed1 = serd_nodes_manage(nodes, node);
- assert(managed1 == node);
-
- SerdNode* equal = serd_new_string(SERD_STRING("node"));
- const SerdNode* managed2 = serd_nodes_manage(nodes, equal);
- assert(managed2 == node);
-
- serd_nodes_free(nodes);
-}
-
-static void
test_string(void)
{
static const SerdStringView string = SERD_STRING("string");
@@ -81,6 +66,29 @@ test_string(void)
}
static void
+test_invalid_literal(void)
+{
+ SerdNodes* const nodes = serd_nodes_new();
+
+ assert(!serd_nodes_literal(nodes,
+ SERD_STRING("double meta"),
+ SERD_HAS_LANGUAGE | SERD_HAS_DATATYPE,
+ SERD_STRING("What am I?")));
+
+ assert(!serd_nodes_literal(nodes,
+ SERD_STRING("empty language"),
+ SERD_HAS_LANGUAGE,
+ SERD_EMPTY_STRING()));
+
+ assert(!serd_nodes_literal(nodes,
+ SERD_STRING("empty datatype"),
+ SERD_HAS_DATATYPE,
+ SERD_EMPTY_STRING()));
+
+ serd_nodes_free(nodes);
+}
+
+static void
test_plain_literal(void)
{
static const SerdStringView string = SERD_STRING("string");
@@ -102,6 +110,24 @@ test_plain_literal(void)
assert(serd_node_length(node) == string.len);
assert(!strcmp(serd_node_string(node), string.buf));
+ const SerdNode* const alias =
+ serd_nodes_literal(nodes, string, SERD_HAS_LANGUAGE, language);
+
+ assert(alias == node);
+
+ const SerdNode* const other =
+ serd_nodes_literal(nodes, string, SERD_HAS_LANGUAGE, SERD_STRING("de"));
+
+ assert(other != node);
+
+ const SerdNode* const other_language_node = serd_node_language(other);
+ assert(other_language_node);
+ assert(serd_node_type(other_language_node) == SERD_LITERAL);
+ assert(serd_node_length(other_language_node) == 2);
+ assert(!strcmp(serd_node_string(other_language_node), "de"));
+ assert(serd_node_length(other) == string.len);
+ assert(!strcmp(serd_node_string(other), string.buf));
+
serd_nodes_free(nodes);
}
@@ -128,6 +154,152 @@ test_typed_literal(void)
assert(serd_node_length(node) == string.len);
assert(!strcmp(serd_node_string(node), string.buf));
+ const SerdNode* const alias =
+ serd_nodes_literal(nodes, string, SERD_HAS_DATATYPE, datatype);
+
+ assert(alias == node);
+
+ serd_nodes_free(nodes);
+}
+
+static void
+test_boolean(void)
+{
+ SerdNodes* const nodes = serd_nodes_new();
+
+ const SerdNode* const false1 = serd_nodes_boolean(nodes, false);
+ const SerdNode* const false2 = serd_nodes_boolean(nodes, false);
+ const SerdNode* const true1 = serd_nodes_boolean(nodes, true);
+ const SerdNode* const true2 = serd_nodes_boolean(nodes, true);
+
+ assert(false1 == false2);
+ assert(true1 == true2);
+ assert(false1 != true1);
+ assert(!strcmp(serd_node_string(false1), "false"));
+ assert(!strcmp(serd_node_string(true1), "true"));
+ assert(serd_node_length(false1) == strlen(serd_node_string(false1)));
+ assert(serd_node_length(true1) == strlen(serd_node_string(true1)));
+ assert(serd_nodes_size(nodes) == 2);
+
+ const SerdNode* const true_datatype = serd_node_datatype(true1);
+ assert(true_datatype);
+ assert(!strcmp(serd_node_string(true_datatype), NS_XSD "boolean"));
+
+ const SerdNode* const false_datatype = serd_node_datatype(false1);
+ assert(false_datatype);
+ assert(!strcmp(serd_node_string(false_datatype), NS_XSD "boolean"));
+
+ serd_nodes_free(nodes);
+}
+
+static void
+test_decimal(void)
+{
+ SerdNodes* const nodes = serd_nodes_new();
+ const SerdNode* const a = serd_nodes_decimal(nodes, -12.3456789);
+ const SerdNode* const b = serd_nodes_decimal(nodes, -12.3456789);
+
+ assert(a == b);
+ assert(!strcmp(serd_node_string(a), "-12.3456789"));
+ assert(serd_node_length(a) == strlen(serd_node_string(a)));
+ assert(serd_nodes_size(nodes) == 1);
+
+ const SerdNode* const default_datatype = serd_node_datatype(a);
+ assert(default_datatype);
+ assert(!strcmp(serd_node_string(default_datatype), NS_XSD "decimal"));
+
+ serd_nodes_free(nodes);
+}
+
+static void
+test_double(void)
+{
+ SerdNodes* const nodes = serd_nodes_new();
+
+ const SerdNode* const a = serd_nodes_double(nodes, -1.2E3);
+ const SerdNode* const b = serd_nodes_double(nodes, -1.2E3);
+
+ assert(a == b);
+ assert(!strcmp(serd_node_string(a), "-1.2E3"));
+ assert(serd_node_length(a) == strlen(serd_node_string(a)));
+ assert(serd_nodes_size(nodes) == 1);
+
+ serd_nodes_free(nodes);
+}
+
+static void
+test_float(void)
+{
+ SerdNodes* const nodes = serd_nodes_new();
+
+ const SerdNode* const a = serd_nodes_float(nodes, -1.2E3f);
+ const SerdNode* const b = serd_nodes_float(nodes, -1.2E3f);
+
+ assert(a == b);
+ assert(!strcmp(serd_node_string(a), "-1.2E3"));
+ assert(serd_node_length(a) == strlen(serd_node_string(a)));
+ assert(serd_nodes_size(nodes) == 1);
+
+ serd_nodes_free(nodes);
+}
+
+static void
+test_integer(void)
+{
+ SerdNodes* const nodes = serd_nodes_new();
+
+ const SerdNode* const a =
+ serd_nodes_integer(nodes, -1234567890, SERD_EMPTY_STRING());
+
+ const SerdNode* const b =
+ serd_nodes_integer(nodes, -1234567890, SERD_EMPTY_STRING());
+
+ assert(a == b);
+ assert(!strcmp(serd_node_string(a), "-1234567890"));
+ assert(serd_node_length(a) == strlen(serd_node_string(a)));
+ assert(serd_nodes_size(nodes) == 1);
+
+ const SerdNode* const default_datatype = serd_node_datatype(a);
+ assert(default_datatype);
+ assert(!strcmp(serd_node_string(default_datatype), NS_XSD "integer"));
+
+ serd_nodes_free(nodes);
+}
+
+static void
+test_base64(void)
+{
+ static const char data[] = {'f', 'o', 'o', 'b', 'a', 'r'};
+
+ SerdNodes* const nodes = serd_nodes_new();
+
+ const SerdNode* const a =
+ serd_nodes_base64(nodes, &data, sizeof(data), SERD_EMPTY_STRING());
+
+ const SerdNode* const b =
+ serd_nodes_base64(nodes, &data, sizeof(data), SERD_EMPTY_STRING());
+
+ assert(a == b);
+ assert(!strcmp(serd_node_string(a), "Zm9vYmFy"));
+ assert(serd_node_length(a) == strlen(serd_node_string(a)));
+ assert(serd_nodes_size(nodes) == 1);
+
+ const SerdNode* const default_datatype = serd_node_datatype(a);
+ assert(default_datatype);
+ assert(!strcmp(serd_node_string(default_datatype), NS_XSD "base64Binary"));
+
+ const SerdStringView user_datatype =
+ SERD_STRING("http://example.org/UserDatatype");
+
+ const SerdNode* const custom =
+ serd_nodes_base64(nodes, &data, sizeof(data), user_datatype);
+ assert(custom);
+ assert(!strcmp(serd_node_string(custom), "Zm9vYmFy"));
+
+ const SerdNode* const custom_datatype = serd_node_datatype(custom);
+ assert(custom_datatype);
+ assert(!strcmp(serd_node_string(custom_datatype), user_datatype.buf));
+
serd_nodes_free(nodes);
}
@@ -150,6 +322,34 @@ test_uri(void)
}
static void
+test_parsed_uri(void)
+{
+ static const SerdStringView string = SERD_STRING("http://example.org/");
+
+ SerdNodes* const nodes = serd_nodes_new();
+ const SerdURIView uri = serd_parse_uri(string.buf);
+ const SerdNode* const node = serd_nodes_parsed_uri(nodes, uri);
+
+ assert(node);
+ assert(serd_node_type(node) == SERD_URI);
+ assert(!serd_node_flags(node));
+ assert(!serd_node_datatype(node));
+ assert(!serd_node_language(node));
+ assert(serd_node_length(node) == string.len);
+ assert(!strcmp(serd_node_string(node), string.buf));
+
+ const SerdNode* const alias = serd_nodes_parsed_uri(nodes, uri);
+ assert(alias == node);
+
+ const SerdNode* const other =
+ serd_nodes_parsed_uri(nodes, serd_parse_uri("http://example.org/x"));
+
+ assert(other != node);
+
+ serd_nodes_free(nodes);
+}
+
+static void
test_blank(void)
{
static const SerdStringView string = SERD_STRING("b42");
@@ -170,17 +370,40 @@ test_blank(void)
static void
test_deref(void)
{
- SerdNodes* nodes = serd_nodes_new();
- const SerdNode* managed = serd_nodes_string(nodes, SERD_STRING("node"));
+ SerdNodes* nodes = serd_nodes_new();
+ const SerdNode* original = serd_nodes_string(nodes, SERD_STRING("node"));
+ const SerdNode* another = serd_nodes_string(nodes, SERD_STRING("node"));
- serd_nodes_deref(nodes, managed);
+ assert(original == another);
- SerdNode* node = serd_new_string(SERD_STRING("node"));
- const SerdNode* interned = serd_nodes_intern(nodes, node);
+ // Dereference the original and ensure the other reference kept it alive
+ serd_nodes_deref(nodes, original);
+ assert(serd_node_length(another) == 4);
+ assert(!strcmp(serd_node_string(another), "node"));
- assert(interned != node);
+ // Drop the other/final reference (freeing the node)
+ serd_nodes_deref(nodes, another);
- serd_node_free(node);
+ /* Intern some other irrelevant node first to (hopefully) avoid the allocator
+ just giving us back the same piece of memory. */
+
+ const SerdNode* other = serd_nodes_string(nodes, SERD_STRING("XXXX"));
+ assert(!strcmp(serd_node_string(other), "XXXX"));
+
+ // Intern a new equivalent node to the original and check that it's really new
+ const SerdNode* imposter = serd_nodes_string(nodes, SERD_STRING("node"));
+ assert(imposter != original);
+ assert(serd_node_length(imposter) == 4);
+ assert(!strcmp(serd_node_string(imposter), "node"));
+
+ // Check that dereferencing some random unknown node doesn't crash
+ SerdNode* unmanaged = serd_new_string(SERD_STRING("unmanaged"));
+ serd_nodes_deref(nodes, unmanaged);
+ serd_node_free(unmanaged);
+
+ serd_nodes_deref(nodes, NULL);
+ serd_nodes_deref(nodes, imposter);
+ serd_nodes_deref(nodes, other);
serd_nodes_free(nodes);
}
@@ -205,11 +428,18 @@ int
main(void)
{
test_intern();
- test_manage();
test_string();
+ test_invalid_literal();
test_plain_literal();
test_typed_literal();
+ test_boolean();
+ test_decimal();
+ test_double();
+ test_float();
+ test_integer();
+ test_base64();
test_uri();
+ test_parsed_uri();
test_blank();
test_deref();
test_get();