aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorDavid Robillard <d@drobilla.net>2023-05-10 21:06:16 -0400
committerDavid Robillard <d@drobilla.net>2023-12-02 18:49:08 -0500
commite750f4b6734d086e433e3c9c05b2252f43f4be8f (patch)
tree6eb84ef00642ac32f40bca8a242a9b0d2a6ef3f3 /src
parent8346ac7f529f5aeb8d8b0e48837e680ea14e8893 (diff)
downloadserd-e750f4b6734d086e433e3c9c05b2252f43f4be8f.tar.gz
serd-e750f4b6734d086e433e3c9c05b2252f43f4be8f.tar.bz2
serd-e750f4b6734d086e433e3c9c05b2252f43f4be8f.zip
Add SerdNodes for storing a cache of interned nodes
Diffstat (limited to 'src')
-rw-r--r--src/.clang-tidy1
-rw-r--r--src/node_spec.h41
-rw-r--r--src/nodes.c506
-rw-r--r--src/nodes.h56
4 files changed, 603 insertions, 1 deletions
diff --git a/src/.clang-tidy b/src/.clang-tidy
index 53834f98..161eabda 100644
--- a/src/.clang-tidy
+++ b/src/.clang-tidy
@@ -10,5 +10,4 @@ Checks: >
-hicpp-signed-bitwise,
-llvm-header-guard,
-misc-no-recursion,
- -modernize-macro-to-enum,
InheritParentConfig: true
diff --git a/src/node_spec.h b/src/node_spec.h
new file mode 100644
index 00000000..97243bf6
--- /dev/null
+++ b/src/node_spec.h
@@ -0,0 +1,41 @@
+// Copyright 2021 David Robillard <d@drobilla.net>
+// SPDX-License-Identifier: ISC
+
+#ifndef SERD_SRC_NODE_SPEC_H
+#define SERD_SRC_NODE_SPEC_H
+
+#include "serd/node.h"
+#include "serd/string_view.h"
+
+/**
+ A lightweight "specification" of a node.
+
+ This is essentially the arguments needed to construct any node combined into
+ a single structure. Since it only refers to strings elsewhere, it is
+ convenient as a way to completely specify a node without having to actually
+ allocate one.
+*/
+typedef struct {
+ SerdNodeType type; ///< Basic type of this node
+ SerdStringView string; ///< String contents of this node
+ SerdNodeFlags flags; ///< Additional node flags
+ SerdStringView meta; ///< String contents of datatype or language node
+} NodeSpec;
+
+static inline NodeSpec
+token_spec(const SerdNodeType type, const SerdStringView string)
+{
+ NodeSpec spec = {type, string, 0U, serd_empty_string()};
+ return spec;
+}
+
+static inline NodeSpec
+literal_spec(const SerdStringView string,
+ const SerdNodeFlags flags,
+ const SerdStringView meta)
+{
+ NodeSpec spec = {SERD_LITERAL, string, flags, meta};
+ return spec;
+}
+
+#endif // SERD_SRC_NODE_SPEC_H
diff --git a/src/nodes.c b/src/nodes.c
new file mode 100644
index 00000000..68d9cf3c
--- /dev/null
+++ b/src/nodes.c
@@ -0,0 +1,506 @@
+// Copyright 2011-2022 David Robillard <d@drobilla.net>
+// SPDX-License-Identifier: ISC
+
+#include "nodes.h"
+
+#include "memory.h"
+#include "node.h"
+#include "node_spec.h"
+
+// Define the types used in the hash interface for more type safety
+#define ZIX_HASH_KEY_TYPE SerdNode
+#define ZIX_HASH_RECORD_TYPE NodesEntry
+#define ZIX_HASH_SEARCH_DATA_TYPE NodeSpec
+
+#include "serd/memory.h"
+#include "serd/nodes.h"
+#include "serd/string_view.h"
+#include "serd/write_result.h"
+#include "zix/allocator.h"
+#include "zix/attributes.h"
+#include "zix/digest.h"
+#include "zix/hash.h"
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+
+#if ((defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112l) || \
+ (defined(__cplusplus) && __cplusplus >= 201103L))
+
+static_assert(sizeof(NodesEntryHead) == sizeof(SerdNode),
+ "NodesEntryHead must be the same size as SerdNode for alignment");
+
+#endif
+
+/*
+ The main goal here is to make getting an existing node as fast as possible,
+ so that this can be used as a convenient node cache with minimal performance
+ overhead. Mainly this means striving to avoid allocation when potentially
+ inserting a new node.
+
+ This is achieved by generating a hash code from node components without
+ actually allocating that node, and using the low-level insertion interface of
+ ZixHash to do a custom search. This way, an entry is only allocated when
+ necessary, and the hash table is only searched once.
+
+ The downside is that subtle and very bad things will happen if the hash
+ generated for the node does not match the actual hash of the node. The
+ exessive number of assertions around this are there to provide some defense
+ against such mistakes. Cave operatur.
+*/
+
+/**
+ The maximum size of a node that will be made statically on the stack.
+
+ This mostly applies to things like numeric literal nodes, where a small
+ maximum size is exploited to avoid allocation. The largest static node
+ string is the longest xsd:decimal, which is 327 bytes. We need a bit more
+ space than that here for the node header, padding, and datatype.
+*/
+#define MAX_STATIC_NODE_SIZE 384
+
+typedef struct {
+ SerdNode node;
+ char body[MAX_STATIC_NODE_SIZE];
+} StaticNode;
+
+/**
+ Allocator for allocating entries in the node hash table.
+
+ This allocator implements only the methods used by the serd_node_*
+ constructors, and transparently increases the allocation size so there is
+ room for an extra NodesEntryHead at the start. This allows the serd_node_*
+ constructors to be re-used here, even though the table stores entries (nodes
+ with an extra header) rather than node pointers directly.
+*/
+typedef struct {
+ SerdAllocator base; ///< Implementation of SerdAllocator (base "class")
+ SerdAllocator* real; ///< Underlying "real" memory allocator
+} SerdNodesEntryAllocator;
+
+ZIX_MALLOC_FUNC static void*
+serd_nodes_entry_aligned_alloc(SerdAllocator* const allocator,
+ const size_t alignment,
+ const size_t size)
+{
+ SerdAllocator* const real = ((SerdNodesEntryAllocator*)allocator)->real;
+
+ void* const ptr =
+ real->aligned_alloc(real, alignment, serd_node_align + size);
+
+ return ptr ? (((uint8_t*)ptr) + serd_node_align) : NULL;
+}
+
+static void
+serd_nodes_entry_aligned_free(SerdAllocator* const allocator, void* const ptr)
+{
+ SerdAllocator* const real = ((SerdNodesEntryAllocator*)allocator)->real;
+
+ if (ptr) {
+ real->aligned_free(real, (((uint8_t*)ptr) - serd_node_align));
+ }
+}
+
+static SerdNodesEntryAllocator
+serd_nodes_entry_allocator(SerdAllocator* const real)
+{
+ const SerdNodesEntryAllocator entry_allocator = {
+ {
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ serd_nodes_entry_aligned_alloc,
+ serd_nodes_entry_aligned_free,
+ },
+ real ? real : serd_default_allocator(),
+ };
+
+ return entry_allocator;
+}
+
+struct SerdNodesImpl {
+ SerdNodesEntryAllocator allocator;
+ ZixHash* hash;
+};
+
+static const StaticNode empty_static_node = {{0U, 0U, SERD_LITERAL}, {'\0'}};
+
+static const SerdNodeFlags meta_mask = (SERD_HAS_DATATYPE | SERD_HAS_LANGUAGE);
+
+static const SerdNode*
+nodes_key(const NodesEntry* const entry)
+{
+ return &entry->node;
+}
+
+static ZixHashCode
+token_hash(const ZixHashCode seed,
+ const SerdNodeType type,
+ const SerdStringView string)
+{
+ const SerdNode node_header = {string.length, 0U, type};
+ ZixHashCode h = seed;
+
+ h = zix_digest_aligned(h, &node_header, sizeof(node_header));
+ h = zix_digest(h, string.data, string.length);
+ return h;
+}
+
+static ZixHashCode
+spec_hash(const NodeSpec spec)
+{
+ ZixHashCode h = token_hash(0U, spec.type, spec.string);
+
+ if (spec.flags & SERD_HAS_DATATYPE) {
+ h = token_hash(h, SERD_URI, spec.meta);
+ } else if (spec.flags & SERD_HAS_LANGUAGE) {
+ h = token_hash(h, SERD_LITERAL, spec.meta);
+ }
+
+ return zix_digest(h, &spec.flags, sizeof(spec.flags));
+}
+
+static ZixHashCode
+nodes_hash(const SerdNode* const node)
+{
+ /* If you're thinking "I know, I'll make this faster by just hashing the
+ entire node in one call!", think harder. Since zero bytes affect the hash
+ value, that would mean that the hash code for the node won't match the one
+ for the node spec, and the above-mentioned subtle and very bad things will
+ happen. This function deliberately mirrors spec_hash() above to make it
+ relatively easy to see that the two will match.
+
+ It would be more elegant to construct a spec from the node and then hash
+ that with the exact same code used for searching, but there's currently no
+ use for that anywhere else and this way is a bit faster. */
+
+ ZixHashCode h = token_hash(0U, node->type, serd_node_string_view(node));
+
+ if (node->flags & SERD_HAS_DATATYPE) {
+ h = token_hash(h, SERD_URI, serd_node_string_view(serd_node_meta_c(node)));
+ } else if (node->flags & SERD_HAS_LANGUAGE) {
+ h = token_hash(
+ h, SERD_LITERAL, serd_node_string_view(serd_node_meta_c(node)));
+ }
+
+ return zix_digest(h, &node->flags, sizeof(node->flags));
+}
+
+static bool
+node_equals_spec(const SerdNode* const node, const NodeSpec* const spec)
+{
+ return serd_node_type(node) == spec->type &&
+ serd_node_length(node) == spec->string.length &&
+ node->flags == spec->flags &&
+ !strcmp(serd_node_string_i(node), spec->string.data) &&
+ (!(node->flags & meta_mask) ||
+ !strcmp(serd_node_string_i(serd_node_meta_c(node)), spec->meta.data));
+}
+
+static bool
+nodes_meta_equal(const SerdNode* const a, const SerdNode* const b)
+{
+ assert(a->flags & meta_mask);
+ assert(b->flags & meta_mask);
+
+ const size_t padded_length = serd_node_pad_length(a->length);
+ const size_t offset = padded_length / sizeof(SerdNode);
+ const SerdNode* const am = a + 1U + offset;
+ const SerdNode* const bm = b + 1U + offset;
+
+ return am->length == bm->length && am->type == bm->type &&
+ !memcmp(serd_node_string_i(am), serd_node_string_i(bm), am->length);
+}
+
+static bool
+nodes_equal(const SerdNode* const a, const SerdNode* const b)
+{
+ return (a == b) ||
+ (a->length == b->length && a->flags == b->flags &&
+ a->type == b->type &&
+ !memcmp(serd_node_string_i(a), serd_node_string_i(b), a->length) &&
+ (!(a->flags & meta_mask) || nodes_meta_equal(a, b)));
+}
+
+static void
+free_entry(SerdNodes* const nodes, NodesEntry* const entry)
+{
+ serd_aaligned_free(&nodes->allocator.base, &entry->node);
+}
+
+SerdNodes*
+serd_nodes_new(SerdAllocator* const allocator)
+{
+ SerdNodes* const nodes =
+ (SerdNodes*)serd_acalloc(allocator, 1, sizeof(SerdNodes));
+
+ if (nodes) {
+ nodes->allocator = serd_nodes_entry_allocator(allocator);
+
+ if (!(nodes->hash = zix_hash_new(
+ (ZixAllocator*)allocator, nodes_key, nodes_hash, nodes_equal))) {
+ serd_afree(allocator, nodes);
+ return NULL;
+ }
+ }
+
+ return nodes;
+}
+
+void
+serd_nodes_free(SerdNodes* nodes)
+{
+ if (nodes) {
+ for (ZixHashIter i = zix_hash_begin(nodes->hash);
+ i != zix_hash_end(nodes->hash);
+ i = zix_hash_next(nodes->hash, i)) {
+ free_entry(nodes, (NodesEntry*)zix_hash_get(nodes->hash, i));
+ }
+
+ zix_hash_free(nodes->hash);
+ serd_afree(nodes->allocator.real, nodes);
+ }
+}
+
+size_t
+serd_nodes_size(const SerdNodes* nodes)
+{
+ assert(nodes);
+
+ return zix_hash_size(nodes->hash);
+}
+
+const SerdNode*
+serd_nodes_intern(SerdNodes* nodes, const SerdNode* node)
+{
+ assert(nodes);
+ if (!node) {
+ return NULL;
+ }
+
+ const ZixHashInsertPlan plan = zix_hash_plan_insert(nodes->hash, node);
+ NodesEntry* const existing = zix_hash_record_at(nodes->hash, plan);
+ if (existing) {
+ assert(serd_node_equals(&existing->node, node));
+ ++existing->head.refs;
+ return &existing->node;
+ }
+
+ SerdNode* const new_node = serd_node_copy(&nodes->allocator.base, node);
+ if (!new_node) {
+ return NULL;
+ }
+
+ NodesEntry* const entry = (NodesEntry*)(new_node - 1U);
+
+ entry->head.refs = 1U;
+
+ // Insert the entry (blissfully ignoring a failed hash size increase)
+ if (zix_hash_insert_at(nodes->hash, plan, entry)) {
+ free_entry(nodes, entry);
+ return NULL;
+ }
+
+ return &entry->node;
+}
+
+const SerdNode*
+serd_nodes_existing(const SerdNodes* const nodes, const SerdNode* const node)
+{
+ assert(nodes);
+
+ if (!node) {
+ return NULL;
+ }
+
+ NodesEntry* const entry = zix_hash_find_record(nodes->hash, node);
+
+ return entry ? &entry->node : NULL;
+}
+
+static const SerdNode*
+serd_nodes_manage_entry_at(SerdNodes* const nodes,
+ NodesEntry* const entry,
+ const ZixHashInsertPlan plan)
+{
+ assert(nodes);
+ assert(entry);
+
+ entry->head.refs = 1U;
+
+ // Insert the entry (blissfully ignoring a failed hash size increase)
+ if (zix_hash_insert_at(nodes->hash, plan, entry)) {
+ free_entry(nodes, entry);
+ return NULL;
+ }
+
+ return &entry->node;
+}
+
+static const SerdNode*
+serd_nodes_manage_entry_node_at(SerdNodes* const nodes,
+ SerdNode* const node,
+ const ZixHashInsertPlan plan)
+{
+ if (!node) {
+ return NULL;
+ }
+
+ NodesEntry* const entry = (NodesEntry*)(node - 1U);
+
+ return serd_nodes_manage_entry_at(nodes, entry, plan);
+}
+
+static const SerdNode*
+serd_nodes_manage_entry_node(SerdNodes* const nodes, SerdNode* const node)
+{
+ if (!node) {
+ return NULL;
+ }
+
+ NodesEntry* const entry = (NodesEntry*)(node - 1U);
+ const ZixHashInsertPlan plan = zix_hash_plan_insert(nodes->hash, node);
+ NodesEntry* const existing = zix_hash_record_at(nodes->hash, plan);
+ if (existing) {
+ assert(serd_node_equals(&existing->node, node));
+ assert(nodes_hash(&existing->node) == plan.code);
+ ++existing->head.refs;
+ free_entry(nodes, entry);
+ return &existing->node;
+ }
+
+ assert(nodes_hash(&entry->node) == plan.code);
+
+ return serd_nodes_manage_entry_at(nodes, entry, plan);
+}
+
+static const SerdNode*
+serd_nodes_token(SerdNodes* const nodes,
+ const SerdNodeType type,
+ const SerdStringView string)
+{
+ // Calculate a hash code for the token without actually constructing it
+ const NodeSpec key = token_spec(type, string);
+ const ZixHashCode code = spec_hash(key);
+
+ // Find an insert position in the hash table
+ const ZixHashInsertPlan plan =
+ zix_hash_plan_insert_prehashed(nodes->hash, code, node_equals_spec, &key);
+
+ // If we found an existing node, bump its reference count and return it
+ NodesEntry* const existing = zix_hash_record_at(nodes->hash, plan);
+ if (existing) {
+ assert(nodes_hash(&existing->node) == code);
+ ++existing->head.refs;
+ return &existing->node;
+ }
+
+ // Otherwise, allocate and manage a new one
+ SerdAllocator* const alloc = &nodes->allocator.base;
+ SerdNode* const node = serd_node_new(alloc, serd_a_token(type, string));
+
+ return serd_nodes_manage_entry_node_at(nodes, node, plan);
+}
+
+static const SerdNode*
+serd_nodes_literal(SerdNodes* const nodes,
+ const SerdStringView string,
+ const SerdNodeFlags flags,
+ const SerdStringView meta)
+{
+ // Calculate a hash code for the literal without actually constructing it
+ const NodeSpec spec = literal_spec(string, flags, meta);
+ const ZixHashCode code = spec_hash(spec);
+
+ // Find an insert position in the hash table
+ const ZixHashInsertPlan plan =
+ zix_hash_plan_insert_prehashed(nodes->hash, code, node_equals_spec, &spec);
+
+ // If we found an existing node, bump its reference count and return it
+ NodesEntry* const existing = zix_hash_record_at(nodes->hash, plan);
+ if (existing) {
+ assert(nodes_hash(&existing->node) == code);
+ ++existing->head.refs;
+ return &existing->node;
+ }
+
+ // Otherwise, allocate and manage a new one
+ SerdAllocator* const alloc = &nodes->allocator.base;
+ SerdNode* const node =
+ serd_node_new(alloc, serd_a_literal(string, flags, meta));
+
+ return serd_nodes_manage_entry_node_at(nodes, node, plan);
+}
+
+static const SerdNode*
+try_intern(SerdNodes* const nodes,
+ const SerdWriteResult r,
+ const SerdNode* const node)
+{
+ return r.status ? NULL : serd_nodes_intern(nodes, node);
+}
+
+const SerdNode*
+serd_nodes_get(SerdNodes* const nodes, const SerdNodeArgs args)
+{
+ StaticNode key = empty_static_node;
+
+ /* Some types here are cleverly hashed without allocating a node, but others
+ simply allocate a new node and attempt to manage it. It would be possible
+ to calculate an in-place hash for some of them, but this is quite
+ complicated and error-prone, so more difficult edge cases aren't yet
+ implemented. */
+
+ switch (args.type) {
+ case SERD_NODE_ARGS_TOKEN:
+ return serd_nodes_token(
+ nodes, args.data.as_token.type, args.data.as_token.string);
+
+ case SERD_NODE_ARGS_PARSED_URI:
+ case SERD_NODE_ARGS_FILE_URI:
+ break;
+
+ case SERD_NODE_ARGS_LITERAL:
+ return serd_nodes_literal(nodes,
+ args.data.as_literal.string,
+ args.data.as_literal.flags,
+ args.data.as_literal.meta);
+
+ case SERD_NODE_ARGS_PRIMITIVE:
+ case SERD_NODE_ARGS_DECIMAL:
+ case SERD_NODE_ARGS_INTEGER:
+ return try_intern(
+ nodes, serd_node_construct(sizeof(key), &key, args), &key.node);
+
+ case SERD_NODE_ARGS_HEX:
+ case SERD_NODE_ARGS_BASE64:
+ break;
+ }
+
+ return serd_nodes_manage_entry_node(
+ nodes, serd_node_new(&nodes->allocator.base, args));
+}
+
+void
+serd_nodes_deref(SerdNodes* const nodes, const SerdNode* const node)
+{
+ if (!node) {
+ return;
+ }
+
+ ZixHashIter i = zix_hash_find(nodes->hash, node);
+ if (i == zix_hash_end(nodes->hash)) {
+ return;
+ }
+
+ NodesEntry* const entry = zix_hash_get(nodes->hash, i);
+ if (--entry->head.refs == 0U) {
+ NodesEntry* removed = NULL;
+ zix_hash_erase(nodes->hash, i, &removed);
+ assert(removed == entry);
+ free_entry(nodes, removed);
+ }
+}
diff --git a/src/nodes.h b/src/nodes.h
new file mode 100644
index 00000000..daec673d
--- /dev/null
+++ b/src/nodes.h
@@ -0,0 +1,56 @@
+// Copyright 2021 David Robillard <d@drobilla.net>
+// SPDX-License-Identifier: ISC
+
+/*
+ Nothing here is actually used outside the nodes implementation, but we need
+ the types to be defined before including zix/hash.h to enable its type-safe
+ interface. Putting those here is a way of doing that without messy hacks
+ like including headers half-way through the implementation.
+*/
+
+#ifndef SERD_SRC_NODES_H
+#define SERD_SRC_NODES_H
+
+#include "node.h"
+
+#include "serd/node.h"
+
+#include <stddef.h>
+
+/**
+ The header of an entry in the nodes table.
+
+ The table stores nodes with an additional reference count. For efficiency,
+ entries are allocated as a single block of memory, which start with this
+ header and are followed by the body of the node.
+
+ This structure must be the same size as SerdNode to preserve the alignment
+ of the contained node. This is a bit wasteful, but the alignment guarantee
+ allows the node implementation to avoid messy casts and byte-based pointer
+ arithmetic that could cause alignment problems. This might be worth
+ reconsidering, since this wasted space has a real (if small) negative
+ impact, while the alignment guarantee just allows the implementation to use
+ stricter compiler settings.
+
+ Better yet, shrink SerdNode down to size_t, which is malloc's alignment
+ guarantee, and all of this goes away, at the cost of a reduced maximum
+ length for literal nodes.
+*/
+typedef struct {
+ size_t refs; ///< Reference count
+ unsigned pad1; ///< Padding to align the following SerdNode
+ unsigned pad2; ///< Padding to align the following SerdNode
+} NodesEntryHead;
+
+/**
+ An entry in the nodes table.
+
+ This is a variably-sized structure that is allocated specifically to contain
+ the node.
+*/
+typedef struct {
+ NodesEntryHead head; ///< Extra data associated with the node
+ SerdNode node; ///< Node header (body immediately follows)
+} NodesEntry;
+
+#endif // SERD_SRC_NODES_H