diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/.clang-tidy | 1 | ||||
-rw-r--r-- | src/node_spec.h | 41 | ||||
-rw-r--r-- | src/nodes.c | 506 | ||||
-rw-r--r-- | src/nodes.h | 56 |
4 files changed, 603 insertions, 1 deletions
diff --git a/src/.clang-tidy b/src/.clang-tidy index 53834f98..161eabda 100644 --- a/src/.clang-tidy +++ b/src/.clang-tidy @@ -10,5 +10,4 @@ Checks: > -hicpp-signed-bitwise, -llvm-header-guard, -misc-no-recursion, - -modernize-macro-to-enum, InheritParentConfig: true diff --git a/src/node_spec.h b/src/node_spec.h new file mode 100644 index 00000000..97243bf6 --- /dev/null +++ b/src/node_spec.h @@ -0,0 +1,41 @@ +// Copyright 2021 David Robillard <d@drobilla.net> +// SPDX-License-Identifier: ISC + +#ifndef SERD_SRC_NODE_SPEC_H +#define SERD_SRC_NODE_SPEC_H + +#include "serd/node.h" +#include "serd/string_view.h" + +/** + A lightweight "specification" of a node. + + This is essentially the arguments needed to construct any node combined into + a single structure. Since it only refers to strings elsewhere, it is + convenient as a way to completely specify a node without having to actually + allocate one. +*/ +typedef struct { + SerdNodeType type; ///< Basic type of this node + SerdStringView string; ///< String contents of this node + SerdNodeFlags flags; ///< Additional node flags + SerdStringView meta; ///< String contents of datatype or language node +} NodeSpec; + +static inline NodeSpec +token_spec(const SerdNodeType type, const SerdStringView string) +{ + NodeSpec spec = {type, string, 0U, serd_empty_string()}; + return spec; +} + +static inline NodeSpec +literal_spec(const SerdStringView string, + const SerdNodeFlags flags, + const SerdStringView meta) +{ + NodeSpec spec = {SERD_LITERAL, string, flags, meta}; + return spec; +} + +#endif // SERD_SRC_NODE_SPEC_H diff --git a/src/nodes.c b/src/nodes.c new file mode 100644 index 00000000..68d9cf3c --- /dev/null +++ b/src/nodes.c @@ -0,0 +1,506 @@ +// Copyright 2011-2022 David Robillard <d@drobilla.net> +// SPDX-License-Identifier: ISC + +#include "nodes.h" + +#include "memory.h" +#include "node.h" +#include "node_spec.h" + +// Define the types used in the hash interface for more type safety +#define ZIX_HASH_KEY_TYPE SerdNode +#define ZIX_HASH_RECORD_TYPE NodesEntry +#define ZIX_HASH_SEARCH_DATA_TYPE NodeSpec + +#include "serd/memory.h" +#include "serd/nodes.h" +#include "serd/string_view.h" +#include "serd/write_result.h" +#include "zix/allocator.h" +#include "zix/attributes.h" +#include "zix/digest.h" +#include "zix/hash.h" + +#include <assert.h> +#include <stdbool.h> +#include <stdint.h> +#include <string.h> + +#if ((defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112l) || \ + (defined(__cplusplus) && __cplusplus >= 201103L)) + +static_assert(sizeof(NodesEntryHead) == sizeof(SerdNode), + "NodesEntryHead must be the same size as SerdNode for alignment"); + +#endif + +/* + The main goal here is to make getting an existing node as fast as possible, + so that this can be used as a convenient node cache with minimal performance + overhead. Mainly this means striving to avoid allocation when potentially + inserting a new node. + + This is achieved by generating a hash code from node components without + actually allocating that node, and using the low-level insertion interface of + ZixHash to do a custom search. This way, an entry is only allocated when + necessary, and the hash table is only searched once. + + The downside is that subtle and very bad things will happen if the hash + generated for the node does not match the actual hash of the node. The + exessive number of assertions around this are there to provide some defense + against such mistakes. Cave operatur. +*/ + +/** + The maximum size of a node that will be made statically on the stack. + + This mostly applies to things like numeric literal nodes, where a small + maximum size is exploited to avoid allocation. The largest static node + string is the longest xsd:decimal, which is 327 bytes. We need a bit more + space than that here for the node header, padding, and datatype. +*/ +#define MAX_STATIC_NODE_SIZE 384 + +typedef struct { + SerdNode node; + char body[MAX_STATIC_NODE_SIZE]; +} StaticNode; + +/** + Allocator for allocating entries in the node hash table. + + This allocator implements only the methods used by the serd_node_* + constructors, and transparently increases the allocation size so there is + room for an extra NodesEntryHead at the start. This allows the serd_node_* + constructors to be re-used here, even though the table stores entries (nodes + with an extra header) rather than node pointers directly. +*/ +typedef struct { + SerdAllocator base; ///< Implementation of SerdAllocator (base "class") + SerdAllocator* real; ///< Underlying "real" memory allocator +} SerdNodesEntryAllocator; + +ZIX_MALLOC_FUNC static void* +serd_nodes_entry_aligned_alloc(SerdAllocator* const allocator, + const size_t alignment, + const size_t size) +{ + SerdAllocator* const real = ((SerdNodesEntryAllocator*)allocator)->real; + + void* const ptr = + real->aligned_alloc(real, alignment, serd_node_align + size); + + return ptr ? (((uint8_t*)ptr) + serd_node_align) : NULL; +} + +static void +serd_nodes_entry_aligned_free(SerdAllocator* const allocator, void* const ptr) +{ + SerdAllocator* const real = ((SerdNodesEntryAllocator*)allocator)->real; + + if (ptr) { + real->aligned_free(real, (((uint8_t*)ptr) - serd_node_align)); + } +} + +static SerdNodesEntryAllocator +serd_nodes_entry_allocator(SerdAllocator* const real) +{ + const SerdNodesEntryAllocator entry_allocator = { + { + NULL, + NULL, + NULL, + NULL, + serd_nodes_entry_aligned_alloc, + serd_nodes_entry_aligned_free, + }, + real ? real : serd_default_allocator(), + }; + + return entry_allocator; +} + +struct SerdNodesImpl { + SerdNodesEntryAllocator allocator; + ZixHash* hash; +}; + +static const StaticNode empty_static_node = {{0U, 0U, SERD_LITERAL}, {'\0'}}; + +static const SerdNodeFlags meta_mask = (SERD_HAS_DATATYPE | SERD_HAS_LANGUAGE); + +static const SerdNode* +nodes_key(const NodesEntry* const entry) +{ + return &entry->node; +} + +static ZixHashCode +token_hash(const ZixHashCode seed, + const SerdNodeType type, + const SerdStringView string) +{ + const SerdNode node_header = {string.length, 0U, type}; + ZixHashCode h = seed; + + h = zix_digest_aligned(h, &node_header, sizeof(node_header)); + h = zix_digest(h, string.data, string.length); + return h; +} + +static ZixHashCode +spec_hash(const NodeSpec spec) +{ + ZixHashCode h = token_hash(0U, spec.type, spec.string); + + if (spec.flags & SERD_HAS_DATATYPE) { + h = token_hash(h, SERD_URI, spec.meta); + } else if (spec.flags & SERD_HAS_LANGUAGE) { + h = token_hash(h, SERD_LITERAL, spec.meta); + } + + return zix_digest(h, &spec.flags, sizeof(spec.flags)); +} + +static ZixHashCode +nodes_hash(const SerdNode* const node) +{ + /* If you're thinking "I know, I'll make this faster by just hashing the + entire node in one call!", think harder. Since zero bytes affect the hash + value, that would mean that the hash code for the node won't match the one + for the node spec, and the above-mentioned subtle and very bad things will + happen. This function deliberately mirrors spec_hash() above to make it + relatively easy to see that the two will match. + + It would be more elegant to construct a spec from the node and then hash + that with the exact same code used for searching, but there's currently no + use for that anywhere else and this way is a bit faster. */ + + ZixHashCode h = token_hash(0U, node->type, serd_node_string_view(node)); + + if (node->flags & SERD_HAS_DATATYPE) { + h = token_hash(h, SERD_URI, serd_node_string_view(serd_node_meta_c(node))); + } else if (node->flags & SERD_HAS_LANGUAGE) { + h = token_hash( + h, SERD_LITERAL, serd_node_string_view(serd_node_meta_c(node))); + } + + return zix_digest(h, &node->flags, sizeof(node->flags)); +} + +static bool +node_equals_spec(const SerdNode* const node, const NodeSpec* const spec) +{ + return serd_node_type(node) == spec->type && + serd_node_length(node) == spec->string.length && + node->flags == spec->flags && + !strcmp(serd_node_string_i(node), spec->string.data) && + (!(node->flags & meta_mask) || + !strcmp(serd_node_string_i(serd_node_meta_c(node)), spec->meta.data)); +} + +static bool +nodes_meta_equal(const SerdNode* const a, const SerdNode* const b) +{ + assert(a->flags & meta_mask); + assert(b->flags & meta_mask); + + const size_t padded_length = serd_node_pad_length(a->length); + const size_t offset = padded_length / sizeof(SerdNode); + const SerdNode* const am = a + 1U + offset; + const SerdNode* const bm = b + 1U + offset; + + return am->length == bm->length && am->type == bm->type && + !memcmp(serd_node_string_i(am), serd_node_string_i(bm), am->length); +} + +static bool +nodes_equal(const SerdNode* const a, const SerdNode* const b) +{ + return (a == b) || + (a->length == b->length && a->flags == b->flags && + a->type == b->type && + !memcmp(serd_node_string_i(a), serd_node_string_i(b), a->length) && + (!(a->flags & meta_mask) || nodes_meta_equal(a, b))); +} + +static void +free_entry(SerdNodes* const nodes, NodesEntry* const entry) +{ + serd_aaligned_free(&nodes->allocator.base, &entry->node); +} + +SerdNodes* +serd_nodes_new(SerdAllocator* const allocator) +{ + SerdNodes* const nodes = + (SerdNodes*)serd_acalloc(allocator, 1, sizeof(SerdNodes)); + + if (nodes) { + nodes->allocator = serd_nodes_entry_allocator(allocator); + + if (!(nodes->hash = zix_hash_new( + (ZixAllocator*)allocator, nodes_key, nodes_hash, nodes_equal))) { + serd_afree(allocator, nodes); + return NULL; + } + } + + return nodes; +} + +void +serd_nodes_free(SerdNodes* nodes) +{ + if (nodes) { + for (ZixHashIter i = zix_hash_begin(nodes->hash); + i != zix_hash_end(nodes->hash); + i = zix_hash_next(nodes->hash, i)) { + free_entry(nodes, (NodesEntry*)zix_hash_get(nodes->hash, i)); + } + + zix_hash_free(nodes->hash); + serd_afree(nodes->allocator.real, nodes); + } +} + +size_t +serd_nodes_size(const SerdNodes* nodes) +{ + assert(nodes); + + return zix_hash_size(nodes->hash); +} + +const SerdNode* +serd_nodes_intern(SerdNodes* nodes, const SerdNode* node) +{ + assert(nodes); + if (!node) { + return NULL; + } + + const ZixHashInsertPlan plan = zix_hash_plan_insert(nodes->hash, node); + NodesEntry* const existing = zix_hash_record_at(nodes->hash, plan); + if (existing) { + assert(serd_node_equals(&existing->node, node)); + ++existing->head.refs; + return &existing->node; + } + + SerdNode* const new_node = serd_node_copy(&nodes->allocator.base, node); + if (!new_node) { + return NULL; + } + + NodesEntry* const entry = (NodesEntry*)(new_node - 1U); + + entry->head.refs = 1U; + + // Insert the entry (blissfully ignoring a failed hash size increase) + if (zix_hash_insert_at(nodes->hash, plan, entry)) { + free_entry(nodes, entry); + return NULL; + } + + return &entry->node; +} + +const SerdNode* +serd_nodes_existing(const SerdNodes* const nodes, const SerdNode* const node) +{ + assert(nodes); + + if (!node) { + return NULL; + } + + NodesEntry* const entry = zix_hash_find_record(nodes->hash, node); + + return entry ? &entry->node : NULL; +} + +static const SerdNode* +serd_nodes_manage_entry_at(SerdNodes* const nodes, + NodesEntry* const entry, + const ZixHashInsertPlan plan) +{ + assert(nodes); + assert(entry); + + entry->head.refs = 1U; + + // Insert the entry (blissfully ignoring a failed hash size increase) + if (zix_hash_insert_at(nodes->hash, plan, entry)) { + free_entry(nodes, entry); + return NULL; + } + + return &entry->node; +} + +static const SerdNode* +serd_nodes_manage_entry_node_at(SerdNodes* const nodes, + SerdNode* const node, + const ZixHashInsertPlan plan) +{ + if (!node) { + return NULL; + } + + NodesEntry* const entry = (NodesEntry*)(node - 1U); + + return serd_nodes_manage_entry_at(nodes, entry, plan); +} + +static const SerdNode* +serd_nodes_manage_entry_node(SerdNodes* const nodes, SerdNode* const node) +{ + if (!node) { + return NULL; + } + + NodesEntry* const entry = (NodesEntry*)(node - 1U); + const ZixHashInsertPlan plan = zix_hash_plan_insert(nodes->hash, node); + NodesEntry* const existing = zix_hash_record_at(nodes->hash, plan); + if (existing) { + assert(serd_node_equals(&existing->node, node)); + assert(nodes_hash(&existing->node) == plan.code); + ++existing->head.refs; + free_entry(nodes, entry); + return &existing->node; + } + + assert(nodes_hash(&entry->node) == plan.code); + + return serd_nodes_manage_entry_at(nodes, entry, plan); +} + +static const SerdNode* +serd_nodes_token(SerdNodes* const nodes, + const SerdNodeType type, + const SerdStringView string) +{ + // Calculate a hash code for the token without actually constructing it + const NodeSpec key = token_spec(type, string); + const ZixHashCode code = spec_hash(key); + + // Find an insert position in the hash table + const ZixHashInsertPlan plan = + zix_hash_plan_insert_prehashed(nodes->hash, code, node_equals_spec, &key); + + // If we found an existing node, bump its reference count and return it + NodesEntry* const existing = zix_hash_record_at(nodes->hash, plan); + if (existing) { + assert(nodes_hash(&existing->node) == code); + ++existing->head.refs; + return &existing->node; + } + + // Otherwise, allocate and manage a new one + SerdAllocator* const alloc = &nodes->allocator.base; + SerdNode* const node = serd_node_new(alloc, serd_a_token(type, string)); + + return serd_nodes_manage_entry_node_at(nodes, node, plan); +} + +static const SerdNode* +serd_nodes_literal(SerdNodes* const nodes, + const SerdStringView string, + const SerdNodeFlags flags, + const SerdStringView meta) +{ + // Calculate a hash code for the literal without actually constructing it + const NodeSpec spec = literal_spec(string, flags, meta); + const ZixHashCode code = spec_hash(spec); + + // Find an insert position in the hash table + const ZixHashInsertPlan plan = + zix_hash_plan_insert_prehashed(nodes->hash, code, node_equals_spec, &spec); + + // If we found an existing node, bump its reference count and return it + NodesEntry* const existing = zix_hash_record_at(nodes->hash, plan); + if (existing) { + assert(nodes_hash(&existing->node) == code); + ++existing->head.refs; + return &existing->node; + } + + // Otherwise, allocate and manage a new one + SerdAllocator* const alloc = &nodes->allocator.base; + SerdNode* const node = + serd_node_new(alloc, serd_a_literal(string, flags, meta)); + + return serd_nodes_manage_entry_node_at(nodes, node, plan); +} + +static const SerdNode* +try_intern(SerdNodes* const nodes, + const SerdWriteResult r, + const SerdNode* const node) +{ + return r.status ? NULL : serd_nodes_intern(nodes, node); +} + +const SerdNode* +serd_nodes_get(SerdNodes* const nodes, const SerdNodeArgs args) +{ + StaticNode key = empty_static_node; + + /* Some types here are cleverly hashed without allocating a node, but others + simply allocate a new node and attempt to manage it. It would be possible + to calculate an in-place hash for some of them, but this is quite + complicated and error-prone, so more difficult edge cases aren't yet + implemented. */ + + switch (args.type) { + case SERD_NODE_ARGS_TOKEN: + return serd_nodes_token( + nodes, args.data.as_token.type, args.data.as_token.string); + + case SERD_NODE_ARGS_PARSED_URI: + case SERD_NODE_ARGS_FILE_URI: + break; + + case SERD_NODE_ARGS_LITERAL: + return serd_nodes_literal(nodes, + args.data.as_literal.string, + args.data.as_literal.flags, + args.data.as_literal.meta); + + case SERD_NODE_ARGS_PRIMITIVE: + case SERD_NODE_ARGS_DECIMAL: + case SERD_NODE_ARGS_INTEGER: + return try_intern( + nodes, serd_node_construct(sizeof(key), &key, args), &key.node); + + case SERD_NODE_ARGS_HEX: + case SERD_NODE_ARGS_BASE64: + break; + } + + return serd_nodes_manage_entry_node( + nodes, serd_node_new(&nodes->allocator.base, args)); +} + +void +serd_nodes_deref(SerdNodes* const nodes, const SerdNode* const node) +{ + if (!node) { + return; + } + + ZixHashIter i = zix_hash_find(nodes->hash, node); + if (i == zix_hash_end(nodes->hash)) { + return; + } + + NodesEntry* const entry = zix_hash_get(nodes->hash, i); + if (--entry->head.refs == 0U) { + NodesEntry* removed = NULL; + zix_hash_erase(nodes->hash, i, &removed); + assert(removed == entry); + free_entry(nodes, removed); + } +} diff --git a/src/nodes.h b/src/nodes.h new file mode 100644 index 00000000..daec673d --- /dev/null +++ b/src/nodes.h @@ -0,0 +1,56 @@ +// Copyright 2021 David Robillard <d@drobilla.net> +// SPDX-License-Identifier: ISC + +/* + Nothing here is actually used outside the nodes implementation, but we need + the types to be defined before including zix/hash.h to enable its type-safe + interface. Putting those here is a way of doing that without messy hacks + like including headers half-way through the implementation. +*/ + +#ifndef SERD_SRC_NODES_H +#define SERD_SRC_NODES_H + +#include "node.h" + +#include "serd/node.h" + +#include <stddef.h> + +/** + The header of an entry in the nodes table. + + The table stores nodes with an additional reference count. For efficiency, + entries are allocated as a single block of memory, which start with this + header and are followed by the body of the node. + + This structure must be the same size as SerdNode to preserve the alignment + of the contained node. This is a bit wasteful, but the alignment guarantee + allows the node implementation to avoid messy casts and byte-based pointer + arithmetic that could cause alignment problems. This might be worth + reconsidering, since this wasted space has a real (if small) negative + impact, while the alignment guarantee just allows the implementation to use + stricter compiler settings. + + Better yet, shrink SerdNode down to size_t, which is malloc's alignment + guarantee, and all of this goes away, at the cost of a reduced maximum + length for literal nodes. +*/ +typedef struct { + size_t refs; ///< Reference count + unsigned pad1; ///< Padding to align the following SerdNode + unsigned pad2; ///< Padding to align the following SerdNode +} NodesEntryHead; + +/** + An entry in the nodes table. + + This is a variably-sized structure that is allocated specifically to contain + the node. +*/ +typedef struct { + NodesEntryHead head; ///< Extra data associated with the node + SerdNode node; ///< Node header (body immediately follows) +} NodesEntry; + +#endif // SERD_SRC_NODES_H |