/* Copyright 2011-2020 David Robillard Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies. THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include "nodes.h" #include "node.h" #include "node_spec.h" #include "system.h" // Define the types used in the hash interface for more type safety #define ZIX_HASH_KEY_TYPE SerdNode #define ZIX_HASH_RECORD_TYPE NodesEntry #define ZIX_HASH_SEARCH_DATA_TYPE NodeSpec #include "serd/serd.h" #include "zix/digest.h" #include "zix/hash.h" #include #include #include #include #include #if ((defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112l) || \ (defined(__cplusplus) && __cplusplus >= 201103L)) static_assert(sizeof(NodesEntryHead) == sizeof(SerdNode), "NodesEntryHead must be the same size as SerdNode for alignment"); #endif /* The main goal here is to make getting an existing node as fast as possible, so that this can be used as a convenient node cache with minimal performance overhead. Mainly this means striving to avoid allocation when potentially inserting a new node. This is achieved by generating a hash code from node components without actually allocating that node, and using the low-level insertion interface of ZixHash to do a custom search. This way, an entry is only allocated when necessary, and the hash table is only searched once. The downside is that subtle and very bad things will happen if the hash generated for the node does not match the actual hash of the node. The exessive number of assertions around this are there to provide some defense against such mistakes. Cave operatur. */ /** The maximum size of a node that will be made statically on the stack. This mostly applies to things like numeric literal nodes, where a small maximum size is exploited to avoid allocation. The largest static node string is the longest xsd:decimal, which is 327 bytes. We need a bit more space than that here for the node header, padding, and datatype. */ #define MAX_STATIC_NODE_SIZE 384 typedef struct { SerdNode node; char body[MAX_STATIC_NODE_SIZE]; } StaticNode; struct SerdNodesImpl { ZixHash* hash; }; static const StaticNode empty_static_node = {{0u, 0u, SERD_LITERAL}, {'\0'}}; static const SerdNode* nodes_key(const NodesEntry* const entry) { return &entry->node; } static ZixHashCode token_hash(const ZixHashCode seed, const SerdNodeType type, const SerdStringView string) { const SerdNode node_header = {string.len, 0u, type}; ZixHashCode h = seed; h = zix_digest_aligned(h, &node_header, sizeof(node_header)); h = zix_digest(h, string.buf, string.len); return h; } static ZixHashCode spec_hash(const NodeSpec spec) { ZixHashCode h = token_hash(0u, spec.type, spec.string); if (spec.flags & SERD_HAS_DATATYPE) { h = token_hash(h, SERD_URI, spec.meta); } else if (spec.flags & SERD_HAS_LANGUAGE) { h = token_hash(h, SERD_LITERAL, spec.meta); } return h; } static ZixHashCode nodes_hash(const SerdNode* const node) { /* If you're thinking "I know, I'll make this faster by just hashing the entire node in one call!", think harder. Since zero bytes affect the hash value, that would mean that the hash code for the node won't match the one for the node spec, and the above-mentioned subtle and very bad things will happen. This function deliberately mirrors spec_hash() above to make it relatively easy to see that the two will match. It would be more elegant to construct a spec from the node and then hash that with the exact same code used for searching, but there's currently no use for that anywhere else and this way is a bit faster. */ ZixHashCode h = token_hash(0u, node->type, serd_node_string_view(node)); if (node->flags & SERD_HAS_DATATYPE) { h = token_hash(h, SERD_URI, serd_node_string_view(serd_node_meta_c(node))); } else if (node->flags & SERD_HAS_LANGUAGE) { h = token_hash( h, SERD_LITERAL, serd_node_string_view(serd_node_meta_c(node))); } return h; } static bool node_equals_spec(const SerdNode* const node, const NodeSpec* const spec) { // Only datatype and language are relevant for equality static const SerdNodeFlags flag_mask = SERD_HAS_DATATYPE | SERD_HAS_LANGUAGE; const SerdNodeFlags flags = spec->flags & flag_mask; return serd_node_type(node) == spec->type && serd_node_length(node) == spec->string.len && (node->flags & flag_mask) == flags && !strcmp(serd_node_string_i(node), spec->string.buf) && (!flags || !strcmp(serd_node_string_i(serd_node_meta_c(node)), spec->meta.buf)); } static bool nodes_equal(const SerdNode* const a, const SerdNode* const b) { static const SerdNodeFlags meta_mask = (SERD_HAS_DATATYPE | SERD_HAS_LANGUAGE); if (a == b) { return true; } if (a->length != b->length || a->flags != b->flags || a->type != b->type || !!memcmp(serd_node_string_i(a), serd_node_string_i(b), a->length)) { return false; } if (a->flags & meta_mask) { const size_t padded_length = serd_node_pad_length(a->length); const size_t offset = padded_length / sizeof(SerdNode); const SerdNode* const am = a + 1u + offset; const SerdNode* const bm = b + 1u + offset; return am->length == bm->length && am->type == bm->type && !memcmp(serd_node_string_i(am), serd_node_string_i(bm), am->length); } return true; } static NodesEntry* new_entry(const size_t node_size) { NodesEntry* const entry = (NodesEntry*)serd_calloc_aligned( serd_node_align, sizeof(NodesEntryHead) + node_size); if (entry) { entry->head.refs = 1u; } return entry; } SerdNodes* serd_nodes_new(void) { SerdNodes* const nodes = (SerdNodes*)calloc(1, sizeof(SerdNodes)); nodes->hash = zix_hash_new(nodes_key, nodes_hash, nodes_equal); return nodes; } void serd_nodes_free(SerdNodes* nodes) { if (nodes) { for (ZixHashIter i = zix_hash_begin(nodes->hash); i != zix_hash_end(nodes->hash); i = zix_hash_next(nodes->hash, i)) { serd_free_aligned(zix_hash_get(nodes->hash, i)); } zix_hash_free(nodes->hash); free(nodes); } } size_t serd_nodes_size(const SerdNodes* nodes) { return zix_hash_size(nodes->hash); } const SerdNode* serd_nodes_intern(SerdNodes* nodes, const SerdNode* node) { assert(nodes); if (!node) { return NULL; } const ZixHashInsertPlan plan = zix_hash_plan_insert(nodes->hash, node); NodesEntry* const existing = zix_hash_record_at(nodes->hash, plan); if (existing) { assert(serd_node_equals(&existing->node, node)); ++existing->head.refs; return &existing->node; } const size_t node_size = serd_node_total_size(node); NodesEntry* const entry = new_entry(node_size); memcpy(&entry->node, node, node_size); // Insert the entry (blissfully ignoring a failed hash size increase) zix_hash_insert_at(nodes->hash, plan, entry); return &entry->node; } const SerdNode* serd_nodes_get(const SerdNodes* const nodes, const SerdNode* const node) { if (!node) { return NULL; } NodesEntry* const entry = zix_hash_find_record(nodes->hash, node); return entry ? &entry->node : NULL; } static const SerdNode* serd_nodes_manage_entry(SerdNodes* const nodes, NodesEntry* const entry) { const SerdNode* const node = &entry->node; const ZixHashInsertPlan plan = zix_hash_plan_insert(nodes->hash, node); NodesEntry* const existing = zix_hash_record_at(nodes->hash, plan); if (existing) { assert(serd_node_equals(&existing->node, node)); assert(nodes_hash(&existing->node) == plan.code); ++existing->head.refs; serd_free_aligned(entry); return &existing->node; } // Insert the entry (blissfully ignoring a failed hash size increase) zix_hash_insert_at(nodes->hash, plan, entry); assert(nodes_hash(&entry->node) == plan.code); return &entry->node; } static const SerdNode* serd_nodes_token(SerdNodes* const nodes, const SerdNodeType type, const SerdStringView string) { const NodeSpec key = token_spec(type, string); const ZixHashCode code = spec_hash(key); const ZixHashInsertPlan plan = zix_hash_plan_insert_prehashed(nodes->hash, code, node_equals_spec, &key); NodesEntry* const existing = zix_hash_record_at(nodes->hash, plan); if (existing) { assert(nodes_hash(&existing->node) == code); ++existing->head.refs; return &existing->node; } const size_t padded_length = serd_node_pad_length(string.len); const size_t node_size = sizeof(SerdNode) + padded_length; NodesEntry* const entry = new_entry(node_size); SerdNode* const node = entry ? &entry->node : NULL; if (node) { // Construct the token directly into the node in the new entry const SerdWriteResult r = serd_node_construct_token(node_size, &entry->node, type, string); assert(!r.status); // Never fails with sufficient space (void)r; // Insert the entry (blissfully ignoring a failed hash size increase) zix_hash_insert_at(nodes->hash, plan, entry); assert(nodes_hash(node) == code); } return node; } const SerdNode* serd_nodes_literal(SerdNodes* const nodes, const SerdStringView string, const SerdNodeFlags flags, const SerdStringView meta) { // Calculate a hash code for the literal without actually constructing it const NodeSpec spec = literal_spec(string, flags, meta); const ZixHashCode code = spec_hash(spec); // Find an insert position in the hash table const ZixHashInsertPlan plan = zix_hash_plan_insert_prehashed(nodes->hash, code, node_equals_spec, &spec); // If we found an existing node, bump its reference count and return it NodesEntry* const existing = zix_hash_record_at(nodes->hash, plan); if (existing) { assert(nodes_hash(&existing->node) == code); ++existing->head.refs; return &existing->node; } // We need to insert a new entry, so determine how much space the node needs SerdWriteResult r = serd_node_construct_literal(0, NULL, string, flags, meta); if (r.status != SERD_ERR_OVERFLOW) { return NULL; } // Allocate a new entry with enough space for the node NodesEntry* const entry = new_entry(r.count); SerdNode* const node = entry ? &entry->node : NULL; if (node) { // Construct the literal directly into the node in the new entry r = serd_node_construct_literal(r.count, node, string, flags, meta); assert(!r.status); (void)r; // Insert the entry (blissfully ignoring a failed hash size increase) zix_hash_insert_at(nodes->hash, plan, entry); assert(nodes_hash(node) == code); } return node; } const SerdNode* serd_nodes_string(SerdNodes* const nodes, const SerdStringView string) { return serd_nodes_token(nodes, SERD_LITERAL, string); } static const SerdNode* try_intern(SerdNodes* const nodes, const SerdWriteResult r, const SerdNode* const node) { return r.status ? NULL : serd_nodes_intern(nodes, node); } const SerdNode* serd_nodes_boolean(SerdNodes* const nodes, bool value) { StaticNode key = empty_static_node; return try_intern( nodes, serd_node_construct_boolean(sizeof(key), &key, value), &key.node); } const SerdNode* serd_nodes_decimal(SerdNodes* const nodes, const double value) { StaticNode key = empty_static_node; return try_intern( nodes, serd_node_construct_decimal(sizeof(key), &key, value), &key.node); } const SerdNode* serd_nodes_double(SerdNodes* const nodes, const double value) { StaticNode key = empty_static_node; return try_intern( nodes, serd_node_construct_double(sizeof(key), &key, value), &key.node); } const SerdNode* serd_nodes_float(SerdNodes* const nodes, const float value) { StaticNode key = empty_static_node; return try_intern( nodes, serd_node_construct_float(sizeof(key), &key, value), &key.node); } const SerdNode* serd_nodes_integer(SerdNodes* const nodes, const int64_t value, const SerdStringView datatype) { StaticNode key = empty_static_node; return try_intern( nodes, serd_node_construct_integer(sizeof(key), &key, value, datatype), &key.node); } const SerdNode* serd_nodes_base64(SerdNodes* const nodes, const void* const value, const size_t value_size, const SerdStringView datatype) { assert(nodes); assert(value); /* We're more or less forced to allocate and construct an entry here, since we need the base64 string to hash. Though it would be possible to calculate it in a streaming fashion, that would be a severe pessimisation in the presumably common case of raw data not being cached, since it would only need to be serialised again. Keeping a tentative entry buffer around when possible would probably be a better improvement if this ever becomes a performance issue. More ambitiously, adding support for binary nodes like a Real Database(TM) would largely avoid this problem. */ // Determine how much space the node needs SerdWriteResult r = serd_node_construct_base64(0, NULL, value_size, value, datatype); // Allocate a new entry to and construct the node into it NodesEntry* const entry = new_entry(r.count); r = serd_node_construct_base64( r.count, &entry->node, value_size, value, datatype); assert(!r.status); (void)r; return serd_nodes_manage_entry(nodes, entry); } const SerdNode* serd_nodes_uri(SerdNodes* const nodes, const SerdStringView string) { return serd_nodes_token(nodes, SERD_URI, string); } const SerdNode* serd_nodes_parsed_uri(SerdNodes* const nodes, const SerdURIView uri) { assert(nodes); /* Computing a hash for the serialised URI here would be quite complex, so, since this isn't expected to be a particularly hot case, we just allocate a new entry and try to do a normal insertion. */ // Determine how much space the node needs SerdWriteResult r = serd_node_construct_uri(0u, NULL, uri); assert(r.status == SERD_ERR_OVERFLOW); // Currently no other errors // Allocate a new entry to write the URI node into NodesEntry* const entry = new_entry(r.count); r = serd_node_construct_uri(r.count, &entry->node, uri); assert(!r.status); (void)r; return serd_nodes_manage_entry(nodes, entry); } const SerdNode* serd_nodes_blank(SerdNodes* const nodes, const SerdStringView string) { return serd_nodes_token(nodes, SERD_BLANK, string); } void serd_nodes_deref(SerdNodes* const nodes, const SerdNode* const node) { if (!node) { return; } ZixHashIter i = zix_hash_find(nodes->hash, node); if (i == zix_hash_end(nodes->hash)) { return; } NodesEntry* const entry = zix_hash_get(nodes->hash, i); if (--entry->head.refs == 0u) { NodesEntry* removed = NULL; zix_hash_erase(nodes->hash, i, &removed); assert(removed == entry); serd_free_aligned(removed); } }