From 4cfc8dc3521480672938a74813ca8bf19eaee964 Mon Sep 17 00:00:00 2001 From: David Robillard Date: Sun, 28 Oct 2018 14:00:47 +0100 Subject: Add SerdNodes for storing a cache of interned nodes --- include/serd/serd.h | 139 +++++++++++++++++++++++++++++ meson.build | 5 +- src/node.c | 1 - src/node.h | 4 + src/nodes.c | 205 +++++++++++++++++++++++++++++++++++++++++++ test/meson.build | 1 + test/test_free_null.c | 1 + test/test_nodes.c | 236 ++++++++++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 590 insertions(+), 2 deletions(-) create mode 100644 src/nodes.c create mode 100644 test/test_nodes.c diff --git a/include/serd/serd.h b/include/serd/serd.h index 49cfcee4..bf346a99 100644 --- a/include/serd/serd.h +++ b/include/serd/serd.h @@ -825,6 +825,145 @@ bool serd_node_equals(const SerdNode* SERD_NULLABLE a, const SerdNode* SERD_NULLABLE b); +/** + @} + @defgroup serd_nodes Nodes + @{ +*/ + +/// Hashing node container for interning and simplified memory management +typedef struct SerdNodesImpl SerdNodes; + +/// Create a new node set +SERD_API +SerdNodes* SERD_ALLOCATED +serd_nodes_new(void); + +/** + Free `nodes` and all nodes that are stored in it. + + Note that this invalidates any node pointers previously returned from + `nodes`. +*/ +SERD_API +void +serd_nodes_free(SerdNodes* SERD_NULLABLE nodes); + +/// Return the number of interned nodes +SERD_PURE_API +size_t +serd_nodes_size(const SerdNodes* SERD_NONNULL nodes); + +/** + Return the existing interned copy of a node if it exists. + + This either returns an equivalent to the given node, or null if this node + has not been interned. +*/ +SERD_API +const SerdNode* SERD_NULLABLE +serd_nodes_get(const SerdNodes* SERD_NONNULL nodes, + const SerdNode* SERD_NULLABLE node); + +/** + Intern `node`. + + Multiple calls with equivalent nodes will return the same pointer. + + @return A node that is different than, but equivalent to, `node`. +*/ +SERD_API +const SerdNode* SERD_ALLOCATED +serd_nodes_intern(SerdNodes* SERD_NONNULL nodes, + const SerdNode* SERD_NULLABLE node); + +/** + Manage `node`. + + Like `serd_nodes_intern`, but takes ownership of `node`, freeing it and + returning a previously interned/managed equivalent node if necessary. + + @return A node that is equivalent to `node`. +*/ +SERD_API +const SerdNode* SERD_ALLOCATED +serd_nodes_manage(SerdNodes* SERD_NONNULL nodes, SerdNode* SERD_NULLABLE node); + +/** + Make a string node. + + A new node will be added if an equivalent node is not already in the set. +*/ +SERD_API +const SerdNode* SERD_ALLOCATED +serd_nodes_string(SerdNodes* SERD_NONNULL nodes, SerdStringView string); + +/** + Return a plain literal node with an optional language. + + If the language is empty, then this is equivalent to serd_nodes_string(). + + A new node will be added if an equivalent node is not already in the set. +*/ +SERD_API +const SerdNode* SERD_ALLOCATED +serd_nodes_plain_literal(SerdNodes* SERD_NONNULL nodes, + SerdStringView string, + SerdStringView language); + +/** + Return a typed literal node with an datatype URI. + + If the datatype URI is empty, then this is equivalent to + serd_nodes_string(). + + A new node will be added if an equivalent node is not already in the set. +*/ +SERD_API +const SerdNode* SERD_ALLOCATED +serd_nodes_typed_literal(SerdNodes* SERD_NONNULL nodes, + SerdStringView string, + SerdStringView datatype_uri); + +/** + Make a URI node. + + A new node will be added if an equivalent node is not already in the set. +*/ +SERD_API +const SerdNode* SERD_ALLOCATED +serd_nodes_uri(SerdNodes* SERD_NONNULL nodes, SerdStringView string); + +/** + Make a CURIE node. + + A new node will be added if an equivalent node is not already in the set. +*/ +SERD_API +const SerdNode* SERD_ALLOCATED +serd_nodes_curie(SerdNodes* SERD_NONNULL nodes, SerdStringView string); + +/** + Make a blank node. + + A new node will be added if an equivalent node is not already in the set. +*/ +SERD_API +const SerdNode* SERD_ALLOCATED +serd_nodes_blank(SerdNodes* SERD_NONNULL nodes, SerdStringView string); + +/** + Dereference `node`. + + Decrements the reference count of `node`, and frees the internally stored + equivalent node if this was the last reference. Does nothing if no node + equivalent to `node` is stored in `nodes`. +*/ +SERD_API +void +serd_nodes_deref(SerdNodes* SERD_NONNULL nodes, + const SerdNode* SERD_NONNULL node); + /** @} @defgroup serd_caret Caret diff --git a/meson.build b/meson.build index afcc39f1..4e3c3e31 100644 --- a/meson.build +++ b/meson.build @@ -91,6 +91,7 @@ sources = [ 'src/env.c', 'src/n3.c', 'src/node.c', + 'src/nodes.c', 'src/reader.c', 'src/sink.c', 'src/statement.c', @@ -101,6 +102,8 @@ sources = [ 'src/uri.c', 'src/world.c', 'src/writer.c', + 'src/zix/digest.c', + 'src/zix/hash.c', ] # System libraries @@ -151,7 +154,7 @@ libserd = build_target( library_name, sources, version: meson.project_version(), - include_directories: include_directories(['include']), + include_directories: include_directories('include', 'src'), c_args: c_warnings + platform_args + library_args, dependencies: [exess_dep, m_dep], gnu_symbol_visibility: 'hidden', diff --git a/src/node.c b/src/node.c index eafcec38..25c1a9df 100644 --- a/src/node.c +++ b/src/node.c @@ -117,7 +117,6 @@ serd_node_check_padding(const SerdNode* node) #endif } -static SERD_PURE_FUNC size_t serd_node_total_size(const SerdNode* const node) { diff --git a/src/node.h b/src/node.h index bb03f949..eed7cb04 100644 --- a/src/node.h +++ b/src/node.h @@ -50,6 +50,10 @@ void serd_node_set(SerdNode* SERD_NULLABLE* SERD_NONNULL dst, const SerdNode* SERD_NULLABLE src); +SERD_PURE_FUNC +size_t +serd_node_total_size(const SerdNode* SERD_NULLABLE node); + void serd_node_zero_pad(SerdNode* SERD_NONNULL node); diff --git a/src/nodes.c b/src/nodes.c new file mode 100644 index 00000000..e6476f81 --- /dev/null +++ b/src/nodes.c @@ -0,0 +1,205 @@ +/* + Copyright 2011-2020 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "node.h" + +#include "serd/serd.h" +#include "zix/common.h" +#include "zix/digest.h" +#include "zix/hash.h" + +#include +#include +#include +#include +#include + +typedef struct { + size_t refs; + SerdNode* node; +} NodesEntry; + +typedef struct { + size_t refs; + const SerdNode* node; +} NodesSearchKey; + +struct SerdNodesImpl { + ZixHash* hash; +}; + +static uint32_t +nodes_hash(const void* n) +{ + const SerdNode* node = ((const NodesEntry*)n)->node; + + return zix_digest_add(zix_digest_start(), node, serd_node_total_size(node)); +} + +static bool +nodes_equal(const void* a, const void* b) +{ + const SerdNode* a_node = ((const NodesEntry*)a)->node; + const SerdNode* b_node = ((const NodesEntry*)b)->node; + const size_t a_size = serd_node_total_size(a_node); + const size_t b_size = serd_node_total_size(b_node); + return ((a_node == b_node) || + (a_size == b_size && !memcmp(a_node, b_node, a_size))); +} + +static void +free_entry(void* value, void* user_data) +{ + (void)user_data; + + NodesEntry* entry = (NodesEntry*)value; + serd_node_free(entry->node); +} + +SerdNodes* +serd_nodes_new(void) +{ + SerdNodes* nodes = (SerdNodes*)calloc(1, sizeof(SerdNodes)); + nodes->hash = zix_hash_new(nodes_hash, nodes_equal, sizeof(NodesEntry)); + return nodes; +} + +void +serd_nodes_free(SerdNodes* nodes) +{ + if (nodes) { + zix_hash_foreach(nodes->hash, free_entry, nodes); + zix_hash_free(nodes->hash); + free(nodes); + } +} + +size_t +serd_nodes_size(const SerdNodes* nodes) +{ + return zix_hash_size(nodes->hash); +} + +const SerdNode* +serd_nodes_intern(SerdNodes* nodes, const SerdNode* node) +{ + if (!node) { + return NULL; + } + + NodesSearchKey key = {1, node}; + NodesEntry* inserted = NULL; + + const ZixStatus st = zix_hash_insert(nodes->hash, &key, (void**)&inserted); + if (st == ZIX_STATUS_SUCCESS) { + inserted->node = serd_node_copy(node); + } else if (st == ZIX_STATUS_EXISTS) { + assert(serd_node_equals(inserted->node, node)); + ++inserted->refs; + } + + return inserted ? inserted->node : NULL; +} + +const SerdNode* +serd_nodes_get(const SerdNodes* const nodes, const SerdNode* const node) +{ + if (!node) { + return NULL; + } + + const NodesSearchKey key = {1, node}; + NodesEntry* const entry = (NodesEntry*)zix_hash_find(nodes->hash, &key); + + return entry ? entry->node : NULL; +} + +const SerdNode* +serd_nodes_manage(SerdNodes* nodes, SerdNode* node) +{ + if (!node) { + return NULL; + } + + NodesSearchKey key = {1, node}; + NodesEntry* inserted = NULL; + + const ZixStatus st = zix_hash_insert(nodes->hash, &key, (void**)&inserted); + if (st == ZIX_STATUS_EXISTS) { + assert(serd_node_equals(inserted->node, node)); + serd_node_free(node); + ++inserted->refs; + } + + return inserted ? inserted->node : NULL; +} + +/* TODO: Make these methods faster by being smarter internally and avoiding + unnecessary allocatio in cases where the node is already in the set. */ + +const SerdNode* +serd_nodes_string(SerdNodes* const nodes, const SerdStringView string) +{ + return serd_nodes_manage(nodes, serd_new_string(string)); +} + +const SerdNode* +serd_nodes_plain_literal(SerdNodes* const nodes, + const SerdStringView string, + const SerdStringView language) +{ + return serd_nodes_manage(nodes, serd_new_plain_literal(string, language)); +} + +const SerdNode* +serd_nodes_typed_literal(SerdNodes* const nodes, + const SerdStringView string, + const SerdStringView datatype_uri) +{ + return serd_nodes_manage(nodes, serd_new_typed_literal(string, datatype_uri)); +} + +const SerdNode* +serd_nodes_uri(SerdNodes* const nodes, const SerdStringView string) +{ + return serd_nodes_manage(nodes, serd_new_uri(string)); +} + +const SerdNode* +serd_nodes_curie(SerdNodes* const nodes, const SerdStringView string) +{ + return serd_nodes_manage(nodes, serd_new_curie(string)); +} + +const SerdNode* +serd_nodes_blank(SerdNodes* const nodes, const SerdStringView string) +{ + return serd_nodes_manage(nodes, serd_new_blank(string)); +} + +void +serd_nodes_deref(SerdNodes* const nodes, const SerdNode* const node) +{ + const NodesSearchKey key = {1, node}; + NodesEntry* const entry = (NodesEntry*)zix_hash_find(nodes->hash, &key); + + if (entry && --entry->refs == 0) { + SerdNode* const intern_node = entry->node; + + zix_hash_remove(nodes->hash, entry); + serd_node_free(intern_node); + } +} diff --git a/test/meson.build b/test/meson.build index 57163f23..04c89731 100644 --- a/test/meson.build +++ b/test/meson.build @@ -9,6 +9,7 @@ unit_tests = [ 'env', 'free_null', 'node', + 'nodes', 'overflow', 'read_chunk', 'reader_writer', diff --git a/test/test_free_null.c b/test/test_free_null.c index 0968f3fa..195daeeb 100644 --- a/test/test_free_null.c +++ b/test/test_free_null.c @@ -30,6 +30,7 @@ main(void) serd_sink_free(NULL); serd_reader_free(NULL); serd_writer_free(NULL); + serd_nodes_free(NULL); serd_caret_free(NULL); return 0; diff --git a/test/test_nodes.c b/test/test_nodes.c new file mode 100644 index 00000000..9605f3b1 --- /dev/null +++ b/test/test_nodes.c @@ -0,0 +1,236 @@ +/* + Copyright 2018 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#undef NDEBUG + +#include "serd/serd.h" + +#include +#include +#include + +static void +test_intern(void) +{ + SerdNodes* nodes = serd_nodes_new(); + SerdNode* node = serd_new_string(SERD_STRING("node")); + + assert(serd_nodes_size(nodes) == 0u); + assert(!serd_nodes_intern(nodes, NULL)); + + const SerdNode* interned1 = serd_nodes_intern(nodes, node); + assert(serd_node_equals(node, interned1)); + assert(serd_nodes_size(nodes) == 1u); + + const SerdNode* interned2 = serd_nodes_intern(nodes, node); + assert(serd_node_equals(node, interned2)); + assert(interned1 == interned2); + assert(serd_nodes_size(nodes) == 1u); + + serd_node_free(node); + serd_nodes_free(nodes); +} + +static void +test_manage(void) +{ + SerdNodes* nodes = serd_nodes_new(); + SerdNode* node = serd_new_string(SERD_STRING("node")); + + assert(!serd_nodes_manage(nodes, NULL)); + + const SerdNode* managed1 = serd_nodes_manage(nodes, node); + assert(managed1 == node); + + SerdNode* equal = serd_new_string(SERD_STRING("node")); + const SerdNode* managed2 = serd_nodes_manage(nodes, equal); + assert(managed2 == node); + + serd_nodes_free(nodes); +} + +static void +test_string(void) +{ + static const SerdStringView string = SERD_STRING("string"); + + SerdNodes* const nodes = serd_nodes_new(); + const SerdNode* const node = serd_nodes_string(nodes, string); + + assert(node); + assert(serd_node_type(node) == SERD_LITERAL); + assert(!serd_node_datatype(node)); + assert(!serd_node_language(node)); + assert(serd_node_length(node) == string.len); + assert(!strcmp(serd_node_string(node), string.buf)); + + serd_nodes_free(nodes); +} + +static void +test_plain_literal(void) +{ + static const SerdStringView string = SERD_STRING("string"); + static const SerdStringView language = SERD_STRING("en"); + + SerdNodes* const nodes = serd_nodes_new(); + const SerdNode* const node = + serd_nodes_plain_literal(nodes, string, language); + + assert(node); + assert(serd_node_type(node) == SERD_LITERAL); + assert(!serd_node_datatype(node)); + + const SerdNode* const language_node = serd_node_language(node); + assert(language_node); + assert(serd_node_type(language_node) == SERD_LITERAL); + assert(serd_node_length(language_node) == language.len); + assert(!strcmp(serd_node_string(language_node), language.buf)); + assert(serd_node_length(node) == string.len); + assert(!strcmp(serd_node_string(node), string.buf)); + + serd_nodes_free(nodes); +} + +static void +test_typed_literal(void) +{ + static const SerdStringView string = SERD_STRING("string"); + static const SerdStringView datatype = SERD_STRING("http://example.org/Type"); + + SerdNodes* const nodes = serd_nodes_new(); + const SerdNode* const node = + serd_nodes_typed_literal(nodes, string, datatype); + + assert(node); + assert(serd_node_type(node) == SERD_LITERAL); + assert(!serd_node_language(node)); + + const SerdNode* const datatype_node = serd_node_datatype(node); + assert(datatype_node); + + assert(serd_node_type(datatype_node) == SERD_URI); + assert(serd_node_length(datatype_node) == datatype.len); + assert(!strcmp(serd_node_string(datatype_node), datatype.buf)); + assert(serd_node_length(node) == string.len); + assert(!strcmp(serd_node_string(node), string.buf)); + + serd_nodes_free(nodes); +} + +static void +test_uri(void) +{ + static const SerdStringView string = SERD_STRING("http://example.org/"); + + SerdNodes* const nodes = serd_nodes_new(); + const SerdNode* const node = serd_nodes_uri(nodes, string); + + assert(node); + assert(serd_node_type(node) == SERD_URI); + assert(!serd_node_datatype(node)); + assert(!serd_node_language(node)); + assert(serd_node_length(node) == string.len); + assert(!strcmp(serd_node_string(node), string.buf)); + + serd_nodes_free(nodes); +} + +static void +test_curie(void) +{ + static const SerdStringView string = SERD_STRING("eg:name"); + + SerdNodes* const nodes = serd_nodes_new(); + const SerdNode* const node = serd_nodes_curie(nodes, string); + + assert(node); + assert(serd_node_type(node) == SERD_CURIE); + assert(!serd_node_datatype(node)); + assert(!serd_node_language(node)); + assert(serd_node_length(node) == string.len); + assert(!strcmp(serd_node_string(node), string.buf)); + + serd_nodes_free(nodes); +} + +static void +test_blank(void) +{ + static const SerdStringView string = SERD_STRING("b42"); + + SerdNodes* const nodes = serd_nodes_new(); + const SerdNode* const node = serd_nodes_blank(nodes, string); + + assert(node); + assert(serd_node_type(node) == SERD_BLANK); + assert(!serd_node_datatype(node)); + assert(!serd_node_language(node)); + assert(serd_node_length(node) == string.len); + assert(!strcmp(serd_node_string(node), string.buf)); + + serd_nodes_free(nodes); +} + +static void +test_deref(void) +{ + SerdNodes* nodes = serd_nodes_new(); + const SerdNode* managed = serd_nodes_string(nodes, SERD_STRING("node")); + + serd_nodes_deref(nodes, managed); + + SerdNode* node = serd_new_string(SERD_STRING("node")); + const SerdNode* interned = serd_nodes_intern(nodes, node); + + assert(interned != node); + + serd_node_free(node); + serd_nodes_free(nodes); +} + +static void +test_get(void) +{ + SerdNodes* nodes = serd_nodes_new(); + SerdNode* node = serd_new_string(SERD_STRING("node")); + + assert(!serd_nodes_get(nodes, NULL)); + assert(!serd_nodes_get(nodes, node)); + + const SerdNode* interned1 = serd_nodes_intern(nodes, node); + assert(serd_node_equals(node, interned1)); + assert(serd_nodes_get(nodes, node) == interned1); + + serd_node_free(node); + serd_nodes_free(nodes); +} + +int +main(void) +{ + test_intern(); + test_manage(); + test_string(); + test_plain_literal(); + test_typed_literal(); + test_uri(); + test_curie(); + test_blank(); + test_deref(); + test_get(); + return 0; +} -- cgit v1.2.1