summaryrefslogtreecommitdiffstats
path: root/src/sord.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/sord.c')
-rw-r--r--src/sord.c1299
1 files changed, 1299 insertions, 0 deletions
diff --git a/src/sord.c b/src/sord.c
new file mode 100644
index 0000000..c23b2c1
--- /dev/null
+++ b/src/sord.c
@@ -0,0 +1,1299 @@
+/*
+ Copyright 2011-2016 David Robillard <http://drobilla.net>
+
+ Permission to use, copy, modify, and/or distribute this software for any
+ purpose with or without fee is hereby granted, provided that the above
+ copyright notice and this permission notice appear in all copies.
+
+ THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+
+// C99
+#include <assert.h>
+#include <errno.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define ZIX_INLINE
+#include "zix/digest.c"
+#include "zix/hash.c"
+#include "zix/btree.c"
+
+#include "sord_config.h"
+#include "sord_internal.h"
+
+#define SORD_LOG(prefix, ...) fprintf(stderr, "[Sord::" prefix "] " __VA_ARGS__)
+
+#ifdef SORD_DEBUG_ITER
+# define SORD_ITER_LOG(...) SORD_LOG("iter", __VA_ARGS__)
+#else
+# define SORD_ITER_LOG(...)
+#endif
+#ifdef SORD_DEBUG_SEARCH
+# define SORD_FIND_LOG(...) SORD_LOG("search", __VA_ARGS__)
+#else
+# define SORD_FIND_LOG(...)
+#endif
+#ifdef SORD_DEBUG_WRITE
+# define SORD_WRITE_LOG(...) SORD_LOG("write", __VA_ARGS__)
+#else
+# define SORD_WRITE_LOG(...)
+#endif
+
+#define NUM_ORDERS 12
+#define STATEMENT_LEN 3
+#define TUP_LEN (STATEMENT_LEN + 1)
+#define DEFAULT_ORDER SPO
+#define DEFAULT_GRAPH_ORDER GSPO
+
+#define TUP_FMT "(%s %s %s %s)"
+#define TUP_FMT_ELEM(e) ((e) ? sord_node_get_string(e) : (const uint8_t*)"*")
+#define TUP_FMT_ARGS(t) \
+ TUP_FMT_ELEM((t)[0]), \
+ TUP_FMT_ELEM((t)[1]), \
+ TUP_FMT_ELEM((t)[2]), \
+ TUP_FMT_ELEM((t)[3])
+
+#define TUP_S 0
+#define TUP_P 1
+#define TUP_O 2
+#define TUP_G 3
+
+/** Triple ordering */
+typedef enum {
+ SPO, ///< Subject, Predicate, Object
+ SOP, ///< Subject, Object, Predicate
+ OPS, ///< Object, Predicate, Subject
+ OSP, ///< Object, Subject, Predicate
+ PSO, ///< Predicate, Subject, Object
+ POS, ///< Predicate, Object, Subject
+ GSPO, ///< Graph, Subject, Predicate, Object
+ GSOP, ///< Graph, Subject, Object, Predicate
+ GOPS, ///< Graph, Object, Predicate, Subject
+ GOSP, ///< Graph, Object, Subject, Predicate
+ GPSO, ///< Graph, Predicate, Subject, Object
+ GPOS ///< Graph, Predicate, Object, Subject
+} SordOrder;
+
+#ifdef SORD_DEBUG_SEARCH
+/** String name of each ordering (array indexed by SordOrder) */
+static const char* const order_names[NUM_ORDERS] = {
+ "spo", "sop", "ops", "osp", "pso", "pos",
+ "gspo", "gsop", "gops", "gosp", "gpso", "gpos"
+};
+#endif
+
+/**
+ Quads of indices for each order, from most to least significant
+ (array indexed by SordOrder)
+*/
+static const int orderings[NUM_ORDERS][TUP_LEN] = {
+ { 0, 1, 2, 3 }, { 0, 2, 1, 3 }, // SPO, SOP
+ { 2, 1, 0, 3 }, { 2, 0, 1, 3 }, // OPS, OSP
+ { 1, 0, 2, 3 }, { 1, 2, 0, 3 }, // PSO, POS
+ { 3, 0, 1, 2 }, { 3, 0, 2, 1 }, // GSPO, GSOP
+ { 3, 2, 1, 0 }, { 3, 2, 0, 1 }, // GOPS, GOSP
+ { 3, 1, 0, 2 }, { 3, 1, 2, 0 } // GPSO, GPOS
+};
+
+/** World */
+struct SordWorldImpl {
+ ZixHash* nodes;
+ SerdErrorSink error_sink;
+ void* error_handle;
+};
+
+/** Store */
+struct SordModelImpl {
+ SordWorld* world;
+
+ /** Index for each possible triple ordering (may or may not exist).
+ * Each index is a tree of SordQuad with the appropriate ordering.
+ */
+ ZixBTree* indices[NUM_ORDERS];
+
+ size_t n_quads;
+ size_t n_iters;
+};
+
+/** Mode for searching or iteration */
+typedef enum {
+ ALL, ///< Iterate over entire store
+ SINGLE, ///< Iteration over a single element (exact search)
+ RANGE, ///< Iterate over range with equal prefix
+ FILTER_RANGE, ///< Iterate over range with equal prefix, filtering
+ FILTER_ALL ///< Iterate to end of store, filtering
+} SearchMode;
+
+/** Iterator over some range of a store */
+struct SordIterImpl {
+ const SordModel* sord; ///< Model being iterated over
+ ZixBTreeIter* cur; ///< Current DB cursor
+ SordQuad pat; ///< Pattern (in ordering order)
+ SordOrder order; ///< Store order (which index)
+ SearchMode mode; ///< Iteration mode
+ int n_prefix; ///< Prefix for RANGE and FILTER_RANGE
+ bool end; ///< True iff reached end
+ bool skip_graphs; ///< Iteration should ignore graphs
+};
+
+static uint32_t
+sord_node_hash(const void* n)
+{
+ const SordNode* node = (const SordNode*)n;
+ uint32_t hash = zix_digest_start();
+ hash = zix_digest_add(hash, node->node.buf, node->node.n_bytes);
+ hash = zix_digest_add(hash, &node->node.type, sizeof(node->node.type));
+ if (node->node.type == SERD_LITERAL) {
+ hash = zix_digest_add(hash, &node->meta.lit, sizeof(node->meta.lit));
+ }
+ return hash;
+}
+
+static bool
+sord_node_hash_equal(const void* a, const void* b)
+{
+ const SordNode* a_node = (const SordNode*)a;
+ const SordNode* b_node = (const SordNode*)b;
+ return (a_node == b_node)
+ || ((a_node->node.type == b_node->node.type) &&
+ (a_node->node.type != SERD_LITERAL ||
+ (a_node->meta.lit.datatype == b_node->meta.lit.datatype &&
+ !strncmp(a_node->meta.lit.lang,
+ b_node->meta.lit.lang,
+ sizeof(a_node->meta.lit.lang)))) &&
+ (serd_node_equals(&a_node->node, &b_node->node)));
+}
+
+static void
+error(SordWorld* world, SerdStatus st, const char* fmt, ...)
+{
+ va_list args;
+ va_start(args, fmt);
+ const SerdError e = { st, NULL, 0, 0, fmt, &args };
+ if (world->error_sink) {
+ world->error_sink(world->error_handle, &e);
+ } else {
+ fprintf(stderr, "error: ");
+ vfprintf(stderr, fmt, args);
+ }
+ va_end(args);
+}
+
+SordWorld*
+sord_world_new(void)
+{
+ SordWorld* world = (SordWorld*)malloc(sizeof(SordWorld));
+ world->error_sink = NULL;
+ world->error_handle = NULL;
+
+ world->nodes = zix_hash_new(
+ sord_node_hash, sord_node_hash_equal, sizeof(SordNode));
+
+ return world;
+}
+
+static void
+free_node_entry(void* value, void* user_data)
+{
+ SordNode* node = (SordNode*)value;
+ if (node->node.type == SERD_LITERAL) {
+ sord_node_free((SordWorld*)user_data, node->meta.lit.datatype);
+ }
+ free((uint8_t*)node->node.buf);
+}
+
+void
+sord_world_free(SordWorld* world)
+{
+ zix_hash_foreach(world->nodes, free_node_entry, world);
+ zix_hash_free(world->nodes);
+ free(world);
+}
+
+void
+sord_world_set_error_sink(SordWorld* world,
+ SerdErrorSink error_sink,
+ void* handle)
+{
+ world->error_sink = error_sink;
+ world->error_handle = handle;
+}
+
+/** Compare nodes, considering NULL a wildcard match. */
+static inline int
+sord_node_compare(const SordNode* a, const SordNode* b)
+{
+ if (a == b || !a || !b) {
+ return 0; // Exact or wildcard match
+ } else if (a->node.type != b->node.type) {
+ return a->node.type - b->node.type;
+ }
+
+ int cmp = 0;
+ switch (a->node.type) {
+ case SERD_URI:
+ case SERD_BLANK:
+ return strcmp((const char*)a->node.buf, (const char*)b->node.buf);
+ case SERD_LITERAL:
+ cmp = strcmp((const char*)sord_node_get_string(a),
+ (const char*)sord_node_get_string(b));
+ if (cmp == 0) {
+ // Note: Can't use sord_node_compare here since it does wildcards
+ if (!a->meta.lit.datatype || !b->meta.lit.datatype) {
+ cmp = a->meta.lit.datatype - b->meta.lit.datatype;
+ } else {
+ cmp = strcmp((const char*)a->meta.lit.datatype->node.buf,
+ (const char*)b->meta.lit.datatype->node.buf);
+ }
+ }
+ if (cmp == 0) {
+ cmp = strcmp(a->meta.lit.lang, b->meta.lit.lang);
+ }
+ default:
+ break;
+ }
+ return cmp;
+}
+
+bool
+sord_node_equals(const SordNode* a, const SordNode* b)
+{
+ return a == b; // Nodes are interned
+}
+
+/** Return true iff IDs are equivalent, or one is a wildcard */
+static inline bool
+sord_id_match(const SordNode* a, const SordNode* b)
+{
+ return !a || !b || (a == b);
+}
+
+static inline bool
+sord_quad_match_inline(const SordQuad x, const SordQuad y)
+{
+ return sord_id_match(x[0], y[0])
+ && sord_id_match(x[1], y[1])
+ && sord_id_match(x[2], y[2])
+ && sord_id_match(x[3], y[3]);
+}
+
+bool
+sord_quad_match(const SordQuad x, const SordQuad y)
+{
+ return sord_quad_match_inline(x, y);
+}
+
+/**
+ Compare two quad IDs lexicographically.
+ NULL IDs (equal to 0) are treated as wildcards, always less than every
+ other possible ID, except itself.
+*/
+static int
+sord_quad_compare(const void* x_ptr, const void* y_ptr, void* user_data)
+{
+ const int* const ordering = (const int*)user_data;
+ const SordNode*const*const x = (const SordNode*const*)x_ptr;
+ const SordNode*const*const y = (const SordNode*const*)y_ptr;
+
+ for (int i = 0; i < TUP_LEN; ++i) {
+ const int idx = ordering[i];
+ const int cmp = sord_node_compare(x[idx], y[idx]);
+ if (cmp) {
+ return cmp;
+ }
+ }
+
+ return 0;
+}
+
+static inline bool
+sord_iter_forward(SordIter* iter)
+{
+ if (!iter->skip_graphs) {
+ zix_btree_iter_increment(iter->cur);
+ return zix_btree_iter_is_end(iter->cur);
+ }
+
+ SordNode** key = (SordNode**)zix_btree_get(iter->cur);
+ const SordQuad initial = { key[0], key[1], key[2], key[3] };
+ zix_btree_iter_increment(iter->cur);
+ while (!zix_btree_iter_is_end(iter->cur)) {
+ key = (SordNode**)zix_btree_get(iter->cur);
+ for (int i = 0; i < 3; ++i) {
+ if (key[i] != initial[i]) {
+ return false;
+ }
+ }
+
+ zix_btree_iter_increment(iter->cur);
+ }
+
+ return true;
+}
+
+/**
+ Seek forward as necessary until `iter` points at a match.
+ @return true iff iterator reached end of valid range.
+*/
+static inline bool
+sord_iter_seek_match(SordIter* iter)
+{
+ for (iter->end = true;
+ !zix_btree_iter_is_end(iter->cur);
+ sord_iter_forward(iter)) {
+ const SordNode** const key = (const SordNode**)zix_btree_get(iter->cur);
+ if (sord_quad_match_inline(key, iter->pat)) {
+ return (iter->end = false);
+ }
+ }
+ return true;
+}
+
+/**
+ Seek forward as necessary until `iter` points at a match, or the prefix
+ no longer matches iter->pat.
+ @return true iff iterator reached end of valid range.
+*/
+static inline bool
+sord_iter_seek_match_range(SordIter* iter)
+{
+ assert(!iter->end);
+
+ do {
+ const SordNode** key = (const SordNode**)zix_btree_get(iter->cur);
+
+ if (sord_quad_match_inline(key, iter->pat)) {
+ return false; // Found match
+ }
+
+ for (int i = 0; i < iter->n_prefix; ++i) {
+ const int idx = orderings[iter->order][i];
+ if (!sord_id_match(key[idx], iter->pat[idx])) {
+ iter->end = true; // Reached end of valid range
+ return true;
+ }
+ }
+ } while (!sord_iter_forward(iter));
+
+ return (iter->end = true); // Reached end
+}
+
+static SordIter*
+sord_iter_new(const SordModel* sord, ZixBTreeIter* cur, const SordQuad pat,
+ SordOrder order, SearchMode mode, int n_prefix)
+{
+ SordIter* iter = (SordIter*)malloc(sizeof(SordIter));
+ iter->sord = sord;
+ iter->cur = cur;
+ iter->order = order;
+ iter->mode = mode;
+ iter->n_prefix = n_prefix;
+ iter->end = false;
+ iter->skip_graphs = order < GSPO;
+ for (int i = 0; i < TUP_LEN; ++i) {
+ iter->pat[i] = pat[i];
+ }
+
+ switch (iter->mode) {
+ case ALL:
+ case SINGLE:
+ case RANGE:
+ assert(
+ sord_quad_match_inline((const SordNode**)zix_btree_get(iter->cur),
+ iter->pat));
+ break;
+ case FILTER_RANGE:
+ sord_iter_seek_match_range(iter);
+ break;
+ case FILTER_ALL:
+ sord_iter_seek_match(iter);
+ break;
+ }
+
+#ifdef SORD_DEBUG_ITER
+ SordQuad value;
+ sord_iter_get(iter, value);
+ SORD_ITER_LOG("New %p pat=" TUP_FMT " cur=" TUP_FMT " end=%d skip=%d\n",
+ (void*)iter, TUP_FMT_ARGS(pat), TUP_FMT_ARGS(value),
+ iter->end, iter->skip_graphs);
+#endif
+
+ ++((SordModel*)sord)->n_iters;
+ return iter;
+}
+
+const SordModel*
+sord_iter_get_model(SordIter* iter)
+{
+ return iter->sord;
+}
+
+void
+sord_iter_get(const SordIter* iter, SordQuad tup)
+{
+ SordNode** key = (SordNode**)zix_btree_get(iter->cur);
+ for (int i = 0; i < TUP_LEN; ++i) {
+ tup[i] = key[i];
+ }
+}
+
+const SordNode*
+sord_iter_get_node(const SordIter* iter, SordQuadIndex index)
+{
+ return (!sord_iter_end(iter)
+ ? ((SordNode**)zix_btree_get(iter->cur))[index]
+ : NULL);
+}
+
+static bool
+sord_iter_scan_next(SordIter* iter)
+{
+ if (iter->end) {
+ return true;
+ }
+
+ const SordNode** key;
+ if (!iter->end) {
+ switch (iter->mode) {
+ case ALL:
+ // At the end if the cursor is (assigned above)
+ break;
+ case SINGLE:
+ iter->end = true;
+ SORD_ITER_LOG("%p reached single end\n", (void*)iter);
+ break;
+ case RANGE:
+ SORD_ITER_LOG("%p range next\n", (void*)iter);
+ // At the end if the MSNs no longer match
+ key = (const SordNode**)zix_btree_get(iter->cur);
+ assert(key);
+ for (int i = 0; i < iter->n_prefix; ++i) {
+ const int idx = orderings[iter->order][i];
+ if (!sord_id_match(key[idx], iter->pat[idx])) {
+ iter->end = true;
+ SORD_ITER_LOG("%p reached non-match end\n", (void*)iter);
+ break;
+ }
+ }
+ break;
+ case FILTER_RANGE:
+ // Seek forward to next match, stopping if prefix changes
+ sord_iter_seek_match_range(iter);
+ break;
+ case FILTER_ALL:
+ // Seek forward to next match
+ sord_iter_seek_match(iter);
+ break;
+ }
+ } else {
+ SORD_ITER_LOG("%p reached index end\n", (void*)iter);
+ }
+
+ if (iter->end) {
+ SORD_ITER_LOG("%p Reached end\n", (void*)iter);
+ return true;
+ } else {
+#ifdef SORD_DEBUG_ITER
+ SordQuad tup;
+ sord_iter_get(iter, tup);
+ SORD_ITER_LOG("%p Increment to " TUP_FMT "\n",
+ (void*)iter, TUP_FMT_ARGS(tup));
+#endif
+ return false;
+ }
+}
+
+bool
+sord_iter_next(SordIter* iter)
+{
+ if (iter->end) {
+ return true;
+ }
+
+ iter->end = sord_iter_forward(iter);
+ return sord_iter_scan_next(iter);
+}
+
+bool
+sord_iter_end(const SordIter* iter)
+{
+ return !iter || iter->end;
+}
+
+void
+sord_iter_free(SordIter* iter)
+{
+ SORD_ITER_LOG("%p Free\n", (void*)iter);
+ if (iter) {
+ --((SordModel*)iter->sord)->n_iters;
+ zix_btree_iter_free(iter->cur);
+ free(iter);
+ }
+}
+
+/**
+ Return true iff `sord` has an index for `order`.
+ If `graphs` is true, `order` will be modified to be the
+ corresponding order with a G prepended (so G will be the MSN).
+*/
+static inline bool
+sord_has_index(SordModel* model, SordOrder* order, int* n_prefix, bool graphs)
+{
+ if (graphs) {
+ *order = (SordOrder)(*order + GSPO);
+ *n_prefix += 1;
+ }
+
+ return model->indices[*order];
+}
+
+/**
+ Return the best available index for a pattern.
+ @param pat Pattern in standard (S P O G) order
+ @param mode Set to the (best) iteration mode for iterating over results
+ @param n_prefix Set to the length of the range prefix
+ (for `mode` == RANGE and `mode` == FILTER_RANGE)
+*/
+static inline SordOrder
+sord_best_index(SordModel* sord,
+ const SordQuad pat,
+ SearchMode* mode,
+ int* n_prefix)
+{
+ const bool graph_search = (pat[TUP_G] != 0);
+
+ const unsigned sig
+ = (pat[0] ? 1 : 0) * 0x100
+ + (pat[1] ? 1 : 0) * 0x010
+ + (pat[2] ? 1 : 0) * 0x001;
+
+ SordOrder good[2] = { (SordOrder)-1, (SordOrder)-1 };
+
+#define PAT_CASE(sig, m, g0, g1, np) \
+ case sig: \
+ *mode = m; \
+ good[0] = g0; \
+ good[1] = g1; \
+ *n_prefix = np; \
+ break
+
+ // Good orderings that don't require filtering
+ *mode = RANGE;
+ *n_prefix = 0;
+ switch (sig) {
+ case 0x000:
+ assert(graph_search);
+ *mode = RANGE;
+ *n_prefix = 1;
+ return DEFAULT_GRAPH_ORDER;
+ case 0x111:
+ *mode = SINGLE;
+ return graph_search ? DEFAULT_GRAPH_ORDER : DEFAULT_ORDER;
+
+ PAT_CASE(0x001, RANGE, OPS, OSP, 1);
+ PAT_CASE(0x010, RANGE, POS, PSO, 1);
+ PAT_CASE(0x011, RANGE, OPS, POS, 2);
+ PAT_CASE(0x100, RANGE, SPO, SOP, 1);
+ PAT_CASE(0x101, RANGE, SOP, OSP, 2);
+ PAT_CASE(0x110, RANGE, SPO, PSO, 2);
+ }
+
+ if (*mode == RANGE) {
+ if (sord_has_index(sord, &good[0], n_prefix, graph_search)) {
+ return good[0];
+ } else if (sord_has_index(sord, &good[1], n_prefix, graph_search)) {
+ return good[1];
+ }
+ }
+
+ // Not so good orderings that require filtering, but can
+ // still be constrained to a range
+ switch (sig) {
+ PAT_CASE(0x011, FILTER_RANGE, OSP, PSO, 1);
+ PAT_CASE(0x101, FILTER_RANGE, SPO, OPS, 1);
+ // SPO is always present, so 0x110 is never reached here
+ default: break;
+ }
+
+ if (*mode == FILTER_RANGE) {
+ if (sord_has_index(sord, &good[0], n_prefix, graph_search)) {
+ return good[0];
+ } else if (sord_has_index(sord, &good[1], n_prefix, graph_search)) {
+ return good[1];
+ }
+ }
+
+ if (graph_search) {
+ *mode = FILTER_RANGE;
+ *n_prefix = 1;
+ return DEFAULT_GRAPH_ORDER;
+ } else {
+ *mode = FILTER_ALL;
+ return DEFAULT_ORDER;
+ }
+}
+
+SordModel*
+sord_new(SordWorld* world, unsigned indices, bool graphs)
+{
+ SordModel* model = (SordModel*)malloc(sizeof(struct SordModelImpl));
+ model->world = world;
+ model->n_quads = 0;
+ model->n_iters = 0;
+
+ for (unsigned i = 0; i < (NUM_ORDERS / 2); ++i) {
+ const int* const ordering = orderings[i];
+ const int* const g_ordering = orderings[i + (NUM_ORDERS / 2)];
+
+ if (indices & (1 << i)) {
+ model->indices[i] = zix_btree_new(
+ sord_quad_compare, (void*)ordering, NULL);
+ if (graphs) {
+ model->indices[i + (NUM_ORDERS / 2)] = zix_btree_new(
+ sord_quad_compare, (void*)g_ordering, NULL);
+ } else {
+ model->indices[i + (NUM_ORDERS / 2)] = NULL;
+ }
+ } else {
+ model->indices[i] = NULL;
+ model->indices[i + (NUM_ORDERS / 2)] = NULL;
+ }
+ }
+
+ if (!model->indices[DEFAULT_ORDER]) {
+ model->indices[DEFAULT_ORDER] = zix_btree_new(
+ sord_quad_compare, (void*)orderings[DEFAULT_ORDER], NULL);
+ }
+ if (graphs && !model->indices[DEFAULT_GRAPH_ORDER]) {
+ model->indices[DEFAULT_GRAPH_ORDER] = zix_btree_new(
+ sord_quad_compare, (void*)orderings[DEFAULT_GRAPH_ORDER], NULL);
+ }
+
+ return model;
+}
+
+static void
+sord_node_free_internal(SordWorld* world, SordNode* node)
+{
+ assert(node->refs == 0);
+
+ // Cache pointer to buffer to free after node removal and destruction
+ const uint8_t* const buf = node->node.buf;
+
+ // Remove node from hash (which frees the node)
+ if (zix_hash_remove(world->nodes, node)) {
+ error(world, SERD_ERR_INTERNAL, "failed to remove node from hash\n");
+ }
+
+ // Free buffer
+ free((uint8_t*)buf);
+}
+
+static void
+sord_add_quad_ref(SordModel* model, const SordNode* node, SordQuadIndex i)
+{
+ if (node) {
+ assert(node->refs > 0);
+ ++((SordNode*)node)->refs;
+ if (node->node.type != SERD_LITERAL && i == SORD_OBJECT) {
+ ++((SordNode*)node)->meta.res.refs_as_obj;
+ }
+ }
+}
+
+static void
+sord_drop_quad_ref(SordModel* model, const SordNode* node, SordQuadIndex i)
+{
+ if (!node) {
+ return;
+ }
+
+ assert(node->refs > 0);
+ if (node->node.type != SERD_LITERAL && i == SORD_OBJECT) {
+ assert(node->meta.res.refs_as_obj > 0);
+ --((SordNode*)node)->meta.res.refs_as_obj;
+ }
+ if (--((SordNode*)node)->refs == 0) {
+ sord_node_free_internal(sord_get_world(model), (SordNode*)node);
+ }
+}
+
+void
+sord_free(SordModel* model)
+{
+ if (!model) {
+ return;
+ }
+
+ // Free nodes
+ SordQuad tup;
+ SordIter* i = sord_begin(model);
+ for (; !sord_iter_end(i); sord_iter_next(i)) {
+ sord_iter_get(i, tup);
+ for (int t = 0; t < TUP_LEN; ++t) {
+ sord_drop_quad_ref(model, tup[t], (SordQuadIndex)t);
+ }
+ }
+ sord_iter_free(i);
+
+ // Free quads
+ ZixBTreeIter* t = zix_btree_begin(model->indices[DEFAULT_ORDER]);
+ for (; !zix_btree_iter_is_end(t); zix_btree_iter_increment(t)) {
+ free(zix_btree_get(t));
+ }
+ zix_btree_iter_free(t);
+
+ // Free indices
+ for (unsigned o = 0; o < NUM_ORDERS; ++o) {
+ if (model->indices[o]) {
+ zix_btree_free(model->indices[o]);
+ }
+ }
+
+ free(model);
+}
+
+SordWorld*
+sord_get_world(SordModel* model)
+{
+ return model->world;
+}
+
+size_t
+sord_num_quads(const SordModel* model)
+{
+ return model->n_quads;
+}
+
+size_t
+sord_num_nodes(const SordWorld* world)
+{
+ return zix_hash_size(world->nodes);
+}
+
+SordIter*
+sord_begin(const SordModel* model)
+{
+ if (sord_num_quads(model) == 0) {
+ return NULL;
+ } else {
+ ZixBTreeIter* cur = zix_btree_begin(model->indices[DEFAULT_ORDER]);
+ SordQuad pat = { 0, 0, 0, 0 };
+ return sord_iter_new(model, cur, pat, DEFAULT_ORDER, ALL, 0);
+ }
+}
+
+SordIter*
+sord_find(SordModel* model, const SordQuad pat)
+{
+ if (!pat[0] && !pat[1] && !pat[2] && !pat[3]) {
+ return sord_begin(model);
+ }
+
+ SearchMode mode;
+ int n_prefix;
+ const SordOrder index_order = sord_best_index(model, pat, &mode, &n_prefix);
+
+ SORD_FIND_LOG("Find " TUP_FMT " index=%s mode=%d n_prefix=%d\n",
+ TUP_FMT_ARGS(pat), order_names[index_order], mode, n_prefix);
+
+ if (pat[0] && pat[1] && pat[2] && pat[3]) {
+ mode = SINGLE; // No duplicate quads (Sord is a set)
+ }
+
+ ZixBTree* const db = model->indices[index_order];
+ ZixBTreeIter* cur = NULL;
+ zix_btree_lower_bound(db, pat, &cur);
+ if (zix_btree_iter_is_end(cur)) {
+ SORD_FIND_LOG("No match found\n");
+ zix_btree_iter_free(cur);
+ return NULL;
+ }
+ const SordNode** const key = (const SordNode**)zix_btree_get(cur);
+ if (!key || ( (mode == RANGE || mode == SINGLE)
+ && !sord_quad_match_inline(pat, key) )) {
+ SORD_FIND_LOG("No match found\n");
+ zix_btree_iter_free(cur);
+ return NULL;
+ }
+
+ return sord_iter_new(model, cur, pat, index_order, mode, n_prefix);
+}
+
+SordIter*
+sord_search(SordModel* model,
+ const SordNode* s,
+ const SordNode* p,
+ const SordNode* o,
+ const SordNode* g)
+{
+ SordQuad pat = { s, p, o, g };
+ return sord_find(model, pat);
+}
+
+SordNode*
+sord_get(SordModel* model,
+ const SordNode* s,
+ const SordNode* p,
+ const SordNode* o,
+ const SordNode* g)
+{
+ if ((bool)s + (bool)p + (bool)o != 2) {
+ return NULL;
+ }
+
+ SordIter* i = sord_search(model, s, p, o, g);
+ SordNode* ret = NULL;
+ if (!s) {
+ ret = sord_node_copy(sord_iter_get_node(i, SORD_SUBJECT));
+ } else if (!p) {
+ ret = sord_node_copy(sord_iter_get_node(i, SORD_PREDICATE));
+ } else if (!o) {
+ ret = sord_node_copy(sord_iter_get_node(i, SORD_OBJECT));
+ }
+
+ sord_iter_free(i);
+ return ret;
+}
+
+bool
+sord_ask(SordModel* model,
+ const SordNode* s,
+ const SordNode* p,
+ const SordNode* o,
+ const SordNode* g)
+{
+ SordQuad pat = { s, p, o, g };
+ return sord_contains(model, pat);
+}
+
+uint64_t
+sord_count(SordModel* model,
+ const SordNode* s,
+ const SordNode* p,
+ const SordNode* o,
+ const SordNode* g)
+{
+ SordIter* i = sord_search(model, s, p, o, g);
+ uint64_t n = 0;
+ for (; !sord_iter_end(i); sord_iter_next(i)) {
+ ++n;
+ }
+ sord_iter_free(i);
+ return n;
+}
+
+bool
+sord_contains(SordModel* model, const SordQuad pat)
+{
+ SordIter* iter = sord_find(model, pat);
+ bool ret = (iter != NULL);
+ sord_iter_free(iter);
+ return ret;
+}
+
+static uint8_t*
+sord_strndup(const uint8_t* str, size_t len)
+{
+ uint8_t* dup = (uint8_t*)malloc(len + 1);
+ memcpy(dup, str, len + 1);
+ return dup;
+}
+
+SordNodeType
+sord_node_get_type(const SordNode* node)
+{
+ switch (node->node.type) {
+ case SERD_URI:
+ return SORD_URI;
+ case SERD_BLANK:
+ return SORD_BLANK;
+ default:
+ return SORD_LITERAL;
+ }
+ SORD_UNREACHABLE();
+}
+
+const uint8_t*
+sord_node_get_string(const SordNode* node)
+{
+ return node->node.buf;
+}
+
+const uint8_t*
+sord_node_get_string_counted(const SordNode* node, size_t* bytes)
+{
+ *bytes = node->node.n_bytes;
+ return node->node.buf;
+}
+
+const uint8_t*
+sord_node_get_string_measured(const SordNode* node,
+ size_t* bytes,
+ size_t* chars)
+{
+ *bytes = node->node.n_bytes;
+ *chars = node->node.n_chars;
+ return node->node.buf;
+}
+
+const char*
+sord_node_get_language(const SordNode* node)
+{
+ if (node->node.type != SERD_LITERAL || !node->meta.lit.lang[0]) {
+ return NULL;
+ }
+ return node->meta.lit.lang;
+}
+
+SordNode*
+sord_node_get_datatype(const SordNode* node)
+{
+ return (node->node.type == SERD_LITERAL) ? node->meta.lit.datatype : NULL;
+}
+
+SerdNodeFlags
+sord_node_get_flags(const SordNode* node)
+{
+ return node->node.flags;
+}
+
+bool
+sord_node_is_inline_object(const SordNode* node)
+{
+ return (node->node.type == SERD_BLANK) && (node->meta.res.refs_as_obj == 1);
+}
+
+static SordNode*
+sord_insert_node(SordWorld* world, const SordNode* key, bool copy)
+{
+ SordNode* node = NULL;
+ ZixStatus st = zix_hash_insert(world->nodes, key, (const void**)&node);
+ switch (st) {
+ case ZIX_STATUS_EXISTS:
+ ++node->refs;
+ break;
+ case ZIX_STATUS_SUCCESS:
+ assert(node->refs == 1);
+ if (copy) {
+ node->node.buf = sord_strndup(node->node.buf, node->node.n_bytes);
+ }
+ if (node->node.type == SERD_LITERAL) {
+ node->meta.lit.datatype = sord_node_copy(node->meta.lit.datatype);
+ }
+ return node;
+ default:
+ error(world, SERD_ERR_INTERNAL,
+ "error inserting node `%s'\n", key->node.buf);
+ }
+
+ if (!copy) {
+ // Free the buffer we would have copied if a new node was created
+ free((uint8_t*)key->node.buf);
+ }
+
+ return node;
+}
+
+static SordNode*
+sord_new_uri_counted(SordWorld* world, const uint8_t* str,
+ size_t n_bytes, size_t n_chars, bool copy)
+{
+ if (!serd_uri_string_has_scheme(str)) {
+ error(world, SERD_ERR_BAD_ARG,
+ "attempt to map invalid URI `%s'\n", str);
+ return NULL; // Can't intern relative URIs
+ }
+
+ const SordNode key = {
+ { str, n_bytes, n_chars, 0, SERD_URI }, 1, { { 0 } }
+ };
+
+ return sord_insert_node(world, &key, copy);
+}
+
+SordNode*
+sord_new_uri(SordWorld* world, const uint8_t* uri)
+{
+ const SerdNode node = serd_node_from_string(SERD_URI, uri);
+ return sord_new_uri_counted(world, uri, node.n_bytes, node.n_chars, true);
+}
+
+SordNode*
+sord_new_relative_uri(SordWorld* world,
+ const uint8_t* uri,
+ const uint8_t* base_uri)
+{
+ if (serd_uri_string_has_scheme(uri)) {
+ return sord_new_uri(world, uri);
+ }
+ SerdURI buri = SERD_URI_NULL;
+ SerdNode base = serd_node_new_uri_from_string(base_uri, NULL, &buri);
+ SerdNode node = serd_node_new_uri_from_string(uri, &buri, NULL);
+
+ SordNode* ret = sord_new_uri_counted(
+ world, node.buf, node.n_bytes, node.n_chars, false);
+
+ serd_node_free(&base);
+ return ret;
+}
+
+static SordNode*
+sord_new_blank_counted(SordWorld* world, const uint8_t* str,
+ size_t n_bytes, size_t n_chars)
+{
+ const SordNode key = {
+ { str, n_bytes, n_chars, 0, SERD_BLANK }, 1, { { 0 } }
+ };
+
+ return sord_insert_node(world, &key, true);
+}
+
+SordNode*
+sord_new_blank(SordWorld* world, const uint8_t* str)
+{
+ const SerdNode node = serd_node_from_string(SERD_URI, str);
+ return sord_new_blank_counted(world, str, node.n_bytes, node.n_chars);
+}
+
+static SordNode*
+sord_new_literal_counted(SordWorld* world,
+ SordNode* datatype,
+ const uint8_t* str,
+ size_t n_bytes,
+ size_t n_chars,
+ SerdNodeFlags flags,
+ const char* lang)
+{
+ SordNode key = {
+ { str, n_bytes, n_chars, flags, SERD_LITERAL }, 1, { { 0 } }
+ };
+ key.meta.lit.datatype = sord_node_copy(datatype);
+ memset(key.meta.lit.lang, 0, sizeof(key.meta.lit.lang));
+ if (lang) {
+ strncpy(key.meta.lit.lang, lang, sizeof(key.meta.lit.lang));
+ }
+
+ return sord_insert_node(world, &key, true);
+}
+
+SordNode*
+sord_new_literal(SordWorld* world, SordNode* datatype,
+ const uint8_t* str, const char* lang)
+{
+ SerdNodeFlags flags = 0;
+ size_t n_bytes = 0;
+ size_t n_chars = serd_strlen(str, &n_bytes, &flags);
+ return sord_new_literal_counted(world, datatype,
+ str, n_bytes, n_chars, flags,
+ lang);
+}
+
+SordNode*
+sord_node_from_serd_node(SordWorld* world,
+ SerdEnv* env,
+ const SerdNode* node,
+ const SerdNode* datatype,
+ const SerdNode* lang)
+{
+ if (!node) {
+ return NULL;
+ }
+
+ SordNode* datatype_node = NULL;
+ SordNode* ret = NULL;
+ switch (node->type) {
+ case SERD_NOTHING:
+ return NULL;
+ case SERD_LITERAL:
+ datatype_node = sord_node_from_serd_node(
+ world, env, datatype, NULL, NULL),
+ ret = sord_new_literal_counted(
+ world,
+ datatype_node,
+ node->buf,
+ node->n_bytes,
+ node->n_chars,
+ node->flags,
+ lang ? (const char*)lang->buf : NULL);
+ sord_node_free(world, datatype_node);
+ return ret;
+ case SERD_URI:
+ if (serd_uri_string_has_scheme(node->buf)) {
+ return sord_new_uri_counted(
+ world, node->buf, node->n_bytes, node->n_chars, true);
+ } else {
+ SerdURI base_uri;
+ serd_env_get_base_uri(env, &base_uri);
+ SerdURI abs_uri;
+ SerdNode abs_uri_node = serd_node_new_uri_from_node(
+ node, &base_uri, &abs_uri);
+ ret = sord_new_uri_counted(world,
+ abs_uri_node.buf,
+ abs_uri_node.n_bytes,
+ abs_uri_node.n_chars,
+ true);
+ serd_node_free(&abs_uri_node);
+ return ret;
+ }
+ case SERD_CURIE: {
+ SerdChunk uri_prefix;
+ SerdChunk uri_suffix;
+ if (serd_env_expand(env, node, &uri_prefix, &uri_suffix)) {
+ error(world, SERD_ERR_BAD_CURIE,
+ "failed to expand CURIE `%s'\n", node->buf);
+ return NULL;
+ }
+ const size_t uri_len = uri_prefix.len + uri_suffix.len;
+ uint8_t* buf = (uint8_t*)malloc(uri_len + 1);
+ memcpy(buf, uri_prefix.buf, uri_prefix.len);
+ memcpy(buf + uri_prefix.len, uri_suffix.buf, uri_suffix.len);
+ buf[uri_len] = '\0';
+ ret = sord_new_uri_counted(
+ world, buf, uri_len, serd_strlen(buf, NULL, NULL), false);
+ return ret;
+ }
+ case SERD_BLANK:
+ return sord_new_blank_counted(
+ world, node->buf, node->n_bytes, node->n_chars);
+ }
+ return NULL;
+}
+
+const SerdNode*
+sord_node_to_serd_node(const SordNode* node)
+{
+ return node ? &node->node : &SERD_NODE_NULL;
+}
+
+void
+sord_node_free(SordWorld* world, SordNode* node)
+{
+ if (!node) {
+ return;
+ } else if (node->refs == 0) {
+ error(world, SERD_ERR_BAD_ARG, "attempt to free garbage node\n");
+ } else if (--node->refs == 0) {
+ sord_node_free_internal(world, node);
+ }
+}
+
+SordNode*
+sord_node_copy(const SordNode* node)
+{
+ SordNode* copy = (SordNode*)node;
+ if (copy) {
+ ++copy->refs;
+ }
+ return copy;
+}
+
+static inline bool
+sord_add_to_index(SordModel* model, const SordNode** tup, SordOrder order)
+{
+ return !zix_btree_insert(model->indices[order], tup);
+}
+
+bool
+sord_add(SordModel* model, const SordQuad tup)
+{
+ SORD_WRITE_LOG("Add " TUP_FMT "\n", TUP_FMT_ARGS(tup));
+ if (!tup[0] || !tup[1] || !tup[2]) {
+ error(model->world, SERD_ERR_BAD_ARG,
+ "attempt to add quad with NULL field\n");
+ return false;
+ } else if (model->n_iters > 0) {
+ error(model->world, SERD_ERR_BAD_ARG, "added tuple during iteration\n");
+ }
+
+ const SordNode** quad = (const SordNode**)malloc(sizeof(SordQuad));
+ memcpy(quad, tup, sizeof(SordQuad));
+
+ for (unsigned i = 0; i < NUM_ORDERS; ++i) {
+ if (model->indices[i] && (i < GSPO || tup[3])) {
+ if (!sord_add_to_index(model, quad, (SordOrder)i)) {
+ assert(i == 0); // Assuming index coherency
+ free(quad);
+ return false; // Quad already stored, do nothing
+ }
+ }
+ }
+
+ for (int i = 0; i < TUP_LEN; ++i) {
+ sord_add_quad_ref(model, tup[i], (SordQuadIndex)i);
+ }
+
+ ++model->n_quads;
+ return true;
+}
+
+void
+sord_remove(SordModel* model, const SordQuad tup)
+{
+ SORD_WRITE_LOG("Remove " TUP_FMT "\n", TUP_FMT_ARGS(tup));
+ if (model->n_iters > 0) {
+ error(model->world, SERD_ERR_BAD_ARG, "remove with iterator\n");
+ }
+
+ SordNode* quad = NULL;
+ for (unsigned i = 0; i < NUM_ORDERS; ++i) {
+ if (model->indices[i] && (i < GSPO || tup[3])) {
+ if (zix_btree_remove(model->indices[i], tup, (void**)&quad, NULL)) {
+ assert(i == 0); // Assuming index coherency
+ return; // Quad not found, do nothing
+ }
+ }
+ }
+
+ free(quad);
+
+ for (int i = 0; i < TUP_LEN; ++i) {
+ sord_drop_quad_ref(model, tup[i], (SordQuadIndex)i);
+ }
+
+ --model->n_quads;
+}
+
+SerdStatus
+sord_erase(SordModel* model, SordIter* iter)
+{
+ if (model->n_iters > 1) {
+ error(model->world, SERD_ERR_BAD_ARG, "erased with many iterators\n");
+ return SERD_ERR_BAD_ARG;
+ }
+
+ SordQuad tup;
+ sord_iter_get(iter, tup);
+
+ SORD_WRITE_LOG("Remove " TUP_FMT "\n", TUP_FMT_ARGS(tup));
+
+ SordNode* quad = NULL;
+ for (unsigned i = 0; i < NUM_ORDERS; ++i) {
+ if (model->indices[i] && (i < GSPO || tup[3])) {
+ if (zix_btree_remove(model->indices[i], tup, (void**)&quad,
+ i == iter->order ? &iter->cur : NULL)) {
+ return (i == 0) ? SERD_ERR_NOT_FOUND : SERD_ERR_INTERNAL;
+ }
+ }
+ }
+ iter->end = zix_btree_iter_is_end(iter->cur);
+ sord_iter_scan_next(iter);
+
+ free(quad);
+
+ for (int i = 0; i < TUP_LEN; ++i) {
+ sord_drop_quad_ref(model, tup[i], (SordQuadIndex)i);
+ }
+
+ --model->n_quads;
+ return SERD_SUCCESS;
+}