1 files changed, 1142 insertions, 0 deletions
diff --git a/src/sord.c b/src/sord.c
new file mode 100644
index 0000000..23c1de4
--- /dev/null
+++ b/src/sord.c
@@ -0,0 +1,1142 @@
+/* Sord, a lightweight RDF model library.
+ * Copyright 2010-2011 David Robillard <d@drobilla.net>
+ *
+ * Sord is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Sord is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/** @file
+ * Sord Implementation.
+ *
+ * Tuples are represented as simple arrays of SordID, of length 4,
+ * which represent statements (RDF triples) with an optional
+ * context.  When contexts are not used, only the first 3 elements
+ * are considered.
+ */
+
+// C99
+#include <assert.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+// GLib
+#include <glib.h>
+
+#include "sord-config.h"
+#include "sord/sord.h"
+
+#define SORD_LOG(prefix, ...) fprintf(stderr, "[Sord::" prefix "] " __VA_ARGS__)
+
+#ifdef SORD_DEBUG_ITER
+	#define SORD_ITER_LOG(...) SORD_LOG("iter", __VA_ARGS__)
+#else
+	#define SORD_ITER_LOG(...)
+#endif
+#ifdef SORD_DEBUG_SEARCH
+	#define SORD_FIND_LOG(...) SORD_LOG("search", __VA_ARGS__)
+#else
+	#define SORD_FIND_LOG(...)
+#endif
+#ifdef SORD_DEBUG_WRITE
+	#define SORD_WRITE_LOG(...) SORD_LOG("write", __VA_ARGS__)
+#else
+	#define SORD_WRITE_LOG(...)
+#endif
+
+#define NUM_ORDERS          12
+#define STATEMENT_LEN       3
+#define TUP_LEN             STATEMENT_LEN + 1
+#define DEFAULT_ORDER       SPO
+#define DEFAULT_GRAPH_ORDER GSPO
+
+#define TUP_FMT         "(%d %d %d %d)"
+#define TUP_FMT_ARGS(t) ((t)[0]), ((t)[1]), ((t)[2]), ((t)[3])
+
+#define TUP_S 0
+#define TUP_P 1
+#define TUP_O 2
+#define TUP_G 3
+
+/** Triple ordering */
+typedef enum {
+	 SPO, ///<         Subject,   Predicate, Object
+	 SOP, ///<         Subject,   Object,    Predicate
+	 OPS, ///<         Object,    Predicate, Subject
+	 OSP, ///<         Object,    Subject,   Predicate
+	 PSO, ///<         Predicate, Subject,   Object
+	 POS, ///<         Predicate, Object,    Subject
+	GSPO, ///< Graph,  Subject,   Predicate, Object
+	GSOP, ///< Graph,  Subject,   Object,    Predicate
+	GOPS, ///< Graph,  Object,    Predicate, Subject
+	GOSP, ///< Graph,  Object,    Subject,   Predicate
+	GPSO, ///< Graph,  Predicate, Subject,   Object
+	GPOS, ///< Graph,  Predicate, Object,    Subject
+} SordOrder;
+
+/** String name of each ordering (array indexed by SordOrder) */
+static const char* const order_names[NUM_ORDERS] = {
+	 "spo",  "sop",  "ops",  "osp",  "pso",  "pos",
+	"gspo", "gsop", "gops", "gosp", "gpso", "gpos"
+};
+
+/** Tuples of indices for each order, from most to least significant
+ * (array indexed by SordOrder)
+ */
+static const int orderings[NUM_ORDERS][TUP_LEN] = {
+	{  0,1,2,3}, {  0,2,1,3}, {  2,1,0,3}, {  2,0,1,3}, {  1,0,2,3}, {  1,2,0,3},
+	{3,0,1,2  }, {3,0,2,1  }, {3,2,1,0  }, {3,2,0,1  }, {3,1,0,2  }, {3,1,2,0  }
+};
+
+/** Store Metadata */
+typedef struct {
+	SordCount n_tuples;
+	SordCount n_nodes;
+} SordMeta;
+
+/** Store */
+struct _Sord {
+	GHashTable* names;    ///< URI or blank node identifier string => ID
+	GHashTable* literals; ///< Literal => ID
+
+	/** Index for each possible triple ordering (may or may not exist).
+	 * If an index for e.g. SPO exists, it is a dictionary with
+	 * (S P O) keys (as a SordTuplrID), and ignored values.
+	 */
+	GSequence* indices[NUM_ORDERS];
+
+	void (*user_data_free)(void*); ///< Destructor for node user data
+
+	SordMeta* meta;
+};
+
+/** Mode for searching or iteration */
+typedef enum {
+	ALL,          ///< Iterate to end of store, returning all results, no filtering
+	SINGLE,       ///< Iteration over a single element (exact search)
+	RANGE,        ///< Iterate over range with equal prefix
+	FILTER_RANGE, ///< Iterate over range with equal prefix, filtering
+	FILTER_ALL    ///< Iterate to end of store, filtering
+} SearchMode;
+
+/** Iterator over some range of a store */
+struct _SordIter {
+	Sord           sord;              ///< Store this is an iterator for
+	GSequenceIter* cur;               ///< Current DB cursor
+	SordTuple      pat;               ///< Iteration pattern (in ordering order)
+	int            ordering[TUP_LEN]; ///< Store ordering
+	SearchMode     mode;              ///< Iteration mode
+	int            n_prefix;          ///< Length of range prefix (RANGE, FILTER_RANGE)
+	bool           end;               ///< True iff reached end
+	bool           skip_graphs;       ///< True iff iteration should ignore graphs
+};
+
+#define PACKED __attribute__((__packed__))
+
+/** Node (value in `nodes' hash table) */
+struct _SordNode {
+	uint8_t   type;   ///< SordNodeType
+	SordCount refs;
+	int       size;   ///< Size of node's data (not including type)
+	void*     user_data; ///< Opaque user data
+	char      data[]; ///< String for URI or BLANK, Literal for LITERAL
+} PACKED;
+
+/** Literal value (possibly with type and/or language) */
+typedef struct _SordLiteral {
+	SordNode type;      ///< Literal data type (ID of a URI node, or 0)
+	uint8_t  lang_size; ///< Length of language, including terminator (or 0)
+	char     data[];    ///< Language (terminated), followed by value string
+} PACKED Literal;
+
+static char*
+sord_literal_node_get_value(SordNode node)
+{
+	assert(node->type == SORD_LITERAL);
+	Literal* lit = (Literal*)node->data;
+	return lit->data + lit->lang_size;
+}
+
+static unsigned
+sord_literal_hash(const void* n)
+{
+	SordNode node = (SordNode)n;
+	return g_str_hash(sord_literal_node_get_value(node));
+}
+		
+static gboolean
+sord_literal_equal(const void* a, const void* b)
+{
+	SordNode a_node = (SordNode)a;
+	SordNode b_node = (SordNode)b;
+	// FIXME: type, lang
+	return g_str_equal(sord_literal_node_get_value(a_node),
+	                   sord_literal_node_get_value(b_node));
+}
+
+static inline int
+sord_node_compare(Sord sord, const SordNode a, const SordNode b)
+{
+	if (a->type != b->type)
+		return a->type - b->type;
+
+	Literal* lit_a;
+	Literal* lit_b;
+	switch ((SordNodeType)a->type) {
+	case SORD_URI:
+	case SORD_BLANK:
+		return strcmp(a->data, b->data);
+	case SORD_LITERAL:
+		// TODO: lang, type
+		lit_a = (Literal*)a->data;
+		lit_b = (Literal*)b->data;
+		return strcmp(sord_literal_node_get_value(a), sord_literal_node_get_value(b));
+	}
+	assert(false);
+	return 0;
+}
+
+/** Compare two IDs (dereferencing if necessary).
+ * The null ID, 0, is treated as a minimum (it is less than every other
+ * possible ID, except itself).  This allows it to be used as a wildcard
+ * when searching, ensuring the search will reach the minimum possible
+ * key in the tree and iteration from that point will produce the entire
+ * result set.
+ */
+static inline int
+sord_id_compare(Sord sord, const SordID a, const SordID b)
+{
+	if (a == b || !a || !b) {
+		return (const char*)a - (const char*)b;
+	} else {
+		SordNode a_node = sord_node_load(sord, a);
+		SordNode b_node = sord_node_load(sord, b);
+		const int ret = sord_node_compare(sord, a_node, b_node);
+		return ret;
+	}
+}
+
+/** Return true iff IDs are equivalent, or one is a wildcard */
+static inline bool
+sord_id_match(const SordID a, const SordID b)
+{
+	return !a || !b || (a == b);
+}
+
+static inline bool
+sord_tuple_match_inline(const SordTuple x, const SordTuple y)
+{
+	return sord_id_match(x[0], y[0])
+		&& sord_id_match(x[1], y[1])
+		&& sord_id_match(x[2], y[2])
+		&& sord_id_match(x[3], y[3]);
+}
+
+bool
+sord_tuple_match(const SordTuple x, const SordTuple y)
+{
+	return sord_tuple_match_inline(x, y);
+}
+
+void
+sord_tuple_load(Sord sord, SordTuple tup, SordNode* s, SordNode* p, SordNode* o)
+{
+	*s = sord_node_load(sord, tup[TUP_S]);
+	*p = sord_node_load(sord, tup[TUP_P]);
+	*o = sord_node_load(sord, tup[TUP_O]);
+}
+
+/** Compare two tuple IDs lexicographically.
+ * NULL IDs (equal to 0) are treated as wildcards, always less than every
+ * other possible ID, except itself.
+ */
+static int
+sord_tuple_compare(const void* x_ptr, const void* y_ptr, void* user_data)
+{
+	Sord    const sord = (Sord)user_data;
+	SordID* const x    = (SordID*)x_ptr;
+	SordID* const y    = (SordID*)y_ptr;
+
+	for (int i = 0; i < TUP_LEN; ++i) {
+		const int cmp = sord_id_compare(sord, x[i], y[i]);
+		if (cmp)
+			return cmp;
+	}
+
+	return 0;
+}
+
+static inline bool
+sord_iter_next(SordIter iter)
+{
+	if (!iter->skip_graphs) {
+		iter->cur = g_sequence_iter_next(iter->cur);
+		return !g_sequence_iter_is_end(iter->cur);
+	}
+
+	const SordID*   key     = (const SordID*)g_sequence_get(iter->cur);
+	const SordTuple initial = { key[0], key[1], key[2], key[3] };
+	while (true) {
+		iter->cur = g_sequence_iter_next(iter->cur);
+		if (g_sequence_iter_is_end(iter->cur))
+			return true;
+
+		key = (const SordID*)g_sequence_get(iter->cur);
+		for (int i = 0; i < 3; ++i)
+			if (key[i] != initial[i])
+				return false;
+	}
+	assert(false);
+}
+
+/** Seek forward as necessary until @a iter points at a match.
+ * @return true iff iterator reached end of valid range.
+ */
+static inline bool
+sord_iter_seek_match(SordIter iter)
+{
+	for (iter->end = true;
+	     !g_sequence_iter_is_end(iter->cur);
+	     sord_iter_next(iter)) {
+		const SordID* const key = (const SordID*)g_sequence_get(iter->cur);
+		if (sord_tuple_match_inline(key, iter->pat))
+			return (iter->end = false);
+	}
+	return true;
+}
+
+/** Seek forward as necessary until @a iter points at a match, or the prefix
+ * no longer matches iter->pat.
+ * @return true iff iterator reached end of valid range.
+ */
+static inline bool
+sord_iter_seek_match_range(SordIter iter)
+{
+	if (iter->end)
+		return true;
+
+	do {
+		const SordID* key = (const SordID*)g_sequence_get(iter->cur);
+
+		if (sord_tuple_match_inline(key, iter->pat))
+			return false; // Found match
+
+		for (int i = 0; i < iter->n_prefix; ++i) {
+			if (!sord_id_match(key[i], iter->pat[i])) {
+				iter->end = true; // Reached end of valid range
+				return true;
+			}
+		}
+	} while (!sord_iter_next(iter));
+
+	return (iter->end = true); // Reached end
+}
+
+static SordIter
+sord_iter_new(Sord sord, GSequenceIter* cur, const SordTuple pat,
+              SordOrder order, SearchMode mode, int n_prefix)
+{
+	const int* ordering = orderings[order];
+
+	SordIter iter = malloc(sizeof(struct _SordIter));
+	iter->sord        = sord;
+	iter->cur         = cur;
+	iter->mode        = mode;
+	iter->n_prefix    = n_prefix;
+	iter->end         = false;
+	iter->skip_graphs = order < GSPO;
+	for (int i = 0; i < TUP_LEN; ++i) {
+		iter->pat[i]      = pat[ordering[i]];
+		iter->ordering[i] = ordering[i];
+	}
+
+	switch (iter->mode) {
+	case ALL:
+	case SINGLE:
+	case RANGE:
+		assert(sord_tuple_match_inline(
+			       (const SordID*)g_sequence_get(iter->cur),
+		           iter->pat));
+		break;
+	case FILTER_RANGE:
+		sord_iter_seek_match_range(iter);
+		break;
+	case FILTER_ALL:
+		sord_iter_seek_match(iter);
+		break;
+	}
+
+#ifdef SORD_DEBUG_ITER
+	SordTuple value;
+	sord_iter_get(iter, value);
+	SORD_ITER_LOG("New %p pat=" TUP_FMT " cur=" TUP_FMT " end=%d\n", (void*)iter,
+	              TUP_FMT_ARGS(pat), TUP_FMT_ARGS(value), iter->end);
+#endif
+	return iter;
+}
+
+Sord
+sord_iter_get_sord(SordIter iter)
+{
+	return iter->sord;
+}
+
+void
+sord_iter_get(SordIter iter, SordTuple id)
+{
+	const SordID* key = (const SordID*)g_sequence_get(iter->cur);
+	id[iter->ordering[0]] = key[0];
+	id[iter->ordering[1]] = key[1];
+	id[iter->ordering[2]] = key[2];
+	id[iter->ordering[3]] = key[3];
+}
+
+bool
+sord_iter_increment(SordIter iter)
+{
+	if (iter->end)
+		return true;
+
+	const SordID* key;
+	iter->end = sord_iter_next(iter);
+	if (!iter->end) {
+		switch (iter->mode) {
+		case ALL:
+			// At the end if the cursor is (assigned above)
+			break;
+		case SINGLE:
+			iter->end = true;
+			break;
+		case RANGE:
+			// At the end if the MSN no longer matches
+			key = (const SordID*)g_sequence_get(iter->cur);
+			assert(key);
+			for (int i = 0; i < iter->n_prefix; ++i) {
+				if (!sord_id_match(key[i], iter->pat[i])) {
+					iter->end = true;
+					break;
+				}
+			}
+			break;
+		case FILTER_RANGE:
+			// Seek forward to next match, stopping if prefix changes
+			sord_iter_seek_match_range(iter);
+			break;
+		case FILTER_ALL:
+			// Seek forward to next match
+			sord_iter_seek_match(iter);
+			break;
+		}
+	}
+
+	if (iter->end) {
+		SORD_ITER_LOG("%p Reached end\n", (void*)iter);
+		return true;
+	} else {
+#ifdef SORD_DEBUG_ITER
+		SordTuple tup;
+		sord_iter_get(iter, tup);
+		SORD_ITER_LOG("%p Increment to " TUP_FMT "\n", (void*)iter, TUP_FMT_ARGS(tup));
+#endif
+		return false;
+	}
+}
+
+bool
+sord_iter_is_end(SordIter iter)
+{
+	return !iter || iter->end;
+}
+
+void
+sord_iter_free(SordIter iter)
+{
+	SORD_ITER_LOG("%p Free\n", (void*)iter);
+	if (iter) {
+		free(iter);
+	}
+}
+
+/** Return true iff @a sord has an index for @a order.
+ * If @a graph_search is true, @a order will be modified to be the
+ * corresponding order with a G prepended (so G will be the MSN).
+ */
+static inline bool
+sord_has_index(Sord sord, SordOrder* order, int* n_prefix, bool graph_search)
+{
+	if (graph_search) {
+		*order    += GSPO;
+		*n_prefix += 1;
+	}
+
+	return sord->indices[*order];
+}
+
+/** Return the best available index for a pattern.
+ * @param pat Pattern in standard (S P O G) order
+ * @param mode Set to the (best) iteration mode for iterating over results
+ * @param n_prefix Set to the length of the range prefix
+ *        (for @a mode == RANGE and @a mode == FILTER_RANGE)
+ */
+static inline SordOrder
+sord_best_index(Sord sord, const SordTuple pat, SearchMode* mode, int* n_prefix)
+{
+	const bool graph_search = (pat[TUP_G] != 0);
+
+	const unsigned sig
+		= (pat[0] ? 1 : 0) * 0x100
+		+ (pat[1] ? 1 : 0) * 0x010
+		+ (pat[2] ? 1 : 0) * 0x001;
+
+	SordOrder good[2];
+
+	// Good orderings that don't require filtering
+	*mode     = RANGE;
+	*n_prefix = 0;
+	switch (sig) {
+	case 0x000: *mode = ALL; return graph_search ? DEFAULT_GRAPH_ORDER : DEFAULT_ORDER;
+	case 0x001: *mode = RANGE; good[0] = OPS; good[1] = OSP; *n_prefix = 1; break;
+	case 0x010: *mode = RANGE; good[0] = POS; good[1] = PSO; *n_prefix = 1; break;
+	case 0x011: *mode = RANGE; good[0] = OPS; good[1] = POS; *n_prefix = 2; break;
+	case 0x100: *mode = RANGE; good[0] = SPO; good[1] = SOP; *n_prefix = 1; break;
+	case 0x101: *mode = RANGE; good[0] = SOP; good[1] = OSP; *n_prefix = 2; break;
+	case 0x110: *mode = RANGE; good[0] = SPO; good[1] = PSO; *n_prefix = 2; break;
+	case 0x111: *mode = SINGLE; return graph_search ? DEFAULT_GRAPH_ORDER : DEFAULT_ORDER;
+	}
+
+	if (sord_has_index(sord, &good[0], n_prefix, graph_search)) {
+		return good[0];
+	} else if (sord_has_index(sord, &good[1], n_prefix, graph_search)) {
+		return good[1];
+	}
+
+	// Not so good orderings that require filtering, but can
+	// still be constrained to a range
+	switch (sig) {
+	case 0x011: *mode = FILTER_RANGE; good[0] = OSP; good[1] = PSO; *n_prefix = 1; break;
+	case 0x101: *mode = FILTER_RANGE; good[0] = SPO; good[1] = OPS; *n_prefix = 1; break;
+	case 0x110: *mode = FILTER_RANGE; good[0] = SOP; good[1] = POS; *n_prefix = 1; break;
+	default: break;
+	}
+
+	if (*mode == FILTER_RANGE) {
+		if (sord_has_index(sord, &good[0], n_prefix, graph_search)) {
+			return good[0];
+		} else if (sord_has_index(sord, &good[1], n_prefix, graph_search)) {
+			return good[1];
+		}
+	}
+
+	if (graph_search) {
+		*mode = FILTER_RANGE;
+		*n_prefix = 1;
+		return DEFAULT_GRAPH_ORDER;
+	} else {
+		*mode = FILTER_ALL;
+		return DEFAULT_ORDER;
+	}
+}
+
+static GSequence*
+sord_new_btree()
+{
+	return g_sequence_new(NULL);
+}
+
+Sord
+sord_new()
+{
+	Sord sord = (Sord)malloc(sizeof(struct _Sord));
+	sord->names    = g_hash_table_new_full(g_str_hash, g_str_equal, 0, 0);
+	sord->literals = g_hash_table_new_full(sord_literal_hash, sord_literal_equal, 0, 0);
+	sord->user_data_free = NULL;
+
+	for (unsigned i = 0; i < NUM_ORDERS; ++i)
+		sord->indices[i] = NULL;
+
+	return sord;
+}
+
+void
+sord_free(Sord sord)
+{
+	if (!sord)
+		return;
+
+	free(sord->meta);
+	g_hash_table_unref(sord->names);
+	g_hash_table_unref(sord->literals);
+	for (unsigned i = 0; i < NUM_ORDERS; ++i)
+		if (sord->indices[i])
+			g_sequence_free(sord->indices[i]);
+
+	free(sord);
+}
+
+void
+sord_set_option(Sord sord, const char* key, const char* value,
+                SordNodeType type, const char* datatype, const char* lang)
+{
+	const char* const prefix     = "http://drobilla.net/ns/sord#";
+	const size_t      prefix_len =  strlen(prefix);
+	if (strncmp(key, prefix, prefix_len)) {
+		fprintf(stderr, "Unknown option %s\n", key);
+		return;
+	}
+
+	const char* option        = key + prefix_len;
+	const bool  value_is_true = !strcmp(value, "true") || !strcmp(value, "1") || !strcmp(value, "yes");
+	if (!strcmp(option, "index-all")) {
+		for (int i = 0; i < NUM_ORDERS; ++i) {
+			sord->indices[i] = sord_new_btree();
+		}
+	} else if (!strncmp(option, "index-", 6) && value_is_true) {
+		for (int i = 0; i < NUM_ORDERS; ++i) {
+			if (!strcmp(option + 6, order_names[i])) {
+				sord->indices[i] = sord_new_btree();
+				return;
+			}
+		}
+	} else {
+		fprintf(stderr, "Unknown option %s\n", key);
+	}
+}
+
+bool
+sord_open(Sord sord)
+{
+	sord->meta = malloc(sizeof(SordMeta));
+	memset(sord->meta, 0, sizeof(SordMeta));
+
+	bool no_indices = true;
+	for (unsigned i = 0; i < NUM_ORDERS; ++i) {
+		if (sord->indices[i]) {
+			no_indices = false;
+			break;
+		}
+	}
+
+	if (no_indices) {
+		// Use default indexing, avoids O(n) in all cases
+		sord->indices[SPO]  = sord_new_btree();
+		sord->indices[OPS]  = sord_new_btree();
+		sord->indices[PSO]  = sord_new_btree();
+		sord->indices[GSPO] = sord_new_btree(); // XXX: default?  do on demand?
+	}
+
+	if (!sord->indices[DEFAULT_ORDER])
+		sord->indices[DEFAULT_ORDER] = sord_new_btree();
+
+	return true;
+}
+
+static void
+sord_drop_node(Sord sord, SordID id)
+{
+	// FIXME: leak?
+}
+
+int
+sord_num_tuples(Sord sord)
+{
+	return sord->meta->n_tuples;
+}
+
+int
+sord_num_nodes(Sord sord)
+{
+	return sord->meta->n_nodes;
+}
+
+void
+sord_node_set_user_data_free_function(Sord sord, void (*f)(void*))
+{
+	sord->user_data_free = f;
+}
+
+SordIter
+sord_begin(Sord sord)
+{
+	if (sord_num_tuples(sord) == 0) {
+		return NULL;
+	} else {
+		GSequenceIter* cur = g_sequence_get_begin_iter(sord->indices[DEFAULT_ORDER]);
+		SordTuple pat = { 0, 0, 0, 0 };
+		return sord_iter_new(sord, cur, pat, DEFAULT_ORDER, ALL, 0);
+	}
+}
+
+SordIter
+sord_graphs_begin(Sord read)
+{
+	return NULL;
+}
+
+static inline GSequenceIter*
+index_search(Sord sord, GSequence* db, const SordTuple search_key)
+{
+	return  g_sequence_search(
+		db, (void*)search_key, sord_tuple_compare, sord);
+}
+
+static inline GSequenceIter*
+index_lower_bound(Sord sord, GSequence* db, const SordTuple search_key)
+{
+	GSequenceIter* i = g_sequence_search(
+		db, (void*)search_key, sord_tuple_compare, sord);
+
+	/* i is now at the position where search_key would be inserted,
+	   but this is not necessarily a match, and we need the leftmost...
+	*/
+
+	if (g_sequence_iter_is_begin(i)) {
+		return i;
+	} else if (g_sequence_iter_is_end(i)) {
+		i = g_sequence_iter_prev(i);
+	}
+
+	if (!sord_tuple_match_inline(search_key, g_sequence_get(i))) {
+		// No match, but perhaps immediately after a match
+		GSequenceIter* const prev = g_sequence_iter_prev(i);
+		if (!sord_tuple_match_inline(search_key, g_sequence_get(prev))) {
+			return i; // No match (caller must check)
+		} else {
+			i = prev;
+		}
+	}
+
+	/* i now points to some matching element,
+	   but not necessarily the first...
+	*/
+	assert(sord_tuple_match_inline(search_key, g_sequence_get(i)));
+
+	while (!g_sequence_iter_is_begin(i)) {
+		if (sord_tuple_match_inline(search_key, g_sequence_get(i))) {
+			GSequenceIter* const prev = g_sequence_iter_prev(i);
+			if (sord_tuple_match_inline(search_key, g_sequence_get(prev))) {
+				i = prev;
+				continue;
+			}
+		}
+		break;
+	}
+
+	return i;
+}
+
+SordIter
+sord_find(Sord sord, const SordTuple pat)
+{
+	if (!pat[0] && !pat[1] && !pat[2] && !pat[3])
+		return sord_begin(sord);
+
+	SearchMode          mode;
+	int                 prefix_len;
+	const SordOrder     index_order = sord_best_index(sord, pat, &mode, &prefix_len);
+	const int* const    ordering    = orderings[index_order];
+
+	SORD_FIND_LOG("Find " TUP_FMT "  index=%s  mode=%d  prefix_len=%d ordering=%d%d%d%d\n",
+			TUP_FMT_ARGS(pat), order_names[index_order], mode, prefix_len,
+			ordering[0], ordering[1], ordering[2], ordering[3]);
+
+	// It's easiest to think about this algorithm in terms of (S P O) ordering,
+	// assuming (A B C) == (S P O).  For other orderings this is not actually
+	// the case, but it works the same way.
+	const SordID a = pat[ordering[0]]; // Most Significant Node (MSN)
+	const SordID b = pat[ordering[1]]; // ...
+	const SordID c = pat[ordering[2]]; // ...
+	const SordID d = pat[ordering[3]]; // Least Significant Node (LSN)
+
+	if (a && b && c)
+		mode = SINGLE; // No duplicate tuples (Sord is a set)
+
+	SordTuple            search_key = { a, b, c, d };
+	GSequence* const     db         = sord->indices[index_order];
+	GSequenceIter* const cur        = index_lower_bound(sord, db, search_key);
+	const SordID* const  key        = (const SordID*)g_sequence_get(cur);
+	if (!key || ( (mode == RANGE || mode == SINGLE)
+	              && !sord_tuple_match_inline(search_key, key) )) {
+		SORD_FIND_LOG("No match found\n");
+		return NULL;
+	}
+
+	return sord_iter_new(sord, cur, pat, index_order, mode, prefix_len);
+}
+
+static SordID
+sord_lookup_name(Sord sord, const char* str, int str_len)
+{
+	return g_hash_table_lookup(sord->names, str);
+}
+
+static SordNode
+sord_new_literal_node(Sord sord, SordNode type,
+                      const char* str,  int str_len,
+                      const char* lang, uint8_t lang_len,
+                      int* node_size)
+{
+	const uint8_t lang_size = lang ? lang_len + 1 : lang_len;
+	*node_size = sizeof(struct _SordNode) + sizeof(struct _SordLiteral) + str_len + 1 + lang_size;
+	SordNode node = malloc(*node_size);
+	node->type = SORD_LITERAL;
+	Literal* lit = (Literal*)node->data;
+	lit->type = type;
+	lit->lang_size = lang_size;
+	if (lang)
+		memcpy(lit->data, lang, lang_size);
+
+	memcpy(lit->data + lang_size, str, str_len + 1);
+	return node;
+}
+
+static SordNode
+sord_lookup_literal(Sord sord, SordNode type,
+                    const char* str,  int     str_len,
+                    const char* lang, uint8_t lang_len)
+{
+	int node_size;
+	SordNode node = sord_new_literal_node(sord, type, str, str_len, lang, lang_len, &node_size);
+
+	SordNode id = g_hash_table_lookup(sord->literals, node);
+	free(node);
+	if (id) {
+		return id;
+	} else {
+		return 0;
+	}
+}
+
+SordNode
+sord_node_load(Sord sord, SordID id)
+{
+	return (SordNode)id;
+}
+
+static SordNode
+sord_new_node(Sord sord, SordNodeType type, const char* buf, int buf_len, int* node_size)
+{
+	*node_size = sizeof(struct _SordNode) + buf_len;
+	SordNode node = malloc(*node_size);
+	node->type = type;
+	memcpy(node->data, buf, buf_len);
+	return node;
+}
+
+SordNodeType
+sord_node_get_type(SordNode ref)
+{
+	return ref->type;
+}
+
+const char*
+sord_node_get_string(SordNode ref)
+{
+	switch (ref->type) {
+	case SORD_URI:
+	case SORD_BLANK:
+		return ref->data;
+		break;
+	case SORD_LITERAL:
+		return ((Literal*)&ref->data)->data;
+		break;
+	}
+	assert(false);
+	return NULL;
+}
+
+void
+sord_node_set_user_data(SordNode ref, void* user_data)
+{
+	ref->user_data = user_data;
+}
+
+void*
+sord_node_get_user_data(SordNode ref)
+{
+	return ref->user_data;
+}
+
+const char*
+sord_literal_get_lang(SordNode ref)
+{
+	Literal* lit = (Literal*)&ref->data;
+	if (lit->lang_size > 0)
+		return lit->data;
+	else
+		return NULL;
+}
+
+SordNode
+sord_literal_get_datatype(SordNode ref)
+{
+	Literal* lit = (Literal*)&ref->data;
+	return lit->type;
+}
+
+static void
+sord_add_node(Sord sord, SordNode node, int node_size)
+{
+	node->refs = 0;
+	++sord->meta->n_nodes;
+}
+
+SordID
+sord_get_uri_counted(Sord sord, bool create, const char* str, int str_len)
+{
+	SordID id = sord_lookup_name(sord, str, str_len);
+	if (id || !create)
+		return id;
+
+	int node_size;
+	id = sord_new_node(sord, SORD_URI, str, str_len + 1, &node_size);
+
+	assert(id);
+	g_hash_table_insert(sord->names, (void*)g_strdup(str), (void*)id);
+	sord_add_node(sord, id, node_size);
+	
+	return id;
+}
+
+SordID
+sord_get_uri(Sord sord, bool create, const char* str)
+{
+	return sord_get_uri_counted(sord, create, str, strlen(str));
+}
+
+SordID
+sord_get_blank_counted(Sord sord, bool create, const char* str, int str_len)
+{
+	SordID id = sord_lookup_name(sord, str, str_len);
+	if (id || !create)
+		return id;
+
+	int node_size;
+	id = sord_new_node(sord, SORD_BLANK, str, str_len + 1, &node_size);
+
+	assert(id);
+	g_hash_table_insert(sord->names, (void*)g_strdup(str), (void*)id);
+	sord_add_node(sord, id, node_size);
+	
+	return id;
+}
+
+SordID
+sord_get_blank(Sord sord, bool create, const char* str)
+{
+	return sord_get_blank_counted(sord, create, str, strlen(str));
+}
+
+SordID
+sord_get_literal_counted(Sord sord, bool create, SordID type,
+                         const char* str,  int     str_len,
+                         const char* lang, uint8_t lang_len)
+{
+	SordID id = sord_lookup_literal(sord, type, str, str_len, lang, lang_len);
+	if (id || !create)
+		return id;
+
+	int node_size;
+	id = sord_new_literal_node(sord, type, str, str_len, lang, lang_len, &node_size);
+
+	g_hash_table_insert(sord->literals, (void*)id, id);
+	sord_add_node(sord, id, node_size);
+	
+	return id;
+}
+
+SordID
+sord_get_literal(Sord sord, bool create, SordID type, const char* str, const char* lang)
+{
+	return sord_get_literal_counted(sord, create, type, str, strlen(str),
+			lang, lang ? strlen(lang) : 0);
+}
+
+static void
+sord_add_tuple_ref(Sord sord, const SordID id)
+{
+	if (id) {
+		SordNode node = sord_node_load(sord, id);
+		++node->refs;
+	}
+}
+
+static void
+sord_drop_tuple_ref(Sord sord, const SordID id)
+{
+	if (id) {
+		SordNode node = sord_node_load(sord, id);
+		if (--node->refs == 0) {
+			sord_drop_node(sord, id);
+		}
+	}
+}
+
+static inline bool
+sord_add_to_index(Sord sord, const SordTuple tup, SordOrder order)
+{
+	assert(sord->indices[order]);
+	const int* const ordering = orderings[order];
+	const SordTuple key = {
+		tup[ordering[0]], tup[ordering[1]], tup[ordering[2]], tup[ordering[3]]
+	};
+	GSequenceIter* const cur = index_search(sord, sord->indices[order], key);
+	if (!g_sequence_iter_is_end(cur)
+	    && !sord_tuple_compare(g_sequence_get(cur), key, sord)) {
+		return false;  // Tuple already stored in this index
+	}
+
+	// FIXME: memory leak?
+	SordID* key_copy = malloc(sizeof(SordTuple));
+	memcpy(key_copy, key, sizeof(SordTuple));
+	g_sequence_insert_before(cur, key_copy);
+	return true;
+}
+
+void
+sord_add(Sord sord, const SordTuple tup)
+{
+	SORD_WRITE_LOG("Add " TUP_FMT "\n", TUP_FMT_ARGS(tup));
+	assert(tup[0] && tup[1] && tup[2]);
+
+	for (unsigned i = 0; i < NUM_ORDERS; ++i) {
+		if (sord->indices[i]) {
+			if (!sord_add_to_index(sord, tup, i)) {
+				assert(i == 0); // Assuming index coherency
+				return; // Tuple already stored, do nothing
+			}
+		}
+	}
+
+	for (int i = 0; i < TUP_LEN; ++i)
+		sord_add_tuple_ref(sord, tup[i]);
+
+	++sord->meta->n_tuples;
+	assert(sord->meta->n_tuples == g_sequence_get_length(sord->indices[SPO]));
+}
+
+void
+sord_remove(Sord sord, const SordTuple tup)
+{
+	SORD_WRITE_LOG("Remove " TUP_FMT "\n", TUP_FMT_ARGS(tup));
+		
+	for (unsigned i = 0; i < NUM_ORDERS; ++i) {
+		if (sord->indices[i]) {
+			const int* const ordering = orderings[i];
+			const SordTuple key = {
+				tup[ordering[0]], tup[ordering[1]], tup[ordering[2]], tup[ordering[3]]
+			};
+			GSequenceIter* const cur = index_search(sord, sord->indices[i], key);
+			if (!g_sequence_iter_is_end(cur)) {
+				g_sequence_remove(cur);
+			} else {
+				assert(i == 0); // Assuming index coherency
+				return; // Tuple not found, do nothing
+			}
+		}
+	}
+
+	for (int i = 0; i < TUP_LEN; ++i)
+		sord_drop_tuple_ref(sord, tup[i]);
+
+	--sord->meta->n_tuples;
+}
+
+void
+sord_remove_iter(Sord sord, SordIter iter)
+{
+	SordTuple tup;
+	sord_iter_get(iter, tup);
+
+	SORD_WRITE_LOG("Remove " TUP_FMT "\n", TUP_FMT_ARGS(tup));
+	
+	// TODO: Directly remove the iterator's index (avoid one search)
+
+	for (unsigned i = 0; i < NUM_ORDERS; ++i) {
+		if (sord->indices[i]) {
+			const int* const ordering = orderings[i];
+			const SordTuple key = {
+				tup[ordering[0]], tup[ordering[1]], tup[ordering[2]], tup[ordering[3]]
+			};
+			GSequenceIter* const cur = index_search(sord, sord->indices[i], key);
+			if (!g_sequence_iter_is_end(cur)) {
+				g_sequence_remove(cur);
+			} else {
+				assert(i == 0); // Assuming index coherency
+			}
+		}
+	}
+	
+	for (int i = 0; i < TUP_LEN; ++i)
+		sord_drop_tuple_ref(sord, tup[i]);
+
+	--sord->meta->n_tuples;
+
+	iter->end = g_sequence_iter_is_end(iter->cur);
+}
+
+void
+sord_remove_graph(Sord sord, SordID graph)
+{
+	#if 0
+	if (!sord->indices[GSPO])
+		return;
+
+	// Remove all tuples in graph from non-graph indices
+	BDBCUR*         cur        = tcbdbcurnew(sord->indices[GSPO]);
+	const SordTuple search_key = { graph, 0, 0, 0 };
+	int             key_size   = sizeof(SordTuple);
+	tcbdbcurjump(cur, &search_key, key_size);
+	do {
+		const SordID* key = (const SordID*)tcbdbcurkey3(cur, &key_size);
+		if (!key || key[0] != graph)
+			break;
+
+		for (unsigned i = 0; i < GSPO; ++i) {
+			if (sord->indices[i]) {
+				const int* const ordering = orderings[i];
+				const SordTuple tup = { key[1], key[2], key[3], key[0] }; // Key in SPOG order
+				const SordTuple subkey = {
+					tup[ordering[0]], tup[ordering[1]], tup[ordering[2]], tup[ordering[3]]
+				};
+				if (!tcbdbout(sord->indices[i], &subkey, sizeof(SordTuple)))
+					fprintf(stderr, "Failed to remove key " TUP_FMT "\n", TUP_FMT_ARGS(subkey));
+			}
+		}
+
+		--sord->meta->n_tuples;
+	} while (tcbdbcurnext(cur));
+
+	// Remove all tuples in graph from graph indices
+	for (unsigned i = GSPO; i < NUM_ORDERS; ++i) {
+		if (sord->indices[i]) {
+			BDBCUR* cur = tcbdbcurnew(sord->indices[i]);
+			tcbdbcurjump(cur, &search_key, key_size);
+			while (true) {
+				const SordID* key = (const SordID*)tcbdbcurkey3(cur, &key_size);
+				if (!key || key[0] != graph) {
+					break;
+				} else if (i == GSPO) {
+					for (int i = 0; i < TUP_LEN; ++i) {
+						sord_drop_tuple_ref(sord, key[i]);
+					}
+				}
+				if (!tcbdbcurout(cur))
+					break;
+			}
+		}
+	}
+	#endif
+}